Esempio n. 1
0
def dep_counts(name):
    """
    Count stored records that refer to the text title `name`.

    Three regular expressions are built from `name`:
      - 'alone':     strings beginning with "<name> <digit>"
      - 'commentor': strings containing "<name> on"
      - 'commentee': strings beginning with "<commentator> on <name> <digit>",
                     for any title returned by the Commentary index query

    :param name: the title to look for; it is regex-escaped before use
    :return: dict mapping a description string to the number of matching
             Version, History, Note or Link records
    """
    commentators = model.IndexSet({"categories.0": "Commentary"}).distinct("title")
    escaped_name = re.escape(name)
    # Commentator titles are literal text, so escape each one before joining
    # them into an alternation; otherwise a title containing regex
    # metacharacters (parentheses, dots, ...) would change the pattern.
    any_commentator = "|".join(re.escape(title) for title in commentators)

    ref_patterns = {
        'alone': r'^{} \d'.format(escaped_name),
        'commentor': r'{} on'.format(escaped_name),
        'commentee': r'^({}) on {} \d'.format(any_commentator, escaped_name)
    }

    # Titles of commentaries on `name` are matched with the very same
    # expression as 'commentee' refs, so reuse it instead of rebuilding it.
    # NOTE(review): this pattern requires a trailing " <digit>" - confirm
    # that version/history titles are really expected to carry one.
    commentee_title_pattern = ref_patterns['commentee']

    ret = {
        'version title exact match': model.VersionSet({"title": name}).count(),
        'version title match commentor': model.VersionSet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'version title match commentee': model.VersionSet({"title": {"$regex": commentee_title_pattern}}).count(),
        'history title exact match': model.HistorySet({"title": name}).count(),
        'history title match commentor': model.HistorySet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'history title match commentee': model.HistorySet({"title": {"$regex": commentee_title_pattern}}).count(),
    }

    # For every ref pattern, count the notes, links and history records
    # whose ref field(s) match it.
    for pname, pattern in ref_patterns.items():
        ret.update({
            'note match ' + pname: model.NoteSet({"ref": {"$regex": pattern}}).count(),
            'link match ' + pname: model.LinkSet({"refs": {"$regex": pattern}}).count(),
            'history refs match ' + pname: model.HistorySet({"ref": {"$regex": pattern}}).count(),
            'history new refs match ' + pname: model.HistorySet({"new.refs": {"$regex": pattern}}).count()
        })

    return ret
Esempio n. 2
0
def test_index_delete():
    """
    Check that deleting an Index also leaves no Index or Version records
    behind, first for a simple text and then for a commentator.
    """
    # ---- Simple text ----
    title = "Test Del"
    # Start from a clean slate: remove records with this title first.
    model.IndexSet({"title": title}).delete()
    model.VersionSet({"title": title}).delete()

    index = model.Index({
        "title": title,
        "heTitle": u"כבכב",
        "titleVariants": [title],
        "sectionNames": ["Chapter", "Paragraph"],
        "categories": ["Musar"],
        "lengths": [50, 501]
    }).save()

    def build_version(version_title, lang):
        # Fresh Version of the test index, initialised with a new skeleton.
        return model.Version({
            "chapter": index.nodes.create_skeleton(),
            "versionTitle": version_title,
            "versionSource": "blabla",
            "language": lang,
            "title": index.title
        })

    hebrew_version = build_version("Version 1 TEST", "he")
    hebrew_version.chapter = [[u''],[u''],[u"לה לה לה לא חשוב על מה"]]
    hebrew_version.save()

    english_version = build_version("Version 2 TEST", "en")
    english_version.chapter = [[],["Hello goodbye bla bla blah"],[]]
    english_version.save()

    index.delete()
    assert model.Index().load({'title': title}) is None
    assert model.VersionSet({'title': title}).count() == 0

    # ---- Commentator ----
    from sefaria.helper.text import create_commentator_and_commentary_version

    commentator_name = "Commentator Del"
    he_commentator_name = u"פרשנדנן"
    base_books = ('Genesis', 'Pesach Haggadah')

    # Clean slate again: the commentator index, then each commentary version.
    model.IndexSet({"title": commentator_name}).delete()
    for base_book in base_books:
        model.VersionSet({"title": commentator_name + " on " + base_book}).delete()

    for base_book in base_books:
        create_commentator_and_commentary_version(commentator_name, base_book, 'he', 'test', 'test', he_commentator_name)

    model.Index().load({'title': commentator_name}).delete()
    assert model.Index().load({'title': commentator_name}) is None
    assert model.VersionSet({'title': {'$regex': commentator_name}}).count() == 0
Esempio n. 3
0
def merge_text_versions_by_source(text_title, language, warn=False):
    """
    For each distinct versionSource among the versions of `text_title` in
    `language`, hand the versionTitles sharing that source to
    merge_multiple_text_versions.

    `warn` is accepted but not used by this function.
    """
    all_versions = model.VersionSet({"title": text_title, "language": language})
    sources = all_versions.distinct("versionSource")

    for source in sources:
        same_source = model.VersionSet({
            "title": text_title,
            "versionSource": source,
            "language": language
        })
        version_titles = same_source.distinct("versionTitle")
        merge_multiple_text_versions(version_titles, text_title, language)
Esempio n. 4
0
def test_version_word_count():
    """
    Check word_count() on single Versions (one exact figure, the rest just
    non-zero) and on a VersionSet.
    """
    # simple: one version with a known, exact word count
    genesis = model.Version().load({
        "title": "Genesis",
        "language": "he",
        "versionTitle": "Tanach with Ta'amei Hamikra"
    })
    assert genesis.word_count() == 20813

    # the remaining single versions only need a non-zero count
    nonempty_queries = [
        # simple
        {"title": "Rashi on Shabbat", "language": "he"},
        # complex
        {"title": "Pesach Haggadah", "language": "he"},
        {"title": "Orot", "language": "he"},
        {"title": "Ephod Bad on Pesach Haggadah"},
    ]
    for query in nonempty_queries:
        assert model.Version().load(query).word_count() > 0

    # sets
    haggadah_versions = model.VersionSet({"title": {"$regex": "Haggadah"}})
    assert haggadah_versions.word_count() > 200000
Esempio n. 5
0
def dep_counts(name, indx):
    """
    Count the Version, History, Note and Link records that match `name`.

    The dependency regexes for `indx` are taken from
    prepare_index_regex_for_dependency_process, and the escaped index title
    inside each of them is swapped for the escaped `name`.

    :param name: title to count dependencies for
    :param indx: index object whose regexes serve as the template
    :return: dict mapping a description string to a record count
    """
    from sefaria.model.text import prepare_index_regex_for_dependency_process

    patterns = []
    for template in prepare_index_regex_for_dependency_process(indx, as_list=True):
        patterns.append(template.replace(re.escape(indx.title), re.escape(name)))

    def matches_any(field):
        # Query that is satisfied when `field` matches at least one pattern.
        return {'$or': [{field: {'$regex': regex}} for regex in patterns]}

    ret = {}
    ret['version title exact match'] = model.VersionSet(
        {"title": name}, sort=[('title', 1)]).count()
    ret['history title exact match'] = model.HistorySet(
        {"title": name}, sort=[('title', 1)]).count()
    ret['note match '] = model.NoteSet(
        matches_any("ref"), sort=[('ref', 1)]).count()
    ret['link match '] = model.LinkSet(matches_any("refs")).count()
    ret['history refs match '] = model.HistorySet(
        matches_any("ref"), sort=[('ref', 1)]).count()
    ret['history new refs match '] = model.HistorySet(
        matches_any("new.refs"), sort=[('new.refs', 1)]).count()
    return ret
Esempio n. 6
0
def test_index_delete():
    """
    Save an Index with one Hebrew and one English Version, delete the Index,
    and check that neither the Index nor any Version with its title remains.
    """
    # Simple Text
    title = "Test Del"

    index = model.Index({
        "title": title,
        "heTitle": "כבכב",
        "titleVariants": [title],
        "sectionNames": ["Chapter", "Paragraph"],
        "categories": ["Musar"],
        "lengths": [50, 501]
    }).save()

    # (versionTitle, language, chapter content) for each version to create
    version_specs = [
        ("Version 1 TEST", "he", [[''], [''], ["לה לה לה לא חשוב על מה"]]),
        ("Version 2 TEST", "en", [[], ["Hello goodbye bla bla blah"], []]),
    ]
    for version_title, lang, content in version_specs:
        version = model.Version({
            "chapter": index.nodes.create_skeleton(),
            "versionTitle": version_title,
            "versionSource": "blabla",
            "language": lang,
            "title": index.title
        })
        # Replace the skeleton with the actual test content before saving.
        version.chapter = content
        version.save()

    index.delete()
    assert model.Index().load({'title': title}) is None
    assert model.VersionSet({'title': title}).count() == 0
Esempio n. 7
0
 def teardown_class(cls):
     """Delete the Index and Version records for a fixed list of test titles."""
     for leftover_title in (
             "New Toc Title Test",
             "New Toc Test",
             "Another New Toc Test",
             "Harchev Davar on Joshua",
             "Bob is your Uncle",
     ):
         title_query = {"title": leftover_title}
         model.IndexSet(title_query).delete()
         model.VersionSet({"title": leftover_title}).delete()
Esempio n. 8
0
def merge_text_versions_by_language(text_title, language, warn=False):
    """
    Hand the distinct versionTitles of every version of `text_title` in
    `language` to merge_multiple_text_versions.

    `warn` is accepted but not used by this function.
    """
    in_language = model.VersionSet({"title": text_title, "language": language})
    version_titles = in_language.distinct("versionTitle")
    merge_multiple_text_versions(version_titles, text_title, language)
Esempio n. 9
0
def teardown_module(module):
    """Delete the Index and Version records for a fixed list of test titles."""
    for leftover_title in (
            'Test Commentator Name',
            'Bartenura (The Next Generation)',
            'Test Index Name',
            "Changed Test Index",
            "Third Attempt",
            "Test Iu",
            "Test Del",
    ):
        model.IndexSet({"title": leftover_title}).delete()
        model.VersionSet({"title": leftover_title}).delete()
Esempio n. 10
0
def remove_trailing_empty_segments():
    """
    Removes empty segments from the end of any text section.

    Walks every Version; when trimming its `chapter` array changes it, the
    trimmed text is saved and the VersionState for that title is refreshed.
    """
    for text in model.VersionSet():
        if not model.Ref.is_ref(text.title):
            continue  # Ignore text versions we don't understand
        # Trim a copy so the stored text can be compared against the result.
        new_text = rtrim_jagged_string_array(deepcopy(text.chapter))
        if new_text == text.chapter:
            continue  # Nothing to trim in this version
        # Parenthesised single-argument print behaves the same on
        # Python 2 and is valid syntax on Python 3.
        print(text.title + " CHANGED")
        text.chapter = new_text
        text.save()
        model.VersionState(text.title).refresh()
Esempio n. 11
0
def update_counts(ref=None):
	"""
	Update the count records of all texts, or only of the text specified
	by ref (currently at book level only), by performing a count.

	When ref is given, only that text is counted and the summaries are not
	updated. Otherwise every index is processed: for a commentary index each
	version title starting with "<index title> on " is counted, for any other
	index the index title itself is counted; the summaries are updated last.
	"""
	# Local import so this function does not rely on the module importing re.
	import re

	if ref:
		update_text_count(ref)
		return

	for index in model.IndexSet():
		if not index.is_commentary():
			update_text_count(index.title)
			continue

		# The title is literal text: escape it so that regex metacharacters
		# in a commentator's name (e.g. parentheses) cannot alter the match.
		commentary_title_pattern = "^{} on ".format(re.escape(index.title))
		commentary_titles = model.VersionSet(
			{"title": {"$regex": commentary_title_pattern}}
		).distinct("title")
		for commentary_title in commentary_titles:
			update_text_count(commentary_title)

	summaries.update_summaries()
Esempio n. 12
0
def remove_old_counts():
    """
    Deletes counts documents which no longer correspond to a text or category.

    A vstate document is removed when its title cannot be resolved to an
    index (BookNameError), or when the resolved index has no versions.
    Documents without a title are left alone.
    """
    # If there are counts documents saved in the DB with invalid titles,
    # instantiation of the Count will cause a BookNameError.
    # But in this code instantiation happens in the line 'for count in counts'
    # How do we catch that? Additionally, we need access to the bad title after
    # the error has occurred. How would we get that? Reverting to direct DB call for now.
    counts = db.vstate.find({}, {"title": 1})
    for count in counts:
        title = count.get("title")
        if not title:
            continue
        # Parenthesised single-argument print behaves the same on
        # Python 2 and is valid syntax on Python 3.
        print("Checking " + title)
        try:
            i = model.library.get_index(title)
            if model.VersionSet({"title": i.title}).count() == 0:
                print(u"Old count for Commentary with no content: %s" % title)
                db.vstate.remove({"_id": count["_id"]})
        except BookNameError:
            # The title does not resolve to any known index.
            print(u"Old count: %s" % title)
            db.vstate.remove({"_id": count["_id"]})
Esempio n. 13
0
import pytest
import sefaria.summaries as s
import sefaria.model as model
import sefaria.system.cache as scache
from sefaria.utils.testing_utils import *

#create, update, delete, change categories
# test that old title goes away on index title change (regular + commentary)
# test that no commentator is added
# no wandering commentaries


""" SOME SETUP """

# Module-level setup; it runs once, when this test module is imported.
# Distinct 'title' values over all Version records (empty query = no filter).
text_titles = model.VersionSet({}).distinct('title')
# Presumably rebuilds the table of contents before the tests read it - confirm in sefaria.summaries.
s.update_table_of_contents()
# Drop the 'toc_cache' cache element; presumably so the tests do not read a stale cached TOC - confirm.
scache.delete_cache_elem('toc_cache')


""" THE TESTS """


class Test_Toc(object):
    def test_toc_integrity(self):
        toc = s.get_toc()
        self.recur_toc_integrity(toc)


    def recur_toc_integrity(self, toc, depth=0):
         for toc_elem in toc:
Esempio n. 14
0
# Fragment of a one-off script that renames, deletes and re-prioritises text
# versions. The title variables, `language`, `db`, `model` and the
# update_version_title* helpers are defined outside this fragment.
# The steps mutate the database and depend on running in this order.
print("changing old nikkud history")
update_version_title_in_history(default_version_title_vowels, rename_version_title_vowels, language)

# Change the Leningrad versions to have the old default names.
# Each rename is applied to the versions and then to the history records.
print("renaming Leningrad taamei hamikra to old default name")
update_version_title(new_version_title_cantillaiton, default_version_title_cantillaiton, language)
update_version_title_in_history(new_version_title_cantillaiton, default_version_title_cantillaiton, language)

print("renaming Leningrad nikkud to old default name")
update_version_title(new_version_title_vowels, default_version_title_vowels, language)
update_version_title_in_history(new_version_title_vowels, default_version_title_vowels, language)

print("renaming Leningrad consonants to an old style default name")
update_version_title(new_version_title_consonants, default_version_title_consonants, language)
update_version_title_in_history(new_version_title_consonants, default_version_title_consonants, language)


# Delete the old renamed texts.
print("deleting old texts versions")
# The renamed cantillation versions are deleted for every title EXCEPT the
# five books listed under $nin; those five are kept and handled further down.
# NOTE(review): this variable is spelled "cantillation" while the ones above
# are spelled "cantillaiton" - confirm both names are defined upstream.
model.VersionSet({"versionTitle": rename_version_title_cantillation, "language": language, "title" : { '$nin': ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy']}}).delete()
model.VersionSet({"versionTitle": rename_version_title_vowels, "language": language	}).delete()

# Make sure the new texts are the default.
# For the five books kept above: set status to "locked" and remove the priority field.
print("un-defaulting Koren")
db.texts.update({"versionTitle": rename_version_title_cantillation, "language": language, "title" : { '$in': ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy']}}, {"$set": {"status": "locked"}, "$unset": {"priority": ""}}, upsert=False, multi=True, writeConcern={ 'w': 1 })

# Set status "locked" and priority 1 on every version carrying the default cantillation title.
print("prioritizing and locking Leningrad")
db.texts.update({"versionTitle": default_version_title_cantillaiton, "language": language}, {"$set": {"status": "locked", "priority":1}}, upsert=False, multi=True, writeConcern={ 'w': 1 })

# Same for the English JPS 1917 translation.
print("prioritizing and locking JPS 1917")
db.texts.update({"versionTitle": "The Holy Scriptures: A New Translation (JPS 1917)", "language": 'en'}, {"$set": {"status": "locked", "priority":1}}, upsert=False, multi=True, writeConcern={ 'w': 1 })