def dep_counts(name):
    """
    Count the records that depend on the text titled `name`.

    Returns a dict mapping a descriptive label to the `.count()` of a model
    set query:
      - versions and history records whose title equals `name`, or matches
        the "commentor" / "commentee" title patterns;
      - for each ref pattern ('alone', 'commentor', 'commentee'): notes
        (field `ref`), links (field `refs`) and history records (fields
        `ref` and `new.refs`) matching that pattern.
    """
    commentators = model.IndexSet({"categories.0": "Commentary"}).distinct("title")

    # Titles are meant to match literally, so escape them before building the
    # alternation; otherwise a title containing regex metacharacters (for
    # example parentheses) would change the meaning of the pattern.
    commentator_alternation = "|".join(re.escape(title) for title in commentators)
    escaped_name = re.escape(name)

    ref_patterns = {
        'alone': r'^{} \d'.format(escaped_name),
        'commentor': r'{} on'.format(escaped_name),
        'commentee': r'^({}) on {} \d'.format(commentator_alternation, escaped_name)
    }

    # The title queries use the very same expression as the 'commentee' ref
    # pattern, so build it once.
    # NOTE(review): this pattern requires a digit after the name even when it
    # is applied to `title` fields -- confirm that is intended.
    commentee_title_pattern = ref_patterns["commentee"]

    ret = {
        'version title exact match': model.VersionSet({"title": name}).count(),
        'version title match commentor': model.VersionSet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'version title match commentee': model.VersionSet({"title": {"$regex": commentee_title_pattern}}).count(),
        'history title exact match': model.HistorySet({"title": name}).count(),
        'history title match commentor': model.HistorySet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'history title match commentee': model.HistorySet({"title": {"$regex": commentee_title_pattern}}).count(),
    }

    for pname, pattern in ref_patterns.items():
        ret.update({
            'note match ' + pname: model.NoteSet({"ref": {"$regex": pattern}}).count(),
            'link match ' + pname: model.LinkSet({"refs": {"$regex": pattern}}).count(),
            'history refs match ' + pname: model.HistorySet({"ref": {"$regex": pattern}}).count(),
            'history new refs match ' + pname: model.HistorySet({"new.refs": {"$regex": pattern}}).count()
        })

    return ret
def test_index_delete():
    """
    Deleting an Index must also remove the Versions that belong to it.

    Covers two cases: a simple text with two versions, and a commentator
    index with commentary versions on two base books.
    """
    # Simple Text
    ti = "Test Del"

    # Clear leftovers from earlier runs so the save below starts clean.
    model.IndexSet({"title": ti}).delete()
    model.VersionSet({"title": ti}).delete()

    i = model.Index({
        "title": ti,
        "heTitle": u"כבכב",
        "titleVariants": [ti],
        "sectionNames": ["Chapter", "Paragraph"],
        "categories": ["Musar"],
        "lengths": [50, 501]
    }).save()

    new_version1 = model.Version(
        {
            "chapter": i.nodes.create_skeleton(),
            "versionTitle": "Version 1 TEST",
            "versionSource": "blabla",
            "language": "he",
            "title": i.title
        }
    )
    new_version1.chapter = [[u''], [u''], [u"לה לה לה לא חשוב על מה"]]
    new_version1.save()

    new_version2 = model.Version(
        {
            "chapter": i.nodes.create_skeleton(),
            "versionTitle": "Version 2 TEST",
            "versionSource": "blabla",
            "language": "en",
            "title": i.title
        }
    )
    new_version2.chapter = [[], ["Hello goodbye bla bla blah"], []]
    new_version2.save()

    i.delete()
    assert model.Index().load({'title': ti}) is None
    assert model.VersionSet({'title': ti}).count() == 0

    # Commentator
    from sefaria.helper.text import create_commentator_and_commentary_version

    commentator_name = "Commentator Del"
    he_commentator_name = u"פרשנדנן"
    base_book = 'Genesis'
    base_book2 = 'Pesach Haggadah'

    # Clear leftovers from earlier runs.
    model.IndexSet({"title": commentator_name}).delete()
    model.VersionSet({"title": commentator_name + " on " + base_book}).delete()
    model.VersionSet({"title": commentator_name + " on " + base_book2}).delete()

    create_commentator_and_commentary_version(commentator_name, base_book, 'he', 'test', 'test', he_commentator_name)
    create_commentator_and_commentary_version(commentator_name, base_book2, 'he', 'test', 'test', he_commentator_name)

    # The result of delete() was previously bound to an unused local; only the
    # side effect matters here.
    model.Index().load({'title': commentator_name}).delete()
    assert model.Index().load({'title': commentator_name}) is None
    assert model.VersionSet({'title': {'$regex': commentator_name}}).count() == 0
def merge_text_versions_by_source(text_title, language, warn=False):
    """
    Merges all texts of text_title in language that share the same value for versionSource.

    For every distinct versionSource among the matching versions, the
    versionTitles recorded under that source are collected and handed to
    merge_multiple_text_versions. `warn` is accepted but not used by this
    function.
    """
    all_versions = model.VersionSet({"title": text_title, "language": language})
    sources = all_versions.distinct("versionSource")

    for source in sources:
        same_source = model.VersionSet({
            "title": text_title,
            "versionSource": source,
            "language": language
        })
        titles_to_merge = same_source.distinct("versionTitle")
        merge_multiple_text_versions(titles_to_merge, text_title, language)
def test_version_word_count():
    """
    Check word_count() on single Versions (simple and complex texts) and on
    a VersionSet.
    """
    def words_in(query):
        # Word count of the single Version loaded by `query`.
        return model.Version().load(query).word_count()

    # simple
    genesis_query = {
        "title": "Genesis",
        "language": "he",
        "versionTitle": "Tanach with Ta'amei Hamikra"
    }
    assert words_in(genesis_query) == 20813
    assert words_in({"title": "Rashi on Shabbat", "language": "he"}) > 0

    # complex
    assert words_in({"title": "Pesach Haggadah", "language": "he"}) > 0
    assert words_in({"title": "Orot", "language": "he"}) > 0
    assert words_in({"title": "Ephod Bad on Pesach Haggadah"}) > 0

    # sets
    haggadah_versions = model.VersionSet({"title": {"$regex": "Haggadah"}})
    assert haggadah_versions.word_count() > 200000
def dep_counts(name, indx):
    """
    Count the records that depend on the title `name`.

    The regex patterns come from prepare_index_regex_for_dependency_process
    for `indx`, with the escaped index title swapped for the escaped `name`.
    Returns a dict of label -> count covering versions and history records
    whose title equals `name`, and notes, links and history records whose
    ref fields match any of the patterns.
    """
    from sefaria.model.text import prepare_index_regex_for_dependency_process

    def construct_query(attribute, queries):
        # One $regex clause per pattern, combined with $or.
        return {'$or': [{attribute: {'$regex': query}} for query in queries]}

    index_title_re = re.escape(indx.title)
    name_re = re.escape(name)
    patterns = []
    for raw_pattern in prepare_index_regex_for_dependency_process(indx, as_list=True):
        patterns.append(raw_pattern.replace(index_title_re, name_re))

    counts = {}
    counts['version title exact match'] = model.VersionSet(
        {"title": name}, sort=[('title', 1)]
    ).count()
    counts['history title exact match'] = model.HistorySet(
        {"title": name}, sort=[('title', 1)]
    ).count()
    counts['note match '] = model.NoteSet(
        construct_query("ref", patterns), sort=[('ref', 1)]
    ).count()
    counts['link match '] = model.LinkSet(
        construct_query("refs", patterns)
    ).count()
    counts['history refs match '] = model.HistorySet(
        construct_query("ref", patterns), sort=[('ref', 1)]
    ).count()
    counts['history new refs match '] = model.HistorySet(
        construct_query("new.refs", patterns), sort=[('new.refs', 1)]
    ).count()
    return counts
def test_index_delete():
    """
    Deleting an Index must also remove the Versions saved under its title.
    """
    # Simple Text
    ti = "Test Del"

    index = model.Index({
        "title": ti,
        "heTitle": "כבכב",
        "titleVariants": [ti],
        "sectionNames": ["Chapter", "Paragraph"],
        "categories": ["Musar"],
        "lengths": [50, 501]
    }).save()

    def save_version(version_title, lang, chapter):
        # Build a skeleton Version for the test index, fill in its text, save it.
        version = model.Version({
            "chapter": index.nodes.create_skeleton(),
            "versionTitle": version_title,
            "versionSource": "blabla",
            "language": lang,
            "title": index.title
        })
        version.chapter = chapter
        version.save()

    save_version("Version 1 TEST", "he", [[''], [''], ["לה לה לה לא חשוב על מה"]])
    save_version("Version 2 TEST", "en", [[], ["Hello goodbye bla bla blah"], []])

    index.delete()

    reloaded = model.Index().load({'title': ti})
    assert reloaded is None
    remaining_versions = model.VersionSet({'title': ti}).count()
    assert remaining_versions == 0
def teardown_class(cls):
    """
    Delete the Index and Version records for each title listed below.
    """
    leftover_titles = (
        "New Toc Title Test",
        "New Toc Test",
        "Another New Toc Test",
        "Harchev Davar on Joshua",
        "Bob is your Uncle",
    )
    for leftover in leftover_titles:
        # Index first, then Version, for each title.
        for record_set in (model.IndexSet, model.VersionSet):
            record_set({"title": leftover}).delete()
def merge_text_versions_by_language(text_title, language, warn=False):
    """
    Merges all texts of text_title in language.

    Collects the distinct versionTitles of the matching versions and hands
    them to merge_multiple_text_versions. `warn` is accepted but not used by
    this function.
    """
    matching = model.VersionSet({"title": text_title, "language": language})
    version_titles = matching.distinct("versionTitle")
    merge_multiple_text_versions(version_titles, text_title, language)
def teardown_module(module):
    """
    Delete the Index and Version records for each title listed below.
    """
    test_titles = (
        'Test Commentator Name',
        'Bartenura (The Next Generation)',
        'Test Index Name',
        "Changed Test Index",
        "Third Attempt",
        "Test Iu",
        "Test Del",
    )
    for test_title in test_titles:
        indexes = model.IndexSet({"title": test_title})
        indexes.delete()
        versions = model.VersionSet({"title": test_title})
        versions.delete()
def remove_trailing_empty_segments():
    """
    Removes empty segments from the end of any text section.

    Iterates over every Version; when trimming changes its `chapter`, the
    title is printed, the trimmed text is saved and the VersionState for
    that title is refreshed.
    """
    texts = model.VersionSet()
    for text in texts:
        if not model.Ref.is_ref(text.title):
            continue  # Ignore text versions we don't understand

        # Trim a copy so the comparison below still sees the stored text.
        new_text = rtrim_jagged_string_array(deepcopy(text.chapter))
        if new_text != text.chapter:
            # Call form of print: same output on Python 2, valid on Python 3.
            print(text.title + " CHANGED")
            text.chapter = new_text
            text.save()
            model.VersionState(text.title).refresh()
def update_counts(ref=None):
    """
    Update the count records of all texts or the text specified by ref
    (currently at book level only) by performing a count.

    With `ref`, only that text is counted and the function returns. Without
    it, every index is processed -- for a commentary index, each distinct
    version title starting with "<title> on " is counted -- and the summaries
    are updated afterwards.
    """
    import re  # local import: this block does not otherwise rely on `re`

    if ref:
        update_text_count(ref)
        return

    indices = model.IndexSet()

    for index in indices:
        if index.is_commentary():
            # Escape the title so it matches literally; a title containing
            # regex metacharacters (e.g. parentheses) would otherwise change
            # or break the pattern.
            cRef = "^{} on ".format(re.escape(index.title))
            texts = model.VersionSet({"title": {"$regex": cRef}}).distinct("title")
            for text in texts:
                update_text_count(text)
        else:
            update_text_count(index.title)

    summaries.update_summaries()
def remove_old_counts():
    """
    Deletes counts documents which no longer correspond to a text or category.

    A vstate document is removed when its title cannot be resolved to an
    index (BookNameError), or when the resolved index has no versions.
    Documents without a title are left alone.
    """
    # If there are counts documents saved in the DB with invalid titles,
    # instantiation of the Count will cause a BookNameError.
    # But in this code instantiation happens in the line 'for count in counts'
    # How do we catch that? Additionally, we need access to the bad title after
    # The error has occurred. How would we get that? Reverting to direct DB call for now.
    counts = db.vstate.find({}, {"title": 1})
    for count in counts:
        if count.get("title", None):
            # Call form of print: same output on Python 2, valid on Python 3.
            print("Checking " + count["title"])
            try:
                i = model.library.get_index(count["title"])
                if model.VersionSet({"title": i.title}).count() == 0:
                    print(u"Old count for Commentary with no content: %s" % count["title"])
                    db.vstate.remove({"_id": count["_id"]})
            except BookNameError:
                print(u"Old count: %s" % count["title"])
                db.vstate.remove({"_id": count["_id"]})
import pytest import sefaria.summaries as s import sefaria.model as model import sefaria.system.cache as scache from sefaria.utils.testing_utils import * #create, update, delete, change categories # test that old title goes away on index title change (regular + commentary) # test that no commentator is added # no wandering commentaries """ SOME SETUP """ text_titles = model.VersionSet({}).distinct('title') s.update_table_of_contents() scache.delete_cache_elem('toc_cache') """ THE TESTS """ class Test_Toc(object): def test_toc_integrity(self): toc = s.get_toc() self.recur_toc_integrity(toc) def recur_toc_integrity(self, toc, depth=0): for toc_elem in toc:
print("changing old nikkud history")
update_version_title_in_history(default_version_title_vowels, rename_version_title_vowels, language)

# Change the Leningrad versions to have the old default names. Each step
# prints its message, then renames the version title in the texts and in
# the history.
_leningrad_renames = (
    ("renaming Leningrad taamei hamikra to old default name",
     new_version_title_cantillaiton, default_version_title_cantillaiton),
    ("renaming Leningrad nikkud to old default name",
     new_version_title_vowels, default_version_title_vowels),
    ("renaming Leningrad consonants to an old style default name",
     new_version_title_consonants, default_version_title_consonants),
)
for _message, _current_title, _default_title in _leningrad_renames:
    print(_message)
    update_version_title(_current_title, _default_title, language)
    update_version_title_in_history(_current_title, _default_title, language)

# Delete the old renamed text versions. The renamed cantillation version is
# kept for the five titles listed under $nin.
print("deleting old texts versions")
_stale_cantillation = model.VersionSet({
    "versionTitle": rename_version_title_cantillation,
    "language": language,
    "title": {'$nin': ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy']}
})
_stale_cantillation.delete()
_stale_vowels = model.VersionSet({
    "versionTitle": rename_version_title_vowels,
    "language": language
})
_stale_vowels.delete()

# make sure the new texts are the default.
# Each step is (message, query, changes): the message is printed, then every
# document in db.texts matching the query receives the changes.
_priority_steps = (
    # Lock the renamed cantillation version of the five listed titles and
    # drop its priority.
    ("un-defaulting Koren",
     {"versionTitle": rename_version_title_cantillation,
      "language": language,
      "title": {'$in': ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy']}},
     {"$set": {"status": "locked"}, "$unset": {"priority": ""}}),
    # Lock the default cantillation version and give it priority 1.
    ("prioritizing and locking Leningrad",
     {"versionTitle": default_version_title_cantillaiton, "language": language},
     {"$set": {"status": "locked", "priority": 1}}),
    # Lock the English JPS 1917 version and give it priority 1.
    ("prioritizing and locking JPS 1917",
     {"versionTitle": "The Holy Scriptures: A New Translation (JPS 1917)", "language": 'en'},
     {"$set": {"status": "locked", "priority": 1}}),
)
for _message, _query, _changes in _priority_steps:
    print(_message)
    db.texts.update(_query, _changes, upsert=False, multi=True, writeConcern={'w': 1})