def test_modify_text_by_function():
    """Check that modify_text_by_function rewrites the text of a whole version.

    The JPS 1917 text of Job is copied into a scratch English version titled
    "TextChangeTest". Every space in that version is then replaced with "$"
    through modify_text_by_function, and the test asserts that the number of
    "$" characters in the result equals the number of spaces in the source.
    """
    original = TextChunk(Ref("Job"), vtitle="The Holy Scriptures: A New Translation (JPS 1917)")
    # Flatten with "|" so the joiner itself never adds a space to the count.
    total_spaces = JaggedTextArray(original.text).flatten_to_string(joiner="|").count(" ")

    v = Version({
        "language": "en",
        "title": "Job",
        "versionSource": "http://foobar.com",
        "versionTitle": "TextChangeTest",
        "chapter": original.text
    }).save()

    try:
        # 23432 is passed positionally as the fifth argument
        # (presumably the acting user's id -- confirm against the helper's signature).
        modify_text_by_function("Job", "TextChangeTest", "en", lambda x, sections: x.replace(" ", "$"), 23432)
        modified = TextChunk(Ref("Job"), vtitle="TextChangeTest")
        total_dollars = JaggedTextArray(modified.text).flatten_to_string(joiner="|").count("$")
    finally:
        # Remove the scratch version even when the modification or the re-read
        # raises, so a failing run does not leave "TextChangeTest" behind.
        v.delete()

    assert total_dollars > 0
    assert total_spaces == total_dollars
def test_modify_text_by_function():
    """Check that modify_text_by_function rewrites the text of a whole version.

    The JPS 1917 text of Job is copied into a scratch English version titled
    "TextChangeTest". Every space in that version is then replaced with "$"
    through modify_text_by_function, and the test asserts that the number of
    "$" characters in the result equals the number of spaces in the source.
    """
    original = TextChunk(Ref("Job"), vtitle="The Holy Scriptures: A New Translation (JPS 1917)")
    # Flatten with "|" so the joiner itself never adds a space to the count.
    total_spaces = JaggedTextArray(original.text).flatten_to_string(joiner=u"|").count(u" ")

    v = Version({
        "language": "en",
        "title": "Job",
        "versionSource": "http://foobar.com",
        "versionTitle": "TextChangeTest",
        "chapter": original.text
    }).save()

    try:
        # 23432 is passed positionally as the fifth argument
        # (presumably the acting user's id -- confirm against the helper's signature).
        modify_text_by_function("Job", "TextChangeTest", "en", lambda x: x.replace(u" ", u"$"), 23432)
        modified = TextChunk(Ref("Job"), vtitle="TextChangeTest")
        total_dollars = JaggedTextArray(modified.text).flatten_to_string(joiner=u"|").count(u"$")
    finally:
        # Remove the scratch version even when the modification or the re-read
        # raises, so a failing run does not leave "TextChangeTest" behind.
        v.delete()

    assert total_dollars > 0
    assert total_spaces == total_dollars
import re

from sefaria.model.text import IndexSet
from sefaria.helper.text import modify_text_by_function

# Scanned left to right, this matches either a parenthesised span that opens
# and closes on the same line, or a dangling marker made of a space, one or two
# Hebrew letters (U+05D0-U+05EA) and a closing parenthesis.
_PAREN_OR_DANGLING = re.compile(r"\(.*?\)| [\u05d0-\u05ea]{1,2}\)")


def out_brace(raw_string):
    """Strip dangling " <Hebrew letters>)" markers, keeping balanced parentheses.

    A marker is a space followed by one or two Hebrew letters and a ")" that
    does not close a "(" opened earlier on the same line. Text enclosed in a
    matching "(" ... ")" pair is returned untouched.

    The balanced spans are consumed by the first regex alternative and put
    back verbatim, so no placeholder characters are needed; literal "#" and
    "%" characters in the input therefore pass through unchanged instead of
    being turned into parentheses.

    :param raw_string: the segment text to clean
    :return: the text with the dangling markers removed
    """
    def keep_balanced(match):
        found = match.group(0)
        # Only the balanced alternative can start with "("; the dangling
        # marker always starts with a space and is dropped.
        return found if found.startswith("(") else ""

    return _PAREN_OR_DANGLING.sub(keep_balanced, raw_string)


tyt = IndexSet({'title': {'$regex': '^Tosafot Yom Tov.*'}})
vtitle = 'Mishnah, ed. Romm, Vilna 1913'

# The size of the set does not change while iterating, so ask for it once.
total = tyt.count()
for i, dex in enumerate(tyt, 1):
    print("{}/{}".format(i, total))
    modify_text_by_function(dex.title, vtitle, 'he', out_brace, 23432)
    dex.versionState().refresh()
import argparse
import re

from sefaria.model import *
from sefaria.helper.text import modify_text_by_function


def replace_double_quotes(text):
    """
    :param text: the text to replace
    :return: the text with every pair of consecutive single quotes ('') replaced by one double-quote character (").
    """
    return text.replace("''", '"')


# Both categories get the same treatment: for every book, rewrite each Hebrew
# version of its "Rashi on <book>" commentary. Tanach is fully processed before
# the Talmud list is fetched, as in the original two separate loops.
for category in ('Tanach', 'Talmud'):
    for book in library.get_indexes_in_category(category):
        rashi_title = "Rashi on {}".format(book)
        # Parenthesised single-argument print behaves exactly like the
        # Python 2 print statement and also parses under Python 3.
        print(rashi_title.encode('utf-8'))
        vs = VersionSet({'title': rashi_title, 'language': 'he'})
        for v in vs:
            modify_text_by_function(rashi_title, v.versionTitle, 'he', replace_double_quotes, 8646, skip_links=True)
""" :param text: the text to replace :return:a text with all 2 character length double quotes replaced with a single character double quote. """ return text.replace("''", '"') bible_books = library.get_indexes_in_category('Tanach') for book in bible_books: rashi_title = "Rashi on {}".format(book) print rashi_title.encode('utf-8') vs = VersionSet({'title': rashi_title, 'language': 'he'}) for v in vs: modify_text_by_function(rashi_title, v.versionTitle, 'he', replace_double_quotes, 8646, skip_links=True) talmud_books = library.get_indexes_in_category('Talmud') for book in talmud_books: rashi_title = "Rashi on {}".format(book) print rashi_title.encode('utf-8') vs = VersionSet({'title': rashi_title, 'language': 'he'}) for v in vs: modify_text_by_function(rashi_title, v.versionTitle, 'he', replace_double_quotes, 8646, skip_links=True)