Esempio n. 1
0
def test_modify_text_by_function():
    """Check that modify_text_by_function rewrites every segment of a version.

    The text of the JPS 1917 version of Job is copied into a temporary
    Version titled "TextChangeTest".  Every space in that copy is then
    replaced with "$" through modify_text_by_function, and the number of
    "$" characters afterwards must equal the number of spaces beforehand.
    """
    original = TextChunk(Ref("Job"), vtitle="The Holy Scriptures: A New Translation (JPS 1917)")
    # "|" is used as the joiner so that flattening does not add any spaces itself.
    total_spaces = JaggedTextArray(original.text).flatten_to_string(joiner="|").count(" ")

    v = Version({
        "language": "en",
        "title": "Job",
        "versionSource": "http://foobar.com",
        "versionTitle": "TextChangeTest",
        "chapter": original.text
    }).save()

    try:
        # 23432 is passed through unchanged; presumably the acting user's id -- TODO confirm.
        modify_text_by_function("Job", "TextChangeTest", "en", lambda x, sections: x.replace(" ", "$"), 23432)
        modified = TextChunk(Ref("Job"), vtitle="TextChangeTest")
        total_dollars = JaggedTextArray(modified.text).flatten_to_string(joiner="|").count("$")
    finally:
        # Always remove the temporary version, even when the modification or
        # the re-read raises, so a failed run does not leave it behind.
        v.delete()
    assert total_dollars > 0
    assert total_spaces == total_dollars
Esempio n. 2
0
def test_modify_text_by_function():
    """Check that modify_text_by_function rewrites every segment of a version.

    The text of the JPS 1917 version of Job is copied into a temporary
    Version titled "TextChangeTest".  Every space in that copy is then
    replaced with "$" through modify_text_by_function, and the number of
    "$" characters afterwards must equal the number of spaces beforehand.
    """
    original = TextChunk(Ref("Job"), vtitle="The Holy Scriptures: A New Translation (JPS 1917)")
    # "|" is used as the joiner so that flattening does not add any spaces itself.
    total_spaces = JaggedTextArray(original.text).flatten_to_string(joiner=u"|").count(u" ")

    v = Version({
        "language": "en",
        "title": "Job",
        "versionSource": "http://foobar.com",
        "versionTitle": "TextChangeTest",
        "chapter": original.text
    }).save()

    try:
        # 23432 is passed through unchanged; presumably the acting user's id -- TODO confirm.
        modify_text_by_function("Job", "TextChangeTest", "en", lambda x: x.replace(u" ", u"$"), 23432)
        modified = TextChunk(Ref("Job"), vtitle="TextChangeTest")
        total_dollars = JaggedTextArray(modified.text).flatten_to_string(joiner=u"|").count(u"$")
    finally:
        # Always remove the temporary version, even when the modification or
        # the re-read raises, so a failed run does not leave it behind.
        v.delete()
    assert total_dollars > 0
    assert total_spaces == total_dollars
Esempio n. 3
0
import re
from sefaria.model.text import IndexSet
from sefaria.helper.text import modify_text_by_function


def out_brace(raw_string):
    """Remove dangling "<space><1-2 Hebrew letters>)" markers from a string.

    A marker is a space, one or two letters in the range U+05D0..U+05EA and
    a closing parenthesis that does not belong to a "(...)" pair found
    earlier on the same line.  Properly paired parentheses and their content
    are returned untouched.

    Both cases are handled in one regex pass.  No placeholder characters are
    substituted for the parentheses, so literal "#" and "%" characters in
    the input come back unchanged.

    :param raw_string: the text to clean
    :return: the text with the dangling markers removed
    """
    def _keep_pair_drop_marker(match):
        # The "paired" group only participates when a full "(...)" matched;
        # otherwise the match is a dangling marker and is deleted.
        return match.group("paired") or ""

    # The paired alternative is listed first so that, at any position, a
    # complete "(...)" wins and its closing parenthesis can never be taken
    # for a dangling marker.
    return re.sub(
        r"(?P<paired>\(.*?\))| [\u05d0-\u05ea]{1,2}\)",
        _keep_pair_drop_marker,
        raw_string,
    )


# All indexes whose title starts with "Tosafot Yom Tov", and the version title to process.
tyt = IndexSet({'title': {'$regex': '^Tosafot Yom Tov.*'}})
vtitle = 'Mishnah, ed. Romm, Vilna 1913'

# Run out_brace over the 'he' text of each index, printing progress as "current/total".
for position, index in enumerate(tyt, 1):
    progress = "{}/{}".format(position, tyt.count())
    print(progress)
    modify_text_by_function(index.title, vtitle, 'he', out_brace, 23432)
    # Refresh the index's version state once its text has been modified.
    index.versionState().refresh()
import argparse
import re
from sefaria.model import *
from sefaria.helper.text import modify_text_by_function


def replace_double_quotes(text):
    """
    Turn every pair of consecutive single quotes into one double-quote character.

    :param text: the text to replace
    :return: the text with each two-character '' sequence replaced by a single " character.
    """
    two_single_quotes = "''"
    one_double_quote = '"'
    # Splitting on the pair and re-joining with the double quote is the same
    # left-to-right, non-overlapping substitution as str.replace.
    pieces = text.split(two_single_quotes)
    return one_double_quote.join(pieces)



# Apply replace_double_quotes to every 'he' version of "Rashi on <book>" for
# each book in the Tanach category and then the Talmud category.  The two
# categories were previously handled by two copy-pasted loops.
for category in ('Tanach', 'Talmud'):
    for book in library.get_indexes_in_category(category):
        rashi_title = "Rashi on {}".format(book)
        # Parenthesised so the line parses under both Python 2 and Python 3.
        print(rashi_title.encode('utf-8'))
        for v in VersionSet({'title': rashi_title, 'language': 'he'}):
            modify_text_by_function(rashi_title, v.versionTitle, 'he', replace_double_quotes, 8646, skip_links=True)
Esempio n. 5
0
    """
    :param text: the text to replace
    :return:a text with all 2 character length double quotes replaced with a single character double quote.
    """
    return text.replace("''", '"')


# Apply replace_double_quotes to every 'he' version of "Rashi on <book>" for
# each book in the Tanach category and then the Talmud category.  The two
# categories were previously handled by two copy-pasted loops.
for category in ('Tanach', 'Talmud'):
    for book in library.get_indexes_in_category(category):
        rashi_title = "Rashi on {}".format(book)
        # Parenthesised so the line parses under both Python 2 and Python 3.
        print(rashi_title.encode('utf-8'))
        for v in VersionSet({'title': rashi_title, 'language': 'he'}):
            modify_text_by_function(rashi_title,
                                    v.versionTitle,
                                    'he',
                                    replace_double_quotes,
                                    8646,
                                    skip_links=True)