Code Example #1
def copy_from_local():
    query = {"type": "sifrei mitzvot"}
    linkset = LinkSet(query)
    links = [l.contents() for l in linkset]
    # for link in links:
    #     for i, ref in enumerate(link["refs"]):
    #         if re.search("Sefer HaMitzvot", ref):
    #             link["refs"][i] = "Sefer HaMitzvot LaRambam"
    #             break

    # for link in links:
    #     ref_strings = link["refs"]
    #     for k, ref in enumerate(ref_strings):
    #         if text.Ref(ref).primary_category == u'Tanakh':  ## careful: the Tanakh category also includes Tanakh commentaries!
    #             newrefs = ref_strings[:]
    #             newrefs[k] = text.Ref(ref_strings[k]).section_ref().normal()
    #             broadLink = Link().load({'refs': [newrefs[k], newrefs[(k + 1) % 2]]})
    #             if broadLink:
    #                 # raise DuplicateRecordError(u"more than one broader link exists: {} - {}".format(broadLink[0].refs[0], broadLink[0].refs[1]))
    #                 #
    #                 # tracker.delete(user, broadLink,)
    #                 broadLink.delete()
    #                 print 'deleting Link {} {}'.format(broadLink.refs[0], broadLink.refs[1])
    post_link(links, VERBOSE=True)
    return links
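
For reference, the link payloads these scripts pass to post_link share one shape, and post_link accepts either a single link dictionary or a list of them. A minimal sketch of that shape, with the field set inferred from the examples on this page rather than from a formal API reference:

# Minimal link payload as used throughout these examples. Only "refs" is
# structural; the remaining fields are conventions visible in the snippets.
example_link = {
    "refs": ["Genesis 1:1", "Ramban on Genesis 1:1:1"],  # the two refs being connected
    "type": "commentary",           # link type; some scripts leave this empty
    "auto": True,                   # marks the link as machine-generated
    "generated_by": "my_linker",    # provenance tag, later queryable via LinkSet
}
# Both post_link(example_link) and post_link([example_link, ...]) appear on this page.
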
Code Example #2
def create_links(ls_ja):
    list_of_links = []
    for perek_index, perek in enumerate(ls_ja):
        for mishna_index, mishna in enumerate(perek):
            for comment_index, comment in enumerate(mishna):
                list_of_links.append(create_link_dicttionary(perek_index+1, mishna_index+1, comment_index+1))
    functions.post_link(list_of_links)
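
create_link_dicttionary is project-specific and not shown in this listing; from the call site it evidently maps a (perek, mishna, comment) triple to one link dictionary. A hypothetical reconstruction (the book titles and tag are placeholders, not the project's actual values):

def create_link_dicttionary(perek, mishna, comment):
    # Hypothetical: link the Nth comment on a mishnah back to that mishnah.
    return {
        'refs': ['Mishnah Berakhot {}:{}'.format(perek, mishna),
                 'Rambam on Mishnah Berakhot {}:{}:{}'.format(perek, mishna, comment)],
        'type': 'commentary',
        'auto': True,
        'generated_by': 'mishnah_commentary_parser',  # placeholder tag
    }
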
Code Example #3
def parse_and_upload():
    cards = get_cards()
    links = []
    for card in cards:
        node = JaggedArrayNode()
        node.add_title(card, 'en', primary=True)
        node.add_title(u'רמב"ם ' + Ref(card.replace('Rambam ', '')).he_normal(), 'he', primary=True)
        node.key = card
        node.depth = 3
        node.addressTypes = ['Integer', 'Integer', 'Integer']
        node.sectionNames = ['Chapter', 'Mishnah', 'Comment']
        node.validate()
        node.toc_zoom = 2

        index = {
            'title': card,
            'categories': ['Commentary2', 'Mishnah', 'Rambam'],
            'schema': node.serialize(),
        }

        parsed = parser(card)
        links.extend(parsed['links'])
        version = {
            'versionTitle': u'Vilna Edition',
            'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957',
            'language': 'he',
            'text': parsed['parsed']
        }
        print 'posting {}'.format(card)
        post_index(index)
        post_text(card, version, index_count='on')
    post_link(links)
Code Example #4
def postLinks(listOfPotentialLink, badLinksFile, goodLinksFile):
    for eachLink in listOfPotentialLink:
        if Ref.is_ref(eachLink['refs'][1]):
            functions.post_link(eachLink)
            goodLinksFile.write(eachLink['refs'][0] + ' linked with ' + eachLink['refs'][1] + '\r\n')
        else:
            badLinksFile.write(eachLink['refs'][0] + ' linked with ' + eachLink['refs'][1] + '\r\n')
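
postLinks validates only refs[1], presumably because refs[0] is produced by the parser and trusted by construction. If both sides were untrusted, a symmetric sketch using the same helpers would be:

# Hedged variant: check both refs before posting.
if all(Ref.is_ref(r) for r in eachLink['refs']):
    functions.post_link(eachLink)
    goodLinksFile.write(eachLink['refs'][0] + ' linked with ' + eachLink['refs'][1] + '\r\n')
else:
    badLinksFile.write(eachLink['refs'][0] + ' linked with ' + eachLink['refs'][1] + '\r\n')
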
Code Example #5
File: chinuch.py Project: JonMosenkis/Sefaria-Data
def post():
    minchat = {'name': 'Minchat Chinuch', 'text': produce_parsed_data(filename)}
    sefer = {'name': 'Sefer HaChinukh', 'text': Ref('Sefer HaChinukh').text('he').text}

    chinukh_links = find_links(minchat, sefer, grab_dh, u'<b>', u'</b>')

    with codecs.open('links.txt', 'w', 'utf-8') as outfile:
        for each_link in chinukh_links:
            outfile.write(u'{}\n'.format(each_link['refs']))

    alt = construct_alt_struct('Chinukh_by_Parsha.csv', 'Chinukh Mitzva names.csv')

    cleaned = util.clean_jagged_array(minchat['text'], [m_pattern, comment_pattern, u'@[0-9]{2}',
                                      u'\n', u'\r'])
    with codecs.open('parsed.txt', 'w', 'utf-8') as outfile:
        util.jagged_array_to_file(outfile, cleaned, [u'Mitzva', u'Seif', u'Paragraph'])

    full_text = {
        'versionTitle': 'Minchat Chinuch, Piotrków, 1902',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001175092',
        'language': 'he',
        'text': cleaned
    }

    index = construct_index(alt)
    functions.post_index(index)
    functions.post_text('Minchat Chinuch', full_text)
    functions.post_link(chinukh_links)
Code Example #6
def upload():
    links = []
    for tractate in cards:
        he_name = Ref(' '.join(tractate.split()[1:])).he_normal()
        he_name = u'רמב"ם {}'.format(he_name)

        node = JaggedArrayNode()
        node.add_title(tractate, 'en', primary=True)
        node.add_title(he_name, 'he', primary=True)
        node.key = tractate
        node.depth = 3
        node.addressTypes = ['Integer', 'Integer', 'Integer']
        node.sectionNames = ['Chapter', 'Mishnah', 'Comment']
        node.validate()

        index = {
            'title': tractate,
            'categories': ['Commentary2', 'Mishnah', 'Rambam'],
            'schema': node.serialize(),
            'toc_zoom': 2
        }

        parsed = parse_file('{}.txt'.format(tractate))
        links.extend(parsed['links'])
        version = {
            'versionTitle': u'Vilna Edition',
            'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957',
            'language': 'he',
            'text': parsed['parsed text']
        }
        print 'posting {}'.format(tractate)
        post_index(index)
        post_text(tractate, version, index_count='on')
    post_link(links)
Code Example #7
 def post_links(self):
     links = [{
         'refs': [l[0], l[1]],
         'type': 'commentary',
         'auto': True,
         'generated_by': 'Divrei Emet linker'
     } for l in self.stored_links]
     post_link(links)
Code Example #8
def post_simple_commentaries():
    ramban_node, rasag_node = JaggedArrayNode(), JaggedArrayNode()
    ramban_text = parse_general('yitzira_ramban.txt')
    rasag_text = parse_general('yitzira_rasag.txt')

    ramban_node.add_title("Ramban on Sefer Yetzirah", 'en', primary=True)
    ramban_node.add_title(u'רמב"ן על ספר יצירה', 'he', primary=True)
    ramban_node.key = "Ramban on Sefer Yetzirah"
    ramban_node.addressTypes = ['Integer', 'Integer', 'Integer']
    ramban_node.sectionNames = ["Chapter", "Mishnah", "Comment"]
    ramban_node.toc_zoom = 2
    ramban_node.depth = 3
    ramban_node.validate()

    rasag_node.add_title("Rasag on Sefer Yetzirah", 'en', primary=True)
    rasag_node.add_title(u'רס"ג על ספר יצירה', 'he', primary=True)
    rasag_node.key = "Rasag on Sefer Yetzirah"
    rasag_node.addressTypes = ['Integer', 'Integer', 'Integer']
    rasag_node.sectionNames = ["Chapter", "Mishnah", "Comment"]
    rasag_node.toc_zoom = 2
    rasag_node.depth = 3
    rasag_node.validate()

    ramban_index = {
        "title": "Ramban on Sefer Yetzirah",
        "categories": ["Commentary2", "Kabbalah", "Ramban"],
        "language": "he",
        "schema": ramban_node.serialize()
    }
    post_index(ramban_index)
    post_text(
        "Ramban on Sefer Yetzirah", {
            'versionTitle': 'Ramban on Sefer Yetzirah, Warsaw 1884',
            'versionSource':
            'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
            'language': 'he',
            'text': ramban_text
        })

    rasag_index = {
        "title": "Rasag on Sefer Yetzirah",
        "categories": ["Commentary2", "Kabbalah", "Rasag"],
        "language": "he",
        "schema": rasag_node.serialize()
    }
    post_index(rasag_index)
    post_text(
        "Rasag on Sefer Yetzirah", {
            'versionTitle': 'Rasag on Sefer Yetzirah, Warsaw 1884',
            'versionSource':
            'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
            'language': 'he',
            'text': rasag_text
        })
    links = linker(ramban_text, "Ramban on Sefer Yetzirah")
    links.extend(linker(rasag_text, "Rasag on Sefer Yetzirah"))
    post_link(links)
Code Example #9
File: parse_dcxml.py Project: YairRand/Sefaria-Data
 def post(self):
     for index in self.base_indices:
         post_index(index, weak_network=True)
     for index in self.commentaryIndices:
         post_index(index)
     for version in self.versionList:
         print version['ref']
         post_text(version['ref'], version['version'], index_count='on', weak_network=True)
     post_link(self.linkSet)
Code Example #10
def save_links_post_request(category):
    query = {
        "generated_by": "mesorat_hashas.cs (Dicta) {}".format(category),
        "auto": True,
        "type": "Automatic Mesorat HaShas"
    }
    ls = LinkSet(query)
    links = [l.contents() for l in ls]
    post_link(links)
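
Several scripts on this page repeat this round-trip: query links already saved in the local database by their generated_by tag, serialize each with contents(), and replay the list through post_link, typically to push locally generated links to another server. The pattern in isolation:

from sefaria.model import LinkSet
from sources.functions import post_link

def replay_links(generated_by_tag):
    # Load links matching a provenance tag from the local database and
    # re-post them through the API.
    links = [l.contents() for l in LinkSet({"generated_by": generated_by_tag})]
    post_link(links)
    return links
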
Code Example #11
def create_links(gra_ja):
    list_of_links = []
    for perek_index, perek in enumerate(gra_ja):
        for mishna_index, mishna in enumerate(perek):
            for comment_index, comment in enumerate(mishna):
                list_of_links.append(
                    create_link_dicttionary(perek_index + 1, mishna_index + 1,
                                            comment_index + 1))
    functions.post_link(list_of_links)
Code Example #12
File: raavad_parse.py Project: YairRand/Sefaria-Data
def main():
    text_dict = raavad_parse()

    post_raavad_index()

    post_raavad_text(text_dict)
    # save to mongo the links text <-> raavad.
    post_link(linking(text_dict['old_parsing_of_perush']))
    # save to mongo links 32 <-> perush 32
    post_link(link_32())
Code Example #13
def post_the_text(jagged_array, title_counter):
    ref = create_ref(title_counter)
    text = create_text(jagged_array)
    if title_counter > 0:
        list_of_links = create_links(jagged_array, title_counter)
    # The post_text call must come after the links are created, because create_links() changes the actual text
    functions.post_text(ref, text)
    if title_counter > 0:
        print 1
        functions.post_link(list_of_links)
Code Example #14
def save_links_post_request(category):
    query = {"generated_by": "mesorat_hashas.py {}".format(category), "auto": True,
             "type": "Automatic Mesorat HaShas"}
    ls = LinkSet(query)
    links = [l.contents() for l in ls]
    i = 0
    while i < len(links):
        print "Posting [{}:{}]".format(i, i+4999)
        post_link(links[i:i+5000])
        i += 5000
        pytime.sleep(10)
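
The 5,000-link slices and ten-second pauses here guard against oversized requests and server load; neither number is a documented limit, just this script's choice. The same idea as a reusable sketch:

import time

def post_in_batches(links, batch_size=5000, delay=10):
    # Post links in fixed-size slices, pausing between requests.
    for i in range(0, len(links), batch_size):
        print("Posting [{}:{}]".format(i, i + batch_size - 1))
        post_link(links[i:i + batch_size])
        time.sleep(delay)
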
Code Example #15
def post_ein_mishpat(massekhet):
    query = {"generated_by":"Ein Mishpat Cluster {}".format(massekhet)}
    # query_talmud = {''' "generated_by": "Ein Mishpat Cluster {}", $and: [ {{ "refs.0": /.*{}.*/i }} ] '''.format(massekhet,massekhet)}
    # query_tush = {''' "generated_by": "Ein Mishpat Cluster {}", $and: [ {{ "refs.0": /.*{}.*/i }} ] '''.format(massekhet)}
    # query_rambam = {''' "generated_by": "Ein Mishpat Cluster {}", $and: [ {{ "refs.0": /.*{}.*/i }} ] '''.format(massekhet)}
    # query_semag = {''' "generated_by": "Ein Mishpat Cluster {}", $and: [ {{ "refs.0": /.*{}.*/i }} ] '''.format(massekhet)}
    linkset = LinkSet(query)
    links = [l.contents() for l in linkset]
    # for l in links:
    #     l["generated_by"] = "Ein Mishpat Cluster"
    post_link(links)
    return links
Code Example #16
def post():
    parsed = parse('targum.txt')
    for i in range(1, 3):
        functions.post_index(build_index(i))
        version = {
            'versionTitle': 'Wikisource Aramaic Targum to Chronicles',
            'versionSource': url,
            'language': 'he',
            'text': parsed[i-1]
        }
        functions.post_text('Aramaic Targum to {} Chronicles'.format('I' * i), version)
    functions.post_link(build_links(parsed))
Code Example #17
def post_simple_commentaries():
    ramban_node, rasag_node = JaggedArrayNode(), JaggedArrayNode()
    ramban_text = parse_general('yitzira_ramban.txt')
    rasag_text = parse_general('yitzira_rasag.txt')

    ramban_node.add_title("Ramban on Sefer Yetzirah", 'en', primary=True)
    ramban_node.add_title(u'רמב"ן על ספר יצירה', 'he', primary=True)
    ramban_node.key = "Ramban on Sefer Yetzirah"
    ramban_node.addressTypes = ['Integer', 'Integer', 'Integer']
    ramban_node.sectionNames = ["Chapter", "Mishnah", "Comment"]
    ramban_node.toc_zoom = 2
    ramban_node.depth = 3
    ramban_node.validate()

    rasag_node.add_title("Rasag on Sefer Yetzirah", 'en', primary=True)
    rasag_node.add_title(u'רס"ג על ספר יצירה', 'he', primary=True)
    rasag_node.key = "Rasag on Sefer Yetzirah"
    rasag_node.addressTypes = ['Integer', 'Integer', 'Integer']
    rasag_node.sectionNames = ["Chapter", "Mishnah", "Comment"]
    rasag_node.toc_zoom = 2
    rasag_node.depth = 3
    rasag_node.validate()

    ramban_index = {
        "title": "Ramban on Sefer Yetzirah",
        "categories": ["Commentary2", "Kabbalah", "Ramban"],
        "language": "he",
        "schema": ramban_node.serialize()
    }
    post_index(ramban_index)
    post_text("Ramban on Sefer Yetzirah", {
        'versionTitle': 'Ramban on Sefer Yetzirah, Warsaw 1884',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
        'language': 'he',
        'text': ramban_text
    })

    rasag_index = {
        "title": "Rasag on Sefer Yetzirah",
        "categories": ["Commentary2", "Kabbalah", "Rasag"],
        "language": "he",
        "schema": rasag_node.serialize()
    }
    post_index(rasag_index)
    post_text("Rasag on Sefer Yetzirah", {
        'versionTitle': 'Rasag on Sefer Yetzirah, Warsaw 1884',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
        'language': 'he',
        'text': rasag_text
    })
    links = linker(ramban_text, "Ramban on Sefer Yetzirah")
    links.extend(linker(rasag_text, "Rasag on Sefer Yetzirah"))
    post_link(links)
Code Example #18
def post_ein_mishpat(massekhet):
    query = {"generated_by": "Ein Mishpat Cluster {}".format(massekhet)}
    # query_talmud = {''' "generated_by": "Ein Mishpat Cluster {}", $and: [ {{ "refs.0": /.*{}.*/i }} ] '''.format(massekhet,massekhet)}
    # query_tush = {''' "generated_by": "Ein Mishpat Cluster {}", $and: [ {{ "refs.0": /.*{}.*/i }} ] '''.format(massekhet)}
    # query_rambam = {''' "generated_by": "Ein Mishpat Cluster {}", $and: [ {{ "refs.0": /.*{}.*/i }} ] '''.format(massekhet)}
    # query_semag = {''' "generated_by": "Ein Mishpat Cluster {}", $and: [ {{ "refs.0": /.*{}.*/i }} ] '''.format(massekhet)}
    linkset = LinkSet(query)
    links = [l.contents() for l in linkset]
    # for l in links:
    #     l["generated_by"] = "Ein Mishpat Cluster"
    post_link(links)
    return links
Code Example #19
def save_links_post_request(category):
    query = {
        "generated_by": "mesorat_hashas.py {}".format(category),
        "auto": True,
        "type": "Automatic Mesorat HaShas"
    }
    ls = LinkSet(query)
    links = [l.contents() for l in ls]
    i = 0
    while i < len(links):
        print "Posting [{}:{}]".format(i, i + 4999)
        post_link(links[i:i + 5000])
        i += 5000
        pytime.sleep(10)
Code Example #20
def post():
    post_index(construct_index())
    base_text = restructure_text()
    links = build_links(base_text)
    version = {
        'versionTitle': u'Derech Chaim, Maharal',
        'versionSource': u'http://mobile.tora.ws/',
        'language': 'he',
        'text': base_text
    }
    post_text("Derech Chaim", version)
    version['text'] = get_intro()
    post_text("Derech Chaim, Author's Introduction", version, index_count='on')
    post_link(links)
Code Example #21
 def post(self):
     for index in self.base_indices:
         post_index(index, weak_network=True)
     for he_author in self.commentarySchemas.keys():
         en_author = DCXMLsubs.commentatorNames[he_author]
         index = {
             'title': en_author,
             'categories': ['Commentary2', 'Masechtot Ketanot', en_author],
             'schema': self.commentarySchemas[he_author].serialize()
         }
         post_index(index)
     for version in self.versionList:
         post_text(version['ref'], version['version'], index_count='on', weak_network=True)
     post_link(self.linkSet)
Code Example #22
def post_unambiguous_links(post=False):
    links = []
    with open(ROOT + "/unambiguous_links.csv", "r") as fin:
        cin = csv.DictReader(fin)
        for row in cin:
            link = {
                "generated_by": "link_disambiguator",
                "auto": True,
                "type": "",
                "refs": [row["Quoting Ref"], row["Quoted Ref"]]
            }
            links += [link]
    print("Total Links: {}".format(len(links)))
    if post:
        i = 0
        batch_size = 50
        while i < len(links):
            print("Posting [{}:{}]".format(i, i + batch_size - 1))
            print(post_link(links[i:i + batch_size]))
            i += batch_size
    else:
        for link_obj in tqdm(links):
            try:
                Link(link_obj).save()
            except DuplicateRecordError:
                pass  # duplicate link already exists; nothing to do
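
This script shows both posting paths side by side: post_link over the API when post=True, and direct Link(...).save() against the local database otherwise, swallowing DuplicateRecordError for links that already exist. The local path condensed into a helper:

from sefaria.model import Link
from sefaria.system.exceptions import DuplicateRecordError

def save_link_locally(link_dict):
    # Save straight to the local database; an identical existing link
    # raises DuplicateRecordError, which we treat as "already saved".
    try:
        Link(link_dict).save()
        return True
    except DuplicateRecordError:
        return False
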
Code Example #23
def post():
    books = file_to_books()
    for book in library.get_indexes_in_category('Torah'):
        books[book] = align_text(books[book], u'@\u05e4\u05e8\u05e7 [\u05d0-\u05ea]{1,2}', u'[0-9]{1,2}\.')

    functions.post_index(build_index())
    node_names = ['Introduction'] + library.get_indexes_in_category('Torah')
    for name in node_names:
        version = {
            'versionTitle': 'Tafsir al-Torah bi-al-Arabiya, Paris, 1893',
            'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001863864',
            'language': 'he',
            'text': books[name]
        }
        functions.post_text('Tafsir Rasag, {}'.format(name), version)

    functions.post_link(build_links(books))
Code Example #24
File: tafsir_rasag.py Project: maxrabin/Sefaria-Data
def post():
    books = file_to_books()
    for book in library.get_indexes_in_category('Torah'):
        books[book] = align_text(books[book],
                                 u'@\u05e4\u05e8\u05e7 [\u05d0-\u05ea]{1,2}',
                                 u'[0-9]{1,2}\.')

    functions.post_index(build_index())
    node_names = ['Introduction'] + library.get_indexes_in_category('Torah')
    for name in node_names:
        version = {
            'versionTitle': 'Tafsir al-Torah bi-al-Arabiya, Paris, 1893',
            'versionSource':
            'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001863864',
            'language': 'he',
            'text': books[name]
        }
        functions.post_text('Tafsir Rasag, {}'.format(name), version)

    functions.post_link(build_links(books))
Code Example #25
File: chinuch.py Project: smontagu/Sefaria-Data
def post():
    minchat = {
        'name': 'Minchat Chinuch',
        'text': produce_parsed_data(filename)
    }
    sefer = {
        'name': 'Sefer HaChinukh',
        'text': Ref('Sefer HaChinukh').text('he').text
    }

    chinukh_links = find_links(minchat, sefer, grab_dh, u'<b>', u'</b>')

    with codecs.open('links.txt', 'w', 'utf-8') as outfile:
        for each_link in chinukh_links:
            outfile.write(u'{}\n'.format(each_link['refs']))

    alt = construct_alt_struct('Chinukh_by_Parsha.csv',
                               'Chinukh Mitzva names.csv')

    cleaned = util.clean_jagged_array(
        minchat['text'],
        [m_pattern, comment_pattern, u'@[0-9]{2}', u'\n', u'\r'])
    with codecs.open('parsed.txt', 'w', 'utf-8') as outfile:
        util.jagged_array_to_file(outfile, cleaned,
                                  [u'Mitzva', u'Seif', u'Paragraph'])

    full_text = {
        'versionTitle': 'Minchat Chinuch, Piotrków, 1902',
        'versionSource':
        'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001175092',
        'language': 'he',
        'text': cleaned
    }

    index = construct_index(alt)
    functions.post_index(index)
    functions.post_text('Minchat Chinuch', full_text)
    functions.post_link(chinukh_links)
Code Example #26
def post_index_text_links():
    tracs = library.get_indexes_in_category('Mishnah')
    parsed = parse_files()
    link_refs = [collect_links(tractate) for tractate in tracs]
    full_links = build_links(link_refs)
    for linker in full_links:
        add_dh_to_text(linker, parsed)
    for num, data in enumerate(sorted(parsed.keys())):
        print num + 1, data
        for attempt in range(3):
            try:
                upload(parsed[data], True)
            except URLError:
                print 'handling weak network'
            else:
                break
        else:
            raise URLError
        # util.ja_to_xml(parsed[data]['data'].array(), ['chapter', 'comment', 'line'])
        # break
    functions.post_link(full_links)

    os.remove('errors.html')
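
The upload loop above leans on Python's for/else: the else clause fires only when the loop finishes without a break, i.e. all three attempts raised URLError. The idiom in isolation:

try:
    from urllib2 import URLError          # Python 2, as in the scripts here
except ImportError:
    from urllib.error import URLError     # Python 3

def retry(action, attempts=3):
    # Run `action` until it succeeds, giving up after `attempts` failures.
    for attempt in range(attempts):
        try:
            action()
        except URLError:
            print('handling weak network')
        else:
            break
    else:
        # Reached only if no attempt broke out of the loop.
        raise URLError('all {} attempts failed'.format(attempts))
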
Code Example #27
File: chizkuni.py Project: smontagu/Sefaria-Data
def add_links(full_text, upload=False):
    """
    :param full_text: Data structure from parse_text()
    :param upload: set to True to actually post; otherwise the function does nothing
    """

    if not upload:
        return

    for book in full_text.keys():
        for chap_index, chapter in enumerate(full_text[book]):
            for verse_index, verse in enumerate(chapter):
                for comment in xrange(len(verse)):

                    post_link({
                        'refs':[
                            '{}.{}.{}'.format(book, chap_index+1, verse_index+1),
                            'Chizkuni,_{}.{}.{}.{}'.format(book, chap_index+1, verse_index+1, comment+1)
                        ],
                        'type': 'commentary',
                        'auto': True,
                        'generated_by': 'Chizkuni linker'
                    })
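
Note that this posts one link per comment, i.e. one request per link; most other scripts on this page accumulate a list and post once. The same loop, batched:

# Sketch: build the same dictionaries, then post a single batch at the end.
links = []
for book in full_text.keys():
    for chap_index, chapter in enumerate(full_text[book]):
        for verse_index, verse in enumerate(chapter):
            for comment in xrange(len(verse)):
                links.append({
                    'refs': [
                        '{}.{}.{}'.format(book, chap_index + 1, verse_index + 1),
                        'Chizkuni,_{}.{}.{}.{}'.format(book, chap_index + 1, verse_index + 1, comment + 1)
                    ],
                    'type': 'commentary',
                    'auto': True,
                    'generated_by': 'Chizkuni linker'
                })
post_link(links)
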
Code Example #28
def post_links():
    from sources.functions import post_link
    from sefaria.system.exceptions import DuplicateRecordError
    with open("research/parallel_matcher_scripts/final_selichot_links.csv",
              "r") as fin:
        c = csv.DictReader(fin)
        links = []
        for row in c:
            if row["Keep?"] == "Yes":
                links += [{
                    "refs": [row["Tanakh Ref"], row["Selichot Ref"]],
                    "auto":
                    True,
                    "generated_by":
                    "selichot_edot_hamizrach_parallel_matcher"
                }]
        for l in links:
            link = Link(l)
            try:
                link.save()
            except DuplicateRecordError:
                print("DUP", l)
        post_link(links)
Code Example #29
def parse_and_upload():
    cards = get_cards()
    links = []
    for card in cards:
        node = JaggedArrayNode()
        node.add_title(card, 'en', primary=True)
        node.add_title(u'רמב"ם ' +
                       Ref(card.replace('Rambam ', '')).he_normal(),
                       'he',
                       primary=True)
        node.key = card
        node.depth = 3
        node.addressTypes = ['Integer', 'Integer', 'Integer']
        node.sectionNames = ['Chapter', 'Mishnah', 'Comment']
        node.validate()
        node.toc_zoom = 2

        index = {
            'title': card,
            'categories': ['Commentary2', 'Mishnah', 'Rambam'],
            'schema': node.serialize(),
        }

        parsed = parser(card)
        links.extend(parsed['links'])
        version = {
            'versionTitle': u'Vilna Edition',
            'versionSource':
            'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957',
            'language': 'he',
            'text': parsed['parsed']
        }
        print 'posting {}'.format(card)
        post_index(index)
        post_text(card, version, index_count='on')
    post_link(links)
Code Example #30
File: gra_parse.py Project: maxrabin/Sefaria-Data
        "refs": [
            "HaGra on Sefer Yetzirah Gra Version " +
            '%d:%d:%d' % tuple(x + 1 for x in dh['indices']),
            "Sefer Yetzirah Gra Version " +
            '%d:%d' % tuple(x + 1 for x in dh['indices'][:2]),
        ],
        "type":
        "commentary",
        "auto":
        True,
        "generated_by":
        "gra_parse"
    })
    dh_text = dh['data']
    # append to links list
    gra_links.append(link)

# drop the last link; the closing "slik" shouldn't be linked
gra_links.pop()

# save to mongo the list of dictionaries.
post_link(gra_links)

# link_ofen = (
#             {
#             "refs": [
#                 "Pri Yitzhak on Sefer Yetzirah " + '%d:%d:%d' %tuple(x+1 for x in dh['indices']),
#                 "Sefer Yetzirah Ari Version " + '%d:%d' %tuple(x+1 for x in dh['indices'][:2]),
#             ],
#             "type": "reference"
#         })
Code Example #31
# -*- coding: utf-8 -*-
import codecs
import regex
from sefaria.model import *
from sources import functions
from data_utilities import util
from sources.Targum_Jerusalem_Hebrew import tjh_functions

english_names = ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy']
index = tjh_functions.create_index_record()
functions.post_index(index)

all_of_humash = tjh_functions.parse()

for book, book_name in zip(all_of_humash, english_names):
    ref = 'Targum Jerusalem, {}'.format(book_name)
    text = tjh_functions.create_text(book)
    functions.post_text(ref, text)

list_of_links = tjh_functions.create_links(all_of_humash)
functions.post_link(list_of_links)

testing_file = codecs.open("testing_file.txt", 'w', 'utf-8')
util.jagged_array_to_file(testing_file, all_of_humash, ['Book', 'Chapter', 'Verse'])
testing_file.close()
Code Example #32
        else:
            raise AssertionError("{} has {} comments".format(comment_ref.normal(), len(comment_links)))
    return {'add': add, 'remove': remove}

server = 'http://*****:*****'  # credentials redacted in the source listing
# (lines elided in the source; `result` comes from a matcher run over
#  u'@55([\u05d0-\u05ea]{1,3})' and u"Magen Avraham")
        to_add.extend(result['add'])
        to_remove.extend(result['remove'])

for i in to_remove:
    r = requests.delete('{}/api/links/{}'.format(server, i._id))
    print r.text

to_add = [{
    'refs': i,
    'type': 'commentary',
    'auto': True,
    'generated_by': 'Vilna Link Fixer'
} for i in to_add]
r = post_link(to_add, server=server)
Code Example #33
         first_b = x.find("<b>")
         second_b = x.find("</b>")
         x = x[first_b+3:second_b]
     x = x.replace(u"<b>", u"").replace(u"</b>", u"").replace(u"אלקים", u"אלהים").replace(u" כו", u"")
     if u"וכו'" in x:
         x = x.split(u"וכו'", 1)[0]
     return u" ".join(x.split(u" ")[0:10]).strip()
 base_tokenizer = lambda x: [x for x in x.split()]
 index = library.get_index("Maor VaShemesh")
 section_refs = index.all_section_refs()
 current_book = "Genesis"
 ls = []
 for section in section_refs:
     print section
     section_text = section.text('he').text
     # section_dh_text = [get_dh(line) if "<b>" in line and "</b>" in line for text in section_text for line in text]
     ja = section.index_node
     ja_title = ja.get_primary_title()
     parasha = db.parshiot.find({"parasha": ja_title})
     if list(parasha) != []:
         parasha = list(db.parshiot.find({"parasha": ja_title}))[0]
         current_parasha = parasha["ref"].split()[0]
         tc_current_book = Ref(current_parasha).text('he')
         matches = match_ref(tc_current_book, section_text, base_tokenizer, dh_extract_method=get_dh,
                             word_threshold=0.35, char_threshold=0.26)["matches"]
         for i, match in enumerate(matches):
             if match:
                 link = {"refs": [match.normal(), "{} {}".format(section.normal(), i+1)], "auto": True, "type": "Commentary", "generated_by": "maor_vashemesh"}
                 ls.append(link)
 post_link(ls, server="http://proto.sefaria.org")
Code Example #34
            'versionSource': 'http://www.toratemetfreeware.com/',
            'language': 'he',
            'text': parsed_data[book]
        }
        functions.post_text('Siftei Hakhamim, {}'.format(book), version)


def manual_links():
    """
    Some links had to be created manually by the content team; the refs to link were saved in a CSV.
    :return: JSON object of links parsed from the aforementioned CSV.
    """

    with open('siftei hakhamim manual links.csv') as infile:
        csv_reader = ucsv.reader(infile, delimiter=';')
        links = [{'refs': [ref[0], ref[1]],
                  'type': 'commentary',
                  'auto': False,
                  'generated_by': 'Sefaria Content Team'}
                 for ref in csv_reader]
    return links


parsed = parse_multiple()
slinks = generate_links(parsed)
functions.post_index(build_index())
post_text(parsed)
functions.post_link(slinks)
functions.post_link(manual_links())
Code Example #35
def post_all_smk(ja_smk, ja_raph, ja_hagahot, raph_links, hg_links):
    post_smk(ja_smk)
    post_raph(ja_raph)
    post_link(raph_links)
    post_hagahot(ja_hagahot)
    post_link(hg_links)
Code Example #36
    schema.validate()

    index = {
        "title": "Malbim on Genesis",
        "collective_title": "Malbim",
        "base_text_titles": ["Genesis"],
        "categories": ["Tanakh", "Torah", "Commentary", "Malbim"],
        "schema": schema.serialize()
    }

    return index


def upload_text(parser):

    assert isinstance(parser, Malbim)
    book = parser.parsed_text
    version = {
            "versionTitle": "Malbim, Vilna Romm, 1892.",
            "versionSource": 'http://dlib.rsl.ru/viewer/01006563898#?page=1',
            "language": 'he',
            "text": book
        }
    functions.post_text("Malbim on Genesis", version, index_count='on')


malbim = Malbim(path)
functions.post_index(build_index(malbim))
upload_text(malbim)
functions.post_link(build_links(malbim))
Code Example #37
def save_links_post_request():
    query = {"generated_by": "mesorat_hashas.py", "auto": True, "type": "Automatic Mesorat HaShas"}
    ls = LinkSet(query)
    links = [l.contents() for l in ls]
    post_link(links)
Code Example #38
File: parse.py Project: JonMosenkis/Sefaria-Data
def post_links(links):
    functions.post_link(links)
Code Example #39
            'language': 'he',
            'text': parsed_data[book]
        }
        functions.post_text('Siftei Hakhamim, {}'.format(book), version)


def manual_links():
    """
    Some links had to be created manually by the content team; the refs to link were saved in a CSV.
    :return: JSON object of links parsed from the aforementioned CSV.
    """

    with open('siftei hakhamim manual links.csv') as infile:
        csv_reader = ucsv.reader(infile, delimiter=';')
        links = [{
            'refs': [ref[0], ref[1]],
            'type': 'commentary',
            'auto': False,
            'generated_by': 'Sefaria Content Team'
        } for ref in csv_reader]
    return links


parsed = parse_multiple()
slinks = generate_links(parsed)
functions.post_index(build_index())
post_text(parsed)
functions.post_link(slinks)
functions.post_link(manual_links())
Code Example #40
                            version,
                            index_count="on",
                            server=user_args.server)
    else:
        functions.post_text(book_name,
                            version,
                            index_count="on",
                            server=user_args.server)

    flags = dict(
        versionTitleInHebrew=u'אשלי רברבי: שלחן ערוך יורה דעה, למברג תרמ"ח')
    if user_args.title is None:
        flags['priority'] = 2
    functions.post_flags(
        dict(ref=book_name, lang='he', vtitle=version['versionTitle']), flags,
        user_args.server)

    if links:
        functions.post_link(links, server=user_args.server)

    if user_args.no_slack:
        pass
    else:
        requests.post(os.environ["SLACK_URL"],
                      json={
                          "text":
                          u"{} Upload Complete".format(
                              user_args.title if user_args.
                              title else u"Shulchan Arukh, Yoreh De'ah")
                      })
Code Example #41
def save_links_post_request():
    query = {"generated_by":"dibur_hamatchil_matcher review","auto":False}
    ls = LinkSet(query)
    links = [l.contents() for l in ls]
    post_link(links)
Code Example #42
        node.sectionNames = ['Chapter', 'Verse', 'Comment']
        node.toc_zoom = 2
        record.append(node)
    record.validate()

    index = {
        "title": "Baal HaTurim",
        "categories": ["Commentary2", "Torah", "Baal HaTurim"],
        "schema": record.serialize()
    }
    return index


def post_text(parsed_data):

    for book in library.get_indexes_in_category('Torah'):
        version = {
            'versionTitle': 'Baal HaTurim',
            'versionSource': 'http://www.toratemetfreeware.com/',
            'language': 'he',
            'text': parsed_data[book]
        }
        functions.post_text('Baal HaTurim, {}'.format(book), version)

parsed = parse_multiple()
links = linker(parsed)
index = build_index()
functions.post_index(index)
post_text(parsed)
functions.post_link(links)
Code Example #43
    with codecs.open('Manual links.txt', 'r', 'utf-8') as file_obj:
        manual_links_str = file_obj.read()

    manual_links = re.split(r'\n', manual_links_str)

    for item in manual_links:
        match = {
            'refs': [
                f'Eliyah Rabbah on Shulchan Arukh, Orach Chayim {item[:5]}',
                f'Shulchan Arukh, Orach Chayim {item[6:]}'
            ],
            'auto': True,
            'generated_by': 'Parse_ER - Manual Links',
            'type': 'commentary',
        }
        links.append(match)

    #286:10 - 286:3
    match = {
        'refs': [
            'Eliyah Rabbah on Shulchan Arukh, Orach Chayim 286:10',
            'Shulchan Arukh, Orach Chayim 286:3'
        ],
        'auto': True,
        'generated_by': 'Parse_ER - Manual Links',
        'type': 'commentary',
    }
    links.append(match)

    post_link(links, server=server)
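
The fixed slices item[:5] and item[6:] assume every line of Manual links.txt places a five-character ref, one separator character, then the second ref. If the two refs are simply whitespace-separated (an assumption about the file format, not something shown here), a more tolerant parse would be:

# Hedged variant: split on whitespace instead of fixed offsets.
er_ref, sa_ref = item.split(None, 1)
match = {
    'refs': [
        f'Eliyah Rabbah on Shulchan Arukh, Orach Chayim {er_ref}',
        f'Shulchan Arukh, Orach Chayim {sa_ref.strip()}',
    ],
    'auto': True,
    'generated_by': 'Parse_ER - Manual Links',
    'type': 'commentary',
}
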
Code Example #44
 ja_smk = parse_semak('Semak.txt')
 # # siman_page = map_semak_page_siman(ja_smk, to_print=True)
 # letter_ja = parse_Raph_by_letter(u'Raph_on_Semak.txt')
 # raph_smk_alignment = raph_alignment_report(ja_smk, letter_ja)
 # ja_raph = parse_Raph_simanim(raph_smk_alignment)
 # # # post_raph(ja_raph)
 # # # link_raph(ja_raph)  # try to find where this is coming from
 # raph = parse_Raph_by_letter('Raph_on_Semak.txt')
 # raph_links = link_raph(ja_smk, ja_raph)
 # ja_hagahot = parse_hagahot_by_letter(u'Semak_hagahot_chadashot.txt')
 # hgh_align = hagahot_alignment(ja_smk, ja_raph, ja_hagahot)
 # ja_hagahot = hagahot_parse(ja_hagahot, hgh_align)
 # hg_links = link_hg(ja_hagahot, hgh_align, ja_raph)
 #
 # # post_all_smk(ja_smk, ja_raph, ja_hagahot, raph_links, hg_links)
 # smg_links = link_smg(ja_smk, u'smg_smk_test')
 # post_link(smg_links, VERBOSE=True)
 post_link(link_remazim(), VERBOSE=True)
 # remazim_sm_g_k = link_smk_remazim_to_smg_remazim(smg_links)
 # post_link(remazim_sm_g_k, VERBOSE=True)
 # link_rambam("testrambamibid.txt")
 # get_citations(ja_smk, "exctract")
 # fromCSV(u'exctract.csv', u'newfile', u'full')
 old = 22
 new = 23
 # rewrtie_csv(u'fixed{}.csv'.format(old), u'fixed{}'.format(new), u'full', toWriteHeaders=[u'siman', u'smk_segment', u'rambam', u'smg', u'tur', u'full'])
 # rewrtie_csv(u'fixed{}.csv'.format(old), u'smk_links', u'full', toWriteHeaders=[u'siman', u'smk_segment', u'rambam', u'smg', u'tur', u'full'])
 # smkDerivenLinks, links_smg = link_rambam_smg_tur(u'fixed{}.csv'.format(new))
 # post_link(smkDerivenLinks, VERBOSE=True)
 # remazim_sm_g_k = link_smk_remazim_to_smg_remazim(links_smg)
 # post_link(remazim_sm_g_k, VERBOSE=True)
Code Example #45
        node.addressTypes = ['Integer', 'Integer', 'Integer']
        node.sectionNames = ['Chapter', 'Verse', 'Comment']
        node.toc_zoom = 2
        record.append(node)
    record.validate()

    index = {
        "title": "Siftei Hakhamim",
        "categories": ["Commentary2", "Torah", "Rashi"],
        "schema": record.serialize()
    }
    return index


def post_text(parsed_data):

    for book in library.get_indexes_in_category('Torah'):
        version = {
            'versionTitle': 'Siftei Hakhamim',
            'versionSource': 'http://www.toratemetfreeware.com/',
            'language': 'he',
            'text': parsed_data[book]
        }
        functions.post_text('Siftei Hakhamim, {}'.format(book), version)

parsed = parse_multiple()
slinks = generate_links(parsed)
functions.post_index(build_index())
post_text(parsed)
functions.post_link(slinks)
Code Example #46
                      for title in storage_object.get_index_titles()])
    for category_list in categories:
        add_category(category_list[-1], category_list, server=destination)


ensure_categories(storage, server)

if num_processes > 1:
    pool = Pool(num_processes)
    pool.map(partial_upload_index, titles)
    pool.map(partial_upload_version, titles)

else:
    regular_output = sys.stdout
    log_file = open('upload_log.log', 'w')
    progress_bar = tqdm(total=len(titles))

    for t in titles:
        sys.stdout = log_file
        partial_upload_index(t)
        partial_upload_version(t)
        sys.stdout = regular_output
        progress_bar.update(1)
    log_file.close()

post_link(storage.generate_links(), server=server, weak_network=True)
with codecs.open('All_Peregrine_Titles.json', 'w', 'utf-8') as fp:
    json.dump(titles, fp)

requests.post(os.getenv('SLACK_URL'), json={'text': 'Peregrine Complete :owl:'})
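
partial_upload_index and partial_upload_version are not defined in this fragment; from their names and from how Pool.map calls them with a bare title, they are presumably functools.partial bindings that fix everything but the title. A hypothetical setup (upload_index, upload_version, and their signatures are assumptions, not part of the fragment):

from functools import partial

partial_upload_index = partial(upload_index, server=server)
partial_upload_version = partial(upload_version, server=server)
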
Code Example #47
                    "Pri Yitzhak on Sefer Yetzirah Gra Version " + '%d:%d:%d' %tuple(x+1 for x in dh['indices']),
                    "Sefer Yetzirah Gra Version " + '%d:%d' %tuple(x+1 for x in dh['indices'][:2]),
                ],
                "type": "commentary",
                "auto": True,
                "generated_by": "pri_yitzhak_parse"
            })
            dh_text = dh['data']
            # append to links list
            pri_links.append(link)
            # drop the last link; the closing "slik" shouldn't be linked
    pri_links.pop()
    return pri_links


# find links in the pri that are of the form bookName(chapter letter)
def find_links_in_pri():
    a = ur'\(.*?\)'
    with codecs.open('yitzira_pri_yitzhak.txt', 'r', 'utf-8') as fp:
        lines = fp.readlines()
        txt = " ".join(lines)
        books = library.get_titles_in_string(txt,'he')
        for b in books:
            site_re = ur"{}{}{} ".format('\(\s*',b,'.*?\)')
            site = regex.search(site_re,txt)
            if site:
                print site.span(), site.group()

# save to mongo the list of dictionaries.
post_link(link_pri(pri))
Code Example #48
File: upload.py Project: maxrabin/Sefaria-Data
        if user_args.add_term:
            add_term(user_args.title,
                     book_xml.titles['he'],
                     server=user_args.server)

    index = index_methods.get(book_name,
                              create_simple_index)(en_title=book_name,
                                                   he_title=he_book_name,
                                                   commentator=user_args.title,
                                                   server=user_args.server)
    if user_args.verbose:
        print index

    post_index(index, server=user_args.server)

    if post_parse.get(book_name):
        post_parse[book_name](book_ja)
    version = {
        'versionTitle': "Shulhan Arukh, Hoshen ha-Mishpat; Lemberg, 1898",
        'versionSource':
        "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH002097773",
        'language': 'he',
        'text': book_ja
    }
    post_text(book_name, version, index_count='on', server=user_args.server)
    if links:
        post_link(links, server=user_args.server)

    requests.post(os.environ['SLACK_URL'], json={'text': 'Upload Complete'})
Code Example #49
File: upload.py Project: JonMosenkis/Sefaria-Data
    # version = {
    #     "versionTitle": "Maginei Eretz; Shulchan Aruch Orach Chaim, Lemberg, 1893",
    #     "versionTitleInHebrew": u"""ספר מגיני ארץ; שלחן ערוך. למברג, תרנ"ג""",
    #     "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH002084080",
    #     "language": "he",
    #     "text": book_ja,
    # }
    version = {
        "versionTitle": "Maginei Eretz: Shulchan Aruch Orach Chaim, Lemberg, 1893",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH002084080",
        "language": "he",
        "text": book_ja,
    }
    functions.post_text(book_name, version, index_count="on", server=user_args.server)
    if links:
        functions.post_link(links, server=user_args.server)

    # for title, clean_func in post_parse.items():
    #     print
    #     print title
    #     comm = commentaries.get_commentary_by_title(title.split(" on")[0])
    #     comm = check_marks(comm, clean_func)
    #
    # print
    # print "Checking Orach Chaim"
    # base = check_marks(root.get_base_text(), orach_chaim_clean)
    functions.post_flags({'ref': book_name, 'lang': 'he', 'vtitle': version['versionTitle']},
                         {"versionTitleInHebrew": u"""ספר מגיני ארץ: שלחן ערוך. למברג, תרנ"ג""",}, user_args.server)

    try:
        requests.post(os.environ['SLACK_URL'], json={'text':'{} uploaded successfully'.format(book_name)})
Code Example #50
if __name__ == "__main__":
    ja_smk = parse_semak('Semak.txt')
    # # siman_page = map_semak_page_siman(ja_smk, to_print=True)
    # letter_ja = parse_Raph_by_letter(u'Raph_on_Semak.txt')
    # raph_smk_alignment = raph_alignment_report(ja_smk, letter_ja)
    # ja_raph = parse_Raph_simanim(raph_smk_alignment)
    # # # post_raph(ja_raph)
    # # # link_raph(ja_raph)  # try to find where this is coming from
    # raph = parse_Raph_by_letter('Raph_on_Semak.txt')
    # raph_links = link_raph(ja_smk, ja_raph)
    # ja_hagahot = parse_hagahot_by_letter(u'Semak_hagahot_chadashot.txt')
    # hgh_align = hagahot_alignment(ja_smk, ja_raph, ja_hagahot)
    # ja_hagahot = hagahot_parse(ja_hagahot, hgh_align)
    # hg_links = link_hg(ja_hagahot, hgh_align, ja_raph)
    #
    # # post_all_smk(ja_smk, ja_raph, ja_hagahot, raph_links, hg_links)
    # smg_links = link_smg(ja_smk, u'smg_smk_test')
    # post_link(smg_links, VERBOSE=True)
    # post_link(link_remazim(), VERBOSE=True)
    # remazim_sm_g_k = link_smk_remazim_to_smg_remazim(smg_links)
    # post_link(remazim_sm_g_k, VERBOSE=True)
    # link_rambam("testrambamibid.txt")
    # get_citations(ja_smk, "exctract")
    # fromCSV(u'exctract.csv', u'newfile', u'full')
    old = 22
    new = 23
    # rewrtie_csv(u'fixed{}.csv'.format(old), u'fixed{}'.format(new), u'full', toWriteHeaders=[u'siman', u'smk_segment', u'rambam', u'smg', u'tur', u'full'])
    smkDerivenLinks, links_smg = link_rambam_smg_tur(u'fixed{}.csv'.format(new))
    post_link(smkDerivenLinks, VERBOSE=True)
    remazim_sm_g_k = link_smk_remazim_to_smg_remazim(links_smg)
    post_link(remazim_sm_g_k, VERBOSE=True)
Code Example #51
gra_links = []
# use a generator to go over the text and find the 3 level indices
for dh in traverse_ja(gra):
        link = (
            {
            "refs": [
                "HaGra on Sefer Yetzirah Gra Version " + '%d:%d:%d' %tuple(x+1 for x in dh['indices']),
                "Sefer Yetzirah Gra Version " + '%d:%d' %tuple(x+1 for x in dh['indices'][:2]),
            ],
            "type": "commentary",
            "auto": True,
            "generated_by": "gra_parse"
        })
        dh_text = dh['data']
        # append to links list
        gra_links.append(link)

# drop the last link; the closing "slik" shouldn't be linked
gra_links.pop()

# save to mongo the list of dictionaries.
post_link(gra_links)

# link_ofen = (
#             {
#             "refs": [
#                 "Pri Yitzhak on Sefer Yetzirah " + '%d:%d:%d' %tuple(x+1 for x in dh['indices']),
#                 "Sefer Yetzirah Ari Version " + '%d:%d' %tuple(x+1 for x in dh['indices'][:2]),
#             ],
#             "type": "reference"
#         })