def parse_and_upload():
    """Build and post an index, text version, and links for every Rambam-on-Mishnah
    tractate returned by get_cards().

    Side effects: posts one index and one text per card, then the accumulated
    links; prints progress per card.
    """
    cards = get_cards()
    links = []
    for card in cards:
        # Depth-3 schema: Chapter -> Mishnah -> Comment
        node = JaggedArrayNode()
        node.add_title(card, 'en', primary=True)
        # Hebrew title: prefix רמב"ם to the Hebrew name of the base tractate
        node.add_title(u'רמב"ם ' + Ref(card.replace('Rambam ', '')).he_normal(), 'he', primary=True)
        node.key = card
        node.depth = 3
        node.addressTypes = ['Integer', 'Integer', 'Integer']
        node.sectionNames = ['Chapter', 'Mishnah', 'Comment']
        node.validate()
        node.toc_zoom = 2  # set after validate(); controls TOC display depth
        index = {
            'title': card,
            'categories': ['Commentary2', 'Mishnah', 'Rambam'],
            'schema': node.serialize(),
        }
        parsed = parser(card)
        links.extend(parsed['links'])
        version = {
            'versionTitle': u'Vilna Edition',
            'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957',
            'language': 'he',
            'text': parsed['parsed']
        }
        print 'posting {}'.format(card)
        post_index(index)
        post_text(card, version, index_count='on')
    # post all accumulated links once, after every card is uploaded
    post_link(links)
def post_raph(ja_raph):
    """Clean and post Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan.

    Replaces the raw '@22' parser marker with an HTML line break, builds the
    depth-2 (Siman -> Segment) schema, registers the shared term, then posts
    the index and the text version.

    :param ja_raph: jagged array of raw parsed text.
    """
    # '@22' marker in the raw transcription -> explicit line break
    replace_dict = {u"@22": u"<br>"}
    ja_raph = inlinereferencehtml(ja_raph)
    ja_raph = before_post_cleaner(ja_raph, replace_dict)
    text_version = {
        'versionTitle': 'Sefer Mitzvot Katan, Kopys, 1820',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001771677',
        'language': 'he',
        'text': ja_raph
    }
    schema = JaggedArrayNode()
    schema.add_title('Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan', 'en', True)
    schema.add_title(u'הגהות רבנו פרץ על ספר מצוות קטן', 'he', True)
    schema.key = 'Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan'
    schema.depth = 2
    schema.addressTypes = ['Integer', 'Integer']
    schema.sectionNames = ['Siman', 'Segment']
    schema.validate()
    add_term('Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan', u'הגהות רבנו פרץ על ספר מצוות קטן')
    index_dict = {
        'title': 'Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan',
        'dependence': "Commentary",
        'base_text_titles': ["Sefer Mitzvot Katan"],
        "categories": ["Halakhah", "Commentary"],
        'schema': schema.serialize(),  # This line converts the schema into json
        'collective_title': 'Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan',
    }
    post_index(index_dict)
    post_text('Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan', text_version)
def upload():
    """Post the Sefat Emet index, then one text per (book, parasha, year),
    retrying each post up to 10 times on network errors."""
    functions.post_index(construct_index())
    parsed = parse()
    names = node_names()
    en_parasha_names = get_parsha_dict()
    for book in names.keys():
        for parasha in names[book].keys():
            for year in names[book][parasha]:
                current_text = {
                    'versionTitle': 'Sefat emet, Piotrków, 1905-1908',
                    'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001186213',
                    'language': 'he',
                    'text': parsed[book][parasha][year]
                }
                en_parasha = en_parasha_names[parasha]
                civil_year = get_civil_year(year, book)
                url = 'Sefat Emet, {}, {}, {}'.format(book, en_parasha, civil_year)
                print url
                # Retry loop for a flaky connection.
                # NOTE(review): if all 10 attempts fail, this section is
                # silently skipped — confirm that's acceptable.
                for i in range(10):
                    try:
                        functions.post_text(url, current_text)
                    except (URLError, HTTPError):
                        print 'handling weak network'
                        continue
                    else:
                        break
def upload():
    """Build and post an index, text version, and links for every Rambam
    tractate named in the module-level `cards` list."""
    links = []
    for tractate in cards:
        # Hebrew title: רמב"ם + Hebrew name of the base tractate
        # (drops the leading 'Rambam' word from the English name)
        he_name = Ref(' '.join(tractate.split()[1:])).he_normal()
        he_name = u'רמב"ם {}'.format(he_name)
        # Depth-3 schema: Chapter -> Mishnah -> Comment
        node = JaggedArrayNode()
        node.add_title(tractate, 'en', primary=True)
        node.add_title(he_name, 'he', primary=True)
        node.key = tractate
        node.depth = 3
        node.addressTypes = ['Integer', 'Integer', 'Integer']
        node.sectionNames = ['Chapter', 'Mishnah', 'Comment']
        node.validate()
        index = {
            'title': tractate,
            'categories': ['Commentary2', 'Mishnah', 'Rambam'],
            'schema': node.serialize(),
            'toc_zoom': 2
        }
        parsed = parse_file('{}.txt'.format(tractate))
        links.extend(parsed['links'])
        version = {
            'versionTitle': u'Vilna Edition',
            'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957',
            'language': 'he',
            'text': parsed['parsed text']
        }
        print 'posting {}'.format(tractate)
        post_index(index)
        post_text(tractate, version, index_count='on')
    # post all accumulated links once, after every tractate is uploaded
    post_link(links)
def post_raph(ja_raph):
    """Post Hagahot Rabbenu Peretz: depth-2 (Siman -> Segment) schema, index,
    and text version.

    :param ja_raph: jagged array of parsed text, assumed already cleaned.
    """
    text_version = {
        'versionTitle': 'Sefer Mitzvot Katan, Kopys, 1820',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001771677',
        'language': 'he',
        'text': ja_raph
    }
    schema = JaggedArrayNode()
    schema.add_title('Hagahot Rabbenu Peretz', 'en', True)
    schema.add_title(u'הגהות רבנו פרץ', 'he', True)
    schema.key = 'Hagahot Rabbenu Peretz'
    schema.depth = 2
    schema.addressTypes = ['Integer', 'Integer']
    schema.sectionNames = ['Siman', 'Segment']
    schema.validate()
    index_dict = {
        'title': 'Hagahot Rabbenu Peretz',
        'dependence': "Commentary",
        'base_text_titles': ["Sefer Mitzvot Katan"],
        "categories": ["Halakhah", "Commentary"],
        'schema': schema.serialize()  # This line converts the schema into json
    }
    post_index(index_dict)
    post_text('Hagahot Rabbenu Peretz', text_version)
def upload(data, post_index=True):
    """Build and post a depth-3 index and its cleaned text for one Yachin tractate.

    :param data: dict with 'en' (English title), 'he' (Hebrew title), and
        'data' (object whose .array() yields the parsed jagged array).
    :param post_index: when True, also create the index record on the server;
        the text version is posted either way.
    """
    # create index
    schema = JaggedArrayNode()
    schema.add_title(data['en'], 'en', True)
    schema.add_title(data['he'], 'he', True)
    schema.key = data['en']
    schema.depth = 3
    schema.addressTypes = ['Integer', 'Integer', 'Integer']
    schema.sectionNames = ['Chapter', 'Seif', 'Comment']
    schema.validate()
    index = {
        'title': data['en'],
        'categories': ['Commentary2', 'Mishnah', 'Yachin'],
        'schema': schema.serialize()
    }
    if post_index:
        functions.post_index(index)

    # clean and upload text
    upload_text = util.clean_jagged_array(data['data'].array(), tags_to_strip())
    text_version = {
        'versionTitle': u'Mishnah, ed. Romm, Vilna 1913',
        # FIX: was 'http://http://primo...' — the doubled scheme made the URL invalid
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001741739',
        'language': 'he',
        'text': upload_text
    }
    functions.post_text(data['en'], text_version)
def bs_index():
    """Create the Bekhor Shor index record on the server."""
    serialized_schema = bs_schema().serialize()  # schema as a JSON-ready dict
    post_index({
        'title': 'Bekhor Shor',
        'categories': ['Commentary2', 'Tanakh', 'Bekhor Shor'],
        'schema': serialized_schema,
    })
def post_smk(ja_smk):
    """Clean and post Sefer Mitzvot Katan: depth-2 (Siman -> Segment) schema,
    index, and text version.

    :param ja_smk: jagged array of raw parsed text.
    """
    # strip all '@' parser markers before posting
    ja_smk = clean(ja_smk)
    text_version = {
        'versionTitle': 'Sefer Mitzvot Katan, Kopys, 1820',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001771677',
        'language': 'he',
        'text': ja_smk
    }
    schema = JaggedArrayNode()
    schema.add_title('Sefer Mitzvot Katan', 'en', True)
    schema.add_title(u'ספר מצות קטן', 'he', True)
    schema.key = 'Sefer Mitzvot Katan'
    schema.depth = 2
    schema.addressTypes = ['Integer', 'Integer']
    schema.sectionNames = ['Siman', 'Segment']
    schema.validate()
    index_dict = {
        'title': 'Sefer Mitzvot Katan',
        'categories': ['Halakhah'],
        'schema': schema.serialize()  # This line converts the schema into json
    }
    post_index(index_dict)
    post_text('Sefer Mitzvot Katan', text_version)
def post_raph(ja_raph):
    """Clean and post Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan.

    Replaces the raw '@22' parser marker with an HTML line break, builds the
    depth-2 (Siman -> Segment) schema, registers the shared term, then posts
    the index and the text version.

    :param ja_raph: jagged array of raw parsed text.
    """
    # '@22' marker in the raw transcription -> explicit line break
    replace_dict = {u"@22": u"<br>"}
    ja_raph = inlinereferencehtml(ja_raph)
    ja_raph = before_post_cleaner(ja_raph, replace_dict)
    text_version = {
        'versionTitle': 'Sefer Mitzvot Katan, Kopys, 1820',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001771677',
        'language': 'he',
        'text': ja_raph
    }
    schema = JaggedArrayNode()
    schema.add_title('Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan', 'en', True)
    schema.add_title(u'הגהות רבנו פרץ על ספר מצוות קטן', 'he', True)
    schema.key = 'Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan'
    schema.depth = 2
    schema.addressTypes = ['Integer', 'Integer']
    schema.sectionNames = ['Siman', 'Segment']
    schema.validate()
    add_term('Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan', u'הגהות רבנו פרץ על ספר מצוות קטן')
    index_dict = {
        'title': 'Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan',
        'dependence': "Commentary",
        'base_text_titles': ["Sefer Mitzvot Katan"],
        "categories": ["Halakhah", "Commentary"],
        'schema': schema.serialize(),  # This line converts the schema into json
        'collective_title': 'Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan',
    }
    post_index(index_dict)
    post_text('Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan', text_version)
def post_this():
    """Post HaGra on Sefer Yetzirah (Gra Version): depth-3 schema, index, text.

    NOTE(review): reads the parsed text from a module-level `gra` variable
    rather than a parameter.
    """
    text_version = {
        'versionTitle': 'Sefer Yetzirah, Warsaw 1884',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
        'language': 'he',
        'text': gra
    }
    schema = JaggedArrayNode()
    schema.add_title('HaGra on Sefer Yetzirah Gra Version', 'en', True)
    schema.add_title(u'פירוש הגר"א על ספר יצירה', 'he', True)
    schema.key = 'HaGra on Sefer Yetzirah Gra Version'
    schema.depth = 3
    schema.addressTypes = ['Integer', 'Integer', 'Integer']
    schema.sectionNames = ['Chapter', 'Mishnah', 'Comment']
    schema.validate()
    index_dict = {
        'title': 'HaGra on Sefer Yetzirah Gra Version',
        'categories': ['Commentary2', 'Kabbalah', 'Gra'],
        'schema': schema.serialize()  # This line converts the schema into json
    }
    post_index(index_dict)
    post_text('HaGra on Sefer Yetzirah Gra Version', text_version, index_count='on')
def post_yitzira():
    """Create the Sefer Yetzirah index (depth-2: Chapter -> Mishnah) and post
    its Hebrew text from parse_yitzira()."""
    node = JaggedArrayNode()
    node.add_title('Sefer Yetzirah', 'en', primary=True)
    node.add_title(u'ספר יצירה', 'he', primary=True)
    node.key = 'Sefer Yetzirah'
    node.depth = 2
    node.addressTypes = ['Integer', 'Integer']
    node.sectionNames = ['Chapter', 'Mishnah']
    node.validate()
    y_index = {
        'title': 'Sefer Yetzirah',
        'categories': ['Kabbalah'],
        'language': 'he',
        'schema': node.serialize()
    }
    y_version = {
        'versionTitle': 'Sefer Yetzirah, Warsaw 1884',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
        'language': 'he',
        'text': parse_yitzira()
    }
    post_index(y_index)
    post_text("Sefer Yetzirah", y_version, index_count='on')
def upload_index(full_text, upload=False):
    """
    :param full_text: Data structure from parse_text()
    :param upload: set to True, otherwise function will do nothing

    Builds and posts the Chizkuni index (one depth-3 node per Torah book).
    NOTE(review): `full_text` is never used in this body — only the index is
    posted; confirm whether text upload was meant to happen here.
    """
    if not upload:
        return
    books = [u'Genesis', u'Exodus', u'Leviticus', u'Numbers', u'Deuteronomy']
    # create index record
    record = SchemaNode()
    record.add_title('Chizkuni', 'en', primary=True,)
    record.add_title(u'חזקוני', 'he', primary=True)
    record.key = 'Chizkuni'
    # add nodes: one Chapter -> Verse -> Comment array per book
    for book in books:
        node = JaggedArrayNode()
        node.add_title(book, 'en', primary=True)
        node.add_title(hebrew_term(book), 'he', primary=True)
        node.key = book
        node.depth = 3
        node.addressTypes = ['Integer', 'Integer', 'Integer']
        node.sectionNames = ['Chapter', 'Verse', 'Comment']
        record.append(node)
    record.validate()
    index = {
        "title": "Chizkuni",
        "categories": ["Commentary2", "Tanach", "Chizkuni"],
        "schema": record.serialize()
    }
    post_index(index)
def post_hagahot(ja_hg):
    """Clean parser markup from Haggahot Chadashot on Sefer Mitzvot Katan,
    then register the term and post the index and text.

    :param ja_hg: jagged array of raw parsed text containing '@NN' markers.
    """
    # map raw '@' transcription markers to HTML, or strip them entirely
    replace_dict = {u"@11\([\u05d0-\u05ea]{1,3}\)\s?@33": u"",
                    u"@77": u"",
                    u"@44": u"<br>",
                    u"@55": u"<b>",
                    u"@66": u"</b>",
                    u"@00(.+?)\s(.+)": u"",
                    u"@(?:99|01)(.*?)@": ur"<br><small>\1</small><br>"}
    ja_hg = before_post_cleaner(ja_hg, replace_dict)
    text_version = {
        'versionTitle': 'Sefer Mitzvot Katan, Kopys, 1820',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001771677',
        'language': 'he',
        'text': ja_hg
    }
    schema = JaggedArrayNode()
    schema.add_title('Haggahot Chadashot on Sefer Mitzvot Katan', 'en', True)
    schema.add_title(u'הגהות חדשות על ספר מצוות קטן', 'he', True)
    schema.key = 'Haggahot Chadashot on Sefer Mitzvot Katan'
    schema.depth = 2
    schema.addressTypes = ['Integer', 'Integer']
    schema.sectionNames = ['Siman', 'Segment']
    schema.validate()
    add_term('Haggahot Chadashot on Sefer Mitzvot Katan', u'הגהות חדשות על ספר מצוות קטן')
    index_dict = {
        'title': 'Haggahot Chadashot on Sefer Mitzvot Katan',
        'dependence': "Commentary",
        # linked against both the base text and Rabbeinu Peretz's glosses
        'base_text_titles': ["Sefer Mitzvot Katan", 'Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan'],
        "categories": ["Halakhah", "Commentary"],
        'schema': schema.serialize(),  # This line converts the schema into json
        'collective_title': 'Haggahot Chadashot on Sefer Mitzvot Katan',
    }
    post_index(index_dict)
    post_text('Haggahot Chadashot on Sefer Mitzvot Katan', text_version)
def post_this():
    """Post HaGra on Sefer Yetzirah (Gra Version): depth-3 schema, index, text.

    NOTE(review): reads the parsed text from a module-level `gra` variable
    rather than a parameter.
    """
    text_version = {
        'versionTitle': 'Sefer Yetzirah, Warsaw 1884',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
        'language': 'he',
        'text': gra
    }
    schema = JaggedArrayNode()
    schema.add_title('HaGra on Sefer Yetzirah Gra Version', 'en', True)
    schema.add_title(u'פירוש הגר"א על ספר יצירה', 'he', True)
    schema.key = 'HaGra on Sefer Yetzirah Gra Version'
    schema.depth = 3
    schema.addressTypes = ['Integer', 'Integer', 'Integer']
    schema.sectionNames = ['Chapter', 'Mishnah', 'Comment']
    schema.validate()
    index_dict = {
        'title': 'HaGra on Sefer Yetzirah Gra Version',
        'categories': ['Commentary2', 'Kabbalah', 'Gra'],
        'schema': schema.serialize()  # This line converts the schema into json
    }
    post_index(index_dict)
    post_text('HaGra on Sefer Yetzirah Gra Version', text_version, index_count='on')
def upload_footnote_index():
    """
    Footnotes are uploaded as a commentary2 - Schema with each book a depth 2 jaggedArray
    """
    books = library.get_indexes_in_category('Tanach')
    # create index record
    record = SchemaNode()
    record.add_title('JPS 1985 Footnotes', 'en', primary=True, )
    record.add_title(u'הערות שוליים תרגום 1985 של JPS', 'he', primary=True, )
    record.key = 'JPS 1985 Footnotes'
    # add nodes: one Chapter -> Footnote array per Tanach book
    for book in books:
        node = JaggedArrayNode()
        node.add_title(book, 'en', primary=True)
        node.add_title(hebrew_term(book), 'he', primary=True)
        node.key = book
        node.depth = 2
        node.addressTypes = ['Integer', 'Integer']
        node.sectionNames = ['Chapter', 'Footnote']
        record.append(node)
    record.validate()
    index = {
        "title": "JPS 1985 Footnotes",
        "categories": ["Commentary2", "Tanach", "JPS"],
        "schema": record.serialize()
    }
    functions.post_index(index)
def post():
    """Parse Minchat Chinuch, link it to Sefer HaChinukh, write debug dumps,
    then post the index, the cleaned text, and the links."""
    minchat = {'name': 'Minchat Chinuch', 'text': produce_parsed_data(filename)}
    sefer = {'name': 'Sefer HaChinukh', 'text': Ref('Sefer HaChinukh').text('he').text}
    chinukh_links = find_links(minchat, sefer, grab_dh, u'<b>', u'</b>')
    # debug dump of the generated link refs
    with codecs.open('links.txt', 'w', 'utf-8') as outfile:
        for each_link in chinukh_links:
            outfile.write(u'{}\n'.format(each_link['refs']))
    alt = construct_alt_struct('Chinukh_by_Parsha.csv', 'Chinukh Mitzva names.csv')
    # strip parser markers ('@NN'), section patterns, and bare newlines
    cleaned = util.clean_jagged_array(minchat['text'],
                                      [m_pattern, comment_pattern, u'@[0-9]{2}', u'\n', u'\r'])
    # debug dump of the cleaned parse
    with codecs.open('parsed.txt', 'w', 'utf-8') as outfile:
        util.jagged_array_to_file(outfile, cleaned, [u'Mitzva', u'Seif', u'Paragraph'])
    full_text = {
        'versionTitle': 'Minchat Chinuch, Piotrków, 1902',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001175092',
        'language': 'he',
        'text': cleaned
    }
    index = construct_index(alt)
    functions.post_index(index)
    functions.post_text('Minchat Chinuch', full_text)
    functions.post_link(chinukh_links)
def post_smk(ja_smk):
    """Post Sefer Mitzvot Katan: depth-2 (Siman -> Segment) schema, index,
    and text version.

    :param ja_smk: jagged array of parsed text, assumed already cleaned.
    """
    text_version = {
        'versionTitle': 'Sefer Mitzvot Katan, Kopys, 1820',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001771677',
        'language': 'he',
        'text': ja_smk
    }
    schema = JaggedArrayNode()
    schema.add_title('Sefer Mitzvot Katan', 'en', True)
    schema.add_title(u'ספר מצות קטן', 'he', True)
    schema.key = 'Sefer Mitzvot Katan'
    schema.depth = 2
    schema.addressTypes = ['Integer', 'Integer']
    schema.sectionNames = ['Siman', 'Segment']
    schema.validate()
    index_dict = {
        'title': 'Sefer Mitzvot Katan',
        'categories': ['Halakhah'],
        'schema': schema.serialize()  # This line converts the schema into json
    }
    post_index(index_dict)
    post_text('Sefer Mitzvot Katan', text_version)
def tt_index():
    """Create the Tur HaAroch index record on the server."""
    serialized_schema = tt_schema().serialize()  # schema as a JSON-ready dict
    post_index({
        'title': 'Tur HaAroch',
        'categories': ['Commentary2', 'Tanakh', 'Tur HaAroch'],
        'schema': serialized_schema,
    })
def post_raavad_index():
    """Create the index record for Raavad on Sefer Yetzirah."""
    # add index for the perush
    # NOTE(review): the "author " key has a trailing space, so the server
    # likely ignores this field — confirm the intended field name.
    index_ravaad = {
        "title": "Raavad on Sefer Yetzirah",
        "categories": ["Commentary2", "Kabbalah", "Raavad"],
        "schema": ravaad_schema().serialize(),
        "author ": [u'Yosef ben Shalom Ashkenazi']
    }
    post_index(index_ravaad)
def upload_index(storage_object, title, destination='http://localhost:8000'):
    """Fetch the stored index for *title* and post it to *destination*.

    :param CommentStore storage_object:
    :param title:
    :param destination: server URL the index is posted to
    :return:
    """
    stored_index = storage_object.get_index_for_title(title)
    post_index(stored_index, server=destination, weak_network=True)
def post(self):
    """Post everything accumulated on this object: base indices (with
    weak-network retries), commentary indices, each text version, and
    finally the link set."""
    for index in self.base_indices:
        post_index(index, weak_network=True)
    for index in self.commentaryIndices:
        post_index(index)
    for version in self.versionList:
        print version['ref']  # progress output
        post_text(version['ref'], version['version'], index_count='on', weak_network=True)
    post_link(self.linkSet)
def post_simple_commentaries():
    """Parse and post the Ramban and Rasag commentaries on Sefer Yetzirah:
    build both depth-3 (Chapter -> Mishnah -> Comment) schemas, post each
    index and text version, then post the generated links.

    Fix: the Rasag versionTitle read 'Rasage on Sefer Yetzirah, ...';
    corrected the typo to 'Rasag'.
    """
    ramban_node, rasag_node = JaggedArrayNode(), JaggedArrayNode()
    ramban_text = parse_general('yitzira_ramban.txt')
    rasag_text = parse_general('yitzira_rasag.txt')
    ramban_node.add_title("Ramban on Sefer Yetzirah", 'en', primary=True)
    ramban_node.add_title(u'רמב"ן על ספר יצירה', 'he', primary=True)
    ramban_node.key = "Ramban on Sefer Yetzirah"
    ramban_node.addressTypes = ['Integer', 'Integer', 'Integer']
    ramban_node.sectionNames = ["Chapter", "Mishnah", "Comment"]
    ramban_node.toc_zoom = 2
    ramban_node.depth = 3
    ramban_node.validate()
    rasag_node.add_title("Rasag on Sefer Yetzirah", 'en', primary=True)
    rasag_node.add_title(u'רס"ג על ספר יצירה', 'he', primary=True)
    rasag_node.key = "Rasag on Sefer Yetzirah"
    rasag_node.addressTypes = ['Integer', 'Integer', 'Integer']
    rasag_node.sectionNames = ["Chapter", "Mishnah", "Comment"]
    rasag_node.toc_zoom = 2
    rasag_node.depth = 3
    rasag_node.validate()
    ramban_index = {
        "title": "Ramban on Sefer Yetzirah",
        "categories": ["Commentary2", "Kabbalah", "Ramban"],
        "language": "he",
        "schema": ramban_node.serialize()
    }
    post_index(ramban_index)
    post_text(
        "Ramban on Sefer Yetzirah",
        {
            'versionTitle': 'Ramban on Sefer Yetzirah, Warsaw 1884',
            'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
            'language': 'he',
            'text': ramban_text
        })
    rasag_index = {
        "title": "Rasag on Sefer Yetzirah",
        "categories": ["Commentary2", "Kabbalah", "Rasag"],
        "language": "he",
        "schema": rasag_node.serialize()
    }
    post_index(rasag_index)
    post_text(
        "Rasag on Sefer Yetzirah",
        {
            'versionTitle': 'Rasag on Sefer Yetzirah, Warsaw 1884',  # was 'Rasage' (typo)
            'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
            'language': 'he',
            'text': rasag_text
        })
    links = linker(ramban_text, "Ramban on Sefer Yetzirah")
    links.extend(linker(rasag_text, "Rasag on Sefer Yetzirah"))
    post_link(links)
def post_raavad_index():
    """Create the index record for Raavad on Sefer Yetzirah."""
    # add index for the perush
    # NOTE(review): the "author " key has a trailing space, so the server
    # likely ignores this field — confirm the intended field name.
    index_ravaad = {
        "title": "Raavad on Sefer Yetzirah",
        "categories": ["Commentary2", "Kabbalah", "Raavad"],
        "schema": ravaad_schema().serialize(),
        "author ": [u'Yosef ben Shalom Ashkenazi']
    }
    post_index(index_ravaad)
def post():
    """Parse Sefer HaKana, post its index, then post the Hebrew text."""
    parsed = parse()
    post_index(build_index(parsed['titles']))
    hebrew_version = {
        'versionSource': 'http://www.hebrew.grimoar.cz/anonym/sefer_ha-kana.htm',
        'versionTitle': 'Sefer HaKana',
        'language': 'he',
        'text': parsed['text'],
    }
    post_text('Sefer HaKana', hebrew_version, index_count='on')
def post_simple_commentaries():
    """Parse and post the Ramban and Rasag commentaries on Sefer Yetzirah:
    build both depth-3 (Chapter -> Mishnah -> Comment) schemas, post each
    index and text version, then post the generated links.

    Fix: the Rasag versionTitle read 'Rasage on Sefer Yetzirah, ...';
    corrected the typo to 'Rasag'.
    """
    ramban_node, rasag_node = JaggedArrayNode(), JaggedArrayNode()
    ramban_text = parse_general('yitzira_ramban.txt')
    rasag_text = parse_general('yitzira_rasag.txt')
    ramban_node.add_title("Ramban on Sefer Yetzirah", 'en', primary=True)
    ramban_node.add_title(u'רמב"ן על ספר יצירה', 'he', primary=True)
    ramban_node.key = "Ramban on Sefer Yetzirah"
    ramban_node.addressTypes = ['Integer', 'Integer', 'Integer']
    ramban_node.sectionNames = ["Chapter", "Mishnah", "Comment"]
    ramban_node.toc_zoom = 2
    ramban_node.depth = 3
    ramban_node.validate()
    rasag_node.add_title("Rasag on Sefer Yetzirah", 'en', primary=True)
    rasag_node.add_title(u'רס"ג על ספר יצירה', 'he', primary=True)
    rasag_node.key = "Rasag on Sefer Yetzirah"
    rasag_node.addressTypes = ['Integer', 'Integer', 'Integer']
    rasag_node.sectionNames = ["Chapter", "Mishnah", "Comment"]
    rasag_node.toc_zoom = 2
    rasag_node.depth = 3
    rasag_node.validate()
    ramban_index = {
        "title": "Ramban on Sefer Yetzirah",
        "categories": ["Commentary2", "Kabbalah", "Ramban"],
        "language": "he",
        "schema": ramban_node.serialize()
    }
    post_index(ramban_index)
    post_text("Ramban on Sefer Yetzirah", {
        'versionTitle': 'Ramban on Sefer Yetzirah, Warsaw 1884',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
        'language': 'he',
        'text': ramban_text
    })
    rasag_index = {
        "title": "Rasag on Sefer Yetzirah",
        "categories": ["Commentary2", "Kabbalah", "Rasag"],
        "language": "he",
        "schema": rasag_node.serialize()
    }
    post_index(rasag_index)
    post_text("Rasag on Sefer Yetzirah", {
        'versionTitle': 'Rasag on Sefer Yetzirah, Warsaw 1884',  # was 'Rasage' (typo)
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
        'language': 'he',
        'text': rasag_text
    })
    links = linker(ramban_text, "Ramban on Sefer Yetzirah")
    links.extend(linker(rasag_text, "Rasag on Sefer Yetzirah"))
    post_link(links)
def post():
    """Parse the Aramaic Targum to Chronicles, then post an index and text for
    I and II Chronicles, followed by the links."""
    parsed = parse('targum.txt')
    for i in range(1, 3):
        functions.post_index(build_index(i))
        version = {
            'versionTitle': 'Wikisource Aramaic Targum to Chronicles',
            'versionSource': url,  # module-level source URL
            'language': 'he',
            'text': parsed[i-1]
        }
        # 'I' * i renders the book numeral: "I" or "II"
        functions.post_text('Aramaic Targum to {} Chronicles'.format('I' * i), version)
    functions.post_link(build_links(parsed))
def create_index():
    """Build and post the index for The Midrash of Philo (depth-3 jagged array)."""
    node = JaggedArrayNode()
    node.add_primary_titles("The Midrash of Philo", u"מדרש פילון")
    node.add_structure(["Chapter", "Verse", "Comment"])
    node.validate()
    functions.post_index({
        'title': "The Midrash of Philo",
        'categories': ['Other'],
        'schema': node.serialize(),  # JSON-ready form of the schema
    })
def post_text_and_index(text_struct, section_names):
    """Post the Noda BeYehuda index, then one text version per section.

    :param text_struct: list of jagged arrays, parallel to section_names.
    :param section_names: section titles; each is posted to
        'Noda BeYehuda, <section>'.
    """
    index = build_index(section_names)
    functions.post_index(index)
    for section_num, section in enumerate(section_names):
        new_text = {
            "versionTitle": 'Noda BeYehuda Warsaw 1880',
            "versionSource": 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001983501',
            "language": 'he',
            "text": text_struct[section_num]
        }
        functions.post_text('Noda BeYehuda, {}'.format(section), new_text)
def post(self):
    """Post base indices (weak-network retries), build and post one commentary
    index per commentator in self.commentarySchemas, then post every text
    version and finally the link set."""
    for index in self.base_indices:
        post_index(index, weak_network=True)
    for he_author in self.commentarySchemas.keys():
        # map the Hebrew commentator name to its English equivalent
        en_author = DCXMLsubs.commentatorNames[he_author]
        index = {
            'title': en_author,
            'categories': ['Commentary2', 'Masechtot Ketanot', en_author],
            'schema': self.commentarySchemas[he_author].serialize()
        }
        post_index(index)
    for version in self.versionList:
        post_text(version['ref'], version['version'], index_count='on', weak_network=True)
    post_link(self.linkSet)
def post():
    """Post Derech Chaim: index, main text, author's introduction, and links."""
    post_index(construct_index())
    base_text = restructure_text()
    links = build_links(base_text)
    version = {
        'versionTitle': u'Derech Chaim, Maharal',
        'versionSource': u'http://mobile.tora.ws/',
        'language': 'he',
        'text': base_text
    }
    post_text("Derech Chaim", version)
    # reuse the same version dict, swapping only the text, for the introduction
    version['text'] = get_intro()
    post_text("Derech Chaim, Author's Introduction", version, index_count='on')
    post_link(links)
def parse_the_text(file_name_teshuvot, file_name_footnotes, dictionary):
    """Parse one part of Teshuvot haRashba plus its footnotes, then post both
    indices, both texts, and the links (weak-connection variants).

    :param file_name_teshuvot: path to the responsa source file.
    :param file_name_footnotes: path to the footnotes source file.
    :param dictionary: per-part metadata; must contain 'roman numeral'.

    NOTE(review): `footnotes` and `footnotes_hebrew` are not defined in this
    function — they must exist as module globals or this raises NameError.
    """
    teshuvot_ja = function.parse(file_name_teshuvot)
    footnotes_ja = function.parse(file_name_footnotes)
    links = function.create_links(teshuvot_ja, dictionary)
    index_teshuvot = function.create_index(dictionary)
    index_footnotes = function.create_index(dictionary, footnotes, footnotes_hebrew)
    # strip stray digit runs and '+' markers left by the parser
    teshuvot_ja = util.clean_jagged_array(teshuvot_ja, ['\d+', '\+'])
    footnotes_ja = util.clean_jagged_array(footnotes_ja, ['\d+', '\+'])
    text_teshuvot = function.create_text(dictionary, teshuvot_ja)
    text_footnotes = function.create_text(dictionary, footnotes_ja)
    functions.post_index(index_teshuvot)
    functions.post_index(index_footnotes)
    functions.post_text_weak_connection(
        'Teshuvot haRashba part {}'.format(dictionary['roman numeral']), text_teshuvot)
    functions.post_text_weak_connection(
        'Footnotes to Teshuvot haRashba part {}'.format(dictionary['roman numeral']), text_footnotes)
    functions.post_link_weak_connection(links)
def upload():
    """Post Chesed LeAvraham: index, introduction, each Maayan of Even Shetiya,
    then Breichat Avraham."""
    post_index(construct_index())
    version = {
        'versionTitle': 'Placeholder',
        'versionSource': 'http://www.hebrew.grimoar.cz/azulaj/chesed_le-avraham.htm',
        'language': 'he',
        'text': parse_intro()
    }
    post_text('Chesed LeAvraham, Introduction', version)
    body = parse_body()
    for i, part in enumerate(body):
        # reuse the version dict, swapping only the text for each Maayan
        version['text'] = part
        post_text('Chesed LeAvraham, Even Shetiya, Maayan {}'.format(i+1), version)
    version['text'] = parse_shokets()
    post_text('Chesed LeAvraham, Breichat Avraham', version, index_count='on')
def post_hagahot(ja_hg):
    """Clean parser markup from Haggahot Chadashot on Sefer Mitzvot Katan,
    then register the term and post the index and text.

    :param ja_hg: jagged array of raw parsed text containing '@NN' markers.
    """
    # map raw '@' transcription markers to HTML, or strip them entirely
    replace_dict = {
        u"@11\([\u05d0-\u05ea]{1,3}\)\s?@33": u"",
        u"@77": u"",
        u"@44": u"<br>",
        u"@55": u"<b>",
        u"@66": u"</b>",
        u"@00(.+?)\s(.+)": u"",
        u"@(?:99|01)(.*?)@": ur"<br><small>\1</small><br>"
    }
    ja_hg = before_post_cleaner(ja_hg, replace_dict)
    text_version = {
        'versionTitle': 'Sefer Mitzvot Katan, Kopys, 1820',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001771677',
        'language': 'he',
        'text': ja_hg
    }
    schema = JaggedArrayNode()
    schema.add_title('Haggahot Chadashot on Sefer Mitzvot Katan', 'en', True)
    schema.add_title(u'הגהות חדשות על ספר מצוות קטן', 'he', True)
    schema.key = 'Haggahot Chadashot on Sefer Mitzvot Katan'
    schema.depth = 2
    schema.addressTypes = ['Integer', 'Integer']
    schema.sectionNames = ['Siman', 'Segment']
    schema.validate()
    add_term('Haggahot Chadashot on Sefer Mitzvot Katan', u'הגהות חדשות על ספר מצוות קטן')
    index_dict = {
        'title': 'Haggahot Chadashot on Sefer Mitzvot Katan',
        'dependence': "Commentary",
        # linked against both the base text and Rabbeinu Peretz's glosses
        'base_text_titles': [
            "Sefer Mitzvot Katan",
            'Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan'
        ],
        "categories": ["Halakhah", "Commentary"],
        'schema': schema.serialize(),  # This line converts the schema into json
        'collective_title': 'Haggahot Chadashot on Sefer Mitzvot Katan',
    }
    post_index(index_dict)
    post_text('Haggahot Chadashot on Sefer Mitzvot Katan', text_version)
def post():
    """Align each Torah book of the Tafsir to chapter/verse markers, then post
    the index, one text per node (Introduction + each Torah book), and links."""
    books = file_to_books()
    for book in library.get_indexes_in_category('Torah'):
        # align on '@פרק <Hebrew letters>' chapter headers and '<digits>.' verse markers
        books[book] = align_text(books[book], u'@\u05e4\u05e8\u05e7 [\u05d0-\u05ea]{1,2}', u'[0-9]{1,2}\.')
    functions.post_index(build_index())
    node_names = ['Introduction'] + library.get_indexes_in_category('Torah')
    for name in node_names:
        version = {
            'versionTitle': 'Tafsir al-Torah bi-al-Arabiya, Paris, 1893',
            'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001863864',
            'language': 'he',
            'text': books[name]
        }
        functions.post_text('Tafsir Rasag, {}'.format(name), version)
    functions.post_link(build_links(books))
def build_index(mishna_name):
    """Build and post the index for Boaz on one Mishnah tractate.

    :param mishna_name: English title of the base tractate.
    """
    jnode = JaggedArrayNode()
    jnode.add_title('Boaz on {}'.format(mishna_name), 'en', True)
    jnode.add_title(u'בועז על {}'.format(Ref(mishna_name).he_book()), 'he', True)
    jnode.key = 'Boaz on {}'.format(mishna_name)
    jnode.depth = 2
    jnode.addressTypes = ['Integer', 'Integer']
    # NOTE(review): lowercase 'comment' is inconsistent with the capitalized
    # 'Comment' used elsewhere in this file — confirm intended.
    jnode.sectionNames = ['Chapter', 'comment']
    jnode.validate()
    index = {
        'title': 'Boaz on {}'.format(mishna_name),
        'categories': ['Commentary2', 'Mishnah', 'Boaz'],
        'schema': jnode.serialize()
    }
    functions.post_index(index)
def upload():
    """Post the Sefat Emet index, then one text per (book, parasha) using
    the weak-network posting mode."""
    functions.post_index(construct_index())
    parsed = parse()
    names = node_names()
    en_parasha_names = get_parsha_dict()
    for book in names.keys():
        for parasha in names[book].keys():
            current_text = {
                'versionTitle': 'Sefat emet, Piotrków, 1905-1908',
                'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001186213',
                'language': 'he',
                'text': parsed[book][parasha]
            }
            en_parasha = en_parasha_names[parasha]
            url = 'Sefat Emet, {}, {}'.format(book, en_parasha)
            print url  # progress output
            functions.post_text(url, current_text, weak_network=True)
def parse_the_text(file_name_teshuvot, file_name_footnotes, dictionary):
    """Parse one part of Teshuvot haRashba plus its footnotes, then post both
    indices, both texts, and the links (weak-connection variants).

    :param file_name_teshuvot: path to the responsa source file.
    :param file_name_footnotes: path to the footnotes source file.
    :param dictionary: per-part metadata; must contain 'roman numeral'.

    NOTE(review): `footnotes` and `footnotes_hebrew` are not defined in this
    function — they must exist as module globals or this raises NameError.
    """
    teshuvot_ja = function.parse(file_name_teshuvot)
    footnotes_ja = function.parse(file_name_footnotes)
    links = function.create_links(teshuvot_ja, dictionary)
    index_teshuvot = function.create_index(dictionary)
    index_footnotes = function.create_index(dictionary, footnotes, footnotes_hebrew)
    # strip stray digit runs and '+' markers left by the parser
    teshuvot_ja = util.clean_jagged_array(teshuvot_ja, ['\d+', '\+'])
    footnotes_ja = util.clean_jagged_array(footnotes_ja, ['\d+', '\+'])
    text_teshuvot = function.create_text(dictionary, teshuvot_ja)
    text_footnotes = function.create_text(dictionary, footnotes_ja)
    functions.post_index(index_teshuvot)
    functions.post_index(index_footnotes)
    functions.post_text_weak_connection(
        'Teshuvot haRashba part {}'.format(dictionary['roman numeral']),
        text_teshuvot)
    functions.post_text_weak_connection(
        'Footnotes to Teshuvot haRashba part {}'.format(
            dictionary['roman numeral']),
        text_footnotes)
    functions.post_link_weak_connection(links)
def create_index():
    """Build The War of the Jews schema — a Preface node plus a default
    Book -> Chapter -> Paragraph content node — and post it to the proto server."""
    root = SchemaNode()
    root.add_primary_titles("The War of the Jews", u"מלחמת היהודים")
    preface = JaggedArrayNode()
    preface.add_structure(["Paragraph"])
    preface.add_shared_term("Preface")
    preface.key = "preface"
    content = JaggedArrayNode()
    content.default = True  # the unlabeled default node holds the main text
    content.key = "default"
    content.add_structure(["Book", "Chapter", "Paragraph"])
    root.append(preface)
    root.append(content)
    root.validate()
    index = {
        "title": "The War of the Jews",
        "schema": root.serialize(),
        "categories": ["Other"]
    }
    # posts to the hard-coded proto server, not the default destination
    post_index(index, "http://proto.sefaria.org")
def upload_footnote_index():
    """
    Footnotes are uploaded as a commentary2 - Schema with each book a depth 2 jaggedArray
    """
    books = library.get_indexes_in_category('Tanach')
    # create index record
    record = SchemaNode()
    record.add_title(
        'JPS 1985 Footnotes',
        'en',
        primary=True,
    )
    record.add_title(
        u'הערות שוליים תרגום 1985 של JPS',
        'he',
        primary=True,
    )
    record.key = 'JPS 1985 Footnotes'
    # add nodes: one Chapter -> Footnote array per Tanach book
    for book in books:
        node = JaggedArrayNode()
        node.add_title(book, 'en', primary=True)
        node.add_title(hebrew_term(book), 'he', primary=True)
        node.key = book
        node.depth = 2
        node.addressTypes = ['Integer', 'Integer']
        node.sectionNames = ['Chapter', 'Footnote']
        record.append(node)
    record.validate()
    index = {
        "title": "JPS 1985 Footnotes",
        "categories": ["Commentary2", "Tanach", "JPS"],
        "schema": record.serialize()
    }
    functions.post_index(index)
def post():
    """Parse Minchat Chinuch, link it to Sefer HaChinukh, write debug dumps,
    then post the index, the cleaned text, and the links."""
    minchat = {
        'name': 'Minchat Chinuch',
        'text': produce_parsed_data(filename)
    }
    sefer = {
        'name': 'Sefer HaChinukh',
        'text': Ref('Sefer HaChinukh').text('he').text
    }
    chinukh_links = find_links(minchat, sefer, grab_dh, u'<b>', u'</b>')
    # debug dump of the generated link refs
    with codecs.open('links.txt', 'w', 'utf-8') as outfile:
        for each_link in chinukh_links:
            outfile.write(u'{}\n'.format(each_link['refs']))
    alt = construct_alt_struct('Chinukh_by_Parsha.csv', 'Chinukh Mitzva names.csv')
    # strip parser markers ('@NN'), section patterns, and bare newlines
    cleaned = util.clean_jagged_array(
        minchat['text'],
        [m_pattern, comment_pattern, u'@[0-9]{2}', u'\n', u'\r'])
    # debug dump of the cleaned parse
    with codecs.open('parsed.txt', 'w', 'utf-8') as outfile:
        util.jagged_array_to_file(outfile, cleaned, [u'Mitzva', u'Seif', u'Paragraph'])
    full_text = {
        'versionTitle': 'Minchat Chinuch, Piotrków, 1902',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001175092',
        'language': 'he',
        'text': cleaned
    }
    index = construct_index(alt)
    functions.post_index(index)
    functions.post_text('Minchat Chinuch', full_text)
    functions.post_link(chinukh_links)
import django

django.setup()
from sefaria.model import *
from sources.functions import get_index_api, post_index, post_category

if __name__ == "__main__":
    SERVER = "https://www.sefaria.org"
    # NOTE: one-off Category() creation code for the "Joseph ibn Yahya" and
    # "Writings" categories used to live here; it has already been run.
    # Re-file "Joseph ibn Yahya on Esther" under the Writings sub-category.
    index_record = get_index_api("Joseph ibn Yahya on Esther", server=SERVER)
    index_record["categories"] = ["Tanakh", "Commentary", "Joseph ibn Yahya", "Writings"]
    post_index(index_record, server=SERVER)
# Hebrew title of the commentary: "<commentary he title> על <base-text he title>"
he_book_name = u"{} על {}".format(book_xml.titles['he'], he_base_title)
links = book_xml.collect_links()
# Sha'arei Teshuvah needs additional hand-built links beyond what the XML yields.
if user_args.title == u"Sha'arei Teshuvah":
    links += shaarei_special_links()
index = commentary_index(book_name, he_book_name, user_args.title)
# Title-specific post-parse fixup, dispatched from the post_parse table.
post_parse[user_args.title](book_ja)
if user_args.add_term:
    functions.add_term(user_args.title, book_xml.titles['he'], server=user_args.server)
functions.add_category(user_args.title, index['categories'], server=user_args.server)
if user_args.verbose:
    print index
functions.post_index(index, server=user_args.server)
# NOTE(review): earlier versionTitle variant kept for reference — it used a
# semicolon and carried a Hebrew versionTitleInHebrew field:
# version = {
#     "versionTitle": "Maginei Eretz; Shulchan Aruch Orach Chaim, Lemberg, 1893",
#     "versionTitleInHebrew": u"""ספר מגיני ארץ; שלחן ערוך. למברג, תרנ"ג""",
#     "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH002084080",
#     "language": "he",
#     "text": book_ja,
# }
version = {
    "versionTitle": "Maginei Eretz: Shulchan Aruch Orach Chaim, Lemberg, 1893",
    "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH002084080",
    "language": "he",
    "text": book_ja,
}
functions.post_text(book_name, version, index_count="on", server=user_args.server)
        # NOTE(review): these first lines are the tail of a version dict and of
        # a post_text() function whose head lies before this chunk — the
        # indentation here is a reconstruction; confirm against the full file.
        'language': 'he',
        'text': parsed_data[book]
        }
        functions.post_text('Siftei Hakhamim, {}'.format(book), version)


def manual_links():
    """
    Some links had to be created manually by the content team. The refs to
    link were saved in a csv.
    :return: Json object of links parsed from the aforementioned csv.
    """
    with open('siftei hakhamim manual links.csv') as infile:
        csv_reader = ucsv.reader(infile, delimiter=';')
        # Each csv row holds the two refs to connect; links are flagged as
        # manually generated by the content team (auto=False).
        links = [{
            'refs': [ref[0], ref[1]],
            'type': 'commentary',
            'auto': False,
            'generated_by': 'Sefaria Content Team'
        } for ref in csv_reader]
    return links


# Script driver: parse, post index and text, then post both generated and
# manually curated links.
parsed = parse_multiple()
slinks = generate_links(parsed)
functions.post_index(build_index())
post_text(parsed)
functions.post_link(slinks)
functions.post_link(manual_links())
    # NOTE(review): these first lines continue a perek loop begun before this
    # chunk — the surrounding indentation is a reconstruction; confirm
    # against the full file.
    perek_node.wholeRef = "Sifrei Devarim, " + str(perek_index[1]) + "-" + str(
        perek_index[2])
    perek_nodes.append(perek_node)

# Build the Parsha alternate structure: one ArrayMapNode per parsha, mapped
# to its span of pisqaot in Sifrei Devarim.
parsha_nodes = SchemaNode()
for parsha_index in get_parsha_index():
    parsha_node = ArrayMapNode()
    parsha_node.add_title(parsha_index[0], "en", primary=True)
    # Hebrew title looked up by position of the English parsha name.
    parsha_node.add_title(heb_parshiot[eng_parshiot.index(parsha_index[0])],
                          "he", primary=True)
    parsha_node.includeSections = True
    parsha_node.depth = 0
    parsha_node.wholeRef = "Sifrei Devarim, " + str(
        parsha_index[1]) + "-" + str(parsha_index[2])
    parsha_nodes.append(parsha_node)

record.validate()
index = {
    "title": "Sifrei Devarim",
    "categories": ["Midrash", "Halachic Midrash"],
    # Two alternate tables of contents: by parsha and by chapter.
    "alt_structs": {
        "Parsha": parsha_nodes.serialize(),
        "Chapters": perek_nodes.serialize()
    },
    "default_struct": "Chapters",
    "schema": record.serialize()
}
functions.post_index(index, weak_network=True)
def ch_index_post(com_name):
    """Build and post the index record for one Duties of the Heart commentary.

    The schema is: an optional commentator's introduction, then the author's
    introduction (first section title), then one SchemaNode per remaining
    section, each holding its own Introduction node plus a default depth-2
    chapter text.

    :param com_name: English title of the commentary; must be a key of com_dic.
    """
    section_titles = get_section_titles(com_name)
    com_record = com_dic[com_name]

    record = SchemaNode()
    record.add_title(com_name, 'en', primary=True)
    record.add_title(com_record["he_title"], 'he', primary=True)
    record.key = com_name

    # Tov haLevanon has no commentator's intro, so that node is skipped for it.
    if "Tov haLevanon" not in com_name:
        # (a bare `"HINTRO ", com_name` expression — a no-op leftover from a
        # debug print — was removed here)
        intro_node = JaggedArrayNode()
        intro_node.add_title(com_record["introduction_name_en"], 'en', primary=True)
        intro_node.add_title(com_record["introduction_name_he"], 'he', primary=True)
        intro_node.key = com_record["introduction_name_en"]
        intro_node.depth = 1
        intro_node.addressTypes = ['Integer']
        intro_node.sectionNames = ['Paragraph']
        record.append(intro_node)

    # Add nodes for the author's introduction and the rest of the sections.
    for title_index, title in enumerate(section_titles):
        if title_index == 0:
            # The author's introduction is a flat, depth-1 text.
            intro_node = JaggedArrayNode()
            intro_node.add_title(title["en_title"], "en", primary=True)
            intro_node.add_title(title["he_title"], "he", primary=True)
            intro_node.key = title["en_title"]
            intro_node.depth = 1
            intro_node.addressTypes = ['Integer']
            intro_node.sectionNames = ['Comment']
            record.append(intro_node)
        else:
            # Every other section: a wrapper node with its own Introduction
            # plus a default Chapter/Comment text node.
            section_node = SchemaNode()
            section_node.add_title(title["en_title"], "en", primary=True)
            section_node.add_title(title["he_title"], "he", primary=True)
            section_node.key = title["en_title"]

            intro_node = JaggedArrayNode()
            intro_node.add_title("Introduction", 'en', primary=True)
            intro_node.add_title(u"הקדמה", 'he', primary=True)
            intro_node.key = "Introduction"
            intro_node.depth = 1
            intro_node.addressTypes = ['Integer']
            intro_node.sectionNames = ['Comment']
            section_node.append(intro_node)

            text_node = JaggedArrayNode()
            text_node.key = "default"
            text_node.default = True
            text_node.depth = 2
            text_node.addressTypes = ['Integer', 'Integer']
            text_node.sectionNames = ['Chapter', 'Comment']
            section_node.append(text_node)
            record.append(section_node)

    record.validate()
    index = {
        "title": com_name,
        "base_text_titles": ["Duties of the Heart"],
        "dependence": "Commentary",
        "categories": ['Philosophy', 'Commentary', com_name, 'Duties of the Heart'],
        "schema": record.serialize(),  # converts the schema into json
        "collective_title": com_name
    }
    functions.post_index(index, weak_network=True)
        # NOTE(review): these first lines are the tail of an index-building
        # function (build_index) whose def lies before this chunk — the
        # indentation is a reconstruction; confirm against the full file.
        node.addressTypes = ['Integer', 'Integer', 'Integer']
        node.sectionNames = ['Chapter', 'Verse', 'Comment']
        node.toc_zoom = 2
        record.append(node)
    record.validate()
    index = {
        "title": "Siftei Hakhamim",
        "categories": ["Commentary2", "Torah", "Rashi"],
        "schema": record.serialize()
    }
    return index


def post_text(parsed_data):
    """Post the parsed Siftei Hakhamim text, one version per Torah book.

    :param parsed_data: mapping of Torah book title -> parsed jagged array.
    """
    for book in library.get_indexes_in_category('Torah'):
        version = {
            'versionTitle': 'Siftei Hakhamim',
            'versionSource': 'http://www.toratemetfreeware.com/',
            'language': 'he',
            'text': parsed_data[book]
        }
        functions.post_text('Siftei Hakhamim, {}'.format(book), version)


# Script driver: parse everything, post the index and text, then the links.
parsed = parse_multiple()
slinks = generate_links(parsed)
functions.post_index(build_index())
post_text(parsed)
functions.post_link(slinks)
# -*- coding: utf-8 -*-
import codecs
import regex
from sefaria.model import *
from sources import functions
from data_utilities import util
from sources.Rabbeinu_Yonah_Avot import rb_yonah_functions

# Workflow for Rabbeinu Yonah on Avot:
#   1. build and post the index record
#   2. parse the source file and post the text record
#   3. generate and post the links

index_record = rb_yonah_functions.create_index()
functions.post_index(index_record)
parsed_ja = rb_yonah_functions.parse_and_post('rabbeinu_yonah_on_avot.txt')
rb_yonah_functions.create_links(parsed_ja)