def post_yitzira():
    """Post the 'Sefer Yetzirah' index and its Hebrew text.

    Builds a depth-2 (Chapter / Mishnah) JaggedArrayNode, posts the index
    record through post_index, then posts the result of parse_yitzira()
    through post_text with index_count='on'.
    """
    title = 'Sefer Yetzirah'

    schema = JaggedArrayNode()
    schema.add_title(title, 'en', primary=True)
    schema.add_title(u'ספר יצירה', 'he', primary=True)
    schema.key = title
    schema.depth = 2
    schema.addressTypes = ['Integer'] * 2
    schema.sectionNames = ['Chapter', 'Mishnah']
    schema.validate()

    index_record = {
        'title': title,
        'categories': ['Kabbalah'],
        'language': 'he',
        'schema': schema.serialize()
    }
    text_record = {
        'versionTitle': 'Sefer Yetzirah, Warsaw 1884',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
        'language': 'he',
        'text': parse_yitzira()
    }

    post_index(index_record)
    post_text("Sefer Yetzirah", text_record, index_count='on')
def post_this():
    """Post the 'HaGra on Sefer Yetzirah Gra Version' index and Hebrew text.

    The text body is read from the module-level name ``gra``. The schema is
    a depth-3 JaggedArrayNode (Chapter / Mishnah / Comment).
    """
    title = 'HaGra on Sefer Yetzirah Gra Version'
    version_record = {
        'versionTitle': 'Sefer Yetzirah, Warsaw 1884',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
        'language': 'he',
        'text': gra
    }

    node = JaggedArrayNode()
    node.add_title(title, 'en', True)
    node.add_title(u'פירוש הגר"א על ספר יצירה', 'he', True)
    node.key = title
    node.depth = 3
    node.addressTypes = ['Integer'] * 3
    node.sectionNames = ['Chapter', 'Mishnah', 'Comment']
    node.validate()

    # serialize() turns the schema node into the JSON-ready structure.
    post_index({
        'title': title,
        'categories': ['Commentary2', 'Kabbalah', 'Gra'],
        'schema': node.serialize()
    })
    post_text(title, version_record, index_count='on')
def post_raph(ja_raph):
    """Post the 'Hagahot Rabbenu Peretz' commentary index and Hebrew text.

    :param ja_raph: the text to upload; sent as the 'text' of the version.
    """
    title = 'Hagahot Rabbenu Peretz'
    version_record = {
        'versionTitle': 'Sefer Mitzvot Katan, Kopys, 1820',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001771677',
        'language': 'he',
        'text': ja_raph
    }

    node = JaggedArrayNode()
    node.add_title(title, 'en', True)
    node.add_title(u'הגהות רבנו פרץ', 'he', True)
    node.key = title
    node.depth = 2
    node.addressTypes = ['Integer'] * 2
    node.sectionNames = ['Siman', 'Segment']
    node.validate()

    # The index is declared as a commentary on Sefer Mitzvot Katan.
    post_index({
        'title': title,
        'dependence': "Commentary",
        'base_text_titles': ["Sefer Mitzvot Katan"],
        "categories": ["Halakhah", "Commentary"],
        'schema': node.serialize()
    })
    post_text(title, version_record)
def upload():
    """Post index and text for every tractate in ``cards``, then all links.

    For each tractate name in the module-level ``cards`` the Hebrew title is
    derived from the tractate name minus its first word, the file
    '<tractate>.txt' is parsed, and the index and text are posted. Links
    gathered from every parse are posted together at the end.
    """
    all_links = []

    for tractate in cards:
        base_ref = ' '.join(tractate.split()[1:])
        he_title = u'רמב"ם {}'.format(Ref(base_ref).he_normal())

        schema = JaggedArrayNode()
        schema.add_title(tractate, 'en', primary=True)
        schema.add_title(he_title, 'he', primary=True)
        schema.key = tractate
        schema.depth = 3
        schema.addressTypes = ['Integer'] * 3
        schema.sectionNames = ['Chapter', 'Mishnah', 'Comment']
        schema.validate()

        index_record = {
            'title': tractate,
            'categories': ['Commentary2', 'Mishnah', 'Rambam'],
            'schema': schema.serialize(),
            'toc_zoom': 2
        }

        result = parse_file('{}.txt'.format(tractate))
        all_links.extend(result['links'])
        version_record = {
            'versionTitle': u'Vilna Edition',
            'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957',
            'language': 'he',
            'text': result['parsed text']
        }

        print('posting {}'.format(tractate))
        post_index(index_record)
        post_text(tractate, version_record, index_count='on')

    post_link(all_links)
def upload(data, post_index=True):
    """Post one commentary: optionally its index, then its cleaned Hebrew text.

    :param data: mapping providing the English title under 'en', the Hebrew
        title under 'he', and under 'data' an object whose ``array()`` yields
        the text to upload.
    :param post_index: when true, the index record is posted before the text.
        This parameter shadows any module-level ``post_index``, which is why
        posting goes through ``functions.post_index``.
    """
    en_title = data['en']

    # create index
    schema = JaggedArrayNode()
    schema.add_title(en_title, 'en', True)
    schema.add_title(data['he'], 'he', True)
    schema.key = en_title
    schema.depth = 3
    schema.addressTypes = ['Integer', 'Integer', 'Integer']
    schema.sectionNames = ['Chapter', 'Seif', 'Comment']
    schema.validate()

    index = {
        'title': en_title,
        'categories': ['Commentary2', 'Mishnah', 'Yachin'],
        'schema': schema.serialize()
    }
    if post_index:
        functions.post_index(index)

    # clean and upload text
    upload_text = util.clean_jagged_array(data['data'].array(), tags_to_strip())
    text_version = {
        'versionTitle': u'Mishnah, ed. Romm, Vilna 1913',
        # A single 'http://' scheme: a doubled scheme is not a valid URL.
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001741739',
        'language': 'he',
        'text': upload_text
    }
    functions.post_text(en_title, text_version)
def upload():
    """Post the Sefat Emet index, then the text of every book/parasha/year.

    The index comes from construct_index(); texts come from parse() and are
    addressed as 'Sefat Emet, <book>, <english parasha>, <civil year>'.
    Each text post is retried on URLError/HTTPError up to ``max_attempts``
    times. If every attempt fails, a message is printed and the upload moves
    on to the next text (best effort, but no longer silent).
    """
    functions.post_index(construct_index())
    parsed = parse()
    names = node_names()
    en_parasha_names = get_parsha_dict()
    max_attempts = 10

    for book in names.keys():
        for parasha in names[book].keys():
            for year in names[book][parasha]:
                current_text = {
                    'versionTitle': 'Sefat emet, Piotrków, 1905-1908',
                    'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001186213',
                    'language': 'he',
                    'text': parsed[book][parasha][year]
                }
                en_parasha = en_parasha_names[parasha]
                civil_year = get_civil_year(year, book)
                url = 'Sefat Emet, {}, {}, {}'.format(book, en_parasha, civil_year)
                print(url)

                for _attempt in range(max_attempts):
                    try:
                        functions.post_text(url, current_text)
                    except (URLError, HTTPError):
                        print('handling weak network')
                    else:
                        break
                else:
                    # Runs only when no attempt succeeded; make the loss visible.
                    print('giving up on {} after {} attempts'.format(url, max_attempts))
def post_smk(ja_smk):
    """Post the 'Sefer Mitzvot Katan' index and its Hebrew text.

    :param ja_smk: the text to upload; sent as the 'text' of the version.
    """
    title = 'Sefer Mitzvot Katan'
    version_record = {
        'versionTitle': 'Sefer Mitzvot Katan, Kopys, 1820',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001771677',
        'language': 'he',
        'text': ja_smk
    }

    node = JaggedArrayNode()
    node.add_title(title, 'en', True)
    node.add_title(u'ספר מצות קטן', 'he', True)
    node.key = title
    node.depth = 2
    node.addressTypes = ['Integer'] * 2
    node.sectionNames = ['Siman', 'Segment']
    node.validate()

    # serialize() turns the schema node into the JSON-ready structure.
    post_index({
        'title': title,
        'categories': ['Halakhah'],
        'schema': node.serialize()
    })
    post_text(title, version_record)
def post_smk(ja_smk):
    """Clean the text with clean(), then post the 'Sefer Mitzvot Katan' index and text.

    :param ja_smk: the raw text; it is passed through clean() before posting.
    """
    title = 'Sefer Mitzvot Katan'

    # Per the original author's note, clean() erases all '@'s before posting.
    cleaned = clean(ja_smk)
    version_record = {
        'versionTitle': 'Sefer Mitzvot Katan, Kopys, 1820',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001771677',
        'language': 'he',
        'text': cleaned
    }

    node = JaggedArrayNode()
    node.add_title(title, 'en', True)
    node.add_title(u'ספר מצות קטן', 'he', True)
    node.key = title
    node.depth = 2
    node.addressTypes = ['Integer'] * 2
    node.sectionNames = ['Siman', 'Segment']
    node.validate()

    # serialize() turns the schema node into the JSON-ready structure.
    post_index({
        'title': title,
        'categories': ['Halakhah'],
        'schema': node.serialize()
    })
    post_text(title, version_record)
def post_the_text(ja):
    """Dump *ja* to 'testing_file.txt' for inspection, then post it.

    :param ja: the text structure handed to util.jagged_array_to_file and
        create_text.

    The reference comes from create_ref() and the payload from
    create_text(ja); both are passed to functions.post_text.
    """
    # The context manager closes the dump file even if writing raises.
    with codecs.open("testing_file.txt", 'w', 'utf-8') as testing_file:
        util.jagged_array_to_file(testing_file, ja, ['Perek', 'Mishna', 'Comment'])

    ref = create_ref()
    text = create_text(ja)
    functions.post_text(ref, text)
def post_hagahot(ja_hg): replace_dict = {u"@11\([\u05d0-\u05ea]{1,3}\)\s?@33": u"", u"@77": u"", u"@44": u"<br>", u"@55": u"<b>", u"@66": u"</b>", u"@00(.+?)\s(.+)": u"", u"@(?:99|01)(.*?)@": ur"<br><small>\1</small><br>"} ja_hg = before_post_cleaner(ja_hg, replace_dict) text_version = { 'versionTitle': 'Sefer Mitzvot Katan, Kopys, 1820', 'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001771677', 'language': 'he', 'text': ja_hg } schema = JaggedArrayNode() schema.add_title('Haggahot Chadashot on Sefer Mitzvot Katan', 'en', True) schema.add_title(u'הגהות חדשות על ספר מצוות קטן', 'he', True) schema.key = 'Haggahot Chadashot on Sefer Mitzvot Katan' schema.depth = 2 schema.addressTypes = ['Integer', 'Integer'] schema.sectionNames = ['Siman', 'Segment'] schema.validate() add_term('Haggahot Chadashot on Sefer Mitzvot Katan',u'הגהות חדשות על ספר מצוות קטן') index_dict = { 'title': 'Haggahot Chadashot on Sefer Mitzvot Katan', 'dependence': "Commentary", 'base_text_titles': ["Sefer Mitzvot Katan", 'Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan'], "categories": ["Halakhah", "Commentary"], 'schema': schema.serialize(), # This line converts the schema into json 'collective_title': 'Haggahot Chadashot on Sefer Mitzvot Katan', } post_index(index_dict) post_text('Haggahot Chadashot on Sefer Mitzvot Katan', text_version)
def post_raph(ja_raph):
    """Post the 'Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan' term, index and text.

    :param ja_raph: the raw text; it goes through inlinereferencehtml and
        then before_post_cleaner (replacing u"@22" with u"<br>") before
        being posted.
    """
    en_title = 'Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan'
    he_title = u'הגהות רבנו פרץ על ספר מצוות קטן'

    replacements = {u"@22": u"<br>"}
    prepared = before_post_cleaner(inlinereferencehtml(ja_raph), replacements)
    version_record = {
        'versionTitle': 'Sefer Mitzvot Katan, Kopys, 1820',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001771677',
        'language': 'he',
        'text': prepared
    }

    node = JaggedArrayNode()
    node.add_title(en_title, 'en', True)
    node.add_title(he_title, 'he', True)
    node.key = en_title
    node.depth = 2
    node.addressTypes = ['Integer'] * 2
    node.sectionNames = ['Siman', 'Segment']
    node.validate()

    add_term(en_title, he_title)
    index_record = {
        'title': en_title,
        'dependence': "Commentary",
        'base_text_titles': ["Sefer Mitzvot Katan"],
        "categories": ["Halakhah", "Commentary"],
        'schema': node.serialize(),  # JSON-ready form of the schema
        'collective_title': en_title,
    }
    post_index(index_record)
    post_text(en_title, version_record)
def post():
    """Parse, link, dump and post Minchat Chinuch.

    Steps, in order: parse the source named by the module-level ``filename``;
    compute links against the Hebrew text of 'Sefer HaChinukh'; write the
    link refs to 'links.txt'; build the alternate structure from the two CSV
    files; clean the parsed text and dump it to 'parsed.txt'; finally post
    the index, the text and the links.
    """
    commentary = {'name': 'Minchat Chinuch', 'text': produce_parsed_data(filename)}
    base = {'name': 'Sefer HaChinukh', 'text': Ref('Sefer HaChinukh').text('he').text}
    link_records = find_links(commentary, base, grab_dh, u'<b>', u'</b>')

    with codecs.open('links.txt', 'w', 'utf-8') as links_file:
        for record in link_records:
            links_file.write(u'{}\n'.format(record['refs']))

    alt_struct = construct_alt_struct('Chinukh_by_Parsha.csv', 'Chinukh Mitzva names.csv')

    strip_patterns = [m_pattern, comment_pattern, u'@[0-9]{2}', u'\n', u'\r']
    cleaned_text = util.clean_jagged_array(commentary['text'], strip_patterns)
    with codecs.open('parsed.txt', 'w', 'utf-8') as parsed_file:
        util.jagged_array_to_file(parsed_file, cleaned_text, [u'Mitzva', u'Seif', u'Paragraph'])

    version_record = {
        'versionTitle': 'Minchat Chinuch, Piotrków, 1902',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001175092',
        'language': 'he',
        'text': cleaned_text
    }
    functions.post_index(construct_index(alt_struct))
    functions.post_text('Minchat Chinuch', version_record)
    functions.post_link(link_records)
def upload_version(storage_object, title, destination='http://localhost:8000'):
    """Fetch the stored version for *title* and post it to *destination*.

    :param storage_object: object exposing ``get_version_for_title(title)``.
    :param title: title to look up and to post under.
    :param destination: server passed to post_text as ``server``.
    """
    post_text(
        title,
        storage_object.get_version_for_title(title),
        index_count="on",
        server=destination,
        weak_network=True,
    )
def post_this():
    """Post the output of ari_parse() as a Hebrew version of 'Sefer Yetzirah Gra Version'.

    No index is built or posted here. Per the original author's note, the
    text goes up as another version of the 'Sefer Yetzirah Gra Version'
    index that is already online.
    """
    target = 'Sefer Yetzirah Gra Version'
    version_record = {
        'versionTitle': 'Sefer Yetzirah, Warsaw 1884',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
        'language': 'he',
        'text': ari_parse()
    }
    post_text(target, version_record, index_count='on')
def parse_and_upload():
    """Post index and text for every card from get_cards(), then all links.

    Each card name is used as the English title. The Hebrew title is the
    card name with 'Rambam ' removed, resolved through Ref(...).he_normal()
    and prefixed. Links from every parser(card) result are posted together
    at the end.
    """
    collected_links = []

    for card in get_cards():
        he_title = u'רמב"ם ' + Ref(card.replace('Rambam ', '')).he_normal()

        schema = JaggedArrayNode()
        schema.add_title(card, 'en', primary=True)
        schema.add_title(he_title, 'he', primary=True)
        schema.key = card
        schema.depth = 3
        schema.addressTypes = ['Integer'] * 3
        schema.sectionNames = ['Chapter', 'Mishnah', 'Comment']
        schema.validate()
        schema.toc_zoom = 2

        index_record = {
            'title': card,
            'categories': ['Commentary2', 'Mishnah', 'Rambam'],
            'schema': schema.serialize(),
        }

        result = parser(card)
        collected_links.extend(result['links'])
        version_record = {
            'versionTitle': u'Vilna Edition',
            'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957',
            'language': 'he',
            'text': result['parsed']
        }

        print('posting {}'.format(card))
        post_index(index_record)
        post_text(card, version_record, index_count='on')

    post_link(collected_links)
def post_the_text(ja):
    """Dump *ja* to 'testing_file.txt' for inspection, then post it.

    :param ja: the text structure handed to util.jagged_array_to_file and
        create_text.

    The reference comes from create_ref() and the payload from
    create_text(ja); both are passed to functions.post_text.
    """
    # The context manager closes the dump file even if writing raises.
    with codecs.open("testing_file.txt", 'w', 'utf-8') as testing_file:
        util.jagged_array_to_file(testing_file, ja, ['Perek', 'Mishna', 'Comment'])

    ref = create_ref()
    text = create_text(ja)
    functions.post_text(ref, text)
def post_raph(ja_raph):
    """Post the 'Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan' term, index and text.

    :param ja_raph: the raw text; it goes through inlinereferencehtml and
        then before_post_cleaner (replacing u"@22" with u"<br>") before
        being posted.
    """
    en_title = 'Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan'
    he_title = u'הגהות רבנו פרץ על ספר מצוות קטן'

    replacements = {u"@22": u"<br>"}
    prepared = before_post_cleaner(inlinereferencehtml(ja_raph), replacements)
    version_record = {
        'versionTitle': 'Sefer Mitzvot Katan, Kopys, 1820',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001771677',
        'language': 'he',
        'text': prepared
    }

    node = JaggedArrayNode()
    node.add_title(en_title, 'en', True)
    node.add_title(he_title, 'he', True)
    node.key = en_title
    node.depth = 2
    node.addressTypes = ['Integer'] * 2
    node.sectionNames = ['Siman', 'Segment']
    node.validate()

    add_term(en_title, he_title)
    index_record = {
        'title': en_title,
        'dependence': "Commentary",
        'base_text_titles': ["Sefer Mitzvot Katan"],
        "categories": ["Halakhah", "Commentary"],
        'schema': node.serialize(),  # JSON-ready form of the schema
        'collective_title': en_title,
    }
    post_index(index_record)
    post_text(en_title, version_record)
def post_this():
    """Post the 'HaGra on Sefer Yetzirah Gra Version' index and Hebrew text.

    The text body is read from the module-level name ``gra``. The schema is
    a depth-3 JaggedArrayNode (Chapter / Mishnah / Comment).
    """
    title = 'HaGra on Sefer Yetzirah Gra Version'
    version_record = {
        'versionTitle': 'Sefer Yetzirah, Warsaw 1884',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
        'language': 'he',
        'text': gra
    }

    node = JaggedArrayNode()
    node.add_title(title, 'en', True)
    node.add_title(u'פירוש הגר"א על ספר יצירה', 'he', True)
    node.key = title
    node.depth = 3
    node.addressTypes = ['Integer'] * 3
    node.sectionNames = ['Chapter', 'Mishnah', 'Comment']
    node.validate()

    # serialize() turns the schema node into the JSON-ready structure.
    post_index({
        'title': title,
        'categories': ['Commentary2', 'Kabbalah', 'Gra'],
        'schema': node.serialize()
    })
    post_text(title, version_record, index_count='on')
def post_this():
    """Post the output of ari_parse() as a Hebrew version of 'Sefer Yetzirah Gra Version'.

    No index is built or posted here. Per the original author's note, the
    text goes up as another version of the 'Sefer Yetzirah Gra Version'
    index that is already online.
    """
    target = "Sefer Yetzirah Gra Version"
    version_record = {
        "versionTitle": "Sefer Yetzirah, Warsaw 1884",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968",
        "language": "he",
        "text": ari_parse(),
    }
    post_text(target, version_record, index_count="on")
def post_avot_comm(title, book_text):
    """Post *book_text* under *title* as a Hebrew 'ToratEmet' version.

    :param title: reference to post under.
    :param book_text: text sent as the 'text' of the version.

    The post is sent to the server "http://draft.sefaria.org".
    """
    payload = {
        "text": book_text,
        "versionTitle": "ToratEmet",
        "versionSource": "http://www.toratemetfreeware.com/online/f_01313.html",
        "language": "he"
    }
    post_text(title, payload, server="http://draft.sefaria.org")
def post_the_text(jagged_array, book_number, parsha_number, intro=False):
    """Post *jagged_array* at the reference built from the given coordinates.

    :param jagged_array: text structure passed to create_text.
    :param book_number: passed to create_ref.
    :param parsha_number: passed to create_ref.
    :param intro: passed to create_ref.
    """
    target_ref = create_ref(book_number, parsha_number, intro)
    payload = create_text(jagged_array)
    functions.post_text(target_ref, payload)
def post(self):
    """Post base indices, commentary indices, every version, then the links.

    Base indices and versions are posted with weak_network=True. Each
    version's ref is printed before its text is posted.
    """
    for base_index in self.base_indices:
        post_index(base_index, weak_network=True)

    for commentary_index in self.commentaryIndices:
        post_index(commentary_index)

    for entry in self.versionList:
        ref = entry['ref']
        print(ref)
        post_text(ref, entry['version'], index_count='on', weak_network=True)

    post_link(self.linkSet)
def post_simple_commentaries():
    """Post the Ramban and Rasag commentaries on Sefer Yetzirah, then their links.

    Both texts are parsed with parse_general and get the same depth-3 schema
    (Chapter / Mishnah / Comment). For each commentary the index is posted
    and then the Hebrew text. Links for both are built with linker() and
    posted in one call.

    NOTE(review): the Rasag versionTitle now reads 'Rasag on ...' where it
    used to be misspelled 'Rasage on ...'. If a version was already uploaded
    under the misspelled title, rename it on the server instead of
    re-posting.
    """
    version_source = 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968'

    def _build_node(en_title, he_title):
        # Build and validate the shared 3-level schema; only titles differ.
        node = JaggedArrayNode()
        node.add_title(en_title, 'en', primary=True)
        node.add_title(he_title, 'he', primary=True)
        node.key = en_title
        node.addressTypes = ['Integer', 'Integer', 'Integer']
        node.sectionNames = ["Chapter", "Mishnah", "Comment"]
        node.toc_zoom = 2
        node.depth = 3
        node.validate()
        return node

    def _post_commentary(en_title, author, node, text):
        # Post the index record, then the Hebrew text, for one commentary.
        post_index({
            "title": en_title,
            "categories": ["Commentary2", "Kabbalah", author],
            "language": "he",
            "schema": node.serialize()
        })
        post_text(en_title, {
            # Derived from the title so the two can never drift apart.
            'versionTitle': '{}, Warsaw 1884'.format(en_title),
            'versionSource': version_source,
            'language': 'he',
            'text': text
        })

    ramban_title = "Ramban on Sefer Yetzirah"
    rasag_title = "Rasag on Sefer Yetzirah"

    ramban_text = parse_general('yitzira_ramban.txt')
    rasag_text = parse_general('yitzira_rasag.txt')

    # Validate both schemas before anything is posted.
    ramban_node = _build_node(ramban_title, u'רמב"ן על ספר יצירה')
    rasag_node = _build_node(rasag_title, u'רס"ג על ספר יצירה')

    _post_commentary(ramban_title, "Ramban", ramban_node, ramban_text)
    _post_commentary(rasag_title, "Rasag", rasag_node, rasag_text)

    links = linker(ramban_text, ramban_title)
    links.extend(linker(rasag_text, rasag_title))
    post_link(links)
def post_text(text):
    """Wrap *text* in the 'The Midrash of Philo' version record and post it.

    :param text: text sent as the 'text' of the version.

    Posting goes through functions.post_text with index_count="on".
    """
    version_record = {
        "title": "The Midrash of Philo",
        "versionTitle": "The Midrash of Philo, by Samuel Belkin, 1989",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001117662",
        "language": "he",
        "text": text,
    }
    functions.post_text("The Midrash of Philo", version_record, index_count="on")
def tt_text_post(text_dict):
    """Post each entry of *text_dict* as Hebrew text of 'Tur HaAroch, <key>'.

    :param text_dict: mapping from node name to the text to upload.
    """
    for node_name in text_dict.keys():
        target_ref = 'Tur HaAroch, {}'.format(node_name)
        payload = {
            'versionTitle': 'Perush al ha-Torah, Hanover, 1838',
            'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001935796',
            'language': 'he',
            'text': text_dict[node_name]
        }
        post_text(target_ref, payload)
def intro_text_post():
    """Post a one-segment placeholder as the Hebrew 'Tur HaAroch, Introduction'."""
    # note: if ever coming back to this script, replace this placeholder segment
    placeholder = [u'צריך להקליד את ההקדמה']
    payload = {
        'versionTitle': 'Perush al ha-Torah, Hanover, 1838',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001935796',
        'language': 'he',
        'text': placeholder
    }
    post_text('Tur HaAroch, Introduction', payload)
def post_the_text(jagged_array, title_counter):
    """Post the text for *title_counter* and, when it is positive, its links.

    :param jagged_array: text structure passed to create_text / create_links.
    :param title_counter: passed to create_ref; links are only created and
        posted when it is greater than 0.
    """
    target_ref = create_ref(title_counter)
    payload = create_text(jagged_array)
    has_links = title_counter > 0

    link_list = None
    if has_links:
        link_list = create_links(jagged_array, title_counter)

    # The text must be posted after the links are created, because
    # create_links() changes the actual text (original author's note).
    functions.post_text(target_ref, payload)

    if has_links:
        print(1)
        functions.post_link(link_list)
def en_tt_text_post(en_text):
    """Post each entry of *en_text* as English text of 'Tur HaAroch, <key>'.

    :param en_text: mapping from node name to an object whose ``array()``
        yields the text to upload.
    """
    for node_name in en_text.keys():
        target_ref = 'Tur HaAroch, {}'.format(node_name)
        payload = {
            'versionTitle': 'Tur on the Torah, trans. Eliyahu Munk',
            'versionSource': 'http://www.urimpublications.com/Merchant2/merchant.mv?Screen=PROD&Store_Code=UP&Product_Code=TUR&Category_Code=bde',
            'language': 'en',
            'text': en_text[node_name].array()
        }
        post_text(target_ref, payload)
def post():
    """Parse Sefer HaKana, post its index, then post its Hebrew text.

    The index is built by build_index from the parsed 'titles'; the text is
    the parsed 'text', posted with index_count='on'.
    """
    result = parse()
    post_index(build_index(result['titles']))

    version_record = {
        'versionTitle': 'Sefer HaKana',
        'versionSource': 'http://www.hebrew.grimoar.cz/anonym/sefer_ha-kana.htm',
        'language': 'he',
        'text': result['text']
    }
    post_text('Sefer HaKana', version_record, index_count='on')
def post_text(parsed_data):
    """Post 'Siftei Hakhamim, <book>' for every book in the 'Torah' category.

    :param parsed_data: mapping from book title to the text to upload.

    Book titles come from library.get_indexes_in_category('Torah').
    """
    for book_title in library.get_indexes_in_category('Torah'):
        payload = {
            'versionTitle': 'Siftei Hakhamim',
            'versionSource': 'http://www.toratemetfreeware.com/',
            'language': 'he',
            'text': parsed_data[book_title]
        }
        target_ref = 'Siftei Hakhamim, {}'.format(book_title)
        functions.post_text(target_ref, payload)
def post(book):
    """Parse and post the English 'Ibn Ezra on <book>' text.

    :param book: book name; used to build the title and as the key into the
        module-level ``file_data``.
    """
    full_title = 'Ibn Ezra on {}'.format(book)
    payload = {
        'title': full_title,
        'versionTitle': 'Ibn Ezra on the Pentateuch; trans. by Jay F. Shachter',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001028367',
        'language': 'en',
        'text': parse(file_data[book])
    }
    post_text(full_title, payload)
def upload_text(parser):
    """Post a Malbim parser's parsed text as Hebrew 'Malbim on Genesis'.

    :param parser: a Malbim instance; its ``parsed_text`` is uploaded.
    :raises AssertionError: if *parser* is not a Malbim.
    """
    assert isinstance(parser, Malbim)

    payload = {
        "versionTitle": "Malbim, Vilna Romm, 1892.",
        "versionSource": 'http://dlib.rsl.ru/viewer/01006563898#?page=1',
        "language": 'he',
        "text": parser.parsed_text
    }
    functions.post_text("Malbim on Genesis", payload, index_count='on')
def post_raavad_text(text_dict):
    """Post every entry of *text_dict* except 'old_parsing_of_perush'.

    :param text_dict: mapping from reference name to the text to upload.

    All entries share the same version metadata.
    """
    for ref_name in text_dict.keys():
        if ref_name == 'old_parsing_of_perush':
            continue
        payload = {
            'versionTitle': 'Sefer Yetzirah, Warsaw 1884',
            'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
            'language': 'he',
            'text': text_dict[ref_name]
        }
        post_text(ref_name, payload)
def post_raavad_text(text_dict):
    """Post every entry of *text_dict* except 'old_parsing_of_perush'.

    :param text_dict: mapping from reference name to the text to upload.

    All entries share the same version metadata.
    """
    for ref_name in text_dict.keys():
        if ref_name == 'old_parsing_of_perush':
            continue
        payload = {
            # NOTE(review): versionTitle is disabled, so the posted record
            # carries none. Confirm this is intentional.
            # 'versionTitle': 'Sefer Yetzirah, Warsaw 1884',
            'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968',
            'language': 'he',
            'text': text_dict[ref_name]
        }
        post_text(ref_name, payload)
def post():
    """Post index and text for the Aramaic Targum to I and II Chronicles, then links.

    'targum.txt' is parsed once. Book number 1 uses parsed[0] and book
    number 2 uses parsed[1]. The version source is the module-level ``url``.
    """
    parsed_books = parse('targum.txt')

    for book_number in (1, 2):
        functions.post_index(build_index(book_number))
        payload = {
            'versionTitle': 'Wikisource Aramaic Targum to Chronicles',
            'versionSource': url,
            'language': 'he',
            'text': parsed_books[book_number - 1]
        }
        # 'I' * n gives the numeral used in the title: 'I' or 'II'.
        numeral = 'I' * book_number
        functions.post_text('Aramaic Targum to {} Chronicles'.format(numeral), payload)

    functions.post_link(build_links(parsed_books))
def upload_text(parser):
    """Post a Malbim parser's parsed text as Hebrew 'Malbim on Genesis' (Wikisource version).

    :param parser: a Malbim instance; its ``parsed_text`` is uploaded.
    :raises AssertionError: if *parser* is not a Malbim.
    """
    assert isinstance(parser, Malbim)

    payload = {
        "versionTitle": "Malbim on Genesis -- Wikisource",
        "versionSource": 'https://he.wikisource.org/wiki/%D7%9E%D7%9C%D7%91%D7%99%22%D7%9D_%D7%A2%D7%9C_%D7%91%D7%A8%D7%90%D7%A9%D7%99%D7%AA',
        "language": 'he',
        "text": parser.parsed_text
    }
    functions.post_text("Malbim on Genesis", payload, index_count='on')
def post_simple_commentaries():
    """Post the Ramban and Rasag commentaries on Sefer Yetzirah, then their links.

    Both texts are parsed with parse_general and get the same depth-3 schema
    (Chapter / Mishnah / Comment). For each commentary the index is posted
    and then the Hebrew text. Links for both are built with linker() and
    posted in one call.

    NOTE(review): the Rasag versionTitle now reads 'Rasag on ...' where it
    used to be misspelled 'Rasage on ...'. If a version was already uploaded
    under the misspelled title, rename it on the server instead of
    re-posting.
    """
    version_source = 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001310968'

    def _build_node(en_title, he_title):
        # Build and validate the shared 3-level schema; only titles differ.
        node = JaggedArrayNode()
        node.add_title(en_title, 'en', primary=True)
        node.add_title(he_title, 'he', primary=True)
        node.key = en_title
        node.addressTypes = ['Integer', 'Integer', 'Integer']
        node.sectionNames = ["Chapter", "Mishnah", "Comment"]
        node.toc_zoom = 2
        node.depth = 3
        node.validate()
        return node

    def _post_commentary(en_title, author, node, text):
        # Post the index record, then the Hebrew text, for one commentary.
        post_index({
            "title": en_title,
            "categories": ["Commentary2", "Kabbalah", author],
            "language": "he",
            "schema": node.serialize()
        })
        post_text(en_title, {
            # Derived from the title so the two can never drift apart.
            'versionTitle': '{}, Warsaw 1884'.format(en_title),
            'versionSource': version_source,
            'language': 'he',
            'text': text
        })

    ramban_title = "Ramban on Sefer Yetzirah"
    rasag_title = "Rasag on Sefer Yetzirah"

    ramban_text = parse_general('yitzira_ramban.txt')
    rasag_text = parse_general('yitzira_rasag.txt')

    # Validate both schemas before anything is posted.
    ramban_node = _build_node(ramban_title, u'רמב"ן על ספר יצירה')
    rasag_node = _build_node(rasag_title, u'רס"ג על ספר יצירה')

    _post_commentary(ramban_title, "Ramban", ramban_node, ramban_text)
    _post_commentary(rasag_title, "Rasag", rasag_node, rasag_text)

    links = linker(ramban_text, ramban_title)
    links.extend(linker(rasag_text, rasag_title))
    post_link(links)
def run(self):
    """Collect the text of every chapter, print ``self.extra``, then post 'Kuzari'.

    get_text is called once per entry of ``self.chapter_lengths`` with the
    zero-based chapter position and that entry. ``self.text`` is then posted
    as a Hebrew-language version to "http://proto.sefaria.org".
    """
    for chapter_index, chapter_length in enumerate(self.chapter_lengths):
        self.get_text(chapter_index, chapter_length)

    # NOTE(review): assumed to run once after the loop, not per chapter.
    print(self.extra)

    payload = {
        "text": self.text,
        "language": "he",
        "versionTitle": "Kuzari in Judeo-Arabic",
        "versionSource": "http://www.cs.toronto.edu/~yuvalf/kuzari.html"
    }
    post_text("Kuzari", payload, server="http://proto.sefaria.org")
def post(self):
    """Post base indices, one index per commentator, every version, then the links.

    Commentator indices are built from ``self.commentarySchemas``; the
    English author name comes from DCXMLsubs.commentatorNames and is used as
    both the title and the last category.
    """
    for base_index in self.base_indices:
        post_index(base_index, weak_network=True)

    for he_author in self.commentarySchemas.keys():
        en_author = DCXMLsubs.commentatorNames[he_author]
        schema = self.commentarySchemas[he_author]
        post_index({
            'title': en_author,
            'categories': ['Commentary2', 'Masechtot Ketanot', en_author],
            'schema': schema.serialize()
        })

    for entry in self.versionList:
        post_text(entry['ref'], entry['version'], index_count='on', weak_network=True)

    post_link(self.linkSet)
def post():
    """Post the Derech Chaim index, main text, author's introduction and links.

    The same version record is used for both posts; only its 'text' entry is
    swapped for the introduction. The introduction post is the one sent with
    index_count='on'.
    """
    post_index(construct_index())

    main_text = restructure_text()
    link_list = build_links(main_text)

    record = {
        'versionTitle': u'Derech Chaim, Maharal',
        'versionSource': u'http://mobile.tora.ws/',
        'language': 'he',
        'text': main_text
    }
    post_text("Derech Chaim", record)

    # Reuse the record for the introduction; only the text changes.
    record['text'] = get_intro()
    post_text("Derech Chaim, Author's Introduction", record, index_count='on')

    post_link(link_list)
def post_text_and_index(text_struct, section_names):
    """Post the Noda BeYehuda index, then one Hebrew text per section.

    :param text_struct: indexable by section position; entry *i* is the text
        for ``section_names[i]``.
    :param section_names: section names; used to build the index and each
        'Noda BeYehuda, <section>' reference.
    """
    functions.post_index(build_index(section_names))

    for position, section_name in enumerate(section_names):
        payload = {
            "versionTitle": 'Noda BeYehuda Warsaw 1880',
            "versionSource": 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001983501',
            "language": 'he',
            "text": text_struct[position]
        }
        target_ref = 'Noda BeYehuda, {}'.format(section_name)
        functions.post_text(target_ref, payload)
def upload_text(parser):
    """Post every parsha of a PeneiDavid parser as Hebrew text.

    :param parser: a PeneiDavid instance; reads ``book_names``,
        ``parsha_by_book`` and ``parsed_as_dict``.
    :raises AssertionError: if *parser* is not a PeneiDavid.

    The version source is the module-level ``url``.
    """
    assert isinstance(parser, PeneiDavid)

    for book_name in parser.book_names:
        for parsha_name in parser.parsha_by_book[book_name]:
            print('uploading {}'.format(parsha_name))
            payload = {
                "versionTitle": "Torat Emet Penei David",
                "versionSource": url,
                "language": 'he',
                "text": parser.parsed_as_dict[book_name][parsha_name]
            }
            target_ref = "Penei David, {}, {}".format(book_name, parsha_name)
            functions.post_text(target_ref, payload)
def upload(text, text_name):
    """
    Upload a Mishnah tractate as a Hebrew 'Vilna Mishna' version.
    :param text: Jagged-array-like object holding the tractate.
    :param text_name: name of the text; used as the reference to post under.
    """
    print('uploading {}'.format(text_name))
    payload = {
        "versionTitle": "Vilna Mishna",
        "versionSource": "http://www.daat.ac.il/encyclopedia/value.asp?id1=836",
        "language": "he",
        "text": text,
    }
    functions.post_text(text_name, payload)
def post_hagahot(ja_hg): replace_dict = { u"@11\([\u05d0-\u05ea]{1,3}\)\s?@33": u"", u"@77": u"", u"@44": u"<br>", u"@55": u"<b>", u"@66": u"</b>", u"@00(.+?)\s(.+)": u"", u"@(?:99|01)(.*?)@": ur"<br><small>\1</small><br>" } ja_hg = before_post_cleaner(ja_hg, replace_dict) text_version = { 'versionTitle': 'Sefer Mitzvot Katan, Kopys, 1820', 'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001771677', 'language': 'he', 'text': ja_hg } schema = JaggedArrayNode() schema.add_title('Haggahot Chadashot on Sefer Mitzvot Katan', 'en', True) schema.add_title(u'הגהות חדשות על ספר מצוות קטן', 'he', True) schema.key = 'Haggahot Chadashot on Sefer Mitzvot Katan' schema.depth = 2 schema.addressTypes = ['Integer', 'Integer'] schema.sectionNames = ['Siman', 'Segment'] schema.validate() add_term('Haggahot Chadashot on Sefer Mitzvot Katan', u'הגהות חדשות על ספר מצוות קטן') index_dict = { 'title': 'Haggahot Chadashot on Sefer Mitzvot Katan', 'dependence': "Commentary", 'base_text_titles': [ "Sefer Mitzvot Katan", 'Haggahot Rabbeinu Peretz on Sefer Mitzvot Katan' ], "categories": ["Halakhah", "Commentary"], 'schema': schema.serialize(), # This line converts the schema into json 'collective_title': 'Haggahot Chadashot on Sefer Mitzvot Katan', } post_index(index_dict) post_text('Haggahot Chadashot on Sefer Mitzvot Katan', text_version)
def post_rashi(en_text, he_text, title, server):
    """Post the English and Hebrew Rashi on I or II Kings to *server*.

    :param en_text: English text; each value is converted with
        convertDictToArray in place, then the whole mapping is converted.
    :param he_text: Hebrew text, handled the same way.
    :param title: any title; if it contains "II" the target is
        "Rashi on II Kings", otherwise "Rashi on I Kings".
    :param server: passed to post_text as ``server``.
    """
    title = "Rashi on II Kings" if "II" in title else "Rashi on I Kings"

    for lang, chapters in (("en", en_text), ("he", he_text)):
        # Convert every chapter first, then the outer mapping itself.
        for ch_num in chapters.keys():
            chapters[ch_num] = convertDictToArray(chapters[ch_num])
        as_array = convertDictToArray(chapters)

        payload = {
            "text": as_array,
            "language": lang,
            "versionTitle": "Metsudah {} -- {}".format(title, lang),
            "versionSource": "http://www.sefaria.org"
        }
        post_text(title, payload, server=server)
def post():
    """Align, index and post Tafsir Rasag (Introduction plus the Torah books), then links.

    Every book in the 'Torah' category is re-aligned with align_text using
    the two patterns below. The index is posted, then 'Introduction' and
    each Torah book are posted as Hebrew text. Links are built from the
    aligned books.
    """
    chapter_pattern = u'@\u05e4\u05e8\u05e7 [\u05d0-\u05ea]{1,2}'
    verse_pattern = u'[0-9]{1,2}\.'

    books = file_to_books()
    for torah_book in library.get_indexes_in_category('Torah'):
        books[torah_book] = align_text(books[torah_book], chapter_pattern, verse_pattern)

    functions.post_index(build_index())

    node_names = ['Introduction'] + library.get_indexes_in_category('Torah')
    for node_name in node_names:
        payload = {
            'versionTitle': 'Tafsir al-Torah bi-al-Arabiya, Paris, 1893',
            'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001863864',
            'language': 'he',
            'text': books[node_name]
        }
        functions.post_text('Tafsir Rasag, {}'.format(node_name), payload)

    functions.post_link(build_links(books))
def upload_text(full_text, upload=False):
    """
    Post every entry of *full_text* as Hebrew text of 'Chizkuni,_<ref>'.
    :param full_text: Data structure from parse_text()
    :param upload: set to True, otherwise function will do nothing
    """
    if upload:
        # Build one version record per reference and post it.
        for ref in full_text.keys():
            payload = {
                "versionTitle": "Chizkuni",
                "versionSource": "Chizkuni",
                "language": "he",
                "text": full_text[ref]
            }
            print(ref)
            post_text('Chizkuni,_{}'.format(ref), payload)
def upload():
    """Post the Chesed LeAvraham index, introduction, each Maayan and Breichat Avraham.

    One version record is reused throughout; only its 'text' entry is
    replaced before each post. Maayan sections are numbered from 1 in the
    order parse_body() returns them. The final post is the one sent with
    index_count='on'.
    """
    post_index(construct_index())

    record = {
        'versionTitle': 'Placeholder',
        'versionSource': 'http://www.hebrew.grimoar.cz/azulaj/chesed_le-avraham.htm',
        'language': 'he',
        'text': parse_intro()
    }
    post_text('Chesed LeAvraham, Introduction', record)

    for maayan_number, maayan_text in enumerate(parse_body(), 1):
        record['text'] = maayan_text
        post_text('Chesed LeAvraham, Even Shetiya, Maayan {}'.format(maayan_number), record)

    record['text'] = parse_shokets()
    post_text('Chesed LeAvraham, Breichat Avraham', record, index_count='on')