def post_the_text(ja):
    """Dump the parsed jagged array to a local file for inspection, then
    post the text to the server.

    :param ja: jagged array of parsed text (Perek -> Mishna -> Comment).
    """
    # `with` guarantees the handle is closed even if the dump raises
    # (original used an explicit open()/close() pair).
    with codecs.open("testing_file.txt", 'w', 'utf-8') as testing_file:
        util.jagged_array_to_file(testing_file, ja, ['Perek', 'Mishna', 'Comment'])
    ref = create_ref()
    text = create_text(ja)
    functions.post_text(ref, text)
def post():
    """Parse Minchat Chinuch, build links to Sefer HaChinukh, and post the
    index, text, and links to the server.
    """
    minchat = {'name': 'Minchat Chinuch', 'text': produce_parsed_data(filename)}
    sefer = {'name': 'Sefer HaChinukh', 'text': Ref('Sefer HaChinukh').text('he').text}
    chinukh_links = find_links(minchat, sefer, grab_dh, u'<b>', u'</b>')

    # Record every link ref locally so the linking pass can be audited.
    with codecs.open('links.txt', 'w', 'utf-8') as link_log:
        link_log.write(u''.join(u'{}\n'.format(link['refs']) for link in chinukh_links))

    alt = construct_alt_struct('Chinukh_by_Parsha.csv', 'Chinukh Mitzva names.csv')
    cleaned = util.clean_jagged_array(
        minchat['text'],
        [m_pattern, comment_pattern, u'@[0-9]{2}', u'\n', u'\r'])

    # Dump the cleaned parse for manual inspection.
    with codecs.open('parsed.txt', 'w', 'utf-8') as parse_log:
        util.jagged_array_to_file(parse_log, cleaned, [u'Mitzva', u'Seif', u'Paragraph'])

    full_text = {
        'versionTitle': 'Minchat Chinuch, Piotrków, 1902',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001175092',
        'language': 'he',
        'text': cleaned,
    }
    functions.post_index(construct_index(alt))
    functions.post_text('Minchat Chinuch', full_text)
    functions.post_link(chinukh_links)
def post_the_text(ja):
    """Write the jagged array to a local test file, then post it.

    :param ja: jagged array of parsed text (Perek -> Mishna -> Comment).
    """
    # Context manager replaces the original unprotected open()/close() pair,
    # so the file is closed even if the dump raises.
    with codecs.open("testing_file.txt", 'w', 'utf-8') as testing_file:
        util.jagged_array_to_file(testing_file, ja, ['Perek', 'Mishna', 'Comment'])
    ref = create_ref()
    text = create_text(ja)
    functions.post_text(ref, text)
def post():
    """Parse, link, and upload Minchat Chinuch along with its alternate
    structure (index, full text, and links to Sefer HaChinukh).
    """
    minchat = {
        'name': 'Minchat Chinuch',
        'text': produce_parsed_data(filename),
    }
    sefer = {
        'name': 'Sefer HaChinukh',
        'text': Ref('Sefer HaChinukh').text('he').text,
    }
    chinukh_links = find_links(minchat, sefer, grab_dh, u'<b>', u'</b>')

    # Write each link ref on its own line for a manual audit.
    with codecs.open('links.txt', 'w', 'utf-8') as refs_out:
        for found in chinukh_links:
            refs_out.write(u'{}\n'.format(found['refs']))

    alt = construct_alt_struct('Chinukh_by_Parsha.csv', 'Chinukh Mitzva names.csv')
    scrub_patterns = [m_pattern, comment_pattern, u'@[0-9]{2}', u'\n', u'\r']
    cleaned = util.clean_jagged_array(minchat['text'], scrub_patterns)

    # Local dump of the cleaned structure for eyeballing the parse.
    with codecs.open('parsed.txt', 'w', 'utf-8') as parse_out:
        util.jagged_array_to_file(parse_out, cleaned, [u'Mitzva', u'Seif', u'Paragraph'])

    full_text = {
        'versionTitle': 'Minchat Chinuch, Piotrków, 1902',
        'versionSource': 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001175092',
        'language': 'he',
        'text': cleaned,
    }
    index = construct_index(alt)
    functions.post_index(index)
    functions.post_text('Minchat Chinuch', full_text)
    functions.post_link(chinukh_links)
# -*- coding: utf-8 -*-
"""Parse the English Targum Jerusalem and post each of the five books,
then dump the parsed structure locally for inspection."""
import codecs
import regex
from sefaria.model import *
from sources import functions
from data_utilities import util
from sources.Targum_Jerusalem_English import tje_functions

english_book_names = ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy']

all_five_books = tje_functions.parse_targum_jerusalem_english()
for book, book_name in zip(all_five_books, english_book_names):
    print(book_name)
    # NOTE(review): the underscore after the comma looks unusual for a Ref
    # string — confirm it matches the index title on the server.
    ref = 'Targum Jerusalem,_{}'.format(book_name)
    text = tje_functions.create_text(book)
    functions.post_text(ref, text)

# `with` replaces the original unprotected open()/close() pair.
with codecs.open("testing_file.txt", 'w', 'utf-8') as testing_file:
    util.jagged_array_to_file(testing_file, all_five_books, ['Book', 'Chapter', 'Verse'])
# NOTE(review): this line is a whitespace-mangled paste that begins with a
# dangling `return index` — the tail of a function (presumably build_index)
# whose definition starts outside this chunk. Left byte-identical rather than
# guessing at the missing definition; a properly formatted, complete copy of
# the same post_text_and_index function and script appears elsewhere in this
# file.
return index def post_text_and_index(text_struct, section_names): index = build_index(section_names) functions.post_index(index) for section_num, section in enumerate(section_names): new_text = { "versionTitle": 'Noda BeYehuda Warsaw 1880', "versionSource": 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001983501', "language": 'he', "text": text_struct[section_num] } functions.post_text('Noda BeYehuda, {}'.format(section), new_text) patterns = [u'@00', u'@22'] names = [u'חלק', u'סימן', u'טקסט'] section_names = ['Orach Chaim', 'Yoreh Deah', 'Even HaEzer', 'Choshen Mishpat'] parsed = util.file_to_ja([[[]]], noda_file, patterns, clean_and_align) with codecs.open('testfile.txt', 'w', 'utf-8') as check_parse: util.jagged_array_to_file(check_parse, parsed.array(), names) post_text_and_index(parsed.array(), section_names) noda_file.close() os.remove('errors.html')
"""Post the GRA on Pirkei Avot: index record, parsed text, and links,
then dump the parse locally for inspection."""
import codecs
import regex
from sefaria.model import *
from sources import functions
from data_utilities import util
from sources.GRA_on_pirkei_avot import gra_functions

index = gra_functions.create_index()
functions.post_index(index)
gra_on_pirkei_avot = gra_functions.parse()
ref = 'Gra on Pirkei Avot'
text = gra_functions.create_text(gra_on_pirkei_avot)
functions.post_text(ref, text)
list_of_links = gra_functions.create_links(gra_on_pirkei_avot)
functions.post_link(list_of_links)

# `with` replaces the original unprotected open()/close() pair.
with codecs.open("testing_file.txt", 'w', 'utf-8') as testing_file:
    util.jagged_array_to_file(testing_file, gra_on_pirkei_avot, ['Perek', 'Mishna', 'Comment'])
# -*- coding: utf-8 -*-
"""Parse and post the English Targum Jerusalem book by book, then dump
the whole parsed structure to a local file for review."""
import codecs
import regex
from sefaria.model import *
from sources import functions
from data_utilities import util
from sources.Targum_Jerusalem_English import tje_functions

english_book_names = [
    'Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy'
]

all_five_books = tje_functions.parse_targum_jerusalem_english()
for book, book_name in zip(all_five_books, english_book_names):
    print(book_name)
    # NOTE(review): underscore after the comma is unusual for a Ref string —
    # confirm against the server-side index title.
    ref = 'Targum Jerusalem,_{}'.format(book_name)
    text = tje_functions.create_text(book)
    functions.post_text(ref, text)

# Context manager replaces the original unprotected open()/close() pair.
with codecs.open("testing_file.txt", 'w', 'utf-8') as testing_file:
    util.jagged_array_to_file(testing_file, all_five_books, ['Book', 'Chapter', 'Verse'])
# -*- coding: utf-8 -*-
"""Post Rif on Nedarim: index record and parsed text, with local dumps
(flat file and XML) for inspection."""
import codecs
from sefaria.model import *
import regex
from sources import functions
from data_utilities import util
from sources.Rif_on_Nedarim import rif_nedarim_functions

index = rif_nedarim_functions.create_index()
functions.post_index(index)
rif_nedarim = rif_nedarim_functions.parse()
ref = 'Rif_Nedarim'
text = rif_nedarim_functions.create_text(rif_nedarim)
functions.post_text(ref, text)

# `with` replaces the original unprotected open()/close() pair.
with codecs.open("testing_file.txt", 'w', 'utf-8') as testing_file:
    util.jagged_array_to_file(testing_file, rif_nedarim, ['Daf', 'Line'])
util.ja_to_xml(rif_nedarim, ['Daf', 'Line'])
from data_utilities import util
from sources.Eben_Ezra_on_Eicha import eee_functions

"""Post Eben Ezra on Lamentations: index record, introduction plus main
text, and links, with a local dump for inspection."""
index = eee_functions.create_index()
functions.post_index(index)
eben_ezra = eee_functions.parse()
# Loop variable renamed: the original reused `index`, clobbering the posted
# index record above.
for position, each_text in enumerate(eben_ezra):
    ref = 'Eben Ezra on Lamentations'
    if position == 0:
        # First element of the parse is the introduction.
        ref = 'Eben Ezra on Lamentations,_Introduction'
    text = eee_functions.create_text(each_text)
    functions.post_text(ref, text)
list_of_links = eee_functions.create_links(eben_ezra[1])
functions.post_link(list_of_links)

# `with` replaces the original unprotected open()/close() pair.
with codecs.open("testing_file.txt", 'w', 'utf-8') as testing_file:
    util.jagged_array_to_file(testing_file, eben_ezra, ["AHHHHH", 'PEREK', 'MISHNA', 'COMMENT'])
def post_text_and_index(text_struct, section_names):
    """Post the Noda BeYehuda index, then each section's text under
    'Noda BeYehuda, <section>'.

    :param text_struct: jagged array, one top-level entry per section.
    :param section_names: English section titles, aligned with text_struct.
    """
    functions.post_index(build_index(section_names))
    for num, section in enumerate(section_names):
        payload = {
            "versionTitle": 'Noda BeYehuda Warsaw 1880',
            "versionSource": 'http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001983501',
            "language": 'he',
            "text": text_struct[num],
        }
        functions.post_text('Noda BeYehuda, {}'.format(section), payload)


patterns = [u'@00', u'@22']
names = [u'חלק', u'סימן', u'טקסט']
section_names = ['Orach Chaim', 'Yoreh Deah', 'Even HaEzer', 'Choshen Mishpat']

parsed = util.file_to_ja([[[]]], noda_file, patterns, clean_and_align)
# Write the parse to a local file so it can be checked by hand.
with codecs.open('testfile.txt', 'w', 'utf-8') as check_parse:
    util.jagged_array_to_file(check_parse, parsed.array(), names)
post_text_and_index(parsed.array(), section_names)
noda_file.close()
os.remove('errors.html')
# -*- coding: utf-8 -*-
"""Post Rif on Megillah: index record and parsed text, plus a local dump
of the parse for inspection."""
import codecs
from sefaria.model import *
import regex
from sources import functions
from data_utilities import util
from sources.Rif_on_Megillah import rif_megillah_functions

index = rif_megillah_functions.create_index()
functions.post_index(index)
rif_megillah = rif_megillah_functions.parse()
ref = 'Rif_Megillah'
text = rif_megillah_functions.create_text(rif_megillah)
functions.post_text(ref, text)

# `with` replaces the original unprotected open()/close() pair.
with codecs.open("testing_file.txt", 'w', 'utf-8') as testing_file:
    util.jagged_array_to_file(testing_file, rif_megillah, ['Daf', 'Line'])
# -*- coding: utf-8 -*-
"""Parse the English Targum Isaiah and dump it locally. Posting to the
server is currently commented out — only the local dump runs."""
import codecs
import regex
from sefaria.model import *
from sources import functions
from data_utilities import util
from sources.Targum_Isaiah_English import ti_functions

targum_isaiah = ti_functions.parse_targum_isaiah_english()
# Posting intentionally disabled for now; re-enable when the parse is verified.
# ref = 'Targum Isaiah'
# text = ti_functions.create_text(targum_isaiah)
# functions.post_text(ref, text)

# `with` replaces the original unprotected open()/close() pair.
with codecs.open("testing_file.txt", 'w', 'utf-8') as testing_file:
    util.jagged_array_to_file(testing_file, targum_isaiah, ['Chapter', 'Verse'])
from sources import functions
from data_utilities import util
from sources.Lev_Sameach import ls_functions

"""Post Lev Sameach: index record, the three top-level sections, and
links, with a local dump for inspection."""
index = ls_functions.create_index()
functions.post_index(index)
lev_sameach = ls_functions.parse()
# Renamed from `a` for clarity; order must match the parse's top level.
section_titles = ['Shorashim', 'Positive_Commandments', 'Negative_Commandments']
# Loop variable renamed: the original reused `index`, clobbering the posted
# index record above.
for position, each_depth_two in enumerate(lev_sameach):
    ref = 'Lev Sameach,_{}'.format(section_titles[position])
    text = ls_functions.create_text(each_depth_two)
    functions.post_text(ref, text)
list_of_links = ls_functions.create_links(lev_sameach[0])
functions.post_link(list_of_links)

# `with` replaces the original unprotected open()/close() pair.
with codecs.open("testing_file.txt", 'w', 'utf-8') as testing_file:
    util.jagged_array_to_file(testing_file, lev_sameach, ['DEPTH ONE', 'DEPTH TWO', 'DEPTH THREE'])
# -*- coding: utf-8 -*-
"""Post the Hebrew Targum Jerusalem: index record, each book's text, and
links, with a local dump of the whole parse for inspection."""
import codecs
import regex
from sefaria.model import *
from sources import functions
from data_utilities import util
from sources.Targum_Jerusalem_Hebrew import tjh_functions

english_names = ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy']

index = tjh_functions.create_index_record()
functions.post_index(index)
all_of_humash = tjh_functions.parse()
for book, book_name in zip(all_of_humash, english_names):
    ref = 'Targum Jerusalem, {}'.format(book_name)
    text = tjh_functions.create_text(book)
    functions.post_text(ref, text)
list_of_links = tjh_functions.create_links(all_of_humash)
functions.post_link(list_of_links)

# `with` replaces the original unprotected open()/close() pair.
with codecs.open("testing_file.txt", 'w', 'utf-8') as testing_file:
    util.jagged_array_to_file(testing_file, all_of_humash, ['Book', 'Chapter', 'Verse'])