def run_post_links():
    """Load the saved Meshech Hochma link array and post it in one call.

    The JSON file is read as-is; the loaded value is handed to
    Helper.postLink without being wrapped or transformed here.
    """
    links_path = "preprocess_json/links/Meshech_Hochma_links.json"
    with open(links_path, 'r') as filep:
        links_arr = json.loads(filep.read())
    # Quick sanity output: how many links were loaded and whether the file
    # really contained a JSON array.
    print("%s   %s" % (len(links_arr), isinstance(links_arr, list)))
    Helper.postLink(links_arr)
def post_links(book_name):
    """Post the saved links of *book_name*, one API call per link.

    Reads preprocess_json/links/<book_name>.json, which holds an array of
    raw links, and wraps each entry in a "commentary" link object before
    handing it to Helper.postLink.
    """
    dir_name = 'preprocess_json/links'
    with open(dir_name + "/" + book_name + ".json", 'r') as filep:
        links_arr = json.load(filep)
    for refs in links_arr:
        # The file stores only the refs; the rest of the link object is
        # filled in here.
        Helper.postLink({
            "type": "commentary",
            "refs": refs,
            "anchorText": "",
        })
def post_links(book_name):
    """Post all saved Mishnah-commentary links of *book_name* in one batch.

    Reads preprocess_json/mishnahCommentary/links/<book_name>.json, which
    holds an array of raw links, wraps each entry in a "commentary" link
    object, and passes the whole list to Helper.postLink in a single call.
    """
    dir_name = 'preprocess_json/mishnahCommentary/links'
    with open(dir_name + "/" + book_name + ".json", 'r') as filep:
        links_arr = json.load(filep)
    # The file stores only the refs; build the full link objects here.
    links = [
        {
            "type": "commentary",
            "refs": refs,
            "anchorText": "",
        }
        for refs in links_arr
    ]
    Helper.postLink(links)
tiferet_shmuel.save_parsed_text(parsed) tiferet_shmuel.run_post_to_api() for k, perek in enumerate(parsed_text): for i, seif in enumerate(perek): for j, siman in enumerate(seif): #if re.match('\(.\)', siman): if ur'(*)' in siman: a = re.findall('\(. \)', siman) for b in a: print siman count +=1 roash = "Rosh on %s." % masechet + str(k+2) + "." + str(i+1) + "." + str(j+1) shmuel = "Tiferet Shmuel on " + masechet + "." + str(count) shmuellinks.append(link(roash, shmuel)) print count Helper.postLink(shmuellinks) if __name__ == '__main__': if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)): print "has Korban 1" Helper.createBookRecord(nosekelim.book_record(commentator="Korban Netanel")) if os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet)): # print "has Pilpula 1" + masechet Helper.createBookRecord(nosekelim.book_record(commentator="Pilpula Charifta")) text = open_file() print masechet if test_depth(text) == True: print "true" parsed_text = parse(text)
"language": "he", "text": parsed_text, "digitizedBySefaria": True, "license": "Public Domain", "licenseVetted": True, "status": "locked", } Helper.mkdir_p("preprocess_json/") with open("preprocess_json/Rif_on_%s.json" % masechet, 'w') as out: json.dump(text_whole, out) def run_post_to_api(): Helper.createBookRecord(book_record()) with open("preprocess_json/Rif_on_%s.json" % masechet, 'r') as filep: file_text = filep.read() Helper.postText("Rif %s" % masechet, file_text, False) if __name__ == '__main__': #shas = get_shas() #Helper.createBookRecord(book_record()) text = open_file() parsed_text = parse(text) links(parsed_text, shas) clean_text = clean(parsed_text) save_parsed_text(clean_text) run_post_to_api() #new_tzitutim = list(set(tzitutim)) Helper.postLink(tzitutim)
def save_parsed_text(text):
    """Write *text* to preprocess_json/Rosh_on_Taanit.json as a version record.

    The record carries the title, version title/source and language next to
    the text itself; the output directory is created first via
    Helper.mkdir_p.
    """
    Helper.mkdir_p("preprocess_json/")
    version_record = {
        "title": 'Rosh on Taanit',
        "versionTitle": "Vilna, 1842",
        "versionSource": "???",
        "language": "he",
        "text": text,
    }
    with open("preprocess_json/Rosh_on_Taanit.json", 'w') as out:
        json.dump(version_record, out)


def run_post_to_api():
    """Create the book record, then post the saved JSON file as its text."""
    Helper.createBookRecord(book_record())
    with open("preprocess_json/Rosh_on_Taanit.json", 'r') as filep:
        payload = filep.read()
    # The file content is passed on as the raw string read from disk.
    Helper.postText("Rosh on Taanit", payload, False)


if __name__ == '__main__':
    # Pipeline: read the source, parse it, clean it, save it, upload it,
    # and finally post the module-level `links`.
    text = open_file()
    parsed_text = parse(text)
    upload_text = clean(parsed_text)
    # NOTE(review): run_post_to_api() creates the book record again below;
    # this earlier call is kept so the sequence of API calls is unchanged.
    Helper.createBookRecord(book_record())
    save_parsed_text(upload_text)
    run_post_to_api()
    Helper.postLink(links)
roash = "Rosh on {}".format(masechet) + ", " + part + "." + str(k+1) links.append(makeLink(talmud,roash)) except Exception as e: print e if __name__ == '__main__': depth = lambda L: isinstance(L, list) and max(map(depth, L))+1 Helper.createBookRecord(book_record()) text = open_file() parsed = parse(text) link_parsed = list( parsed[i] for i in [0,2,3,4,6,7,8,9,10,11]) ## need to fix the numbering link_tiferet_shmuel(link_parsed) link_yomtov(parsed) names =names() print depth(parsed[6]) for parse, name in zip(parsed,names): if depth(parse) == 2: print name search2(parse, name) cleantext = clean2(parse) elif depth(parse) ==1: print name search1(parse, name) cleantext = clean1(parse) save_text(cleantext, name) run_post_to_api(name) for link in links: Helper.postLink(link)
json.dump(text_whole, out) def run_post_to_api(): Helper.createBookRecord(build_index()) with open("preprocess_json/Tur.json", 'r') as filep: file_text = filep.read() Helper.postText("New Tur, Orach Chaim", file_text, False) if __name__ == '__main__': betyosef = bet_yosef.open_file() karo = bet_yosef.parse(betyosef) bet_yosef.save_parsed_text(karo) bet_yosef.book_record() try: bet_yosef.run_post_to_api() except BadStatusLine: print "got bad status" text = open_file() parsed = parse(text) #compare(text,karo) save_parsed_text(parsed) try: run_post_to_api() except BadStatusLine: print "got bad status" for link in links: Helper.postLink(link) pass
"versionTitle": "Vilna", "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957", "language": "he", "text": parsed_text, "digitizedBySefaria": True, "license": "Public Domain", "licenseVetted": True, "status": "locked", } Helper.mkdir_p("preprocess_json/") with open("preprocess_json/Rif_on_%s.json" % masechet, 'w') as out: json.dump(text_whole, out) def run_post_to_api(): Helper.createBookRecord(book_record()) with open("preprocess_json/Rif_on_%s.json" % masechet, 'r') as filep: file_text = filep.read() Helper.postText("Rif on %s" % masechet, file_text, False) if __name__ == '__main__': shas = get_shas() Helper.createBookRecord(book_record()) text = open_file() parsed_text = parse(text) links(parsed_text, shas) clean_text = clean(parsed_text) save_parsed_text(clean_text) run_post_to_api() Helper.postLink(tzitutim)
for k, perek in enumerate(parsed_text): for i, seif in enumerate(perek): for j, siman in enumerate(seif): #if re.match('\(.\)', siman): if ur'(*)' in siman: #print ur"הגעתי לכאן!" a = re.findall('\(\*\)', siman) for b in a: #print siman count +=1 roash = "Rosh on %s." % masechet + str(k+1) + "." + str(i+1) + "." + str(j+1) shmuel = "Tiferet Shmuel on " + masechet + "." + str(count) shmuellinks.append(link(roash, shmuel)) #print count #print roash, shmuel Helper.postLink(shmuellinks) def maadaney_yom_tov(parsed_text): yomtovlinks = [] count = 0 file = tiferet_shmuel.open_file1() parsed = tiferet_shmuel.parse(file) Helper.createBookRecord(tiferet_shmuel.book_record(record = "yomtov")) tiferet_shmuel.save_parsed_text(parsed, record = "yomtov") tiferet_shmuel.run_post_to_api(record = "yomtov") for k, perek in enumerate(parsed_text): for i, seif in enumerate(perek): for j, siman in enumerate(seif): #print siman #if re.match('\(.\)', siman):
def post_links():
    """Post every link stored in the Mekhilta DeRashbi links file at once.

    The loaded JSON value is passed to Helper.postLink unchanged.
    """
    links_path = "preprocess_json/links/Mekhilta DeRashbi links.json"
    with open(links_path, 'r') as filep:
        links_arr = json.loads(filep.read())
    Helper.postLink(links_arr)
def parse(text): a = re.finditer(ur"@00(.*?)@11(.*?)@33", text, re.DOTALL) for din in a: if len(din.group(1).strip()) > 0: #print din.group(1).strip() pass if len(din.group(2).strip()) > 0: #print din.group(2).strip() pass cheleks = re.split(ur'(@11א @33)', text) partI = cheleks[0] + cheleks[1] + cheleks[2] partII = cheleks[3] + cheleks[4] cheleckI = re.split(ur"(@11שאלה א @33)", partI) keyI = cheleckI[0] terumathadeshenI = cheleckI[1] + cheleckI[2] cheleckII = re.split(ur"(@11סימן א @33)", partII) keyII = cheleckII[0] terumathadeshenII = cheleckII[1] + cheleckII[2] simanim = re.finditer( ur"@11([u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33(.*)?", keyI) old_num = 0 tdkeyone = [] for siman in simanim: #print siman.group(1) roman = sefaria.utils.hebrew.heb_string_to_int(siman.group(1).strip()) if roman - old_num != 1: for i in range(1, roman - old_num): tdkeyone.append("") old_num = roman siman_key = "<b>" + siman.group(1) + '</b>' + siman.group(2) tdkeyone.append(siman_key) save_parsed_text(tdkeyone, "Key part I") run_post_to_api("Key part I") tdone = [] seifim = re.split( ur"@11(שאלה\s?[u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33", partI) for num, seif in zip(seifim[1::2], seifim[2::2]): sh = [] #ans = re.split(ur"@11\s?(תשובה\s?)?@33",seif) ans = re.split(ur"@11", seif) for eoq, answer in zip(ans[0::2], ans[1::2]): a = re.findall(ur"@00(.*?)\n", answer) answer = re.sub(ur"@00(.*?)\n", " ", answer) if len(a) > 0: for b in a: #print b pass sheela = '<b>' + num + '</b>' + eoq tuva = re.split(ur"@33", answer) if len(tuva) > 1: tshuva = '<b>' + tuva[0] + '</b>' + tuva[1] else: tshuva = tuva[0] sh.append(sheela) sh.append(tshuva) tdone.append(sh) #print len(tdone) save_parsed_text(tdone, "Part I") run_post_to_api("Part I") for i, k in enumerate(tdkeyone): Helper.postLink(addlink("Key part I", "Part I", i)) pass simanimI = re.finditer( ur"@11([u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33(.*)?", keyII) old_num = 0 
tdkeytwo = [] for simanI in simanimI: #print simanI.group(1) romanI = sefaria.utils.hebrew.heb_string_to_int( simanI.group(1).strip()) if romanI - old_num != 1: #print simanI.group(1) for i in range(1, romanI - old_num): tdkeytwo.append("") old_num = romanI simanI_key = "<b>" + simanI.group(1) + '</b>' + simanI.group(2) tdkeytwo.append(simanI_key) save_parsed_text(tdkeytwo, "Key part II") run_post_to_api("Key part II") tdtwo = [] seifimI = re.split( ur"@11(סימן\s?[u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33", partII) for ansI, seifI in zip(seifimI[1::2], seifimI[2::2]): #print ansI.strip().split(" ")[1] teshuvaI = "<b>" + ansI + "</b>" + seifI teshuvaI = re.sub(ur"@00(.*?)\n", "", teshuvaI) tdtwo.append([teshuvaI]) save_parsed_text(tdtwo, "Part II") run_post_to_api("Part II") for i, k in enumerate(tdkeytwo): Helper.postLink(addlink("Key part II", "Part II", i)) pass
def parse(text): a = re.finditer(ur"@00(.*?)@11(.*?)@33",text,re.DOTALL) for din in a: if len(din.group(1).strip()) > 0: #print din.group(1).strip() pass if len(din.group(2).strip()) > 0: #print din.group(2).strip() pass cheleks = re.split(ur'(@11א @33)',text) partI = cheleks[0] + cheleks[1] +cheleks[2] partII = cheleks[3] + cheleks[4] cheleckI = re.split(ur"(@11שאלה א @33)",partI) keyI = cheleckI[0] terumathadeshenI = cheleckI[1] + cheleckI[2] cheleckII =re.split(ur"(@11סימן א @33)",partII) keyII = cheleckII[0] terumathadeshenII = cheleckII[1] + cheleckII[2] simanim = re.finditer(ur"@11([u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33(.*)?",keyI) old_num = 0 tdkeyone =[] for siman in simanim: #print siman.group(1) roman= sefaria.utils.hebrew.heb_string_to_int(siman.group(1).strip()) if roman-old_num !=1: for i in range(1,roman-old_num): tdkeyone.append("") old_num=roman siman_key = "<b>" + siman.group(1) + '</b>' + siman.group(2) tdkeyone.append(siman_key) save_parsed_text(tdkeyone, "Key part I") run_post_to_api("Key part I") tdone=[] seifim = re.split(ur"@11(שאלה\s?[u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33", partI ) for num, seif in zip(seifim[1::2],seifim[2::2]): sh =[] #ans = re.split(ur"@11\s?(תשובה\s?)?@33",seif) ans = re.split(ur"@11",seif) for eoq, answer in zip(ans[0::2], ans[1::2]): a = re.findall(ur"@00(.*?)\n",answer) answer = re.sub(ur"@00(.*?)\n", " ", answer ) if len(a) > 0: for b in a: #print b pass sheela = '<b>' + num + '</b>' + eoq tuva = re.split(ur"@33",answer) if len(tuva) > 1: tshuva = '<b>' + tuva[0] + '</b>' + tuva[1] else: tshuva = tuva[0] sh.append(sheela) sh.append(tshuva) tdone.append(sh) #print len(tdone) save_parsed_text(tdone, "Part I") run_post_to_api("Part I") for i,k in enumerate(tdkeyone): Helper.postLink(addlink("Key part I", "Part I",i)) pass simanimI = re.finditer(ur"@11([u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33(.*)?",keyII) old_num = 0 tdkeytwo =[] for simanI in 
simanimI: #print simanI.group(1) romanI= sefaria.utils.hebrew.heb_string_to_int(simanI.group(1).strip()) if romanI-old_num !=1: #print simanI.group(1) for i in range(1,romanI-old_num): tdkeytwo.append("") old_num=romanI simanI_key = "<b>" + simanI.group(1) + '</b>' + simanI.group(2) tdkeytwo.append(simanI_key) save_parsed_text(tdkeytwo, "Key part II") run_post_to_api("Key part II") tdtwo=[] seifimI = re.split(ur"@11(סימן\s?[u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33", partII ) for ansI, seifI in zip(seifimI[1::2], seifimI[2::2]): #print ansI.strip().split(" ")[1] teshuvaI = "<b>" + ansI + "</b>" +seifI teshuvaI = re.sub(ur"@00(.*?)\n","",teshuvaI) tdtwo.append([teshuvaI]) save_parsed_text(tdtwo, "Part II") run_post_to_api("Part II") for i,k in enumerate(tdkeytwo): Helper.postLink(addlink("Key part II", "Part II",i)) pass
for j, siman in enumerate(seif): #if re.match('\(.\)', siman): if ur'(*)' in siman: #print ur"הגעתי לכאן!" a = re.findall('\(\*\)', siman) for b in a: #print siman count += 1 roash = "Rosh on %s." % masechet + str( k + 1) + "." + str(i + 1) + "." + str(j + 1) shmuel = "Tiferet Shmuel on " + masechet + "." + str( count) shmuellinks.append(link(roash, shmuel)) #print count #print roash, shmuel Helper.postLink(shmuellinks) def maadaney_yom_tov(parsed_text): yomtovlinks = [] count = 0 file = tiferet_shmuel.open_file1() parsed = tiferet_shmuel.parse(file) Helper.createBookRecord(tiferet_shmuel.book_record(record="yomtov")) tiferet_shmuel.save_parsed_text(parsed, record="yomtov") tiferet_shmuel.run_post_to_api(record="yomtov") for k, perek in enumerate(parsed_text): for i, seif in enumerate(perek): for j, siman in enumerate(seif): #print siman #if re.match('\(.\)', siman):