def run_post_links():
    """Load the saved Meshech Hochma link array and post it in one call.

    Reads ``preprocess_json/links/Meshech_Hochma_links.json``, prints the
    number of decoded entries and whether the decoded value is a list, then
    passes the decoded value unchanged to ``Helper.postLink``.
    """
    links_path = "preprocess_json/links/Meshech_Hochma_links.json"
    with open(links_path, 'r') as source:
        loaded_links = json.load(source)
    # One pre-formatted string: the count, three spaces, then the list flag,
    # which is exactly what the comma-separated print statement emitted.
    print("%d   %s" % (len(loaded_links), isinstance(loaded_links, list)))

    Helper.postLink(loaded_links)
def run_post_links():
    """Post the link array stored in Meshech_Hochma_links.json.

    The JSON file is decoded, a short diagnostic line (entry count and
    whether the value is a list) is printed, and the decoded value is given
    as-is to ``Helper.postLink``.
    """
    with open("preprocess_json/links/Meshech_Hochma_links.json", 'r') as handle:
        stored = json.load(handle)
    entry_count = len(stored)
    is_list = isinstance(stored, list)
    # Same text as printing count, " ", flag with a print statement:
    # the three items end up separated by three spaces in total.
    print("%d   %s" % (entry_count, is_list))

    Helper.postLink(stored)
# Example no. 3
# 0
def post_links(book_name):
	"""Post each saved link for ``book_name`` as its own commentary link.

	Reads ``preprocess_json/links/<book_name>.json`` and, for every entry
	in the decoded array, calls ``Helper.postLink`` with a dict whose
	``refs`` value is that entry.
	"""
	links_path = "/".join(('preprocess_json/links', book_name + ".json"))
	with open(links_path, 'r') as source:
		saved_refs = json.load(source)
	# One call per entry; the entries are not batched here.
	for refs in saved_refs:
		Helper.postLink({
			"type": "commentary",
			"refs": refs,
			"anchorText": "",
		})
def post_links(book_name):
    """Wrap every saved link for ``book_name`` and post them as one list.

    Reads ``preprocess_json/mishnahCommentary/links/<book_name>.json``,
    turns each entry of the decoded array into a commentary-link dict, and
    passes the resulting list to ``Helper.postLink`` in a single call.
    """
    links_path = "/".join(
        ('preprocess_json/mishnahCommentary/links', book_name + ".json"))
    with open(links_path, 'r') as source:
        saved_refs = json.load(source)
    link_objects = [
        {
            "type": "commentary",
            "refs": refs,
            "anchorText": "",
        }
        for refs in saved_refs
    ]
    Helper.postLink(link_objects)
# Example no. 5
# 0
    tiferet_shmuel.save_parsed_text(parsed)
    tiferet_shmuel.run_post_to_api()
    for k, perek in enumerate(parsed_text):
        for i, seif in enumerate(perek):
            for j, siman in enumerate(seif):
                #if re.match('\(.\)', siman):
                 if ur'(*)' in siman:
                    a = re.findall('\(. \)', siman)
                    for b in a:
                        print siman
                        count +=1
                        roash = "Rosh on %s." % masechet + str(k+2) + "." + str(i+1) + "." + str(j+1)
                        shmuel = "Tiferet Shmuel on " + masechet + "." + str(count)
                        shmuellinks.append(link(roash, shmuel))
                        print count
    Helper.postLink(shmuellinks)



if __name__ == '__main__':
    # Script entry point. Create a book record for each optional commentary
    # whose source file exists for the current `masechet`, then open and
    # parse the main text.
    if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)):
       print "has Korban 1"
       Helper.createBookRecord(nosekelim.book_record(commentator="Korban Netanel"))
    if os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
     #  print "has Pilpula 1" + masechet
       Helper.createBookRecord(nosekelim.book_record(commentator="Pilpula Charifta"))
    text = open_file()
    print masechet
    # Parsing only happens when test_depth() returns a value equal to True.
    if test_depth(text) == True:
        print "true"
        parsed_text = parse(text)
# Example no. 6
# 0
        "language": "he",
        "text": parsed_text,
        "digitizedBySefaria": True,
        "license": "Public Domain",
        "licenseVetted": True,
        "status": "locked",
    }
    Helper.mkdir_p("preprocess_json/")
    with open("preprocess_json/Rif_on_%s.json" % masechet, 'w') as out:
        json.dump(text_whole, out)


def run_post_to_api():
    """Create the book record, then post the saved Rif JSON for ``masechet``.

    The raw contents of ``preprocess_json/Rif_on_<masechet>.json`` are read
    as a string and handed to ``Helper.postText`` under the title
    ``"Rif <masechet>"``, with ``False`` as the third argument.
    """
    Helper.createBookRecord(book_record())
    json_path = "preprocess_json/Rif_on_%s.json" % masechet
    with open(json_path, 'r') as saved:
        payload = saved.read()
    Helper.postText("Rif %s" % masechet, payload, False)


if __name__ == '__main__':
    # Script entry point: parse the source text, build links from it, clean
    # it, save and post the cleaned text, then post the collected links.
    #shas = get_shas()
    #Helper.createBookRecord(book_record())
    text = open_file()
    parsed_text = parse(text)
    # NOTE(review): `shas` is only assigned in the commented-out line above;
    # unless it is defined elsewhere in the module this call raises NameError.
    links(parsed_text, shas)
    clean_text = clean(parsed_text)
    save_parsed_text(clean_text)
    run_post_to_api()
    #new_tzitutim = list(set(tzitutim))
    # NOTE(review): `tzitutim` is not assigned in this block; presumably a
    # module-level list filled by links() -- confirm.
    Helper.postLink(tzitutim)

def save_parsed_text(text):
    """Serialize ``text`` as the 'Rosh on Taanit' version record.

    Wraps ``text`` in a dict carrying the title, version title, version
    source and language, makes sure ``preprocess_json/`` exists via
    ``Helper.mkdir_p``, and dumps the dict as JSON to
    ``preprocess_json/Rosh_on_Taanit.json``.
    """
    version_record = {
        "title": 'Rosh on Taanit',
        "versionTitle": "Vilna, 1842",
        "versionSource": "???",
        "language": "he",
        "text": text,
    }
    output_path = "preprocess_json/Rosh_on_Taanit.json"
    Helper.mkdir_p("preprocess_json/")
    with open(output_path, 'w') as sink:
        json.dump(version_record, sink)


def run_post_to_api():
    """Create the book record, then post the saved 'Rosh on Taanit' JSON.

    The file ``preprocess_json/Rosh_on_Taanit.json`` is read as a raw string
    and passed to ``Helper.postText`` with ``False`` as the third argument.
    """
    Helper.createBookRecord(book_record())
    json_path = "preprocess_json/Rosh_on_Taanit.json"
    with open(json_path, 'r') as saved:
        payload = saved.read()
    Helper.postText("Rosh on Taanit", payload, False)

if __name__ == '__main__':
    # Script entry point: parse and clean the source text, create the book
    # record, save and post the text, then post the links.
    text = open_file()
    parsed_text = parse(text)
    upload_text = clean(parsed_text)
    # NOTE(review): run_post_to_api() below also calls
    # Helper.createBookRecord(book_record()), so the record is created twice.
    Helper.createBookRecord(book_record())
    save_parsed_text(upload_text)
    run_post_to_api()
    # NOTE(review): `links` is not assigned in this block; presumably a
    # module-level list filled during parsing -- confirm.
    Helper.postLink(links)
                        roash = "Rosh on {}".format(masechet) + ", " + part +  "." + str(k+1)
                        links.append(makeLink(talmud,roash))
                except Exception as e:
                    print e



if __name__ == '__main__':
    # Script entry point: create the book record, parse the source, build the
    # cross-links, then search, clean, save and post every parsed section
    # under its name, and finally post the collected links one at a time.

    # Nesting depth of a list of lists; a non-list yields False (i.e. 0).
    # An empty list still raises ValueError from max(), as before.
    depth = lambda L: isinstance(L, list) and max(map(depth, L))+1
    Helper.createBookRecord(book_record())
    text = open_file()
    parsed = parse(text)
    link_parsed = [parsed[i] for i in [0,2,3,4,6,7,8,9,10,11]] ## need to fix the numbering
    link_tiferet_shmuel(link_parsed)
    link_yomtov(parsed)
    # NOTE(review): this rebinds the module-level `names` function to its
    # result; kept as-is because other code may read the rebound global.
    names =names()
    print(depth(parsed[6]))
    # The loop variable is deliberately not called `parse`, so the
    # module-level parse() function is no longer overwritten by a section.
    for section, name in zip(parsed, names):
        section_depth = depth(section)
        if section_depth == 2:
            print(name)
            search2(section, name)
            cleantext = clean2(section)
        elif section_depth == 1:
            print(name)
            search1(section, name)
            cleantext = clean1(section)
        else:
            # No cleaner exists for this depth. Previously execution fell
            # through with `cleantext` unbound (NameError on the first
            # section) or still holding the previous section's text, which
            # was then saved and posted under this section's name.
            print("skipping %s: unsupported depth %s" % (name, section_depth))
            continue
        save_text(cleantext, name)
        run_post_to_api(name)
    for link in links:
       Helper.postLink(link)
# Example no. 9
# 0
        json.dump(text_whole, out)


def run_post_to_api():
    """Create the record from ``build_index()``, then post the saved Tur JSON.

    The file ``preprocess_json/Tur.json`` is read as a raw string and passed
    to ``Helper.postText`` under the title ``"New Tur, Orach Chaim"``, with
    ``False`` as the third argument.
    """
    Helper.createBookRecord(build_index())
    json_path = "preprocess_json/Tur.json"
    with open(json_path, 'r') as saved:
        payload = saved.read()
    Helper.postText("New Tur, Orach Chaim", payload, False)


if __name__ == '__main__':
    # Script entry point: process and post the bet_yosef text first, then
    # this module's own text, then post the collected links one at a time.
    betyosef = bet_yosef.open_file()
    karo = bet_yosef.parse(betyosef)
    bet_yosef.save_parsed_text(karo)
    # NOTE(review): the return value is discarded here -- confirm the call
    # is needed for a side effect.
    bet_yosef.book_record()
    try:
        bet_yosef.run_post_to_api()
    except BadStatusLine:
        # Best effort: report the failure and carry on with the next step.
        print "got bad status"
    text = open_file()
    parsed = parse(text)
    #compare(text,karo)
    save_parsed_text(parsed)
    try:
        run_post_to_api()
    except BadStatusLine:
        # Same best-effort handling as for the bet_yosef post above.
        print "got bad status"
    # NOTE(review): `links` is not assigned in this block; presumably a
    # module-level list filled during parsing -- confirm.
    for link in links:
        Helper.postLink(link)
        pass
# Example no. 10
# 0
def save_parsed_text(text):
    """Write ``text`` to disk as the 'Rosh on Taanit' version record.

    The text is embedded in a dict alongside the title, version title,
    version source and language; ``Helper.mkdir_p`` is called for
    ``preprocess_json/`` and the dict is dumped as JSON into
    ``preprocess_json/Rosh_on_Taanit.json``.
    """
    record = {
        "title": 'Rosh on Taanit',
        "versionTitle": "Vilna, 1842",
        "versionSource": "???",
        "language": "he",
        "text": text,
    }
    # Ensure the target directory exists before opening the file for writing.
    Helper.mkdir_p("preprocess_json/")
    target = "preprocess_json/Rosh_on_Taanit.json"
    with open(target, 'w') as json_file:
        json.dump(record, json_file)


def run_post_to_api():
    """Create the book record and post the stored 'Rosh on Taanit' text.

    Reads ``preprocess_json/Rosh_on_Taanit.json`` as a raw string and gives
    it to ``Helper.postText`` with ``False`` as the third argument.
    """
    Helper.createBookRecord(book_record())
    with open("preprocess_json/Rosh_on_Taanit.json", 'r') as stored:
        serialized = stored.read()
    title = "Rosh on Taanit"
    Helper.postText(title, serialized, False)


if __name__ == '__main__':
    # Script entry point: parse and clean the source text, create the book
    # record, save and post the text, then post the links.
    text = open_file()
    parsed_text = parse(text)
    upload_text = clean(parsed_text)
    # NOTE(review): run_post_to_api() below also calls
    # Helper.createBookRecord(book_record()), so the record is created twice.
    Helper.createBookRecord(book_record())
    save_parsed_text(upload_text)
    run_post_to_api()
    # NOTE(review): `links` is not assigned in this block; presumably a
    # module-level list filled during parsing -- confirm.
    Helper.postLink(links)
# Example no. 11
# 0
        "versionTitle": "Vilna",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001300957",
        "language": "he",
        "text": parsed_text,
        "digitizedBySefaria": True,
        "license": "Public Domain",
        "licenseVetted": True,
        "status": "locked",
    }
    Helper.mkdir_p("preprocess_json/")
    with open("preprocess_json/Rif_on_%s.json" % masechet, 'w') as out:
        json.dump(text_whole, out)


def run_post_to_api():
    """Create the book record, then post the saved Rif JSON for ``masechet``.

    The raw contents of ``preprocess_json/Rif_on_<masechet>.json`` are read
    as a string and handed to ``Helper.postText`` under the title
    ``"Rif on <masechet>"``, with ``False`` as the third argument.
    """
    Helper.createBookRecord(book_record())
    json_path = "preprocess_json/Rif_on_%s.json" % masechet
    with open(json_path, 'r') as saved:
        payload = saved.read()
    Helper.postText("Rif on %s" % masechet, payload, False)

if __name__ == '__main__':
    # Script entry point: fetch the shas data, create the book record, parse
    # the source text, build links, clean, save and post the text, then post
    # the collected links.
    shas = get_shas()
    # NOTE(review): run_post_to_api() below also calls
    # Helper.createBookRecord(book_record()), so the record is created twice.
    Helper.createBookRecord(book_record())
    text = open_file()
    parsed_text = parse(text)
    links(parsed_text, shas)
    clean_text = clean(parsed_text)
    save_parsed_text(clean_text)
    run_post_to_api()
    # NOTE(review): `tzitutim` is not assigned in this block; presumably a
    # module-level list filled by links() -- confirm.
    Helper.postLink(tzitutim)
# Example no. 12
# 0
    for k, perek in enumerate(parsed_text):
        for i, seif in enumerate(perek):
            for j, siman in enumerate(seif):
                #if re.match('\(.\)', siman):
                 if ur'(*)' in siman:
                    #print ur"הגעתי לכאן!"
                    a = re.findall('\(\*\)', siman)
                    for b in a:
                        #print siman
                        count +=1
                        roash = "Rosh on %s." % masechet + str(k+1) + "." + str(i+1) + "." + str(j+1)
                        shmuel = "Tiferet Shmuel on " + masechet + "." + str(count)
                        shmuellinks.append(link(roash, shmuel))
                        #print count
                        #print roash, shmuel
    Helper.postLink(shmuellinks)


def maadaney_yom_tov(parsed_text):
    yomtovlinks = []
    count = 0
    file = tiferet_shmuel.open_file1()
    parsed = tiferet_shmuel.parse(file)
    Helper.createBookRecord(tiferet_shmuel.book_record(record = "yomtov"))
    tiferet_shmuel.save_parsed_text(parsed, record = "yomtov")
    tiferet_shmuel.run_post_to_api(record = "yomtov")
    for k, perek in enumerate(parsed_text):
        for i, seif in enumerate(perek):
            for j, siman in enumerate(seif):
                #print siman
                #if re.match('\(.\)', siman):
# Example no. 13
# 0
def post_links():
    """Load the saved Mekhilta DeRashbi links and post them in one call.

    The decoded contents of
    ``preprocess_json/links/Mekhilta DeRashbi links.json`` are passed
    unchanged to ``Helper.postLink``.
    """
    links_path = "preprocess_json/links/Mekhilta DeRashbi links.json"
    with open(links_path, 'r') as source:
        saved_links = json.load(source)
    Helper.postLink(saved_links)
# Example no. 14
# 0
def parse(text):
    """Split the raw source into four sections, then save and post each one.

    ``text`` is carved into a key and a body for part I and for part II
    using the ``@11...@33`` heading markers. For each of "Key part I",
    "Part I", "Key part II" and "Part II" the function calls
    ``save_parsed_text`` and ``run_post_to_api``; after each body it posts
    one link per key entry through ``Helper.postLink(addlink(...))``.

    There is no ``return`` statement: the function works purely through
    those calls.
    """
    # This first pass has no effect: both branches are `pass` (the prints
    # are commented out).
    a = re.finditer(ur"@00(.*?)@11(.*?)@33", text, re.DOTALL)
    for din in a:
        if len(din.group(1).strip()) > 0:
            #print din.group(1).strip()
            pass
        if len(din.group(2).strip()) > 0:
            #print din.group(2).strip()
            pass
    # The capturing group keeps the marker in the result, so the list is
    # [before, marker, between, marker, after, ...]. Indexes 3 and 4 require
    # the marker to occur at least twice, otherwise IndexError is raised.
    cheleks = re.split(ur'(@11א @33)', text)
    # partI runs up to the second marker; partII is the second marker plus
    # the text after it (up to a third marker, if one exists).
    partI = cheleks[0] + cheleks[1] + cheleks[2]
    partII = cheleks[3] + cheleks[4]
    cheleckI = re.split(ur"(@11שאלה א @33)", partI)
    # keyI is everything before the first question heading of part I.
    keyI = cheleckI[0]
    # Never read again in this function; the indexing still requires the
    # heading to be present in partI.
    terumathadeshenI = cheleckI[1] + cheleckI[2]
    cheleckII = re.split(ur"(@11סימן א @33)", partII)
    keyII = cheleckII[0]
    # Never read again in this function (same remark as above).
    terumathadeshenII = cheleckII[1] + cheleckII[2]
    # Key entries: a one-to-three character heading between @11 and @33,
    # followed by the rest of the line.
    # NOTE(review): the class [u'\u05d0-\u05ea'] also matches the literal
    # characters `u` and `'`, not only the Hebrew range -- confirm intent.
    simanim = re.finditer(
        ur"@11([u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33(.*)?",
        keyI)
    old_num = 0
    tdkeyone = []
    for siman in simanim:
        #print siman.group(1)
        # Presumably the numeric value of the Hebrew heading -- confirm
        # against sefaria.utils.hebrew.
        roman = sefaria.utils.hebrew.heb_string_to_int(siman.group(1).strip())
        # Pad with empty strings for skipped numbers, so that (for ascending
        # headings) each entry sits at index number - 1.
        if roman - old_num != 1:
            for i in range(1, roman - old_num):
                tdkeyone.append("")
        old_num = roman
        siman_key = "<b>" + siman.group(1) + '</b>' + siman.group(2)
        tdkeyone.append(siman_key)
    save_parsed_text(tdkeyone, "Key part I")
    run_post_to_api("Key part I")
    tdone = []
    # With a capturing group, odd indexes hold the question headings and the
    # even indexes from 2 onward hold the text following each heading.
    seifim = re.split(
        ur"@11(שאלה\s?[u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33",
        partI)
    for num, seif in zip(seifim[1::2], seifim[2::2]):
        sh = []
        #ans =  re.split(ur"@11\s?(תשובה\s?)?@33",seif)
        # Chunks between "@11" tokens are paired up as (even, odd) items.
        ans = re.split(ur"@11", seif)
        for eoq, answer in zip(ans[0::2], ans[1::2]):
            # `a` is only consumed by the no-op loop below; the "@00" lines
            # themselves are replaced by a single space in `answer`.
            a = re.findall(ur"@00(.*?)\n", answer)
            answer = re.sub(ur"@00(.*?)\n", " ", answer)
            if len(a) > 0:
                for b in a:
                    #print b
                    pass
            sheela = '<b>' + num + '</b>' + eoq
            # Text before "@33" is bolded; only the first two pieces are used.
            tuva = re.split(ur"@33", answer)
            if len(tuva) > 1:
                tshuva = '<b>' + tuva[0] + '</b>' + tuva[1]
            else:
                tshuva = tuva[0]
        # NOTE(review): these appends sit outside the inner loop, so only the
        # last (eoq, answer) pair is kept; if the inner loop runs zero times,
        # sheela/tshuva are unbound or left over from the previous question.
        sh.append(sheela)
        sh.append(tshuva)
        tdone.append(sh)
    #print len(tdone)
    save_parsed_text(tdone, "Part I")
    run_post_to_api("Part I")
    # One link per key entry (padding entries included); `k` is unused.
    for i, k in enumerate(tdkeyone):
        Helper.postLink(addlink("Key part I", "Part I", i))
        pass
    # Part II key: same procedure as the part I key, applied to keyII.
    simanimI = re.finditer(
        ur"@11([u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33(.*)?",
        keyII)
    old_num = 0
    tdkeytwo = []
    for simanI in simanimI:
        #print simanI.group(1)
        romanI = sefaria.utils.hebrew.heb_string_to_int(
            simanI.group(1).strip())
        if romanI - old_num != 1:
            #print simanI.group(1)
            for i in range(1, romanI - old_num):
                tdkeytwo.append("")
        old_num = romanI
        simanI_key = "<b>" + simanI.group(1) + '</b>' + simanI.group(2)
        tdkeytwo.append(simanI_key)
    save_parsed_text(tdkeytwo, "Key part II")
    run_post_to_api("Key part II")
    tdtwo = []
    # Part II body: each heading and the text after it become one
    # single-element list, with "@00" lines removed.
    seifimI = re.split(
        ur"@11(סימן\s?[u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33",
        partII)
    for ansI, seifI in zip(seifimI[1::2], seifimI[2::2]):
        #print ansI.strip().split(" ")[1]
        teshuvaI = "<b>" + ansI + "</b>" + seifI
        teshuvaI = re.sub(ur"@00(.*?)\n", "", teshuvaI)
        tdtwo.append([teshuvaI])
    save_parsed_text(tdtwo, "Part II")
    run_post_to_api("Part II")
    # One link per key entry (padding entries included); `k` is unused.
    for i, k in enumerate(tdkeytwo):
        Helper.postLink(addlink("Key part II", "Part II", i))
        pass
def parse(text):
    """Split the raw source into four sections, then save and post each one.

    ``text`` is carved into a key and a body for part I and for part II
    using the ``@11...@33`` heading markers. For each of "Key part I",
    "Part I", "Key part II" and "Part II" the function calls
    ``save_parsed_text`` and ``run_post_to_api``; after each body it posts
    one link per key entry through ``Helper.postLink(addlink(...))``.

    There is no ``return`` statement: the function works purely through
    those calls.
    """
    # This first pass has no effect: both branches are `pass` (the prints
    # are commented out).
    a =  re.finditer(ur"@00(.*?)@11(.*?)@33",text,re.DOTALL)
    for din in a:
        if len(din.group(1).strip()) > 0:
            #print din.group(1).strip()
            pass
        if len(din.group(2).strip()) > 0:
            #print din.group(2).strip()
            pass
    # The capturing group keeps the marker in the result, so the list is
    # [before, marker, between, marker, after, ...]. Indexes 3 and 4 require
    # the marker to occur at least twice, otherwise IndexError is raised.
    cheleks = re.split(ur'(@11א @33)',text)
    # partI runs up to the second marker; partII is the second marker plus
    # the text after it (up to a third marker, if one exists).
    partI = cheleks[0] + cheleks[1] +cheleks[2]
    partII = cheleks[3] + cheleks[4]
    cheleckI = re.split(ur"(@11שאלה א @33)",partI)
    # keyI is everything before the first question heading of part I.
    keyI = cheleckI[0]
    # Never read again in this function; the indexing still requires the
    # heading to be present in partI.
    terumathadeshenI = cheleckI[1] + cheleckI[2]
    cheleckII =re.split(ur"(@11סימן א @33)",partII)
    keyII = cheleckII[0]
    # Never read again in this function (same remark as above).
    terumathadeshenII = cheleckII[1] + cheleckII[2]
    # Key entries: a one-to-three character heading between @11 and @33,
    # followed by the rest of the line.
    # NOTE(review): the class [u'\u05d0-\u05ea'] also matches the literal
    # characters `u` and `'`, not only the Hebrew range -- confirm intent.
    simanim = re.finditer(ur"@11([u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33(.*)?",keyI)
    old_num = 0
    tdkeyone =[]
    for siman in simanim:
        #print siman.group(1)
        # Presumably the numeric value of the Hebrew heading -- confirm
        # against sefaria.utils.hebrew.
        roman= sefaria.utils.hebrew.heb_string_to_int(siman.group(1).strip())
        # Pad with empty strings for skipped numbers, so that (for ascending
        # headings) each entry sits at index number - 1.
        if roman-old_num !=1:
            for i in range(1,roman-old_num):
                tdkeyone.append("")
        old_num=roman
        siman_key = "<b>" + siman.group(1) + '</b>' + siman.group(2)
        tdkeyone.append(siman_key)
    save_parsed_text(tdkeyone, "Key part I")
    run_post_to_api("Key part I")
    tdone=[]
    # With a capturing group, odd indexes hold the question headings and the
    # even indexes from 2 onward hold the text following each heading.
    seifim = re.split(ur"@11(שאלה\s?[u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33", partI )
    for num, seif in zip(seifim[1::2],seifim[2::2]):
        sh =[]
        #ans =  re.split(ur"@11\s?(תשובה\s?)?@33",seif)
        # Chunks between "@11" tokens are paired up as (even, odd) items.
        ans =  re.split(ur"@11",seif)
        for eoq, answer in zip(ans[0::2], ans[1::2]):
            # `a` is only consumed by the no-op loop below; the "@00" lines
            # themselves are replaced by a single space in `answer`.
            a = re.findall(ur"@00(.*?)\n",answer)
            answer = re.sub(ur"@00(.*?)\n", " ", answer )
            if len(a) > 0:
                for b in a:
                    #print b
                    pass
            sheela = '<b>' + num + '</b>' + eoq
            # Text before "@33" is bolded; only the first two pieces are used.
            tuva = re.split(ur"@33",answer)
            if len(tuva) > 1:
                tshuva = '<b>' + tuva[0] + '</b>' + tuva[1]
            else:
                tshuva = tuva[0]
        # NOTE(review): these appends sit outside the inner loop, so only the
        # last (eoq, answer) pair is kept; if the inner loop runs zero times,
        # sheela/tshuva are unbound or left over from the previous question.
        sh.append(sheela)
        sh.append(tshuva)
        tdone.append(sh)
    #print len(tdone)
    save_parsed_text(tdone, "Part I")
    run_post_to_api("Part I")
    # One link per key entry (padding entries included); `k` is unused.
    for i,k in enumerate(tdkeyone):
        Helper.postLink(addlink("Key part I", "Part I",i))
        pass
    # Part II key: same procedure as the part I key, applied to keyII.
    simanimI = re.finditer(ur"@11([u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33(.*)?",keyII)
    old_num = 0
    tdkeytwo =[]
    for simanI in simanimI:
        #print simanI.group(1)
        romanI= sefaria.utils.hebrew.heb_string_to_int(simanI.group(1).strip())
        if romanI-old_num !=1:
            #print simanI.group(1)
            for i in range(1,romanI-old_num):
                tdkeytwo.append("")
        old_num=romanI
        simanI_key = "<b>" + simanI.group(1) + '</b>' + simanI.group(2)
        tdkeytwo.append(simanI_key)
    save_parsed_text(tdkeytwo, "Key part II")
    run_post_to_api("Key part II")
    tdtwo=[]
    # Part II body: each heading and the text after it become one
    # single-element list, with "@00" lines removed.
    seifimI = re.split(ur"@11(סימן\s?[u'\u05d0-\u05ea'][u'\u05d0-\u05ea']?[u'\u05d0-\u05ea']?\s?)@33", partII )
    for ansI, seifI in zip(seifimI[1::2], seifimI[2::2]):
        #print ansI.strip().split(" ")[1]
        teshuvaI = "<b>" + ansI + "</b>" +seifI
        teshuvaI = re.sub(ur"@00(.*?)\n","",teshuvaI)
        tdtwo.append([teshuvaI])
    save_parsed_text(tdtwo, "Part II")
    run_post_to_api("Part II")
    # One link per key entry (padding entries included); `k` is unused.
    for i,k in enumerate(tdkeytwo):
        Helper.postLink(addlink("Key part II", "Part II",i))
        pass
# Example no. 16
# 0
            for j, siman in enumerate(seif):
                #if re.match('\(.\)', siman):
                if ur'(*)' in siman:
                    #print ur"הגעתי לכאן!"
                    a = re.findall('\(\*\)', siman)
                    for b in a:
                        #print siman
                        count += 1
                        roash = "Rosh on %s." % masechet + str(
                            k + 1) + "." + str(i + 1) + "." + str(j + 1)
                        shmuel = "Tiferet Shmuel on " + masechet + "." + str(
                            count)
                        shmuellinks.append(link(roash, shmuel))
                        #print count
                        #print roash, shmuel
    Helper.postLink(shmuellinks)


def maadaney_yom_tov(parsed_text):
    yomtovlinks = []
    count = 0
    file = tiferet_shmuel.open_file1()
    parsed = tiferet_shmuel.parse(file)
    Helper.createBookRecord(tiferet_shmuel.book_record(record="yomtov"))
    tiferet_shmuel.save_parsed_text(parsed, record="yomtov")
    tiferet_shmuel.run_post_to_api(record="yomtov")
    for k, perek in enumerate(parsed_text):
        for i, seif in enumerate(perek):
            for j, siman in enumerate(seif):
                #print siman
                #if re.match('\(.\)', siman):
# Example no. 17
# 0
    tiferet_shmuel.save_parsed_text(parsed)
    tiferet_shmuel.run_post_to_api()
    for k, perek in enumerate(parsed_text):
        for i, seif in enumerate(perek):
            for j, siman in enumerate(seif):
                #if re.match('\(.\)', siman):
                 if ur'(*)' in siman:
                    a = re.findall('\(. \)', siman)
                    for b in a:
                        print siman
                        count +=1
                        roash = "Rosh on %s." % masechet + str(k+2) + "." + str(i+1) + "." + str(j+1)
                        shmuel = "Tiferet Shmuel on " + masechet + "." + str(count)
                        shmuellinks.append(link(roash, shmuel))
                        print count
    Helper.postLink(shmuellinks)



if __name__ == '__main__':
    # Script entry point. Create a book record for each optional commentary
    # whose source file exists for the current `masechet`, then open and
    # parse the main text.
    if os.path.isfile('source/Korban_Netanel_on_{}.txt'.format(masechet)):
       print "has Korban 1"
       Helper.createBookRecord(nosekelim.book_record(commentator="Korban Netanel"))
    if os.path.isfile('source/Pilpula_Charifta_on_{}.txt'.format(masechet)):
     #  print "has Pilpula 1" + masechet
       Helper.createBookRecord(nosekelim.book_record(commentator="Pilpula Charifta"))
    text = open_file()
    print masechet
    # Parsing only happens when test_depth() returns a value equal to True.
    if test_depth(text) == True:
        print "true"
        parsed_text = parse(text)
def post_links():
    """Post the link array stored in 'Mekhilta DeRashbi links.json'.

    The file under ``preprocess_json/links/`` is decoded as JSON and the
    result is handed to ``Helper.postLink`` in a single call.
    """
    with open("preprocess_json/links/Mekhilta DeRashbi links.json", 'r') as handle:
        stored_links = json.load(handle)
    Helper.postLink(stored_links)