Ejemplo n.º 1
0
    book.append(appendix)

    book.validate()
    index = {
    "title": title,
    "categories": ["Kabbalah"],
    "schema": book.serialize()
    }
    post_index(index)

if __name__ == "__main__":
    # Script entry point: set up the conversion of the "Or Neerav" XML
    # source, build the index via create_schema(), then run the converter.
    title = "Or Neerav"
    file_name = "../sources/DC labs/Robinson_MosesCordoveroIntroductionToKabbalah.xml"

    # One (section name, int, bool) tuple per top-level section of the book.
    # NOTE(review): the int/bool fields are interpreted by XML_to_JaggedArray
    # (presumably depth and a "has sub-sections" flag) -- confirm there.
    JA_array = [
        ("Introduction", 2, False),
        ("PART I", 2, True),
        ("PART II", 2, True),
        ("PART III", 2, True),
        ("PART IV", 2, True),
        ("PART V", 2, True),
        ("PART VI", 2, True),
        ("PART VII", 1, False),
        ("Appendix The Introductory Material", 2, True),
    ]

    # Version metadata handed to the converter.
    post_info = {
        "versionTitle": "hi",
        "versionSource": "hi",
        "language": "en",
    }

    # XML tags and attributes passed to the converter as "allowed".
    allowed_tags = ["book", "intro", "part", "appendix", "chapter", "p", "ftnote", "title"]
    allowed_attributes = ["id"]

    # Not every structurally significant tag is listed here -- only those
    # that must be named explicitly: such a tag has no children of its own,
    # yet everything following it (up to its next occurrence) should be
    # treated as its children.
    structural_tags = ["title"]

    ramak = XML_to_JaggedArray(
        title, file_name, JA_array, allowed_tags, allowed_attributes, post_info, parse
    )

    create_schema()
    ramak.run()
Ejemplo n.º 2
0
if __name__ == "__main__":
    # Script entry point: collect the English section names (front matter
    # plus one name per row of parsha.csv), then configure and run the
    # XML-to-JaggedArray conversion for Midrash Tanhuma.
    import csv

    # The two front-matter sections come before the names read from the CSV.
    en_parshiot = ["Foreword", "Introduction"]
    with open("../../../Sefaria-Project/data/tmp/parsha.csv") as parsha_file:
        parshiot = csv.reader(parsha_file)
        # Skip the first row (presumably a header row -- TODO confirm).
        # The built-in next() works on both Python 2.6+ and Python 3,
        # unlike the Python-2-only iterator method .next().
        next(parshiot)
        for row in parshiot:
            # Every row must have exactly three columns; only the English
            # name is used, so the other two are unpacked and left untouched.
            (en, he, ref) = row
            if en == "Lech-Lecha":
                # Replace the hyphenated spelling with the spaced one.
                en = "Lech Lecha"
            en_parshiot.append(en)

    # Version metadata handed to the converter.
    post_info = {}
    post_info["versionTitle"] = "Midrash Tanhuma-Yelammedenu, trans. Samuel A. Berman"
    post_info["versionSource"] = "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001350458"
    post_info["language"] = "en"

    # XML tags and attributes passed to the converter as "allowed".
    allowed_tags = ["book", "intro", "foreword", "part", "chapter", "p", "ftnote", "title"]
    allowed_attributes = ["id"]
    file_name = "Midrash-Tanhuma-Yelammedenu.xml"
    title = "Complex Midrash Tanchuma"
    tanhuma = XML_to_JaggedArray(title, file_name, allowed_tags, allowed_attributes, post_info, en_parshiot)

    # Callbacks handed to the converter via set_funcs:
    #  - grab_title_lambda is true when the first item of x is a <title> tag;
    #  - reorder_lambda always answers False.
    grab_title_lambda = lambda x: x[0].tag == "title"
    reorder_lambda = lambda x: False
    tanhuma.set_funcs(parse, grab_title_lambda, reorder_lambda)

    tanhuma.run()
Ejemplo n.º 3
0
        "title": title,
        "categories": ["Kabbalah"],
        "schema": book.serialize()
    }
    post_index(index)


if __name__ == "__main__":
    # Script entry point: set up the conversion of the English "Or Neerav"
    # XML file, build the index via create_schema(), then run the converter.
    title = "Or Neerav"
    file_name = "english_or_neerav.xml"

    # Version metadata handed to the converter.
    post_info = {
        "versionTitle": "Moses Cordovero’s Introduction to Kabbalah, Annotated trans. of Or ne'erav, Ira Robinson, 1994.",
        "versionSource": "http://www.ktav.com/index.php/moses-cordovero-s-introduction-to-kabbalah.html",
        "language": "en",
    }

    # XML tags and attributes passed to the converter as "allowed".
    allowed_tags = ["book", "intro", "part", "appendix", "chapter", "p", "ftnote", "title"]
    allowed_attributes = ["id"]

    # Not every structurally significant tag is listed here -- only those
    # that must be named explicitly: such a tag has no children of its own,
    # yet everything following it (up to its next occurrence) should be
    # treated as its children.
    structural_tags = ["title"]

    ramak = XML_to_JaggedArray(
        title, file_name, allowed_tags, allowed_attributes, post_info, parse
    )

    create_schema()
    ramak.run()