# NOTE(review): this physical line is whitespace-collapsed. It first carries the
# end of a definition whose start is not visible here: that code appends
# `appendix` to `book`, validates `book`, builds an index dict (title,
# categories ["Kabbalah"], serialized schema) and passes it to post_index().
# It is followed by the script entry point, which:
#   * lists the sections in JA_array as (name, int, bool) tuples -- the meaning
#     of the int and bool fields is not shown here; presumably a depth and a
#     "has sub-sections" flag, TODO confirm against XML_to_JaggedArray;
#   * fills post_info with versionTitle/versionSource "hi" (these look like
#     placeholders -- confirm before posting) and language "en";
#   * builds XML_to_JaggedArray(title, file_name, JA_array, allowed_tags,
#     allowed_attributes, post_info, parse), then calls create_schema()
#     before ramak.run().
# NOTE(review): `structural_tags` is assigned but never passed to anything on
# this line.
book.append(appendix) book.validate() index = { "title": title, "categories": ["Kabbalah"], "schema": book.serialize() } post_index(index) if __name__ == "__main__": JA_array = [("Introduction", 2, False), ("PART I", 2, True), ("PART II", 2, True), ("PART III", 2, True), ("PART IV", 2, True), ("PART V", 2, True)] JA_array += [("PART VI", 2, True), ("PART VII", 1, False), ("Appendix The Introductory Material", 2, True)] post_info = {} post_info["versionTitle"] = "hi" post_info["versionSource"] = "hi" post_info["language"] = "en" allowed_tags = ["book", "intro", "part", "appendix", "chapter", "p", "ftnote", "title"] structural_tags = ["title"] #this is not all tags with structural significance, but just #the ones we must explicitly mention, because it has no children, #we want what comes after it until the next instance of it to be its children anyway allowed_attributes = ["id"] file_name = "../sources/DC labs/Robinson_MosesCordoveroIntroductionToKabbalah.xml" title = "Or Neerav" ramak = XML_to_JaggedArray(title, file_name, JA_array, allowed_tags, allowed_attributes, post_info, parse) create_schema() ramak.run()
if __name__ == "__main__":
    # Script entry point: build the ordered list of English section names
    # (two front-matter sections followed by the parsha names read from
    # parsha.csv) and hand it, together with the version metadata, to
    # XML_to_JaggedArray for the Tanhuma XML file.
    import csv

    # Front-matter sections come before the parshiot.
    en_parshiot = ["Foreword", "Introduction"]

    with open("../../../Sefaria-Project/data/tmp/parsha.csv") as parsha_file:
        parshiot = csv.reader(parsha_file)
        # Skip the first row (presumably a header row). The next() builtin
        # works on Python 2.6+ and Python 3, unlike the Python 2-only
        # reader.next() method.
        next(parshiot)
        for row in parshiot:
            # Rows must have exactly three columns; only the first one
            # (the English name) is used by this script.
            en, _he, _ref = row
            # The CSV spells this parsha with a hyphen; the un-hyphenated
            # form is what gets passed on (presumably to match the section
            # titles used elsewhere -- confirm).
            if en == "Lech-Lecha":
                en = "Lech Lecha"
            en_parshiot.append(en)

    post_info = {
        "versionTitle": "Midrash Tanhuma-Yelammedenu, trans. Samuel A. Berman",
        "versionSource": "http://primo.nli.org.il/primo_library/libweb/action/dlDisplay.do?vid=NLI&docId=NNL_ALEPH001350458",
        "language": "en",
    }
    allowed_tags = ["book", "intro", "foreword", "part", "chapter", "p", "ftnote", "title"]
    allowed_attributes = ["id"]
    file_name = "Midrash-Tanhuma-Yelammedenu.xml"
    title = "Complex Midrash Tanchuma"

    tanhuma = XML_to_JaggedArray(title, file_name, allowed_tags, allowed_attributes, post_info, en_parshiot)

    # Callbacks handed to set_funcs: the first tests whether the first item of
    # its argument is a <title> element; the second always answers False.
    grab_title_lambda = lambda x: x[0].tag == "title"
    reorder_lambda = lambda x: False
    tanhuma.set_funcs(parse, grab_title_lambda, reorder_lambda)
    tanhuma.run()
"title": title, "categories": ["Kabbalah"], "schema": book.serialize() } post_index(index) if __name__ == "__main__": post_info = {} post_info[ "versionTitle"] = "Moses Cordovero’s Introduction to Kabbalah, Annotated trans. of Or ne'erav, Ira Robinson, 1994." post_info[ "versionSource"] = "http://www.ktav.com/index.php/moses-cordovero-s-introduction-to-kabbalah.html" post_info["language"] = "en" allowed_tags = [ "book", "intro", "part", "appendix", "chapter", "p", "ftnote", "title" ] structural_tags = [ "title" ] #this is not all tags with structural significance, but just #the ones we must explicitly mention, because it has no children, #we want what comes after it until the next instance of it to be its children anyway allowed_attributes = ["id"] file_name = "english_or_neerav.xml" title = "Or Neerav" ramak = XML_to_JaggedArray(title, file_name, allowed_tags, allowed_attributes, post_info, parse) create_schema() ramak.run()