def flatten_toc(toc, include_categories=False, categories_in_titles=False, version_granularity=False):
    """
    Returns an array of strings which correspond to each category and text in the
    Table of Contents, in order.

    - include_categories: whether to also list category names as their own entries.
    - categories_in_titles: whether to include each category preceding a text title,
        e.g., "Tanach > Torah > Genesis".
    - version_granularity: whether to include a separate entry for every text version.
    """
    results = []
    for x in toc:
        name = x.get("category", None) or x.get("title", None)
        if "category" in x:
            if include_categories:
                results += [name]
            subcats = flatten_toc(x["contents"], categories_in_titles=categories_in_titles)
            if categories_in_titles:
                subcats = ["%s > %s" % (name, y) for y in subcats]
            results += subcats

        elif "title" in x:
            if not version_granularity:
                results += [name]
            else:
                versions = texts.get_version_list(name)
                for v in versions:
                    lang = {"he": "Hebrew", "en": "English"}[v["language"]]
                    results += ["%s > %s > %s.json" % (name, lang, v["versionTitle"])]

    return results

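# --- Usage sketch (illustration only, not part of the original module) ---
# A hand-built TOC fragment showing the shape flatten_toc() expects and the
# strings it returns. The data below is invented for demonstration; the real
# table of contents comes from the Sefaria database.
def _demo_flatten_toc():
    toy_toc = [
        {"category": "Tanach", "contents": [
            {"category": "Torah", "contents": [
                {"title": "Genesis"},
                {"title": "Exodus"},
            ]},
        ]},
    ]
    print flatten_toc(toy_toc)
    # -> ["Genesis", "Exodus"]
    print flatten_toc(toy_toc, categories_in_titles=True)
    # -> ["Tanach > Torah > Genesis", "Tanach > Torah > Exodus"]
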
def index_text(tref, version=None, lang=None):
    """
    Index the text designated by tref.

    If no version and lang are given, this function will be called for each
    available version.

    Currently assumes tref is at section level.
    """
    #tref = texts.norm_ref(unicode(tref)) #todo: why the unicode()?
    tref = model.Ref(tref).normal()

    # Recall this function for each specific text version, if none was provided
    if not (version and lang):
        for v in texts.get_version_list(tref):
            index_text(tref, version=v["versionTitle"], lang=v["language"])
        return

    # Index each segment of this document individually
    oref = model.Ref(tref).padded_ref()
    if len(oref.sections) < len(oref.index.sectionNames):
        text = texts.get_text(tref, context=0, commentary=False, version=version, lang=lang)
        if "error" in text:
            print text["error"]
        else:
            for i in range(max(len(text["text"]), len(text["he"]))):
                index_text("%s:%d" % (tref, i + 1))

    # Don't try to index docs with depth 3
    if len(oref.sections) < len(oref.index.sectionNames) - 1:
        return

    # Index this document as a whole
    doc = make_text_index_document(tref, version, lang)
    if doc:
        try:
            global doc_count
            if doc_count % 5000 == 0:
                print "[%d] Indexing %s / %s / %s" % (doc_count, tref, version, lang)
            es.index('sefaria', 'text', doc, make_text_doc_id(tref, version, lang))
            doc_count += 1
        except Exception, e:
            print "ERROR indexing %s / %s / %s" % (tref, version, lang)
            pprint(e)

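# --- Usage sketch (illustration only, not part of the original module) ---
# Called with only a ref, index_text() looks up every available version of
# that text and indexes each one; called with an explicit version/lang pair
# it indexes just that version. The ref and version title below are examples
# and are not guaranteed to exist in any particular database.
def _demo_index_text():
    index_text("Genesis 1")  # every available version and language
    index_text("Genesis 1", version="Example Version Title", lang="en")  # one specific version
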
def index_text(ref, version=None, lang=None):
    """
    Index the text designated by ref.

    If no version and lang are given, this function will be called for each
    available version.

    Currently assumes ref is at section level.
    """
    ref = texts.norm_ref(unicode(ref))

    # Recall this function for each specific text version, if none was provided
    if not (version and lang):
        for v in texts.get_version_list(ref):
            index_text(ref, version=v["versionTitle"], lang=v["language"])
        return

    # Index each segment of this document individually
    pRef = texts.parse_ref(ref)
    if len(pRef["sections"]) < len(pRef["sectionNames"]):
        text = texts.get_text(ref, context=0, commentary=False, version=version, lang=lang)
        if "error" in text:
            print text["error"]
        else:
            for i in range(max(len(text["text"]), len(text["he"]))):
                index_text("%s:%d" % (ref, i + 1))

    # Don't try to index docs with depth 3
    if len(pRef["sections"]) < len(pRef["sectionNames"]) - 1:
        return

    # Index this document as a whole
    doc = make_text_index_document(ref, version, lang)
    if doc:
        try:
            es.index(doc, 'sefaria', 'text', make_text_doc_id(ref, version, lang))
            global doc_count
            doc_count += 1
        except Exception, e:
            print "Error indexing %s / %s / %s" % (ref, version, lang)
            print e

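# --- Driver sketch (hypothetical, not part of the original module) -------
# flatten_toc() and index_text() compose naturally: walk the flattened table
# of contents and hand each title to index_text(), which fans out over the
# available versions. The function name and the `toc` argument here are
# assumptions; note also that index_text() is documented as expecting a
# section-level ref, so a production driver would likely expand each title
# into its section refs first.
def _demo_index_library(toc):
    for title in flatten_toc(toc):
        try:
            index_text(title)
        except Exception, e:
            print "Skipping %s: %s" % (title, e)
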