def generate_refs_list(query=None):
    """
    Generate a list of refs to all available sections.

    :param query: optional filter passed to ``db.counts.find``. ``None``
        (the default) is treated as an empty filter, i.e. all count records.
    :return: list of string refs, one per available section of every text
        that has a count record. A section whose name is empty contributes
        the bare title.

    Side effect: a count record whose title cannot be loaded as an index
    is removed from ``db.counts``.
    """
    # Avoid a shared mutable default argument.
    if query is None:
        query = {}

    trefs = []
    for c in db.counts.find(query):
        if "title" not in c:
            continue  # this is a category count

        title = c["title"]
        try:
            index = model.get_index(title)
        except Exception:
            # If there is no index record to match the count record,
            # the count should be removed.
            db.counts.remove(c)
            continue

        he = list_from_counts(c["availableTexts"]["he"])
        en = list_from_counts(c["availableTexts"]["en"])
        # NOTE(review): presumably the sections available in either
        # language -- confirm against texts.union.
        sections = texts.union(he, en)

        for n in sections:
            if index.categories[0] == "Talmud":
                n = section_to_daf(int(n))
            if getattr(index, "commentaryCategories", None) and index.commentaryCategories[0] == "Talmud":
                # Only the first ":"-separated component is a daf number.
                # Convert the whole component, not just its first character,
                # so that multi-digit sections (e.g. "12:3") map correctly.
                parts = n.split(":")
                n = ":".join([section_to_daf(int(parts[0]))] + parts[1:])
            trefs.append("%s %s" % (title, n) if n else title)

    return trefs
def flatten(text, sectionNames):
    """
    Render a nested text structure as one plain string.

    :param text: nested list of text (a falsy value is treated as empty).
    :param sectionNames: names of the levels of `text`, outermost first.
    :return: at the innermost level the segments joined by newlines; at
        every outer level each child is preceded by a
        "<section name> <number>" heading surrounded by blank lines.
        When the level is named "Daf" the number is rendered through
        section_to_daf.
    """
    text = text or ""

    if len(sectionNames) == 1:
        lines = []
        for segment in text:
            segment = segment or ""
            # Bandaid for mismatch between text structure: a segment that is
            # itself a list (instead of a string) is joined one level deeper.
            if isinstance(segment, basestring):
                lines.append(segment)
            else:
                lines.append("\n".join(segment))
        return "\n".join(lines)

    pieces = []
    for number, subtext in enumerate(text, 1):
        if sectionNames[0] == "Daf":
            label = section_to_daf(number)
        else:
            label = str(number)
        body = flatten(subtext, sectionNames[1:])
        pieces.append("\n\n%s %s\n\n%s" % (sectionNames[0], label, body))
    return "".join(pieces)
def resolve_quote(self, quote, talmud_ref, tolerance=70):
    """
    Find the best match for `quote` among the refs mapped to the daf of
    `talmud_ref`.

    :param quote: the quoted string to look for (must be a basestring).
    :param talmud_ref: a Ref; its book and first section select the
        candidate list in self.map.
    :param tolerance: passed through unchanged to self.find_best_match.
    :return: whatever self.find_best_match returns.
    :raises KeyError: if the daf is found under neither 'Start Daf' nor
        'End Daf' for the book.
    """
    assert isinstance(talmud_ref, Ref)
    assert isinstance(quote, basestring)

    daf = section_to_daf(talmud_ref.sections[0])
    try:
        candidates = self.map[talmud_ref.book]['Start Daf'][daf]
    except KeyError:
        # Not listed as a starting daf; fall back to the 'End Daf' table.
        candidates = self.map[talmud_ref.book]['End Daf'][daf]

    return self.find_best_match(quote, candidates, tolerance)
def resolve(self, index_name, sections=None, match_str=None):
    """
    Resolve a citation that may contain "שם" (sham / ibid) placeholders
    into a full Ref, filling the missing parts from earlier citations.

    :param index_name: index name, or None if the title itself is a sham
        citation; in that case the title of the last citation
        (self._last_cit[0]) is used.
    :param list sections: sections of the citation. Any falsy entry
        (e.g. None) is treated as a "שם" / ibid placeholder and is filled
        in from self._table.
    :param unicode match_str: optional match string. If provided, the
        `sections` argument is ignored and the sections (and title) are
        taken from CitationFinder.get_sham_ref_with_node.
    :return: Ref. The resolved ref is also registered through
        self.registerRef.
    :raises IbidKeyNotFoundException: if no index name is available, or
        the placeholder pattern cannot be looked up in self._table.
    :raises IbidRefException: if the resolved Ref cannot be built or is
        empty.
    """
    # todo: assert if table is empty.
    # todo: raise an error if can't find this sham constellation in table
    title = None
    is_index_sham = index_name is None
    if is_index_sham:
        index_name = self._last_cit[0]
    if index_name is None:
        raise IbidKeyNotFoundException("couldn't find this key")

    if match_str is not None:
        node = library.get_schema_node(index_name)  # assert JaggedArrayNode?
        title, sections = CitationFinder.get_sham_ref_with_node(match_str, node, lang='he')

    if sections is not None:
        last_depth = self.get_last_depth(index_name, sections)
        pad_front = is_index_sham or len(sections) == 0 or not sections[0]
        pad_back = not pad_front and not sections[-1]
        if pad_front or pad_back:
            # Pad with placeholders up to the depth of the ref being resolved.
            padding = [None] * (max(len(sections), last_depth) - len(sections))
            if pad_front:
                sections = tuple(padding + list(sections))
            else:
                sections = tuple(list(sections) + padding)

    # Recognize what kind of key we are looking for: everything up to and
    # including the first sham is kept, everything after it is None.
    key = []
    found_sham = False
    for sect in sections:
        key.append(None if found_sham else sect)
        if not sect:
            found_sham = True

    if found_sham:
        try:
            from_table = self._table[(index_name, tuple(key))].sections
        except Exception:
            # Was a bare ``except:``, which also swallowed
            # KeyboardInterrupt / SystemExit.
            raise IbidKeyNotFoundException("couldn't find this key")
    else:
        from_table = sections  # nothing needs to be looked up in _table

    # Merge, preferring the sections that were given in the citation.
    new_sections = [sect if sect else from_table[i] for i, sect in enumerate(sections)]

    try:
        book_ref = Ref(index_name)
        if self.assert_simple:
            # Explicit raise instead of ``assert`` so the check survives
            # ``python -O``; it is reported as IbidRefException below,
            # exactly as the assertion failure was.
            if book_ref.index.is_complex():
                raise ValueError("complex index")
            addressTypes = book_ref.index_node.addressTypes
        else:
            addressTypes = [None] * len(new_sections)

        section_str_list = []
        for section, addressType in zip(new_sections, addressTypes):
            if addressType == u'Talmud':
                section_str_list.append(talmud.section_to_daf(section))
            else:
                section_str_list.append(str(section))

        if title and title != index_name:
            index_name = title
        resolvedRef = Ref(u'{}.{}'.format(index_name, '.'.join(section_str_list)))
    except Exception:
        # Join the individual sections; joining str(new_sections) put a dot
        # between every character of the list's repr.
        raise IbidRefException(u"problem with the Ref iteslf. {}.{}".format(
            index_name, '.'.join(str(s) for s in new_sections)))

    if resolvedRef.is_empty():
        raise IbidRefException('problem with the Ref iteslf')
    self.registerRef(resolvedRef)
    return resolvedRef
from sefaria.model import *
from helper.link import add_links_from_text
from sefaria.system.database import db
from sefaria.utils.talmud import section_to_daf

# Delete every link whose source text is one of the Berakhot text records,
# then regenerate links from the "Wikisource Talmud Bavli" version.
oids = db.texts.find({"title": "Berakhot"}).distinct("_id")
tanakh_links = LinkSet({"source_text_oid": {"$in": oids}})
tanakh_links.delete()

berakhot = Version().load({
    "title": "Berakhot",
    "versionTitle": "Wikisource Talmud Bavli",
})

# Top-level sections are numbered from 1; section_to_daf turns the number
# into the daf label used in the ref.
for section_number, section_text in enumerate(berakhot.chapter, 1):
    ref = "Berakhot %s" % section_to_daf(section_number)
    print("Scanning %s" % ref)
    add_links_from_text(ref, berakhot.language, section_text, berakhot._id, 1)
def resolve(self, index_name, sections=None, match_str=None):
    """
    Resolve a citation that may contain "שם" (sham / ibid) placeholders
    into a full Ref, filling the missing parts from earlier citations.

    :param index_name: index name, or None if the title itself is a sham
        citation; in that case the title of the last citation
        (self._last_cit[0]) is used.
    :param list sections: sections of the citation. Any falsy entry
        (e.g. None) is treated as a "שם" / ibid placeholder and is filled
        in from self._table.
    :param unicode match_str: optional match string. If provided, the
        `sections` argument is ignored and the sections (and title) are
        taken from CitationFinder.get_sham_ref_with_node.
    :return: Ref. The resolved ref is also registered through
        self.registerRef.
    :raises IbidKeyNotFoundException: if no index name is available, or
        the placeholder pattern cannot be looked up in self._table.
    :raises IbidRefException: if the resolved Ref cannot be built or is
        empty.
    """
    # todo: assert if table is empty.
    # todo: raise an error if can't find this sham constellation in table
    title = None
    is_index_sham = index_name is None
    if is_index_sham:
        index_name = self._last_cit[0]
    if index_name is None:
        raise IbidKeyNotFoundException("couldn't find this key")

    if match_str is not None:
        node = library.get_schema_node(index_name)  # assert JaggedArrayNode?
        title, sections = CitationFinder.get_sham_ref_with_node(match_str, node, lang='he')

    if sections is not None:
        last_depth = self.get_last_depth(index_name, sections)
        pad_front = is_index_sham or len(sections) == 0 or not sections[0]
        pad_back = not pad_front and not sections[-1]
        if pad_front or pad_back:
            # Pad with placeholders up to the depth of the ref being resolved.
            padding = [None] * (max(len(sections), last_depth) - len(sections))
            if pad_front:
                sections = tuple(padding + list(sections))
            else:
                sections = tuple(list(sections) + padding)

    # Recognize what kind of key we are looking for: everything up to and
    # including the first sham is kept, everything after it is None.
    key = []
    found_sham = False
    for sect in sections:
        key.append(None if found_sham else sect)
        if not sect:
            found_sham = True

    if found_sham:
        try:
            from_table = self._table[(index_name, tuple(key))].sections
        except Exception:
            # Was a bare ``except:``, which also swallowed
            # KeyboardInterrupt / SystemExit.
            raise IbidKeyNotFoundException("couldn't find this key")
    else:
        from_table = sections  # nothing needs to be looked up in _table

    # Merge, preferring the sections that were given in the citation.
    new_sections = [sect if sect else from_table[i] for i, sect in enumerate(sections)]

    try:
        book_ref = Ref(index_name)
        if self.assert_simple:
            # Explicit raise instead of ``assert`` so the check survives
            # ``python -O``; it is reported as IbidRefException below,
            # exactly as the assertion failure was.
            if book_ref.index.is_complex():
                raise ValueError("complex index")
            addressTypes = book_ref.index_node.addressTypes
        else:
            addressTypes = [None] * len(new_sections)

        section_str_list = []
        for section, addressType in zip(new_sections, addressTypes):
            if addressType == u'Talmud':
                section_str_list.append(talmud.section_to_daf(section))
            else:
                section_str_list.append(str(section))

        if title and title != index_name:
            index_name = title
        resolvedRef = Ref(u'{}.{}'.format(index_name, '.'.join(section_str_list)))
    except Exception:
        # Join the individual sections; joining str(new_sections) put a dot
        # between every character of the list's repr.
        raise IbidRefException(u"problem with the Ref iteslf. {}.{}".format(
            index_name, '.'.join(str(s) for s in new_sections)))

    if resolvedRef.is_empty():
        raise IbidRefException('problem with the Ref iteslf')
    self.registerRef(resolvedRef)
    return resolvedRef
index = txt.library.get_index(text["title"]) except Exception as e: print "Error loading: {} index : {}".format(text["title"] , e) continue if not index or not getattr(index, "categories", None): print "No index found for " + text.title continue if "Tanach" in index.categories and "Commentary" not in index.categories: continue talmud = True if "Talmud" in index.categories else False for i in range(len(text['chapter'])): if talmud: if "Bavli" in index.categories and i < 2: continue chap = section_to_daf(i + 1) else: chap = i + 1 ref = text['title'] + " " + str(chap) print ref try: result = add_links_from_text(txt.Ref(ref), text['language'], text['chapter'][i], text['_id'], user) if result: text_total[text["title"]] += len(result) except Exception, e: print e total = 0 for text in text_order: num = text_total[text] try:
def get_text(tref, context=1, commentary=True, version=None, lang=None, pad=True):
    """
    Look up the text that the string reference `tref` points to and return
    it, with accompanying metadata, as a dictionary.

    * 'context': how many levels of depth above the requested ref should be
      returned. With context=1, asking for a verse also returns its
      surrounding chapter; context=0 returns just what was asked for.
    * 'commentary': whether to also look up connected texts (and, as a
      side effect of the same switch, the list of available versions).
    * 'version' + 'lang': together select one particular version of the
      text in that language ("en" or "he").
    * 'pad': when true the ref is replaced by its padded_ref() before lookup.
    """
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()

    if oref.is_spanning():
        # A ref that spans sections is assembled by get_spanning_text.
        return get_spanning_text(oref)

    # Projection for the queries: always drop _id; when a section was
    # requested from a text deeper than one level, fetch only that section.
    chapter_slice = {"_id": 0}
    if len(oref.sections) and len(oref.index.sectionNames) != 1:
        chapter_slice["chapter"] = {"$slice": [oref.sections[0] - 1, 1]}

    def _all_versions(language):
        # Prioritize first according to "priority" field (if present), then
        # by oldest text first. This order determines which versions win
        # in case of a merge.
        cursor = db.texts.find({"title": oref.book, "language": language}, chapter_slice)
        return cursor.sort([["priority", -1], ["_id", 1]])

    # Pull a specific version of the text when one was requested.
    en_cursor = he_cursor = None
    if version and lang in ("en", "he"):
        pinned = db.texts.find({"title": oref.book, "language": lang, "versionTitle": version},
                               chapter_slice)
        if lang == "en":
            en_cursor = pinned
        else:
            he_cursor = pinned

    # Otherwise pull all versions of that language.
    if not en_cursor:
        en_cursor = _all_versions("en")
    if not he_cursor:
        he_cursor = _all_versions("he")

    # Conversion to Ref bogged down here, and resorted to old_dict_format().
    # todo: Push through to the end
    # Extract / merge relevant text. Pull Hebrew from a copy of the ref
    # first, since text_from_cur alters the ref it is given.
    heRef = text_from_cur(copy.copy(oref.old_dict_format()), he_cursor, context)
    r = text_from_cur(oref.old_dict_format(), en_cursor, context)

    # Expose the fields of the Hebrew text under "he"-prefixed names.
    r["he"] = heRef.get("text", [])
    for target, source, default in (
            ("heVersionTitle", "versionTitle", ""),
            ("heVersionSource", "versionSource", ""),
            ("heVersionStatus", "versionStatus", ""),
            ("heLicense", "license", "unknown")):
        r[target] = heRef.get(source, default)

    notes = heRef.get("versionNotes", "")
    if notes:
        r["heVersionNotes"] = notes
    digitized = heRef.get("digitizedBySefaria", False)
    if digitized:
        r["heDigitizedBySefaria"] = digitized
    if "sources" in heRef:
        r["heSources"] = heRef.get("sources")

    # Find commentary on this text if requested.
    if commentary:
        from sefaria.client.wrapper import get_links
        searchRef = model.Ref(tref).padded_ref().context_ref(context).normal()
        links = get_links(searchRef)
        r["commentary"] = [] if "error" in links else links

        # The list of available versions is only produced for callers who
        # also asked for commentary (hack).
        r["versions"] = get_version_list(tref)

    # Use shorthand if present, masking the higher level sections.
    if "shorthand" in r:
        r["book"] = r["shorthand"]
        depth = r["shorthandDepth"]
        for key in ("sections", "toSections", "sectionNames"):
            r[key] = r[key][depth:]

    # Replace ints with daf strings (3 -> "2a") if the text is Talmud or a
    # commentary on Talmud.
    is_talmudic = (
        r["type"] == "Talmud"
        or (r["type"] == "Commentary" and r["commentaryCategories"][0] == "Talmud")
    )
    if is_talmudic:
        daf = r["sections"][0]
        r["sections"] = [section_to_daf(daf)] + r["sections"][1:]
        r["title"] = r["book"] + " " + r["sections"][0]
        if "heTitle" in r:
            r["heBook"] = r["heTitle"]
            r["heTitle"] = r["heTitle"] + " " + section_to_daf(daf, lang="he")
        if r["type"] == "Commentary" and len(r["sections"]) > 1:
            r["title"] = "%s Line %d" % (r["title"], r["sections"][1])
        if "toSections" in r:
            r["toSections"] = [r["sections"][0]] + r["toSections"][1:]
    elif r["type"] == "Commentary":
        # The title carries at most the first two sections.
        leading = r["sections"][:2]
        r["title"] = r["book"] + " " + ":".join(["%s" % s for s in leading])

    return r
text_total[text["title"]] = 0 text_order.append(text["title"]) print text["title"] index = txt.get_index(text["title"]) if not index or not index.get("categories"): print "No index found for " + text["title"] continue if "Tanach" in index.categories: continue talmud = True if "Talmud" in index.categories else False for i in range(len(text['chapter'])): if talmud: if "Bavli" in index.categories and i < 2: continue chap = section_to_daf(i + 1) else: chap = i + 1 ref = text['title'] + " " + str(chap) print ref try: result = add_links_from_text(ref, text['language'], text['chapter'][i], text['_id'], user) if result: text_total[text["title"]] += len(result) except Exception, e: print e total = 0 for text in text_order: num = text_total[text]
def get_text(tref, context=1, commentary=True, version=None, lang=None, pad=True):
    """
    Look up the text that the string reference `tref` points to and return
    it, with accompanying metadata, as a dictionary.

    * 'context': how many levels of depth above the requested ref should be
      returned. With context=1, asking for a verse also returns its
      surrounding chapter; context=0 returns just what was asked for.
    * 'commentary': whether to also look up connected texts (and, as a
      side effect of the same switch, the list of available versions).
    * 'version' + 'lang': together select one particular version of the
      text in that language ("en" or "he").
    * 'pad': when true the ref is replaced by its padded_ref() before lookup.
    """
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()

    if oref.is_spanning():
        # A ref that spans sections is assembled by get_spanning_text.
        return get_spanning_text(oref)

    # Projection for the queries: always drop _id; when a section was
    # requested from a text deeper than one level, fetch only that section.
    chapter_slice = {"_id": 0}
    if len(oref.sections) and len(oref.index.sectionNames) != 1:
        chapter_slice["chapter"] = {"$slice": [oref.sections[0] - 1, 1]}

    def _all_versions(language):
        # Prioritize first according to "priority" field (if present), then
        # by oldest text first. This order determines which versions win
        # in case of a merge.
        cursor = db.texts.find({"title": oref.book, "language": language}, chapter_slice)
        return cursor.sort([["priority", -1], ["_id", 1]])

    # Pull a specific version of the text when one was requested.
    en_cursor = he_cursor = None
    if version and lang in ("en", "he"):
        pinned = db.texts.find({"title": oref.book, "language": lang, "versionTitle": version},
                               chapter_slice)
        if lang == "en":
            en_cursor = pinned
        else:
            he_cursor = pinned

    # Otherwise pull all versions of that language.
    if not en_cursor:
        en_cursor = _all_versions("en")
    if not he_cursor:
        he_cursor = _all_versions("he")

    # Conversion to Ref bogged down here, and resorted to old_dict_format().
    # todo: Push through to the end
    # Extract / merge relevant text. Pull Hebrew from a copy of the ref
    # first, since text_from_cur alters the ref it is given.
    heRef = text_from_cur(copy.copy(oref.old_dict_format()), he_cursor, context)
    r = text_from_cur(oref.old_dict_format(), en_cursor, context)

    # Expose the fields of the Hebrew text under "he"-prefixed names.
    r["he"] = heRef.get("text", [])
    for target, source, default in (
            ("heVersionTitle", "versionTitle", ""),
            ("heVersionSource", "versionSource", ""),
            ("heVersionStatus", "versionStatus", ""),
            ("heLicense", "license", "unknown")):
        r[target] = heRef.get(source, default)

    notes = heRef.get("versionNotes", "")
    if notes:
        r["heVersionNotes"] = notes
    digitized = heRef.get("digitizedBySefaria", False)
    if digitized:
        r["heDigitizedBySefaria"] = digitized
    if "sources" in heRef:
        r["heSources"] = heRef.get("sources")

    # Find commentary on this text if requested.
    if commentary:
        from sefaria.client.wrapper import get_links
        searchRef = model.Ref(tref).padded_ref().context_ref(context).normal()
        links = get_links(searchRef)
        r["commentary"] = [] if "error" in links else links

        # The list of available versions is only produced for callers who
        # also asked for commentary (hack).
        r["versions"] = get_version_list(tref)

    # Use shorthand if present, masking the higher level sections.
    if "shorthand" in r:
        r["book"] = r["shorthand"]
        depth = r["shorthandDepth"]
        for key in ("sections", "toSections", "sectionNames"):
            r[key] = r[key][depth:]

    # Replace ints with daf strings (3 -> "2a") if the text is Talmud or a
    # commentary on Talmud.
    is_talmudic = (
        r["type"] == "Talmud"
        or (r["type"] == "Commentary" and r["commentaryCategories"][0] == "Talmud")
    )
    if is_talmudic:
        daf = r["sections"][0]
        r["sections"] = [section_to_daf(daf)] + r["sections"][1:]
        r["title"] = r["book"] + " " + r["sections"][0]
        if "heTitle" in r:
            r["heBook"] = r["heTitle"]
            r["heTitle"] = r["heTitle"] + " " + section_to_daf(daf, lang="he")
        if r["type"] == "Commentary" and len(r["sections"]) > 1:
            r["title"] = "%s Line %d" % (r["title"], r["sections"][1])
        if "toSections" in r:
            r["toSections"] = [r["sections"][0]] + r["toSections"][1:]
    elif r["type"] == "Commentary":
        # The title carries at most the first two sections.
        leading = r["sections"][:2]
        r["title"] = r["book"] + " " + ":".join(["%s" % s for s in leading])

    return r