def get_items_linked_to_ref(tref):
    """
    Collect the refs connected to `tref`, either directly or through a commentary.

    :param tref: textual ref; it is parsed with `Ref(tref)`
    :return: tuple `(final_refs, commentary_ref_set)`.
        `final_refs` is a list of `(ref, score, source)` tuples, the direct links first
        (source is the string "direct") followed by the links reached through a
        commentary (source is the tref of that commentary).
        `commentary_ref_set` is the set of normalized direct links flagged as commentary.
    """
    oref = Ref(tref)
    section_tref = oref.section_ref().normal()
    commentary_categories = ("Commentary", "Modern Commentary")

    # set is used b/c sometimes there are duplicate links
    direct_links = set()
    for link in get_links(section_tref, with_text=False):
        # only keep links whose anchor actually covers the requested ref
        if oref in Ref(link["anchorRef"]).range_list():
            direct_links.add((link["ref"], link["category"] in commentary_categories))

    commentary_links = []
    seen_by_author = set()
    for link_tref, is_comment in direct_links:
        # Steinsaltz is hard-coded to have same connections as Talmud which will double count Talmud connections
        if not is_comment or link_tref.startswith("Steinsaltz on "):
            continue
        author = getattr(Ref(link_tref).index, "collective_title", None)
        linked_trefs = [x["ref"] for x in get_links(link_tref, with_text=False)]
        normalized_trefs, _, _ = normalize_related_refs(linked_trefs, None, COMMENTARY_LINK_SCORE)
        for commentary_link in normalized_trefs:
            author_key = (commentary_link, author)
            if author is not None and author_key in seen_by_author:
                # don't add same ref twice from same author
                continue
            seen_by_author.add(author_key)
            commentary_links.append((commentary_link, COMMENTARY_LINK_SCORE, link_tref))

    # split the (tref, is_comment) pairs into parallel lists so the flags survive normalization
    raw_trefs = [pair[0] for pair in direct_links]
    raw_flags = [pair[1] for pair in direct_links]
    direct_links, _, is_comment_list = normalize_related_refs(
        raw_trefs, None, DIRECT_LINK_SCORE, other_data=raw_flags)

    final_refs = [(direct_tref, DIRECT_LINK_SCORE, "direct") for direct_tref in direct_links]
    final_refs = final_refs + commentary_links
    commentary_ref_set = {
        direct_tref for direct_tref, flagged in zip(direct_links, is_comment_list) if flagged
    }
    return final_refs, commentary_ref_set
def test_get_links_on_range(self):
    """Links on a ranged ref must be exactly the union of the links on its two segments."""

    def link_keys(tref):
        # a link is identified by its ref concatenated with its type
        return [link["ref"] + link["type"] for link in get_links(tref)]

    first_segment = link_keys("Exodus 2:3")
    second_segment = link_keys("Exodus 2:4")
    whole_range = link_keys("Exodus 2:3-4")

    # All links in first segment present in range
    assert all(key in whole_range for key in first_segment)
    # All links in second segment present in range
    assert all(key in whole_range for key in second_segment)
    # No links in range absent from segments
    assert all(key in first_segment or key in second_segment for key in whole_range)
def get_recs_thru_links(oref):
    '''
    Gather recommendations reachable from `oref` through links - both direct links and
    links that pass through a commentary on `oref`.
    :param oref: the central ref
    :return: Three things:
                list of `Recommendation`s - direct links first, then links reached through commentaries
                set of trefs - the normalized direct links that are commentaries on the original ref
                set of trefs - all of the normalized direct links
    '''
    section_tref = oref.section_ref().normal()
    commentary_categories = ("Commentary", "Modern Commentary")

    # set is used b/c sometimes there are duplicate links
    direct_links = set()
    for link in get_links(section_tref, with_text=False):
        # only keep links whose anchor actually covers the requested ref
        if oref in Ref(link["anchorRef"]).range_list():
            direct_links.add((link["ref"], link["category"] in commentary_categories))

    commentary_links = []
    seen_by_author = set()
    for link_tref, is_comment in direct_links:
        # Steinsaltz is hard-coded to have same connections as Talmud which will double count Talmud connections
        if not is_comment or link_tref.startswith("Steinsaltz on "):
            continue
        author = getattr(Ref(link_tref).index, "collective_title", None)
        linked_trefs = [x["ref"] for x in get_links(link_tref, with_text=False)]
        normalized_trefs, _, _ = RecommendationEngine.normalize_related_refs(
            linked_trefs, None, COMMENTARY_LINK_SCORE)
        for commentary_link in normalized_trefs:
            author_key = (commentary_link, author)
            if author is not None and author_key in seen_by_author:
                # don't add same ref twice from same author
                continue
            seen_by_author.add(author_key)
            commentary_links.append(
                Recommendation(Ref(commentary_link), relevance=COMMENTARY_LINK_SCORE, sources=[link_tref]))

    # split the (tref, is_comment) pairs into parallel lists so the flags survive normalization
    raw_trefs = [pair[0] for pair in direct_links]
    raw_flags = [pair[1] for pair in direct_links]
    direct_links, _, is_comment_list = RecommendationEngine.normalize_related_refs(
        raw_trefs, None, DIRECT_LINK_SCORE, other_data=raw_flags)
    direct_ref_set = set(direct_links)

    direct_rex = [
        Recommendation(Ref(direct_tref), relevance=DIRECT_LINK_SCORE, sources=["direct"])
        for direct_tref in direct_links
    ]
    final_rex = direct_rex + commentary_links
    commentary_ref_set = {
        direct_tref for direct_tref, flagged in zip(direct_links, is_comment_list) if flagged
    }
    return final_rex, commentary_ref_set, direct_ref_set
def get_items_linked_to_ref(tref):
    """
    Find everything linked to `tref`: its direct links plus the links of its commentaries.

    :param tref: textual ref, parsed here with `Ref(tref)`
    :return: `(final_refs, commentary_ref_set)` where `final_refs` is a list of
        `(ref, score, source)` tuples (source is "direct" for a direct link, otherwise
        the tref of the commentary the ref was reached through) and
        `commentary_ref_set` holds the normalized direct links that are commentaries.
    """
    central_oref = Ref(tref)
    section_oref = central_oref.section_ref()

    def is_commentary(link):
        return link["category"] in ("Commentary", "Modern Commentary")

    def anchored_on_central_ref(link):
        return central_oref in Ref(link["anchorRef"]).range_list()

    # set is used b/c sometimes there are duplicate links
    direct_links = {
        (link["ref"], is_commentary(link))
        for link in get_links(section_oref.normal(), with_text=False)
        if anchored_on_central_ref(link)
    }

    commentary_links = []
    commentary_author_pairs = set()
    for direct_tref, from_commentary in direct_links:
        # Steinsaltz is hard-coded to have same connections as Talmud which will double count Talmud connections
        if from_commentary and not direct_tref.startswith("Steinsaltz on "):
            author = getattr(Ref(direct_tref).index, "collective_title", None)
            second_hop_trefs = [hop["ref"] for hop in get_links(direct_tref, with_text=False)]
            second_hop_trefs, _, _ = normalize_related_refs(second_hop_trefs, None, COMMENTARY_LINK_SCORE)
            for hop_tref in second_hop_trefs:
                already_seen = (hop_tref, author) in commentary_author_pairs
                if author is not None and already_seen:
                    continue  # don't add same ref twice from same author
                commentary_author_pairs.add((hop_tref, author))
                commentary_links.append((hop_tref, COMMENTARY_LINK_SCORE, direct_tref))

    # the commentary flags ride along as `other_data` so they stay aligned with the normalized refs
    flags = [from_commentary for _, from_commentary in direct_links]
    trefs = [direct_tref for direct_tref, _ in direct_links]
    direct_links, _, flags = normalize_related_refs(trefs, None, DIRECT_LINK_SCORE, other_data=flags)

    final_refs = [(direct_tref, DIRECT_LINK_SCORE, "direct") for direct_tref in direct_links]
    final_refs.extend(commentary_links)

    commentary_ref_set = set()
    for direct_tref, from_commentary in zip(direct_links, flags):
        if from_commentary:
            commentary_ref_set.add(direct_tref)
    return final_refs, commentary_ref_set
def get_recs_thru_links(oref):
    '''
    Given a ref (possibly spanning several sections), returns items connected to it through
    links - direct links and links through commentaries.
    :param oref: the central ref
    :return: Three things:
                list of `Recommendation`s - direct links first, then links reached through commentaries
                set of trefs - the normalized direct links that are commentaries on the original ref
                set of trefs - all of the normalized direct links
    '''
    commentary_categories = ('Commentary', 'Modern Commentary')
    commentary_links = []
    commentary_author_set = set()
    # set is used b/c sometimes there are duplicate links
    direct_links = set()

    section_ref_list = [r.section_ref() for r in oref.split_spanning_ref()]
    range_set = {r.normal() for r in oref.all_segment_refs()}
    for section_ref in section_ref_list:
        # Query each section of the (possibly spanning) ref in turn. The loop variable used to be
        # overwritten with `oref.section_ref()`, which repeated one identical query per section.
        for link in get_links(section_ref.normal(), with_text=False):
            anchor_oref = Ref(link['anchorRef'])
            # skip links whose anchor does not touch any segment of the requested range
            if range_set.isdisjoint(r.normal() for r in anchor_oref.range_list()):
                continue
            direct_links.add((link['ref'], link['category'] in commentary_categories, anchor_oref))

    for link_tref, is_comment, anchor_ref in direct_links:
        # Steinsaltz is hard-coded to have same connections as Talmud which will double count Talmud connections
        if not is_comment or link_tref.startswith("Steinsaltz on "):
            continue
        author = getattr(Ref(link_tref).index, "collective_title", None)
        temp_commentary_links, _, _, _ = RecommendationEngine.normalize_related_refs(
            [x["ref"] for x in get_links(link_tref, with_text=False)], None, COMMENTARY_LINK_SCORE)
        for commentary_link in temp_commentary_links:
            if author is not None and (commentary_link, author) in commentary_author_set:
                # don't add same ref twice from same author
                continue
            commentary_author_set.add((commentary_link, author))
            commentary_links.append(Recommendation(
                Ref(commentary_link), relevance=COMMENTARY_LINK_SCORE,
                sources=[RecommendationSource(link_tref, anchor_ref)]))

    # the (is_comment, anchor_ref) pairs ride along as `other_data` so they stay aligned with the normalized refs
    other_data = [(x[1], x[2]) for x in direct_links]
    direct_links, _, other_data, _ = RecommendationEngine.normalize_related_refs(
        [x[0] for x in direct_links], None, DIRECT_LINK_SCORE, other_data=other_data)
    direct_ref_set = set(direct_links)

    if other_data:
        is_comment_list, anchor_ref_list = zip(*other_data)
    else:
        # zip(*[]) yields nothing to unpack, so a ref without direct links needs explicit empties
        is_comment_list, anchor_ref_list = (), ()

    final_rex = [
        Recommendation(Ref(x), relevance=DIRECT_LINK_SCORE, sources=[RecommendationSource('direct', anchor_ref)])
        for x, anchor_ref in zip(direct_links, anchor_ref_list)
    ] + commentary_links
    commentary_ref_set = {x for x, is_comment in zip(direct_links, is_comment_list) if is_comment}
    return final_rex, commentary_ref_set, direct_ref_set
def section_data(self, oref: model.Ref, default_versions: dict) -> dict:
    """
    Build the export dictionary for one section of text.

    :param oref: section level Ref instance
    :param default_versions: {'en': Version, 'he': Version}
    :return: a dictionary with all the data we care about for section level `oref`:
        ref/title metadata, the normalized refs of the neighbouring sections ("next" and "prev",
        None when there is no neighbour), version details for any non-default or merged
        version, and a "content" list with the text and links of every segment.
    """
    prev_ref = oref.prev_section_ref(vstate=self.version_state)
    next_ref = oref.next_section_ref(vstate=self.version_state)

    data = {
        "ref": oref.normal(),
        "heRef": oref.he_normal(),
        "indexTitle": oref.index.title,
        "heTitle": oref.index.get_title('he'),
        "sectionRef": oref.normal(),
        "next": next_ref.normal() if next_ref else None,
        "prev": prev_ref.normal() if prev_ref else None,
        "content": [],
    }

    def get_version_title(chunk):
        """Return (title, notes, license, source, title_he, notes_he); all None for a default version."""
        if chunk.is_merged:
            merged_version = 'Merged from {}'.format(', '.join(set(chunk.sources)))
            return merged_version, None, None, None, None, None

        version = chunk.version()
        is_non_default = (
            version
            and version.language in default_versions
            and version.versionTitle != default_versions[version.language].versionTitle
        )
        if not is_non_default:
            return None, None, None, None, None, None  # default version

        # optional attributes may be missing on a version; report those as None
        return (
            version.versionTitle,
            getattr(version, 'versionNotes', None),
            getattr(version, 'license', None),
            getattr(version, 'versionSource', None),
            getattr(version, 'versionTitleInHebrew', None),
            getattr(version, 'versionNotesInHebrew', None),
        )

    node_title = oref.index_node.full_title()
    node_text = self._text_map[node_title]

    # The Hebrew chunk was previously read from 'en_chunk', which copied the English version
    # details into the he* fields.
    # NOTE(review): assumes `_text_map` entries store the Hebrew chunk under 'he_chunk', mirroring
    # the 'en_ja'/'he_ja' keys read below - confirm where `_text_map` is populated.
    en_details = get_version_title(node_text['en_chunk'])
    he_details = get_version_title(node_text['he_chunk'])

    # output keys, in the same order as the tuple returned by get_version_title
    version_keys = (
        ('versionTitle', 'heVersionTitle'),
        ('versionNotes', 'heVersionNotes'),
        ('license', 'heLicense'),
        ('versionSource', 'heVersionSource'),
        ('versionTitleInHebrew', 'heVersionTitleInHebrew'),
        ('versionNotesInHebrew', 'heVersionNotesInHebrew'),
    )
    for (en_key, he_key), en_value, he_value in zip(version_keys, en_details, he_details):
        if en_value:
            data[en_key] = en_value
        if he_value:
            data[he_key] = he_value

    try:
        en_text = node_text['en_ja'].get_element([j - 1 for j in oref.sections])
    except IndexError:
        en_text = []
    try:
        he_text = node_text['he_ja'].get_element([j - 1 for j in oref.sections])
    except IndexError:
        he_text = []

    en_len = len(en_text)
    he_len = len(he_text)
    segment_count = max(en_len, he_len)

    # group the section's links by the (1-based) segment numbers their anchor covers
    anchor_ref_dict = defaultdict(list)
    for link in get_links(oref.normal(), False):
        anchor_oref = model.Ref(link["anchorRef"])
        if not anchor_oref.is_segment_level() or len(anchor_oref.sections) == 0:
            continue  # don't bother with section level links
        start_seg_num = anchor_oref.sections[-1]
        # make sure sections are the same in range
        # TODO doesn't deal with links that span sections
        if anchor_oref.sections[0] == anchor_oref.toSections[0]:
            end_seg_num = anchor_oref.toSections[-1]
        else:
            end_seg_num = segment_count
        for seg_num in range(start_seg_num, end_seg_num + 1):
            anchor_ref_dict[seg_num].append(simple_link(link))

    for x in range(segment_count):
        cur_content = {"segmentNumber": str(x + 1)}
        links = anchor_ref_dict[x + 1]
        if len(links) > 0:
            cur_content["links"] = links
        if x < en_len:
            cur_content["text"] = en_text[x]
        if x < he_len:
            cur_content["he"] = he_text[x]
        data["content"].append(cur_content)

    return data
def get_sorted_links(self):
    """Return the links fetched for `self.index_obj.title`, ordered by `self.sort_key` of each link's anchorRef."""

    def anchor_sort_key(link):
        return self.sort_key(link['anchorRef'])

    links = get_links(self.index_obj.title, False, False)
    return sorted(links, key=anchor_sort_key)
def export_text(title):
    """
    Export every sub-ref of `title` as a JSON document.

    For each sub-ref the text contents are loaded, the refs of the next/previous sections are
    added, and a "content" list is built with one entry per segment (segment number, stripped
    down links, English and Hebrew text). Version and source metadata is then dropped and the
    document is written with `write_doc` to the path given by `make_path`.

    Any exception stops the export of this title and is logged as a warning rather than raised.

    :param title: title of the text to export
    """
    # link fields that are not wanted in the exported document
    unwanted_link_keys = (
        'commentator', 'heCommentator', 'type', 'anchorText', 'commentaryNum',
        'heTitle', '_id', 'anchorRef', 'ref', 'anchorVerse',
    )
    # metadata fields dropped from the exported document; "he" and "text" are dropped because
    # their segments have been copied into "content"
    unwanted_text_keys = (
        "maps", "versionSource", "heDigitizedBySefaria", "heVersionTitle", "heVersionNotes",
        "heVersionStatus", "isSpanning", "heVersionSource", "versionNotes", "versionTitle",
        "heLicense", "digitizedBySefaria", "versions", "license", "versionStatus",
        "heSources", "sources", "he", "text",
    )

    print(title)
    try:
        for oref in model.Ref(title).all_subrefs():
            text = model.TextFamily(oref, version=None, lang=None, commentary=0, context=0, pad=0,
                                    alts=False).contents()

            next_ref = oref.next_section_ref()
            prev_ref = oref.prev_section_ref()
            text["next"] = next_ref.normal() if next_ref else None
            text["prev"] = prev_ref.normal() if prev_ref else None
            text["content"] = []

            if str(oref) == "Sha'ar Ha'Gemul of the Ramban 1":
                # hard-coded skip: this section is exported without any content
                print("Sha'ar Ha'Gemul of the Ramban 1 is the worst")
            else:
                en_segments = text["text"]
                he_segments = text["he"]
                for x in range(max(len(en_segments), len(he_segments))):
                    segment_number = str(x + 1)
                    links = get_links(text["ref"] + ":" + segment_number, False)
                    for link in links:
                        for key in unwanted_link_keys:
                            # pop instead of del: a link lacking one of these fields must not abort the export
                            link.pop(key, None)
                    text["content"].append({
                        "segmentNumber": segment_number,
                        "links": links,
                        # pad the shorter language with empty strings
                        "text": en_segments[x] if x < len(en_segments) else "",
                        "he": he_segments[x] if x < len(he_segments) else "",
                    })

            for key in unwanted_text_keys:
                text.pop(key, None)

            path = make_path(text, "json")
            write_doc(text, path)
    except Exception as e:
        # deliberate best-effort: a failing title is logged and skipped
        logging.warning(e)
def test_get_links_on_range(self):
    """The number of links on a ranged ref equals the summed link counts of its two segments."""
    first_count = len(get_links("Exodus 2:3"))
    second_count = len(get_links("Exodus 2:4"))
    range_count = len(get_links("Exodus 2:3-4"))
    assert range_count == (first_count + second_count)
def section_data(oref, defaultVersions):
    """
    Build the export dictionary for one section of text.

    :param oref: section level ref whose text is loaded through `model.TextFamily`
    :param defaultVersions dict: {'en': Version, 'he': Version}
    :return: a dictionary with all the data we care about for section level `oref`:
        ref/title metadata, the normalized refs of the neighbouring sections ("next" and "prev",
        None when there is no neighbour), version details for any non-default or merged
        version, and a "content" list with the text and links of every segment.
    """
    tf = model.TextFamily(oref, version=None, lang=None, commentary=0, context=0, pad=0, alts=False)
    text = tf.contents()

    next_ref = oref.next_section_ref()
    prev_ref = oref.prev_section_ref()

    data = {
        "ref": text["ref"],
        "heRef": text["heRef"],
        "indexTitle": text["indexTitle"],
        "heTitle": text["heTitle"],
        "sectionRef": text["sectionRef"],
        "next": next_ref.normal() if next_ref else None,
        "prev": prev_ref.normal() if prev_ref else None,
        "content": [],
    }

    def get_version_title(chunk):
        """Return (title, notes, license, source) for `chunk`; all None for a default version."""
        if chunk.is_merged:
            merged_version = u'Merged from {}'.format(u', '.join(set(chunk.sources)))
            return merged_version, None, None, None

        version = chunk.version()
        is_non_default = (
            version
            and version.language in defaultVersions
            and version.versionTitle != defaultVersions[version.language].versionTitle
        )
        if not is_non_default:
            return None, None, None, None  # default version

        # optional attributes may be missing on a version; report those as None
        return (
            version.versionTitle,
            getattr(version, 'versionNotes', None),
            getattr(version, 'license', None),
            getattr(version, 'versionSource', None),
        )

    en_details = get_version_title(tf._chunks['en'])
    he_details = get_version_title(tf._chunks['he'])

    # output keys, in the same order as the tuple returned by get_version_title
    version_keys = (
        ('versionTitle', 'heVersionTitle'),
        ('versionNotes', 'heVersionNotes'),
        ('license', 'heLicense'),
        ('versionSource', 'heVersionSource'),
    )
    for (en_key, he_key), en_value, he_value in zip(version_keys, en_details, he_details):
        if en_value:
            data[en_key] = en_value
        if he_value:
            data[he_key] = he_value

    en_len = len(text["text"])
    he_len = len(text["he"])
    # `range` instead of the Python 2-only `xrange`; the per-segment debug `print links` was removed
    for x in range(max(en_len, he_len)):
        cur_content = {"segmentNumber": str(x + 1)}
        links = get_links(text["ref"] + ":" + cur_content["segmentNumber"], False)
        if len(links) > 0:
            cur_content["links"] = [simple_link(link) for link in links]
        if x < en_len:
            cur_content["text"] = text["text"][x]
        if x < he_len:
            cur_content["he"] = text["he"][x]
        data["content"].append(cur_content)

    return data
def get_text(tref, context=1, commentary=True, version=None, lang=None, pad=True):
    """
    Look up the text for a string reference and return it as a dictionary of text and other info.

    * 'context': how many levels of depth above the request ref should be returned.
      e.g., with context=1, ask for a verse and receive its surrounding chapter as well.
      context=0 gives just what is asked for.
    * 'commentary': whether or not to search for and return connected texts as well.
    * 'version' + 'lang': use to specify a particular version of a text to return.
    * 'pad': when true, the parsed ref is replaced by its `padded_ref()` before the lookup.
    """
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()

    if oref.is_spanning():
        # refs that span sections are handled by get_spanning_text
        return get_spanning_text(oref)

    # projection for the queries: when the ref names a section and the text has more than one
    # section level, slice out just that one chapter
    chapter_slice = {"_id": 0}
    if len(oref.sections) and len(oref.index.sectionNames) != 1:
        chapter_slice["chapter"] = {"$slice": [oref.sections[0] - 1, 1]}

    text_cur = he_cur = None
    # pull a specific version of text
    if version and lang in ("en", "he"):
        version_cur = db.texts.find(
            {"title": oref.book, "language": lang, "versionTitle": version}, chapter_slice)
        if lang == "en":
            text_cur = version_cur
        else:
            he_cur = version_cur

    # If no criteria set above, pull all versions,
    # Prioritize first according to "priority" field (if present), then by oldest text first
    # Order here will determine which versions are used in case of a merge
    version_order = [["priority", -1], ["_id", 1]]
    if not text_cur:
        text_cur = db.texts.find({"title": oref.book, "language": "en"}, chapter_slice).sort(version_order)
    if not he_cur:
        he_cur = db.texts.find({"title": oref.book, "language": "he"}, chapter_slice).sort(version_order)

    # Conversion to Ref bogged down here, and resorted to old_dict_format(). todo: Push through to the end
    # Extract / merge relevant text. Pull Hebrew from a copy of ref first, since text_from_cur alters ref
    he_data = text_from_cur(copy.copy(oref.old_dict_format()), he_cur, context)
    r = text_from_cur(oref.old_dict_format(), text_cur, context)

    # Add fields pertaining to the Hebrew text under different field names
    r["he"] = he_data.get("text", [])
    for he_key, source_key, fallback in (
        ("heVersionTitle", "versionTitle", ""),
        ("heVersionSource", "versionSource", ""),
        ("heVersionStatus", "versionStatus", ""),
        ("heLicense", "license", "unknown"),
    ):
        r[he_key] = he_data.get(source_key, fallback)
    if he_data.get("versionNotes", ""):
        r["heVersionNotes"] = he_data.get("versionNotes", "")
    if he_data.get("digitizedBySefaria", False):
        r["heDigitizedBySefaria"] = he_data.get("digitizedBySefaria", False)
    if "sources" in he_data:
        r["heSources"] = he_data.get("sources")

    # find commentary on this text if requested
    if commentary:
        from sefaria.client.wrapper import get_links
        search_tref = model.Ref(tref).padded_ref().context_ref(context).normal()
        links = get_links(search_tref)
        r["commentary"] = [] if "error" in links else links

        # get list of available versions of this text
        # but only if you care enough to get commentary also (hack)
        r["versions"] = get_version_list(tref)

    # use shorthand if present, masking higher level sections
    if "shorthand" in r:
        r["book"] = r["shorthand"]
        depth = r["shorthandDepth"]
        for key in ("sections", "toSections", "sectionNames"):
            r[key] = r[key][depth:]

    # replace ints with daf strings (3->"2a") if text is Talmud or commentary on Talmud
    if r["type"] == "Talmud" or (r["type"] == "Commentary" and r["commentaryCategories"][0] == "Talmud"):
        daf = r["sections"][0]
        r["sections"] = [section_to_daf(daf)] + r["sections"][1:]
        r["title"] = r["book"] + " " + r["sections"][0]
        if "heTitle" in r:
            r["heBook"] = r["heTitle"]
            r["heTitle"] = r["heTitle"] + " " + section_to_daf(daf, lang="he")
        if r["type"] == "Commentary" and len(r["sections"]) > 1:
            r["title"] = "%s Line %d" % (r["title"], r["sections"][1])
        if "toSections" in r:
            r["toSections"] = [r["sections"][0]] + r["toSections"][1:]
    elif r["type"] == "Commentary":
        # the title carries at most the first two section numbers
        depth = min(len(r["sections"]), 2)
        r["title"] = r["book"] + " " + ":".join(["%s" % s for s in r["sections"][:depth]])

    return r
def get_text(tref, context=1, commentary=True, version=None, lang=None, pad=True):
    """
    Resolve a string reference to a segment of text and return a dictionary with the text and other info.

    * 'context': how many levels of depth above the request ref should be returned.
      e.g., with context=1, ask for a verse and receive its surrounding chapter as well.
      context=0 gives just what is asked for.
    * 'commentary': whether or not to search for and return connected texts as well.
    * 'version' + 'lang': use to specify a particular version of a text to return.
    * 'pad': when true, the parsed ref is replaced by its `padded_ref()` before the lookup.
    """
    oref = model.Ref(tref).padded_ref() if pad else model.Ref(tref)

    # refs that span sections are handled by get_spanning_text
    if oref.is_spanning():
        return get_spanning_text(oref)

    # only slice out a single chapter when a section is named and the text has more than one section level
    if len(oref.sections) and len(oref.index.sectionNames) != 1:
        projection = {"_id": 0, "chapter": {"$slice": [oref.sections[0] - 1, 1]}}
    else:
        projection = {"_id": 0}

    def find_version():
        # pull a specific version of text
        return db.texts.find({"title": oref.book, "language": lang, "versionTitle": version}, projection)

    def find_all_versions(language):
        # Prioritize first according to "priority" field (if present), then by oldest text first
        # Order here will determine which versions are used in case of a merge
        return db.texts.find({"title": oref.book, "language": language}, projection).sort(
            [["priority", -1], ["_id", 1]])

    english_cursor = hebrew_cursor = None
    if version and lang == "en":
        english_cursor = find_version()
    elif version and lang == "he":
        hebrew_cursor = find_version()

    # If no criteria set above, pull all versions
    english_cursor = english_cursor or find_all_versions("en")
    hebrew_cursor = hebrew_cursor or find_all_versions("he")

    # Conversion to Ref bogged down here, and resorted to old_dict_format(). todo: Push through to the end
    # Extract / merge relevant text. Pull Hebrew from a copy of ref first, since text_from_cur alters ref
    hebrew = text_from_cur(copy.copy(oref.old_dict_format()), hebrew_cursor, context)
    r = text_from_cur(oref.old_dict_format(), english_cursor, context)

    # Add fields pertaining to the Hebrew text under different field names
    r.update({
        "he": hebrew.get("text", []),
        "heVersionTitle": hebrew.get("versionTitle", ""),
        "heVersionSource": hebrew.get("versionSource", ""),
        "heVersionStatus": hebrew.get("versionStatus", ""),
        "heLicense": hebrew.get("license", "unknown"),
    })
    hebrew_notes = hebrew.get("versionNotes", "")
    if hebrew_notes:
        r["heVersionNotes"] = hebrew_notes
    hebrew_digitized = hebrew.get("digitizedBySefaria", False)
    if hebrew_digitized:
        r["heDigitizedBySefaria"] = hebrew_digitized
    if "sources" in hebrew:
        r["heSources"] = hebrew.get("sources")

    # find commentary on this text if requested
    if commentary:
        from sefaria.client.wrapper import get_links
        context_oref = model.Ref(tref).padded_ref().context_ref(context)
        links = get_links(context_oref.normal())
        if "error" not in links:
            r["commentary"] = links
        else:
            r["commentary"] = []

        # get list of available versions of this text
        # but only if you care enough to get commentary also (hack)
        r["versions"] = get_version_list(tref)

    # use shorthand if present, masking higher level sections
    if "shorthand" in r:
        r["book"] = r["shorthand"]
        masked_levels = r["shorthandDepth"]
        for field in ("sections", "toSections", "sectionNames"):
            r[field] = r[field][masked_levels:]

    on_talmud = r["type"] == "Talmud" or r["type"] == "Commentary" and r["commentaryCategories"][0] == "Talmud"
    if on_talmud:
        # replace ints with daf strings (3->"2a") if text is Talmud or commentary on Talmud
        daf = r["sections"][0]
        r["sections"] = [section_to_daf(daf)] + r["sections"][1:]
        r["title"] = r["book"] + " " + r["sections"][0]
        if "heTitle" in r:
            r["heBook"] = r["heTitle"]
            r["heTitle"] = r["heTitle"] + " " + section_to_daf(daf, lang="he")
        if r["type"] == "Commentary" and len(r["sections"]) > 1:
            r["title"] = "%s Line %d" % (r["title"], r["sections"][1])
        if "toSections" in r:
            r["toSections"] = [r["sections"][0]] + r["toSections"][1:]
    elif r["type"] == "Commentary":
        # the title carries at most the first two section numbers
        shown_sections = r["sections"][:2] if len(r["sections"]) >= 2 else r["sections"][:len(r["sections"])]
        r["title"] = r["book"] + " " + ":".join(["%s" % s for s in shown_sections])

    return r
def test_get_links_on_range(self):
    """A ranged ref should carry as many links as its two segments have in total."""
    segment_counts = [len(get_links(tref)) for tref in ("Exodus 2:3", "Exodus 2:4")]
    assert len(get_links("Exodus 2:3-4")) == sum(segment_counts)
def section_data(oref, defaultVersions):
    """
    Build the export dictionary for one section of text.

    :param oref: section level ref whose text is loaded through `model.TextFamily`
    :param defaultVersions dict: {'en': Version, 'he': Version}
    :return: a dictionary with all the data we care about for section level `oref`:
        ref/title metadata, the normalized refs of the neighbouring sections ("next" and "prev",
        None when there is no neighbour), version details for any non-default or merged
        version, and a "content" list with the text and links of every segment.
    """
    tf = model.TextFamily(oref, version=None, lang=None, commentary=0, context=0, pad=0, alts=False,
                          stripItags=True)
    text = tf.contents()

    data = {key: text[key] for key in ("ref", "heRef", "indexTitle", "heTitle", "sectionRef")}
    data["next"] = oref.next_section_ref().normal() if oref.next_section_ref() else None
    data["prev"] = oref.prev_section_ref().normal() if oref.prev_section_ref() else None
    data["content"] = []

    def get_version_title(chunk):
        """Return (title, notes, license, source, title_he, notes_he); all None for a default version."""
        if chunk.is_merged:
            source_titles = set(chunk.sources)
            return 'Merged from {}'.format(', '.join(source_titles)), None, None, None, None, None

        version = chunk.version()
        if not (version and version.language in defaultVersions
                and version.versionTitle != defaultVersions[version.language].versionTitle):
            return None, None, None, None, None, None  # default version

        # optional attributes may be missing on a version; report those as None
        optional = [
            getattr(version, attr, None)
            for attr in ('versionNotes', 'license', 'versionSource')
        ]
        notes_he = getattr(version, 'versionNotesInHebrew', None)
        title_he = getattr(version, 'versionTitleInHebrew', None)
        return version.versionTitle, optional[0], optional[1], optional[2], title_he, notes_he

    en_details = get_version_title(tf._chunks['en'])
    he_details = get_version_title(tf._chunks['he'])

    # output keys, in the same order as the tuple returned by get_version_title
    output_keys = (
        ('versionTitle', 'heVersionTitle'),
        ('versionNotes', 'heVersionNotes'),
        ('license', 'heLicense'),
        ('versionSource', 'heVersionSource'),
        ('versionTitleInHebrew', 'heVersionTitleInHebrew'),
        ('versionNotesInHebrew', 'heVersionNotesInHebrew'),
    )
    for (en_key, he_key), en_value, he_value in zip(output_keys, en_details, he_details):
        if en_value:
            data[en_key] = en_value
        if he_value:
            data[he_key] = he_value

    en_len = len(text["text"])
    he_len = len(text["he"])
    segment_count = max(en_len, he_len)

    # group the section's links by the (1-based) segment numbers their anchor covers
    anchor_ref_dict = defaultdict(list)
    for link in get_links(text["ref"], False):
        anchor_oref = model.Ref(link["anchorRef"])
        if anchor_oref.is_segment_level() and len(anchor_oref.sections) != 0:
            first_seg = anchor_oref.sections[-1]
            # make sure sections are the same in range
            # TODO doesn't deal with links that span sections
            if anchor_oref.sections[0] == anchor_oref.toSections[0]:
                last_seg = anchor_oref.toSections[-1]
            else:
                last_seg = segment_count
            for seg_num in range(first_seg, last_seg + 1):
                anchor_ref_dict[seg_num].append(simple_link(link))
        # anything else is a section level link; don't bother with those

    for index in range(segment_count):
        segment = {"segmentNumber": str(index + 1)}
        segment_links = anchor_ref_dict[index + 1]
        if len(segment_links) > 0:
            segment["links"] = segment_links
        if index < en_len:
            segment["text"] = text["text"][index]
        if index < he_len:
            segment["he"] = text["he"][index]
        data["content"].append(segment)

    return data