def get_items_linked_to_ref(tref):
    """
    Collect the refs connected to `tref`, either directly or through one of its commentaries.

    Direct links are the links of `tref`'s section whose anchorRef range contains `tref`.
    For every direct link categorised as "Commentary" or "Modern Commentary" (Steinsaltz
    excluded), the links of that commentary ref are followed one step further.

    :param tref: string ref of the central text
    :return: two things:
                list of (tref, score, source) tuples - source is "direct" for direct links,
                    otherwise the tref of the commentary the link was reached through
                set of trefs - the direct links that are commentaries
    """
    oref = Ref(tref)
    commentary_categories = ("Commentary", "Modern Commentary")

    # a set, because the same link sometimes comes back more than once
    linked_pairs = set()
    for link in get_links(oref.section_ref().normal(), with_text=False):
        if oref in Ref(link["anchorRef"]).range_list():
            linked_pairs.add((link["ref"], link["category"] in commentary_categories))

    second_hop = []
    seen_by_author = set()
    for link_tref, is_comment in linked_pairs:
        # Steinsaltz is hard-coded to have same connections as Talmud which will double count Talmud connections
        if not is_comment or link_tref.startswith("Steinsaltz on "):
            continue
        author = getattr(Ref(link_tref).index, "collective_title", None)
        onward_trefs = [link["ref"] for link in get_links(link_tref, with_text=False)]
        normalized, _, _ = normalize_related_refs(onward_trefs, None, COMMENTARY_LINK_SCORE)
        for target in normalized:
            key = (target, author)
            if author is not None and key in seen_by_author:
                # this author already contributed this ref
                continue
            seen_by_author.add(key)
            second_hop.append((target, COMMENTARY_LINK_SCORE, link_tref))

    # freeze the set's order so trefs and their commentary flags stay aligned
    ordered_pairs = list(linked_pairs)
    direct_trefs, _, comment_flags = normalize_related_refs(
        [pair[0] for pair in ordered_pairs],
        None,
        DIRECT_LINK_SCORE,
        other_data=[pair[1] for pair in ordered_pairs])

    final_refs = [(direct_tref, DIRECT_LINK_SCORE, "direct") for direct_tref in direct_trefs] + second_hop
    commentary_ref_set = {direct_tref for direct_tref, flagged in zip(direct_trefs, comment_flags) if flagged}
    return final_refs, commentary_ref_set
Ejemplo n.º 2
0
    def test_get_links_on_range(self):
        """Links of a ranged ref must be exactly the union of the links of its segments."""

        def signatures(tref):
            # identify each link by its ref and type concatenated
            return {link["ref"] + link["type"] for link in get_links(tref)}

        first = signatures("Exodus 2:3")
        second = signatures("Exodus 2:4")
        ranged = signatures("Exodus 2:3-4")

        # All links in first segment present in range
        assert first <= ranged
        # All links in second segment present in range
        assert second <= ranged
        # No links in range absent from segments
        assert ranged <= (first | second)
Ejemplo n.º 3
0
    def get_recs_thru_links(oref):
        '''
        Given a ref, returns items connected to it through links - direct links and links reached through commentaries.
        Direct links are the links of the ref's section whose anchorRef range contains the ref.
        :param oref: the central Ref
        :return: Three things:
                    list of `Recommendation`s - direct links first, then links found through commentaries
                    set of trefs - the direct links that are commentaries ("Commentary" / "Modern Commentary")
                    set of trefs - all of the direct links
        '''

        commentary_categories = ("Commentary", "Modern Commentary")
        # a set, because the same link sometimes comes back more than once
        linked_pairs = set()
        for link in get_links(oref.section_ref().normal(), with_text=False):
            if oref in Ref(link["anchorRef"]).range_list():
                linked_pairs.add((link["ref"], link["category"] in commentary_categories))

        via_commentary = []
        seen_per_author = set()
        for link_tref, is_comment in linked_pairs:
            # Steinsaltz is hard-coded to have same connections as Talmud which will double count Talmud connections
            if not is_comment or link_tref.startswith("Steinsaltz on "):
                continue
            author = getattr(Ref(link_tref).index, "collective_title", None)
            onward_trefs = [link["ref"] for link in get_links(link_tref, with_text=False)]
            normalized, _, _ = RecommendationEngine.normalize_related_refs(
                onward_trefs, None, COMMENTARY_LINK_SCORE)
            for target in normalized:
                key = (target, author)
                if author is not None and key in seen_per_author:
                    # this author already contributed this ref
                    continue
                seen_per_author.add(key)
                via_commentary.append(
                    Recommendation(Ref(target), relevance=COMMENTARY_LINK_SCORE, sources=[link_tref]))

        # freeze the set's order so trefs and their commentary flags stay aligned
        ordered_pairs = list(linked_pairs)
        direct_trefs, _, comment_flags = RecommendationEngine.normalize_related_refs(
            [pair[0] for pair in ordered_pairs],
            None,
            DIRECT_LINK_SCORE,
            other_data=[pair[1] for pair in ordered_pairs])

        direct_ref_set = set(direct_trefs)
        final_rex = [
            Recommendation(Ref(direct_tref), relevance=DIRECT_LINK_SCORE, sources=["direct"])
            for direct_tref in direct_trefs
        ] + via_commentary
        commentary_ref_set = {
            direct_tref for direct_tref, flagged in zip(direct_trefs, comment_flags) if flagged
        }
        return final_rex, commentary_ref_set, direct_ref_set
def get_items_linked_to_ref(tref):
    """
    Return the refs linked to `tref`, plus the refs linked to its commentaries.

    :param tref: string ref of the central text
    :return: (final_refs, commentary_ref_set)
                final_refs - list of (tref, score, source); source is "direct" for links
                    anchored on `tref` itself, otherwise the commentary tref that led there
                commentary_ref_set - set of the direct trefs whose category is
                    "Commentary" or "Modern Commentary"
    """
    central_ref = Ref(tref)
    section_tref = central_ref.section_ref().normal()

    # gathered in a set since duplicate links do occur;
    # only links whose anchor range covers the central ref are kept
    first_hop = {
        (link["ref"], link["category"] in ("Commentary", "Modern Commentary"))
        for link in get_links(section_tref, with_text=False)
        if central_ref in Ref(link["anchorRef"]).range_list()
    }

    through_commentary = []
    already_added = set()
    for commentary_tref, is_comment in first_hop:
        # Steinsaltz is hard-coded to have same connections as Talmud which will double count Talmud connections
        follow = is_comment and not commentary_tref.startswith("Steinsaltz on ")
        if not follow:
            continue
        commentary_oref = Ref(commentary_tref)
        author = getattr(commentary_oref.index, "collective_title", None)
        linked_trefs = [link["ref"] for link in get_links(commentary_tref, with_text=False)]
        reachable, _, _ = normalize_related_refs(linked_trefs, None, COMMENTARY_LINK_SCORE)
        for reachable_tref in reachable:
            duplicate = author is not None and (reachable_tref, author) in already_added
            if not duplicate:
                already_added.add((reachable_tref, author))
                through_commentary.append((reachable_tref, COMMENTARY_LINK_SCORE, commentary_tref))

    # one snapshot of the set keeps each tref lined up with its commentary flag
    snapshot = list(first_hop)
    flags = [is_comment for _, is_comment in snapshot]
    trefs = [linked_tref for linked_tref, _ in snapshot]
    direct_trefs, _, flags = normalize_related_refs(trefs, None, DIRECT_LINK_SCORE, other_data=flags)

    final_refs = [(direct_tref, DIRECT_LINK_SCORE, "direct") for direct_tref in direct_trefs]
    final_refs = final_refs + through_commentary
    commentary_ref_set = set()
    for direct_tref, is_comment in zip(direct_trefs, flags):
        if is_comment:
            commentary_ref_set.add(direct_tref)
    return final_refs, commentary_ref_set
    def get_recs_thru_links(oref):
        '''
        Given a ref, returns items connected to central ref through links - direct links and links through commentaries.
        A ref that spans sections is handled by collecting the links of every section it touches.
        :param oref: the central Ref (may be a range, may span sections)
        :return: Three things:
                    list of `Recommendation`s - direct links first, then links found through commentaries
                    set of trefs - the direct links that are commentaries ("Commentary" / "Modern Commentary")
                    set of trefs - all of the direct links
        '''

        commentary_categories = ('Commentary', 'Modern Commentary')
        range_set = {r.normal() for r in oref.all_segment_refs()}

        # set is used b/c sometimes there are duplicate links
        direct_links = set()
        for section_ref in [r.section_ref() for r in oref.split_spanning_ref()]:
            # query each section of the span in turn (not oref.section_ref() every time)
            for l in get_links(section_ref.normal(), with_text=False):
                anchor_ref = Ref(l['anchorRef'])
                # keep only links anchored on at least one segment of oref
                if range_set.isdisjoint(r.normal() for r in anchor_ref.range_list()):
                    continue
                direct_links.add((l['ref'], l['category'] in commentary_categories, anchor_ref))

        commentary_links = []
        commentary_author_set = set()
        for link_tref, is_comment, anchor_ref in direct_links:
            # Steinsaltz is hard-coded to have same connections as Talmud which will double count Talmud connections
            if not is_comment or link_tref.startswith("Steinsaltz on "):
                continue
            author = getattr(Ref(link_tref).index, "collective_title", None)
            temp_commentary_links, _, _, _ = RecommendationEngine.normalize_related_refs(
                [x["ref"] for x in get_links(link_tref, with_text=False)], None, COMMENTARY_LINK_SCORE)
            for commentary_link in temp_commentary_links:
                if author is not None and (commentary_link, author) in commentary_author_set:
                    # don't add same ref twice from same author
                    continue
                commentary_author_set.add((commentary_link, author))
                commentary_links.append(Recommendation(
                    Ref(commentary_link),
                    relevance=COMMENTARY_LINK_SCORE,
                    sources=[RecommendationSource(link_tref, anchor_ref)]))

        # snapshot the set so trefs stay aligned with their (is_comment, anchor_ref) data
        ordered_links = list(direct_links)
        other_data = [(x[1], x[2]) for x in ordered_links]
        direct_links, _, other_data, _ = RecommendationEngine.normalize_related_refs(
            [x[0] for x in ordered_links], None, DIRECT_LINK_SCORE, other_data=other_data)
        direct_ref_set = set(direct_links)

        # zip(*[]) yields nothing to unpack, so the no-links case needs its own branch
        if other_data:
            is_comment_list, anchor_ref_list = zip(*other_data)
        else:
            is_comment_list, anchor_ref_list = (), ()

        final_rex = [
            Recommendation(Ref(x), relevance=DIRECT_LINK_SCORE, sources=[RecommendationSource('direct', anchor_ref)])
            for x, anchor_ref in zip(direct_links, anchor_ref_list)
        ] + commentary_links
        commentary_ref_set = {x for x, is_comment in zip(direct_links, is_comment_list) if is_comment}
        return final_rex, commentary_ref_set, direct_ref_set
    def section_data(self, oref: model.Ref, default_versions: dict) -> dict:
        """
        Returns a dictionary with all the data we care about for section level `oref`.

        :param oref: section level Ref instance
        :param default_versions: {'en': Version, 'he': Version}
        :return: dict holding the section's refs and titles, the neighbouring section refs
                 ("next"/"prev", None when absent), version details for any non-default or
                 merged version, and "content": one entry per segment with its number, its
                 text per language where present, and the links anchored on it.
        """
        prev = oref.prev_section_ref(vstate=self.version_state)
        next_ref = oref.next_section_ref(vstate=self.version_state)

        data = {
            "ref": oref.normal(),
            "heRef": oref.he_normal(),
            "indexTitle": oref.index.title,
            "heTitle": oref.index.get_title('he'),
            "sectionRef": oref.normal(),
            "next": next_ref.normal() if next_ref else None,
            "prev": prev.normal() if prev else None,
            "content": [],
        }

        def get_version_title(chunk):
            """
            Return (versionTitle, versionNotes, license, versionSource, versionTitleInHebrew,
            versionNotesInHebrew) for `chunk`; all None when the chunk uses the default version.
            """
            if chunk.is_merged:
                all_versions = set(chunk.sources)
                merged_version = 'Merged from {}'.format(', '.join(all_versions))
                return merged_version, None, None, None, None, None

            version = chunk.version()
            is_non_default = (
                version
                and version.language in default_versions
                and version.versionTitle != default_versions[version.language].versionTitle
            )
            if not is_non_default:
                return None, None, None, None, None, None  # default version

            # any of these optional attributes may be missing on a Version
            return (
                version.versionTitle,
                getattr(version, 'versionNotes', None),
                getattr(version, 'license', None),
                getattr(version, 'versionSource', None),
                getattr(version, 'versionTitleInHebrew', None),
                getattr(version, 'versionNotesInHebrew', None),
            )

        node_title = oref.index_node.full_title()
        node_texts = self._text_map[node_title]
        # Hebrew metadata has to come from the Hebrew chunk, not the English one.
        # NOTE(review): assumes _text_map entries carry 'he_chunk' next to 'en_chunk', the way
        # they carry 'he_ja' next to 'en_ja' - confirm where _text_map is built.
        en_details = get_version_title(node_texts['en_chunk'])
        he_details = get_version_title(node_texts['he_chunk'])

        # key pairs in the order get_version_title returns its values
        detail_keys = (
            ('versionTitle', 'heVersionTitle'),
            ('versionNotes', 'heVersionNotes'),
            ('license', 'heLicense'),
            ('versionSource', 'heVersionSource'),
            ('versionTitleInHebrew', 'heVersionTitleInHebrew'),
            ('versionNotesInHebrew', 'heVersionNotesInHebrew'),
        )
        for (en_key, he_key), en_value, he_value in zip(detail_keys, en_details, he_details):
            if en_value:
                data[en_key] = en_value
            if he_value:
                data[he_key] = he_value

        # sections are 1-based in the ref, 0-based in the jagged array
        address = [j - 1 for j in oref.sections]
        try:
            en_text = node_texts['en_ja'].get_element(address)
        except IndexError:
            en_text = []
        try:
            he_text = node_texts['he_ja'].get_element(address)
        except IndexError:
            he_text = []

        en_len = len(en_text)
        he_len = len(he_text)
        segment_count = max(en_len, he_len)

        # group the section's links by the segment number(s) they are anchored on
        anchor_ref_dict = defaultdict(list)
        for link in get_links(oref.normal(), False):
            anchor_oref = model.Ref(link["anchorRef"])
            if not anchor_oref.is_segment_level() or len(anchor_oref.sections) == 0:
                continue  # don't bother with section level links
            start_seg_num = anchor_oref.sections[-1]
            # make sure sections are the same in range
            # TODO doesn't deal with links that span sections
            if anchor_oref.sections[0] == anchor_oref.toSections[0]:
                end_seg_num = anchor_oref.toSections[-1]
            else:
                end_seg_num = segment_count
            for seg_num in range(start_seg_num, end_seg_num + 1):
                anchor_ref_dict[seg_num].append(simple_link(link))

        for x in range(segment_count):
            curContent = {"segmentNumber": str(x + 1)}
            links = anchor_ref_dict[x + 1]
            if links:
                curContent["links"] = links
            if x < en_len:
                curContent["text"] = en_text[x]
            if x < he_len:
                curContent["he"] = he_text[x]
            data["content"].append(curContent)

        return data
 def get_sorted_links(self):
     """Return the links of this index's title, ordered by `self.sort_key` of each link's anchorRef."""

     def by_anchor(link):
         return self.sort_key(link['anchorRef'])

     unsorted_links = get_links(self.index_obj.title, False, False)
     return sorted(unsorted_links, key=by_anchor)
Ejemplo n.º 8
0
def export_text(title):
	"""
	Export the text `title` as one JSON document per subref.

	For every subref of `model.Ref(title).all_subrefs()`, its contents are loaded,
	the "next" and "prev" section refs are attached, and a "content" list is built
	with one entry per segment: its number, its English and Hebrew text ("" when a
	language has no such segment) and its links with the fields in
	`required_link_fields` (and "heTitle") removed. Version metadata and the raw
	"text"/"he" arrays are dropped before the document is handed to `write_doc`
	with the path from `make_path`.

	"Sha'ar Ha'Gemul of the Ramban 1" is skipped.

	Any exception raised along the way is logged as a warning and stops the export
	of the remaining subrefs of this title.
	"""
	# fields deleted from every link; a link lacking one of them raises KeyError,
	# which the handler below logs
	required_link_fields = (
		'commentator', 'heCommentator', 'type', 'anchorText', 'commentaryNum',
		'_id', 'anchorRef', 'ref', 'anchorVerse',
	)
	# keys dropped from the exported document if present
	dropped_text_fields = (
		"maps", "versionSource", "heDigitizedBySefaria", "heVersionTitle",
		"heVersionNotes", "heVersionStatus", "isSpanning", "heVersionSource",
		"versionNotes", "versionTitle", "heLicense", "digitizedBySefaria",
		"versions", "license", "versionStatus", "heSources", "sources",
		"he", "text",
	)

	# single-argument print(...) behaves the same on Python 2 and 3
	print(title)
	try:
		for oref in model.Ref(title).all_subrefs():
			text = model.TextFamily(oref, version=None, lang=None, commentary=0, context=0, pad=0, alts=False).contents()
			next_ref = oref.next_section_ref()
			text["next"] = next_ref.normal() if next_ref else None
			prev_ref = oref.prev_section_ref()
			text["prev"] = prev_ref.normal() if prev_ref else None
			text["content"] = []

			if str(oref) == "Sha'ar Ha'Gemul of the Ramban 1":
				print("Sha'ar Ha'Gemul of the Ramban 1 is the worst")
				continue

			en_segments = text["text"]
			he_segments = text["he"]
			for x in range(0, max(len(en_segments), len(he_segments))):
				curContent = {}
				curContent["segmentNumber"] = str(x + 1)

				links = get_links(text["ref"] + ":" + curContent["segmentNumber"], False)
				for link in links:
					for field in required_link_fields:
						del link[field]
					link.pop('heTitle', None)
				curContent["links"] = links

				curContent["text"] = en_segments[x] if x < len(en_segments) else ""
				curContent["he"] = he_segments[x] if x < len(he_segments) else ""

				text["content"].append(curContent)

			for field in dropped_text_fields:
				text.pop(field, None)

			path = make_path(text, "json")
			write_doc(text, path)
	except Exception as e:
		logging.warning(e)
Ejemplo n.º 9
0
	def test_get_links_on_range(self):
		"""A ranged ref must return as many links as its two segments return separately."""
		separate_total = sum(len(get_links(tref)) for tref in ("Exodus 2:3", "Exodus 2:4"))
		ranged_total = len(get_links("Exodus 2:3-4"))
		assert ranged_total == separate_total
Ejemplo n.º 10
0
def section_data(oref, defaultVersions):
    """
    Returns a dictionary with all the data we care about for section level `oref`.

    :param oref: section level Ref
    :param defaultVersions dict: {'en': Version, 'he': Version}
    :return: dict holding the section's refs and titles, the neighbouring section refs
             ("next"/"prev", None when absent), version details for any non-default or
             merged version, and "content": one entry per segment with its number, its
             text per language where present, and its links (when it has any).
    """
    tf = model.TextFamily(oref,
                          version=None,
                          lang=None,
                          commentary=0,
                          context=0,
                          pad=0,
                          alts=False)
    text = tf.contents()
    next_ref = oref.next_section_ref()
    prev_ref = oref.prev_section_ref()
    data = {
        "ref": text["ref"],
        "heRef": text["heRef"],
        "indexTitle": text["indexTitle"],
        "heTitle": text["heTitle"],
        "sectionRef": text["sectionRef"],
        "next": next_ref.normal() if next_ref else None,
        "prev": prev_ref.normal() if prev_ref else None,
        "content": [],
    }

    def get_version_title(chunk):
        """
        Return (versionTitle, versionNotes, license, versionSource) for `chunk`;
        all None when the chunk uses the default version.
        """
        if chunk.is_merged:
            all_versions = set(chunk.sources)
            merged_version = u'Merged from {}'.format(u', '.join(all_versions))
            return merged_version, None, None, None

        version = chunk.version()
        is_non_default = (
            version
            and version.language in defaultVersions
            and version.versionTitle != defaultVersions[version.language].versionTitle
        )
        if not is_non_default:
            return None, None, None, None  # default version

        # any of these optional attributes may be missing on a Version
        return (
            version.versionTitle,
            getattr(version, 'versionNotes', None),
            getattr(version, 'license', None),
            getattr(version, 'versionSource', None),
        )

    en_details = get_version_title(tf._chunks['en'])
    he_details = get_version_title(tf._chunks['he'])

    # key pairs in the order get_version_title returns its values
    detail_keys = (
        ('versionTitle', 'heVersionTitle'),
        ('versionNotes', 'heVersionNotes'),
        ('license', 'heLicense'),
        ('versionSource', 'heVersionSource'),
    )
    for (en_key, he_key), en_value, he_value in zip(detail_keys, en_details, he_details):
        if en_value:
            data[en_key] = en_value
        if he_value:
            data[he_key] = he_value

    en_len = len(text["text"])
    he_len = len(text["he"])
    # range() instead of xrange() so the loop also runs on Python 3
    for x in range(0, max([en_len, he_len])):
        curContent = {}
        curContent["segmentNumber"] = str(x + 1)

        # links are fetched segment by segment
        links = get_links(text["ref"] + ":" + curContent["segmentNumber"], False)
        if len(links) > 0:
            curContent["links"] = [simple_link(link) for link in links]

        if x < en_len:
            curContent["text"] = text["text"][x]
        if x < he_len:
            curContent["he"] = text["he"][x]

        data["content"].append(curContent)

    return data
Ejemplo n.º 11
0
def get_text(tref, context=1, commentary=True, version=None, lang=None, pad=True):
    """
    Look up the text for the string reference `tref` and return it, together with
    related info, as a dictionary.
        * 'context': number of levels above the requested ref to include.
            e.g., context=1 on a verse also returns the chapter around it;
            context=0 returns only what was requested.
        * 'commentary': when true, connected texts are attached under "commentary"
            and the list of available versions under "versions".
        * 'version' + 'lang': together select one particular version of the text.
        * 'pad': when true, the ref is replaced by its padded_ref() before lookup.
    A ref that spans sections is handed off to get_spanning_text().
    """
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()

    if oref.is_spanning():
        # each section of a spanning ref is fetched separately
        return get_spanning_text(oref)

    # Projection shared by every query below: never return _id, and slice out only the
    # requested chapter unless the index has exactly one section name
    chapter_slice = {"_id": 0}
    if len(oref.sections) and len(oref.index.sectionNames) != 1:
        chapter_slice["chapter"] = {"$slice": [oref.sections[0] - 1, 1]}

    def all_versions(language):
        # Sorted by "priority" field (if present), then oldest text first.
        # This order determines which versions are used in case of a merge.
        query = {"title": oref.book, "language": language}
        return db.texts.find(query, chapter_slice).sort([["priority", -1], ["_id", 1]])

    # a specific version can be pinned for one language only
    pinned = {}
    if version and lang in ("en", "he"):
        pinned[lang] = db.texts.find(
            {"title": oref.book, "language": lang, "versionTitle": version},
            chapter_slice)

    textCur = pinned.get("en") or all_versions("en")
    heCur = pinned.get("he") or all_versions("he")

    # Conversion to Ref bogged down here, and resorted to old_dict_format(). todo: Push through to the end
    # Hebrew is extracted from a copy of the ref dict first, since text_from_cur alters the dict it is given
    heRef = text_from_cur(copy.copy(oref.old_dict_format()), heCur, context)
    r = text_from_cur(oref.old_dict_format(), textCur, context)

    # Hebrew fields are stored on the result under he-prefixed names
    for target_key, source_key, fallback in (
            ("he", "text", []),
            ("heVersionTitle", "versionTitle", ""),
            ("heVersionSource", "versionSource", ""),
            ("heVersionStatus", "versionStatus", ""),
            ("heLicense", "license", "unknown")):
        r[target_key] = heRef.get(source_key, fallback)
    he_notes = heRef.get("versionNotes", "")
    if he_notes:
        r["heVersionNotes"] = he_notes
    he_digitized = heRef.get("digitizedBySefaria", False)
    if he_digitized:
        r["heDigitizedBySefaria"] = he_digitized
    if "sources" in heRef:
        r["heSources"] = heRef["sources"]

    if commentary:
        # connected texts, looked up on the ref widened by `context`
        from sefaria.client.wrapper import get_links
        searchRef = model.Ref(tref).padded_ref().context_ref(context).normal()
        links = get_links(searchRef)
        r["commentary"] = [] if "error" in links else links

        # the version list is only fetched along with commentary (hack)
        r["versions"] = get_version_list(tref)

    if "shorthand" in r:
        # show the shorthand as the book and hide the higher level sections it covers
        r["book"] = r["shorthand"]
        depth = r["shorthandDepth"]
        for key in ("sections", "toSections", "sectionNames"):
            r[key] = r[key][depth:]

    is_commentary = r["type"] == "Commentary"
    if r["type"] == "Talmud" or (is_commentary and r["commentaryCategories"][0] == "Talmud"):
        # Talmud and its commentaries address the first section as a daf string (3->"2a")
        daf = r["sections"][0]
        daf_str = section_to_daf(daf)
        r["sections"] = [daf_str] + r["sections"][1:]
        r["title"] = r["book"] + " " + daf_str
        if "heTitle" in r:
            r["heBook"] = r["heTitle"]
            r["heTitle"] = r["heTitle"] + " " + section_to_daf(daf, lang="he")
        if is_commentary and len(r["sections"]) > 1:
            r["title"] = "%s Line %d" % (r["title"], r["sections"][1])
        if "toSections" in r:
            r["toSections"] = [daf_str] + r["toSections"][1:]

    elif is_commentary:
        # other commentaries are titled by at most their first two sections
        r["title"] = r["book"] + " " + ":".join(["%s" % s for s in r["sections"][:2]])

    return r
Ejemplo n.º 12
0
def get_text(tref, context=1, commentary=True, version=None, lang=None, pad=True):
	"""
	Resolve the string reference `tref` and return a dictionary with its text and
	accompanying info.
		* 'context': levels of depth above the requested ref to return as well.
			e.g., context=1 on a verse brings back its surrounding chapter;
			context=0 brings back exactly what was asked for.
		* 'commentary': if true, connected texts are returned under "commentary"
			and the available versions under "versions".
		* 'version' + 'lang': name one particular version of the text to return.
		* 'pad': if true, lookup is done on the ref's padded_ref().
	Refs spanning sections are delegated to get_spanning_text().
	"""
	oref = model.Ref(tref)
	if pad:
		oref = oref.padded_ref()

	if oref.is_spanning():
		# spanning refs are fetched one section at a time
		return get_spanning_text(oref)

	# Query projection: drop _id; slice out the single requested chapter unless
	# the index has exactly one section name
	chapter_slice = {"_id": 0}
	if len(oref.sections) and len(oref.index.sectionNames) != 1:
		first_chapter = oref.sections[0] - 1
		chapter_slice["chapter"] = {"$slice": [first_chapter, 1]}

	def find_texts(criteria):
		return db.texts.find(criteria, chapter_slice)

	textCur = heCur = None
	# pull a specific version of text, for the one language it was requested in
	if version and lang == "en":
		textCur = find_texts({"title": oref.book, "language": lang, "versionTitle": version})
	elif version and lang == "he":
		heCur = find_texts({"title": oref.book, "language": lang, "versionTitle": version})

	# Where no specific version was pulled, take every version of that language,
	# by "priority" field (if present), then oldest text first.
	# This order determines which versions are used in case of a merge
	if not textCur:
		textCur = find_texts({"title": oref.book, "language": "en"}).sort([["priority", -1], ["_id", 1]])
	if not heCur:
		heCur = find_texts({"title": oref.book, "language": "he"}).sort([["priority", -1], ["_id", 1]])

	# Conversion to Ref bogged down here, and resorted to old_dict_format(). todo: Push through to the end
	# Hebrew goes first and gets a copy of the ref dict, since text_from_cur alters what it is given
	he_dict = copy.copy(oref.old_dict_format())
	heRef = text_from_cur(he_dict, heCur, context)
	r = text_from_cur(oref.old_dict_format(), textCur, context)

	# Hebrew results live on `r` under he-prefixed field names
	r.update({
		"he": heRef.get("text", []),
		"heVersionTitle": heRef.get("versionTitle", ""),
		"heVersionSource": heRef.get("versionSource", ""),
		"heVersionStatus": heRef.get("versionStatus", ""),
		"heLicense": heRef.get("license", "unknown"),
	})
	notes = heRef.get("versionNotes", "")
	if notes:
		r["heVersionNotes"] = notes
	digitized = heRef.get("digitizedBySefaria", False)
	if digitized:
		r["heDigitizedBySefaria"] = digitized
	if "sources" in heRef:
		r["heSources"] = heRef.get("sources")

	if commentary:
		# connected texts are searched on the ref widened by `context`
		from sefaria.client.wrapper import get_links
		searchRef = model.Ref(tref).padded_ref().context_ref(context).normal()
		links = get_links(searchRef)
		if "error" in links:
			links = []
		r["commentary"] = links

		# available versions are only listed when commentary is requested too (hack)
		r["versions"] = get_version_list(tref)

	if "shorthand" in r:
		# present the shorthand as the book, masking the higher level sections
		r["book"] = r["shorthand"]
		d = r["shorthandDepth"]
		for key in ("sections", "toSections", "sectionNames"):
			r[key] = r[key][d:]

	text_type = r["type"]
	on_talmud = text_type == "Commentary" and r["commentaryCategories"][0] == "Talmud"
	if text_type == "Talmud" or on_talmud:
		# swap the int section for its daf string (3->"2a")
		daf = r["sections"][0]
		r["sections"] = [section_to_daf(daf)] + r["sections"][1:]
		r["title"] = r["book"] + " " + r["sections"][0]
		if "heTitle" in r:
			he_book = r["heTitle"]
			r["heBook"] = he_book
			r["heTitle"] = he_book + " " + section_to_daf(daf, lang="he")
		if text_type == "Commentary" and len(r["sections"]) > 1:
			r["title"] = "%s Line %d" % (r["title"], r["sections"][1])
		if "toSections" in r:
			r["toSections"] = [r["sections"][0]] + r["toSections"][1:]
	elif text_type == "Commentary":
		# title by at most the first two sections
		d = min(len(r["sections"]), 2)
		shown = ["%s" % s for s in r["sections"][:d]]
		r["title"] = r["book"] + " " + ":".join(shown)

	return r
Ejemplo n.º 13
0
 def test_get_links_on_range(self):
     """The link count of a range must equal the summed link counts of its two segments."""
     segment_counts = [len(get_links(tref)) for tref in ("Exodus 2:3", "Exodus 2:4")]
     range_count = len(get_links("Exodus 2:3-4"))
     assert range_count == sum(segment_counts)
Ejemplo n.º 14
0
def section_data(oref, defaultVersions):
    """
    Gather everything we export for section level `oref` into one dictionary.

    :param oref: section level Ref
    :param defaultVersions dict: {'en': Version, 'he': Version}
    :return: dict holding the section's refs and titles, the neighbouring section refs
             ("next"/"prev", None when absent), version details for any non-default or
             merged version, and "content": one entry per segment with its number, its
             text per language where present, and the links anchored on it.
    """
    tf = model.TextFamily(oref, version=None, lang=None, commentary=0, context=0,
                          pad=0, alts=False, stripItags=True)
    text = tf.contents()

    data = {key: text[key] for key in ("ref", "heRef", "indexTitle", "heTitle", "sectionRef")}
    data["next"] = oref.next_section_ref().normal() if oref.next_section_ref() else None
    data["prev"] = oref.prev_section_ref().normal() if oref.prev_section_ref() else None
    data["content"] = []

    def get_version_title(chunk):
        """
        Return (versionTitle, versionNotes, license, versionSource, versionTitleInHebrew,
        versionNotesInHebrew) for `chunk`; all None when the chunk uses the default version.
        """
        if chunk.is_merged:
            all_versions = set(chunk.sources)
            return ('Merged from {}'.format(', '.join(all_versions)),) + (None,) * 5

        version = chunk.version()
        if (not version
                or version.language not in defaultVersions
                or version.versionTitle == defaultVersions[version.language].versionTitle):
            return (None,) * 6  # default version

        # any of these optional attributes may be missing on a Version
        optional_attrs = ('versionNotes', 'license', 'versionSource',
                          'versionTitleInHebrew', 'versionNotesInHebrew')
        return (version.versionTitle,) + tuple(getattr(version, attr, None) for attr in optional_attrs)

    en_details = get_version_title(tf._chunks['en'])
    he_details = get_version_title(tf._chunks['he'])

    # key pairs in the order get_version_title returns its values
    detail_keys = (
        ('versionTitle', 'heVersionTitle'),
        ('versionNotes', 'heVersionNotes'),
        ('license', 'heLicense'),
        ('versionSource', 'heVersionSource'),
        ('versionTitleInHebrew', 'heVersionTitleInHebrew'),
        ('versionNotesInHebrew', 'heVersionNotesInHebrew'),
    )
    for (en_key, he_key), en_value, he_value in zip(detail_keys, en_details, he_details):
        if en_value:
            data[en_key] = en_value
        if he_value:
            data[he_key] = he_value

    en_segments = text["text"]
    he_segments = text["he"]
    en_len = len(en_segments)
    he_len = len(he_segments)
    segment_count = max(en_len, he_len)

    # group the section's links by the segment number(s) they are anchored on
    anchor_ref_dict = defaultdict(list)
    for link in get_links(text["ref"], False):
        anchor_oref = model.Ref(link["anchorRef"])
        if not anchor_oref.is_segment_level() or len(anchor_oref.sections) == 0:
            continue  # don't bother with section level links
        first_seg = anchor_oref.sections[-1]
        # make sure sections are the same in range
        # TODO doesn't deal with links that span sections
        if anchor_oref.sections[0] == anchor_oref.toSections[0]:
            last_seg = anchor_oref.toSections[-1]
        else:
            last_seg = segment_count
        for seg_num in range(first_seg, last_seg + 1):
            anchor_ref_dict[seg_num].append(simple_link(link))

    for seg_num in range(1, segment_count + 1):
        idx = seg_num - 1
        segment = {"segmentNumber": str(seg_num)}
        segment_links = anchor_ref_dict[seg_num]
        if segment_links:
            segment["links"] = segment_links
        if idx < en_len:
            segment["text"] = en_segments[idx]
        if idx < he_len:
            segment["he"] = he_segments[idx]
        data["content"].append(segment)

    return data