Esempio n. 1
0
def generate_refs_list(query=None):
	"""
	Generate a list of refs to all available sections.

	Every count record in `db.counts` matching `query` that carries a "title" is
	expanded into one string ref per section available in either Hebrew or English.

	:param query: filter passed to `db.counts.find`. Defaults to an empty filter.
	:return: list of string refs of the form "<title> <section>", or just "<title>"
		when the section is empty.

	Side effect: a count record whose title cannot be resolved by `model.get_index`
	is removed from `db.counts`.
	"""
	if query is None:
		query = {}

	trefs = []
	for count in db.counts.find(query):
		if "title" not in count:
			continue  # this is a category count

		title = count["title"]
		try:
			index = model.get_index(title)
		except Exception:
			# If there is no index record to match the count record,
			# the count should be removed.
			db.counts.remove(count)
			continue

		he = list_from_counts(count["availableTexts"]["he"])
		en = list_from_counts(count["availableTexts"]["en"])
		for n in texts.union(he, en):
			commentary_categories = getattr(index, "commentaryCategories", None)
			if index.categories[0] == "Talmud":
				n = section_to_daf(int(n))
			elif commentary_categories and commentary_categories[0] == "Talmud":
				# Only the first ":"-separated part is a daf number. Convert the whole
				# part (e.g. "12"), not just its first character.
				parts = n.split(":")
				n = ":".join([section_to_daf(int(parts[0]))] + parts[1:])
			trefs.append("%s %s" % (title, n) if n else title)

	return trefs
Esempio n. 2
0
	def flatten(text, sectionNames):
		"""
		Collapse a nested text structure into a single string.

		At the deepest level (one section name left) the segments are joined with
		newlines. At higher levels each child is preceded by a heading made of the
		current section name and its 1-based number (a daf string when the section
		name is "Daf").
		"""
		text = text or ""
		if len(sectionNames) == 1:
			lines = []
			for segment in text:
				segment = segment or ""
				# Bandaid for mismatch between text structure: a segment that is a
				# list rather than a string is joined one level down.
				if isinstance(segment, basestring):
					lines.append(segment)
				else:
					lines.append("\n".join(segment))
			return "\n".join(lines)

		pieces = []
		for position, subtext in enumerate(text):
			number = position + 1
			if sectionNames[0] == "Daf":
				label = section_to_daf(number)
			else:
				label = str(number)
			body = flatten(subtext, sectionNames[1:])
			pieces.append("\n\n%s %s\n\n%s" % (sectionNames[0], label, body))
		return "".join(pieces)
Esempio n. 3
0
	def flatten(text, sectionNames):
		"""
		Render a nested text structure as one flat string.

		With a single section name left, the segments are joined by newlines.
		Otherwise every child gets a heading line built from the current section
		name and its 1-based position (converted to a daf string for "Daf"),
		followed by the flattened child.
		"""
		text = text or ""
		if len(sectionNames) == 1:
			rendered = []
			for item in text:
				item = item or ""
				# Bandaid for mismatch between text structure: join one level
				# down when an element is a list instead of a string.
				if isinstance(item, basestring):
					rendered.append(item)
				else:
					rendered.append("\n".join(item))
			return "\n".join(rendered)

		chunks = []
		for offset, child in enumerate(text):
			ordinal = offset + 1
			if sectionNames[0] == "Daf":
				heading = section_to_daf(ordinal)
			else:
				heading = str(ordinal)
			flattened_child = flatten(child, sectionNames[1:])
			chunks.append("\n\n%s %s\n\n%s" % (sectionNames[0], heading, flattened_child))
		return "".join(chunks)
    def resolve_quote(self, quote, talmud_ref, tolerance=70):
        """
        Find the best match for `quote` among the refs mapped to the daf of `talmud_ref`.

        The candidate refs are read from `self.map[book]['Start Daf'][daf]`; if that
        lookup raises KeyError, `self.map[book]['End Daf'][daf]` is used instead.

        :param quote: the quoted string to match (must be a basestring)
        :param talmud_ref: Ref whose book and first section select the candidates
        :param tolerance: passed through unchanged to `self.find_best_match`
        :return: whatever `self.find_best_match` returns for the candidates
        """
        assert isinstance(talmud_ref, Ref)
        assert isinstance(quote, basestring)

        tractate = talmud_ref.book
        daf_key = section_to_daf(talmud_ref.sections[0])
        try:
            candidates = self.map[tractate]['Start Daf'][daf_key]
        except KeyError:
            # Nothing starts on this daf, so fall back to the refs that end on it.
            candidates = self.map[tractate]['End Daf'][daf_key]

        return self.find_best_match(quote, candidates, tolerance)
Esempio n. 5
0
    def resolve(self, index_name, sections=None, match_str=None):
        """
        Resolve a citation that may contain "שם" (ibid) placeholders into a full Ref.

        Note: if index_name is None, match_str must not be None.

        :param index_name: index name, or None if it is a sham title citation. When None,
            the title of the last registered citation (`self._last_cit[0]`) is used.
        :param list sections: if any entry in this list is None, it is treated as a "שם" / "ibid"
            and filled in from the table of previously registered refs.
        :param unicode match_str: optional match string. Only consulted when index_name is None;
            the title and sections are then parsed out of it and the `sections` argument is ignored.
        :return: Ref
        :raises IbidKeyNotFoundException: if there is no previous citation to take the title from,
            or the table lookup for the sham key fails.
        :raises IbidRefException: if the Ref cannot be built, or the resulting Ref is empty.
        """
        # todo: assert if table is empty.
        # todo: raise an error if can't find this sham constellation in table
        title = None
        is_index_sham = index_name is None
        if is_index_sham:
            index_name = self._last_cit[0]
            if index_name is None:
                raise IbidKeyNotFoundException("couldn't find this key")
            if match_str is not None:
                node = library.get_schema_node(index_name)  # assert JaggedArrayNode?
                title, sections = CitationFinder.get_sham_ref_with_node(match_str, node, lang='he')

        if sections is not None:
            # Choose the depth of the ref to resolve: pad with None up to the depth
            # reported by get_last_depth.
            last_depth = self.get_last_depth(index_name, sections)
            if is_index_sham or len(sections) == 0 or not sections[0]:
                # The leading part is missing: pad at the front.
                missing = max(len(sections), last_depth) - len(sections)
                sections = tuple([None] * missing + list(sections))
            elif not sections[-1]:
                # The trailing part is missing: pad at the end.
                missing = max(len(sections), last_depth) - len(sections)
                sections = tuple(list(sections) + [None] * missing)

        # Recognize what kind of key we are looking for: everything after the
        # first sham is blanked out.
        key = []
        found_sham = False
        for sect in sections:
            key.append(None if found_sham else sect)
            if not sect:  # change bool on the first sham you meet
                found_sham = True

        if found_sham:
            try:
                from_table = self._table[(index_name, tuple(key))].sections
            except Exception:
                raise IbidKeyNotFoundException("couldn't find this key")
        else:
            from_table = sections  # that is, it wasn't in _table

        # Merge them, preferring the sections that were retrieved from the citation.
        new_sections = [sect if sect else from_table[i] for i, sect in enumerate(sections)]

        try:
            book_ref = Ref(index_name)
            if self.assert_simple:
                # Explicit check instead of `assert`, so it is not stripped under -O.
                # Any failure here is reported as IbidRefException by the handler below.
                if book_ref.index.is_complex():
                    raise ValueError("complex index")
                addressTypes = book_ref.index_node.addressTypes
            else:
                addressTypes = [None] * len(new_sections)
            section_str_list = []
            for section, addressType in zip(new_sections, addressTypes):
                if addressType == u'Talmud':
                    section_str_list.append(talmud.section_to_daf(section))
                else:
                    section_str_list.append(str(section))
            if title and title != index_name:
                index_name = title
            resolvedRef = Ref(u'{}.{}'.format(index_name, '.'.join(section_str_list)))
        except Exception:
            # Join the individual sections, not the characters of the list's repr.
            attempted = u'.'.join(u'{}'.format(s) for s in new_sections)
            raise IbidRefException(u"problem with the Ref iteslf. {}.{}".format(index_name, attempted))

        if resolvedRef.is_empty():
            raise IbidRefException('problem with the Ref iteslf')
        self.registerRef(resolvedRef)
        return resolvedRef
from sefaria.model import *
from helper.link import add_links_from_text
from sefaria.system.database import db
from sefaria.utils.talmud import section_to_daf

# Delete every link whose source text is one of the Berakhot texts ...
oids = db.texts.find({"title": "Berakhot"}).distinct("_id")
tanakh_links = LinkSet({"source_text_oid": {"$in": oids}})
tanakh_links.delete()

# ... then rescan the Wikisource version section by section to add links again.
berakhot = Version().load({
    "title": "Berakhot",
    "versionTitle": "Wikisource Talmud Bavli",
})

for i, daf_text in enumerate(berakhot.chapter):
    ref = "Berakhot %s" % section_to_daf(i + 1)
    print("Scanning %s" % ref)
    add_links_from_text(ref, berakhot.language, daf_text, berakhot._id, 1)
Esempio n. 7
0
    def resolve(self, index_name, sections=None, match_str=None):
        """
        Resolve a citation that may contain "שם" (ibid) placeholders into a full Ref.

        Note: if index_name is None, match_str must not be None.

        :param index_name: index name, or None if it is a sham title citation. When None,
            the title of the last registered citation (`self._last_cit[0]`) is used.
        :param list sections: if any entry in this list is None, it is treated as a "שם" / "ibid"
            and filled in from the table of previously registered refs.
        :param unicode match_str: optional match string. Only consulted when index_name is None;
            the title and sections are then parsed out of it and the `sections` argument is ignored.
        :return: Ref
        :raises IbidKeyNotFoundException: if there is no previous citation to take the title from,
            or the table lookup for the sham key fails.
        :raises IbidRefException: if the Ref cannot be built, or the resulting Ref is empty.
        """
        # todo: assert if table is empty.
        # todo: raise an error if can't find this sham constellation in table
        title = None
        is_index_sham = index_name is None
        if is_index_sham:
            index_name = self._last_cit[0]
            if index_name is None:
                raise IbidKeyNotFoundException("couldn't find this key")
            if match_str is not None:
                node = library.get_schema_node(
                    index_name)  # assert JaggedArrayNode?
                title, sections = CitationFinder.get_sham_ref_with_node(
                    match_str, node, lang='he')

        if sections is not None:
            # Choose the depth of the ref to resolve: pad with None up to the depth
            # reported by get_last_depth.
            last_depth = self.get_last_depth(index_name, sections)
            if is_index_sham or len(sections) == 0 or not sections[0]:
                # The leading part is missing: pad at the front.
                missing = max(len(sections), last_depth) - len(sections)
                sections = tuple([None] * missing + list(sections))
            elif not sections[-1]:
                # The trailing part is missing: pad at the end.
                missing = max(len(sections), last_depth) - len(sections)
                sections = tuple(list(sections) + [None] * missing)

        # Recognize what kind of key we are looking for: everything after the
        # first sham is blanked out.
        key = []
        found_sham = False
        for sect in sections:
            key.append(None if found_sham else sect)
            if not sect:  # change bool on the first sham you meet
                found_sham = True

        if found_sham:
            try:
                from_table = self._table[(index_name, tuple(key))].sections
            except Exception:
                raise IbidKeyNotFoundException("couldn't find this key")
        else:
            from_table = sections  # that is, it wasn't in _table

        # Merge them, preferring the sections that were retrieved from the citation.
        new_sections = [
            sect if sect else from_table[i] for i, sect in enumerate(sections)
        ]

        try:
            book_ref = Ref(index_name)
            if self.assert_simple:
                # Explicit check instead of `assert`, so it is not stripped under -O.
                # Any failure here is reported as IbidRefException by the handler below.
                if book_ref.index.is_complex():
                    raise ValueError("complex index")
                addressTypes = book_ref.index_node.addressTypes
            else:
                addressTypes = [None] * len(new_sections)
            section_str_list = []
            for section, addressType in zip(new_sections, addressTypes):
                if addressType == u'Talmud':
                    section_str_list.append(talmud.section_to_daf(section))
                else:
                    section_str_list.append(str(section))
            if title and title != index_name:
                index_name = title
            resolvedRef = Ref(u'{}.{}'.format(index_name,
                                              '.'.join(section_str_list)))
        except Exception:
            # Join the individual sections, not the characters of the list's repr.
            attempted = u'.'.join(u'{}'.format(s) for s in new_sections)
            raise IbidRefException(
                u"problem with the Ref iteslf. {}.{}".format(
                    index_name, attempted))

        if resolvedRef.is_empty():
            raise IbidRefException('problem with the Ref iteslf')
        self.registerRef(resolvedRef)
        return resolvedRef
Esempio n. 8
0
        index = txt.library.get_index(text["title"])
    except Exception as e:
        print "Error loading: {} index : {}".format(text["title"] , e)
        continue
    if not index or not getattr(index, "categories", None):
        print "No index found for " + text.title
        continue
    if "Tanach" in index.categories and "Commentary" not in index.categories:
        continue
    talmud = True if "Talmud" in index.categories else False

    for i in range(len(text['chapter'])):
        if talmud:
            if "Bavli" in index.categories and i < 2:
                continue
            chap = section_to_daf(i + 1)
        else:
            chap = i + 1
        ref = text['title'] + " " + str(chap)
        print ref
        try:
            result = add_links_from_text(txt.Ref(ref), text['language'], text['chapter'][i], text['_id'], user)
            if result:
                text_total[text["title"]] += len(result)
        except Exception, e:
            print e

total = 0
for text in text_order:
    num = text_total[text]
    try:
Esempio n. 9
0
def get_text(tref, context=1, commentary=True, version=None, lang=None, pad=True):
    """
    Build the response dictionary for the text that the string ref `tref` points to.

    :param tref: string reference to a segment of text.
    :param context: how many levels of depth above the requested ref should be returned.
        With context=1, asking for a verse also returns its surrounding chapter;
        context=0 gives just what is asked for.
    :param commentary: whether to also look up connected texts. The list of
        available versions is only added when this is set.
    :param version: together with `lang`, names a particular version to return.
        Only applied when `lang` is "en" or "he".
    :param lang: language of the requested `version`.
    :param pad: when true, the parsed ref is replaced by its `padded_ref()`.
    :return: dictionary describing the English text, with the Hebrew counterparts
        stored under "he"-prefixed keys. For a spanning ref the result of
        `get_spanning_text` is returned instead.
    """
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()

    # If ref spans sections, get_spanning_text handles each section
    if oref.is_spanning():
        return get_spanning_text(oref)

    # Projection for the queries below: for texts deeper than one level, fetch
    # only the top-level section that was asked for.
    chapter_slice = {"_id": 0}
    if len(oref.sections):
        skip = oref.sections[0] - 1
        if len(oref.index.sectionNames) != 1:
            chapter_slice["chapter"] = {"$slice": [skip, 1]}

    # Pull a specific version of text, if one was requested for a known language
    en_cursor = None
    he_cursor = None
    if version:
        if lang == "en":
            en_cursor = db.texts.find(
                {"title": oref.book, "language": lang, "versionTitle": version},
                chapter_slice)
        elif lang == "he":
            he_cursor = db.texts.find(
                {"title": oref.book, "language": lang, "versionTitle": version},
                chapter_slice)

    # If no criteria set above, pull all versions.
    # Prioritize first according to "priority" field (if present), then by oldest text first.
    # Order here will determine which versions are used in case of a merge.
    if not en_cursor:
        en_cursor = db.texts.find(
            {"title": oref.book, "language": "en"},
            chapter_slice).sort([["priority", -1], ["_id", 1]])
    if not he_cursor:
        he_cursor = db.texts.find(
            {"title": oref.book, "language": "he"},
            chapter_slice).sort([["priority", -1], ["_id", 1]])

    # Conversion to Ref bogged down here, and resorted to old_dict_format(). todo: Push through to the end
    # Extract / merge relevant text. Pull Hebrew from a copy of ref first, since text_from_cur alters ref
    he_result = text_from_cur(copy.copy(oref.old_dict_format()), he_cursor, context)
    r = text_from_cur(oref.old_dict_format(), en_cursor, context)

    # Copy the Hebrew fields over under their "he" names: (target key, source key, default)
    always_copied = (
        ("he", "text", []),
        ("heVersionTitle", "versionTitle", ""),
        ("heVersionSource", "versionSource", ""),
        ("heVersionStatus", "versionStatus", ""),
        ("heLicense", "license", "unknown"),
    )
    for target, source, fallback in always_copied:
        r[target] = he_result.get(source, fallback)

    # These are only copied when present / truthy
    he_notes = he_result.get("versionNotes", "")
    if he_notes:
        r["heVersionNotes"] = he_notes
    he_digitized = he_result.get("digitizedBySefaria", False)
    if he_digitized:
        r["heDigitizedBySefaria"] = he_digitized
    if "sources" in he_result:
        r["heSources"] = he_result.get("sources")

    # Find commentary on this text if requested
    if commentary:
        from sefaria.client.wrapper import get_links
        search_ref = model.Ref(tref).padded_ref().context_ref(context).normal()
        links = get_links(search_ref)
        r["commentary"] = [] if "error" in links else links

        # get list of available versions of this text
        # but only if you care enough to get commentary also (hack)
        r["versions"] = get_version_list(tref)

    # Use shorthand if present, masking higher level sections
    if "shorthand" in r:
        r["book"] = r["shorthand"]
        depth = r["shorthandDepth"]
        for field in ("sections", "toSections", "sectionNames"):
            r[field] = r[field][depth:]

    # Replace ints with daf strings (3->"2a") if text is Talmud or commentary on Talmud
    is_commentary = r["type"] == "Commentary"
    if r["type"] == "Talmud" or (is_commentary and r["commentaryCategories"][0] == "Talmud"):
        daf = r["sections"][0]
        r["sections"] = [section_to_daf(daf)] + r["sections"][1:]
        r["title"] = r["book"] + " " + r["sections"][0]
        if "heTitle" in r:
            r["heBook"] = r["heTitle"]
            r["heTitle"] = r["heTitle"] + " " + section_to_daf(daf, lang="he")
        if is_commentary and len(r["sections"]) > 1:
            r["title"] = "%s Line %d" % (r["title"], r["sections"][1])
        if "toSections" in r:
            r["toSections"] = [r["sections"][0]] + r["toSections"][1:]
    elif is_commentary:
        # Title carries at most the first two sections, colon-separated
        shown = min(len(r["sections"]), 2)
        r["title"] = r["book"] + " " + ":".join(["%s" % s for s in r["sections"][:shown]])

    return r
Esempio n. 10
0
        text_total[text["title"]] = 0
        text_order.append(text["title"])
    print text["title"]
    index = txt.get_index(text["title"])
    if not index or not index.get("categories"):
        print "No index found for " + text["title"]
        continue
    if "Tanach" in index.categories:
        continue
    talmud = True if "Talmud" in index.categories else False

    for i in range(len(text['chapter'])):
        if talmud:
            if "Bavli" in index.categories and i < 2:
                continue
            chap = section_to_daf(i + 1)
        else:
            chap = i + 1
        ref = text['title'] + " " + str(chap)
        print ref
        try:
            result = add_links_from_text(ref, text['language'],
                                         text['chapter'][i], text['_id'], user)
            if result:
                text_total[text["title"]] += len(result)
        except Exception, e:
            print e

total = 0
for text in text_order:
    num = text_total[text]
Esempio n. 11
0
def get_text(tref, context=1, commentary=True, version=None, lang=None, pad=True):
	"""
	Build the response dictionary for the text that the string ref `tref` points to.

	:param tref: string reference to a segment of text.
	:param context: how many levels of depth above the requested ref should be returned.
		With context=1, asking for a verse also returns its surrounding chapter;
		context=0 gives just what is asked for.
	:param commentary: whether to also look up connected texts. The list of
		available versions is only added when this is set.
	:param version: together with `lang`, names a particular version to return.
		Only applied when `lang` is "en" or "he".
	:param lang: language of the requested `version`.
	:param pad: when true, the parsed ref is replaced by its `padded_ref()`.
	:return: dictionary describing the English text, with the Hebrew counterparts
		stored under "he"-prefixed keys. For a spanning ref the result of
		`get_spanning_text` is returned instead.
	"""
	oref = model.Ref(tref)
	if pad:
		oref = oref.padded_ref()

	# If ref spans sections, get_spanning_text handles each section
	if oref.is_spanning():
		return get_spanning_text(oref)

	# Projection for the queries below: for texts deeper than one level, fetch
	# only the top-level section that was asked for.
	chapter_slice = {"_id": 0}
	if len(oref.sections):
		skip = oref.sections[0] - 1
		if len(oref.index.sectionNames) != 1:
			chapter_slice["chapter"] = {"$slice": [skip, 1]}

	# Pull a specific version of text, if one was requested for a known language
	en_cursor = None
	he_cursor = None
	if version:
		if lang == "en":
			en_cursor = db.texts.find({"title": oref.book, "language": lang, "versionTitle": version}, chapter_slice)
		elif lang == "he":
			he_cursor = db.texts.find({"title": oref.book, "language": lang, "versionTitle": version}, chapter_slice)

	# If no criteria set above, pull all versions.
	# Prioritize first according to "priority" field (if present), then by oldest text first.
	# Order here will determine which versions are used in case of a merge.
	if not en_cursor:
		en_cursor = db.texts.find({"title": oref.book, "language": "en"}, chapter_slice).sort([["priority", -1], ["_id", 1]])
	if not he_cursor:
		he_cursor = db.texts.find({"title": oref.book, "language": "he"}, chapter_slice).sort([["priority", -1], ["_id", 1]])

	# Conversion to Ref bogged down here, and resorted to old_dict_format(). todo: Push through to the end
	# Extract / merge relevant text. Pull Hebrew from a copy of ref first, since text_from_cur alters ref
	he_result = text_from_cur(copy.copy(oref.old_dict_format()), he_cursor, context)
	r = text_from_cur(oref.old_dict_format(), en_cursor, context)

	# Copy the Hebrew fields over under their "he" names: (target key, source key, default)
	always_copied = (
		("he", "text", []),
		("heVersionTitle", "versionTitle", ""),
		("heVersionSource", "versionSource", ""),
		("heVersionStatus", "versionStatus", ""),
		("heLicense", "license", "unknown"),
	)
	for target, source, fallback in always_copied:
		r[target] = he_result.get(source, fallback)

	# These are only copied when present / truthy
	he_notes = he_result.get("versionNotes", "")
	if he_notes:
		r["heVersionNotes"] = he_notes
	he_digitized = he_result.get("digitizedBySefaria", False)
	if he_digitized:
		r["heDigitizedBySefaria"] = he_digitized
	if "sources" in he_result:
		r["heSources"] = he_result.get("sources")

	# Find commentary on this text if requested
	if commentary:
		from sefaria.client.wrapper import get_links
		search_ref = model.Ref(tref).padded_ref().context_ref(context).normal()
		links = get_links(search_ref)
		r["commentary"] = [] if "error" in links else links

		# get list of available versions of this text
		# but only if you care enough to get commentary also (hack)
		r["versions"] = get_version_list(tref)

	# Use shorthand if present, masking higher level sections
	if "shorthand" in r:
		r["book"] = r["shorthand"]
		depth = r["shorthandDepth"]
		for field in ("sections", "toSections", "sectionNames"):
			r[field] = r[field][depth:]

	# Replace ints with daf strings (3->"2a") if text is Talmud or commentary on Talmud
	is_commentary = r["type"] == "Commentary"
	if r["type"] == "Talmud" or (is_commentary and r["commentaryCategories"][0] == "Talmud"):
		daf = r["sections"][0]
		r["sections"] = [section_to_daf(daf)] + r["sections"][1:]
		r["title"] = r["book"] + " " + r["sections"][0]
		if "heTitle" in r:
			r["heBook"] = r["heTitle"]
			r["heTitle"] = r["heTitle"] + " " + section_to_daf(daf, lang="he")
		if is_commentary and len(r["sections"]) > 1:
			r["title"] = "%s Line %d" % (r["title"], r["sections"][1])
		if "toSections" in r:
			r["toSections"] = [r["sections"][0]] + r["toSections"][1:]
	elif is_commentary:
		# Title carries at most the first two sections, colon-separated
		shown = min(len(r["sections"]), 2)
		r["title"] = r["book"] + " " + ":".join(["%s" % s for s in r["sections"][:shown]])

	return r