Esempio n. 1
0
def generate_refs_list(query=None):
	"""
	Generate a list of refs to all available sections.

	query - optional filter passed to db.counts.find(). When omitted, an
		empty filter is used, so every counts record is considered.

	Counts records without a "title" (category counts) are skipped.
	Counts records whose title has no matching index record are removed
	from db.counts as a side effect.

	Returns a list of strings of the form "<title> <section>", or just
	"<title>" when the section is empty.
	"""
	if query is None:
		query = {}

	refs = []
	for c in db.counts.find(query):
		if "title" not in c:
			continue # this is a category count

		title = c["title"]
		i = texts.get_index(title)
		if "error" in i:
			# If there is no index record to match the count record,
			# the count should be removed.
			db.counts.remove(c)
			continue

		he = list_from_counts(c["availableTexts"]["he"])
		en = list_from_counts(c["availableTexts"]["en"])
		for n in union(he, en):
			if i["categories"][0] == "Talmud":
				n = texts.section_to_daf(int(n))
			if "commentaryCategories" in i and i["commentaryCategories"][0] == "Talmud":
				# Only the first ":"-separated component is a section
				# number; convert that whole component (not just its
				# first character) to a daf and keep the rest as is.
				split = n.split(":")
				n = ":".join([texts.section_to_daf(int(split[0]))] + split[1:])
			refs.append("%s %s" % (title, n) if n else title)

	return refs
Esempio n. 2
0
def get_counts_doc(text):
	"""
	Look up the stored counts doc for 'text'.

	'text' may be a text title, a category name, or a list of categories.
	Returns the result of find_one on the counts collection for the
	derived query, or None when 'text' is neither a known title nor a
	category that any index record belongs to.
	"""
	counts_query = None

	if isinstance(text, list):
		# A list of categories: match docs whose category holds all of them
		counts_query = {"category": {"$all": text}}
	elif "error" not in sefaria.get_index(text):
		# A known text title
		counts_query = {"title": text}
	else:
		# Not a text title, so treat it as a category name. Borrow the
		# full categories list from the first index record in that
		# category (e.g., "Prophets" -> ["Tanach", "Prophets"]).
		sample = sefaria.db.index.find_one({"categories": text})
		if not sample:
			# No text at all in this category, so there is nothing to return
			return None

		categories = sample["categories"]
		if categories[0] == text:
			# Top level category: don't use subcategories
			counts_query = {"$and": [{'category.0': {"$exists": False}}, {"category": text}]}
		else:
			counts_query = {"category": {"$all": categories}}

	return sefaria.db.counts.find_one(counts_query)
Esempio n. 3
0
def update_table_of_contents():
	"""
	Rebuild the table of contents from the index records and the list of
	commentary texts, annotate it with counts, sort it, save it via
	save_toc and return it.
	"""
	toc = []

	# One entry per index record. Records whose first category is
	# "Commentary" are skipped here and covered by the loop further down.
	for index in sefaria.db.index.find():
		del index["_id"]
		categories = index["categories"]
		if categories[0] == "Commentary":
			continue
		if categories[0] not in order:
			# Top level categories missing from 'order' are filed under "Other"
			categories.insert(0, "Other")
		node = get_or_make_summary_node(toc, categories)
		node.append(add_counts_to_index(index))

	# Available commentary texts do not have individual index records,
	# so they are listed through their own lookup.
	for title in sefaria.get_commentary_texts_list():
		index = sefaria.get_index(title)
		node = get_or_make_summary_node(toc, index["categories"])
		node.append(add_counts_to_index(index))

	# Annotate each top level category node with counts
	for category in toc:
		add_counts_to_category(category)

	# Sort categories and texts recursively, then persist the result
	toc = sort_toc_node(toc, recur=True)
	save_toc(toc)
	return toc
Esempio n. 4
0
def update_table_of_contents():
    """
    Build a fresh table of contents, save it with save_toc and return it.

    Index records are filed under the summary node matching their
    categories; commentary texts are added from the commentary texts list.
    """
    toc = []

    def file_under_categories(index):
        # Find (or create) the node for this index's categories, then
        # attach the index annotated with its counts.
        node = get_or_make_summary_node(toc, index["categories"])
        entry = add_counts_to_index(index)
        node.append(entry)

    # Add an entry for every text we know about
    for record in sefaria.db.index.find():
        del record["_id"]
        top_category = record["categories"][0]
        if top_category == "Commentary":
            # Commentary is special-cased below
            continue
        if top_category not in order:
            record["categories"].insert(0, "Other")
        file_under_categories(record)

    # Commentary texts have no individual index records, so they are
    # resolved one by one from the list of available commentary texts.
    for commentary_title in sefaria.get_commentary_texts_list():
        file_under_categories(sefaria.get_index(commentary_title))

    # Annotate category nodes with counts
    for category_node in toc:
        add_counts_to_category(category_node)

    # Recursively sort categories and texts
    sorted_toc = sort_toc_node(toc, recur=True)

    save_toc(sorted_toc)
    return sorted_toc
Esempio n. 5
0
def export_text(text):
	"""
	Export a single text document.

	text - a text document (dict) with at least "title", "_id" and
		"chapter" keys. It is modified in place: the fields of its index
		record are merged in, "_id" is dropped and the "chapter" content
		is moved to the "text" key.

	The merged document is handed to export_text_doc, which per the
	original description writes it to disk according to export_formats.
	Texts whose title has no index record are skipped with a message.
	"""
	# Parenthesized single-argument prints produce the same output under
	# the Python 2 print statement and the Python 3 print function.
	print(text["title"])
	index = get_index(text["title"])
	if "error" in index:
		print("Skipping %s - %s" % (text["title"], index["error"]))
		return

	text.update(index)
	del text["_id"]
	text["text"] = text.pop("chapter")

	export_text_doc(text)
Esempio n. 6
0
def update_table_of_contents():
	"""
	Rebuild the table of contents, persist it with save_toc and
	save_toc_to_db, and return it.

	Commentary texts are filed under a "Commentary" node placed directly
	below the first category of the text they comment on.
	"""
	toc = []

	# Add an entry for every text we know about
	for index in db.index.find():
		del index["_id"]
		categories = index["categories"]
		if categories[0] == "Commentary":
			# Special case commentary below
			continue
		if categories[0] not in order:
			categories.insert(0, "Other")
		# The node returned here is the list of contents for these
		# categories; the text (index plus counts) is appended to it.
		contents = get_or_make_summary_node(toc, categories)
		contents.append(add_counts_to_index(index))

	# Special handling to list available commentary texts which do not have
	# individual index records
	for title in texts.get_commentary_texts_list():
		index = texts.get_index(title)
		categories = index["categories"]
		#TODO: duplicate index records where one is a commentary and another is not labeled as one can make this crash.
		# Skipping a leading "Commentary" entry (when present) takes care of the crash.
		start = 1 if categories[:1] == ["Commentary"] else 0
		cats = categories[start:start + 1] + ["Commentary"] + categories[start + 1:]
		contents = get_or_make_summary_node(toc, cats)
		contents.append(add_counts_to_index(index))

	# Annotate categories nodes with counts
	for category in toc:
		add_counts_to_category(category)

	# Recursively sort categories and texts
	toc = sort_toc_node(toc, recur=True)

	save_toc(toc)
	save_toc_to_db()

	return toc
Esempio n. 7
0
def get_counts_doc(text):
    """
    Return the stored counts doc for 'text'.

    'text' is a text title, a text category, or a list of categories.
    None is returned when 'text' is not a title and no index record
    belongs to it as a category.
    """
    if isinstance(text, list):
        return sefaria.db.counts.find_one({"category": {"$all": text}})

    index = sefaria.get_index(text)
    if "error" not in index:
        return sefaria.db.counts.find_one({"title": text})

    # This isn't a text title, try treating it as a category.
    # The first index record matching the category supplies the
    # complete categories list (e.g., "Prophets" -> ["Tanach", "Prophets"]).
    example = sefaria.db.index.find_one({"categories": text})
    if not example:
        # Not a single text in this category, so we have nothing.
        return None

    is_top_level = example["categories"][0] == text
    if is_top_level:
        # Don't use subcategories for a top level category
        no_first_element = {'category.0': {"$exists": False}}
        same_category = {"category": text}
        query = {"$and": [no_first_element, same_category]}
    else:
        query = {"category": {"$all": example["categories"]}}

    return sefaria.db.counts.find_one(query)
Esempio n. 8
0
def update_summaries_on_change(ref, old_ref=None, recount=True):
	"""
	Update the table of contents to account for a changed or newly
	inserted text.

	* ref - the text to look up in the index
	* old_ref - when given, the title of the existing entry to replace
	  (otherwise the text's own title is used)
	* recount - whether or not to perform a new count of available text

	Reloads the module-level 'toc', updates it and saves it with save_toc.
	Returns the error dict from get_index when 'ref' has no index record,
	otherwise None.
	"""
	global toc
	toc = get_toc()
	index = sefaria.get_index(ref)
	if "error" in index:
		return index

	if recount:
		sefaria.update_text_count(ref)

	categories = index["categories"]
	# Top level categories missing from 'order' are filed under "Other"
	filed_under_other = categories[0] not in order
	if filed_under_other:
		categories.insert(0, "Other")

	node = get_or_make_summary_node(toc, categories)
	entry = add_counts_to_index(index)

	wanted_title = old_ref or entry["title"]
	for existing in node:
		if existing.get("title") == wanted_title:
			existing.update(entry)
			break
	else:
		# No entry with that title yet: add one and keep the node sorted
		node.append(entry)
		node[:] = sort_toc_node(node)

	# If a new category may have been added to other, resort the categories
	if filed_under_other:
		toc[-1]["contents"] = sort_toc_node(toc[-1]["contents"])

	save_toc(toc)
Esempio n. 9
0
def update_summaries_on_change(ref, old_ref=None, recount=True):
    """
    Bring the text summary docs up to date after a change to, or
    insertion of, the text named by 'ref'.

    * old_ref - title of the existing entry to overwrite, if different
      from the text's current title
    * recount - whether or not to perform a new count of available text

    The module-level 'toc' is reloaded, modified and saved. An error dict
    is returned when 'ref' has no index record; nothing is returned
    otherwise.
    """
    global toc
    toc = get_toc()

    index = sefaria.get_index(ref)
    if "error" in index:
        return index

    if recount:
        sefaria.update_text_count(ref)

    needs_other = index["categories"][0] not in order
    if needs_other:
        index["categories"].insert(0, "Other")

    node = get_or_make_summary_node(toc, index["categories"])
    text = add_counts_to_index(index)

    # Locate the first entry carrying the title being replaced
    lookup_title = old_ref or text["title"]
    current = next(
        (item for item in node if item.get("title") == lookup_title),
        None,
    )
    if current is None:
        node.append(text)
        node[:] = sort_toc_node(node)
    else:
        current.update(text)

    # A new category may have been added to "Other" (the last toc entry),
    # in which case its contents are resorted.
    if needs_other:
        other = toc[-1]
        other["contents"] = sort_toc_node(other["contents"])

    save_toc(toc)
Esempio n. 10
0
def update_links_count(text=None):
	"""
	Count the links that point to a particular text, or to all of them.

	text - title of the text to count links for. When empty or None,
		every counts record with a non-empty title is processed in turn.

	The result is stored in the 'linksCount' field of the text's counts
	document.

	Returns the error dict from get_index when 'text' has no index
	record, otherwise None.
	"""
	if not text:
		for c in db.counts.find({"title": {"$exists": 1}}):
			if c["title"]:
				update_links_count(text=c["title"])
		# Every title was handled by the recursive calls above; stop here
		# instead of falling through and processing text=None.
		return

	# Parenthesized single-argument print behaves the same on Python 2 and 3
	print("%s" % text)
	index = texts.get_index(text)
	if "error" in index:
		return index

	# NOTE(review): find_one yields None when no counts doc exists for this
	# title, in which case the assignment below raises. Confirm whether
	# that case can occur.
	c = db.counts.find_one({"title": text})

	c["linksCount"] = db.links.find({"refs": {"$regex": texts.make_ref_re(text)}}).count()

	db.counts.save(c)
Esempio n. 11
0
def update_text_count(ref, index=None):
    """
    Recount the text specified by ref (currently at book level only) and
    store a fresh counts record for it.

    NOTE: the 'index' argument is ignored; the index record is always
    looked up from 'ref'.

    The previous counts record for the title is removed first. On success
    both the index record (with its "lengths") and the new counts record
    are saved, and the counts record is returned. If the index lookup or
    any count fails, the dict containing "error" is returned instead.
    """
    index = sefaria.get_index(ref)
    if "error" in index:
        return index

    record = {"title": ref}
    sefaria.db.counts.remove(record)

    if index["categories"][0] in ("Tanach", "Mishna", "Talmud"):
        # For these texts, what is present in the db across English and
        # Hebrew is taken to represent the actual total counts
        combined = count_texts(ref)
        if "error" in combined:
            return combined
        index["lengths"] = combined["lengths"]
        record["sectionCounts"] = zero_jagged_array(combined["counts"])
    elif "length" in index:
        index["lengths"] = [index["length"]]

    english = count_texts(ref, lang="en")
    if "error" in english:
        return english
    hebrew = count_texts(ref, lang="he")
    if "error" in hebrew:
        return hebrew

    try:
        totals = record["sectionCounts"]
    except KeyError:
        merged = sum_count_arrays(english["counts"], hebrew["counts"])
        totals = zero_jagged_array(merged)

    english_available = sum_count_arrays(english["counts"], totals)
    hebrew_available = sum_count_arrays(hebrew["counts"], totals)

    record["availableTexts"] = {
        "en": english_available,
        "he": hebrew_available,
    }
    record["availableCounts"] = {
        "en": english["lengths"],
        "he": hebrew["lengths"],
    }

    hebrew_percent = english_percent = 0
    if "length" in index:
        # Add up the first len(index["lengths"]) entries of each lengths list
        levels = range(len(index["lengths"]))
        hebrew_total = sum(hebrew["lengths"][level] for level in levels)
        english_total = sum(english["lengths"][level] for level in levels)
        expected_total = sum(index["lengths"][level] for level in levels)
        if expected_total != 0:
            hebrew_percent = hebrew_total / float(expected_total) * 100
            english_percent = english_total / float(expected_total) * 100

    record["percentAvailable"] = {
        "he": hebrew_percent,
        "en": english_percent,
    }
    record["textComplete"] = {
        "he": hebrew_percent > 99.9,
        "en": english_percent > 99.9,
    }

    sefaria.db.index.save(index)
    sefaria.db.counts.save(record)

    return record
Esempio n. 12
0
def update_text_count(ref, index=None):
	"""
	Update the count record of the text specified by ref (currently at
	book level only) by performing a count.

	NOTE: the 'index' argument is ignored; the index record is always
	looked up from 'ref'.

	The existing counts record for the title is removed, a new one is
	computed, saved and returned. If the index lookup or any count
	fails, the dict containing "error" is returned instead.
	"""
	index = sefaria.get_index(ref)
	if "error" in index:
		return index

	doc = {"title": ref}
	sefaria.db.counts.remove(doc)

	if index["categories"][0] in ("Tanach", "Mishna", "Talmud"):
		# For these texts, what is present in the db across English
		# and Hebrew is taken to represent the actual total counts
		overall = count_texts(ref)
		if "error" in overall:
			return overall
		doc["sectionCounts"] = zero_jagged_array(overall["counts"])

	# Count English first, then Hebrew, bailing out on the first error
	by_lang = {}
	for lang in ("en", "he"):
		counted = count_texts(ref, lang=lang)
		if "error" in counted:
			return counted
		by_lang[lang] = counted
	en = by_lang["en"]
	he = by_lang["he"]

	totals = (
		doc["sectionCounts"]
		if "sectionCounts" in doc
		else zero_jagged_array(sum_count_arrays(en["counts"], he["counts"]))
	)

	doc["availableTexts"] = {
		"en": sum_count_arrays(en["counts"], totals),
		"he": sum_count_arrays(he["counts"], totals),
	}
	doc["availableCounts"] = {
		"en": en["lengths"],
		"he": he["lengths"],
	}

	he_percent = en_percent = 0
	if "length" in index and "lengths" in index:
		he_sum = en_sum = expected_sum = 0
		for level, expected in enumerate(index["lengths"]):
			he_sum += he["lengths"][level]
			en_sum += en["lengths"][level]
			expected_sum += expected
		if expected_sum != 0:
			he_percent = he_sum / float(expected_sum) * 100
			en_percent = en_sum / float(expected_sum) * 100

	doc["percentAvailable"] = {
		"he": he_percent,
		"en": en_percent,
	}
	doc["textComplete"] = {
		"he": he_percent > 99.9,
		"en": en_percent > 99.9,
	}

	sefaria.db.counts.save(doc)
	return doc
Esempio n. 13
0
# Script fragment: walks every Hebrew text document and builds a ref string
# per chapter. Only part of the script is visible here.

# Open the MongoDB connection and select the database named by
# t.SEFARIA_DB; authenticate only when both a user and a password are set.
connection = pymongo.Connection()
db = connection[t.SEFARIA_DB]
if t.SEFARIA_DB_USER and t.SEFARIA_DB_PASSWORD:
	db.authenticate(t.SEFARIA_DB_USER, t.SEFARIA_DB_PASSWORD)

# NOTE(review): 'user' is not used in the visible part of the script;
# presumably a user id consumed further down. Confirm.
user = 28
texts = db.texts.find({"language": "he"})

# text_total maps each title to a running total (initialised to 0);
# text_order records titles in the order they are first seen.
text_total = {}
text_order = []
for text in texts:
	if text['title'] not in text_total:
		text_total[text["title"]] = 0
		text_order.append(text["title"])
	print text["title"]
	index = t.get_index(text["title"])
	# Skip texts with no index record or no categories
	if not index or not index.get("categories"):
		print "No index found for " + text["title"]
		continue
	# Tanach texts are not processed by this script
	if "Tanach" in index['categories']:
		continue
	talmud = True if "Talmud" in index['categories'] else False

	for i in range(len(text['chapter'])):
		if talmud:
			# The first two chapter slots of Bavli texts are skipped;
			# presumably they hold no content. TODO confirm.
			if "Bavli" in index['categories'] and i < 2:
				continue
			# Talmud sections are named by daf rather than by number
			chap = t.section_to_daf(i + 1)
		else:
			# Chapter numbers are 1-based
			chap = i + 1
		# The visible part of the script does not use 'ref' any further
		ref = text['title'] + " " + str(chap)
Esempio n. 14
0
def update_text_count(ref, index=None):
	"""
	Update the count record of the text specified by ref (currently at
	book level only) by performing a count.

	NOTE: the 'index' argument is ignored; the index record is always
	looked up from 'ref'.

	An existing counts record for the title is updated in place,
	otherwise a new one is created. The record is saved to db.counts and
	returned. If the index lookup or any count fails, the dict
	containing "error" is returned instead and nothing is saved.
	"""
	index = texts.get_index(ref)
	if "error" in index:
		return index

	# Reuse the stored counts record when there is one
	c = db.counts.find_one({"title": ref}) or {"title": ref}

	if index["categories"][0] in ("Tanach", "Mishnah", "Talmud"):
		# For these texts, consider what is present in the db across
		# English and Hebrew to represent actual total counts
		counts = count_texts(ref)
		if "error" in counts:
			return counts
		c["sectionCounts"] = zero_jagged_array(counts["counts"])

	en = count_texts(ref, lang="en")
	if "error" in en:
		return en
	he = count_texts(ref, lang="he")
	if "error" in he:
		return he

	if "sectionCounts" in c:
		totals = c["sectionCounts"]
	else:
		totals = zero_jagged_array(sum_count_arrays(en["counts"], he["counts"]))

	c["availableTexts"] = {
		"en": sum_count_arrays(en["counts"], totals),
		"he": sum_count_arrays(he["counts"], totals),
	}

	c["availableCounts"] = {
		"en": en["lengths"],
		"he": he["lengths"],
	}

	hp = ep = 0
	if "length" in index and "lengths" in index:
		depth = len(index["lengths"])
		heTotal = enTotal = total = 0
		for i in range(depth):
			heTotal += he["lengths"][i]
			enTotal += en["lengths"][i]
			total += index["lengths"][i]
		if total != 0:
			# NOTE: a percentage above 100 here points at wrong length
			# metadata in the index record.
			hp = heTotal / float(total) * 100
			ep = enTotal / float(total) * 100
	elif "length" in index:
		length = float(index["length"])
		# A zero length yields 0%, the same as the zero-total case
		# above, instead of dividing by zero.
		if length != 0:
			hp = c["availableCounts"]["he"][0] / length * 100
			ep = c["availableCounts"]["en"][0] / length * 100

	c["percentAvailable"] = {
		"he": hp,
		"en": ep,
	}
	c["textComplete"] = {
		"he": hp > 99.9,
		"en": ep > 99.9,
	}

	# Estimate how much of the text we have
	c['estimatedCompleteness'] = {
		"he" : estimate_completeness('he', index, c),
		"en" : estimate_completeness('en', index, c)
	}

	db.counts.save(c)
	return c