def remove_refs_with_false():
    """
    Delete links, and the history records about links, that carry False
    as one of their refs.
    """
    # Link records are matched on their "refs" field.
    bad_links = model.LinkSet({"refs": False})
    bad_links.delete()

    # History records are matched on the "new.refs" field instead.
    bad_history = model.HistorySet({"new.refs": False})
    bad_history.delete()
def broken_links(tref=None, auto_links=False, manual_links=False, delete_links=False, check_text_exists=False):
    """
    Scan links and report the ones whose refs cannot be used.

    tref              -- when truthy, only the links in
                         model.LinkSet(model.Ref(tref)) are scanned;
                         otherwise model.LinkSet() is scanned.
    auto_links        -- report broken links whose `auto` attribute is truthy.
    manual_links      -- report broken links whose `auto` attribute is falsy.
    delete_links      -- also delete every link that gets reported.
    check_text_exists -- additionally treat a ref as broken when its
                         is_empty() returns a truthy value.

    A broken link is skipped entirely (neither reported nor deleted) when
    the flag for its kind is False, so with the default arguments nothing
    is ever reported.

    Returns a list of tab-separated "refs, link type, problem" strings.
    Each string is also printed as soon as it is produced.
    """
    links = model.LinkSet(model.Ref(tref)) if tref else model.LinkSet()

    # Indexed by the number of validation steps that completed before the
    # failure occurred.
    error_messages = (
        "Ref 1 is bad",
        "Ref 1 has no text in the system",
        "Ref 2 is bad",
        "Ref 2 has no text in the system",
    )

    broken_links_list = []
    for link in links:
        steps_passed = 0
        try:
            rf1 = model.Ref(link.refs[0])
            steps_passed = 1
            if check_text_exists and rf1.is_empty():
                raise Exception("no text at this Ref")
            steps_passed = 2
            rf2 = model.Ref(link.refs[1])
            steps_passed = 3
            if check_text_exists and rf2.is_empty():
                raise Exception("no text at this Ref")
        except Exception:
            # Deliberately not a bare `except:`. KeyboardInterrupt and
            # SystemExit must propagate instead of being mistaken for a
            # broken ref (and possibly triggering a delete).
            wanted = auto_links if link.auto else manual_links
            if wanted is False:
                continue

            link_type = "auto - {}".format(link.generated_by) if link.auto else "manual"
            report = "{}\t{}\t{}".format(link.refs, link_type, error_messages[steps_passed])
            broken_links_list.append(report)
            print(report)
            if delete_links:
                link.delete()
    return broken_links_list
def get_book_link_collection(book, cat):
    """
    Collect the links that join `book` to the texts of category `cat`.

    book -- title of a text; matched literally at the start of a ref,
            followed by a space and a digit.
    cat  -- category name used to look up index titles. For "Tanach",
            "Torah", "Prophets" and "Writings", indexes that are also
            categorised as "Commentary" or "Targum" are excluded.

    Returns {"error": ...} when no index title matches the category.
    Otherwise returns a list with one entry per link of the form
        {"r1": {"title": ..., "loc": ...}, "r2": {"title": ..., "loc": ...}}
    where each title has its spaces replaced by "-" and loc is the part of
    the ref starting at the first digit that follows a space. Links with a
    ref that cannot be split this way are left out.
    """
    if cat in ("Tanach", "Torah", "Prophets", "Writings"):
        query = {
            "$and": [
                {"categories": cat},
                {"categories": {"$ne": "Commentary"}},
                {"categories": {"$ne": "Targum"}},
            ]
        }
    else:
        query = {"categories": cat}

    titles = model.IndexSet(query).distinct("title")
    if not titles:
        return {"error": "No results for {}".format(query)}

    # Titles are data, not patterns: escape them so that characters such as
    # "(", "." or "|" inside a title cannot change the meaning of the regex.
    book_re = r'^{} \d'.format(re.escape(book))
    cat_re = r'^({}) \d'.format('|'.join(re.escape(title) for title in titles))
    link_re = re.compile(r'^(?P<title>.+) (?P<loc>\d.*)$')

    links = model.LinkSet({
        "$and": [
            {"refs": {"$regex": book_re}},
            {"refs": {"$regex": cat_re}},
        ]
    })

    ret = []
    for link in links:
        m1 = link_re.match(link.refs[0])
        m2 = link_re.match(link.refs[1])
        if m1 is None or m2 is None:
            # A single ref can satisfy both query regexes, leaving the other
            # ref in a shape link_re cannot split. Skip the link rather than
            # fail on a None match.
            continue
        ret.append({
            "r1": {
                "title": m1.group("title").replace(" ", "-"),
                "loc": m1.group("loc"),
            },
            "r2": {
                "title": m2.group("title").replace(" ", "-"),
                "loc": m2.group("loc"),
            },
        })
    return ret
def dep_counts(name, indx):
    """
    Count the version, history, note and link records that match `name`.

    The ref patterns come from
    prepare_index_regex_for_dependency_process(indx, as_list=True), with
    every occurrence of the escaped `indx.title` replaced by the escaped
    `name`.

    Returns a dict that maps a short description of each query to the
    number of matching records.

    NOTE(review): this file contains a second top-level function that is
    also named `dep_counts` (taking only `name`); whichever definition is
    executed last is the one bound to the module name.
    """
    from sefaria.model.text import prepare_index_regex_for_dependency_process

    def any_pattern(field, regexes):
        # One {"$regex": ...} clause per pattern, OR-ed together.
        return {'$or': [{field: {'$regex': rx}} for rx in regexes]}

    raw_patterns = prepare_index_regex_for_dependency_process(indx, as_list=True)
    patterns = []
    for raw in raw_patterns:
        patterns.append(raw.replace(re.escape(indx.title), re.escape(name)))

    # Each query document is built fresh for the set that consumes it.
    ret = {}

    versions = model.VersionSet({"title": name}, sort=[('title', 1)])
    ret['version title exact match'] = versions.count()

    titled_history = model.HistorySet({"title": name}, sort=[('title', 1)])
    ret['history title exact match'] = titled_history.count()

    notes = model.NoteSet(any_pattern("ref", patterns), sort=[('ref', 1)])
    ret['note match '] = notes.count()

    links = model.LinkSet(any_pattern("refs", patterns))
    ret['link match '] = links.count()

    ref_history = model.HistorySet(any_pattern("ref", patterns), sort=[('ref', 1)])
    ret['history refs match '] = ref_history.count()

    new_ref_history = model.HistorySet(any_pattern("new.refs", patterns), sort=[('new.refs', 1)])
    ret['history new refs match '] = new_ref_history.count()

    return ret
def update_links_count(text=None):
    """
    Counts the links that point to a particular text, or all of them.
    Results are stored on the 'linksCount' field of the counts document.

    text -- title of the text to update. When falsy, every counts document
            that has a non-empty "title" is updated in turn.
    """
    if not text:
        for doc in db.counts.find({"title": {"$exists": 1}}):
            if doc["title"]:
                update_links_count(text=doc["title"])
        # Every title has been handled by the recursive calls above. Without
        # this return the single-title code below would run with an empty
        # `text`.
        return

    print("%s" % text)

    # The result is not used. Presumably this lookup is here only so that an
    # unknown title raises before anything is written.
    model.get_index(text)

    # NOTE(review): assumes a counts document already exists for this title.
    # If the lookup returns None, the item assignment below raises TypeError.
    counts_doc = db.counts.find_one({"title": text})
    counts_doc["linksCount"] = model.LinkSet(model.Ref(text)).count()
    db.counts.save(counts_doc)
def dep_counts(name):
    """
    Count the version, history, note and link records that match `name`.

    Three ref patterns are used:
      'alone'     -- `name` at the start, followed by a space and a digit
      'commentor' -- `name` followed by " on"
      'commentee' -- "<commentator title> on <name>" at the start, followed
                     by a space and a digit, where the commentator titles
                     are the index titles whose first category is
                     "Commentary"

    Returns a dict that maps a short description of each query to the
    number of matching records.

    NOTE(review): this file contains a second top-level function that is
    also named `dep_counts` (taking `name` and `indx`); whichever definition
    is executed last is the one bound to the module name.
    """
    commentators = model.IndexSet({"categories.0": "Commentary"}).distinct("title")

    escaped_name = re.escape(name)
    # Titles are data, not patterns: escape them like `name` so that regex
    # metacharacters inside a title cannot alter the alternation.
    commentator_alternation = "|".join(re.escape(title) for title in commentators)

    ref_patterns = {
        'alone': r'^{} \d'.format(escaped_name),
        'commentor': r'{} on'.format(escaped_name),
        'commentee': r'^({}) on {} \d'.format(commentator_alternation, escaped_name),
    }
    # The title queries use the same pattern as the 'commentee' ref queries.
    commentee_title_pattern = ref_patterns['commentee']

    ret = {
        'version title exact match': model.VersionSet({"title": name}).count(),
        'version title match commentor': model.VersionSet(
            {"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'version title match commentee': model.VersionSet(
            {"title": {"$regex": commentee_title_pattern}}).count(),
        'history title exact match': model.HistorySet({"title": name}).count(),
        'history title match commentor': model.HistorySet(
            {"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'history title match commentee': model.HistorySet(
            {"title": {"$regex": commentee_title_pattern}}).count(),
    }

    for pname, pattern in ref_patterns.items():
        ret['note match ' + pname] = model.NoteSet(
            {"ref": {"$regex": pattern}}).count()
        ret['link match ' + pname] = model.LinkSet(
            {"refs": {"$regex": pattern}}).count()
        ret['history refs match ' + pname] = model.HistorySet(
            {"ref": {"$regex": pattern}}).count()
        ret['history new refs match ' + pname] = model.HistorySet(
            {"new.refs": {"$regex": pattern}}).count()

    return ret