def dep_counts(name):
    """Count stored records that depend on the text titled ``name``.

    Three regular expressions are built around ``name``:

    * ``alone``     - refs starting with ``name`` followed by a space and a digit
    * ``commentor`` - strings containing ``"<name> on"``
    * ``commentee`` - refs starting with ``"<commentator> on <name> "`` and a digit,
      where ``<commentator>`` is any title of an index whose first category
      is ``"Commentary"``

    :param name: title of the text, treated as literal text (regex-escaped here)
    :return: dict mapping a descriptive label to the ``count()`` result of the
        matching version / history / note / link query
    """
    commentators = model.IndexSet({"categories.0": "Commentary"}).distinct("title")

    escaped_name = re.escape(name)
    # Titles are literal text, not regex fragments: escape each one so that
    # characters such as ".", "(" or "|" inside a commentator title cannot
    # change (or break) the alternation.
    commentator_alternation = "|".join(re.escape(title) for title in commentators)

    ref_patterns = {
        'alone': r'^{} \d'.format(escaped_name),
        'commentor': r'{} on'.format(escaped_name),
        'commentee': r'^({}) on {} \d'.format(commentator_alternation, escaped_name),
    }
    # Title queries use the very same commentee pattern as ref queries,
    # so reuse it instead of formatting it a second time.
    commentee_title_pattern = ref_patterns['commentee']

    ret = {
        'version title exact match': model.VersionSet({"title": name}).count(),
        'version title match commentor': model.VersionSet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'version title match commentee': model.VersionSet({"title": {"$regex": commentee_title_pattern}}).count(),
        'history title exact match': model.HistorySet({"title": name}).count(),
        'history title match commentor': model.HistorySet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'history title match commentee': model.HistorySet({"title": {"$regex": commentee_title_pattern}}).count(),
    }

    # Every ref pattern is checked against each collection that stores refs.
    for pname, pattern in ref_patterns.items():
        ret.update({
            'note match ' + pname: model.NoteSet({"ref": {"$regex": pattern}}).count(),
            'link match ' + pname: model.LinkSet({"refs": {"$regex": pattern}}).count(),
            'history refs match ' + pname: model.HistorySet({"ref": {"$regex": pattern}}).count(),
            'history new refs match ' + pname: model.HistorySet({"new.refs": {"$regex": pattern}}).count(),
        })

    return ret
def dep_counts(name, indx):
    """Count stored records that depend on the title ``name``.

    The dependency regexes are obtained for ``indx`` and then re-targeted at
    ``name`` by swapping the escaped ``indx.title`` for the escaped ``name``.

    :param name: title to look for
    :param indx: object exposing ``title``; passed to
        ``prepare_index_regex_for_dependency_process`` (presumably an Index
        record - confirm against callers)
    :return: dict mapping a descriptive label to the ``count()`` result of the
        matching version / history / note / link query
    """
    from sefaria.model.text import prepare_index_regex_for_dependency_process

    def retarget(regex):
        # Point a regex generated for indx.title at ``name`` instead.
        return regex.replace(re.escape(indx.title), re.escape(name))

    patterns = [
        retarget(regex)
        for regex in prepare_index_regex_for_dependency_process(indx, as_list=True)
    ]

    def matches_any(field):
        # A record qualifies when ``field`` matches at least one pattern.
        return {'$or': [{field: {'$regex': regex}} for regex in patterns]}

    ret = {}
    ret['version title exact match'] = model.VersionSet(
        {"title": name}, sort=[('title', 1)]
    ).count()
    ret['history title exact match'] = model.HistorySet(
        {"title": name}, sort=[('title', 1)]
    ).count()
    ret['note match '] = model.NoteSet(
        matches_any("ref"), sort=[('ref', 1)]
    ).count()
    ret['link match '] = model.LinkSet(matches_any("refs")).count()
    ret['history refs match '] = model.HistorySet(
        matches_any("ref"), sort=[('ref', 1)]
    ).count()
    ret['history new refs match '] = model.HistorySet(
        matches_any("new.refs"), sort=[('new.refs', 1)]
    ).count()
    return ret
from sefaria.clean import remove_old_counts
from sefaria.counts import update_counts

# Remove the duplicate index records (e.g. the duplicate 'Sefer Abudraham');
# each of these titles ends with a stray trailing space.
for padded_title in (
    "Sefer Abudraham ",
    "Tiferet Yisrael ",
    "Igrot Moshe ",
    "The Sabbath, Heschel ",
    "Sifre Devarim ",
):
    db.index.remove({"title": padded_title})

remove_old_counts()

# Strip surrounding whitespace from every index title that still has any.
for index in model.IndexSet({}):
    trimmed = index.title.strip()
    if index.title != trimmed:
        index.title = trimmed
        index.save()

# Notes that have no "public" field: give an owner (1) to those lacking one,
# then set "public" to False across the whole set.
notes = model.NoteSet({"public": {"$exists": False}})
for note in notes:
    if not getattr(note, "owner", None):
        note.owner = 1
notes.update({"public": False})

# Remove the "sectionCounts" field from every counts document
db.counts.update({}, {"$unset": {"sectionCounts": ""}}, multi=True)

# Rebuild counts docs, so they get the allVersionCounts field
update_counts()