Exemple #1
0
def dep_counts(name):
    """Count the records that refer to the text titled ``name``.

    Three regex patterns are built from ``name``:

    * ``alone``     -- refs that start with ``name`` followed by a digit
    * ``commentor`` -- refs that contain ``"<name> on"``
    * ``commentee`` -- refs of the form ``"<commentator> on <name> <digit>"``
      where ``<commentator>`` is any title returned by the Commentary
      index query below

    :param name: title of the text whose dependencies are counted
    :return: dict mapping a descriptive label to the ``.count()`` of the
        matching version, history, note or link set
    """
    escaped_name = re.escape(name)

    commentators = model.IndexSet({"categories.0": "Commentary"}).distinct("title")
    # Titles are literal text, so each one is escaped before being joined
    # into the alternation; an unescaped title containing regex
    # metacharacters would otherwise alter or break the pattern.
    commentator_alternation = "|".join(re.escape(title) for title in commentators)

    ref_patterns = {
        'alone': r'^{} \d'.format(escaped_name),
        'commentor': r'{} on'.format(escaped_name),
        'commentee': r'^({}) on {} \d'.format(commentator_alternation, escaped_name)
    }

    # Version/history titles are matched with the same commentee pattern
    # that is used for refs, so it is built once and reused.
    commentee_title_pattern = ref_patterns['commentee']

    ret = {
        'version title exact match': model.VersionSet({"title": name}).count(),
        'version title match commentor': model.VersionSet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'version title match commentee': model.VersionSet({"title": {"$regex": commentee_title_pattern}}).count(),
        'history title exact match': model.HistorySet({"title": name}).count(),
        'history title match commentor': model.HistorySet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'history title match commentee': model.HistorySet({"title": {"$regex": commentee_title_pattern}}).count(),
    }

    # One group of note/link/history counts per ref pattern.
    for pname, pattern in ref_patterns.items():
        ret.update({
            'note match ' + pname: model.NoteSet({"ref": {"$regex": pattern}}).count(),
            'link match ' + pname: model.LinkSet({"refs": {"$regex": pattern}}).count(),
            'history refs match ' + pname: model.HistorySet({"ref": {"$regex": pattern}}).count(),
            'history new refs match ' + pname: model.HistorySet({"new.refs": {"$regex": pattern}}).count()
        })

    return ret
Exemple #2
0
def dep_counts(name, indx):
    """Count the records that depend on ``indx`` under the title ``name``.

    The dependency regexes for ``indx`` are obtained from
    ``prepare_index_regex_for_dependency_process`` and every occurrence of
    the escaped index title in them is replaced by the escaped ``name``.

    :param name: title to look for in versions, history, notes and links
    :param indx: index object; its ``title`` attribute is read here
    :return: dict mapping a descriptive label to the ``.count()`` of the
        matching record set
    """
    from sefaria.model.text import prepare_index_regex_for_dependency_process

    def construct_query(attribute, queries):
        """Return an ``$or`` query matching ``attribute`` against each regex."""
        alternatives = []
        for regex in queries:
            alternatives.append({attribute: {'$regex': regex}})
        return {'$or': alternatives}

    # Re-target each index pattern from the index's own title to ``name``.
    raw_patterns = prepare_index_regex_for_dependency_process(indx, as_list=True)
    patterns = []
    for raw in raw_patterns:
        patterns.append(raw.replace(re.escape(indx.title), re.escape(name)))

    # The counts are gathered one at a time, in the same order as the keys.
    ret = {}

    title_query = {"title": name}
    ret['version title exact match'] = model.VersionSet(
        title_query, sort=[('title', 1)]).count()

    title_query = {"title": name}
    ret['history title exact match'] = model.HistorySet(
        title_query, sort=[('title', 1)]).count()

    ret['note match '] = model.NoteSet(
        construct_query("ref", patterns), sort=[('ref', 1)]).count()

    ret['link match '] = model.LinkSet(
        construct_query("refs", patterns)).count()

    ret['history refs match '] = model.HistorySet(
        construct_query("ref", patterns), sort=[('ref', 1)]).count()

    ret['history new refs match '] = model.HistorySet(
        construct_query("new.refs", patterns), sort=[('new.refs', 1)]).count()

    return ret
from sefaria.clean import remove_old_counts
from sefaria.counts import update_counts

# Remove the duplicate index records (e.g. 'Sefer Abudraham') whose titles
# end in stray whitespace. The titles are matched exactly, trailing spaces
# included.
for duplicate_title in (
    "Sefer Abudraham ",
    "Tiferet Yisrael ",
    "Igrot Moshe ",
    "The Sabbath, Heschel  ",
    "Sifre Devarim ",
):
    db.index.remove({"title": duplicate_title})

remove_old_counts()

# Strip leading/trailing whitespace from every remaining index title,
# saving only the records whose title actually changes.
texts = model.IndexSet({})
for t in texts:
    stripped_title = t.title.strip()
    if stripped_title == t.title:
        continue
    t.title = stripped_title
    t.save()

# Notes that have no "public" field: give an owner of 1 to those with a
# missing or falsy owner, then mark the whole set as not public.
# NOTE(review): the owner is only assigned in memory here; presumably
# ns.update() is what persists it -- confirm.
ns = model.NoteSet({"public": {"$exists": False}})
for n in ns:
    if getattr(n, "owner", None):
        continue
    n.owner = 1

ns.update({"public": False})

# Remove the "sectionCounts" field from every counts document
db.counts.update({}, {"$unset": {"sectionCounts": ""}}, multi=True)

# Rebuild counts docs, so they get the allVersionCounts field
update_counts()