예제 #1
0
def remove_refs_with_false():
    """
    Delete all links, and all history records about links, that have
    False as one of their refs.
    """
    false_ref_links = {"refs": False}
    false_ref_history = {"new.refs": False}

    # Links are removed first, then the history records about them.
    model.LinkSet(false_ref_links).delete()
    model.HistorySet(false_ref_history).delete()
예제 #2
0
def broken_links(tref=None,
                 auto_links=False,
                 manual_links=False,
                 delete_links=False,
                 check_text_exists=False):
    """
    Report links whose refs cannot be turned into a ``model.Ref``.

    :param tref: when truthy, only the links in
        ``model.LinkSet(model.Ref(tref))`` are checked; otherwise the links
        in ``model.LinkSet()`` are checked.
    :param auto_links: broken links with a truthy ``auto`` attribute are
        skipped only when this is exactly ``False``.
    :param manual_links: broken links with a falsy ``auto`` attribute are
        skipped only when this is exactly ``False``.
    :param delete_links: when truthy, every reported link is deleted.
    :param check_text_exists: when truthy, a ref whose ``is_empty()`` is
        true is also treated as broken.
    :return: list of strings, one per reported link, of the form
        ``"<refs>\\t<link type>\\t<error message>"``. Each string is also
        printed as it is found.
    """
    # Indexed by the number of validation stages completed before failure.
    stage_messages = (
        "Ref 1 is bad",
        "Ref 1 has no text in the system",
        "Ref 2 is bad",
        "Ref 2 has no text in the system",
    )

    links = model.LinkSet(model.Ref(tref)) if tref else model.LinkSet()
    broken_links_list = []
    for link in links:
        stages_passed = 0
        try:
            # refs[1] is only read after refs[0] passed, so a link with a
            # missing second ref is reported as "Ref 2 is bad".
            for position in (0, 1):
                ref = model.Ref(link.refs[position])
                stages_passed += 1
                if check_text_exists and ref.is_empty():
                    raise Exception("no text at this Ref")
                stages_passed += 1
        except Exception:
            # Exception rather than a bare except: Ctrl-C and SystemExit
            # must not be mistaken for a broken link (and trigger a delete).
            requested = auto_links if link.auto else manual_links
            if requested is False:
                continue

            link_type = "auto - {}".format(
                link.generated_by) if link.auto else "manual"
            error_msg = stage_messages[stages_passed]

            broken_links_list.append("{}\t{}\t{}".format(
                link.refs, link_type, error_msg))
            print(broken_links_list[-1])
            if delete_links:
                link.delete()
    return broken_links_list
예제 #3
0
def get_book_link_collection(book, cat):
    """
    Collect the links between ``book`` and the titles found under ``cat``.

    :param book: title used, followed by a space and a digit, to match one
        side of each link. It is matched literally.
    :param cat: category used to select index titles. For "Tanach",
        "Torah", "Prophets" and "Writings", indexes that also carry the
        "Commentary" or "Targum" category are excluded.
    :return: ``{"error": ...}`` when no titles are found for the category;
        otherwise a list of ``{"r1": {...}, "r2": {...}}`` dicts built from
        the first two refs of each link, each side holding a "title"
        (spaces replaced by hyphens) and a "loc".
    """
    if cat in ("Tanach", "Torah", "Prophets", "Writings"):
        query = {
            "$and": [
                {"categories": cat},
                {"categories": {"$ne": "Commentary"}},
                {"categories": {"$ne": "Targum"}},
            ]
        }
    else:
        query = {"categories": cat}

    titles = model.IndexSet(query).distinct("title")
    if not titles:
        return {"error": "No results for {}".format(query)}

    # Titles are data, not patterns: escape them so characters such as
    # "(", ")" or "." cannot change (or break) the regular expressions.
    book_re = r'^{} \d'.format(re.escape(book))
    cat_re = r'^({}) \d'.format('|'.join(re.escape(t) for t in titles))

    link_re = re.compile(r'^(?P<title>.+) (?P<loc>\d.*)$')
    ret = []

    links = model.LinkSet({
        "$and": [
            {"refs": {"$regex": book_re}},
            {"refs": {"$regex": cat_re}},
        ]
    })
    for link in links:
        l1 = link_re.match(link.refs[0])
        l2 = link_re.match(link.refs[1])
        if l1 is None or l2 is None:
            # A ref that is not "<title> <location>" cannot be split;
            # skip the link instead of failing on a None match.
            continue
        ret.append({
            "r1": {
                "title": l1.group("title").replace(" ", "-"),
                "loc": l1.group("loc")
            },
            "r2": {
                "title": l2.group("title").replace(" ", "-"),
                "loc": l2.group("loc")
            }
        })
    return ret
예제 #4
0
def dep_counts(name, indx):
    """
    Count the records that refer to the title ``name``.

    The dependency regexes of ``indx`` are taken as a template and every
    occurrence of the escaped ``indx.title`` in them is swapped for the
    escaped ``name``.

    :return: dict mapping a description of each check to the number of
        matching versions, history records, notes or links.
    """
    from sefaria.model.text import prepare_index_regex_for_dependency_process

    def any_pattern(field, regexes):
        # One $regex clause per pattern, combined with $or.
        return {'$or': [{field: {'$regex': regex}} for regex in regexes]}

    patterns = []
    for template in prepare_index_regex_for_dependency_process(indx, as_list=True):
        patterns.append(
            template.replace(re.escape(indx.title), re.escape(name)))

    counts = {}
    counts['version title exact match'] = model.VersionSet(
        {"title": name}, sort=[('title', 1)]).count()
    counts['history title exact match'] = model.HistorySet(
        {"title": name}, sort=[('title', 1)]).count()
    counts['note match '] = model.NoteSet(
        any_pattern("ref", patterns), sort=[('ref', 1)]).count()
    counts['link match '] = model.LinkSet(
        any_pattern("refs", patterns)).count()
    counts['history refs match '] = model.HistorySet(
        any_pattern("ref", patterns), sort=[('ref', 1)]).count()
    counts['history new refs match '] = model.HistorySet(
        any_pattern("new.refs", patterns), sort=[('new.refs', 1)]).count()
    return counts
예제 #5
0
def update_links_count(text=None):
    """
    Count the links that point to a particular text, or to all of them.

    Results are stored on the 'linksCount' field of the counts document.

    :param text: title of the text to update. When falsy, every counts
        document that has a non-empty "title" is updated instead.
    """
    if not text:
        for counts_doc in db.counts.find({"title": {"$exists": 1}}):
            if counts_doc["title"]:
                update_links_count(text=counts_doc["title"])
        # All titles are done. Without this return, execution fell through
        # and tried to look up and update a counts document for text=None.
        return

    # Parenthesised single-argument form prints the same under Python 2
    # and is also valid Python 3.
    print("%s" % text)

    # Return value unused; presumably called so that an unknown title
    # raises before anything is written -- TODO confirm.
    model.get_index(text)

    counts_doc = db.counts.find_one({"title": text})
    if counts_doc is None:
        # No counts document exists for this title, so there is nowhere
        # to store the result.
        return

    counts_doc["linksCount"] = model.LinkSet(model.Ref(text)).count()
    db.counts.save(counts_doc)
예제 #6
0
def dep_counts(name):
    """
    Count the records that refer to the title ``name``.

    Three regular expressions are built from ``name`` and the titles of all
    indexes whose first category is "Commentary":

    * 'alone'     -- ``name`` followed by a space and a digit, at the start
    * 'commentor' -- ``name`` followed by " on", anywhere
    * 'commentee' -- "<commentator> on ``name``" followed by a space and a
      digit, at the start

    :param name: the title to look for; it is matched literally.
    :return: dict mapping a description of each check to the number of
        matching versions, history records, notes or links.
    """
    commentators = model.IndexSet({
        "categories.0": "Commentary"
    }).distinct("title")

    # Titles are data, not patterns: escape the commentator titles the same
    # way `name` is escaped so special characters are matched literally.
    escaped_name = re.escape(name)
    commentator_alternation = "|".join(re.escape(c) for c in commentators)

    ref_patterns = {
        'alone': r'^{} \d'.format(escaped_name),
        'commentor': r'{} on'.format(escaped_name),
        'commentee': r'^({}) on {} \d'.format(commentator_alternation,
                                              escaped_name),
    }

    ret = {}

    # Title checks: exact match plus the two commentary patterns, for both
    # versions and history records.
    for label, record_set in (('version', model.VersionSet),
                              ('history', model.HistorySet)):
        ret[label + ' title exact match'] = record_set({
            "title": name
        }).count()
        for pname in ('commentor', 'commentee'):
            ret['{} title match {}'.format(label, pname)] = record_set({
                "title": {"$regex": ref_patterns[pname]}
            }).count()

    # Ref checks: every pattern against notes, links and history records.
    for pname, pattern in ref_patterns.items():
        ret['note match ' + pname] = model.NoteSet({
            "ref": {"$regex": pattern}
        }).count()
        ret['link match ' + pname] = model.LinkSet({
            "refs": {"$regex": pattern}
        }).count()
        ret['history refs match ' + pname] = model.HistorySet({
            "ref": {"$regex": pattern}
        }).count()
        ret['history new refs match ' + pname] = model.HistorySet({
            "new.refs": {"$regex": pattern}
        }).count()

    return ret