Esempio n. 1
0
def get_sheets_by_topic(topic,
                        public=True,
                        uid=None,
                        group=None,
                        proj=None,
                        limit=0,
                        page=0):
    """
    Query the sheets collection for sheets tagged with `topic`.

    `topic` is first run through AbstractMongoRecord.normalize_slug. When the
    normalized value is falsy, the query instead matches documents for which
    the "topics" field does not exist.

    At most one extra filter is added, checked in this order: `uid` (matched
    against "owner"), then `group` (matched against "group"), then `public`
    (restricts "status" to "public").

    `proj` is handed to find() as its second argument. Results are sorted by
    "views" descending, then `limit` and a skip of `page * limit` are applied.
    Returns whatever the chained find/sort/limit/skip call yields.
    """
    # try to normalize for backwards compatibility
    from sefaria.model.abstract import AbstractMongoRecord

    normalized = AbstractMongoRecord.normalize_slug(topic)
    if normalized:
        criteria = {"topics.slug": normalized}
    else:
        criteria = {"topics": {"$exists": 0}}

    # Only the first applicable filter is used: owner, else group, else public.
    if uid:
        criteria["owner"] = uid
    elif group:
        criteria["group"] = group
    elif public:
        criteria["status"] = "public"

    cursor = db.sheets.find(criteria, proj)
    cursor = cursor.sort([["views", -1]])
    cursor = cursor.limit(limit)
    return cursor.skip(page * limit)
Esempio n. 2
0
        has_slug_issues = True
    if len(slug.strip()) == 0: continue
    internal_slug_count[slug] += 1
# Validate every slug tallied in internal_slug_count. All problems are printed
# (rather than failing on the first) so they can be fixed in one pass.
# NOTE(review): internal_slug_count and has_slug_issues are set up before this
# point, outside the visible code; presumably a slug -> occurrence-count mapping
# and a bool flag respectively - confirm.
for slug, count in internal_slug_count.items():
    # 1) The slug must not occur more than once.
    if count > 1:
        print(
            f"ERROR: slug {slug} appears {count} times on this sheet. Please update slug in sheet to be internally unique"
        )
        has_slug_issues = True
    # 2) No Topic whose subclass is something other than "author" may already
    #    be loadable under this slug.
    non_author = Topic().load({"slug": slug, "subclass": {"$ne": "author"}})
    if non_author is not None:
        print(
            f"ERROR: slug {slug} exists as a non-author. Please update slug in sheet to be globally unique."
        )
        has_slug_issues = True
    # 3) The slug must already be in the form normalize_slug would produce.
    if AbstractMongoRecord.normalize_slug(slug) != slug:
        print(
            f"ERROR: slug '{slug}' does not match slugified version which is '{AbstractMongoRecord.normalize_slug(slug)}'. Please slugify in the sheet."
        )
        has_slug_issues = True
# Stop here, before anything is deleted below, if any check failed.
if has_slug_issues:
    raise Exception("Issues found. See above errors.")

# Remove every topic_links document whose generatedBy is "update_authors_data".
# The number of matching documents is printed first, then they are deleted.
print("*** Deleting old authorTopic relationships ***")
link_query = {"generatedBy": "update_authors_data"}
print("links to delete", db.topic_links.count_documents(link_query))
db.topic_links.delete_many(link_query)
# Dependencies take too long here.  Getting rid of relationship dependencies above.  Assumption is that we'll import works right after to handle those dependencies.


def _(p: Topic, attr, value):
def import_bonayich_into_topics():
    """
    Import the named entities in sperling_named_entities.json as Topics.

    Steps:
      * Load the JSON file found under RESEARCH_NAMED_ENTITY_LOC.
      * Save the "sperling-bonayich" TopicDataSource if it cannot be loaded.
      * For every record, save a Topic (slug, titles, and when available the
        "bonayich" alt id and a "generation" property) and then an "is-a"
        IntraTopicLink from that topic to one of the mishnaic/talmudic people
        (or group-of-people) topics. An existing identical link is deleted
        before the new one is saved.

    Records are reported on stdout and handled leniently:
      * no English or Hebrew title -> "SLUG IS NONE", record skipped;
      * id that is not "BONAYICH:<int>" -> "BAD ID", no alt id stored;
      * unrecognised type -> treated as an amora, and the link's generatedBy
        notes that the type was a guess;
      * generation that cannot be parsed -> "BAD GEN NUM", no generation;
      * generation symbol with no TimePeriod -> "BAD GEN SYMBOL", no generation.

    Note that each record dict is modified in place ('type', 'gen', and the
    'primary' flag on its title dicts).
    """
    with open(f"{RESEARCH_NAMED_ENTITY_LOC}/sperling_named_entities.json",
              "r", encoding="utf-8") as fin:
        j = json.load(fin)
    tds_json = {
        "slug": "sperling-bonayich",
        "displayName": {
            "en": "Bonyaich via Michael Sperling",
            "he": "Bonyaich via Michael Sperling"
        }
    }
    tds = TopicDataSource().load({"slug": tds_json['slug']})
    if tds is None:
        TopicDataSource(tds_json).save()

    # Type labels accepted as-is; anything else is guessed to be an amora.
    known_types = {'תנא', 'אמורא', 'בדור תנאים', 'בדור אמוראים'}

    for r in tqdm(j):
        en_prime = None
        he_prime = None
        # De-duplicate titles on (text, lang); for duplicates the last dict wins.
        titles = list(
            {f"{t['text']}|{t['lang']}": t
             for t in r['manualTitles']}.values())

        # The first title seen in each language becomes that language's primary.
        for title in titles:
            if title['lang'] == 'en' and en_prime is None:
                en_prime = title['text']
                title['primary'] = True
            if title['lang'] == 'he' and he_prime is None:
                he_prime = title['text']
                title['primary'] = True

        # Prefer the English primary title as the slug source, else the Hebrew.
        slug = en_prime if en_prime is not None else he_prime
        if slug is None:
            # Without any en/he title there is nothing to build a slug from,
            # so the record cannot become a topic.
            print("SLUG IS NONE", r)
            continue
        topic_json = {
            "slug": AbstractMongoRecord.normalize_slug(slug),
            "titles": titles
        }
        try:
            bid = int(r['id'].replace('BONAYICH:', ''))
            topic_json['alt_ids'] = {"bonayich": bid}
        except ValueError:
            print("BAD ID", r['id'])

        # Explicit check instead of `assert`, so it still runs under `python -O`.
        type_is_guess = r['type'] not in known_types
        if type_is_guess:
            r['type'] = 'אמורא'
        # "T" when the type mentions tanna, otherwise "A".
        type_symbol = "T" if 'תנא' in r['type'] else 'A'

        if r.get('gen'):
            try:
                # Drop the א/ב markers, then split ranges such as "3-4" or "3/4".
                r['gen'] = re.sub('[אב]', '', r['gen'])
                symbol = "/".join(f"{type_symbol}{int(g)}"
                                  for g in re.split(r'[\-/]', r['gen']))
            except ValueError:
                print("BAD GEN NUM", r)
            else:
                # Only store generations that correspond to a known TimePeriod.
                if TimePeriod().load({"symbol": symbol}) is None:
                    print("BAD GEN SYMBOL", symbol, r)
                else:
                    topic_json['properties'] = {
                        "generation": {
                            "value": symbol,
                            "dataSource": tds_json['slug']
                        }
                    }

        # NOTE: a previously saved topic with the same slug is not removed
        # first; an earlier attempt to load-and-delete it here did not work.
        t = Topic(topic_json)
        t.save()

        if r['tag'] == 'NORP':
            toTopic = "group-of-mishnaic-people" if type_symbol == "T" else "group-of-talmudic-people"
            print(t.slug)
        else:
            toTopic = "mishnaic-people" if type_symbol == "T" else "talmudic-people"
        link_json = {
            "class": "intraTopic",
            "fromTopic": t.slug,
            "toTopic": toTopic,
            "linkType": "is-a",
            "dataSource": tds_json['slug']
        }
        if type_is_guess:
            link_json[
                'generatedBy'] = "import_bonayich_into_topics. may not be amora."
        # Replace any identical existing link rather than duplicating it.
        itl = IntraTopicLink().load(link_json)
        if itl is not None:
            itl.delete()
        itl = IntraTopicLink(link_json)
        itl.save()