Python add_links_from_textの例、sefaria.helper.link.add_links_from_text Pythonの例

コード例 #1

0

ファイルを表示

ファイル: tracker.py プロジェクト: yairm210/Sefaria-Project

def post_modify_text(user, action, oref, lang, vtitle, old_text, curr_text,
                     version_id, **kwargs) -> None:
    """
    Run the follow-up steps for a text change: history logging, optional cache
    invalidation, optional link regeneration, and finally count/index.

    :param user: forwarded to ``model.log_text``, ``oref.autolinker`` and ``add_links_from_text``
    :param action: forwarded to ``model.log_text``
    :param oref: ref of the changed text; its ``next_section_ref()``,
        ``prev_section_ref()`` and ``autolinker()`` methods are used here
    :param lang: language of the changed version
    :param vtitle: version title of the changed version
    :param old_text: text before the change (only logged)
    :param curr_text: text after the change (logged and scanned for links)
    :param version_id: forwarded to ``add_links_from_text``
    :param kwargs: forwarded unchanged to ``model.log_text``,
        ``linker.refresh_links`` and ``add_links_from_text``. Two keys are also
        read here: a truthy ``skip_links`` disables all link handling, and
        ``count_after`` (default 1) is passed on as ``to_count``.
    :return: None
    """
    model.log_text(user, action, oref, lang, vtitle, old_text, curr_text,
                   **kwargs)

    if USE_VARNISH:
        # Same keyword arguments for the ref itself and both neighbours.
        purge_opts = {"lang": lang, "version": vtitle, "purge": True}
        invalidate_ref(oref, **purge_opts)
        if oref.next_section_ref():
            invalidate_ref(oref.next_section_ref(), **purge_opts)
        if oref.prev_section_ref():
            invalidate_ref(oref.prev_section_ref(), **purge_opts)

    links_disabled = kwargs.get("skip_links")
    if not links_disabled:
        from sefaria.helper.link import add_links_from_text

        # Some commentaries can generate links to their base text
        # automatically; autolinker() returns something falsy otherwise.
        auto_linker = oref.autolinker(user=user)
        if auto_linker:
            auto_linker.refresh_links(**kwargs)

        # Scan the new text for citations and add the matching links.
        add_links_from_text(oref, lang, curr_text, version_id, user, **kwargs)

        if USE_VARNISH:
            invalidate_linked(oref)

    to_count = kwargs.get("count_after", 1)
    count_and_index(oref, lang, vtitle, to_count=to_count)

コード例 #2

0

ファイルを表示

ファイル: tracker.py プロジェクト: spenhos/Sefaria-Project

def modify_text(user, oref, vtitle, lang, text, vsource=None, **kwargs):
    """
    Replace the text of one chunk (selected by oref, lang and vtitle), save it,
    and - only when the save reports success - log the change and regenerate links.

    :param user: checked with ``is_user_staff`` when the version is locked; also
        forwarded to the history log and the link helpers
    :param oref: ref identifying the chunk
    :param vtitle: version title identifying the chunk
    :param lang: language identifying the chunk
    :param text: new text assigned to the chunk
    :param vsource: when truthy, stored as the chunk's ``versionSource``
    :param kwargs: forwarded to ``model.log_text`` and the link helpers;
        ``type`` is also read here to choose the logged action
    :return: the ``model.TextChunk`` that was modified
    :raises InputError: when the version status is "locked" and the user is not staff
    """
    chunk = model.TextChunk(oref, lang, vtitle)

    version_status = getattr(chunk.version(), "status", "")
    if version_status == "locked" and not is_user_staff(user):
        raise InputError("This text has been locked against further edits.")

    # NOTE(review): written as `kwargs.get("type") or "edit" if chunk.text else "add"`
    # this parses with the conditional outermost, so a caller-supplied "type"
    # is ignored whenever the chunk had no text. Confirm that is intended.
    if chunk.text:
        action = kwargs.get("type") or "edit"
    else:
        action = "add"

    old_text = chunk.text
    chunk.text = text
    if vsource:
        chunk.versionSource = vsource  # todo: log this change

    if not chunk.save():
        # Nothing was saved, so there is nothing to log or link.
        return chunk

    model.log_text(user, action, oref, lang, vtitle, old_text, text, **kwargs)

    from sefaria.helper.link import add_commentary_links, add_links_from_text

    # Commentaries generate links to their base text automatically.
    if oref.type == "Commentary":
        add_commentary_links(oref, user, **kwargs)

    # Scan the saved text for citations and add the matching links.
    add_links_from_text(oref.normal(), lang, chunk.text,
                        chunk.full_version._id, user, **kwargs)
    return chunk

コード例 #3

0

ファイルを表示

ファイル: tracker.py プロジェクト: spenhos/Sefaria-Project

def modify_text(user, oref, vtitle, lang, text, vsource=None, **kwargs):
    """
    Overwrite a text chunk and record the edit in history.

    The chunk is looked up by oref, lang and vtitle. History logging and link
    generation only happen when ``chunk.save()`` returns a truthy value.

    :param user: editing user; must pass ``is_user_staff`` to edit a locked version
    :param oref: ref of the chunk to update
    :param vtitle: version title of the chunk to update
    :param lang: language of the chunk to update
    :param text: replacement text
    :param vsource: optional new ``versionSource``; ignored when falsy
    :param kwargs: passed through to ``model.log_text``, ``add_commentary_links``
        and ``add_links_from_text``; the ``type`` key influences the logged action
    :return: the updated ``model.TextChunk``
    :raises InputError: if the version's status is "locked" and the user is not staff
    """
    chunk = model.TextChunk(oref, lang, vtitle)

    locked = getattr(chunk.version(), "status", "") == "locked"
    if locked and not is_user_staff(user):
        raise InputError("This text has been locked against further edits.")

    # Parentheses spell out how Python already groups this expression: the
    # conditional binds loosest, so an empty chunk is always logged as "add".
    # NOTE(review): confirm that ignoring kwargs["type"] in that case is intended.
    action = (kwargs.get("type") or "edit") if chunk.text else "add"

    old_text = chunk.text
    chunk.text = text
    if vsource:
        chunk.versionSource = vsource  # todo: log this change

    saved = chunk.save()
    if saved:
        model.log_text(user, action, oref, lang, vtitle, old_text, text,
                       **kwargs)

        from sefaria.helper.link import add_commentary_links, add_links_from_text

        is_commentary = oref.type == "Commentary"
        if is_commentary:
            # Commentaries generate links to their base text automatically.
            add_commentary_links(oref, user, **kwargs)

        # Scan the saved text for citations to link automatically.
        normal_ref = oref.normal()
        add_links_from_text(normal_ref, lang, chunk.text,
                            chunk.full_version._id, user, **kwargs)

    return chunk

コード例 #4

0

ファイルを表示

def modify_text(user, oref, vtitle, lang, text, vsource=None, **kwargs):
    """
    Replace the text of one chunk (selected by oref, lang and vtitle) and, when
    the save succeeds, log the change, optionally invalidate cached refs and
    optionally regenerate links.

    :param user: checked with ``model.user_profile.is_user_staff`` when the
        version is locked; also forwarded to the history log and link helpers
    :param oref: ref identifying the chunk
    :param vtitle: version title identifying the chunk
    :param lang: language identifying the chunk
    :param text: new text assigned to the chunk
    :param vsource: when truthy, stored as the chunk's ``versionSource``
    :param kwargs: forwarded to ``model.log_text``, ``linker.refresh_links`` and
        ``add_links_from_text``. Two keys are also read here: ``type`` chooses
        the logged action and a truthy ``skip_links`` disables link handling.
    :return: the ``model.TextChunk`` that was modified
    :raises InputError: when the version status is "locked" and the user is not staff
    """
    chunk = model.TextChunk(oref, lang, vtitle)

    version_status = getattr(chunk.version(), "status", "")
    if version_status == "locked" and not model.user_profile.is_user_staff(user):
        raise InputError("This text has been locked against further edits.")

    # NOTE(review): the one-line form `kwargs.get("type") or "edit" if chunk.text
    # else "add"` groups with the conditional outermost, so "type" is ignored
    # for a chunk that had no text. Confirm that is intended.
    if chunk.text:
        action = kwargs.get("type") or "edit"
    else:
        action = "add"

    old_text = chunk.text
    chunk.text = text
    if vsource:
        chunk.versionSource = vsource  # todo: log this change

    if not chunk.save():
        # Save did not go through: skip logging, invalidation and linking.
        return chunk

    model.log_text(user, action, oref, lang, vtitle, old_text, text,
                   **kwargs)

    if USE_VARNISH:
        # Same keyword arguments for the ref itself and both neighbours.
        purge_opts = {"lang": lang, "version": vtitle, "purge": True}
        invalidate_ref(oref, **purge_opts)
        if oref.next_section_ref():
            invalidate_ref(oref.next_section_ref(), **purge_opts)
        if oref.prev_section_ref():
            invalidate_ref(oref.prev_section_ref(), **purge_opts)

    if kwargs.get("skip_links"):
        return chunk

    from sefaria.helper.link import add_links_from_text

    # Some commentaries can generate links to their base text automatically;
    # autolinker() returns something falsy when that does not apply.
    auto_linker = oref.autolinker(user=user)
    if auto_linker:
        auto_linker.refresh_links(**kwargs)

    # Scan the saved text for citations and add the matching links.
    add_links_from_text(oref, lang, chunk.text, chunk.full_version._id,
                        user, **kwargs)

    if USE_VARNISH:
        invalidate_linked(oref)

    return chunk

コード例 #5

0

ファイルを表示

def add_edited_links(dry_run=True):
    """
    Apply the reviewed citation edits from
    "data/Ramban links - Modified Citations.csv" to the Ramban text.

    Replacement strings are grouped by the row's 'Ref' and then by its
    'Original' string. For rows whose 'Is Correct?' column is 'n', the
    replacement is the parenthesised ``he_normal()`` of 'Correct Citation';
    if that citation cannot be resolved the row is printed and skipped.

    For every ref the text chunk from ``get_ramban_tc`` has each original
    string replaced by its modified form. Edits with conflicting replacements,
    or whose original string is not found in the text, are reported and
    counted as issues.

    :param dry_run: when truthy (default), nothing is saved and the edited text
        is written to 'data/ramban_edited_links.txt'. When falsy, changed
        chunks are saved and ``add_links_from_text`` is run on them.
    :return: None; the issue count is printed at the end.
    """
    # TODO use fixed citations
    edit_map = defaultdict(lambda: defaultdict(list))
    with open("data/Ramban links - Modified Citations.csv", "r") as fin:
        for row in csv.DictReader(fin):
            original = row["Original"]
            modified = row["Modified"]
            if row['Is Correct?'] == 'n':
                try:
                    modified = f"({Ref(row['Correct Citation']).he_normal()})"
                except (InputError, AttributeError):
                    # Unresolvable correction: report the citation, skip the row.
                    print(row['Correct Citation'])
                    continue
            edit_map[row['Ref']][original].append(modified)

    issue_count = 0
    # The output file is opened (and therefore truncated) in both modes, as
    # before. The context manager closes it even if a later step raises.
    with open('data/ramban_edited_links.txt', 'w') as fout:
        for ref, sub_edit_map in tqdm(edit_map.items(), total=len(edit_map), desc='mod cits'):
            tc = get_ramban_tc(ref)
            orig_text = tc.text
            for original, modified_list in sub_edit_map.items():
                count = tc.text.count(original)
                if len(set(modified_list)) > 1:
                    # The same original string was given different replacements.
                    print(f'Too many: {count} - {ref} - {original}')
                    issue_count += 1
                    continue
                if count == 0:
                    print(f'Couldn\'t find - {ref} - {original}')
                    issue_count += 1
                    continue
                modified = modified_list[0]
                tc.text = tc.text.replace(original, modified)
            if not dry_run:
                if orig_text != tc.text:
                    # there was a change
                    tc.save()
                    # 5842 is passed in the position of the `user` argument.
                    add_links_from_text(Ref(ref), 'he', tc.text, tc.full_version._id, 5842)
            else:
                fout.write(tc.text + '\n\n')
    print("Issues", issue_count)

コード例 #6

0

ファイルを表示

ファイル: tracker.py プロジェクト: joshuagoldmeier/Sefaria-Project

def modify_text(user, oref, vtitle, lang, text, vsource=None, **kwargs):
    """
    Overwrite a text chunk and record the edit in history.

    The chunk is looked up by oref, lang and vtitle. Logging, cache
    invalidation and link generation only run when ``chunk.save()`` returns a
    truthy value.

    :param user: editing user; must pass ``model.user_profile.is_user_staff``
        to edit a locked version
    :param oref: ref of the chunk to update
    :param vtitle: version title of the chunk to update
    :param lang: language of the chunk to update
    :param text: replacement text
    :param vsource: optional new ``versionSource``; ignored when falsy
    :param kwargs: passed through to ``model.log_text``, ``linker.refresh_links``
        and ``add_links_from_text``. ``type`` influences the logged action and
        a truthy ``skip_links`` turns off link handling.
    :return: the updated ``model.TextChunk``
    :raises InputError: if the version's status is "locked" and the user is not staff
    """
    chunk = model.TextChunk(oref, lang, vtitle)

    locked = getattr(chunk.version(), "status", "") == "locked"
    if locked and not model.user_profile.is_user_staff(user):
        raise InputError("This text has been locked against further edits.")

    # Parentheses spell out how Python already groups this expression: the
    # conditional binds loosest, so an empty chunk is always logged as "add".
    # NOTE(review): confirm that ignoring kwargs["type"] in that case is intended.
    action = (kwargs.get("type") or "edit") if chunk.text else "add"

    old_text = chunk.text
    chunk.text = text
    if vsource:
        chunk.versionSource = vsource  # todo: log this change

    saved = chunk.save()
    if saved:
        model.log_text(user, action, oref, lang, vtitle, old_text, text, **kwargs)

        if USE_VARNISH:
            invalidate_ref(oref, lang=lang, version=vtitle, purge=True)
            # Also purge the neighbouring sections when they exist.
            if oref.next_section_ref():
                invalidate_ref(
                    oref.next_section_ref(), lang=lang, version=vtitle, purge=True
                )
            if oref.prev_section_ref():
                invalidate_ref(
                    oref.prev_section_ref(), lang=lang, version=vtitle, purge=True
                )

        wants_links = not kwargs.get("skip_links")
        if wants_links:
            from sefaria.helper.link import add_links_from_text

            # Some commentaries can generate links to their base text automatically.
            linker = oref.autolinker(user=user)
            if linker:
                linker.refresh_links(**kwargs)

            # Scan the saved text for citations to link automatically.
            version_id = chunk.full_version._id
            add_links_from_text(oref, lang, chunk.text, version_id, user, **kwargs)

            if USE_VARNISH:
                invalidate_linked(oref)

    return chunk

コード例 #7

0

ファイルを表示

ファイル: add_all_links.py プロジェクト: 2la2/Sefaria-Project

        continue
    if "Tanach" in index.categories and "Commentary" not in index.categories:
        continue
    talmud = True if "Talmud" in index.categories else False

    for i in range(len(text['chapter'])):
        if talmud:
            if "Bavli" in index.categories and i < 2:
                continue
            chap = section_to_daf(i + 1)
        else:
            chap = i + 1
        ref = text['title'] + " " + str(chap)
        print ref
        try:
            result = add_links_from_text(txt.Ref(ref), text['language'], text['chapter'][i], text['_id'], user)
            if result:
                text_total[text["title"]] += len(result)
        except Exception, e:
            print e

# NOTE(review): `total` is assigned here but never read in the visible part of
# this script - confirm whether it is still needed.
total = 0
# Print one comma-separated line per title in text_order:
#   title (its own commas replaced by ';'), link count, categories...
for text in text_order:
    # Count accumulated in text_total for this title.
    num = text_total[text]
    try:
        index = txt.library.get_index(text)
    except Exception as e:
        # Report titles whose index cannot be loaded and move on.
        print "Error loading: {} index : {}".format(text, e)
        continue
    # Titles whose index has no (or empty) categories are left out of the report.
    if getattr(index, "categories", None):
        print text.replace(",",";") + "," + str(num) + "," + ",".join(index.categories)

コード例 #8

0

ファイルを表示

def add_commentary_links(onkelos_kw_resolver, rashi_kw_resolver, ibn_kw_resolver, dry_run=True):
    """
    Insert parenthesised citations to Onkelos, Rashi and Ibn Ezra into the
    Ramban text, driven by
    'data/Ramban links - Links to Rashi, Ibn Ezra & Onkelos.csv'.

    For each CSV row a keyword is extracted from 'Window Ramban' using the
    resolver that matches the commentator's collective title. Rows marked
    'n' in 'Is Correct?' are remembered as bad (keyword, ref) pairs; all other
    rows become edits for their 'Ref Ramban'.

    For each edit the Hebrew citation is spliced in right after the first
    occurrence of the keyword. Citations to Rashi and Ibn Ezra are written at
    section level, and an automatic link between the Ramban ref and the exact
    commentator ref is queued for them.

    :param onkelos_kw_resolver: resolver used for rows citing Onkelos
    :param rashi_kw_resolver: resolver used for rows citing Rashi
    :param ibn_kw_resolver: resolver used for rows citing Ibn Ezra
    :param dry_run: when truthy (default), edited texts are written to
        'data/ramban_comm_links.txt' instead of being saved. When falsy,
        changed chunks are saved and ``add_links_from_text`` is run on them.
    :return: None; the issue count is printed at the end.
    """
    resolver_map = {
        'Onkelos': onkelos_kw_resolver,
        'Rashi': rashi_kw_resolver,
        'Ibn Ezra': ibn_kw_resolver
    }
    edit_map = defaultdict(list)
    bad_citation_set = set()
    links_to_add = []
    with open('data/Ramban links - Links to Rashi, Ibn Ezra & Onkelos.csv', 'r') as fin:
        for row in csv.DictReader(fin):
            comm_oref = Ref(row['Ref Commentator'])
            resolver = resolver_map[comm_oref.index.collective_title]
            kw = get_keyword_from_window(row['Window Ramban'], resolver)
            if row['Is Correct?'] == 'n':
                bad_citation_set.add((kw, row['Ref Ramban']))
            else:
                edit_map[row['Ref Ramban']].append((row['Ref Commentator'], kw))

    issue_count = 0
    # The output file is opened (and therefore truncated) in both modes, as
    # before. The context manager closes it even if a later step raises.
    with open('data/ramban_comm_links.txt', 'w') as fout:
        for ref, edit_list in tqdm(edit_map.items(), total=len(edit_map), desc='comm links'):
            tc = get_ramban_tc(ref)
            orig_text = tc.text
            for comm_ref, kw in edit_list:
                count = tc.text.count(kw)
                if count > 1 and (kw, ref) in bad_citation_set:
                    # Keyword is ambiguous and one of its uses was marked bad.
                    print(f'Too many: {count} - {ref} - {kw}')
                    issue_count += 1
                    continue
                if count == 0:
                    # Retry with a closing </b> after the first word.
                    # Presumably this covers a bold-tagged opening word in the
                    # text - TODO confirm.
                    kw_words = kw.split()
                    kw = f'{kw_words[0]}</b> {" ".join(kw_words[1:])}'
                    if kw not in tc.text:
                        print(f'Couldn\'t find - {ref} - {kw}')
                        issue_count += 1
                        continue
                # Splice the citation in right after the first occurrence.
                splice_index = tc.text.index(kw) + len(kw)
                comm_oref = Ref(comm_ref)
                if comm_oref.index.collective_title in {'Rashi', 'Ibn Ezra'}:
                    links_to_add.append({
                        "refs": [ref, comm_ref],
                        "auto": True,
                        "generated_by": "add_ramban_links"
                    })
                    # The link keeps the exact ref; the inline citation is
                    # shortened to the section.
                    comm_oref = comm_oref.section_ref()
                comm_heref = comm_oref.he_normal()
                tc.text = tc.text[:splice_index] + f' ({comm_heref})' + tc.text[splice_index:]
            if not dry_run:
                if orig_text != tc.text:
                    tc.save()
                    # 5842 is passed in the position of the `user` argument.
                    add_links_from_text(Ref(ref), 'he', tc.text, tc.full_version._id, 5842)
            else:
                fout.write(tc.text + '\n\n')

    # NOTE(review): the queued links are saved even when dry_run is truthy.
    # Confirm whether a dry run is really meant to write links.
    for link_spec in links_to_add:
        try:
            Link(link_spec).save()
        except InputError as e:
            print("Error for link", ", ".join(link_spec['refs']))
            print(e)
    print("Issues", issue_count)

コード例 #9

0

ファイルを表示

ファイル: add_all_links.py プロジェクト: zoejf/Sefaria-Project

        continue
    if "Tanach" in index.categories and "Commentary" not in index.categories:
        continue
    talmud = True if "Talmud" in index.categories else False

    for i in range(len(text['chapter'])):
        if talmud:
            if "Bavli" in index.categories and i < 2:
                continue
            chap = section_to_daf(i + 1)
        else:
            chap = i + 1
        ref = text['title'] + " " + str(chap)
        print ref
        try:
            result = add_links_from_text(txt.Ref(ref), text['language'],
                                         text['chapter'][i], text['_id'], user)
            if result:
                text_total[text["title"]] += len(result)
        except Exception, e:
            print e

total = 0
for text in text_order:
    num = text_total[text]
    try:
        index = txt.library.get_index(text)
    except Exception as e:
        print "Error loading: {} index : {}".format(text, e)
        continue
    if getattr(index, "categories", None):
        print text.replace(",", ";") + "," + str(num) + "," + ",".join(