Python TextChunkの例、sefaria.model.TextChunk Pythonの例

コード例 #1

0

ファイルを表示

def bulktext_api(request, refs):
    """
    Used by the linker.

    Looks up the Hebrew and English text for every ref in a "|"-separated
    list and returns all of them in a single JSON response.

    :param request: the incoming request; only the GET method is handled
    :param refs: textual refs joined with "|" (duplicates are collapsed)
    :return: the result of ``jsonResponse`` for a dict keyed by each textual
        ref as given, with ``Access-Control-Allow-Origin`` set to ``*``.
        Each value holds the keys he/en/lang/ref/heRef/url, or
        ``{"error": 1}`` when the ref could not be processed.
        ``None`` when the request method is not GET.
    """
    if request.method != "GET":
        # Other HTTP methods produce no response object at all.
        return None

    callback = request.GET.get("callback", None)
    results = {}
    for tref in set(refs.split("|")):
        try:
            oref = model.Ref(tref)
            lang = "he" if is_hebrew(tref) else "en"
            he = model.TextChunk(oref, "he").text
            en = model.TextChunk(oref, "en").text
            # Text that is not already a single string is flattened here;
            # these could be flattened on the client, if need be.
            if not isinstance(he, basestring):
                he = JaggedTextArray(he).flatten_to_string()
            if not isinstance(en, basestring):
                en = JaggedTextArray(en).flatten_to_string()
            entry = {
                'he': he,
                'en': en,
                'lang': lang,
                'ref': oref.normal(),
                'heRef': oref.he_normal(),
                'url': oref.url()
            }
        except (InputError, ValueError, AttributeError) as e:
            # One bad ref must not fail the whole batch: log it and mark it.
            referer = request.META.get("HTTP_REFERER", "unknown page")
            logger.warning(u"Linker failed to parse {} from {} : {}".format(tref, referer, e))
            entry = {"error": 1}
        results[tref] = entry

    response = jsonResponse(results, callback)
    response['Access-Control-Allow-Origin'] = '*'
    return response

コード例 #2

0

ファイルを表示

ファイル: views.py プロジェクト: chezib/Sefaria-Project

def bulktext_api(request, refs):
    """
    Used by the linker.

    Looks up the Hebrew and English text for every ref in a "|"-separated
    list and returns all of them in a single JSON response.

    :param request: the incoming request; only the GET method is handled.
        The query parameters ``callback`` and ``useTextFamily`` are read.
    :param refs: textual refs joined with "|" (duplicates are collapsed)
    :return: the result of ``jsonResponse`` for a dict keyed by each textual
        ref as given. Each value holds he/en/lang/ref/heRef/url (plus
        ``primary_category`` when ``useTextFamily`` is set), or
        ``{"error": 1}`` when the ref could not be processed.
        ``None`` when the request method is not GET.
    """
    if request.method != "GET":
        # Other HTTP methods produce no response object at all.
        return None

    callback = request.GET.get("callback", None)
    use_text_family = request.GET.get("useTextFamily", None)
    results = {}
    for tref in set(refs.split("|")):
        try:
            oref = model.Ref(tref)
            lang = "he" if is_hebrew(tref) else "en"
            if use_text_family:
                family = model.TextFamily(oref, commentary=0, context=0, pad=False)
                he = family.he
                en = family.text
                # TextFamily output is passed through as-is (not flattened).
                entry = {
                    'he': he,
                    'en': en,
                    'lang': lang,
                    'ref': oref.normal(),
                    'primary_category': family.contents()['primary_category'],
                    'heRef': oref.he_normal(),
                    'url': oref.url()
                }
            else:
                he = model.TextChunk(oref, "he").text
                en = model.TextChunk(oref, "en").text
                # Text that is not already a single string is flattened here;
                # these could be flattened on the client, if need be.
                if not isinstance(he, basestring):
                    he = JaggedTextArray(he).flatten_to_string()
                if not isinstance(en, basestring):
                    en = JaggedTextArray(en).flatten_to_string()
                entry = {
                    'he': he,
                    'en': en,
                    'lang': lang,
                    'ref': oref.normal(),
                    'heRef': oref.he_normal(),
                    'url': oref.url()
                }
        except (InputError, ValueError, AttributeError, KeyError):
            # Failures are deliberately not logged: that chatter filled up
            # the logs.  todo: put it in its own file
            entry = {"error": 1}
        results[tref] = entry

    return jsonResponse(results, callback)

コード例 #3

0

ファイルを表示

def rebuild_sheet_nodes(sheet):
    """
    Give every source on a sheet a unique, non-null ``node`` number, fill in
    missing source text where a ``ref`` is present, and record the next free
    node number on the sheet.

    The sheet is modified in place and also returned.

    :param sheet: dict-like sheet; ``sheet["sources"]`` (optional) is a list
        of source dicts and ``sheet["id"]`` is optional
    :return: the same ``sheet`` object, with ``sources`` replaced by the
        checked list and ``nextNode`` set
    """
    def find_next_unused_node(node_number, used_nodes):
        """Return the smallest integer above ``node_number`` not in ``used_nodes``."""
        node_number += 1
        while node_number in used_nodes:
            node_number += 1
        return node_number

    # This will be missing on new sheets, as we won't know the id until the
    # sheet is successfully saved.
    sheet_id = sheet.get("id", 'New Sheet')
    next_node, checked_sources, nodes_used = 0, [], set()

    for source in sheet.get("sources", []):
        # Work out whether this source needs a (new) node number, and why.
        if "node" not in source:
            problem = "adding nodes to sheet {}".format(sheet_id)
        elif source["node"] is None:
            problem = "found null node in sheet {}".format(sheet_id)
        elif source["node"] in nodes_used:
            problem = "found repeating node in sheet " + str(sheet_id)
        else:
            problem = None

        if problem is not None:
            print(problem)
            next_node = find_next_unused_node(next_node, nodes_used)
            source["node"] = next_node

        nodes_used.add(source["node"])

        if "ref" in source and "text" not in source:
            print("adding sources to sheet {}".format(sheet_id))
            source["text"] = {}

            try:
                oref = model.Ref(source["ref"])
                tc_eng = model.TextChunk(oref, "en")
                tc_heb = model.TextChunk(oref, "he")
                if tc_eng:
                    source["text"]["en"] = tc_eng.ja().flatten_to_string()
                if tc_heb:
                    source["text"]["he"] = tc_heb.ja().flatten_to_string()

            except Exception:
                # Best effort: a source whose text cannot be loaded is left
                # out of the rebuilt sheet (its node number stays reserved).
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt / SystemExit still propagate.
                print("error on {} on sheet {}".format(source["ref"],
                                                       sheet_id))
                continue

        checked_sources.append(source)

    sheet["sources"] = checked_sources
    sheet["nextNode"] = find_next_unused_node(next_node, nodes_used)
    return sheet

コード例 #4

0

ファイルを表示

ファイル: views.py プロジェクト: yairm210/Sefaria-Project

def bundle_many_texts(refs, useTextFamily=False, as_sized_string=False, min_char=None, max_char=None):
    """
    Collect the Hebrew and English text for each of several textual refs.

    :param refs: iterable of textual refs
    :param useTextFamily: when truthy, take the text from ``model.TextFamily``
        (unflattened) and add ``primary_category`` to each entry
    :param as_sized_string: when truthy (and ``useTextFamily`` is not), take
        the text from ``TextChunk.as_sized_string``
    :param min_char: forwarded to ``as_sized_string`` only when truthy
    :param max_char: forwarded to ``as_sized_string`` only when truthy
    :return: dict keyed by each textual ref as given; each value holds
        he/en/lang/ref/heRef/url (plus ``primary_category`` for text
        families), or ``{"error": 1}`` when the ref could not be processed
    """
    bundle = {}
    for tref in refs:
        try:
            oref = model.Ref(tref)
            lang = "he" if is_hebrew(tref) else "en"
            if useTextFamily:
                family = model.TextFamily(oref, commentary=0, context=0, pad=False)
                he = family.he
                en = family.text
                entry = {
                    'he': he,
                    'en': en,
                    'lang': lang,
                    'ref': oref.normal(),
                    'primary_category': family.contents()['primary_category'],
                    'heRef': oref.he_normal(),
                    'url': oref.url()
                }
            else:
                he_tc = model.TextChunk(oref, "he")
                en_tc = model.TextChunk(oref, "en")
                if as_sized_string:
                    # Only pass along the limits that were actually supplied.
                    size_limits = {
                        name: value
                        for name, value in (("min_char", min_char), ("max_char", max_char))
                        if value
                    }
                    he_text = he_tc.as_sized_string(**size_limits)
                    en_text = en_tc.as_sized_string(**size_limits)
                else:
                    he = he_tc.text
                    en = en_tc.text
                    # these could be flattened on the client, if need be.
                    he_text = he
                    if not isinstance(he, str):
                        he_text = JaggedTextArray(he).flatten_to_string()
                    en_text = en
                    if not isinstance(en, str):
                        en_text = JaggedTextArray(en).flatten_to_string()

                entry = {
                    'he': he_text,
                    'en': en_text,
                    'lang': lang,
                    'ref': oref.normal(),
                    'heRef': oref.he_normal(),
                    'url': oref.url()
                }
            bundle[tref] = entry
        except (InputError, ValueError, AttributeError, KeyError):
            # Failures are deliberately not logged: that chatter filled up
            # the logs.  todo: put it in its own file
            bundle[tref] = {"error": 1}
    return bundle

コード例 #5

0

ファイルを表示

ファイル: tracker.py プロジェクト: yairm210/Sefaria-Project

def modify_text(user, oref, vtitle, lang, text, vsource=None, **kwargs):
    """
    Updates a chunk of text, identified by oref, versionTitle, and lang, and
    hands the change to ``post_modify_text`` once it has been saved.

    :param user: user making the change; checked for staff status when the
        version is locked
    :param oref: Ref of the text being changed
    :param vtitle: version title
    :param lang: language of the version
    :param text: the new text
    :param vsource: optional new version source
    :param kwargs: ``type`` may name the action; all kwargs are forwarded to
        ``post_modify_text``
    :return: the ``TextChunk`` that was modified
    :raises InputError: if the version is locked and the user is not staff
    """
    chunk = model.TextChunk(oref, lang, vtitle)

    version_status = getattr(chunk.version(), "status", "")
    if version_status == "locked" and not model.user_profile.is_user_staff(user):
        raise InputError("This text has been locked against further edits.")

    # NOTE(review): a caller-supplied "type" is only honoured when the chunk
    # already has text; with no existing text the action is always "add".
    # Confirm that is intended.
    if chunk.text:
        action = kwargs.get("type") or "edit"
    else:
        action = "add"

    old_text = chunk.text
    chunk.text = text
    if vsource:
        chunk.versionSource = vsource  # todo: log this change

    if not chunk.save():
        # Nothing was saved, so there is nothing to post-process.
        return chunk

    post_modify_text(user, action, oref, lang, vtitle, old_text,
                     chunk.text, chunk.full_version._id, **kwargs)
    return chunk

コード例 #6

0

ファイルを表示

ファイル: tracker.py プロジェクト: spenhos/Sefaria-Project

def modify_text(user, oref, vtitle, lang, text, vsource=None, **kwargs):
    """
    Updates a chunk of text, identified by oref, versionTitle, and lang, and records history.

    After a successful save the change is logged and links are regenerated.

    :param user: user making the change; checked for staff status when the
        version is locked
    :param oref: Ref of the text being changed
    :param vtitle: version title
    :param lang: language of the version
    :param text: the new text
    :param vsource: optional new version source
    :param kwargs: ``type`` may name the action; all kwargs are forwarded to
        the logging and link helpers
    :return: the ``TextChunk`` that was modified
    :raises InputError: if the version is locked and the user is not staff
    """
    chunk = model.TextChunk(oref, lang, vtitle)

    version_status = getattr(chunk.version(), "status", "")
    if version_status == "locked" and not is_user_staff(user):
        raise InputError("This text has been locked against further edits.")

    # NOTE(review): a caller-supplied "type" is only honoured when the chunk
    # already has text; with no existing text the action is always "add".
    # Confirm that is intended.
    if chunk.text:
        action = kwargs.get("type") or "edit"
    else:
        action = "add"

    old_text = chunk.text
    chunk.text = text
    if vsource:
        chunk.versionSource = vsource  # todo: log this change

    if not chunk.save():
        # Nothing was saved, so there is nothing to log or link.
        return chunk

    model.log_text(user, action, oref, lang, vtitle, old_text, text,
                   **kwargs)

    from sefaria.helper.link import add_commentary_links, add_links_from_text
    # Commentaries generate links to their base text automatically
    if oref.type == "Commentary":
        add_commentary_links(oref, user, **kwargs)
    # scan text for links to auto add
    version_id = chunk.full_version._id
    add_links_from_text(oref.normal(), lang, chunk.text, version_id, user,
                        **kwargs)
    return chunk

コード例 #7

0

ファイルを表示

 def action(segment_str, tref, heTref, version):
     """
     Check one segment against what the library returns for it.

     Asserts that the text stored for ``tref`` in the given version equals
     ``segment_str``, and that ``tref`` / ``heTref`` are the normal English
     and Hebrew forms of the ref.
     """
     oref = model.Ref(tref)
     chunk = model.TextChunk(oref, lang=version.language, vtitle=version.versionTitle)
     assert chunk.text == segment_str
     assert tref == oref.normal()
     assert heTref == oref.he_normal()

コード例 #8

0

ファイルを表示

def modify_text(user, oref, vtitle, lang, text, vsource=None, **kwargs):
    """
    Updates a chunk of text, identified by oref, versionTitle, and lang, and records history.

    After a successful save the change is logged, cached copies are
    invalidated when ``USE_VARNISH`` is set, and (unless ``skip_links`` is
    passed) links are regenerated.

    :param user: user making the change; checked for staff status when the
        version is locked
    :param oref: Ref of the text being changed
    :param vtitle: version title
    :param lang: language of the version
    :param text: the new text
    :param vsource: optional new version source
    :param kwargs: ``type`` may name the action and ``skip_links`` suppresses
        link generation; all kwargs are forwarded to the logging and link
        helpers
    :return: the ``TextChunk`` that was modified
    :raises InputError: if the version is locked and the user is not staff
    """
    chunk = model.TextChunk(oref, lang, vtitle)

    version_status = getattr(chunk.version(), "status", "")
    if version_status == "locked" and not model.user_profile.is_user_staff(user):
        raise InputError("This text has been locked against further edits.")

    # NOTE(review): a caller-supplied "type" is only honoured when the chunk
    # already has text; with no existing text the action is always "add".
    # Confirm that is intended.
    if chunk.text:
        action = kwargs.get("type") or "edit"
    else:
        action = "add"

    old_text = chunk.text
    chunk.text = text
    if vsource:
        chunk.versionSource = vsource  # todo: log this change

    if not chunk.save():
        # Nothing was saved, so there is nothing to log, purge or link.
        return chunk

    model.log_text(user, action, oref, lang, vtitle, old_text, text,
                   **kwargs)

    if USE_VARNISH:
        invalidate_ref(oref, lang=lang, version=vtitle, purge=True)
        # The following and preceding sections are purged too, when they exist.
        for neighbour_ref in (oref.next_section_ref, oref.prev_section_ref):
            if neighbour_ref():
                invalidate_ref(neighbour_ref(),
                               lang=lang,
                               version=vtitle,
                               purge=True)

    if kwargs.get("skip_links", None):
        return chunk

    from sefaria.helper.link import add_links_from_text
    # Some commentaries can generate links to their base text automatically
    linker = oref.autolinker(user=user)
    if linker:
        linker.refresh_links(**kwargs)
    # scan text for links to auto add
    version_id = chunk.full_version._id
    add_links_from_text(oref, lang, chunk.text, version_id, user, **kwargs)

    if USE_VARNISH:
        invalidate_linked(oref)

    return chunk

コード例 #9

0

ファイルを表示

ファイル: sheets.py プロジェクト: nakee/Sefaria-Project

def refine_ref_by_text(ref, text):
    """
    Returns a ref (string) which refines 'ref' (string) by comparing 'text' (string),
    to the hebrew text stored in the Library.

    The section containing ``ref`` is scanned segment by segment. If ``text``
    (tags, surrounding whitespace and newlines removed) is found inside one
    segment, or is judged by ``string_overlap`` to start in one segment and
    end in a later one, the returned ref is narrowed to that segment or
    segment range (1-based). Otherwise ``ref`` is returned unchanged.

    :param ref: textual ref to refine
    :param text: text to look for; may contain markup
    :return: the refined textual ref, or ``ref`` itself when it cannot be
        resolved, when ``text`` is empty after cleaning, when the section's
        text is not a flat list of strings, or when no match is found
    """
    try:
        oref = model.Ref(ref).section_ref()
    except Exception:
        # Best effort: a ref that cannot be resolved is returned untouched.
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        return ref

    needle = strip_tags(text).strip().replace("\n", "")
    if not needle:
        # An empty string is "in" every string, so without this guard blank
        # text would wrongly refine the ref to the first segment.
        return ref

    hay = model.TextChunk(oref, lang="he").text

    start, end = None, None
    # Segment numbers in refs are 1-based.
    for position, segment in enumerate(hay, 1):
        if not isinstance(segment, basestring):
            # TODO handle this case
            # happens with spanning ref like "Shabbat 3a-3b"
            return ref

        if needle in segment:
            # The whole text sits inside a single segment.
            start = end = position
            break

        # presumably string_overlap(a, b) reports that the end of ``a``
        # overlaps the beginning of ``b`` -- confirm against its definition
        if not start and string_overlap(segment, needle):
            start = position
        elif string_overlap(needle, segment):
            end = position
            break

    if start and end:
        if start == end:
            return "%s:%d" % (oref.normal(), start)
        return "%s:%d-%d" % (oref.normal(), start, end)

    return ref