Python Annotationの例

プログラミング言語: Python

名前空間/パッケージ名: lif

クラス/型: Annotation

hotexamples.comのコード掲載数: 9

Python Annotation - 9件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのlif.Annotationの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

Annotation(9)

text(1)

よく使われるメソッド

Annotation (9)

text (1)

コード例 #1

ファイルを表示

ファイル: create_lif.py プロジェクト: brandeis-llc/dtriac-pipeline

def _add_annotation(annotations, text_value, annotation_type, text, offset):
    if text is None:
        return offset
    prefix = None
    if annotation_type in ('Title', 'Abstract'):
        prefix = annotation_type.upper()
    if prefix is not None:
        anno = {
            "id": IdentifierFactory.next_id('Header'),
            "@type": vocab('Header'),
            "start": offset,
            "end": offset + len(prefix)
        }
        annotations.append(Annotation(anno))
        text_value.write(prefix + u"\n\n")
        offset += len(prefix) + 2
    anno = {
        "id": IdentifierFactory.next_id(annotation_type),
        "@type": vocab(annotation_type),
        "start": offset,
        "end": offset + len(text)
    }
    annotations.append(Annotation(anno))
    text_value.write(text + u"\n\n")
    return offset + len(text) + 2

コード例 #2

ファイルを表示

 def _add_technologies(self):
     """Takes the technology ontology and tries to add each element to the
     technologies index of this document. Add only if the technology term
     occurs in the text. This is done rather inefficiently by searching the
     entire text # for each technology, but on a 30K LIF document this takes
     less than # 0.01 seconds for 100 technology terms, so we can live with
     this."""
     technologies = self.annotations.technologies
     if technologies:
         next_id = max([int(a.id[1:])
                        for a in technologies.annotations]) + 1
         # print len(technologies.texts), len(technologies.annotations)
         for term in self.ontology.technologies:
             searchterm = r'\b%s\b' % term
             matches = list(
                 re.finditer(searchterm, self.annotations.text, flags=re.I))
             for match in matches:
                 json_obj = {
                     "id": "t%d" % next_id,
                     "@type": 'http://vocab.lappsgrid.org/Technology',
                     "start": match.start(),
                     "end": match.end()
                 }
                 next_id += 1
                 anno = Annotation(json_obj)
                 anno.text = term
                 technologies.add(anno)

コード例 #3

ファイルを表示

def markable_annotation(lif_obj):
    """Return the ``m1`` Markable annotation covering all of the text value
    of ``lif_obj`` (from offset 0 to the length of ``lif_obj.text.value``)."""
    span_end = len(lif_obj.text.value)
    properties = {
        "id": "m1",
        "@type": 'http://vocab.lappsgrid.org/Markable',
        "start": 0,
        "end": span_end,
    }
    return Annotation(properties)

コード例 #4

ファイルを表示

ファイル: generate_topics.py プロジェクト: brandeis-llc/dtriac-pipeline

def topic_annotation(topic, topic_id, lemmas):
    """Return a SemanticTag annotation describing one gensim topic.

    ``topic[0]`` is stored as the topic id feature and ``topic[1]`` as the
    score, formatted with four decimals. ``lemmas`` is stored as the topic
    name. The annotation id is ``t<topic_id>`` and its target is the
    markable ``m1``.
    """
    identifier = "t{:d}".format(topic_id)
    features = {
        "type": "gensim-topic",
        "topic_id": topic[0],
        "topic_score": "{:.04f}".format(topic[1]),
        "topic_name": lemmas,
    }
    properties = {
        "id": identifier,
        "@type": 'http://vocab.lappsgrid.org/SemanticTag',
        "target": "m1",
        "features": features,
    }
    return Annotation(properties)

コード例 #5

ファイルを表示

 def _add_docelement_anno(self, docelement_type, p1, p2):
     """Add a 'Section' annotation spanning ``p1``..``p2`` to ``self.view``.

     The document element type is stored in the ``section_type`` feature and
     the identifier is drawn from ``Identifiers.new_id('de')``.
     """
     properties = {
         'id': Identifiers.new_id('de'),
         '@type': 'Section',
         'start': p1,
         'end': p2,
         'features': {'section_type': docelement_type},
     }
     section = Annotation(properties)
     self.view.add(section)

コード例 #6

ファイルを表示

ファイル: lookup.py プロジェクト: brandeis-llc/dtriac-pipeline

def _create_annotation(lif, tokens, w, term, i, length, ttype):
    """Return a Technology annotation for the match of ``length`` tokens
    starting at token index ``i``.

    The start and end offsets come from ``_get_match_information``. ``w`` is
    stored as the ``term`` feature and ``ttype`` as the ``type`` feature;
    ``term`` is added as ``term_normalized`` only when it differs from ``w``.
    When ``DEBUG`` is set, the offsets and ``w`` are written to ``OUT``.
    """
    # The third element returned by the helper is not needed here.
    p1, p2, _ = _get_match_information(lif, tokens, i, length)
    if DEBUG:
        OUT.write("%s\t%s\t%s\n" % (p1, p2, w))
    next_id = TECHNOLOGIES.get_next_id()
    features = {"term": w, "type": ttype}
    if w != term:
        features['term_normalized'] = term
    return Annotation({
        "id": "t%d" % next_id,
        "@type": 'http://vocab.lappsgrid.org/Technology',
        "start": p1,
        "end": p2,
        "features": features,
    })

コード例 #7

ファイルを表示

 def as_annotation(self):
     """Return this object as a 'Page' annotation.

     The id is ``p<number>`` and the span is ``self.start``..``self.end``.
     The header and footer are included as features only when they are
     not None.
     """
     features = {}
     properties = {
         "id": "p%s" % self.number,
         "@type": vocab('Page'),
         "start": self.start,
         "end": self.end,
         "features": features,
     }
     for key in ('header', 'footer'):
         value = getattr(self, key)
         if value is not None:
             features[key] = value
     return Annotation(properties)

コード例 #8

ファイルを表示

def generate_lif(txt, vnc):
    """Generate a LIF container from a text file and a clearwsd output file.

    * txt is the path of a plain text file holding only the original text.
    * vnc (verbnetclass) is the path of an output file from clearwsd (mostly
      in conll format).

    The text becomes the top-level `text` field of the LIF object. Each line
    of the vnc file that starts with '#' and carries a token field of the
    form ``N[start,end]`` becomes a SemanticTag annotation in a view with id
    "verbnettag". Lines that do not have that shape are skipped.

    The result is emitted through ``Container.write()``; where it is written
    is determined by that method, which is not defined in this function.
    """
    # Read both inputs up front inside context managers so the handles are
    # closed even if something below raises.
    with open(txt, encoding="utf-8") as text_file:
        raw_text = text_file.read()
    with open(vnc, encoding="utf-8") as vnc_file:
        annotations = [line for line in vnc_file if line.startswith('#')]

    lif_obj = LIF()
    cont_obj = Container()
    cont_obj.discriminator = "http://vocab.lappsgrid.org/ns/media/jsonld#lif"
    cont_obj.payload = lif_obj
    lif_obj.text.value = raw_text

    vnc_view = View()
    lif_obj.views.append(vnc_view)
    vnc_view.id = "verbnettag"
    vnc_view.metadata['contains'] = {vocab('SemanticTag'): {}}

    for annotation in annotations:
        # Only the part before the first tab is used; its whitespace-separated
        # fields are: '#...' marker, id, sentence, token, lemma..., label.
        fields = annotation.split('\t')[0].split()
        if len(fields) < 4:
            # Too short to carry an id and a token field; skip it like any
            # other line that is not properly annotated.
            continue

        oid = fields[1]
        otoken = fields[3]
        olemma = " ".join(fields[4:-1])  # some lemmas have space inside
        olabel = fields[-1]
        properly_annotated = re.match(r'\d+\[(\d+),(\d+)\]', otoken)
        if properly_annotated is None:
            continue
        s, e = map(int, properly_annotated.groups())
        ann = {
            "id": "vnc_" + oid,
            "start": s,
            "end": e,
            "@type": vocab("SemanticTag"),
            "features": {
                "tags": [olabel],
                "type": "VerbNetClass",
                "lemma": olemma,
                "text": raw_text[s:e],
            },
        }
        vnc_view.annotations.append(Annotation(ann))
    cont_obj.write()

コード例 #9

ファイルを表示

def fix_view(identifier, view):
    """Repair ``view`` in place.

    The raw annotation list is read from ``view.id['annotations']``, then
    ``view.id`` is replaced by ``identifier`` and ``view.annotations`` is
    rebuilt as a list with one Annotation object per raw entry.
    """
    raw_annotations = view.id['annotations']
    view.id = identifier
    view.annotations = []
    view.annotations.extend(Annotation(raw) for raw in raw_annotations)