Python Document.content 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: sem.storage

클래스/타입: Document

메소드/함수: content

hotexamples.com에서의 예제들: 2

Python Document.content - 2개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한 Python sem.storage.Document.content의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Document(12)

add_annotation(5)

_corpus(4)

add_segmentation(2)

content(2)

_content(1)

add_metadata(1)

escaped_name(1)

from_conll(1)

예제 #1

파일 보기

def brat_file(filename, encoding="utf-8"):
    """Build a Document from a pair of BRAT files (``.txt`` and ``.ann``).

    The extension of *filename* is ignored: the text is read from
    ``<stem>.txt`` and the annotations from ``<stem>.ann``.

    Args:
        filename: path to either file of the pair (or the bare stem).
        encoding: encoding used to decode both files.

    Returns:
        A Document named after the ``.txt`` file whose content is the text
        with every carriage return removed, carrying one "NER" annotation
        built from the ``T`` lines of the ``.ann`` file.

    Raises:
        ValueError: if the ``.txt`` or the ``.ann`` file does not exist.
    """
    no_ext = os.path.splitext(filename)[0]
    txt_file = no_ext + ".txt"
    ann_file = no_ext + ".ann"
    if not (os.path.exists(txt_file) and os.path.exists(ann_file)):
        raise ValueError("missing either .ann or .txt file")

    document = Document(os.path.basename(txt_file),
                        encoding=encoding,
                        mime_type="text/plain")

    # Plain "r" instead of "rU": codecs.open always opens the file in binary
    # mode, and the "U" flag is rejected by open() on Python 3.11+.
    # The context managers make sure both handles are closed.
    with codecs.open(txt_file, "r", encoding) as txt_stream:
        document.content = txt_stream.read().replace(u"\r", u"")

    annotations = Annotation("NER")
    with codecs.open(ann_file, "r", encoding) as ann_stream:
        for line in ann_stream:
            line = line.strip()
            # Only lines starting with "T" are read; everything else
            # (including blank lines) is skipped.
            if not line.startswith(u"T"):
                continue
            parts = line.split(u"\t")
            # Second tab-separated field: "<value> <lb> <ub>[;<lb> <ub>...]"
            value, bounds = parts[1].split(" ", 1)
            # One Tag is created per ";"-separated fragment.
            for bound in bounds.split(";"):
                lb, ub = bound.split()
                annotations.append(Tag(lb=int(lb), ub=int(ub), value=value))
    annotations.sort()
    document.add_annotation(annotations)

    return document

예제 #2

파일 보기

def gate_data(data, name=None):
    """Build a Document from an already-parsed GATE XML element.

    The text is reassembled from the ``TextWithNodes`` child of *data*: the
    element's leading text followed by the tail of each of its child nodes.
    Each child node's ``id`` is mapped to the character offset at which it
    occurs, and those offsets are used to resolve the ``StartNode`` /
    ``EndNode`` attributes of every annotation.

    Args:
        data: element exposing ``findall`` with ``TextWithNodes`` and
            ``AnnotationSet`` children (presumably an ElementTree element;
            verify against caller).
        name: document name; ``"__DOCUMENT__"`` is used when falsy.

    Returns:
        A Document holding the reassembled text and one annotation per
        ``AnnotationSet`` element, named after its ``Name`` attribute.

    Raises:
        IndexError: if *data* has no ``TextWithNodes`` child.
        KeyError: if an ``AnnotationSet`` has no ``Name`` attribute, or an
            annotation refers to a node id absent from ``TextWithNodes``.
    """
    document = Document(name or "__DOCUMENT__", mime_type="text/plain")

    textwithnodes = data.findall("TextWithNodes")[0]
    annotation_sets = data.findall("AnnotationSet")

    text_parts = [textwithnodes.text or u""]
    # Running length of the text gathered so far; kept incrementally so the
    # offsets are not recomputed over every previous part for each node.
    offset = len(text_parts[0])
    nodes = {}
    for node in textwithnodes:
        nodes[int(node.attrib["id"])] = offset
        tail = node.tail or u""
        text_parts.append(tail)
        offset += len(tail)
    document.content = u"".join(text_parts)

    for annotation_set in annotation_sets:
        annotation_name = annotation_set.attrib["Name"]
        sem_annotation = Annotation(annotation_name)
        for annotation in annotation_set:
            lb = nodes[int(annotation.attrib["StartNode"])]
            ub = nodes[int(annotation.attrib["EndNode"])]
            sem_annotation.append(Tag(lb, ub, annotation.attrib["Type"]))
        document.add_annotation(sem_annotation)

    return document