コード例 #1
0
def write_sentence_documents(sentences: List[str],
                             labels: List[str],
                             path: Path,
                             labeled=True):
    typesystem = TypeSystem()
    cas = Cas(typesystem=typesystem)

    SentenceType = typesystem.create_type(
        "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence")
    SentimentType = typesystem.create_type("webanno.custom.Sentiment")
    typesystem.add_feature(type_=SentimentType,
                           name="value",
                           rangeTypeName="uima.cas.String")

    cas.sofa_string = " ".join(sentences)

    begin = 0
    for sentence, label in zip(sentences, labels):
        end = begin + len(sentence)
        cas_sentence = SentenceType(begin=begin, end=end)
        sentiment_annotation = SentimentType(begin=begin, end=end, value=label)
        begin = end + 1

        cas.add_annotation(cas_sentence)

        if labeled:
            cas.add_annotation(sentiment_annotation)

    cas.to_xmi(path, pretty_print=True)

    for sentence in cas.select(SENTENCE_TYPE):
        print(cas.get_covered_text(sentence))
コード例 #2
0
    def predict(self, cas: Cas, layer: str, feature: str, project_id: str,
                document_id: str, user_id: str):
        # Extract the tokens from the CAS and create a spacy doc from it
        cas_tokens = cas.select(TOKEN_TYPE)
        words = [cas.get_covered_text(cas_token) for cas_token in cas_tokens]

        doc = Doc(self._model.vocab, words=words)

        # Find the named entities
        self._model.get_pipe("ner")(doc)

        # For every entity returned by spacy, create an annotation in the CAS
        for named_entity in doc.ents:
            begin = cas_tokens[named_entity.start].begin
            end = cas_tokens[named_entity.end - 1].end
            label = named_entity.label_
            prediction = create_prediction(cas, layer, feature, begin, end,
                                           label)
            cas.add_annotation(prediction)
コード例 #3
0
    def predict(self, cas: Cas, layer: str, feature: str, project_id: str,
                document_id: str, user_id: str):
        # Extract the tokens from the CAS and create a spacy doc from it
        words = [
            cas.get_covered_text(cas_token)
            for cas_token in self.iter_tokens(cas)
        ]

        doc = Doc(self._model.vocab, words=words)

        # Find the named entities
        self._model.tagger(doc)

        # For every token, extract the POS tag and create an annotation in the CAS
        for cas_token, spacy_token in zip(self.iter_tokens(cas), doc):
            prediction = self.create_prediction(cas, layer, feature,
                                                cas_token.begin, cas_token.end,
                                                spacy_token.pos_)
            cas.add_annotation(prediction)
コード例 #4
0
    def predict(self, cas: Cas, layer: str, feature: str, project_id: str,
                document_id: str, user_id: str):
        # Extract the tokens from the CAS and create a spacy doc from it
        words = [
            cas.get_covered_text(cas_token)
            for cas_token in cas.select(TOKEN_TYPE)
        ]

        doc = Doc(self._model.vocab, words=words)

        # Get the pos tags
        self._model.get_pipe("tok2vec")(doc)
        self._model.get_pipe("tagger")(doc)

        # For every token, extract the POS tag and create an annotation in the CAS
        for cas_token, spacy_token in zip(cas.select(TOKEN_TYPE), doc):
            prediction = create_prediction(cas, layer, feature,
                                           cas_token.begin, cas_token.end,
                                           spacy_token.tag_)
            cas.add_annotation(prediction)