def write_sentence_documents(sentences: List[str], labels: List[str], path: Path, labeled=True):
    """Build a CAS from sentences and write it to ``path`` as pretty-printed XMI.

    The document text is all sentences joined by a single space. Every
    sentence paired with a label gets a Sentence annotation covering its
    span; when ``labeled`` is true, a ``webanno.custom.Sentiment`` annotation
    with the label in its ``value`` feature is added over the same span.

    Args:
        sentences: Sentence texts, in document order.
        labels: One label per sentence. Pairing uses ``zip``, so iteration
            stops at the shorter of the two lists.
        path: Destination handed to ``Cas.to_xmi``.
        labeled: Whether the sentiment annotations are added to the CAS.
    """
    type_system = TypeSystem()
    document = Cas(typesystem=type_system)

    sentence_cls = type_system.create_type(
        "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence"
    )
    sentiment_cls = type_system.create_type("webanno.custom.Sentiment")
    type_system.add_feature(
        type_=sentiment_cls, name="value", rangeTypeName="uima.cas.String"
    )

    document.sofa_string = " ".join(sentences)

    # Running character offset of the current sentence inside the joined text.
    offset = 0
    for text, sentiment in zip(sentences, labels):
        stop = offset + len(text)
        sentence_span = sentence_cls(begin=offset, end=stop)
        sentiment_span = sentiment_cls(begin=offset, end=stop, value=sentiment)

        document.add_annotation(sentence_span)
        if labeled:
            document.add_annotation(sentiment_span)

        # Skip the single space that separates consecutive sentences.
        offset = stop + 1

    document.to_xmi(path, pretty_print=True)

    # Echo the covered text of each selected sentence to stdout.
    # NOTE(review): SENTENCE_TYPE is defined elsewhere; presumably it names
    # the same Sentence type created above - confirm.
    for sentence_span in document.select(SENTENCE_TYPE):
        print(document.get_covered_text(sentence_span))
def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_id: str, user_id: str):
    """Add a named-entity prediction to ``cas`` for every entity spaCy finds.

    The tokens already present in the CAS are reused as the spaCy
    tokenization, so spaCy token indices map one-to-one onto CAS tokens.

    Args:
        cas: The CAS to read tokens from and add predictions to.
        layer: Passed through to ``create_prediction``.
        feature: Passed through to ``create_prediction``.
        project_id: Not used by this implementation.
        document_id: Not used by this implementation.
        user_id: Not used by this implementation.
    """
    # Build a spaCy doc from the surface forms of the CAS tokens.
    tokens = cas.select(TOKEN_TYPE)
    surface_forms = []
    for token in tokens:
        surface_forms.append(cas.get_covered_text(token))
    doc = Doc(self._model.vocab, words=surface_forms)

    # Run only the NER component on the pre-tokenized doc.
    recognizer = self._model.get_pipe("ner")
    recognizer(doc)

    # Map each entity's token span back to character offsets in the CAS.
    for entity in doc.ents:
        first_token = tokens[entity.start]
        # spaCy span ends are exclusive, so the last token is at end - 1.
        last_token = tokens[entity.end - 1]
        annotation = create_prediction(
            cas, layer, feature, first_token.begin, last_token.end, entity.label_
        )
        cas.add_annotation(annotation)
def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_id: str, user_id: str):
    """Add a part-of-speech prediction to ``cas`` for every token.

    The tokens yielded by ``self.iter_tokens`` are reused as the spaCy
    tokenization; each token then receives a prediction carrying spaCy's
    ``pos_`` value.

    Args:
        cas: The CAS to read tokens from and add predictions to.
        layer: Passed through to ``self.create_prediction``.
        feature: Passed through to ``self.create_prediction``.
        project_id: Not used by this implementation.
        document_id: Not used by this implementation.
        user_id: Not used by this implementation.
    """
    # Build a spaCy doc from the surface forms of the CAS tokens.
    words = []
    for cas_token in self.iter_tokens(cas):
        words.append(cas.get_covered_text(cas_token))
    doc = Doc(self._model.vocab, words=words)

    # Run the tagger component to assign the POS tags.
    self._model.tagger(doc)

    # Tokens are iterated a second time and paired positionally with the
    # spaCy tokens built from them.
    for cas_token, spacy_token in zip(self.iter_tokens(cas), doc):
        annotation = self.create_prediction(
            cas,
            layer,
            feature,
            cas_token.begin,
            cas_token.end,
            spacy_token.pos_,
        )
        cas.add_annotation(annotation)
def predict(self, cas: Cas, layer: str, feature: str, project_id: str, document_id: str, user_id: str):
    """Add a part-of-speech tag prediction to ``cas`` for every token.

    The tokens selected from the CAS are reused as the spaCy tokenization;
    each token then receives a prediction carrying spaCy's ``tag_`` value.

    Args:
        cas: The CAS to read tokens from and add predictions to.
        layer: Passed through to ``create_prediction``.
        feature: Passed through to ``create_prediction``.
        project_id: Not used by this implementation.
        document_id: Not used by this implementation.
        user_id: Not used by this implementation.
    """
    # Build a spaCy doc from the surface forms of the CAS tokens.
    token_texts = []
    for token in cas.select(TOKEN_TYPE):
        token_texts.append(cas.get_covered_text(token))
    doc = Doc(self._model.vocab, words=token_texts)

    # Apply the components in order: "tok2vec" first, then "tagger".
    for pipe_name in ("tok2vec", "tagger"):
        component = self._model.get_pipe(pipe_name)
        component(doc)

    # Tokens are selected a second time and paired positionally with the
    # spaCy tokens built from them.
    for cas_token, spacy_token in zip(cas.select(TOKEN_TYPE), doc):
        begin, end = cas_token.begin, cas_token.end
        tag = spacy_token.tag_
        cas.add_annotation(create_prediction(cas, layer, feature, begin, end, tag))