def run(self, articleText):
    """Segment an ArticleText with spaCy and queue the resulting Article.

    Builds an ``Article`` carrying the original text/metadata, fills its
    ``sentences`` list from the spaCy parse of ``articleText.text`` (one
    ``Sentence`` of ``Token``s per spaCy sentence, with token indices and
    character offsets re-based to the sentence start), then appends the
    finished article to ``self.output``.
    """
    article = Article(
        articleId=articleText.articleTextId,
        url=articleText.url,
        publishDate=articleText.publishDate,
        title=articleText.title,
        text=articleText.text,
        sentences=[],
    )
    for sentenceId, sent in enumerate(self.spacyModel(articleText.text).sents):
        firstTok = sent[0]
        lastTok = sent[-1]
        # Re-base token indices and char offsets so they are sentence-relative.
        idxBase = firstTok.i
        charBase = firstTok.idx
        tokens = []
        for tok in sent:
            tokens.append(
                Token(
                    i=tok.i - idxBase,
                    startChar=tok.idx - charBase,
                    endChar=tok.idx + len(str(tok)) - charBase,
                    depType=tok.dep_,
                    depHead=tok.head.i - idxBase,
                    depLeftEdge=tok.left_edge.i - idxBase,
                    depRightEdge=tok.right_edge.i - idxBase,
                    posType=tok.pos_,
                    posFineType=tok.tag_,
                    lemma=tok.lemma_,
                    text=tok.text,
                )
            )
        article.sentences.append(
            Sentence(
                startChar=firstTok.idx,
                endChar=lastTok.idx + len(str(lastTok)),
                articleId=article.articleId,
                sentenceId=sentenceId,
                tokens=tokens,
            )
        )
    self.output.append(article)
class TestSentence(TestCase):
    """Unit tests driving the Sentence class's built-in self-test hooks."""

    def __init__(self, methodName):
        super().__init__(methodName=methodName)
        # One zeroed-out fixture Sentence shared by every test method.
        self.sentence = Sentence(
            startChar=0, endChar=0, articleId=0, sentenceId=0, tokens=None
        )

    def setUp(self):
        super().setUp()

    def tearDown(self):
        super().tearDown()

    def test_sentence_test_get_hash_id(self):
        # Sentence exposes its own self-check; it must report success.
        self.assertEqual(self.sentence.test_get_hash_id(), True)

    def test_sentence_test_to_data(self):
        self.assertEqual(self.sentence.test_to_data(), True)

    def test_sentence_test_from_data(self):
        self.assertEqual(self.sentence.test_from_data(), True)
def get_outputs(sentences: List[pyDanticSentence]):
    """Run the model over pydantic sentences and return pydantic outputs.

    Each incoming pydantic ``Sentence`` is converted to the internal
    ``Sentence`` type via a JSON round-trip, the batch is pushed through
    the model, and each model output is converted back to a pydantic
    object the same way.
    """
    internalSentences = []
    for pydanticSentence in sentences:
        internalSentences.append(
            Sentence.from_data(json.loads(pydanticSentence.json()))
        )
    tensorInputs = batchProcessor.process_input_batch(internalSentences)
    tensorOutputs = model.get_outputs(**tensorInputs)
    modelOutputs = batchProcessor.process_output_batch(tensorOutputs)
    return [
        pyDanticModelOutput.parse_raw(json.dumps(output.to_data()))
        for output in modelOutputs
    ]
def __init__(self, methodName):
    """Build the shared fixture Sentence for this test instance.

    Consistency fix: pass ``tokens=None`` explicitly, matching the other
    TestSentence.__init__ in this file, which supplies all five Sentence
    fields; the original omitted ``tokens`` entirely.
    """
    super(TestSentence, self).__init__(methodName=methodName)
    self.sentence = Sentence(
        startChar=0, endChar=0, articleId=0, sentenceId=0, tokens=None
    )
def from_data(cls, data):
    """Build an instance from a plain dict (inverse of ``to_data``).

    Parses the ISO-format ``publishDate`` string into a ``datetime`` and
    rebuilds each entry of ``sentences`` via ``Sentence.from_data``.

    Bug fix: the original mutated the caller's dict in place, replacing
    its string/dict values with parsed objects; work on a shallow copy
    instead so the input is left untouched.
    """
    data = dict(data)  # shallow copy — do not mutate the caller's dict
    data['publishDate'] = datetime.fromisoformat(data['publishDate'])
    data['sentences'] = [Sentence.from_data(v) for v in data['sentences']]
    return cls(**data)