Esempio n. 1
0
 def run(self, articleText):
     """Split an article's text into sentences and tokens and queue the result.

     An ``Article`` is created from the id, url, publish date, title and text
     of ``articleText``. ``self.spacyModel`` is then applied to the text and
     one ``Sentence`` is added for every span yielded by ``.sents``. The
     finished article is appended to ``self.output``; nothing is returned.

     Sentence ``startChar``/``endChar`` are offsets into the whole article
     text. Token indices, token character offsets and the dependency
     references (head, left edge, right edge) are rebased so that they are
     relative to the first token of the sentence they belong to.
     """
     article = Article(
         articleId=articleText.articleTextId,
         url=articleText.url,
         publishDate=articleText.publishDate,
         title=articleText.title,
         text=articleText.text,
         sentences=[],
     )
     parsed = self.spacyModel(articleText.text)
     for sentenceId, span in enumerate(parsed.sents):
         firstToken = span[0]
         lastToken = span[-1]
         # Document-level position of the sentence's first token; subtracted
         # below to turn document-level values into sentence-local ones.
         baseIndex = firstToken.i
         baseChar = firstToken.idx

         tokens = []
         for tok in span:
             localStart = tok.idx - baseChar
             tokens.append(
                 Token(
                     i=tok.i - baseIndex,
                     startChar=localStart,
                     endChar=localStart + len(str(tok)),
                     depType=tok.dep_,
                     depHead=tok.head.i - baseIndex,
                     depLeftEdge=tok.left_edge.i - baseIndex,
                     depRightEdge=tok.right_edge.i - baseIndex,
                     posType=tok.pos_,
                     posFineType=tok.tag_,
                     lemma=tok.lemma_,
                     text=tok.text,
                 ))

         article.sentences.append(
             Sentence(
                 startChar=baseChar,
                 endChar=lastToken.idx + len(str(lastToken)),
                 articleId=article.articleId,
                 sentenceId=sentenceId,
                 tokens=tokens,
             ))
     self.output.append(article)
Esempio n. 2
0
class TestSentence(TestCase):
    """Tests that delegate to the self-check methods exposed by ``Sentence``.

    Every test gets a freshly built, minimal ``Sentence`` and asserts that one
    of its ``test_*`` methods returns ``True``.
    """

    def setUp(self):
        """Build the ``Sentence`` fixture used by each test."""
        super().setUp()
        # Built here instead of in an overridden ``__init__``: the inherited
        # constructor (including its ``methodName`` default) stays intact, and
        # a failure in ``Sentence(...)`` is reported against the test being
        # run rather than when the test case object is instantiated.
        self.sentence = Sentence(startChar=0, endChar=0, articleId=0, sentenceId=0, tokens=None)

    def test_sentence_test_get_hash_id(self):
        """``Sentence.test_get_hash_id`` must report success."""
        self.assertEqual(self.sentence.test_get_hash_id(), True)

    def test_sentence_test_to_data(self):
        """``Sentence.test_to_data`` must report success."""
        self.assertEqual(self.sentence.test_to_data(), True)

    def test_sentence_test_from_data(self):
        """``Sentence.test_from_data`` must report success."""
        self.assertEqual(self.sentence.test_from_data(), True)
Esempio n. 3
0
def get_outputs(sentences: List[pyDanticSentence]):
    """Run the model on a batch of sentences and return its outputs.

    Each incoming sentence is serialised with ``.json()``, decoded, and turned
    into a ``Sentence`` via ``Sentence.from_data``. The batch is then passed
    through ``batchProcessor.process_input_batch``, ``model.get_outputs`` and
    ``batchProcessor.process_output_batch``. Every resulting model output is
    serialised from ``to_data()`` to JSON and parsed into a
    ``pyDanticModelOutput``.

    Returns:
        A list with one ``pyDanticModelOutput`` per item produced by
        ``batchProcessor.process_output_batch``.
    """
    # NOTE(review): ``.json()`` / ``parse_raw`` look like the pydantic v1 API;
    # confirm before replacing the JSON round trips with direct conversions.
    domainSentences = []
    for incoming in sentences:
        payload = json.loads(incoming.json())
        domainSentences.append(Sentence.from_data(payload))

    batchTensors = batchProcessor.process_input_batch(domainSentences)
    rawOutputs = model.get_outputs(**batchTensors)

    responses = []
    for result in batchProcessor.process_output_batch(rawOutputs):
        serialised = json.dumps(result.to_data())
        responses.append(pyDanticModelOutput.parse_raw(serialised))
    return responses
Esempio n. 4
0
 def __init__(self, methodName='runTest'):
     """Initialise the test case and create its ``Sentence`` fixture.

     Args:
         methodName: Name of the test method this instance runs; forwarded
             to the parent constructor. Defaults to ``'runTest'``, which
             is the default of ``unittest.TestCase`` (presumably the base
             class here; confirm against the class header).
     """
     super(TestSentence, self).__init__(methodName=methodName)
     # Minimal fixture: all positions and ids are zero.
     self.sentence = Sentence(startChar=0,
                              endChar=0,
                              articleId=0,
                              sentenceId=0)
Esempio n. 5
0
 def from_data(cls, data):
     """Build an instance of ``cls`` from its plain-data representation.

     Args:
         data: Mapping of constructor keyword arguments. ``'publishDate'``
             must be an ISO-format string and ``'sentences'`` an iterable
             of items accepted by ``Sentence.from_data``.

     Returns:
         ``cls(**fields)``, where ``fields`` is ``data`` with
         ``'publishDate'`` parsed into a ``datetime`` and ``'sentences'``
         converted to ``Sentence`` objects.

     Raises:
         KeyError: If ``'publishDate'`` or ``'sentences'`` is missing.
     """
     # Work on a shallow copy: writing the converted values back into the
     # caller's mapping would make a second call on the same payload fail
     # (``fromisoformat`` would receive a ``datetime`` instead of a string).
     fields = dict(data)
     fields['publishDate'] = datetime.fromisoformat(data['publishDate'])
     fields['sentences'] = [Sentence.from_data(v) for v in data['sentences']]
     return cls(**fields)