예제 #1
0
 def test_create(self, con):
     """Check the string form of the object returned by ``Model.create()``.

     ``con`` is a mock handed to this test from outside the method
     (presumably a patched connection call -- confirm against the
     decorator). It is configured to return a response whose ``json()``
     yields ``{"id": "value"}``.
     """
     model = Model()
     model.add_extractor(Extractor())

     # Canned response that the mocked call hands back.
     fake_response = Mock()
     fake_response.json.return_value = {"id": "value"}
     con.return_value = fake_response

     created = model.create()

     # Expected rendering: the id from the canned response, then the settings.
     expected_text = (
         "value, "
         "{'title': None, 'excludeUttWithoutEntities': True, 'numWorkers': 8, 'chunk': 'PAGE', "
         "'searchDictionaries': [{'searchMode': 'ORDERED_SPAN', 'analyzeStrategy': 'SIMPLE'}]}"
     )
     self.assertEqual(str(created), expected_text)
예제 #2
0
    def test_add_extractor(self):
        """Verify what ``Model.add_extractor`` stores for a configured extractor.

        A configured extractor is added to a fresh model, and the
        ``"searchDictionaries"`` entry of ``model.temp_dictionary`` is
        compared with the expected single-element list.
        """
        extractor = Extractor()
        extractor.set_vocabulary('123')
        extractor.set_type(Type.DATETIME)
        extractor.set_mode(Mode.SIMPLE)
        extractor.set_stop_word_list(['stop'])
        extractor.set_synonym_list(['synonym'])
        extractor.set_validator('val')

        model = Model()
        model.add_extractor(extractor)

        # 'vocabValueType' and 'searchMode' are not set explicitly above;
        # presumably they are defaults or derived values -- confirm against
        # the Extractor implementation.
        expected_entry = dict(
            vocabId='123',
            vocabValueType='REGEX',
            dataType='DATETIME',
            searchMode='ORDERED_SPAN',
            analyzeStrategy='SIMPLE',
            stopwordList=['stop'],
            synonymList=['synonym'],
            phraseMatchingPattern='val',
        )
        stored = model.temp_dictionary["searchDictionaries"]
        self.assertEqual(stored, [expected_entry])
# End-to-end usage example: build a vocabulary, attach it to an extractor,
# run a model over a set of documents and export the results.
# NOTE(review): the earlier steps of this example (including the definition
# of `list_of_documents`) are not visible in this fragment.

# Build a vocabulary with three entries, name it and create it.
vocabulary = Vocabulary()
vocabulary.add_entry("Industrials")
vocabulary.add_entry("Quasi-Governments")
vocabulary.add_entry("Governments")
vocabulary.name("Allocations (%)").create()

# 3- Create Extractor - Regex must have 1 capturing group
extractor = Extractor()
extractor.set_vocabulary(vocabulary.get_id())
# The pattern requires one or more spaces at the start, then captures a
# token that begins and ends with a digit and has digits, dots or commas
# in between (at least three characters in total).
extractor.set_validator("^ +(\\d[\\d\\.\\,]+\\d)")
extractor.set_type(Type.DOUBLE)

# 4- Run
model = Model()
model.set_description("test data process")
model.add_extractor(extractor)
# `list_of_documents` must already be defined before this point.
model.with_documents(list_of_documents)
model.create()

# 5- Wait to finish
model.wait_for_completion()

# 6- Export Field results
result = Result(model.get_id())
# Print every item returned by result.read() together with its first value.
for item in result.read():
    # Only the first value of each item is reported; this indexing assumes
    # get_values() is non-empty -- TODO confirm.
    field_value = item.get_values()[0]
    print(f"{item.get_str()} -> {field_value.get_str()}")

# 7- Export raw results to XLSX
result.result_xlsx_exporter("sample.xlsx")
예제 #4
0
 def test_add_extractor_regex_err(self):
     """``Model.add_extractor`` must raise ``QtArgumentError`` for validator '['.

     The validator string ``'['`` is not a compilable regular expression
     (unterminated character set).
     """
     bad_extractor = Extractor()
     bad_extractor.set_validator('[')
     target_model = Model()
     # Callable form of assertRaises: only the add_extractor call is expected
     # to raise.
     self.assertRaises(QtArgumentError, target_model.add_extractor, bad_extractor)