def test_create(self, con):
    """Check that ``Model.create()`` stringifies to the expected summary.

    ``con`` is the mocked connection callable (presumably injected by a
    patch decorator outside this view -- confirm). Its return value is a
    mock whose ``json()`` yields a payload containing the new model id.
    """
    payload = {"id": "value"}
    # Expected ``str()`` of the created object: the id followed by the
    # request dictionary built from one untouched Extractor.
    expected = (
        "value, "
        "{'title': None, 'excludeUttWithoutEntities': True, 'numWorkers': 8, 'chunk': 'PAGE', "
        "'searchDictionaries': [{'searchMode': 'ORDERED_SPAN', 'analyzeStrategy': 'SIMPLE'}]}"
    )

    model = Model()
    default_extractor = Extractor()
    model.add_extractor(default_extractor)

    # Wire the fake response only after the model is assembled, as the
    # original test did.
    fake_response = Mock(**{"json.return_value": payload})
    con.return_value = fake_response

    created = model.create()

    self.assertEqual(str(created), expected)
def test_add_extractor(self):
    """Check that a fully configured extractor is stored on the model.

    After ``add_extractor`` the model's
    ``temp_dictionary["searchDictionaries"]`` must equal a one-element
    list describing the extractor.
    """
    # 'vocabValueType' and 'searchMode' are not set explicitly below;
    # presumably they are filled in by Extractor/Model -- confirm.
    expected = [
        {
            'vocabId': '123',
            'vocabValueType': 'REGEX',
            'dataType': 'DATETIME',
            'searchMode': 'ORDERED_SPAN',
            'analyzeStrategy': 'SIMPLE',
            'stopwordList': ['stop'],
            'synonymList': ['synonym'],
            'phraseMatchingPattern': 'val',
        }
    ]

    extractor = Extractor()
    extractor.set_vocabulary('123')
    extractor.set_type(Type.DATETIME)
    extractor.set_mode(Mode.SIMPLE)
    extractor.set_stop_word_list(['stop'])
    extractor.set_synonym_list(['synonym'])
    extractor.set_validator('val')

    model = Model()
    model.add_extractor(extractor)

    stored = model.temp_dictionary["searchDictionaries"]
    self.assertEqual(stored, expected)
# Add the vocabulary entries, then name and create the vocabulary.
vocabulary = Vocabulary()
for entry in ("Industrials", "Quasi-Governments", "Governments"):
    vocabulary.add_entry(entry)
vocabulary.name("Allocations (%)").create()

# 3- Create the extractor - the regex must have 1 capturing group
extractor = Extractor()
extractor.set_vocabulary(vocabulary.get_id())
extractor.set_validator("^ +(\\d[\\d\\.\\,]+\\d)")
extractor.set_type(Type.DOUBLE)

# 4- Run
# NOTE(review): list_of_documents is not defined in this view; presumably
# it is built earlier in the script -- confirm.
model = Model()
model.set_description("test data process")
model.add_extractor(extractor)
model.with_documents(list_of_documents)
model.create()

# 5- Wait for the run to finish
model.wait_for_completion()

# 6- Export field results: print each item with its first value
result = Result(model.get_id())
for item in result.read():
    field_value = item.get_values()[0]
    print(f"{item.get_str()} -> {field_value.get_str()}")

# 7- Export raw results to XLSX
result.result_xlsx_exporter("sample.xlsx")
def test_add_extractor_regex_err(self):
    """Check that adding an extractor with validator ``'['`` is rejected.

    ``Model.add_extractor`` is expected to raise ``QtArgumentError``.
    """
    bad_extractor = Extractor()
    bad_extractor.set_validator('[')  # unbalanced bracket: not a valid regex

    model = Model()

    # Callable form of assertRaises: invokes add_extractor(bad_extractor).
    self.assertRaises(QtArgumentError, model.add_extractor, bad_extractor)