# 3- Create the Extractor - Regex must have 1 capturing group
extractor = Extractor()
# `vocabulary` is defined before this section (not shown here).
extractor.set_vocabulary(vocabulary.get_id())
# Regex: after one or more leading spaces, capture a run that starts and ends
# with a digit and has one or more digits, dots or commas in between
# (e.g. "1,234.56").
extractor.set_validator("^ +(\\d[\\d\\.\\,]+\\d)")
# NOTE(review): presumably the captured text is interpreted as a double - confirm.
extractor.set_type(Type.DOUBLE)

# 4- Run
model = Model()
model.set_description("test data process")
model.add_extractor(extractor)
# `list_of_documents` is defined before this section (not shown here).
model.with_documents(list_of_documents)
# NOTE(review): presumably create() is what starts the processing - confirm.
model.create()

# 5- Wait to finish
# NOTE(review): presumably blocks until processing has finished - confirm.
model.wait_for_completion()

# 6- Export Field results
result = Result(model.get_id())
# Debugging aid: uncomment to dump everything returned by result.read().
# print(result.read())
for item in result.read():
    # Only the first value of each item is printed.
    # NOTE(review): indexing [0] would fail on an empty sequence - confirm
    # that every item carries at least one value.
    field_value = item.get_values()[0]
    print(f"{item.get_str()} -> {field_value.get_str()}")

# 7- Export raw results to XLSX
result.result_xlsx_exporter("sample.xlsx")

# 8- Clean up: delete the vocabulary and the model used by this example
# NOTE(review): this is skipped if any earlier step raises - consider try/finally.
vocabulary.delete(vocabulary.get_id())
model.delete(model.get_id())
# Example #2
0
 def test_get_id(self):
     """Check that a newly constructed Model's get_id() equals str(None)."""
     fresh_model = Model()
     actual_id = fresh_model.get_id()
     # No id has been assigned, so the expected value is the string form of None.
     expected_id = str(None)
     self.assertEqual(actual_id, expected_id)