def _faro_run(input_path, input_file, file_type=ENTITY_EXT):
    """Run FARO on one input file and load the output it produced.

    Args:
        input_path: Directory that holds the input file.
        input_file: Name of the input file inside ``input_path``.
        file_type: Extension of the output file to load; it is appended to
            ``input_file`` after a dot. Defaults to ``ENTITY_EXT``.

    Returns:
        The data returned by ``_get_file_data`` for
        ``<input_path>/<input_file>.<file_type>``. When ``file_type`` equals
        ``ENTITY_EXT`` only the ``'entities'`` entry of that data is returned.
    """
    result_name = '%s.%s' % (input_file, file_type)
    source_path = '%s/%s' % (input_path, input_file)

    faro_execute(argparse.Namespace(input_file=source_path))

    loaded = _get_file_data('%s/%s' % (input_path, result_name))
    if file_type != ENTITY_EXT:
        return loaded
    # Entity files wrap the detections under a top-level 'entities' key.
    return loaded['entities']
def test_params_split_lines(self):
    """Check that no mobile phone number is reported with ``split_lines`` on.

    Runs ``faro_execute`` on ``INPUT_FILE_SPLIT_LINES`` with
    ``split_lines=True`` and asserts that the resulting entities have no
    ``'mobile_phone_number'`` entry (or that the entry is ``None``).
    """
    params = argparse.Namespace()
    params.input_file = '%s/%s' % (INPUT_PATH, INPUT_FILE_SPLIT_LINES)
    params.split_lines = True

    faro_execute(params)

    # output_entity_file is never assigned in this test, so it has to be
    # present on params after faro_execute returns -- presumably the call
    # fills in the default output path.
    faro_split_lines = _get_file_data(params.output_entity_file)
    faro_split_lines_entity = faro_split_lines['entities']

    # assertIsNone reports the unexpected value on failure, unlike
    # assertTrue(... is None) which only says "False is not true".
    self.assertIsNone(faro_split_lines_entity.get('mobile_phone_number'))
def run(params):
    """Configure logging from the environment, then execute FARO.

    Environment variables read:
        FARO_LOG_LEVEL: Level passed to ``logging.basicConfig``
            (``"INFO"`` when unset).
        FARO_LOG_FILE: Optional path; when set, a ``logging.FileHandler``
            for it is added next to the always-present stream handler.

    Args:
        params: Parameter object forwarded unchanged to ``faro_execute``.
    """
    requested_level = os.getenv('FARO_LOG_LEVEL', "INFO")
    log_path = os.getenv('FARO_LOG_FILE', None)

    # A stream handler is always installed; the file handler is opt-in.
    log_handlers = [logging.StreamHandler()]
    if log_path is not None:
        log_handlers.append(logging.FileHandler(log_path))

    logging.basicConfig(
        level=requested_level,
        format="%(levelname)s: %(name)20s: %(message)s",
        handlers=log_handlers,
    )

    faro_execute(params)
def test_params_rename_output_files(self):
    """Check that explicitly named entity and score output files are created.

    Points ``output_entity_file`` and ``output_score_file`` at custom names
    under ``INPUT_PATH``, runs ``faro_execute`` on ``INPUT_FILE`` and asserts
    that both paths exist afterwards.
    """
    entity_target = '%s/%s.%s' % (INPUT_PATH, 'test_entity', ENTITY_EXT)
    score_target = '%s/%s.%s' % (INPUT_PATH, 'test_score', SCORE_EXT)

    params = argparse.Namespace(
        input_file='%s/%s' % (INPUT_PATH, INPUT_FILE),
        output_entity_file=entity_target,
        output_score_file=score_target,
    )
    faro_execute(params)

    # Read the paths back from params, exactly as they stand after the run.
    for produced in (params.output_entity_file, params.output_score_file):
        self.assertTrue(path.exists(produced))
def test_corp_emails(self):
    """Check that exactly two corporate emails are detected in the test text.

    Runs ``faro_execute`` in verbose mode on ``INPUT_FILE_TESTS_TXT`` with
    custom output file names, then asserts that the ``'corporate_email'``
    entry of the resulting entities is present and has length 2.
    """
    entity_file_name = 'test_corp_email_entity'
    score_file_name = 'test_corp_email_score'

    params = argparse.Namespace()
    params.input_file = '%s/%s' % (INPUT_PATH, INPUT_FILE_TESTS_TXT)
    params.output_entity_file = '%s/%s.%s' % (INPUT_PATH, entity_file_name, ENTITY_EXT)
    params.output_score_file = '%s/%s.%s' % (INPUT_PATH, score_file_name, SCORE_EXT)
    params.verbose = True

    faro_execute(params)

    faro_entities = _get_file_data(params.output_entity_file)['entities']
    corporate_emails = faro_entities['corporate_email']

    # assertIsNotNone reports the offending value on failure instead of the
    # opaque "False is not true" produced by assertTrue(... is not None).
    self.assertIsNotNone(corporate_emails)
    self.assertEqual(len(corporate_emails), 2)
def test_params_verbose(self):
    """Check that verbose mode reports the expected entity types.

    Runs ``faro_execute`` with ``verbose=True`` on ``INPUT_FILE`` using
    custom output file names, then asserts that the ``'person'``,
    ``'phone_number'`` and ``'probable_currency_amount'`` entries of the
    resulting entities are not ``None``.
    """
    entity_file_name = 'test_verbose_entity'
    score_file_name = 'test_verbose_score'

    params = argparse.Namespace()
    params.input_file = '%s/%s' % (INPUT_PATH, INPUT_FILE)
    params.output_entity_file = '%s/%s.%s' % (INPUT_PATH, entity_file_name, ENTITY_EXT)
    params.output_score_file = '%s/%s.%s' % (INPUT_PATH, score_file_name, SCORE_EXT)
    params.verbose = True

    faro_execute(params)

    faro_verbose = _get_file_data(params.output_entity_file)
    faro_verbose_entity = faro_verbose['entities']

    # assertIsNotNone gives a meaningful failure message; the subscript still
    # raises KeyError when an entity type is missing altogether, as before.
    self.assertIsNotNone(faro_verbose_entity['person'])
    self.assertIsNotNone(faro_verbose_entity['phone_number'])
    self.assertIsNotNone(faro_verbose_entity['probable_currency_amount'])
def test_organizations(self):
    """Check that the detected organizations match ``ORGANIZATIONS`` exactly.

    Runs ``faro_execute`` in verbose mode on ``INPUT_FILE_ORG`` and compares
    the ``'organization'`` entry of the resulting entities with the
    ``ORGANIZATIONS`` reference: same number of items and same set of values.
    """
    entity_file_name = 'test_verbose_entity_org'

    params = argparse.Namespace()
    params.input_file = '%s/%s' % (INPUT_PATH, INPUT_FILE_ORG)
    params.output_entity_file = '%s/%s.%s' % (INPUT_PATH, entity_file_name, ENTITY_EXT)
    params.verbose = True

    faro_execute(params)

    faro_verbose = _get_file_data(params.output_entity_file)
    faro_verbose_entity = faro_verbose['entities']['organization']

    self.assertIsNotNone(faro_verbose_entity)
    # Dedicated assertions report the actual counts / differing members on
    # failure rather than a bare "False is not true".
    self.assertEqual(len(faro_verbose_entity), len(ORGANIZATIONS))
    # Equal sets <=> empty symmetric difference, which is what the original
    # check computed by hand.
    self.assertSetEqual(set(faro_verbose_entity), set(ORGANIZATIONS))
help=('Json file with detected entities ' + '(defaults: $INPUT_FILE.entity)')) parser.add_argument('--output_score_file', dest="output_score_file", type=str, default=None, help=('Json with sensitivity score and ' + 'summary information ' + '(defaults: $INPUT_FILE.score)')) parser.add_argument('--split_lines', dest="split_lines", action="store_true", default=False, help=("Do not join sentences of a document " + " (use only if every line in the document " + "is already line in the document " + "(e.g. a raw text file) " + "(defaults: %(default)s)")) parser.add_argument('--verbose', dest="verbose", action="store_true", default=False, help=("Store all entities in json " + "(defaults: %(default)s)")) parser.add_argument('--dump', dest="dump", action="store_true", default=False, help=("Dump information to stdout instead of file" + "(defaults: %(default)s")) params = parser.parse_args() if params.output_entity_file is None: params.output_entity_file = "{}{}".format(params.input_file, ".entity") if params.output_score_file is None: params.output_score_file = "{}{}".format(params.input_file, ".score") faro_execute(params)