Beispiel #1
0
def _faro_run(input_path, input_file, file_type=ENTITY_EXT):
    """Run FARO on one input file and read back one of its output files.

    Args:
        input_path: Directory that holds the input file.
        input_file: Name of the input file inside ``input_path``.
        file_type: Extension of the output file to read; the file read is
            ``<input_path>/<input_file>.<file_type>``.

    Returns:
        Whatever ``_get_file_data`` returns for that output file. When
        ``file_type`` equals ``ENTITY_EXT`` only the value stored under the
        ``'entities'`` key is returned.
    """
    source = '{}/{}'.format(input_path, input_file)
    faro_execute(argparse.Namespace(input_file=source))

    # The output is read from next to the input: same name plus the extension.
    output = _get_file_data('{}.{}'.format(source, file_type))
    return output['entities'] if file_type == ENTITY_EXT else output
Beispiel #2
0
    def test_params_split_lines(self):
        """Run FARO with ``split_lines`` enabled and check the entity output.

        The test passes when the ``'entities'`` section of the entity output
        has no ``'mobile_phone_number'`` entry (or the entry is ``None``).
        """
        params = argparse.Namespace()
        params.input_file = '%s/%s' % (INPUT_PATH, INPUT_FILE_SPLIT_LINES)
        params.split_lines = True
        faro_execute(params)

        # NOTE(review): output_entity_file is never assigned in this test, so
        # this read relies on faro_execute setting it on params -- confirm.
        faro_split_lines = _get_file_data(params.output_entity_file)
        faro_split_lines_entity = faro_split_lines['entities']

        # assertIsNone shows the unexpected value on failure, whereas
        # assertTrue(... is None) only reports "False is not true".
        self.assertIsNone(faro_split_lines_entity.get('mobile_phone_number'))
Beispiel #3
0
def run(params):
    """Configure logging from the environment, then execute FARO.

    Environment variables read:
        FARO_LOG_LEVEL: Level passed to ``logging.basicConfig``; falls back
            to ``"INFO"`` when unset.
        FARO_LOG_FILE: When set, log records are also written to this file
            in addition to the stream handler.

    Args:
        params: Passed unchanged to ``faro_execute``.
    """
    level = os.getenv('FARO_LOG_LEVEL', "INFO")
    target_file = os.getenv('FARO_LOG_FILE', None)

    # A stream handler is always installed; the file handler is optional.
    if target_file is None:
        sinks = [logging.StreamHandler()]
    else:
        sinks = [logging.StreamHandler(), logging.FileHandler(target_file)]

    logging.basicConfig(
        level=level,
        format="%(levelname)s: %(name)20s: %(message)s",
        handlers=sinks)
    faro_execute(params)
Beispiel #4
0
    def test_params_rename_output_files(self):
        """Check that FARO writes to explicitly requested output file names.

        Both the entity and the score output paths are set on the parameters
        before the run; afterwards both paths must exist on disk.
        """
        entity_target = '%s/%s.%s' % (INPUT_PATH, 'test_entity', ENTITY_EXT)
        score_target = '%s/%s.%s' % (INPUT_PATH, 'test_score', SCORE_EXT)

        params = argparse.Namespace(
            input_file='%s/%s' % (INPUT_PATH, INPUT_FILE),
            output_entity_file=entity_target,
            output_score_file=score_target)
        faro_execute(params)

        # Read the paths back from params after the run, entity file first.
        for attribute in ('output_entity_file', 'output_score_file'):
            self.assertTrue(path.exists(getattr(params, attribute)))
Beispiel #5
0
    def test_corp_emails(self):
        """Run FARO verbosely and expect exactly two corporate e-mail entities.

        The entity output is written to a dedicated file name, read back, and
        its ``'entities'`` section must hold a ``'corporate_email'`` entry of
        length 2.
        """
        entity_file_name = 'test_corp_email_entity'
        score_file_name = 'test_corp_email_score'

        params = argparse.Namespace()
        params.input_file = '%s/%s' % (INPUT_PATH, INPUT_FILE_TESTS_TXT)
        params.output_entity_file = '%s/%s.%s' % (INPUT_PATH, entity_file_name, ENTITY_EXT)
        params.output_score_file = '%s/%s.%s' % (INPUT_PATH, score_file_name, SCORE_EXT)
        params.verbose = True
        faro_execute(params)

        faro_entities = _get_file_data(params.output_entity_file)['entities']

        # .get turns a missing key into a readable assertion failure instead
        # of a KeyError, and assertIsNotNone/assertEqual report actual values.
        corporate_emails = faro_entities.get('corporate_email')
        self.assertIsNotNone(corporate_emails)
        self.assertEqual(len(corporate_emails), 2)
Beispiel #6
0
    def test_params_verbose(self):
        """Run FARO with ``verbose`` enabled and check the reported entities.

        The ``'entities'`` section of the entity output must contain non-None
        entries for ``'person'``, ``'phone_number'`` and
        ``'probable_currency_amount'``.
        """
        entity_file_name = 'test_verbose_entity'
        score_file_name = 'test_verbose_score'

        params = argparse.Namespace()
        params.input_file = '%s/%s' % (INPUT_PATH, INPUT_FILE)
        params.output_entity_file = '%s/%s.%s' % (INPUT_PATH, entity_file_name, ENTITY_EXT)
        params.output_score_file = '%s/%s.%s' % (INPUT_PATH, score_file_name, SCORE_EXT)
        params.verbose = True
        faro_execute(params)

        faro_verbose = _get_file_data(params.output_entity_file)
        faro_verbose_entity = faro_verbose['entities']

        # Checked in the original order; the message names the entity type
        # that is missing so a failure is self-explanatory.
        for entity_type in ('person', 'phone_number',
                            'probable_currency_amount'):
            self.assertIsNotNone(
                faro_verbose_entity.get(entity_type),
                msg='no %r entry in the verbose entity output' % entity_type)
Beispiel #7
0
    def test_organizations(self):
        """Run FARO verbosely and compare detected organizations to the expected list.

        The ``'organization'`` entry of the entity output must have the same
        number of items as ``ORGANIZATIONS`` and the same set of items.
        """
        entity_file_name = 'test_verbose_entity_org'

        params = argparse.Namespace()
        params.input_file = '%s/%s' % (INPUT_PATH, INPUT_FILE_ORG)
        params.output_entity_file = '%s/%s.%s' % (INPUT_PATH, entity_file_name,
                                                  ENTITY_EXT)
        params.verbose = True
        faro_execute(params)

        faro_verbose = _get_file_data(params.output_entity_file)
        faro_verbose_entity = faro_verbose['entities']['organization']

        self.assertIsNotNone(faro_verbose_entity)
        # Length is compared separately from set equality, as in the original.
        self.assertEqual(len(faro_verbose_entity), len(ORGANIZATIONS))
        # An empty symmetric difference is the same as set equality;
        # assertSetEqual lists the missing/unexpected items on failure.
        self.assertSetEqual(set(faro_verbose_entity), set(ORGANIZATIONS))
Beispiel #8
0
                        help=('Json file with detected entities ' +
                              '(defaults: $INPUT_FILE.entity)'))
    parser.add_argument('--output_score_file',
                        dest="output_score_file",
                        type=str, default=None,
                        help=('Json with sensitivity score and ' +
                              'summary information ' +
                              '(defaults: $INPUT_FILE.score)'))
    parser.add_argument('--split_lines', dest="split_lines",
                        action="store_true", default=False,
                        help=("Do not join sentences of a document " +
                              " (use only if every line in the document " +
                              "is already line in the document " +
                              "(e.g. a raw text file) " +
                              "(defaults: %(default)s)"))
    parser.add_argument('--verbose', dest="verbose",
                        action="store_true", default=False,
                        help=("Store all entities in json " +
                              "(defaults: %(default)s)"))
    parser.add_argument('--dump', dest="dump",
                        action="store_true", default=False,
                        help=("Dump information to stdout instead of file" +
                              "(defaults: %(default)s"))
    params = parser.parse_args()
    if params.output_entity_file is None:
        params.output_entity_file = "{}{}".format(params.input_file, ".entity")
    if params.output_score_file is None:
        params.output_score_file = "{}{}".format(params.input_file, ".score")

    faro_execute(params)