def test_get_durations():
    """
    Test duration extraction without sources.

    Calls get_durations with return_sources=False; each expected row is
    rebuilt as (unit, float(duration_units), float(duration_days)).
    :return:
    """
    def _expected_rows(expected):
        rows = []
        for unit, duration_units, duration_days in expected:
            rows.append((unit, float(duration_units), float(duration_days)))
        return rows

    lexnlp_tests.test_extraction_func_on_test_data(
        get_durations,
        return_sources=False,
        expected_data_converter=_expected_rows)
def test_get_durations_source():
    """
    Test duration extraction with sources.

    Calls get_durations with return_sources=True; each expected row is
    rebuilt as (unit, float(duration_units), float(duration_days), source).
    :return:
    """
    def _expected_rows(expected):
        rows = []
        for unit, duration_units, duration_days, source in expected:
            rows.append(
                (unit, float(duration_units), float(duration_days), source))
        return rows

    lexnlp_tests.test_extraction_func_on_test_data(
        get_durations,
        return_sources=True,
        expected_data_converter=_expected_rows)
Ejemplo n.º 3
0
def test_get_ratios():
    """
    Test ratio extraction without sources.

    Calls get_ratios with return_sources=False. Every field of an expected
    row (numerator, consequent, decimal) becomes a float when truthy and
    None otherwise.
    :return:
    """
    def _float_or_none(raw):
        return float(raw) if raw else None

    def _expected_rows(expected):
        return [
            (_float_or_none(numerator),
             _float_or_none(consequent),
             _float_or_none(decimal))
            for numerator, consequent, decimal in expected
        ]

    lexnlp_tests.test_extraction_func_on_test_data(
        get_ratios,
        return_sources=False,
        expected_data_converter=_expected_rows)
Ejemplo n.º 4
0
def test_get_money_source():
    """
    Test money extraction with sources.

    Calls get_money with return_sources=True. Expected rows whose amount,
    currency and source are all falsy are skipped; in the remaining rows the
    amount becomes a float when truthy and None otherwise.
    :return:
    """
    def _expected_rows(expected):
        rows = []
        for amount, currency, source in expected:
            if not (amount or currency or source):
                continue
            rows.append((float(amount) if amount else None, currency, source))
        return rows

    lexnlp_tests.test_extraction_func_on_test_data(
        get_money,
        return_sources=True,
        expected_data_converter=_expected_rows)
Ejemplo n.º 5
0
def test_get_percents_source():
    """
    Test percent extraction with sources.

    Calls get_percents with return_sources=True. Expected rows in which every
    field is falsy are skipped; value_units and value_decimal become floats
    when truthy and None otherwise.
    :return:
    """
    def _float_or_none(raw):
        return float(raw) if raw else None

    def _expected_rows(expected):
        rows = []
        for unit, value_units, value_decimal, source in expected:
            if unit or value_units or value_decimal or source:
                rows.append((unit,
                             _float_or_none(value_units),
                             _float_or_none(value_decimal),
                             source))
        return rows

    lexnlp_tests.test_extraction_func_on_test_data(
        get_percents,
        return_sources=True,
        expected_data_converter=_expected_rows)
def test_geoentities_alias_filtering():
    """
    Test get_geoentities with a prepared alias blacklist.

    The blacklist is built by prepare_alias_blacklist_dict from three tuples;
    actual results are reduced to get_entity_name() of each item's first
    element before comparison.
    :return:
    """
    blacklist_records = [
        ('Afghanistan', None, False),
        ('Mississippi', 'en', False),
        ('AL', 'en', True),
    ]
    prepared_alias_blacklist = prepare_alias_blacklist_dict(blacklist_records)

    def _entity_names(actual):
        names = []
        for c in actual:
            names.append(get_entity_name(c[0]))
        return names

    # NOTE(review): start_from_csv_line=6 presumably skips the earlier rows of
    # the test data file - confirm against lexnlp_tests.
    lexnlp_tests.test_extraction_func_on_test_data(
        get_geoentities,
        geo_config_list=_CONFIG,
        prepared_alias_black_list=prepared_alias_blacklist,
        actual_data_converter=_entity_names,
        debug_print=True,
        start_from_csv_line=6)
Ejemplo n.º 7
0
def test_geoentities_alias_filtering():
    """
    Test get_geoentities_routine with a prepared alias ban list.

    The ban list is built by prepare_alias_banlist_dict from three
    AliasBanRecord items; actual results are reduced to the ``name`` attribute
    of each item's first element before comparison.
    :return:
    """
    ban_records = [
        AliasBanRecord('Afghanistan', None, False),
        AliasBanRecord('Mississippi', 'en', False),
        AliasBanRecord('AL', 'en', True),
    ]
    prepared_alias_banlist = prepare_alias_banlist_dict(ban_records)

    def _entity_names(actual):
        names = []
        for c in actual:
            names.append(c[0].name)
        return names

    # NOTE(review): start_from_csv_line=6 presumably skips the earlier rows of
    # the test data file - confirm against lexnlp_tests.
    lexnlp_tests.test_extraction_func_on_test_data(
        get_geoentities_routine,
        geo_config_list=_CONFIG,
        prepared_alias_ban_list=prepared_alias_banlist,
        actual_data_converter=_entity_names,
        debug_print=True,
        start_from_csv_line=6)
Ejemplo n.º 8
0
def test_get_distance():
    """
    Test distance extraction without sources.

    Calls get_distances with return_sources=False; each expected row is
    rebuilt as (Decimal(distance), units).
    :return:
    """
    # TODO: Do we need this separate method? test_get_distance_source()
    #   ... tests both distances and sources
    def _expected_rows(expected):
        rows = []
        for distance, units in expected:
            rows.append((Decimal(distance), units))
        return rows

    lexnlp_tests.test_extraction_func_on_test_data(
        func=get_distances,
        return_sources=False,
        expected_data_converter=_expected_rows)
Ejemplo n.º 9
0
def test_courts():
    """
    Test court extraction using the published US courts dictionary.

    The court CSV is downloaded with pandas, every row is turned into a
    config entry by build_dictionary_entry, and actual results are reduced to
    the ``name`` attribute of each item's first element before comparison.
    :return:
    """
    courts_csv_url = ("https://raw.githubusercontent.com/LexPredict/"
                      "lexpredict-legal-dictionary/1.0.2/en/legal/us_courts.csv")
    court_df = pandas.read_csv(courts_csv_url)

    # Create config objects
    court_config_list = [
        build_dictionary_entry(row) for _, row in court_df.iterrows()
    ]

    def _court_names(actual):
        return [cc[0].name for cc in actual]

    lexnlp_tests.test_extraction_func_on_test_data(
        get_courts,
        court_config_list=court_config_list,
        actual_data_converter=_court_names)
Ejemplo n.º 10
0
def test_get_ratios_source():
    """
    Test ratio extraction with sources.

    Calls get_ratios with return_sources=True. numerator, consequent and
    decimal of each expected row become Decimal values when truthy and None
    otherwise; the source is passed through unchanged.
    :return:
    """
    def _decimal_or_none(raw):
        return Decimal(raw) if raw else None

    def _expected_rows(expected):
        rows = []
        for numerator, consequent, decimal, source in expected:
            rows.append((
                _decimal_or_none(numerator),
                _decimal_or_none(consequent),
                _decimal_or_none(decimal),
                source,
            ))
        return rows

    lexnlp_tests.test_extraction_func_on_test_data(
        func=get_ratios,
        return_sources=True,
        expected_data_converter=_expected_rows)
Ejemplo n.º 11
0
    def test_get_regulations_csv(self):
        """
        Test get_regulations against an explicitly located CSV test data file.

        The same file is checked three times: as tuples without the source
        text, as tuples with the source text, and as dicts with the source
        text. The dict results are compared with
        DictionaryComparer(check_order=True); mismatches are collected across
        all lines and reported together in a single failure.
        :return:
        :raises AssertionError: when the dict output of at least one line
            differs from the expected data.
        """
        test_data_path = os.path.join(
            lexnlp_test_path,
            'lexnlp/extract/en/tests/test_regulations/test_get_regulations.csv'
        )

        # Pass 1: tuples without the source string.
        lexnlp_tests.test_extraction_func_on_test_data(
            get_regulations,
            expected_data_converter=lambda d: [
                (reg_type, reg_code) for reg_type, reg_code, _reg_str in d
            ],
            return_source=False,
            as_dict=False,
            test_data_path=test_data_path)

        # Pass 2: tuples including the source string.
        lexnlp_tests.test_extraction_func_on_test_data(
            get_regulations,
            expected_data_converter=lambda d: [
                (reg_type, reg_code, reg_str)
                for reg_type, reg_code, reg_str in d
            ],
            return_source=True,
            as_dict=False,
            test_data_path=test_data_path)

        # Pass 3: dict output, compared field by field.
        cmp = DictionaryComparer(check_order=True)
        errors = []

        for (i, text, _input_args, expected) in \
                lexnlp_tests.iter_test_data_text_and_tuple(file_name=test_data_path):
            expected = [{
                'regulation_type': reg_type,
                'regulation_code': reg_code,
                'regulation_text': reg_str
            } for reg_type, reg_code, reg_str in expected]
            actual = list(
                lexnlp_tests.benchmark_extraction_func(get_regulations,
                                                       text,
                                                       return_source=True,
                                                       as_dict=True))

            line_errors = cmp.compare_list_of_dicts(expected, actual)
            if line_errors:
                line_errors_str = '\n'.join(line_errors)
                errors.append(f'Regulation tests, line [{i + 1}] errors:\n' +
                              line_errors_str)

        if errors:
            # AssertionError (still an Exception subclass) so that test runners
            # report a data mismatch as a test failure rather than as an error.
            raise AssertionError('\n\n'.join(errors))
Ejemplo n.º 12
0
def test_get_percents():
    """
    Test percent extraction without sources.

    Calls get_percents with return_sources=False. Expected rows in which every
    field is falsy are skipped; value_units and value_decimal become floats
    when truthy and None otherwise.
    :return:
    """
    def _float_or_none(raw):
        return float(raw) if raw else None

    def _expected_rows(expected):
        rows = []
        for unit, value_units, value_decimal in expected:
            if not (unit or value_units or value_decimal):
                continue
            rows.append((unit,
                         _float_or_none(value_units),
                         _float_or_none(value_decimal)))
        return rows

    lexnlp_tests.test_extraction_func_on_test_data(
        get_percents,
        return_sources=False,
        expected_data_converter=_expected_rows)
def test_get_regulations():
    """
    Test get_regulations in tuple and dict form, with and without sources.

    The tuple variants go through test_extraction_func_on_test_data; the dict
    variants are checked line by line with assert_list_equal against dicts
    built from the expected tuples.
    :return:
    """
    def _type_and_code(d):
        return [(reg_type, reg_code) for reg_type, reg_code, _reg_str in d]

    def _type_code_and_source(d):
        return [(reg_type, reg_code, reg_str)
                for reg_type, reg_code, reg_str in d]

    lexnlp_tests.test_extraction_func_on_test_data(
        get_regulations,
        expected_data_converter=_type_and_code,
        return_source=False,
        as_dict=False)
    lexnlp_tests.test_extraction_func_on_test_data(
        get_regulations,
        expected_data_converter=_type_code_and_source,
        return_source=True,
        as_dict=False)

    # TODO Impl test_extraction_func_on_test_data() comparing lists of dicts
    for _i, text, _input_args, expected in \
            lexnlp_tests.iter_test_data_text_and_tuple():
        # (return_source flag, expected dicts) - checked in this order.
        expected_by_mode = (
            (False, [{
                'regulation_type': reg_type,
                'regulation_code': reg_code
            } for reg_type, reg_code, _reg_str in expected]),
            (True, [{
                'regulation_type': reg_type,
                'regulation_code': reg_code,
                'regulation_str': reg_str
            } for reg_type, reg_code, reg_str in expected]),
        )
        for with_source, expected_dicts in expected_by_mode:
            actual_dicts = list(
                lexnlp_tests.benchmark_extraction_func(
                    get_regulations,
                    text,
                    return_source=with_source,
                    as_dict=True))
            assert_list_equal(actual_dicts, expected_dicts)
Ejemplo n.º 14
0
def test_courts_longest_match():
    """
    Test court extraction when one court name/alias is a substring of another.

    In such a conflict the court with the longest alias should be returned for
    each conflicting match. If the court with the shorter alias also has a
    separate match elsewhere, both courts should be returned.

    Court configs are read from 'us_courts.csv' located next to the test data
    path. Each entity name is built as '<Court Type>|<Court Name>', and actual
    results are split on '|' back into tuples before comparison.
    :return:
    """
    courts_config_fn = os.path.join(os.path.dirname(lexnlp_tests.this_test_data_path()), 'us_courts.csv')
    courts_config_list = []
    # newline='' is what the csv module requires of file objects it reads, so
    # that newlines embedded in quoted fields are parsed correctly.
    with open(courts_config_fn, 'r', encoding='utf8', newline='') as f:
        reader = csv.DictReader(f)
        for row in reader:
            cc = entity_config(row['Court ID'], row['Court Type'] + '|' + row['Court Name'], 0,
                               row['Alias'].split(';') if row['Alias'] else [],
                               name_is_alias=False)
            # The plain court name is registered separately as an alias.
            add_alias_to_entity(cc, row['Court Name'])

            courts_config_list.append(cc)
    lexnlp_tests.test_extraction_func_on_test_data(get_courts, court_config_list=courts_config_list,
                                                   actual_data_converter=lambda actual:
                                                   [tuple(c[0][1].split('|')) for c in actual],
                                                   debug_print=True)
Ejemplo n.º 15
0
def test_courts():
    """
    Test court extraction using the published US courts dictionary.

    The court CSV is downloaded with pandas and every row is turned into an
    entity config (ID, name, 0, aliases); actual results are reduced to
    element [0][1] of each item before comparison.
    :return:
    """

    # Read master data
    import pandas

    # Load court data
    courts_csv_url = ("https://raw.githubusercontent.com/LexPredict/"
                      "lexpredict-legal-dictionary/1.0.2/en/legal/us_courts.csv")
    court_df = pandas.read_csv(courts_csv_url)

    # Create config objects; a null "Alias" cell means no aliases
    court_config_list = [
        entity_config(row["Court ID"], row["Court Name"], 0,
                      [] if pandas.isnull(row["Alias"]) else row["Alias"].split(";"))
        for _, row in court_df.iterrows()
    ]

    def _second_field_of_first_item(actual):
        return [cc[0][1] for cc in actual]

    lexnlp_tests.test_extraction_func_on_test_data(
        get_courts,
        court_config_list=court_config_list,
        actual_data_converter=_second_field_of_first_item)
Ejemplo n.º 16
0
 def test_normalize_text(self):
     """
     Test normalize_text; the actual result is wrapped in a one-element tuple
     before comparison.
     :return:
     """
     def _as_single_item_tuple(text):
         return (text, )

     lexnlp_tests.test_extraction_func_on_test_data(
         normalize_text,
         actual_data_converter=_as_single_item_tuple,
         debug_print=True)
Ejemplo n.º 17
0
def test_trademarks():
    """
    Test get_trademarks with default arguments.
    :return:
    """
    extractor = get_trademarks
    lexnlp_tests.test_extraction_func_on_test_data(extractor)
Ejemplo n.º 18
0
def test_copyright():
    """
    Test get_copyright with return_sources=True.
    :return:
    """
    extractor = get_copyright
    lexnlp_tests.test_extraction_func_on_test_data(
        extractor,
        return_sources=True)
Ejemplo n.º 19
0
def test_stems_lowercase():
    """
    Test get_stem_list with lowercase=True.
    :return:
    """
    extractor = get_stem_list
    lexnlp_tests.test_extraction_func_on_test_data(
        extractor,
        lowercase=True)
Ejemplo n.º 20
0
def test_stems():
    """
    Test get_stem_list with default arguments.

    Expected stems are lower-cased before comparison; a falsy expected value
    is replaced by None.
    :return:
    """
    def _lowercase_stems(stems):
        if not stems:
            return None
        return [stem.lower() for stem in stems]

    lexnlp_tests.test_extraction_func_on_test_data(
        get_stem_list,
        expected_data_converter=_lowercase_stems)
Ejemplo n.º 21
0
def test_adverbs_lemma():
    """
    Test get_adverbs with lemmatize=True.
    :return:
    """
    extractor = get_adverbs
    lexnlp_tests.test_extraction_func_on_test_data(
        extractor,
        lemmatize=True)
Ejemplo n.º 22
0
def test_adverbs():
    """
    Test get_adverbs with default arguments.
    :return:
    """
    extractor = get_adverbs
    lexnlp_tests.test_extraction_func_on_test_data(extractor)
Ejemplo n.º 23
0
def test_adjectives():
    """
    Test get_adjectives with default arguments.
    :return:
    """
    extractor = get_adjectives
    lexnlp_tests.test_extraction_func_on_test_data(extractor)
Ejemplo n.º 24
0
def test_nouns():
    """
    Test get_nouns with default arguments.
    :return:
    """
    extractor = get_nouns
    lexnlp_tests.test_extraction_func_on_test_data(extractor)
Ejemplo n.º 25
0
def test_lemmas_sw():
    """
    Test get_lemma_list and then get_lemmas, each with stopword=True.
    :return:
    """
    for extractor in (get_lemma_list, get_lemmas):
        lexnlp_tests.test_extraction_func_on_test_data(extractor,
                                                       stopword=True)
Ejemplo n.º 26
0
def test_gpes():
    """
    Test get_geopolitical with default arguments.
    :return:
    """
    extractor = get_geopolitical
    lexnlp_tests.test_extraction_func_on_test_data(extractor)
Ejemplo n.º 27
0
def test_persons():
    """
    Test get_persons with default arguments.
    :return:
    """
    extractor = get_persons
    lexnlp_tests.test_extraction_func_on_test_data(extractor)
Ejemplo n.º 28
0
def test_lemmas():
    """
    Test get_lemma_list and then get_lemmas with default arguments.
    :return:
    """
    for extractor in (get_lemma_list, get_lemmas):
        lexnlp_tests.test_extraction_func_on_test_data(extractor)
Ejemplo n.º 29
0
def test_noun_phrases():
    """
    Test get_noun_phrases with default arguments.
    :return:
    """
    extractor = get_noun_phrases
    lexnlp_tests.test_extraction_func_on_test_data(extractor)
Ejemplo n.º 30
0
def test_lemmas_lc():
    """
    Test get_lemma_list and then get_lemmas, each with lowercase=True.
    :return:
    """
    # Snowball returns lowercase always
    for extractor in (get_lemma_list, get_lemmas):
        lexnlp_tests.test_extraction_func_on_test_data(extractor,
                                                       lowercase=True)