def test_deltah(unspecified_test_corpus):
    """Compare ``deltah_fl`` with stored reference values on the test corpus.

    Four ``CanonicalVariantContext`` configurations over ``'transcription'``
    are entered one after another: the ``'type'`` setting, ``'type'`` with
    ``frequency_threshold=3``, the ``'token'`` setting, and ``'token'`` with
    ``frequency_threshold=3``.  Inside each context the three single segment
    pairs and the list holding all three are passed to ``deltah_fl`` and the
    result must lie within ``0.0001`` of the expected number.
    """
    tolerance = 0.0001
    sibilants = ('s', 'ʃ')
    nasals = ('m', 'n')
    vowels = ('e', 'o')

    # Each row: (third positional context argument, extra context options,
    #            expected values for [s/ʃ], [m/n], [e/o], [all three pairs]).
    scenarios = (
        ('type', {}, (0.13333, 0.13333, 0, 0.26667)),
        ('type', {'frequency_threshold': 3}, (0.16667, 0, 0, 0.16667)),
        ('token', {}, (0.24794, 0.00691, 0, 0.25485)),
        ('token', {'frequency_threshold': 3}, (0.25053, 0, 0, 0.25053)),
    )

    for mode, context_options, expected_values in scenarios:
        # Build fresh lists for every scenario so that no list object is
        # shared between separate deltah_fl calls.
        pair_sets = (
            [sibilants],
            [nasals],
            [vowels],
            [sibilants, nasals, vowels],
        )
        with CanonicalVariantContext(unspecified_test_corpus, 'transcription',
                                     mode, **context_options) as context:
            for pairs, expected in zip(pair_sets, expected_values):
                measured = deltah_fl(context, segment_pairs=pairs)
                assert abs(measured - expected) < tolerance
Exemple #2
0
def test_deltah(unspecified_test_corpus):
    """Check ``deltah_fl`` against reference values, including un-normalized ones.

    Four separate ``CanonicalVariantContext`` instances over
    ``'transcription'`` are used, in this order:

    1. ``'type'`` with default ``deltah_fl`` options;
    2. ``'type'`` again, passing ``prevent_normalization=True`` to
       ``deltah_fl``;
    3. ``'type'`` with ``frequency_threshold=3`` on the context;
    4. ``'token'`` with default options.

    Every result must be within ``0.0001`` of its expected value.
    """

    def cases(expected_values, **call_options):
        # Pair each of the four segment-pair lists with its expected value
        # and attach any extra keyword arguments destined for deltah_fl.
        pair_sets = (
            [('s', 'ʃ')],
            [('m', 'n')],
            [('e', 'o')],
            [('s', 'ʃ'), ('m', 'n'), ('e', 'o')],
        )
        return [
            (dict(segment_pairs=pairs, **call_options), expected)
            for pairs, expected in zip(pair_sets, expected_values)
        ]

    def check(mode, expectations, **context_options):
        # Open one context and run every expectation inside it.
        with CanonicalVariantContext(unspecified_test_corpus, 'transcription',
                                     mode, **context_options) as context:
            for call_kwargs, expected in expectations:
                difference = deltah_fl(context, **call_kwargs) - expected
                assert abs(difference) < 0.0001

    check('type', cases((0.02547695, 0.02547695, 0, 0.05284)))

    check('type', cases((0.09953567, 0.09953567, 0, 0.206450877),
                        prevent_normalization=True))

    check('type', cases((0.035015954, 0, 0, 0.035015954)),
          frequency_threshold=3)

    check('token', cases((0.08305, 0.002314, 0.0, 0.0853641)))
def test_deltah_wordtokens(unspecified_discourse_corpus):
    """Check ``deltah_fl`` on the discourse lexicon across variant contexts.

    The lexicon of the fixture corpus is wrapped, in order, in
    ``MostFrequentVariantContext``, ``SeparatedTokensVariantContext`` and
    ``WeightedVariantContext`` instances (with and without
    ``frequency_threshold=3``), always on ``'transcription'``.  For every
    context a handful of segment-pair lists is evaluated and each result
    must be within ``0.0001`` of the stored reference value.
    """
    lexicon = unspecified_discourse_corpus.lexicon
    s_sh = ('s', 'ʃ')
    m_n = ('m', 'n')
    e_o = ('e', 'o')
    thresholded = {'frequency_threshold': 3}

    # Each row: (context class, third positional argument, extra context
    #            options, [(segment pairs, expected value), ...]).
    scenarios = [
        (MostFrequentVariantContext, 'type', {},
         [([s_sh], 0.13333), ([e_o], 0), ([s_sh, m_n, e_o], 0.26667)]),
        (MostFrequentVariantContext, 'type', thresholded,
         [([m_n], 0)]),
        (MostFrequentVariantContext, 'token', {},
         [([s_sh], 0.24794), ([e_o], 0), ([s_sh, m_n, e_o], 0.25485)]),
        (MostFrequentVariantContext, 'token', thresholded,
         [([m_n], 0)]),
        (SeparatedTokensVariantContext, 'token', {},
         [([s_sh, m_n, e_o], 0.25483), ([m_n], 0.00691)]),
        (SeparatedTokensVariantContext, 'token', thresholded,
         [([s_sh], 0.25053), ([e_o], 0)]),
        (WeightedVariantContext, 'type', {},
         [([m_n], 0.13333)]),
        (WeightedVariantContext, 'type', thresholded,
         [([s_sh, m_n, e_o], 0.16667), ([s_sh], 0.16667), ([e_o], 0)]),
    ]

    for context_cls, mode, options, expectations in scenarios:
        with context_cls(lexicon, 'transcription', mode, **options) as ctx:
            for pairs, expected in expectations:
                result = deltah_fl(ctx, segment_pairs=pairs)
                assert abs(result - expected) < 0.0001
Exemple #4
0
def test_deltah_wordtokens(unspecified_discourse_corpus):
    """Validate ``deltah_fl`` for several variant-handling contexts.

    Uses ``unspecified_discourse_corpus.lexicon`` as the corpus and enters
    eight contexts in sequence (``MostFrequentVariantContext`` x4,
    ``SeparatedTokensVariantContext`` x2, ``WeightedVariantContext`` x2),
    alternating between the default setup and ``frequency_threshold=3``.
    Each ``deltah_fl`` result has to match its reference value to within
    ``0.0001``.
    """
    lexicon = unspecified_discourse_corpus.lexicon

    def sibilants():
        return [('s', 'ʃ')]

    def nasals():
        return [('m', 'n')]

    def vowels():
        return [('e', 'o')]

    def everything():
        return [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]

    def verify(context_cls, mode, expectations, **context_options):
        # Enter a single context and assert every (pairs, expected) entry.
        with context_cls(lexicon, 'transcription', mode,
                         **context_options) as context:
            for pairs, expected in expectations:
                outcome = deltah_fl(context, segment_pairs=pairs)
                assert abs(outcome - expected) < 0.0001

    # MostFrequentVariantContext, 'type' setting
    verify(MostFrequentVariantContext, 'type',
           [(sibilants(), 0.13333), (vowels(), 0), (everything(), 0.26667)])
    verify(MostFrequentVariantContext, 'type',
           [(nasals(), 0)],
           frequency_threshold=3)

    # MostFrequentVariantContext, 'token' setting
    verify(MostFrequentVariantContext, 'token',
           [(sibilants(), 0.24794), (vowels(), 0), (everything(), 0.25485)])
    verify(MostFrequentVariantContext, 'token',
           [(nasals(), 0)],
           frequency_threshold=3)

    # SeparatedTokensVariantContext, 'token' setting
    verify(SeparatedTokensVariantContext, 'token',
           [(everything(), 0.25483), (nasals(), 0.00691)])
    verify(SeparatedTokensVariantContext, 'token',
           [(sibilants(), 0.25053), (vowels(), 0)],
           frequency_threshold=3)

    # WeightedVariantContext, 'type' setting
    verify(WeightedVariantContext, 'type',
           [(nasals(), 0.13333)])
    verify(WeightedVariantContext, 'type',
           [(everything(), 0.16667), (sibilants(), 0.16667), (vowels(), 0)],
           frequency_threshold=3)
Exemple #5
0
def test_deltah(unspecified_test_corpus):
    """Check ``deltah_fl`` reference values under ``CanonicalVariantContext``.

    The ``'type'`` and ``'token'`` settings are each run twice on
    ``'transcription'``: first with default context options, then with
    ``frequency_threshold=3``.  In every context the s/ʃ, m/n and e/o pairs
    are evaluated individually and then together; each result must be
    within ``0.0001`` of the expected value.
    """
    # Expected values keyed by (mode, threshold); the four numbers belong
    # to [s/ʃ], [m/n], [e/o] and the combined list, in that order.
    reference = {
        ('type', None): (0.13333, 0.13333, 0, 0.26667),
        ('type', 3): (0.16667, 0, 0, 0.16667),
        ('token', None): (0.24794, 0.00691, 0, 0.25485),
        ('token', 3): (0.25053, 0, 0, 0.25053),
    }

    for mode in ('type', 'token'):
        for threshold in (None, 3):
            # Only pass frequency_threshold when a threshold is requested.
            if threshold is None:
                options = {}
            else:
                options = {'frequency_threshold': threshold}

            queries = [
                [('s', 'ʃ')],
                [('m', 'n')],
                [('e', 'o')],
                [('s', 'ʃ'), ('m', 'n'), ('e', 'o')],
            ]
            targets = reference[mode, threshold]

            with CanonicalVariantContext(unspecified_test_corpus,
                                         'transcription',
                                         mode,
                                         **options) as context:
                for index, pairs in enumerate(queries):
                    value = deltah_fl(context, segment_pairs=pairs)
                    assert abs(value - targets[index]) < 0.0001