def test_deltah_canonical(unspecified_test_corpus):
    """Functional load via deltah_fl (presumably delta-H, change in entropy)
    in canonical variant contexts, for type and token frequency, with and
    without a frequency threshold.

    NOTE(review): this was originally named ``test_deltah``, the same name as
    two later definitions in this file; the later ones shadowed it, so pytest
    never collected this test.  Renamed so it actually runs.  Its expected
    values duplicate those of the last ``test_deltah`` in the file.
    """
    # (deltah_fl kwargs, expected FL) pairs -- type frequency, no threshold.
    type_calls = [
        ({'segment_pairs': [('s', 'ʃ')]}, 0.13333),
        ({'segment_pairs': [('m', 'n')]}, 0.13333),
        ({'segment_pairs': [('e', 'o')]}, 0),
        ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.26667),
    ]
    with CanonicalVariantContext(unspecified_test_corpus,
                                 'transcription', 'type') as c:
        for kwargs, v in type_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001

    # Type frequency, ignoring words with frequency below 3.
    type_calls = [
        ({'segment_pairs': [('s', 'ʃ')]}, 0.16667),
        ({'segment_pairs': [('m', 'n')]}, 0),
        ({'segment_pairs': [('e', 'o')]}, 0),
        ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.16667),
    ]
    with CanonicalVariantContext(unspecified_test_corpus, 'transcription',
                                 'type', frequency_threshold=3) as c:
        for kwargs, v in type_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001

    # Token frequency, no threshold.
    token_calls = [
        ({'segment_pairs': [('s', 'ʃ')]}, 0.24794),
        ({'segment_pairs': [('m', 'n')]}, 0.00691),
        ({'segment_pairs': [('e', 'o')]}, 0),
        ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.25485),
    ]
    with CanonicalVariantContext(unspecified_test_corpus,
                                 'transcription', 'token') as c:
        for kwargs, v in token_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001

    # Token frequency with threshold 3.
    token_calls = [
        ({'segment_pairs': [('s', 'ʃ')]}, 0.25053),
        ({'segment_pairs': [('m', 'n')]}, 0),
        ({'segment_pairs': [('e', 'o')]}, 0),
        ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.25053),
    ]
    with CanonicalVariantContext(unspecified_test_corpus, 'transcription',
                                 'token', frequency_threshold=3) as c:
        for kwargs, v in token_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001
def test_deltah_normalized(unspecified_test_corpus):
    """Functional load via deltah_fl with normalized expected values, and
    with normalization explicitly disabled via ``prevent_normalization``.

    NOTE(review): this was originally a second definition of ``test_deltah``;
    the final ``test_deltah`` in the file shadowed it, so pytest never ran
    it and its (distinct) expected values were silently untested.  Renamed
    so it is collected.
    """
    # Type frequency, normalized (default behaviour of deltah_fl here).
    type_calls = [
        ({'segment_pairs': [('s', 'ʃ')]}, 0.02547695),
        ({'segment_pairs': [('m', 'n')]}, 0.02547695),
        ({'segment_pairs': [('e', 'o')]}, 0),
        ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.05284),
    ]
    with CanonicalVariantContext(unspecified_test_corpus,
                                 'transcription', 'type') as c:
        for kwargs, v in type_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001

    # Same context, but with normalization turned off -- raw entropy change.
    type_calls = [
        ({'segment_pairs': [('s', 'ʃ')],
          'prevent_normalization': True}, 0.09953567),
        ({'segment_pairs': [('m', 'n')],
          'prevent_normalization': True}, 0.09953567),
        ({'segment_pairs': [('e', 'o')],
          'prevent_normalization': True}, 0),
        ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')],
          'prevent_normalization': True}, 0.206450877),
    ]
    with CanonicalVariantContext(unspecified_test_corpus,
                                 'transcription', 'type') as c:
        for kwargs, v in type_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001

    # Type frequency with threshold 3, normalized.
    type_calls = [
        ({'segment_pairs': [('s', 'ʃ')]}, 0.035015954),
        ({'segment_pairs': [('m', 'n')]}, 0),
        ({'segment_pairs': [('e', 'o')]}, 0),
        ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.035015954),
    ]
    with CanonicalVariantContext(unspecified_test_corpus, 'transcription',
                                 'type', frequency_threshold=3) as c:
        for kwargs, v in type_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001

    # Token frequency, normalized.
    token_calls = [
        ({'segment_pairs': [('s', 'ʃ')]}, 0.08305),
        ({'segment_pairs': [('m', 'n')]}, 0.002314),
        ({'segment_pairs': [('e', 'o')]}, 0.0),
        ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.0853641),
    ]
    with CanonicalVariantContext(unspecified_test_corpus,
                                 'transcription', 'token') as c:
        for kwargs, v in token_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001
def test_deltah_wordtokens_variants(unspecified_discourse_corpus):
    """Functional load via deltah_fl over a discourse lexicon, across the
    pronunciation-variant context types (most-frequent, separated-tokens,
    weighted), with and without a frequency threshold.

    NOTE(review): this was originally named ``test_deltah_wordtokens``, the
    same name as a later definition in this file; the later one shadowed it,
    so pytest never collected this test.  Renamed so it actually runs.  Its
    expected values duplicate the later definition's.
    """
    corpus = unspecified_discourse_corpus.lexicon

    # Most-frequent-variant context, type frequency.
    frequent_type_calls = [
        ({'segment_pairs': [('s', 'ʃ')]}, 0.13333),
        ({'segment_pairs': [('e', 'o')]}, 0),
        ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.26667),
    ]
    with MostFrequentVariantContext(corpus, 'transcription', 'type') as c:
        for kwargs, v in frequent_type_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001

    frequent_type_calls = [({'segment_pairs': [('m', 'n')]}, 0)]
    with MostFrequentVariantContext(corpus, 'transcription', 'type',
                                    frequency_threshold=3) as c:
        for kwargs, v in frequent_type_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001

    # Most-frequent-variant context, token frequency.
    frequent_token_calls = [
        ({'segment_pairs': [('s', 'ʃ')]}, 0.24794),
        ({'segment_pairs': [('e', 'o')]}, 0),
        ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.25485),
    ]
    with MostFrequentVariantContext(corpus, 'transcription', 'token') as c:
        for kwargs, v in frequent_token_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001

    frequent_token_calls = [({'segment_pairs': [('m', 'n')]}, 0)]
    with MostFrequentVariantContext(corpus, 'transcription', 'token',
                                    frequency_threshold=3) as c:
        for kwargs, v in frequent_token_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001

    # Separated-tokens context, token frequency.
    count_token_calls = [
        ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.25483),
        ({'segment_pairs': [('m', 'n')]}, 0.00691),
    ]
    with SeparatedTokensVariantContext(corpus, 'transcription', 'token') as c:
        for kwargs, v in count_token_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001

    count_token_calls = [
        ({'segment_pairs': [('s', 'ʃ')]}, 0.25053),
        ({'segment_pairs': [('e', 'o')]}, 0),
    ]
    with SeparatedTokensVariantContext(corpus, 'transcription', 'token',
                                       frequency_threshold=3) as c:
        for kwargs, v in count_token_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001

    # Weighted (relative-frequency) context, type frequency.
    relative_type_calls = [({'segment_pairs': [('m', 'n')]}, 0.13333)]
    with WeightedVariantContext(corpus, 'transcription', 'type') as c:
        for kwargs, v in relative_type_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001

    relative_type_calls = [
        ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.16667),
        ({'segment_pairs': [('s', 'ʃ')]}, 0.16667),
        ({'segment_pairs': [('e', 'o')]}, 0),
    ]
    with WeightedVariantContext(corpus, 'transcription', 'type',
                                frequency_threshold=3) as c:
        for kwargs, v in relative_type_calls:
            assert abs(deltah_fl(c, **kwargs) - v) < 0.0001
def test_deltah_wordtokens(unspecified_discourse_corpus):
    """Functional load via deltah_fl over a discourse lexicon, exercised
    across the pronunciation-variant context types (most-frequent,
    separated-tokens, weighted), with and without a frequency threshold.
    """
    lexicon = unspecified_discourse_corpus.lexicon
    eps = 0.0001

    def check(context_manager, expectations):
        # Enter the variant context and compare each deltah_fl result
        # against its expected functional-load value, to within eps.
        with context_manager as ctx:
            for fl_kwargs, expected in expectations:
                assert abs(deltah_fl(ctx, **fl_kwargs) - expected) < eps

    check(MostFrequentVariantContext(lexicon, 'transcription', 'type'),
          [({'segment_pairs': [('s', 'ʃ')]}, 0.13333),
           ({'segment_pairs': [('e', 'o')]}, 0),
           ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.26667)])

    check(MostFrequentVariantContext(lexicon, 'transcription', 'type',
                                     frequency_threshold=3),
          [({'segment_pairs': [('m', 'n')]}, 0)])

    check(MostFrequentVariantContext(lexicon, 'transcription', 'token'),
          [({'segment_pairs': [('s', 'ʃ')]}, 0.24794),
           ({'segment_pairs': [('e', 'o')]}, 0),
           ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.25485)])

    check(MostFrequentVariantContext(lexicon, 'transcription', 'token',
                                     frequency_threshold=3),
          [({'segment_pairs': [('m', 'n')]}, 0)])

    check(SeparatedTokensVariantContext(lexicon, 'transcription', 'token'),
          [({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.25483),
           ({'segment_pairs': [('m', 'n')]}, 0.00691)])

    check(SeparatedTokensVariantContext(lexicon, 'transcription', 'token',
                                        frequency_threshold=3),
          [({'segment_pairs': [('s', 'ʃ')]}, 0.25053),
           ({'segment_pairs': [('e', 'o')]}, 0)])

    check(WeightedVariantContext(lexicon, 'transcription', 'type'),
          [({'segment_pairs': [('m', 'n')]}, 0.13333)])

    check(WeightedVariantContext(lexicon, 'transcription', 'type',
                                 frequency_threshold=3),
          [({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.16667),
           ({'segment_pairs': [('s', 'ʃ')]}, 0.16667),
           ({'segment_pairs': [('e', 'o')]}, 0)])
def test_deltah(unspecified_test_corpus):
    """Functional load via deltah_fl in canonical variant contexts, for
    both type and token frequency counts, with and without a frequency
    threshold of 3.
    """
    eps = 0.0001
    # Each scenario: (frequency kind, threshold or None,
    #                 [(deltah_fl kwargs, expected value), ...]).
    scenarios = [
        ('type', None,
         [({'segment_pairs': [('s', 'ʃ')]}, 0.13333),
          ({'segment_pairs': [('m', 'n')]}, 0.13333),
          ({'segment_pairs': [('e', 'o')]}, 0),
          ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.26667)]),
        ('type', 3,
         [({'segment_pairs': [('s', 'ʃ')]}, 0.16667),
          ({'segment_pairs': [('m', 'n')]}, 0),
          ({'segment_pairs': [('e', 'o')]}, 0),
          ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.16667)]),
        ('token', None,
         [({'segment_pairs': [('s', 'ʃ')]}, 0.24794),
          ({'segment_pairs': [('m', 'n')]}, 0.00691),
          ({'segment_pairs': [('e', 'o')]}, 0),
          ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.25485)]),
        ('token', 3,
         [({'segment_pairs': [('s', 'ʃ')]}, 0.25053),
          ({'segment_pairs': [('m', 'n')]}, 0),
          ({'segment_pairs': [('e', 'o')]}, 0),
          ({'segment_pairs': [('s', 'ʃ'), ('m', 'n'), ('e', 'o')]}, 0.25053)]),
    ]
    for count_kind, threshold, expectations in scenarios:
        # Only pass frequency_threshold when the scenario specifies one,
        # so the no-threshold cases use the context's default.
        extra = {} if threshold is None else {'frequency_threshold': threshold}
        with CanonicalVariantContext(unspecified_test_corpus, 'transcription',
                                     count_kind, **extra) as ctx:
            for fl_kwargs, expected in expectations:
                assert abs(deltah_fl(ctx, **fl_kwargs) - expected) < eps