def test_minpair(unspecified_test_corpus):
    """Check ``minpair_fl`` against fixed expected values.

    Every case runs inside a ``CanonicalVariantContext`` built on the
    fixture corpus with ``'transcription'`` and ``'type'``: first with
    no extra arguments, then with ``frequency_threshold=3``. The first
    element of the ``minpair_fl`` result must be within 0.0001 of the
    expected value.
    """
    s_sh = (('s', 'ʃ'),)
    m_n = (('m', 'n'),)
    e_o = (('e', 'o'),)
    every_pair = s_sh + m_n + e_o

    # Rows are (segment pairs, relative_count, expected value).
    no_threshold = (
        (s_sh, True, 0.125),
        (s_sh, False, 1),
        (m_n, True, 0.11111),
        (m_n, False, 1),
        (e_o, True, 0),
        (e_o, False, 0),
        (every_pair, True, 0.14286),
        (every_pair, False, 2),
    )
    threshold_of_three = (
        (s_sh, True, 0.14286),
        (s_sh, False, 1),
        (m_n, True, 0),
        (m_n, False, 0),
        (e_o, True, 0),
        (e_o, False, 0),
        (every_pair, True, 0.09091),
        (every_pair, False, 1),
    )

    configurations = (
        ({}, no_threshold),
        ({'frequency_threshold': 3}, threshold_of_three),
    )
    for context_options, rows in configurations:
        with CanonicalVariantContext(unspecified_test_corpus,
                                     'transcription', 'type',
                                     **context_options) as c:
            for pairs, relative, expected in rows:
                # Build a fresh list for every call so no case shares
                # its 'segment_pairs' object with another.
                kwargs = {'segment_pairs': list(pairs),
                          'relative_count': relative}
                print(kwargs)
                assert abs(minpair_fl(c, **kwargs)[0] - expected) < 0.0001
Example #2
0
def test_minpair(unspecified_test_corpus):
    """Compare ``minpair_fl`` output with hard-coded reference values.

    The same eight argument combinations are evaluated twice inside a
    ``CanonicalVariantContext`` over ``'transcription'`` / ``'type'``:
    once with the context's default arguments and once with
    ``frequency_threshold = 3``. Only element ``[0]`` of each result is
    compared, with an absolute tolerance of 0.0001.
    """
    def case(relative, expected, *pairs):
        # One (keyword arguments, expected value) entry.
        return ({'segment_pairs': list(pairs),
                 'relative_count': relative}, expected)

    def verify(context, cases):
        for kwargs, expected in cases:
            print(kwargs)
            assert abs(minpair_fl(context, **kwargs)[0] - expected) < 0.0001

    sibilant = ('s', 'ʃ')
    nasal = ('m', 'n')
    vowel = ('e', 'o')

    calls = [
        case(True, 0.125, sibilant),
        case(False, 1, sibilant),
        case(True, 0.11111, nasal),
        case(False, 1, nasal),
        case(True, 0, vowel),
        case(False, 0, vowel),
        case(True, 0.14286, sibilant, nasal, vowel),
        case(False, 2, sibilant, nasal, vowel),
    ]
    with CanonicalVariantContext(unspecified_test_corpus,
                                 'transcription', 'type') as c:
        verify(c, calls)

    calls = [
        case(True, 0.14286, sibilant),
        case(False, 1, sibilant),
        case(True, 0, nasal),
        case(False, 0, nasal),
        case(True, 0, vowel),
        case(False, 0, vowel),
        case(True, 0.09091, sibilant, nasal, vowel),
        case(False, 1, sibilant, nasal, vowel),
    ]
    with CanonicalVariantContext(unspecified_test_corpus,
                                 'transcription', 'type',
                                 frequency_threshold=3) as c:
        verify(c, calls)
def test_minimal_pair_wordtokens(unspecified_discourse_corpus):
    """Check ``minpair_fl`` inside a ``MostFrequentVariantContext``.

    The context is built on the ``lexicon`` of the fixture discourse
    corpus with ``'transcription'`` and ``'type'``, first without extra
    arguments and then with ``frequency_threshold=3``. Element ``[0]``
    of every ``minpair_fl`` result must lie within 0.0001 of the
    expected value.
    """
    corpus = unspecified_discourse_corpus.lexicon

    s_sh = (('s', 'ʃ'),)
    m_n = (('m', 'n'),)
    e_o = (('e', 'o'),)
    every_pair = s_sh + m_n + e_o

    # Rows are (segment pairs, relative_count, expected value).
    no_threshold = (
        (s_sh, True, 0.125),
        (s_sh, False, 1),
        (m_n, True, 0.11111),
        (m_n, False, 1),
        (e_o, True, 0),
        (e_o, False, 0),
        (every_pair, True, 0.14286),
        (every_pair, False, 2),
    )
    threshold_of_three = (
        (s_sh, True, 0.14286),
        (s_sh, False, 1),
        (m_n, True, 0),
        (m_n, False, 0),
        (e_o, True, 0),
        (e_o, False, 0),
        (every_pair, True, 0.09091),
        (every_pair, False, 1),
    )

    configurations = (
        ({}, no_threshold),
        ({'frequency_threshold': 3}, threshold_of_three),
    )
    for context_options, rows in configurations:
        with MostFrequentVariantContext(corpus, 'transcription', 'type',
                                        **context_options) as c:
            for pairs, relative, expected in rows:
                # Build a fresh list for every call so no case shares
                # its 'segment_pairs' object with another.
                kwargs = {'segment_pairs': list(pairs),
                          'relative_count': relative}
                assert abs(minpair_fl(c, **kwargs)[0] - expected) < 0.0001
Example #4
0
    def run(self):
        """Run the analysis selected by ``self.name`` and emit the outcome.

        Supported names are ``'functional_load'``, ``'string_similarity'``,
        ``'phonotactic_probability'`` and ``'kullback_leibler'``; their
        arguments are read from ``self.kwargs``. A successful result is
        sent through ``self.dataReady``. Any ``Exception`` raised while
        reading the arguments, computing, or emitting is reported through
        ``self.errorEncountered`` as ``'<name>:<error>'`` instead of
        propagating.

        Raises:
            UnLuckyException: if ``self.name`` is not a supported name.
        """
        supported = ('functional_load', 'string_similarity',
                     'phonotactic_probability', 'kullback_leibler')
        # Reject unknown names up front, outside the try block, so this
        # error propagates to the caller rather than being emitted.
        if self.name not in supported:
            raise UnLuckyException(
                'No analysis function called {} could be found'.format(
                    self.name))

        try:
            if self.name == 'functional_load':
                results = minpair_fl(
                    self.kwargs['corpus'],
                    self.kwargs['segment_pair'],
                    stop_check=self.kwargs['stop_check'],
                    call_back=self.kwargs['call_back'])
            elif self.name == 'string_similarity':
                results = string_similarity(
                    self.kwargs['corpus'],
                    self.kwargs['query'],
                    self.kwargs['algorithm'],
                    stop_check=self.kwargs['stop_check'],
                    call_back=self.kwargs['call_back'])
            elif self.name == 'phonotactic_probability':
                results = phonotactic_probability_vitevitch(
                    self.kwargs['corpus'],
                    self.kwargs['query'],
                    self.kwargs['sequence_type'],
                    probability_type=self.kwargs['probability_type'],
                    stop_check=self.kwargs['stop_check'],
                    call_back=self.kwargs['call_back'])
            else:
                # Only 'kullback_leibler' remains after the guard above.
                results = KullbackLeibler(
                    self.kwargs['corpus'],
                    self.kwargs['seg1'],
                    self.kwargs['seg2'],
                    self.kwargs['side'],
                    stop_check=self.kwargs['stop_check'],
                    call_back=self.kwargs['call_back'])
            self.dataReady.emit(results)
        except Exception as e:
            self.errorEncountered.emit('{}:{}'.format(self.name, e))
Example #5
0
    def run(self):
        """Execute the analysis named by ``self.name``.

        The arguments for the chosen analysis come from ``self.kwargs``.
        The result is emitted on ``self.dataReady``; if reading the
        arguments, running the analysis, or emitting raises an
        ``Exception``, a ``'<name>:<error>'`` message is emitted on
        ``self.errorEncountered`` and the method returns normally.

        Raises:
            UnLuckyException: if ``self.name`` is none of
                ``'functional_load'``, ``'string_similarity'``,
                ``'phonotactic_probability'`` or ``'kullback_leibler'``.
        """
        recognised = (
            'functional_load',
            'string_similarity',
            'phonotactic_probability',
            'kullback_leibler',
        )
        if self.name not in recognised:
            # Raised outside the try block: an unknown name is not
            # reported through errorEncountered.
            raise UnLuckyException('No analysis function called {} could be found'.format(self.name))

        try:
            if self.name == 'functional_load':
                results = minpair_fl(
                    self.kwargs['corpus'],
                    self.kwargs['segment_pair'],
                    stop_check=self.kwargs['stop_check'],
                    call_back=self.kwargs['call_back'],
                )
            elif self.name == 'string_similarity':
                results = string_similarity(
                    self.kwargs['corpus'],
                    self.kwargs['query'],
                    self.kwargs['algorithm'],
                    stop_check=self.kwargs['stop_check'],
                    call_back=self.kwargs['call_back'],
                )
            elif self.name == 'phonotactic_probability':
                results = phonotactic_probability_vitevitch(
                    self.kwargs['corpus'],
                    self.kwargs['query'],
                    self.kwargs['sequence_type'],
                    probability_type=self.kwargs['probability_type'],
                    stop_check=self.kwargs['stop_check'],
                    call_back=self.kwargs['call_back'],
                )
            else:
                # The guard above leaves 'kullback_leibler' as the only
                # remaining possibility.
                results = KullbackLeibler(
                    self.kwargs['corpus'],
                    self.kwargs['seg1'],
                    self.kwargs['seg2'],
                    self.kwargs['side'],
                    stop_check=self.kwargs['stop_check'],
                    call_back=self.kwargs['call_back'],
                )
            self.dataReady.emit(results)
        except Exception as e:
            message = '{}:{}'.format(self.name, e)
            self.errorEncountered.emit(message)
Example #6
0
def test_minimal_pair_wordtokens(unspecified_discourse_corpus):
    """Compare ``minpair_fl`` output with hard-coded reference values.

    Cases are evaluated inside a ``MostFrequentVariantContext`` over the
    fixture's ``lexicon`` with ``'transcription'`` / ``'type'``: once
    with the context's default arguments and once with
    ``frequency_threshold=3``. Only element ``[0]`` of each result is
    compared, with an absolute tolerance of 0.0001.
    """
    corpus = unspecified_discourse_corpus.lexicon

    def case(relative, expected, *pairs):
        # One (keyword arguments, expected value) entry.
        return ({'segment_pairs': list(pairs),
                 'relative_count': relative}, expected)

    def verify(context, cases):
        for kwargs, expected in cases:
            assert abs(minpair_fl(context, **kwargs)[0] - expected) < 0.0001

    sibilant = ('s', 'ʃ')
    nasal = ('m', 'n')
    vowel = ('e', 'o')

    calls = [
        case(True, 0.125, sibilant),
        case(False, 1, sibilant),
        case(True, 0.11111, nasal),
        case(False, 1, nasal),
        case(True, 0, vowel),
        case(False, 0, vowel),
        case(True, 0.14286, sibilant, nasal, vowel),
        case(False, 2, sibilant, nasal, vowel),
    ]
    with MostFrequentVariantContext(corpus, 'transcription', 'type') as c:
        verify(c, calls)

    calls = [
        case(True, 0.14286, sibilant),
        case(False, 1, sibilant),
        case(True, 0, nasal),
        case(False, 0, nasal),
        case(True, 0, vowel),
        case(False, 0, vowel),
        case(True, 0.09091, sibilant, nasal, vowel),
        case(False, 1, sibilant, nasal, vowel),
    ]
    with MostFrequentVariantContext(corpus, 'transcription', 'type',
                                    frequency_threshold=3) as c:
        verify(c, calls)