def test_prod_wordtokens_type(specified_discourse_corpus):
    """Predictability of distribution for s/ʃ with type frequency.

    Builds one EnvironmentFilter per conditioning environment (feature
    specifications plus the word boundary '#'), runs calc_prod under both
    the most-frequent-variant and the weighted-variant pronunciation
    contexts, and checks each entropy against a hand-computed value.
    """
    # Expected entropy per environment key; '#' is the word-boundary symbol.
    expected = {
        "-voc": 0.0,
        "+voc,+high": 0.863120568566631,
        "+voc,-high": 0.9852281360342515,
        "#": 0.0
    }
    env_list = []
    expected_envs = {}
    for spec, entropy in expected.items():
        if spec != '#':
            # Resolve a feature specification to the matching segment set.
            segs = specified_discourse_corpus.lexicon.features_to_segments(spec)
        else:
            segs = spec
        env = EnvironmentFilter(['s', 'ʃ'], None, [segs])
        env_list.append(env)
        expected_envs[env] = entropy
    # calc_prod also reports a weighted-average entropy under the "AVG" key.
    expected_envs["AVG"] = 0.9241743523004413

    type_or_token = 'type'
    tier = 'transcription'
    with MostFrequentVariantContext(specified_discourse_corpus.lexicon, tier,
                                    type_or_token) as c:
        result = calc_prod(c, env_list, all_info=False)
    for env, value in result.items():
        # Two-sided tolerance: the original one-sided check
        # (expected - value < 0.001) passed whenever the computed value
        # merely overshot the expectation.
        assert abs(expected_envs[env] - value) < 0.001

    with WeightedVariantContext(specified_discourse_corpus.lexicon, tier,
                                type_or_token) as c:
        result = calc_prod(c, env_list, all_info=False)
    for env, value in result.items():
        assert abs(expected_envs[env] - value) < 0.001
# NOTE(review): duplicate definition — this shadows the earlier
# test_prod_wordtokens_type in this module, so only this copy runs under
# pytest; consider renaming or deleting one of them.
def test_prod_wordtokens_type(specified_discourse_corpus):
    """Predictability of distribution for s/ʃ with type frequency.

    Same check as the earlier copy: build the environment filters, run
    calc_prod under both variant contexts, compare entropies.
    """
    # Expected entropy per environment key; '#' is the word-boundary symbol.
    expected = {"-voc": 0.0,
                "+voc,+high": 0.863120568566631,
                "+voc,-high": 0.9852281360342515,
                "#": 0.0}
    env_list = []
    expected_envs = {}
    for spec, entropy in expected.items():
        if spec != '#':
            # Resolve a feature specification to the matching segment set.
            segs = specified_discourse_corpus.lexicon.features_to_segments(spec)
        else:
            segs = spec
        env = EnvironmentFilter(['s', 'ʃ'], None, [segs])
        env_list.append(env)
        expected_envs[env] = entropy
    # calc_prod also reports a weighted-average entropy under the "AVG" key.
    expected_envs["AVG"] = 0.9241743523004413

    type_or_token = 'type'
    tier = 'transcription'
    with MostFrequentVariantContext(specified_discourse_corpus.lexicon, tier, type_or_token) as c:
        result = calc_prod(c, env_list, all_info=False)
    for env, value in result.items():
        # abs(): the original one-sided check passed on any overshoot.
        assert abs(expected_envs[env] - value) < 0.001

    with WeightedVariantContext(specified_discourse_corpus.lexicon, tier, type_or_token) as c:
        result = calc_prod(c, env_list, all_info=False)
    for env, value in result.items():
        assert abs(expected_envs[env] - value) < 0.001
def test_prod_wordtokens_token(specified_discourse_corpus):
    """Predictability of distribution for s/ʃ with token frequency.

    Mirrors test_prod_wordtokens_type but runs calc_prod with
    token-weighted frequencies under both variant contexts.
    """
    expected = {"-voc": 0.0,
                # NOTE(review): original author flagged this value as
                # possibly wrong (alternative was 0.9321115676166747) —
                # verify against a hand computation.
                "+voc,+high": 0.8631205,
                "+voc,-high": 0.9660096062568557,
                "#": 0.0}
    env_list = []
    expected_envs = {}
    for spec, entropy in expected.items():
        if spec != '#':
            # Resolve a feature specification to the matching segment set.
            segs = specified_discourse_corpus.lexicon.features_to_segments(spec)
        else:
            segs = spec
        env = EnvironmentFilter(['s', 'ʃ'], None, [segs])
        env_list.append(env)
        expected_envs[env] = entropy
    # NOTE(review): identical "AVG" to the type-frequency test — presumably
    # a copy-over; confirm the token-weighted average independently.
    expected_envs["AVG"] = 0.9241743523004413

    type_or_token = 'token'
    tier = 'transcription'

    with MostFrequentVariantContext(specified_discourse_corpus.lexicon, tier, type_or_token) as c:
        result = calc_prod(c, env_list)
    for env, value in result.items():
        # abs(): the original one-sided check passed on any overshoot.
        assert abs(expected_envs[env] - value) < 0.001

    with WeightedVariantContext(specified_discourse_corpus.lexicon, tier, type_or_token) as c:
        result = calc_prod(c, env_list)
    for env, value in result.items():
        assert abs(expected_envs[env] - value) < 0.001
def test_prod_token(specified_test_corpus):
    """Predictability of distribution for s/ʃ with token frequency,
    canonical-pronunciation context.

    Unlike the word-token tests above, this corpus exposes
    features_to_segments directly (no .lexicon attribute).
    """
    # Expected entropy per environment key; '#' is the word-boundary symbol.
    expected = {
        "-voc": 0.0,
        "+voc,+high": 0.9321115676166747,
        "+voc,-high": 0.9660096062568557,
        "#": 0.0
    }
    env_list = []
    expected_envs = {}
    for spec, entropy in expected.items():
        if spec != '#':
            segs = specified_test_corpus.features_to_segments(spec)
        else:
            segs = spec
        env = EnvironmentFilter(['s', 'ʃ'], None, [segs])
        env_list.append(env)
        expected_envs[env] = entropy
    # calc_prod also reports a weighted-average entropy under the "AVG" key.
    expected_envs["AVG"] = 0.9241743523004413
    type_or_token = 'token'
    tier = 'transcription'
    with CanonicalVariantContext(specified_test_corpus, tier,
                                 type_or_token) as c:
        result = calc_prod(c, env_list)
    for env, value in result.items():
        # abs(): the original one-sided check passed on any overshoot.
        assert abs(expected_envs[env] - value) < 0.001
Example #5
0
 def run(self):
     """Worker entry point: compute predictability of distribution for each
     requested segment pair, emitting results (or an error) via Qt signals.

     Reads all options from self.kwargs; populates self.results and emits
     dataReady on success, errorEncountered on failure, finishedCancelling
     if stopped.
     """
     kwargs = self.kwargs
     self.results = []
     context = kwargs.pop('context')
     # Map the requested pronunciation-variant handling onto a context
     # manager class.
     if context == ContextWidget.canonical_value:
         cm = CanonicalVariantContext
     elif context == ContextWidget.frequent_value:
         cm = MostFrequentVariantContext
     elif context == ContextWidget.separate_value:
         cm = SeparatedTokensVariantContext
     elif context == ContextWidget.relative_value:
         cm = WeightedVariantContext
     else:
         # Fail loudly here rather than with an UnboundLocalError on `cm`
         # at the `with` statement below.
         raise ValueError('Unknown context value: {}'.format(context))
     with cm(kwargs['corpus'],
             kwargs['sequence_type'],
             kwargs['type_token'],
             frequency_threshold=kwargs['frequency_cutoff']) as c:
         try:
             envs = kwargs.pop('envs', None)
             for pair in kwargs['segment_pairs']:
                 ordered_pair = pair
                 if envs is not None:
                     # Re-point every environment filter at the current pair.
                     for env in envs:
                         env.middle = set(pair)
                     res = calc_prod(c,
                                     envs,
                                     kwargs['strict'],
                                     ordered_pair=ordered_pair,
                                     all_info=True,
                                     stop_check=kwargs['stop_check'],
                                     call_back=kwargs['call_back'])
                 else:
                     # No environments given: compute over all environments.
                     res = calc_prod_all_envs(
                         c,
                         pair[0],
                         pair[1],
                         all_info=True,
                         stop_check=kwargs['stop_check'],
                         call_back=kwargs['call_back'])
                 if self.stopped:
                     break
                 self.results.append(res)
         except PCTError as e:
             self.errorEncountered.emit(e)
             return
         except Exception as e:
             # Wrap unexpected errors so the GUI sees a uniform error type.
             e = PCTPythonError(e)
             self.errorEncountered.emit(e)
             return
     if self.stopped:
         self.finishedCancelling.emit()
         return
     self.dataReady.emit(self.results)
 def run(self):
     """Worker entry point: run the predictability-of-distribution
     calculation for every requested segment pair and report through
     Qt signals (dataReady / errorEncountered / finishedCancelling).
     """
     opts = self.kwargs
     self.results = []
     chosen = opts.pop('context')
     # Select the pronunciation-variant context manager class.
     if chosen == ContextWidget.canonical_value:
         cm = CanonicalVariantContext
     elif chosen == ContextWidget.frequent_value:
         cm = MostFrequentVariantContext
     elif chosen == ContextWidget.separate_value:
         cm = SeparatedTokensVariantContext
     elif chosen == ContextWidget.relative_value:
         cm = WeightedVariantContext
     with cm(opts['corpus'],
             opts['sequence_type'],
             opts['type_token'],
             frequency_threshold=opts['frequency_cutoff']) as ctx:
         try:
             envs = opts.pop('envs', None)
             for pair in opts['segment_pairs']:
                 if envs is None:
                     # No environment filters: measure over all environments.
                     outcome = calc_prod_all_envs(
                         ctx, pair[0], pair[1],
                         all_info=True,
                         stop_check=opts['stop_check'],
                         call_back=opts['call_back'])
                 else:
                     # Aim each environment filter at the current pair.
                     for env in envs:
                         env.middle = set(pair)
                     outcome = calc_prod(
                         ctx, envs, opts['strict'],
                         ordered_pair=pair,
                         all_info=True,
                         stop_check=opts['stop_check'],
                         call_back=opts['call_back'])
                 if self.stopped:
                     break
                 self.results.append(outcome)
         except PCTError as e:
             self.errorEncountered.emit(e)
             return
         except Exception as e:
             # Normalize unexpected failures to the project error type.
             e = PCTPythonError(e)
             self.errorEncountered.emit(e)
             return
     if self.stopped:
         self.finishedCancelling.emit()
         return
     self.dataReady.emit(self.results)