def run(self):
    """Compute functional load for the configured segment pairs.

    Reads all configuration destructively (via ``pop``) from
    ``self.kwargs``, accumulates one result per pair in ``self.results``,
    and signals completion: ``dataReady`` on success,
    ``errorEncountered`` on failure, ``finishedCancelling`` if the worker
    was stopped early.
    """
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    # Choose the pairwise and the "one segment vs. all others" variants
    # of the requested algorithm.
    if kwargs.pop('algorithm') == 'min_pairs':
        func = FL.minpair_fl
        rel_func = FL.relative_minpair_fl
    else:
        func = FL.deltah_fl
        rel_func = FL.relative_deltah_fl
    # Map the GUI's context choice onto the matching corpus-context manager.
    if context == ContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == ContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    elif context == ContextWidget.separate_value:
        cm = SeparatedTokensVariantContext
    elif context == ContextWidget.relative_value:
        cm = WeightedVariantContext
    with cm(kwargs.pop('corpus'),
            kwargs.pop('sequence_type'),
            kwargs.pop('type_token'),
            frequency_threshold=kwargs.pop('frequency_cutoff')) as c:
        outf = None  # opened lazily below; always closed in ``finally``
        try:
            pairs = kwargs.pop('segment_pairs')
            output_filename = kwargs.pop('output_filename', None)
            if output_filename is not None:
                to_output = []
                outf = open(output_filename, mode='w', encoding='utf-8-sig')
                # Write the header row up front so the file is well-formed
                # even if no pair produces minimal-pair output.
                save_minimal_pairs(outf, [], write_header=True)
            for pair in pairs:
                if len(pair) == 1:
                    # Single segment: average FL of that segment against
                    # every other segment in the inventory.
                    res = rel_func(c, pair[0], **kwargs)
                else:
                    if isinstance(pair[0], (list, tuple)):
                        # Two segment *groups*: pair them up element-wise.
                        in_list = list(zip(pair[0], pair[1]))
                    else:
                        in_list = [pair]
                    res = func(c, in_list, **kwargs)
                    if output_filename is not None:
                        to_output.append((pair, res[1]))
                if self.stopped:
                    break
                self.results.append(res)
            if output_filename is not None:
                save_minimal_pairs(outf, to_output)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
        finally:
            # Bug fix: the original only closed the file on the success
            # path, leaking the handle whenever an exception was raised
            # after the open().
            if outf is not None:
                outf.close()
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
def relative_minpair_fl(corpus_context, segment,
                        relative_count_to_relevant_sounds=False,
                        relative_count_to_whole_corpus=True,
                        distinguish_homophones=False,
                        output_filename=None,
                        environment_filter=None,
                        prevent_normalization=False,
                        stop_check=None, call_back=None):
    """Calculate the average functional load of the contrasts between a
    segment and all other segments, as a count of minimal pairs.

    Parameters
    ----------
    corpus_context : CorpusContext
        Context manager for a corpus
    segment : str
        The target segment.
    relative_count_to_relevant_sounds : bool, optional
        If True, divide the number of minimal pairs by the total number
        of words that contain either of the two segments.
    relative_count_to_whole_corpus : bool, optional
        If True, divide the number of minimal pairs by the total number
        of words in the corpus (regardless of whether those words contain
        the target sounds). Defaults to True.
    distinguish_homophones : bool, optional
        If False, then you'll count sock~shock (sock=clothing) and
        sock~shock (sock=punch) as just one minimal pair; but if True,
        you'll overcount alternative spellings of the same word, e.g.
        axel~actual and axle~actual. False is the value used by Wedel et al.
    output_filename : str, optional
        If given, save the discovered minimal pairs for each contrast
        to this destination via ``save_minimal_pairs``.
    environment_filter : EnvironmentFilter
        Allows the user to restrict the neutralization process to segments
        in particular segmental contexts
    prevent_normalization : bool, optional
        Passed through to ``minpair_fl`` for each pairwise computation.
    stop_check : callable, optional
        Optional function to check whether to gracefully terminate early
    call_back : callable, optional
        Optional function to supply progress information during the
        function

    Returns
    -------
    int or float
        If `relative_count_to_relevant_sounds`==False and
        `relative_count_to_whole_corpus`==False, returns an int of the
        raw number of minimal pairs. If
        `relative_count_to_relevant_sounds`==True, returns a float of
        that count divided by the total number of words in the corpus
        that include either `s1` or `s2`. If
        `relative_count_to_whole_corpus`==True, a float of the raw count
        divided by the total number of words in the corpus.
    """
    all_segments = corpus_context.inventory
    # Contrast the target with every other inventory symbol except the
    # word boundary marker '#'.
    segment_pairs = [(segment, other.symbol) for other in all_segments
                     if other.symbol != segment and other.symbol != '#']
    # Robustness fix: with no other segments to contrast against, the
    # original division raised ZeroDivisionError; the average functional
    # load over zero contrasts is 0.
    if not segment_pairs:
        return 0
    results = []
    to_output = []
    for sp in segment_pairs:
        res = minpair_fl(
            corpus_context, [sp],
            relative_count_to_relevant_sounds=relative_count_to_relevant_sounds,
            relative_count_to_whole_corpus=relative_count_to_whole_corpus,
            distinguish_homophones=distinguish_homophones,
            environment_filter=environment_filter,
            prevent_normalization=prevent_normalization,
            stop_check=stop_check,
            call_back=call_back)
        results.append(res[0])
        if output_filename is not None:
            to_output.append((sp, res[1]))
    if output_filename is not None:
        save_minimal_pairs(output_filename, to_output)
    # Average functional load across all contrasts with the target.
    return sum(results) / len(segment_pairs)
# NOTE(review): this is an exact duplicate of a `relative_minpair_fl`
# defined earlier in this file; since it appears later, this definition
# shadows the earlier one. Confirm which copy is intended and remove
# the other.
def relative_minpair_fl(corpus_context, segment,
                        relative_count_to_relevant_sounds=False,
                        relative_count_to_whole_corpus=True,
                        distinguish_homophones=False,
                        output_filename=None,
                        environment_filter=None,
                        prevent_normalization=False,
                        stop_check=None, call_back=None):
    """Calculate the average functional load of the contrasts between a
    segment and all other segments, as a count of minimal pairs.

    Parameters
    ----------
    corpus_context : CorpusContext
        Context manager for a corpus
    segment : str
        The target segment.
    relative_count_to_relevant_sounds : bool, optional
        If True, divide the number of minimal pairs by the total number
        of words that contain either of the two segments.
    relative_count_to_whole_corpus : bool, optional
        If True, divide the number of minimal pairs by the total number
        of words in the corpus (regardless of whether those words contain
        the target sounds). Defaults to True.
    distinguish_homophones : bool, optional
        If False, then you'll count sock~shock (sock=clothing) and
        sock~shock (sock=punch) as just one minimal pair; but if True,
        you'll overcount alternative spellings of the same word, e.g.
        axel~actual and axle~actual. False is the value used by Wedel et al.
    output_filename : str, optional
        If given, save the discovered minimal pairs for each contrast
        to this destination via ``save_minimal_pairs``.
    environment_filter : EnvironmentFilter
        Allows the user to restrict the neutralization process to segments
        in particular segmental contexts
    prevent_normalization : bool, optional
        Passed through to ``minpair_fl`` for each pairwise computation.
    stop_check : callable, optional
        Optional function to check whether to gracefully terminate early
    call_back : callable, optional
        Optional function to supply progress information during the
        function

    Returns
    -------
    int or float
        If `relative_count_to_relevant_sounds`==False and
        `relative_count_to_whole_corpus`==False, returns an int of the
        raw number of minimal pairs. If
        `relative_count_to_relevant_sounds`==True, returns a float of
        that count divided by the total number of words in the corpus
        that include either `s1` or `s2`. If
        `relative_count_to_whole_corpus`==True, a float of the raw count
        divided by the total number of words in the corpus.
    """
    all_segments = corpus_context.inventory
    # Contrast the target with every other inventory symbol except the
    # word boundary marker '#'.
    segment_pairs = [(segment, other.symbol) for other in all_segments
                     if other.symbol != segment and other.symbol != '#']
    # Robustness fix: with no other segments to contrast against, the
    # original division raised ZeroDivisionError; the average functional
    # load over zero contrasts is 0.
    if not segment_pairs:
        return 0
    results = []
    to_output = []
    for sp in segment_pairs:
        res = minpair_fl(
            corpus_context, [sp],
            relative_count_to_relevant_sounds=relative_count_to_relevant_sounds,
            relative_count_to_whole_corpus=relative_count_to_whole_corpus,
            distinguish_homophones=distinguish_homophones,
            environment_filter=environment_filter,
            prevent_normalization=prevent_normalization,
            stop_check=stop_check,
            call_back=call_back)
        results.append(res[0])
        if output_filename is not None:
            to_output.append((sp, res[1]))
    if output_filename is not None:
        save_minimal_pairs(output_filename, to_output)
    # Average functional load across all contrasts with the target.
    return sum(results) / len(segment_pairs)