def run(self):
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    if context == ContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == ContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    elif context == ContextWidget.separate_value:
        cm = SeparatedTokensVariantContext
    elif context == ContextWidget.relative_value:
        cm = WeightedVariantContext
    corpus = kwargs.pop('corpus')
    sequence_type = kwargs.pop('sequence_type')
    type_token = kwargs.pop('type_token')
    rounding = kwargs.pop('rounding')
    with cm(corpus, sequence_type, type_token) as c:
        try:
            # if len(kwargs['segs']) == 1:
            #     seg = kwargs['segs'][0]
            #     seg = c.inventory[seg]
            #     result = informativity.get_informativity(c, seg, sequence_type,
            #                 rounding=rounding, stop_check=kwargs['stop_check'],
            #                 call_back=kwargs['call_back'])
            #     try:
            #         result.pop('Rounding')
            #         self.results.append(result)
            #     except AttributeError:
            #         self.stopped = True  # result is None if the user cancelled
            # else:
            results = informativity.get_multiple_informativity(
                c, kwargs['segs'], sequence_type,
                type_or_token=kwargs['type_or_token'],
                rounding=rounding,
                stop_check=kwargs['stop_check'],
                call_back=kwargs['call_back'])
            try:
                for result in results:
                    result.pop('Rounding')
                    self.results.append(result)
            except (TypeError, AttributeError):
                self.stopped = True  # results is None if the user cancelled
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
def run(self):
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    if context == RestrictedContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == RestrictedContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    corpus = kwargs['corpusModel'].corpus
    st = kwargs['sequence_type']
    tt = kwargs['type_token']
    att = kwargs.get('attribute', None)
    ft = kwargs['frequency_cutoff']
    log_count = kwargs['log_count']
    with cm(corpus, st, tt, attribute=att,
            frequency_threshold=ft, log_count=log_count) as c:
        try:
            if 'query' in kwargs:
                for q in kwargs['query']:
                    res = phonotactic_probability(
                        c, q,
                        algorithm=kwargs['algorithm'],
                        probability_type=kwargs['probability_type'],
                        stop_check=kwargs['stop_check'],
                        call_back=kwargs['call_back'])
                    if self.stopped:
                        break
                    self.results.append([q, res])
            else:
                end = kwargs['corpusModel'].beginAddColumn(att)
                phonotactic_probability_all_words(
                    c,
                    algorithm=kwargs['algorithm'],
                    probability_type=kwargs['probability_type'],
                    # num_cores=kwargs['num_cores'],
                    stop_check=kwargs['stop_check'],
                    call_back=kwargs['call_back'])
                end = kwargs['corpusModel'].endAddColumn(end)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
def run(self):
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    if context == ContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == ContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    elif context == ContextWidget.separate_value:
        cm = SeparatedTokensVariantContext
    elif context == ContextWidget.relative_value:
        cm = WeightedVariantContext
    with cm(kwargs['corpus'], kwargs['sequence_type'], kwargs['type_token'],
            frequency_threshold=kwargs['frequency_cutoff']) as c:
        try:
            envs = kwargs.pop('envs', None)
            for pair in kwargs['segment_pairs']:
                ordered_pair = pair
                if envs is not None:
                    for env in envs:
                        env.middle = set(pair)
                    res = calc_prod(c, envs, kwargs['strict'],
                                    ordered_pair=ordered_pair,
                                    all_info=True,
                                    stop_check=kwargs['stop_check'],
                                    call_back=kwargs['call_back'])
                else:
                    res = calc_prod_all_envs(
                        c, pair[0], pair[1],
                        all_info=True,
                        stop_check=kwargs['stop_check'],
                        call_back=kwargs['call_back'])
                if self.stopped:
                    break
                self.results.append(res)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
def run(self):
    time.sleep(0.1)
    textType = self.kwargs.pop('text_type')
    isDirectory = self.kwargs.pop('isDirectory')
    logging.info('Importing {} corpus named {}'.format(
        textType, self.kwargs['corpus_name']))
    logging.info('Path: {}'.format(self.kwargs['path']))
    log_annotation_types(self.kwargs['annotation_types'])
    try:
        if textType == 'spelling':
            if isDirectory:
                corpus = load_directory_spelling(**self.kwargs)
            else:
                corpus = load_discourse_spelling(**self.kwargs)
        elif textType == 'transcription':
            if isDirectory:
                corpus = load_directory_transcription(**self.kwargs)
            else:
                corpus = load_discourse_transcription(**self.kwargs)
        elif textType == 'ilg':
            if isDirectory:
                corpus = load_directory_ilg(**self.kwargs)
            else:
                corpus = load_discourse_ilg(**self.kwargs)
        elif textType == 'textgrid':
            if isDirectory:
                corpus = load_directory_textgrid(**self.kwargs)
            else:
                corpus = load_discourse_textgrid(**self.kwargs)
        elif textType == 'csv':
            corpus = load_corpus_csv(**self.kwargs)
        elif textType in ['buckeye', 'timit']:
            self.kwargs['dialect'] = textType
            if isDirectory:
                corpus = load_directory_multiple_files(**self.kwargs)
            else:
                corpus = load_discourse_multiple_files(**self.kwargs)
    except PCTError as e:
        self.errorEncountered.emit(e)
        return
    except Exception as e:
        e = PCTPythonError(e)
        self.errorEncountered.emit(e)
        return
    if self.stopped:
        time.sleep(0.1)
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(corpus)
def do_check(*args, **kwargs):
    self = args[0]
    try:
        function(*args, **kwargs)
    except PCTError as e:
        if not hasattr(self, 'handleError'):
            raise
        self.handleError(e)
    except Exception as e:
        if not hasattr(self, 'handleError'):
            raise
        e = PCTPythonError(e)
        self.handleError(e)
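# A minimal sketch (not part of the original module) of how a wrapper like
# do_check above is typically produced and applied: a decorator closes over the
# wrapped callable as `function`, and any exception raised inside is routed to
# the instance's handleError method when one exists. The decorator name
# `check_for_errors` and the import path are assumptions for illustration only.
#
# from corpustools.exceptions import PCTError, PCTPythonError  # path assumed
#
# def check_for_errors(function):
#     def do_check(*args, **kwargs):
#         self = args[0]
#         try:
#             function(*args, **kwargs)
#         except PCTError as e:
#             if not hasattr(self, 'handleError'):
#                 raise
#             self.handleError(e)
#         except Exception as e:
#             if not hasattr(self, 'handleError'):
#                 raise
#             e = PCTPythonError(e)
#             self.handleError(e)
#     return do_check
#
# # Hypothetical usage on a worker method:
# # @check_for_errors
# # def run(self):
# #     ...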
def run(self):
    time.sleep(0.1)
    try:
        system = load_feature_matrix_csv(**self.kwargs)
    except PCTError as e:
        self.errorEncountered.emit(e)
        return
    except Exception as e:
        e = PCTPythonError(e)
        self.errorEncountered.emit(e)
        return
    if self.stopped:
        time.sleep(0.1)
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(system)
def run(self):
    time.sleep(0.1)
    if self.stopCheck():
        return
    try:
        self.results = load_binary(self.kwargs['path'])
    except PCTError as e:
        self.errorEncountered.emit(e)
        return
    except Exception as e:
        e = PCTPythonError(e)
        self.errorEncountered.emit(e)
        return
    if self.stopCheck():
        return
    self.dataReady.emit(self.results)
def run(self):
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    if context == ContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == ContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    elif context == ContextWidget.separate_value:
        cm = SeparatedTokensVariantContext
    elif context == ContextWidget.relative_value:
        cm = WeightedVariantContext
    with cm(kwargs['corpus'], kwargs['sequence_type'], kwargs['type_token'],
            frequency_threshold=kwargs['frequency_cutoff']) as c:
        try:
            envs = kwargs.pop('envs', None)
            if envs is not None:
                # If environments are set, the corpus context c is filtered
                # ('extracted'); matches are exported to context_output_path.
                context_output_path = kwargs.pop('context_output_path')
                c = mi_env_filter(c, envs, context_output_path)
                kwargs['in_word'] = False
            for pair in kwargs['segment_pairs']:
                res = pointwise_mi(c, pair,
                                   halve_edges=kwargs['halve_edges'],
                                   in_word=kwargs['in_word'],
                                   stop_check=kwargs['stop_check'],
                                   call_back=kwargs['call_back'])
                if self.stopped:
                    break
                self.results.append(res)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
def run(self):
    time.sleep(0.1)
    kwargs = self.kwargs
    try:
        self.results = phonological_search(**kwargs)
    except PCTError as e:
        self.errorEncountered.emit(e)
        return
    except Exception as e:
        e = PCTPythonError(e)
        self.errorEncountered.emit(e)
        return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
def run(self):
    time.sleep(0.1)
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    if context == RestrictedContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == RestrictedContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    corpus = kwargs.pop('corpus')
    st = kwargs.pop('sequence_type')
    tt = kwargs.pop('type_token')
    ft = kwargs.pop('frequency_cutoff')
    with cm(corpus, st, tt, frequency_threshold=ft) as c:
        try:
            for pair in kwargs['segment_pairs']:
                res = calc_freq_of_alt(
                    c, pair[0], pair[1], kwargs['algorithm'],
                    min_rel=kwargs['min_rel'],
                    max_rel=kwargs['max_rel'],
                    min_pairs_okay=kwargs['include_minimal_pairs'],
                    from_gui=True,
                    phono_align=kwargs['phono_align'],
                    output_filename=kwargs['output_filename'],
                    stop_check=kwargs['stop_check'],
                    call_back=kwargs['call_back'])
                if self.stopped:
                    break
                self.results.append(res)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
def run(self):
    time.sleep(0.1)
    kwargs = self.kwargs
    if 'envs' not in kwargs or not kwargs['envs']:
        # The user clicked "search" without actually entering a single environment
        return
    try:
        self.results = phonological_search(**kwargs)
    except PCTError as e:
        self.errorEncountered.emit(e)
        return
    except Exception as e:
        e = PCTPythonError(e)
        self.errorEncountered.emit(e)
        return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
def run(self):
    time.sleep(0.1)
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    if context == ContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == ContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    elif context == ContextWidget.separate_value:
        cm = SeparatedTokensVariantContext
    elif context == ContextWidget.relative_value:
        cm = WeightedVariantContext
    with cm(kwargs['corpus'], kwargs['sequence_type'], kwargs['type_token'],
            frequency_threshold=kwargs['frequency_cutoff']) as c:
        try:
            for pair in kwargs['segment_pairs']:
                res = KullbackLeibler(c, pair[0], pair[1],
                                      outfile=None,
                                      side=kwargs['side'],
                                      stop_check=kwargs['stop_check'],
                                      call_back=kwargs['call_back'])
                if self.stopped:
                    break
                self.results.append(res)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
def run(self):
    time.sleep(0.1)
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    if context == ContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == ContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    elif context == ContextWidget.separate_value:
        cm = SeparatedTokensVariantContext
    elif context == ContextWidget.relative_value:
        cm = WeightedVariantContext
    with cm(kwargs['corpus'], kwargs['sequence_type'], kwargs['type_token']) as c:
        try:
            for pair in kwargs['segment_pairs']:
                res = pointwise_mi(c, pair,
                                   halve_edges=kwargs['halve_edges'],
                                   in_word=kwargs['in_word'],
                                   stop_check=kwargs['stop_check'],
                                   call_back=kwargs['call_back'])
                if self.stopped:
                    break
                self.results.append(res)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
def run(self):
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    if kwargs.pop('algorithm') == 'min_pairs':
        func = FL.minpair_fl
        rel_func = FL.relative_minpair_fl
    else:
        func = FL.deltah_fl
        rel_func = FL.relative_deltah_fl
    if context == ContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == ContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    elif context == ContextWidget.separate_value:
        cm = SeparatedTokensVariantContext
    elif context == ContextWidget.relative_value:
        cm = WeightedVariantContext
    with cm(kwargs.pop('corpus'), kwargs.pop('sequence_type'),
            kwargs.pop('type_token'),
            frequency_threshold=kwargs.pop('frequency_cutoff')) as c:
        try:
            pairs = kwargs.pop('segment_pairs')
            output_filename = kwargs.pop('output_filename', None)
            if output_filename is not None:
                to_output = []
                outf = open(output_filename, mode='w',
                            encoding='utf-8-sig', newline='')
                save_minimal_pairs(outf, [], write_header=True)
            else:
                outf = None
            for pair in pairs:
                if len(pair) == 1:
                    res = rel_func(c, pair[0], **kwargs)
                    # res = rel_func(c, pair[0], output_filename=outf, **kwargs)
                else:
                    if isinstance(pair[0], (list, tuple)):
                        in_list = list(zip(pair[0], pair[1]))
                    else:
                        in_list = [pair]
                    res = func(c, in_list, **kwargs)
                    if output_filename is not None:
                        to_output.append((pair, res[1]))
                if self.stopped:
                    break
                self.results.append(res)
            if output_filename is not None:
                save_minimal_pairs(outf, to_output)
                outf.close()
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
def run(self):
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    if context == RestrictedContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == RestrictedContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    corpus = kwargs['corpusModel'].corpus
    st = kwargs['sequence_type']
    tt = kwargs['type_token']
    att = kwargs.get('attribute', None)
    ft = kwargs['frequency_cutoff']
    output = list()
    with cm(corpus, st, tt, attribute=att, frequency_threshold=ft) as c:
        try:
            # Create a dict keyed by sequence_type for constant-time lookup
            tierdict = defaultdict(list)
            for entry in c:
                w = getattr(entry, kwargs['sequence_type'])
                key = str(w)
                tierdict[key].append(entry)
            if 'query' in kwargs:
                # This branch is taken when searching for a single word
                # (whether or not it is in the corpus).
                last_value_removed = None
                last_key_removed = None
                for q in kwargs['query']:
                    q = ensure_query_is_word(q, c, c.sequence_type, kwargs['tier_type'])
                    # The adding/removing of keys below ensures that homophones
                    # are still counted by the ND algorithm (if the user wants
                    # them to be), but that words are not counted as their own
                    # neighbours. We only do this when comparing inside a
                    # corpus: an external word list may contain words that are
                    # also in the corpus, and removing them in that case would
                    # give the wrong ND value. (A minimal standalone sketch of
                    # this pattern follows this method.)
                    if kwargs['in_corpus']:
                        if last_value_removed:
                            tierdict[last_key_removed].append(last_value_removed)
                        w = getattr(q, kwargs['sequence_type'])
                        last_key_removed = str(w)
                        # last_value_removed = tierdict[last_key_removed].pop()
                        for i, item in enumerate(tierdict[last_key_removed]):
                            if str(item) == str(q):
                                last_value_removed = tierdict[last_key_removed].pop(i)
                                break
                    # Now call the actual ND algorithms
                    if kwargs['algorithm'] != 'substitution':
                        res = neighborhood_density(
                            c, q, tierdict,
                            algorithm=kwargs['algorithm'],
                            max_distance=kwargs['max_distance'],
                            force_quadratic=kwargs['force_quadratic'],
                            collapse_homophones=kwargs['collapse_homophones'],
                            file_type=kwargs['file_type'],
                            tier_type=kwargs['tier_type'],
                            sequence_type=kwargs['sequence_type'],
                            stop_check=kwargs['stop_check'],
                            call_back=kwargs['call_back'])
                    else:
                        res = find_mutation_minpairs(
                            c, q,
                            tier_type=kwargs['tier_type'],
                            collapse_homophones=kwargs['collapse_homophones'],
                            stop_check=kwargs['stop_check'],
                            call_back=kwargs['call_back'])
                    if 'output_filename' in kwargs and kwargs['output_filename'] is not None:
                        print_neighden_results(kwargs['output_filename'],
                                               res[1], kwargs['output_format'])
                    if self.stopped:
                        break
                    if kwargs['file_list'] is not None:
                        output.append(','.join([
                            str(q), str(res[0]),
                            ','.join([str(r) for r in res[1]])
                        ]))
                    self.results.append([q, res[0]])
            else:
                # This branch is taken when searching the entire corpus
                end = kwargs['corpusModel'].beginAddColumn(att)
                if kwargs['algorithm'] != 'substitution':
                    results = neighborhood_density_all_words(
                        c, tierdict,
                        tier_type=kwargs['tier_type'],
                        algorithm=kwargs['algorithm'],
                        output_format=kwargs['output_format'],
                        max_distance=kwargs['max_distance'],
                        num_cores=kwargs['num_cores'],
                        call_back=kwargs['call_back'],
                        stop_check=kwargs['stop_check'],
                        settable_attr=kwargs['attribute'],
                        collapse_homophones=kwargs['collapse_homophones'])
                else:
                    results = find_mutation_minpairs_all_words(
                        c, tierdict,
                        tier_type=kwargs['tier_type'],
                        collapse_homophones=kwargs['collapse_homophones'],
                        num_cores=kwargs['num_cores'],
                        stop_check=kwargs['stop_check'],
                        call_back=kwargs['call_back'])
                end = kwargs['corpusModel'].endAddColumn(end)
                if 'output_filename' in kwargs and kwargs['output_filename'] is not None:
                    print_all_neighden_results(kwargs['output_filename'], results)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    if output and 'output_filename' in kwargs:
        with open(kwargs['output_filename'], encoding='utf-8', mode='w') as outf:
            print('Word,Density,Neighbors', file=outf)
            for item in output:
                print(item, file=outf)
    self.dataReady.emit(self.results)
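# A minimal, self-contained sketch of the temporary-removal pattern used in the
# method above. Plain strings stand in for corpus entries and the data is
# invented for illustration only. Before computing neighbourhood density for a
# query that is itself in the corpus, its own entry is popped from the lookup
# dict so the word is not counted as its own neighbour; the entry popped for
# the previous query is restored first, so homophones stay available.
from collections import defaultdict

tierdict = defaultdict(list)
for word in ['kat', 'kat', 'bat', 'mat']:   # 'kat' has a homophone
    tierdict[word].append(word)

last_key_removed = None
last_value_removed = None
for query in ['kat', 'bat']:
    if last_value_removed is not None:
        # Restore the entry hidden for the previous query
        tierdict[last_key_removed].append(last_value_removed)
    last_key_removed = query
    # Hide the query itself; its homophone (if any) remains in tierdict
    last_value_removed = tierdict[last_key_removed].pop()
    print(query, 'remaining homophones:', len(tierdict[query]))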
def run(self):
    time.sleep(0.1)
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    if context == RestrictedContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == RestrictedContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    corpus = kwargs['corpusModel'].corpus
    st = kwargs['sequence_type']
    tt = kwargs['type_token']
    att = kwargs.get('attribute', None)
    with cm(corpus, st, tt, att) as c:
        try:
            if 'query' in kwargs:
                for q in kwargs['query']:
                    if kwargs['algorithm'] != 'substitution':
                        res = neighborhood_density(
                            c, q,
                            algorithm=kwargs['algorithm'],
                            max_distance=kwargs['max_distance'],
                            stop_check=kwargs['stop_check'],
                            call_back=kwargs['call_back'])
                    else:
                        res = find_mutation_minpairs(
                            c, q,
                            stop_check=kwargs['stop_check'],
                            call_back=kwargs['call_back'])
                    if 'output_filename' in kwargs and kwargs['output_filename'] is not None:
                        print_neighden_results(kwargs['output_filename'], res[1])
                    if self.stopped:
                        break
                    self.results.append([q, res[0]])
            else:
                end = kwargs['corpusModel'].beginAddColumn(att)
                if kwargs['algorithm'] != 'substitution':
                    neighborhood_density_all_words(
                        c,
                        algorithm=kwargs['algorithm'],
                        max_distance=kwargs['max_distance'],
                        num_cores=kwargs['num_cores'],
                        call_back=kwargs['call_back'],
                        stop_check=kwargs['stop_check'])
                else:
                    find_mutation_minpairs_all_words(
                        c,
                        num_cores=kwargs['num_cores'],
                        stop_check=kwargs['stop_check'],
                        call_back=kwargs['call_back'])
                end = kwargs['corpusModel'].endAddColumn(end)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
def run(self):
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    if context == RestrictedContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == RestrictedContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    corpus = kwargs['corpusModel'].corpus
    st = kwargs['sequence_type']
    tt = kwargs['type_token']
    att = kwargs.get('attribute', None)
    ft = kwargs['frequency_cutoff']
    output = list()
    with cm(corpus, st, tt, attribute=att, frequency_threshold=ft) as c:
        try:
            # Create a dict keyed by sequence_type for constant-time lookup
            tierdict = defaultdict(list)
            for entry in c:
                w = getattr(entry, kwargs['sequence_type'])
                tierdict[str(w)].append(entry)
            if 'query' in kwargs:
                for q in kwargs['query']:
                    if kwargs['algorithm'] != 'substitution':
                        res = neighborhood_density(
                            c, q, tierdict,
                            algorithm=kwargs['algorithm'],
                            max_distance=kwargs['max_distance'],
                            force_quadratic=kwargs['force_quadratic'],
                            file_type=kwargs['file_type'],
                            tier_type=kwargs['tier_type'],
                            stop_check=kwargs['stop_check'],
                            call_back=kwargs['call_back'])
                    else:
                        res = find_mutation_minpairs(
                            c, q,
                            tier_type=kwargs['tier_type'],
                            stop_check=kwargs['stop_check'],
                            call_back=kwargs['call_back'])
                    if 'output_filename' in kwargs and kwargs['output_filename'] is not None:
                        print_neighden_results(kwargs['output_filename'], res[1])
                    if self.stopped:
                        break
                    if kwargs['file_list'] is not None:
                        output.append(','.join([
                            q, str(res[0]),
                            ','.join([str(r) for r in res[1]])
                        ]))
                    self.results.append([q, res[0]])
            else:
                end = kwargs['corpusModel'].beginAddColumn(att)
                if kwargs['algorithm'] != 'substitution':
                    neighborhood_density_all_words(
                        c, tierdict,
                        tier_type=kwargs['tier_type'],
                        algorithm=kwargs['algorithm'],
                        max_distance=kwargs['max_distance'],
                        num_cores=kwargs['num_cores'],
                        call_back=kwargs['call_back'],
                        stop_check=kwargs['stop_check'],
                        settable_attr=kwargs['attribute'])
                else:
                    find_mutation_minpairs_all_words(
                        c,
                        tier_type=kwargs['tier_type'],
                        num_cores=kwargs['num_cores'],
                        stop_check=kwargs['stop_check'],
                        call_back=kwargs['call_back'])
                end = kwargs['corpusModel'].endAddColumn(end)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            e = PCTPythonError(e)
            self.errorEncountered.emit(e)
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    if output and kwargs['file_list']:
        with open(kwargs['output_filename'], encoding='utf-8', mode='w') as outf:
            print('Word,Density,Neighbors', file=outf)
            for item in output:
                print(item, file=outf)
    self.dataReady.emit(self.results)