def run(self):
    """Compute pointwise mutual information for each requested segment pair.

    Parameters are read from ``self.kwargs``: ``context`` (removed from the
    dict when read), ``corpus``, ``sequence_type``, ``type_token``,
    ``frequency_cutoff``, ``segment_pairs``, ``halve_edges``, ``in_word``,
    ``stop_check`` and ``call_back``.

    One result per pair is appended to ``self.results``.  On success the
    list is sent through ``dataReady``; a failure is sent through
    ``errorEncountered``; if ``self.stopped`` is set the run ends with
    ``finishedCancelling`` instead.
    """
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    if context == ContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == ContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    elif context == ContextWidget.separate_value:
        cm = SeparatedTokensVariantContext
    elif context == ContextWidget.relative_value:
        cm = WeightedVariantContext
    else:
        # An unrecognised value used to leave `cm` unbound, so the `with`
        # below failed with UnboundLocalError outside every handler.
        # Raise and catch so PCTPythonError receives a genuinely raised
        # exception, exactly as it does in the handler further down.
        try:
            raise ValueError(
                'Unrecognized variant context: {!r}'.format(context))
        except ValueError as e:
            self.errorEncountered.emit(PCTPythonError(e))
        return
    with cm(kwargs['corpus'], kwargs['sequence_type'], kwargs['type_token'],
            frequency_threshold=kwargs['frequency_cutoff']) as c:
        try:
            for pair in kwargs['segment_pairs']:
                res = pointwise_mi(c, pair,
                                   halve_edges=kwargs['halve_edges'],
                                   in_word=kwargs['in_word'],
                                   stop_check=kwargs['stop_check'],
                                   call_back=kwargs['call_back'])
                # Checked after the call: a result computed while the run
                # was being cancelled is discarded.
                if self.stopped:
                    break
                self.results.append(res)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            # Wrap anything unexpected so listeners always get a PCT error.
            self.errorEncountered.emit(PCTPythonError(e))
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
def doVowelHarmony(self):
    """Ask for the vowel-defining feature and look for vowel harmony.

    The user enters a signed feature such as ``[+voc]``.  A temporary tier
    named ``AutoGeneratedVowels`` is added to the corpus, pointwise mutual
    information is computed for every ordered pair of segments carrying
    that feature, and for each of ``high``, ``back`` and ``round`` the MI
    values are grouped by the sign of the first and second segment.  The
    four group averages are shown in ``self.resultsLayout``; a feature is
    mentioned in the commentary when both same-sign averages exceed the
    corresponding mixed-sign averages.

    Returns nothing.  The method exits early if the dialog is cancelled or
    ``self.corpusHasFeature`` rejects the entered feature.
    """
    text, ok = QInputDialog.getText(self, 'Vowel harmony', 'Which feature is unique to vowels? In SPE this is [+voc]')
    if not ok:
        # The dialog was cancelled; previously the typed text was used anyway.
        return
    text = text.lstrip('[').rstrip(']')
    if not self.corpusHasFeature(text):
        return
    self.harmony_feature = text

    def mean(values):
        # NaN for an empty group: it prints as 'nan' and makes every
        # comparison below False, so no harmony is claimed without data.
        return sum(values) / len(values) if values else float('nan')

    corpus = self.corpusModel.corpus
    corpus.add_tier('AutoGeneratedVowels', text)
    try:
        # `text` is a sign character followed by the feature name.
        sign, feature_name = text[0], text[1:]
        inventory = [seg for seg in corpus.inventory
                     if seg.features[feature_name] == sign]

        # First symbol -> list of (second symbol, MI); '*' marks a pair
        # for which MI could not be computed.
        probs = defaultdict(list)
        for first, second in itertools.product(inventory, repeat=2):
            pair = (first.symbol, second.symbol)
            try:
                mi = mutual_information.pointwise_mi(corpus, pair, 'AutoGeneratedVowels')
            except mutual_information.MutualInformationError:
                mi = '*'
            probs[pair[0]].append((pair[1], mi))

        harmonic_features = ['high', 'back', 'round']
        commentary = []
        for feature in harmonic_features:
            pp_values = []
            pm_values = []
            mm_values = []
            mp_values = []
            for seg in inventory:
                first_is_plus = seg.features[feature] == '+'
                for seg2_symbol, mi in probs[seg.symbol]:
                    if mi == '*':
                        continue
                    seg2 = corpus.symbol_to_segment(seg2_symbol)
                    # Fixed: the sign used to be read from `seg`, so the
                    # second member of the pair was never consulted.
                    seg2_sign = seg2.features[feature]
                    if first_is_plus:
                        if seg2_sign == '+':
                            pp_values.append(mi)
                        else:
                            pm_values.append(mi)
                    else:
                        if seg2_sign == '-':
                            mm_values.append(mi)
                        else:
                            mp_values.append(mi)

            # Fixed: the labels say "Average" but used to show the raw
            # lists, and the test below compared lists lexicographically.
            avg_pp = mean(pp_values)
            avg_pm = mean(pm_values)
            avg_mm = mean(mm_values)
            avg_mp = mean(mp_values)
            self.resultsLayout.addWidget(QLabel('Average [+{0}][+{0}] MI = {1}'.format(feature, avg_pp)))
            self.resultsLayout.addWidget(QLabel('Average [+{0}][-{0}] MI = {1}'.format(feature, avg_pm)))
            self.resultsLayout.addWidget(QLabel('Average [-{0}][-{0}] MI = {1}'.format(feature, avg_mm)))
            self.resultsLayout.addWidget(QLabel('Average [-{0}][+{0}] MI = {1}'.format(feature, avg_mp)))
            if avg_pp > avg_pm and avg_mm > avg_mp:
                commentary.append('There might be harmony based on {}'.format(feature))

        commentary = '\n'.join(commentary)
        # NOTE(review): as before, only the value lists of the last feature
        # are passed on; confirm what outputHarmonyResults expects.
        self.outputHarmonyResults(pp_values, pm_values, mm_values, mp_values, commentary)
    finally:
        # Remove the temporary tier even when the analysis raises.
        corpus.remove_attribute('AutoGeneratedVowels')
    return
def doVowelHarmony(self):
    """Prompt for the vowel feature and report possible vowel harmony.

    The entered value is a signed feature such as ``[+voc]``.  While the
    analysis runs, the corpus carries a temporary ``AutoGeneratedVowels``
    tier.  Pointwise mutual information is computed for every ordered pair
    of segments that have the entered feature; for ``high``, ``back`` and
    ``round`` the values are split into four groups by the signs of the
    first and second segment, and each group's average is added to
    ``self.resultsLayout``.  A feature is listed in the commentary when
    both same-sign averages are greater than the matching mixed-sign ones.

    Returns nothing; exits early when the dialog is cancelled or
    ``self.corpusHasFeature`` rejects the feature.
    """
    text, ok = QInputDialog.getText(self, 'Vowel harmony', 'Which feature is unique to vowels? In SPE this is [+voc]')
    if not ok:
        # Respect Cancel; the flag used to be ignored.
        return
    text = text.lstrip('[').rstrip(']')
    if not self.corpusHasFeature(text):
        return
    self.harmony_feature = text

    def mean(values):
        # An empty group yields NaN, which displays as 'nan' and compares
        # False against everything, so it can never suggest harmony.
        return sum(values) / len(values) if values else float('nan')

    corpus = self.corpusModel.corpus
    corpus.add_tier('AutoGeneratedVowels', text)
    try:
        # First character is the sign, the remainder the feature name.
        sign, feature_name = text[0], text[1:]
        inventory = [seg for seg in corpus.inventory
                     if seg.features[feature_name] == sign]

        # Maps a first symbol to (second symbol, MI) tuples; MI is '*'
        # when the computation raised MutualInformationError.
        probs = defaultdict(list)
        for first, second in itertools.product(inventory, repeat=2):
            pair = (first.symbol, second.symbol)
            try:
                mi = mutual_information.pointwise_mi(corpus, pair, 'AutoGeneratedVowels')
            except mutual_information.MutualInformationError:
                mi = '*'
            probs[pair[0]].append((pair[1], mi))

        harmonic_features = ['high', 'back', 'round']
        commentary = []
        for feature in harmonic_features:
            pp_values = []
            pm_values = []
            mm_values = []
            mp_values = []
            for seg in inventory:
                first_is_plus = seg.features[feature] == '+'
                for seg2_symbol, mi in probs[seg.symbol]:
                    if mi == '*':
                        continue
                    seg2 = corpus.symbol_to_segment(seg2_symbol)
                    # Bug fix: take the sign from the second segment; the
                    # old code re-read it from `seg`.
                    seg2_sign = seg2.features[feature]
                    if first_is_plus:
                        if seg2_sign == '+':
                            pp_values.append(mi)
                        else:
                            pm_values.append(mi)
                    else:
                        if seg2_sign == '-':
                            mm_values.append(mi)
                        else:
                            mp_values.append(mi)

            # Bug fix: show and compare real averages.  Comparing the raw
            # lists with `>` is a lexicographic comparison.
            avg_pp = mean(pp_values)
            avg_pm = mean(pm_values)
            avg_mm = mean(mm_values)
            avg_mp = mean(mp_values)
            self.resultsLayout.addWidget(QLabel('Average [+{0}][+{0}] MI = {1}'.format(feature, avg_pp)))
            self.resultsLayout.addWidget(QLabel('Average [+{0}][-{0}] MI = {1}'.format(feature, avg_pm)))
            self.resultsLayout.addWidget(QLabel('Average [-{0}][-{0}] MI = {1}'.format(feature, avg_mm)))
            self.resultsLayout.addWidget(QLabel('Average [-{0}][+{0}] MI = {1}'.format(feature, avg_mp)))
            if avg_pp > avg_pm and avg_mm > avg_mp:
                commentary.append('There might be harmony based on {}'.format(feature))

        commentary = '\n'.join(commentary)
        # NOTE(review): unchanged from before, only the last feature's
        # value lists are handed over; verify against outputHarmonyResults.
        self.outputHarmonyResults(pp_values, pm_values, mm_values, mp_values, commentary)
    finally:
        # Always drop the temporary tier, including on error.
        corpus.remove_attribute('AutoGeneratedVowels')
    return
def run(self):
    """Compute pointwise mutual information, optionally within environments.

    Parameters are read from ``self.kwargs``: ``context`` (removed from the
    dict when read), ``corpus``, ``sequence_type``, ``type_token``,
    ``frequency_cutoff``, ``segment_pairs``, ``halve_edges``, ``in_word``,
    ``stop_check`` and ``call_back``.  If ``envs`` is present and not
    ``None``, the corpus context is first passed through ``mi_env_filter``
    together with ``context_output_path`` (both keys are removed from the
    dict), and ``in_word`` is forced to ``False``.

    One result per pair is appended to ``self.results``.  On success the
    list is sent through ``dataReady``; a failure is sent through
    ``errorEncountered``; if ``self.stopped`` is set the run ends with
    ``finishedCancelling`` instead.
    """
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    if context == ContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == ContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    elif context == ContextWidget.separate_value:
        cm = SeparatedTokensVariantContext
    elif context == ContextWidget.relative_value:
        cm = WeightedVariantContext
    else:
        # An unrecognised value used to leave `cm` unbound, so the `with`
        # below failed with UnboundLocalError outside every handler.
        # Raise and catch so PCTPythonError receives a genuinely raised
        # exception, exactly as it does in the handler further down.
        try:
            raise ValueError(
                'Unrecognized variant context: {!r}'.format(context))
        except ValueError as e:
            self.errorEncountered.emit(PCTPythonError(e))
        return
    with cm(kwargs['corpus'], kwargs['sequence_type'], kwargs['type_token'],
            frequency_threshold=kwargs['frequency_cutoff']) as c:
        try:
            envs = kwargs.pop('envs', None)
            if envs is not None:
                # With environments set, the corpus context is replaced by
                # the filtered ("extracted") one.
                # context_output_path is used for the env context export.
                context_output_path = kwargs.pop('context_output_path')
                c = mi_env_filter(c, envs, context_output_path)
                kwargs['in_word'] = False
            for pair in kwargs['segment_pairs']:
                res = pointwise_mi(c, pair,
                                   halve_edges=kwargs['halve_edges'],
                                   in_word=kwargs['in_word'],
                                   stop_check=kwargs['stop_check'],
                                   call_back=kwargs['call_back'])
                # Checked after the call: a result computed while the run
                # was being cancelled is discarded.
                if self.stopped:
                    break
                self.results.append(res)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            # Wrap anything unexpected so listeners always get a PCT error.
            self.errorEncountered.emit(PCTPythonError(e))
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)
def test_pointwise_mi(unspecified_test_corpus):
    """Check ``pointwise_mi`` against stored values on the test corpus.

    Three calls are made on a canonical-variant context over the
    ``transcription`` tier with ``type`` counting: a plain query, one with
    ``in_word`` set, and one with ``halve_edges`` set.  Each result must be
    within 0.0001 of its stored value.
    """
    with CanonicalVariantContext(unspecified_test_corpus,
                                 'transcription', 'type') as c:
        cases = [
            (dict(corpus_context=c, query=('e', 'm')),
             2.7319821866519507),
            (dict(corpus_context=c, query=('t', 'n'), in_word=True),
             0.5849625007211564),
            (dict(corpus_context=c, query=('e', 'm'), halve_edges=True),
             2.7319821866519507),
        ]
        for call_kwargs, expected in cases:
            observed = pointwise_mi(**call_kwargs)
            assert abs(observed - expected) < 0.0001
def test_pointwise_mi(unspecified_test_corpus):
    """Verify ``pointwise_mi`` results for three queries on the test corpus.

    The queries run on a canonical-variant context (``transcription``,
    ``type``).  The second adds ``in_word=True`` and the third adds
    ``halve_edges=True``.  Each result may differ from its reference value
    by less than 0.0001.
    """
    reference = (
        (('e', 'm'), {}, 2.7319821866519507),
        (('t', 'n'), {'in_word': True}, 0.5849625007211564),
        (('e', 'm'), {'halve_edges': True}, 2.7319821866519507),
    )
    with CanonicalVariantContext(unspecified_test_corpus,
                                 'transcription', 'type') as c:
        for query, options, wanted in reference:
            params = {'corpus_context': c, 'query': query}
            params.update(options)
            got = pointwise_mi(**params)
            assert abs(got - wanted) < 0.0001
def run(self):
    """Compute pointwise mutual information for each requested segment pair.

    Parameters are read from ``self.kwargs``: ``context`` (removed from the
    dict when read), ``corpus``, ``sequence_type``, ``type_token``,
    ``segment_pairs``, ``halve_edges``, ``in_word``, ``stop_check`` and
    ``call_back``.

    One result per pair is appended to ``self.results``.  On success the
    list is sent through ``dataReady``; a failure is sent through
    ``errorEncountered``; if ``self.stopped`` is set the run ends with
    ``finishedCancelling`` instead.
    """
    # NOTE(review): the reason for this short pause is not visible here;
    # presumably it lets the caller finish setting up — confirm.
    time.sleep(0.1)
    kwargs = self.kwargs
    self.results = []
    context = kwargs.pop('context')
    if context == ContextWidget.canonical_value:
        cm = CanonicalVariantContext
    elif context == ContextWidget.frequent_value:
        cm = MostFrequentVariantContext
    elif context == ContextWidget.separate_value:
        cm = SeparatedTokensVariantContext
    elif context == ContextWidget.relative_value:
        cm = WeightedVariantContext
    else:
        # An unrecognised value used to leave `cm` unbound, so the `with`
        # below failed with UnboundLocalError outside every handler.
        # Raise and catch so PCTPythonError receives a genuinely raised
        # exception, exactly as it does in the handler further down.
        try:
            raise ValueError(
                'Unrecognized variant context: {!r}'.format(context))
        except ValueError as e:
            self.errorEncountered.emit(PCTPythonError(e))
        return
    with cm(kwargs['corpus'], kwargs['sequence_type'],
            kwargs['type_token']) as c:
        try:
            for pair in kwargs['segment_pairs']:
                res = pointwise_mi(c, pair,
                                   halve_edges=kwargs['halve_edges'],
                                   in_word=kwargs['in_word'],
                                   stop_check=kwargs['stop_check'],
                                   call_back=kwargs['call_back'])
                # Checked after the call: a result computed while the run
                # was being cancelled is discarded.
                if self.stopped:
                    break
                self.results.append(res)
        except PCTError as e:
            self.errorEncountered.emit(e)
            return
        except Exception as e:
            # Wrap anything unexpected so listeners always get a PCT error.
            self.errorEncountered.emit(PCTPythonError(e))
            return
    if self.stopped:
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(self.results)