def test_missing(self):
    """Build a matrix from ``self.missing_info`` and check its features and values."""
    matrix = FeatureMatrix('test', self.missing_info)
    expected_features = ['feature1', 'feature2']
    expected_values = {'+', '-'}

    self.assertEqual(matrix.features, expected_features)
    self.assertEqual(matrix.possible_values, expected_values)

    # Original author's open note: "Error, there should be a default
    # default value?" -- left unresolved; validate() is simply invoked.
    matrix.validate()
def test_missing(self):
    """Check features and possible values of a matrix built from ``self.missing_info``."""
    feature_matrix = FeatureMatrix('test', self.missing_info)

    observed_features = feature_matrix.features
    self.assertEqual(observed_features, ['feature1', 'feature2'])

    observed_values = feature_matrix.possible_values
    self.assertEqual(observed_values, {'+', '-'})

    # Open question carried over from the original: "Error, there should
    # be a default default value?"
    feature_matrix.validate()
def test_add_segment(self):
    """Add a segment to a matrix and read one of its feature values back."""
    matrix = FeatureMatrix('test', self.basic_info)
    new_segment = {'feature1': '+', 'feature2': '-'}
    matrix.add_segment('e', new_segment)
    self.assertEqual(matrix['e', 'feature1'], '+')

    # Original note: "Fails, need to raise exception if the added segment
    # contains a feature that no other segment has".
    add_segment = matrix.add_segment
    with self.assertRaises(AttributeError):
        add_segment('e', {'feature1': '+', 'feature3': '-'})
def load_feature_matrix_csv(name, path, delimiter, stop_check = None, call_back = None):
    """
    Load a FeatureMatrix from a column-delimited text file

    Parameters
    ----------
    name : str
        Informative identifier to refer to feature system
    path : str
        Full path to text file
    delimiter : str
        Character to use for spliting lines into columns
    stop_check : callable, optional
        Optional function to check whether to gracefully terminate early
    call_back : callable, optional
        Optional function to supply progress information during the function

    Returns
    -------
    FeatureMatrix or None
        FeatureMatrix generated from the text file, or None when
        ``stop_check`` requested early termination

    Raises
    ------
    DelimiterError
        If a row was split into a single column only
    KeyError
        If the rows have no 'symbol' column
    """
    # 'utf-8-sig' strips a leading byte-order mark so it cannot end up
    # inside the first column name.
    with open(path, encoding='utf-8-sig', mode='r') as f:
        lines = list(DictReader(f, delimiter = delimiter))

    if call_back is not None:
        call_back('Reading file...')
        call_back(0, len(lines))

    text_input = []
    for i, line in enumerate(lines):
        if stop_check is not None and stop_check():
            return None
        if call_back is not None:
            call_back(i)
        if not line:
            continue
        if len(line) == 1:
            raise DelimiterError
        if 'symbol' not in line:
            raise KeyError("'symbol' column not found in feature file")

        # Compat: only the first character of each feature value is kept.
        newline = {}
        for k, v in line.items():
            if k == 'symbol':
                newline[k] = v
            elif v:
                # Skips both None (short row) and '' (empty cell); indexing
                # an empty cell with v[0] would raise IndexError.
                newline[k] = v[0]
        text_input.append(newline)

    feature_matrix = FeatureMatrix(name, text_input)
    feature_matrix.validate()
    return feature_matrix
def test_missing_with_default(self):
    """Check a matrix built from ``self.missing_with_default_info``, then one looked-up value."""
    matrix = FeatureMatrix('test', self.missing_with_default_info)
    expected_features = ['feature1', 'feature2']
    expected_values = {'+', '-', 'n'}

    self.assertEqual(matrix.features, expected_features)
    self.assertEqual(matrix.possible_values, expected_values)

    matrix.validate()

    # After validation, segment 'b' is expected to report 'n' for feature2.
    self.assertEqual(matrix['b', 'feature2'], 'n')
def test_missing_with_default(self):
    """Validate a matrix built from ``self.missing_with_default_info`` and read back b/feature2."""
    feature_matrix = FeatureMatrix('test', self.missing_with_default_info)

    observed_features = feature_matrix.features
    self.assertEqual(observed_features, ['feature1', 'feature2'])

    observed_values = feature_matrix.possible_values
    self.assertEqual(observed_values, {'+', '-', 'n'})

    feature_matrix.validate()

    value_for_b = feature_matrix['b', 'feature2']
    self.assertEqual(value_for_b, 'n')
def test_add_segment(self):
    """Add segment 'e', read a value back, then expect a bad re-add to raise."""
    feature_matrix = FeatureMatrix('test', self.basic_info)
    feature_matrix.add_segment('e', {'feature1': '+', 'feature2': '-'})

    stored_value = feature_matrix['e', 'feature1']
    self.assertEqual(stored_value, '+')

    # Original note: "Fails, need to raise exception if the added segment
    # contains a feature that no other segment has".
    unknown_feature_spec = {'feature1': '+', 'feature3': '-'}
    add_segment = feature_matrix.add_segment
    with self.assertRaises(AttributeError):
        add_segment('e', unknown_feature_spec)
def test_basic(self):
    """Fill a corpus from ``self.corpus_basic_info`` and attach a feature matrix.

    There are no assertions; the test passes when nothing raises.
    """
    corpus = Corpus('test')
    for word_kwargs in self.corpus_basic_info:
        word = Word(**word_kwargs)
        corpus.add_word(word)

    corpus.set_feature_matrix(FeatureMatrix('test', self.feature_basic_info))
def test_add_feature(self):
    """Add 'feature3' to two matrices and check the value it gets in the first."""
    with_default = FeatureMatrix('test', self.missing_with_default_info)
    with_default.add_feature('feature3')
    self.assertEqual(with_default['a', 'feature3'], 'n')

    # Original note: "Error, no default value".
    without_default = FeatureMatrix('test', self.basic_info)
    without_default.add_feature('feature3')
def test_coverage(self):
    """Attach the ``feature_no_d_info`` matrix and expect check_coverage() to return ['d']."""
    corpus = Corpus('test')
    for word_kwargs in self.corpus_basic_info:
        corpus.add_word(Word(**word_kwargs))

    corpus.set_feature_matrix(FeatureMatrix('test', self.feature_no_d_info))

    uncovered = corpus.check_coverage()
    self.assertEqual(uncovered, ['d'])
def test_basic(self):
    """Check name, features, possible values and segments of the basic matrix."""
    matrix = FeatureMatrix('test', self.basic_info)

    self.assertEqual(matrix.name, 'test')
    self.assertEqual(matrix.features, ['feature1', 'feature2'])
    self.assertEqual(matrix.possible_values, {'+', '-'})

    # Original note: "fails, should be sorted list?" -- both sides are
    # sorted so the comparison does not depend on segment order.
    expected_segments = ['#', 'a', 'b', 'c', 'd']
    self.assertEqual(sorted(matrix.segments), sorted(expected_segments))
def test_dots(self):
    """Check that '.' is a possible value and can be read back by both lookup forms."""
    matrix = FeatureMatrix('test', self.dots_info)
    expected_features = ['feature1', 'feature2']
    expected_values = {'+', '-', '.'}

    self.assertEqual(matrix.features, expected_features)
    self.assertEqual(matrix.possible_values, expected_values)

    # Lookup by (segment, feature) pair.
    self.assertEqual(matrix['b', 'feature2'], '.')

    # Original note: "Fails, should be sorted list of _features? Or set of
    # _features? Would need to be hashed then".
    # Lookup by segment first, then by feature.
    segment_entry = matrix['b']
    self.assertEqual(segment_entry['feature2'], '.')
def test_feats_to_segs(self):
    """Expect features_to_segments(['+feature1']) to yield 'a' and 'b' in any order."""
    corpus = Corpus('test')
    for word_kwargs in self.corpus_basic_info:
        corpus.add_word(Word(**word_kwargs))

    corpus.set_feature_matrix(FeatureMatrix('test', self.feature_basic_info))

    matching_segments = corpus.features_to_segments(['+feature1'])
    expected_segments = ['a', 'b']
    self.assertEqual(sorted(matching_segments), sorted(expected_segments))
def setUp(self):
    """Create the word and feature fixtures and a corpus populated from them."""
    # (spelling, transcription) pairs; every word shares the same frequency.
    word_table = (
        ('a', ['a', 'b']),
        ('b', ['a', 'b']),
        ('c', ['c', 'a', 'b']),
        ('d', ['a', 'd']),
    )
    self.corpus_info = [
        {'spelling': spelling, 'transcription': segments, 'frequency': 32.0}
        for spelling, segments in word_table
    ]

    # (symbol, feature1, feature2) triples.
    feature_table = (
        ('a', '+', '+'),
        ('b', '+', '-'),
        ('c', '-', '+'),
        ('d', '-', '-'),
    )
    self.feature_info = [
        {'symbol': symbol, 'feature1': first, 'feature2': second}
        for symbol, first, second in feature_table
    ]

    self.corpus = Corpus('test')
    for word_kwargs in self.corpus_info:
        self.corpus.add_word(Word(**word_kwargs))

    self.corpus.set_feature_matrix(FeatureMatrix('test', self.feature_info))
    self.corpus.inventory.update_features(self.corpus.specifier)
def test_add_tier(self):
    """Add tier 't' for '+feature1', check it on word 'd', then remove it."""
    corpus = Corpus('test')
    for word_kwargs in self.corpus_basic_info:
        corpus.add_word(Word(**word_kwargs))
    corpus.set_feature_matrix(FeatureMatrix('test', self.feature_basic_info))

    corpus.add_tier('t', '+feature1')
    tier_on_d = corpus['d'].t
    first_segment_of_d = corpus['d'].transcription[0]
    self.assertEqual(tier_on_d, [first_segment_of_d])

    corpus.remove_attribute('t')
    # The word is looked up outside the context manager so that only the
    # attribute access itself is expected to raise.
    word_d = corpus['d']
    with self.assertRaises(AttributeError):
        getattr(word_d, 't')
def test_add_feature(self):
    """Add 'feature3' to a matrix and check a/feature3; then add it to the basic matrix."""
    matrix = FeatureMatrix('test', self.missing_with_default_info)
    matrix.add_feature('feature3')
    added_value = matrix['a', 'feature3']
    self.assertEqual(added_value, 'n')

    # Original note: "Error, no default value".
    basic_matrix = FeatureMatrix('test', self.basic_info)
    basic_matrix.add_feature('feature3')
def spe_specifier():
    """Build the 'spe' FeatureMatrix fixture covering ten segments.

    Every segment carries the same 21 features and every feature value is
    a single character, so each row is stored as a string holding one
    character per feature, in the order given by ``feature_names``.
    """
    feature_names = (
        'EXTRA', 'LONG', 'ant', 'back', 'cont', 'cor',
        'del_rel', 'distr', 'glot_cl', 'hi_subgl_pr', 'high',
        'lat', 'low', 'mv_glot_cl', 'nasal', 'round', 'son',
        'strid', 'tense', 'voc', 'voice',
    )
    # (symbol, one value character per entry of feature_names)
    segment_values = (
        ('ɑ', '---++-nn---n+n--+--++'),
        ('u', '---++-nn--+n-n-++-+++'),
        ('o', '---++-nn---n-n-++-+++'),
        ('e', '----+-nn---n-n--+-+++'),
        ('s', '--+-++n------n---+.--'),
        ('m', '--+----+---n-n+-+---+'),
        ('i', '----+-nn--+n-n--+-+++'),
        ('n', '--+--+-------n+-+---+'),
        ('ʃ', '----++n+--+--n---+.--'),
        ('t', '--+--+-------n----.--'),
    )

    # 'symbol' comes first in each row, followed by the features in order.
    columns = ('symbol',) + feature_names
    rows = [
        dict(zip(columns, (symbol,) + tuple(values)))
        for symbol, values in segment_values
    ]
    return FeatureMatrix('spe', rows)