def test_missing(self):
    # Build a matrix from the ``missing_info`` fixture and check the feature
    # names and the set of values it reports.
    matrix = FeatureMatrix('test', self.missing_info)

    expected_features = ['feature1', 'feature2']
    self.assertEqual(matrix.features, expected_features)

    expected_values = {'+', '-'}
    self.assertEqual(matrix.possible_values, expected_values)

    # Open question from the original author: this call is noted as an
    # error, and whether a fallback default value should exist is undecided.
    matrix.validate()
Example #2
0
    def test_missing(self):
        # Construct a matrix from the ``missing_info`` fixture, then verify
        # its feature names and its set of possible values.
        matrix = FeatureMatrix('test', self.missing_info)

        feature_names = matrix.features
        self.assertEqual(feature_names, ['feature1', 'feature2'])

        value_set = matrix.possible_values
        self.assertEqual(value_set, {'+', '-'})

        # Open question from the original author: this call is noted as an
        # error, and whether a fallback default value should exist is
        # undecided.
        matrix.validate()
    def test_add_segment(self):
        # Add a segment 'e' to a matrix built from ``basic_info`` and read
        # one of its feature values back.
        matrix = FeatureMatrix('test', self.basic_info)

        known_spec = {'feature1': '+', 'feature2': '-'}
        matrix.add_segment('e', known_spec)

        stored = matrix['e', 'feature1']
        self.assertEqual(stored, '+')

        # Noted by the original author as failing: an exception needs to be
        # raised if the added segment contains a feature that no other
        # segment has.
        unknown_spec = {'feature1': '+', 'feature3': '-'}
        with self.assertRaises(AttributeError):
            matrix.add_segment('e', unknown_spec)
Example #4
0
def load_feature_matrix_csv(name, path, delimiter, stop_check = None, call_back = None):
    """
    Load a FeatureMatrix from a column-delimited text file

    The first row of the file supplies the column names.  Every row must
    have a ``symbol`` column; all other columns are treated as features,
    and only the first character of each feature value is kept.  Cells
    that are empty or absent are left out of the row's specification
    instead of being indexed.

    Parameters
    ----------
    name : str
        Informative identifier to refer to feature system
    path : str
        Full path to text file
    delimiter : str
        Character to use for splitting lines into columns
    stop_check : callable, optional
        Optional function to check whether to gracefully terminate early
    call_back : callable, optional
        Optional function to supply progress information during the function

    Returns
    -------
    FeatureMatrix or None
        FeatureMatrix generated from the text file, or None if
        ``stop_check`` requested early termination

    Raises
    ------
    DelimiterError
        If a row contains exactly one column
    KeyError
        If a row has no ``symbol`` column
    """
    # 'utf-8-sig' transparently drops a leading byte-order mark, if any
    with open(path, encoding='utf-8-sig', mode='r') as f:
        lines = list(DictReader(f, delimiter = delimiter))

    if call_back is not None:
        call_back('Reading file...')
        call_back(0, len(lines))

    text_input = []
    for i, line in enumerate(lines):
        if stop_check is not None and stop_check():
            return
        if call_back is not None:
            call_back(i)

        if not line:
            continue
        if len(line) == 1:
            raise DelimiterError
        if 'symbol' not in line:
            raise KeyError("no 'symbol' column found in {}".format(path))
        #Compat
        newline = {}
        for k, v in line.items():
            if k == 'symbol':
                newline[k] = v
            elif v:
                # DictReader gives None for a short row and '' for an empty
                # cell; both are skipped so that v[0] cannot raise IndexError
                newline[k] = v[0]
        text_input.append(newline)

    feature_matrix = FeatureMatrix(name, text_input)
    feature_matrix.validate()
    return feature_matrix
Example #5
0
def load_feature_matrix_csv(name, path, delimiter, stop_check = None, call_back = None):
    """
    Load a FeatureMatrix from a column-delimited text file

    The first row of the file supplies the column names.  Every row must
    have a ``symbol`` column; all other columns are treated as features,
    and only the first character of each feature value is kept.  Cells
    that are empty or absent are left out of the row's specification
    instead of being indexed.

    Parameters
    ----------
    name : str
        Informative identifier to refer to feature system
    path : str
        Full path to text file
    delimiter : str
        Character to use for splitting lines into columns
    stop_check : callable, optional
        Optional function to check whether to gracefully terminate early
    call_back : callable, optional
        Optional function to supply progress information during the function

    Returns
    -------
    FeatureMatrix or None
        FeatureMatrix generated from the text file, or None if
        ``stop_check`` requested early termination

    Raises
    ------
    DelimiterError
        If a row contains exactly one column
    KeyError
        If a row has no ``symbol`` column
    """
    # 'utf-8-sig' transparently drops a leading byte-order mark, if any
    with open(path, encoding='utf-8-sig', mode='r') as f:
        lines = list(DictReader(f, delimiter = delimiter))

    if call_back is not None:
        call_back('Reading file...')
        call_back(0, len(lines))

    text_input = []
    for i, line in enumerate(lines):
        if stop_check is not None and stop_check():
            return
        if call_back is not None:
            call_back(i)

        if not line:
            continue
        if len(line) == 1:
            raise DelimiterError
        if 'symbol' not in line:
            raise KeyError("no 'symbol' column found in {}".format(path))
        #Compat
        newline = {}
        for k, v in line.items():
            if k == 'symbol':
                newline[k] = v
            elif v:
                # DictReader gives None for a short row and '' for an empty
                # cell; both are skipped so that v[0] cannot raise IndexError
                newline[k] = v[0]
        text_input.append(newline)

    feature_matrix = FeatureMatrix(name, text_input)
    feature_matrix.validate()
    return feature_matrix
    def test_missing_with_default(self):
        # Build a matrix from the ``missing_with_default_info`` fixture, check
        # its feature names and value set, validate it, then read one cell.
        matrix = FeatureMatrix('test', self.missing_with_default_info)

        expected_features = ['feature1', 'feature2']
        self.assertEqual(matrix.features, expected_features)

        expected_values = {'+', '-', 'n'}
        self.assertEqual(matrix.possible_values, expected_values)

        matrix.validate()

        # Presumably 'n' is the default filled in for an unspecified entry;
        # the fixture is not visible here -- confirm against it.
        cell = matrix['b', 'feature2']
        self.assertEqual(cell, 'n')
Example #7
0
    def test_missing_with_default(self):
        # Construct a matrix from ``missing_with_default_info``, verify the
        # reported features and values, validate, then look up one cell.
        matrix = FeatureMatrix('test', self.missing_with_default_info)

        feature_names = matrix.features
        value_set = matrix.possible_values
        self.assertEqual(feature_names, ['feature1', 'feature2'])
        self.assertEqual(value_set, {'+', '-', 'n'})

        matrix.validate()

        # Presumably 'n' is the default filled in for an unspecified entry;
        # the fixture is not visible here -- confirm against it.
        looked_up = matrix['b', 'feature2']
        self.assertEqual(looked_up, 'n')
Example #8
0
    def test_add_segment(self):
        # Add a segment 'e' to a matrix built from ``basic_info`` and read
        # one of its feature values back.
        matrix = FeatureMatrix('test', self.basic_info)

        valid_spec = {'feature1': '+', 'feature2': '-'}
        matrix.add_segment('e', valid_spec)

        self.assertEqual(matrix['e', 'feature1'], '+')

        # Noted by the original author as failing: an exception needs to be
        # raised if the added segment contains a feature that no other
        # segment has.
        spec_with_new_feature = {'feature1': '+', 'feature3': '-'}
        with self.assertRaises(AttributeError):
            matrix.add_segment('e', spec_with_new_feature)
Example #9
0
    def test_basic(self):
        # Smoke test: fill a corpus from ``corpus_basic_info`` and attach a
        # feature matrix built from ``feature_basic_info``.  Nothing is
        # asserted; the test only checks that these calls do not raise.
        corpus = Corpus('test')
        for entry in self.corpus_basic_info:
            word = Word(**entry)
            corpus.add_word(word)

        matrix = FeatureMatrix('test', self.feature_basic_info)
        corpus.set_feature_matrix(matrix)
Example #10
0
    def test_add_feature(self):
        # Adding a feature to a matrix built from
        # ``missing_with_default_info`` makes it readable for segment 'a'
        # with the value 'n'.
        with_default = FeatureMatrix('test', self.missing_with_default_info)
        with_default.add_feature('feature3')

        new_cell = with_default['a', 'feature3']
        self.assertEqual(new_cell, 'n')

        # Noted by the original author as an error case: the matrix built
        # from ``basic_info`` has no default value.
        without_default = FeatureMatrix('test', self.basic_info)
        without_default.add_feature('feature3')
Example #11
0
    def test_coverage(self):
        # Attach a feature matrix built from ``feature_no_d_info`` to a corpus
        # filled from ``corpus_basic_info`` and check that the coverage
        # report is exactly ['d'].
        corpus = Corpus('test')
        for entry in self.corpus_basic_info:
            word = Word(**entry)
            corpus.add_word(word)

        matrix = FeatureMatrix('test', self.feature_no_d_info)
        corpus.set_feature_matrix(matrix)

        uncovered = corpus.check_coverage()
        self.assertEqual(uncovered, ['d'])
Example #12
0
    def test_basic(self):
        # Check the name, feature names, value set and segments of a matrix
        # built from the ``basic_info`` fixture.
        matrix = FeatureMatrix('test', self.basic_info)

        name_matches = matrix.name == 'test'
        self.assertTrue(name_matches)

        expected_features = ['feature1', 'feature2']
        self.assertEqual(matrix.features, expected_features)

        expected_values = {'+', '-'}
        self.assertEqual(matrix.possible_values, expected_values)

        # The original author notes that this comparison fails and asks
        # whether ``segments`` should be a sorted list; both sides are
        # sorted here before comparing.
        actual_segments = sorted(matrix.segments)
        expected_segments = sorted(['#', 'a', 'b', 'c', 'd'])
        self.assertEqual(actual_segments, expected_segments)
Example #13
0
    def test_dots(self):
        # A matrix built from ``dots_info`` should report '.' among its
        # possible values and return it for ('b', 'feature2').
        matrix = FeatureMatrix('test', self.dots_info)

        expected_features = ['feature1', 'feature2']
        self.assertEqual(matrix.features, expected_features)

        expected_values = {'+', '-', '.'}
        self.assertEqual(matrix.possible_values, expected_values)

        by_pair = matrix['b', 'feature2']
        self.assertEqual(by_pair, '.')

        # Noted by the original author as failing; the open question is
        # whether a single-segment lookup should give a sorted list or a set
        # of _features (a set would need them to be hashable).
        segment_entry = matrix['b']
        self.assertEqual(segment_entry['feature2'], '.')
Example #14
0
    def test_feats_to_segs(self):
        # Ask the corpus which segments match '+feature1' and compare the
        # result, ignoring order, against 'a' and 'b'.
        corpus = Corpus('test')
        for entry in self.corpus_basic_info:
            word = Word(**entry)
            corpus.add_word(word)

        matrix = FeatureMatrix('test', self.feature_basic_info)
        corpus.set_feature_matrix(matrix)

        matching = sorted(corpus.features_to_segments(['+feature1']))
        expected = sorted(['a', 'b'])
        self.assertEqual(matching, expected)
Example #15
0
    def setUp(self):
        # Fixture: a four-word corpus and a two-feature matrix over the
        # segments a, b, c and d, wired together before each test.

        # (spelling, transcription) pairs; every word has frequency 32.0.
        word_specs = (
            ('a', ['a', 'b']),
            ('b', ['a', 'b']),
            ('c', ['c', 'a', 'b']),
            ('d', ['a', 'd']),
        )
        self.corpus_info = [
            {'spelling': spelling, 'transcription': segments, 'frequency': 32.0}
            for spelling, segments in word_specs
        ]

        # (symbol, feature1, feature2) triples covering all four
        # combinations of '+' and '-'.
        segment_specs = (
            ('a', '+', '+'),
            ('b', '+', '-'),
            ('c', '-', '+'),
            ('d', '-', '-'),
        )
        self.feature_info = [
            {'symbol': symbol, 'feature1': first, 'feature2': second}
            for symbol, first, second in segment_specs
        ]

        self.corpus = Corpus('test')
        for entry in self.corpus_info:
            word = Word(**entry)
            self.corpus.add_word(word)

        matrix = FeatureMatrix('test', self.feature_info)
        self.corpus.set_feature_matrix(matrix)

        # Refresh the inventory's features from the corpus's specifier.
        inventory = self.corpus.inventory
        inventory.update_features(self.corpus.specifier)
Example #16
0
    def test_add_tier(self):
        # Add a tier 't' defined by '+feature1', check its content on word
        # 'd', then remove it and confirm the attribute is gone.
        corpus = Corpus('test')
        for entry in self.corpus_basic_info:
            word = Word(**entry)
            corpus.add_word(word)

        matrix = FeatureMatrix('test', self.feature_basic_info)
        corpus.set_feature_matrix(matrix)

        corpus.add_tier('t', '+feature1')
        tier_content = corpus['d'].t
        first_segment = corpus['d'].transcription[0]
        self.assertEqual(tier_content, [first_segment])

        corpus.remove_attribute('t')

        # The word is looked up before entering the context so that only the
        # attribute access itself is expected to raise.
        word_d = corpus['d']
        with self.assertRaises(AttributeError):
            getattr(word_d, 't')
    def test_add_feature(self):
        # After adding 'feature3' to a matrix built from
        # ``missing_with_default_info``, segment 'a' reads back 'n' for it.
        defaulted = FeatureMatrix('test', self.missing_with_default_info)
        defaulted.add_feature('feature3')

        added_value = defaulted['a', 'feature3']
        self.assertEqual(added_value, 'n')

        # Noted by the original author as an error case: the matrix built
        # from ``basic_info`` has no default value.
        undefaulted = FeatureMatrix('test', self.basic_info)
        undefaulted.add_feature('feature3')
Example #18
0
def spe_specifier():
    """
    Build a FeatureMatrix named 'spe' from a hard-coded segment table

    The table lists ten segments (ɑ, u, o, e, s, m, i, n, ʃ, t).  Every
    entry has a 'symbol' key plus the same 21 feature keys, with values
    drawn from '+', '-', 'n' and '.'.

    NOTE(review): 'spe' presumably refers to the Sound Pattern of English
    feature system -- confirm.

    Returns
    -------
    FeatureMatrix
        Feature matrix constructed from the table; ``validate`` is not
        called here
    """
    # One dict per segment: 'symbol' first, then the feature values.
    fm_input = [{'symbol':'ɑ','EXTRA':'-','LONG':'-','ant':'-','back':'+','cont':'+','cor':'-',
                'del_rel':'n','distr':'n','glot_cl':'-','hi_subgl_pr':'-','high':'-',
                'lat':'n','low':'+','mv_glot_cl':'n','nasal':'-','round':'-','son':'+',
                'strid':'-','tense':'-','voc':'+','voice':'+'},
                {'symbol':'u','EXTRA':'-','LONG':'-','ant':'-','back':'+','cont':'+','cor':'-',
                'del_rel':'n','distr':'n','glot_cl':'-','hi_subgl_pr':'-','high':'+',
                'lat':'n','low':'-','mv_glot_cl':'n','nasal':'-','round':'+','son':'+',
                'strid':'-','tense':'+','voc':'+','voice':'+'},
                {'symbol':'o','EXTRA':'-','LONG':'-','ant':'-','back':'+','cont':'+','cor':'-',
                'del_rel':'n','distr':'n','glot_cl':'-','hi_subgl_pr':'-','high':'-',
                'lat':'n','low':'-','mv_glot_cl':'n','nasal':'-','round':'+','son':'+',
                'strid':'-','tense':'+','voc':'+','voice':'+'},
                {'symbol':'e','EXTRA':'-','LONG':'-','ant':'-','back':'-','cont':'+','cor':'-',
                'del_rel':'n','distr':'n','glot_cl':'-','hi_subgl_pr':'-','high':'-',
                'lat':'n','low':'-','mv_glot_cl':'n','nasal':'-','round':'-','son':'+',
                'strid':'-','tense':'+','voc':'+','voice':'+'},
                {'symbol':'s','EXTRA':'-','LONG':'-','ant':'+','back':'-','cont':'+','cor':'+',
                'del_rel':'n','distr':'-','glot_cl':'-','hi_subgl_pr':'-','high':'-',
                'lat':'-','low':'-','mv_glot_cl':'n','nasal':'-','round':'-','son':'-',
                'strid':'+','tense':'.','voc':'-','voice':'-'},
                {'symbol':'m','EXTRA':'-','LONG':'-','ant':'+','back':'-','cont':'-','cor':'-',
                'del_rel':'-','distr':'+','glot_cl':'-','hi_subgl_pr':'-','high':'-',
                'lat':'n','low':'-','mv_glot_cl':'n','nasal':'+','round':'-','son':'+',
                'strid':'-','tense':'-','voc':'-','voice':'+'},
                {'symbol':'i','EXTRA':'-','LONG':'-','ant':'-','back':'-','cont':'+','cor':'-',
                'del_rel':'n','distr':'n','glot_cl':'-','hi_subgl_pr':'-','high':'+',
                'lat':'n','low':'-','mv_glot_cl':'n','nasal':'-','round':'-','son':'+',
                'strid':'-','tense':'+','voc':'+','voice':'+'},
                {'symbol':'n','EXTRA':'-','LONG':'-','ant':'+','back':'-','cont':'-','cor':'+',
                'del_rel':'-','distr':'-','glot_cl':'-','hi_subgl_pr':'-','high':'-',
                'lat':'-','low':'-','mv_glot_cl':'n','nasal':'+','round':'-','son':'+',
                'strid':'-','tense':'-','voc':'-','voice':'+'},
                {'symbol':'ʃ','EXTRA':'-','LONG':'-','ant':'-','back':'-','cont':'+','cor':'+',
                'del_rel':'n','distr':'+','glot_cl':'-','hi_subgl_pr':'-','high':'+',
                'lat':'-','low':'-','mv_glot_cl':'n','nasal':'-','round':'-','son':'-',
                'strid':'+','tense':'.','voc':'-','voice':'-'},
                {'symbol':'t','EXTRA':'-','LONG':'-','ant':'+','back':'-','cont':'-','cor':'+',
                'del_rel':'-','distr':'-','glot_cl':'-','hi_subgl_pr':'-','high':'-',
                'lat':'-','low':'-','mv_glot_cl':'n','nasal':'-','round':'-','son':'-',
                'strid':'-','tense':'.','voc':'-','voice':'-'}]
    fm = FeatureMatrix('spe',fm_input)
    return fm