Ejemplo n.º 1
0
    def test_dna_symbols(self):
        """Check eval_binary_feature with ``self.feature.reg_dna_symbols.match``.

        The test expects 'dna_symbols[0]' to be set for the input 'A' and to
        be missing (``get`` returns None) for the input 'asd'.
        """
        # Case 1: input the matcher is expected to accept.
        accepted = FeatureDictionary()
        eval_binary_feature(
            accepted, 'dna_symbols', self.feature.reg_dna_symbols.match, 'A')
        self.assertEqual(accepted.get('dna_symbols[0]'), True)

        # Case 2: input the matcher is expected to reject, so no entry is stored.
        rejected = FeatureDictionary()
        eval_binary_feature(
            rejected, 'dna_symbols', self.feature.reg_dna_symbols.match, 'asd')
        self.assertEqual(rejected.get('dna_symbols[0]'), None)
Ejemplo n.º 2
0
    def test_type2(self):
        """Check eval_binary_feature with the ``x == 'p'`` predicate.

        The test expects 'type2[0]' to equal 1 for the input 'p' and to be
        missing (``get`` returns None) for the input 'as'.
        """
        feature_dic = FeatureDictionary()
        eval_binary_feature(feature_dic, 'type2', lambda x: x == 'p', 'p')
        self.assertEqual(feature_dic.get('type2[0]'), 1)

        feature_dic = FeatureDictionary()
        eval_binary_feature(feature_dic, 'type2', lambda x: x == 'p', 'as')
        # Look up exactly the same key as the positive case above; a
        # misspelled key would not exercise the feature that was evaluated.
        self.assertEqual(feature_dic.get('type2[0]'), None)
Ejemplo n.º 3
0
    def test_mutation_word(self):
        """Check eval_binary_feature with ``self.feature.reg_mutat_word.match``.

        The test expects 'mutat_word[0]' to be set for the input 'repeats' and
        to be missing (``get`` returns None) for the input 'repssts'.
        """
        # Case 1: input the matcher is expected to accept.
        accepted = FeatureDictionary()
        eval_binary_feature(
            accepted, 'mutat_word', self.feature.reg_mutat_word.match, 'repeats')
        self.assertEqual(accepted.get('mutat_word[0]'), True)

        # Case 2: input the matcher is expected to reject, so no entry is stored.
        rejected = FeatureDictionary()
        eval_binary_feature(
            rejected, 'mutat_word', self.feature.reg_mutat_word.match, 'repssts')
        self.assertEqual(rejected.get('mutat_word[0]'), None)
Ejemplo n.º 4
0
    def test_regex_evaluator(self):
        """Check eval_binary_feature with a compiled pattern's ``search`` method.

        The pattern '^[A-Z]+$' is searched in 'ABC' and in 'abc'. The test
        expects a single entry 'name[0]' equal to 1 in the first case and an
        empty dictionary in the second.
        """
        uppercase_only = re.compile('^[A-Z]+$')

        # Input made only of capital letters: one feature is expected.
        with_match = FeatureDictionary()
        eval_binary_feature(with_match, 'name', uppercase_only.search, 'ABC')
        self.assertEqual(with_match.get('name[0]'), 1)
        self.assertEqual(len(with_match), 1)

        # Lower-case input: nothing is expected to be stored.
        without_match = FeatureDictionary()
        eval_binary_feature(without_match, 'name', uppercase_only.search, 'abc')
        self.assertEqual(without_match.get('name[0]'), None)
        self.assertEqual(len(without_match), 0)
Ejemplo n.º 5
0
    def generate(self, dataset):
        """
        Fill ``token.features`` for every token yielded by ``dataset.tokens()``.

        For each token, the results of this object's counting, word-shape,
        prefix and suffix helpers are stored under fixed feature names. When
        ``self.get_mutation_features`` is truthy, the mutation-related
        features are stored as well, including the binary ones delegated to
        ``eval_binary_feature``.

        :type dataset: nalaf.structures.data.Dataset
        """
        # Word of the previously processed token; it is only updated (and only
        # read) on the mutation-feature path below.
        previous_word = ""
        for token in dataset.tokens():
            word = token.word
            features = token.features

            # character counts
            features['num_nr'] = self.num_digits(word)
            features['num_up'] = self.num_capital_chars(word)
            features['num_lo'] = self.num_lower_chars(word)
            features['num_alpha'] = self.num_alpha(word)
            features['num_spec_chars'] = self.num_spec_chars(word)

            # word shapes
            features['shape1'] = self.word_shape_1(word)
            features['shape2'] = self.word_shape_2(word)
            features['shape3'] = self.word_shape_3(word)
            features['shape4'] = self.word_shape_4(word)

            # prefix patterns, numbered from 1
            for position, pattern in enumerate(self.prefix_pattern(word), start=1):
                features['prefix{}'.format(position)] = pattern

            # suffix patterns, numbered from 1
            for position, pattern in enumerate(self.suffix_pattern(word), start=1):
                features['suffix{}'.format(position)] = pattern

            # Everything below is mutation related and optional.
            if not self.get_mutation_features:
                continue

            features['mutat_article_bp'] = self.mutation_article_bp(word)
            features['type1'] = self.is_special_type_1(word)
            features['mutat_type'] = self.mutation_type(word)
            features['protein_symbols'] = self.has_protein_symbols(word, previous_word)
            features['rs_code'] = self.has_rscode(word)

            # binary features
            eval_binary_feature(features, 'mutat_word', self.reg_mutat_word.match, word.lower())
            eval_binary_feature(features, 'num_has_chr_key', self.reg_chr_keys.search, word)
            eval_binary_feature(features, 'type2', lambda x: x == 'p', word)
            eval_binary_feature(features, 'dna_symbols', self.reg_dna_symbols.match, word)

            # remember this word for the next token's protein-symbol check
            previous_word = word
Ejemplo n.º 6
0
    def test_lambda_evaluator(self):
        """Check eval_binary_feature with lambda evaluators of one and two arguments.

        For each evaluator the test covers an input for which the lambda
        returns True (a single entry 'name[0]' equal to 1 is expected) and an
        input for which it returns False (an empty dictionary is expected).
        """
        # One-argument lambda, condition holds.
        single_hit = FeatureDictionary()
        eval_binary_feature(single_hit, 'name', lambda x: x == 'ABC', 'ABC')
        self.assertEqual(single_hit.get('name[0]'), 1)
        self.assertEqual(len(single_hit), 1)

        # One-argument lambda, condition fails.
        single_miss = FeatureDictionary()
        eval_binary_feature(single_miss, 'name', lambda x: x == 'ABC', 'abc')
        self.assertEqual(single_miss.get('name[0]'), None)
        self.assertEqual(len(single_miss), 0)

        # Two-argument lambda, both extra arguments are forwarded and equal.
        pair_hit = FeatureDictionary()
        eval_binary_feature(
            pair_hit, 'name', lambda x, y: x == y, 'xx', 'xx')
        self.assertEqual(pair_hit.get('name[0]'), 1)
        self.assertEqual(len(pair_hit), 1)

        # Two-argument lambda, arguments differ.
        pair_miss = FeatureDictionary()
        eval_binary_feature(
            pair_miss, 'name', lambda x, y: x == y, 'xx', 'yy')
        self.assertEqual(pair_miss.get('name[0]'), None)
        self.assertEqual(len(pair_miss), 0)