Ejemplo n.º 1
0
class TestNameUtils(unittest.TestCase):
    """Tests for ``NameMap.add_mappings`` using the CSV fixtures in ``test_data``.

    Each test starts from a ``NameMap`` built from ``test_map_0.pkl``.
    """

    def setUp(self):
        """Create the ``NameMap`` under test from the ``test_map_0.pkl`` fixture."""
        self.name_map = NameMap('test_data/test_maps/test_map_0.pkl')

    def test_add_mappings1(self):
        """After adding ``test_map_0.csv`` two names expose the expected standard name."""
        self.name_map.add_mappings('test_data/test_map_csv/test_map_0.csv')
        # Single-argument print(...) behaves the same as the Python 2 print
        # statement and is also valid syntax on Python 3.
        print(self.name_map)
        self.assertEqual(self.name_map['alanine'][standard_name], 'Alanine')
        self.assertEqual(self.name_map['anthranilate'][standard_name], '2-Aminobenzoic acid')

    def test_add_mappings2(self):
        """After adding ``test_map_1.csv`` three entries carry the full expected id dicts."""
        self.name_map.add_mappings('test_data/test_map_csv/test_map_1.csv')
        print(self.name_map)
        self.assertEqual(self.name_map['cystathionine'],
                         {standard_name: 'L-Cystathionine', hmdb: 'HMDB00099', pubchem: '439258', kegg: 'C02291'})

        self.assertEqual(self.name_map['proline'],
                         {standard_name: 'L-Proline', hmdb: 'HMDB00162', pubchem: '145742', kegg: 'C00148'})

        # Note: this entry is expected to have no pubchem id.
        self.assertEqual(self.name_map['alanine'],
                         {standard_name: 'Alanine', hmdb: 'METPA0179', kegg: 'C01401'})

    def test_remove_mappings(self):
        # TODO: placeholder - no removal behaviour is asserted yet, so this
        # test currently passes vacuously.
        pass
Ejemplo n.º 2
0
def standardized_compound_names(original_names,
                                map_pkl_path=default_map_pkl_path,
                                id_type=standard_name):
    """
    Translate each name in ``original_names`` to its ``id_type`` identifier.

    Every name first has its suffix removed with ``_strip_suffix``. When the
    stripped name is present in the name map and its entry carries
    ``id_type``, that identifier is used; in every other case the stripped
    name itself is used.

    The input is not modified: a new list is built and returned, so callers
    keep their original names.

    :param original_names: An iterable of original field names to be standardized
    :param map_pkl_path: Path to a pkl file containing mappings used to standardize the names
    :param id_type: Key of the identifier to read from each name-map entry
    :return: A new list of standardized field names, in the same order as the input
    """
    print('Standardizing names to: {0}'.format(id_type))
    name_map = NameMap(map_pkl_path)
    standardized = []
    for original in original_names:
        name = _strip_suffix(original)
        if name in name_map:
            entry = name_map[name]
            # An entry without the requested id type falls back to the stripped name.
            new_id = entry[id_type] if id_type in entry else name
            print('{0} --> {1}'.format(original, new_id))
        else:
            new_id = name
        standardized.append(new_id)
    return standardized
Ejemplo n.º 3
0
        print "Incorporating all name map files in {0}.".format(
            args.find_in_dir)
        name_map_files = [
            join(args.find_in_dir, f) for f in listdir(args.find_in_dir)
            if f.endswith('.csv')
        ]

        # Find "manual" csv files and move them to the end so that their contents take priority.
        index_of_manual = filter(
            lambda j: 'manual' in basename(name_map_files[j]),
            range(len(name_map_files)))
        index_of_manual.sort(reverse=True)
        for i in index_of_manual:
            name_map_files.append(name_map_files.pop(i))

    current_map = NameMap()

    if args.clear_first:
        current_map.clear()

    for f in name_map_files:
        print f
        if args.a:
            current_map.add_mappings(f)
        elif args.d:
            current_map.remove_mappings(f)
        elif args.prune_csv:
            name_map_utils.prune(f)
        else:
            print('There are currently {0} values in the name map.'.format(
                len(current_map)))
Ejemplo n.º 4
0
 def setUp(self):
     """Reset the ``test_map.pkl`` map to two known entries before each test.

     The map is cleared, seeded with 'existing entry 1' -> 'foo' and
     'existing entry 2' -> 'bar', and then ``_commit()`` is called
     (presumably to persist the seeded state - confirm in ``NameMap``).
     """
     fixture = NameMap('test_data/test_maps/test_map.pkl')
     self.name_map = fixture
     fixture.clear()
     for alias, canonical in (('existing entry 1', 'foo'),
                              ('existing entry 2', 'bar')):
         fixture[alias] = {standard_name: canonical}
     fixture._commit()
Ejemplo n.º 5
0
class SimpleTestNameMapUtils(unittest.TestCase):
    """Tests for ``NameMap`` reading, adding, removing and clearing mappings.

    Every test starts from a map that holds exactly two seeded entries
    ('existing entry 1' -> 'foo', 'existing entry 2' -> 'bar').
    """

    def setUp(self):
        """Clear the ``test_map.pkl`` map, seed two known entries and commit."""
        self.name_map = NameMap('test_data/test_maps/test_map.pkl')
        self.name_map.clear()
        self.name_map['existing entry 1'] = {standard_name: 'foo'}
        self.name_map['existing entry 2'] = {standard_name: 'bar'}
        self.name_map._commit()

    def tearDown(self):
        pass

    def test_get_map_from_csv(self):
        """``get_map_from_csv`` returns exactly the mappings listed in the tiny csv."""
        result_map = self.name_map.get_map_from_csv('./test_data/test_map_csv/test_map_tiny.csv')
        self.assertTrue(contains_only(result_map, {'speshul': {standard_name: 'special'},
                                                   'tomahto': {standard_name: 'tomato'},
                                                   'weird': {standard_name: 'unicorn'},
                                                   'existing entry 1': {standard_name: 'not foo'}}))

    def test_add_mappings(self):
        """Adding the small csv exposes the new names and keeps the seeded entries."""
        self.name_map.add_mappings('./test_data/test_map_csv/test_map_small.csv')
        self.assertEqual(self.name_map['speshul'][standard_name], 'special')
        self.assertEqual(self.name_map['speshol'][standard_name], 'special')
        self.assertEqual(self.name_map['irregardless of'][standard_name], 'regardless of')
        self.assertEqual(self.name_map['tomahto'][standard_name], 'tomato')
        # The seeded entries must keep their original values.
        self.assertEqual(self.name_map['existing entry 1'][standard_name], 'foo')
        self.assertEqual(self.name_map['existing entry 2'][standard_name], 'bar')
        # Pickle data is binary, so the file is opened in 'rb' mode.
        with open(self.name_map.map_pkl_path, 'rb') as map_pkl:
            # Smoke check only: the pkl file must unpickle without error; its
            # contents are not asserted in this test.
            pickle.load(map_pkl)

    def test_remove_mappings(self):
        """Removing the tiny csv's mappings is reflected in memory and in the pkl file."""
        self.name_map.add_mappings('test_data/test_map_csv/test_map_small.csv')
        self.name_map.remove_mappings('test_data/test_map_csv/test_map_tiny.csv')
        self.assertEqual(self.name_map['speshol'][standard_name], 'special')
        self.assertFalse('speshul' in self.name_map)
        # Pickle data is binary, so the file is opened in 'rb' mode.
        with open(self.name_map.map_pkl_path, 'rb') as map_pkl:
            loaded_map = pickle.load(map_pkl)
            self.assertTrue(contains_only(loaded_map, {
                'speshol': {standard_name: 'special'},
                'irregardless of': {standard_name: 'regardless of'},
                'existing entry 1': {standard_name: 'foo'},
                'existing entry 2': {standard_name: 'bar'}
            }))

    def test_clear(self):
        """``clear`` empties both the in-memory map and the pkl file."""
        self.name_map.clear()
        self.assertEqual(len(self.name_map), 0)
        # Pickle data is binary, so the file is opened in 'rb' mode.
        with open(self.name_map.map_pkl_path, 'rb') as map_pkl:
            loaded_map = pickle.load(map_pkl)
            self.assertEqual(len(loaded_map), 0)
Ejemplo n.º 6
0
 def setUp(self):
     """Build the ``NameMap`` under test from the ``test_map_0.pkl`` fixture path."""
     pkl_path = 'test_data/test_maps/test_map_0.pkl'
     self.name_map = NameMap(pkl_path)
Ejemplo n.º 7
0
    # Choose the csv files to process: one explicit file, or every csv in a
    # directory.
    # NOTE(review): if neither option is set, name_map_files is not assigned
    # in this section - confirm it is initialised (or the arguments are
    # validated) before this point.
    if args.use_file:
        use_file = join(args.find_in_dir, args.use_file)
        print("Incorporating file {0}".format(use_file))
        name_map_files = [use_file]
    elif args.find_in_dir:
        print("Incorporating all name map files in {0}.".format(args.find_in_dir))
        csv_paths = [join(args.find_in_dir, entry) for entry in listdir(args.find_in_dir)
                     if entry.endswith('.csv')]

        # "Manual" csv files are moved to the end so that they are processed
        # last and their contents take priority. The manual files end up in
        # reverse of their listing order, exactly as the previous
        # pop-from-the-back/append loop produced.
        manual_paths = [p for p in csv_paths if 'manual' in basename(p)]
        other_paths = [p for p in csv_paths if 'manual' not in basename(p)]
        name_map_files = other_paths + manual_paths[::-1]

    current_map = NameMap()

    if args.clear_first:
        current_map.clear()

    # Exactly one action runs per file, checked in the order add, remove,
    # prune; with no action flag set, the current map size is reported
    # once for every file.
    for map_file in name_map_files:
        print(map_file)
        if args.a:
            current_map.add_mappings(map_file)
        elif args.d:
            current_map.remove_mappings(map_file)
        elif args.prune_csv:
            name_map_utils.prune(map_file)
        else:
            print('There are currently {0} values in the name map.'.format(len(current_map)))