class TestNameUtils(unittest.TestCase): def setUp(self): self.name_map = NameMap('test_data/test_maps/test_map_0.pkl') def test_add_mappings1(self): self.name_map.add_mappings('test_data/test_map_csv/test_map_0.csv') print self.name_map self.assertEqual(self.name_map['alanine'][standard_name], 'Alanine') self.assertEqual(self.name_map['anthranilate'][standard_name], '2-Aminobenzoic acid') def test_add_mappings2(self): self.name_map.add_mappings('test_data/test_map_csv/test_map_1.csv') print self.name_map self.assertEqual(self.name_map['cystathionine'], {standard_name: 'L-Cystathionine', hmdb: 'HMDB00099', pubchem: '439258', kegg: 'C02291'}) self.assertEqual(self.name_map['proline'], {standard_name: 'L-Proline', hmdb: 'HMDB00162', pubchem: '145742', kegg: 'C00148'}) self.assertEqual(self.name_map['alanine'], {standard_name: 'Alanine', hmdb: 'METPA0179', kegg: 'C01401'}) def test_remove_mappings(self): pass
def standardized_compound_names(original_names, map_pkl_path=default_map_pkl_path, id_type=standard_name): """ :param original_names: A list of original field names to be standardized :param map_pkl_path: Path to a pkl file containing mappings used to standardize the names :return: A list of standardized field names """ print 'Standardizing names to: {0}'.format(id_type) name_map = NameMap(map_pkl_path) standardized = original_names for i in range(len(standardized)): name = _strip_suffix(standardized[i]) if name in name_map: new_id = name_map[name][id_type] if id_type in name_map[ name] else name print '{0} --> {1}'.format(standardized[i], new_id) standardized[i] = new_id else: standardized[i] = name return standardized
# NOTE(review): Python 2 script-level driver fragment (duplicated at the end of
# this file): collects every .csv under args.find_in_dir, moves files with
# 'manual' in their basename to the end of the list so their mappings win, then
# applies add/remove/prune to a NameMap according to the -a/-d/--prune_csv
# flags. Whitespace was lost when this chunk was flattened, so the binding of
# the trailing `else:` (which reports the map size) cannot be confirmed here —
# it presumably pairs with an argument-dispatch `if` that begins before this
# fragment; verify against the original file before reformatting.
print "Incorporating all name map files in {0}.".format( args.find_in_dir) name_map_files = [ join(args.find_in_dir, f) for f in listdir(args.find_in_dir) if f.endswith('.csv') ] # Find "manual" csv files and move them to the end so that their contents take priority. index_of_manual = filter( lambda j: 'manual' in basename(name_map_files[j]), range(len(name_map_files))) index_of_manual.sort(reverse=True) for i in index_of_manual: name_map_files.append(name_map_files.pop(i)) current_map = NameMap() if args.clear_first: current_map.clear() for f in name_map_files: print f if args.a: current_map.add_mappings(f) elif args.d: current_map.remove_mappings(f) elif args.prune_csv: name_map_utils.prune(f) else: print('There are currently {0} values in the name map.'.format( len(current_map)))
def setUp(self):
    """Reset the shared test map to a known two-entry state and persist it."""
    self.name_map = NameMap('test_data/test_maps/test_map.pkl')
    self.name_map.clear()
    seed_entries = (
        ('existing entry 1', 'foo'),
        ('existing entry 2', 'bar'),
    )
    for key, value in seed_entries:
        self.name_map[key] = {standard_name: value}
    self.name_map._commit()
class SimpleTestNameMapUtils(unittest.TestCase):
    """Basic NameMap behavior: CSV loading, add/remove, clear, and persistence."""

    def setUp(self):
        """Reset the fixture map to a known two-entry state and persist it."""
        self.name_map = NameMap('test_data/test_maps/test_map.pkl')
        self.name_map.clear()
        self.name_map['existing entry 1'] = {standard_name: 'foo'}
        self.name_map['existing entry 2'] = {standard_name: 'bar'}
        self.name_map._commit()

    def tearDown(self):
        pass

    def test_get_map_from_csv(self):
        """Parsing a CSV yields exactly the mappings it declares."""
        result_map = self.name_map.get_map_from_csv('./test_data/test_map_csv/test_map_tiny.csv')
        self.assertTrue(contains_only(result_map, {
            'speshul': {standard_name: 'special'},
            'tomahto': {standard_name: 'tomato'},
            'weird': {standard_name: 'unicorn'},
            'existing entry 1': {standard_name: 'not foo'}}))

    def test_add_mappings(self):
        """add_mappings merges CSV entries without clobbering existing ones."""
        self.name_map.add_mappings('./test_data/test_map_csv/test_map_small.csv')
        self.assertEqual(self.name_map['speshul'][standard_name], 'special')
        self.assertEqual(self.name_map['speshol'][standard_name], 'special')
        self.assertEqual(self.name_map['irregardless of'][standard_name], 'regardless of')
        self.assertEqual(self.name_map['tomahto'][standard_name], 'tomato')
        # Pre-existing entries must not be overwritten by the CSV.
        self.assertEqual(self.name_map['existing entry 1'][standard_name], 'foo')
        self.assertEqual(self.name_map['existing entry 2'][standard_name], 'bar')
        # Open in binary mode: pickled data is not text — text mode corrupts it
        # on some platforms and is rejected outright by pickle under Python 3.
        with open(self.name_map.map_pkl_path, 'rb') as map_pkl:
            loaded_map = pickle.load(map_pkl)
        # The original test loaded the pickle but never asserted on it; check
        # that the newly added mapping was actually persisted.
        self.assertEqual(loaded_map['speshul'][standard_name], 'special')

    def test_remove_mappings(self):
        """remove_mappings deletes exactly the entries listed in the CSV."""
        self.name_map.add_mappings('test_data/test_map_csv/test_map_small.csv')
        self.name_map.remove_mappings('test_data/test_map_csv/test_map_tiny.csv')
        self.assertEqual(self.name_map['speshol'][standard_name], 'special')
        self.assertFalse('speshul' in self.name_map)
        # Binary mode for pickle — see test_add_mappings.
        with open(self.name_map.map_pkl_path, 'rb') as map_pkl:
            loaded_map = pickle.load(map_pkl)
        self.assertTrue(contains_only(loaded_map, {
            'speshol': {standard_name: 'special'},
            'irregardless of': {standard_name: 'regardless of'},
            'existing entry 1': {standard_name: 'foo'},
            'existing entry 2': {standard_name: 'bar'}
        }))

    def test_clear(self):
        """clear() empties both the in-memory map and the persisted pickle."""
        self.name_map.clear()
        self.assertEqual(len(self.name_map), 0)
        # Binary mode for pickle — see test_add_mappings.
        with open(self.name_map.map_pkl_path, 'rb') as map_pkl:
            loaded_map = pickle.load(map_pkl)
        self.assertEqual(len(loaded_map), 0)
def setUp(self):
    """Load the shared fixture map used by this test case."""
    fixture_pkl = 'test_data/test_maps/test_map_0.pkl'
    self.name_map = NameMap(fixture_pkl)
# NOTE(review): Python 2 CLI entry logic. Dispatch: a single named file
# (args.use_file) or every .csv in a directory (args.find_in_dir) is collected
# into name_map_files; 'manual' files are moved to the end so their mappings
# take priority; each file is then added to / removed from / pruned against a
# NameMap per the -a/-d/--prune_csv flags. Whitespace was lost when this chunk
# was flattened, so the binding of the trailing `else:` (which reports the map
# size) is ambiguous — it most plausibly pairs with the use_file/find_in_dir
# dispatch (report-only mode when neither source is given), but it could also
# close the per-file flag chain; confirm against the original file before
# reformatting or refactoring.
if args.use_file: use_file = join(args.find_in_dir, args.use_file) print "Incorporating file {0}".format(use_file) name_map_files = [use_file] elif args.find_in_dir: print "Incorporating all name map files in {0}.".format(args.find_in_dir) name_map_files = [join(args.find_in_dir, f) for f in listdir(args.find_in_dir) if f.endswith('.csv')] # Find "manual" csv files and move them to the end so that their contents take priority. index_of_manual = filter(lambda j: 'manual' in basename(name_map_files[j]), range(len(name_map_files))) index_of_manual.sort(reverse=True) for i in index_of_manual: name_map_files.append(name_map_files.pop(i)) current_map = NameMap() if args.clear_first: current_map.clear() for f in name_map_files: print f if args.a: current_map.add_mappings(f) elif args.d: current_map.remove_mappings(f) elif args.prune_csv: name_map_utils.prune(f) else: print('There are currently {0} values in the name map.'.format(len(current_map)))