def test_create_from_rsat_feature_names_no_transform(self):
    """test the creation from RSAT feature names file

    Without a key transformer, both primary and alternative names must
    be usable as lookup keys, and a key such as 'VNG2664Gm' is looked up
    exactly as it is spelled in the file.
    """
    thes = thesaurus.create_from_rsat_feature_names(
        MockRsatFeatureNameFile())
    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual('NAME1', thes['PRIME1'])
    self.assertEqual('NAME1', thes['ALT1'])
    self.assertEqual('NAME2', thes['PRIME2'])
    self.assertEqual('NAME2', thes['VNG2664Gm'])
def test_create_from_rsat_feature_names_with_transform(self):
    """test the creation from RSAT feature names using a key transformer

    The transformer maps every key to a list holding the key itself and
    the key with trailing 'm' characters stripped, so the entry must be
    reachable through the stripped form 'VNG2664G'.
    """
    thes = thesaurus.create_from_rsat_feature_names(
        MockRsatFeatureNameFile(),
        [lambda x: [x, x.rstrip('m')]])
    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual('NAME1', thes['PRIME1'])
    self.assertEqual('NAME1', thes['ALT1'])
    self.assertEqual('NAME2', thes['PRIME2'])
    self.assertEqual('NAME2', thes['VNG2664G'])
def addThesaurusFile(self, thFile, thFileType):
    """Merge the synonyms found in a thesaurus file into this object.

    thFile     -- path of the thesaurus file to read
    thFileType -- format of the file; only "RSAT" is handled so far

    For the "RSAT" type the file is read as a tab-separated, header-less
    table with "--" as comment marker, and the resulting thesaurus is
    merged into the existing synonyms through self.MergeDicts.
    Any other type leaves the synonyms untouched and is reported in the
    log message as "unassigned - ERROR".
    """
    if thFileType == "RSAT":
        infile = util.DelimitedFile.read(thFile, sep='\t',
                                         has_header=False, comment="--")
        tempD = th.create_from_rsat_feature_names(infile)
        self.__synonyms = self.MergeDicts(self.__synonyms, tempD)
    else:
        # was `==`, a comparison whose result was discarded, so the
        # unsupported type was logged as if it had been added
        thFileType = "unassigned - ERROR"
    logging.info("\x1b[31mKapsel:\t\x1b[0mThesaurus added, %s type",
                 thFileType)
def thesaurus(self):
    """Return the synonym thesaurus built from the RSAT feature_names file.

    The thesaurus is built on first use and then kept on the instance,
    because it is requested many times.
    """
    # cached copy available: hand it out without touching the database
    if self.__synonyms:
        return self.__synonyms

    names_text = self.__rsatdb().get_feature_names(self.species())
    names_dfile = util.DelimitedFile.create_from_text(names_text,
                                                      comment='--')
    key_transformers = [thesaurus.strip_vng_modification]
    self.__synonyms = thesaurus.create_from_rsat_feature_names(
        names_dfile, key_transformers)
    return self.__synonyms
def thesaurus(self):
    """Return the synonym thesaurus built from the RSAT feature_names file.

    The result is stored on the instance after the first call, since the
    thesaurus is looked up many times.
    """
    # already built: reuse the stored thesaurus
    if self.__synonyms:
        return self.__synonyms

    raw_feature_names = self.__rsatdb().get_feature_names(self.species())
    names_dfile = util.dfile_from_text(raw_feature_names, comment='--')
    key_transformers = [thesaurus.strip_vng_modification]
    self.__synonyms = thesaurus.create_from_rsat_feature_names(
        names_dfile, key_transformers)
    return self.__synonyms
def __make_organism(self):
    """Build a mock organism from the Halobacterium test data files.

    Features come from 'testdata/Halobacterium_sp_features' and the
    synonyms from 'testdata/Halobacterium_sp_feature_names'.
    """
    feature_table = util.DelimitedFile.read(
        'testdata/Halobacterium_sp_features', comment='--')
    features = {}
    for row in feature_table.lines():
        feature_id = row[0]
        # column 6 holds the strand marker; 'R' sets the last Location flag
        location = st.Location(row[3], int(row[4]), int(row[5]),
                               row[6] == 'R')
        features[feature_id] = st.Feature(feature_id, row[1], row[2],
                                          location)

    names_table = util.DelimitedFile.read(
        'testdata/Halobacterium_sp_feature_names', comment='--')
    synonyms = th.create_from_rsat_feature_names(names_table)
    return MockOrganismWithSynonyms('64091', features, synonyms)
def __make_organism(self):
    """Build a mock organism from the Halobacterium test data files.

    Features come from 'testdata/Halobacterium_sp_features' and the
    synonyms from 'testdata/Halobacterium_sp_feature_names'.
    """
    feature_table = util.read_dfile('testdata/Halobacterium_sp_features',
                                    comment='--')
    features = {}
    for row in feature_table.lines:
        feature_id = row[0]
        # column 6 holds the strand marker; 'R' sets the last Location flag
        location = st.Location(row[3], int(row[4]), int(row[5]),
                               row[6] == 'R')
        features[feature_id] = st.Feature(feature_id, row[1], row[2],
                                          location)

    names_table = util.read_dfile('testdata/Halobacterium_sp_feature_names',
                                  comment='--')
    synonyms = th.create_from_rsat_feature_names(names_table)
    return MockOrganismWithSynonyms('64091', features, synonyms)
# Command-line entry point: map the KEGG organism code to its RSAT entry,
# build the synonym thesaurus from the RSAT feature names and pass the
# STRING database file (plain or gzip-compressed) to process_stringdb().
print("extract_string_links.py, (c) 2012, Institute for Systems Biology")
print('This program is licensed under the General Public License V3.')
print('See README and LICENSE for details.\n')

if len(sys.argv) <= 2:
    print('Usage: python extract_string_links.py <stringdb-path> <organism-code>')
else:
    rsatdb = rsat.RsatDatabase(RSAT_BASE_URL, CACHE_DIR)
    kegg_mapper = organism.make_kegg_code_mapper(
        util.DelimitedFile.read(KEGG_FILE, sep='\t', has_header=True,
                                comment='#'))
    kegg_org = kegg_mapper(sys.argv[2])
    rsat_info = organism.make_rsat_organism_mapper(rsatdb)(kegg_org)

    # Single-argument print(...) behaves the same as a print statement and
    # as a function call; the two spaces reproduce the output of the former
    # `print "label: ", value` statements byte for byte.
    print("RSAT SPECIES:  %s" % (rsat_info.species,))
    print("TAX ID:  %s" % (rsat_info.taxonomy_id,))

    feature_names = rsatdb.get_feature_names(rsat_info.species)
    feature_names_dfile = util.DelimitedFile.create_from_text(
        feature_names, comment='--')
    # NOTE(review): `synonyms` is not used below; presumably it is read as
    # a module-level name by process_stringdb -- confirm before removing.
    synonyms = thesaurus.create_from_rsat_feature_names(feature_names_dfile)

    string_filepath = sys.argv[1]
    # choose the opener once instead of duplicating the processing call
    if string_filepath.endswith('.gz'):
        open_stringdb = gzip.open
    else:
        open_stringdb = open
    with open_stringdb(string_filepath) as stringfile:
        process_stringdb(stringfile, rsat_info.taxonomy_id)
print "edges normalized." for edge in normalized_result: print "%s\t%s\%f" % (edge.source(), edge.target(), edge.score()) """ if __name__ == '__main__': print("extract_string_links.py, (c) 2012, Institute for Systems Biology") print('This program is licensed under the General Public License V3.') print('See README and LICENSE for details.\n') if len(sys.argv) <= 2: print('Usage: python extract_string_links.py <stringdb-path> <organism-code>') else: rsatdb = rsat.RsatDatabase(RSAT_BASE_URL, CACHE_DIR) kegg_mapper = organism.make_kegg_code_mapper(util.DelimitedFile.read(KEGG_FILE, sep='\t', has_header=True, comment='#')) kegg_org = kegg_mapper(sys.argv[2]) rsat_info = organism.make_rsat_organism_mapper(rsatdb)(kegg_org) print "RSAT SPECIES: ", rsat_info.species print "TAX ID: ", rsat_info.taxonomy_id feature_names = rsatdb.get_feature_names(rsat_info.species) feature_names_dfile = util.DelimitedFile.create_from_text(feature_names, comment='--') synonyms = thesaurus.create_from_rsat_feature_names(feature_names_dfile) string_filepath = sys.argv[1] if string_filepath.endswith('.gz'): with gzip.open(string_filepath) as stringfile: process_stringdb(stringfile, rsat_info.taxonomy_id) else: with open(string_filepath) as stringfile: process_stringdb(stringfile, rsat_info.taxonomy_id)