def test_create_from_rsat_feature_names_no_transform(self):
     """Create a thesaurus from an RSAT feature names file, no transformers.

     Looks up the keys PRIME1, ALT1, PRIME2 and VNG2664Gm in the result
     and expects them to map to NAME1 / NAME2. The key 'VNG2664Gm' is
     looked up with its trailing 'm' because no key transformer is passed.
     """
     thes = thesaurus.create_from_rsat_feature_names(
         MockRsatFeatureNameFile())
     # assertEqual is the supported spelling; assertEquals is a deprecated
     # alias that newer unittest versions no longer provide
     self.assertEqual('NAME1', thes['PRIME1'])
     self.assertEqual('NAME1', thes['ALT1'])
     self.assertEqual('NAME2', thes['PRIME2'])
     self.assertEqual('NAME2', thes['VNG2664Gm'])
 def test_create_from_rsat_feature_names_with_transform(self):
     """Create a thesaurus from RSAT feature names using a key transformer.

     The single transformer passed here returns, for every key, both the
     key itself and the key with trailing 'm' characters stripped. The
     test therefore expects 'VNG2664G' (without the 'm') to be a valid
     lookup key mapping to NAME2.
     """
     thes = thesaurus.create_from_rsat_feature_names(
         MockRsatFeatureNameFile(), [lambda x: [x, x.rstrip('m')]])
     # assertEqual is the supported spelling; assertEquals is a deprecated
     # alias that newer unittest versions no longer provide
     self.assertEqual('NAME1', thes['PRIME1'])
     self.assertEqual('NAME1', thes['ALT1'])
     self.assertEqual('NAME2', thes['PRIME2'])
     self.assertEqual('NAME2', thes['VNG2664G'])
 def test_create_from_rsat_feature_names_no_transform(self):
     """Create a thesaurus from an RSAT feature names file, no transformers.

     Looks up the keys PRIME1, ALT1, PRIME2 and VNG2664Gm in the result
     and expects them to map to NAME1 / NAME2. The key 'VNG2664Gm' is
     looked up with its trailing 'm' because no key transformer is passed.
     """
     thes = thesaurus.create_from_rsat_feature_names(
         MockRsatFeatureNameFile())
     # assertEqual is the supported spelling; assertEquals is a deprecated
     # alias that newer unittest versions no longer provide
     self.assertEqual('NAME1', thes['PRIME1'])
     self.assertEqual('NAME1', thes['ALT1'])
     self.assertEqual('NAME2', thes['PRIME2'])
     self.assertEqual('NAME2', thes['VNG2664Gm'])
 def test_create_from_rsat_feature_names_with_transform(self):
     """Create a thesaurus from RSAT feature names using a key transformer.

     The single transformer passed here returns, for every key, both the
     key itself and the key with trailing 'm' characters stripped. The
     test therefore expects 'VNG2664G' (without the 'm') to be a valid
     lookup key mapping to NAME2.
     """
     thes = thesaurus.create_from_rsat_feature_names(
         MockRsatFeatureNameFile(), [lambda x: [x, x.rstrip('m')]])
     # assertEqual is the supported spelling; assertEquals is a deprecated
     # alias that newer unittest versions no longer provide
     self.assertEqual('NAME1', thes['PRIME1'])
     self.assertEqual('NAME1', thes['ALT1'])
     self.assertEqual('NAME2', thes['PRIME2'])
     self.assertEqual('NAME2', thes['VNG2664G'])
Example #5
0
 def addThesaurusFile(self, thFile, thFileType):
     """Merge the synonyms of a thesaurus file into self.__synonyms.

     thFile     -- thesaurus file, handed to util.DelimitedFile.read
     thFileType -- format of the file; only "RSAT" is handled so far,
                   other file types may be added later

     For any other type nothing is read or merged, and the type is
     reported as "unassigned - ERROR" in the log message.
     """
     if thFileType == "RSAT":
         infile = util.DelimitedFile.read(thFile, sep='\t', has_header=False, comment = "--")
         tempD = th.create_from_rsat_feature_names(infile)
         self.__synonyms = self.MergeDicts(self.__synonyms, tempD)
     else:
         # This used to be `==`, a comparison whose result was discarded,
         # so the log line below reported the unsupported type unchanged.
         thFileType = "unassigned - ERROR"
     logging.info("\x1b[31mKapsel:\t\x1b[0mThesaurus added, %s type", thFileType)
 def thesaurus(self):
     """Return the synonym thesaurus, building it on first use.

     The thesaurus is created from the feature names text that the RSAT
     database object returns for this species. The result is kept in
     self.__synonyms so that later calls reuse it instead of rebuilding.
     """
     if self.__synonyms:
         # already built by an earlier call
         return self.__synonyms

     names_text = self.__rsatdb().get_feature_names(self.species())
     names_dfile = util.DelimitedFile.create_from_text(names_text,
                                                       comment='--')
     key_transformers = [thesaurus.strip_vng_modification]
     self.__synonyms = thesaurus.create_from_rsat_feature_names(
         names_dfile, key_transformers)
     return self.__synonyms
Example #7
0
 def thesaurus(self):
     """Return the synonym thesaurus, building it on first use.

     The thesaurus is created from the feature names text that the RSAT
     database object returns for this species. The result is kept in
     self.__synonyms so that later calls reuse it instead of rebuilding.
     """
     if self.__synonyms:
         # already built by an earlier call
         return self.__synonyms

     names_text = self.__rsatdb().get_feature_names(self.species())
     names_dfile = util.dfile_from_text(names_text, comment='--')
     key_transformers = [thesaurus.strip_vng_modification]
     self.__synonyms = thesaurus.create_from_rsat_feature_names(
         names_dfile, key_transformers)
     return self.__synonyms
 def __make_organism(self):
     """Build a mock organism from the Halobacterium test data files.

     Reads one feature per line from the features file and a synonym
     thesaurus from the feature names file, then wraps both in a
     MockOrganismWithSynonyms with the code '64091'.
     """
     feature_rows = util.DelimitedFile.read(
         'testdata/Halobacterium_sp_features', comment='--')
     features = {}
     for row in feature_rows.lines():
         # columns 3-6 describe the location; the last flag is true when
         # column 6 is 'R' (presumably the reverse strand - TODO confirm)
         location = st.Location(row[3], int(row[4]), int(row[5]),
                                row[6] == 'R')
         features[row[0]] = st.Feature(row[0], row[1], row[2], location)

     names_file = util.DelimitedFile.read(
         'testdata/Halobacterium_sp_feature_names', comment='--')
     synonyms = th.create_from_rsat_feature_names(names_file)
     return MockOrganismWithSynonyms('64091', features, synonyms)
Example #9
0
 def __make_organism(self):
     """Build a mock organism from the Halobacterium test data files.

     Reads one feature per line from the features file and a synonym
     thesaurus from the feature names file, then wraps both in a
     MockOrganismWithSynonyms with the code '64091'.
     """
     feature_rows = util.read_dfile('testdata/Halobacterium_sp_features',
                                    comment='--')
     features = {}
     for row in feature_rows.lines:
         # columns 3-6 describe the location; the last flag is true when
         # column 6 is 'R' (presumably the reverse strand - TODO confirm)
         location = st.Location(row[3], int(row[4]), int(row[5]),
                                row[6] == 'R')
         features[row[0]] = st.Feature(row[0], row[1], row[2], location)

     names_file = util.read_dfile('testdata/Halobacterium_sp_feature_names',
                                  comment='--')
     synonyms = th.create_from_rsat_feature_names(names_file)
     return MockOrganismWithSynonyms('64091', features, synonyms)
Example #10
0
    print("extract_string_links.py, (c) 2012, Institute for Systems Biology")
    print('This program is licensed under the General Public License V3.')
    print('See README and LICENSE for details.\n')
    if len(sys.argv) <= 2:
        print(
            'Usage: python extract_string_links.py <stringdb-path> <organism-code>'
        )
    else:
        rsatdb = rsat.RsatDatabase(RSAT_BASE_URL, CACHE_DIR)
        kegg_mapper = organism.make_kegg_code_mapper(
            util.DelimitedFile.read(KEGG_FILE,
                                    sep='\t',
                                    has_header=True,
                                    comment='#'))
        kegg_org = kegg_mapper(sys.argv[2])
        rsat_info = organism.make_rsat_organism_mapper(rsatdb)(kegg_org)
        print "RSAT SPECIES: ", rsat_info.species
        print "TAX ID: ", rsat_info.taxonomy_id
        feature_names = rsatdb.get_feature_names(rsat_info.species)
        feature_names_dfile = util.DelimitedFile.create_from_text(
            feature_names, comment='--')
        synonyms = thesaurus.create_from_rsat_feature_names(
            feature_names_dfile)
        string_filepath = sys.argv[1]
        if string_filepath.endswith('.gz'):
            with gzip.open(string_filepath) as stringfile:
                process_stringdb(stringfile, rsat_info.taxonomy_id)
        else:
            with open(string_filepath) as stringfile:
                process_stringdb(stringfile, rsat_info.taxonomy_id)
    print "edges normalized."
    for edge in normalized_result:
        print "%s\t%s\%f" % (edge.source(), edge.target(), edge.score())
"""

if __name__ == '__main__':
    # Command line entry point.
    #   sys.argv[1] - STRING database file, gzip-compressed if it ends in .gz
    #   sys.argv[2] - organism code, resolved through the KEGG code mapper
    print("extract_string_links.py, (c) 2012, Institute for Systems Biology")
    print('This program is licensed under the General Public License V3.')
    print('See README and LICENSE for details.\n')
    if len(sys.argv) <= 2:
        print('Usage: python extract_string_links.py <stringdb-path> <organism-code>')
    else:
        rsatdb = rsat.RsatDatabase(RSAT_BASE_URL, CACHE_DIR)
        kegg_mapper = organism.make_kegg_code_mapper(
            util.DelimitedFile.read(KEGG_FILE, sep='\t', has_header=True,
                                    comment='#'))
        kegg_org = kegg_mapper(sys.argv[2])
        rsat_info = organism.make_rsat_organism_mapper(rsatdb)(kegg_org)

        # These two lines were Python 2 print statements, which do not
        # compile on Python 3. A single-argument print() with %-formatting
        # emits the same text (including the double space) on both.
        print("RSAT SPECIES:  %s" % (rsat_info.species,))
        print("TAX ID:  %s" % (rsat_info.taxonomy_id,))

        feature_names = rsatdb.get_feature_names(rsat_info.species)
        feature_names_dfile = util.DelimitedFile.create_from_text(
            feature_names, comment='--')
        # NOTE(review): not used below, but this is a module-level name and
        # may be read by process_stringdb - kept, confirm before removing.
        synonyms = thesaurus.create_from_rsat_feature_names(feature_names_dfile)

        string_filepath = sys.argv[1]
        # Both branches did the same work; only the opener differs.
        opener = gzip.open if string_filepath.endswith('.gz') else open
        with opener(string_filepath) as stringfile:
            process_stringdb(stringfile, rsat_info.taxonomy_id)