def get_rsat_organism(self, kegg_organism):
    """Return the RSAT directory name to use for this organism.

    The RSAT directory listing is read with util.read_url_cached() (using
    'rsat_dir.html' inside self.cache_dir as the cache file) and matched
    twice with util.best_matching_links(): once against self.kegg_species
    and once against kegg_organism. The first candidate whose NCBI code
    equals self.ncbi_code (compared as strings) is returned with trailing
    slashes removed.

    If neither candidate can be confirmed, a warning is logged and the
    match for self.kegg_species is returned anyway.
    """
    logging.debug('RSAT - get_directory()')
    cache_file = "/".join([self.cache_dir, 'rsat_dir.html'])
    text = util.read_url_cached(
        "/".join([self.base_url, RsatDatabase.DIR_PATH]), cache_file)
    suggestion1 = util.best_matching_links(
        self.kegg_species, text)[0].rstrip('/')
    suggestion2 = util.best_matching_links(
        kegg_organism, text)[0].rstrip('/')

    expected = str(self.ncbi_code)
    if str(self.__get_ncbi_code(suggestion1)) == expected:
        return suggestion1
    # The second lookup is only needed when the first candidate failed and
    # the second candidate is actually a different directory.
    if (suggestion1 != suggestion2 and
            str(self.__get_ncbi_code(suggestion2)) == expected):
        return suggestion2

    # logging.warn is a deprecated alias; logging.warning is the
    # supported spelling on both Python 2 and Python 3.
    logging.warning("can't find the correct RSAT mapping !")
    return suggestion1
def get_rsat_organism(self, kegg_organism):
    """Pick the RSAT directory entry that corresponds to this organism.

    Two candidate names are taken from the RSAT directory listing: the
    best util.best_matching_links() hit for self.kegg_species and the best
    hit for kegg_organism (trailing slashes stripped from both). The
    candidates are tried in that order and the first one whose NCBI code
    equals self.ncbi_code, compared as strings, is returned.

    When no candidate can be verified, a warning is logged and the
    candidate derived from self.kegg_species is returned as a fallback.
    """
    logging.debug('RSAT - get_directory()')
    cache_file = "/".join([self.cache_dir, 'rsat_dir.html'])
    text = util.read_url_cached(
        "/".join([self.base_url, RsatDatabase.DIR_PATH]), cache_file)
    suggestion1 = util.best_matching_links(
        self.kegg_species, text)[0].rstrip('/')
    suggestion2 = util.best_matching_links(
        kegg_organism, text)[0].rstrip('/')

    # Avoid checking the same directory twice when both queries agree.
    candidates = [suggestion1]
    if suggestion2 != suggestion1:
        candidates.append(suggestion2)

    expected_code = str(self.ncbi_code)
    for candidate in candidates:
        # NCBI codes are only fetched until the first confirmed candidate.
        if str(self.__get_ncbi_code(candidate)) == expected_code:
            return candidate

    # logging.warning replaces the deprecated logging.warn alias.
    logging.warning("can't find the correct RSAT mapping !")
    return suggestion1
def get_rsat_organism(self, kegg_organism):
    """returns the entry of the RSAT directory listing that best matches
    kegg_organism, with trailing slashes removed"""
    logging.info('RSAT - get_directory()')
    cache_path = "/".join([self.cache_dir, 'rsat_dir.html'])
    listing_url = "/".join([self.base_url, RsatDatabase.DIR_PATH])
    listing_html = util.read_url_cached(listing_url, cache_path)
    # the first element returned by best_matching_links is used as the match
    candidates = util.best_matching_links(kegg_organism, listing_html)
    return candidates[0].rstrip('/')
def get_rsat_organism(self, kegg_organism):
    """looks up kegg_organism in the RSAT directory listing and returns
    the first matching link, stripped of trailing slashes"""
    logging.info('RSAT - get_directory()')
    cache_file = "%s/%s" % (self.cache_dir, 'rsat_dir.html')
    directory_url = "%s/%s" % (self.base_url, RsatDatabase.DIR_PATH)
    page = util.read_url_cached(directory_url, cache_file)
    top_link = util.best_matching_links(kegg_organism, page)[0]
    return top_link.rstrip('/')
def mapper_fun(kegg_organism):
    """Mapper function that builds the RsatSpeciesInfo for a KEGG organism.

    The organism name is matched against the RSAT directory listing and
    the first match (without trailing slashes) is used as the RSAT
    organism name for the eukaryote check and the taxonomy id lookup."""
    links = util.best_matching_links(kegg_organism, rsatdb.get_directory())
    rsat_organism = links[0].rstrip('/')
    eukaryote = is_eukaryote(rsat_organism)
    taxonomy_id = get_taxonomy_id(rsat_organism)
    return RsatSpeciesInfo(rsatdb, rsat_organism, eukaryote, taxonomy_id)
def mapper_fun(kegg_organism):
    """Mapper function to return basic information about an organism
    stored in the RSAT database.

    kegg_organism is first replaced by its entry in
    patches.KEGG_EXCEPTIONS when one exists, then matched against the RSAT
    directory listing. The first match, stripped of trailing slashes, is
    used to build the returned RsatSpeciesInfo."""
    # in many cases, the fuzzy match delivers the correct RSAT organism
    # name, but there are exceptions
    # NOTE(review): .get assumes KEGG_EXCEPTIONS is a dict-like mapping
    kegg_organism = patches.KEGG_EXCEPTIONS.get(kegg_organism, kegg_organism)
    rsat_organism = util.best_matching_links(
        kegg_organism, rsatdb.get_directory())[0].rstrip('/')
    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("mapper_fun(), kegg org = '%s', rsat org = '%s'" %
          (kegg_organism, rsat_organism))
    return RsatSpeciesInfo(rsatdb, rsat_organism,
                           is_eukaryote(rsat_organism),
                           get_taxonomy_id(rsat_organism))
def test_best_rsat_matches(self):
    """test the best_matching_links function

    Reads the HTML file at RSAT_LIST_FILE_PATH and checks that the first
    match for 'Halobacterium' is the 'Halobacterium_sp/' link."""
    with open(RSAT_LIST_FILE_PATH) as inputfile:
        html = inputfile.read()
    matches = util.best_matching_links('Halobacterium', html)
    # assertEquals is a deprecated alias that was removed in Python 3.12;
    # assertEqual is available on Python 2.7 and every Python 3 release.
    self.assertEqual("Halobacterium_sp/", matches[0])