def get_rsat_organism(self, kegg_organism):
    """Return the RSAT directory name that best matches this organism.

    The RSAT directory listing is read through ``util.read_url_cached``
    (cached as ``rsat_dir.html`` under ``self.cache_dir``). Two candidates
    are taken from it: the first link matching ``self.kegg_species`` and the
    first link matching ``kegg_organism``, each with trailing ``/`` removed.

    The candidate whose NCBI code (as reported by ``__get_ncbi_code``)
    equals ``self.ncbi_code`` is returned, with the ``self.kegg_species``
    candidate taking precedence. If neither candidate matches, a warning is
    logged and the ``self.kegg_species`` candidate is returned anyway.

    :param kegg_organism: organism name used to find the second candidate
    :return: the selected RSAT directory name, without a trailing slash
    """
    logging.debug('RSAT - get_directory()')
    cache_file = "/".join([self.cache_dir, 'rsat_dir.html'])
    listing_url = "/".join([self.base_url, RsatDatabase.DIR_PATH])
    text = util.read_url_cached(listing_url, cache_file).decode('utf-8')

    # Presumably best_matching_links() returns candidates best-first, so
    # index 0 is the top suggestion -- confirm against util.
    suggestion1 = util.best_matching_links(self.kegg_species, text)[0].rstrip('/')
    suggestion2 = util.best_matching_links(kegg_organism, text)[0].rstrip('/')

    # Codes are compared as strings, so int and str representations of the
    # same code are treated as equal.
    expected_code = str(self.ncbi_code)

    if str(self.__get_ncbi_code(suggestion1)) == expected_code:
        return suggestion1

    # Only look up the second candidate when it is really a different one
    # and the first candidate did not match.
    if (suggestion2 != suggestion1
            and str(self.__get_ncbi_code(suggestion2)) == expected_code):
        return suggestion2

    # Best effort: no candidate could be confirmed, fall back to the first.
    logging.warning("can't find the correct RSAT mapping !")
    return suggestion1
def test_best_rsat_matches(self):
    """Check that best_matching_links() ranks the expected RSAT entry first.

    Reads the HTML fixture at RSAT_LIST_FILE_PATH and asserts that the top
    match for 'Halobacterium' is the "Halobacterium_sp/" link.
    """
    with open(RSAT_LIST_FILE_PATH) as inputfile:
        html = inputfile.read()
    matches = util.best_matching_links('Halobacterium', html)
    # assertEqual replaces the assertEquals alias, removed in Python 3.12.
    self.assertEqual("Halobacterium_sp/", matches[0])