def genomeID2Bio(genome_IDs):
    """
    Retrieve assembly summaries for ``genome_IDs`` plus the BioSample and
    BioProject records those summaries point to.

    The assembly summaries are read from ``NCBI_convertor.dbsummary`` after
    ``get_GI()`` and ``get_db_summary()`` have been called. Every non-empty
    ``BioSampleAccn`` / ``BioprojectAccn`` value found in them is collected
    (duplicates removed) and handed to ``get_biosample`` / ``get_bioproject``.

    :param genome_IDs: genome identifiers passed to ``NCBI_convertor`` with
        ``db='assembly'``.
    :return: tuple ``(aid2info, bp2info, bs2info)`` -- the assembly summary
        mapping, the result of ``get_bioproject`` and the result of
        ``get_biosample``.
    """
    tqdm.write("with genome ID, start to retrieve genome information")
    assembly_convertor = NCBI_convertor(genome_IDs, db='assembly')
    assembly_convertor.get_GI()
    assembly_convertor.get_db_summary()
    aid2info = assembly_convertor.dbsummary

    # Gather the distinct BioSample accessions; summaries lacking the field
    # (or holding an empty value) are skipped.
    biosample_accessions = set()
    for summary in aid2info.values():
        accession = summary.get('BioSampleAccn')
        if accession:
            biosample_accessions.add(accession)
    bs_list = list(biosample_accessions)

    # Same for the BioProject accessions.
    bioproject_accessions = set()
    for summary in aid2info.values():
        accession = summary.get('BioprojectAccn')
        if accession:
            bioproject_accessions.add(accession)
    bp_list = list(bioproject_accessions)

    tqdm.write("retrieving relative Bioproject and its Biosample info")
    bp2info = get_bioproject(bp_list)
    bs2info = get_biosample(bs_list)
    return aid2info, bp2info, bs2info
from bin.ncbi_convertor import NCBI_convertor


def _read_ids(path):
    """Return the non-blank, whitespace-stripped lines of ``path`` as a list.

    The file is opened in a ``with`` block so the handle is always closed.
    Blank lines are skipped so that a trailing newline in the file does not
    produce an empty ID.
    """
    with open(path) as handle:
        stripped_lines = (line.strip() for line in handle)
        return [line for line in stripped_lines if line]


if __name__ == "__main__":
    # Manual smoke test: exercises the convertor against ID lists stored in
    # the current working directory.

    # Protein IDs: taxonomy lookup, then protein -> assembly mapping.
    pids = _read_ids('./protein_ids')
    convertor = NCBI_convertor(pids, db='protein')
    # convertor.check_cache(suffix=suffix, redo=redo)
    convertor.get_taxons_from_tid()
    pid2assembly_dict = convertor.pid2assembly()

    # Assembly IDs: taxonomy lookup only.
    aids = _read_ids('./assembly_ids')
    convertor = NCBI_convertor(aids, db='assembly')
    # convertor.check_cache(suffix=suffix, redo=redo)
    convertor.get_taxons_from_tid()

    # Nucleotide IDs: GI retrieval and db summary, then taxonomy lookup.
    nids = _read_ids('./nucleotide_ids')
    convertor = NCBI_convertor(nids, db='nuccore')
    convertor.get_GI()
    convertor.get_db_summary()
    # convertor.check_cache(suffix=suffix, redo=redo)
    convertor.get_taxons_from_tid()