Example #1
0
def genomeID2Bio(genome_IDs):
    """
    Retrieve assembly summaries and their BioSample / BioProject records.

    "bio" covers the BioSample and the corresponding BioProject.

    :param genome_IDs: genome IDs handed to ``NCBI_convertor`` with
        ``db='assembly'``.
    :return: tuple ``(aid2info, bp2info, bs2info)``: the convertor's
        ``dbsummary``, the result of ``get_bioproject`` and the result of
        ``get_biosample`` for the accessions found in that summary.
    """
    tqdm.write("with genome ID, start to retrieve genome information")
    convertor = NCBI_convertor(genome_IDs, db='assembly')
    convertor.get_GI()
    convertor.get_db_summary()
    aid2info = convertor.dbsummary

    def _unique_accessions(key):
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # accession lists are reproducible between runs (a set of strings
        # iterates in an order that depends on hash randomization).
        values = (info.get(key) for info in aid2info.values())
        # Missing or empty accessions are dropped.
        return list(dict.fromkeys(value for value in values if value))

    bs_list = _unique_accessions('BioSampleAccn')
    bp_list = _unique_accessions('BioprojectAccn')
    tqdm.write("retrieving relative Bioproject and its Biosample info")
    bp2info = get_bioproject(bp_list)
    bs2info = get_biosample(bs_list)
    return aid2info, bp2info, bs2info
Example #2
0
from bin.ncbi_convertor import NCBI_convertor

if __name__ == "__main__":
    # test

    def _read_ids(path):
        """Return the non-blank, whitespace-stripped lines of the file at *path*."""
        # `with` closes the handle promptly; skipping blank lines keeps the
        # file's trailing newline from becoming an empty ID.
        with open(path) as handle:
            stripped = (line.strip() for line in handle)
            return [line for line in stripped if line]

    # protein IDs -> taxonomy and assembly mapping
    pids = _read_ids('./protein_ids')
    convertor = NCBI_convertor(pids, db='protein')
    # convertor.check_cache(suffix=suffix, redo=redo)
    convertor.get_taxons_from_tid()
    pid2assembly_dict = convertor.pid2assembly()

    # assembly IDs -> taxonomy
    aids = _read_ids('./assembly_ids')
    convertor = NCBI_convertor(aids, db='assembly')
    # convertor.check_cache(suffix=suffix, redo=redo)
    convertor.get_taxons_from_tid()

    # nucleotide IDs -> GI, db summary, then taxonomy
    nids = _read_ids('./nucleotide_ids')
    convertor = NCBI_convertor(nids, db='nuccore')
    convertor.get_GI()
    convertor.get_db_summary()
    # convertor.check_cache(suffix=suffix, redo=redo)
    convertor.get_taxons_from_tid()