def download_files(args):
    """Main function for the `taxadb download` sub-command.

    Downloads taxdump.tar.gz and the requested accession2taxid files from
    the NCBI FTP. Every downloaded file is fetched together with its
    `.md5` companion and passed to `util.md5_check`; the taxdump archive
    is then unpacked with `download.unpack`.

    Each file is downloaded at most once, even when several requested
    divisions select it (e.g. both `full` and `nucl`).

    Side effects: `args.type` is flattened in place, the output directory
    is created, and it becomes the current working directory. The process
    exits with status 1 if the directory cannot be created because it
    already exists.

    Arguments:
        args (object): The arguments from argparse. Reads `args.type`
            (a list of lists of division names), `args.outdir` and
            `args.force`.
    """
    logger = logging.getLogger(__name__)

    taxdump = 'taxdump.tar.gz'
    # accession2taxid files, each paired with the divisions that select it
    divisions = (
        ('nucl_est.accession2taxid.gz', ('full', 'nucl', 'est')),
        ('nucl_gb.accession2taxid.gz', ('full', 'nucl', 'gb')),
        ('nucl_gss.accession2taxid.gz', ('full', 'nucl', 'gss')),
        ('nucl_wgs.accession2taxid.gz', ('full', 'nucl', 'wgs')),
        ('prot.accession2taxid.gz', ('full', 'prot')),
    )

    args.type = [x for y in args.type for x in y]
    acc_dl_list = [taxdump]
    for div in args.type:
        for filename, selectors in divisions:
            # skip files already queued so overlapping divisions do not
            # trigger a second download of the same file
            if div in selectors and filename not in acc_dl_list:
                acc_dl_list.append(filename)

    out = args.outdir
    target_dir = os.path.abspath(out)
    try:
        # exist_ok follows --force: an existing directory is an error
        # unless the user asked to overwrite
        os.makedirs(target_dir, exist_ok=args.force)
    except FileExistsError:
        logger.error(
            '%s exists. Consider using -f if you want to overwrite', out)
        sys.exit(1)
    os.chdir(target_dir)

    for filename in acc_dl_list:
        if filename == taxdump:
            remote_dir = 'pub/taxonomy/'
        else:
            remote_dir = 'pub/taxonomy/accession2taxid/'
        download.ncbi(remote_dir, filename)
        download.ncbi(remote_dir, filename + '.md5')
        util.md5_check(filename)
        if filename == taxdump:
            download.unpack(taxdump)
def download_files(args):
    """Main function for the `taxadb download` sub-command.

    Downloads taxdump.tar.gz and the requested accession2taxid files from
    the NCBI FTP. Every downloaded file is fetched together with its
    `.md5` companion and passed to `util.md5_check`; the taxdump archive
    is then unpacked with `download.unpack`.

    Each file is downloaded at most once, even when several requested
    divisions select it (e.g. both `full` and `nucl`).

    Side effects: `args.type` is flattened in place, the output directory
    is created, and it becomes the current working directory. The process
    exits with status 1 if the directory cannot be created because it
    already exists.

    Arguments:
        args (object): The arguments from argparse. Reads `args.type`
            (a list of lists of division names), `args.outdir` and
            `args.force`.
    """
    logger = logging.getLogger(__name__)

    taxdump = 'taxdump.tar.gz'
    # accession2taxid files, each paired with the divisions that select it.
    # nucl_est and nucl_gss are deprecated and intentionally not listed, so
    # the 'est' and 'gss' divisions select nothing here.
    divisions = (
        ('nucl_gb.accession2taxid.gz', ('full', 'nucl', 'gb')),
        ('nucl_wgs.accession2taxid.gz', ('full', 'nucl', 'wgs')),
        ('prot.accession2taxid.gz', ('full', 'prot')),
    )

    args.type = [x for y in args.type for x in y]
    acc_dl_list = [taxdump]
    for div in args.type:
        for filename, selectors in divisions:
            # skip files already queued so overlapping divisions do not
            # trigger a second download of the same file
            if div in selectors and filename not in acc_dl_list:
                acc_dl_list.append(filename)

    out = args.outdir
    target_dir = os.path.abspath(out)
    try:
        # exist_ok follows --force: an existing directory is an error
        # unless the user asked to overwrite
        os.makedirs(target_dir, exist_ok=args.force)
    except FileExistsError:
        logger.error(
            '%s exists. Consider using -f if you want to overwrite', out)
        sys.exit(1)
    os.chdir(target_dir)

    for filename in acc_dl_list:
        if filename == taxdump:
            remote_dir = 'pub/taxonomy/'
        else:
            remote_dir = 'pub/taxonomy/accession2taxid/'
        download.ncbi(remote_dir, filename)
        download.ncbi(remote_dir, filename + '.md5')
        util.md5_check(filename)
        if filename == taxdump:
            download.unpack(taxdump)
def download(args):
    """Main function for the 'taxadb download' sub-command.

    Downloads every accession2taxid file and taxdump.tar.gz from the NCBI
    FTP into the output directory. Each file is fetched together with its
    `.md5` companion (only if the remote copy is newer) and passed to
    `util.md5_check`. The taxdump archive is then extracted in place.

    Side effects: the output directory is created if missing and becomes
    the current working directory.

    Arguments:
        args.outdir (:obj:`str`): output directory
    """
    ncbi_ftp = 'ftp.ncbi.nlm.nih.gov'
    acc_dir = 'pub/taxonomy/accession2taxid/'
    taxdump = 'taxdump.tar.gz'

    # (remote directory, file name) pairs, in download order
    targets = [
        (acc_dir, 'nucl_est.accession2taxid.gz'),
        (acc_dir, 'nucl_gb.accession2taxid.gz'),
        (acc_dir, 'nucl_gss.accession2taxid.gz'),
        (acc_dir, 'nucl_wgs.accession2taxid.gz'),
        (acc_dir, 'prot.accession2taxid.gz'),
        ('pub/taxonomy/', taxdump),
    ]

    out = os.path.abspath(args.outdir)
    os.makedirs(out, exist_ok=True)
    os.chdir(out)

    for remote_dir, filename in targets:
        print('Started Downloading %s' % filename)
        # a fresh FTP connection is opened for every file
        with ftputil.FTPHost(ncbi_ftp, 'anonymous', 'password') as ncbi:
            ncbi.chdir(remote_dir)
            ncbi.download_if_newer(filename, filename)
            ncbi.download_if_newer(filename + '.md5', filename + '.md5')
        util.md5_check(filename)

    print('Unpacking %s' % taxdump)
    # the context manager closes the archive; no explicit close() needed
    # NOTE(review): extractall() is called without an extraction filter;
    # confirm the archive source is trusted.
    with tarfile.open(taxdump, "r:gz") as tar:
        tar.extractall()
def test_md5check_fails(self):
    """md5_check raises AssertionError for the 'wrong.txt' fixture."""
    # fixtures live next to this test module
    here = os.path.dirname(os.path.realpath(__file__))
    badfile = os.path.join(here, 'wrong.txt')
    self.assertRaises(AssertionError, md5_check, badfile)
def test_md5check_success(self):
    """md5_check returns None for the 'good.txt' fixture."""
    # fixtures live next to this test module
    here = os.path.dirname(os.path.realpath(__file__))
    okfile = os.path.join(here, 'good.txt')
    outcome = md5_check(okfile)
    self.assertIsNone(outcome)
def test_md5check_fails(self):
    """md5_check exits (raises SystemExit) for the 'wrong.txt' fixture."""
    # fixtures live next to this test module
    here = os.path.dirname(os.path.realpath(__file__))
    badfile = os.path.join(here, 'wrong.txt')
    self.assertRaises(SystemExit, md5_check, badfile)