Example #1
0
def download_files(args):
    """Main function for the `taxadb download` sub-command.

    Requests taxdump.tar.gz and the selected accession2taxid files (each
    together with its ``.md5`` file) through ``download.ncbi``, passes every
    file to ``util.md5_check`` and finally unpacks the taxdump archive with
    ``download.unpack``.

    Arguments:
             args (object): The arguments from argparse. The attributes
                 read here are ``type`` (iterated as a sequence of
                 sequences of division names; it is flattened one level
                 and written back to ``args.type``), ``outdir`` (the
                 output directory) and ``force`` (passed as ``exist_ok``
                 when creating ``outdir``).

    Side effects:
             Changes the working directory of the process to
             ``args.outdir``. Logs an error and calls ``sys.exit(1)`` when
             ``os.makedirs`` raises ``FileExistsError``.

    """
    logger = logging.getLogger(__name__)

    taxdump = 'taxdump.tar.gz'
    # (file name, division names that select it), in download order
    selectors = (
        ('nucl_est.accession2taxid.gz', ('full', 'nucl', 'est')),
        ('nucl_gb.accession2taxid.gz', ('full', 'nucl', 'gb')),
        ('nucl_gss.accession2taxid.gz', ('full', 'nucl', 'gss')),
        ('nucl_wgs.accession2taxid.gz', ('full', 'nucl', 'wgs')),
        ('prot.accession2taxid.gz', ('full', 'prot')),
    )

    # flatten one level and keep the flattened list on args, as before
    args.type = [x for y in args.type for x in y]

    # taxdump is always fetched; overlapping divisions (e.g. 'nucl' and
    # 'gb') must not queue the same file twice, so skip names already listed
    acc_dl_list = [taxdump]
    for div in args.type:
        for filename, divisions in selectors:
            if div in divisions and filename not in acc_dl_list:
                acc_dl_list.append(filename)

    out = args.outdir
    target = os.path.abspath(out)
    try:
        # only makedirs can raise FileExistsError, so only it is guarded
        os.makedirs(target, exist_ok=args.force)
    except FileExistsError:
        logger.error('%s exists. Consider using -f if you want to overwrite',
                     out)
        sys.exit(1)
    os.chdir(target)

    for filename in acc_dl_list:
        is_taxdump = filename == taxdump
        if is_taxdump:
            remote_dir = 'pub/taxonomy/'
        else:
            remote_dir = 'pub/taxonomy/accession2taxid/'
        download.ncbi(remote_dir, filename)
        download.ncbi(remote_dir, filename + '.md5')
        util.md5_check(filename)
        if is_taxdump:
            # only the taxdump tarball is unpacked
            download.unpack(filename)
Example #2
0
def download_files(args):
    """Main function for the `taxadb download` sub-command.

    Requests taxdump.tar.gz and the selected accession2taxid files (each
    together with its ``.md5`` file) through ``download.ncbi``, passes every
    file to ``util.md5_check`` and finally unpacks the taxdump archive with
    ``download.unpack``.

    Arguments:
             args (object): The arguments from argparse. The attributes
                 read here are ``type`` (iterated as a sequence of
                 sequences of division names; it is flattened one level
                 and written back to ``args.type``), ``outdir`` (the
                 output directory) and ``force`` (passed as ``exist_ok``
                 when creating ``outdir``).

    Side effects:
             Changes the working directory of the process to
             ``args.outdir``. Logs an error and calls ``sys.exit(1)`` when
             ``os.makedirs`` raises ``FileExistsError``.

    """
    logger = logging.getLogger(__name__)

    taxdump = 'taxdump.tar.gz'
    # (file name, division names that select it), in download order.
    # The nucl_est and nucl_gss files are marked deprecated and are no
    # longer part of the list.
    selectors = (
        ('nucl_gb.accession2taxid.gz', ('full', 'nucl', 'gb')),
        ('nucl_wgs.accession2taxid.gz', ('full', 'nucl', 'wgs')),
        ('prot.accession2taxid.gz', ('full', 'prot')),
    )

    # flatten one level and keep the flattened list on args, as before
    args.type = [x for y in args.type for x in y]

    # taxdump is always fetched; overlapping divisions (e.g. 'nucl' and
    # 'gb') must not queue the same file twice, so skip names already listed
    acc_dl_list = [taxdump]
    for div in args.type:
        for filename, divisions in selectors:
            if div in divisions and filename not in acc_dl_list:
                acc_dl_list.append(filename)

    out = args.outdir
    target = os.path.abspath(out)
    try:
        # only makedirs can raise FileExistsError, so only it is guarded
        os.makedirs(target, exist_ok=args.force)
    except FileExistsError:
        logger.error('%s exists. Consider using -f if you want to overwrite',
                     out)
        sys.exit(1)
    os.chdir(target)

    for filename in acc_dl_list:
        is_taxdump = filename == taxdump
        if is_taxdump:
            remote_dir = 'pub/taxonomy/'
        else:
            remote_dir = 'pub/taxonomy/accession2taxid/'
        download.ncbi(remote_dir, filename)
        download.ncbi(remote_dir, filename + '.md5')
        util.md5_check(filename)
        if is_taxdump:
            # only the taxdump tarball is unpacked
            download.unpack(filename)
Example #3
0
def download(args):
    """Main function for the 'taxadb download' sub-command.

    This function downloads five accession2taxid files and taxdump.tar.gz
    (each together with its ``.md5`` file) from the ncbi ftp, passes every
    downloaded file to ``util.md5_check`` and then extracts the taxdump
    archive into the output directory.

    Arguments:
             args.outdir (:obj:`str`): output directory; it is created if
                 missing and becomes the current working directory

    """
    ncbi_ftp = 'ftp.ncbi.nlm.nih.gov'
    acc_dir = 'pub/taxonomy/accession2taxid/'
    tax_dir = 'pub/taxonomy/'

    # files to download in accession2taxid
    acc_dl_list = [
        'nucl_est.accession2taxid.gz',
        'nucl_gb.accession2taxid.gz',
        'nucl_gss.accession2taxid.gz',
        'nucl_wgs.accession2taxid.gz',
        'prot.accession2taxid.gz',
    ]
    taxdump = 'taxdump.tar.gz'

    out = os.path.abspath(args.outdir)
    os.makedirs(out, exist_ok=True)
    os.chdir(out)

    # (remote directory, file name) pairs; taxdump goes last, as before
    targets = [(acc_dir, name) for name in acc_dl_list]
    targets.append((tax_dir, taxdump))

    for remote_dir, name in targets:
        print('Started Downloading %s' % name)
        # one FTP session per file, closed as soon as both transfers finish
        with ftputil.FTPHost(ncbi_ftp, 'anonymous', 'password') as ncbi:
            ncbi.chdir(remote_dir)
            ncbi.download_if_newer(name, name)
            ncbi.download_if_newer(name + '.md5', name + '.md5')
        # the checksum is verified locally, so the FTP session is not
        # kept open while it runs
        util.md5_check(name)

    print('Unpacking %s' % taxdump)
    # NOTE(review): extractall() is called without an extraction filter, so
    # member paths from the archive are trusted - consider passing a filter
    # on Python versions that support it.
    with tarfile.open(taxdump, "r:gz") as tar:
        # the context manager closes the archive; no explicit close needed
        tar.extractall()
Example #4
0
 def test_md5check_fails(self):
     """md5_check must raise AssertionError for the wrong.txt fixture"""
     # the fixture is looked up in the directory of this test file
     here = os.path.dirname(os.path.realpath(__file__))
     fixture = os.path.join(here, 'wrong.txt')
     self.assertRaises(AssertionError, md5_check, fixture)
Example #5
0
 def test_md5check_success(self):
     """md5_check must return None for the good.txt fixture"""
     # the fixture is looked up in the directory of this test file
     here = os.path.dirname(os.path.realpath(__file__))
     fixture = os.path.join(here, 'good.txt')
     outcome = md5_check(fixture)
     self.assertIsNone(outcome)
Example #6
0
 def test_md5check_fails(self):
     """md5_check must raise SystemExit for the wrong.txt fixture"""
     # the fixture is looked up in the directory of this test file
     here = os.path.dirname(os.path.realpath(__file__))
     fixture = os.path.join(here, 'wrong.txt')
     self.assertRaises(SystemExit, md5_check, fixture)
Example #7
0
 def test_md5check_success(self):
     """md5_check must return None for the good.txt fixture"""
     # the fixture is looked up in the directory of this test file
     here = os.path.dirname(os.path.realpath(__file__))
     fixture = os.path.join(here, 'good.txt')
     outcome = md5_check(fixture)
     self.assertIsNone(outcome)