Beispiel #1
0
def download_pfam_descriptions(output_dir='.', verbose=True):
    """Download ``Pfam-A.hmm.dat.gz`` from the current Pfam release on the EBI FTP.

    :param output_dir: directory the file is saved into
    :param verbose: forwarded to ``download_file``
    :return: path of the downloaded file inside ``output_dir``
    """
    source_url = 'ftp://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.hmm.dat.gz'
    destination = path.join(output_dir, 'Pfam-A.hmm.dat.gz')
    download_file(source_url, destination, verbose=verbose)
    return destination
Beispiel #2
0
def download_and_process_viral_refseq(merged_viral_faas=None,
                                      output_dir='.',
                                      viral_files=2,
                                      threads=10,
                                      verbose=True):
    """Create an mmseqs database from RefSeq viral protein sequences.

    Can only download the newest version. When ``merged_viral_faas`` is not
    given, the numbered ``viral.<n>.protein.faa.gz`` files (1..``viral_files``)
    are downloaded into ``output_dir`` and concatenated into
    ``viral.merged.protein.faa.gz``.

    :param merged_viral_faas: existing merged protein file; skips the download
    :param output_dir: directory for downloads and the resulting database
    :param viral_files: number of numbered viral protein files to download
    :param threads: forwarded to ``make_mmseqs_db``
    :param verbose: forwarded to ``download_file`` and ``make_mmseqs_db``
    :return: path of the mmseqs database, named with ``get_iso_date()``
    """
    # TODO: Make it so that you don't need to know number of viral files in refseq viral

    if merged_viral_faas is None:  # download database if not provided
        import shutil  # only needed on the download path

        faa_base_name = 'viral.%s.protein.faa.gz'
        downloaded_faas = []
        for number in range(1, viral_files + 1):
            refseq_url = 'ftp://ftp.ncbi.nlm.nih.gov/refseq/release/viral/viral.%s.protein.faa.gz' % number
            refseq_faa = path.join(output_dir, faa_base_name % number)
            download_file(refseq_url, refseq_faa, verbose=verbose)
            downloaded_faas.append(refseq_faa)

        # Merge exactly the files fetched above. Globbing 'viral.*.protein.faa.gz'
        # would also match the merged output itself (and stale files from an
        # earlier run), and a shell 'cat' breaks on paths containing spaces.
        merged_viral_faas = path.join(output_dir,
                                      'viral.merged.protein.faa.gz')
        with open(merged_viral_faas, 'wb') as merged:
            for refseq_faa in downloaded_faas:
                # byte-wise concatenation, the same result 'cat' produced
                with open(refseq_faa, 'rb') as part:
                    shutil.copyfileobj(part, merged)

    # make mmseqs database
    refseq_viral_mmseqs_db = path.join(
        output_dir, 'refseq_viral.%s.mmsdb' % get_iso_date())
    make_mmseqs_db(merged_viral_faas,
                   refseq_viral_mmseqs_db,
                   create_index=True,
                   threads=threads,
                   verbose=verbose)
    return refseq_viral_mmseqs_db
Beispiel #3
0
def download_vog_annotations(output_dir, vogdb_version='latest', verbose=True):
    """Download ``vog.annotations.tsv.gz`` for the requested VOGdb version.

    :param output_dir: directory the file is saved into
    :param vogdb_version: version segment of the download URL and local name
    :param verbose: forwarded to ``download_file``
    :return: path ``<output_dir>/vog_annotations_<vogdb_version>.tsv.gz``
    """
    local_name = 'vog_annotations_%s.tsv.gz' % vogdb_version
    vog_annotations = path.join(output_dir, local_name)
    remote_url = 'http://fileshare.csb.univie.ac.at/vog/%s/vog.annotations.tsv.gz' % vogdb_version
    download_file(remote_url, vog_annotations, verbose=verbose)
    return vog_annotations
Beispiel #4
0
def download_and_process_amg_database(output_dir,
                                      branch='master',
                                      verbose=True):
    """Download ``amg_database.tsv`` from the DRAM GitHub repository.

    :param output_dir: directory the file is saved into
    :param branch: repository branch the file is taken from
    :param verbose: forwarded to ``download_file``
    :return: local path; the file name embeds the value of ``get_iso_date()``
    """
    dated_name = 'amg_database.%s.tsv' % get_iso_date()
    destination = path.join(output_dir, dated_name)
    source_url = 'https://raw.githubusercontent.com/shafferm/DRAM/%s/data/amg_database.tsv' % branch
    download_file(source_url, destination, verbose=verbose)
    return destination
Beispiel #5
0
def download_and_process_function_heatmap_form(output_dir,
                                               branch='master',
                                               verbose=True):
    """Download ``function_heatmap_form.tsv`` from the DRAM GitHub repository.

    :param output_dir: directory the file is saved into
    :param branch: repository branch the file is taken from
    :param verbose: forwarded to ``download_file``
    :return: local path; the file name embeds the value of ``get_iso_date()``
    """
    dated_name = 'function_heatmap_form.%s.tsv' % get_iso_date()
    destination = path.join(output_dir, dated_name)
    source_url = 'https://raw.githubusercontent.com/shafferm/DRAM/%s/data/function_heatmap_form.tsv' % branch
    download_file(source_url, destination, verbose=verbose)
    return destination
Beispiel #6
0
def download_and_process_genome_summary_form(output_dir,
                                             branch='master',
                                             verbose=True):
    """Download ``genome_summary_form.tsv`` from the DRAM GitHub repository.

    :param output_dir: directory the file is saved into
    :param branch: repository branch the file is taken from
    :param verbose: forwarded to ``download_file``
    :return: local path; the file name embeds the value of ``get_iso_date()``
    """
    dated_name = 'genome_summary_form.%s.tsv' % get_iso_date()
    destination = path.join(output_dir, dated_name)
    source_url = 'https://raw.githubusercontent.com/shafferm/DRAM/%s/data/genome_summary_form.tsv' % branch
    download_file(source_url, destination, verbose=verbose)
    return destination
Beispiel #7
0
def download_dbcan_descriptions(output_dir='.',
                                upload_date='07302020',
                                verbose=True):
    """Download the dbCAN ``CAZyDB.<upload_date>.fam-activities.txt`` file.

    :param output_dir: directory the file is saved into
    :param upload_date: date string that is part of the remote and local file name
    :param verbose: forwarded to ``download_file``
    :return: path of the downloaded file inside ``output_dir``
    """
    local_name = 'CAZyDB.%s.fam-activities.txt' % upload_date
    destination = path.join(output_dir, local_name)
    source_url = 'http://bcb.unl.edu/dbCAN2/download/Databases/CAZyDB.%s.fam-activities.txt' % upload_date
    download_file(source_url, destination, verbose=verbose)
    return destination
Beispiel #8
0
def download_and_process_kofam_ko_list(kofam_ko_list_gz=None,
                                       output_dir='.',
                                       verbose=False):
    """Obtain the KOfam ``ko_list`` and decompress it into ``output_dir``.

    When ``kofam_ko_list_gz`` is not given, the archive is downloaded to
    ``<output_dir>/kofam_ko_list.tsv.gz`` first. The archive is always
    decompressed to ``<output_dir>/kofam_ko_list.tsv``, so the returned path
    exists regardless of where the archive lives or how it is named.

    :param kofam_ko_list_gz: existing gzipped ko_list; skips the download
    :param output_dir: directory the decompressed list is written to
    :param verbose: forwarded to ``download_file``
    :return: path of the decompressed ko_list
    """
    import gzip
    import shutil
    from os import remove

    downloaded = kofam_ko_list_gz is None
    if downloaded:
        kofam_ko_list_gz = path.join(output_dir, 'kofam_ko_list.tsv.gz')
        download_file('ftp://ftp.genome.jp/pub/db/kofam/ko_list.gz',
                      kofam_ko_list_gz,
                      verbose=verbose)

    kofam_ko_list = path.join(output_dir, 'kofam_ko_list.tsv')
    # Decompress straight to the path we return. The previous in-place
    # 'gunzip' wrote next to the archive, so a user-supplied archive outside
    # output_dir left the returned path missing.
    with gzip.open(kofam_ko_list_gz, 'rb') as compressed, \
            open(kofam_ko_list, 'wb') as extracted:
        shutil.copyfileobj(compressed, extracted)

    if downloaded:
        # Keep the on-disk result of the default path unchanged: only the
        # decompressed file remains. A user-supplied archive is left alone.
        remove(kofam_ko_list_gz)
    return kofam_ko_list
Beispiel #9
0
def download_and_process_pfam(pfam_full_zipped=None,
                              output_dir='.',
                              threads=10,
                              verbose=True):
    """Download ``Pfam-A.full.gz`` if needed and hand it to ``process_mmspro``.

    :param pfam_full_zipped: existing ``Pfam-A.full.gz``; skips the download
    :param output_dir: directory for the download and the processed output
    :param threads: forwarded to ``process_mmspro``
    :param verbose: forwarded to ``download_file`` and ``process_mmspro``
    :return: whatever ``process_mmspro`` returns for the 'pfam' database
    """
    if pfam_full_zipped is None:  # nothing supplied, fetch the current release
        pfam_url = 'ftp://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.full.gz'
        pfam_full_zipped = path.join(output_dir, 'Pfam-A.full.gz')
        download_file(pfam_url, pfam_full_zipped, verbose=verbose)
    return process_mmspro(pfam_full_zipped, output_dir, 'pfam', threads,
                          verbose)
Beispiel #10
0
def download_and_process_dbcan(dbcan_hmm=None,
                               output_dir='.',
                               dbcan_release='8',
                               verbose=True):
    """Download the dbCAN HMM database if needed and run ``hmmpress`` on it.

    :param dbcan_hmm: existing dbCAN HMM file; skips the download
    :param output_dir: directory the downloaded file is saved into
    :param dbcan_release: release number used in the remote and local file name
    :param verbose: forwarded to ``download_file`` and ``run_process``
    :return: path of the HMM file that was pressed
    """
    if dbcan_hmm is None:  # nothing supplied, fetch the requested release
        hmm_name = 'dbCAN-HMMdb-V%s.txt' % dbcan_release
        dbcan_hmm = path.join(output_dir, hmm_name)
        dbcan_url = 'http://bcb.unl.edu/dbCAN2/download/Databases/dbCAN-HMMdb-V%s.txt' % dbcan_release
        download_file(dbcan_url, dbcan_hmm, verbose=verbose)
    press_command = ['hmmpress', '-f', dbcan_hmm]
    run_process(press_command, verbose=verbose)
    return dbcan_hmm
Beispiel #11
0
def download_and_process_vogdb(vog_hmm_targz=None,
                               output_dir='.',
                               vogdb_release='latest',
                               verbose=True):
    """Download the VOGdb HMM archive if needed, merge the HMMs and press them.

    The archive is extracted into ``<output_dir>/vogdb_hmms``, every
    ``VOG*.hmm`` file found there is merged into
    ``<output_dir>/vog_<vogdb_release>_hmms.txt`` and ``hmmpress -f`` is run
    on the merged file.

    :param vog_hmm_targz: existing ``vog.hmm.tar.gz``; skips the download
    :param output_dir: directory for the download, extraction and merged file
    :param vogdb_release: release segment of the URL and merged file name
    :param verbose: forwarded to ``download_file`` and ``run_process``
    :return: path of the merged HMM file
    """
    if vog_hmm_targz is None:
        vog_hmm_targz = path.join(output_dir, 'vog.hmm.tar.gz')
        vogdb_url = 'http://fileshare.csb.univie.ac.at/vog/%s/vog.hmm.tar.gz' % vogdb_release
        download_file(vogdb_url, vog_hmm_targz, verbose=verbose)
    hmm_dir = path.join(output_dir, 'vogdb_hmms')
    mkdir(hmm_dir)
    # Context manager closes the archive handle even if extraction fails;
    # previously the handle was never closed.
    with tarfile.open(vog_hmm_targz) as vogdb_targz:
        vogdb_targz.extractall(hmm_dir)
    vog_hmms = path.join(output_dir, 'vog_%s_hmms.txt' % vogdb_release)
    merge_files(glob(path.join(hmm_dir, 'VOG*.hmm')), vog_hmms)
    run_process(['hmmpress', '-f', vog_hmms], verbose=verbose)
    return vog_hmms
Beispiel #12
0
def download_and_process_merops_peptidases(peptidase_faa=None,
                                           output_dir='.',
                                           threads=10,
                                           verbose=True):
    """Download MEROPS ``pepunit.lib`` if needed and build an mmseqs database.

    :param peptidase_faa: existing peptidase sequence file; skips the download
    :param output_dir: directory for the download and the resulting database
    :param threads: forwarded to ``make_mmseqs_db``
    :param verbose: forwarded to ``download_file`` and ``make_mmseqs_db``
    :return: path of the mmseqs database, named with ``get_iso_date()``
    """
    if peptidase_faa is None:  # nothing supplied, fetch the current release
        peptidase_faa = path.join(output_dir, 'merops_peptidases_nr.faa')
        download_file(
            'ftp://ftp.ebi.ac.uk/pub/databases/merops/current_release/pepunit.lib',
            peptidase_faa,
            verbose=verbose)
    db_name = 'peptidases.%s.mmsdb' % get_iso_date()
    peptidase_mmseqs_db = path.join(output_dir, db_name)
    make_mmseqs_db(peptidase_faa, peptidase_mmseqs_db, create_index=True,
                   threads=threads, verbose=verbose)
    return peptidase_mmseqs_db
Beispiel #13
0
def download_and_process_kofam_hmms(kofam_profile_tar_gz=None,
                                    output_dir='.',
                                    verbose=False):
    """Download the KOfam profile archive if needed, merge the HMMs and press them.

    The archive is unpacked with ``tar`` into ``<output_dir>/kofam_profiles``,
    the ``profiles/*.hmm`` files inside are merged into
    ``<output_dir>/kofam_profiles.hmm`` and ``hmmpress -f`` is run on the result.

    :param kofam_profile_tar_gz: existing ``profiles.tar.gz``; skips the download
    :param output_dir: directory for the download, extraction and merged file
    :param verbose: forwarded to ``download_file`` and ``run_process``
    :return: path of the merged HMM file
    """
    if kofam_profile_tar_gz is None:
        kofam_profile_tar_gz = path.join(output_dir, 'kofam_profiles.tar.gz')
        kofam_url = 'ftp://ftp.genome.jp/pub/db/kofam/profiles.tar.gz'
        download_file(kofam_url, kofam_profile_tar_gz, verbose=verbose)

    extract_dir = path.join(output_dir, 'kofam_profiles')
    mkdir(extract_dir)
    untar_command = ['tar', '-xzf', kofam_profile_tar_gz, '-C', extract_dir]
    run_process(untar_command, verbose=verbose)

    merged_hmm = path.join(output_dir, 'kofam_profiles.hmm')
    hmm_pattern = path.join(extract_dir, 'profiles', '*.hmm')
    merge_files(glob(hmm_pattern), merged_hmm)

    press_command = ['hmmpress', '-f', merged_hmm]
    run_process(press_command, verbose=verbose)
    return merged_hmm
Beispiel #14
0
def download_and_process_uniref(uniref_fasta_zipped=None,
                                output_dir='.',
                                uniref_version='90',
                                threads=10,
                                verbose=True):
    """Download a UniRef FASTA if needed and build an mmseqs database from it.

    :param uniref_fasta_zipped: existing gzipped UniRef FASTA; skips the download
    :param output_dir: directory for the download and the resulting database
    :param uniref_version: UniRef clustering level used in URL and file names
    :param threads: forwarded to ``make_mmseqs_db``
    :param verbose: forwarded to ``download_file`` and ``make_mmseqs_db``
    :return: path of the mmseqs database, named with ``get_iso_date()``
    """
    if uniref_fasta_zipped is None:  # nothing supplied, fetch from UniProt
        fasta_name = 'uniref%s.fasta.gz' % uniref_version
        uniref_fasta_zipped = path.join(output_dir, fasta_name)
        url_template = 'ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref%s/uniref%s.fasta.gz'
        uniref_url = url_template % (uniref_version, uniref_version)
        download_file(uniref_url, uniref_fasta_zipped, verbose=verbose)

    db_name = 'uniref%s.%s.mmsdb' % (uniref_version, get_iso_date())
    uniref_mmseqs_db = path.join(output_dir, db_name)
    make_mmseqs_db(uniref_fasta_zipped, uniref_mmseqs_db, create_index=True,
                   threads=threads, verbose=verbose)
    return uniref_mmseqs_db