예제 #1
0
def download_and_process_viral_refseq(merged_viral_faas=None,
                                      output_dir='.',
                                      viral_files=2,
                                      threads=10,
                                      verbose=True):
    """Build an MMseqs2 database from the RefSeq viral protein files.

    Can only download the newest RefSeq release.

    Args:
        merged_viral_faas: Path to an already merged, gzipped protein FASTA.
            When None, the numbered ``viral.<N>.protein.faa.gz`` files are
            downloaded into ``output_dir`` and concatenated into
            ``viral.merged.protein.faa.gz``.
        output_dir: Directory that receives the downloads, the merged file
            and the resulting database.
        viral_files: How many numbered protein files to download (1..N).
        threads: Forwarded to ``make_mmseqs_db``.
        verbose: Forwarded to ``download_file`` and ``make_mmseqs_db``.

    Returns:
        Path of the database handed to ``make_mmseqs_db``:
        ``refseq_viral.<get_iso_date()>.mmsdb`` inside ``output_dir``.
    """
    # TODO: Make it so that you don't need to know number of viral files in refseq viral

    if merged_viral_faas is None:  # download database if not provided
        import shutil  # only needed on the download path

        faa_base_name = 'viral.%s.protein.faa.gz'
        downloaded_faas = []
        for number in range(1, viral_files + 1):
            refseq_url = 'ftp://ftp.ncbi.nlm.nih.gov/refseq/release/viral/viral.%s.protein.faa.gz' % number
            refseq_faa = path.join(output_dir, faa_base_name % number)
            download_file(refseq_url, refseq_faa, verbose=verbose)
            downloaded_faas.append(refseq_faa)

        # Merge exactly the files fetched above, in numeric order. Globbing
        # 'viral.*.protein.faa.gz' would also pick up a merged file left over
        # from an earlier run, and a shell 'cat' breaks on paths containing
        # spaces or shell metacharacters. Concatenated gzip members still form
        # a valid gzip stream, so a raw byte copy is sufficient.
        merged_viral_faas = path.join(output_dir,
                                      'viral.merged.protein.faa.gz')
        with open(merged_viral_faas, 'wb') as merged_handle:
            for refseq_faa in downloaded_faas:
                with open(refseq_faa, 'rb') as faa_handle:
                    shutil.copyfileobj(faa_handle, merged_handle)

    # make mmseqs database
    refseq_viral_mmseqs_db = path.join(
        output_dir, 'refseq_viral.%s.mmsdb' % get_iso_date())
    make_mmseqs_db(merged_viral_faas,
                   refseq_viral_mmseqs_db,
                   create_index=True,
                   threads=threads,
                   verbose=verbose)
    return refseq_viral_mmseqs_db
예제 #2
0
def download_and_process_kofam_ko_list(kofam_ko_list_gz=None,
                                       output_dir='.',
                                       verbose=False):
    """Decompress the KOfam ``ko_list`` table into ``output_dir``.

    Args:
        kofam_ko_list_gz: Path to a gzipped ko_list file. When None, the file
            is downloaded to ``output_dir/kofam_ko_list.tsv.gz`` first.
        output_dir: Directory that receives the download and the
            decompressed table.
        verbose: Forwarded to ``download_file``.

    Returns:
        Path of the decompressed table, ``output_dir/kofam_ko_list.tsv``.
    """
    import gzip
    import shutil

    if kofam_ko_list_gz is None:
        kofam_ko_list_gz = path.join(output_dir, 'kofam_ko_list.tsv.gz')
        download_file('ftp://ftp.genome.jp/pub/db/kofam/ko_list.gz',
                      kofam_ko_list_gz,
                      verbose=verbose)
    kofam_ko_list = path.join(output_dir, 'kofam_ko_list.tsv')
    # Decompress straight to the returned path. An external 'gunzip' writes
    # next to the archive (archive name minus '.gz'), so for a caller-supplied
    # archive the returned path would not exist. The archive itself is kept.
    with gzip.open(kofam_ko_list_gz, 'rb') as compressed, \
            open(kofam_ko_list, 'wb') as decompressed:
        shutil.copyfileobj(compressed, decompressed)
    return kofam_ko_list
예제 #3
0
def download_and_process_dbcan(dbcan_hmm=None,
                               output_dir='.',
                               dbcan_release='8',
                               verbose=True):
    """Obtain the dbCAN HMM file (if needed) and run ``hmmpress -f`` on it.

    Args:
        dbcan_hmm: Path to an existing dbCAN HMM file. When None, the file
            for ``dbcan_release`` is downloaded into ``output_dir``.
        output_dir: Directory that receives the download.
        dbcan_release: Release identifier substituted into the file name and
            download URL.
        verbose: Forwarded to ``download_file`` and ``run_process``.

    Returns:
        Path of the HMM file that was passed to hmmpress.
    """
    if dbcan_hmm is None:  # nothing supplied, so fetch the requested release
        hmm_file_name = 'dbCAN-HMMdb-V%s.txt' % dbcan_release
        dbcan_hmm = path.join(output_dir, hmm_file_name)
        source_url = (
            'http://bcb.unl.edu/dbCAN2/download/Databases/dbCAN-HMMdb-V%s.txt'
            % dbcan_release)
        download_file(source_url, dbcan_hmm, verbose=verbose)

    press_command = ['hmmpress', '-f', dbcan_hmm]
    run_process(press_command, verbose=verbose)
    return dbcan_hmm
예제 #4
0
def download_and_process_vogdb(vog_hmm_targz=None,
                               output_dir='.',
                               vogdb_release='latest',
                               verbose=True):
    """Extract the VOGdb HMM archive, merge the profiles and hmmpress them.

    Args:
        vog_hmm_targz: Path to an existing ``vog.hmm.tar.gz``. When None, the
            archive for ``vogdb_release`` is downloaded into ``output_dir``.
        output_dir: Directory that receives the download, the extracted
            ``vogdb_hmms`` directory and the merged HMM file.
        vogdb_release: Release name substituted into the download URL and the
            merged file name.
        verbose: Forwarded to ``download_file`` and ``run_process``.

    Returns:
        Path of the merged HMM file, ``output_dir/vog_<release>_hmms.txt``.
    """
    if vog_hmm_targz is None:
        vog_hmm_targz = path.join(output_dir, 'vog.hmm.tar.gz')
        vogdb_url = 'http://fileshare.csb.univie.ac.at/vog/%s/vog.hmm.tar.gz' % vogdb_release
        download_file(vogdb_url, vog_hmm_targz, verbose=verbose)
    hmm_dir = path.join(output_dir, 'vogdb_hmms')
    mkdir(hmm_dir)
    # Use a context manager so the archive handle is closed even if
    # extraction fails; the original left the TarFile open.
    # NOTE(review): extractall() trusts member paths inside the archive; if
    # the archive can come from an untrusted source, validate members or use
    # an extraction filter where the running Python version supports it.
    with tarfile.open(vog_hmm_targz) as vogdb_targz:
        vogdb_targz.extractall(hmm_dir)
    vog_hmms = path.join(output_dir, 'vog_%s_hmms.txt' % vogdb_release)
    merge_files(glob(path.join(hmm_dir, 'VOG*.hmm')), vog_hmms)
    run_process(['hmmpress', '-f', vog_hmms], verbose=verbose)
    return vog_hmms
예제 #5
0
def download_and_process_kofam_hmms(kofam_profile_tar_gz=None,
                                    output_dir='.',
                                    verbose=False):
    """Unpack the KOfam profile archive, merge the HMMs and hmmpress them.

    Args:
        kofam_profile_tar_gz: Path to an existing profiles tarball. When
            None, it is downloaded to ``output_dir/kofam_profiles.tar.gz``.
        output_dir: Directory that receives the download, the extracted
            ``kofam_profiles`` directory and the merged HMM file.
        verbose: Forwarded to ``download_file`` and ``run_process``.

    Returns:
        Path of the merged HMM file, ``output_dir/kofam_profiles.hmm``.
    """
    if kofam_profile_tar_gz is None:
        kofam_profile_tar_gz = path.join(output_dir, 'kofam_profiles.tar.gz')
        download_file('ftp://ftp.genome.jp/pub/db/kofam/profiles.tar.gz',
                      kofam_profile_tar_gz,
                      verbose=verbose)

    # Unpack the tarball into its own directory under output_dir.
    extraction_dir = path.join(output_dir, 'kofam_profiles')
    mkdir(extraction_dir)
    untar_command = ['tar', '-xzf', kofam_profile_tar_gz, '-C', extraction_dir]
    run_process(untar_command, verbose=verbose)

    # Collect every profile from the extracted 'profiles' folder into one file.
    merged_kofam_profiles = path.join(output_dir, 'kofam_profiles.hmm')
    profile_pattern = path.join(extraction_dir, 'profiles', '*.hmm')
    merge_files(glob(profile_pattern), merged_kofam_profiles)

    press_command = ['hmmpress', '-f', merged_kofam_profiles]
    run_process(press_command, verbose=verbose)
    return merged_kofam_profiles
예제 #6
0
def process_mmspro(full_alignment,
                   output_dir,
                   db_name='db',
                   threads=10,
                   verbose=True):
    """Turn an alignment file into an indexed MMseqs2 profile database.

    Runs three ``mmseqs`` commands in order through ``run_process``:
    ``convertmsa``, ``msa2profile`` and ``createindex``.

    Args:
        full_alignment: Alignment file handed to ``mmseqs convertmsa``.
        output_dir: Directory for the ``.mmsmsa`` and ``.mmspro`` outputs and
            the ``tmp`` directory given to ``createindex``.
        db_name: Base name used for the output files.
        threads: Value passed as ``--threads`` to msa2profile and createindex.
        verbose: Forwarded to ``run_process``.

    Returns:
        Path of the profile database, ``output_dir/<db_name>.mmspro``.
    """
    thread_count = str(threads)
    msa_db = path.join(output_dir, '%s.mmsmsa' % db_name)
    profile_db = path.join(output_dir, '%s.mmspro' % db_name)
    index_tmp = path.join(output_dir, 'tmp')

    pipeline = (
        ['mmseqs', 'convertmsa', full_alignment, msa_db],
        ['mmseqs', 'msa2profile', msa_db, profile_db,
         '--match-mode', '1', '--threads', thread_count],
        ['mmseqs', 'createindex', profile_db, index_tmp,
         '-k', '5', '-s', '7', '--threads', thread_count],
    )
    for command in pipeline:
        run_process(command, verbose=verbose)
    return profile_db
예제 #7
0
def test_run_process():
    """Smoke test: run_process handles a trivial echo command without raising."""
    echo_command = ['echo', 'Hello', 'World']
    run_process(echo_command, verbose=True)
    # Reaching this point means the call above did not raise.
    assert True