Esempio n. 1
0
def print_run_bidirectional_blast(reference, other_genome, dbtype, outdir):
    """Prepare Torque submission scripts for a bidirectional BLAST of two genomes.

    For each direction (reference vs. other genome, then other genome vs.
    reference) a BLAST command line is built and handed to
    ``utils.write_torque_script``. A direction is skipped when a non-empty
    ``<query>_vs_<db>_blast.out`` file is already present in ``outdir``.

    Args:
        reference (str): Path to "reference" genome, aka your "base strain"
        other_genome (str): Path to other genome which will be BLASTed to the reference
        dbtype (str): "nucl" or "prot" - what format your genome files are in
        outdir (str): Path to folder where Torque scripts should be placed

    Raises:
        ValueError: If ``dbtype`` is neither "nucl" nor "prot".

    """
    # TODO: add force_rerun option

    if dbtype not in ('nucl', 'prot'):
        raise ValueError('dbtype must be "nucl" or "prot"')
    command = 'blastn' if dbtype == 'nucl' else 'blastp'

    _, ref_name, _ = utils.split_folder_and_path(reference)
    _, other_name, _ = utils.split_folder_and_path(other_genome)

    # (query file, query name, database name) per direction, in the order the
    # scripts are generated: reference as query first, then the other genome.
    directions = (
        (reference, ref_name, other_name),
        (other_genome, other_name, ref_name),
    )

    for query_path, query_name, db_name in directions:
        job_name = '_vs_'.join((query_name, db_name))
        blast_out = job_name + '_blast.out'

        # Skip this direction when a non-empty result is already in outdir
        existing_result = op.join(outdir, blast_out)
        if op.exists(existing_result) and os.stat(existing_result).st_size != 0:
            log.debug('{} vs {} BLAST already run'.format(query_name, db_name))
            continue

        # NOTE(review): the command refers to the database by base name and
        # writes to a bare file name (no outdir) - presumably the job is meant
        # to run from the folder holding the databases; confirm.
        cmd = '{} -query {} -db {} -outfmt 6 -out {}'.format(
            command, query_path, db_name, blast_out)
        script_path = op.join(outdir, job_name) + '.sh'
        utils.write_torque_script(command=cmd,
                                  err=job_name,
                                  out=job_name,
                                  name=job_name,
                                  outfile=script_path,
                                  walltime='00:15:00',
                                  queue='regular')
Esempio n. 2
0
def run_makeblastdb(infile, dbtype, outdir=''):
    """Make the BLAST database for a genome file.

    The database is only built when at least one of its three index files is
    missing; otherwise the existing files are reported as-is.

    Args:
        infile (str): path to genome FASTA file
        dbtype (str): "nucl" or "prot" - what format your genome files are in
        outdir (str): path to directory to output database files (default is original folder)

    Returns:
        list: Paths to the BLAST database files (.nhr/.nin/.nsq for "nucl",
        .phr/.pin/.psq for "prot") when they already exist or makeblastdb
        exits with code 0. None when makeblastdb fails or cannot be started
        (the error is logged).

    Raises:
        ValueError: If ``dbtype`` is neither "nucl" nor "prot".

    """
    # TODO: add force_rerun option
    # TODO: rewrite using utils function command

    # Validate the arguments before doing any other work
    if dbtype == 'nucl':
        outext = ['.nhr', '.nin', '.nsq']
    elif dbtype == 'prot':
        outext = ['.phr', '.pin', '.psq']
    else:
        raise ValueError('dbtype must be "nucl" or "prot"')

    # Output location
    og_dir, name, _ = utils.split_folder_and_path(infile)
    if not outdir:
        outdir = og_dir
    outfile_basename = op.join(outdir, name)
    outfile_all = [outfile_basename + x for x in outext]

    # Check if BLAST DB was already made
    if all(op.exists(f) for f in outfile_all):
        log.debug(
            'BLAST database already exists at {}'.format(outfile_basename))
        return outfile_all

    # Run makeblastdb if DB does not exist. The arguments are passed as a list
    # without a shell, so paths are handed to makeblastdb verbatim and are
    # never split or interpreted by a shell.
    cmd = ['makeblastdb', '-in', infile, '-dbtype', dbtype,
           '-out', outfile_basename]
    try:
        retval = subprocess.call(cmd)
    except OSError as err:
        # Without a shell a missing executable raises instead of yielding a
        # non-zero exit code; keep the log-and-return-None contract.
        log.error('Error running makeblastdb: {}'.format(err))
        return None

    if retval == 0:
        log.debug('Made BLAST database at {}'.format(outfile_basename))
        return outfile_all

    log.error('Error running makeblastdb, exit code {}'.format(retval))
    return None
Esempio n. 3
0
def _blast_one_direction(command, query, query_name, db_basename, db_name,
                         outfile):
    """Run one BLAST search unless a non-empty output file already exists."""
    if op.exists(outfile) and os.stat(outfile).st_size != 0:
        log.debug('{} vs {} BLAST already run'.format(query_name, db_name))
        return

    # Argument list without a shell: paths are passed to BLAST verbatim and
    # are never split or interpreted by a shell.
    cmd = [command, '-query', query, '-db', db_basename,
           '-outfmt', '6', '-out', outfile]
    log.debug('Running: {}'.format(' '.join(cmd)))
    try:
        retval = subprocess.call(cmd)
    except OSError as err:
        # Without a shell a missing executable raises instead of yielding a
        # non-zero exit code; keep the original log-and-continue behavior.
        log.error('Error running {}: {}'.format(command, err))
        return

    if retval == 0:
        log.debug('BLASTed {} vs {}'.format(query_name, db_name))
    else:
        log.error('Error running {}, exit code {}'.format(command, retval))


def run_bidirectional_blast(reference, other_genome, dbtype, outdir=''):
    """BLAST a genome against another, and vice versa.

    This function requires BLAST to be installed, do so by running:
    sudo apt install ncbi-blast+

    BLAST databases for both genomes are made first (next to each genome
    file) via ``run_makeblastdb``. A direction whose output file already
    exists and is non-empty is not re-run.

    Args:
        reference (str): path to "reference" genome, aka your "base strain"
        other_genome (str): path to other genome which will be BLASTed to the reference
        dbtype (str): "nucl" or "prot" - what format your genome files are in
        outdir (str): path to folder where BLAST outputs should be placed

    Returns:
        tuple: Paths to the BLAST output files,
        (<reference>_vs_<other>_blast.out, <other>_vs_<reference>_blast.out).
        The paths are returned even if a BLAST run failed; failures are only
        logged.

    Raises:
        ValueError: If ``dbtype`` is neither "nucl" nor "prot".

    """
    # TODO: add force_rerun option

    if dbtype == 'nucl':
        command = 'blastn'
    elif dbtype == 'prot':
        command = 'blastp'
    else:
        raise ValueError('dbtype must be "nucl" or "prot"')

    r_folder, r_name, _ = utils.split_folder_and_path(reference)
    g_folder, g_name, _ = utils.split_folder_and_path(other_genome)

    # make sure BLAST DBs have been made
    run_makeblastdb(infile=reference, dbtype=dbtype, outdir=r_folder)
    run_makeblastdb(infile=other_genome, dbtype=dbtype, outdir=g_folder)

    r_vs_g = op.join(outdir, r_name + '_vs_' + g_name + '_blast.out')
    g_vs_r = op.join(outdir, g_name + '_vs_' + r_name + '_blast.out')

    # Reference vs genome: query is the reference, database is the other genome
    _blast_one_direction(command, reference, r_name,
                         op.join(g_folder, g_name), g_name, r_vs_g)

    # Genome vs reference: query is the other genome, database is the reference
    _blast_one_direction(command, other_genome, g_name,
                         op.join(r_folder, r_name), r_name, g_vs_r)

    return r_vs_g, g_vs_r