def print_run_bidirectional_blast(reference, other_genome, dbtype, outdir):
    """Write Torque submission scripts for a bidirectional BLAST of two genomes.

    One script is written per search direction (reference vs. other genome,
    and other genome vs. reference). A direction is skipped when a non-empty
    ``<query>_vs_<db>_blast.out`` file already exists in ``outdir``.

    NOTE(review): nothing is printed by this function itself; any printing of
    the execution command would have to happen inside
    ``utils.write_torque_script`` -- confirm.
    NOTE(review): the generated BLAST command refers to the database and the
    output file by bare name (no folder), so the scripts presumably have to be
    run from a directory where those names resolve -- confirm.

    Args:
        reference (str): Path to "reference" genome, aka your "base strain"
        other_genome (str): Path to other genome which will be BLASTed to the
            reference
        dbtype (str): "nucl" or "prot" - what format your genome files are in
        outdir (str): Path to folder where Torque scripts should be placed

    Raises:
        ValueError: If ``dbtype`` is neither "nucl" nor "prot".

    """
    # TODO: add force_rerun option

    if dbtype == 'nucl':
        program = 'blastn'
    elif dbtype == 'prot':
        program = 'blastp'
    else:
        raise ValueError('dbtype must be "nucl" or "prot"')

    _, ref_name, _ = utils.split_folder_and_path(reference)
    _, other_name, _ = utils.split_folder_and_path(other_genome)

    # (query file, query name, database name) for each search direction,
    # reference vs. genome first, then genome vs. reference.
    directions = (
        (reference, ref_name, other_name),
        (other_genome, other_name, ref_name),
    )

    for query_path, query_name, db_name in directions:
        job_name = '_vs_'.join((query_name, db_name))
        blast_out = job_name + '_blast.out'
        expected_result = op.join(outdir, blast_out)

        # A non-empty result file means this direction was already BLASTed.
        if op.exists(expected_result) and os.stat(expected_result).st_size != 0:
            log.debug('{} vs {} BLAST already run'.format(query_name, db_name))
            continue

        cmd = '{} -query {} -db {} -outfmt 6 -out {}'.format(
            program, query_path, db_name, blast_out)
        utils.write_torque_script(
            command=cmd,
            err=job_name,
            out=job_name,
            name=job_name,
            outfile=op.join(outdir, job_name) + '.sh',
            walltime='00:15:00',
            queue='regular',
        )
def run_makeblastdb(infile, dbtype, outdir=''):
    """Make the BLAST database for a genome file.

    The database is only built when at least one of the three expected
    database files is missing; otherwise the existing paths are returned.

    Args:
        infile (str): path to genome FASTA file
        dbtype (str): "nucl" or "prot" - what format your genome files are in
        outdir (str): path to directory to output database files (default is
            original folder)

    Returns:
        list: Paths to the three BLAST database files (``.nhr/.nin/.nsq`` for
        "nucl", ``.phr/.pin/.psq`` for "prot"), or None if ``makeblastdb``
        could not be started or exited with a non-zero code (the failure is
        logged).

    Raises:
        ValueError: If ``dbtype`` is neither "nucl" nor "prot".

    """
    # TODO: add force_rerun option
    # TODO: rewrite using utils function command

    # Output location
    og_dir, name, _ = utils.split_folder_and_path(infile)
    if not outdir:
        outdir = og_dir
    outfile_basename = op.join(outdir, name)

    # Check if BLAST DB was already made
    if dbtype == 'nucl':
        outext = ['.nhr', '.nin', '.nsq']
    elif dbtype == 'prot':
        outext = ['.phr', '.pin', '.psq']
    else:
        raise ValueError('dbtype must be "nucl" or "prot"')
    outfile_all = [outfile_basename + x for x in outext]

    if all(op.exists(f) for f in outfile_all):
        log.debug(
            'BLAST database already exists at {}'.format(outfile_basename))
        return outfile_all

    # Run makeblastdb if DB does not exist. The arguments are passed as a list
    # (no shell) so that paths containing shell metacharacters such as
    # parentheses, '&', ';' or '$' are handed to makeblastdb verbatim instead
    # of being interpreted by the shell.
    cmd = ['makeblastdb', '-in', infile, '-dbtype', dbtype,
           '-out', outfile_basename]
    try:
        retval = subprocess.call(cmd)
    except OSError as e:
        # Without a shell, a missing executable raises instead of producing a
        # non-zero exit code; report it like any other failure.
        log.error('Error running makeblastdb: {}'.format(e))
        return None

    if retval == 0:
        log.debug('Made BLAST database at {}'.format(outfile_basename))
        return outfile_all

    log.error('Error running makeblastdb, exit code {}'.format(retval))
    return None
def run_bidirectional_blast(reference, other_genome, dbtype, outdir=''):
    """BLAST a genome against another, and vice versa.

    This function requires BLAST to be installed, do so by running:
    sudo apt install ncbi-blast+

    BLAST databases for both genomes are created next to the genome files if
    they do not exist yet. A search direction is skipped when its output file
    already exists and is non-empty.

    Args:
        reference (str): path to "reference" genome, aka your "base strain"
        other_genome (str): path to other genome which will be BLASTed to the
            reference
        dbtype (str): "nucl" or "prot" - what format your genome files are in
        outdir (str): path to folder where BLAST outputs should be placed
            (the default empty string yields paths relative to the current
            working directory)

    Returns:
        tuple: Paths to BLAST output files
        (reference_vs_othergenome_blast.out,
        othergenome_vs_reference_blast.out). The paths are returned even when
        a BLAST run failed; failures are only logged.

    Raises:
        ValueError: If ``dbtype`` is neither "nucl" nor "prot".

    """
    # TODO: add force_rerun option

    if dbtype == 'nucl':
        command = 'blastn'
    elif dbtype == 'prot':
        command = 'blastp'
    else:
        raise ValueError('dbtype must be "nucl" or "prot"')

    r_folder, r_name, _ = utils.split_folder_and_path(reference)
    g_folder, g_name, _ = utils.split_folder_and_path(other_genome)

    # make sure BLAST DBs have been made
    run_makeblastdb(infile=reference, dbtype=dbtype, outdir=r_folder)
    run_makeblastdb(infile=other_genome, dbtype=dbtype, outdir=g_folder)

    # Reference vs genome
    r_vs_g = op.join(outdir, r_name + '_vs_' + g_name + '_blast.out')
    _blast_one_direction(command=command,
                         query=reference, query_name=r_name,
                         db=op.join(g_folder, g_name), db_name=g_name,
                         outfile=r_vs_g)

    # Genome vs reference
    g_vs_r = op.join(outdir, g_name + '_vs_' + r_name + '_blast.out')
    _blast_one_direction(command=command,
                         query=other_genome, query_name=g_name,
                         db=op.join(r_folder, r_name), db_name=r_name,
                         outfile=g_vs_r)

    return r_vs_g, g_vs_r


def _blast_one_direction(command, query, query_name, db, db_name, outfile):
    """Run one BLAST search unless a non-empty output file already exists."""
    if op.exists(outfile) and os.stat(outfile).st_size != 0:
        log.debug('{} vs {} BLAST already run'.format(query_name, db_name))
        return

    # Human-readable form of the command, used for logging only.
    cmd = '{} -query {} -db {} -outfmt 6 -out {}'.format(
        command, query, db, outfile)
    log.debug('Running: {}'.format(cmd))

    # Execute with an argument list (no shell) so that paths containing shell
    # metacharacters are passed to BLAST verbatim.
    argv = [command, '-query', query, '-db', db, '-outfmt', '6',
            '-out', outfile]
    try:
        retval = subprocess.call(argv)
    except OSError as e:
        # Without a shell, a missing executable raises instead of producing a
        # non-zero exit code; report it like any other failure.
        log.error('Error running {}: {}'.format(command, e))
        return

    if retval == 0:
        # Names are logged in query-vs-database order for both directions.
        log.debug('BLASTed {} vs {}'.format(query_name, db_name))
    else:
        log.error('Error running {}, exit code {}'.format(command, retval))