Beispiel #1
0
def blast_all(query, subject, xml_dir, n_proc):
    """Run blastp in both directions between two proteomes.

    The forward search uses ``query`` as the query and ``subject`` as the
    database; the reverse search swaps the two roles. Both searches write
    BLAST XML (``outfmt=5``) into ``xml_dir``; the file names are built from
    the base name of ``subject`` (extension removed) plus ``_f.xml`` or
    ``_r.xml``.

    Args:
        query (str): filename of first proteome in FASTA format.
        subject (str): filename of second proteome in FASTA format.
        xml_dir (str): directory for BLAST output in XML format.
        n_proc (int): number of threads for BLAST search.
    """
    subj_id = os.path.splitext(os.path.basename(subject))[0]

    # (query, database, output file name template); forward search first.
    directions = (
        (query, subject, "%s_f.xml"),
        (subject, query, "%s_r.xml"),
    )
    for blast_query, blast_db, name_template in directions:
        xml_path = os.path.join(xml_dir, name_template % subj_id)
        search = NcbiblastpCommandline(query=blast_query,
                                       db=blast_db,
                                       evalue=BLAST_EVALUE,
                                       outfmt=5,
                                       num_threads=n_proc,
                                       out=xml_path)
        # The captured stdout/stderr are not used; results go to xml_path.
        search()
Beispiel #2
0
    def runBlast(self, queryFile, db, outFile, evalue = 10,
                    task = '', ncpus = 1, additional = ''):
        '''Build a blastp command, run it in a subprocess and report success.

        The output path is remembered in ``self._out``. ``task`` is set on
        the command line only when it is not the empty string, and
        ``additional`` (raw extra arguments) is appended verbatim to the
        command string. Returns True when the process exits with code 0,
        False otherwise (a warning is logged in that case).
        '''
        from Bio.Blast.Applications import NcbiblastpCommandline
        self._out = outFile

        blast_cline = NcbiblastpCommandline(query=queryFile, db=db,
                                            evalue=float(evalue),
                                            outfmt='5', out=outFile,
                                            num_threads=ncpus)
        if task != '':
            blast_cline.set_parameter('task', task)

        # From here on the command is handled as a plain string.
        cmd = str(blast_cline)
        if additional != '':
            cmd = cmd + ' ' + additional
        logger.debug('Run Blast cmd: %s'%cmd)

        # The shell is used everywhere except on Windows.
        use_shell = sys.platform != "win32"
        proc = subprocess.Popen(cmd, shell=use_shell,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        proc.communicate()
        return_code = proc.returncode
        if return_code == 0:
            return True

        logger.warning('Run Blast failed with error %d'
                        %return_code)
        return False
def search_hit(record, cutoff):
    """BLAST one record against the local HA database and return its hits.

    The record is written to ``blast_query.fas`` in the current directory
    and searched with ``/usr/local/bin/blastp`` against ``HA_db/HA_pdb``
    (relative to the current working directory) using tabular output
    (``outfmt=6``).

    Args:
        record: sequence record to search; ``record.id`` is used as the key
            of the returned dictionary.
        cutoff: e-value threshold passed to blastp.

    Returns:
        dict: ``{record.id: [hit, ...]}`` where each hit is a dict keyed by
        the tabular column names, sorted by ``'identity'`` in descending
        order.
    """
    cwd = os.getcwd()
    blast_cmd = '/usr/local/bin/blastp'
    blast_db = '%s/HA_db/HA_pdb' % cwd
    SeqIO.write(record, "blast_query.fas", "fasta")
    blast_cline = NcbiblastpCommandline(cmd=blast_cmd, db=blast_db,
                                        evalue=cutoff, outfmt=6,
                                        query="blast_query.fas")
    blast_output = blast_cline()[0]

    # splitlines() copes with output that does or does not end in a newline;
    # the previous rstrip('\s') + split('\n')[:-1] silently dropped the last
    # hit when the trailing newline was missing.
    blast_list = [line for line in blast_output.splitlines() if line]

    blast_header = [
                    'query_id',
                    'subject_id',
                    'identity',
                    'alignment_length',
                    'mismatches',
                    'gap_opens',
                    'q_start',
                    'q_end',
                    's_start',
                    's_end',
                    'evalue',
                    'bit score'
                    ]

    hits = []
    for line in blast_list:
        fields = line.split('\t')
        # type_set's return value is ignored; presumably it converts the
        # fields in place -- verify against its definition.
        type_set(fields)
        hits.append(dict(zip(blast_header, fields)))

    hits.sort(key=itemgetter('identity'), reverse=True)
    return {record.id: hits}
def getBlastScoreRatios(FASTAfile, allelescore, queryDef, databasePath, queryProteomeName, referenceGenomeArray, referenceCDS, isXML):
    """Run blastp for a query proteome and parse the results.

    The query file ``queryProteomeName`` is searched against
    ``databasePath``. Depending on ``isXML`` the output is written as XML
    (``BLASTresults.xml``, outfmt 5) or tabular (``BLASTresults.tab``,
    outfmt 6) and handed to the matching runner/parser pair. The query file
    is deleted afterwards.

    Note:
        ``isXML`` is compared with the *string* ``'True'``; every other
        value, including the boolean ``True``, selects the tabular path.
        ``FASTAfile`` is not used by this function.

    Returns:
        Whatever ``parseBLASTRecordsXML`` / ``parseBLASTRecordsTAB`` returns.
    """
    if isXML == 'True':
        blast_out_file = 'BLASTresults.xml'
        outfmt = 5
        run_parser = runBlastParser
        parse_records = parseBLASTRecordsXML
    else:
        blast_out_file = 'BLASTresults.tab'
        outfmt = 6
        run_parser = runBlastParserTAB
        parse_records = parseBLASTRecordsTAB

    cline = NcbiblastpCommandline(query=queryProteomeName, db=databasePath, out=blast_out_file, outfmt=outfmt, num_alignments=7000, num_descriptions=7000)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('BSR:')
    blast_records = run_parser(cline, blast_out_file, False)

    startTime = datetime.now()
    ToNewAllele = parse_records(blast_records, allelescore, queryDef, referenceGenomeArray, referenceCDS)
    print('CheckResults:' + str(datetime.now() - startTime))

    os.remove(queryProteomeName)

    return ToNewAllele
Beispiel #5
0
def fetch_indentity_from_local(seq):
    """BLAST a protein sequence against the local human and rodent databases.

    The sequence is written to ``tmp.fastaa`` and searched with blastp
    against ``_data_/_db_/HUMAN_DB`` and ``_data_/_db_/RODENTS_DB`` (XML
    output). For every HSP whose number of positives equals its number of
    identities, an identifier extracted from the alignment title is
    collected.

    Args:
        seq (str): protein sequence.

    Returns:
        str: collected identifiers joined with ``";"`` (human hits first,
        then rodent hits); empty string when nothing matched.
    """
    def extract_prot_id(string):
        # Third '|'-separated field of the title, then its second
        # space-separated token.
        return string.split('|')[2].split(' ')[1]

    # (database, XML output file), in the order the results are reported.
    searches = (
        ('_data_/_db_/HUMAN_DB', 'blastp_human_output.xml'),
        ('_data_/_db_/RODENTS_DB', 'blastp_rodents_output.xml'),
    )

    record = SeqRecord(Seq(seq), id="tmp", name="", description="")
    SeqIO.write(record, "tmp.fastaa", "fasta")

    for db, xml_path in searches:
        NcbiblastpCommandline(query='tmp.fastaa', db=db, outfmt=5, out=xml_path)()

    result = []
    for _db, xml_path in searches:
        # The context manager closes the handle even if parsing fails.
        with open(xml_path) as result_handle:
            b_record = NCBIXML.read(result_handle)
        for alignment in b_record.alignments:
            for hsp in alignment.hsps:
                if hsp.positives == hsp.identities:
                    result.append(extract_prot_id(alignment.title))

    return ";".join(result)
Beispiel #6
0
def local_blast(query, blast_type, db, remote=True, **kwargs):
    """Run blastp through the BLAST+ command line and parse the result.

    The search writes XML (``outfmt="5"``) to ``blast.out`` in the current
    directory with an e-value threshold of 0.001, then that file is parsed
    as a single BLAST record.

    Args:
        query: query file passed to blastp.
        blast_type: not used by this function.
        db: database name passed to blastp.
        remote (bool): when true, ``remote=True`` is added to the command
            line options.
        **kwargs: extra options forwarded to ``NcbiblastpCommandline``.

    Returns:
        The record returned by ``NCBIXML.read`` for ``blast.out``.
    """

    logging.info("Running blast locally with {} and {}".format(query, db))

    extra_options = dict(kwargs)
    if remote:
        extra_options["remote"] = True
    blast_cline = NcbiblastpCommandline(query=query,
                                        db=db,
                                        out="blast.out",
                                        outfmt="5",
                                        evalue=0.001,
                                        **extra_options)
    print(blast_cline)
    stdout, stderr = blast_cline()
    logging.debug(stderr)

    # Open for reading: mode "w" truncated the freshly written results and
    # handed the parser a write-only handle.
    with open("blast.out") as blast_out:
        blast_record = NCBIXML.read(blast_out)
    return blast_record
Beispiel #7
0
def runBlastp(query_seq_faa, subject_faa, header=["sseqid", "evalue", "bitscore"], debug=False):
    """
    Run blastp of a query FASTA against a subject FASTA and return the
    best hit.

    The search uses tabular output (format 6) restricted to the columns in
    ``header`` and an e-value threshold of 1e-20. The hit with the highest
    bitscore is returned; on ties the first one in the output wins.

    Parameters
    ----------
    query_seq_faa: str
        path to query fasta sequence
    subject_faa: str
        path to subject fasta sequence
    header: list
        blastp output columns; must contain "bitscore"
    debug: bool
        if true print all raw blastp output

    Returns
    -------
    dict
        best blastp hit with every key prefixed by 'blastp_', or an empty
        dict if there is no hit
    """
    if debug:
        print("\tRunning Blastp %s vs %s" %(os.path.basename(query_seq_faa), os.path.basename(subject_faa)))

    outfmt_arg = '"%s %s"'%(6, " ".join(header))
    blastp_cline = NcbiblastpCommandline(query=query_seq_faa,
                                         subject=subject_faa,
                                         evalue=1e-20,
                                         outfmt=outfmt_arg)
    stdout = blastp_cline()[0]

    # One dict per output row, in the order blastp printed them.
    rows = [raw_line.split("\t") for raw_line in stdout.splitlines()]
    hsps = []
    for fields in rows:
        hsps.append({"blastp_" + name: fields[pos]
                     for pos, name in enumerate(header)})

    try:
        max_bitscore = max(float(hsp["blastp_bitscore"]) for hsp in hsps)
    except ValueError:
        # max() of an empty sequence (no hits) ends up here.
        max_bitscore = "N.A"

    if debug:
        print("\t\tAll hsp from blastp:")
        if hsps:
            for hsp in hsps:
                print(hsp)
            print("\t\tMax biscore: %s"%max_bitscore)
        else:
            print("\t\tNo HPS")

    # First row carrying the maximum bitscore, if any.
    for hsp in hsps:
        if float(hsp["blastp_bitscore"]) == max_bitscore:
            return hsp
    return {}
Beispiel #8
0
def blast_sequences(comp_seq, ref_seq):
    '''
    Align two protein sequences with blastp and map their residue positions.

    Both sequences are written to temporary FASTA files and compared with
    blastp (e-value 0.001, XML output). The HSP with the highest score is
    walked column by column; every column where both sequences have a
    residue contributes one entry to each mapping.

    Notes:
        User must have NCBI BLAST+ package installed in user's PATH.

    Args:
        comp_seq (str): A comparison protein sequence.
        ref_seq (str): A reference protein sequence.

    Returns:
        dict: A dictionary mapping comparison sequence numbering (key) to
            reference sequence numbering (value)
        dict: A dictionary mapping reference sequence numbering (key) to
            comparison sequence numbering (value)
        Both are empty when no HSP with a positive score is found.
    '''
    with tempfile.NamedTemporaryFile(mode='w') as comp_seq_file, \
         tempfile.NamedTemporaryFile(mode='w') as ref_seq_file:
        # FASTA records with an empty header line.
        comp_seq_file.write(">\n" + str(comp_seq) + "\n")
        ref_seq_file.write(">\n" + str(ref_seq) + "\n")
        # Flush so blastp sees the data while the files are still open.
        for handle in (ref_seq_file, comp_seq_file):
            handle.flush()
        blastp_cline = NcbiblastpCommandline(query=comp_seq_file.name,
                                             subject=ref_seq_file.name,
                                             evalue=0.001,
                                             outfmt=5)
        blast_stdout, _stderror = blastp_cline()

    blast_record = NCBIXML.read(StringIO(blast_stdout))

    # Highest scoring HSP; the first one wins on ties.
    best_hsp = None
    best_score = 0
    for hit in blast_record.alignments:
        for hsp in hit.hsps:
            if hsp.score > best_score:
                best_score = hsp.score
                best_hsp = hsp

    pdb_to_ref = {}
    ref_to_pdb = {}
    if best_hsp is None:
        return pdb_to_ref, ref_to_pdb

    comp_pos = best_hsp.query_start
    ref_pos = best_hsp.sbjct_start
    for comp_res, ref_res in zip(best_hsp.query, best_hsp.sbjct):
        comp_has_residue = comp_res.isalpha()
        ref_has_residue = ref_res.isalpha()
        if comp_has_residue and ref_has_residue:
            pdb_to_ref[comp_pos] = ref_pos
            ref_to_pdb[ref_pos] = comp_pos
        # A position advances only where its sequence has a residue
        # (non-letter characters such as gaps do not consume a position).
        if comp_has_residue:
            comp_pos += 1
        if ref_has_residue:
            ref_pos += 1
    return pdb_to_ref, ref_to_pdb
Beispiel #9
0
    def run_blastp(self):
        """Run blastp for ``self.query`` against ``self.database``.

        Tabular output (``outfmt=6``, e-value 0.005, 8 threads) is written
        to ``<self.working_dir>/<random id>.tab`` and then loaded into two
        attributes:

        * ``self.complete_hit_list``: every output row, split on tabs.
        * ``self.best_hit_list``: only the first row reported for each
          query id (first column).

        Returns:
            str: path of the tabular output file.
        """
        from Bio.Blast.Applications import NcbiblastpCommandline
        import os

        blast_id = self.id_generator(8)

        outpath = os.path.join(self.working_dir, '%s.tab' % blast_id)
        blastp_cline = NcbiblastpCommandline(query=self.query,
                                             db=self.database,
                                             evalue=0.005,
                                             outfmt=6,
                                             out=outpath,
                                             num_threads=8)
        print(blastp_cline)
        stdout, stderr = blastp_cline()
        print(stderr)

        self.best_hit_list = []
        self.complete_hit_list = []
        # Query ids already present in best_hit_list. The previous check
        # looked the id up in a list of row lists, which never matched, so
        # every row was treated as a best hit.
        seen_queries = set()
        with open(outpath, 'r') as result_handle:
            for line in result_handle:
                fields = line.rstrip().split('\t')
                self.complete_hit_list.append(fields)
                query_id = fields[0]
                if query_id in seen_queries:
                    continue
                seen_queries.add(query_id)
                # Separate list object, so the two attributes stay independent.
                self.best_hit_list.append(list(fields))

        return outpath
Beispiel #10
0
def callProc(gene, seqfile, dbfile, outfile):
    """BLAST one protein against a database and record its first match.

    The records selected from ``seqfile`` are written to ``temp<gene>.fa``
    and searched with blastp (e-value 1e-10, XML output in
    ``blast<gene>.xml``). Only the first alignment is inspected; if one of
    its HSPs has an e-value below 1e-10, the line ``<gene>\\t<title>`` is
    appended to ``outfile``. Both temporary files are removed afterwards,
    also when the search or the parsing fails.

    Args:
        gene (str): gene identifier; also used in the temporary file names.
        seqfile (str): FASTA file containing the protein.
        dbfile (str): BLAST database to search.
        outfile (str): file the match is appended to.
    """
    import os

    query_path = "temp" + gene + ".fa"
    xml_path = "blast" + gene + ".xml"
    E_VALUE_THRESH = 1e-10

    try:
        # Make fasta file of the individual protein.
        with open(seqfile) as seq_handle:
            # NOTE(review): `seq.id in gene` is a substring test on the gene
            # string, not an equality test -- confirm this is intended.
            SeqIO.write((seq for seq in SeqIO.parse(seq_handle, 'fasta')
                         if seq.id in gene),
                        query_path, "fasta")

        blastp_cline = NcbiblastpCommandline(query=query_path,
                                             db=dbfile,
                                             evalue=1e-10,
                                             outfmt=5,
                                             out=xml_path)
        blastp_cline()

        with open(xml_path) as result_handle:
            blast_record = NCBIXML.read(result_handle)

        # Get first match: only the first alignment is considered.
        for alignment in blast_record.alignments[:1]:
            for hsp in alignment.hsps:
                if hsp.expect < E_VALUE_THRESH:
                    with open(outfile, 'a') as f:
                        f.write(gene + '\t' + alignment.title + '\n')
                    break
    finally:
        # Remove temporary files. Deliberately best-effort: a file that was
        # never created must not mask the real error.
        for tmp_path in (query_path, xml_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass
Beispiel #11
0
def blast(cmd, query, db, **kwargs):
    """Run one of the BLAST+ programs with tabular output.

    The output columns are the keys of ``TABULAR_BLAST_FIELDS`` and the
    result is written to ``result.tsv`` next to the query file.

    Args:
        cmd (str): 'blastn', 'blastp', 'blastx' or 'tblastn'; any other
            value terminates the program through ``sys.exit``.
        query: path object of the query file (its ``parent`` is used).
        db: BLAST database.
        **kwargs: extra options forwarded to the command line wrapper.

    Returns:
        Path: location of the tabular result file.
    """
    outfmt = "'6 {}'".format(' '.join(TABULAR_BLAST_FIELDS.keys()))
    ext = '.tsv'
    out_file = Path(query.parent, 'result{}'.format(ext))

    # Program name -> Biopython command line wrapper.
    wrappers = {
        'blastn': NcbiblastnCommandline,
        'blastp': NcbiblastpCommandline,
        'blastx': NcbiblastxCommandline,
        'tblastn': NcbitblastnCommandline,
    }
    wrapper = wrappers.get(cmd)
    if wrapper is None:
        sys.exit(f'Unknown command: {cmd}')

    blast_on_db = wrapper(query=str(query), db=str(db),
                          out=str(out_file), outfmt=outfmt, **kwargs)
    print(blast_on_db)
    blast_on_db()

    return out_file
def _runBlast(query, outfilename, db, evalue=1, maxthreads=cpu_count()):
    '''
    Private helper that runs blastp and reports what the program printed.

    The search writes XML (outfmt 5) with up to 20000 target sequences.
    This may take a long time (several minutes up to half an hour,
    depending on the computer power and the size of the query and the
    database).

    :param query: The filename of the query sequence (may include a path)
    :param outfilename: The filename (including path) to save the result to
    :param db: The database file
    :param evalue: The e-value cut-off (should usually be very high, as the results will be filtered out afterwards)
    :param maxthreads: The maximum number of threads to use by Blast
    :creates: `outfilename`
    '''
    blast_options = dict(query=query,
                         db=db,
                         evalue=evalue,
                         outfmt=5,
                         out=outfilename,
                         num_threads=maxthreads,
                         max_target_seqs=20000)
    stdout, stderr = NcbiblastpCommandline(**blast_options)()

    print('Blasting', query, 'done')
    if stdout:
        print(stdout)
    # Anything on stderr is forwarded to our own stderr.
    if not stderr:
        print('No errors')
    else:
        print(stderr, file=sys.stderr)
Beispiel #13
0
def blastp(blast_file, blast_db, blast_path, blastoutput_xml):
    """
    Run a BLASTP search with blast_file against blast_db

    Args:
        blast_file (str):      Path to fasta file used as the query
        blast_db (str):        Target of the search; it is passed to blastp
                               as the ``subject`` option
        blast_path (str):      Directory containing the blastp program
        blastoutput_xml (str): Location of the BLAST XML output file

    Returns:
        An iterable of blast records as returned by NCBIXML.parse
    """
    blastp_exe = blast_path + '/blastp'
    blastp_cline = NcbiblastpCommandline(blastp_exe,
                                         query=blast_file,
                                         subject=blast_db,
                                         outfmt=5,
                                         out=blastoutput_xml)
    blastp_cline()

    # The handle is intentionally not closed here: it is handed to
    # NCBIXML.parse and the parser object is returned to the caller.
    result_handle = open(blastoutput_xml)
    return NCBIXML.parse(result_handle)
Beispiel #14
0
def blast(FASTA, PFAM, OUT):
    """
    Run blastp with an e-value of 10000 until it produces output.

    The search is repeated while the output file is missing or empty, up to
    100 attempts. After the last failed attempt a message is printed and
    the function returns.

    FASTA = the path to FASTA file
    PFAM = The BLAST DB PFAM Database (protein)
    OUT = Output blast report in XML format (outfmt = 5)
    """
    max_attempts = 100

    # The command does not change between attempts, so build it once.
    blastp_cline = NcbiblastpCommandline(query=FASTA, db=PFAM, evalue=10000, outfmt=5, out=OUT)
    cmd = str(blastp_cline)

    for _attempt in range(max_attempts):
        if FLAG:
            # Win32 process creation flag CREATE_NO_WINDOW.
            CREATE_NO_WINDOW = 0x08000000
            subprocess.call(cmd, creationflags=CREATE_NO_WINDOW)
        else:
            subprocess.call(cmd, shell=True)

        # A non-empty output file means the search worked. The existence
        # check avoids an OSError when blastp never created the file.
        if os.path.exists(OUT) and os.stat(OUT).st_size != 0:
            return

    # Previously this message was printed but the loop kept running forever.
    print('Trying 100 iteration, not working, exit for '+FASTA)
def run():
    """Run blastp for every suitably named file below the input directory.

    The directory tree rooted at the module-level name ``p`` is walked.
    Files whose name starts with a digit are searched with
    ``/usr/bin/blastp`` against the database named by the module-level name
    ``o`` (e-value 0.001, XML output, at most 10 target sequences); other
    files are reported as invalid. The result is written to
    ``x + <file name without its last 6 characters> + '.xml'``.
    """
    blast_exe = "/usr/bin/blastp"
    make_db_output = o
    blast_out_path = x
    starts_with_digit = re.compile(r'^\d')

    for path, dir_list, file_list in os.walk(p):
        # Collect the valid files of *this* directory only. The list used to
        # be shared across directories, so files of earlier directories were
        # searched again with the wrong path.
        valid_files = []
        for file_name in file_list:
            if starts_with_digit.match(file_name):
                valid_files.append(file_name)
            else:
                print('Invalid File Name :' + file_name)

        for file_name in valid_files:
            # os.path.join supplies the separator that `path + file_name`
            # lacked.
            blast_query_path = os.path.join(path, file_name)
            # Drops the last 6 characters (presumably a ".fasta" suffix).
            blast_name = file_name[0:(len(file_name) - 6)]
            # NOTE(review): plain concatenation is kept here; it assumes
            # blast_out_path already ends with a path separator.
            des_blast_file = blast_out_path + blast_name + '.xml'
            blastp_cline = NcbiblastpCommandline(blast_exe,
                                                 query=blast_query_path,
                                                 db=make_db_output,
                                                 evalue=0.001,
                                                 outfmt=5,
                                                 out=des_blast_file,
                                                 max_target_seqs=10)
            # NOTE(review): the subprocess docs advise against stdout=PIPE
            # with call(); results go to des_blast_file, so stdout should
            # stay small.
            subprocess.call(str(blastp_cline),
                            stdout=subprocess.PIPE,
                            shell=True)
Beispiel #16
0
def blast(record, qcovs, pident, dir_query, subject):
    '''
    Run blastp for one FASTA record against a multi-FASTA subject file.

    The record is written to a temporary query file and searched with an
    e-value threshold of 1e-5 and a minimum query coverage per HSP of
    ``qcovs``. The raw tabular result is filtered by ``pident`` through
    ``pident_filter`` and the remaining subject ids are handed to
    ``blast_parser_to_fasta``. The temporary query and unfiltered result
    files are deleted before returning.
    '''
    record_id = record.id

    # Files created (and partly removed again) by this call.
    query_file = record_id + "query_file.fasta"
    blast_nofiltered = record_id + "_blast_nf.tsv"
    blast_filtered = dir_query + record_id + "_blast.tsv"
    blast_fasta = record_id + "blast_fasta.fa"

    with open(query_file, "w") as query:
        query.write(">{}\n{}\n".format(record_id, record.seq))

    blast_options = {
        'cmd': 'blastp',
        'query': query_file,
        'subject': subject,
        'evalue': 1e-5,
        'qcov_hsp_perc': qcovs,
        'outfmt': '6 sseqid pident qcovs evalue sseq',
        'out': blast_nofiltered,
    }
    NcbiblastpCommandline(**blast_options)()

    subject_ids = pident_filter(blast_nofiltered, pident, blast_filtered)
    os.remove(blast_nofiltered)

    blast_parser_to_fasta(subject_ids, blast_fasta, query_file,
                          "multifasta.txt")
    os.remove(query_file)
Beispiel #17
0
def globalRun(d_dataset, p_dir_blast, debug=1):
    """BLAST every conserved entry of the dataset against the "pdb" database.

    For each entry whose ``"conserve"`` value is 1, the FASTA file stored
    under ``["best"]["fasta"]`` is searched with blastp (XML output in
    ``p_dir_blast + PDB_ID + ".xml"``). An existing output file is reused
    instead of running the search again. The entry is updated in place:

    * ``"xml"``: path of the XML file.
    * ``"align"``: dict mapping an identifier taken from each alignment
      title to an HSP e-value (the last HSP of an alignment wins).

    Args:
        d_dataset (dict): dataset keyed by PDB id; modified in place.
        p_dir_blast (str): output prefix; concatenated as-is with the id.
        debug: when truthy, the command line is printed.
    """
    for PDB_ID, entry in d_dataset.items():
        if entry["conserve"] != 1:
            continue

        p_fasta = entry["best"]["fasta"]
        p_out_blast = p_dir_blast + PDB_ID + ".xml"
        blastp_cline = NcbiblastpCommandline(query=p_fasta,
                                             db="pdb",
                                             outfmt=5,
                                             out=p_out_blast)
        # Single-argument print(...) works on Python 2 and 3 alike.
        if debug:
            print(blastp_cline)
        if not path.exists(p_out_blast):
            blastp_cline()
        entry["xml"] = p_out_blast
        entry["align"] = {}

        # Parse the BLAST output; the handle is closed even if parsing fails.
        with open(p_out_blast) as result_handle:
            blast_record = NCBIXML.read(result_handle)
        for alignment in blast_record.alignments:
            for hsp in alignment.hsps:
                # Fifth '|'-separated field of the title, up to the first
                # space.
                PDB_find = alignment.title.split("|")[4].split(" ")[0]
                entry["align"][PDB_find] = hsp.expect
Beispiel #18
0
def query_blastpdb(query, db_dir, output_file, cols, threads=2):
    """Run blastp with custom tabular columns and write them to a file.

    Args:
        query: query file.
        db_dir: BLAST database passed as the ``db`` option.
        output_file: file receiving the tabular output.
        cols: iterable of column names for output format 6.
        threads (int): number of BLAST threads.
    """
    # Format 6 followed by the requested columns, wrapped in single quotes.
    outfmt = "'6 {}'".format(" ".join(cols))
    blastp_cline = NcbiblastpCommandline(query=query,
                                         db=db_dir,
                                         out=output_file,
                                         evalue=1e-6,
                                         outfmt=outfmt,
                                         num_threads=threads)
    blastp_cline()
Beispiel #19
0
def write_blast(str1, str2, name1, name2):
    '''
    Align two sequences with blastp and print every HSP.

    The sequences are written to ``seq1.fasta`` (query) and ``seq2.fasta``
    (subject) in the current directory before blastp is run.

    Input:
        str1: the first sequence string
        str2: the second sequence string
        name1: the first sequence name
        name2: the second sequence name

    Return: None
    '''
    records = [SeqRecord(Seq(sequence), id=seq_name)
               for sequence, seq_name in ((str1, name1), (str2, name2))]
    for seq_record, fasta_path in zip(records, ("seq1.fasta", "seq2.fasta")):
        SeqIO.write(seq_record, fasta_path, "fasta")

    blastp_cline = NcbiblastpCommandline(query="seq1.fasta",
                                         subject="seq2.fasta",
                                         outfmt=5)
    # No `out` option is given, so the XML is read from captured stdout.
    xml_text = blastp_cline()[0]
    blast_result_record = NCBIXML.read(StringIO(xml_text))

    for alignment in blast_result_record.alignments:
        for hsp in alignment.hsps:
            print('****Alignment****')
            print('sequence:', alignment.title)
            print('length:', alignment.length)
            print('e value:', hsp.expect)
            # Query line, match line and subject line of the alignment.
            for alignment_row in (hsp.query, hsp.match, hsp.sbjct):
                print(alignment_row)
Beispiel #20
0
def blastp(user_query, user_subject, user_evalue):
    """Run blastp of a query against a subject and return its raw output.

    Args:
        user_query: query file.
        user_subject: subject file.
        user_evalue: e-value threshold.

    Returns:
        tuple: ``(stdout, stderr)`` of the blastp run.
    """
    search_options = {
        "query": user_query,
        "subject": user_subject,
        "evalue": user_evalue,
    }
    run_search = NcbiblastpCommandline(**search_options)
    return run_search()
Beispiel #21
0
    def blastPToPDB(self, dbPATH):
        """Run blastp for ``self.info['sequence']`` against ``dbPATH``.

        The sequence is wrapped in a temporary input file and the
        comma-separated result table (output format 10 with a fixed column
        list) is written to a temporary output file.

        Returns:
            dict: ``{'Success': True, 'Table': <file content>}`` when blastp
            ran, ``{'Success': False}`` when it raised ``ApplicationError``.
        """
        OFH = getOutputTempFile()
        try:
            # Wrap the input sequence into a temporary file, then run blastp.
            IFH = getInputTempFile(self.info['sequence'])
            cline = NcbiblastpCommandline(
                query=IFH.name,
                db=dbPATH,
                out=OFH.name,
                outfmt='"10 qcovs evalue bitscore sseqid qlen qstart qend qseq slen sstart send sseq qcovhsp"')
            cline()
            IFH.close()
        except ApplicationError:
            # blastp failed: release both temporary files and report it.
            IFH.close()
            OFH.close()
            return {'Success': False}

        # Success: hand back the table together with the status flag.
        table = OFH.read()
        OFH.close()
        return {'Success': True, 'Table': table}
def collect_blast_data(query, subject, id_list, out):
    """Run a BLASTp job, parse its tabular output and impute missing rows.

    The search (e-value 1e-3, outfmt 6) writes to ``out``. Hits with 100 %
    identity are discarded, only the first remaining hit per query is kept
    and the columns are reduced to ``qseqid, pident, len, evalue,
    bitscore``. Every id of ``id_list`` without a hit gets a row with
    ``pident=0, len=0, evalue=1, bitscore=0``.

    Args:
        query: query FASTA file.
        subject: subject FASTA file.
        id_list: all query ids that must appear in the result.
        out: path of the tabular BLAST output file.

    Returns:
        pandas.DataFrame: one row per query id, sorted by ``qseqid``.
    """
    # run the BLASTp job
    print('Running BLASTp job...')
    blastp_cline = NcbiblastpCommandline(query=query, subject=subject, evalue=1e-3, outfmt=6, out=out)
    blastp_cline()

    # read & filter the blast results
    blast_header = 'qseqid sseqid pident len mm gapopen qstart qend sstart send evalue bitscore'.split(' ')
    try:
        df_master = pd.read_table(out, header=None)
        df_master.columns = blast_header
    except pd.errors.EmptyDataError:
        # No hit at all: blastp wrote an empty file. Every id is imputed.
        df_master = pd.DataFrame(columns=blast_header)

    # tidy the table
    df = df_master[df_master['pident'] != 100.0]  # remove identical hits
    df = df.drop_duplicates(subset=['qseqid'], keep='first')  # only keep top hit per query
    df = df.drop(['sseqid', 'mm', 'gapopen', 'qstart', 'qend', 'sstart', 'send'], axis=1)

    print('Imputing missing data...')
    # impute missing values, for queries where no alignment found
    found_ids = set(df['qseqid'])
    missing_rows = [[seq, 0, 0, 1, 0] for seq in id_list if seq not in found_ids]
    if missing_rows:
        # Passing the columns to the constructor avoids the length mismatch
        # that assigning them afterwards raised for an empty frame.
        tmp = pd.DataFrame(missing_rows, columns=df.columns)
        df = pd.concat([df, tmp])
    df = df.sort_values(by=df.columns[0])
    print('Collected BLAST df: {}'.format(df.shape))

    return df
Beispiel #23
0
def Run_blast(local=False, infile=None, outfile=None, max_hits=100):
    """BLAST the sequence in ``infile`` and save the XML result.

    Args:
        local (bool): when true, run the BLAST+ ``blastp`` program against a
            local ``nr`` database; otherwise submit the sequence to the NCBI
            web service (blastp against ``nr``).
        infile (str): file containing the query sequence.
        outfile (str): file the XML result is written to.
        max_hits (int): maximum number of hits to report.
    """
    print('\nBlasting sequence from ' + infile + '...')
    print('Maximum number of hits set to ' + str(max_hits))

    if local:
        # The command line tool needs the *path* of the query file, and the
        # wrapper object has to be called to actually run the search (the
        # previous code built it and never executed it).
        cmd = NcbiblastpCommandline(query=infile,
                                    db='nr',
                                    outfmt=5,
                                    out=outfile,
                                    max_target_seqs=max_hits)
        cmd()
        return

    # Read the input file and get the sequence
    with open(infile, "r") as f:
        in_file = f.read().strip()

    # Blast the sequence over the internet (you may have to wait)
    result_handle = NCBIWWW.qblast("blastp",
                                   "nr",
                                   in_file,
                                   hitlist_size=max_hits)

    print('\tDone: writing to ' + outfile)
    try:
        with open(outfile, "w") as out_handle:
            out_handle.write(result_handle.read())
    finally:
        result_handle.close()
Beispiel #24
0
def run_blastp(match, blastdb):
    """Run blastp for every feature of ``match`` that has a protein sequence.

    For each feature the protein FASTA text is piped into a blastp process
    (XML output, 4 threads) and the first BLAST record is read from its
    standard output. Features without a sequence are skipped; failures to
    start blastp or to parse its output are logged and the feature is
    skipped.

    Args:
        match: object whose ``features`` provide ``protein_fasta()``.
        blastdb: BLAST database to search.
    """
    from Bio.Blast.Applications import NcbiblastpCommandline
    for feature in match.features:
        # NOTE(review): `rec` is assigned but not used in the visible code;
        # the consumer of the record may be missing from this excerpt.
        rec = None
        fasta = feature.protein_fasta()
        if fasta == "":
            continue
        try:
            cline = NcbiblastpCommandline(db=blastdb, outfmt=5, num_threads=4)
            pipe = subprocess.Popen(str(cline),
                                    shell=True,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            pipe.stdin.write(fasta)
            pipe.stdin.close()
            recs = NCBIXML.parse(pipe.stdout)
            # next(...) and `except ... as` are valid on Python 2.6+ and 3.
            rec = next(recs)
            pipe.stdout.close()
            pipe.stderr.close()
        except OSError as err:
            logging.warning("Failed to run blastp: %s" % err)
            continue
        except ValueError as err:
            logging.warning("Parsing blast output failed: %s" % err)
            continue
 def blast_alleles(self, runmetadata, amino_acid):
     """
     Run one BLAST search per sample against the combined targets database.

     The database name is ``self.combined_targets`` without its extension,
     the query is the sample's best assembly file, and the report is written
     to ``sample.alleles.blast_report``.
     :param runmetadata: Metadata object whose ``samples`` are searched
     :param amino_acid: Boolean; blastp is used when true, blastn otherwise
     """
     logging.info('Running BLAST analyses')
     for sample in runmetadata.samples:
         # Options shared by the nucleotide and the protein search.
         options = dict(query=sample.general.bestassemblyfile,
                        db=os.path.splitext(self.combined_targets)[0],
                        evalue=0.001,
                        num_alignments=100000000,
                        num_threads=self.cpus,
                        outfmt=self.outfmt,
                        out=sample.alleles.blast_report)
         if amino_acid:
             blast = NcbiblastpCommandline(**options)
         else:
             # The nucleotide search additionally fixes the task to blastn.
             blast = NcbiblastnCommandline(task='blastn', **options)
         blast()
Beispiel #26
0
def blastp_analyze():
    """Run blastp for the query of the current request and return the output.

    Request arguments read: ``query`` (sequence text), ``db`` (database
    name, appended to ``UPLOAD_FOLDER``) and ``outputFormat`` (BLAST
    ``outfmt`` value). The search uses e-value 50.0 and the PAM30 matrix and
    writes to ``TMP_RESULT_FOLDER + 'tmp_res'``.

    Returns:
        str: content of the BLAST result file.

    Raises:
        ValueError: if ``outputFormat`` is not a plain format specifier.
    """
    import re
    import uuid

    # The format is interpolated into the BLAST command line, so only a
    # number, or a quoted number followed by column names, is accepted.
    outFormat = request.args.get('outputFormat')
    if outFormat is not None and not re.match(
            r'''(\d+|"\d+( \w+)*"|'\d+( \w+)*')\Z''', outFormat):
        raise ValueError('Invalid outputFormat: %r' % (outFormat,))

    # NOTE(review): `db` is also user-controlled and is neither restricted
    # to UPLOAD_FOLDER nor escaped -- needs validation against the set of
    # known databases.
    db = UPLOAD_FOLDER + request.args.get('db')
    # NOTE(review): the result file name is shared by all requests, so
    # concurrent requests can overwrite each other's output.
    result_path = TMP_RESULT_FOLDER + 'tmp_res'

    # A UUID keeps concurrent requests from sharing one query file, which a
    # random number in 1..99999 could not guarantee.
    tmpFileName = TMP_QUERY_FOLDER + "query-" + uuid.uuid4().hex
    try:
        with open(tmpFileName, "w") as tmpFile:
            tmpFile.write(request.args.get('query'))

        blastp_cline = NcbiblastpCommandline(query=tmpFileName,
                                             db=db,
                                             evalue=50.0,
                                             outfmt=outFormat,
                                             out=result_path,
                                             matrix="PAM30")
        blastp_cline()

        with open(result_path) as f:
            result = f.read()
    finally:
        # The query file is removed even when the search fails.
        if os.path.exists(tmpFileName):
            os.remove(tmpFileName)
    return result
Beispiel #27
0
 def blast_iden(self, threads=2):
     """Select database sequences that are similar to the seed sequences.

     ``self.db`` is copied to ``<tmp_dir>/database.fasta``, turned into a
     protein BLAST database and searched with ``self.seed`` (e-value 1e-5,
     one HSP per pair). Hits are kept when identity is above 50 % and the
     alignment length covers more than half of both the query and the
     subject. Files written in ``self.tmp_dir``:

     * ``blast.tbl``: raw tabular BLAST output.
     * ``blast2.tbl``: filtered hits, tab separated with a header row.
     * ``subgenes.fasta``: records of ``self.db`` whose id is a kept
       subject accession.

     :param threads: number of BLAST threads
     """
     db_path = os.path.join(self.tmp_dir, 'database.fasta')
     table_path = os.path.join(self.tmp_dir, 'blast.tbl')

     shutil.copyfile(self.db, db_path)
     makeblastdb_cline = NcbimakeblastdbCommandline(dbtype='prot',
                                                    input_file=db_path)
     blastp_cline = NcbiblastpCommandline(
         query=self.seed,
         db=db_path,
         evalue='1e-5',
         outfmt="6 qacc sacc qlen slen length pident evalue",
         max_hsps=1,
         num_threads=threads,
         out=table_path)
     makeblastdb_cline()
     blastp_cline()

     blast_result = pd.read_table(table_path,
                                  header=None,
                                  names=[
                                      'qacc', 'sacc', 'qlen', 'slen',
                                      'length', 'pident', 'evalue'
                                  ])
     blast_result = blast_result[
         (blast_result['pident'] > 50)
         & (blast_result['length'] / blast_result['slen'] > 0.5) &
         (blast_result['length'] / blast_result['qlen'] > 0.5)]
     blast_result.to_csv(os.path.join(self.tmp_dir, 'blast2.tbl'),
                         sep='\t',
                         index=False)

     # Build the accession lookup once instead of once per FASTA record.
     kept_accessions = set(blast_result['sacc'])
     seq_list = [
         record for record in SeqIO.parse(self.db, 'fasta')
         if record.id in kept_accessions
     ]
     SeqIO.write(seq_list, os.path.join(self.tmp_dir, 'subgenes.fasta'),
                 'fasta')
Beispiel #28
0
def gfg():
    """Render the BLAST input form and, on POST, run the search and show it.

    Form fields read on POST:
        seq         pasted sequence record (header line, then sequence).
        ip_type     "fasta" or "fastq".
        blast_type  BLAST program name ("blastn", "blastp", ...).
        database    database name used for the online NCBI search.
        db_typeo    local BLAST database; empty means "search online".
        evalue      e-value threshold; empty falls back to 0.05.

    The XML report is written to ``outputhtml.xml`` in the current working
    directory and its first record is passed to ``output.html``.

    Returns:
        The rendered ``input.html`` for non-POST requests, otherwise the
        rendered ``output.html``.  Malformed form input aborts with HTTP 400.
    """
    # Function-scope imports so this view does not rely on the module's
    # import list for the names it newly uses.
    import os
    import subprocess
    import tempfile

    from flask import abort

    if request.method != "POST":
        return render_template("input.html")

    # getting input with name = seq in HTML form
    ip_sequence = request.form.get("seq") or ""
    ip_type = request.form.get("ip_type")
    blast_type = request.form.get("blast_type")
    database_type = request.form.get("database")
    my_blast_db = request.form.get("db_typeo")

    # Apply the default *before* converting: float("") raises ValueError,
    # so a check made after the conversion can never supply the default.
    raw_evalue = (request.form.get("evalue") or "").strip()
    try:
        e_value_thresh = float(raw_evalue) if raw_evalue else 0.05
    except ValueError:
        abort(400, description="evalue must be a number")

    # splitlines() also copes with the CRLF line endings browsers submit.
    lines = ip_sequence.strip().splitlines()
    if len(lines) < 2:
        abort(400,
              description="seq must contain a header line and a sequence")

    if ip_type == "fastq":
        # Keep only the id and sequence lines of the FASTQ record and swap
        # the "@" marker for FASTA's ">" so BLAST sees a valid defline.
        header, sequence = lines[0], lines[1]
        if header.startswith("@"):
            header = ">" + header[1:]
        fasta_seq = header + "\n" + sequence
    elif ip_type == "fasta":
        # Only the sequence lines are submitted; the header line is dropped.
        fasta_seq = "\n".join(lines[1:])
    else:
        abort(400, description="ip_type must be 'fasta' or 'fastq'")

    # NOTE(review): outputhtml.xml is a fixed path shared by all requests;
    # concurrent searches would overwrite each other's report.
    if not my_blast_db:
        # blast over internet
        result_handle = NCBIWWW.qblast(blast_type, database_type, fasta_seq)
        try:
            with open("outputhtml.xml", "w") as save_to:
                save_to.write(result_handle.read())
        finally:
            result_handle.close()
    else:
        # local blast
        local_programs = ("blastn", "blastp", "blastx", "tblastx", "tblastn")
        if blast_type not in local_programs:
            abort(400, description="unsupported blast_type for local search")
        with tempfile.TemporaryDirectory() as work_dir:
            # BLAST+ takes the query as a file, not as sequence text.
            query_path = os.path.join(work_dir, "query.fasta")
            with open(query_path, "w") as query_file:
                query_file.write(fasta_seq + "\n")
            # Argument list without a shell: the database name comes
            # straight from the form and must not be shell-interpreted.
            # -outfmt 5 is XML, which is what NCBIXML parses below.
            subprocess.run(
                [
                    blast_type,
                    "-query", query_path,
                    "-db", my_blast_db,
                    "-evalue", str(e_value_thresh),
                    "-outfmt", "5",
                    "-out", "outputhtml.xml",
                ],
                check=True,
            )

    # blast parsing: only the first record is displayed.
    with open("outputhtml.xml") as f:
        blast_record = next(NCBIXML.parse(f))

    return render_template("output.html",
                           blast_record=blast_record,
                           e_value_threshold=e_value_thresh)
Beispiel #29
0
    def run(self):
        """Run blastp for every digit-prefixed file found under ``self.queries``.

        The directory tree rooted at ``self.queries`` is walked.  In each
        directory, files whose name does not start with a digit are
        reported on stdout and skipped; every other file is searched
        against ``<cwd>/database/blast_database`` with the blastp binary at
        ``<cwd>/tools/ncbi-blast-2.9.0+/bin/blastp`` (e-value 0.001, XML
        output, at most 10 target sequences).

        The report path is ``self.blast_run`` + file name minus its last
        six characters + ``.xml``.
        """
        # POSIX-style layout relative to the current working directory.
        base_dir = str(Path().absolute())
        blast_exe = base_dir + "/tools/ncbi-blast-2.9.0+/bin/blastp"
        make_db_output = base_dir + "/database/blast_database"

        blast_out_path = self.blast_run
        # Compiled once; raw string so "\d" is a regex class rather than an
        # invalid string escape.
        leading_digit = re.compile(r'^\d')

        for path, _dir_list, file_list in os.walk(self.queries):
            # Start a fresh list per directory: a list shared across the
            # walk would re-run files from earlier directories against
            # this directory's path.
            valid_files = []
            for file_name in file_list:
                if leading_digit.match(file_name):
                    valid_files.append(file_name)
                else:
                    print('Invalid File Name :' + file_name)

            for file_name in valid_files:
                # os.walk yields directories without a trailing separator.
                blast_query_path = os.path.join(path, file_name)
                # Drops the last six characters -- presumably a ".fasta"
                # suffix; TODO confirm against the query naming scheme.
                blast_name = file_name[:len(file_name) - 6]
                # NOTE(review): plain concatenation is kept, so this
                # assumes self.blast_run ends with a path separator (or is
                # meant as a prefix) -- confirm with the caller.
                blast_xml = blast_out_path + blast_name + '.xml'
                blastp_cline = NcbiblastpCommandline(
                    blast_exe, query=blast_query_path, db=make_db_output,
                    evalue=0.001, outfmt=5, out=blast_xml,
                    max_target_seqs=10)
                # The result goes to ``out``; stdout is discarded instead
                # of piped, because an unread pipe can block the child.
                # NOTE(review): the command string is still run through
                # the shell; file names come from the directory listing.
                subprocess.call(str(blastp_cline),
                                stdout=subprocess.DEVNULL,
                                shell=True)
 def _reverse_blast_iden(self, threads):
     """BLAST the putative sequences back against the reference set.

     ``self.ref`` is symlinked into ``self.tmp_dir`` as ``ref.fasta`` and
     ``makeblastdb`` is run on the link; ``putative.fasta`` from the same
     directory is then searched against it with blastp, the tabular
     output going to ``reverse_blast.tbl``.

     Args:
         threads: forwarded to blastp as ``num_threads``.

     Returns:
         tuple: ``(queries, hits)`` where ``queries`` is the set of
         ``qacc`` values with at least one hit whose ``sacc`` satisfies
         ``sacc in self.seed``, and ``hits`` is the sub-table holding
         every row of those queries.
     """
     ref_source = os.path.abspath(self.ref)
     ref_link = os.path.join(self.tmp_dir, 'ref.fasta')
     os.symlink(ref_source, ref_link)

     build_db = NcbimakeblastdbCommandline(dbtype='prot',
                                           input_file=ref_link)
     table_path = os.path.join(self.tmp_dir, 'reverse_blast.tbl')
     search = NcbiblastpCommandline(
         query=os.path.join(self.tmp_dir, 'putative.fasta'),
         db=ref_link,
         evalue='1e-3',
         outfmt="6 qacc sacc qlen slen length pident evalue",
         max_hsps=1,
         num_threads=threads,
         out=table_path)
     build_db()
     search()

     hits = pd.read_table(table_path,
                          header=None,
                          names=[
                              'qacc', 'sacc', 'qlen', 'slen',
                              'length', 'pident', 'evalue'
                          ])

     # Walk queries and subjects in lockstep, keeping the query of every
     # row whose subject passes the ``in self.seed`` test.
     query_accs = hits['qacc'].to_list()
     subject_accs = hits['sacc'].to_list()
     matched = [
         query_acc
         for query_acc, subject_acc in zip(query_accs, subject_accs)
         if subject_acc in self.seed
     ]
     return set(matched), hits[hits['qacc'].isin(matched)]
Beispiel #31
0
def call_blast(fastafilename,
               dbpath,
               n=100000,
               blastresultsfile=None,
               evalue=100):
    """Run a local blastp search and return the path of the XML report.

    Args:
        fastafilename: query FASTA file (str or path-like).
        dbpath: passed to blastp as ``db``.
        n: passed to blastp as ``max_target_seqs``.
        blastresultsfile: where to write the report.  When ``None`` the
            report goes next to the query, named after it with the
            extension replaced by ``_blast.xml``.
        evalue: passed to blastp as ``evalue``.

    Returns:
        ``blastresultsfile`` as given, or the derived ``pathlib.Path`` when
        it was ``None``.
    """
    import os  # function-scope: only needed to derive the default name

    if blastresultsfile is None:
        # splitext only strips an extension from the final path component,
        # so extension-less files keep their name and a dot in a directory
        # name is left alone.
        root, _ext = os.path.splitext(str(fastafilename))
        blastresultsfile = pathlib.Path(root + "_blast.xml")

    cline = NcbiblastpCommandline(query=str(fastafilename),
                                  db=dbpath,
                                  remote=False,
                                  out=str(blastresultsfile),
                                  outfmt="5",
                                  evalue=evalue,
                                  max_target_seqs=n)
    print('Running:', cline)
    stdout, stderr = cline()
    if stdout:
        print('stdout:')
        print(stdout)
    if stderr:
        print('stderr')
        print(stderr)

    return blastresultsfile
Beispiel #32
0
def do_blast(inputName, organism, outputName,eVal):
    """Run blastp with ``inputName`` as query against ``organism``.

    Args:
        inputName: passed to blastp as ``query``.
        organism: passed to blastp as ``db``.
        outputName: passed to blastp as ``out``; the report is requested
            with ``outfmt=5``.
        eVal: passed to blastp as ``evalue``.
    """
    blastp_cline = NcbiblastpCommandline(query=inputName,
                                         db=organism,
                                         outfmt=5,
                                         out=outputName,
                                         evalue=eVal)
    # Calling the command-line object executes it and yields the pair
    # unpacked here as stdout and stderr.
    stdout, stderr = blastp_cline()