def test_blastp(self):
    """Run a pairwise BLASTP search and check the report's summary counts.

    Builds the command line for ``Fasta/rose.pro`` against
    ``GenBank/NC_005816.faa``, checks its string form, runs it, and then
    inspects how many ``Query= `` and ``No hits found`` markers appear in
    the captured standard output.
    """
    exe = exe_names["blastp"]
    cline = Applications.NcbiblastpCommandline(
        exe,
        query="Fasta/rose.pro",
        subject="GenBank/NC_005816.faa",
        evalue=1,
    )
    expected_cmd = (
        _escape_filename(exe)
        + " -query Fasta/rose.pro -evalue 1"
        + " -subject GenBank/NC_005816.faa"
    )
    self.assertEqual(str(cline), expected_cmd)

    # The command is passed as a single string, so a shell is used
    # everywhere except on Windows.
    use_shell = sys.platform != "win32"
    child = subprocess.Popen(
        str(cline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=use_shell,
    )
    stdoutdata, stderrdata = child.communicate()
    return_code = child.returncode
    self.assertEqual(
        return_code,
        0,
        "Got error code %i back from:\n%s" % (return_code, cline),
    )

    query_lines = stdoutdata.count("Query= ")
    no_hit_lines = stdoutdata.count("***** No hits found *****")
    if query_lines != 10:
        # Used to get 10 matches from 10 pairwise searches; as of
        # NCBI BLAST+ 2.3.0 only a single Query= line is reported, so
        # assume this is that version or later.
        self.assertEqual(1, query_lines)
        self.assertEqual(0, no_hit_lines)
    elif no_hit_lines != 7:
        # Seven happens with BLAST 2.2.26+ (potentially a bug there) and
        # is tolerated; otherwise nine of the ten searches must be empty.
        self.assertEqual(9, no_hit_lines)
def test_tblastn(self):
    """Run a pairwise TBLASTN search and check the report's summary counts.

    Builds the command line for ``GenBank/NC_005816.faa`` against
    ``GenBank/NC_005816.fna``, checks its string form, runs it, and expects
    ten ``Query= `` lines with no ``No hits found`` markers in the output.
    """
    exe = exe_names["tblastn"]
    cline = Applications.NcbitblastnCommandline(
        exe,
        query="GenBank/NC_005816.faa",
        subject="GenBank/NC_005816.fna",
        evalue="1e-6",
    )
    expected_cmd = (
        _escape_filename(exe)
        + " -query GenBank/NC_005816.faa -evalue 1e-6"
        + " -subject GenBank/NC_005816.fna"
    )
    self.assertEqual(str(cline), expected_cmd)

    # The command is passed as a single string, so a shell is used
    # everywhere except on Windows.
    use_shell = sys.platform != "win32"
    child = subprocess.Popen(
        str(cline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=use_shell,
    )
    stdoutdata, stderrdata = child.communicate()
    return_code = child.returncode
    failure_msg = "Got error code %i back from:\n%s" % (return_code, cline)
    self.assertEqual(return_code, 0, failure_msg)

    self.assertEqual(10, stdoutdata.count("Query= "))
    self.assertEqual(0, stdoutdata.count("***** No hits found *****"))
def test_fasta_db_nucl(self):
    """Test makeblastdb wrapper with nucleotide database.

    Checks the string form of the command line, runs it, verifies that the
    process exited with status 0, and then confirms that each expected
    index file exists next to ``GenBank/NC_005816.fna``.
    """
    exe = exe_names["makeblastdb"]
    cline = Applications.NcbimakeblastdbCommandline(
        exe,
        input_file="GenBank/NC_005816.fna",
        dbtype="nucl",
        hash_index=True,
        max_file_sz="20MB",
        parse_seqids=True,
        taxid=10,
    )
    self.assertEqual(
        str(cline),
        _escape_filename(exe)
        + " -dbtype nucl -in GenBank/NC_005816.fna"
        " -parse_seqids -hash_index -max_file_sz 20MB"
        " -taxid 10",
    )

    child = subprocess.Popen(
        str(cline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    )
    child.communicate()
    return_code = child.returncode
    # Check the exit status explicitly: the file checks below could pass
    # on index files left over from an earlier run even if this one failed.
    self.assertEqual(
        return_code,
        0,
        "Got error code %i back from:\n%s" % (return_code, cline),
    )

    for ext in ("nhd", "nhi", "nhr", "nin", "nog", "nsd", "nsi", "nsq"):
        path = "GenBank/NC_005816.fna." + ext
        self.assertTrue(os.path.isfile(path), "Missing file %s" % path)
def test_requires_dbtype(self):
    """Check that rendering the command raises ValueError without dbtype."""
    makeblastdb = Applications.NcbimakeblastdbCommandline(
        exe_names["makeblastdb"],
        input_file="GenBank/NC_005816.faa",
    )
    # The error is expected when the command line is converted to a
    # string, not when the wrapper object is constructed.
    self.assertRaises(ValueError, str, makeblastdb)
def test_blastp(self):
    """Pairwise BLASTP search.

    Builds the command line for ``Fasta/rose.pro`` against
    ``GenBank/NC_005816.faa``, checks its string form, runs it, and then
    inspects how many ``Query= `` and ``No hits found`` markers appear in
    the captured standard output.
    """
    exe = exe_names["blastp"]
    cline = Applications.NcbiblastpCommandline(
        exe,
        query="Fasta/rose.pro",
        subject="GenBank/NC_005816.faa",
        evalue=1,
    )
    # Compare against the escaped executable name, as the other tests in
    # this file do, so that paths needing quoting still match.
    self.assertEqual(
        str(cline),
        _escape_filename(exe)
        + " -query Fasta/rose.pro -evalue 1"
        + " -subject GenBank/NC_005816.faa",
    )

    child = subprocess.Popen(
        str(cline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    )
    stdoutdata, stderrdata = child.communicate()
    return_code = child.returncode
    self.assertEqual(
        return_code,
        0,
        "Got error code %i back from:\n%s" % (return_code, cline),
    )

    query_lines = stdoutdata.count("Query= ")
    no_hit_lines = stdoutdata.count("***** No hits found *****")
    if query_lines == 10:
        # Older BLAST+ reports one Query= line per pairwise search.
        # Seven empty searches happens with BLAST 2.2.26+ (potentially a
        # bug there) and is tolerated; otherwise expect nine.
        if no_hit_lines != 7:
            self.assertEqual(9, no_hit_lines)
    else:
        # As of NCBI BLAST+ 2.3.0 only a single Query= line is reported,
        # so assume this is that version or later.
        self.assertEqual(1, query_lines)
        self.assertEqual(0, no_hit_lines)
def test_fasta_db_prot_legacy(self):
    """Test makeblastdb wrapper with protein database legacy, version 4.

    Checks the string form of the command line, runs it, verifies that the
    process exited with status 0, and then confirms that the expected index
    files exist next to ``GenBank/NC_005816.faa``.
    """
    exe = exe_names["makeblastdb"]
    cline = Applications.NcbimakeblastdbCommandline(
        exe,
        blastdb_version=4,
        input_file="GenBank/NC_005816.faa",
        dbtype="prot",
        hash_index=True,
        max_file_sz="20MB",
        parse_seqids=True,
        taxid=10,
    )
    self.assertEqual(
        str(cline),
        _escape_filename(exe) + " -blastdb_version 4"
        " -dbtype prot -in GenBank/NC_005816.faa"
        " -parse_seqids -hash_index -max_file_sz 20MB"
        " -taxid 10",
    )

    child = subprocess.Popen(
        str(cline),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"),
    )
    child.communicate()
    return_code = child.returncode
    # Check the exit status explicitly: the file checks below could pass
    # on index files left over from an earlier run even if this one failed.
    self.assertEqual(
        return_code,
        0,
        "Got error code %i back from:\n%s" % (return_code, cline),
    )

    # Each entry lists the acceptable extensions for one expected file;
    # where two are given, either one satisfies the check.
    expected_files = (
        ("phd",),
        ("phi",),
        ("phr",),
        ("pin",),
        ("pog",),
        ("psd", "pnd"),
        ("psi", "pni"),
        ("psq",),
    )
    for alternatives in expected_files:
        paths = ["GenBank/NC_005816.faa." + ext for ext in alternatives]
        self.assertTrue(
            any(os.path.isfile(path) for path in paths),
            "Missing file %s" % " or ".join(paths),
        )
def _get_alignments(seq_u, atom_u, seq_b, atom_b):
    """
    Get alignment between two chains.

    Does structural and sequence alignment: the two sequences are aligned
    with BLASTP (``seq_b`` as query, ``seq_u`` as subject), and the entries
    of ``atom_u`` / ``atom_b`` at the aligned positions are handed to an
    SVD superimposer to obtain an RMS value.

    Args:
        seq_u: Sequence of the "unbound" chain. Only its length and the
            result of ``_tmp_fasta(seq_u)`` are used here.
        atom_u: Per-position data for ``seq_u``. It is indexed with a list
            of positions, so presumably a NumPy array of coordinates --
            TODO confirm against the caller.
        seq_b: Sequence of the "bound" chain (see ``seq_u``).
        atom_b: Per-position data for ``seq_b`` (see ``atom_u``).

    Returns:
        A tuple ``(ppos, rms, (b2r, u2r))``:

        * ``ppos`` -- mean of the ``ppos`` column over all reported
          alignments, weighted by alignment length.
        * ``rms`` -- value of ``get_init_rms()`` for the aligned positions.
        * ``b2r`` / ``u2r`` -- dicts mapping 0-based positions of the
          bound / unbound sequence to a shared 1-based reference index.

        When BLAST reports no alignment at all, the result is
        ``(0.0, float('inf'), (None, None))``.
    """
    fasta_u = _tmp_fasta(seq_u)
    fasta_b = _tmp_fasta(seq_b)

    # Only use small word size for small sequences.
    # TODO.  This is a bit brittle.
    word_size = 2 if len(seq_u) < 10 and len(seq_b) < 10 else 3

    blastp_cline = app.NcbiblastpCommandline(
        subject=fasta_u,
        query=fasta_b,
        outfmt="'10 qstart qend sstart send qseq sseq ppos evalue'",
        num_alignments=1,
        word_size=word_size,
        culling_limit=1,
        evalue=0.1)
    out, _ = blastp_cline()

    # One comma-separated record per line; the trailing token is empty.
    alignments = [line for line in out.split('\n') if line]
    if not alignments:
        # No hits found.  Testing the parsed records (rather than the raw
        # output) also covers output that only contains blank lines, which
        # would otherwise end in a division by zero below.
        return 0.0, float('inf'), (None, None)

    if len(alignments) > 1:
        logging.warning("More than one alignment found.")

    b2u, u2b = {}, {}
    aligned_pos_b, aligned_pos_u = [], []
    all_ppos = []
    warned = False
    for curr in alignments:
        (start_b, _end_b, start_u, _end_u,
         align_b, align_u, ppos, _evalue) = curr.split(',')
        # BLAST coordinates are 1-based; positions here are 0-based.
        idx_b, idx_u = int(start_b) - 1, int(start_u) - 1

        assert len(align_u) == len(align_b)
        align_size = len(align_u)
        for res_b, res_u in zip(align_b, align_u):
            if res_b != '-' and res_u != '-':
                if idx_b not in b2u and idx_u not in u2b:
                    b2u[idx_b] = idx_u
                    u2b[idx_u] = idx_b
                    aligned_pos_b.append(idx_b)
                    aligned_pos_u.append(idx_u)
                elif not warned:
                    # A position already paired by an earlier record is
                    # left untouched; report this only once.
                    logging.warning(
                        'ignoring double prediction %s bound to %s unbound',
                        idx_b, idx_u)
                    logging.warning(
                        'not showing future warnings for this alignment')
                    warned = True
            # A gap character consumes no position in that sequence.
            if res_u != '-':
                idx_u += 1
            if res_b != '-':
                idx_b += 1
        all_ppos.append((align_size, float(ppos)))

    # Walk both sequences in parallel and hand out reference indices:
    # when both cursors sit on aligned positions they share one index,
    # otherwise whichever cursor sits on an unaligned position gets its own.
    len_u, len_b = len(seq_u), len(seq_b)
    idx_u, idx_b = 0, 0
    idx_r = 1
    u2r, b2r = {}, {}
    while idx_u != len_u or idx_b != len_b:
        if idx_u in u2b and idx_b in b2u:
            u2r[idx_u] = idx_r
            b2r[idx_b] = idx_r
            idx_u += 1
            idx_b += 1
        elif idx_u not in u2b and idx_u != len_u:
            u2r[idx_u] = idx_r
            idx_u += 1
        elif idx_b not in b2u and idx_b != len_b:
            b2r[idx_b] = idx_r
            idx_b += 1
        idx_r += 1

    # Length-weighted mean of the per-alignment ppos values.
    total = sum(size for size, _ in all_ppos)
    total_ppos = sum(value * size for size, value in all_ppos) / total

    sup = svd.SVDSuperimposer()
    sup.set(atom_u[aligned_pos_u], atom_b[aligned_pos_b])
    return total_ppos, sup.get_init_rms(), (b2r, u2r)