예제 #1
0
 def test_blastp(self):
     """Run a pairwise BLASTP search and check the hit counts in its report."""
     program = exe_names["blastp"]
     cline = Applications.NcbiblastpCommandline(
         program,
         query="Fasta/rose.pro",
         subject="GenBank/NC_005816.faa",
         evalue=1)

     # The wrapper must render its options in this exact order.
     expected_cmd = (_escape_filename(program)
                     + " -query Fasta/rose.pro -evalue 1"
                     + " -subject GenBank/NC_005816.faa")
     self.assertEqual(str(cline), expected_cmd)

     # Run the tool; go through the shell everywhere except on Windows.
     child = subprocess.Popen(
         str(cline),
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         universal_newlines=True,
         shell=(sys.platform != "win32"))
     stdoutdata, stderrdata = child.communicate()
     return_code = child.returncode
     self.assertEqual(
         return_code, 0,
         "Got error code %i back from:\n%s" % (return_code, cline))

     query_lines = stdoutdata.count("Query= ")
     no_hit_lines = stdoutdata.count("***** No hits found *****")
     # Used to get 10 matches from 10 pairwise searches,
     # as of NCBI BLAST+ 2.3.0 only get 1 Query= line:
     if query_lines == 10:
         # Seven misses happens with BLAST 2.2.26+ (potentially a bug there),
         # so that count is tolerated; anything else must be nine.
         if no_hit_lines != 7:
             self.assertEqual(9, no_hit_lines)
     else:
         # Assume this is NCBI BLAST+ 2.3.0 or later.
         self.assertEqual(1, query_lines)
         self.assertEqual(0, no_hit_lines)
예제 #2
0
 def test_tblastn(self):
     """Run a pairwise TBLASTN search and check the query/no-hit counts."""
     program = exe_names["tblastn"]
     cline = Applications.NcbitblastnCommandline(
         program,
         query="GenBank/NC_005816.faa",
         subject="GenBank/NC_005816.fna",
         evalue="1e-6")

     # The wrapper must render its options in this exact order.
     expected_cmd = (_escape_filename(program)
                     + " -query GenBank/NC_005816.faa -evalue 1e-6"
                     + " -subject GenBank/NC_005816.fna")
     self.assertEqual(str(cline), expected_cmd)

     # Run the tool; go through the shell everywhere except on Windows.
     child = subprocess.Popen(
         str(cline),
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         universal_newlines=True,
         shell=(sys.platform != "win32"))
     stdoutdata, stderrdata = child.communicate()
     return_code = child.returncode
     failure_note = "Got error code %i back from:\n%s" % (return_code, cline)
     self.assertEqual(return_code, 0, failure_note)

     # Expect ten query sections and not a single "no hits" marker.
     query_lines = stdoutdata.count("Query= ")
     no_hit_lines = stdoutdata.count("***** No hits found *****")
     self.assertEqual(10, query_lines)
     self.assertEqual(0, no_hit_lines)
예제 #3
0
    def test_fasta_db_nucl(self):
        """Test makeblastdb wrapper with nucleotide database.

        Checks the rendered command line, runs makeblastdb, verifies that it
        exited successfully, and confirms the expected database files exist
        next to the input FASTA file.
        """
        cline = Applications.NcbimakeblastdbCommandline(
            exe_names["makeblastdb"],
            input_file="GenBank/NC_005816.fna",
            dbtype="nucl",
            hash_index=True,
            max_file_sz="20MB",
            parse_seqids=True,
            taxid=10)

        self.assertEqual(str(cline),
                         _escape_filename(exe_names["makeblastdb"]) +
                         " -dbtype nucl -in GenBank/NC_005816.fna"
                         " -parse_seqids -hash_index -max_file_sz 20MB"
                         " -taxid 10")

        child = subprocess.Popen(str(cline),
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True,
                                 shell=(sys.platform != "win32"))
        stdoutdata, stderrdata = child.communicate()
        return_code = child.returncode

        # Fail on the exit status first: files left over from an earlier run
        # could otherwise make a broken invocation look successful.
        self.assertEqual(
            return_code, 0,
            "Got error code %i back from:\n%s\n%s"
            % (return_code, cline, stderrdata))

        for extension in ("nhd", "nhi", "nhr", "nin",
                          "nog", "nsd", "nsi", "nsq"):
            db_file = "GenBank/NC_005816.fna." + extension
            self.assertTrue(os.path.isfile(db_file),
                            "Missing database file %s" % db_file)
예제 #4
0
 def test_requires_dbtype(self):
     """Rendering a makeblastdb command line without dbtype must fail."""
     incomplete = Applications.NcbimakeblastdbCommandline(
         exe_names["makeblastdb"], input_file="GenBank/NC_005816.faa")
     # Converting the wrapper to a string is what triggers the validation.
     self.assertRaises(ValueError, str, incomplete)
예제 #5
0
 def test_blastp(self):
     """Pairwise BLASTP search.

     Checks the rendered command line, runs blastp, and validates the number
     of query sections and "No hits found" markers in its text report. Both
     the ten-section layout and the single-section layout are accepted.
     """
     cline = Applications.NcbiblastpCommandline(
         exe_names["blastp"],
         query="Fasta/rose.pro",
         subject="GenBank/NC_005816.faa",
         evalue=1)
     # NOTE(review): the executable name is compared unescaped; a path that
     # contains spaces is presumably quoted by the wrapper -- confirm.
     self.assertEqual(
         str(cline),
         exe_names["blastp"]
         + " -query Fasta/rose.pro -evalue 1"
         + " -subject GenBank/NC_005816.faa")
     child = subprocess.Popen(str(cline),
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              universal_newlines=True,
                              shell=(sys.platform != "win32"))
     stdoutdata, stderrdata = child.communicate()
     return_code = child.returncode
     self.assertEqual(
         return_code, 0,
         "Got error code %i back from:\n%s" % (return_code, cline))

     query_lines = stdoutdata.count("Query= ")
     no_hit_lines = stdoutdata.count("***** No hits found *****")
     # Used to get 10 matches from 10 pairwise searches,
     # as of NCBI BLAST+ 2.3.0 only get 1 Query= line:
     if query_lines == 10:
         # Seven misses happens with BLAST 2.2.26+ which is potentially a
         # bug, so tolerate it; otherwise nine of the ten must be misses.
         if no_hit_lines != 7:
             self.assertEqual(9, no_hit_lines)
     else:
         # Assume this is NCBI BLAST+ 2.3.0 or later.
         self.assertEqual(1, query_lines)
         self.assertEqual(0, no_hit_lines)
예제 #6
0
    def test_fasta_db_prot_legacy(self):
        """Test makeblastdb wrapper with protein database legacy, version 4.

        Checks the rendered command line, runs makeblastdb, verifies that it
        exited successfully, and confirms the expected database files exist
        next to the input FASTA file.
        """
        cline = Applications.NcbimakeblastdbCommandline(
            exe_names["makeblastdb"],
            blastdb_version=4,
            input_file="GenBank/NC_005816.faa",
            dbtype="prot",
            hash_index=True,
            max_file_sz="20MB",
            parse_seqids=True,
            taxid=10,
        )

        self.assertEqual(
            str(cline),
            _escape_filename(exe_names["makeblastdb"]) + " -blastdb_version 4"
            " -dbtype prot -in GenBank/NC_005816.faa"
            " -parse_seqids -hash_index -max_file_sz 20MB"
            " -taxid 10",
        )

        child = subprocess.Popen(
            str(cline),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
            shell=(sys.platform != "win32"),
        )
        stdoutdata, stderrdata = child.communicate()
        return_code = child.returncode

        # Fail on the exit status first: files left over from an earlier run
        # could otherwise make a broken invocation look successful.
        self.assertEqual(
            return_code, 0,
            "Got error code %i back from:\n%s\n%s"
            % (return_code, cline, stderrdata),
        )

        # Each entry lists the acceptable extensions for one database file;
        # two of them may appear under either of two names.
        expected_files = (
            ("phd",),
            ("phi",),
            ("phr",),
            ("pin",),
            ("pog",),
            ("psd", "pnd"),
            ("psi", "pni"),
            ("psq",),
        )
        for extensions in expected_files:
            candidates = ["GenBank/NC_005816.faa." + ext for ext in extensions]
            self.assertTrue(
                any(os.path.isfile(path) for path in candidates),
                "Missing database file (any of): %s" % ", ".join(candidates),
            )
예제 #7
0
def _get_alignments(seq_u, atom_u, seq_b, atom_b):
    """
    Get alignment between two chains.

    Does structural and sequence alignment: runs a pairwise BLASTP with the
    bound sequence as query and the unbound sequence as subject, derives a
    position-to-position mapping from the reported alignments, and computes
    the initial RMS between the aligned atoms.

    Args:
        seq_u: Unbound chain sequence (must support ``len``; it is written
            out through ``_tmp_fasta``).
        atom_u: Unbound atoms, indexed with the list of aligned positions
            (presumably a NumPy array of coordinates -- confirm with callers).
        seq_b: Bound chain sequence, same convention as ``seq_u``.
        atom_b: Bound atoms, same convention as ``atom_u``.

    Returns:
        A tuple ``(total_ppos, rms, (b2r, u2r))``. ``total_ppos`` is the mean
        of BLAST's ``ppos`` column weighted by alignment length, ``rms`` is
        the superimposer's initial RMS over the aligned positions, and
        ``b2r`` / ``u2r`` map 0-based bound / unbound positions to a shared
        index that starts at 1. When BLAST reports no alignment the result is
        ``(0.0, float('inf'), (None, None))``.
    """
    fasta_u = _tmp_fasta(seq_u)
    fasta_b = _tmp_fasta(seq_b)

    # Only use small word size for small sequences.
    # TODO. This is a bit brittle.
    word_size = 2 if len(seq_u) < 10 and len(seq_b) < 10 else 3

    blastp_cline = app.NcbiblastpCommandline(
        subject=fasta_u, query=fasta_b,
        outfmt="'10 qstart qend sstart send qseq sseq ppos evalue'",
        num_alignments=1, word_size=word_size, culling_limit=1, evalue=0.1)
    out, _ = blastp_cline()

    # One CSV record per line; drop blank lines such as the trailing token.
    alignments = [x for x in out.split('\n') if x.strip()]
    if not alignments:
        # No hits found. Testing the parsed records rather than the raw
        # output also covers whitespace-only output, which would otherwise
        # reach the weighted average below with a zero denominator.
        return 0.0, float('inf'), (None, None)

    b2u, u2b = {}, {}
    aligned_pos_b, aligned_pos_u = [], []
    all_ppos = []

    warned = False
    if len(alignments) > 1:
        logging.warning("More than one alignment found.")
    for curr in alignments:
        start_b, end_b, start_u, end_u, align_b, align_u, ppos, evalue = \
            curr.split(',')
        start_b, end_b = int(start_b), int(end_b)
        start_u, end_u = int(start_u), int(end_u)
        # BLAST coordinates are 1-based; positions are tracked 0-based here.
        idx_b, idx_u = start_b - 1, start_u - 1
        assert len(align_u) == len(align_b)
        align_size = len(align_u)
        for k in range(align_size):
            if align_b[k] != '-' and align_u[k] != '-':
                if idx_b not in b2u and idx_u not in u2b:
                    b2u[idx_b] = idx_u
                    u2b[idx_u] = idx_b
                    aligned_pos_b.append(idx_b)
                    aligned_pos_u.append(idx_u)
                else:
                    # A position already paired by an earlier column or
                    # alignment keeps its first partner.
                    if not warned:
                        logging.warning('ignoring double prediction {:} bound '
                                        'to {:} unbound'.format(idx_b, idx_u))
                        logging.warning('not showing future warnings for this '
                                        'alignment')
                        warned = True
            # A gap character consumes no position in that sequence.
            if align_u[k] != '-':
                idx_u += 1
            if align_b[k] != '-':
                idx_b += 1
        all_ppos.append((align_size, float(ppos)))

    # Walk both sequences in parallel and hand out shared indices: paired
    # positions receive the same index, unpaired ones get an index of their
    # own.
    idx_u, idx_b = 0, 0
    idx_r = 1
    u2r, b2r = {}, {}

    while idx_u != len(seq_u) or idx_b != len(seq_b):
        if idx_u in u2b and idx_b in b2u:
            u2r[idx_u] = idx_r
            b2r[idx_b] = idx_r
            idx_u += 1
            idx_b += 1
        elif idx_u not in u2b and idx_u != len(seq_u):
            u2r[idx_u] = idx_r
            idx_u += 1
        elif idx_b not in b2u and idx_b != len(seq_b):
            b2r[idx_b] = idx_r
            idx_b += 1
        idx_r += 1

    # Length-weighted mean of the per-alignment ppos values.
    total = 0
    total_ppos = 0
    for align_size, ppos in all_ppos:
        total_ppos += ppos * align_size
        total += align_size
    total_ppos /= total

    sup = svd.SVDSuperimposer()
    sup.set(atom_u[aligned_pos_u], atom_b[aligned_pos_b])
    return total_ppos, sup.get_init_rms(), (b2r, u2r)