def test_get_nonexist(self):
     """
     Requesting a nonexistent template must raise
     TemplateNotFoundException.
     """
     db = TemplateDatabase(str(self.database), str(self.file_root))
     not_found = TemplateDatabase.TemplateNotFoundException
     # Nothing has been added to the database in this test, so the lookup
     # of ("1FOO", "A") is expected to fail.
     self.assertRaises(not_found, db.get_template, "1FOO", "A")
    def test_add_retrieve_del_template(self):
        """Add a PDB entry and template, read the template back, delete it."""
        db = TemplateDatabase(str(self.database), str(self.file_root))
        db.add_pdb("12AS", self._METADATA)
        db.add_template(self.sample_template)
        db.commit()

        # The stored template must agree with the sample on each of the
        # compared attributes.
        stored = db.get_template("12AS", "A")
        expected = self.sample_template
        for attr in ("canonical_seq", "canonical_indices", "mapping"):
            self.assertEqual(getattr(stored, attr), getattr(expected, attr))

        # The template was added under "12AS" but is deleted and looked up
        # as "12as" -- presumably exercising case-insensitive PDB IDs
        # (NOTE(review): confirm against TemplateDatabase).
        db.del_template("12as", "A")
        db.commit()
        self.assertRaises(
            db.TemplateNotFoundException, db.get_template, "12as", "A")
Example #3
0
class HomologyModeller(Component):
    """
    Build a crude homology model for a query from a single template hit.

    The template is looked up in the template database using the ``PDB`` and
    ``chain`` fields of the pipeline state, so pre-generated PDB files for
    that chain are required. The PDB template *must* contain the ``REMARK``
    fields written by
    :py:class:`phyre_engine.component.db.db.ChainPDBBuilder` so that the
    sequence alignment can be correctly mapped onto the PDB structure. The
    ``alignment`` key must contain the mapping between the query sequence and
    the template sequence; the first two items of each alignment entry are
    used as 1-based indices into the query sequence and the template's
    canonical sequence respectively.

    For every aligned pair, the atoms of the template residue whose names are
    listed in ``BACKBONE_ATOMS`` are copied onto a new residue of the query's
    type. The result is written as chain ``A`` of a PDB file, and the path of
    that file is stored under the ``model`` key of the pipeline state. If the
    file already exists it is not rebuilt.

    .. versionchanged:: 0.1a1

        Previously, this component operated on the ``templates`` list, rather
        than on a single hit. Now, it requires the ``query_sequence`` key to be
        present in the pipeline state. You can use
        :py:class:`phyre_engine.component.jmespath.Update` to copy the query
        sequence from the top level of the pipeline state into each template:

        .. code-block:: yaml

            # In the "components" section
            - phyre_engine.component.jmespath.Update:
              select_expr: templates
              value_expr: '{query_sequence: root().sequence}'

    :param str template_db: Location of the template database; passed,
        together with ``chain_dir``, to ``TemplateDatabase``.
    :param str chain_dir: Top-level directory containing the PDB chains.
    :param str model_name: Python template string, formatted with all the
        elements of the pipeline state. The default references ``rank``,
        ``PDB`` and ``chain``, so ``rank`` must be present in the pipeline
        state when the default is used.

    .. seealso::

        :py:class:`phyre_engine.component.db.db.ChainPDBBuilder`
            For generating PDB files of the correct format for use as templates.
    """

    REQUIRED = ["PDB", "chain", "query_sequence", "alignment"]
    ADDS = ["model"]
    REMOVES = []

    def __init__(self,
                 template_db,
                 chain_dir,
                 model_name="{rank:02d}-{PDB}_{chain}-crude.pdb"):
        self.model_name = model_name
        self.chain_dir = chain_dir
        self.template_db = TemplateDatabase(template_db, chain_dir)

    @classmethod
    def config(cls, params, config):
        """
        Extract ``foldlib.template_db`` and ``foldlib.chain_dir``.

        The extracted values are merged with the explicitly supplied
        ``params`` via ``merge_params``.

        .. csv-table:: Configuration mapping
            :header: "Section", "Field", "Parameter"

            ``foldlib``,   ``template_db``, ``template_db``
                       ,   ``chain_dir``,   ``chain_dir``
        """
        foldlib_fields = {"foldlib": ["template_db", "chain_dir"]}
        extracted = config.extract(foldlib_fields)
        return extracted.merge_params(params)

    def run(self, data, config=None, pipeline=None):
        """
        Build a model and record its path under ``data["model"]``.

        The output file name is ``model_name`` formatted with the whole
        pipeline state. The model is only generated when no file of that
        name exists yet.

        :return: The pipeline state ``data`` with the ``model`` key set.
        """
        pdb_id, chain, query_seq, alignment = self.get_vals(data)
        writer = Bio.PDB.PDBIO()

        output_path = self.model_name.format(**data)

        if not Path(output_path).exists():
            self.logger.debug("Creating model file %s", output_path)
            template = self.template_db.get_template(pdb_id, chain)
            structure = self._build_structure(
                pdb_id, chain, query_seq, alignment, template)
            writer.set_structure(structure)
            writer.save(output_path)

        data["model"] = output_path
        return data

    @staticmethod
    def _build_structure(pdb_id, chain, query_seq, alignment, template):
        """
        Assemble the single-chain model structure for one template.

        :param pdb_id: PDB identifier of the template (used for naming only).
        :param chain: Chain identifier of the template (used for naming only).
        :param query_seq: Query sequence, indexed by the 1-based query index
            of each alignment entry.
        :param alignment: Iterable of entries whose first two items are the
            query index and the template index.
        :param template: Template object providing ``canonical_indices`` and
            ``chain``.
        :return: A ``Bio.PDB`` structure holding one model with one chain
            ``A``.
        """
        structure = Bio.PDB.Structure.Structure(
            "model from {}_{}".format(pdb_id, chain))
        model = Bio.PDB.Model.Model(1)
        built_chain = Bio.PDB.Chain.Chain("A")
        model.add(built_chain)
        structure.add(model)

        for pair in alignment:
            query_idx, template_idx = pair[:2]

            # Alignment indices are 1-based: map the template index to the
            # residue ID recorded for the canonical sequence, then fetch that
            # residue from the template chain.
            residue_id = template.canonical_indices[template_idx - 1]
            source_residue = template.chain[residue_id]

            # The new residue takes the query's residue type and is numbered
            # by the query index.
            res_name = Bio.SeqUtils.seq3(query_seq[query_idx - 1]).upper()
            new_residue = Bio.PDB.Residue.Residue(
                (" ", query_idx, " "), res_name, " ")

            for atom in source_residue:
                if atom.get_name() not in BACKBONE_ATOMS:
                    continue
                new_residue.add(atom.copy())
            built_chain.add(new_residue)

        return structure