Exemple #1
0
def get_FUZZLE_hhs(domain):
    """
    Download the hhs file of a domain from the FUZZLE server.

    Failed attempts are retried every 5 seconds until the download succeeds.
    A permanent HTTP client error (for example a 404 answer) is re-raised
    instead of being retried forever, since repeating the same request
    cannot succeed.

    :param domain: str. The domain to download from Fuzzle as hhs

    :return: filepath: value returned by string_to_tempfile for the downloaded text.
    :raises urllib.error.HTTPError: when the server answers with a
        non-retryable 4xx status code.
    """
    # Local imports keep this block self-contained; `from ... import` is used
    # so that the module-level name `urllib` is not shadowed inside the function.
    import time
    from urllib.error import HTTPError

    # 4xx codes that describe a transient condition and are worth retrying.
    retryable_client_errors = (408, 429)

    logger.info(
        f'Attempting to download hhs file for {domain} from the FUZZLE server')
    url = f'https://fuzzle.uni-bayreuth.de/hhs/scop95_2.07.psi.hhs/{domain}.hhs'
    while True:
        try:
            # The context manager closes the HTTP response even if read() fails.
            with urllib.request.urlopen(url) as response:
                text = response.read()
        except Exception as e:
            permanent = (isinstance(e, HTTPError)
                         and 400 <= e.code < 500
                         and e.code not in retryable_client_errors)
            if permanent:
                logger.error(
                    f'FUZZLE rejected the request for {domain} with error {e}. Not retrying.'
                )
                raise
            logger.warning(
                f'Failed to connect to FUZZLE with error {e}. Sleeping 5s and retrying.'
            )
            time.sleep(5)
        else:
            break
    filepath = string_to_tempfile(text.decode('ascii'), 'hhs')
    logger.info(f"File downloaded as {filepath}")

    return filepath
Exemple #2
0
def _domain_from_PDB(pdb: str):
    """ Returns all domains included in a SCOP version that come from a PDB.

    Two queries are run on the module-level cursor ``cur``: the first one
    collects the ``sdomain`` values whose ``pdbref`` matches the PDB code, the
    second one collects the ``ref`` values of ``astral_astral95`` whose ``id``
    is one of those domains.

    :param pdb: A 4-letter pdb code
    :return: numpy array with the unique ``ref`` values found for this PDB
    :raises NotCorrectPDBError: if the first query returns no domain
    """
    pdb = pdb.lower()
    # NOTE(review): `pdb` is interpolated straight into the SQL text. The
    # placeholder style of the driver behind `cur` is not visible from here,
    # so this is not parameterized -- confirm callers only pass trusted codes.
    cur.execute(
        f"SELECT sdomain FROM scop_pdbref_scop WHERE pdbref like '{pdb}'")
    hits = cur.fetchall()
    # .tolist() converts NumPy scalars to native Python objects. Formatting
    # the NumPy scalars directly renders them as "np.str_('...')" on
    # NumPy >= 2, which is not valid SQL.
    sdomains = np.unique([x[0] for x in hits]).tolist()
    if not sdomains:
        from protlego.builder.builder import NotCorrectPDBError
        raise NotCorrectPDBError(
            f"PDB code {pdb.capitalize()} does not yet appear in the SCOP database"
        )

    # Build the IN list explicitly: this works for one or many values, so no
    # special case is needed for the trailing comma of a one-element tuple.
    in_list = ", ".join(repr(sdomain) for sdomain in sdomains)
    cur.execute(f"SELECT ref FROM astral_astral95 WHERE id in ({in_list})")
    hits = cur.fetchall()
    domains = np.unique([x[0] for x in hits])
    logger.info(f"Domain(s) {domains} are present in the Fuzzle database"
                f" as representatives for pdb {pdb.capitalize()}")

    return domains
Exemple #3
0
 def unique_clusters(self):
     """
     Collect the distinct cluster identifiers found in self.hits.

     The (q_cluster, s_cluster) pair of every hit is gathered and handed to
     np.unique, and the number of returned elements is logged.
     NOTE(review): np.unique is called without an axis, so for a regular list
     of pairs it flattens them and returns single values rather than pairs --
     confirm this is the intended result.

     :return: the array returned by np.unique
     """
     cluster_pairs = []
     for hit in self.hits:
         cluster_pairs.append((hit.q_cluster, hit.s_cluster))
     distinct = np.unique(cluster_pairs)
     logger.info(
         f"The query contains {len(distinct)} different clusters")
     return distinct
Exemple #4
0
 def unique_domains(self):
     """
     Collect the distinct domain identifiers found in self.hits.

     The (query, sbjct) pair of every hit is gathered and handed to
     np.unique, and the number of returned elements is logged.
     NOTE(review): np.unique is called without an axis, so for a regular list
     of pairs it flattens them and returns single values rather than pairs --
     confirm this is the intended result.

     :return: the array returned by np.unique
     """
     domain_pairs = []
     for hit in self.hits:
         domain_pairs.append((hit.query, hit.sbjct))
     distinct = np.unique(domain_pairs)
     logger.info(
         f"The query contains {len(distinct)} different domains")
     return distinct
Exemple #5
0
    def _superimpose_chunk(self, qpairs: list, spairs: list, qmol: Molecule, smol: Molecule) -> Tuple[
        Molecule, np.ndarray]:
        """
        Superimposes the part of the total sequence alignment defined
        by the indexes qpairs/spairs

        The selected backbone fragments are written to a private temporary
        directory, aligned with TMalign (subject onto query), and the
        resulting transformation is applied to a copy of the whole subject.
        The temporary files are removed even when TMalign fails.

        :param qpairs: list of indexes to align from the pdb query
        :param spairs: list of indexes to align from the pdb subject
        :param qmol: the query pdb
        :param smol: the subject pdb
        :return: s1mol: a transformed copy of the subject molecule (smol itself is not modified).
                distances: the diagonal of the CA-CA distance matrix between both fragments
                after alignment.
        :raises ChildProcessError: if get_tmalign_output fails for any reason.
        """
        # Local import: `tempfile` was not used by the original block.
        import tempfile

        # Copying because we are going to cut the pdbs into the chunks
        copyq = qmol.copy()
        copys = smol.copy()

        copyq.filter('protein and backbone and same residue as index %s' % ' '.join(map(str, qpairs)))
        copys.filter('protein and backbone and same residue as index %s' % ' '.join(map(str, spairs)))

        # A per-call temporary directory avoids clashes between concurrent
        # runs (the file names used to be fixed paths under /tmp) and is
        # deleted on exit from the `with` block, including on errors.
        with tempfile.TemporaryDirectory() as workdir:
            query_file = os.path.join(workdir, 'copyq.pdb')
            subject_file = os.path.join(workdir, 'copys.pdb')
            copyq.write(query_file)
            copys.write(subject_file)

            # Matrix for VMD
            try:
                # We align subject onto the query
                tm_matrix = get_tmalign_output(subject_file, query_file, "matrix")
            except Exception as e:
                # `from e` keeps the original traceback attached to the wrapper.
                raise ChildProcessError(f"TMalign cannot align the PDBs. Error follows: {e}") from e
        vectran, matrot = tm2vmd(tm_matrix)

        # align the whole subject domain and fragment.
        # Copying so that the original smol does not lose the origin of coordinates
        s1mol = smol.copy()
        s1mol.rotateBy(matrot)
        s1mol.moveBy(vectran)
        copys.rotateBy(matrot)
        copys.moveBy(vectran)

        # Compute RMSD for the fragments
        rmsd = MetricRmsd(copyq, 'protein and name CA', pbc=False)
        data = rmsd.project(copys)
        logger.info(f"The RMSD between the fragments is {data} over {len(spairs)} alpha carbons")

        # Compute distances between the two selections
        bbq = copyq.get("coords", sel="protein and name CA")
        bbs = copys.get("coords", sel="protein and name CA")
        # Only the diagonal is kept: the distance between the i-th CA of one
        # fragment and the i-th CA of the other.
        distances = np.diagonal(cdist(bbq, bbs))
        return s1mol, distances
Exemple #6
0
 def fragments(self):
     """
     Labels the components of the graph and logs how many fragments there are.

     The labeling returned by label_components is stored in self.comp and the
     number of fragments (highest label + 1) in self.numFrags.

     :return: self.comp, or None when self.graph is falsy (no network has
         been created yet)
     """
     if not self.graph:
         logger.info(
             " You need to create a network first before computing its sizes."
             " Call create_network(). Exiting...")
         return None
     # The second value returned by label_components is not used here.
     self.comp, _ = label_components(self.graph)
     self.numFrags = max(self.comp.a) + 1
     # Logging takes a format string plus arguments, not print-style pieces;
     # the previous call triggered a formatting error instead of a message.
     logger.info(f"There are {self.numFrags} fragments")
     return self.comp
Exemple #7
0
 def find_nonstandards(pdb: Molecule) -> list:
     """
     Report the non-standard residue names present in a structure.

     A residue name taken from the unique values of pdb.resname is reported
     when it is in aa_keys or when it is not in standard_aas. Each reported
     name is logged: 'UNK' as a warning, any other name as an info message.

     :param pdb: Molecule or Chimera object where to find non-standard aminoacids.
     :return: list of the reported residue names
     """
     flagged = []
     for resname in np.unique(pdb.resname):
         if resname in aa_keys or resname not in standard_aas:
             flagged.append(resname)

     for resname in flagged:
         if resname == 'UNK':
             logger.warning("Protein presents unknown residue UNK."
                            " Call remove_residue() to remove it or provide parameters"
                            " if you want to minimize it with AMBER or CHARMM.")
         else:
             logger.info(f"Found the following non-standar residue: {resname}. "
                         f"Preserving in the original PDB")
     return flagged
Exemple #8
0
    def superimpose_structures(self, aln: HHpredHitAlignment, partial_alignment: bool = False):
        """ Centers copies of the query and subject structures and superimposes
            the subject onto the query, one alignment chunk at a time.

        The centered query copy is stored in self.qaPDB[0]; the aligned subject of
        chunk i is stored in self.saPDB[i] and its distances are appended to
        self.dst. self.global_dst holds all distances flattened into a single
        list wrapped in an outer list.

        :param aln: alignment handed to self._get_pairs before superimposing
        :param partial_alignment: when exactly False, self.global_qpairs/self.global_spairs
            are used; otherwise self.qpairs/self.spairs
        :return: the dictionaries self.qaPDB and self.saPDB
        """

        self._get_pairs(aln)

        # Results from a previous call are discarded so they are not mixed in
        if self.qaPDB:
            self.qaPDB, self.saPDB, self.dst = {}, {}, []

        if partial_alignment is False:
            query_chunks, subject_chunks = self.global_qpairs, self.global_spairs
        else:
            query_chunks, subject_chunks = self.qpairs, self.spairs

        # Tell the user when a partial alignment was requested but only one chunk exists
        if len(query_chunks) == 1 and partial_alignment is True:
            logger.info("The sequence alignment only contains one chunk. Performing global alignment")

        # The query is the fixed template; only the subject gets transformed.
        query_mol = self.qPDB.copy()
        subject_mol = self.sPDB.copy()
        query_mol.center()
        subject_mol.center()
        self.qaPDB[0] = query_mol.copy()

        for chunk_number, query_chunk in enumerate(query_chunks, start=1):
            position = chunk_number - 1
            logger.info(f"Performing alignment {chunk_number} with TMalign")
            aligned_subject, chunk_distances = self._superimpose_chunk(
                query_chunk, subject_chunks[position], query_mol, subject_mol)
            self.dst.append(chunk_distances)
            self.saPDB[position] = aligned_subject.copy()

        # Flatten the per-chunk distances into one list, kept inside an outer list
        flattened = []
        for chunk_distances in self.dst:
            flattened.extend(chunk_distances)
        self.global_dst = [flattened]
        return self.qaPDB, self.saPDB
Exemple #9
0
    def compute_hydrophobic_clusters(
            self,
            chain: str = 'A',
            sel: str = "protein and not backbone and noh and resname ILE VAL LEU",
            cutoff_area: float = 10):
        """
        Computes clusters among the selected atoms of a chain.

        All existing representations are removed first. A contact matrix is
        then filled atom by atom, turned into a graph and labeled into components.

        :param chain: Chain in the PDB to compute the hydrophobic clusters. Examples: "A", "A B C". Default: "A"
        :param sel: VMD selection on which to compute the clusters. Default is every sidechain heavy atom ILE, VAL and LEU residues. "protein and not backbone and noh and resname ILE VAL LEU"
        :param cutoff_area: value forwarded to create_graph as cutoff_area. Default: 10

        :return: the value returned by add_clusters, or None when no component
            label is non-zero
        """
        # Removing previous visualizations, from the last index to the first
        existing_reps = list(enumerate(self.reps.replist))
        for rep_index, _ in reversed(existing_reps):
            self.reps.remove(rep_index)

        chain_selection = f"{sel} and chain {chain}"
        resids = np.unique(self.get("resid", sel=chain_selection))
        n_residues = len(resids)
        indices = self.get("index", sel=chain_selection)
        n_chain_atoms = len(self.get("index", sel=f"protein and chain {chain}"))

        logger.info("Initializing final output")
        contacts = np.zeros((n_residues, n_residues))
        atoms_to_atoms = np.zeros((n_chain_atoms, n_chain_atoms))
        logger.info("Computing clusters")
        for atom_index in indices:
            atom = Atom(atom_index, self)
            # Atoms without any neighbor contribute nothing to the matrices
            if atom.neighbor_indices.any():
                _, contacts = fill_matrices(atom, self, atoms_to_atoms, contacts,
                                            indices, resids)

        graph = create_graph(contacts, resids, cutoff_area=cutoff_area)
        comp, _ = label_components(graph)
        if not comp.a.any():
            logger.warning(
                "There are not residues in contact for this selection")
            return None

        return add_clusters(self, graph, comp)
Exemple #10
0
 def __init__(self, hit: Hit):
     """
     Loads the query and subject structures of a hit and initializes the
     containers filled in by later alignment steps.

     The files returned by get_SCOP_domain are deleted once loading is over,
     also when loading one of them fails. When a structure holds more than
     one frame, only the first one is kept.

     :param hit: hit whose `query` and `sbjct` attributes are passed to
         get_SCOP_domain
     """

     def load_first_model(pdb_path, role):
         # Load one structure as a Chimera object and keep only its first frame.
         logger.info(f'Loading {pdb_path} as a chimera object')
         structure = Chimera(pdb_path, validateElements=False)
         if structure.numFrames > 1:
             structure.dropFrames(keep=0)
             logger.info(f"{role} protein contains more than one model. Keeping only the first one")
         return structure

     qpdb_path = get_SCOP_domain(hit.query)
     spdb_path = get_SCOP_domain(hit.sbjct)
     try:
         self.qPDB = load_first_model(qpdb_path, "Query")
         self.sPDB = load_first_model(spdb_path, "Subject")
     finally:
         # Clean up both files even if loading failed half-way; the existence
         # check avoids masking the original error with a FileNotFoundError.
         for pdb_path in (qpdb_path, spdb_path):
             if os.path.exists(pdb_path):
                 os.remove(pdb_path)
     self.qaPDB, self.saPDB = {}, {}
     self.qpairs, self.spairs = [], []
     self.dst = []
     self.chim_positions = {}