コード例 #1
0
ファイル: analysis.py プロジェクト: dhingra-s/protlego
def calc_contact_order(chimera: Chimera = None, filename: str = None, diss_cutoff: int = 8):
    """
    Compute the contact order of a protein structure, as a percentage.

    The contact order measures how local the inter-residue contacts of the folded
    state are. The sequence separation of every contacting residue pair is summed
    and divided by (number of contacts * number of residues).

    :param chimera: A Chimera object. Mutually exclusive with ``filename``.
        ``renumberResidues()`` is called on it before the contacts are computed.
    :param filename: Path to a pdb file. Mutually exclusive with ``chimera``.
    :param diss_cutoff: Threshold handed to the contact metric; two residues closer
        than this are in contact. Default 8 (Angstroms).
    :return: the contact order (%)
    :raises ValueError: if both, or neither, of ``chimera`` and ``filename`` are given
    """
    if chimera and filename:
        raise ValueError("Only a Chimera object or the path to a pdb file must be specified")
    if not (chimera or filename):
        raise ValueError("At least a Chimera object or the path to a pdb file must be specified")
    if filename:
        chimera = Chimera(filename=filename)
    chimera.renumberResidues()

    contact_metric = MetricSelfDistance(
        "protein and noh",
        groupsel="residue",
        metric="contacts",
        threshold=diss_cutoff,
        pbc=False,
    )
    projection = contact_metric.project(chimera)
    group_mapping = contact_metric.getMapping(chimera)
    contact_map, _, _ = contactVecToMatrix(projection[0], group_mapping.atomIndexes)

    # Only the upper triangle is used, so each residue pair is counted once.
    rows, cols = np.where(np.triu(contact_map))
    n_contacts = len(rows)
    n_residues = chimera.numResidues
    separation_sum = np.sum(cols - rows)

    co = 1 / (n_contacts * n_residues) * separation_sum
    print(f"Contact order is {co*100} %")
    return co * 100
コード例 #2
0
ファイル: builder.py プロジェクト: Hoecker-Lab/protlego
 def __init__(self, hit: Hit):
     """
     Load the query and subject structures of a hit and set up empty builder state.

     Each path returned by get_SCOP_domain is read into a Chimera object and the
     file is then deleted from disk. When a structure holds more than one frame,
     only frame 0 is kept.

     :param hit: Hit. Its ``query`` and ``sbjct`` attributes are passed to get_SCOP_domain.
     """
     query_path = get_SCOP_domain(hit.query)
     subject_path = get_SCOP_domain(hit.sbjct)

     # (attribute to fill, file to load, message logged when extra frames are dropped)
     loading_plan = (
         ("qPDB", query_path,
          "Query protein contains more than one model. Keeping only the first one"),
         ("sPDB", subject_path,
          "Subject protein contains more than one model. Keeping only the first one"),
     )
     for attribute, pdb_path, multi_model_note in loading_plan:
         logger.info(f'Loading {pdb_path} as a chimera object')
         structure = Chimera(pdb_path, validateElements=False)
         setattr(self, attribute, structure)
         os.remove(pdb_path)
         if structure.numFrames > 1:
             structure.dropFrames(keep=0)
             logger.info(multi_model_note)

     # Containers that start empty.
     self.qaPDB = {}
     self.saPDB = {}
     self.qpairs = []
     self.spairs = []
     self.dst = []
     self.chim_positions = {}
コード例 #3
0
ファイル: analysis.py プロジェクト: dhingra-s/protlego
def calc_dist_matrix(chimera: Chimera = None, filename: str = None, selection: str = 'residue', type='contacts',
                     plot=False, diss_cutoff: float = 8):
    """
    Return the distance matrix, or the derived contact map, of a structure.

    :param chimera: A Chimera object. Mutually exclusive with ``filename``.
    :param filename: Path to a pdb file. Mutually exclusive with ``chimera``.
    :param selection: How to compute the distances. 'residue' groups the protein
        atoms by residue; 'alpha' uses only the alpha carbons.
    :param type: 'contacts' returns a boolean map that is True where the distance
        is below ``diss_cutoff``; 'distances' returns the distances themselves.
    :param plot: Whether to plot the matrix. Default is False.
    :param diss_cutoff: Distance below which two residues count as in contact when
        ``type`` is 'contacts'. Defaults to 8, the previously hard-coded value.
    :return: matrix. np.array. The distance matrix, or the boolean contact map.
    :raises ValueError: if both or neither of ``chimera``/``filename`` are given, or
        if ``selection`` or ``type`` is not one of the supported values.
    """
    if chimera and filename:
        raise ValueError("Only a Chimera object or the path to a pdb file must be specified")
    if not chimera and not filename:
        raise ValueError("At least a Chimera object or the path to a pdb file must be specified")
    # Validate the options before any file is loaded or any projection is computed.
    if selection not in ('residue', 'alpha'):
        raise ValueError("Specify a selection type: 'residue' or 'alpha'")
    if type not in ('contacts', 'distances'):
        raise ValueError("Please select contact type between 'contacts' or distances")
    if filename:
        chimera = Chimera(filename=filename)

    if selection == 'residue':
        metr = MetricSelfDistance("protein", groupsel="residue", metric="distances", pbc=False)
        mapping = metr.getMapping(chimera)
        a = metr.project(chimera)
        matrix, _, _ = contactVecToMatrix(a[0], mapping.atomIndexes)
    else:
        metr = MetricSelfDistance("protein and name CA", metric="distances", pbc=False)
        a = metr.project(chimera)
        mapping = metr.getMapping(chimera)
        # NOTE(review): the 'residue' branch passes a[0] while this one passes the
        # whole projection. Left unchanged because the shape returned by project()
        # is not visible here; confirm which form is intended.
        matrix, _, _ = contactVecToMatrix(a, mapping.atomIndexes)

    if type == "contacts":
        matrix = matrix < diss_cutoff

    if plot:
        fig = plt.figure(figsize=(12, 12))
        ax = fig.add_subplot(111)
        # Colormap names are passed directly; matplotlib.cm.get_cmap is gone from
        # recent Matplotlib releases.
        cmap = 'gist_rainbow' if type == 'distances' else 'binary'
        image = ax.imshow(matrix, cmap=cmap, interpolation='nearest', origin="lower")
        if type == 'distances':
            # A colorbar is only drawn for continuous distances.
            fig.colorbar(image)
        plt.xlabel("Residue index", fontsize=24)
        plt.ylabel("Residue index", fontsize=24)
        plt.xticks(fontsize=22)
        plt.yticks(fontsize=22)

    return matrix
コード例 #4
0
ファイル: builder.py プロジェクト: Hoecker-Lab/protlego
    def _construct_chimera(self, qmol, smol, qstart, qend, sstart, send, combination):
        """
        Cut one fragment out of the query and one out of the subject and join them.

        :param qmol: Molecule. The query protein
        :param smol: Molecule. The subject protein in any of its positions
        :param qstart: int. Position to start the cut in the query
        :param qend: int. Position to end the cut in the query
        :param sstart: int. Position to start the cut in the sbjct
        :param send: int. Position to end the cut in the sbjct.
        :param combination: int. When 1 the subject fragment is placed first and the
            query fragment is appended after it; any other value puts the query first.
        :return: tuple (chim1, last_id).
        chim1: The resulting chimera, with every atom assigned to chain "A"
        last_id: The starting value handed to get_new_resIDs for the second fragment
        :raises NotDiverseChimeraError: if either fragment has fewer than 10 CA atoms
        :raises BackboneClashError: if any query backbone atom lies closer than 1.3
            to any subject backbone atom
        """
        qmol_copy = qmol.copy()
        smol_copy = smol.copy()
        qmol_copy.filter(f"(protein and same residue as index '{qstart}' to '{qend}')\
         or (not protein and same residue as within 4 of protein and same residue as index '{qstart}' to '{qend}')")
        # The non-protein clause must follow the subject cut (sstart/send); it
        # previously reused the query bounds.
        smol_copy.filter(f"(protein and same residue as index '{sstart}' to '{send}')\
         or (not protein and same residue as within 4 of protein and same residue as index '{sstart}' to '{send}')")

        # Avoid chimeras that only have a few mutations from
        # one of the parents
        qmol_resid = qmol_copy.get("resid", sel="protein and name CA")
        smol_resid = smol_copy.get("resid", sel="protein and name CA")
        if qmol_resid.size < 10 or smol_resid.size < 10:
            raise NotDiverseChimeraError

        bbq = qmol_copy.get("coords", sel="protein and backbone")
        bbs = smol_copy.get("coords", sel="protein and backbone")

        # Test the boolean mask itself. Checking .any() on the index arrays from
        # np.where misses a clash whose only index pair is (0, 0).
        distances = cdist(bbq, bbs)
        if (distances < 1.3).any():
            raise BackboneClashError

        chim1 = Chimera()
        qmol_copy.renumberResidues()
        smol_copy.renumberResidues()

        if combination == 1:
            leading, trailing, leading_resid = smol_copy, qmol_copy, smol_resid
        else:
            leading, trailing, leading_resid = qmol_copy, smol_copy, qmol_resid

        # NOTE(review): leading_resid was read before renumberResidues() ran, so
        # last_id follows the pre-renumbering resids; confirm this is intended.
        last_id = leading_resid[-1] + 1
        new_ids = get_new_resIDs(trailing, last_id)
        trailing.set("resid", new_ids)
        chim1.append(leading)
        chim1.append(trailing)
        chim1.set("chain", "A", "all")
        return chim1, last_id
コード例 #5
0
ファイル: networks.py プロジェクト: dhingra-s/protlego
 def show_vertex(self, vertex: Graph.vertex) -> Chimera:
     """
     Display the protein behind a vertex with its fragment highlighted.

     The whole protein is drawn as NewCartoon with color id 8 and the fragment,
     spanning the rounded mean start to the rounded mean end stored on the vertex,
     is drawn on top with color id 1.

     :param vertex: A Graph.vertex object. The domain to be shown.
     :return: A Chimera object carrying the two representations
     """
     vertex_props = self.graph.vp
     domain = vertex_props.domain[vertex]
     start = int(round(np.mean(vertex_props.start[vertex])))
     end = int(round(np.mean(vertex_props.end[vertex])))

     mol = Chimera(filename=get_SCOP_domain(domain), validateElements=False)
     mol.renumberResidues()

     fragment_selection = f"protein and resid '{start}' to '{end}'"
     mol.reps.add(sel='protein', style='NewCartoon', color=8)
     mol.reps.add(sel=fragment_selection, style='NewCartoon', color=1)
     mol.view(name=domain)
     return mol
コード例 #6
0
def minimize_potential_energy(
        chimera,
        ff: str,
        output: str = "/tmp/build",
        keep_output_files=True,
        cuda=False,
        restraint_backbone: bool = True
) -> Tuple[unit.quantity.Quantity, Chimera]:
    """
    Solvate a chimera and minimize its potential energy.

    :param chimera: A chimera object where to perform the minimization
    :param ff: The forcefield to use for the minimization. Select between "amber" and "charmm"
    :param output: Folder for the intermediate and final pdb files. It is created if missing.
    :param keep_output_files: When exactly False, the ``output`` folder is deleted before returning.
    :param cuda: Whether to run a second minimization on the CUDA platform after the first one
    :param restraint_backbone: Restrain the CA, C and N atoms to their starting
        coordinates with a harmonic external force

    :return: The potential energy after minimization (kcal/mol) and the minimized chimera object.
    :raises ValueError: if ``ff`` is not "amber" or "charmm"
    """
    forcefield_files = {
        'amber': ('amber14-all.xml', 'amber14/tip3pfb.xml'),
        'charmm': ('charmm36.xml', 'charmm36/tip3p-pme-b.xml'),
    }
    # Fail before any work is done; an unknown name used to surface later as an
    # unbound local variable.
    if ff not in forcefield_files:
        raise ValueError(f"Unknown forcefield '{ff}'. Select between 'amber' and 'charmm'")

    # makedirs copes with nested paths and with the folder appearing concurrently.
    os.makedirs(output, exist_ok=True)

    protein_path = f"{output}/protein.pdb"
    smol = prepare_protein(chimera)
    smol.write(protein_path)
    pdb = PDBFile(protein_path)
    parm = load_file(protein_path)
    modeller = Modeller(pdb.topology, pdb.positions)

    forcefield = ForceField(*forcefield_files[ff])

    modeller.addSolvent(forcefield, padding=1.0 * unit.nanometer)
    system = forcefield.createSystem(modeller.topology,
                                     nonbondedMethod=PME,
                                     nonbondedCutoff=1 * unit.nanometer,
                                     constraints=HBonds)
    if restraint_backbone:
        # Applies an external force on backbone atoms
        # This allows the backbone to stay rigid, while severe clashes can still be resolved
        force = mm.CustomExternalForce("k*((x-x0)^2+(y-y0)^2+(z-z0)^2)")
        force.addGlobalParameter(
            "k", 5.0 * unit.kilocalories_per_mole / unit.angstroms**2)
        force.addPerParticleParameter("x0")
        force.addPerParticleParameter("y0")
        force.addPerParticleParameter("z0")
        backbone_names = ('CA', 'C', 'N')
        # zip stops at the shorter sequence, so positions without a matching atom are skipped.
        for idx, (atom, atom_crd) in enumerate(zip(parm.atoms, parm.positions)):
            # Compare the atom's name: testing the atom object itself against
            # strings never matched when atoms are objects. Falls back to the
            # object when it has no ``name`` attribute.
            if getattr(atom, "name", atom) in backbone_names:
                force.addParticle(idx, atom_crd.value_in_unit(unit.nanometers))
        system.addForce(force)

    # temperature, friction, error_tolerance and distance_tolerance are not defined
    # in this function; they are expected to exist at module level.
    integrator = mm.LangevinIntegrator(temperature, friction, error_tolerance)
    simulation = Simulation(modeller.topology, system, integrator)
    simulation.context.setPositions(modeller.positions)

    # Get pre-minimization energy (scoring)
    state = simulation.context.getState(getEnergy=True, getForces=True)
    pre_energy = state.getPotentialEnergy().in_units_of(
        unit.kilocalories_per_mole)
    logger.info(f"Energy before minimization {pre_energy}")

    # Minimize on the default platform
    integrator.setConstraintTolerance(distance_tolerance)
    simulation.minimizeEnergy()
    post_position = simulation.context.getState(
        getPositions=True).getPositions()
    post_state = simulation.context.getState(getEnergy=True, getForces=True)
    if cuda:
        # Continue from the minimized coordinates with a second run on the GPU.
        platform = mm.Platform.getPlatformByName('CUDA')
        properties = {'CudaPrecision': 'mixed'}
        gpu_integrator = mm.VariableLangevinIntegrator(temperature, friction,
                                                       error_tolerance)
        gpu_integrator.setConstraintTolerance(distance_tolerance)
        gpu_min = Simulation(modeller.topology, system, gpu_integrator,
                             platform, properties)
        gpu_min.context.setPositions(post_position)
        gpu_min.minimizeEnergy()
        post_position = gpu_min.context.getState(
            getPositions=True).getPositions()
        post_state = gpu_min.context.getState(getEnergy=True, getForces=True)

    post_energy = post_state.getPotentialEnergy().in_units_of(
        unit.kilocalories_per_mole)
    logger.info(f"Energy after minimization {post_energy}")

    minimized_path = f"{output}/structure_minimized.pdb"
    # The context manager closes (and flushes) the file before it is read back.
    with open(minimized_path, 'w') as handle:
        PDBFile.writeFile(modeller.topology,
                          post_position,
                          handle,
                          keepIds=True)
    min_mol = Chimera(filename=minimized_path)

    if keep_output_files is False:
        shutil.rmtree(output)

    return post_energy, min_mol
コード例 #7
0
def _mol_chimera_wrapper(molecule: Molecule, chimera: Chimera) -> Chimera:
    """
    Convert a Molecule into a Chimera by round-tripping it through a pdb file.

    The file lives in a private temporary directory. This replaces a fixed shared
    path in /tmp, so concurrent callers cannot overwrite each other's file, and the
    file is removed even when writing or loading fails.

    :param molecule: Molecule. The object to convert; its ``write`` method is used.
    :param chimera: Chimera. Unused; kept so existing callers keep working.
    :return: A new Chimera loaded from the written pdb file
    """
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        pdb_path = os.path.join(tmp_dir, "molecule.pdb")
        molecule.write(pdb_path)
        return Chimera(filename=pdb_path)