コード例 #1
0
ファイル: utils.py プロジェクト: minghao2016/perses
def write_equilibrium_trajectory(trajectory: md.Trajectory,
                                 trajectory_filename: str) -> float:
    """
    Write the results of an equilibrium simulation to disk. This task will append the results to the given filename.

    Arguments
    ----------
    trajectory : md.Trajectory
        the trajectory resulting from an equilibrium simulation
    trajectory_filename : str
        the name of the trajectory file to which we should append

    Returns
    -------
    True
    """
    if not os.path.exists(trajectory_filename):
        trajectory.save_hdf5(trajectory_filename)
        _logger.debug(
            f"{trajectory_filename} does not exist; instantiating and writing to."
        )
    else:
        _logger.debug(f"{trajectory_filename} exists; appending.")
        written_traj = md.load_hdf5(trajectory_filename)
        concatenated_traj = written_traj.join(trajectory)
        concatenated_traj.save_hdf5(trajectory_filename)

    return True
コード例 #2
0
ファイル: feptasks.py プロジェクト: choderalab/perses
def write_equilibrium_trajectory(equilibrium_result: EquilibriumResult, trajectory: md.Trajectory, trajectory_filename: str) -> float:
    """
    Write the results of an equilibrium simulation to disk. This task will append the results to the given filename.
    Parameters
    ----------
    equilibrium_result : EquilibriumResult namedtuple
        the result of an equilibrium calculation
    trajectory : md.Trajectory
        the trajectory resulting from an equilibrium simulation
    trajectory_filename : str
        the name of the trajectory file to which we should append

    Returns
    -------
    reduced_potential_final_frame : float
        the reduced potential of the final frame
    """
    if not os.path.exists(trajectory_filename):
        trajectory.save_hdf5(trajectory_filename)
    else:
        written_traj = md.load_hdf5(trajectory_filename)
        concatenated_traj = written_traj.join(trajectory)
        concatenated_traj.save_hdf5(trajectory_filename)

    return equilibrium_result.reduced_potential
コード例 #3
0
def write_equilibrium_trajectory(equilibrium_result: EquilibriumResult,
                                 trajectory: md.Trajectory,
                                 trajectory_filename: str) -> float:
    """
    Write the results of an equilibrium simulation to disk. This task will append the results to the given filename.
    Parameters
    ----------
    equilibrium_result : EquilibriumResult namedtuple
        the result of an equilibrium calculation
    trajectory : md.Trajectory
        the trajectory resulting from an equilibrium simulation
    trajectory_filename : str
        the name of the trajectory file to which we should append

    Returns
    -------
    reduced_potential_final_frame : float
        the reduced potential of the final frame
    """
    if not os.path.exists(trajectory_filename):
        trajectory.save_hdf5(trajectory_filename)
    else:
        written_traj = md.load_hdf5(trajectory_filename)
        concatenated_traj = written_traj.join(trajectory)
        concatenated_traj.save_hdf5(trajectory_filename)

    return equilibrium_result.reduced_potential
コード例 #4
0
ファイル: data_loader.py プロジェクト: sonyahanson/kinase_msm
def load_frame(base_dir, protein, filename, frame_index):
    """
    :param base_dir: Project's base dir
    :param protein: Protein of interest
    :param filename: file to load
    :param frame_index: needed frame
    :return: The required frame
    """
    os.chdir(os.path.join(base_dir, protein))
    filename = os.path.splitext(filename)[0]
    return mdt.load_hdf5(filename="./protein_traj/%s.hdf5" % filename,
                          frame=frame_index)
コード例 #5
0
def extract_aligned_prot_lig_wat_traj(md_components,
                                      flask,
                                      trj_fn,
                                      opt,
                                      nmax=30,
                                      water_cutoff=15.0):
    """
    Extracts the aligned protein trajectory and aligned ligand trajectory and aligned
    Water trajectory from a MD trajectory of a larger system that includes other
    components (eg water).
    The passed in setup mol must have the topology that matches the trajectory, and its xyz
    coordinates are the reference for the alignment. The alignment is done on the
    alpha carbons (atom name CA) of the active site residues within cutoff
    from the ligand. Once the alignment is done, the protein and ligand trajectories
    are each placed into a separate OEMol, one conformer per trajectory frame.
    Water trajectory is selecting the nmax waters from the ligand and protein CA
    within the cutoff distance for each trajectory snapshot

    Inputs:
        md_components: MDComponents object
            The md components carrying the setup starting flask.

        flask: OEMol
            The system flask

        trj_fn: String
            The filename of the hdf5-format MD trajectory or Gromacs .trr file format
        water_cutoff: Float
            The cutoff distance between the PL binding site and the waters in angstroms
        nmax: Integer
            max number of waters to select
    Outputs:
        multi_conf_protein: A multi conformer OEMol for the protein, one conformer per frame.
        multi_conf_ligand: A multi conformer OEMol for the ligand, one conformer per frame.
        multi_conf_water: A multi conformer OEMol for the waters, one conformer per frame.
    """

    # Extract protein, ligand, water and excipients from the flask
    # protein, ligand, water, excipients = oeommutils.split(flask, ligand_res_name="LIG")

    set_up_flask, map_dic = md_components.create_flask
    protein = md_components.get_protein
    ligand = md_components.get_ligand

    check_nmax = nmax_waters(protein, ligand, water_cutoff)

    if check_nmax < nmax:
        opt['Logger'].warn(
            "The selected number of max waters cannot fit around the protein binding site: {} vs {}"
            .format(nmax, check_nmax))

    void, traj_ext = os.path.splitext(trj_fn)

    traj_dir = os.path.dirname(trj_fn)

    if traj_ext == '.h5':
        trj = md.load_hdf5(trj_fn)

    elif traj_ext == '.trr':
        pdb_fn = glob.glob(os.path.join(traj_dir, '*.pdb'))[0]
        trj = md.load_trr(trj_fn, top=pdb_fn)
        trj = trj[1:]
    else:
        raise ValueError(
            "Trajectory file format {} not recognized in the trajectory {}".
            format(traj_ext, trj_fn))

    # System topology
    top_trj = trj.topology

    # Ligand indexes
    # lig_idx = top_trj.select("resname LIG")
    lig_idx = map_dic['ligand']

    # Protein indexes
    # prot_idx = top_trj.select("protein")

    # It is safer to use OE toolkits than mdtraj which is missing the protein caps
    prot_idx = map_dic['protein']

    # for at in protein.GetAtoms():
    #     prot_idx.append(at.GetIdx())

    # Water oxygen indexes
    water_O_idx = top_trj.select("water and element O")

    # Protein carbon alpha indexes
    prot_ca_idx = top_trj.select("backbone and element C")

    # Cutoff for the selection of the binding site atoms in A
    cutoff_bs = 5.0

    # Carbon alpha binding site indexes
    ca_bs_idx = md.compute_neighbors(trj[0],
                                     cutoff_bs / 10.0,
                                     lig_idx,
                                     haystack_indices=prot_ca_idx,
                                     periodic=True)[0]

    # Carbon alpha binding site and ligand indexes
    ca_bs_lig_idx = np.concatenate((ca_bs_idx, lig_idx))

    # Image the protein-ligand trajectory so the complex does not jump across box boundaries
    protlig = trj[0].atom_slice(np.concatenate((prot_idx, lig_idx)))
    protligAtoms = [atom for atom in protlig.topology.atoms]

    with open(os.devnull, 'w') as devnull:
        with contextlib.redirect_stderr(devnull):
            trjImaged = trj.image_molecules(inplace=False,
                                            anchor_molecules=[protligAtoms],
                                            make_whole=True)

    # trjImaged = trj.image_molecules(inplace=False, anchor_molecules=[protligAtoms], make_whole=True)

    count = 0
    water_max_frames = []

    # TODO DEBUG
    # trjImaged = trjImaged[:10]

    for frame in trjImaged:
        # print(count, flush=True)

        # Water oxygen binding site indexes
        water_O_bs_idx = md.compute_neighbors(frame,
                                              water_cutoff / 10.0,
                                              ca_bs_lig_idx,
                                              haystack_indices=water_O_idx,
                                              periodic=True)

        # Pair combination water indexes times ligand indexes
        wat_lig_pairs = np.array(np.meshgrid(water_O_bs_idx,
                                             lig_idx)).T.reshape(-1, 2)

        # Distances between the waters and the ligand in nm
        wat_lig_distances = md.compute_distances(frame,
                                                 wat_lig_pairs,
                                                 periodic=True,
                                                 opt=True)

        # Reshape the wat_lig_distances
        ns = np.reshape(wat_lig_distances,
                        (len(water_O_bs_idx[0]), len(lig_idx)))

        # Min distances in nm between the oxygen waters and the ligand
        min_wat_O_lig_distances = np.min(ns, axis=1)

        # Pair combination water indexes times protein binding site carbon alpha indexes
        wat_ca_bs_pairs = np.array(np.meshgrid(water_O_bs_idx,
                                               ca_bs_idx)).T.reshape(-1, 2)

        # Distances between the waters and the protein binding site carbon alpha in nm
        wat_ca_bs_distances = md.compute_distances(frame,
                                                   wat_ca_bs_pairs,
                                                   periodic=True,
                                                   opt=True)

        # Reshape the wat_ca_bs_distances
        ns = np.reshape(wat_ca_bs_distances,
                        (len(water_O_bs_idx[0]), len(ca_bs_idx)))

        # Min distances in nm between the oxygen waters and the protein binding site carbon alpha
        min_wat_O_ca_bs_distances = np.min(ns, axis=1)

        metrics = min_wat_O_lig_distances + min_wat_O_ca_bs_distances

        metric_distances = dict()

        for wat_idx, m in zip(water_O_bs_idx[0], metrics):
            metric_distances[int(wat_idx)] = m

        water_list_sorted_max = sorted(metric_distances.items(),
                                       key=lambda x: x[1])[:nmax]

        if len(water_list_sorted_max) != nmax:
            raise ValueError(
                "The ordered water list has the wrong size {} vs expected {} for the frame {}"
                .format(len(water_list_sorted_max), nmax, count))

        water_max_frames.append(water_list_sorted_max)

        # print(min_wat_O_ca_bs_distances)
        # print(pairs[:len(lig_idx), :])
        # for p,d in zip(wat_ca_bs_pairs, wat_ca_bs_distances[0]):
        #     print(p,d)

        count += 1

    # Put the reference mol xyz into the 1-frame topologyTraj to use as a reference in the fit
    setup_mol_array_coords = oechem.OEDoubleArray(3 *
                                                  set_up_flask.GetMaxAtomIdx())
    set_up_flask.GetCoords(setup_mol_array_coords)

    setup_mol_xyzArr = np.array(setup_mol_array_coords)
    setup_mol_xyzArr.shape = (-1, 3)

    trj_reference = trjImaged[0]
    # convert from angstroms to nanometers
    trj_reference.xyz[0] = setup_mol_xyzArr / 10.0

    # Fitting
    trjImaged.superpose(trj_reference, 0, ca_bs_idx)

    # Delete Original Trajectory to save memory
    del trj

    # Molecule copies
    ligand_reference = oechem.OEMol(ligand)
    protein_reference = oechem.OEMol(protein)

    count = 0

    # Create the multi conformer protein, ligand and water molecules
    for frame in trjImaged.xyz:
        # print("Trj Image loop", count, flush=True)

        # Extract coordinates in A
        xyz = frame * 10

        # Set flask Coordinates as the current frame for the water extraction
        flask.SetCoords(xyz.flatten())
        water_list_sorted_max = water_max_frames[count]

        # print(water_list_sorted_max)

        # TODO The following solution to extract the waters do not
        #  keep the water order

        # Mark the close water atoms and extract them
        bv = oechem.OEBitVector(nmax * 3)
        water_idx = []

        for pair in water_list_sorted_max:

            ow = flask.GetAtom(oechem.OEHasAtomIdx(pair[0]))

            # Select the whole water molecule
            for atw in oechem.OEGetResidueAtoms(ow):
                bv.SetBitOn(atw.GetIdx())
                water_idx.append(atw.GetIdx())

        pred_vec = oechem.OEAtomIdxSelected(bv)
        water_nmax_reference = oechem.OEMol()
        oechem.OESubsetMol(water_nmax_reference, flask, pred_vec)

        # TODO The following solution to extract the waters
        #  keep the water order but is it seems extremely inefficient

        # water_list = []
        # for pair in water_list_sorted_max:
        #     bv = oechem.OEBitVector(3)
        #     water_idx = []
        #     ow = flask.GetAtom(oechem.OEHasAtomIdx(pair[0]))
        #
        #     # Select the whole water molecule
        #     for atw in oechem.OEGetResidueAtoms(ow):
        #         bv.SetBitOn(atw.GetIdx())
        #         water_idx.append(atw.GetIdx())
        #
        #     pred_vec = oechem.OEAtomIdxSelected(bv)
        #     water = oechem.OEMol()
        #     oechem.OESubsetMol(water, flask, pred_vec)
        #
        #     water_list.append(water)
        #
        #
        # # print(len(water_list))
        #
        # water_nmax_reference = oechem.OEMol()

        # for w in water_list:
        #     oechem.OEAddMols(water_nmax_reference, w)

        # ligand and protein conf coordinates
        lig_xyz_list = [10 * frame[idx] for idx in lig_idx]
        lig_confxyz = oechem.OEFloatArray(np.array(lig_xyz_list).ravel())

        prot_xyz_list = [10 * frame[idx] for idx in prot_idx]
        prot_confxyz = oechem.OEFloatArray(np.array(prot_xyz_list).ravel())

        # Initialize the protein, ligand and water molecule topologies
        if count == 0:

            multi_conf_water = oechem.OEMol(water_nmax_reference)

            if multi_conf_water.NumAtoms() % 3 != 0:
                raise ValueError("Number of Water atoms is not multiple of 3")

            # Clean ResNumber and Chain on the multi conf water molecule
            # oechem.OEPerceiveResidues(multi_conf_water, oechem.OEPreserveResInfo_All)
            multi_conf_water.SetTitle("Water_" + str(nmax))

            res_num = 0
            i = 0
            for at in multi_conf_water.GetAtoms():

                res = oechem.OEAtomGetResidue(at)
                res.SetSerialNumber(i)
                res.SetName("HOH")
                res.SetChainID("Z")
                if i % 3 == 0:
                    res_num += 1
                res.SetResidueNumber(res_num)
                i += 1

            ligand_reference.SetCoords(lig_confxyz)
            protein_reference.SetCoords(prot_confxyz)
            multi_conf_ligand = oechem.OEMol(ligand_reference)
            multi_conf_protein = oechem.OEMol(protein_reference)

        # Attach the conformers on the multi conformer protein, ligand and water molecules
        else:
            water_confxyz = oechem.OEFloatArray(
                water_nmax_reference.NumAtoms() * 3)
            water_nmax_reference.GetCoords(water_confxyz)

            multi_conf_water.NewConf(water_confxyz)
            multi_conf_ligand.NewConf(lig_confxyz)
            multi_conf_protein.NewConf(prot_confxyz)

        count += 1

    return multi_conf_protein, multi_conf_ligand, multi_conf_water