Beispiel #1
0
def delete_shell(core_mol, del_mol, cut_off, in_out='in'):
    """
    Strip from a copy of del_mol the residues that lie inside or outside
    a distance shell around core_mol.

    A residue of del_mol counts as "close" when at least one of its atoms
    is reported by the nearest-neighbour search as being within cut_off
    of core_mol. With in_out='in' the close residues are removed; with
    in_out='out' every other atom is removed instead.

    Parameters:
    -----------
    core_mol: OEMol molecule
        The reference molecules around which the shell is measured
    del_mol: OEMol molecule
        The molecules to trim. The passed object is not modified; the
        deletion is carried out on a copy
    cut_off: python float number
        The threshold distance in A handed to the nearest-neighbour search
    in_out: python string
        'in' deletes what is close to core_mol, 'out' deletes what is far

    Return:
    -------
    A fresh copy of the trimmed molecule, made after the deletions so
    that its atom indexes are reset

    Raises:
    -------
    ValueError if in_out is neither 'in' nor 'out'
    """

    if in_out not in ('in', 'out'):
        raise ValueError(
            "The passed in_out parameter is not recognized: {}".format(in_out))

    # All the deletions happen on a private copy
    trimmed = oechem.OEMol(del_mol)

    # Start with every atom flagged; the flags that survive the
    # neighbour loop below identify the "far" atoms
    mask = oechem.OEBitVector(trimmed.GetMaxAtomIdx())
    mask.NegateBits()

    # Un-flag the whole residue of each atom found near core_mol
    neighbour_search = oechem.OENearestNbrs(trimmed, cut_off)
    for contact in neighbour_search.GetNbrs(core_mol):
        for residue_atom in oechem.OEGetResidueAtoms(contact.GetBgn()):
            mask.SetBitOff(residue_atom.GetIdx())

    # For 'in' the close atoms are the ones to drop, so flip the mask
    if in_out == 'in':
        mask.NegateBits()

    flagged = oechem.OEAtomIdxSelected(mask)
    for doomed_atom in trimmed.GetAtoms(flagged):
        trimmed.DeleteAtom(doomed_atom)

    # Copying after the deletions resets the atom indexes and avoids
    # possible mismatching
    return oechem.OEMol(trimmed)
Beispiel #2
0
def _OEFixBuiltLoopFragmentNumbers(protein):
    """
    Temporary fix, thanks to Jesper!

    Walk the alpha carbons of the protein in iteration order. Each
    residue whose CA atom is flagged as modeled gets, on all of its
    atoms, the fragment number remembered from the most recent CA atom
    that did not take this path. The protein is modified in place and
    nothing is returned.

    Only CA atoms are inspected: this avoids messing with the caps and
    the built side chains, since this is only a built loop problem.
    """
    is_modeled = oespruce.OEIsModeledAtom()

    # -1 means that no fragment number has been remembered yet
    last_fragment = -1

    for ca_atom in protein.GetAtoms(oechem.OEIsCAlpha()):
        ca_fragment = oechem.OEAtomGetResidue(ca_atom).GetFragmentNumber()

        # Non-modeled residues (and any residue seen before a fragment
        # number is available) just update the remembered value
        if not is_modeled(ca_atom) or last_fragment == -1:
            last_fragment = ca_fragment
            continue

        # Modeled residue: stamp the remembered fragment number on each
        # of its atoms and store the edited residue back on the atom
        for member in oechem.OEGetResidueAtoms(ca_atom):
            residue = oechem.OEAtomGetResidue(member)
            residue.SetFragmentNumber(last_fragment)
            oechem.OEAtomSetResidue(member, residue)
Beispiel #3
0
        def around(dist, ls):
            """
            Select the atoms of the enclosing ``system`` molecule that
            belong to residues found within the threshold distance from
            the current selection. The threshold distance is in Angstrom

            selection can be:
            mask = '5.0 around ligand'

            Parameters:
            -----------
            dist: value convertible to float
                The threshold distance passed to the nearest-neighbour search
            ls: collection of atom indexes
                The current selection, given as atom indexes of ``system``

            Return:
            -------
            atom_set_around: python set
                The indexes of the atoms in the neighbouring residues,
                excluding the atoms already present in ls
            """
            # Build the lookup once: ls is tested for every atom of the
            # system and for every neighbour atom, so a linear search in
            # a list would make both loops quadratic
            selected = set(ls)

            # Atom set selection
            atom_set_around = set()

            # Create a OE bit vector mask for each atoms
            bv_around = oechem.OEBitVector(system.GetMaxAtomIdx())

            # Set the mask atom
            for at in system.GetAtoms():
                at_idx = at.GetIdx()
                if at_idx in selected:
                    bv_around.SetBitOn(at_idx)

            # Predicate
            pred = oechem.OEAtomIdxSelected(bv_around)

            # Create the molecule holding only the selected atoms
            molecules = oechem.OEMol()
            oechem.OESubsetMol(molecules, system, pred)

            # Create the Nearest neighbours
            nn = oechem.OENearestNbrs(system, float(dist))

            # Collect whole residues around the selection, skipping the
            # atoms that are part of the selection itself
            for nbrs in nn.GetNbrs(molecules):
                for atom in oechem.OEGetResidueAtoms(nbrs.GetBgn()):
                    atom_idx = atom.GetIdx()
                    if atom_idx in selected:
                        continue
                    atom_set_around.add(atom_idx)

            return atom_set_around
Beispiel #4
0
def extract_aligned_prot_lig_wat_traj(md_components,
                                      flask,
                                      trj_fn,
                                      opt,
                                      nmax=30,
                                      water_cutoff=15.0):
    """
    Extracts the aligned protein trajectory, the aligned ligand trajectory and
    the aligned water trajectory from a MD trajectory of a larger system that
    includes other components (eg water).

    The setup flask carried by md_components must have the topology that matches
    the trajectory, and its xyz coordinates are the reference for the alignment.
    The alignment is done on the protein backbone atoms selected within 5 A of
    the ligand in the first trajectory frame. Once the alignment is done, the
    protein and ligand trajectories are each placed into a separate OEMol, one
    conformer per trajectory frame. For each frame the water trajectory holds
    the nmax waters with the smallest summed minimum distance of the water
    oxygen from the ligand atoms and from the selected binding site atoms,
    searched among the waters within water_cutoff of them.

    Side effect: the coordinates of the passed flask are overwritten with the
    coordinates of each processed frame.

    Inputs:
        md_components: MDComponents object
            The md components carrying the setup starting flask, the protein
            and the ligand.
        flask: OEMol
            The system flask
        trj_fn: String
            The filename of the hdf5-format MD trajectory (.h5) or Gromacs .trr
            file format. For a .trr file a .pdb file in the same directory is
            used as topology and the first frame is discarded.
        opt: Dictionary
            Its 'Logger' entry is used to emit warnings
        nmax: Integer
            max number of waters to select
        water_cutoff: Float
            The cutoff distance between the PL binding site and the waters in angstroms
    Outputs:
        multi_conf_protein: A multi conformer OEMol for the protein, one conformer per frame.
        multi_conf_ligand: A multi conformer OEMol for the ligand, one conformer per frame.
        multi_conf_water: A multi conformer OEMol for the waters, one conformer per frame.
    Raises:
        ValueError: if the trajectory file extension is not recognized, if fewer
            than nmax waters are found in a frame, or if the number of the
            extracted water atoms is not a multiple of 3.
    """

    set_up_flask, map_dic = md_components.create_flask
    protein = md_components.get_protein
    ligand = md_components.get_ligand

    check_nmax = nmax_waters(protein, ligand, water_cutoff)

    if check_nmax < nmax:
        opt['Logger'].warn(
            "The selected number of max waters cannot fit around the protein binding site: {} vs {}"
            .format(nmax, check_nmax))

    traj_ext = os.path.splitext(trj_fn)[1]
    traj_dir = os.path.dirname(trj_fn)

    if traj_ext == '.h5':
        trj = md.load_hdf5(trj_fn)

    elif traj_ext == '.trr':
        # The first .pdb found next to the trajectory is the topology;
        # an IndexError is raised here if the directory has none
        pdb_fn = glob.glob(os.path.join(traj_dir, '*.pdb'))[0]
        trj = md.load_trr(trj_fn, top=pdb_fn)
        trj = trj[1:]
    else:
        raise ValueError(
            "Trajectory file format {} not recognized in the trajectory {}".
            format(traj_ext, trj_fn))

    # System topology
    top_trj = trj.topology

    # Ligand indexes
    lig_idx = map_dic['ligand']

    # Protein indexes. It is safer to use OE toolkits than mdtraj
    # which is missing the protein caps
    prot_idx = map_dic['protein']

    # Water oxygen indexes
    water_O_idx = top_trj.select("water and element O")

    # Protein carbon alpha indexes
    # NOTE(review): the selection string asks for backbone atoms whose
    # element is carbon, which presumably matches the carbonyl C as well
    # as CA - confirm whether "name CA" was intended
    prot_ca_idx = top_trj.select("backbone and element C")

    # Cutoff for the selection of the binding site atoms in A
    cutoff_bs = 5.0

    # Carbon alpha binding site indexes (mdtraj cutoffs are in nm)
    ca_bs_idx = md.compute_neighbors(trj[0],
                                     cutoff_bs / 10.0,
                                     lig_idx,
                                     haystack_indices=prot_ca_idx,
                                     periodic=True)[0]

    # Carbon alpha binding site and ligand indexes
    ca_bs_lig_idx = np.concatenate((ca_bs_idx, lig_idx))

    # Image the protein-ligand trajectory so the complex does not jump across box boundaries
    protlig = trj[0].atom_slice(np.concatenate((prot_idx, lig_idx)))
    protligAtoms = list(protlig.topology.atoms)

    # Whatever is written to stderr during the imaging is discarded
    with open(os.devnull, 'w') as devnull:
        with contextlib.redirect_stderr(devnull):
            trjImaged = trj.image_molecules(inplace=False,
                                            anchor_molecules=[protligAtoms],
                                            make_whole=True)

    # Per frame list of the (water oxygen index, metric) pairs selected
    water_max_frames = []

    for count, frame in enumerate(trjImaged):

        # Water oxygen binding site indexes. The result is indexed per
        # frame, so element 0 holds the indexes for this single frame
        water_O_bs_idx = md.compute_neighbors(frame,
                                              water_cutoff / 10.0,
                                              ca_bs_lig_idx,
                                              haystack_indices=water_O_idx,
                                              periodic=True)

        # Pair combination water indexes times ligand indexes
        wat_lig_pairs = np.array(np.meshgrid(water_O_bs_idx,
                                             lig_idx)).T.reshape(-1, 2)

        # Distances between the waters and the ligand in nm
        wat_lig_distances = md.compute_distances(frame,
                                                 wat_lig_pairs,
                                                 periodic=True,
                                                 opt=True)

        # One row per water, one column per ligand atom
        ns = np.reshape(wat_lig_distances,
                        (len(water_O_bs_idx[0]), len(lig_idx)))

        # Min distances in nm between the oxygen waters and the ligand
        min_wat_O_lig_distances = np.min(ns, axis=1)

        # Pair combination water indexes times protein binding site carbon alpha indexes
        wat_ca_bs_pairs = np.array(np.meshgrid(water_O_bs_idx,
                                               ca_bs_idx)).T.reshape(-1, 2)

        # Distances between the waters and the protein binding site carbon alpha in nm
        wat_ca_bs_distances = md.compute_distances(frame,
                                                   wat_ca_bs_pairs,
                                                   periodic=True,
                                                   opt=True)

        # One row per water, one column per binding site atom
        ns = np.reshape(wat_ca_bs_distances,
                        (len(water_O_bs_idx[0]), len(ca_bs_idx)))

        # Min distances in nm between the oxygen waters and the protein binding site carbon alpha
        min_wat_O_ca_bs_distances = np.min(ns, axis=1)

        # Ranking metric: the smaller the sum, the closer the water is
        # to both the ligand and the binding site
        metrics = min_wat_O_lig_distances + min_wat_O_ca_bs_distances

        metric_distances = {
            int(wat_idx): m
            for wat_idx, m in zip(water_O_bs_idx[0], metrics)
        }

        water_list_sorted_max = sorted(metric_distances.items(),
                                       key=lambda x: x[1])[:nmax]

        if len(water_list_sorted_max) != nmax:
            raise ValueError(
                "The ordered water list has the wrong size {} vs expected {} for the frame {}"
                .format(len(water_list_sorted_max), nmax, count))

        water_max_frames.append(water_list_sorted_max)

    # Put the reference mol xyz into the 1-frame topologyTraj to use as a reference in the fit
    setup_mol_array_coords = oechem.OEDoubleArray(3 *
                                                  set_up_flask.GetMaxAtomIdx())
    set_up_flask.GetCoords(setup_mol_array_coords)

    setup_mol_xyzArr = np.array(setup_mol_array_coords)
    setup_mol_xyzArr.shape = (-1, 3)

    trj_reference = trjImaged[0]
    # convert from angstroms to nanometers
    trj_reference.xyz[0] = setup_mol_xyzArr / 10.0

    # Fitting
    trjImaged.superpose(trj_reference, 0, ca_bs_idx)

    # Delete Original Trajectory to save memory
    del trj

    # Molecule copies
    ligand_reference = oechem.OEMol(ligand)
    protein_reference = oechem.OEMol(protein)

    # Index arrays built once, so that the coordinates of each frame can
    # be extracted with a single numpy indexing operation
    lig_idx_arr = np.asarray(lig_idx, dtype=int)
    prot_idx_arr = np.asarray(prot_idx, dtype=int)

    # Create the multi conformer protein, ligand and water molecules
    for count, frame in enumerate(trjImaged.xyz):

        # Extract coordinates in A
        xyz = frame * 10

        # Set flask Coordinates as the current frame for the water extraction
        flask.SetCoords(xyz.flatten())
        water_list_sorted_max = water_max_frames[count]

        # TODO The following solution to extract the waters do not
        #  keep the water order

        # Mark the close water atoms and extract them. The mask is
        # addressed with flask atom indexes, so it is sized on the flask
        bv = oechem.OEBitVector(flask.GetMaxAtomIdx())

        for pair in water_list_sorted_max:

            ow = flask.GetAtom(oechem.OEHasAtomIdx(pair[0]))

            # Select the whole water molecule
            for atw in oechem.OEGetResidueAtoms(ow):
                bv.SetBitOn(atw.GetIdx())

        pred_vec = oechem.OEAtomIdxSelected(bv)
        water_nmax_reference = oechem.OEMol()
        oechem.OESubsetMol(water_nmax_reference, flask, pred_vec)

        # ligand and protein conf coordinates in A
        lig_confxyz = oechem.OEFloatArray(xyz[lig_idx_arr].ravel())
        prot_confxyz = oechem.OEFloatArray(xyz[prot_idx_arr].ravel())

        # Initialize the protein, ligand and water molecule topologies
        if count == 0:

            multi_conf_water = oechem.OEMol(water_nmax_reference)

            if multi_conf_water.NumAtoms() % 3 != 0:
                raise ValueError("Number of Water atoms is not multiple of 3")

            # Clean ResNumber and Chain on the multi conf water molecule
            multi_conf_water.SetTitle("Water_" + str(nmax))

            for i, at in enumerate(multi_conf_water.GetAtoms()):

                res = oechem.OEAtomGetResidue(at)
                res.SetSerialNumber(i)
                res.SetName("HOH")
                res.SetChainID("Z")
                # Every group of three consecutive atoms is numbered as
                # one residue, starting from 1
                res.SetResidueNumber(i // 3 + 1)
                # Store the edited residue back on the atom so that the
                # changes are actually applied to the molecule
                oechem.OEAtomSetResidue(at, res)

            ligand_reference.SetCoords(lig_confxyz)
            protein_reference.SetCoords(prot_confxyz)
            multi_conf_ligand = oechem.OEMol(ligand_reference)
            multi_conf_protein = oechem.OEMol(protein_reference)

        # Attach the conformers on the multi conformer protein, ligand and water molecules
        else:
            water_confxyz = oechem.OEFloatArray(
                water_nmax_reference.NumAtoms() * 3)
            water_nmax_reference.GetCoords(water_confxyz)

            multi_conf_water.NewConf(water_confxyz)
            multi_conf_ligand.NewConf(lig_confxyz)
            multi_conf_protein.NewConf(prot_confxyz)

    return multi_conf_protein, multi_conf_ligand, multi_conf_water