Beispiel #1
0
# Write the PDBFixer-processed structure to '<pdbid>-pdbfixer.pdb'.
output_filename = '%s-pdbfixer.pdb' % pdbid
print('Writing PDB file to "%s"...' % output_filename)
# Use a context manager so the file is flushed and closed deterministically.
with open(output_filename, 'w') as pdb_file:
    app.PDBFile.writeFile(fixer.topology, fixer.positions, pdb_file)

# Create OpenMM System.
print('Creating OpenMM system...')
system = forcefield.createSystem(fixer.topology, nonbondedMethod=nonbondedMethod, constraints=constraints, rigidWater=True, removeCMMotion=False)

# Minimize to update positions.
print('Minimizing...')
integrator = openmm.VerletIntegrator(1.0 * unit.femtosecond)
context = openmm.Context(system, integrator)
context.setPositions(fixer.positions)
openmm.LocalEnergyMinimizer.minimize(context)
state = context.getState(getPositions=True)
fixer.positions = state.getPositions()

# Write final (minimized) coordinates.
output_filename = '%s-minimized.pdb' % pdbid
print('Writing PDB file to "%s"...' % output_filename)
with open(output_filename, 'w') as pdb_file:
    app.PDBFile.writeFile(fixer.topology, fixer.positions, pdb_file)

# Serialize the system, the integrator and the full final state to XML.
print('Serializing to XML...')
system_filename = 'system.xml'
integrator_filename = 'integrator.xml'
state_filename = 'state.xml'
write_file(system_filename, openmm.XmlSerializer.serialize(system))
write_file(integrator_filename, openmm.XmlSerializer.serialize(integrator))
# Re-query the state with everything enabled so the XML snapshot is complete.
state = context.getState(getPositions=True, getVelocities=True, getForces=True, getEnergy=True, getParameters=True, enforcePeriodicBox=True)
write_file(state_filename, openmm.XmlSerializer.serialize(state))
Beispiel #2
0
    def add_droplet(
        self,
        topology: md.Topology,
        coordinates: unit.quantity.Quantity,
        diameter: unit.quantity.Quantity = (30.0 * unit.angstrom),
        restrain_hydrogen_bonds: bool = True,
        restrain_hydrogen_angles: bool = False,
        top_file: str = "",
    ) -> md.Trajectory:
        """
        Adding a droplet with a given diameter around a small molecule.

        The molecule is solvated in a cubic box with edge length ``diameter``
        (via PDBFixer), and every water residue that has at least one atom
        further than ``radius + 1`` Angstrom from the box center is removed.
        The resulting coordinates, atom string, topology and solvent
        restraints are stored on ``self``.

        Parameters
        ----------
        topology: md.Topology
            topology of the molecule
        coordinates: np.array, unit'd
            coordinates of the molecule
        diameter: float, unit'd
            diameter of the droplet
        restrain_hydrogen_bonds: bool
            if True, a BondFlatBottomRestraint is added for both O-H bonds
            of every water molecule
        restrain_hydrogen_angles: bool
            if True, an AngleHarmonicRestraint is added for the H-O-H angle
            of every water molecule
        top_file: str
            if top_file is provided the final droplet pdb is either kept and can be reused or if
            top_file already exists it will be used to create the same droplet.
        Returns
        ----------
        A mdtraj.Trajectory object with the ligand centered in the solvent for inspection.
        """

        # isinstance (rather than exact type equality) also accepts subclasses
        assert isinstance(diameter, unit.Quantity)
        assert isinstance(topology, md.Topology)
        assert isinstance(coordinates, unit.Quantity)
        if restrain_hydrogen_bonds:
            logger.debug("Hydrogen bonds are restraint.")

        if restrain_hydrogen_angles:
            logger.warning("HOH angles are restraint.")

        # droplet radius and center, both expressed in Angstrom
        radius = diameter.value_in_unit(unit.angstrom) / 2
        center = np.array([radius, radius, radius])

        # if no solvated pdb file is provided generate one
        if top_file:
            # read in the file with the defined droplet
            pdb_filepath = top_file
        else:
            # generate a one time droplet
            # NOTE(review): the random name is not guaranteed to be unique;
            # the tempfile module would be more robust.
            pdb_filepath = f"tmp{random.randint(1,10000000)}.pdb"

        if not os.path.exists(pdb_filepath):
            logger.info(f"Generating droplet for {pdb_filepath}...")

            # get topology from mdtraj to PDBFixer via pdb file
            # mdtraj works with nanometer
            md.Trajectory(coordinates.value_in_unit(unit.nanometer),
                          topology).save_pdb(pdb_filepath)
            pdb = PDBFixer(filename=pdb_filepath)
            os.remove(pdb_filepath)

            # put the ligand in the center
            l_in_nanometer = diameter.value_in_unit(unit.nanometer)
            pdb.positions = np.array(
                pdb.positions.value_in_unit(
                    unit.nanometer)) + (l_in_nanometer / 2)
            # add water
            pdb.addSolvent(boxVectors=(
                Vec3(l_in_nanometer, 0.0, 0.0),
                Vec3(0.0, l_in_nanometer, 0.0),
                Vec3(0.0, 0.0, l_in_nanometer),
            ))
            # get topology from PDBFixer to mdtraj # NOTE: a second tmpfile - not happy about this
            from simtk.openmm.app import PDBFile

            # Close the file before parmed reads it back so that all data
            # is guaranteed to be flushed to disk.
            with open(pdb_filepath, "w") as pdb_handle:
                PDBFile.writeFile(pdb.topology, pdb.positions, pdb_handle)
            # load pdb in parmed
            logger.debug("Load with parmed ...")
            structure = pm.load_file(pdb_filepath)
            os.remove(pdb_filepath)

            # search for residues that are outside of the cutoff and delete them
            to_delete = []
            logger.debug("Flag residues ...")

            for residue in structure.residues:
                for atom in residue:
                    position = np.array([atom.xx, atom.xy, atom.xz])
                    squared_dist = np.sum((position - center)**2, axis=0)
                    dist = np.sqrt(squared_dist)
                    # NOTE: distance must be greater than radius + 1 Angstrom
                    if dist > radius + 1:
                        to_delete.append(residue)
                        # one atom outside is enough; flag each residue once
                        break

            # only delete water molecules
            for residue in to_delete:
                if residue.name == "HOH":
                    logger.debug(f"Remove: {residue}")
                    structure.residues.remove(residue)
                else:
                    logger.warning(
                        f"Residue {residue} reaches outside the droplet")
                    print(f"Residue {residue} reaches outside the droplet")

            structure.write_pdb(pdb_filepath)

        # load pdb with mdtraj
        traj = md.load(pdb_filepath)
        if not top_file:
            # the droplet file was only a temporary one
            os.remove(pdb_filepath)

        # set coordinates #NOTE: note the xyz[0]
        self._ligand_in_water_coordinates = traj.xyz[0] * unit.nanometer

        # set atom string (concatenated element symbols)
        self.ligand_in_water_atoms = "".join(
            [atom.element.symbol for atom in traj.topology.atoms])
        # set mdtraj topology
        self.ligand_in_water_topology = traj.topology

        # set CenterFlatBottomRestraint on each water oxygen
        self.solvent_restraints = []
        for residue in traj.topology.residues:
            if residue.is_water:
                for atom in residue.atoms:
                    if str(atom.element.symbol) == "O":
                        self.solvent_restraints.append(
                            CenterFlatBottomRestraint(
                                sigma=0.1 * unit.angstrom,
                                point=center * unit.angstrom,
                                radius=(diameter / 2),
                                atom_idx=atom.index,
                                active_at=-1,
                            ))
                        logger.debug("Adding restraint to center to {}".format(
                            atom.index))

        if restrain_hydrogen_bonds or restrain_hydrogen_angles:
            for residue in traj.topology.residues:
                if residue.is_water:
                    # collect the oxygen and hydrogen indices of this water
                    oxygen_idx = -1
                    hydrogen_idxs = []
                    for atom in residue.atoms:
                        if str(atom.element.symbol) == "O":
                            oxygen_idx = atom.index
                        elif str(atom.element.symbol) == "H":
                            hydrogen_idxs.append(atom.index)
                        else:
                            raise RuntimeError(
                                "Water should only consist of O and H atoms.")
                    if restrain_hydrogen_bonds:
                        # one restraint for each of the two O-H bonds
                        self.solvent_restraints.append(
                            BondFlatBottomRestraint(
                                sigma=0.2 * unit.angstrom,
                                atom_i_idx=oxygen_idx,
                                atom_j_idx=hydrogen_idxs[0],
                                atoms=self.ligand_in_water_atoms,
                            ))
                        self.solvent_restraints.append(
                            BondFlatBottomRestraint(
                                sigma=0.2 * unit.angstrom,
                                atom_i_idx=oxygen_idx,
                                atom_j_idx=hydrogen_idxs[1],
                                atoms=self.ligand_in_water_atoms,
                            ))
                    if restrain_hydrogen_angles:
                        self.solvent_restraints.append(
                            AngleHarmonicRestraint(
                                sigma=0.1 * unit.radian,
                                atom_i_idx=hydrogen_idxs[0],
                                atom_j_idx=oxygen_idx,
                                atom_k_idx=hydrogen_idxs[1],
                            ))
        # return a mdtraj object for visual check
        return md.Trajectory(
            self._ligand_in_water_coordinates.value_in_unit(unit.nanometer),
            self.ligand_in_water_topology,
        )
Beispiel #3
0
# This is basically the pdbfixer code, but without the amber lines.
#
# Build a Modeller from the fixer's topology/positions, add solvent to it and
# copy the solvated topology/positions back onto the fixer.
modeller = Modeller(fixer.topology, fixer.positions)
forcefield = ForceField('amber99sb.xml', 'tip5p.xml')
# NOTE(review): `system` is created from fixer.topology *before* the solvent
# is added below, so it does not describe the solvated structure - confirm
# that this is intended.
system = forcefield.createSystem(fixer.topology,
                                 nonbondedMethod=PME,
                                 nonbondedCutoff=0.05 * nanometer,
                                 constraints=HBonds)
modeller.addSolvent(forcefield,
                    padding=0.05 * nanometer,
                    boxSize=None,
                    boxVectors=None)
#modeller.addSolvent(forcefield, padding=0.4*nanometer, boxSize, boxVectors=boxVectors, model='tip5p')
# modeller.addSolvent(forcefield, padding=padding, boxSize=boxSize, boxVectors=boxVectors, positiveIon=positiveIon, negativeIon=negativeIon, ionicStrength=ionicStrength)
fixer.topology = modeller.topology
fixer.positions = modeller.positions

# Element symbols and raw coordinate values for every atom of the solvated
# topology.
# NOTE(review): `_symbol` and `_value` are underscore-prefixed (private)
# attributes; `_value` presumably carries the coordinates in whatever unit the
# positions are stored in - confirm the unit before using `procoords`.
proatoms = [atom.element._symbol for atom in modeller.topology.atoms()]
procoords = np.array(
    [fixer.positions[atom.index]._value for atom in modeller.topology.atoms()])


def WriteXYZfile(atoms, coords, nm_="out.xyz"):
    """Write atoms and their coordinates to a file in XYZ format.

    The file starts with the number of atoms, followed by an empty comment
    line and one ``"<symbol> <x> <y> <z>"`` line per atom.

    Parameters
    ----------
    atoms : sequence of str
        Atom labels, one per atom.
    coords : sequence
        ``coords[i]`` holds (at least) the three coordinates of atom ``i``;
        must be at least as long as ``atoms``.
    nm_ : str
        Path of the file to write (overwritten if it exists).
    """
    natom = len(atoms)
    lines = [str(natom) + "\n" + "\n"]
    for i, atom in enumerate(atoms):
        x, y, z = coords[i][0], coords[i][1], coords[i][2]
        lines.append(" ".join((atom, str(x), str(y), str(z))) + "\n")
    # The context manager guarantees the file is flushed and closed.
    with open(nm_, "w") as f:
        f.writelines(lines)

Beispiel #4
0
# Create OpenMM System.
print('Creating OpenMM system...')
system = forcefield.createSystem(fixer.topology,
                                 nonbondedMethod=nonbondedMethod,
                                 constraints=constraints,
                                 rigidWater=True,
                                 removeCMMotion=False)

# Minimize to update positions.
print('Minimizing...')
integrator = openmm.VerletIntegrator(1.0 * unit.femtosecond)
context = openmm.Context(system, integrator)
context.setPositions(fixer.positions)
openmm.LocalEnergyMinimizer.minimize(context)
state = context.getState(getPositions=True)
fixer.positions = state.getPositions()

# Write final (minimized) coordinates.
output_filename = '%s-minimized.pdb' % pdbid
print('Writing PDB file to "%s"...' % output_filename)
# Use a context manager so the file is flushed and closed deterministically.
with open(output_filename, 'w') as pdb_file:
    app.PDBFile.writeFile(fixer.topology, fixer.positions, pdb_file)

# Serialize the system and the integrator to XML.
print('Serializing to XML...')
system_filename = 'system.xml'
integrator_filename = 'integrator.xml'
state_filename = 'state.xml'
write_file(system_filename, openmm.XmlSerializer.serialize(system))
write_file(integrator_filename, openmm.XmlSerializer.serialize(integrator))
state = context.getState(getPositions=True,
Beispiel #5
0
def hydrate(system, opt):
    """
    This function solvates the system by using PDBFixer

    A copy of the system is translated so that the center of its bounding
    box sits at the center of a cube of edge
    ``2 * solvent_padding + largest bounding-box extent``; PDBFixer then
    adds water and ions. Only the added water/ion atoms are converted back
    to an OEMol, to which the translated copy of the system is appended.

    Parameters:
    -----------
    system: OEMol molecule
        The system to solvate
    opt: python dictionary
        The parameters used to solvate the system. The keys read here are
        'solvent_padding' (used as Angstrom) and 'salt_concentration'
        (used as millimolar)

    Return:
    -------
    oe_mol: OEMol
        The solvated system, with the periodic box vectors attached as
        'box_vectors' data
    """
    def BoundingBox(molecule):
        """
        This function calculates the Bounding Box of the passed
        molecule

        molecule: OEMol

        return: bb (numpy array)
            the calculated bounding box is returned as numpy array:
            [(xmin,ymin,zmin), (xmax,ymax,zmax)]
        """
        np_coords = np.array(list(molecule.GetCoords().values()))
        return np.array([np_coords.min(axis=0), np_coords.max(axis=0)])

    # Create a system copy
    sol_system = system.CreateCopy()

    # Calculate system BoundingBox (Angstrom units)
    BB = BoundingBox(sol_system)

    # Estimation of the box cube length in A
    box_edge = 2.0 * opt['solvent_padding'] + np.max(BB[1] - BB[0])

    # BB center
    bb_center = (BB[0] + BB[1]) / 2.

    # Translation that moves the BB center onto the center of the box
    delta = box_edge / 2. - bb_center

    sys_coord_dic = {k: (v+delta) for k, v in sol_system.GetCoords().items()}

    sol_system.SetCoords(sys_coord_dic)

    # Load a fake system to initialize PDBfixer
    filename = resource_filename('pdbfixer', 'tests/data/test.pdb')
    fixer = PDBFixer(filename=filename)

    # Convert between OE and OpenMM topology
    omm_top, omm_pos = oeommutils.oemol_to_openmmTop(sol_system)

    # Chain ids of the solute; chains with any other id are treated as
    # solvent/ions below
    chain_names = [chain.id for chain in omm_top.chains()]

    # Set the correct topology to the fake system
    fixer.topology = omm_top
    fixer.positions = omm_pos

    # Solvate the system
    fixer.addSolvent(padding=unit.Quantity(opt['solvent_padding'], unit.angstroms),
                     ionicStrength=unit.Quantity(opt['salt_concentration'], unit.millimolar))

    # The OpenMM topology produced by the solvation fixer has missing bond
    # orders and aromaticity. The following section is creating a new openmm
    # topology made of just water molecules and ions. The new topology is then
    # converted in an OEMol and added to the passed molecule to produce the
    # solvated system

    wat_ion_top = app.Topology()

    # Atom dictionary between the PDBfixer topology and the water_ion topology
    fixer_atom_to_wat_ion_atom = {}

    for chain in fixer.topology.chains():
        if chain.id not in chain_names:
            n_chain = wat_ion_top.addChain(chain.id)
            for res in chain.residues():
                n_res = wat_ion_top.addResidue(res.name, n_chain)
                for at in res.atoms():
                    n_at = wat_ion_top.addAtom(at.name, at.element, n_res)
                    fixer_atom_to_wat_ion_atom[at] = n_at

    # Copy only the bonds whose two atoms both belong to the water/ion
    # topology; bonds of the solute are skipped explicitly instead of
    # swallowing every exception.
    for bond in fixer.topology.bonds():
        n_at0 = fixer_atom_to_wat_ion_atom.get(bond[0])
        n_at1 = fixer_atom_to_wat_ion_atom.get(bond[1])
        if n_at0 is None or n_at1 is None:
            continue
        wat_ion_top.addBond(n_at0, n_at1, type=None, order=1)

    # NOTE(review): assumes the atoms added by the fixer come after the
    # original solute atoms in fixer.positions - confirm.
    wat_ion_pos = fixer.positions[len(omm_pos):]

    oe_mol = oeommutils.openmmTop_to_oemol(wat_ion_top, wat_ion_pos)

    # Setting the box vectors
    omm_box_vectors = fixer.topology.getPeriodicBoxVectors()
    box_vectors = utils.PackageOEMol.encodePyObj(omm_box_vectors)
    oe_mol.SetData(oechem.OEGetTag('box_vectors'), box_vectors)

    oechem.OEAddMols(oe_mol, sol_system)

    return oe_mol
Beispiel #6
0
def process_pdb(path,
                corr_path,
                chain_id,
                max_atoms,
                gsd_file,
                embedding_dicts,
                NN,
                nlist_model,
                keep_residues=(-1, 1),
                debug=False,
                units=unit.nanometer,
                frame_number=3,
                model_index=0,
                log_file=None,
                shiftx_style=False):
    """Build per-residue fragment records from a PDB file and its peak data.

    For a random selection of frames, the structure is cleaned with PDBFixer
    and, for every residue of the requested chain, a fragment consisting of
    the residue, its in-chain neighbors and its spatial neighbors is encoded
    into fixed-size arrays and converted with ``make_tfrecord``.

    Parameters
    ----------
    path : str
        Path of the PDB file.
    corr_path : str
        Path handed to ``process_corr`` to load the peak data.
    chain_id : str
        Chain to process; ``'_'`` selects the first character of the chain id
        of the first residue.
    max_atoms : int
        Size of the per-fragment arrays.
    gsd_file
        If not None, a snapshot is appended to it for every record.
    embedding_dicts : dict
        Lookup tables under the keys 'name', 'atom', 'nlist' and 'class'.
        The 'name' table is extended in place with unseen atom names.
    NN : int
        Number of neighbors per atom read from the neighbor list.
    nlist_model : callable
        Called with the positions array; the result is indexed as
        ``[atom, neighbor, 0]`` (distance) and ``[atom, neighbor, 1]``
        (neighbor index).
    keep_residues : sequence of two ints
        Offsets (relative to the residue index) of the first and last
        in-chain residue included in a fragment.
    debug : bool
        Print diagnostics and raise on some failures instead of skipping.
    units
        Unit the frame positions are converted to.
    frame_number : int
        Maximum number of frames sampled (forced to 1 if ``shiftx_style``).
    model_index : int
        Stored in the ``indices`` of every record.
    log_file
        If not None, one line is written per record.
        NOTE(review): this evaluates ``len(gsd_file)``, so ``gsd_file`` must
        not be None when a log file is given - confirm against callers.
    shiftx_style : bool
        Passed to ``process_corr``; also skips adding missing atoms and
        hydrogens.

    Returns
    -------
    tuple
        ``(result, fraction, len(result), peak_count)`` where ``result`` is
        the list of records and ``fraction`` is
        ``len(peak_successes) / len(peak_data)``.
        NOTE(review): ``peak_successes`` is reset for every frame, so the
        fraction reflects only the last frame visited - confirm intended.
    """

    global MA_LOST_FRAGS
    if shiftx_style:
        frame_number = 1
    # load pdb
    pdb = app.PDBFile(path)

    # load cs sets
    peak_data, sequence_map, peak_seq = process_corr(corr_path, debug,
                                                     shiftx_style)

    result = []
    # check for weird/null chain
    if chain_id == '_':
        chain_id = list(pdb.topology.residues())[0].chain.id[0]
    # sometimes chains have extra characters (why?)
    residues = list(
        filter(lambda r: r.chain.id[0] == chain_id, pdb.topology.residues()))
    if len(residues) == 0:
        if debug:
            raise ValueError('Failed to find requested chain ', chain_id)

    pdb_offset, seq_offset = None, None

    # from pdb residue index to our aligned residue index
    residue_lookup = {}
    peak_count = 0
    # defined up front so the return statement also works if no frame is
    # sampled
    peak_successes = set()
    # select a random set of frames for generating data without replacement
    frame_choices = random.sample(range(0, pdb.getNumFrames()),
                                  k=min(pdb.getNumFrames(), frame_number))
    for fi in frame_choices:
        peak_successes = set()
        # clean up individual frame
        frame = pdb.getPositions(frame=fi)
        # have to fix at each frame since inserted atoms may change
        # fix missing residues/atoms
        fixer = PDBFixer(filename=path)
        # overwrite positions with frame positions
        fixer.positions = frame
        # we want to add missing atoms,
        # but not replace missing residue. We'd
        # rather just ignore those
        fixer.findMissingResidues()
        # remove the missing residues
        fixer.missingResidues = []
        # remove water!
        fixer.removeHeterogens(False)
        if not shiftx_style:
            fixer.findMissingAtoms()
            fixer.findNonstandardResidues()
            fixer.replaceNonstandardResidues()
            fixer.addMissingAtoms()
            fixer.addMissingHydrogens(7.0)
        # get new positions
        frame = fixer.positions
        num_atoms = len(frame)
        # remake residue list each time so they have correct atom ids
        residues = list(
            filter(lambda r: r.chain.id[0] == chain_id,
                   fixer.topology.residues()))
        if num_atoms > 20000:
            MA_LOST_FRAGS += len(residues)
            if debug:
                print(
                    'Exceeded number of atoms for building nlist (change this if you have big GPU memory) in frame {} in pdb {}'
                    .format(fi, path))
            break
        # check alignment once
        if pdb_offset is None:
            # create sequence from residues
            pdb_seq = ['XXX'] * max([int(r.id) + 1 for r in residues])
            for r in residues:
                rid = int(r.id)
                if rid >= 0:
                    pdb_seq[int(r.id)] = r.name
            if debug:
                print('pdb_seq', pdb_seq)
                print('peak_seq', peak_seq)
            pdb_offset, seq_offset = align(pdb_seq, peak_seq, debug)
            # TODO: the pdb offset returned by align() is discarded and
            # forced to zero here. Maybe it's ok
            pdb_offset = 0
            if debug:
                print('pdb_offset', pdb_offset)
                print('seq_offset', seq_offset)
                print(sequence_map)
                # now check alignment - rarely perfect
                saw_one = False
                aligned = 0
                for i in range(len(residues)):
                    segid = int(residues[i].id) + pdb_offset
                    saw_one = pdb_seq[segid] == residues[i].name
                    if not saw_one:
                        print('Mismatch (A) at position {} ({}). {} != {}'.
                              format(segid, residues[i].id, pdb_seq[segid],
                                     residues[i].name))
                        continue
                    if segid + seq_offset in sequence_map:
                        peakid = sequence_map[segid + seq_offset]
                        print(segid, segid + seq_offset, len(pdb_seq),
                              len(peak_seq))
                        saw_one = pdb_seq[segid] == peak_seq[segid +
                                                             seq_offset]
                        if not saw_one:
                            print(
                                'Mismatch (B) at position {}. pdb seq: {}, peak seq: {}'
                                .format(segid, peak_seq[segid + seq_offset],
                                        pdb_seq[peakid]))
                            continue
                        saw_one = peak_data[peakid]['name'] == residues[i].name
                        if not saw_one:
                            print(
                                'Mismatch (C) at position {}. peak seq: {}, peak data: {}, residue: {}'
                                .format(segid, i, peak_seq[segid + seq_offset],
                                        peak_data[peakid]['name'],
                                        residues[i].name))
                            continue
                        aligned += 1
                if aligned < 5:
                    raise ValueError(
                        'Could not find more than 5 aligned residues, very unusual'
                    )

            # create residue look-up from atom index
            for i, r in enumerate(residues):
                for a in r.atoms():
                    residue_lookup[a.index] = i
            # This alignment will be checked as we compare shifts against the pdb
        # get neighbor list for frame
        np_pos = np.array([v.value_in_unit(units) for v in frame])
        frame_nlist = nlist_model(np_pos)

        for ri in range(len(residues)):
            # we build up fragment by getting residues around us, both in chain
            # and those within a certain distance of us
            rmin = max(0, ri + keep_residues[0])
            # have to +1 here (and not in range) to get min to work :)
            rmax = min(len(residues), ri + keep_residues[1] + 1)
            # do we have any residues to consider?
            success = rmax - rmin > 0

            consider = set(range(rmin, rmax))

            # Used to indicate an atom should be included from a different residue
            marked = [False for _ in range(len(frame))]

            # now grab spatial neighbor residues
            # NOTE: I checked this by hand a lot
            # Believe this code.
            for a in residues[ri].atoms():
                for ni in range(NN):
                    j = int(frame_nlist[a.index, ni, 1])
                    try:
                        consider.add(residue_lookup[j])
                        marked[j] = True
                    except KeyError:
                        success = False
                        if debug:
                            print(
                                'Neighboring residue in different chain, skipping'
                            )
                        break
            atoms = np.zeros((max_atoms), dtype=np.int64)
            # we will put dummy atom at end to keep bond counts the same by bonding to it
            # Z-DISABLED
            #atoms[-1] = embedding_dicts['atom']['Z']
            # builtin float is used instead of the np.float alias, which was
            # removed from NumPy (it was the very same type)
            mask = np.zeros((max_atoms), dtype=float)
            bonds = np.zeros((max_atoms, max_atoms), dtype=np.int64)
            # nlist:
            # :,:,0 -> distance
            # :,:,1 -> neighbor index
            # :,:,2 -> bond count
            nlist = np.zeros((max_atoms, NEIGHBOR_NUMBER, 3), dtype=float)
            positions = np.zeros((max_atoms, 3), dtype=float)
            peaks = np.zeros((max_atoms), dtype=float)
            names = np.zeros((max_atoms), dtype=np.int64)
            # going from pdb atom index to index in these data structures
            rmap = dict()
            index = 0
            # check our two conditions that could have made this false: there are residues and
            # we didn't have off-chain spatial neighboring residues
            if not success:
                continue
            for rj in consider:
                residue = residues[rj]
                # use the alignment result to get offset
                segid = int(residue.id) + pdb_offset
                if segid + seq_offset not in sequence_map:
                    if debug:
                        print('Could not find residue index', rj, ': ',
                              residue, 'in the sequence map. Its index is',
                              segid + seq_offset, 'ri: ', ri)
                        print('We are considering', consider)
                    success = False
                    break
                peak_id = sequence_map[segid + seq_offset]
                #peak_id = segid
                if peak_id >= len(peak_data):
                    success = False
                    if debug:
                        print('peakd id is outside of peak range')
                    break
                # only check for residue we actually care about
                if ri == rj and residue.name != peak_data[peak_id]['name']:
                    if debug:
                        print('Mismatch between residue ', ri, rj, peak_id,
                              residue, segid, peak_data[peak_id], path,
                              corr_path, chain_id)
                    success = False
                    break
                for atom in residue.atoms():
                    # Make sure atom is in residue or neighbor of residue atom
                    if ri != rj and not marked[atom.index]:
                        continue
                    mask[index] = float(ri == rj)
                    atom_name = residue.name + '-' + atom.name
                    # unseen atom names get the next free embedding index
                    if atom_name not in embedding_dicts['name']:
                        embedding_dicts['name'][atom_name] = len(
                            embedding_dicts['name'])
                    names[index] = embedding_dicts['name'][atom_name]

                    if atom.element.symbol not in embedding_dicts['atom']:
                        if debug:
                            print('Could not identify atom',
                                  atom.element.symbol)
                        success = False
                        break
                    atoms[index] = embedding_dicts['atom'][atom.element.symbol]
                    positions[index] = np_pos[atom.index, :]
                    rmap[atom.index] = index
                    peaks[index] = 0
                    if mask[index]:
                        if atom.name[:3] in peak_data[peak_id]:
                            peaks[index] = peak_data[peak_id][atom.name[:3]]
                            peak_count += 1
                            peak_successes.add(peak_id)
                        else:
                            mask[index] = 0
                    index += 1
                    # Z-DISABLED
                    # -1 for dummy atom which is stored at end
                    if index == max_atoms - 1:  #2:
                        MA_LOST_FRAGS += 1
                        if debug:
                            print('Not enough space for all atoms in ri', ri)
                        success = False
                        break
                if ri == rj and sum(mask) == 0:
                    if debug:
                        print('Warning found no peaks for', ri, rj, residue,
                              peak_data[peak_id])
                    success = False
                if not success:
                    break
            if not success:
                continue
            # do this after so our reverse mapping is complete
            for rj in consider:
                residue = residues[rj]
                for b in residue.bonds():
                    # set bonds
                    try:
                        bonds[rmap[b.atom1.index], rmap[b.atom2.index]] = 1
                        bonds[rmap[b.atom2.index], rmap[b.atom1.index]] = 1
                    except KeyError:
                        # for bonds that cross residue
                        pass
            for rj in consider:
                residue = residues[rj]
                for a in residue.atoms():
                    # Make sure atom is in residue or neighbor of residue atom
                    if ri != rj and not marked[a.index]:
                        continue
                    index = rmap[a.index]
                    # convert to local indices and filter neighbors
                    n_index = 0
                    for ni in range(NN):
                        if frame_nlist[a.index, ni, 0] > 50.0:
                            # large distances are sentinels for things
                            # like self neighbors
                            continue
                        try:
                            j = rmap[int(frame_nlist[a.index, ni, 1])]
                        except KeyError:
                            # either we couldn't find a neighbor on the root residue (which is bad)
                            # or just one of the neighbors is not on a considered residue.
                            if rj == ri:
                                success = False
                                if debug:
                                    print('Could not find all neighbors',
                                          int(frame_nlist[a.index, ni, 1]),
                                          consider)
                                break
                            # Z-DISABLED
                            #j = max_atoms - 1 # point to dummy atom
                            continue
                        # mark as not a neighbor if out of molecule (only for non-subject nlists)
                        # (branch disabled via `False and`, see Z-DISABLED)
                        if False and j == max_atoms - 1:
                            #set index
                            nlist[index, n_index, 1] = j
                            # set distance
                            nlist[index, n_index, 0] = frame_nlist[a.index, ni,
                                                                   0]
                            #set type
                            nlist[index, n_index,
                                  2] = embedding_dicts['nlist']['none']
                            n_index += 1
                        # a 0 -> non-bonded
                        elif bonds[index, j] == 0:
                            #set index
                            nlist[index, n_index, 1] = j
                            # set distance
                            nlist[index, n_index, 0] = frame_nlist[a.index, ni,
                                                                   0]
                            #set type
                            nlist[index, n_index,
                                  2] = embedding_dicts['nlist']['nonbonded']
                            n_index += 1
                        # single bonded
                        else:
                            #set index
                            nlist[index, n_index, 1] = j
                            # set distance
                            nlist[index, n_index, 0] = frame_nlist[a.index, ni,
                                                                   0]
                            #set type
                            nlist[index, n_index,
                                  2] = embedding_dicts['nlist'][1]
                            n_index += 1
                        if n_index == NEIGHBOR_NUMBER:
                            break
                    # how did we do on peaks
                    # (debugging aid, disabled via `False and`)
                    if False and (peaks[index] > 0 and peaks[index] < 25):
                        nonbonded_count = np.sum(
                            nlist[index, :,
                                  2] == embedding_dicts['nlist']['nonbonded'])
                        bonded_count = np.sum(
                            nlist[index, :, 2] == embedding_dicts['nlist'][1])
                        print(
                            'neighbor summary: non-bonded: {}, bonded: {}, total: {}'
                            .format(nonbonded_count, bonded_count,
                                    NEIGHBOR_NUMBER))
                        print(nlist[index, :, :])
                        exit()
            if not success:
                if debug:
                    raise RuntimeError()
                continue
            if gsd_file is not None:
                snapshot = write_record_traj(
                    positions, atoms, mask, nlist, peaks,
                    embedding_dicts['class'][residues[ri].name], names,
                    embedding_dicts)
                snapshot.configuration.step = len(gsd_file)
                gsd_file.append(snapshot)
            result.append(
                make_tfrecord(atoms,
                              mask,
                              nlist,
                              peaks,
                              embedding_dicts['class'][residues[ri].name],
                              names,
                              indices=np.array(
                                  [model_index, fi,
                                   int(residues[ri].id)],
                                  dtype=np.int64)))
            if log_file is not None:
                log_file.write('{} {} {} {} {} {} {} {}\n'.format(
                    path.split('/')[-1],
                    corr_path.split('/')[-1], chain_id, len(peak_successes),
                    len(gsd_file), model_index, fi, residues[ri].id))
    return result, len(peak_successes) / len(peak_data), len(
        result), peak_count
Beispiel #7
0
def solvate(system, opt):
    """
    Solvate a system with PDBFixer and return it as a single OEMol.

    PDBFixer adds water and ions around the passed system. Only the added
    solvent chains are converted back to an OEMol; the original molecule is
    then appended to it, so its own bond orders and aromaticity are kept.

    Parameters:
    -----------
    system: OEMol molecule
        The system to solvate
    opt: python dictionary
        The parameters used to solvate the system. The keys read here are
        'solvent_padding' (interpreted in angstroms) and
        'salt_concentration' (interpreted in millimolar).

    Return:
    -------
    oe_mol: OEMol
        The solvated system: water and ions followed by the atoms of the
        passed system, with the periodic box vectors stored as generic data
        under the 'box_vectors' tag.
    """

    # Load a fake system to initialize PDBfixer; its topology and positions
    # are overwritten below.
    filename = resource_filename('pdbfixer', 'tests/data/test.pdb')
    fixer = PDBFixer(filename=filename)

    # Convert between OE and OpenMM topology
    omm_top, omm_pos = oeommutils.oemol_to_openmmTop(system)

    # Chain ids of the original system, used to recognise the chains that
    # the solvation step adds.
    solute_chain_ids = {chain.id for chain in omm_top.chains()}

    # Set the correct topology to the fake system
    fixer.topology = omm_top
    fixer.positions = omm_pos

    # Solvate the system
    fixer.addSolvent(padding=unit.Quantity(opt['solvent_padding'], unit.angstroms),
                     ionicStrength=unit.Quantity(opt['salt_concentration'], unit.millimolar))

    # The OpenMM topology produced by the solvation fixer has missing bond
    # orders and aromaticity. The following section is creating a new openmm
    # topology made of just water molecules and ions. The new topology is then
    # converted in an OEMol and added to the passed molecule to produce the
    # solvated system

    wat_ion_top = app.Topology()

    # Atom dictionary between the PDBfixer topology and the water_ion topology
    fixer_atom_to_wat_ion_atom = {}

    for chain in fixer.topology.chains():
        if chain.id in solute_chain_ids:
            continue
        n_chain = wat_ion_top.addChain(chain.id)
        for res in chain.residues():
            n_res = wat_ion_top.addResidue(res.name, n_chain)
            for at in res.atoms():
                n_at = wat_ion_top.addAtom(at.name, at.element, n_res)
                fixer_atom_to_wat_ion_atom[at] = n_at

    for bond in fixer.topology.bonds():
        at0 = bond[0]
        at1 = bond[1]
        # Only bonds whose two atoms were both copied above belong to the
        # solvent topology. Bonds touching the original system are skipped
        # explicitly, so any genuine error from addBond is not hidden.
        if (at0 in fixer_atom_to_wat_ion_atom
                and at1 in fixer_atom_to_wat_ion_atom):
            wat_ion_top.addBond(fixer_atom_to_wat_ion_atom[at0],
                                fixer_atom_to_wat_ion_atom[at1], type=None, order=1)

    # NOTE(review): assumes the solvent/ion positions are stored after the
    # positions of the original system -- confirm against PDBFixer.
    wat_ion_pos = fixer.positions[len(omm_pos):]

    oe_mol = oeommutils.openmmTop_to_oemol(wat_ion_top, wat_ion_pos)

    # Setting the box vectors
    omm_box_vectors = fixer.topology.getPeriodicBoxVectors()
    box_vectors = utils.PackageOEMol.encodePyObj(omm_box_vectors)
    oe_mol.SetData(oechem.OEGetTag('box_vectors'), box_vectors)

    oechem.OEAddMols(oe_mol, system)

    return oe_mol
Beispiel #8
0
def prepare_pdb(pdb,
                chains='A',
                ff=('amber99sbildn.xml', 'tip3p.xml'),
                ph=7,
                pad=10 * unit.angstroms,
                nbonded=app.PME,
                constraints=app.HBonds,
                crystal_water=True):
    """
    Fetch, solvate and minimize a protein PDB structure.

    The fixed structure is written to ``<pdb>-pdbfixer.pdb`` and the
    minimized structure to ``<pdb>-minimized.pdb`` in the current working
    directory. The context, system and integrator are then passed to
    ``serialize_system``.

    Parameters
    ----------
    pdb : str
        PDB Id.
    chains : str or list
        Chain(s) to keep in the system. The value is converted with
        ``set()``, so a string is treated as a collection of
        single-character chain ids.
    ff : tuple of xml ff files.
        Forcefields for parametrization.
    ph : float
        pH value for adding missing hydrogens.
    pad: Quantity object
        Padding around macromolecule for filling box with water.
    nbonded : object
        The method to use for nonbonded interactions.  Allowed values are
        NoCutoff, CutoffNonPeriodic, CutoffPeriodic, Ewald, PME, or LJPME.
        Solvent is only added for PME, CutoffPeriodic and Ewald.
    constraints : object
        Specifies which bonds and angles should be implemented with
        constraints. Allowed values are None, HBonds, AllBonds, or HAngles.
    crystal_water : bool
        Keep crystal water.

    """

    # Load forcefield.
    forcefield = app.ForceField(*ff)

    # Retrieve structure from PDB.
    logger.info('Retrieving %s from PDB...', pdb)
    fixer = PDBFixer(pdbid=pdb)

    # Remove unselected chains.
    logger.info('Removing all chains but %s', chains)
    all_chains = [c.id for c in fixer.topology.chains()]
    fixer.removeChains(chainIds=set(all_chains) - set(chains))

    # Find missing residues.
    logger.info('Finding missing residues...')
    fixer.findMissingResidues()

    # Replace nonstandard residues.
    logger.info('Replacing nonstandard residues...')
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()

    # Add missing atoms.
    logger.info('Adding missing atoms...')
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    # Remove heterogens.
    logger.info('Removing heterogens...')
    fixer.removeHeterogens(keepWater=crystal_water)

    # Add missing hydrogens.
    logger.info('Adding missing hydrogens appropriate for pH %s', ph)
    fixer.addMissingHydrogens(ph)

    # NOTE(review): LJPME is documented as an allowed value but is not in
    # this list, so no solvent is added for it -- confirm this is intended.
    if nbonded in [app.PME, app.CutoffPeriodic, app.Ewald]:
        # Add solvent.
        logger.info('Adding solvent...')
        fixer.addSolvent(padding=pad)

    # Write PDB file. The context manager closes the handle, so the file is
    # flushed to disk before the function moves on.
    fixed_filename = '%s-pdbfixer.pdb' % pdb
    logger.info('Writing PDB file to "%s"...', fixed_filename)
    with open(fixed_filename, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Create OpenMM System.
    logger.info('Creating OpenMM system...')
    system = forcefield.createSystem(fixer.topology,
                                     nonbondedMethod=nbonded,
                                     constraints=constraints,
                                     rigidWater=True,
                                     removeCMMotion=False)

    # Minimize to update positions.
    logger.info('Minimizing...')
    integrator = mm.VerletIntegrator(1.0 * unit.femtosecond)
    context = mm.Context(system, integrator)
    context.setPositions(fixer.positions)
    mm.LocalEnergyMinimizer.minimize(context)
    # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
    state = context.getState(getPositions=True)
    fixer.positions = state.getPositions()

    # Write final coordinates.
    minimized_filename = '%s-minimized.pdb' % pdb
    logger.info('Writing PDB file to "%s"...', minimized_filename)
    with open(minimized_filename, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Serialize final coordinates.
    logger.info('Serializing to XML...')
    serialize_system(context, system, integrator)