Ejemplo n.º 1
0
def main():
    """Export thresholded EM-map predictions as pseudo-atoms in a PDB file.

    Loads the prediction mapping named by ``args.prediction_output`` and keeps
    only the entries whose ``"i,j,k"`` style key matches the index of a map
    voxel whose value exceeds ``args.threshold``. One ``MG`` atom is created
    per kept entry, positioned at the integer indices parsed from the key and
    carrying the prediction in the beta field. The result is written to
    ``<map file name without extension>.pdb`` (a relative path).
    """
    args = get_parser()

    print(f"[INFO] load {args.prediction_output}")
    predictions = joblib.load(args.prediction_output)

    print(f"[INFO] load {args.map_file}")
    em_map = get_em_map(args.map_file)

    # Encode the indices of every above-threshold voxel the same way the
    # prediction keys are encoded (comma-joined integers).
    voxel_indices = np.array(np.where(em_map > args.threshold)).T
    map_keys = {','.join(str(v) for v in voxel) for voxel in voxel_indices}

    kept = {key: value for key, value in predictions.items() if key in map_keys}
    beta = np.squeeze(np.array(list(kept.values())))
    coords = np.array([[int(v) for v in key.split(',')] for key in kept])

    mol = Molecule().empty(numAtoms=len(kept))
    for field, value in (('record', 'ATOM'), ('resname', 'MG'), ('element', 'MG')):
        mol.set(field, value)
    mol.box = np.zeros((3, 1), dtype=np.float32)
    # Append a trailing axis of length one to the (numAtoms, 3) index array.
    mol.coords = coords[:, :, np.newaxis].astype(np.float32)
    mol.set('beta', beta)

    stem = os.path.splitext(os.path.basename(args.map_file))[0]
    filename = f"{stem}.pdb"
    print(f"[INFO] save to {filename}")
    mol.write(filename)
Ejemplo n.º 2
0
    def toMolecule(self, formalcharges=False, ids=None):
        """
        Return the moleculekit.molecule.Molecule

        Parameters
        ----------
        formalcharges: bool
            If True, the formal charges are used instead of partial ones
        ids: list or numpy.ndarray
            The conformer ids to store in the moleculekit Molecule object. If None, all are returned
            Default: None

        Returns
        -------
        mol: moleculekit.molecule.Molecule
            The moleculekit Molecule object

        Raises
        ------
        NoConformerError
            If ``self.numFrames`` is 0. The exception class is defined inside
            this method, so callers can only catch it through ``Exception``.
        ValueError
            If ``ids`` is neither None, a list nor a numpy array
        """
        from moleculekit.molecule import Molecule

        class NoConformerError(Exception):
            pass

        if ids is None:
            ids = np.arange(self.numFrames)

        if self.numFrames == 0:
            raise NoConformerError(
                "No Conformers are found in the molecule. Generate at least one conformer."
            )
        if not isinstance(ids, (list, np.ndarray)):
            raise ValueError(
                'The argument ids should be a list of confomer ids')

        # Select the requested conformers along the third axis of the coordinates.
        coords = self._coords[:, :, ids]

        mol = Molecule()
        mol.empty(self.numAtoms)
        mol.record[:] = 'HETATM'
        mol.resname[:] = self.ligname[:3]
        mol.resid[:] = self._resid
        mol.coords = coords
        mol.name[:] = self._name
        mol.element[:] = self._element
        if formalcharges:
            mol.charge[:] = self._formalcharge
        else:
            mol.charge[:] = self._charge
        # Size the (all-zero) box from the coordinates actually exported, not
        # from self.numFrames, so both agree when only a subset of ids is used.
        mol.box = np.zeros((3, coords.shape[2]), dtype=np.float32)
        mol.viewname = self.ligname
        mol.bonds = self._bonds
        mol.bondtype = self._bondtype
        mol.atomtype = self._atomtype
        return mol
Ejemplo n.º 3
0
def setup(args):
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    device = torch.device(args.device)

    if args.topology is not None:
        mol = Molecule(args.topology)
    elif args.structure is not None:
        mol = Molecule(args.structure)
        mol.box = np.array(
            [mol.crystalinfo['a'], mol.crystalinfo['b'],
             mol.crystalinfo['c']]).reshape(3, 1).astype(np.float32)

    if args.coordinates is not None:
        mol.read(args.coordinates)

    if args.extended_system is not None:
        mol.read(args.extended_system)

    precision = precisionmap[args.precision]

    print("Force terms: ", args.forceterms)
    ff = ForceField.create(mol, args.forcefield)
    parameters = Parameters(ff,
                            mol,
                            args.forceterms,
                            precision=precision,
                            device=device)

    external = None
    if args.external is not None:
        externalmodule = importlib.import_module(args.external["module"])
        embeddings = torch.tensor(args.external["embeddings"]).repeat(
            args.replicas, 1)
        external = externalmodule.External(args.external["file"], embeddings,
                                           device)

    system = System(mol.numAtoms, args.replicas, precision, device)
    system.set_positions(mol.coords)
    system.set_box(mol.box)
    system.set_velocities(
        maxwell_boltzmann(parameters.masses, args.temperature, args.replicas))

    forces = Forces(parameters,
                    terms=args.forceterms,
                    external=external,
                    cutoff=args.cutoff,
                    rfa=args.rfa,
                    switch_dist=args.switch_dist)
    return mol, system, forces
Ejemplo n.º 4
0
    def setUpClass(self):
        """Prepare the shared test molecule loaded from ``filtered.pdb``.

        The coordinates are repeated twice along the last axis, the molecule
        is filtered to ``protein and resid 1 to 20`` and the box, box angles,
        step, time and fileloc fields are filled with two entries each.
        """
        import os

        import numpy as np
        from moleculekit.home import home
        from moleculekit.molecule import Molecule

        n_frames = 2
        self.testfolder = home(dataDir='molecule-writers')
        pdb_path = os.path.join(self.testfolder, 'filtered.pdb')

        mol = Molecule(pdb_path)
        mol.coords = np.tile(mol.coords, (1, 1, n_frames))
        mol.filter('protein and resid 1 to 20')
        mol.boxangles = np.full((3, n_frames), 90, dtype=np.float32)
        mol.box = np.full((3, n_frames), 15, dtype=np.float32)
        mol.step = np.arange(n_frames)
        mol.time = np.arange(n_frames) * 1e5
        # Reuse the first file location entry for every repeated frame.
        mol.fileloc = [mol.fileloc[0]] * n_frames
        self.mol = mol
Ejemplo n.º 5
0
    def setUpClass(self):
        """Prepare the shared test molecule from ``filtered.psf`` and ``filtered.pdb``.

        The PSF file is loaded first and the PDB file is then read into the
        same molecule. The coordinates are repeated twice along the last axis,
        the molecule is filtered to ``protein and resid 1 to 20``, the box,
        box angles, step, time and fileloc fields are filled with two entries
        each, and every bond type is set to ``"1"``.
        """
        import os

        import numpy as np
        from moleculekit.home import home
        from moleculekit.molecule import Molecule

        n_frames = 2
        self.testfolder = home(dataDir="molecule-writers")
        psf_path = os.path.join(self.testfolder, "filtered.psf")
        pdb_path = os.path.join(self.testfolder, "filtered.pdb")

        mol = Molecule(psf_path)
        mol.read(pdb_path)
        mol.coords = np.tile(mol.coords, (1, 1, n_frames))
        mol.filter("protein and resid 1 to 20")
        mol.boxangles = np.full((3, n_frames), 90, dtype=np.float32)
        mol.box = np.full((3, n_frames), 15, dtype=np.float32)
        mol.step = np.arange(n_frames)
        mol.time = np.arange(n_frames) * 1e5
        # Reuse the first file location entry for every repeated frame.
        mol.fileloc = [mol.fileloc[0]] * n_frames
        mol.bondtype[:] = "1"
        self.mol = mol
Ejemplo n.º 6
0
    def viewStates(self, protein=None, ligand=None, nsamples=20):
        """Load sampled frames of every active HMM state into the current viewer.

        For each entry of ``self.hmm.active_set`` the frames returned by
        ``self.hmm.sample_by_observation_probabilities(nsamples)`` are read
        into a fresh molecule, which is wrapped and aligned on ``"protein"``
        against a reference molecule and loaded into the viewer under the
        name ``"hmm macro <state>"``.

        Parameters
        ----------
        protein : object, optional
            If not None, a ``"protein"`` representation is added for each
            loaded state.
        ligand : str, optional
            If not None, passed as ``sel`` for a ``"ligand"`` representation
            added for each loaded state; its color cycles through a fixed
            list of color ids.
        nsamples : int
            Passed to ``self.hmm.sample_by_observation_probabilities``.

        Raises
        ------
        RuntimeError
            If ``_singleMolfile(self.data.simlist)`` reports that there is no
            single molfile.
        """
        from htmd.projections.metric import _singleMolfile
        from moleculekit.molecule import Molecule
        from moleculekit.vmdviewer import getCurrentViewer

        (single, molfile) = _singleMolfile(self.data.simlist)
        if not single:
            raise RuntimeError("Can't visualize states without unique molfile")

        viewer = getCurrentViewer()
        colors = [0, 1, 3, 4, 5, 6, 7, 9]

        print("Active set includes macrostates: {}".format(
            self.hmm.active_set))

        res = self.hmm.sample_by_observation_probabilities(nsamples)
        refmol = Molecule(molfile)

        for i, s in enumerate(self.hmm.active_set):
            mol = Molecule(molfile)
            # Drop the coordinates/box loaded from the molfile so that only
            # the appended sampled frames remain.
            mol.coords = []
            mol.box = []

            frames = self.data.rel2sim(res[i])
            for f in frames:
                mol._readTraj(f.sim.trajectory[f.piece],
                              frames=[f.frame],
                              append=True)
            mol.wrap("protein")
            mol.align("protein", refmol=refmol)
            viewer.loadMol(mol, name="hmm macro " + str(s))
            if ligand is not None:
                # Cycle through the color list when there are more states
                # than colors.
                viewer.rep("ligand",
                           sel=ligand,
                           color=colors[i % len(colors)])
            if protein is not None:
                viewer.rep("protein")
            viewer.send("start_sscache")
Ejemplo n.º 7
0
def _fillMolecule(name, resname, chain, resid, insertion, coords, segid, element,
                  occupancy, beta, charge, record):
    """Create a Molecule and populate it from per-atom field sequences.

    Every stored field is converted to a numpy array using the dtype listed
    for it in ``mol._dtypes``. ``coords`` is first stacked with ``np.vstack``
    and promoted to at least three dimensions. The box is zero-filled with
    one column per entry along the third axis of the coordinates.

    ``charge`` and ``record`` are accepted but are not stored on the molecule.
    """
    mol = Molecule()
    mol.empty(len(name))

    # (attribute name, raw values) in the order they are assigned.
    field_values = (
        ('name', name),
        ('resname', resname),
        ('chain', chain),
        ('resid', resid),
        ('insertion', insertion),
        ('coords', coords),
        ('segid', segid),
        ('element', element),
        ('occupancy', occupancy),
        ('beta', beta),
    )
    for field, values in field_values:
        if field == 'coords':
            values = np.atleast_3d(np.vstack(values))
        setattr(mol, field, np.array(values, dtype=mol._dtypes[field]))

    n_frames = mol.coords.shape[2]
    mol.box = np.zeros((3, n_frames), dtype=mol._dtypes['box'])
    return mol
Ejemplo n.º 8
0
def reconstructAdaptiveTraj(simlist, trajID):
    """ Reconstructs a long trajectory out of short adaptive runs.

    Starting from the simulation whose ``simid`` equals ``trajID``, the
    function repeatedly calls ``_findprevioustraj`` until it reports epoch 1,
    then concatenates the coordinates, boxes and file locations of all
    collected simulations in reverse order of discovery.

    Parameters
    ----------
    simlist : numpy.ndarray of :class:`Sim <htmd.simlist.Sim>` objects
        A simulation list generated by the :func:`simlist <htmd.simlist.simlist>` function
    trajID : int
        The id of the trajectory from which to start going back.

    Returns
    -------
    mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
        A Molecule object containing the reconstructed trajectory
    chain : list of tuple
        ``(sim, piece, frame)`` entries for all simulations involved, in the
        order they were concatenated. The entry for the requested simulation
        is last and carries ``piece == frame == -1``.
    pathlist : list
        One trajectory file entry per simulation involved, in the same order
        as ``chain``.

    Raises
    ------
    NameError
        If no simulation in ``simlist`` has ``simid == trajID``.

    Examples
    --------
    >>> mol, chain, pathlist = reconstructAdaptiveTraj(data.simlist, 52)
    """

    # Locate the simulation to start from.
    sim = None
    for s in simlist:
        if s.simid == trajID:
            sim = s
            break
    if sim is None:
        raise NameError(
            'Could not find sim with ID {} in the simlist.'.format(trajID))

    pathlist = []
    pathlist.append(sim.trajectory[0])
    chain = []
    # (-1, -1) marks the requested simulation: no specific piece/frame to stop at.
    chain.append((sim, -1, -1))

    # Walk backwards until _findprevioustraj reports epoch 1. Note that it is
    # called at least once, even for the starting simulation.
    epo = None
    while epo != 1:
        [sim, piece, frame,
         epo] = _findprevioustraj(simlist, _simName(sim.trajectory[0]))
        pathlist.append(sim.trajectory[piece])
        chain.append((sim, piece, frame))
    # Reverse so that the entry found last comes first.
    pathlist = pathlist[::-1]
    chain = chain[::-1]

    # Start from an empty (zero-frame) molecule and append each simulation.
    # `sim` here is the last simulation returned by _findprevioustraj; its
    # molfile is used for every entry of the chain.
    mol = Molecule(sim.molfile)
    mol.coords = np.zeros((mol.numAtoms, 3, 0), dtype=np.float32)
    mol.fileloc = []
    mol.box = np.zeros((3, 0))
    for i, c in enumerate(chain):
        tmpmol = Molecule(sim.molfile)
        tmpmol.read(c[0].trajectory)
        endpiece = c[1]
        fileloc = np.vstack(tmpmol.fileloc)
        filenames = fileloc[:, 0]
        # NOTE(review): np.unique returns the file names sorted, so indexing
        # it with `endpiece` assumes sorted order equals piece order - confirm.
        pieces = np.unique(filenames)
        # Index of the first frame that belongs to the end piece.
        firstpieceframe = np.where(filenames == pieces[endpiece])[0][0]
        endFrame = firstpieceframe + c[2]
        # NOTE(review): for the (-1, -1) entry endFrame is -1 only when
        # firstpieceframe is 0, i.e. when the last piece starts at frame 0.
        # With several pieces the requested simulation would be truncated
        # before its last piece - confirm whether that is intended.
        if endFrame != -1:
            tmpmol.coords = tmpmol.coords[:, :, 0:endFrame +
                                          1]  # Adding the actual respawned frame (+1) since the respawned sim doesn't include it in the xtc
            tmpmol.fileloc = tmpmol.fileloc[0:endFrame + 1]
            tmpmol.box = tmpmol.box[:, 0:endFrame + 1]
        mol.coords = np.concatenate((mol.coords, tmpmol.coords), axis=2)
        mol.box = np.concatenate((mol.box, tmpmol.box), axis=1)
        mol.fileloc += tmpmol.fileloc
    #mol.fileloc[:, 1] = range(np.size(mol.fileloc, 0))

    return mol, chain, pathlist
Ejemplo n.º 9
0
    def test_distances_trivial(self):
        """Check MetricDistance against hand-computed distances.

        Covers plain distances, distances with ``pbc=True`` in a box of
        edge 2, the grouped (minimum) variant, and the ordering of the
        output columns when both selections contain two atoms.
        """
        from moleculekit.molecule import Molecule
        import numpy as np

        def build_carbons(natoms, placements):
            built = Molecule().empty(natoms)
            built.name[:] = "C"
            built.element[:] = "C"
            # If they are in the same chain, no wrapping is done for distances
            built.chain[:] = [str(idx) for idx in range(natoms)]
            # Make two frames so we check if the code works for nframes
            built.coords = np.zeros((natoms, 3, 2), dtype=np.float32)
            for (atom, frame), xyz in placements.items():
                built.coords[atom, :, frame] = xyz
            return built

        mol = build_carbons(
            3,
            {
                (1, 0): [3, 3, 3],
                (2, 0): [5, 5, 5],
                (1, 1): [7, 7, 7],
                (2, 1): [6, 6, 6],
            },
        )

        # Atom 0 stays at the origin, so the norms of atoms 1 and 2 are
        # their distances to it.
        realdistances = np.linalg.norm(mol.coords[[1, 2], :, :], axis=1).T

        plain_metric = MetricDistance(
            "index 0", "index 1 2", metric="distances", pbc=False
        )
        data = plain_metric.project(mol)
        assert np.allclose(data, realdistances), "Trivial distance calculation is broken"

        # Test wrapped distances
        wrapped = np.mod(mol.coords, 2)
        wrappedrealdistances = np.linalg.norm(wrapped[[1, 2], :, :], axis=1).T

        # Make box 2x2x2A large
        mol.box = np.ones((3, 2), dtype=np.float32) * 2
        wrapped_metric = MetricDistance(
            "index 0", "index 1 2", metric="distances", pbc=True
        )
        data = wrapped_metric.project(mol)
        assert np.allclose(
            data, wrappedrealdistances
        ), "Trivial wrapped distance calculation is broken"

        # Test min distances
        grouped_metric = MetricDistance(
            "index 0",
            "index 1 2",
            metric="distances",
            pbc=False,
            groupsel1="all",
            groupsel2="all",
        )
        data = grouped_metric.project(mol)
        expected_min = realdistances.min(axis=1)
        assert np.allclose(data.flatten(), expected_min), "Trivial distance calculation is broken"

        # Test ordering
        mol = build_carbons(
            4,
            {
                (1, 0): [1, 1, 1],
                (2, 0): [3, 3, 3],
                (3, 0): [5, 5, 5],
                (1, 1): [1, 1, 1],
                (2, 1): [7, 7, 7],
                (3, 1): [6, 6, 6],
            },
        )

        # Expected columns: distances from atom 0 to atoms 2 and 3, followed
        # by distances from atom 1 to atoms 2 and 3.
        targets = mol.coords[[2, 3], :, :]
        per_reference = [
            np.linalg.norm(targets - mol.coords[ref], axis=1).T for ref in (0, 1)
        ]
        realdistances = np.hstack(per_reference)

        ordering_metric = MetricDistance(
            "index 0 1", "index 2 3", metric="distances", pbc=False
        )
        data = ordering_metric.project(mol)
        assert np.allclose(
            data, realdistances
        ), "Trivial distance calculation has broken ordering"