Example #1
def test_convert_chunked_project_1():
    fetch_alanine_dipeptide()
    with tempdir():
        root = os.path.join(get_data_home(), 'alanine_dipeptide')
        if sys.platform == 'win32':
            pattern = "*.dcd"
        else:
            pattern = "'*.dcd'"
        cmd = ('msmb ConvertChunkedProject out {root} --pattern {pattern} '
               '-t {root}/ala2.pdb'
               .format(root=root, pattern=pattern))
        shell(cmd)
        assert set(os.listdir('out')) == {'traj-00000000.dcd',
                                          'trajectories.jsonl'}

        # check that out/traj-00000000.dcd really has concatenated all of
        # the input trajs
        length = len(md.open('out/traj-00000000.dcd'))
        assert length == sum(len(md.open(f))
                             for f in glob.glob('%s/*.dcd' % root))

        with open('out/trajectories.jsonl') as f:
            record = json.load(f)
        assert set(record.keys()) == {'filename', 'chunks'}
        assert record['filename'] == 'traj-00000000.dcd'
        assert sorted(glob.glob('%s/*.dcd' % root)) == record['chunks']
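# Note: the single json.load above works only because exactly one record is
# present here; a .jsonl file with one JSON object per line would be read
# line-wise instead, e.g.:
with open('out/trajectories.jsonl') as f:
    records = [json.loads(line) for line in f]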
Example #2
def test_open_and_load(get_fn):
    # These aren't tested in test_length because they don't support length!
    files = ['frame0.mdcrd', '4waters.arc', 'frame0.lammpstrj']

    for file in files:
        if file.endswith('.mdcrd'):
            opened = md.open(get_fn(file), n_atoms=22)
        else:
            opened = md.open(get_fn(file))

        loaded = md.load(get_fn(file), top=get_fn('native.pdb'))
Example #3
def test_lengths():
    num = 3
    inptrajs = ['PROJ9761/RUN3/CLONE9/frame{}.xtc'.format(i)
                for i in range(num)]
    stride = 8
    subprocess.check_call(
        ['gmx', 'trjcat', '-f'] + inptrajs + ['-o', 'catty.xtc'],
        stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL)

    with mdtraj.open("catty.xtc") as xtc:
        stridelen = len(xtc) // stride
        remain = len(xtc) % stride
        assert stridelen == num * PROJ61_LENGTH_PER_GEN, (stridelen, remain)

    top = mdtraj.load_prmtop("tops-p9712/4bw5.prmtop")
    traj1 = mdtraj.load("catty.xtc", top=top)[::stride]
    # the last frame of each chunk duplicates the first frame of the next; drop it
    traj2 = mdtraj.load(inptrajs[0], top=top)[::stride][:-1]
    traj2 += mdtraj.load(inptrajs[1], top=top)[::stride][:-1]
    traj2 += mdtraj.load(inptrajs[2], top=top)[::stride]
    traj3 = mdtraj.load(inptrajs, top=top,
                        discard_overlapping_frames=True)[::stride]

    np.testing.assert_array_equal(traj1.xyz, traj3.xyz)
    np.testing.assert_array_equal(traj1.xyz, traj2.xyz)
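# A quick check of the boundary duplication the [:-1] slicing works around,
# assuming (as the test does) that each chunk's last frame repeats as the
# next chunk's first frame; this reuses the test's inptrajs and top:
a = mdtraj.load(inptrajs[0], top=top)
b = mdtraj.load(inptrajs[1], top=top)
np.testing.assert_array_equal(a.xyz[-1], b.xyz[0])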
Example #4
def run(traj_dir, conf_filename, project_filename, iext):
    logger.info("Rebuilding project.")
    file_list = glob.glob(traj_dir + "/trj*%s" % iext)
    num_traj = len(file_list)

    traj_lengths = np.zeros(num_traj, 'int')
    traj_paths = []

    if not os.path.exists(conf_filename):
        raise IOError("Cannot find conformation file %s" % conf_filename)

    file_list = sorted(file_list, key=utils.keynat)
    for i, filename in enumerate(file_list):
        traj_lengths[i] = len(md.open(filename))
        traj_paths.append(filename)

    records = {
        "conf_filename": conf_filename,
        "traj_lengths": traj_lengths,
        "traj_paths": traj_paths,
        "traj_errors": [None for i in xrange(num_traj)],
        "traj_converted_from": [[] for i in xrange(num_traj)]
    }

    p = Project(records)
    p.save(project_filename)
    logger.info("Wrote %s" % project_filename)
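# Hypothetical invocation of run(); the paths are illustrative, the only real
# requirement being that traj_dir contains files matching trj*<iext>:
run(traj_dir="Trajectories", conf_filename="native.pdb",
    project_filename="ProjectInfo.yaml", iext=".lh5")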
Example #5
def delete_trajectory_if_broken(filename, verbose=True):
    """
    Check the integrity of an MDTraj trajectory, deleting it if it is broken.

    Parameters
    ----------
    filename : str
       The trajectory filename.
    verbose : bool, optional, default=True
       If True, write some logging messages if broken trajectories are detected.

    """
    if os.path.exists(filename):
        try:
            trj = md.open(filename)
            # Clean up.
            del trj
        except Exception as e:
            msg = "The integrity of trajectory file '%s' was compromised; deleting so that it will be regenerated.\n" % filename
            msg += "\n"
            msg += str(e)
            if verbose:
                print(msg)
            # os.path has no unlink; os.unlink deletes the broken file
            os.unlink(filename)
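# Hypothetical usage: prune any unreadable trajectories before a run is
# restarted (the glob pattern is illustrative):
import glob
for fn in glob.glob('output/*.dcd'):
    delete_trajectory_if_broken(fn, verbose=True)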
Example #6
    def write(self, filename, coordinates=None):
        '''
        Writes selected data to an output file, of format specified by the
        given filename's extension.

        Arguments:
            filename:
            Name of the file to be written. All MDTraj-supported formats are
            available.

            coordinates:
            An [nframes, natoms, 3] numpy array defining what will be written,
            else all frames in the Cofasu will be output.
        '''

        # Note: currently ignores box data.
        ext = os.path.splitext(filename)[1].lower()
        needs_topology = ext in ['.gro', '.pdb']

        if coordinates is None:
            coordinates = self.x.compute()

        with mdt.open(filename, 'w') as f:
            if needs_topology:
                f.write(coordinates, self.top)
            else:
                f.write(coordinates)
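# Hypothetical calls, assuming `cofasu` is an instance of this class: a .pdb
# target exercises the topology branch, a .dcd target the coordinates-only one.
cofasu.write('all_frames.pdb')
cofasu.write('every_tenth.dcd', coordinates=cofasu.x.compute()[::10])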
Example #7
    def parse_fn(self, fn):
        meta = {
            'traj_fn': fn,
            'top_fn': self.top_fn,
            'top_abs_fn': os.path.abspath(self.top_fn),
        }
        try:
            with md.open(fn) as f:
                meta['nframes'] = len(f)
        except Exception as e:
            warnings.warn("Could not determine the number of frames for {}: {}"
                          .format(fn, e), ParseWarning)

        if self.step_ps is not None:
            meta['step_ps'] = self.step_ps

        # Get indices
        ma = self.fn_re.search(fn)
        if ma is None:
            raise ValueError("Filename {} did not match the "
                             "regular expression {}".format(fn, self.fn_re))
        meta.update({gn: transform(ma.group(gi))
                     for gn, transform, gi
                     in zip(self.group_names, self.group_transforms,
                            range(1, len(self.group_names) + 1))
                     })
        return meta
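# A minimal sketch of the attributes parse_fn assumes, with a made-up filename
# scheme (the pattern and group names are illustrative, not the real config):
import re
fn_re = re.compile(r'run(\d+)-clone(\d+)\.xtc')
group_names = ['run', 'clone']
group_transforms = [int, int]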
Example #8
    def _get_traj_info(self, filename):
        with mdtraj.open(filename, mode='r') as fh:
            length = len(fh)
            frame = fh.read(1)[0]
            ndim = np.shape(frame)[1]
            offsets = fh.offsets if hasattr(fh, 'offsets') else []

        return TrajInfo(ndim, length, offsets)
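# Minimal stand-in for the TrajInfo container returned above; the real class
# (in pyEMMA) carries more state, but this is enough to make the snippet run:
from collections import namedtuple
TrajInfo = namedtuple('TrajInfo', ['ndim', 'length', 'offsets'])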
Example #9
    def _eval_traj_shapes(self):
        lengths = np.zeros(self.n_trajs)
        n_atoms = np.zeros(self.n_trajs)
        for i in xrange(self.n_trajs):
            filename = self.traj_filename(i)
            with md.open(filename) as f:
                lengths[i] = len(f)
            n_atoms[i] = md.load_frame(filename, 0).n_atoms
        return lengths, n_atoms
Example #11
def load_partial(netcdf, prmtop, start, stop, stride=1):
    topology = md.load_topology(prmtop)
    with md.open(netcdf) as f:
        f.seek(start)
        t = f.read_as_traj(
            topology,
            n_frames=int((stop-start)/stride),
            stride=stride,
        )
        return t
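# Hypothetical call: every 2nd frame of frames 100-200 of an Amber NetCDF file
# (file names are placeholders; assumes the file has at least 200 frames):
t = load_partial('production.nc', 'system.prmtop', start=100, stop=200, stride=2)
print(t.n_frames)  # 50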
Example #12
def test_length(get_fn):
    files = ['frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr',
             'frame0.dcd', '2EQQ.pdb',
             'frame0.binpos', 'frame0.xyz', 'frame0.tng']
    if not (on_win and on_py3):
        files.append('frame0.lh5')

    for file in files:
        opened = md.open(get_fn(file))
        loaded = md.load(get_fn(file), top=get_fn('native.pdb'))
        assert len(opened) == len(loaded)
Example #14
def test_cnv():
    proj = Processor("p1234", "data/PROJ1234", 'xa4')
    trajectories = [Trajectory(info, proj, None) for info in proj.get_infos()]
    cnv2_infos = list(map(_process_trajectory, trajectories))

    assert len(cnv2_infos) > 0

    for info in cnv2_infos:
        for g in range(len(info['cnv2']['gens'])):
            assert os.path.exists(info['cnv1']['gens'][g])
            assert os.path.exists(info['cnv2']['gens'][g])

            with mdtraj.open(info['cnv1']['gens'][g]) as tfile:
                with mdtraj.open(info['cnv2']['gens'][g]) as dcdfile:
                    xyz, time, step, box = tfile.read()
                    print("Shape", xyz.shape)
                    assert xyz.shape == (11, 22, 3), xyz.shape

                    xyz_nc, time, lengths, angles = dcdfile.read()
                    np.testing.assert_array_equal(xyz[:-1] * 10, xyz_nc)
Example #16
def get_num_frames(trj_file, stride):
    t = md.open(trj_file, mode="r", force_overwrite=False)
    res = t.read(stride=stride)
    num_frames = np.shape(res[0])[0]

    # t = md.open(trj_file)
    # num_frames = len(t)

    del t
    gc.collect()
    return num_frames
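# Where the format implements __len__ (as the commented-out variant hints),
# the frame count is available without reading any coordinates; a sketch:
def get_num_frames_fast(trj_file):
    with md.open(trj_file) as t:
        return len(t)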
Example #17
    def testCacheResults(self):
        # cause cache failures
        results = {}
        for f in xtcfiles:
            results[f] = self.db[f]

        desired = {}
        for f in xtcfiles:
            with mdtraj.open(f) as fh:
                desired[f] = len(fh)

        self.assertEqual(results, desired)
Example #18
def test_convert_chunked_project_1():
    with tempdir():
        root = os.path.join(get_data_home(), 'alanine_dipeptide')
        assert os.path.exists(root)
        cmd = ("msmb ConvertChunkedProject out {root} --pattern *.dcd "
               "-t {root}/ala2.pdb".format(root=root))
        shell(cmd)
        assert set(
            os.listdir('out')) == {'traj-00000000.dcd', 'trajectories.jsonl'}

        # check that out/traj-00000000.dcd really has concatenated all of
        # the input trajs
        length = len(md.open('out/traj-00000000.dcd'))
        assert length == sum(
            len(md.open(f)) for f in glob.glob('%s/*.dcd' % root))

        with open('out/trajectories.jsonl') as f:
            record = json.load(f)
        assert set(record.keys()) == {'filename', 'chunks'}
        assert record['filename'] == 'traj-00000000.dcd'
        assert sorted(glob.glob('%s/*.dcd' % root)) == record['chunks']
Example #19
def test_cnv_bw():
    proj = Processor("v1", "data/v1", 'bw')
    trajectories = [Trajectory(info, proj, None) for info in proj.get_infos()]
    cnv2_infos = list(map(_process_trajectory, trajectories))

    assert len(cnv2_infos) > 0

    for info in cnv2_infos:
        assert os.path.exists(info['cnv2']['gens'][0])
        assert "/cnv2" in info['cnv2']['outdir']

        with mdtraj.open(info['cnv2']['gens'][0]) as tfile:
            xyz, time, step, box = tfile.read()
            print("Shape", xyz.shape)
            assert xyz.shape == (11, 22, 3), xyz.shape
Example #21
    def __set_dimensions_and_lengths(self):
        self._ntraj = len(self.trajfiles)
        # look up pre-computed lengths, or compute them on the fly and store them in the db.
        if config['use_trajectory_lengths_cache'] == 'True':
            from pyemma.coordinates.data.traj_info_cache import TrajectoryInfoCache
            for traj in self.trajfiles:
                self._lengths.append(TrajectoryInfoCache[traj])
        else:
            for traj in self.trajfiles:
                with mdtraj.open(traj, mode='r') as fh:
                    self._lengths.append(len(fh))

        # number of trajectories/data sets
        if self._ntraj == 0:
            raise ValueError("no valid data")
Example #22
    def _get_traj_info(self, filename):
        with mdtraj.open(filename, mode='r') as fh:
            try:
                length = len(fh)
            # certain formats like txt based ones (.gro, .lammpstrj) do not implement len()
            except (NotImplementedError, TypeError):
                frame = fh.read(1)[0]
                ndim = np.shape(frame)[1]
                _ = fh.read()
                length = fh.tell()
            else:
                frame = fh.read(1)[0]
                ndim = np.shape(frame)[1]

            offsets = fh.offsets if hasattr(fh, 'offsets') else ()

        return TrajInfo(ndim, length, offsets)
Example #23
def test_compare_energies_acetylacetone_enol_waterbox(caplog):
    caplog.set_level(logging.WARNING)
    from transformato import FreeEnergyCalculator
    import mdtraj as md

    env = "waterbox"

    base = "data/acetylacetone-keto-acetylacetone-enol-rsfe/acetylacetone-enol/"
    (
        output_files_enol,
        output_files_keto,
    ) = get_output_files_acetylaceton_tautomer_pair()

    conf = f"{get_testsystems_dir()}/config/test-acetylacetone-tautomer-rsfe.yaml"

    configuration = load_config_yaml(
        config=conf, input_dir="data/", output_dir="data"
    )  # NOTE: for preprocessing input_dir is the output dir

    f = FreeEnergyCalculator(configuration, "acetylacetone-enol")
    for idx, b in enumerate(output_files_enol):
        traj = md.load_dcd(
            f"{b}/lig_in_{env}.dcd",
            f"{b}/lig_in_{env}.psf",
        )
        # used load_dcd for CHARMM
        traj.save_dcd(f"{base}/traj.dcd", force_overwrite=True)
        l_charmm = f._evaluate_e_on_all_snapshots_CHARMM(traj, idx + 1, env)
        # load dcd with openMM
        traj = md.open(f"{b}/lig_in_{env}.dcd")
        xyz, unitcell_lengths, _ = traj.read()
        xyz = xyz / 10  # convert from angstrom (raw DCD) to nanometer
        unitcell_lengths = unitcell_lengths / 10
        l_openMM = f._evaluate_e_on_all_snapshots_openMM(
            xyz, unitcell_lengths, idx + 1, env
        )
        assert len(l_charmm) == len(l_openMM)
        s = abs(np.array(l_charmm) - np.array(l_openMM))
        mae = np.sum(s) / len(s)
        print(mae)
        assert mae < 1.0

        for e_charmm, e_openMM in zip(l_charmm, l_openMM):
            assert np.isclose(e_charmm, e_openMM, rtol=1e-2)
Example #24
    def test_featurereader_xtc(self):
        # cause cache failures
        with settings(use_trajectory_lengths_cache=False):
            reader = FeatureReader(xtcfiles, pdbfile)

        results = {}
        for f in xtcfiles:
            traj_info = self.db[f, reader]
            results[f] = traj_info.ndim, traj_info.length, traj_info.offsets

        expected = {}
        for f in xtcfiles:
            with mdtraj.open(f) as fh:
                length = len(fh)
                ndim = fh.read(1)[0].shape[1]
                offsets = fh.offsets if hasattr(fh, 'offsets') else []
                expected[f] = ndim, length, offsets

        np.testing.assert_equal(results, expected)
Example #25
def test_length(get_fn):
    files = [
        'frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr', 'frame0.dcd',
        '2EQQ.pdb', 'frame0.binpos', 'frame0.xyz', 'frame0.tng'
    ]
    if not (on_win and on_py3):
        files.append('frame0.lh5')

    for file in files:
        opened = md.open(get_fn(file))

        if '.' + file.rsplit('.',
                             1)[-1] in mdtraj.core.trajectory._TOPOLOGY_EXTS:
            top = file
        else:
            top = 'native.pdb'

        loaded = md.load(get_fn(file), top=get_fn(top))
        assert len(opened) == len(loaded)
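# An equivalent way to write the extension test above, using os.path.splitext
# (same semantics, arguably easier to read):
ext = os.path.splitext(file)[1]  # keeps the leading dot, e.g. '.pdb'
top = file if ext in mdtraj.core.trajectory._TOPOLOGY_EXTS else 'native.pdb'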
Example #26
    def save(self, *args, **kw):
        if not self.pk:  # new file

            # get length
            with _symlink_workaround_temp_uploaded_file(self.data) as f:
                with mdtraj.open(f) as traj:
                    self.length += len(traj)

            # validate hash
            import hashlib
            func = hashlib.sha512()
            for chunk in self.data.chunks():
                func.update(chunk)

            computed_hash = func.hexdigest()
            if not computed_hash == self.hash_sha512:
                raise ParseError(["Uploaded trajectory has different hash value than promised ",
                                  {"promised": self.hash_sha512,
                                   "received": computed_hash}])

        super(Trajectory, self).save(*args, **kw)
Example #27
def test_compare_energies_2OJ9_original_vacuum(caplog):
    caplog.set_level(logging.WARNING)
    from transformato import FreeEnergyCalculator
    import mdtraj as md

    env = "vacuum"

    base = f"{get_testsystems_dir()}/2OJ9-original-2OJ9-tautomer-rsfe/2OJ9-original/"
    output_files_t1, _ = get_output_files_2oj9_tautomer_pair()

    conf = f"{get_testsystems_dir()}/config/test-2oj9-tautomer-pair-rsfe.yaml"

    configuration = load_config_yaml(
        config=conf, input_dir=get_testsystems_dir(), output_dir=get_testsystems_dir()
    )  # NOTE: for preprocessing input_dir is the output dir

    f = FreeEnergyCalculator(configuration, "2OJ9-original")
    for idx, b in enumerate(output_files_t1):
        # used load_dcd for CHARMM
        traj = md.load_dcd(
            f"{b}/lig_in_{env}.dcd",
            f"{b}/lig_in_{env}.psf",
        )
        traj.save_dcd(f"{base}/traj.dcd")
        l_charmm = f._evaluate_e_on_all_snapshots_CHARMM(traj, idx + 1, env)
        # load dcd with openMM
        traj = md.open(f"{b}/lig_in_{env}.dcd")
        xyz, unitcell_lengths, _ = traj.read()
        xyz = xyz / 10  # convert from angstrom (raw DCD) to nanometer
        l_openMM = f._evaluate_e_on_all_snapshots_openMM(
            xyz, unitcell_lengths, idx + 1, env
        )

        assert len(l_charmm) == len(l_openMM)
        s = abs(np.array(l_charmm) - np.array(l_openMM))
        print(s)
        for e_charmm, e_openMM in zip(l_charmm, l_openMM):
            assert np.isclose(e_charmm, e_openMM, rtol=0.2)
        mae = np.sum(s) / len(s)
        assert mae < 0.005
Example #28
    def _get_traj_info(self, filename):
        # workaround NotImplementedError __len__ for xyz files
        # Github issue: markovmodel/pyemma#621
        if six.PY2:
            from mock import patch
        else:
            from unittest.mock import patch
        from mdtraj.formats import XYZTrajectoryFile

        def _make_len_func(top):
            def _len_xyz(self):
                assert isinstance(self, XYZTrajectoryFile)
                assert hasattr(
                    self, '_filename'), "structural change in xyzfile class!"
                import warnings
                from pyemma.util.exceptions import EfficiencyWarning
                # parenthesize the conditional so the base message is always
                # emitted; without parentheses the ternary swallows the whole string
                warnings.warn(
                    "reading all of your data,"
                    " just to determine number of frames."
                    + (" Happens only once, because this is cached."
                       if config['use_trajectory_lengths_cache'] else ""),
                    EfficiencyWarning)
                # obtain len by reading whole file!
                mditer = mdtraj.iterload(self._filename, top=top)
                return sum(t.n_frames for t in mditer)

            return _len_xyz

        f = _make_len_func(self.topfile)

        # look up pre-computed lengths, or compute them on the fly and store them in the db.
        with patch.object(XYZTrajectoryFile, '__len__', f):
            with mdtraj.open(filename, mode='r') as fh:
                length = len(fh)
                frame = fh.read(1)[0]
                ndim = np.shape(frame)[1]
                offsets = fh.offsets if hasattr(fh, 'offsets') else []

        return TrajInfo(ndim, length, offsets)
Example #29
def test_lazy_eval():
    import mdtraj as md

    base_path = f"{get_testsystems_dir()}/2OJ9-original-2OJ9-tautomer-rsfe/2OJ9-original/intst1/"
    dcd_path = f"{base_path}/lig_in_waterbox.dcd"
    psf_file = f"{base_path}/lig_in_waterbox.psf"
    traj = md.load(
        f"{dcd_path}",
        top=f"{psf_file}",
    )

    with md.open(dcd_path) as f:
        f.seek(10)
        xyz, unitcell_lengths, unitcell_angles = f.read()

    assert (xyz.shape) == (190, 2530, 3)
    assert len(xyz) == 190

    print(unitcell_lengths[0])
    print(len(unitcell_lengths))
    assert len(unitcell_lengths) == 190
    print(unitcell_angles[0])
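# Note: seek(10) positions the reader at frame 10, so read() returns only the
# remaining frames; the assertions imply a 200-frame file (10 skipped + 190 read).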
Example #30
    def worker(out_q, _range):

        g = box_crds(args.topo, np.array(args.dim))
        c = list()

        with md.open(args.traj) as md_traj:
            for i in _range:

                md_traj.seek(i)
                frame = md_traj.read_as_traj(g.topo, n_frames=1, stride=1)

                if external_grid:

                    ### Unit is cell matrix vectors. It will be transformed
                    ### into frac2real within the set_frame call. Therefore we
                    ### MUST assume that cell matrix vectors are perfectly normalized.
                    if os.path.exists(args.grid_files + "/%d_unit.dat.gz" % i):
                        unit = np.loadtxt(args.grid_files +
                                          "/%d_unit.dat.gz" % i)
                    else:
                        unit = np.loadtxt(args.grid_files + "/%d_unit.dat" % i)

                    if os.path.exists(args.grid_files +
                                      "/%d_center.dat.gz" % i):
                        center = np.loadtxt(args.grid_files +
                                            "/%d_center.dat.gz" % i)
                    else:
                        center = np.loadtxt(args.grid_files +
                                            "/%d_center.dat" % i)

                    g.set_frame(frame, unit, center)

                else:
                    g.set_frame(frame, np.eye(3, 3), args.center)

                c.append(g.get_inside_crds_frac())

            out_q.put(c)
Example #31
def test_reading_of_coords():

    env = "vacuum"
    output_files_t1, _ = get_output_files_2oj9_tautomer_pair()

    conf = f"{get_testsystems_dir()}/config/test-2oj9-tautomer-pair-rsfe.yaml"

    configuration = load_config_yaml(
        config=conf, input_dir=get_testsystems_dir(), output_dir=get_test_output_dir()
    )  # NOTE: for preprocessing input_dir is the output dir

    b = output_files_t1[0]
    print(b)
    traj_load = md.load_dcd(
        f"{b}/lig_in_{env}.dcd",
        f"{b}/lig_in_{env}.psf",
    )
    print(traj_load.xyz[0])

    traj_open = md.open(f"{b}/lig_in_{env}.dcd")
    xyz, unitcell_lengths, _ = traj_open.read()
    xyz = xyz / 10
    print(xyz[0])
    assert np.allclose(xyz[0], traj_load.xyz[0])
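# The division by 10 is a unit conversion: DCD files store coordinates in
# angstroms and md.open returns them raw, while md.load converts to mdtraj's
# native nanometers.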
Example #32
def estimateDG(topfile=None,crdfile=None,pertfile=None,
               trajfile=None,librarypath=None):
    """
    This subroutine loads a trajectory for an alchemical state, and a list of
    ligand molecules
    For each frame of the trajectory
        for each perturbed ligand
          Align the perturbed ligand onto reference ligand.
          Generate K poses
          Accumulate exp energy difference
    Estimate (w bootstrapping) free energy difference & uncertainties
    """
    print ("HELLO ESTIMATE DG")
    # Setup system describing alchemical state
    amber = Amber()
    (molecules, space) = amber.readCrdTop(crdfile, topfile)
    morphfile = Parameter("morphfile",pertfile,""".""")
    system = createSystemFreeEnergy(molecules, morphfile=morphfile)
    cutoff_type = Parameter(".","cutoffperiodic",""".""")
    cutoff_dist = Parameter(".",10*angstrom,""".""")
    rf_dielectric = Parameter(".",82.0,""".""")
    shift_delta = Parameter(".",2.0,""".""")
    coulomb_power = Parameter(".",0,""".""")
    combining_rules = Parameter(".","arithmetic",""".""")
    lambda_val = Parameter(".",0.0,""".""")
    system = setupForceFieldsFreeEnergy(system, space, cutoff_type=cutoff_type,
                                        cutoff_dist=cutoff_dist,
                                        rf_dielectric=rf_dielectric,
                                        shift_delta=shift_delta,
                                        coulomb_power=coulomb_power,
                                        combining_rules=combining_rules,
                                        lambda_val=lambda_val)
    # Load ligands library
    # FIX ME ! Don't include ligands that have already been simulated !
    library = loadLibrary(librarypath)
    library_deltaenergies = {}
    # library_deltaenergies contain the list of computed energy differences
    for ligand in library:
        library_deltaenergies[ligand] = []
    #import pdb; pdb.set_trace()
    # Now scan trajectory
    start_frame = 1
    end_frame = 3
    step_frame = 1

    trajfile = Parameter(".",trajfile,""".""")
    mdtraj_trajfile = mdtraj.open(trajfile.val,'r')
    nframes = len(mdtraj_trajfile)
    if end_frame > (nframes - 1):
        end_frame = nframes - 1
    mdtraj_trajfile.seek(start_frame)
    current_frame = start_frame

    energies = {}
    for (ID, ligand) in library:
        energies[ID] = []
    while (current_frame <= end_frame):
        print ("#Processing frame %s " % current_frame)
        frames_xyz, cell_lengths, cell_angles = mdtraj_trajfile.read(n_frames=1)
        system = updateSystemfromTraj(system, frames_xyz, cell_lengths, cell_angles)
        ref_ligand = system[MGName("solutes")].molecules().first().molecule()
        ref_nrg = system.energy()
        print (ref_nrg)
        for (ID, ligand) in library:
            # Align ligand onto reference ligand
            mapping = AtomMCSMatcher(1*second).match(ref_ligand, PropertyMap(), ligand, PropertyMap())
            mapper = AtomResultMatcher(mapping)
            # This does a RB alignment
            # TODO) Explore optimised alignment codes
            # For instance could construct aligned ligand by reusing MCSS coordinates
            # and completing topology for variable part using BAT internal coordinates
            # Also, better otherwise never get intramolecular energy variations !
            # Basic test...SAME LIGAND should give 0 energy difference ! 
            # FIXME) Return multiple coordinates and update system in each instance
            aligned_ligand = ligand.move().align(ref_ligand, AtomMatchInverter(mapper))
            #print (ref_ligand.property("coordinates").toVector())
            #print ("####")
            #print (aligned_ligand.property("coordinates").toVector())
            # FIXME) Optimise for speed
            new_system = System()
            new_space = system.property("space")
            new_system.add( system[MGName("solvent")] )
            sols = MoleculeGroup("solutes")
            solref = MoleculeGroup("solute_ref")
            solhard = MoleculeGroup("solute_ref_hard")
            soltodummy = MoleculeGroup("solute_ref_todummy")
            solfromdummy = MoleculeGroup("solute_ref_fromdummy")
            sols.add(aligned_ligand)
            solref.add(aligned_ligand)
            solhard.add(aligned_ligand)
            new_system.add(sols)
            new_system.add(solref)
            new_system.add(solhard)
            new_system.add(soltodummy)
            new_system.add(solfromdummy)
            #print ("###")
            # DONE) Optimise for speed, only doing ligand energies
            #print (new_system[MGName("solutes")].first().molecule().property("coordinates").toVector())
            new_system = setupForceFieldsFreeEnergy(new_system, new_space, cutoff_type=cutoff_type,
                                        cutoff_dist=cutoff_dist,
                                        rf_dielectric=rf_dielectric,
                                        shift_delta=shift_delta,
                                        coulomb_power=coulomb_power,
                                        combining_rules=combining_rules,
                                        lambda_val=lambda_val)
            new_nrg = new_system.energy()
            print (new_nrg)
            energies[ID].append( new_nrg - ref_nrg )
            # for each conformation generated
            #     consider further optimisation (rapid MC --> if loaded flex files?)
            #     update 'perturbed' group with aligned ligand coordinates
            #     compute 'perturbed' energy
            #     accumulate 'perturbed' - reference
        #import pdb; pdb.set_trace()
        current_frame += step_frame
    import pdb; pdb.set_trace()
    # Now convert accumulated data int

    return 0
Example #33
def test_len(get_fn):
    with md.open(get_fn('frame0.xyz')) as fh:
        assert len(fh) == 501
        assert fh._frame_index == 0
        assert len(fh.read()) == 501
Example #34
        Sire.Stream.save((molecules, space), s3file.val)

    # What to do with this...
    system = createSystemFreeEnergy(molecules)
    lam = Symbol("lambda")
    solutes = system[MGName("solutes")]
    solute_ref = system[MGName("solute_ref")]
    system.setConstant(lam, lambda_val.val)
    system.add(PerturbationConstraint(solutes))
    system.setComponent(lam, lambda_val.val)
    # Now loop over snapshots in dcd and accumulate energies
    start_frame = 1
    end_frame = 1000000000
    step_frame = stepframe.val

    mdtraj_trajfile = mdtraj.open(trajfile.val,'r')
    nframes = len(mdtraj_trajfile)
    if end_frame > (nframes - 1):
        end_frame = nframes - 1
    mdtraj_trajfile.seek(start_frame)
    current_frame = start_frame

    #system = createSystemFreeEnergy(molecules)

    system_solute_rf = System()
    system_solute_rf.add(solutes)
    system_solute_rf.add(system[MGName("solute_ref")])
    system_solute_rf.add(system[MGName("solute_ref_hard")])
    system_solute_rf.add(system[MGName("solute_ref_todummy")])
    system_solute_rf.add(system[MGName("solute_ref_fromdummy")])
Example #35
def test_len():
    with md.open(get_fn('frame0.xyz')) as fh:
        assert len(fh) == 501
        assert fh._frame_index == 0
        assert len(fh.read()) == 501
Example #36
    def handle(self, *args, **options):
        def create_pos_to_gnum(dyn_id):
            gcdata_path="/protwis/sites/files/Precomputed/get_contacts_files/dynamic_symlinks/dyn%(dynid)s/dyn%(dynid)s_labels.tsv" % {"dynid" :dyn_id}
            if os.path.isfile(gcdata_path):
                pos_to_gnum={}
                with open(gcdata_path) as infile:
                    for line in infile: 
                        if len(line) == 0 or line[0] == "#":
                            continue
                        (pos,gnum) = line.split("\t")
                        pos_to_gnum[pos]=gnum.rstrip("\n")
                return pos_to_gnum
            else:
                return False

        def extract_res_info(res1):
            helix_to_treep={"1" :"2",
                            "12":"3",
                            "2" :"4" ,
                            "23":"5"  ,
                            "3" :"6" ,
                            "34":"7"  ,
                            "4" :"8" ,
                            "45":"9"  ,
                            "5" :"10" ,
                            "56":"11"  ,
                            "6" :"12" ,
                            "67":"13"  ,
                            "7" :"14" ,
                            "78":"15" ,
                            "8" :"16"}
            nodecolor = {'1': '#78C5D5',
                         '12': '#5FB0BF',
                         '2': '#459BA8',
                         '23': '#5FAF88',
                         '3': '#79C268',
                         '34': '#9FCD58',
                         '4': '#C5D747',
                         '45': '#DDD742',
                         '5': '#F5D63D',
                         '56': '#F3B138',
                         '6': '#F18C32',
                         '67': '#ED7A6A',
                         '7': '#E868A1',
                         '78': '#D466A4',
                         '8': '#BF63A6'}

            res1=res1[:res1.rfind(":")]
            if res1 in pos_to_gnum:
                gnum1=pos_to_gnum[res1]
                h1=gnum1.split("x")[0]
                if h1=="Ligand":
                    return False
                treep1=helix_to_treep[h1] + "." + gnum1
                nodecolor1 = nodecolor[h1]
                return (gnum1,treep1,nodecolor1,h1)
            else:
                return False


        def create_p_jsons(dynfiles_traj,pos_to_gnum):
            frame_ends_bytraj=[]
            accum_frames=0
            all_int_d=dict()
            hb_list=["hbbb","hbsb","hbss"]
            for traj in dynfiles_traj:
                n_frames=traj["n_frames"]
                frame_ends_bytraj.append(n_frames + accum_frames - 1)  # starts at 0
                accum_frames+=n_frames

                cont_li=['sb', 'pc', 'ps', 'ts', 'vdw', 'hb', 'wb', 'wb2', 'hp']
                
                traj_int_d={e:dict() for e in cont_li}
                
                for int_d in traj_int_d.values():
                    int_d['defaults']={'edgeColor': 'rgba(50,50,50,100)', 'edgeWidth': 2}
                    int_d["trees"]=[{
                            "treeLabel" :'Helices',
                            "treePaths":set(), # ex. '2.1x30'
                        }]
                    int_d["edges"]=dict() #list of {'name1':, 'frames':, 'name2':, 'helixpos':} -> helixpos can be Intra or Inter
                                    #ex: {"frames": [ 0, 10, 12],  'helixpos': 'Intra', 'name1': '5x38', 'name2': '5x39'}
                    int_d["tracks"]=[
                                {"trackLabel":"Helices",
                                 "trackProperties" :list() # {'color': '#79C268', 'nodeName': '3x25', 'size': '1.0'}
                                }
                                ]
                all_int_d[traj["file_id"]]=traj_int_d
            gcdata_path="/protwis/sites/files/Precomputed/get_contacts_files/dynamic_symlinks/dyn%(dynid)s/dyn%(dynid)s_dynamic.tsv" % {"dynid" :dynfiles_traj[0]["dyn_id"]}

            traj_rep=0    
            pre_frame=False
            accum_frames=0
            if os.path.isfile(gcdata_path):
                with open(gcdata_path) as infile:
                    for line in infile:
                        line = line.strip()
                        if "total_frames" in line:
                            el = line.split(" ")
                            file_total_frames=int(el[1].split(":")[1])

                        if len(line) == 0 or line[0] == "#":
                            continue

                        allinfo = line.split("\t")
                        if len(allinfo)==4:
                            (frame,int_type,res1,res2)=allinfo
                        elif len(allinfo)==5:
                            (frame,int_type,res1,res2,res3)=allinfo
                        elif len(allinfo)==6:
                            (frame,int_type,res1,res2,res3,res4)=allinfo
                        else:
                            self.stdout.write(self.style.NOTICE("Incorrect number of elements in line. Skipping. Line: %s"%line))
                            continue
                        if frame != pre_frame:
                            if int(pre_frame) == frame_ends_bytraj[traj_rep]:
                                traj_rep+=1                            
                                accum_frames=int(frame)
                                self.stdout.write(self.style.NOTICE("\tTraj id: %s"%dynfiles_traj[traj_rep]["file_id"]))
                        frame_corr=str(int(frame)-accum_frames)
                        traj_id=dynfiles_traj[traj_rep]["file_id"]
                        traj_int_d=all_int_d[traj_id]
                        #add all res:
                        resinfo1=extract_res_info(res1)
                        resinfo2=extract_res_info(res2)
                        if resinfo1 and resinfo2:
                            (gnum1,treep1,nodecolor1,h1)=resinfo1
                            (gnum2,treep2,nodecolor2,h2)=resinfo2
                        else:
                            continue
                        for int_typeid, int_data in traj_int_d.items():
                            if treep1 not in int_data["trees"][0]["treePaths"]:
                                int_data["trees"][0]["treePaths"].add(treep1) # ex. '2.1x30'
                                int_data["tracks"][0]["trackProperties"].append({'color': nodecolor1, 'nodeName': gnum1, 'size': '1.0'})
                            if treep2 not in int_data["trees"][0]["treePaths"]:
                                int_data["trees"][0]["treePaths"].add(treep2)
                                int_data["tracks"][0]["trackProperties"].append({'color': nodecolor2, 'nodeName': gnum2, 'size': '1.0'})
                        # add this particular interaction
                        if int_type in hb_list:
                            int_type="hb"
                        if int_type in traj_int_d:
                            edge_d=traj_int_d[int_type]["edges"]
                            if (gnum1,gnum2) in edge_d:
                                edge_d[(gnum1,gnum2)]["frames"].append(frame_corr)
                            elif (gnum2,gnum1) in edge_d:
                                edge_d[(gnum2,gnum1)]["frames"].append(frame_corr)
                            else:
                                if (h1==h2):
                                    hpos="Intra"
                                else:
                                    hpos="Inter"
                                edge_d[(gnum1,gnum2)] = {'name1':gnum1 , 'name2':gnum2 , 'frames':[frame_corr],  'helixpos':hpos}

                        pre_frame=frame

                for traj_id,traj_int_d in all_int_d.items():
                    for int_type, int_data in traj_int_d.items():
                        int_data["trees"][0]["treePaths"] = list(int_data["trees"][0]["treePaths"])
                        int_data["edges"] = [v for k,v in int_data["edges"].items()]
                        save_json(dynfiles_traj,traj_id,int_type,int_data)
                return True
            else:
                return False
                            
        def save_json(dynfiles_traj,traj_id,int_type,int_data):
            traj_filename=[e["file_name"] for e in dynfiles_traj if e['file_id']==traj_id][0]
            json_filename=traj_filename.split(".")[0] + "_" + int_type +".json"
            filpath=get_precomputed_file_path('flare_plot',int_type,url=False)
            if not os.path.isdir(filpath):
                os.makedirs(filpath)
            with open(os.path.join(filpath,json_filename), 'w') as outfile:
                json.dump(int_data, outfile)




        if options['ignore_publication']:
            dynobj=DyndbDynamics.objects.all()
        else:
            dynobj=DyndbDynamics.objects.filter(is_published=True)
        if options['dynamics_id']:
            dynobj=dynobj.filter(id__in=options['dynamics_id'])
        # a QuerySet never equals []; use exists() to test for emptiness
        if not dynobj.exists():
            self.stdout.write(self.style.NOTICE("No dynamics found with specified conditions."))

        dynobj = dynobj.annotate(dyn_id=F('id'))
        dynobj_d = dynobj.values("id")
        dyn_id_li=[d["id"] for d in dynobj_d]

        dyn_traj_d={}
        i=0
        tot=len(dyn_id_li)
        for dyn_id in sorted(dyn_id_li):
            try:
                self.stdout.write(self.style.NOTICE("dyn %s - %.1f%% completed"%(dyn_id , (i/tot)*100) ))
                dynfiles = DyndbFilesDynamics.objects.filter(id_dynamics__id=dyn_id)
                dynfiles = dynfiles.annotate(file_name=F("id_files__filename"),file_path=F("id_files__filepath"),file_id=F('id_files__id'))
                dynfiles_traj = dynfiles.filter(type=2)
                dynfiles_traj = dynfiles_traj.values("file_name","file_path","file_id")
                print(dynfiles_traj)
                for traj in dynfiles_traj:
                    traj_path=traj["file_path"]
                    traj["dyn_id"]=dyn_id
                    if os.path.isfile(traj_path):
                        t = md.open(traj_path)
                        n_frames = len(t)
                        del t
                        gc.collect()
                        traj["n_frames"]=n_frames
                    else:
                        traj["n_frames"]=False
                pos_to_gnum=create_pos_to_gnum(dyn_id)
                if not pos_to_gnum:
                    self.stdout.write(self.style.ERROR("Labels file not found. Skipping." ))
                    continue
                result=create_p_jsons(dynfiles_traj,pos_to_gnum)
                if not result:
                    self.stdout.write(self.style.ERROR("GetContacts results file not found. Skipping." ))
                    continue
                dyn_traj_d[dyn_id]=dynfiles_traj
                i+=1
            except Exception as e:
                self.stdout.write(self.style.ERROR(e))
        self.stdout.write(self.style.NOTICE("100%" ))
Example #37
    def calculate_grid_quantities(self, energy=True, entropy=True, hbonds=True):
        """
        Performs grid-based solvation thermodynamics and structure calculations by iterating
        over frames in the trajectory.

        Parameters
        ----------
        energy : bool, optional, default=True
            Whether to accumulate energy terms while processing each frame.
        entropy : bool, optional, default=True
            Whether to run the entropy calculation after all frames are processed.
        hbonds : bool, optional, default=True
            Whether to accumulate hydrogen bond statistics while processing each frame.

        Returns
        -------
        None

        """
        print_progress_bar(0, self.num_frames)
        if not self.topology_file.endswith(".h5"):
            topology = md.load_topology(self.topology_file)
        read_num_frames = 0
        with md.open(self.trajectory) as f:
            for frame_i in range(self.start_frame, self.start_frame + self.num_frames):
                print_progress_bar(frame_i - self.start_frame, self.num_frames)
                f.seek(frame_i)
                if not self.trajectory.endswith(".h5"):
                    trj = f.read_as_traj(topology, n_frames=1, stride=1)
                else:
                    trj = f.read_as_traj(n_frames=1, stride=1)
                if trj.n_frames == 0:
                    print("No more frames to read.")
                    break
                else:
                    self._process_frame(trj, energy, hbonds, entropy)
                    read_num_frames += 1
            if read_num_frames < self.num_frames:
                print(("{0:d} frames found in the trajectory, resetting self.num_frames.".format(read_num_frames)))
                self.num_frames = read_num_frames

        # Normalize voxel quantities
        for voxel in range(self.voxeldata.shape[0]):
            if self.voxeldata[voxel, 4] > 1.0:
                self.voxeldata[voxel, 14] = self.voxeldata[voxel, 13] / (self.voxeldata[voxel, 4] * 2.0)
                self.voxeldata[voxel, 13] /= (self.num_frames * self.voxel_vol * 2.0)
                self.voxeldata[voxel, 16] = self.voxeldata[voxel, 15] / (self.voxeldata[voxel, 4] * 2.0)
                self.voxeldata[voxel, 15] /= (self.num_frames * self.voxel_vol * 2.0)
                if self.voxeldata[voxel, 19] > 0.0:
                    self.voxeldata[voxel, 18] = self.voxeldata[voxel, 17] / (self.voxeldata[voxel, 19] * 2.0)
                    self.voxeldata[voxel, 17] /= (self.num_frames * self.voxel_vol * self.voxeldata[voxel, 19] * 2.0)
                for i in range(19, 35, 2):
                    self.voxeldata[voxel, i + 1] = self.voxeldata[voxel, i] / self.voxeldata[voxel, 4]
                    self.voxeldata[voxel, i] /= (self.num_frames * self.voxel_vol)
            else:
                self.voxeldata[voxel, 13] *= 0.0
                self.voxeldata[voxel, 15] *= 0.0
                if self.voxeldata[voxel, 19] > 0.0:
                    self.voxeldata[voxel, 17] *= 0.0
                for i in range(19, 35, 2):
                    self.voxeldata[voxel, i] *= 0.0

        # Calculate entropies
        if entropy:
            self.calculate_entropy(num_frames=self.num_frames)
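# A self-contained sketch of the seek/read_as_traj idiom the method is built
# on (file names are placeholders):
import mdtraj as md

topology = md.load_topology('top.pdb')
with md.open('traj.dcd') as f:
    f.seek(0)
    while True:
        chunk = f.read_as_traj(topology, n_frames=1, stride=1)
        if chunk.n_frames == 0:
            break
        # ... process the one-frame Trajectory `chunk` here ...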
Example #38
    def f():
        try:
            eq(len(md.open(get_fn(file), **kwargs)),
               len(md.load(get_fn(file), top=get_fn('native.pdb'))))
        except NotImplementedError as e:
            raise SkipTest(e)
Example #39
import collections
import glob
import mdtraj as md
import os
import pandas as pd

dt = 0.25

MIN_LENGTHS = collections.defaultdict(lambda : 1000 * 4)
MIN_LENGTHS[10478] = 500 * 4

projects = [10466, 10467, 10468, 10478]
names = {10466:"T4", 10467:"src", 10468:"abl", 10478:"setd8"}

data = []
for project in projects:
    path = "%s/%d/" % (os.environ["FAH_DATA_PATH"], project)
    min_length = MIN_LENGTHS[project]
    filenames = [filename for filename in glob.glob(path + "run*.h5")]
    lengths = [len(md.open(filename)) for filename in filenames]
    trimmed_lengths = [length for length in lengths if length > min_length]
    trimmed_ns = sum(trimmed_lengths) * dt
    n_traj = len(lengths)
    n_trimmed = len(trimmed_lengths)
    name = names[project]
    data.append(dict(project=project, frames=sum(lengths), trimmed_frames=sum(trimmed_lengths), trimmed_ns=trimmed_ns, n_trimmed=n_trimmed, n_traj=n_traj, name=name))

data = pd.DataFrame(data).set_index("project")

print(data.to_html())
Example #40
    def generate_clusters(self, density_factor, ligand_file,
                          clustercenter_file):
        """Generate hydration sites from water molecules found in the binding site
        during the simulation. Clustering is done in two steps: (i) an initial clustering over 10%
        of the frames, and (ii) a refinement step where all frames are used.

        Parameters
        ----------
        density_factor : float
            Scaling factor that sets the water-count cutoff used to accept a
            putative hydration site.
        ligand_file : string
            Name of the PDB file containing atomic coordinates of the ligand,
            assumed to be co-crystallized with the protein.
        clustercenter_file : string or None
            PDB file of pre-computed cluster centers; if None, an initial
            clustering over a strided subset of frames is performed first.

        Returns
        -------
        final_cluster_coords : numpy.ndarray
            Coordinates of hydration sites, represented by a 2-D array with shape N x 3,
            where N is the number of hydration sites identified during clustering.

        site_waters : list
            List of N sub-lists, where N is the number of identified hydration sites; each sublist
            consists of a 3-element tuple for every water identified in that site. The first element
            of the tuple is the frame number, the second is the index of the oxygen atom in the
            original topology, and the third is the offset index into the trimmed version of the
            trajectory used for clustering.

        Notes
        -----
        The following attributes of the object are updated when the clustering is successfully completed.
        self.hsa_region_O_ids:
            The indices of water oxygen atoms in HSA region for each frame are stored
            in the corresponding lists.
        self.hsa_region_flat_ids:
            Same as above except that indices are not atom indices from the topology
            but in a sequence from 0 to N, where N is the total number of water oxygen atoms found in the
            HSA region throughout the simulation.
        self.hsa_region_water_coords:
            An N x 3 numpy array is initialized, where N is the total number of water oxygen atoms found in the
            HSA region throughout the simulation. The array gets populated during individual frame processing.
        """
        sphere_radius = md.utils.in_units_of(1.0, "angstroms", "nanometers")
        topology = md.load_topology(self.topology_file)
        if self.non_water_atom_ids.shape[0] == 0:
            raise Exception(
                ValueError,
                "Clustering is supported only for solute-solvent systems, no solute atoms found."
            )

        ligand = md.load_pdb(ligand_file, no_boxchk=True)
        ligand_coords = ligand.xyz[0, :, :]
        binding_site_atom_indices = np.asarray(
            list(range(ligand_coords.shape[0])))
        init_cluster_coords = None
        # Step 1: Initial Clustering if user didn't provide cluster centers
        if clustercenter_file is None:
            clustering_stride = 10
            print("Reading trajectory for clustering.")
            with md.open(self.trajectory) as f:
                f.seek(self.start_frame)
                # read all frames if no frames specified by user
                if self.num_frames is None:
                    trj_short = f.read_as_traj(
                        topology,
                        atom_indices=np.concatenate(
                            (binding_site_atom_indices,
                             self.wat_oxygen_atom_ids
                             )))[self.start_frame::clustering_stride]
                else:
                    trj_short = f.read_as_traj(
                        topology,
                        atom_indices=np.concatenate((binding_site_atom_indices,
                                                     self.wat_oxygen_atom_ids))
                    )[self.start_frame:self.num_frames:clustering_stride]
                    print(trj_short.n_frames)
                if trj_short.n_frames < 10:
                    sys.exit(
                        "Clustering requires at least 100 frames, current trajectory contains {0:d} frames."
                        .format(trj_short.n_frames))
                print("Performing an initial clustering over {0:d} frames.".
                      format(trj_short.n_frames))
                # Obtain water molecules solvating the binding site
                # FIXME: This is a workaround to use MDTraj compute_neighbor function xyz coordinates of the trajectory are
                # modified such that first n atoms coordinates are switched to n atoms of ligand coordinates.
                # Unexpected things will happen if the number of solute atoms less than the number of ligand atoms, which is
                # highly unlikely.
                coords = trj_short.xyz
                for i_frame in range(trj_short.n_frames):
                    for pseudo_index in range(
                            binding_site_atom_indices.shape[0]):
                        coords[i_frame, pseudo_index, :] = ligand_coords[
                            pseudo_index, :]

                haystack = np.setdiff1d(trj_short.topology.select("all"),
                                        binding_site_atom_indices)
                binding_site_waters = md.compute_neighbors(
                    trj_short,
                    self.hsa_region_radius,
                    binding_site_atom_indices,
                    haystack_indices=haystack)
                # generate a list of tuples, each tuple is a water and corresponding frame number in trj_short
                water_id_frame_list = [(i, nbr)
                                       for i in range(len(binding_site_waters))
                                       for nbr in binding_site_waters[i]]

                # Start initial clustering by building a KDTree and get initial neighbor count for all waters
                water_coordinates = np.ma.array(
                    [coords[wat[0], wat[1], :] for wat in water_id_frame_list],
                    mask=False)
                tree = spatial.cKDTree(water_coordinates)
                nbr_list = tree.query_ball_point(water_coordinates,
                                                 sphere_radius)
                nbr_count_list = np.ma.array([len(nbrs) for nbrs in nbr_list],
                                             mask=False)
                cutoff = trj_short.n_frames * density_factor * 0.1401
                if np.ceil(cutoff) - cutoff <= 0.5:
                    cutoff = np.ceil(cutoff)
                else:
                    cutoff = np.floor(cutoff)
                n_wat = 3 * cutoff

                # Set up clustering loop
                cluster_list = []
                cluster_iter = 0
                while n_wat > cutoff:
                    # Get water with max nbrs and retrieve its neighbors and marked for exclusion in next iteration
                    max_index = np.argmax(nbr_count_list)
                    to_exclude = np.array(nbr_list[max_index])
                    # Set current water count to current neighbors plus one for the water itself
                    n_wat = len(to_exclude) + 1

                    # Mask current water, its neighbors so that they are not considered in the next iteration
                    nbr_count_list.mask[to_exclude] = True
                    nbr_count_list.mask[max_index] = True
                    # Mask current waters' and its neighbors' coords so that they are not considered in the next iteration
                    water_coordinates.mask[to_exclude] = True
                    water_coordinates.mask[max_index] = True

                    # Accumulate neighbors for each water in current cluster, removing common neighbors
                    nbrs_of_to_exclude = np.unique(
                        np.array([
                            n_excluded
                            for excluded_nbrs in nbr_list[to_exclude]
                            for n_excluded in excluded_nbrs
                        ]))

                    # Obtain the list of waters whose neighbors need to be updated due to exclusion of the waters above
                    to_update = np.setxor1d(to_exclude, nbrs_of_to_exclude)
                    to_update = np.setdiff1d(to_update, np.asarray(max_index))

                    # Update the neighbor count for each water from the list generated above
                    if to_update.shape[0] != 0:
                        tree = spatial.cKDTree(water_coordinates)
                        updated_nbr_list = tree.query_ball_point(
                            water_coordinates[to_update], sphere_radius)
                        # for each updated member, get its original index and update the original neighbor search list
                        for index, nbrs in enumerate(updated_nbr_list):
                            if not nbr_count_list.mask[to_update[index]]:
                                nbr_count_list[to_update[index]] = len(nbrs)

                    # Check distances with previously identified clusters and do not consider if within 1.2 A
                    # of an existing cluster
                    current_wat = water_id_frame_list[max_index]
                    current_wat_coords = md.utils.in_units_of(
                        coords[current_wat[0], current_wat[1], :],
                        "nanometers", "angstroms")
                    near_flag = 0
                    if len(cluster_list) != 0:
                        for clust in cluster_list:
                            clust_coords = coords[clust[0], clust[1], :]
                            dist = np.linalg.norm(current_wat_coords -
                                                  clust_coords)
                            if dist < 1.20:
                                near_flag += 1
                    if near_flag == 0:
                        cluster_iter += 1
                        cluster_list.append(water_id_frame_list[max_index])
                init_cluster_coords = [
                    coords[cluster[0], cluster[1], :]
                    for cluster in cluster_list
                ]
        else:
            clusters_pdb_file = md.load_pdb(clustercenter_file, no_boxchk=True)
            init_cluster_coords = clusters_pdb_file.xyz[0, :, :]

        # Read full trajectory
        print("Reading trajectory to obtain water molecules for each cluster.")
        with md.open(self.trajectory) as f:
            f.seek(self.start_frame)
            if self.num_frames is None:
                trj = f.read_as_traj(topology,
                                     stride=1,
                                     atom_indices=np.concatenate(
                                         (binding_site_atom_indices,
                                          self.wat_oxygen_atom_ids)))
                self.num_frames = trj.n_frames
            else:
                trj = f.read_as_traj(topology,
                                     n_frames=self.num_frames,
                                     stride=1,
                                     atom_indices=np.concatenate(
                                         (binding_site_atom_indices,
                                          self.wat_oxygen_atom_ids)))
                if trj.n_frames < self.num_frames:
                    print((
                        "Warning: {0:d} frames found in the trajectory, resetting self.num_frames."
                        .format(trj.n_frames)))
                    self.num_frames = trj.n_frames
            for i_frame in range(trj.n_frames):
                for pseudo_index in range(binding_site_atom_indices.shape[0]):
                    trj.xyz[i_frame,
                            pseudo_index, :] = ligand_coords[pseudo_index, :]
            haystack = np.setdiff1d(trj.topology.select("all"),
                                    binding_site_atom_indices)
            start_point = haystack[0]
            binding_site_waters = md.compute_neighbors(
                trj,
                self.hsa_region_radius,
                binding_site_atom_indices,
                haystack_indices=haystack)
            # From the full frame-wise set of waters in the binding site, build two more frame-wise
            # lists: one holding each water's correct topology index, and another holding a flat
            # index running from 0 to M - 1, where M is the total number of HSA-region waters
            start = 0
            for i in range(len(binding_site_waters)):
                self.hsa_region_O_ids.append([])
                self.hsa_region_flat_ids.append([])
                for wat in binding_site_waters[i]:
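                    # Map the stripped-trajectory index back to the oxygen's
                    # atom index in the full topology: each water contributes
                    # self.water_sites atoms, starting at the first water
                    # oxygen (self.wat_oxygen_atom_ids[0]).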
                    wat_0 = wat - start_point
                    wat_offset = (
                        wat_0 * self.water_sites) + self.wat_oxygen_atom_ids[0]
                    self.hsa_region_O_ids[i].append(wat_offset)
                    self.hsa_region_flat_ids[i].append(start)
                    start += 3

            water_id_frame_list = [(i, nbr)
                                   for i in range(len(binding_site_waters))
                                   for nbr in binding_site_waters[i]]
            water_coordinates = np.array(
                [trj.xyz[wat[0], wat[1], :] for wat in water_id_frame_list])

        # Initialize the array that stores coordinates of all water molecules in the HSA region, used for entropy calcs
        self.hsa_region_water_coords = np.zeros(
            (len(water_id_frame_list) * 3, 3), dtype=float)
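        # Build a KD-tree over all HSA-region water oxygen positions and
        # collect, for each initial cluster center, the waters that fall
        # within sphere_radius of it.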
        tree = spatial.cKDTree(water_coordinates)
        nbr_list = tree.query_ball_point(init_cluster_coords, sphere_radius)
        final_cluster_coords = []
        # Occupancy cutoff for retaining a cluster: round
        # num_frames * density_factor * 0.1401 to the nearest integer.
        cutoff = self.num_frames * density_factor * 0.1401
        if np.ceil(cutoff) - cutoff <= 0.5:
            cutoff = int(np.ceil(cutoff))
        else:
            cutoff = int(np.floor(cutoff))

        # apply refinement if user-defined clusters are not provided
        if clustercenter_file is None:
            # Step 2: Refinement
            # Initialize variables and data structures
            # Read in the trajectory, but keep only the first N atoms, where N is the number of
            # ligand atoms plus all water oxygen atoms.
            # WARNING: This shifts the indices of waters; once they are assigned to clusters, the
            # indices must be corrected.

            print((
                "Refining initial cluster positions by considering {0:d} frames."
                .format(self.num_frames)))
            # For each cluster, set cluster center equal to geometric center of all waters in the cluster
            site_waters = []
            cluster_index = 1
            for cluster in nbr_list:
                cluster_water_coords = water_coordinates[cluster]
                if len(cluster) > cutoff:
                    near_flag = 0
                    waters_offset = [
                        (water_id_frame_list[wat][0] + self.start_frame,
                         ((water_id_frame_list[wat][1] - start_point) *
                          self.water_sites) + self.wat_oxygen_atom_ids[0])
                        for wat in cluster
                    ]

                    # Cluster center is the geometric (unweighted) center of the cluster's waters
                    cluster_center = cluster_water_coords.mean(axis=0)
                    # Raise the flag if the current cluster center is within 1.2 A of an existing cluster center
                    for coord in final_cluster_coords:
                        dist = np.linalg.norm(
                            md.utils.in_units_of(cluster_center, "nanometers",
                                                 "angstroms") - coord)
                        if dist < 1.20:
                            near_flag += 1
                    # Only add cluster center if it is at a safe distance from others
                    if near_flag == 0:
                        final_cluster_coords.append(
                            md.utils.in_units_of(cluster_center, "nanometers",
                                                 "angstroms"))
                        site_waters.append(waters_offset)
                        cluster_index += 1
        # otherwise store data for each user defined cluster
        else:
            # For each cluster, set cluster center equal to geometric center of all waters in the cluster
            final_cluster_coords = md.utils.in_units_of(
                init_cluster_coords, "nanometers", "angstroms")
            site_waters = []
            cluster_index = 1
            for cluster in nbr_list:
                waters_offset = [
                    (water_id_frame_list[wat][0] + self.start_frame,
                     ((water_id_frame_list[wat][1] - start_point) *
                      self.water_sites) + self.wat_oxygen_atom_ids[0])
                    for wat in cluster
                ]
                site_waters.append(waters_offset)
                cluster_index += 1

        # Write clustercenter file
        write_watpdb_from_coords("clustercenterfile", final_cluster_coords)
        self.clustercenter_file = "clustercenterfile.pdb"
        print(("Final number of clusters: {0:d}".format(
            len(final_cluster_coords))))
        return np.asarray(final_cluster_coords), site_waters
Example #41
0
import mdtraj as md
import os
import glob

filenames = glob.glob('trj*')

lengths = []

for filename in filenames:
    with md.open(filename) as f:
        lengths.append(len(f))

for i, length in enumerate(lengths):
    if length > 20:
        print(i, length)
        os.symlink('../agonist_b2ar/%s' % filenames[i],
                   '../agonist_b2ar_processed/%s' % filenames[i])


Example #42
0
        Sire.Stream.save((molecules, space), s3file.val)

    # What to do with this...
    system = createSystemFreeEnergy(molecules)
    lam = Symbol("lambda")
    solutes = system[MGName("solutes")]
    solute_ref = system[MGName("solute_ref")]
    system.setConstant(lam, lambda_val.val)
    system.add(PerturbationConstraint(solutes))
    system.setComponent(lam, lambda_val.val)
    # Now loop over snapshots in dcd and accumulate energies
    start_frame = 1
    end_frame = 1000000000
    step_frame = stepframe.val

    mdtraj_trajfile = mdtraj.open(trajfile.val, 'r')
    nframes = len(mdtraj_trajfile)
    if end_frame > (nframes - 1):
        end_frame = nframes - 1
    mdtraj_trajfile.seek(start_frame)
    current_frame = start_frame

    #system = createSystemFreeEnergy(molecules)

    system_solute_rf = System()
    system_solute_rf.add(solutes)
    system_solute_rf.add(system[MGName("solute_ref")])
    system_solute_rf.add(system[MGName("solute_ref_hard")])
    system_solute_rf.add(system[MGName("solute_ref_todummy")])
    system_solute_rf.add(system[MGName("solute_ref_fromdummy")])
Example #43
0
import mdtraj as md
import numpy as np


def get_num_frames(trj_file, stride):
    # Read every `stride`-th frame and count how many frames come back.
    with md.open(trj_file, mode="r") as t:
        res = t.read(stride=stride)
    return np.shape(res[0])[0]
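
# A hedged usage sketch (the file name and stride below are placeholders):
# n_frames = get_num_frames("traj.dcd", stride=10)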
Example #44
0
    def _merge_trajs(self) -> Tuple[dict, dict, int, dict]:
        """
        load trajectories, thin them and merge them.
        Also calculate N_k for mbar.
        """

        #############
        # set all file paths for potential
        if not os.path.isdir(f"{self.base_path}"):
            raise RuntimeError(f"{self.base_path} does not exist. Aborting.")

        nr_of_states = len(next(os.walk(f"{self.base_path}"))[1])

        logger.info(f"Evaluating {nr_of_states} states.")
        snapshots, unitcell = {}, {}
        N_k: dict = defaultdict(list)
        start, stride = -1, -1

        for env in self.envs:
            confs = []
            unitcell_ = []
            conf_sub = self.configuration["system"][self.structure][env]
            for lambda_state in tqdm(range(1, nr_of_states + 1)):
                dcd_path = f"{self.base_path}/intst{lambda_state}/{conf_sub['intermediate-filename']}.dcd"
                psf_path = f"{self.base_path}/intst{lambda_state}/{conf_sub['intermediate-filename']}.psf"
                if not os.path.isfile(dcd_path):
                    raise RuntimeError(f"{dcd_path} does not exist.")

                traj = mdtraj.open(f"{dcd_path}")
                # read trajs, determine offset, start, stride and unitcell lengths
                if start == -1:
                    xyz, unitcell_lengths, _ = traj.read()
                    xyz, start, stride = self._thinning(xyz)
                else:
                    traj.seek(start)
                    xyz, unitcell_lengths, _ = traj.read(stride=stride)
                    xyz = xyz[:self.nr_of_max_snapshots]

                logger.debug(
                    f"Len: {len(xyz)}, Start: {start}, Stride: {stride}")

                # check that we have enough samples
                if len(xyz) < 10:
                    raise RuntimeError(
                        f"Below 10 conformations per lambda ({len(traj)}) -- decrease the thinning factor (currently: {self.thinning})."
                    )

                # thin unitcell_lengths
                # make sure that we can work with vacuum environments
                if env != "vacuum":
                    unitcell_lengths = unitcell_lengths[:self.nr_of_max_snapshots]
                else:
                    unitcell_lengths = np.zeros(len(xyz))

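                # DCD stores lengths in Angstrom and the low-level reader
                # returns them unconverted, so divide by 10 to get nanometers.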
                confs.extend(xyz / 10)
                unitcell_.extend(unitcell_lengths / 10)
                logger.debug(f"{dcd_path}")
                logger.debug(f"Nr of snapshots: {len(xyz)}")
                N_k[env].append(len(xyz))
                self.traj_files[env].append((dcd_path, psf_path))

            logger.info(f"Combined nr of snapshots: {len(confs)}")
            snapshots[env] = confs
            unitcell[env] = unitcell_
            assert len(confs) == len(unitcell_)
            logger.debug(len(confs))
        logger.debug(N_k)
        return (snapshots, unitcell, nr_of_states, N_k)
Example #45
0
"""
Make trajectory symlinks in the local trajectories directory.
Looks for FAH data at the location given by the environment variable
FAH_DATA_PATH.
"""

import glob
import mdtraj as md
import os

RUN = None  # Set to either None or the run number of interest.
PROJECT = 10468
MIN_LENGTH = 1000 * 4  # kinase, T4
#MIN_LENGTH = 400 * 4  # setd8

PATH = "%s/%d/" % (os.environ["FAH_DATA_PATH"], PROJECT)

# Keep only trajectories longer than MIN_LENGTH frames; close each file
# handle after checking its length.
filenames = []
for filename in glob.glob(PATH + "run*.h5"):
    with md.open(filename) as f:
        if len(f) > MIN_LENGTH:
            filenames.append(filename)

os.makedirs("./trajectories/", exist_ok=True)

for filename in filenames:
    if RUN is None or "run%d" % RUN in filename:
        base_filename = os.path.split(filename)[1]
        out_filename = "./trajectories/%s" % (base_filename)
        if not os.path.exists(out_filename):
            os.symlink(filename, out_filename)
Example #46
0
    def __determine_len(self, filename):
        with mdtraj.open(filename) as fh:
            return len(fh)
Example #47
0
    def process_chunk(self, begin_chunk, chunk_size, topology, energy, hbonds,
                      entropy):
        nbr_cutoff_sq = 3.5**2
        with md.open(self.trajectory) as f:
            f.seek(begin_chunk)
            trj = f.read_as_traj(topology, n_frames=chunk_size, stride=1)
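            # Convert coordinates from nanometers to Angstrom, matching the
            # conversion applied to the box lengths below.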
            trj.xyz *= 10.0
            pbc = md.utils.in_units_of(trj.unitcell_lengths, "nanometers",
                                       "angstroms")
            frame_data = [[] for i in range(trj.n_frames)]
            calc.assign_voxels(trj.xyz, self.dims, self.gridmax, self.origin,
                               frame_data, self.wat_oxygen_atom_ids)
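            # After voxel assignment, frame_data[frame] holds one entry per
            # water in that frame, pairing a voxel index (wat[0]) with the
            # water's oxygen atom index (wat[1]).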

            for frame in range(trj.n_frames):
                coords = trj.xyz[frame, :, :].reshape(1, trj.xyz.shape[1],
                                                      trj.xyz.shape[2])
                periodic_box = pbc[frame].reshape(1, pbc.shape[1])
                waters = frame_data[frame]
                for wat in waters:
                    self.voxeldata[wat[0], 4] += 1
                    if energy or hbonds:
                        e_lj_array, e_elec_array = np.copy(
                            self.acoeff), np.copy(self.chg_product)
                        distance_matrix = np.zeros(
                            (self.water_sites, self.all_atom_ids.shape[0]))
                        calc.get_pairwise_distances(wat, self.all_atom_ids,
                                                    coords, pbc,
                                                    distance_matrix)
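                        # distance_matrix holds squared distances, so compare
                        # against the squared cutoff and exclude the water
                        # itself via the > 0.0 condition.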
                        wat_nbrs = self.wat_oxygen_atom_ids[np.where(
                            (distance_matrix[0, :][self.wat_oxygen_atom_ids] <=
                             nbr_cutoff_sq) & (distance_matrix[0, :][
                                 self.wat_oxygen_atom_ids] > 0.0))]
                        self.voxeldata[wat[0], 17] += wat_nbrs.shape[0]
                        calc.calculate_energy(wat[1], distance_matrix,
                                              e_elec_array, e_lj_array,
                                              self.bcoeff)
                        self.voxeldata[wat[0], 11] += np.sum(
                            e_lj_array[:, :self.wat_oxygen_atom_ids[0]])
                        self.voxeldata[wat[0], 11] += np.sum(
                            e_elec_array[:, :self.wat_oxygen_atom_ids[0]])
                        self.voxeldata[wat[0], 13] += np.sum(
                            e_lj_array[:, self.wat_oxygen_atom_ids[0]:wat[1]]
                        ) + np.sum(e_lj_array[:, wat[1] + self.water_sites:])
                        self.voxeldata[wat[0], 13] += np.sum(
                            e_elec_array[:, self.wat_oxygen_atom_ids[0]:wat[1]]
                        ) + np.sum(e_elec_array[:, wat[1] + self.water_sites:])
                        e_nbr_list = [
                            np.sum(e_lj_array[:, wat_nbrs + i] +
                                   e_elec_array[:, wat_nbrs + i])
                            for i in range(self.water_sites)
                        ]
                        self.voxeldata[wat[0], 15] += np.sum(e_nbr_list)
                        """
                        ###DEBUG START###
                        elj_sw = np.sum(e_lj_array[:, :self.wat_oxygen_atom_ids[0]])
                        eelec_sw = np.sum(e_elec_array[:, :self.wat_oxygen_atom_ids[0]])
                        elj_ww = np.sum(e_lj_array[:, self.wat_oxygen_atom_ids[0]:wat[1]]) + np.sum(e_lj_array[:, wat[1] + 1:])
                        eelec_ww = np.sum(e_elec_array[:, self.wat_oxygen_atom_ids[0]:wat[1]]) + np.sum(e_elec_array[:, wat[1] + self.water_sites:])
                        e_nbr_list = [np.sum(e_lj_array[:, wat_nbrs + i] + e_elec_array[:, wat_nbrs + i]) for i in xrange(self.water_sites)]
                        enbr = np.sum(e_nbr_list)
                        print "Calc: ", elj_sw, eelec_sw, elj_ww, eelec_ww, enbr
                        distance_matrix = np.sqrt(distance_matrix)
                        energy_lj, energy_elec = self.calculate_energy(distance_matrix)
                        test_1 = np.sum(energy_lj[:self.wat_oxygen_atom_ids[0]:])
                        test_2 = np.sum(energy_elec[:, self.non_water_atom_ids])
                        test_3 = np.nansum(energy_lj[self.wat_oxygen_atom_ids[0]:])
                        test_4 = np.sum(energy_elec[:, self.wat_atom_ids[0]:wat[1]]) + np.sum(energy_elec[:, wat[1] + self.water_sites:])
                        test_5 = 0.0
                        test_5 += np.sum(energy_lj[self.wat_oxygen_atom_ids[0]:][(wat_nbrs - self.wat_oxygen_atom_ids[0]) / self.water_sites])
                        for i in range(self.water_sites):
                            test_5 += np.sum(energy_elec[:, wat_nbrs + i])
                        print "Ref: ", test_1, test_2, test_3, test_4, test_5
                        ###DEBUG END###
                        """
                        # H-bond calculations
                        if hbonds:
                            prot_nbrs_all = self.non_water_atom_ids[np.where(
                                distance_matrix[0, :][
                                    self.non_water_atom_ids] <= nbr_cutoff_sq)]
                            prot_nbrs_hb = prot_nbrs_all[np.where(
                                self.prot_hb_types[prot_nbrs_all] != 0)]
                            if wat_nbrs.shape[0] != 0 and prot_nbrs_hb.shape[0] != 0:
                                # hb_ww, hb_sw = self.calculate_hydrogen_bonds2(coords, wat[1], wat_nbrs, prot_nbrs_hb)
                                hb_ww, hb_sw = self.calculate_hydrogen_bonds(
                                    trj, wat[1], wat_nbrs, prot_nbrs_hb)
                                acc_ww = hb_ww[:, 0][np.where(
                                    hb_ww[:, 0] == wat[1])].shape[0]
                                don_ww = hb_ww.shape[0] - acc_ww
                                acc_sw = hb_sw[:, 0][np.where(
                                    hb_sw[:, 0] == wat[1])].shape[0]
                                don_sw = hb_sw.shape[0] - acc_sw
                                self.voxeldata[wat[0], 23] += hb_sw.shape[0]
                                self.voxeldata[wat[0], 25] += hb_ww.shape[0]
                                self.voxeldata[wat[0], 27] += don_sw
                                self.voxeldata[wat[0], 29] += acc_sw
                                self.voxeldata[wat[0], 31] += don_ww
                                self.voxeldata[wat[0], 33] += acc_ww
                                if wat_nbrs.shape[0] != 0 and hb_ww.shape[0] != 0:
                                    self.voxeldata[wat[0], 19] += wat_nbrs.shape[0] / hb_ww.shape[0]
                                    # f_enc =  1.0 - (wat_nbrs.shape[0] / 5.25)
                                    # if f_enc < 0.0:
                                    #    f_enc = 0.0
                                    # self.voxeldata[wat[0], 21] += f_enc
                    if entropy:
                        self.calculate_euler_angles(wat, coords[0, :, :])
Example #48
0
import os

import mdtraj as mdt
# NOTE: arglib, utils and Project are assumed to come from MSMBuilder here,
# as in the other project-rebuilding examples; adjust to your installation.
from msmbuilder import arglib, utils, Project

parser = arglib.ArgumentParser()
parser.add_argument('traj_dir', help='Directory to find trajectory files.')
parser.add_argument('conf_fn', help='Conformation filename that has the same atom names and residue IDs, etc. as the trajectories.')
parser.add_argument('output', default='./ProjectInfo.yaml', help='Output filename [ ./ProjectInfo.yaml ]')

args = parser.parse_args()

traj_list = [os.path.join(args.traj_dir, fn) for fn in os.listdir(args.traj_dir)]
traj_list.sort(key=utils.keynat)  # natural sort, so e.g. trj10 follows trj9

print(traj_list)

traj_lens = []

for i, traj_fn in enumerate(traj_list):
    print(i)
    with mdt.open(traj_fn) as f:
        traj_lens.append(len(f))

records = {'conf_filename': args.conf_fn,
           'traj_lengths': traj_lens,
           'traj_paths': traj_list,
           'traj_converted_from': [[] for fn in traj_list],
           'traj_errors': [None for fn in traj_list]
           }

project = Project(records=records)
project.save(args.output)
Example #49
0
    def calculate_site_quantities(self,
                                  energy=True,
                                  entropy=True,
                                  hbonds=True,
                                  energy_lr_breakdown=False,
                                  angular_structure=False,
                                  shell_radii=None,
                                  r_theta_cutoff=6.0):
        """
        Performs site-based solvation thermodynamics and structure calculations by iterating
        over frames in the trajectory. If water molecules in hydration sites are already determined
        (the case when clustering is already done), then the list of hydration site waters in
        each frame is used to iterate over each water and calculate its properties. If externally
        determined hydration sites are provided (when self.clustercenter_file is set to a pdb file of
        hydration sites) then for each site, corresponding water is found in each frame and is used
        for caclulations.

        Parameters
        ----------
        energy : bool, optional
            Description
        hbonds : bool, optional
            Description
        entropy : bool, optional
            Description

        Returns
        -------
        None : NoneType
            This function updates hydration site data structures to store the results of calculations.
        """
        print_progress_bar(0, self.num_frames)
        topology = md.load_topology(self.topology_file)
        read_num_frames = 0
        if energy_lr_breakdown:
            if shell_radii is None:
                shell_radii = [3.5, 5.5, 8.5]
            else:
                assert len(shell_radii) == 3, ("Water-water energy decomposition is supported only up to 3 "
                                               "solvation shells. Please provide outer radii for three shells.")
            shell_radii = [i**2 for i in shell_radii]
            shell_radii.insert(0, 0.0)
            self.energy_ww_lr_breakdown = [[
                0.0 for s in shell_radii
            ] for i in range(self.hsa_data.shape[0])]

        if angular_structure:
            if r_theta_cutoff > 8.0:
                print(
                    "Warning: r_theta_cutoff > 8.0 can take a long time. "
                    "Resetting angular structure distance cutoff to 8.0 Angstrom."
                )
                r_theta_cutoff = 8.0
            self.angular_st_distribution = [
                [] for i in range(self.hsa_data.shape[0])
            ]

        with md.open(self.trajectory) as f:
            for frame_i in range(self.start_frame,
                                 self.start_frame + self.num_frames):
                print_progress_bar(frame_i - self.start_frame, self.num_frames)
                f.seek(frame_i)
                trj = f.read_as_traj(topology, n_frames=1, stride=1)
                if trj.n_frames == 0:
                    print("No more frames to read.")
                    break
                else:
                    self._process_frame(trj, frame_i, energy, hbonds, entropy,
                                        energy_lr_breakdown, angular_structure,
                                        shell_radii, r_theta_cutoff)
                    read_num_frames += 1
            if read_num_frames < self.num_frames:
                print((
                    "{0:d} frames found in the trajectory, resetting self.num_frames."
                    .format(read_num_frames)))
                self.num_frames = read_num_frames

        if entropy:
            self.generate_data_for_entropycalcs(self.start_frame,
                                                self.num_frames)
            self.run_entropy_scripts()
        self.normalize_site_quantities(self.num_frames)