def test_constraints():
    """Round-trip a structured ``constraints`` record through an HDF5 file."""
    record_dtype = np.dtype([
        ('atom1', np.int32),
        ('atom2', np.int32),
        ('distance', np.float32),
    ])
    expected = np.array([(1, 2, 3.5)], dtype=record_dtype)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.constraints = expected

    with HDF5TrajectoryFile(temp) as reader:
        assert eq(reader.constraints, expected)
def test_write_coordinates_reshape():
    """A single (10, 3) frame must be stored as a (1, 10, 3) array."""
    single_frame = np.random.randn(10, 3)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(single_frame)

    with HDF5TrajectoryFile(temp) as reader:
        on_disk = reader.root.coordinates[:]
        assert eq(on_disk, single_frame.reshape(1, 10, 3))
        unit_label = str(reader.root.coordinates.attrs['units'])
        assert eq(unit_label, 'nanometers')
def test_topology(get_fn):
    """A topology assigned on write compares equal when read back."""
    reference = md.load_pdb(get_fn('native.pdb')).topology

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.topology = reference

    with HDF5TrajectoryFile(temp) as reader:
        assert reader.topology == reference
def test_write_multiple():
    """Two consecutive writes are stacked along the frame axis."""
    batch = np.random.randn(4, 10, 3)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        # Write the same batch twice in a row.
        for _ in range(2):
            writer.write(batch)

    with HDF5TrajectoryFile(temp) as reader:
        expected = np.vstack((batch, batch))
        assert eq(reader.root.coordinates[:], expected)
def test_write_coordinates():
    """Coordinates written to disk are read back unchanged, labelled in nm.

    This used to be a nose-style generator test (``yield lambda: ...``).
    pytest no longer runs yielded callables, so the checks are now plain
    assertions executed while the file is still open.
    """
    coordinates = np.random.randn(4, 10, 3)

    with HDF5TrajectoryFile(temp, 'w') as f:
        f.write(coordinates)

    with HDF5TrajectoryFile(temp) as f:
        assert eq(f.root.coordinates[:], coordinates)
        assert eq(str(f.root.coordinates.attrs['units']), 'nanometers')
def test_read_slice_3():
    """Reading with both a stride and an atom selection slices every field."""
    xyz = np.random.randn(4, 10, 3)
    lambdas = np.arange(4)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(xyz, alchemicalLambda=lambdas)

    with HDF5TrajectoryFile(temp) as reader:
        frames = reader.read(stride=2, atom_indices=np.array([0, 1]))

    assert eq(frames.coordinates, xyz[::2, [0, 1], :])
    assert eq(frames.alchemicalLambda, np.arange(4)[::2])
def test_read_slice_2():
    """Reading with ``atom_indices`` restricts coordinates to those atoms.

    Converted from a nose-style generator test: pytest no longer runs
    yielded callables, so the checks are expressed as direct assertions.
    """
    coordinates = np.random.randn(4, 10, 3)

    with HDF5TrajectoryFile(temp, 'w') as f:
        f.write(coordinates, alchemicalLambda=np.arange(4))

    with HDF5TrajectoryFile(temp) as f:
        got = f.read(atom_indices=np.array([0, 1]))
        assert eq(got.coordinates, coordinates[:, [0, 1], :])
        # Per-frame data is not affected by the atom selection.
        assert eq(got.alchemicalLambda, np.arange(4))
def test_read_slice_0():
    """``read(n_frames=2)`` returns only the first two frames of each field."""
    xyz = np.random.randn(4, 10, 3)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(xyz, alchemicalLambda=np.array([1, 2, 3, 4]))

    with HDF5TrajectoryFile(temp) as reader:
        head = reader.read(n_frames=2)

    assert eq(head.coordinates, xyz[:2])
    # Velocities were never written, so the field is reported as None.
    assert eq(head.velocities, None)
    assert eq(head.alchemicalLambda, np.array([1, 2]))
def test_append():
    """Frames written in append mode follow the frames already on disk."""
    x1 = np.random.randn(10, 5, 3)
    x2 = np.random.randn(8, 5, 3)

    with HDF5TrajectoryFile(temp, 'w') as f:
        f.write(x1)

    with HDF5TrajectoryFile(temp, 'a') as f:
        f.write(x2)

    with HDF5TrajectoryFile(temp) as f:
        # Assert the comparison result, as the sibling tests do, instead of
        # discarding it.
        assert eq(f.root.coordinates[:], np.concatenate((x1, x2)))
def test_read_0():
    """A plain ``read()`` returns every field that was written.

    Converted from a nose-style generator test: pytest no longer runs
    yielded callables, so the checks are expressed as direct assertions.
    """
    coordinates = np.random.randn(4, 10, 3)

    with HDF5TrajectoryFile(temp, 'w') as f:
        f.write(coordinates, alchemicalLambda=np.array([1, 2, 3, 4]))

    with HDF5TrajectoryFile(temp) as f:
        got = f.read()
        assert eq(got.coordinates, coordinates)
        # Velocities were never written, so the field is reported as None.
        assert eq(got.velocities, None)
        assert eq(got.alchemicalLambda, np.array([1, 2, 3, 4]))
def test_read_1():
    """Unit-bearing input is read back as plain nm and nm/ps values."""
    xyz = units.Quantity(np.random.randn(4, 10, 3), units.angstroms)
    vel = units.Quantity(np.random.randn(4, 10, 3),
                         units.angstroms / units.years)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(xyz, velocities=vel)

    with HDF5TrajectoryFile(temp) as reader:
        frames = reader.read()

    expected_xyz = xyz.value_in_unit(units.nanometers)
    expected_vel = vel.value_in_unit(units.nanometers / units.picoseconds)
    assert eq(frames.coordinates, expected_xyz)
    assert eq(frames.velocities, expected_vel)
def test_read_slice_1():
    """Successive ``read(n_frames=2)`` calls walk through the file in order."""
    xyz = np.random.randn(4, 10, 3)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(xyz)

    with HDF5TrajectoryFile(temp) as reader:
        first = reader.read(n_frames=2)
        assert eq(first.coordinates, xyz[:2])
        assert eq(first.velocities, None)

        # The second call continues where the first one stopped.
        second = reader.read(n_frames=2)
        assert eq(second.coordinates, xyz[2:])
        assert eq(second.velocities, None)
def test_write_units():
    """Quantities with simtk units are converted to MD units when stored."""
    xyz = units.Quantity(np.random.randn(4, 10, 3), units.angstroms)
    vel = units.Quantity(np.random.randn(4, 10, 3),
                         units.angstroms / units.year)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(xyz, velocities=vel)

    with HDF5TrajectoryFile(temp) as reader:
        root = reader.root

        assert eq(root.coordinates[:], xyz.value_in_unit(units.nanometers))
        assert eq(str(root.coordinates.attrs['units']), 'nanometers')

        nm_per_ps = units.nanometers / units.picosecond
        assert eq(root.velocities[:], vel.value_in_unit(nm_per_ps))
        assert eq(str(root.velocities.attrs['units']), 'nanometers/picosecond')
def test_write_units2():
    """Same as the simtk unit test, but using mdtraj's bundled unit package."""
    from mdtraj.utils import unit

    defs = unit.unit_definitions
    make_quantity = unit.quantity.Quantity

    xyz = make_quantity(np.random.randn(4, 10, 3), defs.angstroms)
    vel = make_quantity(np.random.randn(4, 10, 3), defs.angstroms / defs.year)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(xyz, velocities=vel)

    with HDF5TrajectoryFile(temp) as reader:
        root = reader.root

        assert eq(root.coordinates[:], xyz.value_in_unit(defs.nanometers))
        assert eq(str(root.coordinates.attrs['units']), 'nanometers')

        nm_per_ps = defs.nanometers / defs.picosecond
        assert eq(root.velocities[:], vel.value_in_unit(nm_per_ps))
        assert eq(str(root.velocities.attrs['units']), 'nanometers/picosecond')
def test_write_inconsistent_2():
    """Dropping a field that earlier frames contained raises ValueError."""
    xyz = np.random.randn(4, 10, 3)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(xyz, velocities=xyz)
        # The first write stored velocities as well, so a later write that
        # supplies coordinates only carries less data and must be rejected.
        assert_raises(ValueError, writer.write, xyz)
def test_write_inconsistent():
    """Adding a field that earlier frames lacked raises ValueError."""
    xyz = np.random.randn(4, 10, 3)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(xyz)
        # The first frames were saved without velocities, so velocities
        # cannot be introduced by a later write.
        with pytest.raises(ValueError):
            writer.write(xyz, velocities=xyz)
def test_attributes():
    """Metadata attributes set on a file being written can be read back."""
    constraints = np.zeros(10, dtype=[('atom1', np.int32),
                                      ('atom2', np.int32),
                                      ('distance', np.float32)])

    with HDF5TrajectoryFile(temp, 'w') as f:
        f.title = 'mytitle'
        f.reference = 'myreference'
        f.forcefield = 'amber99'
        f.randomState = 'sdf'
        f.application = 'openmm'
        f.constraints = constraints

    with HDF5TrajectoryFile(temp) as g:
        # Assert each comparison, as the sibling tests do, instead of
        # discarding the result of eq().
        assert eq(g.title, 'mytitle')
        assert eq(g.reference, 'myreference')
        assert eq(g.forcefield, 'amber99')
        assert eq(g.randomState, 'sdf')
        assert eq(g.application, 'openmm')
        assert eq(g.constraints, constraints)
def test_write_units_mismatch():
    """Passing a velocity-dimensioned quantity as coordinates raises TypeError."""
    velocities = units.Quantity(np.random.randn(4, 10, 3),
                                units.angstroms / units.picosecond)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        # Coordinates that carry units of the wrong dimension must be
        # detected and rejected.
        with pytest.raises(TypeError):
            writer.write(coordinates=velocities)
def to_mdtraj_HDF5TrajectoryFile(item, atom_indices='all', structure_indices='all', check=True):
    """Open ``item`` as an ``mdtraj`` HDF5 trajectory file and extract from it.

    Parameters
    ----------
    item
        Value handed to ``HDF5TrajectoryFile``; it is validated against the
        ``'file:h5'`` form when ``check`` is true.
    atom_indices, structure_indices
        Selections forwarded to the extractor (``'all'`` by default).
    check : bool
        When true, the arguments are passed through the ``digest_*`` helpers
        first.

    Returns
    -------
    Whatever the ``mdtraj_HDF5TrajectoryFile`` ``extract`` function returns
    for the opened file.
    """
    if check:
        digest_item(item, 'file:h5')
        atom_indices = digest_atom_indices(atom_indices)
        # NOTE(review): structure_indices is digested with the *atom* index
        # helper; confirm whether a dedicated structure-index digest was
        # intended here.
        structure_indices = digest_atom_indices(structure_indices)

    from mdtraj.formats import HDF5TrajectoryFile
    from ..mdtraj_HDF5TrajectoryFile import extract as extract_mdtraj_HDF5TrajectoryFile

    opened_file = HDF5TrajectoryFile(item)

    # The inputs were already digested above (or the caller opted out), so
    # the extractor is told to skip its own checks.
    return extract_mdtraj_HDF5TrajectoryFile(
        opened_file,
        atom_indices=atom_indices,
        structure_indices=structure_indices,
        copy_if_all=False,
        check=False,
    )
def create_file_reader(input_files, topology, featurizer, chunksize=None, **kw):
    r"""
    Create a (possibly featurized) file reader for the given input files.

    Parameters
    ----------
    input_files : str, or list/tuple of str, or list/tuple containing lists
        A single input file or a list of input files. If any element is
        itself a list or tuple, the input is treated as fragmented
        trajectories.
    topology
        A topology file. If given, the featurizer argument can be None.
    featurizer
        A featurizer. If given, the topology file can be None.
    chunksize
        The chunk size with which the corresponding reader gets initialized.
    **kw
        Extra keyword arguments forwarded to the ``H5Reader`` and
        ``PyCSVReader`` constructors.

    Returns
    -------
    The reader selected from the structure of ``input_files`` and the suffix
    of the first file.

    Raises
    ------
    ValueError
        If ``input_files`` is neither a string nor a list of strings, is
        empty, mixes strings with other items, mixes file suffixes, names
        files that do not exist, or names MD files while neither a
        featurizer nor a topology is given.
    """
    from pyemma.coordinates.data.numpy_filereader import NumPyFileReader
    from pyemma.coordinates.data.py_csv_reader import PyCSVReader
    from pyemma.coordinates.data import FeatureReader
    from pyemma.coordinates.data.fragmented_trajectory_reader import FragmentedTrajectoryReader

    is_string = isinstance(input_files, string_types)
    is_sequence = isinstance(input_files, (list, tuple))

    # fragmented trajectories
    if is_sequence and any(isinstance(item, (list, tuple)) for item in input_files):
        return FragmentedTrajectoryReader(input_files, topology, chunksize, featurizer)

    # normal trajectories: normalize the input to a list of file names
    if is_string:
        input_list = [input_files]
    elif is_sequence and len(input_files) == 0:
        raise ValueError("The passed input list should not be empty.")
    elif is_sequence and any(isinstance(item, string_types) for item in input_files):
        if not all(isinstance(item, string_types) for item in input_files):
            raise ValueError("The passed list did not exclusively contain strings or was a list of lists "
                             "(fragmented trajectory).")
        input_list = input_files
    else:
        # Wrap in a 1-tuple so that a tuple argument is formatted as one
        # value rather than unpacked as several format arguments.
        raise ValueError("Input \"%s\" was no string or list of strings." % (input_files,))

    # TODO: this does not handle suffixes like .xyz.gz (rare)
    _, suffix = os.path.splitext(input_list[0])

    # check: do all files have the same file type? If not: raise ValueError.
    if not all(item.endswith(suffix) for item in input_list):
        raise ValueError("Not all elements in the input list were of the type %s!" % suffix)

    # do all the files exist? If not: raise ValueError
    missing = [item for item in input_list if not os.path.isfile(item)]
    if missing:
        err_msg = "\n".join("File %s did not exist or was no file" % item for item in missing)
        raise ValueError("Some of the given input files were directories"
                         " or did not exist:\n%s" % err_msg)

    from mdtraj.formats.registry import FormatRegistry

    # we need to check for h5 first, because of mdtraj custom HDF5 traj format (which is deprecated).
    if suffix in ('.h5', '.hdf5'):
        # TODO: inspect if it is a mdtraj h5 file, eg. has the given attributes
        try:
            from mdtraj.formats import HDF5TrajectoryFile
            # Probe only: opening succeeds for mdtraj-style HDF5 files. The
            # handle is closed again right away so it does not leak.
            with HDF5TrajectoryFile(input_list[0]):
                pass
            reader = FeatureReader(input_list, featurizer=featurizer, topologyfile=topology,
                                   chunksize=chunksize)
        except Exception:
            # Not readable as an mdtraj trajectory (or the feature reader
            # could not be built): fall back to the generic HDF5 reader.
            from pyemma.coordinates.data.h5_reader import H5Reader
            # NOTE(review): this passes the caller's original input_files,
            # not the normalized input_list; confirm that is intended.
            reader = H5Reader(filenames=input_files, chunk_size=chunksize, **kw)
    # CASE 1.1: file types are MD files
    elif suffix in FormatRegistry.loaders:
        # check: do we either have a featurizer or a topology file name? If not: raise ValueError.
        if not featurizer and not topology:
            raise ValueError("The input files were MD files which makes it mandatory to have either a "
                             "featurizer or a topology file.")
        # create a MD reader with file names and topology
        reader = FeatureReader(input_list, featurizer=featurizer, topologyfile=topology,
                               chunksize=chunksize)
    elif suffix in ('.npy', '.npz'):
        reader = NumPyFileReader(input_list, chunksize=chunksize)
    else:
        # otherwise we assume that given files are ascii tabulated data
        reader = PyCSVReader(input_list, chunksize=chunksize, **kw)

    return reader
def test_reporter_subset():
    """Run a short periodic simulation reporting only a subset of atoms.

    The HDF5, NetCDF and DCD reporters are attached to the same simulation
    with the same ``atomSubset``; the files they produce are then read back
    and compared with the expected shapes and with each other.
    """
    tempdir = os.path.join(dir, 'test2')
    os.makedirs(tempdir)

    pdb = PDBFile(get_fn('native2.pdb'))
    pdb.topology.setUnitCellDimensions([2, 2, 2])
    forcefield = ForceField('amber99sbildn.xml', 'amber99_obc.xml')
    system = forcefield.createSystem(pdb.topology, nonbondedMethod=CutoffPeriodic,
                                     nonbondedCutoff=1 * nanometers,
                                     constraints=HBonds, rigidWater=True)
    integrator = LangevinIntegrator(300 * kelvin, 1.0 / picoseconds, 2.0 * femtoseconds)
    integrator.setConstraintTolerance(0.00001)

    platform = Platform.getPlatformByName('Reference')
    simulation = Simulation(pdb.topology, system, integrator, platform)
    simulation.context.setPositions(pdb.positions)
    simulation.context.setVelocitiesToTemperature(300 * kelvin)

    hdf5file = os.path.join(tempdir, 'traj.h5')
    ncfile = os.path.join(tempdir, 'traj.nc')
    dcdfile = os.path.join(tempdir, 'traj.dcd')

    atomSubset = [0, 1, 2, 4, 5]

    reporter = HDF5Reporter(hdf5file, 2, coordinates=True, time=True,
                            cell=True, potentialEnergy=True, kineticEnergy=True,
                            temperature=True, velocities=True, atomSubset=atomSubset)
    reporter2 = NetCDFReporter(ncfile, 2, coordinates=True, time=True,
                               cell=True, atomSubset=atomSubset)
    reporter3 = DCDReporter(dcdfile, 2, atomSubset=atomSubset)

    simulation.reporters.append(reporter)
    simulation.reporters.append(reporter2)
    simulation.reporters.append(reporter3)
    simulation.step(100)

    # Close every reporter before its file is read back.
    reporter.close()
    reporter2.close()
    reporter3.close()

    # 100 steps reported every 2 steps gives 50 frames.
    with HDF5TrajectoryFile(hdf5file) as f:
        got = f.read()
        assert eq(got.temperature.shape, (50, ))
        assert eq(got.potentialEnergy.shape, (50, ))
        assert eq(got.kineticEnergy.shape, (50, ))
        assert eq(got.coordinates.shape, (50, len(atomSubset), 3))
        assert eq(got.velocities.shape, (50, len(atomSubset), 3))
        assert eq(got.cell_lengths, 2 * np.ones((50, 3)))
        assert eq(got.cell_angles, 90 * np.ones((50, 3)))
        assert eq(got.time, 0.002 * 2 * (1 + np.arange(50)))
        assert f.topology == md.load(get_fn('native.pdb'), atom_indices=atomSubset).topology

    with NetCDFTrajectoryFile(ncfile) as f:
        xyz, time, cell_lengths, cell_angles = f.read()
        # The raw NetCDF read is expected to report a cell of 20 where the
        # HDF5 read reports 2.
        assert eq(cell_lengths, 20 * np.ones((50, 3)))
        assert eq(cell_angles, 90 * np.ones((50, 3)))
        assert eq(time, 0.002 * 2 * (1 + np.arange(50)))
        assert eq(xyz.shape, (50, len(atomSubset), 3))

    hdf5_traj = md.load(hdf5file)
    dcd_traj = md.load(dcdfile, top=hdf5_traj)
    netcdf_traj = md.load(ncfile, top=hdf5_traj)

    # we don't have to convert units here, because md.load already handles
    # that
    assert eq(hdf5_traj.xyz, netcdf_traj.xyz)
    assert eq(hdf5_traj.unitcell_vectors, netcdf_traj.unitcell_vectors)
    assert eq(hdf5_traj.time, netcdf_traj.time)

    assert eq(dcd_traj.xyz, hdf5_traj.xyz)
    assert eq(dcd_traj.unitcell_vectors, hdf5_traj.unitcell_vectors)
def test_dont_overwrite():
    """Opening an existing file for writing without force raises IOError."""
    # Make sure a file already exists at the target path.
    with open(temp, 'w') as placeholder:
        placeholder.write('a')

    with pytest.raises(IOError):
        with HDF5TrajectoryFile(temp, 'w', force_overwrite=False) as writer:
            writer.write(np.random.randn(10, 5, 3))
def test_do_overwrite():
    """With ``force_overwrite=True`` an existing file can be written over."""
    # Make sure a file already exists at the target path.
    with open(temp, 'w') as placeholder:
        placeholder.write('a')

    with HDF5TrajectoryFile(temp, 'w', force_overwrite=True) as writer:
        writer.write(np.random.randn(10, 5, 3))
def test_reporter():
    """Run a short non-periodic simulation and compare the reporter outputs.

    The HDF5, NetCDF and DCD reporters are attached to the same simulation;
    the files they produce are then read back and compared with the expected
    shapes and with each other.

    This used to be a nose-style generator test. pytest no longer runs
    yielded callables, and the yielded lambdas looked up ``f`` only when
    called, after it had been rebound to the NetCDF handle. The checks are
    therefore plain assertions executed while the relevant file is open.
    """
    tempdir = os.path.join(dir, 'test1')
    os.makedirs(tempdir)

    pdb = PDBFile(get_fn('native.pdb'))
    forcefield = ForceField('amber99sbildn.xml', 'amber99_obc.xml')
    # NO PERIODIC BOUNDARY CONDITIONS
    system = forcefield.createSystem(pdb.topology, nonbondedMethod=CutoffNonPeriodic,
                                     nonbondedCutoff=1.0 * nanometers,
                                     constraints=HBonds, rigidWater=True)
    integrator = LangevinIntegrator(300 * kelvin, 1.0 / picoseconds, 2.0 * femtoseconds)
    integrator.setConstraintTolerance(0.00001)

    platform = Platform.getPlatformByName('Reference')
    simulation = Simulation(pdb.topology, system, integrator, platform)
    simulation.context.setPositions(pdb.positions)
    simulation.context.setVelocitiesToTemperature(300 * kelvin)

    hdf5file = os.path.join(tempdir, 'traj.h5')
    ncfile = os.path.join(tempdir, 'traj.nc')
    dcdfile = os.path.join(tempdir, 'traj.dcd')

    reporter = HDF5Reporter(hdf5file, 2, coordinates=True, time=True,
                            cell=True, potentialEnergy=True, kineticEnergy=True,
                            temperature=True, velocities=True)
    reporter2 = NetCDFReporter(ncfile, 2, coordinates=True, time=True, cell=True)
    reporter3 = DCDReporter(dcdfile, 2)

    simulation.reporters.append(reporter)
    simulation.reporters.append(reporter2)
    simulation.reporters.append(reporter3)
    simulation.step(100)

    # Close every reporter, including the DCD one, before its file is read
    # back below.
    reporter.close()
    reporter2.close()
    reporter3.close()

    # 100 steps reported every 2 steps gives 50 frames.
    with HDF5TrajectoryFile(hdf5file) as f:
        got = f.read()
        assert eq(got.temperature.shape, (50, ))
        assert eq(got.potentialEnergy.shape, (50, ))
        assert eq(got.kineticEnergy.shape, (50, ))
        assert eq(got.coordinates.shape, (50, 22, 3))
        assert eq(got.velocities.shape, (50, 22, 3))
        # The system is non-periodic, so no cell data is expected.
        assert eq(got.cell_lengths, None)
        assert eq(got.cell_angles, None)
        assert eq(got.time, 0.002 * 2 * (1 + np.arange(50)))
        assert f.topology == md.load(get_fn('native.pdb')).top

    with NetCDFTrajectoryFile(ncfile) as f:
        xyz, time, cell_lengths, cell_angles = f.read()
        assert eq(cell_lengths, None)
        assert eq(cell_angles, None)
        assert eq(time, 0.002 * 2 * (1 + np.arange(50)))

    hdf5_traj = md.load(hdf5file)
    dcd_traj = md.load(dcdfile, top=get_fn('native.pdb'))
    netcdf_traj = md.load(ncfile, top=get_fn('native.pdb'))

    # we don't have to convert units here, because md.load already
    # handles that
    assert hdf5_traj.unitcell_vectors is None
    assert eq(hdf5_traj.xyz, netcdf_traj.xyz)
    assert eq(hdf5_traj.unitcell_vectors, netcdf_traj.unitcell_vectors)
    assert eq(hdf5_traj.time, netcdf_traj.time)

    assert eq(dcd_traj.xyz, hdf5_traj.xyz)