def topology(self, top):
    """Set the topology in the file

    Parameters
    ----------
    top : mdtraj.Topology
        A topology object
    """
    _check_mode(self.mode, ('w',))

    if self._needs_initialization:
        self._initialize_headers(top.n_atoms)
        self._needs_initialization = False

    top, bonds = top.to_dataframe()

    data = {
        "AtomID": top.index.values + 1,
        "AtomNames": top.name.values,
        "ResidueNames": top.resName.values,
        "ChainID": top.chainID.values,
        "ResidueID": top.resSeq.values + 1,
    }
    for key, val in iteritems(data):
        # fetch the pytables node first, then fill it; the original chained
        # assignment rebound `node` to `val` instead of the node object
        node = self._get_node(where='/', name=key)
        node[:] = val[:]
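# Hedged usage sketch for the setter above: any trajectory-file class that
# exposes this method as a property setter would be driven like this
# (`LH5TrajectoryFile` and the filenames are illustrative, not confirmed API):
#
#   import mdtraj as md
#   t = md.load('frame0.pdb')
#   with LH5TrajectoryFile('traj.lh5', 'w') as f:
#       f.topology = t.topology   # dispatches to the setter above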
def _typecast(self, namespace):
    """Work around for the argparse bug with respect to defaults and
    FileType not playing together nicely --
    http://stackoverflow.com/questions/8236954/specifying-default-filenames-with-argparse-but-not-opening-them-on-help"""
    for name, type_ in iteritems(self.name_to_type):
        setattr(namespace, name, type_(getattr(namespace, name)))
    return namespace
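def _example_typecast():
    # Hedged sketch of the workaround above: leave the default as a plain
    # string so argparse never opens it (e.g. while printing --help), then
    # coerce after parsing. '--out' and name_to_type are illustrative.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--out', default='out.txt')  # no type=FileType here
    namespace = parser.parse_args([])
    name_to_type = {'out': str}  # real code might use argparse.FileType('w')
    for name, type_ in name_to_type.items():
        setattr(namespace, name, type_(getattr(namespace, name)))
    return namespace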
def assert_dict_equal(t1, t2, decimal=6):
    """Assert two dicts are equal.
    This method should actually work for any dict of numpy arrays/objects

    Parameters
    ----------
    t1 : object
    t2 : object
    decimal : int
        Number of decimal places to check, for arrays inside the dicts
    """
    # make sure the keys are the same
    eq_(list(t1.keys()), list(t2.keys()))

    for key, val in iteritems(t1):
        # compare pandas DataFrames via their underlying arrays; the original
        # tested `isinstance(t1, ...)` (the whole dict) rather than the value,
        # and a DataFrame has no `.dtype` attribute
        if 'pandas' in sys.modules and isinstance(val, pd.DataFrame):
            assert_array_almost_equal(val.values, t2[key].values, decimal)
        elif isinstance(val, np.ndarray):
            if val.dtype.kind == 'f':
                # compare floats for almost equality
                assert_array_almost_equal(val, t2[key], decimal)
            else:
                # compare everything else (ints, bools) for absolute equality
                assert_array_equal(val, t2[key])
        else:
            eq_(val, t2[key])
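def _example_assert_dict_equal():
    # Minimal check of the helper above (dict contents illustrative).
    a = {'x': np.array([1.0, 2.0]), 'n': 3}
    b = {'x': np.array([1.0, 2.0 + 1e-9]), 'n': 3}
    assert_dict_equal(a, b, decimal=6)  # passes: equal to 6 decimal places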
def _str_index(self):
    idx = self['index']
    out = []
    out += ['.. index:: %s' % idx.get('default', '')]
    for section, references in iteritems(idx):
        if section == 'default':
            continue
        out += [' :%s: %s' % (section, ', '.join(references))]
    return out
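# Example of the reST the method above emits, assuming self['index'] holds a
# numpydoc-style index dict (contents illustrative):
#
#   {'default': 'trajectory', 'single': ['loading', 'saving']}
#   ->
#   .. index:: trajectory
#    :single: loading, saving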
def save_legacy_hdf(traj, filename):
    """Saves an MDTraj Trajectory as an MSMBuilder2 lh5 file.

    Parameters
    ----------
    traj : MDTraj.Trajectory
        Trajectory object to be saved
    filename : str
        String filename of HDF Trajectory file.
    """
    MAXINT16 = np.iinfo(np.int16).max
    MAXINT32 = np.iinfo(np.int32).max
    DEFAULT_PRECISION = 1000

    def _convert_to_lossy_integers(X, precision):
        """Implementation of the lossy compression used in Gromacs XTC using
        the pytables library. Convert 32 bit floats into 16 bit integers.
        These conversion functions have been optimized for memory use.
        Further memory reduction would require an in-place astype() operation,
        which one could create using ctypes."""
        if np.max(X) * float(precision) < MAXINT16 and np.min(X) * float(precision) > -MAXINT16:
            X *= float(precision)
            Rounded = X.astype("int16")
            X /= float(precision)
        else:
            X *= float(precision)
            Rounded = X.astype("int32")
            X /= float(precision)
            logger.error("Data range too large for int16: try removing center "
                         "of mass motion, check for 'blowing up', or just use "
                         ".h5 or .xtc format.")
        return Rounded

    top, bonds = traj.top.to_dataframe()

    data_dict = {}
    data_dict["AtomID"] = top.index.values + 1
    data_dict["AtomNames"] = top.name.values
    data_dict["ResidueNames"] = top.resName.values
    data_dict["ChainID"] = top.chainID.values
    data_dict["ResidueID"] = top.resSeq.values + 1
    data_dict["XYZList"] = _convert_to_lossy_integers(traj.xyz, DEFAULT_PRECISION)

    atom_dict = {}
    atom_dict["AtomID"] = tables.Int64Atom()
    atom_dict["AtomNames"] = tables.StringAtom(itemsize=4)
    atom_dict["ResidueNames"] = tables.StringAtom(itemsize=4)
    atom_dict["ChainID"] = tables.StringAtom(itemsize=1)
    atom_dict["ResidueID"] = tables.Int64Atom()
    atom_dict["XYZList"] = tables.Int16Atom()

    file_handle = tables.File(filename, 'w')
    for key, val in iteritems(data_dict):
        node = file_handle.createCArray(where='/', name=key, atom=atom_dict[key],
                                        shape=val.shape, filters=COMPRESSION)
        node[:] = val[:]
    file_handle.close()
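def _example_lossy_integers():
    # Self-contained illustration of the fixed-point scheme above: with
    # precision=1000 the truncating int16 cast stores x * 1000, so the
    # reconstruction error stays below 1/1000. Values are illustrative.
    x = np.array([1.23456, -0.98765], dtype=np.float32)
    stored = (x * 1000).astype('int16')  # -> [1234, -987]
    recovered = stored / 1000.0          # -> [1.234, -0.987]
    assert np.max(np.abs(recovered - x)) < 1e-3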
def _delayed_init(self):
    import cffi
    ffi = cffi.FFI()

    # some platforms don't have all of these, especially weird compilers
    # or 32 bit machines
    nptypes = [getattr(np, name) for name in self.nptype_names if hasattr(np, name)]
    nptype_descr = dict([('%s%d' % (dtype.kind, dtype.itemsize), dtype)
                         for dtype in map(np.dtype, nptypes)])

    casts = {}
    for code, names in iteritems(self.ctypes):
        for name in names:
            casts[nptype_descr['%s%d' % (code, ffi.sizeof(name))]] = name + ' *'
    # casts is a dict that helps us cast numpy arrays, like
    # {np.float32 : 'float *', np.int32: 'int *'}

    self._ffi = ffi
    self._casts = casts
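def _example_cffi_cast():
    # Self-contained sketch of how a casts table like the one built above is
    # used: hand a numpy buffer to C code via cffi. The two-entry casts dict
    # here is a simplified stand-in for the one _delayed_init constructs.
    import cffi
    ffi = cffi.FFI()
    casts = {np.dtype(np.float32): 'float *', np.dtype(np.int32): 'int *'}
    x = np.zeros(10, dtype=np.float32)
    ptr = ffi.cast(casts[x.dtype], x.ctypes.data)  # C pointer into x's buffer
    return ptr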
def __init__(self, traj, path, run_clone_gen, traj_len):
    try:
        os.mkdir(path + "/")
    except OSError:
        pass
    for (run, clone), num_gens in iteritems(run_clone_gen):
        try:
            os.mkdir(path + "/RUN%d/" % run)
        except OSError:
            pass
        os.mkdir(path + "/RUN%d/CLONE%d" % (run, clone))
        for gen in range(num_gens):
            randomize_coordinates(traj, traj_len)
            traj.save(path + "/RUN%d/CLONE%d/frame%d.xtc" % (run, clone, gen))
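# The run_clone_gen argument maps (run, clone) pairs to a generation count,
# mirroring a Folding@home-style RUN*/CLONE*/frame*.xtc directory layout.
# Hedged example (values illustrative):
#
#   run_clone_gen = {(0, 0): 2, (0, 1): 3}
#   # -> RUN0/CLONE0/frame0.xtc, frame1.xtc
#   # -> RUN0/CLONE1/frame0.xtc, frame1.xtc, frame2.xtc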
def invert_assignments(assignments):
    """Invert an assignments array -- that is, produce a mapping
    from state -> traj/frame

    Parameters
    ----------
    assignments : np.ndarray
        2D array of MSMBuilder assignments

    Returns
    -------
    inverse_mapping : dict
        Mapping from state -> traj,frame, such that inverse_mapping[s]
        gives the conformations assigned to state s.

    Notes
    -----
    The assignments array may have -1's, which are simply placeholders;
    we do not add these to the inverted assignments. Therefore, doing the
    following will raise a KeyError:

    >>> inv_assignments = MSMLib.invert_assignments(assignments)
    >>> print inv_assignments[-1]
    KeyError: -1
    """
    check_assignment_array_input(assignments)

    inverse_mapping = defaultdict(lambda: ([], []))
    non_neg_inds = np.array(np.where(assignments != -1)).T  # we do not care about -1's

    for (i, j) in non_neg_inds:
        inverse_mapping[assignments[i, j]][0].append(i)
        inverse_mapping[assignments[i, j]][1].append(j)

    # convert from lists to numpy arrays
    for key, (trajs, frames) in iteritems(inverse_mapping):
        inverse_mapping[key] = (np.array(trajs), np.array(frames))

    # return a plain dict, so that looking up a missing state (e.g. -1)
    # raises KeyError as documented, rather than silently inserting an
    # empty entry via the defaultdict factory
    return dict(inverse_mapping)
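def _example_invert_assignments():
    # Worked example: -1 marks unassigned frames and is skipped. State 0
    # appears at (traj 0, frame 0) and (traj 1, frame 2).
    assignments = np.array([[0, 1, -1],
                            [1, 1,  0]])
    inv = invert_assignments(assignments)
    assert list(inv[0][0]) == [0, 1]  # trajectory indices for state 0
    assert list(inv[0][1]) == [0, 2]  # frame indices for state 0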
def assert_dict_equal(t1, t2, decimal=6):
    """
    Assert two dicts are equal.
    This method should actually work for any dict of numpy arrays/objects
    """
    # make sure the keys are the same
    eq_(list(t1.keys()), list(t2.keys()))

    for key, val in iteritems(t1):
        # compare numpy arrays using numpy.testing
        if isinstance(val, np.ndarray):
            if val.dtype.kind == 'f':
                # compare floats for almost equality
                assert_array_almost_equal(val, t2[key], decimal)
            else:
                # compare everything else (ints, bools) for absolute equality
                assert_array_equal(val, t2[key])
        else:
            eq_(val, t2[key])
def main(args, verbose=True):
    """Run the main script.

    Parameters
    ----------
    args : argparse.Namespace
        The collected command line arguments
    """
    if args.atom_indices is not None:
        atom_indices = np.loadtxt(args.atom_indices, int)
    else:
        atom_indices = None

    out_x = ext(args.output)
    out_units = units[out_x]
    out_fields = fields[out_x]
    OutFileFormat = formats[out_x]

    in_x = ext(args.input[0])
    InFileFormat = formats[in_x]

    if args.topology is not None:
        topology = _parse_topology(args.topology)
    else:
        topology = None

    if topology is not None and atom_indices is not None:
        topology = topology.subset(atom_indices)

    n_total = 0
    if args.index is not None:
        assert len(args.input) == 1
        # when chunk is None, we load up ALL of the frames. this isn't
        # strictly necessary, and it costs more memory, but it's a lot
        # harder to get the code correct when we need to use data[start:end]
        # notation when all of the data isn't loaded up at once. it's easy
        # for hdf5 and netcdf, but for the others...
        assert args.chunk is None

    # this is the normal invocation pattern, but for PDBTrajectoryFile it's
    # different
    outfile_factory = functools.partial(OutFileFormat, args.output, 'w',
                                        force_overwrite=args.force)

    with outfile_factory() as outfile:
        for fn in args.input:
            assert in_x == ext(fn)
            with InFileFormat(fn, 'r') as infile:
                while True:
                    data, in_units, n_frames = read(infile, args.chunk,
                                                    stride=args.stride,
                                                    atom_indices=atom_indices)
                    if n_frames == 0:
                        break

                    if topology is not None:
                        # if the user supplied a topology, we should probably
                        # do some simple checks
                        if data['xyz'].shape[1] != topology._numAtoms:
                            warnings.warn('number of atoms in the topology does '
                                          'not match the number of atoms in the '
                                          'coordinate data')
                        data['topology'] = topology

                    # if they want a specific set of frames, get those
                    # with slice notation
                    if args.index is not None:
                        _data = {}
                        for k, v in iteritems(data):
                            if isinstance(v, np.ndarray):
                                # we don't want the dimensionality to go deficient
                                if isinstance(args.index, int):
                                    _data[k] = v[np.newaxis, args.index]
                                else:
                                    _data[k] = v[args.index]
                            elif isinstance(v, md.Topology):
                                _data[k] = v
                            else:
                                raise RuntimeError()
                        data = _data
                        print(list(data.keys()))
                        n_frames = len(data['xyz'])

                    convert(data, in_units, out_units, out_fields)
                    write(outfile, data)
                    n_total += n_frames

                    if verbose:
                        sys.stdout.write('\rconverted %d frames, %d atoms' % (
                            n_total, data['xyz'].shape[1]))
                        sys.stdout.flush()

    if verbose:
        print(' ')
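# Hedged invocation sketch for main(): the attribute names mirror those the
# function reads off the namespace; the values and filenames are illustrative.
#
#   from argparse import Namespace
#   args = Namespace(input=['traj.dcd'], output='traj.h5',
#                    topology='top.pdb', atom_indices=None, index=None,
#                    chunk=1000, stride=None, force=True)
#   main(args)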
def saveh(file, *args, **kwargs):
    """Save several numpy arrays into a single file in compressed ``.hdf`` format.

    If arguments are passed in with no keywords, the corresponding variable
    names, in the ``.hdf`` file, are 'arr_0', 'arr_1', etc. If keyword
    arguments are given, the corresponding variable names, in the ``.hdf``
    file will match the keyword names.

    Parameters
    ----------
    file : str or tables.File
        Either the file name (string) or an open pytables file
        (file-like object opened with tables.openFile(...))
        where the data will be saved.
    args : Arguments, optional
        Arrays to save to the file. Since it is not possible for Python to
        know the names of the arrays outside `savez`, the arrays will be saved
        with names "arr_0", "arr_1", and so on. These arguments can be any
        expression.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Arrays will be saved in the file with the
        keyword names.

    Notes
    -----
    `saveh` will overwrite files by default. If you have an hdf5 file that
    contains the arrays `arr_0` and `arr_1` and you attempt to save a new
    array `x`, it will be added alongside them. But if you save a new
    `arr_0`, it will overwrite your previous array.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        When arrays are of an unsupported type

    See Also
    --------
    numpy.savez : Save several arrays into a single file in uncompressed
        ``.npz`` format.
    """
    if isinstance(file, basestring):
        if TABLES2:
            handle = tables.openFile(file, 'a')
        else:
            handle = tables.open_file(file, 'a')
        own_fid = True
    else:
        if not isinstance(file, tables.File):
            raise TypeError('file must be either a string '
                            'or an open tables.File: %s' % file)
        handle = file
        own_fid = False

    # name all the arrays
    namedict = kwargs
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            if own_fid:
                handle.close()
            raise ValueError('Cannot use un-named variables '
                             'and keyword %s' % key)
        namedict[key] = val

    # ensure that they don't already exist
    if TABLES2:
        current_nodes = [e.name for e in handle.listNodes(where='/')]
    else:
        current_nodes = [e.name for e in handle.list_nodes(where='/')]

    for key in namedict.keys():
        if key in current_nodes:
            if TABLES2:
                handle.removeNode('/', name=key)
            else:
                handle.remove_node('/', name=key)
            # per discussion on github, https://github.com/rmcgibbo/mdtraj/issues/5
            # silent overwriting appears to be the desired functionality
            # raise IOError('Array already exists in file: %s' % key)

    # save all the arrays
    try:
        for key, val in iteritems(namedict):
            if not isinstance(val, np.ndarray):
                raise TypeError('Only numpy arrays can '
                                'be saved: type(%s) is %s' % (key, type(val)))
            try:
                atom = tables.Atom.from_dtype(val.dtype)
            except ValueError:
                raise TypeError('Arrays of this dtype '
                                'cannot be saved: %s' % val.dtype)
            if TABLES2:
                node = handle.createCArray(where='/', name=key, atom=atom,
                                           shape=val.shape, filters=COMPRESSION)
            else:
                node = handle.create_carray(where='/', name=key, atom=atom,
                                            shape=val.shape, filters=COMPRESSION)
            node[:] = val
    except Exception:
        handle.close()
        if own_fid:
            os.unlink(file)
        raise

    handle.flush()
    if own_fid:
        handle.close()
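def _example_saveh():
    # Usage sketch for saveh ('example.hdf' is an illustrative filename).
    saveh('example.hdf', np.arange(10))              # stored as 'arr_0'
    saveh('example.hdf', xyz=np.random.randn(5, 3))  # stored as 'xyz'
    # re-saving 'arr_0' would silently overwrite the existing node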