Example #1
    def topology(self, top):
        """Set the topology in the file

        Parameters
        ----------
        top : mdtraj.Topology
            A topology object
        """
        _check_mode(self.mode, ('w',))

        if self._needs_initialization:
            self._initialize_headers(top.n_atoms)
            self._needs_initialization = False

        top, bonds = top.to_dataframe()

        data = {
            "AtomID": top.index.values + 1,
            "AtomNames": top.name.values,
            "ResidueNames": top.resName.values,
            "ChainID": top.chainID.values,
            "ResidueID": top.resSeq.values + 1,
        }
        for key, val in iteritems(data):
            node = self._get_node(where='/', name=key)
            node[:] = val[:]
Example #2
    def _typecast(self, namespace):
        """Work around for the argparse bug with respect to defaults and FileType not
        playing together nicely -- http://stackoverflow.com/questions/8236954/specifying-default-filenames-with-argparse-but-not-opening-them-on-help"""
        for name, type in iteritems(self.name_to_type):
            setattr(namespace, name, type(getattr(namespace, name)))

        return namespace
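
The workaround assumes a `name_to_type` mapping filled in when the arguments
are registered. A minimal standalone sketch of the same pattern (the
`--logfile` argument and the `name_to_type` dict here are illustrative, not
part of argparse):

import argparse

# Keep the default as a plain string so --help never opens the file,
# then apply the FileType cast after parsing.
parser = argparse.ArgumentParser()
parser.add_argument('--logfile', default='run.log')
name_to_type = {'logfile': argparse.FileType('w')}

namespace = parser.parse_args(['--logfile', 'out.log'])
for name, type_ in name_to_type.items():
    setattr(namespace, name, type_(getattr(namespace, name)))

print(namespace.logfile)  # an open file object now, not a string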
Example #3
def assert_dict_equal(t1, t2, decimal=6):
    """Assert two dicts are equal. This method should actually
    work for any dict of numpy arrays/objects

    Parameters
    ----------
    t1 : object
    t2 : object
    decimal : int
        Number of decimal places to check, for arrays inside the dicts
    """

    # make sure the keys are the same
    eq_(list(t1.keys()), list(t2.keys()))

    for key, val in iteritems(t1):
        # compare numpy arrays using numpy.testing
        if isinstance(val, np.ndarray) or ('pandas' in sys.modules and isinstance(t1, pd.DataFrame)):
            if val.dtype.kind == 'f':
                # compare floats for almost equality
                assert_array_almost_equal(val, t2[key], decimal)
            else:
                # compare everything else (ints, bools) for absolute equality
                assert_array_equal(val, t2[key])
        else:
            eq_(val, t2[key])
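
A quick usage sketch (`eq_` is presumably nose.tools' equality assert; the
call passes because float arrays are only compared to `decimal` places):

import numpy as np

a = {'x': np.array([1.0, 2.0]), 'n': np.array([1, 2]), 'label': 'foo'}
b = {'x': np.array([1.0, 2.0 + 1e-9]), 'n': np.array([1, 2]), 'label': 'foo'}
assert_dict_equal(a, b, decimal=6)  # passes despite the 1e-9 difference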
Example #4
    def topology(self, top):
        """Set the topology in the file

        Parameters
        ----------
        top : mdtraj.Topology
            A topology object
        """
        _check_mode(self.mode, ('w',))

        if self._needs_initialization:
            self._initialize_headers(top.n_atoms)
            self._needs_initialization = False

        top, bonds = top.to_dataframe()

        data = {
            "AtomID": top.index.values + 1,
            "AtomNames": top.name.values,
            "ResidueNames": top.resName.values,
            "ChainID": top.chainID.values,
            "ResidueID": top.resSeq.values + 1,
        }
        for key, val in iteritems(data):
            node = self._get_node(where='/', name=key)
            node[:] = val[:]
Example #5
def assert_dict_equal(t1, t2, decimal=6):
    """Assert two dicts are equal. This method should actually
    work for any dict of numpy arrays/objects

    Parameters
    ----------
    t1 : object
    t2 : object
    decimal : int
        Number of decimal places to check, for arrays inside the dicts
    """

    # make sure the keys are the same
    eq_(list(t1.keys()), list(t2.keys()))

    for key, val in iteritems(t1):
        # compare numpy arrays using numpy.testing
        if isinstance(val, np.ndarray) or ('pandas' in sys.modules and isinstance(t1, pd.DataFrame)):
            if val.dtype.kind == 'f':
                # compare floats for almost equality
                assert_array_almost_equal(val, t2[key], decimal)
            else:
                # compare everything else (ints, bools) for absolute equality
                assert_array_equal(val, t2[key])
        else:
            eq_(val, t2[key])
Example #6
    def _typecast(self, namespace):
        """Workaround for the argparse bug where defaults and FileType don't
        play together nicely -- http://stackoverflow.com/questions/8236954/specifying-default-filenames-with-argparse-but-not-opening-them-on-help"""
        for name, type in iteritems(self.name_to_type):
            setattr(namespace, name, type(getattr(namespace, name)))

        return namespace
Example #7
    def _str_index(self):
        idx = self['index']
        out = []
        out += ['.. index:: %s' % idx.get('default', '')]
        for section, references in iteritems(idx):
            if section == 'default':
                continue
            out += ['   :%s: %s' % (section, ', '.join(references))]
        return out
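
The method assumes `self['index']` maps an optional 'default' entry plus
section names to reference lists. A standalone sketch of the same
transformation on a hand-built dict:

idx = {'default': 'mdtraj', 'refguide': ['compute_distances', 'Topology']}

out = ['.. index:: %s' % idx.get('default', '')]
for section, references in idx.items():
    if section == 'default':
        continue
    out += ['   :%s: %s' % (section, ', '.join(references))]

print('\n'.join(out))
# .. index:: mdtraj
#    :refguide: compute_distances, Topology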
Example #8
    def _str_index(self):
        idx = self['index']
        out = []
        out += ['.. index:: %s' % idx.get('default', '')]
        for section, references in iteritems(idx):
            if section == 'default':
                continue
            out += ['   :%s: %s' % (section, ', '.join(references))]
        return out
Example #9
def save_legacy_hdf(traj, filename):
    """Saves an MDTraj Trajectory as an MSMB2 lh5 file.

    Parameters
    ----------
    traj : MDTraj.Trajectory
        Trajectory object to be saved
    filename : str
        String filename of HDF Trajectory file.
    """

    MAXINT16 = np.iinfo(np.int16).max
    MAXINT32 = np.iinfo(np.int32).max
    DEFAULT_PRECISION = 1000

    def _convert_to_lossy_integers(X, precision):
        """Implementation of the lossy compression used in Gromacs XTC using the pytables library.  Convert 32 bit floats into 16 bit integers.  These conversion functions have been optimized for memory use.  Further memory reduction would require an in-place astype() operation, which one could create using ctypes."""
        if np.max(X) * float(precision) < MAXINT16 and np.min(X) * float(precision) > -MAXINT16:
            X *= float(precision)
            Rounded = X.astype("int16")
            X /= float(precision)
        else:
            X *= float(precision)
            Rounded = X.astype("int32")
            X /= float(precision)
            logger.error("Data range too large for int16: try removing center of mass motion, check for 'blowing up, or just use .h5 or .xtc format.'")
        return(Rounded)


    top, bonds = traj.top.to_dataframe()

    data_dict = {}
    data_dict["AtomID"] = top.index.values + 1
    data_dict["AtomNames"] = top.name.values
    data_dict["ResidueNames"] = top.resName.values
    data_dict["ChainID"] = top.chainID.values
    data_dict["ResidueID"] = top.resSeq.values + 1
    data_dict["XYZList"] = _convert_to_lossy_integers(traj.xyz, DEFAULT_PRECISION)

    atom_dict = {}
    atom_dict["AtomID"] = tables.Int64Atom()
    atom_dict["AtomNames"] = tables.StringAtom(itemsize=4)
    atom_dict["ResidueNames"] = tables.StringAtom(itemsize=4)
    atom_dict["ChainID"] = tables.StringAtom(itemsize=1)
    atom_dict["ResidueID"] = tables.Int64Atom()
    atom_dict["XYZList"] = tables.Int16Atom()

    file_handle = tables.File(filename, 'w')

    for key, val in iteritems(data_dict):
        node = file_handle.createCArray(where='/', name=key, atom=atom_dict[key], shape=val.shape, filters=COMPRESSION)
        node[:] = val[:]

    file_handle.close()
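
The lossy scheme stores each coordinate as `x * precision` truncated to a
16-bit integer, so the reconstruction error stays below `1 / precision`.
A small sketch of that round trip:

import numpy as np

precision = 1000
xyz = np.array([1.23456, -0.98765, 0.00042], dtype=np.float32)

stored = (xyz * precision).astype(np.int16)       # what goes into the file
restored = stored.astype(np.float32) / precision  # what a reader recovers

# astype truncates toward zero, so the error is bounded by 1/precision
assert np.all(np.abs(restored - xyz) < 1.0 / precision)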
Example #10
    def _delayed_init(self):
        import cffi
        ffi = cffi.FFI()
        # some platforms don't have all of these, especially weird compilers or 32-bit machines
        nptypes = [getattr(np, name) for name in self.nptype_names if hasattr(np, name)]
        nptype_descr = dict([('%s%d' % (dtype.kind, dtype.itemsize), dtype) for dtype in map(np.dtype, nptypes)])

        casts = {}
        for code, names in iteritems(self.ctypes):
            for name in names:
                casts[nptype_descr['%s%d' % (code, ffi.sizeof(name))]] = name + ' *'
        # casts is a dict that helps us cast numpy arrays, like
        # {np.float32 : 'float *', np.int32: 'int *'}
        self._ffi = ffi
        self._casts = casts
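
A self-contained sketch of how such a cast table comes out, with an
illustrative stand-in for `self.ctypes` (numpy kind code -> candidate C type
names), assuming cffi is installed:

import numpy as np
import cffi

ffi = cffi.FFI()
ctypes_map = {'f': ['float', 'double'], 'i': ['int', 'long long']}

casts = {}
for code, names in ctypes_map.items():
    for name in names:
        # e.g. 'f' + sizeof(float) -> np.dtype('f4') -> 'float *'
        casts[np.dtype('%s%d' % (code, ffi.sizeof(name)))] = name + ' *'

print(casts)
# {dtype('float32'): 'float *', dtype('float64'): 'double *',
#  dtype('int32'): 'int *', dtype('int64'): 'long long *'}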
Example #11
    def __init__(self, traj, path, run_clone_gen, traj_len):

        try:
            os.mkdir(path + "/")
        except OSError:
            pass

        for (run, clone), num_gens in iteritems(run_clone_gen):

            try:
                os.mkdir(path + "/RUN%d/" % run)
            except OSError:
                pass

            os.mkdir(path + "/RUN%d/CLONE%d" % (run, clone))
            for gen in range(num_gens):
                randomize_coordinates(traj, traj_len)
                traj.save(path + "/RUN%d/CLONE%d/frame%d.xtc" % (run, clone, gen))
Example #12
    def __init__(self, traj, path, run_clone_gen, traj_len):

        try:
            os.mkdir(path + "/")
        except OSError:
            pass

        for (run, clone), num_gens in iteritems(run_clone_gen):

            try:
                os.mkdir(path + "/RUN%d/" % run)
            except OSError:
                pass

            os.mkdir(path + "/RUN%d/CLONE%d" % (run, clone))
            for gen in range(num_gens):
                randomize_coordinates(traj, traj_len)
                traj.save(path + "/RUN%d/CLONE%d/frame%d.xtc" %
                          (run, clone, gen))
Example #13
    def _delayed_init(self):
        import cffi
        ffi = cffi.FFI()
        # some platforms don't have all of these, especially weird compilers or 32-bit machines
        nptypes = [
            getattr(np, name) for name in self.nptype_names
            if hasattr(np, name)
        ]
        nptype_descr = dict([('%s%d' % (dtype.kind, dtype.itemsize), dtype)
                             for dtype in map(np.dtype, nptypes)])

        casts = {}
        for code, names in iteritems(self.ctypes):
            for name in names:
                casts[nptype_descr['%s%d' %
                                   (code, ffi.sizeof(name))]] = name + ' *'
        # casts is a dict that helps us cast numpy arrays, like
        # {np.float32 : 'float *', np.int32: 'int *'}
        self._ffi = ffi
        self._casts = casts
Example #14
def invert_assignments(assignments):
    """Invert an assignments array -- that is, produce a mapping
    from state -> traj/frame

    Parameters
    ----------
    assignments : np.ndarray
        2D array of MSMBuilder assignments

    Returns
    -------
    inverse_mapping : collections.defaultdict
        Mapping from state -> traj,frame, such that inverse_mapping[s]
        gives the conformations assigned to state s.

    Notes
    -----
    The assignments array may have -1's, which are simply placeholders;
    we do not add these to the inverted assignments. Therefore, doing
    the following will raise a KeyError:

        >>> inv_assignments = MSMLib.invert_assignments(assignments)
        >>> print(inv_assignments[-1])
        KeyError: -1
    """

    check_assignment_array_input(assignments)

    inverse_mapping = defaultdict(lambda: ([], []))
    non_neg_inds = np.array(np.where(assignments != -1)).T
    # we do not care about -1's

    for (i, j) in non_neg_inds:
        inverse_mapping[assignments[i, j]][0].append(i)
        inverse_mapping[assignments[i, j]][1].append(j)

    # convert from lists to numpy arrays
    for key, (trajs, frames) in iteritems(inverse_mapping):
        inverse_mapping[key] = (np.array(trajs), np.array(frames))

    return inverse_mapping
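
A usage sketch on a tiny assignments array (two trajectories, three frames
each, with -1 marking a frame that was never assigned):

import numpy as np

assignments = np.array([[0, 1, 1],
                        [1, 0, -1]])
inv = invert_assignments(assignments)

trajs, frames = inv[1]
print(trajs, frames)  # [0 0 1] [1 2 0]: state 1 holds (0,1), (0,2) and (1,0)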
Example #15
def assert_dict_equal(t1, t2, decimal=6):
    """
    Assert two dicts are equal.
    This method should actually
    work for any dict of numpy arrays/objects
    """

    # make sure the keys are the same
    eq_(list(t1.keys()), list(t2.keys()))

    for key, val in iteritems(t1):
        # compare numpy arrays using numpy.testing
        if isinstance(val, np.ndarray):
            if val.dtype.kind == 'f':
                # compare floats for almost equality
                assert_array_almost_equal(val, t2[key], decimal)
            else:
                # compare everything else (ints, bools) for absolute equality
                assert_array_equal(val, t2[key])
        else:
            eq_(val, t2[key])
Example #16
def assert_dict_equal(t1, t2, decimal=6):
    """
    Assert two dicts are equal.
    This method should actually
    work for any dict of numpy arrays/objects
    """

    # make sure the keys are the same
    eq_(list(t1.keys()), list(t2.keys()))

    for key, val in iteritems(t1):
        # compare numpy arrays using numpy.testing
        if isinstance(val, np.ndarray):
            if val.dtype.kind == 'f':
                # compare floats for almost equality
                assert_array_almost_equal(val, t2[key], decimal)
            else:
                # compare everything else (ints, bools) for absolute equality
                assert_array_equal(val, t2[key])
        else:
            eq_(val, t2[key])
Example #17
def main(args, verbose=True):
    """Run the main script.

    Parameters
    ----------
    args : argparse.Namespace
        The collected command line arguments
    """
    if args.atom_indices is not None:
        atom_indices = np.loadtxt(args.atom_indices, int)
    else:
        atom_indices = None

    out_x = ext(args.output)
    out_units = units[out_x]
    out_fields = fields[out_x]
    OutFileFormat = formats[out_x]

    in_x = ext(args.input[0])
    InFileFormat = formats[in_x]

    if args.topology is not None:
        topology = _parse_topology(args.topology)
    else:
        topology = None

    if topology is not None and atom_indices is not None:
        topology = topology.subset(atom_indices)

    n_total = 0
    if args.index is not None:
        assert len(args.input) == 1
        # when chunk is None, we load up ALL of the frames. this isn't
        # strictly necessary, and it costs more memory, but it's a lot
        # harder to get the code correct when we need to use data[start:end]
        # notation when all of the data isn't loaded up at once. it's easy
        # for hdf5 and netcdf, but for the others...
        assert args.chunk is None

    # this is the normal invocation pattern, but for PDBTrajectoryFile it's
    # different
    outfile_factory = functools.partial(OutFileFormat, args.output, 'w',
                        force_overwrite=args.force)

    with outfile_factory() as outfile:
        for fn in args.input:
            assert in_x == ext(fn)
            with InFileFormat(fn, 'r') as infile:

                while True:
                    data, in_units, n_frames = read(infile, args.chunk, stride=args.stride,
                                                    atom_indices=atom_indices)
                    if n_frames == 0:
                        break

                    if topology is not None:
                        # if the user supplied a topology, we should probably
                        # do some simple checks
                        if data['xyz'].shape[1] != topology._numAtoms:
                            warnings.warn('number of atoms in the input does not match the topology')
                        data['topology'] = topology

                    # if they want a specific set of frames, get those
                    # with slice notation
                    if args.index is not None:
                        _data = {}
                        for k, v in iteritems(data):
                            if isinstance(v, np.ndarray):
                                # we don't want the dimensionality to go deficient
                                if isinstance(args.index, int):
                                    _data[k] = v[np.newaxis, args.index]
                                else:
                                    _data[k] = v[args.index]
                            elif isinstance(v, md.Topology):
                                _data[k] = v
                            else:
                                raise RuntimeError()
                        data = _data
                        print(list(data.keys()))
                        n_frames = len(data['xyz'])

                    convert(data, in_units, out_units, out_fields)
                    write(outfile, data)
                    n_total += n_frames

                    if verbose:
                        sys.stdout.write('\rconverted %d frames, %d atoms' % (n_total, data['xyz'].shape[1]))
                        sys.stdout.flush()

    if verbose:
        print(' ')
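
The module-level helpers this script leans on (`ext` plus the `units`,
`fields` and `formats` registries) are assumed; they might look roughly like
this, with illustrative entries rather than mdtraj's actual tables:

import os
import mdtraj as md

def ext(filename):
    # '/path/to/traj.xtc' -> '.xtc', used as the registry key
    return os.path.splitext(filename)[1]

formats = {'.xtc': md.formats.XTCTrajectoryFile,
           '.h5': md.formats.HDF5TrajectoryFile}
units = {'.xtc': 'nanometers', '.h5': 'nanometers'}
fields = {'.xtc': ('xyz', 'time', 'box'),
          '.h5': ('xyz', 'time', 'cell_lengths', 'cell_angles')}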
Example #18
def saveh(file, *args, **kwargs):
    """Save several numpy arrays into a single file in compressed ``.hdf`` format.

    If arguments are passed in with no keywords, the corresponding variable
    names, in the ``.hdf`` file, are 'arr_0', 'arr_1', etc. If keyword arguments
    are given, the corresponding variable names, in the ``.hdf`` file will
    match the keyword names.

    Parameters
    ----------
    file : str or tables.File
        Either the file name (string) or an open pytables file
        (file-like object opened with tables.openFile(...))
        where the data will be saved.
    args : Arguments, optional
        Arrays to save to the file. Since it is not possible for Python to
        know the names of the arrays outside `savez`, the arrays will be saved
        with names "arr_0", "arr_1", and so on. These arguments can be any
        expression.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Arrays will be saved in the file with the
        keyword names.

    Notes
    -----
    `saveh` will overwrite files by default. If you have an hdf5 file that
    contains the arrays `arr_0` and `arr_1` and you attempt to save a new
    array `x`, it will be added alongside them. But if you save a new
    `arr_0`, it will overwrite your previous array.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        When arrays are of an unsupported type

    See Also
    --------
    numpy.savez : Save several arrays into a single file in uncompressed ``.npz`` format.
    """

    if isinstance(file, basestring):
        if TABLES2:
            handle = tables.openFile(file, 'a')
        else:
            handle = tables.open_file(file, 'a')
        own_fid = True
    else:
        if not isinstance(file, tables.File):
            raise TypeError('file must be either a string '
                            'or an open tables.File: %s' % file)
        handle = file
        own_fid = False

    # name all the arrays
    namedict = kwargs
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            if own_fid:
                handle.close()
            raise ValueError('Cannot use un-named variables '
                             'and keyword %s' % key)
        namedict[key] = val

    # ensure that they don't already exist
    if TABLES2:
        current_nodes = [e.name for e in handle.listNodes(where='/')]
    else:
        current_nodes = [e.name for e in handle.list_nodes(where='/')]

    for key in namedict.keys():
        if key in current_nodes:
            if TABLES2:
                handle.removeNode('/', name=key)
            else:
                handle.remove_node('/', name=key)
            # per discussion on github, https://github.com/rmcgibbo/mdtraj/issues/5
            # silent overwriting appears to be the desired functionality
            # raise IOError('Array already exists in file: %s' % key)

    # save all the arrays
    try:
        for key, val in iteritems(namedict):
            if not isinstance(val, np.ndarray):
                raise TypeError('Only numpy arrays can '
                                'be saved: type(%s) is %s' % (key, type(val)))
            try:
                atom = tables.Atom.from_dtype(val.dtype)
            except ValueError:
                raise TypeError('Arrays of this dtype '
                                'cannot be saved: %s' % val.dtype)

            if TABLES2:
                node = handle.createCArray(where='/',
                                           name=key,
                                           atom=atom,
                                           shape=val.shape,
                                           filters=COMPRESSION)
            else:
                node = handle.create_carray(where='/',
                                            name=key,
                                            atom=atom,
                                            shape=val.shape,
                                            filters=COMPRESSION)

            node[:] = val

    except Exception:
        handle.close()
        if own_fid:
            os.unlink(file)
        raise

    handle.flush()
    if own_fid:
        handle.close()
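
A usage sketch: positional arrays get automatic `arr_N` names, keyword arrays
keep their names, and re-saving an existing name overwrites that node:

import numpy as np

saveh('data.h5', np.arange(10))              # stored as 'arr_0'
saveh('data.h5', xyz=np.random.randn(5, 3))  # added alongside as 'xyz'
saveh('data.h5', xyz=np.zeros((5, 3)))       # overwrites the previous 'xyz'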
Example #19
def main(args, verbose=True):
    """Run the main script.

    Parameters
    ----------
    args : argparse.Namespace
        The collected command line arguments
    """
    if args.atom_indices is not None:
        atom_indices = np.loadtxt(args.atom_indices, int)
    else:
        atom_indices = None

    out_x = ext(args.output)
    out_units = units[out_x]
    out_fields = fields[out_x]
    OutFileFormat = formats[out_x]

    in_x = ext(args.input[0])
    InFileFormat = formats[in_x]

    if args.topology is not None:
        topology = md.PDBTrajectoryFile(args.topology).topology
    else:
        topology = None

    if topology is not None and atom_indices is not None:
        topology = topology.subset(atom_indices)

    n_total = 0
    if args.index is not None:
        assert len(args.input) == 1
        # when chunk is None, we load up ALL of the frames. this isn't
        # strictly necessary, and it costs more memory, but it's a lot
        # harder to get the code correct when we need to use data[start:end]
        # notation when all of the data isn't loaded up at once. it's easy
        # for hdf5 and netcdf, but for the others...
        assert args.chunk is None

    # this is the normal invocation pattern, but for PDBTrajectoryFile it's
    # different
    outfile_factory = functools.partial(OutFileFormat,
                                        args.output,
                                        'w',
                                        force_overwrite=args.force)

    with outfile_factory() as outfile:
        for fn in args.input:
            assert in_x == ext(fn)
            with InFileFormat(fn, 'r') as infile:

                while True:
                    data, in_units, n_frames = read(infile,
                                                    args.chunk,
                                                    stride=args.stride,
                                                    atom_indices=atom_indices)
                    if n_frames == 0:
                        break

                    if topology is not None:
                        # if the user supplied a topology, we should probably
                        # do some simple checks
                        if data['xyz'].shape[1] != topology._numAtoms:
                            warnings.warn('number of atoms in the input does not match the topology')
                        data['topology'] = topology

                    # if they want a specific set of frames, get those
                    # with slice notation
                    if args.index is not None:
                        _data = {}
                        for k, v in iteritems(data):
                            if isinstance(v, np.ndarray):
                                # we don't want the dimensionality to go deficient
                                if isinstance(args.index, int):
                                    _data[k] = v[np.newaxis, args.index]
                                else:
                                    _data[k] = v[args.index]
                            elif isinstance(v, md.Topology):
                                _data[k] = v
                            else:
                                raise RuntimeError()
                        data = _data
                        print(list(data.keys()))
                        n_frames = len(data['xyz'])

                    convert(data, in_units, out_units, out_fields)
                    write(outfile, data)
                    n_total += n_frames

                    if verbose:
                        sys.stdout.write('\rconverted %d frames, %d atoms' %
                                         (n_total, data['xyz'].shape[1]))
                        sys.stdout.flush()

    if verbose:
        print(' ')
Example #20
def saveh(file, *args, **kwargs):
    """Save several numpy arrays into a single file in compressed ``.hdf`` format.

    If arguments are passed in with no keywords, the corresponding variable
    names, in the ``.hdf`` file, are 'arr_0', 'arr_1', etc. If keyword arguments
    are given, the corresponding variable names, in the ``.hdf`` file will
    match the keyword names.

    Parameters
    ----------
    file : str or tables.File
        Either the file name (string) or an open pytables file
        (file-like object opened with tables.openFile(...))
        where the data will be saved.
    args : Arguments, optional
        Arrays to save to the file. Since it is not possible for Python to
        know the names of the arrays outside `savez`, the arrays will be saved
        with names "arr_0", "arr_1", and so on. These arguments can be any
        expression.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Arrays will be saved in the file with the
        keyword names.

    Notes
    -----
    `saveh` will overwrite files by default. If you have an hdf5 file that
    contains the arrays `arr_0` and `arr_1` and you attempt to save a new
    array `x`, it will be added alongside them. But if you save a new
    `arr_0`, it will overwrite your previous array.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        When arrays are of an unsupported type

    See Also
    --------
    numpy.savez : Save several arrays into a single file in uncompressed ``.npz`` format.
    """


    if isinstance(file, basestring):
        handle = tables.openFile(file, 'a')
        own_fid = True
    else:
        if not isinstance(file, tables.File):
            raise TypeError('file must be either a string '
                'or an open tables.File: %s' % file)
        handle = file
        own_fid = False

    # name all the arrays
    namedict = kwargs
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            if own_fid:
                handle.close()
            raise ValueError('Cannot use un-named variables '
                'and keyword %s' % key)
        namedict[key] = val

    # ensure that they don't already exist
    current_nodes = [e.name for e in handle.listNodes(where='/')]
    for key in namedict.keys():
        if key in current_nodes:
            handle.removeNode('/', name=key)
            # per discussion on github, https://github.com/rmcgibbo/mdtraj/issues/5
            # silent overwriting appears to be the desired functionality
            # raise IOError('Array already exists in file: %s' % key)

    # save all the arrays
    try:
        for key, val in iteritems(namedict):
            if not isinstance(val, np.ndarray):
                raise TypeError('Only numpy arrays can '
                    'be saved: type(%s) is %s' % (key, type(val)))
            try:
                atom = tables.Atom.from_dtype(val.dtype)
            except ValueError:
                raise TypeError('Arrays of this dtype '
                    'cannot be saved: %s' % val.dtype)

            node = handle.createCArray(where='/', name=key,
                atom=atom, shape=val.shape, filters=COMPRESSION)
            node[:] = val

    except Exception:
        handle.close()
        if own_fid:
            os.unlink(file)
        raise

    handle.flush()
    if own_fid:
        handle.close()