Example #1
def main():
    """Parse command line inputs, load up files, then call run() and save() to do
    the real work"""
    parser.add_argument('output_dir', default='PDBs')
    args = parser.parse_args()

    # load...
    # project
    project = Project.load_from(args.project)

    # assignments
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    # states
    if -1 in args.states:
        states = np.unique(assignments[np.where(assignments != -1)])
        logger.info('Yanking from all %d states', len(states))
    else:
        # ensure that the states are sorted, and that they're unique -- you
        # can only request each state once
        states = np.unique(args.states)
        logger.info("Yanking from the following states: %s", states)

    # extract the conformations using np.random for the randomness
    confs_by_state = project.get_random_confs_from_states(
        assignments, states=states, num_confs=args.conformations_per_state,
        replacement=args.replacement)

    # save the conformations to disk, in the requested style
    save(confs_by_state=confs_by_state, states=states, style=args.style,
         format=args.format, outdir=args.output_dir)
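The try/except fallback above (first 'arr_0', then 'Data') recurs throughout these examples. A minimal helper sketch that factors it out, assuming only that io.loadh raises KeyError for a missing node (which the except clause above already relies on); the function name is illustrative, not from any of these projects:

from mdtraj import io

def load_first_key(path, keys=('arr_0', 'Data')):
    # try each known key in turn; io.loadh raises KeyError if the node is absent
    for key in keys:
        try:
            return io.loadh(path, key)
        except KeyError:
            continue
    raise KeyError('none of %s found in %s' % (keys, path))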
Example #2
def test_overwrite_1():
    fid, fn = tempfile.mkstemp()
    try:
        a = np.arange(10)
        b = a + 1
        io.saveh(fn, a=a)
        io.saveh(fn, b=b)
        eq(io.loadh(fn, 'a'), a)
        eq(io.loadh(fn, 'b'), b)
    finally:
        if os.path.exists(fn):
            os.close(fid)
            os.unlink(fn)
Example #3
File: cca.py Project: schwancr/cca
    def load(cls, filename):
        """
        Load a previously saved CCA object

        Parameters
        ----------
        filename : str
            filename to load data from

        Returns
        -------
        cca_object : CCA
            loaded cca_object
        """
            
        filehandler = io.loadh(filename)

        regularizer = pickle.loads(filehandler['regularizer'][0])
        eta = filehandler['eta'][0]

        cca_object = cls(regularization=regularizer, regularization_strength=eta)

        if 'sol' in filehandler.keys():
            cca_object.v = filehandler['sol']
            cca_object._has_solution = True

        return cca_object
Example #4
def test_read_stride(get_fn):
    # Read a binpos with stride=3
    fn_binpos = get_fn('frame0.binpos')
    with BINPOSTrajectoryFile(fn_binpos) as f:
        xyz = f.read(stride=3)
    xyz2 = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    assert eq(xyz, xyz2[::3])
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('assignments', default='Macro4/MacroAssignments.h5', help='Path to an assignments file. (default=Macro4/MacroAssignments.h5)')
    parser.add_argument('--project', default='ProjectInfo.yaml', help='Path to ProjectInfo.yaml file. (default=ProjectInfo.yaml)')
    args = parser.parse_args()

    project = Project.load_from(args.project)
    t = reduce(operator.add, (project.load_traj(i) for i in range(project.n_trajs)))

    phi_angles = md.compute_dihedrals(t, [PHI_INDICES]) * 180.0 / np.pi
    psi_angles = md.compute_dihedrals(t, [PSI_INDICES]) * 180.0 / np.pi
    state_index = np.hstack(io.loadh(args.assignments)['arr_0'])

    for i in np.unique(state_index):
        pp.plot(phi_angles[np.where(state_index == i)],
                psi_angles[np.where(state_index == i)],
                'x', label='State %d' % i)


    pp.title("Alanine Dipeptide Macrostates")
    pp.xlabel(r"$\phi$")
    pp.ylabel(r"$\psi$")
    annotate()

    pp.legend(loc=1, labelspacing=0.075, prop={'size': 8.0}, scatterpoints=1,
              markerscale=0.5, numpoints=1)
    pp.xlim([-180, 180])
    pp.ylim([-180, 180])
    pp.show()
Example #6
def test_read_stride_2(get_fn):
    # Read a binpos with stride=3 when n_frames is supplied (different code path)
    fn_binpos = get_fn('frame0.binpos')
    with BINPOSTrajectoryFile(fn_binpos) as f:
        xyz = f.read(n_frames=1000, stride=3)
    xyz2 = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    assert eq(xyz, xyz2[::3])
Example #7
def test_read_atom_indices_slice(get_fn):
    "Read a binpos with atom_indices as a slice"
    fn_binpos = get_fn('frame0.binpos')
    with BINPOSTrajectoryFile(fn_binpos) as f:
        xyz = f.read(atom_indices=slice(0, 10, None))
    xyz2 = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    assert eq(xyz, xyz2[:, 0:10, :])
Example #8
def readData(FN):
    """
    Read data from a filename based on its extension.

    Inputs:
        1) FN: filename to find data
    Outputs:
        1) data: numpy array containing the data read in.

    The function tries numpy's load and loadtxt, and exits with an error if it
    cannot read the file. Additionally, if the filename ends in lh5 it will
    load the data as a trajectory but return only the array of coordinates.
    """
    if FN.split('.')[-1] == 'npy':
        data = load(FN)
    elif FN.split('.')[-1] == 'lh5':  # assume this is a trajectory in msmbuilder
        data = md.load(FN).xyz  # only return the coordinates
    elif FN.split('.')[-1] == 'h5':
        data = io.loadh(FN)
        try:
            data = data['arr_0']
        except KeyError:
            data = data['Data']
        data = data[where(data != -1)]
    else:
        try:
            data = loadtxt(FN)
        except:
            print "\n\n dataIO.readData: Cannot read %s. Use numpy.save or numpy.savetxt. Exiting" % FN
            exit()
    if data.shape == ():  # if there is only one data point, turn it into an array
        data = array([data])
    return data
Example #9
def reduce_shells(system, num_shells):
    """
    Combine shells into final maps for average intensity, <I>/sigma(I), and number of pixels.
    Also compute mean of these parameters per resolution shell; return as shell_stats.
    """
    
    map_shape = (len(system['bins']['h']), len(system['bins']['k']), len(system['bins']['l']))
    map_keys = ["I", "I_sigI", "n_pixels"]

    file_glob = glob.glob(system['map_path'] + "temp/grid_rshell*.h5")
    filelist = sorted(file_glob, key = lambda name: int(name.split('rshell')[-1].split('_')[0]))
    assert len(filelist) == num_shells

    hkl = np.array(list(itertools.product(system['bins']['h'], system['bins']['k'], system['bins']['l'])))
    hkl_res = map_utils.compute_resolution(system['space_group'], system['cell'], hkl)
    hkl_res = hkl_res.reshape(map_shape)
    
    combined_maps, shell_stats = dict(), dict()
    shell_stats['resolution'] = np.zeros(num_shells)
    
    for key in map_keys:
        print "on key %s" %key
        combined_maps[key] = np.zeros(map_shape)
        shell_stats[key] = np.zeros(num_shells)

        for shell in range(len(filelist)):
            data = io.loadh(filelist[shell], key)
            combined_maps[key] += data
            shell_stats[key][shell] = np.median(data[data>0])

            if key == "I":
                shell_stats['resolution'][shell] = np.median(hkl_res[data>0])
            
    return combined_maps, shell_stats
Example #10
    def load(cls, tica_fn):
        """
        load a tICA solution to use in projecting data.

        Parameters:
        -----------
        tica_fn : str
            filename pointing to tICA solutions

        """
        # the only variables we need to save are the two matrices
        # and the eigenvectors / values as well as the lag time

        logger.warn("NOTE: You can only use the tICA solution, you will "
                    "not be able to continue adding data")
        f = io.loadh(tica_fn)

        metric = cPickle.loads(f["metric_string"][0])

        tica_obj = cls(f['lag'][0], prep_metric=metric)
        # lag entry is an array... with a single item

        tica_obj.timelag_corr_mat = f['timelag_corr_mat']
        tica_obj.cov_mat = f['cov_mat']

        tica_obj.vals = f['vals']
        tica_obj.vecs = f['vecs']

        tica_obj._sort()

        return tica_obj
Example #11
def entry_point():
    args = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    try:
        assignments = io.loadh(args.assignments, 'arr_0')
        distances = io.loadh(args.distances, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')
        distances = io.loadh(args.distances, 'Data')

    trimmed = run(assignments, distances, args.rmsd_cutoff)

    io.saveh(args.output, trimmed)
    logger.info('Saved output to %s', args.output)
Example #12
def main():
    args, atom_indices, project, project_root = parse_cmdline()

    # load all of the data from disk
    xyzlist, sampled_frames = load_trajs(project, os.path.dirname(args.project_yaml),
                                       atom_indices, args.stride, args.fraction)
    assignments = io.loadh(args.assignments, 'arr_0')
    # pick only the assignments that had their xyz data loaded
    assignments = np.concatenate([assignments[i, sampled_frames[i]] for i in range(len(sampled_frames))])

    # make sure we didn't mess up the subsampling and get nonsense data
    assert not np.any(assignments < 0), 'negative assignments? stride/sampling is probably messed up. did you use a different stride than you clustered with?'
    #assert np.all(np.unique(assignments) == np.arange(np.max(assignments)+1)), "assignments don't go from 0 to max. did you use a different stride than you clustered with?"

    n_real_atoms = len(atom_indices)
    n_padded_atoms = xyzlist.shape[2]
    assert n_padded_atoms >= n_real_atoms

    pairwise = calculate_pairwise_rmsd(xyzlist, n_real_atoms)

    print 'computing silhouette...'
    score = silhouette_score(pairwise, assignments, metric='precomputed')
    print 'silhouette score: %f' % score

    path = os.path.join(args.output, 'silhouette.dat')
    print 'saving results to flat text file (append): %s...' % path
    if not os.path.exists(args.output):
        os.makedirs(args.output)

    with open(path, 'a') as f:
        f.write('%f\n' % score)
Example #13
def test_read_atom_indices(get_fn):
    "Read a binpos with atom_indices as a list"
    fn_binpos = get_fn('frame0.binpos')
    with BINPOSTrajectoryFile(fn_binpos) as f:
        xyz = f.read(atom_indices=[0, 1, 2])
    xyz2 = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    assert eq(xyz, xyz2[:, [0, 1, 2], :])
Example #14
def test_read_atomindices_1(get_fn, fn_xtc):
    iofile = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    with XTCTrajectoryFile(fn_xtc) as f:
        xyz, time, step, box = f.read(atom_indices=[0, 1, 2])
    assert eq(xyz, iofile['xyz'][:, [0, 1, 2]])
    assert eq(step, iofile['step'])
    assert eq(box, iofile['box'])
    assert eq(time, iofile['time'])
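Every XTC test in this listing compares f.read() against a reference file, frame0.xtc.h5, whose arrays live under the keys 'xyz', 'time', 'step', and 'box'. A sketch of how such a fixture could be regenerated with io.saveh (the paths are illustrative, not taken from the test suite):

from mdtraj import io
from mdtraj.formats import XTCTrajectoryFile

# hypothetical paths; read the whole trajectory, then save each array by name
with XTCTrajectoryFile('frame0.xtc') as f:
    xyz, time, step, box = f.read()
io.saveh('frame0.xtc.h5', xyz=xyz, time=time, step=step, box=box)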
Example #15
def test_read_atomindices_2():
    iofile = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    with XTCTrajectoryFile(fn_xtc) as f:
        xyz, time, step, box = f.read(atom_indices=slice(None, None, 2))
    yield lambda: eq(xyz, iofile['xyz'][:, ::2])
    yield lambda: eq(step, iofile['step'])
    yield lambda: eq(box, iofile['box'])
    yield lambda: eq(time, iofile['time'])
Example #16
def project_plot_data():
    data = np.load('raw_data.npy')
    tica = io.loadh('original_SREP_tica.h5')
    print "raw_data.shape, vecs.shape:", data.shape, tica['vecs'].shape
    proj = np.dot(data, tica['components'].T)
    print "projected.shape:", proj.shape
    for i in range(len(proj)):
        plt.plot(proj[i][0], proj[i][1], 'r*', markersize=18)
        plt.text(proj[i][0], proj[i][1], i, fontsize=18)
Example #17
def plot_extracted_frames_locations_on_tica(stretch,tica_lag,n_frames):
    ev0 = io.loadh('analysis/tica_projections/ev0.h5')['arr_0']
    ev1 = io.loadh('analysis/tica_projections/ev1.h5')['arr_0']
    center_ev0 = ev0 - np.mean(ev0)
    center_ev1 = ev1 - np.mean(ev1)
    r = (center_ev0 + center_ev1)**2
    ind = np.argsort(r)
    plt.figure(figsize=(12,8))
    plt.hist2d(ev0, ev1, bins=100, norm=LogNorm())
    plt.plot(ev0[ind[-n_frames * stretch::stretch]], ev1[ind[-n_frames * stretch::stretch]], 'r*', markersize=18)
    np.savetxt('analysis/ev0_selected.txt', (ev0[ind[-n_frames * stretch::stretch]]))
    np.savetxt('analysis/ev1_selected.txt', (ev1[ind[-n_frames * stretch::stretch]]))
    plt.xlabel('tIC 1')
    plt.ylabel('tIC 2')
    plt.savefig('analysis/location_of_%d_extracted_frames_on_tica_l%d.png' %(n_frames,tica_lag))
    print "\nInfo: saved 'location_of_%d_extracted_frames_on_tica_l%d.png' at folder 'analysis'\n" %(n_frames,tica_lag)
    return ev0[ind[-n_frames * stretch::stretch]] , ev1[ind[-n_frames * stretch::stretch]]
Example #18
def test_read_0():
    with BINPOSTrajectoryFile(fn_binpos) as f:
        xyz = f.read()
    with DCDTrajectoryFile(fn_dcd) as f:
        xyz2 = f.read()[0]
    xyz3 = io.loadh(get_fn("frame0.binpos.h5"), "xyz")

    yield lambda: eq(xyz[1:], xyz2)
    yield lambda: eq(xyz, xyz3)
Example #19
def test_read_stride(get_fn, fn_xtc):
    # read xtc with stride
    iofile = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    with XTCTrajectoryFile(fn_xtc) as f:
        xyz, time, step, box = f.read(stride=3)
    assert eq(xyz, iofile['xyz'][::3])
    assert eq(step, iofile['step'][::3])
    assert eq(box, iofile['box'][::3])
    assert eq(time, iofile['time'][::3])
Example #20
def test_read_chunk3(get_fn, fn_xtc):
    with XTCTrajectoryFile(fn_xtc, chunk_size_multiplier=2) as f:
        xyz, time, step, box = f.read(n_frames=100)

    iofile = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    assert eq(xyz, iofile['xyz'][:100])
    assert eq(step, iofile['step'][:100])
    assert eq(box, iofile['box'][:100])
    assert eq(time, iofile['time'][:100])
Example #21
def test_read_chunk2():
    with XTCTrajectoryFile(fn_xtc, 'r', chunk_size_multiplier=1) as f:
        xyz, time, step, box = f.read()

    iofile = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    yield lambda: eq(xyz, iofile['xyz'])
    yield lambda: eq(step, iofile['step'])
    yield lambda: eq(box, iofile['box'])
    yield lambda: eq(time, iofile['time'])
Example #22
def test_read_1():
    with BINPOSTrajectoryFile(fn_binpos, chunk_size_multiplier=0.5) as f:
        xyz = f.read()
    with DCDTrajectoryFile(fn_dcd) as f:
        xyz2 = f.read()[0]
    xyz3 = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')

    yield lambda: eq(xyz[1:], xyz2)
    yield lambda: eq(xyz, xyz3)
Example #23
def test_read_stride_2():
    "read xtc with stride with n_frames"
    iofile = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    with XTCTrajectoryFile(fn_xtc) as f:
        xyz, time, step, box = f.read(n_frames=1000, stride=3)
    yield lambda: eq(xyz, iofile['xyz'][::3])
    yield lambda: eq(step, iofile['step'][::3])
    yield lambda: eq(box, iofile['box'][::3])
    yield lambda: eq(time, iofile['time'][::3])
Example #27
def train(start, stop, stride):
    dataset = []
    # stage1 has 4 trajectories; stages 2-4 have 20 each
    for stage, n_trajs in [('stage1', 4), ('stage2', 20), ('stage3', 20), ('stage4', 20)]:
        for i in range(n_trajs):
            data = io.loadh('../../../%s/analysis/t%d.h5' % (stage, i))['distances']
            print "working on:", i, data.shape
            dataset.append(data)
    return dataset
Example #28
def test_read_atomindices_w_stride(get_fn, fn_xtc):
    # test case for bug: https://github.com/mdtraj/mdtraj/issues/1394
    iofile = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    for stride in strides:
        with XTCTrajectoryFile(fn_xtc) as f:
            xyz, time, step, box = f.read(atom_indices=[0, 1, 2], stride=stride)
        assert eq(xyz, iofile['xyz'][:, [0, 1, 2]][::stride])
        assert eq(step, iofile['step'][::stride])
        assert eq(box, iofile['box'][::stride])
        assert eq(time, iofile['time'][::stride])
Example #29
def test_read_stride_n_frames(get_fn, fn_xtc):
    # read xtc with stride with n_frames
    iofile = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    for s in strides:
        with XTCTrajectoryFile(fn_xtc) as f:
            xyz, time, step, box = f.read(n_frames=1000, stride=s)
        assert eq(xyz, iofile['xyz'][::s])
        assert eq(step, iofile['step'][::s])
        assert eq(box, iofile['box'][::s])
        assert eq(time, iofile['time'][::s])
Example #30
def load(filename):
    # delay these imports, since this module is loaded in a bunch
    # of places but not necessarily used
    import scipy.io
    from msmbuilder import Project
    
    # the filename extension
    ext = os.path.splitext(filename)[1]

    # load trajectories
    if ext != '.h5' and ext in md._FormatRegistry.loaders.keys():
        val = md.load(filename)

    # load flat text files
    elif 'AtomIndices.dat' in filename:
        # try loading AtomIndices first, because the default for loadtxt
        # is to use floats
        val = np.loadtxt(filename, dtype=np.int)
    elif ext in ['.dat']:
        # try loading general .dats with floats
        val = np.loadtxt(filename)
    
    # short circuit opening ProjectInfo
    elif ('ProjectInfo.yaml' in filename) or ('ProjectInfo.h5' in filename) or (re.search(r'ProjectInfo.*\.yaml', filename)):
        val = Project.load_from(filename)
        
    # load with serializer files that end with .h5, .hdf or .h5.distances
    elif ext in ['.h5', '.hdf']:
        val = io.loadh(filename, deferred=False)
    elif filename.endswith('.h5.distances'):
        val = io.loadh(filename, deferred=False)

    # load matrices
    elif ext in ['.mtx']:
        val = scipy.io.mmread(filename)
        
    else:
        raise TypeError("I could not infer how to load this file. You "
            "can either request load=False, or perhaps add more logic to "
            "the load heuristics in this class: %s" % filename)

    return val
Example #31
def test_read_2(get_fn):
    fn_binpos = get_fn('frame0.binpos')
    fn_dcd = get_fn('frame0.dcd')
    with BINPOSTrajectoryFile(fn_binpos, chunk_size_multiplier=10) as f:
        xyz = f.read()
    with DCDTrajectoryFile(fn_dcd) as f:
        xyz2 = f.read()[0]
    xyz3 = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')

    assert eq(xyz[1:], xyz2)
    assert eq(xyz, xyz3)
Example #32
def entry_point():
    """Parse command line inputs, load up files, and build a movie."""
    args = parser.parse_args()
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    num_steps = int(args.num_steps)
    starting_state = int(args.starting_state)

    project = Project.load_from(args.project)
    T = scipy.io.mmread(args.tprob).tocsr()

    state_traj = msm_analysis.sample(T, starting_state, num_steps)
    sampled_traj = project.get_random_confs_from_states(
        assignments, state_traj, 1)
    traj = sampled_traj[0]
    traj["XYZList"] = np.array([t["XYZList"][0] for t in sampled_traj])
    traj.save(args.output)
Example #33
def test_read_stride_n_frames_offsets(get_fn, fn_xtc):
    # read xtc with stride with n_frames and offsets
    iofile = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    for s in (1, 2, 3, 4, 5):
        with XTCTrajectoryFile(fn_xtc) as f:
            f.offsets # pre-compute byte offsets between frames
            xyz, time, step, box = f.read(n_frames=1000, stride=s)
        assert eq(xyz, iofile['xyz'][::s])
        assert eq(step, iofile['step'][::s])
        assert eq(box, iofile['box'][::s])
        assert eq(time, iofile['time'][::s])
Example #35
def main():
    """Parse command line inputs, load up files, and build a movie."""
    args = parser.parse_args()
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    num_steps = int(args.num_steps)
    starting_state = int(args.starting_state)

    project = Project.load_from(args.project)
    T = scipy.io.mmread(args.tprob).tocsr()

    state_traj = msm_analysis.sample(T, starting_state, num_steps)
    sampled_traj = project.get_random_confs_from_states(
        assignments, state_traj, 1)
    traj = sampled_traj[0]
    traj["XYZList"] = np.array([t["XYZList"][0] for t in sampled_traj])
    traj.save(args.output)
Example #36
    def load_from(cls, filename):
        """
        Load project from disk

        Parameters
        ----------
        filename : string
            path to a legacy .h5 or current .yaml file.

        Returns
        -------
        project : the loaded project object

        """

        rootdir = os.path.abspath(os.path.dirname(filename))

        if filename.endswith('.yaml'):
            with open(filename) as f:
                ondisk = yaml.load(f, Loader=Loader)
                records = {'conf_filename': ondisk['conf_filename'],
                           'traj_lengths': [],
                           'traj_paths': [],
                           'traj_converted_from': [],
                           'traj_errors': []}

                for trj in ondisk['trajs']:
                    records['traj_lengths'].append(trj['length'])
                    records['traj_paths'].append(trj['path'])
                    records['traj_errors'].append(trj['errors'])
                    records['traj_converted_from'].append(trj['converted_from'])

        elif filename.endswith('.h5'):
            ondisk = io.loadh(filename, deferred=False)
            n_trajs = len(ondisk['TrajLengths'])
            records = {'conf_filename': str(ondisk['ConfFilename'][0]),
                       'traj_lengths': ondisk['TrajLengths'],
                       'traj_paths': [],
                       'traj_converted_from': [[None]] * n_trajs,
                       'traj_errors': [None] * n_trajs}

            for i in xrange(n_trajs):
                # this is the convention used in the hdf project format to get the traj paths
                path = os.path.join(ondisk['TrajFilePath'][0], ondisk['TrajFileBaseName'][0] + str(i) + ondisk['TrajFileType'][0])
                records['traj_paths'].append(path)

        else:
            raise ValueError('Sorry, I can only open files in .yaml'
                             ' or .h5 format: %s' % filename)

        return cls(records, validate=False, project_dir=rootdir)
Example #37
def entry_point():
    """Parse command line inputs, load up files, then call run() and save() to do
    the real work"""
    parser.add_argument('output_dir', default='PDBs')
    args = parser.parse_args()

    # load...
    # project
    project = Project.load_from(args.project)

    # assignments
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    # states
    if -1 in args.states:
        states = np.unique(assignments[np.where(assignments != -1)])
        logger.info('Yanking from all %d states', len(states))
    else:
        # ensure that the states are sorted, and that they're unique -- you
        # can only request each state once
        states = np.unique(args.states)
        logger.info("Yanking from the following states: %s", states)

    # extract the conformations using np.random for the randomness
    confs_by_state = project.get_random_confs_from_states(
        assignments,
        states=states,
        num_confs=args.conformations_per_state,
        replacement=args.replacement)

    # save the conformations to disk, in the requested style
    save(confs_by_state=confs_by_state,
         states=states,
         style=args.style,
         format=args.format,
         outdir=args.output_dir)
Example #38
def test_overwrite_2():
    fid, fn = tempfile.mkstemp()
    try:
        a = np.arange(10)
        b = a + 1
        io.saveh(fn, a=a)
        io.saveh(fn, a=b)
        eq(io.loadh(fn, 'a'), b)
    finally:
        if os.path.exists(fn):
            os.close(fid)
            os.unlink(fn)
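Taken together, test_overwrite_1 and test_overwrite_2 pin down io.saveh's update semantics: writing a new key to an existing file appends it, while rewriting an existing key replaces that array. A minimal sketch (the filename is illustrative):

import numpy as np
from mdtraj import io

io.saveh('example.h5', a=np.arange(10))      # create the file with array 'a'
io.saveh('example.h5', b=np.arange(10) + 1)  # append 'b'; 'a' is untouched
io.saveh('example.h5', a=np.zeros(10))       # overwrite the existing 'a'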
Example #39
def plot_macros():
    for i in range(15):
        a = io.loadh('../macros_on_tica/on_tica_macro_%d.h5' % i)['arr_0']
        print i, a.shape
        plt.plot(a[:, 0],
                 a[:, 1],
                 'o',
                 alpha=0.8,
                 markersize=8,
                 label=i,
                 color=tableau20[i])
    plt.grid(True)
    leg = plt.legend(ncol=3, numpoints=1, scatterpoints=1, handletextpad=0.2,
                     labelspacing=None, columnspacing=None, shadow=True,
                     fancybox=True, bbox_to_anchor=(0.4, 1.05),
                     prop={'family': 'serif', 'size': '20', 'weight': 'light'})
    for lh in leg.legendHandles:
        lh._legmarker.set_alpha(1)
Example #40
    def load(cls, input_fn, kernel=None):
        """
        load a ktica object saved via the .save method. 

        Parameters
        ----------
        input_fn : str
            input filename
        kernel : kernel instance, optional
            kernel to use when calculating inner products. If None,
            then we will look in the file. If it's not there, then an 
            exception will be raised

        Returns
        -------
        kt : ktica instance
        """

        f = io.loadh(input_fn)

        if kernel is None:
            if 'kernel_str' in f.keys():
                kernel = pickle.loads(f['kernel_str'][0])
            else:
                raise Exception("kernel_str not found in %s. Need to pass a kernel object" % input_fn)

        kt = cls(kernel, reg_factor=f['reg_factor'][0])  
        # dt and reg_factor were saved as arrays with one element

        kt.K_uncentered = f['K_uncentered']
        kt.K = f['K']

        kt.M = f['M'].astype(np.double)
        kt.a = f['a']
        kt.a_mean = f['a_mean']
        kt.a_stdev = f['a_stdev']


        kt.vals = f['vals']
        kt.vecs = f['vecs']

        kt._normalized = False
        kt._sort()
        # ^^^ sorting also normalizes

        return kt
Example #41
def cluster():
    '''
    This function performs K-means clustering in the tICA space and saves assignment files for each trajectory.
    Cluster centers are also saved in the `microstate_centers.txt` file.
    '''
    cluster = KMeans(n_clusters=n_states, n_jobs=-1, verbose=0, max_iter=100, tol=0.0001)
    dataset, ev0, ev1 = [], [], []
    print "Loading projected data..."
    for i in tqdm(range(start_traj, end_traj + 1)):
        a = io.loadh('%s/traj%d_%s.h5' % (proj_path, i, traj_name))['arr_0']
        a = a[:, 0:2]
        dataset.append(a)
        ev0.extend(a[:, 0])
        ev1.extend(a[:, 1])
    print "Clustering %d datapoints..." % len(ev0)
    cluster.fit(dataset)
    for i in range(start_traj, end_traj + 1):
        np.savetxt('%s/assigns_%d.txt' % (out_path, i), np.array(cluster.labels_[i - start_traj]), fmt='%d')
    np.savetxt('%s/microstate_centers.txt' % out_path, np.array(cluster.cluster_centers_))
    print "Saved microstate assignments and microstate centers at %s" % out_path
    return cluster.cluster_centers_, np.array(ev0), np.array(ev1)
Example #42
    def __init__(self, args):
        import pylru
        from mdtraj import io
        from scipy.spatial import cKDTree

        self.args = args
        self.data = io.loadh(args.__dict__['projection-file'], deferred=False)
        self.kdtree = cKDTree(self.data['X'])

        self.top = pickle.loads(self.data['topology'][0])
        self.top.center_coordinates()
        self.topology_pdb_string = pdb_string(self.top)
        self.alpha_carbon_indices = np.array(
            [a.index for a in self.top.top.atoms if a.name == 'CA'])

        self._traj_cache = pylru.lrucache(size=100)
        self._last_index = 0

        static_folder = os.path.join(os.path.dirname(__file__), 'static')
        s = super(PlotCommand, self) if six.PY2 else super()
        s.__init__(__name__, static_folder=static_folder)
Example #43
    def load_from_disk(cls, filename):
        """Load up a clusterer from disk

        This is useful because computing the Z-matrix
        (done in __init__) is the most expensive part, and assigning is cheap

        Parameters
        ----------
        filename : str
            location to load from

        Raises
        ------
        TODO: Probably raises something if filename doesn't exist?
        """
        data = io.loadh(filename, deferred=False)
        Z, traj_lengths = data['z_matrix'], data['traj_lengths']
        # Next two lines are a hack to fix Serializer bug. KAB
        if np.rank(traj_lengths) == 0:
            traj_lengths = [traj_lengths]
        return cls(None, None, precomputed_values=(Z, traj_lengths))
Example #46
def test_read_stride_switching(get_fn, fn_xtc):
    iofile = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    with XTCTrajectoryFile(fn_xtc) as f:
        f.offsets  # pre-compute byte offsets between frames
        # read the first 10 frames with stride of 2
        s = 2
        n_frames = 10
        xyz, time, step, box = f.read(n_frames=n_frames, stride=s)
        assert eq(xyz, iofile['xyz'][:n_frames * s:s])
        assert eq(step, iofile['step'][:n_frames * s:s])
        assert eq(box, iofile['box'][:n_frames * s:s])
        assert eq(time, iofile['time'][:n_frames * s:s])
        # now read the rest with stride 3, should start from frame index 20.
        # eg. np.arange(0, n_frames*s + 1, 2)[-1] == 20
        offset = f.tell()
        assert offset == 20
        s = 3
        xyz, time, step, box = f.read(n_frames=None, stride=s)
        assert eq(xyz, iofile['xyz'][offset::s])
        assert eq(step, iofile['step'][offset::s])
        assert eq(box, iofile['box'][offset::s])
        assert eq(time, iofile['time'][offset::s])
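The offset arithmetic the stride-switching test relies on, as a standalone sketch: reading n frames at stride s from position 0 consumes frames 0, s, ..., (n - 1) * s and leaves the cursor at n * s, which is where the follow-up read picks up:

n_frames, s = 10, 2
frames_read = range(0, n_frames * s, s)   # 0, 2, ..., 18
next_offset = n_frames * s                # 20, matching f.tell() in the test above
assert list(frames_read)[-1] == 18 and next_offset == 20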
Example #47
def test_groups():
    # Test to ensure that files are loaded correctly even if they contain
    # nested groups and stuff
    x = np.random.randn(10)
    y = np.random.randn(11)
    f = tables.open_file(temp, 'w')
    f.create_group(where='/', name='mygroup')
    f.create_array(where='/mygroup', name='myarray', obj=x)
    f.create_array(where='/', name='mya2', obj=y)
    f.close()

    assert eq(io.loadh(temp)['mygroup/myarray'], x)
    assert eq(io.loadh(temp)['mya2'], y)
    assert eq(io.loadh(temp, deferred=False)['mygroup/myarray'], x)
    assert eq(io.loadh(temp, deferred=False)['mya2'], y)
    assert eq(io.loadh(temp, 'mygroup/myarray'), x)
    assert eq(io.loadh(temp, 'mya2'), y)
Example #48
def test_groups():
    """Test to ensure that files are loaded correctly even if they contain nested
    groups and stuff"""
    x = np.random.randn(10)
    y = np.random.randn(11)
    f = tables.openFile(temp, 'w')
    f.createGroup(where='/', name='mygroup')
    if tables.__version__ >= '3.0.0':
        f.createArray(where='/mygroup', name='myarray', obj=x)
        f.createArray(where='/', name='mya2', obj=y)
    else:
        f.createArray(where='/mygroup', name='myarray', object=x)
        f.createArray(where='/', name='mya2', object=y)
    f.close()

    yield lambda: eq(io.loadh(temp)['mygroup/myarray'], x)
    yield lambda: eq(io.loadh(temp)['mya2'], y)
    yield lambda: eq(io.loadh(temp, deferred=False)['mygroup/myarray'], x)
    yield lambda: eq(io.loadh(temp, deferred=False)['mya2'], y)
    yield lambda: eq(io.loadh(temp, 'mygroup/myarray'), x)
    yield lambda: eq(io.loadh(temp, 'mya2'), y)
Example #49
def test_load_1(self):
    "Load by specifying array name"
    TestData = io.loadh(self.filename1, 'arr_0')
    eq(TestData, self.data)
Example #50
def test_read_atom_indices_slice():
    "Read a binpos with atom_indices as a slice"
    with BINPOSTrajectoryFile(fn_binpos) as f:
        xyz = f.read(atom_indices=slice(0, 10, None))
    xyz2 = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    yield lambda: eq(xyz, xyz2[:, 0:10, :])
Example #51
def test_read_atom_indices():
    "Read a binpos with atom_indices as a list"
    with BINPOSTrajectoryFile(fn_binpos) as f:
        xyz = f.read(atom_indices=[0, 1, 2])
    xyz2 = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    yield lambda: eq(xyz, xyz2[:, [0, 1, 2], :])
Example #52
def test_read_stride_2():
    "Read a binpos with stride=3 when n_frames is supplied (different code path)"
    with BINPOSTrajectoryFile(fn_binpos) as f:
        xyz = f.read(n_frames=1000, stride=3)
    xyz2 = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    yield lambda: eq(xyz, xyz2[::3])
Example #53
def test_read_stride():
    "Read a binpos with stride=3"
    with BINPOSTrajectoryFile(fn_binpos) as f:
        xyz = f.read(stride=3)
    xyz2 = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    yield lambda: eq(xyz, xyz2[::3])
Example #54
    for p in range(n_parms):
        data = np.loadtxt('selected_frames/%s.txt' % (parms[p]))
        d[:, p] = data
    proj = np.dot(d, tica_evs.T)
    io.saveh('selected_frames/selected_frames_on_tica_l%d.h5' % (tica_lag),
             proj)
    return proj


# load inputs
psf = sys.argv[1]
traj = sys.argv[2]
vmd_path = sys.argv[3]
parms = np.loadtxt(sys.argv[4], dtype=str)
tica_lag = int(sys.argv[5])
tica_evs = io.loadh(sys.argv[6])['components']
ev0 = io.loadh(sys.argv[7])['arr_0']
ev1 = io.loadh(sys.argv[8])['arr_0']
output = sys.argv[9]

# first calculate tICA parameters
vmd_cal_parms(vmd_path, psf, traj)

# project tICA parameters on tICA eigenvectors
n_parms = len(parms)
data = project(n_parms, tica_evs, tica_lag)

# plot and save tICA landscape
plt.figure(figsize=(20, 15))
plt.hist2d(ev0, ev1, bins=200, norm=LogNorm(), cmap=plt.cm.jet)
plt.plot(data[:, 0], data[:, 1], 'ro', markersize=12)
Example #55
def test_save(self):
    """Save HDF5 to disk and load it back up"""
    io.saveh(self.filename2, self.data)
    TestData = io.loadh(self.filename2, 'arr_0')
    eq(TestData, self.data)
Example #56
def test_load_2(self):
    "load using deferred=False"
    TestData = io.loadh(self.filename1, deferred=False)['arr_0']
    eq(TestData, self.data)
Example #57
def test_load_2(self):
    "load using deferred=True"
    deferred = io.loadh(self.filename1, deferred=True)
    eq(deferred['arr_0'], self.data)
    deferred.close()
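The last three examples cover io.loadh's three access modes side by side. A combined sketch, assuming data.h5 was written with io.saveh as in Example #55:

from mdtraj import io

arr = io.loadh('data.h5', 'arr_0')                # one named array, loaded eagerly
everything = io.loadh('data.h5', deferred=False)  # plain dict of all arrays in memory
lazy = io.loadh('data.h5', deferred=True)         # file stays open; arrays read on access
first = lazy['arr_0']
lazy.close()                                      # close the lazy handle when done, as above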