def main():
    """Parse command line inputs, load up files, then call save().

    Loads the project and the assignments array, decides which states to
    draw from, asks the project for random conformations from each of
    those states and passes the result to ``save``.
    """
    parser.add_argument('output_dir', default='PDBs')
    args = parser.parse_args()

    project = Project.load_from(args.project)

    # The assignments array is looked up under 'arr_0' first and under
    # 'Data' if that key is missing.
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    if -1 in args.states:
        # -1 means "every state"; entries equal to -1 in the assignments
        # array are excluded before collecting the unique state ids.
        # (The original spelled the array name ``assigments`` here, which
        # raised NameError.)
        states = np.unique(assignments[np.where(assignments != -1)])
        logger.info('Yanking from all %d states', len(states))
    else:
        # ensure that the states are sorted, and that they're unique -- you
        # can only request each state once
        states = np.unique(args.states)
        logger.info("Yanking from the following states: %s", states)

    # extract the conformations using np.random for the randomness
    confs_by_state = project.get_random_confs_from_states(
        assignments, states=states,
        num_confs=args.conformations_per_state,
        replacement=args.replacement)

    # save the conformations to disk, in the requested style
    save(confs_by_state=confs_by_state, states=states, style=args.style,
         format=args.format, outdir=args.output_dir)
def test_overwrite_1():
    """Save two different keys to the same file and read both back."""
    fid, fn = tempfile.mkstemp()
    try:
        a = np.arange(10)
        b = a + 1
        io.saveh(fn, a=a)
        io.saveh(fn, b=b)
        eq(io.loadh(fn, 'a'), a)
        eq(io.loadh(fn, 'b'), b)
    finally:
        # Release the descriptor returned by mkstemp unconditionally; only
        # the unlink depends on the path still being there.
        os.close(fid)
        if os.path.exists(fn):
            os.unlink(fn)
def load(cls, filename):
    """
    Load a previously saved CCA object

    Parameters
    ----------
    filename : str
        filename to load data from

    Returns
    -------
    cca_object : CCA
        loaded cca_object
    """
    # deferred=False reads every array up front, so no lazily loaded table
    # (which this method never closed) is left behind.
    stored = io.loadh(filename, deferred=False)

    # NOTE(review): unpickling runs arbitrary code -- only load files that
    # come from a trusted source.
    regularizer = pickle.loads(stored['regularizer'][0])
    eta = stored['eta'][0]

    cca_object = cls(regularization=regularizer, regularization_strength=eta)

    # A solution is optional in the file; restore it only when present.
    if 'sol' in stored:
        cca_object.v = stored['sol']
        cca_object._has_solution = True

    return cca_object
def test_read_stride(get_fn):
    """Read a binpos with stride=3 and compare to every third reference frame."""
    binpos_path = get_fn('frame0.binpos')
    with BINPOSTrajectoryFile(binpos_path) as handle:
        strided = handle.read(stride=3)

    reference = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    expected = reference[::3]
    assert eq(strided, expected)
def main():
    """Plot phi against psi for every frame, one series per assigned state.

    Loads all trajectories of the project, computes the dihedrals defined
    by ``PHI_INDICES`` and ``PSI_INDICES`` in degrees, and plots the frames
    of each state found in the assignments file with its own label.
    """
    parser = argparse.ArgumentParser()
    # nargs='?' makes the positional optional; without it argparse always
    # requires the argument and the advertised default is never applied.
    parser.add_argument('assignments', nargs='?',
                        default='Macro4/MacroAssignments.h5',
                        help='Path to an assignments file. (default=Macro4/MacroAssignments.h5)')
    parser.add_argument('--project', default='ProjectInfo.yaml',
                        help='Path to ProjectInfo.yaml file. (default=ProjectInfo.yaml)')
    args = parser.parse_args()

    project = Project.load_from(args.project)
    # Concatenate every trajectory of the project into one.
    t = reduce(operator.add, (project.load_traj(i) for i in range(project.n_trajs)))

    # compute_dihedrals output is converted from radians to degrees.
    phi_angles = md.compute_dihedrals(t, [PHI_INDICES]) * 180.0 / np.pi
    psi_angles = md.compute_dihedrals(t, [PSI_INDICES]) * 180.0 / np.pi
    state_index = np.hstack(io.loadh(args.assignments)['arr_0'])

    for i in np.unique(state_index):
        # Select the frames of this state once and reuse the selection.
        members = np.where(state_index == i)
        pp.plot(phi_angles[members], psi_angles[members],
                'x', label='State %d' % i)

    pp.title("Alanine Dipeptide Macrostates")
    pp.xlabel(r"$\phi$")
    pp.ylabel(r"$\psi$")
    annotate()

    pp.legend(loc=1, labelspacing=0.075, prop={'size': 8.0}, scatterpoints=1,
              markerscale=0.5, numpoints=1)
    pp.xlim([-180, 180])
    pp.ylim([-180, 180])
    pp.show()
def test_read_stride_2(get_fn):
    """Read a binpos with stride=3 while also passing n_frames.

    Supplying n_frames exercises a different code path than a plain
    strided read.
    """
    binpos_path = get_fn('frame0.binpos')
    with BINPOSTrajectoryFile(binpos_path) as handle:
        strided = handle.read(n_frames=1000, stride=3)

    reference = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    expected = reference[::3]
    assert eq(strided, expected)
def test_read_atom_indices_slice(get_fn):
    "Read a binpos with atom_indices as a slice"
    binpos_path = get_fn('frame0.binpos')
    first_ten = slice(0, 10, None)
    with BINPOSTrajectoryFile(binpos_path) as handle:
        subset = handle.read(atom_indices=first_ten)

    reference = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    expected = reference[:, 0:10, :]
    assert eq(subset, expected)
def readData(FN):
    """
    Read data from a file, choosing the reader from the filename extension.

    Inputs:
    1) FN: filename to find data

    Outputs:
    1) data: numpy array containing data read in.

    Dispatch by extension:
    - 'npy': read with ``load``.
    - 'lh5': read with ``md.load`` and return only the ``xyz`` coordinates.
    - 'h5' : read with ``io.loadh``; the array is taken from 'arr_0', or
      from 'Data' if 'arr_0' is missing, and entries equal to -1 are
      dropped (which flattens the result).
    - anything else: read with ``loadtxt``; if that fails a message is
      printed and the interpreter exits.

    A zero-dimensional result is wrapped into a one-element array.
    """
    extension = FN.split('.')[-1]

    if extension == 'npy':
        data = load(FN)
    elif extension == 'lh5':
        # Assume this is a trajectory in msmbuilder
        data = md.load(FN).xyz  # Only return the coordinates.
    elif extension == 'h5':
        data = io.loadh(FN)
        try:
            data = data['arr_0']
        except KeyError:
            data = data['Data']
        data = data[where(data != -1)]
    else:
        try:
            data = loadtxt(FN)
        except Exception:
            # Exception rather than a bare except, so Ctrl-C and
            # SystemExit are not reported as an unreadable file.
            print("\n\n dataIO.readData: Cannot read %s. Use numpy.save or numpy.savetxt. Exiting" % FN)
            exit()

    if data.shape == ():
        # If there is only one data point, then turn it into a list
        data = array([data])

    return data
def reduce_shells(system, num_shells):
    """
    Sum the per-shell grids into combined maps and collect per-shell medians.

    For each of the keys "I", "I_sigI" and "n_pixels", every shell file
    under ``system['map_path'] + "temp/"`` is loaded and added into one
    map. The median over the positive entries of each shell is stored in
    shell_stats, and for key "I" the median resolution of those same
    voxels is stored under 'resolution'.

    Returns (combined_maps, shell_stats).
    """
    bins = system['bins']
    map_shape = (len(bins['h']), len(bins['k']), len(bins['l']))

    def shell_number(name):
        # The shell index sits between 'rshell' and the next underscore.
        return int(name.split('rshell')[-1].split('_')[0])

    filelist = sorted(glob.glob(system['map_path'] + "temp/grid_rshell*.h5"),
                      key=shell_number)
    assert len(filelist) == num_shells

    hkl = np.array(list(itertools.product(bins['h'], bins['k'], bins['l'])))
    hkl_res = map_utils.compute_resolution(system['space_group'], system['cell'], hkl)
    hkl_res = hkl_res.reshape(map_shape)

    combined_maps = {}
    shell_stats = {'resolution': np.zeros(num_shells)}

    for key in ["I", "I_sigI", "n_pixels"]:
        print("on key %s" % key)
        combined_maps[key] = np.zeros(map_shape)
        shell_stats[key] = np.zeros(num_shells)

        for shell, shell_file in enumerate(filelist):
            data = io.loadh(shell_file, key)
            combined_maps[key] += data
            shell_stats[key][shell] = np.median(data[data > 0])
            if key == "I":
                shell_stats['resolution'][shell] = np.median(hkl_res[data > 0])

    return combined_maps, shell_stats
def load(cls, tica_fn):
    """
    load a tICA solution to use in projecting data.

    Parameters:
    -----------
    tica_fn : str
        filename pointing to tICA solutions

    Returns
    -------
    tica_obj : instance of cls
        object with the correlation matrices, eigenvalues and
        eigenvectors restored and sorted via ``_sort()``
    """
    # the only variables we need to save are the two matrices
    # and the eigenvectors / values as well as the lag time
    logger.warning("NOTE: You can only use the tICA solution, you will "
                   "not be able to continue adding data")

    # deferred=False reads everything up front, so no lazily loaded table
    # (which this method never closed) is left behind.
    f = io.loadh(tica_fn, deferred=False)

    # NOTE(review): unpickling runs arbitrary code -- only load files that
    # come from a trusted source.
    metric = cPickle.loads(f["metric_string"][0])

    # lag entry is an array... with a single item
    tica_obj = cls(f['lag'][0], prep_metric=metric)

    tica_obj.timelag_corr_mat = f['timelag_corr_mat']
    tica_obj.cov_mat = f['cov_mat']
    tica_obj.vals = f['vals']
    tica_obj.vecs = f['vecs']
    tica_obj._sort()

    return tica_obj
def _load_arr0_or_data(path):
    """Return the 'arr_0' array of the file at `path`, else its 'Data' array."""
    try:
        return io.loadh(path, 'arr_0')
    except KeyError:
        return io.loadh(path, 'Data')


def entry_point():
    """Trim assignments with ``run`` and save the result.

    Exits early through ``arglib.die_if_path_exists`` if the output path is
    already taken. The assignments and distances arrays are each read from
    'arr_0', falling back to 'Data' independently per file, so two inputs
    that use different key names can be combined.
    """
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    assignments = _load_arr0_or_data(args.assignments)
    distances = _load_arr0_or_data(args.distances)

    trimmed = run(assignments, distances, args.rmsd_cutoff)

    io.saveh(args.output, trimmed)
    logger.info('Saved output to %s', args.output)
def main():
    """Compute a silhouette score for the clustering and append it to a file.

    Loads the sampled trajectory frames, picks the matching assignments,
    builds the pairwise RMSD matrix and writes the resulting score as one
    line to ``silhouette.dat`` inside ``args.output``.
    """
    args, atom_indices, project, project_root = parse_cmdline()

    # load all of the data from disk
    project_dir = os.path.dirname(args.project_yaml)
    xyzlist, sampled_frames = load_trajs(project, project_dir, atom_indices,
                                         args.stride, args.fraction)
    all_assignments = io.loadh(args.assignments, 'arr_0')

    # keep only the assignments of frames whose coordinates were loaded
    per_traj = []
    for i in range(len(sampled_frames)):
        per_traj.append(all_assignments[i, sampled_frames[i]])
    assignments = np.concatenate(per_traj)

    # sanity check on the subsampling: negative labels mean nonsense data
    assert not np.any(assignments < 0), 'assignments negative? stride/sampling messed up probs. did you use a different strid than you clustered with?'

    n_real_atoms = len(atom_indices)
    n_padded_atoms = xyzlist.shape[2]
    assert n_padded_atoms >= n_real_atoms

    pairwise = calculate_pairwise_rmsd(xyzlist, n_real_atoms)

    print('computing silhouette...')
    score = silhouette_score(pairwise, assignments, metric='precomputed')
    print('silhouette score: %f' % score)

    path = os.path.join(args.output, 'silhouette.dat')
    print('saving results to flat text file (append): %s...' % path)
    if not os.path.exists(args.output):
        os.makedirs(args.output)

    with open(path, 'a') as out:
        out.write('%f\n' % score)
def test_read_atom_indices(get_fn):
    "Read a binpos with atom_indices as a list"
    binpos_path = get_fn('frame0.binpos')
    with BINPOSTrajectoryFile(binpos_path) as handle:
        subset = handle.read(atom_indices=[0, 1, 2])

    reference = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    expected = reference[:, [0, 1, 2], :]
    assert eq(subset, expected)
def test_read_atomindices_1(get_fn, fn_xtc):
    """Read an xtc restricted to atoms 0, 1 and 2 and compare to the h5 reference."""
    reference = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)

    with XTCTrajectoryFile(fn_xtc) as handle:
        xyz, time, step, box = handle.read(atom_indices=[0, 1, 2])

    expected_xyz = reference['xyz'][:, [0, 1, 2]]
    assert eq(xyz, expected_xyz)
    for observed, key in ((step, 'step'), (box, 'box'), (time, 'time')):
        assert eq(observed, reference[key])
def test_read_atomindices_2():
    """Read an xtc with every other atom selected via a slice.

    Yields one zero-argument check per compared field.
    """
    reference = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    every_other = slice(None, None, 2)

    with XTCTrajectoryFile(fn_xtc) as handle:
        coords, times, steps, boxes = handle.read(atom_indices=every_other)

    yield lambda: eq(coords, reference['xyz'][:, ::2])
    yield lambda: eq(steps, reference['step'])
    yield lambda: eq(boxes, reference['box'])
    yield lambda: eq(times, reference['time'])
def project_plot_data():
    """Project raw_data.npy onto the stored tICA components and plot it.

    Every projected row is drawn as a red star at its first two
    coordinates and labelled with its row index.
    """
    data = np.load('raw_data.npy')
    tica = io.loadh('original_SREP_tica.h5')
    print("raw_data.shape, vecs.shape: %s %s" % (data.shape, tica['vecs'].shape))

    proj = np.dot(data, tica['components'].T)
    print("projected.shape: %s" % (proj.shape,))

    for index, row in enumerate(proj):
        x, y = row[0], row[1]
        plt.plot(x, y, 'r*', markersize=18)
        plt.text(x, y, index, fontsize=18)
def plot_extracted_frames_locations_on_tica(stretch,tica_lag,n_frames):
    """Mark the selected frames on the 2D histogram of the first two tICs.

    Frames are ranked by the square of the sum of the mean-centered ev0
    and ev1 values. From the top ``n_frames * stretch`` of that ranking
    every ``stretch``-th frame is taken. The selection is plotted, written
    to two text files under 'analysis', and returned as (ev0, ev1) values.
    """
    ev0 = io.loadh('analysis/tica_projections/ev0.h5')['arr_0']
    ev1 = io.loadh('analysis/tica_projections/ev1.h5')['arr_0']

    centered_sum = (ev0 - np.mean(ev0)) + (ev1 - np.mean(ev1))
    ranking = np.argsort(centered_sum ** 2)
    chosen = ranking[-n_frames * stretch::stretch]
    chosen_ev0 = ev0[chosen]
    chosen_ev1 = ev1[chosen]

    plt.figure(figsize=(12, 8))
    plt.hist2d(ev0, ev1, bins=100, norm=LogNorm())
    plt.plot(chosen_ev0, chosen_ev1, 'r*', markersize=18)

    np.savetxt('analysis/ev0_selected.txt', chosen_ev0)
    np.savetxt('analysis/ev1_selected.txt', chosen_ev1)

    plt.xlabel('tIC 1')
    plt.ylabel('tIC 2')
    plt.savefig('analysis/location_of_%d_extracted_frames_on_tica_l%d.png' % (n_frames, tica_lag))
    print("\nInfo: saved 'location_of_%d_extracted_frames_on_tica_l%d.png' at folder 'analysis'\n" % (n_frames, tica_lag))

    return chosen_ev0, chosen_ev1
def test_read_0():
    """Read a whole binpos file and compare it with the dcd and h5 copies.

    The dcd comparison skips the first binpos frame.
    """
    with BINPOSTrajectoryFile(fn_binpos) as binpos:
        from_binpos = binpos.read()
    with DCDTrajectoryFile(fn_dcd) as dcd:
        from_dcd = dcd.read()[0]
    from_h5 = io.loadh(get_fn("frame0.binpos.h5"), "xyz")

    yield lambda: eq(from_binpos[1:], from_dcd)
    yield lambda: eq(from_binpos, from_h5)
def test_read_stride(get_fn, fn_xtc):
    """Read an xtc with stride=3 and compare to every third reference frame."""
    reference = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)

    with XTCTrajectoryFile(fn_xtc) as handle:
        xyz, time, step, box = handle.read(stride=3)

    for observed, key in ((xyz, 'xyz'), (step, 'step'), (box, 'box'), (time, 'time')):
        assert eq(observed, reference[key][::3])
def test_read_chunk3(get_fn, fn_xtc):
    """Read the first 100 xtc frames with chunk_size_multiplier=2."""
    with XTCTrajectoryFile(fn_xtc, chunk_size_multiplier=2) as handle:
        xyz, time, step, box = handle.read(n_frames=100)

    reference = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    for observed, key in ((xyz, 'xyz'), (step, 'step'), (box, 'box'), (time, 'time')):
        assert eq(observed, reference[key][:100])
def test_read_chunk2():
    """Read a whole xtc opened in 'r' mode with chunk_size_multiplier=1.

    Yields one zero-argument check per compared field.
    """
    with XTCTrajectoryFile(fn_xtc, 'r', chunk_size_multiplier=1) as handle:
        coords, times, steps, boxes = handle.read()

    reference = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)

    yield lambda: eq(coords, reference['xyz'])
    yield lambda: eq(steps, reference['step'])
    yield lambda: eq(boxes, reference['box'])
    yield lambda: eq(times, reference['time'])
def test_read_1():
    """Read a whole binpos file with chunk_size_multiplier=0.5.

    The result is compared with the dcd copy (skipping the first binpos
    frame) and with the h5 copy.
    """
    with BINPOSTrajectoryFile(fn_binpos, chunk_size_multiplier=0.5) as binpos:
        from_binpos = binpos.read()
    with DCDTrajectoryFile(fn_dcd) as dcd:
        from_dcd = dcd.read()[0]
    from_h5 = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')

    yield lambda: eq(from_binpos[1:], from_dcd)
    yield lambda: eq(from_binpos, from_h5)
def test_read_stride_2():
    "read xtc with stride with n_frames"
    reference = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)

    with XTCTrajectoryFile(fn_xtc) as handle:
        coords, times, steps, boxes = handle.read(n_frames=1000, stride=3)

    yield lambda: eq(coords, reference['xyz'][::3])
    yield lambda: eq(steps, reference['step'][::3])
    yield lambda: eq(boxes, reference['box'][::3])
    yield lambda: eq(times, reference['time'][::3])
def train(start, stop, stride):
    """Collect the 'distances' array of every stage file into one list.

    Files t0..t3 are read for stage1 and t0..t19 for each of stage2,
    stage3 and stage4, in that order. The start, stop and stride
    arguments are not used by this function.
    """
    # (path template, number of files) for each stage, in load order
    stages = (
        ('../../../stage1/analysis/t%d.h5', 4),
        ('../../../stage2/analysis/t%d.h5', 20),
        ('../../../stage3/analysis/t%d.h5', 20),
        ('../../../stage4/analysis/t%d.h5', 20),
    )

    dataset = []
    for template, n_files in stages:
        for i in range(n_files):
            data = io.loadh(template % i)['distances']
            print("working on: %s %s" % (i, data.shape))
            dataset.append(data)
    return dataset
def test_read_atomindices_w_stride(get_fn, fn_xtc):
    """Read atoms 0, 1 and 2 of an xtc for each stride in ``strides``.

    Test case for bug: https://github.com/mdtraj/mdtraj/issues/1394
    """
    reference = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)

    for stride in strides:
        with XTCTrajectoryFile(fn_xtc) as handle:
            xyz, time, step, box = handle.read(atom_indices=[0, 1, 2], stride=stride)

        expected_xyz = reference['xyz'][:, [0, 1, 2]][::stride]
        assert eq(xyz, expected_xyz)
        for observed, key in ((step, 'step'), (box, 'box'), (time, 'time')):
            assert eq(observed, reference[key][::stride])
def test_read_stride_n_frames(get_fn, fn_xtc):
    """Read an xtc with n_frames=1000 for each stride in ``strides``."""
    reference = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    fields = ('xyz', 'step', 'box', 'time')

    for stride in strides:
        with XTCTrajectoryFile(fn_xtc) as handle:
            xyz, time, step, box = handle.read(n_frames=1000, stride=stride)

        for observed, key in zip((xyz, step, box, time), fields):
            assert eq(observed, reference[key][::stride])
def load(filename):
    """Load `filename` with a loader inferred from its name.

    Checked in this order:
    - extension (other than '.h5') known to ``md._FormatRegistry.loaders``:
      loaded with ``md.load``
    - name contains 'AtomIndices.dat': integer text file
    - '.dat': float text file
    - ProjectInfo yaml/h5: loaded with ``Project.load_from``
    - '.h5', '.hdf' or a name ending in '.h5.distances': ``io.loadh``
      with deferred=False
    - '.mtx': ``scipy.io.mmread``

    Raises
    ------
    TypeError
        if none of the rules above matches
    """
    # delay these imports, since this module is loaded in a bunch
    # of places but not necessarily used
    import scipy.io
    from msmbuilder import Project

    # the filename extension
    ext = os.path.splitext(filename)[1]

    # load trajectories
    if ext != '.h5' and ext in md._FormatRegistry.loaders:
        val = md.load(filename)

    # load flat text files
    elif 'AtomIndices.dat' in filename:
        # try loading AtomIndices first, because the default for loadtxt
        # is to use floats. The builtin int is used because the np.int
        # alias no longer exists in current NumPy.
        val = np.loadtxt(filename, dtype=int)
    elif ext in ['.dat']:
        # try loading general .dats with floats
        val = np.loadtxt(filename)

    # short circuit opening ProjectInfo
    elif ('ProjectInfo.yaml' in filename) or ('ProjectInfo.h5' in filename) or \
            (re.search(r'ProjectInfo.*\.yaml', filename)):
        val = Project.load_from(filename)

    # load with serializer files that end with .h5, .hdf or .h5.distances
    elif ext in ['.h5', '.hdf'] or filename.endswith('.h5.distances'):
        val = io.loadh(filename, deferred=False)

    # load matrices
    elif ext in ['.mtx']:
        val = scipy.io.mmread(filename)

    else:
        raise TypeError("I could not infer how to load this file. You "
            "can either request load=False, or perhaps add more logic to "
            "the load heuristics in this class: %s" % filename)

    return val
def test_read_2(get_fn):
    """Read a whole binpos file with chunk_size_multiplier=10.

    The result is compared with the dcd copy (skipping the first binpos
    frame) and with the h5 copy.
    """
    binpos_path = get_fn('frame0.binpos')
    dcd_path = get_fn('frame0.dcd')

    with BINPOSTrajectoryFile(binpos_path, chunk_size_multiplier=10) as binpos:
        from_binpos = binpos.read()
    with DCDTrajectoryFile(dcd_path) as dcd:
        from_dcd = dcd.read()[0]
    from_h5 = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')

    assert eq(from_binpos[1:], from_dcd)
    assert eq(from_binpos, from_h5)
def entry_point():
    """Parse command line inputs, load up files, and build a movie.

    A state sequence is drawn with ``msm_analysis.sample`` from the
    transition matrix file, one random conformation is requested per
    entry of that sequence, and the first frame of each is stacked into
    the trajectory written to ``args.output``.
    """
    args = parser.parse_args()

    # the assignments live under 'arr_0', or under 'Data' if that is missing
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    n_steps = int(args.num_steps)
    first_state = int(args.starting_state)

    project = Project.load_from(args.project)
    transition_matrix = scipy.io.mmread(args.tprob).tocsr()

    state_path = msm_analysis.sample(transition_matrix, first_state, n_steps)
    conformations = project.get_random_confs_from_states(
        assignments, state_path, 1)

    # reuse the first sampled trajectory as the container for all frames
    movie = conformations[0]
    frames = [conf["XYZList"][0] for conf in conformations]
    movie["XYZList"] = np.array(frames)
    movie.save(args.output)
def test_read_stride_n_frames_offsets(get_fn, fn_xtc):
    """Read an xtc with n_frames=1000 and strides 1..5 after touching ``offsets``."""
    reference = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    fields = ('xyz', 'step', 'box', 'time')

    for stride in (1, 2, 3, 4, 5):
        with XTCTrajectoryFile(fn_xtc) as handle:
            handle.offsets  # pre-compute byte offsets between frames
            xyz, time, step, box = handle.read(n_frames=1000, stride=stride)

        for observed, key in zip((xyz, step, box, time), fields):
            assert eq(observed, reference[key][::stride])
def main():
    """Parse command line inputs, load up files, and build a movie.

    A state sequence is drawn with ``msm_analysis.sample`` from the
    transition matrix file, one random conformation is requested per
    entry of that sequence, and the first frame of each is stacked into
    the trajectory written to ``args.output``.
    """
    args = parser.parse_args()

    # the assignments live under 'arr_0', or under 'Data' if that is missing
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    n_steps = int(args.num_steps)
    first_state = int(args.starting_state)

    project = Project.load_from(args.project)
    transition_matrix = scipy.io.mmread(args.tprob).tocsr()

    state_path = msm_analysis.sample(transition_matrix, first_state, n_steps)
    conformations = project.get_random_confs_from_states(
        assignments, state_path, 1)

    # reuse the first sampled trajectory as the container for all frames
    movie = conformations[0]
    frames = [conf["XYZList"][0] for conf in conformations]
    movie["XYZList"] = np.array(frames)
    movie.save(args.output)
def load_from(cls, filename):
    """
    Load project from disk

    Parameters
    ----------
    filename : string
        path to a legacy .h5 or current .yaml project file.

    Returns
    -------
    project : the loaded project object

    Raises
    ------
    ValueError
        if the filename ends in neither '.yaml' nor '.h5'
    """
    rootdir = os.path.abspath(os.path.dirname(filename))

    if filename.endswith('.yaml'):
        with open(filename) as stream:
            ondisk = yaml.load(stream, Loader=Loader)
        trajs = ondisk['trajs']
        records = {
            'conf_filename': ondisk['conf_filename'],
            'traj_lengths': [entry['length'] for entry in trajs],
            'traj_paths': [entry['path'] for entry in trajs],
            'traj_converted_from': [entry['converted_from'] for entry in trajs],
            'traj_errors': [entry['errors'] for entry in trajs],
        }

    elif filename.endswith('.h5'):
        ondisk = io.loadh(filename, deferred=False)
        n_trajs = len(ondisk['TrajLengths'])

        # this is the convention used in the hdf project format to get the
        # traj paths: <TrajFilePath>/<TrajFileBaseName><index><TrajFileType>
        traj_paths = [
            os.path.join(ondisk['TrajFilePath'][0],
                         ondisk['TrajFileBaseName'][0] + str(i) + ondisk['TrajFileType'][0])
            for i in range(n_trajs)
        ]
        records = {
            'conf_filename': str(ondisk['ConfFilename'][0]),
            'traj_lengths': ondisk['TrajLengths'],
            'traj_paths': traj_paths,
            'traj_converted_from': [[None]] * n_trajs,
            'traj_errors': [None] * n_trajs,
        }

    else:
        raise ValueError('Sorry, I can only open files in .yaml'
                         ' or .h5 format: %s' % filename)

    return cls(records, validate=False, project_dir=rootdir)
def entry_point():
    """Parse command line inputs, load up files, then call save().

    Loads the project and the assignments array, decides which states to
    draw from, asks the project for random conformations from each of
    those states and passes the result to ``save``.
    """
    parser.add_argument('output_dir', default='PDBs')
    args = parser.parse_args()

    project = Project.load_from(args.project)

    # the assignments live under 'arr_0', or under 'Data' if that is missing
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    wants_all_states = -1 in args.states
    if wants_all_states:
        # every state id present, leaving out the -1 entries
        assigned = assignments[np.where(assignments != -1)]
        states = np.unique(assigned)
        logger.info('Yanking from all %d states', len(states))
    else:
        # np.unique sorts the request and drops duplicates -- each state
        # can only be requested once
        states = np.unique(args.states)
        logger.info("Yanking from the following states: %s", states)

    # extract the conformations using np.random for the randomness
    confs_by_state = project.get_random_confs_from_states(
        assignments,
        states=states,
        num_confs=args.conformations_per_state,
        replacement=args.replacement)

    # save the conformations to disk, in the requested style
    save(confs_by_state=confs_by_state,
         states=states,
         style=args.style,
         format=args.format,
         outdir=args.output_dir)
def test_overwrite_2():
    """Save the same key twice and check that the second value is read back."""
    fid, fn = tempfile.mkstemp()
    try:
        a = np.arange(10)
        b = a + 1
        io.saveh(fn, a=a)
        io.saveh(fn, a=b)
        eq(io.loadh(fn, 'a'), b)
    finally:
        # Release the descriptor returned by mkstemp unconditionally; only
        # the unlink depends on the path still being there.
        os.close(fid)
        if os.path.exists(fn):
            os.unlink(fn)
def plot_macros():
    """Scatter the first two columns of each of the 15 macrostate files.

    Each file gets its own color from ``tableau20`` and a legend entry;
    the legend markers are made fully opaque afterwards.
    """
    for i in range(15):
        points = io.loadh('../macros_on_tica/on_tica_macro_%d.h5' % i)['arr_0']
        print("%s %s" % (i, points.shape))
        plt.plot(points[:, 0], points[:, 1], 'o', alpha=0.8, markersize=8,
                 label=i, color=tableau20[i])

    plt.grid(True)
    legend_font = {'family':'serif', 'size':'20', 'weight':'light'}
    leg = plt.legend(ncol=3, numpoints=1, scatterpoints=1, handletextpad=0.2,
                     labelspacing=None, columnspacing=None, shadow=True,
                     fancybox=True, bbox_to_anchor=(0.4, 1.05),
                     prop=legend_font)
    for handle in leg.legendHandles:
        handle._legmarker.set_alpha(1)
def load(cls, input_fn, kernel=None):
    """
    load a ktica object saved via the .save method.

    Parameters
    ----------
    input_fn : str
        input filename
    kernel : kernel instance, optional
        kernel to use when calculating inner products. If None,
        then we will look in the file. If it's not there, then an
        exception will be raised

    Returns
    -------
    kt : ktica instance

    Raises
    ------
    Exception
        if no kernel is passed and the file has no 'kernel_str' entry
    """
    # deferred=False reads everything up front, so no lazily loaded table
    # (which this method never closed) is left behind.
    f = io.loadh(input_fn, deferred=False)

    if kernel is None:
        if 'kernel_str' not in f:
            # The filename is now interpolated; the original raised the
            # message with a literal, unformatted '%s'.
            raise Exception("kernel_str not found in %s. Need to pass a kernel object" % input_fn)
        # NOTE(review): unpickling runs arbitrary code -- only load files
        # that come from a trusted source.
        kernel = pickle.loads(f['kernel_str'][0])

    # dt and reg_factor were saved as arrays with one element
    kt = cls(kernel, reg_factor=f['reg_factor'][0])

    kt.K_uncentered = f['K_uncentered']
    kt.K = f['K']
    kt.M = f['M'].astype(np.double)
    kt.a = f['a']
    kt.a_mean = f['a_mean']
    kt.a_stdev = f['a_stdev']
    kt.vals = f['vals']
    kt.vecs = f['vecs']

    kt._normalized = False
    kt._sort()
    # ^^^ sorting also normalizes

    return kt
def cluster():
    '''
    Run K-means clustering on the first two columns of the projected data
    and save one assignment file per trajectory. Cluster centers are
    written to the `microstate_centers.txt` file.

    Returns the cluster centers and the flattened first and second
    coordinates of all loaded points.
    '''
    kmeans = KMeans(n_clusters=n_states, n_jobs=-1, verbose=0,
                    max_iter=100, tol=0.0001,)
    dataset, ev0, ev1 = [], [], []

    print("Loading projected data...")
    for i in tqdm(range(start_traj, end_traj+1)):
        projected = io.loadh('%s/traj%d_%s.h5' %(proj_path,i,traj_name))['arr_0']
        first_two = projected[:, 0:2]
        dataset.append(first_two)
        ev0.extend(first_two[:, 0])
        ev1.extend(first_two[:, 1])

    print("Clustering %d datapoints..." %len(ev0))
    kmeans.fit(dataset)

    # labels_ is indexed by position, trajectories by their own number
    for position, i in enumerate(range(start_traj, end_traj+1)):
        labels = np.array(kmeans.labels_[position])
        np.savetxt('%s/assigns_%d.txt' %(out_path,i), labels, fmt='%d')

    np.savetxt('%s/microstate_centers.txt' %out_path, np.array(kmeans.cluster_centers_))
    print("Saved microstate assignments and microstate centers at %s" %out_path)

    return kmeans.cluster_centers_, np.array(ev0), np.array(ev1)
def __init__(self, args):
    """Load the projection file and prepare the lookup structures.

    Builds a KD-tree over the 'X' array, unpickles and centers the
    topology, records the alpha-carbon atom indices, creates a
    100-entry LRU trajectory cache and initializes the parent class with
    the 'static' folder that sits next to this module.
    """
    import pylru
    from mdtraj import io
    from scipy.spatial import cKDTree

    self.args = args

    projection_file = args.__dict__['projection-file']
    self.data = io.loadh(projection_file, deferred=False)
    self.kdtree = cKDTree(self.data['X'])

    self.top = pickle.loads(self.data['topology'][0])
    self.top.center_coordinates()
    self.topology_pdb_sring = pdb_string(self.top)

    ca_indices = [a.index for a in self.top.top.atoms if a.name == 'CA']
    self.alpha_carbon_indices = np.array(ca_indices)

    self._traj_cache = pylru.lrucache(size=100)
    self._last_index = 0

    static_folder = os.path.join(os.path.dirname(__file__), 'static')
    # explicit two-argument super on Python 2, bare super() otherwise
    if six.PY2:
        parent = super(PlotCommand, self)
    else:
        parent = super()
    parent.__init__(__name__, static_folder=static_folder)
def load_from_disk(cls, filename):
    """Load up a clusterer from disk

    This is useful because computing the Z-matrix (done in __init__)
    is the most expensive part, and assigning is cheap

    Parameters
    ----------
    filename : str
        location to load from

    Returns
    -------
    clusterer : instance of cls
        built from the stored 'z_matrix' and 'traj_lengths' arrays

    Raises
    ------
    TODO: Probably raises something if filename doesn't exist?
    """
    data = io.loadh(filename, deferred=False)
    Z, traj_lengths = data['z_matrix'], data['traj_lengths']

    # Next two lines are a hack to fix Serializer bug. KAB
    # np.ndim replaces np.rank, which no longer exists in current NumPy.
    if np.ndim(traj_lengths) == 0:
        traj_lengths = [traj_lengths]

    return cls(None, None, precomputed_values=(Z, traj_lengths))
def test_read_stride_switching(get_fn, fn_xtc):
    """Read 10 frames with stride 2, then the rest of the file with stride 3.

    The second read has to continue from the position reported by
    ``tell()`` after the first read, which this test expects to be 18.
    """
    reference = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    fields = ('xyz', 'step', 'box', 'time')

    with XTCTrajectoryFile(fn_xtc) as handle:
        handle.offsets  # pre-compute byte offsets between frames

        # first pass: 10 frames, every second one
        stride = 2
        n_frames = 10
        xyz, time, step, box = handle.read(n_frames=n_frames, stride=stride)
        first_pass = slice(None, n_frames*stride, stride)
        for observed, key in zip((xyz, step, box, time), fields):
            assert eq(observed, reference[key][first_pass])

        # second pass: everything that is left, every third frame,
        # starting at the current position.
        # eg. np.arange(0, n_frames*s, 2)[-1] == 18
        offset = handle.tell()
        assert offset == 18
        stride = 3
        xyz, time, step, box = handle.read(n_frames=None, stride=stride)
        second_pass = slice(offset, None, stride)
        for observed, key in zip((xyz, step, box, time), fields):
            assert eq(observed, reference[key][second_pass])
def test_read_stride_switching(get_fn, fn_xtc):
    """Read 10 frames with stride 2, then the rest of the file with stride 3.

    The second read has to continue from the position reported by
    ``tell()`` after the first read, which this test expects to be 20.
    """
    reference = io.loadh(get_fn('frame0.xtc.h5'), deferred=False)
    fields = ('xyz', 'step', 'box', 'time')

    with XTCTrajectoryFile(fn_xtc) as handle:
        handle.offsets  # pre-compute byte offsets between frames

        # first pass: 10 frames, every second one
        stride = 2
        n_frames = 10
        xyz, time, step, box = handle.read(n_frames=n_frames, stride=stride)
        first_pass = slice(None, n_frames * stride, stride)
        for observed, key in zip((xyz, step, box, time), fields):
            assert eq(observed, reference[key][first_pass])

        # second pass: everything that is left, every third frame,
        # starting at the current position.
        # eg. np.arange(0, n_frames*s + 1, 2)[-1] == 20
        offset = handle.tell()
        assert offset == 20
        stride = 3
        xyz, time, step, box = handle.read(n_frames=None, stride=stride)
        second_pass = slice(offset, None, stride)
        for observed, key in zip((xyz, step, box, time), fields):
            assert eq(observed, reference[key][second_pass])
def test_groups():
    """Arrays are loaded correctly from files that contain nested groups.

    One array is written inside a group and one at the root; both are
    then read back lazily, eagerly and by name.
    """
    in_group = np.random.randn(10)
    at_root = np.random.randn(11)

    handle = tables.open_file(temp, 'w')
    handle.create_group(where='/', name='mygroup')
    handle.create_array(where='/mygroup', name='myarray', obj=in_group)
    handle.create_array(where='/', name='mya2', obj=at_root)
    handle.close()

    # default (lazy) loading
    assert eq(io.loadh(temp)['mygroup/myarray'], in_group)
    assert eq(io.loadh(temp)['mya2'], at_root)

    # eager loading
    assert eq(io.loadh(temp, deferred=False)['mygroup/myarray'], in_group)
    assert eq(io.loadh(temp, deferred=False)['mya2'], at_root)

    # loading a single array by name
    assert eq(io.loadh(temp, 'mygroup/myarray'), in_group)
    assert eq(io.loadh(temp, 'mya2'), at_root)
def test_groups():
    """Test to ensure that files are loaded correctly even if they contain
    nested groups and stuff"""
    x = np.random.randn(10)
    y = np.random.randn(11)

    # Compare the major version as a number: comparing the version strings
    # themselves is lexicographic and would order '10.0.0' before '3.0.0'.
    tables_major = int(tables.__version__.split('.')[0])
    # The keyword carrying the data is 'obj' from major version 3 on and
    # 'object' before that.
    data_kwarg = 'obj' if tables_major >= 3 else 'object'

    f = tables.openFile(temp, 'w')
    f.createGroup(where='/', name='mygroup')
    f.createArray(where='/mygroup', name='myarray', **{data_kwarg: x})
    f.createArray(where='/', name='mya2', **{data_kwarg: y})
    f.close()

    yield lambda: eq(io.loadh(temp)['mygroup/myarray'], x)
    yield lambda: eq(io.loadh(temp)['mya2'], y)
    yield lambda: eq(io.loadh(temp, deferred=False)['mygroup/myarray'], x)
    yield lambda: eq(io.loadh(temp, deferred=False)['mya2'], y)
    yield lambda: eq(io.loadh(temp, 'mygroup/myarray'), x)
    yield lambda: eq(io.loadh(temp, 'mya2'), y)
def test_load_1(self):
    "Load by specifying array name"
    loaded = io.loadh(self.filename1, 'arr_0')
    eq(loaded, self.data)
def test_read_atom_indices_slice():
    "Read a binpos with atom_indices as a slice"
    first_ten = slice(0, 10, None)
    with BINPOSTrajectoryFile(fn_binpos) as handle:
        subset = handle.read(atom_indices=first_ten)

    reference = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    yield lambda: eq(subset, reference[:, 0:10, :])
def test_read_atom_indices():
    "Read a binpos with atom_indices as a list"
    with BINPOSTrajectoryFile(fn_binpos) as handle:
        subset = handle.read(atom_indices=[0, 1, 2])

    reference = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    yield lambda: eq(subset, reference[:, [0, 1, 2], :])
def test_read_stride_2():
    "Read a binpos with stride=3 when n_frames is supplied (different code path)"
    with BINPOSTrajectoryFile(fn_binpos) as handle:
        strided = handle.read(n_frames=1000, stride=3)

    reference = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    yield lambda: eq(strided, reference[::3])
def test_read_stride():
    "Read a binpos with stride=3"
    with BINPOSTrajectoryFile(fn_binpos) as handle:
        strided = handle.read(stride=3)

    reference = io.loadh(get_fn('frame0.binpos.h5'), 'xyz')
    yield lambda: eq(strided, reference[::3])
for p in range(n_parms): data = np.loadtxt('selected_frames/%s.txt' % (parms[p])) d[:, p] = data proj = np.dot(d, tica_evs.T) io.saveh('selected_frames/selected_frames_on_tica_l%d.h5' % (tica_lag), proj) return proj # load inputs psf = sys.argv[1] traj = sys.argv[2] vmd_path = sys.argv[3] parms = np.loadtxt(sys.argv[4], dtype=str) tica_lag = int(sys.argv[5]) tica_evs = io.loadh(sys.argv[6])['components'] ev0 = io.loadh(sys.argv[7])['arr_0'] ev1 = io.loadh(sys.argv[8])['arr_0'] output = sys.argv[9] # first calculate tICA parameters vmd_cal_parms(vmd_path, psf, traj) # project tICA parameters on tICA eigenvectors n_parms = len(parms) data = project(n_parms, tica_evs, tica_lag) # plot and save tICA landscape plt.figure(figsize=(20, 15)) plt.hist2d(ev0, ev1, bins=200, norm=LogNorm(), cmap=plt.cmap.jet) plt.plot(data[:, 0], data[:, 1], 'ro', markersize=12)
def test_save(self):
    """Save HDF5 to disk and load it back up"""
    target = self.filename2
    io.saveh(target, self.data)
    # an array saved without a name is read back under 'arr_0'
    round_tripped = io.loadh(target, 'arr_0')
    eq(round_tripped, self.data)
def test_load_2(self):
    "load using deferred=False"
    everything = io.loadh(self.filename1, deferred=False)
    loaded = everything['arr_0']
    eq(loaded, self.data)
def test_load_2(self):
    "load using deferred=True"
    deferred = io.loadh(self.filename1, deferred=True)
    try:
        eq(deferred['arr_0'], self.data)
    finally:
        # close the handle even when the comparison raises
        deferred.close()