def start(self):
    """Fit ``self.instance`` on the dataset at ``self.inp`` and save it.

    The dataset is read with ``verboseload`` and must be a ``list``;
    anything else is reported through ``self.error``. The fitted
    instance is then written to ``self.out`` with ``verbosedump``.
    """
    print(self.instance)

    sequences = verboseload(self.inp)
    loaded_a_list = isinstance(sequences, list)
    if not loaded_a_list:
        message = '--inp must contain a list of arrays. "%s" has type %s' % (
            self.inp, type(sequences))
        self.error(message)

    print('fitting...')
    self.instance.fit(sequences)

    # Persist the fitted object itself, not the data it was fitted on.
    verbosedump(self.instance, self.out)
    print('All done')
def start(self):
    """Featurize every trajectory matched by ``self.trjs`` and save the result.

    ``self.top`` is loaded with ``md.load`` when that path exists on disk;
    otherwise ``None`` is passed as the topology. Each file matched by the
    glob patterns in ``self.trjs`` is read chunk by chunk via
    ``md.iterload`` (using ``self.stride`` and ``self.chunk``), every chunk
    is passed through ``self.instance.partial_transform``, and the per-chunk
    results are concatenated into one array per file. The list of arrays is
    written to ``self.out`` with ``verbosedump``.
    """
    print(self.instance)

    topology = md.load(self.top) if os.path.exists(self.top) else None

    featurized = []
    for pattern in self.trjs:
        for filename in glob.glob(pattern):
            label = os.path.basename(filename)
            chunks = md.iterload(filename, stride=self.stride,
                                 chunk=self.chunk, top=topology)
            pieces = []
            for index, chunk in enumerate(chunks):
                # '\r' rewrites the same console line to show progress.
                print('\r{} chunk {}'.format(label, index), end='')
                sys.stdout.flush()
                pieces.append(self.instance.partial_transform(chunk))
            # Terminate the progress line before moving to the next file.
            print()
            featurized.append(np.concatenate(pieces))

    verbosedump(featurized, self.out)
    print('All done')
def start(self):
    """Fit ``self.instance`` on the input dataset and write the requested outputs.

    The dataset at ``self.inp`` is read with ``verboseload`` and must be a
    ``list``. After fitting, the transformed dataset is written to
    ``self.transformed`` and/or the fitted instance to ``self.out``,
    whichever of the two is a non-empty string. Problems with the
    arguments are reported through ``self.error``.
    """
    print(self.instance)

    # Compare by value: identity checks (``is ''``) against a string literal
    # depend on interning and are not a reliable emptiness test. The second
    # operand is ``self.transformed``, the attribute actually used below.
    # NOTE(review): the message mentions --model; confirm the flag name that
    # populates ``self.transformed``.
    if self.out == '' and self.transformed == '':
        self.error('One of --out or --model should be specified')

    dataset = verboseload(self.inp)
    if not isinstance(dataset, list):
        self.error('--inp must contain a list of arrays. "%s" has type %s' % (
            self.inp, type(dataset)))

    # Only the first three shapes are shown to keep the message short.
    shapes = ', '.join(str(sequence.shape) for sequence in dataset[:3])
    print('fit() on %d sequences of shape %s...' % (len(dataset), shapes))
    self.instance.fit(dataset)

    if self.transformed != '':
        transformed = self.instance.transform(dataset)
        verbosedump(transformed, self.transformed)

    if self.out != '':
        verbosedump(self.instance, self.out)

    print('All done')
def load_quadwell(data_home=None, random_state=None):
    """Load the quad-well dataset, simulating it when no cached copy exists.

    Parameters
    ----------
    data_home : optional, default: None
        Alternative cache folder for the datasets. By default all
        mixtape data is stored in '~/mixtape_data' subfolders.
    random_state : {int, None}, default: None
        Seed for the pseudorandom number generator used to generate
        trajectories. With ``None`` the global numpy PRNG is used, new
        simulations are run and nothing is cached. With an int, the
        simulations are loaded from ``data_home`` if that seed has been
        simulated before, and otherwise run and cached there.

    Returns
    -------
    Bunch
        Holds ``trajectories`` and the dataset description ``DESCR``.

    Raises
    ------
    TypeError
        If ``random_state`` is neither ``None`` nor an integer.
    """
    # V = 4*(x**8 + 0.8*np.exp(-80*x**2) + 0.2*(-80*(x-0.5)**2) + 0.5*np.exp(-40*(x+0.5)**2))
    random = check_random_state(random_state)

    cache_dir = join(get_data_home(data_home=data_home), 'quadwell')
    if not exists(cache_dir):
        makedirs(cache_dir)

    # Unseeded runs are never cached: simulate and return immediately.
    if random_state is None:
        return Bunch(trajectories=_simulate_quadwell(random),
                     DESCR=QUADWELL_DESCRIPTION)

    if not isinstance(random_state, numbers.Integral):
        raise TypeError('random_state must be an int')

    cache_file = join(cache_dir,
                      'version-0_random-state-%d.pkl' % random_state)
    if exists(cache_file):
        trajectories = verboseload(cache_file)
    else:
        trajectories = _simulate_quadwell(random)
        verbosedump(trajectories, cache_file)

    return Bunch(trajectories=trajectories, DESCR=QUADWELL_DESCRIPTION)
def load_doublewell(data_home=None, random_state=None):
    """Loader for double-well dataset

    Parameters
    ----------
    data_home : optional, default: None
        Specify another cache folder for the datasets. By default
        all mixtape data is stored in '~/mixtape_data' subfolders.
    random_state : {int, None}, default: None
        Seed the pseudorandom number generator to generate trajectories.
        If random_state is None, the global numpy PRNG is used. If
        random_state is an int, the simulations will be cached in
        ``data_home``, or loaded from ``data_home`` if simulations with
        that seed have been performed already. With random_state=None,
        new simulations will be performed and the trajectories will not
        be cached.

    Returns
    -------
    Bunch
        Holds ``trajectories`` and the dataset description ``DESCR``.

    Raises
    ------
    TypeError
        If ``random_state`` is neither ``None`` nor an integer.
    """
    random = check_random_state(random_state)
    data_home = join(get_data_home(data_home=data_home), 'doublewell')
    if not exists(data_home):
        makedirs(data_home)

    if random_state is None:
        trajectories = _simulate_doublewell(random)
    else:
        # Raise explicitly instead of asserting: assertions are removed
        # under ``python -O``, and the seed is interpolated with ``%d``
        # into the cache file name below.
        if not isinstance(random_state, numbers.Integral):
            raise TypeError('random_state must be an int')
        path = join(data_home,
                    'version-1_random-state-%d.pkl' % random_state)
        if exists(path):
            trajectories = verboseload(path)
        else:
            trajectories = _simulate_doublewell(random)
            verbosedump(trajectories, path)

    return Bunch(trajectories=trajectories, DESCR=DOUBLEWELL_DESCRIPTION)
# Two passes over the project's trajectories: the first feeds each featurized
# trajectory to ``tica.partial_fit``, the second projects each trajectory with
# ``tica.partial_transform`` and collects the results keyed by trajectory index.
output = {}

for path in np.arange(prj.n_trajs):
    featurized_path = feat.partial_transform(prj.load_traj(path))
    try:
        tica.partial_fit(featurized_path)
    except Exception:
        # Best effort: a trajectory that cannot be fitted is reported and
        # skipped. ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt/SystemExit still stop the run.
        # The single-argument form prints the same text on Python 2 and 3.
        print("skipping %s" % path)

# Note: trajectories skipped during fitting are still transformed here.
for path in np.arange(prj.n_trajs):
    featurized_path = feat.partial_transform(prj.load_traj(path))
    output[path] = tica.partial_transform(featurized_path)

# save output
verbosedump(output, 'my-tics.pkl')
verbosedump(tica, 'tica-obj.pkl')

# Ignore below; the model fitting is very quick and can be done in iPython
# get the data
# X = [ output[i] for i in output.iterkeys()]
# model = GaussianFusionHMM(n_states=5, n_features=5)
# model.fit(X)
# save model
# verbosedump(model,"ghmm_s5_n5.pkl")