Example #1
0
    def start(self):
        """Fit ``self.instance`` on the dataset stored at ``self.inp``.

        The dataset is read with ``verboseload`` and must be a ``list``;
        otherwise the problem is reported through ``self.error``. The model
        is then fit on the dataset and written to ``self.out`` with
        ``verbosedump``.
        """
        print(self.instance)

        sequences = verboseload(self.inp)
        if not isinstance(sequences, list):
            # Report both the offending path and the type that was loaded.
            message = '--inp must contain a list of arrays. "%s" has type %s' % (
                self.inp, type(sequences))
            self.error(message)

        print('fitting...')
        self.instance.fit(sequences)

        # Persist the fitted model itself, not the data.
        verbosedump(self.instance, self.out)

        print('All done')
Example #2
0
    def start(self):
        """Transform every trajectory matched by ``self.trjs`` and save the result.

        Each entry of ``self.trjs`` is expanded with ``glob.glob``. Every
        matching file is read chunk by chunk through ``md.iterload``, each
        chunk is passed to ``self.instance.partial_transform``, and the
        per-chunk results are concatenated into one array per trajectory
        file. The list of those arrays is written to ``self.out``.
        """
        print(self.instance)

        # Only load a topology when the given path actually exists on disk.
        top = md.load(self.top) if os.path.exists(self.top) else None

        dataset = []
        for pattern in self.trjs:
            for trjfn in glob.glob(pattern):
                label = os.path.basename(trjfn)
                pieces = []
                chunks = md.iterload(trjfn, stride=self.stride, chunk=self.chunk, top=top)
                for index, chunk in enumerate(chunks):
                    # '\r' rewrites the same console line as a progress indicator.
                    print('\r{} chunk {}'.format(label, index), end='')
                    sys.stdout.flush()
                    pieces.append(self.instance.partial_transform(chunk))
                # Terminate the progress line before moving to the next file.
                print()
                dataset.append(np.concatenate(pieces))

        verbosedump(dataset, self.out)
        print('All done')
Example #3
0
    def start(self):
        """Fit ``self.instance`` on ``self.inp`` and optionally save outputs.

        The dataset is read with ``verboseload`` and must be a ``list``. After
        fitting, the transformed dataset is written to ``self.transformed``
        and/or the fitted model is written to ``self.out``, for whichever of
        the two is a non-empty string. If both are empty strings the problem
        is reported through ``self.error``.
        """
        print(self.instance)
        # Compare by value: ``is ''`` tests object identity, which only works
        # by accident of string interning (and warns on Python >= 3.8).
        # The guard checks ``self.transformed``, the same attribute that
        # controls the transformed-output branch further down.
        # NOTE(review): the message mentions --model; confirm the flag name
        # against the argument declarations.
        if self.out == '' and self.transformed == '':
            self.error('One of --out or --model should be specified')

        dataset = verboseload(self.inp)
        if not isinstance(dataset, list):
            self.error('--inp must contain a list of arrays. "%s" has type %s' % (self.inp, type(dataset)))

        # Show the shapes of at most the first three sequences as a preview.
        preview = ', '.join(str(sequence.shape) for sequence in dataset[:3])
        print('fit() on %d sequences of shape %s...' % (len(dataset), preview))
        self.instance.fit(dataset)

        if self.transformed != '':
            transformed = self.instance.transform(dataset)
            verbosedump(transformed, self.transformed)

        if self.out != '':
            verbosedump(self.instance, self.out)

        print('All done')
Example #4
0
def load_quadwell(data_home=None, random_state=None):
    """Load (or simulate) the quad-well trajectory dataset.

    Parameters
    ----------
    data_home : optional, default: None
        Specify another cache folder for the datasets. By default
        all mixtape data is stored in '~/mixtape_data' subfolders.
    random_state : {int, None}, default: None
        Seed for the pseudorandom number generator used to generate the
        trajectories. If random_state is None, new simulations are performed
        and the trajectories are not cached. If random_state is an int, the
        simulations are loaded from ``data_home`` when a cache file for that
        seed already exists; otherwise they are simulated and then cached
        there.

    Returns
    -------
    Bunch
        With fields ``trajectories`` and ``DESCR``.

    Raises
    ------
    TypeError
        If random_state is not None and is not an integer.
    """

    # V = 4*(x**8 + 0.8*np.exp(-80*x**2) + 0.2*(-80*(x-0.5)**2) + 0.5*np.exp(-40*(x+0.5)**2))

    rng = check_random_state(random_state)
    cache_dir = join(get_data_home(data_home=data_home), 'quadwell')
    if not exists(cache_dir):
        makedirs(cache_dir)

    if random_state is not None:
        if not isinstance(random_state, numbers.Integral):
            raise TypeError('random_state must be an int')
        # One cache file per seed.
        cache_file = join(cache_dir, 'version-0_random-state-%d.pkl' % random_state)
        if not exists(cache_file):
            trajectories = _simulate_quadwell(rng)
            verbosedump(trajectories, cache_file)
        else:
            trajectories = verboseload(cache_file)
    else:
        # Unseeded run: always simulate, never touch the cache file.
        trajectories = _simulate_quadwell(rng)

    return Bunch(trajectories=trajectories, DESCR=QUADWELL_DESCRIPTION)
Example #5
0
def load_doublewell(data_home=None, random_state=None):
    """Loader for double-well dataset

    Parameters
    ----------
    data_home : optional, default: None
        Specify another cache folder for the datasets. By default
        all mixtape data is stored in '~/mixtape_data' subfolders.
    random_state : {int, None}, default: None
        Seed the pseudorandom number generator to generate trajectories. If
        random_state is None, new simulations will be performed and the
        trajectories will not be cached. If random_state is an int, the
        simulations will be cached in ``data_home``, or loaded from
        ``data_home`` if simulations with that seed have been performed
        already.

    Returns
    -------
    Bunch
        With fields ``trajectories`` and ``DESCR``.

    Raises
    ------
    TypeError
        If random_state is not None and is not an integer.
    """
    random = check_random_state(random_state)
    data_home = join(get_data_home(data_home=data_home), 'doublewell')
    if not exists(data_home):
        makedirs(data_home)

    if random_state is None:
        trajectories = _simulate_doublewell(random)
    else:
        # Raise explicitly instead of asserting: assert statements are
        # stripped under ``python -O``, and the '%d' format below needs an
        # integer seed to build the cache file name.
        if not isinstance(random_state, numbers.Integral):
            raise TypeError('random_state must be an int')
        path = join(data_home, 'version-1_random-state-%d.pkl' % random_state)
        if exists(path):
            trajectories = verboseload(path)
        else:
            trajectories = _simulate_doublewell(random)
            verbosedump(trajectories, path)

    return Bunch(trajectories=trajectories, DESCR=DOUBLEWELL_DESCRIPTION)
Example #6
0
# Two-pass tICA pipeline: fit incrementally on every trajectory, then project
# every trajectory and save both the projections and the fitted object.
output = {}

# Pass 1: featurize each trajectory and feed it to the incremental fit.
for path in np.arange(prj.n_trajs):
    featurized_path = feat.partial_transform(prj.load_traj(path))
    try:
        tica.partial_fit(featurized_path)
    except Exception as err:
        # Best-effort: a trajectory that cannot be fit is skipped, but the
        # reason is reported. ``Exception`` (not a bare except) keeps
        # Ctrl-C / SystemExit able to stop the run.
        print("skipping %s: %s" % (path, err))

# Pass 2: project each trajectory, keyed by trajectory index.
for path in np.arange(prj.n_trajs):
    featurized_path = feat.partial_transform(prj.load_traj(path))
    output[path] = tica.partial_transform(featurized_path)

# save output
verbosedump(output, 'my-tics.pkl')
verbosedump(tica, 'tica-obj.pkl')

# Ignore below; the model fitting is very quick and can be done interactively in IPython

#get the data
#X =  [ output[i] for i in output.iterkeys()]

#model = GaussianFusionHMM(n_states=5, n_features=5)
#model.fit(X)

#save model
#verbosedump(model,"ghmm_s5_n5.pkl")