Ejemplo n.º 1
0
def test_1():
    """Check that the ``mixtape`` command line agrees with the Python API.

    Runs two featurizer commands on ``frame0.h5`` and then the ``tICA`` and
    ``KMeans`` commands on their pickled outputs, all inside a temporary
    directory. The first element of each command's ``--transformed`` output
    is compared against the result of fitting the same estimator in-process
    on the same pickled features.
    """
    traj_path = get_fn("frame0.h5")
    shell_commands = (
        "mixtape DRIDFeaturizer --trjs {} --out a.pkl".format(traj_path),
        "mixtape DihedralFeaturizer --types phi psi --trjs {} --out b.pkl".format(traj_path),
        "mixtape tICA --inp a.pkl --out ticamodel.pkl --transformed tics.pkl",
        "mixtape KMeans --random_state 0 --n_init 1 --inp b.pkl --out kmeans.pkl --transformed labels.pkl",
    )

    with enter_temp_directory():
        # Run in order: the tICA and KMeans commands read the pickles that
        # the featurizer commands write, so each must exit with status 0.
        for command in shell_commands:
            assert os.system(command) == 0

        cli_labels = verboseload("labels.pkl")
        dihedral_features = verboseload("b.pkl")
        api_labels = KMeans(random_state=0, n_init=1).fit_predict(dihedral_features)
        cli_tics = verboseload("tics.pkl")
        drid_features = verboseload("a.pkl")
        api_tics = tICA().fit_transform(drid_features)

    eq(cli_labels[0], api_labels[0])
    eq(cli_tics[0], api_tics[0])
Ejemplo n.º 2
0
    def start(self):
        """Fit ``self.instance`` on the dataset at ``self.inp`` and save it.

        The dataset is read with ``verboseload`` and must be a ``list``;
        any other type is reported through ``self.error``. After fitting,
        the instance is written to ``self.out`` with ``verbosedump``.
        """
        print(self.instance)

        sequences = verboseload(self.inp)
        if not isinstance(sequences, list):
            # NOTE(review): self.error presumably aborts the command -- confirm.
            message = '--inp must contain a list of arrays. "%s" has type %s' % (
                self.inp, type(sequences))
            self.error(message)

        print('fitting...')
        self.instance.fit(sequences)

        verbosedump(self.instance, self.out)

        print('All done')
Ejemplo n.º 3
0
    def start(self):
        """Fit ``self.instance`` on the input dataset and save the results.

        At least one of ``self.out`` (destination for the fitted instance)
        or ``self.transformed`` (destination for the transformed dataset)
        must be a non-empty string; otherwise the problem is reported
        through ``self.error``. The dataset is read from ``self.inp`` with
        ``verboseload`` and must be a ``list``.
        """
        print(self.instance)
        # Compare by value: ``is ''`` tests object identity, which depends on
        # string interning and triggers a SyntaxWarning on Python 3.8+.
        # The guard checks the same attribute (``transformed``) that is used
        # below when writing the transformed output.
        if self.out == '' and self.transformed == '':
            self.error('One of --out or --transformed should be specified')

        dataset = verboseload(self.inp)
        if not isinstance(dataset, list):
            self.error('--inp must contain a list of arrays. "%s" has type %s' % (self.inp, type(dataset)))

        # Preview the shapes of at most the first three sequences.
        preview = ', '.join(str(sequence.shape) for sequence in dataset[:3])
        print('fit() on %d sequences of shape %s...' % (len(dataset), preview))
        self.instance.fit(dataset)

        if self.transformed != '':
            transformed = self.instance.transform(dataset)
            verbosedump(transformed, self.transformed)

        if self.out != '':
            verbosedump(self.instance, self.out)

        print('All done')
Ejemplo n.º 4
0
def load_quadwell(data_home=None, random_state=None):
    """Load (or simulate) the quad-well dataset.

    Parameters
    ----------
    data_home : optional, default: None
        Specify another cache folder for the datasets. By default
        all mixtape data is stored in '~/mixtape_data' subfolders.
    random_state : {int, None}, default: None
        Seed for the pseudorandom number generator used to generate the
        trajectories. If None, the global numpy PRNG is used, new
        simulations are performed, and the trajectories are not cached.
        If an int, the simulations are cached in ``data_home``, or loaded
        from ``data_home`` when simulations with that seed have already
        been performed.

    Returns
    -------
    Bunch
        Object with ``trajectories`` and ``DESCR`` attributes.

    Raises
    ------
    TypeError
        If ``random_state`` is neither None nor an integer.
    """

    # V = 4*(x**8 + 0.8*np.exp(-80*x**2) + 0.2*(-80*(x-0.5)**2) + 0.5*np.exp(-40*(x+0.5)**2))

    rng = check_random_state(random_state)
    cache_dir = join(get_data_home(data_home=data_home), 'quadwell')
    if not exists(cache_dir):
        makedirs(cache_dir)

    # Unseeded runs are always simulated afresh and never touch the cache.
    if random_state is None:
        return Bunch(trajectories=_simulate_quadwell(rng),
                     DESCR=QUADWELL_DESCRIPTION)

    if not isinstance(random_state, numbers.Integral):
        raise TypeError('random_state must be an int')

    cache_path = join(cache_dir, 'version-0_random-state-%d.pkl' % random_state)
    if not exists(cache_path):
        trajectories = _simulate_quadwell(rng)
        verbosedump(trajectories, cache_path)
    else:
        trajectories = verboseload(cache_path)

    return Bunch(trajectories=trajectories, DESCR=QUADWELL_DESCRIPTION)
Ejemplo n.º 5
0
def load_doublewell(data_home=None, random_state=None):
    """Loader for double-well dataset

    Parameters
    ----------
    data_home : optional, default: None
        Specify another cache folder for the datasets. By default
        all mixtape data is stored in '~/mixtape_data' subfolders.
    random_state : {int, None}, default: None
        Seed the pseudorandom number generator to generate trajectories. If
        seed is None, the global numpy PRNG is used. If random_state is an
        int, the simulations will be cached in ``data_home``, or loaded from
        ``data_home`` if simulations with that seed have been performed already.
        With random_state=None, new simulations will be performed and the
        trajectories will not be cached.

    Returns
    -------
    Bunch
        Object with ``trajectories`` and ``DESCR`` attributes.

    Raises
    ------
    TypeError
        If ``random_state`` is neither None nor an integer.
    """
    random = check_random_state(random_state)
    data_home = join(get_data_home(data_home=data_home), 'doublewell')
    if not exists(data_home):
        makedirs(data_home)

    if random_state is None:
        trajectories = _simulate_doublewell(random)
    else:
        # Raise explicitly instead of asserting: assert statements are
        # removed under ``python -O``, and the seed is formatted with %d
        # into the cache file name below, so it has to be an integer.
        if not isinstance(random_state, numbers.Integral):
            raise TypeError('random_state must be an int')
        path = join(data_home, 'version-1_random-state-%d.pkl' % random_state)
        if exists(path):
            trajectories = verboseload(path)
        else:
            trajectories = _simulate_doublewell(random)
            verbosedump(trajectories, path)

    return Bunch(trajectories=trajectories, DESCR=DOUBLEWELL_DESCRIPTION)