Example #1
 def setUp(self):
     self.prng = prng = np.random.RandomState(10)
     self.n_components = n_components = 3
     self.n_features = n_features = 3
     self.startprob = prng.rand(n_components)
     self.startprob = self.startprob / self.startprob.sum()
     self.transmat = prng.rand(n_components, n_components)
     self.transmat /= np.tile(self.transmat.sum(axis=1)[:, np.newaxis],
             (1, n_components))
     self.means = prng.randint(-20, 20, (n_components, n_features))
     self.covars = {
         'spherical': (1.0 + 2 * np.dot(prng.rand(n_components, 1),
                                        np.ones((1, n_features)))) ** 2,
         'tied': (make_spd_matrix(n_features, random_state=0)
                  + np.eye(n_features)),
         'diag': (1.0 + 2 * prng.rand(n_components, n_features)) ** 2,
         'full': np.array([make_spd_matrix(n_features, random_state=0)
                           + np.eye(n_features)
                           for x in range(n_components)]),
     }
     self.expanded_covars = {
         'spherical': [np.eye(n_features) * cov
                       for cov in self.covars['spherical']],
         'diag': [np.diag(cov) for cov in self.covars['diag']],
         'tied': [self.covars['tied']] * n_components,
         'full': self.covars['full'],
     }
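
The setUp above draws random start and transition probabilities, normalises them row-wise, and builds covariance matrices from make_spd_matrix plus an identity offset so they are well conditioned. A minimal standalone sketch of those invariants (assuming scikit-learn's make_spd_matrix; the assertions are illustrative and not taken from the original test):

import numpy as np
from sklearn.datasets import make_spd_matrix

prng = np.random.RandomState(10)
n_components, n_features = 3, 3

# Row-normalised probabilities, as in the setUp above.
startprob = prng.rand(n_components)
startprob /= startprob.sum()
transmat = prng.rand(n_components, n_components)
transmat /= transmat.sum(axis=1)[:, np.newaxis]
assert np.isclose(startprob.sum(), 1.0)
assert np.allclose(transmat.sum(axis=1), 1.0)

# The 'tied' covariance is symmetric positive definite by construction.
tied = make_spd_matrix(n_features, random_state=0) + np.eye(n_features)
assert np.allclose(tied, tied.T)
assert np.all(np.linalg.eigvalsh(tied) > 0)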
Example #2
    def __init__(self, rng, n_samples=500, n_components=2, n_features=2,
                 scale=50):
        self.n_samples = n_samples
        self.n_components = n_components
        self.n_features = n_features

        self.weights = rng.rand(n_components)
        self.weights = self.weights / self.weights.sum()
        self.means = rng.rand(n_components, n_features) * scale
        self.covariances = {
            'spherical': .5 + rng.rand(n_components),
            'diag': (.5 + rng.rand(n_components, n_features)) ** 2,
            'tied': make_spd_matrix(n_features, random_state=rng),
            'full': np.array([
                make_spd_matrix(n_features, random_state=rng) * .5
                for _ in range(n_components)])}
        self.precisions = {
            'spherical': 1. / self.covariances['spherical'],
            'diag': 1. / self.covariances['diag'],
            'tied': linalg.inv(self.covariances['tied']),
            'full': np.array([linalg.inv(covariance)
                             for covariance in self.covariances['full']])}

        self.X = dict(zip(COVARIANCE_TYPE, [generate_data(
            n_samples, n_features, self.weights, self.means, self.covariances,
            covar_type) for covar_type in COVARIANCE_TYPE]))
        self.Y = np.hstack([np.full(int(np.round(w * n_samples)), k,
                                    dtype=int)
                            for k, w in enumerate(self.weights)])
Example #3
    def __init__(self, rng, n_samples=500, n_components=2, n_features=2,
                 scale=50):
        self.n_samples = n_samples
        self.n_components = n_components
        self.n_features = n_features

        self.weights = rng.rand(n_components)
        self.weights = self.weights / self.weights.sum()
        self.means = rng.rand(n_components, n_features) * scale
        self.covariances = {
            'spherical': .5 + rng.rand(n_components),
            'diag': (.5 + rng.rand(n_components, n_features)) ** 2,
            'tied': make_spd_matrix(n_features, random_state=rng),
            'full': np.array([
                make_spd_matrix(n_features, random_state=rng) * .5
                for _ in range(n_components)])}
        self.precisions = {
            'spherical': 1. / self.covariances['spherical'],
            'diag': 1. / self.covariances['diag'],
            'tied': linalg.inv(self.covariances['tied']),
            'full': np.array([linalg.inv(covariance)
                             for covariance in self.covariances['full']])}

        self.X = dict(zip(COVARIANCE_TYPE, [generate_data(
            n_samples, n_features, self.weights, self.means, self.covariances,
            covar_type) for covar_type in COVARIANCE_TYPE]))
        self.Y = np.hstack([k * np.ones(int(np.round(w * n_samples)))
                            for k, w in enumerate(self.weights)])
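
The only difference between the two constructors above is the label vector Y: Example #2 uses np.full with an integer dtype, while Example #3 multiplies np.ones by the component index, which yields floats. A small sketch (illustrative values, not from the original code) showing the two forms agree:

import numpy as np

weights = np.array([0.25, 0.75])
n_samples = 8
# Integer labels via np.full, as in Example #2.
y_full = np.hstack([np.full(int(np.round(w * n_samples)), k, dtype=int)
                    for k, w in enumerate(weights)])
# Float labels via k * np.ones, as in Example #3.
y_ones = np.hstack([k * np.ones(int(np.round(w * n_samples)))
                    for k, w in enumerate(weights)])
assert np.array_equal(y_full, y_ones.astype(int))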
Example #4
 def setUp(self):
     self.prng = prng = np.random.RandomState(10)
     self.n_components = n_components = 3
     self.n_features = n_features = 3
     self.startprob = prng.rand(n_components)
     self.startprob = self.startprob / self.startprob.sum()
     self.transmat = prng.rand(n_components, n_components)
     self.transmat /= np.tile(self.transmat.sum(axis=1)[:, np.newaxis],
                              (1, n_components))
     self.means = prng.randint(-20, 20, (n_components, n_features))
     self.covars = {
         'spherical': (1.0 + 2 * np.dot(prng.rand(n_components, 1),
                                        np.ones((1, n_features)))) ** 2,
         'tied': (make_spd_matrix(n_features, random_state=0)
                  + np.eye(n_features)),
         'diag': (1.0 + 2 * prng.rand(n_components, n_features)) ** 2,
         'full': np.array([make_spd_matrix(n_features, random_state=0)
                           + np.eye(n_features)
                           for x in range(n_components)]),
     }
     self.expanded_covars = {
         'spherical': [np.eye(n_features) * cov
                       for cov in self.covars['spherical']],
         'diag': [np.diag(cov) for cov in self.covars['diag']],
         'tied': [self.covars['tied']] * n_components,
         'full': self.covars['full'],
     }
Example #5
class GaussianHMMParams(object):
    n_components = 3
    n_features = 3
    startprob = prng.rand(n_components)
    startprob = startprob / startprob.sum()
    transmat = np.random.rand(n_components, n_components)
    transmat /= np.tile(transmat.sum(axis=1)[:, np.newaxis], (1, n_components))
    means = prng.randint(-20, 20, (n_components, n_features))
    covars = {
        'spherical': (1.0 + 2 * prng.rand(n_components))**2,
        'tied':
        (make_spd_matrix(n_features, random_state=0) + np.eye(n_features)),
        'diag': (1.0 + 2 * prng.rand(n_components, n_features))**2,
        'full':
        np.array([
            make_spd_matrix(n_features, random_state=0) + np.eye(n_features)
            for x in range(n_components)
        ])
    }
    expanded_covars = {
        'spherical': [np.eye(n_features) * cov for cov in covars['spherical']],
        'diag': [np.diag(cov) for cov in covars['diag']],
        'tied': [covars['tied']] * n_components,
        'full': covars['full']
    }
Example #6
def make_covar_matrix(covariance_type, n_components, n_features):
    mincv = 0.1
    rand = np.random.random
    return {
        'spherical': (mincv + mincv * np.dot(rand(
            (n_components, 1)), np.ones((1, n_features))))**2,
        'tied': (make_spd_matrix(n_features) + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * rand((n_components, n_features)))**2,
        'full':
        np.array([(make_spd_matrix(n_features) + mincv * np.eye(n_features))
                  for x in range(n_components)])
    }[covariance_type]
Example #7
def make_covar_matrix(covariance_type, n_components, n_features):
    mincv = 0.1
    rand = np.random.random
    return {
        'spherical': (mincv + mincv * np.dot(rand((n_components, 1)),
                                             np.ones((1, n_features)))) ** 2,
        'tied': (make_spd_matrix(n_features)
                 + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * rand((n_components, n_features))) ** 2,
        'full': np.array([(make_spd_matrix(n_features)
                           + mincv * np.eye(n_features))
                          for x in range(n_components)])
    }[covariance_type]
Example #8
def make_covar_matrix(covariance_type, n_components, n_features):
    mincv = 0.1
    rand = np.random.random
    if covariance_type == 'spherical':
        return (mincv + mincv * rand((n_components, )))**2
    elif covariance_type == 'tied':
        return (make_spd_matrix(n_features) + mincv * np.eye(n_features))
    elif covariance_type == 'diag':
        return (mincv + mincv * rand((n_components, n_features)))**2
    elif covariance_type == 'full':
        return np.array([
            (make_spd_matrix(n_features) + mincv * np.eye(n_features))
            for x in range(n_components)
        ])
Example #9
def make_covar_matrix(covariance_type, n_components, n_features):
    mincv = 0.1
    rand = np.random.random
    if covariance_type == 'spherical':
        return (mincv + mincv * rand((n_components,))) ** 2
    elif covariance_type == 'tied':
        return (make_spd_matrix(n_features)
                + mincv * np.eye(n_features))
    elif covariance_type == 'diag':
        return (mincv + mincv * rand((n_components, n_features))) ** 2
    elif covariance_type == 'full':
        return np.array([(make_spd_matrix(n_features)
                        + mincv * np.eye(n_features))
                        for x in range(n_components)])
Example #10
def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    prng = check_random_state(prng)
    g = mixture.GMM(n_mix, covariance_type=covariance_type)
    g.means_ = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars_ = {
        "spherical": (mincv + mincv * np.dot(prng.rand(n_mix, 1), np.ones((1, n_features)))) ** 2,
        "tied": (make_spd_matrix(n_features, random_state=prng) + mincv * np.eye(n_features)),
        "diag": (mincv + mincv * prng.rand(n_mix, n_features)) ** 2,
        "full": np.array(
            [make_spd_matrix(n_features, random_state=prng) + mincv * np.eye(n_features) for x in range(n_mix)]
        ),
    }[covariance_type]
    g.weights_ = hmm.normalize(prng.rand(n_mix))
    return g
Example #11
 def _setUp(self):
     self.n_components = 10
     self.n_features = 4
     self.weights = rng.rand(self.n_components)
     self.weights = self.weights / self.weights.sum()
     self.means = rng.randint(-20, 20, (self.n_components, self.n_features))
     self.threshold = -0.5
     self.I = np.eye(self.n_features)
     self.covars = {
         "spherical": (0.1 + 2 * rng.rand(self.n_components, self.n_features)) ** 2,
         "tied": (make_spd_matrix(self.n_features, random_state=0) + 5 * self.I),
         "diag": (0.1 + 2 * rng.rand(self.n_components, self.n_features)) ** 2,
         "full": np.array(
             [make_spd_matrix(self.n_features, random_state=0) + 5 * self.I for x in range(self.n_components)]
         ),
     }
Example #12
def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    prng = check_random_state(prng)
    g = mixture.GMM(n_mix, covariance_type=covariance_type)
    g.means_ = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars_ = {
        'spherical': (mincv + mincv * np.dot(prng.rand(n_mix, 1),
                                             np.ones((1, n_features)))) ** 2,
        'tied': (make_spd_matrix(n_features, random_state=prng)
                 + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * prng.rand(n_mix, n_features)) ** 2,
        'full': np.array(
            [make_spd_matrix(n_features, random_state=prng)
             + mincv * np.eye(n_features) for x in range(n_mix)])
    }[covariance_type]
    g.weights_ = hmm.normalize(prng.rand(n_mix))
    return g
Example #13
 def _setUp(self):
     self.n_components = 10
     self.n_features = 4
     self.weights = rng.rand(self.n_components)
     self.weights = self.weights / self.weights.sum()
     self.means = rng.randint(-20, 20, (self.n_components, self.n_features))
     self.threshold = -0.5
     self.I = np.eye(self.n_features)
     self.covars = {
         'spherical': (0.1 + 2 * rng.rand(self.n_components,
                                          self.n_features)) ** 2,
         'tied': (make_spd_matrix(self.n_features, random_state=0)
                  + 5 * self.I),
         'diag': (0.1 + 2 * rng.rand(self.n_components,
                                     self.n_features)) ** 2,
         'full': np.array([make_spd_matrix(self.n_features, random_state=0)
                           + 5 * self.I for x in range(self.n_components)])}
Example #14
def make_covar_matrix(covariance_type,
                      n_components,
                      n_features,
                      random_state=None):
    mincv = 0.1
    prng = check_random_state(random_state)
    if covariance_type == 'spherical':
        return (mincv + mincv * prng.random_sample((n_components, )))**2
    elif covariance_type == 'tied':
        return (make_spd_matrix(n_features) + mincv * np.eye(n_features))
    elif covariance_type == 'diag':
        return (mincv + mincv * prng.random_sample(
            (n_components, n_features)))**2
    elif covariance_type == 'full':
        return np.array([(make_spd_matrix(n_features, random_state=prng) +
                          mincv * np.eye(n_features))
                         for x in range(n_components)])
Example #15
def create_random_gmm(n_mix, n_features, cvtype, prng=prng):
    from sklearn import mixture

    g = mixture.GMM(n_mix, cvtype=cvtype)
    g.means = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars = {
        'spherical': (mincv + mincv * prng.rand(n_mix)) ** 2,
        'tied': (make_spd_matrix(n_features, random_state=prng)
                 + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * prng.rand(n_mix, n_features)) ** 2,
        'full': np.array(
            [make_spd_matrix(n_features, random_state=prng)
             + mincv * np.eye(n_features) for x in range(n_mix)])
    }[cvtype]
    g.weights = hmm.normalize(prng.rand(n_mix))
    return g
Example #16
 def _setUp(self):
     self.n_components = 10
     self.n_features = 4
     self.weights = rng.rand(self.n_components)
     self.weights = self.weights / self.weights.sum()
     self.means = rng.randint(-20, 20, (self.n_components, self.n_features))
     self.threshold = -0.5
     self.I = np.eye(self.n_features)
     self.covars = {
         'spherical': (0.1 + 2 * rng.rand(self.n_components,
                                          self.n_features)) ** 2,
         'tied': (make_spd_matrix(self.n_features, random_state=0)
                  + 5 * self.I),
         'diag': (0.1 + 2 * rng.rand(self.n_components,
                                     self.n_features)) ** 2,
         'full': np.array([make_spd_matrix(self.n_features, random_state=0)
                           + 5 * self.I
                           for x in range(self.n_components)])}
Example #17
def make_covar_matrix(covariance_type, n_components, n_features,
                      random_state=None):
    mincv = 0.1
    prng = check_random_state(random_state)
    if covariance_type == 'spherical':
        return (mincv + mincv * prng.random_sample((n_components,))) ** 2
    elif covariance_type == 'tied':
        return (make_spd_matrix(n_features)
                + mincv * np.eye(n_features))
    elif covariance_type == 'diag':
        return (mincv + mincv *
                prng.random_sample((n_components, n_features))) ** 2
    elif covariance_type == 'full':
        return np.array([
            (make_spd_matrix(n_features, random_state=prng)
             + mincv * np.eye(n_features))
            for x in range(n_components)
        ])
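
A quick, hedged usage sketch for the helper above (the expected shapes are an assumption about its intended contract, not asserted in the original tests):

import numpy as np
from sklearn.datasets import make_spd_matrix
from sklearn.utils import check_random_state

n_components, n_features = 4, 3
covars = {cov_type: make_covar_matrix(cov_type, n_components, n_features,
                                      random_state=0)
          for cov_type in ('spherical', 'diag', 'tied', 'full')}
assert covars['spherical'].shape == (n_components,)
assert covars['diag'].shape == (n_components, n_features)
assert covars['tied'].shape == (n_features, n_features)
assert covars['full'].shape == (n_components, n_features, n_features)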
Example #18
def create_random_gmm(n_mix, n_features, cvtype, prng=prng):
    from sklearn import mixture

    g = mixture.GMM(n_mix, cvtype=cvtype)
    g.means = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars = {
        'spherical': (mincv + mincv * prng.rand(n_mix))**2,
        'tied': (make_spd_matrix(n_features, random_state=prng) +
                 mincv * np.eye(n_features)),
        'diag': (mincv + mincv * prng.rand(n_mix, n_features))**2,
        'full':
        np.array([
            make_spd_matrix(n_features, random_state=prng) +
            mincv * np.eye(n_features) for x in range(n_mix)
        ])
    }[cvtype]
    g.weights = hmm.normalize(prng.rand(n_mix))
    return g
Example #19
def calculate_covariance(states, feature_list, n_features):
    # There is too little data to estimate the covariance matrices reliably,
    # so we return random SPD matrices instead.

    np.set_printoptions(threshold=np.inf)
    random = np.array([make_spd_matrix(n_features, random_state=0) + np.eye(n_features)
                       for x in range(len(states))])
    # covariance = list()
    # for i in range(0, len(states), 1):
    #     state = states[i]
    #     f_list_da = feature_list[state]
    #     # feat_transpose = np.transpose(f_list_da)
    #     arr = np.cov(np.array(f_list_da), rowvar=0)
    #     # adjusted_cov = arr + 0.2*np.identity(arr.shape[0])
    #     if np.isnan(arr).all():
    #         arr = random[i]
    #     # arr_tr = np.transpose(arr)
    #     # new_arr = np.multiply(arr, arr_tr)
    #
    #     # arr[arr == 0.] = 0.00001
    #     # covariance.append(new_arr)

    #     # diagonal = arr.diagonal()
    #     # print (arr.transpose() == arr).all()
    #     a = 0
    #     while not is_pos_def(arr):
    #         arr += 0.2
    #         a += 1
    #         if a == 10:
    #             arr = random[i]
    #     if not (arr.transpose() == arr).all():
    #         arr = make_summetric(arr)

    #     # np.linalg.cholesky(arr)
    #     covariance.append(arr)
    #     # t = np.linalg.cholesky(adjusted_cov)
    # covariance = np.array(covariance)

    #
    # np.linalg.cholesky(covariance)
    # # covariance_tr = np.transpose(covariance)
    # # cov = np.multiply(covariance, covariance_tr)
    # return covariance

    return random
Example #20
class GMMTester():
    do_test_eval = True
    n_components = 10
    n_features = 4
    weights = rng.rand(n_components)
    weights = weights / weights.sum()
    means = rng.randint(-20, 20, (n_components, n_features))
    threshold = -0.5
    I = np.eye(n_features)
    covars = {
        'spherical': (0.1 + 2 * rng.rand(n_components, n_features))**2,
        'tied':
        make_spd_matrix(n_features, random_state=0) + 5 * I,
        'diag': (0.1 + 2 * rng.rand(n_components, n_features))**2,
        'full':
        np.array([
            make_spd_matrix(n_features, random_state=0) + 5 * I
            for x in range(n_components)
        ])
    }

    def test_eval(self):
        if not self.do_test_eval:
            return  # DPGMM does not support setting the means and
        # covariances before fitting. There is no way of fixing this
        # due to the variational parameters being more expressive than
        # covariance matrices.
        g = self.model(n_components=self.n_components,
                       covariance_type=self.covariance_type,
                       random_state=rng)
        # Make sure the means are far apart so responsibilities.argmax()
        # picks the actual component used to generate the observations.
        g.means_ = 20 * self.means
        g.covars_ = self.covars[self.covariance_type]
        g.weights_ = self.weights

        gaussidx = np.repeat(range(self.n_components), 5)
        n_samples = len(gaussidx)
        X = rng.randn(n_samples, self.n_features) + g.means_[gaussidx]

        ll, responsibilities = g.eval(X)

        self.assertEqual(len(ll), n_samples)
        self.assertEqual(responsibilities.shape,
                         (n_samples, self.n_components))
        assert_array_almost_equal(responsibilities.sum(axis=1),
                                  np.ones(n_samples))
        assert_array_equal(responsibilities.argmax(axis=1), gaussidx)

    def test_sample(self, n=100):
        g = self.model(n_components=self.n_components,
                       covariance_type=self.covariance_type,
                       random_state=rng)
        # Make sure the means are far apart so responsibilities.argmax()
        # picks the actual component used to generate the observations.
        g.means_ = 20 * self.means
        g.covars_ = np.maximum(self.covars[self.covariance_type], 0.1)
        g.weights_ = self.weights

        samples = g.sample(n)
        self.assertEqual(samples.shape, (n, self.n_features))

    def test_train(self, params='wmc'):
        g = mixture.GMM(n_components=self.n_components,
                        covariance_type=self.covariance_type)
        g.weights_ = self.weights
        g.means_ = self.means
        g.covars_ = 20 * self.covars[self.covariance_type]

        # Create a training set by sampling from the predefined distribution.
        X = g.sample(n_samples=100)
        g = self.model(n_components=self.n_components,
                       covariance_type=self.covariance_type,
                       random_state=rng,
                       min_covar=1e-1)
        g.fit(X, n_iter=1, init_params=params)

        # Do one training iteration at a time so we can keep track of
        # the log likelihood to make sure that it increases after each
        # iteration.
        trainll = []
        for _ in range(5):
            g.fit(X, n_iter=1, params=params, init_params='')
            trainll.append(self.score(g, X))
        g.fit(X, n_iter=10, params=params, init_params='')  # finish fitting

        # Note that the log likelihood will sometimes decrease by a
        # very small amount after it has more or less converged due to
        # the addition of min_covar to the covariance (to prevent
        # underflow).  This is why the threshold is set to -0.5
        # instead of 0.
        delta_min = np.diff(trainll).min()
        self.assertTrue(
            delta_min > self.threshold,
            "The min nll increase is %f which is lower than the admissible"
            " threshold of %f, for model %s. The likelihoods are %s." %
            (delta_min, self.threshold, self.covariance_type, trainll))

    def test_train_degenerate(self, params='wmc'):
        """ Train on degenerate data with 0 in some dimensions
        """
        # Create a training set by sampling from the predefined distribution.
        X = rng.randn(100, self.n_features)
        X.T[1:] = 0
        g = self.model(n_components=2,
                       covariance_type=self.covariance_type,
                       random_state=rng,
                       min_covar=1e-3)
        g.fit(X, n_iter=5, init_params=params)
        trainll = g.score(X)
        self.assertTrue(np.sum(np.abs(trainll / 100 / X.shape[1])) < 5)

    def test_train_1d(self, params='wmc'):
        """ Train on 1-D data
        """
        # Create a training set by sampling from the predefined distribution.
        X = rng.randn(100, 1)
        #X.T[1:] = 0
        g = self.model(n_components=2,
                       covariance_type=self.covariance_type,
                       random_state=rng,
                       min_covar=1e-7)
        g.fit(X, n_iter=5, init_params=params)
        trainll = g.score(X)
        if isinstance(g, mixture.DPGMM):
            self.assertTrue(np.sum(np.abs(trainll / 100)) < 5)
        else:
            self.assertTrue(np.sum(np.abs(trainll / 100)) < 2)

    def score(self, g, X):
        return g.score(X).sum()