Example #1
def plot():
    fig, axes = plt.subplots(4, 4)
    fig.set_size_inches(10, 10)
    for i, ax in enumerate(axes.flatten()):
        n_ex = 150
        n_in = 2
        n_classes = np.random.randint(2, 4)
        X, y = make_blobs(
            n_samples=n_ex, centers=n_classes, n_features=n_in, random_state=i
        )
        X -= X.mean(axis=0)

        # take best fit over 10 runs
        best_elbo = -np.inf
        for k in range(10):
            _G = GMM(C=n_classes, seed=k * 3)  # vary the seed across restarts
            ret = _G.fit(X, max_iter=100, verbose=False)
            while ret != 0:
                print("Components collapsed; Refitting")
                ret = _G.fit(X, max_iter=100, verbose=False)

            if _G.best_elbo > best_elbo:
                best_elbo = _G.best_elbo
                G = _G

        ax = plot_clusters(G, X, ax)
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        ax.set_title("# Classes: {}; Final VLB: {:.2f}".format(n_classes, G.best_elbo))

    plt.tight_layout()
    plt.savefig("img/plot.png", dpi=300)
    plt.close("all")
Example #2
    def __init__(self, n_components=1, n_mix=1, startprob=None,
                 transmat=None, startprob_prior=None, transmat_prior=None,
                 gmms=None, cvtype=None, var=3):
        """Create a hidden Markov model with GMM emissions.

        Parameters
        ----------
        n_components : int
            Number of states.
        """
        super(GMMHMM, self).__init__(n_components, startprob, transmat,
                                     startprob_prior=startprob_prior,
                                     transmat_prior=transmat_prior)

        # XXX: Hotfix for n_mix that is incompatible with the scikit's
        # BaseEstimator API
        self.n_mix = n_mix
        self.cvtype = cvtype
        self.var = var
        if gmms is None:
            gmms = []
            for x in xrange(self.n_components):
                if cvtype is None:
                    g = GMM(n_mix)
                else:
                    g = GMM(n_mix, cvtype=cvtype)
                gmms.append(g)
        self.gmms = gmms
Example #3
    def _accumulate_sufficient_statistics(self, stats, obs, framelogprob,
                                          posteriors, fwdlattice, bwdlattice,
                                          params):
        super(GMMHMM, self)._accumulate_sufficient_statistics(
            stats, obs, framelogprob, posteriors, fwdlattice, bwdlattice,
            params)

        for state, g in enumerate(self.gmms):
            lgmm_posteriors = np.log(g.eval(obs)[1] + np.finfo(np.float).eps)
            lgmm_posteriors += np.log(posteriors[:, state][:, np.newaxis]
                                      + np.finfo(np.float).eps)
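            # At this point lgmm_posteriors holds, in log space, the joint
            # responsibility p(mixture component, HMM state | obs_t): the
            # per-state GMM posteriors weighted by the HMM state posteriors.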
            gmm_posteriors = np.exp(lgmm_posteriors)
            tmp_gmm = GMM(g.n_components, cvtype=g.cvtype)
            tmp_gmm.n_features = g.n_features
            tmp_gmm.covars = _distribute_covar_matrix_to_match_cvtype(
                                np.eye(g.n_features), g.cvtype, g.n_components)
            norm = tmp_gmm._do_mstep(obs, gmm_posteriors, params)

            if np.any(np.isnan(tmp_gmm.covars)):
                raise ValueError

            stats['norm'][state] += norm
            if 'm' in params:
                stats['means'][state] += tmp_gmm.means * norm[:, np.newaxis]
            if 'c' in params:
                if tmp_gmm.cvtype == 'tied':
                    stats['covars'][state] += tmp_gmm._covars * norm.sum()
                else:
                    cvnorm = np.copy(norm)
                    shape = np.ones(tmp_gmm._covars.ndim)
                    shape[0] = np.shape(tmp_gmm._covars)[0]
                    cvnorm.shape = shape
                    stats['covars'][state] += tmp_gmm._covars * cvnorm
Example #4
def test_forward_gmm():
    batch_size = 10
    dimension_embedding = 7
    num_mixtures = 2

    gmm = GMM(num_mixtures, dimension_embedding)
    latent_vectors = torch.randn(batch_size, dimension_embedding)

    gmm.train()
    out = gmm(latent_vectors)
    print(out)
Example #5
def main():
    n_components = 2
    x = _get_faithful_data()
    _, n_features = x.shape

    pi_init = np.random.uniform(size=n_components)
    pi_init = pi_init / np.sum(pi_init)
    mean_init = np.random.randn(n_components, n_features)
    cov_init = np.stack([np.random.uniform() * np.eye(n_features),
                         np.random.uniform() * np.eye(n_features)])
    gmm = GMM(n_components, pi_init=pi_init, mean_init=mean_init,
              cov_init=cov_init)
    history = gmm.fit(x).history
    save_history_as_video_file(x, n_components, history, 'gmm_em.mp4')
Example #6
    def __init__(self, *, rho, mixtures, DecoderType, **kwargs):
        super().__init__(DecoderType=DecoderType, **kwargs)

        self.rho = rho
        self.phi = Phi(0.0, float('inf'))

        self.mixtures = mixtures
        self.MM = GMM(mixtures, self.z_size)
        self.add_tail('MM', Identity(), self._MMLoss, remap=False)

        self._mirror = DecoderType(bottle_size=self.h_size,
                                   data_shape=self.data_shape)

        self.add_cond('disc', self._discriminator_cond_z)
        self.add_tail('disc', self._mirror, self._discriminator_loss)
Example #7
    def get_item_pdf(data, ncomps):

        data = npa(data)
        #print(data)
        #pl.scatter(data[:,0],data[:,1])
        gmm = GMM(dim=2, ncomps=ncomps, data=data, method="kmeans")
        gmm.em(data, nsteps=100)
        """
        for item in data: 
                #gmm.pdf(j)
             p=gmm.pdf(item)
           
           print("P(y|θ):",gmm.pdf(item))
          """
        return gmm
Example #8
    def __init__(self, k=1, dim=1, means=None, precisions=None,
                 weights=None, shrinkage=None, dof=None):
        """
        Initialize the structure, at least with the dimensions of the problem
        At most, with what is necessary to compute the likelihood of a point
        under the model
        """
        GMM.__init__(self, k, dim, 'full', means, precisions, weights)
        self.shrinkage = shrinkage
        self.dof = dof

        if self.shrinkage is None:
            self.shrinkage = np.ones(self.k)

        if self.dof is None:
            self.dof = np.ones(self.k)
Example #9
 def test_implementation(self):
     centers_ = np.array([[3, 3], [-3, -3]])
     pos_list, ground_truth = datasets.make_blobs(n_samples=100,
         centers=centers_, cluster_std=1, random_state=0)
     np.random.seed(2020)
     gmm = GMM(n_components=2)
     gmm.fit(pos_list)
     gmm_standard = mixture.GaussianMixture(n_components=2)
     gmm_standard.fit(pos_list)
     self.assertTrue(np.linalg.norm(gmm.means_ - centers_) < 0.4)
     self.assertAlmostEqual(gmm_standard.lower_bound_, gmm.lower_bound_)
     assert_array_almost_equal(gmm.weights_, [0.5, 0.5])
     covariances_ground_truth = np.zeros([2, 2, 2])
     covariances_ground_truth[0, :, :] = np.eye(2)
     covariances_ground_truth[1, :, :] = np.eye(2)
     self.assertTrue(np.linalg.norm(covariances_ground_truth - gmm.covariances_) < 0.2)
Example #10
    def condition(self, indices, x):
        """Conditional distribution over given indices.

        Parameters
        ----------
        indices : array, shape (n_new_features,)
            Indices of dimensions that we want to condition.

        x : array, shape (n_new_features,)
            Values of the features that we know.

        Returns
        -------
        conditional : GMM
            Conditional GMM distribution p(Y | X=x).
        """
        n_features = self.n_dim - len(indices)
        priors = np.empty(self.n_components)
        means = np.empty((self.n_components, n_features))
        covariances = np.empty((self.n_components, n_features, n_features))
        for k in range(self.n_components):
            mvn = MVN(mean=self.means[k],
                      covariance=self.covariance[k],
                      random_state=self.random_state)
            conditioned = mvn.condition(indices, x)
            priors[k] = (self.alpha[k] *
                         mvn.marginalize(indices).to_probability_density(x))
            means[k] = conditioned.mean
            covariances[k] = conditioned.covariance
        priors /= priors.sum()
        return GMM(n_components=self.n_components,
                   priors=priors,
                   means=means,
                   covariances=covariances,
                   random_state=self.random_state)
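# Hedged usage sketch (not from the source repo): assuming `condition` belongs
# to a gmr-style GMM whose constructor takes `priors`, `means` and
# `covariances` exactly as in the `return` statement above, conditioning a
# two-component 2-D mixture on its first feature could look like this.
import numpy as np

priors = np.array([0.5, 0.5])
means = np.array([[0.0, 0.0], [3.0, 3.0]])
covariances = np.array([np.eye(2), np.eye(2)])
gmm = GMM(n_components=2, priors=priors, means=means, covariances=covariances)

# Conditional distribution p(x_1 | x_0 = 2.5); the result is again a GMM over
# the remaining feature.
conditional = gmm.condition(np.array([0]), np.array([2.5]))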
Example #11
    def __init__(self, prec, cap=32):
        """Initialise with the precision matrix to use for the kernels, which implicitly provides the number of dimensions, and the cap on the number of kernels to allow."""
        self.prec = numpy.asarray(prec, dtype=numpy.float32)
        self.gmm = GMM(prec.shape[0], cap)  # Current mixture model.
        self.count = 0  # Number of samples provided so far.

        self.merge = numpy.empty(
            (cap, cap), dtype=numpy.float32
        )  # [i,j]; cost of merging two entries, only valid when j<i, other values set high to avoid issues.
        self.merge[:, :] = 1e64

        # For holding the temporary merge costs calculated when adding a sample...
        self.mergeT = numpy.empty(cap, dtype=numpy.float32)

        # For the C code...
        self.temp = numpy.empty((2, prec.shape[0], prec.shape[0]),
                                dtype=numpy.float32)
Example #12
    def check(self):
        """
        Checking the shapes of the different matrices involved in the model
        """
        GMM.check(self)

        if self.prior_means.shape[0] != self.k:
            raise ValueError("Incorrect dimension for self.prior_means")
        if self.prior_means.shape[1] != self.dim:
            raise ValueError("Incorrect dimension for self.prior_means")
        if self.prior_scale.shape[0] != self.k:
            raise ValueError("Incorrect dimension for self.prior_scale")
        if self.prior_scale.shape[1] != self.dim:
            raise ValueError("Incorrect dimension for self.prior_scale")
        if self.prior_dof.shape[0] != self.k:
            raise ValueError("Incorrect dimension for self.prior_dof")
        if self.prior_weights.shape[0] != self.k:
            raise ValueError("Incorrect dimension for self.prior_weights")
Example #13
    def __init__(self):
        compressor = CompressionNetworkArrhythmia()
        estimator = EstimationNetworkArrhythmia()
        # TODO: the GMM module should be part of the estimator....?
        gmm = GMM(num_mixtures=2, dimension_embedding=4)

        super().__init__(compression_module=compressor,
                         estimation_module=estimator,
                         gmm_module=gmm)
Example #14
    def check(self):
        """
        Checking the shapes of the different matrices involved in the model
        """
        GMM.check(self)

        if self.prior_means.shape[0] != self.k:
            raise ValueError("Incorrect dimension for self.prior_means")
        if self.prior_means.shape[1] != self.dim:
            raise ValueError("Incorrect dimension for self.prior_means")
        if self.prior_scale.shape[0] != self.k:
            raise ValueError("Incorrect dimension for self.prior_scale")
        if self.prior_scale.shape[1] != self.dim:
            raise ValueError("Incorrect dimension for self.prior_scale")
        if self.prior_dof.shape[0] != self.k:
            raise ValueError("Incorrect dimension for self.prior_dof")
        if self.prior_weights.shape[0] != self.k:
            raise ValueError("Incorrect dimension for self.prior_weights")
Example #15
    def __init__(self, k=1, dim=1, means=None, precisions=None,
                 weights=None, shrinkage=None, dof=None):
        """
        Initialize the structure with the dimensions of the problem
        Eventually provide different terms
        """
        GMM.__init__(self, k, dim, 'full', means, precisions, weights)
        self.shrinkage = shrinkage
        self.dof = dof

        if self.shrinkage is None:
            self.shrinkage = np.ones(self.k)

        if self.dof is None:
            self.dof = np.ones(self.k)

        if self.precisions is not None:
            self._detp = [detsh(self.precisions[k]) for k in range(self.k)]
Example #16
    def __init__(self, k=1, dim=1, means=None, precisions=None,
                 weights=None, shrinkage=None, dof=None):
        """
        Initialize the structure with the dimensions of the problem
        Eventually provide different terms
        """
        GMM.__init__(self, k, dim, 'full', means, precisions, weights)
        self.shrinkage = shrinkage
        self.dof = dof

        if self.shrinkage is None:
            self.shrinkage = np.ones(self.k)

        if self.dof is None:
            self.dof = np.ones(self.k)

        if self.precisions is not None:
            self._detp = [detsh(self.precisions[k]) for k in range(self.k)]
Example #17
    def __init__(self, N, A, GMM_kwargs, pi, state_map=None, observation_map=None):
        self.N = N  # kinds of states, int
        # self.M = M  # kinds of observed results, int
        self.A = np.array(A)  # state transferring probability, N * N matrix
        self.B = [GMM(**GMM_kwargs) for _ in range(self.N - 1)]  # pdf of observed results, for each state
        self.pi = pi  # pdf of init state, len N list
        self.K = self.B[0].K

        self.feats_lists = [[] for _ in range(N - 1)]
        self.state_transfer_historys = []
        if state_map is None:
            self.state_map = ['s' + str(i) for i in range(1, N + 1)]
Example #18
def initialization(img, bbox, num_components=5, debug=False):
    xmin, ymin, xmax, ymax = bbox
    height, width, _ = img.shape
    alpha = np.zeros((height, width), dtype=np.int8)

    for h in xrange(height):  # Rows
        for w in xrange(width):  # Columns
            if (w >= xmin) and (w <= xmax) and (h >= ymin) and (h <= ymax):
                # Foreground
                alpha[h, w] = 1

    foreground_gmm = GMM(num_components)
    background_gmm = GMM(num_components)

    fg_clusters = foreground_gmm.initialize_gmm(img[alpha == 1])
    bg_clusters = background_gmm.initialize_gmm(img[alpha == 0])

    if debug:
        k = np.ones(alpha.shape, dtype=int) * -1
        k[alpha == 1] = fg_clusters[:]
        k[alpha == 0] = bg_clusters[:]
        visualize_clusters(img.shape, k, alpha)

        plt.imshow(alpha * 255)
        plt.show()
        for i in xrange(alpha.shape[0]):
            for j in xrange(alpha.shape[1]):
                print alpha[i, j],
            print ''

    return alpha, foreground_gmm, background_gmm
Example #19
def test_tom(opt, test_loader, model, board):
    print('----Testing of module {} started----'.format(opt.name))
    model.to(device)
    model.eval()

    unet_mask = UnetGenerator(25, 20, ngf=64)
    load_checkpoint(unet_mask,
                    os.path.join(opt.checkpoint_dir, 'SEG', 'segm_final.pth'))
    unet_mask.to(device)
    unet_mask.eval()

    gmm = GMM(opt)
    load_checkpoint(gmm,
                    os.path.join(opt.checkpoint_dir, 'GMM', 'gmm_final.pth'))
    gmm.to(device)
    gmm.eval()

    length = len(test_loader.data_loader)
    step = 0
    pbar = tqdm(total=length)

    inputs = test_loader.next_batch()
    while inputs is not None:
        im_name = inputs['im_name']
        im_h = inputs['head'].to(device)
        im = inputs['image'].to(device)
        agnostic = inputs['agnostic'].to(device)
        c = inputs['cloth'].to(device)
        # c_warp = inputs['cloth_warp'].to(device)
        im_c = inputs['parse_cloth'].to(device)
        im_c_mask = inputs['parse_cloth_mask'].to(device)
        im_ttp = inputs['texture_t_prior'].to(device)

        with torch.no_grad():
            output_segm = unet_mask(torch.cat([agnostic, c], 1))
            grid_zero, theta, grid_one, delta_theta = gmm(agnostic, c)
        c_warp = F.grid_sample(c, grid_one, padding_mode='border')
        output_segm = F.log_softmax(output_segm, dim=1)

        output_argm = torch.max(output_segm, dim=1, keepdim=True)[1]
        final_segm = torch.zeros(output_segm.shape).to(device).scatter(
            1, output_argm, 1.0)
        input_tom = torch.cat([final_segm, c_warp, im_ttp], 1)

        with torch.no_grad():
            output_tom = model(input_tom)
        person_r = torch.tanh(output_tom[:, :3, :, :])
        mask_c = torch.sigmoid(output_tom[:, 3:, :, :])
        mask_c = (mask_c >= 0.5).type(torch.float)
        img_tryon = mask_c * c_warp + (1 - mask_c) * person_r

        visuals = [[im, c, img_tryon], [im_c, c_warp, person_r],
                   [im_c_mask, mask_c, im_h]]
        board_add_images(board, 'combine', visuals, step + 1)
        save_images(img_tryon, im_name,
                    osp.join(opt.dataroot, opt.datamode, 'final-output'))

        inputs = test_loader.next_batch()
        step += 1
        pbar.update(1)
Example #20
 def __call__(self,
              points,
              K,
              max_iters=100,
              abs_tol=1e-16,
              rel_tol=1e-16,
              **kwargs):
     return GMM()(points,
                  K,
                  max_iters=max_iters,
                  abs_tol=abs_tol,
                  rel_tol=rel_tol,
                  **kwargs)[1]
Example #21
  def __init__(self, prec, cap = 32):
    """Initialise with the precision matrix to use for the kernels, which implicitly provides the number of dimensions, and the cap on the number of kernels to allow."""
    self.prec = numpy.asarray(prec, dtype=numpy.float32)
    self.gmm = GMM(prec.shape[0], cap) # Current mixture model.
    self.count = 0 # Number of samples provided so far.

    self.merge = numpy.empty((cap,cap), dtype=numpy.float32) # [i,j]; cost of merging two entries, only valid when j<i, other values set high to avoid issues.
    self.merge[:,:] = 1e64

    # For holding the temporary merge costs calculated when adding a sample...
    self.mergeT = numpy.empty(cap, dtype=numpy.float32)

    # For the C code...
    self.temp = numpy.empty((2, prec.shape[0], prec.shape[0]), dtype=numpy.float32)
Example #22
def pi_N(N, pi, mean_1, cov_1, mean_2, cov_2, steps):
    gmm = GMM(N, pi, mean_1, cov_1, mean_2, cov_2)
    pi_steps = np.zeros(shape=steps)
    # mean of the estimates
    pi_mu = 0
    # variance of the estimates
    pi_sigma = 0

    # run the estimation `steps` times and accumulate the estimates and their mean
    for i in range(steps):
        D = gmm.dataset()
        pi_learn, _, _, _, _ = gmm.EM(D, N)
        pi_mu += pi_learn
        pi_steps[i] = pi_learn

    pi_mu /= steps

    # compute the variance over the `steps` estimates
    for i in range(steps):
        pi_sigma += (pi_steps[i] - pi_mu)**2

    pi_sigma /= steps

    return pi_mu, pi_sigma
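
# pi_mu and pi_sigma are the empirical mean and the biased (1/steps) variance
# of the EM estimate of the mixing weight over `steps` independent runs:
#     pi_mu = (1/steps) * sum_i pi_i
#     pi_sigma = (1/steps) * sum_i (pi_i - pi_mu)**2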
Example #23
class DAGMM2(nn.Module):
    def __init__(self):
        super().__init__()
        self.gmm = GMM(num_mixtures=3, dimension_embedding=4)

        self.estimator = EstimationNetworkArrhythmia()

    def forward(self, encode, AE, points):
        relative_ed = relative_euclidean_distance(AE, points)
        cosine_sim = cosine_similarity(AE, points)
        # Adding a dimension to prepare for concatenation.
        relative_ed = relative_ed.view(-1, 1)
        cosine_sim = cosine_sim.view(-1, 1)
        latent_vectors = torch.cat([encode, relative_ed, cosine_sim], dim=1)
        # latent_vectors has shape [batch_size, dim_embedding + 2]

        # Updating the parameters of the mixture.
        if self.training:
            mixtures_affiliations = self.estimator(latent_vectors)
            # mixtures_affiliations has shape [batch_size, num_mixtures]
            self.gmm._update_mixtures_parameters(latent_vectors,
                                                 mixtures_affiliations)
        # Estimating the energy of the samples.
        return self.gmm(latent_vectors)
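    # Hedged usage sketch (an assumption about the surrounding training code,
    # not taken from it): `encode` is the autoencoder's bottleneck code and
    # `AE` its reconstruction of `points`, so a forward pass would look like
    #     model = DAGMM2()
    #     energy = model(encode, AE, points)  # per-sample energy from the GMM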
Example #24
def main():
    yml_path = sys.argv[1]
    with open(yml_path) as f:
        config = yaml.load(f)

    inlier_classes = config['train_data_params']['labels']
    outlier_classes = [i for i in range(10) if i not in inlier_classes]

    x, y = load_data('test', normalization='tanh', with_label=True)
    y = np.array(y)
    x = tf.constant(x, dtype=tf.float32)

    autoencoder = AutoEncoder(**config['autoencoder_params'])
    estimation_network = EstimationNetwork(**config['estimator_params'])
    gmm = GMM(config['estimator_params']['dense_units'][-1],
              config['autoencoder_params']['latent_dim'] + 1)

    autoencoder.build(input_shape=(1, 32, 32, 1))
    estimation_network.build(
        input_shape=(1, config['autoencoder_params']['latent_dim'] + 1))
    gmm([
        tf.random.normal((1, config['autoencoder_params']['latent_dim'] + 1)),
        tf.random.normal((1, config['estimator_params']['dense_units'][-1]))
    ])

    # tf 2.1.0 doesn't accept
    # gmm.build(input_shape=[(1, config['autoencoder_params']['latent_dim']+1),
    #                        (1, config['estimator_params']['dense_units'][-1])])

    dagmm = DAGMM(autoencoder, estimation_network, gmm)

    dagmm.load_weights(
        os.path.join(config['logdir'], 'model',
                     'dagmm_%d.h5' % config['test_epoch']))
    outputs = dagmm(x, training=False)
    outputs = np.squeeze(np.asarray(outputs))
    inlier_outputs = np.zeros(shape=(0, ))
    for c in inlier_classes:
        inlier_outputs = np.append(inlier_outputs, outputs[y == c])
    df_inlier = pd.DataFrame({'energy': inlier_outputs, 'label': 'inlier'})

    outlier_outputs = np.zeros(shape=(0, ))
    for c in outlier_classes:
        outlier_outputs = np.append(outlier_outputs, outputs[y == c])
    df_outlier = pd.DataFrame({'energy': outlier_outputs, 'label': 'outlier'})

    df = pd.concat([df_inlier, df_outlier], axis=0)
    df.to_csv(os.path.join(config['logdir'], 'outputs.csv'), index=None)
Example #25
	def posterior(self, X):
		"""
			The E-step of the EM algorithm. 
			Returns the posterior probability p(y|X)

			Args:
				X (matrix, [n,  d]): Data to compute posterior for.

			Returns:
				Matrix of size		[n, K]
		"""
		P = np.zeros((X.shape[0], self.K))
		for i in range(self.K):
			P[:,i] = GMM.prob(X, self.mu[i], self.Sigma[i])

		return P / P.sum(axis=1, keepdims=True) # Normalize
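
# The rows of P are renormalised to sum to one, i.e. the returned posterior is
#     p(y = k | x_n) = p_k(x_n) / sum_j p_j(x_n)
# where p_k is the per-component density evaluated by GMM.prob above.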
Example #26
def test_update_gmm():
    batch_size = 5
    dimension_embedding = 7
    num_mixtures = 2

    gmm = GMM(num_mixtures, dimension_embedding)

    latent_vectors = np.random.random([batch_size, dimension_embedding])
    latent_vectors = convert_to_var(latent_vectors)

    affiliations = np.random.random([batch_size, num_mixtures])
    affiliations = convert_to_var(affiliations)

    for param in gmm.parameters():
        print(param)

    gmm.train()
    gmm._update_mixtures_parameters(latent_vectors, affiliations)

    for param in gmm.parameters():
        print(param)
Example #27
def test_update_gmm():
    batch_size = 10
    dimension_embedding = 7
    num_mixtures = 2

    gmm = GMM(num_mixtures, dimension_embedding)

    latent_vectors = torch.randn(batch_size, dimension_embedding)
    affiliations = torch.nn.functional.softmax(torch.rand(
        batch_size, num_mixtures),
                                               dim=1)

    print('----------parameters before update----------')
    for param in gmm.parameters():
        print(param)

    gmm.train()
    gmm._update_mixtures_parameters(latent_vectors, affiliations)

    print('----------parameters after update----------')
    for param in gmm.parameters():
        print(param)
Example #28
def main(args):
    df = pd.read_csv(args.data)
    data = np.array(df[['X', 'Y']])
    plt.clf()
    plt.scatter(data[:, 0], data[:, 1], s=3, color='blue')

    gmm = GMM(args.k)
    gmm.fit(data)
    mean = gmm.get_means()
    sigma = gmm.get_covariances()
    pi = gmm.get_pis()

    # Plot ellipses for each of covariance matrices.
    for k in range(len(sigma)):
        w, h, angle = get_ellipse_from_covariance(sigma[k])
        e = patches.Ellipse(mean[k], w, h, angle=angle)
        e.set_alpha(np.power(pi[k], 0.1))
        e.set_facecolor('red')
        plt.gca().add_artist(e)
    plt.savefig('covariances_{}_{}.jpg'.format(args.data, args.name))
    plt.show()
Example #29
def main():
    opt = parser()

    test_dataset = SieveDataset(opt)
    # create dataloader
    test_loader = SieveDataLoader(opt, test_dataset)

    if opt.name == 'GMM':
        model = GMM(opt)

        # visualization
        if not os.path.exists(
                os.path.join(opt.tensorboard_dir, opt.name, opt.datamode)):
            os.makedirs(
                os.path.join(opt.tensorboard_dir, opt.name, opt.datamode))
        board = SummaryWriter(
            log_dir=os.path.join(opt.tensorboard_dir, opt.name, opt.datamode))

        checkpoint_path = osp.join(opt.checkpoint_dir, opt.name,
                                   'gmm_final.pth')
        load_checkpoint(model, checkpoint_path)
        test_gmm(opt, test_loader, model, board)

    elif opt.name == 'TOM':
        model = UnetGenerator(26, 4, ngf=64)

        # visualization
        if not os.path.exists(
                os.path.join(opt.tensorboard_dir, opt.name, opt.datamode)):
            os.makedirs(
                os.path.join(opt.tensorboard_dir, opt.name, opt.datamode))
        board = SummaryWriter(
            log_dir=os.path.join(opt.tensorboard_dir, opt.name, opt.datamode))

        checkpoint_path = osp.join(opt.checkpoint_dir, opt.name,
                                   'tom_final.pth')
        load_checkpoint(model, checkpoint_path)
        test_tom(opt, test_loader, model, board)
Example #30
def main():
    yml_path = sys.argv[1]
    with open(yml_path) as f:
        config = yaml.load(f)
    os.makedirs(config['logdir'], exist_ok=True)
    shutil.copy(yml_path, os.path.join(config['logdir'], 'config.yml'))

    x = load_specific_data(phase='train',
                           **config['train_data_params'])
    autoencoder = AutoEncoder(**config['autoencoder_params'])
    estimation_network = EstimationNetwork(**config['estimator_params'])
    gmm = GMM(config['estimator_params']['dense_units'][-1],
              config['autoencoder_params']['latent_dim']+1)

    dagmm = DAGMM(autoencoder,
                  estimation_network,
                  gmm)

    solver = Solver(dagmm,
                    **config['solver_params'],
                    logdir=config['logdir'])

    solver.fit(x,
               **config['fit_params'])
Example #31
def main(args):
    df = pd.read_csv(args.data)
    data = np.array(df[['X', 'Y']])
    plt.clf()
    plt.scatter(data[:, 0], data[:, 1], s=3, color='blue')

    gmm = GMM(args.k)
    gmm.fit(data)
    mean = gmm.get_means()
    sigma = gmm.get_covariances()
    pi = gmm.get_pis()

    # Plot ellipses for each of covariance matrices.
    for k in range(len(sigma)):
        #print (sigma[k])
        h, w, angle = get_ellipse_from_covariance(sigma[k])
        e = patches.Ellipse(mean[k], w, h, angle=angle)
        e.set_alpha(np.power(pi[k], .3))
        e.set_facecolor('red')
        plt.gca().add_artist(e)
    plt.savefig('edgar_gmm_sparse.jpg')
    #plt.show()
    winsound.Beep(2500, 1000)
Example #32
def generate_3_clusters():
    # generate 3 clusters
    c1 = sample(torch.Tensor([2.5, 2.5]), torch.Tensor([1.2, .8]), 500)
    c2 = sample(torch.Tensor([7.5, 7.5]), torch.Tensor([.75, .5]), 500)
    c3 = sample(torch.Tensor([8, 1.5]), torch.Tensor([.6, .8]), 1000)

    return torch.cat([c1, c2, c3])


if __name__ == '__main__':
    # generate data
    data = generate_3_clusters()
    # 3 components
    K = 3
    # create model
    gm = GMM(data, K=K)
    # training iterations
    iterations = 50
    # early stopping threshold
    thresh = 1e-6

    loss_p = 100000.
    for i in range(iterations):
        # run a step
        loss_c = gm.step()
        print(f'[{i}] Loss : {loss_c}')
        # difference
        if torch.abs(loss_c - loss_p).item() < thresh:
            print('Early Stopping')
            break
        # keep track of previous
        loss_p = loss_c
Example #33
    def forward(self, x):
        output = F.leaky_relu(self.fc2(F.leaky_relu(self.fc1(x))))
        return output


def energy(X):
    recon = netD(X)
    return torch.mean(torch.sum((X - recon)**2, 1))


args = get_args()

n_samples = args.n_data
n_epochs = args.n_epochs
batch_size = args.batch_size
m = 5
gmm = GMM(n_gaussians=2, dim=2, random_seed=22)
sample_data = torch.Tensor(gmm.sample(n_samples))

dataloader_train = DataLoader(sample_data, batch_size=batch_size)

noise = torch.rand(n_samples, 2)
fixed_noise = torch.rand(n_samples, 2)

netG = Generator(sample_data.numpy())
netD = Discrimator()

if torch.cuda.is_available():
    netG = netG.cuda()
    netD = netD.cuda()
    fixed_noise = fixed_noise.cuda()
Example #34
    return np.sqrt(1 / lambda1), np.sqrt(1 / lambda2), angle


################################################################################
# GMM on 2D toy dataset
# The dataset is generated from N gaussian distributions equally spaced on N radius circle.
# Here, N=4
# You should be able to visualize the learnt gaussian distribution in plots folder
# Complete implementation of fit function for GMM class in gmm.py
################################################################################
x, y = toy_dataset(4, 100)
init = ['k_means', 'random']

for i in init:
    n_cluster = 4
    gmm = GMM(n_cluster=n_cluster, max_iter=1000, init=i, e=1e-6)
    iterations = gmm.fit(x)
    ll = gmm.compute_log_likelihood(x)

    assert gmm.means.shape == (
        n_cluster, 2), 'means should be numpy array with {}X2 shape'.format(n_cluster)

    assert gmm.variances.shape == (
        n_cluster, 2, 2), 'variances should be numpy array with {}X2X2 shape'.format(n_cluster)

    assert gmm.pi_k.shape == (
        n_cluster,), 'pi_k should be numpy vector of size {}'.format(n_cluster)

    assert iterations > 0 and type(
        iterations) == int, 'Number of updates should be positive integer'
Example #35
def main():
	#log = open(LOG_FILE, "w")
	#sys.stdout = log
	#use train data to train GMM
	points = np.genfromtxt(TRAIN_FILE, usecols=(0,1))
	label = np.genfromtxt(TRAIN_FILE, usecols=2)
	w1 = (label == 1)

	'''u1 = np.array([[ 0.92016682, -0.53710902],
 				[-0.82053379, -0.52580246],
 				[ 2.27051023, -0.8221437 ],
 				[ 0.67995665, -0.57743096]])
	u2 = np.array([[1.50122208, 1.65573219],
	 				[0.65636359, 0.23047148],
	  				[2.14053852, -0.08155318],
	   				[2.73604834, 0.3522032]])
	sigma = np.empty((4,2,2))
	sigma[:] = np.eye(2)'''

	#initialize 2 GMMs
	#gmm_1 = GMM(4,2,np.ones(4)*0.25, u1, sigma)
	#gmm_2 = GMM(4,2,np.ones(4)*0.25, u2, sigma)
	gmm_1 = GMM(4,2)
	gmm_2 = GMM(4,2)

	#train
	print "---------------GMM_1------------------"
	gmm_1.train(points[w1])
	print gmm_1
	print "---------------GMM_2------------------"
	gmm_2.train(points[~w1])
	print gmm_2

	#visualization 2 GMMs
	#plt.figure()
	#gmm_1.draw()
	#plt.show()

	#plt.figure()
	#gmm_2.draw()
	#plt.show()

	#use dev data to classify and compute accuracy
	devs = np.genfromtxt(DEV_FILE, usecols=(0,1))
	dev_la = np.genfromtxt(DEV_FILE, usecols=2)
	re = (dev_la == 1)

	p_1 = gmm_1.predict(devs)
	p_2 = gmm_2.predict(devs)
	pr = ((p_1 > p_2) == re)
	accuracy = np.count_nonzero(pr) * 1.0 / len(pr) 
	print "dev data classify accuracy is", accuracy

	#use test data to classify
	tests = np.genfromtxt(TEST_FILE, usecols=(0,1))
	t_1 = gmm_1.predict(tests)
	t_2 = gmm_2.predict(tests)

	result = [1 if t_1[i] > t_2[i] else 2 for i in range(len(t_1))]
	f = open(RESULT_FILE, "w")
	for i in range(len(tests)):
		line = "%.6f %.6f  %d\n" % (tests[i, 0], tests[i, 1], result[i])
		f.write(line)
	f.close()
Example #36
    chi_gt = arr([[1, 1, 0],[1, 0, 1]]) + 1e-2
    chi_gt = Normalize(chi_gt, 's1', 'row')[0]
    mu_gt = arr([[-1, -1], [1, -1], [0, 1]])
    sigma_gt = 3e-2

    # simulate data
    y_gt = randm(pi_gt, M)
    theta_gt = chi_gt[y_gt]
    N = random.poisson(mean_N, M)
#    N = random.exponential(mean_N, M)
    cN = cat(([0], cumsum(N)))
    z_gt = zeros(cN[-1])
    X = zeros((cN[-1], mu_gt.shape[1]))
    group_id = zeros(cN[-1], int32)
    g_type = zeros(M)
    gmm = GMM(theta_gt[0], mu_gt, sigma_gt)
    for m in range(M):
        gmm.priors = theta_gt[m]
        X[cN[m]:cN[m+1]], z_gt[cN[m]:cN[m+1]] = gmm.GenerateSample(N[m])
        group_id[cN[m]:cN[m+1]] = m

    # anomalies
    for ind in range(N_bad_instance):
        m = RI(1, M)
        g_type[m] = 1
        X[group_id == m] = random.randn((group_id == m).sum(), 
                                    mu_gt.shape[1])*0.5
    for ind in range(N_bad_group):
        m = RI(1, M)
        g_type[m] = 2
Example #37
import pylab as pl
from normal import Normal
from gmm import GMM
from plot_normal import draw2dnormal
from plot_gmm import draw2dgmm

if False:
    fp = open("../data/faithful.txt")
    data = []
    for line in fp.readlines():
        x,y = line.split()
        data.append([float(x),float(y)])

    data = npa(data)
    pl.scatter(data[:,0],data[:,1])
    gmm = GMM(dim = 2, ncomps = 2, data = data, method = "kmeans")

    #x = Normal(2, data=data)
    #draw2dnormal(x,show=True,axes=pl.gca())
    print gmm
    draw2dgmm(gmm)
    pl.show()

if False:

    from test_func import noisy_cosine

    x,y = noisy_cosine()
    data = np.vstack([x,y]).transpose()
    pl.scatter(data[:,0],data[:,1])
Example #38
class KDE_INC:
  """Provides an incrimental kernel density estimate system that uses Gaussians. A kernel density estimate system with Gaussian kernels that, on reaching a cap, starts merging kernels to limit the number of kernels to a constant - done in such a way as to minimise error whilst capping computation. (Computation is quite high however - this is not a very efficient implimentation.)"""
  def __init__(self, prec, cap = 32):
    """Initialise with the precision matrix to use for the kernels, which implicitly provides the number of dimensions, and the cap on the number of kernels to allow."""
    self.prec = numpy.asarray(prec, dtype=numpy.float32)
    self.gmm = GMM(prec.shape[0], cap) # Current mixture model.
    self.count = 0 # Number of samples provided so far.

    self.merge = numpy.empty((cap,cap), dtype=numpy.float32) # [i,j]; cost of merging two entries, only valid when j<i, other values set high to avoid issues.
    self.merge[:,:] = 1e64

    # For holding the temporary merge costs calculated when adding a sample...
    self.mergeT = numpy.empty(cap, dtype=numpy.float32)

    # For the C code...
    self.temp = numpy.empty((2, prec.shape[0], prec.shape[0]), dtype=numpy.float32)
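
  # Hedged usage sketch (not part of the original class): with 2-D data and an
  # identity kernel precision, the estimator would typically be driven as
  #     kde = KDE_INC(numpy.eye(2), cap=32)
  #     for s in samples: kde.add(s)
  #     p = kde.prob(numpy.array([0.0, 0.0]))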

  def setPrec(self, prec):
    """Changes the precision matrix - must be called before any samples are added, and must have the same dimensions as the current one."""
    self.prec = numpy.asarray(prec, dtype=numpy.float32)


  def samples(self):
    """Returns how many samples have been added to the object."""
    return self.count

  def prob(self, sample):
    """Returns the probability of the given sample - must not be called until at least one sample has been added, though it will return a positive constant if called with no samples provided."""
    if self.count!=0: return self.gmm.prob(sample)
    else: return 1.0

  def nll(self, sample):
    """Returns the negative log liklihood of the given sample - must not be called until at least one sample has been added, though it will return a positive constant if called with no samples provided."""
    if self.count!=0: return self.gmm.nll(sample)
    else: return 0.0


  def __merge(self, weightA, meanA, precA, weightB, meanB, precB):
    """Merges two Gaussians and returns the merged result, as (weight, mean, prec)"""
    newWeight = weightA + weightB
    newMean = weightA/newWeight * meanA + weightB/newWeight * meanB

    deltaA = meanA - newMean
    covA = numpy.linalg.inv(precA) + numpy.outer(deltaA, deltaA)

    deltaB = meanB - newMean
    covB = numpy.linalg.inv(precB) + numpy.outer(deltaB, deltaB)

    newCov = weightA/newWeight * covA + weightB/newWeight * covB
    newPrec = numpy.linalg.inv(newCov)

    return (newWeight, newMean, newPrec)
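  # The merge above is moment matching: with w = w_A + w_B,
  #     mu    = (w_A * mu_A + w_B * mu_B) / w
  #     Sigma = (w_A / w) * (inv(prec_A) + outer(mu_A - mu, mu_A - mu))
  #           + (w_B / w) * (inv(prec_B) + outer(mu_B - mu, mu_B - mu))
  # and the returned precision is inv(Sigma).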


  def __calcMergeCost(self, weightA, meanA, precA, weightB, meanB, precB):
    """Calculates and returns the cost of merging two Gaussians."""
    # (For anyone wondering about the fact we are comparing them against each other rather than against the result of merging them that is because this way tends to get better results.)

    # The log determinants and delta...
    logDetA = math.log(numpy.linalg.det(precA))
    logDetB = math.log(numpy.linalg.det(precB))
    delta = meanA - meanB

    # Kullback-Leibler of representing A using B...
    klA = logDetB - logDetA
    klA += numpy.trace(numpy.dot(precB, numpy.linalg.inv(precA)))
    klA += numpy.dot(numpy.dot(delta, precB), delta)
    klA -= precA.shape[0]
    klA *= 0.5

    # Kullback-Leibler of representing B using A...
    klB = logDetA - logDetB
    klB += numpy.trace(numpy.dot(precA, numpy.linalg.inv(precB)))
    klB += numpy.dot(numpy.dot(delta, precA), delta)
    klB -= precB.shape[0]
    klB *= 0.5

    # Return a weighted average...
    return weightA * klA + weightB * klB
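  # For reference, the textbook KL divergence between Gaussians written in
  # terms of precision matrices Lambda_A, Lambda_B (means mu_A, mu_B,
  # dimension d) is
  #     KL(A || B) = 0.5 * ( ln(det(Lambda_A) / det(Lambda_B))
  #                          + trace(Lambda_B @ inv(Lambda_A))
  #                          + (mu_A - mu_B)^T Lambda_B (mu_A - mu_B) - d )
  # The returned cost combines the two directed divergences, each weighted by
  # its kernel's weight.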


  def add(self, sample):
    """Adds a sample, updating the kde accordingly."""
    global weave

    try:
      weave = None # Below code is actually slowing things down. Am disabling for now.
      if weave==None: raise Exception()
      support =  matrix_code + start_cpp() + """
      // Note - designed so that A and Out pointers can be the same.
       void doMerge(int size, float weightA, float * meanA, float * precA, float weightB, float * meanB, float * precB, float & weightOut, float * meanOut, float * precOut, float * tVec, float * tMat1, float * tMat2)
       {
        // Handle the weight, recording the ratios needed next...
         float wOut = weightA + weightB;
         float ratioA = weightA/wOut;
         float ratioB = weightB/wOut;
         weightOut = wOut;

        // Do the mean - simply a weighted average - store in a temporary for now...
         for (int i=0; i<size; i++)
         {
          tVec[i] = ratioA * meanA[i] + ratioB * meanB[i];
         }

        // Put the covariance of precision A into tMat1...
         for (int i=0; i<size*size; i++) tMat2[i] = precA[i];
         Inverse(tMat2, tMat1, size);

        // Add the outer product of the A delta into tMat1...
         for (int r=0; r<size; r++)
         {
          for (int c=0; c<size; c++)
          {
           tMat1[r*size + c] += (meanA[c] - tVec[c]) * (meanA[r] - tVec[r]);
          }
         }

        // Put the covariance of precision B into tMat2...
         for (int i=0; i<size*size; i++) precOut[i] = precB[i];
         Inverse(precOut, tMat2, size);

        // Add the outer product of the B delta into tMat2...
         for (int r=0; r<size; r++)
         {
          for (int c=0; c<size; c++)
          {
           tMat2[r*size + c] += (meanB[c] - tVec[c]) * (meanB[r] - tVec[r]);
          }
         }

        // Get the weighted average of the covariance matrices into tMat1...
         for (int i=0; i<size*size; i++)
         {
          tMat1[i] = ratioA * tMat1[i] + ratioB * tMat2[i];
         }

        // Dump the inverse of tMat1 into the output precision...
         Inverse(tMat1, precOut, size);

        // Copy from the temporary mean into the output mean...
         for (int i=0; i<size; i++) meanOut[i] = tVec[i];
       }

      float mergeCost(int size, float weightA, float * meanA, float * precA, float weightB, float * meanB, float * precB, float * tVec1, float * tVec2, float * tMat1, float * tMat2)
      {
       // Calculate some shared values...
        float logDetA = log(Determinant(precA, size));
        float logDetB = log(Determinant(precB, size));

        for (int i=0; i<size; i++)
        {
         tVec1[i] = meanA[i] - meanB[i];
        } // tVec1 now contains the delta.

       // Calculate the Kullback-Leibler divergence of substituting B for A...
        float klA = logDetB - logDetA;

        for (int i=0; i<size*size; i++) tMat1[i] = precA[i];
        if (Inverse(tMat1, tMat2, size)==false) return 0.0;
        for (int i=0; i<size; i++)
        {
         for (int j=0; j<size; j++)
         {
          klA += precB[i*size + j] * tMat2[j*size + i];
         }
        }

        for (int i=0; i<size; i++)
        {
         tVec2[i] = 0.0;
         for (int j=0; j<size; j++)
         {
          tVec2[i] += precB[i*size + j] * tVec1[j];
         }
        }
        for (int i=0; i<size; i++) klA += tVec1[i] * tVec2[i];
        klA -= size;
        klA *= 0.5;

       // Calculate the Kullback-Leibler divergence of substituting A for B...
        float klB = logDetA - logDetB;

        for (int i=0; i<size*size; i++) tMat1[i] = precB[i];
        if (Inverse(tMat1, tMat2, size)==false) return 0.0;
        for (int i=0; i<size; i++)
        {
         for (int j=0; j<size; j++)
         {
          klB += precA[i*size + j] * tMat2[j*size + i];
         }
        }

        for (int i=0; i<size; i++)
        {
         tVec2[i] = 0.0;
         for (int j=0; j<size; j++)
         {
          tVec2[i] += precA[i*size + j] * tVec1[j];
         }
        }
        for (int i=0; i<size; i++) klB += tVec1[i] * tVec2[i];
        klB -= size;
        klB *= 0.5;

       // Return a weighted average of the divergences...
        return weightA * klA + weightB * klB;
      }
      """

      code = start_cpp(support) + """
      if (count < Nweight[0])
      {
       // Pure KDE mode - just add the kernel...
        for (int i=0; i<Nsample[0]; i++)
        {
         MEAN2(count, i) = sample[i];
        }

        for (int i=0; i<Nsample[0]; i++)
        {
         for (int j=0; j<Nsample[0]; j++)
         {
          PREC3(count, i, j) = BASEPREC2(i, j);
         }
        }

        assert(Sprec[0]==sizeof(float));
        assert(Sprec[1]==sizeof(float)*Nsample[0]);
        log_norm[count]  = 0.5 * log(Determinant(&PREC3(count, 0, 0), Nsample[0]));
        log_norm[count] -= 0.5 * Nsample[0] * log(2.0*M_PI);

        float w = 1.0 / (count+1);
        for (int i=0; i<=count; i++)
        {
         weight[i] = w;
        }

       // If the next sample will involve merging then we need to fill in the merging costs cache in preparation...
        if (count+1==Nweight[0])
        {
         for (int i=0; i<Nweight[0]; i++)
         {
          for (int j=0; j<i; j++)
          {
           MERGE2(i, j) = mergeCost(Nsample[0], weight[i], &MEAN2(i,0), &PREC3(i,0,0), weight[j], &MEAN2(j,0), &PREC3(j,0,0), &TEMP2(0,0), &TEMP2(1,0), &TEMPPREC3(0,0,0), &TEMPPREC3(1,0,0));
          }
         }
        }
      }
      else
      {
       // We have the maximum number of kernels - need to either merge the new kernel with an existing one, or merge two existing kernels and use the freed up slot for the new kernel...

       // Update the weights, and calculate the weight of the new kernel...
        float adjust = float(count) / float(count+1);

        for (int i=0; i<Nweight[0]; i++) weight[i] *= adjust;
        for (int i=0; i<Nweight[0]; i++)
        {
         for (int j=0; j<i; j++) MERGE2(i, j) *= adjust;
        }

        float w = 1.0 / float(count + 1.0);

       // Calculate the costs of merging the new kernel with each of the old kernels...
        for (int i=0; i<Nweight[0]; i++)
        {
         mergeT[i] = mergeCost(Nsample[0], w, sample, basePrec, weight[i], &MEAN2(i,0), &PREC3(i,0,0), &TEMP2(0,0), &TEMP2(1,0), &TEMPPREC3(0,0,0), &TEMPPREC3(1,0,0));
        }

       // Find the lowest merge cost and act accordingly - either we are merging the new kernel with an old one or merging two existing kernels and putting the new kernel in on its own...
        int lowI = 1;
        int lowJ = 0;

        for (int i=0; i<Nweight[0]; i++)
        {
         for (int j=0; j<i; j++)
         {
          if (MERGE2(i, j) < MERGE2(lowI, lowJ))
          {
           lowI = i;
           lowJ = j;
          }
         }
        }

        int lowN = 0;

        for (int i=1; i<Nweight[0]; i++)
        {
         if (mergeT[i] < mergeT[lowN]) lowN = i;
        }

        if (mergeT[lowN] < MERGE2(lowI, lowJ))
        {
         // We are merging the new kernel with an existing kernel...

         // Do the merge...
          doMerge(Nsample[0], weight[lowN], &MEAN2(lowN,0), &PREC3(lowN,0,0), w, sample, basePrec, weight[lowN], &MEAN2(lowN,0), &PREC3(lowN,0,0), &TEMP2(0,0), &TEMPPREC3(0,0,0), &TEMPPREC3(1,0,0));

         // Update the normalising constant...
          log_norm[lowN]  = 0.5 * log(Determinant(&PREC3(lowN, 0, 0), Nsample[0]));
          log_norm[lowN] -= 0.5 * Nsample[0] * log(2.0*M_PI);

         // Update the array of merge costs...
          for (int i=0; i<Nweight[0]; i++)
          {
           if (i!=lowN)
           {
            float mc = mergeCost(Nsample[0], weight[i], &MEAN2(i,0), &PREC3(i,0,0), weight[lowN], &MEAN2(lowN,0), &PREC3(lowN,0,0), &TEMP2(0,0), &TEMP2(1,0), &TEMPPREC3(0,0,0), &TEMPPREC3(1,0,0));

            if (i<lowN) MERGE2(lowN, i) = mc;
                   else MERGE2(i, lowN) = mc;
           }
          }
        }
        else
        {
         // We are merging two existing kernels then putting the new kernel into the freed up spot...

         // Do the merge...
          doMerge(Nsample[0], weight[lowI], &MEAN2(lowI,0), &PREC3(lowI,0,0), weight[lowJ], &MEAN2(lowJ,0), &PREC3(lowJ,0,0), weight[lowI], &MEAN2(lowI,0), &PREC3(lowI,0,0), &TEMP2(0,0), &TEMPPREC3(0,0,0), &TEMPPREC3(1,0,0));

         // Copy in the new kernel...
          weight[lowJ] = w;
          for (int i=0; i<Nsample[0]; i++) MEAN2(lowJ,i) = sample[i];
          for (int i=0; i<Nsample[0];i++)
          {
           for (int j=0; j<Nsample[0]; j++)
           {
            PREC3(lowJ,i,j) = basePrec[i*Nsample[0] + j];
           }
          }

         // Update both normalising constants...
          log_norm[lowI]  = 0.5 * log(Determinant(&PREC3(lowI, 0, 0), Nsample[0]));
          log_norm[lowI] -= 0.5 * Nsample[0] * log(2.0*M_PI);

          log_norm[lowJ]  = 0.5 * log(Determinant(&PREC3(lowJ, 0, 0), Nsample[0]));
          log_norm[lowJ] -= 0.5 * Nsample[0] * log(2.0*M_PI);

         // Update the array of merge costs...
          for (int i=0; i<Nweight[0]; i++)
          {
           if (i!=lowI)
           {
            float mc = mergeCost(Nsample[0], weight[i], &MEAN2(i,0), &PREC3(i,0,0), weight[lowI], &MEAN2(lowI,0), &PREC3(lowI,0,0), &TEMP2(0,0), &TEMP2(1,0), &TEMPPREC3(0,0,0), &TEMPPREC3(1,0,0));

            if (i<lowI) MERGE2(lowI, i) = mc;
                   else MERGE2(i, lowI) = mc;
           }
          }

          for (int i=0; i<Nweight[0]; i++)
          {
           if ((i!=lowI)&&(i!=lowJ))
           {
            float mc = mergeCost(Nsample[0], weight[i], &MEAN2(i,0), &PREC3(i,0,0), weight[lowJ], &MEAN2(lowJ,0), &PREC3(lowJ,0,0), &TEMP2(0,0), &TEMP2(1,0), &TEMPPREC3(0,0,0), &TEMPPREC3(1,0,0));

            if (i<lowJ) MERGE2(lowJ, i) = mc;
                   else MERGE2(i, lowJ) = mc;
           }
          }
        }
      }
      """

      sample = numpy.asarray(sample, dtype=numpy.float32).flatten()
      basePrec = self.prec
      count = self.count
      merge = self.merge
      mergeT = self.mergeT
      tempPrec = self.temp

      weight = self.gmm.weight
      mean = self.gmm.mean
      prec = self.gmm.prec
      log_norm = self.gmm.log_norm
      temp = self.gmm.temp

      weave.inline(code, ['sample', 'basePrec', 'count', 'merge', 'mergeT', 'tempPrec', 'weight', 'mean', 'prec', 'log_norm', 'temp'], support_code = support)
      self.count += 1

    except Exception, e:
      if weave!=None:
        print e
        weave = None

      if self.count<self.gmm.weight.shape[0]:
        # Pure kde phase...
        self.gmm.mean[self.count,:] = numpy.asarray(sample, dtype=numpy.float32)
        self.gmm.prec[self.count,:,:] = self.prec
        self.gmm.calcNorm(self.count)

        self.count += 1
        self.gmm.weight[:self.count] = 1.0 / float(self.count)

        if self.count==self.gmm.weight.shape[0]:
          # Next sample starts merging - need to prepare by filling in the kl array...
          # (Below is grossly inefficient - calculates the same things more times than is possibly funny. I'll optimise it if I ever decide that I care enough to do so.)
          for i in xrange(self.merge.shape[0]):
            for j in xrange(i):
              self.merge[i,j] = self.__calcMergeCost(self.gmm.weight[i], self.gmm.mean[i,:], self.gmm.prec[i,:,:], self.gmm.weight[j], self.gmm.mean[j,:], self.gmm.prec[j,:,:])
      else:
        # Merging phase...
        sample = numpy.asarray(sample, dtype=numpy.float32)

        # Adjust weights...
        adjust = float(self.count) / float(self.count+1)
        self.gmm.weight *= adjust
        for i in xrange(self.merge.shape[0]): self.merge[i,:i] *= adjust

        self.count += 1
        weight = 1.0 / float(self.count)

        # Calculate the merging costs for the new kernel versus the old kernels...
        for i in xrange(self.merge.shape[0]):
          self.mergeT[i] = self.__calcMergeCost(weight, sample, self.prec, self.gmm.weight[i], self.gmm.mean[i,:], self.gmm.prec[i,:,:])

        # Select the best merge - it either involves the new sample or it does not...
        bestOld = numpy.unravel_index(numpy.argmin(self.merge), self.merge.shape)
        bestNew = numpy.argmin(self.mergeT)
        if self.mergeT[bestNew] < self.merge[bestOld]:
          # Easy scenario - new kernel is being merged with an existing kernel - not too much fiddling involved...

          # Do the merge...
          newWeight, newMean, newPrec = self.__merge(weight, sample, self.prec, self.gmm.weight[bestNew], self.gmm.mean[bestNew,:], self.gmm.prec[bestNew,:,:])

          # Store the result...
          self.gmm.weight[bestNew] = newWeight
          self.gmm.mean[bestNew,:] = newMean
          self.gmm.prec[bestNew,:,:] = newPrec
          self.gmm.calcNorm(bestNew)

          # Update the merge weights...
          for i in xrange(self.merge.shape[0]):
            if i!=bestNew:
              cost = self.__calcMergeCost(self.gmm.weight[i], self.gmm.mean[i,:], self.gmm.prec[i,:,:], self.gmm.weight[bestNew], self.gmm.mean[bestNew,:], self.gmm.prec[bestNew,:,:])
              if i<bestNew: self.merge[bestNew,i] = cost
              else: self.merge[i,bestNew] = cost

        else:
          # We are merging two old kernels, and then putting the new kernel into the slot freed up - this is extra fiddly...
          # Do the merge...
          newWeight, newMean, newPrec = self.__merge(self.gmm.weight[bestOld[0]], self.gmm.mean[bestOld[0],:], self.gmm.prec[bestOld[0],:,:], self.gmm.weight[bestOld[1]], self.gmm.mean[bestOld[1],:], self.gmm.prec[bestOld[1],:,:])

          # Store the result, put the new component in the other slot...
          self.gmm.weight[bestOld[0]] = newWeight
          self.gmm.mean[bestOld[0],:] = newMean
          self.gmm.prec[bestOld[0],:,:] = newPrec
          self.gmm.calcNorm(bestOld[0])

          self.gmm.weight[bestOld[1]] = weight
          self.gmm.mean[bestOld[1],:] = sample
          self.gmm.prec[bestOld[1],:,:] = self.prec
          self.gmm.calcNorm(bestOld[1])

          # Update the merge weights for both the merged and new kernels...
          for i in xrange(self.merge.shape[0]):
            if i!=bestOld[0]:
              cost = self.__calcMergeCost(self.gmm.weight[i], self.gmm.mean[i,:], self.gmm.prec[i,:,:], self.gmm.weight[bestOld[0]], self.gmm.mean[bestOld[0],:], self.gmm.prec[bestOld[0],:,:])
              if i<bestOld[0]: self.merge[bestOld[0],i] = cost
              else: self.merge[i,bestOld[0]] = cost

          for i in xrange(self.merge.shape[0]):
            if i!=bestOld[0] and i!=bestOld[1]:
              cost = self.__calcMergeCost(self.gmm.weight[i], self.gmm.mean[i,:], self.gmm.prec[i,:,:], self.gmm.weight[bestOld[1]], self.gmm.mean[bestOld[1],:], self.gmm.prec[bestOld[1],:,:])
              if i<bestOld[1]: self.merge[bestOld[1],i] = cost
              else: self.merge[i,bestOld[1]] = cost