Example #1
	def __init__(self, emulated, LEDGrided, LCDed, keyboardhacked, touch):
		self.qOut= Queue.Queue(maxsize=0)
		self.qIn = Queue.Queue(maxsize=0)
		
		self.lowerBoarder = 7
		self.res = 8
		
		self.mapping = {8:7,4:3,2:1,1:0}
		

		self.emulated = emulated
		self.LEDGrided = LEDGrided
		self.LCDed = LCDed
		self.keyboardhacked = keyboardhacked
		self.touch = touch


		if self.LEDGrided or self.LCDed or self.keyboardhacked:
			self.i2chandler = i2cHandler.handler(self.qOut, self.LEDGrided, self.LCDed, self.keyboardhacked, self.touch)
		
		if self.emulated or self.keyboardhacked:
			import em
			print "Emulated"
			self.qOutEmulated = Queue.Queue(maxsize=0)
			self.emu = em.em(self.qOutEmulated, self.qIn)

		if self.LEDGrided or self.keyboardhacked:
			self.i2chandler.resetPacketCount()
		
		if self.keyboardhacked:
			print "Keyboardhacked"
			self.clearScreen(5)

		if self.LEDGrided:
			self.clearScreen(width*length)
def em_experiment(clf, X_tr, y_tr, X_te, y_te, n_classes, multi_class=False):
    mlb = MultiLabelBinarizer(classes=range(n_classes))
    mlb.fit(np.expand_dims(np.hstack((y_tr, y_te)), 1))
    y_tr_bin = mlb.transform(np.expand_dims(y_tr, 1))
    y_te_bin = mlb.transform(np.expand_dims(y_te, 1))
    train_priors = np.mean(y_tr_bin, 0)
    test_priors = np.mean(y_te_bin, 0)

    clf = init_classifiers(clf)
    print("Fitting", clf)
    clf.fit(X_tr, y_tr)
    test_posteriors = clf.predict_proba(X_te)
    posteriors_test_priors = np.mean(test_posteriors, axis=0)

    print('train priors', train_priors, sep='\n')
    print('test priors', test_priors, sep='\n')
    print('posteriors mean', posteriors_test_priors, sep='\n')
    print()

    em_test_posteriors, em_test_priors, history = em(y_te,
                                                     test_posteriors,
                                                     train_priors,
                                                     multi_class=multi_class)
    measures = get_measures_from_singlehist_measures(history)

    print('Results')
    print('prior from:   train test  post  em')
    for i, (a, b, c, d) in enumerate(
            zip(train_priors, test_priors, posteriors_test_priors,
                em_test_priors)):
        print(f'{i:11d} - {a:3.3f} {b:3.3f} {c:3.3f} {d:3.3f}')

    return measures
Example #3
def cluster(X, K, divergence, debug=False):
    if divergence == 'KL':
        dist_cls = distributions.KL
        if np.any(X <= 0):  # for MFCCs...
            X = X - X.min() + 1e-8
        X = 5. * X / X.sum(1)[:,nax]
    elif divergence == 'IS':
        dist_cls = distributions.ItakuraSaito
        if np.any(X <= 0):  # for MFCCs...
            X = X - X.min() + 1e-8
        # X = X / X.sum(1)[:,nax]
    elif divergence == 'EU':
        dist_cls = distributions.SquareDistance
    else:
        print 'Wrong divergence'
        sys.exit(0)

    assignments, centroids, _ = kmeans.kmeans_best_of_n(X, K, n_trials=10,
            dist_cls=dist_cls, debug=debug)
    init_pi = np.ones(K) / K
    init_obs_distr = centroids

    tau_em, obs_distr, pi, em_ll_train, _ = em.em(X, centroids, n_iter=10)
    # tau_hmm, A, obs_distr, pi, ll_train, _ = hmm.em_hmm(X, init_pi, init_obs_distr, n_iter=10)
    # seq_hmm, _ = hmm.viterbi(X, pi, A, obs_distr)
    Tracer()()

    return {'kmeans': assignments,
            'EM': np.argmax(tau_em, axis=1),
            # 'hmm_smoothing': np.argmax(tau_hmm, axis=1),
            # 'hmm_viterbi': seq_hmm,
           }
Example #4
File: bo_em.py  Project: imoonkey/embo
def main(job_id, params):
    num_runs = 20
    obs_length = 100
    num_states = 2
    num_obs = 2

    # readin hmm indx
    t = 0
    try:
        with open(os.path.join('.', 'hmm_index.txt')) as hmm_index_file:
            t = int(hmm_index_file.read())
        sys.stderr.write("!!!!!!!!!!!!!!!!!!HMM INDEX:  " + str(t) + "   !!!!!!!!!!!!!!!\n")
    except IOError:
        t = 0


    # generate HMM observations
    np.random.seed(0x6b6c26b2)
    seeds = np.random.randint(0x0fffffff, size=num_runs)
    np.random.seed(seeds[t])
    # random hmm
    z_mat, t_mat = random_hmm(num_states, num_obs)
    pi_vec = np.array([1.0 / num_states] * num_states)
    hmm_test = HMM(z_mat, t_mat, pi_vec)
    # random obs trajectory
    obs = hmm_test.generate(obs_length)[np.newaxis, :]

    # calculate log likelihood for input HMM parameters
    z_mat_p_input = np.array([[params['z_mat_p_0'][0], params['z_mat_p_1'][0]]])
    t_mat_p_input = np.array([[params['t_mat_p_0'][0], params['t_mat_p_1'][0]]])
    # pi_vec_input = np.array([params['pi_0'], 1 - params['pi_0']])
    hmm_estimate = make_parameterized_HMM(z_mat_p_input, t_mat_p_input, pi_vec)
    hmm_loglikelihood = hmm_estimate.loglikelihood(obs[0])

    # use the current suggest point and run EM to get a new point
    hmm_em_est, _, _ = em.em(hmm_estimate, hmm_estimate.z_mat, hmm_estimate.t_mat, obs, 30, 0.1)
    em_est_z_mat, em_est_t_mat = retrieve_parameterized_HMM(hmm_em_est)
    em_est_ll = -hmm_em_est.loglikelihood(obs[0])
    em_est_z_mat.reshape((em_est_z_mat.size,))
    em_est_t_mat.reshape((em_est_t_mat.size,))
    print em_est_t_mat
    print em_est_z_mat
    historical_points = [{'params': {}}]
    # write z_mat
    for i, v in enumerate(em_est_z_mat[0]):
        historical_points[0]['params']['z_mat_p_' + str(i)] = {'values': np.array([v]), 'type': 'float'}
    # write t_mat
    for i, v in enumerate(em_est_t_mat[0]):
        historical_points[0]['params']['t_mat_p_' + str(i)] = {'values': np.array([v]), 'type': 'float'}
    historical_points[0]['value'] = em_est_ll
    dump_new_history('.', historical_points)
    return -hmm_loglikelihood
def em_random_restarts(num_restarts, num_clusters, parameters_df):
    ll = None
    clusters = None

    for i in range(num_restarts):
        # Initialize cluster statistics
        new_clusters = [cl.Cluster() for j in range(num_clusters)]
        cl.initialize_clusters(new_clusters, parameters_df)

        # Run EM & Get LL Value
        new_ll = em.em(parameters_df, new_clusters)

        # Save Best LL Value and its clusters
        if ll is None or new_ll > ll:
            ll = new_ll
            clusters = new_clusters

    return ll, clusters
Example #6
def main():
    # Load image
    im = Image.open(image_file).convert('RGB')
    width, height = im.size

    # Convenience function to build image band-by-band from array data
    def image_from_array(dat):
        bands = [Image.new('L', (width, height)) for n in range(3)]
        for i in range(3):
            bands[i].putdata(dat[:, i])
        return Image.merge('RGB', bands)

    # Resize image
    width, height = int(width / image_rescale), int(height / image_rescale)
    im = im.resize((width, height))

    # Summary image
    summary = Image.new('RGB', (width * 2 + 40, height * 2 + 60),
                        (255, 255, 255))
    draw = ImageDraw.Draw(summary)
    draw.text((5, height + 10), 'Original', fill=(0, 0, 0))
    draw.text((width + 25, height + 10),
              'Noise V = %.2f, C = %.2f' % (noise_var, noise_cov),
              fill=(0, 0, 0))
    draw.text((5, 2 * height + 40), 'Blocked Gamma', fill=(0, 0, 0))
    draw.text((width + 25, 2 * height + 40), 'Dists', fill=(0, 0, 0))
    del draw
    summary.paste(im, (10, 10))

    # Flatten to emissions
    real_emissions = list(im.getdata())
    num_data = len(real_emissions)
    real_emissions = np.array(real_emissions)

    # Block emissions
    width_blocks = np.array_split(np.arange(width), block_splits)
    height_blocks = np.array_split(np.arange(height), block_splits)
    idx = np.arange(num_data)
    idx.resize((height, width))
    blocks = []
    for hb in height_blocks:
        for wb in width_blocks:
            block = [idx[h, w] for h in hb for w in wb]
            blocks.append(np.array(block))

    # Generate noise
    v, c = noise_var, noise_cov
    cov = [[v, c, c], [c, v, c], [c, c, v]]
    noise = np.random.multivariate_normal([0, 0, 0], cov, width * height)
    noisy_emissions = real_emissions + noise

    # Generate noisy image
    noisy = image_from_array(noisy_emissions)
    summary.paste(noisy, (30 + width, 10))

    # Use K-means to initialize components
    results = kmeans(noisy_emissions, num_comps)
    init_gamma = results['best']
    means = results['means']

    # Analyze color space
    if do_colormap:
        col = {'R': 0, 'G': 1, 'B': 2}
        plt.figure()
        for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                         (real_emissions, 'R', 'B'),
                                         (real_emissions, 'G', 'B'),
                                         (noisy_emissions, 'R', 'G'),
                                         (noisy_emissions, 'R', 'B'),
                                         (noisy_emissions, 'G', 'B')]):
            plt.subplot(2, 3, i + 1)
            plt.hexbin(d[:, col[c1]],
                       d[:, col[c2]],
                       gridsize=30,
                       extent=(0, 255, 0, 255))
            plt.plot(means[:, col[c1]], means[:, col[c2]], '.k')
            plt.xlabel(c1)
            plt.ylabel(c2)
            plt.axis([-20, 275, -20, 275])
        plt.savefig('image_test_color_colormap.png')
        plt.show()

    # Do EM
    results = em(noisy_emissions,
                 [MultivariateNormal() for n in range(num_comps)],
                 count_restart=count_restart,
                 blocks=blocks,
                 max_reps=100,
                 init_gamma=init_gamma,
                 trace=True,
                 pi_max=pi_max)
    dists = results['dists']
    dists_trace = results['dists_trace']
    pi = results['pi']
    print 'Iterations: %(reps)d' % results

    gamma = np.transpose(results['gamma'])
    means = np.array([d.mean() for d in dists])
    covs = np.array([d.cov() for d in dists])

    # Reconstruct with blocked gamma
    rec_blocked_gamma = np.array(
        [np.average(means, weights=g, axis=0) for g in gamma])
    im_blocked_gamma = image_from_array(rec_blocked_gamma)
    summary.paste(im_blocked_gamma, (10, 40 + height))

    # Reconstruct from distributions alone
    pi_opt = pi_maximize(noisy_emissions, dists)
    phi = np.empty((num_data, num_comps))
    for c in range(num_comps):
        phi[:, c] = dists[c].density(noisy_emissions)
    phi = np.matrix(phi)
    for i, pi in enumerate(pi_opt):
        phi[:, i] *= pi
    gamma_dists = phi / np.sum(phi, axis=1)
    rec_dists = np.array(np.dot(gamma_dists, means))
    im_dists = image_from_array(rec_dists)
    summary.paste(im_dists, (30 + width, 40 + height))

    # Show summary image
    if show_summary:
        summary.show()
    summary.save('image_test_color_reconstruction.png')

    # Compare RMSE between reconstructions
    def rmse(x):
        return np.sqrt(np.mean((x - real_emissions)**2))

    print 'Raw RMSE: %.1f' % rmse(noisy_emissions)
    print 'Blocked Gamma RMSE: %.1f' % rmse(rec_blocked_gamma)
    print 'Dists RMSE: %.1f' % rmse(rec_dists)

    # Visualize variance components
    if do_variance_viz:
        temp_files = []
        col = {'R': 0, 'G': 1, 'B': 2}
        fig = plt.figure()
        for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                         (real_emissions, 'R', 'B'),
                                         (real_emissions, 'G', 'B'),
                                         (noisy_emissions, 'R', 'G'),
                                         (noisy_emissions, 'R', 'B'),
                                         (noisy_emissions, 'G', 'B')]):
            ax = fig.add_subplot(2, 3, i + 1)
            plt.hexbin(d[:, col[c1]],
                       d[:, col[c2]],
                       gridsize=30,
                       extent=(0, 255, 0, 255))
            plt.xlabel(c1)
            plt.ylabel(c2)
            plt.axis([-20, 275, -20, 275])
        for idx, dists in enumerate(dists_trace):
            ells = []
            for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                             (real_emissions, 'R', 'B'),
                                             (real_emissions, 'G', 'B'),
                                             (noisy_emissions, 'R', 'G'),
                                             (noisy_emissions, 'R', 'B'),
                                             (noisy_emissions, 'G', 'B')]):
                for dist in dists:
                    m, c = dist.mean(), dist.cov()
                    cm = (c[[col[c1], col[c2]]])[:, [col[c1], col[c2]]]
                    e, v = la.eigh(cm)
                    ell = Ellipse(xy=[m[col[c1]], m[col[c2]]],
                                  width=np.sqrt(e[0]),
                                  height=np.sqrt(e[1]),
                                  angle=(180.0 / np.pi) * np.arccos(v[0, 0]))
                    ells.append(ell)
                    ax = fig.add_subplot(2, 3, i + 1)
                    ax.add_artist(ell)
                    ell.set_clip_box(ax.bbox)
                    ell.set_alpha(0.9)
                    ell.set_facecolor(np.fmax(np.fmin(m / 255, 1), 0))
            file_name = 'tmp_%03d.png' % idx
            temp_files.append(file_name)
            plt.savefig(file_name, dpi=100)
            for ell in ells:
                ell.remove()
        command = ('mencoder', 'mf://tmp_*.png', '-mf',
                   'type=png:w=800:h=600:fps=5', '-ovc', 'lavc', '-lavcopts',
                   'vcodec=mpeg4', '-oac', 'copy', '-o',
                   'image_test_color_components.avi')
        os.spawnvp(os.P_WAIT, 'mencoder', command)
        for temp_file in temp_files:
            os.unlink(temp_file)

    # Find common variance components
    print 'True noise:'
    print cov
    chols = [la.cholesky(c) for c in covs]
    chol_recon = np.zeros((3, 3))
    for i in range(3):
        for j in range(3):
            if j > i: continue
            chol_recon[i, j] = np.Inf
            for chol in chols:
                if abs(chol[i, j]) < abs(chol_recon[i, j]):
                    chol_recon[i, j] = chol[i, j]
    cov_recon = np.dot(chol_recon, np.transpose(chol_recon))
    print 'Reconstructed noise:'
    print cov_recon
Example #7
print "ACCURACIES: "
print "Histogram: ", mean(histogram_accuracies)
print "Histogram Sensitivity", mean(histogram_sensitivities)
print "Histogram Specificity", mean(histogram_specificities)
print "Logreg: ", mean(logreg_accuracies)
print "Logreg Sensitivity", mean(logreg_sensitivities)
print "Logreg Specificity", mean(logreg_specificities)
print "Gaussian Means: ", mean(gaussian_mean_accuracies)
print "Gaussian Distinct: ", mean(gaussian_distinct_accuracies)
print "Random: ", len(samples_healthy) / float(len(samples_healthy) + len(samples_unhealthy))

samples_unhealthy = list(samples_unhealthy)
samples_unhealthy.extend(samples_s)
samples_unhealthy = array(samples_unhealthy)

sigma, mu, pi = em(samples_unhealthy, 2, 1000)
results = test_em_for_2(sigma, mu, pi, samples_murmur, samples_s)
print results 

"""
sigma, mu, pi = em(samples_unhealthy, 13, 1000)
labeled_diseases = []
results = test_em_for_13(sigma, mu, pi, labeled_diseases)

print results
"""




Example #8
# Initialize summary image
summary = Image.new('L', (28 * num_components + 65, 28 * len(num_blocks)), 255)

# Do inference for varying numbers of blocks
idxs = np.argsort(map(np.sum, emissions))
reps = []
for block_i, num_block in enumerate(num_blocks):
    # Block data
    blocks = np.array_split(idxs, num_block)

    # Run EM
    results = em(emissions, [
        Product([Bernoulli() for i in range(28 * 28)])
        for n in range(num_components)
    ],
                 count_restart=3.0,
                 blocks=blocks,
                 gamma_seed=137,
                 init_gamma=(init_to_labels and labels or None))
    dists = results['dists']
    print 'Reps: %d' % results['reps']
    reps.append(results['reps'])

    # Produce summary image
    offset = 0
    im = Image.new('L', (28 * len(dists), 28))
    for d in results['dists']:
        digit = Image.new('L', (28, 28))
        digit.putdata(np.array(d.mean()) * 255)
        im.paste(digit, (offset, 0))
        offset += 28
Example #9
    for wb in width_blocks:
        block = [idx[h, w] for h in hb for w in wb]
        blocks.append(np.array(block))

# Generate noise
noise = np.random.normal(0, noise_sd, width * height)
noisy_emissions = real_emissions + noise

# Generate noisy image
noisy = Image.new('L', (width, height))
noisy.putdata(noisy_emissions)
summary.paste(noisy, (30 + width, 10))

# Do EM
results = em(noisy_emissions,
             [NormalFixedMean(m, max_sigma=max_sigma) for m in range(256)],
             count_restart=count_restart,
             blocks=blocks)
dists = results['dists']
pi = results['pi']
print 'Iterations: %(reps)d' % results

gamma = np.transpose(results['gamma'])
means = np.array([d.mean() for d in dists])
sds = np.array([d.sd() for d in dists])

# Display summary figures
display_densities(real_emissions, dists)

# Reconstruct with argmax
im_argmax = Image.new('L', (width, height))
reconstruct_argmax = means[np.argmax(gamma, axis=1)]
Example #10
    Tracer()()
    return seq, A, obs_distr, ll_test, monitor_vals


if __name__ == '__main__':
    X = np.loadtxt('EMGaussian.data')
    Xtest = np.loadtxt('EMGaussian.test')
    K = 4

    # Run simple EM (no HMM)
    iterations = 40
    assignments, centers, _ = kmeans.kmeans_best_of_n(X, K, n_trials=5)
    new_centers = [distributions.Gaussian(c.mean, np.eye(2)) \
                for c in centers]
    tau, obs_distr, pi, gmm_ll_train, gmm_ll_test = \
            em.em(X, new_centers, assignments, n_iter=iterations, Xtest=Xtest)

    # example with fixed parameters
    A = 1. / 6 * np.ones((K, K))
    A[np.diag(np.ones(K)) == 1] = 0.5

    lalpha, lbeta = alpha_beta(Xtest, pi, A, obs_distr)
    log_p = smoothing(lalpha, lbeta)
    p = np.exp(log_p)

    def plot_traj(p):
        plt.figure()
        ind = np.arange(100)
        for k in range(K):
            plt.subplot(K, 1, k + 1)
            plt.bar(ind, p[:100, k])
Example #11
            plt.imshow(data)
            plt.show()

        # Initialize with K-means
        if init == 'kmeans':
            init_gamma = kmeans(data.reshape((dim*dim,1)), 2)['best']

        # Do (potentially adaptive) blocked EM, depending on strategy
        for block_strategy in block_strategies:
            # Only 'perfect' strategy uses the true states
            blocks = block(data, block_strategy, true = x)
        
            # Do EM
            results = em(data.reshape((dim*dim,)),
                         model,
                         count_restart = count_restart,
                         blocks = blocks,
                         init_gamma = init_gamma,
                         pi_max = pi_max)
            print 'Iterations: %d (%s)' % (results['reps'], block_strategy)
            dists = results['dists']
            pi = results['pi']

            # Display results
            if show_each:
                for p, d in zip(np.transpose(pi), dists):
                    print '%s: %s' % (p, d.display())
                print
            if graphics:
                display_densities(data.reshape((dim*dim,)), dists)
                display_hist(data.reshape((dim*dim,)), dists)
Example #12
        t = time.time()
        assignments, centroids, dists = \
                kmeans.kmeans_best_of_n(X, K, n_trials=4, dist_cls=distributions.KL)
        print 'K-means: {}s'.format(time.time() - t)
        results[algos.kmeans] = {
            'seq': assignments,
            'centroids': centroids,
        }
        seqs[algos.kmeans] = assignments

    # EM
    if options.init == 'em' or algos.em in algs:
        iterations = 10
        t = time.time()
        tau_em, obs_distr, pi, em_ll_train, _ = em.em(X,
                                                      centroids,
                                                      n_iter=options.n_iter)
        print 'EM: {}s'.format(time.time() - t)
        results[algos.em] = {
            'seq': np.argmax(tau_em, axis=1),
            'obs_distr': obs_distr,
            'll_train': em_ll_train,
            'tau': tau_em,
        }
        seqs[algos.em] = np.argmax(tau_em, axis=1)

    # initialization
    if options.init == 'em':
        # initialize with EM
        init_pi = pi
        init_obs_distr = obs_distr
Example #13
    if init == 'true':
        init_gamma = data_comp
    data_mu = mu[data_comp]
    data = np.random.normal(data_mu, 1)
    blocks = np.array_split(np.arange(n), num_blocks)

    # Initialize with K-means
    if init == 'kmeans':
        init_gamma = kmeans(data.reshape((n,1)), 2)['best']

    # Do EM
    results = em(data,
                 model,
                 count_restart = count_restart,
                 blocks = blocks,
                 init_gamma = init_gamma,
                 init_reps = em_steps,
                 max_reps = em_steps,
                 pi_max = pi_max,
                 trace = True)
    if show_each:
        print 'Iterations: %(reps)d' % results
    dists, dists_trace = results['dists'], results['dists_trace']
    pi, pi_trace = results['pi'], results['pi_trace']

    # Display results
    if show_each:
        for p, d in zip(np.transpose(pi), dists):
            print '%s: %s' % (p, d.display())
        print
    if graphics:
def main():
    # Load image
    im = Image.open(image_file).convert('RGB')
    width, height = im.size

    # Convenience function to build image band-by-band from array data
    def image_from_array(dat):
        bands = [Image.new('L', (width, height)) for n in range(3)]
        for i in range(3):
            bands[i].putdata(dat[:,i])
        return Image.merge('RGB', bands)

    # Resize image
    width, height = int(width / image_rescale), int(height / image_rescale)
    im = im.resize((width, height))

    # Summary image
    summary = Image.new('RGB', (width * 2 + 40, height * 2 + 60),
                        (255, 255, 255))
    draw = ImageDraw.Draw(summary)
    draw.text((5, height + 10), 'Original', fill = (0, 0, 0))
    draw.text((width + 25, height + 10),
              'Noise V = %.2f, C = %.2f' % (noise_var, noise_cov),
              fill = (0, 0, 0))
    draw.text((5, 2 * height + 40), 'Blocked Gamma', fill = (0, 0, 0))
    draw.text((width + 25, 2 * height + 40), 'Dists', fill = (0, 0, 0))
    del draw
    summary.paste(im, (10, 10))

    # Flatten to emissions
    real_emissions = list(im.getdata())
    num_data = len(real_emissions)
    real_emissions = np.array(real_emissions)

    # Block emissions
    width_blocks = np.array_split(np.arange(width), block_splits)
    height_blocks = np.array_split(np.arange(height), block_splits)
    idx = np.arange(num_data)
    idx.resize((height, width))
    blocks = []
    for hb in height_blocks:
        for wb in width_blocks:
            block = [idx[h, w] for h in hb for w in wb]
            blocks.append(np.array(block))

    # Generate noise
    v, c = noise_var, noise_cov
    cov = [[v, c, c], [c, v, c], [c, c, v]]
    noise = np.random.multivariate_normal([0, 0, 0], cov, width * height)
    noisy_emissions = real_emissions + noise

    # Generate noisy image
    noisy = image_from_array(noisy_emissions)
    summary.paste(noisy, (30 + width, 10))

    # Use K-means to initialize components
    results = kmeans(noisy_emissions, num_comps)
    init_gamma = results['best']
    means = results['means']

    # Analyze color space
    if do_colormap:
        col = { 'R': 0, 'G': 1, 'B': 2 }
        plt.figure()
        for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                         (real_emissions, 'R', 'B'),
                                         (real_emissions, 'G', 'B'),
                                         (noisy_emissions, 'R', 'G'),
                                         (noisy_emissions, 'R', 'B'),
                                         (noisy_emissions, 'G', 'B')]):
            plt.subplot(2, 3, i+1)
            plt.hexbin(d[:,col[c1]], d[:,col[c2]], gridsize=30,
                       extent = (0, 255, 0, 255))
            plt.plot(means[:,col[c1]], means[:,col[c2]], '.k')
            plt.xlabel(c1)
            plt.ylabel(c2)
            plt.axis([-20, 275, -20, 275])
        plt.savefig('image_test_color_colormap.png')
        plt.show()

    # Do EM
    results = em(noisy_emissions,
                 [MultivariateNormal() for n in range(num_comps)],
                 count_restart = count_restart,
                 blocks = blocks,
                 max_reps = 100,
                 init_gamma = init_gamma,
                 trace = True,
                 pi_max = pi_max)
    dists = results['dists']
    dists_trace = results['dists_trace']
    pi = results['pi']
    print 'Iterations: %(reps)d' % results

    gamma = np.transpose(results['gamma'])
    means = np.array([d.mean() for d in dists])
    covs = np.array([d.cov() for d in dists])

    # Reconstruct with blocked gamma
    rec_blocked_gamma = np.array([np.average(means, weights=g, axis=0)
                                  for g in gamma])
    im_blocked_gamma = image_from_array(rec_blocked_gamma)
    summary.paste(im_blocked_gamma, (10, 40 + height))

    # Reconstruct from distributions alone
    pi_opt = pi_maximize(noisy_emissions, dists)
    phi = np.empty((num_data, num_comps))
    for c in range(num_comps):
        phi[:,c] = dists[c].density(noisy_emissions)
    phi = np.matrix(phi)
    for i, pi in enumerate(pi_opt):
        phi[:,i] *= pi
    gamma_dists = phi / np.sum(phi, axis = 1)
    rec_dists = np.array(np.dot(gamma_dists, means))
    im_dists = image_from_array(rec_dists)
    summary.paste(im_dists, (30 + width, 40 + height))

    # Show summary image
    if show_summary:
        summary.show()
    summary.save('image_test_color_reconstruction.png')

    # Compare RMSE between reconstructions
    def rmse(x):
        return np.sqrt(np.mean((x - real_emissions) ** 2))
    print 'Raw RMSE: %.1f' % rmse(noisy_emissions)
    print 'Blocked Gamma RMSE: %.1f' % rmse(rec_blocked_gamma)
    print 'Dists RMSE: %.1f' % rmse(rec_dists)

    # Visualize variance components
    if do_variance_viz:
        temp_files = []
        col = { 'R': 0, 'G': 1, 'B': 2 }
        fig = plt.figure()
        for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                         (real_emissions, 'R', 'B'),
                                         (real_emissions, 'G', 'B'),
                                         (noisy_emissions, 'R', 'G'),
                                         (noisy_emissions, 'R', 'B'),
                                         (noisy_emissions, 'G', 'B')]):
            ax = fig.add_subplot(2, 3, i+1)
            plt.hexbin(d[:,col[c1]], d[:,col[c2]], gridsize=30,
                       extent = (0, 255, 0, 255))
            plt.xlabel(c1)
            plt.ylabel(c2)
            plt.axis([-20, 275, -20, 275])
        for idx, dists in enumerate(dists_trace):
            ells = []
            for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                             (real_emissions, 'R', 'B'),
                                             (real_emissions, 'G', 'B'),
                                             (noisy_emissions, 'R', 'G'),
                                             (noisy_emissions, 'R', 'B'),
                                             (noisy_emissions, 'G', 'B')]):
                for dist in dists:
                    m, c = dist.mean(), dist.cov()
                    cm = (c[[col[c1], col[c2]]])[:,[col[c1], col[c2]]]
                    e, v = la.eigh(cm)
                    ell = Ellipse(xy = [m[col[c1]], m[col[c2]]],
                                  width = np.sqrt(e[0]),
                                  height = np.sqrt(e[1]),
                                  angle = (180.0 / np.pi) * np.arccos(v[0,0]))
                    ells.append(ell)
                    ax = fig.add_subplot(2, 3, i+1)
                    ax.add_artist(ell)
                    ell.set_clip_box(ax.bbox)
                    ell.set_alpha(0.9)
                    ell.set_facecolor(np.fmax(np.fmin(m / 255, 1), 0))
            file_name = 'tmp_%03d.png' % idx
            temp_files.append(file_name)
            plt.savefig(file_name, dpi = 100)
            for ell in ells:
                ell.remove()
        command = ('mencoder',
                   'mf://tmp_*.png',
                   '-mf',
                   'type=png:w=800:h=600:fps=5',
                   '-ovc',
                   'lavc',
                   '-lavcopts',
                   'vcodec=mpeg4',
                   '-oac',
                   'copy',
                   '-o',
                   'image_test_color_components.avi')
        os.spawnvp(os.P_WAIT, 'mencoder', command)
        for temp_file in temp_files:
            os.unlink(temp_file)

    # Find common variance components
    print 'True noise:'
    print cov
    chols = [la.cholesky(c) for c in covs]
    chol_recon = np.zeros((3,3))
    for i in range(3):
        for j in range(3):
            if j > i: continue
            chol_recon[i,j] = np.Inf
            for chol in chols:
                if abs(chol[i,j]) < abs(chol_recon[i,j]):
                    chol_recon[i,j] = chol[i,j]
    cov_recon = np.dot(chol_recon, np.transpose(chol_recon))
    print 'Reconstructed noise:'
    print cov_recon
Example #15
import numpy as np
import em

observations = np.array([[1, 0, 0, 0, 1, 1, 0, 1, 0, 1],
                         [1, 1, 1, 1, 0, 1, 1, 1, 1, 1],
                         [1, 0, 1, 1, 1, 1, 1, 0, 1, 1],
                         [1, 0, 1, 0, 0, 0, 1, 1, 0, 0],
                         [0, 1, 1, 1, 0, 1, 1, 1, 0, 1]])

print em.em(observations, [0.6, 0.5])
print em.em(observations, [0.5, 0.6])
print em.em(observations, [0.3, 0.3])
print em.em(observations, [0.9999, 0.00000001])
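As a rough illustration of what a routine with this interface might look like, the sketch below implements the classic two-coin EM: each row of observations is a run of flips from one of two coins with unknown biases, and EM recovers both biases from the pooled runs. The function name, signature, and return value here are assumptions for illustration, not the repository's actual em module.

import numpy as np

def em(observations, thetas, n_iter=50):
    # Hypothetical two-coin EM sketch (not the original em module).
    theta_a, theta_b = thetas
    for _ in range(n_iter):
        heads_a = tails_a = heads_b = tails_b = 0.0
        for row in observations:
            heads = row.sum()
            tails = len(row) - heads
            # E-step: responsibility of coin A for this run (binomial
            # coefficients cancel, so plain Bernoulli products suffice)
            lik_a = theta_a ** heads * (1.0 - theta_a) ** tails
            lik_b = theta_b ** heads * (1.0 - theta_b) ** tails
            w_a = lik_a / (lik_a + lik_b)
            heads_a += w_a * heads
            tails_a += w_a * tails
            heads_b += (1.0 - w_a) * heads
            tails_b += (1.0 - w_a) * tails
        # M-step: re-estimate each coin's bias from the weighted counts
        theta_a = heads_a / (heads_a + tails_a)
        theta_b = heads_b / (heads_b + tails_b)
    return theta_a, theta_b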
Example #16
def main():
    tol = 0.00001
    max_iter = 100
    alpha = 2
    beta = 1
    """SYN dataset"""
    tag_syn = os.path.join("data", "SYN_data", "SYN_r")
    num_trials = 10

    plot_mv = []
    plot_bp = []
    plot_em = []

    for value in range(
            1, 20, 2
    ):  # r = 1, 3, 5, 7, 9, 11, 13, 15, 17, 19: number of tasks assigned to each worker
        avg_error_mv = 0
        avg_error_bp = 0
        avg_error_em = 0

        time_mv = 0
        time_bp = 0
        time_em = 0
        for i in range(num_trials):  # trial=1~10
            data = io.loadmat(tag_syn +
                              "{}_{}.mat".format(value, i + 1))["data"][0][0]
            true_labels = io.loadmat(
                tag_syn +
                "{}_{}_true.mat".format(value, i + 1))["true_labels"][0]
            """MV"""
            start = time.time()
            est_mv = mv(data)
            error_mv = np.average(est_mv != true_labels)
            avg_error_mv += error_mv
            end = time.time()
            time_mv += end - start
            """"""
            """BP"""
            start = time.time()
            est_bp = bp(data, max_iter, tol, alpha, beta)
            error_bp = np.average(est_bp != true_labels)
            avg_error_bp += error_bp
            end = time.time()
            time_bp += end - start
            """"""
            """EM"""
            start = time.time()
            est_em = em(data, max_iter, tol, alpha, beta)
            error_em = np.average(est_em != true_labels)
            avg_error_em += error_em
            end = time.time()
            time_em += end - start
            """"""

        avg_error_mv = avg_error_mv / num_trials
        print(
            "In SYN dataset using MV, when r={}, error rate is {}, time is {} secs"
            .format(value, avg_error_mv, time_mv))
        avg_error_bp = avg_error_bp / num_trials
        print(
            "In SYN dataset using BP, when r={}, error rate is {}, time is {} secs"
            .format(value, avg_error_bp, time_bp))
        avg_error_em = avg_error_em / num_trials
        print(
            "In SYN dataset using EM, when r={}, error rate is {}, time is {} secs"
            .format(value, avg_error_em, time_em))

        plot_mv.append(avg_error_mv)
        plot_bp.append(avg_error_bp)
        plot_em.append(avg_error_em)

    plt.figure(figsize=(10, 8))
    plt.title("SYN dataset")
    plt.xlabel("r", )
    plt.ylabel("error")
    plt.xlim(0, 20)
    plt.ylim(0, 1)
    plt.plot([i for i in range(1, 20, 2)], plot_mv, label="MV")
    plt.plot([i for i in range(1, 20, 2)], plot_bp, label="BP")
    plt.plot([i for i in range(1, 20, 2)], plot_em, label="EM")
    plt.legend(loc="upper left")
    plt.show()
    """"""
    """SIM dataset"""
    tag_sim = os.path.join("data", "SIM_data", "SIM_l")
    num_trials = 10

    plot_mv = []
    plot_bp = []
    plot_em = []

    for value in [
            1, 5, 10, 15, 20, 25
    ]:  # l = 1, 5, 10, 15, 20, 25: number of workers assigned to each task
        avg_error_mv = 0
        avg_error_bp = 0
        avg_error_em = 0

        time_mv = 0
        time_bp = 0
        time_em = 0
        for i in range(num_trials):  # trial=1~10
            data = io.loadmat(tag_sim +
                              "{}_{}.mat".format(value, i + 1))["data"][0][0]
            true_labels = io.loadmat(
                tag_sim +
                "{}_{}_true.mat".format(value, i + 1))["true_labels"][0]
            """MV"""
            start = time.time()
            est_mv = mv(data)
            error_mv = np.average(est_mv != true_labels)
            avg_error_mv += error_mv
            end = time.time()
            time_mv += end - start
            """"""
            """BP"""
            start = time.time()
            est_bp = bp(data, max_iter, tol, alpha, beta)
            error_bp = np.average(est_bp != true_labels)
            avg_error_bp += error_bp
            end = time.time()
            time_bp += end - start
            """"""
            """EM"""
            start = time.time()
            est_em = em(data, max_iter, tol, alpha, beta)
            error_em = np.average(est_em != true_labels)
            avg_error_em += error_em
            end = time.time()
            time_em += end - start
            """"""

        avg_error_mv = avg_error_mv / num_trials
        print(
            "In SIM dataset using MV, when l={}, error rate is {}, time is {} secs"
            .format(value, avg_error_mv, time_mv))
        avg_error_bp = avg_error_bp / num_trials
        print(
            "In SIM dataset using BP, when l={}, error rate is {}, time is {} secs"
            .format(value, avg_error_bp, time_bp))
        avg_error_em = avg_error_em / num_trials
        print(
            "In SIM dataset using EM, when l={}, error rate is {}, time is {} secs"
            .format(value, avg_error_em, time_em))

        plot_mv.append(avg_error_mv)
        plot_bp.append(avg_error_bp)
        plot_em.append(avg_error_em)

    plt.figure(figsize=(10, 8))
    plt.title("SIM dataset")
    plt.xlabel("l")
    plt.ylabel("error")
    plt.xlim(0, 30)
    plt.ylim(0, 1)
    plt.plot([1, 5, 10, 15, 20, 25], plot_mv, label="MV")
    plt.plot([1, 5, 10, 15, 20, 25], plot_bp, label="BP")
    plt.plot([1, 5, 10, 15, 20, 25], plot_em, label="EM")
    plt.legend(loc="upper left")
    plt.show()
    """"""
    """TEMP dataset"""
    data = io.loadmat("data/other_data/TEMP_data.mat")["data"][0][0]
    true_labels = io.loadmat(
        "data/other_data/TEMP_data_true.mat")["true_labels"][0]
    """MV"""
    start = time.time()
    est_mv = mv(data)
    error_mv = np.average(est_mv != true_labels)
    end = time.time()
    time_mv = end - start
    """"""
    """BP"""
    start = time.time()
    est_bp = bp(data, max_iter, tol, alpha, beta)
    error_bp = np.average(est_bp != true_labels)
    end = time.time()
    time_bp = end - start
    """"""
    """EM"""
    start = time.time()
    est_em = em(data, max_iter, tol, alpha, beta)
    error_em = np.average(est_em != true_labels)
    end = time.time()
    time_em = end - start
    """"""

    print("In TEMP dataset using MV, error rate is {}, time is {} secs".format(
        error_mv, time_mv))
    print("In TEMP dataset using BP, error rate is {}, time is {} secs".format(
        error_bp, time_bp))
    print("In TEMP dataset using EM, error rate is {}, time is {} secs".format(
        error_em, time_em))
    """"""
Example #17
from flask import Flask, redirect, render_template, request
from utils import ExpenseManagerUtils
import hashlib
import connection
import os
from nocache import nocache


import em

app = Flask(__name__)
app.secret_key = "highlysecret"
app.config['TEMPLATES_AUTO_RELOAD'] = True

emutils = ExpenseManagerUtils.UtilsLib()
conn = connection.connection()
emapi = em.em()

@app.route('/')
def reroute():
    return redirect('/login', code=302)

@app.route('/login')
def login():
    email = request.cookies.get('loggedin')
    if email and request.cookies.get('security_verify:'+email) == emutils.hash_of_hashpass(email):
        return redirect('/home', code=302)
    else:
        form = LoginForm.LoginForm()
        return render_template('login.html', form=form)

@app.route('/register', methods = ['POST'])
Example #18
def em_experiment(ax, clf, X_tr, y_tr, X_te, y_te, y_min=0, y_max=1.0):
    mlb = MultiLabelBinarizer()
    mlb.fit(np.expand_dims(np.hstack((y_tr, y_te)), 1))
    y_tr_bin = mlb.transform(np.expand_dims(y_tr, 1))
    y_te_bin = mlb.transform(np.expand_dims(y_te, 1))
    train_priors = np.mean(y_tr_bin, 0)
    test_priors = np.mean(y_te_bin, 0)

    print("Fitting", clf)

    clf.fit(X_tr, y_tr)
    test_posteriors = clf.predict_proba(X_te)
    posteriors_test_priors = np.mean(test_posteriors, axis=0)

    print('train priors', train_priors, sep='\n')
    print('test priors', test_priors, sep='\n')
    print('posteriors mean', posteriors_test_priors, sep='\n')
    print()

    em_test_posteriors, em_test_priors, history = em(y_te, test_posteriors,
                                                     train_priors)

    em_prior = [p[1] for _, p, _, _, _, _ in history]
    accuracy = [a for _, _, _, a, _, _ in history]
    f1 = [
        2 * p * r / (p + r) if p + r > 0 else 0 for _, _, _, _, p, r in history
    ]
    ax.set_ylim([y_min, y_max])
    ax.plot(range(len(accuracy)),
            accuracy,
            linestyle='-.',
            color='m',
            label='accuracy')
    ax.plot(range(len(f1)), f1, linestyle='--', color='#dd9f00', label='f1')
    ax.plot(range(len(em_prior)), em_prior, color='b', label='em pr')
    ax.hlines([train_priors[1]],
              0,
              len(em_prior) - 1,
              colors=['r'],
              linestyles=[':'],
              label='train pr')
    ax.hlines([posteriors_test_priors[1]],
              0,
              len(em_prior) - 1,
              colors=['#b5651d'],
              linestyles=['-.'],
              label='clf pr')
    ax.hlines([test_priors[1]],
              0,
              len(em_prior) - 1,
              colors=['g'],
              linestyles=['--'],
              label='test pr')

    ax.set()
    ax.grid()

    print('Results')
    print('prior from:   train test  post  em')
    for i, (a, b, c, d) in enumerate(
            zip(train_priors, test_priors, posteriors_test_priors,
                em_test_priors)):
        print(f'{i:11d} - {a:3.3f} {b:3.3f} {c:3.3f} {d:3.3f}')

    return posteriors_test_priors[1], em_test_priors[1], accuracy[0], accuracy[
        -1], f1[0], f1[-1]
Example #19
            plt.imshow(data)
            plt.show()

        # Initialize with K-means
        if init == 'kmeans':
            init_gamma = kmeans(data.reshape((dim * dim, 1)), 2)['best']

        # Do (potentially adaptive) blocked EM, depending on strategy
        for block_strategy in block_strategies:
            # Only 'perfect' strategy uses the true states
            blocks = block(data, block_strategy, true=x)

            # Do EM
            results = em(data.reshape((dim * dim, )),
                         model,
                         count_restart=count_restart,
                         blocks=blocks,
                         init_gamma=init_gamma,
                         pi_max=pi_max)
            print 'Iterations: %d (%s)' % (results['reps'], block_strategy)
            dists = results['dists']
            pi = results['pi']

            # Display results
            if show_each:
                for p, d in zip(np.transpose(pi), dists):
                    print '%s: %s' % (p, d.display())
                print
            if graphics:
                display_densities(data.reshape((dim * dim, )), dists)
                display_hist(data.reshape((dim * dim, )), dists)
Example #20
    if init == 'true':
        init_gamma = data_comp
    data_mu = mu[data_comp]
    data = np.random.normal(data_mu, 1)
    blocks = np.array_split(np.arange(n), num_blocks)

    # Initialize with K-means
    if init == 'kmeans':
        init_gamma = kmeans(data.reshape((n, 1)), 2)['best']

    # Do EM
    results = em(data,
                 model,
                 count_restart=count_restart,
                 blocks=blocks,
                 init_gamma=init_gamma,
                 init_reps=em_steps,
                 max_reps=em_steps,
                 pi_max=pi_max,
                 trace=True)
    if show_each:
        print 'Iterations: %(reps)d' % results
    dists, dists_trace = results['dists'], results['dists_trace']
    pi, pi_trace = results['pi'], results['pi_trace']

    # Display results
    if show_each:
        for p, d in zip(np.transpose(pi), dists):
            print '%s: %s' % (p, d.display())
        print
    if graphics:
Example #21
File: hmm.py  Project: vmpet3/Semi-NMF
    mat = spio.loadmat("HMMdata.mat")
    X = mat['X']
    print(X.shape)

    # X = np.loadtxt('EMGaussian.data')
    # Xtest = np.loadtxt('EMGaussian.test')
    Xtest = X
    K = 3

    # Run simple EM (no HMM)
    iterations = 40
    assignments, centers, _ = kmeans.kmeans_best_of_n(X, K, n_trials=5)
    new_centers = [distributions.Gaussian(c.mean, np.eye(2)) for c in centers]
    tau, obs_distr, pi, gmm_ll_train, gmm_ll_test = em.em(X,
                                                          new_centers,
                                                          assignments,
                                                          n_iter=iterations,
                                                          Xtest=Xtest)

    # example with fixed parameters
    A = 1. / 6 * np.ones((K, K))
    A[np.diag(np.ones(K)) == 1] = 0.5

    lalpha, lbeta = alpha_beta(Xtest, pi, A, obs_distr)
    log_p = smoothing(lalpha, lbeta)
    p = np.exp(log_p)

    # def plot_traj(p):
    #     plt.figure()
    #     ind = np.arange(100)
    #     for k in range(K):
Example #22
import numpy as np
from plot_hist import plot_hist
from em import em

# Initial values for 2 populations
pi0_2 = np.array([1. / 4, 3. / 4])
mu0_2 = np.array([.57, .67])
s20_2 = np.array([1. / 10000, 1. / 10000])

# Initial values for 3 populations
pi0_3 = np.array([1. / 3, 1. / 3, 1. / 3])
mu0_3 = np.array([.57, .6, .67])
s20_3 = np.array([1. / 10000, 1. / 10000, 1. / 10000])

# Empirical distribution
plot_hist('crabe.txt')

# 2 populations
pi, mu, s2 = em(pi0_2, mu0_2, s20_2)
plot_hist('crabe.txt', mu, s2, pi)

# 3 populations
pi, mu, s2 = em(pi0_3, mu0_3, s20_3)
plot_hist('crabe.txt', mu, s2, pi)
Example #23
File: hw2.py  Project: hakuliu/inf552
#programmed entirely by Paul An

import clusterparser
import kmeans
import em

#for testing and visualization
def writeKAssignments(assignments):
    f = open('test.csv', 'w')
    for assignment in assignments:
        f.write(str(assignment) + '\n')
    f.close()

def writeEMAssignments(assignments):
    f = open('test.csv', 'w')
    for assignment in assignments:
        f.write(str(assignment.getAssignment()) + '\n')
    f.close()

points = clusterparser.parsedatafile("clusters.txt")
centroids, kmassignments = kmeans.kmeans(points, 3)
print(centroids)
gausses, emassignments = em.em(points, 3)
for gauss in gausses:
    print(gauss.mean)

print('done')
Example #24
# Initialize summary image
summary = Image.new('L', (28 * num_components + 65, 28 * len(num_blocks)), 255)

# Do inference for varying numbers of blocks
idxs = np.argsort(map(np.sum, emissions))
reps = []
for block_i, num_block in enumerate(num_blocks):
    # Block data
    blocks = np.array_split(idxs, num_block)

    # Run EM
    results = em(emissions,
                 [Product([Bernoulli() for i in range(28 * 28)])
                  for n in range(num_components)],
                 count_restart = 3.0,
                 blocks = blocks,
                 gamma_seed = 137,
                 init_gamma = (init_to_labels and labels or None))
    dists = results['dists']
    print 'Reps: %d' % results['reps']
    reps.append(results['reps'])

    # Produce summary image
    offset = 0
    im = Image.new('L', (28 * len(dists), 28))
    for d in results['dists']:
        digit = Image.new('L', (28, 28))
        digit.putdata(np.array(d.mean()) * 255)
        im.paste(digit, (offset, 0))
        offset += 28
Example #25
    iforest.fit(X_train)
    s_X_iforest = iforest.decision_function(X_test)
    print('LocalOutlierFactor processing...')
    lof = LocalOutlierFactor(n_neighbors=20)
    lof.fit(X_train)
    s_X_lof = lof.decision_function(X_test)
    print('OneClassSVM processing...')
    ocsvm = OneClassSVM()
    ocsvm.fit(X_train[:min(ocsvm_max_train, n_samples_train - 1)])
    s_X_ocsvm = ocsvm.decision_function(X_test).reshape(1, -1)[0]
    s_unif_iforest = iforest.decision_function(unif)
    s_unif_lof = lof.decision_function(unif)
    s_unif_ocsvm = ocsvm.decision_function(unif).reshape(1, -1)[0]
    plt.subplot(121)
    auc_iforest, em_iforest, amax_iforest = em(t, t_max,
                                               volume_support,
                                               s_unif_iforest,
                                               s_X_iforest, n_generated)

    auc_lof, em_lof, amax_lof = em(t, t_max, volume_support,
                                   s_unif_lof, s_X_lof, n_generated)

    auc_ocsvm, em_ocsvm, amax_ocsvm = em(t, t_max, volume_support,
                                         s_unif_ocsvm, s_X_ocsvm,
                                         n_generated)
    if amax_iforest == -1 or amax_lof == -1 or amax_ocsvm == -1:
        amax = -1
    else:
        amax = max(amax_iforest, amax_lof, amax_ocsvm)
    plt.subplot(121)
    plt.plot(t[:amax], em_iforest[:amax], lw=1,
             label='%s (em_score = %0.3e)'
Example #26
    for wb in width_blocks:
        block = [idx[h, w] for h in hb for w in wb]
        blocks.append(np.array(block))

# Generate noise
noise = np.random.normal(0, noise_sd, width * height)
noisy_emissions = real_emissions + noise

# Generate noisy image
noisy = Image.new('L', (width, height))
noisy.putdata(noisy_emissions)
summary.paste(noisy, (30 + width, 10))

# Do EM
results = em(noisy_emissions,
             [NormalFixedMean(m, max_sigma = max_sigma) for m in range(256)],
             count_restart = count_restart,
             blocks = blocks)
dists = results['dists']
pi = results['pi']
print 'Iterations: %(reps)d' % results

gamma = np.transpose(results['gamma'])
means = np.array([d.mean() for d in dists])
sds = np.array([d.sd() for d in dists])

# Display summary figures
display_densities(real_emissions, dists)

# Reconstruct with argmax
im_argmax = Image.new('L', (width, height))
reconstruct_argmax = means[np.argmax(gamma, axis=1)]
Example #27
def main():
    run_data = {}
    run_id = 0

    scale = 0.5
    emissions_normal = { 1: Normal(0, 2.0 * scale),
                         2: Normal(3.5, 3.0 * scale),
                         3: Normal(6.5, 1.0 * scale) }
    emissions_laplace = { 1: Laplace(0, 2.0 * scale),
                          2: Laplace(3.5, 3.0 * scale),
                          3: Laplace(6.5, 1.0 * scale) }
    emission_spec = emissions_normal
    dists = [Normal(max_sigma = 6.0) for n in range(3)]
    num_state_reps = 50
    num_emission_reps = 4
    num_gamma_init_reps = 4
    num_blocks = [1, 2, 5, 10, 20, 50]
    verbose = False
    graphics_on = False

    total_work = (num_state_reps * num_emission_reps *
                  2 * num_gamma_init_reps * len(num_blocks))

    work = 0
    for state_rep in range(num_state_reps):
        print 'State repetition %d' % state_rep

        # Generate HMM states
        while True:
            model = HMM([('Start', (1,),          (1.0,)),
                         (1,       (1,2,3),       (0.98, 0.02, 0.0)),
                         (2,       (1,2,3),       (0.02, 0.95,  0.03)),
                         (3,       (1,2,3,'End'), (0.03,  0.03,  0.93, 0.01))],
                    emission_spec)
            model.simulate()
            num_data = len(model.state_vec)
            if num_data < 5000 and num_data > 100: break

        counts = {}
        for state in model.state_vec:
            if not state in counts:
                counts[state] = 0
            counts[state] += 1
        if verbose: print 'Counts: %s' % str(counts)

        # Generate shuffled indices for repeatable shuffling
        shuffling = np.arange(num_data)
        np.random.shuffle(shuffling)
        
        for emission_rep in range(num_emission_reps):
            if verbose: print 'Emission repetition %d' % emission_rep
            model.emit()

            for shuffled in [False, True]:
                if verbose: print 'Shuffling HMM run: %s' % str(shuffled)
                states = np.array(model.state_vec)
                emissions = np.array(model.emission_vec)
                if shuffled:
                    states = states[shuffling]
                    emissions = emissions[shuffling]
                
                for num_block in num_blocks:
                    if verbose: print 'Blocks: %d' % num_block

                    blocks = np.array_split(np.arange(num_data), num_block)
                    
                    for gamma_rep in range(num_gamma_init_reps):
                        if verbose: print 'Initial gamma seed: %d' % gamma_rep

                        init_gamma = np.array(states) - 1

                        run_id += 1
                        this_run = {}

                        this_run['num data'] = num_data
                        this_run['state rep'] = state_rep
                        this_run['emission rep'] = emission_rep
                        this_run['shuffled'] = shuffled
                        this_run['blocks'] = num_block
                        this_run['gamma init rep'] = gamma_rep

                        start_time = time.clock()
                        results = em(emissions,
                                     dists,
                                     blocks = blocks,
                                     gamma_seed = gamma_rep,
                                     init_gamma = init_gamma,
                                     count_restart = 0.0)
                        pi = results['pi']
                        dists = results['dists']
                        reps = results['reps']
                        conv = results['converged']
                        run_time = time.clock() - start_time
                        this_run['run time'] = run_time
                        this_run['reps'] = reps

                        conv_status = conv and 'converged' or 'not converged'
                        this_run['convergence'] = conv_status

                        print 'Reps: %d (%s)' % (reps, conv_status)
                        print 'Time elapsed: %.2f' % run_time
                        if verbose: print_mixture(pi, dists)

                        if graphics_on:
                            display_densities(emissions, dists)
                            display_hist(emissions, dists)

                        act = emission_spec.values()
                        this_run['err mean max'] = max_error_mean(dists, act)
                        this_run['err mean mean'] = mean_error_mean(dists, act)

                        like = np.zeros(num_data)
                        pi_overall = np.mean(pi, 0)
                        for p, dist in zip(pi_overall, dists):
                            like += p * dist.density(emissions)
                        this_run['log likelihood'] = np.sum(np.log(like))

                        like = np.zeros(num_data)
                        for i, block in enumerate(blocks):
                            for p, dist in zip(pi[i], dists):
                                comp = p * dist.density(emissions[block])
                                like[block] += comp
                        this_run['log likelihood local'] = np.sum(np.log(like))

                        run_data[run_id] = this_run

                        work += 1
                        print 'Finished run %d/%d' % (work, total_work)

    # Output data to CSV
    cols = set()
    for id in run_data:
        for k in run_data[id]:
            cols.add(k)
    with open('outfile.csv', 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(list(cols))
        writer.writerows([[run_data[id][c] for c in cols] for id in run_data])
Example #28
    iforest.fit(X_train)
    s_X_iforest = iforest.decision_function(X_test)
    print('LocalOutlierFactor processing...')
    lof = LocalOutlierFactor(n_neighbors=20)
    lof.fit(X_train)
    s_X_lof = lof.decision_function(X_test)
    print('OneClassSVM processing...')
    ocsvm = OneClassSVM()
    ocsvm.fit(X_train[:min(ocsvm_max_train, n_samples_train - 1)])
    s_X_ocsvm = ocsvm.decision_function(X_test).reshape(1, -1)[0]
    s_unif_iforest = iforest.decision_function(unif)
    s_unif_lof = lof.decision_function(unif)
    s_unif_ocsvm = ocsvm.decision_function(unif).reshape(1, -1)[0]
    plt.subplot(121)
    auc_iforest, em_iforest, amax_iforest = em(t, t_max, volume_support,
                                               s_unif_iforest, s_X_iforest,
                                               n_generated)

    auc_lof, em_lof, amax_lof = em(t, t_max, volume_support, s_unif_lof,
                                   s_X_lof, n_generated)

    auc_ocsvm, em_ocsvm, amax_ocsvm = em(t, t_max, volume_support,
                                         s_unif_ocsvm, s_X_ocsvm, n_generated)
    if amax_iforest == -1 or amax_lof == -1 or amax_ocsvm == -1:
        amax = -1
    else:
        amax = max(amax_iforest, amax_lof, amax_ocsvm)
    plt.subplot(121)
    plt.plot(t[:amax],
             em_iforest[:amax],
             lw=1,
Example #29
plt.show()

# Construct the GMM
# Step 1. Initialize the means, variances, and weights of the male and female components from prior knowledge
mu1 = 170
sigmal = 10
w1 = 0.7  # male component
mu2 = 160
sigma2 = 10
w2 = 0.3  # female component (using our science-and-engineering school as an example)

d = 1
n = len(h)  # number of samples
# Main loop of the EM algorithm
for iteration in range(100):
    mu1, sigmal, w1, mu2, sigma2, w2 = em.em(h, mu1, sigmal, w1, mu2, sigma2,
                                             w2)

# Probability density curves for male, female, and mixed heights
t = np.linspace(120, 220, 550)  # evaluation grid (550 points)
m = stats.norm.pdf(t, loc=mu1, scale=sigmal)  # predicted male distribution
f = stats.norm.pdf(t, loc=mu2, scale=sigma2)  # predicted female distribution
mix = w1 * m + w2 * f  # mixture density
plt.plot(t, m, color='b')
plt.plot(t, f, color='r')
plt.plot(t, mix, color='k')
# Probability density curves for male, female, and mixed heights
plt.title('Probability density curve for boys and girls and mixed height')
#plt.legend([p1,p2,p3],["male","female","mixing"],loc='upper right')
plt.legend(["male", "female", "mixing"], loc='upper right')
plt.xlabel('height/cm')
plt.ylabel('Probability')  # axis labels
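The loop above hands em.em the height samples h together with the current parameter estimates and unpacks the refreshed values on every iteration. A minimal sketch consistent with that call signature (an assumption for illustration, not the original module) is:

import numpy as np
from scipy import stats

def em(h, mu1, sigma1, w1, mu2, sigma2, w2):
    # Hypothetical single EM update for a two-component 1D Gaussian mixture.
    h = np.asarray(h, dtype=float)
    # E-step: responsibility of each component for every sample
    p1 = w1 * stats.norm.pdf(h, loc=mu1, scale=sigma1)
    p2 = w2 * stats.norm.pdf(h, loc=mu2, scale=sigma2)
    gamma1 = p1 / (p1 + p2)
    gamma2 = 1.0 - gamma1
    # M-step: re-estimate means, standard deviations, and mixing weights
    mu1 = np.sum(gamma1 * h) / np.sum(gamma1)
    mu2 = np.sum(gamma2 * h) / np.sum(gamma2)
    sigma1 = np.sqrt(np.sum(gamma1 * (h - mu1) ** 2) / np.sum(gamma1))
    sigma2 = np.sqrt(np.sum(gamma2 * (h - mu2) ** 2) / np.sum(gamma2))
    w1, w2 = np.mean(gamma1), np.mean(gamma2)
    return mu1, sigma1, w1, mu2, sigma2, w2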
Example #30
import em
import matplotlib.pyplot as plt

print('Generating plot for first data set... \t', end='', flush=True)

# generate EM cluster plot for 1st data set
file_path = 'data/iris_flowers.csv'
x_axis = 'PetalLengthCm'
y_axis = 'SepalWidthCm'
n_clusters = 2
em.em(file_path, x_axis, y_axis, n_clusters)

print('first plot generated')
print('Generating plot for second data set... \t', end='', flush=True)

# generate EM cluster plot for 2nd data set
file_path = 'data/winequality-red.csv'
x_axis = 'citric acid'
y_axis = 'volatile acidity'
n_clusters = 2
em.em(file_path, x_axis, y_axis, n_clusters)

print('second plot generated')

# show both plots
plt.show()