def __init__(self, emulated, LEDGrided, LCDed, keyboardhacked, touch):
    self.qOut = Queue.Queue(maxsize=0)
    self.qIn = Queue.Queue(maxsize=0)
    self.lowerBoarder = 7
    self.res = 8
    self.mapping = {8: 7, 4: 3, 2: 1, 1: 0}
    self.emulated = emulated
    self.LEDGrided = LEDGrided
    self.LCDed = LCDed
    self.keyboardhacked = keyboardhacked
    self.touch = touch
    if self.LEDGrided or self.LCDed or self.keyboardhacked:
        self.i2chandler = i2cHandler.handler(self.qOut, self.LEDGrided,
                                             self.LCDed, self.keyboardhacked,
                                             self.touch)
    if self.emulated or self.keyboardhacked:
        import em
        print "Emulated"
        self.qOutEmulated = Queue.Queue(maxsize=0)
        self.emu = em.em(self.qOutEmulated, self.qIn)
    if self.LEDGrided or self.keyboardhacked:
        self.i2chandler.resetPacketCount()
    if self.keyboardhacked:
        print "Keyboardhacked"
        self.clearScreen(5)
    if self.LEDGrided:
        # width and length are presumably module-level grid dimensions;
        # they are not defined in this excerpt
        self.clearScreen(width * length)
def em_experiment(clf, X_tr, y_tr, X_te, y_te, n_classes, multi_class=False):
    mlb = MultiLabelBinarizer(classes=range(n_classes))
    mlb.fit(np.expand_dims(np.hstack((y_tr, y_te)), 1))
    y_tr_bin = mlb.transform(np.expand_dims(y_tr, 1))
    y_te_bin = mlb.transform(np.expand_dims(y_te, 1))
    train_priors = np.mean(y_tr_bin, 0)
    test_priors = np.mean(y_te_bin, 0)

    clf = init_classifiers(clf)
    print("Fitting", clf)
    clf.fit(X_tr, y_tr)
    test_posteriors = clf.predict_proba(X_te)
    posteriors_test_priors = np.mean(test_posteriors, axis=0)

    print('train priors', train_priors, sep='\n')
    print('test priors', test_priors, sep='\n')
    print('posteriors mean', posteriors_test_priors, sep='\n')
    print()

    em_test_posteriors, em_test_priors, history = em(
        y_te, test_posteriors, train_priors, multi_class=multi_class)

    measures = get_measures_from_singlehist_measures(history)

    print('Results')
    print('prior from: train test post em')
    for i, (a, b, c, d) in enumerate(
            zip(train_priors, test_priors, posteriors_test_priors,
                em_test_priors)):
        print(f'{i:11d} - {a:3.3f} {b:3.3f} {c:3.3f} {d:3.3f}')
    return measures
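# For orientation: a minimal, self-contained sketch (not this project's em
# module) of the prior-adjustment EM of Saerens et al. (2002) that a call like
# em(y_te, test_posteriors, train_priors) typically implements -- iteratively
# rescaling classifier posteriors by the ratio of estimated test priors to
# training priors. All names below are illustrative assumptions; the real
# em() above also tracks an evaluation history that this sketch omits.
import numpy as np

def em_prior_adjust_sketch(posteriors, train_priors, n_iter=100, tol=1e-6):
    priors = train_priors.copy()
    adjusted = posteriors
    for _ in range(n_iter):
        # E-step: rescale each posterior by the current prior ratio, renormalize
        adjusted = posteriors * (priors / train_priors)
        adjusted /= adjusted.sum(axis=1, keepdims=True)
        # M-step: re-estimate test priors as the mean adjusted posterior
        new_priors = adjusted.mean(axis=0)
        if np.abs(new_priors - priors).max() < tol:
            priors = new_priors
            break
        priors = new_priors
    return adjusted, priors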
def cluster(X, K, divergence, debug=False):
    if divergence == 'KL':
        dist_cls = distributions.KL
        if np.any(X <= 0):  # for MFCCs...
            X = X - X.min() + 1e-8
            X = 5. * X / X.sum(1)[:, nax]
    elif divergence == 'IS':
        dist_cls = distributions.ItakuraSaito
        if np.any(X <= 0):  # for MFCCs...
            X = X - X.min() + 1e-8
            # X = X / X.sum(1)[:,nax]
    elif divergence == 'EU':
        dist_cls = distributions.SquareDistance
    else:
        print 'Wrong divergence'
        sys.exit(1)  # exit with a failure code on an unrecognized divergence

    assignments, centroids, _ = kmeans.kmeans_best_of_n(
        X, K, n_trials=10, dist_cls=dist_cls, debug=debug)

    init_pi = np.ones(K) / K
    init_obs_distr = centroids

    tau_em, obs_distr, pi, em_ll_train, _ = em.em(X, centroids, n_iter=10)
    # tau_hmm, A, obs_distr, pi, ll_train, _ = hmm.em_hmm(X, init_pi, init_obs_distr, n_iter=10)
    # seq_hmm, _ = hmm.viterbi(X, pi, A, obs_distr)
    Tracer()()

    return {'kmeans': assignments,
            'EM': np.argmax(tau_em, axis=1),
            # 'hmm_smoothing': np.argmax(tau_hmm, axis=1),
            # 'hmm_viterbi': seq_hmm,
            }
def main(job_id, params):
    num_runs = 20
    obs_length = 100
    num_states = 2
    num_obs = 2

    # Read in the HMM index
    t = 0
    try:
        with open(os.path.join('.', 'hmm_index.txt')) as hmm_index_file:
            t = int(hmm_index_file.read())
        sys.stderr.write("!!!!!!!!!!!!!!!!!!HMM INDEX: " + str(t) + " !!!!!!!!!!!!!!!\n")
    except IOError:
        t = 0

    # Generate HMM observations
    np.random.seed(0x6b6c26b2)
    seeds = np.random.randint(0x0fffffff, size=num_runs)
    np.random.seed(seeds[t])

    # Random HMM
    z_mat, t_mat = random_hmm(num_states, num_obs)
    pi_vec = np.array([1.0 / num_states] * num_states)
    hmm_test = HMM(z_mat, t_mat, pi_vec)

    # Random observation trajectory
    obs = hmm_test.generate(obs_length)[np.newaxis, :]

    # Calculate log likelihood for the input HMM parameters
    z_mat_p_input = np.array([[params['z_mat_p_0'][0], params['z_mat_p_1'][0]]])
    t_mat_p_input = np.array([[params['t_mat_p_0'][0], params['t_mat_p_1'][0]]])
    # pi_vec_input = np.array([params['pi_0'], 1 - params['pi_0']])
    hmm_estimate = make_parameterized_HMM(z_mat_p_input, t_mat_p_input, pi_vec)
    hmm_loglikelihood = hmm_estimate.loglikelihood(obs[0])

    # Use the current suggested point and run EM to get a new point
    hmm_em_est, _, _ = em.em(hmm_estimate, hmm_estimate.z_mat,
                             hmm_estimate.t_mat, obs, 30, 0.1)
    em_est_z_mat, em_est_t_mat = retrieve_parameterized_HMM(hmm_em_est)
    em_est_ll = -hmm_em_est.loglikelihood(obs[0])
    # NOTE: reshape() is not in-place, so these two calls have no effect;
    # the matrices keep their original 2-D shape in the loops below
    em_est_z_mat.reshape((em_est_z_mat.size,))
    em_est_t_mat.reshape((em_est_t_mat.size,))
    print em_est_t_mat
    print em_est_z_mat

    historical_points = [{'params': {}}]
    # Write z_mat
    for i, v in enumerate(em_est_z_mat[0]):
        historical_points[0]['params']['z_mat_p_' + str(i)] = {
            'values': np.array([v]), 'type': 'float'}
    # Write t_mat
    for i, v in enumerate(em_est_t_mat[0]):
        historical_points[0]['params']['t_mat_p_' + str(i)] = {
            'values': np.array([v]), 'type': 'float'}
    historical_points[0]['value'] = em_est_ll
    dump_new_history('.', historical_points)
    return -hmm_loglikelihood
def em_random_restarts(num_restarts, num_clusters, parameters_df):
    ll = None
    clusters = None
    for i in range(num_restarts):
        # Initialize cluster statistics
        new_clusters = [cl.Cluster() for j in range(num_clusters)]
        cl.initialize_clusters(new_clusters, parameters_df)
        # Run EM and get the log-likelihood value
        new_ll = em.em(parameters_df, new_clusters)
        # Keep the best log-likelihood value and its clusters
        if ll is None or new_ll > ll:
            ll = new_ll
            clusters = new_clusters
    return ll, clusters
def main():
    # Load image
    im = Image.open(image_file).convert('RGB')
    width, height = im.size

    # Convenience function to build image band-by-band from array data
    def image_from_array(dat):
        bands = [Image.new('L', (width, height)) for n in range(3)]
        for i in range(3):
            bands[i].putdata(dat[:, i])
        return Image.merge('RGB', bands)

    # Resize image
    width, height = int(width / image_rescale), int(height / image_rescale)
    im = im.resize((width, height))

    # Summary image
    summary = Image.new('RGB', (width * 2 + 40, height * 2 + 60),
                        (255, 255, 255))
    draw = ImageDraw.Draw(summary)
    draw.text((5, height + 10), 'Original', fill=(0, 0, 0))
    draw.text((width + 25, height + 10),
              'Noise V = %.2f, C = %.2f' % (noise_var, noise_cov),
              fill=(0, 0, 0))
    draw.text((5, 2 * height + 40), 'Blocked Gamma', fill=(0, 0, 0))
    draw.text((width + 25, 2 * height + 40), 'Dists', fill=(0, 0, 0))
    del draw
    summary.paste(im, (10, 10))

    # Flatten to emissions
    real_emissions = list(im.getdata())
    num_data = len(real_emissions)
    real_emissions = np.array(real_emissions)

    # Block emissions
    width_blocks = np.array_split(np.arange(width), block_splits)
    height_blocks = np.array_split(np.arange(height), block_splits)
    idx = np.arange(num_data)
    idx.resize((height, width))
    blocks = []
    for hb in height_blocks:
        for wb in width_blocks:
            block = [idx[h, w] for h in hb for w in wb]
            blocks.append(np.array(block))

    # Generate noise
    v, c = noise_var, noise_cov
    cov = [[v, c, c], [c, v, c], [c, c, v]]
    noise = np.random.multivariate_normal([0, 0, 0], cov, width * height)
    noisy_emissions = real_emissions + noise

    # Generate noisy image
    noisy = image_from_array(noisy_emissions)
    summary.paste(noisy, (30 + width, 10))

    # Use K-means to initialize components
    results = kmeans(noisy_emissions, num_comps)
    init_gamma = results['best']
    means = results['means']

    # Analyze color space
    if do_colormap:
        col = {'R': 0, 'G': 1, 'B': 2}
        plt.figure()
        for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                         (real_emissions, 'R', 'B'),
                                         (real_emissions, 'G', 'B'),
                                         (noisy_emissions, 'R', 'G'),
                                         (noisy_emissions, 'R', 'B'),
                                         (noisy_emissions, 'G', 'B')]):
            plt.subplot(2, 3, i + 1)
            plt.hexbin(d[:, col[c1]], d[:, col[c2]], gridsize=30,
                       extent=(0, 255, 0, 255))
            plt.plot(means[:, col[c1]], means[:, col[c2]], '.k')
            plt.xlabel(c1)
            plt.ylabel(c2)
            plt.axis([-20, 275, -20, 275])
        plt.savefig('image_test_color_colormap.png')
        plt.show()

    # Do EM
    results = em(noisy_emissions,
                 [MultivariateNormal() for n in range(num_comps)],
                 count_restart=count_restart, blocks=blocks, max_reps=100,
                 init_gamma=init_gamma, trace=True, pi_max=pi_max)
    dists = results['dists']
    dists_trace = results['dists_trace']
    pi = results['pi']
    print 'Iterations: %(reps)d' % results
    gamma = np.transpose(results['gamma'])
    means = np.array([d.mean() for d in dists])
    covs = np.array([d.cov() for d in dists])

    # Reconstruct with blocked gamma
    rec_blocked_gamma = np.array(
        [np.average(means, weights=g, axis=0) for g in gamma])
    im_blocked_gamma = image_from_array(rec_blocked_gamma)
    summary.paste(im_blocked_gamma, (10, 40 + height))

    # Reconstruct from distributions alone
    pi_opt = pi_maximize(noisy_emissions, dists)
    phi = np.empty((num_data, num_comps))
    for c in range(num_comps):
        phi[:, c] = dists[c].density(noisy_emissions)
    phi = np.matrix(phi)
    for i, pi in enumerate(pi_opt):
        phi[:, i] *= pi
    gamma_dists = phi / np.sum(phi, axis=1)
    rec_dists = np.array(np.dot(gamma_dists, means))
    im_dists = image_from_array(rec_dists)
    summary.paste(im_dists, (30 + width, 40 + height))

    # Show summary image
    if show_summary:
        summary.show()
    summary.save('image_test_color_reconstruction.png')

    # Compare RMSE between reconstructions
    def rmse(x):
        return np.sqrt(np.mean((x - real_emissions) ** 2))
    print 'Raw RMSE: %.1f' % rmse(noisy_emissions)
    print 'Blocked Gamma RMSE: %.1f' % rmse(rec_blocked_gamma)
    print 'Dists RMSE: %.1f' % rmse(rec_dists)

    # Visualize variance components
    if do_variance_viz:
        temp_files = []
        col = {'R': 0, 'G': 1, 'B': 2}
        fig = plt.figure()
        for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                         (real_emissions, 'R', 'B'),
                                         (real_emissions, 'G', 'B'),
                                         (noisy_emissions, 'R', 'G'),
                                         (noisy_emissions, 'R', 'B'),
                                         (noisy_emissions, 'G', 'B')]):
            ax = fig.add_subplot(2, 3, i + 1)
            plt.hexbin(d[:, col[c1]], d[:, col[c2]], gridsize=30,
                       extent=(0, 255, 0, 255))
            plt.xlabel(c1)
            plt.ylabel(c2)
            plt.axis([-20, 275, -20, 275])
        for idx, dists in enumerate(dists_trace):
            ells = []
            for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                             (real_emissions, 'R', 'B'),
                                             (real_emissions, 'G', 'B'),
                                             (noisy_emissions, 'R', 'G'),
                                             (noisy_emissions, 'R', 'B'),
                                             (noisy_emissions, 'G', 'B')]):
                for dist in dists:
                    m, c = dist.mean(), dist.cov()
                    cm = (c[[col[c1], col[c2]]])[:, [col[c1], col[c2]]]
                    e, v = la.eigh(cm)
                    ell = Ellipse(xy=[m[col[c1]], m[col[c2]]],
                                  width=np.sqrt(e[0]), height=np.sqrt(e[1]),
                                  angle=(180.0 / np.pi) * np.arccos(v[0, 0]))
                    ells.append(ell)
                    ax = fig.add_subplot(2, 3, i + 1)
                    ax.add_artist(ell)
                    ell.set_clip_box(ax.bbox)
                    ell.set_alpha(0.9)
                    ell.set_facecolor(np.fmax(np.fmin(m / 255, 1), 0))
            file_name = 'tmp_%03d.png' % idx
            temp_files.append(file_name)
            plt.savefig(file_name, dpi=100)
            for ell in ells:
                ell.remove()
        command = ('mencoder', 'mf://tmp_*.png', '-mf',
                   'type=png:w=800:h=600:fps=5', '-ovc', 'lavc', '-lavcopts',
                   'vcodec=mpeg4', '-oac', 'copy', '-o',
                   'image_test_color_components.avi')
        os.spawnvp(os.P_WAIT, 'mencoder', command)
        for temp_file in temp_files:
            os.unlink(temp_file)

    # Find common variance components
    print 'True noise:'
    print cov
    chols = [la.cholesky(c) for c in covs]
    chol_recon = np.zeros((3, 3))
    for i in range(3):
        for j in range(3):
            if j > i:
                continue
            chol_recon[i, j] = np.Inf
            for chol in chols:
                if abs(chol[i, j]) < abs(chol_recon[i, j]):
                    chol_recon[i, j] = chol[i, j]
    cov_recon = np.dot(chol_recon, np.transpose(chol_recon))
    print 'Reconstructed noise:'
    print cov_recon
print "ACCURACIES: " print "Histogram: ", mean(histogram_accuracies) print "Histogram Sensitivity", mean(histogram_sensitivities) print "Histogram Specificity", mean(histogram_specificities) print "Logreg: ", mean(logreg_accuracies) print "Logreg Sensitivity", mean(logreg_sensitivities) print "Logreg Specificity", mean(logreg_specificities) print "Gaussian Means: ", mean(gaussian_mean_accuracies) print "Gaussian Distinct: ", mean(gaussian_distinct_accuracies) print "Random: ", len(samples_healthy) / float(len(samples_healthy) + len(samples_unhealthy)) samples_unhealthy = list(samples_unhealthy) samples_unhealthy.extend(samples_s) samples_unhealthy = array(samples_unhealthy) sigma, mu, pi = em(samples_unhealthy, 2, 1000) results = test_em_for_2(sigma, mu, pi, samples_murmur, samples_s) print results """ sigma, mu, pi = em(samples_unhealthy, 13, 1000) labeled_diseases = [] results = test_em_for_13(sigma, mu, pi, labeled_diseases) print results """
# Initialize summary image
summary = Image.new('L', (28 * num_components + 65, 28 * len(num_blocks)), 255)

# Do inference for varying numbers of blocks
idxs = np.argsort(map(np.sum, emissions))
reps = []
for block_i, num_block in enumerate(num_blocks):
    # Block data
    blocks = np.array_split(idxs, num_block)

    # Run EM
    results = em(emissions,
                 [Product([Bernoulli() for i in range(28 * 28)])
                  for n in range(num_components)],
                 count_restart=3.0, blocks=blocks, gamma_seed=137,
                 init_gamma=(init_to_labels and labels or None))
    dists = results['dists']
    print 'Reps: %d' % results['reps']
    reps.append(results['reps'])

    # Produce summary image
    offset = 0
    im = Image.new('L', (28 * len(dists), 28))
    for d in results['dists']:
        digit = Image.new('L', (28, 28))
        digit.putdata(np.array(d.mean()) * 255)
        im.paste(digit, (offset, 0))
        offset += 28
    for wb in width_blocks:
        block = [idx[h, w] for h in hb for w in wb]
        blocks.append(np.array(block))

# Generate noise
noise = np.random.normal(0, noise_sd, width * height)
noisy_emissions = real_emissions + noise

# Generate noisy image
noisy = Image.new('L', (width, height))
noisy.putdata(noisy_emissions)
summary.paste(noisy, (30 + width, 10))

# Do EM
results = em(noisy_emissions,
             [NormalFixedMean(m, max_sigma=max_sigma) for m in range(256)],
             count_restart=count_restart, blocks=blocks)
dists = results['dists']
pi = results['pi']
print 'Iterations: %(reps)d' % results
gamma = np.transpose(results['gamma'])
means = np.array([d.mean() for d in dists])
sds = np.array([d.sd() for d in dists])

# Display summary figures
display_densities(real_emissions, dists)

# Reconstruct with argmax
im_argmax = Image.new('L', (width, height))
reconstruct_argmax = means[np.argmax(gamma, axis=1)]
    Tracer()()

    return seq, A, obs_distr, ll_test, monitor_vals

if __name__ == '__main__':
    X = np.loadtxt('EMGaussian.data')
    Xtest = np.loadtxt('EMGaussian.test')
    K = 4

    # Run simple EM (no HMM)
    iterations = 40
    assignments, centers, _ = kmeans.kmeans_best_of_n(X, K, n_trials=5)
    new_centers = [distributions.Gaussian(c.mean, np.eye(2))
                   for c in centers]
    tau, obs_distr, pi, gmm_ll_train, gmm_ll_test = \
        em.em(X, new_centers, assignments, n_iter=iterations, Xtest=Xtest)

    # example with fixed parameters
    A = 1. / 6 * np.ones((K, K))
    A[np.diag(np.ones(K)) == 1] = 0.5

    lalpha, lbeta = alpha_beta(Xtest, pi, A, obs_distr)
    log_p = smoothing(lalpha, lbeta)
    p = np.exp(log_p)

    def plot_traj(p):
        plt.figure()
        ind = np.arange(100)
        for k in range(K):
            plt.subplot(K, 1, k + 1)
            plt.bar(ind, p[:100, k])
    plt.imshow(data)
    plt.show()

# Initialize with K-means
if init == 'kmeans':
    init_gamma = kmeans(data.reshape((dim * dim, 1)), 2)['best']

# Do (potentially adaptive) blocked EM, depending on strategy
for block_strategy in block_strategies:
    # Only 'perfect' strategy uses the true states
    blocks = block(data, block_strategy, true=x)

    # Do EM
    results = em(data.reshape((dim * dim,)), model,
                 count_restart=count_restart, blocks=blocks,
                 init_gamma=init_gamma, pi_max=pi_max)
    print 'Iterations: %d (%s)' % (results['reps'], block_strategy)
    dists = results['dists']
    pi = results['pi']

    # Display results
    if show_each:
        for p, d in zip(np.transpose(pi), dists):
            print '%s: %s' % (p, d.display())
        print
    if graphics:
        display_densities(data.reshape((dim * dim,)), dists)
        display_hist(data.reshape((dim * dim,)), dists)
t = time.time()
assignments, centroids, dists = \
    kmeans.kmeans_best_of_n(X, K, n_trials=4, dist_cls=distributions.KL)
print 'K-means: {}s'.format(time.time() - t)

results[algos.kmeans] = {
    'seq': assignments,
    'centroids': centroids,
}
seqs[algos.kmeans] = assignments

# EM
if options.init == 'em' or algos.em in algs:
    iterations = 10
    t = time.time()
    tau_em, obs_distr, pi, em_ll_train, _ = em.em(X, centroids,
                                                  n_iter=options.n_iter)
    print 'EM: {}s'.format(time.time() - t)

    results[algos.em] = {
        'seq': np.argmax(tau_em, axis=1),
        'obs_distr': obs_distr,
        'll_train': em_ll_train,
        'tau': tau_em,
    }
    seqs[algos.em] = np.argmax(tau_em, axis=1)

# initialization
if options.init == 'em':
    # initialize with EM
    init_pi = pi
    init_obs_distr = obs_distr
if init == 'true':
    init_gamma = data_comp
data_mu = mu[data_comp]
data = np.random.normal(data_mu, 1)
blocks = np.array_split(np.arange(n), num_blocks)

# Initialize with K-means
if init == 'kmeans':
    init_gamma = kmeans(data.reshape((n, 1)), 2)['best']

# Do EM
results = em(data, model, count_restart=count_restart, blocks=blocks,
             init_gamma=init_gamma, init_reps=em_steps, max_reps=em_steps,
             pi_max=pi_max, trace=True)
if show_each:
    print 'Iterations: %(reps)d' % results
dists, dists_trace = results['dists'], results['dists_trace']
pi, pi_trace = results['pi'], results['pi_trace']

# Display results
if show_each:
    for p, d in zip(np.transpose(pi), dists):
        print '%s: %s' % (p, d.display())
    print
if graphics:
import numpy as np
import em

observations = np.array([[1, 0, 0, 0, 1, 1, 0, 1, 0, 1],
                         [1, 1, 1, 1, 0, 1, 1, 1, 1, 1],
                         [1, 0, 1, 1, 1, 1, 1, 0, 1, 1],
                         [1, 0, 1, 0, 0, 0, 1, 1, 0, 0],
                         [0, 1, 1, 1, 0, 1, 1, 1, 0, 1]])

print em.em(observations, [0.6, 0.5])
print em.em(observations, [0.5, 0.6])
print em.em(observations, [0.3, 0.3])
print em.em(observations, [0.9999, 0.00000001])
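# For context: the rows above are coin-flip sequences and the two numbers are
# initial bias guesses, matching the classic two-coin EM example
# (Do & Batzoglou, "What is the expectation maximization algorithm?", 2008).
# Below is a minimal self-contained sketch of that update, assuming em.em
# behaves similarly; it is illustrative, not this project's implementation.
import numpy as np
from scipy.stats import binom

def two_coin_em_sketch(observations, thetas, n_iter=50):
    theta_a, theta_b = thetas
    for _ in range(n_iter):
        heads_a = tails_a = heads_b = tails_b = 0.0
        for row in observations:
            h = row.sum()
            t = len(row) - h
            # E-step: responsibility of each coin for this sequence
            la = binom.pmf(h, len(row), theta_a)
            lb = binom.pmf(h, len(row), theta_b)
            wa = la / (la + lb)
            wb = 1.0 - wa
            heads_a += wa * h
            tails_a += wa * t
            heads_b += wb * h
            tails_b += wb * t
        # M-step: re-estimate each coin's bias from its expected counts
        theta_a = heads_a / (heads_a + tails_a)
        theta_b = heads_b / (heads_b + tails_b)
    return theta_a, theta_b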
def main():
    tol = 0.00001
    max_iter = 100
    alpha = 2
    beta = 1

    # SYN dataset
    tag_syn = os.path.join("data", "SYN_data", "SYN_r")
    num_trials = 10
    plot_mv = []
    plot_bp = []
    plot_em = []
    # r = 1, 3, 5, ..., 19: number of tasks assigned to each worker
    for value in range(1, 20, 2):
        avg_error_mv = 0
        avg_error_bp = 0
        avg_error_em = 0
        time_mv = 0
        time_bp = 0
        time_em = 0
        for i in range(num_trials):  # trials 1-10
            data = io.loadmat(tag_syn + "{}_{}.mat".format(value, i + 1))["data"][0][0]
            true_labels = io.loadmat(
                tag_syn + "{}_{}_true.mat".format(value, i + 1))["true_labels"][0]

            # MV
            start = time.time()
            est_mv = mv(data)
            error_mv = np.average(est_mv != true_labels)
            avg_error_mv += error_mv
            end = time.time()
            time_mv += end - start

            # BP
            start = time.time()
            est_bp = bp(data, max_iter, tol, alpha, beta)
            error_bp = np.average(est_bp != true_labels)
            avg_error_bp += error_bp
            end = time.time()
            time_bp += end - start

            # EM
            start = time.time()
            est_em = em(data, max_iter, tol, alpha, beta)
            error_em = np.average(est_em != true_labels)
            avg_error_em += error_em
            end = time.time()
            time_em += end - start

        avg_error_mv = avg_error_mv / num_trials
        print("In SYN dataset using MV, when r={}, error rate is {}, time is {} secs"
              .format(value, avg_error_mv, time_mv))
        avg_error_bp = avg_error_bp / num_trials
        print("In SYN dataset using BP, when r={}, error rate is {}, time is {} secs"
              .format(value, avg_error_bp, time_bp))
        avg_error_em = avg_error_em / num_trials
        print("In SYN dataset using EM, when r={}, error rate is {}, time is {} secs"
              .format(value, avg_error_em, time_em))
        plot_mv.append(avg_error_mv)
        plot_bp.append(avg_error_bp)
        plot_em.append(avg_error_em)

    plt.figure(figsize=(10, 8))
    plt.title("SYN dataset")
    plt.xlabel("r")
    plt.ylabel("error")
    plt.xlim(0, 20)
    plt.ylim(0, 1)
    plt.plot([i for i in range(1, 20, 2)], plot_mv, label="MV")
    plt.plot([i for i in range(1, 20, 2)], plot_bp, label="BP")
    plt.plot([i for i in range(1, 20, 2)], plot_em, label="EM")
    plt.legend(loc="upper left")
    plt.show()

    # SIM dataset
    tag_sim = os.path.join("data", "SIM_data", "SIM_l")
    num_trials = 10
    plot_mv = []
    plot_bp = []
    plot_em = []
    # l = 1, 5, 10, 15, 20, 25: number of workers assigned to each task
    for value in [1, 5, 10, 15, 20, 25]:
        avg_error_mv = 0
        avg_error_bp = 0
        avg_error_em = 0
        time_mv = 0
        time_bp = 0
        time_em = 0
        for i in range(num_trials):  # trials 1-10
            data = io.loadmat(tag_sim + "{}_{}.mat".format(value, i + 1))["data"][0][0]
            true_labels = io.loadmat(
                tag_sim + "{}_{}_true.mat".format(value, i + 1))["true_labels"][0]

            # MV
            start = time.time()
            est_mv = mv(data)
            error_mv = np.average(est_mv != true_labels)
            avg_error_mv += error_mv
            end = time.time()
            time_mv += end - start

            # BP
            start = time.time()
            est_bp = bp(data, max_iter, tol, alpha, beta)
            error_bp = np.average(est_bp != true_labels)
            avg_error_bp += error_bp
            end = time.time()
            time_bp += end - start

            # EM
            start = time.time()
            est_em = em(data, max_iter, tol, alpha, beta)
            error_em = np.average(est_em != true_labels)
            avg_error_em += error_em
            end = time.time()
            time_em += end - start

        avg_error_mv = avg_error_mv / num_trials
        print("In SIM dataset using MV, when l={}, error rate is {}, time is {} secs"
              .format(value, avg_error_mv, time_mv))
        avg_error_bp = avg_error_bp / num_trials
        print("In SIM dataset using BP, when l={}, error rate is {}, time is {} secs"
              .format(value, avg_error_bp, time_bp))
        avg_error_em = avg_error_em / num_trials
        print("In SIM dataset using EM, when l={}, error rate is {}, time is {} secs"
              .format(value, avg_error_em, time_em))
        plot_mv.append(avg_error_mv)
        plot_bp.append(avg_error_bp)
        plot_em.append(avg_error_em)

    plt.figure(figsize=(10, 8))
    plt.title("SIM dataset")
    plt.xlabel("l")
    plt.ylabel("error")
    plt.xlim(0, 30)
    plt.ylim(0, 1)
    plt.plot([1, 5, 10, 15, 20, 25], plot_mv, label="MV")
    plt.plot([1, 5, 10, 15, 20, 25], plot_bp, label="BP")
    plt.plot([1, 5, 10, 15, 20, 25], plot_em, label="EM")
    plt.legend(loc="upper left")
    plt.show()

    # TEMP dataset
    data = io.loadmat("data/other_data/TEMP_data.mat")["data"][0][0]
    true_labels = io.loadmat("data/other_data/TEMP_data_true.mat")["true_labels"][0]

    # MV
    start = time.time()
    est_mv = mv(data)
    error_mv = np.average(est_mv != true_labels)
    end = time.time()
    time_mv = end - start

    # BP
    start = time.time()
    est_bp = bp(data, max_iter, tol, alpha, beta)
    error_bp = np.average(est_bp != true_labels)
    end = time.time()
    time_bp = end - start

    # EM
    start = time.time()
    est_em = em(data, max_iter, tol, alpha, beta)
    error_em = np.average(est_em != true_labels)
    end = time.time()
    time_em = end - start

    print("In TEMP dataset using MV, error rate is {}, time is {} secs".format(
        error_mv, time_mv))
    print("In TEMP dataset using BP, error rate is {}, time is {} secs".format(
        error_bp, time_bp))
    print("In TEMP dataset using EM, error rate is {}, time is {} secs".format(
        error_em, time_em))
# Assumed imports for this excerpt: the original starts mid-import block, so
# the Flask names used below are added here for completeness
from flask import Flask, redirect, render_template, request
from utils import ExpenseManagerUtils
import hashlib
import connection
import os
from nocache import nocache
import em

app = Flask(__name__)
app.secret_key = "highlysecret"
app.config['TEMPLATES_AUTO_RELOAD'] = True
emutils = ExpenseManagerUtils.UtilsLib()
conn = connection.connection()
emapi = em.em()

@app.route('/')
def reroute():
    return redirect('/login', code=302)

@app.route('/login')
def login():
    email = request.cookies.get('loggedin')
    if email and request.cookies.get('security_verify:' + email) == \
            emutils.hash_of_hashpass(email):
        return redirect('/home', code=302)
    else:
        form = LoginForm.LoginForm()
        return render_template('login.html', form=form)

@app.route('/register', methods=['POST'])
def em_experiment(ax, clf, X_tr, y_tr, X_te, y_te, y_min=0, y_max=1.0):
    mlb = MultiLabelBinarizer()
    mlb.fit(np.expand_dims(np.hstack((y_tr, y_te)), 1))
    y_tr_bin = mlb.transform(np.expand_dims(y_tr, 1))
    y_te_bin = mlb.transform(np.expand_dims(y_te, 1))
    train_priors = np.mean(y_tr_bin, 0)
    test_priors = np.mean(y_te_bin, 0)

    print("Fitting", clf)
    clf.fit(X_tr, y_tr)
    test_posteriors = clf.predict_proba(X_te)
    posteriors_test_priors = np.mean(test_posteriors, axis=0)

    print('train priors', train_priors, sep='\n')
    print('test priors', test_priors, sep='\n')
    print('posteriors mean', posteriors_test_priors, sep='\n')
    print()

    em_test_posteriors, em_test_priors, history = em(y_te, test_posteriors,
                                                     train_priors)

    em_prior = [p[1] for _, p, _, _, _, _ in history]
    accuracy = [a for _, _, _, a, _, _ in history]
    f1 = [2 * p * r / (p + r) if p + r > 0 else 0
          for _, _, _, _, p, r in history]

    ax.set_ylim([y_min, y_max])
    ax.plot(range(len(accuracy)), accuracy, linestyle='-.', color='m',
            label='accuracy')
    ax.plot(range(len(f1)), f1, linestyle='--', color='#dd9f00', label='f1')
    ax.plot(range(len(em_prior)), em_prior, color='b', label='em pr')
    ax.hlines([train_priors[1]], 0, len(em_prior) - 1, colors=['r'],
              linestyles=[':'], label='train pr')
    ax.hlines([posteriors_test_priors[1]], 0, len(em_prior) - 1,
              colors=['#b5651d'], linestyles=['-.'], label='clf pr')
    ax.hlines([test_priors[1]], 0, len(em_prior) - 1, colors=['g'],
              linestyles=['--'], label='test pr')
    ax.set()
    ax.grid()

    print('Results')
    print('prior from: train test post em')
    for i, (a, b, c, d) in enumerate(
            zip(train_priors, test_priors, posteriors_test_priors,
                em_test_priors)):
        print(f'{i:11d} - {a:3.3f} {b:3.3f} {c:3.3f} {d:3.3f}')
    return (posteriors_test_priors[1], em_test_priors[1],
            accuracy[0], accuracy[-1], f1[0], f1[-1])
mat = spio.loadmat("HMMdata.mat")
X = mat['X']
print(X.shape)
# X = np.loadtxt('EMGaussian.data')
# Xtest = np.loadtxt('EMGaussian.test')
Xtest = X
K = 3

# Run simple EM (no HMM)
iterations = 40
assignments, centers, _ = kmeans.kmeans_best_of_n(X, K, n_trials=5)
new_centers = [distributions.Gaussian(c.mean, np.eye(2)) for c in centers]
tau, obs_distr, pi, gmm_ll_train, gmm_ll_test = em.em(
    X, new_centers, assignments, n_iter=iterations, Xtest=Xtest)

# example with fixed parameters
A = 1. / 6 * np.ones((K, K))
A[np.diag(np.ones(K)) == 1] = 0.5

lalpha, lbeta = alpha_beta(Xtest, pi, A, obs_distr)
log_p = smoothing(lalpha, lbeta)
p = np.exp(log_p)

# def plot_traj(p):
#     plt.figure()
#     ind = np.arange(100)
#     for k in range(K):
import numpy as np
from plot_hist import plot_hist
from em import em

# Initial values for 2 populations
pi0_2 = np.array([1. / 4, 3. / 4])
mu0_2 = np.array([.57, .67])
s20_2 = np.array([1. / 10000, 1. / 10000])

# Initial values for 3 populations
pi0_3 = np.array([1. / 3, 1. / 3, 1. / 3])
mu0_3 = np.array([.57, .6, .67])
s20_3 = np.array([1. / 10000, 1. / 10000, 1. / 10000])

# Empirical distribution
plot_hist('crabe.txt')

# 2 populations
pi, mu, s2 = em(pi0_2, mu0_2, s20_2)
plot_hist('crabe.txt', mu, s2, pi)

# 3 populations
pi, mu, s2 = em(pi0_3, mu0_3, s20_3)
plot_hist('crabe.txt', mu, s2, pi)
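# For reference: a minimal sketch of the univariate Gaussian-mixture EM that a
# call like em(pi0, mu0, s20) presumably performs on the crab data (the
# project's em module appears to load the data itself; here it is passed
# explicitly). Illustrative only -- the function name and the data argument
# are assumptions, not this project's API.
import numpy as np

def gmm_em_1d_sketch(x, pi, mu, s2, n_iter=100):
    for _ in range(n_iter):
        # E-step: responsibilities of each component for each point
        dens = (pi * np.exp(-0.5 * (x[:, None] - mu) ** 2 / s2)
                / np.sqrt(2 * np.pi * s2))
        gamma = dens / dens.sum(axis=1, keepdims=True)
        # M-step: weighted updates of the weights, means, and variances
        nk = gamma.sum(axis=0)
        pi = nk / len(x)
        mu = (gamma * x[:, None]).sum(axis=0) / nk
        s2 = (gamma * (x[:, None] - mu) ** 2).sum(axis=0) / nk
    return pi, mu, s2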
# programmed entirely by Paul An
import clusterparser
import kmeans
import em

# for testing and visualization
def writeKAssignments(assignments):
    f = open('test.csv', 'w')
    for assignment in assignments:
        f.write(str(assignment) + '\n')
    f.close()

def writeEMAssignments(assignments):
    f = open('test.csv', 'w')
    for assignment in assignments:
        f.write(str(assignment.getAssignment()) + '\n')
    f.close()

points = clusterparser.parsedatafile("clusters.txt")
centroids, kmassignments = kmeans.kmeans(points, 3)
print(centroids)
gausses, emassignments = em.em(points, 3)
for gauss in gausses:
    print(gauss.mean)
print('done')
iforest.fit(X_train)
s_X_iforest = iforest.decision_function(X_test)
print('LocalOutlierFactor processing...')
lof = LocalOutlierFactor(n_neighbors=20)
lof.fit(X_train)
s_X_lof = lof.decision_function(X_test)
print('OneClassSVM processing...')
ocsvm = OneClassSVM()
ocsvm.fit(X_train[:min(ocsvm_max_train, n_samples_train - 1)])
s_X_ocsvm = ocsvm.decision_function(X_test).reshape(1, -1)[0]

s_unif_iforest = iforest.decision_function(unif)
s_unif_lof = lof.decision_function(unif)
s_unif_ocsvm = ocsvm.decision_function(unif).reshape(1, -1)[0]

plt.subplot(121)
auc_iforest, em_iforest, amax_iforest = em(t, t_max, volume_support,
                                           s_unif_iforest, s_X_iforest,
                                           n_generated)
auc_lof, em_lof, amax_lof = em(t, t_max, volume_support, s_unif_lof,
                               s_X_lof, n_generated)
auc_ocsvm, em_ocsvm, amax_ocsvm = em(t, t_max, volume_support, s_unif_ocsvm,
                                     s_X_ocsvm, n_generated)

if amax_iforest == -1 or amax_lof == -1 or amax_ocsvm == -1:
    amax = -1
else:
    amax = max(amax_iforest, amax_lof, amax_ocsvm)

plt.subplot(121)
plt.plot(t[:amax], em_iforest[:amax], lw=1,
         label='%s (em_score = %0.3e)'
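# Note that em() in the snippet above is not expectation-maximization: it
# computes the excess-mass (EM) curve used to evaluate unsupervised anomaly
# detectors (Goix, "How to Evaluate the Quality of Unsupervised Anomaly
# Detection Algorithms?", 2016). Below is a minimal sketch under the same
# signature assumptions (s_unif: scores on uniform samples over the data
# support, s_X: scores on test data, volume_support: Lebesgue volume of the
# support); it is illustrative, not the benchmark's exact implementation.
import numpy as np

def em_curve_sketch(t, t_max, volume_support, s_unif, s_X, n_generated):
    EM_t = np.zeros(t.shape[0])
    n_samples = s_X.shape[0]
    EM_t[0] = 1.0
    for u in np.unique(s_X):
        # excess mass at each level t: empirical mass of the level set
        # {s > u} minus t times its estimated volume
        EM_t = np.maximum(EM_t, (s_X > u).sum() / n_samples
                          - t * (s_unif > u).sum() / n_generated
                          * volume_support)
    amax = np.argmax(EM_t <= t_max) + 1
    if amax == 1:
        amax = -1  # the EM curve never dropped below t_max on this grid
    # area under the EM curve up to the cutoff, via the trapezoid rule
    return np.trapz(EM_t[:amax], t[:amax]), EM_t, amax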
def main():
    run_data = {}
    run_id = 0

    scale = 0.5
    emissions_normal = {1: Normal(0, 2.0 * scale),
                        2: Normal(3.5, 3.0 * scale),
                        3: Normal(6.5, 1.0 * scale)}
    emissions_laplace = {1: Laplace(0, 2.0 * scale),
                         2: Laplace(3.5, 3.0 * scale),
                         3: Laplace(6.5, 1.0 * scale)}
    emission_spec = emissions_normal
    dists = [Normal(max_sigma=6.0) for n in range(3)]

    num_state_reps = 50
    num_emission_reps = 4
    num_gamma_init_reps = 4
    num_blocks = [1, 2, 5, 10, 20, 50]
    verbose = False
    graphics_on = False

    total_work = (num_state_reps * num_emission_reps * 2 *
                  num_gamma_init_reps * len(num_blocks))
    work = 0
    for state_rep in range(num_state_reps):
        print 'State repetition %d' % state_rep

        # Generate HMM states
        while True:
            model = HMM([('Start', (1,), (1.0,)),
                         (1, (1, 2, 3), (0.98, 0.02, 0.0)),
                         (2, (1, 2, 3), (0.02, 0.95, 0.03)),
                         (3, (1, 2, 3, 'End'), (0.03, 0.03, 0.93, 0.01))],
                        emission_spec)
            model.simulate()
            num_data = len(model.state_vec)
            if num_data < 5000 and num_data > 100:
                break
        counts = {}
        for state in model.state_vec:
            if not state in counts:
                counts[state] = 0
            counts[state] += 1
        if verbose:
            print 'Counts: %s' % str(counts)

        # Generate shuffled indices for repeatable shuffling
        shuffling = np.arange(num_data)
        np.random.shuffle(shuffling)

        for emission_rep in range(num_emission_reps):
            if verbose:
                print 'Emission repetition %d' % emission_rep
            model.emit()

            for shuffled in [False, True]:
                if verbose:
                    print 'Shuffling HMM run: %s' % str(shuffled)
                states = np.array(model.state_vec)
                emissions = np.array(model.emission_vec)
                if shuffled:
                    states = states[shuffling]
                    emissions = emissions[shuffling]

                for num_block in num_blocks:
                    if verbose:
                        print 'Blocks: %d' % num_block
                    blocks = np.array_split(np.arange(num_data), num_block)

                    for gamma_rep in range(num_gamma_init_reps):
                        if verbose:
                            print 'Initial gamma seed: %d' % gamma_rep
                        init_gamma = np.array(states) - 1

                        run_id += 1
                        this_run = {}
                        this_run['num data'] = num_data
                        this_run['state rep'] = state_rep
                        this_run['emission rep'] = emission_rep
                        this_run['shuffled'] = shuffled
                        this_run['blocks'] = num_block
                        this_run['gamma init rep'] = gamma_rep

                        start_time = time.clock()
                        results = em(emissions, dists, blocks=blocks,
                                     gamma_seed=gamma_rep,
                                     init_gamma=init_gamma,
                                     count_restart=0.0)
                        pi = results['pi']
                        # NOTE: this overwrites the initial dists, so every
                        # subsequent run starts from the previous fit
                        dists = results['dists']
                        reps = results['reps']
                        conv = results['converged']
                        run_time = time.clock() - start_time
                        this_run['run time'] = run_time
                        this_run['reps'] = reps
                        conv_status = conv and 'converged' or 'not converged'
                        this_run['convergence'] = conv_status
                        print 'Reps: %d (%s)' % (reps, conv_status)
                        print 'Time elapsed: %.2f' % run_time
                        if verbose:
                            print_mixture(pi, dists)
                        if graphics_on:
                            display_densities(emissions, dists)
                            display_hist(emissions, dists)

                        act = emission_spec.values()
                        this_run['err mean max'] = max_error_mean(dists, act)
                        this_run['err mean mean'] = mean_error_mean(dists, act)

                        like = np.zeros(num_data)
                        pi_overall = np.mean(pi, 0)
                        for p, dist in zip(pi_overall, dists):
                            like += p * dist.density(states)
                        this_run['log likelihood'] = np.sum(np.log(like))

                        like = np.zeros(num_data)
                        for i, block in enumerate(blocks):
                            for p, dist in zip(pi[i], dists):
                                comp = p * dist.density(states[block])
                                like[block] += comp
                        this_run['log likelihood local'] = np.sum(np.log(like))

                        run_data[run_id] = this_run
                        work += 1
                        print 'Finished run %d/%d' % (work, total_work)

    # Output data to CSV
    cols = set()
    for id in run_data:
        for k in run_data[id]:
            cols.add(k)
    with open('outfile.csv', 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(list(cols))
        writer.writerows([[run_data[id][c] for c in cols]
                          for id in run_data])
plt.show()

# Construct the GMM
# Step 1: initialize the means, variances, and weights of the male and
# female height components from prior experience
mu1 = 170
sigmal = 10
w1 = 0.7  # male component
mu2 = 160
sigma2 = 10
w2 = 0.3  # taking our (science and engineering) school as an example
d = 1
n = len(h)  # sample length

# Main EM loop
for iteration in range(100):
    mu1, sigmal, w1, mu2, sigma2, w2 = em.em(h, mu1, sigmal, w1,
                                             mu2, sigma2, w2)

# Probability density curves for male, female, and mixed heights
t = np.linspace(120, 220, 550)  # 550 points
m = stats.norm.pdf(t, loc=mu1, scale=sigmal)  # predicted male distribution
f = stats.norm.pdf(t, loc=mu2, scale=sigma2)  # predicted female distribution
mix = w1 * m + w2 * f  # mixture

plt.plot(t, m, color='b')
plt.plot(t, f, color='r')
plt.plot(t, mix, color='k')
plt.title('Probability density curve for boys and girls and mixed height')
# plt.legend([p1, p2, p3], ["male", "female", "mixing"], loc='upper right')
plt.legend(["male", "female", "mixing"], loc='upper right')
plt.xlabel('height/cm')
plt.ylabel('Probability')  # axis setup
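# A minimal sketch of the single EM iteration that em.em(h, mu1, sigmal, w1,
# mu2, sigma2, w2) appears to perform for this two-component height mixture.
# Illustrative only: the argument order matches the call above, everything
# else is an assumption about the project's em module.
import numpy as np
from scipy import stats

def em_step_sketch(h, mu1, sigma1, w1, mu2, sigma2, w2):
    h = np.asarray(h, dtype=float)
    # E-step: posterior probability that each sample belongs to component 1
    p1 = w1 * stats.norm.pdf(h, loc=mu1, scale=sigma1)
    p2 = w2 * stats.norm.pdf(h, loc=mu2, scale=sigma2)
    gamma = p1 / (p1 + p2)
    # M-step: re-estimate means, standard deviations, and weights
    n1, n2 = gamma.sum(), (1 - gamma).sum()
    mu1 = (gamma * h).sum() / n1
    mu2 = ((1 - gamma) * h).sum() / n2
    sigma1 = np.sqrt((gamma * (h - mu1) ** 2).sum() / n1)
    sigma2 = np.sqrt(((1 - gamma) * (h - mu2) ** 2).sum() / n2)
    w1, w2 = n1 / len(h), n2 / len(h)
    return mu1, sigma1, w1, mu2, sigma2, w2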
import em
import matplotlib.pyplot as plt

print('Generating plot for first data set... \t', end='', flush=True)
# generate EM cluster plot for 1st data set
file_path = 'data/iris_flowers.csv'
x_axis = 'PetalLengthCm'
y_axis = 'SepalWidthCm'
n_clusters = 2
em.em(file_path, x_axis, y_axis, n_clusters)
print('first plot generated')

print('Generating plot for second data set... \t', end='', flush=True)
# generate EM cluster plot for 2nd data set
file_path = 'data/winequality-red.csv'
x_axis = 'citric acid'
y_axis = 'volatile acidity'
n_clusters = 2
em.em(file_path, x_axis, y_axis, n_clusters)
print('second plot generated')

# show both plots
plt.show()