def train(self):
    """Fit a 3-component, full-covariance Gaussian mixture to ``self.img_arr_1d``.

    The component means are seeded with fixed anchor values: three RGB
    triples when ``self.rgb_or_gray == "rgb"``, three scalar grey levels
    otherwise. The fit itself is unsupervised; only the starting means are
    hand-picked.

    Returns:
        The fitted ``GaussianMixture`` instance.
    """
    if self.rgb_or_gray == "rgb":
        seed_means = np.array([[0, 0, 0], [120, 100, 80], [225, 225, 225]])
    else:
        seed_means = np.array([[0], [100], [255]])

    mixture = GaussianMixture(
        n_components=3,
        covariance_type='full',
        random_state=0,
        max_iter=20,
    )
    # Label-like starting points for EM.
    mixture.means_init = seed_means
    mixture.fit(self.img_arr_1d)
    return mixture
def train_gmm(path):
    """Train a 2-component diagonal GMM on pooled silent/voice features.

    Reads ``train.dict``, ``valid.dict`` and ``test.dict`` from *path*
    (pickled mappings with ``'silent'`` and ``'voice'`` entries), pools the
    three splits, seeds component 0 with the silent mean and component 1 with
    the voice mean, fits the mixture and pickles it to
    ``<modelPath>/gmm1.model``.

    Args:
        path: Directory holding the three ``*.dict`` pickles; checked with
            ``check_path`` first.
    """
    check_path(path)

    def _load(name):
        # NOTE(review): pickle.load runs arbitrary code from the file; only
        # point this at trusted feature dumps.
        with open(os.path.join(path, name), 'rb') as fh:
            return pickle.load(fh)

    # Same load order as before: train, valid, test.
    splits = [_load(name) for name in ('train.dict', 'valid.dict', 'test.dict')]

    silentData = np.concatenate(
        [np.asarray(split['silent']) for split in splits], axis=0)
    voiceData = np.concatenate(
        [np.asarray(split['voice']) for split in splits], axis=0)

    estimator = GaussianMixture(n_components=2,
                                covariance_type='diag',
                                max_iter=100,
                                random_state=11)
    # Supervised seeding: row 0 = silent mean, row 1 = voice mean.
    meanInit = np.zeros((2, dims))
    meanInit[0] = silentData.mean(axis=0)
    meanInit[1] = voiceData.mean(axis=0)
    estimator.means_init = meanInit
    estimator.fit(np.concatenate((silentData, voiceData), axis=0))

    # Context manager guarantees the model file is flushed and closed.
    with open(os.path.join(modelPath, 'gmm1.model'), 'wb') as fh:
        pickle.dump(estimator, fh, protocol=pickle.HIGHEST_PROTOCOL)
def train(train_list, novocal_clf, vocal_clf):
    """Fit a combined GMM on features extracted from every file in *train_list*.

    Each file is loaded at sample rate ``fs``, reduced to the first output of
    ``librosa.effects.hpss``, band-pass filtered, and converted to a feature
    block by ``feature_extractor``. All blocks are joined along the frame
    axis and transposed before fitting.

    Args:
        train_list: Iterable of audio file paths.
        novocal_clf: Fitted bootstrap model; its ``means_`` seed the first
            components.
        vocal_clf: Fitted bootstrap model; its ``means_`` seed the remaining
            components.

    Returns:
        Tuple ``(clf, n_components_novocals, n_components_vocals)``.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Extracting Train Features")

    # Collect blocks and concatenate once; concatenating inside the loop
    # re-copied every previously extracted frame on each iteration.
    # The leading empty block keeps the (n_features, 0) result for an empty list.
    feature_blocks = [np.empty((n_features, 0))]
    for audio_path in train_list:
        print(audio_path)
        y, sr = librosa.load(audio_path, sr=fs)
        y = librosa.effects.hpss(y)[0]  # Perform HPSS, keep the first output
        y = bandpass_filter(y, sr, low, high, 2)  # Bandpass
        feature_blocks.append(
            feature_extractor(y, fs, frame_length, hop_length, n_mfcc))

    # Transpose so that rows are frames and columns are features.
    train_features = np.concatenate(feature_blocks, axis=1).T

    # One component per component of the two bootstrap models.
    clf = GaussianMixture(n_components=n_components_vocals +
                          n_components_novocals,
                          covariance_type=covariance_type,
                          max_iter=max_iter)

    # Initialize the means from the bootstrap models (no-vocal first).
    clf.means_init = np.concatenate((novocal_clf.means_, vocal_clf.means_))

    # Expectation-Maximization
    print("EM Estimations of parameters")
    clf.fit(train_features)
    return (clf, n_components_novocals, n_components_vocals)
def gmm_dbscan(minPts=5, e=1300):
    """Compare GMM clustering against DBSCAN clustering.

    DBSCAN labels are treated as the reference. For each of the four
    covariance types a GMM is seeded with the per-cluster means of the DBSCAN
    result, fitted, and scored by label agreement. The accuracies are shown
    as a bar chart.

    :param minPts: best ``min_samples`` value found in q3 (default is that optimum)
    :param e: best ``eps`` radius found in q3 (default is that optimum)
    :return: None
    """
    datas_set, datas_matrix = get_data()
    X = datas_matrix.T
    res_vipno, random_vipno = lsh(0.01, "cosine")

    db = DBSCAN(eps=e, min_samples=minPts).fit(X)
    y_train = db.labels_
    # DBSCAN marks noise as -1; it is not a cluster.
    n_cluster = len(set(y_train)) - (1 if -1 in y_train else 0)

    accs = []
    types = ['full', 'tied', 'diag', 'spherical']

    # Compare the accuracy obtained with each of the four covariance types.
    # The loop variable is now passed to the estimator; before, every
    # iteration used 'tied', so all four bars showed the same model.
    for cov_type in types:
        estimator = GaussianMixture(n_components=n_cluster,
                                    covariance_type=cov_type)
        # Treat the DBSCAN clustering as ground truth so part of the GMM
        # parameters (the means) can be fixed up front.
        estimator.means_init = np.array(
            [X[y_train == i].mean(axis=0) for i in range(n_cluster)])
        estimator.fit(X)

        y_train_pred = estimator.predict(X)
        train_accuracy = np.mean(
            y_train_pred.ravel() == y_train.ravel()) * 100
        print(y_train)
        print(y_train_pred)
        print("Comparing with DBScan, the accuracy of GMM is:",
              train_accuracy, "%")
        accs.append(train_accuracy)

    # NOTE(review): the original nesting was lost; this check is placed after
    # the loop, so it uses the predictions of the last covariance type.
    res = 0
    # pos is the GMM cluster of the random vipno that was the q1 input.
    pos = y_train_pred[datas_set.columns.get_loc(random_vipno)]
    # Look up the GMM cluster of every kNN result from q1 and compare with pos.
    for i in res_vipno:
        if y_train_pred[datas_set.columns.get_loc(i)] == pos:
            res += 1
    print("For k =", len(res_vipno), "There are", res,
          "in the same cluster as GMM predicted")

    # Bar chart of the accuracy for the four covariance types.
    plt.bar(types,
            accs,
            alpha=0.9,
            width=0.35,
            facecolor='lightskyblue',
            edgecolor='white',
            label='acc',
            lw=1)
    plt.title("four covariances` acc")
    plt.legend(loc="upper left")
    plt.show()
def make_ellipses(self, ax):
    """Fit a full-covariance GMM to ``self.data`` and draw one ellipse per component.

    The mixture has ``self.k`` components and is seeded with
    ``self.kmeans.cluster_centers_``. Each ellipse is built from the leading
    2x2 block of the component covariance and centred on the first two
    coordinates of the component mean.

    Args:
        ax: Matplotlib axes that receives the ellipse artists.
    """
    gmm = GaussianMixture(n_components=self.k,
                          covariance_type="full",
                          max_iter=500,
                          random_state=0)
    gmm.means_init = self.kmeans.cluster_centers_
    gmm.fit(self.data)

    for n in range(self.k):
        # covariance_type is fixed to "full" above, so covariances_[n] is a
        # square matrix; the tied/diag/spherical branches were unreachable.
        covariances = gmm.covariances_[n][:2, :2]

        v, w = np.linalg.eigh(covariances)
        u = w[0] / np.linalg.norm(w[0])
        angle = np.arctan2(u[1], u[0])
        angle = 180 * angle / np.pi  # convert to degrees
        v = 2. * np.sqrt(2.) * np.sqrt(v)

        # `angle` must be a keyword: current matplotlib declares it
        # keyword-only, and the keyword form works on older releases too.
        ell = mpl.patches.Ellipse(gmm.means_[n, :2],
                                  v[0],
                                  v[1],
                                  angle=180 + angle,
                                  color=colors[n])
        ell.set_clip_box(ax.bbox)
        ell.set_alpha(0.5)
        ax.add_artist(ell)

    ax.set_aspect('equal', 'datalim')
def train_gmm(estimator, feaPath, fileList):
    """Fit a 2-component diagonal GMM on the feature frames named in *fileList*.

    Every list file holds tab-separated lines ``<audio path>\\t<integer id>``.
    For each line the pickled matrix ``<feaPath>/<audio stem>.fea`` is loaded
    (it must have ``dims`` rows) and each of its columns becomes one training
    sample. The new mixture is seeded with ``estimator.means_`` and saved to
    ``<modelPath>/gmm_esc50.model``.

    Args:
        estimator: Previously fitted model providing ``means_``.
        feaPath: Directory with the ``.fea`` pickles.
        fileList: Iterable of list-file paths.

    Returns:
        The fitted ``GaussianMixture``.
    """
    check_path(feaPath)
    data = []
    for list_file in fileList:
        check_file(list_file)
        # Text mode: the lines are split on '\n' and '\t' (str), which fails
        # on the bytes that mode 'rb' returns on Python 3.
        with open(list_file, 'r') as fh:
            lines = fh.readlines()

        for line in lines:
            fields = line.split('\n')[0].split('\t')
            audioName, _ = os.path.splitext(os.path.basename(fields[0]))
            # The id is not used further, but parsing it keeps the original
            # fail-fast behaviour on malformed lines.
            _ = int(fields[1])

            fea_file = os.path.join(feaPath, audioName + '.fea')
            check_file(fea_file)
            # NOTE(review): pickle.load executes code from the file; feature
            # files must come from a trusted source.
            with open(fea_file, 'rb') as fh:
                tmpdata = pickle.load(fh)
            assert tmpdata.shape[0] == dims
            # Rows of the transpose are the columns of tmpdata.
            data.extend(tmpdata.T)

    data = np.asarray(data)
    gmm = GaussianMixture(n_components=2,
                          covariance_type='diag',
                          max_iter=100,
                          random_state=0)
    gmm.means_init = estimator.means_
    gmm.fit(data)

    with open(os.path.join(modelPath, 'gmm_esc50.model'), 'wb') as fh:
        pickle.dump(gmm, fh, protocol=pickle.HIGHEST_PROTOCOL)
    return gmm
def train_gmm(files):
    """Train the silent/voice GMM from one pickled feature dictionary.

    Loads *files* (a pickle with ``'silent'`` and ``'voice'`` entries), fits
    a 2-component diagonal mixture seeded with the two class means, and
    writes three pickles: the scalar mean and standard deviation of the
    training data to ``waitPath`` and the fitted model to
    ``<modelPath>/gmm.model``.

    Args:
        files: Path of the training pickle; checked with ``check_file``.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Load train data & train gmm model')
    stime = time.time()
    check_file(files)

    def _dump(obj, target):
        # Context manager so every output file is flushed and closed.
        with open(target, 'wb') as fh:
            pickle.dump(obj, fh, protocol=pickle.HIGHEST_PROTOCOL)

    # NOTE(review): pickle.load executes code from the file; trusted input only.
    with open(files, 'rb') as fh:
        trainDict = pickle.load(fh)

    silentData = np.asarray(trainDict['silent'])
    voiceData = np.asarray(trainDict['voice'])
    trainData = np.concatenate((silentData, voiceData), axis=0)

    # Supervised seeding: row 0 = silent mean, row 1 = voice mean.
    meanInit = np.zeros((2, dims))
    meanInit[0] = silentData.mean(axis=0)
    meanInit[1] = voiceData.mean(axis=0)

    estimator = GaussianMixture(n_components=2,
                                covariance_type='diag',
                                max_iter=100,
                                random_state=0)
    estimator.means_init = meanInit
    estimator.fit(trainData)

    model_file = os.path.join(modelPath, 'gmm.model')
    _dump(trainData.mean(), os.path.join(waitPath, 'train.mean'))
    _dump(trainData.std(), os.path.join(waitPath, 'train.std'))
    _dump(estimator, model_file)

    print('Finished train & saved gmm model in:\n {:s}\nUsetime {:f}\n'.format(
        model_file, time.time() - stime))
def Lap_update(good_samples, n_comp=40, cov_type='full'):
    """Fit a Gaussian mixture to *good_samples* and return a sampler factory.

    The number of components is capped at the number of rows. The component
    means are seeded with uniform random vectors drawn from the global NumPy
    random state.

    Args:
        good_samples: 2-D array; ``shape`` is unpacked as (rows, dimensions).
        n_comp: Requested number of mixture components.
        cov_type: Covariance type forwarded to ``GaussianMixture``.

    Returns:
        ``gen_lap(batch_size)``, a generator function that endlessly yields
        the first element returned by ``estimator.sample(batch_size)``.

    Example:
        g_fun = Lap_update(np.ones([500, 2]))
        gen = g_fun(10)
        samp = next(gen)
    """
    print('Fitting mixture of Gaussians ... ')
    row_count, dim = good_samples.shape
    # Cannot fit more components than there are samples.
    n_comp = min(n_comp, row_count)

    estimator = GaussianMixture(n_components=n_comp,
                                covariance_type=cov_type,
                                max_iter=2500,
                                random_state=0)
    estimator.means_init = [
        np.random.random_sample(dim) for _ in range(n_comp)
    ]
    estimator.fit(good_samples)
    print('Done!')

    def gen_lap(batch_size):
        while True:
            drawn = estimator.sample(batch_size)
            yield drawn[0]

    return gen_lap
def gmm_kmeans(n_cluster=2):
    """Compare GMM clustering against KMeans clustering.

    KMeans labels are treated as the reference. For each of the four
    covariance types a GMM is seeded with the per-cluster means of the KMeans
    result, fitted, and scored by label agreement. The accuracies are shown
    as a bar chart.

    :param n_cluster: best cluster count found in q2 (default is that optimum)
    :return: None
    """
    # Load the data.
    datas_set, datas_matrix = get_data()
    X = datas_matrix.T
    res_vipno, random_vipno = lsh(0.01, "cosine")

    # Cluster the data with KMeans.
    clusterer = KMeans(n_clusters=n_cluster)
    y_train = clusterer.fit_predict(X)

    accs = []
    types = ['full', 'tied', 'diag', 'spherical']

    # Compare the accuracy obtained with each of the four covariance types.
    # The loop variable is now passed to the estimator; before, every
    # iteration used 'diag', so all four bars showed the same model.
    for cov_type in types:
        estimator = GaussianMixture(n_components=n_cluster,
                                    covariance_type=cov_type)
        # Treat the KMeans clustering as ground truth so part of the GMM
        # parameters (the means) can be fixed up front.
        estimator.means_init = np.array(
            [X[y_train == i].mean(axis=0) for i in range(n_cluster)])
        estimator.fit(X)

        y_train_pred = estimator.predict(X)
        train_accuracy = np.mean(
            y_train_pred.ravel() == y_train.ravel()) * 100
        print(y_train)
        print(y_train_pred)
        print("Comparing with KMeans, the accuracy of GMM is:",
              train_accuracy, "%")
        accs.append(train_accuracy)

    # NOTE(review): the original nesting was lost; this check is placed after
    # the loop, so it uses the predictions of the last covariance type.
    res = 0
    # pos is the GMM cluster of the random vipno that was the q1 input.
    pos = y_train_pred[datas_set.columns.get_loc(random_vipno)]
    # Look up the GMM cluster of every kNN result from q1 and compare with pos.
    for i in res_vipno:
        if y_train_pred[datas_set.columns.get_loc(i)] == pos:
            res += 1
    print("For k =", len(res_vipno), "There are", res,
          "in the same cluster as gmm predicted")

    # Bar chart of the accuracy for the four covariance types. The bars are
    # accuracies, so the legend entry is 'acc' (it used to read 'time').
    plt.bar(types,
            accs,
            alpha=0.9,
            width=0.35,
            facecolor='lightskyblue',
            edgecolor='white',
            label='acc',
            lw=1)
    plt.title("four covariances` acc")
    plt.legend(loc="upper left")
    plt.show()
def test_check_means():
    """``fit`` rejects a badly shaped ``means_init`` and leaves a valid one unchanged."""
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_components = rand_data.n_components
    n_features = rand_data.n_features
    X = rand_data.X['full']

    mixture = GaussianMixture(n_components=n_components)

    # One row too many: fit must raise.
    mixture.means_init = rng.rand(n_components + 1, n_features)
    assert_raise_message(ValueError,
                         "The parameter 'means' should have the shape of ",
                         mixture.fit, X)

    # Correctly shaped means: fit succeeds and the attribute is untouched.
    good_means = rand_data.means
    mixture.means_init = good_means
    mixture.fit(X)
    assert_array_equal(good_means, mixture.means_init)
def train(_x_data):
    """Fit a 3-component, full-covariance Gaussian mixture to *_x_data*.

    The component means start from three fixed 3-value anchors. The fit
    itself is unsupervised; only the starting means are hand-picked.

    Returns:
        The fitted ``GaussianMixture`` instance.
    """
    anchor_means = np.array([[0, 0, 0], [120, 100, 80], [225, 225, 225]])

    mixture = GaussianMixture(
        n_components=3,
        covariance_type='full',
        random_state=0,
        max_iter=20,
    )
    # Label-like starting points for EM.
    mixture.means_init = anchor_means
    mixture.fit(_x_data)
    return mixture
def cross_validate_gmm(min_count, feature_type):
    """Fit a 3-component GMM per covariance type and log train/test quality.

    The train/test matrices come from ``compute_train_test_matrix``. Each
    model is seeded with the per-class means of the training labels, fitted
    on the training matrix, and evaluated on both splits (accuracy, confusion
    matrix, and a classification report for the test split).

    Args:
        min_count: Forwarded to ``compute_train_test_matrix``.
        feature_type: Forwarded to ``compute_train_test_matrix``.
    """
    logger.info('train GaussianMixture model, min_count=%s, extract_method=%s',
                min_count, feature_type)
    X_train, y_train, X_test, y_test = compute_train_test_matrix(
        min_count, feature_type)

    n_components = 3
    for cov_type in ('full', 'tied', 'diag', 'spherical'):
        logger.info('cov_type: %s', cov_type.upper())

        clf = GaussianMixture(n_components=n_components,
                              covariance_type=cov_type,
                              max_iter=100,
                              random_state=42,
                              verbose=2)
        # Class labels are available for the training data, so the means can
        # be initialised in a supervised manner.
        per_class_means = [
            X_train[y_train == label].mean(axis=0)
            for label in range(n_components)
        ]
        clf.means_init = np.array(per_class_means)

        # EM estimates the remaining parameters.
        clf.fit(X_train)

        logger.info('TRAIN RESULT:')
        y_train_pred = clf.predict(X_train)
        train_hits = y_train_pred.ravel() == y_train.ravel()
        accuracy = np.mean(train_hits) * 100
        logger.info('Accuracy: %.1f', accuracy)
        logger.info('Confusion_matrix: \n%s',
                    confusion_matrix(y_train, y_train_pred))

        logger.info('TEST RESULT:')
        y_test_pred = clf.predict(X_test)
        logger.info('Accuracy: %.1f',
                    accuracy_score(y_test, y_test_pred) * 100)
        logger.info('Classification report: \n%s',
                    classification_report(y_test, y_test_pred))
        logger.info('Confusion_matrix: \n%s',
                    confusion_matrix(y_test, y_test_pred))
def trainGMM(self, nClasses=2, covType='spherical', maxIts=20):
    """Fit a GMM on ``self.train`` seeded with the per-class means.

    Args:
        nClasses: Number of mixture components; classes are assumed to be
            labelled ``0 .. nClasses - 1`` in ``self.trainTgt``.
        covType: Covariance type forwarded to ``GaussianMixture``.
        maxIts: Maximum number of EM iterations.

    Returns:
        The fitted ``GaussianMixture``.
    """
    model = GaussianMixture(n_components=nClasses,
                            covariance_type=covType,
                            max_iter=maxIts)
    model.means_init = np.array([
        self.train[self.trainTgt == i].mean(axis=0) for i in range(nClasses)
    ])
    # NOTE(review): scikit-learn documents the y argument of
    # GaussianMixture.fit as ignored; it is kept to leave the call unchanged.
    model.fit(self.train, self.trainTgt)

    trainOut = model.predict(self.train)
    # Error = share of mismatching labels. The previous expression counted
    # matches, so the figure printed as "Training Error" was the accuracy.
    trainError = np.mean(self.trainTgt.ravel() != trainOut.ravel()) * 100
    print("Training Error: ", trainError)
    return model
def get_predictions_semi(path, k_min, k_max, num_class, cov_type, seed, labels):
    """Cluster the k-mer table with a GMM, seeding the means when every class is labelled.

    The k-mer table from ``get_kmer_table`` is joined column-wise with
    *labels*. If each class id in ``range(num_class)`` occurs in the
    ``Labels`` column, the mixture means are initialised with the per-class
    means of the table; otherwise the default initialisation is used.

    Returns:
        The component index predicted for every row of the k-mer table.
    """
    kmer_table = get_kmer_table(path, k_min, k_max)
    finalDf = pd.concat([kmer_table, pd.Series(labels)], axis=1)
    gmm = GMM(n_components=num_class,
              covariance_type=cov_type,
              random_state=seed)

    # Class ids that actually occur among the supplied labels.
    targets = [
        class_id for class_id in range(num_class)
        if class_id in list(finalDf.Labels)
    ]

    every_class_labelled = len(targets) == num_class
    if every_class_labelled:
        gmm.means_init = np.array([
            kmer_table[finalDf.Labels == class_id].mean(axis=0)
            for class_id in targets
        ])

    gmm.fit(kmer_table)
    return gmm.predict(kmer_table)
def gmm_dist(rad, beam, stm, etm, data_dict):
    """Cluster scaled (gate, |velocity|, width, power) samples with a 4-component GMM and plot them.

    The four features are standardised, one fold of a 20-way split (about
    5% of the samples) is held out as validation data, and a full-covariance
    mixture is fitted on the rest. Figure 2 shows four 2-D projections of the
    training and validation samples coloured by component; figures 3 and 4
    are 3-D scatters of the raw data (coloured by ``gsflg``) and of the
    scaled data (coloured by component).

    Args:
        rad, beam, stm, etm: Not used by this function.
        data_dict: Mapping with flat ``'gate'``, ``'velocity'``, ``'width'``,
            ``'power'`` and ``'gsflg'`` sequences.
    """
    gate = data_dict['gate']
    # np.abs yields a real array. map(abs, ...) is a one-shot iterator on
    # Python 3, yet the values are needed twice (scaling and the 3-D plot).
    vel = np.abs(np.asarray(data_dict['velocity']))
    wid = data_dict['width']
    power = data_dict['power']
    gsflg = data_dict['gsflg']

    # Features must be scaled before clustering.
    full_data = np.column_stack((
        preprocessing.scale(gate),
        preprocessing.scale(vel),
        preprocessing.scale(wid),
        preprocessing.scale(power),
    ))

    # Break up the dataset into non-overlapping training (95%) and testing
    # (5%) sets; only the first fold is used.
    skf = StratifiedKFold(n_splits=20)
    n_samples = full_data.shape[0]
    # TODO UGLY, FIX! (a dummy all-ones label vector drives the splitter)
    train_index, test_index = next(
        iter(skf.split(full_data, np.ones(n_samples))))
    data = full_data[train_index, :]
    validation_data = full_data[test_index, :]
    n_dims = data.shape[1]

    # source
    # http://scikit-learn.org/stable/auto_examples/mixture/plot_gmm_covariances.html#sphx-glr-auto-examples-mixture-plot-gmm-covariances-py
    n_classes = 4
    cov_type = 'full'  # ['spherical', 'diag', 'tied', 'full']
    estimator = GaussianMixture(n_components=n_classes,
                                covariance_type=cov_type,
                                max_iter=20,
                                random_state=0)
    # No labels are available, so the means start at uniform random points
    # in [-1, 1). This uses the global NumPy random state, not random_state.
    estimator.means_init = np.random.random((n_classes, n_dims)) * 2.0 - 1.0
    # Train the other parameters using the EM algorithm.
    estimator.fit(data)

    # (subplot position, x column, y column, x label, y label)
    panels = (
        (221, 1, 0, 'Scaled Velocity', 'Scaled Range gate'),
        (222, 2, 0, 'Scaled Spectral width', 'Scaled Range gate'),
        (223, 3, 0, 'Scaled Power', 'Scaled Range gate'),
        (224, 1, 2, 'Scaled Velocity', 'Scaled Spectral width'),
    )

    fig2 = plt.figure(2, figsize=(12, 12))
    for plot_data, marker, alpha in zip([data, validation_data], ['.', 'x'],
                                        [0.1, 0.7]):
        Z = estimator.predict(plot_data)
        for position, x_col, y_col, x_label, y_label in panels:
            plt.subplot(position)
            plt.scatter(plot_data[:, x_col],
                        plot_data[:, y_col],
                        c=Z,
                        marker=marker,
                        alpha=alpha)
            plt.xlabel(x_label)
            plt.ylabel(y_label)
    fig2.tight_layout()

    plot_data = full_data
    Z = estimator.predict(plot_data)

    fig3 = plt.figure(3, figsize=(6, 6))
    plt.subplot(111)
    ax3 = Axes3D(fig3, elev=48, azim=134)
    ax3.scatter(vel, gate, wid, c=gsflg)
    ax3.set_xlabel('Velocity [m/s]')
    ax3.set_ylabel('Range gate')
    ax3.set_zlabel('Spectral width [m/s]')

    fig4 = plt.figure(4, figsize=(6, 6))
    plt.subplot(111)
    ax4 = Axes3D(fig4, elev=48, azim=134)
    ax4.scatter(plot_data[:, 1], plot_data[:, 0], plot_data[:, 2], c=Z)
    ax4.set_xlabel('Scaled Velocity')
    ax4.set_ylabel('Scaled Range gate')
    ax4.set_zlabel('Scaled Spectral width')

    plt.show()
def GMM_Sklearn(data, targets, colors,dataset, target_names):
    """Plot GMM fits for four covariance types, then evaluate a tied-covariance GMM.

    Args:
        data: Sequence ``(train, validation, test)`` of feature arrays.
        targets: Matching sequence of label arrays.
        colors: One plot colour per class.
        dataset: Name appended to ``'gmm'`` when calling ``evaluate``.
        target_names: Legend label per class.

    Returns:
        ``(acc, prec, rec, sens, spec)`` exactly as returned by ``evaluate``.
    """
    print('APPLY GMM...')
    (X_train, Y_train), (X_val, Y_val), (X_test, Y_test) = (
        (data[part], targets[part]) for part in range(3))

    n_classes = len(np.unique(Y_train))

    def supervised_means():
        # Labels are known for the training data, so each component can
        # start at the mean of its class.
        return np.array([
            X_train[Y_train == label].mean(axis=0)
            for label in range(n_classes)
        ])

    def percent_match(predicted, expected):
        return np.mean(predicted.ravel() == expected.ravel()) * 100

    # Try GMMs using different types of covariances.
    estimators = {}
    for cov_type in ['spherical', 'diag', 'tied', 'full']:
        estimators[cov_type] = GaussianMixture(n_components=n_classes,
                                               covariance_type=cov_type,
                                               max_iter=20,
                                               random_state=0)
    n_estimators = len(estimators)

    plt.figure(figsize=(3 * n_estimators // 2, 6))
    plt.subplots_adjust(bottom=.01, top=0.95, hspace=.15, wspace=.05,
                        left=.01, right=.99)

    for index, (name, estimator) in enumerate(estimators.items()):
        estimator.means_init = supervised_means()
        # Train the other parameters using the EM algorithm.
        estimator.fit(X_train)

        h = plt.subplot(2, n_estimators // 2, index + 1)
        make_ellipses(estimator, h, colors)

        # Training points as small dots.
        for n, color in enumerate(colors):
            class_points = X_train[(Y_train == n)]
            plt.scatter(class_points[:, 0], class_points[:, 1], s=0.8,
                        color=color, label=target_names[n])
        # Plot the test data with crosses.
        for n, color in enumerate(colors):
            class_points = X_test[Y_test == n]
            plt.scatter(class_points[:, 0], class_points[:, 1], marker='x',
                        color=color)

        train_accuracy = percent_match(estimator.predict(X_train), Y_train)
        plt.text(0.05, 0.9, 'Train accuracy: %.1f' % train_accuracy,
                 transform=h.transAxes)

        test_accuracy = percent_match(estimator.predict(X_test), Y_test)
        plt.text(0.05, 0.8, 'Test accuracy: %.1f' % test_accuracy,
                 transform=h.transAxes)

        plt.xticks(())
        plt.yticks(())
        plt.title(name)

    plt.legend(scatterpoints=1, loc='lower right', prop=dict(size=12))

    # Final model: tied covariance, again seeded with the class means.
    estimator = GaussianMixture(n_components=n_classes,
                                covariance_type='tied',
                                max_iter=20,
                                random_state=0)
    estimator.means_init = supervised_means()
    estimator.fit(X_train)

    predictions = estimator.predict(X_test)
    scores = estimator.predict_proba(X_test)
    Y_test = list(Y_test)
    assert len(predictions) == len(scores)
    assert len(scores) == len(Y_test)

    # Only the second probability column is handed to evaluate().
    acc, prec, rec, sens, spec = evaluate(estimator, X_test, Y_test,
                                          np.array(predictions),
                                          np.array(scores[:,1]),
                                          np.array(Y_test), n_classes,
                                          'gmm' + dataset)
    print('Test Accuracy, Precision, Recall', acc, prec, rec)
    print()
    return acc, prec, rec, sens, spec
print score writeHTML(clusterType="Kmean_final", clusterLabel=cluster_labels) #使用最佳的eps进行DBScan聚类 db = DBSCAN(eps=eps, min_samples=3).fit(XYMatrix) db_label = np.array([i + 1 for i in db.labels_]) score = silhouette_score(XYMatrix, db_label) print score writeHTML(clusterType="DBSCAN_final", clusterLabel=db_label) #Kmeans聚类与GMM聚类进行比较 kmean_classes = len(np.unique(cluster_labels)) #GMM聚类 n_components = kmean_classes gmm = GaussianMixture(n_components=kmean_classes, max_iter=20, random_state=0) gmm.means_init = np.array( [XYMatrix[cluster_labels == i].mean(axis=0) for i in range(kmean_classes)]) gmm.fit(XYMatrix) gmm_labels = gmm.predict(XYMatrix) #以Kmeans为基础,计算GMM的准确率 train_accuracy = np.mean(gmm_labels.ravel() == cluster_labels.ravel()) * 100 print "gmm - kmeans accuracy : ", train_accuracy #去除DBScan算法认定的噪音 no_noise_matrix = np.array(XYMatrix[db_label != 0]) no_noise_label = np.array(db_label[db_label != 0]) dbscan_class = len(np.unique(no_noise_label)) gmm = GaussianMixture(n_components=dbscan_class, random_state=0) gmm.means_init = np.array([ no_noise_matrix[no_noise_label == i].mean(axis=0)
# Break up the dataset into non-overlapping training (75%) and testing
# (25%) sets.
skf = StratifiedKFold(n_splits=4)
# Only take the first fold.
train_index, test_index = next(iter(skf.split(iris.data, iris.target)))

X_train = iris.data[train_index]
y_train = iris.target[train_index]
X_test = iris.data[test_index]
y_test = iris.target[test_index]
print("X_train shape:",X_train.shape)
print("X_test shape:",X_test.shape)

# Training.
# Training can be unsupervised or supervised; here the labels are used only
# to initialise the component means.
n_classes = 3
clf = GaussianMixture(n_components=n_classes, covariance_type='full',random_state=0,max_iter=20)
clf.means_init = np.array([X_train[y_train == i].mean(axis=0)for i in range(n_classes)])
# Fit on the training split. The model used to be fitted on X_test, which
# leaked the test data into training and made the "test" score meaningless.
clf.fit(X_train)
print("model means:",clf.means_)
print("model weights:",clf.weights_)

# Prediction.
# Score the training split.
y_predict = clf.predict(X_train)
print("train:",np.mean(y_predict.ravel()==y_train.ravel()))
# Score the test split.
y_predict = clf.predict(X_test)
print("test:",np.mean(y_predict.ravel()==y_test.ravel()))
def em(algo,
       X_train,
       X_test,
       y_train,
       y_test,
       init_means,
       no_iter=1000,
       component_list=None,
       num_class=7,
       toshow=1):
    """Sweep the number of GMM components, plot the scores, then report accuracy.

    For every component count a spherical GMM is fitted on ``X_train`` and
    scored on the test split (average log-likelihood, AIC, BIC, homogeneity,
    completeness, silhouette). Afterwards a model with ``num_class``
    components, seeded with *init_means*, is fitted and its train/test
    accuracy is printed.

    Args:
        algo: Label appended to the plot titles.
        X_train, X_test: Feature matrices.
        y_train, y_test: Label arrays.
        init_means: Initial means for the final ``num_class`` model.
        no_iter: Maximum number of EM iterations.
        component_list: Component counts to sweep; defaults to 3..11.
        num_class: Component count of the final model.
        toshow: The figures are shown when this equals 1.

    Returns:
        ``(component_list, array_aic, array_bic, array_homo, array_comp,
        array_sil, array_avg_log)``.
    """
    # A fresh list per call: the previous mutable default was returned to the
    # caller, so mutating the result changed the default for later calls.
    if component_list is None:
        component_list = [3, 4, 5, 6, 7, 8, 9, 10, 11]

    array_aic = []
    array_bic = []
    array_homo = []
    array_comp = []
    array_sil = []
    array_avg_log = []

    for num_classes in component_list:
        clf = GaussianMixture(n_components=num_classes,
                              covariance_type='spherical',
                              max_iter=no_iter,
                              init_params='kmeans')
        clf.fit(X_train)
        y_test_pred = clf.predict(X_test)

        # Per-sample average log-likelihood on the test data.
        array_avg_log.append(clf.score(X_test))
        # AIC and BIC on the test data.
        array_aic.append(clf.aic(X_test))
        array_bic.append(clf.bic(X_test))
        # Homogeneity score on the test data. The target of this assignment
        # was the invalid expression `h**o`, which does not compile.
        homogeneity = metrics.homogeneity_score(y_test, y_test_pred)
        array_homo.append(homogeneity)
        # Completeness score.
        array_comp.append(metrics.completeness_score(y_test, y_test_pred))
        # Silhouette score.
        array_sil.append(
            metrics.silhouette_score(X_test, y_test_pred, metric='euclidean'))

    # Generating plots.
    fig1, ax1 = plt.subplots()
    ax1.plot(component_list, array_aic)
    ax1.plot(component_list, array_bic)
    plt.legend(['AIC', 'BIC'])
    plt.xlabel('Number of clusters')
    plt.title('AIC - BIC curve for EM - ' + algo)

    fig2, ax2 = plt.subplots()
    ax2.plot(component_list, array_homo)
    ax2.plot(component_list, array_comp)
    ax2.plot(component_list, array_sil)
    plt.legend(['homogenity', 'completeness', 'silhoutette'])
    plt.xlabel('Number of clusters')
    plt.title('Performance scores for EM - ' + algo)

    fig3, ax3 = plt.subplots()
    ax3.plot(component_list, array_avg_log)
    plt.xlabel('Number of clusters')
    plt.title('sample log avg likelihood for EM - ' + algo)

    if toshow == 1:
        plt.show()

    # Training and testing accuracy for K = number of classes.
    clf = GaussianMixture(n_components=num_class,
                          covariance_type='spherical',
                          max_iter=no_iter,
                          init_params='kmeans')
    # Assign the initial means as the mean feature vector of each class.
    clf.means_init = init_means
    clf.fit(X_train)

    # Training accuracy.
    y_train_pred = clf.predict(X_train)
    train_accuracy = np.mean(y_train_pred.ravel() == y_train.ravel()) * 100
    print('Training accuracy for Expected Maximization for K = {}: {}'.format(
        num_class, train_accuracy))

    # Testing accuracy.
    y_test_pred = clf.predict(X_test)
    test_accuracy = np.mean(y_test_pred.ravel() == y_test.ravel()) * 100
    print('Testing accuracy for Expected Maximization for K = {}: {}'.format(
        num_class, test_accuracy))

    return component_list, array_aic, array_bic, array_homo, array_comp, array_sil, array_avg_log
def MyGaussianMixture(begin, end, iinput, nums, mystr):
    """Fit one Gaussian per "Lab Status" class on sighting coordinates and plot them.

    Rows of ``../Others/data/First_data.csv`` whose "Detection Date" lies in
    ``[begin, end]`` are kept. The (Longitude, Latitude) points are split
    into two stratified halves; for every class in the training half a
    single-component full-covariance Gaussian is fitted and drawn.

    Args:
        begin, end: Inclusive bounds compared against "Detection Date".
        iinput: Class label whose fitted parameters are returned.
        nums: Value placed first in the returned list.
        mystr: Plot title.

    Returns:
        ``[nums, mean_lon, mean_lat, var_lon, var_lat]`` for class *iinput*,
        or an empty list when that class is not in the training half.
    """
    dir_path = "../Others/data/First_data.csv"
    ff = pd.read_csv(dir_path,
                     sep=',',
                     index_col=False,
                     encoding="utf-8",
                     low_memory=False)  # Read file

    list_train = []
    list_target = []
    max_x = -200
    min_x = 200
    max_y = -200
    min_y = 200
    for item in ff.index:
        # Fetch the row once; it used to be rebuilt for every field access.
        row = ff.iloc[item]
        detected = row["Detection Date"]
        if begin > detected or end < detected:
            continue
        lon = float(row["Longitude"])
        lat = float(row["Latitude"])
        # Running bounding box of the selected points (only printed below).
        max_x = max(max_x, lon)
        max_y = max(max_y, lat)
        min_x = min(min_x, lon)
        min_y = min(min_y, lat)
        list_train.append([lon, lat])
        list_target.append(int(row["Lab Status"]))

    skf = StratifiedKFold(n_splits=2, random_state=0, shuffle=True)
    train = np.array(list_train)
    target = np.array(list_target)
    train_index, test_index = next(iter(skf.split(train, target)))
    X_train = train[train_index]
    y_train = target[train_index]
    X_test = train[test_index]
    y_test = target[test_index]

    # Distinct class labels present in the training half.
    class_labels = np.unique(y_train)

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)
    list_return = []
    for label in class_labels:
        estimators = GaussianMixture(n_components=1,
                                     covariance_type="full",
                                     max_iter=100,
                                     random_state=0)
        m = X_train[y_train == label]
        estimators.means_init = np.array([m.mean(axis=0)])
        # A single sample is duplicated before the fit.
        if m.shape[0] == 1:
            m = np.concatenate((m, m), axis=0)
        estimators.fit(m)
        ax.add_patch(make_ellipses(estimators, label))

        if iinput == label:
            list_return.append(nums)
            list_return.append(estimators.means_[0][0])
            list_return.append(estimators.means_[0][1])
            list_return.append(estimators.covariances_[0][0][0])
            list_return.append(estimators.covariances_[0][1][1])
            # NOTE(review): the original nesting was lost; the two Test(...)
            # calls are assumed to belong to this branch. Confirm.
            Test(m[:, 0], estimators.means_[0][0])
            Test(m[:, 1], estimators.means_[0][1])

        # All points of the class as small dots, test points as crosses.
        data = train[target == label]
        plt.scatter(data[:, 0],
                    data[:, 1],
                    s=0.8,
                    color=colors[label],
                    label=target_names[label])
        data = X_test[y_test == label]
        plt.scatter(data[:, 0], data[:, 1], marker='x', color=colors[label])

    plt.title(mystr)
    plt.xlabel("Longitude")
    plt.ylabel("Latitude")
    plt.xlim((-124.665014 - 0.5, -116.87368700000002 + 0.5))
    plt.ylim((45.488689 - 0.5, 49.548004 + 0.5))
    print(min_x, max_x, min_y, max_y)
    plt.legend(scatterpoints=1, loc='best', prop=dict(size=12))
    plt.show()
    return list_return
def gmm_dist(rad, beam, stm, etm, data_dict):
    """Re-label radar scatter with a 3-component GMM and plot before/after.

    Seven standardised features (gate, velocity, width, power, elevation,
    frequency, time) are clustered. The GMM is seeded with KMeans centres.
    The cluster with the smallest mean absolute velocity is drawn as ground
    scatter, the one with the largest as ionospheric scatter.

    Side effect: ``data_dict['gsflg']`` is replaced by the per-record lists
    of GMM component indices.

    Args:
        rad, beam, stm, etm: Not used by this function.
        data_dict: Mapping with per-record sequences ``'gate'``,
            ``'velocity'``, ``'width'``, ``'power'``, ``'elevation'``,
            ``'gsflg'``, ``'datetime'``, ``'frequency'`` and ``'num_scatter'``.
    """
    gate = np.hstack(data_dict['gate'])
    vel = np.hstack(data_dict['velocity'])
    wid = np.hstack(data_dict['width'])
    power = np.hstack(data_dict['power'])
    elev = np.hstack(data_dict['elevation'])
    gs_flg = np.hstack(data_dict['gsflg'])

    plot_rti(data_dict, 'velocity', gsct=False, fig_num=1)

    # Repeat each record's time and frequency once per scatter point so they
    # line up with the flattened arrays above.
    num_scatter = data_dict['num_scatter']
    times, freq = [], []
    for i, count in enumerate(num_scatter):
        times.extend(date2num([data_dict['datetime'][i]] * count))
        freq.extend([data_dict['frequency'][i]] * count)
    times = np.array(times)
    freq = np.array(freq)

    alpha = 0.2
    size = 2
    marker = 's'
    fig2 = plt.figure(figsize=(10, 6))
    ax1 = plt.subplot(211)
    # Original flags: ground scatter grey, the other scatter (IS) red.
    plt.scatter(times[gs_flg == 1], gate[gs_flg == 1], s=size, c='grey',
                marker=marker, alpha=alpha)
    plt.scatter(times[gs_flg == 0], gate[gs_flg == 0], s=size, c='red',
                marker=marker, alpha=alpha)
    ax1.xaxis.set_major_formatter(DateFormatter('%H:%M'))
    ax1.set_ylabel('Range gate')

    # Features must be scaled before clustering.
    data = np.column_stack((
        preprocessing.scale(gate),
        preprocessing.scale(vel),
        preprocessing.scale(wid),
        preprocessing.scale(power),
        preprocessing.scale(elev),
        preprocessing.scale(freq),
        preprocessing.scale(times),
    ))

    n_classes = 3
    kmeans = KMeans(init='k-means++', n_clusters=n_classes,
                    n_init=50).fit(data)

    # source
    # http://scikit-learn.org/stable/auto_examples/mixture/plot_gmm_covariances.html#sphx-glr-auto-examples-mixture-plot-gmm-covariances-py
    cov_type = 'full'  # ['spherical', 'diag', 'tied', 'full']
    estimator = GaussianMixture(n_components=n_classes,
                                covariance_type=cov_type,
                                max_iter=100,
                                random_state=0)
    # Start EM from the KMeans cluster centres.
    estimator.means_init = kmeans.cluster_centers_
    # Train the other parameters using the EM algorithm.
    estimator.fit(data)
    Z = estimator.predict(data)

    mean_vels = np.zeros(n_classes)
    mean_wids = np.zeros(n_classes)
    for i in range(n_classes):
        mean_vels[i] = np.mean(np.abs(vel[Z == i]))
        mean_wids[i] = np.mean(wid[Z == i])
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(mean_vels[i])
        print(mean_wids[i])

    # Cluster with minimum mean velocity = ground scatter,
    # cluster with maximum mean velocity = ionospheric scatter.
    gsfg_min_vel = np.argmin(mean_vels)
    gsfg_max_vel = np.argmax(mean_vels)
    print(gsfg_min_vel)
    print(gsfg_max_vel)

    # Cut the flat label array back into per-record lists.
    new_gsflg = []
    start = 0
    for count in num_scatter:
        new_gsflg.append(Z[start:start + count].tolist())
        start += count
    data_dict['gsflg'] = new_gsflg

    ax2 = plt.subplot(212)
    # Everything in blue first (third cluster: E region/meteor scatter or
    # noise?), then ground scatter in grey and ionospheric scatter in red.
    plt.scatter(times, gate, s=size, c='blue', marker=marker, alpha=alpha)
    plt.scatter(times[Z == gsfg_min_vel], gate[Z == gsfg_min_vel], s=size,
                c='grey', marker=marker, alpha=alpha)
    plt.scatter(times[Z == gsfg_max_vel], gate[Z == gsfg_max_vel], s=size,
                c='red', marker=marker, alpha=alpha)
    ax2.xaxis.set_major_formatter(DateFormatter('%H:%M'))
    ax2.set_xlabel('Time UT')
    ax2.set_ylabel('Range gate')
    fig2.tight_layout()

    plot_rti(data_dict, 'velocity', gsct=True, gsfg_min_vel=gsfg_min_vel,
             fig_num=3)
    plt.show()
train_index, test_index = next(iter(indices)) X_train = iris.data[train_index] y_train = iris.target[train_index] X_test = iris.data[test_index] y_test = iris.target[test_index] num_classes = len(np.unique(y_train)) gmm = GaussianMixture(n_components=num_classes, covariance_type='full', init_params='random', random_state=0, max_iter=20) gmm.means_init = np.array( [X_train[y_train == i].mean(axis=0) for i in range(num_classes)]) gmm.fit(X_train) plt.figure() axis_handle = plt.subplot(1, 1, 1) colors = 'bgr' for i, color in enumerate(colors): eigenvalues, eigenvectors = np.linalg.eigh(gmm.covariances_[i][:2, :2]) norm_vec = eigenvectors[0] / np.linalg.norm(eigenvectors[0]) angle = np.arctan2(norm_vec[1], norm_vec[0]) angle = 180 * angle / np.pi scaling_factor = 8 eigenvalues *= scaling_factor ellipse = patches.Ellipse(gmm.means_[i, :2], eigenvalues[0], eigenvalues[1],
X = iris.data
y = iris.target

# One mixture component per distinct class label.
n_classes = len(np.unique(y))
colors = ['navy', 'turquoise', 'darkorange'][0:n_classes]

estimator = GaussianMixture(
    n_components=n_classes,
    covariance_type='full',
    max_iter=100,
    random_state=0,
)

plt.figure()

# Initialise the centroids by randomly selecting one data point of each class.
# The draw uses the global NumPy random state.
# With labelled data a better start is the per-class mean:
#   np.array([X[y == i].mean(axis=0) for i in range(n_classes)])
estimator.means_init = np.array([
    X[y == i][np.random.choice(len(X[y == i]))]
    for i in range(n_classes)
])

# Train the other parameters using the EM algorithm.
estimator.fit(X)

h = plt.subplot()
make_ellipses(estimator, h)

for n, color in enumerate(colors):
    data = iris.data[iris.target == n]
from sklearn.mixture import GaussianMixture

# Spherical GMM with one component per class; EM starts from the per-class
# means of the training data.
estimator = GaussianMixture(
    n_components=n_classes,
    covariance_type="spherical",
    max_iter=20,
    random_state=0,
    means_init=np.array(
        [x_train[y_train == i].mean(axis=0) for i in range(n_classes)]),
)
estimator.fit(x_train)
print(i) if i >= 50 and i < 100: if kmeans[i] != 0: print(i) if i >= 100: if kmeans[i] != 2: print(i) #-----------------------------------------------GMM------------------------------------------------------ gmm = GaussianMixture(n_components=3, max_iter=3000) X_gmm = list() for i in array[:, 2]: X_gmm.append([i]) gmm.means_init = np.array([[1], [4], [6]]) gmm.covariances_init = np.array([[1], [1], [1]]) gmm.weights_init = np.array([0.5, 0.25, 0.25]) gmm.fit(X_gmm) gmm_result = gmm.predict(X_gmm) print("mean: ", gmm.means_) print("covarinace: ", gmm.covariances_) print("weight: ", gmm.weights_) class0 = 0 class1 = 0 class2 = 0 for i in gmm_result:
def _one_vs_rest_specificity(confusion, class_index):
    """Return TN / (TN + FP) for one class of a square confusion matrix.

    ``confusion`` is indexed as ``confusion[true_label, predicted_label]``,
    the layout produced by ``sklearn.metrics.confusion_matrix``.
    """
    total = np.sum(confusion)
    true_pos = confusion[class_index, class_index]
    predicted_as_class = np.sum(confusion[:, class_index])
    actually_class = np.sum(confusion[class_index, :])
    false_pos = predicted_as_class - true_pos
    # Everything that is neither in the class row nor in the class column.
    true_neg = total - actually_class - predicted_as_class + true_pos
    return true_neg / (true_neg + false_pos)


def GMM(data, targets, colors, dataset, target_names):
    """Fit label-initialised Gaussian mixtures, plot them, and score the tied one.

    One ``GaussianMixture`` per covariance type ('spherical', 'diag', 'tied',
    'full') is fitted on the training split and drawn in its own subplot with
    train/test accuracy.  A separate tied-covariance mixture is then fitted and
    evaluated on the test split.

    Args:
        data: indexable of feature arrays; index 0 is used as the training
            split and index 2 as the test split (index 1 is not used here).
        targets: indexable of label arrays, indexed like ``data``.
        colors: one plot colour per class, passed to ``make_ellipses`` and
            used for the scatter plots.
        dataset: string appended to ``'gmm'`` and handed to
            ``multiclass_evaluate``.
        target_names: legend labels, indexed by class number.

    Returns:
        Tuple ``(accuracy, sensitivity, specificity)``: ``accuracy`` comes from
        ``multiclass_evaluate``; ``sensitivity`` is the list of recalls read
        from its report under keys '0', '1', '2'; ``specificity`` is the list
        of one-vs-rest specificities for classes 0, 1 and 2.
    """
    print('APPLY GMM...')
    X_train, Y_train = data[0], targets[0]
    X_test, Y_test = data[2], targets[2]
    n_classes = len(np.unique(Y_train))

    # Since we have class labels for the training data, we can initialise the
    # GMM means in a supervised manner.  Computed once: it is the same for
    # every estimator below.
    class_means = np.array(
        [X_train[Y_train == i].mean(axis=0) for i in range(n_classes)])

    # Try GMMs using different types of covariances.
    estimators = {
        cov_type: GaussianMixture(n_components=n_classes,
                                  covariance_type=cov_type,
                                  max_iter=20,
                                  random_state=0)
        for cov_type in ['spherical', 'diag', 'tied', 'full']
    }
    n_estimators = len(estimators)

    plt.figure(figsize=(3 * n_estimators // 2, 6))
    plt.subplots_adjust(bottom=.01, top=0.95, hspace=.15, wspace=.05,
                        left=.01, right=.99)

    for index, (name, estimator) in enumerate(estimators.items()):
        # Each estimator gets its own copy of the initial means.
        estimator.means_init = class_means.copy()

        # Train the other parameters using the EM algorithm.
        estimator.fit(X_train)

        h = plt.subplot(2, n_estimators // 2, index + 1)
        make_ellipses(estimator, h, colors)

        # Training data as small dots.
        for n, color in enumerate(colors):
            dataf = X_train[(Y_train == n)]
            plt.scatter(dataf[:, 0], dataf[:, 1], s=0.8, color=color,
                        label=target_names[n])

        # Plot the test data with crosses.
        for n, color in enumerate(colors):
            dataf = X_test[Y_test == n]
            plt.scatter(dataf[:, 0], dataf[:, 1], marker='x', color=color)

        y_train_pred = estimator.predict(X_train)
        train_accuracy = np.mean(y_train_pred.ravel() == Y_train.ravel()) * 100
        plt.text(0.05, 0.9, 'Train accuracy: %.1f' % train_accuracy,
                 transform=h.transAxes)

        y_test_pred = estimator.predict(X_test)
        test_accuracy = np.mean(y_test_pred.ravel() == Y_test.ravel()) * 100
        plt.text(0.05, 0.8, 'Test accuracy: %.1f' % test_accuracy,
                 transform=h.transAxes)

        plt.xticks(())
        plt.yticks(())
        plt.title(name)

    plt.legend(scatterpoints=1, loc='lower right', prop=dict(size=12))

    ### GMM with Tied Covariance ###
    estimator = GaussianMixture(n_components=n_classes,
                                covariance_type='tied',
                                max_iter=20,
                                random_state=0)
    estimator.means_init = class_means.copy()

    # Train the other parameters using the EM algorithm.
    estimator.fit(X_train)

    predictions = estimator.predict(X_test)
    scores = estimator.predict_proba(X_test)
    Y_test = list(Y_test)
    assert len(predictions) == len(scores)
    assert len(scores) == len(Y_test)

    accuracy, class_report = multiclass_evaluate(estimator, X_test, Y_test,
                                                 np.array(predictions),
                                                 np.array(scores),
                                                 np.array(Y_test), n_classes,
                                                 'gmm' + dataset)
    sensitivity = [
        class_report['0']['recall'], class_report['1']['recall'],
        class_report['2']['recall']
    ]

    confusion_matrix = sklearn.metrics.confusion_matrix(y_true=Y_test,
                                                        y_pred=predictions)
    print('Confusion Matrix', confusion_matrix)

    # One-vs-rest specificity for each of the three classes.  The previous
    # class-2 formula took its true negatives from confusion_matrix[:1, :1]
    # (a single cell) instead of the 2x2 block of non-class-2 samples.
    specificity = [
        _one_vs_rest_specificity(confusion_matrix, class_index)
        for class_index in range(3)
    ]
    specificity0, specificity1, specificity2 = specificity

    # NOTE(review): this strict check fails when any class reaches a
    # specificity of exactly 1 (no false positives) - confirm that is intended.
    assert specificity2 < 1 and specificity1 < 1 and specificity0 < 1

    return accuracy, sensitivity, specificity
def gmm_dist(rad, beam, stm, etm):
    """Cluster scatter with a 3-component GMM and compare GS/IS labelling.

    Loads a record dictionary with ``read_from_updated_db``, draws the
    empirical and traditional RTI plots, clusters the standardised
    (gate, velocity, width, power, elevation, frequency, time) features with a
    k-means-initialised full-covariance ``GaussianMixture``, prints the
    ground-scatter / ionospheric-scatter (GS/IS) agreement of the traditional
    flags and of the GMM clusters with the empirical flags, and finally saves
    a three-panel comparison figure as 'Fig4.png' and shows it.

    Side effects: ``data_dict['gsflg']`` is overwritten with the per-record GMM
    cluster labels before the third RTI plot; text is printed; a figure file
    is written.

    Args:
        rad, beam: forwarded unchanged to the database reader and the RTI
            plot helpers (presumably radar code and beam number - confirm).
        stm, etm: forwarded likewise and also used as the x-axis limits of
            the scatter panels (presumably start and end time - confirm).

    Returns:
        None.
    """
    # ---- Empirical-model labels and RTI plot --------------------------------
    data_dict = read_from_updated_db(rad, beam, stm, etm)
    plot_rti_emp(rad, beam, stm, etm, data_dict, 'velocity', fig_num=1,
                 title_str='Empirical Model Results')

    gs_hops = [1.0, 2.0, 3.0]
    is_hops = [0.5, 1.5, 2.5]
    emp_gate = np.hstack(data_dict['gate'])
    emp_time, emp_gsflg = [], []
    emp_num_scatter = data_dict['num_scatter']
    for when, count, hops in zip(data_dict['datetime'], emp_num_scatter,
                                 data_dict['hop']):
        # Repeat the record time stamp once per scatter point in the record.
        emp_time.extend(date2num([when] * count))
        for hop in hops:
            # Half-integer hops are flagged 0 (IS), integer hops 1 (GS).
            # NOTE(review): any other hop value adds no flag at all, which
            # would leave emp_gsflg shorter than emp_time - confirm that the
            # data never contains such values.
            if hop in is_hops:
                emp_gsflg.append(0)
            elif hop in gs_hops:
                emp_gsflg.append(1)
    emp_gsflg = np.array(emp_gsflg)
    emp_time = np.array(emp_time)

    # ---- Traditional flags and RTI plot -------------------------------------
    plot_rti(rad, beam, stm, etm, data_dict, 'velocity', gsct=False,
             fig_num=2, title_str='Traditional Model Results')

    gate = np.hstack(data_dict['gate'])
    vel = np.hstack(data_dict['velocity'])
    wid = np.hstack(data_dict['width'])
    power = np.hstack(data_dict['power'])
    elev = np.hstack(data_dict['elevation'])
    gs_flg = np.hstack(data_dict['gsflg'])
    time, freq = [], []
    num_scatter = data_dict['num_scatter']
    for when, count, frequency in zip(data_dict['datetime'], num_scatter,
                                      data_dict['frequency']):
        time.extend(date2num([when] * count))
        freq.extend([frequency] * count)
    time = np.array(time)
    freq = np.array(freq)

    # ---- GMM clustering and RTI plot ----------------------------------------
    # The features need to be scaled before clustering.
    features = (gate, vel, wid, power, elev, freq, time)
    data = np.column_stack(
        [preprocessing.scale(feature) for feature in features])

    n_classes = 3
    kmeans = KMeans(init='k-means++', n_clusters=n_classes,
                    n_init=50).fit(data)

    # Adapted from
    # http://scikit-learn.org/stable/auto_examples/mixture/plot_gmm_covariances.html#sphx-glr-auto-examples-mixture-plot-gmm-covariances-py
    cov_type = 'full'  # one of 'spherical', 'diag', 'tied', 'full'
    estimator = GaussianMixture(n_components=n_classes,
                                covariance_type=cov_type,
                                max_iter=100,
                                random_state=0)

    # Initialise the GMM means with the k-means centroids, then train the
    # other parameters using the EM algorithm.
    estimator.means_init = kmeans.cluster_centers_
    estimator.fit(data)
    Z = estimator.predict(data)

    median_vels = np.zeros(n_classes)
    median_wids = np.zeros(n_classes)
    for i in range(n_classes):
        median_vels[i] = np.median(np.abs(vel[Z == i]))
        median_wids[i] = np.median(wid[Z == i])
        print(median_vels[i])
        print(median_wids[i])

    # Cluster with the smallest median |velocity| is taken as ground scatter,
    # the one with the largest as ionospheric scatter.
    gsfg_min_vel = np.argmin(median_vels)
    gsfg_max_vel = np.argmax(median_vels)
    for i in range(n_classes):
        if i != gsfg_min_vel and i != gsfg_max_vel:
            # The remaining cluster is treated as indeterminate scatter.
            gsfg_undetermined = i
    print(gsfg_min_vel)
    print(gsfg_max_vel)

    # Split the flat label vector back into one list per record.
    new_gsflg = []
    tnum_scatter = 0
    for count in num_scatter:
        new_gsflg.append(Z[tnum_scatter:(tnum_scatter + count)].tolist())
        tnum_scatter += count
    data_dict['gsflg'] = new_gsflg

    # ---- GS/IS identification accuracy --------------------------------------
    num_true_trad_gs = np.count_nonzero(
        ((gs_flg == 1) | (gs_flg == -1)) & (emp_gsflg == 1))
    num_true_trad_is = np.count_nonzero((gs_flg == 0) & (emp_gsflg == 0))
    num_emp = len(emp_gsflg)
    accur_tra = float(num_true_trad_gs + num_true_trad_is) / num_emp * 100.
    print('The GS/IS identification accurary of traditional method is {:3.2f}%'
          .format(accur_tra))

    # Variant 1: only the minimum-velocity cluster counts as GS.
    num_true_gmm_gs1 = np.count_nonzero(
        (Z == gsfg_min_vel) & (emp_gsflg == 1))
    num_true_gmm_is1 = np.count_nonzero(
        ((Z == gsfg_max_vel) | (Z == gsfg_undetermined)) & (emp_gsflg == 0))
    # Variant 2: only the maximum-velocity cluster counts as IS.
    num_true_gmm_gs2 = np.count_nonzero(
        ((Z == gsfg_min_vel) | (Z == gsfg_undetermined)) & (emp_gsflg == 1))
    num_true_gmm_is2 = np.count_nonzero(
        (Z == gsfg_max_vel) & (emp_gsflg == 0))

    accur_gmm1 = float(num_true_gmm_gs1 + num_true_gmm_is1) / num_emp * 100.
    print('Assuming the GS is the cluster with minimum median velocity and the IS is the remaining two clusters, the GS/IS identification accurary of GMM is {:3.2f}%'
          .format(accur_gmm1))
    accur_gmm2 = float(num_true_gmm_gs2 + num_true_gmm_is2) / num_emp * 100.
    print('Assuming the IS is the cluster with maximum median velocity and the GS is the remaining two clusters, the GS/IS identification accurary of GMM is {:3.2f}%'
          .format(accur_gmm2))
    accur_gmm = max(accur_gmm1, accur_gmm2)

    plot_rti(rad, beam, stm, etm, data_dict, 'velocity', gsct=True,
             gsfg_min_vel=gsfg_min_vel, fig_num=3,
             title_str='Gaussian Mixture Model Results')

    # ---- Scatter-plot comparison --------------------------------------------
    alpha = 1.0
    size = 1
    marker = 's'

    def scatter_points(times, gates, color):
        # Every point set is drawn in one literal colour, so no colormap is
        # passed (Matplotlib ignores ``cmap`` when ``c`` is a colour name).
        plt.scatter(times, gates, s=size, c=color, marker=marker, alpha=alpha)

    fig4 = plt.figure(figsize=(10, 8))

    ax1 = plt.subplot(311)
    scatter_points(emp_time[emp_gsflg == 1], emp_gate[emp_gsflg == 1],
                   'blue')  # GS
    scatter_points(emp_time[emp_gsflg == 0], emp_gate[emp_gsflg == 0],
                   'red')  # IS
    ax1.xaxis.set_major_formatter(DateFormatter('%H:%M'))
    ax1.set_xlabel('Time UT')
    ax1.set_xlim([stm, etm])
    ax1.set_ylabel('Range gate')
    ax1.set_title('Empirical Model Results based on Burrell et al. 2015')

    ax2 = plt.subplot(312)
    scatter_points(time[gs_flg == 1], gate[gs_flg == 1], 'blue')  # GS
    scatter_points(time[gs_flg == 0], gate[gs_flg == 0], 'red')  # IS
    # Flag -1 (indeterminate in the updated data) was ground scatter in the
    # traditional method, so it is drawn as GS as well.
    scatter_points(time[gs_flg == -1], gate[gs_flg == -1], 'blue')
    ax2.xaxis.set_major_formatter(DateFormatter('%H:%M'))
    ax2.set_xlim([stm, etm])
    ax2.set_ylabel('Range gate')
    ax2.set_title(
        'Traditional Model Results based on Blanchard et al. 2009 with an Accuracy of {:3.2f}%'
        .format(accur_tra))

    ax3 = plt.subplot(313)
    scatter_points(time[Z == gsfg_min_vel], gate[Z == gsfg_min_vel],
                   'blue')  # ground scatter
    scatter_points(time[Z == gsfg_max_vel], gate[Z == gsfg_max_vel],
                   'red')  # ionospheric scatter
    # The third cluster takes the colour of whichever assignment scored the
    # higher accuracy: IS (red) for variant 1, GS (blue) otherwise.
    undetermined_color = 'red' if accur_gmm1 > accur_gmm2 else 'blue'
    scatter_points(time[Z == gsfg_undetermined], gate[Z == gsfg_undetermined],
                   undetermined_color)
    ax3.xaxis.set_major_formatter(DateFormatter('%H:%M'))
    ax3.set_xlabel('Time UT')
    ax3.set_xlim([stm, etm])
    ax3.set_ylabel('Range gate')
    ax3.set_title(
        'Gaussian Mixture Model Results with an Accuracy of {:3.2f}%'.format(
            accur_gmm))

    fig4.tight_layout()
    fig4.savefig('Fig4.png')
    plt.show()