def evaluate(self, metric):
    # true_labels holds a falsy sentinel when no ground-truth file was loaded
    if not self.true_labels:
        print("Error: A true_labels.txt file is needed")
        return
    if metric == "nmi":
        print("NMI: %f" % nmi(self.true_labels, self.predicted_labels))
    elif metric == "purity":
        print("Purity: %f" % purity(self.true_labels, self.predicted_labels))
    elif metric == "both":
        print("NMI: %f" % nmi(self.true_labels, self.predicted_labels))
        print("Purity: %f" % purity(self.true_labels, self.predicted_labels))
    else:
        print("Error: This metric is not available. Choose among the following options: 'nmi', 'purity', 'both'")
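# `purity` is called above but not defined in this snippet. A minimal sketch of
# such a helper (an assumption, not necessarily the author's implementation):
# each predicted cluster is credited with its majority ground-truth label.
import numpy as np
from sklearn.metrics import confusion_matrix

def purity(true_labels, predicted_labels):
    cm = confusion_matrix(true_labels, predicted_labels)
    # For each predicted cluster (column), count its best-matching true class,
    # then normalize by the total number of samples.
    return np.sum(np.amax(cm, axis=0)) / np.sum(cm)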
def nmi(self):
    """
    Return the Normalized Mutual Information (NMI) of the clustering solution
    with respect to the given ground-truth labels.
    """
    # Hard-assign each point to its most responsible cluster.
    z = np.argmax(self.pi_nk, 1)
    return nmi(z, self.label)
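# Illustration of the hard assignment above: argmax over the responsibility
# matrix pi_nk (rows = points, columns = clusters) picks one cluster per point.
# The values here are hypothetical, for demonstration only.
import numpy as np
pi_nk_demo = np.array([[0.9, 0.1],
                       [0.2, 0.8],
                       [0.6, 0.4]])
print(np.argmax(pi_nk_demo, 1))  # -> [0 1 0]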
def cluster(o, n_clusters=3):
    df = pd.read_pickle(o)
    reprsn = df['embedding'].values
    node_idx = df['node_id'].values
    labels = ry1()
    reprsn = [np.asarray(row, dtype='float32') for row in reprsn]
    reprsn = np.array(reprsn, dtype='float32')
    true_labels = [labels[int(node)] for node in node_idx]
    data = reprsn
    km = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    km.fit(data)
    km_means_labels = km.labels_
    km_means_cluster_centers = km.cluster_centers_
    km_means_labels_unique = np.unique(km_means_labels)
    # Optional t-SNE visualization of the true labels:
    # colors_ = cycle(colors.cnames.keys())
    # m, initial_dim = np.shape(data)
    # data_2 = tsne(data, 2, initial_dim, 30)
    # plt.figure(figsize=(12, 6))
    # plt.scatter(data_2[:, 0], data_2[:, 1], c=true_labels)
    # plt.title('True Labels')
    # plt.show()
    nmiv = nmi(true_labels, km_means_labels)
    print('NMI value', nmiv)
    return nmiv


# calculate the accuracy on the validation data
def nmi_matrix(df):
    # Use a float fill value so assigning NMI scores does not upcast int columns.
    mat = pd.DataFrame(index=df.columns, columns=df.columns, data=0.0)
    for i in df.columns:
        for j in df.columns:
            mat.loc[i, j] = nmi(df[i], df[j])
    return mat
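# Hedged usage example for nmi_matrix: 'a' and 'b' are the same partition up
# to relabeling (NMI = 1.0), while 'c' is independent of both (NMI = 0.0).
import pandas as pd
df_demo = pd.DataFrame({'a': [0, 0, 1, 1],
                        'b': [1, 1, 0, 0],
                        'c': [0, 1, 0, 1]})
print(nmi_matrix(df_demo))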
def benchmark(self, name: str, features: np.ndarray,
              labels: np.ndarray) -> Tuple[str, Dict]:
    """
    Returns the clustering performance results in str and dict format.
    The metrics used are as follows:
        1. Duration
        2. Adjusted Rand Score
        3. Normalized Mutual Information
        4. Davies-Bouldin Index
        5. Silhouette Score
        6. Calinski-Harabasz Score
        7. Clustering Accuracy

    Parameters
    ----------
    name: str
        The name of the benchmark.
    features: np.ndarray
        The test instances to cluster.
    labels: np.ndarray
        The test labels.

    Returns
    -------
    str
        The formatted string of the benchmark results.
    results: Dict
        The dictionary of benchmark results.
    """
    start_time = time.time()
    predictions = self.predict(features)
    results = {}
    results["name"] = name
    results["duration"] = time.time() - start_time
    results["ari"] = ari(labels_true=labels, labels_pred=predictions)
    results["nmi"] = nmi(labels_true=labels, labels_pred=predictions)
    results["dbi"] = davies_bouldin_score(features, predictions)
    results["silhouette"] = silhouette_score(features, predictions, metric="euclidean")
    results["ch_score"] = calinski_harabasz_score(features, predictions)
    results["clustering_accuracy"] = clustering_accuracy(target=labels,
                                                         prediction=predictions)
    return (
        "%-9s\t%.2fs\t%.3f\t\t%.3f\t\t%.3f\t\t%.3f\t\t%.3f\t\t%.3f" % (
            results.get("name"),
            results.get("duration"),
            results.get("dbi"),
            results.get("silhouette"),
            results.get("ch_score"),
            results.get("nmi"),
            results.get("ari"),
            results.get("clustering_accuracy"),
        ),
        results,
    )
def get_accuracy(cluster_assignments, y_true, n_clusters):
    """
    Computes the accuracy based on the provided k-means cluster assignments
    and true labels, using the Munkres algorithm.

    cluster_assignments: array of labels, output by k-means
    y_true: true labels
    n_clusters: number of clusters in the dataset

    returns: a tuple containing the accuracy and confusion matrix, in that order
    """
    y_pred, confusion_matrix = get_y_preds(cluster_assignments, y_true, n_clusters)

    from sklearn.metrics import normalized_mutual_info_score as nmi
    nmi_score = nmi(y_true, y_pred)
    print('NMI: ' + str(np.round(nmi_score, 4)))

    from sklearn.metrics import adjusted_rand_score as ari
    ari_score = ari(y_true, y_pred)
    print('ARI: ' + str(np.round(ari_score, 4)))

    # with open('C:/Users/mals6571/Desktop/SpectralNet-master/src/applications/Results.txt', 'a') as my_file:
    ProjectDir = get_project_root()
    with open(os.path.join(ProjectDir, 'Results.txt'), 'a') as my_file:
        my_file.write("\n")
        my_file.write('NMI: ' + str(np.round(nmi_score, 4)))
        my_file.write("\n")
        my_file.write('ARI: ' + str(np.round(ari_score, 4)))
        my_file.write("\n")

    # calculate the accuracy
    return np.mean(y_pred == y_true), confusion_matrix
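# `get_y_preds` is assumed above but not shown here. A minimal sketch under
# that assumption: remap cluster ids to true labels with the Hungarian
# algorithm (scipy's linear_sum_assignment) on the confusion matrix, assuming
# both label sets live in [0, n_clusters).
import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import confusion_matrix

def get_y_preds(cluster_assignments, y_true, n_clusters):
    cm = confusion_matrix(y_true, cluster_assignments, labels=np.arange(n_clusters))
    row_ind, col_ind = linear_sum_assignment(-cm)  # maximize matched counts
    mapping = dict(zip(col_ind, row_ind))          # cluster id -> true label
    y_pred = np.array([mapping[c] for c in cluster_assignments])
    return y_pred, confusion_matrix(y_true, y_pred)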
def run_net(data, params):
    #
    # UNPACK DATA
    #
    x_train_unlabeled, y_train_unlabeled, x_val, y_val, x_test, y_test = data[
        'spectral']['train_and_test']

    inputs_vae = Input(shape=(params['img_dim'], params['img_dim'], 1),
                       name='inputs_vae')
    ConvAE = Conv.ConvAE(inputs_vae, params)
    ConvAE.vae.load_weights('vae_mnist.h5')

    lh = LearningHandler(lr=params['spec_lr'],
                         drop=params['spec_drop'],
                         lr_tensor=ConvAE.learning_rate,
                         patience=params['spec_patience'])
    lh.on_train_begin()

    losses_vae = np.empty((500,))
    for i in range(500):
        x_val_y = ConvAE.vae.predict(x_val)[2]
        losses_vae[i] = ConvAE.train_vae(x_val, x_val_y, params['batch_size'])
        x_val_y = ConvAE.vae.predict(x_val)[2]
        y_sp = x_val_y.argmax(axis=1)
        print_accuracy(y_sp, y_val, params['n_clusters'])
        print("Epoch: {}, loss={:.2f}".format(i, losses_vae[i]))
        # Stop once the VAE loss has plateaued.
        if i > 1 and np.abs(losses_vae[i] - losses_vae[i - 1]) < 0.0001:
            print('STOPPING EARLY')
            break

    # ConvAE.vae.save_weights('IJCAI_mnist2.h5')
    print("finished training")

    x_val_y = ConvAE.vae.predict(x_val)[2]
    y_sp = x_val_y.argmax(axis=1)
    print_accuracy(y_sp, y_val, params['n_clusters'])

    from sklearn.metrics import normalized_mutual_info_score as nmi
    nmi_score1 = nmi(y_sp, y_val)
    print('NMI: ' + str(np.round(nmi_score1, 4)))
def evaluate_model(self):
    """Calculate the NMI between self.ground_truth and self.predicted."""
    nmi_eval = nmi(self.ground_truth, self.predicted)
    print(f"NMI score is: {nmi_eval}")
def metriques(model, y_true, y_pred):
    # NOTE: metrics are computed on model.row_labels_; the y_pred argument is unused.
    pred1 = model.row_labels_
    nmi_ = nmi(y_true, pred1)
    ari_ = ari(y_true, pred1)
    accuracy = ACCURACY(y_true, pred1)
    print("NMI: {}\nARI: {}".format(nmi_, ari_))
    print("ACCURACY: %s" % accuracy)
    return nmi_, ari_, accuracy
def run_experiment(ae_model_path):
    banner = "+" * 82
    logger.info(banner)
    logger.info(banner)
    logger.info(f"Working now on {ae_model_path.name}")
    logger.info(banner)
    logger.info(banner)

    new_seed = random.randint(0, 1000)
    logger.info(f"Seed value for this run is: {new_seed}")
    set_random_seed(new_seed)

    ae_module = stacked_ae(pt_data.shape[1], [500, 500, 2000, 10],
                           weight_initalizer=torch.nn.init.xavier_normal_,
                           activation_fn=lambda x: F.relu(x),
                           loss_fn=None,
                           optimizer_fn=None)
    model_data = torch.load(ae_model_path, map_location='cpu')
    ae_module.load_state_dict(model_data)
    ae_module = ae_module.cuda()

    # Get embedded data
    embedded_data = None
    for batch_data in torch.utils.data.DataLoader(pt_data, batch_size=256, shuffle=False):
        embedded_batch_np = ae_module.forward(batch_data.cuda())[0].detach().cpu().numpy()
        if embedded_data is None:
            embedded_data = embedded_batch_np
        else:
            embedded_data = np.concatenate([embedded_data, embedded_batch_np], 0)
    del ae_module

    # Perform k-means on the embedding
    k_means_labels = k_means(embedded_data, n_clusters, n_init=20)[1]
    k_means_nmi_value = nmi(gold_labels, k_means_labels, average_method='arithmetic')
    k_means_acc_value = cluster_acc(gold_labels, k_means_labels)[0]

    result_file = Path(f"{result_dir}/results_ae_kmeans_{dataset_name}.txt")
    result_file_exists = result_file.exists()
    f = open(result_file, "a+")
    if not result_file_exists:
        f.write("#\"ae_model_name\"\t\"NMI\"\t\"ACC\"\n")
    f.write(f"{ae_model_path.name}\t{k_means_nmi_value}\t{k_means_acc_value}\n")
    f.close()
def tensfact_baseline():
    n_clusters = 81
    X_buzz = np.load('buzz_user_tensor_45.npy')
    print(X_buzz.shape)
    X_buzz = X_buzz[list(buzz_ground.keys())]
    buzz_ground1 = list(buzz_ground.values())
    km = KMeans(n_clusters=81, init='k-means++', n_init=1, verbose=False)
    sc = sc1 = sc2 = 0.0
    for i in range(10):
        km.fit(X_buzz)
        sc += nmi(buzz_ground1, km.labels_)
        sc1 += ari(buzz_ground1, km.labels_)
        sc2 += ami(buzz_ground1, km.labels_)
    print("BUZZ")
    print("nmi score %f" % (sc / 10.0))
    print("ari score %f" % (sc1 / 10.0))
    print("ami score %f" % (sc2 / 10.0))

    X_poli = np.load('poli_user_tensor_75.npy')
    print(X_poli.shape)
    X_poli = X_poli[list(poli_ground.keys())]
    poli_ground1 = list(poli_ground.values())
    km1 = KMeans(n_clusters=310, init='k-means++', n_init=1, verbose=False)
    sc = sc1 = sc2 = 0.0
    for i in range(10):
        km1.fit(X_poli)
        sc += nmi(poli_ground1, km1.labels_)
        sc1 += ari(poli_ground1, km1.labels_)
        sc2 += ami(poli_ground1, km1.labels_)
    print("poli")
    print("nmi score %f" % (sc / 10.0))
    print("ari score %f" % (sc1 / 10.0))
    print("ami score %f" % (sc2 / 10.0))
def NMI(y_true, y_pred):
    b, _, _, ch = y_true.shape
    output = np.zeros((b, ch))
    for b_idx in range(b):
        for ch_idx in range(ch):
            # Scale each channel to 8-bit intensity labels before computing NMI.
            true_max = y_true[b_idx, ..., ch_idx].max()
            pred_max = y_pred[b_idx, ..., ch_idx].max()
            output[b_idx, ch_idx] = nmi(
                (y_true[b_idx, ..., ch_idx] / true_max * 255).astype('uint8').ravel(),
                (y_pred[b_idx, ..., ch_idx] / pred_max * 255).astype('uint8').ravel())
    return output
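# Hedged sanity check for NMI above: comparing a 4-D batch with itself yields
# 1.0 per (batch, channel) pair, since the quantized intensity labels coincide.
import numpy as np
batch = np.random.rand(2, 8, 8, 1)
print(NMI(batch, batch))  # -> all entries 1.0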
def execute_algo(model, model_name, X, y, verbose=True):
    print("##############\n# {}\n##############".format(model_name))
    model.fit(X)
    res_nmi = nmi(model.row_labels_, y)
    res_ari = ari(model.row_labels_, y)
    res_acc = accuracy(model.row_labels_, y)
    if verbose:
        print("NMI =", res_nmi)
        print("ARI =", res_ari)
        print("ACC =", res_acc)
    return res_nmi, res_ari, res_acc
def train(self):
    x, y = np.load('images/64px_image_x.npy'), np.load('images/64px_image_y.npy')
    x = np.reshape(x, (40000, 64, 64, 1))

    kmeans = KMeans(n_clusters=2, n_init=20)
    y_pred = kmeans.fit_predict(self.encoder.predict(x))
    y_pred_last = np.copy(y_pred)
    self.model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])

    loss = 0
    ae_loss = 0
    index = 0
    maxiter = 80000
    update_interval = 100
    index_array = np.arange(x.shape[0])
    batch_size = 16
    tol = 0.001

    # model.load_weights('DEC_model_final.h5')
    for ite in range(int(maxiter)):
        if ite % update_interval == 0:
            q = self.model.predict(x, verbose=0)
            # update the auxiliary target distribution p
            p = self.target_distribution(q)

            # evaluate the clustering performance
            y_pred = q.argmax(1)
            if y is not None:
                acc = np.round(metrics.acc(y, y_pred), 5)
                nmi = np.round(metrics.nmi(y, y_pred), 5)
                ari = np.round(metrics.ari(y, y_pred), 5)
                loss = np.round(loss, 5)
                print('Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f, loss=%.5f'
                      % (ite, acc, nmi, ari, loss))

            # check stop criterion - model convergence
            delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
            y_pred_last = np.copy(y_pred)
            if ite > 0 and delta_label < tol:
                print('delta_label ', delta_label, '< tol ', tol)
                print('Reached tolerance threshold. Stopping training.')
                break

        idx = np.random.randint(low=0, high=x.shape[0], size=batch_size)
        # ae_loss = ae.train_on_batch(x=x[idx], y=x[idx])
        loss = self.model.train_on_batch(x=x[idx], y=p[idx])
        index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0

    self.model.save_weights('DEC_model_final_64px.h5')
    self.test_model()
def on_epoch_end(self, epoch, logs=None):
    if int(epochs / 10) != 0 and epoch % int(epochs / 10) != 0:
        return
    feature_model = Model(
        self.model.input,
        self.model.get_layer('encoder_%d' % (int(len(self.model.layers) / 2) - 1)).output)
    features = feature_model.predict(self.x)
    km = KMeans(n_clusters=len(np.unique(self.y)), n_init=20, n_jobs=4)
    y_pred = km.fit_predict(features)
    print(' ' * 8 + '|==> acc: %.4f, nmi: %.4f <==|'
          % (metrics.acc(self.y, y_pred), metrics.nmi(self.y, y_pred)))
def _are_labels_equal(labels_new, labels_old):
    """
    Check whether the old and new labels are equal. To do so, compute the NMI
    for each subspace; if all values are 1, the labels have not changed.
    :param labels_new: new labels list
    :param labels_old: old labels list
    :return: True if labels for all subspaces are the same
    """
    if labels_new is None or labels_old is None:
        return False
    return all(
        nmi(labels_new[i], labels_old[i], average_method='arithmetic') == 1
        for i in range(len(labels_new)))
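# Hedged example of the check above: NMI is invariant to relabeling, so a
# permutation of the same partition still compares as equal.
from sklearn.metrics import normalized_mutual_info_score as nmi
print(_are_labels_equal([[0, 0, 1, 1]], [[1, 1, 0, 0]]))  # -> True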
def evaluate(self, metric):
    # true_labels holds a falsy sentinel when no ground-truth file was loaded
    if not self.true_labels:
        print("Error: A true_labels.csv file is needed")
        return
    true_labels = list(self.true_labels.values())
    if metric == "nmi":
        print("NMI: %f" % nmi(true_labels, self.predicted_labels))
    elif metric == "purity":
        print("Purity: %f" % purity(true_labels, self.predicted_labels))
    elif metric == "confusion matrix":
        print("Confusion matrix:")
        print(confusion_matrix(true_labels, self.predicted_labels))
    elif metric == "all":
        print("NMI: %f" % nmi(true_labels, self.predicted_labels))
        print("Purity: %f" % purity(true_labels, self.predicted_labels))
        print("Confusion matrix:")
        print(confusion_matrix(true_labels, self.predicted_labels))
    else:
        print("Error: This metric is not available. Choose among the following "
              "options: 'nmi', 'purity', 'confusion matrix', 'all'")
def run_trial(X, labels, k):
    errors = '"'
    # Run k-means and time it
    start = time()
    # Leftover from an earlier metric-parameterised variant:
    # if metric == 'seuclidean':
    #     db = KMeans(eps, minPts, metric=metric, metric_params={'V': V})
    # else:
    #     db = kmean(, minPts, metric=metric)
    db = KMeans(k, n_jobs=12)
    pred_labels = db.fit_predict(X)
    elapsed = time() - start
    try:
        ari_score = ari(pred_labels, labels)
    except Exception as e:
        errors += str(e) + '; '
        ari_score = np.nan
    try:
        nmi_score = nmi(pred_labels, labels, average_method='arithmetic')
    except Exception as e:
        errors += str(e) + '; '
        nmi_score = np.nan
    try:
        ss_score = ss(X, pred_labels)
    except Exception as e:
        errors += str(e) + '; '
        ss_score = np.nan
    try:
        vrc_score = vrc(X, pred_labels)
    except Exception as e:
        errors += str(e) + '; '
        vrc_score = np.nan
    try:
        dbs_score = dbs(X, pred_labels)
    except Exception as e:
        errors += str(e) + '; '
        dbs_score = np.nan
    errors += '"'
    return [k, elapsed, ari_score, nmi_score, ss_score, vrc_score, dbs_score, errors]
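# Hedged note (column names assumed from the abbreviations above: ss =
# silhouette, vrc = Calinski-Harabasz / variance ratio criterion, dbs =
# Davies-Bouldin): a header matching the row returned by run_trial.
RUN_TRIAL_COLUMNS = ['k', 'time_s', 'ari', 'nmi', 'silhouette', 'vrc', 'dbs', 'errors']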
def run_net(data, params):
    #
    # UNPACK DATA
    #
    x_train_unlabeled, y_train_unlabeled, x_val, y_val, x_test, y_test = data[
        'spectral']['train_and_test']

    inputs_vae = Input(shape=(params['img_dim'], params['img_dim'], 1),
                       name='inputs_vae')
    ConvAE = Conv.ConvAE(inputs_vae, params)
    ConvAE.vae.load_weights(
        '/home/stu2/Signal-1/Deep-Spectral-Clustering-using-Dual-Autoencoder-Network-master/src/applications/vae_mnist.h5'
    )

    lh = LearningHandler(lr=params['spec_lr'],
                         drop=params['spec_drop'],
                         lr_tensor=ConvAE.learning_rate,
                         patience=params['spec_patience'])
    lh.on_train_begin()

    losses_vae = np.empty((500,))
    for i in range(100):
        x_val_y = ConvAE.vae.predict(x_val)[2]  # get y
        losses_vae[i] = ConvAE.train_vae(x_val, x_val_y, params['batch_size'])
        x_val_y = ConvAE.vae.predict(x_val)[2]
        y_sp = x_val_y.argmax(axis=1)
        print_accuracy(y_sp, y_val, params['n_clusters'])
        print("Epoch: {}, loss={:.2f}".format(i, losses_vae[i]))
        # Stop once the VAE loss has plateaued.
        if i > 1 and np.abs(losses_vae[i] - losses_vae[i - 1]) < 0.0001:
            print('STOPPING EARLY')
            break

    print("finished training")

    x_val_y = ConvAE.vae.predict(x_val)[2]
    y_sp = x_val_y.argmax(axis=1)
    print_accuracy(y_sp, y_val, params['n_clusters'])

    from sklearn.metrics import normalized_mutual_info_score as nmi
    nmi_score1 = nmi(y_sp, y_val)
    print('NMI: ' + str(np.round(nmi_score1, 4)))
def cluster0(n_clusters=3):
    labels = ry1()
    true_labels = np.asarray(labels, dtype='int32')
    data = rx1()
    km = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    km.fit(data)
    km_means_labels = km.labels_
    km_means_cluster_centers = km.cluster_centers_
    km_means_labels_unique = np.unique(km_means_labels)
    colors_ = cycle(colors.cnames.keys())
    nmiv = nmi(true_labels, km_means_labels)
    print('nmi value', nmiv)
    return nmiv


# calculate the accuracy on the validation data
def evaluate(train_round_idx, ae_module, cluster_module):
    test_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(pt_data), batch_size=256)

    pred_labels = np.zeros(pt_data.shape[0], dtype=int)
    index = 0
    n_batches = 0
    for batch_data in test_loader:
        batch_data = batch_data[0].cuda()
        n_batches += 1
        batch_size = batch_data.shape[0]
        embedded_data, reconstructed_data = ae_module.forward(batch_data)
        labels = cluster_module.prediction_hard_np(embedded_data)
        pred_labels[index:index + batch_size] = labels
        index = index + batch_size

    nmi_value = nmi(gold_labels, pred_labels, average_method='arithmetic')
    acc_value = cluster_acc(gold_labels, pred_labels)[0]
    logger.info(f"{train_round_idx} Evaluation: NMI: {nmi_value} ACC: {acc_value}")
    return nmi_value, acc_value
def run(dataset_name='seeds', n_clusters=3, mode='cpu'):
    X, Y = load_dataset(dataset_name)
    X = normalize_dataset(X)
    for i in range(1):
        C, V, m = sub_kmeans(X, n_clusters, mode)
        # Rotate the data into the basis found by SubKMeans for plotting.
        trans = V.T.real
        X_rotated = np.matmul(trans[None, :, :],
                              np.transpose(X[:, None, :], [0, 2, 1]))
        X_rotated = X_rotated.squeeze(-1).T
        nmi_score = nmi(Y, C)
        M = assign_markers(C)
        K = assign_colors(Y)
        print('')
        print('[i] Results')
        print('[*] m: %d' % m)
        print('[*] NMI: %.5f' % nmi_score)
        data_points = zip(X_rotated[0], X_rotated[1], K, M)
        for x_, y_, c_, m_ in data_points:
            plt.scatter(x_, y_, c=c_, marker=m_, s=3)
        plt.title('Seeds, m={:d}, NMI={:.3f}'.format(m, nmi_score))
        plt.savefig('{}.png'.format(dataset_name), dpi=300)
def get_scores(x, y, n, k, dtr, dev):
    # Build one-hot matrices for the two label vectors, then form the k x k
    # contingency table t = tx^T ty for Hungarian matching.
    tx = tens0((n, k), dt=dtr, dev=dev)
    ty = tens0((n, k), dt=dtr, dev=dev)
    tx = tens_sel_set(tx, x, 1)
    ty = tens_sel_set(ty, y, 1)
    t = tx.t().matmul(ty)
    del tx, ty
    # `ass` solves a minimum-cost assignment, so convert the (to-be-maximized)
    # contingency counts into costs first.
    tt = t.max() - t
    tt = tt.cpu().numpy()
    row, col = ass(tt)
    del tt
    t = t.cpu().numpy()
    t = t[row, col].sum()
    t = t.tolist() / n  # clustering accuracy under the best label matching
    x = x.cpu().numpy()
    y = y.cpu().numpy()
    s = {
        'nmi': nmi(x, y, average_method='geometric'),
        'ari': ari(x, y),
        'acc': t,
    }
    return s
def run_net(data, params):
    #
    # UNPACK DATA
    #
    x_train, y_train, x_val, y_val, x_test, y_test = data['spectral'][
        'train_and_test']
    x_train_unlabeled, y_train_unlabeled, x_train_labeled, y_train_labeled = data[
        'spectral']['train_unlabeled_and_labeled']
    x_val_unlabeled, y_val_unlabeled, x_val_labeled, y_val_labeled = data[
        'spectral']['val_unlabeled_and_labeled']

    if 'siamese' in params['affinity']:
        pairs_train, dist_train, pairs_val, dist_val = data['siamese'][
            'train_and_test']

    x = np.concatenate((x_train, x_val, x_test), axis=0)
    y = np.concatenate((y_train, y_val, y_test), axis=0)

    if len(x_train_labeled):
        y_train_labeled_onehot = OneHotEncoder().fit_transform(
            y_train_labeled.reshape(-1, 1)).toarray()
    else:
        y_train_labeled_onehot = np.empty((0, len(np.unique(y))))

    #
    # SET UP INPUTS
    #
    # create true y placeholder (not used in unsupervised training)
    y_true = tf.placeholder(tf.float32, shape=(None, params['n_clusters']),
                            name='y_true')

    batch_sizes = {
        'Unlabeled': params['batch_size'],
        'Labeled': params['batch_size'],
        'Orthonorm': params.get('batch_size_orthonorm', params['batch_size']),
    }

    input_shape = x.shape[1:]

    # spectralnet has three inputs -- they are defined here
    inputs = {
        'Unlabeled': Input(shape=input_shape, name='UnlabeledInput'),
        'Labeled': Input(shape=input_shape, name='LabeledInput'),
        'Orthonorm': Input(shape=input_shape, name='OrthonormInput'),
    }

    #
    # DEFINE AND TRAIN SIAMESE NET
    #
    # run only if we are using a siamese network
    if params['affinity'] == 'siamese':
        siamese_net = networks.SiameseNet(inputs, params['arch'],
                                          params.get('siam_reg'), y_true)
        history = siamese_net.train(pairs_train, dist_train, pairs_val, dist_val,
                                    params['siam_lr'], params['siam_drop'],
                                    params['siam_patience'], params['siam_ne'],
                                    params['siam_batch_size'])
    else:
        siamese_net = None

    #
    # DEFINE AND TRAIN SPECTRALNET
    #
    spectral_net = networks.SpectralNet(inputs, params['arch'],
                                        params.get('spec_reg'), y_true,
                                        y_train_labeled_onehot,
                                        params['n_clusters'], params['affinity'],
                                        params['scale_nbr'], params['n_nbrs'],
                                        batch_sizes, siamese_net, x_train,
                                        len(x_train_labeled))
    spectral_net.train(x_train_unlabeled, x_train_labeled, x_val_unlabeled,
                       params['spec_lr'], params['spec_drop'],
                       params['spec_patience'], params['spec_ne'])

    print("finished training")

    #
    # EVALUATE
    #
    # get final embeddings
    x_spectralnet = spectral_net.predict(x)

    # get accuracy and nmi
    kmeans_assignments, km = get_cluster_sols(x_spectralnet, ClusterClass=KMeans,
                                              n_clusters=params['n_clusters'],
                                              init_args={'n_init': 10})
    y_spectralnet, _ = get_y_preds(kmeans_assignments, y, params['n_clusters'])
    print_accuracy(kmeans_assignments, y, params['n_clusters'])

    from sklearn.metrics import normalized_mutual_info_score as nmi
    nmi_score = nmi(kmeans_assignments, y)
    print('NMI: ' + str(np.round(nmi_score, 3)))

    if params['generalization_metrics']:
        x_spectralnet_train = spectral_net.predict(x_train_unlabeled)
        x_spectralnet_test = spectral_net.predict(x_test)
        km_train = KMeans(n_clusters=params['n_clusters']).fit(x_spectralnet_train)
        from scipy.spatial.distance import cdist
        dist_mat = cdist(x_spectralnet_test, km_train.cluster_centers_)
        closest_cluster = np.argmin(dist_mat, axis=1)
        print_accuracy(closest_cluster, y_test, params['n_clusters'],
                       ' generalization')
        nmi_score = nmi(closest_cluster, y_test)
        print('generalization NMI: ' + str(np.round(nmi_score, 3)))

    return x_spectralnet, y_spectralnet
        y[i] = 2

sparsity = 1 - (np.sum(T > 0) / np.prod(T.shape))

f, dt = CreateOutputFile("yelp", date=True)
output_path = f"./output/_yelp/" + dt[:10] + "_" + dt[11:13] + "." + dt[14:16] + "." + dt[17:19] + "/"
directory = os.path.dirname(output_path)
if not os.path.exists(directory):
    os.makedirs(directory)

model = CoClust(np.sum(T.shape) * 10, optimization_strategy=alg, path=output_path)
model.fit(T)

tau = model.final_tau_
nmi_x = nmi(y, model.x_, average_method='arithmetic')
ari_x = ari(y, model.x_)
f.write(f"{T.shape[0]},{T.shape[1]},{T.shape[2]},{len(set(y))},,,,{tau[0]},{tau[1]},{tau[2]},{nmi_x},,,{ari_x},,,{model._n_clusters[0]},{model._n_clusters[1]},{model._n_clusters[2]},{model.execution_time_},{sparsity},{alg}\n")
f.close()

gx = open(output_path + alg + "_assignments_" + tensor + "_x.txt", 'w')
for i in range(T.shape[0]):
    gx.write(f"{i}\t{model._assignment[0][i]}\n")
gx.close()
gy = open(output_path + alg + "_assignments_" + tensor + "_y.txt", 'w')
for i in range(T.shape[1]):
    gy.write(f"{i}\t{model._assignment[1][i]}\n")
gy.close()
    # feat_lbp.append(mahotas.features.lbp(img[i], 1, 8))

feat_lbp = scale(np.array(feat_lbp))

# PCA on LBP features
# pca = PCA(n_components=20)
# feat_lbp = pca.fit_transform(feat_lbp)
# print("Variance Ratio: ", sum(pca.explained_variance_ratio_))

# Normalization of features
# feat_lbp = scale(feat_lbp)

# Save LBP features
file_pkl = open("face_lbp.pkl", "wb")
pickle.dump(feat_lbp, file_pkl)
file_pkl.close()

# Compute affinity matrix
flag_sigma = 'global'
sigma_lbp, aff_lbp = compute_affinity(feat_lbp, flag_sigma=flag_sigma,
                                      sigma=100., nn=8)
print("kernel computation finished")

label_pred_identity = spectral_clustering(aff_lbp, n_clusters=20)
nmi_identity = nmi(label_pred_identity, img_identity)
print("NMI with identity: ", nmi_identity)

label_pred_pose = spectral_clustering(aff_lbp, n_clusters=4)
nmi_pose = nmi(label_pred_pose, img_pose)
print("NMI with pose: ", nmi_pose)
def main():
    path = './data/aucs_edgelist.txt'

    # Declare each layer's graph
    lunch = init_graph()
    facebook = init_graph()
    leisure = init_graph()
    work = init_graph()
    coauthor = init_graph()
    table = {
        'lunch': lunch,
        'facebook': facebook,
        'leisure': leisure,
        'work': work,
        'coauthor': coauthor,
    }
    truth, na = get_truth()

    # Load data into graph
    print("-------------------- Load multilayer graph --------------------")
    with open(path) as f:
        for line in f:
            line = line.strip().split(',')
            name = line[2]
            if line[0] in na or line[1] in na:
                continue
            else:
                table[name].add_edge(line[0], line[1])

    for name, graph in table.items():
        print("\nGraph: {}".format(name))
        print("\tNumber of nodes: {}".format(nx.number_of_nodes(graph)))
        print("\tNumber of edges: {}".format(nx.number_of_edges(graph)))

    graph_list = [lunch, work, coauthor, leisure]
    node_list = list(lunch.nodes)

    # # Tuning k
    # print("-------------------- Perform k clusters selection --------------------")
    # sse_list = []
    # range_k = np.arange(2, 15)
    # for k in range_k:
    #     labels, sse = SCML(graph_list, k, 0.5)
    #     score = silhouette_score(matrix, labels, random_state=42)
    #     print("Number of clusters k = {}".format(k),
    #           ", Silhouette Score = {}".format(round(score, 5)))
    #     sse_list.append(sse)
    # # Plot elbow method for k
    # plot_elbow(range_k, sse_list, "Selection of k")

    # Tuning alpha
    print("-------------------- Perform alpha selection --------------------")
    range_a = np.arange(0.2, 1.1, 0.1)
    den = []
    nmi_list = []
    for alpha in range_a:
        labels = SCML(graph_list, 8, alpha)
        partitions = get_partition(labels, node_list)
        density = get_score(graph_list, partitions)
        den.append(density)
        print("\nAlpha = {}".format(round(alpha, 1)))
        print("\tDensity = {}".format(density))
        nmi_value = nmi(truth, labels)
        print("\tNMI = {}".format(nmi_value))
        nmi_list.append(nmi_value)
    # Plot elbow method for alpha
    plot_elbow(range_a, den, "Selection of alpha (Density)")
    plot_elbow(range_a, nmi_list, "Selection of alpha (NMI)")

    # Select the best model
    print("-------------------- Multilayer Result --------------------")
    labels = SCML(graph_list, 8, 0.2)
    partitions = get_partition(labels, node_list)
    print("NMI: {}".format(nmi(truth, labels)))
    purity = purity_score(truth, labels)
    print("Purity: {}".format(purity))

    print("-------------------- Single layer Result --------------------")
    for name, g in table.items():
        print("\nLayer: {}".format(name))
        labels = onelayer(g, 8)
        print("\tNMI: {}".format(nmi(truth, labels)))
        purity = purity_score(truth, labels)
        print("\tPurity: {}".format(purity))
    x_init = np.zeros(nVars)
    options = {'verbose': 3}
    data_mat = {'nInst': nInst, 'nVars': nVars, 'A': A, 'x': x, 'b': b,
                'x_init': x_init}
    savemat("./Mark_Schmidt/minConf/minConf_SPG_input.mat", data_mat)
    (x, f, funEvals, projects) = minConf_SPG(funObj, x_init, funProj, options)
elif flag_test == 8:
    options_default = {'verbose': 2, 'numDiff': 0, 'optTol': 1e-5, 'progTol': 1e-9,
                       'maxIter': 500, 'suffDec': 1e-4, 'interp': 2, 'memory': 10,
                       'useSpectral': 1, 'curvilinear': 0, 'feasibleInit': 0,
                       'testOpt': 1, 'bbType': 1}
    options = {'verbose': 100, 'interp': 10}
    options = setDefaultOptions(options, options_default)
elif flag_test == 9:
    label = np.random.randint(0, 10, 100)
    alpha = np.array([.95, .85])
    beta = np.array([.7, .55])
    [num_ML, num_CL] = [100, 100]
    S = genConstraints(label, alpha, beta, num_ML, num_CL)
elif flag_test == 10:
    tp = load_iris()
    [X, Y] = [scale(tp['data']), tp['target']]
    sim_mat = rbf_kernel(X)
    Y_pred = my_spectral_clustering(sim_mat, n_clusters=3)
    print(nmi(Y_pred, Y))
else:
    pass
# Write the phenotype matrix to CSV (text mode with newline='' for the csv module)
file_csv = open("data_pheno.csv", "w", newline="")
csvwriter = csv.writer(file_csv)
for i in range(data.shape[0]):
    csvwriter.writerow(list(data[i, :]))
file_csv.close()

label_pred_4 = np.loadtxt('label_pred_4.csv')
label_nbs_lf4 = np.loadtxt('label_nbs_lf4.csv')

# NMI of each phenotype feature against the four label sets
nmi_1 = []
nmi_2 = []
nmi_3 = []
nmi_4 = []
for j in range(data.shape[1]):
    nmi_1.append(nmi(data[:, j], label_pred_4))
    nmi_2.append(nmi(data[:, j], label_nbs_lf4))
    nmi_3.append(nmi(data[:, j], patient_label))
    nmi_4.append(nmi(data[:, j], gold_stage))

index = np.arange(1, 13)
bar_width = 0.2
labels = ['BD.FEV1', 'oxygen', 'ExacTrunc', 'BD.FEV.FVC', 'FracVol.950U',
          'Lowest.15.', 'Emphysema', 'Neutrophils', 'Lymphocytes',
          'Monocytes', 'Eosinophils', 'Basophils']
bar_1 = plt.bar(index, nmi_1, bar_width, color='b', label='Normalization+NMF')
bar_2 = plt.bar(index + bar_width, nmi_2, bar_width, color='r', label='NMF')
bar_3 = plt.bar(index + bar_width * 2, nmi_3, bar_width, color='g', label='Case/Control')
bar_4 = plt.bar(index + bar_width * 3, nmi_4, bar_width, color='y', label='Gold Stage')
plt.xlabel('Phenotype Features')
# Find U by eigendecomposition of the combined affinity matrix
aff = beta[0] * aff_pca + beta[1] * aff_gabor
eig_val, eig_vec = la.eig(aff)

# Sort eigenvalues and eigenvectors in descending order
idx = eig_val.argsort()[::-1]
eig_val = eig_val[idx]
eig_vec = eig_vec[:, idx]
U = eig_vec[:, 0:40]

# Optimize beta via a quadratic program (cvxopt qp)
gamma = matrix([np.trace(aff_pca.dot(U).dot(U.T)),
                np.trace(aff_gabor.dot(U).dot(U.T))])
G = matrix([[-1.0, 0.], [0., -1.0]])
h = matrix([0., 0.])
A = matrix([1., 1.], (1, 2))
b = matrix(1.0)
res = qp(2 * Q, -2 * gamma, G, h, A, b)
beta = res['x'].T

residue_old = la.norm(aff - U_old.dot(U_old.T))
residue = la.norm(aff - U.dot(U.T))
n_iter = n_iter + 1
print(n_iter)
print("beta: ", beta)
print("-2*gamma: ", -2 * gamma)
print("Residue: ", residue)

clf = KMeans(n_clusters=K, init='random')
label_u = clf.fit_predict(U)
nmi_u = nmi(label_u, label_true)
print(nmi_u)
def load_mnist():
    from keras.datasets import mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x = np.concatenate((x_train, x_test))
    y = np.concatenate((y_train, y_test))
    x = x.reshape((x.shape[0], -1))
    x = np.divide(x, 50.)  # normalize as it does in DEC paper
    print('MNIST samples', x.shape)
    return x, y


db = 'mnist'
n_clusters = 10
x, y = load_mnist()

# define and train SAE model
sae = SAE(dims=[x.shape[-1], 500, 500, 2000, 10])
sae.fit(x=x, epochs=400)
sae.autoencoders.save_weights('weights_%s.h5' % db)

# extract features
print('Finished training, extracting features using the trained SAE model')
features = sae.extract_feature(x)

print('performing k-means clustering on the extracted features')
from sklearn.cluster import KMeans
km = KMeans(n_clusters, n_init=20)
y_pred = km.fit_predict(features)

from sklearn.metrics import normalized_mutual_info_score as nmi
print('K-means clustering result on extracted features: NMI =', nmi(y, y_pred))
n_eye = 2

# Repeat KMeans 50 times to reduce randomness
label_tmp = []
inertia_tmp = []
for i in range(50):
    clf = KMeans(n_clusters=n_pose, init='random')
    clf.fit(U)
    label_tmp.append(list(clf.labels_))
    inertia_tmp.append(clf.inertia_)
idx_tmp = inertia_tmp.index(min(inertia_tmp))
label_u = label_tmp[idx_tmp]
inertia_vec.append(min(inertia_tmp))
score_vec.append(silhouette_score(U, np.array(label_u)))
nmi_identity.append(nmi(label_u, img_identity))
nmi_pose.append(nmi(label_u, img_pose))

n_show = n_pose

# Show the mean image of each cluster
img_avg = np.zeros((n_show, img.shape[1], img.shape[2]))
cnt_avg = np.zeros((n_show, 1))
for i in range(len(label_u)):
    img_avg[label_u[i]] += img[i]
    cnt_avg[label_u[i]] += 1.
for i in range(n_show):
    img_avg[i] = img_avg[i] / cnt_avg[i]
    # plt.imshow(img_avg[i], cmap=cm.Greys_r)
    # plt.show()
            identity, pose, expression, eye], file_pkl)
file_pkl.close()

# Normalize each image to [0, 1], then standardize the flattened features
for i in range(img.shape[0]):
    img[i] = (img[i] - img[i].min()) * 1. / (img[i].max() - img[i].min())
img = img.reshape(img.shape[0], img.shape[1] * img.shape[2])
img = scale(img)

# 'global', 'local', 'manual'
flag_sigma = 'global'

# Compute similarity matrix
sigma, aff_img = compute_affinity(img, flag_sigma=flag_sigma, sigma=100., nn=7)
if flag_sigma == 'local':
    sigma_init = sum(sigma ** 2) / len(sigma)
    print("Average Sigma(local): ", sigma_init)

K = 20

# Construct existing solution Y
Y = np.zeros((img.shape[0], 20))
for i in range(img.shape[0]):
    Y[i, img_identity[i]] = 1
val_lambda = 1.2
arr_tmp = val_lambda * Y.dot(Y.T)

label_pred_identity = spectral_clustering(aff_img, n_clusters=K)
nmi_identity = nmi(label_pred_identity, img_identity)
print(nmi_identity)
feat_lbp = pickle.load(file_lbp)
file_lbp.close()

# Compute similarity matrices for FFT, Gabor, and LBP features
flag_sigma = 'global'
sigma_fft, aff_fft = compute_affinity(feat_fft, flag_sigma=flag_sigma)
sigma_gabor, aff_gabor = compute_affinity(feat_gabor, flag_sigma=flag_sigma)
sigma_lbp, aff_lbp = compute_affinity(feat_lbp, flag_sigma=flag_sigma)
print("kernel computation finished")

# Spectral clustering on each kernel separately
K = 4
label_pred_fft = spectral_clustering(aff_fft, n_clusters=K)
label_pred_gabor = spectral_clustering(aff_gabor, n_clusters=K)
nmi_fft_identity = nmi(label_pred_fft, img_identity)
nmi_gabor_identity = nmi(label_pred_gabor, img_identity)
print("nmi_fft_identity: ", nmi_fft_identity)
print("nmi_gabor_identity: ", nmi_gabor_identity)

# Convex combination of the two kernels
for alpha in np.arange(0.1, 1.0, 0.1):
    aff_add = alpha * aff_fft + (1 - alpha) * aff_gabor
    label_pred_add = spectral_clustering(aff_add, n_clusters=K)
    nmi_add_identity = nmi(label_pred_add, img_identity)
    print(alpha, nmi_add_identity)

# Weighted summation
M = 2
Q = matrix([[np.trace(aff_fft.dot(aff_fft)),
             np.trace(aff_fft.dot(aff_gabor))],
            [np.trace(aff_gabor.dot(aff_fft)),
def fPredict(test_ref, test_art, dParam, dHyper):
    weights_file = dParam['sOutPath'] + os.sep + '{}.h5'.format(dHyper['bestModel'])
    patchSize = dParam['patchSize']
    vae = createModel(patchSize, dHyper)
    vae.compile(optimizer='adam', loss=None)
    vae.load_weights(weights_file)

    test_ref = np.expand_dims(test_ref, axis=1)
    test_art = np.expand_dims(test_art, axis=1)

    predict_ref, predict_art = vae.predict([test_ref, test_art],
                                           dParam['batchSize'][0], verbose=1)

    test_ref = np.squeeze(test_ref, axis=1)
    test_art = np.squeeze(test_art, axis=1)
    predict_art = np.squeeze(predict_art, axis=1)

    if dHyper['unpatch']:
        test_ref = fRigidUnpatchingCorrection2D(dHyper['actualSize'], test_ref, dParam['patchOverlap'])
        test_art = fRigidUnpatchingCorrection2D(dHyper['actualSize'], test_art, dParam['patchOverlap'])
        predict_art = fRigidUnpatchingCorrection2D(dHyper['actualSize'], predict_art, dParam['patchOverlap'], 'average')

        # TV denoising of the motion-affected image at three weights
        test_art_tv_1 = denoise_tv_chambolle(test_art, weight=1)
        test_art_tv_3 = denoise_tv_chambolle(test_art, weight=3)
        test_art_tv_5 = denoise_tv_chambolle(test_art, weight=5)

        if dHyper['evaluate']:
            label = 'NRMSE: {:.2f}, SSIM: {:.3f}, NMI: {:.3f}'
            # (image stack, subplot title, filename prefix) for the six panels
            panels = [
                (test_ref, 'reference image', 'reference_'),
                (test_art, 'motion-affected image', 'art_'),
                (predict_art, 'reconstructed image', 'recon_'),
                (test_art_tv_1, 'TV weight 1', 'tv1_'),
                (test_art_tv_3, 'TV weight 3', 'tv3_'),
                (test_art_tv_5, 'TV weight 5', 'tv5_'),
            ]

            def metrics_label(ref, img):
                # NRMSE/SSIM/NMI of `img` against the reference slice
                return label.format(
                    nrmse(ref, img),
                    ssim(ref, img, data_range=(img.max() - img.min())),
                    nmi(ref.flatten(), img.flatten()))

            if dParam['lSaveIndividual']:
                fig = plt.figure()
                plt.gray()
                for i in range(len(test_ref)):
                    for img_stack, title, prefix in panels:
                        ax = imshow(img_stack[i])
                        plt.xticks([])
                        plt.yticks([])
                        ax.set_xlabel(metrics_label(test_ref[i], img_stack[i]))
                        ax.set_title(title)
                        if dParam['lSave']:
                            plt.savefig(dParam['sOutPath'] + os.sep + 'result' + os.sep + prefix + str(i) + '.png')
                        else:
                            plt.show()
            else:
                fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(15, 10),
                                         sharex=True, sharey=True)
                ax = axes.ravel()
                plt.gray()
                for i in range(len(test_ref)):
                    for j, (img_stack, title, _) in enumerate(panels):
                        ax[j].imshow(img_stack[i])
                        ax[j].set_xlabel(metrics_label(test_ref[i], img_stack[i]))
                        ax[j].set_title(title)
                    if dParam['lSave']:
                        plt.savefig(dParam['sOutPath'] + os.sep + 'result' + os.sep + str(i) + '.png')
                    else:
                        plt.show()
        else:
            plt.figure()
            plt.gray()
            for i in range(predict_art.shape[0]):
                plt.imshow(predict_art[i])
                if dParam['lSave']:
                    plt.savefig(dParam['sOutPath'] + os.sep + 'result' + os.sep + str(i) + '.png', dpi=300)
                else:
                    plt.show()
    else:
        nPatch = predict_art.shape[0]
        for i in range(nPatch // 4):
            fig, axes = plt.subplots(nrows=4, ncols=2)
            plt.gray()
            cols_title = ['original_art', 'predicted_art']
            for ax, col in zip(axes[0], cols_title):
                ax.set_title(col)
            for j in range(4):
                axes[j, 0].imshow(test_art[4 * i + j])
                axes[j, 1].imshow(predict_art[4 * i + j])
            if dParam['lSave']:
                plt.savefig(dParam['sOutPath'] + os.sep + 'result' + os.sep + str(i) + '.png')
            else:
                plt.show()
        return feature_model.predict(x, batch_size=self.batch_size)


if __name__ == "__main__":
    """
    An example of how to use the SAE model on the MNIST dataset.
    Run `python3 SAE.py` in a terminal to see the result.
    """
    import numpy as np
    from load_mnist import load_mnist

    x, y = load_mnist(sample_size=10000, seed=0)
    db = 'mnist'
    n_clusters = 10

    # define and train SAE model
    sae = SAE(dims=[x.shape[-1], 64, 32])
    sae.fit(x=x, epochs=400)
    sae.autoencoders.save_weights('weights_%s.h5' % db)

    # extract features
    print('Finished training, extracting features using the trained SAE model')
    features = sae.extract_feature(x)

    print('performing k-means clustering on the extracted features')
    from sklearn.cluster import KMeans
    km = KMeans(n_clusters, n_init=20)
    y_pred = km.fit_predict(features)

    from sklearn.metrics import normalized_mutual_info_score as nmi
    print('K-means clustering result on extracted features: NMI =', nmi(y, y_pred))
    print("Optimization fails", v_lambda_idx)

# Repeat KMeans 50 times to reduce randomness
label_tmp = []
inertia_tmp = []
for i in range(50):
    clf = KMeans(n_clusters=dim_q, init='random')
    clf.fit(U)
    label_tmp.append(list(clf.labels_))
    inertia_tmp.append(clf.inertia_)
idx_tmp = inertia_tmp.index(min(inertia_tmp))
label_u = label_tmp[idx_tmp]
inertia_vec.append(min(inertia_tmp))
score_vec.append(silhouette_score(U, np.array(label_u)))
nmi_e.append(nmi(label_e, label_u))

beta_vec = np.array(beta_vec)

# Plot the results
plt.figure(0)
plt.plot(v_lambda_range, nmi_e, 'r', label='nmi_e')
plt.xlabel("lambda (tradeoff between clustering quality and novelty)")
plt.ylabel("NMI value")
plt.legend(loc='upper left')
plt.figure(1)
plt.plot(v_lambda_range, inertia_vec)
plt.xlabel("lambda")
plt.ylabel("Inertia Value")
plt.figure(2)
def run_net(data, params):
    """Run the network with the given parameters."""
    #
    # UNPACK DATA
    #
    x_train, y_train, x_val, y_val, x_test, y_test = data['cnc']['train_and_test']
    x_train_unlabeled, _, x_train_labeled, y_train_labeled = data['cnc'][
        'train_unlabeled_and_labeled']
    x_val_unlabeled, _, _, _ = data['cnc']['val_unlabeled_and_labeled']

    if 'siamese' in params['affinity']:
        pairs_train, dist_train, pairs_val, dist_val = data['siamese'][
            'train_and_test']

    x = np.concatenate((x_train, x_val, x_test), axis=0)
    y = np.concatenate((y_train, y_val, y_test), axis=0)

    # Use len() here: truthiness of a multi-element numpy array is ambiguous.
    if len(x_train_labeled):
        y_train_labeled_onehot = OneHotEncoder().fit_transform(
            y_train_labeled.reshape(-1, 1)).toarray()
    else:
        y_train_labeled_onehot = np.empty((0, len(np.unique(y))))

    #
    # SET UP INPUTS
    #
    # create true y placeholder (not used in unsupervised training)
    y_true = tf.placeholder(tf.float32, shape=(None, params['n_clusters']),
                            name='y_true')

    batch_sizes = {
        'Unlabeled': params['batch_size'],
        'Labeled': params['batch_size'],
    }

    input_shape = x.shape[1:]

    # inputs to CNC
    inputs = {
        'Unlabeled': Input(shape=input_shape, name='UnlabeledInput'),
        'Labeled': Input(shape=input_shape, name='LabeledInput'),
    }

    #
    # DEFINE AND TRAIN SIAMESE NET
    # http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    #
    if params['affinity'] == 'siamese':
        siamese_net = networks.SiameseNet(inputs, params['siam_arch'],
                                          params.get('siam_reg'),
                                          params['main_path'], y_true)
        siamese_net.train(pairs_train, dist_train, pairs_val, dist_val,
                          params['siam_lr'], params['siam_drop'],
                          params['siam_patience'], params['siam_ne'],
                          params['siam_batch_size'], params['dset'])
    else:
        siamese_net = None

    #
    # DEFINE AND TRAIN CNC NET
    #
    cnc_net = networks.CncNet(inputs, params['cnc_arch'], params.get('cnc_reg'),
                              y_true, y_train_labeled_onehot,
                              params['n_clusters'], params['affinity'],
                              params['scale_nbr'], params['n_nbrs'], batch_sizes,
                              params['result_path'], params['dset'], siamese_net,
                              x_train, params['cnc_lr'], params['cnc_tau'],
                              params['bal_reg'])
    cnc_net.train(x_train_unlabeled, x_train_labeled, x_val_unlabeled,
                  params['cnc_drop'], params['cnc_patience'], params['min_tem'],
                  params['cnc_epochs'])

    #
    # EVALUATE
    #
    x_cncnet = cnc_net.predict(x)
    prediction = np.argmax(x_cncnet, 1)
    accuracy_all = print_accuracy(prediction, y, params['n_clusters'])
    nmi_score_all = nmi(prediction, y)
    print('NMI: {0}'.format(np.round(nmi_score_all, 3)))

    if params['generalization_metrics']:
        x_cncnet_train = cnc_net.predict(x_train_unlabeled)
        x_cncnet_test = cnc_net.predict(x_test)

        prediction_train = np.argmax(x_cncnet_train, 1)
        accuracy_train = print_accuracy(prediction_train, y_train, params['n_clusters'])
        nmi_score_train = nmi(prediction_train, y_train)
        print('TRAIN NMI: {0}'.format(np.round(nmi_score_train, 3)))

        prediction_test = np.argmax(x_cncnet_test, 1)
        accuracy_test = print_accuracy(prediction_test, y_test, params['n_clusters'])
        nmi_score_test = nmi(prediction_test, y_test)
        print('TEST NMI: {0}'.format(np.round(nmi_score_test, 3)))

        with gfile.Open(params['result_path'] + 'results', 'w') as f:
            f.write(accuracy_all + ' ' + accuracy_train + ' ' + accuracy_test + '\n')
            f.write(str(np.round(nmi_score_all, 3)) + ' ' +
                    str(np.round(nmi_score_train, 3)) + ' ' +
                    str(np.round(nmi_score_test, 3)) + '\n')
    else:
        with gfile.Open(params['result_path'] + 'results', 'w') as f:
            f.write(accuracy_all + ' ' + str(np.round(nmi_score_all, 3)) + '\n')
sigma_fac, aff_fac = compute_affinity(data_fac, flag_sigma=flag_sigma,
                                      sigma=422.6228, nn=8)
print("kernel computation finished")
if flag_sigma == 'local':
    sigma_fou_init = sum(sigma_fou ** 2) / len(sigma_fou)
    sigma_fac_init = sum(sigma_fac ** 2) / len(sigma_fac)

K = 10

# Ground truth: 10 classes with 200 samples each
label_true = []
for i in range(K):
    for j in range(200):
        label_true.append(i)

# Spectral clustering: Fourier coefficients
label_fou = spectral_clustering(aff_fou, n_clusters=K)
nmi_fou = nmi(label_fou, label_true)
print("NMI(Source 1)", nmi_fou)

# Spectral clustering: autocorrelation profile
label_fac = spectral_clustering(aff_fac, n_clusters=K)
nmi_fac = nmi(label_fac, label_true)
print("NMI(Source 2)", nmi_fac)

# Kernel addition
for alpha in np.arange(0.1, 1.0, 0.1):
    aff_add = alpha * aff_fou + (1 - alpha) * aff_fac
    label_add = spectral_clustering(aff_add, n_clusters=K)
    nmi_add = nmi(label_add, label_true)
    print("NMI(a*source_1+(1-a)*source_2)", (alpha, nmi_add))

# Parameter settings