def rand(l1, l2):
    """
    Adjusted Rand Index of two cluster labelings.

    Parameters:
    - - - - -
    l1, l2 : array
        cluster assignments to compare; both are handed unchanged to ``ars``

    Returns:
    - - - - -
    whatever ``ars(l1, l2)`` returns
    """
    # NOTE(review): ``ars`` is expected to be in module scope (presumably an
    # alias of an adjusted-rand-score implementation) -- confirm the import.
    score = ars(l1, l2)
    return score
def matchClusteringLabels(filename):
    """
    Compare a clustering result against the Kaiju labels for one sample.

    Reads two tab-separated files whose names are derived from ``filename``,
    prints the absolute value of ``ars`` computed on the paired labels, and
    writes a side-by-side comparison file.

    Parameters:
    - - - - -
    filename : str
        sample name spliced into the three hard-coded paths below

    Returns:
    - - - - -
    None. Output is printed and written to
    ``clusteringCompare_<filename>.txt``.

    Raises:
    - - - - -
    KeyError if an id in the Kaiju label file has no entry in the clustering
    result; IndexError / ValueError on lines that do not have an integer in
    the second tab-separated column.
    """
    # Clustering result: column 0 is an id whose first character is dropped
    # (presumably a marker such as '>' -- confirm), column 1 the cluster id.
    result1 = {}
    with open('/home/haohanw/metagenomics/clusteringResult_' + filename
              + '_filterMouse.txt') as clustering_file:
        for line in clustering_file:
            items = line.strip().split('\t')
            result1[items[0][1:]] = int(items[1])

    # Kaiju labels: build the two parallel label lists in file order and keep
    # an id -> label map for the comparison file.
    rs1 = []
    rs2 = []
    result2 = {}
    with open('/home/haohanw/metagenomics/Kaiju/' + filename
              + '.label') as label_file:
        for line in label_file:
            items = line.strip().split('\t')
            rs1.append(int(items[1]))
            rs2.append(result1[items[0]])
            result2[items[0]] = int(items[1])

    # Single-argument print calls emit the same text on Python 2 and 3.
    print('organizing results, now calculating')
    a = ars(rs1, rs2)
    # NOTE(review): abs() hides the sign of the score -- confirm intended.
    print('%s %s' % ('final score', abs(a)))

    # One line per id present in both inputs, ordered by our cluster id.
    sorted_result1 = sorted(result1.items(), key=operator.itemgetter(1))
    with open('/home/haohanw/metagenomics/clusteringCompare_' + filename
              + '.txt', 'w') as f:
        for (n, v) in sorted_result1:
            if n in result2:
                f.write(n + '\t' + str(v) + '\t' + str(result2[n]) + '\n')
def train(self):
    """
    Run the training loop until the coordinator asks to stop.

    Each iteration runs ``self.loss``, ``self.opt`` and ``self.norm`` in
    ``self.sess``. Every 500 steps the cluster assignments are evaluated on
    ``self.raw_images`` and one tab-separated line is printed:
    step, ars, mis, v_score, purity, norm, mean loss so far.

    Returns nothing. ``tf.errors.OutOfRangeError`` is treated as the normal
    end of training; any other exception propagates after the coordinator
    has been asked to stop and the threads have been joined.
    """
    step = 0
    total_loss = 0  # running sum of per-step losses (was named `sum`)
    try:
        while not self.coord.should_stop():
            step += 1
            loss, _, norm = self.sess.run([self.loss, self.opt, self.norm])
            total_loss += loss
            if step % 500 == 0:
                assignments, = self.sess.run(
                    [self.assignments],
                    feed_dict={self.test_images: self.raw_images})
                print('%s\t%0.4f\t%0.4f\t%0.4f\t%0.4f\t%0.4f\t%0.4f' %
                      (step,
                       ars(self.labels, assignments),
                       mis(self.labels, assignments),
                       v_score(self.labels, assignments),
                       self.purity_score(self.labels, assignments),
                       norm,
                       total_loss / step))
    except tf.errors.OutOfRangeError:
        # Function-call form prints the same text on Python 2 and 3.
        print('Done training')
    finally:
        # Always release the coordinator and wait for its threads.
        self.coord.request_stop()
        self.coord.join(self.threads)
# Explicit single fit call on model_2 before the regular fit below.
model_2._fit_single(X, random_state=None)


# In[5]:

model_2.fit(X)


# In[8]:

# Bare expression: in a notebook cell this displays the row labels.
model_2.row_labels_


# In[9]:

# Score model_2's row clustering against the reference labels; the metrics
# are evaluated in the same order they are printed.
predicted_labels_2 = model_2.row_labels_
nmi_2 = nmi(true_labels, predicted_labels_2)
acc_2 = acc(true_labels, predicted_labels_2)
ars_2 = ars(true_labels, predicted_labels_2)
amis_2 = amis(true_labels, predicted_labels_2)
print(nmi_2, acc_2, ars_2, amis_2)


# In[11]:

# Second model: ONM3F with 4 row clusters and 4 column clusters.
model_5 = NMTFcoclus_ONM3F.ONM3F(n_row_clusters=4, n_col_clusters=4)
model_5.fit(X)


# In[15]:

# Same four metrics for model_5.
predicted_labels_5 = model_5.row_labels_
nmi_5 = nmi(true_labels, predicted_labels_5)
acc_5 = acc(true_labels, predicted_labels_5)
ars_5 = ars(true_labels, predicted_labels_5)
amis_5 = amis(true_labels, predicted_labels_5)
print(nmi_5, acc_5, ars_5, amis_5)
# Append the noise columns to the data, then pass it through normalize().
X = np.append(X, noise, axis=1)
X = normalize(X)

# Embed X with GSOM; the first two columns of Y are plotted below.
# Alternative embeddings / parameter sets tried previously:
# Y = SelfOrganizingSwarm(iterations=250, alpha=1, beta=0.9, delta=0.001, theta=3).fit_transform(X)
# Y = PCA(2).fit_transform(X)
# Y = TSNE().fit_transform(X)
# Y = Isomap().fit_transform(X)
# Y = GSOM().fit_transform(X, lr=1.0, beta=0.0, sf=0.01, fd=0.75, wd=0.5)
Y = GSOM().fit_transform(X, lr=1.0, beta=0.5, sf=0.6, wd=0.175, fd=0.8)

# Disabled 3-D view of the raw data:
# fig = plt.figure()
# ax = Axes3D(fig)
# ax.scatter(X.T[0], X.T[1], X.T[2], c=color, alpha=0.5, edgecolors='none')
# plt.show()

# Top panel: embedding coloured by `color`
# (presumably the reference labels -- confirm against the caller).
plt.subplot(211)
# ax = fig.add_subplot(211)
plt.scatter(Y.T[0], Y.T[1], s=15,
            c=plt.cm.jet(color / (n_clusters * 1.0)),
            edgecolors='none', alpha=0.375)

# Bottom panel: same points coloured by KMeans labels fitted on the embedding.
labs = KMeans(n_clusters).fit(Y).labels_
plt.subplot(212)
plt.scatter(Y.T[0], Y.T[1], s=15,
            c=plt.cm.jet(labs / (n_clusters * 1.0)),
            edgecolors='none', alpha=0.375)

# Agreement between `color` and the KMeans labels. The '%s %s' form prints
# exactly what the Python 2 statement `print 'ars ', value` printed, and is
# also valid on Python 3.
print('%s %s' % ('ars ', ars(color, labs)))
print('%s %s' % ('ami ', ami(color, labs)))

# Disabled side-by-side panel:
# ax2 = fig.add_subplot(121)
# ax2.scatter(Y.T[0], Y.T[1], c=color, edgecolors='none', alpha=0.5)
plt.show()
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score as ars

# Column layout of the optdigits CSV files: 0-63 features, 64 target.
feature_columns = np.arange(64)
target_column = 64

# Both files are read without a header row, so columns are labelled 0..64.
digits_train = pd.read_csv('./data/optdigits.tra', header=None)
digits_test = pd.read_csv('./data/optdigits.tes', header=None)

X_train, y_train = digits_train[feature_columns], digits_train[target_column]
X_test, y_test = digits_test[feature_columns], digits_test[target_column]

# Fit 10 clusters on the training features, assign the test rows to them,
# and report the adjusted Rand score against the test targets.
kmeans = KMeans(n_clusters=10)
kmeans.fit(X_train)
y_pred = kmeans.predict(X_test)

score = ars(y_test, y_pred)
print(score)