Example #1
def measure(FC, GC, SL, TL):
    nmi_s = NMI(SL, FC)
    nmi_t = NMI(TL, GC)
    ari_s = ARI(SL, FC)
    ari_t = ARI(TL, GC)

    # print(len(set(FC)), len(set(GC)))
    pri_s = purity(FC, SL)
    pri_t = purity(GC, TL)
    ps, rs, fs = PRF1(FC, SL)
    pt, rt, ft = PRF1(GC, TL)

    perform_source = [nmi_s, ari_s, pri_s, ps, rs, fs]
    perform_target = [nmi_t, ari_t, pri_t, pt, rt, ft]
    return perform_source, perform_target
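The snippets on this page use metric aliases that the code itself never imports. A plausible preamble, together with a hypothetical purity helper (the PRF1 helper is project-specific and left out), might look like the sketch below; the aliases are assumptions matching common sklearn usage.

# Assumed imports for the examples below.
import numpy as np
from sklearn.metrics import normalized_mutual_info_score as NMI
from sklearn.metrics import adjusted_rand_score as ARI
from sklearn.metrics import adjusted_mutual_info_score as AMI
from sklearn.metrics.cluster import contingency_matrix

def purity(labels_pred, labels_true):
    # Hypothetical purity helper: fraction of samples that fall in the majority
    # true class of their assigned cluster. Argument order mirrors measure() above.
    cm = contingency_matrix(labels_true, labels_pred)
    return cm.max(axis=0).sum() / cm.sum()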
Example #2
def cluster_scores(latent_space, K, labels_true):
    labels_pred = KMeans(K).fit_predict(latent_space)
    return [
        silhouette_score(latent_space, labels_true),
        NMI(labels_true, labels_pred),
        ARI(labels_true, labels_pred)
    ]
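A minimal usage sketch for cluster_scores, assuming NMI, ARI, silhouette_score, and KMeans are the usual sklearn imports and using a toy latent space; note that silhouette_score is computed against the true labels here, matching the scVI-style evaluation.

import numpy as np
# 60 points in 2-D, three known groups of 20
latent = np.random.RandomState(0).randn(60, 2)
y_true = np.repeat([0, 1, 2], 20)
asw, nmi, ari = cluster_scores(latent, K=3, labels_true=y_true)
print(asw, nmi, ari)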
Example #3
def calc_NMI(G):
    # compute and return NMI for both the top and bottom node sets
    top = {n: d for n, d in G.nodes(data=True) if d['bipartite'] == 0}
    bottom = {n: d for n, d in G.nodes(data=True) if d['bipartite'] == 1}
    pred_top = []
    truth_top = []
    for n, d in top.items():
        pred_top.append(d['label'])
        truth_top.append(d['community'])
    pred_bottom = []
    truth_bottom = []
    for n, d in bottom.items():
        pred_bottom.append(d['label'])
        truth_bottom.append(d['community'])
    return NMI(truth_top, pred_top, average_method='arithmetic'), \
           NMI(truth_bottom, pred_bottom, average_method='arithmetic')
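calc_NMI expects every node to carry bipartite, label (predicted), and community (ground-truth) attributes. A small hypothetical graph that satisfies that contract, assuming NMI is sklearn's normalized_mutual_info_score:

import networkx as nx

G = nx.Graph()
# top nodes (bipartite=0): predicted 'label' vs. ground-truth 'community'
G.add_node("u0", bipartite=0, label=0, community=0)
G.add_node("u1", bipartite=0, label=0, community=0)
G.add_node("u2", bipartite=0, label=1, community=1)
# bottom nodes (bipartite=1)
G.add_node("v0", bipartite=1, label=0, community=0)
G.add_node("v1", bipartite=1, label=1, community=1)
G.add_edges_from([("u0", "v0"), ("u1", "v0"), ("u2", "v1")])

nmi_top, nmi_bottom = calc_NMI(G)  # both 1.0 here, since predictions match communities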
Example #4
    def test(self, embed):
        acc_scores = []
        nmi_scores = []
        ami_scores = []
        ari_scores = []
        true_labels = self.data.labels.cpu()
        for _ in range(10):
            if self.clustering == 'kmeans':
                pred = KMeans(
                    n_clusters=self.data.num_classes).fit_predict(embed)
            else:
                pred = SpectralClustering(
                    n_clusters=self.data.num_classes).fit_predict(embed)
            acc = self.accuracy(pred)
            nmi = NMI(true_labels, pred, average_method='arithmetic')
            ami = AMI(true_labels, pred, average_method='arithmetic')
            ari = ARI(true_labels, pred)
            acc_scores.append(acc)
            nmi_scores.append(nmi)
            ami_scores.append(ami)
            ari_scores.append(ari)

        print("ACC", mean(acc_scores), std(acc_scores))
        print("NMI", mean(nmi_scores), std(nmi_scores))
        print("AMI", mean(ami_scores), std(ami_scores))
        print("ARI", mean(ari_scores), std(ari_scores))
Example #5
    def clustering_scores(self,
                          name,
                          verbose=True,
                          prediction_algorithm='knn'):
        if self.gene_dataset.n_labels > 1:
            latent, _, labels = get_latent(self.model, self.data_loaders[name])
            if prediction_algorithm == 'knn':
                labels_pred = KMeans(self.gene_dataset.n_labels,
                                     n_init=200).fit_predict(
                                         latent)  # n_jobs>1 ?
            elif prediction_algorithm == 'gmm':
                gmm = GMM(self.gene_dataset.n_labels)
                gmm.fit(latent)
                labels_pred = gmm.predict(latent)

            asw_score = silhouette_score(latent, labels)
            nmi_score = NMI(labels, labels_pred)
            ari_score = ARI(labels, labels_pred)
            uca_score = unsupervised_clustering_accuracy(labels,
                                                         labels_pred)[0]
            if verbose:
                print(
                    "Clustering Scores for %s:\nSilhouette: %.4f\nNMI: %.4f\nARI: %.4f\nUCA: %.4f"
                    % (name, asw_score, nmi_score, ari_score, uca_score))
            return asw_score, nmi_score, ari_score, uca_score
Example #6
 def get_performance(self, y_true, y_pred):
     purity = self.get_purity(y_true, y_pred)
     from sklearn.metrics import normalized_mutual_info_score as NMI
     from sklearn.metrics import adjusted_rand_score as ARI
     nmi = NMI(y_true, y_pred)
     ari = ARI(y_true, y_pred)
     return purity, nmi, ari
Example #7
def compute_dist():
    with open(path + 'Trapnell_TCC_pairwise_distance_dge.dat', 'wb') as outfile:
        pickle.dump(D, outfile)

path = '/home/zgy_ucla_cs/Research/singleCell/TCC_old_pipeline/scRNA-Clustering/Trapnell_pipeline/'

with open(path + "Trapnell_TCC_pairwise_distance_21.dat", 'rb') as f:
    D=pickle.load(f, encoding='latin1')

with open(path + "Trapnell_TCC_pairwise_distance_31.dat", 'rb') as f:
    D=pickle.load(f, encoding='latin1')    

cluster_labels = np.loadtxt(path + 'Trapnells_data/Trapnell_labels.txt',dtype=str).astype(int)-1    
num_of_clusters=3
similarity_mat= D.max()-D
labels_spectral = spectral(num_of_clusters,similarity_mat)
print(NMI(cluster_labels, labels_spectral), ARI(cluster_labels, labels_spectral))  

# ===================== scVI =====================
# expression_train, expression_test, cluster_labels, c_test = train_test_split(X, X_type, random_state=0)


batch_size = 128
learning_rate = 0.001
epsilon = 0.01
latent_dimension = 10





tf.reset_default_graph()
expression = tf.placeholder(tf.float32, (None, X.shape[1]), name='x')
kl_scalar = tf.placeholder(tf.float32, (), name='kl_scalar')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=epsilon)
training_phase = tf.placeholder(tf.bool, (), name='training_phase')

# getting priors
log_library_size = np.log(np.sum(X, axis=1))
mean, var = np.mean(log_library_size), np.var(log_library_size)

# loading data
model = scVI.scVIModel(expression=expression, kl_scale=kl_scalar, \
                         optimize_algo=optimizer, phase=training_phase, \
                          library_size_mean=mean, library_size_var=var, n_latent=latent_dimension)

#starting computing session
sess = tf.Session()

 # Initialize the graph and fit the training set
# this takes less than a minute on a Tesla K80
sess.run(tf.global_variables_initializer())
result = train_model(model, (X, X), sess, 250, batch_size=batch_size)

dic_full = {expression: X, training_phase:False}
latent = sess.run(model.z, feed_dict=dic_full)
# clustering_score = cluster_scores(latent, len(cell_types), cluster_labels)
clustering_score = cluster_scores(latent, np.max(cluster_labels), cluster_labels)
print("Silhouette", clustering_score[0], "\nAdjusted Rand Index", clustering_score[1], \
        "\nNormalized Mutual Information", clustering_score[2])
Example #8
def evaluate(net, loader):
    """evaluates on provided data
    """

    net.eval()
    predicts = np.zeros(len(loader.dataset), dtype=np.int32)
    labels = np.zeros(len(loader.dataset), dtype=np.int32)
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(loader):
            logger.progress('processing %d/%d batch' %
                            (batch_idx, len(loader)))
            inputs = inputs.to(cfg.device, non_blocking=True)
            # assuming the last head is the main one
            # output dimension of the last head
            # should be consistent with the ground-truth
            logits = net(inputs)[-1]
            start = batch_idx * loader.batch_size
            end = start + loader.batch_size
            end = min(end, len(loader.dataset))
            labels[start:end] = targets.cpu().numpy()
            predicts[start:end] = logits.max(1)[1].cpu().numpy()

    # compute accuracy
    num_classes = labels.max().item() + 1
    count_matrix = np.zeros((num_classes, num_classes), dtype=np.int32)
    for i in range(predicts.shape[0]):
        count_matrix[predicts[i], labels[i]] += 1
    reassignment = np.dstack(
        linear_sum_assignment(count_matrix.max() - count_matrix))[0]
    acc = count_matrix[reassignment[:, 0], reassignment[:, 1]].sum().astype(
        np.float32) / predicts.shape[0]
    return acc, NMI(labels, predicts), ARI(labels, predicts)
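The accuracy block above (one-to-one matching of predicted clusters to ground-truth classes via the Hungarian algorithm) reappears almost verbatim in Example #27. A standalone sketch of the same idea, assuming only NumPy and SciPy and non-negative integer labels:

import numpy as np
from scipy.optimize import linear_sum_assignment

def cluster_accuracy(labels, predicts):
    # Best-match accuracy: assign each predicted cluster to one true class so that
    # the total number of matched samples is maximized.
    labels = np.asarray(labels)
    predicts = np.asarray(predicts)
    num_classes = int(max(labels.max(), predicts.max())) + 1
    count_matrix = np.zeros((num_classes, num_classes), dtype=np.int64)
    for p, t in zip(predicts, labels):
        count_matrix[p, t] += 1
    row_ind, col_ind = linear_sum_assignment(count_matrix.max() - count_matrix)
    return count_matrix[row_ind, col_ind].sum() / predicts.shape[0]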
Example #9
    def score_function(self, output):
        model, data = self.model, self.data
        loss = model.loss_function(data, output)
        error = self.mse(output['adj_recon'], data.adjmat)
        pred = model.classify(data)
        nmi = NMI(data.labels.cpu(), pred, average_method='arithmetic')

        return {'loss': loss, 'error': error, 'nmi': nmi}
Example #10
def test_NMI():

    label_true = np.asarray([0, 1, 2, 3])
    label_pred = np.asarray([0, 1, 2, 3])

    expected = NMI(label_true, label_pred)
    actual = statistics.NMI(label_true, label_pred)

    assert expected == actual

    label_true = np.asarray([0, 0, 0, 3])
    label_pred = np.asarray([0, 2, 2, 2])

    expected = NMI(label_true, label_pred)
    actual = statistics.NMI(label_true, label_pred)

    assert expected == actual
Example #11
 def eval_output(self):
     if self.cost:
         print(
             f"The final cost reduction is {round((self.cost[-2]-self.cost[-1])/self.cost[-2]*100, 2)}%"
         )
     if self.location == 'python':
         final_assign = self.label[-1, :]
     else:
         final_assign = self.label
     nmi = NMI(self.gt_par, final_assign)
     print(f"The final NMI score is {round(nmi, 4)}")
     return nmi
Example #12
def Lda_topic_model(docs, dictionary, nb_topics, true_labels):
    k = 5
    lda = LdaModel(docs, num_topics=k, id2word=dictionary, passes=10)

    top_words = [[word[::-1] for word, _ in lda.show_topic(topic_id, topn=50)]
                 for topic_id in range(lda.num_topics)]
    top_betas = [[beta for _, beta in lda.show_topic(topic_id, topn=50)]
                 for topic_id in range(lda.num_topics)]
    nb_words = 12
    f, ax = plt.subplots(3, 2, figsize=(20, 15))
    for i in range(nb_topics):
        # ax = plt.subplot(gs[i])
        m, n = np.unravel_index(i, (3, 2))
        ax[m, n].barh(range(nb_words),
                      top_betas[i][:nb_words],
                      align='center',
                      color='green',
                      ecolor='black')
        ax[m, n].invert_yaxis()
        ax[m, n].set_yticks(range(nb_words))
        ax[m, n].set_yticklabels(top_words[i][:nb_words])
        ax[m, n].set_title("Topic " + str(i))
    plt.show()
    # get distribution of docs on topics.
    dist_on_topics = lda.get_document_topics(docs)
    topic_predict = []
    for d in dist_on_topics:
        p = 0
        win_topic = 0
        print(d)
        for i, t in enumerate(d):
            if t[1] > p:
                p = t[1]
                win_topic = t[0]
        print(win_topic)
        topic_predict.append(win_topic)
    mat = confusion_matrix(true_labels, topic_predict)
    print(mat)
    cluster_to_class = {}
    for i in range(5):
        cluster_to_class[i] = np.argmax(mat[:, i])
    custom_labels = [cluster_to_class[c] for c in topic_predict]
    print("accuracy:", accuracy_score(true_labels, custom_labels))
    print("f1_score micro: ",
          f1_score(true_labels, custom_labels, average='micro'))
    print("f1_score: macro",
          f1_score(true_labels, custom_labels, average='macro'))
    print("NMI", NMI(true_labels, custom_labels))
Example #13
def analysis():
    df_gan = pd.read_csv('result/pbmc_two_batch-cluster_result.csv',
                         delimiter=',')
    df_lsi = pd.read_csv('result/pbmc_two_batch-LSI-cluster_result.csv',
                         delimiter=',')

    labels = df_lsi['predicted label'].values
    labels_pred = df_gan['predicted label'].values

    nmi_score = NMI(labels, labels_pred)
    ari_score = ARI(labels, labels_pred)
    print("Clustering Scores:\nNMI: %.4f\nARI: %.4f\n" %
          (nmi_score, ari_score))
Example #14
 def clustering_scores(self, name, verbose=True):
     if self.gene_dataset.n_labels > 1:
         latent, _, labels = get_latent(self.model, self.data_loaders[name])
         labels_pred = KMeans(self.gene_dataset.n_labels,
                              n_init=200).fit_predict(latent)  # n_jobs>1 ?
         asw_score = silhouette_score(latent, labels)
         nmi_score = NMI(labels, labels_pred)
         ari_score = ARI(labels, labels_pred)
         if verbose:
             print(
                 "Clustering Scores for %s:\nSilhouette: %.4f\nNMI: %.4f\nARI: %.4f"
                 % (name, asw_score, nmi_score, ari_score))
         return asw_score, nmi_score, ari_score
Example #15
def get_performance(y_true, y_pred, n_cluster):
    """
    Get the evaluation metrics for the current round.
    :param y_true:
    :param y_pred:
    :param n_cluster:
    :return:
    """
    purity = get_purity(y_true, y_pred, n_cluster)
    from sklearn.metrics import normalized_mutual_info_score as NMI
    from sklearn.metrics import adjusted_rand_score as ARI
    nmi = NMI(y_true, y_pred)
    ari = ARI(y_true, y_pred)
    return purity, nmi, ari
Example #16
def log_NMI_AC(cp, ground_truth, dataset, log_flag, SEED=1234):
    # Init clustering hyperparameters
    n_clusters = cp.getint('Hyperparameters', 'ClusterNum')
    cluster_init = cp.getint('Hyperparameters', 'ClusterInit')
    # KMeans model
    #  km = KMeans(n_clusters=n_clusters, n_init=cluster_init, n_jobs=-1,
    #  random_state=SEED)
    km = AC(n_clusters=n_clusters)
    if isinstance(dataset, str):
        pred = km.fit_predict(np.load(dataset))
    else:
        pred = km.fit_predict(dataset)
    log('--------------- {} {} ------------------------'.format(
        log_flag, NMI(ground_truth, pred)))
Example #17
 def eval_adv_efficiency(self, Y, Yadv):
     score = float("inf")
     if self.objective == "frobenius":
         y = self.one_hot(Y)
         yadv = self.one_hot(Yadv)
         score = -torch.norm(y @ torch.t(y) - yadv @ torch.t(yadv),
                             p=2) / len(Y)
     elif self.objective == "AMI":
         score = max(AMI(Y.cpu(), Yadv.cpu()), 0.0)
     elif self.objective == "NMI":
         score = max(NMI(Y.cpu(), Yadv.cpu()), 0.0)
     elif self.objective == "accuracy":
         score = AC(Y.cpu(), Yadv.cpu())
     return score
Example #18
def kmean_clustering(doc_vectors, true_labels):
    k_mean = KMeans(n_clusters=5)
    k_mean.fit(doc_vectors)
    predict_label = k_mean.labels_
    mat = confusion_matrix(true_labels, predict_label)
    print(mat)
    cluster_to_class = {}
    for i in range(5):
        cluster_to_class[i] = np.argmax(mat[:, i])
    custom_labels = [cluster_to_class[c] for c in predict_label]
    print("accuracy:", accuracy_score(true_labels, custom_labels))
    print("f1_score micro: ",
          f1_score(true_labels, custom_labels, average='micro'))
    print("f1_score: macro",
          f1_score(true_labels, custom_labels, average='macro'))
    print("NMI", NMI(true_labels, predict_label))
Example #19
    def score(self, z, true_labels):
        assert self.labels is not None, "Cannot compute clustering scores before fitting the data."

        self.silhouette_score = silhouette_score(z, self.labels)
        self.nmi = NMI(
            true_labels, self.labels,
            average_method="geometric")  # Same average as original paper
        self.ari = ARI(true_labels, self.labels)

        true_k = len(np.unique(true_labels))
        if self.k == true_k:
            self.accuracy = accuracy(true_labels, self.labels)
        else:
            print(
                "Fitted number of labels ({}) is not equal to given true number of labels ({}). Cannot "
                "compute accuracy.".format(self.k, true_k))
Example #20
    def clustering_scores(self, prediction_algorithm="knn"):
        if self.gene_dataset.n_labels > 1:
            latent, _, labels = self.get_latent()
            if prediction_algorithm == "knn":
                labels_pred = KMeans(self.gene_dataset.n_labels, n_init=200).fit_predict(latent)  # n_jobs>1 ?
            elif prediction_algorithm == "gmm":
                gmm = GMM(self.gene_dataset.n_labels)
                gmm.fit(latent)
                labels_pred = gmm.predict(latent)

            asw_score = silhouette_score(latent, labels)
            nmi_score = NMI(labels, labels_pred)
            ari_score = ARI(labels, labels_pred)
            uca_score = unsupervised_clustering_accuracy(labels, labels_pred)[0]
            logger.debug("Clustering Scores:\nSilhouette: %.4f\nNMI: %.4f\nARI: %.4f\nUCA: %.4f" %
                         (asw_score, nmi_score, ari_score, uca_score))
            return asw_score, nmi_score, ari_score, uca_score
Example #21
File: main.py Project: varma-ds/OTDA
def main():
    data_df = readdata(dataFolder / corpus)
    data_df = shuffle(data_df, random_state=0)
    texts = data_df['text']
    print(len(texts))

    texts = preprocessortext(texts)
    texts = filter_text(texts)

    vectorizer, vocab = getvocabulary(texts)
    print('vocab', len(vocab))

    doctopic = None
    model = OTDA(len(vocab),
                 num_topics,
                 alpha=0.1,
                 beta=0,
                 max_iter=100,
                 max_err=0.0001,
                 fix_seed=True)

    topic_term_hist = []
    for batch_texts in generate_batches(texts, batch_size):
        docterm_m = get_docterm_matrix(batch_texts, vocab, method='tf')
        print(docterm_m.shape)
        prevS = 0
        W, H, prevS = otda(docterm_m, model, prevS)
        print(W.shape, H.shape)

        if doctopic is None:
            doctopic = H
        else:
            doctopic = np.vstack((doctopic, H))

        topic_term_hist.append(W)

    cluster = np.argmax(doctopic, axis=1)
    nmi = NMI(data_df["Tag1"], cluster, average_method='arithmetic')
    print('NMI', nmi)

    emerging_topics = detect_emergingtopics(topic_term_hist, 90)
    print('emerging', emerging_topics)
Example #22
def test_clustering(data, Y, coltype):
    nmis = []
    times = []
    n_clusters = len(set(Y))
    for i in range(20):
        cluster = AgglomerativeClustering(n_clusters=n_clusters,
                                          affinity="precomputed",
                                          linkage="average")
        start = time.time()
        sim = build_ensemble_inc(data, coltypes=coltype)
        duration = time.time() - start
        times.append(duration)
        distance = sim_to_dist(sim)
        predicted = cluster.fit_predict(distance)
        nmis.append(NMI(Y, predicted))
    print("Summary of all the runs.\n")
    print(
        "Mean nmi : {0} (standard deviation : {1}), mean duration : {2} (standard deviation : {3}) \n"
        .format(np.mean(nmis), np.std(nmis), np.mean(times),
                np.std(times)))
    print("----------\n")
Example #23
def clustering_scores(X, y, prediction_algorithm='knn'):
    from sklearn.metrics import adjusted_rand_score as ARI
    from sklearn.metrics import normalized_mutual_info_score as NMI
    from sklearn.metrics import silhouette_score
    from sklearn.mixture import GaussianMixture as GMM
    from sklearn.cluster import KMeans

    cluster_num = np.unique(y).shape[0]
    if prediction_algorithm == 'knn':
        labels_pred = KMeans(cluster_num, n_init=200).fit_predict(X)
    elif prediction_algorithm == 'gmm':
        gmm = GMM(cluster_num)
        gmm.fit(X)
        labels_pred = gmm.predict(X)
    labels = y
    asw_score = silhouette_score(X, labels)
    nmi_score = NMI(labels, labels_pred)
    ari_score = ARI(labels, labels_pred)
    labels_int = convert_label_to_int(labels)
    uca_score = unsupervised_clustering_accuracy(labels_int, labels_pred)[0]
    return asw_score, nmi_score, ari_score, uca_score
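unsupervised_clustering_accuracy and convert_label_to_int are not defined in these snippets; they come from the surrounding scVI-style codebase. A hypothetical stand-in matching how it is called here would return a (score, assignment) pair, so that [0] picks out the accuracy, and would expect non-negative integer labels (hence the convert_label_to_int step). A sketch, not the library implementation:

import numpy as np
from scipy.optimize import linear_sum_assignment

def unsupervised_clustering_accuracy(labels_true, labels_pred):
    # Hungarian-matched clustering accuracy over integer label arrays.
    labels_true = np.asarray(labels_true)
    labels_pred = np.asarray(labels_pred)
    n = int(max(labels_true.max(), labels_pred.max())) + 1
    reward = np.zeros((n, n), dtype=np.int64)
    for p, t in zip(labels_pred, labels_true):
        reward[p, t] += 1
    row_ind, col_ind = linear_sum_assignment(reward.max() - reward)
    score = reward[row_ind, col_ind].sum() / labels_pred.size
    return score, np.stack([row_ind, col_ind], axis=1)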
Example #24
# Load dataset embeddings and labels.
obj = np.load('mit67_embeddings.npz')
labels = obj['labels']
embeddings = obj['embeddings']

# Set up the subset of labels to visualize.
n_labels = 5
_uniq = np.unique(labels)[:n_labels]
print('The {n} labels selected to clusterize:'.format(n=n_labels), _uniq)

# Create the embeddings matrix (and its corresponding labels list)
# containing instances belonging to labels in the previous subset.
subsets_emb = np.zeros((0, 4096))
true_labels = []
for idx, _lab in enumerate(_uniq):
    part_emb = embeddings[np.where(labels == _lab)]
    subsets_emb = np.concatenate((subsets_emb, part_emb))
    true_labels += [_lab for _ in range(part_emb.shape[0])]
true_labels = np.array(true_labels)

# Use a dimensionality reduction technique (e.g., PCA).
reduced_emb = PCA(n_components=100).fit_transform(subsets_emb)

# Apply a clustering technique (e.g., KMeans) and evaluate its performance with
# a metric (e.g., Normalized Mutual Information).
pred_labels = KMeans(n_clusters=n_labels,
                     random_state=0).fit_predict(reduced_emb)
nmi = NMI(true_labels, pred_labels)
print('NMI score:', nmi)
Example #25
File: run_spectral.py Project: mlzxy/dac
args, _ = parser.parse_known_args()
print(str(args))

benchmark = torch.load(os.path.join(benchmarks_path, args.benchmarkfile))
accm = Accumulator('ari', 'nmi', 'et')
for batch in tqdm(benchmark):
    B = batch['X'].shape[0]
    for b in range(B):
        X = to_numpy(batch['X'][b])
        true_labels = to_numpy(batch['labels'][b].argmax(-1))
        true_K = len(np.unique(true_labels))

        tick = time.time()
        spec = SpectralClustering(n_clusters=true_K,
                                  affinity='nearest_neighbors',
                                  n_neighbors=10).fit(X)
        labels = spec.labels_

        accm.update([
            ARI(true_labels, labels),
            NMI(true_labels, labels, average_method='arithmetic'),
            time.time() - tick
        ])

save_dir = os.path.join(results_path, 'baselines', 'mmaf_spectral')
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
logger = get_logger('spectral_baseline', os.path.join(save_dir, args.filename))
logger.info(accm.info())
Example #26
	def optimize(self):
		it=0
		tm=[]
		md=[]
		nmi=[]
		ittr=[]
		com=[]	
		for itr in range(20):			
			self.__init__()
			startTime = time.time()
			self.Input_Graph()
			self.bats_init()
			#for i in self.bats:
			#	print(i.node)
			t=self.G.number_of_nodes()
			vel=[]
			for ll in range(t):
				vel.append(0)

			self.velocity=vel
			#print(self.velocity)
			#self.best_positions=vel;
			#self.best_p=vel
			#print('bf',self.velocity)
			self.best_bats()
			
			#print(self.velocity)
			#print(self.best_positions)
			#print('af',self.velocity)
			print("iteration : ",(itr+1))
			for i in range(self.iteration):
				#print('af',self.velocity)
				#print("Iteration : %d"%(i+1),end='\r')
				#print("iteration : ",(i+1))
				inc=0
				for p in self.bats:
					#self.velocity=vel
					#print(p.node)
					#print('bf',self.best_positions)
					
					self.updatevelocity(p)
					#print('af',self.best_positions)
					t1=self.updatepos(p)
					
					y=self.rearrange(t1)

					if(round(np.random.uniform(0,1),2)>self.R):
						#t2=self.eq_4()
						#y=self.rearrange(t2)
						y=self.eq_4()
					
					rd=round(np.random.uniform(0,1),2)
					fnew=self.fitness(y)
					#print(fnew)
					fi=self.bats_fitness_value[inc]
					bp=[]
					if(rd<self.A and fi<fnew):
						#print("p ",p.node)
						#print("y ",y.node)
						p=y.copy()					
						self.bats_fitness_value[inc] = fnew
						#self.best_p = 
						#self.bats.bats_fitness_values[(self.bats_fitness_values.keys())[inc]]=fnew
						it=i	
						#print(fnew)
						self.best_positions = [y.node[nd]['pos'] for nd in y]
						self.best_p[inc] = [y.node[nd]['pos'] for nd in y] # this is an array of positions
						#self.best_p[inc] = []

						#if (self.pos_modularity<fnew):   # here comparison with old fitness if good then position vector and 
						#	self.pos_modularity=fnew     # modularity is update
							#fit = self.fitness(y)
							#print()
						#	self.best_p = [y.node[nd]['pos'] for nd in y]
							#print(self.best_p)

						#print(self.best_positions)
						#print("communites : ",len(set(self.best_positions)))
						#print(self.best_positions)
						self.increase_r(i)
						self.decrease_a()
					inc+=1
	                        
				fmax=max(self.bats_fitness_value)
				#print(' max ',self.pos_modularity)
				#print(self.best_p)
				#fmax=max(self.bats_fitness_values.keys())
				#print(self.bats_fitness_value)
				#print(self.best_positions)
				pos=self.best_p[self.bats_fitness_value.index(fmax)] 
				self.positions=pos
				#print("communites : ",len(set(pos)))
				#print("Position : ",self.best_positions)
				
			print(fmax)
			n=NMI(list(self.pred.values()),pos)
			tm.append(float(format(np.round((time.time() - startTime),2))))
			md.append(fmax)
			nmi.append(n)
			#ittr.append(it)
			com.append(len(set(pos)))			
		print("\n\n**********************************************************")	
		print('\nThe script takes {0} seconds'.format(tm))
		print("\nModularity is : ",md)
		print("\nNMI : ",nmi)			
		print("\nNumber of Communites : ",com)
			#print("\nGlobal Best Position : ",pos)
		#nx.draw(self.G,node_color=[self.G.node[i]['pos'] for i in self.G])
		nx.draw(self.G, node_color=pos)
		plt.show()
Example #27
def evaluate(net, loader, writer, epoch):
    """evaluates on provided data
    """

    net.eval()
    predicts = np.zeros(len(loader.dataset), dtype=np.int32)
    labels = np.zeros(len(loader.dataset), dtype=np.int32)
    intermediates = np.zeros((len(loader.dataset), 2048), dtype=np.float32)
    images = np.zeros((len(loader.dataset), 3, 64, 64), dtype=np.float32)

    print(f"Evaluating on {len(loader.dataset)} samples")

    with torch.no_grad():
        for batch_idx, (batch, targets) in enumerate(loader):
            # logger.progress('processing %d/%d batch' % (batch_idx, len(loader)))
            batch = batch.to(cfg.device, non_blocking=True)
            # assuming the last head is the main one
            # output dimension of the last head
            # should be consistent with the ground-truth
            logits = net(batch, -1)
            start = batch_idx * loader.batch_size
            end = start + loader.batch_size
            end = min(end, len(loader.dataset))
            labels[start:end] = targets.cpu().numpy()
            predicts[start:end] = logits.max(1)[1].cpu().numpy()

            if epoch % cfg.embedding_freq == 0:
                intermediates[start:end] = net(batch, -1, True).cpu().numpy()
                if not cfg.tfm_adaptive_thresholding:
                    for i in range(3):
                        batch[:, i] = (batch[:, i] *
                                       cfg.tfm_stds[i]) + cfg.tfm_means[i]
                images[start:end] = torch.nn.functional.interpolate(
                    batch, size=(64, 64), mode='bicubic',
                    align_corners=False).cpu().numpy()

    # TODO: Gather labels and predicts
    # compute accuracy
    num_classes = labels.max().item() + 1
    count_matrix = np.zeros((num_classes, num_classes), dtype=np.int32)
    for i in range(predicts.shape[0]):
        count_matrix[predicts[i], labels[i]] += 1
    reassignment = np.dstack(
        linear_sum_assignment(count_matrix.max() - count_matrix))[0]
    acc = count_matrix[reassignment[:, 0], reassignment[:, 1]].sum().astype(
        np.float32) / predicts.shape[0]
    nmi = NMI(labels, predicts)
    ari = ARI(labels, predicts)

    # compute f1 scores per class
    predicts_reassigned = reassignment[predicts, 1]
    precision = precision_score(labels,
                                predicts_reassigned,
                                average=None,
                                zero_division=0)
    recall = recall_score(labels,
                          predicts_reassigned,
                          average=None,
                          zero_division=0)
    f1 = f1_score(labels, predicts_reassigned, average=None, zero_division=0)

    logger.info('Evaluation results at epoch %d are: '
                'ACC: %.3f, NMI: %.3f, ARI: %.3f' % (epoch, acc, nmi, ari))
    if cfg.local_rank == 0:
        writer.add_scalar('Evaluate/ACC', acc, epoch)
        writer.add_scalar('Evaluate/NMI', nmi, epoch)
        writer.add_scalar('Evaluate/ARI', ari, epoch)

        for i in range(len(f1)):
            writer.add_scalar(f'Evaluate/f1_{i}', f1[i], epoch)
            writer.add_scalar(f'Evaluate/precision_{i}', precision[i], epoch)
            writer.add_scalar(f'Evaluate/recall_{i}', recall[i], epoch)

        if epoch % cfg.embedding_freq == 0 and cfg.embedding_freq != -1:
            writer.add_embedding(intermediates, labels, images, epoch,
                                 cfg.session)

    return acc
Example #28
for dataset in tqdm(benchmark):
    true_labels = to_numpy(dataset['labels'].argmax(-1))
    X = to_numpy(dataset['X'])
    ll = 0
    ari = 0
    nmi = 0
    mae = 0
    et = 0
    for b in range(len(X)):
        tick = time.time()
        vbmog.run(X[b], verbose=False)
        et += time.time() - tick
        ll += vbmog.loglikel(X[b])
        labels = vbmog.labels()
        ari += ARI(true_labels[b], labels)
        nmi += NMI(true_labels[b], labels, average_method='arithmetic')
        mae += abs(len(np.unique(true_labels[b])) - len(np.unique(labels)))

    ll /= len(X)
    ari /= len(X)
    nmi /= len(X)
    mae /= len(X)
    et /= len(X)

    accm.update([ll.item(), dataset['ll'], ari, nmi, mae, et])

save_dir = os.path.join(results_path, 'baselines', 'vbmog')
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
logger = get_logger('vbmog_baseline', os.path.join(save_dir, args.filename))
logger.info(accm.info())
Example #29
def main():

    ##divide image into patches(polygons) and get the positions of each one
    scene = cv2.imread('C:/Users/dario.dotti/Documents/LOST_dataset/camera001.jpg')
    list_poly = my_img_proc.divide_image(scene)



    with open('C:/Users/dario.dotti/Documents/LOST_dataset/8_2013-12_2012_camera001/raw_traj_organized_by_id.txt', 'rb') as handle:
        slices = pickle.load(handle)
    print 'data extracted'

    ##Feature Extraction METHOD: compute histogram of Oriented Tracklets for all the tracklets in every patch
    normalized_data_HOT,orig_list_id = main_camera017.histograms_of_oriented_trajectories(list_poly,slices)

    ##Feature Extraction METHOD: Compute the features velocity-acceleration-curvature to distinguish between pedestrian and cars
    #normalized_data_HOT = velo_acc_curva_feature(list_poly,slices)

    ####Experinment AUTOENCODER
    # ##3)load the model of the trained AE
    # if my_AE.load_training():
    #     print 'model loaded'
    #     weights = my_AE.get_weights()
    #     weights= weights.T

    ###)3a)load the model of the trained deep AE
    if my_AE.load_deep_AE():
        print 'model loaded'
        list_weights = my_AE.get_stacked_weigths([1,2])

        hid1_space = np.dot(normalized_data_HOT,list_weights[0])
        #hid2_space = np.dot(hid1_space,list_weights[1])


        weights= list_weights[1].T



    ####COMPUTE THE MOST SIMILAR FEATURE FOR EVERY WEIGHT

    #mostSimilar_feature_sorted,seeds_index = my_exp.get_most_similar_feature_from_AEweights(hid1_space,weights,orig_list_id,scene,slices)


    ###CLUSTERING

    ####seed
    # seeds = []
    # # ##get the traj with strongest weight and use it to initialize mean shift
    # map(lambda s:seeds.append(normalized_data_HOT[s]),seeds_index)
    # #
    # cluster_pred_AE,cluster_centers_seeds = my_exp.cluster_fit_predict(normalized_data_HOT,seeds)
    # freq_counter_seedsAE =  Counter(cluster_pred_AE)
    #
    #
    # print 'seeds from AE'
    # print freq_counter_seedsAE
    # #
    # main_camera017.color_traj_based_on_clusters(slices,cluster_pred_AE,freq_counter_seedsAE,scene)
#############



     ##raw features cluster
    cluster_pred_raw,cluster_centers_unseeds = my_exp.cluster_fit_predict(normalized_data_HOT,[])
    freq_counter_unseed =  Counter(cluster_pred_raw)
    print freq_counter_unseed

    # ##getting same number of cluster with raw features
    # temp_seeds = []
    # for trj_id in freq_counter_unseed.keys()[:len(freq_counter_seedsAE.keys())] :
    #     ##get index of every class
    #     index = np.where(cluster_pred_raw == trj_id)[0][0]
    #
    #     temp_seeds.append(normalized_data_HOT[index])
    #
    # cluster_pred_raw,cluster_centers_unseeds = my_exp.cluster_fit_predict(normalized_data_HOT,temp_seeds)
    # freq_counter_seeds_raw =  Counter(cluster_pred_raw)
    # print 'seeds from raw features'
    # print freq_counter_seeds_raw

    main_camera017.color_traj_based_on_clusters(slices,cluster_pred_raw,freq_counter_unseed,scene)

    ####SUPERVISED CLASSIFIER
    test_samples = []
    test_labels= []

    training_samples = []
    training_labels= []
    test_pos = []


    ##get test samples from every class
    for trj_c,trj_id in enumerate(freq_counter_unseed):
        ##get index of every class

        index = np.where(cluster_pred_raw == trj_id)[0]
        #take 10% of every class as test samples
        #print w[trj_c]
        random_index = random.sample(index,int((len(index)*0.15)))

        for i,c in enumerate(cluster_pred_raw):
            if i in random_index:
                test_samples.append(normalized_data_HOT[i])
                test_labels.append(trj_id)
                test_pos.append(i)
    print len(test_samples)

    for i,sample in enumerate(normalized_data_HOT):
        if i not in test_pos:
            training_samples.append(sample)
            training_labels.append(cluster_pred_raw[i])
    print np.array(training_samples).shape


    # #train Logistic regression classifier
    my_exp.logistic_regression_train(training_samples,training_labels)

    # #test
    pred = my_exp.logistic_regression_predict(test_samples)

    print NMI(test_labels,pred)
Example #30
def clustering(Xsvd,
               cells,
               dataset,
               suffix,
               labels=None,
               tlabels=None,
               method='knn',
               istsne=True,
               name='',
               batch_labels=None,
               seed=42):
    tsne = TSNE(n_jobs=24).fit_transform(Xsvd)

    for n_components in [15]:
        if method == 'gmm':
            clf = mixture.GaussianMixture(n_components=n_components).fit(tsne)
            labels_pred = clf.predict(tsne)
        elif method == 'knn':
            labels_pred = KMeans(n_components,
                                 n_init=200).fit_predict(tsne)  # n_jobs>1 ?
        elif method == 'dbscan':
            labels_pred = DBSCAN(eps=0.3, min_samples=10).fit(tsne).labels_
        elif method == 'spectral':
            spectral = cluster.SpectralClustering(n_clusters=n_components,
                                                  eigen_solver='arpack',
                                                  affinity="nearest_neighbors")
            labels_pred = spectral.fit_predict(tsne)
        elif method == 'louvain':
            from scipy.spatial import distance

            for louvain in [30]:
                print('****', louvain)
                mat = kneighbors_graph(Xsvd,
                                       louvain,
                                       mode='distance',
                                       include_self=True).todense()

                G = nx.from_numpy_matrix(mat)
                partition = community.best_partition(G, random_state=seed)

                labels_pred = []
                for i in range(mat.shape[0]):
                    labels_pred.append(partition[i])

                labels_pred = np.array(labels_pred)
                print('louvain', louvain, tsne[:5], len(labels),
                      len(labels_pred))
                #print(np.unique(labels_pred))

                if labels is not None:
                    nmi_score = NMI(labels, labels_pred)
                    ari_score = ARI(labels, labels_pred)
                    print(
                        n_components, method,
                        "Clustering Scores:\nNMI: %.4f\nARI: %.4f\n" %
                        (nmi_score, ari_score))

    if istsne:
        n_components = len(np.unique(labels_pred))
        vis_x = tsne[:, 0]
        vis_y = tsne[:, 1]
        colors = [
            'blue', 'orange', 'green', 'red', 'purple', 'brown', 'pink',
            'yellow', 'black', 'teal', 'plum', 'tan', 'bisque', 'beige',
            'slategray', 'brown', 'darkred', 'salmon', 'coral', 'olive',
            'lightpink', 'teal', 'darkcyan', 'BlueViolet', 'CornflowerBlue',
            'DarkKhaki', 'DarkTurquoise'
        ]

        show_tsne(tsne,
                  labels,
                  'result/%s/%s-%s-LSI-true.png' % (dataset, name, suffix),
                  tlabels=tlabels)
        show_tsne(tsne, labels_pred,
                  'result/%s/%s-%s-LSI-pred.png' % (dataset, name, suffix))

        with open('result/%s-LSI-cluster_result.csv' % (dataset), 'w') as f:
            f.write('cell,predicted label,tsne-1,tsne-2\n')
            for cell, pred, t in zip(cells, labels_pred, tsne):
                f.write('%s,%d,%f,%f\n' % (cell, pred, t[0], t[1]))

    if batch_labels is not None:
        show_tsne(
            tsne, batch_labels, 'result/%s/%s-GMVAE-%s-%s-batch.png' %
            (dataset, dataset, suffix, name))