Example #1
from collections import Counter
# label_data and pairwise_precision_recall_f1 come from the surrounding module
# (the later examples import the latter via `from utils import ...`)

def pred(c1, name):
    # predicted assignment: c1 maps each cluster to its size,
    # which we expand into a flat list of cluster ids
    clustered = []
    count = 0
    for i in c1:
        num = c1[i]
        clustered.extend([count] * num)
        count += 1
    print("predict num cluster {}".format(len(Counter(clustered))))

    # ground truth: label_data[name] maps each true cluster to its members
    labels = label_data[name]
    tmp = []
    count = 0  # number of singleton clusters (tracked but unused below)
    for i, elem in enumerate(labels):
        num = len(labels[elem])
        tmp.extend([i] * num)
        if num == 1:
            count += 1
    labels = tmp
    c2 = Counter(labels)
    print("truth num cluster {}".format(len(Counter(labels))))

    prec, rec, f1 = pairwise_precision_recall_f1(clustered, labels)
    print('pairwise precision', '{:.5f}'.format(prec), 'recall',
          '{:.5f}'.format(rec), 'f1', '{:.5f}'.format(f1))
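
All of these examples feed two flat cluster-label lists into pairwise_precision_recall_f1. As a reference for what that utility computes, here is a minimal sketch of pairwise clustering metrics (my own illustration, not the repo's implementation): a pair of items counts as a true positive when both the prediction and the ground truth place it in the same cluster.

from itertools import combinations

def pairwise_prf1_sketch(pred, truth):
    # count item pairs that agree/disagree on "same cluster"
    tp = fp = fn = 0
    for a, b in combinations(range(len(pred)), 2):
        same_pred = pred[a] == pred[b]
        same_true = truth[a] == truth[b]
        if same_pred and same_true:
            tp += 1
        elif same_pred:
            fp += 1
        elif same_true:
            fn += 1
    prec = tp / (tp + fp) if tp + fp else 0.0
    rec = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * prec * rec / (prec + rec) if prec + rec else 0.0
    return prec, rec, f1

Note this direct pair enumeration is O(n^2); library implementations usually compute the same counts from a contingency table.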
Example #2
    def train(self, adj_list, fea_list, y_train, y_val, y_test, train_mask, val_mask, test_mask, y_all, all_mask, rawlabels, needtSNE=False, rawFeature=[]):

        prec, rec, f1 = 0.0, 0.0, 0.0
        nb_nodes = fea_list[0].shape[0]
        ft_size = fea_list[0].shape[1]
        nb_classes = y_train.shape[1]
        # nb_classes = len(set(rawlabels))

        # adj = adj.todense()

        # features = features[np.newaxis]  # [1, nb_node, ft_size]
        fea_list = [fea[np.newaxis] for fea in fea_list]
        adj_list = [adj[np.newaxis] for adj in adj_list]
        y_train = y_train[np.newaxis]
        y_val = y_val[np.newaxis]
        y_test = y_test[np.newaxis]
        y_all = y_all[np.newaxis]

        train_mask = train_mask[np.newaxis]
        val_mask = val_mask[np.newaxis]
        test_mask = test_mask[np.newaxis]
        all_mask = all_mask[np.newaxis]

        biases_list = [process.adj_to_bias(adj, [nb_nodes], nhood=1) for adj in adj_list]
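        # adj_to_bias (GAT-style preprocessing) typically maps entries reachable
        # within `nhood` hops to 0 and everything else to a large negative value,
        # so the downstream attention softmax ignores non-neighbors.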

        print('build graph...')
        with tf.Graph().as_default():
            with tf.name_scope('input'):
                metric_ftr_in = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ft_size), name='metric_ftr_in')
                ftr_in_list = [tf.placeholder(dtype=tf.float32,
                                              shape=(batch_size, nb_nodes, ft_size),
                                              name='ftr_in_{}'.format(i))
                               for i in range(len(fea_list))]
                bias_in_list = [tf.placeholder(dtype=tf.float32,
                                               shape=(batch_size, nb_nodes, nb_nodes),
                                               name='bias_in_{}'.format(i))
                                for i in range(len(biases_list))]
                lbl_in = tf.placeholder(dtype=tf.int32, shape=(
                    batch_size, nb_nodes, nb_classes), name='lbl_in')
                msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes),
                                        name='msk_in')
                attn_drop = tf.placeholder(dtype=tf.float32, shape=(), name='attn_drop')
                ffd_drop = tf.placeholder(dtype=tf.float32, shape=(), name='ffd_drop')
                is_train = tf.placeholder(dtype=tf.bool, shape=(), name='is_train')

            # forward
            logits, final_embedding, att_val, centers_embed, test_final_embeed = model.inference(ftr_in_list, nb_classes, nb_nodes, is_train,
                                                               attn_drop, ffd_drop,
                                                               bias_mat_list=bias_in_list,
                                                               hid_units=hid_units, n_heads=n_heads, features=fea_list, labels=rawlabels,
                                                               residual=residual, activation=nonlinearity, feature_size=ft_size)


            # final_embedding: checkout Tensor("Sum:0", shape=(286, 64), dtype=float32)

            # logits: checkout Tensor("ExpandDims_3:0", shape=(1, 286, 30), dtype=float32)

            # calculate the masked loss
            # lab_list = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ), name='lab_list')
            # ftr_resh = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ft_size), name='ftr_resh')
            log_resh = tf.reshape(logits, [-1, nb_classes])
            lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
            msk_resh = tf.reshape(msk_in, [-1])


            print ("final_embedding: checkout", final_embedding)
            print ("logits: checkout", logits)
            print ("log_resh: checkout", log_resh)
            # print ("ftr_resh: ", ftr_resh)
            print ("lab_resh: ", lab_resh)
            print ("fea_list: ", fea_list)
            print ("centers_embed: ", centers_embed)
            print ("batch_size, nb_nodes, nb_classes, ft_size", batch_size, nb_nodes, nb_classes, ft_size)

            osm_caa_loss = OSM_CAA_Loss(batch_size=nb_nodes)
            osm_loss = osm_caa_loss.forward
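            # OSM_CAA_Loss is presumably an Online Soft Mining / Class-Aware
            # Attention metric-learning loss, driven here by the node embeddings,
            # the raw cluster labels, and the learned class centers.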

            # final_embedding: checkout Tensor("Sum:0", shape=(286, 64), dtype=float32)
            # logits: checkout Tensor("ExpandDims_3:0", shape=(1, 286, 30), dtype=float32)
            # log_resh: checkout Tensor("Reshape:0", shape=(286, 30), dtype=float32)
            # ftr_resh:  Tensor("ftr_resh:0", shape=(286, 100), dtype=float32)
            # lab_resh:  Tensor("Reshape_1:0", shape=(286, 30), dtype=int32)

            osmLoss, checkvalue = osm_loss(final_embedding, rawlabels, centers_embed)
            # osmLoss, checkvalue = osm_loss(metric_ftr_in, rawlabels, centers_embed)
            SoftMaxloss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
            loss = osmLoss
            # why does the loss stay constant?
            # loss = osmLoss
            # loss = SoftMaxloss

            accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)
            # optimize
            train_op = model.training(loss, lr, l2_coef)

            Path = 'pre_trained/{}/{}/{}'.format(dataset, dataset, self.name)
            self.mkdir(Path)
            checkpt_file = '{}/allMP_multi_{}_.ckpt'.format(Path, featype)
            print('model: {}'.format(checkpt_file))
            saver = tf.train.Saver()

            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())

            vlss_mn = np.inf
            vacc_mx = 0.0
            curr_step = 0

            with tf.Session(config=config) as sess:
                sess.run(init_op)

                train_loss_avg = 0
                train_acc_avg = 0
                val_loss_avg = 0
                val_acc_avg = 0

                for epoch in range(nb_epochs):
                    tr_step = 0

                    tr_size = fea_list[0].shape[0]
                    # ================   training    ============
                    while tr_step * batch_size < tr_size:
                        fd1 = {i: d[tr_step * batch_size:(tr_step + 1) * batch_size]
                               for i, d in zip(ftr_in_list, fea_list)}
                        fd2 = {i: d[tr_step * batch_size:(tr_step + 1) * batch_size]
                               for i, d in zip(bias_in_list, biases_list)}
                        fd3 = {lbl_in: y_train[tr_step * batch_size:(tr_step + 1) * batch_size],
                               msk_in: train_mask[tr_step * batch_size:(tr_step + 1) * batch_size],
                               metric_ftr_in: rawFeature,
                               is_train: True,
                               attn_drop: 0.6,
                               ffd_drop: 0.6}
                        fd = fd1
                        fd.update(fd2)
                        fd.update(fd3)
                        _, loss_value_tr, acc_tr, att_val_train = sess.run([train_op, loss, accuracy, att_val],
                                                                           feed_dict=fd)
                        test_check_value = sess.run(checkvalue, feed_dict=fd)
                        print ("test_check_value: ", test_check_value)

                        train_loss_avg += loss_value_tr
                        train_acc_avg += acc_tr
                        tr_step += 1


                    vl_step = 0
                    vl_size = fea_list[0].shape[0]
                    # =============   val       =================
                    while vl_step * batch_size < vl_size:
                        # fd1 = {ftr_in: features[vl_step * batch_size:(vl_step + 1) * batch_size]}
                        fd1 = {i: d[vl_step * batch_size:(vl_step + 1) * batch_size]
                               for i, d in zip(ftr_in_list, fea_list)}
                        fd2 = {i: d[vl_step * batch_size:(vl_step + 1) * batch_size]
                               for i, d in zip(bias_in_list, biases_list)}
                        fd3 = {lbl_in: y_val[vl_step * batch_size:(vl_step + 1) * batch_size],
                               msk_in: val_mask[vl_step * batch_size:(vl_step + 1) * batch_size],
                               metric_ftr_in: rawFeature,
                               is_train: False,
                               attn_drop: 0.0,
                               ffd_drop: 0.0}

                        fd = fd1
                        fd.update(fd2)
                        fd.update(fd3)
                        loss_value_vl, acc_vl = sess.run([loss, accuracy],
                                                         feed_dict=fd)
                        val_loss_avg += loss_value_vl
                        val_acc_avg += acc_vl
                        vl_step += 1
                    # import pdb; pdb.set_trace()
                    print('Epoch: {}, att_val: {}'.format(epoch, np.mean(att_val_train, axis=0)))
                    print('Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f | vl_step: %d, tr_step: %d' %
                          (train_loss_avg / tr_step, train_acc_avg / tr_step,
                           val_loss_avg / vl_step, val_acc_avg / vl_step, vl_step, tr_step))

                    if val_acc_avg / vl_step >= vacc_mx or val_loss_avg / vl_step <= vlss_mn:
                        if val_acc_avg / vl_step >= vacc_mx and val_loss_avg / vl_step <= vlss_mn:
                            vacc_early_model = val_acc_avg / vl_step
                            vlss_early_model = val_loss_avg / vl_step
                            saver.save(sess, checkpt_file)
                        vacc_mx = np.max((val_acc_avg / vl_step, vacc_mx))
                        vlss_mn = np.min((val_loss_avg / vl_step, vlss_mn))
                        curr_step = 0
                    else:
                        curr_step += 1
                        if curr_step == patience:
                            print('Early stop! Min loss: ', vlss_mn,
                                  ', Max accuracy: ', vacc_mx)
                            print('Early stop model validation loss: ',
                                  vlss_early_model, ', accuracy: ', vacc_early_model)
                            break

                    train_loss_avg = 0
                    train_acc_avg = 0
                    val_loss_avg = 0
                    val_acc_avg = 0
                # NOTE: this final save overwrites the early-stopping checkpoint before it is restored below
                saver.save(sess, checkpt_file)

                saver.restore(sess, checkpt_file)
                print('load model from : {}'.format(checkpt_file))
                ts_size = fea_list[0].shape[0]
                ts_step = 0
                ts_loss = 0.0
                ts_acc = 0.0

                while ts_step * batch_size < ts_size:
                    fd1 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                           for i, d in zip(ftr_in_list, fea_list)}
                    fd2 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                           for i, d in zip(bias_in_list, biases_list)}
                    fd3 = {lbl_in: y_all[ts_step * batch_size:(ts_step + 1) * batch_size],
                           msk_in: all_mask[ts_step * batch_size:(ts_step + 1) * batch_size],
                           metric_ftr_in: rawFeature,
                          is_train: False,
                          attn_drop: 0.0,
                          ffd_drop: 0.0}

                    fd = fd1
                    fd.update(fd2)
                    fd.update(fd3)
                    loss_value_ts, acc_ts, jhy_final_embedding, test_final_embeed_check = sess.run([loss, accuracy, final_embedding, test_final_embeed],
                                                                          feed_dict=fd)
                    ts_loss += loss_value_ts
                    ts_acc += acc_ts
                    ts_step += 1

                xx = np.expand_dims(jhy_final_embedding, axis=0)[all_mask]
                xx2 = np.expand_dims(test_final_embeed_check, axis=0)[all_mask]
                yy = y_all[all_mask]


                print ("check fd")
                print('xx: {}, yy: {}, ts_size: {}, ts_step: {}, batch_size: {}'.format(xx.shape, yy.shape, ts_size, ts_step,batch_size))

                labels, numberofLabels = self.getLabel(yy)

                from utils import clustering, pairwise_precision_recall_f1

                clusters_pred = clustering(xx2, num_clusters=numberofLabels)
                prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
                print('prec: ', prec, ', rec: ', rec, ', f1: ', f1, ', originNumberOfClusterlabels: ', numberofLabels)

                if needtSNE:
                    tSNEAnanlyse(xx, labels, join(settings.PIC_DIR, "HAN", "rawReature_%s_final.png" % (self.name)))
                    tSNEAnanlyse(rawFeature, labels, join(settings.PIC_DIR, "HAN", "rawReature_%s_features.png" % (self.name)))
                    tSNEAnanlyse(xx2, labels, join(settings.PIC_DIR, "HAN", "rawReature_%s_xx2.png" % (self.name)))
                    tSNEAnanlyse(xx, clusters_pred, join(settings.PIC_DIR, "HAN", "rawReature_%s_result_label.png" % (self.name)))


                sess.close()

        return prec, rec, f1, xx2
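
The validation block above follows the common GAT reference pattern: save the checkpoint only when validation accuracy and loss improve together, and stop after `patience` epochs without improvement in either. A distilled sketch of that bookkeeping (a hypothetical standalone helper, same logic as the inline code):

class EarlyStopper:
    """Track best validation acc/loss; signal stop after `patience` bad epochs."""
    def __init__(self, patience):
        self.patience = patience
        self.best_acc = 0.0
        self.best_loss = float('inf')
        self.bad_epochs = 0

    def update(self, val_acc, val_loss):
        """Return (should_save, should_stop) for this epoch."""
        improved_any = val_acc >= self.best_acc or val_loss <= self.best_loss
        improved_both = val_acc >= self.best_acc and val_loss <= self.best_loss
        if improved_any:
            self.best_acc = max(self.best_acc, val_acc)
            self.best_loss = min(self.best_loss, val_loss)
            self.bad_epochs = 0
        else:
            self.bad_epochs += 1
        return improved_both, self.bad_epochs == self.patience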
Example #3
    def MetricDebug(self, adj_list, fea_list, y_train, y_val, y_test, train_mask, val_mask, test_mask, y_all, all_mask, rawlabels, needtSNE=False, rawFeature=[]):
        prec, rec, f1 = 0.0, 0.0, 0.0
        nb_nodes = fea_list[0].shape[0]
        ft_size = fea_list[0].shape[1]
        nb_classes = y_train.shape[1]
        # nb_classes = len(set(rawlabels))

        # adj = adj.todense()

        # features = features[np.newaxis]  # [1, nb_node, ft_size]
        fea_list = [fea[np.newaxis] for fea in fea_list]
        adj_list = [adj[np.newaxis] for adj in adj_list]
        y_train = y_train[np.newaxis]
        y_val = y_val[np.newaxis]
        y_test = y_test[np.newaxis]
        y_all = y_all[np.newaxis]

        train_mask = train_mask[np.newaxis]
        val_mask = val_mask[np.newaxis]
        test_mask = test_mask[np.newaxis]
        all_mask = all_mask[np.newaxis]

        biases_list = [process.adj_to_bias(adj, [nb_nodes], nhood=1) for adj in adj_list]

        print('build graph...')
        with tf.Graph().as_default():
            with tf.name_scope('input'):
                metric_ftr_in = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ft_size), name='metric_ftr_in')
                ftr_in_list = [tf.placeholder(dtype=tf.float32,
                                              shape=(batch_size, nb_nodes, ft_size),
                                              name='ftr_in_{}'.format(i))
                               for i in range(len(fea_list))]
                bias_in_list = [tf.placeholder(dtype=tf.float32,
                                               shape=(batch_size, nb_nodes, nb_nodes),
                                               name='bias_in_{}'.format(i))
                                for i in range(len(biases_list))]
                lbl_in = tf.placeholder(dtype=tf.int32, shape=(
                    batch_size, nb_nodes, nb_classes), name='lbl_in')
                msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes),
                                        name='msk_in')
                attn_drop = tf.placeholder(dtype=tf.float32, shape=(), name='attn_drop')
                ffd_drop = tf.placeholder(dtype=tf.float32, shape=(), name='ffd_drop')
                is_train = tf.placeholder(dtype=tf.bool, shape=(), name='is_train')

            # forward
            logits, final_embedding, att_val, centers_embed, test_final_embeed = model.inference(ftr_in_list, nb_classes, nb_nodes, is_train,
                                                               attn_drop, ffd_drop,
                                                               bias_mat_list=bias_in_list,
                                                               hid_units=hid_units, n_heads=n_heads, features=fea_list, labels=rawlabels,
                                                               residual=residual, activation=nonlinearity, feature_size=ft_size)

            log_resh = tf.reshape(logits, [-1, nb_classes])
            lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
            msk_resh = tf.reshape(msk_in, [-1])

            osm_caa_loss = OSM_CAA_Loss(batch_size=nb_nodes)
            osm_loss = osm_caa_loss.forward

            osmLoss, checkvalue = osm_loss(final_embedding, rawlabels, centers_embed)
            SoftMaxloss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
            loss = osmLoss

            accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)
            # optimize
            train_op = model.training(loss, lr, l2_coef)

            Path = 'pre_trained/{}/{}/{}'.format(dataset, dataset, self.name)
            self.mkdir(Path)
            checkpt_file = '{}/allMP_multi_{}_.ckpt'.format(Path, featype)
            saver = tf.train.Saver()

            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())

            ts_size = fea_list[0].shape[0]
            ts_step = 0
            ts_loss = 0.0
            ts_acc = 0.0

            with tf.Session(config=config) as sess:
                sess.run(init_op)
                saver.restore(sess, checkpt_file)

                while ts_step * batch_size < ts_size:
                    fd1 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                           for i, d in zip(ftr_in_list, fea_list)}
                    fd2 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                           for i, d in zip(bias_in_list, biases_list)}
                    fd3 = {lbl_in: y_all[ts_step * batch_size:(ts_step + 1) * batch_size],
                           msk_in: all_mask[ts_step * batch_size:(ts_step + 1) * batch_size],
                           metric_ftr_in: rawFeature,
                          is_train: False,
                          attn_drop: 0.0,
                          ffd_drop: 0.0}

                    fd = fd1
                    fd.update(fd2)
                    fd.update(fd3)
                    # run the tensor directly: sess.run([t]) returns a one-element
                    # list, which would break np.expand_dims below
                    test_final_embeed_check = sess.run(test_final_embeed, feed_dict=fd)
                    ts_step += 1

                xx2 = np.expand_dims(test_final_embeed_check, axis=0)[all_mask]
                yy = y_all[all_mask]

                labels, numberofLabels = self.getLabel(yy)

                from utils import clustering, pairwise_precision_recall_f1

                clusters_pred = clustering(xx2, num_clusters=numberofLabels)
                prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
                print('prec: ', prec, ', rec: ', rec, ', f1: ', f1, ', originNumberOfClusterlabels: ', numberofLabels)

                if needtSNE:
                    tSNEAnanlyse(rawFeature, labels, join(settings.PIC_DIR, "MetricLearning", "rawReature_%s_features.png" % (self.name)))
                    tSNEAnanlyse(xx2, labels, join(settings.PIC_DIR, "MetricLearning", "rawReature_%s_xx2.png" % (self.name)))
def train(name, needtSNE=False, savefile=True):
    adj, adj2, features, labels, Clusterlabels, Ids = load_local_data(
        name=name)

    initClusterlabel = Clusterlabels
    oneHotClusterLabels = toOneHot(Clusterlabels)
    num_logits = len(oneHotClusterLabels[0])
    # enc.transform([['Female', 1], ['Male', 4]]).toarray()
    print('debugging ', oneHotClusterLabels.shape)

    originClusterlabels = Clusterlabels
    n_clusters = len(set(labels))
    OldClusterlabels = Clusterlabels
    originNumberOfClusterlabels = len(set(Clusterlabels))

    num_nodes = adj.shape[0]
    input_feature_dim = features.shape[1]
    adj_norm, adj_label = NormalizedAdj(adj)
    adj_norm2, adj_label2 = NormalizedAdj(adj2)

    if FLAGS.is_sparse:  # TODO to test
        # features = sparse_to_tuple(features.tocoo())
        # features_nonzero = features[1].shape[0]
        features = features.todense()  # TODO
    else:
        features = normalize_vectors(features)

    # Define placeholders
    placeholders = {
        # 'features': tf.sparse_placeholder(tf.float32),
        'features': tf.placeholder(tf.float32,
                                   shape=(None, input_feature_dim)),
        'labels': tf.placeholder(tf.int64, shape=(None), name='labels'),
        'graph1': tf.sparse_placeholder(tf.float32),
        'graph2': tf.sparse_placeholder(tf.float32),
        'graph1_orig': tf.sparse_placeholder(tf.float32),
        'graph2_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'epoch': tf.placeholder_with_default(0., shape=()),
        'clusterEpoch': tf.placeholder_with_default(0., shape=())
    }

    # pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()  # negative edges/pos edges
    # norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.nnz) * 2)

    def get_embs():
        feed_dict.update({placeholders['dropout']: 0})
        emb = sess.run(model.z_mean_1, feed_dict=feed_dict)  # z_mean is better
        return emb

    def getGraphDetail(adj):
        pos_weight = float(adj.shape[0] * adj.shape[0] -
                           adj.sum()) / adj.sum()  # negative edges/pos edges
        norm = adj.shape[0] * adj.shape[0] / float(
            (adj.shape[0] * adj.shape[0] - adj.nnz) * 2)
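        # worked example: a 4-node graph whose 0/1 adjacency has 3 nonzero entries
        # gives pos_weight = (16 - 3) / 3 ≈ 4.33 (upweighting the rare positive
        # edges) and norm = 16 / ((16 - 3) * 2) ≈ 0.615; for a binary sparse
        # matrix adj.sum() and adj.nnz coincide.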
        return {'norm': norm, 'pos_weight': pos_weight}

        # return pos_weight, norm

    # loss1s = []
    # loss2s = []
    # loss3s = []

    n_clusters = len(set(labels))
    graph1 = getGraphDetail(adj)
    graph2 = getGraphDetail(adj2)

    # construct adj_orig
    graph1['labels'] = tf.reshape(
        tf.sparse_tensor_to_dense(placeholders['graph1_orig'],
                                  validate_indices=False), [-1])
    graph2['labels'] = tf.reshape(
        tf.sparse_tensor_to_dense(placeholders['graph2_orig'],
                                  validate_indices=False), [-1])

    # Train model
    for clusterepoch in range(FLAGS.clusterEpochs):
        print('cluster epoch: ', clusterepoch)
        # tf.reset_default_graph()

        # num_logits
        model = BuildModel(placeholders,
                           input_feature_dim,
                           num_nodes,
                           name='model%d' % (clusterepoch),
                           num_logits=num_logits)

        # Session

        # tf.reset_default_graph()
        # sess = tf.InteractiveSession()

        opt = OptimizerDualGCNAutoEncoder(model=model,
                                          num_nodes=num_nodes,
                                          z_label=Clusterlabels,
                                          name='model%d' % (clusterepoch),
                                          graph1=graph1,
                                          graph2=graph2)

        sess = tf.Session()
        sess.run(tf.global_variables_initializer())

        # Centers
        # centers = opt.centers

        for epoch in range(FLAGS.epochs):
            # print ('epoch: ', epoch)

            # opt.epoch = epoch
            model.epoch = epoch

            # Construct feed dictionary
            # Number of logics and preb

            feed_dict = construct_feed_dict(adj_norm, adj_label, adj_norm2,
                                            adj_label2, features, placeholders,
                                            Clusterlabels, epoch,
                                            clusterepoch + 1)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})
            # Run single weight update
            # outs = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict)
            outs = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict)
            # [Loss, softmax_loss, loss3, centerloss, reconstructloss] = sess.run([opt.cost, opt.softmax_loss, opt.loss3, opt.centerloss, opt.reconstructloss], feed_dict=feed_dict)
            # [Loss, loss3, centerloss, reconstructloss, L2loss] = sess.run([opt.cost, opt.loss3, opt.centerloss, opt.reconstructloss, opt.L2loss], feed_dict=feed_dict)
            [Loss, reconstructloss] = sess.run([opt.cost, opt.reconstructloss],
                                               feed_dict=feed_dict)

            # print ('loss: ', Loss, ', loss1: ', loss1, ', loss2: ', loss2 ,', centerloss: ', centerloss, ', acc: ', outs[2])
            print('epoch: ', epoch, ', loss: ', Loss, ', reconstructloss : ',
                  reconstructloss)

        # if clusterepoch != FLAGS.clusterEpochs -1 :
        emb = get_embs()
        X_new = TSNE(learning_rate=100).fit_transform(emb)

        tClusterLabels = []
        Maxscore = -10000
        NumberOfCluster = 0
        for nc in range(2, originNumberOfClusterlabels + 1, 1):
            TempLabels = clustering(X_new, nc)
            score = silhouette_score(X_new, TempLabels)
            print('nc: ', nc, ', score: ', score)
            if score > Maxscore:
                Maxscore = score
                tClusterLabels = TempLabels
                NumberOfCluster = nc

        print('NumberOfCluster: ', NumberOfCluster,
              ', originNumberOfClusterlabels : ', originNumberOfClusterlabels,
              ', Maxscore: ', Maxscore)
        if NumberOfCluster < 0 or NumberOfCluster > originNumberOfClusterlabels:  # the < 0 guard can never fire, since nc starts at 2
            continue

        # the cluster count is required to keep shrinking;
        # accept the new labels and tighten the upper bound accordingly
        Clusterlabels = tClusterLabels
        originNumberOfClusterlabels = NumberOfCluster

        prec, rec, f1 = pairwise_precision_recall_f1(Clusterlabels, labels)
        print('prec: ', prec, ', rec: ', rec, ', f1: ', f1,
              ', originNumberOfClusterlabels: ', originNumberOfClusterlabels)
        Cc = Counter(Clusterlabels)
        print(Cc)
        if needtSNE:
            sNEComparingAnanlyse(emb,
                                 OldClusterlabels,
                                 labels,
                                 Clusterlabels,
                                 savepath=join(
                                     settings.PIC_DIR,
                                     "%s_%s.png" % (name, clusterepoch)))
            # tSNEAnanlyse(emb, labels, join(settings.PIC_DIR, "%s.png"%(clusterepoch)) )
            # tf.reset_default_graph()

    emb = get_embs()
    emb_norm = normalize_vectors(emb)
    clusters_pred = clustering(emb_norm,
                               num_clusters=originNumberOfClusterlabels)
    prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
    print('prec: ', prec, ', rec: ', rec, ', f1: ', f1,
          ', originNumberOfClusterlabels: ', originNumberOfClusterlabels)
    # lossPrint(range(FLAGS.epochs), loss1s, loss2s, loss3s)
    if needtSNE:
        tSNEAnanlyse(emb, labels,
                     join(settings.PIC_DIR, "%s_final.png" % (name)))
    tf.reset_default_graph()
    return [prec, rec, f1], num_nodes, n_clusters
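
The loop over `nc` above selects the cluster count by maximizing the silhouette score on the t-SNE embedding. A self-contained sketch of that selection step, assuming scikit-learn's KMeans stands in for the repo's `clustering` helper:

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

def pick_cluster_count(X, max_clusters):
    """Return (best_labels, best_k), scanning k in [2, max_clusters] by silhouette."""
    best_labels, best_k, best_score = None, 0, -1.0
    for k in range(2, max_clusters + 1):
        labels = KMeans(n_clusters=k).fit_predict(X)
        score = silhouette_score(X, labels)
        if score > best_score:
            best_labels, best_k, best_score = labels, k, score
    return best_labels, best_k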
Example #5
    sess.run(init_op)
    fd = {ftr_input: features}
    while epoch < epochs:
        _, losscheck, value2 = sess.run([train_op, loss, checkvalue], feed_dict=fd)
        print ("epoch: {} loss: {}, checkvalue: {}".format(epoch, losscheck, value2))
        epoch += 1

    print ("final_embed: ", final_embed)
    embedding = sess.run([final_embed], feed_dict=fd)
    embedding = embedding[0]
    print ("embedding: ", embedding)

    from utils import clustering, pairwise_precision_recall_f1

    clusters_pred = clustering(embedding, num_clusters=nb_class)
    prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, rawlabels)
    print('prec: ', prec, ', rec: ', rec, ', f1: ', f1, ', originNumberOfClusterlabels: ', nb_class)

    tSNEAnanlyse(embedding, rawlabels, join(settings.PIC_DIR, "PureMetricLoss", "%s_final.png" % (name)))
    tSNEAnanlyse(features, rawlabels, join(settings.PIC_DIR, "PureMetricLoss", "%s_features.png" % (name)))

    # my_KNN(xx, yy)
    # my_Kmeans(xx, yy)

    sess.close()
Example #6
def clusterTest(embedding, numberofLabels):
    # NOTE: `labels` is a module-level ground-truth array in the original repo,
    # not a parameter of this function
    clusters_pred = clustering(embedding, num_clusters=numberofLabels)
    prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
    return [prec, rec, f1]
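
A variant that avoids the module-level global (a hypothetical rewrite, not from the repo) would take the ground truth explicitly:

def cluster_test(embedding, number_of_labels, true_labels):
    # same logic as clusterTest, with the ground truth passed in
    clusters_pred = clustering(embedding, num_clusters=number_of_labels)
    return pairwise_precision_recall_f1(clusters_pred, true_labels)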