Code Example #1
def getNewClusterLabel(emb, initClusterlabel, NumberOfCluster):
    Clusterlabels = clustering(emb, num_clusters=NumberOfCluster)

    print('Clusterlabels: ', Counter(Clusterlabels))
    print('initClusterlabel: ', initClusterlabel)
    # If a cluster ends up with only one member, it has to be merged back and the labels adjusted.
    C = Counter(Clusterlabels)
    # print (C)
    # Process labels from largest to smallest so that shifting labels further down
    # never invalidates a singleton label that has not been handled yet.
    for idx, v in sorted(C.items(), reverse=True):
        if v == 1:
            tTable = getOriginClusterLabel(initClusterlabel, Clusterlabels,
                                           idx)
            if tTable == -1:
                continue
            print('idx: ', idx, ', tTable: ', tTable)
            for tidx, k in enumerate(Clusterlabels):
                if Clusterlabels[tidx] == idx:
                    Clusterlabels[tidx] = tTable

            # One label was removed, so shift the labels after it forward by one.
            for tidx, k in enumerate(Clusterlabels):
                if Clusterlabels[tidx] > idx:
                    Clusterlabels[tidx] = Clusterlabels[tidx] - 1
            NumberOfCluster = NumberOfCluster - 1
    # Clusterlabels = clustering(emb, num_clusters=NumberOfCluster)

    return NumberOfCluster, Clusterlabels
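
Example #1 (and several of the later examples) calls a clustering(emb, num_clusters=...) helper imported from utils that is not included in this listing. As a rough reference only, a minimal sketch of such a helper, assuming a scikit-learn AgglomerativeClustering backend (the project's actual utils.clustering may use a different algorithm):

# Hypothetical sketch of the assumed utils.clustering helper -- for illustration only,
# not the project's actual implementation.
from sklearn.cluster import AgglomerativeClustering


def clustering(emb, num_clusters):
    # Group the embedding vectors into num_clusters clusters and return integer labels.
    model = AgglomerativeClustering(n_clusters=num_clusters)
    return model.fit_predict(emb)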
Code Example #2
def test_clustering(n_clusters):
    # Loading in the cleaned DF
    with open("pickles/profiles1.pkl", 'rb') as fp:
        data_frame = pickle.load(fp)
        clustered_df = clustering(
            data_frame,
            fn_vectorized_words=vectorized_words_count_vector,
            fn_algorithm_clustering=agglomerative_clustering,
            n_clusters=n_clusters)
        with open("pickles/clustered_profiles.pkl", "wb") as wb:
            clustered_df.to_csv(r"csv/clustered_profiles.csv", index=False)
            pickle.dump(clustered_df, wb)
Code Example #3
def Get_Cluster_Stocks():
    """Get cluster of different risk stocks, 5 stocks each risk cluster """
    conn = connect_db()
    cur = conn.cursor()
    stocks = clustering(conn, cur)
    # cur.close()
    print(stocks)

    for risk_list in stocks:
        for idx, stock_id in enumerate(risk_list):
            cur.execute(f"select name from stock where id = {stock_id};")
            data = cur.fetchall()
            stock_name = str(data[0])
            stock_name = stock_name[2:-3]
            # stock_name = get_stock_name(stock_id)
            risk_list[idx] = stock_name + '(' + str(stock_id) + ')'
    cur.close()

    low_risk_list = stocks[0]
    mid_risk_list = stocks[1]
    high_risk_list = stocks[2]
    return low_risk_list, mid_risk_list, high_risk_list
Code Example #4
    def train(self, adj_list, fea_list, y_train, y_val, y_test, train_mask, val_mask, test_mask, y_all, all_mask, rawlabels, needtSNE=False, rawFeature=[]):

        prec, rec, f1 = 0.0, 0.0, 0.0
        nb_nodes = fea_list[0].shape[0]
        ft_size = fea_list[0].shape[1]
        nb_classes = y_train.shape[1]
        # nb_classes = len(set(rawlabels))

        # adj = adj.todense()

        # features = features[np.newaxis]  # [1, nb_node, ft_size]
        fea_list = [fea[np.newaxis] for fea in fea_list]
        adj_list = [adj[np.newaxis] for adj in adj_list]
        y_train = y_train[np.newaxis]
        y_val = y_val[np.newaxis]
        y_test = y_test[np.newaxis]
        y_all = y_all[np.newaxis]

        train_mask = train_mask[np.newaxis]
        val_mask = val_mask[np.newaxis]
        test_mask = test_mask[np.newaxis]
        all_mask = all_mask[np.newaxis]

        biases_list = [process.adj_to_bias(adj, [nb_nodes], nhood=1) for adj in adj_list]

        print('build graph...')
        with tf.Graph().as_default():
            with tf.name_scope('input'):
                metric_ftr_in = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ft_size), name='metric_ftr_in')
                ftr_in_list = [tf.placeholder(dtype=tf.float32,
                                              shape=(batch_size, nb_nodes, ft_size),
                                              name='ftr_in_{}'.format(i))
                               for i in range(len(fea_list))]
                bias_in_list = [tf.placeholder(dtype=tf.float32,
                                               shape=(batch_size, nb_nodes, nb_nodes),
                                               name='bias_in_{}'.format(i))
                                for i in range(len(biases_list))]
                lbl_in = tf.placeholder(dtype=tf.int32, shape=(
                    batch_size, nb_nodes, nb_classes), name='lbl_in')
                msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes),
                                        name='msk_in')
                attn_drop = tf.placeholder(dtype=tf.float32, shape=(), name='attn_drop')
                ffd_drop = tf.placeholder(dtype=tf.float32, shape=(), name='ffd_drop')
                is_train = tf.placeholder(dtype=tf.bool, shape=(), name='is_train')

            # forward
            logits, final_embedding, att_val, centers_embed, test_final_embeed = model.inference(ftr_in_list, nb_classes, nb_nodes, is_train,
                                                               attn_drop, ffd_drop,
                                                               bias_mat_list=bias_in_list,
                                                               hid_units=hid_units, n_heads=n_heads, features=fea_list, labels=rawlabels,
                                                               residual=residual, activation=nonlinearity, feature_size=ft_size)


            # final_embedding: checkout Tensor("Sum:0", shape=(286, 64), dtype=float32)

            # logits: checkout Tensor("ExpandDims_3:0", shape=(1, 286, 30), dtype=float32)

            # compute the masked loss
            # lab_list = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ), name='lab_list')
            # ftr_resh = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ft_size), name='ftr_resh')
            log_resh = tf.reshape(logits, [-1, nb_classes])
            lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
            msk_resh = tf.reshape(msk_in, [-1])


            print ("final_embedding: checkout", final_embedding)
            print ("logits: checkout", logits)
            print ("log_resh: checkout", log_resh)
            # print ("ftr_resh: ", ftr_resh)
            print ("lab_resh: ", lab_resh)
            print ("fea_list: ", fea_list)
            print ("centers_embed: ", centers_embed)
            print ("batch_size, nb_nodes, nb_classes, ft_size", batch_size, nb_nodes, nb_classes, ft_size)

            osm_caa_loss = OSM_CAA_Loss(batch_size=nb_nodes)
            osm_loss = osm_caa_loss.forward

            # final_embedding: checkout Tensor("Sum:0", shape=(286, 64), dtype=float32)
            # logits: checkout Tensor("ExpandDims_3:0", shape=(1, 286, 30), dtype=float32)
            # log_resh: checkout Tensor("Reshape:0", shape=(286, 30), dtype=float32)
            # ftr_resh:  Tensor("ftr_resh:0", shape=(286, 100), dtype=float32)
            # lab_resh:  Tensor("Reshape_1:0", shape=(286, 30), dtype=int32)

            osmLoss, checkvalue = osm_loss(final_embedding, rawlabels, centers_embed)
            # osmLoss, checkvalue = osm_loss(metric_ftr_in, rawlabels, centers_embed)
            SoftMaxloss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
            loss = osmLoss
            # Why does the loss stay constant?
            # loss = osmLoss
            # loss = SoftMaxloss

            accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)
            # optimize
            train_op = model.training(loss, lr, l2_coef)

            Path = 'pre_trained/{}/{}/{}'.format(dataset, dataset, self.name)
            self.mkdir(Path)
            checkpt_file = '{}/allMP_multi_{}_.ckpt'.format(Path, featype)
            print('model: {}'.format(checkpt_file))
            saver = tf.train.Saver()

            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())

            vlss_mn = np.inf
            vacc_mx = 0.0
            curr_step = 0

            with tf.Session(config=config) as sess:
                sess.run(init_op)

                train_loss_avg = 0
                train_acc_avg = 0
                val_loss_avg = 0
                val_acc_avg = 0

                for epoch in range(nb_epochs):
                    tr_step = 0

                    tr_size = fea_list[0].shape[0]
                    # ================   training    ============
                    while tr_step * batch_size < tr_size:
                        fd1 = {i: d[tr_step * batch_size:(tr_step + 1) * batch_size]
                               for i, d in zip(ftr_in_list, fea_list)}
                        fd2 = {i: d[tr_step * batch_size:(tr_step + 1) * batch_size]
                               for i, d in zip(bias_in_list, biases_list)}
                        fd3 = {lbl_in: y_train[tr_step * batch_size:(tr_step + 1) * batch_size],
                               msk_in: train_mask[tr_step * batch_size:(tr_step + 1) * batch_size],
                               metric_ftr_in: rawFeature,
                               is_train: True,
                               attn_drop: 0.6,
                               ffd_drop: 0.6}
                        fd = fd1
                        fd.update(fd2)
                        fd.update(fd3)
                        _, loss_value_tr, acc_tr, att_val_train = sess.run([train_op, loss, accuracy, att_val],
                                                                           feed_dict=fd)
                        test_check_value = sess.run(checkvalue, feed_dict=fd)
                        print ("test_check_value: ", test_check_value)

                        train_loss_avg += loss_value_tr
                        train_acc_avg += acc_tr
                        tr_step += 1


                    vl_step = 0
                    vl_size = fea_list[0].shape[0]
                    # =============   val       =================
                    while vl_step * batch_size < vl_size:
                        # fd1 = {ftr_in: features[vl_step * batch_size:(vl_step + 1) * batch_size]}
                        fd1 = {i: d[vl_step * batch_size:(vl_step + 1) * batch_size]
                               for i, d in zip(ftr_in_list, fea_list)}
                        fd2 = {i: d[vl_step * batch_size:(vl_step + 1) * batch_size]
                               for i, d in zip(bias_in_list, biases_list)}
                        fd3 = {lbl_in: y_val[vl_step * batch_size:(vl_step + 1) * batch_size],
                               msk_in: val_mask[vl_step * batch_size:(vl_step + 1) * batch_size],
                               metric_ftr_in: rawFeature,
                               is_train: False,
                               attn_drop: 0.0,
                               ffd_drop: 0.0}

                        fd = fd1
                        fd.update(fd2)
                        fd.update(fd3)
                        loss_value_vl, acc_vl = sess.run([loss, accuracy],
                                                         feed_dict=fd)
                        val_loss_avg += loss_value_vl
                        val_acc_avg += acc_vl
                        vl_step += 1
                    # import pdb; pdb.set_trace()
                    print('Epoch: {}, att_val: {}'.format(epoch, np.mean(att_val_train, axis=0)))
                    print('Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f | vl_step: %d, tr_step: %d' %
                          (train_loss_avg / tr_step, train_acc_avg / tr_step,
                           val_loss_avg / vl_step, val_acc_avg / vl_step, vl_step, tr_step))

                    if val_acc_avg / vl_step >= vacc_mx or val_loss_avg / vl_step <= vlss_mn:
                        if val_acc_avg / vl_step >= vacc_mx and val_loss_avg / vl_step <= vlss_mn:
                            vacc_early_model = val_acc_avg / vl_step
                            vlss_early_model = val_loss_avg / vl_step
                            saver.save(sess, checkpt_file)
                        vacc_mx = np.max((val_acc_avg / vl_step, vacc_mx))
                        vlss_mn = np.min((val_loss_avg / vl_step, vlss_mn))
                        curr_step = 0
                    else:
                        curr_step += 1
                        if curr_step == patience:
                            print('Early stop! Min loss: ', vlss_mn,
                                  ', Max accuracy: ', vacc_mx)
                            print('Early stop model validation loss: ',
                                  vlss_early_model, ', accuracy: ', vacc_early_model)
                            break

                    train_loss_avg = 0
                    train_acc_avg = 0
                    val_loss_avg = 0
                    val_acc_avg = 0
                # save the final checkpoint
                saver.save(sess, checkpt_file)

                saver.restore(sess, checkpt_file)
                print('load model from : {}'.format(checkpt_file))
                ts_size = fea_list[0].shape[0]
                ts_step = 0
                ts_loss = 0.0
                ts_acc = 0.0

                while ts_step * batch_size < ts_size:
                    fd1 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                           for i, d in zip(ftr_in_list, fea_list)}
                    fd2 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                           for i, d in zip(bias_in_list, biases_list)}
                    fd3 = {lbl_in: y_all[ts_step * batch_size:(ts_step + 1) * batch_size],
                           msk_in: all_mask[ts_step * batch_size:(ts_step + 1) * batch_size],
                           metric_ftr_in: rawFeature,
                           is_train: False,
                           attn_drop: 0.0,
                           ffd_drop: 0.0}

                    fd = fd1
                    fd.update(fd2)
                    fd.update(fd3)
                    loss_value_ts, acc_ts, jhy_final_embedding, test_final_embeed_check = sess.run([loss, accuracy, final_embedding, test_final_embeed],
                                                                          feed_dict=fd)
                    ts_loss += loss_value_ts
                    ts_acc += acc_ts
                    ts_step += 1

                xx = np.expand_dims(jhy_final_embedding, axis=0)[all_mask]
                xx2 = np.expand_dims(test_final_embeed_check, axis=0)[all_mask]
                yy = y_all[all_mask]


                print ("check fd")
                print('xx: {}, yy: {}, ts_size: {}, ts_step: {}, batch_size: {}'.format(xx.shape, yy.shape, ts_size, ts_step,batch_size))

                labels, numberofLabels = self.getLabel(yy)

                from utils import  clustering, pairwise_precision_recall_f1

                clusters_pred = clustering(xx2, num_clusters=numberofLabels)
                prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
                print ('prec: ', prec, ', rec: ', rec, ', f1: ', f1, ', originNumberOfClusterlabels: ', numberofLabels)

                if needtSNE:
                    tSNEAnanlyse(xx, labels, join(settings.PIC_DIR, "HAN", "rawReature_%s_final.png" % (self.name)))
                    tSNEAnanlyse(rawFeature, labels, join(settings.PIC_DIR, "HAN", "rawReature_%s_features.png" % (self.name)))
                    tSNEAnanlyse(xx2, labels, join(settings.PIC_DIR, "HAN", "rawReature_%s_xx2.png" % (self.name)))
                    tSNEAnanlyse(xx, clusters_pred, join(settings.PIC_DIR, "HAN", "rawReature_%s_result_label.png" % (self.name)))


                sess.close()

        return prec, rec, f1, xx2
Code Example #5
    def MetricDebug(self, adj_list, fea_list, y_train, y_val, y_test, train_mask, val_mask, test_mask, y_all, all_mask, rawlabels, needtSNE=False, rawFeature=[]):
        prec, rec, f1 = 0.0, 0.0, 0.0
        nb_nodes = fea_list[0].shape[0]
        ft_size = fea_list[0].shape[1]
        nb_classes = y_train.shape[1]
        # nb_classes = len(set(rawlabels))

        # adj = adj.todense()

        # features = features[np.newaxis]  # [1, nb_node, ft_size]
        fea_list = [fea[np.newaxis] for fea in fea_list]
        adj_list = [adj[np.newaxis] for adj in adj_list]
        y_train = y_train[np.newaxis]
        y_val = y_val[np.newaxis]
        y_test = y_test[np.newaxis]
        y_all = y_all[np.newaxis]

        train_mask = train_mask[np.newaxis]
        val_mask = val_mask[np.newaxis]
        test_mask = test_mask[np.newaxis]
        all_mask = all_mask[np.newaxis]

        biases_list = [process.adj_to_bias(adj, [nb_nodes], nhood=1) for adj in adj_list]

        print('build graph...')
        with tf.Graph().as_default():
            with tf.name_scope('input'):
                metric_ftr_in = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ft_size), name='metric_ftr_in')
                ftr_in_list = [tf.placeholder(dtype=tf.float32,
                                              shape=(batch_size, nb_nodes, ft_size),
                                              name='ftr_in_{}'.format(i))
                               for i in range(len(fea_list))]
                bias_in_list = [tf.placeholder(dtype=tf.float32,
                                               shape=(batch_size, nb_nodes, nb_nodes),
                                               name='bias_in_{}'.format(i))
                                for i in range(len(biases_list))]
                lbl_in = tf.placeholder(dtype=tf.int32, shape=(
                    batch_size, nb_nodes, nb_classes), name='lbl_in')
                msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes),
                                        name='msk_in')
                attn_drop = tf.placeholder(dtype=tf.float32, shape=(), name='attn_drop')
                ffd_drop = tf.placeholder(dtype=tf.float32, shape=(), name='ffd_drop')
                is_train = tf.placeholder(dtype=tf.bool, shape=(), name='is_train')

            # forward
            logits, final_embedding, att_val, centers_embed, test_final_embeed = model.inference(ftr_in_list, nb_classes, nb_nodes, is_train,
                                                               attn_drop, ffd_drop,
                                                               bias_mat_list=bias_in_list,
                                                               hid_units=hid_units, n_heads=n_heads, features=fea_list, labels=rawlabels,
                                                               residual=residual, activation=nonlinearity, feature_size=ft_size)

            log_resh = tf.reshape(logits, [-1, nb_classes])
            lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
            msk_resh = tf.reshape(msk_in, [-1])

            osm_caa_loss = OSM_CAA_Loss(batch_size=nb_nodes)
            osm_loss = osm_caa_loss.forward

            osmLoss, checkvalue = osm_loss(final_embedding, rawlabels, centers_embed)
            SoftMaxloss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
            loss = osmLoss

            accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)
            # optimize
            train_op = model.training(loss, lr, l2_coef)

            Path = 'pre_trained/{}/{}/{}'.format(dataset, dataset, self.name)
            self.mkdir(Path)
            checkpt_file = '{}/allMP_multi_{}_.ckpt'.format(Path, featype)
            saver = tf.train.Saver()

            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())

            ts_size = fea_list[0].shape[0]
            ts_step = 0
            ts_loss = 0.0
            ts_acc = 0.0

            with tf.Session(config=config) as sess:
                sess.run(init_op)
                saver.restore(sess, checkpt_file)

                while ts_step * batch_size < ts_size:
                    fd1 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                           for i, d in zip(ftr_in_list, fea_list)}
                    fd2 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                           for i, d in zip(bias_in_list, biases_list)}
                    fd3 = {lbl_in: y_all[ts_step * batch_size:(ts_step + 1) * batch_size],
                           msk_in: all_mask[ts_step * batch_size:(ts_step + 1) * batch_size],
                           metric_ftr_in: rawFeature,
                           is_train: False,
                           attn_drop: 0.0,
                           ffd_drop: 0.0}

                    fd = fd1
                    fd.update(fd2)
                    fd.update(fd3)
                    # Run the tensor directly (not wrapped in a list) so the result is an
                    # ndarray that np.expand_dims below can index with the mask.
                    test_final_embeed_check = sess.run(test_final_embeed, feed_dict=fd)
                    ts_step += 1

                xx2 = np.expand_dims(test_final_embeed_check, axis=0)[all_mask]
                yy = y_all[all_mask]

                labels, numberofLabels = self.getLabel(yy)

                from utils import  clustering, pairwise_precision_recall_f1

                clusters_pred = clustering(xx2, num_clusters=numberofLabels)
                prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
                print ('prec: ', prec, ', rec: ', rec, ', f1: ', f1, ', originNumberOfClusterlabels: ', numberofLabels)

                if needtSNE:
                    tSNEAnanlyse(rawFeature, labels, join(settings.PIC_DIR, "MetricLearning", "rawReature_%s_features.png" % (self.name)))
                    tSNEAnanlyse(xx2, labels, join(settings.PIC_DIR, "MetricLearning", "rawReature_%s_xx2.png" % (self.name)))
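
Examples #4 and #5 (as well as #8 and #11 below) also call a tSNEAnanlyse visualisation helper that is not included in this listing. A minimal sketch, assuming it projects the embeddings to 2-D with scikit-learn's TSNE and saves a matplotlib scatter plot colored by label; the real helper may differ:

# Hypothetical sketch of the tSNEAnanlyse helper -- an assumption for illustration only.
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE


def tSNEAnanlyse(emb, labels, savepath):
    # Project the embeddings to two dimensions and save a labeled scatter plot.
    points = TSNE(n_components=2).fit_transform(emb)
    plt.figure()
    plt.scatter(points[:, 0], points[:, 1], c=labels, s=5, cmap='tab20')
    plt.savefig(savepath)
    plt.close()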
Code Example #6
def train(name, needtSNE=False, savefile=True):
    adj, adj2, features, labels, Clusterlabels, Ids = load_local_data(
        name=name)

    initClusterlabel = Clusterlabels
    oneHotClusterLabels = toOneHot(Clusterlabels)
    num_logits = len(oneHotClusterLabels[0])
    # enc.transform([['Female', 1], ['Male', 4]]).toarray()
    print('debuging ', oneHotClusterLabels.shape)

    originClusterlabels = Clusterlabels
    n_clusters = len(set(labels))
    OldClusterlabels = Clusterlabels
    originNumberOfClusterlabels = len(set(Clusterlabels))

    num_nodes = adj.shape[0]
    input_feature_dim = features.shape[1]
    adj_norm, adj_label = NormalizedAdj(adj)
    adj_norm2, adj_label2 = NormalizedAdj(adj2)

    if FLAGS.is_sparse:  # TODO to test
        # features = sparse_to_tuple(features.tocoo())
        # features_nonzero = features[1].shape[0]
        features = features.todense()  # TODO
    else:
        features = normalize_vectors(features)

    # Define placeholders
    placeholders = {
        # 'features': tf.sparse_placeholder(tf.float32),
        'features': tf.placeholder(tf.float32,
                                   shape=(None, input_feature_dim)),
        'labels': tf.placeholder(tf.int64, shape=(None), name='labels'),
        'graph1': tf.sparse_placeholder(tf.float32),
        'graph2': tf.sparse_placeholder(tf.float32),
        'graph1_orig': tf.sparse_placeholder(tf.float32),
        'graph2_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'epoch': tf.placeholder_with_default(0., shape=()),
        'clusterEpoch': tf.placeholder_with_default(0., shape=())
    }

    # pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()  # negative edges/pos edges
    # norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.nnz) * 2)

    def get_embs():
        feed_dict.update({placeholders['dropout']: 0})
        emb = sess.run(model.z_mean_1, feed_dict=feed_dict)  # z_mean is better
        return emb

    def getGraphDetail(adj):
        pos_weight = float(adj.shape[0] * adj.shape[0] -
                           adj.sum()) / adj.sum()  # negative edges/pos edges
        norm = adj.shape[0] * adj.shape[0] / float(
            (adj.shape[0] * adj.shape[0] - adj.nnz) * 2)
        return {'norm': norm, 'pos_weight': pos_weight}

        # return pos_weight, norm

    # loss1s = []
    # loss2s = []
    # loss3s = []

    n_clusters = len(set(labels))
    graph1 = getGraphDetail(adj)
    graph2 = getGraphDetail(adj2)

    # construct adj_orig
    graph1['labels'] = tf.reshape(
        tf.sparse_tensor_to_dense(placeholders['graph1_orig'],
                                  validate_indices=False), [-1])
    graph2['labels'] = tf.reshape(
        tf.sparse_tensor_to_dense(placeholders['graph2_orig'],
                                  validate_indices=False), [-1])

    # Train model
    for clusterepoch in range(FLAGS.clusterEpochs):
        print('cluster epoch: ', clusterepoch)
        # tf.reset_default_graph()

        # num_logits
        model = BuildModel(placeholders,
                           input_feature_dim,
                           num_nodes,
                           name='model%d' % (clusterepoch),
                           num_logits=num_logits)

        # Session

        # tf.reset_default_graph()
        # sess = tf.InteractiveSession()

        opt = OptimizerDualGCNAutoEncoder(model=model,
                                          num_nodes=num_nodes,
                                          z_label=Clusterlabels,
                                          name='model%d' % (clusterepoch),
                                          graph1=graph1,
                                          graph2=graph2)

        sess = tf.Session()
        sess.run(tf.global_variables_initializer())

        # Centers
        # centers = opt.centers

        for epoch in range(FLAGS.epochs):
            # print ('epoch: ', epoch)

            # opt.epoch = epoch
            model.epoch = epoch

            # Construct feed dictionary
            # Number of logics and preb

            feed_dict = construct_feed_dict(adj_norm, adj_label, adj_norm2,
                                            adj_label2, features, placeholders,
                                            Clusterlabels, epoch,
                                            clusterepoch + 1)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})
            # Run single weight update
            # outs = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict)
            outs = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict)
            # [Loss, softmax_loss, loss3, centerloss, reconstructloss] = sess.run([opt.cost, opt.softmax_loss, opt.loss3, opt.centerloss, opt.reconstructloss], feed_dict=feed_dict)
            # [Loss, loss3, centerloss, reconstructloss, L2loss] = sess.run([opt.cost, opt.loss3, opt.centerloss, opt.reconstructloss, opt.L2loss], feed_dict=feed_dict)
            [Loss, reconstructloss] = sess.run([opt.cost, opt.reconstructloss],
                                               feed_dict=feed_dict)

            # print ('loss: ', Loss, ', loss1: ', loss1, ', loss2: ', loss2 ,', centerloss: ', centerloss, ', acc: ', outs[2])
            print('epoch: ', epoch, ', loss: ', Loss, ', reconstructloss : ',
                  reconstructloss)

        # if clusterepoch != FLAGS.clusterEpochs -1 :
        emb = get_embs()
        X_new = TSNE(learning_rate=100).fit_transform(emb)

        tClusterLabels = []
        Maxscore = -10000
        NumberOfCluster = 0
        for nc in range(2, originNumberOfClusterlabels + 1, 1):
            TempLabels = clustering(X_new, nc)
            score = silhouette_score(X_new, TempLabels)
            print('nc: ', nc, ', score: ', score)
            if score > Maxscore:
                Maxscore = score
                tClusterLabels = TempLabels
                NumberOfCluster = nc

        print('NumberOfCluster: ', NumberOfCluster,
              ', originNumberOfClusterlabels : ', originNumberOfClusterlabels,
              ', Maxscore: ', Maxscore)
        if NumberOfCluster < 0 or NumberOfCluster > originNumberOfClusterlabels:
            continue

        # The number of clusters keeps shrinking, as required,
        # so update the bookkeeping variables accordingly.
        Clusterlabels = tClusterLabels
        originNumberOfClusterlabels = NumberOfCluster

        prec, rec, f1 = pairwise_precision_recall_f1(Clusterlabels, labels)
        print('prec: ', prec, ', rec: ', rec, ', f1: ', f1,
              ', originNumberOfClusterlabels: ', originNumberOfClusterlabels)
        Cc = Counter(Clusterlabels)
        print(Cc)
        if needtSNE:
            sNEComparingAnanlyse(emb,
                                 OldClusterlabels,
                                 labels,
                                 Clusterlabels,
                                 savepath=join(
                                     settings.PIC_DIR,
                                     "%s_%s.png" % (name, clusterepoch)))
            # tSNEAnanlyse(emb, labels, join(settings.PIC_DIR, "%s.png"%(clusterepoch)) )
            # tf.reset_default_graph()

    emb = get_embs()
    emb_norm = normalize_vectors(emb)
    clusters_pred = clustering(emb_norm,
                               num_clusters=originNumberOfClusterlabels)
    prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
    print('prec: ', prec, ', rec: ', rec, ', f1: ', f1,
          ', originNumberOfClusterlabels: ', originNumberOfClusterlabels)
    # lossPrint(range(FLAGS.epochs), loss1s, loss2s, loss3s)
    if needtSNE:
        tSNEAnanlyse(emb, labels,
                     join(settings.PIC_DIR, "%s_final.png" % (name)))
    tf.reset_default_graph()
    return [prec, rec, f1], num_nodes, n_clusters
Code Example #7
    plt.savefig('NON_DEEP_mean_std_cls' + str(N_CLUSTERING))

    return y


if __name__ == '__main__':

    models = [
        'resnet101', 'resnet50', 'resnet18', 'vgg11', 'sobel', 'laplacian',
        'HOG'
    ]
    datasets = ['ICDAR15', 'MSRA-TD500', 'MSRA-TD500.blur']
    num_of_clustering = 2

    dataset = datasets[1]
    model = models[6]

    image_root_path = os.path.join(os.path.expanduser('~'), 'Documents',
                                   dataset, 'trainim')
    root = '/home/litianjiao/codes/curriculum/extracted_feats'

    means, vars = main(model=model, dataset=dataset)
    y = nonDeepClustering(N_CLUSTERING=num_of_clustering, MEAN=means, VAR=vars)
    clustering(NUM_OF_CLUSTERING=num_of_clustering,
               IMG_ROOT=image_root_path,
               ROOT=root,
               ID=y,
               DATASET=dataset,
               MODEL=model)
Code Example #8
with tf.Session() as sess:
    sess.run(init_op)
    fd = {ftr_input: features}
    while epoch < epochs:
        _, losscheck, value2 = sess.run([train_op, loss, checkvalue], feed_dict=fd)
        print ("epoch: {} loss: {}, checkvalue: {}".format(epoch, losscheck, value2))
        epoch += 1

    print ("final_embed: ", final_embed)
    embedding = sess.run([final_embed], feed_dict=fd)
    embedding = embedding[0]
    print ("embedding: ", embedding)

    from utils import clustering, pairwise_precision_recall_f1

    clusters_pred = clustering(embedding, num_clusters=nb_class)
    prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, rawlabels)
    print ('prec: ', prec, ', rec: ', rec, ', f1: ', f1, ', originNumberOfClusterlabels: ', nb_class)

    tSNEAnanlyse(embedding, rawlabels, join(settings.PIC_DIR, "PureMetricLoss", "%s_final.png" % (name)))
    tSNEAnanlyse(features, rawlabels, join(settings.PIC_DIR, "PureMetricLoss", "%s_features.png" % (name)))

    # my_KNN(xx, yy)
    # my_Kmeans(xx, yy)

    sess.close()

Code Example #9
    for j in range(N_CLUSTERING):
        plt.plot(class_means[j], class_vars[j], color_map[j])

    plt.savefig('NON_DEEP_mean_std_cls' + str(N_CLUSTERING))

    return y


if __name__ == '__main__':

    models = [
        'resnet101', 'resnet50', 'resnet18', 'vgg11', 'sobel', 'laplacian'
    ]
    datasets = ['ICDAR15', 'MSRA-TD500', 'MSRA-TD500.blur']
    num_of_clustering = 2

    dataset = datasets[1]

    image_root_path = os.path.join(os.path.expanduser('~'), 'Documents',
                                   dataset, 'trainim')
    root = '/home/litianjiao/codes/curriculum/extracted_feats'

    # main(model=models[4], dataset=datasets[0])
    means, vars = main(model=models[4], dataset=dataset)
    y = nonDeepClustering(N_CLUSTERING=num_of_clustering, MEAN=means, VAR=vars)
    clustering(NUM_OF_CLUSTERING=num_of_clustering,
               IMG_ROOT=image_root_path,
               ROOT=root,
               ID=y)
Code Example #10
File: main.py  Project: Jhe-Yun/Stock-Forecasting
from utils import clustering

if __name__ == "__main__":

    load_dotenv()
    conn = psycopg2.connect(database="teamc",
                            user=os.getenv("user"),
                            password=os.getenv("password"),
                            host=os.getenv("host"),
                            port="5432")
    cur = conn.cursor()

    # Add_History:
    # cur.execute("SELECT * FROM stock")
    # stocks = cur.fetchall()
    # today = datetime.date.today()
    # tomorrow = today + datetime.timedelta(days = 1)
    # three_years_ago = today - relativedelta(years = 3)
    # for stock in stocks:
    #     Add_History(conn, cur, stock[0], three_years_ago.strftime('%Y%m%d'), tomorrow.strftime('%Y%m%d'))

    # Add_Analysis:
    # cur.execute("SELECT * FROM stock")
    # stocks = cur.fetchall()
    # for stock in stocks:
    #     Add_Analysis(conn, cur, stock[0])

    # output: [[low], [medium], [high]]
    stocks = clustering(conn, cur)

    cur.close()
Code Example #11
def clusterTest(embedding, numberofLabels):
    clusters_pred = clustering(embedding, num_clusters=numberofLabels)
    prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
    return [prec, rec, f1]


def loadFeature(name, idf_threshold=None, ispretrain=False):
    # NOTE: the original snippet is truncated at this point; the signature above and the
    # line below are reconstructed from the call site near the end of this example.
    # EndIndex marks where the label column begins in the feature file.
    EndIndex = -2
    featurePath = getPATH(name, idf_threshold, 'feature_and_label', ispretrain)
    # idx_features_labels = np.genfromtxt(join(settings.DATA_DIR, 'local', 'graph-{}'.format(idf_threshold)), dtype=np.dtype(str))
    idx_features_labels = np.genfromtxt(featurePath, dtype=np.dtype(str))
    features = np.array(idx_features_labels[:, 1:EndIndex],
                        dtype=np.float32)  # sparse?
    rawlabels = encode_labels(idx_features_labels[:, EndIndex])
    pids = idx_features_labels[:, 0]
    return features, pids, rawlabels


def load_test_names():
    return data_utils.load_json(settings.DATA_DIR, 'test_name_list2.json')


Res = {}

names = load_test_names()
for name in names:
    features, pids, rawlabels = loadFeature(name, ispretrain=False)
    tSNEAnanlyse(
        features, rawlabels,
        join(settings.PIC_DIR, "MetricLearning",
             "rawReature_%s_train.png" % (name)))
    numberofLabels = len(set(rawlabels))
    clusters_pred = clustering(features, num_clusters=numberofLabels)
    prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, rawlabels)
    Res[name] = {"prec": prec, "rec": rec, "f1": f1}

print(Res)
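
Most of the examples above score their clusterings with pairwise_precision_recall_f1 from utils, which is likewise not shown. A minimal sketch of the pairwise clustering metric it presumably computes, counting item pairs that are grouped together in both the prediction and the ground truth (an assumption about the helper, not its actual code):

# Hypothetical sketch of pairwise precision/recall/F1 for clusterings -- illustration only.
def pairwise_precision_recall_f1(preds, truths):
    tp = fp = fn = 0
    n = len(preds)
    for i in range(n):
        for j in range(i + 1, n):
            same_pred = preds[i] == preds[j]
            same_true = truths[i] == truths[j]
            if same_pred and same_true:
                tp += 1          # pair correctly placed together
            elif same_pred:
                fp += 1          # pair wrongly placed together
            elif same_true:
                fn += 1          # pair wrongly separated
    prec = tp / (tp + fp) if tp + fp else 0.0
    rec = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * prec * rec / (prec + rec) if prec + rec else 0.0
    return prec, rec, f1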