def run(self):
    # Worker thread: keep the shared queue topped up with data batches.
    print(threading.current_thread())
    while True:
        if self.queue.qsize() < 100 and not self.queue.full():
            if self.is_train:
                batch_x, batch_y = load_data(sex=sex,
                                             img_size=img_size,
                                             batch_size=batch_size,
                                             augment_times=7)
                if DEBUG_MODEL:
                    print("%s ask lock" % threading.current_thread())
                train_lock.acquire()
                if DEBUG_MODEL:
                    print("%s acquire" % threading.current_thread())
                self.queue.put({"x": batch_x, "y": batch_y})
                train_lock.release()
                if DEBUG_MODEL:
                    print("%s release" % threading.current_thread())
            else:
                batch_x, batch_y = load_data(sex=sex,
                                             img_size=img_size,
                                             batch_size=batch_size,
                                             augment_times=0)
                test_lock.acquire()
                self.queue.put({"x": batch_x, "y": batch_y})
                test_lock.release()
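
For context, a minimal, self-contained sketch of the producer/consumer pattern this run() method implements; DataThread and the trivial load_data below are stand-ins, not the original project's API:

import threading
from queue import Queue

def load_data():  # stand-in for the project's load_data(...)
    return [0] * 16, [1] * 16

class DataThread(threading.Thread):
    def __init__(self, queue, lock):
        super().__init__(daemon=True)
        self.queue = queue
        self.lock = lock

    def run(self):
        while True:
            if self.queue.qsize() < 100 and not self.queue.full():
                batch_x, batch_y = load_data()
                with self.lock:  # same acquire()/release() as above, via a context manager
                    self.queue.put({"x": batch_x, "y": batch_y})

q = Queue(maxsize=100)
DataThread(q, threading.Lock()).start()
print(q.get()["x"][:3])  # consumer side: blocks until a batch is available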
Example #2
def train():
    path = '../dataset/usage_train.csv'
    df = load_data(path)
    df['weekofyear'] = df.ds.apply(lambda x: x.isocalendar()[1])  # ISO week; Timestamp.weekofyear was removed in pandas 2.0
    samples_week = 2 * 24 * 7

    results_train = pd.DataFrame(data=None,
                                 index=df.id.unique(),
                                 columns=[50, 60, 70, 80, 90])
    results_test = results_train.copy()

    final_models = {}
    final_errors = {'train': {}, 'test': {}}
    for house_id in df.id.unique():
        df_house = df[df.id == house_id]
        print('*********************')
        print('INFO: {}'.format(house_id))
        print('*********************')
        results_train, results_test = error_estimation_cross_validation(
            df_house, house_id, samples_week, results_train, results_test)
        model, error_train, error_test = fit_final_model(
            df_house, samples_week)
        final_models[house_id] = model
        final_errors['train'][house_id] = error_train
        final_errors['test'][house_id] = error_test

    pickle.dump(final_models, open('./model/models.pickle', 'wb'))
    pickle.dump(final_errors, open('./model/final_errors.pickle', 'wb'))
    results_train.to_csv('./results/performance_metrics_train.csv')
    results_test.to_csv('./results/performance_metrics_test.csv')


def predict():
    path_data = '../dataset/usage_test.csv'
    path_model = './model/models.pickle'
    df = load_data(path_data)
    models = pickle.load(open(path_model, 'rb'))

    # do predictions
    predictions = pd.DataFrame(data=None)
    for house_id in df.id.unique():
        print('*********************')
        print('INFO: {}'.format(house_id))
        print('*********************')
        df_house = df[df.id == house_id]
        pred = predict_a_week_ahead(models[house_id], df_house)
        df_house = df_house.merge(pred[['ds', 'yhat']])
        predictions = pd.concat([predictions, df_house])  # DataFrame.append was removed in pandas 2.0

    save_predictions(predictions)
Example #4
            epochs = 4
        else:
            epochs = 4
        """
        1、先构建网络,定义一些变量
        2、构建损失函数
        3、构建循环网络
        4、筛选保留集样本
        5、先实现残差网络 再实现增量学习
        6、实现简单的残差网络
        """
        # Create neural network model
        print('Run {0} starting ...'.format(itera))
        print("Building model and compiling functions...")

        image_train, label_train, image_test, label_test = utils_data.load_data(
            Cifar_train_file, Cifar_test_file)
        #next batch
        image_batch, label_batch_0, file_protoset_batch = utils_data.Prepare_train_data_batch(
            image_train, label_train, files_protoset, itera, order, nb_cl,
            batch_size)
        label_batch = tf.one_hot(label_batch_0, 100)
        # initial training increment
        if itera == 0:
            # no distillation needed for the first increment
            variables_graph, variables_graph2, scores, scores_stored = utils_cifar.prepareNetwork(
                gpu, image_batch, itera)
            with tf.device('/gpu:0'):
                scores = tf.concat(scores, 0)
                l2_reg = wght_decay * tf.reduce_sum(
                    tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                                      scope='ResNet34'))
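
As an aside, the reduce_sum over the collected regularization losses works because each entry is a scalar; the more common TF1 idiom for the same weight-decay term, shown here only as an equivalent sketch, is tf.add_n:

# Equivalent TF1-style weight-decay term (sketch, assuming scalar entries in the collection).
reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES, scope='ResNet34')
l2_reg = wght_decay * tf.add_n(reg_losses)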
Example #5
features_extractor = utils_models.build_features_extractor(
    features_extractor_name, data_shape)

for task_id, dataset in enumerate(datasets):
    if task_id == 0:
        open_mode = 'w'
    else:
        open_mode = 'a'
    utils.write_on_file(result_filename, open_mode,
                        "[%s] Starting new task..." % dataset.upper())
    '''X_train, y_train, X_valid, y_valid = utils_data.load_data(data_dir=data_dir, dataset=dataset, data_size=data_size,
                                                          phase='train_valid', valid_split=valid_split, seed=seed)'''
    X_train, y_train, X_valid, y_valid, _, _ = utils_data.load_data(
        data_dir=data_dir,
        dataset=dataset,
        data_size=data_size,
        valid_split=valid_split,
        test_split=test_split,
        seed=seed)
    ############################################ TRAINING ############################################
    utils.write_on_file(
        result_filename, 'a',
        "[%s] Starting autoencoder's cross-validation..." % dataset.upper())
    autoencoder = utils.autoencoder_cross_validation(
        features_extractor, batch_size, X_train, X_valid, features_mean,
        features_std, autoencoder_hidden_layer_sizes,
        autoencoder_weight_decays, autoencoder_learning_rates,
        autoencoder_epsilons, autoencoder_epochs, autoencoder_objective_loss,
        dataset)
    trained_autoencoders.append(autoencoder)
Example #6
    parser.add_argument('--weight_decay_layer_one', type=float)
    parser.add_argument('--weight_decay_layer_two', type=float)
    parser.add_argument('--num_epochs_patience', type=int, default=100)
    parser.add_argument('--num_epochs_max', type=int, default=5000)
    parser.add_argument('--run_id', type=str)
    parser.add_argument('--dataset_split', type=str)
    parser.add_argument('--learning_rate_decay_patience', type=int, default=50)
    parser.add_argument('--learning_rate_decay_factor',
                        type=float,
                        default=0.8)
    args = parser.parse_args()
    vars(args)['model'] = 'GeomGCN_TwoLayers_ExperimentTwoAll'

    t1 = time.time()
    if args.dataset_split == 'jknet':
        g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels = utils_data.load_data(
            args.dataset, None, 0.6, 0.2, 'ExperimentTwoAll')
    else:
        g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels = utils_data.load_data(
            args.dataset, args.dataset_split, None, None, 'ExperimentTwoAll')
    print(time.time() - t1)

    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)

    net = GeomGCNNet(g=g,
                     num_input_features=num_features,
                     num_output_classes=num_labels,
                     num_hidden=args.num_hidden,
                     num_divisions=25,
                     dropout_rate=args.dropout_rate,
                     num_heads_layer_one=args.num_heads_layer_one,
Example #7
# Working station
train_path = 'F:/Dataset/ILSVRC2012/cifar-100-python/train'
test_path = 'F:/Dataset/ILSVRC2012/cifar-100-python/test'
save_path = './model/'

###########################

str_settings_resnet = str(nb_cl) + 'settings_resnet.pickle'
# with open(str_settings_resnet, 'rb') as fp:
#     order = cPickle.load(fp)
#     files_valid = cPickle.load(fp)
#     files_train = cPickle.load(fp)

order = np.load('./order.npy', encoding='latin1')

image_train, label_train, image_test, label_test = utils_data.load_data(
    train_path, test_path)

for nb_cl in [2]:  #, 5, 10, 20, 50]:  # different numbers of classes per increment
    nb_groups = int(100 / nb_cl)
    acc_list = np.zeros((nb_groups, 1))
    for itera in range(nb_groups):  # number of incremental-learning steps (one model per iteration)
        # next batch
        image_batch, label_batch_0, file_protoset_batch = utils_data.Prepare_test_data_batch(
            image_test, label_test, itera, order, nb_cl, batch_size)
        label_batch = tf.one_hot(label_batch_0, 100)

        # Initialization

        print("Processing network after {} increments\t".format(itera))
        # Evaluation on the cumulative set of classes or the original classes
        if is_cumul == 'cumul':
Example #8
def train(n_epoch=N_TRAINING_EPOCH,
          img_size=299,
          sex=0,
          batch_size=16,
          num_gpu=1,
          start_layer=-1,
          start_epoch=0):
    assert start_layer in [
        -1, XCEPTION_EXIT_START, XCEPTION_MID_START, XCEPTION_ENTRY_START, 0
    ]
    assert sex in [0, 1, 2]
    # model file path
    if start_layer != -1:
        model_file = model_out_dir + "/model.h5"
    else:
        model_file = None

    # learning rate
    if start_layer == -1:
        learning_rate = 1E-2
    elif start_layer == XCEPTION_EXIT_START:
        learning_rate = 1E-3
    elif start_layer == XCEPTION_MID_START:
        learning_rate = 5E-4
    elif start_layer == XCEPTION_ENTRY_START:
        learning_rate = 1E-4
    else:
        learning_rate = 5E-5

    model = _build_regressor(img_size, num_gpu, start_layer, model_file,
                             learning_rate)

    best_mae = np.inf
    tolerance = 0

    data_ids = load_sex_ids(sex)

    for epoch in tqdm(range(start_epoch + 1, start_epoch + n_epoch + 1)):
        print "[x] epoch {} -------------------------------------------".format(
            epoch)
        for mini_batch in range(len(data_ids) // batch_size):
            batch_x, batch_y = load_data(sex=sex,
                                         img_size=img_size,
                                         batch_size=batch_size,
                                         augment_times=7)
            loss = model.train_on_batch(x=batch_x, y=batch_y)
            if mini_batch % 50 == 0:
                print "--epoch {}, mini_batch {}, loss {}".format(
                    epoch, mini_batch, loss)

        # test
        print "[x] test in epoch {}".format(epoch)
        losses = 0.0
        for mini_batch in range(int(0.2 * len(data_ids) // batch_size)):
            batch_x, batch_y = load_data(sex=sex,
                                         img_size=img_size,
                                         batch_size=batch_size,
                                         augment_times=0)
            loss = model.test_on_batch(batch_x, batch_y)
            losses += loss
        losses = losses / (int(0.2 * len(data_ids) // batch_size))  # average over the number of test batches actually run
        print("== epoch {}, test loss {}".format(epoch, losses))

        # test and metric
        print "[x] predict in epoch {}".format(epoch)
        y_true = []
        y_pred = []
        for mini_batch in range(int(0.2 * len(data_ids) // batch_size)):
            batch_x, batch_y = load_data(sex=sex,
                                         img_size=img_size,
                                         batch_size=batch_size,
                                         augment_times=0)
            pred_y = model.predict_on_batch(batch_x)
            for i in range(batch_size):
                y_true.append(batch_y[i] * SCALE)
                y_pred.append(pred_y[i] * SCALE)

        evs, mae, mse, meae, r2s, ccc = regression_metric(
            np.array(y_true), np.array(y_pred))
        save_obj(
            {
                "evs": evs,
                "mae": mae,
                "mse": mse,
                "meae": meae,
                "r2s": r2s,
                "ccc": ccc,
                "loss": losses
            },
            name=metric_out_dir + "/epoch_{}.pkl".format(epoch))

        if mae < best_mae:
            best_mae = mae
            tolerance = 0
            model.save_weights(model_out_dir + "/model.h5")
        else:
            tolerance += 1

        print "[x] epoch {}, evs {}, mae {}, mse {}, meae {}, r2s {}, ccc {}".format(
            epoch, evs, mae, mse, meae, r2s, ccc)

        if tolerance > TOLERANCE:
            break
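
regression_metric and SCALE come from elsewhere in the project; a plausible sketch of such a metric helper built on scikit-learn (returning evs, mae, mse, meae, r2s and Lin's concordance correlation coefficient in that order) could look like this -- an assumption for illustration, not the original code:

import numpy as np
from sklearn.metrics import (explained_variance_score, mean_absolute_error,
                             mean_squared_error, median_absolute_error, r2_score)

def regression_metric(y_true, y_pred):
    # Standard scikit-learn regression metrics.
    evs = explained_variance_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    meae = median_absolute_error(y_true, y_pred)
    r2s = r2_score(y_true, y_pred)
    # Lin's concordance correlation coefficient.
    mean_t, mean_p = y_true.mean(), y_pred.mean()
    cov = ((y_true - mean_t) * (y_pred - mean_p)).mean()
    ccc = 2 * cov / (y_true.var() + y_pred.var() + (mean_t - mean_p) ** 2)
    return evs, mae, mse, meae, r2s, ccc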
Example #9
def gae_for(args):
    print("Using {} dataset".format(args.dataset_str))
    # adj, features, y_test, tx, ty, test_maks, true_labels = load_data('cora')
    # print(true_labels)
    # adj, features, y_test, test_maks, true_labels=load_npz('amazon_electronics_photo')
    # print(true_labels)

    # adj=preprocess_high_order_adj( adj, 2, 0.01 )
    # print(adj)

    # if args.dataset_split == 'jknet':
    g, features, true_labels, train_mask, val_mask, test_mask, num_features, num_labels = utils_data.load_data(
        args.dataset_str, None, 0.6, 0.2)
    adj = g.adj(scipy_fmt='coo')
    true_labels = true_labels.detach().numpy()
    # print(true_labels)
    # else:
    #     g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels = utils_data.load_data(
    #         args.dataset_str, args.dataset_split)

    args.n_clusters = true_labels.max() + 1
    print("number of clusters:", args.n_clusters)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)
    # adj = adj_train

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    # adj_norm =  torch.sparse.FloatTensor(sp.coo_matrix(adj))
    # adj_norm=torch.tensor(adj.todense(),dtype=torch.float)
    # print(adj_norm)
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    # adj_label = sparse_to_tuple(adj_label)
    adj_label = torch.FloatTensor(adj_label.toarray())

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
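    # Note (added for clarity): these are the usual re-weighting terms for a GAE
    # reconstruction loss on a sparse adjacency matrix. With N nodes and E = adj.sum()
    # positive entries, pos_weight = (N*N - E) / E up-weights the rare positive edges
    # inside the binary cross-entropy, and norm = N*N / (2 * (N*N - E)) rescales the
    # weighted loss so its magnitude stays comparable across graphs of different density.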

    z_x = torch.zeros(features.shape[0], args.hidden1)
    z_w = torch.zeros(features.shape[0], args.hidden2)
    z_shuffle = torch.cat((features, z_x, z_w), axis=1)
    n_nodes, feat_dim = z_shuffle.shape

    model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout,
                        args.n_clusters)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    z_x, mu_x, _, z_w, mu_w, _, _, logvar_px, qz = model(z_shuffle, adj_norm)
    z_shuffle = torch.cat((features, z_x.detach_(), z_w.detach_()), axis=1)
    hidden_emb = None
    for epoch in tqdm(range(args.epochs)):
        t = time.time()
        model.train()
        optimizer.zero_grad()

        # z_shuffle=torch.cat((features,z_x.detach_(),z_w.detach_()),axis=1)
        z_x, mu_x, logvar_x, z_w, mu_w, logvar_w, mu_px, logvar_px, qz = model(
            z_shuffle, adj_norm)

        # print(z_x.shape,"z_x.shape")
        # After back-propagating gae loss, now do the deepWalk regularization

        # mu_x = mu_x.unsqueeze(-1)
        # mu_x = mu_x.expand(-1, args.hidden2)

        logvar_x1 = logvar_x.unsqueeze(-1)
        logvar_x1 = logvar_x1.expand(-1, args.hidden2, args.n_clusters)

        mu_x1 = mu_x.unsqueeze(-1)
        mu_x1 = mu_x1.expand(-1, args.hidden2, args.n_clusters)
        if torch.cuda.is_available():
            mu_x1 = mu_x1.cuda()
            logvar_x1 = logvar_x1.cuda()

        # KLD_W = -0.5 / n_nodes* torch.sum(1 + logvar_w - mu_w.pow(2) - logvar_w.exp())
        # KLD_Z = -torch.sum(qz * torch.log(qz + 1e-10))/n_nodes
        KLD_Z = -0.5 / n_nodes * torch.mean(
            torch.sum(1 + qz * torch.log(qz + 1e-10), 1))
        # print(KLD_Z,"klz")

        # qz = qz.unsqueeze(-1)
        # qz = qz.expand(-1, 1)

        # print(logvar_px.shape,logvar_x1.shape,"hhhhi")
        # KLD_QX_PX = 0.5 / n_nodes* (((logvar_px - logvar_x) + ((logvar_x.exp() + (mu_x - mu_px).pow(2))/logvar_px.exp())) - 1)
        # # print(KLD_QX_PX.shape,qz.shape,"hhhhi")
        # KLD_QX_PX = KLD_QX_PX.unsqueeze(1)
        # qz = qz.unsqueeze(-1)
        # print(KLD_QX_PX.shape,qz.shape,"hhhhi")

        # KLD_QX_PX = KLD_QX_PX.expand(2708, 1, args.hidden2)
        KLD_QX_PX = loss_function(preds=model.dc(z_x),
                                  labels=adj_label,
                                  mu=(mu_x1 - mu_px),
                                  logvar=(logvar_px - logvar_x1),
                                  n_nodes=n_nodes,
                                  norm=norm,
                                  pos_weight=pos_weight)
        KLD_QX_PX = KLD_QX_PX.expand(n_nodes, 1, args.hidden2)
        E_KLD_QX_PX = torch.sum(
            torch.bmm(KLD_QX_PX,
                      qz.unsqueeze(-1) / n_nodes))
        # print(E_KLD_QX_PX)
        # print(model.dc(z_x).shape,adj_label.shape,"hdhhhhhhd")

        model.train()
        optimizer.zero_grad()
        lbl_1 = torch.ones(n_nodes)
        lbl_2 = torch.zeros(n_nodes)
        lbl = torch.cat((lbl_1, lbl_2))
        idx = np.random.permutation(n_nodes)
        # print(features.shape,z_x.shape,adj_norm.shape)
        shuf_fts = z_shuffle[idx, :]
        # FeatHL=torch.cat((features,shuf_fts),axis=1)
        # _, featHL_dim = FeatHL.shape
        # modelHL = GCNModelVAE(featHL_dim, args.hidden1, args.hidden2, args.dropout,2)
        n_nodes1, feat_dim1 = z_shuffle.shape
        # model1 = GCNModelVAE(feat_dim1, args.hidden1, args.hidden2, args.dropout,args.n_clusters)
        # z_xL1, mu_xL1, logvar_xL1,z_wL1, mu_wL1, logvar_wL1,mu_pxL1, logvar_pxL1,_ = model1(z_shuffle, adj_norm)

        z_xL2, mu_xL2, logvar_xL2, z_wL2, mu_wL2, logvar_wL2, mu_pxL2, logvar_pxL2, qz2 = model(
            shuf_fts, adj_norm)
        KLD_Z2 = 0.5 / n_nodes * torch.mean(
            torch.sum(1 + qz2 * torch.log(qz2 + 1e-10), 1))

        KLD_QX_PX2 = loss_function(preds=model.dc(z_wL2),
                                   labels=adj_label,
                                   mu=mu_wL2,
                                   logvar=logvar_wL2,
                                   n_nodes=n_nodes,
                                   norm=norm,
                                   pos_weight=pos_weight)
        KLD_QX_PX2 = KLD_QX_PX2.expand(n_nodes, 1, args.hidden2)
        E_KLD_QX_PX2 = torch.sum(
            torch.bmm(KLD_QX_PX2,
                      qz2.unsqueeze(-1) / n_nodes))

        lossF = (1.0/loss_function(preds=model.dc(z_xL2), labels=adj_label,
                             mu=mu_xL2, logvar=logvar_xL2, n_nodes=n_nodes,
                             norm=norm, pos_weight=pos_weight))+\
                             (1.0/E_KLD_QX_PX2)+KLD_Z2
        # z_xL2, mu_xL2, logvar_xL2,z_wL2, mu_wL2, logvar_wL2,mu_pxL2, logvar_pxL2,qz2 = model(shuf_fts, adj_norm)
        # lossF = (1.0/loss_function(preds=model.dc(z_xL2), labels=adj_label,
        #                      mu=mu_xL2, logvar=logvar_xL2, n_nodes=n_nodes,
        #                      norm=norm, pos_weight=pos_weight))+\
        #                      (1.0/E_KLD_QX_PX2)+ KLD_Z2+lossF

        # lossF = loss_functionShuffle(preds=model.dc(z_xL2), labels=adj_label,
        #                      mu=mu_xL2, logvar=logvar_xL2, n_nodes=n_nodes,
        #                      norm=norm, pos_weight=pos_weight)+\
        #                      loss_functionShuffle(preds=model.dc(z_wL2), labels=adj_label,
        #                      mu=mu_wL2, logvar=logvar_wL2, n_nodes=n_nodes,
        #                      norm=norm, pos_weight=pos_weight)
        # LossF=Variable(torch.tensor(lossF).type(torch.FloatTensor),requires_grad=True)
        # lossF.backward()

        loss = loss_function(
            preds=model.dc(z_x),
            labels=adj_label,
            mu=mu_x,
            logvar=logvar_x,
            n_nodes=n_nodes,
            norm=norm,
            pos_weight=pos_weight) + loss_function(
                preds=model.dc(z_w),
                labels=adj_label,
                mu=mu_w,
                logvar=logvar_w,
                n_nodes=n_nodes,
                norm=norm,
                pos_weight=pos_weight) + lossF + KLD_Z + E_KLD_QX_PX
        # if lossF<0.02:
        #   break
        # lossF.backward()
        # HL=np.concatenate((mu_xL1.data.numpy(),mu_wL1.data.numpy()),axis=1)
        # HL2=np.concatenate((mu_xL2.data.numpy(),mu_wL2.data.numpy()),axis=1)
        # kmeans = KMeans(n_clusters=2, random_state=0).fit(HL)
        # kmeans2 = KMeans(n_clusters=2, random_state=0).fit(HL2)
        # predict_labels = kmeans.predict(HL)
        # predict_labels2 = kmeans.predict(HL2)
        # pr=np.amax(kmeans.fit_transform(HL), axis=1)
        # pr2=np.amax(kmeans.fit_transform(HL2), axis=1)
        # pr=torch.cat((torch.tensor(pr), torch.tensor(pr2)))
        # b_xent = nn.BCEWithLogitsLoss()
        # lossF = b_xent(torch.FloatTensor(pr),torch.FloatTensor(lbl))

        # print(lossF)
        # print(loss, lossF)
        loss.backward(retain_graph=True)
        cur_loss = loss.item()
        optimizer.step()

        hidden_emb = np.concatenate((mu_x.data.numpy(), mu_w.data.numpy()),
                                    axis=1)
        # hidden_emb=mu_x.data.numpy()
        # print(hidden_emb.shape)
        # roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges, val_edges_false)

        # if args.dw == 1:
        #     tqdm.write("Epoch: {}, train_loss_gae={:.5f}, train_loss_dw={:.5f}, val_ap={:.5f}, time={:.5f}".format(
        #         epoch + 1, cur_loss, cur_dw_loss,
        #         ap_curr, time.time() - t))
        # else:
        #     tqdm.write("Epoch: {}, train_loss_gae={:.5f}, val_ap={:.5f}, time={:.5f}".format(
        #         epoch + 1, cur_loss,
        #         ap_curr, time.time() - t))
        roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges,
                                            test_edges_false)
        # # tqdm.write('ROC: {}, AP: {}'.format(roc_score, ap_score))
        wandb.log({"roc_score1": roc_score})
        wandb.log({"ap_score1": ap_score})
        if (epoch + 1) % 10 == 0:
            tqdm.write("Evaluating intermediate results...")
            kmeans = KMeans(n_clusters=args.n_clusters,
                            random_state=0).fit(hidden_emb)
            predict_labels = kmeans.predict(hidden_emb)
            # print(np.argmax(kmeans.fit_transform(hidden_emb), axis=1).shape)
            pr = np.amax(kmeans.fit_transform(hidden_emb), axis=1)
            b_xent = nn.BCEWithLogitsLoss()
            print(loss, lossF)
            # lossF = b_xent(torch.FloatTensor(pr),torch.FloatTensor(true_labels))
            cm = clustering_metrics(true_labels, predict_labels)
            cm.evaluationClusterModelFromLabel(tqdm)
            roc_score, ap_score = get_roc_score(hidden_emb, adj_orig,
                                                test_edges, test_edges_false)
            tqdm.write('ROC: {}, AP: {}'.format(roc_score, ap_score))
            # np.save('logs/emb_epoch_{}.npy'.format(epoch + 1), hidden_emb)
            print(loss, lossF)
            print("Kmeans ACC", purity_score(true_labels, predict_labels))
            # roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false)
            # tqdm.write('Test ROC score: ' + str(roc_score))
            # tqdm.write('Test AP score: ' + str(ap_score))
    tqdm.write("Optimization Finished!")

    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges,
                                        test_edges_false)
    tqdm.write('Test ROC score: ' + str(roc_score))
    tqdm.write('Test AP score: ' + str(ap_score))
    kmeans = KMeans(n_clusters=args.n_clusters, random_state=0).fit(hidden_emb)
    predict_labels = kmeans.predict(hidden_emb)
    cm = clustering_metrics(true_labels, predict_labels)
    cm.evaluationClusterModelFromLabel(tqdm)
    print("Kmeans ACC", purity_score(true_labels, predict_labels))

    if args.plot == 1:
        cm.plotClusters(tqdm, hidden_emb, true_labels)
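
purity_score and clustering_metrics are imported from elsewhere in this project; a common way to compute cluster purity, given here only as a hedged sketch rather than the original helper, uses scikit-learn's contingency matrix:

import numpy as np
from sklearn.metrics.cluster import contingency_matrix

def purity_score(y_true, y_pred):
    # For each predicted cluster, count its most frequent true label, then normalize.
    cm = contingency_matrix(y_true, y_pred)
    return np.sum(np.amax(cm, axis=0)) / np.sum(cm)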
Example #10
    parser.add_argument('--dropout_rate', type=float, default=0.2)
    parser.add_argument('--learning_rate', type=float, default=0.01)
    parser.add_argument('--weight_decay_layer_one', type=float, default=5e-4)
    parser.add_argument('--weight_decay_layer_two', type=float, default=5e-4)
    parser.add_argument('--num_epochs_patience', type=int, default=200)
    parser.add_argument('--num_epochs_max', type=int, default=1000)
    parser.add_argument('--run_id', type=str)
    parser.add_argument('--dataset_split', type=str)
    parser.add_argument('--learning_rate_decay_patience', type=int, default=50)
    parser.add_argument('--learning_rate_decay_factor',
                        type=float,
                        default=0.8)
    args = parser.parse_args()

    if args.dataset_split == 'jknet':
        g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels = utils_data.load_data(
            args.dataset, None, 0.6, 0.2)
    else:
        g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels = utils_data.load_data(
            args.dataset, args.dataset_split)
    acc = []
    for seed in range(args.iter):
        setup_seed(seed * 10)
        g.set_n_initializer(dgl.init.zero_initializer)
        g.set_e_initializer(dgl.init.zero_initializer)

        if args.model == 'GCN':
            net = GCN(num_features, args.num_hidden, num_labels)
        if args.model == 'GAT':
            net = GAT(num_features, args.num_hidden, num_labels)
        if args.model == 'GIN':
            net = GIN(num_features, args.num_hidden, num_labels)
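
setup_seed is defined outside this excerpt; a typical reproducibility helper of that name, shown as an assumed sketch rather than the original definition, would be:

import random
import numpy as np
import torch

def setup_seed(seed):
    # Seed every RNG that the training run touches.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)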
Example #11
    parser.add_argument('--weight_decay_layer_one', type=float)
    parser.add_argument('--weight_decay_layer_two', type=float)
    parser.add_argument('--num_epochs_patience', type=int, default=100)
    parser.add_argument('--num_epochs_max', type=int, default=5000)
    parser.add_argument('--run_id', type=str)
    parser.add_argument('--dataset_split', type=str)
    parser.add_argument('--learning_rate_decay_patience', type=int, default=50)
    parser.add_argument('--learning_rate_decay_factor',
                        type=float,
                        default=0.8)
    args = parser.parse_args()
    vars(args)['model'] = 'GeomGCN_TwoLayers'

    t1 = time.time()
    if args.dataset_split == 'jknet':
        g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels = utils_data.load_data(
            args.dataset, None, 0.6, 0.2, 'GeomGCN', args.dataset_embedding)
    else:
        g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels = utils_data.load_data(
            args.dataset, args.dataset_split, None, None, 'GeomGCN',
            args.dataset_embedding)
    print(time.time() - t1)

    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)

    net = GeomGCNNet(g=g,
                     num_input_features=num_features,
                     num_output_classes=num_labels,
                     num_hidden=args.num_hidden,
                     num_divisions=9,
                     dropout_rate=args.dropout_rate,