Beispiel #1
0
def train(epoch, train_adj, train_fea, idx_train, val_adj=None, val_fea=None):
    """Run one training epoch with an auxiliary self-supervised loss.

    Uses module-level globals: model, optimizer, scheduler, sampler, labels,
    idx_val, adj_knn, ssl_agent, args, early_stopping, accuracy.

    Returns:
        (loss_train, acc_train, loss_val, acc_val, loss_ssl, loss_total,
         train_t) — floats except loss_ssl, which may be a plain number.
    """
    if val_adj is None:
        # Transductive setting: validate on the same graph used for training.
        val_adj = train_adj
        val_fea = train_fea

    t = time.time()
    model.train()
    optimizer.zero_grad()

    # myforward also returns intermediate embeddings consumed by the SSL loss.
    output, embeddings = model.myforward(train_fea,
                                         train_adj,
                                         adj_knn,
                                         layer=1.5)

    # special for reddit: inductive output rows cover only the training nodes.
    if sampler.learning_type == "inductive":
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])

    # Self-supervised auxiliary loss, weighted by args.lambda_.
    loss_ssl = args.lambda_ * ssl_agent.make_loss(embeddings)
    loss_total = loss_train + loss_ssl

    loss_total.backward()
    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()

    if not args.fastmode and args.early_stopping > 0:
        # Evaluate validation-set performance separately;
        # model.eval() deactivates dropout during the validation run.
        model.eval()
        output = model(val_fea, val_adj, adj_knn)
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        acc_val = accuracy(output[idx_val], labels[idx_val]).item()
        # NOTE(review): early stopping here is driven by accuracy, unlike
        # sibling train() variants in this file that pass a loss — confirm.
        early_stopping(acc_val, model)
    else:
        loss_val = 0
        acc_val = 0

    if args.lradjust:
        scheduler.step()

    val_t = time.time() - val_t

    # loss_ssl is a tensor when the SSL agent returns one, but a plain
    # number (no .item()) when e.g. args.lambda_ == 0 or the agent returns 0.
    try:
        return (loss_train.item(), acc_train.item(), loss_val, acc_val,
                loss_ssl.item(), loss_total.item(), train_t)
    except AttributeError:  # was a bare `except:` — narrowed to the real case
        return (loss_train.item(), acc_train.item(), loss_val, acc_val,
                loss_ssl, loss_total.item(), train_t)
Beispiel #2
0
def compute_metric(input, target):
    """Return a dict of top-1 and top-5 accuracy for *input* vs. *target*."""
    return {
        f'accuracy@{k}': accuracy(input=input, target=target, topk=k)
        for k in (1, 5)
    }
Beispiel #3
0
def train(epoch, train_adj, train_fea, idx_train, val_adj=None, val_fea=None):
    """One training epoch; also logs gradient/weight norms of the first
    mid-layer for debugging.

    Uses module-level globals: model, optimizer, scheduler, sampler, labels,
    idx_val, args, early_stopping, accuracy, get_lr.

    Returns:
        (loss_train, acc_train, loss_val, acc_val, lr, train_t, val_t,
         grads, norms)
    """
    if val_adj is None:
        # Transductive setting: validate on the training graph itself.
        val_adj = train_adj
        val_fea = train_fea

    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(train_fea, train_adj)
    # special for reddit
    if sampler.learning_type == "inductive":
        # Inductive: output rows correspond to the training nodes only.
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])

    loss_train.backward()
    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()
    # We can not apply the fastmode for the reddit dataset.
    # if sampler.learning_type == "inductive" or not args.fastmode:

    # L2 norms of gradients and parameters of the first mid-layer's weights,
    # printed for debugging (the ratio hints at the effective step size).
    grads = [
        np.linalg.norm(l.grad.cpu().numpy())
        for l in model.midlayer[0].model.weights
    ]
    norms = [
        np.linalg.norm(l.detach().cpu().numpy())
        for l in model.midlayer[0].model.weights
    ]
    print("Grads:", grads)
    print("Norms", norms)
    print(np.array(norms) / np.array(grads))

    # NOTE(review): this early-stopping check reuses the training-mode
    # `output` (dropout active) for the validation loss — confirm intended.
    if args.early_stopping > 0 and sampler.dataset != "reddit":
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        early_stopping(loss_val, model)

    if not args.fastmode:
        #    # Evaluate validation set performance separately,
        #    # deactivates dropout during validation run.
        model.eval()
        output = model(val_fea, val_adj)
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        acc_val = accuracy(output[idx_val], labels[idx_val]).item()
        if sampler.dataset == "reddit":
            early_stopping(loss_val, model)
    else:
        loss_val = 0
        acc_val = 0

    if args.lradjust:
        scheduler.step()

    val_t = time.time() - val_t
    return (loss_train.item(), acc_train.item(), loss_val, acc_val,
            get_lr(optimizer), train_t, val_t, grads, norms)
Beispiel #4
0
def train(epoch, train_adj, train_fea, idx_train, val_adj=None, val_fea=None):
    """One epoch of joint classification + graph-autoencoder training.

    Uses module-level globals: model, optimizer, scheduler, sampler, labels,
    idx_val, args, early_stopping, accuracy, loss_function, get_lr.

    Bug fix: the inductive branch had its loss assignment commented out,
    leaving `loss_train` unbound when `loss_train.backward()` ran — the NLL
    loss is now computed in both branches (the autoencoder term needs the
    full train_adj and is only added in the transductive branch, as before).

    Returns (loss_train, acc_train, loss_val, acc_val, lr, train_t, val_t).
    """
    if val_adj is None:
        # Transductive setting: validate on the training graph itself.
        val_adj = train_adj
        val_fea = train_fea

    t = time.time()
    model.train()
    optimizer.zero_grad()
    recovered, mu, logvar, output = model(train_fea, train_adj)
    # special for reddit
    if sampler.learning_type == "inductive":
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:
        loss_nc = F.nll_loss(output[idx_train], labels[idx_train])
        # VGAE reconstruction + KL loss against the training adjacency.
        ae_loss = loss_function(preds=recovered,
                                labels=train_adj,
                                mu=mu,
                                logvar=logvar,
                                n_nodes=train_adj.size(0))

        loss_train = loss_nc + 0.2 * ae_loss
        acc_train = accuracy(output[idx_train], labels[idx_train])

    loss_train.backward()
    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()

    # Validation with dropout disabled; early stopping driven by accuracy.
    model.eval()
    recovered, mu, logvar, output = model(val_fea, val_adj)
    loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
    acc_val = accuracy(output[idx_val], labels[idx_val]).item()
    early_stopping(acc_val, model)

    if args.lradjust:
        scheduler.step()

    val_t = time.time() - val_t
    return (loss_train.item(), acc_train.item(), loss_val, acc_val,
            get_lr(optimizer), train_t, val_t)
Beispiel #5
0
def train_full(epoch, train_g, val_g, idx_val, labels):
    """Full-graph training step for a classifier on top of frozen
    unsupervised embeddings, with validation and early stopping.

    Uses module-level globals: unsupervised_model, classifier_model,
    optimizer, scheduler, sampler, idx_train, args, early_stopping,
    accuracy, get_lr.

    Returns (loss_train, acc_train, loss_val, acc_val, lr, train_t, val_t).
    """
    # The embedding model stays frozen; only the classifier trains.
    unsupervised_model.eval()

    t = time.time()
    classifier_model.train()

    optimizer.zero_grad()
    # get features for training
    feats = unsupervised_model(train_g.ndata['features'], train_g)
    
    output = classifier_model(feats)
    # special for inductive, learning type must be inductive
    if sampler.learning_type=='inductive':
        # Inductive: output rows cover only the training nodes.
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])

    # loss_train = F.nll_loss(output, labels[idx_train])
    # acc_train = accuracy(output, labels[idx_train])
    # if sampler.learning_type == "inductive":
    #     loss_train = F.nll_loss(output, labels[idx_train])
    #     acc_train = accuracy(output, labels[idx_train])
    # else:
    #     loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    #     acc_train = accuracy(output[idx_train], labels[idx_train])

    loss_train.backward()
    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()
    # We can not apply the fastmode for the coauthor_phy dataset.
    # if sampler.learning_type == "inductive" or not args.fastmode:

    classifier_model.eval()
    # coauthor_phy validation runs on CPU (moved back to GPU afterwards).
    if sampler.dataset in ['coauthor_phy']:
        unsupervised_model.cpu()
        classifier_model.cpu()
        labels = labels.cpu()
    # get features for validation
    feats = unsupervised_model(val_g.ndata['features'], val_g)

    output = classifier_model(feats)
    loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
    acc_val = accuracy(output[idx_val], labels[idx_val]).item()
    early_stopping(loss_val, classifier_model)

    # Restore models/labels to GPU for the next training step.
    if sampler.dataset in ['coauthor_phy']:
        unsupervised_model.cuda()
        classifier_model.cuda()
        labels = labels.cuda()
    
    if args.lradjust:
        scheduler.step()

    val_t = time.time() - val_t
    return (loss_train.item(), acc_train.item(), loss_val, acc_val, get_lr(optimizer), train_t, val_t)
Beispiel #6
0
def test_accuracy():
    """Top-k accuracy marks a sample correct iff its target class is among
    the k highest-scoring logits."""
    logits = torch.tensor([[2, 3, 1], [5, 7, 0], [0, 9, -1]],
                          dtype=torch.float)
    wanted = torch.tensor([1, 0, 2], dtype=torch.long)

    top1 = accuracy(input=logits, target=wanted, topk=1)
    assert torch.equal(top1, torch.tensor([1., 0., 0.]))

    top2 = accuracy(input=logits, target=wanted, topk=2)
    assert torch.equal(top2, torch.tensor([1., 1., 0.]))
def train(epoch, train_adj, train_fea, idx_train, val_adj=None, val_fea=None):
    """Classifier training step on frozen unsupervised features, with a
    per-dataset early-stopping policy.

    Uses module-level globals: unsupervised_model, classifier_model,
    optimizer, scheduler, sampler, labels, idx_val, args, early_stopping,
    accuracy, get_lr.

    Returns (loss_train, acc_train, loss_val, acc_val, lr, train_t, val_t).
    """
    # The embedding model stays frozen; only the classifier trains.
    unsupervised_model.eval()
    
    if val_adj is None:
        # Transductive setting: validate on the training graph itself.
        val_adj = train_adj
        val_fea = train_fea

    t = time.time()
    classifier_model.train()
    
    optimizer.zero_grad()
    feats = unsupervised_model(train_fea, train_adj)
    output = classifier_model(feats)
    
    # special for reddit
    if sampler.learning_type == "inductive":
        # Inductive: output rows cover only the training nodes.
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])

    loss_train.backward()
    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()
    # We can not apply the fastmode for the reddit dataset.
    # if sampler.learning_type == "inductive" or not args.fastmode:

    classifier_model.eval()
    feats = unsupervised_model(val_fea, val_adj)
    output = classifier_model(feats)
    if args.early_stopping > 0 and sampler.dataset not in ['reddit', 'coauthor_phy']:
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        acc_val = accuracy(output[idx_val], labels[idx_val]).item()
        early_stopping(loss_val, classifier_model)

    # NOTE(review): when args.fastmode is set, the loss_val/acc_val computed
    # in the early-stopping branch above are overwritten with 0 below —
    # confirm that is intended.
    if not args.fastmode:
        #    # Evaluate validation set performance separately,
        #    # deactivates dropout during validation run.
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        acc_val = accuracy(output[idx_val], labels[idx_val]).item()
        if sampler.dataset in ['reddit', 'coauthor_phy']:
            early_stopping(loss_val, classifier_model)
    else:
        loss_val = 0
        acc_val = 0

    if args.lradjust:
        scheduler.step()

    val_t = time.time() - val_t
    return (loss_train.item(), acc_train.item(), loss_val, acc_val, get_lr(optimizer), train_t, val_t)
Beispiel #8
0
def train(epoch, train_adj, train_fea, idx_train, val_adj=None, val_fea=None):
    """One training epoch for the GCN, with validation and dataset-specific
    early stopping.

    Uses module-level globals: model, optimizer, scheduler, sampler, labels,
    idx_val, args, early_stopping, accuracy, get_lr.

    Returns (loss_train, acc_train, loss_val, acc_val, lr, train_t, val_t).
    """
    if val_adj is None:
        # Transductive setting: validate on the training graph itself.
        val_adj = train_adj
        val_fea = train_fea

    t = time.time()
    model.train()
    optimizer.zero_grad()
    #with torch.no_grad():
    output = model(train_fea, train_adj)
    # special for reddit
    if sampler.learning_type == "inductive":  #yes!
        # Inductive: output rows cover only the training nodes.
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:  #only reddit yes!
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])

    loss_train.backward()

    #torch.nn.utils.clip_grad_norm(model.parameters(), 0.5) #

    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()
    # We can not apply the fastmode for the reddit dataset.
    # if sampler.learning_type == "inductive" or not args.fastmode:

    # NOTE(review): this early-stopping check reuses the training-mode
    # `output` (dropout active) for the validation loss — confirm intended.
    if args.early_stopping > 0 and sampler.dataset != "reddit":
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        early_stopping(loss_val, model)

    if not args.fastmode:
        #    # Evaluate validation set performance separately,
        #    # deactivates dropout during validation run.
        model.eval()
        #with torch.no_grad():
        output = model(val_fea, val_adj)
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        acc_val = accuracy(output[idx_val], labels[idx_val]).item()
        if sampler.dataset == "reddit":
            early_stopping(loss_val, model)
    else:
        loss_val = 0
        acc_val = 0

    if args.lradjust:
        scheduler.step()

    val_t = time.time() - val_t
    return (loss_train.item(), acc_train.item(), loss_val, acc_val,
            get_lr(optimizer), train_t, val_t)
Beispiel #9
0
def train():
    """Train `model` for args.num_epochs epochs (optionally resuming from a
    checkpoint), validating every args.val_step epochs and saving the model
    whenever validation accuracy improves.

    Uses module-level globals: model, optimizer, scheduler, criterion,
    train_loader/train_label, valid_loader/valid_label, accuracy, args, and
    the best_epoch/best_acc trackers updated here.

    Bug fix: epoch averages previously divided by the last enumerate index
    `i` — off by one, and a ZeroDivisionError for a single-batch loader.
    They now divide by the true batch count.
    """
    global best_epoch, best_acc

    if args.start_epoch:
        # Resume from the saved checkpoint of the given epoch.
        model.load_state_dict(
            torch.load(
                os.path.join(args.model_path,
                             'model-%d.pkl' % (args.start_epoch))))

    # Training
    for epoch in range(args.start_epoch, args.num_epochs):
        train_loss = 0
        train_acc = 0
        # NOTE(review): scheduler.step() before the epoch's optimizer steps
        # follows legacy (<1.1) PyTorch ordering — confirm intended.
        scheduler.step()
        model.train()
        n_train = 0
        for i, x in enumerate(train_loader):
            logit = model(x[0].float())
            target = train_label[i]

            loss = criterion(logit, target.view(1))

            model.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_acc += accuracy(logit, target.view(1))
            n_train = i + 1

        print('[epoch', epoch + 1, '] Train loss:', train_loss / n_train,
              'Train Acc:', train_acc / n_train)

        if (epoch + 1) % args.val_step == 0:
            model.eval()
            val_loss = 0
            val_acc = 0
            with torch.no_grad():
                n_val = 0
                for i, x in enumerate(valid_loader):
                    logit = model(x[0].float())
                    target = valid_label[i]

                    val_loss += criterion(logit, target.view(1)).item()
                    val_acc += accuracy(logit, target.view(1))
                    n_val = i + 1

                # Checkpoint whenever mean validation accuracy improves.
                if best_acc <= (val_acc / n_val):
                    best_epoch = epoch + 1
                    best_acc = (val_acc / n_val)
                    torch.save(
                        model.state_dict(),
                        os.path.join(args.model_path,
                                     'model-%d.pkl' % (best_epoch)))

            print('Val loss:', val_loss / n_val, 'Val Acc:', val_acc / n_val)
def test(test_adj, test_fea, idx_test, labels):
    """Evaluate the frozen embedding model + classifier on a test adjacency,
    rebuilding a DGLGraph from the sparse torch adjacency.

    Uses module-level globals: unsupervised_model, classifier_model,
    sampler, args, accuracy, roc_auc_compute_fn, sp, np, nx, DGLGraph.

    Returns (loss_test, acc_test) as floats.
    """
    unsupervised_model.eval()
    classifier_model.eval()

    # Inductive evaluation is run on CPU (models and labels moved over).
    if sampler.learning_type=='inductive':
        unsupervised_model.cpu()
        classifier_model.cpu()
        labels = labels.cpu()

    # construct g from test adj
    # Extract COO edge indices from the sparse torch adjacency tensor.
    if sampler.learning_type=='inductive':
        test_edges = test_adj._indices().data.numpy()
    else:
        test_edges = test_adj._indices().data.cpu().numpy()

    # Rebuild as a scipy COO matrix with unit edge weights.
    test_edges = sp.coo_matrix((np.ones(test_edges.shape[1]),
                             (test_edges[0], test_edges[1])),
                            shape=(test_adj.shape[0], test_adj.shape[0]),
                            dtype=np.float32)

    test_g = nx.from_scipy_sparse_matrix(test_edges, create_using=nx.DiGraph())
    test_g = DGLGraph(test_g)
    feats = unsupervised_model(test_fea, test_g)

    output = classifier_model(feats)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    auc_test = roc_auc_compute_fn(output[idx_test], labels[idx_test])
    if args.debug:
        print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "auc= {:.4f}".format(auc_test),
              "accuracy= {:.4f}".format(acc_test.item()))
        print("accuracy=%.5f" % (acc_test.item()))
    return (loss_test.item(), acc_test.item())
    def infer(test_reader):
        """Run the compiled inference program over all test batches.

        Accumulates a batch-size-weighted mean cost, collects predictions
        and ground-truth labels, then computes every metric named in the
        enclosing scope's metric_type list.

        Returns:
            (mean_cost, [(metric_name, value), ...])
        """
        total_cost = 0.0
        total_count = 0
        preds, labels = [], []
        for data in test_reader():
            avg_cost, avg_acc, batch_prediction = exe.run(
                inference_program,
                feed=feeder.feed(data),
                fetch_list=fetch_list,
                return_numpy=True)
            # Weight the running cost by batch size so the mean is exact.
            total_cost += avg_cost * len(data)
            total_count += len(data)
            preds.append(batch_prediction)
            # Ground-truth label is the last field of each sample record.
            labels.append(np.asarray([x[-1] for x in data], dtype=np.int64))
        y_pred = np.concatenate(preds)
        y_label = np.concatenate(labels)

        metric_res = []
        for metric_name in metric_type:
            if metric_name == 'accuracy_with_threshold':
                metric_res.append(
                    (metric_name,
                     metric.accuracy_with_threshold(y_pred,
                                                    y_label,
                                                    threshold=0.3)))
            elif metric_name == 'accuracy':
                metric_res.append(
                    (metric_name, metric.accuracy(y_pred, y_label)))
            else:
                # Unknown metric names abort the whole run.
                print("Unknown metric type: ", metric_name)
                exit()
        return total_cost / (total_count * 1.0), metric_res
Beispiel #12
0
def train(args, model, data, label, train_idx, feature_mask, optimizer, epoch):
    """One multi-view training step: NLL classification loss plus a masked
    per-view reconstruction loss (reconstruction-only warm-up for the first
    100 epochs).

    Returns (acc_train, semantic) — training accuracy (float) and the
    model's semantic output.
    """
    model.train()
    optimizer.zero_grad()
    # criterion = nn.CrossEntropyLoss()
    rec_vec, output, semantic = model(feature_mask)
    label = label.long().view(-1, )

    # classification loss
    cls_loss = F.nll_loss(output[train_idx], label[train_idx])
    args.logger.warning("Classfication loss " + str(cls_loss.item()))

    # reconstruction loss: per-view squared error, masked to observed views
    rec_loss = 0.0
    for v in range(args.view_num):
        # Per-sample squared error for view v (was named `sum`, which
        # shadowed the builtin).
        sq_err = torch.sum(torch.pow(torch.sub(rec_vec[v], data[v]), 2.0), 1)
        view_mask = feature_mask[:, v].double()
        view_loss = torch.sum(sq_err * view_mask)
        args.logger.warning("View " + str(v) + " loss " + str(view_loss.item()))
        rec_loss += view_loss

    # summary loss: reconstruction-only warm-up for the first 100 epochs
    if epoch < 100:
        loss = rec_loss
    else:
        loss = cls_loss + rec_loss
    args.logger.warning("Total loss " + str(loss.item()))

    loss.backward()
    optimizer.step()
    acc_train = accuracy(output[train_idx], label[train_idx]).item()
    args.logger.error("Epoch : " + str(epoch) + ' train accuracy : ' +
                      str(acc_train))
    return acc_train, semantic
Beispiel #13
0
 def validate(self):
     """Classify the held-out data and return accuracy, exact match,
     micro-averaged P/R/F, and per-label P/R/F statistics."""
     self.pred_labels = self.classify()
     acc = accuracy(self.true_labels, self.pred_labels)
     ematch = exact_match(self.true_labels, self.pred_labels)
     # Micro-averaged precision/recall/F1 over all labels.
     pre_micro, rec_micro, f_micro = f_score_micro(self.true_labels, self.pred_labels)
     # Per-label scores; the label count comes from self.label_dict.
     pre_label, rec_label, f_label, prec_result, recall_result = f_score_by_label(self.true_labels, self.pred_labels, len(self.label_dict))
     return acc, ematch, pre_micro, rec_micro, f_micro, pre_label, rec_label, f_label, prec_result, recall_result
def test(X):
    """Compare clustering accuracy of k-means on the original data against
    1-D projections (PCA, random projection, energy-based).

    Python 2 syntax (print statements). Relies on globals: z (reference
    labels), kmeans, pca_proj, kmeans_random, energy_random, accuracy.
    """
    zh, score = kmeans(X)
    a = accuracy(z, zh)
    print "k-means original space:", a

    # 1-D PCA projection, then k-means.
    Y = pca_proj(X)
    zh, score = kmeans(Y)
    a = accuracy(z, zh)
    print "k-means/1D PCA:", a

    # 20 random 1-D projections (best kept internally, presumably).
    zh = kmeans_random(X, 20)
    a = accuracy(z, zh)
    print "k-means/random projection 1D:", a

    zh = energy_random(X, 20)
    a = accuracy(z, zh)
    print "energy/random projection 1D", a
Beispiel #15
0
def train(epoch, train_adj, train_fea, val_adj=None, val_fea=None):
    """One training epoch with validation and optional early stopping.

    Uses module-level globals: model, optimizer, scheduler, sampler, labels,
    idx_train, idx_val, args, early_stopping, accuracy.

    Returns (loss_train, acc_train, loss_val, acc_val) as floats.
    """
    if val_adj is None:
        # Transductive setting: validate on the training graph itself.
        val_adj = train_adj
        val_fea = train_fea
    t = time.time()

    #adjust lr
    if args.lradjust:
        #scheduler.step(loss_val)
        scheduler.step()

    model.train()
    optimizer.zero_grad()
    output = model(train_fea, train_adj)
    #special for reddit
    if sampler.learning_type == "inductive":
        # Inductive: output rows cover only the training nodes.
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
    
    loss_train.backward()
    optimizer.step()

    #We can not apply the fastmode for the reddit dataset.
    if sampler.learning_type == "inductive" or not args.fastmode:
        # Evaluate validation set performance separately,
        # deactivates dropout during validation run.
        model.eval()
        output = model(val_fea, val_adj)
     
    # NOTE(review): in fastmode+transductive the training-mode `output`
    # (dropout active) is reused here — confirm intended.
    loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])

    if args.earlystopping > 0:
        early_stopping(loss_val, model)
    
    if args.debug and epoch % 1 == 0:
        print('Epoch: {:04d}'.format(epoch+1),
            'loss_train: {:.4f}'.format(loss_train.item()),
            'acc_train: {:.4f}'.format(acc_train.item()),
            'loss_val: {:.4f}'.format(loss_val.item()),
            'acc_val: {:.4f}'.format(acc_val.item()),
            'time: {:.4f}s'.format(time.time() - t))
    return (loss_train.item(), acc_train.item(), loss_val.item(), acc_val.item())
def test(X):
    """Compare clustering accuracy of k-means on the original data against
    1-D projections (PCA, random projection, energy-based).

    Python 2 syntax (print statements). Relies on globals: z (reference
    labels), kmeans, pca_proj, kmeans_random, energy_random, accuracy.
    """
    zh, score = kmeans(X)
    a = accuracy(z, zh)
    print "k-means original space:", a

    # 1-D PCA projection, then k-means.
    Y = pca_proj(X)
    zh, score = kmeans(Y)
    a = accuracy(z, zh)
    print "k-means/1D PCA:", a

    # 20 random 1-D projections.
    zh = kmeans_random(X, 20)
    a = accuracy(z, zh)
    print "k-means/random projection 1D:", a

    zh = energy_random(X, 20)
    a = accuracy(z, zh)
    print "energy/random projection 1D", a
Beispiel #17
0
def test(args, model, data, label, test_idx, feature_mask, epoch=0):
    """Evaluate the model on the test indices, log the accuracy, and return
    it together with the semantic output."""
    model.eval()
    with torch.no_grad():
        _, log_probs, sem = model(feature_mask)
        nll = F.nll_loss(log_probs[test_idx], label[test_idx])
        acc = accuracy(log_probs[test_idx], label[test_idx]).item()
        args.logger.error("Epoch : " + str(epoch) + ' test accuracy : ' +
                          str(acc))
    return acc, sem
Beispiel #18
0
def test(test_adj,test_fea):
    """Evaluate the global model on the test indices; optionally print the
    results when args.debug is set."""
    model.eval()
    log_probs = model(test_fea, test_adj)
    nll = F.nll_loss(log_probs[idx_test], labels[idx_test])
    acc = accuracy(log_probs[idx_test], labels[idx_test])
    if args.debug:
        print("Test set results:",
            "loss= {:.4f}".format(nll.item()),
            "accuracy= {:.4f}".format(acc.item()))
    return (nll.item(), acc.item())
Beispiel #19
0
    def test_accuracy_should_examine_each_sentence(self):
        """accuracy() must average correctness across all sentence pairs,
        not just the first one."""
        refs = [
            "the dog walks on his 4 legs in the park",
            "bob looked at the stars and saw jupiter"
        ]
        hyps = [
            "the dog is on his 4 leg in the stadium",
            "bob look at a star and saw some wood"
        ]
        # 103/180 is the expected pooled score over both pairs — presumably
        # token/character level; verify against metric.accuracy's contract.
        expected = 103 / 180
        self.assertEqual(expected, metric.accuracy(refs, hyps))
Beispiel #20
0
def trainGraphConvolutionNetwork(hparam):
    """Train a GraphConvNetwork on the Cora dataset per `hparam`, printing
    train/val accuracy and saving weights each epoch.

    Bug fix: the progress print used a format string with three
    placeholders (%d, %.2f, %.2f) but supplied only two values, raising
    TypeError on the first epoch — `epoch` is now included.
    """
    from model import GraphConvNetwork
    from data.dataset import CoraDataSet

    device = 'cuda' if hparam.gpu else 'cpu'

    db = CoraDataSet(basepath=hparam.dataPath)
    X, y, A, idx_train, idx_val, idx_test = db.getTorchTensor(device)

    net = GraphConvNetwork(inchannel=db.featureDim,
                           nhidden=hparam.nHidden,
                           outchannel=db.numClass,
                           dropout=hparam.dropout).to(device).train()
    from losses import LossFamily
    lossfunc = LossFamily[hparam.loss]

    import torch.optim as optim
    optimizer = optim.Adam(net.parameters(),
                           lr=hparam.lr,
                           weight_decay=hparam.weight_decay)

    from metric import accuracy
    for epoch in range(hparam.epoch):
        optimizer.zero_grad()
        # forward path
        yhat = net(X, A)
        _loss = lossfunc(yhat[idx_train], y[idx_train])
        # backward
        _loss.backward()
        optimizer.step()

        # detached scalar statistics
        trainLoss = _loss.item()
        valLoss = lossfunc(yhat[idx_val], y[idx_val]).item()

        trainAcc = accuracy(yhat[idx_train], y[idx_train])
        valAcc = accuracy(yhat[idx_val], y[idx_val])

        print('epoch %d,train accuracy %.2f,val accuracy %.2f' %
              (epoch, trainAcc, valAcc))
        # Checkpoint every epoch (overwrites the same path).
        torch.save(net.state_dict(), hparam.savepath)
Beispiel #21
0
def _forward(data,
             model,
             loss_fn,
             window,
             forecast_length,
             training=True,
             teacher_ratio=1):
    """One forward pass of a sequence forecaster with scheduled teacher
    forcing.

    Feeds `window - 1` history steps, then rolls out `forecast_length`
    steps; during training, each step uses the true previous label with
    probability `teacher_ratio`, otherwise the model's own prediction.

    Args:
        data: (label_x, feature_x, label_y, feature_y, _, _) batch tensors.
        model: recurrent model with init_hidden(batch_size) and __call__.
        loss_fn: loss over (outputs, targets).
        window: history length (steps of label_x/feature_x).
        forecast_length: number of steps to forecast.
        training: enables teacher forcing.
        teacher_ratio: probability of feeding the ground-truth label.

    Returns:
        (loss, outputs, avg_acc) — outputs stacked along dim 1.
    """
    outputs = []
    label_x, feature_x, label_y, feature_y, _, _ = data
    batch_size = label_x.shape[0]
    model.init_hidden(batch_size)
    # concat the true value of day(t-1) and the features of day(t) to forecast day(t)
    inp = torch.cat([
        label_x[:, :-1].reshape(batch_size, window - 1, 1), feature_x[:, 1:, :]
    ],
                    dim=2)
    # no need to iterate the first day
    for time_step in range(window - 1):
        output = model(inp[:, time_step:time_step + 1, :])
        outputs.append(output)
    for idx in range(forecast_length):
        if idx == 0:
            # First forecast step always conditions on the last true label.
            inp = torch.cat([
                label_x[:, -1:].reshape([-1, 1, 1]), feature_y[:,
                                                               idx:idx + 1, :]
            ],
                            dim=2)
        else:
            if training:
                # Scheduled teacher forcing: ground truth vs. own prediction.
                if np.random.random() < teacher_ratio:
                    inp = torch.cat([
                        label_y[:, idx - 1].reshape([-1, 1, 1]),
                        feature_y[:, idx:idx + 1, :]
                    ],
                                    dim=2)
                else:
                    inp = torch.cat([
                        output.reshape([-1, 1, 1]), feature_y[:,
                                                              idx:idx + 1, :]
                    ],
                                    dim=2)
            else:
                # Inference: always condition on the model's own prediction.
                inp = torch.cat(
                    [output.reshape([-1, 1, 1]), feature_y[:, idx:idx + 1, :]],
                    dim=2)

        output = model(inp)
        outputs.append(output)
    outputs = torch.stack(outputs, 1)
    # Loss covers both the history (shifted by one) and the forecast span.
    loss = loss_fn(outputs, torch.cat([label_x[:, 1:], label_y], 1))

    avg_acc = accuracy(outputs[:, -forecast_length:], label_y)
    return loss, outputs, avg_acc
Beispiel #22
0
def test(test_adj, test_fea):
    """Evaluate on the test split via myforward (embeddings unused) and
    print the results."""
    model.eval()
    log_probs, _embeddings = model.myforward(test_fea, test_adj, adj_knn)

    nll = F.nll_loss(log_probs[idx_test], labels[idx_test])
    acc = accuracy(log_probs[idx_test], labels[idx_test])

    print("Test set results:", "loss= {:.4f}".format(nll.item()),
          "accuracy= {:.4f}".format(acc.item()))
    print("accuracy=%.5f" % (acc.item()))
    return (nll.item(), acc.item())
Beispiel #23
0
def train(epoch, train_adj, train_fea, idx_train, val_adj=None, val_fea=None):
    """One training epoch with validation and optional early stopping.

    Uses module-level globals: model, optimizer, scheduler, sampler, labels,
    idx_val, args, early_stopping, accuracy, get_lr.

    Returns (loss_train, acc_train, loss_val, acc_val, lr, train_t, val_t).
    """
    if val_adj is None:
        # Transductive setting: validate on the training graph itself.
        val_adj = train_adj
        val_fea = train_fea

    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(train_fea, train_adj)
    #special for reddit
    if sampler.learning_type == "inductive":
        # Inductive: output rows cover only the training nodes.
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])

    loss_train.backward()
    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()
    #We can not apply the fastmode for the reddit dataset.
    if sampler.learning_type == "inductive" or not args.fastmode:
        #    # Evaluate validation set performance separately,
        #    # deactivates dropout during validation run.
        model.eval()
        output = model(val_fea, val_adj)

    # NOTE(review): in fastmode+transductive the training-mode `output`
    # (dropout active) is reused here — confirm intended.
    loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])

    if args.lradjust:
        scheduler.step()
    if args.early_stopping > 0:
        early_stopping(loss_val, model)

    val_t = time.time() - val_t
    return (loss_train.item(), acc_train.item(), loss_val.item(),
            acc_val.item(), get_lr(optimizer), train_t, val_t)
Beispiel #24
0
def test(test_adj, test_fea):
    """Evaluate the VGAE + classifier model on the test indices.

    Uses globals: model, labels, idx_test, args, accuracy,
    roc_auc_compute_fn. Returns (loss_test, acc_test) as floats.
    """
    model.eval()
    recovered, mu, logvar, output = model(test_fea, test_adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    auc_test = roc_auc_compute_fn(output[idx_test], labels[idx_test])
    if args.debug:
        # The triple-quoted string below is a deliberately disabled print
        # (a no-op expression statement), kept as-is.
        '''print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "auc= {:.4f}".format(auc_test),
              "accuracy= {:.4f}".format(acc_test.item()))'''
        print("accuracy=%.5f" % (acc_test.item()))
    return (loss_test.item(), acc_test.item())
def test_sampling(model, test_g, val_batch_size):
    """Minibatch-inference evaluation of `model` on the test nodes,
    computed on CPU.

    Uses globals: idx_test, labels, args, accuracy, roc_auc_compute_fn.
    Returns (loss_test, acc_test) as floats.
    """
    model.eval()
    # Layer-wise inference over the full graph in CPU memory.
    output = model.inference(test_g, test_g.ndata['features'], val_batch_size,
                             'cpu')

    loss_test = F.nll_loss(output[idx_test.cpu()], labels[idx_test].cpu())
    acc_test = accuracy(output[idx_test.cpu()], labels[idx_test].cpu())
    auc_test = roc_auc_compute_fn(output[idx_test.cpu()],
                                  labels[idx_test].cpu())
    if args.debug:
        print("Test set results:", "loss= {:.4f}".format(loss_test.item()),
              "auc= {:.4f}".format(auc_test),
              "accuracy= {:.4f}".format(acc_test.item()))
        print("accuracy=%.5f" % (acc_test.item()))
    return (loss_test.item(), acc_test.item())
Beispiel #26
0
def validation_class(network, dataset, type='validation'):
    """Mean per-batch accuracy of `network` over full minibatches of the
    given split.

    Note: the parameter name `type` shadows the builtin but is kept for
    caller compatibility.

    Bug fix: `np.int` was deprecated in NumPy 1.20 and removed in 1.24;
    the documented drop-in replacement is the builtin `int`.
    """
    accuracy = 0.

    # NOTE(review): the batch count is always taken from the validation
    # split, even when type='test' — confirm that is intended.
    time_max = dataset.get_size(type='validation') // batch_size
    for index in range(0, time_max, 1):
        data_x, data_y = dataset.get_minbatch(batch_size, index, type=type)
        pred_label = network.predict(data_x)
        pred_label = pred_label.astype(int)
        # Reduce to a 0/1 correctness vector, then score it against all-ones
        # so metric.accuracy yields the fraction of correct predictions.
        pred_label = np.equal(pred_label, data_y).astype(int)
        data_y = np.ones_like(pred_label)

        accuracy += metric.accuracy(pred_label, data_y)

    accuracy = accuracy / time_max

    return accuracy
def test(test_adj, test_fea):
    """Evaluate classifier on top of frozen unsupervised features for the
    test indices.

    Uses globals: unsupervised_model, classifier_model, labels, idx_test,
    args, accuracy, roc_auc_compute_fn.
    Returns (loss_test, acc_test) as floats.
    """
    unsupervised_model.eval()
    classifier_model.eval()
    feats = unsupervised_model(test_fea, test_adj)
    output = classifier_model(feats)
    
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    auc_test = roc_auc_compute_fn(output[idx_test], labels[idx_test])
    if args.debug:
        print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "auc= {:.4f}".format(auc_test),
              "accuracy= {:.4f}".format(acc_test.item()))
        print("accuracy=%.5f" % (acc_test.item()))
    return (loss_test.item(), acc_test.item())
Beispiel #28
0
def main():
    """Train a 3-round Adaboost on the white-wine-quality CSV (first 3000
    rows train, remainder test) and print the test accuracy."""
    data = np.genfromtxt('./winequality-white.csv',
                         delimiter=';', dtype=float, skip_header=1)
    # Last column is the quality label; the rest are features.
    train_feature = data[:3000, :-1]
    train_label = data[:3000, -1]

    test_feature = data[3000:, :-1]
    test_label = data[3000:, -1]

    adaboost = Adaboost(train_feature, train_label)
    adaboost.train(3)

    # Predict each test sample individually.
    predict_label = []
    for item in test_feature:
        predict_label.append(adaboost.predict(item))
    print("test accuracy: ", accuracy(predict_label, test_label))
Beispiel #29
0
def test_sampling(test_g, val_batch_size):
    """Evaluate the sampling-based pipeline on the test split.

    Runs the encoder's layer-wise full-graph inference on CPU (to fit in
    memory), classifies only the test-node features on GPU, and returns a
    ``(test_loss, test_accuracy)`` tuple.
    """
    unsupervised_model.eval()
    classifier_model.eval()
    # Layer-wise inference over the whole graph, executed on CPU.
    feats = unsupervised_model.inference(test_g, test_g.ndata['features'], val_batch_size, 'cpu')

    # The classifier sees only the test rows, so `output` is already
    # aligned row-for-row with idx_test.
    output = classifier_model(feats[idx_test.cpu()].cuda())

    loss_test = F.nll_loss(output, labels[idx_test])
    acc_test = accuracy(output, labels[idx_test])
    # Bug fix: `output` is already restricted to the test nodes; indexing it
    # with idx_test again selected the wrong rows (loss and accuracy above
    # correctly use `output` directly).
    auc_test = roc_auc_compute_fn(output, labels[idx_test])
    if args.debug:
        print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "auc= {:.4f}".format(auc_test),
              "accuracy= {:.4f}".format(acc_test.item()))
        print("accuracy=%.5f" % (acc_test.item()))
    return (loss_test.item(), acc_test.item())
def test_full(model, test_g, idx_test, labels):
    """Full-graph evaluation of ``model`` on the test indices.

    For the very large coauthor_phy dataset the model and labels are moved
    to CPU first to avoid exhausting GPU memory. Returns a
    ``(test_loss, test_accuracy)`` tuple.
    """
    model.eval()
    # Memory escape hatch: evaluate on CPU for datasets too big for GPU.
    if sampler.dataset in ['coauthor_phy']:
        model.cpu()
        labels = labels.cpu()

    output = model(test_g.ndata['features'], test_g)

    out_test, lbl_test = output[idx_test], labels[idx_test]
    loss_test = F.nll_loss(out_test, lbl_test)
    acc_test = accuracy(out_test, lbl_test)
    auc_test = roc_auc_compute_fn(out_test, lbl_test)

    if args.debug:
        print("Test set results:", "loss= {:.4f}".format(loss_test.item()),
              "auc= {:.4f}".format(auc_test),
              "accuracy= {:.4f}".format(acc_test.item()))
        print("accuracy=%.5f" % (acc_test.item()))
    return (loss_test.item(), acc_test.item())
Beispiel #31
0
def eval(model, eval_dataloader, device):
    """Evaluate ``model`` over ``eval_dataloader`` and report accuracy / F1.

    Accuracy is accumulated per batch and normalised by the total number of
    examples; F1 is computed over the collected argmax predictions. The
    metrics dict is pretty-printed and returned.

    Parameters
    ----------
    model : torch.nn.Module
        Model taking the (x, y, pair) id/mask/segment tensors of a batch.
    eval_dataloader : iterable
        Yields 10-tuples of tensors (ids/masks/segments for the pair and
        both sides, plus label ids).
    device : torch.device
        Device the batch tensors are moved to.

    Returns
    -------
    dict
        {'eval_accuracy': float, 'eval_f1_score': float}
    """
    model.eval()
    eval_accuracy = 0.
    nb_eval_steps, nb_eval_examples = 0, 0
    preds, labels = [], []

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        # Lazily move each tensor of the batch to the target device.
        batch = (t.to(device) for t in batch)
        input_ids, input_mask, segment_ids, label_ids, x_input_ids, x_input_mask, x_segment_ids, y_input_ids, y_input_mask, y_segment_ids = batch

        with torch.no_grad():
            logits = model(x_input_ids, x_input_mask, x_segment_ids,
                           y_input_ids, y_input_mask, y_segment_ids, input_ids,
                           segment_ids, input_mask)

        logits = logits.detach().cpu().numpy()
        label_ids = label_ids.to('cpu').numpy()
        preds.extend(np.argmax(logits, 1).tolist())
        labels.extend(label_ids.tolist())
        tmp_eval_accuracy = accuracy(logits, label_ids)

        eval_accuracy += tmp_eval_accuracy

        nb_eval_examples += input_ids.size(0)
        nb_eval_steps += 1

    # Normalise by examples (accuracy helper is per-example count here).
    eval_accuracy = eval_accuracy / nb_eval_examples
    eval_f1 = f1_score(np.array(labels), np.array(preds))
    result = {
        'eval_accuracy': eval_accuracy,
        'eval_f1_score': eval_f1
    }
    pprint(result)
    # Bug fix: the metrics were only printed before; callers could not
    # consume them programmatically.
    return result
# Output figure name for the density plot (normal variant kept for reference).
#fname = "normal_density2.pdf"
fname = "lognormal_density2.pdf"

###############################################################################

# Comparison table: clustering accuracy of k-means vs GMM on 1-D data.
# NOTE(review): relies on X, Y, z, k and the metric module defined earlier
# in this script — verify they are in scope.
t = PrettyTable(['Method', 'Accuracy'])

# --- k-means baseline --------------------------------------------------------
km = KMeans(k, n_init=5)
km.fit(Y)
zh_kmeans = km.labels_
# Split the raw samples by predicted cluster to estimate per-cluster moments.
x1_kmeans = X[np.where(zh_kmeans==0)][:, np.newaxis]
x2_kmeans = X[np.where(zh_kmeans==1)][:, np.newaxis]
x1_mu_kmeans, x2_mu_kmeans = km.cluster_centers_
# Centers are 1-D points; keep only the scalar coordinate.
x1_mu_kmeans, x2_mu_kmeans = x1_mu_kmeans[0], x2_mu_kmeans[0]
x1_var_kmeans, x2_var_kmeans = np.var(x1_kmeans), np.var(x2_kmeans)
acc_kmeans = metric.accuracy(z, zh_kmeans)
t.add_row(['k-means', acc_kmeans])

# --- Gaussian mixture model --------------------------------------------------
gm = GMM(k, n_init=5, init_params="kmeans")
gm.fit(Y)
zh_gmm = gm.predict(Y)
#x1_gmm = X[np.where(zh_gmm==0)][:, np.newaxis]
#x2_gmm = X[np.where(zh_gmm==1)][:, np.newaxis]
x1_mu_gmm, x2_mu_gmm = gm.means_
x1_mu_gmm, x2_mu_gmm = x1_mu_gmm[0], x2_mu_gmm[0]
x1_var_gmm, x2_var_gmm = gm.covariances_
# Covariances are 1x1 matrices; extract the scalar variance.
x1_var_gmm, x2_var_gmm = x1_var_gmm[0][0], x2_var_gmm[0][0]
acc_gmm = metric.accuracy(z, zh_gmm)
t.add_row(['gmm', acc_gmm])

# Pairwise Euclidean-distance kernel matrix used by the energy-based methods.
G = eclust.kernel_matrix(Y, lambda x, y: np.linalg.norm(x-y))
    # NOTE(review): fragment of a benchmarking routine — the enclosing `def`
    # is not visible here, so X, z, k, rho and the helper functions come
    # from that (unseen) scope.
    G = kernel_matrix(X, rho)

    # initialization: k-means++ seeding and spectral seeding for comparison
    mu0, z0 = initialization.kmeanspp(k, X, ret='both')
    Z0 = ztoZ(z0)
    z1 = initialization.spectral(k, G)
    Z1 = ztoZ(z1)

    t = BeautifulTable()
    t.column_headers = ["Method", "Accuracy", "Objective", "Exec Time"]

    # Brute-force energy clustering from the k-means++ start.
    start = timer()
    zh = energy_clustering_brute(k, G, Z0)
    end = timer()
    Zh = ztoZ(zh)
    t.append_row(["E-clustering brute", metric.accuracy(z, zh), 
                  objective(Zh, G), end-start])

    # Hartigan-style energy clustering from the same start.
    start = timer()
    zh = energy_hartigan(k, G, Z0)
    end = timer()
    Zh = ztoZ(zh)
    t.append_row(["E-H-clustering++", metric.accuracy(z, zh), 
                  objective(Zh, G), end-start])

    # Spectral labels reported as-is (no refinement, hence '-' exec time).
    t.append_row(['Spectral Clustering:', metric.accuracy(z, z1),
                  objective(Z1,G), '-'])

    # Hartigan refinement of the spectral start (the result is handled past
    # this fragment's visible end).
    start = timer()
    zh = energy_hartigan(k, G, Z1)
    end = timer()
    # NOTE(review): interior of an experiment driver (enclosing `def` not
    # visible). Function-scope imports of the project's clustering helpers.
    import data
    from metric import accuracy
    import eclust
    import initialization

    # Repeat the experiment and record accuracies per method (5 columns
    # are allocated; only 3 are filled in the visible part of this fragment).
    num_experiments = 10
    table = np.zeros((num_experiments, 5))
    for i in range(num_experiments):
        # Two-component 1-D lognormal mixture, 100 samples per class.
        X, z = data.univariate_lognormal([0, -1.5], [0.3, 1.5], [100, 100])
        #X, z = data.univariate_normal([0, 5], [1, 22], [15, 15])
        Y = np.array([[x] for x in X])  # column-vector form for clustering APIs
        k = 2

        # 1D energy clustering
        zh, cost = two_clusters1D(X)
        table[i,0] = accuracy(z, zh)

        # initialization: k-means++ labels and spectral labels
        z0 = initialization.kmeanspp(k, Y, ret='labels')
        Z0 = eclust.ztoZ(z0)
        rho = lambda x, y: np.linalg.norm(x-y)
        G = eclust.kernel_matrix(Y, rho)
        z1 = initialization.spectral(k, G)
        Z1 = eclust.ztoZ(z1)

        # Hartigan's method from the k-means++ start ...
        zh = eclust.energy_hartigan(k, G, Z0)
        table[i,1] = accuracy(z, zh)

        # ... and from the spectral start.
        zh = eclust.energy_hartigan(k, G, Z1)
        table[i,2] = accuracy(z, zh)
# NOTE(review): top-level script fragment (Python 2 — note the bare `print t`
# below). Two-component univariate mixture with randomly split class sizes.
k = 2
n = 2000
n1, n2 = np.random.multinomial(n, [0.5, 0.5])
m1 = 0
s1 = 1.5
m2 = 1.5
s2 = 0.3
#X, z = data.univariate_normal([m1, m2], [s1, s2], [n1, n2])
X, z = data.univariate_lognormal([m1, m2], [s1, s2], [n1, n2])
Y = np.array([[x] for x in X])  # column-vector form for the clustering APIs

### clustering
t = PrettyTable(['Method', 'Accuracy'])
# Euclidean-distance kernel used by kernel k-groups.
G = eclust.kernel_matrix(Y, lambda x, y: np.linalg.norm(x-y))
zh_kmeans = wrapper.kmeans(k, Y)
t.add_row(['k-means', metric.accuracy(z, zh_kmeans)])
zh_gmm = wrapper.gmm(k, Y)
t.add_row(['gmm', metric.accuracy(z, zh_gmm)])
zh_kgroups = wrapper.kernel_kgroups(k, Y, G)
t.add_row(['kernel k-groups', metric.accuracy(z, zh_kgroups)])
print t

### estimated classes
# Partition the raw samples by true and by predicted labels (used for the
# density estimation/plotting further down the script).
x1_true = X[np.where(z==0)]
x2_true = X[np.where(z==1)]

x1_kmeans = X[np.where(zh_kmeans==0)]
x2_kmeans = X[np.where(zh_kmeans==1)]

x1_gmm = X[np.where(zh_gmm==0)]
x2_gmm = X[np.where(zh_gmm==1)]
    # NOTE(review): interior of a demo/benchmark function (`def` not visible).
    # Two d-dimensional Gaussians with identity covariance and shifted mean.
    # NOTE(review): [1/2, 1/2] assumes true division (Python 3 or a
    # `from __future__ import division`) — verify, else these are zeros.
    n = 400
    d = 10
    n1, n2 = np.random.multinomial(n, [1/2, 1/2])
    m1 = np.zeros(d)
    m2 = 0.7*np.ones(d)
    s1 = s2 = np.eye(d)
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])

    # Euclidean-distance kernel and uniform (identity) point weights.
    G = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x-y))
    W = np.eye(n)
    k = 2

    t = PrettyTable(["Method", "Accuracy"])

    zh = kernel_kmeans(k, X, G, W, run_times=5, ini="k-means++")
    a = metric.accuracy(z, zh)
    t.add_row(["Kernel k-means", a])

    zh = kernel_kgroups(k, X, G, W, run_times=5, ini="k-means++")
    a = metric.accuracy(z, zh)
    t.add_row(["Kernel k-groups", a])

    zh = spectral(k, X, G, W, run_times=5)
    a = metric.accuracy(z, zh)
    t.add_row(["Spectral", a])

    zh = kmeans(k, X, run_times=5)
    a = metric.accuracy(z, zh)
    t.add_row(["k-means", a])

    # GMM result is handled past this fragment's visible end.
    zh = gmm(k, X, run_times=5)
    
    # NOTE(review): interior of a __main__/demo block (Python 2 prints);
    # compares three GMM implementations on the same synthetic data.
    import data
    import metric

    #np.random.seed(12)

    # Two 10-D Gaussians: unit vs doubled covariance, mean 0 vs mean 1.
    D = 10
    m1 = np.zeros(D)
    s1 = np.eye(D)
    m2 = np.ones(D)
    s2 = 2*np.eye(D)
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [100, 100])
    k = 2

    # scikit-learn library has a better procedure to estimate the covariance
    # matrix.

    # Local GMM class implementation.
    g = GMM(k)
    zh  = g.fit_predict(X)
    print "GMM class:", metric.accuracy(z, zh)

    # Functional wrapper variant.
    zh = gmm(k, X)
    print "GMM func:", metric.accuracy(z, zh)

    # scikit-learn reference implementation.
    sg = sk_GMM(k)
    sg.fit(X)
    zh = sg.predict(X)
    print "GMM sklearn:", metric.accuracy(z, zh)


    # NOTE(review): fragment of a timing benchmark (`def` not visible);
    # X, z, k, W and the helper functions come from the unseen scope.
    # Plain Euclidean distance kernel (power 1).
    rho = lambda x,y: np.power(np.linalg.norm(x-y), 1)
    G = kernel_matrix(X, rho)

    # initialization: k-means++ seeding and weighted spectral seeding
    z0, mu0 = init.kmeans_plus2(k, X)
    Z0 = ztoZ(z0)
    z1 = init.spectral(k, G, W)
    Z1 = ztoZ(z1)

    t = PrettyTable(["Method", "Accuracy", "Objective", "Exec Time"])

    # Kernel k-groups from the k-means++ start.
    start = timer()
    zh = kernel_kgroups(k, G, Z0, W)
    end = timer()
    Zh = ztoZ(zh)
    t.add_row(["kernel k-groups (k-means++)", metric.accuracy(z, zh), 
                  objective(Zh, G, W), end-start])

    # Kernel k-groups from the spectral start.
    start = timer()
    zh = kernel_kgroups(k, G, Z1, W)
    end = timer()
    Zh = ztoZ(zh)
    t.add_row(["kernel k-groups (spectral)", metric.accuracy(z, zh), 
                  objective(Zh, G, W), end-start])

    # Kernel k-means baseline from the k-means++ start.
    start = timer()
    zh = kernel_kmeans(k, G, Z0, W)
    end = timer()
    Zh = ztoZ(zh)
    t.add_row(["kernel k-means (k-means++)", metric.accuracy(z, zh), 
                  objective(Zh, G, W), end-start])
        # NOTE(review): tail of a predict-style method (`def` not visible):
        # fill the sample-to-cluster distance matrix without updating the
        # cached within-cluster distances, then assign each sample to its
        # nearest cluster.
        dist = np.zeros((n_samples, self.n_clusters))
        self._compute_dist(K, dist, self.within_distances_,
                           update_within=False)
        return dist.argmin(axis=1)



###############################################################################
if __name__ == '__main__':
    # Demo (Python 2 prints): kernel-energy clustering vs plain k-means on
    # two overlapping 2-D Gaussians.
    import energy
    import data
    from metric import accuracy
    from sklearn.cluster import KMeans

    X, z = data.multivariate_normal(
        [[0,0], [2,0]], 
        [np.eye(2), np.eye(2)],
        [100, 100]
    )

    # NOTE(review): `kernel` is assigned but never passed to KernelEnergy
    # below — verify the estimator defaults to the energy kernel.
    kernel = energy.energy_kernel
    km = KernelEnergy(n_clusters=2, max_iter=100, verbose=1, 
                      kernel_params={'alpha':.8})
    zh = km.fit_predict(X)
    print accuracy(z, zh)

    # scikit-learn k-means baseline on the same data.
    km = KMeans(n_clusters=2)
    zh = km.fit_predict(X)
    print accuracy(z, zh)

# NOTE(review): top-level script fragment (Python 2 — bare `print t`);
# `data`, `z`, `rho` and the wrapper/metric modules come from earlier in
# the script. Alternative kernels kept for experimentation.
G = eclust.kernel_matrix(data, rho)
#G = eclust.kernel_matrix(data, rho_gauss)
#G = eclust.kernel_matrix(data, rho_exp)

k = 3

# Run each clustering algorithm once; results are collected in order.
r = []
r.append(wrapper.kmeans(k, data, run_times=5))
r.append(wrapper.gmm(k, data, run_times=5))
r.append(wrapper.spectral_clustering(k, data, G, run_times=5))
r.append(wrapper.spectral(k, data, G, run_times=5))
r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='random'))
#r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='k-means++'))
#r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='spectral'))
r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='random'))
#r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='k-means++'))
#r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='spectral'))

# `algos` must stay aligned with the append order in `r` above.
t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral', 
         'kernel k-means', 'kernel k-groups']

for algo, zh in zip(algos, r):
    t.add_row([algo, 
        metric.accuracy(z, zh),
        sklearn.metrics.adjusted_rand_score(z, zh)
    ])

print t

                costs.append(cost)
        # NOTE(review): tail of a search method (`def` not visible): pick
        # the smallest accumulated cost and return it with its index.
        costs = np.array(costs)
        min_index = costs.argmin()
        min_cost = costs[min_index]
        return min_cost, min_index


###############################################################################
if __name__ == '__main__':
    # Demo (Python 2 prints): EClust vs scikit-learn KMeans on two 2-D
    # Gaussians with different covariance structure.
    import data
    from metric import accuracy

    # Cluster 1: standard normal around the origin.
    m1 = np.array([0,0])
    s1 = np.array([[1,0],[0,1]])
    n1 = 100

    # Cluster 2: shifted mean, strongly elongated along the second axis.
    m2 = np.array([3,0])
    s2 = np.array([[1,0],[0,10]])
    n2 = 100

    X, true_labels = data.multivariate_normal([m1,m2], [s1,s2], [n1,n2])

    ec = EClust(n_clusters=2, max_iter=10, init='kmeans++')
    labels = ec.fit_predict(X)
    print accuracy(labels, true_labels)

    # k-means baseline for comparison.
    km = KMeans(2)
    labels2 = km.fit_predict(X)
    print accuracy(labels2, true_labels)