Example #1
    # Delete checkpoints from epochs before the current best
    # (the loop header was truncated; restored to match Example #8).
    files = glob.glob('*.pkl')
    for file in files:
        epoch_nb = int(file.split('.')[0])
        if epoch_nb < best_epoch:
            os.remove(file)
            
files = glob.glob('*.pkl')
for file in files:
    epoch_nb = int(file.split('.')[0])
    if epoch_nb > best_epoch:
        os.remove(file)
        
print('Optimization Finished!')
print('Total time elapsed: {:.4f}s'.format(time.time() - t_total))

# Restore best model
print('Loading {}th epoch'.format(best_epoch))
model.load_state_dict(torch.load('{}.pkl'.format(best_epoch)))

# Testing
acc_test = compute_test()

# Plot
output_dir = "results/random_seed_" + str(args.seed)
mkdir_p(output_dir)

fig, ax = plt.subplots()
ax.plot(train_losses, label='train loss')
ax.plot(val_losses, label='validation loss')
ax.set_xlabel('epochs')
ax.set_ylabel('cross entropy loss')
ax.legend()
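The plotting block above writes into a per-seed results directory via an mkdir_p helper that the snippet does not define. A minimal sketch of that helper, assuming it simply wraps os.makedirs:

import os

def mkdir_p(path):
    # Create the directory (and any missing parents) if it does not exist.
    os.makedirs(path, exist_ok=True)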
Example #2
# Assumed: checkpoints were saved into dump_dir (see the load further down).
files = glob.glob(os.path.join(dump_dir, '*.pkl'))
for file in files:
    filename = os.path.split(file)[-1]
    epoch_nb = int(filename.split('.')[0])
    if epoch_nb != best_epoch:
        os.remove(file)

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

print("Model arguments: ")
print(args)
print(model)

# Restore best model
print('Loading {}th epoch'.format(best_epoch))
model.load_state_dict(
    torch.load(os.path.join(dump_dir, '{}.pkl'.format(best_epoch))))

# Testing
model.eval()
output_logits = model(features, adj)
output = F.log_softmax(output_logits, dim=-1)
loss_test = F.nll_loss(output[0, idx_test], labels[0, idx_test])
acc_test = accuracy(output[0, idx_test], labels[0, idx_test])
print("Test set results:", "loss= {:.4f}".format(loss_test.data.item()),
      "accuracy= {:.4f}".format(acc_test.data.item()))

with open(os.path.join('./result', "%s.txt" % (configStr, )), 'a') as f:
    f.write("Test graph results: \tmean loss: %.4f \tmean acc: %4f \n" %
            (loss_test, acc_test))
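Example #2 relies on an accuracy helper that is not shown. In pyGAT-style code it is commonly defined as below; treat this as a sketch rather than the project's exact implementation:

def accuracy(output, labels):
    # Predicted class = argmax over the per-node (log-)probabilities.
    preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double()
    return correct.sum() / len(labels)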
Example #3
def main(opt):
    data = get_dataset(opt)
    g = data[0]
    if opt['gpu'] < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(opt['gpu'])

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()
    # create model
    heads = ([opt['num_heads']] * opt['num_layers']) + [opt['num_out_heads']]
    if opt['model'] == 'GAT':
        model = GAT(g, opt['num_layers'], num_feats, opt['num_hidden'],
                    n_classes, heads, F.elu, opt['in_drop'], opt['attn_drop'],
                    opt['negative_slope'], opt['residual'], opt)
    elif opt['model'] == 'AGNN':
        model = AGNN(g, opt['num_layers'], num_feats, opt['num_hidden'],
                     n_classes, opt['in_drop'], opt)
    print(model)
    if opt['early_stop']:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()

    # use optimizer
    optimizer = get_optimizer(opt['optimizer'],
                              parameters=model.parameters(),
                              lr=opt['lr'],
                              weight_decay=opt['weight_decay'])

    # initialize graph
    dur = []
    for epoch in range(opt['epochs']):
        # model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        # logits = model(features)
        # loss = loss_fcn(logits[train_mask], labels[train_mask])
        # optimizer.zero_grad()
        # loss.backward()
        # optimizer.step()

        loss, logits = train(model, optimizer, features, train_mask, labels)

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if opt['fastmode']:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if opt['early_stop']:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if opt['early_stop']:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
Example #4
def train_ray(opt, checkpoint_dir=None, data_dir="../data"):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    data = get_dataset(opt)
    g = data[0]
    if opt['gpu'] < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(opt['gpu'])

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_classes
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()
    # create model
    heads = ([opt['num_heads']] * opt['num_layers']) + [opt['num_out_heads']]

    models = []
    optimizers = []
    datas = [g for i in range(opt['num_init'])]

    for split in range(opt['num_init']):
        if opt['model'] == 'GAT':
            model = GAT(g, opt['num_layers'], num_feats, opt['num_hidden'],
                        n_classes, heads, F.elu, opt['in_drop'],
                        opt['attn_drop'], opt['negative_slope'],
                        opt['residual'], opt)
        elif opt['model'] == 'AGNN':
            model = AGNN(g, opt['num_layers'], num_feats, opt['num_hidden'],
                         n_classes, opt['in_drop'], opt)

        train_this = train
        model = model.to(device)
        models.append(model)

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)

        # model = model.to(device)
        parameters = [p for p in model.parameters() if p.requires_grad]

        optimizer = get_optimizer(opt['optimizer'],
                                  parameters,
                                  lr=opt['lr'],
                                  weight_decay=opt['weight_decay'])
        optimizers.append(optimizer)

        # The `checkpoint_dir` parameter gets passed by Ray Tune when a checkpoint
        # should be restored.
        if checkpoint_dir:
            checkpoint = os.path.join(checkpoint_dir, "checkpoint")
            model_state, optimizer_state = torch.load(checkpoint)
            model.load_state_dict(model_state)
            optimizer.load_state_dict(optimizer_state)

    for epoch in range(1, opt['epochs']):
        loss = np.mean([
            train_this(model, optimizer, features, train_mask,
                       labels)[0].item()
            for model, optimizer in zip(models, optimizers)
        ])
        train_accs, val_accs, tmp_test_accs = average_test(models, datas)
        with tune.checkpoint_dir(step=epoch) as checkpoint_dir:
            best = np.argmax(val_accs)
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save(
                (models[best].state_dict(), optimizers[best].state_dict()),
                path)
        tune.report(loss=loss,
                    accuracy=np.mean(val_accs),
                    test_acc=np.mean(tmp_test_accs),
                    train_acc=np.mean(train_accs))
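Examples #3, #4, and #6 all call a train helper that returns the loss together with the full-graph logits. Based on the commented-out forward pass in Example #3, one plausible sketch (the nll_loss choice is an assumption, not confirmed by the source):

import torch.nn.functional as F

def train(model, optimizer, features, train_mask, labels):
    # One optimization step over the training nodes; the graph itself is
    # stored inside the model, so the forward pass only takes features.
    model.train()
    logits = model(features)
    loss = F.nll_loss(F.log_softmax(logits[train_mask], dim=-1),
                      labels[train_mask])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss, logits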
Example #5
File: analasis.py Project: subercui/pyGAT
    X, Y = load_dataset(args.seed)
    split_rate = 0.8
    x_train = X[0:int(split_rate * X.shape[0])]
    x_test = X[int(split_rate * X.shape[0]):]

    y_train = Y[0:int(split_rate * Y.shape[0])]
    y_test = Y[int(split_rate * Y.shape[0]):]

    # Restore best model
    model = GAT(nfeat=120,
                nhid=4,
                nclass=int(y_train.max()) + 1,
                dropout=args.dropout,
                nheads=args.nb_heads,
                alpha=args.alpha)
    model.load_state_dict(
        torch.load('{}/{}.pkl'.format('output(Apr 12 20.08.47 2019)', '649')))
    print('model loaded')

    # Testing
    mode = 'plot_chan_exp'  # define which test to run: GADN, svm, run_record, cut_channel_GADN, cut_channel_svm
    if mode == 'GADN':
        compute_test(x_test, y_test, SpikeChans=True)
    elif mode == 'cut_channel_GADN':
        cut_chan_test(mode='GADN')
    elif mode == 'svm':
        clf = svm_run_and_test()
    elif mode == 'run_record':
        run_EEG_record(model)
    elif mode == 'attents_box_chart':
        attents_box(model)
    elif mode == 'plot_chan_exp':
Example #6
def train_ray_int(opt, checkpoint_dir=None, data_dir="../data"):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    data = get_dataset(opt)
    g = data[0]
    if opt['gpu'] < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(opt['gpu'])

    # if opt["num_splits"] > 0:
    #   dataset.data = set_train_val_test_split(
    #     23 * np.random.randint(0, opt["num_splits"]),  # random prime 23 to make the splits 'more' random. Could remove
    #     dataset.data,
    #     num_development=5000 if opt["dataset"] == "CoauthorCS" else 1500)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_classes
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
  #Edges %d
  #Classes %d
  #Train samples %d
  #Val samples %d
  #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()
    # create model
    heads = ([opt['num_heads']] * opt['num_layers']) + [opt['num_out_heads']]
    if opt['model'] == 'GAT':
        model = GAT(g, opt['num_layers'], num_feats, opt['num_hidden'],
                    n_classes, heads, F.elu, opt['in_drop'], opt['attn_drop'],
                    opt['negative_slope'], opt['residual'], opt)
    elif opt['model'] == 'AGNN':
        model = AGNN(g, opt['num_layers'], num_feats, opt['num_hidden'],
                     n_classes, opt['in_drop'], opt)

    model = model.to(device)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = get_optimizer(opt["optimizer"],
                              parameters,
                              lr=opt["lr"],
                              weight_decay=opt["weight_decay"])

    if checkpoint_dir:
        checkpoint = os.path.join(checkpoint_dir, "checkpoint")
        model_state, optimizer_state = torch.load(checkpoint)
        model.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)
    train_this = train
    this_test = test_OGB if opt['dataset'] == 'ogbn-arxiv' else test
    best_time = best_epoch = train_acc = val_acc = test_acc = 0
    for epoch in range(1, opt["epoch"]):
        # loss = train(model, optimizer, data)
        loss = train_this(model, optimizer, features, train_mask,
                          labels)[0].item()
        if opt["no_early"]:
            tmp_train_acc, tmp_val_acc, tmp_test_acc = this_test(model, g)
            best_time = opt['time']
        else:
            tmp_train_acc, tmp_val_acc, tmp_test_acc = this_test(model, g)
        if tmp_val_acc > val_acc:
            best_epoch = epoch
            train_acc = tmp_train_acc
            val_acc = tmp_val_acc
            test_acc = tmp_test_acc
        with tune.checkpoint_dir(step=epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save((model.state_dict(), optimizer.state_dict()), path)
        tune.report(loss=loss,
                    accuracy=val_acc,
                    test_acc=test_acc,
                    train_acc=train_acc,
                    best_time=best_time,
                    best_epoch=best_epoch)
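get_optimizer appears in Examples #3, #4, and #6 but is never shown. One plausible sketch, mapping a config string onto the corresponding torch.optim class (the set of supported names is an assumption):

import torch

def get_optimizer(name, parameters, lr, weight_decay=0.0):
    # Dispatch on the optimizer name taken from the config dict.
    if name == 'sgd':
        return torch.optim.SGD(parameters, lr=lr, weight_decay=weight_decay)
    if name == 'adam':
        return torch.optim.Adam(parameters, lr=lr, weight_decay=weight_decay)
    if name == 'adamax':
        return torch.optim.Adamax(parameters, lr=lr, weight_decay=weight_decay)
    raise ValueError('Unsupported optimizer: {}'.format(name))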
Example #7
File: train.py Project: LCS2-IIITD/MG2Vec-
#           break

#     files = glob.glob('*.pkl')
#     for file in files:
#         epoch_nb = int(file.split('.')[0])
#         if epoch_nb < best_epoch:
#             os.remove(file)

files = glob.glob('*.pkl')
for file in files:
    epoch_ = file.split('.')[0]
    model = GAT(nfeat=features.shape[1],
                nhid=args.hidden,
                dropout=args.dropout,
                nheads=args.nb_heads,
                alpha=args.alpha)
    model.load_state_dict(torch.load(file))
    model.eval()
    model = model.cuda()
    embeddings = model.get_embedding(features, adj)
    embeddings = embeddings.cpu().detach().numpy()
    print(embeddings.shape, "Numpy array shape")
    np.save(epoch_, embeddings)
    os.remove(file)

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Restore best model
# print('Loading {}th epoch'.format(best_epoch))
# model.load_state_dict(torch.load('{}.pkl'.format(best_epoch)))
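Example #7 calls model.get_embedding(features, adj), a method the stock pyGAT model does not have. A hypothetical implementation, based on the attention-head concatenation used in Example #10 (this is a guess at the project's intent, not its actual code):

import torch

def get_embedding(self, x, adj):
    # Concatenate the outputs of all first-layer attention heads to form
    # per-node embeddings.
    return torch.cat([att(x, adj) for att in self.attentions], dim=1)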
Example #8
File: train.py Project: aimeng100/pyGAT
    if loss_values[-1] < best:
        best = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
    else:
        bad_counter += 1

    if bad_counter == args.patience:
        break

    files = glob.glob('*.pkl')
    for file in files:
        epoch_nb = int(file.split('.')[0])
        if epoch_nb < best_epoch:
            os.remove(file)

files = glob.glob('*.pkl')
for file in files:
    epoch_nb = int(file.split('.')[0])
    if epoch_nb > best_epoch:
        os.remove(file)

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Restore best model
print('Loading {}th epoch'.format(best_epoch))
model.load_state_dict(torch.load('{}.pkl'.format(best_epoch)))

# Testing
compute_test()
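For context, the cleanup loops in Examples #1, #7, and #8 assume the training loop saves one checkpoint per epoch, named by epoch number, along the lines of:

torch.save(model.state_dict(), '{}.pkl'.format(epoch))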
Example #9
# The CUDA-transfer block was truncated; restored to match Example #10.
if args.cuda:
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()

features, adj, labels = Variable(features), Variable(adj), Variable(labels)


def compute_test():
    model.eval()
    output = model(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:", "loss= {:.4f}".format(loss_test.data.item()),
          "accuracy= {:.4f}".format(acc_test.data.item()))


# Load model
filename = glob.glob('{}_{}_*.pkl'.format(model._get_name(), args.dataset))[0]
print("Loading {}...".format(filename))
if args.cuda:
    model_location = 'cuda'
else:
    model_location = 'cpu'
state_dict = torch.load(filename, map_location=model_location)
model.load_state_dict(state_dict)
model.eval()
print('Loaded {}.'.format(filename))

# Testing
compute_test()

state_dict['gc1.weight']
Example #10
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()
    features, adj, labels = Variable(features), Variable(adj), Variable(labels)

    model = GAT(nfeat=features.shape[1],
                nhid=12,
                nclass=int(labels.max()) + 1,
                dropout=0.6,
                nheads=8,
                alpha=0.2)

    model.load_state_dict(torch.load('693.pkl'))
    compute_test()

    # x = torch.cat([att(features, adj) for att in model.attentions], dim=1)
    # embeddings = x.cpu().data.numpy()
    # time_start = time.time()
    #
    # pca_50 = PCA(n_components=50)
    # pca_result_50 = pca_50.fit_transform(embeddings)
    #
    # tsne = TSNE(n_components=2, verbose=1, perplexity=50, n_iter=1000)
    # tsne_results = tsne.fit_transform(pca_result_50)
    #
    # feat_cols = ['tsne-one', 'tsne-two']
    # df = pd.DataFrame(tsne_results, columns=feat_cols)
    # df['labels'] = labels.cpu().data.numpy()
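The commented-out block above sketches a PCA + t-SNE visualization of the learned attention embeddings. A runnable version, assuming scikit-learn and pandas are installed (the pipeline itself is taken directly from the comments):

import pandas as pd
import torch
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Node embeddings: concatenated outputs of the first-layer attention heads.
x = torch.cat([att(features, adj) for att in model.attentions], dim=1)
embeddings = x.cpu().data.numpy()

# Reduce to 50 dimensions with PCA before t-SNE, as the comments suggest.
pca_result_50 = PCA(n_components=50).fit_transform(embeddings)
tsne_results = TSNE(n_components=2, verbose=1, perplexity=50,
                    n_iter=1000).fit_transform(pca_result_50)

df = pd.DataFrame(tsne_results, columns=['tsne-one', 'tsne-two'])
df['labels'] = labels.cpu().data.numpy()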