else:
        bad_counter += 1

    if bad_counter == args.patience:
        break

    # Prune checkpoints from epochs older than the current best.
    files = glob.glob(filedir + '/*.pkl')
    for file in files:
        epoch_nb = int(file.split('/')[-1][:-4])
        if epoch_nb < best_epoch:
            os.remove(file)

# After training, drop checkpoints newer than the best epoch so that
# only the best model's weights remain.
files = glob.glob(filedir + '/*.pkl')
for file in files:
    epoch_nb = int(file.split('/')[-1][:-4])
    if epoch_nb > best_epoch:
        os.remove(file)

print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Restore best model
print('Loading epoch {}'.format(best_epoch))
model.load_state_dict(torch.load(filedir + '/{}.pkl'.format(best_epoch)))

# Testing
compute_test()
np.save(filedir + "/idx_test_joshi", idx_test.cpu().detach().numpy())
np.save(filedir + "/idx_train_joshi", idx_train.cpu().detach().numpy())
np.save(filedir + "/idx_val_joshi", idx_val.cpu().detach().numpy())
np.save(filedir + "/neighbor_genes_joshi", neighbor_genes)
Example 2
        best = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
    else:
        bad_counter += 1

    if bad_counter == args.patience:
        break

    files = glob.glob('*.pkl')
    for file in files:
        epoch_nb = int(file.split('.')[0])
        if epoch_nb < best_epoch:
            os.remove(file)

files = glob.glob('*.pkl')
for file in files:
    epoch_nb = int(file.split('.')[0])
    if epoch_nb > best_epoch:
        os.remove(file)

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Restore best model
print('Loading epoch {}'.format(best_epoch))
model.load_state_dict(torch.load('{}.pkl'.format(best_epoch)))

# Testing
compute_test()
Example 3
    def train_pipeline(self, adj, features, labels, idx_train, idx_val,
                       idx_test, *args):

        adj = normalize_adj(adj + sp.eye(adj.shape[0]))

        if sp.issparse(adj):
            adj = adj.todense()

        if sp.issparse(features):
            features = features.todense()

        # With networkx, we no longer need to convert from one-hot encoding...
        # labels = np.where(labels)[1]

        adj = torch.FloatTensor(adj)
        features = torch.FloatTensor(features)
        labels = torch.LongTensor(labels)
        idx_train = torch.LongTensor(idx_train)
        idx_val = torch.LongTensor(idx_val)
        idx_test = torch.LongTensor(idx_test)

        random.seed(self.args.seed)
        np.random.seed(self.args.seed)
        torch.manual_seed(self.args.seed)
        if self.args.cuda:
            torch.cuda.manual_seed(self.args.seed)

        # Model and optimizer
        if self.args.sparse:
            model = SpGAT(
                nfeat=features.shape[1],
                nhid=self.args.hidden,
                nclass=int(labels.max()) + 1,
                dropout=self.args.dropout,
                nheads=self.args.nb_heads,
                alpha=self.args.alpha,
            )
        else:
            model = GAT(
                nfeat=features.shape[1],
                nhid=self.args.hidden,
                nclass=int(labels.max()) + 1,
                dropout=self.args.dropout,
                nheads=self.args.nb_heads,
                alpha=self.args.alpha,
            )
        optimizer = optim.Adam(model.parameters(),
                               lr=self.args.lr,
                               weight_decay=self.args.weight_decay)

        if self.args.cuda:
            model.cuda()
            features = features.cuda()
            adj = adj.cuda()
            labels = labels.cuda()
            idx_train = idx_train.cuda()
            idx_val = idx_val.cuda()
            idx_test = idx_test.cuda()

        # Variable is a no-op in modern PyTorch (>= 0.4); kept for
        # compatibility with the original code.
        features, adj, labels = Variable(features), Variable(adj), Variable(
            labels)

        # TODO: Test if these lines could be written below line 41.
        self.adj = adj
        self.features = features
        self.labels = labels
        self.idx_train = idx_train
        self.idx_val = idx_val
        self.idx_test = idx_test

        def train(epoch):
            t = time.time()
            model.train()
            optimizer.zero_grad()
            output = model(features, adj)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            acc_train = accuracy(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            if not self.args.fastmode:
                # Evaluate validation performance separately;
                # eval() disables dropout for the validation pass.
                model.eval()
                output = model(features, adj)

            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = accuracy(output[idx_val], labels[idx_val])
            print(
                "Epoch: {:04d}".format(epoch + 1),
                "loss_train: {:.4f}".format(loss_train.data.item()),
                "acc_train: {:.4f}".format(acc_train.data.item()),
                "loss_val: {:.4f}".format(loss_val.data.item()),
                "acc_val: {:.4f}".format(acc_val.data.item()),
                "time: {:.4f}s".format(time.time() - t),
            )

            return loss_val.data.item()

        # Train model
        t_total = time.time()
        loss_values = []
        bad_counter = 0
        best = self.args.epochs + 1  # effectively "infinity" for the first comparison
        best_epoch = 0
        for epoch in range(self.args.epochs):
            loss_values.append(train(epoch))

            torch.save(model.state_dict(), "{}.pkl".format(epoch))
            if loss_values[-1] < best:
                best = loss_values[-1]
                best_epoch = epoch
                bad_counter = 0
            else:
                bad_counter += 1

            if bad_counter == self.args.patience:
                break

            files = glob.glob("*.pkl")
            for file in files:
                epoch_nb = int(file.split(".")[0])
                if epoch_nb < best_epoch:
                    os.remove(file)

        files = glob.glob("*.pkl")
        for file in files:
            epoch_nb = int(file.split(".")[0])
            if epoch_nb > best_epoch:
                os.remove(file)

        print("Optimization Finished!")
        print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

        # Restore best model
        print("Loading {}th epoch".format(best_epoch))
        model.load_state_dict(torch.load("{}.pkl".format(best_epoch)))

        self.model = model

        return model
def time_model(file):
    model.eval()
    n_warmup = 50
    n_sample = 50
    print("=== Running Warmup Passes")
    for i in range(0, n_warmup):
        output = model(features, adj)

    print("=== Collecting Runtime over ", str(n_sample), " Passes")
    tic = time.perf_counter()
    for i in range(0, n_sample):
        output = model(features, adj)
    toc = time.perf_counter()
    avg_runtime = float(toc - tic) / n_sample
    print("average runtime = ", avg_runtime)

    # Write the measured runtime to the given file.
    with open(file, "w") as f:
        f.write(str(avg_runtime) + "\n")


if __name__ == "__main__":
    map_location = torch.device('cpu')
    model.load_state_dict(torch.load(args.pkl_file, map_location=map_location))

    if len(args.time_file) != 0:  # time and send time to file
        time_model(args.time_file)

    compute_test()
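One caveat worth noting about time_model: the timed passes run with autograd enabled, and on a GPU they execute asynchronously, so the wall-clock readings can be misleading. A sketch of a variant under the same assumptions (the model, features, and adj globals from above); time_model_no_grad is hypothetical:

def time_model_no_grad(path, n_warmup=50, n_sample=50):
    # Hypothetical variant of time_model: disables autograd and,
    # on CUDA, synchronizes before reading the clock.
    model.eval()
    with torch.no_grad():
        for _ in range(n_warmup):
            model(features, adj)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        tic = time.perf_counter()
        for _ in range(n_sample):
            model(features, adj)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        avg_runtime = (time.perf_counter() - tic) / n_sample
    with open(path, "w") as f:
        f.write(str(avg_runtime) + "\n")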
Example 5
        best = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
    else:
        bad_counter += 1

    if bad_counter == args.patience:
        break

    # files = glob.glob(f'./saved_models/{args.dataset}/*.pkl')
    # for file in files:
    #     epoch_nb = int(file.split('.')[0])
    #     if epoch_nb < best_epoch:
    #         os.remove(file)

# files = glob.glob(f'./saved_models/{args.dataset}/*.pkl')
# for file in files:
#     epoch_nb = int(file.split('.')[0])
#     if epoch_nb > best_epoch:
#         os.remove(file)

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Restore best model
print('Loading epoch {}'.format(best_epoch))
model.load_state_dict(torch.load(f'./saved_models/{args.dataset}/best.pkl'))

# Testing
compute_test()
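Unlike the other examples, this variant restores a single best.pkl rather than a per-epoch checkpoint, so the (truncated) training loop above presumably saves on improvement instead of every epoch. A minimal sketch of that save-on-improve step, assuming the same best/bad_counter bookkeeping and the ./saved_models/<dataset> layout implied by the load above:

save_dir = f'./saved_models/{args.dataset}'
os.makedirs(save_dir, exist_ok=True)

if loss_values[-1] < best:
    best = loss_values[-1]
    best_epoch = epoch
    bad_counter = 0
    # Overwrite a single 'best.pkl' instead of writing '<epoch>.pkl'
    # each epoch; this makes the pruning loops unnecessary.
    torch.save(model.state_dict(), os.path.join(save_dir, 'best.pkl'))
else:
    bad_counter += 1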
Example 6
    def train_pipeline(self, *args, custom_function=True, function=None):
        random.seed(self.args.seed)
        np.random.seed(self.args.seed)
        torch.manual_seed(self.args.seed)
        if self.args.cuda:
            torch.cuda.manual_seed(self.args.seed)

        # Load data
        adj, features, labels, idx_train, idx_val, idx_test = new_load_data(
            *args, custom_function=custom_function, function=function)

        # Model and optimizer
        if self.args.sparse:
            model = SpGAT(nfeat=features.shape[1],
                          nhid=self.args.hidden,
                          nclass=int(labels.max()) + 1,
                          dropout=self.args.dropout,
                          nheads=self.args.nb_heads,
                          alpha=self.args.alpha)
        else:
            model = GAT(nfeat=features.shape[1],
                        nhid=self.args.hidden,
                        nclass=int(labels.max()) + 1,
                        dropout=self.args.dropout,
                        nheads=self.args.nb_heads,
                        alpha=self.args.alpha)
        optimizer = optim.Adam(model.parameters(),
                               lr=self.args.lr,
                               weight_decay=self.args.weight_decay)

        if self.args.cuda:
            model.cuda()
            features = features.cuda()
            adj = adj.cuda()
            labels = labels.cuda()
            idx_train = idx_train.cuda()
            idx_val = idx_val.cuda()
            idx_test = idx_test.cuda()

        features, adj, labels = Variable(features), Variable(adj), Variable(
            labels)

        # TODO: Test if these lines could be written below line 41.
        self.adj = adj
        self.features = features
        self.labels = labels
        self.idx_train = idx_train
        self.idx_val = idx_val
        self.idx_test = idx_test

        def train(epoch):
            t = time.time()
            model.train()
            optimizer.zero_grad()
            output = model(features, adj)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            acc_train = accuracy(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            if not self.args.fastmode:
                # Evaluate validation performance separately;
                # eval() disables dropout for the validation pass.
                model.eval()
                output = model(features, adj)

            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = accuracy(output[idx_val], labels[idx_val])
            print('Epoch: {:04d}'.format(epoch + 1),
                  'loss_train: {:.4f}'.format(loss_train.data.item()),
                  'acc_train: {:.4f}'.format(acc_train.data.item()),
                  'loss_val: {:.4f}'.format(loss_val.data.item()),
                  'acc_val: {:.4f}'.format(acc_val.data.item()),
                  'time: {:.4f}s'.format(time.time() - t))

            return loss_val.data.item()

        # Train model
        t_total = time.time()
        loss_values = []
        bad_counter = 0
        best = self.args.epochs + 1
        best_epoch = 0
        for epoch in range(self.args.epochs):
            loss_values.append(train(epoch))

            torch.save(model.state_dict(), '{}.pkl'.format(epoch))
            if loss_values[-1] < best:
                best = loss_values[-1]
                best_epoch = epoch
                bad_counter = 0
            else:
                bad_counter += 1

            if bad_counter == self.args.patience:
                break

            files = glob.glob('*.pkl')
            for file in files:
                epoch_nb = int(file.split('.')[0])
                if epoch_nb < best_epoch:
                    os.remove(file)

        files = glob.glob('*.pkl')
        for file in files:
            epoch_nb = int(file.split('.')[0])
            if epoch_nb > best_epoch:
                os.remove(file)

        print("Optimization Finished!")
        print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

        # Restore best model
        print('Loading epoch {}'.format(best_epoch))
        model.load_state_dict(torch.load('{}.pkl'.format(best_epoch)))

        self.model = model

        return model
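For context, a minimal driver sketch for this pipeline. Everything here is hypothetical: Args stands in for whatever argument object the surrounding project builds (its fields mirror the self.args attributes used above), and GATPipeline for the class that owns train_pipeline:

class Args:
    # Hypothetical defaults mirroring the self.args fields used above.
    seed = 42
    cuda = torch.cuda.is_available()
    sparse = False
    hidden = 8
    nb_heads = 8
    dropout = 0.6
    alpha = 0.2
    lr = 0.005
    weight_decay = 5e-4
    epochs = 200
    patience = 100
    fastmode = False

pipeline = GATPipeline(Args())  # assumed owner of train_pipeline
model = pipeline.train_pipeline(custom_function=False)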
Example 7
    def __init__(self, graph, sparse=False, epochs=200, learning_rate=0.005,
                 weight_decay=5e-4, hidden=8, nb_heads=8, drop_out=0.6,
                 alpha=0.2, patience=100, train=1500, val=2000, test=3100):
        self.graph = graph
        self.sparse = sparse
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.hidden = hidden
        self.nb_heads = nb_heads
        self.drop_out = drop_out
        self.alpha = alpha
        self.patience = patience
        self.train = train
        self.val = val
        self.test = test

        idx_train, idx_val, idx_test = self.load_data()

        # NOTE: random_seed is assumed to be defined at module scope.
        random.seed(random_seed)
        np.random.seed(random_seed)
        torch.manual_seed(random_seed)

        if self.sparse:
            model = SpGAT(nfeat=self.features.shape[1],
                          nhid=self.hidden,
                          nclass=int(self.labels.max()) + 1,
                          dropout=self.drop_out,
                          nheads=self.nb_heads,
                          alpha=self.alpha)
        else:
            model = GAT(nfeat=self.features.shape[1],
                        nhid=self.hidden,
                        nclass=int(self.labels.max()) + 1,
                        dropout=self.drop_out,
                        nheads=self.nb_heads,
                        alpha=self.alpha)

        optimizer = optim.Adam(model.parameters(),
                               lr=self.learning_rate,
                               weight_decay=self.weight_decay)

        # Use the GPU (optional)
        # device = torch.device("cuda:0")
        # torch.cuda.empty_cache()
        # model.to(device)
        # self.features = self.features.to(device)
        # self.adj = self.adj.to(device)
        # self.labels = self.labels.to(device)
        # idx_train = idx_train.to(device)
        # idx_val = idx_val.to(device)
        # idx_test = idx_test.to(device)

        features, adj, labels = Variable(self.features), Variable(self.adj), Variable(self.labels)

        t_total = time.time()
        loss_values = []
        bad_counter = 0
        best = self.epochs + 1
        best_epoch = 0
        for epoch in range(self.epochs):

            t = time.time()
            model.train()
            optimizer.zero_grad()
            output = model(features, adj)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            acc_train = accuracy(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            model.eval()
            output = model(features, adj)

            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = accuracy(output[idx_val], labels[idx_val])

            print('Epoch: {:04d}'.format(epoch + 1),
                  'loss_train: {:.4f}'.format(loss_train.item()),
                  'acc_train: {:.4f}'.format(acc_train.item()),
                  'loss_val: {:.4f}'.format(loss_val.item()),
                  'acc_val: {:.4f}'.format(acc_val.item()),
                  'time: {:.4f}s'.format(time.time() - t))
            loss_values.append(loss_val.item())
            torch.save(model.state_dict(), '{}.pkl'.format(epoch))
            if loss_values[-1] < best:
                best = loss_values[-1]
                best_epoch = epoch
                bad_counter = 0
            else:
                bad_counter += 1

            if bad_counter == self.patience:
                break

            files = glob.glob('*.pkl')
            for file in files:
                epoch_nb = int(file.split('.')[0])
                if epoch_nb < best_epoch:
                    os.remove(file)

        print("Optimization Finished!")
        print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
        print('Loading epoch {}'.format(best_epoch))
        model.load_state_dict(torch.load('{}.pkl'.format(best_epoch)))

        model.eval()
        output = model(features, adj)
        loss_test = F.nll_loss(output[idx_test], labels[idx_test])
        acc_test = accuracy(output[idx_test], labels[idx_test])
        print("Test set results:",
              "loss= {:.4f}".format(loss_test.data),
              "accuracy= {:.4f}".format(acc_test.data))