Example #1
def get_data(args, device):
    # load and preprocess dataset
    data = load_data(args)
    g = data.g
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    labels = g.ndata['label']
    train_nid = np.nonzero(train_mask.data.numpy())[0].astype(np.int64)

    # Normalize features
    if args.normalize:
        feats = g.ndata['feat']
        train_feats = feats[train_mask]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats.data.numpy())
        features = scaler.transform(feats.data.numpy())
        g.ndata['feat'] = torch.FloatTensor(features)

    in_feats = g.ndata['feat'].shape[1]
    n_classes = data.num_classes
    n_edges = g.number_of_edges()
    g = g.long()

    # create the cluster gcn iterator
    cluster_iterator = ClusterIter(args.dataset,
                                   g,
                                   args.psize,
                                   args.batch_size,
                                   train_nid,
                                   use_pp=args.use_pp)

    # set device for dataset tensors
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)
    g = g.int().to(device)
    return (g, cluster_iterator, train_mask, val_mask, test_mask, labels,
            train_nid, in_feats, n_classes, n_edges)
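
A minimal driver sketch for the helper above, assuming `args` is an argparse namespace exposing the fields the function reads; the flag names and defaults below are illustrative, not the original script's.

import argparse
import torch

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default='reddit')
parser.add_argument('--normalize', action='store_true')
parser.add_argument('--psize', type=int, default=1500)      # number of METIS partitions
parser.add_argument('--batch-size', type=int, default=20)   # partitions per mini-batch
parser.add_argument('--use-pp', action='store_true')        # precompute first-layer aggregation
args = parser.parse_args()

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
(g, cluster_iterator, train_mask, val_mask, test_mask, labels,
 train_nid, in_feats, n_classes, n_edges) = get_data(args, device)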
Example #2
    graph.ndata['labels'] = labels
    mask = th.zeros(num_nodes, dtype=th.bool)
    mask[train_idx] = True
    graph.ndata['train_mask'] = mask
    mask = th.zeros(num_nodes, dtype=th.bool)
    mask[val_idx] = True
    graph.ndata['valid_mask'] = mask
    mask = th.zeros(num_nodes, dtype=th.bool)
    mask[test_idx] = True
    graph.ndata['test_mask'] = mask

    # query the degrees/edges once so the graph's internal sparse formats
    # are materialized up front
    graph.in_degrees(0)
    graph.out_degrees(0)
    graph.find_edges(0)

    cluster_iter_data = ClusterIter('ogbn-products', graph,
                                    args.num_partitions, args.batch_size)
    cluster_iterator = DataLoader(cluster_iter_data,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=0,
                                  collate_fn=partial(subgraph_collate_fn,
                                                     graph))

    in_feats = graph.ndata['feat'].shape[1]
    n_classes = (labels.max() + 1).item()
    # Pack data
    data = train_idx, val_idx, test_idx, in_feats, labels, n_classes, graph, cluster_iterator

    # Run 10 times
    test_accs = []
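
`subgraph_collate_fn` is referenced but not shown in this example. A plausible sketch of such a collate function, assuming each batch element is an array of node IDs for one partition (the real implementation may differ):

import numpy as np
import torch as th

def subgraph_collate_fn(g, batch):
    # Merge the sampled partitions' node IDs and return the node-induced
    # subgraph of the full graph; node features/labels are copied over by DGL.
    nids = np.concatenate(batch)
    return g.subgraph(th.as_tensor(nids))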
Example #3
def main(args):
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    multitask_data = set(['ppi', 'amazon', 'amazon-0.1',
                          'amazon-0.3', 'amazon2M', 'amazon2M-47'])

    multitask = args.dataset in multitask_data

    # load and preprocess dataset
    data = load_data(args)

    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)
    test_nid = np.nonzero(data.test_mask)[0].astype(np.int64)

    # Normalize features
    if args.normalize:
        train_feats = data.features[train_nid]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats)
        features = scaler.transform(data.features)
    else:
        features = data.features

    features = torch.FloatTensor(features)
    if not multitask:
        labels = torch.LongTensor(data.labels)
    else:
        labels = torch.FloatTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask).type(torch.bool)
    val_mask = torch.ByteTensor(data.val_mask).type(torch.bool)
    test_mask = torch.ByteTensor(data.test_mask).type(torch.bool)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.sum().item()
    n_val_samples = val_mask.sum().item()
    n_test_samples = test_mask.sum().item()

    print("""----Data statistics------'
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
            (n_edges, n_classes,
            n_train_samples,
            n_val_samples,
            n_test_samples))
    # create GCN model
    g = data.graph
    if args.self_loop and not args.dataset.startswith('reddit'):
        g.remove_edges_from(g.selfloop_edges())
        g.add_edges_from(zip(g.nodes(), g.nodes()))
        print("adding self-loop edges")
    g = DGLGraph(g, readonly=True)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    if cuda:
        print(torch.cuda.get_device_name(args.gpu))

    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    print('labels shape:', labels.shape)

    cluster_iterator = ClusterIter(
        args.dataset, g, args.psize, args.batch_size, train_nid, use_pp=args.use_pp)

    print("features shape, ", features.shape)

    model_sel = {'GCN': GCNCluster, 'graphsage': GraphSAGE}
    model_class = model_sel[args.model_type]
    print('using model:', model_class)

    model = model_class(in_feats,
                        args.n_hidden,
                        n_classes,
                        args.n_layers,
                        F.relu,
                        args.dropout, args.use_pp)

    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    writer = SummaryWriter(log_dir)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
    print("current memory after model before training",
          torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)
    start_time = time.time()
    best_f1 = -1

    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(cluster_iterator):
            # sync with upper level training graph
            cluster.copy_from_parent()
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['labels']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # For PPI, `log_every` is set so that this logs once per epoch;
            # lower it if you want more frequent logging within an epoch.
            if j % args.log_every == 0:
                print(f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/{len(cluster_iterator)}:training loss", loss.item())
                writer.add_scalar('train/loss', loss.item(),
                                  global_step=j + epoch * len(cluster_iterator))
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate
        if epoch % args.val_every == 0:
            val_f1_mic, val_f1_mac = evaluate(
                model, g, labels, val_mask, multitask)
            print("Val F1-mic {:.4f}, Val F1-mac {:.4f}".format(
                val_f1_mic, val_f1_mac))
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(), os.path.join(
                    log_dir, 'best_model.pkl'))
            writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
            writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)

    end_time = time.time()
    print(f'training time: {end_time - start_time:.2f} s')

    # test
    if args.use_val:
        model.load_state_dict(torch.load(os.path.join(
            log_dir, 'best_model.pkl')))
    test_f1_mic, test_f1_mac = evaluate(
        model, g, labels, test_mask, multitask)
    print("Test F1-mic {:.4f}, Test F1-mac {:.4f}".format(
        test_f1_mic, test_f1_mac))
    writer.add_scalar('test/f1-mic', test_f1_mic)
    writer.add_scalar('test/f1-mac', test_f1_mac)
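
The `evaluate` helper used above is not part of the snippet. A sketch consistent with how it is called here (full-graph forward pass, then micro/macro F1 over the masked nodes); this is an assumption about its behaviour, not the original code:

import torch
from sklearn.metrics import f1_score

def evaluate(model, g, labels, mask, multitask=False):
    model.eval()
    with torch.no_grad():
        logits = model(g)[mask]
        y_true = labels[mask].cpu().numpy()
        if multitask:
            # multi-label: threshold the sigmoid outputs
            y_pred = (torch.sigmoid(logits) > 0.5).int().cpu().numpy()
        else:
            # multi-class: take the argmax
            y_pred = logits.argmax(dim=1).cpu().numpy()
    return (f1_score(y_true, y_pred, average='micro'),
            f1_score(y_true, y_pred, average='macro'))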
Example #4
def main(args):
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    multitask_data = set(['ppi'])
    multitask = args.dataset in multitask_data

    # load and preprocess dataset
    assert args.dataset == 'amazon2m'
    g, graph_labels = load_graphs(
        '/yushi/dataset/Amazon2M/Amazon2M_dglgraph.bin')
    assert len(g) == 1
    g = g[0]
    data = g.ndata
    labels = torch.LongTensor(data['label'])
    if hasattr(torch, 'BoolTensor'):
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = data['test_mask'].bool()
    else:  # older torch without bool tensors: fall back to byte masks
        train_mask = data['train_mask'].byte()
        val_mask = data['val_mask'].byte()
        test_mask = data['test_mask'].byte()

    train_nid = np.nonzero(train_mask.cpu().numpy())[0].astype(np.int64)
    val_nid = np.nonzero(val_mask.cpu().numpy())[0].astype(np.int64)

    # Normalize features
    features = torch.FloatTensor(data['feat'])
    if args.normalize:
        train_feats = features[train_nid]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats)
        features = scaler.transform(features)
    features = torch.FloatTensor(features)

    in_feats = features.shape[1]
    n_classes = 47  # Amazon2M (ogbn-products) has 47 classes
    n_edges = g.number_of_edges()

    n_train_samples = train_mask.int().sum().item()
    n_val_samples = val_mask.int().sum().item()
    n_test_samples = test_mask.int().sum().item()

    print("""----Data statistics------'
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
          (n_edges, n_classes,
           n_train_samples,
           n_val_samples,
           n_test_samples))
    # create GCN model
    if args.self_loop:
        print("adding self-loop edges")
        g = add_self_loop(g)
    # g = DGLGraph(g, readonly=True)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
        raise ValueError('no cuda')
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    print(torch.cuda.get_device_name(args.gpu))

    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    print('labels shape:', labels.shape)
    train_cluster_iterator = ClusterIter(
        args.dataset, g, args.psize, args.batch_size, train_nid, use_pp=args.use_pp)
    val_cluster_iterator = ClusterIter(
        args.dataset, g, args.psize_val, 1, val_nid, use_pp=False)

    print("features shape, ", features.shape)
    model = GraphSAGE(in_feats,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      F.relu,
                      args.dropout,
                      args.use_pp)

    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    writer = SummaryWriter(log_dir)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
    print("current memory after model before training",
          torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)
    start_time = time.time()
    best_f1 = -1

    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(train_cluster_iterator):
            # sync with upper level training graph
            cluster.copy_from_parent()
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['labels']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # For PPI, `log_every` is set so that this logs once per epoch;
            # lower it if you want more frequent logging within an epoch.
            if j % args.log_every == 0:
                print(f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                      f"{len(train_cluster_iterator)}:training loss", loss.item())
                writer.add_scalar('train/loss', loss.item(),
                                  global_step=j + epoch * len(train_cluster_iterator))
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate
        if epoch % args.val_every == 0:
            total_f1_mic = []
            total_f1_mac = []
            model.eval()
            for j, cluster in enumerate(val_cluster_iterator):
                cluster.copy_from_parent()
                with torch.no_grad():
                    logits = model(cluster)
                    batch_labels = cluster.ndata['labels']
                    # batch_val_mask = cluster.ndata['val_mask']
                    val_f1_mic, val_f1_mac = calc_f1(
                        batch_labels.cpu().numpy(),
                        logits.cpu().numpy(), multitask)
                total_f1_mic.append(val_f1_mic)
                total_f1_mac.append(val_f1_mac)

            val_f1_mic = np.mean(total_f1_mic)
            val_f1_mac = np.mean(total_f1_mac)

            print("Val F1-mic {:.4f}, Val F1-mac {:.4f}".format(
                val_f1_mic, val_f1_mac))
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(), os.path.join(
                    log_dir, 'best_model.pkl'))
            writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
            writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)

    end_time = time.time()
    print(f'training time: {end_time - start_time:.2f} s')

    # test
    if args.use_val:
        model.load_state_dict(torch.load(os.path.join(
            log_dir, 'best_model.pkl')))
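
`calc_f1` is likewise not shown. A sketch matching the call above, which hands it raw logits and labels as numpy arrays and expects (micro F1, macro F1) back; treat it as an assumption about the helper, not its original definition:

import numpy as np
from sklearn.metrics import f1_score

def calc_f1(y_true, y_pred, multitask=False):
    if multitask:
        # multi-label: sigmoid(x) > 0.5 is equivalent to x > 0 on raw logits
        y_pred = (y_pred > 0).astype(int)
    else:
        # multi-class: pick the highest-scoring class
        y_pred = np.argmax(y_pred, axis=1)
    return (f1_score(y_true, y_pred, average='micro'),
            f1_score(y_true, y_pred, average='macro'))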
Example #5
    mask = th.zeros(num_nodes, dtype=th.bool)
    mask[train_idx] = True
    graph.ndata['train_mask'] = mask
    mask = th.zeros(num_nodes, dtype=th.bool)
    mask[val_idx] = True
    graph.ndata['valid_mask'] = mask
    mask = th.zeros(num_nodes, dtype=th.bool)
    mask[test_idx] = True
    graph.ndata['test_mask'] = mask

    # query the degrees/edges once so the graph's internal sparse formats
    # are materialized before the DataLoader workers are forked
    graph.in_degree(0)
    graph.out_degree(0)
    graph.find_edges(0)

    cluster_iter_data = ClusterIter('ogbn-products', graph,
                                    args.num_partitions, args.batch_size,
                                    th.cat([train_idx, val_idx, test_idx]))
    idx = th.arange(args.num_partitions // args.batch_size)
    cluster_iterator = DataLoader(cluster_iter_data,
                                  batch_size=32,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=4,
                                  collate_fn=partial(subgraph_collate_fn,
                                                     graph))

    in_feats = graph.ndata['feat'].shape[1]
    print(in_feats)
    n_classes = (labels.max() + 1).item()
    # Pack data
    data = train_idx, val_idx, test_idx, in_feats, labels, n_classes, graph, cluster_iterator
Example #6
def main(args):
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    multitask_data = set(['ppi'])
    multitask = args.dataset in multitask_data

    # load and preprocess dataset
    data = load_data(args)
    g = data.g
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    labels = g.ndata['label']

    train_nid = np.nonzero(train_mask.data.numpy())[0].astype(np.int64)

    # Normalize features
    if args.normalize:
        feats = g.ndata['feat']
        train_feats = feats[train_mask]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats.data.numpy())
        features = scaler.transform(feats.data.numpy())
        g.ndata['feat'] = torch.FloatTensor(features)

    in_feats = g.ndata['feat'].shape[1]
    n_classes = data.num_classes
    n_edges = g.number_of_edges()

    n_train_samples = train_mask.int().sum().item()
    n_val_samples = val_mask.int().sum().item()
    n_test_samples = test_mask.int().sum().item()

    print("""----Data statistics------'
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples, n_test_samples))
    # create GCN model
    if args.self_loop and not args.dataset.startswith('reddit'):
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
        print("adding self-loop edges")
    # metis only support int64 graph
    g = g.long()

    cluster_iterator = ClusterIter(args.dataset,
                                   g,
                                   args.psize,
                                   args.batch_size,
                                   train_nid,
                                   use_pp=args.use_pp)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        g = g.int().to(args.gpu)

    print('labels shape:', g.ndata['label'].shape)
    print("features shape, ", g.ndata['feat'].shape)

    model = GraphSAGE(in_feats, args.n_hidden, n_classes, args.n_layers,
                      F.relu, args.dropout, args.use_pp)

    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
        print(
            "current memory (MB) after model creation, before training:",
            torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)
    start_time = time.time()
    best_f1 = -1

    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(cluster_iterator):
            # sync with upper level training graph
            if cuda:
                cluster = cluster.to(torch.cuda.current_device())
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['label']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # For PPI, `log_every` is set so that this logs once per epoch;
            # lower it if you want more frequent logging within an epoch.
            if j % args.log_every == 0:
                print(
                    f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                    f"{len(cluster_iterator)}:training loss", loss.item())
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate
        if epoch % args.val_every == 0:
            val_f1_mic, val_f1_mac = evaluate(model, g, labels, val_mask,
                                              multitask)
            print("Val F1-mic{:.4f}, Val F1-mac{:.4f}".format(
                val_f1_mic, val_f1_mac))
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(),
                           os.path.join(log_dir, 'best_model.pkl'))

    end_time = time.time()
    print(f'training time: {end_time - start_time:.2f} s')

    # test
    if args.use_val:
        model.load_state_dict(
            torch.load(os.path.join(log_dir, 'best_model.pkl')))
    test_f1_mic, test_f1_mac = evaluate(model, g, labels, test_mask, multitask)
    print("Test F1-mic{:.4f}, Test F1-mac{:.4f}".format(
        test_f1_mic, test_f1_mac))
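
None of the examples include the argument parser that produces `args`. A sketch covering the fields Example #6 reads (the names follow the attribute accesses in the code; the defaults are placeholders, not the original values):

import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Cluster-GCN style training')
    parser.add_argument('--dataset', type=str, default='reddit')
    parser.add_argument('--gpu', type=int, default=0, help='< 0 means CPU')
    parser.add_argument('--rnd-seed', type=int, default=3)
    parser.add_argument('--psize', type=int, default=1500)
    parser.add_argument('--batch-size', type=int, default=20)
    parser.add_argument('--n-hidden', type=int, default=128)
    parser.add_argument('--n-layers', type=int, default=3)
    parser.add_argument('--n-epochs', type=int, default=30)
    parser.add_argument('--dropout', type=float, default=0.2)
    parser.add_argument('--lr', type=float, default=1e-2)
    parser.add_argument('--weight-decay', type=float, default=0.0)
    parser.add_argument('--log-every', type=int, default=100)
    parser.add_argument('--val-every', type=int, default=1)
    parser.add_argument('--normalize', action='store_true')
    parser.add_argument('--self-loop', action='store_true')
    parser.add_argument('--use-pp', action='store_true')
    parser.add_argument('--use-val', action='store_true')
    return parser.parse_args()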
Example #7
def main(args):
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # load and preprocess dataset
    data = load_data(args)
    g = data.g
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    labels = g.ndata['label']
    train_nid = np.nonzero(train_mask.data.numpy())[0].astype(np.int64)

    # Normalize features
    if args.normalize:
        feats = g.ndata['feat']
        train_feats = feats[train_mask]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats.data.numpy())
        features = scaler.transform(feats.data.numpy())
        g.ndata['feat'] = torch.FloatTensor(features)

    in_feats = g.ndata['feat'].shape[1]
    n_classes = data.num_classes

    g = g.long()

    # create GCN model
    cluster_iterator = ClusterIter(args.dataset,
                                   g,
                                   args.psize,
                                   args.batch_size,
                                   train_nid,
                                   use_pp=args.use_pp)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        g = g.int().to(args.gpu)

    print('labels shape:', g.ndata['label'].shape)
    print("features shape, ", g.ndata['feat'].shape)
    if args.model_type == 'sage':
        model = GCN(in_feats, args.n_hidden, n_classes, args.n_layers, F.relu,
                    args.dropout, args.use_layernorm, False, False, 1, True)
    else:
        raise NotImplementedError(
            f'{args.model_type} is not a supported model type')

    if cuda:
        model.cuda()

    # use optimizer
    loss_f = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()

    total_time = 0.
    val_accs = []
    test_accs = []
    for epoch in range(args.n_epochs):
        print(f'Running epoch {epoch} / {args.n_epochs}', flush=True)
        start_time = time.time()
        for j, cluster in enumerate(cluster_iterator):
            # sync with upper level training graph
            if cuda:
                cluster = cluster.to(torch.cuda.current_device())
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['label']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        end_time = time.time()
        elapsed_time = end_time - start_time
        total_time += elapsed_time

        # evaluate
        # do NOT include evaluation within the timing metrics
        if args.eval_cpu:
            model.to('cpu')
            if args.use_f1:
                val_acc = evaluate(model, g.cpu(), labels.cpu(),
                                   val_mask.cpu(), 'f1')
                test_acc = evaluate(model, g.cpu(), labels.cpu(),
                                    test_mask.cpu(), 'f1')
            else:
                val_acc = evaluate(model, g.cpu(), labels.cpu(),
                                   val_mask.cpu())
                test_acc = evaluate(model, g.cpu(), labels.cpu(),
                                    test_mask.cpu())
            model.cuda()
        else:
            if args.use_f1:
                val_acc = evaluate(model, g, labels, val_mask, 'f1')
                test_acc = evaluate(model, g, labels, test_mask, 'f1')
            else:
                val_acc = evaluate(model, g, labels, val_mask)
                test_acc = evaluate(model, g, labels, test_mask)
        val_accs.append(val_acc)
        test_accs.append(test_acc)
        print(f'Val acc {val_acc}', flush=True)

    print(f'Training Time: {total_time:.4f}', flush=True)
    print(f'Last Val: {val_accs[-1]:.4f}', flush=True)
    print(f'Best Val: {max(val_accs):.4f}', flush=True)
    print(f'Last Test: {test_accs[-1]:.4f}', flush=True)
    print(f'Best Test: {max(test_accs):.4f}', flush=True)

    plt.plot(val_accs)
    title = args.fig_name
    plt.title(title)
    os.makedirs(args.fig_dir, exist_ok=True)
    plt.savefig(os.path.join(args.fig_dir, title + '.png'))
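
Example #7 calls `evaluate` with an optional metric argument. A sketch of what such a helper might look like (full-graph inference, accuracy by default, micro F1 when 'f1' is requested); the original helper is not shown, so this is an assumption:

import torch
from sklearn.metrics import f1_score

def evaluate(model, g, labels, mask, metric='acc'):
    model.eval()
    with torch.no_grad():
        preds = model(g)[mask].argmax(dim=1)
        y_true = labels[mask]
        if metric == 'f1':
            return f1_score(y_true.cpu().numpy(), preds.cpu().numpy(),
                            average='micro')
        # default: plain accuracy over the masked nodes
        return (preds == y_true).float().mean().item()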