Example #1
File: pred.py Project: wxEdward/GIAAD
def predict(adj, features):
    #adj_mat = pickle.load(open('adj_matrix_formal_stage.pkl', 'rb'))
    #feats = np.load('feature_formal_stage.npy')
    #true_labels = np.load('train_labels_formal_stage.npy') - 2
    #true_labels[ np.where(true_labels<0)] = 0

    t1 = time.time()
    #adj_mat = pickle.load(open(sys.argv[1],'rb'))
    #feats = np.load(sys.argv[2])
    import os
    #os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    dev = torch.device('cpu')
    features = torch.FloatTensor(features).to(dev)
    features = features / (features.norm(dim=1)[:, None] + 1e-8)

    graph = DGLGraph(adj)
    graph = add_self_loop(graph)

    model_filename = 'dminer/def_pool2.pt'

    model = torch.load(model_filename, map_location=dev)

    model.eval()

    with torch.no_grad():
        logits = model(graph, features)
        #logits = logits[mask]
        _, labels = torch.max(logits, dim=1)

    labels = labels.cpu().detach().numpy()
    labels[np.where(labels == 0)] = -1
    labels = labels + 2

    return labels
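
A note on the idiom above: `predict` assumes the pre-0.5 DGL API, where a `DGLGraph` can be built directly from a SciPy sparse adjacency matrix and `add_self_loop` is a free function. A minimal self-contained sketch of that construction (the toy 3-node adjacency and feature sizes are ours, not from the project):

import numpy as np
import scipy.sparse as sp
import torch
from dgl import DGLGraph
from dgl.transform import add_self_loop

# Toy 3-node path graph; any SciPy sparse adjacency works the same way.
adj = sp.csr_matrix(np.array([[0, 1, 0],
                              [1, 0, 1],
                              [0, 1, 0]]))
graph = add_self_loop(DGLGraph(adj))

feats = torch.randn(3, 4)
# Row-normalize features as predict() does, with an epsilon guard
# against zero-norm rows.
feats = feats / (feats.norm(dim=1)[:, None] + 1e-8)
print(graph.number_of_edges())  # 4 original directed edges + 3 self-loops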
Example #2
def main(args):
    # load and preprocess dataset
    g, graph_labels = load_graphs(
        '/yushi/dataset/Amazon2M/Amazon2M_dglgraph.bin')
    assert len(g) == 1
    g = g[0]
    data = g.ndata
    features = torch.FloatTensor(data['feat'])
    labels = torch.LongTensor(data['label'])
    if hasattr(torch, 'BoolTensor'):
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = data['test_mask'].bool()
    num_feats = features.shape[1]
    n_classes = 47
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # add self loop
    g = add_self_loop(g)
    # g.remove_edges_from(nx.selfloop_edges(g))
    # g = DGLGraph(g)
    # g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop,
                args.negative_slope, args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    start = time.time()
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
    print(f"Time Consuming {np.sum(dur)}, Overall time {time.time() - start}")
Example #3
def main(args):
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    multitask_data = set(['ppi'])
    multitask = args.dataset in multitask_data

    # load and preprocess dataset
    assert args.dataset == 'amazon2m'
    g, graph_labels = load_graphs(
        '/yushi/dataset/Amazon2M/Amazon2M_dglgraph.bin')
    assert len(g) == 1
    g = g[0]
    data = g.ndata
    labels = torch.LongTensor(data['label'])
    if hasattr(torch, 'BoolTensor'):
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = data['test_mask'].bool()

    train_nid = np.nonzero(train_mask.cpu().numpy())[0].astype(np.int64)
    val_nid = np.nonzero(val_mask.cpu().numpy())[0].astype(np.int64)

    # Normalize features
    features = torch.FloatTensor(data['feat'])
    if args.normalize:
        train_feats = features[train_nid]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats)
        features = scaler.transform(features)
    features = torch.FloatTensor(features)

    in_feats = features.shape[1]
    n_classes = 47
    n_edges = g.number_of_edges()

    n_train_samples = train_mask.int().sum().item()
    n_val_samples = val_mask.int().sum().item()
    n_test_samples = test_mask.int().sum().item()

    print("""----Data statistics------'
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
          (n_edges, n_classes,
           n_train_samples,
           n_val_samples,
           n_test_samples))
    # create GCN model
    if args.self_loop:
        print("adding self-loop edges")
        g = add_self_loop(g)
    # g = DGLGraph(g, readonly=True)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
        raise ValueError('no cuda')
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    print(torch.cuda.get_device_name(args.gpu))

    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    print('labels shape:', labels.shape)
    train_cluster_iterator = ClusterIter(
        args.dataset, g, args.psize, args.batch_size, train_nid, use_pp=args.use_pp)
    val_cluster_iterator = ClusterIter(
        args.dataset, g, args.psize_val, 1, val_nid, use_pp=False)

    print("features shape, ", features.shape)
    model = GraphSAGE(in_feats,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      F.relu,
                      args.dropout,
                      args.use_pp)

    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    writer = SummaryWriter(log_dir)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
    print("current memory after model before training",
          torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)
    start_time = time.time()
    best_f1 = -1

    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(train_cluster_iterator):
            # sync with upper level training graph
            cluster.copy_from_parent()
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['labels']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # in PPI case, `log_every` is chosen to log one time per epoch.
            # Choose your log freq dynamically when you want more info within one epoch
            if j % args.log_every == 0:
                print(f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                      f"{len(train_cluster_iterator)}:training loss", loss.item())
                writer.add_scalar('train/loss', loss.item(),
                                  global_step=j + epoch * len(train_cluster_iterator))
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate
        if epoch % args.val_every == 0:
            total_f1_mic = []
            total_f1_mac = []
            model.eval()
            for j, cluster in enumerate(val_cluster_iterator):
                cluster.copy_from_parent()
                with torch.no_grad():
                    logits = model(cluster)
                    batch_labels = cluster.ndata['labels']
                    # batch_val_mask = cluster.ndata['val_mask']
                    val_f1_mic, val_f1_mac = calc_f1(
                        batch_labels.cpu().numpy(), logits.cpu().numpy(),
                        multitask)
                total_f1_mic.append(val_f1_mic)
                total_f1_mac.append(val_f1_mac)

            val_f1_mic = np.mean(total_f1_mic)
            val_f1_mac = np.mean(total_f1_mac)

            print("Val F1-mic {:.4f}, Val F1-mac {:.4f}".format(
                val_f1_mic, val_f1_mac))
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(), os.path.join(
                    log_dir, 'best_model.pkl'))
            writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
            writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)

    end_time = time.time()
    print(f'training using time {end_time - start_time}')

    # test
    if args.use_val:
        model.load_state_dict(torch.load(os.path.join(
            log_dir, 'best_model.pkl')))
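
`calc_f1` is also undefined here; a sketch matching its call signature above, assuming scikit-learn (logits are thresholded at 0 for the multi-label case, argmaxed otherwise):

import numpy as np
from sklearn.metrics import f1_score

def calc_f1(y_true, y_pred, multitask):
    # y_pred holds raw logits; binarize per label for multi-label tasks,
    # otherwise take the argmax class.
    if multitask:
        y_pred = (y_pred > 0).astype(int)
    else:
        y_pred = np.argmax(y_pred, axis=1)
    return (f1_score(y_true, y_pred, average='micro'),
            f1_score(y_true, y_pred, average='macro'))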
Example #4
def main(args):
    # load and preprocess dataset
    # data = load_dgl_data(args)
    dataset = args.dataset
    # prefix = '/mnt/yushi/'
    # prefix = 'graphzoom'
    dataset_dir = f'{args.prefix}/dataset/{dataset}'
    # data = load_data(dataset_dir, args.dataset)

    load_data_time = time.time()
    # if dataset in ['Amazon2M', 'reddit']:
    if dataset in ['Amazon2M']:
        g, _ = load_graphs(
            f'{args.prefix}/dataset/Amazon2M/Amazon2M_dglgraph.bin')
        g = g[0]
        data = g.ndata
        features = torch.FloatTensor(data['feat'])
        onehot_labels = F.one_hot(data['label']).numpy()
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = val_mask
        data = EasyDict({
            'graph': g,
            'labels': data['label'],
            'onehot_labels': onehot_labels,
            'features': data['feat'],
            'train_mask': train_mask,
            'val_mask': val_mask,
            'test_mask': test_mask,
            'num_labels': onehot_labels.shape[1],
            'coarse': False
        })
    else:
        data = load_dgl_data(args)
    original_adj, labels, train_ids, test_ids, train_labels, test_labels, feats = load_data(
        dataset_dir, args.dataset)
    labels = torch.LongTensor(labels)
    train_mask = _sample_mask(train_ids, labels.shape[0])
    onehot_labels = F.one_hot(labels).numpy()
    if dataset == 'reddit':
        g = data.graph
    else:
        g = DGLGraph(data.graph)
        val_ids = test_ids[1000:1500]
        test_ids = test_ids[:1000]
        test_mask = _sample_mask(test_ids, labels.shape[0])
        val_mask = _sample_mask(val_ids, labels.shape[0])
        data = EasyDict({
            'graph': data.graph,
            'labels': labels,
            'onehot_labels': onehot_labels,
            # 'features': feats,
            'features': data.features,
            'train_mask': train_mask,
            'val_mask': val_mask,
            'test_mask': test_mask,
            'num_labels': onehot_labels.shape[1],
            'coarse': False
        })
        # g = DGLGraph(data.graph)
    print(f'load data finished: {time.time() - load_data_time}')
    if args.coarse:
        # * load projection matrix
        levels = args.level
        reduce_results = f"graphzoom/reduction_results/{dataset}/fusion/"
        projections, coarse_adj = construct_proj_laplacian(
            original_adj, levels + 1, reduce_results)  # coarsen levels
        # *calculate coarse feature, labels
        # label_mask = np.expand_dims(data.train_mask, 1)
        # coarse_labels = projections[0] @ (onehot_labels * label_mask)
        print('creating coarse DGLGraph')
        start = time.process_time()
        # ! what will happen if g is assigned to other variables later
        multi_level_dglgraph = [g]
        for i in range(1, len(coarse_adj)):
            g = DGLGraph()
            g.from_scipy_sparse_matrix(coarse_adj[i])
            multi_level_dglgraph.append(g)
            data.features = projections[i - 1] @ data.features
        multi_level_dglgraph.reverse()
        projections.reverse()
        projections = projections[1:]
        for idx in range(len(projections)):
            coo = projections[idx].tocoo()
            values = coo.data
            indices = np.vstack((coo.row, coo.col))
            i = torch.LongTensor(indices)
            v = torch.FloatTensor(values)
            projections[idx] = torch.sparse.FloatTensor(
                i, v, torch.Size(coo.shape)).cuda()
        print(f'creating finished in {time.process_time() - start}')
        # * new train/test masks

        # *replace datao
    labels = torch.LongTensor(data.labels)
    loss_fcn = torch.nn.CrossEntropyLoss()
    features = torch.FloatTensor(data.features)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels

    cuda = True
    torch.cuda.set_device(args.gpu)
    features = features.cuda()
    labels = labels.cuda()
    train_mask = train_mask.cuda()
    val_mask = val_mask.cuda()
    test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    # add self loop
    if args.self_loop or args.arch == 'gat':
        for i in range(len(multi_level_dglgraph)):
            multi_level_dglgraph[i] = add_self_loop(multi_level_dglgraph[i])
        print('add self_loop')
    n_edges = multi_level_dglgraph[0].number_of_edges()
    print("""----Data statistics------'
      # Edges %d
      # Classes %d
      # Train samples %d
      # Val samples %d
      # Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))
    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # * create GCN model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = create_model(
        args.arch,
        multi_level_dglgraph,
        num_layers=args.level - 1,
        in_dim=in_feats,
        num_hidden=args.num_hidden,
        num_classes=n_classes,
        heads=heads,
        #  activation=F.elu,
        feat_drop=args.in_drop,
        attn_drop=args.attn_drop,
        negative_slope=args.negative_slope,
        residual=args.residual,
        log_softmax=False,
        projection_matrix=projections)

    if cuda:
        model.cuda()
    print(model)
    # loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    acc = 0
    start = time.time()
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits, h = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        # if not args.coarse:
        acc = evaluate(model, features, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))
    print(f'training time: {time.time() - start}')
    # if not args.coarse:
    acc = evaluate(model, features, labels, test_mask)
    # print(h.shape)
    # np.save(f'embeddings/{(args.arch).upper()}_{dataset}_emb_level_1_mask',
    #         h.detach().cpu().numpy())
    # torch.save(model.state_dict(),
    #            f'embeddings/{(args.arch).upper()}_{dataset}_emb_level_1_params.pth.tar',)
    print("Test accuracy {:.2%}".format(acc))
Example #5
def main(args):
    # load and preprocess dataset
    path = './dataset/' + str(args.dataset) + '/'
    '''
    edges = np.loadtxt(path + 'edges.txt')
    edges = edges.astype(int)

    features = np.loadtxt(path + 'features.txt')

    train_mask = np.loadtxt(path + 'train_mask.txt')
    train_mask = train_mask.astype(int)

    labels = np.loadtxt(path + 'labels.txt')
    labels = labels.astype(int)
    '''
    edges = np.load(path + 'edges.npy')
    features = np.load(path + 'features.npy')
    train_mask = np.load(path + 'train_mask.npy')
    labels = np.load(path + 'labels.npy')

    num_edges = edges.shape[0]
    num_nodes = features.shape[0]
    num_feats = features.shape[1]
    n_classes = int(max(labels) - min(labels) + 1)

    assert train_mask.shape[0] == num_nodes

    print('dataset {}'.format(args.dataset))
    print('# of edges : {}'.format(num_edges))
    print('# of nodes : {}'.format(num_nodes))
    print('# of features : {}'.format(num_feats))

    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)

    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(train_mask)

    else:
        train_mask = torch.ByteTensor(train_mask)

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
    '''
    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if args.self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()
    '''
    u = edges[:, 0]
    v = edges[:, 1]
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(u, v)
    # add self loop
    if args.self_loop:
        g = transform.add_self_loop(g)

    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    model = EglGCN(g, num_feats, args.num_hidden, n_classes, args.num_layers,
                   F.relu, args.dropout)

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    Used_memory = 0

    for epoch in range(args.num_epochs):
        model.train()
        torch.cuda.synchronize()
        t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        now_mem = torch.cuda.max_memory_allocated(0)
        Used_memory = max(now_mem, Used_memory)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        torch.cuda.synchronize()
        run_time_this_epoch = time.time() - t0

        if epoch >= 3:
            dur.append(run_time_this_epoch)

        train_acc = accuracy(logits[train_mask], labels[train_mask])
        print(
            'Epoch {:05d} | Time(s) {:.4f} | train_acc {:.6f} | Used_Memory {:.6f} mb'
            .format(epoch, run_time_this_epoch, train_acc,
                    (now_mem * 1.0 / (1024**2))))

    Used_memory /= (1024**3)
    print('^^^{:6f}^^^{:6f}'.format(Used_memory, np.mean(dur)))
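
The normalization block above computes the symmetric GCN factor D^{-1/2} per node. The same computation as a standalone sketch (the helper name is ours):

import torch

def set_gcn_norm(g, cuda=False):
    # Per-node D^{-1/2} factor; a GCN layer can multiply messages by
    # norm on both source and destination to realize D^{-1/2} A D^{-1/2}.
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0  # isolated nodes: avoid inf from 0^{-1/2}
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)
    return g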
Example #6
File: train.py Project: ydwu4/dgl-hack
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    print(type(data))
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = data.graph
    # add self loop
    print(type(g))

    if isinstance(g, nx.classes.digraph.DiGraph):
        print('g is DiGraph')
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
    elif isinstance(g, DGLGraph):
        print('g is DGLGraph')
        g = transform.add_self_loop(g)

    n_edges = g.number_of_edges()
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    # model = GAT(g, ...) and model = FusedGAT(g, ...) are commented-out alternatives
    model = EglGAT(
        g,
        args.num_layers,
        num_feats,
        args.num_hidden,
        n_classes,
        heads,
        F.elu,
        args.in_drop,
        args.attn_drop,
        args.negative_slope,
        args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        print('epoch = ', epoch)
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        tf = time.time()
        torch.cuda.synchronize()
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        tf1 = time.time()

        optimizer.zero_grad()
        torch.cuda.synchronize()
        t1 = time.time()
        loss.backward()
        torch.cuda.synchronize()
        t2 = time.time()
        print('forward time', tf1 - tf, 's. backward time:', t2 - t1, 's')
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
Example #7
def train(args):

    # load and preprocess dataset
    #data = load_data(args)
    #data = CoraFull()
    #data = Coauthor('cs')
    #FIRST, CHECK DATASET
    path = './dataset/' + str(args.dataset) + '/'
    '''
    edges = np.loadtxt(path + 'edges.txt')
    edges = edges.astype(int)

    features = np.loadtxt(path + 'features.txt')

    train_mask = np.loadtxt(path + 'train_mask.txt')
    train_mask = train_mask.astype(int)

    labels = np.loadtxt(path + 'labels.txt')
    labels = labels.astype(int)
    '''
    edges = np.load(path + 'edges.npy')
    features = np.load(path + 'features.npy')
    train_mask = np.load(path + 'train_mask.npy')
    labels = np.load(path + 'labels.npy')

    num_edges = edges.shape[0]
    num_nodes = features.shape[0]
    num_feats = features.shape[1]
    n_classes = max(labels) - min(labels) + 1

    assert train_mask.shape[0] == num_nodes

    print('dataset {}'.format(args.dataset))
    print('# of edges : {}'.format(num_edges))
    print('# of nodes : {}'.format(num_nodes))
    print('# of features : {}'.format(num_feats))

    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)

    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(train_mask)

    else:
        train_mask = torch.ByteTensor(train_mask)

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()

    u = edges[:, 0]
    v = edges[:, 1]

    #initialize a DGL graph
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(u, v)

    # add self loop (g was just built as a DGLGraph, so only the elif applies)
    if isinstance(g, nx.classes.digraph.DiGraph):
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
    elif isinstance(g, DGLGraph):
        g = transform.add_self_loop(g)

    n_edges = g.number_of_edges()
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop,
                args.negative_slope, args.residual)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    record_time = 0
    avg_run_time = 0
    Used_memory = 0

    for epoch in range(args.num_epochs):
        #print('epoch = ', epoch)
        #print('mem0 = {}'.format(mem0))
        torch.cuda.synchronize()
        tf = time.time()
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        now_mem = torch.cuda.max_memory_allocated(0)
        print('now_mem : ', now_mem)
        Used_memory = max(now_mem, Used_memory)
        tf1 = time.time()

        optimizer.zero_grad()
        torch.cuda.synchronize()
        t1 = time.time()
        loss.backward()
        torch.cuda.synchronize()
        optimizer.step()
        t2 = time.time()
        run_time_this_epoch = t2 - tf

        if epoch >= 3:
            dur.append(time.time() - t0)
            record_time += 1

            avg_run_time += run_time_this_epoch

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        #log for each step
        print(
            'Epoch {:05d} | Time(s) {:.4f} | train_acc {:.6f} | Used_Memory {:.6f} mb'
            .format(epoch, run_time_this_epoch, train_acc,
                    (now_mem * 1.0 / (1024**2))))
        '''
        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):   
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc /{:.4f} | ETputs(KTEPS) {:.2f}".
              format(epoch, np.mean(dur), loss.item(), train_acc,
                     val_acc, n_edges / np.mean(dur) / 1000))
        
        '''

    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))

    #OUTPUT we need
    avg_run_time = avg_run_time * 1. / record_time
    Used_memory /= (1024**3)
    print('^^^{:6f}^^^{:6f}'.format(Used_memory, avg_run_time))
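
The `torch.cuda.synchronize()` + `time.time()` pattern above is the standard way to time CUDA work from the host; CUDA events are an equivalent device-side alternative. A small sketch (the helper name is ours):

import torch

def time_cuda(fn, *args):
    # Time fn(*args) with CUDA events; elapsed_time() reports milliseconds
    # and is only valid after the events complete, hence the synchronize.
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    out = fn(*args)
    end.record()
    torch.cuda.synchronize()
    return out, start.elapsed_time(end) / 1000.0  # seconds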
Example #8
def gae_for(args):
    print("Using {} dataset".format(args.dataset_str))
    adj, features = load_data(args.dataset_str)
    n_nodes, feat_dim = features.shape

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
    adj = adj_train

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    # adj_label = sparse_to_tuple(adj_label)
    # import pdb; pdb.set_trace()
    adj_label = torch.FloatTensor(adj_label.toarray()).cuda()

    # pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    pos_weight = torch.Tensor([float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()]).cuda()
    print(f'pos_weight: {pos_weight.item()}')
    norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)


    # model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, dropout=args.dropout) #DGL

    # === DGL ==== # 
    g = dgl.DGLGraph(adj, readonly=True)
    g = add_self_loop(g)
    model = GVAE(g, feat_dim, args.hidden1, args.hidden2, dropout=args.dropout).cuda()
    features = torch.Tensor(features).cuda()
    # === DGL ==== # 

    print(model)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    hidden_emb = None
    for epoch in range(args.epochs):

        t = time.time()
        model.train()
        optimizer.zero_grad()
        # recovered, mu, logvar = model(features, adj_norm)
        recovered, mu, logvar = model(features)
        loss = loss_function(preds=recovered, labels=adj_label, mu=mu, logvar=logvar, n_nodes=n_nodes, norm=norm, pos_weight=pos_weight)
        loss.backward()
        cur_loss = loss.item()
        optimizer.step()


        if epoch % 10 == 0:
            hidden_emb = mu.cpu().data.numpy()
            roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges, val_edges_false)
            print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(cur_loss),
                "val_ap=", "{:.5f}".format(ap_curr),
                "time=", "{:.5f}".format(time.time() - t)
                )

    print("Optimization Finished!")

    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false)
    print('Test ROC score: ' + str(roc_score))
    print('Test AP score: ' + str(ap_score))
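
`loss_function` combines the `pos_weight`-weighted reconstruction term (scaled by `norm`) with the KL divergence of the variational posterior. A sketch of the standard weighted-BCE + KL form used in the reference GAE code; treat it as an assumption about this project's helper:

import torch
import torch.nn.functional as F

def loss_function(preds, labels, mu, logvar, n_nodes, norm, pos_weight):
    # Weighted BCE over all node pairs, plus the VAE KL term.
    cost = norm * F.binary_cross_entropy_with_logits(
        preds, labels, pos_weight=pos_weight)
    kld = -0.5 / n_nodes * torch.mean(torch.sum(
        1 + 2 * logvar - mu.pow(2) - logvar.exp().pow(2), dim=1))
    return cost + kld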
Example #9
def main(args):
    # load and preprocess dataset
    # data = load_dgl_data(args)
    dataset = args.dataset
    # prefix = '/mnt/yushi/'
    # prefix = 'graphzoom'
    dataset_dir = f'{args.prefix}/dataset/{dataset}'
    # data = load_data(dataset_dir, args.dataset)

    load_data_time = time.time()
    # if dataset in ['Amazon2M', 'reddit']:
    if dataset in ['Amazon2M']:
        g, _ = load_graphs(
            f'{args.prefix}/dataset/Amazon2M/Amazon2M_dglgraph.bin')
        g = g[0]
        data = g.ndata
        features = torch.FloatTensor(data['feat'])
        onehot_labels = F.one_hot(data['label']).numpy()
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = val_mask
        data = EasyDict({
            'graph': g,
            'labels': data['label'],
            'onehot_labels': onehot_labels,
            'features': data['feat'],
            'train_mask': train_mask,
            'val_mask': val_mask,
            'test_mask': test_mask,
            'num_labels': onehot_labels.shape[1],
            'coarse': False
        })
    else:
        original_adj, labels, train_ids, test_ids, train_labels, test_labels, feats = load_data(
            dataset_dir, args.dataset)
        data = load_dgl_data(args)
        labels = torch.LongTensor(labels)
        train_mask = _sample_mask(train_ids, labels.shape[0])
        onehot_labels = F.one_hot(labels).numpy()
        if dataset == 'reddit':
            g = data.graph
        else:
            val_ids = test_ids[1000:1500]
            test_ids = test_ids[:1000]
            test_mask = _sample_mask(test_ids, labels.shape[0])
            val_mask = _sample_mask(val_ids, labels.shape[0])
            data = EasyDict({
                'graph': data.graph,
                'labels': labels,
                'onehot_labels': onehot_labels,
                'features': feats,
                # 'features': data.features,
                'train_mask': train_mask,
                'val_mask': val_mask,
                'test_mask': test_mask,
                'num_labels': onehot_labels.shape[1],
                'coarse': False
            })
            # g = DGLGraph(data.graph)
    print(f'load data finished: {time.time() - load_data_time}')
    if args.coarse:
        # * load projection matrix
        levels = args.level
        reduce_results = f"graphzoom/reduction_results/{dataset}/fusion/"
        projections, coarse_adj = construct_proj_laplacian(
            original_adj, levels, reduce_results)
        # *calculate coarse feature, labels
        label_mask = np.expand_dims(data.train_mask, 1)
        onehot_labels = onehot_labels * label_mask
        for i in range(levels):
            data.features = projections[i] @ data.features
            onehot_labels = projections[i] @ onehot_labels
        # coarse_labels = projections[0] @ onehot_labels
        # ! add train_mask
        rows_sum = onehot_labels.sum(axis=1)[:, np.newaxis]
        norm_coarse_labels = onehot_labels / rows_sum
        norm_label_entropy = Categorical(
            torch.Tensor(norm_coarse_labels)).entropy()
        label_entropy_mask = torch.BoolTensor(norm_label_entropy < 0.01)
        coarse_train_mask = torch.BoolTensor(onehot_labels.sum(axis=1))
        # coarse_train_mask = label_entropy_mask
        # ! entropy threshold

        # coarse_graph = nx.Graph(coarse_adj[1])
        print('creating coarse DGLGraph')
        start = time.process_time()
        g = DGLGraph()
        g.from_scipy_sparse_matrix(coarse_adj[1])
        print(f'creating finished in {time.process_time() - start}')
        # list(map(np.shape, [coarse_embed, coarse_labels]))
        # * new train/test masks
        coarsen_ratio = projections[0].shape[1] / projections[0].shape[0]
        # coarse_train_mask = _sample_mask(
        #     range(int(coarsen_ratio*len(data.train_mask.int().sum().item()))),
        #     norm_coarse_labels.shape[0])
        # coarse_train_mask = _sample_mask(
        #     range(norm_coarse_labels.shape[0]),
        # range(60),
        # norm_coarse_labels.shape[0])
        # coarse_test_mask = _sample_mask(
        #     range(100, 700), norm_coarse_labels.shape[0])
        # coarse_val_mask = _sample_mask(
        #     range(700, 1000), norm_coarse_labels.shape[0])

        # *replace data
        data = EasyDict({
            'graph': g,
            'labels': onehot_labels,
            #     'onehot_labels': onehot_labels,
            'features': data.features,
            'train_mask': coarse_train_mask,
            # 'val_mask': coarse_val_mask,
            # 'test_mask': coarse_test_mask,
            'num_classes': norm_coarse_labels.shape[1],
            'num_labels': onehot_labels.shape[1],
            'coarse': True
        })
    if args.coarse:
        labels = torch.FloatTensor(data.labels)
        loss_fcn = torch.nn.KLDivLoss(reduction='batchmean')
        print('training coarse')
    else:
        labels = torch.LongTensor(data.labels)
        loss_fcn = torch.nn.CrossEntropyLoss()
    features = torch.FloatTensor(data.features)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    # add self loop
    if args.self_loop or args.arch == 'gat':
        g = add_self_loop(data.graph)
        print('add self_loop')
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      # Edges %d
      # Classes %d
      # Train samples %d
      # Val samples %d
      # Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))
    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # * create GCN model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = create_model(
        args.arch,
        g,
        num_layers=args.num_layers,
        in_dim=in_feats,
        num_hidden=args.num_hidden,
        num_classes=n_classes,
        heads=heads,
        #  activation=F.elu,
        feat_drop=args.in_drop,
        attn_drop=args.attn_drop,
        negative_slope=args.negative_slope,
        residual=args.residual,
        log_softmax=args.coarse)

    if cuda:
        model.cuda()
    print(model)
    # loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    acc = 0
    start = time.time()
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits, h = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])  # ?

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        if not args.coarse:
            acc = evaluate(model, features, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))
    print(f'training time: {time.time() - start}')
    if not args.coarse:
        acc = evaluate(model, features, labels, test_mask)
    print(h.shape)
    np.save(f'embeddings/{(args.arch).upper()}_{dataset}_emb_level_1_mask',
            h.detach().cpu().numpy())
    torch.save(
        model.state_dict(),
        f'embeddings/{(args.arch).upper()}_{dataset}_emb_level_1_params.pth.tar',
    )
    print("Test accuracy {:.2%}".format(acc))
Example #10
def main(args):

    torch.manual_seed(1234)

    if args.dataset == 'cora' or args.dataset == 'citeseer' or args.dataset == 'pubmed':
        data = load_data(args)
        features = torch.FloatTensor(data.features)

        labels = torch.LongTensor(data.labels)
        in_feats = features.shape[1]
        g = data.graph
        if args.dataset == 'cora':
            g.remove_edges_from(nx.selfloop_edges(g))
            g.add_edges_from(zip(g.nodes(), g.nodes()))
        g = DGLGraph(g)
        attr_matrix = data.features
        labels = data.labels

    else:
        if args.dataset == 'physics':
            data = Coauthor('physics')
        if args.dataset == 'cs':
            data = Coauthor('cs')
        if args.dataset == 'computers':
            data = AmazonCoBuy('computers')
        if args.dataset == 'photo':
            data = AmazonCoBuy('photo')

        g = data[0]
        attr_matrix = g.ndata['feat']
        labels = g.ndata['label']

        features = torch.FloatTensor(g.ndata['feat'])

    ### LCC of the graph
    n_components = 1
    sparse_graph = g.adjacency_matrix_scipy(return_edge_ids=False)
    _, component_indices = sp.csgraph.connected_components(sparse_graph)
    component_sizes = np.bincount(component_indices)
    components_to_keep = np.argsort(
        component_sizes
    )[::-1][:n_components]  # reverse order to sort descending
    nodes_to_keep = [
        idx for (idx, component) in enumerate(component_indices)
        if component in components_to_keep
    ]

    adj_matrix = sparse_graph[nodes_to_keep][:, nodes_to_keep]
    num_nodes = len(nodes_to_keep)
    g = DGLGraph(adj_matrix)
    g = remove_self_loop(g)
    g = add_self_loop(g)
    g = DGLGraph(g)

    g.ndata['feat'] = attr_matrix[nodes_to_keep]
    features = torch.FloatTensor(g.ndata['feat'].float())
    if args.dataset == 'cora' or args.dataset == 'pubmed':
        features = features / (features.norm(dim=1) + 1e-8)[:, None]
    g.ndata['label'] = labels[nodes_to_keep]
    labels = torch.LongTensor(g.ndata['label'])

    in_feats = features.shape[1]

    unique_l = np.unique(labels, return_counts=False)
    n_classes = len(unique_l)
    n_nodes = g.number_of_nodes()
    n_edges = g.number_of_edges()

    print('Number of nodes', n_nodes, 'Number of edges', n_edges)

    enc = OneHotEncoder()
    enc.fit(labels.reshape(-1, 1))
    ylabels = enc.transform(labels.reshape(-1, 1)).toarray()

    for beta in [args.beta]:
        for K in [args.num_clusters]:
            for alpha in [args.alpha]:
                accs = []
                t_st = time.time()

                sets = "imbalanced"

                for k in range(2):  # number of different trainings
                    #print(k)

                    random_state = np.random.RandomState()
                    if sets == "imbalanced":
                        train_idx, val_idx, test_idx = get_train_val_test_split(
                            random_state,
                            ylabels,
                            train_examples_per_class=None,
                            val_examples_per_class=None,
                            test_examples_per_class=None,
                            train_size=20 * n_classes,
                            val_size=30 * n_classes,
                            test_size=None)
                    elif sets == "balanced":
                        train_idx, val_idx, test_idx = get_train_val_test_split(
                            random_state,
                            ylabels,
                            train_examples_per_class=20,
                            val_examples_per_class=30,
                            test_examples_per_class=None,
                            train_size=None,
                            val_size=None,
                            test_size=None)
                    else:
                        raise ValueError(
                            "No such set configuration (imbalanced/balanced)")

                    n_nodes = len(nodes_to_keep)
                    train_mask = np.zeros(n_nodes)
                    train_mask[train_idx] = 1
                    val_mask = np.zeros(n_nodes)
                    val_mask[val_idx] = 1
                    test_mask = np.zeros(n_nodes)
                    test_mask[test_idx] = 1
                    train_mask = torch.BoolTensor(train_mask)
                    val_mask = torch.BoolTensor(val_mask)
                    test_mask = torch.BoolTensor(test_mask)
                    """
                    Planetoid Split for CORA, CiteSeer, PubMed
                    train_mask = torch.BoolTensor(data.train_mask)
                    val_mask = torch.BoolTensor(data.val_mask)
                    test_mask = torch.BoolTensor(data.test_mask)
                    train_mask2 = torch.BoolTensor(data.train_mask)
                    val_mask2 = torch.BoolTensor(data.val_mask)
                    test_mask2 = torch.BoolTensor(data.test_mask)
                    """

                    if args.gpu < 0:
                        cuda = False

                    else:
                        cuda = True
                        torch.cuda.set_device(args.gpu)
                        features = features.cuda()
                        labels = labels.cuda()
                        train_mask = train_mask.cuda()
                        val_mask = val_mask.cuda()
                        test_mask = test_mask.cuda()

                    gic = GIC(g, in_feats, args.n_hidden, args.n_layers,
                              nn.PReLU(args.n_hidden), args.dropout, K, beta,
                              alpha)

                    if cuda:
                        gic.cuda()

                    gic_optimizer = torch.optim.Adam(
                        gic.parameters(),
                        lr=args.gic_lr,
                        weight_decay=args.weight_decay)

                    # train GIC
                    cnt_wait = 0
                    best = 1e9
                    best_t = 0
                    dur = []

                    for epoch in range(args.n_gic_epochs):
                        gic.train()
                        if epoch >= 3:
                            t0 = time.time()

                        gic_optimizer.zero_grad()
                        loss = gic(features)
                        #print(loss)
                        loss.backward()
                        gic_optimizer.step()

                        if loss < best:
                            best = loss
                            best_t = epoch
                            cnt_wait = 0
                            torch.save(gic.state_dict(), 'best_gic.pkl')
                        else:
                            cnt_wait += 1

                        if cnt_wait == args.patience:
                            #print('Early stopping!')
                            break

                        if epoch >= 3:
                            dur.append(time.time() - t0)

                        #print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | "
                        #"ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                        #n_edges / np.mean(dur) / 1000))

                    # train classifier
                    #print('Loading {}th epoch'.format(best_t))
                    gic.load_state_dict(torch.load('best_gic.pkl'))
                    embeds = gic.encoder(features, corrupt=False)
                    embeds = embeds / (embeds.norm(dim=1)[:, None] + 1e-8)  # epsilon guards zero-norm rows
                    embeds = embeds.detach()

                    # create classifier model
                    classifier = Classifier(args.n_hidden, n_classes)
                    if cuda:
                        classifier.cuda()

                    classifier_optimizer = torch.optim.Adam(
                        classifier.parameters(),
                        lr=args.classifier_lr,
                        weight_decay=args.weight_decay)

                    dur = []
                    best_a = 0
                    cnt_wait = 0
                    for epoch in range(args.n_classifier_epochs):
                        classifier.train()
                        if epoch >= 3:
                            t0 = time.time()

                        classifier_optimizer.zero_grad()
                        preds = classifier(embeds)
                        loss = F.nll_loss(preds[train_mask],
                                          labels[train_mask])
                        loss.backward()
                        classifier_optimizer.step()

                        if epoch >= 3:
                            dur.append(time.time() - t0)

                        acc = evaluate(
                            classifier, embeds, labels, val_mask
                        )  #+ evaluate(classifier, embeds, labels, train_mask)

                        if acc > best_a and epoch > 100:
                            best_a = acc
                            best_t = epoch

                            torch.save(classifier.state_dict(),
                                       'best_class.pkl')

                        #print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                        #"ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                        #acc, n_edges / np.mean(dur) / 1000))

                    acc = evaluate(classifier, embeds, labels, test_mask)
                    accs.append(acc)

                print('=================== ', ' alpha', alpha, ' beta ', beta,
                      'K', K)
                print(args.dataset, ' Acc (mean)', mean(accs), ' (std)',
                      stdev(accs))
                print('=================== time', int(
                    (time.time() - t_st) / 60))
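
`Classifier` is trained with `F.nll_loss`, so its forward pass must return log-probabilities. A sketch of a single-layer version consistent with that usage (as in the DGL DGI/GIC examples; treat the exact architecture as an assumption):

import torch.nn as nn
import torch.nn.functional as F

class Classifier(nn.Module):
    # One linear layer over the frozen GIC embeddings; the log_softmax
    # output matches the F.nll_loss call in the training loop above.
    def __init__(self, n_hidden, n_classes):
        super(Classifier, self).__init__()
        self.fc = nn.Linear(n_hidden, n_classes)

    def forward(self, features):
        return F.log_softmax(self.fc(features), dim=-1)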