def predict(adj, features):
    """Run the saved GNN classifier on a graph and return per-node labels.

    Parameters
    ----------
    adj : adjacency structure accepted by ``DGLGraph`` (e.g. scipy sparse)
    features : array-like of shape (num_nodes, num_feats)

    Returns
    -------
    numpy.ndarray of ints. The raw argmax class is remapped so that
    class 0 becomes 1 and every class k >= 1 becomes k + 2 (mirrors the
    ``labels - 2`` shift used when the training labels were built —
    see the commented loader in the original; TODO confirm against it).
    """
    # Inference runs on CPU; the checkpoint is remapped accordingly.
    dev = torch.device('cpu')

    # Row-wise L2 normalization; epsilon guards all-zero feature rows.
    features = torch.FloatTensor(features).to(dev)
    features = features / (features.norm(dim=1)[:, None] + 1e-8)

    graph = DGLGraph(adj)
    graph = add_self_loop(graph)

    # NOTE(review): torch.load unpickles arbitrary code; the checkpoint
    # path is fixed, so it is assumed trusted.
    model_filename = 'dminer/def_pool2.pt'
    model = torch.load(model_filename, map_location=dev)
    model.eval()

    with torch.no_grad():
        logits = model(graph, features)

    # argmax over classes; .detach() is unnecessary under no_grad
    _, labels = torch.max(logits, dim=1)
    labels = labels.cpu().numpy()
    # Remap predicted classes: 0 -> 1, k >= 1 -> k + 2.
    labels[np.where(labels == 0)] = -1
    labels = labels + 2
    return labels
def main(args):
    """Train and evaluate a GAT model on the Amazon2M node-classification
    graph.

    Loads a pre-built DGLGraph from a hard-coded path, optionally moves
    everything to GPU, trains with cross-entropy on the train mask and
    reports validation/test accuracy.
    """
    # load and preprocess dataset
    g, graph_labels = load_graphs(
        '/yushi/dataset/Amazon2M/Amazon2M_dglgraph.bin')
    assert len(g) == 1  # the .bin file is expected to hold a single graph
    g = g[0]
    data = g.ndata
    features = torch.FloatTensor(data['feat'])
    labels = torch.LongTensor(data['label'])
    # BoolTensor only exists on newer torch; on older versions the raw
    # mask tensors are left unchanged (no else branch).
    if hasattr(torch, 'BoolTensor'):
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = data['test_mask'].bool()
    num_feats = features.shape[1]
    n_classes = 47  # Amazon2M label count (hard-coded)
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # add self loop
    g = add_self_loop(g)
    n_edges = g.number_of_edges()

    # create model: one attention-head count per layer plus the output head
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    start = time.time()
    for epoch in range(args.epochs):
        model.train()
        # skip the first 3 epochs when timing to avoid warm-up noise
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        train_acc = accuracy(logits[train_mask], labels[train_mask])
        if args.fastmode:
            # reuse training-forward logits instead of a fresh eval pass
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
        if args.early_stop:
            if stopper.step(val_acc, model):
                break
        # NOTE(review): np.mean(dur) is nan for epochs < 3 (dur still empty)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        # EarlyStopping is assumed to checkpoint here — TODO confirm
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
    print(f"Time Consuming {np.sum(dur)}, Overall time {time.time() - start}")
def main(args):
    """Cluster-GCN style training of GraphSAGE on the amazon2m dataset.

    Seeds all RNGs for reproducibility, partitions the graph with
    ``ClusterIter``, trains with per-cluster mini-batches, tracks the best
    validation micro-F1 checkpoint and optionally reloads it at the end.
    Requires a GPU (``args.gpu >= 0``; raises ValueError otherwise).

    Fix: the final timing line printed ``start_time - end_time`` (a
    negative duration); it now prints ``end_time - start_time``.
    """
    # Make runs reproducible across torch / numpy / python RNGs.
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # 'ppi' is the only multi-label dataset; everything else is multi-class.
    multitask_data = set(['ppi'])
    multitask = args.dataset in multitask_data

    # load and preprocess dataset (hard-coded to the Amazon2M dump)
    assert args.dataset == 'amazon2m'
    g, graph_labels = load_graphs(
        '/yushi/dataset/Amazon2M/Amazon2M_dglgraph.bin')
    assert len(g) == 1
    g = g[0]
    data = g.ndata
    labels = torch.LongTensor(data['label'])
    if hasattr(torch, 'BoolTensor'):
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = data['test_mask'].bool()
    train_nid = np.nonzero(train_mask.cpu().numpy())[0].astype(np.int64)
    val_nid = np.nonzero(val_mask.cpu().numpy())[0].astype(np.int64)

    # Normalize features with statistics fitted on training rows only.
    features = torch.FloatTensor(data['feat'])
    if args.normalize:
        train_feats = features[train_nid]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats)
        features = scaler.transform(features)
        features = torch.FloatTensor(features)

    in_feats = features.shape[1]
    n_classes = 47  # Amazon2M label count (hard-coded)
    n_edges = g.number_of_edges()
    n_train_samples = train_mask.int().sum().item()
    n_val_samples = val_mask.int().sum().item()
    n_test_samples = test_mask.int().sum().item()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples,
           n_test_samples))

    # create GCN model
    if args.self_loop:
        print("adding self-loop edges")
        g = add_self_loop(g)

    # set device for dataset tensors; this script requires CUDA
    if args.gpu < 0:
        cuda = False
        raise ValueError('no cuda')
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        print(torch.cuda.get_device_name(0))

    # ClusterIter clusters read node data from the parent graph
    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    print('labels shape:', labels.shape)

    train_cluster_iterator = ClusterIter(
        args.dataset, g, args.psize, args.batch_size, train_nid,
        use_pp=args.use_pp)
    val_cluster_iterator = ClusterIter(
        args.dataset, g, args.psize_val, 1, val_nid, use_pp=False)

    print("features shape, ", features.shape)
    model = GraphSAGE(in_feats, args.n_hidden, n_classes, args.n_layers,
                      F.relu, args.dropout, args.use_pp)
    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    writer = SummaryWriter(log_dir)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
    print("current memory after model before training",
          torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)

    start_time = time.time()
    best_f1 = -1

    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(train_cluster_iterator):
            # sync with upper level training graph
            cluster.copy_from_parent()
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['labels']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # in PPI case, `log_every` is chosen to log one time per epoch.
            # Choose your log freq dynamically when you want more info
            # within one epoch
            if j % args.log_every == 0:
                print(f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                      f"{len(train_cluster_iterator)}:training loss",
                      loss.item())
                writer.add_scalar('train/loss', loss.item(),
                                  global_step=j + epoch *
                                  len(train_cluster_iterator))
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate on the clustered validation set every val_every epochs
        if epoch % args.val_every == 0:
            total_f1_mic = []
            total_f1_mac = []
            model.eval()
            for j, cluster in enumerate(val_cluster_iterator):
                cluster.copy_from_parent()
                with torch.no_grad():
                    logits = model(cluster)
                    batch_labels = cluster.ndata['labels']
                val_f1_mic, val_f1_mac = calc_f1(batch_labels.cpu().numpy(),
                                                 logits.cpu().numpy(),
                                                 multitask)
                total_f1_mic.append(val_f1_mic)
                total_f1_mac.append(val_f1_mac)
            val_f1_mic = np.mean(total_f1_mic)
            val_f1_mac = np.mean(total_f1_mac)
            print("Val F1-mic{:.4f}, Val F1-mac{:.4f}".format(
                val_f1_mic, val_f1_mac))
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(),
                           os.path.join(log_dir, 'best_model.pkl'))
            writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
            writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)

    end_time = time.time()
    # BUG FIX: was `start_time - end_time`, which printed a negative value.
    print(f'training using time {end_time - start_time}')

    # test
    if args.use_val:
        model.load_state_dict(torch.load(os.path.join(
            log_dir, 'best_model.pkl')))
def main(args):
    """Multi-level (GraphZoom-coarsened) GCN/GAT training entry point.

    When ``args.coarse`` is set, builds a hierarchy of coarsened DGL
    graphs plus sparse projection matrices, then trains a model from
    ``create_model`` across the hierarchy and evaluates on val/test masks.
    NOTE(review): ``projections`` and ``multi_level_dglgraph`` are only
    defined inside the ``args.coarse`` branch, so running without
    ``--coarse`` raises NameError later — confirm intended usage.
    """
    # load and preprocess dataset
    dataset = args.dataset
    dataset_dir = f'{args.prefix}/dataset/{dataset}'
    load_data_time = time.time()
    if dataset in ['Amazon2M']:
        g, _ = load_graphs(
            f'{args.prefix}/dataset/Amazon2M/Amazon2M_dglgraph.bin')
        g = g[0]
        data = g.ndata
        features = torch.FloatTensor(data['feat'])
        onehot_labels = F.one_hot(data['label']).numpy()
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        # no separate test split here: validation doubles as test
        test_mask = val_mask
        data = EasyDict({
            'graph': g,
            'labels': data['label'],
            'onehot_labels': onehot_labels,
            'features': data['feat'],
            'train_mask': train_mask,
            'val_mask': val_mask,
            'test_mask': test_mask,
            'num_labels': onehot_labels.shape[1],
            'coarse': False
        })
    else:
        data = load_dgl_data(args)
        # original_adj is consumed by the coarse branch below
        original_adj, labels, train_ids, test_ids, train_labels, \
            test_labels, feats = load_data(dataset_dir, args.dataset)
        labels = torch.LongTensor(labels)
        train_mask = _sample_mask(train_ids, labels.shape[0])
        onehot_labels = F.one_hot(labels).numpy()
        if dataset == 'reddit':
            g = data.graph
        else:
            g = DGLGraph(data.graph)
        # carve a 500-node validation slice out of the test ids
        val_ids = test_ids[1000:1500]
        test_ids = test_ids[:1000]
        test_mask = _sample_mask(test_ids, labels.shape[0])
        val_mask = _sample_mask(val_ids, labels.shape[0])
        data = EasyDict({
            'graph': data.graph,
            'labels': labels,
            'onehot_labels': onehot_labels,
            'features': data.features,
            'train_mask': train_mask,
            'val_mask': val_mask,
            'test_mask': test_mask,
            'num_labels': onehot_labels.shape[1],
            'coarse': False
        })
    print(f'load data finished: {time.time() - load_data_time}')

    if args.coarse:
        # * load projection matrix
        levels = args.level
        reduce_results = f"graphzoom/reduction_results/{dataset}/fusion/"
        projections, coarse_adj = construct_proj_laplacian(
            original_adj, levels + 1, reduce_results)  # coarsen levels
        print('creating coarse DGLGraph')
        start = time.process_time()
        # ! what will happen if g is assigned to other variables later
        multi_level_dglgraph = [g]
        for i in range(1, len(coarse_adj)):
            g = DGLGraph()
            g.from_scipy_sparse_matrix(coarse_adj[i])
            multi_level_dglgraph.append(g)
            # push node features one level down the hierarchy
            data.features = projections[i - 1] @ data.features
        # order graphs coarsest-first and drop the finest projection
        multi_level_dglgraph.reverse()
        projections.reverse()
        projections = projections[1:]
        # convert each scipy projection into a CUDA sparse tensor
        for projection in range(len(projections)):
            coo = projections[projection].tocoo()
            values = coo.data
            indices = np.vstack((coo.row, coo.col))
            i = torch.LongTensor(indices)
            v = torch.FloatTensor(values)
            projections[projection] = torch.sparse.FloatTensor(
                i, v, torch.Size(coo.shape)).cuda()
        print(f'creating finished in {time.process_time() - start}')

    # *replace data
    labels = torch.LongTensor(data.labels)
    loss_fcn = torch.nn.CrossEntropyLoss()
    features = torch.FloatTensor(data.features)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    cuda = True  # NOTE(review): CUDA is assumed unconditionally here
    torch.cuda.set_device(args.gpu)
    features = features.cuda()
    labels = labels.cuda()
    train_mask = train_mask.cuda()
    val_mask = val_mask.cuda()
    test_mask = test_mask.cuda()

    # graph preprocess: add self loops on every level (required for GAT)
    if args.self_loop or args.arch == 'gat':
        for i in range(len(multi_level_dglgraph)):
            multi_level_dglgraph[i] = add_self_loop(multi_level_dglgraph[i])
        print('add self_loop')
    n_edges = multi_level_dglgraph[0].number_of_edges()
    print("""----Data statistics------'
      # Edges %d
      # Classes %d
      # Train samples %d
      # Val samples %d
      # Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # normalization (symmetric GCN norm). NOTE(review): after the coarse
    # loop, g refers to the LAST coarse graph built, not the original one —
    # confirm the norm is meant to live on that graph.
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # * create GCN model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = create_model(
        args.arch,
        multi_level_dglgraph,
        num_layers=args.level - 1,
        in_dim=in_feats,
        num_hidden=args.num_hidden,
        num_classes=n_classes,
        heads=heads,
        feat_drop=args.in_drop,
        attn_drop=args.attn_drop,
        negative_slope=args.negative_slope,
        residual=args.residual,
        log_softmax=False,
        projection_matrix=projections)
    if cuda:
        model.cuda()
    print(model)

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    acc = 0
    start = time.time()
    for epoch in range(args.n_epochs):
        model.train()
        # skip first 3 epochs when timing to avoid warm-up noise
        if epoch >= 3:
            t0 = time.time()
        # forward (model also returns hidden embeddings h)
        logits, h = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))
    print(f'training time: {time.time() - start}')
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
def main(args):
    """Benchmark EglGCN training on a dataset stored as .npy dumps.

    Loads edges/features/train_mask/labels from ./dataset/<name>/, builds
    a DGLGraph with GCN symmetric normalization, trains for
    ``args.num_epochs`` and reports per-epoch time plus peak GPU memory.

    Fix: ``torch.cuda.synchronize()`` and ``torch.cuda.max_memory_allocated``
    were called unconditionally, crashing CPU-only runs (``args.gpu < 0``);
    they are now guarded by the ``cuda`` flag (memory reported as 0 on CPU).
    """
    # load and preprocess dataset
    path = './dataset/' + str(args.dataset) + '/'
    edges = np.load(path + 'edges.npy')
    features = np.load(path + 'features.npy')
    train_mask = np.load(path + 'train_mask.npy')
    labels = np.load(path + 'labels.npy')

    num_edges = edges.shape[0]
    num_nodes = features.shape[0]
    num_feats = features.shape[1]
    # labels are assumed to be a contiguous integer range — TODO confirm
    n_classes = int(max(labels) - min(labels) + 1)
    assert train_mask.shape[0] == num_nodes

    print('dataset {}'.format(args.dataset))
    print('# of edges : {}'.format(num_edges))
    print('# of nodes : {}'.format(num_nodes))
    print('# of features : {}'.format(num_feats))

    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)
    # BoolTensor exists on modern torch; ByteTensor is the legacy fallback
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(train_mask)
    else:
        train_mask = torch.ByteTensor(train_mask)

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()

    # build the DGL graph from the raw edge list
    u = edges[:, 0]
    v = edges[:, 1]
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(u, v)

    # add self loop
    if args.self_loop:
        g = transform.add_self_loop(g)

    # normalization: symmetric GCN norm D^{-1/2}; isolated nodes get 0
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    model = EglGCN(g, num_feats, args.num_hidden, n_classes,
                   args.num_layers, F.relu, args.dropout)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    Used_memory = 0

    for epoch in range(args.num_epochs):
        model.train()
        # synchronize so wall-clock timings reflect finished GPU work
        if cuda:
            torch.cuda.synchronize()
        t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        now_mem = torch.cuda.max_memory_allocated(0) if cuda else 0
        Used_memory = max(now_mem, Used_memory)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if cuda:
            torch.cuda.synchronize()
        run_time_this_epoch = time.time() - t0
        # skip first 3 epochs when averaging (warm-up noise)
        if epoch >= 3:
            dur.append(run_time_this_epoch)
        train_acc = accuracy(logits[train_mask], labels[train_mask])
        print(
            'Epoch {:05d} | Time(s) {:.4f} | train_acc {:.6f} | Used_Memory {:.6f} mb'
            .format(epoch, run_time_this_epoch, train_acc,
                    (now_mem * 1.0 / (1024**2))))

    Used_memory /= (1024**3)
    print('^^^{:6f}^^^{:6f}'.format(Used_memory, np.mean(dur)))
def main(args):
    """Train and evaluate an EglGAT model with per-phase timing output.

    Mirrors the stock DGL GAT training loop but prints forward/backward
    wall-clock times each epoch.

    Fix: ``torch.cuda.synchronize()`` was called unconditionally, which
    crashes CPU-only runs (``args.gpu < 0``); calls are now guarded by
    the ``cuda`` flag.
    """
    # load and preprocess dataset
    data = load_data(args)
    print(type(data))
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # BoolTensor exists on modern torch; ByteTensor is the legacy fallback
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = data.graph
    # add self loop; the graph may arrive as a networkx DiGraph or DGLGraph
    print(type(g))
    if isinstance(g, nx.classes.digraph.DiGraph):
        print('g is DiGraph')
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
    elif isinstance(g, DGLGraph):
        print('g is DGLGraph')
        g = transform.add_self_loop(g)
    n_edges = g.number_of_edges()

    # create model: one head count per hidden layer plus the output layer
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = EglGAT(g,
                   args.num_layers,
                   num_feats,
                   args.num_hidden,
                   n_classes,
                   heads,
                   F.elu,
                   args.in_drop,
                   args.attn_drop,
                   args.negative_slope,
                   args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        print('epoch = ', epoch)
        model.train()
        # skip first 3 epochs when averaging epoch time (warm-up noise)
        if epoch >= 3:
            t0 = time.time()
        # forward — synchronize so CUDA timings are meaningful
        tf = time.time()
        if cuda:
            torch.cuda.synchronize()
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        tf1 = time.time()
        optimizer.zero_grad()
        if cuda:
            torch.cuda.synchronize()
        t1 = time.time()
        loss.backward()
        if cuda:
            torch.cuda.synchronize()
        t2 = time.time()
        print('forward time', tf1 - tf, 's. backward time:', t2 - t1, 's')
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        train_acc = accuracy(logits[train_mask], labels[train_mask])
        if args.fastmode:
            # reuse training logits instead of a dedicated eval pass
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
        if args.early_stop:
            if stopper.step(val_acc, model):
                break
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
def train(args):
    """Benchmark GAT training on a dataset stored as .npy dumps.

    Loads edges/features/train_mask/labels from ./dataset/<name>/, builds
    a DGLGraph with self-loops, trains for ``args.num_epochs`` and reports
    peak GPU memory plus average per-epoch run time.

    Fixes: CUDA-only calls (``synchronize`` / ``max_memory_allocated``)
    used to run unconditionally and crashed CPU-only runs; ``n_classes``
    is now cast to int (matching the sibling GCN benchmark); the final
    average no longer divides by zero when ``num_epochs <= 3``.
    """
    # FIRST, CHECK DATASET
    path = './dataset/' + str(args.dataset) + '/'
    edges = np.load(path + 'edges.npy')
    features = np.load(path + 'features.npy')
    train_mask = np.load(path + 'train_mask.npy')
    labels = np.load(path + 'labels.npy')

    num_edges = edges.shape[0]
    num_nodes = features.shape[0]
    num_feats = features.shape[1]
    # labels are assumed to be a contiguous integer range — TODO confirm
    n_classes = int(max(labels) - min(labels) + 1)
    assert train_mask.shape[0] == num_nodes

    print('dataset {}'.format(args.dataset))
    print('# of edges : {}'.format(num_edges))
    print('# of nodes : {}'.format(num_nodes))
    print('# of features : {}'.format(num_feats))

    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)
    # BoolTensor exists on modern torch; ByteTensor is the legacy fallback
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(train_mask)
    else:
        train_mask = torch.ByteTensor(train_mask)

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()

    # initialize a DGL graph from the raw edge list
    u = edges[:, 0]
    v = edges[:, 1]
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(u, v)

    # add self loop; g is always a DGLGraph here so only the elif fires,
    # but the DiGraph branch is kept for parity with the sibling scripts
    if isinstance(g, nx.classes.digraph.DiGraph):
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
    elif isinstance(g, DGLGraph):
        g = transform.add_self_loop(g)
    n_edges = g.number_of_edges()

    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    record_time = 0
    avg_run_time = 0
    Used_memory = 0

    for epoch in range(args.num_epochs):
        # synchronize so wall-clock timings reflect finished GPU work
        if cuda:
            torch.cuda.synchronize()
        tf = time.time()
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        now_mem = torch.cuda.max_memory_allocated(0) if cuda else 0
        print('now_mem : ', now_mem)
        Used_memory = max(now_mem, Used_memory)

        tf1 = time.time()
        optimizer.zero_grad()
        if cuda:
            torch.cuda.synchronize()
        t1 = time.time()
        loss.backward()
        if cuda:
            torch.cuda.synchronize()
        optimizer.step()
        t2 = time.time()
        run_time_this_epoch = t2 - tf

        # skip first 3 epochs when averaging (warm-up noise)
        if epoch >= 3:
            dur.append(time.time() - t0)
            record_time += 1
            avg_run_time += run_time_this_epoch

        train_acc = accuracy(logits[train_mask], labels[train_mask])
        # log for each step
        print(
            'Epoch {:05d} | Time(s) {:.4f} | train_acc {:.6f} | Used_Memory {:.6f} mb'
            .format(epoch, run_time_this_epoch, train_acc,
                    (now_mem * 1.0 / (1024**2))))

    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))

    # OUTPUT we need; max(..., 1) avoids ZeroDivisionError for short runs
    avg_run_time = avg_run_time * 1. / max(record_time, 1)
    Used_memory /= (1024**3)
    print('^^^{:6f}^^^{:6f}'.format(Used_memory, avg_run_time))
def gae_for(args):
    """Train a DGL-based graph variational autoencoder (GVAE) for link
    prediction and report test ROC-AUC / average precision.

    Edges are split into train/val/test by ``mask_test_edges``; the model
    reconstructs the self-loop-augmented train adjacency with a
    class-rebalancing ``pos_weight``. Requires CUDA (tensors are moved
    with ``.cuda()`` unconditionally).
    """
    print("Using {} dataset".format(args.dataset_str))
    adj, features = load_data(args.dataset_str)
    n_nodes, feat_dim = features.shape

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    # hold out validation / test edge sets; train only on adj_train
    adj_train, train_edges, val_edges, val_edges_false, test_edges, \
        test_edges_false = mask_test_edges(adj)
    adj = adj_train

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    # reconstruction target includes self-loops
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = torch.FloatTensor(adj_label.toarray()).cuda()

    # weight positives by the negative/positive ratio: the adjacency is
    # extremely sparse, so existing edges are heavily up-weighted
    pos_weight = torch.Tensor(
        [float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()]).cuda()
    print(f'pos_weight: {pos_weight.item()}')
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    # === DGL ==== #
    g = dgl.DGLGraph(adj, readonly=True)
    g = add_self_loop(g)
    model = GVAE(g, feat_dim, args.hidden1, args.hidden2,
                 dropout=args.dropout).cuda()
    features = torch.Tensor(features).cuda()
    # === DGL ==== #
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    hidden_emb = None
    for epoch in range(args.epochs):
        t = time.time()
        model.train()
        optimizer.zero_grad()
        recovered, mu, logvar = model(features)
        loss = loss_function(preds=recovered, labels=adj_label,
                             mu=mu, logvar=logvar, n_nodes=n_nodes,
                             norm=norm, pos_weight=pos_weight)
        loss.backward()
        cur_loss = loss.item()
        optimizer.step()

        # validate every 10 epochs using the mean embedding mu
        if epoch % 10 == 0:
            hidden_emb = mu.cpu().data.numpy()
            roc_curr, ap_curr = get_roc_score(
                hidden_emb, adj_orig, val_edges, val_edges_false)
            print("Epoch:", '%04d' % (epoch + 1),
                  "train_loss=", "{:.5f}".format(cur_loss),
                  "val_ap=", "{:.5f}".format(ap_curr),
                  "time=", "{:.5f}".format(time.time() - t)
                  )

    print("Optimization Finished!")
    # NOTE(review): hidden_emb comes from the last epoch whose index was
    # divisible by 10, not necessarily the final epoch — confirm intended.
    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig,
                                        test_edges, test_edges_false)
    print('Test ROC score: ' + str(roc_score))
    print('Test AP score: ' + str(ap_score))
def main(args):
    """GraphZoom-style single-level coarse training entry point.

    When ``args.coarse`` is set, features and train-masked one-hot labels
    are projected through ``levels`` coarsening matrices and the model is
    trained on the level-1 coarse graph with a KLDiv loss against soft
    labels; otherwise a plain cross-entropy model is trained on the
    original graph. Embeddings and weights are saved at the end.
    NOTE(review): several paths look inconsistent — see inline notes.
    """
    # load and preprocess dataset
    dataset = args.dataset
    dataset_dir = f'{args.prefix}/dataset/{dataset}'
    load_data_time = time.time()
    if dataset in ['Amazon2M']:
        g, _ = load_graphs(
            f'{args.prefix}/dataset/Amazon2M/Amazon2M_dglgraph.bin')
        g = g[0]
        data = g.ndata
        features = torch.FloatTensor(data['feat'])
        onehot_labels = F.one_hot(data['label']).numpy()
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        # no separate test split: validation doubles as test
        test_mask = val_mask
        data = EasyDict({
            'graph': g,
            'labels': data['label'],
            'onehot_labels': onehot_labels,
            'features': data['feat'],
            'train_mask': train_mask,
            'val_mask': val_mask,
            'test_mask': test_mask,
            'num_labels': onehot_labels.shape[1],
            'coarse': False
        })
        # NOTE(review): original_adj is NOT set on this path, so combining
        # Amazon2M with --coarse raises NameError below — confirm.
    else:
        original_adj, labels, train_ids, test_ids, train_labels, \
            test_labels, feats = load_data(dataset_dir, args.dataset)
        data = load_dgl_data(args)
        labels = torch.LongTensor(labels)
        train_mask = _sample_mask(train_ids, labels.shape[0])
        onehot_labels = F.one_hot(labels).numpy()
        if dataset == 'reddit':
            g = data.graph
        else:
            # NOTE(review): g is left unset on this path; later uses of g
            # rely on the coarse branch re-assigning it — confirm.
            # carve a 500-node validation slice out of the test ids
            val_ids = test_ids[1000:1500]
            test_ids = test_ids[:1000]
            test_mask = _sample_mask(test_ids, labels.shape[0])
            val_mask = _sample_mask(val_ids, labels.shape[0])
        data = EasyDict({
            'graph': data.graph,
            'labels': labels,
            'onehot_labels': onehot_labels,
            'features': feats,
            'train_mask': train_mask,
            'val_mask': val_mask,
            'test_mask': test_mask,
            'num_labels': onehot_labels.shape[1],
            'coarse': False
        })
    print(f'load data finished: {time.time() - load_data_time}')

    if args.coarse:
        # * load projection matrix
        levels = args.level
        reduce_results = f"graphzoom/reduction_results/{dataset}/fusion/"
        projections, coarse_adj = construct_proj_laplacian(
            original_adj, levels, reduce_results)
        # *calculate coarse feature, labels: zero out non-train rows so
        # only training labels propagate to the coarse level
        label_mask = np.expand_dims(data.train_mask, 1)
        onehot_labels = onehot_labels * label_mask
        for i in range(levels):
            data.features = projections[i] @ data.features
            onehot_labels = projections[i] @ onehot_labels
        # ! add train_mask
        rows_sum = onehot_labels.sum(axis=1)[:, np.newaxis]
        norm_coarse_labels = onehot_labels / rows_sum
        # low-entropy coarse nodes have (near-)pure label distributions
        norm_label_entropy = Categorical(
            torch.Tensor(norm_coarse_labels)).entropy()
        label_entropy_mask = torch.BoolTensor(norm_label_entropy < 0.01)
        # train on every coarse node that aggregated at least one label
        coarse_train_mask = torch.BoolTensor(onehot_labels.sum(axis=1))
        print('creating coarse DGLGraph')
        start = time.process_time()
        g = DGLGraph()
        g.from_scipy_sparse_matrix(coarse_adj[1])
        print(f'creating finished in {time.process_time() - start}')
        # * new train/test masks
        coarsen_ratio = projections[0].shape[1] / projections[0].shape[0]
        # *replace data
        data = EasyDict({
            'graph': g,
            'labels': onehot_labels,
            'features': data.features,
            'train_mask': coarse_train_mask,
            'num_classes': norm_coarse_labels.shape[1],
            'num_labels': onehot_labels.shape[1],
            'coarse': True
        })
        # NOTE(review): this EasyDict has no val_mask/test_mask, yet both
        # are read unconditionally below — coarse runs will fail there.

    if args.coarse:
        # soft (projected) labels -> KL divergence against log-softmax
        labels = torch.FloatTensor(data.labels)
        loss_fcn = torch.nn.KLDivLoss(reduction='batchmean')
        print('training coarse')
    else:
        labels = torch.LongTensor(data.labels)
        loss_fcn = torch.nn.CrossEntropyLoss()
    features = torch.FloatTensor(data.features)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    # add self loop
    if args.self_loop or args.arch == 'gat':
        g = add_self_loop(data.graph)
        print('add self_loop')
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      # Edges %d
      # Classes %d
      # Train samples %d
      # Val samples %d
      # Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # normalization (symmetric GCN norm), stored on the graph for the model
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # * create GCN model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = create_model(
        args.arch,
        g,
        num_layers=args.num_layers,
        in_dim=in_feats,
        num_hidden=args.num_hidden,
        num_classes=n_classes,
        heads=heads,
        feat_drop=args.in_drop,
        attn_drop=args.attn_drop,
        negative_slope=args.negative_slope,
        residual=args.residual,
        log_softmax=args.coarse)
    if cuda:
        model.cuda()
    print(model)

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    acc = 0
    start = time.time()
    for epoch in range(args.n_epochs):
        model.train()
        # skip first 3 epochs when timing to avoid warm-up noise
        if epoch >= 3:
            t0 = time.time()
        # forward (model also returns hidden embeddings h)
        logits, h = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])  # ?
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        # coarse training has no meaningful node-level accuracy to report
        if not args.coarse:
            acc = evaluate(model, features, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))
    print(f'training time: {time.time() - start}')
    if not args.coarse:
        acc = evaluate(model, features, labels, test_mask)
    print(h.shape)
    # persist the learned embeddings and model parameters
    np.save(f'embeddings/{(args.arch).upper()}_{dataset}_emb_level_1_mask',
            h.detach().cpu().numpy())
    torch.save(
        model.state_dict(),
        f'embeddings/{(args.arch).upper()}_{dataset}_emb_level_1_params.pth.tar',
    )
    print("Test accuracy {:.2%}".format(acc))
def main(args):
    """Train GIC node embeddings on a graph dataset, then fit a classifier.

    Pipeline:
      1. Load the dataset (Planetoid citation graphs via ``load_data``, or
         Coauthor / AmazonCoBuy graphs).
      2. Restrict the graph to its largest connected component (LCC) and
         rebuild a DGLGraph with exactly one self-loop per node.
      3. For each (beta, K, alpha) combination, run two independent trials:
         unsupervised GIC pre-training with loss-based early stopping, then
         a supervised classifier trained on the frozen embeddings.
      4. Print mean/std test accuracy per combination.

    Args:
        args: parsed CLI namespace; this function reads ``dataset``, ``gpu``,
            ``n_hidden``, ``n_layers``, ``dropout``, ``beta``,
            ``num_clusters``, ``alpha``, ``gic_lr``, ``classifier_lr``,
            ``weight_decay``, ``patience``, ``n_gic_epochs`` and
            ``n_classifier_epochs``.

    Side effects: writes checkpoints 'best_gic.pkl' and 'best_class.pkl' to
    the working directory and prints progress/accuracy to stdout.
    """
    torch.manual_seed(1234)

    # ---- dataset loading -------------------------------------------------
    if args.dataset == 'cora' or args.dataset == 'citeseer' or args.dataset == 'pubmed':
        data = load_data(args)
        features = torch.FloatTensor(data.features)
        labels = torch.LongTensor(data.labels)
        in_feats = features.shape[1]
        g = data.graph
        if args.dataset == 'cora':
            # Normalize self-loops: drop any existing ones, then add exactly
            # one per node before wrapping in a DGLGraph.
            g.remove_edges_from(nx.selfloop_edges(g))
            g.add_edges_from(zip(g.nodes(), g.nodes()))
        g = DGLGraph(g)
        attr_matrix = data.features
        labels = data.labels
    else:
        if args.dataset == 'physics':
            data = Coauthor('physics')
        if args.dataset == 'cs':
            data = Coauthor('cs')
        if args.dataset == 'computers':
            data = AmazonCoBuy('computers')
        if args.dataset == 'photo':
            data = AmazonCoBuy('photo')
        g = data[0]
        attr_matrix = g.ndata['feat']
        labels = g.ndata['label']
        features = torch.FloatTensor(g.ndata['feat'])

    # ---- restrict to the largest connected component (LCC) ---------------
    n_components = 1
    sparse_graph = g.adjacency_matrix_scipy(return_edge_ids=False)
    _, component_indices = sp.csgraph.connected_components(sparse_graph)
    component_sizes = np.bincount(component_indices)
    # argsort is ascending; reverse so the largest components come first
    components_to_keep = np.argsort(component_sizes)[::-1][:n_components]
    nodes_to_keep = [
        idx for (idx, component) in enumerate(component_indices)
        if component in components_to_keep
    ]
    adj_matrix = sparse_graph[nodes_to_keep][:, nodes_to_keep]

    g = DGLGraph(adj_matrix)
    # remove-then-add guarantees exactly one self-loop per node
    g = remove_self_loop(g)
    g = add_self_loop(g)
    g = DGLGraph(g)
    g.ndata['feat'] = attr_matrix[nodes_to_keep]
    features = torch.FloatTensor(g.ndata['feat'].float())
    if args.dataset == 'cora' or args.dataset == 'pubmed':
        # Row-normalize features; epsilon guards against all-zero rows.
        features = features / (features.norm(dim=1) + 1e-8)[:, None]
    g.ndata['label'] = labels[nodes_to_keep]
    labels = torch.LongTensor(g.ndata['label'])
    in_feats = features.shape[1]

    unique_l = np.unique(labels, return_counts=False)
    n_classes = len(unique_l)
    n_nodes = g.number_of_nodes()
    n_edges = g.number_of_edges()
    print('Number of nodes', n_nodes, 'Number of edges', n_edges)

    # One-hot label matrix for the stratified split helper.
    enc = OneHotEncoder()
    enc.fit(labels.reshape(-1, 1))
    ylabels = enc.transform(labels.reshape(-1, 1)).toarray()

    # ---- hyper-parameter sweep (single value per axis from args) ---------
    for beta in [args.beta]:
        for K in [args.num_clusters]:
            for alpha in [args.alpha]:
                accs = []
                t_st = time.time()
                sets = "imbalanced"
                for k in range(2):  # number of independent trainings
                    random_state = np.random.RandomState()
                    if sets == "imbalanced":
                        # Fixed total split sizes (~20 train / 30 val nodes
                        # per class on average, no per-class balancing).
                        train_idx, val_idx, test_idx = get_train_val_test_split(
                            random_state, ylabels,
                            train_examples_per_class=None,
                            val_examples_per_class=None,
                            test_examples_per_class=None,
                            train_size=20 * n_classes,
                            val_size=30 * n_classes,
                            test_size=None)
                    elif sets == "balanced":
                        # Exactly 20 train / 30 val examples per class.
                        train_idx, val_idx, test_idx = get_train_val_test_split(
                            random_state, ylabels,
                            train_examples_per_class=20,
                            val_examples_per_class=30,
                            test_examples_per_class=None,
                            train_size=None,
                            val_size=None,
                            test_size=None)
                    else:
                        # BUG FIX: this branch used to be a bare string
                        # expression (a no-op), so execution continued with
                        # undefined split indices. Fail loudly instead.
                        raise ValueError(
                            "No such set configuration (imbalanced/balanced)")

                    n_nodes = len(nodes_to_keep)
                    train_mask = np.zeros(n_nodes)
                    train_mask[train_idx] = 1
                    val_mask = np.zeros(n_nodes)
                    val_mask[val_idx] = 1
                    test_mask = np.zeros(n_nodes)
                    test_mask[test_idx] = 1
                    train_mask = torch.BoolTensor(train_mask)
                    val_mask = torch.BoolTensor(val_mask)
                    test_mask = torch.BoolTensor(test_mask)
                    # Planetoid split alternative for CORA/CiteSeer/PubMed
                    # (kept for reference):
                    #   train_mask = torch.BoolTensor(data.train_mask)
                    #   val_mask = torch.BoolTensor(data.val_mask)
                    #   test_mask = torch.BoolTensor(data.test_mask)

                    if args.gpu < 0:
                        cuda = False
                    else:
                        cuda = True
                        torch.cuda.set_device(args.gpu)
                        features = features.cuda()
                        labels = labels.cuda()
                        train_mask = train_mask.cuda()
                        val_mask = val_mask.cuda()
                        test_mask = test_mask.cuda()

                    gic = GIC(g, in_feats, args.n_hidden, args.n_layers,
                              nn.PReLU(args.n_hidden), args.dropout, K, beta,
                              alpha)
                    if cuda:
                        gic.cuda()
                    gic_optimizer = torch.optim.Adam(
                        gic.parameters(),
                        lr=args.gic_lr,
                        weight_decay=args.weight_decay)

                    # ---- unsupervised GIC training with early stopping ----
                    cnt_wait = 0
                    best = 1e9
                    best_t = 0
                    dur = []
                    for epoch in range(args.n_gic_epochs):
                        gic.train()
                        if epoch >= 3:
                            t0 = time.time()
                        gic_optimizer.zero_grad()
                        loss = gic(features)
                        loss.backward()
                        gic_optimizer.step()
                        if loss < best:
                            best = loss
                            best_t = epoch
                            cnt_wait = 0
                            # checkpoint best (lowest-loss) GIC weights
                            torch.save(gic.state_dict(), 'best_gic.pkl')
                        else:
                            cnt_wait += 1
                        if cnt_wait == args.patience:
                            # early stopping: no improvement for `patience`
                            # consecutive epochs
                            break
                        if epoch >= 3:
                            dur.append(time.time() - t0)

                    # ---- supervised classifier on frozen embeddings -------
                    gic.load_state_dict(torch.load('best_gic.pkl'))
                    embeds = gic.encoder(features, corrupt=False)
                    # NOTE(review): epsilon is added to the embeddings BEFORE
                    # taking the norm (unlike the `features` normalization
                    # above, which adds it to the norm). Presumably intended
                    # to avoid 0/0 on zero rows — confirm before changing.
                    embeds = embeds / (embeds + 1e-8).norm(dim=1)[:, None]
                    embeds = embeds.detach()

                    classifier = Classifier(args.n_hidden, n_classes)
                    if cuda:
                        classifier.cuda()
                    classifier_optimizer = torch.optim.Adam(
                        classifier.parameters(),
                        lr=args.classifier_lr,
                        weight_decay=args.weight_decay)

                    dur = []
                    best_a = 0
                    cnt_wait = 0
                    for epoch in range(args.n_classifier_epochs):
                        classifier.train()
                        if epoch >= 3:
                            t0 = time.time()
                        classifier_optimizer.zero_grad()
                        preds = classifier(embeds)
                        loss = F.nll_loss(preds[train_mask],
                                          labels[train_mask])
                        loss.backward()
                        classifier_optimizer.step()
                        if epoch >= 3:
                            dur.append(time.time() - t0)
                        acc = evaluate(classifier, embeds, labels, val_mask)
                        # keep the best validation checkpoint after a 100-epoch
                        # burn-in period
                        if acc > best_a and epoch > 100:
                            best_a = acc
                            best_t = epoch
                            torch.save(classifier.state_dict(),
                                       'best_class.pkl')

                    acc = evaluate(classifier, embeds, labels, test_mask)
                    accs.append(acc)

                print('=================== ', ' alpha', alpha, ' beta ', beta,
                      'K', K)
                print(args.dataset, ' Acc (mean)', mean(accs), ' (std)',
                      stdev(accs))
                print('=================== time',
                      int((time.time() - t_st) / 60))