def evaluate_downstream(model, dataloader, device):
    """Embed every graph with the frozen encoder and evaluate the embeddings."""
    model.eval()
    x_embeds = []
    ys = []
    with torch.no_grad():
        for data in dataloader:
            ys.append(data.y.cpu().detach().numpy())
            if data.x is None:
                # Datasets without node attributes get constant features.
                data.x = torch.ones((data.batch.shape[0], 1))
            data = data.to(device)
            x_embed = model.encode_graph(data)
            x_embeds.append(x_embed.cpu().detach().numpy())
    evaluate_embedding(np.vstack(x_embeds), np.concatenate(ys))
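# `evaluate_embedding` is called throughout these scripts but not defined in
# this file. A minimal sketch of its assumed behaviour, for reference only:
# fit off-the-shelf classifiers on the frozen embeddings and report mean
# 10-fold cross-validated accuracy. The classifier choices, the fold count,
# and the name `evaluate_embedding_sketch` are illustrative assumptions.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC


def evaluate_embedding_sketch(embeddings, labels):
    # Mean accuracy of a linear probe and a kernel SVM over 10 folds.
    logreg_acc = cross_val_score(
        LogisticRegression(max_iter=1000), embeddings, labels, cv=10).mean()
    svc_acc = cross_val_score(SVC(), embeddings, labels, cv=10).mean()
    return logreg_acc, svc_acc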
        x_aug = model(data_aug.x, data_aug.edge_index, data_aug.batch,
                      data_aug.num_graphs)
        x_stro_aug = model(data_stro_aug.x, data_stro_aug.edge_index,
                           data_stro_aug.batch, data_stro_aug.num_graphs)
        # Contrastive loss between the two augmented views.
        loss = model.loss_cal(x_stro_aug, x_aug)
        loss_all += loss.item()
        loss.backward()
        optimizer.step()

    print('Epoch {}, Loss {}'.format(epoch, loss_all / len(dataloader)))

    if epoch % log_interval == 0:
        model.eval()
        emb, y = model.encoder.get_embeddings(dataloader_eval)
        acc_val, acc = evaluate_embedding(emb, y)
        accuracies['val'].append(acc_val)
        accuracies['test'].append(acc)

tpe = ('local' if args.local else '') + ('prior' if args.prior else '')
with open('logs/log_' + args.DS + '_' + args.aug, 'a+') as f:
    s = json.dumps(accuracies)
    f.write('{},{},{},{},{},{},{}\n'.format(
        args.DS, tpe, args.num_gc_layers, epochs, log_interval, lr, s))
    f.write('\n')
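# `model.loss_cal` above is assumed to be an NT-Xent-style contrastive loss
# over the two views. A standalone sketch of that formulation (the function
# name and the temperature of 0.2 are illustrative assumptions, not taken
# from the model):
import torch
import torch.nn.functional as F


def loss_cal_sketch(x, x_aug, temperature=0.2):
    # Cosine-similarity matrix between the two batches of graph embeddings.
    x = F.normalize(x, dim=1)
    x_aug = F.normalize(x_aug, dim=1)
    sim = torch.mm(x, x_aug.t()) / temperature
    # Diagonal entries are the positive pairs; each row is a softmax over one
    # anchor against all candidates from the other view.
    loss = -torch.log(torch.exp(sim.diag()) / torch.exp(sim).sum(dim=1))
    return loss.mean()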
dataloader = DataLoader(dataset, batch_size=batch_size)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GcnInfomax(args.hidden_dim, args.num_gc_layers).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

print('================')
print('lr: {}'.format(lr))
print('num_features: {}'.format(dataset_num_features))
print('hidden_dim: {}'.format(args.hidden_dim))
print('num_gc_layers: {}'.format(args.num_gc_layers))
print('================')

# Baseline: evaluate the randomly initialised encoder before any training.
model.eval()
emb, y = model.encoder.get_embeddings(dataloader)
res = evaluate_embedding(emb, y)
accuracies['logreg'].append(res[0])
accuracies['svc'].append(res[1])
accuracies['linearsvc'].append(res[2])
accuracies['randomforest'].append(res[3])

for epoch in range(1, epochs + 1):
    loss_all = 0
    model.train()
    for data in dataloader:
        data = data.to(device)
        optimizer.zero_grad()
        # The model's forward pass returns the unsupervised loss directly.
        loss = model(data.x, data.edge_index, data.batch, data.num_graphs)
        loss_all += loss.item() * data.num_graphs
        loss.backward()
        optimizer.step()
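# `encoder.get_embeddings` is used for every evaluation but its body is not
# shown here. A sketch of the assumed contract (the encoder's forward
# signature and the helper name are assumptions): run the frozen encoder over
# a loader and return stacked NumPy arrays of embeddings and labels.
import numpy as np


def get_embeddings_sketch(encoder, loader, device):
    embs, ys = [], []
    with torch.no_grad():
        for data in loader:
            if data.x is None:
                # Same constant-feature fallback as evaluate_downstream above.
                data.x = torch.ones((data.batch.shape[0], 1))
            data = data.to(device)
            emb, _ = encoder(data.x, data.edge_index, data.batch)
            embs.append(emb.cpu().numpy())
            ys.append(data.y.cpu().numpy())
    return np.concatenate(embs, axis=0), np.concatenate(ys, axis=0)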
                        shuffle=True)

in_dim = dataset[0][0].ndata['attr'].shape[1]

# Step 2: Create model =================================================================== #
model = InfoGraph(in_dim, args.hid_dim, args.n_layers)
model = model.to(args.device)

# Step 3: Create training components ===================================================== #
optimizer = th.optim.Adam(model.parameters(), lr=args.lr)

print('===== Before training =====')

wholegraph = wholegraph.to(args.device)
emb = model.get_embedding(wholegraph).cpu()
res = evaluate_embedding(emb, labels)
print('logreg {:.4f}, svc {:.4f}'.format(res[0], res[1]))

best_logreg = 0
best_svc = 0
best_epoch = 0
best_loss = 0

# Step 4: Training epochs ================================================================ #
for epoch in range(1, args.epochs + 1):
    loss_all = 0
    model.train()

    for graph, label in dataloader:
        graph = graph.to(args.device)
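# `wholegraph` and `labels` are used above without their construction being
# shown in this fragment. One plausible construction (an assumption for
# illustration): batch every graph in the dataset into a single disjoint
# union so the whole dataset can be embedded in one forward pass.
import dgl
import torch as th

graphs = [dataset[i][0] for i in range(len(dataset))]
labels = th.as_tensor([int(dataset[i][1]) for i in range(len(dataset))])
wholegraph = dgl.batch(graphs)  # one big graph containing every sample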
def train(args, DS, gpu, num_gc_layers=4, epoch=40, batch=64):
    accuracies = {'val': [], 'test': []}
    epochs = epoch
    log_interval = 10
    batch_size = batch
    lr = args.lr
    path = osp.join(osp.dirname(osp.realpath(__file__)), 'data', DS)

    # Two copies of the dataset: an augmented one for training and an
    # unaugmented one for evaluation.
    dataset = TUDataset(path, name=DS, aug=args.aug,
                        stro_aug=args.stro_aug).shuffle()
    dataset_eval = TUDataset(path, name=DS, aug='none',
                             stro_aug='none').shuffle()
    print(len(dataset))
    try:
        dataset_num_features = dataset.get_num_feature()
    except Exception:
        dataset_num_features = 1

    dataloader = DataLoader(dataset, batch_size=batch_size)
    dataloader_eval = DataLoader(dataset_eval, batch_size=batch_size)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    online_encoder = Encoder(dataset_num_features, args.hidden_dim,
                             num_gc_layers)
    model = BYOL(online_encoder, args.hidden_dim, num_gc_layers,
                 use_momentum=False).to(device)
    optimizer = torch.optim.Adam(model.online_encoder.parameters(), lr=lr)

    print('================')
    print('lr: {}'.format(lr))
    print('num_features: {}'.format(dataset_num_features))
    print('hidden_dim: {}'.format(args.hidden_dim))
    print('num_gc_layers: {}'.format(num_gc_layers))
    print('================')

    model.eval()
    emb, y = model.online_encoder.get_embeddings(dataloader_eval)

    for epoch in range(1, epochs + 1):
        loss_all = 0
        model.train()
        for data in dataloader:
            # Each batch carries the original graphs plus a weakly and a
            # strongly augmented view.
            data, data_weak_aug, data_stro_aug = data
            optimizer.zero_grad()
            node_num, _ = data.x.size()
            data = data.to(device)

            # Node-dropping and subgraph augmentations can delete nodes, so
            # re-index the surviving nodes and drop self-loops before use.
            if args.aug in ('dnodes', 'subgraph', 'random2', 'random3',
                            'random4'):
                edge_idx = data_weak_aug.edge_index.numpy()
                _, edge_num = edge_idx.shape
                idx_not_missing = [n for n in range(node_num)
                                   if n in edge_idx[0] or n in edge_idx[1]]
                node_num_aug = len(idx_not_missing)
                data_weak_aug.x = data_weak_aug.x[idx_not_missing]
                data_weak_aug.batch = data.batch[idx_not_missing]
                idx_dict = {idx_not_missing[n]: n for n in range(node_num_aug)}
                edge_idx = [[idx_dict[edge_idx[0, n]], idx_dict[edge_idx[1, n]]]
                            for n in range(edge_num)
                            if not edge_idx[0, n] == edge_idx[1, n]]
                data_weak_aug.edge_index = torch.tensor(edge_idx).transpose_(0, 1)

            if args.stro_aug in ('stro_dnodes', 'stro_subgraph', 'random2',
                                 'random3', 'random4'):
                edge_idx = data_stro_aug.edge_index.numpy()
                _, edge_num = edge_idx.shape
                idx_not_missing = [n for n in range(node_num)
                                   if n in edge_idx[0] or n in edge_idx[1]]
                node_num_aug = len(idx_not_missing)
                data_stro_aug.x = data_stro_aug.x[idx_not_missing]
                data_stro_aug.batch = data.batch[idx_not_missing]
                idx_dict = {idx_not_missing[n]: n for n in range(node_num_aug)}
                edge_idx = [[idx_dict[edge_idx[0, n]], idx_dict[edge_idx[1, n]]]
                            for n in range(edge_num)
                            if not edge_idx[0, n] == edge_idx[1, n]]
                data_stro_aug.edge_index = torch.tensor(edge_idx).transpose_(0, 1)

            data_weak_aug = data_weak_aug.to(device)
            data_stro_aug = data_stro_aug.to(device)
            # The model's forward pass returns the BYOL loss between the
            # weakly and strongly augmented views.
            loss = model(data_weak_aug.x, data_weak_aug.edge_index,
                         data_weak_aug.batch, data_weak_aug.num_graphs,
                         data_stro_aug.x, data_stro_aug.edge_index,
                         data_stro_aug.batch, data_stro_aug.num_graphs)
            loss_all += loss.item()
            loss.backward()
            optimizer.step()
            # No model.update_moving_average() here: the target network is
            # not momentum-updated because use_momentum=False above.

        print('Epoch {}, Loss {}'.format(epoch, loss_all / len(dataloader)))

        if epoch % log_interval == 0:
            model.eval()
            emb, y = model.online_encoder.get_embeddings(dataloader_eval)
            acc_val, acc = evaluate_embedding(emb, y)
            accuracies['val'].append(acc_val)
            accuracies['test'].append(acc)

    tpe = ('local' if args.local else '') + ('prior' if args.prior else '')
    log_name = ('logs/log_BYOL_' + args.DS + '_' + args.aug + '_'
                + args.stro_aug)
    with open(log_name, 'a+') as f:
        s = json.dumps(accuracies)
        f.write('{},bs:{},epoch:{},layers:{},{},gpu:{},{},{},{},{},{}\n'.format(
            args.DS, batch, epoch, num_gc_layers, tpe, gpu, num_gc_layers,
            epochs, log_interval, lr, s))
        f.write('\n')
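# The two re-indexing blocks in train() above are identical except for which
# view they repair. A sketch of that logic factored into one helper (the
# function name is illustrative; the original code inlines this twice):
def repair_augmented_view_sketch(view, batch, node_num):
    edge_idx = view.edge_index.numpy()
    _, edge_num = edge_idx.shape
    # Keep only nodes that still have at least one incident edge.
    idx_not_missing = [n for n in range(node_num)
                       if n in edge_idx[0] or n in edge_idx[1]]
    view.x = view.x[idx_not_missing]
    view.batch = batch[idx_not_missing]
    # Relabel survivors to a dense 0..k-1 range and drop self-loops.
    idx_dict = {old: new for new, old in enumerate(idx_not_missing)}
    edges = [[idx_dict[edge_idx[0, n]], idx_dict[edge_idx[1, n]]]
             for n in range(edge_num) if edge_idx[0, n] != edge_idx[1, n]]
    view.edge_index = torch.tensor(edges).transpose_(0, 1)
    return view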
in_dim = wholegraph.ndata['attr'].shape[1]

# Step 2: Create model =================================================================== #
model = InfoGraph(in_dim, args.hid_dim, args.n_layers)
model = model.to(args.device)

# Step 3: Create training components ===================================================== #
optimizer = th.optim.Adam(model.parameters(), lr=args.lr)

print('===== Before training =====')

wholegraph = wholegraph.to(args.device)
wholefeat = wholegraph.ndata['attr']

# Evaluate the randomly initialised embeddings with logistic regression and a
# non-linear SVM.
emb = model.get_embedding(wholegraph, wholefeat).cpu()
res = evaluate_embedding(emb, labels, args.device)
print('logreg {:.4f}, svc {:.4f}'.format(res[0], res[1]))

best_logreg = 0
best_logreg_epoch = 0
best_svc = 0
best_svc_epoch = 0

# Step 4: Training epochs ================================================================ #
for epoch in range(args.epochs):
    loss_all = 0
    model.train()
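    # The fragment ends here; a sketch of how one training step plausibly
    # continues for this DGL InfoGraph setup (the forward call signature
    # model(graph, feat) returning the loss is an assumption, inferred from
    # the get_embedding(wholegraph, wholefeat) call above):
    for graph, label in dataloader:
        graph = graph.to(args.device)
        feat = graph.ndata['attr']
        optimizer.zero_grad()
        loss = model(graph, feat)  # assumed to return the InfoGraph MI loss
        loss.backward()
        optimizer.step()
        loss_all += loss.item()
    print('Epoch {}, Loss {:.4f}'.format(epoch, loss_all))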