def run_fix_mask(args, seed, adj_percent, wei_percent):

    pruning.setup_seed(seed)
    adj, features, labels, idx_train, idx_val, idx_test = load_data(args['dataset'])
    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()
    loss_func = nn.CrossEntropyLoss()

    net_gcn = net.net_gcn(embedding_dim=args['embedding_dim'], adj=adj)
    pruning.add_mask(net_gcn)
    net_gcn = net_gcn.cuda()

    pruning.random_pruning(net_gcn, adj_percent, wei_percent)
    adj_spar, wei_spar = pruning.print_sparsity(net_gcn)

    # freeze the (random) masks so only the surviving weights are trained
    for name, param in net_gcn.named_parameters():
        if 'mask' in name:
            param.requires_grad = False

    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    acc_test = 0.0
    best_val_acc = {'val_acc': 0, 'epoch': 0, 'test_acc': 0}

    for epoch in range(args['total_epoch']):

        optimizer.zero_grad()
        output = net_gcn(features, adj)
        loss = loss_func(output[idx_train], labels[idx_train])
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            output = net_gcn(features, adj, val_test=True)
            acc_val = f1_score(labels[idx_val].cpu().numpy(),
                               output[idx_val].cpu().numpy().argmax(axis=1),
                               average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(),
                                output[idx_test].cpu().numpy().argmax(axis=1),
                                average='micro')
            if acc_val > best_val_acc['val_acc']:
                best_val_acc['val_acc'] = acc_val
                best_val_acc['test_acc'] = acc_test
                best_val_acc['epoch'] = epoch

        print("(Fix Mask) Epoch:[{}] Val:[{:.2f}] Test:[{:.2f}] | Final Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}]"
              .format(epoch, acc_val * 100, acc_test * 100,
                      best_val_acc['val_acc'] * 100,
                      best_val_acc['test_acc'] * 100,
                      best_val_acc['epoch']))

    return (best_val_acc['val_acc'], best_val_acc['test_acc'],
            best_val_acc['epoch'], adj_spar, wei_spar)
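# Usage sketch (not part of the original script): average the random-pruning
# baseline over several seeds at one sparsity level. The helper name and the
# seed list are assumptions; `args` is assumed to carry the keys read by
# run_fix_mask above.
def sweep_random_pruning(args, adj_percent, wei_percent, seeds=(10, 20, 30)):
    val_accs, test_accs = [], []
    for seed in seeds:
        val_acc, test_acc, _, adj_spar, wei_spar = run_fix_mask(args, seed, adj_percent, wei_percent)
        val_accs.append(val_acc)
        test_accs.append(test_acc)
    print("syd: RP mean over {} seeds | Val:[{:.2f}] Test:[{:.2f}] | Adj:[{:.2f}%] Wei:[{:.2f}%]"
          .format(len(seeds),
                  sum(val_accs) / len(val_accs) * 100,
                  sum(test_accs) / len(test_accs) * 100,
                  adj_spar, wei_spar))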
                         results['final_test'][k] * 100, results['epoch'], epoch_time))
    print("=" * 120)
    print("syd final: IMP:[{}], Train:[{:.2f}] Best Val:[{:.2f}] at epoch:[{}] | Final Test Acc:[{:.2f}] Adj:[{:.2f}%] Wei:[{:.2f}%]"
          .format(imp_num,
                  results['final_train'][k] * 100,
                  results['highest_valid'][k] * 100,
                  results['epoch'],
                  results['final_test'][k] * 100,
                  results['adj_spar'],
                  results['wei_spar']))
    print("=" * 120)


if __name__ == "__main__":

    args = ArgsInit().save_exp()
    imp_num = args.imp_num
    percent_list = [(1 - (1 - 0.05) ** (i + 1), 1 - (1 - 0.2) ** (i + 1)) for i in range(20)]
    args.pruning_percent_adj, args.pruning_percent_wei = percent_list[imp_num - 1]
    pruning.print_args(args, 80)
    pruning.setup_seed(666)
    print("syd: IMP:[{}] Pruning adj[{:.6f}], wei[{:.6f}]"
          .format(imp_num, args.pruning_percent_adj, args.pruning_percent_wei))

    resume_train_ckpt = torch.load(args.resume_dir)
    start_imp = resume_train_ckpt['imp_num']
    main_fixed_mask(args, start_imp, resume_train_ckpt)

    # rewind_weight_mask, rewind_predict_weight = main_get_mask(args, imp_num, resume_train_ckpt)
    # print("INFO: IMP[{}] Begin Retrain!".format(imp_num))
    # main_fixed_mask(args, imp_num, rewind_weight_mask, rewind_predict_weight, resume_train_ckpt=None)
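# Worked example of the cumulative schedule above (illustrative helper, not
# part of the pipeline): pruning a fraction p of the survivors each round
# leaves (1 - p)**n after n rounds, so percent_list[n - 1] is the fraction of
# the ORIGINAL edges/weights removed by round n.
def _print_pruning_schedule(rounds=(1, 2, 20)):
    for n in rounds:
        print("round {:2d}: adj {:.4f}, wei {:.4f}".format(n, 1 - 0.95 ** n, 1 - 0.8 ** n))
    # round  1: adj 0.0500, wei 0.2000
    # round  2: adj 0.0975, wei 0.3600
    # round 20: adj 0.6415, wei 0.9885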
                      100, results['final_test'] * 100, results['epoch']))
    print("=" * 120)
    print("syd final: RP:[{}], Train:[{:.2f}] Best Val:[{:.2f}] at epoch:[{}] | Final Test Acc:[{:.2f}] Adj:[{:.2f}%] Wei:[{:.2f}%]"
          .format(imp_num,
                  results['final_train'] * 100,
                  results['highest_valid'] * 100,
                  results['epoch'],
                  results['final_test'] * 100,
                  results['adj_spar'],
                  results['wei_spar']))
    print("=" * 120)


if __name__ == "__main__":

    args = ArgsInit().save_exp()
    pruning.print_args(args, 120)
    pruning.setup_seed(args.seed)

    start_imp = 1
    rewind_weight_mask = None
    resume_train_ckpt = None
    percent_list = [(1 - (1 - 0.05) ** (i + 1), 1 - (1 - 0.2) ** (i + 1)) for i in range(20)]

    if args.resume_dir:
        resume_train_ckpt = torch.load(args.resume_dir)
        start_imp = resume_train_ckpt['imp_num']
        adj_percent, wei_percent = percent_list[start_imp]
        main_fixed_mask(args, start_imp, adj_percent, wei_percent, resume_train_ckpt)
        start_imp += 1
def run_fix_mask(args, imp_num, adj_percent, wei_percent):

    pruning.setup_seed(args['seed'])
    adj, features, labels, idx_train, idx_val, idx_test = load_data(args['dataset'])
    adj = load_adj_raw(args['dataset'])

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    # build the DGL graph from the raw adjacency
    g = dgl.DGLGraph()
    g.add_nodes(node_num)
    adj = adj.tocoo()
    g.add_edges(adj.row, adj.col)

    features = features.cuda()
    labels = labels.cuda()
    loss_func = nn.CrossEntropyLoss()

    if args['net'] == 'gin':
        net_gcn = GINNet(args['embedding_dim'], g)
        pruning_gin.add_mask(net_gcn)
        pruning_gin.random_pruning(net_gcn, adj_percent, wei_percent)
        adj_spar, wei_spar = pruning_gin.print_sparsity(net_gcn)
    elif args['net'] == 'gat':
        net_gcn = GATNet(args['embedding_dim'], g)
        g.add_edges(list(range(node_num)), list(range(node_num)))  # add self-loops for GAT
        pruning_gat.add_mask(net_gcn)
        pruning_gat.random_pruning(net_gcn, adj_percent, wei_percent)
        adj_spar, wei_spar = pruning_gat.print_sparsity(net_gcn)
    else:
        assert False, "unsupported net: {}".format(args['net'])

    net_gcn = net_gcn.cuda()

    # freeze the (random) masks so only the surviving weights are trained
    for name, param in net_gcn.named_parameters():
        if 'mask' in name:
            param.requires_grad = False

    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    best_val_acc = {'val_acc': 0, 'epoch': 0, 'test_acc': 0}

    for epoch in range(args['fix_epoch']):

        net_gcn.train()  # restore train mode; eval() is set below for validation
        optimizer.zero_grad()
        output = net_gcn(g, features, 0, 0)
        loss = loss_func(output[idx_train], labels[idx_train])
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            net_gcn.eval()
            output = net_gcn(g, features, 0, 0)
            acc_val = f1_score(labels[idx_val].cpu().numpy(),
                               output[idx_val].cpu().numpy().argmax(axis=1),
                               average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(),
                                output[idx_test].cpu().numpy().argmax(axis=1),
                                average='micro')
            if acc_val > best_val_acc['val_acc']:
                best_val_acc['val_acc'] = acc_val
                best_val_acc['test_acc'] = acc_test
                best_val_acc['epoch'] = epoch

        print("RP[{}] (Fix Mask) Epoch:[{}/{}] LOSS:[{:.4f}] Val:[{:.2f}] Test:[{:.2f}] | Final Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}]"
              .format(imp_num, epoch, args['fix_epoch'], loss,
                      acc_val * 100, acc_test * 100,
                      best_val_acc['val_acc'] * 100,
                      best_val_acc['test_acc'] * 100,
                      best_val_acc['epoch']))

    print("syd final: [{},{}] RP[{}] (Fix Mask) Final Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}] | Adj:[{:.2f}%] Wei:[{:.2f}%]"
          .format(args['dataset'], args['net'], imp_num,
                  best_val_acc['val_acc'] * 100,
                  best_val_acc['test_acc'] * 100,
                  best_val_acc['epoch'],
                  adj_spar, wei_spar))
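# Example invocation (a sketch; every value below is an assumption chosen to
# match the keys run_fix_mask reads, not a setting from the original repo).
example_args = {
    'seed': 666,
    'dataset': 'cora',
    'net': 'gin',                      # or 'gat'
    'embedding_dim': [1433, 512, 7],   # assumed in/hidden/out layout
    'lr': 0.01,
    'weight_decay': 5e-4,
    'fix_epoch': 200,
}
# run_fix_mask(example_args, imp_num=1, adj_percent=0.05, wei_percent=0.2)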
def run_get_admm_weight_mask(args, index, wei_percent, seed):

    # load the ADMM-pruned adjacency saved for this index
    adj = np.load("./ADMM/admm_{}/adj_{}.npy".format(args['dataset'], index))
    adj = utils.normalize_adj(adj)
    adj = utils.sparse_mx_to_torch_sparse_tensor(adj)

    pruning.setup_seed(seed)
    _, features, labels, idx_train, idx_val, idx_test = load_data(args['dataset'])
    adj = adj.to_dense()

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()
    loss_func = nn.CrossEntropyLoss()

    net_gcn = net.net_gcn_baseline(embedding_dim=args['embedding_dim'])
    pruning.add_mask(net_gcn)
    net_gcn = net_gcn.cuda()

    # freeze the masks; only the weights are trained here
    for name, param in net_gcn.named_parameters():
        if 'mask' in name:
            param.requires_grad = False
        print("NAME:{}\tSHAPE:{}\tGRAD:{}".format(name, param.shape, param.requires_grad))

    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    acc_test = 0.0
    best_val_acc = {'val_acc': 0, 'epoch': 0, 'test_acc': 0}
    rewind_weight = copy.deepcopy(net_gcn.state_dict())

    for epoch in range(args['total_epoch']):

        optimizer.zero_grad()
        output = net_gcn(features, adj)
        loss = loss_func(output[idx_train], labels[idx_train])
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            output = net_gcn(features, adj, val_test=True)
            acc_val = f1_score(labels[idx_val].cpu().numpy(),
                               output[idx_val].cpu().numpy().argmax(axis=1),
                               average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(),
                                output[idx_test].cpu().numpy().argmax(axis=1),
                                average='micro')
            if acc_val > best_val_acc['val_acc']:
                best_val_acc['test_acc'] = acc_test
                best_val_acc['val_acc'] = acc_val
                best_val_acc['epoch'] = epoch
                # snapshot the weight mask at the best-validation epoch
                best_epoch_mask = pruning.get_final_weight_mask_epoch(net_gcn, wei_percent=wei_percent)

        print("(ADMM Get Mask) Epoch:[{}] Val:[{:.2f}] Test:[{:.2f}] | Best Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}]"
              .format(epoch, acc_val * 100, acc_test * 100,
                      best_val_acc['val_acc'] * 100,
                      best_val_acc['test_acc'] * 100,
                      best_val_acc['epoch']))

    return best_epoch_mask, rewind_weight
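# Plausible driver (illustrative): extract a weight mask for each of the
# pre-computed ADMM adjacencies saved as ./ADMM/admm_<dataset>/adj_<index>.npy.
# The helper name, index range, and seed are assumptions.
def sweep_admm_masks(args, indices=range(4), seed=666, wei_percent=0.2):
    masks = {}
    for index in indices:
        best_epoch_mask, rewind_weight = run_get_admm_weight_mask(args, index, wei_percent, seed)
        masks[index] = (best_epoch_mask, rewind_weight)
    return masks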
def run_get_mask(args, imp_num, rewind_weight_mask=None):

    pruning.setup_seed(args['seed'])
    adj, features, labels, idx_train, idx_val, idx_test = load_data(args['dataset'])
    adj = load_adj_raw(args['dataset'])

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    g = dgl.DGLGraph()
    g.add_nodes(node_num)
    adj = adj.tocoo()
    g.add_edges(adj.row, adj.col)

    features = features.cuda()
    labels = labels.cuda()
    loss_func = nn.CrossEntropyLoss()

    if args['net'] == 'gin':
        net_gcn = GINNet(args['embedding_dim'], g)
        pruning_gin.add_mask(net_gcn)
    elif args['net'] == 'gat':
        net_gcn = GATNet(args['embedding_dim'], g)
        g.add_edges(list(range(node_num)), list(range(node_num)))  # add self-loops for GAT
        pruning_gat.add_mask(net_gcn)
    else:
        assert False, "unsupported net: {}".format(args['net'])

    net_gcn = net_gcn.cuda()
    if rewind_weight_mask:
        net_gcn.load_state_dict(rewind_weight_mask)

    if args['net'] == 'gin':
        pruning_gin.add_trainable_mask_noise(net_gcn, c=1e-5)
        adj_spar, wei_spar = pruning_gin.print_sparsity(net_gcn)
    else:
        pruning_gat.add_trainable_mask_noise(net_gcn, c=1e-5)
        adj_spar, wei_spar = pruning_gat.print_sparsity(net_gcn)

    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    best_val_acc = {'val_acc': 0, 'epoch': 0, 'test_acc': 0}
    rewind_weight = copy.deepcopy(net_gcn.state_dict())

    for epoch in range(args['mask_epoch']):

        net_gcn.train()  # restore train mode; eval() is set below for validation
        optimizer.zero_grad()
        output = net_gcn(g, features, 0, 0)
        loss = loss_func(output[idx_train], labels[idx_train])
        loss.backward()
        if args['net'] == 'gin':
            pruning_gin.subgradient_update_mask(net_gcn, args)  # l1 norm on the masks
        else:
            pruning_gat.subgradient_update_mask(net_gcn, args)  # l1 norm on the masks
        optimizer.step()

        with torch.no_grad():
            net_gcn.eval()
            output = net_gcn(g, features, 0, 0)
            acc_val = f1_score(labels[idx_val].cpu().numpy(),
                               output[idx_val].cpu().numpy().argmax(axis=1),
                               average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(),
                                output[idx_test].cpu().numpy().argmax(axis=1),
                                average='micro')
            if acc_val > best_val_acc['val_acc']:
                best_val_acc['val_acc'] = acc_val
                best_val_acc['test_acc'] = acc_test
                best_val_acc['epoch'] = epoch
                # refresh the rewind weights with the masks found at the best epoch
                if args['net'] == 'gin':
                    rewind_weight, adj_spar, wei_spar = pruning_gin.get_final_mask_epoch(net_gcn, rewind_weight, args)
                else:
                    rewind_weight, adj_spar, wei_spar = pruning_gat.get_final_mask_epoch(net_gcn, rewind_weight, args)

        print("IMP[{}] (Get Mask) Epoch:[{}/{}] LOSS:[{:.4f}] Val:[{:.2f}] Test:[{:.2f}] | Final Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}] | Adj:[{:.2f}%] Wei:[{:.2f}%]"
              .format(imp_num, epoch, args['mask_epoch'], loss,
                      acc_val * 100, acc_test * 100,
                      best_val_acc['val_acc'] * 100,
                      best_val_acc['test_acc'] * 100,
                      best_val_acc['epoch'],
                      adj_spar, wei_spar))

    return rewind_weight
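# Plausible IMP driver (a sketch, not the original main): each call returns
# rewind weights carrying the freshly pruned masks, which seed the next round.
# Retraining each ticket (the fix-mask run) lives in a companion script and is
# omitted here; the helper name and round count are assumptions.
def run_imp(args, num_rounds=20):
    rewind_weight_mask = None
    for imp_num in range(1, num_rounds + 1):
        rewind_weight_mask = run_get_mask(args, imp_num, rewind_weight_mask)
    return rewind_weight_mask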
def run_get_mask(args, seed, imp_num, rewind_weight_mask=None):

    pruning.setup_seed(seed)
    adj, features, labels, idx_train, idx_val, idx_test = load_data(args['dataset'])

    # adj = coo_matrix(adj)
    # adj_dict = {}
    # adj_dict['adj'] = adj
    # torch.save(adj_dict, "./adjs/pubmed/original.pt")
    # pdb.set_trace()

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()
    loss_func = nn.CrossEntropyLoss()

    net_gcn = net.net_gcn(embedding_dim=args['embedding_dim'], adj=adj)
    pruning.add_mask(net_gcn)
    net_gcn = net_gcn.cuda()

    if args['weight_dir']:
        # warm-start the first layer from a pretrained checkpoint
        print("load : {}".format(args['weight_dir']))
        encoder_weight = {}
        cl_ckpt = torch.load(args['weight_dir'], map_location='cuda')
        encoder_weight['weight_orig_weight'] = cl_ckpt['gcn.fc.weight']
        ori_state_dict = net_gcn.net_layer[0].state_dict()
        ori_state_dict.update(encoder_weight)
        net_gcn.net_layer[0].load_state_dict(ori_state_dict)

    if rewind_weight_mask:
        net_gcn.load_state_dict(rewind_weight_mask)
        pruning.soft_mask_init(net_gcn, args['init_soft_mask_type'], seed)
        adj_spar, wei_spar = pruning.print_sparsity(net_gcn)
    else:
        pruning.soft_mask_init(net_gcn, args['init_soft_mask_type'], seed)

    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    acc_test = 0.0
    best_val_acc = {'val_acc': 0, 'epoch': 0, 'test_acc': 0}
    rewind_weight = copy.deepcopy(net_gcn.state_dict())

    for epoch in range(args['mask_epoch']):

        optimizer.zero_grad()
        output = net_gcn(features, adj)
        loss = loss_func(output[idx_train], labels[idx_train])
        loss.backward()
        pruning.subgradient_update_mask(net_gcn, args)  # l1 norm on the masks
        optimizer.step()

        with torch.no_grad():
            output = net_gcn(features, adj, val_test=True)
            acc_val = f1_score(labels[idx_val].cpu().numpy(),
                               output[idx_val].cpu().numpy().argmax(axis=1),
                               average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(),
                                output[idx_test].cpu().numpy().argmax(axis=1),
                                average='micro')
            if acc_val > best_val_acc['val_acc']:
                best_val_acc['test_acc'] = acc_test
                best_val_acc['val_acc'] = acc_val
                best_val_acc['epoch'] = epoch
                # snapshot adjacency + weight masks at the best-validation epoch
                best_epoch_mask = pruning.get_final_mask_epoch(net_gcn,
                                                               adj_percent=args['pruning_percent_adj'],
                                                               wei_percent=args['pruning_percent_wei'])

        print("(Get Mask) Epoch:[{}] Val:[{:.2f}] Test:[{:.2f}] | Best Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}]"
              .format(epoch, acc_val * 100, acc_test * 100,
                      best_val_acc['val_acc'] * 100,
                      best_val_acc['test_acc'] * 100,
                      best_val_acc['epoch']))

    return best_epoch_mask, rewind_weight
def run(args):

    pruning.setup_seed(args['seed'])
    adj, features, labels, idx_train, idx_val, idx_test = load_data(args['dataset'])
    adj = load_adj_raw(args['dataset'])

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    g = dgl.DGLGraph()
    g.add_nodes(node_num)
    adj = adj.tocoo()
    g.add_edges(adj.row, adj.col)

    features = features.cuda()
    labels = labels.cuda()
    loss_func = nn.CrossEntropyLoss()

    if args['net'] == 'gin':
        net_gcn = GINNet(args['embedding_dim'])
    else:
        net_gcn = GATNet(args['embedding_dim'])
        g.add_edges(list(range(node_num)), list(range(node_num)))  # add self-loops for GAT

    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    best_val_acc = {'val_acc': 0, 'epoch': 0, 'test_acc': 0}

    for epoch in range(args['total_epoch']):

        net_gcn.train()  # restore train mode; eval() is set below for validation
        optimizer.zero_grad()
        output = net_gcn(g, features, 0, 0)
        loss = loss_func(output[idx_train], labels[idx_train])
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            net_gcn.eval()
            output = net_gcn(g, features, 0, 0)
            acc_val = f1_score(labels[idx_val].cpu().numpy(),
                               output[idx_val].cpu().numpy().argmax(axis=1),
                               average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(),
                                output[idx_test].cpu().numpy().argmax(axis=1),
                                average='micro')
            if acc_val > best_val_acc['val_acc']:
                best_val_acc['val_acc'] = acc_val
                best_val_acc['test_acc'] = acc_test
                best_val_acc['epoch'] = epoch

        print("(Baseline) Epoch:[{}] LOSS:[{:.2f}] Val:[{:.2f}] Test:[{:.2f}] | Final Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}]"
              .format(epoch, loss, acc_val * 100, acc_test * 100,
                      best_val_acc['val_acc'] * 100,
                      best_val_acc['test_acc'] * 100,
                      best_val_acc['epoch']))

    print("syd final: [{},{}] (Baseline) Final Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}]"
          .format(args['dataset'], args['net'],
                  best_val_acc['val_acc'] * 100,
                  best_val_acc['test_acc'] * 100,
                  best_val_acc['epoch']))
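# Note: the micro-averaged F1 used for acc_val / acc_test throughout these
# scripts reduces to plain accuracy for single-label multiclass predictions,
# which is why it is reported as accuracy in the logs. A quick illustrative
# check (the function name is ours):
def _check_micro_f1_is_accuracy():
    from sklearn.metrics import accuracy_score, f1_score
    y_true, y_pred = [0, 1, 2, 2], [0, 2, 2, 2]
    assert f1_score(y_true, y_pred, average='micro') == accuracy_score(y_true, y_pred) == 0.75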