def run(is_train, hidden, data_name, random_state, use_cuda, save_path):
    if is_train is False and save_path is None:
        raise RuntimeError("There is no file for test.")

    device = "cpu"
    if use_cuda:
        device = "cuda"

    # Load train data
    train_data = load_data(data_name=data_name, train=True, random_state=random_state)
    dim_features = train_data[0].shape[1]
    num_labels = max(train_data[2]).item() + 1
    model = GCN(dim_features, hidden, num_labels)

    # Train phase
    if is_train:
        print("---Training Start---")
        train(model, train_data, device, save_path)
        print("---Training Done--- \n")

    if save_path is not None:
        # Load model
        model.load_state_dict(torch.load(save_path))

    # Test phase
    print("---Test Start---")
    test_data = load_data(data_name=data_name, train=False, random_state=42)
    print("Test Accuracy: %2.2f %%" % get_acc(model, test_data, device))
    print("---Test Done--- \n")
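# A minimal sketch of an entry point wiring run() to a CLI; the flag names
# and defaults below are assumptions, not taken from the original script.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--train", action="store_true")
    parser.add_argument("--hidden", type=int, default=16)
    parser.add_argument("--data_name", type=str, default="cora")
    parser.add_argument("--random_state", type=int, default=42)
    parser.add_argument("--use_cuda", action="store_true")
    parser.add_argument("--save_path", type=str, default=None)
    a = parser.parse_args()
    run(a.train, a.hidden, a.data_name, a.random_state, a.use_cuda, a.save_path)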
def main():
    data_generator = DataGenerator(args)
    meta_model = GCN(nfeat=args.in_f_d, nhid=args.hidden, nclass=args.nclasses,
                     dropout=args.dropout).to(device)
    proto_model = GCN_Proto(args, nfeat=args.hidden, dropout=args.dropout).to(device)
    structure_model = GCN_Structure(args, nfeat=args.hidden, nhid=args.structure_dim,
                                    dropout=args.dropout).to(device)

    if args.train:
        meta_optimiser = torch.optim.Adam(
            list(meta_model.parameters()) + list(proto_model.parameters())
            + list(structure_model.parameters()),
            lr=args.meta_lr, weight_decay=args.weight_decay)
        train(args, meta_model, meta_optimiser, proto_model, structure_model,
              metatrain_iterations=args.metatrain_iterations,
              data_generator=data_generator,
              fit_function=meta_gradient_step,
              fit_function_kwargs={'train': True,
                                   'inner_train_steps': args.inner_train_steps,
                                   'inner_lr': args.inner_lr,
                                   'batch_n': args.batch_n,
                                   'device': device})
    else:
        if args.test_load_epoch > 0:
            prefix = args.logdir + '/' + exp_string + '/'
            meta_model.load_state_dict(
                torch.load(prefix + 'model_epoch_{}'.format(args.test_load_epoch)))
            proto_model.load_state_dict(
                torch.load(prefix + 'proto_model_epoch_{}'.format(args.test_load_epoch)))
            structure_model.load_state_dict(
                torch.load(prefix + 'structure_model_epoch_{}'.format(args.test_load_epoch)))
        meta_optimiser = torch.optim.Adam(
            list(meta_model.parameters()) + list(proto_model.parameters()),
            lr=args.meta_lr, weight_decay=args.weight_decay)
        evaluate(args, meta_model, meta_optimiser, proto_model, structure_model,
                 data_generator=data_generator,
                 fit_function=meta_gradient_step,
                 fit_function_kwargs={'train': False,
                                      'inner_train_steps': args.inner_train_steps,
                                      'inner_lr': args.inner_lr_test,
                                      'batch_n': args.test_sample_g_n,
                                      'device': device})
        else:
            bad_counter += 1

        if bad_counter >= args.early_stopping and loss_val < args.loss_threshold:
            print("Early stopping...")
            break

    print("Optimization Finished!")
    total_time = time.time() - t_total
    mean_time = np.mean(epoch_time_list)
    print("Total time elapsed: {:.4f}s".format(total_time))
    print("Time per epoch: {:.4f}s".format(mean_time))

    if args.early_stopping and args.save:
        print('Loading {}th epoch'.format(best_epoch))
        model.load_state_dict(torch.load('{}.pkl'.format(run_id)))

    # Testing
    acc = test()
    valacc_list.append(acc_val)
    acc_list.append(acc)
    total_time_list.append(total_time)
    mean_time_list.append(mean_time)

avgvalacc = np.mean(valacc_list)
avgacc = np.mean(acc_list)
avg_total_time = np.mean(total_time_list)
avg_mean_time = np.mean(mean_time_list)
stdvalacc = np.std(valacc_list)
stdacc = np.std(acc_list)
# Report the run-averaged statistics (the source breaks off at this print;
# the exact format below is assumed).
print("avg val acc: {:.4f} +/- {:.4f} | avg test acc: {:.4f} +/- {:.4f} | "
      "avg total time: {:.4f}s | avg time per epoch: {:.4f}s".format(
          avgvalacc, stdvalacc, avgacc, stdacc, avg_total_time, avg_mean_time))
files = glob.glob('*.pkl')
for file in files:
    epoch_nb = int(file.split('.')[0])
    if epoch_nb < best_epoch:
        os.remove(file)

files = glob.glob('*.pkl')
for file in files:
    epoch_nb = int(file.split('.')[0])
    if epoch_nb > best_epoch:
        os.remove(file)

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Restore best model
print('Loading {}th epoch'.format(best_epoch))
model.load_state_dict(torch.load('{}.pkl'.format(best_epoch)))

# Testing
acc_sum = 0
loss_sum = 0
with torch.no_grad():
    model.eval()
    for subgraph in content_g:
        index = content_g[subgraph]['index_subgraph']
        idx_test = content_g[subgraph]['idx_test']
        adj = content_g[subgraph]['adj']
        adj = torch.FloatTensor(np.array(adj.todense()))
        if args.cuda:
            adj = adj.cuda()
        output = model(features[index], adj)
        labels_test = labels[index]
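# The test loop above breaks off before per-subgraph metrics are accumulated
# into acc_sum/loss_sum. A minimal sketch of the usual counting step (an
# assumption -- the original aggregation code is truncated), assuming
# `output` holds per-node log-probabilities:
def masked_counts(output, labels_sub, idx_test):
    """Return (#correct, #total) over the test indices of one subgraph."""
    preds = output[idx_test].max(1)[1].type_as(labels_sub[idx_test])
    correct = preds.eq(labels_sub[idx_test]).double().sum().item()
    return correct, len(idx_test)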
    node_heat_map = np.array(node_heat_map[:mol.GetNumAtoms()]).reshape(-1, 1)
    pos_node_heat_map = MinMaxScaler(feature_range=(0, 1)).fit_transform(
        node_heat_map * (node_heat_map >= 0)).reshape(-1, )
    neg_node_heat_map = MinMaxScaler(feature_range=(-1, 0)).fit_transform(
        node_heat_map * (node_heat_map < 0)).reshape(-1, )
    return pos_node_heat_map + neg_node_heat_map


dataset = load_bbbp(hp.N)
random.Random(hp.shuffle_seed).shuffle(dataset)
split_idx = int(np.floor(len(dataset) * hp.train_frac))
test_dataset = dataset[split_idx:]

loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN(hp.H_0, hp.H_1, hp.H_2, hp.H_3).to(device)
model.load_state_dict(torch.load('gcn_state_dict.pt'))
model.eval()

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
print(model)

model.train()
total_loss = 0
for data in tqdm(loader):
    data = data.to(device)
    optimizer.zero_grad()
    out = model(data)
    loss = F.binary_cross_entropy(out, data.y)
    loss.backward()
    try:
        # The snippet breaks off at this try block; stepping the optimizer
        # and accumulating the loss is an assumed completion.
        optimizer.step()
        total_loss += loss.item()
    except RuntimeError:
        pass
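# The pos/neg split in the heat-map helper at the top of the snippet above
# rescales positive and negative attributions separately, so both ends of
# the colour scale are fully used. A tiny self-contained demonstration:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

heat = np.array([[0.5], [-0.2], [1.5], [-1.0]])
pos = MinMaxScaler(feature_range=(0, 1)).fit_transform(heat * (heat >= 0)).reshape(-1)
neg = MinMaxScaler(feature_range=(-1, 0)).fit_transform(heat * (heat < 0)).reshape(-1)
print(pos + neg)  # [ 0.333 -0.2  1.  -1. ] -- positives land in [0, 1], negatives in [-1, 0]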
torch.cuda.manual_seed(args.seed)

# Load data
# adj, features, labels, idx_train, idx_val, idx_test = load_data()
adj, A_tilde, adj_sct1, adj_sct2, adj_sct4, features, labels, idx_train, idx_val, idx_test = load_citation(
    args.dataset, args.normalization, args.cuda)

# Model and optimizer
model = GCN(nfeat=features.shape[1],
            para3=args.hid1,
            para4=args.hid2,
            nclass=labels.max().item() + 1,
            dropout=args.dropout,
            smoo=args.smoo)
PATH = "state_dict_model.pt"
model.load_state_dict(torch.load(PATH))

if args.cuda:
    model = model.cuda()
    features = features.cuda()
    A_tilde = A_tilde.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()

optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
scheduler = StepLR(optimizer, step_size=50, gamma=0.9)
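# A minimal sketch of how the StepLR scheduler above is typically driven;
# both args.epochs and the train(epoch) function are assumptions here (the
# original training loop is not part of this snippet). StepLR multiplies
# the learning rate by gamma=0.9 every step_size=50 epochs.
for epoch in range(args.epochs):
    train(epoch)
    scheduler.step()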
parser.add_argument('n_bits', type=int, default=32)
args = parser.parse_args()

n_anchor = 1000
n_bits = args.n_bits
n_class = 21
n_epoch = 10
topk = 15

# dataset: 'cifar10', 'nuswide', 'ImageNet', 'sun'
dataset = 'nuswide'
dset = load_data(dataset)

meta = torch.load('nuswide_2000_32_0.4454_0.5912')
anchor = meta['anchor']
gcn = GCN(500, n_bits, n_class, meta['anchor_affnty'], 40)
gcn.load_state_dict(meta['state_dict'])
gcn.cuda()

test_loader = data.DataLoader(dataset=db(dset.testdata, dset.testlabel),
                              batch_size=100, shuffle=False, num_workers=4)

tH = []
gcn.eval()
for images, _ in test_loader:
    in_aff, out_aff = rbf_affnty(images, anchor, topk=topk)
    images = Variable(images).cuda()
    in_aff = Variable(in_aff).cuda()
    out_aff = Variable(out_aff).cuda()
    out, _ = gcn(images, in_aff, out_aff)
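# The loop above is cut off before tH is filled. In GCN-based hashing the
# real-valued outputs are typically binarized by sign into {-1, +1} codes;
# a minimal standalone sketch (an assumed completion, not the source's code):
def binarize(batch_outputs):
    """Concatenate per-batch outputs and map them to {-1, +1} hash codes."""
    return torch.sign(torch.cat([o.detach().cpu() for o in batch_outputs], dim=0))

# e.g. inside the loop: tH.append(out); afterwards: codes = binarize(tH)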
normalise = softmax_normalisation  # manual entry

# Model and optimizer
model = GCN(dims=dims, dropout=dropout, adj=adj, nrm_mthd=nrm_mthd,
            learnable=blearnable, projection=bprojection)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Results on the untrained version of the model:
# model(features)
# over_smoothing(model.embeddings_dict)

checkpoint = torch.load(path + 'model-optimised.pt')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

print(values.shape)
nrm = SphToAdj(indices, model.edge_weights.detach(), size)
print('-')
# Inspect the normalised edge weights of source nodes 2 through 9
for node in range(2, 10):
    print(nrm[torch.where(indices[0] == node)[0]])
def train_gcn(dataset, test_ratio=0.5, val_ratio=0.2, seed=1, n_hidden=64,
              n_epochs=200, lr=1e-2, weight_decay=5e-4, dropout=0.5,
              use_embs=False, verbose=True, cuda=False):
    data = dataset.get_data()

    # Train text embeddings
    if use_embs:
        pad_ix, n_tokens, matrix, pretrained_embs = data['features']
        if pretrained_embs is not None:
            pretrained_embs = torch.FloatTensor(pretrained_embs)
        features = torch.LongTensor(matrix)
    else:
        pad_ix = None
        n_tokens = None
        pretrained_embs = None
        features = torch.FloatTensor(data['features'])

    labels = torch.LongTensor(data['labels'])
    n = len(data['ids'])
    train_mask, val_mask, test_mask = get_masks(n, data['main_ids'], data['main_labels'],
                                                test_ratio=test_ratio,
                                                val_ratio=val_ratio, seed=seed)
    train_mask = torch.BoolTensor(train_mask)
    val_mask = torch.BoolTensor(val_mask)
    test_mask = torch.BoolTensor(test_mask)

    if cuda:
        torch.cuda.set_device("cuda:0")
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = DGLGraph(data['graph'])
    g = dgl.transform.add_self_loop(g)
    n_edges = g.number_of_edges()

    # Symmetric normalization factor D^{-1/2}
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    if use_embs:
        if pretrained_embs is not None:
            in_feats = 100
        else:
            in_feats = 64
    else:
        in_feats = features.shape[1]

    # + 1 for unknown class
    n_classes = data['n_classes'] + 1

    model = GCN(g, in_feats=in_feats, n_hidden=n_hidden, n_classes=n_classes,
                activation=F.relu, dropout=dropout, use_embs=use_embs,
                pretrained_embs=pretrained_embs, pad_ix=pad_ix, n_tokens=n_tokens)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                           factor=0.9, patience=20,
                                                           min_lr=1e-10)
    best_f1 = -100

    # initialize graph
    dur = []
    for epoch in range(n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()

        # forward: randomly zero out ~20% of the input features as noise
        mask_probs = torch.empty(features.shape).uniform_(0, 1)
        if cuda:
            mask_probs = mask_probs.cuda()
        mask_features = torch.where(mask_probs > 0.2, features, torch.zeros_like(features))

        logits = model(mask_features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        f1 = evaluate(model, features, labels, val_mask)
        scheduler.step(1 - f1)
        if f1 > best_f1:
            best_f1 = f1
            torch.save(model.state_dict(), 'best_model.pt')

        if verbose:
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | F1 {:.4f} | "
                  "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                                f1, n_edges / np.mean(dur) / 1000))

    model.load_state_dict(torch.load('best_model.pt'))
    f1 = evaluate(model, features, labels, test_mask)
    if verbose:
        print()
        print("Test F1 {:.2}".format(f1))
    return f1
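# train_gcn() relies on an evaluate(model, features, labels, mask) helper
# that returns an F1 score. A minimal sketch of a compatible version (the
# source's averaging choice is unknown; macro-F1 is assumed here):
import torch
from sklearn.metrics import f1_score

def evaluate(model, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features)
        preds = logits[mask].argmax(dim=1)
        return f1_score(labels[mask].cpu(), preds.cpu(), average='macro')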
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch + 1, n_epoch, i + 1, len(unlabeled_loader), loss.item()))

torch.save(
    {
        'state_dict': gcn.state_dict(),
        'mean_val': mean_val,
        'anchor': anchor,
        'anchor_affnty': anchor_affnty
    }, './ImageNet_%d_%d' % (n_labeled, n_bits))

'''
model = torch.load('./ImageNet_%d_%d' % (n_labeled, n_bits))
print(model.keys())

anchor = model['anchor']
anchor_affnty = model['anchor_affnty']

gcn = GCN(4096, n_bits, 1000, anchor_affnty, 40)
gcn.load_state_dict(model['state_dict'])
gcn.cuda()
'''

traindata, testdata = load_ImageNet_full(mean_val)
train_loader = data.DataLoader(dataset=db(traindata, None),
                               batch_size=100, shuffle=False, num_workers=4)
def main(args):
    # Convert boolean type for args
    assert args.use_ist in ['True', 'False'], ["Only True or False for use_ist, got ", args.use_ist]
    assert args.split_input in ['True', 'False'], ["Only True or False for split_input, got ", args.split_input]
    assert args.split_output in ['True', 'False'], ["Only True or False for split_output, got ", args.split_output]
    assert args.self_loop in ['True', 'False'], ["Only True or False for self_loop, got ", args.self_loop]
    assert args.use_layernorm in ['True', 'False'], ["Only True or False for use_layernorm, got ", args.use_layernorm]
    assert args.use_random_proj in ['True', 'False'], ["Only True or False for use_random_proj, got ", args.use_random_proj]
    use_ist = (args.use_ist == 'True')
    split_input = (args.split_input == 'True')
    split_output = (args.split_output == 'True')
    self_loop = (args.self_loop == 'True')
    use_layernorm = (args.use_layernorm == 'True')
    use_random_proj = (args.use_random_proj == 'True')

    # Make sure the hidden layer splits evenly across subnets
    assert (args.n_hidden % args.num_subnet) == 0

    # Load and preprocess dataset
    global t0
    if args.dataset in {'cora', 'citeseer', 'pubmed'}:
        data = load_data(args)
    else:
        raise NotImplementedError(f'{args.dataset} is not a valid dataset')

    if use_random_proj:
        # Densify input features with a random projection; round the output
        # dimension down to a multiple of num_subnet, otherwise some
        # parameters of the last subnet would be handled improperly.
        from sklearn import random_projection
        n_components = int(data.features.shape[-1] / args.num_subnet) * args.num_subnet
        transformer = random_projection.GaussianRandomProjection(n_components=n_components)
        new_feature = transformer.fit_transform(data.features)
        features = torch.FloatTensor(new_feature)
    else:
        assert (data.features.shape[-1] % args.num_subnet) == 0
        features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    # Graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    g = g.to(device)
    n_edges = g.number_of_edges()

    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    norm = norm.to(device)
    g.ndata['norm'] = norm.unsqueeze(1)

    # Create GCN model
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                F.relu, args.dropout, use_layernorm)
    model = model.to(device)
    loss_fcn = torch.nn.CrossEntropyLoss()

    # initialize graph
    dur = []
    record = []
    sub_models = []
    opt_list = []
    sub_dict_list = []
    main_dict = None
    for epoch in range(args.n_epochs):
        if epoch >= 3:
            t0 = time.time()
        if use_ist:
            model.eval()
            # IST training: distribute parameters to sub-networks
            num_subnet = args.num_subnet
            if (epoch % args.iter_per_site) == 0:
                main_dict = model.state_dict()
                feats_idx = []  # store all layer indices within a single list

                # create input partition
                if split_input:
                    feats_idx.append(torch.chunk(torch.randperm(in_feats), num_subnet))
                else:
                    feats_idx.append(None)

                # create hidden layer partitions
                for i in range(1, args.n_layers):
                    feats_idx.append(torch.chunk(torch.randperm(args.n_hidden), num_subnet))

                # create output layer partitions
                if split_output:
                    feats_idx.append(torch.chunk(torch.randperm(args.n_hidden), num_subnet))
                else:
                    feats_idx.append(None)

            for subnet_id in range(args.num_subnet):
                if (epoch % args.iter_per_site) == 0:
                    # create the sub-model to train
                    sub_model = GCN(g, in_feats, args.n_hidden, n_classes,
                                    args.n_layers, F.relu, args.dropout,
                                    use_layernorm, split_input, split_output,
                                    args.num_subnet)
                    sub_model = sub_model.to(device)
                    sub_dict = main_dict.copy()

                    # split input params
                    if split_input:
                        idx = feats_idx[0][subnet_id]
                        sub_dict['layers.0.weight'] = main_dict['layers.0.weight'][idx, :]

                    # split hidden params (and output params)
                    for i in range(1, args.n_layers + 1):
                        if i == args.n_layers and not split_output:
                            pass  # params stay the same
                        else:
                            idx = feats_idx[i][subnet_id]
                            sub_dict[f'layers.{i - 1}.weight'] = sub_dict[f'layers.{i - 1}.weight'][:, idx]
                            sub_dict[f'layers.{i - 1}.bias'] = main_dict[f'layers.{i - 1}.bias'][idx]
                            sub_dict[f'layers.{i}.weight'] = main_dict[f'layers.{i}.weight'][idx, :]

                    # use a step lr schedule
                    curr_lr = args.lr
                    if epoch >= int(args.n_epochs * 0.5):
                        curr_lr /= 10
                    if epoch >= int(args.n_epochs * 0.75):
                        curr_lr /= 10

                    # import params into subnet for training
                    sub_model.load_state_dict(sub_dict)
                    sub_models.append(sub_model)
                    sub_models = sub_models[-num_subnet:]
                    optimizer = torch.optim.Adam(sub_model.parameters(), lr=curr_lr,
                                                 weight_decay=args.weight_decay)
                    opt_list.append(optimizer)
                    opt_list = opt_list[-num_subnet:]
                else:
                    sub_model = sub_models[subnet_id]
                    optimizer = opt_list[subnet_id]
                # train a sub-network
                optimizer.zero_grad()
                sub_model.train()
                if split_input:
                    model_input = features[:, feats_idx[0][subnet_id]]
                else:
                    model_input = features
                logits = sub_model(model_input)
                loss = loss_fcn(logits[train_mask], labels[train_mask])
                # reset optimization for every sub training
                loss.backward()
                optimizer.step()

                # save sub-model parameters
                if (((epoch + 1) % args.iter_per_site == 0)
                        or (epoch == args.n_epochs - 1)):
                    sub_dict = sub_model.state_dict()
                    sub_dict_list.append(sub_dict)
                    sub_dict_list = sub_dict_list[-num_subnet:]

            # Merge parameters into the main network;
            # force aggregation if training is about to end
            if (((epoch + 1) % args.iter_per_site == 0)
                    or (epoch == args.n_epochs - 1)):
                update_dict = main_dict.copy()

                # copy in the input parameters
                if split_input:
                    if args.n_layers <= 1 and not split_output:
                        for idx, sub_dict in zip(feats_idx[0], sub_dict_list):
                            update_dict['layers.0.weight'][idx, :] = sub_dict['layers.0.weight']
                    else:
                        for i, sub_dict in enumerate(sub_dict_list):
                            curr_idx = feats_idx[0][i]
                            next_idx = feats_idx[1][i]
                            correct_rows = update_dict['layers.0.weight'][curr_idx, :]
                            correct_rows[:, next_idx] = sub_dict['layers.0.weight']
                            update_dict['layers.0.weight'][curr_idx, :] = correct_rows
                else:
                    if args.n_layers <= 1 and not split_output:
                        update_dict['layers.0.weight'] = sum(
                            sub_dict['layers.0.weight'] for sub_dict in sub_dict_list) / len(sub_dict_list)
                    else:
                        for i, sub_dict in enumerate(sub_dict_list):
                            next_idx = feats_idx[1][i]
                            update_dict['layers.0.weight'][:, next_idx] = sub_dict['layers.0.weight']

                # copy the rest of the parameters
                for i in range(1, args.n_layers + 1):
                    if i == args.n_layers:
                        if not split_output:
                            update_dict[f'layers.{i-1}.bias'] = sum(
                                sub_dict[f'layers.{i-1}.bias'] for sub_dict in sub_dict_list) / len(sub_dict_list)
                            update_dict[f'layers.{i}.weight'] = sum(
                                sub_dict[f'layers.{i}.weight'] for sub_dict in sub_dict_list) / len(sub_dict_list)
                        else:
                            for idx, sub_dict in zip(feats_idx[i], sub_dict_list):
                                update_dict[f'layers.{i-1}.bias'][idx] = sub_dict[f'layers.{i-1}.bias']
                                update_dict[f'layers.{i}.weight'][idx, :] = sub_dict[f'layers.{i}.weight']
                    else:
                        if i >= args.n_layers - 1 and not split_output:
                            for idx, sub_dict in zip(feats_idx[i], sub_dict_list):
                                update_dict[f'layers.{i-1}.bias'][idx] = sub_dict[f'layers.{i-1}.bias']
                                update_dict[f'layers.{i}.weight'][idx, :] = sub_dict[f'layers.{i}.weight']
                        else:
                            for j, sub_dict in enumerate(sub_dict_list):
                                curr_idx = feats_idx[i][j]
                                next_idx = feats_idx[i + 1][j]
                                update_dict[f'layers.{i-1}.bias'][curr_idx] = sub_dict[f'layers.{i-1}.bias']
                                correct_rows = update_dict[f'layers.{i}.weight'][curr_idx, :]
                                correct_rows[:, next_idx] = sub_dict[f'layers.{i}.weight']
                                update_dict[f'layers.{i}.weight'][curr_idx, :] = correct_rows

                model.load_state_dict(update_dict)
        else:
            raise NotImplementedError('Should train with IST')

        if epoch >= 3:
            dur.append(time.time() - t0)
        acc_val = evaluate(model, features, labels, val_mask)
        acc_test = evaluate(model, features, labels, test_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Val Accuracy {:.4f} | "
              "Test Accuracy {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), acc_val, acc_test,
                  n_edges / np.mean(dur) / 1000))
        record.append([acc_val, acc_test])

    all_test_acc = [v[1] for v in record]
    all_val_acc = [v[0] for v in record]
    acc = evaluate(model, features, labels, test_mask)
    print(f"Final Test Accuracy: {acc:.4f}")
    print(f"Best Val Accuracy: {max(all_val_acc):.4f}")
    print(f"Best Test Accuracy: {max(all_test_acc):.4f}")
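# The partition/merge logic above is intricate; a tiny self-contained sketch
# of the core IST idea (hypothetical shapes, not the code's actual layers):
# rows of a weight matrix are dealt out to subnets by a random permutation,
# each chunk is "trained" independently, and the chunks are scattered back.
import torch

W = torch.randn(8, 4)                                # a "main" weight matrix
chunks = torch.chunk(torch.randperm(W.shape[0]), 2)  # partition row indices across 2 subnets
sub_weights = [W[idx, :].clone() for idx in chunks]  # each subnet gets its rows
for sw in sub_weights:
    sw += 0.01                                       # stand-in for local training updates
merged = W.clone()
for idx, sw in zip(chunks, sub_weights):
    merged[idx, :] = sw                              # scatter trained rows back
assert torch.allclose(merged, W + 0.01)              # every row received its update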
# Train model
t_total = time.time()
loss_values = []
bad_counter = 0
best_loss = np.inf
best_epoch = 0
for epoch in range(args.epochs):
    loss_values.append(train(epoch))
    if loss_values[-1] < best_loss:
        best_loss = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
        best_state_dict = copy.deepcopy(model.state_dict())
    else:
        bad_counter += 1
        if bad_counter == args.patience:
            break

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Restore best model
print('Loading {}th epoch'.format(best_epoch))
model.load_state_dict(best_state_dict)
acc_test = test()
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()))
    return loss_val.item(), acc_val.item()


def test():
    model.eval()
    output = model(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))


stopping_args = Stop_args(patience=args.patience, max_epochs=args.epochs)
early_stopping = EarlyStopping(model, **stopping_args)
for epoch in range(args.epochs):
    loss_val, acc_val = train(epoch)
    if early_stopping.check([acc_val, loss_val], epoch):
        break

print("Optimization Finished!")

# Restore best model
print('Loading {}th epoch'.format(early_stopping.best_epoch))
model.load_state_dict(early_stopping.best_state)
test()
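# Stop_args and EarlyStopping above come from the source repo; a minimal
# sketch of a compatible helper is below (an assumption about its behavior:
# check() returns True once neither tracked quantity -- val accuracy up,
# val loss down -- has improved for `patience` consecutive epochs).
import copy

def Stop_args(patience=100, max_epochs=1000):
    return {'patience': patience, 'max_epochs': max_epochs}

class EarlyStopping:
    def __init__(self, model, patience=100, max_epochs=1000):
        self.model = model
        self.patience = patience
        self.max_epochs = max_epochs
        self.best_vals = None  # [best acc_val, best loss_val]
        self.best_epoch = 0
        self.best_state = None
        self.wait = 0

    def check(self, vals, epoch):
        # vals = [acc_val, loss_val]; higher accuracy or lower loss is progress
        improved = (self.best_vals is None
                    or vals[0] > self.best_vals[0]
                    or vals[1] < self.best_vals[1])
        if improved:
            if self.best_vals is None:
                self.best_vals = list(vals)
            else:
                self.best_vals = [max(vals[0], self.best_vals[0]),
                                  min(vals[1], self.best_vals[1])]
            self.best_epoch = epoch
            self.best_state = copy.deepcopy(self.model.state_dict())
            self.wait = 0
        else:
            self.wait += 1
        return self.wait >= self.patience or epoch >= self.max_epochs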
def main():
    net = GCN(num_classes=num_classes, input_size=train_args['input_size']).cuda()

    if len(train_args['snapshot']) == 0:
        curr_epoch = 0
    else:
        print('training resumes from ' + train_args['snapshot'])
        net.load_state_dict(
            torch.load(os.path.join(ckpt_path, exp_name, train_args['snapshot'])))
        split_snapshot = train_args['snapshot'].split('_')
        curr_epoch = int(split_snapshot[1])
        train_record['best_val_loss'] = float(split_snapshot[3])
        train_record['corr_mean_iu'] = float(split_snapshot[6])
        train_record['corr_epoch'] = curr_epoch
    net.train()

    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    train_simul_transform = simul_transforms.Compose([
        simul_transforms.Scale(int(train_args['input_size'][0] / 0.875)),
        simul_transforms.RandomCrop(train_args['input_size']),
        simul_transforms.RandomHorizontallyFlip()
    ])
    val_simul_transform = simul_transforms.Compose([
        simul_transforms.Scale(int(train_args['input_size'][0] / 0.875)),
        simul_transforms.CenterCrop(train_args['input_size'])
    ])
    img_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])
    target_transform = standard_transforms.Compose([
        expanded_transforms.MaskToTensor(),
        expanded_transforms.ChangeLabel(ignored_label, num_classes - 1)
    ])
    restore_transform = standard_transforms.Compose([
        expanded_transforms.DeNormalize(*mean_std),
        standard_transforms.ToPILImage()
    ])

    train_set = CityScapes('train', simul_transform=train_simul_transform,
                           transform=img_transform,
                           target_transform=target_transform)
    train_loader = DataLoader(train_set, batch_size=train_args['batch_size'],
                              num_workers=16, shuffle=True)
    val_set = CityScapes('val', simul_transform=val_simul_transform,
                         transform=img_transform,
                         target_transform=target_transform)
    val_loader = DataLoader(val_set, batch_size=val_args['batch_size'],
                            num_workers=16, shuffle=False)

    weight = torch.ones(num_classes)
    weight[num_classes - 1] = 0
    criterion = CrossEntropyLoss2d(weight).cuda()

    # Don't use weight_decay for biases; new (gcm/brm) layers get new_lr,
    # pretrained layers get pretrained_lr, and biases get a doubled rate.
    optimizer = optim.SGD([
        {'params': [param for name, param in net.named_parameters()
                    if name[-4:] == 'bias' and ('gcm' in name or 'brm' in name)],
         'lr': 2 * train_args['new_lr']},
        {'params': [param for name, param in net.named_parameters()
                    if name[-4:] != 'bias' and ('gcm' in name or 'brm' in name)],
         'lr': train_args['new_lr'],
         'weight_decay': train_args['weight_decay']},
        {'params': [param for name, param in net.named_parameters()
                    if name[-4:] == 'bias' and not ('gcm' in name or 'brm' in name)],
         'lr': 2 * train_args['pretrained_lr']},
        {'params': [param for name, param in net.named_parameters()
                    if name[-4:] != 'bias' and not ('gcm' in name or 'brm' in name)],
         'lr': train_args['pretrained_lr'],
         'weight_decay': train_args['weight_decay']}
    ], momentum=0.9, nesterov=True)

    if len(train_args['snapshot']) > 0:
        optimizer.load_state_dict(
            torch.load(os.path.join(ckpt_path, exp_name, 'opt_' + train_args['snapshot'])))
        optimizer.param_groups[0]['lr'] = 2 * train_args['new_lr']
        optimizer.param_groups[1]['lr'] = train_args['new_lr']
        optimizer.param_groups[2]['lr'] = 2 * train_args['pretrained_lr']
        optimizer.param_groups[3]['lr'] = train_args['pretrained_lr']

    if not os.path.exists(ckpt_path):
        os.mkdir(ckpt_path)
    if not os.path.exists(os.path.join(ckpt_path, exp_name)):
        os.mkdir(os.path.join(ckpt_path, exp_name))

    for epoch in range(curr_epoch, train_args['epoch_num']):
        train(train_loader, net, criterion, optimizer, epoch)
        validate(val_loader, net, criterion, optimizer, epoch, restore_transform)
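# The four param groups above follow a common fine-tuning recipe (doubled lr
# for biases, no weight decay on them, separate rates for new vs pretrained
# layers). A compact hypothetical helper expressing the same split -- the
# function name and group ordering are assumptions, not the source's code:
def make_param_groups(net, new_lr, pretrained_lr, weight_decay):
    groups = []
    for is_bias in (True, False):
        for is_new in (True, False):
            params = [p for n, p in net.named_parameters()
                      if (n.endswith('bias') == is_bias)
                      and (('gcm' in n or 'brm' in n) == is_new)]
            lr = new_lr if is_new else pretrained_lr
            group = {'params': params, 'lr': 2 * lr if is_bias else lr}
            if not is_bias:
                group['weight_decay'] = weight_decay
            groups.append(group)
    return groups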
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))
    return acc_test


model_file = 'model_save/' + args.dataset + '.pkl'

# Train model
t_total = time.time()
max_acc = 0
acc_list = []
for epoch in range(args.epochs):
    val_acc = train(epoch)
    if val_acc > max_acc:
        max_acc = val_acc
        torch.save(model.state_dict(), model_file)
    acc_list.append(val_acc)

if args.load_best:
    model.load_state_dict(torch.load(model_file))
print(max(acc_list))
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Testing
acc_test = test()

if len(args.save_file) > 0:
    with open(args.save_file, 'a') as f:
        f.write('GCN %.4f' % acc_test)
        f.write('\n')
        vecs = []
    cnt += sum(res)
    for j in res:
        print(int(j), file=f)
    print("")
print(cnt, "/", len(test_data))

# loss_test = F.nll_loss(output[idx_test], labels[idx_test])
# acc_test = accuracy(output[idx_test], labels[idx_test])
# print("Test set results:",
#       "loss= {:.4f}".format(loss_test.item()),
#       "accuracy= {:.4f}".format(acc_test.item()))

# Train model
t_total = time.time()
if args.train:
    for epoch in range(args.epochs):
        train(epoch)
    print("Optimization Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
    torch.save(model.state_dict(), 'model.mdl')
else:
    model.load_state_dict(torch.load('model.mdl'))

# Extract Embedding
# emb = torch.nn.Sequential(*list(model.children())[:-1])

# Testing
test()
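# The commented-out Sequential trick above does not work for models whose
# forward() takes extra arguments (e.g. an adjacency matrix); a forward hook
# extracts embeddings regardless of the forward signature. A minimal
# self-contained sketch on a toy module (the real GCN's penultimate layer
# would be hooked the same way):
import torch
import torch.nn as nn

toy = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
embeddings = {}
handle = toy[1].register_forward_hook(
    lambda module, inp, out: embeddings.update(hidden=out.detach()))
toy(torch.randn(3, 4))   # a normal forward pass fills the dict
handle.remove()
print(embeddings['hidden'].shape)  # torch.Size([3, 8])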