def main():
    from pre_process import preprocess
    feature, a_hat, labels = preprocess()
    print("loaded")

    selected, unselected = depart(len(labels), 1 - Config.test_ratio)
    labels_selected = labels[selected]
    labels_unselected = labels[unselected]

    feature = torch.from_numpy(feature).float().cuda()
    tensor_selected = torch.tensor(labels_selected).long().cuda()
    a_hat = torch.tensor(a_hat).float().cuda()

    net = GCN(a_hat, feature.shape[1], Config.num_classes,
              Config.hidden_size, Config.n_hidden_layer).cuda()
    print(net)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=Config.lr)

    net.train()
    for e in range(Config.num_epochs):
        optimizer.zero_grad()
        output = net(feature)
        loss = criterion(output[selected], tensor_selected)
        loss.backward()
        optimizer.step()
        trained_accuracy = evaluate(output[selected], labels_selected)
        untrained_accuracy = evaluate(output[unselected], labels_unselected)
        print("[Epoch %d]: trained acc: %.7f, untrained acc: %.7f, loss: %.7f"
              % (e, trained_accuracy, untrained_accuracy,
                 loss.detach().cpu().numpy()))
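# `depart` and `evaluate` above are repo helpers not shown here; minimal
# sketches of what they plausibly do (hypothetical implementations, not the
# repo's code): `depart` as a random index split, `evaluate` as argmax accuracy.
import numpy as np
import torch


def depart(n, ratio):
    """Randomly split range(n): ~ratio*n indices first, the remainder second."""
    perm = np.random.permutation(n)
    cut = int(n * ratio)
    return perm[:cut], perm[cut:]


def evaluate(output, labels):
    """Fraction of rows whose argmax matches the integer label."""
    pred = output.detach().cpu().numpy().argmax(axis=1)
    return float((pred == np.asarray(labels)).mean())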
def main():
    # Load data
    start = time.time()
    (N, _adj, _feats, _labels, train_adj, train_feats, train_nodes, val_nodes,
     test_nodes, y_train, y_val, y_test, val_mask,
     test_mask) = utils.load_data(args.dataset)
    print('Loaded data in {:.2f} seconds!'.format(time.time() - start))

    # Prepare Train Data
    start = time.time()
    _, parts = utils.partition_graph(train_adj, train_nodes,
                                     args.num_clusters_train)
    parts = [np.array(pt) for pt in parts]
    train_features, train_support, y_train = utils.preprocess_multicluster(
        train_adj, parts, train_feats, y_train, args.num_clusters_train,
        args.batch_size)
    print('Train Data pre-processed in {:.2f} seconds!'.format(time.time() - start))

    # Prepare Test Data
    if args.test == 1:
        y_test, test_mask = y_val, val_mask
    start = time.time()
    _, test_features, test_support, y_test, test_mask = utils.preprocess(
        _adj, _feats, y_test, np.arange(N), args.num_clusters_test, test_mask)
    print('Test Data pre-processed in {:.2f} seconds!'.format(time.time() - start))

    # Batch indices, reshuffled every epoch
    batch_idxs = list(range(len(train_features)))

    # Model (`_in`/`_out` are the input/output feature dimensions and `writer`
    # is a SummaryWriter; both are assumed to be defined at module level)
    model = GCN(fan_in=_in, fan_out=_out, layers=args.layers,
                dropout=args.dropout, normalize=True, bias=False).float()
    model.cuda()

    # Loss Function
    criterion = torch.nn.CrossEntropyLoss()

    # Optimization Algorithm
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # Learning Rate Schedule
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr=args.lr,
        steps_per_epoch=int(args.num_clusters_train / args.batch_size),
        epochs=args.epochs + 1, anneal_strategy='linear')

    model.train()

    # Train
    for epoch in range(args.epochs + 1):
        np.random.shuffle(batch_idxs)
        avg_loss = 0
        start = time.time()
        for batch in batch_idxs:
            loss = train(model.cuda(), criterion, optimizer,
                         train_features[batch], train_support[batch],
                         y_train[batch], dataset=args.dataset)
            if args.lr_scheduler == 1:
                scheduler.step()
            avg_loss += loss.item()

        # Write Train stats to tensorboard
        writer.add_scalar('time/train', time.time() - start, epoch)
        writer.add_scalar('loss/train', avg_loss / len(train_features), epoch)

    if args.test == 1:
        # Test on cpu
        f1 = test(model.cpu(), test_features, test_support, y_test, test_mask,
                  device='cpu')
        print('f1: {:.4f}'.format(f1))
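# The per-batch `train` helper called above is defined elsewhere in the repo;
# a minimal sketch of such a step, assuming features/support/labels arrive as
# tensors on the GPU with labels as class indices (an assumption, not the
# original implementation):
def train(model, criterion, optimizer, features, support, labels, dataset=None):
    """One optimization step on a single pre-processed cluster of the graph."""
    model.train()
    optimizer.zero_grad()
    logits = model(features, support)  # forward pass restricted to one cluster
    loss = criterion(logits, labels)
    loss.backward()
    optimizer.step()
    return loss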
def test_memorize_minibatch(self):
    for db_name in self.db_names:
        db_info = get_db_info(db_name)
        train_data, val_data, _ = get_train_val_test_datasets(
            dataset_name=db_name,
            train_test_split='use_full_train',
            encoders=dict(CATEGORICAL='CategoricalOrdinalEnc',
                          SCALAR='ScalarRobustScalerEnc',
                          DATETIME='DatetimeScalarEnc',
                          LATLONG='LatLongScalarEnc',
                          TEXT='TextSummaryScalarEnc'))
        train_loader = get_dataloader(dataset=train_data,
                                      batch_size=256,
                                      sampler_class_name='SequentialSampler',
                                      num_workers=0,
                                      max_nodes_per_graph=False)
        writer = DummyWriter()
        model = GCN(writer,
                    db_info=db_info,
                    hidden_dim=256,
                    n_init_layers=3,
                    activation_class_name='SELU',
                    activation_class_kwargs={},
                    loss_class_kwargs={},
                    loss_class_name='CrossEntropyLoss',
                    p_dropout=0.0,
                    drop_whole_embeddings=True,
                    n_layers=3,
                    readout_class_name='AvgPooling',
                    readout_kwargs={})
        if torch.cuda.is_available():
            model.cuda()
            model.device = torch.device('cuda:0')
        else:
            model.device = torch.device('cpu')
        model.train()
        optimizer = AdamW(model.parameters(), lr=0.001, weight_decay=0.0)

        bdgl, features, label = next(iter(train_loader))
        recursive_to((bdgl, features, label), model.device)
        for _ in tqdm(range(200)):
            optimizer.zero_grad()
            output = model(bdgl, features)
            loss = model.loss_fxn(output, label)
            if loss < 1e-4:
                break
            loss.backward()
            optimizer.step()
        else:
            tqdm.write(f'Loss: {loss}')
            self.fail("Didn't memorize minibatch")
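# `recursive_to` above moves a nested batch onto the model's device; the test
# calls it for side effect, so the real helper presumably moves members in
# place. A minimal sketch that instead returns converted containers (an
# assumption, not the repo's implementation):
import torch


def recursive_to(obj, device):
    """Recursively move tensors in tuples/lists/dicts (and .to()-able objects)."""
    if torch.is_tensor(obj) or hasattr(obj, 'to'):  # tensors, DGL graphs, ...
        return obj.to(device)
    if isinstance(obj, (list, tuple)):
        return type(obj)(recursive_to(x, device) for x in obj)
    if isinstance(obj, dict):
        return {k: recursive_to(v, device) for k, v in obj.items()}
    return obj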
def execute(params, budget=None, max_epoch=243, device='cpu', seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if device == "cuda":
        torch.cuda.manual_seed(seed)

    # Load data
    if params['dataset'] == "cora":
        adj, features, labels, idx_train, idx_val, idx_test = load_data(
            dataset=params['dataset'], train_percent=0.052)
    if params['dataset'] == "citeseer":
        adj, features, labels, idx_train, idx_val, idx_test = load_citeseer(
            train_percent=0.036)

    # Model and optimizer
    model = GCN(nfeat=features.shape[1],
                nhid=params['hidden'],
                nclass=labels.max().item() + 1,
                dropout=params['dropout'])
    optimizer = optim.Adam(model.parameters(),
                           lr=params['lr'],
                           weight_decay=params['weight_decay'])

    if device == "cuda":
        model.cuda()
        features = features.cuda()
        adj = adj.cuda()
        labels = labels.cuda()
        idx_train = idx_train.cuda()
        idx_val = idx_val.cuda()
        idx_test = idx_test.cuda()

    # Train model
    if device == "cuda":
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
    else:
        t1 = time.time_ns()

    model.train()
    num_epoch = int(budget) if budget is not None else max_epoch
    for epoch in range(num_epoch):
        optimizer.zero_grad()
        output = model(features, adj)
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
        loss_train.backward()
        optimizer.step()

    # Evaluation
    model.eval()
    output = model(features, adj)
    loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])

    if device == "cuda":
        end.record()
        torch.cuda.synchronize()
        total_time = start.elapsed_time(end) / 1e3
        sys.stdout.flush()
        acc_val = acc_val.item()
    else:
        t2 = time.time_ns()
        total_time = (t2 - t1) / 1e9

    print()
    print(f"dataset={params['dataset']}, num_epoch={num_epoch}, "
          f"device={next(model.parameters()).device}")
    print("Validation results:",
          "loss= {:.4f}".format(loss_val.item()),
          "accuracy= {:.4f}".format(acc_val))
    print("Total training time: {:.4f} sec".format(total_time))
    return 1 - acc_val
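# `accuracy` above follows the usual pygcn-style helper; a minimal sketch,
# assuming `output` holds per-class scores or log-probabilities and `labels`
# are integer class indices:
def accuracy(output, labels):
    """Mean prediction accuracy as a 0-dim tensor."""
    preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double()
    return correct.sum() / len(labels)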
# Fragment: one training step on a batch (model, optimizer, criterion,
# features, batch_labels, epoch, device and start_time come from the
# enclosing training loop, which is not part of this excerpt).
adj = adj.to(device)
t = time.time()
output = model(features, adj)
loss_train = criterion(output, batch_labels)
acc_train = accuracy(output, batch_labels)
optimizer.zero_grad()
loss_train.backward()
optimizer.step()

# validation statistics computed on the same batch
loss_val = F.nll_loss(output, batch_labels)
acc_val = accuracy(output, batch_labels)
model.train()
print('Epoch: {:04d}'.format(epoch + 1),
      'loss_train: {:.4f}'.format(loss_train.item()),
      'acc_train: {:.4f}'.format(acc_train.item()),
      'loss_val: {:.4f}'.format(loss_val.item()),
      'acc_val: {:.4f}'.format(acc_val.item()),
      'time: {:.4f}s'.format(time.time() - t))

# after the training loop finishes
t_total = time.time()
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - start_time))
def main():
    # Make dir
    temp = "./tmp"
    os.makedirs(temp, exist_ok=True)

    # Load data
    start = time.time()
    (train_adj, full_adj, train_feats, test_feats, y_train, y_val, y_test,
     train_mask, val_mask, test_mask, _, val_nodes, test_nodes, num_data,
     visible_data) = utils.load_data(args.dataset)
    print('Loaded data in {:.2f} seconds!'.format(time.time() - start))

    # Prepare Train Data
    if args.batch_size > 1:
        start = time.time()
        _, parts = utils.partition_graph(train_adj, visible_data,
                                         args.num_clusters_train)
        print('Partition graph in {:.2f} seconds!'.format(time.time() - start))
        parts = [np.array(pt) for pt in parts]
    else:
        start = time.time()
        (parts, features_batches, support_batches, y_train_batches,
         train_mask_batches) = utils.preprocess(
             train_adj, train_feats, y_train, train_mask, visible_data,
             args.num_clusters_train, diag_lambda=args.diag_lambda)
        print('Partition graph in {:.2f} seconds!'.format(time.time() - start))

    # Prepare Valid Data
    start = time.time()
    (_, val_features_batches, val_support_batches, y_val_batches,
     val_mask_batches) = utils.preprocess(
         full_adj, test_feats, y_val, val_mask, np.arange(num_data),
         args.num_clusters_val, diag_lambda=args.diag_lambda)
    print('Partition graph in {:.2f} seconds!'.format(time.time() - start))

    # Prepare Test Data
    start = time.time()
    (_, test_features_batches, test_support_batches, y_test_batches,
     test_mask_batches) = utils.preprocess(
         full_adj, test_feats, y_test, test_mask, np.arange(num_data),
         args.num_clusters_test, diag_lambda=args.diag_lambda)
    print('Partition graph in {:.2f} seconds!'.format(time.time() - start))

    idx_parts = list(range(len(parts)))

    # Model (`_in`/`_out` are the input/output dimensions and `writer` is a
    # SummaryWriter; both are assumed to be defined at module level)
    model = GCN(fan_in=_in, fan_out=_out, layers=args.layers,
                dropout=args.dropout, normalize=True, bias=False,
                precalc=True).float()
    model.to(torch.device('cuda'))
    print(model)

    # Loss Function
    if args.multilabel:
        criterion = torch.nn.BCEWithLogitsLoss()
    else:
        criterion = torch.nn.CrossEntropyLoss()

    # Optimization Algorithm
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # Learning Rate Schedule
    # scheduler = torch.optim.lr_scheduler.OneCycleLR(
    #     optimizer, max_lr=args.lr,
    #     steps_per_epoch=int(args.num_clusters_train / args.batch_size),
    #     epochs=args.epochs + 1, anneal_strategy='linear')

    pbar = tqdm.tqdm(total=args.epochs, dynamic_ncols=True)
    for epoch in range(args.epochs + 1):
        # Train
        np.random.shuffle(idx_parts)
        start = time.time()
        avg_loss = 0
        total_correct = 0
        n_nodes = 0
        if args.batch_size > 1:
            (features_batches, support_batches, y_train_batches,
             train_mask_batches) = utils.preprocess_multicluster(
                 train_adj, parts, train_feats, y_train, train_mask,
                 args.num_clusters_train, args.batch_size, args.diag_lambda)
            for pid in range(len(features_batches)):
                # Use preprocessed batch data
                features_b = features_batches[pid]
                support_b = support_batches[pid]
                y_train_b = y_train_batches[pid]
                train_mask_b = train_mask_batches[pid]
                loss, pred, labels = train(model.train(), criterion, optimizer,
                                           features_b, support_b, y_train_b,
                                           train_mask_b, torch.device('cuda'))
                avg_loss += loss.item()
                n_nodes += pred.squeeze().numel()
                total_correct += torch.eq(pred.squeeze(),
                                          labels.squeeze()).sum().item()
        else:
            np.random.shuffle(idx_parts)
            for pid in idx_parts:
                # Use preprocessed batch data
                features_b = features_batches[pid]
                support_b = support_batches[pid]
                y_train_b = y_train_batches[pid]
                train_mask_b = train_mask_batches[pid]
                loss, pred, labels = train(model.train(), criterion, optimizer,
                                           features_b, support_b, y_train_b,
                                           train_mask_b, torch.device('cuda'))
                # accumulate (not overwrite), matching the branch above
                avg_loss += loss.item()
                n_nodes += pred.squeeze().numel()
                total_correct += torch.eq(pred.squeeze(),
                                          labels.squeeze()).sum().item()
        train_acc = total_correct / n_nodes

        # Write Train stats to tensorboard
        writer.add_scalar('time/train', time.time() - start, epoch)
        writer.add_scalar('loss/train', avg_loss / len(features_batches), epoch)
        writer.add_scalar('acc/train', train_acc, epoch)

        # Validation
        cost, acc, micro, macro = evaluate(model.eval(), criterion,
                                           val_features_batches,
                                           val_support_batches, y_val_batches,
                                           val_mask_batches, val_nodes,
                                           torch.device("cuda"))

        # Write Valid stats to tensorboard
        writer.add_scalar('acc/valid', acc, epoch)
        writer.add_scalar('mi_F1/valid', micro, epoch)
        writer.add_scalar('ma_F1/valid', macro, epoch)
        writer.add_scalar('loss/valid', cost, epoch)

        pbar.set_postfix({"t": avg_loss / len(features_batches),
                          "t_acc": train_acc, "v": cost, "v_acc": acc})
        pbar.update()
    pbar.close()

    # Test
    if args.test == 1:
        # Test on cpu
        cost, acc, micro, macro = test(model.eval(), criterion,
                                       test_features_batches,
                                       test_support_batches, y_test_batches,
                                       test_mask_batches, torch.device("cpu"))
        writer.add_scalar('acc/test', acc, epoch)
        writer.add_scalar('mi_F1/test', micro, epoch)
        writer.add_scalar('ma_F1/test', macro, epoch)
        writer.add_scalar('loss/test', cost, epoch)
        print('test: acc: {:.4f}'.format(acc))
        print('test: mi_f1: {:.4f}, ma_f1: {:.4f}'.format(micro, macro))
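# The `train` helper used above returns (loss, pred, labels) for the masked
# nodes of one cluster batch; a minimal sketch under that assumption (labels
# may arrive one-hot, hence the argmax); not the original implementation:
def train(model, criterion, optimizer, features, support, y, mask, device):
    """One step on a cluster batch; loss/accuracy count masked nodes only."""
    features = torch.from_numpy(features).float().to(device)
    labels = torch.from_numpy(y.argmax(axis=1)).long().to(device)
    mask_t = torch.from_numpy(mask.astype(bool)).to(device)
    optimizer.zero_grad()
    logits = model(features, support)
    loss = criterion(logits[mask_t], labels[mask_t])
    loss.backward()
    optimizer.step()
    pred = logits[mask_t].argmax(dim=1)
    return loss.detach(), pred, labels[mask_t]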
def train(**kwargs):
    """
    GCN training
    ---
    Folders you need:
        - args.path4AffGraph
        - args.path4node_feat
        - path4partial_label
    Folders that will be created:
        - data/GCN_prediction/label
        - data/GCN_prediction/logit
    """
    # os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, [0, 1, 2, 3]))
    t_start = time.time()
    # update the config from the command-line arguments
    args.parse(**kwargs)
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device = torch.device("cuda:" + str(kwargs["GPU"]))
    print(device)

    # write the modified parameters into the TensorBoard run name
    if kwargs["debug"] is False:
        comment_init = ''
        for k, v in kwargs.items():
            comment_init += '|{} '.format(v)
        writer = SummaryWriter(comment=comment_init)

    # === set up evaluation objects for later
    IoU = IOUMetric(args.num_class)
    IoU_CRF = IOUMetric(args.num_class)

    # === dataset
    train_dataloader = graph_voc(start_idx=kwargs["start_index"],
                                 end_idx=kwargs["end_index"],
                                 device=device)

    # === for each image, train and test on the same graph
    t4epoch = time.time()
    for ii, data in enumerate(train_dataloader):
        if data is None:
            continue

        # === use RGBXY as feature
        # if args.use_RGBXY:
        #     data["rgbxy_t"] = normalize_rgbxy(data["rgbxy_t"])
        #     features_t = data["rgbxy_t"].clone()
        # === only RGB as feature
        t_be = time.time()
        if args.use_lap:
            # still under construction...
            H, W, C = data["rgbxy_t"].shape
            A = torch.zeros([H * W, H * W], dtype=torch.float64)

            def find_neibor(card_x, card_y, H, W, radius=2):
                """
                Return the indices of the neighbors of (x, y) as a list.
                """
                neibors_idx = []
                for idx_x in np.arange(card_x - radius, card_x + radius + 1):
                    for idx_y in np.arange(card_y - radius,
                                           card_y + radius + 1):
                        if (-radius < idx_x < H) and (-radius < idx_y < W):
                            neibors_idx.append(
                                (idx_x * W + idx_y, idx_x, idx_y))
                return neibors_idx

            for node_idx in range(H * W):
                card_x, card_y = node_idx // W, node_idx % W
                neibors = find_neibor(card_x, card_y, H, W, radius=1)
                for nei in neibors:
                    diff_rgb = (data["rgbxy_t"][card_x, card_y, :3] -
                                data["rgbxy_t"][nei[1], nei[2], :3])
                    diff_xy = (data["rgbxy_t"][card_x, card_y, 3:] -
                               data["rgbxy_t"][nei[1], nei[2], 3:])
                    A[node_idx, nei[0]] = torch.exp(
                        -torch.pow(torch.norm(diff_rgb), 2) /
                        (2. * args.CRF_deeplab["bi_rgb_std"])) + torch.exp(
                            -torch.pow(torch.norm(diff_xy), 2) /
                            (2. * args.CRF_deeplab["bi_xy_std"]))
            D = torch.diag(A.sum(dim=1))
            L_mat = D - A
            print("time for Laplacian {:3f} s".format(time.time() - t_be))

        # === Model and optimizer
        img_label = load_image_label_from_xml(img_name=data["img_name"],
                                              voc12_root=args.path4VOC_root)
        img_class = [idx + 1 for idx, f in enumerate(img_label) if int(f) == 1]
        num_class = np.max(img_class) + 1
        model = GCN(
            nfeat=data["features_t"].shape[1],
            nhid=args.num_hid_unit,
            # the image-level label has no BG class; an adaptive num_class
            # should perform better
            nclass=args.num_class,  # args.num_class | num_class
            dropout=args.drop_rate)
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        # === move tensors to the GPU
        if args.cuda:
            model.to(device)
            data["features_t"] = data["features_t"].to(device)
            data["adj_t"] = data["adj_t"].to(device)
            data["labels_t"] = data["labels_t"].to(device)
            data["label_fg_t"] = data["label_fg_t"].to(device)
            data["label_bg_t"] = data["label_bg_t"].to(device)
            # L_mat = L_mat.to(device)

        # === save the prediction before training
        if args.save_mask_before_train:
            model.eval()
            postprocess_image_save(img_name=data["img_name"],
                                   model_output=model(
                                       data["features_t"],
                                       data["adj_t"]).detach(),
                                   epoch=0)

        # ==== Train model
        criterion_ent = HLoss()
        # criterion_sym = symmetricLoss()
        for epoch in range(args.max_epoch):
            model.train()
            optimizer.zero_grad()
            output = model(data["features_t"], data["adj_t"])

            # === separate FG/BG losses
            loss_fg = F.nll_loss(output, data["label_fg_t"], ignore_index=255)
            loss_bg = F.nll_loss(output, data["label_bg_t"], ignore_index=255)
            # loss_sym = criterion_sym(output, labels_t, ignore_index=255)
            loss = loss_fg + loss_bg
            if args.use_ent:
                loss_entmin = criterion_ent(output, data["labels_t"],
                                            ignore_index=255)
                loss += 10. * loss_entmin
            if args.use_lap:
                loss_lap = torch.trace(
                    torch.mm(output.transpose(1, 0),
                             torch.mm(L_mat.type_as(output),
                                      output))) / (H * W)
                gamma = 1e-2
                loss += gamma * loss_lap
            # loss = F.nll_loss(output, labels_t, ignore_index=255)

            if loss is None:
                print("skip this image: ", data["img_name"])
                break

            # === for normalized cut
            # lamda = args.lamda
            # n_cut = 0.
            # if args.use_regular_NCut:
            #     W = gaussian_propagator(output)
            #     d = torch.sum(W, dim=1)
            #     for k in range(output.shape[1]):
            #         s = output[idx_test_t, k]
            #         n_cut = n_cut + torch.mm(
            #             torch.mm(torch.unsqueeze(s, 0), W),
            #             torch.unsqueeze(1 - s, 1)) / (torch.dot(d, s))

            # === compute loss & update parameters
            # loss_train = loss.cuda() + lamda * n_cut
            loss_train = loss.cuda()
            loss_train.backward()
            optimizer.step()

            # === save the predicted mask & per-image IoU at the last epoch
            if (epoch + 1) % args.max_epoch == 0 and args.save_mask:
                t_now = time.time()
                if not kwargs["debug"]:
                    evaluate_IoU(model=model,
                                 features=data["features_t"],
                                 adj=data["adj_t"],
                                 img_name=data["img_name"],
                                 epoch=args.max_epoch,
                                 img_idx=ii + 1,
                                 writer=writer,
                                 IoU=IoU,
                                 IoU_CRF=IoU_CRF,
                                 use_CRF=False,
                                 save_prediction_np=True)
                print("[{}/{}] time: {:.4f}s\n\n".format(
                    ii + 1, len(train_dataloader), t_now - t4epoch))
                t4epoch = t_now
        # end for epoch
    # end for dataloader

    if kwargs["debug"] is False:
        writer.close()
    print("Training finished!")
    print("Total time elapsed: {:.0f} h {:.0f} m {:.0f} s\n".format(
        (time.time() - t_start) // 3600,
        (time.time() - t_start) / 60 % 60,
        (time.time() - t_start) % 60))
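# `HLoss` above is an entropy-minimization criterion; a minimal sketch,
# assuming the model emits log-probabilities (the other losses use
# F.nll_loss) and that nodes labeled `ignore_index` are excluded:
import torch
import torch.nn as nn


class HLoss(nn.Module):
    """Mean per-node entropy H(p) = -sum_c p_c * log p_c over kept nodes."""

    def forward(self, log_probs, labels, ignore_index=255):
        keep = labels != ignore_index
        probs = log_probs.exp()
        entropy = -(probs * log_probs).sum(dim=1)
        return entropy[keep].mean()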
def gcn_train(**kwargs):
    """
    GCN training
    ---
    Folders you need:
        - args.path4AffGraph
        - args.path4node_feat
        - path4partial_label
    Folders that will be created:
        - data/GCN4DeepLab/Label
        - data/GCN4DeepLab/Logit
    """
    t_start = time.time()
    # update config
    args.parse(**kwargs)
    device = torch.device("cuda:" + str(kwargs["GPU"]))
    print(device)

    # tensorboard (writer defaults to None so the final close() is safe when
    # args.use_TB is off)
    writer = None
    if args.use_TB:
        time_now = datetime.datetime.today()
        time_now = "{}-{}-{}|{}-{}".format(time_now.year, time_now.month,
                                           time_now.day, time_now.hour,
                                           time_now.minute // 30)
        keys_ignore = ["start_index", "GPU"]
        comment_init = ''
        for k, v in kwargs.items():
            if k not in keys_ignore:
                comment_init += '|{} '.format(v)
        writer = SummaryWriter(
            logdir='runs/{}/{}'.format(time_now, comment_init))

    # initialize the IoUMetric object for evaluation
    IoU = IOUMetric(args.num_class)

    # initialize the dataset
    train_dataloader = graph_voc(start_idx=kwargs["start_index"],
                                 end_idx=kwargs["end_index"],
                                 device=device)

    # train a separate GCN for each image
    t4epoch = time.time()
    for ii, data in enumerate(train_dataloader):
        if data is None:
            continue
        img_label = load_image_label_from_xml(img_name=data["img_name"],
                                              voc12_root=args.path4VOC_root)
        img_class = [idx + 1 for idx, f in enumerate(img_label) if int(f) == 1]
        num_class = np.max(img_class) + 1
        model = GCN(nfeat=data["features_t"].shape[1],
                    nhid=args.num_hid_unit,
                    nclass=args.num_class,
                    dropout=args.drop_rate)
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        # put data on the GPU
        if args.cuda:
            model.to(device)
            data["features_t"] = data["features_t"].to(device)
            data["adj_t"] = data["adj_t"].to(device)
            data["labels_t"] = data["labels_t"].to(device)
            data["label_fg_t"] = data["label_fg_t"].to(device)
            data["label_bg_t"] = data["label_bg_t"].to(device)

        t_be = time.time()
        H, W, C = data["rgbxy_t"].shape
        N = H * W
        # laplacian
        if args.use_lap:
            L_mat = compute_lap_test(data, device, radius=2).to(device)
            print("Time for laplacian {:3.1f} s".format(time.time() - t_be))

        criterion_ent = HLoss()
        for epoch in range(args.max_epoch):
            model.train()
            optimizer.zero_grad()
            output = model(data["features_t"], data["adj_t"])

            # foreground and background loss
            loss_fg = F.nll_loss(output, data["label_fg_t"], ignore_index=255)
            loss_bg = F.nll_loss(output, data["label_bg_t"], ignore_index=255)
            loss = loss_fg + loss_bg
            if args.use_ent:
                loss_entmin = criterion_ent(output, data["labels_t"],
                                            ignore_index=255)
                loss += 10. * loss_entmin
            if args.use_lap:
                loss_lap = torch.trace(
                    torch.mm(output.transpose(1, 0),
                             torch.mm(L_mat.type_as(output), output))) / N
                gamma = 1e-2
                loss += gamma * loss_lap

            if loss is None:
                print("skip this image: ", data["img_name"])
                break

            loss_train = loss.cuda()
            loss_train.backward()
            optimizer.step()

            # save the predicted mask and IoU at the last epoch
            if (epoch + 1) % args.max_epoch == 0 and args.save_mask:
                t_now = time.time()
                evaluate_IoU(model=model,
                             features=data["features_t"],
                             adj=data["adj_t"],
                             img_name=data["img_name"],
                             img_idx=ii + 1,
                             writer=writer,
                             IoU=IoU,
                             save_prediction_np=True)
                print("evaluate time: {:3.1f} s".format(time.time() - t_now))
                print("[{}/{}] time: {:.1f}s\n\n".format(
                    ii + 1, len(train_dataloader), t_now - t4epoch))
                t4epoch = t_now
                print("======================================")

    if writer is not None:
        writer.close()
    print("Training finished!")
    print("Total time elapsed: {:.0f} h {:.0f} m {:.0f} s\n".format(
        (time.time() - t_start) // 3600,
        (time.time() - t_start) / 60 % 60,
        (time.time() - t_start) % 60))
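# `compute_lap_test` is assumed to build the same dense Laplacian L = D - A
# as the nested-loop construction in `train` above (Gaussian RGB/XY
# affinities between pixels within `radius`), only vectorized per offset;
# a sketch under that assumption, reusing the args.CRF_deeplab std terms:
import torch


def compute_lap_test(data, device, radius=2):
    """Dense L = D - A over the H*W pixel graph of data['rgbxy_t']."""
    H, W, _ = data["rgbxy_t"].shape
    feats = data["rgbxy_t"].reshape(H * W, 5).double()
    A = torch.zeros(H * W, H * W, dtype=torch.float64)
    idx = torch.arange(H * W).reshape(H, W)
    for dx in range(-radius, radius + 1):
        for dy in range(-radius, radius + 1):
            # index grids of all (pixel, pixel+offset) pairs inside the image
            src = idx[max(0, -dx):H - max(0, dx),
                      max(0, -dy):W - max(0, dy)].reshape(-1)
            dst = idx[max(0, dx):H - max(0, -dx),
                      max(0, dy):W - max(0, -dy)].reshape(-1)
            d_rgb = (feats[src, :3] - feats[dst, :3]).pow(2).sum(1)
            d_xy = (feats[src, 3:] - feats[dst, 3:]).pow(2).sum(1)
            A[src, dst] = (
                torch.exp(-d_rgb / (2. * args.CRF_deeplab["bi_rgb_std"])) +
                torch.exp(-d_xy / (2. * args.CRF_deeplab["bi_xy_std"])))
    L = torch.diag(A.sum(dim=1)) - A
    return L.to(device)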
def train_gcn(dataset, test_ratio=0.5, val_ratio=0.2, seed=1, n_hidden=64,
              n_epochs=200, lr=1e-2, weight_decay=5e-4, dropout=0.5,
              use_embs=False, verbose=True, cuda=False):
    data = dataset.get_data()

    # trainable text embeddings
    if use_embs:
        pad_ix, n_tokens, matrix, pretrained_embs = data['features']
        if pretrained_embs is not None:
            pretrained_embs = torch.FloatTensor(pretrained_embs)
        features = torch.LongTensor(matrix)
    else:
        pad_ix = None
        n_tokens = None
        pretrained_embs = None
        features = torch.FloatTensor(data['features'])

    labels = torch.LongTensor(data['labels'])
    n = len(data['ids'])
    train_mask, val_mask, test_mask = get_masks(n, data['main_ids'],
                                                data['main_labels'],
                                                test_ratio=test_ratio,
                                                val_ratio=val_ratio,
                                                seed=seed)
    train_mask = torch.BoolTensor(train_mask)
    val_mask = torch.BoolTensor(val_mask)
    test_mask = torch.BoolTensor(test_mask)

    if cuda:
        torch.cuda.set_device("cuda:0")
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = DGLGraph(data['graph'])
    g = dgl.transform.add_self_loop(g)
    n_edges = g.number_of_edges()

    # symmetric normalization factor D^{-1/2}
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    if use_embs:
        if pretrained_embs is not None:
            in_feats = 100
        else:
            in_feats = 64
    else:
        in_feats = features.shape[1]

    # +1 for the unknown class
    n_classes = data['n_classes'] + 1

    model = GCN(g, in_feats=in_feats, n_hidden=n_hidden, n_classes=n_classes,
                activation=F.relu, dropout=dropout, use_embs=use_embs,
                pretrained_embs=pretrained_embs, pad_ix=pad_ix,
                n_tokens=n_tokens)
    if cuda:
        model.cuda()

    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                 weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.9,
                                                           patience=20,
                                                           min_lr=1e-10)
    best_f1 = -100

    dur = []
    for epoch in range(n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()

        # forward pass with random feature masking (~20% of entries zeroed)
        mask_probs = torch.empty(features.shape).uniform_(0, 1)
        if cuda:
            mask_probs = mask_probs.cuda()
        mask_features = torch.where(mask_probs > 0.2, features,
                                    torch.zeros_like(features))
        logits = model(mask_features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        f1 = evaluate(model, features, labels, val_mask)
        scheduler.step(1 - f1)
        if f1 > best_f1:
            best_f1 = f1
            torch.save(model.state_dict(), 'best_model.pt')
        if verbose:
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | F1 {:.4f} | "
                  "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur),
                                                loss.item(), f1,
                                                n_edges / np.mean(dur) / 1000))

    model.load_state_dict(torch.load('best_model.pt'))
    f1 = evaluate(model, features, labels, test_mask)
    if verbose:
        print()
        print("Test F1 {:.2}".format(f1))
    return f1
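# `get_masks` above is assumed to split the labeled `main_ids` into boolean
# train/val/test masks over all n nodes (the real helper may also stratify by
# `main_labels`; this hypothetical sketch ignores that):
import numpy as np


def get_masks(n, main_ids, main_labels, test_ratio=0.5, val_ratio=0.2, seed=1):
    """Return (train, val, test) boolean masks of length n over main_ids."""
    rng = np.random.RandomState(seed)
    ids = rng.permutation(main_ids)
    n_test = int(len(ids) * test_ratio)
    n_val = int(len(ids) * val_ratio)
    test_ids, val_ids, train_ids = np.split(ids, [n_test, n_test + n_val])
    masks = []
    for subset in (train_ids, val_ids, test_ids):
        mask = np.zeros(n, dtype=bool)
        mask[subset] = True
        masks.append(mask)
    return tuple(masks)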
def main(args):
    # convert boolean-valued string args
    assert args.use_ist in ['True', 'False'], \
        f"Only True or False for use_ist, got {args.use_ist}"
    assert args.split_input in ['True', 'False'], \
        f"Only True or False for split_input, got {args.split_input}"
    assert args.split_output in ['True', 'False'], \
        f"Only True or False for split_output, got {args.split_output}"
    assert args.self_loop in ['True', 'False'], \
        f"Only True or False for self_loop, got {args.self_loop}"
    assert args.use_layernorm in ['True', 'False'], \
        f"Only True or False for use_layernorm, got {args.use_layernorm}"
    assert args.use_random_proj in ['True', 'False'], \
        f"Only True or False for use_random_proj, got {args.use_random_proj}"
    use_ist = (args.use_ist == 'True')
    split_input = (args.split_input == 'True')
    split_output = (args.split_output == 'True')
    self_loop = (args.self_loop == 'True')
    use_layernorm = (args.use_layernorm == 'True')
    use_random_proj = (args.use_random_proj == 'True')

    # make sure the hidden width divides evenly across subnets
    assert (args.n_hidden % args.num_subnet) == 0

    # load and preprocess dataset
    global t0
    if args.dataset in {'cora', 'citeseer', 'pubmed'}:
        data = load_data(args)
    else:
        raise NotImplementedError(f'{args.dataset} is not a valid dataset')

    if use_random_proj:
        # densify input features with a random projection; make the input
        # dimension divisible by the number of subnets, otherwise some
        # parameters of the last subnet are handled improperly
        from sklearn import random_projection
        n_components = int(
            data.features.shape[-1] / args.num_subnet) * args.num_subnet
        transformer = random_projection.GaussianRandomProjection(
            n_components=n_components)
        new_feature = transformer.fit_transform(data.features)
        features = torch.FloatTensor(new_feature)
    else:
        assert (data.features.shape[-1] % args.num_subnet) == 0
        features = torch.FloatTensor(data.features)

    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    # graph preprocessing and normalization factor
    g = data.graph
    if self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    g = g.to(device)
    n_edges = g.number_of_edges()
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    norm = norm.to(device)
    g.ndata['norm'] = norm.unsqueeze(1)

    # create the main GCN model
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                F.relu, args.dropout, use_layernorm)
    model = model.to(device)
    loss_fcn = torch.nn.CrossEntropyLoss()

    dur = []
    record = []
    sub_models = []
    opt_list = []
    sub_dict_list = []
    main_dict = None
    for epoch in range(args.n_epochs):
        if epoch >= 3:
            t0 = time.time()
        if not use_ist:
            raise NotImplementedError('Should train with IST')

        model.eval()

        # IST training: distribute parameters to sub-networks
        num_subnet = args.num_subnet
        if (epoch % args.iter_per_site) == 0:
            main_dict = model.state_dict()
            feats_idx = []  # store all layer indices within a single list

            # create input partition
            if split_input:
                feats_idx.append(
                    torch.chunk(torch.randperm(in_feats), num_subnet))
            else:
                feats_idx.append(None)

            # create hidden layer partitions
            for i in range(1, args.n_layers):
                feats_idx.append(
                    torch.chunk(torch.randperm(args.n_hidden), num_subnet))

            # create output layer partitions
            if split_output:
                feats_idx.append(
                    torch.chunk(torch.randperm(args.n_hidden), num_subnet))
            else:
                feats_idx.append(None)

        for subnet_id in range(args.num_subnet):
            if (epoch % args.iter_per_site) == 0:
                # create the sub-model to train
                sub_model = GCN(g, in_feats, args.n_hidden, n_classes,
                                args.n_layers, F.relu, args.dropout,
                                use_layernorm, split_input, split_output,
                                args.num_subnet)
                sub_model = sub_model.to(device)
                sub_dict = main_dict.copy()

                # split input params
                if split_input:
                    idx = feats_idx[0][subnet_id]
                    sub_dict['layers.0.weight'] = \
                        main_dict['layers.0.weight'][idx, :]

                # split hidden params (and output params)
                for i in range(1, args.n_layers + 1):
                    if i == args.n_layers and not split_output:
                        pass  # params stay the same
                    else:
                        idx = feats_idx[i][subnet_id]
                        sub_dict[f'layers.{i - 1}.weight'] = \
                            sub_dict[f'layers.{i - 1}.weight'][:, idx]
                        sub_dict[f'layers.{i - 1}.bias'] = \
                            main_dict[f'layers.{i - 1}.bias'][idx]
                        sub_dict[f'layers.{i}.weight'] = \
                            main_dict[f'layers.{i}.weight'][idx, :]

                # step-wise lr schedule
                curr_lr = args.lr
                if epoch >= int(args.n_epochs * 0.5):
                    curr_lr /= 10
                if epoch >= int(args.n_epochs * 0.75):
                    curr_lr /= 10

                # import params into the subnet for training
                sub_model.load_state_dict(sub_dict)
                sub_models.append(sub_model)
                sub_models = sub_models[-num_subnet:]
                optimizer = torch.optim.Adam(sub_model.parameters(),
                                             lr=curr_lr,
                                             weight_decay=args.weight_decay)
                opt_list.append(optimizer)
                opt_list = opt_list[-num_subnet:]
            else:
                sub_model = sub_models[subnet_id]
                optimizer = opt_list[subnet_id]

            # train a sub-network
            optimizer.zero_grad()
            sub_model.train()
            if split_input:
                model_input = features[:, feats_idx[0][subnet_id]]
            else:
                model_input = features
            logits = sub_model(model_input)
            loss = loss_fcn(logits[train_mask], labels[train_mask])
            loss.backward()
            optimizer.step()

            # save sub-model parameters
            if (((epoch + 1) % args.iter_per_site == 0)
                    or (epoch == args.n_epochs - 1)):
                sub_dict = sub_model.state_dict()
                sub_dict_list.append(sub_dict)
                sub_dict_list = sub_dict_list[-num_subnet:]

        # Merge parameters into the main network; force aggregation if
        # training is about to end
        if (((epoch + 1) % args.iter_per_site == 0)
                or (epoch == args.n_epochs - 1)):
            update_dict = main_dict.copy()

            # copy in the input parameters
            if split_input:
                if args.n_layers <= 1 and not split_output:
                    for idx, sub_dict in zip(feats_idx[0], sub_dict_list):
                        update_dict['layers.0.weight'][idx, :] = \
                            sub_dict['layers.0.weight']
                else:
                    for i, sub_dict in enumerate(sub_dict_list):
                        curr_idx = feats_idx[0][i]
                        next_idx = feats_idx[1][i]
                        correct_rows = \
                            update_dict['layers.0.weight'][curr_idx, :]
                        correct_rows[:, next_idx] = sub_dict['layers.0.weight']
                        update_dict['layers.0.weight'][curr_idx, :] = \
                            correct_rows
            else:
                if args.n_layers <= 1 and not split_output:
                    update_dict['layers.0.weight'] = sum(
                        sub_dict['layers.0.weight']
                        for sub_dict in sub_dict_list) / len(sub_dict_list)
                else:
                    for i, sub_dict in enumerate(sub_dict_list):
                        next_idx = feats_idx[1][i]
                        update_dict['layers.0.weight'][:, next_idx] = \
                            sub_dict['layers.0.weight']

            # copy in the rest of the parameters
            for i in range(1, args.n_layers + 1):
                if i == args.n_layers:
                    if not split_output:
                        update_dict[f'layers.{i-1}.bias'] = sum(
                            sub_dict[f'layers.{i-1}.bias']
                            for sub_dict in sub_dict_list) / len(sub_dict_list)
                        update_dict[f'layers.{i}.weight'] = sum(
                            sub_dict[f'layers.{i}.weight']
                            for sub_dict in sub_dict_list) / len(sub_dict_list)
                    else:
                        for idx, sub_dict in zip(feats_idx[i], sub_dict_list):
                            update_dict[f'layers.{i-1}.bias'][idx] = \
                                sub_dict[f'layers.{i-1}.bias']
                            update_dict[f'layers.{i}.weight'][idx, :] = \
                                sub_dict[f'layers.{i}.weight']
                else:
                    if i >= args.n_layers - 1 and not split_output:
                        for idx, sub_dict in zip(feats_idx[i], sub_dict_list):
                            update_dict[f'layers.{i-1}.bias'][idx] = \
                                sub_dict[f'layers.{i-1}.bias']
                            update_dict[f'layers.{i}.weight'][idx, :] = \
                                sub_dict[f'layers.{i}.weight']
                    else:
                        for j, sub_dict in enumerate(sub_dict_list):
                            curr_idx = feats_idx[i][j]
                            next_idx = feats_idx[i + 1][j]
                            update_dict[f'layers.{i-1}.bias'][curr_idx] = \
                                sub_dict[f'layers.{i-1}.bias']
                            correct_rows = \
                                update_dict[f'layers.{i}.weight'][curr_idx, :]
                            correct_rows[:, next_idx] = \
                                sub_dict[f'layers.{i}.weight']
                            update_dict[f'layers.{i}.weight'][curr_idx, :] = \
                                correct_rows

            model.load_state_dict(update_dict)

        if epoch >= 3:
            dur.append(time.time() - t0)
        acc_val = evaluate(model, features, labels, val_mask)
        acc_test = evaluate(model, features, labels, test_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | "
              "Val Accuracy {:.4f} | Test Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            acc_val, acc_test,
                                            n_edges / np.mean(dur) / 1000))
        record.append([acc_val, acc_test])

    all_test_acc = [v[1] for v in record]
    all_val_acc = [v[0] for v in record]
    acc = evaluate(model, features, labels, test_mask)
    print(f"Final Test Accuracy: {acc:.4f}")
    print(f"Best Val Accuracy: {max(all_val_acc):.4f}")
    print(f"Best Test Accuracy: {max(all_test_acc):.4f}")
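# `evaluate` above is the standard masked-accuracy helper from the DGL GCN
# example; a minimal sketch consistent with its call sites here (the F1
# variant used by train_gcn earlier differs):
def evaluate(model, features, labels, mask):
    """Accuracy of model(features) on the nodes selected by mask."""
    model.eval()
    with torch.no_grad():
        logits = model(features)[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels[mask])
        return correct.item() * 1.0 / len(labels[mask])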
def main():
    net = GCN(num_classes=num_classes,
              input_size=train_args['input_size']).cuda()

    if len(train_args['snapshot']) == 0:
        curr_epoch = 0
    else:
        print('training resumes from ' + train_args['snapshot'])
        net.load_state_dict(
            torch.load(
                os.path.join(ckpt_path, exp_name, train_args['snapshot'])))
        split_snapshot = train_args['snapshot'].split('_')
        curr_epoch = int(split_snapshot[1])
        train_record['best_val_loss'] = float(split_snapshot[3])
        train_record['corr_mean_iu'] = float(split_snapshot[6])
        train_record['corr_epoch'] = curr_epoch
    net.train()

    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    train_simul_transform = simul_transforms.Compose([
        simul_transforms.Scale(int(train_args['input_size'][0] / 0.875)),
        simul_transforms.RandomCrop(train_args['input_size']),
        simul_transforms.RandomHorizontallyFlip()
    ])
    val_simul_transform = simul_transforms.Compose([
        simul_transforms.Scale(int(train_args['input_size'][0] / 0.875)),
        simul_transforms.CenterCrop(train_args['input_size'])
    ])
    img_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])
    target_transform = standard_transforms.Compose([
        expanded_transforms.MaskToTensor(),
        expanded_transforms.ChangeLabel(ignored_label, num_classes - 1)
    ])
    restore_transform = standard_transforms.Compose([
        expanded_transforms.DeNormalize(*mean_std),
        standard_transforms.ToPILImage()
    ])

    train_set = CityScapes('train', simul_transform=train_simul_transform,
                           transform=img_transform,
                           target_transform=target_transform)
    train_loader = DataLoader(train_set,
                              batch_size=train_args['batch_size'],
                              num_workers=16, shuffle=True)
    val_set = CityScapes('val', simul_transform=val_simul_transform,
                         transform=img_transform,
                         target_transform=target_transform)
    val_loader = DataLoader(val_set, batch_size=val_args['batch_size'],
                            num_workers=16, shuffle=False)

    # ignore the last class by zeroing its loss weight
    weight = torch.ones(num_classes)
    weight[num_classes - 1] = 0
    criterion = CrossEntropyLoss2d(weight).cuda()

    # no weight_decay for biases; new (gcm/brm) layers get the larger new_lr
    optimizer = optim.SGD([
        {'params': [param for name, param in net.named_parameters()
                    if name[-4:] == 'bias' and ('gcm' in name or 'brm' in name)],
         'lr': 2 * train_args['new_lr']},
        {'params': [param for name, param in net.named_parameters()
                    if name[-4:] != 'bias' and ('gcm' in name or 'brm' in name)],
         'lr': train_args['new_lr'],
         'weight_decay': train_args['weight_decay']},
        {'params': [param for name, param in net.named_parameters()
                    if name[-4:] == 'bias' and not ('gcm' in name or 'brm' in name)],
         'lr': 2 * train_args['pretrained_lr']},
        {'params': [param for name, param in net.named_parameters()
                    if name[-4:] != 'bias' and not ('gcm' in name or 'brm' in name)],
         'lr': train_args['pretrained_lr'],
         'weight_decay': train_args['weight_decay']}
    ], momentum=0.9, nesterov=True)

    if len(train_args['snapshot']) > 0:
        optimizer.load_state_dict(
            torch.load(
                os.path.join(ckpt_path, exp_name,
                             'opt_' + train_args['snapshot'])))
        optimizer.param_groups[0]['lr'] = 2 * train_args['new_lr']
        optimizer.param_groups[1]['lr'] = train_args['new_lr']
        optimizer.param_groups[2]['lr'] = 2 * train_args['pretrained_lr']
        optimizer.param_groups[3]['lr'] = train_args['pretrained_lr']

    if not os.path.exists(ckpt_path):
        os.mkdir(ckpt_path)
    if not os.path.exists(os.path.join(ckpt_path, exp_name)):
        os.mkdir(os.path.join(ckpt_path, exp_name))

    for epoch in range(curr_epoch, train_args['epoch_num']):
        train(train_loader, net, criterion, optimizer, epoch)
        validate(val_loader, net, criterion, optimizer, epoch,
                 restore_transform)
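# `CrossEntropyLoss2d` is the common segmentation wrapper around per-pixel
# NLL; a minimal sketch, assuming the network returns raw logits of shape
# (N, C, H, W) and targets of shape (N, H, W):
import torch.nn as nn
import torch.nn.functional as F


class CrossEntropyLoss2d(nn.Module):
    def __init__(self, weight=None):
        super(CrossEntropyLoss2d, self).__init__()
        self.nll_loss = nn.NLLLoss(weight)

    def forward(self, inputs, targets):
        # log-softmax over the class dimension, then per-pixel NLL
        return self.nll_loss(F.log_softmax(inputs, dim=1), targets)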