def main():
    """Train the MIM video-prediction model with scheduled sampling.

    Loads args and data handles, then runs the iteration loop: fetch a
    batch, patch-reshape it, train one step, and periodically snapshot,
    test, and log.
    """
    # Load parameters from the command line.
    args = parse_args()
    # Load resources: prefer the configured CUDA device, fall back to CPU.
    if torch.cuda.is_available():
        device = torch.device(args.device)
    else:
        device = torch.device("cpu")
    model = MIM(args).to(device)
    print(model)
    print('The model is loaded!\n')
    # Load dataset handles; batch size is scaled by the number of GPUs.
    train_input_handle, test_input_handle = datasets_factory.data_provider(args.dataset_name, args.train_data_paths, args.valid_data_paths, args.batch_size * args.n_gpu, args.img_width, seq_length=args.total_length, is_training=True)
    # batches come out as n x 64 x 64 x 1
    # with torch.set_grad_enabled(True):
    if args.pretrained_model:
        model.load(args.pretrained_model)
    eta = args.sampling_start_value  # 1.0 — scheduled-sampling probability, decayed by schedule_sampling
    # NOTE(review): learning rate is hard-coded here (other variants use args.lr) — confirm intended.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    MSELoss = torch.nn.MSELoss()
    for itr in range(1, args.max_iterations + 1):
        # Reshuffle once the handle has exhausted all batches.
        if train_input_handle.no_batch_left():
            train_input_handle.begin(do_shuffle=True)
        ims = train_input_handle.get_batch()
        ims_reverse = None
        if args.reverse_img:
            # Horizontally flipped copy (last axis reversed) for augmentation.
            ims_reverse = ims[:, :, :, ::-1]
            ims_reverse = preprocess.reshape_patch(ims_reverse, args.patch_size)
        ims = preprocess.reshape_patch(ims, args.patch_size)
        # Decay the sampling probability and build the real-input mask.
        eta, real_input_flag = schedule_sampling(eta, itr, args)
        loss = trainer.trainer(model, ims, real_input_flag, args, itr, ims_reverse, device, optimizer, MSELoss)
        if itr % args.snapshot_interval == 0:
            model.save(itr)
        if itr % args.test_interval == 0:
            trainer.test(model, test_input_handle, args, itr)
        if itr % args.display_interval == 0:
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'itr: ' + str(itr))
            print('training loss: ' + str(loss))
        train_input_handle.next()
        # Release the loss tensor before the next iteration.
        del loss
def run(args):
    """Entry point: configure MindSpore and run LeNet-5 in the requested mode.

    Modes:
      * ``init``  -- save freshly initialised weights under ``seeds/``.
      * ``train`` -- train on <data_path>/train, optionally from an init checkpoint.
      * ``test``  -- evaluate a list of checkpoints on <data_path>/test.
    """
    # Graph-mode execution on the configured backend.
    ms.context.set_context(
        mode=ms.context.GRAPH_MODE,
        device_target=args.device,
        save_graphs=False,
    )
    model = LeNet5(
        num_class=10,
        num_channel=3,
        use_bn=args.use_bn,
        dbg_log_tensor=args.log_tensor,
    )
    criterion = ms.nn.loss.SoftmaxCrossEntropyWithLogits(
        sparse=True,
        reduction='mean',
    )
    optimizer = build_optimizer(args, model)

    if args.mode == 'init':
        # Seed checkpoint, named by the current unix timestamp.
        save_checkpoint(
            model,
            ckpt_file_name=os.path.join('seeds', '%d.ckpt' % (time.time())),
        )

    if args.mode == 'train':
        train_ds = create_dataset(
            args=args,
            data_path=os.path.join(args.data_path, 'train'),
            batch_size=args.device_batch_size,
        )
        if args.init_ckpt:
            print('using init checkpoint %s' % (args.init_ckpt))
            load_ckpt(model, args.init_ckpt)
        train(args, model, criterion, optimizer, train_ds)

    if args.mode == 'test':
        if args.use_kungfu:
            # Under KungFu only rank 0 evaluates; other workers return early.
            rank = kfops.kungfu_current_rank()
            if rank > 0:
                return
        test_ds = create_dataset(
            args=args,
            data_path=os.path.join(args.data_path, 'test'),
            batch_size=args.device_batch_size,
        )
        if args.ckpt_files:
            # An explicit comma-separated checkpoint list wins.
            checkpoints = args.ckpt_files.split(',')
        else:
            # Otherwise glob every checkpoint from this run's directory.
            checkpoint_dir = get_ckpt_dir(args)
            print('checkpoint_dir: %s' % (checkpoint_dir))
            checkpoints = sorted(glob.glob(checkpoint_dir + '/*.ckpt'))
        print('will test %d checkpoints' % (len(checkpoints)))
        test(args, model, criterion, optimizer, test_ds, checkpoints)
def run(args):
    """Configure MindSpore and run LeNet-5 in init/train/test mode.

    In test mode, either an explicit ``--ckpt_files`` list or checkpoints at
    fixed training steps (10/20/30/40) are evaluated.
    """
    ms.context.set_context(
        mode=ms.context.GRAPH_MODE,
        device_target=args.device,
        save_graphs=False,
    )
    model = LeNet5(
        num_class=10,
        num_channel=3,
        use_bn=args.use_bn,
    )
    criterion = ms.nn.loss.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    optimizer = build_optimizer(args, model)

    if args.mode == 'init':
        # Persist freshly initialised weights, keyed by unix timestamp.
        save_checkpoint(
            model,
            ckpt_file_name=os.path.join('seeds', '%d.ckpt' % (time.time())),
        )

    if args.mode == 'train':
        train_ds = create_dataset(
            data_path=os.path.join(args.data_path, 'train'),
            batch_size=args.device_batch_size,
        )
        if args.init_ckpt:
            print('using init checkpoint %s' % (args.init_ckpt))
            load_ckpt(model, args.init_ckpt)
        train(args, model, criterion, optimizer, train_ds)

    if args.mode == 'test':
        test_ds = create_dataset(
            data_path=os.path.join(args.data_path, 'test'),
            batch_size=args.device_batch_size,
        )
        if args.ckpt_files:
            # An explicit comma-separated checkpoint list wins.
            checkpoints = args.ckpt_files.split(',')
        else:
            # Otherwise probe checkpoints at a fixed set of training steps.
            checkpoints = [get_ckpt_file_name(args, step) for step in [10, 20, 30, 40]]
        print('will test %d checkpoints' % (len(checkpoints)))
        test(args, model, criterion, optimizer, test_ds, checkpoints)
def fit(args, model, device, optimizer, loss_fn, dataset, labels_list, task_id):
    """Train ``model`` on one task with early stopping on validation loss.

    Returns the ``state_dict`` snapshot of the epoch with the LOWEST
    validation loss.

    Fixes vs. the previous version:
      * validation loss was tracked higher-is-better, so the snapshot kept
        was the WORST epoch; loss is now minimised.
      * ``best_state`` could be unbound if no epoch "improved"; it is now
        initialised before the loop.
      * ``state_dict()`` tensors alias the live parameters, so snapshots are
        deep-copied to freeze them.
    """
    import copy  # local import keeps the public interface and file imports untouched

    # Dataloaders for the task's train/val splits.
    train_loader = trainer.get_loader(mnist.getTrain(dataset), args, device, 'train')
    val_loader = trainer.get_loader(mnist.getVal(dataset), args, device, 'val')

    best_val_loss = float('inf')  # lower is better
    best_state = copy.deepcopy(model.state_dict())
    # Early stopping counter: epochs since the last improvement.
    early_stop = 0

    for epoch in range(1, args.epochs + 1):
        # Restrict the model's output head to the current task's labels.
        model.set_task_id(labels_list[task_id])
        trainer.train(args, model, device, train_loader, optimizer, epoch, loss_fn)
        val_loss, _ = trainer.test(args, model, device, val_loader, loss_fn, val=True)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state = copy.deepcopy(model.state_dict())
            early_stop = 0
        else:
            early_stop += 1
        if early_stop >= args.early_stop_after:
            break
    return best_state
def train():
    """Parse CLI options, build the dataset and iterators for the selected
    model module (``models.<name>``), train it, and evaluate on the test set.

    Fix: ``--with_cuda`` and ``--on_memory`` previously used ``type=bool``,
    which makes argparse treat ANY non-empty string (including "False") as
    True. They now parse true/false tokens explicitly; defaults unchanged.
    """

    def _str2bool(v):
        # argparse-compatible boolean parser ("true"/"false", "1"/"0", ...).
        if isinstance(v, bool):
            return v
        if v.lower() in ('yes', 'true', 't', '1'):
            return True
        if v.lower() in ('no', 'false', 'f', '0'):
            return False
        raise argparse.ArgumentTypeError('boolean value expected, got %r' % (v,))

    parser = argparse.ArgumentParser()

    # Defaults pointing at the small demo corpus.
    train_dataset = '../data/corpus.small'
    vocab_dataset = '../data/vocab.small'
    output = '../output/bert.model'
    model = 'bert'

    parser.add_argument('--model', type=str, required=False, default=model)
    parser.add_argument("-c", "--train_dataset", required=False, type=str, default=train_dataset, help="train dataset for train bert")
    parser.add_argument("-t", "--test_dataset", type=str, default=None, help="test set for evaluate train set")
    parser.add_argument("-v", "--vocab_path", required=False, type=str, default=vocab_dataset, help="built vocab model path with bert-vocab")
    parser.add_argument("-o", "--output_path", required=False, type=str, default=output, help="exoutput/bert.model")
    parser.add_argument("-b", "--batch_size", type=int, default=64, help="number of batch_size")
    parser.add_argument("-e", "--epochs", type=int, default=10, help="number of epochs")
    parser.add_argument("-w", "--num_workers", type=int, default=5, help="dataloader worker size")
    parser.add_argument("--require_improvement", type=int, default=1000, help="patience of early stopping")
    parser.add_argument("--with_cuda", type=_str2bool, default=True, help="training with CUDA: true, or false")
    parser.add_argument("--log_freq", type=int, default=10, help="printing loss every n iter: setting n")
    parser.add_argument("--corpus_lines", type=int, default=None, help="total number of lines in corpus")
    # NOTE(review): default is the int 0 even though nargs='+' yields a list
    # when the flag is given — downstream may rely on this; left unchanged.
    parser.add_argument("--cuda_devices", type=int, nargs='+', default=0, help="CUDA device ids")
    parser.add_argument("--on_memory", type=_str2bool, default=True, help="Loading on memory: true or false")
    parser.add_argument("--lr", type=float, default=1e-3, help="learning rate of adam")
    args = parser.parse_args()

    # Resolve the model module dynamically, e.g. models.bert.
    model_name = args.model
    module = import_module('models.' + model_name)
    config = module.ModelConfig(args)

    # Build datasets and batching iterators.
    train_data, dev_data, test_data = build_dataset(config)
    train_iter = build_iterator(train_data, config)
    dev_iter = build_iterator(dev_data, config)
    test_iter = build_iterator(test_data, config)

    # Train, then evaluate on the held-out test iterator.
    model = module.Model(config).to(config.device)
    model_train(config, model, train_iter, dev_iter, test_iter)
    test(config, model, test_iter)
def main():
    """Train on the training split, evaluate on dev, report statistics,
    and export the physical model."""
    # Load the train/dev splits and their labels.
    tr_x, tr_y, dev_x, dev_y = load_data()
    # Fit the model; also recover the length statistics used for normalisation.
    fitted, length_mean, length_std = train(tr_x, tr_y)
    # Score the dev split with the same statistics.
    preds = test(fitted, dev_x, length_mean, length_std)
    print_statistics(preds, dev_y)
    create_physical_model(fitted, length_mean, length_std)
def test_images(images, model, env, split=False, hook=None):
    """Evaluate ``model`` on ``images`` using the environment's settings.

    Builds an evaluation parameter struct from ``env``, wraps the model in an
    evaluator, and runs the trainer's test loop over the image subset.
    """
    args = env.args
    # Bundle every evaluation knob into one parameter struct.
    params = struct(
        overlap=args.overlap,
        split=split,
        image_size=(args.train_size, args.train_size),
        batch_size=args.batch_size,
        nms_params=get_nms_params(args),
        device=env.device,
        debug=env.debug)
    # Model is switched to eval mode before wrapping.
    evaluator = evaluate.eval_test(model.eval(), env.encoder, params)
    subset = env.dataset.test_on(images, args, env.encoder)
    return trainer.test(subset, evaluator, hook=hook)
def main():
    """Assemble model, data, logging and optimisation, run the train/test
    loop, then report final metrics when a test split exists."""
    netconfig, hyperparams_config, data_config = config()

    # Model, optionally resumed from a previous run.
    model = SELUNet(dropout=netconfig["alphadropout"])
    model = load_model(model, netconfig["model_resume_file"])
    dump_config(model, netconfig, hyperparams_config, data_config)

    train_data, validate_data, test_data = create_train_validate_test_dataloader(netconfig)

    # Logging / persistence helpers.
    saver = Saver(netconfig["save_dir"])
    writer = SummaryWriter(netconfig["writer_dir"])
    tee = Tee(netconfig["tee_file"])
    use_cuda = netconfig["use_cuda"]

    # Optimisation: Adamax with step learning-rate decay.
    optimizer = optim.Adamax(model.parameters())
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          netconfig["lr_decay_epochs"],
                                          netconfig["lr_decay"])
    loss = construct_loss(netconfig)

    # NOTE: the VALIDATION loader is handed to the loop as "test_data";
    # the held-out test split is only evaluated after training, below.
    loop_config = dict(
        train_data=train_data,
        test_data=validate_data,
        model=model,
        saver=saver,
        use_cuda=use_cuda,
        epochs=netconfig["epochs"],
        optimizer=optimizer,
        scheduler=scheduler,
        loss=loss,
        writer=writer,
        tee=tee,
        model_dir=netconfig["model_dir"],
        test_interval=netconfig["test_interval"],
        window=netconfig["window"],
        stride=netconfig["stride"],
        idr_interval=netconfig["idr_interval"],
    )
    train_test_loop(loop_config)

    if test_data:
        metrics = test(model, loss, test_data, use_cuda)
        tee.writeln("Test: " + " ".join(("{: >5}: {:.4f}".format(k, v)
                                         for k, v in metrics.items())))
def main():
    """Build DIV2K loaders, configure the model and GPUs, then train,
    benchmark and test."""
    # Training and validation splits of DIV2K.
    train_set = DIV2KDataSet(args, args.scale, args.data_dir,
                             'DIV2K_train_HR', 'DIV2K_train_LR_bicubic')
    test_set = DIV2KDataSet(args, args.scale, args.data_dir,
                            'DIV2K_valid_HR', 'DIV2K_valid_LR_bicubic')
    print(len(train_set))

    # Model; wrap in DataParallel only when several GPU ids were given.
    net = SRNet(args)
    if isinstance(args.gpu, list) and len(args.gpu) > 1:
        import torch.nn as nn
        net = nn.DataParallel(net, args.gpu)
    else:
        # Single device: pin the visible CUDA device instead.
        os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    trainer.network_paras(net)

    # Restore previous weights when fine-tuning.
    if args.is_finetuning:
        net = trainer.restore(args, net)

    # Train, benchmark, then evaluate on the validation split.
    trainer.train(args, net, train_set)
    trainer.run_benchmark(args, net, args.benchmark_dir)
    trainer.test(args, net, test_set)
def train_loop(trainer, lr):
    """Run a three-phase training schedule with patience-based stopping.

    Phase 1 trains from epoch 0; the two following phases resume from
    ``trainer.load()`` with the learning rate divided by 10 each time.
    A phase ends after 80 epochs or 6 consecutive ``trainer.test`` failures.

    Fix: ``no_progress`` was read before assignment when the very first
    ``trainer.test`` call returned False (UnboundLocalError); the counter is
    now initialised inside the shared phase helper.
    """

    def _run_phase(start_epoch, lr):
        # Train up to 80 epochs, stopping after 6 consecutive failed tests.
        no_progress = 0
        for epoch in range(start_epoch, start_epoch + 80):
            trainer.train(epoch, lr)
            if trainer.test(epoch):
                no_progress = 0
            else:
                no_progress += 1
            if no_progress >= 6:
                break

    print(f"Current learning rate: {lr}")
    _run_phase(0, lr)
    lr = lr / 10

    for i in range(0, 2):
        print(f"Current learning rate: {lr}")
        start_epoch = trainer.load()
        _run_phase(start_epoch, lr)
        lr = lr / 10
def test_all_tasks(coresets, args, model, device, loss_fn, labels_list, split):
    """Evaluate the model on every task seen so far.

    For each task j: if a coreset is stored, fine-tune a fresh copy of the
    shared model on that coreset and evaluate the copy; otherwise evaluate
    the shared model directly. Returns a numpy array of per-task accuracies.
    """
    def fit_coreset(coreset, labels, args, model, device):
        # Build a fresh inference model initialised from the shared weights.
        final_model, _ = trainer.get_model(args, device)
        final_model.load_state_dict(model.state_dict())
        final_model.set_range(labels)
        # NOTE(review): this local `loss_fn` deliberately shadows the outer
        # parameter — coreset tuning uses its own loss; confirm against
        # trainer.get_loss_fn.
        loss_fn = trainer.get_loss_fn(args, device, final_model, model)
        optimizer = optim.Adam(final_model.parameters(), lr=args.lr)
        coreset_loader = trainer.get_loader(coreset, args, device, 'coreset')
        # Short fine-tuning pass over the stored coreset examples.
        for epoch in range(args.coreset_epochs):
            trainer.train(args, final_model, device, coreset_loader, optimizer, epoch, loss_fn, verbose=False)
        return final_model

    # Coreset evaluation loop: one accuracy slot per task.
    test_accs = np.zeros(len(labels_list))
    for j, labels in enumerate(labels_list):
        if args.coreset_size > 0 and len(coresets) > j:
            # Tune a copy of the model on task j's coreset before evaluating.
            final_model = fit_coreset(coresets[j], labels, args, model, device)
        else:
            # No coreset available: evaluate the shared model as-is.
            final_model = model
        # Evaluate on the requested split of task j.
        testset = get_dataset(args, j, split)
        test_loader = trainer.get_loader(testset, args, device, split)
        final_model.set_range(labels)
        _, test_accs[j] = trainer.test(args, final_model, device, test_loader, loss_fn)
    return test_accs
def fit(args, model, device, optimizer, loss_fn, coresets, dataset, labels_list, task_id):
    """Train ``model`` on the current task with early stopping on validation
    accuracy, evaluating all tasks on the test set every epoch.

    Returns ``(best_state, all_test_accs)`` where ``best_state`` is the
    snapshot with the highest validation accuracy and ``all_test_accs`` the
    per-epoch arrays produced by ``test_all_tasks``.

    Fixes vs. the previous version:
      * ``best_state`` could be unbound when validation accuracy never
        exceeded the initial 0; tracking now starts at -inf and a snapshot
        is taken before the loop.
      * ``state_dict()`` tensors alias live parameters; snapshots are now
        deep-copied so "best" weights do not keep mutating.
    """
    import copy  # local import keeps the public interface and file imports untouched

    # Dataloaders for the task's train/val splits.
    train_loader = trainer.get_loader(mnist.getTrain(dataset), args, device, 'train')
    val_loader = trainer.get_loader(mnist.getVal(dataset), args, device, 'val')

    # -inf guarantees the first epoch always records a snapshot.
    best_val_acc = float('-inf')
    best_state = copy.deepcopy(model.state_dict())
    all_test_accs = []
    # Early stopping counter: epochs since the last improvement.
    early_stop = 0

    for epoch in range(1, args.epochs + 1):
        # Restrict the output head to the current task's labels.
        model.set_range(labels_list[task_id])
        trainer.train(args, model, device, train_loader, optimizer, epoch, loss_fn)
        _, val_acc = trainer.test(args, model, device, val_loader, loss_fn, val=True)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_state = copy.deepcopy(model.state_dict())
            early_stop = 0
        else:
            early_stop += 1
        if early_stop >= args.early_stop_after:
            break
        # Evaluate all tasks seen so far on the test split.
        test_accs = test_all_tasks(coresets, args, model, device, loss_fn, labels_list, 'test')
        all_test_accs.append(test_accs)
    return best_state, all_test_accs
# --- Training tail of a watermark-embedding script ---
# Uses names bound earlier in the file: net, args, logfile, start_epoch,
# trainloader, testloader, wmloader, device.
print('Parallel training on {0} GPUs.'.format(torch.cuda.device_count()))
# Spread the model across every visible GPU.
net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
cudnn.benchmark = True
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
# Report initial accuracy on the watermark examples (presumably the trigger
# set used for ownership verification — confirm against wmloader's source).
if args.wmtrain:
    print("WM acc:")
    test(net, criterion, logfile, wmloader, device)
# Main training loop: decay LR, train one epoch, then report test accuracy
# and (when watermarking) watermark accuracy.
for epoch in range(start_epoch, start_epoch + args.max_epochs):
    # adjust learning rate
    adjust_learning_rate(args.lr, optimizer, epoch, args.lradj, args.ratio)
    train(epoch, net, criterion, optimizer, logfile, trainloader, device, wmloader)
    print("Test acc:")
    acc = test(net, criterion, logfile, testloader, device)
    if args.wmtrain:
        print("WM acc:")
        test(net, criterion, logfile, wmloader, device)
# --- Sweep over edge densities and planted-clique sizes, then plot ---
# Uses names bound earlier in the file: densities, clique_sizes, generator,
# logger, gnn, test, plt.
colors = {5: 'b', 10: 'k', 20: 'r', 32: 'g'}  # one plot colour per clique size
test_results = {}
for d in densities:
    for cs in clique_sizes:
        # Reconfigure the generator for this (density, clique size) cell.
        args = {'N': generator.N, 'edge density': d, 'planted clique size': cs}
        logger.args = args
        generator.edge_density = args['edge density']
        generator.clique_size = args['planted clique size']
        generator.create_test_dataset()
        print('Test dataset created')
        # Store the four metrics keyed by (density, clique size, metric name).
        test_results[d, cs, 'loss'], test_results[d, cs, 'accuracy'], test_results[
            d, cs, 'exact accuracy'], test_results[d, cs, 'mismatch'] = test(
                gnn, generator, logger)
# plot test loss
plt.figure(0)
plt.clf()
for cs in clique_sizes:
    # NOTE(review): the 'b' format argument is overridden by the color= kwarg,
    # so each curve is drawn in colors[cs].
    plt.semilogy(densities, [test_results[d, cs, 'loss'] for d in densities],
                 'b', label='C={}'.format(cs), color=colors[cs])
plt.xlabel('Edge density')
plt.ylabel('Cross Entropy Loss')
plt.title('Test Loss: N={}'.format(logger.args['N']))
plt.legend()
def main():
    """Train a class-conditional video GAN (generator + video/image
    discriminators) on the MUG dataset, with periodic validation, testing
    and checkpointing."""
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)
    print(args)
    # create logging folder
    log_path = os.path.join(args.save_path, args.exp_name + '/log')
    model_path = os.path.join(args.save_path, args.exp_name + '/models')
    os.makedirs(log_path, exist_ok=True)
    os.makedirs(model_path, exist_ok=True)
    writer = SummaryWriter(log_path)  # tensorboard
    # load model: generator plus video- and image-level discriminators
    print('==> loading models')
    device = torch.device("cuda:0")
    G = Generator(args.dim_z, args.dim_a, args.nclasses, args.ch).to(device)
    VD = VideoDiscriminator(args.nclasses, args.ch).to(device)
    ID = ImageDiscriminator(args.ch).to(device)
    G = nn.DataParallel(G)
    VD = nn.DataParallel(VD)
    ID = nn.DataParallel(ID)
    # optimizers: standard GAN betas (0.5, 0.999), separate G and D rates
    optimizer_G = torch.optim.Adam(G.parameters(), args.g_lr, (0.5, 0.999))
    optimizer_VD = torch.optim.Adam(VD.parameters(), args.d_lr, (0.5, 0.999))
    optimizer_ID = torch.optim.Adam(ID.parameters(), args.d_lr, (0.5, 0.999))
    # losses: adversarial (logits) plus L1 reconstruction
    criterion_gan = nn.BCEWithLogitsLoss().to(device)
    criterion_l1 = nn.L1Loss().to(device)
    # prepare dataset: clip transforms for train/val, image transforms for test
    print('==> preparing dataset')
    transform = torchvision.transforms.Compose([
        transforms_vid.ClipResize((args.img_size, args.img_size)),
        transforms_vid.ClipToTensor(),
        transforms_vid.ClipNormalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    transform_test = torchvision.transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    if args.dataset == 'mug':
        dataset_train = MUG('train', args.data_path, transform=transform)
        dataset_val = MUG('val', args.data_path, transform=transform)
        dataset_test = MUG_test(args.data_path, transform=transform_test)
    else:
        raise NotImplementedError
    dataloader_train = torch.utils.data.DataLoader(
        dataset=dataset_train,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        shuffle=True,
        pin_memory=True,
        drop_last=True)
    dataloader_val = torch.utils.data.DataLoader(
        dataset=dataset_val,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        shuffle=False,
        pin_memory=True)
    dataloader_test = torch.utils.data.DataLoader(
        dataset=dataset_test,
        batch_size=args.batch_size_test,
        num_workers=args.num_workers,
        shuffle=False,
        pin_memory=True)
    print('==> start training')
    for epoch in range(args.max_epoch):
        train(args, epoch, G, VD, ID, optimizer_G, optimizer_VD, optimizer_ID,
              criterion_gan, criterion_l1, dataloader_train, writer, device)
        # Periodic validation and test passes (generator only).
        if epoch % args.val_freq == 0:
            val(args, epoch, G, criterion_l1, dataloader_val, device, writer)
            test(args, epoch, G, dataloader_test, device, writer)
        # Periodic checkpointing of all three networks.
        if epoch % args.save_freq == 0:
            torch.save(G.state_dict(), os.path.join(model_path, 'G_%d.pth' % (epoch)))
            torch.save(VD.state_dict(), os.path.join(model_path, 'VD_%d.pth' % (epoch)))
            torch.save(ID.state_dict(), os.path.join(model_path, 'ID_%d.pth' % (epoch)))
    return
def run(args=None):
    """Dispatch certified/adversarial training, ONNX export, testing or
    certification based on ``args.train_mode``.

    Returns ``(test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc)``
    from the most recent evaluation (``None`` values if never evaluated).
    """
    # NOTE(review): args is required despite the =None default — args.no_cuda
    # is read immediately.
    device = 'cuda' if torch.cuda.is_available() and (
        not args.no_cuda) else 'cpu'
    num_train, train_loader, test_loader, input_size, input_channel, n_class = get_loaders(
        args)
    # Per-sample loss (reduction='none'); evalFn maps logits to predicted class.
    lossFn = nn.CrossEntropyLoss(reduction='none')
    evalFn = lambda x: torch.max(x, dim=1)[1]
    net = get_net(device, args.dataset, args.net, input_size, input_channel,
                  n_class, load_model=args.load_model,
                  net_dim=args.cert_net_dim
                  )  #, feature_extract=args.core_feature_extract)
    # Unique output directory: dataset/exp/id/net_eps/timestamp.
    timestamp = int(time.time())
    model_signature = '%s/%s/%d/%s_%.5f/%d' % (args.dataset, args.exp_name,
                                               args.exp_id, args.net,
                                               args.train_eps, timestamp)
    model_dir = args.root_dir + 'models_new/%s' % (model_signature)
    args.model_dir = model_dir
    count_vars(args, net)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    # The relaxed (latent-space) network is only built for non-Upscale nets.
    if isinstance(net, UpscaleNet):
        relaxed_net = None
        relu_ids = None
    else:
        relaxed_net = RelaxedNetwork(net.blocks, args.n_rand_proj).to(device)
        relu_ids = relaxed_net.get_relu_ids()
    # Natural training skips the relaxation branch entirely.
    if "nat" in args.train_mode:
        cnet = CombinedNetwork(net, relaxed_net, lossFn=lossFn, evalFn=evalFn,
                               device=device, no_r_net=True).to(device)
    else:
        dummy_input = torch.rand((1, ) + net.dims[0],
                                 device=device, dtype=torch.float32)
        cnet = CombinedNetwork(net, relaxed_net, lossFn=lossFn, evalFn=evalFn,
                               device=device, dummy_input=dummy_input).to(device)
    n_epochs, test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc = args.n_epochs, None, None, None, None
    if 'train' in args.train_mode:
        # Persist run configuration next to the checkpoints.
        tb_writer = SummaryWriter(model_dir)
        stats = Statistics(len(train_loader), tb_writer, model_dir)
        args_file = os.path.join(model_dir, 'args.json')
        with open(args_file, 'w') as fou:
            json.dump(vars(args), fou, indent=4)
        write_config(args, os.path.join(model_dir, 'run_config.txt'))
        eps = 0
        epoch = 0
        lr = args.lr
        n_epochs = args.n_epochs
        # Select the layer schedule for the chosen training flavour.
        if "COLT" in args.train_mode:
            relu_stable = args.relu_stable
            # if args.layers is None:
            #     args.layers = [-2, -1] + relu_ids
            layers = get_layers(args.train_mode, cnet,
                                n_attack_layers=args.n_attack_layers,
                                protected_layers=args.protected_layers)
        elif "adv" in args.train_mode:
            relu_stable = None
            layers = [-1, -1]
            args.mix = False
        elif "natural" in args.train_mode:
            relu_stable = None
            layers = [-2, -2]
            args.nat_factor = 1
            args.mix = False
        elif "diffAI" in args.train_mode:
            relu_stable = None
            layers = [-2, -2]
        else:
            assert False, "Unknown train mode %s" % args.train_mode
        print('Saving model to:', model_dir)
        print('Training layers: ', layers)
        # One training stage per consecutive pair in `layers`.
        for j in range(len(layers) - 1):
            opt, lr_scheduler = get_opt(cnet.net, args.opt, lr, args.lr_step,
                                        args.lr_factor, args.n_epochs,
                                        train_loader, args.lr_sched,
                                        fixup="fixup" in args.net)
            curr_layer_idx = layers[j + 1]
            eps_old = eps
            eps = get_scaled_eps(args, layers, relu_ids, curr_layer_idx, j)
            # Schedulers annealing kappa/beta/eps over the mixing window.
            kappa_sched = Scheduler(0.0 if args.mix else 1.0, 1.0,
                                    num_train * args.mix_epochs, 0)
            beta_sched = Scheduler(
                args.beta_start if args.mix else args.beta_end, args.beta_end,
                args.train_batch * len(train_loader) * args.mix_epochs, 0)
            eps_sched = Scheduler(eps_old if args.anneal else eps, eps,
                                  num_train * args.anneal_epochs, 0)
            layer_dir = '{}/{}'.format(model_dir, curr_layer_idx)
            if not os.path.exists(layer_dir):
                os.makedirs(layer_dir)
            print('\nnew train phase: eps={:.5f}, lr={:.2e}, curr_layer={}\n'.
                  format(eps, lr, curr_layer_idx))
            for curr_epoch in range(n_epochs):
                train(device, epoch, args, j + 1, layers, cnet, eps_sched,
                      kappa_sched, opt, train_loader, lr_scheduler, relu_ids,
                      stats, relu_stable,
                      relu_stable_protected=args.relu_stable_protected,
                      beta_sched=beta_sched)
                # StepLR steps manually only after the mixing epochs.
                if isinstance(lr_scheduler, optim.lr_scheduler.StepLR
                              ) and curr_epoch >= args.mix_epochs:
                    lr_scheduler.step()
                # Periodic evaluation; `epoch` counts globally across stages.
                if (epoch + 1) % args.test_freq == 0:
                    with torch.no_grad():
                        test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc = test(
                            device, args, cnet, test_loader
                            if args.test_set == "test" else train_loader,
                            [curr_layer_idx], stats=stats,
                            log_ind=(epoch + 1) % n_epochs == 0)
                # Checkpoint net and optimizer on test epochs and stage ends.
                if (epoch + 1) % args.test_freq == 0 or (epoch + 1) % n_epochs == 0:
                    torch.save(
                        net.state_dict(),
                        os.path.join(layer_dir, 'net_%d.pt' % (epoch + 1)))
                    torch.save(
                        opt.state_dict(),
                        os.path.join(layer_dir, 'opt_%d.pt' % (epoch + 1)))
                stats.update_tb(epoch)
                epoch += 1
            # Decay the relu-stable weight and learning rate between stages.
            relu_stable = None if relu_stable is None else relu_stable * args.relu_stable_layer_dec
            lr = lr * args.lr_layer_dec
        if args.cert:
            with torch.no_grad():
                diffAI_cert(
                    device, args, cnet,
                    test_loader if args.test_set == "test" else train_loader,
                    stats=stats, log_ind=True, epoch=epoch,
                    domains=args.cert_domain)
    elif args.train_mode == 'print':
        # Export the raw network to ONNX.
        print('printing network to:', args.out_net_file)
        dummy_input = torch.randn(1, input_channel, input_size, input_size,
                                  device='cuda')
        net.skip_norm = True
        torch.onnx.export(net, dummy_input, args.out_net_file, verbose=True)
    elif args.train_mode == 'test':
        with torch.no_grad():
            test(device, args, cnet,
                 test_loader if args.test_set == "test" else train_loader,
                 [-1], log_ind=True)
    elif args.train_mode == "cert":
        # Certification-only run: dump config, certify, then exit the process.
        tb_writer = SummaryWriter(model_dir)
        stats = Statistics(len(train_loader), tb_writer, model_dir)
        args_file = os.path.join(model_dir, 'args.json')
        with open(args_file, 'w') as fou:
            json.dump(vars(args), fou, indent=4)
        write_config(args, os.path.join(model_dir, 'run_config.txt'))
        print('Saving results to:', model_dir)
        with torch.no_grad():
            diffAI_cert(
                device, args, cnet,
                test_loader if args.test_set == "test" else train_loader,
                stats=stats, log_ind=True, domains=args.cert_domain)
        exit(0)
    else:
        assert False, 'Unknown mode: {}!'.format(args.train_mode)
    return test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc
'debug_mode': DEBUG_MODE } loss, top_1, top_3 = trainer.train(**trainer_args) print('Train loss: {0:.5f} Top 1: {1:.1f}% Top 3: {2:.1f}%'.format( loss, top_1, top_3)) val_args = { 'model': model, 'device': DEVICE, 'test_loader': val_loader, 'loss_function': loss_function, 'debug_mode': DEBUG_MODE } loss, top_1, top_3 = trainer.test(**val_args) print( 'Validation loss: {0:.5f} Top 1: {1:.1f}% Top 3: {2:.1f}%'.format( loss, top_1, top_3)) if top_3 > best_score: best_top_3 = top_3 best_model = copy.deepcopy(model) model_save_name = 'fold_' + str(fold) + '.weights' torch.save(best_model.state_dict(), MODEL_SAVE_PATH + model_save_name) test_args = { 'model': model, 'device': DEVICE, 'test_loader': test_loader,
def main():
    """Clean-label backdoor (CLBD) poison training on CIFAR-10.

    Builds a poisoned training set by injecting a trigger into adversarially
    perturbed target-class samples, trains a ResNet-18 on it, and evaluates
    each epoch on both clean and fully-poisoned test sets.
    """
    print("===Setup running===")
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="./config/poison_train.yaml")
    parser.add_argument("--gpu", default="0", type=str)
    args = parser.parse_args()
    config, _, _ = load_config(args.config)
    print("===Prepare data===")
    # Backdoor settings: trigger image, target label, and poison ratio.
    bd_config = config["backdoor"]
    print("Load backdoor config:\n{}".format(bd_config))
    bd_transform = CLBD(bd_config["clbd"]["trigger_path"])
    target_label = bd_config["target_label"]
    poison_ratio = bd_config["poison_ratio"]
    # Standard CIFAR-10 augmentation and normalisation.
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010]),
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010]),
    ])
    print("Load dataset from: {}".format(config["dataset_dir"]))
    clean_train_data = CIFAR10(config["dataset_dir"], train_transform,
                               train=True)
    # Indices of training samples to poison at the configured ratio.
    poison_train_idx = gen_poison_idx(clean_train_data, target_label,
                                      poison_ratio=poison_ratio)
    print("Load the adversarially perturbed dataset from: {}".format(
        config["adv_dataset_path"]))
    poison_train_data = CleanLabelDataset(
        clean_train_data,
        config["adv_dataset_path"],
        bd_transform,
        poison_train_idx,
        target_label,
    )
    poison_train_loader = DataLoader(poison_train_data, **config["loader"],
                                     shuffle=True)
    clean_test_data = CIFAR10(config["dataset_dir"], test_transform,
                              train=False)
    # For the poisoned test set every sample gets the trigger (attack success rate).
    poison_test_idx = gen_poison_idx(clean_test_data, target_label)
    poison_test_data = CleanLabelDataset(
        clean_test_data,
        config["adv_dataset_path"],
        bd_transform,
        poison_test_idx,
        target_label,
    )
    clean_test_loader = DataLoader(clean_test_data, **config["loader"])
    poison_test_loader = DataLoader(poison_test_data, **config["loader"])
    # Pin the visible GPU before querying the current device.
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    gpu = torch.cuda.current_device()
    print("Set gpu to: {}".format(args.gpu))
    model = resnet18()
    model = model.cuda(gpu)
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda(gpu)
    optimizer = torch.optim.SGD(model.parameters(),
                                **config["optimizer"]["SGD"])
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, **config["lr_scheduler"]["multi_step"])
    for epoch in range(config["num_epochs"]):
        print("===Epoch: {}/{}===".format(epoch + 1, config["num_epochs"]))
        print("Poison training...")
        poison_train(model, poison_train_loader, criterion, optimizer)
        # Clean accuracy vs. attack success on the triggered test set.
        print("Test model on clean data...")
        test(model, clean_test_loader, criterion)
        print("Test model on poison data...")
        test(model, poison_test_loader, criterion)
        scheduler.step()
        print("Adjust learning rate to {}".format(
            optimizer.param_groups[0]["lr"]))
def main():
    """Train the MIM video-prediction model with manually managed LSTM state.

    Runs up to four forward/backward passes per iteration (original sequence,
    horizontally mirrored, time-reversed, and mirrored+time-reversed) and
    averages the printed loss over however many passes ran.
    """
    # Load parameters
    args = parse_args()
    # Load resources
    if torch.cuda.is_available():
        device = torch.device(args.device)
    else:
        device = torch.device("cpu")
    model = MIM(args).to(device)
    print(model)
    print('The model is loaded!\n')
    # Load dataset
    train_input_handle, test_input_handle = datasets_factory.data_provider(
        args.dataset_name, args.train_data_paths, args.valid_data_paths,
        args.batch_size * args.n_gpu, args.img_width,
        seq_length=args.total_length,
        is_training=True)  # batches come out as n x 64 x 64 x 1

    gen_images = None
    # Recurrent state is held outside the model and threaded through forward().
    cell_state = [init_state(args) for i in range(4)]
    hidden_state = [init_state(args) for i in range(4)]
    cell_state_diff = [init_state(args) for i in range(3)]
    hidden_state_diff = [init_state(args) for i in range(3)]
    st_memory = init_state(args)
    conv_lstm_c = init_state(args)
    # Learnable gate weights for the MIM-B / MIM-N blocks.
    MIMB_ct_weight = nn.Parameter(
        torch.randn((args.num_hidden[0] * 2, args.img_height, args.img_width),
                    device=device))
    MIMB_oc_weight = nn.Parameter(
        torch.randn((args.num_hidden[0], args.img_height, args.img_width),
                    device=device))
    MIMN_ct_weight = nn.Parameter(
        torch.randn((args.num_hidden[0] * 2, args.img_height, args.img_width),
                    device=device))
    MIMN_oc_weight = nn.Parameter(
        torch.randn((args.num_hidden[0], args.img_height, args.img_width),
                    device=device))

    if args.pretrained_model:
        # Restore both the external recurrent state and the model weights.
        hidden_state, cell_state, hidden_state_diff, cell_state_diff, st_memory, conv_lstm_c, MIMB_ct_weight, \
        MIMB_oc_weight, MIMN_ct_weight, MIMN_oc_weight = loadVariables(args)
        model.load(args.pretrained_model)

    eta = args.sampling_start_value  # 1.0
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # MSELoss = torch.nn.MSELoss()

    for itr in range(1, args.max_iterations + 1):
        if train_input_handle.no_batch_left():
            train_input_handle.begin(do_shuffle=True)

        # Fetch input data and apply reshape_patch
        ims = train_input_handle.get_batch()
        ims = preprocess.reshape_patch(ims, args.patch_size)
        eta, real_input_flag = schedule_sampling(eta, itr, args)

        # Convert ims to a tensor and place it on the configured device;
        # forward pass, then build predicted and ground-truth frames
        ims_tensor = torch.tensor(ims, device=device)
        gen_images = model.forward(ims_tensor, real_input_flag, hidden_state,
                                   cell_state, hidden_state_diff,
                                   cell_state_diff, st_memory, conv_lstm_c,
                                   MIMB_oc_weight, MIMB_ct_weight,
                                   MIMN_oc_weight, MIMN_ct_weight)
        gt_ims = torch.tensor(ims[:, 1:], device=device)

        # Apply optical-flow loss (computed but not currently used in `loss`)
        gen_diff, gt_diff = DOFLoss.dense_optical_flow_loss(
            gen_images, gt_ims, args.img_channel)
        optical_loss = DOFLoss.calc_optical_flow_loss(gen_diff, gt_diff,
                                                      args.device)
        MSE_loss = F.mse_loss(gen_images, gt_ims)

        # Backpropagation
        optimizer.zero_grad()
        # loss = 0.8 * MSE_loss + 0.2 * optical_loss
        loss = MSE_loss
        loss.backward()
        optimizer.step()
        # Store loss for logging
        loss_print = loss.detach_()
        flag = 1  # number of forward passes performed this iteration

        # Detach graph and trainable variables so the same computation graph
        # is not reused by the next pass
        del gen_images
        detachVariables(hidden_state, cell_state, hidden_state_diff,
                        cell_state_diff, st_memory, conv_lstm_c,
                        MIMB_ct_weight, MIMB_oc_weight, MIMN_ct_weight,
                        MIMN_oc_weight)

        ims_reverse = None
        if args.reverse_img:
            # Second pass: horizontally mirrored frames.
            ims_reverse = ims[:, :, :, ::-1]
            ims_tensor = torch.tensor(ims_reverse.copy(), device=device)
            gen_images = model.forward(ims_tensor, real_input_flag,
                                       hidden_state, cell_state,
                                       hidden_state_diff, cell_state_diff,
                                       st_memory, conv_lstm_c,
                                       MIMB_oc_weight, MIMB_ct_weight,
                                       MIMN_oc_weight, MIMN_ct_weight)
            gt_ims = torch.tensor(ims_reverse[:, 1:].copy(), device=device)
            gen_diff, gt_diff = DOFLoss.dense_optical_flow_loss(
                gen_images, gt_ims, args.img_channel)
            optical_loss = DOFLoss.calc_optical_flow_loss(
                gen_diff, gt_diff, args.device)
            MSE_loss = F.mse_loss(gen_images, gt_ims)
            optimizer.zero_grad()
            # loss = 0.8 * MSE_loss + 0.2 * optical_loss
            loss = MSE_loss
            loss.backward()
            optimizer.step()
            loss_print += loss.detach_()
            flag += 1
            # Detach graph and trainable variables to avoid reusing the graph
            del gen_images
            detachVariables(hidden_state, cell_state, hidden_state_diff,
                            cell_state_diff, st_memory, conv_lstm_c,
                            MIMB_ct_weight, MIMB_oc_weight, MIMN_ct_weight,
                            MIMN_oc_weight)

        if args.reverse_input:
            # Third pass: time-reversed sequence.
            ims_rev = ims[:, ::-1]
            ims_tensor = torch.tensor(ims_rev.copy(), device=device)
            gen_images = model.forward(ims_tensor, real_input_flag,
                                       hidden_state, cell_state,
                                       hidden_state_diff, cell_state_diff,
                                       st_memory, conv_lstm_c,
                                       MIMB_oc_weight, MIMB_ct_weight,
                                       MIMN_oc_weight, MIMN_ct_weight)
            gt_ims = torch.tensor(ims_rev[:, 1:].copy(), device=device)
            gen_diff, gt_diff = DOFLoss.dense_optical_flow_loss(
                gen_images, gt_ims, args.img_channel)
            optical_loss = DOFLoss.calc_optical_flow_loss(
                gen_diff, gt_diff, args.device)
            MSE_loss = F.mse_loss(gen_images, gt_ims)
            optimizer.zero_grad()
            # loss = 0.8 * MSE_loss + 0.2 * optical_loss
            loss = MSE_loss
            loss.backward()
            optimizer.step()
            loss_print += loss.detach_()
            flag += 1
            # Detach graph and trainable variables to avoid reusing the graph
            del gen_images
            detachVariables(hidden_state, cell_state, hidden_state_diff,
                            cell_state_diff, st_memory, conv_lstm_c,
                            MIMB_ct_weight, MIMB_oc_weight, MIMN_ct_weight,
                            MIMN_oc_weight)

            # NOTE(review): nesting of this pass under reverse_input is
            # inferred — it reads ims_reverse, so it also needs reverse_img.
            if args.reverse_img:
                # Fourth pass: mirrored AND time-reversed.
                ims_rev = ims_reverse[:, ::-1]
                ims_tensor = torch.tensor(ims_rev.copy(), device=device)
                gen_images = model.forward(ims_tensor, real_input_flag,
                                           hidden_state, cell_state,
                                           hidden_state_diff, cell_state_diff,
                                           st_memory, conv_lstm_c,
                                           MIMB_oc_weight, MIMB_ct_weight,
                                           MIMN_oc_weight, MIMN_ct_weight)
                gt_ims = torch.tensor(ims_rev[:, 1:].copy(), device=device)
                gen_diff, gt_diff = DOFLoss.dense_optical_flow_loss(
                    gen_images, gt_ims, args.img_channel)
                optical_loss = DOFLoss.calc_optical_flow_loss(
                    gen_diff, gt_diff, args.device)
                MSE_loss = F.mse_loss(gen_images, gt_ims)
                optimizer.zero_grad()
                # loss = 0.8 * MSE_loss + 0.2 * optical_loss
                loss = MSE_loss
                loss.backward()
                optimizer.step()
                loss_print += loss.detach_()
                flag += 1
                # Detach graph and trainable variables to avoid graph reuse
                del gen_images
                detachVariables(hidden_state, cell_state, hidden_state_diff,
                                cell_state_diff, st_memory, conv_lstm_c,
                                MIMB_ct_weight, MIMB_oc_weight,
                                MIMN_ct_weight, MIMN_oc_weight)

        # Average the printed loss over the number of forward passes
        loss_print = loss_print.item() / flag

        # gen_diff_tensor = torch.tensor(gen_diff, device=args.device, requires_grad=True)
        # gt_diff_tensor = torch.tensor(gt_diff, device=args.device, requires_grad=True)
        #
        # # formula for the optical-flow loss vector
        # diff = gt_diff_tensor - gen_diff_tensor
        # diff = torch.pow(diff, 2)
        # squared_distance = diff[0] + diff[1]
        # distance = torch.sqrt(squared_distance)
        # distance_sum = torch.mean(distance)
        # DOF_Mloss = F.mse_loss(gen_diff_tensor[0], gt_diff_tensor[0])
        # DOF_Dloss = F.mse_loss(gen_diff_tensor[1], gt_diff_tensor[1])
        # compute this loss via MSE / L2-norm / Mahalanobis etc., add it to
        # MSE_loss, then backpropagate
        # loss = 0.7 * MSE_loss + 0.25 * DOF_Mloss + 0.25 * DOF_Dloss
        # loss = trainer.trainer(model, ims, real_input_flag, args, itr, ims_reverse, device, optimizer, MSELoss)

        if itr % args.snapshot_interval == 0:
            # When saving the model, all variables passed to detachVariables
            # must be updated/saved as well
            saveVariables(args, hidden_state, cell_state, hidden_state_diff,
                          cell_state_diff, st_memory, conv_lstm_c,
                          MIMB_ct_weight, MIMB_oc_weight, MIMN_ct_weight,
                          MIMN_oc_weight, itr)
            model.save(itr)

        if itr % args.test_interval == 0:
            trainer.test(model, test_input_handle, args, itr, hidden_state,
                         cell_state, hidden_state_diff, cell_state_diff,
                         st_memory, conv_lstm_c, MIMB_oc_weight,
                         MIMB_ct_weight, MIMN_oc_weight, MIMN_ct_weight)

        if itr % args.display_interval == 0:
            print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                  'itr: ' + str(itr))
            print('training loss: ' + str(loss_print))

        train_input_handle.next()
lr=lr, momentum=0.9, weight_decay=5e-4) # start training for epoch in range(start_epoch, start_epoch + max_epochs): # adjust learning rate adjust_learning_rate(lr, optimizer, epoch, lradj) train_steal(epoch, net, parent, optimizer, logfile, testloader if use_test else trainloader, device, grad_query=grad_query) print("Test acc:") acc = test(net, test_criterion, logfile, testloader, device) print('Saving..') state = { 'net': net.module if device is 'cuda' else net, 'acc': acc, 'epoch': epoch, } if not os.path.isdir(save_dir): os.mkdir(save_dir) torch.save(state, os.path.join(save_dir, save_model))
# Get datasets as pandas dataframes train_data, valid_data, test_data = data.get_dataset(config) # Initialize the model model = models.Model(config) # Initialie the training class trainer = trainer.Trainer(model, config) # If continuing, load previous checkpoint if cont: trainer.load() # Train the model for epoch in range(config.num_epochs): print("----------------Epoch #%d of %d" % (epoch + 1, config.num_epochs)) # Train the model on the training dataset train_accuracy, train_loss = trainer.train(train_data) valid_accuracy, valid_loss = trainer.test(valid_data) # Print results of epoch of training print("-------Results: training accuracy: %.2f, training loss: %.2f, \ valid accuracy: %.2f, valid loss %.2f" % (train_accuracy, train_loss, \ valid_accuracy, valid_loss)) # Save model at end of each epoch trainer.save() # Get final test set results test_accuracy, test_loss = trainer.test(test_data) print( "----------------Final Results: test accuracy: %.2f, test loss: %.2f" % (test_accuracy, test_loss)) if infer: # TODO add inference ability on random.wav files a = 0
def awgn_test(testloader, net, device, args): EbN0_test = torch.arange(args.EbN0dB_test_start, args.EbN0dB_test_end, args.EbN0dB_precision) # Test parameters test_BLER = torch.zeros((len(EbN0_test), 1)) for p in range(len(EbN0_test)): test_BLER[p] = test(net, args, testloader, device, EbN0_test[p]) print('Eb/N0:', EbN0_test[p].numpy(), '| test BLER: %.4f' % test_BLER[p])
def main(args):
    """Adversarial domain-adaptation training on PASCAL (target) + ImageNet (source).

    Builds target train/test loaders and an optional source loader, creates the
    task network, generator, and discriminator with their SGD optimizers,
    optionally restores a checkpoint, then alternates train/test per epoch.
    """
    if args.gpu is not None:
        print('Using GPU %d' % args.gpu)
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    else:
        print('CPU mode')

    # ImageNet normalization statistics.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(227),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(227),
        #transforms.RandomResizedCrop(227),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    # DataLoader initialize (target domain: PASCAL)
    train_data = DataLoader(args.pascal_path, 'trainval',
                            transform=train_transform)
    t_trainloader = torch.utils.data.DataLoader(dataset=train_data,
                                                batch_size=args.batch,
                                                shuffle=True,
                                                num_workers=CORES,
                                                pin_memory=True)
    print('[DATA] Target Train loader done!')
    val_data = DataLoader(args.pascal_path, 'test', transform=val_transform,
                          random_crops=args.crops)
    t_testloader = torch.utils.data.DataLoader(dataset=val_data,
                                               batch_size=args.batch,
                                               shuffle=False,
                                               num_workers=CORES,
                                               pin_memory=True)
    print('[DATA] Target Test loader done!')

    # Source domain (ImageNet) only needed when training.
    if not args.test:
        s_trainset = torchvision.datasets.ImageFolder(
            args.imgnet_path,
            transform=transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomResizedCrop(227),
                transforms.ToTensor(), normalize
            ]))
        s_trainloader = torch.utils.data.DataLoader(dataset=s_trainset,
                                                    batch_size=5 * args.batch,
                                                    shuffle=False,
                                                    num_workers=CORES,
                                                    pin_memory=True)
        print('[DATA] Source Train loader done!')

    N = len(train_data.names)
    # NOTE(review): float division; used below via int(iter_per_epoch * ...).
    iter_per_epoch = N / args.batch

    model = Network(num_classes=21)
    g_model = Network(num_classes=21)
    d_model = disnet()

    if args.gpu is not None:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        print('[MODEL] CUDA DEVICE : {}'.format(device))
        model.to(device)
        g_model.to(device)
        d_model.to(device)

    # Separate SGD optimizers for task net, generator, and discriminator.
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=args.lr, momentum=0.9, weight_decay=0.0001)
    g_optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                         g_model.parameters()),
                                  lr=args.lr, momentum=0.9,
                                  weight_decay=0.0001)
    d_optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                         d_model.parameters()),
                                  lr=args.lr, momentum=0.9,
                                  weight_decay=0.0001)

    # Optionally restore all networks and optimizer states from a checkpoint.
    if args.model is not None:
        checkpoint = torch.load(args.model)
        model.load(checkpoint['model'], True)
        g_model.load(checkpoint['g_model'], True)
        d_model.load_state_dict(checkpoint['d_model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        g_optimizer.load_state_dict(checkpoint['g_optimizer'])
        d_optimizer.load_state_dict(checkpoint['d_optimizer'])

    ############## TRAINING ###############
    print('Start training: lr %f, batch size %d' % (args.lr, args.batch))
    print('Checkpoint: ' + args.checkpoint)

    # Train the Model
    steps = args.iter_start
    best_mAP = 0.0
    best_path = './{}/model-{}_pretrained-{}_lr-0pt001_lmd_s-{}_acc-{}.pth'.format(
        args.checkpoint, 'alexnet', 'False', args.lmd_s, '{}')

    # In test mode run a single evaluation pass.
    if args.test:
        args.epochs = 1
    for epoch in range(int(iter_per_epoch * args.iter_start), args.epochs):
        if not args.test:
            adjust_learning_rate(optimizer, epoch, init_lr=args.lr,
                                 step=100, decay=0.1)
            adjust_learning_rate(g_optimizer, epoch, init_lr=args.lr / 2,
                                 step=100, decay=0.1)
            adjust_learning_rate(d_optimizer, epoch, init_lr=args.lr / 1.5,
                                 step=100, decay=0.1)
            # NOTE(review): train uses s_trainloader, which is only defined
            # when not args.test — guarding it here is therefore required.
            done = train(epoch, model, g_model, d_model, optimizer,
                         g_optimizer, d_optimizer, t_trainloader,
                         s_trainloader, args.lmd_s, device)
        best_mAP = test(epoch, model, g_model, d_model, optimizer,
                        g_optimizer, d_optimizer, t_testloader, best_mAP,
                        best_path, device)
parser = argparse.ArgumentParser('[*] Argument ') parser.add_argument('-train', default = 'true', help = 'train True or False') parser.add_argument('-test', default = 'false',help = 'test True or False') parser.add_argument('-device', default = 3, help = 'GPU number') parser.add_argument('-dim', default = 150, help = 'embedding dimenstion size') parser.add_argument('-layer', default = 1, help = '# of layer') parser.add_argument('-batch', default = 1028, help = '# of batch') parser.add_argument('-hidden', default = 512, help = '# of hidden') parser.add_argument('-margin', default = 1, help = 'margin') parser.add_argument('-epoch', default = 50, help = '# of epoch') parser.add_argument('-lr', default = 0.001, help = 'learning rate') parser.add_argument('-data', default = './dataset/', help = 'data folder path') parser.add_argument('-pretrain',default= './sample_onmt/',help = 'pretrain model path') parser.add_argument('-output', default = './result/model',help = 'output path') parser.add_argument('-unref', default = './result/model',help = 'unrefer model path') args = parser.parse_args() if args.train == 'true': train(args) if args.test == 'true': test(args) print('[*] OVER')
if args.num_train_dec > 0: for idx in range(args.num_train_dec): if args.hard_example: train_loss, hard_examples = train_hardexample(epoch, model, dec_optimizer, args, use_cuda = use_cuda, mode ='decoder', hard_examples = hard_examples) else: train(epoch, model, dec_optimizer, args, use_cuda = use_cuda, mode ='decoder') this_loss, this_ber = validate(model, general_optimizer, args, use_cuda = use_cuda) report_loss.append(this_loss) report_ber.append(this_ber) if args.print_test_traj == True: print('test loss trajectory', report_loss) print('test ber trajectory', report_ber) print('total epoch', args.num_epoch) ################################################# # Testing Processes ################################################# test(model, args, use_cuda = use_cuda) torch.save(model.state_dict(), './tmp/torch_model_'+identity+'.pt') print('saved model', './tmp/torch_model_'+identity+'.pt')
def main():
    """Train/evaluate a regression model tracked by validation MAE.

    Parses CLI args, builds TRAIN/VAL loaders, optionally resumes from a
    checkpoint, then trains for args.epochs epochs, checkpointing whenever
    validation MAE improves. With --test set, runs a single test pass and
    exits.
    """
    global args, best_score, best_epoch
    # FIX: MAE is lower-is-better, so the "no score yet" sentinel must be
    # +inf — the original -1 made `val_mae < best_score`永 false, so no
    # checkpoint was ever marked best. best_epoch keeps -1 as "none yet".
    best_score, best_epoch = float('inf'), -1
    if len(sys.argv) > 1:
        args = parse_args()
        print('----- Experiments parameters -----')
        for k, v in args.__dict__.items():
            print(k, ':', v)
    else:
        print(
            'Please provide some parameters for the current experiment. Check-out arg.py for more info!'
        )
        sys.exit()

    # init random seeds
    utils.setup_env(args)

    # init tensorboard summary is asked
    tb_writer = SummaryWriter(f'{args.data_dir}/runs/{args.name}/tensorboard'
                              ) if args.tensorboard else None

    # init data loaders
    loader = get_loader(args)
    train_loader = torch.utils.data.DataLoader(loader(
        path_to_data=args.data_dir, mode='TRAIN'),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(loader(path_to_data=args.data_dir,
                                                    mode='VAL'),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    exp_logger, lr = None, None
    model = get_model(args)
    criterion = losses.get_criterion(args)

    # optionally resume from a checkpoint (restores best_score/best_epoch too)
    if args.resume:
        model, exp_logger, args.start_epoch, best_score, best_epoch, lr = load_checkpoint(
            args, model)
        args.lr = lr
    else:
        # create all output folders
        utils.init_output_env(args)
    if exp_logger is None:
        exp_logger = init_logger(args, model)
    optimizer, scheduler = optimizers.get_optimizer(args, model)
    print(' + Number of params: {}'.format(utils.count_params(model)))
    model.to(args.device)
    criterion.to(args.device)

    # Test-only mode: evaluate on the TEST split and exit.
    if args.test:
        test_loader = torch.utils.data.DataLoader(loader(
            path_to_data=args.data_dir, mode='TEST'),
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  num_workers=args.workers,
                                                  pin_memory=True)
        trainer.test(args, test_loader, model, criterion, args.start_epoch,
                     eval_score=metrics.accuracy_regression,
                     output_dir=args.out_pred_dir, has_gt=True)
        sys.exit()

    is_best = True
    for epoch in range(args.start_epoch, args.epochs + 1):
        print('Current epoch: ', epoch)
        trainer.train(args, train_loader, model, criterion, optimizer,
                      exp_logger, epoch,
                      eval_score=metrics.accuracy_regression,
                      tb_writer=tb_writer)

        # evaluate on validation set
        val_mae, val_squared_mse, val_loss = trainer.validate(
            args, val_loader, model, criterion, exp_logger, epoch,
            eval_score=metrics.accuracy_regression, tb_writer=tb_writer)

        # update learning rate
        if scheduler is None:
            trainer.adjust_learning_rate(args, optimizer, epoch)
        else:
            prev_lr = optimizer.param_groups[0]['lr']
            if 'ReduceLROnPlateau' == args.scheduler:
                scheduler.step(val_loss)
            else:
                scheduler.step()
            print(
                f"Updating learning rate from {prev_lr} to {optimizer.param_groups[0]['lr']}"
            )

        # remember best (lowest) validation MAE and save checkpoint
        is_best = val_mae < best_score
        best_score = min(val_mae, best_score)
        if is_best:
            best_epoch = epoch
        save_checkpoint(
            args, {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_score': best_score,
                'best_epoch': best_epoch,
                'exp_logger': exp_logger,
            }, is_best)

        # write plots to disk
        generate_plots(args, exp_logger, is_best=is_best)
        # generate html report
        logger.export_logs(args, epoch, best_epoch)

    if args.tensorboard:
        tb_writer.close()
    print("That's all folks!")
def main():
    """Train a small CNN on the cat-vs-dog dataset, logging to TensorBoard.

    Builds train/valid/viz loaders from --data, logs a sample image grid,
    model graph, and embeddings to TensorBoard, then trains for 20 epochs.
    """
    # Add argument parser
    parser = argparse.ArgumentParser(description='Dog vs Cat Example')
    parser.add_argument(
        '--data',
        type=str,
        default='/home/aims/Dropbox/AMMI/Tutorial/NN_1/project/Cat_Dog_data',
        help='Folder that contain your training and testing data')
    args = parser.parse_args()
    writer = SummaryWriter('runs/catvsdog_experiment_1')

    # set path
    PATH = Path(args.data)
    TRAIN = Path(PATH / 'train')
    VALID = Path(PATH / 'test')

    # set hyperparameter
    num_workers = 0
    batch_size = 32
    n_features = 6
    input_size = 224
    output_size = 2

    # create custom tranform (viz transform skips normalization so the
    # logged images look natural)
    viz_transforms = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor()])
    train_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    valid_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Loading the data using custom data loader
    viz_data = MyDataset(TRAIN, transform=viz_transforms)
    train_data = MyDataset(TRAIN, transform=train_transforms)
    valid_data = MyDataset(VALID, transform=valid_transforms)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=True)
    viz_loader = torch.utils.data.DataLoader(viz_data,
                                             batch_size=batch_size,
                                             num_workers=num_workers,
                                             shuffle=True)

    # Create model
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model_cnn = CNN2(input_size, n_features, output_size)
    optimizer = optim.Adam(model_cnn.parameters(), lr=0.01)

    # get some random training images
    dataiter = iter(viz_loader)
    # FIX: DataLoader iterators no longer expose a .next() method in current
    # PyTorch; use the builtin next() protocol instead.
    images, labels = next(dataiter)

    # create grid of images
    img_grid = torchvision.utils.make_grid(images)

    # show images
    matplotlib_imshow(img_grid)

    # write to tensorboard
    writer.add_image('four_dogvscats', img_grid)
    writer.add_graph(model_cnn, images)

    # get the class labels for each image
    classes = ('cat', 'dog')
    class_labels = [classes[lab] for lab in labels]

    # log embeddings (flatten each image to a single feature vector)
    features = images.view(-1, 224 * 224 * 3)
    writer.add_embedding(features, metadata=class_labels, label_img=images)

    model_cnn = model_cnn.to(device)
    for epoch in range(20):
        train(epoch, model_cnn, train_loader, optimizer, writer, device)
        test(model_cnn, valid_loader, device)
    writer.close()
epoch_start_time)) if args.print_test_traj == True: print('test loss trajectory', report_loss) print('test ber trajectory', report_ber) print('test bler trajectory', report_bler) print('total epoch', args.num_epoch) ################################################# # Testing Processes ################################################# modelpath = './tmp/attention_model_' + str(args.channel) + '_lr_' + str( args.enc_lr) + '_D' + str(args.D) + '_' + str(args.num_block) + '.pt' torch.save(model.state_dict(), modelpath) print('saved model', modelpath) # torch.save(model.state_dict(), './tmp/torch_model_'+identity+'.pt') # print('saved model', './tmp/torch_model_'+identity+'.pt') if args.is_variable_block_len: print('testing block length', args.block_len_low) test(model, args, block_len=args.block_len_low, use_cuda=use_cuda) print('testing block length', args.block_len) test(model, args, block_len=args.block_len, use_cuda=use_cuda) print('testing block length', args.block_len_high) test(model, args, block_len=args.block_len_high, use_cuda=use_cuda) else: test(model, args, use_cuda=use_cuda) print("Training Time: {}s".format(time.time() - start_time))
def main():
    """Train/evaluate a detection model tracked by validation mAP.

    Parses CLI args, builds train/val loaders (batch of raw samples via an
    identity collate_fn), optionally resumes, then trains for args.epochs
    epochs, checkpointing whenever validation mAP improves. With --test set,
    runs one test pass and exits.
    """
    global args, best_score, best_epoch
    # mAP is higher-is-better, so -1 is a valid "no score yet" sentinel here.
    best_score, best_epoch = -1, -1
    if len(sys.argv) > 1:
        args = parse_args()
        print('----- Experiments parameters -----')
        for k, v in args.__dict__.items():
            print(k, ':', v)
    else:
        print('Please provide some parameters for the current experiment. Check-out args.py for more info!')
        sys.exit()

    # init random seeds
    utils.setup_env(args)

    # init tensorboard summary is asked
    tb_writer = SummaryWriter(f'{args.data_dir}/runs/{args.name}/tensorboard') if args.tensorboard else None

    # init data loaders (collate_fn=identity keeps samples as a python list)
    loader = get_loader(args)
    train_loader = torch.utils.data.DataLoader(loader(data_dir=args.data_dir,
                                                      split='train',
                                                      min_size=args.min_size_train,
                                                      max_size=args.max_size_train,
                                                      dataset_size=args.dataset_size_train),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               collate_fn=lambda x: x,
                                               pin_memory=True)
    # Validation runs one image at a time (batch_size=1).
    val_loader = torch.utils.data.DataLoader(loader(data_dir=args.data_dir,
                                                    split='val',
                                                    min_size=args.min_size_val,
                                                    max_size=args.max_size_val,
                                                    dataset_size=args.dataset_size_val),
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             collate_fn=lambda x: x,
                                             pin_memory=True)
    exp_logger, lr = None, None
    model = get_model(args)
    criterion = losses.get_criterion(args)

    # optionally resume from a checkpoint (restores best_score/best_epoch too)
    if args.resume:
        model, exp_logger, args.start_epoch, best_score, best_epoch, lr = load_checkpoint(args, model)
        args.lr = lr
    else:
        # create all output folders
        utils.init_output_env(args)
    if exp_logger is None:
        exp_logger = init_logger(args, model)
    optimizer, scheduler = optimizers.get_optimizer(args, model)
    print(' + Number of params: {}'.format(utils.count_params(model)))
    model.to(args.device)
    criterion.to(args.device)

    # Test-only mode: evaluate on the test split and exit.
    if args.test:
        test_loader = torch.utils.data.DataLoader(loader(data_dir=args.data_dir,
                                                         split='test',
                                                         min_size=args.min_size_val,
                                                         max_size=args.max_size_val,
                                                         dataset_size=args.dataset_size_val),
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  num_workers=args.workers,
                                                  collate_fn=lambda x: x,
                                                  pin_memory=True)
        trainer.test(args, test_loader, model, criterion, args.start_epoch,
                     eval_score=metrics.get_score(args.test_type),
                     output_dir=args.out_pred_dir, has_gt=True,
                     print_freq=args.print_freq_val)
        sys.exit()

    is_best = True
    for epoch in range(args.start_epoch, args.epochs + 1):
        print('Current epoch:', epoch)
        trainer.train(args, train_loader, model, criterion, optimizer,
                      exp_logger, epoch,
                      eval_score=metrics.get_score(args.train_type),
                      print_freq=args.print_freq_train, tb_writer=tb_writer)

        # evaluate on validation set
        mAP, val_loss = trainer.validate(args, val_loader, model, criterion,
                                         exp_logger, epoch,
                                         eval_score=metrics.get_score(args.val_type),
                                         print_freq=args.print_freq_val,
                                         tb_writer=tb_writer)

        # Update learning rate
        if scheduler is None:
            trainer.adjust_learning_rate(args, optimizer, epoch)
        else:
            prev_lr = optimizer.param_groups[0]['lr']
            if 'ReduceLROnPlateau' == args.scheduler:
                scheduler.step(val_loss)
            else:
                scheduler.step()
            print(f"Updating learning rate from {prev_lr} to {optimizer.param_groups[0]['lr']}")

        # remember best (highest) mAP and save checkpoint
        is_best = mAP > best_score
        best_score = max(mAP, best_score)
        if True == is_best:
            best_epoch = epoch
        save_checkpoint(args, {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_score': best_score,
            'best_epoch': best_epoch,
            'exp_logger': exp_logger,
        }, is_best)

    if args.tensorboard:
        tb_writer.close()
    print(" ***** Processes all done. *****")
def train_episode(cnets, dTNet, device, args, lr, epoch, n_epochs,
                  train_loader, test_loader, episode_idx, layers, stats=None,
                  eps_init=0, balanced_loss=False, net_weights=None):
    """Run one certified-training episode, advancing layer by layer.

    For each consecutive pair in `layers`, builds fresh optimizer and
    epsilon/kappa/beta schedulers, trains for n_epochs, periodically saves
    and tests, then decays relu_stable and lr before moving to the next
    layer. Returns the (global) epoch counter after the episode.

    FIX: `net_weights` previously had a mutable default `[1]`, shared across
    calls; it now uses a None sentinel (backward compatible).
    """
    if net_weights is None:
        net_weights = [1]
    if not isinstance(cnets, list):
        cnets = [cnets]
    for cnet in cnets:
        cnet.train()
    net = cnets[0].net
    relaxed_net = cnets[0].relaxed_net
    relu_ids = relaxed_net.get_relu_ids()
    eps = eps_init

    # The training mode decides whether ReLU-stability regularization and
    # schedule mixing are active.
    if "COLT" in args.train_mode:
        relu_stable = args.relu_stable
    elif "adv" in args.train_mode:
        relu_stable = None
        args.mix = False
    elif "natural" in args.train_mode:
        relu_stable = None
        args.nat_factor = 1
        args.mix = False
    elif "diffAI" in args.train_mode:
        relu_stable = None
    else:
        raise RuntimeError(f"Unknown train mode {args.train_mode:}")

    print('Saving model to:', args.model_dir)
    print('Training layers: ', layers)

    for j in range(len(layers) - 1):
        opt, lr_scheduler = get_opt(net, args.opt, lr, args.lr_step,
                                    args.lr_factor, args.n_epochs,
                                    train_loader, args.lr_sched)
        curr_layer_idx = layers[j + 1]
        eps_old = eps
        eps = get_scaled_eps(args, layers, relu_ids, curr_layer_idx, j)
        if eps_old is None:
            eps_old = eps

        # Per-phase schedulers: kappa (loss mixing), beta, and epsilon
        # annealing, all measured in samples (batch * steps).
        kappa_sched = Scheduler(0 if args.mix else 1, 1,
                                args.train_batch * len(train_loader) * args.mix_epochs,
                                0 if not args.anneal else args.train_batch * len(train_loader)*args.anneal_warmup)
        beta_sched = Scheduler(args.beta_start if args.mix else args.beta_end,
                               args.beta_end,
                               args.train_batch * len(train_loader) * args.mix_epochs,
                               0)
        eps_sched = Scheduler(eps_old if args.anneal else eps, eps,
                              args.train_batch * len(train_loader) * args.anneal_epochs,
                              args.train_batch * len(train_loader)*args.anneal_warmup,
                              power=args.anneal_pow)

        layer_dir = '{}/{}/{}'.format(args.model_dir, episode_idx,
                                      curr_layer_idx)
        if not os.path.exists(layer_dir):
            os.makedirs(layer_dir)

        print('\nnew train phase: eps={:.5f}, lr={:.2e}, curr_layer={}\n'.format(eps, lr, curr_layer_idx))

        if balanced_loss:
            assert cnets[0].lossFn_test is None, "Unexpected lossFn"
            # Fraction of positive targets in the training set.
            data_balance = np.array(train_loader.dataset.targets).astype(float).mean()
            balance_factor = (1 - data_balance) / (data_balance + 1e-3)
            cnets[0].update_loss_fn(balance_factor, device)

        for curr_epoch in range(n_epochs):
            if balanced_loss and args.sliding_loss_balance is not None and j == 0:
                # if sliding loss balance is active, anneal loss balance from
                # fully balanced to partially balanced
                assert 0 <= args.sliding_loss_balance <= 1
                balance_factor_initial = (1-data_balance)/(data_balance+1e-3)
                scaling_factor_balance = 1-max(min((curr_epoch-0.1*n_epochs)/(n_epochs*0.7), args.sliding_loss_balance), 0)
                balance_factor = scaling_factor_balance * (balance_factor_initial-1) + 1
                cnets[0].update_loss_fn(balance_factor, device)

            train(device, epoch, args, j + 1, layers, cnets, eps_sched,
                  kappa_sched, opt, train_loader, lr_scheduler, relu_ids,
                  stats, relu_stable,
                  relu_stable_protected=args.relu_stable_protected,
                  net_weights=net_weights, beta_sched=beta_sched)

            # StepLR only steps after the mixing warm-up epochs.
            if isinstance(lr_scheduler, optim.lr_scheduler.StepLR) and curr_epoch >= args.mix_epochs:
                lr_scheduler.step()

            if (epoch + 1) % args.test_freq == 0 or (epoch + 1) % n_epochs == 0:
                torch.save(dTNet.state_dict(),
                           os.path.join(layer_dir, 'net_%d.pt' % (epoch + 1)))
                # NOTE(review): this also saves the NETWORK state under the
                # 'opt_%d.pt' name — looks like it should save the optimizer
                # state; left unchanged pending confirmation.
                torch.save(dTNet.state_dict(),
                           os.path.join(layer_dir, 'opt_%d.pt' % (epoch + 1)))
                test(device, args, cnets[0],
                     test_loader if args.test_set == "test" else train_loader,
                     [curr_layer_idx], stats=stats)
            # NOTE(review): assumes stats is not None despite the default —
            # callers appear to always pass it.
            stats.update_tb(epoch)
            epoch += 1

        # Decay regularization strength and learning rate for the next layer.
        relu_stable = None if relu_stable is None else relu_stable * args.relu_stable_layer_dec
        lr = lr * args.lr_layer_dec

    net.freeze(len(net.blocks)-1)
    return epoch