def changeDues(msg):
    """Assign each given card a random due date in [minDue, maxDue].

    msg must carry 'cardIds' (a list of card ids) plus 'minDue'/'maxDue'
    as epoch seconds; these are converted to Anki's day-based due values.
    New cards (queue 0 or type 0) are skipped. Returns an emit result.

    Fixes vs. previous version: removed the unused `oldIvl` capture, the
    duplicate `oldDue = card.due` assignment, and a leftover debug print.
    """
    typeCheck(msg, {'cardIds': list, 'minDue': int, 'maxDue': int})
    cids = msg['cardIds']
    with Col() as col:
        checkpoint(col, 'Change card dues')
        # Anki stores review dues as whole days since collection creation
        # (col.crt, epoch seconds), hence the // 86400 conversion.
        minDue = (msg['minDue'] - col.crt) // 86400
        maxDue = (msg['maxDue'] - col.crt) // 86400
        for cid in cids:
            card = col.getCard(cid)
            if card.queue == 0 or card.type == 0:
                # Ignore for new cards
                continue
            # TODO: Properly calculate the next interval using exponential learning curve
            oldDue = card.due
            newDue = random.randint(minDue, maxDue)
            # Force the card into the review state with the shifted due date,
            # shifting the interval by the same amount to keep them consistent.
            card.type = 2
            card.queue = 2
            card.due = newDue
            card.ivl += newDue - oldDue
            card.flush()
        col.reset()
    return emit.emitResult(True)
def test(epoch):
    """Evaluate the global `net` on `testloader`; checkpoint on improvement.

    Returns (mean test loss per batch, accuracy in percent). Relies on the
    module-level globals net / testloader / loss_func / use_cuda / args /
    best_acc / checkpoint.
    """
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    all_correct = []
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs)
            loss = loss_func(outputs, targets)
            step_loss = loss.item()
            # In the private (DP) setting the loss is summed per example,
            # so normalize by the batch size.
            if args.private:
                step_loss /= inputs.shape[0]
            test_loss += step_loss
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct_idx = predicted.eq(targets.data).cpu()
            all_correct += correct_idx.numpy().tolist()
            correct += correct_idx.sum()
    acc = 100. * float(correct) / float(total)
    # Number of batches seen; `batch_idx` is 0-based after the loop.
    num_batches = batch_idx + 1
    print('test loss:%.5f' % (test_loss / num_batches), 'test acc:', acc)
    ## Save checkpoint.
    if acc > best_acc:
        best_acc = acc
        checkpoint(net, acc, epoch, args.sess)
    # BUG FIX: previously returned test_loss / batch_idx (off by one) while
    # printing test_loss / (batch_idx + 1); return the same mean that is printed.
    return (test_loss / num_batches, acc)
def resetScheduling(msg):
    """Reset scheduling and learning state on the cards named in msg['cardIds']."""
    typeCheck(msg, {'cardIds': list})
    card_ids = msg['cardIds']
    with Col() as col:
        checkpoint(col, "Reset scheduling and learning on selected cards")
        scheduler = col.sched
        scheduler.resetCards(card_ids)
        scheduler.removeLrn(card_ids)
    return emit.emitResult(True)
def main(hparams):
    """Run the VAE channel estimator over every entry of the input data dict,
    recording measurement / l2 losses and checkpointing periodically.
    (Python 2 code: print statements, dict.iteritems.)"""
    # Set up some stuff according to hparams
    hparams.n_input = np.prod(hparams.image_shape)
    utils.set_num_measurements(hparams)
    utils.print_hparams(hparams)

    # get inputs
    data_dict = model_input(hparams)

    estimator = utils.get_estimator(hparams, 'vae')
    utils.setup_checkpointing(hparams)
    measurement_losses, l2_losses = utils.load_checkpoints(hparams)

    h_hats_dict = {model_type: {} for model_type in hparams.model_types}
    for key, x in data_dict.iteritems():
        if not hparams.not_lazy:
            # If lazy, first check if the image has already been
            # saved before by *all* estimators. If yes, then skip this image.
            save_paths = utils.get_save_paths(hparams, key)
            is_saved = all([os.path.isfile(save_path) for save_path in save_paths.values()])
            if is_saved:
                continue

        # Get Rx data
        Rx = data_dict[key]['Rx_data']
        # NOTE(review): Tx is read from 'Rx_data' — should this be 'Tx_data'?
        # Verify against the producer of data_dict before changing.
        Tx = data_dict[key]['Rx_data']
        H = data_dict[key]['H_data']

        # Construct estimates using each estimator
        h_hat = estimator(Tx, Rx, hparams)

        # Save the estimate
        h_hats_dict['vae'][key] = h_hat

        # Compute and store measurement and l2 loss
        measurement_losses['vae'][key] = utils.get_measurement_loss(h_hat, Tx, Rx)
        l2_losses['vae'][key] = utils.get_l2_loss(h_hat, H)
        print 'Processed upto image {0} / {1}'.format(key + 1, len(data_dict))

        # Checkpointing
        if (hparams.save_images) and ((key + 1) % hparams.checkpoint_iter == 0):
            utils.checkpoint(key, h_hat, measurement_losses, l2_losses, save_image, hparams)
            print '\nProcessed and saved first ', key + 1, 'channels\n'
def test(epoch, train_loss):
    """Put both networks in eval mode and write checkpoints for this epoch."""
    model.eval()
    projector.eval()

    # NOTE(review): the epoch conditions below are commented out, so BOTH
    # checkpoint pairs run on every call — confirm this is intentional.

    # Save at the last epoch
    # if epoch == args.epoch - 1 and args.local_rank % ngpus_per_node == 0:
    checkpoint(model, train_loss, epoch, args, optimizer)
    checkpoint(projector, train_loss, epoch, args, optimizer, save_name_add='_projector')

    # Save at every 100 epoch
    # elif epoch % 100 == 0 and args.local_rank % ngpus_per_node == 0:
    checkpoint(model, train_loss, epoch, args, optimizer, save_name_add='_epoch_' + str(epoch))
    checkpoint(projector, train_loss, epoch, args, optimizer, save_name_add=('_projector_epoch_' + str(epoch)))
def encode(self, x):
    """Run the multi-scale flow encoder on x.

    Returns (zout, loglikelihood_accum): the list of per-level latent tensors
    and the accumulated log-determinant from all permutation/flow steps.
    The explicit del/contiguous calls below are deliberate memory management.
    """
    loglikelihood_accum = 0
    zout = []

    z = x.clone()
    z.requires_grad = True
    for L in range(self.num_blocks):
        # squeeze - ensures the channel dimension is divisible by 2
        z = self.squeezes[L](z)

        for K in range(self.num_layers_per_block):
            # permute
            z, plogdet = self.permutes[L][K](z)

            # Flow step; optionally recompute activations during backward
            # via gradient checkpointing to save memory.
            if self.checkpoint_gradients:
                z, logdet = utils.checkpoint(self.flows[L][K], z)
            else:
                z, logdet = self.flows[L][K](z)

            loglikelihood_accum = loglikelihood_accum + (logdet + plogdet)
            del plogdet
            del logdet

        # split hierarchical
        # this operation returns two non-contiguous blocks
        # with references to the original z tensor
        # if we do not call .contiguous() (or .clone()) on BOTH z1 and z2,
        # then the entire z tensor must be kept around
        # the del operators just do a little cleanup to avoid a (very) slight memory bump
        z1, z2 = torch.chunk(z, 2, dim=1)
        z1 = z1.contiguous()
        z2 = z2.contiguous()
        zout.append(z1)
        del z
        z = z2
        del z2

    # Final carry-over latent from the last level.
    zout.append(z)

    return zout, loglikelihood_accum
def main(hparams):
    """Batched DCGAN-based recovery from linear measurements y = x @ A.

    Processes images in batches of hparams.batch_size, runs `maxiter`
    projected outer iterations through the DCGAN estimator, records
    measurement / l2 losses, and checkpoints periodically.
    (Python 2 code: print statements, dict.iteritems.)"""
    # Set up some stuff according to hparams
    hparams.n_input = np.prod(hparams.image_shape)
    maxiter = hparams.max_outer_iter
    utils.print_hparams(hparams)

    # get inputs
    xs_dict = model_input(hparams)

    estimators = utils.get_estimators(hparams)
    utils.setup_checkpointing(hparams)
    measurement_losses, l2_losses = utils.load_checkpoints(hparams)

    x_hats_dict = {'dcgan' : {}}
    x_batch_dict = {}
    for key, x in xs_dict.iteritems():
        if hparams.lazy:
            # If lazy, first check if the image has already been
            # saved before by *all* estimators. If yes, then skip this image.
            save_paths = utils.get_save_paths(hparams, key)
            is_saved = all([os.path.isfile(save_path) for save_path in save_paths.values()])
            if is_saved:
                continue

        x_batch_dict[key] = x
        if len(x_batch_dict) < hparams.batch_size:
            continue

        # Reshape input
        x_batch_list = [x.reshape(1, hparams.n_input) for _, x in x_batch_dict.iteritems()]
        x_batch = np.concatenate(x_batch_list)

        # Construct measurements
        A_outer = utils.get_outer_A(hparams)
        y_batch_outer = np.matmul(x_batch, A_outer)

        x_main_batch = 0.0 * x_batch
        z_opt_batch = np.random.randn(hparams.batch_size, 100)
        for k in range(maxiter):
            # Gradient step on the measurement residual, then project back
            # onto the generator's range via the DCGAN estimator.
            x_est_batch = x_main_batch + hparams.outer_learning_rate * (np.matmul((y_batch_outer - np.matmul(x_main_batch, A_outer)), A_outer.T))
            estimator = estimators['dcgan']
            x_hat_batch, z_opt_batch = estimator(x_est_batch, z_opt_batch, hparams)
            x_main_batch = x_hat_batch

        for i, key in enumerate(x_batch_dict.keys()):
            x = xs_dict[key]
            y = y_batch_outer[i]
            x_hat = x_hat_batch[i]

            # Save the estimate
            x_hats_dict['dcgan'][key] = x_hat

            # Compute and store measurement and l2 loss
            measurement_losses['dcgan'][key] = utils.get_measurement_loss(x_hat, A_outer, y)
            l2_losses['dcgan'][key] = utils.get_l2_loss(x_hat, x)

        print 'Processed upto image {0} / {1}'.format(key+1, len(xs_dict))

        # Checkpointing
        if (hparams.save_images) and ((key+1) % hparams.checkpoint_iter == 0):
            utils.checkpoint(x_hats_dict, measurement_losses, l2_losses, save_image, hparams)
            # x_hats_dict = {'dcgan' : {}}
            # NOTE(review): the reset above is commented out, so estimates
            # accumulate across checkpoints — confirm this is intended.
            print '\nProcessed and saved first ', key+1, 'images\n'

        x_batch_dict = {}

    # Final checkpoint
    if hparams.save_images:
        utils.checkpoint(x_hats_dict, measurement_losses, l2_losses, save_image, hparams)
        print '\nProcessed and saved all {0} image(s)\n'.format(len(xs_dict))

    if hparams.print_stats:
        for model_type in hparams.model_types:
            print model_type
            mean_m_loss = np.mean(measurement_losses[model_type].values())
            mean_l2_loss = np.mean(l2_losses[model_type].values())
            print 'mean measurement loss = {0}'.format(mean_m_loss)
            print 'mean l2 loss = {0}'.format(mean_l2_loss)

    if hparams.image_matrix > 0:
        utils.image_matrix(xs_dict, x_hats_dict, view_image, hparams)

    # Warn the user that some things were not processsed
    if len(x_batch_dict) > 0:
        print '\nDid NOT process last {} images because they did not fill up the last batch.'.format(len(x_batch_dict))
        print 'Consider rerunning lazily with a smaller batch size.'
def main(hparams):
    """1-bit compressed sensing with the DCGAN estimator.

    Measurements are sign(x @ A); each batch runs `maxiter` outer iterations
    of a gradient step on the sign-residual followed by a DCGAN projection.
    (Python 2 code: print statements, dict.iteritems.)"""
    hparams.n_input = np.prod(hparams.image_shape)
    maxiter = hparams.max_outer_iter
    utils.print_hparams(hparams)

    xs_dict = model_input(hparams)
    estimators = utils.get_estimators(hparams)
    utils.setup_checkpointing(hparams)
    measurement_losses, l2_losses = utils.load_checkpoints(hparams)

    x_hats_dict = {'dcgan': {}}
    x_batch_dict = {}
    for key, x in xs_dict.iteritems():
        x_batch_dict[key] = x
        if len(x_batch_dict) < hparams.batch_size:
            continue

        # Collect the pending images into one (batch, n_input) array.
        x_coll = [x.reshape(1, hparams.n_input) for _, x in x_batch_dict.iteritems()]
        x_batch = np.concatenate(x_coll)

        A_outer = utils.get_outer_A(hparams)

        # 1bitify
        y_batch_outer = np.sign(np.matmul(x_batch, A_outer))

        x_main_batch = 0.0 * x_batch
        z_opt_batch = np.random.randn(hparams.batch_size, 100)
        for k in range(maxiter):
            # Gradient step on the sign-measurement residual, then project
            # through the DCGAN estimator.
            x_est_batch = x_main_batch + hparams.outer_learning_rate * (np.matmul((y_batch_outer - np.sign(np.matmul(x_main_batch, A_outer))), A_outer.T))
            estimator = estimators['dcgan']
            x_hat_batch, z_opt_batch = estimator(x_est_batch, z_opt_batch, hparams)
            x_main_batch = x_hat_batch

        for i, key in enumerate(x_batch_dict.keys()):
            x = xs_dict[key]
            y = y_batch_outer[i]
            x_hat = x_hat_batch[i]

            x_hats_dict['dcgan'][key] = x_hat
            measurement_losses['dcgan'][key] = utils.get_measurement_loss(x_hat, A_outer, y)
            l2_losses['dcgan'][key] = utils.get_l2_loss(x_hat, x)

        print 'Processed upto image {0} / {1}'.format(key + 1, len(xs_dict))

        if (hparams.save_images) and ((key + 1) % hparams.checkpoint_iter == 0):
            utils.checkpoint(x_hats_dict, measurement_losses, l2_losses, save_image, hparams)
            print '\nProcessed and saved first ', key + 1, 'images\n'

        x_batch_dict = {}

    if hparams.save_images:
        utils.checkpoint(x_hats_dict, measurement_losses, l2_losses, save_image, hparams)
        print '\nProcessed and saved all {0} image(s)\n'.format(len(xs_dict))

    if hparams.print_stats:
        for model_type in hparams.model_types:
            print model_type
            mean_m_loss = np.mean(measurement_losses[model_type].values())
            mean_l2_loss = np.mean(l2_losses[model_type].values())
            print 'mean measurement loss = {0}'.format(mean_m_loss)
            print 'mean l2 loss = {0}'.format(mean_l2_loss)

    if hparams.image_matrix > 0:
        utils.image_matrix(xs_dict, x_hats_dict, view_image, hparams)

    # Warn the user that some things were not processsed
    if len(x_batch_dict) > 0:
        print '\nDid NOT process last {} images because they did not fill up the last batch.'.format(len(x_batch_dict))
        print 'Consider rerunning lazily with a smaller batch size.'
import torch

import utils
from option import args  # args are declared in option.py
from data import data
from trainer import Trainer

if __name__ == '__main__':
    # Seed the RNG so subsequent random draws are reproducible.
    torch.manual_seed(args.seed)

    ckpt = utils.checkpoint(args)  # handles logging/checkpoint bookkeeping
    if ckpt.ok:
        loader = data(args).get_loader()  # build the DataLoader
        trainer = Trainer(loader, ckpt, args)
        while not trainer.terminate():
            trainer.train()
            trainer.test()
        ckpt.done()
def main():
    """Entry point: seed RNGs, pick the device, build the job directory and
    loggers, load the dataset, construct the quantized model (optionally
    resuming from a checkpoint), then launch architecture search."""
    start_epoch = 0
    best_prec1 = 0.0

    # One random seed drives numpy / torch / cuda for reproducibility.
    seed = np.random.randint(10000)
    if seed is not None:
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

    if args.gpus is not None:
        device = torch.device("cuda:{}".format(args.gpus[0]))
        # Deterministic cudnn settings (benchmark off).
        cudnn.benchmark = False
        cudnn.deterministic = True
        cudnn.enabled = True
    else:
        device = torch.device("cpu")

    now = datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
    # Job directory layout: <job_dir>/<dataset>/<arch variant>[/<mission>]/<timestamp>
    if args.mission is not None:
        if 'vgg' == args.arch and args.batchnorm:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}_bn/{args.mission}/{now}'
        elif 'resnet20' == args.arch:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}/{args.mission}/{now}'
        else:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}/{args.mission}/{now}'
    else:
        if 'vgg' == args.arch and args.batchnorm:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}_bn/{now}'
        else:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}/{now}'
    _make_dir(args.job_dir)

    ckpt = utils.checkpoint(args)
    print_logger = utils.get_logger(os.path.join(args.job_dir, "logger.log"))
    utils.print_params(vars(args), print_logger.info)
    log_file = os.path.join(args.job_dir, 'search_log.csv')

    writer_train = SummaryWriter(args.job_dir + '/run/train')
    writer_test = SummaryWriter(args.job_dir + '/run/test')

    ## hyperparameters settings ##
    # One k_bits entry per quantizable layer; all initialized to args.k_bits.
    n_layers = (args.num_layers - 2) * 2
    unit_k_bits = int(args.k_bits)
    kbits_list = [unit_k_bits for i in range(n_layers)]
    print_logger.info(f'k_bits_list {kbits_list}')

    # Data loading
    print('=> Preparing data..')
    if args.dataset in ['cifar10', 'cifar100', 'mnist']:
        IMAGE_SIZE = 32
    else:
        IMAGE_SIZE = 224

    if args.dataset == 'imagenet':
        # DALI loaders kept for reference:
        # train_loader = get_imagenet_iter_dali(type='train', image_dir=args.data_dir, batch_size=args.train_batch_size, num_threads=args.workers, crop=IMAGE_SIZE, device_id=0, num_gpus=1)
        # val_loader = get_imagenet_iter_dali(type='val', image_dir=args.data_dir, batch_size=args.eval_batch_size, num_threads=args.workers, crop=IMAGE_SIZE, device_id=0, num_gpus=1)
        train_data = get_imagenet_iter_torch(type='train', image_dir=args.base_data_dir, batch_size=args.train_batch_size, num_threads=args.workers, crop=IMAGE_SIZE, device_id=0, num_gpus=1)
    elif args.dataset == 'cifar10':
        train_transform, test_transform = utils._data_transforms_cifar10(cutout=args.cutout)
        train_data = torchvision.datasets.CIFAR10(args.data_dir, train=True, transform=train_transform, download=True)
        # test_data = torchvision.datasets.CIFAR10(args.data_dir, train=False, transform=test_transform, download=True)
        # train_loader = get_cifar_iter_dali(type='train', image_dir=args.data_dir, batch_size=args.train_batch_size, num_threads=args.workers)
        # val_loader = get_cifar_iter_dali(type='val', image_dir=args.data_dir, batch_size=args.eval_batch_size, num_threads=args.workers)

    # Create model
    print('=> Building model...')
    if args.dataset == 'cifar10' or args.dataset == 'mnist':
        num_classes = 10
        train_data_length = 50000
        eval_data_length = 10000
    elif args.dataset == 'imagenet':
        num_classes = 1000
        # NOTE(review): these lengths look like the CIFAR values, not
        # ImageNet's — confirm they are intended for imagenet.
        train_data_length = 50000
        eval_data_length = 10000

    if args.arch == 'mobilenetv2':
        model_config = {'k_bits': kbits_list, 'num_layers': args.num_layers, 'pre_k_bits': args.pre_k_bits, 'ratio': args.ratio, 'width_mult': args.width_mult}
    else:
        model_config = {'k_bits': kbits_list, 'num_layers': args.num_layers, 'pre_k_bits': args.pre_k_bits, 'ratio': args.ratio}

    # The model factory lives at models.<dataset>.<archtype>.<arch>; the
    # constructor name encodes depth (and _bn for batchnorm VGG).
    if 'vgg' == args.arch and args.batchnorm:
        model, model_k_bits = import_module(f"models.{args.dataset}.{args.archtype}.{args.arch}").__dict__[f'{args.arch}{args.num_layers}_bn'](model_config)
    elif 'resnet20' == args.arch:
        model, model_k_bits = import_module(f"models.{args.dataset}.{args.archtype}.{args.arch}").__dict__[f'{args.arch}'](model_config)
    else:
        model, model_k_bits = import_module(f"models.{args.dataset}.{args.archtype}.{args.arch}").__dict__[f'{args.arch}{args.num_layers}'](model_config)

    model = model.to(device)
    print_logger.info(f'model_k_bits_list {model_k_bits}')

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()

    # Optionally resume from a checkpoint
    resume = args.resume
    if resume:
        print('=> Loading checkpoint {}'.format(resume))
        checkpoint = torch.load(resume, map_location=device)
        state_dict = checkpoint['state_dict']
        start_epoch = checkpoint['epoch']
        pre_train_best_prec1 = checkpoint['best_prec1']
        model_check = load_check(state_dict, model)
        model.load_state_dict(model_check)
        print('Prec@1:', pre_train_best_prec1)
    else:
        # No resume: pass the freshly initialized weights to the search.
        checkpoint = model.state_dict()

    choose_model, k_bits = architecture_search(args=args, nn_model=model, device=device, checkpoint=checkpoint,
                                               step=args.step, criterion=criterion, train_data=train_data, train_batch_size=args.train_batch_size,
                                               eval_batch_size=args.eval_batch_size, train_data_length=train_data_length,
                                               eval_data_length=eval_data_length, clip_value=args.grad_clip, lam=args.lam,
                                               gpu_id=0, print_logger=print_logger, ckpt=ckpt, log_file=log_file)
# gradients metric wandb.watch(gen) wandb.watch(critic) # model mode gen.train() critic.train() start_time = time.time() for epoch in range(start_epoch, end_epoch + 1): train_one_epoch(epoch, dataloader, gen, critic, opt_gen, opt_critic, fixed_noise, device, metric_logger, num_samples=cfg.NUM_SAMPLES, freq=cfg.FREQ) if epoch == cfg.NUM_EPOCHS + 1: checkpoint(epoch, end_epoch, gen, critic, opt_gen, opt_critic, fixed_noise) elif epoch % cfg.SAVE_EACH_EPOCH == 0: checkpoint(epoch, end_epoch, gen, critic, opt_gen, opt_critic, fixed_noise) total_time = time.time() - start_time print(f"=> Training time:{total_time}")
def main(hparams):
    """Recover images from noisy linear measurements with each configured
    estimator, tracking measurement / l2 losses, LPIPS scores and latent
    estimates, with periodic and final checkpointing."""
    # set up perceptual loss
    device = 'cuda:0'
    percept = PerceptualLoss(model="net-lin", net="vgg", use_gpu=device.startswith("cuda"))

    utils.print_hparams(hparams)

    # get inputs
    xs_dict = model_input(hparams)

    estimators = utils.get_estimators(hparams)
    utils.setup_checkpointing(hparams)
    measurement_losses, l2_losses, lpips_scores, z_hats = utils.load_checkpoints(hparams)

    x_hats_dict = {model_type : {} for model_type in hparams.model_types}
    x_batch_dict = {}

    A = utils.get_A(hparams)
    # Heavy-tailed measurement noise: Student-t with 2 degrees of freedom,
    # drawn once and reused for every batch.
    noise_batch = hparams.noise_std * np.random.standard_t(2, size=(hparams.batch_size, hparams.num_measurements))

    for key, x in xs_dict.items():
        if not hparams.not_lazy:
            # If lazy, first check if the image has already been
            # saved before by *all* estimators. If yes, then skip this image.
            save_paths = utils.get_save_paths(hparams, key)
            is_saved = all([os.path.isfile(save_path) for save_path in save_paths.values()])
            if is_saved:
                continue

        x_batch_dict[key] = x
        if len(x_batch_dict) < hparams.batch_size:
            continue

        # Reshape input
        x_batch_list = [x.reshape(1, hparams.n_input) for _, x in x_batch_dict.items()]
        x_batch = np.concatenate(x_batch_list)

        # Construct noise and measurements
        y_batch = utils.get_measurements(x_batch, A, noise_batch, hparams)

        # Construct estimates using each estimator
        for model_type in hparams.model_types:
            estimator = estimators[model_type]
            x_hat_batch, z_hat_batch, m_loss_batch = estimator(A, y_batch, hparams)

            for i, key in enumerate(x_batch_dict.keys()):
                x = xs_dict[key]
                y_train = y_batch[i]
                x_hat = x_hat_batch[i]

                # Save the estimate
                x_hats_dict[model_type][key] = x_hat

                # Compute and store measurement and l2 loss
                measurement_losses[model_type][key] = m_loss_batch[key]
                l2_losses[model_type][key] = utils.get_l2_loss(x_hat, x)
                lpips_scores[model_type][key] = utils.get_lpips_score(percept, x_hat, x, hparams.image_shape)
                z_hats[model_type][key] = z_hat_batch[i]

        print('Processed upto image {0} / {1}'.format(key+1, len(xs_dict)))

        # Checkpointing
        if (hparams.save_images) and ((key+1) % hparams.checkpoint_iter == 0):
            utils.checkpoint(x_hats_dict, measurement_losses, l2_losses, lpips_scores, z_hats, save_image, hparams)
            x_hats_dict = {model_type : {} for model_type in hparams.model_types}
            print('\nProcessed and saved first ', key+1, 'images\n')

        x_batch_dict = {}

    # Final checkpoint
    if hparams.save_images:
        utils.checkpoint(x_hats_dict, measurement_losses, l2_losses, lpips_scores, z_hats, save_image, hparams)
        print('\nProcessed and saved all {0} image(s)\n'.format(len(xs_dict)))

    if hparams.print_stats:
        for model_type in hparams.model_types:
            print(model_type)
            measurement_loss_list = list(measurement_losses[model_type].values())
            l2_loss_list = list(l2_losses[model_type].values())
            mean_m_loss = np.mean(measurement_loss_list)
            mean_l2_loss = np.mean(l2_loss_list)
            print('mean measurement loss = {0}'.format(mean_m_loss))
            print('mean l2 loss = {0}'.format(mean_l2_loss))

    if hparams.image_matrix > 0:
        utils.image_matrix(xs_dict, x_hats_dict, view_image, hparams)

    # Warn the user that some things were not processsed
    if len(x_batch_dict) > 0:
        print('\nDid NOT process last {} images because they did not fill up the last batch.'.format(len(x_batch_dict)))
        print('Consider rerunning lazily with a smaller batch size.')
# Per-epoch validation + checkpoint chunk (enclosing loop defines `epoch`).
print('Evaluating model!')
val_miou, val_ciou, val_acc = validation(model, loader_val, cfg)
history['val_miou'].append(val_miou)
history['val_ciou'].append(val_ciou)
history['val_acc'].append(val_acc)

# best_pred[0] tracks best mIoU, best_pred[1] tracks best accuracy; only an
# mIoU improvement marks this checkpoint as the "best" model.
if val_miou > best_pred[0]:
    best_pred[0] = val_miou
    best_model = True
else:
    best_model = False
if val_acc > best_pred[1]:
    best_pred[1] = val_acc
print('Best validation IOU and Acc:', best_pred)

checkpoint(state={'epoch': epoch,
                  'encoder': net_encoder.state_dict(),
                  'decoder': net_decoder.state_dict(),
                  'optimizer': optimizer.state_dict()},
           cfg=cfg, best=best_model, history=history)
# (tail of the preceding test function: log and print final metrics)
    utils.tensorboard_log_test(avg_accuracy, avg_loss, std)
    print('$' * 80)
    print('TEST: Epoch {} loss: {:.4f} accuracy: {:%} std: {:.4f}'.format(epoch, avg_loss, avg_accuracy, std))

# Run the experiment ==========================================================
# Train/validate on a schedule, checkpoint periodically, and always save +
# run the test set — both on normal completion and on Ctrl-C.
try:
    for epoch in range(params.n_epochs):
        train(epoch)
        if epoch % params.valid_eval_freq == 0:
            valid(epoch)
        if epoch % params.checkpoint_freq == 0:
            checkpoint_path = os.path.join(params.checkpoint_dir, 'model_' + str(epoch) + '.pt')
            utils.checkpoint(model, checkpoint_path)
    print('Training finished')
    checkpoint_path = os.path.join(params.checkpoint_dir, 'model_last.pt')
    utils.checkpoint(model, checkpoint_path)
    print('Model saved. Evaluating test set')
    test()
except KeyboardInterrupt:
    print('Keyboard Interrupt received. Saving, testing, and shutting down')
    checkpoint_path = os.path.join(params.checkpoint_dir, 'model_last.pt')
    utils.checkpoint(model, checkpoint_path)
    print('Model saved. Evaluating test set')
    test()
def main(args):
    """Train a joint energy-based model (JEM-style).

    Mixes up to four objectives per iteration, each gated by its weight arg:
    cross-entropy p(y|x), SGLD-based p(x) and p(x|y), and contrastive
    p(x)/p(x|y). Handles lr warmup/decay, divergence bail-out, periodic
    plotting, checkpointing and validation/test evaluation.
    """
    # Setup datasets
    dload_train, dload_train_labeled, dload_valid, dload_test = get_data(args)
    # Model and buffer
    sample_q = get_sample_q(args)
    f, replay_buffer = get_model_and_buffer(args, sample_q)

    # Setup Optimizer
    params = f.class_output.parameters() if args.clf_only else f.parameters()
    if args.optimizer == "adam":
        optim = torch.optim.Adam(params, lr=args.lr, betas=[0.9, 0.999], weight_decay=args.weight_decay)
    else:
        optim = torch.optim.SGD(params, lr=args.lr, momentum=0.9, weight_decay=args.weight_decay)

    best_valid_acc = 0.0
    cur_iter = 0

    for epoch in range(args.start_epoch, args.n_epochs):
        # Decay lr
        if epoch in args.decay_epochs:
            for param_group in optim.param_groups:
                new_lr = param_group["lr"] * args.decay_rate
                param_group["lr"] = new_lr

        # Load data
        for i, (x_p_d, _) in tqdm(enumerate(dload_train)):
            # Warmup: linear lr ramp over the first warmup_iters iterations.
            if cur_iter <= args.warmup_iters:
                lr = args.lr * cur_iter / float(args.warmup_iters)
                for param_group in optim.param_groups:
                    param_group["lr"] = lr

            x_p_d = x_p_d.to(device)
            x_lab, y_lab = dload_train_labeled.__next__()
            x_lab, y_lab = x_lab.to(device), y_lab.to(device)

            # Label smoothing
            dist = smooth_one_hot(y_lab, args.n_classes, args.smoothing)

            L = 0.0

            # log p(y|x) cross entropy loss
            if args.pyxce > 0:
                logits = f.classify(x_lab)
                l_pyxce = KHotCrossEntropyLoss()(logits, dist)
                if cur_iter % args.print_every == 0:
                    acc = (logits.max(1)[1] == y_lab).float().mean()
                    print("p(y|x)CE {}:{:>d} loss={:>14.9f}, acc={:>14.9f}".format(epoch, cur_iter, l_pyxce.item(), acc.item()))
                    logger.record_dict({"l_pyxce": l_pyxce.cpu().data.item(), "acc_pyxce": acc.item()})
                L += args.pyxce * l_pyxce

            # log p(x) using sgld
            if args.pxsgld > 0:
                if args.class_cond_p_x_sample:
                    assert not args.uncond, "can only draw class-conditional samples if EBM is class-cond"
                    y_q = torch.randint(0, args.n_classes, (args.sgld_batch_size, )).to(device)
                    x_q = sample_q(f, replay_buffer, y=y_q)
                else:
                    x_q = sample_q(f, replay_buffer)  # sample from log-sumexp
                fp_all = f(x_p_d)
                fq_all = f(x_q)
                fp = fp_all.mean()
                fq = fq_all.mean()
                l_pxsgld = -(fp - fq)
                if cur_iter % args.print_every == 0:
                    print("p(x)SGLD | {}:{:>d} loss={:>14.9f} f(x_p_d)={:>14.9f} f(x_q)={:>14.9f}".format(epoch, i, l_pxsgld, fp, fq))
                    logger.record_dict({"l_pxsgld": l_pxsgld.cpu().data.item()})
                L += args.pxsgld * l_pxsgld

            # log p(x) using contrastive learning
            if args.pxcontrast > 0:
                # ones like dist to use all indexes
                ones_dist = torch.ones_like(dist).to(device)
                output, target, ce_output, neg_num = f.joint(img=x_lab, dist=ones_dist)
                l_pxcontrast = nn.CrossEntropyLoss(reduction="mean")(output, target)
                if cur_iter % args.print_every == 0:
                    acc = (ce_output.max(1)[1] == y_lab).float().mean()
                    print("p(x)Contrast {}:{:>d} loss={:>14.9f}, acc={:>14.9f}".format(epoch, cur_iter, l_pxcontrast.item(), acc.item()))
                    logger.record_dict({"l_pxcontrast": l_pxcontrast.cpu().data.item(), "acc_pxcontrast": acc.item()})
                # NOTE(review): weighted by args.pxycontrast, not args.pxcontrast
                # — looks like a mix-up; verify before changing.
                L += args.pxycontrast * l_pxcontrast

            # log p(x|y) using sgld
            if args.pxysgld > 0:
                x_q_lab = sample_q(f, replay_buffer, y=y_lab)
                fp, fq = f(x_lab).mean(), f(x_q_lab).mean()
                l_pxysgld = -(fp - fq)
                if cur_iter % args.print_every == 0:
                    print("p(x|y)SGLD | {}:{:>d} loss={:>14.9f} f(x_p_d)={:>14.9f} f(x_q)={:>14.9f}".format(epoch, i, l_pxysgld.item(), fp, fq))
                    logger.record_dict({"l_pxysgld": l_pxysgld.cpu().data.item()})
                # NOTE(review): weighted by args.pxsgld, not args.pxysgld —
                # looks like a mix-up; verify before changing.
                L += args.pxsgld * l_pxysgld

            # log p(x|y) using contrastive learning
            if args.pxycontrast > 0:
                output, target, ce_output, neg_num = f.joint(img=x_lab, dist=dist)
                l_pxycontrast = nn.CrossEntropyLoss(reduction="mean")(output, target)
                if cur_iter % args.print_every == 0:
                    acc = (ce_output.max(1)[1] == y_lab).float().mean()
                    print("p(x|y)Contrast {}:{:>d} loss={:>14.9f}, acc={:>14.9f}".format(epoch, cur_iter, l_pxycontrast.item(), acc.item()))
                    logger.record_dict({"l_pxycontrast": l_pxycontrast.cpu().data.item(), "acc_pxycontrast": acc.item()})
                L += args.pxycontrast * l_pxycontrast

            # SGLD training of log q(x) may diverge
            # break here and record information to restart
            if L.abs().item() > 1e8:
                print("restart epoch: {}".format(epoch))
                print("save dir: {}".format(args.log_dir))
                print("id: {}".format(args.id))
                print("steps: {}".format(args.n_steps))
                print("seed: {}".format(args.seed))
                print("exp prefix: {}".format(args.exp_prefix))
                # Restore the real stdout/stderr (in case they were redirected
                # to a log) and repeat the restart info on the console.
                sys.stdout = sys.__stdout__
                sys.stderr = sys.__stderr__
                print("restart epoch: {}".format(epoch))
                print("save dir: {}".format(args.log_dir))
                print("id: {}".format(args.id))
                print("steps: {}".format(args.n_steps))
                print("seed: {}".format(args.seed))
                print("exp prefix: {}".format(args.exp_prefix))
                assert False, "shit loss explode..."

            optim.zero_grad()
            L.backward()
            optim.step()
            cur_iter += 1

        # NOTE(review): the plotting below reuses `i` (and `y_q`) leaked from
        # the inner loop above — fragile but preserved as-is.
        if epoch % args.plot_every == 0:
            if args.plot_uncond:
                if args.class_cond_p_x_sample:
                    assert not args.uncond, "can only draw class-conditional samples if EBM is class-cond"
                    y_q = torch.randint(0, args.n_classes, (args.sgld_batch_size, )).to(device)
                    x_q = sample_q(f, replay_buffer, y=y_q)
                    plot("{}/x_q_{}_{:>06d}.png".format(args.log_dir, epoch, i), x_q)
                    if args.plot_contrast:
                        x_q = sample_q(f, replay_buffer, y=y_q, contrast=True)
                        plot("{}/contrast_x_q_{}_{:>06d}.png".format(args.log_dir, epoch, i), x_q)
                else:
                    x_q = sample_q(f, replay_buffer)
                    plot("{}/x_q_{}_{:>06d}.png".format(args.log_dir, epoch, i), x_q)
                    if args.plot_contrast:
                        x_q = sample_q(f, replay_buffer, contrast=True)
                        plot("{}/contrast_x_q_{}_{:>06d}.png".format(args.log_dir, epoch, i), x_q)
            if args.plot_cond:
                # generate class-conditional samples
                y = torch.arange(0, args.n_classes)[None].repeat(args.n_classes, 1).transpose(1, 0).contiguous().view(-1).to(device)
                x_q_y = sample_q(f, replay_buffer, y=y)
                plot("{}/x_q_y{}_{:>06d}.png".format(args.log_dir, epoch, i), x_q_y)
                if args.plot_contrast:
                    y = torch.arange(0, args.n_classes)[None].repeat(args.n_classes, 1).transpose(1, 0).contiguous().view(-1).to(device)
                    x_q_y = sample_q(f, replay_buffer, y=y, contrast=True)
                    plot("{}/contrast_x_q_y_{}_{:>06d}.png".format(args.log_dir, epoch, i), x_q_y)

        if args.ckpt_every > 0 and epoch % args.ckpt_every == 0:
            checkpoint(f, replay_buffer, f"ckpt_{epoch}.pt", args)

        if epoch % args.eval_every == 0:
            # Validation set
            correct, val_loss = eval_classification(f, dload_valid)
            if correct > best_valid_acc:
                best_valid_acc = correct
                print("Best Valid!: {}".format(correct))
                checkpoint(f, replay_buffer, "best_valid_ckpt.pt", args)
            # Test set
            correct, test_loss = eval_classification(f, dload_test)
            print("Epoch {}: Valid Loss {}, Valid Acc {}".format(epoch, val_loss, correct))
            print("Epoch {}: Test Loss {}, Test Acc {}".format(epoch, test_loss, correct))
            f.train()
            # NOTE(review): best_valid_acc starts as the float 0.0, on which
            # .detach() would fail — this logging assumes it has been replaced
            # by a tensor from eval_classification; confirm.
            logger.record_dict({
                "Epoch": epoch,
                "Valid Loss": val_loss,
                "Valid Acc": correct.detach().cpu().numpy(),
                "Test Loss": test_loss,
                "Test Acc": correct.detach().cpu().numpy(),
                "Best Valid": best_valid_acc.detach().cpu().numpy(),
                "Loss": L.cpu().data.item(),
            })

        checkpoint(f, replay_buffer, "last_ckpt.pt", args)
        logger.dump_tabular()
output = output.view(-1, opt.embed_dim).float() loss = criterion_lm(output, target) / batch.token.size(0) loss.backward() forward_losses += float(loss) optimizer_lm.step() print("\nforward LM loss: {}".format(forward_losses)) print("backward LM loss: {}".format(backward_losses)) ############ transport pretrained layers ############### for key, state in lm_model.state_dict().items(): model.state_dict()[key] = state checkpoint(opt.lm_epoch, model, os.path.join(CURRENT_DIR, '../models/pretrained_bilstm_crf.pth'), opt.batch_size, interrupted=False, use_gpu=opt.gpu) if opt.use_pretrain: print("========== use pretrain model ===========") model.load_state_dict(torch.load(opt.use_pretrain)) print(model) ############ start training ################ train_iter = BucketIterator(train_dataset, batch_size=opt.batch_size, shuffle=True, repeat=False,
log[i].append({}) model.train() for k, batch in tqdm(zip(ts, trainLoader)): t = time.time() loss_train = process_batch(batch, loss_train, i, k, 'train', t0) t_optim += time.time() - t for key, value in loss_train.items(): log[i][j][key] = float(np.mean(value[-opt.nbatch_train:])) log[i][j]['train_batch'] = k model.eval() for k, batch in zip(vs, valLoader): t = time.time() loss_val = process_batch(batch, loss_val, i, k, 'val', t0) t_optim += time.time() - t for key, value in loss_val.items(): log[i][j][key] = float(np.mean(value[-opt.nbatch_val:])) # optionally update LR after each epoch/minibatch model.lr_step() utils.checkpoint('%d_%d' %(i, j), model, log, opt) log[i][j]['time(optim)'] = '%.2f(%.2f)' %(time.time() - t0, t_optim) print(log[i][j]) except KeyboardInterrupt: time.sleep(2) # waiting for all threads to stop print('-' * 89) save = input('Exiting early, save the last model?[y/n]') if save == 'y': print('Saving...') utils.checkpoint('final', model, log, opt)
def train(config):
    """Train the attention GAN (generator + discriminator) described by `config`.

    Side effects: reads the dataset from disk, writes logs/loss graphs to
    config.out_dir, and saves model snapshots every config.snapshot_interval
    epochs via checkpoint().
    """
    gpu_manage(config)

    ### DATASET LOAD ###
    print('===> Loading datasets')
    dataset = TrainDataset(config)
    print('dataset:', len(dataset))
    # Random train/validation split; sizes derived from config.validation_size.
    train_size = int((1 - config.validation_size) * len(dataset))
    validation_size = len(dataset) - train_size
    train_dataset, validation_dataset = torch.utils.data.random_split(
        dataset, [train_size, validation_size])
    print('train dataset:', len(train_dataset))
    print('validation dataset:', len(validation_dataset))
    training_data_loader = DataLoader(dataset=train_dataset,
                                      num_workers=config.threads,
                                      batch_size=config.batchsize,
                                      shuffle=True)
    validation_data_loader = DataLoader(dataset=validation_dataset,
                                        num_workers=config.threads,
                                        batch_size=config.validation_batchsize,
                                        shuffle=False)

    ### MODELS LOAD ###
    print('===> Loading models')
    gen = Generator(gpu_ids=config.gpu_ids)
    if config.gen_init is not None:
        # Warm-start the generator from a saved state dict.
        param = torch.load(config.gen_init)
        gen.load_state_dict(param)
        print('load {} as pretrained model'.format(config.gen_init))
    dis = Discriminator(in_ch=config.in_ch,
                        out_ch=config.out_ch,
                        gpu_ids=config.gpu_ids)
    if config.dis_init is not None:
        # Warm-start the discriminator from a saved state dict.
        param = torch.load(config.dis_init)
        dis.load_state_dict(param)
        print('load {} as pretrained model'.format(config.dis_init))

    # setup optimizer
    opt_gen = optim.Adam(gen.parameters(),
                         lr=config.lr,
                         betas=(config.beta1, 0.999),
                         weight_decay=0.00001)
    opt_dis = optim.Adam(dis.parameters(),
                         lr=config.lr,
                         betas=(config.beta1, 0.999),
                         weight_decay=0.00001)

    # Reusable input buffers; resized and filled in-place each iteration.
    # real_a: input image, real_b: target image, M: attention mask target.
    real_a = torch.FloatTensor(config.batchsize, config.in_ch, config.width,
                               config.height)
    real_b = torch.FloatTensor(config.batchsize, config.out_ch, config.width,
                               config.height)
    M = torch.FloatTensor(config.batchsize, config.width, config.height)

    criterionL1 = nn.L1Loss()
    criterionMSE = nn.MSELoss()
    criterionSoftplus = nn.Softplus()  # softplus GAN loss (non-saturating)
    if config.cuda:
        gen = gen.cuda()
        dis = dis.cuda()
        criterionL1 = criterionL1.cuda()
        criterionMSE = criterionMSE.cuda()
        criterionSoftplus = criterionSoftplus.cuda()
        real_a = real_a.cuda()
        real_b = real_b.cuda()
        M = M.cuda()
    # Legacy Variable wrappers (pre-0.4 PyTorch style).
    real_a = Variable(real_a)
    real_b = Variable(real_b)

    logreport = LogReport(log_dir=config.out_dir)
    validationreport = TestReport(log_dir=config.out_dir)
    print('===> begin')
    start_time = time.time()

    # main
    for epoch in range(1, config.epoch + 1):
        epoch_start_time = time.time()
        for iteration, batch in enumerate(training_data_loader, 1):
            # Copy CPU batch into the persistent (possibly GPU) buffers.
            real_a_cpu, real_b_cpu, M_cpu = batch[0], batch[1], batch[2]
            real_a.data.resize_(real_a_cpu.size()).copy_(real_a_cpu)
            real_b.data.resize_(real_b_cpu.size()).copy_(real_b_cpu)
            M.data.resize_(M_cpu.size()).copy_(M_cpu)
            # Generator produces an attention map and the translated image.
            att, fake_b = gen.forward(real_a)

            ################
            ### Update D ###
            ################
            opt_dis.zero_grad()
            # train with fake — detach so G gets no gradient from the D step.
            fake_ab = torch.cat((real_a, fake_b), 1)
            pred_fake = dis.forward(fake_ab.detach())
            batchsize, _, w, h = pred_fake.size()
            # Per-pixel softplus loss, averaged over batch and spatial dims.
            loss_d_fake = torch.sum(
                criterionSoftplus(pred_fake)) / batchsize / w / h
            # train with real
            real_ab = torch.cat((real_a, real_b), 1)
            pred_real = dis.forward(real_ab)
            loss_d_real = torch.sum(
                criterionSoftplus(-pred_real)) / batchsize / w / h
            # Combined loss
            loss_d = loss_d_fake + loss_d_real
            loss_d.backward()
            # D is only stepped every config.minimax epochs — throttles the
            # discriminator so the generator can keep up.
            if epoch % config.minimax == 0:
                opt_dis.step()

            ################
            ### Update G ###
            ################
            opt_gen.zero_grad()
            # First, G(A) should fake the discriminator
            fake_ab = torch.cat((real_a, fake_b), 1)
            pred_fake = dis.forward(fake_ab)
            loss_g_gan = torch.sum(
                criterionSoftplus(-pred_fake)) / batchsize / w / h
            # Second, G(A) = B
            loss_g_l1 = criterionL1(fake_b, real_b) * config.lamb
            # Attention-map supervision against the mask M.
            loss_g_att = criterionMSE(att[:, 0, :, :], M)
            loss_g = loss_g_gan + loss_g_l1 + loss_g_att
            loss_g.backward()
            opt_gen.step()

            # log
            # NOTE(review): loss_g_att is not included in this progress line.
            if iteration % 10 == 0:
                print(
                    "===> Epoch[{}]({}/{}): loss_d_fake: {:.4f} loss_d_real: {:.4f} loss_g_gan: {:.4f} loss_g_l1: {:.4f}"
                    .format(epoch, iteration, len(training_data_loader),
                            loss_d_fake.item(), loss_d_real.item(),
                            loss_g_gan.item(), loss_g_l1.item()))
                log = {}
                log['epoch'] = epoch
                log['iteration'] = len(training_data_loader) * (epoch - 1) + iteration
                log['gen/loss'] = loss_g.item()
                log['dis/loss'] = loss_d.item()
                logreport(log)

        print('epoch', epoch, 'finished, use time',
              time.time() - epoch_start_time)

        # Validation pass (no gradients) once per epoch.
        with torch.no_grad():
            log_validation = test(config, validation_data_loader, gen,
                                  criterionMSE, epoch)
            validationreport(log_validation)
        print('validation finished')
        if epoch % config.snapshot_interval == 0:
            checkpoint(config, epoch, gen, dis)

        logreport.save_lossgraph()
        validationreport.save_lossgraph()
    print('training time:', time.time() - start_time)
opt.batch_size, token2id[labels.PAD], char2id[labels.PAD], label2id[labels.O], shuffle=True))): batch_start = time.time() model.zero_grad() model.train() token_batch = get_variable(torch.LongTensor(token_batch), use_gpu=opt.gpu).transpose(1, 0) char_batch = get_variable(torch.LongTensor(char_batch), use_gpu=opt.gpu).transpose(1, 0) label_batch = get_variable(torch.LongTensor(label_batch), use_gpu=opt.gpu).transpose(1, 0) # loss = model.loss(token_batch, char_batch, label_batch) / token_batch.shape[0] loss = model.loss(token_batch, char_batch, label_batch) optimizer.zero_grad() print("loss: {}".format(loss)) loss.backward() optimizer.step() #print("loss: {}".format(float(loss))) loss_per_epoch += float(loss) print('{}epoch\nloss: {}\nvalid: {}\ntime: {} sec.\n'.format( epoch, loss_per_epoch, 0, time.time() - start)) if epoch % 10 == 0: print("model save {}epoch".format(epoch)) checkpoint(model, opt.model_path) checkpoint(model, opt.model_path)
def main(hparams):
    """Recover images from one-bit (sign) compressed measurements.

    Alternates projected-gradient steps in image space with projection onto
    the range of a VAE generator. Python 2 code (print statements,
    dict.iteritems). Writes estimates and losses via utils.checkpoint.
    """
    hparams.n_input = np.prod(hparams.image_shape)
    hparams.model_type = 'vae'
    maxiter = hparams.max_outer_iter
    utils.print_hparams(hparams)

    xs_dict = model_input(hparams)  # returns the images
    estimators = utils.get_estimators(hparams)
    utils.setup_checkpointing(hparams)
    measurement_losses, l2_losses = utils.load_checkpoints(hparams)

    x_hats_dict = {'vae': {}}
    x_batch_dict = {}
    for key, x in xs_dict.iteritems():
        print key
        x_batch_dict[key] = x  # placing images in dictionary
        # Accumulate until a full batch is available.
        if len(x_batch_dict) < hparams.batch_size:
            continue

        x_coll = [
            x.reshape(1, hparams.n_input)
            for _, x in x_batch_dict.iteritems()
        ]  # Generates the columns of input x
        x_batch = np.concatenate(x_coll)  # Generates entire X

        A_outer = utils.get_outer_A(hparams)  # Created the random matric A
        # NOTE(review): noise_batch is computed but never used below.
        noise_batch = hparams.noise_std * np.random.randn(
            hparams.batch_size, 100)
        # One-bit measurements: keep only the sign of A^T x.
        y_batch_outer = np.sign(
            np.matmul(x_batch, A_outer)
        )  # Multiplication of A and X followed by quantization on 4 levels
        #y_batch_outer = np.matmul(x_batch, A_outer)

        x_main_batch = 0.0 * x_batch  # start the outer iteration from zero
        z_opt_batch = np.random.randn(hparams.batch_size, 20)  #Input to the generator of the GAN
        for k in range(maxiter):
            # Gradient step on the one-bit measurement mismatch.
            x_est_batch = x_main_batch + hparams.outer_learning_rate * (
                np.matmul(
                    (y_batch_outer - np.sign(np.matmul(x_main_batch, A_outer))),
                    A_outer.T))
            #x_est_batch = x_main_batch + hparams.outer_learning_rate * (np.matmul((y_batch_outer - np.matmul(x_main_batch, A_outer)), A_outer.T))
            # Gradient decent in x is done
            estimator = estimators['vae']
            # Projection onto the generator's range; z is warm-started.
            x_hat_batch, z_opt_batch = estimator(
                x_est_batch, z_opt_batch, hparams)  # Projectin on the GAN
            x_main_batch = x_hat_batch
            # Per-pixel distance to ground truth (784 = 28*28 MNIST pixels —
            # presumably; confirm against hparams.image_shape).
            dist = np.linalg.norm(x_batch - x_main_batch) / 784
            print 'cool'
            print dist

        # Record per-image estimates and losses for the finished batch.
        for i, key in enumerate(x_batch_dict.keys()):
            x = xs_dict[key]
            y = y_batch_outer[i]
            x_hat = x_hat_batch[i]
            # Save the estimate
            x_hats_dict['vae'][key] = x_hat
            # Compute and store measurement and l2 loss
            measurement_losses['vae'][key] = utils.get_measurement_loss(
                x_hat, A_outer, y)
            l2_losses['vae'][key] = utils.get_l2_loss(x_hat, x)
        print 'Processed upto image {0} / {1}'.format(key + 1, len(xs_dict))

        # Checkpointing
        if (hparams.save_images) and ((key + 1) % hparams.checkpoint_iter == 0):
            utils.checkpoint(x_hats_dict, measurement_losses, l2_losses,
                             save_image, hparams)
            #x_hats_dict = {'dcgan' : {}}
            print '\nProcessed and saved first ', key + 1, 'images\n'
        x_batch_dict = {}

    # Final checkpoint
    if hparams.save_images:
        utils.checkpoint(x_hats_dict, measurement_losses, l2_losses,
                         save_image, hparams)
        print '\nProcessed and saved all {0} image(s)\n'.format(len(xs_dict))

    if hparams.print_stats:
        for model_type in hparams.model_types:
            print model_type
            mean_m_loss = np.mean(measurement_losses[model_type].values())
            mean_l2_loss = np.mean(l2_losses[model_type].values())
            print 'mean measurement loss = {0}'.format(mean_m_loss)
            print 'mean l2 loss = {0}'.format(mean_l2_loss)

    if hparams.image_matrix > 0:
        utils.image_matrix(xs_dict, x_hats_dict, view_image, hparams)

    # Warn the user that some things were not processsed
    if len(x_batch_dict) > 0:
        print '\nDid NOT process last {} images because they did not fill up the last batch.'.format(
            len(x_batch_dict))
        print 'Consider rerunning lazily with a smaller batch size.'
if args.epochwise: for k in range(100, 1000, 100): model, linear, projector, loptim, attacker = load(args, k) print('loading.......epoch ', str(k)) ##### Linear evaluation ##### for i in range(args.epoch): print('Epoch ', i) train_acc, model, linear, projector, loptim = linear_train( i, model, linear, projector, loptim, attacker) test_acc, model, linear = test(model, linear) adjust_lr(i, loptim) checkpoint(model, test_acc, args.epoch, args, loptim, save_name_add='epochwise' + str(k)) checkpoint(linear, test_acc, args.epoch, args, loptim, save_name_add='epochwise' + str(k) + '_linear') if args.local_rank % ngpus_per_node == 0: with open(logname, 'a') as logfile: logwriter = csv.writer(logfile, delimiter=',') logwriter.writerow([k, train_acc, test_acc]) model, linear, projector, loptim, attacker = load(args, 0)
def main():
    """Train (or test) a quantized CNN according to the module-level `args`.

    Builds the job directory, data loaders, model, optimizer and LR schedule,
    optionally resumes from a checkpoint, then runs the train/test loop and
    saves the best model.

    Fix: removed a leftover `pdb.set_trace()` in the resume path — it dropped
    every resumed run into an interactive debugger and hung non-interactive
    jobs.
    """
    start_epoch = 0
    best_prec1 = 0.0

    # Random seed: drawn at random, then applied everywhere for repeatability
    # of a single run (the seed itself is not logged here).
    seed = np.random.randint(10000)
    if seed is not None:
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

    if args.gpus is not None:
        device = torch.device("cuda:{}".format(args.gpus[0]))
        cudnn.benchmark = False
        # cudnn.deterministic = True
        cudnn.enabled = True
    else:
        device = torch.device("cpu")

    # Job directory layout: job_dir/dataset/arch[depth][_bn][/mission]/timestamp
    now = datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
    if args.mission is not None:
        if 'vgg' == args.arch and args.batchnorm:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}_bn/{args.mission}/{now}'
        elif 'resnet20' == args.arch:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}/{args.mission}/{now}'
        else:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}/{args.mission}/{now}'
    else:
        if 'vgg' == args.arch and args.batchnorm:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}_bn/{now}'
        else:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}/{now}'
    _make_dir(args.job_dir)
    ckpt = utils.checkpoint(args)
    print_logger = utils.get_logger(os.path.join(args.job_dir, "logger.log"))
    utils.print_params(vars(args), print_logger.info)
    writer_train = SummaryWriter(args.job_dir + '/run/train')
    writer_test = SummaryWriter(args.job_dir + '/run/test')

    ## hyperparameters settings ##
    # One bit-width entry per quantized layer; uniform k_bits across layers.
    n_layers = (args.num_layers - 2) * 2
    unit_k_bits = int(args.k_bits)
    kbits_list = [unit_k_bits for i in range(n_layers)]
    print_logger.info(f'k_bits_list {kbits_list}')

    # Data loading
    print('=> Preparing data..')
    if args.dataset in ['cifar10', 'cifar100', 'mnist']:
        IMAGE_SIZE = 32
    elif args.dataset == 'tinyimagenet':
        IMAGE_SIZE = 64
    else:
        IMAGE_SIZE = 224

    # DALI-backed loaders; only imagenet/tinyimagenet/cifar10 are handled —
    # other dataset values leave train_loader/val_loader undefined.
    if args.dataset == 'imagenet':
        train_loader = get_imagenet_iter_dali(type='train', image_dir=args.data_dir,
            batch_size=args.train_batch_size, num_threads=args.workers, crop=IMAGE_SIZE, device_id=0, num_gpus=1)
        val_loader = get_imagenet_iter_dali(type='val', image_dir=args.data_dir,
            batch_size=args.eval_batch_size, num_threads=args.workers, crop=IMAGE_SIZE, device_id=0, num_gpus=1)
    elif args.dataset == 'tinyimagenet':
        train_loader = get_imagenet_iter_dali(type='train', image_dir=args.data_dir,
            batch_size=args.train_batch_size, num_threads=args.workers, crop=IMAGE_SIZE, device_id=0, num_gpus=1)
        val_loader = get_imagenet_iter_dali(type='val', image_dir=args.data_dir,
            batch_size=args.eval_batch_size, num_threads=args.workers, crop=IMAGE_SIZE, device_id=0, num_gpus=1)
    elif args.dataset == 'cifar10':
        train_loader = get_cifar_iter_dali(type='train', image_dir=args.data_dir,
            batch_size=args.train_batch_size, num_threads=args.workers)
        val_loader = get_cifar_iter_dali(type='val', image_dir=args.data_dir,
            batch_size=args.eval_batch_size, num_threads=args.workers)

    # Create model
    print('=> Building model...')
    # NOTE(review): train/eval lengths for 'imagenet' look like cifar values
    # (50000/10000) — confirm against the actual dataset sizes.
    if args.dataset == 'cifar10':
        num_classes = 10
        train_data_length = 50000
        eval_data_length = 10000
    elif args.dataset == 'imagenet':
        num_classes = 1000
        train_data_length = 50000
        eval_data_length = 10000

    # arch = args.arch
    # model = models.__dict__[arch]
    model_config = {'k_bits': kbits_list, 'num_layers': args.num_layers,
                    'pre_k_bits': args.pre_k_bits, 'ratio': args.ratio}
    if args.arch == 'mobilenetv2':
        model_config = {'k_bits': kbits_list, 'num_layers': args.num_layers,
                        'pre_k_bits': args.pre_k_bits, 'ratio': args.ratio,
                        'width_mult': args.width_mult}

    # Model factory is looked up dynamically: models.<dataset>.<archtype>.<arch>
    if 'vgg' == args.arch and args.batchnorm:
        model, model_k_bits = import_module(
            f"models.{args.dataset}.{args.archtype}.{args.arch}"
        ).__dict__[f'{args.arch}{args.num_layers}_bn'](model_config)
    elif 'resnet20' == args.arch:
        model, model_k_bits = import_module(
            f"models.{args.dataset}.{args.archtype}.{args.arch}"
        ).__dict__[f'{args.arch}'](model_config)
    else:
        model, model_k_bits = import_module(
            f"models.{args.dataset}.{args.archtype}.{args.arch}"
        ).__dict__[f'{args.arch}{args.num_layers}'](model_config)
    model = model.to(device)
    print_logger.info(f'model_k_bits_list {model_k_bits}')

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    scheduler = MultiStepLR(
        optimizer,
        milestones=[0.5 * args.train_epochs, 0.75 * args.train_epochs],
        gamma=0.1)

    # Optionally resume from a checkpoint
    resume = args.resume
    if resume:
        print('=> Loading checkpoint {}'.format(resume))
        checkpoint = torch.load(resume, map_location=device)
        state_dict = checkpoint['state_dict']
        start_epoch = checkpoint['epoch']
        pre_train_best_prec1 = checkpoint['best_prec1']
        model_check = load_check(state_dict, model)
        # (was: pdb.set_trace() — removed; a debugger breakpoint must not
        # ship in the resume path)
        model.load_state_dict(model_check)
        print('Prec@1:', pre_train_best_prec1)

    if args.test_only:
        # NOTE(review): this call omits eval_data_length, unlike the test()
        # call in the training loop below — verify test()'s signature.
        test_prec1 = test(args, device, val_loader, model, criterion,
                          writer_test, print_logger, start_epoch)
        print('=> Test Prec@1: {:.2f}'.format(test_prec1))
        print(f'sample k_bits {kbits_list}')
        return

    for epoch in range(0, args.train_epochs):
        scheduler.step(epoch)
        train_loss, train_prec1 = train(args, device, train_loader,
                                        train_data_length, model, criterion,
                                        optimizer, writer_train, print_logger,
                                        epoch)
        test_prec1 = test(args, device, val_loader, eval_data_length, model,
                          criterion, writer_test, print_logger, epoch)

        is_best = best_prec1 < test_prec1
        best_prec1 = max(test_prec1, best_prec1)
        state = {
            'state_dict': model.state_dict(),
            'test_prec1': test_prec1,
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
            'epoch': epoch + 1
        }
        ckpt.save_model(state, epoch + 1, is_best, mode='train')
    # best_prec1 is expected to be a tensor here (from test_prec1);
    # .item() would fail if no epoch ever improved on the initial 0.0 float.
    print_logger.info('==> BEST ACC {:.3f}'.format(best_prec1.item()))
def main(hparams):
    """Run compressed-sensing recovery over a dataset of images.

    Batches the images, builds (noisy) measurements y = xA + noise (or a
    direct projection), runs every configured estimator, and checkpoints
    per-image estimates and losses.

    NOTE(review): uses dict.iteritems (Python 2) alongside print() calls —
    confirm the intended interpreter; multi-argument print() under Python 2
    prints a tuple.
    """
    # Set up some stuff accoring to hparams
    hparams.n_input = np.prod(hparams.image_shape)
    utils.set_num_measurements(hparams)
    utils.print_hparams(hparams)

    # get inputs
    xs_dict = model_input(hparams)

    estimators = utils.get_estimators(hparams)
    utils.setup_checkpointing(hparams)
    measurement_losses, l2_losses = utils.load_checkpoints(hparams)

    x_hats_dict = {model_type: {} for model_type in hparams.model_types}
    x_batch_dict = {}
    for key, x in xs_dict.iteritems():
        if not hparams.not_lazy:
            # If lazy, first check if the image has already been
            # saved before by *all* estimators. If yes, then skip this image.
            save_paths = utils.get_save_paths(hparams, key)
            is_saved = all([
                os.path.isfile(save_path)
                for save_path in save_paths.values()
            ])
            if is_saved:
                continue

        x_batch_dict[key] = x
        # Accumulate until a full batch is available.
        if len(x_batch_dict) < hparams.batch_size:
            continue

        # Reshape input: one flattened row per image.
        x_batch_list = [
            x.reshape(1, hparams.n_input)
            for _, x in x_batch_dict.iteritems()
        ]
        x_batch = np.concatenate(x_batch_list)

        # Construct noise and measurements
        A = utils.get_A(hparams)
        noise_batch = hparams.noise_std * np.random.randn(
            hparams.batch_size, hparams.num_measurements)
        if hparams.measurement_type == 'project':
            # 'project' measures the image directly (identity + noise).
            y_batch = x_batch + noise_batch
        else:
            y_batch = np.matmul(x_batch, A) + noise_batch

        # Construct estimates using each estimator
        for model_type in hparams.model_types:
            estimator = estimators[model_type]
            x_hat_batch = estimator(A, y_batch, hparams)

            for i, key in enumerate(x_batch_dict.keys()):
                x = xs_dict[key]
                y = y_batch[i]
                x_hat = x_hat_batch[i]

                # Save the estimate
                x_hats_dict[model_type][key] = x_hat

                # Compute and store measurement and l2 loss
                measurement_losses[model_type][
                    key] = utils.get_measurement_loss(x_hat, A, y)
                l2_losses[model_type][key] = utils.get_l2_loss(x_hat, x)

        print('Processed upto image {0} / {1}'.format(key + 1, len(xs_dict)))

        # Checkpointing: flush estimates every checkpoint_iter images.
        if (hparams.save_images) and ((key + 1) % hparams.checkpoint_iter == 0):
            utils.checkpoint(x_hats_dict, measurement_losses, l2_losses,
                             save_image, hparams)
            x_hats_dict = {
                model_type: {}
                for model_type in hparams.model_types
            }
            print('\nProcessed and saved first ', key + 1, 'images\n')

        x_batch_dict = {}

    # Final checkpoint
    if hparams.save_images:
        utils.checkpoint(x_hats_dict, measurement_losses, l2_losses,
                         save_image, hparams)
        print('\nProcessed and saved all {0} image(s)\n'.format(len(xs_dict)))

    if hparams.print_stats:
        for model_type in hparams.model_types:
            print(model_type)
            # NOTE(review): np.mean over dict .values() relies on it being a
            # list (Python 2); under Python 3 wrap with list(...).
            mean_m_loss = np.mean(measurement_losses[model_type].values())
            mean_l2_loss = np.mean(l2_losses[model_type].values())
            print('mean measurement loss = {0}'.format(mean_m_loss))
            print('mean l2 loss = {0}'.format(mean_l2_loss))

    if hparams.image_matrix > 0:
        utils.image_matrix(xs_dict, x_hats_dict, view_image, hparams)

    # Warn the user that some things were not processsed
    if len(x_batch_dict) > 0:
        print(
            '\nDid NOT process last {} images because they did not fill up the last batch.'
            .format(len(x_batch_dict)))
        print('Consider rerunning lazily with a smaller batch size.')
def train(gpu: int, args: Namespace): """Implements the training loop for PyTorch a model. Args: gpu: the GPU device args: user defined arguments """ # setup process groups rank = args.nr * args.gpus + gpu setup(rank, args) # define the model model = ResNext().architecture model.cuda(gpu) # Wrap the model model = DDP(model, device_ids=[gpu]) # define loss function (criterion) and optimizer criterion = nn.BCEWithLogitsLoss() optimizer = Adam(model.parameters(), args.lr) # split data train_df = split_data(args.folds) for fold in range(args.folds): losses = [] scores = [] train_loader, valid_loader = get_data(args, train_df, fold, rank) if gpu == 0: print(f"Training started using fold {fold} for validation") # train model.train() for epoch in range(args.epochs): for i, (images, labels) in enumerate(train_loader): images = images.cuda(gpu) labels = labels.cuda(gpu) output = model(images) loss = criterion(output, labels) loss.backward() optimizer.step() optimizer.zero_grad() if i % args.log_interval == 0 and gpu == 0: print("Train Epoch: {} [{}/{} ({:.0f}%)]\tloss={:.4f}".format( epoch+1, i, len(train_loader), 100. * i / len(train_loader), loss.item())) # evaluate model.eval() with torch.no_grad(): for i, (images, labels) in enumerate(valid_loader): images = images.cuda(gpu) labels = labels.cuda(gpu) output = model(images) loss = criterion(output, labels).item() score = get_score(labels.detach().cpu(), output.detach().cpu()) losses.append(loss) scores.append(score) if gpu == 0: print("Validation loss={:.4f}\tAUC score={:.4f}".format( statistics.mean(losses), statistics.mean(scores))) # checkpoint model model = checkpoint(model, gpu, fold) if args.save_model and gpu == 0: torch.save(model.module.state_dict(), "model.pt") cleanup()
import torch import utils from option import args from data import data if args.fullTrain: from trainer import Trainer else: from preTrainer import Trainer torch.manual_seed(args.seed) checkpoint = utils.checkpoint(args) if checkpoint.ok: my_loader = data(args).get_loader() t = Trainer(my_loader, checkpoint, args) while not t.terminate(): t.train() t.test() checkpoint.done() """ my_loader = data(args).get_loader() loader_train, loader_test = my_loader check = 0 #for batch, (input, target,input_4,input_2, idx_scale) in enumerate(loader_train): # check = check+1 for batch, (input, target, idx_scale) in enumerate(loader_train): check = check+1
def train(config):
    """Train a UNet generator against a patch discriminator (pix2pix-style).

    Splits the dataset 90/10 into train/test, runs the adversarial training
    loop with softplus GAN losses plus an L1 reconstruction term, logs every
    100 iterations, evaluates each epoch, and snapshots models every
    config.snapshot_interval epochs.
    """
    gpu_manage(config)

    ### DATASET LOAD ###
    print('===> Loading datasets')
    dataset = Dataset(config)
    # Fixed 90/10 train/test split.
    train_size = int(0.9 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size])
    training_data_loader = DataLoader(dataset=train_dataset,
                                      num_workers=config.threads,
                                      batch_size=config.batchsize,
                                      shuffle=True)
    test_data_loader = DataLoader(dataset=test_dataset,
                                  num_workers=config.threads,
                                  batch_size=config.test_batchsize,
                                  shuffle=False)

    ### MODELS LOAD ###
    print('===> Loading models')
    # NOTE(review): if gen_model is not 'unet', `gen` is never bound and the
    # code below raises NameError — only a message is printed here.
    if config.gen_model == 'unet':
        gen = UNet(in_ch=config.in_ch,
                   out_ch=config.out_ch,
                   gpu_ids=config.gpu_ids)
    else:
        print('The generator model does not exist')
    if config.gen_init is not None:
        # Warm-start the generator from a saved state dict.
        param = torch.load(config.gen_init)
        gen.load_state_dict(param)
        print('load {} as pretrained model'.format(config.gen_init))
    dis = Discriminator(in_ch=config.in_ch,
                        out_ch=config.out_ch,
                        gpu_ids=config.gpu_ids)
    if config.dis_init is not None:
        # Warm-start the discriminator from a saved state dict.
        param = torch.load(config.dis_init)
        dis.load_state_dict(param)
        print('load {} as pretrained model'.format(config.dis_init))

    # setup optimizer
    opt_gen = optim.Adam(gen.parameters(),
                         lr=config.lr,
                         betas=(config.beta1, 0.999),
                         weight_decay=0.00001)
    opt_dis = optim.Adam(dis.parameters(),
                         lr=config.lr,
                         betas=(config.beta1, 0.999),
                         weight_decay=0.00001)

    # Reusable 256x256 input buffers, resized and filled in-place per batch.
    real_a = torch.FloatTensor(config.batchsize, config.in_ch, 256, 256)
    real_b = torch.FloatTensor(config.batchsize, config.out_ch, 256, 256)

    criterionL1 = nn.L1Loss()
    criterionMSE = nn.MSELoss()
    criterionSoftplus = nn.Softplus()  # softplus GAN loss (non-saturating)

    if config.cuda:
        gen = gen.cuda(0)
        dis = dis.cuda(0)
        criterionL1 = criterionL1.cuda(0)
        criterionMSE = criterionMSE.cuda(0)
        criterionSoftplus = criterionSoftplus.cuda(0)
        real_a = real_a.cuda(0)
        real_b = real_b.cuda(0)

    # Legacy Variable wrappers (pre-0.4 PyTorch style).
    real_a = Variable(real_a)
    real_b = Variable(real_b)

    logreport = LogReport(log_dir=config.out_dir)
    testreport = TestReport(log_dir=config.out_dir)

    # main
    for epoch in range(1, config.epoch + 1):
        for iteration, batch in enumerate(training_data_loader, 1):
            # Copy CPU batch into the persistent (possibly GPU) buffers.
            real_a_cpu, real_b_cpu = batch[0], batch[1]
            real_a.data.resize_(real_a_cpu.size()).copy_(real_a_cpu)
            real_b.data.resize_(real_b_cpu.size()).copy_(real_b_cpu)
            fake_b = gen.forward(real_a)

            ################
            ### Update D ###
            ################
            opt_dis.zero_grad()
            # train with fake — detach so G gets no gradient from the D step.
            fake_ab = torch.cat((real_a, fake_b), 1)
            pred_fake = dis.forward(fake_ab.detach())
            batchsize, _, w, h = pred_fake.size()
            # Per-pixel softplus loss, averaged over batch and spatial dims.
            loss_d_fake = torch.sum(
                criterionSoftplus(pred_fake)) / batchsize / w / h
            # train with real
            real_ab = torch.cat((real_a, real_b), 1)
            pred_real = dis.forward(real_ab)
            loss_d_real = torch.sum(
                criterionSoftplus(-pred_real)) / batchsize / w / h
            # Combined loss
            loss_d = loss_d_fake + loss_d_real
            loss_d.backward()
            # D is only stepped every config.minimax epochs — throttles the
            # discriminator so the generator can keep up.
            if epoch % config.minimax == 0:
                opt_dis.step()

            ################
            ### Update G ###
            ################
            opt_gen.zero_grad()
            # First, G(A) should fake the discriminator
            fake_ab = torch.cat((real_a, fake_b), 1)
            pred_fake = dis.forward(fake_ab)
            loss_g_gan = torch.sum(
                criterionSoftplus(-pred_fake)) / batchsize / w / h
            # Second, G(A) = B
            loss_g_l1 = criterionL1(fake_b, real_b) * config.lamb
            loss_g = loss_g_gan + loss_g_l1
            loss_g.backward()
            opt_gen.step()

            # log
            if iteration % 100 == 0:
                print(
                    "===> Epoch[{}]({}/{}): loss_d_fake: {:.4f} loss_d_real: {:.4f} loss_g_gan: {:.4f} loss_g_l1: {:.4f}"
                    .format(epoch, iteration, len(training_data_loader),
                            loss_d_fake.item(), loss_d_real.item(),
                            loss_g_gan.item(), loss_g_l1.item()))
                log = {}
                log['epoch'] = epoch
                log['iteration'] = len(training_data_loader) * (epoch - 1) + iteration
                log['gen/loss'] = loss_g.item()
                log['dis/loss'] = loss_d.item()
                logreport(log)

        # Per-epoch evaluation on the held-out split (no gradients).
        with torch.no_grad():
            log_test = test(config, test_data_loader, gen, criterionMSE, epoch)
            testreport(log_test)

        if epoch % config.snapshot_interval == 0:
            checkpoint(config, epoch, gen, dis)

        logreport.save_lossgraph()
        testreport.save_lossgraph()
for i in range(opt.n_epochs): log.append([]) t_optim = 0 t0 = time.time() train_slices = utils.slice_epoch(opt.nbatch_train, opt.n_slices) val_slices = utils.slice_epoch(opt.nbatch_val, opt.n_slices) for ts, vs, j in zip(train_slices, val_slices, range(opt.n_slices)): log[i].append({}) for k, batch in zip(ts, trainLoader): t = time.time() loss_train = process_batch(batch, loss_train, i, k, "train", t0) t_optim += time.time() - t for key, value in loss_train.items(): log[i][j]["train_" + key] = np.mean(value[-opt.nbatch_train :]) for k, batch in zip(vs, valLoader): loss_val = process_batch(batch, loss_val, i, k, "val", t0) t_optim += time.time() - t for key, value in loss_val.items(): log[i][j]["val_" + key] = np.mean(value[-opt.nbatch_val :]) utils.checkpoint("%d_%d" % (i, j), model, log, opt) log[i][j]["time(optim)"] = "%.2f(%.2f)" % (time.time() - t0, t_optim) print(log[i][j]) except KeyboardInterrupt: time.sleep(2) # waiting for all threads to stop print("-" * 89) save = input("Exiting early, save the last model?[y/n]") if save == "y": print("Saving...") utils.checkpoint("final", model, log, opt)
if __name__ == '__main__': print("\nObservation\n--------------------------------") print("Shape :", obs_dim) print("\nAction\n--------------------------------") print("Shape :", action_dim, "\n") total_reward = 0 deltas = [] for episode in range(NUM_EPISODES + 1): eps = START_EPSILON / (1.0 + episode * EPSILON_TAPER) if episode%10000 == 0: cp_file = checkpoint(Q, CHECKPOINT_DIR, GAME, episode) print('Saved Checkpoint to: ', cp_file) biggest_change = 0 curr_state = env.reset() for step in range(MAX_STEPS): prev_state = curr_state state_visit_counts[prev_state] = state_visit_counts.get(prev_state,0)+1 action = epsilon_action(curr_state, eps) curr_state, reward, done, info = env.step(action) total_reward += reward old_qsa = Q[prev_state][action] update_Q(prev_state, action, reward, curr_state) biggest_change = max(biggest_change, np.abs(old_qsa - Q[prev_state][action])) if done: break
def main():
    """Entry point: configure, build, and train/test a Capsule Network.

    Parses CLI hyperparameters, loads the dataset, constructs the Net,
    optionally restores a saved checkpoint (when --is-training 0), then runs
    the train/test loop, checkpointing after every epoch.
    """
    global args

    # Setting the hyper parameters
    parser = argparse.ArgumentParser(description='Example of Capsule Network')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of training epochs. default=10')
    parser.add_argument('--lr', type=float, default=0.01,
                        help='learning rate. default=0.01')
    parser.add_argument('--batch-size', type=int, default=128,
                        help='training batch size. default=128')
    parser.add_argument('--test-batch-size', type=int, default=128,
                        help='testing batch size. default=128')
    parser.add_argument(
        '--log-interval', type=int, default=10,
        help=
        'how many batches to wait before logging training status. default=10')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training. default=false')
    parser.add_argument(
        '--threads', type=int, default=4,
        help='number of threads for data loader to use. default=4')
    parser.add_argument('--seed', type=int, default=42,
                        help='random seed for training. default=42')
    parser.add_argument(
        '--num-conv-out-channel', type=int, default=256,
        help='number of channels produced by the convolution. default=256')
    parser.add_argument(
        '--num-conv-in-channel', type=int, default=1,
        help='number of input channels to the convolution. default=1')
    parser.add_argument('--num-primary-unit', type=int, default=8,
                        help='number of primary unit. default=8')
    parser.add_argument('--primary-unit-size', type=int, default=1152,
                        help='primary unit size is 32 * 6 * 6. default=1152')
    # NOTE(review): default=2 but the help text says default=10 — the default
    # and its description disagree; confirm the intended class count.
    parser.add_argument(
        '--num-classes', type=int, default=2,
        help='number of digit classes. 1 unit for one MNIST digit. default=10')
    # NOTE(review): default=1 but the help text says default=16 — same
    # default/help mismatch as above.
    parser.add_argument('--output-unit-size', type=int, default=1,
                        help='output unit size. default=16')
    parser.add_argument('--num-routing', type=int, default=3,
                        help='number of routing iteration. default=3')
    parser.add_argument(
        '--use-reconstruction-loss', type=utils.str2bool, nargs='?',
        default=True,
        help='use an additional reconstruction loss. default=True')
    parser.add_argument(
        '--regularization-scale', type=float, default=0.0005,
        help=
        'regularization coefficient for reconstruction loss. default=0.0005')
    parser.add_argument('--dataset',
                        help='the name of dataset (mnist, cifar10)',
                        default='mnist')
    parser.add_argument(
        '--input-width', type=int, default=28,
        help='input image width to the convolution. default=28 for MNIST')
    parser.add_argument(
        '--input-height', type=int, default=28,
        help='input image height to the convolution. default=28 for MNIST')
    parser.add_argument('--is-training', type=int, default=1,
                        help='Whether or not is training, default is yes')
    parser.add_argument('--weights', type=str, default=None,
                        help='Load pretrained weights, default is none')

    args = parser.parse_args()
    print(args)

    # Check GPU or CUDA is available
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Get reproducible results by manually seed the random number generator
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # Load data
    train_loader, test_loader = utils.load_data(args)

    # Build Capsule Network
    print('===> Building model')
    model = Net(num_conv_in_channel=args.num_conv_in_channel,
                num_conv_out_channel=args.num_conv_out_channel,
                num_primary_unit=args.num_primary_unit,
                primary_unit_size=args.primary_unit_size,
                num_classes=args.num_classes,
                output_unit_size=args.output_unit_size,
                num_routing=args.num_routing,
                use_reconstruction_loss=args.use_reconstruction_loss,
                regularization_scale=args.regularization_scale,
                input_width=args.input_width,
                input_height=args.input_height,
                cuda_enabled=args.cuda)

    if args.cuda:
        print('Utilize GPUs for computation')
        print('Number of GPU available', torch.cuda.device_count())
        model.cuda()
        cudnn.benchmark = True
        model = torch.nn.DataParallel(model)

    # Print the model architecture and parameters
    print('Model architectures:\n{}\n'.format(model))
    print('Parameters and size:')
    for name, param in model.named_parameters():
        print('{}: {}'.format(name, list(param.size())))

    # CapsNet has:
    # - 8.2M parameters and 6.8M parameters without the reconstruction subnet on MNIST.
    # - 11.8M parameters and 8.0M parameters without the reconstruction subnet on CIFAR10.
    num_params = sum([param.nelement() for param in model.parameters()])

    # The coupling coefficients c_ij are not included in the parameter list,
    # we need to add them manually, which is 1152 * 10 = 11520 (on MNIST) or 2048 * 10 (on CIFAR10)
    print('\nTotal number of parameters: {}\n'.format(
        num_params + (11520 if args.dataset == 'mnist' else 20480)))

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Make model checkpoint directory
    if not os.path.exists('results/trained_model'):
        os.makedirs('results/trained_model')

    # Set the logger
    writer = SummaryWriter()

    if not args.is_training:
        # Restore a hard-coded epoch-50 snapshot for evaluation.
        print("Loading checkpoint")
        checkpoint = torch.load('results/trained_model/model_epoch_50.pth',
                                map_location=lambda storage, loc: storage)
        from collections import OrderedDict
        state_dict = checkpoint['state_dict']
        # Strip the DataParallel 'module.' prefix and remap the decoder's
        # fully-connected layers so the keys match this model instance.
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]  # remove `module.`
            if name[:2] == 'fc':
                name = 'decoder.' + name
            new_state_dict[name] = v
        epoch = checkpoint['epoch']
        model.load_state_dict(new_state_dict)
        optimizer.load_state_dict(checkpoint['optimizer'])

    # Train and test
    # Note: test + checkpoint run every epoch even when not training.
    for epoch in range(1, args.epochs + 1):
        if args.is_training:
            train(model, train_loader, optimizer, epoch, writer)
        test(model, test_loader, len(train_loader), epoch, writer)

        # Save model checkpoint
        utils.checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }, epoch)

    writer.close()
def main(hparams): # if not hparams.use_gpu: # os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # Set up some stuff accoring to hparams hparams.n_input = np.prod(hparams.image_shape) #hparams.stdv = 10 #adjust to HPARAM in model_def.py #hparams.mean = 0 #adjust to HPARAM in model_def.py utils.set_num_measurements(hparams) utils.print_hparams(hparams) hparams.bol = False # hparams.dict_flag = False # get inputs if hparams.input_type == 'dict-input':# or hparams.dict_flag: hparams_load_key = copy.copy(hparams) hparams_load_key.input_type = 'full-input' hparams_load_key.measurement_type = 'project' hparams_load_key.zprior_weight = 0.0 hparams.key_field = np.load(utils.get_checkpoint_dir(hparams_load_key, hparams.model_types[0])+'candidates.npy').item() print(hparams.measurement_type) xs_dict, label_dict = model_input(hparams) estimators = utils.get_estimators(hparams) utils.setup_checkpointing(hparams) sh = utils.SaveHandler() sh.load_or_init_all(hparams.save_images,hparams.model_types,sh.get_pkl_filepaths(hparams,use_all=True)) if label_dict is None: print('No labels exist.') del sh.class_loss # measurement_losses, l2_losses, emd_losses, x_orig, x_rec, noise_batch = utils.load_checkpoints(hparams) if hparams.input_type == 'gen-span': np.save(utils.get_checkpoint_dir(hparams, hparams.model_types[0])+'z.npy',hparams.z_from_gen) np.save(utils.get_checkpoint_dir(hparams, hparams.model_types[0])+'images.npy',hparams.images_mat) x_hats_dict = {model_type : {} for model_type in hparams.model_types} x_batch_dict = {} x_batch=[] x_hat_batch=[] # l2_losses2=np.zeros((len(xs_dict),1)) # distances_arr=[] image_distance =np.zeros((len(xs_dict),1)) hparams.x = [] # TO REMOVE for key, x in xs_dict.iteritems(): #//each batch once (x_batch_dict emptied at end) if not hparams.not_lazy: # If lazy, first check if the image has already been # saved before by *all* estimators. If yes, then skip this image. 
save_paths = utils.get_save_paths(hparams, key) is_saved = all([os.path.isfile(save_path) for save_path in save_paths.values()]) if is_saved: continue x_batch_dict[key] = x hparams.x.append(x)#To REMOVE if len(x_batch_dict) < hparams.batch_size: continue # Reshape input x_batch_list = [x.reshape(1, hparams.n_input) for _, x in x_batch_dict.iteritems()] x_batch = np.concatenate(x_batch_list) # x_batch, known_distortion, distances = get_random_distortion(x_batch) # distances_arr[(key-1)*hparams.batch_size:key*hparams.batch_size] = distances # xs_dict[(key-1)*hparams.batch_size:key*hparams.batch_size] =x_batch # Construct noise and measurements recovered, optim = utils.load_if_optimized(hparams) if recovered and np.linalg.norm(optim.x_orig-x_batch) < 1e-10: hparams.optim = optim hparams.recovered = True else: hparams.recovered=False optim.x_orig = x_batch hparams.optim = optim A, noise_batch, y_batch, c_val = utils.load_meas(hparams,sh,x_batch,xs_dict) hparams.optim.noise_batch = noise_batch if c_val: continue if hparams.measurement_type == 'sample_distribution': plot_distribution(hparams,x_batch) # for i in range(z.shape[1]):#range(1): # plt.hist(z[i,:], facecolor='blue', alpha=0.5) # directory_distr = # pl.savefig("abc.png") elif hparams.measurement_type == 'autoencoder': plot_reconstruction(hparams,x_batch) else: # Construct estimates using each estimator for model_type in hparams.model_types: estimator = estimators[model_type] start = time.time() tmp = estimator(A, y_batch, hparams) if isinstance(tmp,tuple): x_hat_batch = tmp[0] sh.z_rec = tmp[1] else: x_hat_batch = tmp del sh.z_rec end = time.time() duration = end-start print('The calculation needed {} time'.format(datetime.timedelta(seconds=duration))) np.save(utils.get_checkpoint_dir(hparams, model_type)+'elapsed_time',duration) # DEBUGGING = [] for i, key in enumerate(x_batch_dict.keys()): # x = xs_dict[key]+known_distortion[i] x = xs_dict[key] y = y_batch[i] x_hat = x_hat_batch[i] # plt.figure() # 
plt.imshow(np.reshape(x_hat, [64, 64, 3])*255)#, interpolation="nearest", cmap=plt.cm.gray) # plt.show() # Save the estimate x_hats_dict[model_type][key] = x_hat # Compute and store measurement and l2 loss sh.measurement_losses[model_type][key] = utils.get_measurement_loss(x_hat, A, y) # DEBUGGING.append(np.sum((x_hat.dot(A)-y)**2)/A.shape[1]) sh.l2_losses[model_type][key] = utils.get_l2_loss(x_hat, x) if hparams.class_bol and label_dict is not None: try: sh.class_losses[model_type][key] = utils.get_classifier_loss(hparams,x_hat,label_dict[key]) except: sh.class_losses[model_type][key] = NaN warnings.warn('Class loss unsuccessfull, most likely due to corrupted memory. Simply retry.') if hparams.emd_bol: try: _,sh.emd_losses[model_type][key] = utils.get_emd_loss(x_hat, x) if 'nonneg' not in hparams.tv_or_lasso_mode and 'pca' in model_type: warnings.warn('EMD requires nonnegative images, for safety insert nonneg into tv_or_lasso_mode') except ValueError: warnings.warn('EMD calculation unsuccesfull (most likely due to negative images)') pass # if l2_losses[model_type][key]-measurement_losses[model_type][key]!=0: # print('NO') # print(y) # print(x) # print(np.mean((x-y)**2)) image_distance[i] = np.linalg.norm(x_hat-x) # l2_losses2[key] = np.mean((x_hat-x)**2) # print('holla') # print(l2_losses2[key]) # print(np.linalg.norm(x_hat-x)**2/len(xs_dict[0])) # print(np.linalg.norm(x_hat-x)/len(xs_dict[0])) # print(np.linalg.norm(x_hat-x)) print('Processed upto image {0} / {1}'.format(key+1, len(xs_dict))) sh.x_orig = x_batch sh.x_rec = x_hat_batch sh.noise = noise_batch #ACTIVATE ON DEMAND #plot_bad_reconstruction(measurement_losses,x_batch) # Checkpointing if (hparams.save_images) and ((key+1) % hparams.checkpoint_iter == 0): utils.checkpoint(x_hats_dict, save_image, sh, hparams) x_hats_dict = {model_type : {} for model_type in hparams.model_types} print('\nProcessed and saved first ', key+1, 'images\n') x_batch_dict = {} if 'wavelet' in hparams.model_types[0]: print 
np.abs(sh.x_rec) print('The average sparsity is {}'.format(np.sum(np.abs(sh.x_rec)>=0.0001)/float(hparams.batch_size))) # Final checkpoint if hparams.save_images: utils.checkpoint(x_hats_dict, save_image, sh, hparams) print('\nProcessed and saved all {0} image(s)\n'.format(len(xs_dict))) if hparams.dataset in ['mnist', 'fashion-mnist']: if np.array(x_batch).size: utilsM.save_images(np.reshape(x_batch, [-1, 28, 28]), [8, 8],utils.get_checkpoint_dir(hparams, hparams.model_types[0])+'original.png') if np.array(x_hat_batch).size: utilsM.save_images(np.reshape(x_hat_batch, [-1, 28, 28]), [8, 8],utils.get_checkpoint_dir(hparams, hparams.model_types[0])+'reconstruction.png') for model_type in hparams.model_types: # print(model_type) mean_m_loss = np.mean(sh.measurement_losses[model_type].values()) mean_l2_loss = np.mean(sh.l2_losses[model_type].values()) #\|XHUT-X\|**2/784/64 if hparams.emd_bol: mean_emd_loss = np.mean(sh.emd_losses[model_type].values()) if label_dict is not None: mean_class_loss = np.mean(sh.class_losses[model_type].values()) print('mean class loss = {0}'.format(mean_class_loss)) # print(image_distance) mean_norm_loss = np.mean(image_distance)#sum_i(\|xhut_i-x_i\|)/64 # mean_rep_error = np.mean(distances_arr) # mean_opt_meas_error_pixel = np.mean(np.array(l2_losses[model_type].values())-np.array(distances_arr)/xs_dict[0].shape) # mean_opt_meas_error = np.mean(image_distance-distances_arr) print('mean measurement loss = {0}'.format(mean_m_loss)) # print np.sum(np.asarray(DEBUGGING))/64 print('mean l2 loss = {0}'.format(mean_l2_loss)) if hparams.emd_bol: print('mean emd loss = {0}'.format(mean_emd_loss)) print('mean distance = {0}'.format(mean_norm_loss)) print('mean distance pixelwise = {0}'.format(mean_norm_loss/len(xs_dict[xs_dict.keys()[0]]))) # print('mean representation error = {0}'.format(mean_rep_error)) # print('mean optimization plus measurement error = {0}'.format(mean_opt_meas_error)) # print('mean optimization plus measurement error per pixel 
= {0}'.format(mean_opt_meas_error_pixel)) if hparams.image_matrix > 0: utils.image_matrix(xs_dict, x_hats_dict, view_image, hparams) # Warn the user that some things were not processsed if len(x_batch_dict) > 0: print('\nDid NOT process last {} images because they did not fill up the last batch.'.format(len(x_batch_dict))) print('Consider rerunning lazily with a smaller batch size.')