def evaluate(args):
    assert torch.cuda.is_available(), 'CUDA is not available.'
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    print('The image is {:}'.format(args.image))
    print('The model is {:}'.format(args.model))
    snapshot = Path(args.model)
    assert snapshot.exists(), 'The model path {:} does not exist'.format(snapshot)
    print('The face bounding box is {:}'.format(args.face))
    assert len(args.face) == 4, 'Invalid face input : {:}'.format(args.face)
    snapshot = torch.load(snapshot)

    mean_fill = tuple([int(x * 255) for x in [0.485, 0.456, 0.406]])
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    param = snapshot['args']
    eval_transform = transforms.Compose([
        transforms.PreCrop(param.pre_crop_expand),
        transforms.TrainScale2WH((param.crop_width, param.crop_height)),
        transforms.ToTensor(), normalize
    ])

    net = models.__dict__[param.arch](param.modelconfig, None)
    net = net.cuda()
    weights = models.remove_module_dict(snapshot['state_dict'])
    net.load_state_dict(weights)

    dataset = datasets.GeneralDataset(eval_transform, param.sigma, param.downsample,
                                      param.heatmap_type, param.dataset_name)
    dataset.reset(param.num_pts)

    print('[{:}] prepare the input data'.format(time_string()))
    [image, _, _, _, _, _, cropped_size], meta = dataset.prepare_input(args.image, args.face)
    inputs = image.unsqueeze(0).cuda()
    print('[{:}] prepare the input data done'.format(time_string()))

    # network forward
    with torch.no_grad():
        batch_heatmaps, batch_locs, batch_scos, _ = net(inputs)
    print('[{:}] the network forward done'.format(time_string()))

    # obtain the locations on the image in the original size
    cpu = torch.device('cpu')
    np_batch_locs, np_batch_scos, cropped_size = batch_locs.to(cpu).numpy(), \
        batch_scos.to(cpu).numpy(), cropped_size.numpy()
    locations = np_batch_locs[0, :-1, :]
    scores = np.expand_dims(np_batch_scos[0, :-1], -1)
    scale_h = cropped_size[0] * 1. / inputs.size(-2)
    scale_w = cropped_size[1] * 1. / inputs.size(-1)
    locations[:, 0] = locations[:, 0] * scale_w + cropped_size[2]
    locations[:, 1] = locations[:, 1] * scale_h + cropped_size[3]
    prediction = np.concatenate((locations, scores), axis=1).transpose(1, 0)

    for i in range(param.num_pts):
        point = prediction[:, i]
        print('{:02d}/{:02d} : ({:.1f}, {:.1f}), score = {:.3f}'.format(
            i, param.num_pts, float(point[0]), float(point[1]), float(point[2])))
def train(train_loader, train_loader1, model, criterion, optimizer, var_optimizer,
          epoch, args, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    rk_losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    train_loader1_iter = iter(train_loader1)
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        input = input.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)
        input1 = next(train_loader1_iter)
        input1 = input1.cuda(args.gpu, non_blocking=True)

        bs = input.shape[0]
        bs1 = input1.shape[0]
        # feed the labeled batch plus two stochastic copies of the auxiliary batch
        output = model(torch.cat([input, input1.repeat(2, 1, 1, 1)]))
        loss = criterion(output[:bs], target)

        # mutual information between the two stochastic predictions on the auxiliary batch
        out1_0 = output[bs:bs + bs1].softmax(-1)
        out1_1 = output[bs + bs1:].softmax(-1)
        mi1 = ent((out1_0 + out1_1) / 2.) - (ent(out1_0) + ent(out1_1)) / 2.
        rank_loss = torch.nn.functional.relu(args.mi_th - mi1).mean()

        prec1, prec5 = accuracy(output[:bs], target, topk=(1, 5))
        losses.update(loss.detach().item(), bs)
        rk_losses.update(rank_loss.detach().item(), bs1)
        top1.update(prec1.item(), bs)
        top5.update(prec5.item(), bs)

        optimizer.zero_grad()
        var_optimizer.zero_grad()
        (loss + rank_loss * args.alpha).backward()
        optimizer.step()
        var_optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i == len(train_loader) - 1:
            print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                      'Time {batch_time.avg:.3f}   '
                      'Data {data_time.avg:.3f}   '
                      'Loss {loss.avg:.4f}   '
                      'RK Loss {rk_loss.avg:.4f}   '
                      'Prec@1 {top1.avg:.3f}   '
                      'Prec@5 {top5.avg:.3f}   '.format(
                          epoch, i, len(train_loader), batch_time=batch_time,
                          rk_loss=rk_losses, data_time=data_time, loss=losses,
                          top1=top1, top5=top5) + time_string(), log)
    return top1.avg, losses.avg
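# `ent` above is assumed to be a categorical-entropy helper; a minimal sketch,
# assuming probabilities along the last dimension (this exact implementation is
# an assumption, not taken from the source):
def ent(p, eps=1e-12):
    # entropy of each categorical distribution, reduced over the class axis
    return -(p * (p + eps).log()).sum(-1)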
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_cuda:
            # the copy will be asynchronous with respect to the host
            target = target.cuda(non_blocking=True)
            input = input.cuda()

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                      'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                          epoch, i, len(train_loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1,
                          top5=top5) + time_string(), log)
    print_log('  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} '
              'Error@1 {error1:.3f}'.format(top1=top1, top5=top5,
                                            error1=100 - top1.avg), log)
    return top1.avg, losses.avg
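# These loops rely on an `AverageMeter` with `.val`/`.avg` fields and an
# `update(val, n)` method; a minimal sketch consistent with that usage (an
# assumption, not the source implementation):
class AverageMeter(object):
    """Tracks the most recent value and a running average."""
    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0
    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count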
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_cuda:
            target = target.cuda(non_blocking=True)
            input = input.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                  'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1,
                      top5=top5) + time_string(), log)
    return top1.avg, losses.avg
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_cuda:
            target = target.cuda(non_blocking=True)
            input = input.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        # mask out the gradients of already-pruned (zero) weights so they stay pruned
        for k, m in enumerate(model.modules()):
            if isinstance(m, nn.Conv2d):
                weight_copy = m.weight.data.abs().clone()
                mask = weight_copy.gt(0).float().cuda()
                m.weight.grad.data.mul_(mask)
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                      'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                          epoch, i, len(train_loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1,
                          top5=top5) + time_string(), log)
    print_log('  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} '
              'Error@1 {error1:.3f}'.format(top1=top1, top5=top5,
                                            error1=100 - top1.avg), log)
    return top1.avg, losses.avg
def train(lang_1, lang_2, pairs, encoder, decoder, output_dir, n_epochs=500000,
          learning_rate=0.001, print_every=1000, save_every=5000, debug=False):
    LOGGER.info('Starting training process...')
    save_every_epoch_start = time.time()
    for epoch in range(1, n_epochs + 1):
        start = time.time()
        LOGGER.debug('Start training epoch %i at %s' % (epoch, time_string()))

        # Train the particular iteration
        train_iter(lang_1, lang_2, pairs, encoder, decoder, len(pairs),
                   print_every=print_every, learning_rate=learning_rate)

        LOGGER.debug('Finished training epoch %i at %s' % (epoch, time_string()))
        LOGGER.debug('Time taken for epoch %i = %s' %
                     (epoch, time_since(start, epoch / n_epochs)))

        if epoch % save_every == 0:
            LOGGER.info('Saving model at epoch %i...' % epoch)
            LOGGER.info('Time taken for %i epochs = %s' %
                        (save_every, time_since(save_every_epoch_start, epoch / n_epochs)))
            save_models(encoder, decoder, learning_rate, epoch, output_dir)
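# `time_since(since, percent)` is assumed to report elapsed time plus a rough
# estimate of the time remaining, given the fraction of work completed; a
# minimal sketch of such a helper (an assumption; the real one may format
# its output differently):
import math
import time

def time_since(since, percent):
    def as_minutes(s):
        m = math.floor(s / 60)
        return '{:d}m {:d}s'.format(int(m), int(s - m * 60))
    elapsed = time.time() - since
    remaining = elapsed / percent - elapsed
    return '{} (- {})'.format(as_minutes(elapsed), as_minutes(remaining))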
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_cuda:
            target = target.cuda(non_blocking=True)
            input = input.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        output = model(input_var)
        loss = criterion(output, target_var)

        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                  'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1,
                      top5=top5) + time_string())
    print('  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} '
          'Error@1 {error1:.3f}'.format(top1=top1, top5=top5, error1=100 - top1.avg))
    return top1.avg, losses.avg
async def timer(self, ctx):
    """Prints out the timer"""
    time_remaining = gamedata.GAME_LENGTH
    while time_remaining > 0:
        time_remaining -= ctx.game.timer_gap
        if ctx.game.show_timer:
            time_str = utils.time_string(time_remaining)
            await ctx.send(time_str)
        await asyncio.sleep(ctx.game.timer_gap / ctx.game.game_speed)
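# In this coroutine, `utils.time_string` renders a remaining-seconds count for
# display; a minimal sketch, assuming an mm:ss rendering (the real helper may
# use a different format):
def time_string(seconds):
    minutes, secs = divmod(int(seconds), 60)
    return '{:02d}:{:02d}'.format(minutes, secs)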
def extract_eval_dataset(backbone, mode, extractors, all_test_datasets,
                         test_loader, num_iters, logger, save_dir):
    # dataset_models = DATASET_MODELS_DICT[backbone]
    logger.print('\n{:} starting to extract the {:} mode into {:} by {:} iters.'.format(
        time_string(), mode, save_dir, num_iters))
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.compat.v1.Session(config=config) as session:
        for idata, test_dataset in enumerate(all_test_datasets):
            logger.print('===>>> {:} --->>> {:02d}/{:02d} --->>> {:}'.format(
                time_string(), idata, len(all_test_datasets), test_dataset))
            x_save_dir = save_dir / '{:}-{:}'.format(mode, num_iters) / '{:}'.format(test_dataset)
            x_save_dir.mkdir(parents=True, exist_ok=True)
            for idx in tqdm(range(num_iters)):
                # extract image features and labels
                if mode == "val":
                    sample = test_loader.get_validation_task(session, test_dataset)
                elif mode == "test":
                    sample = test_loader.get_test_task(session, test_dataset)
                else:
                    raise ValueError("invalid mode:{}".format(mode))
                with torch.no_grad():
                    context_labels = sample['context_labels']
                    target_labels = sample['target_labels']
                    # batch x #extractors x #features
                    context_features = extract_features(extractors, sample['context_images'])
                    target_features = extract_features(extractors, sample['target_images'])
                to_save_info = {
                    'context_features': context_features.cpu(),
                    'context_labels': context_labels.cpu(),
                    'target_features': target_features.cpu(),
                    'target_labels': target_labels.cpu()
                }
                save_name = x_save_dir / '{:06d}.pth'.format(idx)
                torch.save(to_save_info, save_name)
def train(train_loader, model, criterion, optimizer, var_optimizer, epoch, args, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        input = input.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)

        if epoch < 5:
            warmup_learning_rate(optimizer, var_optimizer, epoch, i,
                                 len(train_loader), args)

        output = model(input)
        loss = criterion(output, target)

        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.detach().item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))
        # print_log(loss.item(), log)

        optimizer.zero_grad()
        var_optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        var_optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

    print_log('  Epoch: [{:03d}]   '
              'Time {batch_time.avg:.3f}   '
              'Data {data_time.avg:.3f}   '
              'Loss {loss.avg:.4f}   '
              'Prec@1 {top1.avg:.3f}   '
              'Prec@5 {top5.avg:.3f}   '.format(
                  epoch, batch_time=batch_time, data_time=data_time,
                  loss=losses, top1=top1, top5=top5) + time_string(), log)
    return top1.avg, losses.avg
def train(train_loader, model, criterion, kfac, args, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # print(i)
        # if i == 10:
        #     break
        data_time.update(time.time() - end)
        input = input.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)

        output = model(input)
        # sample labels from the model's own predictive distribution rather than
        # the true targets (the Fisher estimate that K-FAC accumulates)
        dist = torch.distributions.Categorical(logits=output)
        sampled_labels = dist.sample()
        loss = criterion(output, sampled_labels)

        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.detach().item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        model.zero_grad()
        loss.backward()
        kfac.update(batch_size=input.size(0))

        batch_time.update(time.time() - end)
        end = time.time()

        if i == len(train_loader) - 1:
            print_log('Time {batch_time.avg:.3f}   '
                      'Data {data_time.avg:.3f}   '
                      'Loss {loss.avg:.4f}   '
                      'Prec@1 {top1.avg:.3f}   '
                      'Prec@5 {top5.avg:.3f}   '.format(
                          batch_time=batch_time, data_time=data_time,
                          loss=losses, top1=top1, top5=top5) + time_string(), log)
    return top1.avg, losses.avg
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        target = target.cuda(non_blocking=True)

        output = model(input)
        loss = criterion(output, target)

        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                      'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                          epoch, i, len(train_loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1,
                          top5=top5) + time_string(), log)
    print_log('  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} '
              'Error@1 {error1:.3f}'.format(top1=top1, top5=top5,
                                            error1=100 - top1.avg), log)
    return top1.avg, losses.avg
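# `accuracy(output, target, topk)` follows the standard top-k precision recipe
# from the PyTorch ImageNet example; a sketch of that computation, assumed to
# match what these loops expect:
def accuracy(output, target, topk=(1,)):
    """Computes precision@k over a batch of logits, as a percentage."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res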
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    arch_prob = []
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        target = target.cuda(non_blocking=True)

        if args.aux:
            output, output_aux = model(input)
        else:
            output = model(input)
        loss = criterion(output, target)
        if args.aux:
            loss += args.aux_weight * criterion(output_aux, target)

        optimizer.zero_grad()
        loss.backward()

        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i == len(train_loader) - 1:
            print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                      'Time {batch_time.avg:.3f}   '
                      'Data {data_time.avg:.3f}   '
                      'Loss {loss.avg:.4f}   '
                      'Prec@1 {top1.avg:.3f}   '
                      'Prec@5 {top5.avg:.3f}   '.format(
                          epoch, i, len(train_loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1,
                          top5=top5) + time_string(), log)
    if len(arch_prob) > 0:
        print_log(np.array_repr(np.stack(arch_prob).sum(0) / 50000.), log)
    return top1.avg, losses.avg
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_cuda:
            target = target.cuda(non_blocking=True)
            input = input.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                  'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1,
                      top5=top5) + time_string(), log)
    return top1.avg, losses.avg
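# `print_log` is assumed to mirror a message to stdout and an open log file; a
# minimal sketch consistent with how it is called in these loops (an
# assumption, not the source implementation):
def print_log(message, log):
    print(message)
    if log is not None:
        log.write('{}\n'.format(message))
        log.flush()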
def create_summary(summary_dir, summary_off=False):
    if summary_off:
        return {
            'train': tf.summary.create_noop_writer(),
            'eval': tf.summary.create_noop_writer()
        }
    logdir = os.path.join(summary_dir, time_string())
    train_log = os.path.join(logdir, 'train')
    eval_log = os.path.join(logdir, 'eval')
    os.makedirs(train_log, exist_ok=True)
    os.makedirs(eval_log, exist_ok=True)
    return {
        'train': tf.summary.create_file_writer(train_log),
        'eval': tf.summary.create_file_writer(eval_log)
    }
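# Example usage of create_summary (hypothetical directory), writing one scalar
# through the returned train writer with the standard TF2 summary API:
writers = create_summary('/tmp/summaries')
with writers['train'].as_default():
    tf.summary.scalar('loss', 0.5, step=0)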
def crop_style(list_file, num_pts, save_dir):
    # style = 'Original'
    # save_dir = 'cache/{}'.format(style)
    print('Cropping face images into {:}'.format(save_dir))
    if not osp.isdir(save_dir):
        os.makedirs(save_dir)

    transform = transforms.Compose([transforms.PreCrop(0.2),
                                    transforms.TrainScale2WH((256, 256))])
    data = datasets.GeneralDataset(transform, 1, 8, 'gaussian', 'test')
    data.load_list(list_file, num_pts, True)
    # loader = torch.utils.data.DataLoader(data, batch_size=1, shuffle=False, num_workers=12, pin_memory=True)

    for i, tempx in enumerate(data):
        image = tempx[0]
        # points = tempx[3]
        basename = osp.basename(data.datas[i])
        save_name = osp.join(save_dir, basename)
        image.save(save_name)
        if i % PRINT_GAP == 0:
            print('{:} --->>> process the {:4d}/{:4d}-th image into {:}'.format(
                time_string(), i, len(data), save_dir))
def basic_eval_all(args, loaders, net, criterion, epoch_str, logger, opt_config):
    args = deepcopy(args)
    logger.log('Basic-Eval-All evaluates {:} datasets'.format(len(loaders)))
    nmes = []
    for i, (loader, is_video) in enumerate(loaders):
        logger.log('==>>{:}, [{:}], evaluate the {:}/{:}-th dataset [{:}] : {:}'.format(
            time_string(), epoch_str, i, len(loaders),
            'video' if is_video else 'image', loader.dataset))
        with torch.no_grad():
            eval_loss, eval_meta = basic_eval(
                args, loader, net, criterion,
                epoch_str + "::{:}/{:}".format(i, len(loaders)), logger, opt_config)
        nme, _, _ = eval_meta.compute_mse(logger)
        meta_path = logger.path('meta') / 'eval-{:}-{:02d}-{:02d}.pth'.format(
            epoch_str, i, len(loaders))
        eval_meta.save(str(meta_path))
        nmes.append(nme)
    return ', '.join(['{:.1f}'.format(x) for x in nmes])
def train(train_loader, net, criterion_CE, optimizer, epoch, recorder, logger, args):
    batch_time_meter = AverageMeter()
    stats = recorder.train_stats
    meters = {stat: AverageMeter() for stat in stats}
    net.train()

    end = time.time()
    for i, (imgs, labels, views) in enumerate(train_loader):
        imgs_var = torch.autograd.Variable(imgs.cuda())
        labels_var = torch.autograd.Variable(labels.cuda())
        _, predictions = net(imgs_var)

        optimizer.zero_grad()
        softmax = criterion_CE(predictions, labels_var)
        softmax.backward()
        acc = accuracy(predictions.data, labels.cuda(), topk=(1, ))
        optimizer.step()

        # update meters
        meters['acc'].update(acc[0][0], args.batch_size)
        meters['loss'].update(softmax.data.mean(), args.batch_size)

        # measure elapsed time
        batch_time_meter.update(time.time() - end)
        freq = args.batch_size / batch_time_meter.avg
        end = time.time()

        if i % args.print_freq == 0:
            logger.print_log('  Epoch: [{:03d}][{:03d}/{:03d}]  Freq {:.1f}   '.format(
                epoch, i, len(train_loader), freq) +
                create_stat_string(meters) + time_string())

    logger.print_log('  **Train** ' + create_stat_string(meters))
    recorder.update(epoch=epoch, is_train=True, meters=meters)
def perform_attack(attacker, model, model_clean, train_loader, test_loader,
                   N_iter, log, writer):
    # Note that the attack has to be done in evaluation mode due to batch-norm.
    # see: https://discuss.pytorch.org/t/what-does-model-eval-do-for-batchnorm-layer/7146
    model.eval()
    losses = AverageMeter()
    iter_time = AverageMeter()
    attack_time = AverageMeter()

    # attempt to use the training data to conduct BFA
    for _, (data, target) in enumerate(train_loader):
        if args.use_cuda:
            target = target.cuda(non_blocking=True)
            data = data.cuda()
        # Override the target to prevent label leaking
        _, target = model(data).data.max(1)
        break

    # evaluate the test accuracy of the clean model
    val_acc_top1, val_acc_top5, val_loss = validate(test_loader, model,
                                                    attacker.criterion, log)
    writer.add_scalar('attack/val_top1_acc', val_acc_top1, 0)
    writer.add_scalar('attack/val_top5_acc', val_acc_top5, 0)
    writer.add_scalar('attack/val_loss', val_loss, 0)

    print_log('k_top is set to {}'.format(args.k_top), log)
    print_log('Attack sample size is {}'.format(data.size()[0]), log)
    end = time.time()
    for i_iter in range(N_iter):
        print_log('**********************************', log)
        attacker.progressive_bit_search(model, data, target)

        # measure attack time
        attack_time.update(time.time() - end)
        end = time.time()

        h_dist = hamming_distance(model, model_clean)

        # record the loss
        losses.update(attacker.loss_max, data.size(0))

        print_log('Iteration: [{:03d}/{:03d}]   '
                  'Attack Time {attack_time.val:.3f} ({attack_time.avg:.3f})  '.format(
                      (i_iter + 1), N_iter, attack_time=attack_time) + time_string(), log)
        print_log('loss before attack: {:.4f}'.format(attacker.loss.item()), log)
        print_log('loss after attack: {:.4f}'.format(attacker.loss_max), log)
        print_log('bit flips: {:.0f}'.format(attacker.bit_counter), log)
        print_log('hamming_dist: {:.0f}'.format(h_dist), log)

        writer.add_scalar('attack/bit_flip', attacker.bit_counter, i_iter + 1)
        writer.add_scalar('attack/h_dist', h_dist, i_iter + 1)
        writer.add_scalar('attack/sample_loss', losses.avg, i_iter + 1)

        # examine the BFA on the entire val dataset
        val_acc_top1, val_acc_top5, val_loss = validate(test_loader, model,
                                                        attacker.criterion, log)
        writer.add_scalar('attack/val_top1_acc', val_acc_top1, i_iter + 1)
        writer.add_scalar('attack/val_top5_acc', val_acc_top5, i_iter + 1)
        writer.add_scalar('attack/val_loss', val_loss, i_iter + 1)

        # measure elapsed time
        iter_time.update(time.time() - end)
        print_log('iteration Time {iter_time.val:.3f} ({iter_time.avg:.3f})'.format(
            iter_time=iter_time), log)
        end = time.time()
    return
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_cuda:
            target = target.cuda(non_blocking=True)
            input = input.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy2(output.data, target, topk=(1, 1))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                      'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                          epoch, i, len(train_loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1,
                          top5=top5) + time_string(), log)
    print_log('  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} '
              'Error@1 {error1:.3f}'.format(top1=top1, top5=top5,
                                            error1=100 - top1.avg), log)
    # log to TensorBoard
    if args.tensorboard:
        log_value('train_loss', losses.avg, epoch)
        log_value('train_error', top1.avg, epoch)
    return top1.avg, losses.avg
def main():
    # Init logger
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)

    print('Dataset: {}'.format(args.dataset.upper()))
    if args.dataset in ("seedlings", "bone"):
        classes, class_to_idx, num_to_class, df = GenericDataset.find_classes(args.data_path)
    if args.dataset == "ISIC2017":
        classes, class_to_idx, num_to_class, df = GenericDataset.find_classes_melanoma(args.data_path)
    df.head(3)
    args.num_classes = len(classes)

    # Init model, criterion, and optimizer
    # net = models.__dict__[args.arch](num_classes)
    # net = kmodels.simpleXX_generic(num_classes=args.num_classes, imgDim=args.imgDim)
    # net = kmodels.vggnetXX_generic(num_classes=args.num_classes, imgDim=args.imgDim)
    net = kmodels.dpn92(num_classes=args.num_classes)
    # print_log("=> network :\n {}".format(net), log)
    real_model_name = type(net).__name__
    print("=> Creating model '{}'".format(real_model_name))

    import datetime
    exp_name = datetime.datetime.now().strftime(
        real_model_name + '_' + args.dataset + '_%Y-%m-%d_%H-%M-%S')
    print('Training ' + real_model_name + ' on {} dataset:'.format(args.dataset.upper()))

    mPath = args.save_path + '/' + args.dataset + '/' + real_model_name + '/'
    args.save_path_model = mPath
    if not os.path.isdir(args.save_path_model):
        os.makedirs(args.save_path_model)
    log = open(os.path.join(mPath, 'seed_{}.txt'.format(args.manualSeed)), 'w')
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print("Random Seed: {}".format(args.manualSeed))
    print("python version : {}".format(sys.version.replace('\n', ' ')))
    print("torch version : {}".format(torch.__version__))
    print("cudnn version : {}".format(torch.backends.cudnn.version()))

    # Init dataset
    if not os.path.isdir(args.data_path):
        os.makedirs(args.data_path)

    normalize_img = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                     std=[0.229, 0.224, 0.225])
    train_trans = transforms.Compose([
        transforms.RandomResizedCrop(args.img_scale),
        PowerPIL(),
        transforms.ToTensor(),
        # normalize_img,
        RandomErasing()
    ])
    # Normalization only for validation and test
    valid_trans = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(args.img_scale),
        transforms.ToTensor(),
        # normalize_img
    ])
    test_trans = valid_trans

    train_data = df.sample(frac=args.validationRatio)
    valid_data = df[~df['file'].isin(train_data['file'])]
    train_set = GenericDataset(train_data, args.data_path, transform=train_trans)
    valid_set = GenericDataset(valid_data, args.data_path, transform=valid_trans)
    t_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, num_workers=0)
    v_loader = DataLoader(valid_set, batch_size=args.batch_size, shuffle=True, num_workers=0)
    # test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=4)
    dataset_sizes = {
        'train': len(t_loader.dataset),
        'valid': len(v_loader.dataset)
    }
    print(dataset_sizes)

    # net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))
    criterion = torch.nn.CrossEntropyLoss()
    # optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)
    optimizer = torch.optim.SGD(net.parameters(), state['learning_rate'],
                                momentum=state['momentum'],
                                weight_decay=state['decay'], nesterov=True)
    # optimizer = torch.optim.Adam(net.parameters(), lr=state['learning_rate'])

    if args.use_cuda:
        net.cuda()
        criterion.cuda()

    recorder = RecorderMeter(args.epochs)
    # optionally resume from a checkpoint
    if args.evaluate:
        validate(v_loader, net, criterion, log)
        return

    if args.tensorboard:
        configure("./logs/runs/%s" % exp_name)

    print('    Total params: %.2fM' % (sum(p.numel() for p in net.parameters()) / 1000000.0))

    # Main loop
    start_training_time = time.time()
    training_time = time.time()
    start_time = time.time()
    epoch_time = AverageMeter()
    for epoch in tqdm(range(args.start_epoch, args.epochs)):
        current_learning_rate = adjust_learning_rate(optimizer, epoch,
                                                     args.gammas, args.schedule)
        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)

        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(
            time_string(), epoch, args.epochs, need_time, current_learning_rate)
            + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(
                recorder.max_accuracy(False), 100 - recorder.max_accuracy(False)), log)
        tqdm.write('\n==>>Epoch=[{:03d}/{:03d}], {:s}, LR=[{}], Batch=[{}]'.format(
            epoch, args.epochs, time_string(), state['learning_rate'], args.batch_size)
            + ' [Model={}]'.format(type(net).__name__), log)

        # train for one epoch
        train_acc, train_los = train(t_loader, net, criterion, optimizer, epoch, log)
        val_acc, val_los = validate(v_loader, net, criterion, epoch, log)
        is_best = recorder.update(epoch, train_los, train_acc, val_los, val_acc)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
        training_time = time.time() - start_training_time
        recorder.plot_curve(
            os.path.join(mPath, real_model_name + '_' + exp_name + '.png'),
            training_time, net, real_model_name, dataset_sizes, args.batch_size,
            args.learning_rate, args.dataset, args.manualSeed, args.num_classes)

        if float(val_acc) > float(95.0):
            print("*** EARLY STOP ***")
            df_pred = testSeedlingsModel(args.test_data_path, net, num_to_class, test_trans)
            model_save_path = os.path.join(
                mPath,
                real_model_name + '_' + str(val_acc) + '_' + str(val_los) + "_" + str(epoch))
            df_pred.to_csv(model_save_path + "_sub.csv",
                           columns=('file', 'species'), index=None)
            torch.save(net.state_dict(), model_save_path + '_.pth')

        save_checkpoint({
            'epoch': epoch + 1,
            # 'arch': args.arch,
            'state_dict': net.state_dict(),
            'recorder': recorder,
            'optimizer': optimizer.state_dict(),
        }, is_best, mPath,
            str(val_acc) + '_' + str(val_los) + "_" + str(epoch) + '_checkpoint.pth.tar')

    log.close()
def main():
    # Init logger
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = open(os.path.join(args.save_path, 'log_seed_{}.txt'.format(args.manualSeed)), 'w')
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("torch version : {}".format(torch.__version__), log)
    print_log("cudnn version : {}".format(torch.backends.cudnn.version()), log)
    print_log("Compress Rate: {}".format(args.rate), log)
    print_log("Layer Begin: {}".format(args.layer_begin), log)
    print_log("Layer End: {}".format(args.layer_end), log)
    print_log("Layer Inter: {}".format(args.layer_inter), log)
    print_log("Epoch prune: {}".format(args.epoch_prune), log)

    # Init dataset
    if not os.path.isdir(args.data_path):
        os.makedirs(args.data_path)

    if args.dataset == 'cifar10':
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
    elif args.dataset == 'cifar100':
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
    else:
        assert False, "Unknown dataset : {}".format(args.dataset)

    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    test_transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(mean, std)])

    if args.dataset == 'cifar10':
        train_data = dset.CIFAR10(args.data_path, train=True, transform=train_transform, download=True)
        test_data = dset.CIFAR10(args.data_path, train=False, transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'cifar100':
        train_data = dset.CIFAR100(args.data_path, train=True, transform=train_transform, download=True)
        test_data = dset.CIFAR100(args.data_path, train=False, transform=test_transform, download=True)
        num_classes = 100
    elif args.dataset == 'svhn':
        train_data = dset.SVHN(args.data_path, split='train', transform=train_transform, download=True)
        test_data = dset.SVHN(args.data_path, split='test', transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'stl10':
        train_data = dset.STL10(args.data_path, split='train', transform=train_transform, download=True)
        test_data = dset.STL10(args.data_path, split='test', transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'imagenet':
        assert False, 'ImageNet code is not finished'
    else:
        assert False, 'Do not support dataset : {}'.format(args.dataset)

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                               shuffle=True, num_workers=args.workers,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size,
                                              shuffle=False, num_workers=args.workers,
                                              pin_memory=True)

    print_log("=> creating model '{}'".format(args.arch), log)
    # Init model, criterion, and optimizer
    net = models.__dict__[args.arch](num_classes)
    net_ref = models.__dict__[args.arch](num_classes)
    print_log("=> network :\n {}".format(net), log)

    net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))
    net_ref = torch.nn.DataParallel(net_ref, device_ids=list(range(args.ngpu)))

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), state['learning_rate'],
                                momentum=state['momentum'],
                                weight_decay=state['decay'], nesterov=True)

    if args.use_cuda:
        net.cuda()
        criterion.cuda()

    recorder = RecorderMeter(args.epochs)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            checkpoint = torch.load(args.resume)
            # this checkpoint stores the full reference model, not a state dict
            net_ref = checkpoint['state_dict']
            print_log("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']), log)
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume), log)
    else:
        print_log("=> do not use any checkpoint for {} model".format(args.arch), log)

    ###################################################################################
    # re-initialize the surviving weights and re-apply the pruning mask from net_ref
    for m, m_ref in zip(net.modules(), net_ref.modules()):
        if isinstance(m, nn.Conv2d):
            weight_copy = m_ref.weight.data.abs().clone()
            mask = weight_copy.gt(0).float().cuda()
            n = mask.sum() / float(m.in_channels)
            m.weight.data.normal_(0, math.sqrt(2. / n))
            m.weight.data.mul_(mask)
    ###################################################################################

    if args.evaluate:
        time1 = time.time()
        validate(test_loader, net, criterion, log)
        time2 = time.time()
        print('function took %0.3f ms' % ((time2 - time1) * 1000.0))
        return

    m = Mask(net)
    m.init_length()
    comp_rate = args.rate
    print("-" * 10 + "one epoch begin" + "-" * 10)
    print("the compression rate now is %f" % comp_rate)

    val_acc_1, val_los_1 = validate(test_loader, net_ref, criterion, log)
    print(" accu before is: %.3f %%" % val_acc_1)

    if args.use_cuda:
        net = net.cuda()
    val_acc_2, val_los_2 = validate(test_loader, net, criterion, log)
    print(" accu after is: %s %%" % val_acc_2)

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()
    for epoch in range(args.start_epoch, args.epochs):
        current_learning_rate = adjust_learning_rate(optimizer, epoch, args.gammas, args.schedule)

        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)

        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(
            time_string(), epoch, args.epochs, need_time, current_learning_rate)
            + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(
                recorder.max_accuracy(False), 100 - recorder.max_accuracy(False)), log)

        num_parameters = get_conv_zero_param(net)
        print_log('Zero parameters: {}'.format(num_parameters), log)
        num_parameters = sum([param.nelement() for param in net.parameters()])
        print_log('Parameters: {}'.format(num_parameters), log)

        # train for one epoch
        train_acc, train_los = train(train_loader, net, criterion, optimizer, epoch, log)

        # evaluate on validation set
        val_acc_1, val_los_1 = validate(test_loader, net, criterion, log)

        is_best = recorder.update(epoch, train_los, train_acc, val_los_1, val_acc_1)

        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': net,
            'recorder': recorder,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.save_path, 'checkpoint.pth.tar')

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    log.close()
def main():
    # Init logger
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = open(os.path.join(args.save_path, 'log_seed_{}.txt'.format(args.manualSeed)), 'w')
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("torch version : {}".format(torch.__version__), log)
    print_log("cudnn version : {}".format(torch.backends.cudnn.version()), log)

    # Init dataset
    if not os.path.exists(args.data_path):
        os.makedirs(args.data_path)

    if args.dataset == 'cifar10':
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
    elif args.dataset == 'cifar100':
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
    else:
        assert False, "Unknown dataset : {}".format(args.dataset)

    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    test_transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(mean, std)])

    if args.dataset == 'cifar10':
        train_data = dset.CIFAR10(args.data_path, train=True, transform=train_transform, download=True)
        test_data = dset.CIFAR10(args.data_path, train=False, transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'cifar100':
        train_data = dset.CIFAR100(args.data_path, train=True, transform=train_transform, download=True)
        test_data = dset.CIFAR100(args.data_path, train=False, transform=test_transform, download=True)
        num_classes = 100
    elif args.dataset == 'svhn':
        train_data = dset.SVHN(args.data_path, split='train', transform=train_transform, download=True)
        test_data = dset.SVHN(args.data_path, split='test', transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'stl10':
        train_data = dset.STL10(args.data_path, split='train', transform=train_transform, download=True)
        test_data = dset.STL10(args.data_path, split='test', transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'imagenet':
        assert False, 'ImageNet code is not finished'
    else:
        assert False, 'Do not support dataset : {}'.format(args.dataset)

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                               shuffle=True, num_workers=args.workers,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size,
                                              shuffle=False, num_workers=args.workers,
                                              pin_memory=True)

    print_log("=> creating model '{}'".format(args.arch), log)
    # Init model, criterion, and optimizer
    net = models.__dict__[args.arch](num_classes)
    print_log("=> network :\n {}".format(net), log)

    net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), state['learning_rate'],
                                momentum=state['momentum'],
                                weight_decay=state['decay'], nesterov=False)

    if args.use_cuda:
        net.cuda()
        criterion.cuda()

    recorder = RecorderMeter(args.epochs)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            checkpoint = torch.load(args.resume)
            recorder = checkpoint['recorder']
            args.start_epoch = checkpoint['epoch']
            net.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print_log("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']), log)
        else:
            raise ValueError("=> no checkpoint found at '{}'".format(args.resume))
    else:
        print_log("=> do not use any checkpoint for {} model".format(args.arch), log)

    if args.evaluate:
        validate(test_loader, net, criterion, log)
        return

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()
    for epoch in range(args.start_epoch, args.epochs):
        current_learning_rate = adjust_learning_rate(optimizer, epoch, args.gammas, args.schedule)

        need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)

        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(
            time_string(), epoch, args.epochs, need_time, current_learning_rate)
            + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(
                recorder.max_accuracy(False), 100 - recorder.max_accuracy(False)), log)

        # train for one epoch
        train_acc, train_los = train(train_loader, net, criterion, optimizer, epoch, log)

        # evaluate on validation set
        # val_acc, val_los = extract_features(test_loader, net, criterion, log)
        val_acc, val_los = validate(test_loader, net, criterion, log)
        is_best = recorder.update(epoch, train_los, train_acc, val_los, val_acc)

        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': net.state_dict(),
            'recorder': recorder,
            'optimizer': optimizer.state_dict(),
            'args': copy.deepcopy(args),
        }, is_best, args.save_path, 'hb16_10check.pth.tar')

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

        recorder.plot_curve(os.path.join(args.save_path, 'hb16_10.png'))

    log.close()
def evaluate(args):
    if not args.cpu:
        assert torch.cuda.is_available(), 'CUDA is not available.'
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True
    print('The image is {:}'.format(args.image))
    print('The model is {:}'.format(args.model))
    snapshot = Path(args.model)
    assert snapshot.exists(), 'The model path {:} does not exist'.format(snapshot)
    if args.cpu:
        snapshot = torch.load(snapshot, map_location='cpu')
    else:
        snapshot = torch.load(snapshot)

    mean_fill = tuple([int(x * 255) for x in [0.5, 0.5, 0.5]])
    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    param = snapshot['args']
    eval_transform = transforms.Compose([
        transforms.PreCrop(param.pre_crop_expand),
        transforms.TrainScale2WH((param.crop_width, param.crop_height)),
        transforms.ToTensor(), normalize
    ])

    net = models.__dict__[param.arch](param.modelconfig, None)
    if not args.cpu:
        net = net.cuda()
    weights = models.remove_module_dict(snapshot['state_dict'])
    net.load_state_dict(weights)

    dataset = datasets.GeneralDataset(eval_transform, param.sigma, param.downsample,
                                      param.heatmap_type, param.dataset_name)
    dataset.reset(param.num_pts)

    print('[{:}] prepare the input data'.format(time_string()))
    print("Using MT-CNN face detector.")
    try:
        face = utils.detect_face_mtcnn(args.image)
    except utils.mtcnn_detector.BBoxNotFound:
        print("MT-CNN detector failed! Using default bbox instead.")
        face = [153.08, 462., 607.78, 1040.42]
    [image, _, _, _, _, _, cropped_size], meta = dataset.prepare_input(args.image, face)
    print('[{:}] prepare the input data done'.format(time_string()))
    print('Net : \n{:}'.format(net))

    # network forward
    with torch.no_grad():
        if args.cpu:
            inputs = image.unsqueeze(0)
        else:
            inputs = image.unsqueeze(0).cuda()
        gan_output = (net.netG_A(inputs) + net.netG_B(inputs)) / 2
        gan_output = (gan_output * 0.5 + 0.5).squeeze(0).cpu().permute(1, 2, 0).numpy()
        Image.fromarray((gan_output * 255).astype(np.uint8)).save(
            args.save_path.replace(".jpg", ".gan.jpg"))
        batch_heatmaps, batch_locs, batch_scos, _ = net(inputs)
        # print('input-shape : {:}'.format(inputs.shape))
        flops, params = get_model_infos(net, inputs.shape, None)
        print('\nIN-shape : {:}, FLOPs : {:} MB, Params : {:}.'.format(
            list(inputs.shape), flops, params))
        flops, params = get_model_infos(net, None, inputs)
        print('\nIN-shape : {:}, FLOPs : {:} MB, Params : {:}.'.format(
            list(inputs.shape), flops, params))
    print('[{:}] the network forward done'.format(time_string()))

    # obtain the locations on the image in the original size
    cpu = torch.device('cpu')
    np_batch_locs, np_batch_scos, cropped_size = batch_locs.to(cpu).numpy(), \
        batch_scos.to(cpu).numpy(), cropped_size.numpy()
    locations = np_batch_locs[0, :-1, :]
    scores = np.expand_dims(np_batch_scos[0, :-1], -1)
    scale_h = cropped_size[0] * 1. / inputs.size(-2)
    scale_w = cropped_size[1] * 1. / inputs.size(-1)
    locations[:, 0] = locations[:, 0] * scale_w + cropped_size[2]
    locations[:, 1] = locations[:, 1] * scale_h + cropped_size[3]
    prediction = np.concatenate((locations, scores), axis=1).transpose(1, 0)

    for i in range(param.num_pts):
        point = prediction[:, i]
        print('The coordinate of {:02d}/{:02d}-th points : ({:.1f}, {:.1f}), '
              'score = {:.3f}'.format(i, param.num_pts, float(point[0]),
                                      float(point[1]), float(point[2])))

    if args.save_path:
        image = draw_image_by_points(args.image, prediction, 1, (255, 0, 0), False, False)
        image.save(args.save_path)
        print('save image with landmarks into {:}'.format(args.save_path))
    print('finish san evaluation on a single image : {:}'.format(args.image))
def main_worker(gpu, ngpus_per_node, args):
    global best_acc
    args.gpu = gpu
    assert args.gpu is not None
    print("Use GPU: {} for training".format(args.gpu))

    log = open(
        os.path.join(args.save_path, 'log_seed{}{}.txt'.format(
            args.manualSeed, '_eval' if args.evaluate else '')), 'w')
    log = (log, args.gpu)

    net = models.__dict__[args.arch](pretrained=True)
    disable_dropout(net)
    net = to_bayesian(net, args.psi_init_range)
    net.apply(unfreeze)

    print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("PyTorch version : {}".format(torch.__version__), log)
    print_log("CuDNN version : {}".format(torch.backends.cudnn.version()), log)
    print_log("Number of parameters: {}".format(
        sum([p.numel() for p in net.parameters()])), log)
    print_log(str(args), log)

    if args.distributed:
        if args.multiprocessing_distributed:
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url + ":" + args.dist_port,
                                world_size=args.world_size, rank=args.rank)
        torch.cuda.set_device(args.gpu)
        net.cuda(args.gpu)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.gpu])
    else:
        torch.cuda.set_device(args.gpu)
        net = net.cuda(args.gpu)

    criterion = torch.nn.CrossEntropyLoss().cuda(args.gpu)

    # split the parameters into means (mu) and variational parameters (psi)
    mus, psis = [], []
    for name, param in net.named_parameters():
        if 'psi' in name:
            psis.append(param)
        else:
            mus.append(param)
    mu_optimizer = SGD(mus, args.learning_rate, args.momentum,
                       weight_decay=args.decay, nesterov=(args.momentum > 0.0))
    psi_optimizer = PsiSGD(psis, args.learning_rate, args.momentum,
                           weight_decay=args.decay, nesterov=(args.momentum > 0.0))

    recorder = RecorderMeter(args.epochs)
    if args.resume:
        if args.resume == 'auto':
            args.resume = os.path.join(args.save_path, 'checkpoint.pth.tar')
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            checkpoint = torch.load(args.resume, map_location='cuda:{}'.format(args.gpu))
            recorder = checkpoint['recorder']
            recorder.refresh(args.epochs)
            args.start_epoch = checkpoint['epoch']
            net.load_state_dict(checkpoint['state_dict'] if args.distributed else
                                {k.replace('module.', ''): v
                                 for k, v in checkpoint['state_dict'].items()})
            mu_optimizer.load_state_dict(checkpoint['mu_optimizer'])
            psi_optimizer.load_state_dict(checkpoint['psi_optimizer'])
            best_acc = recorder.max_accuracy(False)
            print_log("=> loaded checkpoint '{}' accuracy={} (epoch {})".format(
                args.resume, best_acc, checkpoint['epoch']), log)
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume), log)
    else:
        print_log("=> do not use any checkpoint for the model", log)

    cudnn.benchmark = True

    train_loader, ood_train_loader, test_loader, adv_loader, \
        fake_loader, adv_loader2 = load_dataset_ft(args)
    psi_optimizer.num_data = len(train_loader.dataset)

    if args.evaluate:
        evaluate(test_loader, adv_loader, fake_loader, adv_loader2,
                 net, criterion, args, log, 20, 100)
        return

    start_time = time.time()
    epoch_time = AverageMeter()
    train_los = -1

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_loader.sampler.set_epoch(epoch)
            ood_train_loader.sampler.set_epoch(epoch)
        cur_lr, cur_slr = adjust_learning_rate(mu_optimizer, psi_optimizer, epoch, args)

        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)

        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f} {:6.4f}]'.format(
            time_string(), epoch, args.epochs, need_time, cur_lr, cur_slr)
            + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(
                recorder.max_accuracy(False), 100 - recorder.max_accuracy(False)), log)

        train_acc, train_los = train(train_loader, ood_train_loader, net, criterion,
                                     mu_optimizer, psi_optimizer, epoch, args, log)
        val_acc, val_los = 0, 0
        recorder.update(epoch, train_los, train_acc, val_acc, val_los)

        is_best = False
        if val_acc > best_acc:
            is_best = True
            best_acc = val_acc

        if args.gpu == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': net.state_dict(),
                'recorder': recorder,
                'mu_optimizer': mu_optimizer.state_dict(),
                'psi_optimizer': psi_optimizer.state_dict(),
            }, False, args.save_path, 'checkpoint.pth.tar')

        epoch_time.update(time.time() - start_time)
        start_time = time.time()

        recorder.plot_curve(os.path.join(args.save_path, 'log.png'))

    evaluate(test_loader, adv_loader, fake_loader, adv_loader2,
             net, criterion, args, log, 20, 100)

    log[0].close()
parser.add_argument('--p_epochs', type=int, default=50)
parser.add_argument('--p_weight_decay', type=float, default=5e-4)
parser.add_argument('--new_pop_limit', type=int, default=8)
parser.add_argument('--init_pool_size', type=int, default=32)
parser.add_argument('--max_samples', type=int, default=100)
parser.add_argument('--step_size', type=float, default=1.)
parser.add_argument('--step_batch_size', type=int, default=128)
# parser.add_argument('--eval_batches', type=int, default=10)
parser.add_argument('--load_workers', type=int, default=0,
                    help='number of data loading workers')
parser.add_argument('--log_dir', type=str, default='logs/searches-ws/%s' % time_string(),
                    help='Folder to save checkpoints and log.')
parser.add_argument('--nas_bench_path', default=None, type=str,
                    help='The path to load NAS-Bench-201.')
parser.add_argument('--print_freq', type=int, default=200,
                    help='print frequency (default: 200)')
parser.add_argument('--seed', type=int, default=114514, help='manual seed')
parser.add_argument('--repeat', type=int, default=1)
parser.add_argument('--workers', type=int, default=1)
parser.add_argument('--load_checkpoint', type=str, default=None)
parser.add_argument('--tag', type=str, default=None)

args = parser.parse_args()
def main():
    # Init logger
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = open(os.path.join(args.save_path, "log_seed_{}.txt".format(args.manualSeed)), "w")
    print_log("save path : {}".format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("python version : {}".format(sys.version.replace("\n", " ")), log)
    print_log("torch version : {}".format(torch.__version__), log)
    print_log("cudnn version : {}".format(torch.backends.cudnn.version()), log)

    # Init dataset
    if not os.path.isdir(args.data_path):
        os.makedirs(args.data_path)

    if args.dataset == "cifar10":
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
    elif args.dataset == "cifar100":
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
    else:
        assert False, "Unknown dataset : {}".format(args.dataset)

    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])
    test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])

    if args.dataset == "cifar10":
        train_data = dset.CIFAR10(args.data_path, train=True, transform=train_transform, download=True)
        test_data = dset.CIFAR10(args.data_path, train=False, transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == "cifar100":
        train_data = dset.CIFAR100(args.data_path, train=True, transform=train_transform, download=True)
        test_data = dset.CIFAR100(args.data_path, train=False, transform=test_transform, download=True)
        num_classes = 100
    elif args.dataset == "svhn":
        train_data = dset.SVHN(args.data_path, split="train", transform=train_transform, download=True)
        test_data = dset.SVHN(args.data_path, split="test", transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == "stl10":
        train_data = dset.STL10(args.data_path, split="train", transform=train_transform, download=True)
        test_data = dset.STL10(args.data_path, split="test", transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == "imagenet":
        assert False, "ImageNet support is not implemented yet"
    else:
        assert False, "Unsupported dataset : {}".format(args.dataset)

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
                                               num_workers=args.workers, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size, shuffle=False,
                                              num_workers=args.workers, pin_memory=True)

    # Init model, criterion, and optimizer
    # net = models.__dict__[args.arch](num_classes).cuda()
    net = SENet34()

    # define loss function (criterion) and optimizer
    criterion = F.nll_loss
    optimizer = torch.optim.SGD(net.parameters(), state["learning_rate"], momentum=state["momentum"],
                                weight_decay=state["decay"], nesterov=True)
    if args.use_cuda:
        net.cuda()

    recorder = RecorderMeter(args.epochs)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            checkpoint = torch.load(args.resume)
            recorder = checkpoint["recorder"]
            args.start_epoch = checkpoint["epoch"]
            net.load_state_dict(checkpoint["state_dict"])
            optimizer.load_state_dict(checkpoint["optimizer"])
            print_log("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint["epoch"]), log)
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume), log)
    else:
        print_log("=> do not use any checkpoint for the model", log)

    if args.evaluate:
        validate(test_loader, net, criterion, log)
        return

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()
    for epoch in range(args.start_epoch, args.epochs):
        current_learning_rate = adjust_learning_rate(optimizer, epoch, args.gammas, args.schedule)
        need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (args.epochs - epoch))
        need_time = "[Need: {:02d}:{:02d}:{:02d}]".format(need_hour, need_mins, need_secs)
        print_log("\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]".format(
            time_string(), epoch, args.epochs, need_time, current_learning_rate)
            + " [Best : Accuracy={:.2f}, Error={:.2f}]".format(
                recorder.max_accuracy(False), 100 - recorder.max_accuracy(False)), log)

        # train for one epoch
        train_acc, train_los = train(train_loader, net, criterion, optimizer, epoch, log)

        # evaluate on validation set
        val_acc, val_los = validate(test_loader, net, criterion, log)
        is_best = recorder.update(epoch, train_los, train_acc, val_los, val_acc)

        save_checkpoint({
            "epoch": epoch + 1,
            "state_dict": net.state_dict(),
            "recorder": recorder,
            "optimizer": optimizer.state_dict(),
        }, is_best, args.save_path, "checkpoint.pth.tar")

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    recorder.plot_curve(os.path.join(args.save_path, "curve.png"))
    log.close()
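# ---------------------------------------------------------------------------
# `print_log` and `adjust_learning_rate` are called above but defined
# elsewhere. The sketches below are minimal versions consistent with every
# call site in this file (assumptions: `args.learning_rate` holds the base LR
# and `args.gammas` / `args.schedule` are equal-length lists); the project's
# real helpers may differ.
# ---------------------------------------------------------------------------
def print_log(message, log):
    # Echo to stdout and append to the open log file.
    print("{}".format(message))
    log.write("{}\n".format(message))
    log.flush()


def adjust_learning_rate(optimizer, epoch, gammas, schedule):
    # Step decay: multiply the base LR by gamma each time `epoch`
    # passes the corresponding milestone in `schedule`.
    assert len(gammas) == len(schedule), "gammas and schedule must have equal length"
    lr = args.learning_rate
    for gamma, step in zip(gammas, schedule):
        if epoch >= step:
            lr = lr * gamma
        else:
            break
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr
    return lr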
def main():
    # Init logger
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = open(os.path.join(args.save_path, 'log_seed_{}.txt'.format(args.manualSeed)), 'w')
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("torch version : {}".format(torch.__version__), log)
    print_log("cudnn version : {}".format(torch.backends.cudnn.version()), log)

    # Init the tensorboard path and writer
    tb_path = os.path.join(args.save_path, 'tb_log', 'run_' + str(args.manualSeed))
    writer = SummaryWriter(tb_path)

    # Init dataset
    if not os.path.isdir(args.data_path):
        os.makedirs(args.data_path)

    if args.dataset == 'cifar10':
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
    elif args.dataset == 'cifar100':
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
    elif args.dataset == 'svhn':
        mean = [0.5, 0.5, 0.5]
        std = [0.5, 0.5, 0.5]
    elif args.dataset == 'mnist':
        mean = [0.5, 0.5, 0.5]
        std = [0.5, 0.5, 0.5]
    elif args.dataset == 'imagenet':
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:
        assert False, "Unknown dataset : {}".format(args.dataset)

    if args.dataset == 'imagenet':
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
        # the ImageNet "test" split here is actually the validation set
        test_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
    else:
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, padding=4),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
        test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])

    if args.dataset == 'mnist':
        train_data = dset.MNIST(args.data_path, train=True, transform=train_transform, download=True)
        test_data = dset.MNIST(args.data_path, train=False, transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'cifar10':
        train_data = dset.CIFAR10(args.data_path, train=True, transform=train_transform, download=True)
        test_data = dset.CIFAR10(args.data_path, train=False, transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'cifar100':
        train_data = dset.CIFAR100(args.data_path, train=True, transform=train_transform, download=True)
        test_data = dset.CIFAR100(args.data_path, train=False, transform=test_transform, download=True)
        num_classes = 100
    elif args.dataset == 'svhn':
        train_data = dset.SVHN(args.data_path, split='train', transform=train_transform, download=True)
        test_data = dset.SVHN(args.data_path, split='test', transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'stl10':
        train_data = dset.STL10(args.data_path, split='train', transform=train_transform, download=True)
        test_data = dset.STL10(args.data_path, split='test', transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'imagenet':
        train_dir = os.path.join(args.data_path, 'train')
        test_dir = os.path.join(args.data_path, 'val')
        train_data = dset.ImageFolder(train_dir, transform=train_transform)
        test_data = dset.ImageFolder(test_dir, transform=test_transform)
        num_classes = 1000
    else:
        assert False, 'Unsupported dataset : {}'.format(args.dataset)

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.attack_sample_size, shuffle=True,
                                               num_workers=args.workers, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.test_batch_size, shuffle=True,
                                              num_workers=args.workers, pin_memory=True)

    print_log("=> creating model '{}'".format(args.arch), log)

    # Init model, criterion, and optimizer
    net = models.__dict__[args.arch](num_classes)
    print_log("=> network :\n {}".format(net), log)

    if args.use_cuda and args.ngpu > 1:
        net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()

    # separate the parameters so the groups can be updated by different optimizers
    all_param = [param for name, param in net.named_parameters() if 'step_size' not in name]
    step_param = [param for name, param in net.named_parameters() if 'step_size' in name]

    if args.optimizer == "SGD":
        print("using SGD as optimizer")
        optimizer = torch.optim.SGD(all_param, lr=state['learning_rate'], momentum=state['momentum'],
                                    weight_decay=state['decay'], nesterov=True)
    elif args.optimizer == "Adam":
        print("using Adam as optimizer")
        optimizer = torch.optim.Adam(filter(lambda param: param.requires_grad, net.parameters()),
                                     lr=state['learning_rate'], weight_decay=state['decay'])
    elif args.optimizer == "RMSprop":
        print("using RMSprop as optimizer")
        optimizer = torch.optim.RMSprop(filter(lambda param: param.requires_grad, net.parameters()),
                                        lr=state['learning_rate'], alpha=0.99, eps=1e-08,
                                        weight_decay=0, momentum=0)

    if args.use_cuda:
        net.cuda()
        criterion.cuda()

    recorder = RecorderMeter(args.epochs)  # epoch-wise accuracy/loss recorder

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            checkpoint = torch.load(args.resume)
            if not args.fine_tune:
                args.start_epoch = checkpoint['epoch']
                recorder = checkpoint['recorder']
                optimizer.load_state_dict(checkpoint['optimizer'])
            state_tmp = net.state_dict()
            if 'state_dict' in checkpoint.keys():
                state_tmp.update(checkpoint['state_dict'])
            else:
                state_tmp.update(checkpoint)
            net.load_state_dict(state_tmp)
            print_log("=> loaded checkpoint '{}' (epoch {})".format(args.resume, args.start_epoch), log)
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume), log)
    else:
        print_log("=> do not use any checkpoint for the {} model".format(args.arch), log)

    # update the step_size once the model is loaded; this is used for quantization
    for m in net.modules():
        if isinstance(m, quan_Conv2d) or isinstance(m, quan_Linear):
            # simple step-size update based on the pretrained model or weight init
            m.__reset_stepsize__()

    # block for quantizer optimization
    if args.optimize_step:
        optimizer_quan = torch.optim.SGD(step_param, lr=0.01, momentum=0.9, weight_decay=0, nesterov=True)
        for m in net.modules():
            if isinstance(m, quan_Conv2d) or isinstance(m, quan_Linear):
                for i in range(300):  # run 300 iterations to reduce the quantization error
                    optimizer_quan.zero_grad()
                    weight_quan = quantize(m.weight, m.step_size, m.half_lvls) * m.step_size
                    loss_quan = F.mse_loss(weight_quan, m.weight, reduction='mean')
                    loss_quan.backward()
                    optimizer_quan.step()
        for m in net.modules():
            if isinstance(m, quan_Conv2d):
                # step size, clipping threshold (step_size * half_lvls), and max weight
                print(m.step_size.data.item(), (m.step_size.detach() * m.half_lvls).item(),
                      m.weight.max().item())

    # block for weight reset
    if args.reset_weight:
        for m in net.modules():
            if isinstance(m, quan_Conv2d) or isinstance(m, quan_Linear):
                m.__reset_weight__()

    attacker = BFA(criterion, args.k_top)
    net_clean = copy.deepcopy(net)
    # weight_conversion(net)

    if args.enable_bfa:
        perform_attack(attacker, net, net_clean, train_loader, test_loader, args.n_iter, log, writer)
        return

    if args.evaluate:
        validate(test_loader, net, criterion, log)
        return

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()
    for epoch in range(args.start_epoch, args.epochs):
        current_learning_rate, current_momentum = adjust_learning_rate(
            optimizer, epoch, args.gammas, args.schedule)

        # Display simulation time
        need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)
        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f}][M={:1.2f}]'.format(
            time_string(), epoch, args.epochs, need_time, current_learning_rate, current_momentum)
            + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(
                recorder.max_accuracy(False), 100 - recorder.max_accuracy(False)), log)

        # train for one epoch
        train_acc, train_los = train(train_loader, net, criterion, optimizer, epoch, log)

        # evaluate on validation set
        val_acc, _, val_los = validate(test_loader, net, criterion, log)
        recorder.update(epoch, train_los, train_acc, val_los, val_acc)
        is_best = val_acc >= recorder.max_accuracy(False)

        if args.model_only:
            checkpoint_state = {'state_dict': net.state_dict()}
        else:
            checkpoint_state = {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': net.state_dict(),
                'recorder': recorder,
                'optimizer': optimizer.state_dict(),
            }
        save_checkpoint(checkpoint_state, is_best, args.save_path, 'checkpoint.pth.tar', log)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

        recorder.plot_curve(os.path.join(args.save_path, 'curve.png'))

        # save an additional accuracy log for plotting
        accuracy_logger(base_dir=args.save_path, epoch=epoch,
                        train_accuracy=train_acc, test_accuracy=val_acc)

        # ============ TensorBoard logging ============
        # log the gradient distributions
        for name, param in net.named_parameters():
            name = name.replace('.', '/')
            writer.add_histogram(name + '/grad', param.grad.clone().cpu().data.numpy(),
                                 epoch + 1, bins='tensorflow')
        # log the weight distributions
        for name, module in net.named_modules():
            name = name.replace('.', '/')
            class_name = str(module.__class__).split('.')[-1].split("'")[0]
            if ("Conv2d" in class_name or "Linear" in class_name) and module.weight is not None:
                writer.add_histogram(name + '/weight/', module.weight.clone().cpu().data.numpy(),
                                     epoch + 1, bins='tensorflow')
        writer.add_scalar('loss/train_loss', train_los, epoch + 1)
        writer.add_scalar('loss/test_loss', val_los, epoch + 1)
        writer.add_scalar('accuracy/train_accuracy', train_acc, epoch + 1)
        writer.add_scalar('accuracy/test_accuracy', val_acc, epoch + 1)
        # ============ TensorBoard logging ============

    log.close()
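# ---------------------------------------------------------------------------
# The step-size search above minimises || quantize(w, s, h) * s - w ||^2 with
# SGD over the per-layer step sizes. A plausible sketch of `quantize` follows:
# uniform symmetric quantization to integer levels in [-half_lvls, half_lvls]
# with a straight-through estimator. This is an assumption for illustration;
# the real quan_Conv2d / quan_Linear op may implement it differently.
# ---------------------------------------------------------------------------
class _Quantize(torch.autograd.Function):
    @staticmethod
    def forward(ctx, weight, step_size, half_lvls):
        # Round to the nearest level, then clamp to the representable range.
        levels = torch.round(weight / step_size)
        return torch.clamp(levels, -float(half_lvls), float(half_lvls))

    @staticmethod
    def backward(ctx, grad_output):
        # Straight-through: the gradient passes to `weight` unchanged.
        # `step_size` gets no gradient through the rounding itself, but still
        # receives one via the outer `* m.step_size` in the loss above.
        return grad_output, None, None


def quantize(weight, step_size, half_lvls):
    return _Quantize.apply(weight, step_size, half_lvls)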
def main():
    # Init logger
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    # time stamp used for file names, etc.
    time_stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    log = open(os.path.join(args.save_path,
                            'log_seed_{0}_{1}.txt'.format(args.manualSeed, time_stamp)), 'w')
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("torch version : {}".format(torch.__version__), log)
    print_log("cudnn version : {}".format(torch.backends.cudnn.version()), log)

    # Init dataset
    if not os.path.isdir(args.data_path):
        os.makedirs(args.data_path)

    if args.dataset == 'cifar10':
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
    elif args.dataset == 'cifar100':
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
    else:
        assert False, "Unknown dataset : {}".format(args.dataset)

    writer = SummaryWriter()

    # Data transforms
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    test_transform = transforms.Compose([
        transforms.CenterCrop(32),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])

    if args.dataset == 'cifar10':
        train_data = dset.CIFAR10(args.data_path, train=True, transform=train_transform, download=True)
        test_data = dset.CIFAR10(args.data_path, train=False, transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'cifar100':
        train_data = dset.CIFAR100(args.data_path, train=True, transform=train_transform, download=True)
        test_data = dset.CIFAR100(args.data_path, train=False, transform=test_transform, download=True)
        num_classes = 100
    elif args.dataset == 'imagenet':
        assert False, 'ImageNet support is not implemented yet'
    else:
        assert False, 'Unsupported dataset : {}'.format(args.dataset)

    # size of the initial labelled pool and of each acquisition step
    step_sizes = args.alinit
    indices = list(range(0, 50000))
    # indices added to the training pool so far; one list per AL step
    annot_indices = []
    # indices that have not yet been added to the training pool
    unannot_indices = [indices]
    selections = random.sample(range(0, len(unannot_indices[-1])), step_sizes)
    temp = list(np.asarray(unannot_indices[-1])[selections])
    annot_indices.append(temp)
    unannot_indices.append(list(set(unannot_indices[-1]) - set(annot_indices[-1])))

    labelled_dset = torch.utils.data.Subset(train_data, annot_indices[-1])
    unlabelled_dset = torch.utils.data.Subset(train_data, unannot_indices[-1])

    labelled_loader = torch.utils.data.DataLoader(labelled_dset, batch_size=args.batch_size, shuffle=True,
                                                  num_workers=args.workers, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size, shuffle=False,
                                              num_workers=args.workers, pin_memory=True)

    print_log("=> creating model '{}'".format(args.arch), log)

    # Init model, criterion, and optimizer
    net = models.__dict__[args.arch](num_classes)
    print_log("=> network :\n {}".format(net), log)

    net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    # optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=0.005, nesterov=False)
    optimizer = torch.optim.Adadelta(net.parameters(), lr=0.1, rho=0.9, eps=1e-3, weight_decay=0.001)

    print_log("=> Seed '{}'".format(args.manualSeed), log)
    print_log("=> dataset mean and std '{} - {}'".format(str(mean), str(std)), log)
    states_settings = {'optimizer': optimizer.state_dict()}
    print_log("=> optimizer '{}'".format(states_settings), log)

    # epoch milestones, corresponding to roughly 50k, 95k, 153k, 195k, 220k iterations
    milestones = [100, 190, 306, 390, 440, 540]
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones, gamma=0.1)

    if args.use_cuda:
        net.cuda()
        criterion.cuda()

    recorder = RecorderMeter(args.epochs)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            checkpoint = torch.load(args.resume)
            recorder = checkpoint['recorder']
            args.start_epoch = checkpoint['epoch']
            net.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print_log("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']), log)
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume), log)
    else:
        print_log("=> did not use any checkpoint for the {} model".format(args.arch), log)

    if args.evaluate:
        validate(test_loader, net, criterion, log)
        return

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()
    al_steps = int(50000 / args.alinit)
    curr_al_step = 0
    dump_data = []
    for (al_step, epoch) in [(a, b) for a in range(al_steps)
                             for b in range(args.start_epoch, args.epochs)]:
        print(" Current AL_step and epoch " + str((al_step, epoch)))
        if al_step != curr_al_step:
            curr_al_step = al_step
            # reset the learning-rate scheduler for the new AL step
            scheduler = lr_scheduler.MultiStepLR(optimizer, milestones, gamma=0.1)
            # `score` returns one value per point in the unlabelled dataset; the
            # selections are mapped back to the original 0..49999 indexing
            scores_unlabelled = score(unlabelled_dset, net, criterion)
            indices_sorted = np.argsort(scores_unlabelled)
            # greedy sampling: take the highest-scoring points
            temp_selections = indices_sorted[-1 * args.alinit:]
            selections = np.asarray(list(unlabelled_dset.indices))[temp_selections].tolist()
            annot_indices.append(selections)
            unannot_indices.append(list(set(unannot_indices[-1]) - set(annot_indices[-1])))
            labelled_dset = torch.utils.data.Subset(train_data, annot_indices[-1])
            labelled_loader = torch.utils.data.DataLoader(labelled_dset, batch_size=args.batch_size,
                                                          shuffle=True, num_workers=args.workers,
                                                          pin_memory=True)
            unlabelled_dset = torch.utils.data.Subset(train_data, unannot_indices[-1])
            indices_data = [annot_indices, unannot_indices]
            with open("indices.pickle", "wb") as filehandler:
                pickle.dump(indices_data, filehandler)

        current_learning_rate = float(scheduler.get_lr()[-1])  # use get_last_lr() on PyTorch >= 1.4
        scheduler.step()

        need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)
        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:.6f}]'.format(
            time_string(), epoch, args.epochs, need_time, current_learning_rate)
            + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(
                recorder.max_accuracy(False), 100 - recorder.max_accuracy(False)), log)

        # train for one epoch on the labelled pool
        train_acc, train_los = train(labelled_loader, net, criterion, optimizer, epoch, log)

        # evaluate on validation set
        val_acc, val_los = validate(test_loader, net, criterion, log)
        is_best = recorder.update(epoch, train_los, train_acc, val_los, val_acc)

        dump_data.append(([al_step, epoch], [train_acc, train_los], [val_acc, val_los]))
        if epoch % 50 == 0:
            with open("accuracy.pickle", "wb") as filehandler:
                pickle.dump(dump_data, filehandler)

        if epoch == 180:
            save_checkpoint({
                'epoch': epoch,
                'arch': args.arch,
                'state_dict': net.state_dict(),
                'recorder': recorder,
                'optimizer': optimizer.state_dict(),
            }, False, args.save_path,
                'checkpoint_{0}_{1}.pth.tar'.format(epoch, time_stamp), time_stamp)

        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': net.state_dict(),
            'recorder': recorder,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.save_path,
            'checkpoint_{0}.pth.tar'.format(time_stamp), time_stamp)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    recorder.plot_curve(os.path.join(args.save_path,
                                     'training_plot_{0}_{1}.png'.format(args.manualSeed, time_stamp)))
    writer.close()
    log.close()
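# ---------------------------------------------------------------------------
# The acquisition step above assumes `score` returns one uncertainty value per
# example in `unlabelled_dset`, ordered by subset position. Below is a minimal
# sketch using predictive entropy -- an assumption; the real scorer may use
# `criterion` instead, which this sketch ignores.
# ---------------------------------------------------------------------------
def score(unlabelled_dset, net, criterion):
    loader = torch.utils.data.DataLoader(unlabelled_dset, batch_size=args.batch_size, shuffle=False,
                                         num_workers=args.workers, pin_memory=True)
    net.eval()
    scores = []
    with torch.no_grad():
        for input, _ in loader:
            if args.use_cuda:
                input = input.cuda()
            probs = torch.softmax(net(input), dim=-1)
            # Entropy of the predictive distribution; higher = more uncertain.
            entropy = -(probs * torch.log(probs.clamp_min(1e-12))).sum(dim=-1)
            scores.append(entropy.cpu())
    return torch.cat(scores).numpy()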
def train(cfg, writer, logger):
    # CUDA_VISIBLE_DEVICES must be set before PyTorch initialises any CUDA state
    use_cuda = False
    if cfg.get("cuda", None) is not None:
        if cfg.get("cuda", None) != "all":
            os.environ["CUDA_VISIBLE_DEVICES"] = cfg.get("cuda", None)
        use_cuda = torch.cuda.is_available()

    # Setup random seed
    seed = cfg["training"].get("seed", random.randint(1, 10000))
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Setup Dataloader
    train_loader, val_loader = get_loader(cfg)

    # Setup Model
    model = get_model(cfg)
    # writer.add_graph(model, torch.rand([1, 3, 224, 224]))
    if use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(torch.cuda.device_count())))

    # Setup optimizer, lr_scheduler and loss function
    optimizer = get_optimizer(model.parameters(), cfg)
    scheduler = get_scheduler(optimizer, cfg)
    loss_fn = get_loss_fn(cfg)

    # Setup Metrics
    epochs = cfg["training"]["epochs"]
    recorder = RecorderMeter(epochs)
    start_epoch = 0
    # save model parameters every <save_interval> epochs
    save_interval = cfg["training"]["save_interval"]

    if use_cuda:
        model.cuda()
        loss_fn.cuda()

    # Resume Trained Model
    resume_path = os.path.join(writer.file_writer.get_logdir(), cfg["training"]["resume"])
    best_path = os.path.join(writer.file_writer.get_logdir(), cfg["training"]["best_model"])
    if cfg["training"]["resume"] is not None:
        if os.path.isfile(resume_path):
            logger.info("Loading model and optimizer from checkpoint '{}'".format(resume_path))
            checkpoint = torch.load(resume_path)
            state = checkpoint["state_dict"]
            if torch.cuda.device_count() <= 1:
                state = convert_state_dict(state)
            model.load_state_dict(state)
            optimizer.load_state_dict(checkpoint["optimizer"])
            scheduler.load_state_dict(checkpoint["scheduler"])
            start_epoch = checkpoint["epoch"]
            recorder = checkpoint["recorder"]
            logger.info("Loaded checkpoint '{}' (epoch {})".format(resume_path, checkpoint["epoch"]))
        else:
            logger.info("No checkpoint found at '{}'".format(resume_path))

    epoch_time = AverageMeter()
    for epoch in range(start_epoch, epochs):
        start_time = time.time()
        need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)
        logger.info(
            '\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:8.6f}]'.format(
                time_string(), epoch, epochs, need_time,
                optimizer.param_groups[0]['lr'])  # or scheduler.get_last_lr() on PyTorch >= 1.4
            + ' [Best : Accuracy={:.2f}]'.format(recorder.max_accuracy(False)))

        train_acc, train_los = train_epoch(train_loader, model, loss_fn, optimizer, use_cuda, logger)
        val_acc, val_los = validate_epoch(val_loader, model, loss_fn, use_cuda, logger)
        scheduler.step()

        is_best = recorder.update(epoch, train_los, train_acc, val_los, val_acc)
        if is_best or epoch % save_interval == 0 or epoch == epochs - 1:
            # save model (resume model and best model)
            save_checkpoint({
                'epoch': epoch + 1,
                'recorder': recorder,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
            }, is_best, best_path, resume_path)

        # save histograms and curves
        for name, param in model.named_parameters():
            writer.add_histogram(name, param.clone().cpu().data.numpy(), epoch)
        writer.add_scalar('Train/loss', train_los, epoch)
        writer.add_scalar('Train/acc', train_acc, epoch)
        writer.add_scalar('Val/loss', val_los, epoch)
        writer.add_scalar('Val/acc', val_acc, epoch)

        epoch_time.update(time.time() - start_time)

    writer.close()
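# ---------------------------------------------------------------------------
# `train` reads all of its settings from a nested `cfg` mapping. The dict
# below shows the shape implied by the accesses above; the key names come
# from the code, but the values are illustrative assumptions, not the
# project's defaults.
# ---------------------------------------------------------------------------
example_cfg = {
    "cuda": "0",  # value for CUDA_VISIBLE_DEVICES, or "all" to use every GPU
    "training": {
        "seed": 42,                        # optional; random when absent
        "epochs": 200,
        "save_interval": 10,               # checkpoint every <n> epochs
        "resume": "checkpoint.pth.tar",    # file name under the writer's logdir
        "best_model": "model_best.pth.tar",
    },
    # ...plus whatever get_loader / get_model / get_optimizer /
    # get_scheduler / get_loss_fn read from their own sections.
}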
def step(self):
    log = open(os.path.join(args.save_path,
                            'log_seed_{0}_{1}.txt'.format(args.manualSeed, self.time_stamp)), 'a')
    start_time = time.time()
    epoch_time = AverageMeter()

    # learning-rate scheduling is handled outside this step:
    # current_learning_rate = float(self.scheduler.get_last_lr()[-1])
    # self.scheduler.step()

    need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (args.epochs - self.i))
    need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)
    print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:.6f}]'.format(
        time_string(), self.i, args.epochs, need_time, self.args['lr'])
        + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(
            self.recorder.max_accuracy(False), 100 - self.recorder.max_accuracy(False)), log)

    train_acc, train_los = self.train1()
    val_acc, val_los = self.val1()
    is_best = self.recorder.update(self.i - 1, train_los, train_acc, val_los, val_acc)

    # checkpointing is handled by the caller:
    # save_checkpoint({
    #     'epoch': self.i,
    #     'arch': args.arch,
    #     'state_dict': self.net.state_dict(),
    #     'recorder': self.recorder,
    #     'optimizer': self.optimizer.state_dict(),
    # }, is_best, args.save_path, 'checkpoint_{0}.pth.tar'.format(self.time_stamp), self.time_stamp)

    # measure elapsed time
    epoch_time.update(time.time() - start_time)
    # self.recorder.plot_curve(os.path.join(args.save_path,
    #     'training_plot_{0}_{1}.png'.format(args.manualSeed, self.time_stamp)))
    log.close()
    return train_acc, train_los, val_acc, val_los
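# ---------------------------------------------------------------------------
# Sketches of the two bookkeeping helpers used by every loop in this file.
# They follow the common PyTorch-examples pattern that matches each call site
# (`.update(val, n)`, `.avg`, and an (h, m, s) tuple); assumed, not copied
# from the project.
# ---------------------------------------------------------------------------
class AverageMeter(object):
    """Tracks the latest value, running sum, count, and average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def convert_secs2time(epoch_time):
    # Split a duration in seconds into (hours, minutes, seconds).
    need_hour = int(epoch_time / 3600)
    need_mins = int((epoch_time - 3600 * need_hour) / 60)
    need_secs = int(epoch_time - 3600 * need_hour - 60 * need_mins)
    return need_hour, need_mins, need_secs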