def __init__(self, d_latent, device='cuda', log_dir=''):
    super().__init__()
    self.d_latent = d_latent
    self.device = device

    n_blocks = [1, 1, 1, 1]
    mult = 8
    n_output_planes = [16 * mult, 32 * mult, 64 * mult, 128 * mult]
    self.n_in_planes = n_output_planes[0]

    self.layer0 = nn.Sequential(
        nn_ops.conv3x3(3, self.n_in_planes, 1),
        nn.BatchNorm2d(self.n_in_planes),
        nn.ReLU(inplace=True))
    self.layer1 = self._make_layer(BasicBlock, n_blocks[0], n_output_planes[0], 2)
    self.layer2 = self._make_layer(BasicBlock, n_blocks[1], n_output_planes[1], 2)
    self.layer3 = self._make_layer(BasicBlock, n_blocks[2], n_output_planes[2], 2)
    self.layer4 = self._make_layer(BasicBlock, n_blocks[3], n_output_planes[3], 2)
    self.latent_mapping = nn.Sequential(
        nn.Linear(n_output_planes[3] * BasicBlock.expansion, d_latent, True),
        nn.BatchNorm1d(d_latent),
        nn.Tanh())

    self.apply(nn_ops.variable_init)
    self.to(device)
    utils.model_info(self, 'celebA_encoder', log_dir)
def load_checkpoint(filepath, device):
    if device == 'cpu':
        checkpoint = torch.load(filepath, map_location=lambda storage, loc: storage)
    else:
        checkpoint = torch.load(filepath)
    model = models.__dict__[checkpoint['pretrained']](pretrained=True)
    _, last_layer_name = model_info(model)
    if last_layer_name == 'classifier':
        classifier = checkpoint['classifier']
        model.classifier = classifier
    elif last_layer_name == 'fc':
        fc = checkpoint['classifier']
        model.fc = fc
    model.load_state_dict(checkpoint['state_dict'])
    model.class_to_idx = checkpoint['dict']
    model.optimizer = checkpoint['optimizer']
    return model
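# Hedged usage sketch (not from the original code): assuming a checkpoint file saved with the
# keys load_checkpoint reads ('pretrained', 'classifier', 'state_dict', 'dict', 'optimizer'),
# restoring the model for CPU inference might look like this. 'checkpoint.pth' is a placeholder.
import torch

restored = load_checkpoint('checkpoint.pth', device='cpu')
restored.eval()
with torch.no_grad():
    logits = restored(torch.randn(1, 3, 224, 224))  # dummy ImageNet-sized input
print(logits.argmax(dim=1))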
def __init__(self, d_latent, device='cuda', log_dir=''):
    super().__init__()
    self.d_latent = d_latent
    self.device = device
    self.mult = 8

    self.latent_mapping = nn.Sequential(
        nn.Linear(self.d_latent, 4 * 4 * 128 * self.mult),
        nn.BatchNorm1d(4 * 4 * 128 * self.mult),
        nn.ReLU())
    self.block1 = DecoderBlock(128 * self.mult, 64 * self.mult)
    self.block2 = DecoderBlock(64 * self.mult, 32 * self.mult)
    self.block3 = DecoderBlock(32 * self.mult, 16 * self.mult)
    self.block4 = DecoderBlock(16 * self.mult, 8 * self.mult)
    self.block5 = DecoderBlock(8 * self.mult, 4 * self.mult)
    self.block6 = DecoderBlock(4 * self.mult, 2 * self.mult)
    self.output_conv = nn_ops.conv3x3(2 * self.mult, 3, 1, True)
    self.final_act = nn.Sigmoid()

    self.apply(nn_ops.variable_init)
    self.to(device)
    utils.model_info(self, 'celebA_decoder', log_dir)
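# Hedged shape sketch (an assumption, not taken from the original forward()): with mult = 8 the
# latent mapping produces 4 * 4 * 128 * 8 = 16384 features, presumably reshaped to a
# (128 * mult, 4, 4) feature map. If each DecoderBlock doubles the spatial size, the six blocks
# give 4 -> 8 -> 16 -> 32 -> 64 -> 128 -> 256, i.e. a 256x256 RGB image after output_conv + Sigmoid.
import torch

flat = torch.randn(2, 4 * 4 * 128 * 8)   # a batch of 2 latent-mapping outputs
fmap = flat.view(2, 128 * 8, 4, 4)       # reshape assumed by the decoder block stack
print(fmap.shape)                        # torch.Size([2, 1024, 4, 4])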
def main():
    from utils import (init_torch_seeds, model_info, profile, profile_training)
    init_torch_seeds(seed=1234)

    # analyze backbone characteristics of different models
    model_builders = [
        models.resnet18,
        models.resnet50,
        models.vgg16,
        models.shufflenet_v2_x2_0,
        models.mobilenet_v2,
        Yolov5,
        ghostnet,
    ][-2:]
    for model_builder in model_builders:
        print(f'{10*"-"} {model_builder.__name__} {10*"-"}')
        model = get_backbone(model_builder, pretrained=False)
        model_info(model, verbose=False, img_size=512)
        profile(model, verbose=True, amp=True)
        profile_training(model, amp=True)
    '''
def info(self, verbose=False, img_size=512):
    # print model information
    model_info(self, verbose, img_size)
def __init__(self, mode):
    # Define Saver
    self.saver = Saver(opt, mode)
    self.logger = self.saver.logger

    # Visualize
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Dataset dataloader
    self.train_dataset, self.train_loader = make_data_loader(opt)
    self.nbatch_train = len(self.train_loader)
    self.val_dataset, self.val_loader = make_data_loader(opt, mode="val")
    self.nbatch_val = len(self.val_loader)

    # Model
    if opt.sync_bn is None and len(opt.gpu_id) > 1:
        opt.sync_bn = True
    else:
        opt.sync_bn = False
    # model = DeepLab(opt)
    # model = CSRNet()
    model = CRGNet(opt)
    model_info(model, self.logger)
    self.model = model.to(opt.device)

    # Loss
    if opt.use_balanced_weights:
        classes_weights_file = osp.join(opt.root_dir, 'train_classes_weights.npy')
        if os.path.isfile(classes_weights_file):
            weight = np.load(classes_weights_file)
        else:
            weight = calculate_weigths_labels(self.train_loader, opt.root_dir)
        print(weight)
        opt.loss['weight'] = weight
    self.loss = build_loss(opt.loss)

    # Define Evaluator
    self.evaluator = Evaluator()  # use region to eval: class_num is 2

    # Resuming Checkpoint
    self.best_pred = 0.0
    self.start_epoch = 0
    if opt.resume:
        if os.path.isfile(opt.pre):
            print("=> loading checkpoint '{}'".format(opt.pre))
            checkpoint = torch.load(opt.pre)
            self.start_epoch = checkpoint['epoch']
            self.best_pred = checkpoint['best_pred']
            self.model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(opt.pre, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opt.pre))

    if len(opt.gpu_id) > 1:
        print("Using multiple gpu")
        self.model = torch.nn.DataParallel(self.model, device_ids=opt.gpu_id)

    # Define Optimizer
    # train_params = [{'params': model.get_1x_lr_params(), 'lr': opt.lr},
    #                 {'params': model.get_10x_lr_params(), 'lr': opt.lr * 10}]
    # self.optimizer = torch.optim.SGD(train_params,
    #                                  momentum=opt.momentum,
    #                                  weight_decay=opt.decay)
    self.optimizer = torch.optim.SGD(self.model.parameters(),
                                     lr=opt.lr,
                                     momentum=opt.momentum,
                                     weight_decay=opt.decay)

    # Define lr scheduler
    # self.scheduler = LR_Scheduler(mode=opt.lr_scheduler,
    #                               base_lr=opt.lr,
    #                               num_epochs=opt.epochs,
    #                               iters_per_epoch=self.nbatch_train,
    #                               lr_step=140)
    self.scheduler = optim.lr_scheduler.MultiStepLR(
        self.optimizer,
        milestones=[round(opt.epochs * x) for x in opt.steps],
        gamma=opt.gamma)

    # Time
    self.loss_hist = collections.deque(maxlen=500)
    self.timer = Timer(opt.epochs, self.nbatch_train, self.nbatch_val)
    self.step_time = collections.deque(maxlen=opt.print_freq)
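# Hedged example (the numbers are illustrative, not from the config): the MultiStepLR milestones
# above are fractions of the total epoch budget, so with opt.epochs = 100 and opt.steps = [0.7, 0.9]
# the learning rate is multiplied by opt.gamma at epochs 70 and 90.
epochs, steps = 100, [0.7, 0.9]
print([round(epochs * x) for x in steps])  # [70, 90]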
# args
path = results.path[0]
learning_rate, weight_decay, momentum = results.learning_rate, results.weight_decay, results.momentum
gpu, save_dir, hidden_units = results.gpu, results.save_dir, results.hidden_units
arch, epochs, dropout = results.arch, results.epochs, results.dropout

# load data from path
data_transforms, image_datasets, dataloaders, cat_to_name = utils.load_data(path)

# load pretrained model
model = models.__dict__[arch](pretrained=True)

# get pretrained model in_features number for the last layer
in_features, last_layer_name = utils.model_info(model)

# freeze pretrained model parameters
if hasattr(model, 'features'):
    for param in model.features.parameters():
        param.requires_grad = False
else:  # resnet
    for param in model.parameters():
        param.requires_grad = False

# create network with custom classifier
model = utils.create_network(model, in_features, last_layer_name, hidden_units, dropout)
print(model)

# set loss
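# Hedged sanity check (not part of the original script): after freezing the backbone and attaching
# the custom classifier, only the new classifier parameters should remain trainable. `model` here
# refers to the network returned by utils.create_network above.
n_total = sum(p.numel() for p in model.parameters())
n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('trainable / total parameters: {} / {}'.format(n_trainable, n_total))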
model_dict = self.state_dict()
if by_name:
    pretrianed_dict_update = {}
    for k, v in pretrianed_dict.items():
        if k in model_dict:
            vv = model_dict[k]
            if v.size() == vv.size():
                pretrianed_dict_update[k] = v
    model_dict.update(pretrianed_dict_update)
else:
    model_dict.update(pretrianed_dict)
self.load_state_dict(model_dict)


if __name__ == '__main__':
    # import os
    # os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    anchors = get_anchors(
        '/data1/chenww/my_research/Two-Stage-Defect-Detection/detector/config/small_8cls/anchors.txt'
    ).to('cuda')
    model = ResNet(anchors).to('cuda')
    model_info(model, verbose=True)
    # print(model)
    # input = torch.randn(1, 3, 224, 224)
    # map, outputs = model(input)
    # print([o.size() for o in map])
    # print()
    # torch.save(model.state_dict(), 'model.pth')
for trial, seed in enumerate(seeds):
    logger.info('trial {} / {} ... '.format(trial + 1, n_trial))
    random.seed(seed)
    np.random.seed(seed)
    th.manual_seed(seed)

    exp_dir = join(args.exp_root, 'regg-model_{:03d}'.format(trial))
    utils.prepare_directory(exp_dir)
    cm_zsl_path = join(exp_dir, 'cm_zsl')
    cm_gzslu_path = join(exp_dir, 'cm_gzslu')
    cm_gzsls_path = join(exp_dir, 'cm_gzsls')

    logger.info('Initializing a regressor model ...')
    regg = classifiers.Regressor(args, dset.d_ft, dset.d_attr)
    utils.model_info(regg.net, 'regg', exp_dir)

    for epoch in range(args.n_epoch):
        train_loss, train_acc = regg.train_epoch(train_iter, _Sall)
        train_logs[trial, epoch, :] = train_loss, train_acc

        acc_zsl, _ = regg.test(unseen_test_iter, _Sall, _Cu, cm_zsl_path)
        acc_gzslu, _ = regg.test(unseen_test_iter, _Sall, confmat_path=cm_gzslu_path)
        acc_gzsls, _ = regg.test(seen_test_iter, _Sall, confmat_path=cm_gzsls_path)
        acc_gzslh = 2. * acc_gzslu * acc_gzsls / (acc_gzslu + acc_gzsls)
        accs[trial, epoch, :] = acc_zsl, acc_gzslu, acc_gzsls, acc_gzslh
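# Note (added, not in the original script): acc_gzslh above is the harmonic mean of the
# generalized zero-shot accuracies on unseen (acc_gzslu) and seen (acc_gzsls) classes,
# H = 2 * u * s / (u + s), the usual GZSL summary metric. A tiny numeric check:
u, s = 0.40, 0.60
print(round(2. * u * s / (u + s), 3))  # 0.48, never larger than the arithmetic mean (0.50 here)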
train_iter = data.Iterator([Xtr, Ytr],
                           args.batch_size,
                           shuffle=True,
                           sampling_weights=sampling_weights,
                           continuous=False)

logger.info('Initializing {} model ...'.format(args.clf_type))
clf = None
if args.clf_type == 'bilinear-comp':
    clf = classifiers.BilinearCompatibility(dset.d_ft, dset.d_attr, args)
elif args.clf_type == 'mlp':
    clf = classifiers.MLP(dset.d_ft, dset.n_Call, args)
elif args.clf_type == 'multilayer-comp':
    clf = classifiers.MultiLayerCompatibility(dset.d_ft, dset.d_attr, args)
utils.model_info(clf.net, 'clf', exp_dir)

for epoch in range(args.n_epoch):
    if args.clf_type == 'bilinear-comp' or args.clf_type == 'multilayer-comp':
        clf.train_epoch(train_iter, Str)
        acc_zsl, _ = clf.test(unseen_test_iter, _Sall, _Cu, cm_zsl_path)
        acc_gzslu, _ = clf.test(unseen_test_iter, _Sall, confmat_path=cm_gzslu_path)
        acc_gzsls, _ = clf.test(seen_test_iter, _Sall, confmat_path=cm_gzsls_path)
        acc_gzslh = 2. * acc_gzslu * acc_gzsls / (acc_gzslu + acc_gzsls)
        accs[trial, epoch, :] = acc_zsl, acc_gzslu, acc_gzsls, acc_gzslh
    else:
        clf.train_epoch(train_iter)
        acc_zsl, _ = clf.test(unseen_test_iter, _Cu, cm_zsl_path)
        acc_gzslu, _ = clf.test(unseen_test_iter, confmat_path=cm_gzslu_path)
        acc_gzsls, _ = clf.test(seen_test_iter, confmat_path=cm_gzsls_path)
        acc_gzslh = 2. * acc_gzslu * acc_gzsls / (acc_gzslu + acc_gzsls)
def main():
    utils.prepare_directory(args.exp_dir, force_delete=False)
    utils.init_logger(join(args.exp_dir, 'program.log'))
    utils.write_args(args)

    # **************************************** load dataset ****************************************
    dset = data.XianDataset(args.data_dir, args.mode, feature_norm=args.feature_norm)
    _X_s_tr = FN(dset.X_s_tr).to(args.device)
    _Y_s_tr_ix = FN(dil(dset.Y_s_tr, dset.Cs)).to(args.device)  # indexed labels
    _Ss = FN(dset.Sall[dset.Cs]).to(args.device)
    _Su = FN(dset.Sall[dset.Cu]).to(args.device)
    if args.d_noise == 0:
        args.d_noise = dset.d_attr

    # **************************************** create data loaders ****************************************
    _sampling_weights = None
    if args.dataset != 'SUN':
        _sampling_weights = data.compute_sampling_weights(dil(dset.Y_s_tr, dset.Cs)).to(args.device)
    xy_iter = data.Iterator([_X_s_tr, _Y_s_tr_ix], args.batch_size, sampling_weights=_sampling_weights)
    label_iter = data.Iterator([torch.arange(dset.n_Cs, device=args.device)], args.batch_size)
    class_iter = data.Iterator([torch.arange(dset.n_Cs)], 1)

    # **************************************** per-class means and stds ****************************************
    # per class samplers and first 2 class moments
    per_class_iters = []
    Xs_tr_mean, Xs_tr_std = [], []
    Xs_te_mean, Xs_te_std = [], []
    Xu_te_mean, Xu_te_std = [], []
    for c_ix, c in enumerate(dset.Cs):
        # training samples of seen classes
        _inds = np.where(dset.Y_s_tr == c)[0]
        assert _inds.shape[0] > 0
        _X = dset.X_s_tr[_inds]
        Xs_tr_mean.append(_X.mean(axis=0, keepdims=True))
        Xs_tr_std.append(_X.std(axis=0, keepdims=True))
        if args.n_gm_iter > 0:
            _y = np.ones([_inds.shape[0]], np.int64) * c_ix
            per_class_iters.append(
                data.Iterator([FN(_X).to(args.device), FN(_y).to(args.device)],
                              args.per_class_batch_size))

        # test samples of seen classes
        _inds = np.where(dset.Y_s_te == c)[0]
        assert _inds.shape[0] > 0
        _X = dset.X_s_te[_inds]
        Xs_te_mean.append(_X.mean(axis=0, keepdims=True))
        Xs_te_std.append(_X.std(axis=0, keepdims=True))

    # test samples of unseen classes
    for c_ix, c in enumerate(dset.Cu):
        _inds = np.where(dset.Y_u_te == c)[0]
        assert _inds.shape[0] > 0
        _X = dset.X_u_te[_inds]
        Xu_te_mean.append(_X.mean(axis=0, keepdims=True))
        Xu_te_std.append(_X.std(axis=0, keepdims=True))
    del _X, _inds, c_ix, c

    Xs_tr_mean = FN(np.concatenate(Xs_tr_mean, axis=0)).to(args.device)
    Xs_tr_std = FN(np.concatenate(Xs_tr_std, axis=0)).to(args.device)
    Xs_te_mean = FN(np.concatenate(Xs_te_mean, axis=0)).to(args.device)
    Xs_te_std = FN(np.concatenate(Xs_te_std, axis=0)).to(args.device)
    Xu_te_mean = FN(np.concatenate(Xu_te_mean, axis=0)).to(args.device)
    Xu_te_std = FN(np.concatenate(Xu_te_std, axis=0)).to(args.device)

    # **************************************** create networks ****************************************
    g_net = modules.get_generator(args.gen_type)(
        dset.d_attr, args.d_noise, args.n_g_hlayer, args.n_g_hunit,
        args.normalize_noise, args.dp_g, args.leakiness_g).to(args.device)
    g_optim = optim.Adam(g_net.parameters(),
                         args.gan_optim_lr_g,
                         betas=(args.gan_optim_beta1, args.gan_optim_beta2),
                         weight_decay=args.gan_optim_wd)
    d_net = modules.ConditionalDiscriminator(
        dset.d_attr, args.n_d_hlayer, args.n_d_hunit,
        args.d_normalize_ft, args.dp_d, args.leakiness_d).to(args.device)
    d_optim = optim.Adam(d_net.parameters(),
                         args.gan_optim_lr_d,
                         betas=(args.gan_optim_beta1, args.gan_optim_beta2),
                         weight_decay=args.gan_optim_wd)
    start_it = 1
    utils.model_info(g_net, 'g_net', args.exp_dir)
    utils.model_info(d_net, 'd_net', args.exp_dir)

    if args.n_gm_iter > 0:
        if args.clf_type == 'bilinear-comp':
            clf = classifiers.BilinearCompatibility(dset.d_ft, dset.d_attr, args)
        elif args.clf_type == 'mlp':
            clf = classifiers.MLP(dset.d_ft, dset.n_Cs, args)
        utils.model_info(clf.net, 'clf', args.exp_dir)

    pret_clf = None
    if os.path.isfile(args.pretrained_clf_ckpt):
        logger.info('Loading pre-trained {} checkpoint at {} ...'.format(
            args.clf_type, args.pretrained_clf_ckpt))
        ckpt = torch.load(args.pretrained_clf_ckpt, map_location=args.device)
        pret_clf = classifiers.BilinearCompatibility(dset.d_ft, dset.d_attr, args)
        pret_clf.net.load_state_dict(ckpt[args.clf_type])
        pret_clf.net.eval()
        for p in pret_clf.net.parameters():
            p.requires_grad = False

    pret_regg = None
    if os.path.isfile(args.pretrained_regg_ckpt):
        logger.info('Loading pre-trained regressor checkpoint at {} ...'.format(
            args.pretrained_regg_ckpt))
        ckpt = torch.load(args.pretrained_regg_ckpt, map_location=args.device)
        pret_regg = classifiers.Regressor(args, dset.d_ft, dset.d_attr)
        pret_regg.net.load_state_dict(ckpt['regressor'])
        pret_regg.net.eval()
        for p in pret_regg.net.parameters():
            p.requires_grad = False

    training_log_titles = [
        'd/loss', 'd/real', 'd/fake', 'd/penalty',
        'gm/loss', 'gm/real_loss', 'gm/fake_loss',
        'g/fcls_loss', 'g/cycle_loss',
        'clf/train_loss', 'clf/train_acc',
        'mmad/X_s_tr', 'mmad/X_s_te', 'mmad/X_u_te',
        'smad/X_s_tr', 'smad/X_s_te', 'smad/X_u_te',
    ]
    if args.n_gm_iter > 0:
        training_log_titles.extend(
            ['grad-cossim/{}'.format(n) for n, p in clf.net.named_parameters()])
        training_log_titles.extend(
            ['grad-mse/{}'.format(n) for n, p in clf.net.named_parameters()])
    training_logger = utils.Logger(os.path.join(args.exp_dir, 'training-logs'), 'logs',
                                   training_log_titles)

    t0 = time.time()
    logger.info('may the penguins not die')
    for it in range(start_it, args.n_iter + 1):
        # **************************************** Discriminator updates ****************************************
        for p in d_net.parameters():
            p.requires_grad = True
        for p in g_net.parameters():
            p.requires_grad = False

        for _ in range(args.n_d_iter):
            x_real, y_ix = next(xy_iter)
            s = _Ss[y_ix]
            x_fake = g_net(s)

            d_real = d_net(x_real, s).mean()
            d_fake = d_net(x_fake, s).mean()
            d_penalty = modules.gradient_penalty(d_net, x_real, x_fake, s)
            d_loss = d_fake - d_real + args.L * d_penalty

            d_optim.zero_grad()
            d_loss.backward()
            d_optim.step()

            training_logger.update_meters(
                ['d/real', 'd/fake', 'd/loss', 'd/penalty'],
                [d_real.mean().item(), d_fake.mean().item(), d_loss.item(), d_penalty.item()],
                x_real.size(0))

        # **************************************** Generator updates ****************************************
        for p in d_net.parameters():
            p.requires_grad = False
        for p in g_net.parameters():
            p.requires_grad = True

        g_optim.zero_grad()
        [y_fake] = next(label_iter)
        s = _Ss[y_fake]
        x_fake = g_net(s)

        # wgan loss
        d_fake = d_net(x_fake, s).mean()
        g_wganloss = -d_fake

        # f-cls loss
        fcls_loss = 0.0
        if pret_clf is not None:
            fcls_loss = pret_clf.loss(x_fake, _Ss, y_fake)
            training_logger.update_meters(['g/fcls_loss'], [fcls_loss.item()], x_fake.size(0))

        # cycle-loss
        cycle_loss = 0.0
        if pret_regg is not None:
            cycle_loss = pret_regg.loss(x_fake, s)
            training_logger.update_meters(['g/cycle_loss'], [cycle_loss.item()], x_fake.size(0))

        g_loss = args.C * fcls_loss + args.R * cycle_loss + g_wganloss
        g_loss.backward()

        # gmn iterations
        for _ in range(args.n_gm_iter):
            c = next(class_iter)[0].item()
            x_real, y_real = next(per_class_iters[c])
            y_fake = y_real.detach().repeat(args.gm_fake_repeat)
            s = _Ss[y_fake]
            x_fake = g_net(s)

            # gm loss
            clf.net.zero_grad()
            if args.clf_type == 'bilinear-comp':
                real_loss = clf.loss(x_real, _Ss, y_real)
                fake_loss = clf.loss(x_fake, _Ss, y_fake)
            elif args.clf_type == 'mlp':
                real_loss = clf.loss(x_real, y_real)
                fake_loss = clf.loss(x_fake, y_fake)

            grad_cossim = []
            grad_mse = []
            for n, p in clf.net.named_parameters():
                # if len(p.shape) == 1: continue
                real_grad = grad([real_loss], [p], create_graph=True, only_inputs=True)[0]
                fake_grad = grad([fake_loss], [p], create_graph=True, only_inputs=True)[0]
                if len(p.shape) > 1:
                    _cossim = F.cosine_similarity(fake_grad, real_grad, dim=1).mean()
                else:
                    _cossim = F.cosine_similarity(fake_grad, real_grad, dim=0)
                # _cossim = F.cosine_similarity(fake_grad, real_grad, dim=1).mean()
                _mse = F.mse_loss(fake_grad, real_grad)
                grad_cossim.append(_cossim)
                grad_mse.append(_mse)
                training_logger.update_meters(
                    ['grad-cossim/{}'.format(n), 'grad-mse/{}'.format(n)],
                    [_cossim.item(), _mse.item()],
                    x_real.size(0))

            grad_cossim = torch.stack(grad_cossim)
            grad_mse = torch.stack(grad_mse)
            gm_loss = (1.0 - grad_cossim).sum() * args.Q + grad_mse.sum() * args.Z
            gm_loss.backward()

            training_logger.update_meters(
                ['gm/real_loss', 'gm/fake_loss'],
                [real_loss.item(), fake_loss.item()],
                x_real.size(0))

        g_optim.step()

        # **************************************** Classifier update ****************************************
        if args.n_gm_iter > 0:
            if it % args.clf_reset_iter == 0:
                if args.clf_reset_iter == 1:
                    # no need to generate optimizer each time
                    clf.init_params()
                else:
                    clf.reset()
            else:
                x, y_ix = next(xy_iter)
                if args.clf_type == 'bilinear-comp':
                    clf_acc, clf_loss = clf.train_step(x, _Ss, y_ix)
                else:
                    clf_acc, clf_loss = clf.train_step(x, y_ix)
                training_logger.update_meters(
                    ['clf/train_loss', 'clf/train_acc'],
                    [clf_loss, clf_acc],
                    x.size(0))

        # **************************************** Log ****************************************
        if it % 1000 == 0:
            g_net.eval()

            # synthesize samples for seen classes and compute their first 2 moments
            Xs_fake_mean, Xs_fake_std = [], []
            with torch.no_grad():
                for c in range(dset.n_Cs):
                    y = torch.ones(256, device=args.device, dtype=torch.long) * c
                    a = _Ss[y]
                    x_fake = g_net(a)
                    Xs_fake_mean.append(x_fake.mean(dim=0, keepdim=True))
                    Xs_fake_std.append(x_fake.std(dim=0, keepdim=True))
                Xs_fake_mean = torch.cat(Xs_fake_mean)
                Xs_fake_std = torch.cat(Xs_fake_std)

            # synthesize samples for unseen classes and compute their first 2 moments
            # NOTE: this helper is defined but never called; the same computation is done inline below
            def compute_firsttwo_moments(S, C):
                X_mean, X_std = [], []
                with torch.no_grad():
                    for c in range(dset.n_Cu):
                        y = torch.ones(256, device=args.device, dtype=torch.long) * c
                        a = _Su[y]
                        x_fake = g_net(a)
                        X_mean.append(x_fake.mean(dim=0, keepdim=True))
                        X_std.append(x_fake.std(dim=0, keepdim=True))
                    X_mean = torch.cat(X_mean)
                    X_std = torch.cat(X_std)

            Xu_fake_mean, Xu_fake_std = [], []
            with torch.no_grad():
                for c in range(dset.n_Cu):
                    y = torch.ones(256, device=args.device, dtype=torch.long) * c
                    a = _Su[y]
                    x_fake = g_net(a)
                    Xu_fake_mean.append(x_fake.mean(dim=0, keepdim=True))
                    Xu_fake_std.append(x_fake.std(dim=0, keepdim=True))
                Xu_fake_mean = torch.cat(Xu_fake_mean)
                Xu_fake_std = torch.cat(Xu_fake_std)
            g_net.train()

            training_logger.update_meters(
                ['mmad/X_s_tr', 'smad/X_s_tr',
                 'mmad/X_s_te', 'smad/X_s_te',
                 'mmad/X_u_te', 'smad/X_u_te'],
                [
                    torch.abs(Xs_tr_mean - Xs_fake_mean).sum(dim=1).mean().item(),
                    torch.abs(Xs_tr_std - Xs_fake_std).sum(dim=1).mean().item(),
                    torch.abs(Xs_te_mean - Xs_fake_mean).sum(dim=1).mean().item(),
                    torch.abs(Xs_te_std - Xs_fake_std).sum(dim=1).mean().item(),
                    torch.abs(Xu_te_mean - Xu_fake_mean).sum(dim=1).mean().item(),
                    torch.abs(Xu_te_std - Xu_fake_std).sum(dim=1).mean().item()
                ])

            training_logger.flush_meters(it)

            elapsed = time.time() - t0
            per_iter = elapsed / it
            apprx_rem = (args.n_iter - it) * per_iter
            logging.info('Iter:{:06d}/{:06d}, '
                         '[ET:{:.1e}(min)], '
                         '[IT:{:.1f}(ms)], '
                         '[REM:{:.1e}(min)]'.format(
                             it, args.n_iter, elapsed / 60., per_iter * 1000., apprx_rem / 60))

        if it % 10000 == 0:
            utils.save_checkpoint(
                {
                    'g_net': g_net.state_dict(),
                    'd_net': d_net.state_dict(),
                    'g_optim': g_optim.state_dict(),
                    'd_optim': d_optim.state_dict(),
                    'iteration': it
                },
                args.exp_dir,
                None,
                it if it % (args.n_iter // args.n_ckpt) == 0 else None,
            )

    training_logger.close()
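# Hedged resume sketch (not part of the original script): utils.save_checkpoint above stores a dict
# with 'g_net', 'd_net', 'g_optim', 'd_optim' and 'iteration'. Assuming it is written with torch.save
# somewhere under args.exp_dir, restoring the training state could look roughly like this; the file
# name 'ckpt.pt' is a placeholder.
import os
import torch

def try_resume(exp_dir, g_net, d_net, g_optim, d_optim, device='cpu'):
    path = os.path.join(exp_dir, 'ckpt.pt')  # placeholder checkpoint path
    if not os.path.isfile(path):
        return 1  # nothing to resume; start from the first iteration
    ckpt = torch.load(path, map_location=device)
    g_net.load_state_dict(ckpt['g_net'])
    d_net.load_state_dict(ckpt['d_net'])
    g_optim.load_state_dict(ckpt['g_optim'])
    d_optim.load_state_dict(ckpt['d_optim'])
    return ckpt['iteration'] + 1  # resume from the next iteration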
def train(**kwargs):
    """train the crnn model"""
    opt.parse(kwargs)
    opt.print_args()
    train_test_split(path=opt.data_path,
                     img_format=opt.img_format,
                     label_format=opt.label_format,
                     generating_again=opt.generating_again,
                     split_rate=opt.split_rate)
    device = t.device('cuda') if opt.use_gpu else t.device('cpu')

    #Step 0 Decide the structure of the model#
    #Step 1 Load the data set#
    dataset, dataloader = GetDataLoader(path=opt.data_path,
                                        train=True,
                                        img_format=opt.img_format,
                                        label_format=opt.label_format,
                                        img_height=opt.img_height,
                                        img_width=opt.img_width,
                                        img_channels=opt.img_channels,
                                        batch_size=opt.batch_size)

    #Step 2 Reshape the inputs#
    #Step 3 Normalize the inputs#
    #Step 4 Initialize parameters#
    #Step 5 Forward propagation(Vectorization/Activation functions)#
    crnn_model = CRNN_def(in_c=opt.img_channels,
                          feature_size=512,
                          lstm_hidden=opt.lstm_hidden,
                          output_size=opt.output_size,
                          multilines=opt.multilines,
                          multisteps=opt.multisteps,
                          num_rows=opt.num_rows)
    crnn_model.to(device)
    distilled_crnn_model = Distilled_CRNN_def(in_c=opt.img_channels,
                                              feature_size=512,
                                              lstm_hidden=opt.lstm_hidden,
                                              output_size=opt.output_size,
                                              multilines=opt.multilines,
                                              multisteps=opt.multisteps,
                                              num_rows=opt.num_rows)
    distilled_crnn_model.to(device)

    print('CRNN model : ')
    for name, parameters in crnn_model.named_parameters():
        print('\t', name, '...', parameters.requires_grad)
    print('Distilled CRNN model : ')
    for name, parameters in distilled_crnn_model.named_parameters():
        print('\t', name, '...', parameters.requires_grad)

    #Step 6 Compute cost#
    ctc_loss = t.nn.CTCLoss().to(device)  # use CTC to derive the whole loss function

    #Step 7 Backward propagation(Vectorization/Activation functions gradients)#
    if opt.optimizer == 'sgd' or opt.optimizer == 'momentum' or opt.optimizer == 'nesterov':
        crnn_optimizer = t.optim.SGD(
            filter(lambda p: p.requires_grad, crnn_model.parameters()),
            lr=opt.init_lr,
            momentum=0.9 if opt.optimizer == 'momentum' or opt.optimizer == 'nesterov' else 0.,
            nesterov=True if opt.optimizer == 'nesterov' else False,
            weight_decay=opt.weight_decay)
        distilled_crnn_optimizer = t.optim.SGD(
            filter(lambda p: p.requires_grad, distilled_crnn_model.parameters()),
            lr=opt.init_lr,
            momentum=0.9 if opt.optimizer == 'momentum' or opt.optimizer == 'nesterov' else 0.,
            nesterov=True if opt.optimizer == 'nesterov' else False,
            weight_decay=opt.weight_decay)
    elif opt.optimizer == 'adam' or opt.optimizer == 'amsgrad':
        crnn_optimizer = t.optim.Adam(
            filter(lambda p: p.requires_grad, crnn_model.parameters()),
            lr=opt.init_lr,
            amsgrad=True if opt.optimizer == 'amsgrad' else False,
            weight_decay=opt.weight_decay)
        distilled_crnn_optimizer = t.optim.Adam(
            filter(lambda p: p.requires_grad, distilled_crnn_model.parameters()),
            lr=opt.init_lr,
            amsgrad=True if opt.optimizer == 'amsgrad' else False,
            weight_decay=opt.weight_decay)
    else:
        raise Exception('No other optimizers!')

    crnn_lr_schedule = t.optim.lr_scheduler.MultiStepLR(
        crnn_optimizer, milestones=opt.lr_decay_epochs, gamma=opt.lr_decay_rate)
    distilled_lr_schedule = t.optim.lr_scheduler.MultiStepLR(
        distilled_crnn_optimizer, milestones=opt.lr_decay_epochs, gamma=opt.lr_decay_rate)

    _ = model_info(crnn_model)
    _ = model_info(distilled_crnn_model)

    train_crnn_loss = []
    train_crnn_acc = []
    best_crnn_acc = 0.5  # must have better accuracy than random guess of 0.5
    train_distilled_crnn_loss = []
    train_distilled_crnn_acc = []
    best_distilled_crnn_acc = 0.5  # must have better accuracy than random guess of 0.5
    cd_loss = []
    lstm_loss = []
    h_loss = []
    c_loss = []
    softloss = []

    #Step 8 Update parameters#
    for epoch in tqdm.tqdm(range(opt.epochs)):
        print('Epoch : %d / %d.' % (epoch + 1, opt.epochs))
        print('Current epoch learning rate for CRNN: ', crnn_optimizer.param_groups[0]['lr'])
        if opt.distilled:
            print('Current epoch learning rate for Distilled_CRNN: ',
                  distilled_crnn_optimizer.param_groups[0]['lr'])
        epoch_crnn_acc = 0.
        epoch_distilled_crnn_acc = 0.
        count = 0

        for i, (batch_x, index, path) in enumerate(dataloader):
            batch_x = batch_x.to(device)
            index = index.to(device)
            batch_x = batch_x.view(batch_x.size(0), opt.img_channels, opt.img_height, opt.img_width)

            crnn_optimizer.zero_grad()
            if not opt.multisteps:
                labels = get_batch_label(dataset, index)
                text, length = opt.converter.encode(labels)
                outputt, teachers, (hts, cts) = crnn_model(batch_x)
                # output has shape : [m, t, output_size]
                preds_size = [outputt.size(0)] * outputt.size(1)  # batch_size * time_steps
                batch_crnn_cost = ctc_loss(
                    outputt,
                    text.to(t.long).to(device),
                    t.IntTensor(preds_size).to(t.long).to(device),
                    length.to(t.long).to(device))  # ctc loss
            else:
                outputts, teachers, (htss, ctss) = crnn_model(batch_x)
                preds_size = [outputts[0].size(0)] * outputts[0].size(1)  # batch_size * time_steps
                batch_crnn_cost = 0.
                labels = get_batch_label(dataset, index, multisteps=opt.multisteps, num_rows=opt.num_rows)
                for step in range(len(outputts)):
                    outputt = outputts[step]
                    label = labels[step]
                    text, length = opt.converter.encode(label)
                    batch_crnn_cost += ctc_loss(
                        outputt,
                        text.to(t.long).to(device),
                        t.IntTensor(preds_size).to(t.long).to(device),
                        length.to(t.long).to(device))  # ctc loss
                batch_crnn_cost /= len(outputts)
            batch_crnn_cost.backward()
            crnn_optimizer.step()

            if opt.distilled:
                distilled_crnn_optimizer.zero_grad()
                if not opt.multisteps:
                    outputs, students, (hss, css) = distilled_crnn_model(batch_x)
                    # output has shape : [m, t, output_size]
                    preds_size = [outputs.size(0)] * outputs.size(1)  # batch_size * time_steps
                else:
                    outputss, students, (hsss, csss) = distilled_crnn_model(batch_x)
                    preds_size = [outputss[0].size(0)] * outputss[0].size(1)  # batch_size * time_steps

                #1. CTC loss
                if not opt.multisteps:
                    batch_distilled_crnn_cost = ctc_loss(
                        outputs,
                        text.to(t.long).to(device),
                        t.IntTensor(preds_size).to(t.long).to(device),
                        length.to(t.long).to(device))
                else:
                    batch_ctc_loss = 0.
                    for step in range(len(outputss)):
                        outputs = outputss[step]
                        label = labels[step]
                        text, length = opt.converter.encode(label)
                        batch_ctc_loss += ctc_loss(
                            outputs,
                            text.to(t.long).to(device),
                            t.IntTensor(preds_size).to(t.long).to(device),
                            length.to(t.long).to(device))
                    batch_distilled_crnn_cost = batch_ctc_loss / (len(outputss) * 1.)

                #2. cd loss
                count_ = 0
                batch_cd_loss = 0.
                for teacher, student in zip(teachers, students):
                    batch_cd_loss += t.mean(t.pow(teacher - student, 2)).to(device)
                    count_ += 1
                batch_cd_loss /= count_
                batch_distilled_crnn_cost += opt.alpha * batch_cd_loss

                #3. lstm loss
                #3.1 H values
                count_ = 0
                cur_lossh = 0.
                if not opt.multisteps:
                    for ht, hs in zip(hts, hss):
                        cur_lossh += t.mean(t.pow(ht - hs, 2)).to(device)
                        count_ += 1
                else:
                    for hts, hss in zip(htss, hsss):
                        cur_loss = 0.
                        q = 0.
                        for ht, hs in zip(hts, hss):
                            cur_loss += t.mean(t.pow(ht - hs, 2)).to(device)
                            q += 1.
                        cur_lossh += cur_loss / q
                        count_ += 1
                cur_lossh /= count_

                #3.2 C values
                cur_lossc = 0.
                count_ = 0
                if not opt.multisteps:
                    for ct, cs in zip(cts, css):
                        cur_lossc += t.mean(t.pow(ct - cs, 2)).to(device)
                        count_ += 1
                else:
                    for cts, css in zip(ctss, csss):
                        cur_loss = 0.
                        q = 0.
                        for ct, cs in zip(cts, css):
                            cur_loss += t.mean(t.pow(ct - cs, 2)).to(device)
                            q += 1.
                        cur_lossc += cur_loss / q
                        count_ += 1
                cur_lossc /= count_
                batch_lstm_loss = (cur_lossc + cur_lossh) / 2.
                batch_distilled_crnn_cost += opt.beta * batch_lstm_loss

                #4. soft loss
                if not opt.multisteps:
                    batch_softloss = -t.mean(t.sum(
                        F.softmax(outputt.detach() / opt.temperature, dim=1) *
                        t.log(F.softmax(outputs / opt.temperature, dim=1) + 1e-10),
                        dim=1)).to(device)
                else:
                    batch_softloss = 0.
                    for outputt, outputs in zip(outputts, outputss):
                        batch_softloss += -t.mean(t.sum(
                            F.softmax(outputt.detach() / opt.temperature, dim=1) *
                            t.log(F.softmax(outputs / opt.temperature, dim=1) + 1e-10),
                            dim=1)).to(device)
                    batch_softloss /= len(outputts)
                batch_distilled_crnn_cost += opt.gamma * batch_softloss

                batch_distilled_crnn_cost.backward()
                distilled_crnn_optimizer.step()

            if i % opt.batch_size == 0:
                count += 1
                train_crnn_loss.append(batch_crnn_cost.item())
                crnn_model.eval()
                batch_crnn_acc, predictions = cal_batch_acc(crnn_model,
                                                            opt.converter,
                                                            batch_x,
                                                            labels,
                                                            level=opt.level)
                print('\nCRNN samples predictions: ')
                print('=' * 30)
                print('Labels : ', label)
                print('*' * 20)
                print('Predictions : ', predictions)
                print('=' * 30)
                crnn_model.train()
                train_crnn_acc.append(batch_crnn_acc)

                if opt.distilled:
                    train_distilled_crnn_loss.append(batch_distilled_crnn_cost.item())
                    cd_loss.append(opt.alpha * batch_cd_loss.item())
                    lstm_loss.append(opt.beta * batch_lstm_loss.item())
                    h_loss.append(opt.beta * cur_lossh.item())
                    c_loss.append(opt.beta * cur_lossc.item())
                    softloss.append(opt.gamma * batch_softloss.item())
                    distilled_crnn_model.eval()
                    batch_distilled_crnn_acc, predictions = cal_batch_acc(distilled_crnn_model,
                                                                          opt.converter,
                                                                          batch_x,
                                                                          label,
                                                                          level=opt.level)
                    print('=' * 50)
                    print('Distilled CRNN samples predictions : ')
                    print('=' * 30)
                    print('Labels : ', label)
                    print('*' * 20)
                    print('Predictions : ', predictions)
                    print('=' * 30)
                    distilled_crnn_model.train()
                    train_distilled_crnn_acc.append(batch_distilled_crnn_acc)

                print('\tCRNN : ')
                print('\tBatch %d has crnn cost : %.3f.|| Accuracy : '
                      % (i + 1, batch_crnn_cost.item()), end='')
                if isinstance(batch_crnn_acc, tuple):
                    print('Character-level acc : %.2f%%; Image-level acc : %.2f%%.'
                          % (batch_crnn_acc[0] * 100., batch_crnn_acc[1] * 100.))
                    combined_acc = (2. * batch_crnn_acc[0] * batch_crnn_acc[1]) / (
                        batch_crnn_acc[0] + batch_crnn_acc[1] + 1e-7)  # f1
                    epoch_crnn_acc += combined_acc
                else:
                    if opt.level == 'char':
                        print('Character-level acc : %.2f%%.' % (batch_crnn_acc * 100.))
                    elif opt.level == 'whole':
                        print('Image-level acc : %.2f%%.' % (batch_crnn_acc * 100.))
                    else:
                        raise Exception('No other levels!')
                    epoch_crnn_acc += batch_crnn_acc

                if opt.distilled:
                    print('\tDistilled : ')
                    print('\tBatch %d has distilled crnn cost : %.3f.'
                          '[softloss %.3f & cd loss %.3f & lstm loss %.3f & h_loss %.3f & c_loss %.3f]. --> \n\t\tAccuracy : '
                          % (i + 1, batch_distilled_crnn_cost.item(),
                             opt.gamma * batch_softloss.item(),
                             opt.alpha * batch_cd_loss.item(),
                             opt.beta * batch_lstm_loss.item(),
                             opt.beta * cur_lossh.item(),
                             opt.beta * cur_lossc.item()), end='')
                    if isinstance(batch_distilled_crnn_acc, tuple):
                        print('Character-level acc : %.2f%%; Image-level acc : %.2f%%.'
                              % (batch_distilled_crnn_acc[0] * 100., batch_distilled_crnn_acc[1] * 100.))
                        combined_acc = (2. * batch_distilled_crnn_acc[0] * batch_distilled_crnn_acc[1]) / (
                            batch_distilled_crnn_acc[0] + batch_distilled_crnn_acc[1] + 1e-7)  # f1
                        epoch_distilled_crnn_acc += combined_acc
                    else:
                        if opt.level == 'char':
                            print('Character-level acc : %.2f%%.' % (batch_distilled_crnn_acc * 100.))
                        elif opt.level == 'whole':
                            print('Image-level acc : %.2f%%.' % (batch_distilled_crnn_acc * 100.))
                        else:
                            raise Exception('No other levels!')
                        epoch_distilled_crnn_acc += batch_distilled_crnn_acc

        epoch_crnn_acc /= count
        epoch_distilled_crnn_acc /= count
        print('This epoch has crnn acc : {:.2f}%.'.format(epoch_crnn_acc * 100.))
        if opt.save_best_model:
            if epoch % opt.save_best_model_iter == 0:
                if epoch_crnn_acc > best_crnn_acc:
                    best_crnn_acc = epoch_crnn_acc
                    t.save(crnn_model,
                           './checkpoints/save_best_train_crnn_model_epoch_%d_%s.pkl'
                           % (epoch + 1, opt.model_config))
                else:
                    print('This epoch has no improvement on training accuracy on crnn model, '
                          'skipping saving the model!')

        if opt.distilled:
            print('This epoch has distilled crnn acc : {:.2f}%.'.format(epoch_distilled_crnn_acc * 100.))
            if opt.save_best_model:
                if epoch % opt.save_best_model_iter == 0:
                    if epoch_distilled_crnn_acc > best_distilled_crnn_acc:
                        best_distilled_crnn_acc = epoch_distilled_crnn_acc
                        t.save(distilled_crnn_model,
                               './checkpoints/save_best_train_distilled_crnn_model_epoch_%d_%s.pkl'
                               % (epoch + 1, opt.model_config))
                    else:
                        print('This epoch has no improvement on training accuracy on distilled crnn model, '
                              'skipping saving the model!')

        crnn_lr_schedule.step()
        distilled_lr_schedule.step()

    t.save(crnn_model, './checkpoints/final_crnn_model_%s.pkl' % opt.model_config)

    f, ax = plt.subplots(1, 2)
    f.suptitle('Useful statistics for CRNN')
    ax[0].plot(range(len(train_crnn_loss)), train_crnn_loss, label='CRNN training loss')
    ax[0].grid(True)
    ax[0].set_title('CRNN training loss')
    ax[0].legend(loc='best')
    if isinstance(train_crnn_acc[0], tuple):
        char_acc = [c_acc[0] for c_acc in train_crnn_acc]
        whole_acc = [c_acc[1] for c_acc in train_crnn_acc]
        ax[1].plot(range(len(char_acc)), char_acc, label='Character-level acc')
        ax[1].plot(range(len(whole_acc)), whole_acc, label='Image-level acc')
    else:
        if opt.level == 'char':
            ax[1].plot(range(len(train_crnn_acc)), train_crnn_acc, label='Character-level acc')
        elif opt.level == 'whole':
            ax[1].plot(range(len(train_crnn_acc)), train_crnn_acc, label='Image-level acc')
        else:
            raise Exception('No other levels!')
    ax[1].grid(True)
    ax[1].set_title('CRNN training acc')
    ax[1].legend(loc='best')
    plt.savefig('./results/training_crnn_statistics_%s.png' % opt.model_config)
    plt.close()

    if opt.distilled:
        t.save(distilled_crnn_model,
               './checkpoints/final_distilled_crnn_model_%s.pkl' % opt.model_config)
        f, ax = plt.subplots(1, 5)
        f.suptitle('Useful statistics for Distilled CRNN')
        ax[0].plot(range(len(train_distilled_crnn_loss)), train_distilled_crnn_loss,
                   label='Distilled CRNN training loss')
        ax[0].grid(True)
        ax[0].set_title('Distilled CRNN training loss')
        ax[0].legend(loc='best')
        if isinstance(train_distilled_crnn_acc[0], tuple):
            char_acc = [c_acc[0] for c_acc in train_distilled_crnn_acc]
            whole_acc = [c_acc[1] for c_acc in train_distilled_crnn_acc]
            ax[1].plot(range(len(char_acc)), char_acc, label='Character-level acc')
            ax[1].plot(range(len(whole_acc)), whole_acc, label='Image-level acc')
        else:
            if opt.level == 'char':
                ax[1].plot(range(len(train_distilled_crnn_acc)), train_distilled_crnn_acc,
                           label='Character-level acc')
            elif opt.level == 'whole':
                ax[1].plot(range(len(train_distilled_crnn_acc)), train_distilled_crnn_acc,
                           label='Image-level acc')
            else:
                raise Exception('No other levels!')
        ax[1].grid(True)
        ax[1].set_title('Distilled training acc')
        ax[1].legend(loc='best')
        ax[2].plot(range(len(cd_loss)), cd_loss, label='Distilled CRNN training cd loss')
        ax[2].grid(True)
        ax[2].set_title('Distilled CRNN training cd loss')
        ax[2].legend(loc='best')
        ax[3].plot(range(len(softloss)), softloss, label='Distilled CRNN training soft loss')
        ax[3].grid(True)
        ax[3].set_title('Distilled CRNN training soft loss')
        ax[3].legend(loc='best')
        ax[4].plot(range(len(lstm_loss)), lstm_loss, label='Distilled CRNN training lstm loss')
        ax[4].plot(range(len(h_loss)), h_loss, label='Distilled CRNN training lstm hidden loss')
        ax[4].plot(range(len(c_loss)), c_loss, label='Distilled CRNN training lstm cell loss')
        ax[4].grid(True)
        ax[4].set_title('Distilled CRNN training lstm loss')
        ax[4].legend(loc='best')
        plt.savefig('./results/training_distilled_crnn_statistics_%s.png' % opt.model_config)
        plt.close()

    print('Training is done!\n')
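# Hedged illustration (a standalone refactor of the soft-loss expression used inside train() above,
# not new functionality): the distillation "soft loss" is a temperature-scaled cross-entropy between
# the detached teacher distribution and the student distribution, mirroring the
# F.softmax(... / opt.temperature, dim=1) expression in the training loop.
import torch as t
import torch.nn.functional as F

def soft_target_loss(teacher_logits, student_logits, temperature):
    p_teacher = F.softmax(teacher_logits.detach() / temperature, dim=1)  # teacher receives no gradient
    p_student = F.softmax(student_logits / temperature, dim=1)
    return -t.mean(t.sum(p_teacher * t.log(p_student + 1e-10), dim=1))

# quick smoke test with dummy logits shaped like a CRNN output
loss = soft_target_loss(t.randn(8, 4, 37), t.randn(8, 4, 37), temperature=2.0)
print(loss.item())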