def test(opt, model, dataloader):
    # Logging
    logger = logging.Logger(opt.ckpt_path, opt.split)
    stats = logging.Statistics(opt.ckpt_path, opt.split)
    logger.log(opt)

    model.load(opt.load_ckpt_paths, opt.load_opts, opt.load_epoch)

    all_scores = []
    video_names = []
    for step, data in enumerate(dataloader, 1):
        inputs, label, vid_name = data
        info_acc, logits, scores = model.test(inputs, label, opt.timestep)
        all_scores.append(scores)
        video_names.append(vid_name[0])
        update = stats.update(logits.shape[0], info_acc)
        if utils.is_due(step, opt.print_every):
            utils.info('step {}/{}: {}'.format(step, len(dataloader), update))
    logger.log('[Summary] {}'.format(stats.summarize()))

    # Evaluate
    iou_thresholds = [0.1, 0.3, 0.5]
    groundtruth_dir = os.path.join(opt.dset_path, opt.dset, 'groundtruth',
                                   'validation/cross-subject')
    assert os.path.exists(groundtruth_dir), '{} does not exist'.format(
        groundtruth_dir)
    mean_aps = calc_map(opt, all_scores, video_names, groundtruth_dir,
                        iou_thresholds)
    for i in range(len(iou_thresholds)):
        logger.log('IoU: {}, mAP: {}'.format(iou_thresholds[i], mean_aps[i]))
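# A hedged sketch of the temporal-IoU computation an evaluation helper like
# calc_map presumably relies on when matching detections to ground truth;
# `segment_iou` is a hypothetical name, not a function shown in this code.
def segment_iou(pred, gt):
    """IoU between two 1-D [start, end] segments."""
    inter = max(0.0, min(pred[1], gt[1]) - max(pred[0], gt[0]))
    union = max(pred[1], gt[1]) - min(pred[0], gt[0])
    return inter / union if union > 0 else 0.0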
def train(opt, model, dataloader):
    # Logging
    logger = logging.Logger(opt.ckpt_path, opt.split)
    stats = logging.Statistics(opt.ckpt_path, opt.split)
    logger.log(opt)

    model.load(opt.load_ckpt_paths, opt.load_opts, opt.load_epoch)

    for epoch in range(1, opt.n_epochs + 1):
        for step, data in enumerate(dataloader, 1):
            # inputs is a list with one input per modality
            inputs, label, _ = data
            ret = model.train(inputs, label)
            update = stats.update(len(label), ret)
            if utils.is_due(step, opt.print_every):
                utils.info('epoch {}/{}, step {}/{}: {}'.format(
                    epoch, opt.n_epochs, step, len(dataloader), update))
        logger.log('[Summary] epoch {}/{}: {}'.format(
            epoch, opt.n_epochs, stats.summarize()))

        if utils.is_due(epoch, opt.n_epochs, opt.save_every):
            model.save(epoch)
            stats.save()
            logger.log('***** saved *****')
        if utils.is_due(epoch, opt.lr_decay_at):
            lrs = model.lr_decay()
            logger.log('***** lr decay *****: {}'.format(lrs))
def init_config(conf):
    conf.is_finished = False
    assert conf.ptl in conf.model

    # Configure the training devices.
    assert conf.world is not None, "Please specify the gpu ids."
    conf.world = ([int(x) for x in conf.world.split(",")]
                  if "," in conf.world else [int(conf.world)])
    conf.n_sub_process = len(conf.world)

    # Init the masking scheduler.
    conf.masking_scheduler_conf_ = (
        param_parser.dict_parser(conf.masking_scheduler_conf)
        if conf.masking_scheduler_conf is not None else None)
    if conf.masking_scheduler_conf is not None:
        for k, v in conf.masking_scheduler_conf_.items():
            setattr(conf, f"masking_scheduler_{k}", v)

    # Init the layers to mask.
    assert conf.layers_to_mask is not None, \
        "Please specify which BERT layers to mask."
    conf.layers_to_mask_ = ([int(x) for x in conf.layers_to_mask.split(",")]
                            if "," in conf.layers_to_mask
                            else [int(conf.layers_to_mask)])

    # Init the params for structured pruning.
    if (conf.structured_masking is not None
            and conf.structured_masking_types is not None):
        conf.structured_masking_types_ = conf.structured_masking_types.split(",")
    else:
        conf.structured_masking_types_ = None

    # Init the params for the do_tuning_on_MS scheme.
    if conf.do_tuning_on_MS:
        assert conf.do_tuning_on_MS_scheme is not None
        conf.do_tuning_on_MS_scheme_ = conf.do_tuning_on_MS_scheme.split(",")

    # Re-configure batch_size if n_sub_process > 1.
    if conf.n_sub_process > 1:
        conf.batch_size = conf.batch_size * conf.n_sub_process

    # Configure CUDA-related settings.
    assert torch.cuda.is_available()
    torch.manual_seed(conf.manual_seed)
    torch.cuda.manual_seed(conf.manual_seed)
    torch.cuda.set_device(conf.world[0])
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True if conf.train_fast else False

    # Define checkpoint for logging.
    checkpoint.init_checkpoint(conf)

    # Display the arguments' info.
    logging.display_args(conf)

    # Configure logger.
    conf.logger = logging.Logger(conf.checkpoint_root)
def main(args):
    # s_ = time.time()
    save_dir = args.save_dir  # where model checkpoints are stored
    mkdir_if_missing(save_dir)  # create the directory if it is missing (utils)

    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    display(args)  # print the hyper-parameters of the current run
    start = 0

    # Create the model; with pretrained=True the factory in `models` would
    # load an existing pre-trained checkpoint instead.
    model = models.create(args.net, pretrained=False, model_path=None,
                          normalized=True)
    model = torch.nn.DataParallel(model)  # data-parallel training
    model = model.cuda()  # move the model to the GPU
    print('initial model is saved at %s' % save_dir)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = losses.create(args.loss, margin_same=args.margin_same,
                              margin_diff=args.margin_diff).cuda()  # TWContrastiveLoss

    # set_name is "test" or "train"
    data = DataSet.create(name=args.data, root=args.data_root,
                          set_name=args.set_name)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.batch_size, shuffle=True,
        drop_last=True, pin_memory=True, num_workers=args.nThreads)

    losses_ = []  # per-epoch losses (undefined in the original snippet)
    use_gpu = torch.cuda.is_available()  # (undefined in the original snippet)
    for epoch in range(start, 50):  # args.epochs
        L = train(epoch=epoch, model=model, criterion=criterion,
                  optimizer=optimizer, train_loader=train_loader, args=args)
        losses_.append(L)
        if (epoch + 1) % args.save_step == 0 or epoch == 0:
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            }, is_best=False,
                fpath=osp.join(args.save_dir,
                               'ckp_ep' + str(epoch + 1) + '.pth.tar'))

    # added: plot the loss curve
    batch_nums = range(1, len(losses_) + 1)
    import matplotlib.pyplot as plt
    plt.plot(batch_nums, losses_)
    plt.show()
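# Several scripts here assign `sys.stdout = logging.Logger(path)`, which
# implies a custom file-like logger rather than the standard library class.
# A minimal sketch of such a tee logger, assuming it only needs `write` and
# `flush` so `print` output reaches both the console and the log file
# (hypothetical reconstruction; the real utils module may differ):
import sys


class Logger(object):
    def __init__(self, fpath=None):
        self.console = sys.stdout
        self.file = open(fpath, 'w') if fpath is not None else None

    def write(self, msg):
        # tee every message to the console and, if configured, the log file
        self.console.write(msg)
        if self.file is not None:
            self.file.write(msg)

    def flush(self):
        self.console.flush()
        if self.file is not None:
            self.file.flush()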
def test_with_fields(self):
    """It should append the `extras` dictionary to the logger."""
    logger = logging.Logger('app.testing', {'user': '12345'})
    logger.with_fields(pid=10)

    with self.assertLogs(logger.name, level=pylogging.INFO) as cm:
        logger.info('This is the first log.')
        logger.info('This is the second log.')

    self.assertEqual(cm.output, [
        'INFO:app.testing:(user: 12345) (pid: 10) This is the first log.',
        'INFO:app.testing:(user: 12345) (pid: 10) This is the second log.',
    ])
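# The test above implies a Logger that prefixes each record with its extra
# fields. A minimal sketch built on the standard library's LoggerAdapter,
# covering only the behavior the assertions require (hypothetical; the real
# app logger may be implemented differently):
import logging as pylogging


class Logger(pylogging.LoggerAdapter):
    def __init__(self, name, extras=None):
        super().__init__(pylogging.getLogger(name), extras or {})

    @property
    def name(self):
        return self.logger.name

    def with_fields(self, **fields):
        # accumulate extra fields onto the adapter's `extra` mapping
        self.extra.update(fields)
        return self

    def process(self, msg, kwargs):
        # prepend "(key: value)" pairs, in insertion order, to the message
        prefix = ' '.join('({}: {})'.format(k, v)
                          for k, v in self.extra.items())
        return '{} {}'.format(prefix, msg), kwargs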
def __init__(self, app_id):
    self.app_id = app_id
    super().__init__(application_id=self.app_id,
                     flags=Gio.ApplicationFlags.HANDLES_COMMAND_LINE)
    self.connect('command-line', self.do_command_line)

    GLib.set_application_name(_("Good Old Mupen64+"))
    GLib.set_prgname('gom64p')
    # GLib.setenv("")
    # self.settings = Gio.Settings.new('org.mupen64plus.good-old-m64p')

    self.main = None
    self.args = None
    self.frontend_conf = None
    self.logger = u_log.Logger()
                    default=0e-3,
                    help='try to make the last linear weight matrix '
                         'approximate an orthogonal matrix')

args = parser.parse_args()
print(args.nums)
print(type(args.nums))

if args.log_dir is None:
    log_dir = os.path.join('checkpoints', args.loss)
else:
    log_dir = os.path.join('checkpoints', args.log_dir)
mkdir_if_missing(log_dir)

# write log
sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))

# display information about the current training run
print('train on dataset %s' % args.data)
print('batch size is: %d' % args.BatchSize)
print('num_instances is %d' % args.num_instances)
print('dimension of the embedding space is %d' % args.dim)
print('log dir is: %s' % args.log_dir)
print('the network is: %s' % args.net)
print('loss function for training is: %s' % args.loss)
print('learning rate: %f' % args.lr)
print('learning rate for the base parameters is: %f' % args.base)
print('the orthogonal weight regularizer is %f' % args.orth_cof)

# load pretrained models
if args.r is not None:
                    default=4, type=float, help='triplet loss margin')
parser.add_argument('--file_name', default='result', type=str,
                    help='file name to save')
parser.add_argument('--pmap', default=False, help='use part_map')
parser.add_argument('--mat', default='', type=str,
                    help='name for saving the representation')
opt = parser.parse_args()

sys.stdout = logging.Logger(
    os.path.join('/home/guojianyuan/ReID_Duke/' + opt.file_name + '/' +
                 opt.name + '/', 'log.txt'))

tripletloss = TripletLoss(opt.margin)

gpu_ids = []
str_gpu_ids = opt.gpu_ids.split(',')
for str_id in str_gpu_ids:
    gpu_ids.append(int(str_id))
torch.cuda.set_device(gpu_ids[0])

# Load Data
if opt.pmap:
    transform_train_list = [
        transforms.Resize((384, 128), interpolation=3),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
def train_fun(args, train_loader, feat_loader, current_task, fisher={},
              prototype={}):
    log_dir = args.log_dir
    mkdir_if_missing(log_dir)

    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    model = models.create(args.net, Embed_dim=args.dim)

    # load part of the model
    if args.method == 'Independent' or current_task == 0:
        model_dict = model.state_dict()
        if args.net == 'resnet32':
            pickle.load = partial(pickle.load, encoding="latin1")
            pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
            pretrained_dict = torch.load(
                'pretrained_models/Finetuning_0_task_0_200_model_task2_cifar100_seed1993.pkl',
                map_location=lambda storage, loc: storage,
                pickle_module=pickle)
            pretrained_dict = pretrained_dict.state_dict()
            pretrained_dict = {k: v for k, v in pretrained_dict.items()
                               if k in model_dict and 'fc' not in k}
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)
        elif args.net == 'resnet18' and args.data == 'imagenet_sub':
            pickle.load = partial(pickle.load, encoding="latin1")
            pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
            pretrained_dict = torch.load(
                'pretrained_models/Finetuning_0_task_0_200_model_task2_imagenet_sub_seed1993.pkl',
                map_location=lambda storage, loc: storage,
                pickle_module=pickle)
            pretrained_dict = pretrained_dict.state_dict()
            pretrained_dict = {k: v for k, v in pretrained_dict.items()
                               if k in model_dict and 'fc' not in k}
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)
        else:
            print('Oops! That was not a valid model.')

    if args.method != 'Independent' and current_task > 0:
        model = torch.load(os.path.join(
            log_dir, args.method + '_' + args.exp + '_task_' +
            str(current_task - 1) + '_%d_model.pkl' % int(args.epochs - 1)))

    model_old = deepcopy(model)
    model_old.eval()
    model_old = freeze_model(model_old)

    model = model.cuda()
    torch.save(model, os.path.join(
        log_dir, args.method + '_' + args.exp + '_task_' +
        str(current_task) + '_pre_model.pkl'))
    print('initial model is saved at %s' % log_dir)

    # fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids = set(map(id, model.Embed.parameters()))
    new_params = [p for p in model.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.parameters()
                   if id(p) not in new_param_ids]
    param_groups = [
        {'params': base_params, 'lr_mult': 0.1},
        {'params': new_params, 'lr_mult': 1.0}]

    criterion = losses.create(args.loss, margin=args.margin,
                              num_instances=args.num_instances).cuda()
    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=200, gamma=0.1)

    if args.data == 'cifar100' or args.data == 'imagenet_sub':
        if current_task > 0:
            model.eval()
        for epoch in range(args.start, args.epochs):
            running_loss = 0.0
            running_lwf = 0.0
            scheduler.step()
            for i, data in enumerate(train_loader, 0):
                inputs, labels = data
                # wrap them in Variable (legacy PyTorch <= 0.3 style)
                inputs = Variable(inputs.cuda())
                labels = Variable(labels).cuda()

                optimizer.zero_grad()
                _, embed_feat = model(inputs)

                if current_task == 0:
                    loss_aug = 0 * torch.sum(embed_feat)
                else:
                    if args.method == 'Finetuning' or args.method == 'Independent':
                        loss_aug = 0 * torch.sum(embed_feat)
                    elif args.method == 'LwF':
                        _, embed_feat_old = model_old(inputs)
                        loss_aug = args.tradeoff * \
                            torch.sum((embed_feat - embed_feat_old).pow(2)) / 2.
                    elif args.method == 'EWC' or args.method == 'MAS':
                        loss_aug = 0
                        for (name, param), (_, param_old) in zip(
                                model.named_parameters(),
                                model_old.named_parameters()):
                            loss_aug += args.tradeoff * torch.sum(
                                fisher[name] * (param_old - param).pow(2)) / 2.

                if args.loss == 'MSLoss':
                    loss = criterion(embed_feat, labels)
                    inter_ = 0
                    dist_ap = 0
                    dist_an = 0
                else:
                    loss, inter_, dist_ap, dist_an = criterion(embed_feat,
                                                               labels)
                loss += loss_aug

                loss.backward()
                optimizer.step()

                running_loss += loss.data[0]
                running_lwf += loss_aug.data[0]
                if epoch == 0 and i == 0:
                    print(50 * '#')
                    print('Train Begin -- HA-HA-HA')

            print('[Epoch %05d]\t Total Loss: %.3f \t LwF Loss: %.3f \t '
                  'Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
                  % (epoch + 1, running_loss, running_lwf, inter_, dist_ap,
                     dist_an))

            if epoch % args.save_step == 0:
                torch.save(model, os.path.join(
                    log_dir, args.method + '_' + args.exp + '_task_' +
                    str(current_task) + '_%d_model.pkl' % epoch))

    if args.method == 'EWC' or args.method == 'MAS':
        fisher = fisher_matrix_diag(model, criterion, train_loader,
                                    number_samples=500)
    return fisher
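# fisher_matrix_diag is called above but not shown. A hedged sketch of the
# usual EWC diagonal-Fisher estimate (average of squared gradients over a few
# hundred samples); the repository's own version may differ in detail:
import torch


def fisher_matrix_diag(model, criterion, loader, number_samples=500):
    fisher = {n: torch.zeros_like(p) for n, p in model.named_parameters()}
    seen = 0
    for inputs, labels in loader:
        if seen >= number_samples:
            break
        inputs, labels = inputs.cuda(), labels.cuda()
        model.zero_grad()
        _, embed_feat = model(inputs)  # assumes the (logits, feature) output above
        out = criterion(embed_feat, labels)
        loss = out[0] if isinstance(out, tuple) else out
        loss.backward()
        for n, p in model.named_parameters():
            if p.grad is not None:
                # accumulate squared gradients, weighted by batch size
                fisher[n] += p.grad.detach() ** 2 * inputs.size(0)
        seen += inputs.size(0)
    return {n: f / max(seen, 1) for n, f in fisher.items()}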
def main(args):
    # save the training log
    log_dir = os.path.join('checkpoints', args.log_dir)
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    if args.r is None:
        model = models.create(args.net, Embed_dim=args.dim)
        # load part of the pre-trained model
        model_dict = model.state_dict()
        # print(model_dict)
        if args.net == 'bn':
            pretrained_dict = torch.load(
                'pretrained_models/bn_inception-239d2248.pth')
        else:
            pretrained_dict = torch.load(
                'pretrained_models/inception_v3_google-1a9a5a14.pth')
        pretrained_dict = {k: v for k, v in pretrained_dict.items()
                           if k in model_dict}
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
    else:
        # resume model
        model = torch.load(args.r)

    model = model.cuda()
    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is saved at %s' % log_dir)

    # fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids = set(map(id, model.Embed.parameters()))
    new_params = [p for p in model.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.parameters()
                   if id(p) not in new_param_ids]
    param_groups = [{'params': base_params, 'lr_mult': 0.1},
                    {'params': new_params, 'lr_mult': 1.0}]

    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = losses.create(args.loss, alpha=args.alpha, k=args.k).cuda()

    data = DataSet.create(args.data, root=None, test=False)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.BatchSize,
        sampler=RandomIdentitySampler(data.train,
                                      num_instances=args.num_instances),
        drop_last=True, num_workers=args.nThreads)

    for epoch in range(args.start, args.epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable (legacy PyTorch <= 0.3 style)
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()

            embed_feat = model(inputs)
            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
            if args.orth > 0:
                loss = orth_reg(model, loss, cof=args.orth)
            loss.backward()
            optimizer.step()

            running_loss += loss.data[0]
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA')

        print('[Epoch %05d]\t Loss: %.3f \t Accuracy: %.3f \t '
              'Pos-Dist: %.3f \t Neg-Dist: %.3f'
              % (epoch + 1, running_loss, inter_, dist_ap, dist_an))

        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))
def train_task(args, train_loader, current_task, prototype={}, pre_index=0):
    num_class_per_task = (args.num_class - args.nb_cl_fg) // args.num_task
    task_range = list(range(
        args.nb_cl_fg + (current_task - 1) * num_class_per_task,
        args.nb_cl_fg + current_task * num_class_per_task))
    if num_class_per_task == 0:
        pass  # JT
    else:
        old_task_factor = args.nb_cl_fg // num_class_per_task + current_task - 1

    log_dir = os.path.join(args.ckpt_dir, args.log_dir)
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(
        os.path.join(log_dir, 'log_task{}.txt'.format(current_task)))
    tb_writer = SummaryWriter(log_dir)
    display(args)

    if 'miniimagenet' in args.data:
        model = models.create('resnet18_imagenet', pretrained=False,
                              feat_dim=args.feat_dim,
                              embed_dim=args.num_class)
    elif 'cifar' in args.data:
        model = models.create('resnet18_cifar', pretrained=False,
                              feat_dim=args.feat_dim,
                              embed_dim=args.num_class)
    # mlp = ClassifierMLP()

    if current_task > 0:
        model = torch.load(os.path.join(
            log_dir, 'task_' + str(current_task - 1).zfill(2) +
            '_%d_model.pkl' % int(args.epochs - 1)))
        model_old = deepcopy(model)
        model_old.eval()
        model_old = freeze_model(model_old)
        # mlp_old = deepcopy(mlp)
        # mlp_old.eval()
        # mlp_old = freeze_model(mlp_old)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # mlp = mlp.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=args.lr_decay_step,
                       gamma=args.lr_decay)
    # optimizer_mlp = torch.optim.Adam(mlp.parameters(), lr=args.lr,
    #                                  weight_decay=args.weight_decay)
    # scheduler_mlp = StepLR(optimizer_mlp, step_size=args.lr_decay_step,
    #                        gamma=args.lr_decay)
    loss_mse = torch.nn.MSELoss(reduction='sum')

    # Loss weight for the gradient penalty used in W-GAN
    lambda_gp = args.lambda_gp
    lambda_lwf = args.gan_tradeoff

    # Initialize generator and discriminator
    if current_task == 0:
        generator = Generator(feat_dim=args.feat_dim,
                              latent_dim=args.latent_dim,
                              hidden_dim=args.hidden_dim,
                              class_dim=args.num_class)
        discriminator = Discriminator(feat_dim=args.feat_dim,
                                      hidden_dim=args.hidden_dim,
                                      class_dim=args.num_class)
    else:
        generator = torch.load(os.path.join(
            log_dir, 'task_' + str(current_task - 1).zfill(2) +
            '_%d_model_generator.pkl' % int(args.epochs_gan - 1)))
        discriminator = torch.load(os.path.join(
            log_dir, 'task_' + str(current_task - 1).zfill(2) +
            '_%d_model_discriminator.pkl' % int(args.epochs_gan - 1)))
        generator_old = deepcopy(generator)
        generator_old.eval()
        generator_old = freeze_model(generator_old)

    cuda = torch.cuda.is_available()  # (undefined in the original snippet)
    FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
    # if args.learn_inner_lr:
    #     learned_lrs = []
    #     for i in range(args.update_steps):
    #         gen_lrs = [Variable(FloatTensor(1).fill_(args.update_lr),
    #                             requires_grad=True)] * len(generator.parameters())
    #         # nway_lrs = [Variable(self.FloatTensor(1).fill_(self.update_lr),
    #         #                      requires_grad=True)] * len(self.nway_net.parameters())
    #         discrim_lrs = [Variable(FloatTensor(1).fill_(args.update_lr),
    #                                 requires_grad=True)] * len(discriminator.parameters())
    #         learned_lrs.append((discrim_lrs, gen_lrs))

    generator = generator.to(device)
    discriminator = discriminator.to(device)

    optimizer_G = torch.optim.Adam(generator.parameters(), lr=args.gan_lr,
                                   betas=(0.5, 0.999))
    optimizer_D = torch.optim.Adam(discriminator.parameters(),
                                   lr=args.gan_lr, betas=(0.5, 0.999))
    # optimizer_lr = torch.optim.Adam(learned_lrs, lr=args.gan_lr,
    #                                 betas=(0.5, 0.999))
    # scheduler_G = StepLR(optimizer_G, step_size=200, gamma=0.3)
    # scheduler_D = StepLR(optimizer_D, step_size=200, gamma=0.3)

    y_onehot = torch.FloatTensor(args.meta_batch_size, args.num_class)

    for p in generator.parameters():  # freeze the generator
        p.requires_grad = False

    if current_task > 0:
        model = model.eval()

    for epoch in range(args.epochs):
        loss_log = {'C/loss': 0.0, 'C/loss_aug': 0.0,
                    'C/loss_cls': 0.0, 'C/loss_cls_q': 0.0}
        scheduler.step()

        ##### MAML on the feature extractor
        # db = DataLoader(mini, args.meta_batch_size, shuffle=True,
        #                 num_workers=1, pin_memory=True)
        for step, (x_spt, y_spt, x_qry, y_qry) in enumerate(train_loader):
            x_spt, y_spt = x_spt.to(device), y_spt.to(device)
            x_qry, y_qry = x_qry.to(device), y_qry.to(device)
            loss = torch.zeros(1).to(device)
            loss_cls = torch.zeros(1).to(device)
            loss_aug = torch.zeros(1).to(device)
            loss_tmp = torch.zeros(1).to(device)
            meta_batch_size, setsz, c_, h, w = x_spt.size()
            querysz = x_qry.size(1)

            losses_q = [0 for _ in range(args.update_step + 1)]  # losses_q[i] is the loss on step i
            corrects = [0 for _ in range(args.update_step + 1)]

            for i in range(args.meta_batch_size):
                # 1. run the i-th task and compute the loss for k=0
                embed_feat = model(x_spt[i])
                if current_task == 0:
                    soft_feat = model.embed(embed_feat)
                    # y_pred = mlp(soft_feat)
                    loss_cls = torch.nn.CrossEntropyLoss()(soft_feat, y_spt[i])
                    loss += loss_cls
                else:
                    embed_feat_old = model_old(x_spt[i])

                ### Feature Extractor Loss
                if current_task > 0:
                    loss_aug = torch.dist(embed_feat, embed_feat_old, 2)
                    # loss_tmp += args.tradeoff * loss_aug * old_task_factor
                    loss += args.tradeoff * loss_aug * old_task_factor

                ### Replay and Classification Loss
                if current_task > 0:
                    embed_sythesis = []
                    embed_label_sythesis = []
                    ind = list(range(len(pre_index)))
                    if args.mean_replay:
                        for _ in range(setsz):
                            np.random.shuffle(ind)
                            tmp = prototype['class_mean'][ind[0]] + \
                                np.random.normal() * prototype['class_std'][ind[0]]
                            embed_sythesis.append(tmp)
                            embed_label_sythesis.append(
                                prototype['class_label'][ind[0]])
                        embed_sythesis = np.asarray(embed_sythesis)
                        embed_label_sythesis = np.asarray(embed_label_sythesis)
                        embed_sythesis = torch.from_numpy(embed_sythesis).to(device)
                        embed_label_sythesis = torch.from_numpy(
                            embed_label_sythesis)
                    else:
                        for _ in range(setsz):
                            np.random.shuffle(ind)
                            embed_label_sythesis.append(pre_index[ind[0]])
                        embed_label_sythesis = np.asarray(embed_label_sythesis)
                        embed_label_sythesis = torch.from_numpy(
                            embed_label_sythesis)
                        y_onehot.zero_()
                        y_onehot.scatter_(1, embed_label_sythesis[:, None], 1)
                        syn_label_pre = y_onehot.to(device)

                        z = torch.Tensor(np.random.normal(
                            0, 1, (setsz, args.latent_dim))).to(device)
                        embed_sythesis = generator(z, syn_label_pre)

                    embed_sythesis = torch.cat((embed_feat, embed_sythesis))
                    embed_label_sythesis = torch.cat(
                        (y_spt[i], embed_label_sythesis.to(device)))

                    soft_feat_syt = model.embed(embed_sythesis)
                    batch_size1 = x_spt[i].shape[0]  # (`inputs1` was undefined in the original)
                    batch_size2 = embed_feat.shape[0]
                    # soft_feat_syt = mlp(soft_feat_syt)
                    loss_cls = torch.nn.CrossEntropyLoss()(
                        soft_feat_syt[:batch_size1],
                        embed_label_sythesis[:batch_size1])
                    loss_cls_old = torch.nn.CrossEntropyLoss()(
                        soft_feat_syt[batch_size2:],
                        embed_label_sythesis[batch_size2:])
                    loss_cls += loss_cls_old * old_task_factor
                    loss_cls /= args.nb_cl_fg // num_class_per_task + current_task
                    loss += loss_cls

                # loss = F.cross_entropy(embed_feat, y_spt[i])
                grad = torch.autograd.grad(loss, model.parameters(),
                                           create_graph=True,
                                           retain_graph=True)
                # fast_weights = list(map(lambda p: p[1] - args.update_lr * p[0],
                #                         zip(grad, model.parameters())))
                fast_weights_dict = fast_weights(grad, model.state_dict(),
                                                 args.update_lr)

                # this is the loss and accuracy before the first update
                with torch.no_grad():
                    # [setsz, nway]
                    embed_feat_q = model(x_qry[i])
                    soft_feat_q = model.embed(embed_feat_q)
                    # y_pred_q = mlp(soft_feat_q)
                    loss_q = torch.nn.CrossEntropyLoss()(soft_feat_q, y_qry[i])
                    losses_q[0] += loss_q
                    pred_q = F.softmax(soft_feat_q, dim=1).argmax(dim=1)
                    correct = torch.eq(pred_q, y_qry[i]).sum().item()
                    corrects[0] = corrects[0] + correct

                # this is the loss and accuracy after the first update
                with torch.no_grad():
                    # [setsz, nway]
                    model.load_state_dict(fast_weights_dict)
                    embed_feat_q = model(x_qry[i])
                    soft_feat_q = model.embed(embed_feat_q)
                    # y_pred_q = mlp(soft_feat_q)
                    loss_q = torch.nn.CrossEntropyLoss()(soft_feat_q, y_qry[i])
                    losses_q[1] += loss_q
                    # [setsz]
                    pred_q = F.softmax(soft_feat_q, dim=1).argmax(dim=1)
                    correct = torch.eq(pred_q, y_qry[i]).sum().item()
                    corrects[1] = corrects[1] + correct

                for k in range(1, args.update_step):
                    # 1. run the i-th task and compute the loss for k=1..K-1
                    model.load_state_dict(fast_weights_dict)
                    embed_feat = model(x_spt[i])
                    loss = torch.zeros(1).to(device)
                    if current_task > 0:
                        embed_feat_old = model_old(x_spt[i])
                        loss_aug = torch.dist(embed_feat, embed_feat_old, 2)
                        loss += args.tradeoff * loss_aug * old_task_factor

                        soft_feat_syt = model.embed(embed_sythesis)
                        batch_size1 = x_spt[i].shape[0]  # (`inputs1` was undefined in the original)
                        batch_size2 = embed_feat.shape[0]
                        # soft_feat_syt = mlp(soft_feat_syt)
                        loss_cls = torch.nn.CrossEntropyLoss()(
                            soft_feat_syt[:batch_size1],
                            embed_label_sythesis[:batch_size1])
                        loss_cls_old = torch.nn.CrossEntropyLoss()(
                            soft_feat_syt[batch_size2:],
                            embed_label_sythesis[batch_size2:])
                        loss_cls += loss_cls_old * old_task_factor
                        loss_cls /= args.nb_cl_fg // num_class_per_task + current_task
                        loss += loss_cls
                    else:
                        soft_feat = model.embed(embed_feat)
                        # y_pred = mlp(soft_feat)
                        loss_cls = torch.nn.CrossEntropyLoss()(soft_feat,
                                                               y_spt[i])
                        loss += loss_cls

                    # 2. compute grad on theta_pi
                    grad = torch.autograd.grad(loss, model.parameters(),
                                               create_graph=True,
                                               retain_graph=True)
                    # 3. theta_pi = theta_pi - train_lr * grad
                    fast_weights_dict = fast_weights(grad, model.state_dict(),
                                                     args.update_lr)
                    model.load_state_dict(fast_weights_dict)

                    embed_feat_q = model(x_qry[i])
                    soft_feat_q = model.embed(embed_feat_q)
                    # loss_q is overwritten; only the loss from the last
                    # update step is kept
                    # soft_feat_q = mlp(soft_feat_q)
                    loss_q = F.cross_entropy(soft_feat_q, y_qry[i])  # (was the nonexistent torch.nn.cross_entropy)
                    losses_q[k + 1] += loss_q

                    with torch.no_grad():
                        pred_q = F.softmax(soft_feat_q, dim=1).argmax(dim=1)
                        correct = torch.eq(pred_q, y_qry[i]).sum().item()
                        corrects[k + 1] = corrects[k + 1] + correct

            # end of all tasks
            # sum over all losses on the query set across all tasks
            loss_q = losses_q[-1] / meta_batch_size
            # loss += loss_q

            # optimize theta parameters
            optimizer.zero_grad()
            # optimizer_mlp.zero_grad()
            # loss.backward()
            loss_q.backward()
            # print('meta update')
            # for p in self.net.parameters()[:5]:
            #     print(torch.norm(p).item())
            optimizer.step()
            # optimizer_mlp.step()

            accs = np.array(corrects) / (querysz * meta_batch_size)

            loss_log['C/loss'] += loss.item()
            loss_log['C/loss_cls'] += loss_cls.item()
            loss_log['C/loss_aug'] += args.tradeoff * loss_aug.item() \
                if args.tradeoff != 0 else 0
            loss_log['C/loss_cls_q'] += loss_q.item()
            del loss_cls
            del loss_q
            if epoch == 0 and i == 0:
                print(50 * '#')
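# The fast_weights helper used in the MAML loop above is not shown. A minimal
# sketch of the inner-loop step it presumably performs, written against
# model.named_parameters() so BN buffers are left untouched (hypothetical
# reconstruction; the real helper takes model.state_dict()):
from collections import OrderedDict


def fast_weights_from_params(grads, named_parameters, lr):
    # One inner-loop SGD step: theta' = theta - lr * grad, with `grads`
    # aligned with the parameters as returned by torch.autograd.grad.
    return OrderedDict((name, param - lr * grad)
                       for (name, param), grad in zip(named_parameters, grads))

# Loading back with strict=False keeps buffers absent from the dict, e.g.:
# model.load_state_dict(
#     fast_weights_from_params(grad, model.named_parameters(), args.update_lr),
#     strict=False)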
parser.add_argument('--batch_size', type=int, default=64)
parser.add_argument('--z_dim', type=int, default=100)
parser.add_argument('--lr_adam', type=float, default=2e-4)
parser.add_argument('--lr_rmsprop', type=float, default=2e-4)
parser.add_argument('--beta1', type=float, default=0.5, help='for adam')
parser.add_argument('--slope', type=float, default=0.2, help='for leaky ReLU')
parser.add_argument('--std', type=float, default=0.02, help='for weight init')
parser.add_argument('--dropout', type=float, default=0.2)
parser.add_argument('--clamp', type=float, default=1e-2)
# note: argparse's type=bool treats any non-empty string as True
parser.add_argument('--wasserstein', type=bool, default=False)
opt = parser.parse_args()

if opt.clean_ckpt:
    shutil.rmtree(opt.ckpt_path)
os.makedirs(opt.ckpt_path, exist_ok=True)
logger = logging.Logger(opt.ckpt_path)

opt.seed = 1
torch.manual_seed(opt.seed)
torch.cuda.manual_seed(opt.seed)
cudnn.benchmark = True
EPS = 1e-12

transform = transforms.Compose([
    transforms.Scale(opt.image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
dataset = dset.CIFAR10(root=opt.dataset_path, train=True, download=False,
                       transform=transform)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size,
                                          shuffle=True,
                                          num_workers=opt.num_workers)

D = model.Discriminator(opt).cuda()
G = model.Generator(opt).cuda()
def main(args):
    s_ = time.time()

    # save the training log
    save_dir = args.save_dir
    mkdir_if_missing(save_dir)

    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    display(args)
    start = 0

    model = models.create(args.net, pretrained=True, dim=args.dim)

    if args.r is None:
        model_dict = model.state_dict()
        if args.init == 'orth':
            # orthogonal init
            w = model_dict['classifier.0.weight']
            model_dict['classifier.0.weight'] = torch.nn.init.orthogonal_(w)
        else:
            print('initialize the FC layer with kaiming-normal')
            w = model_dict['classifier.0.weight']
            model_dict['classifier.0.weight'] = torch.nn.init.kaiming_normal_(w)
        # zero bias
        model_dict['classifier.0.bias'] = torch.zeros(args.dim)
        model.load_state_dict(model_dict)
    else:
        # resume model
        chk_pt = load_checkpoint(args.r)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        model.load_state_dict(weight)

    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # freeze BN
    if args.BN == 1:
        print(40 * '#', 'BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')

    # Fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids = set(map(id, model.module.classifier.parameters()))
    new_params = [p for p in model.module.parameters()
                  if id(p) in new_param_ids]
    base_params = [p for p in model.module.parameters()
                   if id(p) not in new_param_ids]
    param_groups = [{'params': base_params, 'lr_mult': 0.0},
                    {'params': new_params, 'lr_mult': 1.0}]

    print('initial model is saved at %s' % save_dir)

    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)

    if args.loss == 'center-nca':
        criterion = losses.create(args.loss, alpha=args.alpha).cuda()
    elif args.loss == 'cluster-nca':
        criterion = losses.create(args.loss, alpha=args.alpha,
                                  beta=args.beta).cuda()
    elif args.loss == 'neighbour':
        criterion = losses.create(args.loss, k=args.k,
                                  margin=args.margin).cuda()
    elif args.loss == 'nca':
        criterion = losses.create(args.loss, alpha=args.alpha,
                                  k=args.k).cuda()
    elif args.loss == 'triplet':
        criterion = losses.create(args.loss, alpha=args.alpha).cuda()
    elif args.loss == 'bin' or args.loss == 'ori_bin':
        criterion = losses.create(args.loss, margin=args.margin,
                                  alpha=args.alpha)
    else:
        criterion = losses.create(args.loss).cuda()
    # Decor_loss = losses.create('decor').cuda()

    data = DataSet.create(args.data, root=None)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.BatchSize,
        sampler=FastRandomIdentitySampler(data.train,
                                          num_instances=args.num_instances),
        drop_last=True, pin_memory=True, num_workers=args.nThreads)

    # save the train information
    epoch_list = list()
    loss_list = list()
    pos_list = list()
    neg_list = list()

    for epoch in range(start, args.epochs):
        epoch_list.append(epoch)
        running_loss = 0.0
        running_pos = 0.0
        running_neg = 0.0

        if epoch == 1:
            optimizer.param_groups[0]['lr_mult'] = 0.1  # (was the typo 'lr_mul')

        if (epoch == 1000 and args.data == 'car') or \
                (epoch == 550 and args.data == 'cub') or \
                (epoch == 100 and args.data in ['shop', 'jd']):
            param_groups = [{'params': base_params, 'lr_mult': 0.1},
                            {'params': new_params, 'lr_mult': 1.0}]
            optimizer = torch.optim.Adam(param_groups, lr=0.1 * args.lr,
                                         weight_decay=args.weight_decay)

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable; labels is a cuda.LongTensor Variable
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()

            embed_feat = model(inputs)

            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
            # decor_loss = Decor_loss(embed_feat)
            # loss += args.theta * decor_loss

            if not type(loss) == torch.Tensor:
                print('this batch produced no valid loss; skipping backward')
                continue

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_neg += dist_an
            running_pos += dist_ap
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA-HA-AH-AH-AH-AH --')

        loss_list.append(running_loss)
        pos_list.append(running_pos / (i + 1))
        neg_list.append(running_neg / (i + 1))
        print('[Epoch %03d]\t Loss: %.3f \t Accuracy: %.3f \t '
              'Pos-Dist: %.3f \t Neg-Dist: %.3f'
              % (epoch + 1, running_loss / (i + 1), inter_, dist_ap, dist_an))

        if (epoch + 1) % args.save_step == 0:
            if use_gpu:  # assumed module-level flag, e.g. torch.cuda.is_available()
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            }, is_best=False,
                fpath=osp.join(args.save_dir,
                               'ckp_ep' + str(epoch + 1) + '.pth.tar'))

    np.savez(os.path.join(save_dir, "result.npz"), epoch=epoch_list,
             loss=loss_list, pos=pos_list, neg=neg_list)
    t = time.time() - s_
    print('training takes %.2f hour' % (t / 3600))
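# Many of these scripts attach an 'lr_mult' key to each parameter group but
# the code that consumes it is outside the excerpts. A hedged sketch of the
# usual pattern, where a helper rescales each group's learning rate
# (`adjust_learning_rate` is a hypothetical name, not shown in this code):
def adjust_learning_rate(optimizer, base_lr):
    # torch optimizers keep extra keys like 'lr_mult' in param_groups,
    # so a decay schedule can rescale every group from one base value
    for group in optimizer.param_groups:
        group['lr'] = base_lr * group.get('lr_mult', 1.0)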
def main(args):
    # s_ = time.time()
    print(torch.cuda.get_device_properties(device=0).total_memory)
    torch.cuda.empty_cache()
    print(args)

    save_dir = args.save_dir
    mkdir_if_missing(save_dir)

    num_txt = len(glob.glob(save_dir + "/*.txt"))
    sys.stdout = logging.Logger(
        os.path.join(save_dir, "log_" + str(num_txt) + ".txt"))
    display(args)
    start = 0

    model = models.create(args.net, pretrained=args.pretrained, dim=args.dim,
                          self_supervision_rot=args.self_supervision_rot)

    all_pretrained = glob.glob(save_dir + "/*.pth.tar")
    if (args.resume is None) or (len(all_pretrained) == 0):
        model_dict = model.state_dict()
    else:
        # resume from the latest checkpoint in save_dir
        all_pretrained_epochs = sorted(
            [int(x.split("/")[-1][6:-8]) for x in all_pretrained])
        args.resume = os.path.join(
            save_dir, "ckp_ep" + str(all_pretrained_epochs[-1]) + ".pth.tar")
        print('load model from {}'.format(args.resume))
        chk_pt = load_checkpoint(args.resume)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        model.load_state_dict(weight)

    model = torch.nn.DataParallel(model)
    model = model.cuda()

    fake_centers_dir = os.path.join(args.save_dir, "fake_center.npy")
    if np.sum(["train_1.txt" in x
               for x in glob.glob(args.save_dir + "/**/*")]) == 0:
        if args.rot_only:
            create_fake_labels(None, None, args)
        else:
            data = dataset.Dataset(args.data, ratio=args.ratio,
                                   width=args.width,
                                   origin_width=args.origin_width,
                                   root=args.data_root,
                                   self_supervision_rot=0, mode="test",
                                   rot_bt=args.rot_bt,
                                   corruption=args.corruption, args=args)
            fake_train_loader = torch.utils.data.DataLoader(
                data.train, batch_size=100, shuffle=False, drop_last=False,
                pin_memory=True, num_workers=args.nThreads)
            train_feature, train_labels = extract_features(
                model, fake_train_loader, print_freq=1e5, metric=None,
                pool_feature=args.pool_feature, org_feature=True)
            create_fake_labels(train_feature, train_labels, args)
            del train_feature
        fake_centers = "k-means++"
        torch.cuda.empty_cache()
    elif os.path.exists(fake_centers_dir):
        fake_centers = np.load(fake_centers_dir)
    else:
        fake_centers = "k-means++"
    time.sleep(60)

    model.train()
    # freeze BN
    if (args.freeze_BN is True) and (args.pretrained):
        print(40 * '#', '\n BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')

    # Fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids = set(map(id, model.module.classifier.parameters()))
    new_rot_param_ids = set()
    if args.self_supervision_rot:
        new_rot_param_ids = set(
            map(id, model.module.classifier_rot.parameters()))
        print(new_rot_param_ids)
    new_params = [p for p in model.module.parameters()
                  if id(p) in new_param_ids]
    new_rot_params = [p for p in model.module.parameters()
                      if id(p) in new_rot_param_ids]
    base_params = [p for p in model.module.parameters()
                   if (id(p) not in new_param_ids)
                   and (id(p) not in new_rot_param_ids)]
    param_groups = [{'params': base_params},
                    {'params': new_params},
                    {'params': new_rot_params, 'lr': args.rot_lr}]

    print('initial model is saved at %s' % save_dir)

    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = losses.create(args.loss, margin=args.margin, alpha=args.alpha,
                              beta=args.beta, base=args.loss_base).cuda()

    data = dataset.Dataset(args.data, ratio=args.ratio, width=args.width,
                           origin_width=args.origin_width, root=args.save_dir,
                           self_supervision_rot=args.self_supervision_rot,
                           rot_bt=args.rot_bt, corruption=1, args=args)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.batch_size,
        sampler=FastRandomIdentitySampler(data.train,
                                          num_instances=args.num_instances),
        drop_last=True, pin_memory=True, num_workers=args.nThreads)

    # save the train information
    for epoch in range(start, args.epochs):
        train(epoch=epoch, model=model, criterion=criterion,
              optimizer=optimizer, train_loader=train_loader, args=args)

        if (epoch + 1) % args.save_step == 0 or epoch == 0:
            if use_gpu:  # assumed module-level flag
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            }, is_best=False,
                fpath=osp.join(args.save_dir,
                               'ckp_ep' + str(epoch + 1) + '.pth.tar'))

        if ((epoch + 1) % args.up_step == 0) and (not args.rot_only):
            # rewrite the train_1.txt file
            data = dataset.Dataset(args.data, ratio=args.ratio,
                                   width=args.width,
                                   origin_width=args.origin_width,
                                   root=args.data_root,
                                   self_supervision_rot=0, mode="test",
                                   rot_bt=args.rot_bt,
                                   corruption=args.corruption, args=args)
            fake_train_loader = torch.utils.data.DataLoader(
                data.train, batch_size=args.batch_size, shuffle=False,
                drop_last=False, pin_memory=True, num_workers=args.nThreads)
            train_feature, train_labels = extract_features(
                model, fake_train_loader, print_freq=1e5, metric=None,
                pool_feature=args.pool_feature,
                org_feature=(args.dim % 64 != 0))
            fake_centers = create_fake_labels(train_feature, train_labels,
                                              args, init_centers=fake_centers)
            del train_feature
            torch.cuda.empty_cache()
            time.sleep(60)
            np.save(fake_centers_dir, fake_centers)

            # reload the data
            data = dataset.Dataset(
                args.data, ratio=args.ratio, width=args.width,
                origin_width=args.origin_width, root=args.save_dir,
                self_supervision_rot=args.self_supervision_rot,
                rot_bt=args.rot_bt, corruption=1, args=args)
            train_loader = torch.utils.data.DataLoader(
                data.train, batch_size=args.batch_size,
                sampler=FastRandomIdentitySampler(
                    data.train, num_instances=args.num_instances),
                drop_last=True, pin_memory=True, num_workers=args.nThreads)

            # test on the testing data
            # extract_recalls(data=args.data, data_root=args.data_root,
            #                 width=args.width, net=args.net, checkpoint=None,
            #                 dim=args.dim, batch_size=args.batch_size,
            #                 nThreads=args.nThreads,
            #                 pool_feature=args.pool_feature,
            #                 gallery_eq_query=args.gallery_eq_query,
            #                 model=model)
            model.train()
            if (args.freeze_BN is True) and (args.pretrained):
                print(40 * '#', '\n BatchNorm frozen')
                model.apply(set_bn_eval)
# -*- coding: utf-8 -*-
import json

import utils.logging as logging
from django.core.files.storage import default_storage
from django.core.files.base import ContentFile

from utils.gzip import GzipFile, gunzip_bytes, is_gzipped
from utils.encoders import DjangoPartialModelJsonEncoder

logger = logging.Logger(__name__)


def get_object(prefix):
    """Retrieve an object from S3 and load it into memory."""
    with default_storage.open(prefix) as fo:
        content = fo.read()
    if is_gzipped(content):
        content = gunzip_bytes(content)
    if isinstance(content, (bytes,)):
        content = content.decode()
    return json.loads(content)


def put_object(prefix, content, gzipped=True):
    """Load a blob into S3."""
    FileClass = GzipFile if gzipped else ContentFile
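# Example call for the helper above; the key and field names are hypothetical:
# payload = get_object('exports/run-42/results.json.gz')  # transparently gunzips
# print(payload['status'])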
def main(args):
    print(args.p_lambda)
    save_dir = args.save_dir
    mkdir_if_missing(save_dir)
    print("DRO:", args.DRO)

    # sys.stdout: output from the console
    # sys.stderr: exceptions from python
    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))    # sys.stdout --> 'log.txt'
    sys.stderr = logging.Logger(os.path.join(save_dir, 'error.txt'))  # sys.stderr --> 'error.txt'
    display(args)
    start = 0

    model = models.create(args.net, pretrained=True, dim=args.dim)
    save_checkpoint({
        'state_dict': model.state_dict(),
        'epoch': 0,
    }, is_best=False,
        fpath=osp.join(args.save_dir, 'ckp_ep' + str(start) + '.pth.tar'))

    # for vgg and densenet
    if args.resume is None:
        model_dict = model.state_dict()
    else:
        # resume model
        print('load model from {}'.format(args.resume))
        chk_pt = load_checkpoint(args.resume)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        model.load_state_dict(weight)

    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # freeze BN
    if args.freeze_BN is True:
        print(40 * '#', '\n BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')

    optimizer = torch.optim.Adam(model.module.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    print("--------------------------:", args.p_lambda)
    criterion = DRO.create(args.DRO, loss=args.loss, margin=args.margin,
                           alpha=args.alpha, beta=args.beta,
                           p_lambda=args.p_lambda,
                           p_lambda_neg=args.p_lambda_neg, K=args.K,
                           select_TOPK_all=args.select_TOPK_all,
                           p_choice=args.p_choice,
                           truncate_p=args.truncate_p).cuda()
    # Decor_loss = losses.create('decode').cuda()

    print("Train, RAE:", args.mode)
    data = DataSet.create(args.data, ratio=args.ratio, width=args.width,
                          origin_width=args.origin_width,
                          root=args.data_root, RAE=args.mode)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.batch_size,
        sampler=FastRandomIdentitySampler(data.train,
                                          num_instances=args.num_instances),
        drop_last=True, pin_memory=True, num_workers=args.nThreads)

    # save the train information
    for epoch in range(start, args.epochs):
        train(epoch=epoch, model=model, criterion=criterion,
              optimizer=optimizer, train_loader=train_loader, args=args)
        if epoch == 1:
            optimizer.param_groups[0]['lr_mult'] = 0.1  # (was the typo 'lr_mul')
        if (epoch + 1) % args.save_step == 0 or epoch == 0:
            if use_gpu:  # assumed module-level flag
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            }, is_best=False,
                fpath=osp.join(args.save_dir,
                               'ckp_ep' + str(epoch + 1) + '.pth.tar'))
def main(args):
    num_class_dict = {'cub': int(100), 'car': int(98)}

    # save the training log
    log_dir = os.path.join(args.checkpoints, args.log_dir)
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    if args.r is None:
        model = models.create(args.net, Embed_dim=args.dim)
        # load part of the pre-trained model
        model_dict = model.state_dict()
        # print(model_dict)
        if args.net == 'bn':
            pretrained_dict = torch.load(
                'pretrained_models/bn_inception-239d2248.pth')
        else:
            pretrained_dict = torch.load(
                'pretrained_models/inception_v3_google-1a9a5a14.pth')
        pretrained_dict = {k: v for k, v in pretrained_dict.items()
                           if k in model_dict}
        model_dict.update(pretrained_dict)

        # orthogonal init
        if args.init == 'orth':
            print('initialize the FC layer orthogonally')
            _, _, v = torch.svd(model_dict['Embed.linear.weight'])
            model_dict['Embed.linear.weight'] = v.t()
        # zero bias
        model_dict['Embed.linear.bias'] = torch.zeros(args.dim)
        model.load_state_dict(model_dict)
    else:
        # resume model
        model = torch.load(args.r)
    model = model.cuda()

    # compute the cluster centers for each class here
    def normalize(x):
        norm = x.norm(dim=1, p=2, keepdim=True)
        x = x.div(norm.expand_as(x))
        return x

    data = DataSet.create(args.data, root=None, test=False)
    if args.center_init == 'cluster':
        data_loader = torch.utils.data.DataLoader(
            data.train, batch_size=args.BatchSize, shuffle=False,
            drop_last=False)
        features, labels = extract_features(model, data_loader,
                                            print_freq=32, metric=None)
        features = [feature.resize_(1, args.dim) for feature in features]
        features = torch.cat(features)
        features = features.numpy()
        labels = np.array(labels)
        centers, center_labels = cluster_(features, labels,
                                          n_clusters=args.n_cluster)
        center_labels = [int(center_label) for center_label in center_labels]
        centers = Variable(torch.FloatTensor(centers).cuda(),
                           requires_grad=True)
        center_labels = Variable(torch.LongTensor(center_labels)).cuda()
        print(40 * '#', '\n Clustering Done')
    else:
        center_labels = int(args.n_cluster) * list(
            range(num_class_dict[args.data]))
        center_labels = Variable(torch.LongTensor(center_labels).cuda())
        centers = normalize(torch.rand(
            num_class_dict[args.data] * args.n_cluster, args.dim))
        centers = Variable(centers.cuda(), requires_grad=True)

    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is saved at %s' % log_dir)

    # fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids = set(map(id, model.Embed.parameters()))
    new_params = [p for p in model.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.parameters()
                   if id(p) not in new_param_ids]
    param_groups = [
        {'params': base_params, 'lr_mult': 0.1},
        {'params': new_params, 'lr_mult': 1.0},
        {'params': centers, 'lr_mult': 1.0}]

    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)

    cluster_counter = np.zeros([num_class_dict[args.data], args.n_cluster])
    criterion = losses.create(args.loss, alpha=args.alpha, centers=centers,
                              center_labels=center_labels,
                              cluster_counter=cluster_counter).cuda()

    # random sampling to generate the mini-batch
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.BatchSize, shuffle=True, drop_last=False)

    # save the train information
    epoch_list = list()
    loss_list = list()
    pos_list = list()
    neg_list = list()

    # _mask = Variable(torch.ByteTensor(np.ones([2, 4]))).cuda()
    dtype = torch.ByteTensor
    _mask = torch.ones(int(num_class_dict[args.data]),
                       args.n_cluster).type(dtype)
    _mask = Variable(_mask).cuda()

    for epoch in range(args.start, args.epochs):
        epoch_list.append(epoch)
        running_loss = 0.0
        running_pos = 0.0
        running_neg = 0.0
        to_zero(cluster_counter)

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable (legacy PyTorch <= 0.3 style)
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()
            # centers.zero_grad()

            embed_feat = model(inputs)

            # update the network weights
            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels,
                                                       _mask)
            loss.backward()
            optimizer.step()
            centers.data = normalize(centers.data)

            running_loss += loss.data[0]
            running_neg += dist_an
            running_pos += dist_ap
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA')

            if i % 10 == 9:
                print('[Epoch %05d Iteration %2d]\t Loss: %.3f \t '
                      'Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
                      % (epoch + 1, i + 1, loss.data[0], inter_, dist_ap,
                         dist_an))
                # show the cluster-membership counters here
                print(cluster_counter)

        loss_list.append(running_loss)
        pos_list.append(running_pos / i)
        neg_list.append(running_neg / i)

        # update _mask so clusters with one member or none stay silent
        # _mask = Variable(torch.FloatTensor(cluster_counter) > 1).cuda()
        # cluster_distribution = torch.sum(_mask, 1).cpu().data.numpy().tolist()
        # print(cluster_distribution)
        # print('[Epoch %05d]\t Loss: %.3f \t Accuracy: %.3f \t '
        #       'Pos-Dist: %.3f \t Neg-Dist: %.3f'
        #       % (epoch + 1, running_loss, inter_, dist_ap, dist_an))
        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))

    np.savez(os.path.join(log_dir, "result.npz"), epoch=epoch_list,
             loss=loss_list, pos=pos_list, neg=neg_list)
def main(args):
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    dir = '%s_%s_dis_%s_%s_%s_%0.2f_%s' % (args.data, args.loss, args.net,
                                           args.TNet, args.Ttype, args.lamda,
                                           args.lr)
    log_dir = os.path.join('checkpoints', dir)
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    # Teacher network
    if args.r is None:
        Network_T = args.TNet
        model_T = models.create(Network_T, Embed_dim=args.dim)
        model_dict_T = model_T.state_dict()
        if args.data == 'cub':
            model_T = torch.load('checkpoints/cub_Tmodel.pkl')
        elif args.data == 'car':
            model_T = torch.load('checkpoints/car_Tmodel.pkl')
        elif args.data == 'product':
            model_T = torch.load('checkpoints/product_Tmodel.pkl')
    else:
        model_T = torch.load(args.r)
    model_T = model_T.cuda()
    model_T.eval()

    # Student network
    if args.r is None:
        model = models.create(args.net, Embed_dim=args.dim)
        model_dict = model.state_dict()
        if args.net == 'bn':
            pretrained_dict = torch.load(
                'pretrained_models/bn_inception-239d2248.pth')
        elif args.net == 'resnet101':
            pretrained_dict = torch.load(
                'pretrained_models/resnet101-5d3b4d8f.pth')
        elif args.net == 'resnet50':
            pretrained_dict = torch.load(
                'pretrained_models/resnet50-19c8e357.pth')
        elif args.net == 'resnet34':
            pretrained_dict = torch.load(
                'pretrained_models/resnet34-333f7ec4.pth')
        elif args.net == 'resnet18':
            pretrained_dict = torch.load(
                'pretrained_models/resnet18-5c106cde.pth')
        elif args.net == 'inception':
            pretrained_dict = torch.load(
                'pretrained_models/inception_v3_google-1a9a5a14.pth')
        else:
            print('Oops! That was not a valid model.')
        pretrained_dict = {k: v for k, v in pretrained_dict.items()
                           if k in model_dict}
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
    else:
        model = torch.load(args.r)
    if args.continue_train:
        model = torch.load(log_dir + '/%d_model.pkl' % (args.start))
    model = model.cuda()

    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is saved at %s' % log_dir)

    new_param_ids = set(map(id, model.Embed.parameters()))
    new_params = [p for p in model.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.parameters()
                   if id(p) not in new_param_ids]
    param_groups = [{'params': base_params, 'lr_mult': 0.1},
                    {'params': new_params, 'lr_mult': 1.0}]
    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)

    if args.loss == 'knnsoftmax':
        criterion = losses.create(args.loss, alpha=args.alpha,
                                  k=args.k).cuda()
    else:
        criterion = losses.create(args.loss).cuda()

    data = DataSet.create(args.data, root=None, test=False)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.BatchSize,
        sampler=RandomIdentitySampler(data.train,
                                      num_instances=args.num_instances),
        drop_last=True, num_workers=args.nThreads)

    loss_log = [[] for _ in range(3)]
    loss_dis = [[] for _ in range(3)]

    for epoch in range(args.start, args.epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()

            embed_feat = model(inputs)
            embed_feat_T = model_T(inputs)

            loss_net, inter_, dist_ap, dist_an, dis_pos, dis_neg, dis = \
                criterion(embed_feat, labels)
            loss_net_T, inter_T, dist_ap_T, dist_an_T, dis_pos_T, dis_neg_T, \
                dis_T = criterion(embed_feat_T, labels)

            lamda = args.lamda
            if args.Ttype == 'relative':
                loss_dis[0].append(
                    torch.mean(torch.norm(dis - dis_T, p=2)).data[0])
                loss_dis[1].append(0.0)
                loss_dis[2].append(0.0)
                loss_distillation = 0.0 * torch.mean(
                    F.pairwise_distance(embed_feat, embed_feat_T))
                loss_distillation += torch.mean(torch.norm(dis - dis_T, p=2))
                loss = loss_net + lamda * loss_distillation
            elif args.Ttype == 'absolute':
                loss_dis[0].append(0.0)
                loss_dis[1].append(0.0)
                loss_dis[2].append(torch.mean(
                    F.pairwise_distance(embed_feat, embed_feat_T)).data[0])
                loss_distillation = torch.mean(
                    F.pairwise_distance(embed_feat, embed_feat_T))
                loss = loss_net + lamda * loss_distillation
            else:
                print('This type does not exist')

            loss.backward()
            optimizer.step()

            running_loss += loss.data[0]
            loss_log[0].append(loss.data[0])
            loss_log[1].append(loss_net.data[0])
            loss_log[2].append(lamda * loss_distillation.data[0])
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA')

        print('[Epoch %05d]\t Loss_net: %.3f \t Loss_distillation: %.3f \t '
              'Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
              % (epoch + 1, loss_net, lamda * loss_distillation, inter_,
                 dist_ap, dist_an))

        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))

    # plot the loss curves
    line1, = plt.plot(loss_log[0], 'r-', label="Total loss")
    line2, = plt.plot(loss_log[1], 'b-', label="KNNsoftmax loss")
    line3, = plt.plot(loss_log[2], 'g--', label="Distillation loss")
    plt.title('%s_%s_dis_%s_%s_%s_%0.2f' % (args.data, args.loss, args.net,
                                            args.TNet, args.Ttype, args.lamda))
    plt.legend([line1, line2, line3],
               ['Total loss', 'Contrastive loss', 'Distance loss'])
    plt.savefig('./fig/%s_%s_dis_%s_%s_%s_%0.2f.jpg'
                % (args.data, args.loss, args.net, args.TNet, args.Ttype,
                   args.lamda))
def main(args):
    s_ = time.time()

    # save the training log
    log_dir = args.log_dir
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    if args.r is None:
        model = models.create(args.net)
        model = load_parameter(model)
    else:
        # resume model
        print('Resume from model at Epoch %d' % args.start)
        model = torch.load(args.r)
    model = model.cuda()

    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is saved at %s' % log_dir)

    # fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids = set.union(
        set(map(id, model.Embedding.parameters())),
        set(map(id, model.attention_blocks.parameters())))
    new_params = [p for p in model.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.parameters()
                   if id(p) not in new_param_ids]
    param_groups = [{'params': base_params, 'lr_mult': 0.0},
                    {'params': new_params, 'lr_mult': 1.0}]

    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)

    if args.loss == 'bin':
        criterion = losses.create(args.loss, margin=args.margin,
                                  alpha=args.alpha).cuda()
        Div = losses.create('div').cuda()
    else:
        criterion = losses.create(args.loss).cuda()

    data = DataSet.create(args.data, root=None)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.BatchSize,
        sampler=RandomIdentitySampler(data.train,
                                      num_instances=args.num_instances),
        drop_last=True, num_workers=args.nThreads)

    # save the train information
    epoch_list = list()
    loss_list = list()
    pos_list = list()
    neg_list = list()

    for epoch in range(args.start, args.epochs):
        epoch_list.append(epoch)
        running_loss = 0.0
        divergence = 0.0
        running_pos = 0.0
        running_neg = 0.0

        if epoch == 2:
            param_groups[0]['lr_mult'] = 0.1

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable; labels is a cuda.LongTensor Variable
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()

            embed_feat = model(inputs)

            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
            div = Div(embed_feat)
            loss_ = loss + args.theta * div

            if not type(loss) == torch.Tensor:
                print('this batch produced no valid loss; skipping backward')
                continue

            loss_.backward()
            optimizer.step()

            running_loss += loss.item()
            divergence += div.item()
            running_neg += dist_an
            running_pos += dist_ap
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA-HA-AH-AH-AH-AH --')

        loss_list.append(running_loss)
        pos_list.append(running_pos / i)
        neg_list.append(running_neg / i)
        print('[Epoch %05d]\t Loss: %.2f \t Divergence: %.2f \t '
              'Accuracy: %.2f \t Pos-Dist: %.2f \t Neg-Dist: %.2f'
              % (epoch + 1, running_loss, divergence, inter_, dist_ap,
                 dist_an))

        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))

    np.savez(os.path.join(log_dir, "result.npz"), epoch=epoch_list,
             loss=loss_list, pos=pos_list, neg=neg_list)
    t = time.time() - s_
    print('training takes %.2f hour' % (t / 3600))
def main(args):
    # s_ = time.time()
    save_dir = args.save_dir
    mkdir_if_missing(save_dir)
    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    display(args)
    start = 0

    model = models.create(args.net, pretrained=True, dim=args.dim)
    # Frozen copy of the backbone, used by the incremental-learning losses.
    # (It was commented out in the original but is referenced below.)
    # Also works for vgg and densenet.
    model_frozen = models.create(args.net, pretrained=True, dim=args.dim)

    if args.resume is None:
        model_dict = model.state_dict()
    else:
        # Resume: load matching weights into both the live and the frozen model.
        print('load model from {}'.format(args.resume))
        model_dict = model.state_dict()
        model_dict_frozen = model_frozen.state_dict()
        chk_pt = torch.load(args.resume)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        pretrained_dict = {k: v for k, v in weight.items() if k in model_dict}
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        pretrained_dict_frozen = {
            k: v
            for k, v in weight.items() if k in model_dict_frozen
        }
        model_dict_frozen.update(pretrained_dict_frozen)
        model_frozen.load_state_dict(model_dict_frozen)
        model_frozen.eval()

    model = torch.nn.DataParallel(model)
    model = model.cuda()
    model_frozen = torch.nn.DataParallel(model_frozen)
    model_frozen = model_frozen.cuda()

    # Freeze BN
    if args.freeze_BN is True:
        print(40 * '#', '\n BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')

    # Fine-tune the model: the learning rate for pre-trained parameters is 1/10.
    new_param_ids_fc_layer = set(map(id, model.module.fc_layer.parameters()))
    new_param_ids = new_param_ids_fc_layer
    new_params_fc = [
        p for p in model.module.parameters() if id(p) in new_param_ids_fc_layer
    ]
    base_params = [
        p for p in model.module.parameters() if id(p) not in new_param_ids
    ]
    # Freeze the reference model entirely; its learning rate is effectively 0.
    frozen_params = [p for p in model_frozen.module.parameters()]
    for p in frozen_params:
        p.requires_grad = False

    # If fine-tuning the base network, lr_mult is 0.1; with lr_mult=0.0 the
    # base network is not updated.
    param_groups = [{'params': base_params, 'lr_mult': 0.1},
                    {'params': new_params_fc, 'lr_mult': 1.0}]

    print('Initial model is saved at %s' % save_dir)
    optimizer = torch.optim.Adam(param_groups,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    criterion_loss = losses.create(args.loss, margin=args.margin,
                                   alpha=args.alpha,
                                   base=args.loss_base).cuda()
    CE_loss = nn.CrossEntropyLoss().cuda()
    l2_loss = L2Norm().cuda()
    similarity_loss = Similarity_preserving().cuda()
    criterion = [criterion_loss, CE_loss, l2_loss, similarity_loss]
    # Decor_loss = losses.create('decor').cuda()

    data = DataSet.create(args.data, ratio=args.ratio, width=args.width,
                          origin_width=args.origin_width, root=args.data_root)
    train_loader = torch.utils.data.DataLoader(
        data.train,
        batch_size=args.batch_size,
        sampler=FastRandomIdentitySampler(data.train,
                                          num_instances=args.num_instances),
        drop_last=True,
        pin_memory=True,
        num_workers=args.nThreads)

    # Save the training information.
    best_accuracy = 0
    model_list = [model, model_frozen]

    if args.Incremental_flag is False:
        print('#################### This is non-incremental learning! ####################')
    elif args.Incremental_flag is True:
        print('#################### This is incremental learning! ####################')
    else:
        # The original called NotImplementedError() without raising it.
        raise NotImplementedError()

    for epoch in range(start, args.epochs):
        accuracy = train(epoch=epoch, model=model_list, criterion=criterion,
                         optimizer=optimizer, train_loader=train_loader,
                         args=args)

        if (epoch + 1) % args.save_step == 0 or epoch == 0:
            if use_gpu:
                # Save the parameters from the updated (non-frozen) model.
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            is_best = accuracy > best_accuracy
            best_accuracy = max(accuracy, best_accuracy)
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            }, is_best, fpath=osp.join(
                args.save_dir, 'ckp_ep' + str(epoch + 1) + '.pth.tar'))
def main(args):
    # s_ = time.time()
    save_dir = args.save_dir
    mkdir_if_missing(save_dir)
    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    display(args)
    start = 0

    # Also works for vgg and densenet.
    model = models.create(args.net, pretrained=True, dim=args.dim)

    if args.resume is None:
        model_dict = model.state_dict()
    else:
        # Resume from a checkpoint.
        print('load model from {}'.format(args.resume))
        chk_pt = load_checkpoint(args.resume)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        model.load_state_dict(weight)

    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # Freeze BN
    if args.freeze_BN is True:
        print(40 * '#', '\n BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')

    # Fine-tune the model: the learning rate for pre-trained parameters is 1/10.
    new_param_ids = set(map(id, model.module.classifier.parameters()))
    new_params = [
        p for p in model.module.parameters() if id(p) in new_param_ids
    ]
    base_params = [
        p for p in model.module.parameters() if id(p) not in new_param_ids
    ]
    param_groups = [{'params': base_params, 'lr_mult': 0.0},
                    {'params': new_params, 'lr_mult': 1.0}]

    print('Initial model is saved at %s' % save_dir)

    optimizer = torch.optim.Adam(param_groups,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = losses.create(args.loss, margin=args.margin, alpha=args.alpha,
                              base=args.loss_base).cuda()
    # Decor_loss = losses.create('decor').cuda()

    data = DataSet.create(args.data, ratio=args.ratio, width=args.width,
                          origin_width=args.origin_width, root=args.data_root)
    train_loader = torch.utils.data.DataLoader(
        data.train,
        batch_size=args.batch_size,
        sampler=FastRandomIdentitySampler(data.train,
                                          num_instances=args.num_instances),
        drop_last=True,
        pin_memory=True,
        num_workers=args.nThreads)

    # Save the training information.
    for epoch in range(start, args.epochs):
        train(epoch=epoch, model=model, criterion=criterion,
              optimizer=optimizer, train_loader=train_loader, args=args)

        # Unfreeze the base network after the first epoch. Note the key is
        # 'lr_mult'; the original wrote 'lr_mul', which silently adds an
        # unused key and leaves the base network frozen.
        if epoch == 1:
            optimizer.param_groups[0]['lr_mult'] = 0.1

        if (epoch + 1) % args.save_step == 0 or epoch == 0:
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            }, is_best=False, fpath=osp.join(
                args.save_dir, 'ckp_ep' + str(epoch + 1) + '.pth.tar'))
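# save_checkpoint is also defined elsewhere in the repo. A minimal sketch,
# assuming the common "torch.save plus copy-the-best" pattern that matches
# the call sites above (state dict, is_best flag, fpath keyword):
import shutil
import torch
import os.path as osp

def save_checkpoint(state, is_best, fpath='checkpoint.pth.tar'):
    torch.save(state, fpath)
    if is_best:
        # Keep a stable alias for the best checkpoint seen so far.
        shutil.copy(fpath, osp.join(osp.dirname(fpath), 'model_best.pth.tar'))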
def train_task(args, train_loader, current_task, prototype={}, pre_index=0):
    num_class_per_task = (args.num_class - args.nb_cl_fg) // args.num_task
    task_range = list(
        range(args.nb_cl_fg + (current_task - 1) * num_class_per_task,
              args.nb_cl_fg + current_task * num_class_per_task))
    if num_class_per_task == 0:
        pass  # JT (joint training)
    else:
        old_task_factor = args.nb_cl_fg // num_class_per_task + current_task - 1

    log_dir = os.path.join(args.ckpt_dir, args.log_dir)
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(
        os.path.join(log_dir, 'log_task{}.txt'.format(current_task)))
    tb_writer = SummaryWriter(log_dir)
    display(args)

    # One-hot encoding or attribute encoding
    if 'imagenet' in args.data:
        model = models.create('resnet18_imagenet', pretrained=False,
                              feat_dim=args.feat_dim, embed_dim=args.num_class)
    elif 'cifar' in args.data:
        model = models.create('resnet18_cifar', pretrained=False,
                              feat_dim=args.feat_dim, embed_dim=args.num_class)

    if current_task > 0:
        # Load the model trained on the previous task and keep a frozen copy
        # for the LwF terms.
        model = torch.load(
            os.path.join(
                log_dir, 'task_' + str(current_task - 1).zfill(2) +
                '_%d_model.pkl' % int(args.epochs - 1)))
        model_old = deepcopy(model)
        model_old.eval()
        model_old = freeze_model(model_old)

    model = model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=args.lr_decay_step,
                       gamma=args.lr_decay)
    loss_mse = torch.nn.MSELoss(reduction='sum')

    # Loss weight for the gradient penalty used in W-GAN.
    lambda_gp = args.lambda_gp
    lambda_lwf = args.gan_tradeoff

    # Initialize generator and discriminator.
    if current_task == 0:
        generator = Generator(feat_dim=args.feat_dim,
                              latent_dim=args.latent_dim,
                              hidden_dim=args.hidden_dim,
                              class_dim=args.num_class)
        discriminator = Discriminator(feat_dim=args.feat_dim,
                                      hidden_dim=args.hidden_dim,
                                      class_dim=args.num_class)
    else:
        generator = torch.load(
            os.path.join(
                log_dir, 'task_' + str(current_task - 1).zfill(2) +
                '_%d_model_generator.pkl' % int(args.epochs_gan - 1)))
        discriminator = torch.load(
            os.path.join(
                log_dir, 'task_' + str(current_task - 1).zfill(2) +
                '_%d_model_discriminator.pkl' % int(args.epochs_gan - 1)))
        generator_old = deepcopy(generator)
        generator_old.eval()
        generator_old = freeze_model(generator_old)

    generator = generator.cuda()
    discriminator = discriminator.cuda()

    optimizer_G = torch.optim.Adam(generator.parameters(), lr=args.gan_lr,
                                   betas=(0.5, 0.999))
    optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=args.gan_lr,
                                   betas=(0.5, 0.999))
    scheduler_G = StepLR(optimizer_G, step_size=200, gamma=0.3)
    scheduler_D = StepLR(optimizer_D, step_size=200, gamma=0.3)

    # y_onehot is used to generate one-hot encodings.
    y_onehot = torch.FloatTensor(args.BatchSize, args.num_class)

    # The generator stays frozen while the feature extractor trains.
    for p in generator.parameters():
        p.requires_grad = False

    ##################### Feature extractor training #####################
    if current_task > 0:
        model = model.eval()

    for epoch in range(args.epochs):
        loss_log = {'C/loss': 0.0, 'C/loss_aug': 0.0, 'C/loss_cls': 0.0}
        scheduler.step()
        for i, data in enumerate(train_loader, 0):
            inputs1, labels1 = data
            inputs1, labels1 = inputs1.cuda(), labels1.cuda()
            loss = torch.zeros(1).cuda()
            loss_cls = torch.zeros(1).cuda()
            loss_aug = torch.zeros(1).cuda()
            optimizer.zero_grad()
            inputs, labels = inputs1, labels1

            ### Classification loss
            embed_feat = model(inputs)
            if current_task == 0:
                soft_feat = model.embed(embed_feat)
                loss_cls = torch.nn.CrossEntropyLoss()(soft_feat, labels)
                loss += loss_cls
            else:
                embed_feat_old = model_old(inputs)

            ### Feature extractor (LwF) loss
            if current_task > 0:
                loss_aug = torch.dist(embed_feat, embed_feat_old, 2)
                loss += args.tradeoff * loss_aug * old_task_factor

            ### Replay and classification loss
            if current_task > 0:
                embed_sythesis = []
                embed_label_sythesis = []
                ind = list(range(len(pre_index)))
                if args.mean_replay:
                    # Sample pseudo-features from the stored class statistics.
                    for _ in range(args.BatchSize):
                        np.random.shuffle(ind)
                        tmp = prototype['class_mean'][ind[0]] + \
                            np.random.normal() * prototype['class_std'][ind[0]]
                        embed_sythesis.append(tmp)
                        embed_label_sythesis.append(
                            prototype['class_label'][ind[0]])
                    embed_sythesis = np.asarray(embed_sythesis)
                    embed_label_sythesis = np.asarray(embed_label_sythesis)
                    embed_sythesis = torch.from_numpy(embed_sythesis).cuda()
                    embed_label_sythesis = torch.from_numpy(
                        embed_label_sythesis)
                else:
                    # Sample pseudo-features from the generator.
                    for _ in range(args.BatchSize):
                        np.random.shuffle(ind)
                        embed_label_sythesis.append(pre_index[ind[0]])
                    embed_label_sythesis = np.asarray(embed_label_sythesis)
                    embed_label_sythesis = torch.from_numpy(
                        embed_label_sythesis)
                    y_onehot.zero_()
                    y_onehot.scatter_(1, embed_label_sythesis[:, None], 1)
                    syn_label_pre = y_onehot.cuda()
                    z = torch.Tensor(
                        np.random.normal(
                            0, 1, (args.BatchSize, args.latent_dim))).cuda()
                    embed_sythesis = generator(z, syn_label_pre)

                embed_sythesis = torch.cat((embed_feat, embed_sythesis))
                embed_label_sythesis = torch.cat(
                    (labels, embed_label_sythesis.cuda()))
                soft_feat_syt = model.embed(embed_sythesis)

                # Layout: real samples first, synthetic samples after, so
                # [:batch_size1] is real and [batch_size2:] is synthetic.
                batch_size1 = inputs1.shape[0]
                batch_size2 = embed_feat.shape[0]

                loss_cls = torch.nn.CrossEntropyLoss()(
                    soft_feat_syt[:batch_size1],
                    embed_label_sythesis[:batch_size1])
                loss_cls_old = torch.nn.CrossEntropyLoss()(
                    soft_feat_syt[batch_size2:],
                    embed_label_sythesis[batch_size2:])
                loss_cls += loss_cls_old * old_task_factor
                loss_cls /= args.nb_cl_fg // num_class_per_task + current_task
                loss += loss_cls

            loss.backward()
            optimizer.step()
            loss_log['C/loss'] += loss.item()
            loss_log['C/loss_cls'] += loss_cls.item()
            loss_log['C/loss_aug'] += args.tradeoff * loss_aug.item() \
                if args.tradeoff != 0 else 0
            del loss_cls
            if epoch == 0 and i == 0:
                print(50 * '#')

        print('[Metric Epoch %05d]\t Total Loss: %.3f \t LwF Loss: %.3f \t'
              % (epoch + 1, loss_log['C/loss'], loss_log['C/loss_aug']))
        for k, v in loss_log.items():
            if v != 0:
                tb_writer.add_scalar(
                    'Task {} - Classifier/{}'.format(current_task, k),
                    v, epoch + 1)
        if epoch == args.epochs - 1:
            torch.save(
                model,
                os.path.join(
                    log_dir, 'task_' + str(current_task).zfill(2) +
                    '_%d_model.pkl' % epoch))

    ######################### W-GAN training stage #########################
    model = model.eval()
    for p in model.parameters():
        p.requires_grad = False
    for p in generator.parameters():
        p.requires_grad = True

    criterion_softmax = torch.nn.CrossEntropyLoss().cuda()
    if current_task != args.num_task:
        for epoch in range(args.epochs_gan):
            loss_log = {
                'D/loss': 0.0, 'D/new_rf': 0.0, 'D/new_lbls': 0.0,
                'D/new_gp': 0.0, 'D/prev_rf': 0.0, 'D/prev_lbls': 0.0,
                'D/prev_gp': 0.0,
                'G/loss': 0.0, 'G/new_rf': 0.0, 'G/new_lbls': 0.0,
                'G/prev_rf': 0.0, 'G/prev_mse': 0.0, 'G/new_classifier': 0.0,
                'E/kld': 0.0, 'E/mse': 0.0, 'E/loss': 0.0
            }
            scheduler_D.step()
            scheduler_G.step()
            for i, data in enumerate(train_loader, 0):
                for p in discriminator.parameters():
                    p.requires_grad = True
                inputs, labels = data
                inputs = Variable(inputs.cuda())

                ##################### Train discriminator #####################
                optimizer_D.zero_grad()
                real_feat = model(inputs)
                z = torch.Tensor(
                    np.random.normal(
                        0, 1, (args.BatchSize, args.latent_dim))).cuda()
                y_onehot.zero_()
                y_onehot.scatter_(1, labels[:, None], 1)
                syn_label = y_onehot.cuda()

                fake_feat = generator(z, syn_label)
                fake_validity, _ = discriminator(fake_feat, syn_label)
                real_validity, disc_real_acgan = discriminator(
                    real_feat, syn_label)

                # Adversarial (Wasserstein) loss plus gradient penalty.
                d_loss_rf = -torch.mean(real_validity) + torch.mean(
                    fake_validity)
                gradient_penalty = compute_gradient_penalty(
                    discriminator, real_feat, fake_feat, syn_label).mean()
                # The auxiliary-classifier loss is computed but not added to
                # d_loss in this version.
                d_loss_lbls = criterion_softmax(disc_real_acgan, labels.cuda())
                d_loss = d_loss_rf + lambda_gp * gradient_penalty

                d_loss.backward()
                optimizer_D.step()

                loss_log['D/loss'] += d_loss.item()
                loss_log['D/new_rf'] += d_loss_rf.item()
                loss_log['D/new_lbls'] += 0  # label loss not accumulated here
                loss_log['D/new_gp'] += gradient_penalty.item() \
                    if lambda_gp != 0 else 0
                del d_loss_rf, d_loss_lbls

                ##################### Train generator #####################
                # Train the generator every n_critic steps.
                if i % args.n_critic == 0:
                    for p in discriminator.parameters():
                        p.requires_grad = False

                    optimizer_G.zero_grad()
                    # Generate a batch of features.
                    fake_feat = generator(z, syn_label)
                    # The loss measures the generator's ability to fool the
                    # discriminator on the fake features.
                    fake_validity, disc_fake_acgan = discriminator(
                        fake_feat, syn_label)

                    if current_task == 0:
                        loss_aug = 0 * torch.sum(fake_validity)
                    else:
                        # Replay alignment: the new generator should match
                        # the old one on previous-task labels.
                        ind = list(range(len(pre_index)))
                        embed_label_sythesis = []
                        for _ in range(args.BatchSize):
                            np.random.shuffle(ind)
                            embed_label_sythesis.append(pre_index[ind[0]])
                        embed_label_sythesis = np.asarray(embed_label_sythesis)
                        embed_label_sythesis = torch.from_numpy(
                            embed_label_sythesis)
                        y_onehot.zero_()
                        y_onehot.scatter_(1, embed_label_sythesis[:, None], 1)
                        syn_label_pre = y_onehot.cuda()

                        pre_feat = generator(z, syn_label_pre)
                        pre_feat_old = generator_old(z, syn_label_pre)
                        loss_aug = loss_mse(pre_feat, pre_feat_old)

                    g_loss_rf = -torch.mean(fake_validity)
                    g_loss_lbls = criterion_softmax(disc_fake_acgan,
                                                    labels.cuda())
                    g_loss = g_loss_rf \
                        + lambda_lwf * old_task_factor * loss_aug

                    loss_log['G/loss'] += g_loss.item()
                    loss_log['G/new_rf'] += g_loss_rf.item()
                    loss_log['G/new_lbls'] += 0
                    loss_log['G/new_classifier'] += 0
                    loss_log['G/prev_mse'] += loss_aug.item() \
                        if lambda_lwf != 0 else 0
                    del g_loss_rf, g_loss_lbls

                    g_loss.backward()
                    optimizer_G.step()

            print('[GAN Epoch %05d]\t D Loss: %.3f \t G Loss: %.3f \t '
                  'LwF Loss: %.3f'
                  % (epoch + 1, loss_log['D/loss'], loss_log['G/loss'],
                     loss_log['G/prev_rf']))
            for k, v in loss_log.items():
                if v != 0:
                    tb_writer.add_scalar(
                        'Task {} - GAN/{}'.format(current_task, k),
                        v, epoch + 1)
            if epoch == args.epochs_gan - 1:
                torch.save(
                    generator,
                    os.path.join(
                        log_dir, 'task_' + str(current_task).zfill(2) +
                        '_%d_model_generator.pkl' % epoch))
                torch.save(
                    discriminator,
                    os.path.join(
                        log_dir, 'task_' + str(current_task).zfill(2) +
                        '_%d_model_discriminator.pkl' % epoch))

    tb_writer.close()
    prototype = compute_prototype(model, train_loader)
    return prototype
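# compute_gradient_penalty is defined elsewhere in the repo. A minimal
# WGAN-GP sketch matching the call sites above (the discriminator returns a
# (validity, acgan_logits) pair and the caller applies .mean()); treat the
# body as an assumption, not the repo's exact implementation:
import torch
from torch import autograd

def compute_gradient_penalty(D, real_samples, fake_samples, labels):
    # Random interpolation coefficient, one per sample in the batch.
    alpha = torch.rand(real_samples.size(0), 1, device=real_samples.device)
    interpolates = (alpha * real_samples +
                    (1 - alpha) * fake_samples).requires_grad_(True)
    d_interpolates, _ = D(interpolates, labels)
    grad_outputs = torch.ones_like(d_interpolates)
    # Gradient of the critic output w.r.t. the interpolated features.
    gradients = autograd.grad(outputs=d_interpolates, inputs=interpolates,
                              grad_outputs=grad_outputs, create_graph=True,
                              retain_graph=True, only_inputs=True)[0]
    gradients = gradients.view(gradients.size(0), -1)
    # Per-sample penalty: (||grad||_2 - 1)^2.
    return (gradients.norm(2, dim=1) - 1) ** 2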
def train_task(args, train_loader, current_task, prototype={}, pre_index=0):
    num_class_per_task = (args.num_class - args.nb_cl_fg) // args.num_task
    task_range = list(
        range(args.nb_cl_fg + (current_task - 1) * num_class_per_task,
              args.nb_cl_fg + current_task * num_class_per_task))
    if num_class_per_task == 0:
        pass  # JT (joint training)
    else:
        old_task_factor = args.nb_cl_fg // num_class_per_task + current_task - 1
        print(old_task_factor)

    log_dir = os.path.join(args.ckpt_dir, args.log_dir)
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(
        os.path.join(log_dir, 'log_task{}.txt'.format(current_task)))
    tb_writer = SummaryWriter(log_dir)
    display(args)

    if 'miniimagenet' in args.data:
        model = models.create('resnet18_imagenet', pretrained=False,
                              feat_dim=args.feat_dim,
                              embed_dim=args.num_class,
                              hidden_dim=256, norm=True)
    elif 'cifar100' in args.data:
        model = models.create('resnet18_cifar', pretrained=False,
                              feat_dim=args.feat_dim, hidden_dim=256,
                              embed_dim=args.num_class, norm=True)

    if current_task > 0:
        # The freshly created model is replaced by the previous task's
        # checkpoint; a frozen copy is kept for the LwF terms.
        model = torch.load(
            os.path.join(
                log_dir, 'task_' + str(current_task - 1).zfill(2) +
                '_%d_model.pkl' % int(args.epochs - 1)))
        model_old = deepcopy(model)
        model_old.eval()
        model_old = freeze_model(model_old)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=args.lr_decay_step,
                       gamma=args.lr_decay)
    loss_mse = torch.nn.MSELoss(reduction='sum')

    # Loss weights for the gradient penalty (W-GAN) and generator LwF term.
    lambda_gp = args.lambda_gp
    lambda_lwf = args.gan_tradeoff

    # Initialize generator and discriminator.
    if current_task == 0:
        generator = Generator(feat_dim=args.feat_dim,
                              latent_dim=args.latent_dim,
                              hidden_dim=args.hidden_dim,
                              class_dim=args.num_class, norm=True)
        discriminator = Discriminator(feat_dim=args.feat_dim,
                                      hidden_dim=args.hidden_dim,
                                      class_dim=args.num_class)
    else:
        generator = torch.load(
            os.path.join(
                log_dir, 'task_' + str(current_task - 1).zfill(2) +
                '_%d_model_generator.pkl' % int(args.epochs_gan - 1)))
        discriminator = torch.load(
            os.path.join(
                log_dir, 'task_' + str(current_task - 1).zfill(2) +
                '_%d_model_discriminator.pkl' % int(args.epochs_gan - 1)))
        generator_old = deepcopy(generator)
        generator_old.eval()
        generator_old = freeze_model(generator_old)

    cuda = torch.cuda.is_available()
    FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    g_len = sum(1 for _ in generator.parameters())
    d_len = sum(1 for _ in discriminator.parameters())

    # One learnable per-parameter learning rate for each inner-loop step.
    # (Built with comprehensions; the original used list multiplication,
    # which aliases a single shared Variable across all positions.)
    learned_lrs = []
    params = []
    for i in range(args.update_step):
        g_lrs = [Variable(FloatTensor(1).fill_(args.update_lr),
                          requires_grad=True) for _ in range(g_len)]
        d_lrs = [Variable(FloatTensor(1).fill_(args.update_lr),
                          requires_grad=True) for _ in range(d_len)]
        learned_lrs.append((g_lrs, d_lrs))
        for param_list in learned_lrs[i]:
            params += param_list

    generator = generator.to(device)
    discriminator = discriminator.to(device)

    optimizer_G = torch.optim.Adam(generator.parameters(), lr=args.gan_lr,
                                   betas=(0.5, 0.999))
    optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=args.gan_lr,
                                   betas=(0.5, 0.999))
    optimizer_lr = torch.optim.Adam(params, lr=args.lr)
    scheduler_G = StepLR(optimizer_G, step_size=150, gamma=0.3)
    scheduler_D = StepLR(optimizer_D, step_size=150, gamma=0.3)

    # The generator stays frozen during the feature-extractor phase.
    for p in generator.parameters():
        p.requires_grad = False

    ##################### MAML on the feature extractor #####################
    for epoch in range(args.epochs):
        loss_log = {'C/loss': 0.0, 'C/loss_aug': 0.0,
                    'C/loss_cls': 0.0, 'C/loss_cls_q': 0.0}

        for step, (x_spt, y_spt, x_qry, y_qry) in enumerate(train_loader):
            x_spt, y_spt = x_spt.to(device), y_spt.to(device)
            x_qry, y_qry = x_qry.to(device), y_qry.to(device)
            loss = torch.zeros(1).to(device)
            loss_cls = torch.zeros(1).to(device)
            loss_aug = torch.zeros(1).to(device)
            loss_tmp = torch.zeros(1).to(device)
            BatchSize, setsz, c_, h, w = x_spt.size()
            querysz = x_qry.size(1)

            # losses_q[i] is the query loss after i inner-loop updates.
            losses_q = [0 for _ in range(args.update_step + 1)]
            corrects = [0.0 for _ in range(args.update_step + 1)]
            correct_s = [0.0 for _ in range(args.update_step + 1)]

            y_onehot = torch.cuda.FloatTensor(setsz, args.num_class)
            y_onehot_q = torch.cuda.FloatTensor(querysz, args.num_class)

            for i in range(args.BatchSize):
                # 1. Run the i-th task and compute the loss for k=0.
                embed_feat = model(x_spt[i])
                if current_task == 0:
                    soft_feat = model.embed(embed_feat)
                    loss_cls = torch.nn.CrossEntropyLoss()(soft_feat, y_spt[i])
                    loss = loss.clone() + loss_cls
                else:
                    embed_feat_old = model_old(x_spt[i])

                ### Feature extractor (LwF) loss
                if current_task > 0:
                    loss_aug = torch.dist(embed_feat, embed_feat_old, 2)
                    loss = loss.clone() \
                        + args.tradeoff * loss_aug * old_task_factor

                ### Replay and classification loss
                if current_task > 0:
                    embed_sythesis = []
                    embed_label_sythesis = []
                    ind = list(range(len(pre_index)))
                    if args.mean_replay:
                        for _ in range(setsz):
                            np.random.shuffle(ind)
                            tmp = prototype['class_mean'][ind[0]] + \
                                np.random.normal() * \
                                prototype['class_std'][ind[0]]
                            embed_sythesis.append(tmp)
                            embed_label_sythesis.append(
                                prototype['class_label'][ind[0]])
                        embed_sythesis = np.asarray(embed_sythesis)
                        embed_label_sythesis = np.asarray(embed_label_sythesis)
                        embed_sythesis = torch.from_numpy(
                            embed_sythesis).to(device)
                        embed_label_sythesis = torch.from_numpy(
                            embed_label_sythesis)
                    else:
                        for _ in range(setsz):
                            np.random.shuffle(ind)
                            embed_label_sythesis.append(pre_index[ind[0]])
                        embed_label_sythesis = np.asarray(embed_label_sythesis)
                        embed_label_sythesis = torch.from_numpy(
                            embed_label_sythesis).to(device)
                        y_onehot.zero_()
                        # scatter_ is the in-place variant; plain scatter
                        # returns a new tensor and would leave y_onehot zero.
                        y_onehot.scatter_(1, embed_label_sythesis[:, None], 1)
                        syn_label_pre = y_onehot.to(device)
                        z = torch.Tensor(
                            np.random.normal(
                                0, 1, (setsz, args.latent_dim))).to(device)
                        embed_sythesis = generator(z, syn_label_pre)

                    embed_sythesis = torch.cat((embed_feat, embed_sythesis))
                    embed_label_sythesis = torch.cat(
                        (y_spt[i], embed_label_sythesis.to(device)))
                    soft_feat_syt = model.embed(embed_sythesis)

                    batch_size1 = x_spt[i].shape[0]
                    batch_size2 = embed_feat.shape[0]

                    loss_cls = torch.nn.CrossEntropyLoss()(
                        soft_feat_syt[:batch_size1],
                        embed_label_sythesis[:batch_size1])
                    loss_cls_old = torch.nn.CrossEntropyLoss()(
                        soft_feat_syt[batch_size2:],
                        embed_label_sythesis[batch_size2:])
                    loss_cls += loss_cls_old * old_task_factor
                    loss_cls /= args.nb_cl_fg // num_class_per_task \
                        + current_task
                    loss += loss_cls

                grad = torch.autograd.grad(loss, model.parameters(),
                                           create_graph=True,
                                           retain_graph=True)

                # Loss and accuracy before the first update.
                with torch.no_grad():
                    embed_feat_q = model(x_qry[i])
                    soft_feat_q = model.embed(embed_feat_q)
                    loss_q = torch.nn.CrossEntropyLoss()(soft_feat_q, y_qry[i])
                    losses_q[0] += loss_q

                    embed_feat = model(x_spt[i])
                    soft_feat = model.embed(embed_feat)
                    pred_s = F.softmax(soft_feat, dim=1).argmax(dim=1)
                    corr = torch.eq(pred_s, y_spt[i]).sum().item()
                    correct_s[0] = correct_s[0] + corr
                    pred_q = F.softmax(soft_feat_q, dim=1).argmax(dim=1)
                    correct = torch.eq(pred_q, y_qry[i]).sum().item()
                    corrects[0] = corrects[0] + correct

                # Loss and accuracy after the first update.
                with torch.no_grad():
                    # In-place SGD step with the fixed inner-loop lr.
                    for e, param in enumerate(model.parameters(), 0):
                        param.data -= args.update_lr * grad[e]
                    embed_feat_q = model(x_qry[i])
                    soft_feat_q = model.embed(embed_feat_q)
                    loss_q = torch.nn.CrossEntropyLoss()(soft_feat_q, y_qry[i])
                    losses_q[1] += loss_q

                    embed_feat = model(x_spt[i])
                    soft_feat = model.embed(embed_feat)
                    pred_s = F.softmax(soft_feat, dim=1).argmax(dim=1)
                    corr = torch.eq(pred_s, y_spt[i]).sum().item()
                    correct_s[1] = correct_s[1] + corr
                    pred_q = F.softmax(soft_feat_q, dim=1).argmax(dim=1)
                    correct = torch.eq(pred_q, y_qry[i]).sum().item()
                    corrects[1] = corrects[1] + correct

                for k in range(1, args.update_step):
                    # 1. Run the i-th task and compute the loss for k=1..K-1.
                    embed_feat = model(x_spt[i])
                    loss = torch.zeros(1).to(device)
                    if current_task > 0:
                        embed_feat_old = model_old(x_spt[i])
                        loss_aug = torch.dist(embed_feat, embed_feat_old, 2)
                        loss += args.tradeoff * loss_aug * old_task_factor

                        embed_sythesis = []
                        embed_label_sythesis = []
                        ind = list(range(len(pre_index)))
                        if args.mean_replay:
                            for _ in range(setsz):
                                np.random.shuffle(ind)
                                tmp = prototype['class_mean'][ind[0]] + \
                                    np.random.normal() * \
                                    prototype['class_std'][ind[0]]
                                embed_sythesis.append(tmp)
                                embed_label_sythesis.append(
                                    prototype['class_label'][ind[0]])
                            embed_sythesis = np.asarray(embed_sythesis)
                            embed_label_sythesis = np.asarray(
                                embed_label_sythesis)
                            embed_sythesis = torch.from_numpy(
                                embed_sythesis).to(device)
                            embed_label_sythesis = torch.from_numpy(
                                embed_label_sythesis)
                        else:
                            for _ in range(setsz):
                                np.random.shuffle(ind)
                                embed_label_sythesis.append(pre_index[ind[0]])
                            embed_label_sythesis = np.asarray(
                                embed_label_sythesis)
                            embed_label_sythesis = torch.from_numpy(
                                embed_label_sythesis).to(device)
                            y_onehot.zero_()
                            y_onehot.scatter_(1,
                                              embed_label_sythesis[:, None], 1)
                            syn_label_pre = y_onehot.to(device)
                            z = torch.Tensor(
                                np.random.normal(
                                    0, 1,
                                    (setsz, args.latent_dim))).to(device)
                            embed_sythesis = generator(z, syn_label_pre)

                        embed_sythesis = torch.cat(
                            (embed_feat, embed_sythesis))
                        embed_label_sythesis = torch.cat(
                            (y_spt[i], embed_label_sythesis.to(device)))
                        soft_feat_syt = model.embed(embed_sythesis)

                        batch_size1 = x_spt[i].shape[0]
                        batch_size2 = embed_feat.shape[0]

                        loss_cls = torch.nn.CrossEntropyLoss()(
                            soft_feat_syt[:batch_size1],
                            embed_label_sythesis[:batch_size1])
                        loss_cls_old = torch.nn.CrossEntropyLoss()(
                            soft_feat_syt[batch_size2:],
                            embed_label_sythesis[batch_size2:])
                        loss_cls += loss_cls_old * old_task_factor
                        loss_cls /= args.nb_cl_fg // num_class_per_task \
                            + current_task
                        loss += loss_cls
                    else:
                        soft_feat = model.embed(embed_feat)
                        loss_cls = torch.nn.CrossEntropyLoss()(soft_feat,
                                                               y_spt[i])
                        loss += loss_cls

                    # 2. Compute grad on theta_pi.
                    grad = torch.autograd.grad(loss, model.parameters(),
                                               create_graph=True,
                                               retain_graph=True,
                                               allow_unused=True)
                    # 3. theta_pi = theta_pi - train_lr * grad
                    for e, param in enumerate(model.parameters(), 0):
                        param.data -= args.update_lr * grad[e]

                    embed_feat = model(x_spt[i])
                    soft_feat = model.embed(embed_feat)
                    embed_feat_q = model(x_qry[i])
                    soft_feat_q = model.embed(embed_feat_q)
                    # loss_q is overwritten; only the last update step's
                    # query loss is kept.
                    loss_q = torch.nn.CrossEntropyLoss()(soft_feat_q, y_qry[i])
                    losses_q[k + 1] += loss_q

                    with torch.no_grad():
                        pred_s = F.softmax(soft_feat, dim=1).argmax(dim=1)
                        corr = torch.eq(pred_s, y_spt[i]).sum().item()
                        correct_s[k + 1] = correct_s[k + 1] + corr
                        pred_q = F.softmax(soft_feat_q, dim=1).argmax(dim=1)
                        correct = torch.eq(pred_q, y_qry[i]).sum().item()
                        corrects[k + 1] = corrects[k + 1] + correct

            # End of all tasks: average the query loss over the meta-batch.
            loss_q = losses_q[-1] / BatchSize
            # Note: re-wrapping in a fresh Variable makes loss_q a leaf, so
            # this backward pass does not propagate into the model weights.
            loss_q = Variable(loss_q, requires_grad=True)

            # Optimize the theta parameters.
            optimizer.zero_grad()
            loss_q.backward()
            optimizer.step()
            scheduler.step()

            accs = np.array([float(c) for c in corrects]) / \
                float(querysz * BatchSize)
            accs_spt = np.array([float(c) for c in correct_s]) / \
                float(setsz * BatchSize)

            loss_log['C/loss'] += loss.item()
            loss_log['C/loss_cls'] += loss_cls.item()
            loss_log['C/loss_aug'] += args.tradeoff * loss_aug.item() \
                if args.tradeoff != 0 else 0
            loss_log['C/loss_cls_q'] += loss_q.item()
            del loss_cls
            del loss_q

        print('[Metric Epoch %05d]\t Total Loss: %.3f \t LwF Loss: %.3f \t '
              'Spt Accuracy FeatureX: %.3f \t Query Loss: %.3f \t '
              'Query Accuracy FeatureX: %.3f \t'
              % (epoch + 1, loss_log['C/loss'], loss_log['C/loss_aug'],
                 accs_spt[-1], loss_log['C/loss_cls_q'], accs[-1]))
        for k, v in loss_log.items():
            if v != 0:
                tb_writer.add_scalar(
                    'Task {} - Classifier/{}'.format(current_task, k),
                    v, epoch + 1)
        tb_writer.add_scalar('Task {}'.format(current_task), accs[-1],
                             epoch + 1)
        if epoch == args.epochs - 1:
            torch.save(
                model,
                os.path.join(
                    log_dir, 'task_' + str(current_task).zfill(2) +
                    '_%d_model.pkl' % epoch))
    ################## Feature extraction training end ##################

    ######################### MetaGAN training #########################
    model = model.eval()
    for p in model.parameters():
        p.requires_grad = False
    for p in generator.parameters():
        p.requires_grad = True
    for p in discriminator.parameters():
        p.requires_grad = True

    criterion_softmax = torch.nn.CrossEntropyLoss().to(device)
    if current_task != args.num_task:
        for epoch in range(args.epochs_gan):
            loss_log = {
                'D/loss': 0.0, 'D/new_rf': 0.0, 'D/new_lbls': 0.0,
                'D/new_gp': 0.0, 'D/prev_rf': 0.0, 'D/prev_lbls': 0.0,
                'D/prev_gp': 0.0, 'D/loss_q': 0.0, 'D/new_rf_q': 0.0,
                'D/new_lbls_q': 0.0, 'D/new_gp_q': 0.0,
                'G/loss': 0.0, 'G/new_rf': 0.0, 'G/new_lbls': 0.0,
                'G/prev_rf': 0.0, 'G/prev_mse': 0.0, 'G/new_classifier': 0.0,
                'G/loss_q': 0.0, 'G/new_rf_q': 0.0, 'G/new_lbls_q': 0.0,
                'G/new_gp_q': 0.0,
                'E/kld': 0.0, 'E/mse': 0.0, 'E/loss': 0.0
            }
            for step, (x_spt, y_spt, x_qry,
                       y_qry) in enumerate(train_loader, 0):
                x_spt, y_spt = x_spt.to(device), y_spt.to(device)
                x_qry, y_qry = x_qry.to(device), y_qry.to(device)
                BatchSize, setsz, c_, h, w = x_spt.size()
                querysz = x_qry.size(1)
                d_losses_q = [0.0 for _ in range(args.update_step)]
                g_losses_q = [0.0 for _ in range(args.update_step)]
                y_onehot = torch.cuda.FloatTensor(setsz, args.num_class)
                y_onehot_q = torch.cuda.FloatTensor(querysz, args.num_class)
                y_onehot_pre = torch.cuda.FloatTensor(setsz, args.num_class)

                for i in range(args.BatchSize):
                    # This is the inner loop, not the task loop.
                    inputs = Variable(x_spt[i])
                    labels = y_spt[i]
                    real_feat = model(inputs)
                    z = torch.Tensor(
                        np.random.normal(
                            0, 1, (setsz, args.latent_dim))).to(device)

                    labels_q = y_qry[i]
                    real_feat_q = model(x_qry[i])
                    z_q = torch.Tensor(
                        np.random.normal(
                            0, 1, (querysz, args.latent_dim))).to(device)

                    y_onehot.zero_()
                    y_onehot.scatter_(1, labels[:, None], 1)
                    syn_label = y_onehot.to(device)
                    y_onehot_q.zero_()
                    y_onehot_q.scatter_(1, labels_q[:, None], 1)
                    syn_label_q = y_onehot_q.to(device)

                    ##################### Train MetaGAN #####################
                    for k in range(args.update_step):
                        fake_feat = generator(z, syn_label)
                        fake_validity, disc_fake_acgan = discriminator(
                            fake_feat, syn_label)
                        real_validity, disc_real_acgan = discriminator(
                            real_feat, syn_label)

                        if current_task == 0:
                            loss_aug = 0 * torch.sum(fake_validity)
                        else:
                            # Replay alignment on previous-task labels.
                            ind = list(range(len(pre_index)))
                            embed_label_sythesis = []
                            for _ in range(setsz):
                                np.random.shuffle(ind)
                                embed_label_sythesis.append(pre_index[ind[0]])
                            embed_label_sythesis = np.asarray(
                                embed_label_sythesis)
                            embed_label_sythesis = torch.from_numpy(
                                embed_label_sythesis)
                            y_onehot_pre.zero_()
                            y_onehot_pre.scatter_(
                                1, embed_label_sythesis[:, None].to(device), 1)
                            syn_label_pre = y_onehot_pre.to(device)
                            pre_feat = generator(z, syn_label_pre)
                            pre_feat_old = generator_old(z, syn_label_pre)
                            loss_aug = loss_mse(pre_feat, pre_feat_old)

                        # Adversarial (Wasserstein) losses on the support set.
                        g_loss_lbls = criterion_softmax(
                            disc_fake_acgan, labels.to(device))
                        d_loss_rf = -torch.mean(real_validity) + torch.mean(
                            fake_validity)
                        d_gradient_penalty = compute_gradient_penalty(
                            discriminator, real_feat, fake_feat,
                            syn_label).mean()
                        d_loss_lbls = criterion_softmax(
                            disc_real_acgan, labels.to(device))
                        d_loss = d_loss_rf + lambda_gp * d_gradient_penalty \
                            + 0.5 * (d_loss_lbls + g_loss_lbls)
                        g_loss_rf = -torch.mean(fake_validity)
                        g_loss = g_loss_rf \
                            + lambda_lwf * old_task_factor * loss_aug \
                            + g_loss_lbls

                        grad_d = torch.autograd.grad(
                            d_loss, discriminator.parameters(),
                            create_graph=True, retain_graph=True)
                        grad_g = torch.autograd.grad(
                            g_loss, generator.parameters(),
                            create_graph=True, retain_graph=True)
                        grad_d = clip_grad_by_norm_(grad_d, max_norm=5.0)
                        grad_g = clip_grad_by_norm_(grad_g, max_norm=5.0)

                        # Inner-loop update with the learned per-step lrs.
                        g_lr, d_lr = learned_lrs[k]
                        for e, param in enumerate(
                                discriminator.parameters(), 0):
                            param.data = param.data.clone() \
                                - d_lr[e] * grad_d[e]
                        for e, param in enumerate(generator.parameters(), 0):
                            param.data = param.data.clone() \
                                - g_lr[e] * grad_g[e]

                        # Query-set losses after the k-th inner update.
                        fake_feat_q = generator(z_q, syn_label_q)
                        fake_validity_q, disc_fake_acgan_q = discriminator(
                            fake_feat_q, syn_label_q)
                        real_validity_q, disc_real_acgan_q = discriminator(
                            real_feat_q, syn_label_q)

                        d_loss_rf_q = -torch.mean(
                            real_validity_q) + torch.mean(fake_validity_q)
                        d_gradient_penalty_q = compute_gradient_penalty(
                            discriminator, real_feat_q, fake_feat_q,
                            syn_label_q).mean()
                        d_loss_lbls_q = criterion_softmax(
                            disc_real_acgan_q, labels_q.to(device))
                        d_loss_q = d_loss_rf_q \
                            + lambda_gp * d_gradient_penalty_q + d_loss_lbls_q
                        d_losses_q[k] = d_losses_q[k] + d_loss_q

                        g_loss_rf_q = -torch.mean(fake_validity_q)
                        g_loss_lbls_q = criterion_softmax(
                            disc_fake_acgan_q, labels_q.to(device))
                        g_loss_q = g_loss_rf_q + g_loss_lbls_q \
                            # + lambda_lwf * old_task_factor * loss_aug_q
                        # Note: only the label term is accumulated here;
                        # g_loss_q itself is left unused, as in the original.
                        g_losses_q[k] = g_losses_q[k] + g_loss_lbls_q

                # with torch.autograd.detect_anomaly():
                optimizer_D.zero_grad()
                optimizer_G.zero_grad()
                optimizer_lr.zero_grad()
                d_loss_q_total = d_losses_q[-1].clone() / args.BatchSize
                g_loss_q_total = g_losses_q[-1].clone() / args.BatchSize
                d_loss_q_total.backward()
                g_loss_q_total.backward()
                torch.nn.utils.clip_grad_norm_(discriminator.parameters(), 5)
                torch.nn.utils.clip_grad_norm_(generator.parameters(), 5)
                optimizer_D.step()
                optimizer_G.step()
                optimizer_lr.step()
                scheduler_G.step()
                # The original stepped scheduler_G twice here and never
                # stepped scheduler_D; stepping scheduler_D is almost
                # certainly what was intended.
                scheduler_D.step()

                loss_log['D/loss'] += d_loss.item()
                loss_log['D/new_rf'] += d_loss_rf.item()
                loss_log['D/new_lbls'] += d_loss_lbls.item()
                loss_log['D/new_gp'] += d_gradient_penalty.item() \
                    if lambda_gp != 0 else 0
                loss_log['D/loss_q'] += d_loss_q_total.item()
                # loss_log['D/new_rf_q'] += d_loss_rf_q.item()
                # loss_log['D/new_lbls_q'] += d_loss_lbls_q.item()
                # loss_log['D/new_gp_q'] += d_gradient_penalty_q.item() if lambda_gp != 0 else 0
                del d_loss_rf, d_loss_lbls

                loss_log['G/loss'] += g_loss.item()
                loss_log['G/new_rf'] += g_loss_rf.item()
                loss_log['G/new_lbls'] += g_loss_lbls.item()
                loss_log['G/loss_q'] += g_loss_q_total.item()
                # loss_log['G/new_rf_q'] += g_loss_rf_q.item()
                # loss_log['G/new_lbls_q'] += g_loss_lbls_q.item()
                # loss_log['G/new_classifier'] += 0
                loss_log['G/prev_mse'] += loss_aug.item() \
                    if lambda_lwf != 0 else 0
                del g_loss_rf, g_loss_lbls

            print('[GAN Epoch %05d]\t D Total Loss: %.3f \t '
                  'G Total Loss: %.3f \t LwF Loss: %.3f'
                  % (epoch + 1, loss_log['D/loss'], loss_log['G/loss'],
                     loss_log['G/prev_rf']))
            print('[GAN Epoch %05d]\t D Total Loss Query: %.3f \t '
                  'G Total Loss Query: %.3f \t'
                  % (epoch + 1, loss_log['D/loss_q'], loss_log['G/loss_q']))
            for k, v in loss_log.items():
                if v != 0:
                    tb_writer.add_scalar(
                        'Task {} - GAN/{}'.format(current_task, k),
                        v, epoch + 1)
            if epoch == args.epochs_gan - 1:
                torch.save(
                    generator,
                    os.path.join(
                        log_dir, 'task_' + str(current_task).zfill(2) +
                        '_%d_model_generator.pkl' % epoch))
                torch.save(
                    discriminator,
                    os.path.join(
                        log_dir, 'task_' + str(current_task).zfill(2) +
                        '_%d_model_discriminator.pkl' % epoch))

    tb_writer.close()
    prototype = compute_prototype(model, train_loader,
                                  batch_size=args.BatchSize)
    return prototype
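# clip_grad_by_norm_ operates on the tuple of gradients returned by
# torch.autograd.grad (not on parameters, unlike
# torch.nn.utils.clip_grad_norm_). A minimal sketch under that assumption;
# the repo's actual helper is defined elsewhere:
import torch

def clip_grad_by_norm_(grads, max_norm):
    # Total L2 norm over all gradient tensors.
    total_norm = torch.sqrt(sum(g.pow(2).sum() for g in grads))
    clip_coef = max_norm / (total_norm + 1e-6)
    if clip_coef < 1:
        # Rescale every gradient so the joint norm equals max_norm.
        grads = tuple(g * clip_coef for g in grads)
    return grads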
def __init__(self):
    self.logger = logging.Logger("metamapper.graphql")
# Note: gamma=10 grows the learning rate tenfold every 20 steps; StepLR is
# usually used with gamma < 1 (decay), so this is likely deliberate warm-up
# or a leftover experiment.
scheduler = StepLR(optimizer, step_size=20, gamma=10)
# optimizer.step()
# scheduler.step()
global_step = 0
np.random.seed(2001)

num_classes = 100
nb_cl_fg = 60
random_perm = list(range(num_classes))

traindir = os.path.join('home/abhilash/trial/', 'miniimagenet')
trainfolder = miniimagenet('miniimagenet', mode='train', resize=84,
                           cls_index=random_perm)  # [:nb_cl_fg]

log_dir = 'checkpoints'
sys.stdout = logging.Logger(os.path.join(log_dir, 'pre_train.txt'))
tb_writer = SummaryWriter(log_dir)

batchsize = 16
train_loader = torch.utils.data.DataLoader(trainfolder,
                                           batch_size=batchsize,
                                           shuffle=True,
                                           drop_last=True,
                                           num_workers=1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
epochs = 100

print("Training Base Classes")
print("Total No. of classes: ", num_classes)
print("Number of Base Classes: ", nb_cl_fg)
print("Batch Size: ", batchsize)
print("No. of Epochs: ", epochs)
def main(args):
    # Save training logs.
    log_dir = os.path.join(args.checkpoints, args.log_dir)
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    if args.r is None:
        model = models.create(args.net)
        # Load part of the pre-trained model.
        model_dict = model.state_dict()
        # print(model_dict)
        model = models.create(args.net, pretrained=True)
        model.features = torch.nn.Sequential(
            model.features,
            torch.nn.MaxPool2d(7),
            # torch.nn.BatchNorm2d(512),
            torch.nn.Dropout(p=0.01))
        model.classifier = torch.nn.Sequential(
            torch.nn.Linear(512, args.dim))

        # Orthogonal or Kaiming initialization for the new FC layer.
        if args.init == 'orth':
            w = model_dict['classifier.0.weight']
            model_dict['classifier.0.weight'] = torch.nn.init.orthogonal_(w)
        else:
            print('Initialize the FC layer with Kaiming initialization')
            w = model_dict['classifier.0.weight']
            model_dict['classifier.0.weight'] = \
                torch.nn.init.kaiming_normal_(w)
        # Zero bias.
        model_dict['classifier.0.bias'] = torch.zeros(args.dim)
    else:
        # Resume from a saved model.
        print('Resume from model at Epoch %d' % args.start)
        model = torch.load(args.r)

    model = model.cuda()
    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('Initial model is saved at %s' % log_dir)

    # Fine-tune the model: the learning rate for pre-trained parameters is 1/10.
    new_param_ids = set(map(id, model.classifier.parameters()))
    new_params = [p for p in model.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.parameters() if id(p) not in new_param_ids]
    param_groups = [{'params': base_params, 'lr_mult': 0.1},
                    {'params': new_params, 'lr_mult': 1.0}]

    optimizer = torch.optim.Adam(param_groups,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    if args.loss == 'center-nca':
        criterion = losses.create(args.loss, alpha=args.alpha).cuda()
    elif args.loss == 'cluster-nca':
        criterion = losses.create(args.loss, alpha=args.alpha,
                                  beta=args.beta).cuda()
    elif args.loss == 'neighbour':
        criterion = losses.create(args.loss, k=args.k,
                                  margin=args.margin).cuda()
    elif args.loss == 'nca':
        criterion = losses.create(args.loss, alpha=args.alpha,
                                  k=args.k).cuda()
    elif args.loss == 'triplet':
        criterion = losses.create(args.loss, alpha=args.alpha).cuda()
    elif args.loss == 'bin':
        # Note: unlike the other branches, the original does not move this
        # loss to the GPU.
        criterion = losses.create(args.loss, margin=args.margin)
    else:
        criterion = losses.create(args.loss).cuda()

    if args.data == 'shop':
        data = DataSet.create(args.data, root=None, gallery=False,
                              query=False)
    else:
        data = DataSet.create(args.data, root=None, test=False)

    train_loader = torch.utils.data.DataLoader(
        data.train,
        batch_size=args.BatchSize,
        sampler=RandomIdentitySampler(data.train,
                                      num_instances=args.num_instances),
        drop_last=True,
        num_workers=args.nThreads)

    # Save the training statistics.
    epoch_list = list()
    loss_list = list()
    pos_list = list()
    neg_list = list()

    for epoch in range(args.start, args.epochs):
        epoch_list.append(epoch)
        running_loss = 0.0
        running_pos = 0.0
        running_neg = 0.0

        # Decay the learning rate tenfold late in training.
        if epoch == 1500:
            optimizer = torch.optim.Adam(param_groups,
                                         lr=0.1 * args.lr,
                                         weight_decay=args.weight_decay)

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # Wrap them in Variable; labels is a cuda.LongTensor.
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()
            embed_feat = model(inputs)
            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_neg += dist_an
            running_pos += dist_ap
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA-HA-AH-AH-AH-AH --')

        loss_list.append(running_loss)
        pos_list.append(running_pos / (i + 1))  # i is 0-based, so divide by i + 1
        neg_list.append(running_neg / (i + 1))
        print('[Epoch %05d]\t Loss: %.3f \t Accuracy: %.3f \t '
              'Pos-Dist: %.3f \t Neg-Dist: %.3f'
              % (epoch + 1, running_loss, inter_, dist_ap, dist_an))

        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))
            np.savez(os.path.join(log_dir, "result.npz"),
                     epoch=epoch_list, loss=loss_list,
                     pos=pos_list, neg=neg_list)
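# The training curves written to result.npz above can be read back later;
# a minimal sketch (the keys match the np.savez call: epoch, loss, pos, neg,
# and log_dir is whatever directory the run used):
import os
import numpy as np

stats = np.load(os.path.join(log_dir, 'result.npz'))
for ep, l, p, n in zip(stats['epoch'], stats['loss'],
                       stats['pos'], stats['neg']):
    print('epoch %d: loss=%.3f pos=%.3f neg=%.3f' % (ep, l, p, n))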