def pt_1():
    """Train Net on the face dataset and plot per-epoch training/validation loss.

    Saves the curves to results/pt_1/epoch_loss_decrease.png and shows the plot.
    """
    epochs = 25
    training_width = 80
    training_height = 60
    face_dataset = FaceDataset(1, 32, root_dir, training_width, training_height)
    training = create_dataloader(face_dataset, 5)
    validation_dataset = FaceDataset(33, 40, root_dir, training_width, training_height)
    validation = create_dataloader(validation_dataset, 1)
    # test_dataloader(dataloader, training_width, training_height)
    net = Net()
    loss = nn.MSELoss()
    opt = Adam(net.parameters(), lr=0.001)
    training_losses = []
    validation_losses = []
    for epoch in range(epochs):
        # --- training pass ---
        net.train()
        epoch_loss = torch.zeros((1, 1))
        for images, labels in training:
            prediction = net(images)
            # NOTE(review): labels[:, -6] selects a single column; confirm this is
            # intended rather than labels[:, -6:] (the last six columns).
            output = loss(prediction, labels[:, -6])
            # detach: accumulating the raw loss tensor would keep every batch's
            # autograd graph alive for the whole epoch
            epoch_loss += output.detach()
            output.backward()
            opt.step()
            opt.zero_grad()
        epoch_loss = epoch_loss / len(face_dataset)
        training_losses.append([epoch, epoch_loss.item() * 100])
        # --- validation pass: no gradients, no optimizer updates ---
        net.eval()
        epoch_loss = torch.zeros((1, 1))
        with torch.no_grad():
            for images, labels in validation:
                prediction = net(images)
                output = loss(prediction, labels[:, -6])
                epoch_loss += output
        # bug fix: normalize by the validation set size (the original divided by
        # len(face_dataset), the *training* set, skewing the validation curve)
        epoch_loss = epoch_loss / len(validation_dataset)
        validation_losses.append([epoch, epoch_loss.item() * 100])
    training_losses = np.array(training_losses)
    validation_losses = np.array(validation_losses)
    plt.plot(training_losses[:, 0], training_losses[:, 1])
    plt.plot(validation_losses[:, 0], validation_losses[:, 1])
    plt.plot()
    plt.savefig('results/pt_1/epoch_loss_decrease.png')
    plt.show()
def main():
    """Restore a model checkpoint and evaluate it on the test split."""
    args = get_args()
    if args.opts:
        cfg.merge_from_list(args.opts)
    cfg.freeze()

    # Build the model and place it on the best available device.
    print("=> creating model '{}'".format(cfg.MODEL.ARCH))
    model = get_model(model_name=cfg.MODEL.ARCH, pretrained=None)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # Restore weights; a missing checkpoint is a hard error.
    resume_path = args.resume
    if not Path(resume_path).is_file():
        raise ValueError("=> no checkpoint found at '{}'".format(resume_path))
    print("=> loading checkpoint '{}'".format(resume_path))
    state = torch.load(resume_path, map_location="cpu")
    model.load_state_dict(state['state_dict'])
    print("=> loaded checkpoint '{}'".format(resume_path))

    if device == "cuda":
        cudnn.benchmark = True

    # Test loader: fixed order, no augmentation, keep the final partial batch.
    test_loader = DataLoader(
        FaceDataset(args.data_dir, "test", img_size=cfg.MODEL.IMG_SIZE, augment=False),
        batch_size=cfg.TEST.BATCH_SIZE,
        shuffle=False,
        num_workers=cfg.TRAIN.WORKERS,
        drop_last=False,
    )

    print("=> start testing")
    _, _, test_mae = validate(test_loader, model, None, 0, device)
    print(f"test mae: {test_mae:.3f}")
def evaluate(self):
    """
    Estimate accuracy over 100 batches of the test split.
    :return: accuracy rate
    """
    self.model.eval()
    dataset = FaceDataset(self.args, mode="test")
    steps = 100
    accuracy = 0.0
    for step in range(steps):
        log.info("step: %d", step)
        names, params, images = dataset.get_batch(batch_size=self.args.batch_size, edge=False)
        # NOTE(review): itr_train performs backward() and an optimizer step, so
        # this "evaluation" actually updates the weights — confirm intent.
        loss, _ = self.itr_train(images)
        # accuracy is defined as 1 - loss per batch; `loss` is a tensor, so the
        # running value is a tensor as well
        accuracy += 1.0 - loss
    accuracy = accuracy / steps
    log.info("accuracy rate is %f", accuracy)
    return accuracy
def main():
    """Precompute CNN features for the valid/train splits and pickle them to disk."""
    args = get_args()
    model = get_model()

    def _dump(obj, filename):
        # Serialize one object to the given pickle file.
        with open(filename, 'wb') as f:
            pickle.dump(obj, f)

    # precompute validation Features
    valid_loader = DataLoader(
        FaceDataset(args.data_dir, "valid", img_size=cfg.MODEL.IMG_SIZE, augment=False),
        batch_size=1, shuffle=True, num_workers=cfg.TRAIN.WORKERS, drop_last=True)
    valid_features, valid_labels = preconvfeat(valid_loader, model)
    _dump(valid_features, 'valid_features.pkl')
    _dump(valid_labels, 'valid_labels.pkl')

    # precompute training Features
    train_loader = DataLoader(
        FaceDataset(args.data_dir, "train", img_size=cfg.MODEL.IMG_SIZE, augment=True,
                    age_stddev=cfg.TRAIN.AGE_STDDEV),
        batch_size=1, shuffle=True, num_workers=cfg.TRAIN.WORKERS, drop_last=True)
    train_features, train_labels = preconvfeat(train_loader, model)
    _dump(train_features, 'train_features.pkl')
    _dump(train_labels, 'train_labels.pkl')
def batch_train(self, cuda=False):
    """
    Batch-train the imitator network.
    :param cuda: whether to run tensors on the GPU
    """
    # dummy input used only to record the model graph in TensorBoard
    rnd_input = torch.randn(self.args.batch_size, self.args.params_cnt)
    if cuda:
        rnd_input = rnd_input.cuda()
    self.writer.add_graph(self, input_to_model=rnd_input)
    self.model.train()
    dataset = FaceDataset(self.args, mode="train")
    initial_step = self.initial_step
    total_steps = self.args.total_steps
    progress = tqdm(range(initial_step, total_steps + 1), initial=initial_step, total=total_steps)
    for step in progress:
        names, params, images = dataset.get_batch(batch_size=self.args.batch_size, edge=False)
        if cuda:
            params = params.cuda()
            images = images.cuda()
        loss, y_ = self.itr_train(params, images)
        loss_ = loss.cpu().detach().numpy()
        progress.set_description("loss: {:.3f}".format(loss_))
        self.writer.add_scalar('imitator/loss', loss_, step)
        # every prev_freq steps: snapshot a preview, decay the LR, upload weights
        if (step + 1) % self.args.prev_freq == 0:
            path = "{1}/imit_{0}.jpg".format(step + 1, self.prev_path)
            self.capture(path, images, y_, self.args.parsing_checkpoint, cuda)
            # quadratic decay from learning_rate down toward a 2e-3 floor:
            # (x^2 - 2x + 1) == (1 - x)^2 with x = step/total_steps
            x = step / float(total_steps)
            lr = self.args.learning_rate * (x ** 2 - 2 * x + 1) + 2e-3
            utils.update_optimizer_lr(self.optimizer, lr)
            self.writer.add_scalar('imitator/learning rate', lr, step)
            self.upload_weights(step)
        if (step + 1) % self.args.save_freq == 0:
            self.save(step)
    self.writer.close()
def main():
    """Evaluate an age+gender model checkpoint on the test split (multi-GPU aware)."""
    args = get_args()
    if args.opts:
        cfg.merge_from_list(args.opts)
    cfg.freeze()

    # create model
    print("=> creating model '{}'".format(cfg.MODEL.ARCH))
    model = get_model(model_name=cfg.MODEL.ARCH, pretrained=None)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # bug fix: load the checkpoint BEFORE wrapping in DataParallel. Checkpoints
    # saved from a plain model have keys without the 'module.' prefix that
    # nn.DataParallel adds, so loading after the wrap fails with key mismatches.
    resume_path = args.resume
    if Path(resume_path).is_file():
        print("=> loading checkpoint '{}'".format(resume_path))
        checkpoint = torch.load(resume_path, map_location="cpu")
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}'".format(resume_path))
    else:
        raise ValueError("=> no checkpoint found at '{}'".format(resume_path))

    # TODO: delete
    if torch.cuda.device_count() > 1:
        print("Let's use [1,2,4,5] GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model, device_ids=[1, 2, 4, 5])
        model.to(device)

    if device == "cuda":
        cudnn.benchmark = True

    test_dataset = FaceDataset(args.data_dir, "test", img_size=cfg.MODEL.IMG_SIZE, augment=False)
    test_loader = DataLoader(test_dataset, batch_size=cfg.TEST.BATCH_SIZE, shuffle=False,
                             num_workers=cfg.TRAIN.WORKERS, drop_last=False)
    criterion = nn.CrossEntropyLoss().to(device)

    print("=> start testing")
    _, _, test_mae, gen_acc = validate(test_loader, model, criterion, 0, device)
    print(f"Test age mae: {test_mae:.3f}")
    print(f"Test gender accuracy: {gen_acc:.2f}")
def batch_train(self, cuda):
    """
    Train the feature extractor, alternating between sync and async modes.
    :param cuda: move tensors to the GPU when True
    """
    log.info("feature extractor train")
    initial_step = self.initial_step
    total_steps = self.args.total_extractor_steps
    self.training = True
    self.dataset = FaceDataset(self.args, mode="train")
    # dummy input used only to record the model graph in TensorBoard
    rnd_input = torch.randn(self.args.batch_size, 1, 64, 64)
    if cuda:
        rnd_input = rnd_input.cuda()
    self.writer.add_graph(self, input_to_model=rnd_input)
    progress = tqdm(range(initial_step, total_steps + 1), initial=initial_step, total=total_steps)
    for step in progress:
        if self.train_mode == Extractor.TRAIN_SYNC:
            # sync mode: push predicted params to the engine; no local loss here
            progress.set_description("sync mode ")
            names, _, images = self.dataset.get_batch(batch_size=self.args.batch_size, edge=True)
            if cuda:
                images = images.cuda()
            self.sync_train(images, names, step)
        else:
            # async mode: train against cached engine-rendered image pairs
            image1, image2, name = self.dataset.get_cache(cuda)
            if image1 is None or image2 is None:
                # cache exhausted — fall back to sync mode
                self.change_mode(Extractor.TRAIN_SYNC)
                continue
            loss = self.asyn_train(image1, image2)
            # NOTE(review): .detach().numpy() raises for CUDA tensors without
            # .cpu() first — confirm `loss` lives on the CPU here
            loss_ = loss.detach().numpy()
            loss_display = loss_ * 1000
            progress.set_description("loss: {:.3f}".format(loss_display))
            self.writer.add_scalar('extractor/loss', loss_display, step)
            if step % self.args.extractor_prev_freq == 0:
                self.capture(image1, image2, name, step, cuda)
                # learning rate is scaled by the displayed loss
                lr = self.args.extractor_learning_rate * loss_display
                self.writer.add_scalar('extractor/learning rate', lr, step)
                utils.update_optimizer_lr(self.optimizer, lr)
        if step % self.args.extractor_save_freq == 0:
            self.save(step)
    self.writer.close()
    # GAN hyper-parameters (tail of a config dict whose opening brace lies
    # outside this chunk)
    'ngf': 512,          # generator feature-map base width
    'ndf': 16,           # discriminator feature-map base width
    'nzf': 100,          # latent vector length
    'optimizer': 'sgd',
    'lr_D': 0.01,        # discriminator learning rate
    'lr_G': 0.01,        # generator learning rate
    'momentum': 0.9,
    # NOTE(review): 'nestrov' looks like a typo for 'nesterov'; the consumer of
    # this key is outside this chunk, so it is left as-is
    'nestrov': False,
    'lr_schedule': None,
    'epochs': 100
}
image_size = config['image_size']
batch_size = config['batch_size']
data_path = '/home/scott/Desktop/dataset/face3k/train.txt'
train_set = FaceDataset(data_path, 128, augment=True, cache_image=False)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)
# generator maps a (batch, 100, 1, 1) latent tensor to an image
netG = Generator(input_size=(batch_size, 100, 1, 1), image_size=image_size, ngf=config['ngf'], leaky_relu=True)
netD = Discriminator(image_size=config['image_size'], ndf=config['ndf'], leaky_relu=True)
''' load trained weight
from torch.utils.data import DataLoader
from util.config import DATASET_PARAMETERS, NETWORKS_PARAMETERS
from util.parse_dataset import csv_to_list
from network import restore_train, get_network
from utils import Meter, cycle, save_model, get_collate_fn, Logger
from dataset import VoiceDataset, FaceDataset

# dataset and dataloader
print('Parsing your dataset...')
voice_list, face_list, id_class_num, emotion_class_num = csv_to_list(DATASET_PARAMETERS)
print('voice samples num = %d, face samples num = %d' % (len(voice_list), len(face_list)))
print('Preparing the datasets...')
voice_dataset = VoiceDataset(voice_list, DATASET_PARAMETERS['nframe_range'])
face_dataset = FaceDataset(face_list)
print('Preparing the dataloaders...')
collate_fn = get_collate_fn(DATASET_PARAMETERS['nframe_range'])
voice_loader = DataLoader(voice_dataset,
                          shuffle=True,
                          drop_last=True,
                          batch_size=DATASET_PARAMETERS['batch_size'],
                          num_workers=DATASET_PARAMETERS['workers_num'],  # number of worker processes for multi-process loading
                          collate_fn=collate_fn)  # how individual samples are merged into one batch
face_loader = DataLoader(face_dataset,
                         shuffle=True,
                         drop_last=True,
                         batch_size=DATASET_PARAMETERS['batch_size'],
                         num_workers=DATASET_PARAMETERS['workers_num'])
def run():
    """Train an ArcFace-style face-recognition model and test on LFW each epoch."""
    opt = Config()
    if opt.display:
        visualizer = Visualizer()
    # device = torch.device("cuda")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_dataset = FaceDataset(opt.train_root, opt.train_list, phase='train', input_shape=opt.input_shape)
    trainloader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=opt.train_batch_size,
                                              shuffle=True,
                                              num_workers=opt.num_workers)
    print('{} train iters per epoch:'.format(len(trainloader)))

    # Focal Loss: mitigates class imbalance by down-weighting easy samples so
    # training focuses on the hard ones.
    # https://blog.csdn.net/u014380165/article/details/77019084
    # define the loss function
    if opt.loss == 'focal_loss':
        criterion = FocalLoss(gamma=2)
    else:
        criterion = torch.nn.CrossEntropyLoss()

    # define the backbone model
    if opt.backbone == 'resnet18':
        model = resnet_face18(use_se=opt.use_se)
    elif opt.backbone == 'resnet34':
        model = resnet34()
    elif opt.backbone == 'resnet50':
        model = resnet50()

    # metric head: margin-based classification layer over the 512-d embedding
    if opt.metric == 'add_margin':
        metric_fc = AddMarginProduct(512, opt.num_classes, s=30, m=0.35)
    elif opt.metric == 'arc_margin':
        metric_fc = ArcMarginProduct(512, opt.num_classes, s=30, m=0.5, easy_margin=opt.easy_margin)
    elif opt.metric == 'sphere':
        metric_fc = SphereProduct(512, opt.num_classes, m=4)
    else:
        metric_fc = nn.Linear(512, opt.num_classes)

    # view_model(model, opt.input_shape)
    print(model)
    model.to(device)
    model = DataParallel(model)
    metric_fc.to(device)
    metric_fc = DataParallel(metric_fc)

    # define the optimizer (both backbone and metric-head parameters)
    if opt.optimizer == 'sgd':
        optimizer = torch.optim.SGD([{'params': model.parameters()}, {'params': metric_fc.parameters()}],
                                    lr=opt.lr, weight_decay=opt.weight_decay)
    else:
        optimizer = torch.optim.Adam([{'params': model.parameters()}, {'params': metric_fc.parameters()}],
                                     lr=opt.lr, weight_decay=opt.weight_decay)
    # https://www.programcreek.com/python/example/98143/torch.optim.lr_scheduler.StepLR
    # every lr_step epochs the learning rate is multiplied by gamma
    scheduler = StepLR(optimizer, step_size=opt.lr_step, gamma=0.1)

    start = time.time()
    for i in range(opt.max_epoch):
        # NOTE(review): stepping the scheduler before the epoch is the pre-1.1
        # PyTorch ordering; recent versions expect it after optimizer.step().
        scheduler.step()

        model.train()  # train mode vs eval mode
        for ii, data in enumerate(trainloader):
            data_input, label = data
            data_input = data_input.to(device)
            label = label.to(device).long()
            feature = model(data_input)
            # margin head turns the 512-d feature into class logits (uses label)
            output = metric_fc(feature, label)
            loss = criterion(output, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            iters = i * len(trainloader) + ii
            if iters % opt.print_freq == 0:
                output = output.data.cpu().numpy()
                output = np.argmax(output, axis=1)  # index of the max logit == predicted class
                label = label.data.cpu().numpy()
                # print(output)
                # print(label)
                acc = np.mean((output == label).astype(int))
                # NOTE(review): `start` is only reset inside this branch, so the
                # speed figure averages over print_freq iterations
                speed = opt.print_freq / (time.time() - start)
                time_str = time.asctime(time.localtime(time.time()))
                print('{} train epoch {} iter {} {} iters/s loss {} acc {}'.format(time_str, i, ii, speed, loss.item(), acc))
                if opt.display:
                    visualizer.display_current_results(iters, loss.item(), name='train_loss')
                    visualizer.display_current_results(iters, acc, name='train_acc')
                start = time.time()

        if i % opt.save_interval == 0 or i == opt.max_epoch:
            save_model(model, opt.checkpoints_path, opt.backbone, i)

        # end of training for this epoch: switch to eval mode and test on LFW
        model.eval()
        identity_list = get_lfw_list(opt.lfw_test_list)
        img_paths = [os.path.join(opt.lfw_root, each) for each in identity_list]
        acc = lfw_test(model, img_paths, identity_list, opt.lfw_test_list, opt.test_batch_size)
        if opt.display:
            visualizer.display_current_results(iters, acc, name='test_acc')
def main():
    """Distributed training/evaluation entry point for the face classifier."""
    global args, config, best_loss
    args = parser.parse_args()

    # merge the 'common' section of the YAML config into args and config
    with open(args.config) as f:
        # NOTE(review): yaml.load without an explicit Loader is deprecated and
        # unsafe on untrusted input — prefer yaml.safe_load
        config = yaml.load(f)
    for k, v in config['common'].items():
        setattr(args, k, v)
    config = EasyDict(config['common'])

    rank, world_size, device_id = dist_init(
        os.path.join(args.distributed_path, config.distributed_file))

    args.save_path_dated = args.save_path + '/' + args.datetime
    if args.run_tag != '':
        args.save_path_dated += '-' + args.run_tag

    # create model
    model = model_entry(config.model)
    model.cuda()
    model = nn.parallel.DistributedDataParallel(model, device_ids=[device_id])

    # create optimizer
    opt_config = config.optimizer
    opt_config.kwargs.lr = config.lr_scheduler.base_lr
    opt_config.kwargs.params = model.parameters()
    optimizer = optim_entry(opt_config)

    # optionally resume from a checkpoint
    last_iter = -1
    best_loss = 1e9
    if args.load_path:
        if args.recover:
            best_loss, last_iter = load_state(args.load_path, model, optimizer=optimizer)
        else:
            load_state(args.load_path, model)

    cudnn.benchmark = True

    # train augmentation
    if config.augmentation.get('imgnet_mean', False):
        model_mean = (0.485, 0.456, 0.406)
        model_std = (0.229, 0.224, 0.225)
    else:
        model_mean = (0.5, 0.5, 0.5)
        model_std = (0.5, 0.5, 0.5)
    trans = albumentations.Compose([
        RandomResizedCrop(config.augmentation.input_size,
                          config.augmentation.input_size,
                          scale=(config.augmentation.min_scale**2., 1.),
                          ratio=(1., 1.)),
        HorizontalFlip(p=0.5),
        RandomBrightnessContrast(brightness_limit=0.25, contrast_limit=0.1, p=0.5),
        JpegCompression(p=.2, quality_lower=50),
        MotionBlur(p=0.5),
        Normalize(mean=model_mean, std=model_std),
        ToTensorV2()
    ])

    train_dataset = FaceDataset(config.train_root,
                                config.train_source,
                                transform=trans,
                                resize=config.augmentation.input_size,
                                image_format=config.get('image_format', None),
                                random_frame=config.get('train_random_frame', False),
                                bgr=config.augmentation.get('bgr', False))
    # sampler drives exactly max_iter iterations across all ranks
    train_sampler = DistributedGivenIterationSampler(train_dataset,
                                                     config.lr_scheduler.max_iter,
                                                     config.batch_size,
                                                     last_iter=last_iter)
    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=False,
                              num_workers=config.workers,
                              pin_memory=True,
                              sampler=train_sampler)

    # validation augmentation (deterministic: resize + normalize only)
    trans = albumentations.Compose([
        Resize(config.augmentation.input_size, config.augmentation.input_size),
        Normalize(mean=model_mean, std=model_std),
        ToTensorV2()
    ])
    val_multi_loader = []
    if args.val_source != '':
        for dataset_idx in range(len(args.val_source)):
            val_dataset = FaceDataset(args.val_root[dataset_idx],
                                      args.val_source[dataset_idx],
                                      transform=trans,
                                      output_index=True,
                                      resize=config.augmentation.input_size,
                                      image_format=config.get('image_format', None),
                                      bgr=config.augmentation.get('bgr', False))
            val_sampler = DistributedSampler(val_dataset, round_up=False)
            val_loader = DataLoader(val_dataset,
                                    batch_size=config.batch_size,
                                    shuffle=False,
                                    num_workers=config.workers,
                                    pin_memory=True,
                                    sampler=val_sampler)
            val_multi_loader.append(val_loader)

    config.lr_scheduler['optimizer'] = optimizer
    config.lr_scheduler['last_iter'] = last_iter
    lr_scheduler = get_scheduler(config.lr_scheduler)

    # only rank 0 writes logs / tensorboard / checkpoints
    if rank == 0:
        mkdir(args.save_path)
        mkdir(args.save_path_dated)
        tb_logger = SummaryWriter(args.save_path_dated)
        logger = create_logger('global_logger', args.save_path_dated + '-log.txt')
        logger.info('{}'.format(args))
        logger.info(model)
        logger.info(parameters_string(model))
        logger.info('len(train dataset) = %d' % len(train_loader.dataset))
        for dataset_idx in range(len(val_multi_loader)):
            logger.info('len(val%d dataset) = %d' %
                        (dataset_idx, len(val_multi_loader[dataset_idx].dataset)))
        mkdir(args.save_path_dated + '/saves')
    else:
        tb_logger = None

    # binary class weights; *2 keeps the overall loss scale comparable to unweighted CE
    positive_weight = config.get('positive_weight', 0.5)
    weight = torch.tensor([1. - positive_weight, positive_weight]) * 2.
    if rank == 0:
        logger.info('using class weights: {}'.format(weight.tolist()))
    criterion = nn.CrossEntropyLoss(weight=weight).cuda()

    if args.evaluate:
        if args.evaluate_path:
            # evaluate a range of checkpoints
            all_ckpt = get_all_checkpoint(args.evaluate_path, args.range_list, rank)
            for ckpt in all_ckpt:
                if rank == 0:
                    logger.info('Testing ckpt: ' + ckpt)
                last_iter = -1
                _, last_iter = load_state(ckpt, model, optimizer=optimizer)
                for dataset_idx in range(len(val_multi_loader)):
                    validate(dataset_idx, val_multi_loader[dataset_idx], model,
                             criterion, tb_logger, curr_step=last_iter, save_softmax=True)
        else:
            # evaluate the already-loaded weights
            for dataset_idx in range(len(val_multi_loader)):
                validate(dataset_idx, val_multi_loader[dataset_idx], model,
                         criterion, tb_logger, curr_step=last_iter, save_softmax=True)
        return

    train(train_loader, val_multi_loader, model, criterion, optimizer,
          lr_scheduler, last_iter + 1, tb_logger)

    return
def main():
    """Train a multitask age/gender model with optional cumulative-accuracy tracking,
    checkpoint the best validation MAE (or CA), and save loss/accuracy plots."""
    args = get_args()
    if args.opts:
        cfg.merge_from_list(args.opts)
    cfg.freeze()
    start_epoch = 0
    checkpoint_dir = Path(args.checkpoint)
    checkpoint_dir.mkdir(parents=True, exist_ok=True)

    # create model
    print("=> creating model '{}'".format(cfg.MODEL.ARCH))
    model = get_model(model_name=cfg.MODEL.ARCH)
    multitask(model)

    if cfg.TRAIN.OPT == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=cfg.TRAIN.LR,
                                    momentum=cfg.TRAIN.MOMENTUM,
                                    weight_decay=cfg.TRAIN.WEIGHT_DECAY)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=cfg.TRAIN.LR)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # optionally resume from a checkpoint
    resume_path = args.resume
    if resume_path:
        if Path(resume_path).is_file():
            print("=> loading checkpoint '{}'".format(resume_path))
            checkpoint = torch.load(resume_path, map_location="cpu")
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(resume_path, checkpoint['epoch']))
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(resume_path))

    if args.multi_gpu:
        model = nn.DataParallel(model)
    if device == "cuda":
        cudnn.benchmark = True

    # NOTE(review): `True if ... else True` is always True, so CA is always
    # computed regardless of dataset — the else-arm was probably meant to be False.
    get_ca = True if "megaage" in args.dataset.lower() else True  # display cummulative acuracy
    value_ca = True if "megaage" in args.dataset.lower() else False  # use CA to update saved model
    if get_ca:
        print("Cummulative Accuracy will be calculated for", args.dataset)
    if value_ca:
        print("Cummulative Accuracy will be compared to update saved model")

    criterion = nn.CrossEntropyLoss().to(device)
    train_dataset = FaceDataset(args.data_dir, "train", args.dataset,
                                img_size=cfg.MODEL.IMG_SIZE, augment=args.aug,
                                age_stddev=cfg.TRAIN.AGE_STDDEV, label=True,
                                expand= args.expand, gender=True)
    train_loader = DataLoader(train_dataset, batch_size=cfg.TRAIN.BATCH_SIZE, shuffle=True,
                              num_workers=cfg.TRAIN.WORKERS, drop_last=False)
    val_dataset = FaceDataset(args.data_dir, "valid", args.dataset,
                              img_size=cfg.MODEL.IMG_SIZE, augment=False, label=True,
                              expand= args.expand, gender=True)
    val_loader = DataLoader(val_dataset, batch_size=cfg.TEST.BATCH_SIZE, shuffle=False,
                            num_workers=cfg.TRAIN.WORKERS, drop_last=False)
    scheduler = StepLR(optimizer, step_size=cfg.TRAIN.LR_DECAY_STEP,
                       gamma=cfg.TRAIN.LR_DECAY_RATE, last_epoch=start_epoch - 1)
    best_val_mae = 10000.0
    train_writer = None
    # cumulative accuracy within 3/5/7 years
    global_ca = {3: 0.0, 5: 0.0, 7: 0.0}
    train_count = len(train_dataset)
    val_count = len(val_dataset)
    all_train_loss = []
    all_train_accu = []
    all_val_loss = []
    all_val_accu = []

    # range(start_epoch, cfg.TRAIN.EPOCHS):
    for epoch in range(cfg.TRAIN.EPOCHS):
        # train
        train_loss, train_acc = train(
            train_loader, model, criterion, optimizer, epoch, device, train_count)
        # validate
        val_loss, val_acc, val_mae, new_ca = validate(
            val_loader, model, criterion, epoch, device, val_count, get_ca)
        all_train_loss.append(float(train_loss))
        all_train_accu.append(float(train_acc))
        all_val_loss.append(float(val_loss))
        all_val_accu.append(float(val_mae))

        # checkpoint: either MAE improved (when CA is not the criterion) or CA3 improved
        if ((not value_ca) and (val_mae < best_val_mae)) or ((get_ca and value_ca) and (new_ca[3] > global_ca[3])):
            print(
                f"=> [epoch {epoch:03d}] best val mae was improved from {best_val_mae:.3f} to {val_mae:.3f}")
            model_state_dict = model.module.state_dict(
            ) if args.multi_gpu else model.state_dict()
            torch.save(
                {
                    'epoch': epoch + 1,
                    'arch': cfg.MODEL.ARCH,
                    'state_dict': model_state_dict,
                    'optimizer_state_dict': optimizer.state_dict()
                },
                str(checkpoint_dir.joinpath("epoch{:03d}_{}_{:.5f}_{:.4f}_{}_{}_ldl.pth".format(
                    epoch, args.dataset, val_loss, val_mae, datetime.now().strftime("%Y%m%d"), cfg.MODEL.ARCH)))
            )
            best_val_mae = val_mae
            best_checkpoint = str(checkpoint_dir.joinpath("epoch{:03d}_{}_{:.5f}_{:.4f}_{}_{}_ldl.pth".format(
                epoch, args.dataset, val_loss, val_mae, datetime.now().strftime("%Y%m%d"), cfg.MODEL.ARCH)))
            if get_ca:
                global_ca = new_ca
        else:
            print(
                f"=> [epoch {epoch:03d}] best val mae was not improved from {best_val_mae:.3f} ({val_mae:.3f})")

        # adjust learning rate
        scheduler.step()

    print("=> training finished")
    print(f"additional opts: {args.opts}")
    print(f"best val mae: {best_val_mae:.3f}")
    if get_ca:
        print("CA3: {:.2f} CA5: {:.2f} CA7: {:.2f}".format(
            global_ca[3] * 100, global_ca[5]*100, global_ca[7]*100))
    # NOTE(review): best_checkpoint is unbound if no epoch ever improved
    print("best mae saved model:", best_checkpoint)

    # dump the four learning curves
    x = np.arange(cfg.TRAIN.EPOCHS)
    plt.xlabel("Epoch")
    plt.ylabel("Train Loss")
    plt.plot(x, all_train_loss)
    plt.savefig("savefig/{}_{}_{}_train_loss.png".format(args.dataset, cfg.MODEL.ARCH, datetime.now().strftime("%Y%m%d")))
    plt.clf()
    plt.ylabel("Train Accuracy")
    plt.plot(x, all_train_accu)
    plt.savefig("savefig/{}_{}_{}_train_accu.png".format(args.dataset, cfg.MODEL.ARCH, datetime.now().strftime("%Y%m%d")))
    plt.clf()
    plt.ylabel("Validation Loss")
    plt.plot(x, all_val_loss)
    plt.savefig("savefig/{}_{}_{}_val_loss.png".format(args.dataset, cfg.MODEL.ARCH, datetime.now().strftime("%Y%m%d")))
    plt.clf()
    plt.ylabel("Validation Accuracy")
    plt.plot(x, all_val_accu)
    plt.savefig("savefig/{}_{}_{}_val_mae.png".format(args.dataset, cfg.MODEL.ARCH, datetime.now().strftime("%Y%m%d")))
class Extractor(nn.Module):
    """Feature extractor: maps a 64x64 single-channel edge image to face-rig parameters.

    Can train synchronously (params are sent over UDP to the game engine, which
    renders images back) or asynchronously (against cached engine renders).
    """

    # training modes
    TRAIN_ASYN = 1  # async: train against cached engine-rendered image pairs
    TRAIN_SYNC = 2  # sync: send predicted params to the engine and wait

    def __init__(self, name, args, imitator=None, momentum=0.5):
        """
        feature extractor
        :param name: model name
        :param args: argparse options
        :param imitator: imitate engine's behaviour
        :param momentum: momentum for optimizer
        """
        super(Extractor, self).__init__()
        log.info("construct feature_extractor %s", name)
        self.name = name
        self.imitator = imitator
        self.initial_step = 0
        self.args = args
        self.model_path = "./output/extractor"
        self.prev_path = "./output/preview"
        self.training = False
        self.params_cnt = self.args.params_cnt
        self.dataset = None
        self.train_mode = Extractor.TRAIN_SYNC
        self.train_refer = 32  # countdown before switching train mode
        self.net = Net(args.udp_port, args)
        self.clean()
        self.writer = SummaryWriter(comment="feature extractor", log_dir=args.path_tensor_log)
        self.model = nn.Sequential(
            nn.Conv2d(1, 4, kernel_size=7, stride=2, padding=3),  # 1. (batch, 4, 32, 32)
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),  # 2. (batch, 4, 16, 16)
            group(4, 8, kernel_size=3, stride=1, padding=1),  # 3. (batch, 8, 16, 16)
            ResidualBlock.make_layer(8, channels=8),  # 4. (batch, 8, 16, 16)
            group(8, 16, kernel_size=3, stride=1, padding=1),  # 5. (batch, 16, 16, 16)
            ResidualBlock.make_layer(8, channels=16),  # 6. (batch, 16, 16, 16)
            group(16, 64, kernel_size=3, stride=1, padding=1),  # 7. (batch, 64, 16, 16)
            ResidualBlock.make_layer(8, channels=64),  # 8. (batch, 64, 16, 16)
            group(64, self.params_cnt, kernel_size=3, stride=1, padding=1),  # 9. (batch, params_cnt, 16, 16)
            ResidualBlock.make_layer(4, channels=self.params_cnt),  # 10. (batch, params_cnt, 16, 16)
            nn.Dropout(0.5),
        )
        self.fc = nn.Linear(self.params_cnt * 16 * 16, self.params_cnt)
        self.optimizer = optim.Adam(self.parameters(), lr=args.extractor_learning_rate)
        utils.debug_parameters(self, "_extractor_")

    def forward(self, input):
        """Run the CNN + FC head; output is sigmoid-squashed to (0, 1)."""
        output = self.model(input)
        output = output.view(output.size(0), -1)
        output = self.fc(output)
        output = F.dropout(output, training=self.training)
        output = torch.sigmoid(output)
        return output

    def itr_train(self, image):
        """
        Method 1: train synchronously through the imitator.
        :param image: [batch, 3, 512, 512]
        :return: (loss scalar, predicted params)
        """
        self.optimizer.zero_grad()
        param_ = self.forward(image)
        img_ = self.imitator.forward(param_)
        loss = utils.content_loss(image, img_)
        loss.backward()
        self.optimizer.step()
        return loss, param_

    def sync_train(self, image, name, step):
        """
        Method 2: send params to the engine over the net so it renders images
        (asynchronous round-trip; needs synchronization but the engine's output
        is better than the imitator's).
        :param step: train step
        :param name: picture names [batch]
        :param image: [batch, 1, 64, 64]
        """
        self.train_refer = self.train_refer - 1
        if self.train_refer <= 0:
            self.change_mode(Extractor.TRAIN_ASYN)
        param_ = self.forward(image)
        self.net.send_params(param_, name, step)

    def asyn_train(self, image1, image2):
        """
        Once enough renders have accumulated in the cache, train asynchronously.
        :param image1: input image
        :param image2: generated image
        :return: loss, type scalar
        """
        self.train_refer = self.train_refer - 1
        if self.train_refer <= 0:
            self.change_mode(Extractor.TRAIN_SYNC)
        self.optimizer.zero_grad()
        loss = F.mse_loss(image1, image2)
        loss.backward()
        self.optimizer.step()
        return loss

    def change_mode(self, mode):
        """
        Switch the train mode and reset the countdown.
        :param mode: train mode
        """
        self.train_refer = 32
        if mode == Extractor.TRAIN_ASYN:
            self.train_refer = 36
        self.train_mode = mode

    def batch_train(self, cuda):
        """
        Train the extractor, alternating between sync and async modes.
        :param cuda: move tensors to GPU when True
        """
        log.info("feature extractor train")
        initial_step = self.initial_step
        total_steps = self.args.total_extractor_steps
        self.training = True
        self.dataset = FaceDataset(self.args, mode="train")
        # dummy input used only to record the graph in TensorBoard
        rnd_input = torch.randn(self.args.batch_size, 1, 64, 64)
        if cuda:
            rnd_input = rnd_input.cuda()
        self.writer.add_graph(self, input_to_model=rnd_input)
        progress = tqdm(range(initial_step, total_steps + 1), initial=initial_step, total=total_steps)
        for step in progress:
            if self.train_mode == Extractor.TRAIN_SYNC:
                progress.set_description("sync mode ")
                names, _, images = self.dataset.get_batch(batch_size=self.args.batch_size, edge=True)
                if cuda:
                    images = images.cuda()
                self.sync_train(images, names, step)
            else:
                image1, image2, name = self.dataset.get_cache(cuda)
                if image1 is None or image2 is None:
                    # cache exhausted — fall back to sync mode
                    self.change_mode(Extractor.TRAIN_SYNC)
                    continue
                loss = self.asyn_train(image1, image2)
                loss_ = loss.detach().numpy()
                loss_display = loss_ * 1000
                progress.set_description("loss: {:.3f}".format(loss_display))
                self.writer.add_scalar('extractor/loss', loss_display, step)
                if step % self.args.extractor_prev_freq == 0:
                    self.capture(image1, image2, name, step, cuda)
                    # learning rate scales with the displayed loss
                    lr = self.args.extractor_learning_rate * loss_display
                    self.writer.add_scalar('extractor/learning rate', lr, step)
                    utils.update_optimizer_lr(self.optimizer, lr)
            if step % self.args.extractor_save_freq == 0:
                self.save(step)
        self.writer.close()

    def load_checkpoint(self, path, training=False, cuda=False):
        """
        Restore the net from a checkpoint.
        :param path: checkpoint's path
        :param training: continue training after restoring
        :param cuda: gpu speedup
        """
        path_ = self.args.path_to_inference + "/" + path
        if not os.path.exists(path_):
            raise NeuralException("not exist checkpoint of extractor with path " + path)
        if cuda:
            checkpoint = torch.load(path_)
        else:
            checkpoint = torch.load(path_, map_location='cpu')
        self.load_state_dict(checkpoint['net'])
        self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.initial_step = checkpoint['epoch']
        log.info("recovery imitator from %s", path)
        if training:
            self.batch_train(cuda)

    def clean(self):
        """Clear output folders (back them up first if needed)."""
        ops.clear_folder(self.model_path)
        ops.clear_files(self.args.path_to_cache)
        ops.clear_files(self.args.path_tensor_log)
        ops.clear_files(self.prev_path)

    def save(self, step):
        """
        Save a training checkpoint.
        :param step: train step
        """
        state = {'net': self.state_dict(), 'optimizer': self.optimizer.state_dict(), 'epoch': step}
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
        # bug fix: the original used `self.cuda()`, which MOVES the model to the
        # GPU as a side effect and returns the module (always truthy), so ext was
        # always "cuda". Inspect the parameters' actual device instead.
        ext = "cuda" if next(self.parameters()).is_cuda else "cpu"
        torch.save(state, '{1}/extractor_{0}_{2}.pth'.format(step, self.model_path, ext))

    def inference(self, cp_name, photo_path, cuda):
        """
        Feature extractor: generate face-rig parameters from a photo.
        :param cuda: gpu speed up
        :param cp_name: checkpoint's path
        :param photo_path: input photo's path
        :return: params [1, params_cnt]
        """
        img = cv2.imread(photo_path)
        scaled = align.align_face(img, size=(64, 64))
        self.load_checkpoint(cp_name, training=False, cuda=cuda)
        img = utils.faceparsing_ndarray(scaled, self.args.parsing_checkpoint, cuda)
        img = utils.img_edge(img)
        # bug fix: `with torch.no_grad:` (missing parentheses) raises at runtime;
        # the context manager must be instantiated.
        with torch.no_grad():
            input = torch.from_numpy(img)
            input = input.view([1, 1, 64, 64])
            params_ = self(input)
            log.info(params_)
            return params_

    def evaluate(self):
        """
        Estimate accuracy over 100 batches of the test split.
        :return: accuracy rate
        """
        self.model.eval()
        dataset = FaceDataset(self.args, mode="test")
        steps = 100
        accuracy = 0.0
        for step in range(steps):
            log.info("step: %d", step)
            names, params, images = dataset.get_batch(batch_size=self.args.batch_size, edge=False)
            # NOTE(review): itr_train performs backward() + optimizer.step(), so
            # this "evaluation" updates the weights — confirm intent.
            loss, _ = self.itr_train(images)
            accuracy += 1.0 - loss
        accuracy = accuracy / steps
        log.info("accuracy rate is %f", accuracy)
        return accuracy

    def capture(self, tensor1, tensor2, name, step, cuda):
        """
        Snapshot of extractor progress: input, generated, original, parsed images.
        :param tensor1: input photo
        :param tensor2: generated image
        :param cuda: use gpu to speed up
        :param step: train step
        :param name: picture name
        """
        path = "{1}/{2}_{0}.jpg".format(step, self.prev_path, name[3:-6])
        orig_path = os.path.join(self.args.path_to_dataset + "2", name)
        img3 = cv2.imread(orig_path)
        img4 = utils.faceparsing_ndarray(img3, self.args.parsing_checkpoint, cuda)
        # invert normalized tensors back to 8-bit grayscale
        image1 = 255 - tensor1.cpu().detach().numpy() * 255
        image2 = 255 - tensor2.cpu().detach().numpy() * 255
        shape = image1.shape
        if len(shape) == 2:
            image1 = image1[:, :, np.newaxis]
            image2 = image2[:, :, np.newaxis]
        img1 = ops.fill_gray(image1)
        img2 = ops.fill_gray(image2)
        img = ops.merge_4image(img1, img2, img3, img4)
        cv2.imwrite(path, img)
print("Number of train images :", len(tr_img_paths))
print("Number of test images :", len(te_img_paths))
print("-" * 30)
# exit()

""" setup data """
# TODO: decide on the resize target dimensions
# TODO: reconsider the Normalize parameters?
transforms = Compose([
    Resize((224, 224)),  # for vgg16
    ToTensor(),
    # ImageNet channel statistics
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])
tr_dataset = FaceDataset(tr_img_paths, tr_raw_labels, embedding_dict, transform=transforms)
te_dataset = FaceDataset(te_img_paths, te_raw_labels, None, transform=transforms)
tr_loader = DataLoader(tr_dataset, batch_size=batch_size, shuffle=True)
te_loader = DataLoader(te_dataset, batch_size=batch_size, shuffle=True)

""" setup model """
# NOTE: the feature-extraction layers are fully frozen, but given what is being
# learned it might be better to fine-tune them; pre-training on person
# classification could also help.
model = my_vgg16_bn(out_features=n_classes)
if weight:
    model.load_state_dict(torch.load(weight))
# HACK: a `model` level has ended up above `features` in the hierarchy — the way
# the pretrained model is used inside the model class should be cleaned up.
# Setting requires_grad=True on the CNN exhausts memory.
# command-line switches (argparse results)
if args.l is not None:
    LOAD_MODEL = True
    LOAD_MODEL_NAME = args.l
detecteur_image = False
if args.d is not None:
    # NOTE(review): this assigns a boolean to IMAGE_PATH although the name
    # suggests a filesystem path — confirm the intent of `args.d == 1`.
    IMAGE_PATH = (args.d == 1)
    detecteur_image = True
if args.c is not None:
    CONFIDENCE = (args.c == 1)

# heavy augmentation for training; plain tensor+normalize for testing
transform = tf.Compose([tf.RandomHorizontalFlip(),
                        tf.RandomVerticalFlip(),
                        tf.RandomRotation(90),
                        tf.ColorJitter(brightness=0.5, contrast=0.75, saturation=0, hue=0),
                        tf.ToTensor(),
                        tf.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
transform_test = tf.Compose([tf.ToTensor(),
                             tf.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = FaceDataset(TRAIN_DATA, CLASSIFIED_TRAIN_DATA_55000, transform=transform)
# NOTE(review): the validation split uses the augmenting `transform`; validation
# usually uses `transform_test` — confirm this is intentional.
validset = FaceDataset(TRAIN_DATA, CLASSIFIED_VALID_DATA_36000, transform=transform)
trainloader = {'train': torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKERS),
               'val': torch.utils.data.DataLoader(validset, batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKERS)}
testset = TestDataset(TEST_DATA, CLASSIFIED_TEST_DATA, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS)
# full training set re-read with the test transform (no augmentation)
ttset = FaceDataset(TRAIN_DATA, CLASSIFIED_TRAIN_DATA, transform=transform_test)
ttloader = torch.utils.data.DataLoader(ttset, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS)
# fragment: continuation of a main() whose beginning lies outside this chunk;
# these first lines complete the `if Path(resume_path).is_file():` branch.
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})"
          .format(resume_path, checkpoint['epoch']))
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
else:
    print("=> no checkpoint found at '{}'".format(resume_path))

if args.multi_gpu:
    print(args.multi_gpu)
    model = nn.DataParallel(model)

if device == "cuda":
    cudnn.benchmark = True

# pick homoscedastic vs aleatoric uncertainty loss from the config
criterion = homosedastic_case if cfg.MODEL.HOMOSCEDASTIC else aleatoric_loss

train_dataset = FaceDataset(args.data_dir, "train", img_size=cfg.MODEL.IMG_SIZE,
                            augment=True, age_stddev=cfg.TRAIN.AGE_STDDEV)
train_loader = DataLoader(train_dataset, batch_size=cfg.TRAIN.BATCH_SIZE, shuffle=True,
                          num_workers=cfg.TRAIN.WORKERS, drop_last=True)
val_dataset = FaceDataset(args.data_dir, "valid", img_size=cfg.MODEL.IMG_SIZE, augment=False)
val_loader = DataLoader(val_dataset, batch_size=cfg.TEST.BATCH_SIZE, shuffle=False,
                        num_workers=cfg.TRAIN.WORKERS, drop_last=False)
scheduler = StepLR(optimizer, step_size=cfg.TRAIN.LR_DECAY_STEP,
                   gamma=cfg.TRAIN.LR_DECAY_RATE, last_epoch=start_epoch - 1)
best_val_mae = 10000.0
train_writer = None
if args.tensorboard is not None:
    opts_prefix = "_".join(args.opts)
    train_writer = SummaryWriter(log_dir=args.tensorboard + "/" + opts_prefix + "_train")
    val_writer = SummaryWriter(log_dir=args.tensorboard + "/" + opts_prefix + "_val")
def main():
    """Train an age-estimation model: build model/optimizer/loaders from cfg,
    optionally resume from a checkpoint, then run the train/validate loop,
    saving a checkpoint whenever validation MAE improves."""
    args = get_args()
    # Apply command-line config overrides, then freeze cfg against mutation.
    if args.opts:
        cfg.merge_from_list(args.opts)
    cfg.freeze()
    start_epoch = 0
    checkpoint_dir = Path(args.checkpoint)
    checkpoint_dir.mkdir(parents=True, exist_ok=True)

    # display nb of workers
    print(f"number of train workers {cfg.TRAIN.WORKERS}")

    # create model
    print("=> creating model '{}'".format(cfg.MODEL.ARCH))
    model = get_model(model_name=cfg.MODEL.ARCH)

    # Optimizer choice is config-driven; anything other than "sgd" gets Adam.
    if cfg.TRAIN.OPT == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=cfg.TRAIN.LR,
                                    momentum=cfg.TRAIN.MOMENTUM,
                                    weight_decay=cfg.TRAIN.WEIGHT_DECAY)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=cfg.TRAIN.LR)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    # nn.Module.to() moves parameters in place, so the optimizer created
    # above still references the same Parameter objects.
    model = model.to(device)

    # optionally resume from a checkpoint
    resume_path = args.resume
    if resume_path:
        if Path(resume_path).is_file():
            print("=> loading checkpoint '{}'".format(resume_path))
            checkpoint = torch.load(resume_path, map_location="cpu")
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                resume_path, checkpoint['epoch']))
            # Restore optimizer state so momentum/moments carry over.
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        else:
            # Missing checkpoint is non-fatal: training starts from scratch.
            print("=> no checkpoint found at '{}'".format(resume_path))

    if args.multi_gpu:
        model = nn.DataParallel(model)

    if device == "cuda":
        cudnn.benchmark = True  # autotune conv kernels for fixed input sizes

    criterion = nn.CrossEntropyLoss().to(device)

    # Train split: augmentation + age-label noise; drop_last for constant batches.
    train_dataset = FaceDataset(args.data_dir, "train", img_size=cfg.MODEL.IMG_SIZE,
                                augment=True, age_stddev=cfg.TRAIN.AGE_STDDEV)
    train_loader = DataLoader(train_dataset, batch_size=cfg.TRAIN.BATCH_SIZE,
                              shuffle=True, num_workers=cfg.TRAIN.WORKERS,
                              drop_last=True)
    # Validation split: deterministic, keeps every sample.
    val_dataset = FaceDataset(args.data_dir, "valid", img_size=cfg.MODEL.IMG_SIZE,
                              augment=False)
    val_loader = DataLoader(val_dataset, batch_size=cfg.TEST.BATCH_SIZE,
                            shuffle=False, num_workers=cfg.TRAIN.WORKERS,
                            drop_last=False)

    # Step decay; last_epoch=start_epoch-1 aligns the schedule after a resume.
    scheduler = StepLR(optimizer, step_size=cfg.TRAIN.LR_DECAY_STEP,
                       gamma=cfg.TRAIN.LR_DECAY_RATE, last_epoch=start_epoch - 1)
    best_val_mae = 10000.0  # sentinel "worst possible" MAE
    train_writer = None

    # Optional TensorBoard writers, one per phase, named after config overrides.
    if args.tensorboard is not None:
        opts_prefix = "_".join(args.opts)
        train_writer = SummaryWriter(log_dir=args.tensorboard + "/" + opts_prefix + "_train")
        val_writer = SummaryWriter(log_dir=args.tensorboard + "/" + opts_prefix + "_val")

    for epoch in range(start_epoch, cfg.TRAIN.EPOCHS):
        # train
        train_loss, train_acc = train(train_loader, model, criterion, optimizer,
                                      epoch, device)

        # validate
        val_loss, val_acc, val_mae = validate(val_loader, model, criterion,
                                              epoch, device)

        if args.tensorboard is not None:
            train_writer.add_scalar("loss", train_loss, epoch)
            train_writer.add_scalar("acc", train_acc, epoch)
            val_writer.add_scalar("loss", val_loss, epoch)
            val_writer.add_scalar("acc", val_acc, epoch)
            val_writer.add_scalar("mae", val_mae, epoch)

        # checkpoint: save only when validation MAE improves (lower is better)
        if val_mae < best_val_mae:
            print(
                f"=> [epoch {epoch:03d}] best val mae was improved from {best_val_mae:.3f} to {val_mae:.3f}"
            )
            # Unwrap DataParallel so the saved state_dict keys are portable.
            model_state_dict = model.module.state_dict(
            ) if args.multi_gpu else model.state_dict()
            # 'epoch' is stored as epoch+1 so a resume starts at the next epoch.
            torch.save(
                {
                    'epoch': epoch + 1,
                    'arch': cfg.MODEL.ARCH,
                    'state_dict': model_state_dict,
                    'optimizer_state_dict': optimizer.state_dict()
                },
                str(
                    checkpoint_dir.joinpath(
                        "epoch{:03d}_{:.5f}_{:.4f}.pth".format(
                            epoch, val_loss, val_mae))))
            best_val_mae = val_mae
        else:
            print(
                f"=> [epoch {epoch:03d}] best val mae was not improved from {best_val_mae:.3f} ({val_mae:.3f})"
            )

        # adjust learning rate
        scheduler.step()

    print("=> training finished")
    print(f"additional opts: {args.opts}")
    print(f"best val mae: {best_val_mae:.3f}")
def train_style_transfer(args):
    """Train a face style-transfer model (plain VAE or VAE-GAN).

    Expects `args` with: train_data, valid_data, batch_size, gpu, model_type
    ("VAE" | "VAEGAN"), lr, weight_decay, epochs, and optional warm-start
    paths generator_model / discriminator_model / classifier_model.

    Returns:
        (net, label_dict) for "VAE", or ((generator, discriminator), label_dict)
        for "VAEGAN", with the best-validation-loss weights loaded.
    """
    if not (args.train_data and args.valid_data):
        print("must chose train_data and valid_data")
        sys.exit()

    # make dataset — the validation set reuses the training label dictionary
    # so class indices line up across splits.
    trans = transforms.ToTensor()
    train_dataset = FaceDataset(args.train_data, transform=trans)
    label_dict = train_dataset.get_label_dict()
    valid_dataset = FaceDataset(args.valid_data, transform=trans)
    valid_dataset.give_label_dict(label_dict)
    train_loader = data_utils.DataLoader(train_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True, num_workers=1)
    valid_loader = data_utils.DataLoader(valid_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True, num_workers=1)
    train_size = len(train_dataset)
    valid_size = len(valid_dataset)
    loaders = {"train": train_loader, "valid": valid_loader}
    dataset_sizes = {"train": train_size, "valid": valid_size}

    if args.gpu:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device("cpu")

    # make network(s) and per-network optimizers; optionally warm-start from disk
    if args.model_type == "VAE":
        net = Autoencoder(train_dataset.label_num()).to(device)
        optimizer = optim.Adam(net.parameters(), lr=args.lr,
                               weight_decay=args.weight_decay)
        best_model_wts = net.state_dict()
        best_loss = 1e10
        if args.generator_model and os.path.exists(args.generator_model):
            net.load_state_dict(torch.load(args.generator_model))
    elif args.model_type == "VAEGAN":
        generator = Autoencoder(train_dataset.label_num()).to(device)
        discriminator = Discriminator().to(device)
        classifier = Classifier(train_dataset.label_num()).to(device)
        generator_optimizer = optim.Adam(generator.parameters(), lr=args.lr,
                                         weight_decay=args.weight_decay)
        # Discriminator learns 10x slower to keep the adversarial game balanced.
        discriminator_optimizer = optim.Adam(discriminator.parameters(),
                                             lr=args.lr * 0.1,
                                             weight_decay=args.weight_decay)
        best_generator_wts = generator.state_dict()
        best_discriminator_wts = discriminator.state_dict()
        best_generator_loss = 1e10
        best_discriminator_loss = 1e10
        if args.generator_model and os.path.exists(args.generator_model):
            generator.load_state_dict(torch.load(args.generator_model))
        if args.discriminator_model and os.path.exists(args.discriminator_model):
            discriminator.load_state_dict(torch.load(args.discriminator_model))
        if args.classifier_model:
            classifier.load_state_dict(torch.load(args.classifier_model))

    # make loss function and optimizer
    criterion = nn.BCELoss(reduction="sum")
    classifier_criterion = nn.CrossEntropyLoss(reduction="sum")

    # initialize loss history (only populated in the VAE branch below)
    loss_history = {"train": [], "valid": []}

    # start training
    start_time = time.time()
    for epoch in range(args.epochs):
        print("epoch {}".format(epoch + 1))
        for phase in ["train", "valid"]:
            # toggle train/eval mode for whichever networks are in play
            if phase == "train":
                if args.model_type == "VAE":
                    net.train(True)
                elif args.model_type == "VAEGAN":
                    generator.train(True)
                    discriminator.train(True)
            else:
                if args.model_type == "VAE":
                    net.train(False)
                elif args.model_type == "VAEGAN":
                    generator.train(False)
                    discriminator.train(False)

            # initialize running loss
            generator_running_loss = 0.0
            discriminator_running_loss = 0.0

            for i, data in enumerate(loaders[phase]):
                inputs, label = data
                # wrap in Variables and enable grads only for the train phase
                if phase == "train":
                    inputs = Variable(inputs).to(device)
                    label = Variable(label).to(device)
                    torch.set_grad_enabled(True)
                else:
                    inputs = Variable(inputs).to(device)
                    label = Variable(label).to(device)
                    torch.set_grad_enabled(False)

                if args.model_type == "VAE":
                    optimizer.zero_grad()
                    mu, var, outputs = net(inputs, label)
                    loss = loss_func(inputs, outputs, mu, var)
                    if phase == "train":
                        loss.backward()
                        optimizer.step()
                    generator_running_loss += loss.item()
                elif args.model_type == "VAEGAN":
                    # Label smoothing: real targets in [0.8, 1.0], fakes in [0.0, 0.2].
                    real_label = Variable(
                        torch.ones((inputs.size()[0], 1), dtype=torch.float) -
                        0.2 * (torch.rand(inputs.size()[0], 1))).to(device)
                    fake_label = Variable(
                        torch.zeros((inputs.size()[0], 1), dtype=torch.float) +
                        0.2 * (torch.rand(inputs.size()[0], 1))).to(device)

                    # --- discriminator update ---
                    discriminator_optimizer.zero_grad()
                    real_pred = discriminator(inputs)
                    real_loss = criterion(real_pred, real_label)
                    random_index = np.random.randint(0, train_dataset.label_num(),
                                                     inputs.size()[0])
                    # NOTE(review): generate_label is built but never passed to
                    # the generator (which receives `label` below) — confirm
                    # whether random-label conditioning was intended.
                    generate_label = Variable(torch.zeros_like(label)).to(device)
                    # FIX: inner loop no longer shadows the batch index `i`.
                    for j, index in enumerate(random_index):
                        generate_label[j][index] = 1
                    mu, var, outputs = generator(inputs, label)
                    # detach(): discriminator loss must not backprop into the generator
                    fake_pred = discriminator(outputs.detach())
                    fake_loss = criterion(fake_pred, fake_label)
                    discriminator_loss = real_loss + fake_loss
                    if phase == "train":
                        discriminator_loss.backward()
                        discriminator_optimizer.step()

                    # --- generator update: fool the discriminator + reconstruction ---
                    generator_optimizer.zero_grad()
                    #class_loss = classifier_criterion(classifier(outputs), torch.max(label, 1)[1])
                    dis_loss = criterion(discriminator(outputs), real_label)
                    gen_loss = loss_func(inputs, outputs, mu, var)
                    generator_loss = dis_loss + gen_loss
                    if phase == "train":
                        generator_loss.backward()
                        generator_optimizer.step()
                    discriminator_running_loss += discriminator_loss.item()
                    generator_running_loss += generator_loss.item()

            # per-epoch reporting and best-weights bookkeeping
            if args.model_type == "VAE":
                epoch_loss = generator_running_loss / dataset_sizes[
                    phase] * args.batch_size
                loss_history[phase].append(epoch_loss)
                print("{} loss {:.4f}".format(phase, epoch_loss))
                if phase == "valid" and epoch_loss < best_loss:
                    best_model_wts = net.state_dict()
                    best_loss = epoch_loss
            elif args.model_type == "VAEGAN":
                epoch_generator_loss = generator_running_loss / dataset_sizes[
                    phase] * args.batch_size
                epoch_discriminator_loss = discriminator_running_loss / dataset_sizes[
                    phase] * args.batch_size
                print("{} generator loss {:.4f}".format(phase, epoch_generator_loss))
                print("{} discriminator loss {:.4f}".format(phase, epoch_discriminator_loss))
                if phase == "valid" and epoch_generator_loss < best_generator_loss:
                    best_generator_wts = generator.state_dict()
                    best_generator_loss = epoch_generator_loss
                if phase == "valid" and epoch_discriminator_loss < best_discriminator_loss:
                    best_discriminator_wts = discriminator.state_dict()
                    # FIX: was `best_generator_loss = epoch_discriminator_loss`,
                    # which clobbered the generator's best score and never
                    # updated the discriminator's threshold.
                    best_discriminator_loss = epoch_discriminator_loss

    elapsed_time = time.time() - start_time
    print("training complete in {:.0f}s".format(elapsed_time))

    # reload the best validation weights before returning
    if args.model_type == "VAE":
        net.load_state_dict(best_model_wts)
        return net, label_dict
    elif args.model_type == "VAEGAN":
        generator.load_state_dict(best_generator_wts)
        discriminator.load_state_dict(best_discriminator_wts)
        return (generator, discriminator), label_dict
def train_classifier(args):
    """Train the Classifier network on labelled face crops.

    Expects `args` with: train_data, valid_data, batch_size, gpu, lr,
    weight_decay, epochs, and an optional classifier_model warm-start path.

    Returns:
        (classifier, label_dict) with the best-validation-loss weights loaded.
    """
    if not (args.train_data and args.valid_data):
        print("must chose train_data and valid_data")
        sys.exit()

    # Both splits share one label dictionary so class indices agree.
    to_tensor = transforms.ToTensor()
    train_dataset = FaceDataset(args.train_data, transform=to_tensor)
    label_dict = train_dataset.get_label_dict()
    valid_dataset = FaceDataset(args.valid_data, transform=to_tensor)
    valid_dataset.give_label_dict(label_dict)

    phase_loaders = {
        "train": data_utils.DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=True, num_workers=1),
        "valid": data_utils.DataLoader(valid_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=True, num_workers=1),
    }
    dataset_sizes = {"train": len(train_dataset), "valid": len(valid_dataset)}

    device = torch.device(
        "cuda" if args.gpu and torch.cuda.is_available() else "cpu")

    classifier = Classifier(len(label_dict)).to(device).float()
    optimizer = optim.Adam(classifier.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    best_model_wts = classifier.state_dict()
    best_loss = 1e10
    if args.classifier_model and os.path.exists(args.classifier_model):
        classifier.load_state_dict(torch.load(args.classifier_model))
    criterion = nn.CrossEntropyLoss(reduction="sum")

    start_time = time.time()
    for epoch in range(args.epochs):
        print("epoch {}".format(epoch + 1))
        for phase in ["train", "valid"]:
            classifier.train(phase == "train")

            running_loss = 0.0
            running_acc = 0
            for batch in phase_loaders[phase]:
                inputs, label = batch
                inputs = Variable(inputs).to(device)
                label = Variable(label).to(device)
                torch.set_grad_enabled(phase == "train")

                optimizer.zero_grad()
                pred = classifier(inputs)
                # Explicit L2 penalty over all weights; kept squared again
                # below, exactly as in the original recipe.
                reg_loss = sum((w * w).sum() for w in classifier.parameters())
                loss = criterion(pred, torch.max(label, 1)[1]) \
                    + 1e-9 * reg_loss * reg_loss
                if phase == "train":
                    loss.backward()
                    optimizer.step()
                running_loss += loss.item()
                # Labels are one-hot; argmax both sides to count correct predictions.
                running_acc += (torch.max(pred, 1)[1] ==
                                torch.max(label, 1)[1]).sum().item()

            epoch_loss = running_loss / dataset_sizes[phase] * args.batch_size
            epoch_acc = running_acc / dataset_sizes[phase]
            print("{} loss {:.4f}".format(phase, epoch_loss))
            print("{} acc {:.6f}".format(phase, epoch_acc))
            # Track the weights with the lowest validation loss seen so far.
            if phase == "valid" and epoch_loss < best_loss:
                best_model_wts = classifier.state_dict()
                best_loss = epoch_loss

    elapsed_time = time.time() - start_time
    print("training_complete in {:.0f}".format(elapsed_time))
    classifier.load_state_dict(best_model_wts)
    return classifier, label_dict
def main():
    """Run fold-wise test-time inference with horizontal-flip TTA.

    For each requested fold: pick the checkpoint with the lowest encoded
    score in its directory, run the model over the normal and flipped test
    loaders, flip the flipped predictions back, and save both logits maps
    per image under logits/<model>/fold_<k>/.
    """
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--device_ids', type=str, default='0')
    arg('--folds', type=str, help='fold', default='0,1,2,3,4,5,6,7,8,9')
    arg('--model', type=str, default='se_resnext101',
        choices=list(model_list.keys()))
    arg('--batch_size', type=int, default=BS)
    args = parser.parse_args()

    GPUs = [int(i) for i in args.device_ids.split(',')]
    folds_to_use = [int(i) for i in args.folds.split(',')]

    path_images = list(
        filter(lambda x: x.endswith('.jpg'), os.listdir('../data/test/')))
    unet_base_model = model_list[args.model]

    # Two loaders over the SAME image list (shuffle=False keeps them aligned):
    # one normal, one horizontally flipped for TTA.
    test_data_loader_normal = DataLoader(FaceDataset(
        '../data/test', path_images, transforms=test_norm_transforms),
                                         batch_size=args.batch_size,
                                         num_workers=10,
                                         shuffle=False)
    test_data_loader_flip = DataLoader(FaceDataset(
        '../data/test', path_images, transforms=test_flip_transforms),
                                       batch_size=args.batch_size,
                                       num_workers=10,
                                       shuffle=False)

    savedir_base = f'logits/{args.model}/'
    os.makedirs(savedir_base, exist_ok=True)

    for cur_fold_num in folds_to_use:
        search_dir = f'checkpoints/{args.model}_fold_{cur_fold_num}/'
        files_in_dir = os.listdir(search_dir)
        # Checkpoint filenames encode a score as the second '_'-separated
        # field; rebuild it as "0.<digits>" and pick the minimum (best).
        scores = [
            float('0.' + i.split('_')[1].split('.')[0]) for i in files_in_dir
        ]
        chckp_to_use = files_in_dir[np.argmin(scores)]
        chkp_pth = f'{search_dir}{chckp_to_use}'
        print('use checkpoint ', chkp_pth)

        savedir = f'{savedir_base}fold_{cur_fold_num}/'
        os.makedirs(savedir, exist_ok=True)

        unet = unet_base_model(pretrained=False)
        if torch.cuda.is_available():
            unet = unet.cuda()
            unet = nn.DataParallel(unet, GPUs)
        unet.load_state_dict(torch.load(chkp_pth)['model'])
        unet.eval()

        img_cntr = 0
        with torch.no_grad():
            for batch_n, batch_f in tqdm(zip(test_data_loader_normal,
                                             test_data_loader_flip),
                                         total=len(test_data_loader_normal)):
                inp_n = cuda(batch_n[0])
                inp_f = cuda(batch_f[0])
                output_n = unet.forward(inp_n)
                output_f = unet.forward(inp_f)
                for img_batch_index in range(output_n.shape[0]):
                    img_n = output_n[img_batch_index].cpu().numpy()[0]
                    img_f = output_f[img_batch_index].cpu().numpy()[0]
                    # Un-flip the TTA prediction so it aligns with img_n.
                    img_f = np.fliplr(img_f)
                    img_id = path_images[img_cntr].split('.')[0]
                    np.save(f'{savedir}/id_{img_id}_normal', img_n)
                    # FIX: save the flipped-back TTA prediction; the original
                    # saved img_n twice and discarded img_f entirely.
                    np.save(f'{savedir}/id_{img_id}_tta', img_f)
                    img_cntr += 1
def pt_2():
    """Train the landmark-regression net on 160x120 face images, plot the
    training/validation loss curves, visualize first-layer conv filters,
    and overlay predicted vs. ground-truth landmarks on validation images.
    """
    epochs = 25
    H = 120
    W = 160
    face_dataset = FaceDataset(1, 32, root_dir, W, H, CustomTransforms())
    training = create_dataloader(face_dataset, 5)
    validation_dataset = FaceDataset(33, 40, root_dir, W, H, CustomTransforms())
    validation = create_dataloader(validation_dataset, 1)
    #test_dataloader(training, W, H)
    net = Net()
    loss = nn.MSELoss()
    opt = Adam(net.parameters(), lr=0.001)
    training_losses = []
    validation_losses = []
    for epoch in range(epochs):
        # --- training pass ---
        epoch_loss = torch.zeros((1, 1))
        for i, (images, labels) in enumerate(training):
            prediction = net(images)
            # 58 landmarks x 2 coords = 116 targets per sample
            output = loss(prediction, labels.type(torch.float32).view(-1, 116))
            epoch_loss += output
            output.backward()
            opt.step()
            opt.zero_grad()
        epoch_loss = epoch_loss / len(face_dataset)
        # FIX: report the epoch number; the original printed the last batch
        # index `i` instead of `epoch`.
        print("EPOCH " + str(epoch) + " LOSS: " + str(epoch_loss))
        training_losses.append([epoch, epoch_loss.item() * 100])

        # --- validation pass (no optimizer step) ---
        epoch_loss = torch.zeros((1, 1), requires_grad=False)
        for i, (images, labels) in enumerate(validation):
            prediction = net(images)
            output = loss(prediction, labels.type(torch.float32).view(-1, 116))
            epoch_loss += output
            opt.zero_grad()
        # FIX: normalize by the validation set size; the original divided by
        # len(face_dataset) (the training set), skewing the plotted curve.
        epoch_loss = epoch_loss / len(validation_dataset)
        validation_losses.append([epoch, epoch_loss.item() * 100])

    training_losses = np.array(training_losses)
    validation_losses = np.array(validation_losses)
    plt.plot(training_losses[:, 0], training_losses[:, 1])
    plt.plot(validation_losses[:, 0], validation_losses[:, 1])
    plt.plot()
    plt.savefig('results/pt_2/epoch_loss_decrease.png')
    plt.show()

    # Handy visualization code copied and pasted from:
    # https://colab.research.google.com/github/Niranjankumar-c/DeepLearning-PadhAI/blob/master/DeepLearning_Materials/6_VisualizationCNN_Pytorch/CNNVisualisation.ipynb#scrollTo=cWmfCalUvzbS
    # as linked on the piazza.
    def plot_filters_single_channel(layer_idx, t):
        """Plot every (out_ch, in_ch) kernel of weight tensor `t` on one grid
        and save the figure named after conv layer index `layer_idx`."""
        #kernels depth * number of kernels
        nplots = t.shape[0] * t.shape[1]
        ncols = 12
        nrows = 1 + nplots // ncols
        count = 0
        fig = plt.figure(figsize=(ncols, nrows))
        #looping through all the kernels in each channel
        for out_ch in range(t.shape[0]):
            for in_ch in range(t.shape[1]):
                count += 1
                ax1 = fig.add_subplot(nrows, ncols, count)
                npimg = np.array(t[out_ch, in_ch].numpy(), np.float32)
                # standardize then clamp into [0, 1] for display
                npimg = (npimg - np.mean(npimg)) / np.std(npimg)
                npimg = np.minimum(1, np.maximum(0, (npimg + 0.5)))
                ax1.imshow(npimg)
                ax1.set_title(str(out_ch) + ',' + str(in_ch))
                ax1.axis('off')
                ax1.set_xticklabels([])
                ax1.set_yticklabels([])
        plt.tight_layout()
        # FIX: name the file after the conv-layer index; the original reused
        # the kernel loop variable (shadowing the parameter), so the filename
        # carried the last kernel index instead.
        plt.savefig(str(layer_idx) + 'weight_visualization.png')
        plt.show()

    # Only the first conv layer's filters are visualized.
    for i in range(len(net.conv)):
        if i == 0:
            plot_filters_single_channel(i, net.conv[i].weight.data)

    # Re-run validation images through the trained net and plot predicted
    # (red) vs. ground-truth (green) landmarks on each image.
    validation_dataset = FaceDataset(33, 40, root_dir, W, H, CustomTransforms())
    dataloader = create_dataloader(validation_dataset, 1)
    with torch.no_grad():
        for i, (image, label) in enumerate(dataloader):
            prediction = net(image)
            output = loss(prediction, label.type(torch.float32).view(-1, 116))
            print("LOSS FOR IMAGE IS: " + str(output))
            prediction = prediction.view(-1, 58, 2)
            plt.imshow(image[0][0], cmap='gray')
            # landmarks are normalized; scale back to pixel coordinates
            plt.scatter(prediction[0, :, 0] * W, prediction[0, :, 1] * H,
                        s=10, marker='o', c='r')
            plt.scatter(label[0, :, 0] * W, label[0, :, 1] * H,
                        marker='o', color='green')
            plt.savefig('results/prediction_' + str(i) + '_' + str(epochs))
            plt.show()
# NOTE(review): fragment of a larger if/elif dispatch on args.phase — the
# opening `if` branch lives outside this chunk, so the leading statements
# below are the tail of the preceding branch (extractor training, per the log
# message) and their indentation is reconstructed.
    log.info('feature extractor train mode')
    extractor = Extractor("neural extractor", args)
    if cuda:
        extractor.cuda()
    extractor.batch_train(cuda)
elif args.phase == "inference_imitator":
    log.info("inference imitator")
    imitator = Imitator("neural imitator", args, clean=False)
    if cuda:
        imitator.cuda()
    imitator.load_checkpoint(args.imitator_model, True, cuda=cuda)
elif args.phase == "prev_imitator":
    # Preview mode: run one picture through the imitator and write it to disk.
    log.info("preview imitator")
    imitator = Imitator("neural imitator", args, clean=False)
    imitator.load_checkpoint(args.imitator_model, False, cuda=False)
    dataset = FaceDataset(args)
    name, param, img = dataset.get_picture()
    param = np.array(param, dtype=np.float32)
    # add a leading batch dimension: (P,) -> (1, P)
    b_param = param[np.newaxis, :]
    log.info(b_param.shape)
    t_param = torch.from_numpy(b_param)
    output = imitator(t_param)
    output = output.cpu().detach().numpy()
    output = np.squeeze(output, axis=0)
    # swap axes 0 and 2 (presumably CHW -> WHC for cv2 — TODO confirm the
    # imitator's output layout) and scale to 0-255 for imwrite
    output = output.swapaxes(0, 2) * 255
    cv2.imwrite('./output/{0}.jpg'.format(name), output)
elif args.phase == "inference_extractor":
    log.info("inference extractor")
    extractor = Extractor("neural extractor", args)
    if cuda:
        extractor.cuda()
# Apply the custom weight initialization to discriminator and generator.
netD.apply(weights_init)
netG.apply(weights_init)

criterion = nn.BCELoss()

# Fixed batch of latent vectors, reused to visualize generator progress
# across epochs on the same inputs.
fixed_noise = torch.randn(64, n_vector, 1, 1, device=device)

# Target values for the discriminator's BCE loss.
real_label = 1
fake_label = 0

# Separate Adam optimizers for D and G (classic DCGAN setup).
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

# 64x64 center-cropped faces, normalized to [-1, 1] per channel.
facedataset = FaceDataset(dataset_dir=dataset_dir,
                          transform=transforms.Compose([
                              transforms.Resize(64),
                              transforms.CenterCrop(64),
                              transforms.ToTensor(),
                              transforms.Normalize((0.5, 0.5, 0.5),
                                                   (0.5, 0.5, 0.5)),
                          ]))
dataloader = DataLoader(facedataset,
                        batch_size=batch_size,
                        shuffle=True,
                        num_workers=num_workers)

# Training loop
img_list = []   # snapshots of fixed-noise generations
G_losses = []
D_losses = []
iters = 0

print("Starting Training Loop...")
# NOTE(review): the loop body continues beyond this chunk.
for epoch in range(num_epochs):