Example #1
0
def pt_1():
    """Train `Net` on the face dataset and plot per-epoch losses.

    Trains for a fixed number of epochs with MSE loss and Adam, tracks the
    mean per-sample loss on both the training and validation splits, then
    saves and shows a plot of both curves.
    """
    epochs = 25
    training_width = 80
    training_height = 60

    face_dataset = FaceDataset(1, 32, root_dir, training_width,
                               training_height)
    training = create_dataloader(face_dataset, 5)

    validation_dataset = FaceDataset(33, 40, root_dir, training_width,
                                     training_height)
    validation = create_dataloader(validation_dataset, 1)

    net = Net()
    loss = nn.MSELoss()
    opt = Adam(net.parameters(), lr=0.001)

    training_losses = []
    validation_losses = []
    for epoch in range(epochs):
        # --- training pass ---
        net.train()
        epoch_loss = 0.0
        for images, labels in training:
            prediction = net(images)
            # NOTE(review): labels[:, -6] selects a single column; confirm a
            # slice (labels[:, -6:]) was not intended.
            output = loss(prediction, labels[:, -6])
            # Accumulate a plain float so each batch's autograd graph is
            # freed instead of being kept alive for the whole epoch.
            epoch_loss += output.item()
            output.backward()
            opt.step()
            opt.zero_grad()
        epoch_loss /= len(face_dataset)
        training_losses.append([epoch, epoch_loss * 100])

        # --- validation pass: no gradients, no parameter updates ---
        net.eval()
        epoch_loss = 0.0
        with torch.no_grad():
            for images, labels in validation:
                prediction = net(images)
                output = loss(prediction, labels[:, -6])
                epoch_loss += output.item()
        # BUG FIX: was normalized by len(face_dataset) (the training set);
        # the validation loss must be divided by the validation set size.
        epoch_loss /= len(validation_dataset)
        validation_losses.append([epoch, epoch_loss * 100])

    training_losses = np.array(training_losses)
    validation_losses = np.array(validation_losses)

    plt.plot(training_losses[:, 0], training_losses[:, 1])
    plt.plot(validation_losses[:, 0], validation_losses[:, 1])
    plt.savefig('results/pt_1/epoch_loss_decrease.png')
    plt.show()
Example #2
0
def main():
    """Load a checkpointed model and report its MAE on the test split."""
    args = get_args()

    if args.opts:
        cfg.merge_from_list(args.opts)
    cfg.freeze()

    # create model
    print("=> creating model '{}'".format(cfg.MODEL.ARCH))
    model = get_model(model_name=cfg.MODEL.ARCH, pretrained=None)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # load checkpoint (fail fast if the file does not exist)
    resume_path = args.resume
    if not Path(resume_path).is_file():
        raise ValueError("=> no checkpoint found at '{}'".format(resume_path))
    print("=> loading checkpoint '{}'".format(resume_path))
    state = torch.load(resume_path, map_location="cpu")
    model.load_state_dict(state['state_dict'])
    print("=> loaded checkpoint '{}'".format(resume_path))

    if device == "cuda":
        cudnn.benchmark = True

    test_dataset = FaceDataset(args.data_dir, "test", img_size=cfg.MODEL.IMG_SIZE, augment=False)
    test_loader = DataLoader(test_dataset, batch_size=cfg.TEST.BATCH_SIZE, shuffle=False,
                             num_workers=cfg.TRAIN.WORKERS, drop_last=False)

    print("=> start testing")
    _, _, test_mae = validate(test_loader, model, None, 0, device)
    print(f"test mae: {test_mae:.3f}")
Example #3
0
 def evaluate(self):
     """
     Estimate model accuracy over a fixed number of test batches.

     :return: accuracy rate
     """
     self.model.eval()
     test_set = FaceDataset(self.args, mode="test")
     steps = 100
     score = 0.0
     for idx in range(steps):
         log.info("step: %d", idx)
         names, params, images = test_set.get_batch(batch_size=self.args.batch_size, edge=False)
         loss, _ = self.itr_train(images)
         # each batch contributes (1 - loss) to the running score
         score += 1.0 - loss
     score = score / steps
     log.info("accuracy rate is %f", score)
     return score
Example #4
0
def main():
    """Precompute convolutional features for both splits and pickle them."""
    args = get_args()
    model = get_model()

    def _dump(obj, filename):
        # one pickle file per object, overwritten on each run
        with open(filename, 'wb') as f:
            pickle.dump(obj, f)

    # precompute validation Features
    valid_dataset = FaceDataset(args.data_dir, "valid",
                                img_size=cfg.MODEL.IMG_SIZE, augment=False)
    valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=True,
                              num_workers=cfg.TRAIN.WORKERS, drop_last=True)
    valid_features, valid_labels = preconvfeat(valid_loader, model)
    _dump(valid_features, 'valid_features.pkl')
    _dump(valid_labels, 'valid_labels.pkl')

    # precompute training Features
    train_dataset = FaceDataset(args.data_dir, "train",
                                img_size=cfg.MODEL.IMG_SIZE, augment=True,
                                age_stddev=cfg.TRAIN.AGE_STDDEV)
    train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True,
                              num_workers=cfg.TRAIN.WORKERS, drop_last=True)
    train_features, train_labels = preconvfeat(train_loader, model)
    _dump(train_features, 'train_features.pkl')
    _dump(train_labels, 'train_labels.pkl')
Example #5
0
    def batch_train(self, cuda=False):
        """
        Run the imitator training loop over batches.
        :param cuda: whether to run computation on the GPU
        """
        # trace the network graph once with a random input for TensorBoard
        rnd_input = torch.randn(self.args.batch_size, self.args.params_cnt)
        if cuda:
            rnd_input = rnd_input.cuda()
        self.writer.add_graph(self, input_to_model=rnd_input)

        self.model.train()
        dataset = FaceDataset(self.args, mode="train")
        first_step = self.initial_step
        last_step = self.args.total_steps
        progress = tqdm(range(first_step, last_step + 1),
                        initial=first_step,
                        total=last_step)
        for step in progress:
            names, params, images = dataset.get_batch(
                batch_size=self.args.batch_size, edge=False)
            if cuda:
                params = params.cuda()
                images = images.cuda()

            loss, y_ = self.itr_train(params, images)
            loss_value = loss.cpu().detach().numpy()
            progress.set_description("loss: {:.3f}".format(loss_value))
            self.writer.add_scalar('imitator/loss', loss_value, step)

            if (step + 1) % self.args.prev_freq == 0:
                # periodic preview capture + quadratic LR decay towards 2e-3
                path = "{1}/imit_{0}.jpg".format(step + 1, self.prev_path)
                self.capture(path, images, y_, self.args.parsing_checkpoint,
                             cuda)
                frac = step / float(last_step)
                lr = self.args.learning_rate * (frac**2 - 2 * frac + 1) + 2e-3
                utils.update_optimizer_lr(self.optimizer, lr)
                self.writer.add_scalar('imitator/learning rate', lr, step)
                self.upload_weights(step)
            if (step + 1) % self.args.save_freq == 0:
                self.save(step)
        self.writer.close()
def main():
    """Test a checkpointed multitask model: prints age MAE and gender accuracy."""
    args = get_args()

    if args.opts:
        cfg.merge_from_list(args.opts)
    cfg.freeze()

    # create model
    print("=> creating model '{}'".format(cfg.MODEL.ARCH))
    model = get_model(model_name=cfg.MODEL.ARCH, pretrained=None)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # TODO: delete
    if torch.cuda.device_count() > 1:
        print("Let's use [1,2,4,5] GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model, device_ids=[1, 2, 4, 5])
    model.to(device)

    # load checkpoint (fail fast if missing)
    resume_path = args.resume
    if not Path(resume_path).is_file():
        raise ValueError("=> no checkpoint found at '{}'".format(resume_path))
    print("=> loading checkpoint '{}'".format(resume_path))
    ckpt = torch.load(resume_path, map_location="cpu")
    model.load_state_dict(ckpt['state_dict'])
    print("=> loaded checkpoint '{}'".format(resume_path))

    if device == "cuda":
        cudnn.benchmark = True

    test_dataset = FaceDataset(args.data_dir, "test",
                               img_size=cfg.MODEL.IMG_SIZE, augment=False)
    test_loader = DataLoader(test_dataset,
                             batch_size=cfg.TEST.BATCH_SIZE,
                             shuffle=False,
                             num_workers=cfg.TRAIN.WORKERS,
                             drop_last=False)
    criterion = nn.CrossEntropyLoss().to(device)

    print("=> start testing")
    _, _, test_mae, gen_acc = validate(test_loader, model, criterion, 0,
                                       device)
    print(f"Test age mae: {test_mae:.3f}")
    print(f"Test gender accuracy: {gen_acc:.2f}")
Example #7
0
    def batch_train(self, cuda):
        """Run the extractor training loop, alternating sync/async modes.

        In TRAIN_SYNC mode a fresh batch (with ``edge=True``) is drawn from
        the dataset and trained synchronously; otherwise cached image pairs
        are consumed, falling back to sync mode when the cache runs dry.

        :param cuda: whether to move tensors to the GPU
        """
        log.info("feature extractor train")
        initial_step = self.initial_step
        total_steps = self.args.total_extractor_steps
        self.training = True
        self.dataset = FaceDataset(self.args, mode="train")

        # Trace the network graph once with a dummy input for TensorBoard.
        rnd_input = torch.randn(self.args.batch_size, 1, 64, 64)
        if cuda:
            rnd_input = rnd_input.cuda()
        self.writer.add_graph(self, input_to_model=rnd_input)

        progress = tqdm(range(initial_step, total_steps + 1), initial=initial_step, total=total_steps)
        for step in progress:
            if self.train_mode == Extractor.TRAIN_SYNC:
                progress.set_description("sync  mode ")
                names, _, images = self.dataset.get_batch(batch_size=self.args.batch_size, edge=True)
                if cuda:
                    images = images.cuda()
                self.sync_train(images, names, step)
            else:
                image1, image2, name = self.dataset.get_cache(cuda)
                if image1 is None or image2 is None:
                    # Cache exhausted: switch back to synchronous training.
                    self.change_mode(Extractor.TRAIN_SYNC)
                    continue
                loss = self.asyn_train(image1, image2)
                loss_ = loss.detach().numpy()
                loss_display = loss_ * 1000  # scaled for log readability
                progress.set_description("loss: {:.3f}".format(loss_display))
                self.writer.add_scalar('extractor/loss', loss_display, step)
                if step % self.args.extractor_prev_freq == 0:
                    self.capture(image1, image2, name, step, cuda)
                    # NOTE(review): LR is scaled by the displayed loss —
                    # presumably to anneal as the loss shrinks; confirm intent.
                    lr = self.args.extractor_learning_rate * loss_display
                    self.writer.add_scalar('extractor/learning rate', lr, step)
                    utils.update_optimizer_lr(self.optimizer, lr)
            if step % self.args.extractor_save_freq == 0:
                self.save(step)
        self.writer.close()
Example #8
0
    'ngf': 512,
    'ndf': 16,
    'nzf': 100,
    'optimizer': 'sgd',
    'lr_D': 0.01,
    'lr_G': 0.01,
    'momentum': 0.9,
    'nestrov': False,
    'lr_schedule': None,
    'epochs': 100
    }
image_size = config['image_size']
batch_size = config['batch_size']

data_path = '/home/scott/Desktop/dataset/face3k/train.txt'
train_set = FaceDataset(data_path, 128, augment=True, cache_image=False)
train_loader = DataLoader(train_set,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=8,
                          pin_memory=True)

netG = Generator(input_size=(batch_size, 100, 1, 1),
                 image_size=image_size,
                 ngf=config['ngf'],
                 leaky_relu=True)
netD = Discriminator(image_size=config['image_size'],
                     ndf=config['ndf'],
                     leaky_relu=True)
'''
load trained weight
Example #9
0
from torch.utils.data import DataLoader
from util.config import DATASET_PARAMETERS, NETWORKS_PARAMETERS
from util.parse_dataset import csv_to_list
from network import restore_train, get_network
from utils import Meter, cycle, save_model, get_collate_fn, Logger
from dataset import VoiceDataset, FaceDataset

# dataset and dataloader
print('Parsing your dataset...')
voice_list, face_list, id_class_num, emotion_class_num = csv_to_list(
    DATASET_PARAMETERS)
print('voice samples num = %d, face samples num = %d' %
      (len(voice_list), len(face_list)))
print('Preparing the datasets...')
voice_dataset = VoiceDataset(voice_list, DATASET_PARAMETERS['nframe_range'])
face_dataset = FaceDataset(face_list)

print('Preparing the dataloaders...')
collate_fn = get_collate_fn(DATASET_PARAMETERS['nframe_range'])
voice_loader = DataLoader(
    voice_dataset,
    shuffle=True,
    drop_last=True,
    batch_size=DATASET_PARAMETERS['batch_size'],
    num_workers=DATASET_PARAMETERS['workers_num'],  # number of worker processes used for loading
    collate_fn=collate_fn)  # how to collate multiple samples into one batch
face_loader = DataLoader(face_dataset,
                         shuffle=True,
                         drop_last=True,
                         batch_size=DATASET_PARAMETERS['batch_size'],
                         num_workers=DATASET_PARAMETERS['workers_num'])
Example #10
0
def run():
    """Train a face-recognition model with a margin-based metric head.

    Builds the dataset, backbone, metric head and optimizer from the global
    ``Config``, trains for ``opt.max_epoch`` epochs, periodically logs and
    visualizes training accuracy, saves checkpoints, and evaluates on the
    LFW list after every epoch.
    """
    opt = Config()

    if opt.display:
        visualizer = Visualizer()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_dataset = FaceDataset(opt.train_root,
                                opt.train_list,
                                phase='train',
                                input_shape=opt.input_shape)
    trainloader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=opt.train_batch_size,
                                              shuffle=True,
                                              num_workers=opt.num_workers)
    print('{} train iters per epoch:'.format(len(trainloader)))

    # Loss: Focal Loss down-weights easy examples so training focuses on the
    # hard ones (mitigates class imbalance); otherwise plain cross-entropy.
    # https://blog.csdn.net/u014380165/article/details/77019084
    if opt.loss == 'focal_loss':
        criterion = FocalLoss(gamma=2)
    else:
        criterion = torch.nn.CrossEntropyLoss()

    # Backbone network.
    if opt.backbone == 'resnet18':
        model = resnet_face18(use_se=opt.use_se)
    elif opt.backbone == 'resnet34':
        model = resnet34()
    elif opt.backbone == 'resnet50':
        model = resnet50()
    else:
        # BUG FIX: an unrecognized backbone previously left `model` unbound
        # and crashed later with a NameError; fail fast instead.
        raise ValueError('unknown backbone: {}'.format(opt.backbone))

    # Metric head: maps 512-d embeddings to class logits.
    if opt.metric == 'add_margin':
        metric_fc = AddMarginProduct(512, opt.num_classes, s=30, m=0.35)
    elif opt.metric == 'arc_margin':
        metric_fc = ArcMarginProduct(512,
                                     opt.num_classes,
                                     s=30,
                                     m=0.5,
                                     easy_margin=opt.easy_margin)
    elif opt.metric == 'sphere':
        metric_fc = SphereProduct(512, opt.num_classes, m=4)
    else:
        metric_fc = nn.Linear(512, opt.num_classes)

    # view_model(model, opt.input_shape)
    print(model)
    model.to(device)
    model = DataParallel(model)
    metric_fc.to(device)
    metric_fc = DataParallel(metric_fc)

    # Optimizer covers both the backbone and the metric head.
    if opt.optimizer == 'sgd':
        optimizer = torch.optim.SGD([{
            'params': model.parameters()
        }, {
            'params': metric_fc.parameters()
        }],
                                    lr=opt.lr,
                                    weight_decay=opt.weight_decay)
    else:
        optimizer = torch.optim.Adam([{
            'params': model.parameters()
        }, {
            'params': metric_fc.parameters()
        }],
                                     lr=opt.lr,
                                     weight_decay=opt.weight_decay)

    # Every `lr_step` epochs the learning rate is multiplied by gamma.
    scheduler = StepLR(optimizer, step_size=opt.lr_step, gamma=0.1)

    start = time.time()
    for i in range(opt.max_epoch):
        model.train()
        for ii, data in enumerate(trainloader):
            data_input, label = data
            data_input = data_input.to(device)
            label = label.to(device).long()

            feature = model(data_input)
            output = metric_fc(feature, label)
            loss = criterion(output, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            iters = i * len(trainloader) + ii

            if iters % opt.print_freq == 0:
                # argmax over class logits -> predicted class indices
                output = output.data.cpu().numpy()
                output = np.argmax(output, axis=1)
                label = label.data.cpu().numpy()
                acc = np.mean((output == label).astype(int))
                speed = opt.print_freq / (time.time() - start)
                time_str = time.asctime(time.localtime(time.time()))
                print('{} train epoch {} iter {} {} iters/s loss {} acc {}'.
                      format(time_str, i, ii, speed, loss.item(), acc))
                if opt.display:
                    visualizer.display_current_results(iters,
                                                       loss.item(),
                                                       name='train_loss')
                    visualizer.display_current_results(iters,
                                                       acc,
                                                       name='train_acc')

                start = time.time()

        # BUG FIX: scheduler.step() was called at the *start* of each epoch,
        # decaying the LR one epoch early and before any optimizer.step()
        # (which PyTorch >= 1.1 explicitly warns against).
        scheduler.step()

        # BUG FIX: `i == opt.max_epoch` could never be true inside
        # range(opt.max_epoch); compare against the last epoch index.
        if i % opt.save_interval == 0 or i == opt.max_epoch - 1:
            save_model(model, opt.checkpoints_path, opt.backbone, i)

        # Evaluate on the LFW verification list after each epoch.
        model.eval()
        identity_list = get_lfw_list(opt.lfw_test_list)
        img_paths = [
            os.path.join(opt.lfw_root, each) for each in identity_list
        ]
        acc = lfw_test(model, img_paths, identity_list, opt.lfw_test_list,
                       opt.test_batch_size)

        if opt.display:
            visualizer.display_current_results(iters, acc, name='test_acc')
Example #11
0
def main():
    """Distributed training entry point for the face model.

    Parses the YAML config, initializes distributed training, builds the
    model/optimizer/augmentations/datasets, then either evaluates existing
    checkpoints (``--evaluate``) or runs the training loop.
    """
    global args, config, best_loss
    args = parser.parse_args()

    with open(args.config) as f:
        # BUG FIX / security: yaml.load() without an explicit Loader is
        # unsafe on untrusted input and the Loader argument is required in
        # PyYAML >= 6. FullLoader preserves the old behavior for trusted
        # config files.
        config = yaml.load(f, Loader=yaml.FullLoader)

    # mirror config['common'] onto args, then keep an attribute-style copy
    for k, v in config['common'].items():
        setattr(args, k, v)
    config = EasyDict(config['common'])

    rank, world_size, device_id = dist_init(
        os.path.join(args.distributed_path, config.distributed_file))

    args.save_path_dated = args.save_path + '/' + args.datetime
    if args.run_tag != '':
        args.save_path_dated += '-' + args.run_tag

    # create model
    model = model_entry(config.model)
    model.cuda()

    model = nn.parallel.DistributedDataParallel(model, device_ids=[device_id])

    # create optimizer
    opt_config = config.optimizer
    opt_config.kwargs.lr = config.lr_scheduler.base_lr
    opt_config.kwargs.params = model.parameters()

    optimizer = optim_entry(opt_config)

    # optionally resume from a checkpoint
    last_iter = -1
    best_loss = 1e9
    if args.load_path:
        if args.recover:
            best_loss, last_iter = load_state(args.load_path,
                                              model,
                                              optimizer=optimizer)
        else:
            load_state(args.load_path, model)

    cudnn.benchmark = True

    # train augmentation; normalization stats depend on the config flag
    if config.augmentation.get('imgnet_mean', False):
        model_mean = (0.485, 0.456, 0.406)
        model_std = (0.229, 0.224, 0.225)
    else:
        model_mean = (0.5, 0.5, 0.5)
        model_std = (0.5, 0.5, 0.5)
    trans = albumentations.Compose([
        RandomResizedCrop(config.augmentation.input_size,
                          config.augmentation.input_size,
                          scale=(config.augmentation.min_scale**2., 1.),
                          ratio=(1., 1.)),
        HorizontalFlip(p=0.5),
        RandomBrightnessContrast(brightness_limit=0.25,
                                 contrast_limit=0.1,
                                 p=0.5),
        JpegCompression(p=.2, quality_lower=50),
        MotionBlur(p=0.5),
        Normalize(mean=model_mean, std=model_std),
        ToTensorV2()
    ])

    train_dataset = FaceDataset(config.train_root,
                                config.train_source,
                                transform=trans,
                                resize=config.augmentation.input_size,
                                image_format=config.get('image_format', None),
                                random_frame=config.get(
                                    'train_random_frame', False),
                                bgr=config.augmentation.get('bgr', False))

    # sampler drives exactly max_iter iterations, resuming from last_iter
    train_sampler = DistributedGivenIterationSampler(
        train_dataset,
        config.lr_scheduler.max_iter,
        config.batch_size,
        last_iter=last_iter)
    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=False,
                              num_workers=config.workers,
                              pin_memory=True,
                              sampler=train_sampler)

    # validation augmentation: resize + normalize only
    trans = albumentations.Compose([
        Resize(config.augmentation.input_size, config.augmentation.input_size),
        Normalize(mean=model_mean, std=model_std),
        ToTensorV2()
    ])
    val_multi_loader = []
    if args.val_source != '':
        for dataset_idx in range(len(args.val_source)):
            val_dataset = FaceDataset(
                args.val_root[dataset_idx],
                args.val_source[dataset_idx],
                transform=trans,
                output_index=True,
                resize=config.augmentation.input_size,
                image_format=config.get('image_format', None),
                bgr=config.augmentation.get('bgr', False))
            val_sampler = DistributedSampler(val_dataset, round_up=False)
            val_loader = DataLoader(val_dataset,
                                    batch_size=config.batch_size,
                                    shuffle=False,
                                    num_workers=config.workers,
                                    pin_memory=True,
                                    sampler=val_sampler)
            val_multi_loader.append(val_loader)

    config.lr_scheduler['optimizer'] = optimizer
    config.lr_scheduler['last_iter'] = last_iter
    lr_scheduler = get_scheduler(config.lr_scheduler)

    # rank 0 owns all filesystem and TensorBoard logging
    if rank == 0:
        mkdir(args.save_path)

        mkdir(args.save_path_dated)
        tb_logger = SummaryWriter(args.save_path_dated)

        logger = create_logger('global_logger',
                               args.save_path_dated + '-log.txt')
        logger.info('{}'.format(args))
        logger.info(model)
        logger.info(parameters_string(model))
        logger.info('len(train dataset) = %d' % len(train_loader.dataset))
        for dataset_idx in range(len(val_multi_loader)):
            logger.info(
                'len(val%d dataset) = %d' %
                (dataset_idx, len(val_multi_loader[dataset_idx].dataset)))

        mkdir(args.save_path_dated + '/saves')
    else:
        tb_logger = None

    # class-weighted CE; weights sum to 2 so the mean loss scale is preserved
    positive_weight = config.get('positive_weight', 0.5)
    weight = torch.tensor([1. - positive_weight, positive_weight]) * 2.
    if rank == 0:
        logger.info('using class weights: {}'.format(weight.tolist()))

    criterion = nn.CrossEntropyLoss(weight=weight).cuda()

    if args.evaluate:
        if args.evaluate_path:
            # evaluate every checkpoint found under evaluate_path
            all_ckpt = get_all_checkpoint(args.evaluate_path, args.range_list,
                                          rank)

            for ckpt in all_ckpt:
                if rank == 0:
                    logger.info('Testing ckpt: ' + ckpt)
                last_iter = -1
                _, last_iter = load_state(ckpt, model, optimizer=optimizer)
                for dataset_idx in range(len(val_multi_loader)):
                    validate(dataset_idx,
                             val_multi_loader[dataset_idx],
                             model,
                             criterion,
                             tb_logger,
                             curr_step=last_iter,
                             save_softmax=True)
        else:
            for dataset_idx in range(len(val_multi_loader)):
                validate(dataset_idx,
                         val_multi_loader[dataset_idx],
                         model,
                         criterion,
                         tb_logger,
                         curr_step=last_iter,
                         save_softmax=True)

        return

    train(train_loader, val_multi_loader, model, criterion, optimizer,
          lr_scheduler, last_iter + 1, tb_logger)
    return
def main():
    """Train an age/gender multitask model with checkpointing and plots.

    Builds the model, optimizer and datasets from the global ``cfg`` and CLI
    args, optionally resumes from a checkpoint, trains for
    ``cfg.TRAIN.EPOCHS`` epochs, saves the best model (by validation MAE, or
    cumulative accuracy for megaage-style datasets), and writes
    loss/accuracy curves to ``savefig/``.
    """
    args = get_args()

    if args.opts:
        cfg.merge_from_list(args.opts)

    cfg.freeze()
    start_epoch = 0
    checkpoint_dir = Path(args.checkpoint)
    checkpoint_dir.mkdir(parents=True, exist_ok=True)

    # create model
    print("=> creating model '{}'".format(cfg.MODEL.ARCH))
    model = get_model(model_name=cfg.MODEL.ARCH)
    multitask(model)

    if cfg.TRAIN.OPT == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=cfg.TRAIN.LR,
                                    momentum=cfg.TRAIN.MOMENTUM,
                                    weight_decay=cfg.TRAIN.WEIGHT_DECAY)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=cfg.TRAIN.LR)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # optionally resume from a checkpoint
    resume_path = args.resume

    if resume_path:
        if Path(resume_path).is_file():
            print("=> loading checkpoint '{}'".format(resume_path))
            checkpoint = torch.load(resume_path, map_location="cpu")
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(resume_path, checkpoint['epoch']))
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(resume_path))

    if args.multi_gpu:
        model = nn.DataParallel(model)

    if device == "cuda":
        cudnn.benchmark = True

    # NOTE(review): this was `True if "megaage" in ... else True`, i.e.
    # always True; cumulative accuracy is displayed for every dataset.
    # Simplified here without changing behavior.
    get_ca = True  # display cumulative accuracy
    # use CA (instead of MAE) to decide when to update the saved model
    value_ca = "megaage" in args.dataset.lower()
    if get_ca:
        print("Cummulative Accuracy will be calculated for", args.dataset)
    if value_ca:
        print("Cummulative Accuracy will be compared to update saved model")

    criterion = nn.CrossEntropyLoss().to(device)
    train_dataset = FaceDataset(args.data_dir, "train", args.dataset, img_size=cfg.MODEL.IMG_SIZE, augment=args.aug,
                                age_stddev=cfg.TRAIN.AGE_STDDEV, label=True, expand=args.expand, gender=True)
    train_loader = DataLoader(train_dataset, batch_size=cfg.TRAIN.BATCH_SIZE, shuffle=True,
                              num_workers=cfg.TRAIN.WORKERS, drop_last=False)

    val_dataset = FaceDataset(args.data_dir, "valid", args.dataset,
                              img_size=cfg.MODEL.IMG_SIZE, augment=False, label=True, expand=args.expand, gender=True)
    val_loader = DataLoader(val_dataset, batch_size=cfg.TEST.BATCH_SIZE, shuffle=False,
                            num_workers=cfg.TRAIN.WORKERS, drop_last=False)

    scheduler = StepLR(optimizer, step_size=cfg.TRAIN.LR_DECAY_STEP, gamma=cfg.TRAIN.LR_DECAY_RATE,
                       last_epoch=start_epoch - 1)
    best_val_mae = 10000.0
    # BUG FIX: best_checkpoint was only assigned when an epoch improved,
    # causing a NameError at the final print if no epoch ever improved.
    best_checkpoint = None
    train_writer = None
    global_ca = {3: 0.0, 5: 0.0, 7: 0.0}
    train_count = len(train_dataset)
    val_count = len(val_dataset)

    all_train_loss = []
    all_train_accu = []
    all_val_loss = []
    all_val_accu = []

    # range(start_epoch, cfg.TRAIN.EPOCHS):
    for epoch in range(cfg.TRAIN.EPOCHS):
        # train
        train_loss, train_acc = train(
            train_loader, model, criterion, optimizer, epoch, device, train_count)

        # validate
        val_loss, val_acc, val_mae, new_ca = validate(
            val_loader, model, criterion, epoch, device, val_count, get_ca)

        all_train_loss.append(float(train_loss))
        all_train_accu.append(float(train_acc))
        all_val_loss.append(float(val_loss))
        all_val_accu.append(float(val_mae))

        # checkpoint when val MAE improves (or CA3 improves, for CA datasets)
        if ((not value_ca) and (val_mae < best_val_mae)) or ((get_ca and value_ca) and (new_ca[3] > global_ca[3])):
            print(
                f"=> [epoch {epoch:03d}] best val mae was improved from {best_val_mae:.3f} to {val_mae:.3f}")
            model_state_dict = model.module.state_dict(
            ) if args.multi_gpu else model.state_dict()
            # BUG FIX: the filename was previously formatted twice with two
            # independent datetime.now() calls (once for torch.save, once
            # for best_checkpoint), which could disagree around midnight;
            # build it exactly once and reuse it.
            ckpt_path = str(checkpoint_dir.joinpath(
                "epoch{:03d}_{}_{:.5f}_{:.4f}_{}_{}_ldl.pth".format(
                    epoch, args.dataset, val_loss, val_mae,
                    datetime.now().strftime("%Y%m%d"), cfg.MODEL.ARCH)))
            torch.save(
                {
                    'epoch': epoch + 1,
                    'arch': cfg.MODEL.ARCH,
                    'state_dict': model_state_dict,
                    'optimizer_state_dict': optimizer.state_dict()
                },
                ckpt_path
            )
            best_val_mae = val_mae
            best_checkpoint = ckpt_path
            if get_ca:
                global_ca = new_ca
        else:
            print(
                f"=> [epoch {epoch:03d}] best val mae was not improved from {best_val_mae:.3f} ({val_mae:.3f})")

        # adjust learning rate
        scheduler.step()

    print("=> training finished")
    print(f"additional opts: {args.opts}")
    print(f"best val mae: {best_val_mae:.3f}")
    if get_ca:
        print("CA3: {:.2f} CA5: {:.2f} CA7: {:.2f}".format(global_ca[3] * 100, global_ca[5]*100, global_ca[7]*100))
    print("best mae saved model:", best_checkpoint)

    # save loss/accuracy curves
    x = np.arange(cfg.TRAIN.EPOCHS)
    plt.xlabel("Epoch")

    plt.ylabel("Train Loss")
    plt.plot(x, all_train_loss)
    plt.savefig("savefig/{}_{}_{}_train_loss.png".format(args.dataset,
                                                         cfg.MODEL.ARCH, datetime.now().strftime("%Y%m%d")))
    plt.clf()

    plt.ylabel("Train Accuracy")
    plt.plot(x, all_train_accu)
    plt.savefig("savefig/{}_{}_{}_train_accu.png".format(args.dataset,
                                                         cfg.MODEL.ARCH, datetime.now().strftime("%Y%m%d")))
    plt.clf()

    plt.ylabel("Validation Loss")
    plt.plot(x, all_val_loss)
    plt.savefig("savefig/{}_{}_{}_val_loss.png".format(args.dataset,
                                                       cfg.MODEL.ARCH, datetime.now().strftime("%Y%m%d")))
    plt.clf()

    plt.ylabel("Validation Accuracy")
    plt.plot(x, all_val_accu)
    plt.savefig("savefig/{}_{}_{}_val_mae.png".format(args.dataset,
                                                      cfg.MODEL.ARCH, datetime.now().strftime("%Y%m%d")))
Example #13
0
class Extractor(nn.Module):
    """Feature extractor: maps a face image to face-creation parameters.

    Training alternates between two modes (see ``batch_train``):
      * TRAIN_SYNC: forward a batch and send the predicted parameters to the
        game engine over the network so it can render them (round trip).
      * TRAIN_ASYN: once rendered pairs have accumulated in the cache, train
        on (input image, engine-generated image) pairs with an MSE loss.
    """
    TRAIN_ASYN = 1
    TRAIN_SYNC = 2

    def __init__(self, name, args, imitator=None, momentum=0.5):
        """
        Feature extractor.
        :param name: model name
        :param args: argparse options
        :param imitator: imitate engine's behaviour
        :param momentum:  momentum for optimizer
        """
        super(Extractor, self).__init__()
        log.info("construct feature_extractor %s", name)
        self.name = name
        self.imitator = imitator
        self.initial_step = 0
        self.args = args
        self.model_path = "./output/extractor"
        self.prev_path = "./output/preview"
        # NOTE(review): this shadows nn.Module's built-in `training` flag
        # (normally toggled by train()/eval()) -- confirm that is intended.
        self.training = False
        self.params_cnt = self.args.params_cnt
        self.dataset = None
        self.train_mode = Extractor.TRAIN_SYNC
        self.train_refer = 32  # batches remaining before switching mode
        self.net = Net(args.udp_port, args)
        self.clean()
        self.writer = SummaryWriter(comment="feature extractor", log_dir=args.path_tensor_log)
        self.model = nn.Sequential(
            nn.Conv2d(1, 4, kernel_size=7, stride=2, padding=3),  # 1. (batch, 4, 32, 32)
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),  # 2. (batch, 4, 16, 16)
            group(4, 8, kernel_size=3, stride=1, padding=1),  # 3. (batch, 8, 16, 16)
            ResidualBlock.make_layer(8, channels=8),  # 4. (batch, 8, 16, 16)
            group(8, 16, kernel_size=3, stride=1, padding=1),  # 5. (batch, 16, 16, 16)
            ResidualBlock.make_layer(8, channels=16),  # 6. (batch, 16, 16, 16)
            group(16, 64, kernel_size=3, stride=1, padding=1),  # 7. (batch, 64, 16, 16)
            ResidualBlock.make_layer(8, channels=64),  # 8. (batch, 64, 16, 16)
            group(64, self.params_cnt, kernel_size=3, stride=1, padding=1),  # 9. (batch, params_cnt, 16, 16)
            ResidualBlock.make_layer(4, channels=self.params_cnt),  # 10. (batch, params_cnt, 16, 16)
            nn.Dropout(0.5),
        )
        self.fc = nn.Linear(self.params_cnt * 16 * 16, self.params_cnt)
        self.optimizer = optim.Adam(self.parameters(), lr=args.extractor_learning_rate)
        utils.debug_parameters(self, "_extractor_")

    def forward(self, input):
        output = self.model(input)
        output = output.view(output.size(0), -1)
        output = self.fc(output)
        output = F.dropout(output, training=self.training)
        output = torch.sigmoid(output)
        return output

    def itr_train(self, image):
        """
        First method: training goes through the imitator (synchronous).
        :param image: [batch, 3, 512, 512]
        :return: loss scalar
        """
        self.optimizer.zero_grad()
        param_ = self.forward(image)
        img_ = self.imitator.forward(param_)
        loss = utils.content_loss(image, img_)
        loss.backward()
        self.optimizer.step()
        return loss, param_

    def sync_train(self, image, name, step):
        """
        Second method: send params through the net so the engine renders the
        image (asynchronous). This needs synchronisation but should outperform
        the imitator.
        :param step: train step
        :param name: picture names [batch]
        :param image: [batch, 1, 64, 64]
        """
        self.train_refer = self.train_refer - 1
        if self.train_refer <= 0:
            self.change_mode(Extractor.TRAIN_ASYN)
        param_ = self.forward(image)
        self.net.send_params(param_, name, step)

    def asyn_train(self, image1, image2):
        """
        Once enough pairs have accumulated in the cache, async training can run.
        :param image1: input image
        :param image2: generated image
        :return: loss, type scalar
        """
        self.train_refer = self.train_refer - 1
        if self.train_refer <= 0:
            self.change_mode(Extractor.TRAIN_SYNC)

        self.optimizer.zero_grad()
        loss = F.mse_loss(image1, image2)
        loss.backward()
        self.optimizer.step()
        return loss

    def change_mode(self, mode):
        """
        Switch the train mode and reset the countdown.
        :param mode: train mode
        """
        self.train_refer = 32
        if mode == Extractor.TRAIN_ASYN:
            self.train_refer = 36
        self.train_mode = mode

    def batch_train(self, cuda):
        """Main training loop, alternating sync and async phases.

        :param cuda: move batches to GPU when True
        """
        log.info("feature extractor train")
        initial_step = self.initial_step
        total_steps = self.args.total_extractor_steps
        self.training = True
        self.dataset = FaceDataset(self.args, mode="train")

        rnd_input = torch.randn(self.args.batch_size, 1, 64, 64)
        if cuda:
            rnd_input = rnd_input.cuda()
        self.writer.add_graph(self, input_to_model=rnd_input)

        progress = tqdm(range(initial_step, total_steps + 1), initial=initial_step, total=total_steps)
        for step in progress:
            if self.train_mode == Extractor.TRAIN_SYNC:
                progress.set_description("sync  mode ")
                names, _, images = self.dataset.get_batch(batch_size=self.args.batch_size, edge=True)
                if cuda:
                    images = images.cuda()
                self.sync_train(images, names, step)
            else:
                image1, image2, name = self.dataset.get_cache(cuda)
                if image1 is None or image2 is None:
                    self.change_mode(Extractor.TRAIN_SYNC)
                    continue
                loss = self.asyn_train(image1, image2)
                # .cpu() first: .numpy() raises on CUDA tensors.
                loss_ = loss.detach().cpu().numpy()
                loss_display = loss_ * 1000
                progress.set_description("loss: {:.3f}".format(loss_display))
                self.writer.add_scalar('extractor/loss', loss_display, step)
                if step % self.args.extractor_prev_freq == 0:
                    self.capture(image1, image2, name, step, cuda)
                    # Scale the learning rate with the current loss magnitude.
                    lr = self.args.extractor_learning_rate * loss_display
                    self.writer.add_scalar('extractor/learning rate', lr, step)
                    utils.update_optimizer_lr(self.optimizer, lr)
            if step % self.args.extractor_save_freq == 0:
                self.save(step)
        self.writer.close()

    def load_checkpoint(self, path, training=False, cuda=False):
        """
        Restore the net from a checkpoint.
        :param path: checkpoint's path
        :param training: continue training after restoring
        :param cuda: gpu speedup
        """
        path_ = self.args.path_to_inference + "/" + path
        if not os.path.exists(path_):
            raise NeuralException("not exist checkpoint of extractor with path " + path)
        if cuda:
            checkpoint = torch.load(path_)
        else:
            checkpoint = torch.load(path_, map_location='cpu')
        self.load_state_dict(checkpoint['net'])
        self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.initial_step = checkpoint['epoch']
        log.info("recovery imitator from %s", path)
        if training:
            self.batch_train(cuda)

    def clean(self):
        """
        Remember to back up before clearing.
        """
        ops.clear_folder(self.model_path)
        ops.clear_files(self.args.path_to_cache)
        ops.clear_files(self.args.path_tensor_log)
        ops.clear_files(self.prev_path)

    def save(self, step):
        """
        Save a checkpoint.
        :param step: train step
        """
        state = {'net': self.state_dict(), 'optimizer': self.optimizer.state_dict(), 'epoch': step}
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
        # Probe an existing parameter instead of calling self.cuda(): that call
        # would *move* the whole model to the GPU (failing on CPU-only hosts)
        # and returns the module itself, which is always truthy.
        ext = "cuda" if next(self.parameters()).is_cuda else "cpu"
        torch.save(state, '{1}/extractor_{0}_{2}.pth'.format(step, self.model_path, ext))

    def inference(self, cp_name, photo_path, cuda):
        """
        Feature extractor: generate face-creation parameters from a photo.
        :param cuda: gpu speed up
        :param cp_name: checkpoint's path
        :param photo_path: input photo's path
        :return: params [1, params_cnt]
        """
        img = cv2.imread(photo_path)
        scaled = align.align_face(img, size=(64, 64))
        self.load_checkpoint(cp_name, training=False, cuda=cuda)
        img = utils.faceparsing_ndarray(scaled, self.args.parsing_checkpoint, cuda)
        img = utils.img_edge(img)
        # torch.no_grad is a context-manager *class*; it must be instantiated,
        # otherwise `with torch.no_grad:` raises at runtime.
        with torch.no_grad():
            input = torch.from_numpy(img)
            input = input.view([1, 1, 64, 64])
            params_ = self(input)
            log.info(params_)
            return params_

    def evaluate(self):
        """
        Evaluate the accuracy rate.
        :return: accuracy rate
        """
        self.model.eval()
        dataset = FaceDataset(self.args, mode="test")
        steps = 100
        accuracy = 0.0
        for step in range(steps):
            log.info("step: %d", step)
            names, params, images = dataset.get_batch(batch_size=self.args.batch_size, edge=False)
            loss, _ = self.itr_train(images)
            # NOTE(review): accuracy becomes a tensor here (1.0 - loss);
            # treats loss as a [0, 1] score -- confirm against content_loss.
            accuracy += 1.0 - loss
        accuracy = accuracy / steps
        log.info("accuracy rate is %f", accuracy)
        return accuracy

    def capture(self, tensor1, tensor2, name, step, cuda):
        """
        Extractor snapshot: write a 4-panel preview image.
        :param tensor1: input photo
        :param tensor2: generated image
        :param cuda: use gpu to speed up
        :param step: train step
        :param name: picture name
        """
        path = "{1}/{2}_{0}.jpg".format(step, self.prev_path, name[3:-6])
        orig_path = os.path.join(self.args.path_to_dataset + "2", name)
        img3 = cv2.imread(orig_path)
        img4 = utils.faceparsing_ndarray(img3, self.args.parsing_checkpoint, cuda)
        # Invert back from normalised [0, 1] tensors to uint8-range grayscale.
        image1 = 255 - tensor1.cpu().detach().numpy() * 255
        image2 = 255 - tensor2.cpu().detach().numpy() * 255
        shape = image1.shape
        if len(shape) == 2:
            image1 = image1[:, :, np.newaxis]
            image2 = image2[:, :, np.newaxis]
        img1 = ops.fill_gray(image1)
        img2 = ops.fill_gray(image2)
        img = ops.merge_4image(img1, img2, img3, img4)
        cv2.imwrite(path, img)
Example #14
0
print("Number of train images :", len(tr_img_paths))
print("Number of test images :", len(te_img_paths))
print("-" * 30)

# exit()
"""     setup data         """
# TODO: decide on the proper resize target size
# Should Normalize be applied here?
transforms = Compose([
    Resize((224, 224)),  # for vgg16
    ToTensor(),
    # ImageNet channel statistics (standard mean/std for pretrained VGG).
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# The train dataset receives the label-embedding dict; the test dataset gets
# None -- presumably it then yields raw labels; verify inside FaceDataset.
tr_dataset = FaceDataset(tr_img_paths,
                         tr_raw_labels,
                         embedding_dict,
                         transform=transforms)
te_dataset = FaceDataset(te_img_paths,
                         te_raw_labels,
                         None,
                         transform=transforms)
tr_loader = DataLoader(tr_dataset, batch_size=batch_size, shuffle=True)
# NOTE(review): shuffle=True on the test loader is unusual -- confirm intent.
te_loader = DataLoader(te_dataset, batch_size=batch_size, shuffle=True)
"""     setup model         """
# NOTE: the feature-extraction layers are completely frozen, but given what is
#       being learned they may be better retrained; pre-training on person
#       classification might also help.
model = my_vgg16_bn(out_features=n_classes)
if weight:
    model.load_state_dict(torch.load(weight))
# HACK: the pretrained net ends up nested under an extra "model" level above
#       "features"; how the model class wraps the pretrained model should be
#       improved. Setting requires_grad=True on the CNN exhausts memory.
Example #15
0
# Resume from a saved model when -l is supplied.
if args.l is not None:
    LOAD_MODEL = True
    LOAD_MODEL_NAME = args.l

detecteur_image = False
if args.d is not None:
    # NOTE(review): IMAGE_PATH is assigned a boolean (args.d == 1) although
    # the name suggests a filesystem path -- confirm against the arg parser.
    IMAGE_PATH = (args.d == 1)
    detecteur_image = True
if args.c is not None:
    CONFIDENCE = (args.c == 1)

# Training transform: heavy augmentation (flips, 90-degree rotation, colour
# jitter) followed by per-channel normalisation to roughly [-1, 1].
transform = tf.Compose([tf.RandomHorizontalFlip(), tf.RandomVerticalFlip(), tf.RandomRotation(90), tf.ColorJitter(brightness=0.5, contrast=0.75, saturation=0, hue=0), tf.ToTensor(), tf.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Evaluation transform: tensor conversion plus the same normalisation only.
transform_test = tf.Compose([tf.ToTensor(), tf.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = FaceDataset(TRAIN_DATA, CLASSIFIED_TRAIN_DATA_55000, transform=transform)
# NOTE(review): the validation set uses the *augmented* training transform --
# validation metrics will be noisier than with transform_test; confirm intent.
validset = FaceDataset(TRAIN_DATA, CLASSIFIED_VALID_DATA_36000, transform=transform)
trainloader = {'train': torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=WORKERS),
                'val': torch.utils.data.DataLoader(validset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=WORKERS)}

testset = TestDataset(TEST_DATA, CLASSIFIED_TEST_DATA, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
                                         shuffle=False, num_workers=WORKERS)

# Second pass over the training data without augmentation (e.g. for scoring).
ttset = FaceDataset(TRAIN_DATA, CLASSIFIED_TRAIN_DATA, transform=transform_test)
ttloader = torch.utils.data.DataLoader(ttset, batch_size=BATCH_SIZE,
                                         shuffle=False, num_workers=WORKERS)

Example #16
0
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(resume_path, checkpoint['epoch']))
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(resume_path))

    if args.multi_gpu:
        print(args.multi_gpu)
        model = nn.DataParallel(model)

    if device == "cuda":
        cudnn.benchmark = True

    criterion = homosedastic_case if cfg.MODEL.HOMOSCEDASTIC else aleatoric_loss
    train_dataset = FaceDataset(args.data_dir, "train", img_size=cfg.MODEL.IMG_SIZE, augment=True,
                                age_stddev=cfg.TRAIN.AGE_STDDEV)
    train_loader = DataLoader(train_dataset, batch_size=cfg.TRAIN.BATCH_SIZE, shuffle=True,
                              num_workers=cfg.TRAIN.WORKERS, drop_last=True)

    val_dataset = FaceDataset(args.data_dir, "valid", img_size=cfg.MODEL.IMG_SIZE, augment=False)
    val_loader = DataLoader(val_dataset, batch_size=cfg.TEST.BATCH_SIZE, shuffle=False,
                            num_workers=cfg.TRAIN.WORKERS, drop_last=False)
    scheduler = StepLR(optimizer, step_size=cfg.TRAIN.LR_DECAY_STEP, gamma=cfg.TRAIN.LR_DECAY_RATE,
                       last_epoch=start_epoch - 1)
    best_val_mae = 10000.0
    train_writer = None
    if args.tensorboard is not None:
        opts_prefix = "_".join(args.opts)
        train_writer = SummaryWriter(log_dir=args.tensorboard + "/" + opts_prefix + "_train")
        val_writer = SummaryWriter(log_dir=args.tensorboard + "/" + opts_prefix + "_val")
Example #17
0
def main():
    """Train the cfg-configured model on FaceDataset.

    Builds model/optimizer/dataloaders from the frozen config, optionally
    resumes from a checkpoint, then runs the train/validate loop and saves a
    checkpoint every time validation MAE improves.
    """
    args = get_args()

    # Apply command-line config overrides (KEY VALUE pairs) before freezing.
    if args.opts:
        cfg.merge_from_list(args.opts)

    cfg.freeze()
    start_epoch = 0
    checkpoint_dir = Path(args.checkpoint)
    checkpoint_dir.mkdir(parents=True, exist_ok=True)

    # display nb of workers
    print(f"number of train workers {cfg.TRAIN.WORKERS}")

    # create model
    print("=> creating model '{}'".format(cfg.MODEL.ARCH))
    model = get_model(model_name=cfg.MODEL.ARCH)

    if cfg.TRAIN.OPT == "sgd":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=cfg.TRAIN.LR,
                                    momentum=cfg.TRAIN.MOMENTUM,
                                    weight_decay=cfg.TRAIN.WEIGHT_DECAY)
    else:
        # Any non-"sgd" optimizer setting falls through to Adam.
        optimizer = torch.optim.Adam(model.parameters(), lr=cfg.TRAIN.LR)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # optionally resume from a checkpoint
    resume_path = args.resume

    if resume_path:
        if Path(resume_path).is_file():
            print("=> loading checkpoint '{}'".format(resume_path))
            checkpoint = torch.load(resume_path, map_location="cpu")
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                resume_path, checkpoint['epoch']))
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        else:
            # Missing file is tolerated: training starts from scratch.
            print("=> no checkpoint found at '{}'".format(resume_path))

    if args.multi_gpu:
        model = nn.DataParallel(model)

    if device == "cuda":
        cudnn.benchmark = True

    criterion = nn.CrossEntropyLoss().to(device)
    train_dataset = FaceDataset(args.data_dir,
                                "train",
                                img_size=cfg.MODEL.IMG_SIZE,
                                augment=True,
                                age_stddev=cfg.TRAIN.AGE_STDDEV)
    train_loader = DataLoader(train_dataset,
                              batch_size=cfg.TRAIN.BATCH_SIZE,
                              shuffle=True,
                              num_workers=cfg.TRAIN.WORKERS,
                              drop_last=True)

    val_dataset = FaceDataset(args.data_dir,
                              "valid",
                              img_size=cfg.MODEL.IMG_SIZE,
                              augment=False)
    val_loader = DataLoader(val_dataset,
                            batch_size=cfg.TEST.BATCH_SIZE,
                            shuffle=False,
                            num_workers=cfg.TRAIN.WORKERS,
                            drop_last=False)

    # last_epoch keeps the LR schedule aligned when resuming mid-training.
    scheduler = StepLR(optimizer,
                       step_size=cfg.TRAIN.LR_DECAY_STEP,
                       gamma=cfg.TRAIN.LR_DECAY_RATE,
                       last_epoch=start_epoch - 1)
    # NOTE(review): best_val_mae resets to the sentinel even when resuming,
    # so the first post-resume epoch always "improves" -- confirm intended.
    best_val_mae = 10000.0
    train_writer = None

    if args.tensorboard is not None:
        opts_prefix = "_".join(args.opts)
        train_writer = SummaryWriter(log_dir=args.tensorboard + "/" +
                                     opts_prefix + "_train")
        val_writer = SummaryWriter(log_dir=args.tensorboard + "/" +
                                   opts_prefix + "_val")

    for epoch in range(start_epoch, cfg.TRAIN.EPOCHS):
        # train
        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, epoch, device)

        # validate
        val_loss, val_acc, val_mae = validate(val_loader, model, criterion,
                                              epoch, device)

        if args.tensorboard is not None:
            train_writer.add_scalar("loss", train_loss, epoch)
            train_writer.add_scalar("acc", train_acc, epoch)
            val_writer.add_scalar("loss", val_loss, epoch)
            val_writer.add_scalar("acc", val_acc, epoch)
            val_writer.add_scalar("mae", val_mae, epoch)

        # checkpoint: save only when validation MAE improves
        if val_mae < best_val_mae:
            print(
                f"=> [epoch {epoch:03d}] best val mae was improved from {best_val_mae:.3f} to {val_mae:.3f}"
            )
            # Unwrap DataParallel so the checkpoint loads on single-GPU setups.
            model_state_dict = model.module.state_dict(
            ) if args.multi_gpu else model.state_dict()
            torch.save(
                {
                    'epoch': epoch + 1,
                    'arch': cfg.MODEL.ARCH,
                    'state_dict': model_state_dict,
                    'optimizer_state_dict': optimizer.state_dict()
                },
                str(
                    checkpoint_dir.joinpath(
                        "epoch{:03d}_{:.5f}_{:.4f}.pth".format(
                            epoch, val_loss, val_mae))))
            best_val_mae = val_mae
        else:
            print(
                f"=> [epoch {epoch:03d}] best val mae was not improved from {best_val_mae:.3f} ({val_mae:.3f})"
            )

        # adjust learning rate
        scheduler.step()

    print("=> training finished")
    print(f"additional opts: {args.opts}")
    print(f"best val mae: {best_val_mae:.3f}")
Example #18
0
def train_style_transfer(args):
    """Train a face style-transfer model: either a conditional VAE or a VAE-GAN.

    :param args: argparse namespace (train_data, valid_data, model_type,
                 batch_size, epochs, lr, weight_decay, gpu, and optional
                 pre-trained model paths)
    :return: (model, label_dict) for "VAE", or
             ((generator, discriminator), label_dict) for "VAEGAN";
             best-validation-loss weights are loaded back before returning
    """
    if not (args.train_data and args.valid_data):
        print("must chose train_data and valid_data")
        sys.exit()

    # make dataset; the validation set reuses the training label mapping
    trans = transforms.ToTensor()
    train_dataset = FaceDataset(args.train_data, transform=trans)
    label_dict = train_dataset.get_label_dict()
    valid_dataset = FaceDataset(args.valid_data, transform=trans)
    valid_dataset.give_label_dict(label_dict)
    train_loader = data_utils.DataLoader(train_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=1)
    valid_loader = data_utils.DataLoader(valid_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=1)
    train_size = len(train_dataset)
    valid_size = len(valid_dataset)
    loaders = {"train": train_loader, "valid": valid_loader}
    dataset_sizes = {"train": train_size, "valid": valid_size}

    if args.gpu:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device("cpu")

    # make network (and optionally warm-start from saved weights)
    if args.model_type == "VAE":
        net = Autoencoder(train_dataset.label_num()).to(device)
        optimizer = optim.Adam(net.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)
        best_model_wts = net.state_dict()
        best_loss = 1e10
        if args.generator_model and os.path.exists(args.generator_model):
            net.load_state_dict(torch.load(args.generator_model))

    elif args.model_type == "VAEGAN":
        generator = Autoencoder(train_dataset.label_num()).to(device)
        discriminator = Discriminator().to(device)
        classifier = Classifier(train_dataset.label_num()).to(device)
        generator_optimizer = optim.Adam(generator.parameters(),
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)
        # discriminator learns 10x slower to keep the adversarial game stable
        discriminator_optimizer = optim.Adam(discriminator.parameters(),
                                             lr=args.lr * 0.1,
                                             weight_decay=args.weight_decay)
        best_generator_wts = generator.state_dict()
        best_discriminator_wts = discriminator.state_dict()
        best_generator_loss = 1e10
        best_discriminator_loss = 1e10
        if args.generator_model and os.path.exists(args.generator_model):
            generator.load_state_dict(torch.load(args.generator_model))
        if args.discriminator_model and os.path.exists(
                args.discriminator_model):
            discriminator.load_state_dict(torch.load(args.discriminator_model))
        if args.classifier_model:
            classifier.load_state_dict(torch.load(args.classifier_model))
    # make loss function and optimizer
    criterion = nn.BCELoss(reduction="sum")
    classifier_criterion = nn.CrossEntropyLoss(reduction="sum")

    # initialize loss
    loss_history = {"train": [], "valid": []}

    # start training
    start_time = time.time()
    for epoch in range(args.epochs):
        print("epoch {}".format(epoch + 1))

        for phase in ["train", "valid"]:
            if phase == "train":
                if args.model_type == "VAE":
                    net.train(True)
                elif args.model_type == "VAEGAN":
                    generator.train(True)
                    discriminator.train(True)
            else:
                if args.model_type == "VAE":
                    net.train(False)
                elif args.model_type == "VAEGAN":
                    generator.train(False)
                    discriminator.train(False)

            # initialize running loss
            generator_running_loss = 0.0
            discriminator_running_loss = 0.0

            for i, data in enumerate(loaders[phase]):
                inputs, label = data

                # wrap the in variables
                # NOTE(review): set_grad_enabled is toggled globally here and
                # never restored after the loops; the context-manager form
                # would avoid leaking the "valid" (disabled) state.
                if phase == "train":
                    inputs = Variable(inputs).to(device)
                    label = Variable(label).to(device)
                    torch.set_grad_enabled(True)
                else:
                    inputs = Variable(inputs).to(device)
                    label = Variable(label).to(device)
                    torch.set_grad_enabled(False)

                # zero gradients
                if args.model_type == "VAE":
                    optimizer.zero_grad()
                    mu, var, outputs = net(inputs, label)
                    loss = loss_func(inputs, outputs, mu, var)
                    if phase == "train":
                        loss.backward()
                        optimizer.step()
                    generator_running_loss += loss.item()

                elif args.model_type == "VAEGAN":
                    # Label smoothing: real targets in (0.8, 1.0],
                    # fake targets in [0.0, 0.2).
                    real_label = Variable(
                        torch.ones((inputs.size()[0], 1), dtype=torch.float) -
                        0.2 * (torch.rand(inputs.size()[0], 1))).to(device)
                    fake_label = Variable(
                        torch.zeros((inputs.size()[0], 1), dtype=torch.float) +
                        0.2 * (torch.rand(inputs.size()[0], 1))).to(device)
                    discriminator_optimizer.zero_grad()

                    real_pred = discriminator(inputs)
                    real_loss = criterion(real_pred, real_label)

                    random_index = np.random.randint(0,
                                                     train_dataset.label_num(),
                                                     inputs.size()[0])
                    generate_label = Variable(
                        torch.zeros_like(label)).to(device)
                    # Use `row`, not `i`: the original shadowed the outer
                    # batch-loop index here.
                    for row, index in enumerate(random_index):
                        generate_label[row][index] = 1
                    mu, var, outputs = generator(inputs, label)
                    # detach() so the discriminator loss does not backprop
                    # through the generator
                    fake_pred = discriminator(outputs.detach())
                    fake_loss = criterion(fake_pred, fake_label)

                    discriminator_loss = real_loss + fake_loss
                    if phase == "train":
                        discriminator_loss.backward()
                        discriminator_optimizer.step()

                    generator_optimizer.zero_grad()
                    #class_loss = classifier_criterion(classifier(outputs), torch.max(label, 1)[1])

                    # generator tries to make the discriminator output "real"
                    dis_loss = criterion(discriminator(outputs), real_label)
                    gen_loss = loss_func(inputs, outputs, mu, var)
                    generator_loss = dis_loss + gen_loss
                    if phase == "train":
                        generator_loss.backward()
                        generator_optimizer.step()

                    discriminator_running_loss += discriminator_loss.item()
                    generator_running_loss += generator_loss.item()

            if args.model_type == "VAE":
                epoch_loss = generator_running_loss / dataset_sizes[
                    phase] * args.batch_size
                loss_history[phase].append(epoch_loss)

                print("{} loss {:.4f}".format(phase, epoch_loss))
                if phase == "valid" and epoch_loss < best_loss:
                    best_model_wts = net.state_dict()
                    best_loss = epoch_loss

            elif args.model_type == "VAEGAN":
                epoch_generator_loss = generator_running_loss / dataset_sizes[
                    phase] * args.batch_size
                epoch_discriminator_loss = discriminator_running_loss / dataset_sizes[
                    phase] * args.batch_size

                print("{} generator loss {:.4f}".format(
                    phase, epoch_generator_loss))
                print("{} discriminator loss {:.4f}".format(
                    phase, epoch_discriminator_loss))
                if phase == "valid" and epoch_generator_loss < best_generator_loss:
                    best_generator_wts = generator.state_dict()
                    best_generator_loss = epoch_generator_loss
                if phase == "valid" and epoch_discriminator_loss < best_discriminator_loss:
                    best_discriminator_wts = discriminator.state_dict()
                    # BUG FIX: the original assigned this improvement to
                    # best_generator_loss, so best_discriminator_loss never
                    # updated and the discriminator "best" tracking was broken.
                    best_discriminator_loss = epoch_discriminator_loss

    elapsed_time = time.time() - start_time
    print("training complete in {:.0f}s".format(elapsed_time))
    if args.model_type == "VAE":
        net.load_state_dict(best_model_wts)
        return net, label_dict

    elif args.model_type == "VAEGAN":
        generator.load_state_dict(best_generator_wts)
        discriminator.load_state_dict(best_discriminator_wts)
        return (generator, discriminator), label_dict
Example #19
0
def train_classifier(args):
    """Train the label classifier with L2-style regularisation.

    :param args: argparse namespace (train_data, valid_data, batch_size,
                 epochs, lr, weight_decay, gpu, optional classifier_model)
    :return: (classifier, label_dict); the best-validation-loss weights are
             loaded back into the classifier before returning
    """
    if not (args.train_data and args.valid_data):
        print("must chose train_data and valid_data")
        sys.exit()

    trans = transforms.ToTensor()
    train_dataset = FaceDataset(args.train_data, transform=trans)
    label_dict = train_dataset.get_label_dict()
    # validation reuses the training label mapping
    valid_dataset = FaceDataset(args.valid_data, transform=trans)
    valid_dataset.give_label_dict(label_dict)
    train_loader = data_utils.DataLoader(train_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=1)
    valid_loader = data_utils.DataLoader(valid_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=1)
    loaders = {"train": train_loader, "valid": valid_loader}
    dataset_sizes = {"train": len(train_dataset), "valid": len(valid_dataset)}

    if args.gpu:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device("cpu")

    classifier = Classifier(len(label_dict)).to(device).float()
    optimizer = optim.Adam(classifier.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    best_model_wts = classifier.state_dict()
    best_loss = 1e10
    if args.classifier_model and os.path.exists(args.classifier_model):
        classifier.load_state_dict(torch.load(args.classifier_model))
    criterion = nn.CrossEntropyLoss(reduction="sum")
    start_time = time.time()
    for epoch in range(args.epochs):
        print("epoch {}".format(epoch + 1))

        for phase in ["train", "valid"]:
            if phase == "train":
                classifier.train(True)
            else:
                classifier.train(False)

            running_loss = 0.0
            running_acc = 0
            for i, data in enumerate(loaders[phase]):
                inputs, label = data
                inputs = Variable(inputs).to(device)
                label = Variable(label).to(device)
                # BUG FIX: the original called torch.set_grad_enabled(False)
                # globally and never restored it, leaving autograd disabled
                # process-wide after the function returned (last phase is
                # "valid"). The context-manager form restores the prior state.
                with torch.set_grad_enabled(phase == "train"):
                    optimizer.zero_grad()
                    pred = classifier(inputs)
                    reg_loss = 0
                    for param in classifier.parameters():
                        reg_loss += (param * param).sum()

                    # NOTE(review): reg_loss is already a sum of squares;
                    # reg_loss * reg_loss squares it again -- confirm this
                    # quartic penalty is intended.
                    loss = criterion(pred,
                                     torch.max(label,
                                               1)[1]) + 1e-9 * reg_loss * reg_loss
                    if phase == "train":
                        loss.backward()
                        optimizer.step()
                running_loss += loss.item()
                # labels are one-hot; argmax both sides to compare classes
                running_acc += (torch.max(pred, 1)[1] == torch.max(
                    label, 1)[1]).sum().item()
            epoch_loss = running_loss / dataset_sizes[phase] * args.batch_size
            epoch_acc = running_acc / dataset_sizes[phase]
            print("{} loss {:.4f}".format(phase, epoch_loss))
            print("{} acc {:.6f}".format(phase, epoch_acc))
            # keep the weights with the best validation loss
            if phase == "valid" and epoch_loss < best_loss:
                best_model_wts = classifier.state_dict()
                best_loss = epoch_loss

    elapsed_time = time.time() - start_time
    print("training_complete in {:.0f}".format(elapsed_time))
    classifier.load_state_dict(best_model_wts)
    return classifier, label_dict
def main():
    """Run flip-TTA inference on the test set for each requested fold.

    For every fold, the checkpoint with the lowest encoded score is loaded,
    the model is run over the test images twice (normal and horizontally
    flipped), and the per-image logit maps are saved to
    ``logits/<model>/fold_<k>/id_<img>_{normal,tta}.npy``.

    Side effects: reads ``../data/test`` and ``checkpoints/``, creates the
    ``logits/`` output tree, may move the model to GPU.
    """
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--device_ids', type=str, default='0')
    arg('--folds', type=str, help='fold', default='0,1,2,3,4,5,6,7,8,9')
    arg('--model',
        type=str,
        default='se_resnext101',
        choices=list(model_list.keys()))
    arg('--batch_size', type=int, default=BS)

    args = parser.parse_args()
    GPUs = [int(i) for i in args.device_ids.split(',')]
    folds_to_use = [int(i) for i in args.folds.split(',')]

    path_images = list(
        filter(lambda x: x.endswith('.jpg'), os.listdir('../data/test/')))

    unet_base_model = model_list[args.model]

    # Two loaders over the same images: one plain, one horizontally flipped
    # (test-time augmentation). shuffle=False keeps them aligned with each
    # other and with path_images.
    test_data_loader_normal = DataLoader(FaceDataset(
        '../data/test', path_images, transforms=test_norm_transforms),
                                         batch_size=args.batch_size,
                                         num_workers=10,
                                         shuffle=False)

    test_data_loader_flip = DataLoader(FaceDataset(
        '../data/test', path_images, transforms=test_flip_transforms),
                                       batch_size=args.batch_size,
                                       num_workers=10,
                                       shuffle=False)

    savedir_base = f'logits/{args.model}/'
    os.makedirs(savedir_base, exist_ok=True)

    for cur_fold_num in folds_to_use:
        search_dir = f'checkpoints/{args.model}_fold_{cur_fold_num}/'
        files_in_dir = os.listdir(search_dir)
        # Checkpoint names encode a fractional score after the first '_'
        # (e.g. "name_1234.pth" -> 0.1234); pick the lowest.
        scores = [
            float('0.' + i.split('_')[1].split('.')[0]) for i in files_in_dir
        ]
        chckp_to_use = files_in_dir[np.argmin(scores)]
        chkp_pth = f'{search_dir}{chckp_to_use}'

        print('use checkpoint ', chkp_pth)
        savedir = f'{savedir_base}fold_{cur_fold_num}/'
        os.makedirs(savedir, exist_ok=True)

        unet = unet_base_model(pretrained=False)

        if torch.cuda.is_available():
            unet = unet.cuda()
            unet = nn.DataParallel(unet, GPUs)

        unet.load_state_dict(torch.load(chkp_pth)['model'])
        unet.eval()

        img_cntr = 0
        with torch.no_grad():
            for batch_n, batch_f in tqdm(zip(test_data_loader_normal,
                                             test_data_loader_flip),
                                         total=len(test_data_loader_normal)):
                inp_n = cuda(batch_n[0])
                inp_f = cuda(batch_f[0])

                # Call the module directly instead of .forward() so hooks run.
                output_n = unet(inp_n)
                output_f = unet(inp_f)

                for img_batch_index in range(output_n.shape[0]):
                    img_n = output_n[img_batch_index].cpu().numpy()[0]
                    img_f = output_f[img_batch_index].cpu().numpy()[0]
                    # Un-flip the flipped prediction back to normal orientation.
                    img_f = np.fliplr(img_f)

                    img_id = path_images[img_cntr].split('.')[0]

                    np.save(f'{savedir}/id_{img_id}_normal', img_n)
                    # BUG FIX: previously saved img_n here as well, making the
                    # TTA file a duplicate of the normal one and discarding the
                    # flipped prediction entirely.
                    np.save(f'{savedir}/id_{img_id}_tta', img_f)

                    img_cntr += 1
Example #21
0
def pt_2():
    """Train the 58-point landmark regressor, plot the loss curves, visualize
    the first conv layer's filters, then overlay predictions on validation
    images.

    Side effects: writes plots under ``results/pt_2/`` and ``results/``, shows
    figures via matplotlib, and writes a weight-visualization PNG.
    """
    epochs = 25
    H = 120
    W = 160

    face_dataset = FaceDataset(1, 32, root_dir, W, H, CustomTransforms())
    training = create_dataloader(face_dataset, 5)

    validation_dataset = FaceDataset(33, 40, root_dir, W, H,
                                     CustomTransforms())
    validation = create_dataloader(validation_dataset, 1)

    net = Net()
    loss = nn.MSELoss()
    opt = Adam(net.parameters(), lr=0.001)

    training_losses = []
    validation_losses = []
    for epoch in range(epochs):
        # Accumulate plain floats via .item(): the old `epoch_loss += output`
        # kept every batch's autograd graph alive for the whole epoch.
        epoch_loss = 0.0
        for images, labels in training:
            prediction = net(images)
            # Labels are 58 (x, y) points -> flatten to (batch, 116).
            output = loss(prediction, labels.type(torch.float32).view(-1, 116))
            epoch_loss += output.item()
            output.backward()
            opt.step()
            opt.zero_grad()
        epoch_loss = epoch_loss / len(face_dataset)
        # BUG FIX: this line previously printed the last batch index `i`
        # instead of the epoch number.
        print("EPOCH " + str(epoch) + " LOSS: " + str(epoch_loss))
        training_losses.append([epoch, epoch_loss * 100])

        # Validation: no gradients needed at all.
        epoch_loss = 0.0
        with torch.no_grad():
            for images, labels in validation:
                prediction = net(images)
                output = loss(prediction,
                              labels.type(torch.float32).view(-1, 116))
                epoch_loss += output.item()
        # BUG FIX: the validation average previously divided by the *training*
        # set size (len(face_dataset)).
        epoch_loss = epoch_loss / len(validation_dataset)
        validation_losses.append([epoch, epoch_loss * 100])

    training_losses = np.array(training_losses)
    validation_losses = np.array(validation_losses)

    plt.plot(training_losses[:, 0], training_losses[:, 1])
    plt.plot(validation_losses[:, 0], validation_losses[:, 1])
    plt.plot()
    plt.savefig('results/pt_2/epoch_loss_decrease.png')
    plt.show()
    """
    Handy visualization code copied and pasted from:
    https://colab.research.google.com/github/Niranjankumar-c/DeepLearning-PadhAI/blob/master/DeepLearning_Materials/6_VisualizationCNN_Pytorch/CNNVisualisation.ipynb#scrollTo=cWmfCalUvzbS
    as linked on the piazza. 
    """
    def plot_filters_single_channel(layer_idx, t):
        """Plot every (out, in) kernel of conv weight tensor `t` in a grid and
        save the figure named after `layer_idx`."""
        # kernels depth * number of kernels
        nplots = t.shape[0] * t.shape[1]
        ncols = 12
        nrows = 1 + nplots // ncols

        count = 0
        fig = plt.figure(figsize=(ncols, nrows))

        # BUG FIX: the loop variable used to shadow the function's first
        # argument `i`, so the saved filename below carried the last kernel
        # row index instead of the layer index.
        for out_c in range(t.shape[0]):
            for in_c in range(t.shape[1]):
                count += 1
                ax1 = fig.add_subplot(nrows, ncols, count)
                npimg = np.array(t[out_c, in_c].numpy(), np.float32)
                # Standardize, then clip to [0, 1] for display.
                npimg = (npimg - np.mean(npimg)) / np.std(npimg)
                npimg = np.minimum(1, np.maximum(0, (npimg + 0.5)))
                ax1.imshow(npimg)
                ax1.set_title(str(out_c) + ',' + str(in_c))
                ax1.axis('off')
                ax1.set_xticklabels([])
                ax1.set_yticklabels([])

        plt.tight_layout()
        plt.savefig(str(layer_idx) + 'weight_visualization.png')
        plt.show()

    # Only the first conv layer's filters are visualized.
    for i in range(len(net.conv)):
        if i == 0:
            plot_filters_single_channel(i, net.conv[i].weight.data)

    validation_dataset = FaceDataset(33, 40, root_dir, W, H,
                                     CustomTransforms())
    dataloader = create_dataloader(validation_dataset, 1)

    with torch.no_grad():
        for i, (image, label) in enumerate(dataloader):
            prediction = net(image)
            output = loss(prediction, label.type(torch.float32).view(-1, 116))
            print("LOSS FOR IMAGE IS: " + str(output))

            # Reshape flat 116-vector back to 58 (x, y) landmark points.
            prediction = prediction.view(-1, 58, 2)

            plt.imshow(image[0][0], cmap='gray')
            # Predicted (red) vs ground-truth (green) landmarks, scaled from
            # normalized coordinates to pixel space.
            plt.scatter(prediction[0, :, 0] * W,
                        prediction[0, :, 1] * H,
                        s=10,
                        marker='o',
                        c='r')
            plt.scatter(label[0, :, 0] * W,
                        label[0, :, 1] * H,
                        marker='o',
                        color='green')
            plt.savefig('results/prediction_' + str(i) + '_' + str(epochs))

            plt.show()
Example #22
0
     # --- Fragment of a phase-dispatch if/elif chain; the opening
     # `if args.phase == ...` branch header sits above this chunk. ---
     log.info('feature extractor train mode')
     extractor = Extractor("neural extractor", args)
     if cuda:
         extractor.cuda()
     extractor.batch_train(cuda)
 elif args.phase == "inference_imitator":
     # Load a trained imitator checkpoint (eval-mode flag True) for inference.
     log.info("inference imitator")
     imitator = Imitator("neural imitator", args, clean=False)
     if cuda:
         imitator.cuda()
     imitator.load_checkpoint(args.imitator_model, True, cuda=cuda)
 elif args.phase == "prev_imitator":
     # Preview: run the imitator on one sample from FaceDataset (CPU only)
     # and write the generated image to ./output/<name>.jpg.
     log.info("preview imitator")
     imitator = Imitator("neural imitator", args, clean=False)
     imitator.load_checkpoint(args.imitator_model, False, cuda=False)
     dataset = FaceDataset(args)
     name, param, img = dataset.get_picture()
     param = np.array(param, dtype=np.float32)
     # Add a leading batch dimension for the model.
     b_param = param[np.newaxis, :]
     log.info(b_param.shape)
     t_param = torch.from_numpy(b_param)
     output = imitator(t_param)
     output = output.cpu().detach().numpy()
     output = np.squeeze(output, axis=0)
     # CHW -> HWC-ish via axis swap, then rescale from [0, 1] to [0, 255]
     # for cv2.imwrite.
     output = output.swapaxes(0, 2) * 255
     cv2.imwrite('./output/{0}.jpg'.format(name), output)
 elif args.phase == "inference_extractor":
     log.info("inference extractor")
     extractor = Extractor("neural extractor", args)
     if cuda:
         extractor.cuda()
Example #23
0
# --- DCGAN setup: initialize weights, build loss/optimizers/data pipeline.
# The training-loop body continues past this chunk. ---
netD.apply(weights_init)
netG.apply(weights_init)

criterion = nn.BCELoss()
# Fixed latent batch used to render comparable sample grids across epochs.
fixed_noise = torch.randn(64, n_vector, 1, 1, device=device)

# NOTE(review): BCELoss expects float targets; these ints presumably get cast
# where the target tensors are built inside the loop — confirm below this chunk.
real_label = 1
fake_label = 0

# Standard DCGAN optimizer settings (Adam with tuned beta1).
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

# Resize/crop to 64x64 and normalize each channel to [-1, 1] to match the
# generator's tanh output range.
facedataset = FaceDataset(dataset_dir=dataset_dir,
                          transform=transforms.Compose([
                              transforms.Resize(64),
                              transforms.CenterCrop(64),
                              transforms.ToTensor(),
                              transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                          ]))

dataloader = DataLoader(facedataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

# Training loop
# Bookkeeping for sample grids and per-iteration losses.
img_list = []
G_losses = []
D_losses = []
iters = 0

print("Starting Training Loop...")

for epoch in range(num_epochs):