Example #1
def setup_model(config):
    in_ch, out_ch = config['model']['in_ch'], config['model']['out_ch']
    depth = config['model']['depth']
    detach = config['model']['detach']

    generator = DCGanGenerator(in_ch, out_ch, depth=depth, detach=detach)
    discriminator = DCGanDiscriminator(out_ch, depth=depth, detach=detach)

    if config['model']['pretrained']['generator'] is not None:
        load_pretrained_model(generator,
                              config['model']['pretrained']['generator'])
    if config['model']['pretrained']['discriminator'] is not None:
        load_pretrained_model(discriminator,
                              config['model']['pretrained']['discriminator'])

    if config['optimizer']['generator'] is not None:
        gen_optimizer = Adam(generator.parameters(),
                             **config['optimizer']['generator'])
    else:
        gen_optimizer = None

    if config['optimizer']['discriminator'] is not None:
        dis_optimizer = Adam(discriminator.parameters(),
                             **config['optimizer']['discriminator'])
    else:
        dis_optimizer = None

    return {
        'generator': generator,
        'discriminator': discriminator,
        'gen_optimizer': gen_optimizer,
        'dis_optimizer': dis_optimizer,
    }
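Note: setup_model reads a nested config dict whose exact contents are project-specific; a minimal sketch of the layout it expects, with placeholder hyperparameter values, might look like this:

# Hypothetical config illustrating the keys setup_model reads; all values are placeholders.
config = {
    'model': {
        'in_ch': 3,              # generator input channels (assumed)
        'out_ch': 3,             # generator output / discriminator input channels (assumed)
        'depth': 4,              # network depth (assumed)
        'detach': False,
        'pretrained': {
            'generator': None,        # or a path to a generator checkpoint
            'discriminator': None,    # or a path to a discriminator checkpoint
        },
    },
    'optimizer': {
        'generator': {'lr': 2e-4, 'betas': (0.5, 0.999)},       # forwarded to Adam(**kwargs)
        'discriminator': {'lr': 2e-4, 'betas': (0.5, 0.999)},
    },
}

parts = setup_model(config)
generator, gen_optimizer = parts['generator'], parts['gen_optimizer']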
Example #2
def run_test():

    model = AutoEncoder(n=2, dim_x=50, dim_hidden=2000)
    device_id = 0
    model_file = 'model/dae.pth'
    net = load_pretrained_model(model, model_file, device_id)

    x_test = pickle.load(open('data/manifold_test.pkl', 'rb'))
    x_true = x_test[:]

    noise_shape = x_true.shape[1:]
    n_dim = np.prod(noise_shape)
    sigma_train = 0.01
    sigma_proposal = 0.01
    sigma_noise_hat_init = 0.01

    for noise in [0.01, 0.02, 0.03, 0.04]:

        sigma_noise = [noise] * n_dim
        sigma_noise = np.array(sigma_noise[:n_dim]).reshape(noise_shape)

        rmse_mean, rmse_std, noise_mean, noise_std, variance_noise_hat_em = \
            solve_agem(net=net, x_true=x_true, sigma_noise=sigma_noise,
                       sigma_train=sigma_train, sigma_noise_hat_init=sigma_noise_hat_init,
                       sigma_proposal=sigma_proposal, type_proposal='mala',
                       candidate='mean', em_epochs=10, sample_epochs=1000)

        print('[AGEM] noise_gt: %.2f | rmse %.4f (%.4f), noise_est: %.4f (%.4f)' % (
            noise, rmse_mean, rmse_std, noise_mean, noise_std
        ))
Example #3
def get_top_birds_classification(y):
    """Predict the 5 most likely bird's species using the PANN model."""
    # TODO: Add a plot.
    model = load_pretrained_model()
    predictions = get_model_predictions_for_clip(y, model)
    class_probs = predictions[BIRDS].sum().reset_index()
    class_probs.columns = ["ebird", "p"]
    class_probs = class_probs.sort_values(by="p")

    top_birds = class_probs.tail(5).reset_index(drop=True)
    return top_birds
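Note: the helpers used above (load_pretrained_model, get_model_predictions_for_clip, and the BIRDS index) come from the surrounding project; a hypothetical call, assuming the clip y is a mono waveform loaded with librosa, could look like this:

# Hypothetical usage; the sample rate is an assumption (PANN checkpoints are commonly trained at 32 kHz).
import librosa

y, sr = librosa.load('data/example_clip.wav', sr=32000, mono=True)
top_birds = get_top_birds_classification(y)
print(top_birds)  # DataFrame with columns ['ebird', 'p']; five rows, sorted by ascending probability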
Example #4
File: main.py | Project: shuxiao0312/STRG
def main_worker(index, opt):
    random.seed(opt.manual_seed)
    np.random.seed(opt.manual_seed)
    torch.manual_seed(opt.manual_seed)

    if index >= 0 and opt.device.type == 'cuda':
        #        opt.device = torch.device(f'cuda:{index}')
        opt.device = torch.device('cuda:{}'.format(index))

    if opt.distributed:
        opt.dist_rank = opt.dist_rank * opt.ngpus_per_node + index
        dist.init_process_group(backend='nccl',
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.dist_rank)
        opt.batch_size = int(opt.batch_size / opt.ngpus_per_node)
        opt.n_threads = int(
            (opt.n_threads + opt.ngpus_per_node - 1) / opt.ngpus_per_node)
    opt.is_master_node = not opt.distributed or opt.dist_rank == 0

    model = generate_model(opt)
    if opt.batchnorm_sync:
        assert opt.distributed, 'SyncBatchNorm only supports DistributedDataParallel.'
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    if opt.pretrain_path:
        model = load_pretrained_model(model, opt.pretrain_path, opt.model,
                                      opt.n_finetune_classes, opt.strg)

    if opt.strg:
        model = STRG(model, nclass=opt.n_classes, nrois=opt.nrois)
        rpn = RPN(nrois=opt.nrois)
        rpn = make_data_parallel(rpn, opt.distributed, opt.device)
    else:
        rpn = None

    if opt.resume_path is not None:
        model = resume_model(opt.resume_path, opt.arch, model)

    model = make_data_parallel(model, opt.distributed, opt.device)

    #    if opt.pretrain_path:
    #        parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
    #    else:
    parameters = model.parameters()

    if opt.is_master_node:
        print(model)

    criterion = CrossEntropyLoss().to(opt.device)

    if not opt.no_train:
        (train_loader, train_sampler, train_logger, train_batch_logger,
         optimizer, scheduler) = get_train_utils(opt, parameters)
        if opt.resume_path is not None:
            opt.begin_epoch, optimizer, scheduler = resume_train_utils(
                opt.resume_path, opt.begin_epoch, optimizer, scheduler)
            if opt.overwrite_milestones:
                scheduler.milestones = opt.multistep_milestones
    if not opt.no_val:
        val_loader, val_logger = get_val_utils(opt)

    if opt.tensorboard and opt.is_master_node:
        #from torch.utils.tensorboard import SummaryWriter
        from tensorboardX import SummaryWriter
        if opt.begin_epoch == 1:
            tb_writer = SummaryWriter(log_dir=opt.result_path)
        else:
            tb_writer = SummaryWriter(log_dir=opt.result_path,
                                      purge_step=opt.begin_epoch)
    else:
        tb_writer = None

    if opt.wandb:
        name = str(opt.result_path)
        wandb.init(
            project='strg',
            name=name,
            config=opt,
            dir=name,
            #            resume=str(opt.resume_path) != '',
            sync_tensorboard=True)

    prev_val_loss = None
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            if opt.distributed:
                train_sampler.set_epoch(i)
            current_lr = get_lr(optimizer)
            train_epoch(i,
                        train_loader,
                        model,
                        criterion,
                        optimizer,
                        opt.device,
                        current_lr,
                        train_logger,
                        train_batch_logger,
                        tb_writer,
                        opt.distributed,
                        rpn=rpn,
                        det_interval=opt.det_interval,
                        nrois=opt.nrois)

            if i % opt.checkpoint == 0 and opt.is_master_node:
                save_file_path = opt.result_path / 'save_{}.pth'.format(i)
                save_checkpoint(save_file_path, i, opt.arch, model, optimizer,
                                scheduler)

        if not opt.no_val:
            prev_val_loss = val_epoch(i,
                                      val_loader,
                                      model,
                                      criterion,
                                      opt.device,
                                      val_logger,
                                      tb_writer,
                                      opt.distributed,
                                      rpn=rpn,
                                      det_interval=opt.det_interval,
                                      nrois=opt.nrois)

        if not opt.no_train and opt.lr_scheduler == 'multistep':
            scheduler.step()
        elif not opt.no_train and opt.lr_scheduler == 'plateau':
            scheduler.step(prev_val_loss)

    if opt.inference:
        inference_loader, inference_class_names = get_inference_utils(opt)
        inference_result_path = opt.result_path / '{}.json'.format(
            opt.inference_subset)

        inference.inference(inference_loader, model, inference_result_path,
                            inference_class_names, opt.inference_no_average,
                            opt.output_topk)
Example #5
def main_worker(index, opt):
    random.seed(opt.manual_seed)
    np.random.seed(opt.manual_seed)
    torch.manual_seed(opt.manual_seed)

    if index >= 0 and opt.device.type == 'cuda':
        opt.device = torch.device(f'cuda:{index}')

    opt.is_master_node = not opt.distributed or opt.dist_rank == 0

    model = generate_model(opt)
    print('after generating model:', model.fc.in_features, ':',
          model.fc.out_features)
    print('feature weights:', model.fc.weight.shape, ':', model.fc.bias.shape)

    if opt.resume_path is not None:
        model = resume_model(opt.resume_path, opt.arch, model)
    print('after resume model:', model.fc.in_features, ':',
          model.fc.out_features)
    print('feature weights:', model.fc.weight.shape, ':', model.fc.bias.shape)
    # summary(model, input_size=(3, 112, 112))
    #    if opt.pretrain_path:
    #        model = load_pretrained_model(model, opt.pretrain_path, opt.model,
    #                                      opt.n_finetune_classes)

    print('after pretrained model:', model.fc.in_features, ':',
          model.fc.out_features)
    print('feature weights:', model.fc.weight.shape, ':', model.fc.bias.shape)
    print(torch_summarize(model))
    # parameters = model.parameters()
    # for name, param in model.named_parameters():
    #     if param.requires_grad:
    #         print(name, param.data)
    #    summary(model, (3, 112, 112))
    #    return

    #    print('model parameters shape', parameters.shape)

    (train_loader, train_sampler, train_logger, train_batch_logger, optimizer,
     scheduler) = get_train_utils(opt, model.parameters())

    for i, (inputs, targets) in enumerate(train_loader):
        print('input shape:', inputs.shape)
        print('targets shape:', targets.shape)
        outputs = model(inputs)
        print("output shape", outputs.shape)
        model_arch = make_dot(outputs, params=dict(model.named_parameters()))
        print(model_arch)
        model_arch.render("/apollo/data/model.png", format="png")
        # Source(model_arch).render('/apollo/data/model.png')
        # print("generating /apollo/data/model.png")
        break

    # make_dot(yhat, params=dict(list(model.named_parameters()))).render("rnn_torchviz", format="png")

    return

    if opt.batchnorm_sync:
        assert opt.distributed, 'SyncBatchNorm only supports DistributedDataParallel.'
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    if opt.pretrain_path:
        model = load_pretrained_model(model, opt.pretrain_path, opt.model,
                                      opt.n_finetune_classes)
    if opt.resume_path is not None:
        model = resume_model(opt.resume_path, opt.arch, model)
    model = make_data_parallel(model, opt.distributed, opt.device)

    if opt.pretrain_path:
        parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
    else:
        parameters = model.parameters()

    if opt.is_master_node:
        print(model)

    criterion = CrossEntropyLoss().to(opt.device)

    if not opt.no_train:
        (train_loader, train_sampler, train_logger, train_batch_logger,
         optimizer, scheduler) = get_train_utils(opt, parameters)
        if opt.resume_path is not None:
            opt.begin_epoch, optimizer, scheduler = resume_train_utils(
                opt.resume_path, opt.begin_epoch, optimizer, scheduler)
            if opt.overwrite_milestones:
                scheduler.milestones = opt.multistep_milestones
    if not opt.no_val:
        val_loader, val_logger = get_val_utils(opt)

    if opt.tensorboard and opt.is_master_node:
        from torch.utils.tensorboard import SummaryWriter
        if opt.begin_epoch == 1:
            tb_writer = SummaryWriter(log_dir=opt.result_path)
        else:
            tb_writer = SummaryWriter(log_dir=opt.result_path,
                                      purge_step=opt.begin_epoch)
    else:
        tb_writer = None

    prev_val_loss = None
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            if opt.distributed:
                train_sampler.set_epoch(i)
            current_lr = get_lr(optimizer)
            train_epoch(i, train_loader, model, criterion, optimizer,
                        opt.device, current_lr, train_logger,
                        train_batch_logger, tb_writer, opt.distributed)

            if i % opt.checkpoint == 0 and opt.is_master_node:
                save_file_path = opt.result_path / 'save_{}.pth'.format(i)
                save_checkpoint(save_file_path, i, opt.arch, model, optimizer,
                                scheduler)

        if not opt.no_val:
            prev_val_loss = val_epoch(i, val_loader, model, criterion,
                                      opt.device, val_logger, tb_writer,
                                      opt.distributed)

        if not opt.no_train and opt.lr_scheduler == 'multistep':
            scheduler.step()
        elif not opt.no_train and opt.lr_scheduler == 'plateau':
            scheduler.step(prev_val_loss)

    if opt.inference:
        inference_loader, inference_class_names = get_inference_utils(opt)
        inference_result_path = opt.result_path / '{}.json'.format(
            opt.inference_subset)

        inference.inference(inference_loader, model, inference_result_path,
                            inference_class_names, opt.inference_no_average,
                            opt.output_topk)
Example #6
def main_worker(index, opt):
    random.seed(opt.manual_seed)
    np.random.seed(opt.manual_seed)
    torch.manual_seed(opt.manual_seed)

    if index >= 0 and opt.device.type == 'cuda':
        opt.device = torch.device(f'cuda:{index}')

    if opt.distributed:
        opt.dist_rank = opt.dist_rank * opt.ngpus_per_node + index
        dist.init_process_group(backend='nccl',
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.dist_rank)
        opt.batch_size = int(opt.batch_size / opt.ngpus_per_node)
        opt.n_threads = int(
            (opt.n_threads + opt.ngpus_per_node - 1) / opt.ngpus_per_node)
    opt.is_master_node = not opt.distributed or opt.dist_rank == 0

    model = generate_model(opt)
    if opt.pretrain_path:
        model = load_pretrained_model(model, opt.pretrain_path, opt.model,
                                      opt.n_finetune_classes)

    model = make_data_parallel(model, opt.distributed, opt.device)

    if opt.pretrain_path:
        parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
    else:
        parameters = model.parameters()

    if opt.is_master_node:
        print(model)

    criterion = CrossEntropyLoss().to(opt.device)

    if not opt.no_train:
        (train_loader, train_sampler, train_logger, train_batch_logger,
         optimizer, scheduler) = get_train_utils(opt, parameters)
    if not opt.no_val:
        val_loader, val_logger = get_val_utils(opt)

    if opt.resume_path is not None:
        if not opt.no_train:
            opt.begin_epoch, model, optimizer, scheduler = resume(
                opt.resume_path, opt.arch, opt.begin_epoch, model, optimizer,
                scheduler)
            if opt.overwrite_milestones:
                scheduler.milestones = opt.multistep_milestones
        else:
            opt.begin_epoch, model, _, _ = resume(opt.resume_path, opt.arch,
                                                  opt.begin_epoch, model)

    if opt.tensorboard and opt.is_master_node:
        from torch.utils.tensorboard import SummaryWriter
        if opt.begin_epoch == 1:
            tb_writer = SummaryWriter(log_dir=opt.result_path)
        else:
            tb_writer = SummaryWriter(log_dir=opt.result_path,
                                      purge_step=opt.begin_epoch)
    else:
        tb_writer = None

    prev_val_loss = None
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            if opt.distributed:
                train_sampler.set_epoch(i)
            current_lr = get_lr(optimizer)
            train_epoch(i, train_loader, model, criterion, optimizer,
                        opt.device, current_lr, train_logger,
                        train_batch_logger, tb_writer, opt.distributed)

            if i % opt.checkpoint == 0 and opt.is_master_node:
                save_file_path = opt.result_path / 'save_{}.pth'.format(i)
                save_checkpoint(save_file_path, i, opt.arch, model, optimizer,
                                scheduler)

        if not opt.no_val:
            prev_val_loss = val_epoch(i, val_loader, model, criterion,
                                      opt.device, val_logger, tb_writer,
                                      opt.distributed)

        if not opt.no_train and opt.lr_scheduler == 'multistep':
            scheduler.step()
        elif not opt.no_train and opt.lr_scheduler == 'plateau':
            scheduler.step(prev_val_loss)

    if opt.inference:
        inference_loader, inference_class_names = get_inference_utils(opt)
        inference_result_path = opt.result_path / '{}.json'.format(
            opt.inference_subset)

        inference.inference(inference_loader, model, inference_result_path,
                            inference_class_names, opt.inference_no_average,
                            opt.output_topk)
Example #7
def main_worker(index, opt):
    random.seed(opt.manual_seed)
    np.random.seed(opt.manual_seed)
    torch.manual_seed(opt.manual_seed)

    if index >= 0 and opt.device.type == 'cuda':
        opt.device = torch.device(f'cuda:{index}')

    if opt.distributed:
        opt.dist_rank = opt.dist_rank * opt.ngpus_per_node + index
        dist.init_process_group(backend='nccl',
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.dist_rank)
        opt.batch_size = int(opt.batch_size / opt.ngpus_per_node)
        opt.n_threads = int(
            (opt.n_threads + opt.ngpus_per_node - 1) / opt.ngpus_per_node)
    opt.is_master_node = not opt.distributed or opt.dist_rank == 0

    model = generate_model(opt)
    if opt.batchnorm_sync:
        assert opt.distributed, 'SyncBatchNorm only supports DistributedDataParallel.'
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    if opt.pretrain_path:
        model = load_pretrained_model(model, opt.pretrain_path, opt.model,
                                      opt.n_finetune_classes)
    if opt.dropout:
        n_classes = opt.n_classes
        if opt.pretrain_path is not None:
            n_classes = opt.n_finetune_classes
        model = replace_fc_layer(model=model,
                                 dropout_factor=opt.dropout_factor,
                                 n_classes=n_classes)
    if opt.resume_path is not None:
        model = resume_model(opt.resume_path, opt.arch, model)

    model = make_data_parallel(model, opt.distributed, opt.device)
    if opt.pretrain_path:
        parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
    else:
        parameters = model.parameters()

    if opt.is_master_node:
        print(model)

    if opt.labelsmoothing:
        criterion = LabelSmoothingCrossEntropy().to(opt.device)
    else:
        criterion = CrossEntropyLoss().to(opt.device)

    if not opt.no_train:
        (train_loader, train_sampler, train_logger, train_batch_logger,
         optimizer, scheduler) = get_train_utils(opt, parameters)
        if opt.resume_path is not None:
            opt.begin_epoch, optimizer, scheduler = resume_train_utils(
                opt.resume_path, opt.begin_epoch, optimizer, scheduler)
            if opt.overwrite_milestones:
                scheduler.milestones = opt.multistep_milestones
    if not opt.no_val:
        val_loader, val_logger = get_val_utils(opt)

    if opt.tensorboard and opt.is_master_node:
        from torch.utils.tensorboard import SummaryWriter
        if opt.begin_epoch == 1:
            tb_writer = SummaryWriter(log_dir=opt.result_path)
        else:
            tb_writer = SummaryWriter(log_dir=opt.result_path,
                                      purge_step=opt.begin_epoch)
    else:
        tb_writer = None

    if opt.lr_finder and not opt.no_train and not opt.no_val:
        print(
            "Performing Learning Rate Search\nWith Leslie Smith's approach...")
        lr_finder = LRFinder(model, optimizer, criterion, device=opt.device)
        lr_finder.range_test(train_loader,
                             val_loader=val_loader,
                             start_lr=opt.learning_rate,
                             end_lr=opt.lrf_end_lr,
                             num_iter=opt.lrf_num_it,
                             step_mode=opt.lrf_mode)
        lr_finder.plot(log_lr=False)
        with (opt.result_path / 'lr_search.json').open('w') as results_file:
            json.dump(lr_finder.history, results_file, default=json_serial)
        lr_finder.reset()
        return

    prev_val_loss = None
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            if opt.distributed:
                train_sampler.set_epoch(i)
            #current_lr = get_lr(optimizer)
            train_epoch(i, train_loader, model, criterion, optimizer,
                        opt.device, train_logger, train_batch_logger,
                        scheduler, opt.lr_scheduler, tb_writer,
                        opt.distributed)

            if i % opt.checkpoint == 0 and opt.is_master_node:
                save_file_path = opt.result_path / 'save_{}.pth'.format(i)
                save_checkpoint(save_file_path, i, opt.arch, model, optimizer,
                                scheduler)

        if not opt.no_val:
            prev_val_loss = val_epoch(i, val_loader, model, criterion,
                                      opt.device, val_logger, tb_writer,
                                      opt.distributed)

        if not opt.no_train and opt.lr_scheduler == 'multistep':
            scheduler.step()
        elif not opt.no_train and opt.lr_scheduler == 'plateau':
            scheduler.step(prev_val_loss)
        elif not opt.no_train and opt.lr_scheduler == 'cosineannealing':
            scheduler.step()

    if opt.inference:
        inference_loader, inference_class_names = get_inference_utils(opt)
        inference_result_path = opt.result_path / '{}.json'.format(
            opt.inference_subset)

        inference.inference(inference_loader, model, inference_result_path,
                            inference_class_names, opt.inference_no_average,
                            opt.output_topk)
Example #8
def main(opt):
    place = fluid.CPUPlace() if opt.no_cuda else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        print(place)
        random.seed(opt.manual_seed)
        np.random.seed(opt.manual_seed)
        prog = fluid.default_main_program()
        prog.global_seed(opt.manual_seed)
        os.environ['PYTHONHASHSEED'] = str(opt.manual_seed)

        model = generate_model(opt)
        if opt.pretrain_path:
            model = load_pretrained_model(model, opt.pretrain_path, opt.model,
                                          opt.n_finetune_classes)

        if opt.resume_path is not None:
            model = resume_model(opt.resume_path, model)

        if opt.pretrain_path:
            parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
        else:
            parameters = model.parameters()

        if not opt.no_train:
            (train_loader, train_logger, train_batch_logger, optimizer,
             scheduler) = get_train_utils(opt, parameters)
            if opt.resume_path is not None:
                opt.begin_epoch, optimizer, scheduler = resume_train_utils(
                    opt.resume_path, optimizer, scheduler)
                if opt.overwrite_milestones:
                    scheduler.milestones = opt.multistep_milestones

        if not opt.no_val:
            val_loader, val_logger = get_val_utils(opt)

        best_acc = 0.88
        for epoch in range(opt.begin_epoch, opt.n_epochs + 1):
            if not opt.no_train:
                train_epoch(epoch, train_loader, model, optimizer, scheduler,
                            train_logger, train_batch_logger)

                if epoch % opt.checkpoint == 0:
                    save_file_path = str(
                        opt.result_path) + 'save_{}_{}_{}'.format(
                            epoch, opt.train_crop, opt.batch_size)
                    save_checkpoint(save_file_path, model, optimizer)

            if not opt.no_val:
                prev_val_loss, val_acc = val_epoch(epoch, val_loader, model,
                                                   val_logger)

            if not opt.no_train and opt.lr_scheduler == 'multistep':
                scheduler.epoch()
            elif not opt.no_train and opt.lr_scheduler == 'plateau':
                scheduler.step(prev_val_loss)

            if not opt.no_val:
                if val_acc > best_acc:
                    best_acc = val_acc
                    save_file_path = str(
                        opt.result_path) + 'save_{}_{}_best_val_acc'.format(
                            epoch, opt.train_crop)
                    save_checkpoint(save_file_path, model, optimizer)

            if not opt.no_train:
                current_lr = optimizer.current_step_lr()
                print("current val_loss is %s, current lr is %s" %
                      (prev_val_loss.numpy()[0], current_lr))

        if opt.inference:
            inference_loader, inference_class_names = get_inference_utils(opt)
            inference_result_path = opt.result_path / '{}_{}.json'.format(
                opt.inference_subset, opt.train_crop)

            inference.inference(inference_loader, model, inference_result_path,
                                inference_class_names,
                                opt.inference_no_average, opt.output_topk)
Example #9
def main_worker(index, opt):
    random.seed(opt.manual_seed)
    np.random.seed(opt.manual_seed)
    torch.manual_seed(opt.manual_seed)

    if index >= 0 and opt.device.type == 'cuda':
        opt.device = torch.device(f'cuda:{index}')

    if opt.distributed:
        opt.dist_rank = opt.dist_rank * opt.ngpus_per_node + index
        dist.init_process_group(backend='nccl',
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.dist_rank)
        opt.batch_size = int(opt.batch_size / opt.ngpus_per_node)
        opt.n_threads = int(
            (opt.n_threads + opt.ngpus_per_node - 1) / opt.ngpus_per_node)
    opt.is_master_node = not opt.distributed or opt.dist_rank == 0
    model = generate_model(opt)
    if opt.batchnorm_sync:
        assert opt.distributed, 'SyncBatchNorm only supports DistributedDataParallel.'
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    if opt.pretrain_path:
        model = load_pretrained_model(model, opt.pretrain_path, opt.model,
                                      opt.n_finetune_classes)
    if opt.resume_path is not None:
        model = resume_model(opt.resume_path, opt.arch, model)
        print('resume model from ', opt.resume_path)

    print('model after resume:', model)

    # save model to current running id
    # mlflow.pytorch.log_model(model, "action_model")
    # model_path = mlflow.get_artifact_uri("action_model")
    # print('mlflow action model path: ', model_path)
    # model = mlflow.pytorch.load_model(model_path)
    if opt.ml_tag_name != '' and opt.ml_tag_value != '':
        # mlflow.set_tag("test_tag", 'inference_test')
        mlflow.set_tag(opt.ml_tag_name, opt.ml_tag_value)

    # load from previous published model version
    if opt.ml_model_name != '' and opt.ml_model_version != '':
        # model_name = 'action_model'
        # model_version = '1'
        model_uri = "models:/{}/{}".format(opt.ml_model_name,
                                           opt.ml_model_version)
        model = mlflow.pytorch.load_model(model_uri)

    model = make_data_parallel(model, opt.distributed, opt.device)

    if opt.pretrain_path:
        parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
    else:
        parameters = model.parameters()

    if opt.is_master_node:
        print(model)

    criterion = CrossEntropyLoss().to(opt.device)

    if not opt.no_train:
        (train_loader, train_sampler, train_logger, train_batch_logger,
         optimizer, scheduler) = get_train_utils(opt, parameters)
        if opt.resume_path is not None:
            opt.begin_epoch, optimizer, scheduler = resume_train_utils(
                opt.resume_path, opt.begin_epoch, optimizer, scheduler)
            if opt.overwrite_milestones:
                scheduler.milestones = opt.multistep_milestones
    if not opt.no_val:
        val_loader, val_logger = get_val_utils(opt)

    if opt.tensorboard and opt.is_master_node:
        from torch.utils.tensorboard import SummaryWriter
        if opt.begin_epoch == 1:
            tb_writer = SummaryWriter(log_dir=opt.result_path)
        else:
            tb_writer = SummaryWriter(log_dir=opt.result_path,
                                      purge_step=opt.begin_epoch)
    else:
        tb_writer = None

    prev_val_loss = None
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            if opt.distributed:
                train_sampler.set_epoch(i)
            current_lr = get_lr(optimizer)
            train_epoch(i, train_loader, model, criterion, optimizer,
                        opt.device, current_lr, train_logger,
                        train_batch_logger, tb_writer, opt.distributed)

            if i % opt.checkpoint == 0 and opt.is_master_node:
                save_file_path = opt.result_path / 'save_{}.pth'.format(i)
                save_checkpoint(save_file_path, i, opt.arch, model, optimizer,
                                scheduler)
                if opt.ml_model_name != '':
                    mlflow.pytorch.log_model(model, opt.ml_model_name)

        if not opt.no_val:
            prev_val_loss = val_epoch(i, val_loader, model, criterion,
                                      opt.device, val_logger, tb_writer,
                                      opt.distributed)
            mlflow.log_metric("loss", prev_val_loss)

        if not opt.no_train and opt.lr_scheduler == 'multistep':
            scheduler.step()
        elif not opt.no_train and opt.lr_scheduler == 'plateau':
            scheduler.step(prev_val_loss)

    if opt.inference:
        inference_loader, inference_class_names = get_inference_utils(opt)
        inference_result_path = opt.result_path / '{}.json'.format(
            opt.inference_subset)

        inference.inference(inference_loader, model, inference_result_path,
                            inference_class_names, opt.inference_no_average,
                            opt.output_topk)
Example #10
    def score(self):

        normalize = get_normalize_method(self.opt.mean, self.opt.std, self.opt.no_mean_norm,
                                         self.opt.no_std_norm)
        spatial_transform = [
            Resize(self.opt.sample_size),
            CenterCrop(self.opt.sample_size),
            ToTensor()
        ]

        spatial_transform.extend([ScaleValue(self.opt.value_scale), normalize])
        spatial_transform = Compose(spatial_transform)

        temporal_transform = []
        if self.opt.sample_t_stride > 1:
            temporal_transform.append(TemporalSubsampling(self.opt.sample_t_stride))
        temporal_transform.append(
            TemporalEvenCrop(self.opt.sample_duration, self.opt.n_val_samples))
        temporal_transform = TemporalCompose(temporal_transform)

        frame_count = get_n_frames(self.opt.video_jpgs_dir_path)
        frame_indices = list(range(0, frame_count))
        frame_indices = temporal_transform(frame_indices)

        spatial_transform.randomize_parameters()

        image_name_formatter = lambda x: f'image_{x:05d}.jpg'

        loader = VideoLoader(image_name_formatter)

        print('frame_indices', frame_indices)

        video_outputs = []
        model = generate_model(self.opt)
        model = load_pretrained_model(model, self.opt.pretrain_path, self.opt.model,
                                      self.opt.n_finetune_classes)

        for i, frame_indice in enumerate(frame_indices):
            print("%d indice: %s" % (i, str(frame_indice)))

            clip = loader(self.opt.video_jpgs_dir_path, frame_indice)
            clip = [spatial_transform(img) for img in clip]
            clip = torch.stack(clip, 0).permute(1, 0, 2, 3)

            with torch.no_grad():
                print(clip.shape)
                output = model(torch.unsqueeze(clip, 0))
                output = F.softmax(output, dim=1).cpu()
                video_outputs.append(output[0])

            del clip

        video_outputs = torch.stack(video_outputs)
        average_scores = torch.mean(video_outputs, dim=0)

        #inference_loader, inference_class_names = main.get_inference_utils(self.opt)
        with self.opt.annotation_path.open('r') as f:
            data = json.load(f)

        class_to_idx = get_class_labels(data)
        idx_to_class = {}
        for name, label in class_to_idx.items():
            idx_to_class[label] = name
        print(idx_to_class)

        inference_result = inference.get_video_results(
            average_scores, idx_to_class, self.opt.output_topk)

        print(inference_result)
Example #11
    print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(),
               os.path.join(config.MODEL_PATH, config.MODEL_NAME))
    return model


if __name__ == '__main__':

    model_ft = model.load_pretrained_model()
    criterion = nn.CrossEntropyLoss()
    optimizer_ft = optim.SGD(model_ft.parameters(),
                             lr=config.LEARNING_RATE,
                             momentum=config.MOMENTUM)

    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft,
                                           step_size=7,
                                           gamma=0.1)
    model_ft = train_model(model_ft,
                           criterion,
                           optimizer_ft,
                           exp_lr_scheduler,
                           num_epochs=config.NUM_EPOCHS)

    predict.visualize_model(model_ft)
Example #12
def main_worker(index, opt):
    random.seed(opt.manual_seed)
    np.random.seed(opt.manual_seed)
    torch.manual_seed(opt.manual_seed)

    if index >= 0 and opt.device.type == 'cuda':
        opt.device = torch.device(f'cuda:{index}')

    if opt.distributed:
        opt.dist_rank = opt.dist_rank * opt.ngpus_per_node + index
        dist.init_process_group(backend='nccl',
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.dist_rank)
        opt.batch_size = int(opt.batch_size / opt.ngpus_per_node)
        opt.n_threads = int(
            (opt.n_threads + opt.ngpus_per_node - 1) / opt.ngpus_per_node)
    opt.is_master_node = not opt.distributed or opt.dist_rank == 0

    model = generate_model(opt)
    if opt.batchnorm_sync:
        assert opt.distributed, 'SyncBatchNorm only supports DistributedDataParallel.'
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    if opt.pretrain_path:
        model = load_pretrained_model(model, opt.pretrain_path, opt.model,
                                      opt.n_finetune_classes)
    if opt.resume_path is not None:
        model = resume_model(opt.resume_path, opt.arch, model)
    model = make_data_parallel(model, opt.distributed, opt.device)

    if opt.pretrain_path:
        parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
    else:
        parameters = model.parameters()

    if opt.is_master_node:
        print(model)

    #criterion = CrossEntropyLoss().to(opt.device)
    # ADDED for 231n
    criterion = FocalLoss().to(opt.device)

    if not opt.no_train:
        (train_loader, train_sampler, train_logger, train_batch_logger,
         optimizer, scheduler) = get_train_utils(opt, parameters)
        if opt.resume_path is not None:
            opt.begin_epoch, optimizer, scheduler = resume_train_utils(
                opt.resume_path, opt.begin_epoch, optimizer, scheduler)
            if opt.overwrite_milestones:
                scheduler.milestones = opt.multistep_milestones
    if not opt.no_val:
        val_loader, val_logger = get_val_utils(opt)

    if opt.tensorboard and opt.is_master_node:
        from torch.utils.tensorboard import SummaryWriter
        if opt.begin_epoch == 1:
            tb_writer = SummaryWriter(log_dir=opt.result_path)
        else:
            tb_writer = SummaryWriter(log_dir=opt.result_path,
                                      purge_step=opt.begin_epoch)
    else:
        tb_writer = None

    conf_mtx_dict = {}  # ADDED for CS231n

    prev_val_loss = None
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            if opt.distributed:
                train_sampler.set_epoch(i)
            current_lr = get_lr(optimizer)
            train_epoch(i, train_loader, model, criterion, optimizer,
                        opt.device, current_lr, train_logger,
                        train_batch_logger, tb_writer, opt.distributed)

            if i % opt.checkpoint == 0 and opt.is_master_node:
                save_file_path = opt.result_path / 'save_{}.pth'.format(i)
                save_checkpoint(save_file_path, i, opt.arch, model, optimizer,
                                scheduler)

        if not opt.no_val:
            prev_val_loss = val_epoch(i, val_loader, model, criterion,
                                      opt.device, val_logger, tb_writer,
                                      opt.distributed,
                                      conf_mtx_dict)  # ADDED for CS231n

        # ADDED for 231n - uncomment if using cross entropy loss
        #if not opt.no_train and opt.lr_scheduler == 'multistep':
        #    scheduler.step()
        #elif not opt.no_train and opt.lr_scheduler == 'plateau':
        #    scheduler.step(prev_val_loss)

    if opt.inference:
        inference_loader, inference_class_names = get_inference_utils(opt)
        inference_result_path = opt.result_path / '{}.json'.format(
            opt.inference_subset)

        inference.inference(inference_loader, model, inference_result_path,
                            inference_class_names, opt.inference_no_average,
                            opt.output_topk)

    # ADDED for CS231n
    conf_mtx_file = csv.writer(open("conf_mtxs.csv", "w+"))
    for key, val in conf_mtx_dict.items():
        conf_mtx_file.writerow([key, val])
Example #13
def main_worker(index, opt):
    random.seed(opt.manual_seed)
    np.random.seed(opt.manual_seed)
    torch.manual_seed(opt.manual_seed)

    if index >= 0 and opt.device.type == 'cuda':
        opt.device = torch.device(f'cuda:{index}')

    if opt.distributed:
        opt.dist_rank = opt.dist_rank * opt.ngpus_per_node + index
        dist.init_process_group(backend='nccl',
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.dist_rank)
        opt.batch_size = int(opt.batch_size / opt.ngpus_per_node)
        opt.n_threads = int(
            (opt.n_threads + opt.ngpus_per_node - 1) / opt.ngpus_per_node)
    opt.is_master_node = not opt.distributed or opt.dist_rank == 0

    if opt.inference:
        model = generate_model(opt)
    else:
        model = generate_model(opt, use_features=True)

    if opt.batchnorm_sync:
        assert opt.distributed, 'SyncBatchNorm only supports DistributedDataParallel.'
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    if opt.pretrain_path:
        model = load_pretrained_model(model, opt.pretrain_path,
                                      opt.n_finetune_classes)
    if opt.resume_path is not None:
        model = resume_model(opt.resume_path, opt.arch, model)
    model = make_data_parallel(model, opt.distributed, opt.device)

    if opt.pretrain_path:
        parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
    else:
        parameters = model.parameters()

    if opt.is_master_node:
        print(model)

    #####################################################################################
    ### here add a classifier to predict videos and audios
    if opt.inference is False:
        ### define loss
        criterion = CrossEntropyLoss().to(opt.device)

        if opt.use_audio or opt.use_image:
            criterion_jsd = JSDLoss(weight=0.5)

        #################################################################################
        if opt.use_audio:
            ### define loss
            criterion_ct_av = NCELoss(temperature=0.5)
            ### audio teacher model
            feature_dim = 512 * 2
            if opt.pretrain_path is not None:
                joint_prediction_aud = generate_prediction(
                    feature_dim, opt.n_finetune_classes, normalization=True)
            else:
                joint_prediction_aud = generate_prediction(feature_dim,
                                                           opt.n_classes,
                                                           normalization=True)
            if opt.resume_path is not None:
                aux_checkpoint = Path(
                    os.path.join(str(opt.resume_path.parent),
                                 str(opt.resume_path.name[:-4] +
                                     '_audio.pth')))
                joint_prediction_aud = resume_model(aux_checkpoint, opt.arch,
                                                    joint_prediction_aud)

            joint_prediction_aud = make_data_parallel(joint_prediction_aud,
                                                      opt.distributed,
                                                      opt.device)
            aud_para = joint_prediction_aud.parameters()
            joint_prediction_aud.cuda()
        else:
            aud_para = None

        #################################################################################
        if opt.use_image:
            ### define loss
            criterion_ct_iv = NCELoss(temperature=0.1)
            ### image teacher model
            image_model = torchvision.models.resnet34(pretrained=True)
            # remove the fc layers (only use the image features)
            image_model = torch.nn.Sequential(
                *list(image_model.children())[:-1])
            image_model = make_data_parallel(image_model, opt.distributed,
                                             opt.device)
            feature_dim = 512 * 2
            if opt.pretrain_path is not None:
                joint_prediction_img = generate_prediction(
                    feature_dim, opt.n_finetune_classes, normalization=True)
            else:
                joint_prediction_img = generate_prediction(feature_dim,
                                                           opt.n_classes,
                                                           normalization=True)
            if opt.resume_path is not None:
                aux_checkpoint = Path(
                    os.path.join(str(opt.resume_path.parent),
                                 str(opt.resume_path.name[:-4] +
                                     '_image.pth')))
                joint_prediction_img = resume_model(aux_checkpoint, opt.arch,
                                                    joint_prediction_img)

            joint_prediction_img = make_data_parallel(joint_prediction_img,
                                                      opt.distributed,
                                                      opt.device)
            img_para = joint_prediction_img.parameters()
            joint_prediction_img.cuda()
        else:
            img_para = None

        #################################################################################
        (train_loader, train_sampler, train_logger, train_batch_logger, optimizer, optimizer_av, optimizer_iv, scheduler) = \
            get_train_utils(opt, model_parameters=parameters, av_parameters=aud_para, iv_parameters=img_para)

        if opt.resume_path is not None:
            opt.begin_epoch, optimizer, scheduler = resume_train_utils(
                opt.resume_path, opt.begin_epoch, optimizer, scheduler)
            if opt.overwrite_milestones:
                scheduler.milestones = opt.multistep_milestones

    if not opt.no_val:
        val_loader, val_logger = get_val_utils(opt)

    if opt.tensorboard and opt.is_master_node:
        from torch.utils.tensorboard import SummaryWriter
        if opt.begin_epoch == 1:
            tb_writer = SummaryWriter(log_dir=opt.result_path)
        else:
            tb_writer = SummaryWriter(log_dir=opt.result_path,
                                      purge_step=opt.begin_epoch)
    else:
        tb_writer = None

    prev_val_loss = None
    pre_val_acc = 0.0
    if opt.image_size > opt.sample_size:
        image_size = opt.image_size
    else:
        image_size = None
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            if opt.distributed:
                train_sampler.set_epoch(i)
            current_lr = get_lr(optimizer)
            if optimizer_av is None and optimizer_iv is None:
                train_epoch(epoch=i,
                            data_loader=train_loader,
                            model=model,
                            criterion=criterion,
                            optimizer=optimizer,
                            device=opt.device,
                            current_lr=current_lr,
                            epoch_logger=train_logger,
                            batch_logger=train_batch_logger,
                            tb_writer=tb_writer,
                            distributed=opt.distributed)
            elif optimizer_av is not None and optimizer_iv is None:
                train_a_epoch(epoch=i,
                              data_loader=train_loader,
                              model=model,
                              joint_prediction_aud=joint_prediction_aud,
                              criterion=criterion,
                              criterion_jsd=criterion_jsd,
                              criterion_ct_av=criterion_ct_av,
                              optimizer=optimizer,
                              optimizer_av=optimizer_av,
                              device=opt.device,
                              current_lr=current_lr,
                              epoch_logger=train_logger,
                              batch_logger=train_batch_logger,
                              tb_writer=tb_writer,
                              distributed=opt.distributed)
            elif optimizer_av is None and optimizer_iv is not None:
                train_i_epoch(epoch=i,
                              data_loader=train_loader,
                              model=model,
                              image_model=image_model,
                              joint_prediction_img=joint_prediction_img,
                              criterion=criterion,
                              criterion_jsd=criterion_jsd,
                              criterion_ct_iv=criterion_ct_iv,
                              optimizer=optimizer,
                              optimizer_iv=optimizer_iv,
                              device=opt.device,
                              current_lr=current_lr,
                              epoch_logger=train_logger,
                              batch_logger=train_batch_logger,
                              tb_writer=tb_writer,
                              distributed=opt.distributed,
                              image_size=image_size)
            else:
                train_ai_epoch(epoch=i,
                               data_loader=train_loader,
                               model=model,
                               image_model=image_model,
                               joint_prediction_aud=joint_prediction_aud,
                               joint_prediction_img=joint_prediction_img,
                               criterion=criterion,
                               criterion_jsd=criterion_jsd,
                               criterion_ct_av=criterion_ct_av,
                               criterion_ct_iv=criterion_ct_iv,
                               optimizer=optimizer,
                               optimizer_av=optimizer_av,
                               optimizer_iv=optimizer_iv,
                               device=opt.device,
                               current_lr=current_lr,
                               epoch_logger=train_logger,
                               batch_logger=train_batch_logger,
                               tb_writer=tb_writer,
                               distributed=opt.distributed,
                               image_size=image_size,
                               loss_weight=opt.loss_weight)

            if i % opt.checkpoint == 0 and opt.is_master_node:
                save_file_path = opt.result_path / 'save_{}.pth'.format(i)
                save_checkpoint(save_file_path, i, opt.arch, model, optimizer,
                                scheduler)
                if opt.use_audio:
                    save_file_path = opt.result_path / 'save_{}_audio.pth'.format(
                        i)
                    save_checkpoint(save_file_path, i, opt.arch,
                                    joint_prediction_aud, optimizer, scheduler)
                if opt.use_image:
                    save_file_path = opt.result_path / 'save_{}_image.pth'.format(
                        i)
                    save_checkpoint(save_file_path, i, opt.arch,
                                    joint_prediction_img, optimizer, scheduler)
            if not opt.no_val and i % opt.val_freq == 0:
                prev_val_loss, val_acc = val_epoch(i, val_loader, model,
                                                   criterion, opt.device,
                                                   val_logger, tb_writer,
                                                   opt.distributed)
                if pre_val_acc < val_acc:
                    pre_val_acc = val_acc
                    save_file_path = opt.result_path / 'save_model.pth'
                    save_checkpoint(save_file_path, i, opt.arch, model,
                                    optimizer, scheduler)

            if not opt.no_train and opt.lr_scheduler == 'multistep':
                scheduler.step()
            elif not opt.no_train and opt.lr_scheduler == 'plateau':
                if prev_val_loss is not None:
                    scheduler.step(prev_val_loss)

    if opt.inference:
        inference_loader, inference_class_names = get_inference_utils(opt)
        inference_result_path = opt.result_path / '{}.json'.format(
            opt.inference_subset)
        inference.inference(inference_loader, model, inference_result_path,
                            inference_class_names, opt.inference_no_average,
                            opt.output_topk)