Example #1
    def __init__(self, Net, device, global_records, config):
        # Initializations
        self.device = device
        self.global_records = global_records
        self.config = config
        self.logger = logging.getLogger(__name__)

        # Initialize network
        self.net = Net(**self.config['net'])

        # Then load its params if available
        if self.config['net'].get('saved_params_path') is not None:
            self.load_net(self.config['net']['saved_params_path'])

        # Initialize optimizer
        self.setup_optimizer()

        # Initialize learning rate scheduler
        self.setup_lr_scheduler()

        # Transfer network to device
        self.net.to(self.device)
        self.logger.info(self.net)
        self.logger.info("Number of parameters: %d" %
                         (count_parameters(self.net)))

        # Losses for all models (more can be defined in derived models if needed)
        self.mse_loss_fn = nn.MSELoss(reduction='none')
        self.mae_loss_fn = nn.L1Loss(reduction='none')

        # Initialize epoch number
        self.epoch = 0
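All six examples call a count_parameters helper that the snippets themselves never define. A minimal sketch of the usual implementation, assuming it counts only trainable parameters (the repos above may count differently, e.g. including frozen weights):

import torch.nn as nn

def count_parameters(model: nn.Module) -> int:
    # Sum the element counts of every parameter that receives gradients.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)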
Example #2
def main():
    torch.manual_seed(1)
    if args.cuda:
        torch.cuda.manual_seed(1)
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True

    exp_dir = os.path.join("data", args.exp_name)
    make_dir_if_not_exist(exp_dir)

    model = Resnet(device, args.num_classes)
    model = nn.DataParallel(model, device_ids=args.gpu_devices)
    model.to(device)

    model_dict = None
    if args.ckp:
        if os.path.isfile(args.ckp):
            print("=> Loading checkpoint '{}'".format(args.ckp))
            model_dict = torch.load(args.ckp)
            print("=> Loaded checkpoint '{}'".format(args.ckp))

    if (args.mode == 'demo') and (model_dict is None):
        print("Please specify model path")
        return

    if model_dict is not None:
        model.load_state_dict(model_dict['state_dict'])

    cudnn.benchmark = True  # same flag as torch.backends.cudnn.benchmark set in the args.cuda branch above

    # Collect only the parameters that require gradients for the optimizer
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            params += [{'params': [value]}]

    print("Number of trainable params - {}".format(count_parameters(model)))

    # weight=None leaves classes unweighted; 255 is a conventional "ignore" label in segmentation masks
    criterion = torch.nn.NLLLoss(weight=None, ignore_index=255)
    optimizer = optim.Adam(params, lr=args.lr)

    if args.mode == 'demo':
        train_data_loader, test_data_loader = sample_data(args.dset)
        test(test_data_loader, model, criterion, demo=True)
        return

    for epoch in range(1, args.epochs + 1):
        train_data_loader, test_data_loader = sample_data(args.dset)
        # Evaluate with the current weights, then train for one epoch
        test(test_data_loader, model, criterion)
        train(train_data_loader, model, criterion, optimizer, epoch)
        model_to_save = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
        }
        if epoch % args.ckp_freq == 0:
            file_name = os.path.join(exp_dir,
                                     "checkpoint_" + str(epoch) + ".pth")
            save_checkpoint(model_to_save, file_name)
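Example #2 leans on two small helpers, make_dir_if_not_exist and save_checkpoint, that are not shown. A plausible minimal sketch, with names and behavior inferred from the call sites (assumptions, not the repo's actual code):

import os
import torch

def make_dir_if_not_exist(path):
    # Create the directory (and any missing parents) if it does not exist yet.
    os.makedirs(path, exist_ok=True)

def save_checkpoint(state, file_name):
    # Serialize the checkpoint dict ({'epoch': ..., 'state_dict': ...}) to disk.
    torch.save(state, file_name)

Note that because the model is wrapped in nn.DataParallel before saving, the state_dict keys carry a 'module.' prefix; loading the checkpoint into an unwrapped model later would require stripping that prefix.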
Example #3
def start_training():
    logger.info("Setup config, data and model...")
    opt = BaseOptions().parse()
    set_seed(opt.seed)
    if opt.debug:  # make runs deterministic for debugging
        # cudnn.benchmark = True enables auto-tuning to find the fastest
        # algorithms for a fixed input/net configuration; disable it here,
        # together with non-deterministic kernels, for reproducibility.
        cudnn.benchmark = False
        cudnn.deterministic = True

    opt.writer = SummaryWriter(opt.tensorboard_log_dir)
    opt.train_log_txt_formatter = "{time_str} [Epoch] {epoch:03d} [Loss] {loss_str}\n"
    opt.eval_log_txt_formatter = "{time_str} [Epoch] {epoch:03d} [Metrics] {eval_metrics_str}\n"

    train_dataset = ExCLDataset(
        dset_name=opt.dset_name,
        data_path=opt.train_path,
        desc_bert_path_or_handler=opt.desc_bert_path,
        sub_bert_path_or_handler=opt.sub_bert_path,
        max_desc_len=opt.max_desc_l,
        max_ctx_len=opt.max_ctx_l,
        vid_feat_path_or_handler=opt.vid_feat_path,
        clip_length=opt.clip_length,
        ctx_mode=opt.ctx_mode,
        h5driver=opt.h5driver,
        data_ratio=opt.data_ratio,
        normalize_vfeat=not opt.no_norm_vfeat,
        normalize_tfeat=not opt.no_norm_tfeat,
    )

    if opt.eval_path is not None:
        eval_dataset = ExCLDataset(
            dset_name=opt.dset_name,
            data_path=opt.eval_path,
            desc_bert_path_or_handler=train_dataset.desc_bert_h5,
            sub_bert_path_or_handler=train_dataset.sub_bert_h5 if "sub" in opt.ctx_mode else None,
            max_desc_len=opt.max_desc_l,
            max_ctx_len=opt.max_ctx_l,
            vid_feat_path_or_handler=train_dataset.vid_feat_h5 if "video" in opt.ctx_mode else None,
            clip_length=opt.clip_length,
            ctx_mode=opt.ctx_mode,
            h5driver=opt.h5driver,
            data_ratio=opt.data_ratio,
            normalize_vfeat=not opt.no_norm_vfeat,
            normalize_tfeat=not opt.no_norm_tfeat,
            video_duration_idx_path=opt.video_duration_idx_path,
            eval_split_name=opt.eval_split_name
        )
    else:
        eval_dataset = None

    model_config = EDict(
        visual_input_size=opt.vid_feat_size,
        sub_input_size=opt.sub_feat_size,  # for both desc and subtitles
        query_input_size=opt.q_feat_size,  # for both desc and subtitles
        hidden_size=opt.hidden_size,
        drop=opt.drop,
        ctx_mode=opt.ctx_mode,  # video, sub or video_sub
        initializer_range=opt.initializer_range
    )
    logger.info("model_config {}".format(model_config))
    model = EXCL(model_config)
    count_parameters(model)
    logger.info("Start Training...")
    train(model, train_dataset, eval_dataset, opt)
    return opt.results_dir, opt.eval_split_name, opt.eval_path, opt.debug
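Examples #3, #5 and #6 all call set_seed(opt.seed) without showing it. A typical implementation (assumed here, not taken from these repos) seeds every RNG the training run touches:

import random
import numpy as np
import torch

def set_seed(seed, use_cuda=True):
    # Seed Python, NumPy and PyTorch (CPU and, optionally, all GPUs).
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed_all(seed)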
Example #4
print("Log save path:", log_path)

# ----------------------- Dataset -----------------------
train_dataset = SyntheticTrainingDataset(npz_path=train_path,
                                         params_from='all')
val_dataset = SyntheticTrainingDataset(npz_path=val_path, params_from='all')
train_val_monitor_datasets = [train_dataset, val_dataset]
print("Training examples found:", len(train_dataset))
print("Validation examples found:", len(val_dataset))

# ----------------------- Models -----------------------
# Regressor
regressor = SingleInputRegressor(resnet_in_channels,
                                 resnet_layers,
                                 ief_iters=ief_iters)
num_params = count_parameters(regressor)
print("\nRegressor model Loaded. ", num_params, "trainable parameters.")

# SMPL model
smpl_model = SMPL(config.SMPL_MODEL_DIR, batch_size=batch_size)

# Camera and NMR part/silhouette renderer
# Assuming camera rotation is identity (since it is dealt with by global_orients in SMPL)
mean_cam_t = np.array([0., 0.2, 42.])
mean_cam_t = torch.from_numpy(mean_cam_t).float().to(device)
mean_cam_t = mean_cam_t[None, :].expand(batch_size, -1)
cam_K = get_intrinsics_matrix(config.REGRESSOR_IMG_WH, config.REGRESSOR_IMG_WH,
                              config.FOCAL_LENGTH)
cam_K = torch.from_numpy(cam_K.astype(np.float32)).to(device)
cam_K = cam_K[None, :, :].expand(batch_size, -1, -1)
cam_R = torch.eye(3).to(device)
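get_intrinsics_matrix is also not shown. Assuming a pinhole camera with the principal point at the image center, which matches how it is called above with (width, height, focal_length), a sketch would be:

import numpy as np

def get_intrinsics_matrix(img_width, img_height, focal_length):
    # 3x3 pinhole intrinsics; principal point assumed at the image center.
    # A sketch only; the repo's actual helper may differ.
    return np.array([[focal_length, 0., img_width / 2.],
                     [0., focal_length, img_height / 2.],
                     [0., 0., 1.]])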
Example #5
def start_training():
    logger.info("Setup config, data and model...")
    opt = BaseOptions().parse()
    set_seed(opt.seed)
    if opt.debug:  # make runs deterministic for debugging
        # cudnn.benchmark = True enables auto-tuning to find the fastest
        # algorithms for a fixed input/net configuration; disable it here,
        # together with non-deterministic kernels, for reproducibility.
        cudnn.benchmark = False
        cudnn.deterministic = True

    opt.writer = SummaryWriter(opt.tensorboard_log_dir)
    opt.train_log_txt_formatter = "{time_str} [Epoch] {epoch:03d} [Loss] {loss_str}\n"
    opt.eval_log_txt_formatter = "{time_str} [Epoch] {epoch:03d} [Metrics] {eval_metrics_str}\n"

    train_dataset = StartEndDataset(
        dset_name=opt.dset_name,
        data_path=opt.train_path,
        desc_bert_path_or_handler=opt.desc_bert_path,
        sub_bert_path_or_handler=opt.sub_bert_path,
        max_desc_len=opt.max_desc_l,
        max_ctx_len=opt.max_ctx_l,
        vid_feat_path_or_handler=opt.vid_feat_path,
        clip_length=opt.clip_length,
        ctx_mode=opt.ctx_mode,
        h5driver=opt.h5driver,
        data_ratio=opt.data_ratio,
        normalize_vfeat=not opt.no_norm_vfeat,
        normalize_tfeat=not opt.no_norm_tfeat,
    )

    if opt.eval_path is not None:
        # val dataset, used to get eval loss
        train_eval_dataset = StartEndDataset(
            dset_name=opt.dset_name,
            data_path=opt.eval_path,
            desc_bert_path_or_handler=train_dataset.desc_bert_h5,
            sub_bert_path_or_handler=train_dataset.sub_bert_h5 if "sub" in opt.ctx_mode else None,
            max_desc_len=opt.max_desc_l,
            max_ctx_len=opt.max_ctx_l,
            vid_feat_path_or_handler=train_dataset.vid_feat_h5 if "video" in opt.ctx_mode else None,
            clip_length=opt.clip_length,
            ctx_mode=opt.ctx_mode,
            h5driver=opt.h5driver,
            data_ratio=opt.data_ratio,
            normalize_vfeat=not opt.no_norm_vfeat,
            normalize_tfeat=not opt.no_norm_tfeat
        )

        eval_dataset = StartEndEvalDataset(
            dset_name=opt.dset_name,
            eval_split_name=opt.eval_split_name,  # should only be val set
            data_path=opt.eval_path,
            desc_bert_path_or_handler=train_dataset.desc_bert_h5,
            sub_bert_path_or_handler=train_dataset.sub_bert_h5 if "sub" in opt.ctx_mode else None,
            max_desc_len=opt.max_desc_l,
            max_ctx_len=opt.max_ctx_l,
            video_duration_idx_path=opt.video_duration_idx_path,
            vid_feat_path_or_handler=train_dataset.vid_feat_h5 if "video" in opt.ctx_mode else None,
            clip_length=opt.clip_length,
            ctx_mode=opt.ctx_mode,
            data_mode="query",
            h5driver=opt.h5driver,
            data_ratio=opt.data_ratio,
            normalize_vfeat=not opt.no_norm_vfeat,
            normalize_tfeat=not opt.no_norm_tfeat
        )
    else:
        # train_eval_dataset must be defined too; train(...) below references it
        train_eval_dataset = None
        eval_dataset = None

    model_config = EDict(
        merge_two_stream=not opt.no_merge_two_stream,  # merge video and subtitles
        cross_att=not opt.no_cross_att,  # use cross-attention when encoding video and subtitles
        span_predictor_type=opt.span_predictor_type,
        encoder_type=opt.encoder_type,  # gru, lstm, transformer
        add_pe_rnn=opt.add_pe_rnn,  # add positional encoding for RNNs
        pe_type=opt.pe_type,
        visual_input_size=opt.vid_feat_size,
        sub_input_size=opt.sub_feat_size,  # for both desc and subtitles
        query_input_size=opt.q_feat_size,  # for both desc and subtitles
        hidden_size=opt.hidden_size,
        stack_conv_predictor_conv_kernel_sizes=opt.stack_conv_predictor_conv_kernel_sizes,
        conv_kernel_size=opt.conv_kernel_size,
        conv_stride=opt.conv_stride,
        max_ctx_l=opt.max_ctx_l,
        max_desc_l=opt.max_desc_l,
        input_drop=opt.input_drop,
        cross_att_drop=opt.cross_att_drop,
        drop=opt.drop,
        n_heads=opt.n_heads,  # self-att heads
        initializer_range=opt.initializer_range,  # for linear layer
        ctx_mode=opt.ctx_mode,  # video, sub or video_sub
        margin=opt.margin,  # margin for ranking loss
        ranking_loss_type=opt.ranking_loss_type,  # loss type, 'hinge' or 'lse'
        lw_neg_q=opt.lw_neg_q,  # loss weight for neg. query and pos. context
        lw_neg_ctx=opt.lw_neg_ctx,  # loss weight for pos. query and neg. context
        lw_st_ed=0,  # will be assigned dynamically at training time
        use_hard_negative=False,  # reset at each epoch
        hard_pool_size=opt.hard_pool_size,
        use_self_attention=not opt.no_self_att,  # whether to use self attention
        no_modular=opt.no_modular
    )
    logger.info("model_config {}".format(model_config))
    model = XML(model_config)
    count_parameters(model)
    logger.info("Start Training...")
    train(model, train_dataset, train_eval_dataset, eval_dataset, opt)
    return opt.results_dir, opt.eval_split_name, opt.eval_path, opt.debug
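The config in Example #5 chooses between 'hinge' and 'lse' ranking losses with a margin (see ranking_loss_type and margin above). A sketch of the two standard formulations over matched/mismatched similarity scores; the XML repo's exact reduction and negative sampling may differ:

import torch

def ranking_loss(pos_scores, neg_scores, margin, loss_type='hinge'):
    # pos_scores / neg_scores: 1-D tensors of positive and negative pair scores.
    if loss_type == 'hinge':
        # max(0, margin + s_neg - s_pos)
        return torch.clamp(margin + neg_scores - pos_scores, min=0).mean()
    if loss_type == 'lse':
        # log(1 + exp(margin + s_neg - s_pos)), a smooth upper bound on hinge
        return torch.log1p(torch.exp(margin + neg_scores - pos_scores)).mean()
    raise ValueError("unknown loss_type: {}".format(loss_type))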
Example #6
def start_training():
    logger.info("Setup config, data and model...")
    opt = BaseOptions().parse()
    set_seed(opt.seed)
    if opt.debug:  # make runs deterministic for debugging
        # cudnn.benchmark = True enables auto-tuning to find the fastest
        # algorithms for a fixed input/net configuration; disable it here,
        # together with non-deterministic kernels, for reproducibility.
        cudnn.benchmark = False
        cudnn.deterministic = True

    opt.writer = SummaryWriter(opt.tensorboard_log_dir)
    opt.train_log_txt_formatter = "{time_str} [Epoch] {epoch:03d} [Loss] {loss_str}\n"
    opt.eval_log_txt_formatter = "{time_str} [Epoch] {epoch:03d} [Metrics] {eval_metrics_str}\n"

    train_dataset = RetrievalDataset(
        dset_name=opt.dset_name,
        data_path=opt.train_path,
        desc_bert_path_or_handler=opt.desc_bert_path,
        sub_bert_path_or_handler=opt.sub_bert_path,
        vid_feat_path_or_handler=opt.vid_feat_path,
        max_desc_len=opt.max_desc_l,
        max_ctx_len=opt.max_ctx_l,
        ctx_mode=opt.ctx_mode,
        h5driver=opt.h5driver,
        data_ratio=opt.data_ratio,
        normalize_vfeat=not opt.no_norm_vfeat,
        normalize_tfeat=not opt.no_norm_tfeat,
    )

    if opt.eval_path is not None:
        eval_dataset = RetrievalEvalDataset(
            dset_name=opt.dset_name,
            eval_split_name=opt.eval_split_name,  # should only be val set
            data_path=opt.eval_path,
            desc_bert_path_or_handler=train_dataset.desc_bert_h5,
            sub_bert_path_or_handler=train_dataset.sub_bert_h5
            if "sub" in opt.ctx_mode else None,
            max_desc_len=opt.max_desc_l,
            max_ctx_len=opt.max_ctx_l,
            video_duration_idx_path=opt.video_duration_idx_path,
            vid_feat_path_or_handler=train_dataset.vid_feat_h5
            if "video" in opt.ctx_mode else None,
            ctx_mode=opt.ctx_mode,
            data_mode="query",
            h5driver=opt.h5driver,
            data_ratio=opt.data_ratio,
            normalize_vfeat=not opt.no_norm_vfeat,
            normalize_tfeat=not opt.no_norm_tfeat,
        )
    else:
        eval_dataset = None

    model_config = EDict(
        ctx_mode=opt.ctx_mode,
        text_input_size=opt.sub_feat_size,
        vid_input_size=opt.vid_feat_size,
        output_size=opt.output_size,
        margin=opt.margin,  # margin for ranking loss
    )
    logger.info("model_config {}".format(model_config))
    model = MEE(model_config)
    count_parameters(model)
    logger.info("Start Training...")
    train(model, train_dataset, eval_dataset, opt)
    return opt.results_dir, opt.eval_split_name, opt.eval_path, opt.debug
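Finally, the EDict used for model_config in Examples #3, #5 and #6 is most likely an alias for easydict.EasyDict, which makes dict keys readable as attributes, so the model can do config.hidden_size instead of config['hidden_size']:

from easydict import EasyDict as EDict  # pip install easydict

cfg = EDict(hidden_size=256, drop=0.1)
assert cfg.hidden_size == cfg['hidden_size'] == 256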