def load_model():
    opt = parse_args()

    # Load infos
    infos = load_infos(opt)

    ignore = ["id", "batch_size", "beam_size", "start_from_best"]
    for k in vars(infos['opt']).keys():
        if k not in ignore:
            if k in vars(opt):
                assert vars(opt)[k] == vars(
                    infos['opt'])[k], k + ' option not consistent'
            else:
                vars(opt).update({k: vars(infos['opt'])[k]
                                  })  # copy over options from model

    print(opt)

    # Setup the model
    model_cnn = models.setup_cnn(opt)
    model_cnn.cuda()

    model = models.setup(opt)
    model.cuda()

    # Make sure in the evaluation mode
    model_cnn.eval()
    model.eval()

    ix_to_word = infos['vocab']

    return model_cnn, model, ix_to_word, opt
Example 2
def main():

    opt = parse_args()

    # make dirs
    print(opt.output_dir)
    if not os.path.isdir(opt.output_dir):
        os.makedirs(opt.output_dir)

    # Load infos
    infos = load_infos(opt)

    ignore = ["id", "batch_size", "beam_size", "start_from_best"]
    for k in vars(infos['opt']).keys():
        if k not in ignore:
            if k in vars(opt):
                assert vars(opt)[k] == vars(
                    infos['opt'])[k], k + ' option not consistent'
            else:
                vars(opt).update({k: vars(infos['opt'])[k]
                                  })  # copy over options from model

    print(opt)

    # Setup the model
    model_cnn = models.setup_cnn(opt)
    model_cnn.cuda()

    model = models.setup(opt)
    model.cuda()

    # Make sure in the evaluation mode
    model_cnn.eval()
    model.eval()

    str_id = ''.join(opt.id.split('_'))

    path_zip = opt.output_dir + '/results.zip'

    # zipf = zipfile.ZipFile(path_zip, 'w', zipfile.ZIP_DEFLATED)

    for dataset in opt.datasets:

        loader = DataLoaderRaw({
            'folder_path':
            os.path.join(opt.image_folder, dataset),
            'batch_size':
            opt.batch_size
        })
        loader.ix_to_word = infos['vocab']

        # Set sample options
        predictions = eval_split(model_cnn, model, loader, vars(opt))

        path_json = opt.output_dir + '/captions_' + dataset + '_' + str_id + '_results.json'

        with open(path_json, 'w') as f:
            json.dump(predictions, f)
Example 3
def main():

    opt = parse_args()

    # make dirs
    print(opt.output_dir)
    if not os.path.isdir(opt.output_dir):
        os.makedirs(opt.output_dir)

    # Load infos
    infos = load_infos(opt)

    ignore = [
        "id", "batch_size", "beam_size", "start_from_best",
        "checkpoint_best_path"
    ]
    for k in vars(infos['opt']).keys():
        if k not in ignore:
            if k in vars(opt):
                assert vars(opt)[k] == vars(
                    infos['opt'])[k], k + ' option not consistent'
            else:
                vars(opt).update({k: vars(infos['opt'])[k]
                                  })  # copy over options from model

    print(opt)

    # Setup the model
    model_cnn = models.setup_cnn(opt)
    model_cnn.cuda()

    model = models.setup(opt)
    model.cuda()

    # Make sure in the evaluation mode
    model_cnn.eval()
    model.eval()

    save_model_best(model, model_cnn, infos, opt)

    loader = DataLoaderRaw({
        'folder_path': opt.image_folder,
        'batch_size': opt.batch_size
    })
    loader.ix_to_word = infos['vocab']

    # Set sample options
    predictions = eval_split(model_cnn, model, loader, vars(opt))

    with open(opt.output_dir + '/result.json', 'w') as f:
        json.dump(predictions, f)
Example 4
def train(opt):
    loader = DataLoader(opt)
    opt.vocab_size = loader.vocab_size
    opt.seq_length = loader.seq_length
    opt.caption_model = "SCST"

    infos = {}
    if opt.start_from is not None and len(opt.start_from) > 0:
        print("start from %s" % (opt.start_from))
        # open old infos and check if models are compatible
        with open(os.path.join(opt.start_from,
                               'infos_' + opt.id + '.pkl'), 'rb') as f:
            infos = cPickle.load(f)
            saved_model_opt = infos['opt']
            need_be_same = ["caption_model", "rnn_size", "num_layers"]
            for checkme in need_be_same:
                assert vars(saved_model_opt)[checkme] == vars(
                    opt
                )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

    iteration = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)
    val_result_history = infos.get('val_result_history', {})
    loss_history = infos.get('loss_history', {})
    lr_history = infos.get('lr_history', {})

    loader.iterators = infos.get('iterators', loader.iterators)
    if opt.load_best_score == 1:
        best_val_score = infos.get('best_val_score', None)
    else:
        best_val_score = None

    model_cnn = models.setup_cnn(opt)
    model_cnn.cuda()

    model = models.setup(opt)
    model.cuda()

    model_cnn.train()
    model.train()

    # Repeat each image's CNN features 5 times (presumably opt.seq_per_img,
    # the number of captions per image) so they align with the label batch;
    # see the FeatExpander sketch after this example.
    fc_expander = utils.FeatExpander(5)
    att_expander = utils.FeatExpander(5)

    crit = Criterion.LanguageModelCriterion()
    crit_reinforce = Criterion.SCSTCriterion()

    optimizer = optim.Adam(model.parameters(), lr=opt.learning_rate)
    optimizer_cnn = optim.Adam(model_cnn.parameters(),
                               lr=opt.cnn_learning_rate,
                               weight_decay=opt.cnn_weight_decay)

    # Load the optimizer
    if opt.start_from is not None and len(opt.start_from) > 0:
        optimizer.load_state_dict(
            torch.load(os.path.join(opt.start_from, 'optimizer.pth')))
        optimizer_cnn.load_state_dict(
            torch.load(os.path.join(opt.start_from, 'optimizer_cnn.pth')))

    finetune_cnn_start = False
    use_reinforce = False
    update_lr_flag = True

    while True:

        if update_lr_flag:
            # Assign the learning rate
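            # Staircase decay:
            #   lr = learning_rate * decay_rate ** ((epoch - decay_start) // decay_every)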
            if opt.learning_rate_decay_start >= 0 and epoch >= opt.learning_rate_decay_start:
                frac = (epoch - opt.learning_rate_decay_start
                        ) // opt.learning_rate_decay_every
                decay_factor = opt.learning_rate_decay_rate**frac
                opt.current_lr = opt.learning_rate * decay_factor
                utils.set_lr(optimizer, opt.current_lr)  # set the decayed rate
            else:
                opt.current_lr = opt.learning_rate

            if opt.finetune_cnn_after >= 0 and epoch >= opt.finetune_cnn_after:
                for p in model_cnn.parameters():
                    p.requires_grad = True
                model_cnn.train()
                finetune_cnn_start = True
            else:
                for p in model_cnn.parameters():
                    p.requires_grad = False
                model_cnn.eval()
                finetune_cnn_start = False

            if opt.cnn_learning_rate_decay_start >= 0 and epoch >= opt.cnn_learning_rate_decay_start:
                frac = (epoch - opt.cnn_learning_rate_decay_start
                        ) // opt.cnn_learning_rate_decay_every
                decay_factor = opt.cnn_learning_rate_decay_rate**frac
                opt.current_cnn_lr = opt.cnn_learning_rate * decay_factor
                utils.set_lr(optimizer_cnn,
                             opt.current_cnn_lr)  # set the decayed rate
            else:
                opt.current_cnn_lr = opt.cnn_learning_rate

            update_lr_flag = False

        start_total = time.time()

        data = loader.get_batch('train')
        vocab = loader.get_vocab()

        images = torch.from_numpy(data['images']).cuda()
        images = utils.prepro(images, False)
        images = Variable(images, requires_grad=False)

        labels = torch.from_numpy(data['labels']).cuda()
        labels = Variable(labels, requires_grad=False)

        fc_feats, att_feats = model_cnn(images)

        fc_feats_ext = fc_expander(fc_feats)
        att_feats_ext = att_expander(att_feats)

        optimizer.zero_grad()
        if opt.finetune_cnn_after >= 0 and epoch >= opt.finetune_cnn_after:
            optimizer_cnn.zero_grad()

        reward = 0
        reward1 = 0
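        # Self-critical branch: the "test" forward pass presumably produces a
        # greedy baseline sequence and the "train" pass samples; crit_reinforce
        # then rewards the sample relative to that baseline (see the sketch
        # after this example).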
        if opt.reinforce_start >= 0 and epoch >= opt.reinforce_start:
            use_reinforce = True
            output1, seq1 = model(fc_feats_ext, att_feats_ext, labels, "test")
            output, seq = model(fc_feats_ext, att_feats_ext, labels, "train")
            loss, reward, reward1 = crit_reinforce(output, output1, seq, seq1,
                                                   labels, vocab)
            loss.backward()
        else:
            use_reinforce = False
            output, _ = model(fc_feats_ext, att_feats_ext, labels, "xent")
            loss = crit(output, labels)
            loss.backward()

        utils.clip_gradient(optimizer, opt.grad_clip)
        optimizer.step()
        if opt.finetune_cnn_after >= 0 and epoch >= opt.finetune_cnn_after:
            utils.clip_gradient(optimizer_cnn, opt.grad_clip)
            optimizer_cnn.step()

        train_loss = loss.data[0]

        print("iter {} (epoch {}), train_loss = {:.3f}, lr = {} lr_cnn = {} finetune_cnn = {} use_reinforce = {} reward = {} reward1 = {} time/batch = {:.3f}" \
              .format(iteration, epoch, train_loss, opt.current_lr, opt.current_cnn_lr, finetune_cnn_start, use_reinforce, reward, reward1, time.time() - start_total))

        # Update the iteration and epoch
        iteration += 1
        if data['bounds']['wrapped']:
            epoch += 1
            update_lr_flag = True

        # Write the training loss summary
        if (iteration % opt.losses_log_every == 0):
            loss_history[iteration] = train_loss
            lr_history[iteration] = opt.current_lr

        # make evaluation on validation set, and save model
        if (iteration % opt.save_checkpoint_every == 0):
            # eval model
            eval_kwargs = {
                'split': 'val',
                'dataset': opt.input_json,
                'caption_model': 'SCST'
            }
            eval_kwargs.update(vars(opt))
            val_loss, predictions, lang_stats, str_stats = eval_utils.eval_split(
                model_cnn, model, crit, loader, eval_kwargs)

            if not os.path.exists(opt.eval_result_path):
                os.makedirs(opt.eval_result_path)

            eval_result_file = os.path.join(opt.eval_result_path,
                                            opt.id + ".csv")
            with open(eval_result_file, 'a') as f:
                f.write(str_stats + "\n")

            predictions_file = os.path.join(opt.eval_result_path,
                                            opt.id + ".json")
            with open(predictions_file, 'w') as f:
                json.dump(predictions, f)

            val_result_history[iteration] = {
                'loss': val_loss,
                'lang_stats': lang_stats,
                'predictions': predictions
            }

            # Save model if is improving on validation result
            if opt.language_eval == 1:
                current_score = lang_stats['CIDEr']
            else:
                current_score = -val_loss

            best_flag = False
            if True:  # always save a checkpoint; best_flag additionally triggers the *_best saves below
                if best_val_score is None or current_score > best_val_score:
                    best_val_score = current_score
                    best_flag = True
                if not os.path.exists(opt.checkpoint_path):
                    os.makedirs(opt.checkpoint_path)

                checkpoint_path = os.path.join(opt.checkpoint_path,
                                               'model.pth')
                torch.save(model.state_dict(), checkpoint_path)
                print("model saved to {}".format(checkpoint_path))

                checkpoint_path_cnn = os.path.join(opt.checkpoint_path,
                                                   'model_cnn.pth')
                torch.save(model_cnn.state_dict(), checkpoint_path_cnn)
                print("model cnn saved to {}".format(checkpoint_path_cnn))

                optimizer_path = os.path.join(opt.checkpoint_path,
                                              'optimizer.pth')
                torch.save(optimizer.state_dict(), optimizer_path)
                print("optimizer saved to {}".format(optimizer_path))

                optimizer_path_cnn = os.path.join(opt.checkpoint_path,
                                                  'optimizer_cnn.pth')
                torch.save(optimizer_cnn.state_dict(), optimizer_path_cnn)
                print("optimizer cnn saved to {}".format(optimizer_path_cnn))

                infos['iter'] = iteration
                infos['epoch'] = epoch
                infos['iterators'] = loader.iterators
                infos['best_val_score'] = best_val_score
                infos['opt'] = opt
                infos['val_result_history'] = val_result_history
                infos['loss_history'] = loss_history
                infos['lr_history'] = lr_history
                infos['vocab'] = loader.get_vocab()

                info_path = os.path.join(opt.checkpoint_path,
                                         'infos_' + opt.id + '.pkl')
                with open(info_path, 'wb') as f:
                    cPickle.dump(infos, f)

                if best_flag:
                    checkpoint_path = os.path.join(opt.checkpoint_path,
                                                   'model_best.pth')
                    torch.save(model.state_dict(), checkpoint_path)
                    print("model saved to {}".format(checkpoint_path))

                    checkpoint_path_cnn = os.path.join(opt.checkpoint_path,
                                                       'model_cnn_best.pth')
                    torch.save(model_cnn.state_dict(), checkpoint_path_cnn)
                    print("model cnn saved to {}".format(checkpoint_path_cnn))

                    info_path = os.path.join(opt.checkpoint_path,
                                             'infos_' + opt.id + '_best.pkl')
                    with open(info_path, 'wb') as f:
                        cPickle.dump(infos, f)

        if epoch >= opt.max_epochs and opt.max_epochs != -1:
            break
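
For context, two short sketches of pieces this example relies on. They are
hypothetical reconstructions written against current PyTorch, not the
repository's actual utils.FeatExpander or Criterion.SCSTCriterion, whose code
is not shown in this listing.

The feature expanders repeat each image's CNN features once per caption
(5 here, i.e. seq_per_img) so they line up with the label batch:

import torch


class FeatExpander(torch.nn.Module):
    """Repeat each row of a batch `count` times (hypothetical sketch)."""

    def __init__(self, count):
        super(FeatExpander, self).__init__()
        self.count = count

    def forward(self, x):
        # (batch, ...) -> (batch * count, ...), rows repeated consecutively
        return torch.repeat_interleave(x, self.count, dim=0)

The self-critical ("SCST") branch trains with a REINFORCE-style loss that uses
the greedy decode's score as a baseline. A simplified version, assuming
sentence-level rewards such as CIDEr are computed elsewhere:

def scst_loss(sample_logprobs, sample_mask, sample_reward, greedy_reward):
    # sample_logprobs: (batch, seq_len) log-probs of the sampled words
    # sample_mask:     (batch, seq_len) 1 for real words, 0 for padding
    # sample_reward:   (batch,) reward of the sampled captions
    # greedy_reward:   (batch,) reward of the greedy captions (baseline)
    advantage = (sample_reward - greedy_reward).unsqueeze(1)
    # Push up samples that beat the greedy decode, push down the rest.
    return -(advantage * sample_logprobs * sample_mask).sum() / sample_mask.sum()

In the loop above, the sampled sequence presumably comes from the "train"
forward pass and the baseline from the "test" (greedy) pass.
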
Example 5
def main():

    opt = parse_args()

    # make dirs
    print(opt.eval_result_path)
    if not os.path.isdir(opt.eval_result_path):
        os.makedirs(opt.eval_result_path)

    # Load infos
    infos = load_infos(opt)

    ignore = [
        "id", "input_json", "input_h5", "input_anno", "images_root",
        "coco_caption_path", "batch_size", "beam_size", "start_from_best",
        "eval_result_path"
    ]
    for k in vars(infos['opt']).keys():
        if k not in ignore:
            if k in vars(opt):
                assert vars(opt)[k] == vars(
                    infos['opt'])[k], k + ' option not consistent'
            else:
                vars(opt).update({k: vars(infos['opt'])[k]
                                  })  # copy over options from model

    # print(opt)

    # Setup the model
    model_cnn = models.setup_cnn(opt)
    model_cnn.cuda()

    model = models.setup(opt)
    model.cuda()

    # Make sure in the evaluation mode
    model_cnn.eval()
    model.eval()

    if models.has_bu(opt.caption_model) or \
            models.has_sub_regions(opt.caption_model) or \
            models.has_sub_region_bu(opt.caption_model):
        loader = DataLoaderThreadBu(opt)
        print("DataLoaderThreadBu")
    else:
        loader = DataLoaderThreadNew(opt)
        print("DataLoaderThreadNew")

    loader.ix_to_word = infos['vocab']

    eval_kwargs = {'split': opt.val_split, 'dataset': opt.input_json}
    eval_kwargs.update(vars(opt))

    start_beam = 0
    total_beam = 20
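    # Sweep beam sizes 1..total_beam, evaluating and saving results for each.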
    for beam in range(start_beam, total_beam):
        opt.beam_size = beam + 1
        eval_kwargs.update(vars(opt))
        print("beam_size: " + str(opt.beam_size))
        print("start eval ...")
        crit = None
        val_loss, predictions, lang_stats, str_stats = eval_utils.eval_split(
            model_cnn, model, crit, loader, eval_kwargs)
        print("end eval ...")
        msg = "str_stats = {}".format(str_stats)
        print(msg)
        save_result(str(opt.beam_size) + "," + str_stats, predictions, opt)
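Example 6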
def main():

    opt = parse_args()

    opt.datasets = opt.datasets.split(',')
    opt.ids = opt.ids.split(',')

    # make dirs
    print(opt.output_dir)
    if not os.path.isdir(opt.output_dir):
        os.makedirs(opt.output_dir)

    print(opt.output_beam_dir)
    if not os.path.isdir(opt.output_beam_dir):
        os.makedirs(opt.output_beam_dir)

    # print(opt)

    all_model_cnns = []
    all_models = []

    # Load every model listed in opt.ids; eval_split below decodes with the
    # whole ensemble.
    for i in range(len(opt.ids)):

        # id
        opt.id = opt.ids[i]

        # Load infos
        infos = load_infos(opt)

        ignore = ["id", "batch_size", "beam_size", "start_from_best", "input_json",
                  "input_h5", "input_anno", "images_root", "aic_caption_path", "input_bu"]

        for k in vars(infos['opt']).keys():
            if k not in ignore:
                vars(opt).update({k: vars(infos['opt'])[k]})

        opt.relu_type = 0

        # Setup the model
        model_cnn = models.setup_cnn(opt)
        # model_cnn.cuda()
        model_cnn = nn.DataParallel(model_cnn.cuda())

        model = models.setup(opt)
        model.cuda()

        # Make sure in the evaluation mode
        model_cnn.eval()
        model.eval()

        all_model_cnns.append(model_cnn)
        all_models.append(model)

    if opt.eval_type == 0: # local test

        print('eval local')

        if models.has_bu(opt.caption_model):
            loader = DataLoaderThreadBu(opt)
        else:
            loader = DataLoaderThreadNew(opt)

        # Set sample options
        predictions, lang_stats, str_stats, beam_vis = eval_split(all_model_cnns, all_models, loader, opt, vars(opt))

        save_result(opt.output_dir, str_stats, predictions)

        save_beam_vis_result(opt.output_beam_dir, "eval_beam_vis.json", beam_vis)


    elif opt.eval_type == 1: # server

        print('eval server')

        for dataset in opt.datasets:

            print(os.path.join(opt.image_folder, dataset))

            loader = DataLoaderRaw({'folder_path': os.path.join(opt.image_folder, dataset),
                                    'batch_size': opt.batch_size,
                                    'start': opt.start,
                                    'num': opt.num,
                                    'use_bu_att': opt.use_bu_att,
                                    'input_bu': opt.input_bu,
                                    'bu_size': opt.bu_size,
                                    'bu_feat_size': opt.bu_feat_size})

            loader.ix_to_word = infos['vocab']

            # Set sample options
            predictions, lang_stats, str_stats, beam_vis = eval_split(all_model_cnns, all_models, loader, opt, vars(opt))

            path_json = opt.output_dir + '/captions_' + dataset + str(opt.start) + '_ensemble_results.json'

            with open(path_json, 'w') as f:
                json.dump(predictions, f)

            save_beam_vis_result(opt.output_beam_dir, dataset + str(opt.start) + "_beam_size_" + str(opt.beam_size) + "_beam_type_" + str(opt.beam_type) + "_eval_beam_vis.json", beam_vis)
Example 7
def train(opt):

    notifier = notify()
    notifier.login()

    # init path
    if not os.path.exists(opt.eval_result_path):
        os.makedirs(opt.eval_result_path)

    config_file = os.path.join(opt.eval_result_path, opt.id + '_config.txt')
    with open(config_file, 'w') as f:
        f.write("{}\n".format(json.dumps(vars(opt), sort_keys=True, indent=2)))

    torch.backends.cudnn.benchmark = True

    if opt.use_tensorboard:

        if opt.tensorboard_type == 0:
            board = tensorboard.TensorBoard()
            board.start(opt.id, opt.tensorboard_ip, opt.tensorboard_port)
        else:
            board = trans_client.TransClient()
            board.start(opt.id)

    print(opt.cnn_model)

    loader = get_loader()

    opt.vocab_size = loader.vocab_size
    opt.seq_length = loader.seq_length
    vocab = loader.get_vocab()
    opt.vocab = vocab
    batch_size = loader.batch_size

    infos = get_infos()
    infos['vocab'] = vocab

    iteration = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)
    val_result_history = infos.get('val_result_history', {})
    loss_history = infos.get('loss_history', {})
    lr_history = infos.get('lr_history', {})
    finetune_cnn_history = infos.get('finetune_cnn_history', {})

    loader.iterators = infos.get('iterators', loader.iterators)
    if opt.load_best_score == 1:
        best_val_score = infos.get('best_val_score', None)
    else:
        best_val_score = None

    model_cnn = models.setup_cnn(opt)
    model_cnn = model_cnn.cuda()
    model_cnn = nn.DataParallel(model_cnn)

    model = models.setup(opt)
    model = model.cuda()
    # if models.is_transformer(opt.caption_model) or models.is_ctransformer(opt.caption_model):
    #     model = nn.DataParallel(model)

    train_utils.save_model_conf(model_cnn, model, opt)

    update_lr_flag = True

    model_cnn.train()
    model.train()

    # "bu" presumably refers to bottom-up-attention region features (cf.
    # models.has_bu); they get the same per-caption expansion as fc/att.
    fc_expander, att_expander, bu_expander = get_expander()

    optimizer = None
    optimizer_cnn = None
    finetune_cnn_start = False

    early_stop_cnt = 0

    params = {}
    params['model'] = model
    params['vocab'] = vocab

    # crit_pg, crit_rl, crit_ctc, crit_c, crit_ac, crit
    params['crit_pg'] = None
    params['crit_rl'] = None
    params['crit_ctc'] = None
    params['crit_c'] = None
    params['crit_ac'] = None
    params['crit'] = None

    is_eval_start = opt.is_eval_start

    # Optional automatic schedule: training steps through a list of
    # (learning_rate, cnn_learning_rate, finetune_cnn_after) stages, and the
    # early-stop logic inside the loop advances to the next stage.
    if opt.use_auto_learning_rate == 1:
        train_process = train_utils.init_train_process()
        train_process_index = infos.get('train_process_index', 0)
        train_step = train_process[train_process_index]
        optimizer_cnn = None
        optimizer = None
        opt.learning_rate = train_step.learning_rate
        opt.cnn_learning_rate = train_step.cnn_learning_rate
        opt.finetune_cnn_after = train_step.finetune_cnn_after

    while True:

        current_score = None

        # make evaluation on validation set, and save model
        if (iteration > 0 and iteration % opt.save_checkpoint_every == 0 and
                iteration not in val_result_history) or is_eval_start:

            predictions, best_val_score, best_flag, current_score = eval_model(
                model_cnn, model, params, loader, board, iteration, notifier,
                val_result_history, best_val_score)

            infos['best_val_score'] = best_val_score
            infos['val_result_history'] = val_result_history
            train_utils.save_infos(infos, opt)

            if best_flag:
                train_utils.save_best_result(predictions, opt)
                train_utils.save_model_best(model, model_cnn, infos, opt)
                early_stop_cnt = 0
            else:
                early_stop_cnt += 1

            is_eval_start = False

        if epoch >= opt.max_epochs and opt.max_epochs != -1:
            msg = "max epoch"
            logger.info(msg)
            break

        # auto update model
        if opt.use_auto_learning_rate == 1 and current_score is not None:
            if early_stop_cnt > opt.auto_early_stop_cnt or current_score < opt.auto_early_stop_score:
                early_stop_cnt = 0
                train_process_index += 1
                msg = opt.id + " early stop " + str(train_process_index)
                logger.info(msg)

                infos['train_process_index'] = train_process_index
                train_utils.save_infos(infos, opt)

                if train_process_index >= len(train_process):
                    notifier.send(opt.id + " early stop", msg)
                    logger.info("break")
                    break

                train_step = train_process[train_process_index]
                optimizer_cnn = None
                optimizer = None
                opt.learning_rate = train_step.learning_rate
                opt.cnn_learning_rate = train_step.cnn_learning_rate
                opt.finetune_cnn_after = train_step.finetune_cnn_after
                opt.start_from_best = opt.auto_start_from_best

                # model_cnn_path = os.path.join(opt.auto_start_from_best, opt.id + '_model_cnn_best.pth')
                # model_cnn.load_state_dict(torch.load(model_cnn_path))
                # model_cnn = model_cnn.cuda()
                # model_cnn = nn.DataParallel(model_cnn)
                #
                # model_path = os.path.join(opt.auto_start_from_best, opt.id + '_model_best.pth')
                # model.load_state_dict(torch.load(model_path))
                # model = model.cuda()

                del model_cnn
                del model

                torch.cuda.empty_cache()

                model_cnn = models.setup_cnn(opt)
                model_cnn = model_cnn.cuda()
                model_cnn = nn.DataParallel(model_cnn)

                model = models.setup(opt)
                model = model.cuda()

                model_cnn.train()
                model.train()

                update_lr_flag = True

        # start train

        # Update the iteration and epoch
        iteration += 1

        if update_lr_flag:
            if opt.finetune_cnn_after >= 0 and epoch >= opt.finetune_cnn_after:
                finetune_cnn_start = True
            else:
                finetune_cnn_start = False

            optimizer_cnn = train_utils.get_cnn_optimizer(
                model_cnn, optimizer_cnn, finetune_cnn_start, opt)

            train_utils.update_lr(epoch, optimizer, optimizer_cnn,
                                  finetune_cnn_start, opt)

            update_lr_flag = False

        if opt.reinforce_start >= 0 and epoch >= opt.reinforce_start:
            use_reinforce = True
        else:
            use_reinforce = False

        optimizer = get_optimizer(optimizer, epoch, model, model_cnn)

        start_total = time.time()
        start = time.time()

        optimizer.zero_grad()
        if finetune_cnn_start:
            optimizer_cnn.zero_grad()

        # batch data
        data = loader.get_batch('train', batch_size)

        images = data['images']
        bus = None
        if models.has_bu(opt.caption_model):
            bus = data['bus']

        if opt.verbose:
            print('data {:.3f}'.format(time.time() - start))

        start = time.time()

        fc_feats, att_feats, bu_feats = train_cnn(model_cnn, images, bus,
                                                  fc_expander, att_expander,
                                                  bu_expander, use_reinforce)

        if opt.verbose:
            print('model_cnn {:.3f}'.format(time.time() - start))

        # get input data
        params['fc_feats'] = fc_feats
        params['att_feats'] = att_feats
        params['bu_feats'] = bu_feats

        # get target data
        params['labels'] = data['labels']
        params['masks'] = data['masks']
        params['tokens'] = data['tokens']
        params['gts'] = data['gts']
        params['targets'] = data['targets']

        # crit_pg, crit_rl, crit_ctc, crit_c, crit_ac, crit,
        train_loss, reward_mean, use_reinforce = train_model(
            params, iteration, epoch, board)

        # update the gradient
        update_gradient(optimizer, optimizer_cnn, finetune_cnn_start)

        time_batch = time.time() - start_total
        left_time = (opt.save_checkpoint_every -
                     iteration % opt.save_checkpoint_every) * time_batch
        s_left_time = utils.format_time(left_time)
        msg = "id {} iter {} (epoch {}), train_loss = {:.3f}, lr = {} lr_cnn = {} f_cnn = {} rf = {} r = {:.3f} early_stop_cnt = {} time/batch = {:.3f}s time/eval = {}" \
            .format(opt.id, iteration, epoch, train_loss, opt.current_lr, opt.current_cnn_lr, finetune_cnn_start,
                    use_reinforce, reward_mean, early_stop_cnt, time_batch, s_left_time)
        logger.info(msg)

        if opt.use_tensorboard:
            if iteration % opt.tensorboard_for_train_every == 0:
                board.loss_train(train_loss, iteration)

        if data['bounds']['wrapped']:
            epoch += 1
            update_lr_flag = True

        # Write the training loss summary
        if iteration % opt.losses_log_every == 0:
            loss_history[iteration] = train_loss
            lr_history[iteration] = opt.current_lr
            finetune_cnn_history[iteration] = finetune_cnn_start

        # update infos
        infos['iter'] = iteration
        infos['epoch'] = epoch
        infos['iterators'] = loader.iterators
        infos['best_val_score'] = best_val_score
        infos['opt'] = opt
        infos['val_result_history'] = val_result_history
        infos['loss_history'] = loss_history
        infos['lr_history'] = lr_history
        infos['finetune_cnn_history'] = finetune_cnn_history
        if opt.use_auto_learning_rate == 1:
            infos['train_process_index'] = train_process_index

        if opt.save_snapshot_every > 0 and iteration % opt.save_snapshot_every == 0:
            train_utils.save_model(model, model_cnn, infos, opt)

    loader.terminate()
Example 8
def train(opt):

    notifier = notify()
    notifier.login()

    # init path
    if not os.path.exists(opt.eval_result_path):
        os.makedirs(opt.eval_result_path)

    config_file = os.path.join(opt.eval_result_path, opt.id + '_config.txt')
    with open(config_file, 'w') as f:
        f.write("{}\n".format(json.dumps(vars(opt), sort_keys=True, indent=2)))

    torch.backends.cudnn.benchmark = True

    if opt.use_tensorboard:
        board = tensorboard.TensorBoard()
        board.start(opt.id, opt.tensorboard_ip)

        # board = trans_client.TransClient()
        # board.start(opt.id)

    print(opt.cnn_model)

    loader = DataLoaderThreadBu(opt)

    opt.vocab_size = loader.vocab_size
    opt.seq_length = loader.seq_length
    vocab = loader.get_vocab()
    batch_size = loader.batch_size

    try:
        if opt.is_load_infos == 1:
            infos = train_utils.load_infos(opt)
        else:
            infos = {}
    except Exception:
        infos = {}
        print('load infos error')

    iteration = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)
    val_result_history = infos.get('val_result_history', {})
    loss_history = infos.get('loss_history', {})
    lr_history = infos.get('lr_history', {})

    loader.iterators = infos.get('iterators', loader.iterators)
    if opt.load_best_score == 1:
        best_val_score = infos.get('best_val_score', None)
    else:
        best_val_score = None

    model_cnn = models.setup_cnn(opt)
    model_cnn = nn.DataParallel(model_cnn.cuda())

    model = models.setup(opt)
    model.cuda()

    train_utils.save_model_conf(model_cnn, model, opt)

    update_lr_flag = True

    model_cnn.train()
    model.train()

    if opt.seq_per_img > 1:
        fc_expander = utils.FeatExpander(opt.seq_per_img)
        att_expander = utils.FeatExpander(opt.seq_per_img)
        bu_expander = utils.FeatExpander(opt.seq_per_img)

    # crit = Criterion.LanguageModelWeightNewCriterion()
    crit = Criterion.LanguageModelWithProbWeightCriterion(
        opt.prob_weight_alpha)

    crit_rl = None

    # print(model_cnn)

    optimizer = optim.Adam(model.parameters(),
                           lr=opt.learning_rate,
                           betas=(opt.optim_alpha, opt.optim_beta),
                           eps=opt.optim_epsilon)
    optimizer_cnn = None
    finetune_cnn_start = False

    early_stop_cnt = 0

    params = {}
    params['model'] = model
    params['crit'] = crit
    params['vocab'] = vocab

    while True:

        # try:
        if update_lr_flag:
            if opt.finetune_cnn_after >= 0 and epoch >= opt.finetune_cnn_after:
                finetune_cnn_start = True
            else:
                finetune_cnn_start = False

            optimizer_cnn = train_utils.finetune_cnn(model_cnn, optimizer_cnn,
                                                     finetune_cnn_start, opt)

            train_utils.update_lr(epoch, optimizer, optimizer_cnn,
                                  finetune_cnn_start, opt)

            update_lr_flag = False

        start_total = time.time()
        start = time.time()

        optimizer.zero_grad()
        if finetune_cnn_start:
            optimizer_cnn.zero_grad()

        # batch data
        data = loader.get_batch('train', batch_size)

        images = data['images']
        labels = data['labels']
        masks = data['masks']
        tokens = data['tokens']
        gts = data['gts']

        if opt.verbose:
            print('data {:.3f}'.format(time.time() - start))

        # train cnn
        fc_feats, att_feats, bu_feats = model_cnn(images)
        if opt.seq_per_img > 1:
            fc_feats = fc_expander(fc_feats)
            att_feats = att_expander(att_feats)
            bu_feats = bu_expander(bu_feats)

        params['fc_feats'] = fc_feats
        params['att_feats'] = att_feats
        params['bu_feats'] = bu_feats
        params['labels'] = labels
        params['masks'] = masks
        params['tokens'] = tokens
        params['gts'] = gts

        if opt.reinforce_start >= 0 and epoch >= opt.reinforce_start:
            use_reinforce = True

            if crit_rl is None:
                if opt.is_aic_data:
                    crit_rl = Criterion.RewardCriterionAIC(opt, vocab)
                else:
                    crit_rl = Criterion.RewardCriterion(opt)

            params['crit_rl'] = crit_rl

            train_loss, reward_mean, sample_mean, greedy_mean = train_utils.train_reinforce(
                params, opt)

            if opt.use_tensorboard:
                board.val("sample_mean", sample_mean, iteration)
                board.val("greedy_mean", greedy_mean, iteration)

        else:
            use_reinforce = False
            params['crit'] = crit

            train_loss, reward_mean = train_utils.train_with_prob_weight(
                params, opt)

        # update the gradient
        utils.clip_gradient(optimizer, opt.grad_clip)
        optimizer.step()
        if finetune_cnn_start:
            utils.clip_gradient(optimizer_cnn, opt.grad_clip)
            optimizer_cnn.step()

        msg = "iter {} (epoch {}), train_loss = {:.3f}, lr = {} lr_cnn = {} f_cnn = {} rf = {} r = {:.3f} time/batch = {:.3f}" \
            .format(iteration, epoch, train_loss, opt.current_lr, opt.current_cnn_lr, finetune_cnn_start,
                    use_reinforce, reward_mean, time.time() - start_total)
        logger.info(msg)

        if opt.use_tensorboard:
            board.loss_train(train_loss, iteration)

        # Update the iteration and epoch
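        # When is_eval_start is set, the increment instead happens at the very
        # end of the loop (see below), presumably so that a just-resumed run
        # hits the evaluation block on its first pass.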
        if not opt.is_eval_start:
            iteration += 1

        if data['bounds']['wrapped']:
            epoch += 1
            update_lr_flag = True

        # Write the training loss summary
        if (iteration % opt.losses_log_every == 0):
            loss_history[iteration] = train_loss
            lr_history[iteration] = opt.current_lr

        # make evaluation on validation set, and save model
        if (iteration % opt.save_checkpoint_every == 0):

            if opt.is_every_eval:

                # eval model
                eval_kwargs = {
                    'split': opt.val_split,
                    'dataset': opt.input_json
                }
                eval_kwargs.update(vars(opt))

                print("start eval ...")

                val_loss, predictions, lang_stats, str_stats = eval_utils.eval_split_with_region_bu(
                    model_cnn, model, crit, loader, eval_kwargs)

                if opt.use_tensorboard:
                    board.accuracy(lang_stats, iteration)
                    board.loss_val(val_loss, iteration)

                print("end eval ...")

                msg = "iteration = {} val_loss = {} str_stats = {}".format(
                    iteration, val_loss, str_stats)
                notifier.send(opt.id + " val result", opt.id + " :\n" + msg)
                logger.info(msg)

                train_utils.save_result(str_stats + ',' + str(val_loss),
                                        predictions, opt)

                val_result_history[iteration] = {
                    'loss': val_loss,
                    'lang_stats': lang_stats,
                    'predictions': predictions
                }

                # Save model if is improving on validation result
                if opt.language_eval == 1:
                    eval_metric = opt.eval_metric
                    current_score = lang_stats[eval_metric]
                else:
                    current_score = -val_loss

                best_flag = False
                if best_val_score is None or current_score > best_val_score:
                    best_val_score = current_score
                    best_flag = True

            else:
                best_flag = True

            infos['iter'] = iteration
            infos['epoch'] = epoch
            infos['iterators'] = loader.iterators
            infos['best_val_score'] = best_val_score
            infos['opt'] = opt
            infos['val_result_history'] = val_result_history
            infos['loss_history'] = loss_history
            infos['lr_history'] = lr_history
            infos['vocab'] = loader.get_vocab()

            train_utils.save_model(model, model_cnn, infos, opt)

            if best_flag:
                train_utils.save_model_best(model, model_cnn, infos, opt)
                early_stop_cnt = 0
            else:
                early_stop_cnt += 1

        if epoch >= opt.max_epochs and opt.max_epochs != -1:
            break

        if opt.is_eval_start:
            iteration += 1

    loader.terminate()