Example #1
    def evaluate(self, device: str, strategy_name, pretrained):
        # init strategy
        if strategy_name == 'tpu':
            resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
            tf.config.experimental_connect_to_cluster(resolver)
            tf.tpu.experimental.initialize_tpu_system(resolver)
            strategy = tf.distribute.experimental.TPUStrategy(resolver)
            print('TPU strategy selected.')
        else:
            strategy = tf.distribute.OneDeviceStrategy(device=device)
            print('GPU strategy selected.')

        dl = DataLoader(self.record_paths)
        self.test_ds = dl.get_dataset(
            'test', batch_size=self.config['train_batch_size'])

        # infer evaluation steps and data structure
        test_steps = get_dataset_size(self.test_ds)
        data_structure = get_dataset_structure(self.test_ds)

        with strategy.scope():

            model = get_model(input_shape=data_structure, name="VGG16_tf")
            model.load_weights(
                os.path.join(MODEL_PATH, pretrained, 'checkpoint'))
            optimizer = Nadam()
            model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
                          optimizer=optimizer,
                          metrics=[
                              tf.keras.metrics.CategoricalAccuracy(),
                              tf.keras.metrics.Recall(),
                              tf.keras.metrics.Precision()
                          ])
            model.summary()
            model.evaluate(self.test_ds, steps=test_steps)
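
Examples 1 and 3 rely on two helpers, get_dataset_size and get_dataset_structure, that are not included in this listing. A minimal sketch of what they might look like, assuming the datasets are batched (features, labels) tf.data.Dataset objects (the implementations below are assumptions, not the original code):

import tensorflow as tf

def get_dataset_size(ds):
    # Number of batches; fall back to counting when the cardinality is unknown.
    n = int(tf.data.experimental.cardinality(ds).numpy())
    if n < 0:
        n = sum(1 for _ in ds)
    return n

def get_dataset_structure(ds):
    # Per-example input shape with the batch dimension dropped, e.g. (224, 224, 3).
    features_spec = ds.element_spec[0]
    return tuple(features_spec.shape[1:])
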
Example #2
def main():
    manager = Manager.init()

    models = [["model", MobileNetV2(**manager.args.model)]]

    manager.init_model(models)
    args = manager.args
    criterion = Criterion()
    optimizer, scheduler = Optimizer(models, args.optim).init()

    args.cuda = args.cuda and torch.cuda.is_available()
    if args.cuda:
        for item in models:
            item[1].cuda()
        criterion.cuda()

    dataloader = DataLoader(args.dataloader, args.cuda)

    summary = manager.init_summary()
    trainer = Trainer(models, criterion, optimizer, scheduler, dataloader,
                      summary, args.cuda)

    for epoch in range(args.runtime.start_epoch,
                       args.runtime.num_epochs + args.runtime.start_epoch):
        try:
            print("epoch {}...".format(epoch))
            trainer.train(epoch)
            manager.save_checkpoint(models, epoch)

            if (epoch + 1) % args.runtime.test_every == 0:
                trainer.validate()
        except KeyboardInterrupt:
            print("Training had been Interrupted\n")
            break
    trainer.test()
Example #3
    def train(self, device: str, strategy_name='tpu'):

        # init strategy
        if strategy_name == 'tpu':
            resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
            tf.config.experimental_connect_to_cluster(resolver)
            tf.tpu.experimental.initialize_tpu_system(resolver)
            strategy = tf.distribute.experimental.TPUStrategy(resolver)
            print('TPU strategy selected.')
        else:
            strategy = tf.distribute.OneDeviceStrategy(device=device)
            print('GPU strategy selected.')

        # decode records to tensorflow datasets
        dl = DataLoader(self.record_paths)
        self.train_ds = dl.get_dataset(
            'train', batch_size=self.config['train_batch_size'])
        self.validation_ds = dl.get_dataset(
            'validation', batch_size=self.config['validation_batch_size'])

        # infer steps for fitting and data structure
        train_steps = get_dataset_size(self.train_ds)
        validation_steps = get_dataset_size(self.validation_ds)
        data_structure = get_dataset_structure(self.train_ds)

        callbacks = self.get_checkpoints()
        with strategy.scope():

            model = get_model(input_shape=data_structure, name=self.config['model'])
            optimizer = Nadam()
            model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
                          optimizer=optimizer,
                          metrics=[
                              tf.keras.metrics.CategoricalAccuracy(),
                              tf.keras.metrics.Recall(),
                              tf.keras.metrics.Precision()
                          ])
            model.summary()
            model.fit(self.train_ds.repeat(),
                      epochs=self.config['epochs'],
                      steps_per_epoch=train_steps,
                      validation_data=self.validation_ds.repeat(),
                      validation_steps=validation_steps,
                      callbacks=callbacks)
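
Example 3 obtains its callbacks from self.get_checkpoints(), which is not shown. A plausible sketch, assuming it simply saves the best weights under MODEL_PATH so that evaluate() in Example 1 can reload them (a hypothetical helper, not the original):

    def get_checkpoints(self):
        # Track validation accuracy and keep only the best weights.
        checkpoint_path = os.path.join(MODEL_PATH, self.config['model'], 'checkpoint')
        return [
            tf.keras.callbacks.ModelCheckpoint(
                filepath=checkpoint_path,
                save_weights_only=True,
                monitor='val_categorical_accuracy',
                mode='max',
                save_best_only=True)
        ]
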
Example #4
def evaluate(params):
    # load model info
    model_prefix = osp.join('outputV2', params['dataset_splitBy'], params['id'], 'mrcn_cmr_with_st')
    infos = json.load(open(model_prefix + '.json'))
    model_opt = infos['opt']
    model_path = model_prefix + '.pth'
    model = load_model(model_path, model_opt)

    # set up loader
    data_json = osp.join('cache/prepro', params['dataset_splitBy'], 'data.json')
    data_h5 = osp.join('cache/prepro', params['dataset_splitBy'], 'data.h5')
    sub_obj_wds = osp.join('cache/sub_obj_wds', model_opt['dataset_splitBy'], 'sub_obj.json')
    similarity = osp.join('cache/similarity', model_opt['dataset_splitBy'], 'similarity.json')
    loader = DataLoader(data_h5=data_h5, data_json=data_json, sub_obj_wds=sub_obj_wds, similarity=similarity, opt=model_opt)

    # loader's feats
    feats_dir = '%s_%s_%s' % (model_opt['net_name'], model_opt['imdb_name'], model_opt['tag'])
    args.imdb_name = model_opt['imdb_name']
    args.net_name = model_opt['net_name']
    args.tag = model_opt['tag']
    args.iters = model_opt['iters']
    loader.prepare_mrcn(head_feats_dir=osp.join('cache/feats/', model_opt['dataset_splitBy'], 'mrcn', feats_dir),
                        args=args)
    ann_feats = osp.join('cache/feats', model_opt['dataset_splitBy'], 'mrcn',
                         '%s_%s_%s_ann_feats.h5' % (model_opt['net_name'], model_opt['imdb_name'], model_opt['tag']))
    # load ann features
    loader.loadFeats({'ann': ann_feats})

    # check model_info and params
    assert model_opt['dataset'] == params['dataset']
    assert model_opt['splitBy'] == params['splitBy']

    # evaluate on the split,
    split = params['split']
    model_opt['num_sents'] = params['num_sents']
    model_opt['verbose'] = params['verbose']

    val_loss, acc, predictions = eval_utils.eval_split(loader, model, split, model_opt)

    print('Comprehension on %s\'s %s (%s sents) is %.2f%%' % \
          (params['dataset_splitBy'], params['split'], len(predictions), acc * 100.))

    # save
    out_dir = osp.join('resultsV2', params['dataset_splitBy'], 'easy')
    if not osp.isdir(out_dir):
        os.makedirs(out_dir)
    out_file = osp.join(out_dir, params['id'] + '_' + params['split'] + '.json')
    with open(out_file, 'w') as of:
        json.dump({'predictions': predictions, 'acc': acc}, of)

    # write to results.txt
    with open('experiments/easy_results.txt', 'a') as f:
        f.write('[%s]: [%s][%s], id[%s]\'s acc is %.2f%%\n' %
                (params['id'], params['dataset_splitBy'], params['split'], params['id'], acc * 100.0))
Example #5
def load_data(cfg):
    train_data = DataLoader(cfg, split='train')
    test_data = DataLoader(cfg, split='test')

    num_train = len(train_data)
    num_test = len(test_data)

    train_data = tf.data.Dataset.from_generator(train_data,
                                                (tf.float32, tf.int32))
    test_data = tf.data.Dataset.from_generator(test_data,
                                               (tf.float32, tf.int32))

    train_data = train_data.batch(batch_size=cfg['SOLVER']['BATCH_SIZE'],
                                  drop_remainder=False)
    test_data = test_data.batch(batch_size=cfg['SOLVER']['BATCH_SIZE'],
                                drop_remainder=False)

    return train_data, test_data
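
Note that the positional output-types form of tf.data.Dataset.from_generator used above still works but is deprecated in recent TensorFlow releases in favor of output_signature. An equivalent call might look like this (the element shapes are illustrative assumptions):

train_gen = DataLoader(cfg, split='train')
train_data = tf.data.Dataset.from_generator(
    train_gen,
    output_signature=(
        tf.TensorSpec(shape=(None, None, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.int32)))
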
Example #6
def evaluate(params):
    # load model info
    # load mode info
    #model_prefix = osp.join('outputV2', params['dataset_splitBy'], params['id'], 'mrcn_cmr_with_st')
    model_prefix = osp.join('output', params['dataset_splitBy'], params['id'],
                            'mrcn_cmr_with_st')
    infos = json.load(open(model_prefix + '.json'))
    model_opt = infos['opt']
    model_path = model_prefix + '.pth'
    model = load_model(model_path, actor_state_size, action_size)

    Re_model_prefix = osp.join('output', params['dataset_splitBy'],
                               params['id'], 'mrcn_cmr_with_st_Re')
    Re_model_path = Re_model_prefix + '.pth'
    Re_model = load_model_Re(Re_model_path, Re_state_size, action_size)

    normalization = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    # set up loader
    data_json = osp.join('cache/prepro', params['dataset_splitBy'],
                         'data.json')
    data_h5 = osp.join('cache/prepro', params['dataset_splitBy'], 'data.h5')
    loader = DataLoader(data_h5=data_h5,
                        data_json=data_json,
                        opt=model_opt,
                        normalization=normalization)

    # loader's feats
    feats_dir = '%s_%s_%s' % (model_opt['net_name'], model_opt['imdb_name'],
                              model_opt['tag'])
    args.imdb_name = model_opt['imdb_name']
    args.net_name = model_opt['net_name']
    args.tag = model_opt['tag']
    args.iters = model_opt['iters']
    loader.prepare_mrcn(head_feats_dir=osp.join('cache/feats/',
                                                model_opt['dataset_splitBy'],
                                                'mrcn', feats_dir),
                        args=args)
    ann_feats = osp.join(
        'cache/feats', model_opt['dataset_splitBy'], 'mrcn',
        '%s_%s_%s_ann_feats.h5' %
        (model_opt['net_name'], model_opt['imdb_name'], model_opt['tag']))
    # load ann features
    loader.loadFeats({'ann': ann_feats})

    # check model_info and params
    assert model_opt['dataset'] == params['dataset']
    assert model_opt['splitBy'] == params['splitBy']

    # evaluate on the split,
    split = params['split']
    model_opt['num_sents'] = params['num_sents']
    model_opt['verbose'] = params['verbose']

    acc = eval_utils.eval_split_final(loader, model, split, model_opt,
                                      normalization, Re_model)
    #acc = eval_utils.eval_split(loader, model, split, model_opt, normalization)

    print('Comprehension on %s\'s %s is %.2f%%' % \
          (params['dataset_splitBy'], params['split'],  acc * 100.))

    # save
    #out_dir = osp.join('resultsV2', params['dataset_splitBy'], 'easy')
    out_dir = osp.join('results', params['dataset_splitBy'], 'easy')
    if not osp.isdir(out_dir):
        os.makedirs(out_dir)
    out_file = osp.join(out_dir,
                        params['id'] + '_' + params['split'] + '.json')
    with open(out_file, 'w') as of:
        json.dump({'acc': acc}, of)

    # write to results.txt
    with open('experiments/easy_results.txt', 'a') as f:
        f.write('[%s]: [%s][%s], id[%s]\'s acc is %.2f%%\n' %
                (params['id'], params['dataset_splitBy'], params['split'], params['id'], acc * 100.0))
Example #7
def main(args):
    opt = vars(args)
    # initialize
    opt['dataset_splitBy'] = opt['dataset'] + '_' + opt['splitBy']
    checkpoint_dir = osp.join(opt['checkpoint_path'], opt['dataset_splitBy'],
                              opt['exp_id'])
    if not osp.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    opt['actor_learning_rate'] = SL_actor_learning_rate
    opt['save_every'] = SL_save_every
    opt['learning_rate_decay_start'] = SL_learning_rate_decay_start
    opt['learning_rate_decay_every'] = SL_learning_rate_decay_every
    opt['max_iters'] = SL_max_iters
    opt['action_size'] = action_size
    opt['actor_state_size'] = actor_state_size
    opt['history_actions_length'] = history_actions_length
    opt['batch_size'] = SL_batch_size
    opt['COCO_path'] = COCO_path

    # set random seed
    #torch.manual_seed(opt['seed'])
    #random.seed(opt['seed'])

    normalization = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    if SL_first_train:
        actor = Actor(opt['actor_state_size'], opt['action_size'])
    else:
        model_prefix = osp.join('output', opt['dataset_splitBy'],
                                opt['exp_id'], 'mrcn_cmr_with_st')
        infos = json.load(open(model_prefix + '.json'))
        model_opt = infos['opt']
        model_path = model_prefix + '.pth'
        actor = load_model(model_path, actor_state_size, action_size)

    # set up loader
    data_json = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.json')
    data_h5 = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.h5')
    loader = DataLoader(data_h5=data_h5,
                        data_json=data_json,
                        opt=opt,
                        normalization=normalization)

    CE_loss = nn.CrossEntropyLoss(reduce=False)
    Softmax_loss = torch.nn.Softmax(dim=1)
    BCE_loss = nn.BCELoss()

    sig = torch.nn.Sigmoid()

    infos = {}
    if opt['start_from'] is not None:
        pass

    iter = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)
    val_accuracies = infos.get('val_accuracies', [])
    val_loss_history = infos.get('val_loss_history', {})
    val_result_history = infos.get('val_result_history', {})
    loss_history = infos.get('loss_history', {})
    loader.iterators = infos.get('iterators', loader.iterators)
    if opt['load_best_score'] == 1:
        best_val_score = infos.get('best_val_score', None)
    if opt['gpuid'] >= 0:
        actor.cuda()

    actor_lr = opt['actor_learning_rate']

    # set up optimizer
    actor_optimizer = torch.optim.Adam(actor.parameters(),
                                       lr=actor_lr,
                                       betas=(opt['optim_alpha'],
                                              opt['optim_beta']),
                                       eps=opt['optim_epsilon'],
                                       weight_decay=opt['weight_decay'])

    data_time, model_time = 0, 0
    start_time = time.time()

    f = open("./result", "w")
    f.close()

    f = open("./SL_loss_log", "w")
    f.close()

    while True:

        #torch.cuda.empty_cache()

        actor.train()
        actor_optimizer.zero_grad()

        T = {}

        tic = time.time()
        data = loader.getSLBatch('train', opt)

        T['data'] = time.time() - tic
        tic = time.time()

        batch_ref_img_tensor = data['batch_ref_img_tensor']
        batch_sent_feat = data['batch_sent_feat']
        batch_triad_feat = data['batch_triad_feat']
        batch_action_tensor = data['batch_action_tensor']
        batch_location_tensor = data['batch_location_tensor']
        batch_history_actions_tensor = data['batch_history_actions_tensor']

        # pass into the actor model
        actions_tensor, actions_cat = actor(batch_ref_img_tensor,
                                            batch_triad_feat,
                                            batch_location_tensor,
                                            batch_history_actions_tensor)
        actions_tensor = sig(actions_tensor)

        #actor_loss = CE_loss(actions_tensor, batch_action_tensor)
        actor_loss = BCE_loss(actions_tensor, batch_action_tensor)
        actor_loss_sum = torch.sum(actor_loss)
        actor_loss_sum.backward()

        model_utils.clip_gradient(actor_optimizer, opt['grad_clip'])
        actor_optimizer.step()

        T['model'] = time.time() - tic
        wrapped = data['bounds']['wrapped']

        data_time += T['data']
        model_time += T['model']

        total_time = (time.time() - start_time) / 3600
        total_time = round(total_time, 2)

        if iter % opt['losses_log_every'] == 0:
            loss_history[iter] = actor_loss.item()
            #print('i[%s], e[%s], sub_loss=%.1f, obj_loss=%.1f, rel_loss=%.1f, lr=%.2E, time=%.3f h' % (iter, epoch, sub_loss.data[0].item(), obj_loss.data[0].item(), rel_loss.data[0].item(), lr, total_time))
            print(
                'i[%s], e[%s], loss=%.3f, actor_lr=%.2E, time=%.3f h' %
                (iter, epoch, actor_loss.item(), actor_lr, total_time))
            data_time, model_time = 0, 0

            f = open("./SL_loss_log", "a")
            f.write(
                str('i[%s], e[%s], loss=%.3f, actor_lr=%.2E, time=%.3f h' %
                    (iter, epoch, actor_loss.item(), actor_lr,
                     total_time)) + "\n")
            f.close()

        if opt['learning_rate_decay_start'] > 0 and iter > opt[
                'learning_rate_decay_start']:
            frac = (iter - opt['learning_rate_decay_start']
                    ) / opt['learning_rate_decay_every']
            decay_factor = 0.1**frac
            actor_lr = opt['actor_learning_rate'] * decay_factor
            model_utils.set_lr(actor_optimizer, actor_lr)

        if (iter % opt['save_every']
                == 0) and (iter > 0) or iter == opt['max_iters']:
            #if (iter % opt['eval_every'] == 0) or iter == opt['max_iters']:

            acc = eval_utils.eval_split(loader, actor, 'testA', opt,
                                        normalization)
            val_accuracies += [(iter, acc)]
            current_score = acc

            if best_val_score is None or current_score > best_val_score:
                best_val_score = current_score
                checkpoint_path = osp.join(checkpoint_dir, opt['id'] + '.pth')
                checkpoint = {}
                checkpoint['model'] = actor
                checkpoint['opt'] = opt
                torch.save(checkpoint, checkpoint_path)
                print('model saved to %s' % checkpoint_path)

            # write json report
            infos['iter'] = iter
            infos['epoch'] = epoch
            infos['iterators'] = loader.iterators
            infos['loss_history'] = loss_history
            infos['val_accuracies'] = val_accuracies
            infos['val_loss_history'] = val_loss_history
            infos['best_val_score'] = best_val_score

            infos['opt'] = opt
            infos['val_result_history'] = val_result_history

            #with open(osp.join(checkpoint_dir, opt['id'] + '.json'), 'w', encoding="utf8") as io:
            # json.dump(infos, io)
            with open(osp.join(checkpoint_dir, opt['id'] + '.json'),
                      'w') as io:
                json.dump(infos, io)

        iter += 1
        if wrapped:
            epoch += 1
        if iter >= opt['max_iters'] and opt['max_iters'] > 0:
            print(str(best_val_score))
            break
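
This script (and the evaluation and RL scripts in the other examples) restores a saved actor with load_model, which is not part of the listing. Since the checkpoint saved above pickles the whole actor module under the 'model' key, a compatible sketch could be as simple as the following (an assumption, not the project's actual helper):

import torch

def load_model(model_path, actor_state_size, action_size):
    # The checkpoint stores the full actor module, so the size arguments are
    # kept only for interface compatibility with the callers.
    checkpoint = torch.load(model_path)
    return checkpoint['model']
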
Example #8
def main(args):
    opt = vars(args)
    # initialize
    opt['dataset_splitBy'] = opt['dataset'] + '_' + opt['splitBy']
    checkpoint_dir = osp.join(opt['checkpoint_path'], opt['dataset_splitBy'],
                              opt['exp_id'])
    if not osp.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # set random seed
    torch.manual_seed(opt['seed'])
    random.seed(opt['seed'])

    # set up loader
    data_json = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.json')
    data_h5 = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.h5')
    loader = DataLoader(data_h5=data_h5, data_json=data_json)

    # prepare feats
    feats_dir = '%s_%s_%s' % (args.net_name, args.imdb_name, args.tag)
    head_feats_dir = osp.join('cache/feats/', opt['dataset_splitBy'], 'mrcn',
                              feats_dir)

    loader.prepare_mrcn(head_feats_dir, args)

    ann_feats = osp.join(
        'cache/feats', opt['dataset_splitBy'], 'mrcn',
        '%s_%s_%s_ann_feats.h5' %
        (opt['net_name'], opt['imdb_name'], opt['tag']))
    loader.loadFeats({'ann': ann_feats})

    # set up model
    opt['vocab_size'] = loader.vocab_size
    opt['fc7_dim'] = loader.fc7_dim
    opt['pool5_dim'] = loader.pool5_dim
    opt['num_atts'] = loader.num_atts
    model = AdaptiveReconstruct(opt)

    infos = {}
    if opt['start_from'] is not None:
        pass
    iter = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)
    val_accuracies = infos.get('val_accuracies', [])
    val_loss_history = infos.get('val_loss_history', {})
    val_result_history = infos.get('val_result_history', {})
    loss_history = infos.get('loss_history', {})
    loader.iterators = infos.get('iterators', loader.iterators)
    if opt['load_best_score'] == 1:
        best_val_score = infos.get('best_val_score', None)

    att_weights = loader.get_attribute_weights()

    if opt['gpuid'] >= 0:
        model.cuda()

    # set up optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=opt['learning_rate'],
                                 betas=(opt['optim_alpha'], opt['optim_beta']),
                                 eps=opt['optim_epsilon'])

    data_time, model_time = 0, 0
    lr = opt['learning_rate']
    best_prediction, best_overall = None, None
    while True:
        model.train()
        optimizer.zero_grad()

        T = {}

        tic = time.time()
        data = loader.getBatch('train', opt)

        labels = data['labels']
        enc_labels = data['enc_labels']
        dec_labels = data['dec_labels']
        Feats = data['Feats']
        att_labels, select_ixs = data['att_labels'], data['select_ixs']

        T['data'] = time.time() - tic

        tic = time.time()
        scores, loss, _, _, _, _, _, vis_res_loss, att_res_loss, lang_res_loss = model(
            Feats['pool5'], Feats['fc7'], Feats['lfeats'], Feats['dif_lfeats'],
            Feats['cxt_fc7'], Feats['cxt_lfeats'], labels, enc_labels,
            dec_labels, att_labels, select_ixs, att_weights)

        loss.backward()
        model_utils.clip_gradient(optimizer, opt['grad_clip'])
        optimizer.step()
        T['model'] = time.time() - tic
        wrapped = data['bounds']['wrapped']

        data_time += T['data']
        model_time += T['model']

        if iter % opt['losses_log_every'] == 0:
            loss_history[iter] = loss.item()
            print('iter[%s](epoch[%s]), train_loss=%.3f, lr=%.2E, data:%.2fs/iter, model:%.2fs/iter' \
                  % (iter, epoch, loss.item(), lr, data_time / opt['losses_log_every'], model_time / opt['losses_log_every']))
            data_time, model_time = 0, 0

        if opt['learning_rate_decay_start'] > 0 and iter > opt[
                'learning_rate_decay_start']:
            frac = (iter - opt['learning_rate_decay_start']
                    ) / opt['learning_rate_decay_every']
            decay_factor = 0.1**frac
            lr = opt['learning_rate'] * decay_factor
            model_utils.set_lr(optimizer, lr)

        if iter % opt['save_checkpoint_every'] == 0 or iter == opt['max_iters']:
            val_loss, acc, predictions, val_vis_res_loss, val_lang_res_loss = eval.eval_split(
                loader, model, 'testB', opt)
            val_loss_history[iter] = val_loss
            val_result_history[iter] = {'loss': val_loss, 'accuracy': acc}
            val_accuracies += [(iter, acc)]
            print('validation loss: %.2f' % val_loss)
            print('validation acc : %.2f%%\n' % (acc * 100.0))

            current_score = acc
            if best_val_score is None or current_score > best_val_score:
                best_val_score = current_score
                best_predictions = predictions
                checkpoint_path = osp.join(checkpoint_dir, opt['id'] + '.pth')
                checkpoint = {}
                checkpoint['model'] = model
                checkpoint['opt'] = opt
                torch.save(checkpoint, checkpoint_path)
                print('model saved to %s' % checkpoint_path)

            infos['iter'] = iter
            infos['epoch'] = epoch
            infos['iterators'] = loader.iterators
            infos['loss_history'] = loss_history
            infos['val_accuracies'] = val_accuracies
            infos['val_loss_history'] = val_loss_history
            infos['best_val_score'] = best_val_score
            infos[
                'best_predictions'] = predictions if best_predictions is None else best_predictions

            infos['opt'] = opt
            infos['val_result_history'] = val_result_history
            infos['word_to_ix'] = loader.word_to_ix
            infos['att_to_ix'] = loader.att_to_ix
            with open(osp.join(checkpoint_dir, opt['id'] + '.json'),
                      'w',
                      encoding="utf8") as io:
                json.dump(infos, io)

        iter += 1
        if wrapped:
            epoch += 1
        if iter >= opt['max_iters'] and opt['max_iters'] > 0:
            break
Example #9
def main(args):
    opt = vars(args)
    # initialize
    opt['dataset_splitBy'] = opt['dataset'] + '_' + opt['splitBy']
    checkpoint_dir = osp.join(opt['checkpoint_path'], opt['dataset_splitBy'],
                              opt['exp_id'])
    if not osp.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    opt['actor_learning_rate'] = PG_actor_learning_rate
    opt['save_every'] = PG_save_every
    opt['learning_rate_decay_start'] = PG_learning_rate_decay_start
    opt['learning_rate_decay_every'] = PG_learning_rate_decay_every
    opt['max_iters'] = PG_max_iters
    opt['action_size'] = action_size
    opt['actor_state_size'] = actor_state_size
    opt['history_actions_length'] = history_actions_length
    opt['COCO_path'] = COCO_path

    # set random seed
    torch.manual_seed(opt['seed'])
    random.seed(opt['seed'])

    normalization = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    if PG_first_train:
        actor = Actor(opt['actor_state_size'], opt['action_size'])
    else:
        model_prefix = osp.join('output', opt['dataset_splitBy'],
                                opt['exp_id'], 'mrcn_cmr_with_st')
        infos = json.load(open(model_prefix + '.json'))
        model_opt = infos['opt']
        model_path = model_prefix + '.pth'
        actor = load_model(model_path, actor_state_size, action_size)

    critic = Critic(opt['actor_state_size'])

    # set up loader
    data_json = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.json')
    data_h5 = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.h5')
    loader = DataLoader(data_h5=data_h5,
                        data_json=data_json,
                        opt=opt,
                        normalization=normalization)

    CE_loss = nn.CrossEntropyLoss(reduce=False)
    Softmax_loss = torch.nn.Softmax(dim=1)

    infos = {}
    if opt['start_from'] is not None:
        pass

    iter = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)
    val_accuracies = infos.get('val_accuracies', [])
    val_loss_history = infos.get('val_loss_history', {})
    val_result_history = infos.get('val_result_history', {})
    loss_history = infos.get('loss_history', {})
    loader.iterators = infos.get('iterators', loader.iterators)
    if opt['load_best_score'] == 1:
        best_val_score = infos.get('best_val_score', None)
    if opt['gpuid'] >= 0:
        actor.cuda()
        critic.cuda()

    actor_lr = opt['actor_learning_rate']

    # set up optimizer
    actor_optimizer = torch.optim.Adam(actor.parameters(),
                                       lr=actor_lr,
                                       betas=(opt['optim_alpha'],
                                              opt['optim_beta']),
                                       eps=opt['optim_epsilon'],
                                       weight_decay=opt['weight_decay'])

    data_time, model_time = 0, 0
    start_time = time.time()

    f = open("./result", "w")
    f.close()

    f = open("./PG_loss_log", "w")
    f.close()

    ref_img_tensor_pool = []
    sent_feat_pool = []
    location_tensor_pool = []
    history_actions_tensor_pool = []
    action_pool = []
    reward_pool = []

    steps = 0

    #acc = eval_utils.eval_split(loader, actor, 'val', opt, normalization)

    for e in count():

        #torch.cuda.empty_cache()

        T = {}
        tic = time.time()
        data = loader.getPGBatch('testA', opt)
        T['data'] = time.time() - tic
        tic = time.time()

        history_acc = 0
        current_acc = 0

        img_id = data['img_id']
        img_path = data['img_path']
        img_W = data['img_W']
        img_H = data['img_H']
        sent_feat = data['sent_feat']
        triad_feat = data['triad_feat']
        triad_raw = data['triad_raw']
        gd_box = data['gd_box']

        sent_feat = torch.Tensor(sent_feat).unsqueeze(0).cuda()
        triad_feat = torch.Tensor(triad_feat).unsqueeze(0).cuda()

        # import the image
        img = Image.open(img_path)
        img = img.convert("RGB")

        # the ground truth bounding box of the target
        gd_box_x0 = int(gd_box[0])
        gd_box_y0 = int(gd_box[1])
        gd_box_x1 = int(gd_box[2])
        gd_box_y1 = int(gd_box[3])
        gd_box_W = gd_box_x1 - gd_box_x0
        gd_box_H = gd_box_y1 - gd_box_y0
        gd_box_wh = [gd_box_x0, gd_box_y0, gd_box_W, gd_box_H]

        # current bounding box of the current search region
        t_box_x0 = 0
        t_box_y0 = 0
        t_box_x1 = img_W
        t_box_y1 = img_H
        t_box_w = t_box_x1 - t_box_x0
        t_box_h = t_box_y1 - t_box_y0

        # initialize the history_actions
        action = -1
        history_actions = []
        for a_i in range(opt['history_actions_length']):
            history_actions.append(-1)

        action_count = 0

        # run the episode
        for t in count():

            steps += 1

            t_box_wh = [
                int(t_box_x0),
                int(t_box_y0),
                int(t_box_x1 - t_box_x0),
                int(t_box_y1 - t_box_y0)
            ]
            history_acc = computeIoU(gd_box_wh, t_box_wh)

            # state = ref_img_tensor + sent_feat + location_tensor + history_actions_tensor

            # ref_img_tensor
            ref_img = img.crop((t_box_x0, t_box_y0, t_box_x1, t_box_y1))
            #ref_img.show()
            ref_img = [normalization(ref_img).cpu().numpy()]
            ref_img_tensor = torch.Tensor(ref_img).cuda()

            # location_tensor
            location_tensor = torch.FloatTensor(
                np.array([
                    float(t_box_x0) / float(img_W),
                    float(t_box_y0) / float(img_H),
                    float(t_box_x1) / float(img_W),
                    float(t_box_y1) / float(img_H),
                    (float(t_box_w) * float(t_box_h)) / (img_W * img_H)
                ]))
            location_tensor = location_tensor.view(1, -1).cuda()

            # history_actions_tensor
            history_actions_tensor = torch.FloatTensor(
                np.array(history_actions))
            history_actions_tensor = history_actions_tensor.view(1, -1).cuda()

            # predict the action
            #actions_tensor, actions_cat = actor(ref_img_tensor, sent_feat, location_tensor, history_actions_tensor)
            actions_tensor, actions_cat = actor(ref_img_tensor, triad_feat,
                                                location_tensor,
                                                history_actions_tensor)

            action = actions_cat.sample()
            action_value = int(action.cpu().numpy())
            if action_count > max_action_steps:
                action_value = 4

            # update the history action list (add current action)
            history_actions.pop(0)
            history_actions.append(action_value)

            # execute the action
            # action
            # 0 up
            # 1 down
            # 2 left
            # 3 right
            # 4 stop

            if action_value == 0:
                t_box_y0 = int(t_box_y0 + move_ratio * (t_box_y1 - t_box_y0))

            if action_value == 1:
                t_box_y1 = int(t_box_y1 - move_ratio * (t_box_y1 - t_box_y0))

            if action_value == 2:
                t_box_x0 = int(t_box_x0 + move_ratio * (t_box_x1 - t_box_x0))

            if action_value == 3:
                t_box_x1 = int(t_box_x1 - move_ratio * (t_box_x1 - t_box_x0))

            t_box_w = t_box_x1 - t_box_x0
            t_box_h = t_box_y1 - t_box_y0

            # generate the reward (reward function)
            t_box_wh = [
                int(t_box_x0),
                int(t_box_y0),
                int(t_box_x1 - t_box_x0),
                int(t_box_y1 - t_box_y0)
            ]
            IoU = computeIoU(gd_box_wh, t_box_wh)
            current_acc = IoU

            # discrete 1
            if IoU > 0.5:
                reward = 1
            else:
                reward = 0
            '''
            # discrete 2
            if IoU < 0.3:
                reward = 0
            elif IoU < 0.5:
                reward = 1
            else:
                reward = 10
            '''
            '''
            # continue
            if IoU < 0.5:
                reward = (IoU*IoU)*100
            else:
                reward = 100
            '''
            '''
            # difference
            if current_acc > 0.5:
                reward = 10
            elif current_acc > history_acc:
                reward = 1
            else:
                reward = 0
            '''

            # store related data for training
            ref_img_tensor_pool.append(ref_img_tensor)
            sent_feat_pool.append(sent_feat)
            location_tensor_pool.append(location_tensor)
            history_actions_tensor_pool.append(history_actions_tensor)
            action_pool.append(action)
            reward_pool.append(reward)

            # action to stop the episode
            if (action_value
                    == 4) or (IoU > accuracy_thre) or (t > max_action_steps):
                break

        # update the policy
        if e > 0 and e % PG_e_batch == 0:
            #if e >= 0:

            # Discount reward
            running_add = 0
            for i in reversed(range(steps)):
                if reward_pool[i] == 0:
                    running_add = 0
                else:
                    running_add = running_add * 0.99 + reward_pool[i]
                    reward_pool[i] = running_add

            # Normalize reward
            reward_mean = np.mean(reward_pool)
            #reward_std = np.std(reward_pool)
            for i in range(steps):
                #reward_pool[i] = (reward_pool[i] - reward_mean) / reward_std
                reward_pool[i] = reward_pool[i] - reward_mean

            # Gradient Descent
            actor.train()
            actor_optimizer.zero_grad()

            for i in range(steps):
                ref_img_tensor = ref_img_tensor_pool[i]
                sent_feat = sent_feat_pool[i]
                location_tensor = location_tensor_pool[i]
                history_actions_tensor = history_actions_tensor_pool[i]

                action = Variable(torch.FloatTensor([action_pool[i]])).cuda()
                reward = reward_pool[i]

                #actions_tensor, actions_cat = actor(ref_img_tensor, sent_feat, location_tensor, history_actions_tensor)
                actions_tensor, actions_cat = actor(ref_img_tensor, triad_feat,
                                                    location_tensor,
                                                    history_actions_tensor)

                log_prob = actions_cat.log_prob(action)

                loss = -log_prob * reward  # Negative score function x reward
                loss.backward()

                actor_optimizer.step()

            ref_img_tensor_pool = []
            sent_feat_pool = []
            location_tensor_pool = []
            history_actions_tensor_pool = []
            action_pool = []
            reward_pool = []
            steps = 0

            T['model'] = time.time() - tic
            wrapped = data['bounds']['wrapped']

            data_time += T['data']
            model_time += T['model']

            total_time = (time.time() - start_time) / 3600
            total_time = round(total_time, 2)

            if e % opt['losses_log_every'] == 0:
                print('e[%s], loss=%.3f, actor_lr=%.2E, time=%.3f h' %
                      (e, loss.item(), actor_lr, total_time))
                data_time, model_time = 0, 0

                f = open("./PG_loss_log", "a")
                f.write(
                    str('e[%s], loss=%.3f, actor_lr=%.2E, time=%.3f h' %
                        (e, loss.item(), actor_lr, total_time)) + "\n")
                f.close()

            if opt['learning_rate_decay_start'] > 0 and e > opt[
                    'learning_rate_decay_start']:
                frac = (e - opt['learning_rate_decay_start']
                        ) / opt['learning_rate_decay_every']
                decay_factor = 0.1**frac
                actor_lr = opt['actor_learning_rate'] * decay_factor
                model_utils.set_lr(actor_optimizer, actor_lr)

            if (e % opt['save_every']
                    == 0) and (e > 0) or iter == opt['max_iters']:
                #if (e % opt['save_every'] == 0) or e == opt['max_iters']:

                acc = eval_utils.eval_split(loader, actor, 'testA', opt,
                                            normalization)
                val_accuracies += [(iter, acc)]
                print('validation acc : %.2f%%\n' % (acc * 100.0))

                current_score = acc

                f = open("./result", "a")
                f.write(str(current_score) + "\n")
                f.close()

                if best_val_score is None or current_score > best_val_score:
                    best_val_score = current_score
                    checkpoint_path = osp.join(checkpoint_dir,
                                               opt['id'] + '.pth')
                    checkpoint = {}
                    checkpoint['model'] = actor
                    checkpoint['opt'] = opt
                    torch.save(checkpoint, checkpoint_path)
                    print('model saved to %s' % checkpoint_path)

                # write json report
                infos['e'] = e
                infos['iterators'] = loader.iterators
                infos['loss_history'] = loss_history
                infos['val_accuracies'] = val_accuracies
                infos['val_loss_history'] = val_loss_history
                infos['best_val_score'] = best_val_score

                infos['opt'] = opt
                infos['val_result_history'] = val_result_history

                #with open(osp.join(checkpoint_dir, opt['id'] + '.json'), 'w', encoding="utf8") as io:
                # json.dump(infos, io)
                with open(osp.join(checkpoint_dir, opt['id'] + '.json'),
                          'w') as io:
                    json.dump(infos, io)

            iter += 1
            if wrapped:
                epoch += 1
            if iter >= opt['max_iters'] and opt['max_iters'] > 0:
                print(str(best_val_score))
                break
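
Examples 9 and 10 score each search region against the ground truth with computeIoU, which is not shown either; the boxes are passed as [x, y, w, h]. A standard intersection-over-union sketch consistent with that format (assumed, not the original implementation):

def computeIoU(box1, box2):
    # Boxes are [x, y, w, h]; returns intersection area divided by union area.
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[0] + box1[2], box2[0] + box2[2])
    y2 = min(box1[1] + box1[3], box2[1] + box2[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    union = box1[2] * box1[3] + box2[2] * box2[3] - inter
    return float(inter) / union if union > 0 else 0.0
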
Example #10
def main(args):
    opt = vars(args)
    # initialize
    opt['dataset_splitBy'] = opt['dataset'] + '_' + opt['splitBy']
    checkpoint_dir = osp.join(opt['checkpoint_path'], opt['dataset_splitBy'],
                              opt['exp_id'])
    if not osp.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    opt['actor_learning_rate'] = SL_actor_learning_rate
    opt['critic_learning_rate'] = critic_learning_rate
    opt['save_every'] = SL_save_every
    opt['learning_rate_decay_start'] = SL_learning_rate_decay_start
    opt['learning_rate_decay_every'] = SL_learning_rate_decay_every
    opt['max_iters'] = SL_max_iters
    opt['action_size'] = action_size
    opt['actor_state_size'] = actor_state_size
    opt['history_actions_length'] = history_actions_length
    opt['COCO_path'] = COCO_path

    # set random seed
    torch.manual_seed(opt['seed'])
    random.seed(opt['seed'])

    normalization = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    if SL_first_train:
        actor = Actor(opt['actor_state_size'], opt['action_size'])
    else:
        model_prefix = osp.join('output', opt['dataset_splitBy'],
                                opt['exp_id'], 'mrcn_cmr_with_st')
        infos = json.load(open(model_prefix + '.json'))
        model_opt = infos['opt']
        model_path = model_prefix + '.pth'
        actor = load_model(model_path, actor_state_size, action_size)

    if Critic_first_train:
        critic = Critic(opt['actor_state_size'])
    else:
        model_prefix = osp.join('output', opt['dataset_splitBy'],
                                opt['exp_id'], 'mrcn_cmr_with_st')
        infos = json.load(open(model_prefix + '.json'))
        model_opt = infos['opt']
        model_path = model_prefix + '_critic' + '.pth'
        critic = load_critic_model(model_path, actor_state_size)

    # set up loader
    data_json = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.json')
    data_h5 = osp.join('cache/prepro', opt['dataset_splitBy'], 'data.h5')
    loader = DataLoader(data_h5=data_h5,
                        data_json=data_json,
                        opt=opt,
                        normalization=normalization)

    CE_loss = nn.CrossEntropyLoss(reduce=False)
    Softmax_loss = torch.nn.Softmax(dim=1)

    infos = {}
    if opt['start_from'] is not None:
        pass

    iter = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)
    val_accuracies = infos.get('val_accuracies', [])
    val_loss_history = infos.get('val_loss_history', {})
    val_result_history = infos.get('val_result_history', {})
    loss_history = infos.get('loss_history', {})
    loader.iterators = infos.get('iterators', loader.iterators)
    if opt['load_best_score'] == 1:
        best_val_score = infos.get('best_val_score', None)
    if opt['gpuid'] >= 0:
        actor.cuda()
        critic.cuda()

    actor_lr = opt['actor_learning_rate']
    critic_lr = opt['critic_learning_rate']

    # set up optimizer
    actor_optimizer = torch.optim.Adam(actor.parameters(),
                                       lr=actor_lr,
                                       betas=(opt['optim_alpha'],
                                              opt['optim_beta']),
                                       eps=opt['optim_epsilon'],
                                       weight_decay=opt['weight_decay'])

    critic_optimizer = torch.optim.Adam(critic.parameters(),
                                        lr=critic_lr,
                                        betas=(opt['optim_alpha'],
                                               opt['optim_beta']),
                                        eps=opt['optim_epsilon'],
                                        weight_decay=opt['weight_decay'])

    data_time, model_time = 0, 0
    start_time = time.time()

    f = open("./result", "w")
    f.close()

    f = open("./AC_loss_log", "w")
    f.close()

    #acc = eval_utils.eval_split(loader, actor, 'val', opt, normalization)

    for e in count():

        #torch.cuda.empty_cache()

        T = {}
        tic = time.time()
        #data = loader.getPGBatch('testA', opt)
        data = loader.getPGBatch('val', opt)
        T['data'] = time.time() - tic
        tic = time.time()

        history_acc = 0
        current_acc = 0

        img_id = data['img_id']
        img_path = data['img_path']
        img_W = data['img_W']
        img_H = data['img_H']
        #sent_feat = data['sent_feat']
        triad_feat = data['triad_feat']
        triad_raw = data['triad_raw']
        gd_box = data['gd_box']

        triad_feat = torch.Tensor(triad_feat).unsqueeze(0).cuda()

        # import the image
        img = Image.open(img_path)
        img = img.convert("RGB")

        # the ground truth bounding box of the target
        gd_box_x0 = int(gd_box[0])
        gd_box_y0 = int(gd_box[1])
        gd_box_x1 = int(gd_box[2])
        gd_box_y1 = int(gd_box[3])
        gd_box_W = gd_box_x1 - gd_box_x0
        gd_box_H = gd_box_y1 - gd_box_y0
        gd_box_wh = [gd_box_x0, gd_box_y0, gd_box_W, gd_box_H]

        # current bounding box of the current search region
        t_box_x0 = 0
        t_box_y0 = 0
        t_box_x1 = img_W
        t_box_y1 = img_H
        t_box_w = t_box_x1 - t_box_x0
        t_box_h = t_box_y1 - t_box_y0

        # initialize the history_actions
        action = -1
        history_actions = []
        for a_i in range(opt['history_actions_length']):
            history_actions.append(-1)

        action_count = 0

        AC_log_probs = []
        AC_values = []
        AC_rewards = []
        AC_masks = []
        AC_entropy = 0

        # run the episode
        for t in count():

            t_box_wh = [
                int(t_box_x0),
                int(t_box_y0),
                int(t_box_x1 - t_box_x0),
                int(t_box_y1 - t_box_y0)
            ]
            history_acc = computeIoU(gd_box_wh, t_box_wh)

            #########################################
            # create the state
            # state = ref_img_tensor + sent_feat + location_tensor + history_actions_tensor

            # ref_img_tensor
            ref_img = img.crop((t_box_x0, t_box_y0, t_box_x1, t_box_y1))
            #ref_img.show()
            ref_img = [normalization(ref_img).cpu().numpy()]
            ref_img_tensor = torch.Tensor(ref_img).cuda()

            # location_tensor
            location_tensor = torch.FloatTensor(
                np.array([
                    float(t_box_x0) / float(img_W),
                    float(t_box_y0) / float(img_H),
                    float(t_box_x1) / float(img_W),
                    float(t_box_y1) / float(img_H),
                    (float(t_box_w) * float(t_box_h)) / (img_W * img_H)
                ]))
            location_tensor = location_tensor.view(1, -1).cuda()

            # history_actions_tensor
            history_actions_tensor = torch.FloatTensor(
                np.array(history_actions))
            history_actions_tensor = history_actions_tensor.view(1, -1).cuda()

            #############################################
            # predict the action and value
            #actions_tensor, actions_cat = actor(ref_img_tensor, sent_feat, location_tensor, history_actions_tensor)
            actions_tensor, actions_cat = actor(ref_img_tensor, triad_feat,
                                                location_tensor,
                                                history_actions_tensor)
            value = critic(ref_img_tensor, triad_feat, location_tensor,
                           history_actions_tensor)

            action = actions_cat.sample()
            action_value = int(action.cpu().numpy())
            if action_count > max_action_steps:
                action_value = 4

            log_prob = actions_cat.log_prob(action)

            # update the history action list (add current action)
            history_actions.pop(0)
            history_actions.append(action_value)

            ############################################
            # execute the action, state=next_state
            # action
            # 0 up
            # 1 down
            # 2 left
            # 3 right
            # 4 stop

            if action_value == 0:
                t_box_y0 = int(t_box_y0 + move_ratio * (t_box_y1 - t_box_y0))

            if action_value == 1:
                t_box_y1 = int(t_box_y1 - move_ratio * (t_box_y1 - t_box_y0))

            if action_value == 2:
                t_box_x0 = int(t_box_x0 + move_ratio * (t_box_x1 - t_box_x0))

            if action_value == 3:
                t_box_x1 = int(t_box_x1 - move_ratio * (t_box_x1 - t_box_x0))

            t_box_w = t_box_x1 - t_box_x0
            t_box_h = t_box_y1 - t_box_y0

            ###################################################
            # generate the reward (reward function)
            t_box_wh = [
                int(t_box_x0),
                int(t_box_y0),
                int(t_box_x1 - t_box_x0),
                int(t_box_y1 - t_box_y0)
            ]
            IoU = computeIoU(gd_box_wh, t_box_wh)
            current_acc = IoU

            # generate done
            if (action_value
                    == 4) or (IoU > accuracy_thre) or (t > max_action_steps):
                done = 1
            else:
                done = 0
            '''
            # discrete 1
            if IoU > 0.5:
                reward = 1
            else:
                reward = 0
            '''
            '''
            # discrete 2
            if (IoU < 0.3) or (current_acc <= history_acc):
                reward = 0
            elif (IoU < 0.5) and (current_acc > history_acc):
                reward = 1
            elif (IoU >=0.5) and (current_acc > history_acc):
                reward = 10
            '''

            # continuous reward

            if current_acc <= history_acc:
                reward = 0

            if current_acc > history_acc:
                if IoU < 0.5:
                    reward = (IoU * IoU) * 100
                else:
                    reward = 100
            '''
            # difference
            if (current_acc > 0.5) and (current_acc > history_acc):
                reward = 10
            elif current_acc > history_acc:
                reward = 1
            else:
                reward = 0
            '''

            #####################################
            # record related data for training (AC)
            AC_log_probs.append(log_prob)
            AC_values.append(value)
            AC_rewards.append(torch.Tensor([reward]).cuda())
            AC_masks.append(torch.Tensor([1 - done]).cuda())

            # action to stop the episode
            if (action_value
                    == 4) or (IoU > accuracy_thre) or (t > max_action_steps):
                break

        #########################################
        # create the final next state
        # state = ref_img_tensor + sent_feat + location_tensor + history_actions_tensor

        # ref_img_tensor
        ref_img = img.crop((t_box_x0, t_box_y0, t_box_x1, t_box_y1))
        # ref_img.show()
        ref_img = [normalization(ref_img).cpu().numpy()]
        ref_img_tensor = torch.Tensor(ref_img).cuda()

        # location_tensor
        location_tensor = torch.FloatTensor(
            np.array([
                float(t_box_x0) / float(img_W),
                float(t_box_y0) / float(img_H),
                float(t_box_x1) / float(img_W),
                float(t_box_y1) / float(img_H),
                (float(t_box_w) * float(t_box_h)) / (img_W * img_H)
            ]))
        location_tensor = location_tensor.view(1, -1).cuda()

        # history_actions_tensor
        history_actions_tensor = torch.FloatTensor(np.array(history_actions))
        history_actions_tensor = history_actions_tensor.view(1, -1).cuda()

        #################################################
        # this iteration is finished here (done==True, game is over, etc.)
        # next_value is used as the estimated return value of the next state of the states(-1), to calculate the return of states(-1)
        next_value = critic(ref_img_tensor, triad_feat, location_tensor,
                            history_actions_tensor)
        AC_returns = compute_returns(next_value, AC_rewards, AC_masks)

        # [n, 1] all of these 4 variables below
        log_probs = torch.cat(AC_log_probs)
        returns = torch.cat(AC_returns).detach()
        AC_values = torch.cat(AC_values)
        advantage = returns - AC_values

        # single value
        actor_loss = -(log_probs * advantage.detach()).mean()
        # x.pow(2) = x^2
        critic_loss = advantage.pow(2).mean()

        actor_optimizer.zero_grad()
        critic_optimizer.zero_grad()
        actor_loss.backward(retain_graph=True)
        critic_loss.backward(retain_graph=True)
        actor_optimizer.step()
        critic_optimizer.step()
        torch.cuda.empty_cache()

        T['model'] = time.time() - tic
        wrapped = data['bounds']['wrapped']

        data_time += T['data']
        model_time += T['model']

        total_time = (time.time() - start_time) / 3600
        total_time = round(total_time, 2)

        if e % opt['losses_log_every'] == 0:
            print('e[%s], loss=%.3f, actor_lr=%.2E, time=%.3f h' %
                  (e, actor_loss.item(), actor_lr, total_time))
            data_time, model_time = 0, 0

            f = open("./PG_loss_log", "a")
            f.write(
                str('e[%s], loss=%.3f, actor_lr=%.2E, time=%.3f h' %
                    (e, actor_loss.item(), actor_lr, total_time)) +
                "\n")
            f.close()

        if opt['learning_rate_decay_start'] > 0 and e > opt[
                'learning_rate_decay_start']:
            frac = (e - opt['learning_rate_decay_start']
                    ) / opt['learning_rate_decay_every']
            decay_factor = 0.1**frac

            actor_lr = opt['actor_learning_rate'] * decay_factor
            model_utils.set_lr(actor_optimizer, actor_lr)

            critic_lr = opt['critic_learning_rate'] * decay_factor
            model_utils.set_lr(critic_optimizer, critic_lr)

        if (e % opt['save_every']
                == 0) and (e > 0) or iter == opt['max_iters']:
            #if (e % opt['save_every'] == 0) or e == opt['max_iters']:

            #acc = eval_utils.eval_split(loader, actor, 'testA', opt, normalization)
            acc = eval_utils.eval_split(loader, actor, 'val', opt,
                                        normalization)
            val_accuracies += [(iter, acc)]
            current_score = acc

            if best_val_score is None or current_score > best_val_score:
                best_val_score = current_score

                # save actor model
                checkpoint_path = osp.join(checkpoint_dir, opt['id'] + '.pth')
                checkpoint = {}
                checkpoint['model'] = actor
                checkpoint['opt'] = opt
                torch.save(checkpoint, checkpoint_path)
                print('actor model saved to %s' % checkpoint_path)

                # save critic model
                critic_checkpoint_path = osp.join(
                    checkpoint_dir, opt['id'] + '_critic' + '.pth')
                critic_checkpoint = {}
                critic_checkpoint['model'] = critic
                critic_checkpoint['opt'] = opt
                torch.save(critic_checkpoint, critic_checkpoint_path)
                print('critic model saved to %s' % critic_checkpoint_path)

            # write json report
            infos['e'] = e
            infos['iterators'] = loader.iterators
            infos['loss_history'] = loss_history
            infos['val_accuracies'] = val_accuracies
            infos['val_loss_history'] = val_loss_history
            infos['best_val_score'] = best_val_score

            infos['opt'] = opt
            infos['val_result_history'] = val_result_history

            with open(osp.join(checkpoint_dir, opt['id'] + '.json'),
                      'w') as io:
                json.dump(infos, io)

        iter += 1
        if wrapped:
            epoch += 1
        if iter >= opt['max_iters'] and opt['max_iters'] > 0:
            print(str(best_val_score))
            break
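
Example 10 turns each episode's rewards into discounted returns with compute_returns(next_value, AC_rewards, AC_masks), another helper absent from the listing. A common actor-critic formulation that bootstraps from the critic's value of the final state would be (a sketch under that assumption; gamma is a hypothetical default):

def compute_returns(next_value, rewards, masks, gamma=0.99):
    # Walk the episode backwards; masks zero out the bootstrap term at episode end.
    R = next_value
    returns = []
    for step in reversed(range(len(rewards))):
        R = rewards[step] + gamma * R * masks[step]
        returns.insert(0, R)
    return returns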