Code example #1
File: REGer.py  Project: llxuan/ReferWhat
    def get_self_critical_reward(self, data_gts, gen_result):

        batch_size = len(gen_result)  # batch_size = sample_size * seq_per_img
        # candidate captions: one generated string per sample (tokens joined with spaces)
        res = {}
        for i in range(batch_size):
            res[i] = [' '.join(gen_result[i])]
        batch_reward = 0
        # reference captions for each sample, keyed the same way
        gts = {}
        for i in range(batch_size):
            gts[i] = data_gts[i]

        if self.cfg.TRAIN.RL.Cider_Reward_Weight > 0:
            cider, cider_scores = Cider().compute_score(gts, res)
            cider_scores = self.cfg.TRAIN.RL.Cider_Reward_Weight * torch.FloatTensor(
                cider_scores)
            batch_reward = cider
        else:
            # keep cider_scores a tensor so the .unsqueeze() call below does not fail
            cider_scores = torch.zeros(batch_size)

        # broadcast the per-sample reward over every decoding step
        rewards = cider_scores.unsqueeze(1).repeat(1, self.bi_max)

        return batch_reward, rewards
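
For reference, a pycocoevalcap-style CIDEr scorer (which the compute_score call above appears to use) expects both arguments to be dictionaries keyed by sample id, with each value a list of caption strings. The minimal sketch below is illustrative only; the import path, the captions, and the max length of 16 are assumptions, not taken from the project above.

import torch
from pycocoevalcap.cider.cider import Cider  # assumed pycocoevalcap-style scorer

# hypothetical references (gts) and generated candidates (res), keyed by sample index
gts = {0: ["a dog runs on the grass", "a brown dog playing outside"],
       1: ["two people ride bicycles down a street"]}
res = {0: ["a dog running on grass"],
       1: ["people riding bikes on a road"]}

corpus_score, per_sample_scores = Cider().compute_score(gts, res)

# mirroring the reward shaping above: one scalar per sample, repeated over time steps
rewards = torch.FloatTensor(per_sample_scores).unsqueeze(1).repeat(1, 16)  # 16 = assumed max length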
Code example #2
                model.load_state_dict(training_checkpoint[key])
        # recover the starting iteration from the checkpoint filename ([:-4] strips the extension)
        if _A.start_from_checkpoint.split("_")[-1][:-4] == 'best':
            start_iteration = 1
        else:
            start_iteration = int(
                _A.start_from_checkpoint.split("_")[-1][:-4]) + 1
    else:
        start_iteration = 1

    # NOTE: this unconditionally resets start_iteration, overriding the value recovered above
    start_iteration = 1
    # Initialize the lr for start iteration
    lr_scheduler.step(start_iteration - 1)

    # Construct a cider evaluation object
    cider_train = Cider(
        PTBTokenizer.tokenize(
            train_dataset._captions_reader._ref_caps_full_sentences))

    # --------------------------------------------------------------------------------------------
    #   TRAINING LOOP
    # --------------------------------------------------------------------------------------------
    model.eval()
    model._is_val = False
    running_reward = .0
    reward_counter = 1
    for iteration in tqdm(range(start_iteration, _C.OPTIM.NUM_ITERATIONS + 1)):
        # keys: {"image_id", "image_features", "caption_tokens"}
        batch = next(train_dataloader)
        batch_size = batch["image_features"].shape[0]

        optimizer.zero_grad()
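
Codebases that build a Cider object over tokenized training references, as cider_train is built above, typically query it inside the loop to obtain a per-sample self-critical reward. The sketch below shows the general shape of that step under a REINFORCE-with-baseline formulation; cider_train, PTBTokenizer, and optimizer are assumed to be the objects already present in the script above, while sampled_captions, reference_captions, and log_probs are illustrative names standing in for the model's sampled outputs, their references, and the per-token log-probabilities. This is a rough sketch, not the project's actual reward code.

import numpy as np
import torch

# tokenize candidates and references the same way the training references were tokenized above
caps_gen = PTBTokenizer.tokenize({i: [c] for i, c in enumerate(sampled_captions)})
caps_gt = PTBTokenizer.tokenize({i: refs for i, refs in enumerate(reference_captions)})

# per-sample CIDEr scores act as the reward signal
_, per_sample = cider_train.compute_score(caps_gt, caps_gen)
reward = torch.from_numpy(np.asarray(per_sample, dtype=np.float32)).to(log_probs.device)

# simple mean baseline; the policy-gradient loss favours captions that beat the baseline
baseline = reward.mean()
loss = -((reward - baseline) * log_probs.sum(-1)).mean()
loss.backward()
optimizer.step()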
Code example #3
    if not os.path.isfile('vocab_%s.pkl' % args.exp_name):
        print("Building vocabulary")
        text_field.build_vocab(train_dataset, val_dataset, min_freq=5)
        pickle.dump(text_field.vocab, open('vocab_%s.pkl' % args.exp_name, 'wb'))
    else:
        text_field.vocab = pickle.load(open('vocab_%s.pkl' % args.exp_name, 'rb'))

    # Model and dataloaders
    # 3-layer memory-augmented encoder (padding index 0) and 3-layer meshed decoder
    # with a maximum caption length of 54 tokens
    encoder = MemoryAugmentedEncoder(3, 0, attention_module=ScaledDotProductAttentionMemory,
                                     attention_module_kwargs={'m': args.m})
    decoder = MeshedDecoder(len(text_field.vocab), 54, 3, text_field.vocab.stoi['<pad>'])
    model = Transformer(text_field.vocab.stoi['<bos>'], encoder, decoder).to(device)

    dict_dataset_train = train_dataset.image_dictionary({'image': image_field, 'text': RawField()})
    ref_caps_train = list(train_dataset.text)
    cider_train = Cider(PTBTokenizer.tokenize(ref_caps_train))
    dict_dataset_val = val_dataset.image_dictionary({'image': image_field, 'text': RawField()})
    dict_dataset_test = test_dataset.image_dictionary({'image': image_field, 'text': RawField()})


    def lambda_lr(s):
        warm_up = args.warmup
        s += 1
        return (model.d_model ** -.5) * min(s ** -.5, s * warm_up ** -1.5)


    # Initial conditions: base lr of 1 so the LambdaLR schedule fully determines the effective learning rate
    optim = Adam(model.parameters(), lr=1, betas=(0.9, 0.98))
    scheduler = LambdaLR(optim, lambda_lr)
    loss_fn = NLLLoss(ignore_index=text_field.vocab.stoi['<pad>'])
    use_rl = False
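
The lambda_lr function above is the Transformer (Noam) learning-rate schedule: the factor grows roughly linearly for warmup steps and then decays as the inverse square root of the step, all scaled by d_model ** -0.5. Because Adam is created with lr=1, this factor is the effective learning rate. A standalone sketch of its behaviour, assuming d_model = 512 and warmup = 10000 (both constants are assumptions for illustration):

# standalone reproduction of the schedule above with assumed constants
d_model, warm_up = 512, 10000

def lambda_lr(s):
    s += 1
    return (d_model ** -.5) * min(s ** -.5, s * warm_up ** -1.5)

for step in (0, 1000, 5000, 10000, 40000):
    print(step, lambda_lr(step))
# the factor climbs roughly linearly to about 4.4e-4 at step 10000,
# then falls off proportionally to 1 / sqrt(step)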
Code example #4
File: train.py  Project: e-bug/syncap
    encoder = MemoryAugmentedEncoder(
        3,
        0,
        attention_module=ScaledDotProductAttentionMemory,
        attention_module_kwargs={'m': args.m})
    decoder = MeshedDecoder(len(text_field.vocab), 108, 3,
                            text_field.vocab.stoi['<pad>'])
    model = Transformer(text_field.vocab.stoi['<start>'], encoder,
                        decoder).to(device)

    dict_dataset_train = train_dataset.image_dictionary({
        'image': image_field,
        'text': RawField()
    })
    ref_caps_train = list(train_dataset.text)
    cider_train = Cider(my_tokenize(ref_caps_train))
    dict_dataset_val = val_dataset.image_dictionary({
        'image': image_field,
        'text': RawField()
    })
    dict_dataset_test = test_dataset.image_dictionary({
        'image': image_field,
        'text': RawField()
    })

    def lambda_lr(s):
        warm_up = args.warmup
        s += 1
        return (model.d_model**-.5) * min(s**-.5, s * warm_up**-1.5)

    # Initial conditions
Code example #5
    # Pipeline for text
    text_field = TextField(init_token='<bos>',
                           eos_token='<eos>',
                           lower=True,
                           tokenize='spacy',
                           remove_punctuation=True,
                           nopoints=False)

    # Create the dataset
    dataset = COCO(image_field, text_field, 'coco/images/',
                   args.annotation_folder, args.annotation_folder)
    _, _, test_dataset = dataset.splits
    text_field.vocab = pickle.load(open('vocab.pkl', 'rb'))

    ref_caps_test = list(test_dataset.text)
    cider_test = Cider(PTBTokenizer.tokenize(ref_caps_test))

    # Model and dataloaders
    Transformer, TransformerEncoder, TransformerDecoderLayer, ScaledDotProductAttention = model_factory(
        args)
    encoder = TransformerEncoder(3,
                                 0,
                                 attention_module=ScaledDotProductAttention,
                                 d_in=args.dim_feats,
                                 d_k=args.d_k,
                                 d_v=args.d_v,
                                 h=args.head)
    decoder = TransformerDecoderLayer(len(text_field.vocab),
                                      54,
                                      3,
                                      text_field.vocab.stoi['<pad>'],