Exemple #1
0
def model_init(trial):
    """Return a freshly loaded model for a hyperparameter-search trial.

    Args:
        trial: Not used by this function; presumably present only because the
            caller invokes the factory with a trial object -- confirm against
            the call site.

    Returns:
        A ``LxmertForQuestionAnswering`` instance loaded from the checkpoint
        identified by ``pretrained``, a name resolved from the enclosing
        scope.
    """
    return LxmertForQuestionAnswering.from_pretrained(pretrained)
Exemple #2
0
    def __init__(self, device='cuda:0'):
        """Load the detector, image preprocessor, tokenizer and GQA model.

        Args:
            device: Device the detector and the question-answering model are
                moved to; also stored on ``self.device``.
        """
        frcnn_checkpoint = "unc-nlp/frcnn-vg-finetuned"

        self.device = device

        # The detector and the image preprocessor share one configuration.
        frcnn_cfg = Config.from_pretrained(frcnn_checkpoint)
        detector = GeneralizedRCNN.from_pretrained(
            frcnn_checkpoint, config=frcnn_cfg
        )
        self.frcnn = detector.to(device)
        self.image_preprocess = Preprocess(frcnn_cfg)

        # Text side: tokenizer plus the GQA question-answering model.
        self.lxmert_tokenizer = LxmertTokenizer.from_pretrained(
            "unc-nlp/lxmert-base-uncased"
        )
        gqa_model = LxmertForQuestionAnswering.from_pretrained(
            "unc-nlp/lxmert-gqa-uncased"
        )
        self.lxmert_gqa = gqa_model.to(device)
Exemple #3
0
    def __init__(self):
        """Load the detector, image preprocessor, tokenizer and VQA model.

        No device placement is done here; every component stays wherever
        ``from_pretrained`` puts it.
        """
        detector_checkpoint = "unc-nlp/frcnn-vg-finetuned"
        tokenizer_checkpoint = "unc-nlp/lxmert-base-uncased"
        vqa_checkpoint = "unc-nlp/lxmert-vqa-uncased"

        # The detector and the image preprocessor share one configuration.
        self.config = Config.from_pretrained(detector_checkpoint)
        self.cnn = GeneralizedRCNN.from_pretrained(
            detector_checkpoint, config=self.config
        )
        self.image_preprocess = Preprocess(self.config)

        # Text side: tokenizer plus the VQA question-answering model.
        self.tokenizer = LxmertTokenizer.from_pretrained(tokenizer_checkpoint)
        self.vqa = LxmertForQuestionAnswering.from_pretrained(vqa_checkpoint)
    def create_and_check_lxmert_for_question_answering(
        self,
        config,
        input_ids,
        visual_feats,
        bounding_boxes,
        token_type_ids,
        input_mask,
        obj_labels,
        masked_lm_labels,
        matched_label,
        ans,
        output_attentions,
    ):
        """Run the QA model through several call styles and check its output.

        A ``LxmertForQuestionAnswering`` is built from ``config``, moved to
        ``torch_device`` and put in eval mode. It is then called four times
        with different keyword combinations; only the last result is
        inspected, and its ``question_answering_score`` must have shape
        ``(self.batch_size, self.num_qa_labels)``.

        ``obj_labels``, ``masked_lm_labels`` and ``matched_label`` are
        accepted but not used here.
        """
        qa_model = LxmertForQuestionAnswering(config=config)
        qa_model.to(torch_device)
        qa_model.eval()

        # Arguments shared by the forward passes below.
        positional_inputs = (input_ids, visual_feats, bounding_boxes)
        mask_kwargs = {
            "token_type_ids": token_type_ids,
            "attention_mask": input_mask,
        }

        # The first three passes only need to complete without raising;
        # their outputs are discarded.
        qa_model(
            *positional_inputs,
            labels=ans,
            output_attentions=output_attentions,
            return_dict=True,
            **mask_kwargs,
        )
        qa_model(*positional_inputs, labels=ans)
        qa_model(
            *positional_inputs,
            labels=ans,
            output_attentions=output_attentions,
            **mask_kwargs,
        )

        # Final pass flips the attention flag; this is the output asserted on.
        result = qa_model(
            *positional_inputs,
            labels=ans,
            output_attentions=not output_attentions,
            return_dict=True,
            **mask_kwargs,
        )

        expected_shape = (self.batch_size, self.num_qa_labels)
        self.parent.assertEqual(result.question_answering_score.shape,
                                expected_shape)
def build_model_with_pretrain_weights():
    """Copy pretrained PyTorch LXMERT weights into the TF model and evaluate.

    Steps:
      1. Build a validation ``DataGenerator`` from ``MINIVAL_QA_PATH`` and
         ``VAL_IMGFEAT_PATH``.
      2. Create a ``TFLxmertForQuestionAnswering`` and overwrite its weights
         with the parameters of the "unc-nlp/lxmert-vqa-uncased" PyTorch
         checkpoint, pairing them positionally.
      3. Predict an answer for every validation question and log the accuracy
         reported by ``evaluate``.

    Returns:
        None. The model is not returned; the result is only logged.
    """
    logger.info("start loading %s" % (MINIVAL_QA_PATH))
    qa_data = get_QA([MINIVAL_QA_PATH])
    val_img_ids, val_ques_ids, val_ques_inputs, val_labels, val_quesid2data = qa_data
    val_generator = DataGenerator(
        val_img_ids,
        val_ques_ids,
        val_ques_inputs,
        val_labels,
        [VAL_IMGFEAT_PATH],
        BATCH_SIZE,
        False,
    )
    logger.info("successfully build val generator")

    model = TFLxmertForQuestionAnswering()
    tf_weights = model.get_weights()

    lxmert_pytorch = LxmertForQuestionAnswering.from_pretrained(
        "unc-nlp/lxmert-vqa-uncased")

    # Pair PyTorch parameters with TF weights by position; a parameter whose
    # shape differs from its TF counterpart is transposed.
    # NOTE(review): a square matrix has equal shapes and is therefore never
    # transposed, and zip() silently stops at the shorter sequence -- confirm
    # both are intended.
    pt_weights = [
        param.data if param.data.shape == weight.shape else param.data.T
        for param, weight in zip(lxmert_pytorch.parameters(), tf_weights)
    ]
    model.set_weights(pt_weights)

    # Single pass over the validation data, collecting one answer per question.
    quesid2ans = {}
    for ques_ids, x_batch_val, y_batch_val in val_generator:
        val_logits = val_step(x_batch_val, y_batch_val, model)
        # Highest-scoring class index along axis 1 for each row.
        predicted = tf.argmax(val_logits, axis=1)
        for qid, class_idx in zip(ques_ids, predicted):
            quesid2ans[qid.item()] = LABEL2ANS[class_idx]

    accuracy_pct = evaluate(quesid2ans, val_quesid2data) * 100.
    logger.info("\nVal accuracy: %0.2f\n" % (accuracy_pct))
    def resize_lxmert_num_qa_labels(
        self,
        config,
        input_ids,
        visual_feats,
        bounding_boxes,
        token_type_ids,
        input_mask,
        obj_labels,
        masked_lm_labels,
        matched_label,
        ans,
        output_attentions,
    ):
        """Check that ``resize_num_qa_labels`` shrinks and grows the QA head.

        A ``LxmertForPreTraining`` and a ``LxmertForQuestionAnswering`` are
        built from ``config`` and run once at the starting label count, once
        after resizing to fewer labels and once after resizing to more
        labels. After each run the ``question_answering_score`` output must
        have shape ``(self.batch_size, <current label count>)``.

        ``obj_labels``, ``masked_lm_labels``, ``matched_label`` and
        ``output_attentions`` are accepted but not used here.
        """

        start_labels = config.num_qa_labels
        num_large_labels = config.num_qa_labels * 2
        # Halve the label count so the "small" case really shrinks the head.
        # It was previously multiplied by 2, which made it identical to
        # num_large_labels and left the shrinking path untested.
        num_small_labels = config.num_qa_labels // 2
        less_labels_ans = ids_tensor([self.batch_size], num_small_labels)
        more_labels_ans = ids_tensor([self.batch_size], num_large_labels)
        model_pretrain = LxmertForPreTraining(config=config).to(torch_device)
        model_qa = LxmertForQuestionAnswering(config=config).to(torch_device)
        # Set after both models are constructed; it only feeds the
        # start/end inequality assertion below.
        config.num_labels = num_small_labels
        end_labels = config.num_labels

        # Baseline forward passes at the original label count.
        result_pretrain = model_pretrain(
            input_ids,
            visual_feats,
            bounding_boxes,
            token_type_ids=token_type_ids,
            attention_mask=input_mask,
            ans=ans,
        )

        result_qa = model_qa(
            input_ids,
            visual_feats,
            bounding_boxes,
            labels=ans,
            token_type_ids=token_type_ids,
            attention_mask=input_mask,
        )

        # Shrink both heads and run again with answers drawn from the
        # smaller label range.
        model_pretrain.resize_num_qa_labels(num_small_labels)
        model_qa.resize_num_qa_labels(num_small_labels)

        result_pretrain_less = model_pretrain(
            input_ids,
            visual_feats,
            bounding_boxes,
            token_type_ids=token_type_ids,
            attention_mask=input_mask,
            ans=less_labels_ans,
        )

        result_qa_less = model_qa(
            input_ids,
            visual_feats,
            bounding_boxes,
            labels=less_labels_ans,
            token_type_ids=token_type_ids,
            attention_mask=input_mask,
        )

        # Grow both heads and run again with answers drawn from the
        # larger label range.
        model_pretrain.resize_num_qa_labels(num_large_labels)
        model_qa.resize_num_qa_labels(num_large_labels)

        result_pretrain_more = model_pretrain(
            input_ids,
            visual_feats,
            bounding_boxes,
            token_type_ids=token_type_ids,
            attention_mask=input_mask,
            ans=more_labels_ans,
        )

        result_qa_more = model_qa(
            input_ids,
            visual_feats,
            bounding_boxes,
            labels=more_labels_ans,
            token_type_ids=token_type_ids,
            attention_mask=input_mask,
        )

        # Label count reported by the QA model after the final resize.
        model_qa_labels = model_qa.num_qa_labels

        self.parent.assertNotEqual(start_labels, end_labels)
        self.parent.assertNotEqual(model_qa_labels, start_labels)
        self.parent.assertEqual(result_qa.question_answering_score.shape,
                                (self.batch_size, start_labels))
        self.parent.assertEqual(result_pretrain.question_answering_score.shape,
                                (self.batch_size, start_labels))
        self.parent.assertEqual(result_qa_less.question_answering_score.shape,
                                (self.batch_size, num_small_labels))
        self.parent.assertEqual(
            result_pretrain_less.question_answering_score.shape,
            (self.batch_size, num_small_labels))
        self.parent.assertEqual(result_qa_more.question_answering_score.shape,
                                (self.batch_size, num_large_labels))
        self.parent.assertEqual(
            result_pretrain_more.question_answering_score.shape,
            (self.batch_size, num_large_labels))
Exemple #7
0
                                          dtype=dtype)
                item[key] = item[key].reshape(shape)
                item[key].setflags(write=False)

            # Normalize the boxes (to 0 ~ 1)
            item["normalized_boxes"] = item["boxes"].copy()
            img_h, img_w = item["img_h"], item["img_w"]
            item["normalized_boxes"][:, (0, 2)] /= img_w
            item["normalized_boxes"][:, (1, 3)] /= img_h
            item["normalized_boxes"].setflags(write=False)

            imgid2img[item["img_id"]] = item

# Tokenizer for the questions.
lxmert_tokenizer = LxmertTokenizer.from_pretrained("unc-nlp/lxmert-base-uncased")

# GQA question-answering model, moved to the GPU once loaded.
lxmert_gqa = LxmertForQuestionAnswering.from_pretrained("unc-nlp/lxmert-gqa-uncased")
lxmert_gqa = lxmert_gqa.to("cuda")

# Label-to-answer data fetched from the LXMERT repository.
GQA_URL = "https://raw.githubusercontent.com/airsplay/lxmert/master/data/gqa/trainval_label2ans.json"
gqa_answers = utils.get_data(GQA_URL)

# Running counter for the evaluation loop that follows.
accuracy = 0
for question_id, question_obj in tqdm(dataset.items()):
    inputs = lxmert_tokenizer(question_obj["question"],
                              return_token_type_ids=True,
                              return_attention_mask=True,
                              add_special_tokens=True,
                              return_tensors="pt").to("cuda")

    output_gqa = lxmert_gqa(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,