def model_init(trial):
    """Initializes a fresh model for each hyperparameter-search trial."""
    # `pretrained` is a module-level checkpoint name; returning a newly
    # instantiated model per trial keeps runs from sharing weights.
    model = LxmertForQuestionAnswering.from_pretrained(pretrained)
    return model
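# A minimal usage sketch (not part of the original snippet): `model_init`
# is handed to `Trainer` so that `hyperparameter_search` can rebuild the
# model for every trial. `train_ds`/`eval_ds` and the output directory are
# assumed placeholders.
from transformers import Trainer, TrainingArguments

trainer = Trainer(
    model_init=model_init,  # fresh weights per trial
    args=TrainingArguments(output_dir="hp_search"),
    train_dataset=train_ds,
    eval_dataset=eval_ds,
)
best_run = trainer.hyperparameter_search(direction="maximize", n_trials=10)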
def __init__(self, device='cuda:0'):
    self.device = device
    # Load models and model components: the Faster R-CNN feature extractor,
    # its image preprocessor, the LXMERT tokenizer, and the GQA head.
    frcnn_cfg = Config.from_pretrained("unc-nlp/frcnn-vg-finetuned")
    self.frcnn = GeneralizedRCNN.from_pretrained(
        "unc-nlp/frcnn-vg-finetuned", config=frcnn_cfg).to(device)
    self.image_preprocess = Preprocess(frcnn_cfg)
    self.lxmert_tokenizer = LxmertTokenizer.from_pretrained(
        "unc-nlp/lxmert-base-uncased")
    self.lxmert_gqa = LxmertForQuestionAnswering.from_pretrained(
        "unc-nlp/lxmert-gqa-uncased").to(device)
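# A minimal inference sketch for the class above, following the HuggingFace
# LXMERT demo pipeline. Assumptions: the method name `answer` is
# hypothetical, and `frcnn_cfg` from __init__ is reachable here (e.g. saved
# as `self.frcnn_cfg`).
def answer(self, image_path, question):
    # Run the Faster R-CNN backbone to get region features and boxes.
    images, sizes, scales_yx = self.image_preprocess(image_path)
    output_dict = self.frcnn(
        images.to(self.device), sizes, scales_yx=scales_yx,
        padding="max_detections",
        max_detections=self.frcnn_cfg.max_detections,
        return_tensors="pt",
    )
    features = output_dict.get("roi_features")
    boxes = output_dict.get("normalized_boxes")
    # Tokenize the question and run the cross-modal QA head.
    inputs = self.lxmert_tokenizer(question, return_tensors="pt").to(self.device)
    output = self.lxmert_gqa(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        visual_feats=features,
        visual_pos=boxes,
        token_type_ids=inputs.token_type_ids,
    )
    # Index of the highest-scoring answer in the GQA answer vocabulary.
    return output.question_answering_score.argmax(-1).item()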
def __init__(self):
    # Same pipeline as above, but with the VQA-finetuned QA head and no
    # device placement (everything stays on the CPU).
    self.config = Config.from_pretrained("unc-nlp/frcnn-vg-finetuned")
    self.cnn = GeneralizedRCNN.from_pretrained(
        "unc-nlp/frcnn-vg-finetuned", config=self.config
    )
    self.image_preprocess = Preprocess(self.config)
    self.tokenizer = LxmertTokenizer.from_pretrained("unc-nlp/lxmert-base-uncased")
    self.vqa = LxmertForQuestionAnswering.from_pretrained(
        "unc-nlp/lxmert-vqa-uncased"
    )
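# Decoding sketch for the VQA head above (not in the original snippet).
# Given an `output` produced by self.vqa(...) as in the earlier inference
# sketch, predictions index into the VQA answer vocabulary, which lives
# next to the GQA one used later in this file.
VQA_URL = "https://raw.githubusercontent.com/airsplay/lxmert/master/data/vqa/trainval_label2ans.json"
vqa_answers = utils.get_data(VQA_URL)
answer = vqa_answers[output.question_answering_score.argmax(-1).item()]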
def build_model_with_pretrain_weights():
    logger.info("start loading %s" % (MINIVAL_QA_PATH))
    val_img_ids, val_ques_ids, val_ques_inputs, val_labels, val_quesid2data = get_QA(
        [MINIVAL_QA_PATH])
    val_generator = DataGenerator(val_img_ids, val_ques_ids, val_ques_inputs,
                                  val_labels, [VAL_IMGFEAT_PATH], BATCH_SIZE, False)
    logger.info("successfully built val generator")

    # Build the TF model, then copy weights over from the PyTorch checkpoint.
    # The copy relies on both models declaring their parameters in the same
    # order; shape mismatches are assumed to be transposed dense kernels.
    model = TFLxmertForQuestionAnswering()
    tf_weights = model.get_weights()
    lxmert_pytorch = LxmertForQuestionAnswering.from_pretrained(
        "unc-nlp/lxmert-vqa-uncased")
    pt_weights = []
    for param, weight in zip(lxmert_pytorch.parameters(), tf_weights):
        if param.data.shape == weight.shape:
            pt_weights.append(param.data.numpy())
        else:
            pt_weights.append(param.data.numpy().T)
    model.set_weights(pt_weights)

    # Run a validation loop.
    quesid2ans = {}
    for ques_ids, x_batch_val, y_batch_val in val_generator:
        # val_logits: batch_size * num_classes
        val_logits = val_step(x_batch_val, y_batch_val, model)
        # label: batch_size * 1
        label = tf.argmax(val_logits, axis=1)
        for qid, l in zip(ques_ids, label):
            ans = LABEL2ANS[int(l)]
            quesid2ans[qid.item()] = ans
    logger.info("\nVal accuracy: %0.2f\n"
                % (evaluate(quesid2ans, val_quesid2data) * 100.))
    return
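# A hedged sketch of the `val_step` helper called above (assumption: the
# custom TF model maps a feature batch straight to QA logits; the labels
# are unused at inference time).
import tensorflow as tf

@tf.function
def val_step(x_batch, y_batch, model):
    # Forward pass only: no gradients are taken during validation.
    return model(x_batch, training=False)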
# NOTE: this snippet begins mid-statement, inside the loop that decodes a
# serialized image-feature buffer (the call supplying `dtype` is cut off).
            dtype=dtype)
        item[key] = item[key].reshape(shape)
        item[key].setflags(write=False)

    # Normalize the boxes (to 0 ~ 1)
    item["normalized_boxes"] = item["boxes"].copy()
    img_h, img_w = item["img_h"], item["img_w"]
    item["normalized_boxes"][:, (0, 2)] /= img_w
    item["normalized_boxes"][:, (1, 3)] /= img_h
    item["normalized_boxes"].setflags(write=False)
    imgid2img[item["img_id"]] = item

lxmert_tokenizer = LxmertTokenizer.from_pretrained(
    "unc-nlp/lxmert-base-uncased")
lxmert_gqa = LxmertForQuestionAnswering.from_pretrained(
    "unc-nlp/lxmert-gqa-uncased").to("cuda")

# Answer vocabulary for decoding GQA predictions.
GQA_URL = "https://raw.githubusercontent.com/airsplay/lxmert/master/data/gqa/trainval_label2ans.json"
gqa_answers = utils.get_data(GQA_URL)

accuracy = 0
for question_id, question_obj in tqdm(dataset.items()):
    inputs = lxmert_tokenizer(
        question_obj["question"],
        return_token_type_ids=True,
        return_attention_mask=True,
        add_special_tokens=True,
        return_tensors="pt",
    ).to("cuda")
    output_gqa = lxmert_gqa(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        # The snippet is truncated here; the remaining arguments would be
        # the visual inputs and token_type_ids=inputs.token_type_ids.
    )
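# A hedged sketch of how the truncated evaluation loop plausibly continues,
# mirroring the earlier GQA pipeline. The visual inputs and the
# "imageId"/"answer" field names on `question_obj` are assumptions.
import torch

    img = imgid2img[question_obj["imageId"]]
    output_gqa = lxmert_gqa(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        # .copy() because the cached arrays were marked read-only above
        visual_feats=torch.from_numpy(img["features"].copy()).unsqueeze(0).to("cuda"),
        visual_pos=torch.from_numpy(img["normalized_boxes"].copy()).unsqueeze(0).to("cuda"),
        token_type_ids=inputs.token_type_ids,
    )
    pred = gqa_answers[output_gqa.question_answering_score.argmax(-1).item()]
    accuracy += pred == question_obj["answer"]
print("GQA accuracy:", accuracy / len(dataset))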