def create_and_check_electra_for_question_answering(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    sequence_labels,
    token_labels,
    choice_labels,
    fake_token_labels,
):
    model = ElectraForQuestionAnswering(config=config)
    model.to(torch_device)
    model.eval()
    result = model(
        input_ids,
        attention_mask=input_mask,
        token_type_ids=token_type_ids,
        start_positions=sequence_labels,
        end_positions=sequence_labels,
    )
    self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
    self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))
def create_and_check_electra_for_question_answering(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    sequence_labels,
    token_labels,
    choice_labels,
    fake_token_labels,
):
    model = ElectraForQuestionAnswering(config=config)
    model.to(torch_device)
    model.eval()
    loss, start_logits, end_logits = model(
        input_ids,
        attention_mask=input_mask,
        token_type_ids=token_type_ids,
        start_positions=sequence_labels,
        end_positions=sequence_labels,
    )
    result = {
        "loss": loss,
        "start_logits": start_logits,
        "end_logits": end_logits,
    }
    self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length])
    self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length])
    self.check_loss_output(result)
for conf in configurations:
    conf_id = '{}_config_{}_{}_{}'.format(j, *conf)
    print('Run [{}] --learning rate: {}, --hidden size: {}, --hidden layer: {}'.format(j, *conf))

    # Initializing an ELECTRA electra-base-uncased style configuration
    electra_conf = ElectraConfig(hidden_size=conf[1], num_hidden_layers=conf[2])

    # Initializing a model from the electra-base-uncased style configuration
    model = ElectraForQuestionAnswering(electra_conf)
    configuration = model.config
    model.to(device)
    model.train()

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

    learning_rate = conf[0]
    optim = AdamW(model.parameters(), lr=learning_rate)

    loss_records = []
    bi = 0
    for epoch in tqdm(range(EPOCHS)):
        for batch in train_loader:
            if bi > 3:  # only run the first few batches (quick smoke-test guard)
                break
            optim.zero_grad()
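            # -- a minimal sketch of how the rest of the training step could proceed;
            # the batch keys below (input_ids, attention_mask, start_positions,
            # end_positions) are an assumption about a SQuAD-style dataset and are
            # not taken from the original snippet --
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            start_positions = batch['start_positions'].to(device)
            end_positions = batch['end_positions'].to(device)
            outputs = model(input_ids,
                            attention_mask=attention_mask,
                            start_positions=start_positions,
                            end_positions=end_positions)
            loss = outputs[0]  # first element is the QA loss when start/end positions are given
            loss.backward()
            optim.step()
            loss_records.append(loss.item())
            bi += 1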