def prepare_config_and_inputs_for_decoder(self):
    """Return the base config and inputs, extended for decoder-style use.

    Delegates to ``prepare_config_and_inputs``, switches ``config.is_decoder``
    on, and appends two extra tensors to the returned tuple: encoder hidden
    states requested with dims ``[batch_size, seq_length, hidden_size]`` and
    an encoder attention mask produced by ``ids_tensor`` with ``vocab_size=2``.
    """
    base_outputs = self.prepare_config_and_inputs()
    (
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
    ) = base_outputs

    config.is_decoder = True

    hidden_states_dims = [self.batch_size, self.seq_length, self.hidden_size]
    encoder_hidden_states = floats_tensor(hidden_states_dims)

    mask_dims = [self.batch_size, self.seq_length]
    encoder_attention_mask = ids_tensor(mask_dims, vocab_size=2)

    decoder_outputs = (
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        encoder_hidden_states,
        encoder_attention_mask,
    )
    return decoder_outputs
def create_and_check_reader(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    scorer_encoder_inputs,
    reader_inputs,
    sequence_labels,
    token_labels,
    choice_labels,
):
    """Run ``RealmReader`` in eval mode and check its outputs are 0-dim.

    Only ``config`` and ``reader_inputs`` are used; the first three entries
    of ``reader_inputs`` are passed as ids, attention mask and token type
    ids. The remaining parameters are accepted but not read here.
    """
    reader = RealmReader(config=config)
    reader.to(torch_device)
    reader.eval()

    relevance_score = floats_tensor([self.reader_beam_size])
    outputs = reader(
        reader_inputs[0],
        attention_mask=reader_inputs[1],
        token_type_ids=reader_inputs[2],
        relevance_score=relevance_score,
    )

    # Each of these output fields must have an empty shape.
    for field_name in ("block_idx", "candidate", "start_pos", "end_pos"):
        self.parent.assertEqual(getattr(outputs, field_name).shape, ())
def prepare_config_and_inputs(self):
    """Return ``(config, input_values, attention_mask)`` for the model tests.

    ``input_values`` comes from ``floats_tensor`` (called with
    ``self.vocab_size`` as its second positional argument) and the mask from
    ``random_attention_mask``; both are requested with dims
    ``[batch_size, seq_length]``. The config is obtained from ``get_config``.
    """
    batch, length = self.batch_size, self.seq_length

    input_values = floats_tensor([batch, length], self.vocab_size)
    attention_mask = random_attention_mask([batch, length])

    return self.get_config(), input_values, attention_mask
def prepare_config_and_inputs(self):
    """Return ``(config, input_values, attention_mask)`` with a ``HubertConfig``.

    The inputs are requested with dims ``[batch_size, seq_length]``. The
    config is built by forwarding the tester attributes listed below, each
    under a keyword of the same name.
    """
    batch, length = self.batch_size, self.seq_length
    input_values = floats_tensor([batch, length], self.vocab_size)
    attention_mask = random_attention_mask([batch, length])

    # Tester attributes copied one-to-one into the config keyword arguments.
    forwarded_fields = (
        "hidden_size",
        "feat_extract_norm",
        "feat_extract_dropout",
        "feat_extract_activation",
        "conv_dim",
        "conv_stride",
        "conv_kernel",
        "conv_bias",
        "num_conv_pos_embeddings",
        "num_conv_pos_embedding_groups",
        "num_hidden_layers",
        "num_attention_heads",
        "hidden_dropout_prob",
        "intermediate_size",
        "layer_norm_eps",
        "hidden_act",
        "initializer_range",
        "vocab_size",
    )
    config_kwargs = {field: getattr(self, field) for field in forwarded_fields}
    config = HubertConfig(**config_kwargs)

    return config, input_values, attention_mask
def create_and_check_encoder(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    scorer_encoder_inputs,
    reader_inputs,
    sequence_labels,
    token_labels,
    choice_labels,
):
    """Run ``RealmKnowledgeAugEncoder`` in eval mode and check the logits shape.

    The first three entries of ``scorer_encoder_inputs`` are passed as ids,
    attention mask and token type ids, with ``token_labels`` as labels. The
    logits must have shape
    ``(batch_size * num_candidates, seq_length, vocab_size)``.
    """
    encoder = RealmKnowledgeAugEncoder(config=config)
    encoder.to(torch_device)
    encoder.eval()

    relevance_score = floats_tensor([self.batch_size, self.num_candidates])
    outputs = encoder(
        scorer_encoder_inputs[0],
        attention_mask=scorer_encoder_inputs[1],
        token_type_ids=scorer_encoder_inputs[2],
        relevance_score=relevance_score,
        labels=token_labels,
    )

    expected_shape = (
        self.batch_size * self.num_candidates,
        self.seq_length,
        self.vocab_size,
    )
    self.parent.assertEqual(outputs.logits.shape, expected_shape)
def prepare_config_and_inputs_for_common(self):
    """Return ``(config, inputs_dict)`` with text and visual inputs.

    ``input_ids`` and ``visual_embeds`` are always created. The two attention
    masks (all-ones ``torch.long`` tensors on ``torch_device``) and the two
    token-type tensors are created only when the matching ``use_*`` flag on
    the tester is truthy; otherwise the entry is ``None``.
    """
    text_dims = [self.batch_size, self.seq_length]
    visual_dims = [self.batch_size, self.visual_seq_length]

    input_ids = ids_tensor(text_dims, self.vocab_size)
    visual_embeds = floats_tensor(visual_dims + [self.visual_embedding_dim])

    attention_mask = (
        torch.ones((self.batch_size, self.seq_length), dtype=torch.long, device=torch_device)
        if self.use_attention_mask
        else None
    )
    visual_attention_mask = (
        torch.ones((self.batch_size, self.visual_seq_length), dtype=torch.long, device=torch_device)
        if self.use_visual_attention_mask
        else None
    )
    token_type_ids = (
        ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
        if self.use_token_type_ids
        else None
    )
    visual_token_type_ids = (
        ids_tensor([self.batch_size, self.visual_seq_length], self.type_vocab_size)
        if self.use_visual_token_type_ids
        else None
    )

    config = self.prepare_config()
    inputs_dict = {
        "input_ids": input_ids,
        "token_type_ids": token_type_ids,
        "attention_mask": attention_mask,
        "visual_embeds": visual_embeds,
        "visual_token_type_ids": visual_token_type_ids,
        "visual_attention_mask": visual_attention_mask,
    }
    return config, inputs_dict
def check_xvector_training(self, config, *args):
    """Run one training step of ``UniSpeechSatForXVector`` and check the loss.

    The base model is frozen, the first three rows of the input are
    zero-padded beyond 1/4, 1/2 and the full sequence length respectively,
    and the resulting loss must not be infinite before ``backward`` is called.
    """
    config.ctc_zero_infinity = True

    xvector_model = UniSpeechSatForXVector(config=config)
    xvector_model.to(torch_device)
    xvector_model.train()

    # freeze everything but the classification head
    xvector_model.freeze_base_model()

    # use a longer sequence length to account for TDNN temporal downsampling
    input_values = floats_tensor([self.batch_size, self.seq_length * 2], self.vocab_size)

    total_length = input_values.shape[-1]
    input_lengths = [total_length // divisor for divisor in (4, 2, 1)]
    labels = ids_tensor((input_values.shape[0], 1), len(xvector_model.config.id2label))

    # pad input: zero out everything past each row's length
    for row, valid_length in enumerate(input_lengths):
        input_values[row, valid_length:] = 0.0

    loss = xvector_model(input_values, labels=labels).loss
    self.parent.assertFalse(torch.isinf(loss).item())

    loss.backward()
def prepare_config_and_inputs_for_flickr(self):
    """Return ``(config, input_dict)`` extended with Flickr-style entries.

    Adds ``region_to_phrase_position`` (ids for the text positions,
    concatenated along the last dim with a block of ``-1`` for the visual
    positions) and ``labels`` (a float tensor when ``use_labels`` is truthy,
    otherwise ``None``) to the dict from
    ``prepare_config_and_inputs_for_common``.
    """
    text_positions = ids_tensor([self.batch_size, self.seq_length], self.visual_seq_length)
    visual_fill = (
        torch.ones(self.batch_size, self.visual_seq_length, dtype=torch.long, device=torch_device) * -1
    )
    region_to_phrase_position = torch.cat((text_positions, visual_fill), dim=-1)

    if self.use_labels:
        total_length = self.seq_length + self.visual_seq_length
        flickr_labels = floats_tensor([self.batch_size, total_length, self.visual_seq_length])
    else:
        flickr_labels = None

    config, input_dict = self.prepare_config_and_inputs_for_common()

    extra_entries = {
        "region_to_phrase_position": region_to_phrase_position,
        "labels": flickr_labels,
    }
    input_dict.update(extra_entries)
    return config, input_dict
def test_training(self):
    """Check that a backward pass runs for the encoder and the open-QA model.

    Returns immediately when ``self.model_tester.is_training`` is falsy.
    Otherwise the test runs two forward/backward passes:

    1. ``RealmKnowledgeAugEncoder`` on the scorer-encoder inputs with
       all-zero labels.
    2. ``RealmForOpenQA`` with a five-record ``RealmRetriever``, using only
       the first example of the batch.
    """
    if not self.model_tester.is_training:
        return

    config, *inputs = self.model_tester.prepare_config_and_inputs()
    input_ids, token_type_ids, input_mask, scorer_encoder_inputs = inputs[0:4]
    config.return_dict = True

    tokenizer = RealmTokenizer.from_pretrained("google/realm-orqa-nq-openqa")

    # RealmKnowledgeAugEncoder training
    model = RealmKnowledgeAugEncoder(config)
    model.to(torch_device)
    model.train()

    inputs_dict = {
        "input_ids": scorer_encoder_inputs[0].to(torch_device),
        "attention_mask": scorer_encoder_inputs[1].to(torch_device),
        "token_type_ids": scorer_encoder_inputs[2].to(torch_device),
        "relevance_score": floats_tensor([self.model_tester.batch_size, self.model_tester.num_candidates]),
    }
    inputs_dict["labels"] = torch.zeros(
        (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
    )
    loss = model(**inputs_dict).loss
    loss.backward()

    # RealmForOpenQA training
    openqa_config = copy.deepcopy(config)
    openqa_config.vocab_size = 30522  # the retrieved texts will inevitably have more than 99 vocabs.
    openqa_config.num_block_records = 5
    openqa_config.searcher_beam_size = 2

    # The builtin ``object`` is used as dtype: the ``np.object`` alias was
    # deprecated in NumPy 1.20 and removed in 1.24, where it raises
    # AttributeError.
    block_records = np.array(
        [
            b"This is the first record.",
            b"This is the second record.",
            b"This is the third record.",
            b"This is the fourth record.",
            b"This is the fifth record.",
        ],
        dtype=object,
    )
    retriever = RealmRetriever(block_records, tokenizer)
    model = RealmForOpenQA(openqa_config, retriever)
    model.to(torch_device)
    model.train()

    # Only the first example of the batch is fed to the open-QA model.
    inputs_dict = {
        "input_ids": input_ids[:1].to(torch_device),
        "attention_mask": input_mask[:1].to(torch_device),
        "token_type_ids": token_type_ids[:1].to(torch_device),
        "answer_ids": input_ids[:1].tolist(),
    }
    inputs = self._prepare_for_class(inputs_dict, RealmForOpenQA)
    loss = model(**inputs).reader_output.loss
    loss.backward()
def prepare_config_and_inputs_for_vqa(self):
    """Return ``(config, input_dict)`` with a VQA ``labels`` entry added.

    ``labels`` is a float tensor requested with dims
    ``[batch_size, num_labels]`` when ``use_labels`` is truthy, otherwise
    ``None``. The other entries come from
    ``prepare_config_and_inputs_for_common``.
    """
    vqa_labels = floats_tensor([self.batch_size, self.num_labels]) if self.use_labels else None

    config, input_dict = self.prepare_config_and_inputs_for_common()

    label_entry = {"labels": vqa_labels}
    input_dict.update(label_entry)
    return config, input_dict
def prepare_config_and_inputs(self):
    """Return ``(config, pixel_values, labels)`` for the image model tests.

    ``pixel_values`` is requested with dims
    ``[batch_size, num_channels, image_size, image_size]``. ``labels`` is an
    ids tensor of dims ``[batch_size]`` bounded by
    ``type_sequence_label_size`` when ``use_labels`` is truthy, otherwise
    ``None``.
    """
    side = self.image_size
    image_dims = [self.batch_size, self.num_channels, side, side]
    pixel_values = floats_tensor(image_dims)

    if self.use_labels:
        labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
    else:
        labels = None

    return self.get_config(), pixel_values, labels
def prepare_config_and_inputs(self):
    """Return ``(config, pixel_values, pixel_mask, mask_labels, class_labels)``.

    All tensors are placed on ``torch_device``. ``pixel_mask`` is all ones.
    ``mask_labels`` (float) and ``class_labels`` (long) are produced by
    thresholding ``torch.rand`` samples at 0.5.
    """
    height, width = self.min_size, self.max_size

    raw_pixels = floats_tensor([self.batch_size, self.num_channels, height, width])
    pixel_values = raw_pixels.to(torch_device)

    pixel_mask = torch.ones([self.batch_size, height, width], device=torch_device)

    mask_noise = torch.rand([self.batch_size, self.num_labels, height, width], device=torch_device)
    mask_labels = (mask_noise > 0.5).float()

    class_noise = torch.rand((self.batch_size, self.num_labels), device=torch_device)
    class_labels = (class_noise > 0.5).long()

    return self.get_config(), pixel_values, pixel_mask, mask_labels, class_labels