import torch
from torch.nn.utils.rnn import pad_sequence


def __call__(self, batch):
    # Unpack the batch into per-field lists
    myid, text_input, humor_rating, humor_contr = map(list, zip(*batch))
    input_lengths = torch.tensor(
        [len(s) for s in text_input], device=self.device)
    # Attention mask over the non-padded positions
    max_length = max(input_lengths)
    inputs_pad_mask = pad_mask(input_lengths,
                               max_length=max_length,
                               device=self.device)
    # Pad inputs to the longest sequence in the batch
    padded_inputs = (
        pad_sequence(text_input,
                     batch_first=True,
                     padding_value=self.pad_indx)
        .to(self.device))
    humor_rating = mktensor(humor_rating, dtype=torch.float)
    humor_contr = mktensor(humor_contr, dtype=torch.long)
    return myid, padded_inputs, inputs_pad_mask, humor_rating, humor_contr
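# pad_mask and mktensor are project helpers that are not shown in this file.
# Below is a minimal sketch of what they are assumed to do (names and
# signatures inferred from the calls above); the actual implementations in the
# repo may differ.
def pad_mask(lengths, max_length=None, device='cpu'):
    """Return a (batch, max_length) float mask: 1 at valid positions, 0 at padding."""
    max_length = int(max_length) if max_length is not None else int(lengths.max())
    positions = torch.arange(max_length, device=device).unsqueeze(0)  # (1, max_length)
    return (positions < lengths.unsqueeze(1)).float()                 # (batch, max_length)


def mktensor(data, dtype=torch.long):
    """Wrap a python list of numbers into a tensor of the requested dtype."""
    return torch.tensor(data, dtype=dtype)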
def __getitem__(self, index):
    text, humor = self.data[index]
    is_humor = self.label2idx(humor)
    if not self.transforms:
        text = self.tokenizer(text)
        text = mktensor(text['input_ids'], dtype=torch.long)
    else:
        for t in self.transforms:
            text = t(text)
    is_humor = int(is_humor)
    return text, is_humor
def __getitem__(self, index):
    if self.splitname == 'train':
        # Train rows carry the full annotation set
        (myid, text, is_humor, humor_rating,
         humor_controversy, offense_rating) = self.data[index]
    else:
        # Test rows only carry the id and the text
        myid, text = self.data[index]
        humor_rating = None
        humor_controversy = None
    if not self.transforms:
        text = self.tokenizer(text)
        text = mktensor(text['input_ids'], dtype=torch.long)
    else:
        for t in self.transforms:
            text = t(text)
    myid = int(myid)
    if self.splitname == 'train':
        # Cast labels to plain python numbers for collation
        humor_rating = float(humor_rating)
        humor_controversy = int(humor_controversy)
    return myid, text, humor_rating, humor_controversy
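# A minimal usage sketch, assuming the dataset class holding __getitem__ and
# the collator class holding __call__ are named HumorDataset and HumorCollator
# (hypothetical names) and accept the constructor arguments shown; `tokenizer`
# is assumed to be a HuggingFace-style tokenizer built elsewhere.
from torch.utils.data import DataLoader

dataset = HumorDataset(splitname='train', tokenizer=tokenizer, transforms=None)
collate_fn = HumorCollator(pad_indx=tokenizer.pad_token_id, device='cpu')
loader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

for myid, inputs, inputs_pad_mask, humor_rating, humor_contr in loader:
    # inputs: (batch, max_len) padded token ids
    # inputs_pad_mask: (batch, max_len) attention mask
    # humor_rating: float tensor, humor_contr: long tensor
    break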