def gen_model_loaders(config):
    """Build the model, its tokenizers, and the train/eval data loaders.

    Args:
        config: Configuration object. It is handed as-is to
            ``M.build_model``; ``data``, ``batch_size`` and ``eval_size``
            are read from it here.

    Returns:
        The tuple ``(model, tokenizers, train_loader, eval_loader)``.
    """
    model, tokenizers = M.build_model(config)
    src_tok = tokenizers.src
    tgt_tok = tokenizers.tgt

    # One collate function, built from both pad-token ids, serves both loaders.
    collate = PadSequence(src_tok.pad_token_id, tgt_tok.pad_token_id)

    # The fourth positional argument is True for the training dataset and
    # False for the evaluation one (presumably a train-split flag -- confirm
    # against IndicDataset).
    train_set = IndicDataset(src_tok, tgt_tok, config.data, True)
    train_loader = DataLoader(
        train_set,
        batch_size=config.batch_size,
        shuffle=False,
        collate_fn=collate,
    )

    eval_set = IndicDataset(src_tok, tgt_tok, config.data, False)
    eval_loader = DataLoader(
        eval_set,
        batch_size=config.eval_size,
        shuffle=False,
        collate_fn=collate,
    )

    return model, tokenizers, train_loader, eval_loader
def val_dataloader(self):
    """Return a non-shuffling loader over the validation dataset.

    Reads ``self.tokenizers`` (``src`` / ``tgt``), ``self.config``
    (``data`` / ``eval_size``) and ``self.pad_sequence`` (used as the
    collate function).
    """
    tokenizers = self.tokenizers
    # Fourth positional argument is False here (presumably selects the
    # non-training split -- confirm against IndicDataset).
    dataset = IndicDataset(tokenizers.src, tokenizers.tgt, self.config.data, False)
    return DataLoader(
        dataset,
        batch_size=self.config.eval_size,
        shuffle=False,
        collate_fn=self.pad_sequence,
    )
def train_dataloader(self):
    """Return a non-shuffling loader over the training dataset.

    Reads ``self.tokenizers`` (``src`` / ``tgt``), ``self.hparam``
    (``data`` / ``batch_size``) and ``self.pad_sequence`` (used as the
    collate function).
    """
    # NOTE(review): the attribute read here is `hparam` (singular); confirm
    # that this is the name the enclosing class actually defines.
    settings = self.hparam
    tokenizers = self.tokenizers
    # Fourth positional argument is True here (presumably selects the
    # training split -- confirm against IndicDataset).
    dataset = IndicDataset(tokenizers.src, tokenizers.tgt, settings.data, True)
    return DataLoader(
        dataset,
        batch_size=settings.batch_size,
        shuffle=False,
        collate_fn=self.pad_sequence,
    )
def train_dataloader(self):
    """Return a non-shuffling loader over the training dataset.

    Reads ``self.tokenizers`` (``src`` / ``tgt``), ``self.config``
    (``data`` / ``batch_size``) and ``self.pad_sequence`` (used as the
    collate function).
    """
    # Fourth positional argument is True here (presumably selects the
    # training split -- confirm against IndicDataset).
    dataset = IndicDataset(
        self.tokenizers.src,
        self.tokenizers.tgt,
        self.config.data,
        True,
    )
    return DataLoader(
        dataset,
        batch_size=self.config.batch_size,
        shuffle=False,
        collate_fn=self.pad_sequence,
    )
def val_dataloader(self):
    """Return a non-shuffling loader over the validation dataset.

    ``IndicDataset`` and ``PadSequence`` are imported from ``data`` inside
    the function, and a new ``PadSequence`` collate function is built on
    every call from the source and target pad-token ids.

    Reads ``self.tokenizers`` (``src`` / ``tgt``) and ``self.config``
    (``data`` / ``eval_size``).
    """
    from data import IndicDataset, PadSequence

    src_tok = self.tokenizers.src
    tgt_tok = self.tokenizers.tgt
    collate = PadSequence(src_tok.pad_token_id, tgt_tok.pad_token_id)

    # Fourth positional argument is False here (presumably selects the
    # non-training split -- confirm against IndicDataset).
    dataset = IndicDataset(src_tok, tgt_tok, self.config.data, False)
    return DataLoader(
        dataset,
        batch_size=self.config.eval_size,
        shuffle=False,
        collate_fn=collate,
    )
def test_dataloader(self):
    """Return a non-shuffling, one-sample-per-batch loader for testing.

    ``IndicDataset`` and ``PadSequence`` are imported from ``data`` inside
    the function, and a new ``PadSequence`` collate function is built on
    every call from the source and target pad-token ids.

    Reads ``self.src_tokenizers``, ``self.tgt_tokenizers`` and
    ``self.config.data``.
    """
    from data import IndicDataset, PadSequence

    src_tok = self.src_tokenizers
    tgt_tok = self.tgt_tokenizers
    collate = PadSequence(src_tok.pad_token_id, tgt_tok.pad_token_id)

    # Positional flags are (False, True): the fourth matches the value used
    # for non-training datasets, the fifth is presumably a test-mode switch
    # -- TODO confirm against IndicDataset.
    dataset = IndicDataset(src_tok, tgt_tok, self.config.data, False, True)
    return DataLoader(
        dataset,
        batch_size=1,
        shuffle=False,
        collate_fn=collate,
    )
def prepare_data(self):
    """Create the two datasets and store them on the instance.

    Sets ``self.indic_train`` and ``self.indic_test``, both built from
    ``self.tokenizers.src``, ``self.tokenizers.tgt`` and
    ``self.config.data``. They differ only in the fourth positional
    argument: True for ``indic_train``, False for ``indic_test``
    (presumably a train-split flag -- confirm against IndicDataset).
    """
    src_tok = self.tokenizers.src
    tgt_tok = self.tokenizers.tgt
    self.indic_train = IndicDataset(src_tok, tgt_tok, self.config.data, True)
    self.indic_test = IndicDataset(src_tok, tgt_tok, self.config.data, False)