Example #1
def gen_model_loaders(config):
    model, tokenizers = M.build_model(config)
    pad_sequence = PadSequence(
        tokenizers.src.pad_token_id, tokenizers.tgt.pad_token_id)
    train_loader = DataLoader(IndicDataset(tokenizers.src, tokenizers.tgt, config.data, True),
                              batch_size=config.batch_size,
                              shuffle=False,
                              collate_fn=pad_sequence)
    eval_loader = DataLoader(IndicDataset(tokenizers.src, tokenizers.tgt, config.data, False),
                             batch_size=config.eval_size,
                             shuffle=False,
                             collate_fn=pad_sequence)
    return model, tokenizers, train_loader, eval_loader
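
Each example builds a PadSequence collate object from the two tokenizers' pad_token_ids and hands it to the DataLoader. The class itself lives in data.py and is not shown in these snippets; the following is a minimal sketch of what such a collate_fn typically does, assuming each dataset item is a (src_ids, tgt_ids) pair of 1-D LongTensors. It is an illustration, not the project's actual implementation.

from torch.nn.utils.rnn import pad_sequence

class PadSequence:
    """Sketch of a pad-to-longest collate_fn; the real data.py version may differ."""

    def __init__(self, src_pad_token_id, tgt_pad_token_id):
        self.src_pad_token_id = src_pad_token_id
        self.tgt_pad_token_id = tgt_pad_token_id

    def __call__(self, batch):
        # Separate source and target sequences, then pad each side to the
        # longest sequence in the batch using its tokenizer's pad id.
        src_seqs, tgt_seqs = zip(*batch)
        src = pad_sequence(src_seqs, batch_first=True, padding_value=self.src_pad_token_id)
        tgt = pad_sequence(tgt_seqs, batch_first=True, padding_value=self.tgt_pad_token_id)
        return src, tgt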
Example #2
 def val_dataloader(self):
     return DataLoader(IndicDataset(self.tokenizers.src,
                                    self.tokenizers.tgt, self.config.data,
                                    False),
                       batch_size=self.config.eval_size,
                       shuffle=False,
                       collate_fn=self.pad_sequence)
Example #3
    def train_dataloader(self):

        return DataLoader(IndicDataset(self.tokenizers.src,
                                       self.tokenizers.tgt, self.hparam.data,
                                       True),
                          batch_size=self.hparam.batch_size,
                          shuffle=False,
                          collate_fn=self.pad_sequence)
Example #4
 def train_dataloader(self):
     train_loader = DataLoader(IndicDataset(self.tokenizers.src,
                                            self.tokenizers.tgt,
                                            self.config.data, True),
                               batch_size=self.config.batch_size,
                               shuffle=False,
                               collate_fn=self.pad_sequence)
     return train_loader
Example #5
    def val_dataloader(self):
        from data import IndicDataset, PadSequence
        pad_sequence = PadSequence(self.tokenizers.src.pad_token_id, self.tokenizers.tgt.pad_token_id)

        return DataLoader(IndicDataset(self.tokenizers.src, self.tokenizers.tgt, self.config.data, False), 
                           batch_size=self.config.eval_size, 
                           shuffle=False, 
                           collate_fn=pad_sequence)
Example #6
    def test_dataloader(self):
        from data import IndicDataset, PadSequence
        pad_sequence = PadSequence(self.src_tokenizers.pad_token_id,
                                   self.tgt_tokenizers.pad_token_id)

        return DataLoader(IndicDataset(self.src_tokenizers,
                                       self.tgt_tokenizers, self.config.data,
                                       False, True),
                          batch_size=1,
                          shuffle=False,
                          collate_fn=pad_sequence)
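
The IndicDataset constructor signature can only be inferred from these calls: a source tokenizer, a target tokenizer, a data path, a train flag, and (in this test_dataloader) an extra flag, presumably selecting the test split. Below is a hypothetical skeleton under those assumptions, including an assumed tab-separated file layout; the real class in data.py may load and tokenize its files quite differently.

import os
import torch
from torch.utils.data import Dataset

class IndicDataset(Dataset):
    """Hypothetical skeleton inferred from the calls above, not the data.py original."""

    def __init__(self, src_tokenizer, tgt_tokenizer, data_dir, is_train, is_test=False):
        self.src_tokenizer = src_tokenizer
        self.tgt_tokenizer = tgt_tokenizer
        split = "train" if is_train else ("test" if is_test else "valid")
        # Assumed layout: one tab-separated "source<TAB>target" pair per line.
        with open(os.path.join(data_dir, split + ".tsv"), encoding="utf-8") as f:
            self.pairs = [line.rstrip("\n").split("\t") for line in f if line.strip()]

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        src_text, tgt_text = self.pairs[idx]
        # Tokenize on access; the PadSequence collate_fn pads these
        # variable-length id sequences into a rectangular batch.
        src_ids = torch.tensor(self.src_tokenizer.encode(src_text), dtype=torch.long)
        tgt_ids = torch.tensor(self.tgt_tokenizer.encode(tgt_text), dtype=torch.long)
        return src_ids, tgt_ids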
Example #7
 def prepare_data(self):
     self.indic_train = IndicDataset(
         self.tokenizers.src, self.tokenizers.tgt, self.config.data, True)
     self.indic_test = IndicDataset(
         self.tokenizers.src, self.tokenizers.tgt, self.config.data, False)
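
In PyTorch Lightning, prepare_data() runs once before the dataloader hooks, so the datasets built here are already available when the Trainer later calls train_dataloader() and val_dataloader(); those hooks then only need to wrap self.indic_train and self.indic_test in DataLoaders, as in the earlier examples.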