예제 #1
0
 def initialize(self, random, ranker, dataset):
     """Set up the training record stream and its batched wrapper.

     All three arguments are forwarded unchanged to the parent
     ``initialize``. Afterwards the ranker's input spec is cached, the
     field set to request is derived from it, and a record iterator is
     created and wrapped by ``self.iter_batches``.

     Args:
         random: Random source; stored on ``self.random`` and handed to
             ``datasets.record_iter``.
         ranker: Forwarded to the parent ``initialize``. The spec itself
             is read from ``self.ranker`` (presumably assigned by the
             parent -- confirm against the base class).
         dataset: Forwarded to the parent ``initialize``. Records are
             read from ``self.dataset`` (presumably assigned by the
             parent -- confirm against the base class).
     """
     super().initialize(random, ranker, dataset)
     self.random = random
     self.input_spec = self.ranker.input_spec()

     # Request everything the ranker asks for, plus 'relscore'.
     self.iter_fields = self.input_spec['fields'] | {'relscore'}

     # -999 acts as a sentinel: it is passed on as minrel=None.
     min_relevance = self.minrel
     if min_relevance == -999:
         min_relevance = None

     record_stream = datasets.record_iter(
         self.dataset,
         fields=self.iter_fields,
         source=self.source,
         minrel=min_relevance,
         shuf=True,
         random=self.random,
         inf=True,
     )
     self.train_iter_core = record_stream
     self.train_iter = self.iter_batches(record_stream)
예제 #2
0
 def __init__(self, config, ranker, vocab, logger, train_ds, random):
     """Construct the trainer and open its training record stream.

     The parent constructor is invoked with the positional order
     ``(config, ranker, vocab, train_ds, logger, random)``; note that
     ``train_ds`` and ``logger`` are swapped relative to this method's
     own parameter order.

     Args:
         config: Forwarded to the parent. ``self.config['source']`` and
             ``self.config['minrel']`` are read afterwards
             (``self.config`` is presumably stored by the parent --
             confirm against the base class).
         ranker: Provides ``input_spec()``, whose ``'fields'`` entry
             determines which record fields are requested.
         vocab: Forwarded to the parent constructor.
         logger: Forwarded to the parent constructor.
         train_ds: Dataset the records are drawn from; also stored on
             ``self.dataset``.
         random: Forwarded to the parent; ``self.random`` is what gets
             passed to ``datasets.record_iter`` (presumably stored by
             the parent -- confirm against the base class).
     """
     super().__init__(config, ranker, vocab, train_ds, logger, random)
     self.dataset = train_ds
     self.input_spec = ranker.input_spec()

     # Request everything the ranker asks for, plus 'relscore'.
     self.iter_fields = self.input_spec['fields'] | {'relscore'}

     # A configured minrel of -999 is a sentinel passed on as None.
     if self.config['minrel'] == -999:
         min_relevance = None
     else:
         min_relevance = self.config['minrel']

     record_stream = datasets.record_iter(
         train_ds,
         fields=self.iter_fields,
         source=self.config['source'],
         minrel=min_relevance,
         shuf=True,
         random=self.random,
         inf=True,
     )
     self.train_iter_core = record_stream
     self.train_iter = self.iter_batches(record_stream)
예제 #3
0
 def _iter_batches(self, device):
     """Generate spec-applied batches from ``self.dataset``.

     Records are requested with the input spec's fields plus
     ``'query_id'`` and ``'doc_id'``, with ``shuf=False`` and
     ``inf=False``. They are grouped by ``util.chunked`` using
     ``self.config['batch_size']``; each group is collated into a dict
     mapping every record key to the list of that key's values, in
     record order, and then passed through ``spec.apply_spec_batch``.

     Args:
         device: Passed through untouched to ``spec.apply_spec_batch``.

     Yields:
         Whatever ``spec.apply_spec_batch`` returns for each group.
     """
     settings = self.config
     wanted_fields = set(self.input_spec['fields']) | {'query_id', 'doc_id'}
     records = datasets.record_iter(
         self.dataset,
         fields=wanted_fields,
         source=settings['source'],
         run_threshold=settings['run_threshold'],
         minrel=None,
         shuf=False,
         random=self.random,
         inf=False,
     )
     for chunk in util.chunked(records, settings['batch_size']):
         # Turn a list of per-record dicts into one dict of per-key lists.
         columns = {}
         for record in chunk:
             for key, value in record.items():
                 if key not in columns:
                     columns[key] = []
                 columns[key].append(value)
         yield spec.apply_spec_batch(columns, self.input_spec, device)