def initialize(self, random, ranker, dataset):
    """Prepare the trainer's infinite training-record iterator.

    Delegates base setup to the superclass, then builds a shuffled,
    infinite record iterator over *dataset* using the ranker's declared
    input fields plus 'relscore'.
    """
    super().initialize(random, ranker, dataset)
    self.random = random
    self.input_spec = self.ranker.input_spec()
    # The ranker's required fields, plus the relevance score used as the target.
    self.iter_fields = self.input_spec['fields'] | {'relscore'}
    # -999 is the sentinel for "no minimum relevance filter".
    min_relevance = None if self.minrel == -999 else self.minrel
    self.train_iter_core = datasets.record_iter(
        self.dataset,
        fields=self.iter_fields,
        source=self.source,
        minrel=min_relevance,
        shuf=True,
        random=self.random,
        inf=True)
    self.train_iter = self.iter_batches(self.train_iter_core)
def __init__(self, config, ranker, vocab, logger, train_ds, random):
    """Construct the trainer and its infinite training-record iterator.

    NOTE(review): the super().__init__ call passes train_ds before logger,
    the opposite of this signature's order — presumably the base class
    expects (.., train_ds, logger, ..); confirm against the base class.
    """
    super().__init__(config, ranker, vocab, train_ds, logger, random)
    self.dataset = train_ds
    self.input_spec = ranker.input_spec()
    # The ranker's required fields, plus the relevance score used as the target.
    self.iter_fields = self.input_spec['fields'] | {'relscore'}
    # -999 is the sentinel for "no minimum relevance filter".
    min_relevance = None if self.config['minrel'] == -999 else self.config['minrel']
    self.train_iter_core = datasets.record_iter(
        train_ds,
        fields=self.iter_fields,
        source=self.config['source'],
        minrel=min_relevance,
        shuf=True,
        random=self.random,
        inf=True)
    self.train_iter = self.iter_batches(self.train_iter_core)
def _iter_batches(self, device):
    """Yield column-wise batches of validation/inference records on *device*.

    Iterates the dataset once (no shuffling, no relevance filter),
    groups records into fixed-size chunks, transposes each chunk into
    a field -> list-of-values mapping, and applies the input spec to
    move/convert the batch for *device*.
    """
    # Identifier fields are needed downstream in addition to the model inputs.
    wanted_fields = set(self.input_spec['fields']) | {'query_id', 'doc_id'}
    record_stream = datasets.record_iter(
        self.dataset,
        fields=wanted_fields,
        source=self.config['source'],
        run_threshold=self.config['run_threshold'],
        minrel=None,
        shuf=False,
        random=self.random,
        inf=False)
    for chunk in util.chunked(record_stream, self.config['batch_size']):
        # Transpose the chunk: per-record dicts -> per-field value lists.
        columns = {}
        for record in chunk:
            for field, value in record.items():
                columns.setdefault(field, []).append(value)
        yield spec.apply_spec_batch(columns, self.input_spec, device)