def generate_batched_itr(self, data_itr, maxlen_a=0, maxlen_b=200, cuda_device=None, timer=None):
    """Iterate over a batched dataset and yield individual translations.

    Args:
        data_itr: iterator over batched samples.
        maxlen_a/b: generate sequences of maximum length ``a*x + b``,
            where ``x`` is the source sentence length.
        cuda_device: GPU on which to do generation.
        timer: StopwatchMeter for timing generations.

    Yields:
        tuples of ``(id, src, ref, hypos)`` for each sentence in each batch.
    """
    def lstrip_pad(tensor):
        # Padding appears at the *beginning* of the target here, so skip
        # past the first `num_pad` positions.
        return tensor[tensor.eq(self.pad).sum():]

    for sample in data_itr:
        s = utils.prepare_sample(sample, volatile=True, cuda_device=cuda_device)
        input = s['net_input']
        srclen = input['src_tokens'].size(1)
        if timer is not None:
            timer.start()
        # Cast to int so a fractional maxlen_a still produces an integer
        # length, matching the sibling generate_batched_itr implementations.
        hypos = self.generate(input['src_tokens'], input['src_positions'],
                              maxlen=int(maxlen_a * srclen + maxlen_b))
        if timer is not None:
            timer.stop(s['ntokens'])
        for i, id in enumerate(s['id']):
            src = input['src_tokens'].data[i, :]
            # remove padding from ref, which appears at the beginning
            ref = lstrip_pad(s['target'].data[i, :])
            yield id, src, ref, hypos[i]
def generate_batched_itr(self, data_itr, beam_size=None, maxlen_a=0.0, maxlen_b=None, cuda_device=None, timer=None):
    """Iterate over a batched dataset and yield individual translations.

    Args:
        data_itr: iterator over batched samples.
        beam_size: beam width forwarded to ``generate``.
        maxlen_a/b: generate sequences of maximum length ``a*x + b``,
            where ``x`` is the source sentence length.
        cuda_device: GPU on which to do generation.
        timer: StopwatchMeter for timing generations.

    Yields:
        tuples of ``(id, src, ref, hypos)`` for each sentence in each batch.
    """
    if maxlen_b is None:
        maxlen_b = self.maxlen
    for batch in data_itr:
        prepared = utils.prepare_sample(batch, volatile=True, cuda_device=cuda_device)
        net_input = prepared['net_input']
        src_len = net_input['src_tokens'].size(1)
        if timer is not None:
            timer.start()
        max_target_len = int(maxlen_a * src_len + maxlen_b)
        hypos = self.generate(net_input['src_tokens'], beam_size=beam_size,
                              maxlen=max_target_len)
        if timer is not None:
            timer.stop(prepared['ntokens'])
        for idx, sent_id in enumerate(prepared['id']):
            source = net_input['src_tokens'].data[idx, :]
            # strip trailing padding from the reference
            reference = utils.rstrip_pad(prepared['target'].data[idx, :], self.pad)
            yield sent_id, source, reference, hypos[idx]
def _async_prepare_sample(self, rank, device_id, sample, volatile):
    """Stash *sample* on this replica, moved to *device_id* (or clear it)."""
    if sample is None:
        self._sample = None
        return
    self._sample = utils.prepare_sample(sample, volatile=volatile, cuda_device=device_id)
def _async_prepare_sample(self, rank, device_id, sample, volatile):
    """Stash *sample* on this replica, moved to *device_id* (or clear it).

    When the incoming batch is the largest seen so far, the CUDA caching
    allocator is emptied first (on torch versions that support it) so its
    cached blocks can be re-carved for the bigger allocation.
    """
    if sample is None:
        self._sample = None
        return
    batch_size = sample['target'].size(0)
    if hasattr(torch.cuda, 'empty_cache') and batch_size > self._max_bsz_seen:
        # largest sample we've seen: clear the caching allocator
        self._max_bsz_seen = batch_size
        torch.cuda.empty_cache()
    self._sample = utils.prepare_sample(sample, volatile=volatile, cuda_device=device_id)
def _async_prepare_sample(self, rank, device_id, sample, volatile):
    """Stash *sample* on this replica, moved to *device_id* (or clear it)."""
    if sample is None:
        self._sample = None
    else:
        # Example layout of self._sample (sizes from one observed batch):
        #   {'id': LongTensor[181],
        #    'ntokens': 1314,
        #    'target': LongTensor[181*12],
        #    'net_input': {'src_token': LongTensor[181*12],
        #                  'src_position': LongTensor[181*12],
        #                  'input_tokens': LongTensor[181*12],
        #                  'input_position': LongTensor[181*12]}}
        self._sample = utils.prepare_sample(sample, volatile=volatile, cuda_device=device_id)
def _scatter_samples(self, samples, volatile=False):
    """Split and distribute a sample across GPUs.

    Returns:
        ``(res, events)`` — the per-device prepared samples (padded with
        ``None`` up to ``self.num_replicas``) and one recorded CUDA event
        per device for synchronization.
    """
    scattered = []
    for sample, device_id in zip(samples, self.device_ids):
        scattered.append(utils.prepare_sample(sample, volatile=volatile, cuda_device=device_id))
    # Pad with None so every replica gets an entry.
    scattered.extend([None] * (self.num_replicas - len(samples)))

    # Record one event per device after the data transfer so callers can
    # synchronize on them and avoid race conditions.
    events = []
    for device_id in self.device_ids:
        with torch.cuda.device(device_id):
            ev = torch.cuda.Event(interprocess=True)
            ev.record()
            events.append(ev)
    return scattered, events
def generate_batched_itr(self, data_itr, beam_size=None, maxlen_a=0.0, maxlen_b=None, cuda_device=None, timer=None):
    """Iterate over a batched dataset and yield individual translations.

    Args:
        data_itr: iterator over batched samples.
        beam_size: beam width forwarded to ``generate``.
        maxlen_a/b: generate sequences of maximum length ``a*x + b``,
            where ``x`` is the source sentence length; ``maxlen_b``
            defaults to ``self.maxlen``.
        cuda_device: GPU on which to do generation.
        timer: StopwatchMeter for timing generations.

    Yields:
        tuples of ``(id, src, ref, hypos)`` for each sentence in each batch.
    """
    maxlen_b = self.maxlen if maxlen_b is None else maxlen_b
    for raw_batch in data_itr:
        sample = utils.prepare_sample(raw_batch, volatile=True, cuda_device=cuda_device)
        model_input = sample['net_input']
        src_tokens = model_input['src_tokens']
        source_length = src_tokens.size(1)
        if timer is not None:
            timer.start()
        hypos = self.generate(
            src_tokens,
            beam_size=beam_size,
            maxlen=int(maxlen_a * source_length + maxlen_b),
        )
        if timer is not None:
            timer.stop(sample['ntokens'])
        for pos, example_id in enumerate(sample['id']):
            src_row = src_tokens.data[pos, :]
            # strip trailing padding from the reference
            ref_row = utils.rstrip_pad(sample['target'].data[pos, :], self.pad)
            yield example_id, src_row, ref_row, hypos[pos]