def _adjust_time_resolution(self, batch, local_condition, max_time_steps):
    '''Adjust time resolution between audio and local condition.

    Every batch element is a 4-tuple ``(x, c, g, l)``. ``x`` is the sequence
    that is cropped along its first axis and ``c`` holds the local-condition
    features (presumably raw audio samples and spectrogram frames -- confirm
    against the caller). ``g`` and ``l`` are passed through untouched.

    Args:
        batch: iterable of ``(x, c, g, l)`` tuples.
        local_condition: truthy when ``c`` is used and must be cropped
            together with ``x``.
        max_time_steps: upper bound on ``len(x)`` that triggers random
            cropping, or ``None`` to disable cropping.

    Returns:
        A new list of ``(x, c, g, l)`` tuples; the input batch is not
        modified.
    '''
    new_batch = []

    if local_condition:
        # Both values are loop-invariant, so compute them once per call.
        hop_size = audio.get_hop_size(self._hparams)
        max_steps = None
        if max_time_steps is not None:
            # NOTE(review): the trailing True presumably asks the helper to
            # round down to a multiple of hop_size -- confirm in its source.
            max_steps = _ensure_divisible(max_time_steps, hop_size, True)

        for x, c, g, l in batch:
            self._assert_ready_for_upsample(x, c)
            if max_time_steps is not None and len(x) > max_time_steps:
                max_time_frames = max_steps // hop_size
                # Draw the window start in units of c, then scale it by
                # hop_size so x and c are cropped over the same span.
                start = np.random.randint(0, len(c) - max_time_frames)
                time_start = start * hop_size
                x = x[time_start:time_start + max_time_frames * hop_size]
                c = c[start:start + max_time_frames, :]
                self._assert_ready_for_upsample(x, c)
            new_batch.append((x, c, g, l))
        return new_batch

    for x, c, g, l in batch:
        x = audio.trim(x)
        if max_time_steps is not None and len(x) > max_time_steps:
            # Only x is cropped in this branch, so the start offset must be
            # bounded by len(x). The previous code used len(c), which is the
            # wrong sequence here and fails outright when c is None.
            start = np.random.randint(0, len(x) - max_time_steps)
            x = x[start:start + max_time_steps]
        new_batch.append((x, c, g, l))
    return new_batch
def _adjust_time_resolution(self, batch, local_condition, max_time_steps):
    '''Adjust time resolution between audio and local condition.

    Every batch element is a 4-tuple ``(x, c, g, l)``. ``x`` is the sequence
    that is padded/cropped along its first axis and ``c`` holds the
    local-condition features (presumably raw audio samples and spectrogram
    frames -- confirm against the caller). ``g`` and ``l`` are passed
    through untouched.

    With local conditioning, ``x`` is first padded up to
    ``len(c) * hop_size`` when it is shorter than that; otherwise ``c`` is
    handed to ``self._pad_specs`` with ``len(x) // hop_size`` as target.
    Afterwards both are optionally cropped to a random aligned window.

    Args:
        batch: iterable of ``(x, c, g, l)`` tuples.
        local_condition: truthy when ``c`` is used and must be kept aligned
            with ``x``.
        max_time_steps: upper bound on ``len(x)`` that triggers random
            cropping, or ``None`` to disable cropping.

    Returns:
        A new list of ``(x, c, g, l)`` tuples; the input batch is not
        modified.
    '''
    new_batch = []

    if local_condition:
        # Both values are loop-invariant, so compute them once per call.
        hop_size = audio.get_hop_size(self._hparams)
        max_steps = None
        if max_time_steps is not None:
            # NOTE(review): the trailing True presumably asks the helper to
            # round down to a multiple of hop_size -- confirm in its source.
            max_steps = _ensure_divisible(max_time_steps, hop_size, True)

        for x, c, g, l in batch:
            expected_len = len(c) * hop_size
            if len(x) < expected_len:
                pad_length = expected_len - len(x)
                # Split the padding between both ends; when pad_length is
                # odd the extra element goes to the right-hand side.
                pad_left = pad_length // 2
                x = np.pad(x, (pad_left, pad_length - pad_left),
                           mode='constant', constant_values=_pad)
            else:
                c = self._pad_specs(c, len(x) // hop_size)

            self._assert_ready_for_upsample(x, c)
            if max_time_steps is not None and len(x) > max_time_steps:
                max_time_frames = max_steps // hop_size
                # Draw the window start in units of c, then scale it by
                # hop_size so x and c are cropped over the same span.
                start = np.random.randint(0, len(c) - max_time_frames)
                time_start = start * hop_size
                x = x[time_start:time_start + max_time_frames * hop_size]
                c = c[start:start + max_time_frames, :]
                self._assert_ready_for_upsample(x, c)
            new_batch.append((x, c, g, l))
        return new_batch

    for x, c, g, l in batch:
        x = audio.trim(x)
        if max_time_steps is not None and len(x) > max_time_steps:
            # Only x is cropped in this branch, so the start offset must be
            # bounded by len(x). The previous code used len(c), which is the
            # wrong sequence here and fails outright when c is None.
            start = np.random.randint(0, len(x) - max_time_steps)
            x = x[start:start + max_time_steps]
        new_batch.append((x, c, g, l))
    return new_batch