def perturb_speed(self, factor: float, sampling_rate: int, affix_id: bool = True) -> 'SupervisionSegment':
    """
    Return a ``SupervisionSegment`` that has time boundaries matching the
    recording/cut perturbed with the same factor.

    :param factor: The speed will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
    :param sampling_rate: The sampling rate is necessary to accurately perturb the start
        and duration (going through the sample counts).
    :param affix_id: When true, we will modify the ``id`` and ``recording_id`` fields
        by affixing them with "_sp{factor}". When false, both fields are kept unchanged.
    :return: a modified copy of the current ``SupervisionSegment``.
    """
    # Convert to sample counts first so that the perturbation is computed on
    # integers and only then mapped back to seconds.
    start_sample = compute_num_samples(self.start, sampling_rate)
    num_samples = compute_num_samples(self.duration, sampling_rate)
    new_start = perturb_num_samples(start_sample, factor) / sampling_rate
    new_duration = perturb_num_samples(num_samples, factor) / sampling_rate
    return fastcopy(
        self,
        id=f'{self.id}_sp{factor}' if affix_id else self.id,
        # Without affixing, the recording reference must stay as it was
        # (it must not be replaced by the supervision's own id).
        recording_id=f'{self.recording_id}_sp{factor}' if affix_id else self.recording_id,
        start=new_start,
        duration=new_duration,
        # Each alignment item is perturbed with the same factor; a missing or
        # empty alignment is passed on as None.
        alignment={
            ali_type: [
                item.perturb_speed(factor=factor, sampling_rate=sampling_rate)
                for item in ali
            ]
            for ali_type, ali in self.alignment.items()
        } if self.alignment else None,
    )
def reverse_timestamps(
    self,
    offset: Seconds,
    duration: Optional[Seconds],
    sampling_rate: int,
) -> Tuple[Seconds, Optional[Seconds]]:
    """
    Estimate the offset and duration a recording had before this transform
    (with perturbation factor ``self.factor``) was applied.

    The estimate tells us how much audio actually has to be read from disk
    during the call to ``load_audio()``.

    :param offset: start of the requested chunk, in seconds.
    :param duration: length of the requested chunk in seconds, or ``None``.
    :param sampling_rate: used to go through sample counts when converting.
    :return: a tuple ``(offset, duration)`` in seconds; the duration is ``None``
        whenever the input duration was ``None``.
    """
    has_duration = duration is not None

    # Seconds -> sample counts.
    offset_samples = compute_num_samples(offset, sampling_rate)
    duration_samples = None
    if has_duration:
        duration_samples = compute_num_samples(duration, sampling_rate)

    # Undo the perturbation by applying the inverse factor.
    offset_samples = perturb_num_samples(offset_samples, 1 / self.factor)
    if has_duration:
        duration_samples = perturb_num_samples(duration_samples, 1 / self.factor)

    # Sample counts -> seconds.
    original_offset = offset_samples / sampling_rate
    if not has_duration:
        return original_offset, None
    return original_offset, duration_samples / sampling_rate
def perturb_speed(self, factor: float, sampling_rate: int) -> 'AlignmentItem':
    """
    Build a new ``AlignmentItem`` whose start and duration follow the
    recording/cut after it has been speed-perturbed by ``factor``.

    The symbol is carried over unchanged; start and duration are converted to
    sample counts, perturbed, and converted back to seconds.
    See :meth:`SupervisionSegment.perturb_speed` for details.
    """
    # Seconds -> sample counts for (start, duration).
    sample_counts = [
        compute_num_samples(seconds, sampling_rate)
        for seconds in (self.start, self.duration)
    ]
    # Perturb each count and map it back to seconds.
    perturbed_start, perturbed_duration = [
        perturb_num_samples(count, factor) / sampling_rate
        for count in sample_counts
    ]
    return AlignmentItem(self.symbol, perturbed_start, perturbed_duration)
def _adjust_for_speed_perturbation(self, offset: Seconds, duration: Seconds) -> Tuple[Seconds, Seconds]:
    """
    Estimate the offset and duration the recording had before any speed
    perturbation was applied.

    The estimate tells us how much audio actually has to be read from disk
    during the call to ``load_audio()``.

    :param offset: start of the requested chunk, in seconds.
    :param duration: length of the requested chunk in seconds; ``None`` is
        tolerated and is passed through as ``None``.
    :return: a tuple ``(offset, duration)`` in seconds. The inputs are returned
        untouched when there are no transforms or none of them is named 'Speed'.
    """
    transforms = self.transforms
    if transforms is None:
        return offset, duration
    if not any(t['name'] == 'Speed' for t in transforms):
        return offset, duration

    begin = offset * self.sampling_rate
    length = None if duration is None else duration * self.sampling_rate

    # Walk the transforms from last to first, undoing each speed change
    # by applying the inverse of its factor.
    for transform in reversed(transforms):
        if transform['name'] == 'Speed':
            factor = transform['kwargs']['factor']
            begin = perturb_num_samples(begin, 1 / factor)
            if length is not None:
                length = perturb_num_samples(length, 1 / factor)

    if length is None:
        return begin / self.sampling_rate, None
    return begin / self.sampling_rate, length / self.sampling_rate
def perturb_speed(self, factor: float, sampling_rate: int, affix_id: bool = True) -> 'SupervisionSegment':
    """
    Produce a copy of this ``SupervisionSegment`` whose time boundaries follow
    the recording/cut after it has been speed-perturbed by ``factor``.

    :param factor: The speed will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
    :param sampling_rate: Needed to perturb the start and duration accurately
        (the computation goes through sample counts).
    :param affix_id: When true, the ``id`` field of the copy is suffixed with "_sp{factor}".
    :return: a modified copy of the current ``SupervisionSegment``.
    """
    # NOTE(review): only ``id`` is affixed here; ``recording_id`` is copied
    # unchanged -- confirm this matches how the perturbed recording is named.
    sample_counts = [
        round(seconds * sampling_rate)
        for seconds in (self.start, self.duration)
    ]
    perturbed_start, perturbed_duration = [
        perturb_num_samples(count, factor) / sampling_rate
        for count in sample_counts
    ]

    if affix_id:
        segment_id = f'{self.id}_sp{factor}'
    else:
        segment_id = self.id

    return fastcopy(
        self,
        id=segment_id,
        start=perturbed_start,
        duration=perturbed_duration,
    )
def perturb_speed(self, factor: float, affix_id: bool = True) -> 'Recording':
    """
    Return a new ``Recording`` that will lazily perturb the speed while loading audio.
    The ``num_samples`` and ``duration`` fields are updated to reflect the
    shrinking/extending effect of speed.

    The current ``Recording`` is left unmodified, including its ``transforms`` list.

    :param factor: The speed will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
    :param affix_id: When true, we will modify the ``Recording.id`` field
        by affixing it with "_sp{factor}".
    :return: a modified copy of the current ``Recording``.
    """
    # Copy the list before appending: appending to ``self.transforms`` directly
    # would also add the Speed transform to the original recording.
    transforms = list(self.transforms) if self.transforms is not None else []
    transforms.append(Speed(factor=factor).to_dict())
    new_num_samples = perturb_num_samples(self.num_samples, factor)
    new_duration = new_num_samples / self.sampling_rate
    return fastcopy(
        self,
        id=f'{self.id}_sp{factor}' if affix_id else self.id,
        num_samples=new_num_samples,
        duration=new_duration,
        transforms=transforms,
    )