예제 #1
0
    def perturb_speed(self,
                      factor: float,
                      sampling_rate: int,
                      affix_id: bool = True) -> 'SupervisionSegment':
        """
        Return a ``SupervisionSegment`` that has time boundaries matching the
        recording/cut perturbed with the same factor.

        :param factor: The speed will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
        :param sampling_rate: The sampling rate is necessary to accurately perturb the start
            and duration (going through the sample counts).
        :param affix_id: When true, we will modify the ``id`` and ``recording_id`` fields
            by affixing them with "_sp{factor}". When false, both fields are kept unchanged.
        :return: a modified copy of the current ``SupervisionSegment``.
        """
        # Go through integer sample counts so that the new boundaries are
        # derived the same way as for the perturbed audio.
        start_sample = compute_num_samples(self.start, sampling_rate)
        num_samples = compute_num_samples(self.duration, sampling_rate)
        new_start = perturb_num_samples(start_sample, factor) / sampling_rate
        new_duration = perturb_num_samples(num_samples, factor) / sampling_rate

        if affix_id:
            new_id = f'{self.id}_sp{factor}'
            new_recording_id = f'{self.recording_id}_sp{factor}'
        else:
            # Keep each identifier as-is; in particular ``recording_id`` must
            # not be overwritten with the supervision's own ``id``.
            new_id = self.id
            new_recording_id = self.recording_id

        # A missing or empty alignment is stored as None in the copy.
        new_alignment = None
        if self.alignment:
            new_alignment = {
                ali_type: [
                    item.perturb_speed(factor=factor,
                                       sampling_rate=sampling_rate)
                    for item in ali
                ]
                for ali_type, ali in self.alignment.items()
            }

        return fastcopy(self,
                        id=new_id,
                        recording_id=new_recording_id,
                        start=new_start,
                        duration=new_duration,
                        alignment=new_alignment)
예제 #2
0
 def reverse_timestamps(
     self,
     offset: Seconds,
     duration: Optional[Seconds],
     sampling_rate: int,
 ) -> Tuple[Seconds, Optional[Seconds]]:
     """
     Estimate the offset and duration that a recording region had before
     this tempo perturbation was applied.

     The estimate tells the caller how much audio actually has to be read
     from disk during the call to ``load_audio()``.

     :param offset: start of the region, in seconds, on the perturbed timeline.
     :param duration: length of the region in seconds, or ``None``;
         a ``None`` duration is passed through as ``None``.
     :param sampling_rate: used to convert between seconds and sample counts.
     :return: a tuple ``(offset, duration)`` obtained by applying
         ``perturb_num_samples`` with the inverse of ``self.factor``.
     """
     begin = compute_num_samples(offset, sampling_rate)
     length = None if duration is None else compute_num_samples(duration, sampling_rate)

     # Undo the perturbation by applying the reciprocal factor.
     inverse_factor = 1 / self.factor
     begin = perturb_num_samples(begin, inverse_factor)
     if length is None:
         return begin / sampling_rate, None

     length = perturb_num_samples(length, inverse_factor)
     return begin / sampling_rate, length / sampling_rate
예제 #3
0
 def perturb_speed(self, factor: float, sampling_rate: int) -> 'AlignmentItem':
     """
     Build an ``AlignmentItem`` whose ``start`` and ``duration`` are adjusted
     to match a recording/cut perturbed with the same ``factor``.
     See :meth:`SupervisionSegment.perturb_speed` for details.

     :param factor: the speed perturbation factor.
     :param sampling_rate: used to convert between seconds and sample counts.
     :return: a new ``AlignmentItem`` with the same ``symbol``.
     """
     def _rescale(seconds):
         # seconds -> sample count -> perturbed sample count -> seconds
         samples = compute_num_samples(seconds, sampling_rate)
         return perturb_num_samples(samples, factor) / sampling_rate

     return AlignmentItem(self.symbol, _rescale(self.start), _rescale(self.duration))
예제 #4
0
 def _adjust_for_speed_perturbation(self, offset: Seconds, duration: Seconds) -> Tuple[Seconds, Seconds]:
     """
     Estimate the offset and duration that a region of this recording had
     before its ``Speed`` transforms were applied.

     The estimate tells the caller how much audio actually has to be read
     from disk during the call to ``load_audio()``.

     :param offset: start of the region in seconds.
     :param duration: length of the region in seconds; ``None`` is also
         accepted and is passed through as ``None``.
     :return: ``(offset, duration)`` unchanged when there are no ``Speed``
         transforms; otherwise the values obtained by undoing every
         ``Speed`` transform, last-applied first.
     """
     if self.transforms is None or not any(t['name'] == 'Speed' for t in self.transforms):
         return offset, duration

     rate = self.sampling_rate
     begin = offset * rate
     length = None if duration is None else duration * rate

     # Walk the transforms backwards, inverting each speed factor in turn.
     for transform in reversed(self.transforms):
         if transform['name'] == 'Speed':
             inverse_factor = 1 / transform['kwargs']['factor']
             begin = perturb_num_samples(begin, inverse_factor)
             if length is not None:
                 length = perturb_num_samples(length, inverse_factor)

     if length is None:
         return begin / rate, None
     return begin / rate, length / rate
예제 #5
0
    def perturb_speed(self,
                      factor: float,
                      sampling_rate: int,
                      affix_id: bool = True) -> 'SupervisionSegment':
        """
        Return a ``SupervisionSegment`` that has time boundaries matching the
        recording/cut perturbed with the same factor.

        :param factor: The speed will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
        :param sampling_rate: The sampling rate is necessary to accurately perturb the start
            and duration (going through sample counts).
        :param affix_id: When true, we will modify the ``id`` and ``recording_id`` fields
            by affixing them with "_sp{factor}". When false, both fields are kept unchanged.
        :return: a modified copy of the current ``SupervisionSegment``.
        """
        # Go through integer sample counts so the new boundaries are derived
        # from whole samples rather than from raw float seconds.
        start_sample = round(self.start * sampling_rate)
        num_samples = round(self.duration * sampling_rate)
        new_start = perturb_num_samples(start_sample, factor) / sampling_rate
        new_duration = perturb_num_samples(num_samples, factor) / sampling_rate

        if affix_id:
            new_id = f'{self.id}_sp{factor}'
            # ``Recording.perturb_speed`` affixes the recording's id with the
            # same suffix, so the reference has to follow it to stay valid.
            new_recording_id = f'{self.recording_id}_sp{factor}'
        else:
            new_id = self.id
            new_recording_id = self.recording_id

        return fastcopy(self,
                        id=new_id,
                        recording_id=new_recording_id,
                        start=new_start,
                        duration=new_duration)
예제 #6
0
    def perturb_speed(self, factor: float, affix_id: bool = True) -> 'Recording':
        """
        Return a new ``Recording`` that will lazily perturb the speed while loading audio.
        The ``num_samples`` and ``duration`` fields are updated to reflect the
        shrinking/extending effect of speed.

        The current ``Recording`` is left untouched: its ``transforms`` list is
        copied before the new ``Speed`` transform is appended.

        :param factor: The speed will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
        :param affix_id: When true, we will modify the ``Recording.id`` field
            by affixing it with "_sp{factor}".
        :return: a modified copy of the current ``Recording``.
        """
        # Copy the list: appending to ``self.transforms`` directly would also
        # add the Speed transform to the original recording.
        transforms = list(self.transforms) if self.transforms is not None else []
        transforms.append(Speed(factor=factor).to_dict())
        new_num_samples = perturb_num_samples(self.num_samples, factor)
        new_duration = new_num_samples / self.sampling_rate
        return fastcopy(
            self,
            id=f'{self.id}_sp{factor}' if affix_id else self.id,
            num_samples=new_num_samples,
            duration=new_duration,
            transforms=transforms
        )