Esempio n. 1
0
def test_repeat(manifest_type, preserve_id):
    """Check ``repeat(times=2)`` against ``data + data`` for an eager and a lazy manifest."""
    data = DummyManifest(manifest_type, begin_id=0, end_id=10)
    expected = data + data
    expect_exact_match = preserve_id or manifest_type == FeatureSet

    def check(result):
        # The same verification is applied to the eager and to the lazy result.
        if expect_exact_match:
            assert list(result) == list(expected)
            return
        actual_items = list(result)
        reference_items = list(expected)
        assert len(actual_items) == len(reference_items)
        for actual, reference in zip(actual_items, reference_items):
            # Only the id may differ: it must carry one of the repeat suffixes.
            assert actual.id.endswith(("_repeat0", "_repeat1"))
            assert fastcopy(actual, id=reference.id) == reference

    check(data.repeat(times=2, preserve_id=preserve_id))

    with as_lazy(data) as lazy_data:
        check(lazy_data.repeat(times=2, preserve_id=preserve_id))
Esempio n. 2
0
def cut_set():
    """Build a ``CutSet`` holding a base ``MonoCut`` plus seven variants derived from it."""
    features = Features(
        type="fbank",
        num_frames=100,
        num_features=40,
        frame_shift=0.01,
        sampling_rate=16000,
        start=0.0,
        duration=10.0,
        storage_type="lilcom",
        storage_path="irrelevant",
        storage_key="irrelevant",
    )
    recording = Recording(
        id="rec-1",
        sampling_rate=16000,
        num_samples=160000,
        duration=10.0,
        sources=[AudioSource(type="file", channels=[0], source="irrelevant")],
    )
    supervisions = [
        SupervisionSegment(
            id="sup-1", recording_id="irrelevant", start=0.5, duration=6.0
        ),
        SupervisionSegment(
            id="sup-2", recording_id="irrelevant", start=7.0, duration=2.0
        ),
    ]
    base_cut = MonoCut(
        id="cut-1",
        start=0.0,
        duration=10.0,
        channel=0,
        features=features,
        recording=recording,
        supervisions=supervisions,
    )

    # Copies with one component removed each.
    stripped = [
        fastcopy(base_cut, id="cut-nosup", supervisions=[]),
        fastcopy(base_cut, id="cut-norec", recording=None),
        fastcopy(base_cut, id="cut-nofeat", features=None),
    ]
    # The base cut padded to 30 seconds in every supported direction.
    padded = [
        base_cut.pad(duration=30.0, direction=direction)
        for direction in ("left", "right", "both")
    ]
    # The base cut mixed with itself.
    mixed = base_cut.mix(base_cut, offset_other_by=5.0, snr=8)

    return CutSet.from_cuts([base_cut, *stripped, *padded, mixed])
Esempio n. 3
0
def test_bucketing_sampler_cut_pairs_equal_len(shuffle):
    """With ``equal_len`` bucketing, every bucket holds 100 cuts but total durations vary."""
    cut_set = DummyManifest(CutSet, begin_id=0, end_id=1000)
    # Give every cut a distinct duration, growing from 3s up to just under 23s.
    for position, cut in enumerate(cut_set):
        cut.duration = 3 + position * 1 / 50
    # The target side gets different durations on purpose,
    # to make sure the bucketing copes with that.
    cut_set_tgt = cut_set.map(lambda c: fastcopy(c, duration=1 / c.duration))

    sampler = BucketingSampler(
        cut_set,
        cut_set_tgt,
        sampler_type=CutPairsSampler,
        bucket_method="equal_len",
        num_buckets=10,
        shuffle=shuffle,
    )

    total_durations = []
    for src_bucket, tgt_bucket in sampler.buckets:
        total_durations.append(sum(cut.duration for cut in src_bucket))
        assert len(src_bucket) == 100
        assert list(src_bucket.ids) == list(tgt_bucket.ids)

    # Because bucket lengths are equal, at least one bucket's total duration
    # deviates from the mean by 10% of the mean or more.
    average = mean(total_durations)
    assert any(abs(total - average) >= 0.1 * average for total in total_durations)
Esempio n. 4
0
def test_bucketing_sampler_cut_pairs_equal_duration(shuffle):
    """With ``equal_duration`` bucketing, bucket sizes shrink while total durations stay close."""
    cut_set = DummyManifest(CutSet, begin_id=0, end_id=1000)
    # Give every cut a distinct duration, growing from 3s up to just under 23s.
    for position, cut in enumerate(cut_set):
        cut.duration = 3 + position * 1 / 50
    # The target side gets different durations on purpose,
    # to make sure the bucketing copes with that.
    cut_set_tgt = cut_set.map(lambda c: fastcopy(c, duration=1 / c.duration))

    sampler = BucketingSampler(
        cut_set,
        cut_set_tgt,
        sampler_type=CutPairsSampler,
        bucket_method="equal_duration",
        num_buckets=10,
        shuffle=shuffle,
    )

    bucket_sizes = []
    total_durations = []
    for src_bucket, tgt_bucket in sampler.buckets:
        assert list(src_bucket.ids) == list(tgt_bucket.ids)
        total_durations.append(sum(cut.duration for cut in src_bucket))
        bucket_sizes.append(len(src_bucket))

    # Each consecutive bucket must hold strictly fewer cuts than the previous one.
    assert all(
        later < earlier for earlier, later in zip(bucket_sizes, bucket_sizes[1:])
    )

    # Every bucket's total duration lies within 10% of the mean (~1300s).
    average = mean(total_durations)
    assert all(abs(total - average) < 0.1 * average for total in total_durations)
Esempio n. 5
0
    def move_to_memory(
        self,
        start: Seconds = 0,
        duration: Optional[Seconds] = None,
    ) -> "Features":
        """
        Load the feature array (or a sub-range of it) and return a manifest copy
        whose storage fields point at an in-memory writer's output.

        :param start: passed to ``self.load`` as the start of the loaded range.
        :param duration: passed to ``self.load``; ``None`` keeps ``self.duration``
            in the returned manifest.
        :return: ``self`` when the storage type is already one of the in-memory
            types checked below, otherwise a modified copy.
        """
        from lhotse.features.io import get_memory_writer

        # NOTE(review): "memory_raw" (used for non-float arrays below) is not in
        # this tuple, so such manifests are presumably re-written -- confirm intent.
        already_in_memory = self.storage_type in ("memory_lilcom", "memory_writer")
        if already_in_memory:
            return self

        arr = self.load(start=start, duration=duration)
        # Floating-point arrays go through the lilcom writer, anything else is stored raw.
        is_floating = issubclass(arr.dtype.type, np.floating)
        writer_name = "memory_lilcom" if is_floating else "memory_raw"
        writer = get_memory_writer(writer_name)()
        # In-memory writers ignore the key, hence the empty string.
        in_memory_data = writer.write("", arr)

        # Why start becomes zero:
        # 1) the whole array was moved => start was 0 anyway;
        # 2) only a subset was moved => the manifest now describes exactly that
        #    subset, so it starts at 0 and spans the requested duration.
        return fastcopy(
            self,
            start=0.0,
            duration=ifnone(duration, self.duration),
            num_frames=arr.shape[0],
            storage_type=writer.name,
            storage_key=in_memory_data,
            storage_path="",
        )
Esempio n. 6
0
    def transform_alignment(
        self,
        transform_fn: Callable[[str], str],
        type: Optional[str] = 'word'
    ) -> 'SupervisionSegment':
        """
        Produce a copy of this segment in which the alignment items stored under
        the ``type`` key have been passed through ``transform_fn``.
        Useful for text normalization, phonetic transcription, etc.

        :param transform_fn: a function that accepts a string and returns a string.
        :param type: alignment type to transform (key for alignment dict).
        :return: a ``SupervisionSegment`` with adjusted alignments, or ``self``
            when there is no alignment at all.
        """
        if self.alignment is None:
            return self

        new_alignment = {}
        for ali_type, items in self.alignment.items():
            if ali_type == type:
                new_alignment[ali_type] = [
                    item.transform(transform_fn=transform_fn) for item in items
                ]
            else:
                # Other alignment types are carried over unchanged (as a fresh list).
                new_alignment[ali_type] = list(items)
        return fastcopy(self, alignment=new_alignment)
Esempio n. 7
0
 def with_alignment_from_ctm(self, ctm_file: Pathlike, type: str = 'word', match_channel: bool = False) -> 'SupervisionSet':
     """
     Add alignments from CTM file to the supervision set.

     Each non-empty CTM line is expected to hold exactly five whitespace-separated
     fields: recording id, channel, start, duration and symbol.

     :param ctm_file: Path to CTM file.
     :param type: Alignment type (optional, default = `word`).
     :param match_channel: if True, also match channel between CTM and SupervisionSegment
     :return: A new SupervisionSet with AlignmentItem objects added to the segments.
     """
     ctm_words = []
     with open(ctm_file) as f:
         for line in f:
             fields = line.split()
             if not fields:
                 # Tolerate blank lines (e.g. a trailing empty line at the end of the file).
                 continue
             reco_id, channel, start, duration, symbol = fields
             ctm_words.append((reco_id, int(channel), float(start), float(duration), symbol))
     # groupby() only groups consecutive items, so sort by recording id (then start time) first.
     ctm_words.sort(key=lambda x: (x[0], x[2]))
     reco_to_ctm = defaultdict(list, {k: list(v) for k, v in groupby(ctm_words, key=lambda x: x[0])})
     segments = []
     num_total = len(ctm_words)
     num_overspanned = 0
     for reco_id in {s.recording_id for s in self}:
         if reco_id in reco_to_ctm:
             for seg in self.find(recording_id=reco_id):
                 alignment = [
                     AlignmentItem(symbol=word[4], start=word[2], duration=word[3])
                     for word in reco_to_ctm[reco_id]
                     if overspans(seg, TimeSpan(word[2], word[2] + word[3]))
                     and (seg.channel == word[1] or not match_channel)
                 ]
                 num_overspanned += len(alignment)
                 segments.append(fastcopy(seg, alignment={type: alignment}))
         else:
             # No CTM entries for this recording: keep its segments unchanged.
             # extend() (not append()) so that individual segments, rather than
             # a nested list, end up in ``segments``.
             segments.extend(self.find(recording_id=reco_id))
     logging.info(f"{num_overspanned} alignments added out of {num_total} total. If there are several"
         " missing, there could be a mismatch problem.")
     return SupervisionSet.from_segments(segments)
Esempio n. 8
0
    def perturb_speed(self,
                      factor: float,
                      sampling_rate: int,
                      affix_id: bool = True) -> 'SupervisionSegment':
        """
        Return a ``SupervisionSegment`` that has time boundaries matching the
        recording/cut perturbed with the same factor.

        :param factor: The speed will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
        :param sampling_rate: The sampling rate is necessary to accurately perturb the start
            and duration (going through the sample counts).
        :param affix_id: When true, we will modify the ``id`` and ``recording_id`` fields
            by affixing it with "_sp{factor}".
        :return: a modified copy of the current ``SupervisionSegment``.
        """
        # Go through integer sample counts so that the perturbed boundaries
        # are computed on samples rather than directly on seconds.
        start_sample = compute_num_samples(self.start, sampling_rate)
        num_samples = compute_num_samples(self.duration, sampling_rate)
        new_start = perturb_num_samples(start_sample, factor) / sampling_rate
        new_duration = perturb_num_samples(num_samples, factor) / sampling_rate
        return fastcopy(self,
                        id=f'{self.id}_sp{factor}' if affix_id else self.id,
                        # Without affixing, the recording id must stay as it was
                        # (not be replaced by the segment id).
                        recording_id=f'{self.recording_id}_sp{factor}'
                        if affix_id else self.recording_id,
                        start=new_start,
                        duration=new_duration,
                        alignment={
                            type: [
                                item.perturb_speed(factor=factor,
                                                   sampling_rate=sampling_rate)
                                for item in ali
                            ]
                            for type, ali in self.alignment.items()
                        } if self.alignment else None)
Esempio n. 9
0
def recording(file_source):
    """Build a ``Recording`` with two sources: ``file_source`` and a copy of it with ``channels=[1]``."""
    second_source = fastcopy(file_source, channels=[1])
    return Recording(
        id='rec',
        sources=[file_source, second_source],
        sampling_rate=8000,
        num_samples=4000,
        duration=0.5,
    )
Esempio n. 10
0
def text_normalizer(segment: SupervisionSegment) -> SupervisionSegment:
    """Return a copy of ``segment`` whose text is upper-cased and cleaned up."""
    # Applied in order: (pattern, replacement).
    substitutions = (
        (r"[^\w !?]", ""),  # keep only word characters, spaces, "!" and "?"
        (r"^\s+", ""),  # strip leading whitespace
        (r"\s+$", ""),  # strip trailing whitespace
        (r"\s+", " "),  # collapse whitespace runs into one space
    )
    normalized = segment.text.upper()
    for pattern, replacement in substitutions:
        normalized = re.sub(pattern, replacement, normalized)
    return fastcopy(segment, text=normalized)
Esempio n. 11
0
def test_cutset_from_webdataset_sharded_pipe():
    """Export cuts to gzip-piped shards of two cuts each and read them back through a pipe."""
    template = CutSet.from_file("test/fixtures/libri/cuts.json")[0]
    # Ten copies of the first fixture cut, told apart by an index suffix in the id.
    cuts = CutSet.from_cuts(
        [fastcopy(template, id=f"{template.id}-{i}") for i in range(10)]
    )

    with TemporaryDirectory() as dir_path:
        export_to_webdataset(
            cuts,
            output_path=f"pipe:gzip -c > {dir_path}/shard-%06d.tar.gz",
            shard_size=2,
        )

        # Shard shuffling is disabled so that the read order is comparable.
        read_pattern = "pipe:gunzip -c " + dir_path + "/shard-{000000..000004}.tar.gz"
        cuts_ds = CutSet.from_webdataset(read_pattern, shuffle_shards=False)

        assert list(cuts.ids) == list(cuts_ds.ids)

        for original, restored in zip(cuts, cuts_ds):
            np.testing.assert_equal(original.load_audio(), restored.load_audio())
            np.testing.assert_almost_equal(
                original.load_features(), restored.load_features(), decimal=2
            )
Esempio n. 12
0
def text_normalizer(segment: SupervisionSegment) -> SupervisionSegment:
    """Upper-case the segment text, drop disallowed characters and tidy up whitespace."""
    uppercased = segment.text.upper()
    # Keep only word characters, spaces, "!" and "?".
    allowed_only = re.sub(r'[^\w !?]', '', uppercased)
    # Trim both ends, then squeeze inner whitespace runs down to a single space.
    left_trimmed = re.sub(r'^\s+', '', allowed_only)
    trimmed = re.sub(r'\s+$', '', left_trimmed)
    collapsed = re.sub(r'\s+', ' ', trimmed)
    return fastcopy(segment, text=collapsed)
Esempio n. 13
0
def _upload_one(item: Features, url: str) -> Features:
    """Load the features of ``item``, write them with a ``LilcomURLWriter`` for ``url``,
    and return a manifest copy that points at the newly written data."""
    matrix = item.load()
    writer = LilcomURLWriter(url)
    uploaded_key = writer.write(key=item.storage_key, value=matrix)
    return fastcopy(
        item,
        storage_path=url,
        storage_key=uploaded_key,
        storage_type=writer.name,
    )
Esempio n. 14
0
def cut_set():
    """Build a ``CutSet`` holding a base ``Cut`` plus seven variants derived from it."""
    features = Features(
        type='fbank',
        num_frames=100,
        num_features=40,
        frame_shift=0.01,
        sampling_rate=16000,
        start=0.0,
        duration=10.0,
        storage_type='lilcom',
        storage_path='irrelevant',
        storage_key='irrelevant',
    )
    recording = Recording(
        id='rec-1',
        sampling_rate=16000,
        num_samples=160000,
        duration=10.0,
        sources=[AudioSource(type='file', channels=[0], source='irrelevant')],
    )
    supervisions = [
        SupervisionSegment(
            id='sup-1', recording_id='irrelevant', start=0.5, duration=6.0
        ),
        SupervisionSegment(
            id='sup-2', recording_id='irrelevant', start=7.0, duration=2.0
        ),
    ]
    base_cut = Cut(
        id='cut-1',
        start=0.0,
        duration=10.0,
        channel=0,
        features=features,
        recording=recording,
        supervisions=supervisions,
    )

    # Copies with one component removed each.
    stripped = [
        fastcopy(base_cut, id='cut-nosup', supervisions=[]),
        fastcopy(base_cut, id='cut-norec', recording=None),
        fastcopy(base_cut, id='cut-nofeat', features=None),
    ]
    # The base cut padded to 30 seconds in every supported direction.
    padded = [
        base_cut.pad(duration=30.0, direction=direction)
        for direction in ('left', 'right', 'both')
    ]
    # The base cut mixed with itself.
    mixed = base_cut.mix(base_cut, offset_other_by=5.0, snr=8)

    return CutSet.from_cuts([base_cut, *stripped, *padded, mixed])
Esempio n. 15
0
 def with_offset(self, offset: Seconds) -> 'SupervisionSegment':
     """
     Return a copy of this ``SupervisionSegment`` with ``offset`` added to ``start``
     (rounded to 8 digits); alignment items, if any, are offset as well.
     """
     new_start = round(self.start + offset, ndigits=8)
     shifted_alignment = None
     if self.alignment:
         shifted_alignment = {
             ali_type: [item.with_offset(offset=offset) for item in items]
             for ali_type, items in self.alignment.items()
         }
     return fastcopy(self, start=new_start, alignment=shifted_alignment)
Esempio n. 16
0
    def transform_text(self, transform_fn: Callable[[str], str]) -> 'SupervisionSegment':
        """
        Produce a copy of this segment whose ``text`` has been passed through ``transform_fn``.
        Useful for text normalization, phonetic transcription, etc.

        :param transform_fn: a function that accepts a string and returns a string.
        :return: a ``SupervisionSegment`` with adjusted text, or ``self`` when there is no text.
        """
        current_text = self.text
        if current_text is not None:
            return fastcopy(self, text=transform_fn(current_text))
        return self
Esempio n. 17
0
    def trim(self, end: Seconds) -> 'SupervisionSegment':
        """
        Return a copy of this ``SupervisionSegment`` clipped to the ``[0, end]`` range:
        a negative ``start`` is raised to 0 and the duration is shortened by whatever
        sticks out on either side.

        This method is useful for ensuring that the supervision does not exceed a cut's bounds,
        in which case pass ``cut.duration`` as the ``end`` argument, since supervision times are relative to the cut.
        """
        # How far the segment reaches before 0 and past ``end``, respectively.
        overshoot_left = -self.start if self.start < 0 else 0
        overshoot_right = max(0, self.end - end)
        clipped_duration = self.duration - overshoot_right - overshoot_left
        return fastcopy(self, start=max(0, self.start), duration=clipped_duration)
Esempio n. 18
0
def test_manifest_is_lazy(manifests, manifest_type):
    """Check ``is_lazy`` for eager/lazy manifests and for their concatenations and muxes."""
    # Eager manifest is not lazy
    eager = manifests[manifest_type]
    cls = type(eager)
    assert not eager.is_lazy

    # Save the manifest to JSONL and open it lazily
    with NamedTemporaryFile(suffix=".jsonl") as f, cls.open_writer(f.name) as writer:
        for item in eager:
            writer.write(item)
        f.flush()

        lazy = writer.open_manifest()

        # Lazy manifest is lazy
        assert lazy.is_lazy

        # Concatenation of eager + eager manifests is eager
        # (we have to modify ids to concatenate because of sanity checks)
        eager_eager_cat = eager + cls.from_items(
            fastcopy(it, id=it.id + "_cpy") if hasattr(it, "id") else it for it in eager
        )
        assert not eager_eager_cat.is_lazy

        # Concatenation of lazy + eager manifests is lazy
        lazy_eager_cat = lazy + eager
        assert lazy_eager_cat.is_lazy

        # Concatenation of eager + lazy manifests is lazy
        eager_lazy_cat = eager + lazy
        assert eager_lazy_cat.is_lazy

        # Concatenation of lazy + lazy manifests is lazy
        lazy_lazy_cat = lazy + lazy
        assert lazy_lazy_cat.is_lazy

        # Muxing of eager + eager manifests is lazy
        eager_eager_mux = cls.mux(eager, eager)
        assert eager_eager_mux.is_lazy

        # Muxing of lazy + eager manifests is lazy
        lazy_eager_mux = cls.mux(lazy, eager)
        assert lazy_eager_mux.is_lazy

        # Muxing of eager + lazy manifests is lazy
        eager_lazy_mux = cls.mux(eager, lazy)
        assert eager_lazy_mux.is_lazy

        # Muxing of lazy + lazy manifests is lazy
        lazy_lazy_mux = cls.mux(lazy, lazy)
        assert lazy_lazy_mux.is_lazy
Esempio n. 19
0
    def reverb_rir(self, affix_id: bool = True) -> "SupervisionSegment":
        """
        Return a copy of this ``SupervisionSegment``, optionally with "_rvb"-affixed ids.

        :param affix_id: When true, we will modify the ``id`` and ``recording_id`` fields
            by affixing it with "_rvb".
        :return: a modified copy of the current ``SupervisionSegment``.
        """
        if not affix_id:
            # Ids are carried over untouched.
            return fastcopy(self, id=self.id, recording_id=self.recording_id)
        return fastcopy(
            self,
            id=f"{self.id}_rvb",
            recording_id=f"{self.recording_id}_rvb",
        )
Esempio n. 20
0
 def copy_feats(self, writer: FeaturesWriter) -> "Features":
     """
     Load the feature array this manifest refers to and store it again through ``writer``.

     :param writer: the writer used to save the loaded array.
     :return: a copy of the manifest whose storage type, path and key come from ``writer``.
     """
     array = self.load()
     stored_key = writer.write(self.storage_key, array)
     return fastcopy(
         self,
         storage_type=writer.name,
         storage_path=writer.storage_path,
         storage_key=stored_key,
     )
Esempio n. 21
0
def test_cut_set_copy_feats_output_path(cuts):
    """``copy_feats`` with ``output_path`` keeps the cut count, the ids and the feature values."""
    # Two cuts: the first fixture cut, and a copy of it with its features dropped.
    first = cuts[0]
    without_feats = fastcopy(first, id="cut-no-feats").drop_features()
    cuts = CutSet.from_cuts([first, without_feats])

    with NamedTemporaryFile(suffix=".jsonl") as manifest_file:
        with TemporaryDirectory() as storage_dir:
            with NumpyFilesWriter(storage_dir) as writer:
                copied = cuts.copy_feats(writer=writer, output_path=manifest_file.name)
                assert len(copied) == len(cuts)
                assert list(copied.ids) == list(cuts.ids)
                for new_cut, old_cut in zip(copied, cuts):
                    # Only cuts that had features in the first place can be compared.
                    if old_cut.has_features:
                        assert (
                            new_cut.load_features() == old_cut.load_features()
                        ).all()
Esempio n. 22
0
def test_cut_set_copy_feats(cuts):
    """``copy_feats`` keeps the cut count and yields numpy features almost equal to the originals."""
    # Two cuts: the first fixture cut, and a copy of it with its features dropped.
    first = cuts[0]
    without_feats = fastcopy(first, id="cut-no-feats").drop_features()
    cuts = CutSet.from_cuts([first, without_feats])

    with TemporaryDirectory() as storage_dir, NumpyFilesWriter(storage_dir) as writer:
        copied = cuts.copy_feats(writer=writer)
        assert len(copied) == len(cuts)
        for new_cut, old_cut in zip(copied, cuts):
            # Cuts without features have nothing to compare.
            if old_cut.has_features:
                copied_feats = new_cut.load_features()
                assert isinstance(copied_feats, np.ndarray)
                np.testing.assert_almost_equal(copied_feats, old_cut.load_features())
Esempio n. 23
0
    def perturb_volume(self,
                       factor: float,
                       affix_id: bool = True) -> "SupervisionSegment":
        """
        Return a copy of this ``SupervisionSegment``, optionally with "_vp{factor}"-affixed ids.

        :param factor: The volume will be adjusted this many times (e.g. factor=1.1 means 1.1x louder).
            Here it is only used to build the id suffix.
        :param affix_id: When true, we will modify the ``id`` and ``recording_id`` fields
            by affixing it with "_vp{factor}".
        :return: a modified copy of the current ``SupervisionSegment``.
        """
        if not affix_id:
            # Ids are carried over untouched.
            return fastcopy(self, id=self.id, recording_id=self.recording_id)
        suffix = f"_vp{factor}"
        return fastcopy(
            self,
            id=f"{self.id}{suffix}",
            recording_id=f"{self.recording_id}{suffix}",
        )
Esempio n. 24
0
    def trim(self,
             end: Seconds,
             start: Optional[Seconds] = 0) -> 'SupervisionSegment':
        """
        Return a copy of this ``SupervisionSegment`` clipped to the ``[start, end]`` range:
        the segment start is raised to ``start`` when it lies before it, and the duration
        is shortened by whatever sticks out on either side. ``start`` must be non-negative
        and should be relative to the cut times.

        This method is useful for ensuring that the supervision does not exceed a cut's bounds,
        in which case pass ``cut.duration`` as the ``end`` argument, since supervision times are relative to the cut.
        """
        assert start >= 0
        # How far the segment reaches before ``start`` and past ``end``, respectively.
        overshoot_left = max(0, start - self.start)
        overshoot_right = max(0, self.end - end)
        clipped_start = max(start, self.start)
        clipped_duration = self.duration - overshoot_right - overshoot_left
        return fastcopy(self, start=clipped_start, duration=clipped_duration)
Esempio n. 25
0
def test_export_to_webdataset():
    """Export cuts to a tar file and check the ids read back with ``webdataset`` directly."""
    template = CutSet.from_file("test/fixtures/libri/cuts.json")[0]
    # Ten copies of the first fixture cut, told apart by an index suffix in the id.
    cuts = CutSet.from_cuts(
        [fastcopy(template, id=f"{template.id}-{i}") for i in range(10)]
    )

    with NamedTemporaryFile(suffix=".tar") as f:
        export_to_webdataset(cuts, output_path=f.name)
        f.flush()

        # Every sample stores a pickled cut dict under the "data" key.
        cuts_ds = CutSet.from_dicts(
            pickle.loads(sample["data"]) for sample in webdataset.WebDataset(f.name)
        )

    assert list(cuts.ids) == list(cuts_ds.ids)
Esempio n. 26
0
def test_cutset_from_webdataset():
    """Export cuts to a tar file and check ids, audio and features read back via ``from_webdataset``."""
    template = CutSet.from_file("test/fixtures/libri/cuts.json")[0]
    # Ten copies of the first fixture cut, told apart by an index suffix in the id.
    cuts = CutSet.from_cuts(
        [fastcopy(template, id=f"{template.id}-{i}") for i in range(10)]
    )

    with NamedTemporaryFile(suffix=".tar") as f:
        export_to_webdataset(cuts, output_path=f.name)
        f.flush()

        cuts_ds = CutSet.from_webdataset(f.name)

        assert list(cuts.ids) == list(cuts_ds.ids)

        for original, restored in zip(cuts, cuts_ds):
            np.testing.assert_equal(original.load_audio(), restored.load_audio())
            np.testing.assert_almost_equal(
                original.load_features(), restored.load_features(), decimal=2
            )
Esempio n. 27
0
 def resample(self, sampling_rate: int) -> 'Recording':
     """
     Return a copy of this ``Recording`` with a ``Resample`` transform appended
     and the sample count, duration and sampling rate updated to match.

     :param sampling_rate: The new sampling rate.
     :return: A resampled ``Recording``.
     """
     resample_op = Resample(
         source_sampling_rate=self.sampling_rate,
         target_sampling_rate=sampling_rate,
     )
     new_num_samples = compute_num_samples(self.duration, sampling_rate, rounding=ROUND_HALF_UP)
     # For non-trivial rate changes (e.g. 16000 -> 22050) the new sample count
     # may not correspond to the old duration exactly, so the duration is
     # re-derived from it.
     adjusted_duration = new_num_samples / sampling_rate
     previous_transforms = self.transforms or []
     return fastcopy(
         self,
         duration=adjusted_duration,
         num_samples=new_num_samples,
         sampling_rate=sampling_rate,
         transforms=previous_transforms + [resample_op.to_dict()],
     )
Esempio n. 28
0
    def perturb_speed(self, factor: float, affix_id: bool = True) -> 'Recording':
        """
        Return a new ``Recording`` that will lazily perturb the speed while loading audio.
        The ``num_samples`` and ``duration`` fields are updated to reflect the
        shrinking/extending effect of speed.

        :param factor: The speed will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
        :param affix_id: When true, we will modify the ``Recording.id`` field
            by affixing it with "_sp{factor}".
        :return: a modified copy of the current ``Recording``.
        """
        # Build a new list instead of appending to ``self.transforms``: appending
        # in place would also add the transform to the original recording.
        transforms = list(self.transforms or []) + [Speed(factor=factor).to_dict()]
        new_num_samples = perturb_num_samples(self.num_samples, factor)
        new_duration = new_num_samples / self.sampling_rate
        return fastcopy(
            self,
            id=f'{self.id}_sp{factor}' if affix_id else self.id,
            num_samples=new_num_samples,
            duration=new_duration,
            transforms=transforms
        )
Esempio n. 29
0
    def perturb_speed(self,
                      factor: float,
                      sampling_rate: int,
                      affix_id: bool = True) -> 'SupervisionSegment':
        """
        Return a copy of this ``SupervisionSegment`` whose ``start`` and ``duration``
        are perturbed with ``factor``, to match a recording/cut perturbed the same way.

        :param factor: The speed will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
        :param sampling_rate: The sampling rate is necessary to accurately perturb the start
            and duration (going through sample counts).
        :param affix_id: When true, the ``id`` field of the copy is affixed
            with "_sp{factor}".
        :return: a modified copy of the current ``SupervisionSegment``.
        """

        def perturb_seconds(seconds):
            # seconds -> rounded sample count -> perturbed sample count -> seconds
            sample_count = round(seconds * sampling_rate)
            return perturb_num_samples(sample_count, factor) / sampling_rate

        new_id = f'{self.id}_sp{factor}' if affix_id else self.id
        return fastcopy(
            self,
            id=new_id,
            start=perturb_seconds(self.start),
            duration=perturb_seconds(self.duration),
        )
Esempio n. 30
0
    def perturb_tempo(self,
                      factor: float,
                      sampling_rate: int,
                      affix_id: bool = True) -> "SupervisionSegment":
        """
        Return a copy of this ``SupervisionSegment`` with time boundaries perturbed by
        ``factor``, matching a recording/cut perturbed with the same factor.

        :param factor: The tempo will be adjusted this many times (e.g. factor=1.1 means 1.1x faster).
        :param sampling_rate: The sampling rate is necessary to accurately perturb the start
            and duration (going through the sample counts).
        :param affix_id: When true, we will modify the ``id`` and ``recording_id`` fields
            by affixing it with "_tp{factor}".
        :return: a modified copy of the current ``SupervisionSegment``.
        """
        # Tempo and speed perturbation affect supervisions identically, so the
        # time boundaries are delegated to ``perturb_speed``; ids are set here.
        perturbed = self.perturb_speed(factor, sampling_rate, affix_id=False)
        if not affix_id:
            return fastcopy(perturbed, id=self.id, recording_id=self.recording_id)
        suffix = f"_tp{factor}"
        return fastcopy(
            perturbed,
            id=f"{self.id}{suffix}",
            recording_id=f"{self.recording_id}{suffix}",
        )