Python CutSet.open_writer Examples

Programming Language: Python

Namespace/Package Name: lhotse.cut

Class/Type: CutSet

Method/Function: open_writer

Examples at hotexamples.com: 2

Python CutSet.open_writer - 2 examples found. These are the top rated real world Python examples of lhotse.cut.CutSet.open_writer extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

from_cuts(28)

from_json(18)

from_manifests(10)

from_yaml(7)

from_file(4)

CutSet(3)

open_writer(2)

sort_by_duration(2)

copy_feats(1)

from_dicts(1)

index_supervisions(1)

Example #1

Show file

File: cut.py Project: AmirHussein96/lhotse

def trim_to_supervisions(
    cuts: Pathlike,
    output_cuts: Pathlike,
    keep_overlapping: bool,
    min_duration: Optional[float],
    context_direction: str,
):
    """
    Splits each input cut into as many cuts as there are supervisions.
    These cuts have identical start times and durations as the supervisions.
    When there are overlapping supervisions, they can be kept or discarded with options.

    \b
    For example, the following cut:
                Cut
        |-----------------|
         Sup1
        |----|  Sup2
           |-----------|

    \b
    is transformed into two cuts:
         Cut1
        |----|
         Sup1
        |----|
           Sup2
           |-|
                Cut2
           |-----------|
           Sup1
           |-|
                Sup2
           |-----------|
    """
    # NOTE(review): the docstring above is kept verbatim; the ``\b`` markers
    # look like CLI help-formatting hints (presumably click) -- confirm before
    # rewording it.

    # Imported locally, as in the original command.
    from lhotse.serialization import load_manifest_lazy_or_eager

    source_cuts = load_manifest_lazy_or_eager(cuts)

    # The same trimming options apply to every cut, so collect them once.
    trim_options = dict(
        keep_overlapping=keep_overlapping,
        min_duration=min_duration,
        context_direction=context_direction,
    )

    # Write each trimmed cut as soon as it is produced instead of
    # accumulating the whole result first.
    with CutSet.open_writer(output_cuts) as writer:
        for cut in source_cuts:
            for trimmed in cut.trim_to_supervisions(**trim_options):
                writer.write(trimmed)

Example #2

Show file

File: manipulation.py Project: glynpu/lhotse

def copy_feats(
    input_manifest: Pathlike,
    output_manifest: Pathlike,
    storage_path: str,
    storage_type: str,
    max_jobs: int,
) -> None:
    """
    Load INPUT_MANIFEST of type :class:`lhotse.FeatureSet` or `lhotse.CutSet`,
    read every feature matrix using ``features.load()`` or ``cut.load_features()``,
    save them in STORAGE_PATH and save the updated manifest to OUTPUT_MANIFEST.
    """
    # Behaviour summary:
    #   * FeatureSet input: features are copied through a single writer of
    #     ``storage_type`` opened at ``storage_path``.
    #   * CutSet input: cuts are grouped by ``cut.features.storage_path``; each
    #     group is handed to ``copy_feats_worker`` in a process pool (at most
    #     ``max_jobs`` workers when ``max_jobs > 0``, otherwise one per group),
    #     and the partial manifests are combined into ``output_manifest``.
    #   * An empty CutSet yields an empty ``output_manifest``.
    #   * Any other manifest type raises ``ValueError``.
    from lhotse.serialization import load_manifest_lazy_or_eager
    from lhotse.manipulation import combine as combine_manifests

    manifests = load_manifest_lazy_or_eager(input_manifest)

    if isinstance(manifests, FeatureSet):
        with get_writer(storage_type)(storage_path) as w:
            # FeatureSet is copied in-memory and written (TODO: make it incremental if needed)
            manifests = manifests.copy_feats(writer=w)
            manifests.to_file(output_manifest)

    elif isinstance(manifests, CutSet):

        def storage_path_of(cut):
            # Single grouping key shared by ``sorted`` and ``groupby`` so the
            # two can never drift apart (groupby requires pre-sorted input).
            return cut.features.storage_path

        # Group cuts by their underlying feature files.
        sorted_cuts = sorted(manifests, key=storage_path_of)

        if not sorted_cuts:
            # With no cuts, ``zip(*[])`` below would fail to unpack with an
            # opaque ValueError, and a pool with zero workers is invalid.
            # There is nothing to copy, so emit an empty manifest and stop.
            with CutSet.open_writer(output_manifest):
                pass
            return

        unique_storage_paths, subsets = zip(
            *[
                (key, CutSet.from_cuts(group))
                for key, group in groupby(sorted_cuts, storage_path_of)
            ]
        )

        # Create paths for new feature files and subset cutsets.
        tot_items = len(unique_storage_paths)
        new_storage_paths = [
            f"{storage_path}/feats-{i}" for i in range(tot_items)
        ]
        partial_manifest_paths = [
            f"{storage_path}/cuts-{i}.jsonl.gz" for i in range(tot_items)
        ]

        # One worker per feature file unless the caller caps it.
        num_jobs = tot_items
        if max_jobs > 0:
            num_jobs = min(num_jobs, max_jobs)

        # Create directory if needed (storage_path might be a URL)
        if Path(storage_path).parent.is_dir():
            Path(storage_path).mkdir(exist_ok=True)

        # Copy each partition in parallel and combine lazily opened manifests.
        with ProcessPoolExecutor(num_jobs) as ex:
            futures = [
                ex.submit(copy_feats_worker, cs, nsp, storage_type, pmp)
                for cs, nsp, pmp in zip(
                    subsets, new_storage_paths, partial_manifest_paths
                )
            ]

            all_cuts = combine_manifests(
                (f.result() for f in as_completed(futures)))

        # Combine and save subset cutsets into the final file.
        with CutSet.open_writer(output_manifest) as w:
            for c in all_cuts:
                w.write(c)
    else:
        raise ValueError(
            f"Unsupported manifest type ({type(manifests)}) at: {input_manifest}"
        )