Example #1
import codecs
import json
import os
import subprocess as sp
import tempfile as tmp

import stempeg


def test_nistems():
    mp4exc = stempeg.cmds.find_cmd("MP4Box")

    stems, rate = stempeg.read_stems(stempeg.example_stem_path())
    with tmp.NamedTemporaryFile(delete=False, suffix='.m4a') as tempfile:

        stempeg.write_stems(tempfile.name,
                            stems,
                            sample_rate=rate,
                            writer=stempeg.NIStemsWriter())
        callArgs = [mp4exc]
        callArgs.extend(["-dump-udta", "0:stem", tempfile.name])
        sp.check_call(callArgs)

        root, ext = os.path.splitext(tempfile.name)
        udtaFile = root + "_stem.udta"
        with open(stempeg.default_metadata()) as f:
            d_metadata = json.load(f)

        try:
            fileObj = codecs.open(udtaFile, encoding="utf-8")
            fileObj.seek(8)
            l_metadata = json.load(fileObj)
        except json.decoder.JSONDecodeError:
            with open(udtaFile) as json_file:
                l_metadata = json.load(json_file)

        assert ordered(l_metadata) == ordered(d_metadata)
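
Note: `ordered` is a helper defined elsewhere in the test module. A minimal sketch of an order-insensitive comparison helper that would satisfy the assertion above (an assumption, not necessarily the upstream implementation):

def ordered(obj):
    # recursively sort dicts and lists so the comparison ignores ordering
    if isinstance(obj, dict):
        return sorted((k, ordered(v)) for k, v in obj.items())
    if isinstance(obj, list):
        return sorted(ordered(x) for x in obj)
    return obj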
Example #2
def test_shape(nb_samples):
    R = np.random.random((5, nb_samples, 2))
    stempeg.write_stems("./random.stem.m4a", R, writer=stempeg.StreamsWriter())
    S, rate = stempeg.read_stems("./random.stem.m4a")

    assert S.shape[0] == R.shape[0]
    assert S.shape[2] == R.shape[2]
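    # AAC encodes audio in 1024-sample frames, so the decoded length is padded
    # to a whole number of frames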
    assert S.shape[1] % 1024 == 0
Example #3
def test_multifileformats(audio, multifile_format, nb_stems):
    with tmp.NamedTemporaryFile(delete=False,
                                suffix='.' + multifile_format) as tempfile:
        stem_names = [str(k) for k in range(nb_stems)]
        stempeg.write_stems(tempfile.name,
                            audio,
                            sample_rate=44100,
                            writer=stempeg.FilesWriter(stem_names=stem_names))
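        # FilesWriter writes one output file per stem; stem_names provide the
        # per-stem names (the exact file-naming scheme may vary by stempeg version)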
Example #4
def test_shape(nb_samples):
    R = np.random.random((5, nb_samples, 2))
    stempeg.write_stems("./random.stem.mp4", R)
    S, rate = stempeg.read_stems("./random.stem.mp4")

    assert S.shape[0] == R.shape[0]
    assert S.shape[2] == R.shape[2]
    assert S.shape[1] % 1024 == 0
Example #5
def test_ffmpeg_errors(audio):
    if audio.ndim == 3:
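        # a 3D (stems x samples x channels) array cannot be written into a
        # single-stream .wav container, so write_stems is expected to raise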
        with pytest.raises(RuntimeError):
            with tmp.NamedTemporaryFile(delete=False,
                                        suffix='.wav') as tempfile:
                stempeg.write_stems(tempfile.name,
                                    audio,
                                    sample_rate=44100,
                                    writer=stempeg.StreamsWriter())
Example #6
def test_multichannel_containers(audio, nb_channels, multichannel_format):
    with tmp.NamedTemporaryFile(delete=False,
                                suffix='.' + multichannel_format) as tempfile:
        stempeg.write_stems(tempfile.name,
                            audio,
                            sample_rate=44100,
                            writer=stempeg.ChannelsWriter())
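        # ChannelsWriter packs all stems into the channels of a single stream;
        # ChannelsReader(nb_channels=...) below splits them back into stems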
        loaded_audio, rate = stempeg.read_stems(
            tempfile.name,
            always_3d=True,
            reader=stempeg.ChannelsReader(nb_channels=nb_channels))
        assert audio.shape == loaded_audio.shape
Example #7
def test_multistream_containers(audio, multistream_format, nb_stems):
    if nb_stems > 1:
        with tmp.NamedTemporaryFile(delete=False,
                                    suffix='.' +
                                    multistream_format) as tempfile:
            stem_names = [str(k) for k in range(nb_stems)]
            stempeg.write_stems(tempfile.name,
                                audio,
                                sample_rate=44100,
                                writer=stempeg.StreamsWriter(
                                    codec='aac', stem_names=stem_names))
            loaded_audio, rate = stempeg.read_stems(tempfile.name,
                                                    always_3d=True)
            assert audio.shape == loaded_audio.shape
            if multistream_format == "m4a":
                info = stempeg.Info(tempfile.name)
                loaded_stem_names = info.title_streams
                # check if titles could be extracted
                assert all(
                    [a == b for a, b in zip(stem_names, loaded_stem_names)])
Example #8
import stempeg

# 0 - The mixture,
# 1 - The drums,
# 2 - The bass,
# 3 - The rest of the accompaniment,
# 4 - The vocals.

# example
S, rate = stempeg.read_stems(stempeg.example_stem_path())
stempeg.write_stems(
    "output.mp4",
    S,
    sample_rate=rate,
    writer=stempeg.StreamsWriter())

S, rate = stempeg.read_stems("C:/Users/hahla/Downloads/output.mp4", stem_id=[0])



stems_folder = "D:/Development/github/GAN-tests/audio_files_split/audio_files_001"

filename_mix = "D:/Development/github/GAN-tests/audio_files_split/audio_file_mixture_0002.wav"
filename_drums = "D:/Development/github/GAN-tests/audio_files_split/audio_file_hits_0002.wav"
filename_bass = "D:/Development/github/GAN-tests/audio_files_split/audio_file_soundless_audio_0002.wav"
filename_other = "D:/Development/github/GAN-tests/audio_files_split/audio_file_background_0002.wav"
filename_vocals = "D:/Development/github/GAN-tests/audio_files_split/audio_file_soundless_audio_0002.wav"

S_filename_mix, rate = stempeg.read_stems(filename_mix, stem_id=0)
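
# A sketch (assuming the stem layout listed in the comments above): read only
# selected streams from the bundled example file by passing their indices.
drums_and_vocals, rate = stempeg.read_stems(
    stempeg.example_stem_path(), stem_id=[1, 4]
)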
Example #9
def test_write():
    S, rate = stempeg.read_stems(
        "tests/data/The Easton Ellises - Falcon 69.stem.mp4"
    )
    stempeg.write_stems("./stems.mp4", S, sample_rate=rate)
Example #10
    args = parser.parse_args()

    # load stems
    stems, rate = stempeg.read_stems(args.input)

    # load stems,
    # resample to 96000 Hz,
    # use multiprocessing
    stems, rate = stempeg.read_stems(args.input,
                                     sample_rate=96000,
                                     multiprocess=True)

    # --> stems now has `shape=(stem x samples x channels)`

    # save stems from tensor as multi-stream mp4
    stempeg.write_stems("test.stem.m4a", stems, sample_rate=96000)

    # save stems as dict for convenience
    stems = {
        "mix": stems[0],
        "drums": stems[1],
        "bass": stems[2],
        "other": stems[3],
        "vocals": stems[4],
    }
    # the dict keys are automatically used as stem names

    # write from the dict (here back into a multi-stream m4a)
    stempeg.write_stems("test.stem.m4a", data=stems, sample_rate=96000)

    # `write_stems` is a preset for the following settings
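    # (the example is cut off above; as a rough sketch, using only options that
    # appear elsewhere in these examples, the preset is roughly equivalent to
    # spelling out the StreamsWriter explicitly; exact defaults such as codec,
    # bitrate and stem names may differ between stempeg versions)
    stempeg.write_stems(
        "test.stem.m4a",
        data=stems,
        sample_rate=96000,
        writer=stempeg.StreamsWriter(codec="aac"),
    )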
Example #11
def test_write():
    S, rate = stempeg.read_stems(stempeg.example_stem_path())
    stempeg.write_stems("./stems.mp4", S, sample_rate=rate)
Example #12
def separate():
    parser = argparse.ArgumentParser(
        description="UMX Inference",
        add_help=True,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument("input",
                        type=str,
                        nargs="+",
                        help="List of paths to wav/flac files.")

    parser.add_argument(
        "--model",
        default="umxhq",
        type=str,
        help="path to mode base directory of pretrained models",
    )

    parser.add_argument(
        "--targets",
        nargs="+",
        type=str,
        help="provide targets to be processed. \
              If none, all available targets will be computed",
    )

    parser.add_argument(
        "--outdir",
        type=str,
        help="Results path where audio evaluation results are stored",
    )

    parser.add_argument(
        "--ext",
        type=str,
        default=".wav",
        help="Output extension which sets the audio format",
    )

    parser.add_argument("--start",
                        type=float,
                        default=0.0,
                        help="Audio chunk start in seconds")

    parser.add_argument(
        "--duration",
        type=float,
        help="Audio chunk duration in seconds, negative values load full track",
    )

    parser.add_argument("--no-cuda",
                        action="store_true",
                        default=False,
                        help="disables CUDA inference")

    parser.add_argument(
        "--audio-backend",
        type=str,
        default="sox_io",
        help="Set torchaudio backend "
        "(`sox_io`, `sox`, `soundfile` or `stempeg`), defaults to `sox_io`",
    )

    parser.add_argument(
        "--niter",
        type=int,
        default=1,
        help="number of iterations for refining results.",
    )

    parser.add_argument(
        "--wiener-win-len",
        type=int,
        default=300,
        help="Number of frames on which to apply filtering independently",
    )

    parser.add_argument(
        "--residual",
        type=str,
        default=None,
        help="if provided, build a source with given name"
        "for the mix minus all estimated targets",
    )

    parser.add_argument(
        "--aggregate",
        type=str,
        default=None,
        help="if provided, must be a string containing a valid expression for "
        "a dictionary, with keys as output target names, and values "
        "a list of targets that are used to build it. For instance: "
        '\'{"vocals":["vocals"], "accompaniment":["drums",'
        '"bass","other"]}\'',
    )

    parser.add_argument(
        "--filterbank",
        type=str,
        default="torch",
        help="filterbank implementation method. "
        "Supported: `['torch', 'asteroid']`. `torch` is ~30% faster"
        "compared to `asteroid` on large FFT sizes such as 4096. However"
        "asteroids stft can be exported to onnx, which makes is practical"
        "for deployment.",
    )
    args = parser.parse_args()

    if args.audio_backend != "stempeg":
        torchaudio.set_audio_backend(args.audio_backend)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print("Using ", device)
    # parsing the output dict
    aggregate_dict = None if args.aggregate is None else json.loads(
        args.aggregate)

    # create separator only once to reduce model loading
    # when using multiple files
    separator = utils.load_separator(
        model_str_or_path=args.model,
        targets=args.targets,
        niter=args.niter,
        residual=args.residual,
        wiener_win_len=args.wiener_win_len,
        device=device,
        pretrained=True,
        filterbank=args.filterbank,
    )

    separator.freeze()
    separator.to(device)

    if args.audio_backend == "stempeg":
        try:
            import stempeg
        except ImportError:
            raise RuntimeError("Please install pip package `stempeg`")

    # loop over the files
    for input_file in args.input:
        if args.audio_backend == "stempeg":
            audio, rate = stempeg.read_stems(
                input_file,
                start=args.start,
                duration=args.duration,
                sample_rate=separator.sample_rate,
                dtype=np.float32,
            )
            audio = torch.tensor(audio)
        else:
            audio, rate = data.load_audio(input_file,
                                          start=args.start,
                                          dur=args.duration)
        estimates = predict.separate(
            audio=audio,
            rate=rate,
            aggregate_dict=aggregate_dict,
            separator=separator,
            device=device,
        )
        if not args.outdir:
            model_path = Path(args.model)
            if not model_path.exists():
                outdir = Path(Path(input_file).stem + "_" + args.model)
            else:
                outdir = Path(Path(input_file).stem + "_" + model_path.stem)
        else:
            outdir = Path(args.outdir) / Path(input_file).stem
        outdir.mkdir(exist_ok=True, parents=True)

        # write out estimates
        if args.audio_backend == "stempeg":
            target_path = str(outdir / Path("target").with_suffix(args.ext))
            # convert torch dict to numpy dict
            estimates_numpy = {}
            for target, estimate in estimates.items():
                estimates_numpy[target] = torch.squeeze(
                    estimate).detach().cpu().numpy().T

            stempeg.write_stems(
                target_path,
                estimates_numpy,
                sample_rate=separator.sample_rate,
                writer=stempeg.FilesWriter(multiprocess=True,
                                           output_sample_rate=rate),
            )
        else:
            for target, estimate in estimates.items():
                target_path = str(outdir / Path(target).with_suffix(args.ext))
                torchaudio.save(
                    target_path,
                    torch.squeeze(estimate).to("cpu"),
                    sample_rate=separator.sample_rate,
                )
Example #13
"""Opens a stem file and saves (reencodes) back to a stem file
"""
import argparse
import stempeg
import numpy as np
from os import path as op

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('input')
    args = parser.parse_args()

    # read stems
    stems, rate = stempeg.read_stems(args.input)
    print(stems.shape)
    stempeg.write_stems("stems.mp4", stems, sample_rate=rate)
    stems2, rate = stempeg.read_stems("stems.mp4")
    print(stems2.shape)