コード例 #1
0
def identify_genre():
    """Predict a genre for ``audio_file`` and store it in the global ``music_genre``.

    The first element returned by ``load_audio(audio_file)`` is passed through
    ``split_audio`` and ``to_melspec``; the resulting batch is fed to
    ``model.predict``.  The class index predicted most often across the batch
    selects the entry of ``genres`` that is printed and assigned to
    ``music_genre``.  Nothing is returned.
    """
    global music_genre

    waveform = load_audio(audio_file)[0]
    segments = split_audio(waveform)

    # Collect the per-segment outputs of to_melspec into a single array.
    mel_batch = np.array(list(to_melspec(segments)))
    # Repeat the array three times along a new trailing axis, then drop any
    # size-1 axes (presumably to match a 3-channel model input — TODO confirm).
    mel_batch = np.squeeze(np.stack([mel_batch, mel_batch, mel_batch], axis=-1))

    scores = np.array(model.predict(mel_batch))
    votes = np.argmax(scores, axis=1)

    print('-' * 40)
    # Majority vote: the most frequently predicted class index wins.
    winner = np.bincount(votes).argmax()
    music_genre = genres[winner]
    print(music_genre)
コード例 #2
0
def main(args):
    """Benchmark ``f0_transfer`` on the GPU for a range of buffer sizes.

    Loads the run configuration and trained generators from
    ``args.trained_dirpath``, prepares the inputs from the first ``*.wav`` file
    found in ``args.input_dirpath`` and the first entry of ``args.octaves``,
    then calls ``f0_transfer`` 100 times per buffer size.  One row
    ``["htp", "gpu", buffer_size, seconds]`` per call is written to
    ``htp_gpu_rtf.csv`` in the original working directory.

    Args:
        args: Configuration object providing ``trained_dirpath``,
            ``input_dirpath``, ``exp_name``, ``crepe_path``, ``unvoiced_flag``,
            ``norm_loudness_flag`` and ``octaves``.

    Raises:
        FileNotFoundError: If ``args.input_dirpath`` contains no ``*.wav`` file.
        ValueError: If ``args.octaves`` is empty.
    """
    CWD = Path(hydra.utils.get_original_cwd())
    os.chdir(CWD)
    # Load model args
    trained_dirpath = Path(args.trained_dirpath)
    # NOTE: torch.load unpickles the file; only use it on trusted run folders.
    run_args = torch.load(trained_dirpath / 'args.pth')

    # define args from trained model
    sr = run_args.sr
    num_scales = run_args.num_scales
    scale_factor = run_args.scale_factor
    max_value = run_args.max_val
    max_value_f0 = run_args.max_val_f0
    cond_freq = run_args.cond_freq

    # Convert filepaths
    input_dirpath = Path(args.input_dirpath)
    input_files = input_dirpath.glob('*.wav')

    output_dirpath = trained_dirpath.joinpath(args.exp_name)
    try:
        output_dirpath.mkdir()
    except FileExistsError:
        print('Directory already exists')

    # Pytorch device
    device = torch.device("cuda")

    # load input file
    base_audio = BaseAudio(args.crepe_path, device, args.unvoiced_flag)
    srs = create_srs(sr, num_scales, scale_factor)
    samplers = create_samplers(srs, device=device)

    if args.norm_loudness_flag:
        norm_dicts = load_norm_dicts(trained_dirpath / 'loudness.json')
    else:
        norm_dicts = None

    octave_shifts = [2**x for x in args.octaves]

    # Load Trained models
    Gs = load_trained_pyramid(trained_dirpath, network_params=run_args.generator_params, device=device, srs=srs)

    # Only the first file and the first octave shift are benchmarked; fail
    # with an explicit message instead of a bare StopIteration when missing.
    filepath = next(iter(input_files), None)
    if filepath is None:
        raise FileNotFoundError(f"No .wav files found in {input_dirpath}")
    if not octave_shifts:
        raise ValueError("args.octaves must contain at least one entry")
    octave = octave_shifts[0]

    real_audio = load_audio(filepath, sr, max_value)
    loudness_hop = 8 * sr // cond_freq
    # Trim so the length is a whole multiple of loudness_hop.
    real_audio = real_audio[:len(real_audio) // loudness_hop * loudness_hop]
    loudness_list = calc_loudness_list(audio=real_audio, srs=srs, device=device,
                                        sr_in=sr, norm_dicts=norm_dicts)
    real_audio = base_audio.forward(real_audio, sr, max_value_f0, numpy_flag=True, octave=octave)

    # Add two leading singleton axes and move to the GPU.
    real_audio_orig = real_audio[None, None, ...].to(device)
    # resample input to the wanted scale
    real_audio = resample_torch(real_audio_orig, sr, srs[0], max_val=max_value_f0)

    BUFFER_SIZES = [256, 512, 1024, 2048, 4096, 8192, 16384, 32768]
    num_iters = 100
    times = []
    for bs in BUFFER_SIZES:
        # NOTE(review): the divisor 8 and the constant 64000 are hard-coded;
        # presumably they relate the buffer size to the coarsest scale and to
        # a 64000-sample input — confirm against the trained configuration.
        this_ra = real_audio[..., :bs // 8]
        this_ll = [
            loudness[..., :bs // (64000 // loudness.shape[-1])]
            for loudness in loudness_list
        ]
        with torch.no_grad():
            for _ in trange(num_iters):
                # CUDA kernels are launched asynchronously: synchronise before
                # starting and before stopping the clock so the measurement
                # covers the GPU work and not just the kernel launches.
                torch.cuda.synchronize()
                start_time = time.perf_counter()
                f0_transfer(this_ra, this_ll, Gs, samplers, max_val=max_value, save_all=False)
                torch.cuda.synchronize()
                time_elapsed = time.perf_counter() - start_time
                times.append(
                    ["htp", "gpu", bs, time_elapsed]
                )

    df = pd.DataFrame(times)
    df.to_csv("htp_gpu_rtf.csv")
コード例 #3
0
def main(args):
    """Run ``f0_transfer`` on every ``*.wav`` input for every octave shift.

    Loads the run configuration and trained generators from
    ``args.trained_dirpath``.  For each ``*.wav`` file in
    ``args.input_dirpath`` and each entry of ``args.octaves`` the audio is
    loaded, its loudness list is computed, ``base_audio.forward`` is applied
    and the result is passed to ``f0_transfer``.  The outputs, with the
    un-resampled ``base_audio.forward`` result appended, are handed to
    ``save_audios`` together with ``<trained_dirpath>/<exp_name>``.

    Args:
        args: Configuration object providing ``trained_dirpath``,
            ``input_dirpath``, ``exp_name``, ``crepe_path``, ``unvoiced_flag``,
            ``norm_loudness_flag`` and ``octaves``.
    """
    CWD = Path(hydra.utils.get_original_cwd())
    os.chdir(CWD)
    # Load model args
    trained_dirpath = Path(args.trained_dirpath)
    # NOTE: torch.load unpickles the file; only use it on trusted run folders.
    run_args = torch.load(trained_dirpath / 'args.pth')

    # define args from trained model
    sr = run_args.sr
    num_scales = run_args.num_scales
    scale_factor = run_args.scale_factor
    max_value = run_args.max_val
    max_value_f0 = run_args.max_val_f0
    cond_freq = run_args.cond_freq

    # Convert filepaths
    input_dirpath = Path(args.input_dirpath)
    input_files = input_dirpath.glob('*.wav')

    output_dirpath = trained_dirpath.joinpath(args.exp_name)
    try:
        output_dirpath.mkdir()
    except FileExistsError:
        print('Directory already exists')

    # Pytorch device
    device = torch.device("cuda")

    # load input file
    base_audio = BaseAudio(args.crepe_path, device, args.unvoiced_flag)
    srs = create_srs(sr, num_scales, scale_factor)
    samplers = create_samplers(srs, device=device)

    if args.norm_loudness_flag:
        norm_dicts = load_norm_dicts(trained_dirpath / 'loudness.json')
    else:
        norm_dicts = None

    octave_shifts = [2**x for x in args.octaves]

    # Load Trained models
    Gs = load_trained_pyramid(trained_dirpath, network_params=run_args.generator_params, device=device, srs=srs)

    # Depends only on the run configuration, so compute it once up front.
    loudness_hop = 8 * sr // cond_freq

    for filepath in tqdm(input_files, desc='Generating audio file'):
        for octave in tqdm(octave_shifts, desc='Octave'):
            real_audio = load_audio(filepath, sr, max_value)
            # Trim so the length is a whole multiple of loudness_hop.
            real_audio = real_audio[:len(real_audio) // loudness_hop * loudness_hop]
            loudness_list = calc_loudness_list(audio=real_audio, srs=srs, device=device,
                                               sr_in=sr, norm_dicts=norm_dicts)
            real_audio = base_audio.forward(real_audio, sr, max_value_f0, numpy_flag=True, octave=octave)

            # Add two leading singleton axes and move to the GPU.
            real_audio_orig = real_audio[None, None, ...].to(device)
            # resample input to the wanted scale
            real_audio = resample_torch(real_audio_orig, sr, srs[0], max_val=max_value_f0)

            # Pure inference: disable autograd so no graph is recorded for
            # the generator forward passes.
            with torch.no_grad():
                audio_outputs = f0_transfer(real_audio, loudness_list, Gs, samplers,
                                            max_val=max_value, save_all=False)

            # add f0 sine input
            audio_outputs.append(real_audio_orig)
            save_audios(output_dirpath, audio_outputs, f'{filepath.stem}_{octave}', [srs[-1]])
コード例 #4
0
def main(args):
    """Measure the real-time factor (RTF) of ``f0_transfer`` on the GPU.

    Loads the run configuration and trained generators from
    ``args.trained_dirpath``, prepares the inputs from the first ``*.wav`` file
    found in ``args.input_dirpath`` and the first entry of ``args.octaves``,
    then times 100 calls of ``f0_transfer``.  Each elapsed time is divided by
    a fixed duration of 4 seconds and the mean and 90th percentile of the
    resulting ratios are printed.

    Args:
        args: Configuration object providing ``trained_dirpath``,
            ``input_dirpath``, ``exp_name``, ``crepe_path``, ``unvoiced_flag``,
            ``norm_loudness_flag`` and ``octaves``.

    Raises:
        FileNotFoundError: If ``args.input_dirpath`` contains no ``*.wav`` file.
        ValueError: If ``args.octaves`` is empty.
    """
    CWD = Path(hydra.utils.get_original_cwd())
    os.chdir(CWD)
    # Load model args
    trained_dirpath = Path(args.trained_dirpath)
    # NOTE: torch.load unpickles the file; only use it on trusted run folders.
    run_args = torch.load(trained_dirpath / 'args.pth')

    # define args from trained model
    sr = run_args.sr
    num_scales = run_args.num_scales
    scale_factor = run_args.scale_factor
    max_value = run_args.max_val
    max_value_f0 = run_args.max_val_f0
    cond_freq = run_args.cond_freq

    # Convert filepaths
    input_dirpath = Path(args.input_dirpath)
    input_files = input_dirpath.glob('*.wav')

    output_dirpath = trained_dirpath.joinpath(args.exp_name)
    try:
        output_dirpath.mkdir()
    except FileExistsError:
        print('Directory already exists')

    # Pytorch device
    device = torch.device("cuda")

    # load input file
    base_audio = BaseAudio(args.crepe_path, device, args.unvoiced_flag)
    srs = create_srs(sr, num_scales, scale_factor)
    samplers = create_samplers(srs, device=device)

    if args.norm_loudness_flag:
        norm_dicts = load_norm_dicts(trained_dirpath / 'loudness.json')
    else:
        norm_dicts = None

    octave_shifts = [2**x for x in args.octaves]

    # Load Trained models
    Gs = load_trained_pyramid(trained_dirpath, network_params=run_args.generator_params, device=device, srs=srs)

    # Only the first file and the first octave shift are benchmarked; fail
    # with an explicit message instead of a bare StopIteration when missing.
    filepath = next(iter(input_files), None)
    if filepath is None:
        raise FileNotFoundError(f"No .wav files found in {input_dirpath}")
    if not octave_shifts:
        raise ValueError("args.octaves must contain at least one entry")
    octave = octave_shifts[0]

    real_audio = load_audio(filepath, sr, max_value)
    loudness_hop = 8 * sr // cond_freq
    # Trim so the length is a whole multiple of loudness_hop.
    real_audio = real_audio[:len(real_audio) // loudness_hop * loudness_hop]
    loudness_list = calc_loudness_list(audio=real_audio, srs=srs, device=device,
                                        sr_in=sr, norm_dicts=norm_dicts)
    real_audio = base_audio.forward(real_audio, sr, max_value_f0, numpy_flag=True, octave=octave)

    # Add two leading singleton axes and move to the GPU.
    real_audio_orig = real_audio[None, None, ...].to(device)
    # resample input to the wanted scale
    real_audio = resample_torch(real_audio_orig, sr, srs[0], max_val=max_value_f0)

    num_iters = 100
    times = []
    with torch.no_grad():
        for _ in trange(num_iters):
            # CUDA kernels are launched asynchronously: synchronise before
            # starting and before stopping the clock so the measurement
            # covers the GPU work and not just the kernel launches.
            torch.cuda.synchronize()
            start_time = time.perf_counter()
            f0_transfer(real_audio, loudness_list, Gs, samplers, max_val=max_value, save_all=False)
            torch.cuda.synchronize()
            times.append(time.perf_counter() - start_time)

    # NOTE(review): the RTF assumes the benchmarked clip is exactly 4 seconds
    # long; confirm this matches the input file actually used.
    length_in_seconds = 4
    rtfs = np.array(times) / length_in_seconds
    print("Mean RTF: %.4f" % np.mean(rtfs))
    print("90th percentile RTF: %.4f" % np.percentile(rtfs, 90))