Python undo_preprocessingの例

プログラミング言語: Python

名前空間/パッケージ名: magnolia.features.preprocessing

メソッド/関数: undo_preprocessing

hotexamples.comのコード掲載数: 6

Python undo_preprocessing - 6件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのmagnolia.features.preprocessing.undo_preprocessingの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

def main():
    """Write WAV files for one mix, its sources and its L41 separations.

    The script loads a saved ``L41Model``, advances the mix iterator to mix
    number ``mix_number`` and writes into ``output_path``:

    * ``mix_<n>.wav`` -- the mixed signal,
    * ``mix_<n>_original_source_<k>.wav`` -- each original source,
    * ``mix_<n>_separated_<k>.wav`` -- each source returned by
      ``l41_clustering_separate``.

    All settings are hard-coded below; the function takes no arguments and
    returns nothing.
    """
    # from model settings
    model_params = {
        'nonlinearity': 'tanh',
        'layer_size': 600,
        'embedding_size': 40,
        # NOTE(review): this is the string 'False', which is truthy if it is
        # ever evaluated as a boolean -- confirm what L41Model expects here.
        'normalize': 'False'
    }
    uid_settings = '/local_data/magnolia/pipeline_data/date_2017_09_27_time_13_25/settings/assign_uids_LibriSpeech_UrbanSound8K.json'
    model_save_base = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux/model_saves/large_l41'

    model_location = '/cpu:0'
    mixes = [
        '/local_data/magnolia/pipeline_data/date_2017_09_27_time_13_25/settings/mixing_LibriSpeech_UrbanSound8K_test_in_sample.json'
    ]
    from_disk = True
    mix_number = 1
    output_path = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux/sample_wav_files/large_lab41'

    os.makedirs(output_path, exist_ok=True)

    mixer = MixIterator(mixes_settings_filenames=mixes,
                        batch_size=1,
                        from_disk=from_disk)

    # get frequency dimension
    frequency_dim = mixer.sample_dimensions()[0]

    # get number of sources (largest uid in the UID table, plus one)
    with open(uid_settings) as uid_settings_file:
        settings = json.load(uid_settings_file)
    uid_csv = pd.read_csv(settings['output_file'])
    number_of_sources = uid_csv['uid'].max() + 1

    model = L41Model(**model_params,
                     num_speakers=number_of_sources,
                     F=frequency_dim,
                     device=model_location)

    model.load(model_save_base)

    assert (mix_number <= mixer.epoch_size())

    # The preprocessing settings of the first signal in the first mix file
    # supply the parameters used to undo the preprocessing.
    with open(mixes[0]) as mix_settings_file:
        mix_settings = json.load(mix_settings_file)

    signal = mix_settings['signals'][0]
    with open(signal['preprocessing_settings']) as preprocessing_file:
        preprocessing_settings = json.load(preprocessing_file)
    processing_parameters = preprocessing_settings['processing_parameters']
    stft_args = processing_parameters['stft_args']
    istft_args = convert_preprocessing_parameters(stft_args)
    preemphasis_coeff = processing_parameters['preemphasis_coeff']
    # fall back to 2048 when the STFT settings do not name an FFT size
    n_fft = stft_args.get('n_fft', 2048)

    def reconstruct(spectrogram):
        """Undo the preprocessing of one spectrogram and standardize it."""
        y = undo_preprocessing(spectrogram,
                               mixer.sample_length_in_bits(),
                               preemphasis_coeff=preemphasis_coeff,
                               istft_args=istft_args)
        # NOTE: the last n_fft samples are zeroed because masking creates a
        #       chirp in the last fft frame (likely due to the mask); the
        #       unmasked signals get the same treatment only so they can be
        #       compared to the reconstructed waveforms.
        y[-n_fft:] = 0.0
        return standardize_waveform(y)

    def write_wav(name, y):
        """Write ``y`` to ``output_path/name`` at the mixer's sample rate."""
        lr.output.write_wav(os.path.join(output_path, name),
                            y,
                            mixer.sample_rate(),
                            norm=True)

    # advance the iterator; only the last batch drawn is used below
    for _ in range(mix_number):
        spec, _, source_specs, _, snrs = next(mixer)

    # the model receives the whole batch, everything else its only element
    model_spec = spec
    spec = spec[0]
    source_specs = source_specs[0]
    snrs = snrs[0]

    print('SNR of this mix: {}'.format(snrs))

    write_wav('mix_{}.wav'.format(mix_number), reconstruct(spec))

    for i, source_spec in enumerate(source_specs):
        write_wav('mix_{}_original_source_{}.wav'.format(mix_number, i + 1),
                  reconstruct(source_spec))

    separated_specs = l41_clustering_separate(
        model_spec, model,
        mixer.number_of_samples_in_mixes())  #, binary_mask=False)

    for i, source_spec in enumerate(separated_specs):
        write_wav('mix_{}_separated_{}.wav'.format(mix_number, i + 1),
                  reconstruct(source_spec))

コード例 #2

ファイルを表示

ファイル: separate_mix.py プロジェクト: makabakas/Magnolia

def main():
    """Separate every mix of an epoch with a saved SNMF model and write WAVs.

    For each mix drawn from the iterator the script writes into
    ``output_path`` (all signals resampled to ``eval_sr``):

    * ``mix_<n>_snr_<snr>.wav`` -- the mixed signal,
    * ``mix_<n>_original_source_<k>.wav`` and
      ``mix_<n>_separated_source_<k>.wav`` -- each separated source paired
      with the not-yet-matched original source that has the smallest mean
      squared error to it.

    All settings are hard-coded below; the function takes no arguments and
    returns nothing.
    """
    # from model settings
    params = {
        'cf': 'kl',
        'sparsity': 5,
        'R': 1000,
        'conv_eps': 1e-3,
        'verbose': False,
        'max_iter': 25,
    }
    T_L = 8
    T_R = 0
    random_seed = 1234567890
    uid_settings = '/local_data/magnolia/pipeline_data/date_2017_09_27_time_13_25/settings/assign_uids_LibriSpeech_UrbanSound8K.json'
    library_output_file = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux/model_saves/snmf/library_weights.hdf5'
    # library_output_file = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux/model_saves/snmf/REMOVE_library_weights.hdf5'

    mixes = [
        '/local_data/magnolia/pipeline_data/date_2017_09_27_time_13_25/settings/mixing_LibriSpeech_UrbanSound8K_test_out_of_sample.json'
    ]
    # mixes = ['/local_data/magnolia/pipeline_data/date_2017_09_27_time_13_25/settings/mixing_LibriSpeech_UrbanSound8K_test_in_sample.json']
    from_disk = True
    output_path = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux/evaluations/snmf/out_of_sample_test'
    # output_path = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux/evaluations/snmf/in_sample_test'
    eval_sr = 8000

    params['rng'] = np.random.RandomState(random_seed)

    mixer = MixIterator(mixes_settings_filenames=mixes,
                        batch_size=1,
                        from_disk=from_disk)

    # NOTE(review): frequency_dim and number_of_sources are not used by the
    # SNMF model below; the reads are kept unchanged in case they are relied
    # on for their side effects -- confirm and drop if unneeded.
    # get frequency dimension
    frequency_dim = mixer.sample_dimensions()[0]

    # get number of sources
    with open(uid_settings) as uid_settings_file:
        settings = json.load(uid_settings_file)
    uid_csv = pd.read_csv(settings['output_file'])
    number_of_sources = uid_csv['uid'].max() + 1

    model = SNMF(T_L, T_R, params['R'], params['sparsity'], params['cf'])

    model.load(library_output_file)

    # The preprocessing settings of the first signal in the first mix file
    # supply the parameters used to undo the preprocessing.
    with open(mixes[0]) as mix_settings_file:
        mix_settings = json.load(mix_settings_file)

    signal = mix_settings['signals'][0]
    with open(signal['preprocessing_settings']) as preprocessing_file:
        preprocessing_settings = json.load(preprocessing_file)
    processing_parameters = preprocessing_settings['processing_parameters']
    stft_args = processing_parameters['stft_args']
    istft_args = convert_preprocessing_parameters(stft_args)
    preemphasis_coeff = processing_parameters['preemphasis_coeff']
    # fall back to 2048 when the STFT settings do not name an FFT size
    n_fft = stft_args.get('n_fft', 2048)

    def reconstruct(spectrogram):
        """Undo preprocessing, resample to ``eval_sr`` and standardize."""
        y = undo_preprocessing(spectrogram,
                               mixer.sample_length_in_bits(),
                               preemphasis_coeff=preemphasis_coeff,
                               istft_args=istft_args)
        # NOTE: the last n_fft samples are zeroed because masking creates a
        #       chirp in the last fft frame (likely due to the binary mask);
        #       the unmasked signals get the same treatment only so they can
        #       be compared to the reconstructed waveforms.
        y[-n_fft:] = 0.0
        y = lr.core.resample(y, mixer.sample_rate(), eval_sr, scale=True)
        return standardize_waveform(y)

    os.makedirs(output_path, exist_ok=True)
    for mix_index in tqdm.trange(mixer.epoch_size()):
        mix_label = mix_index + 1  # file names are 1-indexed

        spec, _, source_specs, _, snrs = next(mixer)
        # batch_size is 1, so take the only element of each batch
        spec = spec[0]
        source_specs = source_specs[0]
        snrs = snrs[0]

        filename = os.path.join(
            output_path, 'mix_{}_snr_{:.2f}.wav'.format(mix_label, snrs))
        lr.output.write_wav(filename, reconstruct(spec), eval_sr, norm=True)

        # original source waveforms that have not been matched yet
        originals = {
            i: reconstruct(source_spec)
            for i, source_spec in enumerate(source_specs)
        }

        # use model to source-separate the spectrogram
        separated_specs = model.source_separate(spec,
                                                max_iter=params['max_iter'],
                                                conv_eps=params['conv_eps'],
                                                rng=params['rng'],
                                                verbose=params['verbose'])

        for spec_key in separated_specs.keys():
            if not originals:
                # More separated sources than originals: there is nothing
                # left to pair with, so report it instead of failing on a
                # missing key.
                print("something went horribly wrong")
                continue

            y = reconstruct(separated_specs[spec_key])

            # match this waveform with an original source waveform; start
            # from the first remaining key so an already-matched key is
            # never selected
            min_key = next(iter(originals))
            min_mse = np.inf
            for key in originals:
                mse = np.mean((y - originals[key])**2)
                if mse < min_mse:
                    min_key = key
                    min_mse = mse

            # each original is written once, next to its separated match
            y_original = originals.pop(min_key)

            filename = os.path.join(
                output_path,
                'mix_{}_original_source_{}.wav'.format(mix_label,
                                                       min_key + 1))
            lr.output.write_wav(filename, y_original, eval_sr, norm=True)
            filename = os.path.join(
                output_path, 'mix_{}_separated_source_{}.wav'.format(
                    mix_label, min_key + 1))
            lr.output.write_wav(filename, y, eval_sr, norm=True)

コード例 #3

ファイルを表示

ファイル: separate_from_mix.py プロジェクト: karllab41/Magnolia

def main():
    """Write WAV files for one mix, its sources and its L41 separations.

    The script loads a saved ``L41Model``, advances the mix iterator to mix
    number ``mix_number`` and writes the mix, each original source and each
    source returned by ``l41_clustering_separate``.  ``output_path`` is used
    as a file-name prefix: the files are named ``<output_path>_mix.wav``,
    ``<output_path>_original_source_<i>.wav`` and
    ``<output_path>_separated_source_<i>.wav`` (``i`` is 0-indexed).

    All settings are hard-coded below; the function takes no arguments and
    returns nothing.
    """
    # from model settings
    model_params = {
        'nonlinearity': 'tanh',
        'layer_size': 600,
        'embedding_size': 40,
        # NOTE(review): this is the string 'False', which is truthy if it is
        # ever evaluated as a boolean -- confirm what L41Model expects here.
        'normalize': 'False'
    }
    uid_settings = '/local_data/magnolia/pipeline_data/date_2017_09_27_time_13_25/settings/assign_uids_LibriSpeech_UrbanSound8K.json'
    model_save_base = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux/model_saves/l41'

    model_location = '/cpu:0'
    mixes = [
        '/local_data/magnolia/pipeline_data/date_2017_09_27_time_13_25/settings/mixing_LibriSpeech_UrbanSound8K_test_in_sample.json'
    ]
    from_disk = True
    mix_number = 1
    output_path = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux'

    mixer = MixIterator(mixes_settings_filenames=mixes,
                        batch_size=1,
                        from_disk=from_disk)

    # get frequency dimension
    frequency_dim = mixer.sample_dimensions()[0]

    # get number of sources (largest uid in the UID table, plus one)
    with open(uid_settings) as uid_settings_file:
        settings = json.load(uid_settings_file)
    uid_csv = pd.read_csv(settings['output_file'])
    number_of_sources = uid_csv['uid'].max() + 1

    model = L41Model(**model_params,
                     num_speakers=number_of_sources,
                     F=frequency_dim,
                     device=model_location)

    model.load(model_save_base)

    assert (mix_number <= mixer.epoch_size())

    # The preprocessing settings of the first signal in the first mix file
    # supply the parameters used to undo the preprocessing.
    with open(mixes[0]) as mix_settings_file:
        mix_settings = json.load(mix_settings_file)

    signal = mix_settings['signals'][0]
    with open(signal['preprocessing_settings']) as preprocessing_file:
        preprocessing_settings = json.load(preprocessing_file)
    processing_parameters = preprocessing_settings['processing_parameters']
    istft_args = convert_preprocessing_parameters(
        processing_parameters['stft_args'])
    preemphasis_coeff = processing_parameters['preemphasis_coeff']

    def write_reconstruction(filename, spectrogram):
        """Undo the preprocessing of ``spectrogram`` and write it as WAV."""
        y = undo_preprocessing(spectrogram,
                               mixer.sample_length_in_bits(),
                               preemphasis_coeff=preemphasis_coeff,
                               istft_args=istft_args)
        lr.output.write_wav(filename, y, mixer.sample_rate(), norm=True)

    # advance the iterator; only the last batch drawn is used below
    for _ in range(mix_number):
        spec, _, source_specs, _, snrs = next(mixer)

    # the model receives the whole batch, everything else its only element
    model_spec = spec
    spec = spec[0]
    source_specs = source_specs[0]
    snrs = snrs[0]

    print('SNR of this mix: {}'.format(snrs))

    write_reconstruction('{}_mix.wav'.format(output_path), spec)

    for i, source_spec in enumerate(source_specs):
        write_reconstruction(
            '{}_original_source_{}.wav'.format(output_path, i), source_spec)

    separated_specs = l41_clustering_separate(
        model_spec, model, mixer.number_of_samples_in_mixes())

    for i, source_spec in enumerate(separated_specs):
        write_reconstruction(
            '{}_separated_source_{}.wav'.format(output_path, i), source_spec)

コード例 #4

ファイルを表示

ファイル: separate_sample_from_mix.py プロジェクト: zhaoforever/Magnolia

def main():
    """Write WAV files for one mix, its sources and its SNMF separations.

    The script loads a saved SNMF library, advances the mix iterator to mix
    number ``mix_number`` and writes into ``output_path``:

    * ``mix_<n>.wav`` -- the mixed signal,
    * ``mix_<n>_original_source_<k>.wav`` -- each original source,
    * ``mix_<n>_separated_<key>.wav`` -- each entry returned by
      ``model.source_separate``, named after its key.

    All settings are hard-coded below; the function takes no arguments and
    returns nothing.
    """
    # from model settings
    params = {
        'cf': 'kl',
        'sparsity': 5,
        'R': 1000,
        'conv_eps': 1e-3,
        'verbose': False,
        'max_iter': 25,
    }
    T_L = 8
    T_R = 0
    random_seed = 1234567890
    uid_settings = '/local_data/magnolia/pipeline_data/date_2017_09_27_time_13_25/settings/assign_uids_LibriSpeech_UrbanSound8K.json'
    library_output_file = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux/model_saves/snmf/library_weights.hdf5'
    # library_output_file = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux/model_saves/snmf/REMOVE_library_weights.hdf5'

    mixes = ['/local_data/magnolia/pipeline_data/date_2017_09_27_time_13_25/settings/mixing_LibriSpeech_UrbanSound8K_test_in_sample.json']
    from_disk = True
    mix_number = 1
    output_path = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux/sample_wav_files/snmf'

    os.makedirs(output_path, exist_ok=True)

    params['rng'] = np.random.RandomState(random_seed)

    mixer = MixIterator(mixes_settings_filenames=mixes,
                        batch_size=1,
                        from_disk=from_disk)

    # NOTE(review): frequency_dim and number_of_sources are not used by the
    # SNMF model below; the reads are kept unchanged in case they are relied
    # on for their side effects -- confirm and drop if unneeded.
    # get frequency dimension
    frequency_dim = mixer.sample_dimensions()[0]

    # get number of sources
    with open(uid_settings) as uid_settings_file:
        settings = json.load(uid_settings_file)
    uid_csv = pd.read_csv(settings['output_file'])
    number_of_sources = uid_csv['uid'].max() + 1

    model = SNMF(T_L, T_R, params['R'], params['sparsity'], params['cf'])

    model.load(library_output_file)

    assert(mix_number <= mixer.epoch_size())

    # The preprocessing settings of the first signal in the first mix file
    # supply the parameters used to undo the preprocessing.
    with open(mixes[0]) as mix_settings_file:
        mix_settings = json.load(mix_settings_file)

    signal = mix_settings['signals'][0]
    with open(signal['preprocessing_settings']) as preprocessing_file:
        preprocessing_settings = json.load(preprocessing_file)
    processing_parameters = preprocessing_settings['processing_parameters']
    stft_args = processing_parameters['stft_args']
    istft_args = convert_preprocessing_parameters(stft_args)
    preemphasis_coeff = processing_parameters['preemphasis_coeff']
    # fall back to 2048 when the STFT settings do not name an FFT size
    n_fft = stft_args.get('n_fft', 2048)

    def reconstruct(spectrogram):
        """Undo the preprocessing of one spectrogram and standardize it."""
        y = undo_preprocessing(spectrogram, mixer.sample_length_in_bits(),
                               preemphasis_coeff=preemphasis_coeff,
                               istft_args=istft_args)
        # NOTE: the last n_fft samples are zeroed because masking creates a
        #       chirp in the last fft frame (likely due to the mask); the
        #       unmasked signals get the same treatment only so they can be
        #       compared to the reconstructed waveforms.
        y[-n_fft:] = 0.0
        return standardize_waveform(y)

    def write_wav(name, y):
        """Write ``y`` to ``output_path/name`` at the mixer's sample rate."""
        lr.output.write_wav(os.path.join(output_path, name), y,
                            mixer.sample_rate(), norm=True)

    # advance the iterator; only the last batch drawn is used below
    for _ in range(mix_number):
        spec, _, source_specs, _, snrs = next(mixer)

    # batch_size is 1, so take the only element of each batch
    spec = spec[0]
    source_specs = source_specs[0]
    snrs = snrs[0]

    print('SNR of this mix: {}'.format(snrs))

    write_wav('mix_{}.wav'.format(mix_number), reconstruct(spec))

    for i, source_spec in enumerate(source_specs):
        write_wav('mix_{}_original_source_{}.wav'.format(mix_number, i + 1),
                  reconstruct(source_spec))

    separated_specs = model.source_separate(spec,
                                            max_iter=params['max_iter'],
                                            conv_eps=params['conv_eps'],
                                            rng=params['rng'],
                                            verbose=params['verbose'])

    # iterating the result yields keys; each key indexes one spectrogram
    for spec_key in separated_specs:
        write_wav('mix_{}_separated_{}.wav'.format(mix_number, spec_key),
                  reconstruct(separated_specs[spec_key]))

コード例 #5

ファイルを表示

def main():
    """Separate every mix of an epoch with a saved Chimera model, write WAVs.

    For each mix drawn from the iterator the script writes into
    ``output_path`` (all signals resampled to ``eval_sr``):

    * ``mix_<n>_snr_<snr>.wav`` -- the mixed signal,
    * ``mix_<n>_original_source_<k>.wav`` and
      ``mix_<n>_dc_separated_source_<k>.wav`` -- each source returned by
      ``chimera_clustering_separate`` paired with the not-yet-matched
      original source that has the smallest mean squared error to it,
    * ``mix_<n>_mi_separated_source_<k>.wav`` -- each slice along the last
      axis of the first element returned by ``chimera_mask``.

    All settings are hard-coded below; the function takes no arguments and
    returns nothing.
    """
    # from model settings
    model_params = {
    }
    uid_settings = '/local_data/magnolia/pipeline_data/date_2017_09_27_time_13_25/settings/assign_uids_LibriSpeech_UrbanSound8K.json'
    model_save_base = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux/model_saves/chimera'

    model_location = '/cpu:0'
    # mixes = ['/local_data/magnolia/pipeline_data/date_2017_09_27_time_13_25/settings/mixing_LibriSpeech_UrbanSound8K_test_in_sample.json']
    mixes = ['/local_data/magnolia/pipeline_data/date_2017_09_27_time_13_25/settings/mixing_LibriSpeech_UrbanSound8K_test_out_of_sample.json']
    from_disk = True
    # output_path = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux/evaluations/chimera/in_sample_test'
    output_path = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux/evaluations/chimera/out_of_sample_test'
    eval_sr = 8000

    mixer = MixIterator(mixes_settings_filenames=mixes,
                        batch_size=1,
                        from_disk=from_disk)

    # get frequency dimension
    frequency_dim = mixer.sample_dimensions()[0]

    # NOTE(review): number_of_sources is not used by the Chimera model below;
    # the read is kept unchanged in case it is relied on for its side
    # effects -- confirm and drop if unneeded.
    # get number of sources
    with open(uid_settings) as uid_settings_file:
        settings = json.load(uid_settings_file)
    uid_csv = pd.read_csv(settings['output_file'])
    number_of_sources = uid_csv['uid'].max() + 1

    model = Chimera(**model_params,
                    F=frequency_dim,
                    device=model_location)

    model.load(model_save_base)

    # The preprocessing settings of the first signal in the first mix file
    # supply the parameters used to undo the preprocessing.
    with open(mixes[0]) as mix_settings_file:
        mix_settings = json.load(mix_settings_file)

    signal = mix_settings['signals'][0]
    with open(signal['preprocessing_settings']) as preprocessing_file:
        preprocessing_settings = json.load(preprocessing_file)
    processing_parameters = preprocessing_settings['processing_parameters']
    stft_args = processing_parameters['stft_args']
    istft_args = convert_preprocessing_parameters(stft_args)
    preemphasis_coeff = processing_parameters['preemphasis_coeff']
    # fall back to 2048 when the STFT settings do not name an FFT size
    n_fft = stft_args.get('n_fft', 2048)

    def reconstruct(spectrogram):
        """Undo preprocessing, resample to ``eval_sr`` and standardize."""
        y = undo_preprocessing(spectrogram, mixer.sample_length_in_bits(),
                               preemphasis_coeff=preemphasis_coeff,
                               istft_args=istft_args)
        # NOTE: the last n_fft samples are zeroed because masking creates a
        #       chirp in the last fft frame (likely due to the binary mask);
        #       the unmasked signals get the same treatment only so they can
        #       be compared to the reconstructed waveforms.
        y[-n_fft:] = 0.0
        y = lr.core.resample(y, mixer.sample_rate(), eval_sr, scale=True)
        return standardize_waveform(y)

    def write_wav(name, y):
        """Write ``y`` to ``output_path/name`` at ``eval_sr``."""
        lr.output.write_wav(os.path.join(output_path, name), y, eval_sr,
                            norm=True)

    os.makedirs(output_path, exist_ok=True)
    for mix_index in tqdm.trange(mixer.epoch_size()):
        mix_label = mix_index + 1  # file names are 1-indexed

        spec, _, source_specs, _, snrs = next(mixer)
        # the model receives the whole batch, everything else its only element
        model_spec = spec
        spec = spec[0]
        source_specs = source_specs[0]
        snrs = snrs[0]

        y_mix = reconstruct(spec)
        write_wav('mix_{}_snr_{:.2f}.wav'.format(mix_label, snrs), y_mix)

        # original source waveforms that have not been matched yet
        originals = {
            i: reconstruct(source_spec)
            for i, source_spec in enumerate(source_specs)
        }

        # use dc-head of model + clustering to source-separate the spectrogram
        dc_specs = chimera_clustering_separate(
            model_spec, model, mixer.number_of_samples_in_mixes())

        for source_spec in dc_specs:
            if not originals:
                # More separated sources than originals: there is nothing
                # left to pair with, so report it instead of failing on a
                # missing key.
                print("something went horribly wrong")
                continue

            y = reconstruct(source_spec)

            # match this waveform with an original source waveform; start
            # from the first remaining key so an already-matched key is
            # never selected
            min_key = next(iter(originals))
            min_mse = np.inf
            for key in originals:
                mse = np.mean((y - originals[key])**2)
                if mse < min_mse:
                    min_key = key
                    min_mse = mse

            # each original is written once, next to its separated match
            y_original = originals.pop(min_key)

            write_wav('mix_{}_original_source_{}.wav'.format(mix_label, min_key + 1),
                      y_original)
            write_wav('mix_{}_dc_separated_source_{}.wav'.format(mix_label, min_key + 1),
                      y)

        # use mi-head of model to source-separate the spectrogram
        mi_specs = chimera_mask(model_spec, model)[0]

        # one output per index along the third axis
        for i in range(mi_specs.shape[2]):
            y = reconstruct(mi_specs[:, :, i])
            write_wav('mix_{}_mi_separated_source_{}.wav'.format(mix_label, i + 1),
                      y)

コード例 #6

ファイルを表示

def main():
    """Reconstruct one mix and its binary-masked sources as WAV files.

    Command-line options select the 1-indexed sample number, the output
    file name, the mixing settings JSON file and the logging configuration.
    With ``<base>`` being the output file name without its extension, the
    script writes ``<base>_mix.wav`` for the mix and ``<base>_<uid>.wav``
    for every binary mask applied to the mix spectrogram.

    An out-of-range ``--sample`` is reported through ``parser.error``, which
    prints the usage message and exits.
    """
    # parse command line arguments
    parser = argparse.ArgumentParser(
        description='Reconstruct waveforms from mixes.')
    parser.add_argument('--sample',
                        '-n',
                        default=1,
                        type=int,
                        help='sample number to write to file (1-indexed)')
    parser.add_argument('--output_file',
                        '-o',
                        default='mix.wav',
                        help='output file name (wav format)')
    parser.add_argument('--settings',
                        '-s',
                        default='../../settings/mixing_template.json',
                        help='sample mixing settings JSON file')
    parser.add_argument('--logger_settings',
                        '-l',
                        default='../../settings/logging.conf',
                        help='logging configuration file')
    args = parser.parse_args()

    # Load logging configuration
    logging.config.fileConfig(args.logger_settings)

    mixer = MixIterator([args.settings], batch_size=1)
    mixer_iter = iter(mixer)

    # Only the parsed settings are needed afterwards, so the file is closed
    # as soon as it has been read.
    with open(args.settings) as settings_file:
        settings = json.load(settings_file)
    total_number_of_mixed_samples = settings['number_of_mixed_samples']

    # Explicit check rather than assert: asserts are stripped under -O and
    # the sample number comes straight from the command line.
    if not 0 < args.sample <= total_number_of_mixed_samples:
        parser.error('--sample must be between 1 and {}'.format(
            total_number_of_mixed_samples))

    # The preprocessing settings of the first signal supply the parameters
    # used to undo the preprocessing.
    signal = settings['signals'][0]
    with open(signal['preprocessing_settings']) as preprocessing_file:
        preprocessing_settings = json.load(preprocessing_file)
    processing_parameters = preprocessing_settings['processing_parameters']
    istft_args = convert_preprocessing_parameters(
        processing_parameters['stft_args'])
    preemphasis_coeff = processing_parameters['preemphasis_coeff']
    sample_rate = processing_parameters['target_sample_rate']
    sample_length = settings['target_sample_length']
    total_length = int(sample_length * sample_rate)

    # advance the iterator; only the last batch drawn is used below
    for _ in range(args.sample):
        spec, bin_masks, _, uids, snrs = next(mixer_iter)

    # batch_size is 1, so take the only element of each batch
    spec = spec[0]
    bin_masks = bin_masks[0]
    uids = uids[0]
    snrs = snrs[0]

    print('SNR of this mix: {}'.format(snrs))

    # output file name without its extension, shared by every file written
    output_base = os.path.splitext(args.output_file)[0]

    mix_file_name = '{}_mix.wav'.format(output_base)
    y = undo_preprocessing(spec,
                           total_length,
                           preemphasis_coeff=preemphasis_coeff,
                           istft_args=istft_args)
    lr.output.write_wav(mix_file_name, y, sample_rate, norm=True)

    # one reconstructed source per binary mask, named after its uid
    for i in range(bin_masks.shape[0]):
        source_file_name = '{}_{}.wav'.format(output_base, uids[i])
        source_spec = apply_binary_mask(bin_masks[i], spec)
        source_y = undo_preprocessing(source_spec,
                                      total_length,
                                      preemphasis_coeff=preemphasis_coeff,
                                      istft_args=istft_args)
        lr.output.write_wav(source_file_name,
                            source_y,
                            sample_rate,
                            norm=True)