Code Example #1
File: inference.py  Project: 00001101-xt/DeepAFx
def inference(path_model,
              params_path,
              name_task,
              input_file_path,
              output_filepath,
              dafx_wise=0,
              max_len_sec=50):
    """Evalue a model, given a params file, the task, and output directory
    """

    model_name = os.path.basename(path_model)
    kTask = name_task

    # Verify the task
    assert name_task in [
        'distortion', 'nonspeech', 'mastering'
    ], 'task should be "distortion", "nonspeech" or "mastering"'

    # loads config
    k = np.load(params_path, allow_pickle=True).item()

    # Define Constants
    kPathAudio = k['path_audio']
    if kTask != 'mastering':
        kXRecording = k['x_recording']
        kYRecording = k['y_recording']
    kSR = k['sr']
    kNumSamples = k['num_samples']
    kBatchSize = k['batch_size']
    kStepsPerEpoch = k['steps_per_epoch']
    kEpochs = k['epochs']
    kPatience = k['patience']
    kHopSamples = k['hop_samples']
    kGradientMethod = k['gradient_method']
    kComputeSignalGradient = k['compute_signal_gradient']
    kMultiprocess = k['multiprocess']
    kParams = k['params']
    kPluginUri = k['plugin_uri']
    kParamMap = k['param_map']
    kOutputLength = k['output_length']
    kStereo = k['stereo']
    kSetNonTrainableParameters = k['set_nontrainable_parameters']
    kNewParameterRange = k['new_parameter_range']
    kFxChain = True
    kGreedyDafxPretraining = k['greedy_dafx_pretraining']
    kDefaultPretraining = k['default_pretraining']
    kEncoder = k['encoder']
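
    # Illustrative shape of the params dict (keys match those read above;
    # the values shown are hypothetical, not from the repository):
    #   k = {'path_audio': '/data/audio', 'sr': 22050, 'num_samples': 40960,
    #        'batch_size': 100, 'params': [2, 3],
    #        'plugin_uri': ['urn:example:compressor', 'urn:example:eq'], ...}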

    # Load the input audio file (the test-partition logic lives in evaluate.py)
    data, samplerate = sf.read(input_file_path)
    # librosa < 0.10 positional API: resample(y, orig_sr, target_sr);
    # the target rate 22050 is hard-coded here and assumed to equal kSR
    xTestAudio = librosa.resample(data.T, samplerate, 22050)
    # Keep at most the first 30 seconds
    xTestAudio = xTestAudio[:min(kSR * 30, len(xTestAudio))]

    # Creates model
    print('Creating model...')
    model, encoder, dafx = models.deepAFx(
        kNumSamples,
        kSR,
        kHopSamples,
        kBatchSize,
        kParams,
        kPluginUri,
        kParamMap,
        kGradientMethod,
        kComputeSignalGradient,
        kMultiprocess,
        kEncoder,
        output_length=kOutputLength,
        stereo=kStereo,
        non_learnable_params_settings=kSetNonTrainableParameters,
        new_params_range=kNewParameterRange,
        fx_chain=kFxChain)

    # Loads model weights
    model.load_weights(path_model)

    # If fx_chain is True, the model contains all the plugins, so run the full chain
    if kFxChain:
        dafx_wise = len(kPluginUri)
    else:
        dafx_wise = 1

    dafx.set_greedy_pretraining(dafx_wise)

    model.compile()

    # Processes the input audio and saves the output file
    print('Processing the test file...')
    if True:  # placeholder block; keeps the indentation of the per-track loop used in evaluate.py

        xtest = xTestAudio

        # Gets parameter prediction
        layer_name = 'logMelgram'
        intermediate_layer_model = keras.Model(
            inputs=model.input, outputs=model.get_layer(layer_name).output)

        kBlockSize = 64  # Block size for inference audio plugins
        xtest_w = utils.slicing(xtest, kNumSamples, kOutputLength)
        steps = int(np.ceil(len(xtest_w) / kBatchSize))

        intermediate_output = intermediate_layer_model(xtest_w)
        parameters = encoder.predict(intermediate_output,
                                     batch_size=kBatchSize,
                                     steps=steps,
                                     verbose=1)

        xtest_w = utils.slicing(xtest, kOutputLength, kOutputLength)
        xtest_w_smooth = utils.slicing(xtest, kBlockSize, kBlockSize)

        # creates dafx plugins for inference
        if kFxChain:

            dafx_inference_smooth = lv2_plugin.LV2_Plugin_Chain(
                kPluginUri, kStereo, kSR, hop_samples=kBlockSize)

            dafx_inference_smooth.reset_plugin_state(kBlockSize * 100)

            for j, set_nontrainable_parameters in enumerate(
                    kSetNonTrainableParameters):
                for i in set_nontrainable_parameters:
                    dafx_inference_smooth.set_param(
                        j, i, set_nontrainable_parameters[i])

        else:

            dafx_inference_smooth = lv2_plugin.LV2_Plugin(
                kPluginUri, kSR, hop_samples=kBlockSize)

            for i in kSetNonTrainableParameters:
                dafx_inference_smooth.set_param(i,
                                                kSetNonTrainableParameters[i])

        # Low-pass filter the parameters, whether it is an fx chain or a single effect

        b1, a1 = scipy.signal.butter(4, 0.5, 'low')  # 4th-order low-pass, cutoff at 0.5 x Nyquist
        b2, a2 = scipy.signal.butter(4, 0.001, 'low')  # heavier smoothing, cutoff at 0.001 x Nyquist

        try:
            parameters_smooth_1 = []
            for i in range(np.sum(kParams)):
                filtered_signal = scipy.signal.filtfilt(
                    b1, a1, parameters[:, i])
                parameters_smooth_1.append(filtered_signal)
            parameters_smooth_1 = np.asarray(parameters_smooth_1).T

            p_original_time = np.repeat(parameters, kOutputLength,
                                        axis=0)[:xtest.shape[0], :]
            p_smooth_1_time = np.repeat(parameters_smooth_1,
                                        kOutputLength,
                                        axis=0)[:xtest.shape[0], :]

            parameters_smooth_2 = []
            for i in range(np.sum(kParams)):
                filtered_signal = scipy.signal.filtfilt(
                    b2, a2, p_smooth_1_time[:, i])
                parameters_smooth_2.append(filtered_signal)
            p_smooth_2_time = np.asarray(parameters_smooth_2).T

            parameters_resampled = scipy.signal.resample(
                p_smooth_2_time, xtest_w_smooth.shape[0])
            parameters_resampled = np.clip(parameters_resampled, 0, 1)

        # If the parameter sequence is too short, only the heavier filter is applied (for models with large output frames)
        except ValueError:  # filtfilt raises ValueError when the input is shorter than its padlen

            p_original_time = np.repeat(parameters, kOutputLength,
                                        axis=0)[:xtest.shape[0], :]
            parameters_smooth_2 = []
            for i in range(np.sum(kParams)):
                filtered_signal = scipy.signal.filtfilt(
                    b2, a2, p_original_time[:, i])
                parameters_smooth_2.append(filtered_signal)
            p_smooth_2_time = np.asarray(parameters_smooth_2).T

            parameters_resampled = scipy.signal.resample(
                p_smooth_2_time, xtest_w_smooth.shape[0])
            parameters_resampled = np.clip(parameters_resampled, 0, 1)

        ztest_smooth = utils.processFramesDAFx(
            dafx_inference_smooth,
            kParamMap,
            xtest_w_smooth,
            parameters_resampled,
            new_param_range=kNewParameterRange,
            stereo=kStereo,
            greedy_pretraining=dafx_wise)
        ztest_smooth = ztest_smooth[:xtest.shape[0]]
        z_smooth = ztest_smooth.copy()

        # librosa.output.write_wav was removed in librosa 0.8; soundfile.write is the modern equivalent
        librosa.output.write_wav(output_filepath, z_smooth, kSR, norm=False)

        dafx.reset_dafx_state(kSR * 1)

    dafx.shutdown()
    del model, encoder
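
A hedged usage sketch for inference() (the checkpoint, params, and audio paths below are illustrative, not taken from the repository):

if __name__ == '__main__':
    # Hypothetical paths; the params file is the .npy dict loaded above
    inference(path_model='models/mastering/model.h5',
              params_path='models/mastering/model.params.npy',
              name_task='mastering',
              input_file_path='audio/input.wav',
              output_filepath='audio/output.wav')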
Code Example #2
    print('Time elapsed:', timeElapsed)
    np.save(kPathModels + kFullModel + '.history_pretraining', hist1.history)
    np.save(kPathModels + kFullModel + '.params', k)

    dafx.shutdown()
    del model, encoder, genTrain, genValid

model, encoder, dafx = models.deepAFx(
    kNumSamples,
    kSR,
    kHopSamples,
    kBatchSize,
    kParams,
    kPluginUri,
    kParamMap,
    kGradientMethod,
    kComputeSignalGradient,
    kMultiprocess,
    kEncoder,
    output_length=kOutputLength,
    stereo=kStereo,
    non_learnable_params_settings=kSetNonTrainableParameters,
    new_params_range=kNewParameterRange,
    fx_chain=kFxChain)

genTrain = generators.Data_Generator_Stateful_Distortion(
    xTrainAudio,
    yTrainAudio,
    dafx,
    kBatchSize,
    kNumSamples,
    # The snippet is truncated here in the source; the remaining keyword
    # arguments presumably mirror the test generator in evaluate.py:
    steps_per_epoch=kStepsPerEpoch,
    sr=kSR,
    pad=0,
    crop=True,
    output_length=kOutputLength,
    center_frames=True)
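
A minimal sketch of how the training presumably proceeds from here (the optimizer, fit call, and callback are assumptions based on standard Keras usage and the hist1 history saved earlier; spectral_loss would be constructed as in evaluate.py below):

import tensorflow as tf

# Hypothetical training call, consistent with saving hist1.history above
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss=spectral_loss,
              metrics=['mae'])
hist1 = model.fit(genTrain,
                  validation_data=genValid,
                  steps_per_epoch=kStepsPerEpoch,
                  epochs=kEpochs,
                  callbacks=[tf.keras.callbacks.EarlyStopping(patience=kPatience)],
                  verbose=1)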
Code Example #3
File: evaluate.py  Project: 00001101-xt/DeepAFx
def evaluate(path_model,
             params_path,
             name_task,
             output_dir,
             dafx_wise=0,
             max_len_sec=50):
    """Evalue a model, given a params file, the task, and output directory
    """

    model_name = os.path.basename(path_model)
    kTask = name_task

    # Verify the task
    assert name_task in [
        'distortion', 'nonspeech', 'mastering'
    ], 'task should be "distortion", "nonspeech" or "mastering"'

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
        print("creating folder : ", output_dir)

    # loads config
    k = np.load(params_path, allow_pickle=True).item()

    # Define Constants
    kPathAudio = k['path_audio']
    if kTask != 'mastering':
        kXRecording = k['x_recording']
        kYRecording = k['y_recording']
    kSR = k['sr']
    kNumSamples = k['num_samples']
    kBatchSize = k['batch_size']
    kStepsPerEpoch = k['steps_per_epoch']
    kEpochs = k['epochs']
    kPatience = k['patience']
    kHopSamples = k['hop_samples']
    kGradientMethod = k['gradient_method']
    kComputeSignalGradient = k['compute_signal_gradient']
    kMultiprocess = k['multiprocess']
    kParams = k['params']
    kPluginUri = k['plugin_uri']
    kParamMap = k['param_map']
    kOutputLength = k['output_length']
    kStereo = k['stereo']
    kSetNonTrainableParameters = k['set_nontrainable_parameters']
    kNewParameterRange = k['new_parameter_range']
    kFxChain = True
    kGreedyDafxPretraining = k['greedy_dafx_pretraining']
    kDefaultPretraining = k['default_pretraining']
    kEncoder = k['encoder']

    # Load the test dataset or make the partition; for some tasks the random_state seed must match the training script

    if kTask == 'nonspeech':

        print('Loading test partition...')

        xPathFiles = utils.getFilesPath(os.path.join(kPathAudio, kXRecording),
                                        '*.wav')
        xTest = []
        for path in xPathFiles:
            if 'f9' in path or 'm9' in path:
                xTest.append(path)

        xTestAudio = []
        for path in xTest:
            audio, _ = sf.read(path)
            xTestAudio.append(audio)

        yPathFiles = utils.getFilesPath(os.path.join(kPathAudio, kYRecording),
                                        '*.wav')
        yTest = []
        for path in yPathFiles:
            if 'f9' in path or 'm9' in path:
                yTest.append(path)

        yTestAudio = []
        for path in yTest:
            audio, _ = sf.read(path)
            yTestAudio.append(audio)

        xTestAudio = utils.highpassFiltering(xTestAudio, 100, kSR)
        yTestAudio = utils.highpassFiltering(yTestAudio, 100, kSR)

    elif kTask == 'distortion':

        xPathFiles = utils.getFilesPath(os.path.join(kPathAudio, kXRecording),
                                        '*.wav')
        yPathFiles = utils.getFilesPath(os.path.join(kPathAudio, kYRecording),
                                        '*.wav')

        xTrain, xTest, yTrain, yTest = sklearn.model_selection.train_test_split(
            xPathFiles, yPathFiles, test_size=0.10, random_state=0)

        xTrain, xValid, yTrain, yValid = sklearn.model_selection.train_test_split(
            xTrain, yTrain, test_size=0.11111111111, random_state=0)

        xTestAudio = []
        for path in xTest:
            audio, _ = sf.read(path)
            xTestAudio.append(audio)

        yTestAudio = []
        for path in yTest:
            audio, _ = sf.read(path)
            yTestAudio.append(audio)

    elif kTask == 'mastering':

        kPathFiles = utils.getFilesPath(kPathAudio, '*.wav')

        xPathFiles = []
        yPathFiles = []
        for path in kPathFiles:
            name = path.split('/')[-1]
            recording = name.split('-')[0]
            if recording[-1] == 'a':
                xPathFiles.append(path)
            elif recording[-1] == 'b':
                yPathFiles.append(path)
        xPathFiles.sort()
        yPathFiles.sort()

        xTrain, xTest, yTrain, yTest = sklearn.model_selection.train_test_split(
            xPathFiles, yPathFiles, test_size=0.10, random_state=0)

        xTrain, xValid, yTrain, yValid = sklearn.model_selection.train_test_split(
            xTrain, yTrain, test_size=0.11111111111, random_state=0)

        xTestAudio = []
        for path in xTest:
            audio, _ = sf.read(path)
            audio = librosa.core.to_mono(audio.T)
            audio = utils.lufs_normalize(audio, kSR, -25.0)
            xTestAudio.append(audio)

        yTestAudio = []
        for path in yTest:
            audio, _ = sf.read(path)
            audio = librosa.core.to_mono(audio.T)
            yTestAudio.append(audio)

    # Creates model
    print('Creating model...')
    model, encoder, dafx = models.deepAFx(
        kNumSamples,
        kSR,
        kHopSamples,
        kBatchSize,
        kParams,
        kPluginUri,
        kParamMap,
        kGradientMethod,
        kComputeSignalGradient,
        kMultiprocess,
        kEncoder,
        output_length=kOutputLength,
        stereo=kStereo,
        non_learnable_params_settings=kSetNonTrainableParameters,
        new_params_range=kNewParameterRange,
        fx_chain=kFxChain)

    # Creates generators

    if kTask == 'nonspeech':

        genTest = generators.Data_Generator_Stateful_Nonspeech(
            xTestAudio,
            yTestAudio,
            dafx,
            kBatchSize,
            kNumSamples,
            steps_per_epoch=kStepsPerEpoch,
            sr=kSR,
            pad=0,
            crop=True,
            output_length=kOutputLength,
            large_frame_length_secs=10,
            augment=True)

    elif kTask == 'distortion':

        genTest = generators.Data_Generator_Stateful_Distortion(
            xTestAudio,
            yTestAudio,
            dafx,
            kBatchSize,
            kNumSamples,
            steps_per_epoch=kStepsPerEpoch,
            sr=kSR,
            pad=0,
            crop=True,
            output_length=kOutputLength,
            center_frames=True)

    elif kTask == 'mastering':

        genTest = generators.Data_Generator_Stateful_Mastering(
            xTestAudio,
            yTestAudio,
            dafx,
            kBatchSize,
            kNumSamples,
            steps_per_epoch=kStepsPerEpoch,
            sr=kSR,
            pad=0,
            crop=True,
            output_length=kOutputLength,
            large_frame_length_secs=10,
            augment=False)

    # Loss_function
    spectral_loss = losses.multiScaleSpectralLoss(loss_type='L2',
                                                  mag_weight=1.,
                                                  logmag_weight=1.,
                                                  time_loss=True,
                                                  time_loss_type='L1',
                                                  time_loss_weight=10.0,
                                                  fft_sizes=(1024, ),
                                                  overlap=0.0,
                                                  time_shifting=True,
                                                  batch_size=kBatchSize)
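    # Conceptually (a hypothetical sketch; the actual implementation lives in
    # losses.py): for each FFT size s, the loss combines
    #   mag_weight    * || |STFT_s(y)| - |STFT_s(z)| ||_2        (linear magnitudes)
    #   logmag_weight * || log|STFT_s(y)| - log|STFT_s(z)| ||_2  (log magnitudes)
    # plus a time-domain L1 term scaled by time_loss_weight, with optional
    # time-shift alignment between target y and output z.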

    # Loads model weights
    model.load_weights(path_model)

    # If fx_chain is True, the model contains all the plugins, so run the full chain
    if kFxChain:
        dafx_wise = len(kPluginUri)
    else:
        dafx_wise = 1

    dafx.set_greedy_pretraining(dafx_wise)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  loss=spectral_loss,
                  metrics=['mae'])

    # Computes test loss
    print('Computing test loss...')
    metrics = {}
    K.set_learning_phase(0)
    score = model.evaluate(genTest,
                           steps=int(kStepsPerEpoch * 0.1),
                           batch_size=kBatchSize,
                           verbose=1,
                           return_dict=True)
    metrics['test_losses'] = score

    # Computes objective metric and saves audio files and parameter automations
    #   It will process the first 50 seconds of each test sample,
    #   or the full sample for the distortion task
    secs = [0, max_len_sec]
    mfcc_cosine = []
    print('Processing the test dataset...')
    for idx_track in range(len(xTestAudio)):

        xtest = xTestAudio[idx_track][secs[0] * kSR:secs[1] * kSR]
        ytest = yTestAudio[idx_track][secs[0] * kSR:secs[1] * kSR]

        # Gets parameter prediction
        layer_name = 'logMelgram'
        intermediate_layer_model = keras.Model(
            inputs=model.input, outputs=model.get_layer(layer_name).output)

        kBlockSize = 64  # Block size for inference audio plugins
        xtest_w = utils.slicing(xtest, kNumSamples, kOutputLength)
        steps = int(np.ceil(len(xtest_w) / kBatchSize))

        intermediate_output = intermediate_layer_model(xtest_w)
        parameters = encoder.predict(intermediate_output,
                                     batch_size=kBatchSize,
                                     steps=steps,
                                     verbose=1)

        xtest_w = utils.slicing(xtest, kOutputLength, kOutputLength)
        xtest_w_smooth = utils.slicing(xtest, kBlockSize, kBlockSize)

        # creates dafx plugins for inference; dafx_inference_smooth runs the smoothed parameters
        if kFxChain:

            dafx_inference = lv2_plugin.LV2_Plugin_Chain(
                kPluginUri, kStereo, kSR, hop_samples=kOutputLength)

            dafx_inference_smooth = lv2_plugin.LV2_Plugin_Chain(
                kPluginUri, kStereo, kSR, hop_samples=kBlockSize)

            dafx_inference.reset_plugin_state(kOutputLength * 100)
            dafx_inference_smooth.reset_plugin_state(kBlockSize * 100)

            for j, set_nontrainable_parameters in enumerate(
                    kSetNonTrainableParameters):
                for i in set_nontrainable_parameters:
                    dafx_inference.set_param(j, i,
                                             set_nontrainable_parameters[i])
                    dafx_inference_smooth.set_param(
                        j, i, set_nontrainable_parameters[i])

        else:

            dafx_inference = lv2_plugin.LV2_Plugin(kPluginUri,
                                                   kSR,
                                                   hop_samples=kOutputLength)

            dafx_inference_smooth = lv2_plugin.LV2_Plugin(
                kPluginUri, kSR, hop_samples=kBlockSize)

            for i in kSetNonTrainableParameters:
                dafx_inference.set_param(i, kSetNonTrainableParameters[i])
                dafx_inference_smooth.set_param(i,
                                                kSetNonTrainableParameters[i])

        # Low-pass filter the parameters, whether it is an fx chain or a single effect

        b1, a1 = scipy.signal.butter(4, 0.5, 'low')  # 4th-order low-pass, cutoff at 0.5 x Nyquist
        b2, a2 = scipy.signal.butter(4, 0.001, 'low')  # heavier smoothing, cutoff at 0.001 x Nyquist

        try:
            parameters_smooth_1 = []
            for i in range(np.sum(kParams)):
                filtered_signal = scipy.signal.filtfilt(
                    b1, a1, parameters[:, i])
                parameters_smooth_1.append(filtered_signal)
            parameters_smooth_1 = np.asarray(parameters_smooth_1).T

            p_original_time = np.repeat(parameters, kOutputLength,
                                        axis=0)[:xtest.shape[0], :]
            p_smooth_1_time = np.repeat(parameters_smooth_1,
                                        kOutputLength,
                                        axis=0)[:xtest.shape[0], :]

            parameters_smooth_2 = []
            for i in range(np.sum(kParams)):
                filtered_signal = scipy.signal.filtfilt(
                    b2, a2, p_smooth_1_time[:, i])
                parameters_smooth_2.append(filtered_signal)
            p_smooth_2_time = np.asarray(parameters_smooth_2).T

            parameters_resampled = scipy.signal.resample(
                p_smooth_2_time, xtest_w_smooth.shape[0])
            parameters_resampled = np.clip(parameters_resampled, 0, 1)

        # If the parameter sequence is too short, only the heavier filter is applied (for models with large output frames)
        except ValueError:  # filtfilt raises ValueError when the input is shorter than its padlen

            p_original_time = np.repeat(parameters, kOutputLength,
                                        axis=0)[:xtest.shape[0], :]
            parameters_smooth_2 = []
            for i in range(np.sum(kParams)):
                filtered_signal = scipy.signal.filtfilt(
                    b2, a2, p_original_time[:, i])
                parameters_smooth_2.append(filtered_signal)
            p_smooth_2_time = np.asarray(parameters_smooth_2).T

            parameters_resampled = scipy.signal.resample(
                p_smooth_2_time, xtest_w_smooth.shape[0])
            parameters_resampled = np.clip(parameters_resampled, 0, 1)

        # Processes frames using both the raw and the smoothed parameters
        ztest = utils.processFramesDAFx(dafx_inference,
                                        kParamMap,
                                        xtest_w,
                                        parameters,
                                        new_param_range=kNewParameterRange,
                                        stereo=kStereo,
                                        greedy_pretraining=dafx_wise)
        ztest = ztest[:xtest.shape[0]]

        ztest_smooth = utils.processFramesDAFx(
            dafx_inference_smooth,
            kParamMap,
            xtest_w_smooth,
            parameters_resampled,
            new_param_range=kNewParameterRange,
            stereo=kStereo,
            greedy_pretraining=dafx_wise)
        ztest_smooth = ztest_smooth[:xtest.shape[0]]

        # Saves audio files and parameter automation
        x = xtest.copy()
        y = ytest.copy()
        z = ztest.copy()
        z_smooth = ztest_smooth.copy()

        # librosa.output.write_wav was removed in librosa 0.8; soundfile.write is the modern equivalent
        librosa.output.write_wav(os.path.join(output_dir,
                                              f'{idx_track}_input.wav'),
                                 x,
                                 kSR,
                                 norm=False)
        librosa.output.write_wav(os.path.join(output_dir,
                                              f'{idx_track}_target.wav'),
                                 y,
                                 kSR,
                                 norm=False)
        librosa.output.write_wav(os.path.join(output_dir,
                                              f'{idx_track}_output.wav'),
                                 z_smooth,
                                 kSR,
                                 norm=False)
        np.save(os.path.join(output_dir, f'{idx_track}_parameters'),
                parameters_resampled)

        # Uncomment to save the audio output and parameters without smoothing:
        #         librosa.output.write_wav(kPathModels+'results/'+kFullModel+f'_{idx_track}_output_nonsmooth.wav',
        #                                  z, kSR, norm=False)
        #         np.save(kPathModels+'results/'+kFullModel+f'_{idx_track}_parameters', parameters)

        d = utils.getMSE_MFCC(y, z_smooth, kSR, mean_norm=False)
        mfcc_cosine.append(d['cosine'])

        dafx.reset_dafx_state(kSR * 1)

    metrics['mfcc_cosine'] = str(round(np.mean(mfcc_cosine), 5))
    print(metrics)
    print('audio samples saved at ' + output_dir)

    with open(os.path.join(output_dir, model_name + '_test_losses.json'),
              'w') as outfile:
        json.dump(metrics, outfile)

    dafx.shutdown()
    del model, encoder, genTest
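
A hedged usage sketch for evaluate() (paths below are illustrative; name_task must be one of the three values asserted above):

if __name__ == '__main__':
    # Hypothetical checkpoint and output locations
    evaluate(path_model='models/distortion/model.h5',
             params_path='models/distortion/model.params.npy',
             name_task='distortion',
             output_dir='results/distortion/',
             max_len_sec=50)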