Example #1
def process_csv_data(i, file_path, cqt_length):
    print('[{}] Processing CSV data...'.format(i))
    result = np.array([])
    with open(file_path) as csvfile:
        reader = csv.reader(csvfile, delimiter='\t')
        # Skip header
        next(reader)
        row_count = sum(1 for row in reader)  # count data rows in one pass

        sets = np.zeros([row_count, 3])

        # Rewind the file and skip the header again before the real pass
        csvfile.seek(0)
        next(reader)
        for row_idx, row in enumerate(reader):  # avoid shadowing the outer index `i`
            if len(row) != 3:
                continue

            onset, offset, midi_pitch = row

            sets[row_idx] = [onset, offset, midi_pitch]

        result = np.zeros([PITCH_RANGE * 2, cqt_length])
        time_stamps = frames_to_time(range(0, cqt_length + 1),
                                     sr=TARGET_SAMPLE_RATE,
                                     hop_length=HOP_LENGTH)
        process_csv_data_jit(sets, time_stamps, cqt_length, result)

    return result
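A minimal sketch of the frame-to-time conversion this example relies on; the sample rate and hop length below are illustrative stand-ins for the module's TARGET_SAMPLE_RATE and HOP_LENGTH constants:

import numpy as np
from librosa import frames_to_time

SR = 22050   # illustrative stand-in for TARGET_SAMPLE_RATE
HOP = 512    # illustrative stand-in for HOP_LENGTH
cqt_length = 4

# One timestamp per frame boundary: cqt_length + 1 values, in seconds
time_stamps = frames_to_time(range(cqt_length + 1), sr=SR, hop_length=HOP)
print(time_stamps)  # [0.     0.0232 0.0464 0.0697 0.0929]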
Example #2
def index():

    form = Params(request.form)

    # If the user submitted the form
    if request.method == 'POST' and form.validate():
        # Get data from form
        extratorAlgorithm, filename, centroids_number, musicPath = getFormData()

        # Extract features, then normalize
        matrix = getFeatureMatrix(extratorAlgorithm, filename)
        matrix_norm = normalize(matrix)

        # Get the centroids and calculate the distance to all frames
        centroids, distancesCentroid = getCentroids(matrix_norm,
                                                    centroids_number)
        distancesCentroid = np.array(distancesCentroid)

        # Define the number of closest frames from centroids to get
        centroidClosestPoints = 5

        # Get the IDs of the frames closest to each centroid
        closest_frames_idx = distancesCentroid.argsort(
            axis=0)[:centroidClosestPoints].T.tolist()
        centroid_frame_counts = np.bincount(
            np.argmin(distancesCentroid, axis=1))

        # Get the time from each frame
        tempos = frames_to_time(closest_frames_idx,
                                sr=44100,
                                hop_length=1024,
                                n_fft=2048)
        tempos = tempos.tolist()

        # Apply some transformations
        pca = reduceDimensionality(matrix_norm)
        matrix_norm = pca.transform(matrix_norm)
        centroids = pca.transform(centroids)

        # Generate and encode graph to send to view
        graph = Graph(
            matrix_norm,
            centroids,
            filename,
            centroid_frame_count=centroid_frame_counts).generateGraph()
        print(tempos)
        # Send all structures to the view
        return render_template('index.html',
                               graph=graph,
                               form=form,
                               musicPath=musicPath,
                               tempos=tempos)

    # Generate an empty graphic
    graph = Graph().generateGraph()
    return render_template('index.html', graph=graph, form=form)
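For reference, a self-contained sketch of the frames_to_time call used above, with the same hard-coded analysis parameters (the frame indices are made up):

import numpy as np
from librosa import frames_to_time

# Hypothetical closest-frame indices: one row per centroid
closest_frames_idx = [[0, 10, 20], [5, 15, 25]]

# With n_fft given, each time is offset by n_fft // 2 samples (window centering)
tempos = frames_to_time(np.asarray(closest_frames_idx),
                        sr=44100, hop_length=1024, n_fft=2048)
print(tempos.tolist())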
Example #3
def __coord_time(n, sr=22050, hop_length=512, **_kwargs):
    """Get time coordinates from frames"""
    return core.frames_to_time(np.arange(n + 1), sr=sr, hop_length=hop_length)
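A quick sketch of what this helper yields: n + 1 boundary times, so that n frames can be drawn as spans between consecutive edges:

import numpy as np
from librosa import frames_to_time

n = 4
edges = frames_to_time(np.arange(n + 1), sr=22050, hop_length=512)
print(edges)  # 5 edge times (seconds) delimiting 4 frames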
Example #4
def onset_detect(
    y=None,
    sr=22050,
    onset_envelope=None,
    hop_length=512,
    backtrack=False,
    energy=None,
    units="frames",
    normalize=True,
    **kwargs
):
    """Basic onset detector.  Locate note onset events by picking peaks in an
    onset strength envelope. Modified from `librosa.onset.onset_detect` to add a
    `normalize` flag.

    The `peak_pick` parameters were chosen by large-scale hyper-parameter
    optimization over the dataset provided by [1]_.

    .. [1] https://github.com/CPJKU/onset_db


    Parameters
    ----------
    y          : np.ndarray [shape=(n,)]
        audio time series

    sr         : number > 0 [scalar]
        sampling rate of `y`

    onset_envelope     : np.ndarray [shape=(m,)]
        (optional) pre-computed onset strength envelope

    hop_length : int > 0 [scalar]
        hop length (in samples)

    units : {'frames', 'samples', 'time'}
        The units to encode detected onset events in.
        By default, 'frames' are used.

    backtrack : bool
        If `True`, detected onset events are backtracked to the nearest
        preceding minimum of `energy`.

        This is primarily useful when using onsets as slice points for segmentation.

    energy : np.ndarray [shape=(m,)] (optional)
        An energy function to use for backtracking detected onset events.
        If none is provided, then `onset_envelope` is used.

    normalize : bool (optional)
        If `True`, normalize the onset envelope before peak picking. By default
        this parameter is `True`.

    kwargs : additional keyword arguments
        Additional parameters for peak picking.

        See `librosa.util.peak_pick` for details.


    Returns
    -------

    onsets : np.ndarray [shape=(n_onsets,)]
        estimated positions of detected onsets, in whichever units
        are specified.  By default, frame indices.

        .. note::
            If no onset strength could be detected, onset_detect returns
            an empty list.


    Raises
    ------
    ParameterError
        if neither `y` nor `onset_envelope` is provided

        or if `units` is not one of 'frames', 'samples', or 'time'
    """

    # First, get the frame->beat strength profile if we don't already have one
    if onset_envelope is None:
        if y is None:
            raise ParameterError("y or onset_envelope must be provided")

        onset_envelope = onset_strength(y=y, sr=sr, hop_length=hop_length)

    # Shift onset envelope up to be non-negative
    # (a common normalization step to make the threshold more consistent)
    onset_envelope -= onset_envelope.min()

    # Do we have any onsets to grab?
    if not onset_envelope.any():
        return np.array([], dtype=int)  # `np.int` was removed in NumPy 1.24

    if normalize:
        # Normalize onset strength function to [0, 1] range
        onset_envelope /= onset_envelope.max()

    # These parameter settings found by large-scale search
    kwargs.setdefault("pre_max", 0.03 * sr // hop_length)  # 30ms
    kwargs.setdefault("post_max", 0.00 * sr // hop_length + 1)  # 0ms
    kwargs.setdefault("pre_avg", 0.10 * sr // hop_length)  # 100ms
    kwargs.setdefault("post_avg", 0.10 * sr // hop_length + 1)  # 100ms
    kwargs.setdefault("wait", 0.03 * sr // hop_length)  # 30ms
    kwargs.setdefault("delta", 0.07)

    # Peak pick the onset envelope
    onsets = util.peak_pick(onset_envelope, **kwargs)

    # Optionally backtrack the events
    if backtrack:
        if energy is None:
            energy = onset_envelope

        onsets = onset_backtrack(onsets, energy)

    if units == "frames":
        pass
    elif units == "samples":
        onsets = core.frames_to_samples(onsets, hop_length=hop_length)
    elif units == "time":
        onsets = core.frames_to_time(onsets, hop_length=hop_length, sr=sr)
    else:
        raise ParameterError("Invalid unit type: {}".format(units))

    return onsets
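A usage sketch against stock librosa (whose onset_detect lacks only the `normalize` flag added above); the bundled example clip, downloaded on first use, keeps it self-contained:

import librosa

y, sr = librosa.load(librosa.example('trumpet'))

# Detect onsets and report them in seconds rather than frame indices
onset_times = librosa.onset.onset_detect(y=y, sr=sr, hop_length=512,
                                         backtrack=True, units='time')
print(onset_times)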
Example #5
frequencies_to_pick = [  # truncated in this snippet; earlier values elided
    3464.84375, 3468.75, 3472.65625, 3476.5625, 3480.46875, 3484.375,
    3488.28125, 3492.1875, 3496.09375, 3500.0, 3503.90625, 3507.8125,
    3511.71875, 3515.625, 3519.53125, 3523.4375, 3527.34375, 3531.25,
    3535.15625, 3539.0625, 3542.96875, 3546.875, 3550.78125, 3554.6875,
    3558.59375, 3562.5, 3566.40625, 3570.3125, 3574.21875, 3578.125,
    3582.03125, 3585.9375, 3589.84375, 3593.75, 3597.65625, 3601.5625,
    3605.46875, 3609.375, 3613.28125, 3617.1875, 3621.09375, 3625.0,
    3628.90625, 3632.8125, 3636.71875, 3640.625, 3644.53125, 3648.4375,
    3652.34375, 3656.25, 3660.15625, 3664.0625, 3667.96875, 3671.875,
    3675.78125, 3679.6875, 3683.59375, 3687.5, 3691.40625, 3695.3125,
    3699.21875, 3703.125, 3707.03125, 3710.9375, 3714.84375, 3718.75,
    3722.65625, 3726.5625, 3730.46875, 3734.375, 3738.28125, 3742.1875,
    3746.09375
]
frequency_strength_thr = 1e2
# Keyword arguments: sr/hop_length/n_fft are keyword-only in librosa >= 0.10
times_of_bins = lambda hm_steps: frames_to_time(
    range(hm_steps), sr=sample_rate, hop_length=fft_hop_len, n_fft=fft_bins)

zscore_scale = True
minmax_scale = False
log_scale = False

data_path = 'data'
dev_ratio = 0

## model params

timestep_size = len(frequencies_to_pick)

in_size = timestep_size
out_size = timestep_size
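A quick check of the times_of_bins helper above, with illustrative stand-ins for the sample rate and FFT settings defined in this config:

from librosa import frames_to_time

sample_rate, fft_hop_len, fft_bins = 22050, 512, 2048  # illustrative stand-ins

times_of_bins = lambda hm_steps: frames_to_time(
    range(hm_steps), sr=sample_rate, hop_length=fft_hop_len, n_fft=fft_bins)

# Each bin time is (step * fft_hop_len + fft_bins // 2) / sample_rate seconds
print(times_of_bins(4))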
Example #6
def frame_level_predict(model_name, filename, cache=True, plot=False):
    """
    Predict Voice Activity Regions at a Frame Level for a given song.
    For each frame of the MFCC, a Voice Detection Probability is predicted, so the output has shape (n_frames, 1).

    :param model_name: name of the trained model
    :param filename:  path to the music file to be predicted
    :param cache: flag to optimize heavy operations with caching in disk
    :param plot: flag to plot MFCCs and SVD in an aligned plot if GUI available.
    :return: (Time, Predictions): SVD probabilities at frame level with time markings
    """
    audio_name = filename.parts[-1]
    audio_name_prefix = '.'.join(filename.parts[:-1])
    serialized_filename = PREDICTIONS_DIR / '{}.{}.{}.csv'.format(
        audio_name_prefix, audio_name, model_name)
    mel = process_single_audio(filename, cache=cache)

    try:
        if not cache:
            raise IOError
        data = np.loadtxt(serialized_filename, delimiter=',')
        time = data[0]
        frame_level_y_pred = data[1]
        print("info: loaded serialized prediction")
    except Exception:

        # transform raw predictions to frame level
        y_pred = predict_song(model_name, filename, cache=cache)
        aligned_y_pred = [[] for _ in range(mel.shape[1])]
        for first_frame_idx, window_prediction in enumerate(y_pred):
            # for each prediction
            for offset, frame_prediction in enumerate(window_prediction):
                # accumulate overlapped predictions in a list
                aligned_y_pred[first_frame_idx + offset].append(
                    frame_prediction[0])

        frame_level_y_pred = []
        for predictions in aligned_y_pred[:-1]:
            # reduce the overlapped predictions to a single value
            frame_level_y_pred.append(min(predictions))

        time = frames_to_time(range(len(frame_level_y_pred)),
                              sr=SR_HPSS,
                              n_fft=N_FFT_HPSS_2,
                              hop_length=N_HOP_HPSS_2)
        np.savetxt(serialized_filename,
                   np.asarray((time, frame_level_y_pred)),
                   delimiter=",")
        print("info: saved serialized prediction")
    if plot:
        import matplotlib.pyplot as plt
        import librosa.display

        # plot stacked MFCCs
        plt.figure(figsize=(14, 5))
        plt.subplot(211)
        librosa.display.specshow(mel,
                                 sr=SR_HPSS,
                                 x_axis='time',
                                 y_axis='hz',
                                 hop_length=N_HOP_HPSS_2)

        # plot frame level predictions
        plt.subplot(313)
        plt.plot(time, frame_level_y_pred)
        plt.xlabel("Time")
        plt.ylabel("Singing Voice Activation")
        plt.show()
        print("info: plotted")
    print('info: done')
    return time, frame_level_y_pred
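The overlap-reduction step above, shown in isolation; a toy sketch with made-up window predictions (three windows, two frames each, one probability per frame):

# Toy sliding-window predictions shaped like the model output above
y_pred = [[[0.9], [0.8]], [[0.7], [0.6]], [[0.5], [0.4]]]
n_frames = len(y_pred) + len(y_pred[0]) - 1  # frames covered by the overlapping windows

aligned = [[] for _ in range(n_frames)]
for first_frame_idx, window in enumerate(y_pred):
    for offset, frame_pred in enumerate(window):
        aligned[first_frame_idx + offset].append(frame_pred[0])

# Conservative reduction: a frame is voiced only if every overlapping window agrees
frame_level = [min(p) for p in aligned]
print(frame_level)  # [0.9, 0.7, 0.5, 0.4]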