# Assumed imports for this snippet; PITCH_RANGE, TARGET_SAMPLE_RATE,
# HOP_LENGTH, and process_csv_data_jit are defined elsewhere in the module.
import csv
import numpy as np
from librosa import frames_to_time


def process_csv_data(i, file_path, cqt_length):
    print('[{}] Processing CSV data...'.format(i))
    with open(file_path) as csvfile:
        reader = csv.reader(csvfile, delimiter='\t')
        # Skip header
        next(reader)
        # First pass: count the data rows so we can preallocate
        row_count = sum(1 for row in reader)
        sets = np.zeros([row_count, 3])
        # Rewind and skip the header again for the second pass
        csvfile.seek(0)
        next(reader)
        # Use a distinct loop variable so the worker index `i` is not shadowed
        for row_idx, row in enumerate(reader):
            if len(row) != 3:
                continue
            onset, offset, midi_pitch = row
            sets[row_idx] = [float(onset), float(offset), float(midi_pitch)]
    result = np.zeros([PITCH_RANGE * 2, cqt_length])
    time_stamps = frames_to_time(range(0, cqt_length + 1),
                                 sr=TARGET_SAMPLE_RATE,
                                 hop_length=HOP_LENGTH)
    process_csv_data_jit(sets, time_stamps, cqt_length, result)
    return result
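# Minimal runnable sketch (an assumption, not part of the original module),
# mirroring the two-pass read above: count the rows first, preallocate,
# then rewind and fill.
import csv
import io
import numpy as np

tsv = io.StringIO("onset\toffset\tpitch\n0.0\t0.5\t60\n0.5\t1.0\t62\n")
reader = csv.reader(tsv, delimiter='\t')
next(reader)                        # skip header
n_rows = sum(1 for _ in reader)     # first pass: count data rows
sets = np.zeros([n_rows, 3])
tsv.seek(0)
next(reader)                        # skip header again after rewinding
for idx, row in enumerate(reader):  # second pass: fill the matrix
    sets[idx] = [float(v) for v in row]
# sets -> [[0. , 0.5, 60.], [0.5, 1. , 62.]]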
# Assumed imports for this snippet; Params, getFormData, getFeatureMatrix,
# normalize, getCentroids, reduceDimensionality, and Graph are defined
# elsewhere in the app.
import numpy as np
from flask import render_template, request
from librosa import frames_to_time


def index():
    form = Params(request.form)
    # If the user submitted the form
    if request.method == 'POST' and form.validate():
        # Get data from the form
        extractorAlgorithm, filename, centroids_number, musicPath = getFormData()
        # Extract features, then normalize
        matrix = getFeatureMatrix(extractorAlgorithm, filename)
        matrix_norm = normalize(matrix)
        # Get the centroids and calculate the distance to all frames
        centroids, distancesCentroid = getCentroids(matrix_norm, centroids_number)
        distancesCentroid = np.array(distancesCentroid)
        # Define the number of closest frames to keep per centroid
        centroidClosestPoints = 5
        # Get the IDs of the frames closest to each centroid
        closest_frames_idx = distancesCentroid.argsort(
            axis=0)[:centroidClosestPoints].T.tolist()
        centroid_frame_counts = np.bincount(np.argmin(distancesCentroid, axis=1))
        # Get the time of each frame
        tempos = frames_to_time(closest_frames_idx, sr=44100,
                                hop_length=1024, n_fft=2048)
        tempos = tempos.tolist()
        # Project the frames and centroids onto the principal components
        pca = reduceDimensionality(matrix_norm)
        matrix_norm = pca.transform(matrix_norm)
        centroids = pca.transform(centroids)
        # Generate and encode the graph to send to the view
        graph = Graph(matrix_norm, centroids, filename,
                      centroid_frame_count=centroid_frame_counts).generateGraph()
        print(tempos)
        # Send all structures to the view
        return render_template('index.html', graph=graph, form=form,
                               musicPath=musicPath, tempos=tempos)
    # Generate an empty graph
    graph = Graph().generateGraph()
    return render_template('index.html', graph=graph, form=form)
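# Toy illustration (an assumption, not part of the original app): how argsort
# over a (n_frames, n_centroids) distance matrix yields the closest frames per
# centroid, and how bincount over argmin counts the frames in each cluster.
import numpy as np

distances = np.array([[0.1, 0.9],
                      [0.4, 0.2],
                      [0.8, 0.3]])          # 3 frames, 2 centroids
closest = distances.argsort(axis=0)[:2].T   # 2 nearest frames per centroid
# -> [[0, 1], [1, 2]]: frames 0 and 1 are nearest centroid 0,
#    frames 1 and 2 are nearest centroid 1
counts = np.bincount(np.argmin(distances, axis=1))
# -> [1, 2]: one frame falls in cluster 0, two frames in cluster 1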
def __coord_time(n, sr=22050, hop_length=512, **_kwargs):
    """Get time coordinates from frames"""
    return core.frames_to_time(np.arange(n + 1), sr=sr, hop_length=hop_length)
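# Minimal numeric sketch (assumes a standard librosa install): frames_to_time
# maps a frame index to seconds via time = frame * hop_length / sr, so the
# helper above returns n + 1 bin-edge coordinates for n frames.
import numpy as np
import librosa

librosa.frames_to_time(np.arange(4), sr=22050, hop_length=512)
# -> array([0.        , 0.02321995, 0.04643991, 0.06965986])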
# Assumed imports for this standalone copy of the modified detector.
import numpy as np
from librosa import core, util
from librosa.onset import onset_backtrack, onset_strength
from librosa.util.exceptions import ParameterError


def onset_detect(
        y=None,
        sr=22050,
        onset_envelope=None,
        hop_length=512,
        backtrack=False,
        energy=None,
        units="frames",
        normalize=True,
        **kwargs
):
    """Basic onset detector.

    Locate note onset events by picking peaks in an onset strength envelope.
    Modified from `librosa.onset.onset_detect` to add a `normalize` flag.

    The `peak_pick` parameters were chosen by large-scale hyper-parameter
    optimization over the dataset provided by [1]_.

    .. [1] https://github.com/CPJKU/onset_db

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series
    sr : number > 0 [scalar]
        sampling rate of `y`
    onset_envelope : np.ndarray [shape=(m,)] (optional)
        pre-computed onset strength envelope
    hop_length : int > 0 [scalar]
        hop length (in samples)
    units : {'frames', 'samples', 'time'}
        The units to encode detected onset events in.
        By default, 'frames' are used.
    backtrack : bool
        If `True`, detected onset events are backtracked to the nearest
        preceding minimum of `energy`.
        This is primarily useful when using onsets as slice points for
        segmentation.
    energy : np.ndarray [shape=(m,)] (optional)
        An energy function to use for backtracking detected onset events.
        If none is provided, then `onset_envelope` is used.
    normalize : bool (optional)
        If `True`, normalize the onset envelope before peak picking.
        By default this parameter is `True`.
    kwargs : additional keyword arguments
        Additional parameters for peak picking.
        See `librosa.util.peak_pick` for details.

    Returns
    -------
    onsets : np.ndarray [shape=(n_onsets,)]
        estimated positions of detected onsets, in whichever units
        are specified. By default, frame indices.

        .. note::
            If no onset strength could be detected, onset_detect returns
            an empty array.

    Raises
    ------
    ParameterError
        if neither `y` nor `onset_envelope` is provided,
        or if `units` is not one of 'frames', 'samples', or 'time'
    """
    # First, get the frame->onset strength profile if we don't already have one
    if onset_envelope is None:
        if y is None:
            raise ParameterError("y or onset_envelope must be provided")
        onset_envelope = onset_strength(y=y, sr=sr, hop_length=hop_length)

    # Shift the onset envelope up to be non-negative
    # (a common normalization step to make the threshold more consistent)
    onset_envelope -= onset_envelope.min()

    # Do we have any onsets to grab?
    if not onset_envelope.any():
        return np.array([], dtype=int)

    if normalize:
        # Normalize the onset strength function to the [0, 1] range
        onset_envelope /= onset_envelope.max()

    # These parameter settings were found by large-scale search
    kwargs.setdefault("pre_max", 0.03 * sr // hop_length)       # 30ms
    kwargs.setdefault("post_max", 0.00 * sr // hop_length + 1)  # 0ms
    kwargs.setdefault("pre_avg", 0.10 * sr // hop_length)       # 100ms
    kwargs.setdefault("post_avg", 0.10 * sr // hop_length + 1)  # 100ms
    kwargs.setdefault("wait", 0.03 * sr // hop_length)          # 30ms
    kwargs.setdefault("delta", 0.07)

    # Peak pick the onset envelope
    onsets = util.peak_pick(onset_envelope, **kwargs)

    # Optionally backtrack the events
    if backtrack:
        if energy is None:
            energy = onset_envelope
        onsets = onset_backtrack(onsets, energy)

    if units == "frames":
        pass
    elif units == "samples":
        onsets = core.frames_to_samples(onsets, hop_length=hop_length)
    elif units == "time":
        onsets = core.frames_to_time(onsets, hop_length=hop_length, sr=sr)
    else:
        raise ParameterError("Invalid unit type: {}".format(units))

    return onsets
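# Hedged usage sketch (assumes librosa >= 0.8 for the bundled 'trumpet'
# example clip): the modified onset_detect above is called in place of
# librosa.onset.onset_detect, and peak_pick overrides such as `delta`
# pass through **kwargs.
import librosa

y, sr = librosa.load(librosa.ex('trumpet'))
onset_times = onset_detect(y=y, sr=sr, hop_length=512, units='time')
stricter = onset_detect(y=y, sr=sr, units='time', delta=0.2)  # fewer, stronger peaks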
    3464.84375, 3468.75, 3472.65625, 3476.5625, 3480.46875, 3484.375,
    3488.28125, 3492.1875, 3496.09375, 3500.0, 3503.90625, 3507.8125,
    3511.71875, 3515.625, 3519.53125, 3523.4375, 3527.34375, 3531.25,
    3535.15625, 3539.0625, 3542.96875, 3546.875, 3550.78125, 3554.6875,
    3558.59375, 3562.5, 3566.40625, 3570.3125, 3574.21875, 3578.125,
    3582.03125, 3585.9375, 3589.84375, 3593.75, 3597.65625, 3601.5625,
    3605.46875, 3609.375, 3613.28125, 3617.1875, 3621.09375, 3625.0,
    3628.90625, 3632.8125, 3636.71875, 3640.625, 3644.53125, 3648.4375,
    3652.34375, 3656.25, 3660.15625, 3664.0625, 3667.96875, 3671.875,
    3675.78125, 3679.6875, 3683.59375, 3687.5, 3691.40625, 3695.3125,
    3699.21875, 3703.125, 3707.03125, 3710.9375, 3714.84375, 3718.75,
    3722.65625, 3726.5625, 3730.46875, 3734.375, 3738.28125, 3742.1875,
    3746.09375
]

frequency_strength_thr = 1e2

times_of_bins = lambda hm_steps: frames_to_time(
    range(0, hm_steps), sr=sample_rate, hop_length=fft_hop_len, n_fft=fft_bins)

zscore_scale = True
minmax_scale = False
log_scale = False

data_path = 'data'
dev_ratio = 0

## model params

timestep_size = len(frequencies_to_pick)
in_size = timestep_size
out_size = timestep_size
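# Illustrative note (an assumption about the librosa API in use): passing
# n_fft to frames_to_time, as times_of_bins does above, offsets every time
# stamp by n_fft // 2 samples, centering it on the analysis window rather
# than its left edge.
import librosa

librosa.frames_to_time([0, 1], sr=22050, hop_length=512)
# -> array([0.        , 0.02321995])
librosa.frames_to_time([0, 1], sr=22050, hop_length=512, n_fft=2048)
# -> array([0.04643991, 0.06965986])  (shifted by 1024 / 22050 seconds)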
# Assumed imports for this snippet; SR_HPSS, N_FFT_HPSS_2, N_HOP_HPSS_2,
# PREDICTIONS_DIR, process_single_audio, and predict_song are defined
# elsewhere in the module.
import numpy as np
from librosa import frames_to_time


def frame_level_predict(model_name, filename, cache=True, plot=False):
    """
    Predict Voice Activity Regions at a frame level for a given song.

    For each frame of the MFCC a voice detection probability is predicted,
    so the output has shape (n_frames, 1).

    :param model_name: name of the trained model
    :param filename: path to the music file to be predicted
    :param cache: flag to optimize heavy operations by caching to disk
    :param plot: flag to plot MFCCs and SVD in an aligned plot, if a GUI is available
    :return: (time, predictions): SVD probabilities at frame level with time markings
    """
    audio_name = filename.parts[-1]
    audio_name_prefix = '.'.join(filename.parts[:-1])
    serialized_filename = PREDICTIONS_DIR / '{}.{}.{}.csv'.format(
        audio_name_prefix, audio_name, model_name)
    mel = process_single_audio(filename, cache=cache)
    try:
        if not cache:
            raise IOError
        data = np.loadtxt(serialized_filename, delimiter=',')
        time = data[0]
        frame_level_y_pred = data[1]
        print("info: loaded serialized prediction")
    except Exception:
        # Transform raw window predictions to frame-level predictions
        y_pred = predict_song(model_name, filename, cache=cache)
        aligned_y_pred = [[] for _ in range(mel.shape[1])]
        for first_frame_idx, window_prediction in enumerate(y_pred):
            # For each prediction window...
            for offset, frame_prediction in enumerate(window_prediction):
                # ...accumulate the overlapped predictions in a list
                aligned_y_pred[first_frame_idx + offset].append(
                    frame_prediction[0])
        frame_level_y_pred = []
        for predictions in aligned_y_pred[:-1]:
            # Reduce the overlapped predictions to a single value
            frame_level_y_pred.append(min(predictions))
        time = frames_to_time(range(len(frame_level_y_pred)),
                              sr=SR_HPSS,
                              n_fft=N_FFT_HPSS_2,
                              hop_length=N_HOP_HPSS_2)
        np.savetxt(serialized_filename,
                   np.asarray((time, frame_level_y_pred)),
                   delimiter=",")
        print("info: saved serialized prediction")
    if plot:
        import matplotlib.pyplot as plt
        import librosa.display

        # Plot stacked MFCCs on the top half of a 2-row grid
        plt.figure(figsize=(14, 5))
        plt.subplot(211)
        librosa.display.specshow(mel, sr=SR_HPSS, x_axis='time', y_axis='hz',
                                 hop_length=N_HOP_HPSS_2)
        # Plot frame-level predictions on the bottom half, aligned with the MFCCs
        plt.subplot(212)
        plt.plot(time, frame_level_y_pred)
        plt.xlabel("Time")
        plt.ylabel("Singing Voice Activation")
        plt.show()
        print("info: plotted")
    print('info: done')
    return time, frame_level_y_pred
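# Toy sketch (an assumption, not from the original module) of the overlap
# reduction above: each sliding-window prediction covers several consecutive
# frames, and the overlapped per-frame scores are collapsed with min() as a
# conservative voice-activity estimate.
y_pred = [[[0.9], [0.8]],   # window starting at frame 0
          [[0.6], [0.7]]]   # window starting at frame 1
n_frames = 3
aligned = [[] for _ in range(n_frames)]
for start, window in enumerate(y_pred):
    for offset, frame_pred in enumerate(window):
        aligned[start + offset].append(frame_pred[0])
# aligned == [[0.9], [0.8, 0.6], [0.7]]
frame_level = [min(p) for p in aligned[:-1]]
# -> [0.9, 0.6]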