Example #1
0
def compute_ssm(wav_file, h, ssm_read_pk, is_ismir=False, tonnetz=False):
    """Return the self-similarity matrix (SSM) of an audio file.

    The matrix is either loaded from the pickle ``wav_file + "-audio-ssm.pk"``
    or computed from the audio; a freshly computed matrix is always written
    back to that same pickle path.

    Parameters
    ----------
    wav_file : str
        Path to the wav file to be read.
    h : float
        Hop size.
    ssm_read_pk : bool
        If True, load the SSM from the pickle file instead of computing it.
        If False, compute it and save it to the pickle file.
    is_ismir : bool
        Produce the plots that appear on the ISMIR paper (the chroma plot is
        only produced when the SSM is actually computed).
    tonnetz : bool
        Compute the SSM from Tonnetz features instead of Chroma features.

    Returns
    -------
    X : np.array((N, N))
        Self-similarity matrix.
    """
    cache_suffix = "-audio-ssm.pk"

    if ssm_read_pk:
        # Reuse the matrix stored by a previous run
        X = utils.read_cPickle(wav_file + cache_suffix)
    else:
        # Chromagram of the audio, smoothed with a median filter
        logging.info("Reading the WAV file...")
        chroma = utils.median_filter(
            utils.compute_audio_chromagram(wav_file, h), L=9)

        if is_ismir:
            ismir.plot_chroma(chroma)

        # Features are the chroma itself unless Tonnetz was requested
        features = utils.chroma_to_tonnetz(chroma) if tonnetz else chroma

        logging.info("Computing key-invariant self-similarity matrix...")
        X = utils.compute_key_inv_ssm(features, h)

        # Store the result so later runs can pass ssm_read_pk=True
        utils.write_cPickle(wav_file + cache_suffix, X)

    if is_ismir:
        ismir.plot_ssm(X)
        ismir.plot_score_examples(X)

    return X
Example #2
0
def process(wav_file, outfile, csv_file=None, bpm=None, tol=0.35,
            ssm_read_pk=False, read_pk=False, rho=2, is_ismir=False,
            tonnetz=False, sonify=False):
    """Main process to find the patterns in a polyphonic audio file.

    Parameters
    ----------
    wav_file : str
        Path to the wav file to be analyzed.
    outfile : str
        Path to file to save the results (only written when `csv_file` is
        given; otherwise the patterns are printed).
    csv_file : str
        Path to the csv containing the midi_score of the input audio file
        (needed to produce a result that can be read for JKU dataset).
    bpm : int
        Beats per minute of the piece. If None, it is obtained with
        ``get_bpm(wav_file)``.
    tol : float
        Initial tolerance to find the segments in the SSM. It is lowered in
        steps of 0.05 until segments and patterns are found.
    ssm_read_pk : bool
        Whether to read the SSM from a pickle file.
    read_pk : bool
        Whether to read the segments from a pickle file
        (``wav_file + "-audio.pk"``) instead of searching for them.
    rho : int
        Positive integer to compute the score of the segments.
    is_ismir : bool
        Produce the plots that appear on the ISMIR paper.
    tonnetz : bool
        Whether to use Tonnetz or Chromas.
    sonify : bool
        Whether to sonify the patterns or not.
    """

    # Get the correct bpm if needed
    if bpm is None:
        bpm = get_bpm(wav_file)

    # Algorithm parameters
    min_notes = 8
    max_diff_notes = 4
    h = bpm / 60. / 8.  # Hop size /8 works better than /4, but it takes longer
                        # to process

    # Obtain the Self Similarity Matrix
    X = compute_ssm(wav_file, h, ssm_read_pk, is_ismir, tonnetz)

    # Read CSV file
    if csv_file is not None:
        logging.info("Reading the CSV file for MIDI pitches...")
        midi_score = utils.read_csv(csv_file)

    # These only depend on the hop size, so compute them once instead of on
    # every retry of the loop below.
    max_diff = int(max_diff_notes / float(h))
    min_dur = int(np.ceil(min_notes / float(h)))

    patterns = []
    csv_patterns = []
    while patterns == [] or csv_patterns == []:
        # Find the segments inside the self similarity matrix
        logging.info("Finding segments in the self-similarity matrix...")
        if not read_pk:
            segments = []
            while segments == []:
                logging.info("\ttrying tolerance %.2f" % tol)
                segments = utils.find_segments(X, min_dur, th=tol, rho=rho)
                tol -= 0.05
            utils.write_cPickle(wav_file + "-audio.pk", segments)
        else:
            segments = utils.read_cPickle(wav_file + "-audio.pk")

        # Obtain the patterns from the segments and split them if needed
        logging.info("Obtaining the patterns from the segments...")
        patterns = obtain_patterns(segments, max_diff)

        # Decrease tolerance in case we couldn't find any patterns.
        # NOTE(review): the inner search loop has already lowered `tol` once
        # after its successful attempt, so a retry resumes 0.10 below the
        # tolerance that produced the current segments -- confirm intended.
        tol -= 0.05

        # Get the csv patterns if they exist
        if csv_file is not None:
            csv_patterns = patterns_to_csv(patterns, midi_score, h)
        else:
            # Non-empty placeholder so the loop condition only depends on
            # `patterns` when there is no csv file.
            csv_patterns = [0]

        # Segments read from the pickle do not depend on `tol`, so retrying
        # would reload the same segments and loop forever. Give up instead.
        if read_pk and (patterns == [] or csv_patterns == []):
            logging.warning("No patterns could be obtained from the pickled "
                            "segments; not retrying.")
            break

    # Sonify patterns if needed
    if sonify:
        logging.info("Sonifying Patterns...")

        utils.sonify_patterns(wav_file, patterns, h)

    # Formatting csv patterns and save results
    if csv_file is not None:
        logging.info("Writting results into %s" % outfile)
        utils.save_results(csv_patterns, outfile=outfile)
    else:
        # If not csv, just print the results on the screen
        print_patterns(patterns, h)

    if is_ismir:
        ismir.plot_segments(X, segments)

    # Alright, we're done :D
    logging.info("Algorithm finished.")