Example #1
0
def match_one_midi(midi_file):
    '''
    Hash a single MIDI file and match it against the MSD sequences.

    :parameters:
        - midi_file : str
            Path to a MIDI file to match

    :returns:
        - matches, scores
            The pair of values produced by hash_match.match_one_sequence
            for this query, passed through unchanged
    '''
    midi = pretty_midi.PrettyMIDI(midi_file)
    # Piano roll sampled at the MIDI's beat times, then transposed
    beat_times = midi.get_beats()
    roll = midi.get_piano_roll(times=beat_times).T
    # Prepend a singleton axis and keep only columns 36 through 83
    roll = roll[np.newaxis, :, 36:84]
    # Standardize with the training statistics so the input resembles what
    # the hasher saw during training
    mean, std = train_stats['mean'], train_stats['std']
    roll = (roll - mean)/std
    # The hasher is fed one more leading axis, cast to theano's float type
    hasher_input = roll[np.newaxis].astype(theano.config.floatX)
    hashed = hash(hasher_input)
    # Threshold the hasher output at zero and convert to a uint16 sequence
    query = hash_match.vectors_to_ints(hashed > 0).astype('uint16')
    # Candidate sequences by length (tolerance .4 - see
    # hash_match.filter_by_length for the exact rule)
    length_candidates = hash_match.filter_by_length(query, sequences, .4)
    # Candidate sequences by distance to the MIDI's mean chroma vector
    mean_chroma = midi.get_chroma().mean(axis=1)
    chroma_candidates = hash_match.filter_by_mean_chroma(
        mean_chroma, mean_chromas, 20)
    # Only sequences passing both filters are matched against
    candidates = np.intersect1d(length_candidates, chroma_candidates)
    found, found_scores = hash_match.match_one_sequence(
        query, sequences, .9, 4, candidates)
    return found, found_scores
Example #2
0
def process_one_file(index_entry, base_path='msd', train_stats=train_stats,
                     hash=hash):
    '''
    Hash the features in a single npz file and pickle the result.

    Nothing is written when the output pickle already exists, when the
    'sync_gram' feature matrix has fewer than 6 rows, or when the
    standardized features contain NaNs. Any exception is caught and
    reported on stdout; nothing is raised to the caller.

    :parameters:
        - index_entry : dict
            Entry in an index with keys 'path', 'artist', and 'title'
        - base_path : str
            Which dataset are we processing?
        - train_stats : dict
            Dict where train_stats['mean'] is the training set mean feature
            and train_stats['std'] is the per-feature std dev
        - hash : theano.function
            Theano function which takes in feature matrix and outputs hashes

    :returns:
        None in every case; the result is the pickle file, which holds a
        dict with 'hash_list', 'mean_cqt' and the items of index_entry.
    '''
    try:
        npz_file = os.path.join(BASE_DATA_PATH, base_path, 'npz',
                                index_entry['path'] + '.npz')
        # Build the output path explicitly instead of str.replace-ing 'npz'
        # in the input path, which would also rewrite any 'npz' that happens
        # to occur in BASE_DATA_PATH, base_path or the entry's own path
        output_filename = os.path.join(BASE_DATA_PATH, base_path, 'pkl',
                                       index_entry['path'] + '.pkl')
        if os.path.exists(output_filename):
            return
        # The npz archive keeps a file handle open until closed, so read
        # what we need inside a with-block
        with np.load(npz_file) as features:
            sync_gram = features['sync_gram']
        if sync_gram.shape[0] < 6:
            return
        mean_cqt = sync_gram.mean(axis=0)
        sync_gram = sync_gram[np.newaxis]
        sync_gram = (sync_gram - train_stats['mean'])/train_stats['std']
        if np.isnan(sync_gram).any():
            return
        hashed_features = hash(
            sync_gram[np.newaxis].astype(theano.config.floatX))
        hashes = hash_match.vectors_to_ints(hashed_features > 0)
        hashes = hashes.astype('uint16')
        output_dict = dict([('hash_list', hashes), ('mean_cqt', mean_cqt)],
                           **index_entry)
        output_dir = os.path.split(output_filename)[0]
        # Attempt the mkdir directly rather than checking first: another
        # worker may create the directory between a check and the call
        try:
            os.makedirs(output_dir)
        except OSError:
            if not os.path.isdir(output_dir):
                raise
        with open(output_filename, 'wb') as f:
            pickle.dump(output_dict, f)
    except Exception as e:
        # Single-argument print() emits the same text on Python 2 and 3
        print("Error creating {}: {}".format(index_entry['path'], e))
        return