def score(data, hmm_depth=3, cache=None, obs=1000, smooth=True, check_len=True):
    """Sum the transition scores of the first `obs` observation windows.

    `data` is used directly as the song when it is a list; any other value
    is handed to utils.parse_song.  The song is then trimmed with
    utils.trim_song(..., length=2500).

    For every index in range(obs) a window of `hmm_depth` frames is
    requested from utils.sized_observation_from_index.  Frames in the
    window are separated by "." and the three notes of a frame by "|".
    The last frame is the one being scored; the earlier frames are
    regrouped into three per-channel lists and passed, with the last
    frame, `smooth` and `cache`, to score_transition.

    Returns the sum of the per-window scores, or None (after printing a
    warning) when `check_len` is true and len(song[0]) is below `obs`.
    """
    if isinstance(data, list):
        parsed = data
    else:
        parsed = utils.parse_song(data)
    song = utils.trim_song(parsed, length=2500)
    song_len = len(song[0])

    # There is no known reliable way to normalize log-likelihoods across
    # songs of different lengths, so instead of normalizing probabilities
    # every song is scored over the same fixed number of observations.
    if check_len and song_len < obs:
        print(" !! %s is too short (%d)" % (data, song_len))
        return None

    scores = []
    for index in range(obs):
        window = utils.sized_observation_from_index(
            song, start=index, length=hmm_depth)
        frames = window.split(".")
        target = frames[-1].split("|")

        # One list per channel, filled from every frame before the target.
        history = [[], [], []]
        for earlier in frames[:-1]:
            note_a, note_b, note_c = earlier.split("|")
            for channel, note in zip(history, (note_a, note_b, note_c)):
                channel.append(note)

        scores.append(score_transition(history, target, smooth, cache))

    return sum(scores)
def train_on_files(files, max_hmm_order=8):
    """Record observation counts for each file in `files` not seen before.

    A file for which has_file_been_recorded() is true is skipped.  Every
    other file is first registered with record_file(), then parsed and
    trimmed to length 2500.  Songs with len(song[0]) below 10 are skipped
    after being registered (no counts, no commit).

    For the remaining songs the 'S|S|S' observation is recorded once, and
    then, for every index, the observations of length 1..max_hmm_order
    obtained from sized_observation_from_index are recorded.  commit() is
    called once per processed file.
    """
    for a_file in files:
        if has_file_been_recorded(a_file):
            print("Not recalculating counts for %s" % (a_file,))
            continue

        print("Beginning to calculate counts for %s" % (a_file,))
        record_file(a_file)

        song = trim_song(parse_song(a_file), length=2500)
        song_len = len(song[0])

        if song_len < 10:
            print("Song is too short for consideration.  May be a sound effect or something trivial.  Ignoring.")
            continue

        record_obs('S|S|S')
        for index in range(song_len):
            for order in range(max_hmm_order + 1):
                if order == 0:
                    # Order zero only serializes the observation; the
                    # result is not recorded.
                    serialize_observation(song, index)
                    continue
                record_obs(
                    sized_observation_from_index(song, start=index, length=order))

        commit()
        print("finished calculating counts from %s" % (a_file,))
def train_on_files(files):
    """Record channel-transition counts for each file not seen before.

    A file for which has_file_been_recorded() is true is skipped.  Every
    other file is first registered with record_file(), then parsed and
    trimmed to length 2500.  Songs with len(song[0]) below 100 are skipped
    after being registered (no counts, no commit).

    For every index a two-frame observation is requested from
    sized_observation_from_index; frames are separated by "." and the
    three channels of a frame by "|".  Counts are recorded through
    record_cross_obs_count and record_inner_obs_count, and commit() is
    called once per processed file.
    """
    for a_file in files:
        if has_file_been_recorded(a_file):
            print("Not recalculating counts for %s" % (a_file,))
            continue

        print("Beginning to calculate counts for %s" % (a_file,))
        record_file(a_file)

        song = trim_song(parse_song(a_file), length=2500)
        song_len = len(song[0])

        if song_len < 100:
            print("Song is too short for consideration.  May be a sound effect or something trivial.  Ignoring.")
            continue

        for index in range(song_len):
            pair = sized_observation_from_index(song, start=index, length=2)
            first_frame, second_frame = pair.split(".")
            old_1, old_2, old_3 = first_frame.split("|")
            new_1, new_2, new_3 = second_frame.split("|")

            cross_counts = (
                # Channel one is treated as the most important one, so
                # every channel of the first frame is counted as
                # transitioning into channel one of the second frame.
                (old_1, 1, new_1, 1),
                (old_2, 2, new_1, 1),
                (old_3, 3, new_1, 1),
                # The remaining channels are counted transitioning into
                # themselves in the second frame.
                (old_2, 2, new_2, 2),
                (old_3, 3, new_3, 3),
            )
            for arguments in cross_counts:
                record_cross_obs_count(*arguments)

            # Last, count the assumed "melody" of the second frame against
            # the assumed "background" channels of that same frame.
            for channel_number, background in ((2, new_2), (3, new_3)):
                record_inner_obs_count(new_1, background, channel_number)

        commit()
        print("finished calculating counts from %s" % (a_file,))
def score(data, cache=None, obs=1000, smooth=False, check_len=True):
    """Sum the frame-to-frame transition scores of the first `obs` indexes.

    `data` is used directly as the song when it is a list; any other value
    is handed to utils.parse_song.  The song is then trimmed with
    utils.trim_song(..., length=2500).

    For every index in range(obs) a two-frame observation is requested
    from utils.sized_observation_from_index.  The two frames are separated
    by "." and the channels of a frame by "|"; both channel lists are
    passed to score_transition together with `smooth`.

    `cache` is accepted but not used by this function.

    Returns the sum of the per-index scores, or None (after printing a
    warning) when `check_len` is true and len(song[0]) is below `obs`.
    """
    song = data if isinstance(data, list) else utils.parse_song(data)
    song = utils.trim_song(song, length=2500)
    song_len = len(song[0])

    # There is no known reliable way to normalize log-likelihoods across
    # songs of different lengths, so instead of normalizing probabilities
    # every song is scored over the same fixed number of observations.
    if check_len and song_len < obs:
        print(" !! %s is too short (%d)" % (data, song_len))
        return None

    scores = []
    for x in range(obs):
        frame = utils.sized_observation_from_index(song, start=x, length=2)
        frame_1, frame_2 = frame.split(".")
        scores.append(
            score_transition(frame_1.split("|"), frame_2.split("|"), smooth))

    return sum(scores)
def get_begin_of_song(relative_file_path, depth):
    """Return the first `depth` entries of each channel of a song.

    The file is parsed with utils.parse_song and trimmed with
    utils.trim_song(..., length=2500); the result is a list holding the
    `[:depth]` slice of every item yielded by iterating the trimmed song.
    """
    trimmed = utils.trim_song(utils.parse_song(relative_file_path), length=2500)
    opening = []
    for channel in trimmed:
        opening.append(channel[:depth])
    return opening
import os
import midi
import utils
from utils import trim_song, parse_song, sized_observation_from_index

def widen_pitch_range(values, lowest, highest):
    """Return `(lowest, highest)` widened to cover the numbers in `values`.

    Each value is converted with int() so that pitches are compared as
    numbers rather than as text.  Values that int() rejects (for example
    None or a non-numeric string) are skipped.
    """
    for raw in values:
        try:
            pitch = int(raw)
        except (TypeError, ValueError):
            continue
        if pitch < lowest:
            lowest = pitch
        if pitch > highest:
            highest = pitch
    return lowest, highest


# Walk every ".mid" file under data/training_songs and report the highest
# and lowest pitch value found in the trimmed songs.
data_dir = os.path.join("data", "training_songs")
training_files = []
max_pitch = 0
min_pitch = 100
for root, dirs, files in os.walk(data_dir):
    for name in [a_file for a_file in files if a_file.endswith(".mid")]:
        relative_path = os.path.join(root, name)
        print(relative_path)
        song = parse_song(relative_path)
        song = trim_song(song, length=2500)
        song_len = len(song[0])

        for x in range(0, song_len):
            current_observation = [chan_data[x] for chan_data in song.values()]
            for i in current_observation:
                print(i)
            # Compare pitches as numbers.  The previous code compared
            # str() values against the integer bounds, which under
            # Python 2 ordering never lowers min_pitch and turns max_pitch
            # into a lexicographic maximum (and raises under Python 3).
            min_pitch, max_pitch = widen_pitch_range(
                current_observation, min_pitch, max_pitch)

print(max_pitch)
print(min_pitch)