def score(data, hmm_depth=3, cache=None, obs=1000, smooth=True, check_len=True): song = data if isinstance(data, list) else utils.parse_song(data) song = utils.trim_song(song, length=2500) song_len = len(song[0]) # I don't know of any reliable way to normalize the log-likelyhood # for different length probabilities, so we're just going to limit # things to a fixed number of observations to normalize the lenght # of observations per file, intestead of normalizing the probabilities # of different length songs if check_len and song_len < obs: print " !! %s is too short (%d)" % (data, song_len) return None scores = [] for x in range(0, obs): frame = utils.sized_observation_from_index(song, start=x, length=hmm_depth) frame_obs = frame.split(".") top_frame = frame_obs[-1].split("|") song_chunk = [[], [], []] for a_frame in frame_obs[:-1]: note_1, note_2, note_3 = a_frame.split("|") song_chunk[0].append(note_1) song_chunk[1].append(note_2) song_chunk[2].append(note_3) scores.append(score_transition(song_chunk, top_frame, smooth, cache)) return sum(scores)
def train_on_files(files, max_hmm_order=8):
    # Train the higher-order observation-count model on each file in
    # `files`, recording frame counts for every history length from 1 up
    # to `max_hmm_order`.  Files already seen (per has_file_been_recorded)
    # are skipped, so training can be resumed safely.
    for a_file in files:
        if has_file_been_recorded(a_file):
            print "Not recalculating counts for %s" % (a_file,)
            continue
        else:
            print "Beginning to calculate counts for %s" % (a_file,)
        record_file(a_file)
        song = parse_song(a_file)
        song = trim_song(song, length=2500)
        song_len = len(song[0])
        # Very short songs are presumably sound effects, not music.
        if song_len < 10:
            print "Song is too short for consideration. May be a sound effect or something trivial. Ignoring."
            continue
        # 'S|S|S' looks like the start-of-song marker observation.
        record_obs('S|S|S')
        for x in range(0, song_len):
            for y in range(0, max_hmm_order + 1):
                if y > 0:
                    frame = sized_observation_from_index(song, start=x, length=y)
                    record_obs(frame)
                else:
                    # NOTE(review): this `frame` is computed but never
                    # recorded or read afterwards — either dead code or a
                    # missing record_obs() call.  Confirm intent before
                    # removing.
                    frame = serialize_observation(song, x)
        # Persist the accumulated counts once per file.
        commit()
        print "finished calculating counts from %s" % (a_file,)
def train_on_files(files): for a_file in files: if has_file_been_recorded(a_file): print "Not recalculating counts for %s" % (a_file,) continue else: print "Beginning to calculate counts for %s" % (a_file,) record_file(a_file) song = parse_song(a_file) song = trim_song(song, length=2500) song_len = len(song[0]) if song_len < 100: print "Song is too short for consideration. May be a sound effect or something trivial. Ignoring." continue for x in range(0, song_len): frame = sized_observation_from_index(song, start=x, length=2) frame_1, frame_2 = frame.split(".") frame_1_chan_1, frame_1_chan_2, frame_1_chan_3 = frame_1.split("|") frame_2_chan_1, frame_2_chan_2, frame_2_chan_3 = frame_2.split("|") # We treat channel one as the most important one, so first # record the counts of all channels in frame 1 transitioning # into channel 1 in frame 2 record_cross_obs_count(frame_1_chan_1, 1, frame_2_chan_1, 1) record_cross_obs_count(frame_1_chan_2, 2, frame_2_chan_1, 1) record_cross_obs_count(frame_1_chan_3, 3, frame_2_chan_1, 1) # Now also record the remaining channels trainsitioning to # themselves in the next channel record_cross_obs_count(frame_1_chan_2, 2, frame_2_chan_2, 2) record_cross_obs_count(frame_1_chan_3, 3, frame_2_chan_3, 3) # Last, record the transitioning from the assumed "melody" in the # new frame to the assumed "background" channels in the new # frame record_inner_obs_count(frame_2_chan_1, frame_2_chan_2, 2) record_inner_obs_count(frame_2_chan_1, frame_2_chan_3, 3) commit() print "finished calculating counts from %s" % (a_file,)
def score(data, cache=None, obs=1000, smooth=False, check_len=True): song = data if isinstance(data, list) else utils.parse_song(data) song = utils.trim_song(song, length=2500) song_len = len(song[0]) # I don't know of any reliable way to normalize the log-likelyhood # for different length probabilities, so we're just going to limit # things to a fixed number of observations to normalize the lenght # of observations per file, intestead of normalizing the probabilities # of different length songs if check_len and song_len < obs: print " !! %s is too short (%d)" % (data, song_len) return None numerator_smooth, denom_smooth = (1, 128) if smooth else (0, 0) scores = [] for x in range(0, obs): frame = utils.sized_observation_from_index(song, start=x, length=2) frame_1, frame_2 = frame.split(".") scores.append(score_transition(frame_1.split("|"), frame_2.split("|"), smooth)) return sum(scores)
def get_begin_of_song(relative_file_path, depth):
    """Return the first `depth` observations of every channel of the
    song stored at `relative_file_path`."""
    parsed = utils.trim_song(utils.parse_song(relative_file_path), length=2500)
    return [channel[:depth] for channel in parsed]
import os import midi import utils from utils import trim_song, parse_song, sized_observation_from_index data_dir = os.path.join("data", "training_songs") training_files = [] max_pitch = 0 min_pitch = 100 for root, dirs, files in os.walk(data_dir): for name in [a_file for a_file in files if a_file[-4:] == ".mid"]: relative_path = os.path.join(root, name) print relative_path song = parse_song(relative_path) song = trim_song(song, length=2500) song_len = len(song[0]) for x in range(0, song_len): frame = sized_observation_from_index(song, start=x, length=1) current_observation = [str(chan_data[x]) for chan_data in song.values()] for i in current_observation: print i if i > max_pitch: max_pitch = i if i < min_pitch: print i min_pitch = i print max_pitch print min_pitch