def getclusters(basedir, ext='.h5'):
    """Gather beat-aligned timbre and chroma frames for clustering.

    Walks ``basedir`` and opens every file matching ``'*' + ext`` with
    ``hdf5_getters``.  Files whose year is 0 are skipped.  For the others
    the year is mapped to a bin with ``getbin`` and the rows of the
    transposed ``bt.get_bttimbre`` / ``bt.get_btchromas`` matrices are
    appended to two flat lists.

    Parameters:
        basedir: root directory that is walked recursively.
        ext: file-name suffix of the files to read.

    Returns:
        ``(features, cfeatures)``.  On the normal path both are converted
        with ``array(...)``.  When a bin is over the module-level ``cap``
        and ``checkforcompletion`` reports completion, the function returns
        early with the two plain lists instead.
    """
    print('inside clusters')
    features = []
    cfeatures = []
    decadecount = defaultdict(int)   # per-bin counter compared against `cap`
    deccount = defaultdict(int)      # per-bin tally of every file with a year
    i = 0
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            h5 = hdf5_getters.open_h5_file_read(f)
            # try/finally guarantees the handle is closed on every exit path
            # (skip, early return, extraction error, unexpected exception).
            try:
                year = hdf5_getters.get_year(h5)
                if year == 0:
                    continue
                bins = getbin(year)
                # The original indexed with the builtin `bin`, so every file
                # was tallied under one meaningless key.
                deccount[bins] += 1
                if decadecount[bins] > cap:
                    if checkforcompletion(decadecount):
                        for dec in decadecount.keys():
                            print('Dec : ' + str(dec) + ' Count : ' + str(decadecount[dec]))
                        return features, cfeatures
                    continue
                i += 1
                print(i)
                try:
                    bttimbre = bt.get_bttimbre(h5)
                    for x in bttimbre.T:
                        features.append(x)
                    # NOTE(review): the flattened source does not show whether
                    # this increment was inside the loop above (per frame) or
                    # after it (per matrix); kept per matrix -- confirm.
                    decadecount[bins] += 1
                    btchroma = bt.get_btchromas(h5)
                    for x in btchroma.T:
                        cfeatures.append(x)
                    decadecount[bins] += 1
                except Exception:
                    # Best effort: a file whose features cannot be read is
                    # skipped.  Unlike a bare `except:`, Ctrl-C still works.
                    continue
            finally:
                h5.close()
    for dec in deccount.keys():
        print('Dec : ' + str(dec) + ' Count : ' + str(decadecount[dec]))
    features = array(features)
    cfeatures = array(cfeatures)
    return features, cfeatures
def beat_aligned_chroma(song_id, file_name):
    """Build one record per beat from a song's beat-aligned chroma matrix.

    Calls ``beat_aligned_feats.get_btchromas(file_name)``.  If that returns
    ``None`` the result is ``([], None)``.  Otherwise the matrix is
    transposed and, for each row, a dict is produced holding ``song_id``,
    the row index under ``'beat_number'`` and the first 12 values of the
    row keyed by ``pitch_schema[0..11]``.

    Returns:
        ``(rows, btchromas)``: the list of dicts and the transposed matrix.
    """
    chroma = beat_aligned_feats.get_btchromas(file_name)
    if chroma is None:
        return [], None

    chroma = chroma.T
    records = []
    for beat_idx, beat in enumerate(chroma):
        record = {'song_id': song_id, 'beat_number': beat_idx}
        for pitch_idx in range(12):
            record[pitch_schema[pitch_idx]] = beat[pitch_idx]
        records.append(record)
    return records, chroma
# Fragment: validate `songfile`, load its beat-aligned chroma, compute
# landmarks and start a 4-row figure.
# sanity checks
if not os.path.isfile(songfile):
    print('ERROR: %s does not exist.' % songfile)
    # Exit non-zero so a calling shell or script can detect the failure
    # (the original exited with status 0 on this error path).
    sys.exit(1)
# tbm path, import stuff
import beat_aligned_feats as BAF
import pylab as P
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
# get chroma
btchroma = BAF.get_btchromas_loudness(songfile)
# Reuse the matrix loaded above instead of reading the song file a second
# time; np.log10 returns a new array, so `btchroma` is left untouched.
btchroma_db = np.log10(btchroma) * 20.
btchroma_normal = BAF.get_btchromas(songfile)
# get landmarks
landmarks = get_landmarks(btchroma, decay=decay)
landmarks_normal = get_landmarks(btchroma_normal, decay=decay)
# plot
pargs = {
    'aspect': 'auto',
    'cmap': P.cm.gray_r,
    'interpolation': 'nearest',
    'origin': 'lower',
}
P.subplot(4, 1, 1)
P.imshow(btchroma, **pargs)
P.subplot(4, 1, 2)
P.imshow(landmarks_normal, **pargs)
def buildfeatures(basedir, cluster, ccluster, ext='.h5'):
    """Build a 100-value cluster histogram and a year bin for each song file.

    Walks ``basedir`` and opens every file matching ``'*' + ext``.  Files
    whose year is 0 are skipped.  For the rest, each row of the transposed
    ``bt.get_bttimbre`` matrix is labelled with ``kmeans.predict`` and each
    row of the transposed ``bt.get_btchromas`` matrix with
    ``ckmeans.predict``.  The feature vector is the 50 timbre-label counts
    followed by the 50 chroma-label counts.

    Parameters:
        basedir: root directory that is walked recursively.
        cluster, ccluster: not read by this function.
            NOTE(review): predictions use the module-level ``kmeans`` and
            ``ckmeans`` objects -- confirm whether these parameters were
            meant to supply the models.
        ext: file-name suffix of the files to read.

    Returns:
        ``(features, decade)``: parallel lists of feature vectors and the
        ``getbin(year)`` value of each song.  Returns early once a bin is
        over the module-level ``cap`` and ``checkforcompletion`` is true.
    """
    global cap
    n_labels = 50
    i = 0
    features = []
    decade = []
    decadecount = defaultdict(int)
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            h5 = hdf5_getters.open_h5_file_read(f)
            # try/finally closes the handle on every exit path, including
            # exceptions raised by get_year/getbin that previously leaked it.
            try:
                year = hdf5_getters.get_year(h5)
                if year == 0:
                    continue
                bins = getbin(year)
                if decadecount[bins] > cap:
                    if checkforcompletion(decadecount):
                        return features, decade
                    continue
                i += 1
                print(i)
                clustercount = dict.fromkeys(range(n_labels), 0)
                cclustercount = dict.fromkeys(range(n_labels), 0)
                try:
                    for x in bt.get_bttimbre(h5).T:
                        label = kmeans.predict(x)
                        clustercount[label[0]] += 1
                    for y in bt.get_btchromas(h5).T:
                        clabel = ckmeans.predict(y)
                        cclustercount[clabel[0]] += 1
                except Exception:
                    # Best effort: skip songs whose features cannot be read
                    # or labelled.  Unlike a bare `except:`, Ctrl-C still
                    # interrupts the run.
                    continue
                # Emit counts in explicit label order rather than relying on
                # dict iteration order.
                feature = [clustercount[cl] for cl in range(n_labels)]
                feature.extend(cclustercount[cl] for cl in range(n_labels))
                features.append(feature)
                decade.append(bins)
                decadecount[bins] += 1
            finally:
                h5.close()
    print(len(features))
    print(len(decade))
    return features, decade
# Fragment: validate `songfile`, load its beat-aligned chroma, compute
# landmarks and start a 4-row figure.
# sanity checks
if not os.path.isfile(songfile):
    print('ERROR: %s does not exist.' % songfile)
    # Exit non-zero so a calling shell or script can detect the failure
    # (the original exited with status 0 on this error path).
    sys.exit(1)
# tbm path, import stuff
import beat_aligned_feats as BAF
import pylab as P
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
# get chroma
btchroma = BAF.get_btchromas_loudness(songfile)
# Reuse the matrix loaded above instead of reading the song file a second
# time; np.log10 returns a new array, so `btchroma` is left untouched.
btchroma_db = np.log10(btchroma) * 20.
btchroma_normal = BAF.get_btchromas(songfile)
# get landmarks
landmarks = get_landmarks(btchroma, decay=decay)
landmarks_normal = get_landmarks(btchroma_normal, decay=decay)
# plot
pargs = {
    'aspect': 'auto',
    'cmap': P.cm.gray_r,
    'interpolation': 'nearest',
    'origin': 'lower',
}
P.subplot(4, 1, 1)
P.imshow(btchroma, **pargs)
P.subplot(4, 1, 2)
def get_all_titles(basedir, ext='.h5'):
    """Extract one feature vector and one year bin per song file.

    Walks ``basedir`` and opens every file matching ``'*' + ext``.  Files
    that fail to open with ``HDF5ExtError`` are reported and counted in the
    module-level ``errorcount``.  Files whose year is 0 are skipped.

    For each remaining file the feature vector holds, in order:
    the ``mean`` of every entry along the first axis of
    ``bt.get_btchromas(h5)``, the values of ``get_covariance`` of that
    matrix, the same two groups for ``bt.get_bttimbre(h5)``, then
    ``get_loudness`` and ``get_duration``.

    Module-level state touched: ``errorcount`` and ``count`` are
    incremented, ``truecount[label]`` is incremented for every file with a
    year, and ``cap`` is read.

    Parameters:
        basedir: root directory that is walked recursively.
        ext: file-name suffix of the files to read.

    Returns:
        ``(features, decade)``: parallel lists of feature vectors and
        ``getbin(year)`` labels.  Returns early (after printing the per-bin
        counts) once a bin is over ``cap`` and ``checkforcompletion`` is
        true.
    """
    global errorcount
    global count
    global cap
    global truecount
    features = []
    decade = []
    decadecount = defaultdict(int)
    i = 0
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            try:
                h5 = hdf5_getters.open_h5_file_read(f)
            except HDF5ExtError:
                errorcount += 1
                print('Unexpected error: %s' % (sys.exc_info()[0],))
                print(traceback.format_exc())
                continue
            # try/finally closes the handle on every exit path; previously it
            # leaked if get_year, get_loudness or get_duration raised.
            try:
                year = hdf5_getters.get_year(h5)
                print(i)
                i += 1
                if year == 0:
                    continue
                label = getbin(year)
                truecount[label] += 1
                if decadecount[label] > cap:
                    if checkforcompletion(decadecount):
                        for dec in decadecount.keys():
                            print('Decade : ' + str(dec) + ' Count : ' + str(decadecount[dec]))
                        return features, decade
                    continue
                feature = []
                try:
                    btchromas = bt.get_btchromas(h5)
                    for chroma in btchromas:
                        feature.append(mean(chroma))
                    feature.extend(get_covariance(btchromas))
                    bttimbre = bt.get_bttimbre(h5)
                    for timbre in bttimbre:
                        feature.append(mean(timbre))
                    feature.extend(get_covariance(bttimbre))
                except Exception:
                    # Best effort: count and skip songs whose beat-aligned
                    # features cannot be computed.  Unlike a bare `except:`,
                    # Ctrl-C still interrupts the run.
                    errorcount += 1
                    continue
                feature.append(hdf5_getters.get_loudness(h5))
                feature.append(hdf5_getters.get_duration(h5))
                features.append(feature)
                decade.append(label)
                decadecount[label] += 1
                count += 1
            finally:
                h5.close()
    for dec in decadecount.keys():
        print('Decade : ' + str(dec) + ' Count : ' + str(decadecount[dec]))
    return features, decade
# Fragment: for every correctly aligned file, collect the MSD beat-synchronous
# features and a beat-aligned piano roll restricted to the aligned time span.
# Load in list of files which were aligned correctly, and the start/end times of the good alignment
files, start_times, end_times = load_results(tsv_path)
for filename, start_time, end_time in zip(files, start_times, end_times):
    # Load in MSD hdf5 file
    h5 = hdf5_getters.open_h5_file_read(to_h5_path(filename))
    # try/finally closes the handle on every path; the original skipped
    # h5.close() when a file had no beats, leaking one handle per such file.
    try:
        # Load in beat times from MSD
        beats = hdf5_getters.get_beats_start(h5)
        # Some files have no EN analysis
        if beats.size == 0:
            continue
        # Get indices which fall within the range of correct alignment
        time_mask = np.logical_and(beats > start_time, beats < end_time)
        beats = beats[time_mask]
        # and beat-synchronous feature matrices, within the time range of correct alignment
        chroma = beat_aligned_feats.get_btchromas(h5)[:, time_mask]
        timbre = beat_aligned_feats.get_bttimbre(h5)[:, time_mask]
        loudness = beat_aligned_feats.get_btloudnessmax(h5)[:, time_mask]
    finally:
        h5.close()
    # Stack it
    msd_features = np.vstack([chroma, timbre, loudness])
    # Report and skip files whose features contain NaN
    if np.isnan(msd_features).any():
        print(filename)
        continue
    # Load in pretty midi object
    pm = pretty_midi.PrettyMIDI(midi.read_midifile(to_midi_path(filename)))
    # Construct piano roll, aligned to the msd beat times
    piano_roll = pm.get_piano_roll(times=beats)
    # Keep only rows 36 through 83 (the slice end is exclusive)
    piano_roll = piano_roll[36:84, :]
    # Write out