def debug_from_song_file(connect, h5path, verbose=0):
    """
    Slow debugging function that takes a h5 file, reads the info,
    checks the match with the musicbrainz db, and prints out the result.
    Only prints when we don't get a match (and verbose > 0).

    INPUT
       connect  - connection object, passed through unchanged to
                  find_year_safemode and get_artist_tags
                  (presumably a musicbrainz db connection)
       h5path   - path of the song h5 file to read
       verbose  - messages are printed only when this is > 0
    RETURN
       tuple (gotmbid, gotyear, gottags); each entry is 1 when the
       corresponding information was found and 0 otherwise
    """
    import hdf5_utils as HDF5
    import hdf5_getters as GETTERS
    h5 = HDF5.open_h5_file_read(h5path)
    try:
        title = GETTERS.get_title(h5)
        release = GETTERS.get_release(h5)
        artist = GETTERS.get_artist_name(h5)
        ambid = GETTERS.get_artist_mbid(h5)
    finally:
        # release the file handle even if one of the getters fails
        h5.close()
    # mbid
    gotmbid = 0 if ambid == '' else 1
    if gotmbid == 0 and verbose > 0:
        print('no mb id for:', artist)
    # year
    year = find_year_safemode(connect, ambid, title, release, artist)
    gotyear = 1 if year > 0 else 0
    # report only a missing year, the same way the tags check below does
    if gotyear == 0 and verbose > 0:
        print('no years for:', artist, '|', release, '|', title)
    # tags (the second value returned by get_artist_tags is not needed here)
    tags, _counts = get_artist_tags(connect, ambid)
    gottags = 1 if len(tags) > 0 else 0
    if gottags == 0 and verbose > 0:
        print('no tags for:', artist)
    # return indicator for mbid, year, tag
    return gotmbid, gotyear, gottags
def debug_from_song_file(connect, h5path, verbose=0):
    """
    Slow debugging function that takes a h5 file, reads the info,
    checks the match with the musicbrainz db, and prints out the result.
    Only prints when we don't get a match (and verbose > 0).

    INPUT
       connect  - connection object, passed through unchanged to
                  find_year_safemode and get_artist_tags
                  (presumably a musicbrainz db connection)
       h5path   - path of the song h5 file to read
       verbose  - messages are printed only when this is > 0
    RETURN
       tuple (gotmbid, gotyear, gottags); each entry is 1 when the
       corresponding information was found and 0 otherwise
    """
    import hdf5_utils as HDF5
    import hdf5_getters as GETTERS
    h5 = HDF5.open_h5_file_read(h5path)
    try:
        title = GETTERS.get_title(h5)
        release = GETTERS.get_release(h5)
        artist = GETTERS.get_artist_name(h5)
        ambid = GETTERS.get_artist_mbid(h5)
    finally:
        # the handle must be closed even when a getter raises
        h5.close()
    # mbid
    gotmbid = 1
    if ambid == '':
        gotmbid = 0
        if verbose > 0:
            print('no mb id for:', artist)
    # year
    year = find_year_safemode(connect, ambid, title, release, artist)
    gotyear = 1 if year > 0 else 0
    # only complain when no year was found (previously printed on every
    # verbose call, even after a successful lookup)
    if gotyear == 0 and verbose > 0:
        print('no years for:', artist, '|', release, '|', title)
    # tags (the second value returned by get_artist_tags is not needed here)
    tags, _counts = get_artist_tags(connect, ambid)
    gottags = 1 if len(tags) > 0 else 0
    if gottags == 0 and verbose > 0:
        print('no tags for:', artist)
    # return indicator for mbid, year, tag
    return gotmbid, gotyear, gottags
def getURLFromH5(h5path): if not os.path.isfile(h5path): print 'invalid path (not a file):', h5path sys.exit(0) h5 = hdf5_utils.open_h5_file_read(h5path) track_7digitalid = GETTERS.get_track_7digitalid(h5) release_7digitalid = GETTERS.get_release_7digitalid(h5) artist_7digitalid = GETTERS.get_artist_7digitalid(h5) artist_name = GETTERS.get_artist_name(h5) release_name = GETTERS.get_release(h5) track_name = GETTERS.get_title(h5) h5.close() # we already have the 7digital track id? way too easy! if track_7digitalid >= 0: preview = get_preview_from_trackid(track_7digitalid) if preview == '': print 'something went wrong when looking by track id' else: print preview return preview sys.exit(0)
def getURLFromH5(h5path): if not os.path.isfile(h5path): print 'invalid path (not a file):',h5path sys.exit(0) h5 = hdf5_utils.open_h5_file_read(h5path) track_7digitalid = GETTERS.get_track_7digitalid(h5) release_7digitalid = GETTERS.get_release_7digitalid(h5) artist_7digitalid = GETTERS.get_artist_7digitalid(h5) artist_name = GETTERS.get_artist_name(h5) release_name = GETTERS.get_release(h5) track_name = GETTERS.get_title(h5) h5.close() # we already have the 7digital track id? way too easy! print "Suggested Song URLs For you" print "===========================" if track_7digitalid >= 0: preview = get_preview_from_trackid(track_7digitalid) if preview == '': print 'something went wrong when looking by track id' else: print preview return preview sys.exit(0)
# Per-song scalar columns, in the order they appear in the resulting frame.
_GETDATA_SCALAR_COLUMNS = (
    'year',
    'artist_name',
    'artist_id',
    'song_title',
    'song_beats_persecond',
    'song_duration',
    'song_end_fade_in',
    'song_start_fade_out',
    'song_key',
    'song_loudness',
    'song_loudness_max',
    'song_loudness_min',
    'song_loudness_med',
    'song_loudness_time_max',
    'song_loudness_time_min',
    'song_loudness_time_med',
    'song_mode',
    'song_tempo',
    'song_time_signature',
)


def _mean_and_cov_features(segments, tri_idx):
    """Return the `tri_idx` entries of the column covariance of `segments`,
    followed by the column means, as one flat list."""
    cov = np.cov(segments, rowvar=False)
    features = cov[tri_idx].tolist()
    features.extend(np.mean(segments, axis=0).tolist())
    return features


def _read_song_features(h5, tri_idx):
    """Read every feature of one open song file.

    Returns None when the song's year is below 1990, otherwise a tuple
    (scalars, pitches, timbre, sections_start) where `scalars` maps each
    name in _GETDATA_SCALAR_COLUMNS to its value for this song.
    """
    year = g.get_year(h5)
    if year < 1990:
        return None

    artist_id = g.get_artist_id(h5)
    n_beats = len(g.get_beats_start(h5))
    duration = g.get_duration(h5)
    end_fade_in = g.get_end_of_fade_in(h5)
    start_fade_out = g.get_start_of_fade_out(h5)
    key = g.get_key(h5)
    loudness = g.get_loudness(h5)

    # NOTE(review): floor division is kept exactly as in the original; if
    # the intent was "dB / 10" before the power of ten, this should
    # probably be true division - confirm before changing.
    loudness_antilog = np.power(10, g.get_segments_loudness_max(h5) // 10)
    loudness_time = np.multiply(
        loudness_antilog, g.get_segments_loudness_max_time(h5).tolist())

    mode = g.get_mode(h5)
    sections_start = g.get_sections_start(h5).tolist()
    pitches = _mean_and_cov_features(g.get_segments_pitches(h5), tri_idx)
    timbre = _mean_and_cov_features(g.get_segments_timbre(h5), tri_idx)

    scalars = {
        'year': year,
        'artist_name': g.get_artist_name(h5),
        'artist_id': artist_id,
        'song_title': g.get_title(h5),
        'song_beats_persecond': float(n_beats) / duration,
        'song_duration': duration,
        'song_end_fade_in': end_fade_in,
        'song_start_fade_out': start_fade_out,
        'song_key': key,
        'song_loudness': loudness,
        'song_loudness_max': np.amax(loudness_antilog),
        'song_loudness_min': np.amin(loudness_antilog),
        'song_loudness_med': np.median(loudness_antilog),
        'song_loudness_time_max': np.amax(loudness_time),
        'song_loudness_time_min': np.amin(loudness_time),
        'song_loudness_time_med': np.median(loudness_time),
        'song_mode': mode,
        'song_tempo': g.get_tempo(h5),
        'song_time_signature': g.get_time_signature(h5),
    }
    return scalars, pitches, timbre, sections_start


def getData(starting_point):
    """Build a DataFrame of song features for one batch of 10000 h5 files.

    The batch is files[starting_point * 10000 : starting_point * 10000 + 10000]
    of the glob '/mnt/snap/data/*/*/*/*.h5'. Songs whose year is below 1990
    are skipped. Every remaining song contributes one value to each scalar
    column; its pitches, timbre and sections_start lists are handed to
    createDictsFrom1DArray, which adds the corresponding columns.

    NOTE(review): glob.glob does not promise a sorted result, so which files
    fall into a given batch depends on the order the filesystem returns.

    :param starting_point: zero-based batch index
    :return: pandas DataFrame built from the collected columns
    """
    starting = starting_point * 10000
    files = glob.glob('/mnt/snap/data/*/*/*/*.h5')
    file_one_round = files[starting:starting + 10000]

    # indices of the upper triangle (diagonal included) of a 12x12 matrix,
    # used to pick the entries of each covariance matrix
    tri_idx = np.triu_indices(12)

    data = collections.OrderedDict(
        (name, []) for name in _GETDATA_SCALAR_COLUMNS)
    song_pitches = []
    song_timbre = []
    song_sections_start = []

    for f in file_one_round:
        h5 = HDF5.open_h5_file_read(f)
        try:
            features = _read_song_features(h5, tri_idx)
        finally:
            # always close: skipped (pre-1990) songs used to leave the
            # file open
            h5.close()
        if features is None:
            continue
        scalars, pitches, timbre, sections_start = features
        for name in _GETDATA_SCALAR_COLUMNS:
            data[name].append(scalars[name])
        song_pitches.append(pitches)
        song_timbre.append(timbre)
        song_sections_start.append(sections_start)

    data = createDictsFrom1DArray(data, 'pitches', song_pitches)
    data = createDictsFrom1DArray(data, 'timbre', song_timbre)
    data = createDictsFrom1DArray(data, 'sections_start', song_sections_start)
    df = pd.DataFrame(data)
    print('before return ' + str(starting_point))
    return df
h5path = sys.argv[1] # sanity checks if DIGITAL7_API_KEY is None: print 'You need to set a 7digital API key!' print 'Get one at: http://developer.7digital.net/' print 'Pass it as a flag: -7digitalkey KEY' print 'or set it under environment variable: DIGITAL7_API_KEY' sys.exit(0) if not os.path.isfile(h5path): print 'invalid path (not a file):',h5path sys.exit(0) # open h5 song, get all we know about the song h5 = hdf5_utils.open_h5_file_read(h5path) track_7digitalid = GETTERS.get_track_7digitalid(h5) release_7digitalid = GETTERS.get_release_7digitalid(h5) artist_7digitalid = GETTERS.get_artist_7digitalid(h5) artist_name = GETTERS.get_artist_name(h5) release_name = GETTERS.get_release(h5) track_name = GETTERS.get_title(h5) h5.close() # we already have the 7digital track id? way too easy! if track_7digitalid >= 0: preview = get_preview_from_trackid(track_7digitalid) if preview == '': print 'something went wrong when looking by track id' else: print preview
# params h5path = sys.argv[1] # sanity checks if DIGITAL7_API_KEY is None: print 'You need to set a 7digital API key!' print 'Get one at: http://developer.7digital.net/' print 'Pass it as a flag: -7digitalkey KEY' print 'or set it under environment variable: DIGITAL7_API_KEY' sys.exit(0) if not os.path.isfile(h5path): print 'invalid path (not a file):', h5path sys.exit(0) # open h5 song, get all we know about the song h5 = hdf5_utils.open_h5_file_read(h5path) track_7digitalid = GETTERS.get_track_7digitalid(h5) release_7digitalid = GETTERS.get_release_7digitalid(h5) artist_7digitalid = GETTERS.get_artist_7digitalid(h5) artist_name = GETTERS.get_artist_name(h5) release_name = GETTERS.get_release(h5) track_name = GETTERS.get_title(h5) h5.close() # we already have the 7digital track id? way too easy! if track_7digitalid >= 0: preview = get_preview_from_trackid(track_7digitalid) if preview == '': print 'something went wrong when looking by track id' else: print preview
t1 = time.time() # create hash tables hash_table_terms = [None] * NUMBUCKETS hash_table_mbtags = [None] * NUMBUCKETS for k in range(NUMBUCKETS): hash_table_terms[k] = set() hash_table_mbtags[k] = set() # iterate HDF5 files cnt_files = 0 if artistfile == '': for root, dirs, files in os.walk(maindir): files = glob.glob(os.path.join(root,'*.h5')) for f in files : h5 = hdf5_utils.open_h5_file_read(f) terms = get_artist_terms(h5) mbtags = get_artist_mbtags(h5) h5.close() # iterate over terms for t in terms: put_term_in_hash_table(hash_table_terms,t) for t in mbtags: put_term_in_hash_table(hash_table_mbtags,t) cnt_files += 1 else: f = open(artistfile,'r') trackids = [] for line in f.xreadlines(): if line == '' or line.strip() == '': continue
t1 = time.time() # create hash tables hash_table_terms = [None] * NUMBUCKETS hash_table_mbtags = [None] * NUMBUCKETS for k in range(NUMBUCKETS): hash_table_terms[k] = set() hash_table_mbtags[k] = set() # iterate HDF5 files cnt_files = 0 if artistfile == "": for root, dirs, files in os.walk(maindir): files = glob.glob(os.path.join(root, "*.h5")) for f in files: h5 = hdf5_utils.open_h5_file_read(f) terms = get_artist_terms(h5) mbtags = get_artist_mbtags(h5) h5.close() # iterate over terms for t in terms: put_term_in_hash_table(hash_table_terms, t) for t in mbtags: put_term_in_hash_table(hash_table_mbtags, t) cnt_files += 1 else: f = open(artistfile, "r") trackids = [] for line in f.xreadlines(): if line == "" or line.strip() == "": continue