Esempio n. 1
0
  def process_h5_file_info(self, h5):
    """
    Collect training examples from one already-open song file.

    Reads the track id, the artist tags (``get_artist_mbtags``) and the
    timbre rows (``get_segments_timbre``) from ``h5``. The song is skipped
    when any of these reads fails, when it has no tags, when none of its
    tags is a key of ``self.styles``, or when it has fewer than 300 timbre
    rows.

    Otherwise up to 5 windows of 300 consecutive rows are taken, starting
    every 400 rows. For each window the track id, the tag indicator vector
    and the window itself are appended to ``self.ids_list``,
    ``self.tags_list`` and ``self.timbres_list``.

    This function neither opens nor closes ``h5``; that is the caller's job.

    Returns the number of windows appended (0 when the song is skipped).
    """
    segment_length = 300  # rows kept per example
    segment_stride = 400  # distance between the starts of two examples
    max_segments = 5      # only take 5 at most

    try:
      trackId = Getters.get_track_id(h5)
      tags = Getters.get_artist_mbtags(h5)
      timbres_list = Getters.get_segments_timbre(h5)
    except Exception:
      # Best effort: an unreadable song contributes nothing. A bare
      # `except:` would also have swallowed KeyboardInterrupt/SystemExit.
      return 0
    if len(tags) == 0:
      return 0

    # Indicator vector: 1 at the index of every tag known to self.styles.
    tag_list = np.zeros(NUMBER_OF_TAGS)
    someSeen = False
    for tag in tags:
      if tag in self.styles:
        tag_list[self.styles[tag]] = 1
        someSeen = True
    if not someSeen:
      return 0
    if len(timbres_list) < segment_length:
      return 0

    # Floor division keeps the count an int; `/` gives a float on Python 3
    # and range() would then raise TypeError.
    segment_count = min(len(timbres_list) // segment_stride, max_segments)
    if segment_count == 0:
      # Between 300 and 399 rows: long enough to pass the check above but
      # too short for a single stride, so nothing is produced.
      return 0

    # The original called get_artist_name() without the file handle. The
    # label is also loop-invariant, so it is built once here.
    name_parts = []
    for raw in (Getters.get_artist_name(h5), Getters.get_title(h5)):
      # Decode byte strings so that joining with ": " cannot fail.
      name_parts.append(raw.decode("utf-8", "replace") if isinstance(raw, bytes) else raw)
    label = ": ".join(name_parts)

    created = 0
    for start in range(0, segment_count * segment_stride, segment_stride):
      self.ids_list.append(trackId)
      # NOTE: the same tag_list object is shared by every window of a song.
      self.tags_list.append(tag_list)
      self.timbres_list.append(timbres_list[start:(start + segment_length), ])
      print(label)
      created += 1
    return created
Esempio n. 2
0
# Directory that receives one <track_id>.mp3 per processed track.
OUTDIR = '../data/audio'

# NOTE(review): pickle.load can execute arbitrary code; only load these files
# if they come from a trusted source.
# `with` closes the handles that `pickle.load(open(...))` used to leak.
with open('../data/wmf/index_dicts.pkl', 'rb') as index_file:
    wmf_item2i = pickle.load(index_file)['item2i']
with open('../data/wmf/track_to_song.pkl', 'rb') as mapping_file:
    track_to_song = pickle.load(mapping_file)

h5path = '../data/song_metadata/msd_summary_file.h5'

# exist_ok avoids the check-then-create race of isdir() + mkdir().
os.makedirs(OUTDIR, exist_ok=True)

h5 = hdf5_utils.open_h5_file_read(h5path)
num_songs = GETTERS.get_num_songs(h5)

for i in range(num_songs):
    artist_name = GETTERS.get_artist_name(h5, songidx=i).decode('utf-8')
    track_name = GETTERS.get_title(h5, songidx=i).decode('utf-8')
    track_id = GETTERS.get_track_id(h5, songidx=i).decode('utf-8')

    # Skip tracks that already have an output file or whose song is not
    # part of the wmf item index.
    out_path = os.path.join(OUTDIR, os.path.splitext(track_id)[0]) + '.mp3'
    if os.path.exists(out_path) or track_to_song[track_id] not in wmf_item2i:
        continue

    # Build simplified artist/title strings. The patterns are unchanged;
    # they are only written as raw strings so that `\[`, `\(` etc. are no
    # longer invalid string escapes.
    track_name = track_name.replace('_', '')
    # Cut the artist name at the first separator character or "feat".
    artist_name_re = re.sub(r' *([;_/&,*]|(feat))+.*', '', artist_name)
    artist_name_re = re.sub(r' *[\[\(]*feat*.*[\]\)]*',
                            '',
                            artist_name_re,
                            flags=re.IGNORECASE)
    # Drop bracketed/parenthesised parts from the title.
    track_name_re = re.sub(r' *[\[\(]+.*[\]\)]+', '', track_name)
    artist_name_re = re.sub(' *[\[\(]*featuring*.*[\]\)]*',
Esempio n. 3
0
def hdf5_to_csv(directory):
    """Write one CSV row of song metadata per ``.h5`` file under *directory*.

    The tree rooted at *directory* is walked recursively. Every file whose
    name ends in ``.h5`` is opened through ``hdf5_getters``, a fixed set of
    fields is read, and one row is appended to ``msds.csv`` in the current
    working directory. That file is overwritten on every call. Progress is
    printed after each song.

    Args:
        directory: Root directory to search for ``.h5`` files.
    """
    # Local import: only this block is edited, not the module's import section.
    import csv

    headers = [
        "index", "artist_name", "danceability", "duration", "end_of_fade_in",
        "energy", "key", "key_confidence", "loudness", "mode",
        "mode_confidence", "artist_hotttness", "song_hotttness",
        "start_of_fade_out", "tempo", "time_signature",
        "time_signature_confidence", "title", "release", "year", "track_id",
    ]

    # newline="" is what the csv module expects; lineterminator keeps the
    # "\n" line endings the hand-written version produced.
    with open("msds.csv", "w", newline="", encoding="utf-8") as csvfile:
        # csv.writer quotes fields containing commas or quotes, which the
        # previous string concatenation did not, corrupting such rows.
        writer = csv.writer(csvfile, lineterminator="\n")
        writer.writerow(headers)

        index = 0
        # Recursively visit every sub-dir until we find the h5 files
        for root, _dirs, filenames in os.walk(directory):
            for filename in filenames:
                # Anything that is not an h5 file cannot be opened below.
                if not filename.endswith(".h5"):
                    continue

                h5_file = hdf5_getters.open_h5_file_read(os.path.join(root, filename))
                try:
                    # String fields are decoded once here and stay `str`.
                    artist = hdf5_getters.get_artist_name(h5_file).decode("UTF-8")
                    danceability = hdf5_getters.get_danceability(h5_file)
                    duration = hdf5_getters.get_duration(h5_file)
                    end_of_fade_in = hdf5_getters.get_end_of_fade_in(h5_file)
                    energy = hdf5_getters.get_energy(h5_file)
                    key = hdf5_getters.get_key(h5_file)
                    key_confidence = hdf5_getters.get_key_confidence(h5_file)
                    loudness = hdf5_getters.get_loudness(h5_file)
                    mode = hdf5_getters.get_mode(h5_file)
                    mode_confidence = hdf5_getters.get_mode_confidence(h5_file)
                    artist_hotttness = hdf5_getters.get_artist_hotttnesss(h5_file)
                    song_hotttness = hdf5_getters.get_song_hotttnesss(h5_file)
                    start_of_fade_out = hdf5_getters.get_start_of_fade_out(h5_file)
                    tempo = hdf5_getters.get_tempo(h5_file)
                    time_signature = hdf5_getters.get_time_signature(h5_file)
                    time_signature_confidence = hdf5_getters.get_time_signature_confidence(h5_file)
                    title = hdf5_getters.get_title(h5_file).decode("UTF-8")
                    song_id = hdf5_getters.get_track_id(h5_file).decode("UTF-8")
                    release = hdf5_getters.get_release(h5_file).decode("UTF-8")
                    year = hdf5_getters.get_year(h5_file)
                finally:
                    # Close the handle even when one of the getters raises.
                    h5_file.close()

                # Non-string values go through str() as before, so their
                # textual form in the file is unchanged.
                writer.writerow([
                    str(index), artist, str(danceability), str(duration),
                    str(end_of_fade_in), str(energy), str(key),
                    str(key_confidence), str(loudness), str(mode),
                    str(mode_confidence), str(artist_hotttness),
                    str(song_hotttness), str(start_of_fade_out), str(tempo),
                    str(time_signature), str(time_signature_confidence),
                    title, release, str(year), song_id,
                ])
                index += 1
                print("{} by {}".format(title, artist))
                print("Processed: {}".format(index))
        # NOTE(review): `h5` (and the `writer` used right after this run) are not
        # defined anywhere in view, so these lines look like the tail of a
        # separate example whose beginning is missing rather than a part of
        # hdf5_to_csv above — confirm before relying on them.
        # Read one value per field from the open handle `h5`.
        segments_loudness_max = hdf5_getters.get_segments_loudness_max(h5)
        segments_loudness_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
        segments_loudness_start = hdf5_getters.get_segments_loudness_start(h5)
        segments_pitches = hdf5_getters.get_segments_pitches(h5)
        segments_start = hdf5_getters.get_segments_start(h5)
        segments_timbre = hdf5_getters.get_segments_timbre(h5)
        similar_artists = hdf5_getters.get_similar_artists(h5)
        song_hotttnesss = hdf5_getters.get_song_hotttnesss(h5)
        song_id = hdf5_getters.get_song_id(h5)
        start_of_fade_out = hdf5_getters.get_start_of_fade_out(h5)
        tatums_confidence = hdf5_getters.get_tatums_confidence(h5)
        tatums_start = hdf5_getters.get_tatums_start(h5)
        tempo = hdf5_getters.get_tempo(h5)
        time_signature = hdf5_getters.get_time_signature(h5)
        time_signature_confidence = hdf5_getters.get_time_signature_confidence(h5)
        title = hdf5_getters.get_title(h5)
        track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
        track_id = hdf5_getters.get_track_id(h5)
        year = hdf5_getters.get_year(h5)

        # Every field has been read into a local; release the handle before the
        # row is written.
        h5.close()
        writer.writerow({
            'artist_mbid': artist_mbid,
            'artist_mbtags': artist_mbtags,
            'artist_name': artist_name,
            'artist_playmeid': artist_playmeid,
            'artist_terms': artist_terms,
            'artist_terms_freq': artist_terms_freq,
            'artist_terms_weight': artist_terms_weight,
            'audio_md5': audio_md5,
            'bars_confidence': bars_confidence,
Esempio n. 5
0
    num_songs = len(songs)
    # Number of progress messages printed so far; each stands for 10%.
    perc_i = 0

    # enumerate() supplies the position directly. songs.index(song) rescanned
    # the list on every iteration (quadratic overall) and returned the first
    # match for duplicate entries, which stalled the progress counter.
    for song_idx, song in enumerate(songs):

        if song_idx * 10 / num_songs > perc_i:
            print(str(perc_i * 10) + "% done.")
            perc_i = perc_i + 1

        # NOTE(review): h5 is not closed within the visible part of this loop;
        # confirm it is closed further down.
        h5 = hdf5_getters.open_h5_file_read(song)

        # NOTE(review): the value stored as track_id comes from get_song_id,
        # not get_track_id; confirm this is intended.
        track_id = str(hdf5_getters.get_song_id(h5), "utf-8")

        artist = str(hdf5_getters.get_artist_name(h5), "utf-8")

        title = str(hdf5_getters.get_title(h5), "utf-8")

        # Convert the remaining values to plain Python numbers.
        loudness = float(hdf5_getters.get_loudness(h5))

        release_year = int(hdf5_getters.get_year(h5))

        tempo = float(hdf5_getters.get_tempo(h5))

        danceability = float(hdf5_getters.get_danceability(h5))

        # Decode every artist tag from bytes to str.
        tags = hdf5_getters.get_artist_mbtags(h5)
        tags = tags.tolist()
        tags_refined = []
        for tag in tags:
            tags_refined.append(str(tag, "utf-8"))
Esempio n. 6
0
    # sanity checks
    # NOTE(review): the messages below mention a 7digital key and a
    # -7digitalkey flag while the variable being checked is SPOTIFY_API_KEY;
    # confirm which service is meant before rewording them.
    if SPOTIFY_API_KEY is None:
        print('You need to set a 7digital API key!')
        print('Get one at: http://developer.7digital.net/')
        print('Pass it as a flag: -7digitalkey KEY')
        print('or set it under environment variable: SPOTIFY_API_KEY')
        sys.exit(0)
    if not os.path.isfile(h5path):
        print('invalid path (not a file):', h5path)
        sys.exit(0)

    # open h5 song, get all we know about the song
    h5 = hdf5_utils.open_h5_file_read(h5path)
    try:
        artist_name = GETTERS.get_artist_name(h5).decode('utf-8')
        track_name = GETTERS.get_title(h5).decode('utf-8')
    finally:
        # Close the handle even when a getter or the decoding fails.
        h5.close()

    print('Searching for track: ', artist_name, ' - ', track_name)
    # search by artist name + track title
    # The search has to run before its result is inspected: previously `res`
    # was tested first and the search call sat in the `else` branch.
    res = get_trackid_from_text_search(track_name, artistname=artist_name)
    if res is None:
        print('Did not find track using artist name and track title')
    else:
        name, preview_url = res
        print(name)
    #     sys.exit(0)
    # closest_track,trackid = res
    # if closest_track != track_name:
    #     print(( 'we approximate your song title:',track_name,'by:',closest_track))
    # preview = get_preview_from_trackid(trackid)
Esempio n. 7
0
    track_id = os.path.splitext(track_name)[0]
    track_id_to_info[track_id] = None

print(len(track_id_to_info))

h5 = hdf5_utils.open_h5_file_read(h5path)
try:
    num_songs = GETTERS.get_num_songs(h5)

    print('Retrieving meta data from hdf5 file...')

    for i in tqdm(range(num_songs)):
        track_id = GETTERS.get_track_id(h5, songidx=i).decode('utf-8')

        # Only songs whose id was registered in track_id_to_info are filled in.
        if track_id in track_id_to_info:
            # NOTE(review): unlike track_id, artist_name and track_name are
            # stored exactly as the getters return them (not decoded); confirm
            # that consumers of the pickle expect that.
            artist_name = GETTERS.get_artist_name(h5, songidx=i)
            track_name = GETTERS.get_title(h5, songidx=i)
            year = GETTERS.get_year(h5, songidx=i)
            tempo = GETTERS.get_tempo(h5, songidx=i)

            info_dict = {
                'artist_name': artist_name,
                'track_name': track_name,
                'year': year,
                'tempo': tempo,
            }

            track_id_to_info[track_id] = info_dict
finally:
    # Release the hdf5 handle even when reading fails part-way through.
    h5.close()

# `with` closes the output file, so the pickle is fully flushed to disk.
with open('../track_id_to_info.pkl', 'wb') as out_file:
    pickle.dump(track_id_to_info, out_file)