Esempio n. 1
0
def get_artist_name_from_track(track_filename):
    """Record the artist name stored in one track file.

    Opens ``track_filename`` through ``hdf5getters``, reads the artist name
    and the song count, prints both, and adds the artist name to the
    module-level ``all_artists_name`` object via its ``add`` method.

    Args:
        track_filename: Path of the track file to read.
    """
    # Single pre-formatted strings print identically under the Python 2
    # print statement and the Python 3 print function (the doubled space
    # reproduces the original "literal + separator" output).
    print("[Processing file]:  %s" % (track_filename,))
    h5_file = hdf5getters.open_h5_file_read(track_filename)
    # try/finally so the handle is released even when a getter raises.
    try:
        artist_name = hdf5getters.get_artist_name(h5_file)
        num_songs = hdf5getters.get_num_songs(h5_file)
    finally:
        h5_file.close()
    print("\t- [Artist name found]:  %s" % (artist_name,))
    print("\t- [Num songs]: %s" % (num_songs,))
    all_artists_name.add(artist_name)
Esempio n. 2
0
def get_artist_name_from_track(track_filename):
    """Record the artist name stored in one track file.

    Opens ``track_filename`` through ``hdf5getters``, reads the artist name
    and the song count, prints both, and adds the artist name to the
    module-level ``all_artists_name`` object via its ``add`` method.

    Args:
        track_filename: Path of the track file to read.
    """
    # Single pre-formatted strings print identically under the Python 2
    # print statement and the Python 3 print function (the doubled space
    # reproduces the original "literal + separator" output).
    print("[Processing file]:  %s" % (track_filename,))
    h5_file = hdf5getters.open_h5_file_read(track_filename)
    # try/finally so the handle is released even when a getter raises.
    try:
        artist_name = hdf5getters.get_artist_name(h5_file)
        num_songs = hdf5getters.get_num_songs(h5_file)
    finally:
        h5_file.close()
    print("\t- [Artist name found]:  %s" % (artist_name,))
    print("\t- [Num songs]: %s" % (num_songs,))
    all_artists_name.add(artist_name)
Esempio n. 3
0
  def process_h5_file_info(self, h5):
    """
    Turn one already-open song file into training samples.

    Reads the track id, the artist tags and the segment timbres from ``h5``.
    Songs are skipped (return 0) when reading fails, when there are no tags,
    when none of the tags is a key of ``self.styles``, or when fewer than
    300 timbre rows are available.

    Otherwise up to 5 windows of 300 timbre rows are taken, one every 400
    rows, and for each window the track id, the tag indicator vector and the
    window are appended to ``self.ids_list``, ``self.tags_list`` and
    ``self.timbres_list``.

    The file is neither opened nor closed here; that is left to the caller.

    Returns:
      The number of samples appended (0 when the song is skipped).
    """
    try:
      trackId = Getters.get_track_id(h5)
      tags = Getters.get_artist_mbtags(h5)
      timbres_list = Getters.get_segments_timbre(h5)
    except Exception:
      # Best effort: an unreadable song simply contributes no samples.
      # (Narrowed from a bare except so Ctrl-C / SystemExit still propagate.)
      return 0
    if len(tags) == 0:
      return 0

    # Indicator vector: 1 at the index of every known style tag.
    tag_list = np.zeros(NUMBER_OF_TAGS)
    someSeen = False
    for tag in tags:
      if tag in self.styles:
        tag_list[self.styles[tag]] = 1
        someSeen = True
    if not someSeen:
      return 0
    if len(timbres_list) < 300:
      return 0

    created = 0
    # Only take 5 windows at most. Floor division keeps the bound an int on
    # Python 3 as well (plain "/" would hand range() a float there).
    window_count = min(len(timbres_list) // 400, 5)
    for i in range(0, window_count * 400, 400):
      timbres_list_segment = timbres_list[i:(i + 300), ]
      self.ids_list.append(trackId)
      self.tags_list.append(tag_list)
      self.timbres_list.append(timbres_list_segment)
      # get_artist_name needs the file handle like every other getter.
      print(Getters.get_artist_name(h5) + ": " + Getters.get_title(h5))
      created += 1
    return created
Esempio n. 4
0
unfound_count = 0

OUTDIR = '../data/audio'
# NOTE: pickle can execute arbitrary code while loading; these files must
# come from a trusted source. Context managers close the handles promptly.
with open('../data/wmf/index_dicts.pkl', 'rb') as index_file:
    wmf_item2i = pickle.load(index_file)['item2i']
with open('../data/wmf/track_to_song.pkl', 'rb') as mapping_file:
    track_to_song = pickle.load(mapping_file)

h5path = '../data/song_metadata/msd_summary_file.h5'

if not os.path.isdir(OUTDIR):
    os.mkdir(OUTDIR)

h5 = hdf5_utils.open_h5_file_read(h5path)
num_songs = GETTERS.get_num_songs(h5)

for i in range(num_songs):
    artist_name = GETTERS.get_artist_name(h5, songidx=i).decode('utf-8')
    track_name = GETTERS.get_title(h5, songidx=i).decode('utf-8')
    track_id = GETTERS.get_track_id(h5, songidx=i).decode('utf-8')

    # Skip tracks whose audio file already exists or whose song is not part
    # of the WMF item index.
    out_path = os.path.join(OUTDIR, os.path.splitext(track_id)[0]) + '.mp3'
    if os.path.exists(out_path) or track_to_song[track_id] not in wmf_item2i:
        continue

    # Build simplified artist/title strings. Patterns are raw strings so the
    # regex escapes (\[ \( \] \)) are not treated as invalid string escapes.
    track_name = track_name.replace('_', '')
    # Drop everything from the first separator character or "feat" onwards.
    artist_name_re = re.sub(r' *([;_/&,*]|(feat))+.*', '', artist_name)
    artist_name_re = re.sub(r' *[\[\(]*feat*.*[\]\)]*',
                            '',
                            artist_name_re,
                            flags=re.IGNORECASE)
    # Drop a bracketed / parenthesised suffix from the title.
    track_name_re = re.sub(r' *[\[\(]+.*[\]\)]+', '', track_name)
Esempio n. 5
0
def hdf5_to_csv(directory):
    """Export per-song metadata from every file under ``directory`` to CSV.

    Walks ``directory`` recursively, opens each file found through
    ``hdf5_getters`` and writes one row per file to ``msds.csv`` in the
    current working directory, after a header row. Rows are written with the
    ``csv`` module, so text fields containing commas or quotes are quoted
    instead of shifting the columns.

    Args:
        directory: Root directory to search for song files.
    """
    import csv  # local import keeps this function self-contained

    columns = [
        "index", "artist_name", "danceability", "duration", "end_of_fade_in",
        "energy", "key", "key_confidence", "loudness", "mode",
        "mode_confidence", "artist_hotttness", "song_hotttness",
        "start_of_fade_out", "tempo", "time_signature",
        "time_signature_confidence", "title", "release", "year", "track_id",
    ]

    # newline="" is what the csv module expects; lineterminator keeps the
    # plain "\n" row ending the hand-written version produced.
    with open("msds.csv", "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile, lineterminator="\n")
        writer.writerow(columns)

        index = 0
        # Recursively visit every sub-dir until we find the h5 files
        for root, _dirs, filenames in os.walk(directory):
            for filename in filenames:
                h5_file = hdf5_getters.open_h5_file_read(
                    os.path.join(root, filename))
                # Close the handle even if one of the getters raises.
                try:
                    artist = hdf5_getters.get_artist_name(h5_file).decode("UTF-8")
                    danceability = hdf5_getters.get_danceability(h5_file)
                    duration = hdf5_getters.get_duration(h5_file)
                    end_of_fade_in = hdf5_getters.get_end_of_fade_in(h5_file)
                    energy = hdf5_getters.get_energy(h5_file)
                    key = hdf5_getters.get_key(h5_file)
                    key_confidence = hdf5_getters.get_key_confidence(h5_file)
                    loudness = hdf5_getters.get_loudness(h5_file)
                    mode = hdf5_getters.get_mode(h5_file)
                    mode_confidence = hdf5_getters.get_mode_confidence(h5_file)
                    artist_hotttness = hdf5_getters.get_artist_hotttnesss(h5_file)
                    song_hotttness = hdf5_getters.get_song_hotttnesss(h5_file)
                    start_of_fade_out = hdf5_getters.get_start_of_fade_out(h5_file)
                    tempo = hdf5_getters.get_tempo(h5_file)
                    time_signature = hdf5_getters.get_time_signature(h5_file)
                    time_signature_confidence = \
                        hdf5_getters.get_time_signature_confidence(h5_file)
                    title = hdf5_getters.get_title(h5_file).decode("UTF-8")
                    song_id = hdf5_getters.get_track_id(h5_file).decode("UTF-8")
                    # Release (I think this means the album title)
                    release = hdf5_getters.get_release(h5_file).decode("UTF-8")
                    year = hdf5_getters.get_year(h5_file)
                finally:
                    h5_file.close()

                # Text fields stay str (no re-encoding to bytes); numbers are
                # stringified exactly as before.
                writer.writerow([
                    str(index), artist, str(danceability), str(duration),
                    str(end_of_fade_in), str(energy), str(key),
                    str(key_confidence), str(loudness), str(mode),
                    str(mode_confidence), str(artist_hotttness),
                    str(song_hotttness), str(start_of_fade_out), str(tempo),
                    str(time_signature), str(time_signature_confidence),
                    title, release, str(year), song_id,
                ])
                index += 1
                print("{} by {}".format(title, artist))
                print("Processed: {}".format(index))
                  'danceability', 'duration', 'end_of_fade_in', 'energy', 'key', 'key_confidence', 'loudness',
                  'mode', 'mode_confidence', 'release', 'sections_confidence', 'sections_start', 'segments_confidence',
                  'segments_loudness_max', 'segments_loudness_max_time', 'segments_loudness_start', 'segments_pitches',
                  'segments_start', 'segments_timbre', 'similar_artists', 'song_hotttnesss', 'song_id', 'start_of_fade_out',
                  'tatums_confidence', 'tatums_start', 'tempo', 'time_signature', 'time_signature_confidence', 'title',
                  'track_7digitalid', 'track_id', 'year']

    # Emit the CSV header row before any song rows.
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()


    # Read each song file's fields through the hdf5_getters accessors.
    for song in songs:
        h5 = hdf5_getters.open_h5_file_read(song)
        artist_mbid = hdf5_getters.get_artist_mbid(h5)
        artist_mbtags = hdf5_getters.get_artist_mbtags(h5)
        artist_name = hdf5_getters.get_artist_name(h5)
        artist_playmeid = hdf5_getters.get_artist_playmeid(h5)
        # Previously read via get_artist_7digitalid(), which stored the
        # 7digital id under the artist_terms name (copy-paste slip).
        artist_terms = hdf5_getters.get_artist_terms(h5)
        artist_terms_freq = hdf5_getters.get_artist_terms_freq(h5)
        artist_terms_weight = hdf5_getters.get_artist_terms_weight(h5)
        audio_md5 = hdf5_getters.get_audio_md5(h5)
        bars_confidence = hdf5_getters.get_bars_confidence(h5)
        bars_start = hdf5_getters.get_bars_start(h5)
        beats_confidence = hdf5_getters.get_beats_confidence(h5)
        beats_start = hdf5_getters.get_beats_start(h5)
        danceability = hdf5_getters.get_danceability(h5)
        duration = hdf5_getters.get_duration(h5)
        end_of_fade_in = hdf5_getters.get_end_of_fade_in(h5)
        energy = hdf5_getters.get_energy(h5)
        key = hdf5_getters.get_key(h5)
        key_confidence = hdf5_getters.get_key_confidence(h5)
Esempio n. 7
0
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    num_songs = len(songs)
    perc_i = 0

    # enumerate() supplies the position directly; songs.index(song) rescanned
    # the list on every iteration (quadratic overall) and reported the first
    # occurrence when the same path appeared twice.
    for song_pos, song in enumerate(songs):

        # Print a progress line each time another tenth of the list is passed.
        if song_pos * 10 / num_songs > perc_i:
            print(str(perc_i * 10) + "% done.")
            perc_i = perc_i + 1

        h5 = hdf5_getters.open_h5_file_read(song)

        # Text fields come back as bytes and are decoded as UTF-8.
        # NOTE(review): track_id is filled from get_song_id(), not
        # get_track_id() - confirm this is intended.
        track_id = str(hdf5_getters.get_song_id(h5), "utf-8")

        artist = str(hdf5_getters.get_artist_name(h5), "utf-8")

        title = str(hdf5_getters.get_title(h5), "utf-8")

        loudness = float(hdf5_getters.get_loudness(h5))

        release_year = int(hdf5_getters.get_year(h5))

        tempo = float(hdf5_getters.get_tempo(h5))

        danceability = float(hdf5_getters.get_danceability(h5))

        tags = hdf5_getters.get_artist_mbtags(h5)
        tags = tags.tolist()
        tags_refined = []
        for tag in tags:
Esempio n. 8
0
    # First command-line argument: path of the HDF5 song file to look up.
    h5path = sys.argv[1]

    # sanity checks
    # NOTE(review): the messages below still mention 7digital although the
    # variable checked is SPOTIFY_API_KEY - confirm the intended wording/flag.
    if SPOTIFY_API_KEY is None:
        print('You need to set a 7digital API key!')
        print('Get one at: http://developer.7digital.net/')
        print('Pass it as a flag: -7digitalkey KEY')
        print('or set it under environment variable: SPOTIFY_API_KEY')
        sys.exit(0)
    if not os.path.isfile(h5path):
        print('invalid path (not a file):', h5path)
        sys.exit(0)

    # open h5 song, get all we know about the song
    h5 = hdf5_utils.open_h5_file_read(h5path)
    try:
        artist_name = GETTERS.get_artist_name(h5).decode('utf-8')
        track_name = GETTERS.get_title(h5).decode('utf-8')
    finally:
        # Release the handle even if a getter or the decoding fails.
        h5.close()

    print('Searching for track: ', artist_name, ' - ', track_name)
    # search by artist name + track title
    # The search has to run before its result is inspected; it used to sit in
    # the else branch, so `res` was tested before ever being assigned.
    res = get_trackid_from_text_search(track_name, artistname=artist_name)
    if res is None:
        print('Did not find track using artist name and track title')
    else:
        name, preview_url = res
        print(name)
    #     sys.exit(0)
    # closest_track,trackid = res
    # if closest_track != track_name:
    #     print(( 'we approximate your song title:',track_name,'by:',closest_track))
Esempio n. 9
0
    # Use the file's base name without its extension as the track id and
    # register it in track_id_to_info with None as the initial value.
    track_name = os.path.basename(file_name)
    track_id = os.path.splitext(track_name)[0]
    track_id_to_info[track_id] = None

print(len(track_id_to_info))

h5 = hdf5_utils.open_h5_file_read(h5path)
# try/finally so the HDF5 handle is released even if a getter raises.
try:
    num_songs = GETTERS.get_num_songs(h5)

    print('Retrieving meta data from hdf5 file...')

    for i in tqdm(range(num_songs)):
        track_id = GETTERS.get_track_id(h5, songidx=i).decode('utf-8')

        # Only collect metadata for the tracks registered beforehand.
        if track_id in track_id_to_info:
            # NOTE(review): unlike track_id, artist and title are stored
            # exactly as returned by the getters (not decoded) - confirm
            # that consumers of the pickle expect that.
            artist_name = GETTERS.get_artist_name(h5, songidx=i)
            track_name = GETTERS.get_title(h5, songidx=i)
            year = GETTERS.get_year(h5, songidx=i)
            tempo = GETTERS.get_tempo(h5, songidx=i)

            info_dict = {
                'artist_name': artist_name,
                'track_name': track_name,
                'year': year,
                'tempo': tempo
            }

            track_id_to_info[track_id] = info_dict
finally:
    h5.close()

# The context manager flushes and closes the output file deterministically.
with open('../track_id_to_info.pkl', 'wb') as out_file:
    pickle.dump(track_id_to_info, out_file)