Example #1
0
def debug_from_song_file(connect, h5path, verbose=0):
    """
    Slow debugging function that takes a h5 file, reads the info,
    checks the match with the musicbrainz db and reports what is missing.
    Only prints (when verbose > 0) for the items we could NOT find.

    INPUT
       connect   - handle passed through unchanged to find_year_safemode
                   and get_artist_tags
       h5path    - path of the song file to open
       verbose   - print a line for every missing item when > 0
    RETURN
       tuple (gotmbid, gotyear, gottags), each 1 if found and 0 otherwise:
         gotmbid - the artist mbid stored in the file is not ''
         gotyear - find_year_safemode returned a value > 0
         gottags - get_artist_tags returned at least one tag
    """
    import hdf5_utils as HDF5
    import hdf5_getters as GETTERS
    h5 = HDF5.open_h5_file_read(h5path)
    try:
        title = GETTERS.get_title(h5)
        release = GETTERS.get_release(h5)
        artist = GETTERS.get_artist_name(h5)
        ambid = GETTERS.get_artist_mbid(h5)
    finally:
        # release the file even if one of the getters raises
        h5.close()
    # mbid
    gotmbid = 0 if ambid == '' else 1
    if gotmbid == 0 and verbose > 0:
        print('no mb id for:', artist)
    # year
    year = find_year_safemode(connect, ambid, title, release, artist)
    gotyear = 1 if year > 0 else 0
    # only complain when no year was found, mirroring the tags message below
    if gotyear == 0 and verbose > 0:
        print('no years for:', artist, '|', release, '|', title)
    # tags (the counts returned alongside are not needed here)
    tags, _counts = get_artist_tags(connect, ambid)
    gottags = 1 if len(tags) > 0 else 0
    if gottags == 0 and verbose > 0:
        print('no tags for:', artist)
    # return indicator for mbid, year, tag
    return gotmbid, gotyear, gottags
Example #2
0
def debug_from_song_file(connect,h5path,verbose=0):
    """
    Slow debugging function that takes a h5 file, reads the info,
    checks the match with the musicbrainz db and reports what is missing.
    Only prints (when verbose > 0) for the items we could NOT find.

    INPUT
       connect   - handle passed through unchanged to find_year_safemode
                   and get_artist_tags
       h5path    - path of the song file to open
       verbose   - print a line for every missing item when > 0
    RETURN
       tuple (gotmbid, gotyear, gottags), each 1 if found and 0 otherwise:
         gotmbid - the artist mbid stored in the file is not ''
         gotyear - find_year_safemode returned a value > 0
         gottags - get_artist_tags returned at least one tag
    """
    import hdf5_utils as HDF5
    import hdf5_getters as GETTERS
    h5 = HDF5.open_h5_file_read(h5path)
    try:
        title = GETTERS.get_title(h5)
        release = GETTERS.get_release(h5)
        artist = GETTERS.get_artist_name(h5)
        ambid = GETTERS.get_artist_mbid(h5)
    finally:
        # release the file even if one of the getters raises
        h5.close()
    # mbid
    gotmbid = 0 if ambid == '' else 1
    if gotmbid == 0 and verbose > 0:
        print('no mb id for:', artist)
    # year
    year = find_year_safemode(connect, ambid, title, release, artist)
    gotyear = 1 if year > 0 else 0
    # only complain when no year was found, mirroring the tags message below
    if gotyear == 0 and verbose > 0:
        print('no years for:', artist, '|', release, '|', title)
    # tags (the counts returned alongside are not needed here)
    tags, _counts = get_artist_tags(connect, ambid)
    gottags = 1 if len(tags) > 0 else 0
    if gottags == 0 and verbose > 0:
        print('no tags for:', artist)
    # return indicator for mbid, year, tag
    return gotmbid, gotyear, gottags
Example #3
0
def getURLFromH5(h5path):
    """
    Look up the preview for the song stored in the HDF5 file `h5path`,
    using the 7digital track id recorded in that file.

    Prints and returns whatever get_preview_from_trackid gives back when the
    file carries a track id >= 0 and the lookup result is not ''.
    Returns None when the track id is negative, or when the lookup result
    is '' (a message is printed in that case).

    If `h5path` is not an existing file, a message is printed and the
    process exits through sys.exit(0).
    """
    if not os.path.isfile(h5path):
        # single pre-formatted argument: same output under Python 2 and 3
        print('invalid path (not a file): %s' % h5path)
        sys.exit(0)
    h5 = hdf5_utils.open_h5_file_read(h5path)
    try:
        track_7digitalid = GETTERS.get_track_7digitalid(h5)
    finally:
        # release the file even if the getter raises
        h5.close()

    # no 7digital track id stored in the file: nothing to look up
    if track_7digitalid < 0:
        return None

    # we already have the 7digital track id? way too easy!
    preview = get_preview_from_trackid(track_7digitalid)
    if preview == '':
        print('something went wrong when looking by track id')
        return None
    print(preview)
    return preview
def getURLFromH5(h5path):
    """
    Look up the preview for the song stored in the HDF5 file `h5path`,
    using the 7digital track id recorded in that file.

    A two-line "Suggested Song URLs For you" banner is always printed once
    the file has been read. After that the function prints and returns
    whatever get_preview_from_trackid gives back when the file carries a
    track id >= 0 and the lookup result is not ''.
    Returns None when the track id is negative, or when the lookup result
    is '' (a message is printed in that case).

    If `h5path` is not an existing file, a message is printed and the
    process exits through sys.exit(0).
    """
    if not os.path.isfile(h5path):
        # single pre-formatted argument: same output under Python 2 and 3
        print('invalid path (not a file): %s' % h5path)
        sys.exit(0)
    h5 = hdf5_utils.open_h5_file_read(h5path)
    try:
        track_7digitalid = GETTERS.get_track_7digitalid(h5)
    finally:
        # release the file even if the getter raises
        h5.close()

    print("Suggested Song URLs For you")
    print("===========================")

    # no 7digital track id stored in the file: nothing to look up
    if track_7digitalid < 0:
        return None

    # we already have the 7digital track id? way too easy!
    preview = get_preview_from_trackid(track_7digitalid)
    if preview == '':
        print('something went wrong when looking by track id')
        return None
    print(preview)
    return preview
# Per-song scalar columns, in the order they appear in the resulting frame.
_SCALAR_COLUMNS = (
    'year',
    'artist_name',
    'artist_id',
    'song_title',
    'song_beats_persecond',
    'song_duration',
    'song_end_fade_in',
    'song_start_fade_out',
    'song_key',
    'song_loudness',
    'song_loudness_max',
    'song_loudness_min',
    'song_loudness_med',
    'song_loudness_time_max',
    'song_loudness_time_min',
    'song_loudness_time_med',
    'song_mode',
    'song_tempo',
    'song_time_signature',
)


def _song_features(h5, idx):
    """
    Read the features of one already-open song file.

    Returns None when the song's year is below 1990, otherwise a tuple
    (scalars, pitches, timbre, sections_start) where `scalars` maps every
    name in _SCALAR_COLUMNS to its value and the other three are lists.
    `idx` selects the covariance entries to keep (upper triangle).
    """
    song_year = g.get_year(h5)
    if song_year < 1990:
        return None

    beats = g.get_beats_start(h5).tolist()
    duration = g.get_duration(h5)

    # NOTE(review): `// 10` is floor division, so the exponent below is
    # floor(loudness / 10). If a plain dB -> linear conversion was intended
    # this should probably be `/ 10` - confirm before changing, since it
    # alters every loudness feature.
    loudness_antilog = np.power(10, g.get_segments_loudness_max(h5) // 10)
    loudness_time = np.multiply(
        loudness_antilog, g.get_segments_loudness_max_time(h5).tolist())

    # Mean and covariance over axis 0; `idx` assumes 12 columns per row
    # (np.triu_indices(12) in the caller) - TODO confirm against the getters.
    pitches = g.get_segments_pitches(h5)
    pitches_mean_cov = np.cov(pitches, rowvar=False)[idx].tolist()
    pitches_mean_cov.extend(np.mean(pitches, axis=0).tolist())

    timbre = g.get_segments_timbre(h5)
    timbre_mean_cov = np.cov(timbre, rowvar=False)[idx].tolist()
    timbre_mean_cov.extend(np.mean(timbre, axis=0).tolist())

    scalars = {
        'year': song_year,
        'artist_name': g.get_artist_name(h5),
        'artist_id': g.get_artist_id(h5),
        'song_title': g.get_title(h5),
        'song_beats_persecond': float(len(beats)) / duration,
        'song_duration': duration,
        'song_end_fade_in': g.get_end_of_fade_in(h5),
        'song_start_fade_out': g.get_start_of_fade_out(h5),
        'song_key': g.get_key(h5),
        'song_loudness': g.get_loudness(h5),
        'song_loudness_max': np.amax(loudness_antilog),
        'song_loudness_min': np.amin(loudness_antilog),
        'song_loudness_med': np.median(loudness_antilog),
        'song_loudness_time_max': np.amax(loudness_time),
        'song_loudness_time_min': np.amin(loudness_time),
        'song_loudness_time_med': np.median(loudness_time),
        'song_mode': g.get_mode(h5),
        'song_tempo': g.get_tempo(h5),
        'song_time_signature': g.get_time_signature(h5),
    }
    sections_start = g.get_sections_start(h5).tolist()
    return scalars, pitches_mean_cov, timbre_mean_cov, sections_start


def getData(starting_point, chunk_size=10000,
            pattern='/mnt/snap/data/*/*/*/*.h5'):
    """
    Build a feature table for one chunk of song HDF5 files.

    starting_point -- zero-based chunk number; files
                      [starting_point * chunk_size,
                       (starting_point + 1) * chunk_size) of the glob
                      result are processed.
    chunk_size     -- number of files per chunk (default 10000).
    pattern        -- glob pattern locating the .h5 files.

    Songs whose year is below 1990 are skipped. For every remaining song
    the scalar columns listed in _SCALAR_COLUMNS are collected; the
    pitches, timbre and sections_start lists are handed to
    createDictsFrom1DArray to be added to the same ordered dict.

    Returns pd.DataFrame built from that dict. Prints
    'before return <starting_point>' just before returning.

    NOTE: glob.glob does not promise any particular order, so which files
    land in which chunk depends on the order the listing comes back in.
    """
    starting = starting_point * chunk_size
    files = glob.glob(pattern)
    file_one_round = files[starting:starting + chunk_size]

    data = collections.OrderedDict((column, []) for column in _SCALAR_COLUMNS)
    song_pitches = []
    song_timbre = []
    song_sections_start = []

    idx = np.triu_indices(12)

    for f in file_one_round:
        h5 = HDF5.open_h5_file_read(f)
        try:
            features = _song_features(h5, idx)
        finally:
            # always release the file, including for songs that are
            # filtered out or when a getter raises
            h5.close()
        if features is None:
            continue

        scalars, pitches, timbre, sections_start = features
        for column in _SCALAR_COLUMNS:
            data[column].append(scalars[column])
        song_pitches.append(pitches)
        song_timbre.append(timbre)
        song_sections_start.append(sections_start)

    data = createDictsFrom1DArray(data, 'pitches', song_pitches)
    data = createDictsFrom1DArray(data, 'timbre', song_timbre)

    data = createDictsFrom1DArray(data, 'sections_start', song_sections_start)

    df = pd.DataFrame(data)
    print('before return ' + str(starting_point))

    return df
    # NOTE(review): this run of statements sits at function-body indentation
    # directly after a `return df`, so as placed it can never execute. It
    # looks like the body of a separate command-line script whose header is
    # missing from this file - confirm before relying on it.
    # params: the song file path is the first command-line argument
    h5path = sys.argv[1]

    # sanity checks: an API key must be configured and the path must be a file
    if DIGITAL7_API_KEY is None:
        print 'You need to set a 7digital API key!'
        print 'Get one at: http://developer.7digital.net/'
        print 'Pass it as a flag: -7digitalkey KEY'
        print 'or set it under environment variable: DIGITAL7_API_KEY'
        sys.exit(0)
    if not os.path.isfile(h5path):
        print 'invalid path (not a file):',h5path
        sys.exit(0)


    # open h5 song, get all we know about the song
    # (only track_7digitalid is used in the lines visible here)
    h5 = hdf5_utils.open_h5_file_read(h5path)
    track_7digitalid = GETTERS.get_track_7digitalid(h5)
    release_7digitalid = GETTERS.get_release_7digitalid(h5)
    artist_7digitalid = GETTERS.get_artist_7digitalid(h5)
    artist_name = GETTERS.get_artist_name(h5)
    release_name = GETTERS.get_release(h5)
    track_name = GETTERS.get_title(h5)
    h5.close()

    # we already have the 7digital track id? way too easy!
    # a negative id skips the lookup; an empty lookup result ('') is
    # reported as a failure, anything else is printed
    if track_7digitalid >= 0:
        preview = get_preview_from_trackid(track_7digitalid)
        if preview == '':
            print 'something went wrong when looking by track id'
        else:
            print preview
    # NOTE(review): headerless run of script statements (no enclosing `def`
    # is visible for it in this file) - confirm where it belongs.
    # params: the song file path is the first command-line argument
    h5path = sys.argv[1]

    # sanity checks: an API key must be configured and the path must be a file
    if DIGITAL7_API_KEY is None:
        print 'You need to set a 7digital API key!'
        print 'Get one at: http://developer.7digital.net/'
        print 'Pass it as a flag: -7digitalkey KEY'
        print 'or set it under environment variable: DIGITAL7_API_KEY'
        sys.exit(0)
    if not os.path.isfile(h5path):
        print 'invalid path (not a file):', h5path
        sys.exit(0)

    # open h5 song, get all we know about the song
    # (only track_7digitalid is used in the lines visible here)
    h5 = hdf5_utils.open_h5_file_read(h5path)
    track_7digitalid = GETTERS.get_track_7digitalid(h5)
    release_7digitalid = GETTERS.get_release_7digitalid(h5)
    artist_7digitalid = GETTERS.get_artist_7digitalid(h5)
    artist_name = GETTERS.get_artist_name(h5)
    release_name = GETTERS.get_release(h5)
    track_name = GETTERS.get_title(h5)
    h5.close()

    # we already have the 7digital track id? way too easy!
    # a negative id skips the lookup; an empty lookup result ('') is
    # reported as a failure, anything else is printed
    if track_7digitalid >= 0:
        preview = get_preview_from_trackid(track_7digitalid)
        if preview == '':
            print 'something went wrong when looking by track id'
        else:
            print preview
Example #8
0
    # NOTE(review): excerpt of a larger routine - its `def` line and the rest
    # of the else-branch are not visible here.
    # timestamp taken before the work starts (its use is outside this excerpt)
    t1 = time.time()

    # create hash tables: two lists of NUMBUCKETS buckets, each bucket an
    # independent empty set (one table for terms, one for mbtags)
    hash_table_terms = [None] * NUMBUCKETS
    hash_table_mbtags = [None] * NUMBUCKETS
    for k in range(NUMBUCKETS):
        hash_table_terms[k] = set()
        hash_table_mbtags[k] = set()
    
    # iterate HDF5 files
    cnt_files = 0
    if artistfile == '':
        # no artist file given: visit every directory under maindir
        for root, dirs, files in os.walk(maindir):
            # the file list from os.walk is replaced by the *.h5 files of
            # the current directory
            files = glob.glob(os.path.join(root,'*.h5'))
            for f in files :
                h5 = hdf5_utils.open_h5_file_read(f)
                terms = get_artist_terms(h5)
                mbtags = get_artist_mbtags(h5)
                h5.close()
                # iterate over terms
                for t in terms:
                    put_term_in_hash_table(hash_table_terms,t)
                for t in mbtags:
                    put_term_in_hash_table(hash_table_mbtags,t)
                cnt_files += 1
    else:
        # artist file given: read it line by line
        # (xreadlines() is a Python 2 file method; no close of `f` is
        # visible in this excerpt)
        f = open(artistfile,'r')
        trackids = []
        for line in f.xreadlines():
            # skip empty / whitespace-only lines
            if line == '' or line.strip() == '':
                continue
Example #9
0
    # NOTE(review): excerpt of a larger routine - its `def` line and the rest
    # of the else-branch are not visible here.
    # timestamp taken before the work starts (its use is outside this excerpt)
    t1 = time.time()

    # create hash tables: two lists of NUMBUCKETS buckets, each bucket an
    # independent empty set (one table for terms, one for mbtags)
    hash_table_terms = [None] * NUMBUCKETS
    hash_table_mbtags = [None] * NUMBUCKETS
    for k in range(NUMBUCKETS):
        hash_table_terms[k] = set()
        hash_table_mbtags[k] = set()

    # iterate HDF5 files
    cnt_files = 0
    if artistfile == "":
        # no artist file given: visit every directory under maindir
        for root, dirs, files in os.walk(maindir):
            # the file list from os.walk is replaced by the *.h5 files of
            # the current directory
            files = glob.glob(os.path.join(root, "*.h5"))
            for f in files:
                h5 = hdf5_utils.open_h5_file_read(f)
                terms = get_artist_terms(h5)
                mbtags = get_artist_mbtags(h5)
                h5.close()
                # iterate over terms
                for t in terms:
                    put_term_in_hash_table(hash_table_terms, t)
                for t in mbtags:
                    put_term_in_hash_table(hash_table_mbtags, t)
                cnt_files += 1
    else:
        # artist file given: read it line by line
        # (xreadlines() is a Python 2 file method; no close of `f` is
        # visible in this excerpt)
        f = open(artistfile, "r")
        trackids = []
        for line in f.xreadlines():
            # skip empty / whitespace-only lines
            if line == "" or line.strip() == "":
                continue