def get_all_data(target, basedir, ext='.h5') :

    # header
    target.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (
                 "track_id", "song_id", "title", "artist_name", "artist_location",
                 "artist_hotttnesss", "release", "year", "song_hotttnesss",
                 "danceability", "duration", "loudness", "sample_rate", "tempo"
    ))

    count = 0
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root,'*'+ext))
        for f in files:
            for line in f:
                new_file = open("tmp.txt", 'w')
                new_file.write(line)

                h5 = hdf5_getters.open_h5_file_read(new_file)
                target.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (
                              hdf5_getters.get_track_id(h5),
                              hdf5_getters.get_song_id(h5),
                              hdf5_getters.get_title(h5),
                              hdf5_getters.get_artist_name(h5),
                              hdf5_getters.get_artist_location(h5),
                              hdf5_getters.get_artist_hotttnesss(h5),
                              hdf5_getters.get_release(h5),
                              hdf5_getters.get_year(h5),
                              hdf5_getters.get_song_hotttnesss(h5),
                              hdf5_getters.get_danceability(h5),
                              hdf5_getters.get_duration(h5),
                              hdf5_getters.get_loudness(h5),
                              hdf5_getters.get_analysis_sample_rate(h5),
                              hdf5_getters.get_tempo(h5)
                ))

                # show progress
                count += 1
                print "%d/10000" % (count)

                h5.close()
Ejemplo n.º 2
0
def main():
    outputFile = open('songs.csv', 'w')
    writer = csv.writer(outputFile)

    csvRowString = "song_number,artist_familiarity,artist_hotttnesss,artist_id,artist_mbid,artist_playmeid,artist_7digitalid,artist_latitude,artist_longitude,artist_location,artist_name,release,release_7digitalid,song_id,song_hotttnesss,title,track_7digitalid,analysis_sample_rate,audio_md5,danceability,duration,end_of_fade_in,energy,key,key_confidence,loudness,mode,mode_confidence,start_of_fade_out,tempo,time_signature,time_signature_confidence,track_id,year"

    outputFile.write(csvRowString + "\n")
    csvRowString = ""

    #################################################
    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    basedir = "."  # "." As the default means the current directory
    ext = ".H5"  #Set the extension here. H5 is the extension for HDF5 files.
    #################################################

    #FOR LOOP
    songCount = 0
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            print(f)

            songH5File = hdf5_getters.open_h5_file_read(f)

            values = [
                songCount,
                hdf5_getters.get_artist_familiarity(songH5File),
                hdf5_getters.get_artist_hotttnesss(songH5File),
                hdf5_getters.get_artist_id(songH5File),
                hdf5_getters.get_artist_mbid(songH5File),
                hdf5_getters.get_artist_playmeid(songH5File),
                hdf5_getters.get_artist_7digitalid(songH5File),
                hdf5_getters.get_artist_latitude(songH5File),
                hdf5_getters.get_artist_longitude(songH5File),
                hdf5_getters.get_artist_location(songH5File),
                hdf5_getters.get_artist_name(songH5File),
                hdf5_getters.get_release(songH5File),
                hdf5_getters.get_release_7digitalid(songH5File),
                hdf5_getters.get_song_id(songH5File),
                hdf5_getters.get_song_hotttnesss(songH5File),
                hdf5_getters.get_title(songH5File),
                hdf5_getters.get_track_7digitalid(songH5File),
                hdf5_getters.get_analysis_sample_rate(songH5File),
                hdf5_getters.get_audio_md5(songH5File),
                hdf5_getters.get_danceability(songH5File),
                hdf5_getters.get_duration(songH5File),
                hdf5_getters.get_end_of_fade_in(songH5File),
                hdf5_getters.get_energy(songH5File),
                hdf5_getters.get_key(songH5File),
                hdf5_getters.get_key_confidence(songH5File),
                hdf5_getters.get_loudness(songH5File),
                hdf5_getters.get_mode(songH5File),
                hdf5_getters.get_mode_confidence(songH5File),
                hdf5_getters.get_start_of_fade_out(songH5File),
                hdf5_getters.get_tempo(songH5File),
                hdf5_getters.get_time_signature(songH5File),
                hdf5_getters.get_time_signature_confidence(songH5File),
                hdf5_getters.get_track_id(songH5File),
                hdf5_getters.get_year(songH5File)
            ]
            songH5File.close()
            songCount = songCount + 1

            writer.writerow(values)

    outputFile.close()
Ejemplo n.º 3
0
def main(argv):
    if len(argv) != 1:
        print "Specify data directory"
        return
    basedir = argv[0]
    outputFile1 = open('SongCSV.csv', 'w')
    outputFile2 = open('TagsCSV.csv', 'w')
    csvRowString = ""
    csvLabelString = ""
    #################################################
    #if you want to prompt the user for the order of attributes in the csv,
    #leave the prompt boolean set to True
    #else, set 'prompt' to False and set the order of attributes in the 'else'
    #clause
    prompt = False
    #################################################
    if prompt == True:
        while prompt:

            prompt = False

            csvAttributeString = raw_input("\n\nIn what order would you like the colums of the CSV file?\n" +
                "Please delineate with commas. The options are: " +
                "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"+
                " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," +
                " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" +
                "For example, you may write \"Title, Tempo, Duration\"...\n\n" +
                "...or exit by typing 'exit'.\n\n")

            csvAttributeList = re.split('\W+', csvAttributeString)
            for i, v in enumerate(csvAttributeList):
                csvAttributeList[i] = csvAttributeList[i].lower()

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"


                if attribute == 'AlbumID'.lower():
                    csvRowString += 'AlbumID'
                elif attribute == 'AlbumName'.lower():
                    csvRowString += 'AlbumName'
                elif attribute == 'ArtistID'.lower():
                    csvRowString += 'ArtistID'
                elif attribute == 'ArtistLatitude'.lower():
                    csvRowString += 'ArtistLatitude'
                elif attribute == 'ArtistLocation'.lower():
                    csvRowString += 'ArtistLocation'
                elif attribute == 'ArtistLongitude'.lower():
                    csvRowString += 'ArtistLongitude'
                elif attribute == 'ArtistName'.lower():
                    csvRowString += 'ArtistName'
                elif attribute == 'Danceability'.lower():
                    csvRowString += 'Danceability'
                elif attribute == 'Duration'.lower():
                    csvRowString += 'Duration'
                elif attribute == 'KeySignature'.lower():
                    csvRowString += 'KeySignature'
                elif attribute == 'KeySignatureConfidence'.lower():
                    csvRowString += 'KeySignatureConfidence'
                elif attribute == 'SongID'.lower():
                    csvRowString += "SongID"
                elif attribute == 'Tempo'.lower():
                    csvRowString += 'Tempo'
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += 'TimeSignature'
                elif attribute == 'TimeSignatureConfidence'.lower():
                    csvRowString += 'TimeSignatureConfidence'
                elif attribute == 'Title'.lower():
                    csvRowString += 'Title'
                elif attribute == 'Year'.lower():
                    csvRowString += 'Year'
                elif attribute == 'Exit'.lower():
                    sys.exit()
                else:
                    prompt = True
                    print "=============="
                    print "I believe there has been an error with the input."
                    print "=============="
                    break

                csvRowString += ","

            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex-1]
            csvRowString += "\n"
            outputFile1.write(csvRowString);
            csvRowString = ""
    #else, if you want to hard code the order of the csv file and not prompt
    #the user,
    else:
        #################################################
        #change the order of the csv file here
        #Default is to list all available attributes (in alphabetical order)
        #csvRowString = ("SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,"+
        #    "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,"+
        #    "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"+
        #    "Title,Year")
        csvRowString = ("ArtistFamiliarity,ArtistHotttnesss,"+
            "BarsConfidence,BarsStart,BeatsConfidence,BeatsStart,Duration,"+
            "EndOfFadeIn,Key,KeyConfidence,Loudness,Mode,ModeConfidence,"+
            "SectionsConfidence,SectionsStart,SegmentsConfidence,SegmentsLoudnessMax,"+
            "SegmentsLoudnessMaxTime,SegmentsLoudnessStart,SegmentsStart,"+
            "SongHotttnesss,StartOfFadeOut,TatumsConfidence,TatumsStart,Tempo,TimeSignature,TimeSignatureConfidence,"+
            "SegmentsPitches,SegmentsTimbre,Title,Year,Decade,ArtistMbtags")
        #################################################
        header = str()
        csvAttributeList = re.split('\W+', csvRowString)
        arrayAttributes = ["BarsConfidence","BarsStart","BeatsConfidence","BeatsStart",
                           "SectionsConfidence","SectionsStart","SegmentsConfidence","SegmentsLoudnessMax",
                           "SegmentsLoudnessMaxTime","SegmentsLoudnessStart","SegmentsStart",
                           "TatumsConfidence","TatumsStart"]
        for i, v in enumerate(csvAttributeList):
            csvAttributeList[i] = csvAttributeList[i].lower()
            if(v=="SegmentsPitches"):
                for i in range(90):
                    header = header + "SegmentsPitches" + str(i) + ","
            elif(v=="SegmentsTimbre"):
                for i in range(90):
                    header = header + "SegmentsTimbre" + str(i) + ","
            elif(v in arrayAttributes):
                header = header + v + str(0) + ","
                header = header + v + str(1) + ","
            else:
                header = header + v + ","
        outputFile1.write("SongNumber,");
        #outputFile1.write(csvRowString + "\n");
        outputFile1.write(header + "\n");
        csvRowString = ""

    #################################################


    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    #basedir = "MillionSongSubset/data/A/A/" # "." As the default means the current directory
    ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files.
    #################################################

    #FOR LOOP
    all = sorted(os.walk(basedir))
    for root, dirs, files in all:
        files = sorted(glob.glob(os.path.join(root,'*'+ext)))
        for f in files:
            print f

            songH5File = hdf5_getters.open_h5_file_read(f)
            song = Song(str(hdf5_getters.get_song_id(songH5File)))

            #testDanceability = hdf5_getters.get_danceability(songH5File)
            # print type(testDanceability)
            # print ("Here is the danceability: ") + str(testDanceability)

            song.analysisSampleRate = str(hdf5_getters.get_analysis_sample_rate(songH5File))
            song.artistFamiliarity = str(hdf5_getters.get_artist_familiarity(songH5File))
            song.artistHotttnesss = str(hdf5_getters.get_artist_hotttnesss(songH5File))
            song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File))
            song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File))
            song.artistMbid = str(hdf5_getters.get_artist_mbid(songH5File))
            song.barsConfidence = np.array(hdf5_getters.get_bars_confidence(songH5File))
            song.barsStart = np.array(hdf5_getters.get_bars_start(songH5File))
            song.beatsConfidence = np.array(hdf5_getters.get_beats_confidence(songH5File))
            song.beatsStart = np.array(hdf5_getters.get_beats_start(songH5File))
            song.danceability = str(hdf5_getters.get_danceability(songH5File))
            song.duration = str(hdf5_getters.get_duration(songH5File))
            song.endOfFadeIn = str(hdf5_getters.get_end_of_fade_in(songH5File))
            song.energy = str(hdf5_getters.get_energy(songH5File))
            song.key = str(hdf5_getters.get_key(songH5File))
            song.keyConfidence = str(hdf5_getters.get_key_confidence(songH5File))
            song.loudness = str(hdf5_getters.get_loudness(songH5File))
            song.mode = str(hdf5_getters.get_mode(songH5File))
            song.modeConfidence = str(hdf5_getters.get_mode_confidence(songH5File))
            song.sectionsConfidence = np.array(hdf5_getters.get_sections_confidence(songH5File))
            song.sectionsStart = np.array(hdf5_getters.get_sections_start(songH5File))
            song.segmentsConfidence = np.array(hdf5_getters.get_segments_confidence(songH5File))
            song.segmentsLoudnessMax = np.array(hdf5_getters.get_segments_loudness_max(songH5File))
            song.segmentsLoudnessMaxTime = np.array(hdf5_getters.get_segments_loudness_max_time(songH5File))
            song.segmentsLoudnessStart = np.array(hdf5_getters.get_segments_loudness_start(songH5File))
            song.segmentsPitches = np.array(hdf5_getters.get_segments_pitches(songH5File))
            song.segmentsStart = np.array(hdf5_getters.get_segments_start(songH5File))
            song.segmentsTimbre = np.array(hdf5_getters.get_segments_timbre(songH5File))
            song.songHotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File))
            song.startOfFadeOut = str(hdf5_getters.get_start_of_fade_out(songH5File))
            song.tatumsConfidence = np.array(hdf5_getters.get_tatums_confidence(songH5File))
            song.tatumsStart = np.array(hdf5_getters.get_tatums_start(songH5File))
            song.tempo = str(hdf5_getters.get_tempo(songH5File))
            song.timeSignature = str(hdf5_getters.get_time_signature(songH5File))
            song.timeSignatureConfidence = str(hdf5_getters.get_time_signature_confidence(songH5File))
            song.songid = str(hdf5_getters.get_song_id(songH5File))
            song.title = str(hdf5_getters.get_title(songH5File))
            song.year = str(hdf5_getters.get_year(songH5File))
            song.artistMbtags = str(hdf5_getters.get_artist_mbtags(songH5File))
            #print song count
            csvRowString += str(song.songCount) + ","
            csvLabelString += str(song.songCount) + ","
            
            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AnalysisSampleRate'.lower():
                    csvRowString += song.analysisSampleRate
                elif attribute == 'ArtistFamiliarity'.lower():
                    csvRowString += song.artistFamiliarity
                elif attribute == 'ArtistHotttnesss'.lower():
                    csvRowString += song.artistHotttnesss
                elif attribute == 'ArtistLatitude'.lower():
                    latitude = song.artistLatitude
                    if latitude == 'nan':
                        latitude = ''
                    csvRowString += latitude
                elif attribute == 'ArtistLongitude'.lower():
                    longitude = song.artistLongitude
                    if longitude == 'nan':
                        longitude = ''
                    csvRowString += longitude
                elif attribute == 'ArtistMbid'.lower():
                    csvRowString += song.artistMbid
                elif attribute == 'BarsConfidence'.lower():
                    arr = song.barsConfidence
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'BarsStart'.lower():
                    arr = song.barsStart
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'BeatsConfidence'.lower():
                    arr = song.beatsConfidence
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'BeatsStart'.lower():
                    arr = song.beatsStart
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'Danceability'.lower():
                    csvRowString += song.danceability
                elif attribute == 'Duration'.lower():
                    csvRowString += song.duration
                elif attribute == 'EndOfFadeIn'.lower():
                    csvRowString += song.endOfFadeIn
                elif attribute == 'Energy'.lower():
                    csvRowString += song.energy
                elif attribute == 'Key'.lower():
                    csvRowString += song.key
                elif attribute == 'KeyConfidence'.lower():
                    csvRowString += song.keyConfidence
                elif attribute == 'Loudness'.lower():
                    csvRowString += song.loudness
                elif attribute == 'Mode'.lower():
                    csvRowString += song.mode
                elif attribute == 'ModeConfidence'.lower():
                    csvRowString += song.modeConfidence
                elif attribute == 'SectionsConfidence'.lower():
                    arr = song.sectionsConfidence
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'SectionsStart'.lower():
                    arr = song.sectionsStart
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'SegmentsConfidence'.lower():
                    arr = song.segmentsConfidence
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'SegmentsLoudnessMax'.lower():
                    arr = song.segmentsLoudnessMax
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'SegmentsLoudnessMaxTime'.lower():
                    arr = song.segmentsLoudnessMaxTime
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'SegmentsLoudnessStart'.lower():
                    arr = song.segmentsLoudnessStart
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'SegmentsStart'.lower():
                    arr = song.segmentsStart
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'SongHotttnesss'.lower():
                    hotttnesss = song.songHotttnesss
                    if hotttnesss == 'nan':
                        hotttnesss = 'NaN'
                    csvRowString += hotttnesss
                elif attribute == 'StartOfFadeOut'.lower():
                    csvRowString += song.startOfFadeOut
                elif attribute == 'TatumsConfidence'.lower():
                    arr = song.tatumsConfidence
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'TatumsStart'.lower():
                    arr = song.tatumsStart
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'Tempo'.lower():
                    # print "Tempo: " + song.tempo
                    csvRowString += song.tempo
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += song.timeSignature
                elif attribute == 'TimeSignatureConfidence'.lower():
                    # print "time sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.timeSignatureConfidence
                elif attribute == 'SegmentsPitches'.lower():
                    colmean = np.mean(song.segmentsPitches,axis=0)
                    for m in colmean:
                        csvRowString += str(m) + ","
                    cov = np.dot(song.segmentsPitches.T,song.segmentsPitches)
                    utriind = np.triu_indices(cov.shape[0])
                    feats = cov[utriind]
                    for feat in feats:
                        csvRowString += str(feat) + ","
                    lastIndex = len(csvRowString)
                    csvRowString = csvRowString[0:lastIndex-1]
                elif attribute == 'SegmentsTimbre'.lower():
                    colmean = np.mean(song.segmentsTimbre,axis=0)
                    for m in colmean:
                        csvRowString += str(m) + ","
                    cov = np.dot(song.segmentsTimbre.T,song.segmentsTimbre)
                    utriind = np.triu_indices(cov.shape[0])
                    feats = cov[utriind]
                    for feat in feats:
                        csvRowString += str(feat) + ","
                    lastIndex = len(csvRowString)
                    csvRowString = csvRowString[0:lastIndex-1]
                elif attribute == 'SongID'.lower():
                    csvRowString += "\"" + song.id + "\""
                elif attribute == 'Title'.lower():
                    csvRowString += "\"" + song.title + "\""
                elif attribute == 'Year'.lower():
                    csvRowString += song.year
                elif attribute == 'Decade'.lower():
                    yr = song.year
                    if yr > 0:
                        decade = song.year[:-1] + '0'
                    else:
                        decade = '0'
                    csvRowString += decade
                elif attribute == 'ArtistMbtags'.lower():
                    tags = song.artistMbtags[1:-1]
                    tags = "\"" + tags + "\""
                    tags = tags.replace("\n",'')
                    csvRowString += tags
                    tagsarray = shlex.split(tags)
                    for t in tagsarray:
                        csvLabelString += t + ","
                else:
                     csvRowString += "Erm. This didn't work. Error. :( :(\n"

                csvRowString += ","

                '''
                if attribute == 'AlbumID'.lower():
                    csvRowString += song.albumID
                elif attribute == 'AlbumName'.lower():
                    albumName = song.albumName
                    albumName = albumName.replace(',',"")
                    csvRowString += "\"" + albumName + "\""
                elif attribute == 'ArtistID'.lower():
                    csvRowString += "\"" + song.artistID + "\""
                elif attribute == 'ArtistLatitude'.lower():
                    latitude = song.artistLatitude
                    if latitude == 'nan':
                        latitude = ''
                    csvRowString += latitude
                elif attribute == 'ArtistLocation'.lower():
                    location = song.artistLocation
                    location = location.replace(',','')
                    csvRowString += "\"" + location + "\""
                elif attribute == 'ArtistLongitude'.lower():
                    longitude = song.artistLongitude
                    if longitude == 'nan':
                        longitude = ''
                    csvRowString += longitude
                elif attribute == 'ArtistName'.lower():
                    csvRowString += "\"" + song.artistName + "\""
                elif attribute == 'Danceability'.lower():
                    csvRowString += song.danceability
                elif attribute == 'Duration'.lower():
                    csvRowString += song.duration
                elif attribute == 'KeySignature'.lower():
                    csvRowString += song.keySignature
                elif attribute == 'KeySignatureConfidence'.lower():
                    # print "key sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.keySignatureConfidence
                elif attribute == 'SongID'.lower():
                    csvRowString += "\"" + song.id + "\""
                elif attribute == 'Tempo'.lower():
                    # print "Tempo: " + song.tempo
                    csvRowString += song.tempo
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += song.timeSignature
                elif attribute == 'TimeSignatureConfidence'.lower():
                    # print "time sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.timeSignatureConfidence
                elif attribute == 'Title'.lower():
                    csvRowString += "\"" + song.title + "\""
                elif attribute == 'Year'.lower():
                    csvRowString += song.year
                else:
                    csvRowString += "Erm. This didn't work. Error. :( :(\n"

                csvRowString += ","
                '''
            #Remove the final comma from each row in the csv
            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex-1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""
            
            lastIndex = len(csvLabelString)
            csvLabelString = csvLabelString[0:lastIndex-1]
            csvLabelString += "\n"
            outputFile2.write(csvLabelString)
            csvLabelString = ""

            songH5File.close()

    outputFile1.close()
    outputFile2.close()
Ejemplo n.º 4
0
def data_to_flat_file(basedir,ext='.h5') :
    """ This function extracts the information from the tables and creates the flat file. """
    count = 0; #song counter
    list_to_write= []
    group_index=0
    row_to_write = ""
    writer = csv.writer(open("complete.csv", "wb"))
    for root, dirs, files in os.walk(basedir):
	files = glob.glob(os.path.join(root,'*'+ext))
        for f in files:
	    row=[]
	    print f
            h5 = hdf5_getters.open_h5_file_read(f)
	    title = hdf5_getters.get_title(h5) 
	    title= title.replace('"','') 
            row.append(title)
	    comma=title.find(',')
	    if	comma != -1:
		    print title
		    time.sleep(1)
	    album = hdf5_getters.get_release(h5)
	    album= album.replace('"','')
            row.append(album)
	    comma=album.find(',')
	    if	comma != -1:
		    print album
		    time.sleep(1)
	    artist_name = hdf5_getters.get_artist_name(h5)
	    comma=artist_name.find(',')
	    if	comma != -1:
		    print artist_name
		    time.sleep(1)
	    artist_name= artist_name.replace('"','')
            row.append(artist_name)
	    duration = hdf5_getters.get_duration(h5)
            row.append(duration)
	    samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
            row.append(samp_rt)
	    artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
            row.append(artist_7digitalid)
	    artist_fam = hdf5_getters.get_artist_familiarity(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_fam) == True:
	            artist_fam=-1
            row.append(artist_fam)
	    artist_hotness= hdf5_getters.get_artist_hotttnesss(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_hotness) == True:
	             artist_hotness=-1
            row.append(artist_hotness)
	    artist_id = hdf5_getters.get_artist_id(h5)
            row.append(artist_id)           
	    artist_lat = hdf5_getters.get_artist_latitude(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_lat) == True:
	            artist_lat=-1
            row.append(artist_lat)
	    artist_loc = hdf5_getters.get_artist_location(h5)
            row.append(artist_loc)
	    artist_lon = hdf5_getters.get_artist_longitude(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_lon) == True:
	            artist_lon=-1
            row.append(artist_lon)
	    artist_mbid = hdf5_getters.get_artist_mbid(h5)
            row.append(artist_mbid)

	    #Getting the genre				       
            art_trm = hdf5_getters.get_artist_terms(h5)
            trm_freq = hdf5_getters.get_artist_terms_freq(h5)
	    trn_wght = hdf5_getters.get_artist_terms_weight(h5)
	    a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
	    genre_indexes=get_genre_indexes(trm_freq) 		    #index of the highest freq
	    genre_set=0					            #flag to see if the genre has been set or not
	    final_genre=[]
	    genres_so_far=[]
	    for i in range(len(genre_indexes)):
		    genre_tmp=get_genre(art_trm,genre_indexes[i])   #genre that corresponds to the highest freq
		    genres_so_far=genre_dict.get_genre_in_dict(genre_tmp) #getting the genre from the dictionary
		    if len(genres_so_far) != 0:
			for i in genres_so_far:
				final_genre.append(i)
			    	genre_set=1
			
			
	    if genre_set == 1:
		col_num=[]
		for i in final_genre:
			column=int(i)				#getting the column number of the genre
			col_num.append(column)
	
		genre_array=genre_columns(col_num)	                #genre array 
	        for i in range(len(genre_array)):                   	#appending the genre_array to the row 
			row.append(genre_array[i])
	    else:
		genre_array=genre_columns(-1)				#when there is no genre matched, return an array of [0...0]
	        for i in range(len(genre_array)):                   	#appending the genre_array to the row 
			row.append(genre_array[i])
					

	    artist_pmid = hdf5_getters.get_artist_playmeid(h5)
            row.append(artist_pmid)
	    audio_md5 = hdf5_getters.get_audio_md5(h5)
            row.append(audio_md5)
	    danceability = hdf5_getters.get_danceability(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(danceability) == True:
	            danceability=-1
            row.append(danceability)
	    end_fade_in =hdf5_getters.get_end_of_fade_in(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(end_fade_in) == True:
	            end_fade_in=-1
            row.append(end_fade_in)
	    energy = hdf5_getters.get_energy(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(energy) == True:
	            energy=-1
            row.append(energy)
            song_key = hdf5_getters.get_key(h5)
            row.append(song_key)
	    key_c = hdf5_getters.get_key_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(key_c) == True:
	            key_c=-1
            row.append(key_c)
	    loudness = hdf5_getters.get_loudness(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(loudness) == True:
	            loudness=-1
            row.append(loudness)
	    mode = hdf5_getters.get_mode(h5)
            row.append(mode)
	    mode_conf = hdf5_getters.get_mode_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(mode_conf) == True:
	            mode_conf=-1
            row.append(mode_conf)
	    release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
            row.append(release_7digitalid)
	    song_hot = hdf5_getters.get_song_hotttnesss(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(song_hot) == True:
	            song_hot=-1
            row.append(song_hot)
	    song_id = hdf5_getters.get_song_id(h5)
            row.append(song_id)
	    start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
            row.append(start_fade_out)
	    tempo = hdf5_getters.get_tempo(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(tempo) == True:
	            tempo=-1
            row.append(tempo)
	    time_sig = hdf5_getters.get_time_signature(h5)
            row.append(time_sig)
	    time_sig_c = hdf5_getters.get_time_signature_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(time_sig_c) == True:
	            time_sig_c=-1
            row.append(time_sig_c)
	    track_id = hdf5_getters.get_track_id(h5)
            row.append(track_id)
	    track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
            row.append(track_7digitalid)
	    year = hdf5_getters.get_year(h5)
            row.append(year)
	    bars_c = hdf5_getters.get_bars_confidence(h5)
            bars_start = hdf5_getters.get_bars_start(h5)
	    row_bars_padding=padding(245)   #this is the array that will be attached at the end of th row

	    #--------------bars---------------"
	    gral_info=[]
	    gral_info=row[:]
	    empty=[]
	    for i,item in enumerate(bars_c):
                row.append(group_index)
                row.append(i)
                row.append(bars_c[i])
	        bars_c_avg= get_avg(bars_c)
                row.append(bars_c_avg)
	        bars_c_max= get_max(bars_c)	
                row.append(bars_c_max)
	        bars_c_min = get_min(bars_c)
                row.append(bars_c_min)
	        bars_c_stddev= get_stddev(bars_c)
                row.append(bars_c_stddev)
	        bars_c_count = get_count(bars_c)
                row.append(bars_c_count)
	        bars_c_sum = get_sum(bars_c)
                row.append(bars_c_sum)
                row.append(bars_start[i])	         
	        bars_start_avg = get_avg(bars_start)
                row.append(bars_start_avg)	         
	        bars_start_max= get_max(bars_start)
                row.append(bars_start_max)	         
	        bars_start_min = get_min(bars_start)
                row.append(bars_start_min)	         
	        bars_start_stddev= get_stddev(bars_start)
                row.append(bars_start_stddev)	         
	        bars_start_count = get_count(bars_start)
                row.append(bars_start_count)	         
	        bars_start_sum = get_sum(bars_start)
                row.append(bars_start_sum)	         
		for i in row_bars_padding:
			row.append(i)

                writer.writerow(row)
		row=[]
		row=gral_info[:]
	 

            #--------beats---------------"
	    beats_c = hdf5_getters.get_beats_confidence(h5)
	    group_index=1
	    row=[]
	    row=gral_info[:]
	    row_front=padding(14)  	#blanks left in front of the row(empty spaces for bars)
	    row_beats_padding=padding(231)
	    for i,item in enumerate(beats_c):
	   	row.append(group_index)
		row.append(i)
		for index in row_front:  #padding blanks in front of the beats
			row.append(index)
		
		row.append(beats_c[i])
	        beats_c_avg= get_avg(beats_c)
		row.append(beats_c_avg)
	        beats_c_max= get_max(beats_c)
		row.append(beats_c_max)
                beats_c_min = get_min(beats_c)
		row.append(beats_c_min)
	        beats_c_stddev= get_stddev(beats_c)
		row.append(beats_c_stddev)
	        beats_c_count = get_count(beats_c)
		row.append(beats_c_count)
	        beats_c_sum = get_sum(beats_c)
		row.append(beats_c_sum)
                beats_start = hdf5_getters.get_beats_start(h5)
		row.append(beats_start[i])
 	        beats_start_avg = get_avg(beats_start)
		row.append(beats_start_avg)
	        beats_start_max= get_max(beats_start)
		row.append(beats_start_max)
	        beats_start_min = get_min(beats_start)
		row.append(beats_start_min)
	        beats_start_stddev= get_stddev(beats_start)
		row.append(beats_start_stddev)
	        beats_start_count = get_count(beats_start)
		row.append(beats_start_count)
	        beats_start_sum = get_sum(beats_start)
		row.append(beats_start_sum)
		for i in row_beats_padding:
			row.append(i)
                
		writer.writerow(row)
		row=[]
		row=gral_info[:]

            # "--------sections---------------"
	    row_sec_padding=padding(217)	#blank spaces left at the end of the row
	    sec_c = hdf5_getters.get_sections_confidence(h5)
	    group_index=2
	    row=[]
	    row=gral_info[:]
	    row_front=padding(28)		#blank spaces left in front(empty spaces for bars,beats)
	    for i,item in enumerate(sec_c):
		row.append(group_index)
		row.append(i)
		for index in row_front:  	#padding blanks in front of the sections
			row.append(index)

		row.append(sec_c[i])
                sec_c_avg= get_avg(sec_c)
		row.append(sec_c_avg)
	        sec_c_max= get_max(sec_c)
		row.append(sec_c_max)
	        sec_c_min = get_min(sec_c)
		row.append(sec_c_min)
	        sec_c_stddev= get_stddev(sec_c)
		row.append(sec_c_stddev)
	        sec_c_count = get_count(sec_c)
		row.append(sec_c_count)
	        sec_c_sum = get_sum(sec_c)
		row.append(sec_c_sum)
	        sec_start = hdf5_getters.get_sections_start(h5)
		row.append(sec_start[i])	   
                sec_start_avg = get_avg(sec_start)
		row.append(sec_start_avg)
	        sec_start_max= get_max(sec_start)
		row.append(sec_start_max)
	        sec_start_min = get_min(sec_start)
		row.append(sec_start_min)
	        sec_start_stddev= get_stddev(sec_start)
		row.append(sec_start_stddev)
	        sec_start_count = get_count(sec_start)
		row.append(sec_start_count)
	        sec_start_sum = get_sum(sec_start)
		row.append(sec_start_sum)
		for i in row_sec_padding:	#appending the blank spaces at the end of the row
			row.append(i)
                

		writer.writerow(row)
		row=[]
		row=gral_info[:]


            #--------segments---------------"
	    row_seg_padding=padding(182)	#blank spaces at the end of the row
 	    row_front=padding(42)		#blank spaces left in front of segments
	    seg_c = hdf5_getters.get_segments_confidence(h5)
	    group_index=3
	    row=[]
	    row=gral_info[:]
	    for i,item in enumerate(seg_c):
		row.append(group_index)
		row.append(i)
		for index in row_front:  	#padding blanks in front of the segments
			row.append(index)

		row.append(seg_c[i])
                seg_c_avg= get_avg(seg_c)
		row.append(seg_c_avg)
	        seg_c_max= get_max(seg_c)
		row.append(seg_c_max)
	        seg_c_min = get_min(seg_c)
		row.append(seg_c_min)
	        seg_c_stddev= get_stddev(seg_c)
		row.append(seg_c_stddev)
	        seg_c_count = get_count(seg_c)
		row.append(seg_c_count)
	        seg_c_sum = get_sum(seg_c)
		row.append(seg_c_sum)
                seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
		row.append(seg_loud_max[i])
                seg_loud_max_avg= get_avg(seg_loud_max)
		row.append(seg_loud_max_avg)
	        seg_loud_max_max= get_max(seg_loud_max)
		row.append(seg_loud_max_max)
	        seg_loud_max_min = get_min(seg_loud_max)
		row.append(seg_loud_max_min)
	        seg_loud_max_stddev= get_stddev(seg_loud_max)
		row.append(seg_loud_max_stddev)
	        seg_loud_max_count = get_count(seg_loud_max)
		row.append(seg_loud_max_count)
	        seg_loud_max_sum = get_sum(seg_loud_max)
		row.append(seg_loud_max_sum)
	        seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
		row.append(seg_loud_max_time[i])
	        seg_loud_max_time_avg= get_avg(seg_loud_max_time)
		row.append(seg_loud_max_time_avg)
	        seg_loud_max_time_max= get_max(seg_loud_max_time)
		row.append(seg_loud_max_time_max)
	        seg_loud_max_time_min = get_min(seg_loud_max_time)
		row.append(seg_loud_max_time_min)
	        seg_loud_max_time_stddev= get_stddev(seg_loud_max_time)
		row.append(seg_loud_max_time_stddev)
	        seg_loud_max_time_count = get_count(seg_loud_max_time)
		row.append(seg_loud_max_time_count)
	        seg_loud_max_time_sum = get_sum(seg_loud_max_time)
		row.append(seg_loud_max_time_sum)
	        seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
		row.append(seg_loud_start[i])
	        seg_loud_start_avg= get_avg(seg_loud_start)
		row.append(seg_loud_start_avg)
	        seg_loud_start_max= get_max(seg_loud_start)
		row.append(seg_loud_start_max)
	        seg_loud_start_min = get_min(seg_loud_start)
		row.append(seg_loud_start_min)
	        seg_loud_start_stddev= get_stddev(seg_loud_start)
		row.append(seg_loud_start_stddev)
	        seg_loud_start_count = get_count(seg_loud_start)
		row.append(seg_loud_start_count)
	        seg_loud_start_sum = get_sum(seg_loud_start)					      
		row.append(seg_loud_start_sum)
	        seg_start = hdf5_getters.get_segments_start(h5)
		row.append(seg_start[i])
	        seg_start_avg= get_avg(seg_start)
		row.append(seg_start_avg)
	        seg_start_max= get_max(seg_start)
		row.append(seg_start_max)
	        seg_start_min = get_min(seg_start)
		row.append(seg_start_min)
	        seg_start_stddev= get_stddev(seg_start)
		row.append(seg_start_stddev)
	        seg_start_count = get_count(seg_start)
		row.append(seg_start_count)
	        seg_start_sum = get_sum(seg_start)
		row.append(seg_start_sum)
		for i in row_seg_padding:	#appending blank spaces at the end of the row
			row.append(i)
                
		writer.writerow(row)
		row=[]
		row=gral_info[:]

	    #----------segments pitch and timbre---------------"
	    row_seg2_padding=padding(14)	#blank spaces left at the end of the row
	    row_front=padding(77)		#blank spaces left at the front of the segments and timbre
	    seg_pitch = hdf5_getters.get_segments_pitches(h5)
	    transpose_pitch= seg_pitch.transpose()          #this is to tranpose the matrix,so we can have 12 rows
	    group_index=4
	    row=[]
	    row=gral_info[:]
	    for i,item in enumerate(transpose_pitch[0]):
		row.append(group_index)
		row.append(i)
		for index in row_front:  	#padding blanks in front of segments and timbre
			row.append(index)
	   
		row.append(transpose_pitch[0][i])
  		seg_pitch_avg= get_avg(transpose_pitch[0])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[0])	
		row.append(seg_pitch_max)
		seg_pitch_min = get_min(transpose_pitch[0])
		row.append(seg_pitch_min)
		seg_pitch_stddev= get_stddev(transpose_pitch[0])
		row.append(seg_pitch_stddev)
		seg_pitch_count = get_count(transpose_pitch[0])
		row.append(seg_pitch_count)
		seg_pitch_sum = get_sum(transpose_pitch[0])
		row.append(seg_pitch_sum)   
 		row.append(transpose_pitch[1][i])
 		seg_pitch_avg= get_avg(transpose_pitch[1])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[1])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[1])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[1])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[1])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[1])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[2][i])
 		seg_pitch_avg= get_avg(transpose_pitch[2])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[2])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[2])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[2])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[2])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[2])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[3][i])
 		seg_pitch_avg= get_avg(transpose_pitch[3])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[3])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[3])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[3])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[3])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[3])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[4][i])
 		seg_pitch_avg= get_avg(transpose_pitch[4])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[4])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[4])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[4])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[4])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[4])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[5][i])
 		seg_pitch_avg= get_avg(transpose_pitch[5])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[5])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[5])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[5])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[5])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[5])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[6][i])
 		seg_pitch_avg= get_avg(transpose_pitch[6])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[6])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[6])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[6])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[6])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[6])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[7][i])
 		seg_pitch_avg= get_avg(transpose_pitch[7])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[7])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[7])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[7])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[7])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[7])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[8][i])
 		seg_pitch_avg= get_avg(transpose_pitch[8])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[8])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[8])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[8])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[8])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[8])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[9][i])
 		seg_pitch_avg= get_avg(transpose_pitch[9])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[9])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[9])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[9])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[9])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[9])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[10][i])
 		seg_pitch_avg= get_avg(transpose_pitch[10])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[10])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[10])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[10])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[10])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[10])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[11][i])
 		seg_pitch_avg= get_avg(transpose_pitch[11])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[11])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[11])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[11])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[11])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[11])
		row.append(seg_pitch_sum)   
		#timbre arrays
	        seg_timbre = hdf5_getters.get_segments_timbre(h5)
                transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows
		row.append(transpose_timbre[0][i])
  		seg_timbre_avg= get_avg(transpose_timbre[0])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[0])	
		row.append(seg_timbre_max)
		seg_timbre_min = get_min(transpose_timbre[0])
		row.append(seg_timbre_min)
		seg_timbre_stddev=get_stddev(transpose_timbre[0])
		row.append(seg_timbre_stddev)
		seg_timbre_count = get_count(transpose_timbre[0])
		row.append(seg_timbre_count)
		seg_timbre_sum = get_sum(transpose_timbre[0])
		row.append(seg_timbre_sum)   
 		row.append(transpose_timbre[1][i])
 		seg_timbre_avg= get_avg(transpose_timbre[1])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[1])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[1])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[1])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[1])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[1])
		row.append(seg_timbre_sum)   
		row.append(transpose_timbre[2][i])
 		seg_timbre_avg= get_avg(transpose_timbre[2])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[2])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[2])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[2])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[2])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[2])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[3][i])
 		seg_timbre_avg= get_avg(transpose_timbre[3])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[3])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[3])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[3])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[3])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[3])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[4][i])
 		seg_timbre_avg= get_avg(transpose_timbre[4])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[4])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[4])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[4])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[4])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[4])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[5][i])
 		seg_timbre_avg= get_avg(transpose_timbre[5])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[5])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[5])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[5])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[5])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[5])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[6][i])
 		seg_timbre_avg= get_avg(transpose_timbre[6])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[6])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[6])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[6])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[6])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[6])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[7][i])
 		seg_timbre_avg= get_avg(transpose_timbre[7])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[7])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[7])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[7])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[7])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[7])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[8][i])
 		seg_timbre_avg= get_avg(transpose_timbre[8])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[8])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[8])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[8])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[8])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[8])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[9][i])
 		seg_timbre_avg= get_avg(transpose_timbre[9])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[9])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[9])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[9])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[9])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[9])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[10][i])
 		seg_timbre_avg= get_avg(transpose_timbre[10])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[10])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[10])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[10])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[10])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[10])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[11][i])
 		seg_timbre_avg= get_avg(transpose_timbre[11])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[11])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[11])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[11])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[11])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[11])
		row.append(seg_timbre_sum)
	        for item in row_seg2_padding:
			row.append(item)
		writer.writerow(row)
		row=[]
		row=gral_info[:]


            # "--------tatums---------------"
	    tatms_c = hdf5_getters.get_tatums_confidence(h5)
	    group_index=5
	    row_front=padding(245)	#blank spaces left in front of tatums
	    row=[]
	    row=gral_info[:]
	    for i,item in enumerate(tatms_c):
		row.append(group_index)
		row.append(i)
		for item in row_front:	#appending blank spaces at the front of the row
			row.append(item)

		row.append(tatms_c[i])
		tatms_c_avg= get_avg(tatms_c)
		row.append(tatms_c_avg)
	 	tatms_c_max= get_max(tatms_c)
		row.append(tatms_c_max)
	        tatms_c_min = get_min(tatms_c)
		row.append(tatms_c_min)
	        tatms_c_stddev= get_stddev(tatms_c)
		row.append(tatms_c_stddev)
                tatms_c_count = get_count(tatms_c)
		row.append(tatms_c_count)
                tatms_c_sum = get_sum(tatms_c)
		row.append(tatms_c_sum)
                tatms_start = hdf5_getters.get_tatums_start(h5)
		row.append(tatms_start[i])
	        tatms_start_avg= get_avg(tatms_start)
		row.append(tatms_start_avg)
	        tatms_start_max= get_max(tatms_start)
		row.append(tatms_start_max)
	        tatms_start_min = get_min(tatms_start)
		row.append(tatms_start_min)
	        tatms_start_stddev= get_stddev(tatms_start)
		row.append(tatms_start_stddev)
	        tatms_start_count = get_count(tatms_start)
		row.append(tatms_start_count)
	        tatms_start_sum = get_sum(tatms_start)				   
		row.append(tatms_start_sum)
		writer.writerow(row)
		row=[]
		row=gral_info[:]


 
	    transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows
	    #arrays containing the aggregate values of the 12 rows
	    seg_pitch_avg=[]
	    seg_pitch_max=[]
	    seg_pitch_min=[]
            seg_pitch_stddev=[]
            seg_pitch_count=[]
	    seg_pitch_sum=[]
            i=0
	    #Getting the aggregate values in the pitches array
	    for row in transpose_pitch:
		   seg_pitch_avg.append(get_avg(row))
		   seg_pitch_max.append(get_max(row))
	           seg_pitch_min.append(get_min(row))
		   seg_pitch_stddev.append(get_stddev(row))
		   seg_pitch_count.append(get_count(row))
                   seg_pitch_sum.append(get_sum(row))
		   i=i+1

	    #extracting information from the timbre array 
            transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows
	    #arrays containing the aggregate values of the 12 rows
	    seg_timbre_avg=[]
	    seg_timbre_max=[]
	    seg_timbre_min=[]
            seg_timbre_stddev=[]
            seg_timbre_count=[]
	    seg_timbre_sum=[]
            i=0
	    for row in transpose_timbre:
		   seg_timbre_avg.append(get_avg(row))
		   seg_timbre_max.append(get_max(row))
	           seg_timbre_min.append(get_min(row))
		   seg_timbre_stddev.append(get_stddev(row))
		   seg_timbre_count.append(get_count(row))
                   seg_timbre_sum.append(get_sum(row))
		   i=i+1








	    h5.close()
	    count=count+1;
	    print count;
def main():
    outputFile1 = open('SongCSV.csv', 'w')
    csvRowString = ""

    #################################################
    #if you want to prompt the user for the order of attributes in the csv,
    #leave the prompt boolean set to True
    #else, set 'prompt' to False and set the order of attributes in the 'else'
    #clause
    prompt = False
    #################################################
    if prompt == True:
        while prompt:

            prompt = False

            csvAttributeString = raw_input(
                "\n\nIn what order would you like the colums of the CSV file?\n"
                + "Please delineate with commas. The options are: " +
                "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"
                +
                " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo,"
                +
                " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n"
                +
                "For example, you may write \"Title, Tempo, Duration\"...\n\n"
                + "...or exit by typing 'exit'.\n\n")

            csvAttributeList = re.split('\W+', csvAttributeString)
            for i, v in enumerate(csvAttributeList):
                csvAttributeList[i] = csvAttributeList[i].lower()

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    csvRowString += 'AlbumID'
                elif attribute == 'AlbumName'.lower():
                    csvRowString += 'AlbumName'
                elif attribute == 'ArtistID'.lower():
                    csvRowString += 'ArtistID'
                elif attribute == 'ArtistLatitude'.lower():
                    csvRowString += 'ArtistLatitude'
                elif attribute == 'ArtistLocation'.lower():
                    csvRowString += 'ArtistLocation'
                elif attribute == 'ArtistLongitude'.lower():
                    csvRowString += 'ArtistLongitude'
                elif attribute == 'ArtistName'.lower():
                    csvRowString += 'ArtistName'
                elif attribute == 'Danceability'.lower():
                    csvRowString += 'Danceability'
                elif attribute == 'Duration'.lower():
                    csvRowString += 'Duration'
                elif attribute == 'KeySignature'.lower():
                    csvRowString += 'KeySignature'
                elif attribute == 'KeySignatureConfidence'.lower():
                    csvRowString += 'KeySignatureConfidence'
                elif attribute == 'SongID'.lower():
                    csvRowString += "SongID"
                elif attribute == 'Tempo'.lower():
                    csvRowString += 'Tempo'
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += 'TimeSignature'
                elif attribute == 'TimeSignatureConfidence'.lower():
                    csvRowString += 'TimeSignatureConfidence'
                elif attribute == 'Title'.lower():
                    csvRowString += 'Title'
                elif attribute == 'Year'.lower():
                    csvRowString += 'Year'
                elif attribute == 'track_id'.lower():
                    csvRowString += 'track_id'
                elif attribute == 'artist_familiarity'.lower():
                    csvRowString += 'artist_familiarity'
                elif attribute == 'artist_hotttnesss'.lower():
                    csvRowString += 'artist_hotttnesss'
                elif attribute == 'artist_mbid'.lower():
                    csvRowString += 'artist_mbid'
                elif attribute == 'artist_playmeid'.lower():
                    csvRowString += 'artist_playmeid'
                elif attribute == 'artist_7digitalid'.lower():
                    csvRowString += 'artist_7digitalid'
                elif attribute == 'release'.lower():
                    csvRowString += 'release'
                elif attribute == 'release_7digitalid'.lower():
                    csvRowString += 'release_7digitalid'
                elif attribute == 'song_hotttnesss'.lower():
                    csvRowString += 'song_hotttnesss'
                elif attribute == 'track_7digitalid'.lower():
                    csvRowString += 'track_7digitalid'
                elif attribute == 'analysis_sample_rate'.lower():
                    csvRowString += 'analysis_sample_rate'
                elif attribute == 'audio_md5'.lower():
                    csvRowString += 'audio_md5'
                elif attribute == 'end_of_fade_in'.lower():
                    csvRowString += 'end_of_fade_in'
                elif attribute == 'energy'.lower():
                    csvRowString += 'energy'
                elif attribute == 'key'.lower():
                    csvRowString += 'key'
                elif attribute == 'key_confidence'.lower():
                    csvRowString += 'key_confidence'
                elif attribute == 'loudness'.lower():
                    csvRowString += 'loudness'
                elif attribute == 'mode'.lower():
                    csvRowString += 'mode'
                elif attribute == 'mode_confidence'.lower():
                    csvRowString += 'mode_confidence'
                elif attribute == 'start_of_fade_out'.lower():
                    csvRowString += 'start_of_fade_out'
                elif attribute == 'Exit'.lower():
                    sys.exit()
                else:
                    prompt = True
                    print "=============="
                    print "I believe there has been an error with the input."
                    print "=============="
                    break

                csvRowString += ","

            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex - 1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""
    #else, if you want to hard code the order of the csv file and not prompt
    #the user,
    else:
        #################################################
        #change the order of the csv file here
        #Default is to list all available attributes (in alphabetical order)
        csvRowString = (
            "SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,"
            +
            "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature," +
            "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"
            +
            "Title,Year,track_id,artist_hotttnesss,artist_mbid,artist_playmeid,artist_7digitalid,"
            +
            "release,release_7digitalid,song_hotttnesss,track_7digitalid,analysis_sample_rate,audio_md5,"
            +
            "end_of_fade_in,energy,key,key_confidence,loudness,mode,mode_confidence,start_of_fade_out"
        )
        #################################################

        csvAttributeList = re.split('\W+', csvRowString)
        for i, v in enumerate(csvAttributeList):
            csvAttributeList[i] = csvAttributeList[i].lower()
        outputFile1.write("SongNumber,")
        outputFile1.write(csvRowString + "\n")
        csvRowString = ""

    #################################################

    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    basedir = "/vagrant/genrepython/MillionSongSubset"  # "." As the default means the current directory
    ext = ".h5"  #Set the extension here. H5 is the extension for HDF5 files.
    #################################################

    #FOR LOOP
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            print f

            songH5File = hdf5_getters.open_h5_file_read(f)
            song = Song(str(hdf5_getters.get_song_id(songH5File)))

            testDanceability = hdf5_getters.get_danceability(songH5File)
            # print type(testDanceability)
            # print ("Here is the danceability: ") + str(testDanceability)

            song.artistID = str(hdf5_getters.get_artist_id(songH5File))
            song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File))
            song.albumName = str(hdf5_getters.get_release(songH5File))
            song.artistLatitude = str(
                hdf5_getters.get_artist_latitude(songH5File))
            song.artistLocation = str(
                hdf5_getters.get_artist_location(songH5File))
            song.artistLongitude = str(
                hdf5_getters.get_artist_longitude(songH5File))
            song.artistName = str(hdf5_getters.get_artist_name(songH5File))
            song.danceability = str(hdf5_getters.get_danceability(songH5File))
            song.duration = str(hdf5_getters.get_duration(songH5File))
            # song.setGenreList()
            song.keySignature = str(hdf5_getters.get_key(songH5File))
            song.keySignatureConfidence = str(
                hdf5_getters.get_key_confidence(songH5File))
            # song.lyrics = None
            # song.popularity = None
            song.tempo = str(hdf5_getters.get_tempo(songH5File))
            song.timeSignature = str(
                hdf5_getters.get_time_signature(songH5File))
            song.timeSignatureConfidence = str(
                hdf5_getters.get_time_signature_confidence(songH5File))
            song.title = str(hdf5_getters.get_title(songH5File))
            song.year = str(hdf5_getters.get_year(songH5File))
            song.track_id = str(hdf5_getters.get_track_id(songH5File))
            song.artist_familiarity = str(
                hdf5_getters.get_artist_familiarity(songH5File))
            song.artist_hotttnesss = str(
                hdf5_getters.get_artist_hotttnesss(songH5File))
            song.artist_mbid = str(hdf5_getters.get_artist_mbid(songH5File))
            song.artist_playmeid = str(
                hdf5_getters.get_artist_playmeid(songH5File))
            song.artist_7digitalid = str(
                hdf5_getters.get_artist_7digitalid(songH5File))
            song.release = str(hdf5_getters.get_release(songH5File))
            song.release_7digitalid = str(
                hdf5_getters.get_release_7digitalid(songH5File))
            song.song_hotttnesss = str(
                hdf5_getters.get_song_hotttnesss(songH5File))
            song.track_7digitalid = str(
                hdf5_getters.get_track_7digitalid(songH5File))
            song.analysis_sample_rate = str(
                hdf5_getters.get_analysis_sample_rate(songH5File))
            song.audio_md5 = str(hdf5_getters.get_audio_md5(songH5File))
            song.end_of_fade_in = str(
                hdf5_getters.get_end_of_fade_in(songH5File))
            song.energy = str(hdf5_getters.get_energy(songH5File))
            song.key = str(hdf5_getters.get_key(songH5File))
            song.key_confidence = str(
                hdf5_getters.get_key_confidence(songH5File))
            song.loudness = str(hdf5_getters.get_loudness(songH5File))
            song.mode = str(hdf5_getters.get_mode(songH5File))
            song.mode_confidence = str(
                hdf5_getters.get_mode_confidence(songH5File))
            song.start_of_fade_out = str(
                hdf5_getters.get_start_of_fade_out(songH5File))

            #print song count
            csvRowString += str(song.songCount) + ","

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    csvRowString += song.albumID
                elif attribute == 'AlbumName'.lower():
                    albumName = song.albumName
                    albumName = albumName.replace(',', "")
                    csvRowString += "\"" + albumName + "\""
                elif attribute == 'ArtistID'.lower():
                    csvRowString += "\"" + song.artistID + "\""
                elif attribute == 'ArtistLatitude'.lower():
                    latitude = song.artistLatitude
                    if latitude == 'nan':
                        latitude = ''
                    csvRowString += latitude
                elif attribute == 'ArtistLocation'.lower():
                    location = song.artistLocation
                    location = location.replace(',', '')
                    csvRowString += "\"" + location + "\""
                elif attribute == 'ArtistLongitude'.lower():
                    longitude = song.artistLongitude
                    if longitude == 'nan':
                        longitude = ''
                    csvRowString += longitude
                elif attribute == 'ArtistName'.lower():
                    csvRowString += "\"" + song.artistName + "\""
                elif attribute == 'Danceability'.lower():
                    csvRowString += song.danceability
                elif attribute == 'Duration'.lower():
                    csvRowString += song.duration
                elif attribute == 'KeySignature'.lower():
                    csvRowString += song.keySignature
                elif attribute == 'KeySignatureConfidence'.lower():
                    # print "key sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.keySignatureConfidence
                elif attribute == 'SongID'.lower():
                    csvRowString += "\"" + song.id + "\""
                elif attribute == 'Tempo'.lower():
                    # print "Tempo: " + song.tempo
                    csvRowString += song.tempo
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += song.timeSignature
                elif attribute == 'TimeSignatureConfidence'.lower():
                    # print "time sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.timeSignatureConfidence
                elif attribute == 'Title'.lower():
                    csvRowString += "\"" + song.title + "\""
                elif attribute == 'Year'.lower():
                    csvRowString += song.year
                elif attribute == 'track_id'.lower():
                    csvRowString += song.track_id
                elif attribute == 'artist_familiarity'.lower():
                    csvRowString += song.artist_familiarity
                elif attribute == 'artist_hotttnesss'.lower():
                    csvRowString += song.artist_hotttnesss
                elif attribute == 'artist_mbid'.lower():
                    csvRowString += song.artist_mbid
                elif attribute == 'artist_playmeid'.lower():
                    csvRowString += song.artist_playmeid
                elif attribute == 'artist_7digitalid'.lower():
                    csvRowString += song.artist_7digitalid
                elif attribute == 'release'.lower():
                    csvRowString += song.release
                elif attribute == 'release_7digitalid'.lower():
                    csvRowString += song.release_7digitalid
                elif attribute == 'song_hotttnesss'.lower():
                    csvRowString += song.song_hotttnesss
                elif attribute == 'track_7digitalid'.lower():
                    csvRowString += song.track_7digitalid
                elif attribute == 'analysis_sample_rate'.lower():
                    csvRowString += song.analysis_sample_rate
                elif attribute == 'audio_md5'.lower():
                    csvRowString += song.audio_md5
                elif attribute == 'end_of_fade_in'.lower():
                    csvRowString += song.end_of_fade_in
                elif attribute == 'energy'.lower():
                    csvRowString += song.energy
                elif attribute == 'key'.lower():
                    csvRowString += song.key
                elif attribute == 'key_confidence'.lower():
                    csvRowString += song.key_confidence
                elif attribute == 'loudness'.lower():
                    csvRowString += song.loudness
                elif attribute == 'mode'.lower():
                    csvRowString += song.mode
                elif attribute == 'mode_confidence'.lower():
                    csvRowString += song.mode_confidence
                elif attribute == 'start_of_fade_out'.lower():
                    csvRowString += song.start_of_fade_out
                else:
                    csvRowString += "Erm. This didn't work. Error. :( :(\n"

                csvRowString += ","

            #Remove the final comma from each row in the csv
            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex - 1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""

            songH5File.close()

    outputFile1.close()
Ejemplo n.º 6
0
# imports specific to the MSD
import hdf5_getters as GETTERS

cnt = 0
loops = 0

for alpha in string.ascii_uppercase:
    for root, dirs, files in os.walk('/mnt/million-songs/data/' + alpha):
        files = glob.glob(os.path.join(root, '*' + '.h5'))
        for f in files:
            h5 = GETTERS.open_h5_file_read(f)
            num_songs = GETTERS.get_num_songs(h5)
            print f, num_songs

            for i in range(num_songs):
                analysis_sample_rate = GETTERS.get_analysis_sample_rate(h5, i)
                artist_7digitalid = GETTERS.get_artist_7digitalid(h5, i)
                artist_familiarity = GETTERS.get_artist_familiarity(h5, i)
                artist_hotttnesss = GETTERS.get_artist_hotttnesss(h5, i)
                artist_id = GETTERS.get_artist_id(h5, i)
                artist_latitude = GETTERS.get_artist_latitude(h5, i)
                artist_location = GETTERS.get_artist_location(h5, i)
                artist_longitude = GETTERS.get_artist_longitude(h5, i)
                artist_mbid = GETTERS.get_artist_mbid(h5, i)
                artist_mbtags = ','.join(
                    str(e) for e in GETTERS.get_artist_mbtags(h5, i))  # array
                artist_mbtags_count = ','.join(
                    str(e)
                    for e in GETTERS.get_artist_mbtags_count(h5, i))  # array
                artist_name = GETTERS.get_artist_name(h5, i)
                artist_playmeid = GETTERS.get_artist_playmeid(h5, i)
def complete_hd5_to_csv(basedir):
    ext = '.h5'  # Get all files with extension .h5

    # Header title. Essentially it is a schema for all the following songs
    header = [
        'Title', 'Artist familiarity', 'Artist hotness', 'Artist ID',
        'Artist mbID', 'Artist playmeid', 'Artist 7DigitalID',
        'Artist latitude', 'Artist longitude', 'Artist location',
        'Artist Name', 'Release', 'Release 7DigitalID', 'Song ID',
        'Song Hotness', 'Track 7Digital', 'Analysis sample rate', 'Audio md5',
        'Danceability', 'Duration', 'End of Fade', 'Energy', 'Key',
        'Key Confidence', 'Loudness', 'Mode', 'Mode Confidence',
        'Start of fade out', 'Tempo', 'Time signature',
        'Time signature confidence', 'Track ID', 'Year'
    ]

    with open('Tester2.csv', 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=';')

        # writing the header line. This line contains the schema of the data
        csv_writer.writerow(header)

        # Read all files from the given directories
        for root, dirs, files in os.walk(basedir):
            files = glob.glob(os.path.join(root, '*' + ext))
            print(files)

            for f in files:
                h5 = hdf5_getters.open_h5_file_read(f)

                # Write as row all elements. NOTE: Only the serialized elements are parsed and not arrays
                csv_writer.writerow([
                    hdf5_getters.get_title(h5),
                    hdf5_getters.get_artist_familiarity(h5),
                    hdf5_getters.get_artist_hotttnesss(h5),
                    hdf5_getters.get_artist_id(h5),
                    hdf5_getters.get_artist_mbid(h5),
                    hdf5_getters.get_artist_playmeid(h5),
                    hdf5_getters.get_artist_7digitalid(h5),
                    hdf5_getters.get_artist_latitude(h5),
                    hdf5_getters.get_artist_longitude(h5),
                    hdf5_getters.get_artist_location(h5),
                    hdf5_getters.get_artist_name(h5),
                    hdf5_getters.get_release(h5),
                    hdf5_getters.get_release_7digitalid(h5),
                    hdf5_getters.get_song_id(h5),
                    hdf5_getters.get_song_hotttnesss(h5),
                    hdf5_getters.get_track_7digitalid(h5),
                    hdf5_getters.get_analysis_sample_rate(h5),
                    hdf5_getters.get_audio_md5(h5),
                    hdf5_getters.get_danceability(h5),
                    hdf5_getters.get_duration(h5),
                    hdf5_getters.get_end_of_fade_in(h5),
                    hdf5_getters.get_energy(h5),
                    hdf5_getters.get_key(h5),
                    hdf5_getters.get_key_confidence(h5),
                    hdf5_getters.get_loudness(h5),
                    hdf5_getters.get_mode(h5),
                    hdf5_getters.get_mode_confidence(h5),
                    hdf5_getters.get_start_of_fade_out(h5),
                    hdf5_getters.get_tempo(h5),
                    hdf5_getters.get_time_signature(h5),
                    hdf5_getters.get_time_signature_confidence(h5),
                    hdf5_getters.get_track_id(h5),
                    hdf5_getters.get_year(h5)
                ])

                # For debugging purposes. Everything as expected
                # print()
                # print("Num of songs -- ", hdf5_getters.get_num_songs(h5))  # One song per file
                # print("Title -- ", hdf5_getters.get_title(h5))  # Print the title of a specific h5 file
                # print("Artist familiarity -- ", hdf5_getters.get_artist_familiarity(h5))
                # print("Artist hotness -- ", hdf5_getters.get_artist_hotttnesss(h5))
                # print("Artist ID -- ", hdf5_getters.get_artist_id(h5))
                # print("Artist mbID -- ", hdf5_getters.get_artist_mbid(h5))
                # print("Artist playmeid -- ", hdf5_getters.get_artist_playmeid(h5))
                # print("Artist 7DigitalID -- ", hdf5_getters.get_artist_7digitalid(h5))
                # print("Artist latitude -- ", hdf5_getters.get_artist_latitude(h5))
                # print("Artist longitude -- ", hdf5_getters.get_artist_longitude(h5))
                # print("Artist location -- ", hdf5_getters.get_artist_location(h5))
                # print("Artist Name -- ", hdf5_getters.get_artist_name(h5))
                # print("Release -- ", hdf5_getters.get_release(h5))
                # print("Release 7DigitalID -- ", hdf5_getters.get_release_7digitalid(h5))
                # print("Song ID -- ", hdf5_getters.get_song_id(h5))
                # print("Song Hotness -- ", hdf5_getters.get_song_hotttnesss(h5))
                # print("Track 7Digital -- ", hdf5_getters.get_track_7digitalid(h5))
                # print("Analysis sample rate -- ", hdf5_getters.get_analysis_sample_rate(h5))
                # print("Audio md5 -- ", hdf5_getters.get_audio_md5(h5))
                # print("Danceability -- ", hdf5_getters.get_danceability(h5))
                # print("Duration -- ", hdf5_getters.get_duration(h5))
                # print("End of Fade -- ", hdf5_getters.get_end_of_fade_in(h5))
                # print("Energy -- ", hdf5_getters.get_energy(h5))
                # print("Key -- ", hdf5_getters.get_key(h5))
                # print("Key Confidence -- ", hdf5_getters.get_key_confidence(h5))
                # print("Loudness -- ", hdf5_getters.get_loudness(h5))
                # print("Mode -- ", hdf5_getters.get_mode(h5))
                # print("Mode Confidence -- ", hdf5_getters.get_mode_confidence(h5))
                # print("Start of fade out -- ", hdf5_getters.get_start_of_fade_out(h5))
                # print("Tempo -- ", hdf5_getters.get_tempo(h5))
                # print("Time signature -- ", hdf5_getters.get_time_signature(h5))
                # print("Time signature confidence -- ", hdf5_getters.get_time_signature_confidence(h5))
                # print("Track ID -- ", hdf5_getters.get_track_id(h5))
                # # print("Artist mbtags -- ", hdf5_getters.get_artist_mbtags(h5))
                # # print("Artist mbtags count -- ", hdf5_getters.get_artist_mbtags_count(h5))
                # print("Year -- ", hdf5_getters.get_year(h5))

                h5.close()
def data_to_flat_file(basedir, ext='.h5'):
    """This function extract the information from the tables and creates the flat file."""
    count = 0
    #song counter
    list_to_write = []
    row_to_write = ""
    writer = csv.writer(open("metadata_wholeA.csv", "wb"))
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            print f  #the name of the file
            h5 = hdf5_getters.open_h5_file_read(f)
            title = hdf5_getters.get_title(h5)
            title = title.replace('"', '')
            comma = title.find(',')  #eliminating commas in the title
            if comma != -1:
                print title
                time.sleep(1)
            album = hdf5_getters.get_release(h5)
            album = album.replace('"', '')  #eliminating commas in the album
            comma = album.find(',')
            if comma != -1:
                print album
                time.sleep(1)
            artist_name = hdf5_getters.get_artist_name(h5)
            comma = artist_name.find(',')
            if comma != -1:
                print artist_name
                time.sleep(1)
            artist_name = artist_name.replace('"',
                                              '')  #eliminating double quotes
            duration = hdf5_getters.get_duration(h5)
            samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
            artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
            artist_fam = hdf5_getters.get_artist_familiarity(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_fam) == True:
                artist_fam = -1
            artist_hotness = hdf5_getters.get_artist_hotttnesss(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_hotness) == True:
                artist_hotness = -1
            artist_id = hdf5_getters.get_artist_id(h5)
            artist_lat = hdf5_getters.get_artist_latitude(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_lat) == True:
                artist_lat = -1
            artist_loc = hdf5_getters.get_artist_location(h5)
            #checks artist_loc to see if it is a hyperlink if it is set as empty string
            artist_loc = artist_loc.replace(",", "\,")
            if artist_loc.startswith("<a"):
                artist_loc = ""
            if len(artist_loc) > 100:
                artist_loc = ""
            artist_lon = hdf5_getters.get_artist_longitude(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_lon) == True:
                artist_lon = -1
            artist_mbid = hdf5_getters.get_artist_mbid(h5)
            artist_pmid = hdf5_getters.get_artist_playmeid(h5)
            audio_md5 = hdf5_getters.get_audio_md5(h5)
            danceability = hdf5_getters.get_danceability(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(danceability) == True:
                danceability = -1
            end_fade_in = hdf5_getters.get_end_of_fade_in(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(end_fade_in) == True:
                end_fade_in = -1
            energy = hdf5_getters.get_energy(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(energy) == True:
                energy = -1
            song_key = hdf5_getters.get_key(h5)
            key_c = hdf5_getters.get_key_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(key_c) == True:
                key_c = -1
            loudness = hdf5_getters.get_loudness(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(loudness) == True:
                loudness = -1
            mode = hdf5_getters.get_mode(h5)
            mode_conf = hdf5_getters.get_mode_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(mode_conf) == True:
                mode_conf = -1
            release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
            song_hot = hdf5_getters.get_song_hotttnesss(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(song_hot) == True:
                song_hot = -1
            song_id = hdf5_getters.get_song_id(h5)
            start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
            tempo = hdf5_getters.get_tempo(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(tempo) == True:
                tempo = -1
            time_sig = hdf5_getters.get_time_signature(h5)
            time_sig_c = hdf5_getters.get_time_signature_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(time_sig_c) == True:
                time_sig_c = -1
            track_id = hdf5_getters.get_track_id(h5)
            track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
            year = hdf5_getters.get_year(h5)
            bars_c = hdf5_getters.get_bars_confidence(h5)
            bars_c_avg = get_avg(bars_c)
            bars_c_max = get_max(bars_c)
            bars_c_min = get_min(bars_c)
            bars_c_stddev = get_stddev(bars_c)
            bars_c_count = get_count(bars_c)
            bars_c_sum = get_sum(bars_c)
            bars_start = hdf5_getters.get_bars_start(h5)
            bars_start_avg = get_avg(bars_start)
            bars_start_max = get_max(bars_start)
            bars_start_min = get_min(bars_start)
            bars_start_stddev = get_stddev(bars_start)
            bars_start_count = get_count(bars_start)
            bars_start_sum = get_sum(bars_start)
            beats_c = hdf5_getters.get_beats_confidence(h5)
            beats_c_avg = get_avg(beats_c)
            beats_c_max = get_max(beats_c)
            beats_c_min = get_min(beats_c)
            beats_c_stddev = get_stddev(beats_c)
            beats_c_count = get_count(beats_c)
            beats_c_sum = get_sum(beats_c)
            beats_start = hdf5_getters.get_beats_start(h5)
            beats_start_avg = get_avg(beats_start)
            beats_start_max = get_max(beats_start)
            beats_start_min = get_min(beats_start)
            beats_start_stddev = get_stddev(beats_start)
            beats_start_count = get_count(beats_start)
            beats_start_sum = get_sum(beats_start)
            sec_c = hdf5_getters.get_sections_confidence(h5)
            sec_c_avg = get_avg(sec_c)
            sec_c_max = get_max(sec_c)
            sec_c_min = get_min(sec_c)
            sec_c_stddev = get_stddev(sec_c)
            sec_c_count = get_count(sec_c)
            sec_c_sum = get_sum(sec_c)
            sec_start = hdf5_getters.get_sections_start(h5)
            sec_start_avg = get_avg(sec_start)
            sec_start_max = get_max(sec_start)
            sec_start_min = get_min(sec_start)
            sec_start_stddev = get_stddev(sec_start)
            sec_start_count = get_count(sec_start)
            sec_start_sum = get_sum(sec_start)
            seg_c = hdf5_getters.get_segments_confidence(h5)
            seg_c_avg = get_avg(seg_c)
            seg_c_max = get_max(seg_c)
            seg_c_min = get_min(seg_c)
            seg_c_stddev = get_stddev(seg_c)
            seg_c_count = get_count(seg_c)
            seg_c_sum = get_sum(seg_c)
            seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
            seg_loud_max_avg = get_avg(seg_loud_max)
            seg_loud_max_max = get_max(seg_loud_max)
            seg_loud_max_min = get_min(seg_loud_max)
            seg_loud_max_stddev = get_stddev(seg_loud_max)
            seg_loud_max_count = get_count(seg_loud_max)
            seg_loud_max_sum = get_sum(seg_loud_max)
            seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
            seg_loud_max_time_avg = get_avg(seg_loud_max_time)
            seg_loud_max_time_max = get_max(seg_loud_max_time)
            seg_loud_max_time_min = get_min(seg_loud_max_time)
            seg_loud_max_time_stddev = get_stddev(seg_loud_max_time)
            seg_loud_max_time_count = get_count(seg_loud_max_time)
            seg_loud_max_time_sum = get_sum(seg_loud_max_time)
            seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
            seg_loud_start_avg = get_avg(seg_loud_start)
            seg_loud_start_max = get_max(seg_loud_start)
            seg_loud_start_min = get_min(seg_loud_start)
            seg_loud_start_stddev = get_stddev(seg_loud_start)
            seg_loud_start_count = get_count(seg_loud_start)
            seg_loud_start_sum = get_sum(seg_loud_start)
            seg_pitch = hdf5_getters.get_segments_pitches(h5)
            pitch_size = len(seg_pitch)
            seg_start = hdf5_getters.get_segments_start(h5)
            seg_start_avg = get_avg(seg_start)
            seg_start_max = get_max(seg_start)
            seg_start_min = get_min(seg_start)
            seg_start_stddev = get_stddev(seg_start)
            seg_start_count = get_count(seg_start)
            seg_start_sum = get_sum(seg_start)
            seg_timbre = hdf5_getters.get_segments_timbre(h5)
            tatms_c = hdf5_getters.get_tatums_confidence(h5)
            tatms_c_avg = get_avg(tatms_c)
            tatms_c_max = get_max(tatms_c)
            tatms_c_min = get_min(tatms_c)
            tatms_c_stddev = get_stddev(tatms_c)
            tatms_c_count = get_count(tatms_c)
            tatms_c_sum = get_sum(tatms_c)
            tatms_start = hdf5_getters.get_tatums_start(h5)
            tatms_start_avg = get_avg(tatms_start)
            tatms_start_max = get_max(tatms_start)
            tatms_start_min = get_min(tatms_start)
            tatms_start_stddev = get_stddev(tatms_start)
            tatms_start_count = get_count(tatms_start)
            tatms_start_sum = get_sum(tatms_start)

            #Getting the genres
            genre_set = 0  #flag to see if the genre has been set or not
            art_trm = hdf5_getters.get_artist_terms(h5)
            trm_freq = hdf5_getters.get_artist_terms_freq(h5)
            trn_wght = hdf5_getters.get_artist_terms_weight(h5)
            a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
            genre_indexes = get_genre_indexes(
                trm_freq)  #index of the highest freq
            final_genre = []
            genres_so_far = []
            for i in range(len(genre_indexes)):
                genre_tmp = get_genre(
                    art_trm, genre_indexes[i]
                )  #genre that corresponds to the highest freq
                genres_so_far = genre_dict.get_genre_in_dict(
                    genre_tmp)  #getting the genre from the dictionary
                if len(genres_so_far) != 0:
                    for i in genres_so_far:
                        final_genre.append(i)
                        genre_set = 1  #genre was found in dictionary

            if genre_set == 1:
                col_num = []

                for genre in final_genre:
                    column = int(
                        genre)  #getting the column number of the genre
                    col_num.append(column)

                genre_array = genre_columns(col_num)  #genre array
            else:
                genre_array = genre_columns(
                    -1)  #the genre was not found in the dictionary

            transpose_pitch = seg_pitch.transpose(
            )  #this is to tranpose the matrix,so we can have 12 rows
            #arrays containing the aggregate values of the 12 rows
            seg_pitch_avg = []
            seg_pitch_max = []
            seg_pitch_min = []
            seg_pitch_stddev = []
            seg_pitch_count = []
            seg_pitch_sum = []
            i = 0
            #Getting the aggregate values in the pitches array
            for row in transpose_pitch:
                seg_pitch_avg.append(get_avg(row))
                seg_pitch_max.append(get_max(row))
                seg_pitch_min.append(get_min(row))
                seg_pitch_stddev.append(get_stddev(row))
                seg_pitch_count.append(get_count(row))
                seg_pitch_sum.append(get_sum(row))
                i = i + 1

            #extracting information from the timbre array
            transpose_timbre = seg_pitch.transpose(
            )  #tranposing matrix, to have 12 rows
            #arrays containing the aggregate values of the 12 rows
            seg_timbre_avg = []
            seg_timbre_max = []
            seg_timbre_min = []
            seg_timbre_stddev = []
            seg_timbre_count = []
            seg_timbre_sum = []
            i = 0
            for row in transpose_timbre:
                seg_timbre_avg.append(get_avg(row))
                seg_timbre_max.append(get_max(row))
                seg_timbre_min.append(get_min(row))
                seg_timbre_stddev.append(get_stddev(row))
                seg_timbre_count.append(get_count(row))
                seg_timbre_sum.append(get_sum(row))
                i = i + 1

        #Writing to the flat file
            writer.writerow([
                title, album, artist_name, year, duration, seg_start_count,
                tempo
            ])

            h5.close()
            count = count + 1
            print count
            temp = hdf5_getters.get_artist_terms(songH5File)
            song.artistTerms = remove_trap_characters(str(list(temp)))
            song.artistTermsCount = get_list_length(temp)
            song.artistTermsFreq = remove_trap_characters(
                str(list(hdf5_getters.get_artist_terms_freq(songH5File))))
            song.artistTermsWeight = remove_trap_characters(
                str(list(hdf5_getters.get_artist_terms_weight(songH5File))))

            temp = hdf5_getters.get_artist_mbtags(songH5File)
            song.artistMBTags = remove_trap_characters(str(list(temp)))
            song.artistMBTagsOuterCount = get_list_length(temp)
            song.artistMBTagsCount = remove_trap_characters(
                str(list(hdf5_getters.get_artist_mbtags_count(songH5File))))
            song.analysisSampleRate = remove_trap_characters(
                str(hdf5_getters.get_analysis_sample_rate(songH5File)))
            song.audioMD5 = remove_trap_characters(
                str(hdf5_getters.get_audio_md5(songH5File)))
            song.endOfFadeIn = remove_trap_characters(
                str(hdf5_getters.get_end_of_fade_in(songH5File)))
            song.startOfFadeOut = remove_trap_characters(
                str(hdf5_getters.get_start_of_fade_out(songH5File)))
            song.energy = remove_trap_characters(
                str(hdf5_getters.get_energy(songH5File)))
            song.release = remove_trap_characters(
                str(hdf5_getters.get_release(songH5File)))
            song.release7digitalid = remove_trap_characters(
                str(hdf5_getters.get_release_7digitalid(songH5File)))
            song.songHotness = remove_trap_characters(
                str(hdf5_getters.get_song_hotttnesss(songH5File)))
            song.track7digitalid = remove_trap_characters(
Ejemplo n.º 10
0
def data_to_flat_file(basedir, ext='.h5'):
    """ This function extracts the information from the tables and creates the flat file. """
    count = 0
    #song counter
    list_to_write = []
    group_index = 0
    row_to_write = ""
    writer = csv.writer(open("complete.csv", "wb"))
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            row = []
            print f
            h5 = hdf5_getters.open_h5_file_read(f)
            title = hdf5_getters.get_title(h5)
            title = title.replace('"', '')
            row.append(title)
            comma = title.find(',')
            if comma != -1:
                print title
                time.sleep(1)
            album = hdf5_getters.get_release(h5)
            album = album.replace('"', '')
            row.append(album)
            comma = album.find(',')
            if comma != -1:
                print album
                time.sleep(1)
            artist_name = hdf5_getters.get_artist_name(h5)
            comma = artist_name.find(',')
            if comma != -1:
                print artist_name
                time.sleep(1)
            artist_name = artist_name.replace('"', '')
            row.append(artist_name)
            duration = hdf5_getters.get_duration(h5)
            row.append(duration)
            samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
            row.append(samp_rt)
            artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
            row.append(artist_7digitalid)
            artist_fam = hdf5_getters.get_artist_familiarity(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_fam) == True:
                artist_fam = -1
            row.append(artist_fam)
            artist_hotness = hdf5_getters.get_artist_hotttnesss(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_hotness) == True:
                artist_hotness = -1
            row.append(artist_hotness)
            artist_id = hdf5_getters.get_artist_id(h5)
            row.append(artist_id)
            artist_lat = hdf5_getters.get_artist_latitude(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_lat) == True:
                artist_lat = -1
            row.append(artist_lat)
            artist_loc = hdf5_getters.get_artist_location(h5)
            row.append(artist_loc)
            artist_lon = hdf5_getters.get_artist_longitude(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_lon) == True:
                artist_lon = -1
            row.append(artist_lon)
            artist_mbid = hdf5_getters.get_artist_mbid(h5)
            row.append(artist_mbid)

            #Getting the genre
            art_trm = hdf5_getters.get_artist_terms(h5)
            trm_freq = hdf5_getters.get_artist_terms_freq(h5)
            trn_wght = hdf5_getters.get_artist_terms_weight(h5)
            a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
            genre_indexes = get_genre_indexes(
                trm_freq)  #index of the highest freq
            genre_set = 0  #flag to see if the genre has been set or not
            final_genre = []
            genres_so_far = []
            for i in range(len(genre_indexes)):
                genre_tmp = get_genre(
                    art_trm, genre_indexes[i]
                )  #genre that corresponds to the highest freq
                genres_so_far = genre_dict.get_genre_in_dict(
                    genre_tmp)  #getting the genre from the dictionary
                if len(genres_so_far) != 0:
                    for i in genres_so_far:
                        final_genre.append(i)
                        genre_set = 1

            if genre_set == 1:
                col_num = []
                for i in final_genre:
                    column = int(i)  #getting the column number of the genre
                    col_num.append(column)

                genre_array = genre_columns(col_num)  #genre array
                for i in range(len(
                        genre_array)):  #appending the genre_array to the row
                    row.append(genre_array[i])
            else:
                genre_array = genre_columns(
                    -1
                )  #when there is no genre matched, return an array of [0...0]
                for i in range(len(
                        genre_array)):  #appending the genre_array to the row
                    row.append(genre_array[i])

            artist_pmid = hdf5_getters.get_artist_playmeid(h5)
            row.append(artist_pmid)
            audio_md5 = hdf5_getters.get_audio_md5(h5)
            row.append(audio_md5)
            danceability = hdf5_getters.get_danceability(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(danceability) == True:
                danceability = -1
            row.append(danceability)
            end_fade_in = hdf5_getters.get_end_of_fade_in(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(end_fade_in) == True:
                end_fade_in = -1
            row.append(end_fade_in)
            energy = hdf5_getters.get_energy(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(energy) == True:
                energy = -1
            row.append(energy)
            song_key = hdf5_getters.get_key(h5)
            row.append(song_key)
            key_c = hdf5_getters.get_key_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(key_c) == True:
                key_c = -1
            row.append(key_c)
            loudness = hdf5_getters.get_loudness(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(loudness) == True:
                loudness = -1
            row.append(loudness)
            mode = hdf5_getters.get_mode(h5)
            row.append(mode)
            mode_conf = hdf5_getters.get_mode_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(mode_conf) == True:
                mode_conf = -1
            row.append(mode_conf)
            release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
            row.append(release_7digitalid)
            song_hot = hdf5_getters.get_song_hotttnesss(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(song_hot) == True:
                song_hot = -1
            row.append(song_hot)
            song_id = hdf5_getters.get_song_id(h5)
            row.append(song_id)
            start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
            row.append(start_fade_out)
            tempo = hdf5_getters.get_tempo(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(tempo) == True:
                tempo = -1
            row.append(tempo)
            time_sig = hdf5_getters.get_time_signature(h5)
            row.append(time_sig)
            time_sig_c = hdf5_getters.get_time_signature_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(time_sig_c) == True:
                time_sig_c = -1
            row.append(time_sig_c)
            track_id = hdf5_getters.get_track_id(h5)
            row.append(track_id)
            track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
            row.append(track_7digitalid)
            year = hdf5_getters.get_year(h5)
            row.append(year)
            bars_c = hdf5_getters.get_bars_confidence(h5)
            bars_start = hdf5_getters.get_bars_start(h5)
            row_bars_padding = padding(
                245
            )  #this is the array that will be attached at the end of th row

            #--------------bars---------------"
            gral_info = []
            gral_info = row[:]
            empty = []
            for i, item in enumerate(bars_c):
                row.append(group_index)
                row.append(i)
                row.append(bars_c[i])
                bars_c_avg = get_avg(bars_c)
                row.append(bars_c_avg)
                bars_c_max = get_max(bars_c)
                row.append(bars_c_max)
                bars_c_min = get_min(bars_c)
                row.append(bars_c_min)
                bars_c_stddev = get_stddev(bars_c)
                row.append(bars_c_stddev)
                bars_c_count = get_count(bars_c)
                row.append(bars_c_count)
                bars_c_sum = get_sum(bars_c)
                row.append(bars_c_sum)
                row.append(bars_start[i])
                bars_start_avg = get_avg(bars_start)
                row.append(bars_start_avg)
                bars_start_max = get_max(bars_start)
                row.append(bars_start_max)
                bars_start_min = get_min(bars_start)
                row.append(bars_start_min)
                bars_start_stddev = get_stddev(bars_start)
                row.append(bars_start_stddev)
                bars_start_count = get_count(bars_start)
                row.append(bars_start_count)
                bars_start_sum = get_sum(bars_start)
                row.append(bars_start_sum)
                for i in row_bars_padding:
                    row.append(i)

                writer.writerow(row)
                row = []
                row = gral_info[:]

    #--------beats---------------"
            beats_c = hdf5_getters.get_beats_confidence(h5)
            group_index = 1
            row = []
            row = gral_info[:]
            row_front = padding(
                14)  #blanks left in front of the row(empty spaces for bars)
            row_beats_padding = padding(231)
            for i, item in enumerate(beats_c):
                row.append(group_index)
                row.append(i)
                for index in row_front:  #padding blanks in front of the beats
                    row.append(index)

                row.append(beats_c[i])
                beats_c_avg = get_avg(beats_c)
                row.append(beats_c_avg)
                beats_c_max = get_max(beats_c)
                row.append(beats_c_max)
                beats_c_min = get_min(beats_c)
                row.append(beats_c_min)
                beats_c_stddev = get_stddev(beats_c)
                row.append(beats_c_stddev)
                beats_c_count = get_count(beats_c)
                row.append(beats_c_count)
                beats_c_sum = get_sum(beats_c)
                row.append(beats_c_sum)
                beats_start = hdf5_getters.get_beats_start(h5)
                row.append(beats_start[i])
                beats_start_avg = get_avg(beats_start)
                row.append(beats_start_avg)
                beats_start_max = get_max(beats_start)
                row.append(beats_start_max)
                beats_start_min = get_min(beats_start)
                row.append(beats_start_min)
                beats_start_stddev = get_stddev(beats_start)
                row.append(beats_start_stddev)
                beats_start_count = get_count(beats_start)
                row.append(beats_start_count)
                beats_start_sum = get_sum(beats_start)
                row.append(beats_start_sum)
                for i in row_beats_padding:
                    row.append(i)

                writer.writerow(row)
                row = []
                row = gral_info[:]

    # "--------sections---------------"
            row_sec_padding = padding(
                217)  #blank spaces left at the end of the row
            sec_c = hdf5_getters.get_sections_confidence(h5)
            group_index = 2
            row = []
            row = gral_info[:]
            row_front = padding(
                28)  #blank spaces left in front(empty spaces for bars,beats)
            for i, item in enumerate(sec_c):
                row.append(group_index)
                row.append(i)
                for index in row_front:  #padding blanks in front of the sections
                    row.append(index)

                row.append(sec_c[i])
                sec_c_avg = get_avg(sec_c)
                row.append(sec_c_avg)
                sec_c_max = get_max(sec_c)
                row.append(sec_c_max)
                sec_c_min = get_min(sec_c)
                row.append(sec_c_min)
                sec_c_stddev = get_stddev(sec_c)
                row.append(sec_c_stddev)
                sec_c_count = get_count(sec_c)
                row.append(sec_c_count)
                sec_c_sum = get_sum(sec_c)
                row.append(sec_c_sum)
                sec_start = hdf5_getters.get_sections_start(h5)
                row.append(sec_start[i])
                sec_start_avg = get_avg(sec_start)
                row.append(sec_start_avg)
                sec_start_max = get_max(sec_start)
                row.append(sec_start_max)
                sec_start_min = get_min(sec_start)
                row.append(sec_start_min)
                sec_start_stddev = get_stddev(sec_start)
                row.append(sec_start_stddev)
                sec_start_count = get_count(sec_start)
                row.append(sec_start_count)
                sec_start_sum = get_sum(sec_start)
                row.append(sec_start_sum)
                for i in row_sec_padding:  #appending the blank spaces at the end of the row
                    row.append(i)

                writer.writerow(row)
                row = []
                row = gral_info[:]

    #--------segments---------------"
            row_seg_padding = padding(182)  #blank spaces at the end of the row
            row_front = padding(42)  #blank spaces left in front of segments
            seg_c = hdf5_getters.get_segments_confidence(h5)
            group_index = 3
            row = []
            row = gral_info[:]
            for i, item in enumerate(seg_c):
                row.append(group_index)
                row.append(i)
                for index in row_front:  #padding blanks in front of the segments
                    row.append(index)

                row.append(seg_c[i])
                seg_c_avg = get_avg(seg_c)
                row.append(seg_c_avg)
                seg_c_max = get_max(seg_c)
                row.append(seg_c_max)
                seg_c_min = get_min(seg_c)
                row.append(seg_c_min)
                seg_c_stddev = get_stddev(seg_c)
                row.append(seg_c_stddev)
                seg_c_count = get_count(seg_c)
                row.append(seg_c_count)
                seg_c_sum = get_sum(seg_c)
                row.append(seg_c_sum)
                seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
                row.append(seg_loud_max[i])
                seg_loud_max_avg = get_avg(seg_loud_max)
                row.append(seg_loud_max_avg)
                seg_loud_max_max = get_max(seg_loud_max)
                row.append(seg_loud_max_max)
                seg_loud_max_min = get_min(seg_loud_max)
                row.append(seg_loud_max_min)
                seg_loud_max_stddev = get_stddev(seg_loud_max)
                row.append(seg_loud_max_stddev)
                seg_loud_max_count = get_count(seg_loud_max)
                row.append(seg_loud_max_count)
                seg_loud_max_sum = get_sum(seg_loud_max)
                row.append(seg_loud_max_sum)
                seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(
                    h5)
                row.append(seg_loud_max_time[i])
                seg_loud_max_time_avg = get_avg(seg_loud_max_time)
                row.append(seg_loud_max_time_avg)
                seg_loud_max_time_max = get_max(seg_loud_max_time)
                row.append(seg_loud_max_time_max)
                seg_loud_max_time_min = get_min(seg_loud_max_time)
                row.append(seg_loud_max_time_min)
                seg_loud_max_time_stddev = get_stddev(seg_loud_max_time)
                row.append(seg_loud_max_time_stddev)
                seg_loud_max_time_count = get_count(seg_loud_max_time)
                row.append(seg_loud_max_time_count)
                seg_loud_max_time_sum = get_sum(seg_loud_max_time)
                row.append(seg_loud_max_time_sum)
                seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
                row.append(seg_loud_start[i])
                seg_loud_start_avg = get_avg(seg_loud_start)
                row.append(seg_loud_start_avg)
                seg_loud_start_max = get_max(seg_loud_start)
                row.append(seg_loud_start_max)
                seg_loud_start_min = get_min(seg_loud_start)
                row.append(seg_loud_start_min)
                seg_loud_start_stddev = get_stddev(seg_loud_start)
                row.append(seg_loud_start_stddev)
                seg_loud_start_count = get_count(seg_loud_start)
                row.append(seg_loud_start_count)
                seg_loud_start_sum = get_sum(seg_loud_start)
                row.append(seg_loud_start_sum)
                seg_start = hdf5_getters.get_segments_start(h5)
                row.append(seg_start[i])
                seg_start_avg = get_avg(seg_start)
                row.append(seg_start_avg)
                seg_start_max = get_max(seg_start)
                row.append(seg_start_max)
                seg_start_min = get_min(seg_start)
                row.append(seg_start_min)
                seg_start_stddev = get_stddev(seg_start)
                row.append(seg_start_stddev)
                seg_start_count = get_count(seg_start)
                row.append(seg_start_count)
                seg_start_sum = get_sum(seg_start)
                row.append(seg_start_sum)
                for i in row_seg_padding:  #appending blank spaces at the end of the row
                    row.append(i)

                writer.writerow(row)
                row = []
                row = gral_info[:]

            #----------segments pitch and timbre---------------"
            row_seg2_padding = padding(
                14)  #blank spaces left at the end of the row
            row_front = padding(
                77)  #blank spaces left at the front of the segments and timbre
            seg_pitch = hdf5_getters.get_segments_pitches(h5)
            transpose_pitch = seg_pitch.transpose(
            )  #this is to tranpose the matrix,so we can have 12 rows
            group_index = 4
            row = []
            row = gral_info[:]
            for i, item in enumerate(transpose_pitch[0]):
                row.append(group_index)
                row.append(i)
                for index in row_front:  #padding blanks in front of segments and timbre
                    row.append(index)

                row.append(transpose_pitch[0][i])
                seg_pitch_avg = get_avg(transpose_pitch[0])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[0])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[0])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[0])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[0])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[0])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[1][i])
                seg_pitch_avg = get_avg(transpose_pitch[1])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[1])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[1])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[1])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[1])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[1])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[2][i])
                seg_pitch_avg = get_avg(transpose_pitch[2])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[2])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[2])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[2])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[2])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[2])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[3][i])
                seg_pitch_avg = get_avg(transpose_pitch[3])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[3])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[3])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[3])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[3])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[3])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[4][i])
                seg_pitch_avg = get_avg(transpose_pitch[4])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[4])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[4])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[4])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[4])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[4])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[5][i])
                seg_pitch_avg = get_avg(transpose_pitch[5])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[5])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[5])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[5])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[5])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[5])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[6][i])
                seg_pitch_avg = get_avg(transpose_pitch[6])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[6])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[6])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[6])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[6])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[6])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[7][i])
                seg_pitch_avg = get_avg(transpose_pitch[7])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[7])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[7])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[7])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[7])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[7])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[8][i])
                seg_pitch_avg = get_avg(transpose_pitch[8])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[8])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[8])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[8])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[8])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[8])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[9][i])
                seg_pitch_avg = get_avg(transpose_pitch[9])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[9])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[9])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[9])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[9])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[9])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[10][i])
                seg_pitch_avg = get_avg(transpose_pitch[10])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[10])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[10])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[10])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[10])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[10])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[11][i])
                seg_pitch_avg = get_avg(transpose_pitch[11])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[11])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[11])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[11])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[11])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[11])
                row.append(seg_pitch_sum)
                #timbre arrays
                seg_timbre = hdf5_getters.get_segments_timbre(h5)
                transpose_timbre = seg_pitch.transpose(
                )  #tranposing matrix, to have 12 rows
                row.append(transpose_timbre[0][i])
                seg_timbre_avg = get_avg(transpose_timbre[0])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[0])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[0])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[0])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[0])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[0])
                row.append(seg_timbre_sum)
                row.append(transpose_timbre[1][i])
                seg_timbre_avg = get_avg(transpose_timbre[1])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[1])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[1])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[1])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[1])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[1])
                row.append(seg_timbre_sum)
                row.append(transpose_timbre[2][i])
                seg_timbre_avg = get_avg(transpose_timbre[2])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[2])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[2])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[2])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[2])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[2])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[3][i])
                seg_timbre_avg = get_avg(transpose_timbre[3])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[3])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[3])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[3])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[3])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[3])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[4][i])
                seg_timbre_avg = get_avg(transpose_timbre[4])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[4])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[4])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[4])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[4])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[4])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[5][i])
                seg_timbre_avg = get_avg(transpose_timbre[5])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[5])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[5])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[5])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[5])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[5])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[6][i])
                seg_timbre_avg = get_avg(transpose_timbre[6])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[6])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[6])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[6])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[6])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[6])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[7][i])
                seg_timbre_avg = get_avg(transpose_timbre[7])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[7])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[7])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[7])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[7])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[7])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[8][i])
                seg_timbre_avg = get_avg(transpose_timbre[8])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[8])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[8])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[8])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[8])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[8])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[9][i])
                seg_timbre_avg = get_avg(transpose_timbre[9])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[9])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[9])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[9])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[9])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[9])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[10][i])
                seg_timbre_avg = get_avg(transpose_timbre[10])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[10])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[10])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[10])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[10])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[10])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[11][i])
                seg_timbre_avg = get_avg(transpose_timbre[11])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[11])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[11])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[11])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[11])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[11])
                row.append(seg_timbre_sum)
                for item in row_seg2_padding:
                    row.append(item)
                writer.writerow(row)
                row = []
                row = gral_info[:]

    # "--------tatums---------------"
            tatms_c = hdf5_getters.get_tatums_confidence(h5)
            group_index = 5
            row_front = padding(245)  #blank spaces left in front of tatums
            row = []
            row = gral_info[:]
            for i, item in enumerate(tatms_c):
                row.append(group_index)
                row.append(i)
                for item in row_front:  #appending blank spaces at the front of the row
                    row.append(item)

                row.append(tatms_c[i])
                tatms_c_avg = get_avg(tatms_c)
                row.append(tatms_c_avg)
                tatms_c_max = get_max(tatms_c)
                row.append(tatms_c_max)
                tatms_c_min = get_min(tatms_c)
                row.append(tatms_c_min)
                tatms_c_stddev = get_stddev(tatms_c)
                row.append(tatms_c_stddev)
                tatms_c_count = get_count(tatms_c)
                row.append(tatms_c_count)
                tatms_c_sum = get_sum(tatms_c)
                row.append(tatms_c_sum)
                tatms_start = hdf5_getters.get_tatums_start(h5)
                row.append(tatms_start[i])
                tatms_start_avg = get_avg(tatms_start)
                row.append(tatms_start_avg)
                tatms_start_max = get_max(tatms_start)
                row.append(tatms_start_max)
                tatms_start_min = get_min(tatms_start)
                row.append(tatms_start_min)
                tatms_start_stddev = get_stddev(tatms_start)
                row.append(tatms_start_stddev)
                tatms_start_count = get_count(tatms_start)
                row.append(tatms_start_count)
                tatms_start_sum = get_sum(tatms_start)
                row.append(tatms_start_sum)
                writer.writerow(row)
                row = []
                row = gral_info[:]

            transpose_pitch = seg_pitch.transpose(
            )  #this is to tranpose the matrix,so we can have 12 rows
            #arrays containing the aggregate values of the 12 rows
            seg_pitch_avg = []
            seg_pitch_max = []
            seg_pitch_min = []
            seg_pitch_stddev = []
            seg_pitch_count = []
            seg_pitch_sum = []
            i = 0
            #Getting the aggregate values in the pitches array
            for row in transpose_pitch:
                seg_pitch_avg.append(get_avg(row))
                seg_pitch_max.append(get_max(row))
                seg_pitch_min.append(get_min(row))
                seg_pitch_stddev.append(get_stddev(row))
                seg_pitch_count.append(get_count(row))
                seg_pitch_sum.append(get_sum(row))
                i = i + 1

            #extracting information from the timbre array
            transpose_timbre = seg_pitch.transpose(
            )  #tranposing matrix, to have 12 rows
            #arrays containing the aggregate values of the 12 rows
            seg_timbre_avg = []
            seg_timbre_max = []
            seg_timbre_min = []
            seg_timbre_stddev = []
            seg_timbre_count = []
            seg_timbre_sum = []
            i = 0
            for row in transpose_timbre:
                seg_timbre_avg.append(get_avg(row))
                seg_timbre_max.append(get_max(row))
                seg_timbre_min.append(get_min(row))
                seg_timbre_stddev.append(get_stddev(row))
                seg_timbre_count.append(get_count(row))
                seg_timbre_sum.append(get_sum(row))
                i = i + 1

            h5.close()
            count = count + 1
            print count
Ejemplo n.º 11
0
def main():
    outputFile1 = open('SongCSV.csv', 'w')
    csvRowString = ""

    #################################################
    #if you want to prompt the user for the order of attributes in the csv,
    #leave the prompt boolean set to True
    #else, set 'prompt' to False and set the order of attributes in the 'else'
    #clause
    prompt = False
    #################################################
    if prompt == True:
        while prompt:

            prompt = False

            csvAttributeString = raw_input(
                "\n\nIn what order would you like the colums of the CSV file?\n"
                + "Please delineate with commas. The options are: " +
                "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"
                +
                " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo,"
                +
                " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n"
                +
                "For example, you may write \"Title, Tempo, Duration\"...\n\n"
                + "...or exit by typing 'exit'.\n\n")

            csvAttributeList = re.split('\W+', csvAttributeString)
            for i, v in enumerate(csvAttributeList):
                csvAttributeList[i] = csvAttributeList[i].lower()

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    csvRowString += 'AlbumID'
                elif attribute == 'AlbumName'.lower():
                    csvRowString += 'AlbumName'
                elif attribute == 'ArtistID'.lower():
                    csvRowString += 'ArtistID'
                elif attribute == 'ArtistLatitude'.lower():
                    csvRowString += 'ArtistLatitude'
                elif attribute == 'ArtistLocation'.lower():
                    csvRowString += 'ArtistLocation'
                elif attribute == 'ArtistLongitude'.lower():
                    csvRowString += 'ArtistLongitude'
                elif attribute == 'ArtistName'.lower():
                    csvRowString += 'ArtistName'
                elif attribute == 'Danceability'.lower():
                    csvRowString += 'Danceability'
                elif attribute == 'Duration'.lower():
                    csvRowString += 'Duration'
                elif attribute == 'KeySignature'.lower():
                    csvRowString += 'KeySignature'
                elif attribute == 'KeySignatureConfidence'.lower():
                    csvRowString += 'KeySignatureConfidence'
                elif attribute == 'SongID'.lower():
                    csvRowString += "SongID"
                elif attribute == 'Tempo'.lower():
                    csvRowString += 'Tempo'
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += 'TimeSignature'
                elif attribute == 'TimeSignatureConfidence'.lower():
                    csvRowString += 'TimeSignatureConfidence'
                elif attribute == 'Title'.lower():
                    csvRowString += 'Title'
                elif attribute == 'Year'.lower():
                    csvRowString += 'Year'
                elif attribute == 'Familiarity'.lower():  ####Added by us!
                    csvRowString += song.familiarity
                elif attribute == 'artist_mbid'.lower():
                    csvRowString += song.artist_mbid
                elif attribute == 'artist_playmeid'.lower():
                    csvRowString += song.artist_playmeid
                elif attribute == 'artist_7digid'.lower():
                    csvRowString += song.artist_7digid
                elif attribute == 'hottness'.lower():
                    csvRowString += song.hottness
                elif attribute == 'song_hottness'.lower():
                    csvRowString += song.song_hottness
                elif attribute == 'digitalid7'.lower():
                    csvRowString += song.digitalid7
                elif attribute == 'similar_artists'.lower():
                    csvRowString += song.similar_artists
                elif attribute == 'artist_terms'.lower():
                    csvRowString += song.artist_terms
                elif attribute == 'art_terms_freq'.lower():
                    csvRowString += song.art_terms_freq
                elif attribute == 'art_terms_weight'.lower():
                    csvRowString += song.art_terms_weight
                elif attribute == 'a_sample_rate'.lower():
                    csvRowString += song.a_sample_rate
                elif attribute == 'audio_md5'.lower():
                    csvRowString += song.audio_md5
                elif attribute == 'end_of_fade_in'.lower():
                    csvRowString += song.end_of_fade_in
                elif attribute == 'energy'.lower():
                    csvRowString += song.energy
                elif attribute == 'loudness'.lower():
                    csvRowString += song.loudness
                elif attribute == 'mode'.lower():
                    csvRowString += song.mode
                elif attribute == 'mode_conf'.lower():
                    csvRowString += song.mode_conf
                elif attribute == 'start_of_fade_out'.lower():
                    csvRowString += song.start_of_fade_out
                elif attribute == 'trackid'.lower():
                    csvRowString += song.trackid
                elif attribute == 'segm_start'.lower():
                    csvRowString += song.segm_start
                elif attribute == 'segm_conf'.lower():
                    csvRowString += song.segm_conf
                elif attribute == 'segm_pitch'.lower():
                    csvRowString += song.segm_pitch
                elif attribute == 'segm_timbre'.lower():
                    csvRowString += song.segm_timbre
                elif attribute == 'segm_max_loud'.lower():
                    csvRowString += song.segm_max_loud
                elif attribute == 'segm_max_loud_time'.lower():
                    csvRowString += song.segm_max_loud_time
                elif attribute == 'segm_loud_start'.lower():
                    csvRowString += song.segm_loud_start
                elif attribute == 'sect_start'.lower():
                    csvRowString += song.sect_start
                elif attribute == 'sect_conf'.lower():
                    csvRowString += song.sect_conf
                elif attribute == 'beats_start'.lower():
                    csvRowString += song.beats_start
                elif attribute == 'beats_conf'.lower():
                    csvRowString += song.beats_conf
                elif attribute == 'bars_start'.lower():
                    csvRowString += song.bars_start
                elif attribute == 'bars_conf'.lower():
                    csvRowString += song.bars_conf
                elif attribute == 'tatums_start'.lower():
                    csvRowString += song.tatums_start
                elif attribute == 'tatums_conf'.lower():
                    csvRowString += song.tatums_conf
                elif attribute == 'artist_mbtags'.lower():
                    csvRowString += song.artist_mbtags
                elif attribute == 'artist_mbtags_count'.lower():
                    csvRowString += song.artist_mbtags_count
                elif attribute == 'Exit'.lower():
                    sys.exit()
                else:
                    prompt = True
                    print("==============")
                    print("I believe there has been an error with the input.")
                    print("==============")
                    break

                csvRowString += ","

            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex - 1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""
    #else, if you want to hard code the order of the csv file and not prompt
    #the user,
    else:
        #################################################
        #change the order of the csv file here
        #Default is to list all available attributes (in alphabetical order)
        csvRowString = "SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,Title,Year,Familiarity,Artist_Mbid,Artist_PlaymeId,Artist_7didId,Hottness,Song_Hottness,7digitalid,A_Sample_Rate,Audio_Md5,End_Of_Fade_In,Energy,Loudness,Mode,Mode_Conf,Start_Of_Fade_Out,TrackId"
        #################################################

        csvAttributeList = re.split(',', csvRowString)
        for i, v in enumerate(csvAttributeList):
            csvAttributeList[i] = csvAttributeList[i].lower()
        csvRowString += "\n"
        outputFile1.write(csvRowString)
        csvRowString = ""

    #################################################

    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    basedir = "/home/bigdata/smalltest/"  # "." As the default means the current directory
    ext = ".h5"  #Set the extension here. H5 is the extension for HDF5 files.
    #################################################

    #FOR LOOP
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            print(f)

            songH5File = hdf5_getters.open_h5_file_read(f)
            song = Song(str(hdf5_getters.get_song_id(songH5File)))

            # testDanceability = hdf5_getters.get_danceability(songH5File)
            # print type(testDanceability)
            # print ("Here is the danceability: ") + str(testDanceability)

            song.artistID = str(hdf5_getters.get_artist_id(songH5File))
            song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File))
            song.albumName = str(hdf5_getters.get_release(songH5File))
            song.artistLatitude = str(
                hdf5_getters.get_artist_latitude(songH5File))
            song.artistLocation = str(
                hdf5_getters.get_artist_location(songH5File))
            song.artistLongitude = str(
                hdf5_getters.get_artist_longitude(songH5File))
            song.artistName = str(hdf5_getters.get_artist_name(songH5File))
            song.danceability = str(hdf5_getters.get_danceability(songH5File))
            song.duration = str(hdf5_getters.get_duration(songH5File))
            # song.setGenreList()
            song.keySignature = str(hdf5_getters.get_key(songH5File))
            song.keySignatureConfidence = str(
                hdf5_getters.get_key_confidence(songH5File))
            # song.lyrics = None
            # song.popularity = None
            song.tempo = str(hdf5_getters.get_tempo(songH5File))
            song.timeSignature = str(
                hdf5_getters.get_time_signature(songH5File))
            song.timeSignatureConfidence = str(
                hdf5_getters.get_time_signature_confidence(songH5File))
            song.title = str(hdf5_getters.get_title(songH5File))
            song.year = str(hdf5_getters.get_year(songH5File))

            #########Added by us!
            song.familiarity = str(
                hdf5_getters.get_artist_familiarity(songH5File))
            song.artist_mbid = str(hdf5_getters.get_artist_mbid(songH5File))
            song.artist_playmeid = str(
                hdf5_getters.get_artist_playmeid(songH5File))
            song.artist_7digid = str(
                hdf5_getters.get_artist_7digitalid(songH5File))
            song.hottness = str(hdf5_getters.get_artist_hotttnesss(songH5File))
            song.song_hottness = str(
                hdf5_getters.get_song_hotttnesss(songH5File))
            song.digitalid7 = str(
                hdf5_getters.get_track_7digitalid(songH5File))
            #song.similar_artists = str(hdf5_getters.get_similar_artists(songH5File))
            #song.artist_terms = str(hdf5_getters.get_artist_terms(songH5File))
            #song.art_terms_freq = str(hdf5_getters.get_artist_terms_freq(songH5File))
            #song.art_terms_weight = str(hdf5_getters.get_artist_terms_weight(songH5File))
            song.a_sample_rate = str(
                hdf5_getters.get_analysis_sample_rate(songH5File))
            song.audio_md5 = str(hdf5_getters.get_audio_md5(songH5File))
            song.end_of_fade_in = str(
                hdf5_getters.get_end_of_fade_in(songH5File))
            song.energy = str(hdf5_getters.get_energy(songH5File))
            song.loudness = str(hdf5_getters.get_loudness(songH5File))
            song.mode = str(hdf5_getters.get_mode(songH5File))
            song.mode_conf = str(hdf5_getters.get_mode_confidence(songH5File))
            song.start_of_fade_out = str(
                hdf5_getters.get_start_of_fade_out(songH5File))
            song.trackid = str(hdf5_getters.get_track_id(songH5File))
            #song.segm_start = str(hdf5_getters.get_segments_start(songH5File))
            #song.segm_conf = str(hdf5_getters.get_segments_confidence(songH5File))
            #song.segm_pitch = str(hdf5_getters.get_segments_pitches(songH5File))
            #song.segm_timbre = str(hdf5_getters.get_segments_timbre(songH5File))
            #song.segm_max_loud = str(hdf5_getters.get_segments_loudness_max(songH5File))
            #song.segm_max_loud_time = str(hdf5_getters.get_segments_loudness_max_time(songH5File))
            #song.segm_loud_start = str(hdf5_getters.get_segments_loudness_start(songH5File))
            #song.sect_start = str(hdf5_getters.get_sections_start(songH5File))
            #song.sect_conf = str(hdf5_getters.get_sections_confidence(songH5File))
            #song.beats_start = str(hdf5_getters.get_beats_start(songH5File))
            #song.beats_conf = str(hdf5_getters.get_beats_confidence(songH5File))
            #song.bars_start = str(hdf5_getters.get_bars_start(songH5File))
            #song.bars_conf = str(hdf5_getters.get_bars_confidence(songH5File))
            #song.tatums_start = str(hdf5_getters.get_tatums_start(songH5File))
            #song.tatums_conf = str(hdf5_getters.get_tatums_confidence(songH5File))
            #song.artist_mbtags = str(hdf5_getters.get_artist_mbtags(songH5File))
            #song.artist_mbtags_count = str(hdf5_getters.get_artist_mbtags_count(songH5File))

            #print song count
            #csvRowString += str(song.songCount) + ","

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    csvRowString += song.albumID
                elif attribute == 'AlbumName'.lower():
                    albumName = song.albumName
                    albumName = albumName.replace("b\"", "")
                    albumName = albumName.replace("\"", "")
                    albumName = albumName.replace(',', "")
                    csvRowString += "\"" + albumName + "\""
                elif attribute == 'ArtistID'.lower():
                    csvRowString += "\"" + song.artistID + "\""
                elif attribute == 'ArtistLatitude'.lower():
                    latitude = song.artistLatitude
                    if latitude == 'nan':
                        latitude = ''
                    csvRowString += latitude
                elif attribute == 'ArtistLocation'.lower():
                    location = song.artistLocation
                    location = location.replace(',', '')
                    location = location.replace("b\"", "")
                    location = location.replace("\"", "")
                    csvRowString += "\"" + location + "\""
                elif attribute == 'ArtistLongitude'.lower():
                    longitude = song.artistLongitude
                    if longitude == 'nan':
                        longitude = ''
                    csvRowString += longitude
                elif attribute == 'ArtistName'.lower():
                    artistName = song.artistName
                    artistName = artistName.replace("b\"", "")
                    artistName = artistName.replace("\"", "")
                    csvRowString += "\"" + artistName + "\""
                elif attribute == 'Danceability'.lower():
                    csvRowString += song.danceability
                elif attribute == 'Duration'.lower():
                    csvRowString += song.duration
                elif attribute == 'KeySignature'.lower():
                    csvRowString += song.keySignature
                elif attribute == 'KeySignatureConfidence'.lower():
                    # print "key sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.keySignatureConfidence
                elif attribute == 'SongID'.lower():
                    csvRowString += "\"" + song.id + "\""
                elif attribute == 'Tempo'.lower():
                    # print "Tempo: " + song.tempo
                    csvRowString += song.tempo
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += song.timeSignature
                elif attribute == 'TimeSignatureConfidence'.lower():
                    # print "time sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.timeSignatureConfidence
                elif attribute == 'Title'.lower():
                    t = song.title
                    t = t.replace("b\"", "")
                    t = t.replace("\"", "")
                    csvRowString += "\"" + t + "\""
                elif attribute == 'Year'.lower():
                    csvRowString += song.year
                elif attribute == 'Familiarity'.lower():  ####Added by us!
                    csvRowString += song.familiarity
                elif attribute == 'artist_mbid'.lower():
                    csvRowString += "\"" + song.artist_mbid + "\""
                elif attribute == 'artist_playmeid'.lower():
                    csvRowString += song.artist_playmeid
                elif attribute == 'artist_7digid'.lower():
                    csvRowString += song.artist_7digid
                elif attribute == 'hottness'.lower():
                    csvRowString += song.hottness
                elif attribute == 'song_hottness'.lower():
                    csvRowString += song.song_hottness
                elif attribute == 'digitalid7'.lower():
                    csvRowString += song.digitalid7
                elif attribute == 'similar_artists'.lower():
                    csvRowString += song.similar_artists
                elif attribute == 'artist_terms'.lower():
                    csvRowString += song.artist_terms
                elif attribute == 'art_terms_freq'.lower():
                    csvRowString += song.art_terms_freq
                elif attribute == 'art_terms_weight'.lower():
                    csvRowString += song.art_terms_weight
                elif attribute == 'a_sample_rate'.lower():
                    csvRowString += song.a_sample_rate
                elif attribute == 'audio_md5'.lower():
                    csvRowString += "\"" + song.audio_md5 + "\""
                elif attribute == 'end_of_fade_in'.lower():
                    csvRowString += song.end_of_fade_in
                elif attribute == 'energy'.lower():
                    csvRowString += song.energy
                elif attribute == 'loudness'.lower():
                    csvRowString += song.loudness
                elif attribute == 'mode'.lower():
                    csvRowString += song.mode
                elif attribute == 'mode_conf'.lower():
                    csvRowString += song.mode_conf
                elif attribute == 'start_of_fade_out'.lower():
                    csvRowString += song.start_of_fade_out
                elif attribute == 'trackid'.lower():
                    csvRowString += "\"" + song.trackid + "\""
                elif attribute == 'segm_start'.lower():
                    csvRowString += song.segm_start
                elif attribute == 'segm_conf'.lower():
                    csvRowString += song.segm_conf
                elif attribute == 'segm_pitch'.lower():
                    csvRowString += song.segm_pitch
                elif attribute == 'segm_timbre'.lower():
                    csvRowString += song.segm_timbre
                elif attribute == 'segm_max_loud'.lower():
                    csvRowString += song.segm_max_loud
                elif attribute == 'segm_max_loud_time'.lower():
                    csvRowString += song.segm_max_loud_time
                elif attribute == 'segm_loud_start'.lower():
                    csvRowString += song.segm_loud_start
                elif attribute == 'sect_start'.lower():
                    csvRowString += song.sect_start
                elif attribute == 'sect_conf'.lower():
                    csvRowString += song.sect_conf
                elif attribute == 'beats_start'.lower():
                    csvRowString += song.beats_start
                elif attribute == 'beats_conf'.lower():
                    csvRowString += song.beats_conf
                elif attribute == 'bars_start'.lower():
                    csvRowString += song.bars_start
                elif attribute == 'bars_conf'.lower():
                    csvRowString += song.bars_conf
                elif attribute == 'tatums_start'.lower():
                    csvRowString += song.tatums_start
                elif attribute == 'tatums_conf'.lower():
                    csvRowString += song.tatums_conf
                elif attribute == 'artist_mbtags'.lower():
                    csvRowString += song.artist_mbtags
                elif attribute == 'artist_mbtags_count'.lower():
                    csvRowString += song.artist_mbtags_count
                else:
                    csvRowString += "\"ERR\""

                csvRowString += ","

            #Remove the final comma from each row in the csv
            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex - 1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""

            songH5File.close()

    outputFile1.close()
Ejemplo n.º 12
0

cnt = 0
loops = 0


for alpha in string.ascii_uppercase :
   for root, dirs, files in os.walk('/mnt/million-songs/data/'+alpha):
      files = glob.glob(os.path.join(root,'*'+'.h5'))
      for f in files :
         h5 = GETTERS.open_h5_file_read(f)
         num_songs = GETTERS.get_num_songs(h5)
         print f, num_songs

         for i in range(num_songs):
            analysis_sample_rate = GETTERS.get_analysis_sample_rate(h5, i)
            artist_7digitalid = GETTERS.get_artist_7digitalid(h5, i)
            artist_familiarity = GETTERS.get_artist_familiarity(h5, i)
            artist_hotttnesss = GETTERS.get_artist_hotttnesss(h5, i)
            artist_id = GETTERS.get_artist_id(h5, i)
            artist_latitude = GETTERS.get_artist_latitude(h5, i)
            artist_location = GETTERS.get_artist_location(h5, i)
            artist_longitude = GETTERS.get_artist_longitude(h5, i)
            artist_mbid = GETTERS.get_artist_mbid(h5, i)
            artist_mbtags = ','.join(str(e) for e in GETTERS.get_artist_mbtags(h5, i)) # array
            artist_mbtags_count = ','.join(str(e) for e in GETTERS.get_artist_mbtags_count(h5, i)) # array
            artist_name = GETTERS.get_artist_name(h5, i)
            artist_playmeid = GETTERS.get_artist_playmeid(h5, i)
            artist_terms = ','.join(str(e) for e in GETTERS.get_artist_terms(h5, i)) # array
            #artist_terms_freq = ','.join(str(e) for e in GETTERS.get_artist_terms_freq(h5, i)) # array
            #artist_terms_weight = ','.join(str(e) for e in GETTERS.get_artist_terms_weight(h5, i)) # array
def main():
    outputFile1 = open('SongCSV.csv', 'w')
    csvRowString = ""

    #################################################
    #if you want to prompt the user for the order of attributes in the csv,
    #leave the prompt boolean set to True
    #else, set 'prompt' to False and set the order of attributes in the 'else'
    #clause
    prompt = False
    #################################################
    if prompt == True:
        while prompt:

            prompt = False

            csvAttributeString = raw_input("\n\nIn what order would you like the colums of the CSV file?\n" +
                "Please delineate with commas. The options are: " +
                "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"+
                " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," +
                " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" +
                "For example, you may write \"Title, Tempo, Duration\"...\n\n" +
                "...or exit by typing 'exit'.\n\n")

            csvAttributeList = re.split('\W+', csvAttributeString)
            for i, v in enumerate(csvAttributeList):
                csvAttributeList[i] = csvAttributeList[i].lower()

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"


                if attribute == 'AlbumID'.lower():
                    csvRowString += 'AlbumID'
                elif attribute == 'AlbumName'.lower():
                    csvRowString += 'AlbumName'
                elif attribute == 'ArtistID'.lower():
                    csvRowString += 'ArtistID'
                elif attribute == 'ArtistLatitude'.lower():
                    csvRowString += 'ArtistLatitude'
                elif attribute == 'ArtistLocation'.lower():
                    csvRowString += 'ArtistLocation'
                elif attribute == 'ArtistLongitude'.lower():
                    csvRowString += 'ArtistLongitude'
                elif attribute == 'ArtistName'.lower():
                    csvRowString += 'ArtistName'
                elif attribute == 'Danceability'.lower():
                    csvRowString += 'Danceability'
                elif attribute == 'Duration'.lower():
                    csvRowString += 'Duration'
                elif attribute == 'KeySignature'.lower():
                    csvRowString += 'KeySignature'
                elif attribute == 'KeySignatureConfidence'.lower():
                    csvRowString += 'KeySignatureConfidence'
                elif attribute == 'SongID'.lower():
                    csvRowString += "SongID"
                elif attribute == 'Tempo'.lower():
                    csvRowString += 'Tempo'
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += 'TimeSignature'
                elif attribute == 'TimeSignatureConfidence'.lower():
                    csvRowString += 'TimeSignatureConfidence'
                elif attribute == 'Title'.lower():
                    csvRowString += 'Title'
                elif attribute == 'Year'.lower():
                    csvRowString += 'Year'
                elif attribute == 'track_id'.lower():
                    csvRowString += 'track_id' 
                elif attribute == 'artist_familiarity'.lower():
                    csvRowString += 'artist_familiarity' 
                elif attribute == 'artist_hotttnesss'.lower():
                    csvRowString += 'artist_hotttnesss' 
                elif attribute == 'artist_mbid'.lower():
                    csvRowString += 'artist_mbid'
                elif attribute == 'artist_playmeid'.lower():
                    csvRowString += 'artist_playmeid'
                elif attribute == 'artist_7digitalid'.lower():
                    csvRowString += 'artist_7digitalid'
                elif attribute == 'release'.lower():
                    csvRowString += 'release' 
                elif attribute == 'release_7digitalid'.lower():
                    csvRowString += 'release_7digitalid' 
                elif attribute == 'song_hotttnesss'.lower():
                    csvRowString += 'song_hotttnesss'
                elif attribute == 'track_7digitalid'.lower():
                    csvRowString += 'track_7digitalid' 
                elif attribute == 'analysis_sample_rate'.lower():
                    csvRowString += 'analysis_sample_rate' 
                elif attribute == 'audio_md5'.lower():
                    csvRowString += 'audio_md5' 
                elif attribute == 'end_of_fade_in'.lower():
                    csvRowString += 'end_of_fade_in'
                elif attribute == 'energy'.lower():
                    csvRowString += 'energy'  
                elif attribute == 'key'.lower():
                    csvRowString += 'key'  
                elif attribute == 'key_confidence'.lower():
                    csvRowString += 'key_confidence' 
                elif attribute == 'loudness'.lower():
                    csvRowString += 'loudness' 
                elif attribute == 'mode'.lower():
                    csvRowString += 'mode'  
                elif attribute == 'mode_confidence'.lower():
                    csvRowString += 'mode_confidence'   
                elif attribute == 'start_of_fade_out'.lower():
                    csvRowString += 'start_of_fade_out'                                                                                
                elif attribute == 'Exit'.lower():
                    sys.exit()
                else:
                    prompt = True
                    print "=============="
                    print "I believe there has been an error with the input."
                    print "=============="
                    break

                csvRowString += ","

            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex-1]
            csvRowString += "\n"
            outputFile1.write(csvRowString);
            csvRowString = ""
    #else, if you want to hard code the order of the csv file and not prompt
    #the user, 
    else:
        #################################################
        #change the order of the csv file here
        #Default is to list all available attributes (in alphabetical order)
        csvRowString = ("SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,"+
            "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,"+
            "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"+
            "Title,Year,track_id,artist_hotttnesss,artist_mbid,artist_playmeid,artist_7digitalid,"+
            "release,release_7digitalid,song_hotttnesss,track_7digitalid,analysis_sample_rate,audio_md5,"+
            "end_of_fade_in,energy,key,key_confidence,loudness,mode,mode_confidence,start_of_fade_out")
        #################################################

        csvAttributeList = re.split('\W+', csvRowString)
        for i, v in enumerate(csvAttributeList):
            csvAttributeList[i] = csvAttributeList[i].lower()
        outputFile1.write("SongNumber,");
        outputFile1.write(csvRowString + "\n");
        csvRowString = ""  

    #################################################


    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    basedir = "/vagrant/genrepython/MillionSongSubset" # "." As the default means the current directory
    ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files.
    #################################################

    #FOR LOOP
    for root, dirs, files in os.walk(basedir):        
        files = glob.glob(os.path.join(root,'*'+ext))
        for f in files:
            print f

            songH5File = hdf5_getters.open_h5_file_read(f)
            song = Song(str(hdf5_getters.get_song_id(songH5File)))

            testDanceability = hdf5_getters.get_danceability(songH5File)
            # print type(testDanceability)
            # print ("Here is the danceability: ") + str(testDanceability)

            song.artistID = str(hdf5_getters.get_artist_id(songH5File))
            song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File))
            song.albumName = str(hdf5_getters.get_release(songH5File))
            song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File))
            song.artistLocation = str(hdf5_getters.get_artist_location(songH5File))
            song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File))
            song.artistName = str(hdf5_getters.get_artist_name(songH5File))
            song.danceability = str(hdf5_getters.get_danceability(songH5File))
            song.duration = str(hdf5_getters.get_duration(songH5File))
            # song.setGenreList()
            song.keySignature = str(hdf5_getters.get_key(songH5File))
            song.keySignatureConfidence = str(hdf5_getters.get_key_confidence(songH5File))
            # song.lyrics = None
            # song.popularity = None
            song.tempo = str(hdf5_getters.get_tempo(songH5File))
            song.timeSignature = str(hdf5_getters.get_time_signature(songH5File))
            song.timeSignatureConfidence = str(hdf5_getters.get_time_signature_confidence(songH5File))
            song.title = str(hdf5_getters.get_title(songH5File))
            song.year = str(hdf5_getters.get_year(songH5File))
            song.track_id = str(hdf5_getters.get_track_id(songH5File))
            song.artist_familiarity = str(hdf5_getters.get_artist_familiarity(songH5File))
            song.artist_hotttnesss = str(hdf5_getters.get_artist_hotttnesss(songH5File))
            song.artist_mbid = str(hdf5_getters.get_artist_mbid(songH5File))
            song.artist_playmeid = str(hdf5_getters.get_artist_playmeid(songH5File))
            song.artist_7digitalid = str(hdf5_getters.get_artist_7digitalid(songH5File))
            song.release = str(hdf5_getters.get_release(songH5File))
            song.release_7digitalid = str(hdf5_getters.get_release_7digitalid(songH5File))
            song.song_hotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File))
            song.track_7digitalid = str(hdf5_getters.get_track_7digitalid(songH5File))
            song.analysis_sample_rate = str(hdf5_getters.get_analysis_sample_rate(songH5File))
            song.audio_md5 = str(hdf5_getters.get_audio_md5(songH5File))
            song.end_of_fade_in = str(hdf5_getters.get_end_of_fade_in(songH5File))
            song.energy = str(hdf5_getters.get_energy(songH5File))
            song.key = str(hdf5_getters.get_key(songH5File))
            song.key_confidence = str(hdf5_getters.get_key_confidence(songH5File))
            song.loudness = str(hdf5_getters.get_loudness(songH5File))
            song.mode = str(hdf5_getters.get_mode(songH5File))
            song.mode_confidence = str(hdf5_getters.get_mode_confidence(songH5File))
            song.start_of_fade_out = str(hdf5_getters.get_start_of_fade_out(songH5File))

            #print song count
            csvRowString += str(song.songCount) + ","

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    csvRowString += song.albumID
                elif attribute == 'AlbumName'.lower():
                    albumName = song.albumName
                    albumName = albumName.replace(',',"")
                    csvRowString += "\"" + albumName + "\""
                elif attribute == 'ArtistID'.lower():
                    csvRowString += "\"" + song.artistID + "\""
                elif attribute == 'ArtistLatitude'.lower():
                    latitude = song.artistLatitude
                    if latitude == 'nan':
                        latitude = ''
                    csvRowString += latitude
                elif attribute == 'ArtistLocation'.lower():
                    location = song.artistLocation
                    location = location.replace(',','')
                    csvRowString += "\"" + location + "\""
                elif attribute == 'ArtistLongitude'.lower():
                    longitude = song.artistLongitude
                    if longitude == 'nan':
                        longitude = ''
                    csvRowString += longitude                
                elif attribute == 'ArtistName'.lower():
                    csvRowString += "\"" + song.artistName + "\""                
                elif attribute == 'Danceability'.lower():
                    csvRowString += song.danceability
                elif attribute == 'Duration'.lower():
                    csvRowString += song.duration
                elif attribute == 'KeySignature'.lower():
                    csvRowString += song.keySignature
                elif attribute == 'KeySignatureConfidence'.lower():
                    # print "key sig conf: " + song.timeSignatureConfidence                                 
                    csvRowString += song.keySignatureConfidence
                elif attribute == 'SongID'.lower():
                    csvRowString += "\"" + song.id + "\""
                elif attribute == 'Tempo'.lower():
                    # print "Tempo: " + song.tempo
                    csvRowString += song.tempo
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += song.timeSignature
                elif attribute == 'TimeSignatureConfidence'.lower():
                    # print "time sig conf: " + song.timeSignatureConfidence                                   
                    csvRowString += song.timeSignatureConfidence
                elif attribute == 'Title'.lower():
                    csvRowString += "\"" + song.title + "\""
                elif attribute == 'Year'.lower():
                    csvRowString += song.year
                elif attribute == 'track_id'.lower():
                    csvRowString += song.track_id 
                elif attribute == 'artist_familiarity'.lower():
                    csvRowString += song.artist_familiarity  
                elif attribute == 'artist_hotttnesss'.lower():
                    csvRowString += song.artist_hotttnesss 
                elif attribute == 'artist_mbid'.lower():
                    csvRowString += song.artist_mbid
                elif attribute == 'artist_playmeid'.lower():
                    csvRowString += song.artist_playmeid 
                elif attribute == 'artist_7digitalid'.lower():
                    csvRowString += song.artist_7digitalid 
                elif attribute == 'release'.lower():
                    csvRowString += song.release
                elif attribute == 'release_7digitalid'.lower():
                    csvRowString += song.release_7digitalid 
                elif attribute == 'song_hotttnesss'.lower():
                    csvRowString += song.song_hotttnesss  
                elif attribute == 'track_7digitalid'.lower():
                    csvRowString += song.track_7digitalid 
                elif attribute == 'analysis_sample_rate'.lower():
                    csvRowString += song.analysis_sample_rate  
                elif attribute == 'audio_md5'.lower():
                    csvRowString += song.audio_md5 
                elif attribute == 'end_of_fade_in'.lower():
                    csvRowString += song.end_of_fade_in  
                elif attribute == 'energy'.lower():
                    csvRowString += song.energy 
                elif attribute == 'key'.lower():
                    csvRowString += song.key 
                elif attribute == 'key_confidence'.lower():
                    csvRowString += song.key_confidence 
                elif attribute == 'loudness'.lower():
                    csvRowString += song.loudness
                elif attribute == 'mode'.lower():
                    csvRowString += song.mode   
                elif attribute == 'mode_confidence'.lower():
                    csvRowString += song.mode_confidence    
                elif attribute == 'start_of_fade_out'.lower():
                    csvRowString += song.start_of_fade_out                                                                              
                else:
                    csvRowString += "Erm. This didn't work. Error. :( :(\n"

                csvRowString += ","

            #Remove the final comma from each row in the csv
            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex-1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""

            songH5File.close()

    outputFile1.close()
def hd5_single_random_file_parser():
    # Open an h5 file in read mode
    h5 = hdf5_getters.open_h5_file_read(
        '/home/skalogerakis/Documents/MillionSong/MillionSongSubset/A/M/G/TRAMGDX12903CEF79F.h5'
    )

    function_tracker = filter(
        lambda x: x.startswith('get'),
        hdf5_getters.__dict__.keys())  # Detects all the getter functions

    for f in function_tracker:  # Print everything in function tracker
        print(f)

    # First effort to check what each field contains.
    print()  # 55 available fields (exluding number of songs fields)
    print("Num of songs -- ",
          hdf5_getters.get_num_songs(h5))  # One song per file
    print("Title -- ",
          hdf5_getters.get_title(h5))  # Print the title of a specific h5 file
    print("Artist familiarity -- ", hdf5_getters.get_artist_familiarity(h5))
    print("Artist hotness -- ", hdf5_getters.get_artist_hotttnesss(h5))
    print("Artist ID -- ", hdf5_getters.get_artist_id(h5))
    print("Artist mbID -- ", hdf5_getters.get_artist_mbid(h5))
    print("Artist playmeid -- ", hdf5_getters.get_artist_playmeid(h5))
    print("Artist 7DigitalID -- ", hdf5_getters.get_artist_7digitalid(h5))
    print("Artist latitude -- ", hdf5_getters.get_artist_latitude(h5))
    print("Artist longitude -- ", hdf5_getters.get_artist_longitude(h5))
    print("Artist location -- ", hdf5_getters.get_artist_location(h5))
    print("Artist Name -- ", hdf5_getters.get_artist_name(h5))
    print("Release -- ", hdf5_getters.get_release(h5))
    print("Release 7DigitalID -- ", hdf5_getters.get_release_7digitalid(h5))
    print("Song ID -- ", hdf5_getters.get_song_id(h5))
    print("Song Hotness -- ", hdf5_getters.get_song_hotttnesss(h5))
    print("Track 7Digital -- ", hdf5_getters.get_track_7digitalid(h5))
    print("Similar artists -- ", hdf5_getters.get_similar_artists(h5))
    print("Artist terms -- ", hdf5_getters.get_artist_terms(h5))
    print("Artist terms freq -- ", hdf5_getters.get_artist_terms_freq(h5))
    print("Artist terms weight -- ", hdf5_getters.get_artist_terms_weight(h5))
    print("Analysis sample rate -- ",
          hdf5_getters.get_analysis_sample_rate(h5))
    print("Audio md5 -- ", hdf5_getters.get_audio_md5(h5))
    print("Danceability -- ", hdf5_getters.get_danceability(h5))
    print("Duration -- ", hdf5_getters.get_duration(h5))
    print("End of Fade -- ", hdf5_getters.get_end_of_fade_in(h5))
    print("Energy -- ", hdf5_getters.get_energy(h5))
    print("Key -- ", hdf5_getters.get_key(h5))
    print("Key Confidence -- ", hdf5_getters.get_key_confidence(h5))
    print("Loudness -- ", hdf5_getters.get_loudness(h5))
    print("Mode -- ", hdf5_getters.get_mode(h5))
    print("Mode Confidence -- ", hdf5_getters.get_mode_confidence(h5))
    print("Start of fade out -- ", hdf5_getters.get_start_of_fade_out(h5))
    print("Tempo -- ", hdf5_getters.get_tempo(h5))
    print("Time signature -- ", hdf5_getters.get_time_signature(h5))
    print("Time signature confidence -- ",
          hdf5_getters.get_time_signature_confidence(h5))
    print("Track ID -- ", hdf5_getters.get_track_id(h5))
    print("Segments Start -- ", hdf5_getters.get_segments_start(h5))
    print("Segments Confidence -- ", hdf5_getters.get_segments_confidence(h5))
    print("Segments Pitches -- ", hdf5_getters.get_segments_pitches(h5))
    print("Segments Timbre -- ", hdf5_getters.get_segments_timbre(h5))
    print("Segments Loudness max -- ",
          hdf5_getters.get_segments_loudness_max(h5))
    print("Segments Loudness max time-- ",
          hdf5_getters.get_segments_loudness_max_time(h5))
    print("Segments Loudness start -- ",
          hdf5_getters.get_segments_loudness_start(h5))
    print("Sections start -- ", hdf5_getters.get_sections_start(h5))
    print("Sections Confidence -- ", hdf5_getters.get_sections_confidence(h5))
    print("Beats start -- ", hdf5_getters.get_beats_start(h5))
    print("Beats confidence -- ", hdf5_getters.get_beats_confidence(h5))
    print("Bars start -- ", hdf5_getters.get_bars_start(h5))
    print("Bars confidence -- ", hdf5_getters.get_bars_confidence(h5))
    print("Tatums start -- ", hdf5_getters.get_tatums_start(h5))
    print("Tatums confidence -- ", hdf5_getters.get_tatums_confidence(h5))
    print("Artist mbtags -- ", hdf5_getters.get_artist_mbtags(h5))
    print("Artist mbtags count -- ", hdf5_getters.get_artist_mbtags_count(h5))
    print("Year -- ", hdf5_getters.get_year(h5))

    fields = ['Title', 'Artist ID']

    with open('Tester2.csv', 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=';')

        # writing the fields
        csv_writer.writerow(fields)

        # writing the data rows
        csv_writer.writerow(
            [hdf5_getters.get_title(h5),
             hdf5_getters.get_artist_id(h5)])

    h5.close()  # close h5 when completed in the end
def data_to_flat_file(basedir,ext='.h5') :
    """This function extract the information from the tables and creates the flat file."""	
    count = 0;	#song counter
    list_to_write= []
    row_to_write = ""
    writer = csv.writer(open("metadata_wholeA.csv", "wb"))
    for root, dirs, files in os.walk(basedir):
	files = glob.glob(os.path.join(root,'*'+ext))
        for f in files:
	    print f	#the name of the file
            h5 = hdf5_getters.open_h5_file_read(f)
	    title = hdf5_getters.get_title(h5) 
	    title= title.replace('"','') 
	    comma=title.find(',')	#eliminating commas in the title
	    if	comma != -1:
		    print title
		    time.sleep(1)
	    album = hdf5_getters.get_release(h5)
	    album= album.replace('"','')	#eliminating commas in the album	
	    comma=album.find(',')
	    if	comma != -1:
		    print album
		    time.sleep(1)
	    artist_name = hdf5_getters.get_artist_name(h5)
	    comma=artist_name.find(',')
	    if	comma != -1:
		    print artist_name
		    time.sleep(1)
	    artist_name= artist_name.replace('"','')	#eliminating double quotes
	    duration = hdf5_getters.get_duration(h5)
	    samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
	    artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
	    artist_fam = hdf5_getters.get_artist_familiarity(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_fam) == True:
	            artist_fam=-1
	    artist_hotness= hdf5_getters.get_artist_hotttnesss(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_hotness) == True:
	            artist_hotness=-1
	    artist_id = hdf5_getters.get_artist_id(h5)
	    artist_lat = hdf5_getters.get_artist_latitude(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_lat) == True:
	            artist_lat=-1
	    artist_loc = hdf5_getters.get_artist_location(h5)
		#checks artist_loc to see if it is a hyperlink if it is set as empty string
	    artist_loc = artist_loc.replace(",", "\,");
	    if artist_loc.startswith("<a"):
                artist_loc = ""
	    if len(artist_loc) > 100:
                artist_loc = ""
	    artist_lon = hdf5_getters.get_artist_longitude(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_lon) == True:
	            artist_lon=-1
	    artist_mbid = hdf5_getters.get_artist_mbid(h5)
	    artist_pmid = hdf5_getters.get_artist_playmeid(h5)
	    audio_md5 = hdf5_getters.get_audio_md5(h5)
	    danceability = hdf5_getters.get_danceability(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(danceability) == True:
	            danceability=-1
	    end_fade_in =hdf5_getters.get_end_of_fade_in(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(end_fade_in) == True:
	            end_fade_in=-1
	    energy = hdf5_getters.get_energy(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(energy) == True:
	            energy=-1
            song_key = hdf5_getters.get_key(h5)
	    key_c = hdf5_getters.get_key_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(key_c) == True:
	            key_c=-1
	    loudness = hdf5_getters.get_loudness(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(loudness) == True:
	            loudness=-1
	    mode = hdf5_getters.get_mode(h5)
	    mode_conf = hdf5_getters.get_mode_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(mode_conf) == True:
	            mode_conf=-1
	    release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
	    song_hot = hdf5_getters.get_song_hotttnesss(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(song_hot) == True:
	            song_hot=-1
	    song_id = hdf5_getters.get_song_id(h5)
	    start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
	    tempo = hdf5_getters.get_tempo(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(tempo) == True:
	            tempo=-1
	    time_sig = hdf5_getters.get_time_signature(h5)
	    time_sig_c = hdf5_getters.get_time_signature_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(time_sig_c) == True:
	            time_sig_c=-1
	    track_id = hdf5_getters.get_track_id(h5)
	    track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
	    year = hdf5_getters.get_year(h5)
	    bars_c = hdf5_getters.get_bars_confidence(h5)
	    bars_c_avg= get_avg(bars_c)
	    bars_c_max= get_max(bars_c)
	    bars_c_min = get_min(bars_c)
	    bars_c_stddev= get_stddev(bars_c)
	    bars_c_count = get_count(bars_c)
	    bars_c_sum = get_sum(bars_c)
	    bars_start = hdf5_getters.get_bars_start(h5)
	    bars_start_avg = get_avg(bars_start)
	    bars_start_max= get_max(bars_start)
	    bars_start_min = get_min(bars_start)
	    bars_start_stddev= get_stddev(bars_start)
	    bars_start_count = get_count(bars_start)
	    bars_start_sum = get_sum(bars_start)
            beats_c = hdf5_getters.get_beats_confidence(h5)
            beats_c_avg= get_avg(beats_c)
	    beats_c_max= get_max(beats_c)
	    beats_c_min = get_min(beats_c)
	    beats_c_stddev= get_stddev(beats_c)
	    beats_c_count = get_count(beats_c)
	    beats_c_sum = get_sum(beats_c)
            beats_start = hdf5_getters.get_beats_start(h5)
 	    beats_start_avg = get_avg(beats_start)
	    beats_start_max= get_max(beats_start)
	    beats_start_min = get_min(beats_start)
	    beats_start_stddev= get_stddev(beats_start)
	    beats_start_count = get_count(beats_start)
	    beats_start_sum = get_sum(beats_start)
	    sec_c = hdf5_getters.get_sections_confidence(h5)
            sec_c_avg= get_avg(sec_c)
	    sec_c_max= get_max(sec_c)
	    sec_c_min = get_min(sec_c)
	    sec_c_stddev= get_stddev(sec_c)
	    sec_c_count = get_count(sec_c)
	    sec_c_sum = get_sum(sec_c)
	    sec_start = hdf5_getters.get_sections_start(h5)
            sec_start_avg = get_avg(sec_start)
	    sec_start_max= get_max(sec_start)
	    sec_start_min = get_min(sec_start)
	    sec_start_stddev= get_stddev(sec_start)
	    sec_start_count = get_count(sec_start)
	    sec_start_sum = get_sum(sec_start)
	    seg_c = hdf5_getters.get_segments_confidence(h5)
	    seg_c_avg= get_avg(seg_c)
	    seg_c_max= get_max(seg_c)
	    seg_c_min = get_min(seg_c)
	    seg_c_stddev= get_stddev(seg_c)
	    seg_c_count = get_count(seg_c)
	    seg_c_sum = get_sum(seg_c)
            seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
            seg_loud_max_avg= get_avg(seg_loud_max)
	    seg_loud_max_max= get_max(seg_loud_max)
	    seg_loud_max_min = get_min(seg_loud_max)
	    seg_loud_max_stddev= get_stddev(seg_loud_max)
	    seg_loud_max_count = get_count(seg_loud_max)
	    seg_loud_max_sum = get_sum(seg_loud_max)
	    seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
	    seg_loud_max_time_avg= get_avg(seg_loud_max_time)
	    seg_loud_max_time_max= get_max(seg_loud_max_time)
	    seg_loud_max_time_min = get_min(seg_loud_max_time)
	    seg_loud_max_time_stddev= get_stddev(seg_loud_max_time)
	    seg_loud_max_time_count = get_count(seg_loud_max_time)
	    seg_loud_max_time_sum = get_sum(seg_loud_max_time)
	    seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
	    seg_loud_start_avg= get_avg(seg_loud_start)
	    seg_loud_start_max= get_max(seg_loud_start)
	    seg_loud_start_min = get_min(seg_loud_start)
	    seg_loud_start_stddev= get_stddev(seg_loud_start)
	    seg_loud_start_count = get_count(seg_loud_start)
	    seg_loud_start_sum = get_sum(seg_loud_start)					      
	    seg_pitch = hdf5_getters.get_segments_pitches(h5)
	    pitch_size = len(seg_pitch)
	    seg_start = hdf5_getters.get_segments_start(h5)
	    seg_start_avg= get_avg(seg_start)
	    seg_start_max= get_max(seg_start)
	    seg_start_min = get_min(seg_start)
	    seg_start_stddev= get_stddev(seg_start)
	    seg_start_count = get_count(seg_start)
	    seg_start_sum = get_sum(seg_start)
	    seg_timbre = hdf5_getters.get_segments_timbre(h5)
	    tatms_c = hdf5_getters.get_tatums_confidence(h5)
	    tatms_c_avg= get_avg(tatms_c)
	    tatms_c_max= get_max(tatms_c)
	    tatms_c_min = get_min(tatms_c)
	    tatms_c_stddev= get_stddev(tatms_c)
	    tatms_c_count = get_count(tatms_c)
	    tatms_c_sum = get_sum(tatms_c)
	    tatms_start = hdf5_getters.get_tatums_start(h5)
	    tatms_start_avg= get_avg(tatms_start)
	    tatms_start_max= get_max(tatms_start)
	    tatms_start_min = get_min(tatms_start)
	    tatms_start_stddev= get_stddev(tatms_start)
	    tatms_start_count = get_count(tatms_start)
	    tatms_start_sum = get_sum(tatms_start)
	
	    #Getting the genres
	    genre_set = 0    #flag to see if the genre has been set or not
	    art_trm = hdf5_getters.get_artist_terms(h5)
	    trm_freq = hdf5_getters.get_artist_terms_freq(h5)
	    trn_wght = hdf5_getters.get_artist_terms_weight(h5)
	    a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
	    genre_indexes=get_genre_indexes(trm_freq) #index of the highest freq
	    final_genre=[]
	    genres_so_far=[]
	    for i in range(len(genre_indexes)):
		    genre_tmp=get_genre(art_trm,genre_indexes[i])   #genre that corresponds to the highest freq
		    genres_so_far=genre_dict.get_genre_in_dict(genre_tmp) #getting the genre from the dictionary
		    if len(genres_so_far) != 0:
			    for i in genres_so_far:
				final_genre.append(i)
				genre_set=1				#genre was found in dictionary
				  
		
	    
	    if genre_set == 1:
		    col_num=[]
		   
		    for genre in final_genre:
			    column=int(genre)				#getting the column number of the genre
			    col_num.append(column)

		    genre_array=genre_columns(col_num)	         #genre array
 	    else:
		    genre_array=genre_columns(-1)		#the genre was not found in the dictionary

	    transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows
	    #arrays containing the aggregate values of the 12 rows
	    seg_pitch_avg=[]
	    seg_pitch_max=[]
	    seg_pitch_min=[]
            seg_pitch_stddev=[]
            seg_pitch_count=[]
	    seg_pitch_sum=[]
            i=0
	    #Getting the aggregate values in the pitches array
	    for row in transpose_pitch:
		   seg_pitch_avg.append(get_avg(row))
		   seg_pitch_max.append(get_max(row))
	           seg_pitch_min.append(get_min(row))
		   seg_pitch_stddev.append(get_stddev(row))
		   seg_pitch_count.append(get_count(row))
                   seg_pitch_sum.append(get_sum(row))
		   i=i+1

	    #extracting information from the timbre array 
            transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows
	    #arrays containing the aggregate values of the 12 rows
	    seg_timbre_avg=[]
	    seg_timbre_max=[]
	    seg_timbre_min=[]
            seg_timbre_stddev=[]
            seg_timbre_count=[]
	    seg_timbre_sum=[]
            i=0
	    for row in transpose_timbre:
		   seg_timbre_avg.append(get_avg(row))
		   seg_timbre_max.append(get_max(row))
	           seg_timbre_min.append(get_min(row))
		   seg_timbre_stddev.append(get_stddev(row))
		   seg_timbre_count.append(get_count(row))
                   seg_timbre_sum.append(get_sum(row))
		   i=i+1
		


		#Writing to the flat file
            writer.writerow([title,album,artist_name,year,duration,seg_start_count, tempo])

	    h5.close()
	    count=count+1;
	    print count;
Ejemplo n.º 16
0
def data_to_flat_file(basedir,ext='.h5') :
    """This function extract the information from the tables and creates the flat file."""	
    count = 0;	#song counter
    list_to_write= []
    row_to_write = ""
    writer = csv.writer(open("metadata.csv", "wb"))
    for root, dirs, files in os.walk(basedir):
	files = glob.glob(os.path.join(root,'*'+ext))
        for f in files:
	    print f	#the name of the file
            h5 = hdf5_getters.open_h5_file_read(f)
	    title = hdf5_getters.get_title(h5) 
	    title= title.replace('"','') 
	    comma=title.find(',')	#eliminating commas in the title
	    if	comma != -1:
		    print title
		    time.sleep(1)
	    album = hdf5_getters.get_release(h5)
	    album= album.replace('"','')	#eliminating commas in the album	
	    comma=album.find(',')
	    if	comma != -1:
		    print album
		    time.sleep(1)
	    artist_name = hdf5_getters.get_artist_name(h5)
	    comma=artist_name.find(',')
	    if	comma != -1:
		    print artist_name
		    time.sleep(1)
	    artist_name= artist_name.replace('"','')	#eliminating double quotes
	    duration = hdf5_getters.get_duration(h5)
	    samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
	    artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
	    artist_fam = hdf5_getters.get_artist_familiarity(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_fam) == True:
	            artist_fam=-1
	    artist_hotness= hdf5_getters.get_artist_hotttnesss(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_hotness) == True:
	            artist_hotness=-1
	    artist_id = hdf5_getters.get_artist_id(h5)
	    artist_lat = hdf5_getters.get_artist_latitude(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_lat) == True:
	            artist_lat=-1
	    artist_loc = hdf5_getters.get_artist_location(h5)
		#checks artist_loc to see if it is a hyperlink if it is set as empty string
	    artist_loc = artist_loc.replace(",", "\,");
	    if artist_loc.startswith("<a"):
                artist_loc = ""
	    if len(artist_loc) > 100:
                artist_loc = ""
	    artist_lon = hdf5_getters.get_artist_longitude(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_lon) == True:
	            artist_lon=-1
	    artist_mbid = hdf5_getters.get_artist_mbid(h5)
	    artist_pmid = hdf5_getters.get_artist_playmeid(h5)
	    audio_md5 = hdf5_getters.get_audio_md5(h5)
	    danceability = hdf5_getters.get_danceability(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(danceability) == True:
	            danceability=-1
	    end_fade_in =hdf5_getters.get_end_of_fade_in(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(end_fade_in) == True:
	            end_fade_in=-1
	    energy = hdf5_getters.get_energy(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(energy) == True:
	            energy=-1
            song_key = hdf5_getters.get_key(h5)
	    key_c = hdf5_getters.get_key_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(key_c) == True:
	            key_c=-1
	    loudness = hdf5_getters.get_loudness(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(loudness) == True:
	            loudness=-1
	    mode = hdf5_getters.get_mode(h5)
	    mode_conf = hdf5_getters.get_mode_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(mode_conf) == True:
	            mode_conf=-1
	    release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
	    song_hot = hdf5_getters.get_song_hotttnesss(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(song_hot) == True:
	            song_hot=-1
	    song_id = hdf5_getters.get_song_id(h5)
	    start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
	    tempo = hdf5_getters.get_tempo(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(tempo) == True:
	            tempo=-1
	    time_sig = hdf5_getters.get_time_signature(h5)
	    time_sig_c = hdf5_getters.get_time_signature_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(time_sig_c) == True:
	            time_sig_c=-1
	    track_id = hdf5_getters.get_track_id(h5)
	    track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
	    year = hdf5_getters.get_year(h5)
	    bars_c = hdf5_getters.get_bars_confidence(h5)
	    bars_c_avg= get_avg(bars_c)
	    bars_c_max= get_max(bars_c)
	    bars_c_min = get_min(bars_c)
	    bars_c_stddev= get_stddev(bars_c)
	    bars_c_count = get_count(bars_c)
	    bars_c_sum = get_sum(bars_c)
	    bars_start = hdf5_getters.get_bars_start(h5)
	    bars_start_avg = get_avg(bars_start)
	    bars_start_max= get_max(bars_start)
	    bars_start_min = get_min(bars_start)
	    bars_start_stddev= get_stddev(bars_start)
	    bars_start_count = get_count(bars_start)
	    bars_start_sum = get_sum(bars_start)
            beats_c = hdf5_getters.get_beats_confidence(h5)
            beats_c_avg= get_avg(beats_c)
	    beats_c_max= get_max(beats_c)
	    beats_c_min = get_min(beats_c)
	    beats_c_stddev= get_stddev(beats_c)
	    beats_c_count = get_count(beats_c)
	    beats_c_sum = get_sum(beats_c)
            beats_start = hdf5_getters.get_beats_start(h5)
 	    beats_start_avg = get_avg(beats_start)
	    beats_start_max= get_max(beats_start)
	    beats_start_min = get_min(beats_start)
	    beats_start_stddev= get_stddev(beats_start)
	    beats_start_count = get_count(beats_start)
	    beats_start_sum = get_sum(beats_start)
	    sec_c = hdf5_getters.get_sections_confidence(h5)
            sec_c_avg= get_avg(sec_c)
	    sec_c_max= get_max(sec_c)
	    sec_c_min = get_min(sec_c)
	    sec_c_stddev= get_stddev(sec_c)
	    sec_c_count = get_count(sec_c)
	    sec_c_sum = get_sum(sec_c)
	    sec_start = hdf5_getters.get_sections_start(h5)
            sec_start_avg = get_avg(sec_start)
	    sec_start_max= get_max(sec_start)
	    sec_start_min = get_min(sec_start)
	    sec_start_stddev= get_stddev(sec_start)
	    sec_start_count = get_count(sec_start)
	    sec_start_sum = get_sum(sec_start)
	    seg_c = hdf5_getters.get_segments_confidence(h5)
	    seg_c_avg= get_avg(seg_c)
	    seg_c_max= get_max(seg_c)
	    seg_c_min = get_min(seg_c)
	    seg_c_stddev= get_stddev(seg_c)
	    seg_c_count = get_count(seg_c)
	    seg_c_sum = get_sum(seg_c)
            seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
            seg_loud_max_avg= get_avg(seg_loud_max)
	    seg_loud_max_max= get_max(seg_loud_max)
	    seg_loud_max_min = get_min(seg_loud_max)
	    seg_loud_max_stddev= get_stddev(seg_loud_max)
	    seg_loud_max_count = get_count(seg_loud_max)
	    seg_loud_max_sum = get_sum(seg_loud_max)
	    seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
	    seg_loud_max_time_avg= get_avg(seg_loud_max_time)
	    seg_loud_max_time_max= get_max(seg_loud_max_time)
	    seg_loud_max_time_min = get_min(seg_loud_max_time)
	    seg_loud_max_time_stddev= get_stddev(seg_loud_max_time)
	    seg_loud_max_time_count = get_count(seg_loud_max_time)
	    seg_loud_max_time_sum = get_sum(seg_loud_max_time)
	    seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
	    seg_loud_start_avg= get_avg(seg_loud_start)
	    seg_loud_start_max= get_max(seg_loud_start)
	    seg_loud_start_min = get_min(seg_loud_start)
	    seg_loud_start_stddev= get_stddev(seg_loud_start)
	    seg_loud_start_count = get_count(seg_loud_start)
	    seg_loud_start_sum = get_sum(seg_loud_start)					      
	    seg_pitch = hdf5_getters.get_segments_pitches(h5)
	    pitch_size = len(seg_pitch)
	    seg_start = hdf5_getters.get_segments_start(h5)
	    seg_start_avg= get_avg(seg_start)
	    seg_start_max= get_max(seg_start)
	    seg_start_min = get_min(seg_start)
	    seg_start_stddev= get_stddev(seg_start)
	    seg_start_count = get_count(seg_start)
	    seg_start_sum = get_sum(seg_start)
	    seg_timbre = hdf5_getters.get_segments_timbre(h5)
	    tatms_c = hdf5_getters.get_tatums_confidence(h5)
	    tatms_c_avg= get_avg(tatms_c)
	    tatms_c_max= get_max(tatms_c)
	    tatms_c_min = get_min(tatms_c)
	    tatms_c_stddev= get_stddev(tatms_c)
	    tatms_c_count = get_count(tatms_c)
	    tatms_c_sum = get_sum(tatms_c)
	    tatms_start = hdf5_getters.get_tatums_start(h5)
	    tatms_start_avg= get_avg(tatms_start)
	    tatms_start_max= get_max(tatms_start)
	    tatms_start_min = get_min(tatms_start)
	    tatms_start_stddev= get_stddev(tatms_start)
	    tatms_start_count = get_count(tatms_start)
	    tatms_start_sum = get_sum(tatms_start)
	
	    #Getting the genres
	    genre_set = 0    #flag to see if the genre has been set or not
	    art_trm = hdf5_getters.get_artist_terms(h5)
	    trm_freq = hdf5_getters.get_artist_terms_freq(h5)
	    trn_wght = hdf5_getters.get_artist_terms_weight(h5)
	    a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
	    genre_indexes=get_genre_indexes(trm_freq) #index of the highest freq
	    final_genre=[]
	    genres_so_far=[]
	    for i in range(len(genre_indexes)):
		    genre_tmp=get_genre(art_trm,genre_indexes[i])   #genre that corresponds to the highest freq
		    genres_so_far=genre_dict.get_genre_in_dict(genre_tmp) #getting the genre from the dictionary
		    if len(genres_so_far) != 0:
			    for i in genres_so_far:
				final_genre.append(i)
				genre_set=1				#genre was found in dictionary
				  
		
	    
	    if genre_set == 1:
		    col_num=[]
		   
		    for genre in final_genre:
			    column=int(genre)				#getting the column number of the genre
			    col_num.append(column)

		    genre_array=genre_columns(col_num)	         #genre array
 	    else:
		    genre_array=genre_columns(-1)		#the genre was not found in the dictionary

	    transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows
	    #arrays containing the aggregate values of the 12 rows
	    seg_pitch_avg=[]
	    seg_pitch_max=[]
	    seg_pitch_min=[]
            seg_pitch_stddev=[]
            seg_pitch_count=[]
	    seg_pitch_sum=[]
            i=0
	    #Getting the aggregate values in the pitches array
	    for row in transpose_pitch:
		   seg_pitch_avg.append(get_avg(row))
		   seg_pitch_max.append(get_max(row))
	           seg_pitch_min.append(get_min(row))
		   seg_pitch_stddev.append(get_stddev(row))
		   seg_pitch_count.append(get_count(row))
                   seg_pitch_sum.append(get_sum(row))
		   i=i+1

	    #extracting information from the timbre array 
            transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows
	    #arrays containing the aggregate values of the 12 rows
	    seg_timbre_avg=[]
	    seg_timbre_max=[]
	    seg_timbre_min=[]
            seg_timbre_stddev=[]
            seg_timbre_count=[]
	    seg_timbre_sum=[]
            i=0
	    for row in transpose_timbre:
		   seg_timbre_avg.append(get_avg(row))
		   seg_timbre_max.append(get_max(row))
	           seg_timbre_min.append(get_min(row))
		   seg_timbre_stddev.append(get_stddev(row))
		   seg_timbre_count.append(get_count(row))
                   seg_timbre_sum.append(get_sum(row))
		   i=i+1
		


		#Writing to the flat file

            writer.writerow([title,album,artist_name,duration,samp_rt,artist_7digitalid,artist_fam,artist_hotness,artist_id,artist_lat,artist_loc,artist_lon,artist_mbid,genre_array[0],genre_array[1],genre_array[2],
genre_array[3],genre_array[4],genre_array[5],genre_array[6],genre_array[7],genre_array[8],genre_array[9],genre_array[10],genre_array[11],genre_array[12],genre_array[13],genre_array[14],genre_array[15],
genre_array[16],genre_array[17],genre_array[18],genre_array[19],genre_array[20],genre_array[21],genre_array[22],genre_array[23],genre_array[24],genre_array[25],genre_array[26],
genre_array[27],genre_array[28],genre_array[29],genre_array[30],genre_array[31],genre_array[32],genre_array[33],genre_array[34],genre_array[35],genre_array[36],genre_array[37],genre_array[38],
genre_array[39],genre_array[40],genre_array[41],genre_array[42],genre_array[43],genre_array[44],genre_array[45],genre_array[46],genre_array[47],genre_array[48],genre_array[49],
genre_array[50],genre_array[51],genre_array[52],genre_array[53],genre_array[54],genre_array[55],genre_array[56],genre_array[57],genre_array[58],genre_array[59],
genre_array[60],genre_array[61],genre_array[62],genre_array[63],genre_array[64],genre_array[65],genre_array[66],genre_array[67],genre_array[68],genre_array[69],
genre_array[70],genre_array[71],genre_array[72],genre_array[73],genre_array[74],genre_array[75],genre_array[76],genre_array[77],genre_array[78],genre_array[79],
genre_array[80],genre_array[81],genre_array[82],genre_array[83],genre_array[84],genre_array[85],genre_array[86],genre_array[87],genre_array[88],genre_array[89],
genre_array[90],genre_array[91],genre_array[92],genre_array[93],genre_array[94],genre_array[95],genre_array[96],genre_array[97],genre_array[98],genre_array[99],genre_array[100],genre_array[101],
genre_array[102],genre_array[103],genre_array[104],genre_array[105],genre_array[106],genre_array[107],genre_array[108],genre_array[109],genre_array[110],genre_array[111],genre_array[112],
genre_array[113],genre_array[114],genre_array[115],genre_array[116],genre_array[117],genre_array[118],genre_array[119],genre_array[120],genre_array[121],genre_array[122],genre_array[123],
genre_array[124],genre_array[125],genre_array[126],genre_array[127],genre_array[128],genre_array[129],genre_array[130],genre_array[131],genre_array[132],
artist_pmid,audio_md5,danceability,end_fade_in,energy,song_key,key_c,loudness,mode,mode_conf,release_7digitalid,song_hot,song_id,start_fade_out,tempo,time_sig,time_sig_c,track_id,track_7digitalid,year,bars_c_avg,bars_c_max,bars_c_min,bars_c_stddev,bars_c_count,bars_c_sum,bars_start_avg,bars_start_max,bars_start_min,bars_start_stddev,bars_start_count,bars_start_sum,beats_c_avg,beats_c_max,beats_c_min,beats_c_stddev,beats_c_count,beats_c_sum,beats_start_avg,beats_start_max,beats_start_min, beats_start_stddev,beats_start_count,beats_start_sum, sec_c_avg,sec_c_max,sec_c_min,sec_c_stddev,sec_c_count,sec_c_sum,sec_start_avg,sec_start_max,sec_start_min,sec_start_stddev,sec_start_count,sec_start_sum,seg_c_avg,seg_c_max,seg_c_min,seg_c_stddev,seg_c_count,seg_c_sum,seg_loud_max_avg,seg_loud_max_max,seg_loud_max_min,seg_loud_max_stddev,seg_loud_max_count,seg_loud_max_sum,seg_loud_max_time_avg,seg_loud_max_time_max,seg_loud_max_time_min,seg_loud_max_time_stddev,seg_loud_max_time_count,seg_loud_max_time_sum,seg_loud_start_avg,seg_loud_start_max,seg_loud_start_min,seg_loud_start_stddev,seg_loud_start_count,seg_loud_start_sum,seg_pitch_avg[0],seg_pitch_max[0],seg_pitch_min[0],seg_pitch_stddev[0],seg_pitch_count[0],seg_pitch_sum[0],seg_pitch_avg[1],seg_pitch_max[1],seg_pitch_min[1],seg_pitch_stddev[1],seg_pitch_count[1],seg_pitch_sum[1],seg_pitch_avg[2],seg_pitch_max[2],seg_pitch_min[2],seg_pitch_stddev[2],seg_pitch_count[2],seg_pitch_sum[2],seg_pitch_avg[3],seg_pitch_max[3],seg_pitch_min[3],seg_pitch_stddev[3],seg_pitch_count[3],seg_pitch_sum[3],seg_pitch_avg[4],seg_pitch_max[4],seg_pitch_min[4],seg_pitch_stddev[4],seg_pitch_count[4],seg_pitch_sum[4],seg_pitch_avg[5],seg_pitch_max[5],seg_pitch_min[5],seg_pitch_stddev[5],seg_pitch_count[5],seg_pitch_sum[5],seg_pitch_avg[6],seg_pitch_max[6],seg_pitch_min[6],seg_pitch_stddev[6],seg_pitch_count[6],seg_pitch_sum[6],seg_pitch_avg[7],seg_pitch_max[7],seg_pitch_min[7],seg_pitch_stddev[7],seg_pitch_count[7],seg_pitch_sum[7],seg_pitch_avg[8],seg_pitch_max[8],seg_pitch_min[8],seg_pitch_stddev[8],seg_pitch_count[8],seg_pitch_sum[8],seg_pitch_avg[9],seg_pitch_max[9],seg_pitch_min[9],seg_pitch_stddev[9],seg_pitch_count[9],seg_pitch_sum[9],seg_pitch_avg[10],seg_pitch_max[10],seg_pitch_min[10],seg_pitch_stddev[10],seg_pitch_count[10],seg_pitch_sum[10],seg_pitch_avg[11],seg_pitch_max[11],seg_pitch_min[11],
seg_pitch_stddev[11],seg_pitch_count[11],seg_pitch_sum[11],seg_start_avg,seg_start_max,seg_start_min,seg_start_stddev, 
seg_start_count,seg_start_sum,seg_timbre_avg[0],seg_timbre_max[0],seg_timbre_min[0],seg_timbre_stddev[0],seg_timbre_count[0],
seg_timbre_sum[0],seg_timbre_avg[1],seg_timbre_max[1],seg_timbre_min[1],seg_timbre_stddev[1],seg_timbre_count[1],
seg_timbre_sum[1],seg_timbre_avg[2],seg_timbre_max[2],seg_timbre_min[2],seg_timbre_stddev[2],seg_timbre_count[2],
seg_timbre_sum[2],seg_timbre_avg[3],seg_timbre_max[3],seg_timbre_min[3],seg_timbre_stddev[3],seg_timbre_count[3],
seg_timbre_sum[3],seg_timbre_avg[4],seg_timbre_max[4],seg_timbre_min[4],seg_timbre_stddev[4],seg_timbre_count[4],
seg_timbre_sum[4],seg_timbre_avg[5],seg_timbre_max[5],seg_timbre_min[5],seg_timbre_stddev[5],seg_timbre_count[5],
seg_timbre_sum[5],seg_timbre_avg[6],seg_timbre_max[6],seg_timbre_min[6],seg_timbre_stddev[6],seg_timbre_count[6],
seg_timbre_sum[6],seg_timbre_avg[7],seg_timbre_max[7],seg_timbre_min[7],seg_timbre_stddev[7],seg_timbre_count[7],
seg_timbre_sum[7],seg_timbre_avg[8],seg_timbre_max[8],seg_timbre_min[8],seg_timbre_stddev[8],seg_timbre_count[8],
seg_timbre_sum[8],seg_timbre_avg[9],seg_timbre_max[9],seg_timbre_min[9],seg_timbre_stddev[9],seg_timbre_count[9],
seg_timbre_sum[9],seg_timbre_avg[10],seg_timbre_max[10],seg_timbre_min[10],seg_timbre_stddev[10],seg_timbre_count[10],
seg_timbre_sum[10],seg_timbre_avg[11],seg_timbre_max[11],seg_timbre_min[11],seg_timbre_stddev[11],seg_timbre_count[11],
seg_timbre_sum[11],tatms_c_avg,tatms_c_max,tatms_c_min,tatms_c_stddev,tatms_c_count,tatms_c_sum,tatms_start_avg,tatms_start_max,tatms_start_min,tatms_start_stddev,tatms_start_count,tatms_start_sum])






	    h5.close()
	    count=count+1;
	    print count;
def get_fields(files):
    tracks = []
    counts = {}
    field_counts = []
    for file in files:
        h5 = hdf5_getters.open_h5_file_read(file)
        t = {}
        t['artist_familiarity'] = hdf5_getters.get_artist_familiarity(
            h5)  # estimation
        t['artist_hotttnesss'] = hdf5_getters.get_artist_hotttnesss(
            h5)  # estimation
        t['artist_name'] = hdf5_getters.get_artist_name(h5)  # artist name
        t['release'] = hdf5_getters.get_release(h5)  # album name
        t['title'] = hdf5_getters.get_title(h5)  # title
        t['len_similar_artists'] = len(
            hdf5_getters.get_similar_artists(h5))  # number of similar artists
        t['analysis_sample_rate'] = hdf5_getters.get_analysis_sample_rate(
            h5)  # sample rate of the audio used ?????????
        t['duration'] = hdf5_getters.get_duration(h5)  # seconds
        t['key'] = hdf5_getters.get_key(h5)  # key the song is in
        t['key_confidence'] = hdf5_getters.get_key_confidence(
            h5)  # confidence measure
        t['loudness'] = hdf5_getters.get_loudness(h5)  # overall loudness in dB
        t['mode_confidence'] = hdf5_getters.get_mode_confidence(
            h5)  # confidence measure
        t['start_of_fade_out'] = hdf5_getters.get_start_of_fade_out(
            h5)  # time in sec
        t['tempo'] = hdf5_getters.get_tempo(h5)  # estimated tempo in BPM
        t['time_signature'] = hdf5_getters.get_time_signature(
            h5)  # estimate of number of beats per bar, e.g. 4
        t['year'] = hdf5_getters.get_year(
            h5)  # song release year from MusicBrainz or 0

        timbre = hdf5_getters.get_segments_timbre(
            h5)  # 2D float array, texture features (MFCC+PCA-like)
        t['segments_timbre'] = timbre
        t['timbre_avg'] = timbre.mean(axis=0)  # list of 12 averages
        cov_mat_timbre = np.cov(timbre, rowvar=False)
        cov_timbre = []
        for i in range(len(cov_mat_timbre)):
            for j in range(len(cov_mat_timbre) - i):
                cov_timbre.append(cov_mat_timbre[i][j])
        t['timbre_cov'] = cov_timbre  # list of 78 covariances

        pitch = hdf5_getters.get_segments_pitches(
            h5)  # 2D float array, chroma feature, one value per note
        t['segments_pitch'] = pitch
        t['pitch_avg'] = pitch.mean(axis=0)  # list of 12 averages
        cov_mat_pitch = np.cov(pitch, rowvar=False)
        cov_pitch = []
        for i in range(len(cov_mat_pitch)):
            for j in range(len(cov_mat_pitch) - i):
                cov_pitch.append(cov_mat_timbre[i][j])
        t['pitch_cov'] = cov_pitch  # list of 78 covariances

        # seg_pitch = hdf5_getters.get_segments_pitches(h5)  # 2D float array, chroma feature, one value per note
        # print(seg_pitch.shape)

        # t['artist_latitude'] = hdf5_getters.get_artist_latitude(h5)  # float, ????????????????????????????????????????
        # t['artist_longitude'] = hdf5_getters.get_artist_longitude(h5)  # float, ??????????????????????????????????????
        # t['artist_location'] = hdf5_getters.get_artist_location(h5)  # location name
        # t['song_hotttnesss'] = hdf5_getters.get_song_hotttnesss(h5)  # estimation
        # t['danceability'] = hdf5_getters.get_danceability(h5)  # estimation
        # t['end_of_fade_in'] = hdf5_getters.get_end_of_fade_in(h5)  # seconds at the beginning of the song
        # t['energy'] = hdf5_getters.get_energy(h5)  # energy from listener point of view
        # t['mode'] = hdf5_getters.get_mode(h5)  # major or minor
        # t['time_signature_confidence'] = hdf5_getters.get_time_signature_confidence(h5)  # confidence measure
        # t['artist_mbtags_count'] = len(hdf5_getters.get_artist_mbtags_count(h5))  # array int, tag counts for musicbrainz tags
        # bad types or non arithmatic numbers
        '''
        # t['audio_md5'] = hdf5_getters.get_audio_md5(h5)  # hash code of the audio used for the analysis by The Echo Nest
        # t['artist_terms_weight'] = hdf5_getters.get_artist_terms_weight(h5)  # array float, echonest tags weight ?????
        # t['artist_terms_freq'] = hdf5_getters.get_artist_terms_freq(h5)  # array float, echonest tags freqs ??????????
        # t['artist_terms'] = hdf5_getters.get_artist_terms(h5)  # array string, echonest tags ?????????????????????????
        # t['artist_id'] = hdf5_getters.get_artist_id(h5)  # echonest id
        # t['artist_mbid'] = hdf5_getters.get_artist_mbid(h5)  # musicbrainz id
        # t['artist_playmeid'] = hdf5_getters.get_artist_playmeid(h5)  # playme id
        # t['artist_7digitalid'] = hdf5_getters.get_artist_7digitalid(h5)  # 7digital id
        # t['release_7digitalid'] = hdf5_getters.get_release_7digitalid(h5)  # 7digital id
        # t['song_id'] = hdf5_getters.get_song_id(h5)  # echonest id
        # t['track_7digitalid'] = hdf5_getters.get_track_7digitalid(h5)  # 7digital id
        # t['similar_artists'] = hdf5_getters.get_similar_artists(h5)  # string array of sim artist ids
        # t['track_id'] = hdf5_getters.get_track_id(h5)  # echonest track id
        # t['segments_start'] = hdf5_getters.get_segments_start(h5)  # array floats, musical events, ~ note onsets
        # t['segments_confidence'] = hdf5_getters.get_segments_confidence(h5)  # array floats, confidence measure
        # t['segments_pitches'] = hdf5_getters.get_segments_pitches(h5)  # 2D float array, chroma feature, one value per note
        # t['segments_timbre'] = hdf5_getters.get_segments_timbre(h5)  # 2D float array, texture features (MFCC+PCA-like)
        # t['segments_loudness_max'] = hdf5_getters.get_segments_loudness_max(h5)  # float array, max dB value
        # t['segments_loudness_max_time'] = hdf5_getters.get_segments_loudness_max_time(h5)  # float array, time of max dB value, i.e. end of attack
        # t['segments_loudness_start'] = hdf5_getters.get_segments_loudness_start(h5)  # array float, dB value at onset
        # t['sections_start'] = hdf5_getters.get_sections_start(h5)  # array float, largest grouping in a song, e.g. verse
        # t['sections_confidence'] = hdf5_getters.get_sections_confidence(h5)  # array float, confidence measure
        # t['beats_start'] = hdf5_getters.get_beats_start(h5)  # array float, result of beat tracking
        # t['beats_confidence'] = hdf5_getters.get_beats_confidence(h5)  # array float, confidence measure
        # t['bars_start'] = hdf5_getters.get_bars_start(h5)  # array float, beginning of bars, usually on a beat
        # t['bars_confidence'] = hdf5_getters.get_bars_confidence(h5)  # array float, confidence measure
        # t['tatums_start'] = hdf5_getters.get_tatums_start(h5)  # array float, smallest rythmic element
        # t['tatums_confidence'] = hdf5_getters.get_tatums_confidence(h5)  # array float, confidence measure
        # t['artist_mbtags'] = hdf5_getters.get_artist_mbtags(h5)  # array string, tags from musicbrainz.org 
        '''
        h5.close()

        for key, value in t.items():
            if isinstance(value, float) and math.isnan(value):
                pass
            if type(value) is np.ndarray:
                if key in counts.keys():
                    counts[key] += 1
                else:
                    counts[key] = 1
            elif value:
                if key in counts.keys():
                    counts[key] += 1
                else:
                    counts[key] = 1
            elif key not in counts.keys():
                counts[key] = 0

        count = 0
        for key, value in t.items():
            if isinstance(value, float) and math.isnan(value):
                pass
            elif type(value) is np.ndarray:
                count += 1
            elif value:
                count += 1
        field_counts.append(count)

        # progress bar
        if num_of_tracks >= 100:
            i = files.index(file) + 1
            scale = num_of_tracks / 100
            if i % math.ceil(len(files) * .05) == 0:
                sys.stdout.write('\r')
                # the exact output you're looking for:
                sys.stdout.write("Loading dataframe: [%-100s] %d%%" %
                                 ('=' * int(i // scale), 1 / scale * i))
                sys.stdout.flush()
                time.sleep(.01)

        tracks.append(t)
    print()
    return tracks, counts, field_counts
Ejemplo n.º 18
0
def fill_attributes(song, songH5File):

    #----------------------------non array attributes-------------------------------
    song.analysisSampleRate = str(
        hdf5_getters.get_analysis_sample_rate(songH5File))
    song.artistDigitalID = str(hdf5_getters.get_artist_7digitalid(songH5File))
    song.artistFamiliarity = str(
        hdf5_getters.get_artist_familiarity(songH5File))
    song.artistHotness = str(hdf5_getters.get_artist_hottness(songH5File))
    song.artistID = str(hdf5_getters.get_artist_id(songH5File))
    song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File))
    song.artistLocation = str(hdf5_getters.get_artist_location(songH5File))
    song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File))
    song.artistmbID = str(hdf5_getters.get_artist_mbid(songH5File))
    song.artistName = str(hdf5_getters.get_artist_name(songH5File))
    song.artistPlayMeID = str(hdf5_getters.get_artist_playmeid(songH5File))
    song.audioMD5 = str(hdf5_getters.get_audio_md5(songH5File))
    song.danceability = str(hdf5_getters.get_danceability(songH5File))
    song.duration = str(hdf5_getters.get_duration(songH5File))
    song.endOfFadeIn = str(hdf5_getters.get_end_of_fade_in(songH5File))
    song.energy = str(hdf5_getters.get_energy(songH5File))
    song.key = str(hdf5_getters.get_key(songH5File))
    song.keyConfidence = str(hdf5_getters.get_key_confidence(songH5File))
    song.segementsConfidence = str(
        hdf5_getters.get_segments_confidence(songH5File))
    song.segementsConfidence = str(
        hdf5_getters.get_sections_confidence(songH5File))
    song.loudness = str(hdf5_getters.get_loudness(songH5File))
    song.mode = str(hdf5_getters.get_mode(songH5File))
    song.modeConfidence = str(hdf5_getters.get_mode_confidence(songH5File))
    song.release = str(hdf5_getters.get_release(songH5File))
    song.releaseDigitalID = str(
        hdf5_getters.get_release_7digitalid(songH5File))
    song.songHotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File))
    song.startOfFadeOut = str(hdf5_getters.get_start_of_fade_out(songH5File))
    song.tempo = str(hdf5_getters.get_tempo(songH5File))
    song.timeSignature = str(hdf5_getters.get_time_signature(songH5File))
    song.timeSignatureConfidence = str(
        hdf5_getters.get_time_signature_confidence(songH5File))
    song.title = str(hdf5_getters.get_title(songH5File))
    song.trackID = str(hdf5_getters.get_track_id(songH5File))
    song.trackDigitalID = str(hdf5_getters.get_track_7digitalid(songH5File))
    song.year = str(hdf5_getters.get_year(songH5File))

    #-------------------------------array attributes--------------------------------------
    #array float
    song.beatsStart_mean, song.beatsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_beats_start(songH5File))
    #array float
    song.artistTermsFreq_mean, song.artistTermsFreq_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_terms_freq(songH5File))
    #array float
    song.artistTermsWeight_mean, song.artistTermsWeight_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_terms_weight(songH5File))
    #array int
    song.artistmbTagsCount_mean, song.artistmbTagsCount_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_mbtags_count(songH5File))
    #array float
    song.barsConfidence_mean, song.barsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_bars_confidence(songH5File))
    #array float
    song.barsStart_mean, song.barsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_bars_start(songH5File))
    #array float
    song.beatsConfidence_mean, song.beatsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_beats_confidence(songH5File))
    #array float
    song.sectionsConfidence_mean, song.sectionsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_sections_confidence(songH5File))
    #array float
    song.sectionsStart_mean, song.sectionsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_sections_start(songH5File))
    #array float
    song.segmentsConfidence_mean, song.segmentsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_confidence(songH5File))
    #array float
    song.segmentsLoudness_mean, song.segmentsLoudness_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_max(songH5File))
    #array float
    song.segmentsLoudnessMaxTime_mean, song.segmentsLoudnessMaxTime_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_max_time(songH5File))
    #array float
    song.segmentsLoudnessMaxStart_mean, song.segmentsLoudnessMaxStart_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_start(songH5File))
    #array float
    song.segmentsStart_mean, song.segmentsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_start(songH5File))
    #array float
    song.tatumsConfidence_mean, song.tatumsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_tatums_confidence(songH5File))
    #array float
    song.tatumsStart_mean, song.tatumsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_tatums_start(songH5File))
    #array2d float
    song.segmentsTimbre_mean, song.segmentsTimbre_var = covert_2darray_to_meanvar(
        hdf5_getters.get_segments_timbre(songH5File))
    #array2d float
    song.segmentsPitches_mean, song.segmentsPitches_var = covert_2darray_to_meanvar(
        hdf5_getters.get_segments_pitches(songH5File))

    #------------------------array string attributes------------------------
    song.similarArtists = convert_array_to_string(
        hdf5_getters.get_similar_artists(songH5File))  #array string
    song.artistTerms = convert_array_to_string(
        hdf5_getters.get_artist_terms(songH5File))  #array string
    song.artistmbTags = convert_array_to_string(
        hdf5_getters.get_artist_mbtags(songH5File))  #array string

    return song
Ejemplo n.º 19
0
            #*****useless... column is filled with 0's?
            energy = hdf5_getters.get_energy(h5)

            # Get loudness
            loudness = hdf5_getters.get_loudness(h5)

            # Get year
            year = hdf5_getters.get_year(h5)

            # Get tempo
            tempo = hdf5_getters.get_tempo(h5)

            #########################################################

            # Get analysis sample rate
            analysis_rate = hdf5_getters.get_analysis_sample_rate(h5)

            # Get end of fade in
            end_of_fade_in = hdf5_getters.get_end_of_fade_in(h5)

            # Get key
            key = hdf5_getters.get_key(h5)

            # Get key confidence
            key_confidence = hdf5_getters.get_key_confidence(h5)

            # Get mode
            mode = hdf5_getters.get_mode(h5)

            # Get mode confidence
            mode_confidence = hdf5_getters.get_mode_confidence(h5)
Ejemplo n.º 20
0
def getInfo(files):
    data = []
    build_str = ''
    with open(sys.argv[1], 'r') as f:
        contents = f.read()
        c = contents.split()
    f.close()
    print("creating csv with following fields:" + contents)
    for i in c:
        build_str = build_str + i + ','
    build_str = build_str[:-1]
    build_str = build_str + '\n'
    for fil in files:
        curFile = getters.open_h5_file_read(fil)
        d2 = {}
        get_table = {'track_id': getters.get_track_id(curFile), 'segments_pitches': getters.get_segments_pitches(curFile), 'time_signature_confidence': getters.get_time_signature_confidence(curFile), 'song_hotttnesss': getters.get_song_hotttnesss(curFile), 'artist_longitude': getters.get_artist_longitude(curFile), 'tatums_confidence': getters.get_tatums_confidence(curFile), 'num_songs': getters.get_num_songs(curFile), 'duration': getters.get_duration(curFile), 'start_of_fade_out': getters.get_start_of_fade_out(curFile), 'artist_name': getters.get_artist_name(curFile), 'similar_artists': getters.get_similar_artists(curFile), 'artist_mbtags': getters.get_artist_mbtags(curFile), 'artist_terms_freq': getters.get_artist_terms_freq(curFile), 'release': getters.get_release(curFile), 'song_id': getters.get_song_id(curFile), 'track_7digitalid': getters.get_track_7digitalid(curFile), 'title': getters.get_title(curFile), 'artist_latitude': getters.get_artist_latitude(curFile), 'energy': getters.get_energy(curFile), 'key': getters.get_key(curFile), 'release_7digitalid': getters.get_release_7digitalid(curFile), 'artist_mbid': getters.get_artist_mbid(curFile), 'segments_confidence': getters.get_segments_confidence(curFile), 'artist_hotttnesss': getters.get_artist_hotttnesss(curFile), 'time_signature': getters.get_time_signature(curFile), 'segments_loudness_max_time': getters.get_segments_loudness_max_time(curFile), 'mode': getters.get_mode(curFile), 'segments_loudness_start': getters.get_segments_loudness_start(curFile), 'tempo': getters.get_tempo(curFile), 'key_confidence': getters.get_key_confidence(curFile), 'analysis_sample_rate': getters.get_analysis_sample_rate(curFile), 'bars_confidence': getters.get_bars_confidence(curFile), 'artist_playmeid': getters.get_artist_playmeid(curFile), 'artist_terms_weight': getters.get_artist_terms_weight(curFile), 'segments_start': getters.get_segments_start(curFile), 'artist_location': getters.get_artist_location(curFile), 'loudness': getters.get_loudness(curFile), 'year': getters.get_year(curFile), 'artist_7digitalid': getters.get_artist_7digitalid(curFile), 'audio_md5': getters.get_audio_md5(curFile), 'segments_timbre': getters.get_segments_timbre(curFile), 'mode_confidence': getters.get_mode_confidence(curFile), 'end_of_fade_in': getters.get_end_of_fade_in(curFile), 'danceability': getters.get_danceability(curFile), 'artist_familiarity': getters.get_artist_familiarity(curFile), 'artist_mbtags_count': getters.get_artist_mbtags_count(curFile), 'tatums_start': getters.get_tatums_start(curFile), 'artist_id': getters.get_artist_id(curFile), 'segments_loudness_max': getters.get_segments_loudness_max(curFile), 'bars_start': getters.get_bars_start(curFile), 'beats_start': getters.get_beats_start(curFile), 'artist_terms': getters.get_artist_terms(curFile), 'sections_start': getters.get_sections_start(curFile), 'beats_confidence': getters.get_beats_confidence(curFile), 'sections_confidence': getters.get_sections_confidence(curFile)}
        tid = fil.split('/')[-1].split('.')[0]
        # print(c)
        for i in c:
            if i in get_table: 
               d2[i] = get_table[i]
               d2[i] = str(d2[i]).replace('\n','')  
               build_str = build_str + d2[i] + ','
            else:
                print('error: unspecified field')
                exit(0)
        build_str = build_str[:-1]
        # print(build_str[:-1])
        build_str = build_str + '\n'
        curFile.close()
    build_str = build_str.replace('b','').replace("'",'').replace('"','')  
    return (build_str)