def getSimilarSongSegments(audiofile, feature_type='rh'):
    """Locate the segment of `audiofile` most similar to the current query song.

    IMPORTANT: relies on the GLOBAL variable `query_features`, which is set by
    getSimilarSongs — that function MUST ALWAYS BE CALLED BEFORE this one.

    Parameters:
        audiofile:    path of the audio file to analyze segment-wise
        feature_type: which RP feature type to compare on (default 'rh')

    Returns:
        tuple (start_pos, end_pos) — time positions in seconds of the most
        similar segment.
    """
    samplerate, samplewidth, wavedata = audiofile_read(audiofile)

    # rp_extract takes per-type boolean flags, so wrap the single type in a list [.]
    fext = [feature_type]

    segment_features = rp_extract(wavedata, samplerate,
                      extract_rp   = ('rp' in fext),    # extract Rhythm Patterns features
                      extract_ssd  = ('ssd' in fext),   # extract Statistical Spectrum Descriptor
                      extract_sh   = ('sh' in fext),    # extract Statistical Histograms
                      extract_tssd = ('tssd' in fext),  # extract temporal Statistical Spectrum Descriptor
                      extract_rh   = ('rh' in fext),    # extract Rhythm Histogram features
                      extract_trh  = ('trh' in fext),   # extract temporal Rhythm Histogram features
                      extract_mvd  = ('mvd' in fext),   # extract Modulation Frequency Variance Descriptor
                      skip_leadin_fadeout=0,
                      step_width=1,
                      return_segment_features=True)

    search_features = segment_features[feature_type]

    # IMPORTANT: we use query_features as a GLOBAL VARIABLE here
    # that means getSimilarSongs MUST ALWAYS BE CALLED BEFORE THIS FUNCTION
    query_feature_vector = query_features[feature_type]

    # Searching similar song segments
    # TODO proper Scaling (Normalization) (see above)
    sim_song_search = NearestNeighbors(n_neighbors=1, metric='euclidean')
    sim_song_search.fit(search_features)

    # Get the most similar song SEGMENT's index
    most_similar_segment_index = sim_song_search.kneighbors(query_feature_vector,
                                                            return_distance=False)

    # segment_features['segpos'] holds sample positions,
    # segment_features['timepos'] holds time positions in seconds
    most_similar_timestamp = segment_features['timepos'][most_similar_segment_index]

    # unwrap the [[[...]]] nesting (indexing with the kneighbors result yields a nested array)
    most_similar_timestamp = most_similar_timestamp[0][0]

    # return tuple (start_pos, end_pos)
    return (most_similar_timestamp[0], most_similar_timestamp[1])
def get_feature_timbre_ssd(wav_dir): audiofile = wav_dir samplerate, samplewidth, wavedata = audiofile_read(audiofile) nsamples = wavedata.shape[0] nchannels = wavedata.shape[1] print "Successfully read audio file:", audiofile print samplerate, "Hz,", samplewidth * 8, "bit,", nchannels, "channel(s),", nsamples, "samples" features = rp_extract(wavedata, # the two-channel wave-data of the audio-file samplerate=11025, # the samplerate of the audio-file extract_ssd=True, # <== extract this feature! transform_db=True, # apply psycho-accoustic transformation transform_phon=True, # apply psycho-accoustic transformation transform_sone=True, # apply psycho-accoustic transformation fluctuation_strength_weighting=True, # apply psycho-accoustic transformation skip_leadin_fadeout=1, # skip lead-in/fade-out. value = number of segments skipped step_width=1) # # plotssd(features['ssd']) print len(features['ssd']) return features
def extract_all_files(filelist, path, out_file=None, feature_types=['rp', 'ssd', 'rh'], label=False, append=False, no_extension_check=False, force_resampling=None, out_HDF5=False, log_AudioTypes=True, log_Errors=True, verbose=True): """ finds all files of a certain type (e.g. .wav and/or .mp3) in a path and all sub-directories in it extracts selected RP feature types and saves them into separate CSV feature files (one per feature type) # filelist: list of files for features to be extracted # path: absolute path that will be added at beginning of filelist (can be '') # out_file: output file name stub for feature files to write (if omitted, features will be returned from function) # feature_types: RP feature types to extract. see rp_extract.py # label: use subdirectory name as class label # no_extension_check: does not check file format via extension. means that decoder is called on ALL files. # force_resampling: force a target sampling rate (provided in Hz) when decoding (works with FFMPEG only!) 
# out_HDF5: whether to store as HDF5 file format (otherwise CSV) """ ext = feature_types n = 0 # counting the files being processed n_extracted = 0 # counting the files that were actually analyzed err = 0 # counting errors n_files = len(filelist) # initialize filelist_extracted and dict containing all accumulated feature arrays filelist_extracted = [] feat_array = {} audio_logwriter = None error_logwriter = None audio_logwriter_wrote_header = False start_time = time.time() if out_file: # only if out_file is specified if log_AudioTypes: log_filename = out_file + '.audiotypes.log' audio_logfile = open(log_filename, 'w') # TODO allow append mode 'a' audio_logwriter = unicsv.UnicodeCSVWriter( audio_logfile) #, quoting=csv.QUOTE_ALL) if log_Errors: err_log_filename = out_file + '.errors.log' error_logfile = open(err_log_filename, 'w') # TODO allow append mode 'a' error_logwriter = unicsv.UnicodeCSVWriter( error_logfile) #, quoting=csv.QUOTE_ALL) if out_HDF5: FeatureWriter = HDF5FeatureWriter() else: FeatureWriter = CSVFeatureWriter() FeatureWriter.open(out_file, ext, append=append) for fil in filelist: # iterate over all files try: if n > 0: elaps_time = time.time() - start_time remain_time = elaps_time * n_files / n - elaps_time # n is the number of files done here else: remain_time = None n += 1 if path: filename = path + os.sep + fil else: filename = fil if verbose: print '#', n, '/', n_files, '(ETA: ' + timestr( remain_time) + "):", filename # read audio file (wav or mp3) samplerate, samplewidth, data, decoder = audiofile_read( filename, verbose=verbose, include_decoder=True, no_extension_check=no_extension_check, force_resampling=force_resampling) # audio file info if verbose: print samplerate, "Hz,", data.shape[ 1], "channel(s),", data.shape[0], "samples" # extract features # Note: the True/False flags are determined by checking if a feature is listed in 'ext' (see settings above) feat = rp.rp_extract( data, samplerate, extract_rp=('rp' in ext), # extract Rhythm 
Patterns features extract_ssd=( 'ssd' in ext), # extract Statistical Spectrum Descriptor extract_tssd=( 'tssd' in ext ), # extract temporal Statistical Spectrum Descriptor extract_rh=('rh' in ext), # extract Rhythm Histogram features extract_trh=( 'trh' in ext), # extract temporal Rhythm Histogram features extract_mvd=( 'mvd' in ext ), # extract Modulation Frequency Variance Descriptor spectral_masking=True, transform_db=True, transform_phon=True, transform_sone=True, fluctuation_strength_weighting=True, skip_leadin_fadeout=1, step_width=1, verbose=verbose) # TODO check if ext and feat.keys are consistent # WHAT TO USE AS ID (based on filename): 3 choices: id = fil # rel. filename as from find_files # id = filename # full filename incl. full path # id = filename[len(path)+1:] # relative filename only (extracted from path) if out_file: # WRITE each feature set to a CSV or HDF5 file id2 = None if label: id2 = id.replace("\\", "/").split("/")[-2].strip() if out_HDF5 and n_extracted == 0: # for HDF5 we need to know the vector dimension # thats why we cannot open the file earlier FeatureWriter.open( out_file, ext, feat, append=append ) # append not working for now but possibly in future FeatureWriter.write_features(id, feat, id2) else: # IN MEMORY: add the extracted features for 1 file to the array dict accumulating all files # TODO: only if we don't have out_file? maybe we want this as a general option if feat_array == {}: # for first file, initialize empty array with dimension of the feature set for e in feat.keys(): feat_array[e] = np.empty((0, feat[e].shape[0])) # store features in array for e in feat.keys(): feat_array[e] = np.append( feat_array[e], feat[e].reshape(1, -1), axis=0 ) # 1 for horizontal vector, -1 means take original dimension filelist_extracted.append(id) n_extracted += 1 # write list of analyzed audio files alongsize audio metadata (kHz, bit, etc.) 
if audio_logwriter: if not audio_logwriter_wrote_header: # write CSV header log_info = [ "filename", "decoder", "samplerate (kHz)", "samplewidth (bit)", "n channels", "n samples" ] audio_logwriter.writerow(log_info) audio_logwriter_wrote_header = True log_info = [ filename, decoder, samplerate, samplewidth * 8, data.shape[1], data.shape[0] ] audio_logwriter.writerow(log_info) gc.collect( ) # after every file we do garbage collection, otherwise our memory is used up quickly for some reason except Exception as e: print "ERROR analysing file: " + fil + ": " + str(e) err += 1 if error_logwriter: error_logwriter.writerow([fil, str(e)]) try: if out_file: # close all output files FeatureWriter.close() if audio_logwriter: audio_logfile.close() if error_logwriter: error_logfile.close() except Exception as e: print "ERROR closing the output or log files: " + str(e) end_time = time.time() if verbose: print "FEATURE EXTRACTION FINISHED.", n, "file(s) processed,", n_extracted, "successful. Duration:", timestr( end_time - start_time) if err > 0: print err, "file(s) had ERRORs during feature extraction.", if log_Errors: print "See", err_log_filename else: print if out_file: opt_ext = '.h5' if out_HDF5 else '' print "Feature file(s):", out_file + "." + str(ext) + opt_ext if out_file is None: return filelist_extracted, feat_array
def extract_all_files_in_path(path,out_file,feature_types,audiofile_types=('.wav','.mp3')): ext = feature_types # get file list of all files in a path (filtered by audiofile_types) filelist = find_files(path,audiofile_types,relative_path=True) n = 0 # counting the files that were actually analyzed err = 0 # counting errors n_files = len(filelist) start_abs = time.time() files, writer = initialize_feature_files(out_file,ext) for fil in filelist: # iterate over all files try: n += 1 filename = path + os.sep + fil print '#',n,'/',n_files,':', filename start = time.time() # read audio file (wav or mp3) samplerate, samplewidth, data = audiofile_read(filename) end = time.time() print end - start, "sec" # audio file info print samplerate, "Hz,", data.shape[1], "channels,", data.shape[0], "samples" # extract features # Note: the True/False flags are determined by checking if a feature is listed in 'ext' (see settings above) start = time.time() feat = rp.rp_extract(data, samplerate, extract_rp = ('rp' in ext), # extract Rhythm Patterns features extract_ssd = ('ssd' in ext), # extract Statistical Spectrum Descriptor extract_sh = ('sh' in ext), # extract Statistical Histograms extract_tssd = ('tssd' in ext), # extract temporal Statistical Spectrum Descriptor extract_rh = ('rh' in ext), # extract Rhythm Histogram features extract_trh = ('trh' in ext), # extract temporal Rhythm Histogram features extract_mvd = ('mvd' in ext), # extract Modulation Frequency Variance Descriptor spectral_masking=True, transform_db=True, transform_phon=True, transform_sone=True, fluctuation_strength_weighting=True, skip_leadin_fadeout=1, step_width=1) end = time.time() print "Features extracted:", feat.keys(), end - start, "sec" # WRITE each feature set to a CSV # TODO check if ext and feat.keys are consistent start = time.time() # add filename before vector. 3 choices: id = fil # filename only # id = filename # full filename incl. 
full path # id = filename[len(path)+1:] # relative filename only write_feature_files(id,feat,writer) end = time.time() print "Data written." #, end-start except: print "ERROR analysing file: " + fil err += 1 # close all output files close_feature_files(files,ext) end = time.time() print "FEATURE EXTRACTION FINISHED.", n, "files,", end-start_abs, "sec" if err > 0: print err, "files had ERRORs during feature extraction."
if len(sys.argv) > 1: if sys.argv[1] == '-test': # RUN DOCSTRING SELF TEST print("Doing self test. If nothing is printed, it is ok.") import doctest doctest.run_docstring_examples(rp_extract, globals()) #, verbose=True) exit() # Note: no output means that everything went fine else: audiofile = sys.argv[1] else: audiofile = "music/BoxCat_Games_-_10_-_Epic_Song.mp3" # Read audio file and extract features try: samplerate, samplewidth, wavedata = audiofile_read(audiofile) np.set_printoptions(suppress=True) bark_bands = 24 # choose the number of Bark bands (2..24) mod_ampl_limit = 60 # number modulation frequencies on x-axis feat = rp_extract(wavedata, samplerate, extract_rp=True, extract_ssd=True, extract_tssd=False, extract_rh=True, n_bark_bands=bark_bands, spectral_masking=True, transform_db=True,
# process file given on command line or default song (included) if len(sys.argv) > 1: if sys.argv[1] == '-test': # RUN DOCSTRING SELF TEST print "Doing self test. If nothing is printed, it is ok." import doctest doctest.run_docstring_examples(rp_extract, globals()) #, verbose=True) exit() # Note: no output means that everything went fine else: audiofile = sys.argv[1] else: audiofile = "music/BoxCat_Games_-_10_-_Epic_Song.mp3" # Read audio file and extract features try: samplerate, samplewidth, wavedata = audiofile_read(audiofile) np.set_printoptions(suppress=True) bark_bands = 24 # choose the number of Bark bands (2..24) mod_ampl_limit = 60 # number modulation frequencies on x-axis feat = rp_extract(wavedata, samplerate, extract_rp=True, extract_ssd=True, extract_tssd=False, extract_rh=True, n_bark_bands=bark_bands, spectral_masking=True, transform_db=True,
def extract_all_files(filelist, path, out_file=None, feature_types=["rp", "ssd", "rh"], verbose=True): """ finds all files of a certain type (e.g. .wav and/or .mp3) in a path and all sub-directories in it extracts selected RP feature types and saves them into separate CSV feature files (one per feature type) # filelist: list of files for features to be extracted # path: absolute path that will be added at beginning of filelist (can be '') # out_file: output file name stub for feature files to write (if omitted, features will be returned from function) # feature_types: RP feature types to extract. see rp_extract.py # audiofile_types: a string or tuple of suffixes to look for file extensions to consider (include the .) """ ext = feature_types n = 0 # counting the files that were actually analyzed err = 0 # counting errors n_files = len(filelist) # initialize filelist_extracted and dict containing all accumulated feature arrays filelist_extracted = [] feat_array = {} start_abs = time.time() if out_file: # only if out_file is specified files, writer = initialize_feature_files(out_file, ext) for fil in filelist: # iterate over all files try: n += 1 if path: filename = path + os.sep + fil else: filename = fil # if verbose: print "#", n, "/", n_files, ":", filename # read audio file (wav or mp3) samplerate, samplewidth, data = audiofile_read(filename) # audio file info if verbose: print samplerate, "Hz,", data.shape[1], "channel(s),", data.shape[0], "samples" # extract features # Note: the True/False flags are determined by checking if a feature is listed in 'ext' (see settings above) feat = rp.rp_extract( data, samplerate, extract_rp=("rp" in ext), # extract Rhythm Patterns features extract_ssd=("ssd" in ext), # extract Statistical Spectrum Descriptor extract_sh=("sh" in ext), # extract Statistical Histograms extract_tssd=("tssd" in ext), # extract temporal Statistical Spectrum Descriptor extract_rh=("rh" in ext), # extract Rhythm Histogram features extract_trh=("trh" in 
ext), # extract temporal Rhythm Histogram features extract_mvd=("mvd" in ext), # extract Modulation Frequency Variance Descriptor spectral_masking=True, transform_db=True, transform_phon=True, transform_sone=True, fluctuation_strength_weighting=True, skip_leadin_fadeout=1, step_width=1, verbose=verbose, ) # TODO check if ext and feat.keys are consistent # WHAT TO USE AS ID (based on filename): 3 choices: id = fil # rel. filename as from find_files # id = filename # full filename incl. full path # id = filename[len(path)+1:] # relative filename only (extracted from path) if out_file: # WRITE each feature set to a CSV write_feature_files(id, feat, writer) else: # IN MEMORY: add the extracted features for 1 file to the array dict accumulating all files # TODO: only if we dont have out_file? maybe we want this as a general option if feat_array == {}: # for first file, initialize empty array with dimension of the feature set for e in feat.keys(): feat_array[e] = np.empty((0, feat[e].shape[0])) # store features in array for e in feat.keys(): feat_array[e] = np.append( feat_array[e], feat[e].reshape(1, -1), axis=0 ) # 1 for horizontal vector, -1 means take original dimension filelist_extracted.append(id) except Exception as e: print "ERROR analysing file: " + fil + ": " + str(e) err += 1 if out_file: # close all output files close_feature_files(files, ext) end = time.time() if verbose: print "FEATURE EXTRACTION FINISHED. %d file(s), %.2f sec" % (n, end - start_abs) if err > 0: print err, "files had ERRORs during feature extraction." if out_file: print "Feature file(s):", out_file + ".*", ext if out_file is None: return filelist_extracted, feat_array
def extract_all_files(filelist, path, out_file=None, feature_types=['rp', 'ssd', 'rh'], verbose=True): """ finds all files of a certain type (e.g. .wav and/or .mp3) in a path and all sub-directories in it extracts selected RP feature types and saves them into separate CSV feature files (one per feature type) # filelist: list of files for features to be extracted # path: absolute path that will be added at beginning of filelist (can be '') # out_file: output file name stub for feature files to write (if omitted, features will be returned from function) # feature_types: RP feature types to extract. see rp_extract.py # audiofile_types: a string or tuple of suffixes to look for file extensions to consider (include the .) """ ext = feature_types n = 0 # counting the files that were actually analyzed err = 0 # counting errors n_files = len(filelist) # initialize filelist_extracted and dict containing all accumulated feature arrays filelist_extracted = [] feat_array = {} start_abs = time.time() if out_file: # only if out_file is specified files, writer = initialize_feature_files(out_file, ext) for fil in filelist: # iterate over all files try: n += 1 if path: filename = path + os.sep + fil else: filename = fil #if verbose: print '#', n, '/', n_files, ':', filename # read audio file (wav or mp3) samplerate, samplewidth, data = audiofile_read(filename) # audio file info if verbose: print samplerate, "Hz,", data.shape[ 1], "channel(s),", data.shape[0], "samples" # extract features # Note: the True/False flags are determined by checking if a feature is listed in 'ext' (see settings above) feat = rp.rp_extract( data, samplerate, extract_rp=('rp' in ext), # extract Rhythm Patterns features extract_ssd=( 'ssd' in ext), # extract Statistical Spectrum Descriptor extract_sh=('sh' in ext), # extract Statistical Histograms extract_tssd=( 'tssd' in ext ), # extract temporal Statistical Spectrum Descriptor extract_rh=('rh' in ext), # extract Rhythm Histogram features extract_trh=( 
'trh' in ext), # extract temporal Rhythm Histogram features extract_mvd=( 'mvd' in ext ), # extract Modulation Frequency Variance Descriptor spectral_masking=True, transform_db=True, transform_phon=True, transform_sone=True, fluctuation_strength_weighting=True, skip_leadin_fadeout=1, step_width=1, verbose=verbose) # TODO check if ext and feat.keys are consistent # WHAT TO USE AS ID (based on filename): 3 choices: id = fil # rel. filename as from find_files # id = filename # full filename incl. full path # id = filename[len(path)+1:] # relative filename only (extracted from path) if out_file: # WRITE each feature set to a CSV write_feature_files(id, feat, writer) else: # IN MEMORY: add the extracted features for 1 file to the array dict accumulating all files # TODO: only if we dont have out_file? maybe we want this as a general option if feat_array == {}: # for first file, initialize empty array with dimension of the feature set for e in feat.keys(): feat_array[e] = np.empty((0, feat[e].shape[0])) # store features in array for e in feat.keys(): feat_array[e] = np.append( feat_array[e], feat[e].reshape(1, -1), axis=0 ) # 1 for horizontal vector, -1 means take original dimension filelist_extracted.append(id) except Exception as e: print "ERROR analysing file: " + fil + ": " + str(e) err += 1 if out_file: # close all output files close_feature_files(files, ext) end = time.time() if verbose: print "FEATURE EXTRACTION FINISHED. %d file(s), %.2f sec" % (n, end - start_abs) if err > 0: print err, "files had ERRORs during feature extraction." if out_file: print "Feature file(s):", out_file + ".*", ext if out_file is None: return filelist_extracted, feat_array
def getSimilarSongs(audiofile, reference_db_filenames, reference_db_features, feature_type='rh'):
    """Return the filenames of the songs in the reference DB most similar to `audiofile`.

    Side effect: sets the GLOBAL `query_features`, which getSimilarSongSegments
    reuses afterwards (see IMPORTANT note there).

    Parameters:
        audiofile:              path of the query audio file
        reference_db_filenames: dict of filename arrays per feature type
        reference_db_features:  dict of feature matrices per feature type
        feature_type:           which RP feature type to compare on (default 'rh')

    Returns:
        list of the 6 nearest songs' filenames.
    """
    global query_features  # GLOBAL VAR because it is reused in getSimilarSongSegments

    # Read Wav File
    samplerate, samplewidth, wavedata = audiofile_read(audiofile)

    # Analyze New Audio:
    # rp_extract takes per-type boolean flags, so wrap the single type in a list [.]
    fext = [feature_type]

    query_features = rp_extract(wavedata, samplerate,
                    extract_rp   = ('rp' in fext),    # extract Rhythm Patterns features
                    extract_ssd  = ('ssd' in fext),   # extract Statistical Spectrum Descriptor
                    extract_sh   = ('sh' in fext),    # extract Statistical Histograms
                    extract_tssd = ('tssd' in fext),  # extract temporal Statistical Spectrum Descriptor
                    extract_rh   = ('rh' in fext),    # extract Rhythm Histogram features
                    extract_trh  = ('trh' in fext),   # extract temporal Rhythm Histogram features
                    extract_mvd  = ('mvd' in fext),   # extract Modulation Frequency Variance Descriptor
                    skip_leadin_fadeout=0,
                    step_width=1)

    # from the dict returned by rp_extract, keep only the one feature_type we want
    query_feature_vector = query_features[feature_type]

    # reference_db_features is a dict of several feature types; pick the requested one
    reference_feature_vectors = reference_db_features[feature_type]

    # Search for similar songs in pre-analyzed song dataset (based on song's averaged features)
    sim_song_search = NearestNeighbors(n_neighbors=6, metric='euclidean')

    # TODO proper Scaling (Normalization) would need to add the live extracted song to the db_features
    # scaled_feature_space = StandardScaler().fit_transform(reference_feature_vectors)
    # sim_song_search.fit(scaled_feature_space)
    sim_song_search.fit(reference_feature_vectors)

    # Get the most similar songs
    (distances, similar_song_ids) = sim_song_search.kneighbors(query_feature_vector,
                                                               return_distance=True)

    # if the query is contained in the db feature set (usually if normalisation is used)
    # we need to take it away from the result:
    # similar_songs = similar_songs[1:]
    # print similar_song_ids

    most_similar_songs = reference_db_filenames[feature_type][similar_song_ids]

    # most_similar_songs is a 2D np.array — flatten to 1D, then to a plain list
    most_similar_songs = most_similar_songs[0]
    return most_similar_songs.tolist()