def feature_allframes(input_features, frame_indexer = None):
    """
    Compute per-beat loudness-difference features for a track.

    :param input_features: dict with at least 'audio' (sample array at
        44.1 kHz) and 'beats' (beat positions in seconds).
    :param frame_indexer: iterable of beat indices to build features for.
        Defaults to every index for which all required neighbour loudness
        values (offsets -1 .. +7) can be computed.
    :return: scaled matrix of shape (len(frame_indexer), 9) with loudness
        differences between each indexed frame and its neighbours.
    """
    audio = input_features['audio']
    beats = input_features['beats']

    # Initialise the algorithms
    w = Windowing(type = 'hann')
    loudness = Loudness()

    SAMPLE_RATE = 44100

    if frame_indexer is None:
        # BUG FIX: the original default was range(1, len(beats) - 1), but
        # Step 2 reads loudness_values[i + 7] and the last loudness value that
        # can be computed is index len(beats) - 2 (it needs beats[i + 1]).
        # The largest safe index is therefore len(beats) - 9.
        frame_indexer = range(1, max(1, len(beats) - 8))

    # One loudness value per beat frame.
    loudness_values = np.zeros((len(beats), 1))
    # Nine loudness differences around each indexed frame (see Step 2).
    loudness_differences = np.zeros((len(beats), 9))

    # Step 1: framewise loudness for every frame some indexed frame depends
    # on: offsets i-8 .. i+1 relative to the indexed frames.
    # BUG FIX: the loop is capped at len(beats) - 1 because beats[i + 1] is
    # needed to delimit the frame; the original could index past the end.
    for i in range(len(beats) - 1):
        if not any((i + d) in frame_indexer for d in range(-8, 2)):
            continue
        start_sample = int(beats[i] * SAMPLE_RATE)
        end_sample = int(beats[i + 1] * SAMPLE_RATE)
        # Force an even frame length — presumably required by the
        # windowing/FFT chain downstream; TODO confirm.
        frame = audio[start_sample : end_sample if (start_sample - end_sample) % 2 == 0 else end_sample - 1]
        loudness_values[i] = loudness(w(frame))

    # Step 2: differences between the loudness of frame i and its neighbours.
    for i in frame_indexer:
        loudness_differences[i][0] = loudness_values[i] - loudness_values[i - 1]
        # Columns 1..7: loudness of frames i+1 .. i+7 relative to frame i.
        for offset in range(1, 8):
            loudness_differences[i][offset] = loudness_values[i + offset] - loudness_values[i]
        loudness_differences[i][8] = loudness_values[i - 1] - loudness_values[i + 1]

    # Keep only the rows of the indexed frames and standardise per column.
    result = loudness_differences[frame_indexer]
    return preprocessing.scale(result)
def lowLevel(songName):
    """
    Extract a low-level feature vector for one song and append it to data.csv.

    Results are memoised in the module-level `dataset` dict (populated
    elsewhere); writes to data.csv are serialised with the module-level
    `lock`.

    :param songName: path to the audio file.
    :return: list of scalar feature values.
    """
    global dataset
    global lock
    print(songName)

    # Strip ',' and ';' from the song name: the key doubles as a CSV field.
    key = re.sub(r',', "", songName.split('/')[-1])
    key = re.sub(r';', "", key)

    # Don't re-extract songs that were already processed.
    if key in dataset:
        return dataset[key]

    loader = MonoLoader(filename=songName)
    audio = loader()

    extractor = LowLevelSpectralEqloudExtractor()
    feature = list(extractor(audio))
    # Drop two adjacent unwanted outputs (original indices 1 and 2).
    del feature[1]
    del feature[1]

    extractor = LowLevelSpectralExtractor()
    featureTwo = list(extractor(audio))
    del featureTwo[0]
    del featureTwo[-2]
    # NOTE(review): overwrites featureTwo[4] with the second element of the
    # array-valued feature[4] — presumably a specific band/component; confirm.
    featureTwo[4] = feature[4][1]
    feature.extend(featureTwo)

    extractor = Loudness()
    feature.append(extractor(audio))
    extractor = LogAttackTime()
    feature.append(extractor(audio)[0])
    extractor = KeyExtractor()
    feature.append(extractor(audio)[2])
    extractor = RhythmExtractor2013()
    data = extractor(audio)
    feature.append(data[0])  # first rhythm output (BPM per essentia docs)
    feature.append(data[2])  # third rhythm output — verify intended field

    # Collapse array-valued entries into their mean (in place) and std
    # (appended at the end).  range(len(feature)) is evaluated once, so the
    # appended std values are not themselves revisited by the loop.
    for x in range(len(feature)):
        if type(feature[x]) is np.ndarray:
            mean, std = stdDev(feature[x])
            feature[x] = mean
            feature.append(std)

    arr = key + "," + str(feature)[1:-1] + "\n"
    # BUG FIX: the file was opened before acquiring the lock and closed after
    # releasing it, so concurrent threads could interleave open/append/close
    # cycles.  Do the whole cycle inside the critical section.
    with lock:
        with open('data.csv', 'a') as f:
            f.write(arr)
    return feature
def lowLevel(songName):
    """
    Extract a low-level feature vector for one song and append it to data.csv.

    NOTE(review): a second `lowLevel` definition exists in this file; if both
    live in the same module, this one shadows the other — confirm which
    variant is intended.

    Results are memoised in the module-level `dataset` dict (populated
    elsewhere); writes to data.csv are serialised with the module-level
    `lock`.

    :param songName: path to the audio file.
    :return: list of scalar feature values.
    """
    global dataset
    global lock
    print(songName)

    # Strip ',' from the song name: the key doubles as a CSV field.
    key = re.sub(r',', "", songName.split('/')[-1])

    # Don't re-extract songs that were already processed.
    # BUG FIX: dict.has_key() is deprecated (and removed in Python 3);
    # membership testing with `in` is the equivalent, portable form.
    if key in dataset:
        return dataset[key]

    # Loading song and using Extractors
    loader = MonoLoader(filename=songName)
    audio = loader()

    extractor = LowLevelSpectralEqloudExtractor()
    feature = list(extractor(audio))
    # Drop two adjacent unwanted outputs (original indices 1 and 2).
    del feature[1]
    del feature[1]

    extractor = LowLevelSpectralExtractor()
    featureTwo = list(extractor(audio))
    del featureTwo[0]
    del featureTwo[-2]
    # NOTE(review): overwrites featureTwo[4] with the second element of the
    # array-valued feature[4] — presumably a specific band/component; confirm.
    featureTwo[4] = feature[4][1]
    feature.extend(featureTwo)

    extractor = Loudness()
    feature.append(extractor(audio))
    extractor = LogAttackTime()
    feature.append(extractor(audio)[0])
    extractor = KeyExtractor()
    feature.append(extractor(audio)[2])
    extractor = RhythmExtractor2013()
    data = extractor(audio)
    feature.append(data[0])  # first rhythm output (BPM per essentia docs)
    feature.append(data[2])  # third rhythm output — verify intended field

    # Collapse array-valued entries into a single average value.
    for x in range(len(feature)):
        if type(feature[x]) is np.ndarray:
            feature[x] = avg(feature[x])

    arr = key + "," + str(feature)[1:-1] + "\n"
    # BUG FIX: the file was opened before acquiring the lock and closed after
    # releasing it, so concurrent threads could interleave open/append/close
    # cycles.  Do the whole cycle inside the critical section.
    with lock:
        with open('data.csv', 'a') as f:
            f.write(arr)
    return feature
def shared_main(source, dest, display_result):
    """
    Load two audio files and compare them frame by frame.

    :param source: path/handle accepted by _loader for the source audio.
    :param dest: path/handle accepted by _loader for the destination audio.
    :param display_result: flag forwarded to compare() controlling output.
    :return: (min_cost, match_result) tuple as produced by compare().
    """
    source_audio = _loader(source)
    destination_audio = _loader(dest)

    # Frame both signals identically so compare() sees aligned windows.
    source_frame = FrameGenerator(source_audio, frameSize=2048, hopSize=512)
    destination_frame = FrameGenerator(destination_audio, frameSize=2048, hopSize=512)

    window = Windowing(type='hann')  # window function
    spectrum = Spectrum()            # spectrum function
    pitch_yin_fft = PitchYinFFT()    # pitch extractor
    loudness = Loudness()
    # BUG FIX: removed the unused (and typo-named) `pitch_saliennce`
    # PitchSalience() instance — it was never passed to compare().

    # draw_plot(source_frame, window, spectrum, pitch_yin_fft)
    min_cost, match_result = compare(source_frame, destination_frame, window,
                                     spectrum, pitch_yin_fft, 5, 1, 1,
                                     display_result, loudness)
    return min_cost, match_result
def feature_allframes(audio, beats, frame_indexer = None):
    """
    Build a 4-dimensional scaled-loudness feature vector per beat frame.

    :param audio: audio samples at 44.1 kHz.
    :param beats: beat positions in seconds.
    :param frame_indexer: iterable of beat indices to build features for.
        Defaults to every index whose four loudness values (i .. i+3) can be
        computed.
    :return: matrix of shape (len(frame_indexer), 4); row i holds the scaled
        loudness of frames i .. i+3.
    """
    # Initialise the algorithms
    w = Windowing(type = 'hann')
    loudness = Loudness()

    SAMPLE_RATE = 44100

    if frame_indexer is None:
        # BUG FIX: the original default, range(1, len(beats) - 1), let Step 2
        # slice loudness_values[i : i + 4] beyond the last computable loudness
        # value (index len(beats) - 2), making the reshape below fail.  The
        # largest safe index is len(beats) - 5.
        frame_indexer = range(1, max(1, len(beats) - 4))

    # One loudness value per beat frame.
    loudness_values = np.zeros((len(beats), 1))
    # Per-frame feature vector: loudness of frames i .. i+3.
    loudness_feature_vector = np.zeros((len(beats), 4))

    # Step 1: framewise loudness for every frame some indexed frame depends
    # on: frame i is needed when any of i, i-1, i-2, i-3 is indexed.
    # BUG FIX: the loop is capped at len(beats) - 1 because beats[i + 1] is
    # needed to delimit the frame; the original could index past the end.
    for i in range(len(beats) - 1):
        if not any((i - d) in frame_indexer for d in range(4)):
            continue
        start_sample = int(beats[i] * SAMPLE_RATE)
        end_sample = int(beats[i + 1] * SAMPLE_RATE)
        # Force an even frame length — presumably required by the
        # windowing/FFT chain downstream; TODO confirm.
        frame = audio[start_sample : end_sample if (start_sample - end_sample) % 2 == 0 else end_sample - 1]
        loudness_values[i] = loudness(w(frame))

    # NOTE(review): scaling runs over every row, including the zero rows of
    # frames that were never analysed — kept exactly as in the original.
    loudness_values = preprocessing.scale(loudness_values)

    # Step 2: assemble the feature vector of each indexed frame.
    for i in frame_indexer:
        loudness_feature_vector[i] = np.reshape(loudness_values[i:i+4], (4,))

    # Keep only the rows of the indexed frames.
    return loudness_feature_vector[frame_indexer]