Esempio n. 1
0
def feature_allframes(input_features, frame_indexer = None):
	"""Compute beat-synchronous loudness-difference features.

	Beat frame ``j`` is the audio between ``beats[j]`` and ``beats[j+1]``.
	Each required frame is Hann-windowed and reduced to one loudness value
	``L[j]``. For every selected frame ``i`` the feature row is:

		column 0     : L[i]   - L[i-1]
		columns 1..7 : L[i+k] - L[i]   for k = 1..7
		column 8     : L[i-1] - L[i+1]

	Args:
		input_features: mapping with the keys ``'audio'`` (sample sequence)
			and ``'beats'`` (beat positions; they are multiplied by a
			44100 Hz sample rate, so presumably expressed in seconds).
		frame_indexer: indices of the beat frames to return rows for. Every
			index ``i`` needs frames ``i-1 .. i+7``, and a frame needs its
			successor beat, so valid indices are ``1 .. len(beats) - 9``.
			Index 0 is not rejected, but ``L[i-1]`` then wraps around to the
			last entry of the loudness array. Defaults to all valid indices
			(empty when there are fewer than 10 beats).

	Returns:
		The selected feature rows, shape ``(len(frame_indexer), 9)``, passed
		through ``preprocessing.scale`` (presumably scikit-learn's
		column-wise standardisation - confirm against the file's imports).
	"""
	SAMPLE_RATE = 44100
	LOOKAHEAD = 7  # number of following frames each row is compared with

	audio = input_features['audio']
	beats = input_features['beats']
	num_beats = len(beats)

	# Initialise the algorithms
	w = Windowing(type = 'hann')
	loudness = Loudness()

	if frame_indexer is None:
		# Skip frame 0 (no predecessor) and the trailing frames whose
		# look-ahead window would run past the last beat.
		frame_indexer = range(1, num_beats - LOOKAHEAD - 1)

	loudness_values = np.zeros(num_beats)
	loudness_differences = np.zeros((num_beats, LOOKAHEAD + 2))

	# Step 1: loudness of every frame read in step 2, i.e. frame i + k for a
	# selected i and k in -1..LOOKAHEAD. A set keeps each membership test O(1).
	selected = set(frame_indexer)
	needed = [j for j in range(num_beats)
		if any(j - k in selected for k in range(-1, LOOKAHEAD + 1))]
	for j in needed:
		start_sample = int(beats[j] * SAMPLE_RATE)
		end_sample = int(beats[j+1] * SAMPLE_RATE)
		# Keep the frame length even by dropping one trailing sample.
		if (end_sample - start_sample) % 2 != 0:
			end_sample -= 1
		loudness_values[j] = loudness(w(audio[start_sample:end_sample]))

	# Step 2: loudness differences relative to the neighbouring frames
	for i in frame_indexer:
		current = loudness_values[i]
		previous = loudness_values[i-1]
		loudness_differences[i][0] = current - previous
		for k in range(1, LOOKAHEAD + 1):
			loudness_differences[i][k] = loudness_values[i+k] - current
		loudness_differences[i][LOOKAHEAD + 1] = previous - loudness_values[i+1]

	# Only the rows of the selected frames are returned
	result = loudness_differences[frame_indexer]
	return preprocessing.scale(result)
Esempio n. 2
0
def lowLevel(songName):
    """Return the feature list of one song, extracting it when not cached.

    Reads the module-level ``dataset`` mapping and ``lock`` object. The row
    key is the last ``/``-separated component of ``songName`` with every
    ``,`` and ``;`` removed. If the key is already in ``dataset`` the stored
    features are returned unchanged; otherwise the song is loaded, the
    extractors are run, and one line ``key,<features>`` is appended to
    ``data.csv`` while ``lock`` is held.

    Args:
        songName: path of the audio file to analyse.

    Returns:
        The feature list. Every ``numpy.ndarray`` entry is replaced by the
        first value returned by ``stdDev`` (named ``mean`` here) and the
        second value (``std``) is appended at the end of the list, in order
        of occurrence.
    """
    print(songName)
    # Strip ',' and ';' so the key cannot break the delimited output row.
    key = songName.split('/')[-1].replace(',', '').replace(';', '')

    # Nothing to extract when the song is already in the dataset.
    if key in dataset:
        return dataset[key]

    audio = MonoLoader(filename=songName)()

    feature = list(LowLevelSpectralEqloudExtractor()(audio))
    del feature[1:3]  # drop the second and third extractor outputs

    featureTwo = list(LowLevelSpectralExtractor()(audio))
    del featureTwo[0]
    del featureTwo[-2]
    featureTwo[4] = feature[4][1]
    feature.extend(featureTwo)

    feature.append(Loudness()(audio))
    feature.append(LogAttackTime()(audio)[0])
    feature.append(KeyExtractor()(audio)[2])
    data = RhythmExtractor2013()(audio)
    feature.append(data[0])
    feature.append(data[2])

    # Index loop on purpose: the list grows while it is summarised, and only
    # the entries present before the loop started must be visited.
    for x in range(len(feature)):
        if isinstance(feature[x], np.ndarray):
            mean, std = stdDev(feature[x])
            feature[x] = mean
            feature.append(std)

    arr = key + "," + str(feature)[1:-1] + "\n"
    # Hold the lock across open/write/close so the buffered data is flushed
    # before another writer may proceed, and release it even on failure.
    lock.acquire()
    try:
        with open('data.csv', 'a') as f:
            f.write(arr)
    finally:
        lock.release()
    return feature
Esempio n. 3
0
File: main.py Progetto: SwarajKR/MIR
def lowLevel(songName):
    """Return the feature list of one song, extracting it when not cached.

    Reads the module-level ``dataset`` mapping and ``lock`` object. The row
    key is the last ``/``-separated component of ``songName`` with every
    ``,`` removed. If the key is already in ``dataset`` the stored features
    are returned unchanged; otherwise the song is loaded, the extractors are
    run, and one line ``key,<features>`` is appended to ``data.csv`` while
    ``lock`` is held.

    Args:
        songName: path of the audio file to analyse.

    Returns:
        The feature list, with every ``numpy.ndarray`` entry replaced by the
        result of ``avg`` on that array.
    """
    print(songName)
    # Strip ',' so the key cannot break the comma-separated output row.
    key = songName.split('/')[-1].replace(',', '')

    # Nothing to extract when the song is already in the dataset.
    if key in dataset:
        return dataset[key]

    # Load the song and run the extractors.
    audio = MonoLoader(filename=songName)()

    feature = list(LowLevelSpectralEqloudExtractor()(audio))
    del feature[1:3]  # drop the second and third extractor outputs

    featureTwo = list(LowLevelSpectralExtractor()(audio))
    del featureTwo[0]
    del featureTwo[-2]
    featureTwo[4] = feature[4][1]
    feature.extend(featureTwo)

    feature.append(Loudness()(audio))
    feature.append(LogAttackTime()(audio)[0])
    feature.append(KeyExtractor()(audio)[2])
    data = RhythmExtractor2013()(audio)
    feature.append(data[0])
    feature.append(data[2])

    # Collapse every array-valued feature to a single summary value.
    for x, value in enumerate(feature):
        if isinstance(value, np.ndarray):
            feature[x] = avg(value)

    arr = key + "," + str(feature)[1:-1] + "\n"
    # Hold the lock across open/write/close so the buffered data is flushed
    # before another writer may proceed, and release it even on failure.
    lock.acquire()
    try:
        with open('data.csv', 'a') as f:
            f.write(arr)
    finally:
        lock.release()
    return feature
Esempio n. 4
0
def shared_main(source, dest, display_result):
    """Load two audio inputs, split them into frames and compare them.

    Both inputs are loaded with ``_loader`` and cut into frames of 2048
    samples with a hop of 512. The frame generators are handed to
    ``compare`` together with a Hann window, a spectrum, a YinFFT pitch and
    a loudness algorithm.

    Args:
        source: first input, forwarded to ``_loader``.
        dest: second input, forwarded to ``_loader``.
        display_result: forwarded unchanged to ``compare``.

    Returns:
        The ``(min_cost, match_result)`` pair produced by ``compare``.
    """
    source_audio = _loader(source)
    destination_audio = _loader(dest)

    frame_options = {'frameSize': 2048, 'hopSize': 512}
    source_frame = FrameGenerator(source_audio, **frame_options)
    destination_frame = FrameGenerator(destination_audio, **frame_options)

    window = Windowing(type='hann')  # window function
    spectrum = Spectrum()  # spectrum function
    pitch_yin_fft = PitchYinFFT()  # pitch extractor
    loudness = Loudness()

    # NOTE(review): the meaning of the positional constants 5, 1, 1 is set by
    # compare(), which is not visible here - confirm against its signature.
    min_cost, match_result = compare(
        source_frame, destination_frame, window, spectrum, pitch_yin_fft,
        5, 1, 1, display_result, loudness)

    return min_cost, match_result
Esempio n. 5
0
def feature_allframes(audio, beats, frame_indexer = None):
	"""Compute beat-synchronous loudness context features.

	Beat frame ``j`` is the audio between ``beats[j]`` and ``beats[j+1]``.
	Each required frame is Hann-windowed and reduced to one loudness value.
	The loudness column is then passed through ``preprocessing.scale``, and
	the feature row of a selected frame ``i`` is the scaled loudness of
	frames ``i, i+1, i+2, i+3``.

	Note that the scaling runs over the whole per-beat array, including the
	frames that were not needed and therefore still hold 0.

	Args:
		audio: sample sequence of the track.
		beats: beat positions; they are multiplied by a 44100 Hz sample
			rate, so presumably expressed in seconds.
		frame_indexer: indices of the beat frames to return rows for. Every
			index ``i`` needs frames ``i .. i+3``, and a frame needs its
			successor beat, so valid indices are ``0 .. len(beats) - 5``.
			Defaults to ``1 .. len(beats) - 5`` (frame 0 stays excluded, as
			in the original default).

	Returns:
		The selected feature rows, shape ``(len(frame_indexer), 4)``.
	"""
	SAMPLE_RATE = 44100
	CONTEXT = 4  # number of consecutive frames in one feature row

	num_beats = len(beats)

	# Initialise the algorithms
	w = Windowing(type = 'hann')
	loudness = Loudness()

	if frame_indexer is None:
		# Stop early enough that frame i+3 still has a successor beat.
		frame_indexer = range(1, num_beats - CONTEXT)

	# One loudness value per beat frame
	loudness_values = np.zeros((num_beats, 1))
	# One row per beat frame: the scaled loudness of CONTEXT consecutive frames
	loudness_feature_vector = np.zeros((num_beats, CONTEXT))

	# Step 1: loudness of every frame read in step 2, i.e. frame i + k for a
	# selected i and k in 0..CONTEXT-1. A set keeps each membership test O(1).
	selected = set(frame_indexer)
	needed = [j for j in range(num_beats)
		if any(j - k in selected for k in range(CONTEXT))]
	for j in needed:
		start_sample = int(beats[j] * SAMPLE_RATE)
		end_sample = int(beats[j+1] * SAMPLE_RATE)
		# Keep the frame length even by dropping one trailing sample.
		if (end_sample - start_sample) % 2 != 0:
			end_sample -= 1
		loudness_values[j] = loudness(w(audio[start_sample:end_sample]))

	loudness_values = preprocessing.scale(loudness_values)

	# Step 2: construct the feature vector
	for i in frame_indexer:
		loudness_feature_vector[i] = np.reshape(loudness_values[i:i+CONTEXT], (CONTEXT,))

	# Only the rows of the selected frames are returned
	return loudness_feature_vector[frame_indexer]