コード例 #1
0
import numpy as np
import sys
sys.path.append('../')
from datagrabber import extractAndSave, extractAndSaveYoutubeData

IEMOCAP_LOCATION = "../../../../local"
YOUTUBE_LOCATION = "../../../../local/wild_dataset/10_to_20_seconds"


def amplitude(frame, filename):
    '''
    Compute amplitude statistics for a single audio frame.

    Returns a three-element feature list: [maximum, mean, variance] of the
    frame's sample amplitudes. `filename` is unused but kept for the common
    feature-extractor callback signature.
    '''
    stats = (np.amax(frame), np.average(frame), np.var(frame))
    return list(stats)


# Extract amplitude features from the IEMOCAP and YouTube datasets.
# NOTE(review): the positional arguments after the dataset location appear
# to be (frame length?, flag, flag) — confirm against datagrabber's signature.
extractAndSave(amplitude, ["max", "mean", "var"], IEMOCAP_LOCATION, 2, True,
               True)
extractAndSaveYoutubeData(amplitude, ["max", "mean", "var"], YOUTUBE_LOCATION,
                          2, True, True)
コード例 #2
0
ファイル: f0.py プロジェクト: hmajid2301/EmotionCommotion
    # NOTE(review): the enclosing `def f0(...)` and the definitions of
    # `spectrum` and `threshold` are outside this excerpt; peaks() is
    # presumably sorted by decreasing amplitude — confirm in thinkdsp.
    greatest_amp = (spectrum.peaks()[0])[0]
    #a counter will be used to iterate over the positions of the list
    list_pos = 0
    #the pairs with amplitudes which are within the threshold will be copied to a new list
    selected_pairs = []
    while True:
        #select the next pair from the list
        # NOTE(review): if every peak clears the threshold, list_pos will run
        # past the end of peaks() and raise IndexError — consider bounding it.
        pair = (spectrum.peaks()[list_pos])
        #test whether the pair's amplitude is within the threshold
        within_threshold = (pair[0] > (greatest_amp * threshold))
        #if not within the threshold the loop can break as all followings pairs will have lower amplitudes
        if within_threshold == False:
            break
        #if within the threshold add the pair to the list
        selected_pairs.append(pair)
        #increment the counter for the loop
        list_pos = list_pos + 1

    # selected_pairs is now a list of the pairs which have a prominent amplitude
    sorted_by_Hz = sorted(selected_pairs, key=lambda tup: tup[1])
    # the lowest Hz should now be F0
    # Return 0 as a sentinel when no peak passed the threshold.
    if len(sorted_by_Hz) == 0:
        return [0]
    else:
        return [sorted_by_Hz[0][1]]


# Extract f0 (fundamental frequency) features from the IEMOCAP and YouTube
# datasets; `f0` is defined above this excerpt.
extractAndSave(f0, ['f0'], IEMOCAP_LOCATION, 2, True, True)
extractAndSaveYoutubeData(f0, ["f0"], YOUTUBE_LOCATION, 2, True, True)
コード例 #3
0
Created on Fri Nov 11 12:18:26 2016

@author: Olly Styles
"""
import sys

sys.path.append('../')
from datagrabber import extractAndSave
from datagrabber import extractAndSave, extractAndSaveYoutubeData

IEMOCAP_LOCATION = "../../../../local"
YOUTUBE_LOCATION = "../../../../local/wild_dataset/10_to_20_seconds"


def zerocrossing(frame, audiofile):
    '''
    Returns the number of times the signal crosses the y-axis for a given
    frame, as a one-element feature list. `audiofile` is unused but kept
    for the common feature-extractor callback signature.
    '''
    n = 0
    for i in range(len(frame) - 1):
        # BUG FIX: the original `frame[i] > 0 != frame[i + 1] > 0` is a
        # *chained* comparison in Python, equivalent to
        # (frame[i] > 0) and (0 != frame[i + 1]) and (frame[i + 1] > 0),
        # which counts consecutive positive samples rather than sign
        # changes. Parenthesising makes `!=` the intended boolean XOR.
        if (frame[i] > 0) != (frame[i + 1] > 0):
            n += 1
    return [n]


# Extract zero-crossing counts from the IEMOCAP and YouTube datasets.
# (The original comment said "energy"; this script computes zero crossings.)
extractAndSave(zerocrossing, ['zerocrossing'], IEMOCAP_LOCATION, 2, True, True)
extractAndSaveYoutubeData(zerocrossing, ["zerocrossing"], YOUTUBE_LOCATION, 2,
                          True, True)
コード例 #4
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 12 12:04:22 2016

@author: Tom
"""
import sys
sys.path.append('../')

from datagrabber import extractAndSave, extractAndSaveYoutubeData
import numpy as np

IEMOCAP_LOCATION = "../../../../local"
YOUTUBE_LOCATION = "../../../../local/wild_dataset/10_to_20_seconds"


def energy(frame, audiofile):
    '''
    Return the energy of a frame: the sum of its squared sample amplitudes,
    as a one-element feature list. `audiofile` is unused but kept for the
    common feature-extractor callback signature.
    '''
    squared = np.asarray(frame) ** 2
    return [squared.sum()]


# Extract energy features from the IEMOCAP and YouTube datasets.
extractAndSave(energy, ['energy'], IEMOCAP_LOCATION, 2, True, True)
extractAndSaveYoutubeData(energy, ["energy"], YOUTUBE_LOCATION, 2, True, True)
コード例 #5
0
    # NOTE(review): the enclosing def and the definitions of `aub` (aubio?),
    # `frame`, `frame_size`, `fftsize`, `coefficientsCount` and `sampleRate`
    # are outside this excerpt — confirm against the full file.
    # Build the MFCC extractor: FFT size, 40 filterbank bands,
    # `coefficientsCount` output coefficients, at `sampleRate`.
    m = aub.mfcc(fftsize, 40, coefficientsCount, sampleRate)

    #first we need to convert this frame to the power spectrum using a DFT
    p = aub.pvoc(fftsize, int(frame_size))
    #in order to compute DFT the frame must be of a length which is a power of 2, so expand to fftsize using zero padding
    # NOTE(review): the guard compares against the literal 16000 but pads up
    # to `frame_size` — if frame_size != 16000 these disagree; verify intent.
    if len(frame) != 16000:
        frame = np.pad(frame, (0, frame_size - len(frame)),
                       'constant',
                       constant_values=0)
    #compute the power spectrum
    spec = p(frame.astype(np.float32))

    #compute the MFCC, which returns the coefficients of each of the 12 coefficients
    mfcc_out = m(spec)
    return mfcc_out


# 1. Frame the signal into short frames.
# 2. For each frame calculate the periodogram estimate of the power spectrum.
# 3. Apply the mel filterbank to the power spectra, sum the energy in each filter.
# 4. Take the logarithm of all filterbank energies.
# 5. Take the DCT of the log filterbank energies.
# 6. Keep DCT coefficients 2-13, discard the rest

#http://practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs/#computing-the-mel-filterbank

# Extract MFCC features from the IEMOCAP and YouTube datasets.
# NOTE(review): `mfcc` and `labels` are defined above this excerpt.
extractAndSave(mfcc, labels, IEMOCAP_LOCATION, 2, True, True)
extractAndSaveYoutubeData(mfcc, labels, YOUTUBE_LOCATION, 2, True, True)
コード例 #6
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 12 12:04:22 2016

@author: Tom
"""
import sys
sys.path.append('../sourceFiles/')
import thinkplot as tp
import thinkdsp as td
sys.path.append('../')

from datagrabber import extractAndSave, extractAndSaveYoutubeData

IEMOCAP_LOCATION = "../../../../local"
YOUTUBE_LOCATION = "../../../../local/wild_dataset/10_to_20_seconds"


#make_spectrum is sorted in order of decreasing amplitude, so the first pair is going to be the amplitude of the pitch and the pitch frequency.
def pitch(frame, audiofile):
    '''
    Return the frequency of the frame's most prominent spectral peak.

    make_spectrum().peaks() is ordered by decreasing amplitude, so the
    first (amplitude, frequency) pair holds the pitch; its frequency is
    returned as a one-element feature list. `audiofile` is unused but kept
    for the common feature-extractor callback signature.
    '''
    wave = td.Wave(frame)
    dominant_peak = wave.make_spectrum().peaks()[0]
    return [dominant_peak[1]]


# IEMOCAP extraction is disabled here; only the YouTube dataset is processed.
#extractAndSave(pitch,['pitch'],IEMOCAP_LOCATION,2,False)
extractAndSaveYoutubeData(pitch, ["pitch"], YOUTUBE_LOCATION, 2)
コード例 #7
0
sys.path.append('../')

from datagrabber import extractAndSave,extractAndSaveYoutubeData

IEMOCAP_LOCATION = "../../../../local"
YOUTUBE_LOCATION = "../../../../local/wild_dataset/10_to_20_seconds"

def cepstrum(frame, filename):
    '''
    Compute cepstral summary features for an audio frame.

    The frame is transformed with an FFT, its magnitude is squared to give
    the power spectrum, a log is taken (simulating the non-linearity of
    human hearing), and the real part of the inverse FFT yields the
    cepstrum. Returns [max, mean, variance] of that cepstrum. `filename`
    is unused but kept for the common feature-extractor callback signature.
    '''
    # Power spectrum: |DFT|^2 of the frame.
    power_spectrum = np.abs(np.fft.fft(frame)) ** 2
    # Log scale mimics human loudness perception.
    # NOTE: a zero power bin would yield -inf here, as in the original.
    log_spectrum = np.log2(power_spectrum)
    # The real part of the inverse transform is the (real) cepstrum.
    ceps = np.fft.ifft(log_spectrum).real
    return [np.amax(ceps), np.average(ceps), np.var(ceps)]

# Extract cepstrum features from the IEMOCAP and YouTube datasets.
extractAndSave(cepstrum,["max", "mean", "var"],IEMOCAP_LOCATION,2,True,True)
extractAndSaveYoutubeData(cepstrum,["max", "mean","var"],YOUTUBE_LOCATION,2,True,True)