Ejemplo n.º 1
0
def feature_extraction(audiofile):
    """
    Extract acoustic features with the settings from the 'mfcc' parameter table.

    :param audiofile: audio signal as a 1-D numpy array (16 kHz samples assumed)
    :return: tuple (features, num_rows) where features is a pandas DataFrame of
             shape (frames, num_features) and num_rows is the number of frames kept
    :raises ValueError: if the configured feature type is neither "MFCC" nor "MEL"
    """
    features_df = parameters('mfcc')

    batch_size = features_df.iloc[0]['length']    # window length in samples
    batch_shift = features_df.iloc[0]['shift']    # window shift in samples
    name = features_df.iloc[0]['mode']            # feature type: "MFCC" or "MEL"
    num_features = features_df.iloc[0]['features']

    rate = 16000
    # Convert sample counts to seconds using the sampling rate (previously the
    # literal 16000 was repeated here, silently coupling it to `rate`).
    winlen = batch_size / rate
    winstep = batch_shift / rate

    if name == "MFCC":
        one_feature = fs.mfcc(audiofile,
                              winlen=winlen,
                              samplerate=rate,
                              winstep=winstep,
                              numcep=num_features,
                              nfilt=64,
                              nfft=512,
                              lowfreq=0,
                              highfreq=None,
                              preemph=0.97,
                              ceplifter=22,
                              appendEnergy=False,
                              winfunc=np.hamming)
    elif name == "MEL":
        one_feature = fs.logfbank(audiofile,
                                  winlen=winlen,
                                  samplerate=rate,
                                  winstep=winstep,
                                  nfilt=64,
                                  nfft=512,
                                  lowfreq=0,
                                  highfreq=None,
                                  preemph=0.97)
    else:
        # Previously this branch only printed a warning and then crashed with
        # a NameError on `one_feature`; fail fast with a clear error instead.
        raise ValueError("Feature-Type not found: {!r}".format(name))

    # Drop the first frame and truncate so the remaining frame count is a
    # multiple of num_features; keep only the first num_features columns.
    # (Identical trimming for both feature types — deduplicated.)
    num_rows = np.size(one_feature, 0) - 1
    num_rows = num_rows - num_rows % num_features
    one_feature = one_feature[1:num_rows + 1, :num_features]

    features = pd.DataFrame(one_feature)
    return features, num_rows
Ejemplo n.º 2
0
    def __init__ (self, symbol, basket=False, spec=None):
        if basket==False:
            self.symbol = symbol
            csvFile = './price_data/day/'+symbol+'.csv'
            if os.path.isfile(csvFile) == False:
                get_data(self.symbol)
            self.df = pd.read_csv(csvFile)
        else:
            if spec is None:
                print "for basket security prices, spec(trade) name must be provided"
                raise 
            p = parameters(spec)
            basketParam = p.get(['security', 'weight'],basket, 'security')
            self.symbol = basket
            
            for index, item in enumerate(basketParam):
                symbol = item[0]
                weight = float(item[1])
                csvFile = './price_data/day/'+symbol+'.csv'
                if os.path.isfile(csvFile) == False:
                    print "getting symbol prices online", symbol
                    get_data(symbol)

                tempdf = pd.read_csv(csvFile)
                if index == 0:
                    self.df = pd.DataFrame(data=0, index=tempdf.index, columns=tempdf.columns)
                    self.df['Date'] = tempdf['Date']
                beginningPrice = tempdf.tail(1)['Adj Close'].values[0]
                tempdf['Adj Close'] /= beginningPrice / 100.
                self.df[['Open','High','Low','Close','Volume','Adj Close']] += tempdf[['Open','High','Low','Close','Volume','Adj Close']]*weight  # times weights
            self.df.dropna(inplace=True)  
                #Question, should I round before the sum or after?
                #self.df[['Open','High','Low','Close','Volume','Adj Close']].apply(np.round(2))
            
        self.df.sort('Date', inplace=True)
        self.df.reset_index(drop=True, inplace=True)
Ejemplo n.º 3
0
import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers import Conv2D, MaxPooling2D, Lambda, Input
from keras.layers import Dropout
from keras.layers.normalization import BatchNormalization
from keras import optimizers
from keras.models import Model
from keras.callbacks import TensorBoard
from parameter import parameters
import tensorflow as tf
import numpy as np
import random

# Load CNN hyper-parameters from the shared parameter table.
cnn_df = parameters('cnn')
# Square kernel sizes for the first and second convolution layers.
kernel1 = (cnn_df.iloc[0]['kernel1'], cnn_df.iloc[0]['kernel1'])
kernel2 = (cnn_df.iloc[0]['kernel2'], cnn_df.iloc[0]['kernel2'])
number_speaker = cnn_df.iloc[0]['number_speaker']
accuracy_speaker = []  # collects per-speaker accuracies across runs
epochs = 10

# Callbacks
# Creates EarlyStoppingFunction for Training
# Stops once val_loss fails to improve by at least 0.03 for 3 epochs.
stopper = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.03, patience=3, verbose=0, mode='auto',
                                        baseline=None)


def neural_network(x_eval, y_eval, x_train, y_train, loss, x_test, y_test, x_retest, y_retest, x_retrain, y_retrain, utterance=False):

    y_test_utt = y_test[:, 1]
Ejemplo n.º 4
0
"""
Create Database for Neural Network using Pandas
"""
import numpy as np
import pandas as pd
from parameter import parameters
from feature_extraction import feature_extraction
# Load database-building parameters from the shared parameter table.
db_df = parameters('database')
features = db_df.iloc[0]['features']        # number of features per frame
path = db_df.iloc[0]['path_features']       # base path for saved feature files

number_speakers = db_df.iloc[0]['number_speakers']

def database(mode, saved, type):
    """
    Returns data and label for a given mode, from the timit database
    :param mode: define test or training mode
    :param saved: 0 for no saved file, 1 for an already existing file
    :return: for mode = training returns data matrix and label vector for training and evaluation,
             for mode = test return, data matrix and label vector for testing
    """
    test_f = []
    test_m = []

    # Paths for home and university and number features

    female_speaker_list = pd.DataFrame()
    male_speaker_list = pd.DataFrame()
    test_data = pd.DataFrame()
    training1 = pd.DataFrame()
    eval_con = pd.DataFrame()
Ejemplo n.º 5
0
from database_provider import database
from parameter import parameters
import numpy as np
from NeuralNetworkSetup import neural_network

#
# Important Run GetWAVToPythonFile before main one time for initialisation
#

# If already run once, the database is loaded (1) / saved in a file (edit in
# parameter.py); the program then skips creating the database and loads the files.
database_loaded = 0

cnn_list = parameters('cnn')
path_features = cnn_list.iloc[0]['extracted_features']

accuracy = 0

if database_loaded == 0:
    # Build the train/eval splits from the TIMIT database and cache them.
    eval_data, eval_labels, train_data, train_labels = database(
        'train', database_loaded, 'gender')

    np.save(path_features.format('eval_data.npy'), eval_data)
    # BUG FIX: eval labels were previously written to 'test_label.npy', which
    # the test split below then overwrote — the eval labels were silently lost.
    np.save(path_features.format('eval_label.npy'), eval_labels)

    np.save(path_features.format('train_data.npy'), train_data)
    np.save(path_features.format('train_label.npy'), train_labels)

    test_data, test_labels = database('test', database_loaded, 'gender')
    np.save(path_features.format('test_data.npy'), test_data)
    np.save(path_features.format('test_label.npy'), test_labels)