Code Example #1
# Presumably a method of AudioClassifierManager: the __compute_beat reference
# relies on Python name mangling, so it only resolves inside the class body.
def getFeaturesAndClasses(dirs):
    return aF.dirsWavFeatureExtraction(dirs,
                                       AudioClassifierManager.getMtWin(),
                                       AudioClassifierManager.getMtStep(),
                                       AudioClassifierManager.getStWin(),
                                       AudioClassifierManager.getStStep(),
                                       compute_beat=AudioClassifierManager.__compute_beat)
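The getters and the private flag come from an AudioClassifierManager class the snippet does not show; a minimal sketch of the presumed surrounding class (every value below is an assumption, only the four getters and __compute_beat are implied by the snippet):

class AudioClassifierManager:
    # assumed analysis parameters; the real project defines these elsewhere
    __mt_win = 1.0
    __mt_step = 1.0
    __st_win = 0.050
    __st_step = 0.050
    __compute_beat = False

    @staticmethod
    def getMtWin():
        return AudioClassifierManager.__mt_win

    @staticmethod
    def getMtStep():
        return AudioClassifierManager.__mt_step

    @staticmethod
    def getStWin():
        return AudioClassifierManager.__st_win

    @staticmethod
    def getStStep():
        return AudioClassifierManager.__st_step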
Code Example #2
import sklearn.svm
from pyAudioAnalysis import audioFeatureExtraction as aF
from pyAudioAnalysis import audioTrainTest as aT

def train(files):
    # extract averaged mid-term features from each class directory
    features, classes, filenames = aF.dirsWavFeatureExtraction(
        files, 1.0, 1.0, aT.shortTermWindow, aT.shortTermStep)
    # normalize
    [featuresNorm, MEAN, STD] = aT.normalizeFeatures(features)
    [X, Y] = aT.listOfFeatures2Matrix(featuresNorm)
    # train a linear SVM
    clf = sklearn.svm.SVC(kernel='linear', probability=True)
    clf.fit(X, Y)
    return clf, MEAN, STD
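A minimal usage sketch for the model returned above, assuming the older pyAudioAnalysis API these snippets use and a hypothetical WAV path: extract one file's averaged mid-term features, apply the stored normalization, then predict.

from pyAudioAnalysis import audioBasicIO

clf, MEAN, STD = train(["data/classA", "data/classB"])  # class directories are assumptions
[Fs, x] = audioBasicIO.readAudioFile("example.wav")     # hypothetical input file
x = audioBasicIO.stereo2mono(x)
mt_feats, _ = aF.mtFeatureExtraction(x, Fs, round(1.0 * Fs), round(1.0 * Fs),
                                     round(Fs * aT.shortTermWindow),
                                     round(Fs * aT.shortTermStep))
fv = (mt_feats.mean(axis=1) - MEAN) / STD               # same normalization as training
print(clf.predict(fv.reshape(1, -1)))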
Code Example #3
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score

def final(repo_path_to_test):
    # load the persisted model (load_model is a project helper; see the sketch below)
    [SVM, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step,
     compute_beat] = load_model("svm3Classes")
    # load the test set and split into X, y
    [features_test, classNames_test, filenames_test] = aF.dirsWavFeatureExtraction(
        [repo_path_to_test + "/positive",
         repo_path_to_test + "/neutral",
         repo_path_to_test + "/negative"],
        1.0, 1.0, shortTermWindow, shortTermStep, compute_beat=False)
    [x_test, y_test] = listOfFeatures2Matrix(features_test)
    x_test = (x_test - MEAN) / STD  # apply the training normalization before predicting
    y_pred = SVM.predict(x_test)
    cm = confusion_matrix(y_pred=y_pred, y_true=y_test)
    f1 = f1_score(y_pred=y_pred, y_true=y_test, average='micro')
    acc = accuracy_score(y_pred=y_pred, y_true=y_test)
    plotly_classification_results(cm, ["positive", "neutral", "negative"])
    print("FINAL -----> FOR C:", C, "F1:", f1, "ACC:", acc)  # C: presumably a module-level setting
Code Example #4
import os
from catboost import CatBoostClassifier

def recognition_emotion_from_voice():
    classifier2 = CatBoostClassifier(iterations=1000,
                                     learning_rate=0.25,
                                     depth=5,
                                     loss_function='MultiClassOneVsAll',
                                     eval_metric="Accuracy")
    classifier2.load_model("stable_model")
    if len(os.listdir("data/voice/")) >= 3:
        # At is presumably the project's alias for audioFeatureExtraction;
        # data[0] is the list of per-directory feature matrices, so data[0][0]
        # holds one mid-term feature row per WAV in data/voice
        data = At.dirsWavFeatureExtraction(["data/voice"], 1, 1, 0.05, 0.05)
        result = classifier2.predict(data[0][0])
        result = [x[0] for x in result]       # flatten CatBoost's (n, 1) prediction array
        return max(result, key=result.count)  # majority vote across clips
    return None
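The last two lines implement a majority vote over per-clip predictions; an equivalent, slightly clearer form using collections.Counter (a sketch):

from collections import Counter

def majority_label(predictions):
    # predictions: a flat list of per-clip labels
    return Counter(predictions).most_common(1)[0][0]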
Code Example #5
def trainNN(listOfDirs, mtWin, mtStep, stWin, stStep, computeBEAT=False):
    #Feature Extraction
    [features, classNames,
     _] = aF.dirsWavFeatureExtraction(listOfDirs,
                                      mtWin,
                                      mtStep,
                                      stWin,
                                      stStep,
                                      computeBEAT=computeBEAT)

    if len(features) == 0:
        print("feature ERROR")
        return

    numOfFeatures = features[0].shape[1]
    featureNames = ["features" + str(d + 1) for d in range(numOfFeatures)]
    # modelName is presumably a module-level global in the source project
    aT.writeTrainDataToARFF(modelName, features, classNames, featureNames)
    for i, f in enumerate(features):
        if len(f) == 0:
            print("feature ERROR")
            return

    C = len(classNames)
    [featuresNorm, MEAN,
     STD] = aT.normalizeFeatures(features)  # normalize features
    MEAN = MEAN.tolist()
    STD = STD.tolist()
    featuresNew = featuresNorm

    bestParam = evaluate(featuresNew,
                         classNames,
                         100,
                         numpy.array([1, 2, 3, 4, 5, 6]),
                         0,
                         perTrain=0.80)
    clf = train(featuresNew, bestParam)

    with open(modelName, 'wb') as fid:
        cPickle.dump(clf, fid)
    fo = open(modelName + "MEANS", "wb")
    cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(stWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(stStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(computeBEAT, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    fo.close()
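This snippet targets Python 2 (cPickle, and the original used print statements); a small compatibility shim if the same code must run under Python 3 (a sketch, not from the source):

try:
    import cPickle                # Python 2
except ImportError:
    import pickle as cPickle      # Python 3: pickle replaces cPickle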
Code Example #6
def ff_one(repo_path, dataset):
    # normalize features per video id and keep them in a dict,
    # so each fold below can pull the matching entry
    video_dict = {}
    for i in np.array(dataset["Id"]):
        print(i)
        dirs = [repo_path + "/audio/" + str(i) + "/positive",
                repo_path + "/audio/" + str(i) + "/neutral",
                repo_path + "/audio/" + str(i) + "/negative"]
        print(dirs)
        [features_train, classNames, filenames] = aF.dirsWavFeatureExtraction(
            dirs, 1.0, 1.0, shortTermWindow, shortTermStep, compute_beat=False)
        print([features_train, classNames, filenames])
        [features_norm, MEAN, STD] = normalizeFeatures(features_train)  # normalize features
        video_dict[i] = [features_norm, MEAN, STD]


    for train, test in kfold.split(dataset["Pickle"][:25]):  # kfold: presumably defined at module level
        video_test = []
        video_train = []
        for t in train:
            video_train = np.concatenate((video_train, video_dict[t][0]), axis=0)
        for te in test:
            video_test = np.concatenate((video_test, video_dict[te][0]), axis=0)


        [X_train, Y_train] = listOfFeatures2Matrix(video_train)
        [x_test, y_test] = listOfFeatures2Matrix(video_test)

        print("Before OverSampling, counts of label 'positive': {}".format(sum(Y_train==1)))
        print("Before OverSampling, counts of label 'neutral': {} \n".format(sum(Y_train==0)))
        print("Before OverSampling, counts of label 'negative': {} \n".format(sum(Y_train==2)))

        sm = SMOTE(random_state=2,kind='svm')
        X_train, Y_train = sm.fit_sample(X_train, Y_train)
        print("A OverSampling, counts of label 'positive': {}".format(sum(Y_train==1)))
        print("A OverSampling, counts of label 'neutral': {} \n".format(sum(Y_train==0)))
        print("A OverSampling, counts of label 'negative': {} \n".format(sum(Y_train==2)))


        # print("X:",X_train)
        # print("Y:",Y_train)
        cm, acc, f1 = svm_train_evaluate(X_train, Y_train, x_test, y_test,
                                         k_folds, C)  # k_folds, C: presumably module-level settings
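SMOTE(kind='svm') and fit_sample come from older imbalanced-learn releases; on current versions the same oversampling step reads (a sketch):

from imblearn.over_sampling import SVMSMOTE

sm = SVMSMOTE(random_state=2)
X_train, Y_train = sm.fit_resample(X_train, Y_train)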
Code Example #7
def featureAndTrainRegression(dir_name,
                              mt_win,
                              mt_step,
                              st_win,
                              st_step,
                              model_type,
                              model_name,
                              compute_beat=False):
    '''
    This function is used as a wrapper to segment-based audio feature extraction and regression model training.
    ARGUMENTS:
        dir_name:        path of directory containing the WAV files and Regression CSVs
        mt_win, mt_step:        mid-term window length and step
        st_win, st_step:        short-term window and step
        model_type:        "svm" or "knn" or "randomforest"
        model_name:        name of the model to be saved
    RETURNS:
        None. The resulting regression models, along with the respective model parameters, are saved to files.
    '''
    # STEP A: Feature Extraction:
    [features, _,
     filenames] = aF.dirsWavFeatureExtraction([dir_name],
                                              mt_win,
                                              mt_step,
                                              st_win,
                                              st_step,
                                              compute_beat=compute_beat)
    features = features[0]
    filenames = [ntpath.basename(f) for f in filenames[0]]

    # Read CSVs:
    CSVs = glob.glob(dir_name + os.sep + "*.csv")
    regression_labels = []
    regression_names = []
    f_final = []
    for c in CSVs:  # for each CSV
        cur_regression_labels = []
        f_temp = []
        # open the CSV that holds the current target value's annotations
        with open(c, 'rt') as csvfile:
            CSVreader = csv.reader(csvfile, delimiter=',', quotechar='|')
            for row in CSVreader:
                if len(row) == 2:  # the row holds two fields: (filename, target value)
                    if row[0] in filenames:  # ... and the filename exists in the feature list
                        index = filenames.index(row[0])
                        cur_regression_labels.append(float(row[1]))
                        f_temp.append(features[index, :])
                    else:
                        print("Warning: {} not found in list of files.".format(row[0]))
                else:
                    print("Warning: Row with unknown format in regression file")

        f_final.append(numpy.array(f_temp))
        regression_labels.append(
            numpy.array(cur_regression_labels)
        )  # cur_regression_labels is the list of values for the current regression problem
        regression_names.append(ntpath.basename(c).replace(
            ".csv", ""))  # regression task name
        if len(features) == 0:
            print("ERROR: No data found in any input folder!")
            return

    n_feats = f_final[0].shape[1]

    # TODO: ARFF WRITE????
    # STEP B: classifier Evaluation and Parameter Selection:
    if model_type == "svm" or model_type == "svm_rbf":
        model_params = numpy.array(
            [0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 5.0, 10.0])
    elif model_type == "randomforest":
        model_params = numpy.array([5, 10, 25, 50, 100])


#    elif model_type == "knn":
#        model_params = numpy.array([1, 3, 5, 7, 9, 11, 13, 15]);
    errors = []
    errors_base = []
    best_params = []

    for iRegression, r in enumerate(regression_names):
        # get optimal classifier parameter:
        print("Regression task " + r)
        bestParam, error, berror = evaluateRegression(
            f_final[iRegression], regression_labels[iRegression], 100,
            model_type, model_params)
        errors.append(error)
        errors_base.append(berror)
        best_params.append(bestParam)
        print("Selected params: {0:.5f}".format(bestParam))

        [features_norm, MEAN,
         STD] = normalizeFeatures([f_final[iRegression]])  # normalize features

        # STEP C: Save the model to file
        if model_type == "svm":
            classifier, _ = trainSVMregression(features_norm[0],
                                               regression_labels[iRegression],
                                               bestParam)
        if model_type == "svm_rbf":
            classifier, _ = trainSVMregression_rbf(
                features_norm[0], regression_labels[iRegression], bestParam)
        if model_type == "randomforest":
            classifier, _ = trainRandomForestRegression(
                features_norm[0], regression_labels[iRegression], bestParam)

        if model_type == "svm" or model_type == "svm_rbf" or model_type == "randomforest":
            with open(model_name + "_" + r, 'wb') as fid:
                cPickle.dump(classifier, fid)
            fo = open(model_name + "_" + r + "MEANS", "wb")
            cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(mt_win, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(mt_step, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(st_win, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(st_step, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            cPickle.dump(compute_beat, fo, protocol=cPickle.HIGHEST_PROTOCOL)
            fo.close()
    return errors, errors_base, best_params
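Each CSV drives one regression task, one row per annotated file; a hypothetical arousal.csv and a call wiring it up (directory, file, and model names are assumptions):

# data/regression/arousal.csv -- the task name is taken from the CSV basename:
#   speech01.wav,0.35
#   speech02.wav,-0.20
errors, errors_base, best_params = featureAndTrainRegression(
    "data/regression", 1.0, 1.0, 0.05, 0.05, "svm", "svmRegModel")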
Code Example #8
def featureAndTrain(list_of_dirs,
                    mt_win,
                    mt_step,
                    st_win,
                    st_step,
                    classifier_type,
                    model_name,
                    compute_beat=False,
                    perTrain=0.90):
    '''
    This function is used as a wrapper to segment-based audio feature extraction and classifier training.
    ARGUMENTS:
        list_of_dirs:        list of paths of directories. Each directory contains a single audio class whose samples are stored in separate WAV files.
        mt_win, mt_step:        mid-term window length and step
        st_win, st_step:        short-term window and step
        classifier_type:        "svm" or "knn" or "randomforest" or "gradientboosting" or "extratrees"
        model_name:        name of the model to be saved
    RETURNS:
        None. The resulting classifier, along with the respective model parameters, is saved to files.
    '''

    # STEP A: Feature Extraction:
    [features, classNames,
     _] = aF.dirsWavFeatureExtraction(list_of_dirs,
                                      mt_win,
                                      mt_step,
                                      st_win,
                                      st_step,
                                      compute_beat=compute_beat)

    if len(features) == 0:
        print("trainSVM_feature ERROR: No data found in any input folder!")
        return

    n_feats = features[0].shape[1]
    feature_names = ["features" + str(d + 1) for d in range(n_feats)]

    writeTrainDataToARFF(model_name, features, classNames, feature_names)

    for i, f in enumerate(features):
        if len(f) == 0:
            print("trainSVM_feature ERROR: " + list_of_dirs[i] +
                  " folder is empty or non-existing!")
            return

    # STEP B: classifier Evaluation and Parameter Selection:
    if classifier_type == "svm" or classifier_type == "svm_rbf":
        classifier_par = numpy.array([0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0])
    elif classifier_type == "randomforest":
        classifier_par = numpy.array([10, 25, 50, 100, 200, 500])
    elif classifier_type == "knn":
        classifier_par = numpy.array([1, 3, 5, 7, 9, 11, 13, 15])
    elif classifier_type == "gradientboosting":
        classifier_par = numpy.array([10, 25, 50, 100, 200, 500])
    elif classifier_type == "extratrees":
        classifier_par = numpy.array([10, 25, 50, 100, 200, 500])

    # get optimal classifier parameter:
    features2 = []
    for f in features:
        fTemp = []
        for i in range(f.shape[0]):
            temp = f[i, :]
            if (not numpy.isnan(temp).any()) and (not numpy.isinf(temp).any()):
                fTemp.append(temp.tolist())
            else:
                print("NaN Found! Feature vector not used for training")
        features2.append(numpy.array(fTemp))
    features = features2

    bestParam = evaluateclassifier(features, classNames, 100, classifier_type,
                                   classifier_par, 0, perTrain)

    print("Selected params: {0:.5f}".format(bestParam))

    C = len(classNames)
    [features_norm, MEAN,
     STD] = normalizeFeatures(features)  # normalize features
    MEAN = MEAN.tolist()
    STD = STD.tolist()
    featuresNew = features_norm

    # STEP C: Save the classifier to file
    if classifier_type == "svm":
        classifier = trainSVM(featuresNew, bestParam)
    elif classifier_type == "svm_rbf":
        classifier = trainSVM_RBF(featuresNew, bestParam)
    elif classifier_type == "randomforest":
        classifier = trainRandomForest(featuresNew, bestParam)
    elif classifier_type == "gradientboosting":
        classifier = trainGradientBoosting(featuresNew, bestParam)
    elif classifier_type == "extratrees":
        classifier = trainExtraTrees(featuresNew, bestParam)

    if classifier_type == "knn":
        [X, Y] = listOfFeatures2Matrix(featuresNew)
        X = X.tolist()
        Y = Y.tolist()
        fo = open(model_name, "wb")
        cPickle.dump(X, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(Y, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(bestParam, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mt_win, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mt_step, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(st_win, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(st_step, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(compute_beat, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()
    elif classifier_type == "svm" or classifier_type == "svm_rbf" or \
                    classifier_type == "randomforest" or \
                    classifier_type == "gradientboosting" or \
                    classifier_type == "extratrees":
        with open(model_name, 'wb') as fid:
            cPickle.dump(classifier, fid)
        fo = open(model_name + "MEANS", "wb")
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mt_win, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mt_step, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(st_win, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(st_step, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(compute_beat, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()
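A model saved by this function is normally applied through pyAudioAnalysis' companion helper; a small sketch assuming the same older pyAudioAnalysis API used throughout these snippets (class directories and the test file are assumptions):

from pyAudioAnalysis import audioTrainTest as aT

aT.featureAndTrain(["data/classA", "data/classB"], 1.0, 1.0,
                   aT.shortTermWindow, aT.shortTermStep,
                   "svm", "svmModel", False)
Result, P, classNames = aT.fileClassification("test.wav", "svmModel", "svm")
print(classNames[int(Result)], P)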
Code Example #9
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue May 22 12:32:09 2018

@author: bara
"""

from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioFeatureExtraction
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# extract features from audio files
[F, labels, files] = audioFeatureExtraction.dirsWavFeatureExtraction(
    ['samples/good', 'samples/bad'], 1, 1, 0.050, 0.025)

featuresNames = [
    'zcr_mean', 'energy_mean', 'entropy_mean', 'spectral_centroid_mean',
    'spectral_spread_mean', 'spectral_entropy_mean', 'spectral_flux_mean',
    'spectral_rolloff_mean', 'mfcc_1_mean', 'mfcc_2_mean', 'mfcc_3_mean',
    'mfcc_4_mean', 'mfcc_5_mean', 'mfcc_6_mean', 'mfcc_7_mean', 'mfcc_8_mean',
    'mfcc_9_mean', 'mfcc_10_mean', 'mfcc_11_mean', 'mfcc_12_mean',
    'mfcc_13_mean', 'chroma_1_mean', 'chroma_2_mean', 'chroma_3_mean',
    'chroma_4_mean', 'chroma_5_mean', 'chroma_6_mean', 'chroma_7_mean',
    'chroma_8_mean', 'chroma_9_mean', 'chroma_10_mean', 'chroma_11_mean',
    'chroma_12_mean', 'chroma_deviation_mean', 'zcr_std', 'energy_std',
    'entropy_std', 'spectral_centroid_std', 'spectral_spread_std',
    'spectral_entropy_std', 'spectral_flux_std', 'spectral_rolloff_std',
    'mfcc_1_std', 'mfcc_2_std', 'mfcc_3_std', 'mfcc_4_std', 'mfcc_5_std',
    'mfcc_6_std', 'mfcc_7_std', 'mfcc_8_std', 'mfcc_9_std', 'mfcc_10_std',
    # the source snippet was truncated here; the remaining names follow the
    # same order as the *_mean entries above
    'mfcc_11_std', 'mfcc_12_std', 'mfcc_13_std', 'chroma_1_std',
    'chroma_2_std', 'chroma_3_std', 'chroma_4_std', 'chroma_5_std',
    'chroma_6_std', 'chroma_7_std', 'chroma_8_std', 'chroma_9_std',
    'chroma_10_std', 'chroma_11_std', 'chroma_12_std', 'chroma_deviation_std'
]
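The snippet imports pandas but is cut off before using it; a plausible continuation (an assumption, not recovered from the source) tabulates the per-file feature vectors:

# one row per WAV, one column per feature name
df = pd.DataFrame(np.concatenate(F, axis=0), columns=featuresNames)
df['label'] = sum([[name] * len(f) for name, f in zip(labels, F)], [])
print(df.describe())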
Code Example #10
def f(repo_path, dataset):
    best_scores = []
    # find the best params via cross-validation
    classifier_par = numpy.array([0.01, 0.05, 0.1, 0.25])  # 0.5, 1.0, 5.0, 10.0])
    e = 0
    kfold = KFold(n_splits=2, shuffle=True)
    for train, test in kfold.split(np.array(dataset["Id"])):
        # create the train/test folders as well
        ff.create_folders(repo_path)
        print("Train: ", train, "Test: ", test)
        for k in train:
            path = repo_path + "/audio/" + str(dataset['Id'][k])
            # copy the video into the train (or test) folder
            pf.searchVideo(path, repo_path, "train")
        for k in test:
            path = repo_path + "/audio/" + str(dataset['Id'][k])
            pf.searchVideo(path, repo_path, "test")
        for C in classifier_par:
            print("For C: ", C, "For Fold: ", e)
            cm, acc, f1 = featureAndTrain(
                [repo_path + "/audio/train/positive",
                 repo_path + "/audio/train/neutral",
                 repo_path + "/audio/train/negative"],
                [repo_path + "/audio/test/positive",
                 repo_path + "/audio/test/neutral",
                 repo_path + "/audio/test/negative"],
                1.0, 1.0, shortTermWindow, shortTermStep,
                "svm", "svm5Classes", C, 0)
            best_scores.append([C, cm, acc, f1])
        e = e + 1
        ff.remove_folders(repo_path)
    print(best_scores)
    print(best_scores)

    # find the best f1 to pick the optimal C
    best_f1 = []
    for i in range(len(best_scores)):
        best_f1.append(best_scores[i][3])  # entries are [C, cm, acc, f1]

    m = max(best_f1)
    best_c = [i for i, j in enumerate(best_f1) if j == m]

    best_c = best_scores[best_c[0]]
    print("best Params ------->: ", best_c)
    # visualize performance measures for the best score;
    # position 1 holds the confusion matrix
    print(best_c[1])
    plotly_classification_results(best_c[1], ["positive", "neutral", "negative"])
    #print(acc, f1)


    # normalize again, this time over the whole dataset, and fit with the best params
    ff.create_folders(repo_path)
    for k in dataset["Id"]:
        path = repo_path + "/audio/" + str(k)
        pf.searchVideo(path, repo_path, "train")


    [features, classNames, filenames] = aF.dirsWavFeatureExtraction(
        [repo_path + "/audio/train/positive",
         repo_path + "/audio/train/neutral",
         repo_path + "/audio/train/negative"],
        1.0, 1.0, shortTermWindow, shortTermStep, compute_beat=False)
    [features_norm, MEAN, STD] = normalizeFeatures(features)  # normalize features
    # MEAN, STD = x.mean(axis=0), np.std(x, axis=0)
    # X = (x - MEAN) / STD

    os.chdir("../")  # move one folder up
    MEAN = MEAN.tolist()
    STD = STD.tolist()
    featuresNew = features_norm
    [X, Y] = listOfFeatures2Matrix(featuresNew)

    # if this fails, check the number of instances in each class: SMOTE defaults
    # to k_neighbors=5, so a class with too few instances makes it fail;
    # add more instances or lower k_neighbors
    sm = SMOTE(random_state=2)
    # print("!="+str(features_train))
    # print("x="+str(X))
    # print("y="+str(Y))
    x, y = sm.fit_sample(X, Y)

    cl = SVC(kernel='linear', C=best_c[0])
    classifier = cl.fit(x, y)
    mt_win = 1.0
    mt_step = 1.0
    st_win = shortTermWindow
    st_step = shortTermStep
    compute_beat = False
    with open("svm3Classes", 'wb') as fid:
        cPickle.dump(classifier, fid)
    fo = open("svm3Classes" + "MEANS", "wb")
    cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(mt_win, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(mt_step, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(st_win, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(st_step, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(compute_beat, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    fo.close()

    ff.remove_folders(repo_path)
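The files written here ("svm3Classes" and "svm3ClassesMEANS") are exactly what final() in Code Example #3 reads back through load_model; a hypothetical end-to-end run (the repo layout is an assumption):

# dataset: the project's id table, loaded elsewhere
f("/path/to/repo", dataset)   # cross-validate, then fit on everything and persist "svm3Classes"
final("/path/to/repo/test")   # Code Example #3: evaluate the persisted model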
Code Example #11
def featureAndTrain(list_of_dirs_train, list_of_dirs_test, mt_win, mt_step,
                    st_win, st_step, classifier_type, model_name, C, model,
                    compute_beat=False, k_folds=3):

    # feature extraction for the train and test directory lists
    [features_train, classNames_train, filenames_train] = aF.dirsWavFeatureExtraction(
        list_of_dirs_train, mt_win, mt_step, st_win, st_step, compute_beat=compute_beat)

    [features_test, classNames_test, filenames_test] = aF.dirsWavFeatureExtraction(
        list_of_dirs_test, mt_win, mt_step, st_win, st_step, compute_beat=compute_beat)
    features2 = []
    for f in features_train:
        fTemp = []
        for i in range(f.shape[0]):
            temp = f[i, :]
            if (not numpy.isnan(temp).any()) and (not numpy.isinf(temp).any()):
                fTemp.append(temp.tolist())
            else:
                print("NaN Found! Feature vector not used for training")
        features2.append(numpy.array(fTemp))
    features_train = features2

    features3 = []
    for f in features_test:
        fTemp = []
        for i in range(f.shape[0]):
            temp = f[i, :]
            if (not numpy.isnan(temp).any()) and (not numpy.isinf(temp).any()):
                fTemp.append(temp.tolist())
            else:
                print("NaN Found! Feature vector not used for testing")
        features3.append(numpy.array(fTemp))
    features_test = features3

    # MEAN, STD = x.mean(axis=0), np.std(x, axis=0)
    # X = (x - MEAN) / STD
    (features_norm_train, MEAN, STD) = normalizeFeatures(features_train)
    n_classes_train = len(features_train)

    (features_norm_test, MEAN_test, STD_test) = normalizeFeatures(features_test)
    n_classes_test = len(features_test)

    [x_test, y_test] = listOfFeatures2Matrix(features_norm_test)

    # oversample the training set with SMOTE
    [X_train, Y_train] = listOfFeatures2Matrix(features_norm_train)
    print("Before OverSampling, counts of label 'positive': {}".format(sum(Y_train == 1)))
    print("Before OverSampling, counts of label 'neutral': {}".format(sum(Y_train == 0)))
    print("Before OverSampling, counts of label 'negative': {} \n".format(sum(Y_train == 2)))

    sm = SMOTE(random_state=2, kind='svm')
    X_train, Y_train = sm.fit_sample(X_train, Y_train)
    print("After OverSampling, counts of label 'positive': {}".format(sum(Y_train == 1)))
    print("After OverSampling, counts of label 'neutral': {}".format(sum(Y_train == 0)))
    print("After OverSampling, counts of label 'negative': {} \n".format(sum(Y_train == 2)))
    time.sleep(5)
    print("!=" + str(features_train))
    print("x=" + str(X_train))
    print("y=" + str(Y_train))
    print("lx=" + str(len(X_train)))
    print("ly=" + str(len(Y_train)))
    # time.sleep(5)
    if model == 0:
        cm, acc, f1 = svm_train_evaluate(X_train, Y_train, x_test, y_test, k_folds, C)
        return cm, acc, f1
    else:
        create_model(X_train, Y_train, x_test, y_test)
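svm_train_evaluate and create_model are project helpers not shown in these snippets; a minimal sketch of what svm_train_evaluate plausibly does (a reconstruction under stated assumptions, not the project's code), fitting a linear SVM and scoring on the held-out set:

from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score

def svm_train_evaluate(X_train, Y_train, x_test, y_test, k_folds, C):
    # hypothetical reconstruction: fit, predict, report the three metrics;
    # k_folds is kept only for signature compatibility
    clf = SVC(kernel='linear', C=C)
    clf.fit(X_train, Y_train)
    y_pred = clf.predict(x_test)
    cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='micro')
    return cm, acc, f1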