Python sortDataset Examples

Programming Language: Python

Namespace/Package Name: dataset

Method/Function: sortDataset

Examples at hotexamples.com: 7

Python sortDataset - 7 examples found. These are the top-rated real-world Python examples of dataset.sortDataset, extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: executiontime.py Project: kristiankrohn/Masterproject

def calculateAndWriteExecutionTime(classifierstring = 'AllFeatures', shift = False, windowLength = 250, Sort = False):
    """Benchmark the feature functions on one sample and write the timings.

    Loads and sorts the dataset, uses the first element of each of the first
    eight channels as benchmark input, and for every index up to the length of
    the feature mask records the fastest of 10000 single-run timings of
    ``calculateFeature``. The timings are passed to ``writeToFile`` in index
    order.

    Parameters:
        classifierstring: name passed to ``features.readFeatureMask``.
        shift: forwarded to ``dataset.loadDataset``.
        windowLength: forwarded to ``dataset.loadDataset`` and ``writeToFile``.
        Sort: when true, additionally print the feature names ordered by
            execution time via ``printSortedValues``.
    """
    featuremask = features.readFeatureMask(classifierstring)
    dictionary = {}
    executionTimeList = []

    X1, y1 = dataset.loadDataset(filename="data.txt", filterCondition=True,
                                filterType="DcNotch", removePadding=True, shift=shift, windowLength=windowLength)
    X1, y1 = dataset.sortDataset(X1, y1, length=130, classes=[0,1,2,3,4,5,6,7,8,9], merge = True) #,6,4,2,8
    # Benchmark input: the first element from each of the eight channels.
    InputData = [X1[channel][0] for channel in range(8)]
    # Time every feature index covered by the mask.
    for i in range(len(featuremask)):
        wrapped = wrapper(calculateFeature, i, InputData)
        # The second space-separated token of the FUNC_MAP entry's string
        # form is used as the name (presumably the function name - confirm).
        featureName = str(features.FUNC_MAP.get(i)).split(" ")[1] + " "
        # Keep the minimum: it is the run least disturbed by other activity.
        dictionary[featureName] = min(timeit.repeat(wrapped, repeat = 10000, number = 1))
        print("finished with feature %d" %i)
        executionTimeList.append(dictionary[featureName])

    writeToFile(executionTimeList, windowLength)
    # Optionally print the features from fastest to slowest.
    if Sort:
        sortedValues = sorted(dictionary.values())
        sortedKeys = sorted(dictionary, key = dictionary.__getitem__)
        printSortedValues(sortedKeys, sortedValues)

Example #2

Show file

File: kerasTest.py Project: kristiankrohn/Masterproject

def startLearning():
    """Train a Keras classifier on the extracted features and cross-validate it.

    Loads and sorts ``data.txt``, extracts the masked features, scales and
    splits them, fits a ``KerasClassifier`` built by ``createModel`` on the
    training split and prints the 50-fold cross-validation accuracy.
    """
    #Make sure that the result can be reproduced
    seed = 7
    np.random.seed(seed)

    X, y = dataset.loadDataset(filename="data.txt",
                               filterCondition=True,
                               filterType="DcNotch",
                               removePadding=True,
                               shift=False,
                               windowLength=250)
    X, y = dataset.sortDataset(X,
                               y,
                               length=10000,
                               classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                               merge=True)
    channelIndex = 0

    featuremask = features.readFeatureMask()
    XL = features.extractFeaturesWithMask(X,
                                          channelIndex,
                                          featuremask=featuremask,
                                          printTime=False)

    # Only the training split is used below; the remaining return values are
    # unpacked because scaleAndSplit returns them together.
    XLtrain, XLtest, yTrain, yTest, XL, scaler = classifier.scaleAndSplit(
        XL, y[0])

    clf = KerasClassifier(build_fn=createModel,
                          epochs=10,
                          batch_size=50,
                          verbose=0)
    clf.fit(XLtrain, yTrain)

    scores = cross_val_score(clf, XLtrain, yTrain, cv=50, scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    print()
    print("Scores")
    print(scores)

Example #3

Show file

File: predict.py Project: kristiankrohn/Masterproject

def createPredictor(name,
                    windowLength,
                    datasetnum=1,
                    shift=None,
                    bruteForcemask=None,
                    zeroClassMultiplier=2,
                    datasetLength=130,
                    kernel='rbf'):
    """Train, evaluate and save an SVM predictor for one or more datasets.

    The run parameters are pickled to ``Parameters/<name>.pkl``. Every
    requested dataset folder is loaded, sorted and merged; a feature mask is
    either computed with ``features.compareFeatures2`` or copied from
    ``bruteForcemask``; features are extracted, scaled with one scaler fitted
    on the merged data, and split per dataset. The classifier and scaler are
    saved under ``name`` and recall / test-set scores are printed.

    Parameters:
        name: identifier used for the parameter file, feature mask, machine
            state and scaler.
        windowLength: forwarded to ``dataset.loadDataset``.
        datasetnum: a dataset folder number or an iterable of them.
        shift: forwarded to ``dataset.loadDataset``; when None it defaults to
            True for ``windowLength < 250`` and False otherwise.
        bruteForcemask: name of an existing feature mask to reuse; when None
            the mask is computed by ``features.compareFeatures2``.
        zeroClassMultiplier: forwarded to ``dataset.sortDataset``.
        datasetLength: forwarded to ``dataset.sortDataset`` as ``length``.
        kernel: one of 'rbf', 'linear' or 'linearSVC'.

    Raises:
        ValueError: if ``kernel`` is not one of the supported values.
    """
    ##### Parameters
    if shift is None:
        if windowLength < 250:
            shift = True
            print("Shift is true")
        else:
            shift = False
            print("Shift is false")
    else:
        print("Shift is: %r" % shift)
    ##### Save parameters
    parameters = {
        'windowLength': windowLength,
        'shift': shift,
        'dataset': datasetnum,
        'datasetLength': datasetLength,
        'kernel': kernel
    }
    # Use a context manager so the parameter file is flushed and closed.
    with open("Parameters" + slash + name + ".pkl", "wb") as parameterFile:
        pickle.dump(parameters, parameterFile)

    ##### Declarations
    bestParams = []
    accuracyScore = []
    f1Score = []
    precision = []
    classificationReport = []
    XL = [[], [], [], [], [], [], [], []]
    y = [[], [], [], [], [], [], [], []]
    XLlist = []
    ylist = []
    XLtrain = None
    XLtest = None
    yTrain = None
    yTest = None

    ##### Code

    # Accept a single dataset number as well as a collection of them.
    if isinstance(datasetnum, int):
        datasetnum = [datasetnum]

    print(datasetnum)

    for i in datasetnum:
        print(i)
        dataset.setDatasetFolder(i)

        X, Y = dataset.loadDataset(filename="data.txt",
                                   filterCondition=True,
                                   filterType="DcNotch",
                                   removePadding=True,
                                   shift=shift,
                                   windowLength=windowLength)

        Xl, Y = dataset.sortDataset(X,
                                    Y,
                                    length=datasetLength,
                                    classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                    merge=True,
                                    zeroClassMultiplier=zeroClassMultiplier)

        # Keep the per-dataset data for the split below and accumulate the
        # merged data for feature selection and scaler fitting.
        XLlist.append(Xl)
        ylist.append(Y)
        XL, y = dataset.mergeDatasets(XL, Xl, y, Y)

    if bruteForcemask is None:
        features.compareFeatures2(name,
                                  shift,
                                  windowLength,
                                  X=XL,
                                  y=y,
                                  plot=False)
        featuremask = features.readFeatureMask(name)
    else:
        # Reuse an existing mask and store a copy of it under this name.
        featuremask = features.readFeatureMask(bruteForcemask)
        features.writeFeatureMask(featuremask, name)
    for i in range(len(XLlist)):
        XLlist[i] = features.extractFeaturesWithMask(XLlist[i],
                                                     featuremask=featuremask,
                                                     printTime=False)
    print("XL list featureextraction finished")

    XL = features.extractFeaturesWithMask(XL,
                                          featuremask=featuremask,
                                          printTime=False)
    print("XL featureextraction finished")

    # One scaler fitted on the merged data is shared by all datasets.
    scaler = classifier.makeScaler(XL)

    for i in range(len(XLlist)):
        XLtrain1, XLtest1, yTrain1, yTest1, XLlist[i] = \
            classifier.scaleAndSplit(XLlist[i], ylist[i][0], scaler)
        if i == 0:
            XLtrain = XLtrain1
            yTrain = yTrain1
            XLtest = XLtest1
            yTest = yTest1
        else:
            XLtrain, yTrain = dataset.mergeDatasets(XLtrain, XLtrain1, yTrain,
                                                    yTrain1)
            XLtest, yTest = dataset.mergeDatasets(XLtest, XLtest1, yTest,
                                                  yTest1)
    print("Split fininshed, starting training")

    if kernel == 'rbf':
        clf = svm.SVC(kernel='rbf',
                      gamma=0.01,
                      C=10,
                      decision_function_shape='ovr')
    elif kernel == 'linear':
        clf = svm.SVC(kernel='linear',
                      gamma=0.01,
                      C=10,
                      decision_function_shape='ovr')
    elif kernel == 'linearSVC':
        clf = svm.LinearSVC(penalty='l2', dual=False, C=10, random_state=42)
    else:
        # Without this branch an unknown kernel left clf unbound and the
        # failure surfaced as a confusing UnboundLocalError on clf.fit.
        raise ValueError("Unknown kernel: %r" % (kernel,))

    clf.fit(XLtrain, yTrain)

    classifier.saveMachinestate(clf, name)
    classifier.saveScaler(scaler, name)

    scores = cross_val_score(clf,
                             XLtrain,
                             yTrain,
                             cv=10,
                             scoring='recall_macro')
    print("Recall: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    print()
    print("Scores")
    print(scores)

    tempAccuracyScore, tempPrecision, tempClassificationReport, tempf1Score = \
        classifier.predict(XLtest, clf, yTest)

    accuracyScore.append(tempAccuracyScore)
    f1Score.append(tempf1Score)
    precision.append(tempPrecision)
    classificationReport.append(tempClassificationReport)

    print()
    print("The best parameters for the different channels are:")
    print()
    print(bestParams)
    print()
    print("The prediction accuracy for the different channels is:")
    print(accuracyScore)
    print("The f1 score which include false negatives etc is:")
    print(
        f1Score
    )  #This score says something about the correctness of the prediction.
    print("The precision score:")
    print(
        precision
    )  #This score says something about the correctness of the prediction.
    print("Classification Report:")
    print(classificationReport[0])

Example #4

Show file

def multiplottestHHT(c):
    """Plot the signal, IMFs and residue per channel for movements of class c.

    Loads ``longdata.txt``, keeps up to 10 elements of class ``c`` and shows
    one figure per element. Each figure has one column per channel (the first
    four channels); a column stacks the raw signal, every row of the
    ``fixedIterationHht`` result except the last (labelled imf1, imf2, ...),
    and the last row in red (labelled 'res.').

    Parameters:
        c: class label passed to ``dataset.sortDataset`` as ``classes=[c]``.
    """
    x, y = dataset.loadDataset("longdata.txt",
                               filterCondition=True,
                               filterType="DC")
    x, y = dataset.sortDataset(x, y, length=10, classes=[c])

    channelsToPlot = 4
    N_imfs = 6
    for i in range(len(x[0])):  #Iterate over number of elements in dataset
        # The first character of the movement name is dropped for the title.
        title = movements[y[0][i]][1:]
        fig = plt.figure(figsize=(20, 10))
        plt.suptitle(title)

        outer = gridspec.GridSpec(1,
                                  channelsToPlot,
                                  wspace=0.1,
                                  hspace=0.2,
                                  right=0.98,
                                  left=0.02,
                                  bottom=0.02,
                                  top=0.95)

        for j in range(channelsToPlot):  #Iterate over channels
            inner = gridspec.GridSpecFromSubplotSpec(N_imfs + 1,
                                                     1,
                                                     subplot_spec=outer[j],
                                                     wspace=0.1,
                                                     hspace=0.1)

            signal = x[j][i]

            imfs = fixedIterationHht(signal)
            n_imfs = imfs.shape[0]
            # Build the time axis from the sample index so it always has
            # exactly one point per sample. A float-step arange can come out
            # one element short or long, and length / fs floors on Python 2
            # when both operands are ints.
            time_samples = np.arange(len(signal)) / float(glb.fs)

            # Top row: the raw signal.
            ax = plt.Subplot(fig, inner[0])
            ax.plot(time_samples, signal)
            ax.axis([
                time_samples[0], time_samples[-1],
                signal.min(),
                signal.max()
            ])
            _hideTicksAndGrid(ax)
            ax.set_ylabel('Signal')
            ax.set_title(channels[j])
            fig.add_subplot(ax)

            # All IMF rows share one symmetric y-range so their amplitudes
            # can be compared visually.
            axis_extent = max(np.max(np.abs(imfs[:-1, :]), axis=0))
            for k in range(n_imfs - 1):
                ax = plt.Subplot(fig, inner[k + 1])
                ax.plot(time_samples, imfs[k, :])
                ax.axis([
                    time_samples[0], time_samples[-1], -axis_extent,
                    axis_extent
                ])
                _hideTicksAndGrid(ax)
                ax.set_ylabel('imf' + str(k + 1))
                fig.add_subplot(ax)

            # Bottom row: the last row of the decomposition, drawn in red.
            ax = plt.Subplot(fig, inner[n_imfs])
            ax.plot(time_samples, imfs[-1, :], 'r')
            ax.axis('tight')
            _hideTicksAndGrid(ax)
            ax.set_ylabel('res.')
            fig.add_subplot(ax)

        plt.show()


def _hideTicksAndGrid(ax):
    """Remove tick marks, tick labels and the grid from the axes ``ax``."""
    ax.tick_params(which='both',
                   left=False,
                   bottom=False,
                   labelleft=False,
                   labelbottom=False)
    ax.grid(False)

Example #5

Show file

def startLearning():
    """Train and evaluate a classifier on dataset folder 2.

    Loads and sorts dataset folders 1 and 2, extracts the 'AllFeatures' mask
    from both, fits a scaler on the folder-2 features and scales/splits both
    sets with it. The classifier returned by ``createAndTrain`` is trained on
    the folder-2 training split, saved together with the scaler, evaluated
    with 50-fold cross-validation and scored on the folder-2 test split.
    """
    bestParams = []
    accuracyScore = []
    f1Score = []
    precision = []
    classificationReport = []

    classifierstring = 'AllFeatures'

    dataset.setDatasetFolder(1)

    X1, y1 = dataset.loadDataset(filename="data.txt",
                                 filterCondition=True,
                                 filterType="DcNotch",
                                 removePadding=True,
                                 shift=False,
                                 windowLength=250)
    X1, y1 = dataset.sortDataset(X1,
                                 y1,
                                 length=130,
                                 classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                 merge=True)

    dataset.setDatasetFolder(2)

    # NOTE(review): folder 2 is loaded with shift=True and windowLength=2
    # while folder 1 uses shift=False and windowLength=250 - confirm that
    # this difference is intended.
    X2, y2 = dataset.loadDataset(filename="data.txt",
                                 filterCondition=True,
                                 filterType="DcNotch",
                                 removePadding=True,
                                 shift=True,
                                 windowLength=2)
    X2, y2 = dataset.sortDataset(X2,
                                 y2,
                                 length=130,
                                 classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                 merge=True,
                                 zeroClassMultiplier=1)

    # Only used in the heading of the classification report below.
    channelIndex = 0

    featuremask = features.readFeatureMask(classifierstring)
    XL1 = features.extractFeaturesWithMask(X1,
                                           featuremask=featuremask,
                                           printTime=False)
    XL2 = features.extractFeaturesWithMask(X2,
                                           featuremask=featuremask,
                                           printTime=False)

    #Scale the data if needed and split dataset into training and testing
    #The scaler is fitted on subject 2 only; change the input to makeScaler
    #when creating a model for another subject. See classifier.py
    scaler = classifier.makeScaler(XL2)

    # Subject 1 is scaled and split with the same scaler, but its splits are
    # not used for training or testing below.
    XLtrain1, XLtest1, yTrain1, yTest1, XL1 = classifier.scaleAndSplit(
        XL1, y1[0], scaler)
    XLtrain2, XLtest2, yTrain2, yTest2, XL2 = classifier.scaleAndSplit(
        XL2, y2[0], scaler)

    # No tuned parameters are passed: bestParams stays empty in this run.
    clf, clfPlot = createAndTrain(XLtrain2, yTrain2, None)

    classifier.saveMachinestate(clf, classifierstring)
    classifier.saveScaler(scaler, classifierstring)

    scores = cross_val_score(clf, XLtrain2, yTrain2, cv=50, scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    print()
    print("Scores")
    print(scores)

    tempAccuracyScore, tempPrecision, tempClassificationReport, tempf1Score = \
        classifier.predictTimer(XLtest2, clf, yTest2)
    accuracyScore.append(tempAccuracyScore)
    f1Score.append(tempf1Score)
    precision.append(tempPrecision)
    classificationReport.append(tempClassificationReport)

    print()
    print("The best parameters for the different channels are:")
    print()
    print(bestParams)
    print()
    print("The prediction accuracy for the different channels is:")
    print(accuracyScore)
    print("The f1 score which include false negatives etc is:")
    print(
        f1Score
    )  #This score says something about the correctness of the prediction.
    print("The precision score:")
    print(
        precision
    )  #This score says something about the correctness of the prediction.
    print(
        "Classification Report of channel %d:" % channelIndex
    )  #String, weird if you print whole array with string, and predicting over several channels.
    print(classificationReport[0])

Example #6

Show file

def compareFeatures(n_jobs=1,
                    datasetnum=1,
                    shift=False,
                    windowLength=250,
                    zeroClassMultiplier=1):
    """Brute-force test feature combinations with SVM parameter tuning.

    Asks interactively (``raw_input``) for the maximum and minimum number of
    features per combination, whether this is a debug session and, for normal
    sessions, whether to send a mail when finished and whether to save logs.
    The requested dataset folders are then loaded, sorted and merged, all
    ``FUNC_MAP`` features are extracted, the data is scaled and split, and
    ``classifier.tuneSvmParameters`` is called for every combination of
    feature columns, from the largest combination size down to the smallest.

    When a ``PermutationLog<size>.txt`` file is found, the leading
    combinations that are already listed in it are skipped, so an interrupted
    run can continue where it stopped.

    Parameters:
        n_jobs: forwarded to ``classifier.tuneSvmParameters``.
        datasetnum: a dataset folder number or an iterable of them.
        shift: forwarded to ``dataset.loadDataset``.
        windowLength: forwarded to ``dataset.loadDataset``.
        zeroClassMultiplier: forwarded to ``dataset.sortDataset``.

    Returns:
        None. Returns early, after printing "Invalid input, exiting", when a
        numeric prompt is answered with an unusable value.
    """
    #array declaration
    trainings = 0
    allPermutations = []
    allParams = []
    allPavg = []
    allP = []
    allR = []
    allF1 = []

    #Constants and configuration
    maxNumFeatures = 8
    minNumFeatures = 6  #Must be bigger than 1
    datasetfile = "data.txt"
    skip = False
    logging = False

    print(
        "Setup for brute force testing of all feature combination, features in list: %d"
        % len(FUNC_MAP))
    print("Enter maximum number of features: (\"all\" for all combinations)")
    inputString = raw_input()
    if inputString.isdigit():
        inputval = int(inputString)
        if inputval < 1 or inputval > len(FUNC_MAP):
            print("Invalid input, exiting")
            return
        maxNumFeatures = inputval
    elif inputString == "all":
        maxNumFeatures = len(FUNC_MAP)
        minNumFeatures = 1
        # "all" fixes the minimum to 1, so the minimum prompt is skipped.
        skip = True
    else:
        print("Invalid input, exiting")
        return

    if not skip:
        print("Enter minimum number of features: ")
        inputString = raw_input()
        if not inputString.isdigit() or int(inputString) <= 0:
            print("Invalid input, exiting")
            return
        minNumFeatures = int(inputString)

    print("Is this a debug session?(Will leak memory) [Y/n]")
    debug = raw_input() == "Y"
    if debug:
        print(
            "Debug session activated, results will not be valid and memory will explode."
        )
        sendMail = False
        logging = False
    else:
        print("Normal session activated, results will be valid. ")
        print("Do you want to send a mail notification when finished? [Y/n]")
        sendMail = raw_input() == "Y"
        if sendMail:
            print("Sending mail when script is finished")
        else:
            print("Do not send mail when script is finished")

        print("Do you want to save logs? [Y/n]")
        logging = raw_input() == "Y"
        if logging:
            print("Saving logs")
        else:
            print("Do not save logs")

    #Load dataset
    XL = [[], [], [], [], [], [], [], []]
    y = [[], [], [], [], [], [], [], []]
    XLlist = []
    ylist = []
    XLtrain = None
    XLtest = None
    yTrain = None
    yTest = None

    ##### Code

    # Accept a single dataset number as well as a collection of them.
    if isinstance(datasetnum, int):
        datasetnum = [datasetnum]

    print(datasetnum)

    for i in datasetnum:
        print(i)
        dataset.setDatasetFolder(i)

        X, Y = dataset.loadDataset(filename=datasetfile,
                                   filterCondition=True,
                                   filterType="DcNotch",
                                   removePadding=True,
                                   shift=shift,
                                   windowLength=windowLength)

        Xl, Y = dataset.sortDataset(X,
                                    Y,
                                    length=130,
                                    classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                    merge=True,
                                    zeroClassMultiplier=zeroClassMultiplier)

        XLlist.append(Xl)
        ylist.append(Y)
        XL, y = dataset.mergeDatasets(XL, Xl, y, Y)

    for i in range(len(XLlist)):
        XLlist[i] = extractFeaturesWithMask(XLlist[i],
                                            featuremask=range(len(FUNC_MAP)),
                                            printTime=False)
    print("XL list featureextraction finished")

    XL = extractFeaturesWithMask(XL,
                                 featuremask=range(len(FUNC_MAP)),
                                 printTime=False)
    print("XL featureextraction finished")

    # One scaler fitted on the merged data is shared by all datasets.
    scaler = classifier.makeScaler(XL)

    for i in range(len(XLlist)):
        XLtrain1, XLtest1, yTrain1, yTest1, XLlist[i] = \
            classifier.scaleAndSplit(XLlist[i], ylist[i][0], scaler)
        if i == 0:
            XLtrain = XLtrain1
            yTrain = yTrain1
            XLtest = XLtest1
            yTest = yTest1
        else:
            XLtrain, yTrain = dataset.mergeDatasets(XLtrain, XLtrain1, yTrain,
                                                    yTrain1)
            XLtest, yTest = dataset.mergeDatasets(XLtest, XLtest1, yTest,
                                                  yTest1)
    print("Split fininshed, starting training")

    # Column indices of the extracted feature matrix.
    featureIndices = range(len(XL[0]))
    print("Featureextraction finished, number of features to check: %d" %
          len(XL[0]))

    # Clamp the requested bounds to what the data actually offers.
    if len(featureIndices) < maxNumFeatures:
        maxNumFeatures = len(featureIndices)
    elif maxNumFeatures < minNumFeatures:
        maxNumFeatures = minNumFeatures

    if minNumFeatures < 1:
        minNumFeatures = 1
    elif minNumFeatures > maxNumFeatures:
        minNumFeatures = maxNumFeatures
    print("Testing with combinations of %d to %d" %
          (minNumFeatures, maxNumFeatures))
    numberOfCombinations = 0
    for i in range(minNumFeatures, maxNumFeatures + 1):
        # Count with nCr instead of materialising the combinations, which
        # would exhaust memory.
        comb = nCr(len(featureIndices), i)
        print("Number of iterations for combinations of length %d: %d" %
              (i, comb))
        numberOfCombinations += comb

    print("Number of combinations to test %d" % numberOfCombinations)

    for i in range(maxNumFeatures, minNumFeatures - 1, -1):

        print("Starting to read PermutationLog")
        # NOTE(review): the log is read relative to the working directory but
        # written below dir_path - confirm both refer to the same folder.
        try:
            permfile = open(
                "Logs" + slash + "PermutationLog" + str(i) + ".txt", 'r')
        except IOError:
            print("PermutationLog file does not exist")
            skip = False
        else:
            print("Performing operations on PermutationLog buffer")
            with permfile:
                PermutationsString = permfile.read()
            # Entries are written as ":" + str(tuple); the text before the
            # first ":" is not an entry and is dropped.
            # NOTE(review): eval() executes whatever the log file contains;
            # only use log files written by this script.
            # A set gives constant-time membership tests in the loop below.
            loggedCombinations = set(
                tuple(eval(entry))
                for entry in PermutationsString.split(':')[1:])
            print("Finished with operations")
            skip = True
        start = datetime.now()
        print("Finished reading permutations file")
        lastTrainings = 1000
        for p in combinations(featureIndices,
                              i):  #If order matters use permutations

            if skip:
                if p in loggedCombinations:
                    # Already trained in an earlier run: only count it.
                    numberOfCombinations -= 1
                    trainings += 1
                    if trainings == lastTrainings:
                        lastTrainings = trainings + 1000
                        print("Training number: %d" % trainings)
                        print("Remaining combinations: %d" %
                              numberOfCombinations)
                        print(
                            "Elapsed time for checking that this combination exists: "
                            + str(elapsedTime))

                    stop = datetime.now()
                    elapsedTime = (stop - start)
                    start = stop
                else:
                    # First combination missing from the log: train from here.
                    print("Found Starting point")
                    skip = False

            if not skip:
                start = datetime.now()
                XLtrainPerm = _selectColumns(XLtrain, p)
                XLtestPerm = _selectColumns(XLtest, p)

                bestParams, presc, r, f1, s, report = classifier.tuneSvmParameters(
                    XLtrainPerm,
                    yTrain,
                    XLtestPerm,
                    yTest,
                    debug=False,
                    fast=True,
                    n_jobs=n_jobs)

                if logging:
                    _appendLog("PermutationLog", i, ":" + str(p))
                    _appendLog("PrecisionLog", i, _scoreEntry(presc))
                    _appendLog("RecallLog", i, _scoreEntry(r))
                    _appendLog("F1Log", i, _scoreEntry(f1))

                if debug:
                    #Append scores
                    allPermutations.append(p)
                    allParams.append(bestParams)
                    allP.append(presc)
                    allPavg.append(np.average(presc, weights=s))
                    allR.append(r)
                    allF1.append(f1)

                    winner = allPavg.index(
                        max(allPavg))  #Check for max average precision
                    print(report)
                    print(
                        "Best features so far are: " +
                        convertPermutationToFeatureString(
                            allPermutations[winner]))
                    print("Best result so far are: ", allPavg[winner])

                stop = datetime.now()
                numberOfCombinations -= 1
                trainings += 1
                # Estimate assumes every remaining combination takes as long
                # as this one did.
                remainingTime = (stop - start) * numberOfCombinations
                elapsedTime = (stop - start)
                print("Training number: %d" % trainings)
                print("Remaining combinations: %d" % numberOfCombinations)
                print("Elapsed time for training with this combination: " +
                      str(elapsedTime))
                print("Estimated remaining time: " + str(remainingTime))

    if sendMail:
        mail.sendemail(
            from_addr='*****@*****.**',
            to_addr_list=['*****@*****.**', '*****@*****.**'],
            cc_addr_list=[],
            subject="Training finished with combinations of %d to %d features"
            % (minNumFeatures, maxNumFeatures),
            message="Logs are ready for download ",
            login='******',
            password='******')


def _selectColumns(rows, columns):
    """Return a float array holding only the given columns of every row."""
    selected = np.empty([len(rows), len(columns)])
    for j in range(len(rows)):
        selected[j] = [rows[j][k] for k in columns]
    return selected


def _scoreEntry(scores):
    """Format scores as one log entry: ':' followed by ',<score>' per item."""
    return ":" + "".join(',' + str(scores[k]) for k in range(len(scores)))


def _appendLog(logName, numFeatures, text):
    """Append text to the file Logs/<logName><numFeatures>.txt below dir_path."""
    path = (dir_path + slash + "Logs" + slash + logName + str(numFeatures) +
            ".txt")
    with open(path, 'a+') as logfile:
        logfile.write(text)

Example #7

Show file

def compareFeatures2(name,
                     shift,
                     windowLength,
                     n_jobs=-1,
                     X=None,
                     y=None,
                     plot=True):
    """Rank features with RFECV around a linear SVC and store the mask.

    When either ``X`` or ``y`` is missing, the dataset is loaded from
    ``data.txt`` and passed through ``dataset.sortDataset``. Every feature in
    ``FUNC_MAP`` is then extracted, scaled and split, and recursive feature
    elimination with 10-fold cross-validation is fitted on the full feature
    matrix. The resulting support mask is handed to ``writeFeatureMask``
    together with ``name``; the ranking and scores are printed, and the
    score curve is optionally plotted.

    Args:
        name: Passed to ``writeFeatureMask`` alongside the selected mask.
        shift: Forwarded to ``dataset.loadDataset``.
        windowLength: Forwarded to ``dataset.loadDataset``.
        n_jobs: Forwarded to ``RFECV`` (``-1`` by default).
        X: Optional pre-loaded data; loaded from file when ``None``.
        y: Optional pre-loaded labels; only ``y[0]`` is used as the target.
        plot: When true, show the cross-validation score curve.
    """
    #datasetfile = "longdata.txt"
    datasetfile = "data.txt"
    merge = True

    # Identity check on purpose: `X == None` on a NumPy array compares
    # element-wise, and the resulting array cannot be used as a condition.
    if X is None or y is None:
        X, y = dataset.loadDataset(filename=datasetfile,
                                   filterCondition=True,
                                   filterType="DcNotch",
                                   removePadding=True,
                                   shift=shift,
                                   windowLength=windowLength)
        if datasetfile == "longdata.txt":
            classes = [0, 5, 6, 4, 2, 8]
        else:
            classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

        X, y = dataset.sortDataset(X,
                                   y,
                                   length=1000,
                                   classes=classes,
                                   merge=merge)
        # Class list after sorting; kept from the original although nothing
        # in the visible part of this function reads it afterwards.
        if merge:
            classes = [0, 5, 6, 4, 2, 8]
        else:
            classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    # Calculate every feature registered in FUNC_MAP.
    XL = extractFeaturesWithMask(X, featuremask=range(len(FUNC_MAP)))
    scaler = classifier.makeScaler(XL)
    XLtrain, XLtest, yTrain, yTest, XL = classifier.scaleAndSplit(
        XL, y[0], scaler)

    clf = svm.SVC(kernel="linear", C=10, decision_function_shape='ovr')

    rfecv = RFECV(estimator=clf,
                  step=1,
                  cv=10,
                  n_jobs=n_jobs,
                  scoring='accuracy')
    rfecv.fit(XL, y[0])

    # `grid_scores_` was removed from newer scikit-learn releases; fall back
    # to the mean test score stored in `cv_results_` when it is absent.
    gridScores = getattr(rfecv, "grid_scores_", None)
    if gridScores is None:
        gridScores = rfecv.cv_results_["mean_test_score"]

    print("Optimal number of features : %d" % rfecv.n_features_)
    print("Optimal features: ")
    print(rfecv.support_)

    writeFeatureMask(rfecv.support_, name)

    print("The ranking of the features: ")
    print(rfecv.ranking_)
    print("The scores for each feature combination:")
    print(gridScores)
    if plot:
        # Plot number of features VS. cross-validation scores
        plt.figure()
        plt.xlabel("Number of features selected")
        plt.ylabel("Cross validation score (nb of correct classifications)")
        plt.plot(range(1, len(gridScores) + 1), gridScores)
        plt.show()

    # NOTE(review): XLtrain is not reduced with rfecv.support_ here, so this
    # score presumably covers all features rather than the selected subset -
    # confirm whether that is intended.
    print("After feature selection: ")
    scores = cross_val_score(rfecv.estimator_,
                             XLtrain,
                             yTrain,
                             cv=10,
                             scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    print()
    print("Scores")
    print(scores)