Esempi in Python per sortDataset

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: dataset

Metodo/funzione: sortDataset

Esempi su hotexamples.com: 7

sortDataset in Python: 7 esempi trovati. Questi sono i migliori esempi reali in Python per dataset.sortDataset, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: executiontime.py Progetto: kristiankrohn/Masterproject

def calculateAndWriteExecutionTime(classifierstring = 'AllFeatures', shift = False, windowLength = 250, Sort = False):
    """Benchmark every feature function on one sample window and store the timings.

    The dataset is loaded and sorted, the first window of each of the first
    eight channels is used as benchmark input, and each feature is timed
    with ``timeit`` (minimum over 10000 single executions). The timings are
    handed to ``writeToFile`` in feature-index order.

    Args:
        classifierstring: Name passed to ``features.readFeatureMask``.
        shift: Forwarded to ``dataset.loadDataset``.
        windowLength: Forwarded to ``dataset.loadDataset`` and ``writeToFile``.
        Sort: When true, also print feature names and timings ordered by
            execution time via ``printSortedValues``.
    """
    featuremask = features.readFeatureMask(classifierstring)
    executionTimes = {}
    executionTimeList = []

    X1, y1 = dataset.loadDataset(filename="data.txt", filterCondition=True,
                                filterType="DcNotch", removePadding=True, shift=shift, windowLength=windowLength)
    X1, y1 = dataset.sortDataset(X1, y1, length=130, classes=[0,1,2,3,4,5,6,7,8,9], merge = True)

    # One movement window per channel is enough to time the feature functions.
    InputData = [X1[channel][0] for channel in range(8)]

    # NOTE(review): the loop index itself (not featuremask[i]) is used as the
    # FUNC_MAP key, which is only right if the mask is 0..n-1 -- confirm.
    for i in range(len(featuremask)):
        wrapped = wrapper(calculateFeature, i, InputData)
        # The second space-separated token of the stringified FUNC_MAP entry
        # is used as the feature's name.
        featureName = str(features.FUNC_MAP.get(i)).split(" ")[1] + " "
        bestTime = min(timeit.repeat(wrapped, repeat = 10000, number = 1))
        executionTimes[featureName] = bestTime
        print("finished with feature %d" %i)
        executionTimeList.append(bestTime)

    writeToFile(executionTimeList, windowLength)

    if Sort:
        sortedValues = sorted(executionTimes.values())
        sortedKeys = sorted(executionTimes, key = executionTimes.__getitem__)
        printSortedValues(sortedKeys, sortedValues)

Esempio n. 2

Mostra file

File: kerasTest.py Progetto: kristiankrohn/Masterproject

def startLearning():
    """Train a Keras classifier on the sorted dataset and print its CV accuracy.

    Loads ``data.txt``, sorts it into the ten classes (merged), extracts the
    masked features, scales and splits them, fits a ``KerasClassifier`` built
    by ``createModel`` and prints the 50-fold cross-validated accuracy on the
    training split.
    """
    # Fix the NumPy seed so that the result can be reproduced.
    seed = 7
    np.random.seed(seed)

    X, y = dataset.loadDataset(filename="data.txt",
                               filterCondition=True,
                               filterType="DcNotch",
                               removePadding=True,
                               shift=False,
                               windowLength=250)
    X, y = dataset.sortDataset(X,
                               y,
                               length=10000,
                               classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                               merge=True)
    channelIndex = 0

    featuremask = features.readFeatureMask()
    XL = features.extractFeaturesWithMask(X,
                                          channelIndex,
                                          featuremask=featuremask,
                                          printTime=False)

    # NOTE(review): six values are unpacked here and no scaler is passed in;
    # confirm this matches the scaleAndSplit version used by this file.
    XLtrain, XLtest, yTrain, yTest, XL, scaler = classifier.scaleAndSplit(
        XL, y[0])

    clf = KerasClassifier(build_fn=createModel,
                          epochs=10,
                          batch_size=50,
                          verbose=0)
    clf.fit(XLtrain, yTrain)

    scores = cross_val_score(clf, XLtrain, yTrain, cv=50, scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    print()
    print("Scores")
    print(scores)
Esempio n. 3

Mostra file

File: predict.py Progetto: kristiankrohn/Masterproject

def createPredictor(name,
                    windowLength,
                    datasetnum=1,
                    shift=None,
                    bruteForcemask=None,
                    zeroClassMultiplier=2,
                    datasetLength=130,
                    kernel='rbf'):
    """Train, evaluate and persist an SVM predictor built from one or more datasets.

    The run parameters are pickled to ``"Parameters" + slash + name + ".pkl"``.
    Every requested dataset folder is loaded, sorted and merged; features are
    extracted with either a freshly computed or a supplied feature mask; a
    scaler is built from the merged data; each dataset is scaled and split;
    and the classifier is trained, saved, cross-validated (10-fold, macro
    recall) and scored on the held-out split.

    Args:
        name: Identifier used for the parameter file, feature mask, machine
            state and scaler.
        windowLength: Window length forwarded to ``dataset.loadDataset``.
        datasetnum: A dataset folder number, or an iterable of them.
        shift: Forwarded to ``dataset.loadDataset``. When ``None`` it is
            derived from ``windowLength`` (true for windows shorter than 250).
        bruteForcemask: Name of an existing feature mask to reuse. When
            ``None`` the mask is produced by ``features.compareFeatures2``.
        zeroClassMultiplier: Forwarded to ``dataset.sortDataset``.
        datasetLength: ``length`` argument forwarded to ``dataset.sortDataset``.
        kernel: ``'rbf'``, ``'linear'`` or ``'linearSVC'``.

    Raises:
        ValueError: If ``kernel`` is not one of the supported values
            (previously this surfaced as an unbound ``clf`` at fit time).
    """
    ##### Parameters
    if shift is None:
        shift = windowLength < 250
        print("Shift is true" if shift else "Shift is false")
    else:
        print("Shift is: %r" % shift)

    ##### Save parameters
    parameters = {
        'windowLength': windowLength,
        'shift': shift,
        'dataset': datasetnum,
        'datasetLength': datasetLength,
        'kernel': kernel
    }
    # Use a context manager so the file is flushed and closed deterministically.
    with open("Parameters" + slash + name + ".pkl", "wb") as parameterFile:
        pickle.dump(parameters, parameterFile)

    ##### Declarations
    bestParams = []  # never filled in this function; printed as-is below
    accuracyScore = []
    f1Score = []
    precision = []
    classificationReport = []
    # One empty list per channel as the seed for dataset.mergeDatasets.
    XL = [[], [], [], [], [], [], [], []]
    y = [[], [], [], [], [], [], [], []]
    XLlist = []
    ylist = []
    XLtrain = None
    XLtest = None
    yTrain = None
    yTest = None

    ##### Code

    # Accept a single folder number as well as a collection of them.
    if isinstance(datasetnum, int):
        datasetnum = [datasetnum]

    print(datasetnum)

    for i in datasetnum:
        print(i)
        dataset.setDatasetFolder(i)

        X, Y = dataset.loadDataset(filename="data.txt",
                                   filterCondition=True,
                                   filterType="DcNotch",
                                   removePadding=True,
                                   shift=shift,
                                   windowLength=windowLength)

        Xl, Y = dataset.sortDataset(X,
                                    Y,
                                    length=datasetLength,
                                    classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                    merge=True,
                                    zeroClassMultiplier=zeroClassMultiplier)

        # Keep each dataset separately (for per-dataset splitting) and also
        # merged (for feature selection and for fitting the scaler).
        XLlist.append(Xl)
        ylist.append(Y)
        XL, y = dataset.mergeDatasets(XL, Xl, y, Y)

    if bruteForcemask is None:
        features.compareFeatures2(name,
                                  shift,
                                  windowLength,
                                  X=XL,
                                  y=y,
                                  plot=False)
        featuremask = features.readFeatureMask(name)
    else:
        featuremask = features.readFeatureMask(bruteForcemask)
        features.writeFeatureMask(featuremask, name)

    for i in range(len(XLlist)):
        XLlist[i] = features.extractFeaturesWithMask(XLlist[i],
                                                     featuremask=featuremask,
                                                     printTime=False)
    print("XL list featureextraction finished")

    XL = features.extractFeaturesWithMask(XL,
                                          featuremask=featuremask,
                                          printTime=False)
    print("XL featureextraction finished")

    # The scaler is built from the merged features and shared by all datasets.
    scaler = classifier.makeScaler(XL)

    for i in range(len(XLlist)):
        XLtrain1, XLtest1, yTrain1, yTest1, XLlist[i] = \
            classifier.scaleAndSplit(XLlist[i], ylist[i][0], scaler)
        if i == 0:
            XLtrain = XLtrain1
            yTrain = yTrain1
            XLtest = XLtest1
            yTest = yTest1
        else:
            XLtrain, yTrain = dataset.mergeDatasets(XLtrain, XLtrain1, yTrain,
                                                    yTrain1)
            XLtest, yTest = dataset.mergeDatasets(XLtest, XLtest1, yTest,
                                                  yTest1)
    print("Split fininshed, starting training")

    if kernel == 'rbf':
        clf = svm.SVC(kernel='rbf',
                      gamma=0.01,
                      C=10,
                      decision_function_shape='ovr')
    elif kernel == 'linear':
        clf = svm.SVC(kernel='linear',
                      gamma=0.01,
                      C=10,
                      decision_function_shape='ovr')
    elif kernel == 'linearSVC':
        clf = svm.LinearSVC(penalty='l2', dual=False, C=10, random_state=42)
    else:
        raise ValueError("Unsupported kernel: %r" % (kernel,))

    clf.fit(XLtrain, yTrain)

    classifier.saveMachinestate(clf, name)
    classifier.saveScaler(scaler, name)

    scores = cross_val_score(clf,
                             XLtrain,
                             yTrain,
                             cv=10,
                             scoring='recall_macro')
    print("Recall: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    # print("") emits a blank line on both Python 2 and 3; a bare print()
    # prints "()" when print is a statement.
    print("")
    print("Scores")
    print(scores)

    tempAccuracyScore, tempPrecision, tempClassificationReport, tempf1Score = \
        classifier.predict(XLtest, clf, yTest)

    accuracyScore.append(tempAccuracyScore)
    f1Score.append(tempf1Score)
    precision.append(tempPrecision)
    classificationReport.append(tempClassificationReport)

    print("")
    print("The best parameters for the different channels are:")
    print("")
    print(bestParams)
    print("")
    print("The prediction accuracy for the different channels is:")
    print(accuracyScore)
    print("The f1 score which include false negatives etc is:")
    print(f1Score)
    print("The precision score:")
    print(precision)
    print("Classification Report:")
    print(classificationReport[0])

Esempio n. 4

Mostra file

def multiplottestHHT(c):
    """Plot the signal, its IMFs and the residual for every sample of class *c*.

    Loads ``longdata.txt`` (DC-filtered), keeps up to ten samples of class
    ``c`` and shows one figure per sample. Each figure has one column per
    channel (the first four channels); each column stacks the raw signal,
    the IMFs returned by ``fixedIterationHht`` and, in red, the last row of
    that result labelled as the residual.

    Args:
        c: Class label passed to ``dataset.sortDataset`` as ``classes=[c]``.
    """
    x, y = dataset.loadDataset("longdata.txt",
                               filterCondition=True,
                               filterType="DC")
    x, y = dataset.sortDataset(x, y, length=10, classes=[c])

    channelsToPlot = 4
    # Rows reserved per column: the signal plus N_imfs rows below it.
    # NOTE(review): assumes fixedIterationHht returns exactly N_imfs rows.
    N_imfs = 6

    for i in range(len(x[0])):  # Iterate over the elements in the dataset
        # The first character of the movement name is dropped for the title.
        title = movements[y[0][i]][1:]
        fig = plt.figure(figsize=(20, 10))
        plt.suptitle(title)

        outer = gridspec.GridSpec(1,
                                  channelsToPlot,
                                  wspace=0.1,
                                  hspace=0.2,
                                  right=0.98,
                                  left=0.02,
                                  bottom=0.02,
                                  top=0.95)

        for j in range(channelsToPlot):  # Iterate over channels
            inner = gridspec.GridSpecFromSubplotSpec(N_imfs + 1,
                                                     1,
                                                     subplot_spec=outer[j],
                                                     wspace=0.1,
                                                     hspace=0.1)

            signal = x[j][i]
            imfs = fixedIterationHht(signal)
            n_imfs = imfs.shape[0]

            # Build the time axis from integer sample indices so that it
            # always has exactly len(signal) points. A float-step arange (and
            # integer division of length by fs on Python 2) can yield a
            # different count and break the plot calls below.
            time_samples = np.arange(len(signal)) / float(glb.fs)

            # Top row: the raw signal.
            ax = plt.Subplot(fig, inner[0])
            ax.plot(time_samples, signal)
            ax.axis([
                time_samples[0], time_samples[-1],
                signal.min(),
                signal.max()
            ])
            _hideAxisDecorations(ax)
            ax.set_ylabel('Signal')
            ax.set_title(channels[j])
            fig.add_subplot(ax)

            # All IMF rows share one symmetric y-range so they are comparable.
            axis_extent = max(np.max(np.abs(imfs[:-1, :]), axis=0))
            for k in range(n_imfs - 1):
                ax = plt.Subplot(fig, inner[k + 1])
                ax.plot(time_samples, imfs[k, :])
                ax.axis([
                    time_samples[0], time_samples[-1], -axis_extent,
                    axis_extent
                ])
                _hideAxisDecorations(ax)
                ax.set_ylabel('imf' + str(k + 1))
                fig.add_subplot(ax)

            # Bottom row: the last row of the decomposition, drawn in red.
            ax = plt.Subplot(fig, inner[n_imfs])
            ax.plot(time_samples, imfs[-1, :], 'r')
            ax.axis('tight')
            _hideAxisDecorations(ax)
            ax.set_ylabel('res.')
            fig.add_subplot(ax)

        plt.show()


def _hideAxisDecorations(ax):
    """Switch off ticks, tick labels and the grid on *ax*."""
    ax.tick_params(which='both',
                   left=False,
                   bottom=False,
                   labelleft=False,
                   labelbottom=False)
    ax.grid(False)

Esempio n. 5

Mostra file

def startLearning():
    """Train and evaluate a classifier on dataset folder 2.

    Both dataset folders are loaded, sorted, feature-extracted and
    scaled/split, but the scaler is fitted on folder 2 only and only the
    folder-2 split is used for training, cross-validation (50-fold accuracy)
    and the timed prediction. The trained machine state and the scaler are
    saved under the classifier name ``'AllFeatures'``.
    """
    classifierstring = 'AllFeatures'
    channelIndex = 0
    bestParams = []  # stays empty: parameter tuning is disabled in this variant

    # Loader settings shared by both subjects; shift/windowLength differ.
    loaderSettings = dict(filename="data.txt",
                          filterCondition=True,
                          filterType="DcNotch",
                          removePadding=True)

    # Subject 1
    dataset.setDatasetFolder(1)
    X1, y1 = dataset.loadDataset(shift=False, windowLength=250,
                                 **loaderSettings)
    X1, y1 = dataset.sortDataset(X1, y1,
                                 length=130,
                                 classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                 merge=True)

    # Subject 2
    dataset.setDatasetFolder(2)
    X2, y2 = dataset.loadDataset(shift=True, windowLength=2,
                                 **loaderSettings)
    X2, y2 = dataset.sortDataset(X2, y2,
                                 length=130,
                                 classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                 merge=True,
                                 zeroClassMultiplier=1)

    # Feature extraction with the mask stored for this classifier.
    featuremask = features.readFeatureMask(classifierstring)
    XL1 = features.extractFeaturesWithMask(X1, featuremask=featuremask,
                                           printTime=False)
    XL2 = features.extractFeaturesWithMask(X2, featuremask=featuremask,
                                           printTime=False)

    # The scaler is fitted on subject 2 only. For a combined-subject model it
    # would have to be built from the joined feature sets instead.
    scaler = classifier.makeScaler(XL2)

    firstSplit = classifier.scaleAndSplit(XL1, y1[0], scaler)
    XLtrain1, XLtest1, yTrain1, yTest1, XL1 = firstSplit
    secondSplit = classifier.scaleAndSplit(XL2, y2[0], scaler)
    XLtrain2, XLtest2, yTrain2, yTest2, XL2 = secondSplit

    # No tuned parameters are passed (None); only the subject-2 training
    # split is used.
    clf, clfPlot = createAndTrain(XLtrain2, yTrain2, None)

    # Persist the trained model together with the scaler it depends on.
    classifier.saveMachinestate(clf, classifierstring)
    classifier.saveScaler(scaler, classifierstring)

    scores = cross_val_score(clf, XLtrain2, yTrain2, cv=50, scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    print()
    print("Scores")
    print(scores)

    predictionResult = classifier.predictTimer(XLtest2, clf, yTest2)
    (tempAccuracyScore, tempPrecision, tempClassificationReport,
     tempf1Score) = predictionResult
    accuracyScore = [tempAccuracyScore]
    f1Score = [tempf1Score]
    precision = [tempPrecision]
    classificationReport = [tempClassificationReport]

    print()
    print("The best parameters for the different channels are:")
    print()
    print(bestParams)
    print()
    print("The prediction accuracy for the different channels is:")
    print(accuracyScore)
    print("The f1 score which include false negatives etc is:")
    print(f1Score)
    print("The precision score:")
    print(precision)
    print("Classification Report of channel %d:" % channelIndex)
    print(classificationReport[0])

Esempio n. 6

Mostra file

def compareFeatures(n_jobs=1,
                    datasetnum=1,
                    shift=False,
                    windowLength=250,
                    zeroClassMultiplier=1):
    """Brute-force search over feature combinations, tuning an SVM for each.

    Interactively asks for the maximum/minimum combination size, whether this
    is a debug session, and (for normal sessions) whether to send a mail when
    finished and whether to write logs. It then loads and merges the
    requested datasets, extracts every feature in ``FUNC_MAP``, scales and
    splits the data, and calls ``classifier.tuneSvmParameters`` for each
    combination, from the largest size down to the smallest.

    If a ``PermutationLog<size>.txt`` exists, combinations already listed in
    it are skipped until the first unlisted one is found, so an interrupted
    run can resume where it stopped.

    Args:
        n_jobs: Forwarded to ``classifier.tuneSvmParameters``.
        datasetnum: A dataset folder number, or an iterable of them.
        shift: Forwarded to ``dataset.loadDataset``.
        windowLength: Forwarded to ``dataset.loadDataset``.
        zeroClassMultiplier: Forwarded to ``dataset.sortDataset``.

    Returns:
        None. Returns early, after printing a message, on invalid input.
    """
    trainings = 0
    # Score history; only filled in debug sessions (grows without bound).
    allPermutations = []
    allParams = []
    allPavg = []
    allP = []
    allR = []
    allF1 = []

    # Constants and configuration
    maxNumFeatures = 8
    minNumFeatures = 6  # Must be bigger than 1
    datasetfile = "data.txt"

    print(
        "Setup for brute force testing of all feature combination, features in list: %d"
        % len(FUNC_MAP))
    print("Enter maximum number of features: (\"all\" for all combinations)")
    inputString = raw_input()
    askForMinimum = True
    if inputString == "all":
        maxNumFeatures = len(FUNC_MAP)
        minNumFeatures = 1
        askForMinimum = False
    elif inputString.isdigit() and 1 <= int(inputString) <= len(FUNC_MAP):
        maxNumFeatures = int(inputString)
    else:
        print("Invalid input, exiting")
        return

    if askForMinimum:
        print("Enter minimum number of features: ")
        inputString = raw_input()
        if not inputString.isdigit() or int(inputString) < 1:
            print("Invalid input, exiting")
            return
        minNumFeatures = int(inputString)

    print("Is this a debug session?(Will leak memory) [Y/n]")
    debug = raw_input() == "Y"
    if debug:
        print(
            "Debug session activated, results will not be valid and memory will explode."
        )
        sendMail = False
        saveLogs = False
    else:
        print("Normal session activated, results will be valid. ")
        print("Do you want to send a mail notification when finished? [Y/n]")
        sendMail = raw_input() == "Y"
        if sendMail:
            print("Sending mail when script is finished")
        else:
            print("Do not send mail when script is finished")

        print("Do you want to save logs? [Y/n]")
        saveLogs = raw_input() == "Y"
        if saveLogs:
            print("Saving logs")
        else:
            print("Do not save logs")

    # One empty list per channel as the seed for dataset.mergeDatasets.
    XL = [[], [], [], [], [], [], [], []]
    y = [[], [], [], [], [], [], [], []]
    XLlist = []
    ylist = []
    XLtrain = None
    XLtest = None
    yTrain = None
    yTest = None

    # Accept a single folder number as well as a collection of them.
    if isinstance(datasetnum, int):
        datasetnum = [datasetnum]

    print(datasetnum)

    for i in datasetnum:
        print(i)
        dataset.setDatasetFolder(i)

        X, Y = dataset.loadDataset(filename=datasetfile,
                                   filterCondition=True,
                                   filterType="DcNotch",
                                   removePadding=True,
                                   shift=shift,
                                   windowLength=windowLength)

        Xl, Y = dataset.sortDataset(X,
                                    Y,
                                    length=130,
                                    classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                    merge=True,
                                    zeroClassMultiplier=zeroClassMultiplier)

        XLlist.append(Xl)
        ylist.append(Y)
        XL, y = dataset.mergeDatasets(XL, Xl, y, Y)

    for i in range(len(XLlist)):
        XLlist[i] = extractFeaturesWithMask(XLlist[i],
                                            featuremask=range(len(FUNC_MAP)),
                                            printTime=False)
    print("XL list featureextraction finished")

    XL = extractFeaturesWithMask(XL,
                                 featuremask=range(len(FUNC_MAP)),
                                 printTime=False)
    print("XL featureextraction finished")

    # The scaler is built from the merged features and shared by all datasets.
    scaler = classifier.makeScaler(XL)

    for i in range(len(XLlist)):
        XLtrain1, XLtest1, yTrain1, yTest1, XLlist[i] = \
            classifier.scaleAndSplit(XLlist[i], ylist[i][0], scaler)
        if i == 0:
            XLtrain = XLtrain1
            yTrain = yTrain1
            XLtest = XLtest1
            yTest = yTest1
        else:
            XLtrain, yTrain = dataset.mergeDatasets(XLtrain, XLtrain1, yTrain,
                                                    yTrain1)
            XLtest, yTest = dataset.mergeDatasets(XLtest, XLtest1, yTest,
                                                  yTest1)
    print("Split fininshed, starting training")

    featureIndices = range(len(XL[0]))
    print("Featureextraction finished, number of features to check: %d" %
          len(XL[0]))

    # Clamp the requested sizes to what the extracted feature set allows.
    if len(featureIndices) < maxNumFeatures:
        maxNumFeatures = len(featureIndices)
    elif maxNumFeatures < minNumFeatures:
        maxNumFeatures = minNumFeatures

    if minNumFeatures < 1:
        minNumFeatures = 1
    elif minNumFeatures > maxNumFeatures:
        minNumFeatures = maxNumFeatures
    print("Testing with combinations of %d to %d" %
          (minNumFeatures, maxNumFeatures))

    # Count the combinations arithmetically instead of materializing them.
    numberOfCombinations = 0
    for i in range(minNumFeatures, maxNumFeatures + 1):
        comb = nCr(len(featureIndices), i)
        print("Number of iterations for combinations of length %d: %d" %
              (i, comb))
        numberOfCombinations += comb

    print("Number of combinations to test %d" % numberOfCombinations)

    for i in range(maxNumFeatures, minNumFeatures - 1, -1):

        print("Starting to read PermutationLog")
        # NOTE(review): the log is read relative to the working directory but
        # written below under dir_path -- confirm both resolve to the same
        # folder.
        try:
            permfile = open(
                "Logs" + slash + "PermutationLog" + str(i) + ".txt", 'r')
        except IOError:
            print("PermutationLog file does not exist")
            skip = False
        else:
            print("Performing operations on PermutationLog buffer")
            with permfile:
                PermutationsString = permfile.read()
            # Entries are ':'-prefixed, so the piece before the first ':' is
            # dropped. A set makes the per-combination membership test below
            # constant-time.
            # NOTE(security): eval() executes whatever the log file contains;
            # this is only acceptable because the file is written by this
            # function. Consider ast.literal_eval if the log can come from
            # elsewhere.
            loggedPermutations = set(
                tuple(eval(entry))
                for entry in PermutationsString.split(':')[1:])
            print("Finished with operations")
            skip = True
        start = datetime.now()
        print("Finished reading permutations file")

        for p in combinations(featureIndices, i):  # If order matters use permutations

            if skip:
                if p in loggedPermutations:
                    numberOfCombinations -= 1
                    trainings += 1
                    stop = datetime.now()
                    elapsedTime = (stop - start)
                    start = stop
                    # Report every 1000th processed combination. trainings is
                    # cumulative over all sizes, so a modulo keeps the report
                    # alive in later rounds.
                    if trainings % 1000 == 0:
                        print("Training number: %d" % trainings)
                        print("Remaining combinations: %d" %
                              numberOfCombinations)
                        print(
                            "Elapsed time for checking that this combination exists: "
                            + str(elapsedTime))
                    continue
                # First combination that is not in the log: resume training
                # from here, including this combination.
                print("Found Starting point")
                skip = False

            start = datetime.now()
            XLtrainPerm = _selectFeatureColumns(XLtrain, p)
            XLtestPerm = _selectFeatureColumns(XLtest, p)

            bestParams, presc, r, f1, s, report = classifier.tuneSvmParameters(
                XLtrainPerm,
                yTrain,
                XLtestPerm,
                yTest,
                debug=False,
                fast=True,
                n_jobs=n_jobs)

            if saveLogs:
                logPrefix = dir_path + slash + "Logs" + slash
                logSuffix = str(i) + ".txt"
                _appendLogEntry(logPrefix + "PermutationLog" + logSuffix,
                                str(p))
                _appendLogEntry(logPrefix + "PrecisionLog" + logSuffix,
                                _commaPrefixed(presc))
                _appendLogEntry(logPrefix + "RecallLog" + logSuffix,
                                _commaPrefixed(r))
                _appendLogEntry(logPrefix + "F1Log" + logSuffix,
                                _commaPrefixed(f1))

            if debug:
                # Keep the full score history in memory and report the
                # current leader by weighted average precision.
                allPermutations.append(p)
                allParams.append(bestParams)
                allP.append(presc)
                allPavg.append(np.average(presc, weights=s))
                allR.append(r)
                allF1.append(f1)

                winner = allPavg.index(max(allPavg))
                print(report)
                print(
                    "Best features so far are: " +
                    convertPermutationToFeatureString(
                        allPermutations[winner]))
                print("Best result so far are: ", allPavg[winner])

            stop = datetime.now()
            numberOfCombinations -= 1
            trainings += 1
            elapsedTime = (stop - start)
            # Extrapolate from the duration of the most recent training.
            remainingTime = elapsedTime * numberOfCombinations
            print("Training number: %d" % trainings)
            print("Remaining combinations: %d" % numberOfCombinations)
            print("Elapsed time for training with this combination: " +
                  str(elapsedTime))
            print("Estimated remaining time: " + str(remainingTime))

    if sendMail:
        mail.sendemail(
            from_addr='*****@*****.**',
            to_addr_list=['*****@*****.**', '*****@*****.**'],
            cc_addr_list=[],
            subject="Training finished with combinations of %d to %d features"
            % (minNumFeatures, maxNumFeatures),
            message="Logs are ready for download ",
            login='******',
            password='******')


def _selectFeatureColumns(rows, columns):
    """Copy the given feature *columns* of every row into a new float array."""
    selected = np.empty([len(rows), len(columns)])
    for j in range(len(rows)):
        selected[j] = [rows[j][k] for k in columns]
    return selected


def _commaPrefixed(values):
    """Render *values* as text with a ',' in front of every item."""
    return "".join(',' + str(value) for value in values)


def _appendLogEntry(path, text):
    """Append one ':'-prefixed entry to the log file at *path*."""
    with open(path, 'a+') as logfile:
        logfile.write(":")
        logfile.write(text)

Esempio n. 7

Mostra file

def compareFeatures2(name,
                     shift,
                     windowLength,
                     n_jobs=-1,
                     X=None,
                     y=None,
                     plot=True):
    """Run recursive feature elimination with cross-validation and save the mask.

    When ``X`` or ``y`` is not supplied, the dataset is loaded from
    ``data.txt`` through ``dataset.loadDataset`` and then passed through
    ``dataset.sortDataset``. Features for every index in ``FUNC_MAP`` are
    extracted, scaled and split, and an ``RFECV`` wrapped around a linear SVC
    is fitted on the scaled features with ``y[0]`` as labels. The resulting
    support mask is written with ``writeFeatureMask`` and the results are
    printed (and optionally plotted).

    Args:
        name: Forwarded to ``writeFeatureMask`` together with the support mask.
        shift: Forwarded to ``dataset.loadDataset``.
        windowLength: Forwarded to ``dataset.loadDataset``.
        n_jobs: Forwarded to ``RFECV``.
        X: Optional pre-loaded data; loading is skipped only when both ``X``
            and ``y`` are given.
        y: Optional pre-loaded labels; ``y[0]`` is used as the label vector.
        plot: When true, plot the cross-validation score against the number
            of selected features.
    """
    #datasetfile = "longdata.txt"
    datasetfile = "data.txt"
    merge = True

    # Identity test on purpose: ``X == None`` compares element-wise when X is
    # a NumPy array, and using that result in ``or`` raises ValueError.
    if (X is None) or (y is None):
        X, y = dataset.loadDataset(filename=datasetfile,
                                   filterCondition=True,
                                   filterType="DcNotch",
                                   removePadding=True,
                                   shift=shift,
                                   windowLength=windowLength)
        if datasetfile == "longdata.txt":
            classes = [0, 5, 6, 4, 2, 8]
        else:
            classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
            #classes = [9,7,3,1,0,5]

        X, y = dataset.sortDataset(X,
                                   y,
                                   length=1000,
                                   classes=classes,
                                   merge=merge)  #,6,4,2,8
        if merge:
            classes = [0, 5, 6, 4, 2, 8]
            #y = dataset.mergeLabels(y)
        else:
            classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    # Calculate every feature listed in FUNC_MAP.
    #XL = extractAllFeatures(X, channel=0)
    XL = extractFeaturesWithMask(X, featuremask=range(len(FUNC_MAP)))
    scaler = classifier.makeScaler(XL)
    XLtrain, XLtest, yTrain, yTest, XL = classifier.scaleAndSplit(
        XL, y[0], scaler)

    clf = svm.SVC(kernel="linear", C=10, decision_function_shape='ovr')
    # Alternative estimators that were tried:
    #clf = svm.LinearSVC(penalty = 'l2',  loss='squared_hinge', dual = False, C = 10, random_state = 42)
    #clf = RandomForestClassifier(n_estimators = 45, max_depth = 10,  min_samples_leaf = 1, random_state = 40)
    #clf = svm.LinearSVC(penalty = 'l2', dual = False, C = 10, random_state = 42)
    #clf = linear_model.SGDClassifier(penalty = 'l2', random_state = 42)

    rfecv = RFECV(estimator=clf,
                  step=1,
                  cv=10,
                  n_jobs=n_jobs,
                  scoring='accuracy')
    rfecv.fit(XL, y[0])

    print("Optimal number of features : %d" % rfecv.n_features_)
    print("Optimal features: ")
    print(rfecv.support_)

    writeFeatureMask(rfecv.support_, name)

    # ``grid_scores_`` is absent from scikit-learn >= 1.2; there the mean test
    # score per feature-subset size is read from ``cv_results_`` instead.
    grid_scores = getattr(rfecv, "grid_scores_", None)
    if grid_scores is None:
        grid_scores = rfecv.cv_results_["mean_test_score"]

    print("The ranking of the features: ")
    print(rfecv.ranking_)
    print("The scores for each feature combination:")
    print(grid_scores)
    if plot:
        # Plot number of features VS. cross-validation scores
        plt.figure()
        plt.xlabel("Number of features selected")
        plt.ylabel("Cross validation score (nb of correct classifications)")
        plt.plot(range(1, len(grid_scores) + 1), grid_scores)
        plt.show()

    print("After feature selection: ")
    # NOTE(review): cross_val_score clones and refits the estimator on every
    # column of XLtrain, so this score may not reflect only the selected
    # features -- confirm whether rfecv.transform(XLtrain) was intended.
    scores = cross_val_score(rfecv.estimator_,
                             XLtrain,
                             yTrain,
                             cv=10,
                             scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    print()
    print("Scores")
    print(scores)