def train_Softmax(C,
                  dataFile,
                  X,
                  Y,
                  testX,
                  testY,
                  pooledFile,
                  imageDim,
                  sgd,
                  save=True,
                  prefix=""):

    cfg = config.Config()
    data_path = cfg.paths['data']

    if sgd:
        raise NotImplementedError
    else:
        SFC = SoftMaxClassifier(X.T, Y, LAMBDA=C, maxiter=10000)
        print(SFC._architecture)
        sfcFile = data_path + "classifiers/%sSoftMax_lambda%e_%s.pkl" % \
        (prefix, C, pooledFile.split("/")[-1].split(".")[0])

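    # Look for a previously trained classifier on disk: pickle.load simply
    # probes for the save file (raising IOError if it is missing) before the
    # network is rebuilt from it; otherwise train and optionally save a new one.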
    try:
        SFC = pickle.load(open(sfcFile, "rb"))
        SFC = SoftMaxClassifier(input=None, targets=None, saveFile=sfcFile)
        print("[*] trained classifier found.")
        print("[*] trained classifier loaded.")
    except IOError:
        print("[*] Training Softmax Classifier with LAMBDA=%e" % (C))
        SFC.train()
        print("[+] classifier trained.")
        if save:
            print("[+] saving classifier")
            #pickle.dump(SFC, open(sfcFile, "wb"))
            SFC.saveNetwork(sfcFile)

    #pred = SFC.predict(X.T)

    #acc = pred == Y.T
    #acc = np.sum(acc)/float(np.shape(acc)[0])
    #print 'Accuracy: %2.3f%%\n'% (acc * 100)

    #pred = SFC.predict(testX.T)
    #acc = pred == testY.T
    #acc = np.sum(acc)/float(np.shape(acc)[0])
    #print 'Accuracy: %2.3f%%\n'% (acc * 100)
    pred = SFC.predict(testX.T).T
    indices = np.argmax(pred, axis=1)
    pred = np.max(pred, axis=1)
    pred[indices == 0] = 1 - pred[indices == 0]

    fpr, tpr, thresholds = roc_curve(testY, pred)
    FoM = 1 - tpr[np.where(fpr <= 0.01)[0][-1]]
    print("[+] FoM: %.4f" % (FoM))
    threshold = thresholds[np.where(fpr <= 0.01)[0][-1]]
    print("[+] threshold: %.4f" % (threshold))

    return FoM, threshold
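
# Gradient check for the sparse filtering objective: compare the analytical
# gradient from SparseFilter.objective against a finite-difference estimate.
# computeNumericalGradient (defined elsewhere in this module) is assumed to
# approximate each partial derivative as
#   dJ/dW_i ~ (J(W + eps*e_i) - J(W - eps*e_i)) / (2*eps)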
def checkGradients():
    def costFunction(W, *args):
        def l2row(X):
            n, m = np.shape(X)
            N = np.sqrt(np.sum(np.multiply(X, X), axis=1) + 1e-8)
            N_stack = np.tile(N, (m, 1)).T
            Y = np.divide(X, N_stack)
            return Y, N

        def l2rowg(X, Y, N, D):
            n, m = np.shape(X)
            N_stack = np.tile(N, (m, 1)).T
            firstTerm = np.divide(D, N_stack)
            sum = np.sum(np.multiply(D, X), 1)
            sum = sum / (np.multiply(N, N))
            sum_stack = np.tile(sum[np.newaxis], (np.shape(Y)[1], 1)).T
            secondTerm = np.multiply(Y, sum_stack)
            return firstTerm - secondTerm

        X = args[0]
        n, m = np.shape(X)
        W = np.reshape(W, (k, n), order="F")
        # Feed forward
        F = np.dot(W, X)
        Fs = np.sqrt(np.multiply(F, F) + 1e-8)
        NFs, L2Fs = l2row(Fs)
        Fhat, L2Fn = l2row(NFs.T)
        # Compute objective function
        return np.sum(Fhat)

    k = 40
    n = 20
    # initialise
    #W = np.array([[1,2],[3,4],[5,6],[7,8]])/10.0
    W = np.random.rand(int(k), int(n))
    #print np.shape(W)
    W = np.ravel(W, order="F")
    cfg = config.Config()
    data_path = cfg.paths['data']
    dataFile = data_path + "naturalImages_patches_8x8.mat"
    data = sio.loadmat(dataFile)
    X = data["patches"][:n, :20]
    args = X, k

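    # Analytical gradient from SparseFilter.objective, compared element by
    # element below against the numerical estimate of the same cost function.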
    sf = SparseFilter(k, 1)
    cost, grad = sf.objective(X, W)
    numgrad = computeNumericalGradient(costFunction, W, *args)
    for i in range(len(numgrad)):
        print("%d\t%f\t%f" % (i, numgrad[i], grad[i]))

    print("The above two columns you get should be very similar.")
    print("(Left-Your Numerical Gradient, Right-Analytical Gradient)")
    print()
    print("If your backpropagation implementation is correct, then")
    print("the relative difference will be small (less than 1e-9). ")

    # relative difference between the numerical and analytical gradients
    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print("Relative Difference: %g" % diff)
def main():

    parser = optparse.OptionParser("[!] usage: python analyse_RF.py -F <data file>"+\
                                   " -c <classifier file> -s <data set>")

    parser.add_option("-F", dest="dataFile", type="string", \
                      help="specify data file to analyse")
    parser.add_option("-c", dest="classifierFile", type="string", \
                      help="specify classifier to use")
    parser.add_option("-s", dest="dataSet", type="string", \
                      help="specify data set to analyse ([training] or [test] set)")

    (options, args) = parser.parse_args()

    dataFile = options.dataFile
    classifierFile = options.classifierFile
    dataSet = options.dataSet

    # TODO: remove, only for testing
    if False:
        cfg = config.Config()
        data_path = cfg.paths['data']
        dataFile = data_path + "3pi_20x20_skew2_signPreserveNorm.mat"
        classifierFile = data_path + "classifiers/RF_n_estimators100_max_" +  \
        "features10_min_samples_leaf1_3pi_20x20_skew2_signPreserveNorm.pkl"
        dataSet = 'test'



    print()

    if dataFile == None or classifierFile == None or dataSet == None:
        print(parser.usage)
        exit(0)

    if dataSet != "training" and dataSet != "test":
        print("[!] Exiting: data set must be 1 of 'training' or 'test'")
        exit(0)

    try:
        data = sio.loadmat(dataFile)
    except IOError:
        print("[!] Exiting: %s Not Found" % (dataFile))
        exit(0)

    if dataSet == "training":
        X = np.nan_to_num(data["X"])
        y = np.squeeze(data["y"])
    elif dataSet == "test":
        X = np.nan_to_num(data["testX"])
        y = np.squeeze(data["testy"])

    try:
        classifier = pickle.load(open(classifierFile, "rb"))
    except IOError:
        print("[!] Exiting: %s Not Found" % (classifierFile))
        exit(0)
    measure_FoM(X, y, classifier)
def main():

    parser = optparse.OptionParser(
        "[!] usage: python cross_validate_RF.py -F <data file>")

    parser.add_option("-F", dest="dataFile", type="string", \
                      help="specify data file to analyse")

    (options, args) = parser.parse_args()
    dataFile = options.dataFile

    cfg = config.Config()
    data_path = cfg.paths['data']

    dataFile = data_path + "3pi_20x20_skew2_signPreserveNorm.mat"

    if dataFile == None:
        print(parser.usage)
        exit(0)

    data = sio.loadmat(dataFile)
    #scaler = preprocessing.StandardScaler().fit(data["X"])

    #X = scaler.transform(np.concatenate((data["X"], data["validX"])))
    X = np.nan_to_num(data["X"])
    m, n = np.shape(X)
    y = np.squeeze(data["y"])
    #y = np.squeeze(np.concatenate((data["y"], data["validy"])))
    n_estimators_grid = [100, 10]
    max_features_grid = [10, 25]
    min_samples_leaf_grid = [1, 2, 5]

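    # NOTE: KFold(m, n_folds=5) iterated directly is the pre-0.18 scikit-learn
    # API; current releases construct KFold(n_splits=5) and iterate kf.split(X).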
    kf = KFold(m, n_folds=5)
    fold = 1
    for n_estimators in n_estimators_grid:
        for max_features in max_features_grid:
            for min_samples_leaf in min_samples_leaf_grid:
                fold = 1
                FoMs = []
                for train, test in kf:
                    print("[*]", fold, n_estimators, max_features,
                          min_samples_leaf)
                    file = data_path + "classifiers/cv/RF_n_estimators"+str(n_estimators)+"_max_features"+str(max_features)+\
                           "_min_samples_leaf"+str(min_samples_leaf)+"_"+dataFile.split("/")[-1].split(".")[0]+\
                           "_fold"+str(fold)+".pkl"
                    try:
                        rf = pickle.load(open(file, "rb"))
                    except IOError:
                        train_x, train_y = X[train], y[train]
                        rf = train_RF(train_x, train_y, n_estimators,
                                      max_features, min_samples_leaf)
                        outputFile = open(file, "wb")
                        pickle.dump(rf, outputFile)
                    FoM, threshold = measure_FoM(X[test], y[test], rf, False)
                    fold += 1
                    FoMs.append(FoM)
                print("[+] mean FoM: %.3lf" % (np.mean(np.array(FoMs))))
                print()
def main():

    parser = optparse.OptionParser(
        "[!] usage: python cross_validate_SVM.py -F <data file>")

    parser.add_option("-F", dest="dataFile", type="string", \
                      help="specify data file to analyse")

    (options, args) = parser.parse_args()
    dataFile = options.dataFile

    # TODO: remove only for testing
    if False:
        cfg = config.Config()
        data_path = cfg.paths['data']
        data_file_standard = cfg.paths['data_file_standard']
        dataFile = data_path + data_file_standard

    if dataFile == None:
        print(parser.usage)
        exit(0)

    data = sio.loadmat(dataFile)

    X = data["X"]
    m, n = np.shape(X)
    y = np.squeeze(data["y"])

    kernel_grid = ["rbf"]
    C_grid = [5]
    gamma_grid = [1]

    kf = KFold(m, n_folds=5)
    fold = 1
    for kernel in kernel_grid:
        for C in C_grid:
            for gamma in gamma_grid:
                fold = 1
                FoMs = []
                for train, test in kf:
                    print("[*]", fold, kernel, C, gamma)
                    file = data_path + "classifiers/cv/SVM_kernel"+str(kernel)+"_C"+str(C)+\
                           "_gamma"+str(gamma)+"_"+dataFile.split("/")[-1].split(".")[0]+\
                           "_fold"+str(fold)+".pkl"
                    try:
                        svm = pickle.load(open(file, "rb"))
                    except IOError:
                        train_x, train_y = X[train], y[train]
                        svm = train_SVM(train_x, train_y, kernel, C, gamma)
                        outputFile = open(file, "wb")
                        pickle.dump(svm, outputFile)
                    FoM, threshold = measure_FoM(X[test], y[test], svm, False)
                    fold += 1
                    FoMs.append(FoM)
                print("[+] mean FoM: %.3lf" % (np.mean(np.array(FoMs))))
                print()
def main():

    parser = optparse.OptionParser("[!] usage: python analyse_SVM.py -F <data file>"+\
                                   " -c <classifier file> -s <data set>")

    parser.add_option("-F", dest="dataFile", type="string", \
                      help="specify data file to analyse")
    parser.add_option("-c", dest="classifierFile", type="string", \
                      help="specify classifier to use")
    parser.add_option("-s", dest="dataSet", type="string", \
                      help="specify data set to analyse ([training] or [test] set)")

    (options, args) = parser.parse_args()
    dataFile = options.dataFile
    classifierFile = options.classifierFile
    dataSet = options.dataSet

    cfg = config.Config()

    dataFile = cfg.paths['data'] + cfg.paths['data_file_standard']
    classifierFile = cfg.paths['data'] + "classifiers/" + \
    "SVM_kernelrbf_C1.0_gamma0.0025_3pi_20x20_skew2_signPreserveNorm.pkl"
    dataSet = "test"



    print()

    if dataFile == None or classifierFile == None or dataSet == None:
        print(parser.usage)
        exit(0)

    if dataSet != "training" and dataSet != "test":
        print("[!] Exiting: data set must be 1 of 'training' or 'test'")
        exit(0)

    try:
        data = sio.loadmat(dataFile)
    except IOError:
        print("[!] Exiting: %s Not Found" % (dataFile))
        exit(0)

    if dataSet == "training":
        X = data["X"]
        y = np.squeeze(data["y"])
    elif dataSet == "test":
        X = data["testX"]
        y = np.squeeze(data["testy"])

    try:
        classifier = pickle.load(open(classifierFile, "rb"))
    except IOError:
        print("[!] Exiting: %s Not Found" % (classifierFile))
        exit(0)
    measure_FoM(X, y, classifier)
def main():
    #checkGradients()
    cfg = config.Config()
    data_path = cfg.paths['data']
    dataFile = data_path + "3pi_20x20_skew2_signPreserveNorm.mat"

    #dataFile = "/Users/dew/development/PS1-Real-Bogus/data/3pi/"+\
    #                   "patches_3pi_20x20_signPreserveNorm_8x8_10.mat"

    data = sio.loadmat(dataFile)

    #X = data["patches"][:40000,:].T
    X = data["X"].T
    sf = SparseFilter()
    sf.fit(X)
    sf.saveSF(data_path + "trained_sparseFilters/SF_256_" +
              dataFile.split("/")[-1].split(".")[0] + ".mat")
    sf.visualiseLearnedFeatures()
def get_sparseFilter(numFeatures, patches, patchesFile, maxiter=100):
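    # Load a cached sparse filter trained with these settings if one exists on
    # disk; otherwise train a new one on the supplied patches and save it.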
    try:
        # added maxiter to filename 24/02/15
        cfg = config.Config()
        data_path = cfg.paths['data']
        sf_file = data_path + "trained_sparseFilters/SF_%d_%s_maxiter%d.mat" % \
        (numFeatures, patchesFile.split("/")[-1].split(".")[0], maxiter)
        print(sf_file)
        SF = SparseFilter(saveFile=sf_file)
        print("[*] Trained sparse filter loaded.")
    except IOError:
        print("[*] Could not find trained sparse filter.")
        print("[+] Training sparse filter ... ")
        SF = SparseFilter(k=numFeatures, maxiter=maxiter)
        SF.fit(patches)
        SF.saveSF(sf_file)
        print("[+] Sparse filter trained")
    SF.visualiseLearnedFeatures()
    return SF
def main(argv=None):

    cfg = config.Config()
    data_path = cfg.paths['data']

    if argv is None:
        argv = sys.argv

    if len(argv) != 5:
        sys.exit("Usage: train_RF.py <n_estimators> <max_features>" +\
                 " <min_samples_leaf> <.mat file>")

    n_estimators = int(argv[1])
    max_features = int(argv[2])
    min_samples_leaf = int(argv[3])
    dataFile = argv[4]

    # TODO: Remove, only for testing
    if False:
        n_estimators = 100
        max_features = 10
        min_samples_leaf = 1
        dataFile = data_path + "3pi_20x20_skew2_signPreserveNorm.mat"

    data = sio.loadmat(dataFile)

    #train_x = np.concatenate((data["X"], data["validX"]))
    #train_y = np.squeeze(np.concatenate((data["y"], data["validy"])))
    train_x = np.nan_to_num(data["X"])
    train_y = np.squeeze(data["y"])

    rf = train_RF(train_x, train_y, n_estimators, max_features,
                  min_samples_leaf)



    outputFile = open(data_path + "classifiers/RF_n_estimators"+str(n_estimators)+\
                      "_max_features"+str(max_features)+\
                      "_min_samples_leaf"+str(min_samples_leaf)+\
                      "_"+dataFile.split("/")[-1].split(".")[0]+".pkl", "wb")

    pickle.dump(rf, outputFile)
def main():


    parser = optparse.OptionParser("[!] usage: python classify.py\n"+\
                                   " -F <data files [comma-separated]>\n"+\
                                   " -c <classifier files [comma-separated]>\n"+\
                                   " -t <threshold [default=0.5]>\n"+\
                                   " -s <data set>\n"+\
                                   " -o <output file>\n"+\
                                   " -f <figure of merit [\"fpr\" or \"mdr\"]>"
                                   " -p <plot hypothesis distribution [optional]>\n"+\
                                   " -r <plot ROC curve [optional]>\n"+\
                                   " -n <classify by name [optional]>\n"+\
                                   " -P <pooled features file [optional]>\n"+\
                                   " -L <plot learning curve [optional]>\n"+\
                                   " -l <labels for plotting [optional, comma-separated]>\n"+\
                                   " -m <print miss classified file names>")

    parser.add_option("-F", dest="dataFiles", type="string", \
                      help="specify data file[s] to analyse")
    parser.add_option("-c", dest="classifierFiles", type="string", \
                      help="specify classifier[s] to use")
    parser.add_option("-t", dest="threshold", type="float", \
                      help="specify decision boundary threshold [default=0.5]")
    parser.add_option("-o", dest="outputFile", type="string", \
                      help="specify output file")
    parser.add_option("-f", dest="fom", type="string", \
                      help="specify the figure of merit either 1% FPR or 1% MDR - choose \"fpr\" or \"mdr\"")
    parser.add_option("-s", dest="dataSet", type="string", \
                      help="specify data set to analyse [default=test]")
    parser.add_option("-p", action="store_true", dest="plot", \
                      help="specify whether to plot the hypothesis distribution [optional]")
    parser.add_option("-r", action="store_true", dest="roc", \
                      help="specify whether to plot the ROC curve [optional]")
    parser.add_option("-n", action="store_true", dest="byName", \
                      help="specify whether to classify objects by name [optional]")
    parser.add_option("-P", dest="poolFile", type="string", \
                      help="specify pooled features file [optional]")
    parser.add_option("-L", action="store_true", dest="learningCurve", \
                      help="specify whether to generate a leraning curve [optional]")
    parser.add_option("-l", dest="labels", type="string", \
                      help="specify label[s] for plots [optional]")
    parser.add_option("-m", action="store_true", dest="miss", \
                      help="specify whether or not to print misclassified file names [optional]")

    (options, args) = parser.parse_args()

    ## TODO: Test by defining arguments
    if False:
        cfg = config.Config()
        data_path = cfg.paths['data']

        dataFiles = data_path + "3pi_20x20_skew2_signPreserveNorm.mat" + "," + \
                    data_path + "3pi_20x20_skew2_signPreserveNorm.mat" + "," + \
                    data_path + "3pi_20x20_skew2_signPreserveNorm.mat"

        classifierFiles = ""  # classifier file path(s) were not specified in the original snippet
        patchesFile = data_path + "patches_stl-10_unlabeled_meansub_20150409_psdb_6x6.mat"
        imageDim = 20
        imageChannels = 1
        patchDim = 6
        numFeatures = 20
        poolDim = 5
        stepSize = 20



    try:
        dataFiles = options.dataFiles.split(",")
        classifierFiles = options.classifierFiles.split(",")
        threshold = options.threshold
        outputFile = options.outputFile
        fom = options.fom
        dataSet = options.dataSet
        plot = options.plot
        roc = options.roc
        byName = options.byName
        poolFile = options.poolFile
        learningCurve = options.learningCurve
        miss = options.miss
        try:
            labels = options.labels.split(",")
        except:
            labels = None
    except AttributeError as e:
        print(e)
        print(parser.usage)
        exit(0)

    if dataFiles == None or classifierFiles == None:
        print(parser.usage)
        exit(0)

    if threshold == None:
        threshold = 0.5

    if dataSet == None:
        dataSet = "test"

    if fom == "fpr":
        fom_func = one_percent_fpr
    elif fom == "mdr":
        fom_func = one_percent_mdr
    else:
        fom_func = one_percent_fpr


    Xs = []
    Ys = []
    Files = []
    for dataFile in dataFiles:
        data = sio.loadmat(dataFile)
        print("[+] %s" % dataFile)
        X = np.nan_to_num(data["X"])
        #scaler = preprocessing.StandardScaler(with_std=False).fit(X)
        if dataSet == "test":
            try:
                Xs.append(np.nan_to_num(data["testX"]))
                #Xs.append(scaler.transform(data["testX"]))
                Ys.append(np.squeeze(data["testy"]))
                Files.append(data["test_files"])
            except KeyError:
                if plot:
                    y = np.zeros((np.shape(X)[0],))
                else:
                    print("[!] Exiting: Could not load test set from %s" % dataFile)
                    exit(0)
        elif dataSet == "training":
            try:
                Xs.append(np.nan_to_num(data["X"]))
                #Xs.append(np.squeeze(np.concatenate((data["X"], data["testX"]))))
                try:
                    #Ys.append(np.squeeze(np.concatenate((data["y"], data["testy"]))))
                    if -1 in data["y"]:
                        print(np.squeeze(np.where(data["y"] != -1)[1]))
                        Ys.append(np.squeeze(data["y"][np.where(data["y"] != -1)]))
                    else:
                        Ys.append(np.squeeze(data["y"]))
                except KeyError:
                    if fom:
                        print("[!] Exiting: Could not load labels from %s" % dataFile)
                        print("[*] FoM calculation is not possible without labels.")
                        exit(0)
                    else:
                        Ys.append(np.zeros((np.shape(X)[0],)))
                Files.append(data["images"])
            except KeyError:
                try:
                    Files.append(data["train_files"])
                except KeyError as e:
                    print(e)
                    try:
                        Files.append(data["files"])
                    except KeyError as e:
                        print(e)
                        print("[!] Exiting: Could not load training set from %s" % dataFile)
                        exit(0)
        elif dataSet == "all":
            try:
                Xs.append(np.nan_to_num(np.concatenate((data["X"], data["testX"]))))
                try:
                    Ys.append(np.squeeze(np.concatenate((data["y"], data["testy"]))))
                except KeyError:
                    if fom:
                        print("[!] Exiting: Could not load labels from %s" % dataFile)
                        print("[*] FoM calculation is not possible without labels.")
                        exit(0)
                    else:
                        Ys.append(np.zeros((np.shape(Xs[0])[0],)))
                Files.append(np.squeeze(np.concatenate((data["images"], data["test_files"]))))
            except KeyError:
                try:
                    Files.append(np.squeeze(np.concatenate((data["train_files"], data["test_files"]))))
                except KeyError as e:
                    print(e)
                    try:
                        Files.append(np.squeeze(np.concatenate((data["files"], data["test_files"]))))
                    except KeyError as e:
                        print(e)
                        print("[!] Exiting: Could not load training set from %s" % dataFile)
                        exit(0)
        else:
            print("[!] Exiting: %s is not a valid choice, choose one of \"training\" or \"test\"" % dataSet)
            exit(0)


    preds = []
    for classifierFile in classifierFiles:
        dataFile = dataFiles[classifierFiles.index(classifierFile)]
        try:
            predFile = predictionsPath+classifierFile.split("/")[-1].replace(".pkl","")+"_predictions_%s_%s.mat"%(dataFile.split("/")[-1].replace(".mat",""), dataSet)
            preds.append(np.squeeze(sio.loadmat(predFile)["predictions"]))
        except IOError:
            if poolFile != None:
                Xs = []
                try:
                    features = sio.loadmat(poolFile)
                    try:
                        pooledFeaturesTrain = features["pooledFeaturesTrain"]
                    except KeyError:
                        pooledFeaturesTrain = features["pooledFeatures"]

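                    # Unroll the pooled feature maps into a 2-D design matrix
                    # with one column per image, then scale each feature with
                    # a MinMaxScaler fitted on the training data only.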
                    X = np.transpose(pooledFeaturesTrain, (0,2,3,1))
                    numTrainImages = np.shape(X)[3]
                    X = np.reshape(X, (int((pooledFeaturesTrain.size)/float(numTrainImages)), \
                                   numTrainImages), order="F")

                    scaler = preprocessing.MinMaxScaler()
                    scaler.fit(X.T)  # Don't cheat - fit only on training data
                    X = scaler.transform(X.T)
                    if dataSet == "training":
                        pass
                    elif dataSet == "test":
                        pooledFeaturesTest = features["pooledFeaturesTest"]

                        X = np.transpose(pooledFeaturesTest, (0,2,3,1))
                        numTestImages = np.shape(X)[3]
                        X = np.reshape(X, (int((pooledFeaturesTest.size)/float(numTestImages)), \
                                       numTestImages), order="F")

                        X = scaler.transform(X.T)
                    Xs.append(X)
                except IOError:
                    print("[!] Exiting: %s Not Found" % (poolFile))
                    exit(0)
                finally:
                    features = None
                    pooledFeaturesTrain = None
                    pooledFeaturesTest = None

            X = Xs[classifierFiles.index(classifierFile)]
            if learningCurve:
                y = Ys[classifierFiles.index(classifierFile)]
                generate_Learning_Curve(X, y, classifierFile)
            else:
                pred = predict(classifierFile, X)
                #predFile = predictionsPath+classifierFile.split("/")[-1].replace(".mat","")+"_predictions_%s.mat"%dataSet
                #sio.savemat(predFile,{"ids":Files[classifierFiles.index(classifierFile)],"predictions":pred})
                preds.append(np.squeeze(pred))
    #X = Xs = None

    if outputFile != None and not byName:
        output = open(outputFile, "w")
        files = Files[0]
        pred = preds[0]
        y = Ys[0]
        for i,prediction in enumerate(pred):
            output.write(files[i].rstrip() + "," + str(prediction) + "," + str(y[i]) + "\n")
        output.close()

    if byName:
        files = Files[0]
        pred = preds[0]
        print(pred)
        print(files)
        print(outputFile)
        preds = [predict_byName(pred, files, outputFile)]
        try:
            Ys = [labels_byName(files, Ys[0])]
        except NameError as e:
            print(e)

    if plot:
        try:
            for pred in preds:
                hypothesisDist(Ys[preds.index(pred)], pred, threshold)
        except NameError as e:
            print("[!] NameError : %s", e)

    if roc:
        plot_ROC(Ys, preds, fom_func, Labels=labels)
        #test_FDR_procedure(Ys[0], preds[0])

    clf = pickle.load(open(classifierFiles[0],"rb"))
    if isinstance(clf, RandomForestClassifier):
        try:
            feature_names = []
            for f in sio.loadmat(dataFiles[0])["features"]:
                feature_names.append(str(f))
            feature_importance(Xs[0], clf, feature_names)
        except KeyError:
            feature_importance(Xs[0], clf, list(range(Xs[0].shape[1])))

    if miss:
        print_misclassified(Ys[0], preds[0], np.squeeze(Files[0]), fom_func, threshold)
import sys, optparse
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, f1_score
from sklearn import preprocessing
from tests_marco.tools import mlutils
from tests_marco.config import config
import pickle

# config
cfg = config.Config()
data_path = cfg.paths['data']

predictionsPath = data_path + "predictions/"

def one_percent_mdr(y, pred, fom):
    fpr, tpr, thresholds = roc_curve(y, pred)
    FoM = fpr[np.where(1-tpr<=fom)[0][0]] # FPR at 1% MDR
    threshold = thresholds[np.where(1-tpr<=fom)[0][0]]
    return FoM, threshold, fpr, tpr

def one_percent_fpr(y, pred, fom):
    fpr, tpr, thresholds = roc_curve(y, pred)
    FoM = 1-tpr[np.where(fpr<=fom)[0][-1]] # MDR at 1% FPR
    threshold = thresholds[np.where(fpr<=fom)[0][-1]]
    return FoM, threshold, fpr, tpr
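
# Usage sketch for the figure-of-merit helpers (illustrative values only, not
# taken from this project):
#   y_true = np.array([0, 0, 1, 1])
#   scores = np.array([0.1, 0.4, 0.35, 0.8])
#   FoM, threshold, fpr, tpr = one_percent_fpr(y_true, scores, 0.01)
#   print("MDR at 1%% FPR: %.3f (threshold %.3f)" % (FoM, threshold))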

def predict(clfFile, X):
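    # NOTE: the body of predict() was truncated in the original source. Minimal
    # sketch, assuming clfFile is a pickled scikit-learn classifier that
    # exposes predict_proba; returns the positive-class score for each row of X.
    clf = pickle.load(open(clfFile, "rb"))
    return clf.predict_proba(X)[:, 1]
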
def main():
    """
       add -v argument to visualise learned features
    """
    parser = optparse.OptionParser("[!] usage: python convolutional_sparseFiltering.py\n"+\
                                   "\t -F <data file>\n"+\
                                   "\t -P <patches file>\n"+\
                                   "\t -d <image dimension>\n"+\
                                   "\t -c <number of image channels>\n"+\
                                   "\t -p <patch dimension>\n"+\
                                   "\t -f <number of features to learn>\n"
                                   "\t -r <receptive field dimension>\n"+\
                                   "\t -s <step size>\n"+\
                                   "\t -C <regularisation parameter>\n"+\
                                   "\t -V <cross validate>\n"+\
                                   "\t -n <maximum number of patches to use>\n"+\
                                   "\t -m <maximum number of iterations [default=100]>\n"+\
                                   "\t -g <stochastic gradient decent>")

    parser.add_option("-F", dest="dataFile", type="string", \
                      help="specify data file to analyse")
    parser.add_option("-P", dest="patchesFile", type="string", \
                      help="specify patches file")
    parser.add_option("-d", dest="imageDim", type="int", \
                      help="specify dimension of images in data file")
    parser.add_option("-c", dest="imageChannels", type="int", \
                      help="specify number of channels for images in data file")
    parser.add_option("-p", dest="patchDim", type="int", \
                      help="specify dimension of patches in patches file")
    parser.add_option("-f", dest="numFeatures", type="int", \
                      help="specify number of features for sparse filtering")
    parser.add_option("-r", dest="poolDim", type="int", \
                      help="specify dimension of (pooling dimesion)")
    parser.add_option("-s", dest="stepSize", type="int", \
                      help="specify step size for convolution and pooling")
    parser.add_option("-C", dest="C", type="float", \
                      help="specify the regularisation parameter for linear SVM")
    parser.add_option("-V", action="store_true", dest="cv", \
                      help="specify whether to cross validate [default=False]")
    parser.add_option("-n", dest="numPatches", type="int", \
                      help="specify the maximum number of patches to use [optional]")
    parser.add_option("-m", dest="maxiter", type="int", \
                          help="specify the maximum number iterations [default=100]")
    parser.add_option("-g", action="store_true", dest="sgd", \
                      help="specify whether to use stochastic gradient decent [default=False]")

    (options, args) = parser.parse_args()

    dataFile = options.dataFile
    patchesFile = options.patchesFile
    imageDim = options.imageDim
    imageChannels = options.imageChannels
    patchDim = options.patchDim
    numFeatures = options.numFeatures
    poolDim = options.poolDim
    stepSize = options.stepSize
    C = options.C
    cv = options.cv
    numPatches = options.numPatches
    maxiter = options.maxiter
    sgd = options.sgd

    ## TODO: Test by defining arguments
    if False:
        cfg = config.Config()
        data_path = cfg.paths['data']

        dataFile = data_path + "3pi_20x20_skew2_signPreserveNorm.mat"
        patchesFile = data_path + "patches_stl-10_unlabeled_meansub_20150409_psdb_6x6.mat"
        imageDim = 20
        imageChannels = 1
        patchDim = 6
        numFeatures = 20
        poolDim = 5
        stepSize = 20
        C = 1
        cv = 3


    required_arguments = [dataFile, patchesFile, imageDim, imageChannels, \
                 patchDim, numFeatures, poolDim]

    if None in required_arguments:
        print(parser.usage)
        #exit(0)

    try:
        assert (numFeatures % stepSize) == 0
    except AssertionError:
        print(
            "[!] Exiting: step size must be a multiple of the number of features."
        )
        #exit(0)

    try:
        data = sio.loadmat(patchesFile)
        patches = data["patches"].T[:, :numPatches]
        ### Added scaling 06/01/15 ###
        #n,m = np.shape(patches)
        #means = np.mean(patches, axis=0)
        #means = np.tile(means, (n,1))
        #print np.shape(means)
        #patches = patches - means
        #data = means = None
    except IOError:
        print("[!] Exiting: could not open patches file - %s" % patchesFile)
        #exit(0)

    if maxiter == None:
        maxiter = 100
    SF = get_sparseFilter(numFeatures, patches, patchesFile, maxiter=maxiter)
    W = np.reshape(SF.trainedW, (SF.k, SF.n), order="F")
    SF = None
    patches = None
    # added maxiter to filename  24/02/15
    featuresFile = data_path + "features/SF_maxiter%d_L1_%s_%dx%d_k%d_%s_pooled%d.mat" % \
    (maxiter, dataFile.split("/")[-1].split(".")[0], patchDim, patchDim, numFeatures, \
    patchesFile.split("/")[-1].split(".")[0], poolDim)
    try:
        features = sio.loadmat(featuresFile)
        pooledFeaturesTrain = features["pooledFeaturesTrain"]
        pooledFeaturesTest = features["pooledFeaturesTest"]
        print("[*] convolved and pooled features loaded")
    except IOError:
        print("[*] no convloved and pooled features found for %s" %
              dataFile.split("/")[-1])
        print("[+] convolving and pooling...")
        convolve_and_pool(dataFile, featuresFile, W, imageDim, patchDim, poolDim, \
                          numFeatures, stepSize)
        features = sio.loadmat(featuresFile)
        pooledFeaturesTrain = features["pooledFeaturesTrain"]
        pooledFeaturesTest = features["pooledFeaturesTest"]
        print("[+] Done.")

    if cv == None:
        cv = False

    if sgd == None:
        sgd = False

    if C != None and cv == False:
        trainImages, trainLabels, numTrainImages,\
        testImages, testLabels, numTestImages = load_data(dataFile, imageDim)

        trainImages = None
        testImages = None

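        # Unroll the pooled training features into a matrix with one column per
        # training image; the MinMaxScaler fitted here is reused on the test set.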
        X = np.transpose(pooledFeaturesTrain, (0, 2, 3, 1))
        X = np.reshape(X, (int((pooledFeaturesTrain.size)/float(numTrainImages)), \
                       numTrainImages), order="F")
        # MinMax scaling removed 11-03-2015
        scaler = preprocessing.MinMaxScaler()
        scaler.fit(X.T)  # Don't cheat - fit only on training data
        X = scaler.transform(X.T)
        #X = X.T
        Y = np.squeeze(trainLabels)
        print(Y)

        testX = np.transpose(pooledFeaturesTest, (0, 2, 3, 1))
        testX = np.reshape(testX, (int((pooledFeaturesTest.size)/float(numTestImages)), \
                            numTestImages), order="F")
        # MinMax scaling removed 11-03-2015
        testX = scaler.transform(testX.T)
        testY = np.squeeze(testLabels)
        print(testY)

        #train_linearSVM(C, dataFile, X, Y, testX, testY, featuresFile, imageDim, \
        #                sgd, save=True, prefix="")

        #train_SoftMaxOnline(C, dataFile, X, Y, testX, testY, featuresFile, imageDim, \
        #                    sgd, save=True, prefix="")

        train_Softmax(C, dataFile, X, Y, testX, testY, featuresFile, imageDim, \
                      sgd, save=True, prefix="")

    elif cv == True:
        trainImages, trainLabels, numTrainImages,\
        testImages, testLabels, numTestImages = load_data(dataFile, imageDim)

        trainImages = None
        testImages = None

        X = np.transpose(pooledFeaturesTrain, (0, 2, 3, 1))
        X = np.reshape(X, (int((pooledFeaturesTrain.size)/float(numTrainImages)), \
                           numTrainImages), order="F")
        # MinMax scaling removed 11-03-2015
        scaler = preprocessing.MinMaxScaler()
        scaler.fit(X.T)  # Don't cheat - fit only on training data
        X = scaler.transform(X.T)
        Y = np.squeeze(trainLabels)

        #C = cross_validate_linearSVM(dataFile, X, Y, featuresFile, imageDim, sgd)
        #C = cross_validate_SoftMaxOnline(dataFile, X, Y, featuresFile, imageDim, sgd)
        C = cross_validate_Softmax(dataFile, X, Y, featuresFile, imageDim, sgd)

        testX = np.transpose(pooledFeaturesTest, (0, 2, 3, 1))
        testX = np.reshape(testX, (int((pooledFeaturesTest.size)/float(numTestImages)), \
                           numTestImages), order="F")
        # MinMax scaling removed 11-03-2015
        testX = scaler.transform(testX.T)
        testY = np.squeeze(testLabels)

        #train_linearSVM(C, dataFile, X, Y, testX, testY, featuresFile, imageDim, \
        #                sgd, save=True, prefix="")
        #train_SoftmaxOnline(C, dataFile, X, Y, testX, testY, featuresFile, imageDim, \
        #                    sgd, save=True, prefix="")
        train_Softmax(C, dataFile, X, Y, testX, testY, featuresFile, imageDim, \
                      sgd, save=True, prefix="")