def run(featureName):
    feature = Feature(featureName, "chbmitSolution/chbmitsettings.yml")
    X_train, Y_train = feature.loadFromDisk("mitpsd", "train")
    X_train, Y_train = feature.overlapInEachHour()
    X_train, _ = feature.scaleAcrossTime(X_train)

    channels = X_train.shape[1]
    bins = X_train.shape[2]
    steps = X_train.shape[3]
    X_train = X_train.reshape(
        X_train.shape[0],
        X_train.shape[1] * X_train.shape[2] * X_train.shape[3])
    X = X_train
    y = Y_train
    zeros = numpy.where(y == 0)
    zeros = len(zeros[0])
    ones = numpy.where(y == 1)
    ones = len(ones[0])

    aucList = []
    ssList = []
    spList = []
    acList = []

    for j in xrange(100):
        cv = StratifiedKFold(y, n_folds=3, shuffle=True)
        clf = svm.SVC(probability=True, class_weight={1: 3})
        for i, (train, test) in enumerate(cv):
            clf.fit(X[train], y[train])
            prob = clf.predict(X[test])

            matrix = confusion_matrix(y[test], prob)

            TP = matrix[0][0]
            FN = matrix[0][1]
            FP = matrix[1][0]
            TN = matrix[1][1]
            AC = (TP + TN) / float(TP + FP + TN + FN)
            acList.append(AC)
            print "Accuracy calculated by matrix:" + str(AC)
            SS = TP / float(TP + FN)
            ssList.append(SS)
            print "Sensitivity calcuated by matrix:" + str(SS)
            SP = TN / float(TN + FP)
            spList.append(SP)
            print "Specificity calcuated by matrix:" + str(SP)
            fpr, tpr, thresholds = roc_curve(y[test], prob)
            roc_auc = auc(fpr, tpr)
            aucList.append(roc_auc)
            print "AUC:" + str(roc_auc)

    print "featureName:"
    print "mean auc: " + str(numpy.mean(aucList))
    print "mean SS:" + str(numpy.mean(ssList))
    print "mean SP:" + str(numpy.mean(spList))
    print "mean AC:" + str(numpy.mean(acList))
# Ejemplo n.º 2
# 0
def run(setting):
    """Train an SVM on the Kaggle "fft" feature and write a submission CSV.

    NOTE(review): this block appears to be a corrupted concatenation of
    two different snippets -- everything from the Dense(...) line onward
    references names that are never defined in this function (`merged`,
    `input1`, `input2`, `X_pca_train`, `nb_epoch`, `batch_size`), so it
    would raise NameError if reached.  The SVM part above it is coherent.
    """
    feature = Feature(setting.name, "kaggleSolution/kaggleSettings.yml")
    X_train, Y_train = feature.loadFromDisk("fft","train")
    X_train, Y_train = feature.overlapInEachHour(shuffle = True)
    X_train, _ = feature.scaleAcrossTime(X_train)
    X_test,  Y_test = feature.loadFromDisk("fft","test")
    X_test, _ = feature.scaleAcrossTime(X_test)

    # Flatten (samples, channels, bins, steps) -> (samples, features).
    channels = X_train.shape[1]
    bins = X_train.shape[2]
    steps = X_train.shape[3]
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1] * X_train.shape[2] * X_train.shape[3])
    X_test = X_test.reshape(X_test.shape[0],  X_test.shape[1] * X_test.shape[2] * X_test.shape[3])

    X = X_train
    y = Y_train

    # Class counts used to weight the minority (preictal) class.
    zeros = numpy.where(y == 0)
    zeros = len(zeros[0])
    ones = numpy.where(y == 1)
    ones = len(ones[0])
    cv = StratifiedKFold(y, n_folds = 3, shuffle=True)
    # NOTE(review): in Python 2 `zeros / ones` is *integer* division, so
    # the class weight is floored -- presumably float(zeros) / ones was
    # intended; confirm before changing.
    clf = svm.SVC(probability = True, class_weight={1: zeros / ones})

    clf.fit(X,y)
    result = clf.predict_proba(X_test)

    # Column 1 is the preictal probability; pair it with the clip names.
    output = result[:,1]
    ans = zip(Y_test,output)
    dataFrame = DataFrame(data=ans, columns=["clip", "preictal"])
    dataFrame.to_csv(setting.savePath + setting.name + ".csv", index=False, header = True)
    # NOTE(review): the lines below belong to a different (Keras) snippet
    # and use undefined names; they are unreachable-as-written code.
    output = Dense(2, activation="softmax", name="output")(merged)
    model = Model(input = [input1, input2], output = [output])
    sgd = SGD(lr = 0.01)
    model.compile(loss="binary_crossentropy", optimizer = sgd, metrics=['accuracy'])

    #callback = ModelCheckpoint(filepath = "my_model_weights.h5", save_best_only = True)

    #history = model.fit({'input1':X_train, 'input2':X_pca_train}, {'output':Y_train}, callbacks = [callback], validation_data = ({"input1":X_train_chb01, "input2":X_pca_train_chb01}, {"output":Y_train_chb01}), nb_epoch=nb_epoch, verbose = 1, batch_size = batch_size, class_weight = [1, 1])
    history = model.fit({'input1':X_train, 'input2':X_pca_train}, {'output':Y_train},  validation_split = 0.3, batch_size = batch_size, class_weight = [1, 1], nb_epoch = nb_epoch)
    model.save_weights("my_model_weights.h5")

# Transfer-learning data preparation for the CHB-MIT solution.
# NOTE(review): this top-level script is truncated by the concatenation --
# the loop loads the chb01 reference patient and the target patient's
# "mitfft"/"mitpca" features but never uses them before the next `def`
# begins; the training step that presumably followed is missing here.
nameList = ["chb00"]
setting = Setting("chbmitSolution/transfersettings.yml")
for name in nameList:
    # Source patient (chb01): FFT + PCA features, overlapped and scaled.
    setting = setting.loadSettings("chb01")
    feature = Feature(setting.name, "chbmitSolution/transfersettings.yml")
    X_train_chb01, Y_train_chb01 = feature.loadFromDisk("mitfft","train")
    X_train_chb01, Y_train_chb01 = feature.overlapInEachHour()
    X_train_chb01, _ = feature.scaleAcrossTime(X_train_chb01)
    X_pca_train_chb01, Y_pca_train_chb01 = feature.loadFromDisk("mitpca","train")
    # Replace -inf entries (presumably log of zero power) before scaling.
    X_pca_train_chb01[numpy.isneginf(X_pca_train_chb01)] = 0
    X_pca_train_chb01, Y_pca_train_chb01 = feature.overlapInEachHour()
    X_pca_train_chb01, _ = feature.scaleAcrossTime(X_pca_train_chb01)

    # Target patient: same pipeline, loaded by name.
    setting = setting.loadSettings(name = name)
    feature = Feature(setting.name, "chbmitSolution/transfersettings.yml")
    X_train, Y_train = feature.loadFromDisk("mitfft","train")
    X_train, Y_train = feature.overlapInEachHour()
    X_train, _ = feature.scaleAcrossTime(X_train)
    X_pca_train, Y_pca_train = feature.loadFromDisk("mitpca","train")
    X_pca_train[numpy.isneginf(X_pca_train)] = 0
def run(setting):
    """Train a single-branch CNN on the Kaggle "PCA" feature and write
    per-clip preictal probabilities to <savePath>/<featureName>.csv.

    The network is a (channels*bins x 1) "collapse" convolution followed
    by four 1x3 temporal convolutions (dropout between each), then a
    stack of tanh dense layers and a 2-way softmax.
    """
    n_filters = setting.nb_filter
    n_batch = setting.batch_size
    n_epochs = setting.nb_epoch
    feat_name = setting.name

    feature = Feature(feat_name, "kaggleSolution/kaggleSettings.yml")
    X_train, Y_train = feature.loadFromDisk("PCA", "train")
    X_train, Y_train = feature.overlapInEachHour(shuffle=True)
    X_train, _ = feature.scaleAcrossTime(X_train)
    X_test, Y_test = feature.loadFromDisk("PCA", "test")
    X_test, _ = feature.scaleAcrossTime(X_test)

    n_samples, channels, bins, steps = X_train.shape

    # Collapse channel/bin axes into one spatial axis: (N, 1, C*B, T).
    X_train = X_train.reshape(n_samples, 1, channels * bins, steps)
    X_test = X_test.reshape(X_test.shape[0], 1,
                            X_test.shape[1] * X_test.shape[2],
                            X_test.shape[3])
    Y_train = np_utils.to_categorical(Y_train, 2)

    model = Sequential()
    model.add(noise.GaussianNoise(setting.noise,
                                  input_shape=(1, channels * bins, steps)))
    # First conv spans the full feature axis, reducing it to height 1.
    model.add(Convolution2D(n_filters,
                            channels * bins,
                            1,
                            W_regularizer=l2(l=setting.l2),
                            input_shape=(1, channels * bins, steps),
                            activation="relu"))
    # Four identical (dropout, 1x3 temporal conv) stages.
    for _ in range(4):
        model.add(Dropout(setting.dropout))
        model.add(Convolution2D(n_filters,
                                1,
                                3,
                                W_regularizer=l2(l=setting.l2),
                                activation="relu"))
    model.add(Flatten())
    for width in (setting.output1, 512, 256, 128):
        model.add(Dense(width, activation="tanh"))
    model.add(Dense(2, activation="softmax", name="output"))

    plot(model, to_file=feat_name + ".png", show_shapes=True)
    model.compile(loss='binary_crossentropy', optimizer=SGD(lr=0.01))
    model.fit(X_train,
              Y_train,
              nb_epoch=n_epochs,
              verbose=1,
              batch_size=n_batch)

    # Column 1 of the softmax output is the preictal probability.
    predictions = model.predict(X_test)
    ans = zip(Y_test, predictions[:, 1].tolist())
    dataFrame = DataFrame(data=ans, columns=["clip", "preictal"])
    dataFrame.to_csv(setting.savePath + feat_name + ".csv",
                     index=False,
                     header=True)
import numpy as np
from matplotlib import pyplot as plt
from preprocessing.Processor import *
from preprocessing.Feature import *
from tsne import bh_sne

setting = Setting(path = "kaggleSolution/kaggleSettings.yml")
setting.loadSettings(name="Dog_5")
feature = Feature(setting.name, "kaggleSolution/kaggleSettings.yml")
x_data, y_data = feature.loadFromDisk("PCA", "train")

x_data = np.asanyarray(x_data).astype("float64")
print x_data.shape
print y_data.shape
x_data = x_data.reshape((x_data.shape[0], -1))
print x_data.shape

#n = 200
#x_data = x_data[:n]
#y_data = y_data[:n]
#print x_data.shape
#print y_data.shape

vis_data = bh_sne(x_data,perplexity=15)
vis_x = vis_data[:,0]
vis_y = vis_data[:,1]
cm = plt.cm.get_cmap("cool")
plt.scatter(vis_x, vis_y, c=y_data, cmap=cm)
plt.colorbar(ticks=range(2))
plt.show()
# Ejemplo n.º 6
# 0
def run(setting):
    nb_filters = setting.nb_filter
    batch_size = setting.batch_size
    nb_epoch = setting.nb_epoch
    featureName = setting.name
    feature = Feature(featureName, "kaggleSolution/kaggleSettings.yml")
    X_train, Y_train = feature.loadFromDisk("pca", "train")
    X_train, Y_train = feature.overlapInEachHour()
    X_train, _ = feature.scaleAcrossTime(X_train)
    X_test,  Y_test = feature.loadFromDisk("pca", "test")
    X_test, _ = feature.scaleAcrossTime(X_test)
    channels = X_train.shape[1]
    bins = X_train.shape[2]
    steps = X_train.shape[3]
    X_pca_train, Y_pca_train = feature.loadFromDisk("fft", "train")
    X_pca_train, Y_pca_train = feature.overlapInEachHour()
    X_pca_train, _ = feature.scaleAcrossTime(X_pca_train)
    X_pca_test, Y_pca_test = feature.loadFromDisk("fft", "test")
    X_pca_test, _ = feature.scaleAcrossTime(X_pca_test)
    X_train, X_pca_train, Y_train = feature.shuffle(X_train, X_pca_train, Y_train)
    print X_train.shape

    X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1] * X_train.shape[2], X_train.shape[3])
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1] * X_test.shape[2], X_test.shape[3])
    X_pca_train = X_pca_train.reshape(X_pca_train.shape[0], 1, X_pca_train.shape[1] * X_pca_train.shape[2], X_pca_train.shape[3])
    X_pca_test = X_pca_test.reshape(X_pca_test.shape[0], 1, X_pca_test.shape[1] * X_pca_test.shape[2], X_pca_test.shape[3])
    Y_train = np_utils.to_categorical(Y_train, 2)

    input1 = Input(shape = (1, channels * bins, steps), name="input1")
    seq1 = Convolution2D(nb_filters, channels * bins, 1,
                           #init="uniform",
                           W_regularizer=l2(l=setting.l2),
                           input_shape=(1, channels * bins, steps),
                           activation="relu"
                           )(input1)
    seq1 = Dropout(setting.dropout)(seq1)
    seq1 = Convolution2D(nb_filters, 1, 3,
                           #init="uniform",
                           W_regularizer=l2(l=setting.l2),
                           activation="relu"
                           )(seq1)
    seq1 = Dropout(setting.dropout)(seq1)
    seq1 = Convolution2D(nb_filters, 1, 3,
                           #init="uniform",
                           W_regularizer=l2(l=setting.l2),
                           activation="relu"
                           )(seq1)
    seq1 = Dropout(setting.dropout)(seq1)
    seq1 = Convolution2D(nb_filters, 1, 3,
                           #init="uniform",
                           W_regularizer=l2(l=setting.l2),
                           activation="relu"
                           )(seq1)
    seq1 = Dropout(setting.dropout)(seq1)
    seq1 = Convolution2D(nb_filters, 1, 3,
                           #init="uniform",
                           W_regularizer=l2(l=setting.l2),
                           activation="relu"
                           )(seq1)
    seq1 = Flatten()(seq1)
    output1 = Dense(setting.output1, activation="tanh")(seq1)

    input2 = Input(shape=(1, channels * 9, steps), name="input2")
    seq2 = Convolution2D(nb_filters, channels * 9, 1,
                           #init="uniform",
                           W_regularizer=l2(l=setting.l2),
                           input_shape=(1, channels * 9, steps),
                           activation="relu"
                           )(input2)
    seq2 = Dropout(setting.dropout)(seq2)
    seq2 = Convolution2D(nb_filters, 1, 3,
                           #init="uniform",
                           W_regularizer=l2(l=setting.l2),
                           activation="relu"
                           )(seq2)
    seq2 = Dropout(setting.dropout)(seq2)
    seq2 = Convolution2D(nb_filters, 1, 3,
                           #init="uniform",
                           W_regularizer=l2(l=setting.l2),
                           activation="relu"
                           )(seq2)
    seq2 = Dropout(setting.dropout)(seq2)
    seq2 = Convolution2D(nb_filters, 1, 3,
                           #init="uniform",
                           W_regularizer=l2(l=setting.l2),
                           activation="relu"
                           )(seq2)
    seq2 = Dropout(setting.dropout)(seq2)
    seq2 = Convolution2D(nb_filters, 1, 3,
                           #init="uniform",
                           W_regularizer=l2(l=setting.l2),
                           activation="relu"
                           )(seq2)
    seq2 = Flatten()(seq2)
    output2 = Dense(setting.output2, activation="tanh")(seq2)

    merged = merge([output1, output2], mode="concat")
    merged = Dense(512, activation="tanh")(merged)
    merged = Dense(256, activation="tanh")(merged)
    if str(setting.name) != "Dog_5":
        merged = Dense(128, activation="tanh")(merged)
    output = Dense(2, activation="softmax", name="output")(merged)
    model = Model(input = [input1, input2], output=[output])
    sgd = SGD(lr = setting.lr)
    model.compile(loss='binary_crossentropy', optimizer = "sgd")
    model.load_weights("kaggleSolution/weights/" + str(setting.name) + ".h5")
    #history = model.fit({'input1':X_train, 'input2':X_pca_train}, {'output':Y_train}, nb_epoch= nb_epoch, verbose = 1, batch_size = batch_size)
    #model.save_weights("kaggleSolution/weights/" + str(setting.name) + ".h5")

    plot(model, to_file="kaggleSolution/visualization/"+ str(setting.name) + ".png", show_shapes = True)
    predictions = model.predict({'input1':X_test, 'input2':X_pca_test})
    output = predictions[:,1]
    output = output.tolist()
    ans = zip(Y_test,output)
    dataFrame = DataFrame(data=ans, columns=["clip", "preictal"])
    dataFrame.to_csv(setting.savePath + featureName + ".csv", index=False, header = True)