def run(featureName): feature = Feature(featureName, "chbmitSolution/chbmitsettings.yml") X_train, Y_train = feature.loadFromDisk("mitpsd", "train") X_train, Y_train = feature.overlapInEachHour() X_train, _ = feature.scaleAcrossTime(X_train) channels = X_train.shape[1] bins = X_train.shape[2] steps = X_train.shape[3] X_train = X_train.reshape( X_train.shape[0], X_train.shape[1] * X_train.shape[2] * X_train.shape[3]) X = X_train y = Y_train zeros = numpy.where(y == 0) zeros = len(zeros[0]) ones = numpy.where(y == 1) ones = len(ones[0]) aucList = [] ssList = [] spList = [] acList = [] for j in xrange(100): cv = StratifiedKFold(y, n_folds=3, shuffle=True) clf = svm.SVC(probability=True, class_weight={1: 3}) for i, (train, test) in enumerate(cv): clf.fit(X[train], y[train]) prob = clf.predict(X[test]) matrix = confusion_matrix(y[test], prob) TP = matrix[0][0] FN = matrix[0][1] FP = matrix[1][0] TN = matrix[1][1] AC = (TP + TN) / float(TP + FP + TN + FN) acList.append(AC) print "Accuracy calculated by matrix:" + str(AC) SS = TP / float(TP + FN) ssList.append(SS) print "Sensitivity calcuated by matrix:" + str(SS) SP = TN / float(TN + FP) spList.append(SP) print "Specificity calcuated by matrix:" + str(SP) fpr, tpr, thresholds = roc_curve(y[test], prob) roc_auc = auc(fpr, tpr) aucList.append(roc_auc) print "AUC:" + str(roc_auc) print "featureName:" print "mean auc: " + str(numpy.mean(aucList)) print "mean SS:" + str(numpy.mean(ssList)) print "mean SP:" + str(numpy.mean(spList)) print "mean AC:" + str(numpy.mean(acList))
def run(setting):
    """Train an SVM on Kaggle FFT features and write test predictions.

    Fits on the full (overlapped, shuffled, scaled) training set and writes
    one preictal probability per test clip to <savePath>/<name>.csv.

    :param setting: loaded settings object providing ``name`` and ``savePath``.
    """
    feature = Feature(setting.name, "kaggleSolution/kaggleSettings.yml")
    X_train, Y_train = feature.loadFromDisk("fft", "train")
    X_train, Y_train = feature.overlapInEachHour(shuffle=True)
    X_train, _ = feature.scaleAcrossTime(X_train)
    X_test, Y_test = feature.loadFromDisk("fft", "test")
    X_test, _ = feature.scaleAcrossTime(X_test)
    # Flatten (samples, channels, bins, steps) -> (samples, features).
    X = X_train.reshape(X_train.shape[0],
                        X_train.shape[1] * X_train.shape[2] * X_train.shape[3])
    X_test = X_test.reshape(X_test.shape[0],
                            X_test.shape[1] * X_test.shape[2] * X_test.shape[3])
    y = Y_train
    # Weight the rare preictal class by the interictal/preictal ratio.
    zeros = len(numpy.where(y == 0)[0])
    ones = len(numpy.where(y == 1)[0])
    # BUG FIX: ``zeros / ones`` is floor division under Python 2, which
    # truncates the class weight (to 0 whenever zeros < ones); use a float
    # ratio instead. Also removed an unused StratifiedKFold that was
    # constructed but never iterated.
    clf = svm.SVC(probability=True, class_weight={1: zeros / float(ones)})
    clf.fit(X, y)
    result = clf.predict_proba(X_test)
    output = result[:, 1]  # probability of class 1 (preictal)
    ans = zip(Y_test, output)
    dataFrame = DataFrame(data=ans, columns=["clip", "preictal"])
    dataFrame.to_csv(setting.savePath + setting.name + ".csv",
                     index=False, header=True)
# NOTE(review): this chunk appears to be the middle of a transfer-learning
# script. `merged`, `input1`, `input2`, the training arrays and the
# hyper-parameters (`batch_size`, `nb_epoch`) are all defined before this
# excerpt, and the excerpt ends mid-loop — the surrounding context is not
# visible here.
output = Dense(2, activation="softmax", name="output")(merged)
model = Model(input = [input1, input2], output = [output])
sgd = SGD(lr = 0.01)
model.compile(loss="binary_crossentropy", optimizer = sgd, metrics=['accuracy'])
#callback = ModelCheckpoint(filepath = "my_model_weights.h5", save_best_only = True)
#history = model.fit({'input1':X_train, 'input2':X_pca_train}, {'output':Y_train}, callbacks = [callback], validation_data = ({"input1":X_train_chb01, "input2":X_pca_train_chb01}, {"output":Y_train_chb01}), nb_epoch=nb_epoch, verbose = 1, batch_size = batch_size, class_weight = [1, 1])
# Train with a 30% validation split and uniform class weights, then persist
# the learned weights for later transfer.
history = model.fit({'input1':X_train, 'input2':X_pca_train}, {'output':Y_train}, validation_split = 0.3, batch_size = batch_size, class_weight = [1, 1], nb_epoch = nb_epoch)
model.save_weights("my_model_weights.h5")
# Patients to transfer to. NOTE(review): "chb00" looks like a placeholder id;
# confirm the intended patient list.
nameList = ["chb00"]
setting = Setting("chbmitSolution/transfersettings.yml")
for name in nameList:
    # Reference patient chb01 (used as validation data in the commented-out
    # fit call above).
    setting = setting.loadSettings("chb01")
    feature = Feature(setting.name, "chbmitSolution/transfersettings.yml")
    X_train_chb01, Y_train_chb01 = feature.loadFromDisk("mitfft","train")
    X_train_chb01, Y_train_chb01 = feature.overlapInEachHour()
    X_train_chb01, _ = feature.scaleAcrossTime(X_train_chb01)
    X_pca_train_chb01, Y_pca_train_chb01 = feature.loadFromDisk("mitpca","train")
    # Log-scaled PCA features can contain -inf; zero them before scaling.
    X_pca_train_chb01[numpy.isneginf(X_pca_train_chb01)] = 0
    # NOTE(review): overlapInEachHour() takes no array argument — presumably
    # it acts on the Feature object's internal state from the most recent
    # load; verify it applies to the "mitpca" load and not the earlier
    # "mitfft" one.
    X_pca_train_chb01, Y_pca_train_chb01 = feature.overlapInEachHour()
    X_pca_train_chb01, _ = feature.scaleAcrossTime(X_pca_train_chb01)
    # Target patient: same two feature sets, loaded the same way.
    setting = setting.loadSettings(name = name)
    feature = Feature(setting.name, "chbmitSolution/transfersettings.yml")
    X_train, Y_train = feature.loadFromDisk("mitfft","train")
    X_train, Y_train = feature.overlapInEachHour()
    X_train, _ = feature.scaleAcrossTime(X_train)
    X_pca_train, Y_pca_train = feature.loadFromDisk("mitpca","train")
    X_pca_train[numpy.isneginf(X_pca_train)] = 0
    # NOTE(review): loop body continues beyond this excerpt.
def run(setting):
    """Train a Sequential CNN on Kaggle PCA features and write predictions.

    The network is: Gaussian noise, one full-height conv, four
    (dropout + 1x3 conv) pairs, then a tanh MLP head ending in a 2-way
    softmax. Test-set preictal probabilities are written to
    <savePath>/<name>.csv.

    :param setting: settings object providing nb_filter, batch_size,
        nb_epoch, noise, l2, dropout, output1, name and savePath.
    """
    nb_filters = setting.nb_filter
    batch_size = setting.batch_size
    nb_epoch = setting.nb_epoch
    featureName = setting.name
    feature = Feature(featureName, "kaggleSolution/kaggleSettings.yml")
    X_train, Y_train = feature.loadFromDisk("PCA", "train")
    X_train, Y_train = feature.overlapInEachHour(shuffle=True)
    X_train, _ = feature.scaleAcrossTime(X_train)
    X_test, Y_test = feature.loadFromDisk("PCA", "test")
    X_test, _ = feature.scaleAcrossTime(X_test)
    channels = X_train.shape[1]
    bins = X_train.shape[2]
    steps = X_train.shape[3]
    # Collapse (channels, bins) into a single spatial axis so the first
    # convolution can span all channels/bins at once.
    X_train = X_train.reshape(X_train.shape[0], 1,
                              X_train.shape[1] * X_train.shape[2],
                              X_train.shape[3])
    X_test = X_test.reshape(X_test.shape[0], 1,
                            X_test.shape[1] * X_test.shape[2],
                            X_test.shape[3])
    Y_train = np_utils.to_categorical(Y_train, 2)

    model = Sequential()
    model.add(noise.GaussianNoise(setting.noise,
                                  input_shape=(1, channels * bins, steps)))
    # Full-height conv collapses the feature axis to 1.
    model.add(Convolution2D(nb_filters, channels * bins, 1,
                            W_regularizer=l2(l=setting.l2),
                            input_shape=(1, channels * bins, steps),
                            activation="relu"))
    # Four identical (dropout + 1x3 temporal conv) pairs; previously
    # written out long-hand as seq3..seq10.
    for _ in range(4):
        model.add(Dropout(setting.dropout))
        model.add(Convolution2D(nb_filters, 1, 3,
                                W_regularizer=l2(l=setting.l2),
                                activation="relu"))
    model.add(Flatten())
    model.add(Dense(setting.output1, activation="tanh"))
    for width in (512, 256, 128):
        model.add(Dense(width, activation="tanh"))
    model.add(Dense(2, activation="softmax", name="output"))

    plot(model, to_file=featureName + ".png", show_shapes=True)
    sgd = SGD(lr=0.01)
    model.compile(loss='binary_crossentropy', optimizer=sgd)
    model.fit(X_train, Y_train, nb_epoch=nb_epoch, verbose=1,
              batch_size=batch_size)

    predictions = model.predict(X_test)
    output = predictions[:, 1].tolist()  # probability of class 1 (preictal)
    ans = zip(Y_test, output)
    dataFrame = DataFrame(data=ans, columns=["clip", "preictal"])
    dataFrame.to_csv(setting.savePath + featureName + ".csv",
                     index=False, header=True)
import numpy as np from matplotlib import pyplot as plt from preprocessing.Processor import * from preprocessing.Feature import * from tsne import bh_sne setting = Setting(path = "kaggleSolution/kaggleSettings.yml") setting.loadSettings(name="Dog_5") feature = Feature(setting.name, "kaggleSolution/kaggleSettings.yml") x_data, y_data = feature.loadFromDisk("PCA", "train") x_data = np.asanyarray(x_data).astype("float64") print x_data.shape print y_data.shape x_data = x_data.reshape((x_data.shape[0], -1)) print x_data.shape #n = 200 #x_data = x_data[:n] #y_data = y_data[:n] #print x_data.shape #print y_data.shape vis_data = bh_sne(x_data,perplexity=15) vis_x = vis_data[:,0] vis_y = vis_data[:,1] cm = plt.cm.get_cmap("cool") plt.scatter(vis_x, vis_y, c=y_data, cmap=cm) plt.colorbar(ticks=range(2)) plt.show()
def run(setting): nb_filters = setting.nb_filter batch_size = setting.batch_size nb_epoch = setting.nb_epoch featureName = setting.name feature = Feature(featureName, "kaggleSolution/kaggleSettings.yml") X_train, Y_train = feature.loadFromDisk("pca", "train") X_train, Y_train = feature.overlapInEachHour() X_train, _ = feature.scaleAcrossTime(X_train) X_test, Y_test = feature.loadFromDisk("pca", "test") X_test, _ = feature.scaleAcrossTime(X_test) channels = X_train.shape[1] bins = X_train.shape[2] steps = X_train.shape[3] X_pca_train, Y_pca_train = feature.loadFromDisk("fft", "train") X_pca_train, Y_pca_train = feature.overlapInEachHour() X_pca_train, _ = feature.scaleAcrossTime(X_pca_train) X_pca_test, Y_pca_test = feature.loadFromDisk("fft", "test") X_pca_test, _ = feature.scaleAcrossTime(X_pca_test) X_train, X_pca_train, Y_train = feature.shuffle(X_train, X_pca_train, Y_train) print X_train.shape X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1] * X_train.shape[2], X_train.shape[3]) X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1] * X_test.shape[2], X_test.shape[3]) X_pca_train = X_pca_train.reshape(X_pca_train.shape[0], 1, X_pca_train.shape[1] * X_pca_train.shape[2], X_pca_train.shape[3]) X_pca_test = X_pca_test.reshape(X_pca_test.shape[0], 1, X_pca_test.shape[1] * X_pca_test.shape[2], X_pca_test.shape[3]) Y_train = np_utils.to_categorical(Y_train, 2) input1 = Input(shape = (1, channels * bins, steps), name="input1") seq1 = Convolution2D(nb_filters, channels * bins, 1, #init="uniform", W_regularizer=l2(l=setting.l2), input_shape=(1, channels * bins, steps), activation="relu" )(input1) seq1 = Dropout(setting.dropout)(seq1) seq1 = Convolution2D(nb_filters, 1, 3, #init="uniform", W_regularizer=l2(l=setting.l2), activation="relu" )(seq1) seq1 = Dropout(setting.dropout)(seq1) seq1 = Convolution2D(nb_filters, 1, 3, #init="uniform", W_regularizer=l2(l=setting.l2), activation="relu" )(seq1) seq1 = Dropout(setting.dropout)(seq1) seq1 = 
Convolution2D(nb_filters, 1, 3, #init="uniform", W_regularizer=l2(l=setting.l2), activation="relu" )(seq1) seq1 = Dropout(setting.dropout)(seq1) seq1 = Convolution2D(nb_filters, 1, 3, #init="uniform", W_regularizer=l2(l=setting.l2), activation="relu" )(seq1) seq1 = Flatten()(seq1) output1 = Dense(setting.output1, activation="tanh")(seq1) input2 = Input(shape=(1, channels * 9, steps), name="input2") seq2 = Convolution2D(nb_filters, channels * 9, 1, #init="uniform", W_regularizer=l2(l=setting.l2), input_shape=(1, channels * 9, steps), activation="relu" )(input2) seq2 = Dropout(setting.dropout)(seq2) seq2 = Convolution2D(nb_filters, 1, 3, #init="uniform", W_regularizer=l2(l=setting.l2), activation="relu" )(seq2) seq2 = Dropout(setting.dropout)(seq2) seq2 = Convolution2D(nb_filters, 1, 3, #init="uniform", W_regularizer=l2(l=setting.l2), activation="relu" )(seq2) seq2 = Dropout(setting.dropout)(seq2) seq2 = Convolution2D(nb_filters, 1, 3, #init="uniform", W_regularizer=l2(l=setting.l2), activation="relu" )(seq2) seq2 = Dropout(setting.dropout)(seq2) seq2 = Convolution2D(nb_filters, 1, 3, #init="uniform", W_regularizer=l2(l=setting.l2), activation="relu" )(seq2) seq2 = Flatten()(seq2) output2 = Dense(setting.output2, activation="tanh")(seq2) merged = merge([output1, output2], mode="concat") merged = Dense(512, activation="tanh")(merged) merged = Dense(256, activation="tanh")(merged) if str(setting.name) != "Dog_5": merged = Dense(128, activation="tanh")(merged) output = Dense(2, activation="softmax", name="output")(merged) model = Model(input = [input1, input2], output=[output]) sgd = SGD(lr = setting.lr) model.compile(loss='binary_crossentropy', optimizer = "sgd") model.load_weights("kaggleSolution/weights/" + str(setting.name) + ".h5") #history = model.fit({'input1':X_train, 'input2':X_pca_train}, {'output':Y_train}, nb_epoch= nb_epoch, verbose = 1, batch_size = batch_size) #model.save_weights("kaggleSolution/weights/" + str(setting.name) + ".h5") plot(model, 
to_file="kaggleSolution/visualization/"+ str(setting.name) + ".png", show_shapes = True) predictions = model.predict({'input1':X_test, 'input2':X_pca_test}) output = predictions[:,1] output = output.tolist() ans = zip(Y_test,output) dataFrame = DataFrame(data=ans, columns=["clip", "preictal"]) dataFrame.to_csv(setting.savePath + featureName + ".csv", index=False, header = True)