def do_train():
    """Load the CSV train/test split, scale features, and train a feed-forward NN.

    Reads the split via ``TrainFiles.from_csv(csv_file)`` (``csv_file`` is
    expected to be defined at module level — TODO confirm), wraps it in an
    ``SKSupervisedLearning`` helper to fit a standard scaler, builds the
    NN datasets and trains the network.

    Returns
    -------
    The trained network produced by ``train``.
    """
    X, Y, Xt, Yt = TrainFiles.from_csv(csv_file)
    sl = SKSupervisedLearning(SVC, X, Y, Xt, Yt)
    sl.fit_standard_scaler()

    # NOTE(review): a large slab of commented-out PCA/RBM exploration code
    # was removed here; recover it from version control if needed.

    # BUG FIX: the original passed `X_test`, which is undefined in this
    # function — the test split loaded above is named `Xt`.
    trndata, tstdata = createDataSets(X, Y, Xt, Yt)
    fnn = train(trndata, tstdata, epochs=1000, test_error=0.025,
                momentum=0.15, weight_decay=0.0001)
    # Return the network instead of silently discarding it.
    return fnn
def do_train():
    """Load the CSV train/test split, scale features, and train a feed-forward NN.

    Duplicate snapshot of the same routine: reads the split via
    ``TrainFiles.from_csv(csv_file)`` (``csv_file`` is expected to be defined
    at module level — TODO confirm), fits a standard scaler through
    ``SKSupervisedLearning``, builds the NN datasets and trains the network.

    Returns
    -------
    The trained network produced by ``train``.
    """
    X, Y, Xt, Yt = TrainFiles.from_csv(csv_file)
    sl = SKSupervisedLearning(SVC, X, Y, Xt, Yt)
    sl.fit_standard_scaler()

    # NOTE(review): commented-out PCA/RBM exploration code removed; recover
    # it from version control if needed.

    # BUG FIX: the original passed `X_test`, which is undefined in this
    # function — the test split loaded above is named `Xt`.
    trndata, tstdata = createDataSets(X, Y, Xt, Yt)
    fnn = train(trndata, tstdata, epochs=1000, test_error=0.025,
                momentum=0.15, weight_decay=0.0001)
    # Return the network instead of silently discarding it.
    return fnn
from SupervisedLearning import SKSupervisedLearning
from train_files import TrainFiles
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import log_loss, confusion_matrix
from sklearn.calibration import CalibratedClassifierCV
from tr_utils import vote
# BUG FIX: np.unique is used in plot_confusion but numpy was never imported.
import numpy as np
import matplotlib.pylab as plt
from train_nn import createDataSets, train

train_path_mix = "/kaggle/malware/mix_lbp.csv"
labels_file = "/kaggle/malware/trainLabels.csv"

# Module-level data load: train features/labels and test features/labels.
X, Y_train, Xt, Y_test = TrainFiles.from_csv(train_path_mix)


def plot_confusion(sl):
    """Plot a row-normalized confusion matrix for a fitted learner.

    Parameters
    ----------
    sl : object
        Must expose ``Y_test`` (true labels), a fitted ``clf``, and
        ``X_test_scaled`` (scaled test features) — presumably an
        ``SKSupervisedLearning`` instance; verify against callers.
    """
    conf_mat = confusion_matrix(
        sl.Y_test, sl.clf.predict(sl.X_test_scaled)).astype(dtype='float')
    # Divide each row by its sum so cells read as per-class fractions.
    norm_conf_mat = conf_mat / conf_mat.sum(axis=1)[:, None]
    fig = plt.figure()
    plt.clf()
    ax = fig.add_subplot(111)
    ax.set_aspect(1)
    res = ax.imshow(norm_conf_mat, cmap=plt.cm.jet, interpolation='nearest')
    cb = fig.colorbar(res)
    # Class labels appear to start at 1; tick positions are 0-based,
    # hence the `- 1` shift.
    labs = np.unique(Y_test)
    x = labs - 1
    plt.xticks(x, labs)
from SupervisedLearning import SKSupervisedLearning
from train_files import TrainFiles
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import log_loss, confusion_matrix
from sklearn.calibration import CalibratedClassifierCV
from tr_utils import vote
# BUG FIX: np.unique is used in plot_confusion but numpy was never imported.
import numpy as np
import matplotlib.pylab as plt
from train_nn import createDataSets, train

train_path_mix = "/kaggle/malware/mix_lbp.csv"
labels_file = "/kaggle/malware/trainLabels.csv"

# Module-level data load: train features/labels and test features/labels.
X, Y_train, Xt, Y_test = TrainFiles.from_csv(train_path_mix)


def plot_confusion(sl):
    """Plot a row-normalized confusion matrix for a fitted learner.

    Parameters
    ----------
    sl : object
        Must expose ``Y_test`` (true labels), a fitted ``clf``, and
        ``X_test_scaled`` (scaled test features) — presumably an
        ``SKSupervisedLearning`` instance; verify against callers.
    """
    conf_mat = confusion_matrix(
        sl.Y_test, sl.clf.predict(sl.X_test_scaled)).astype(dtype='float')
    # Divide each row by its sum so cells read as per-class fractions.
    norm_conf_mat = conf_mat / conf_mat.sum(axis=1)[:, None]
    fig = plt.figure()
    plt.clf()
    ax = fig.add_subplot(111)
    ax.set_aspect(1)
    res = ax.imshow(norm_conf_mat, cmap=plt.cm.jet, interpolation='nearest')
    cb = fig.colorbar(res)
    # Class labels appear to start at 1; tick positions are 0-based,
    # hence the `- 1` shift.
    labs = np.unique(Y_test)
    x = labs - 1
    plt.xticks(x, labs)
    plt.yticks(x, labs)