# Cross-validation driver: loads sequences and labels, converts them to
# NumPy arrays and walks over stratified 10-fold splits.
from seq_reader import load_data
from ohv import get_rep_mats, conv_labels
import numpy as np
from sklearn.model_selection import StratifiedKFold
from keras.datasets import mnist
from keras.layers import Dense, Dropout, Activation, Flatten, Convolution2D, MaxPooling2D
from keras.models import Sequential
from keras.utils import np_utils

# One seed value drives both the NumPy RNG and the fold shuffling below.
seed = 123
np.random.seed(seed)

X, y = load_data("dataset/data.txt")
X = get_rep_mats(X)

# Overwrite, in place, every entry of each matrix with its own first element.
for mat in X:
    for pos, entry in enumerate(mat):
        mat[pos] = entry[0]

y = conv_labels(y)

X = np.asarray(X)
Y = np.asarray(y)

kfs = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
scores = []

for train, test in kfs.split(X, Y):
    # The fold number is derived from how many scores have been collected.
    print("----> FOLD [" + str(len(scores) + 1) + "]")

    # Select each subset once, then reshape it to (samples, 1, 58, 64)
    # and cast it to float32.
    train_x = X[train]
    test_x = X[test]
    Xtr = train_x.reshape(train_x.shape[0], 1, 58, 64).astype('float32')
    Xts = test_x.reshape(test_x.shape[0], 1, 58, 64).astype('float32')
np.random.seed(123)  # fixed seed so that runs are repeatable

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist

# Step 1: read the promoter dataset as (sequences, labels) and turn the
# sequences into an array of representation matrices.
X, y = load_data("../data/promoters.data.txt")
X = get_rep_mats(X)

# Overwrite, in place, every entry of each matrix with its own first element.
for mat in X:
    for pos, entry in enumerate(mat):
        mat[pos] = entry[0]

# Convert the labels to integers, then continue with NumPy arrays only.
y = conv_labels(y, "promoter")
X = np.asarray(X)
y = np.asarray(y)
# Cross-validation driver for the splice dataset: loads sequences and
# labels, converts them to NumPy arrays and walks over stratified
# 10-fold splits.
from sklearn.model_selection import StratifiedKFold  # cross validation
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist

# One seed value drives both the NumPy RNG and the fold shuffling below,
# for reproducibility.
seed = 123
np.random.seed(seed)

# 1. Load the data as (sequences, labels) and convert the sequences to an
#    array of representation matrices.
X, y = load_data("../data/splice.data.txt")
X = get_rep_mats(X)

# Custom reshape: overwrite, in place, every entry of each matrix with its
# own first element.
for mat in X:
    for pos, entry in enumerate(mat):
        mat[pos] = entry[0]

y = conv_labels(y)  # convert to integer labels

# Work with NumPy arrays from here on.
X = np.asarray(X)
Y = np.asarray(y)

# Define the 10-fold stratified cross-validation test harness.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
scores = []

for train, test in kfold.split(X, Y):
    # Parenthesised single-argument form: prints the same text on Python 2
    # and is valid syntax on Python 3 (the bare `print "..."` statement is
    # not). The fold number is derived from the number of collected scores.
    print("====> FOLD [" + str(len(scores) + 1) + "]")