def init_nnet(d0, h1, d1, h2, d2, h3, d3, e, l, runtype):
    layers0 = [('input', InputLayer),
               ('dropout0', DropoutLayer),
               ('hidden1', DenseLayer),
               ('dropout1', DropoutLayer),
               ('hidden2', DenseLayer),
               ('dropout2', DropoutLayer),
               ('hidden3', DenseLayer),
               ('dropout3', DropoutLayer),
               ('output', DenseLayer)]
    net0 = NeuralNet(layers=layers0,
                     input_shape=(None, num_features),
                     dropout0_p=d0,
                     hidden1_num_units=h1,
                     hidden1_nonlinearity=tanh,
                     dropout1_p=d1,
                     hidden2_num_units=h2,
                     hidden2_nonlinearity=sigmoid,
                     dropout2_p=d2,
                     hidden3_num_units=h3,
                     hidden3_nonlinearity=sigmoid,
                     dropout3_p=d3,
                     output_num_units=3,
                     output_nonlinearity=softmax,
                     update=adagrad,
                     update_learning_rate=theano.shared(float32(l)),
                     # on_epoch_finished=on_epoch_finished,
                     # update_momentum=0.9,
                     train_split=TrainSplit(eval_size=0.2),
                     max_epochs=e,
                     verbose=2)
    return net0
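# Hypothetical usage sketch for init_nnet (not part of the original source).
# The function body reads `num_features` from the enclosing scope, so it must
# be defined before the call; X is float32 and y is int32 with labels in
# {0, 1, 2} to match output_num_units=3.
#
#   num_features = X.shape[1]
#   net = init_nnet(d0=0.1, h1=512, d1=0.3, h2=256, d2=0.3,
#                   h3=128, d3=0.2, e=100, l=0.01, runtype='train')
#   net.fit(X, y)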
def make_memnn(vocab_size, cont_sl, cont_wl, quest_wl, answ_wl, rnn_size,
               rnn_type='LSTM', pool_size=4, answ_n=4, dence_l=[100],
               dropout=0.5, batch_size=16, emb_size=50, grad_clip=40,
               init_std=0.1, num_hops=3, rnn_style=False, nonlin=LN.softmax,
               init_W=None, rng=None, art_pool=4, lr=0.01, mom=0,
               updates=LU.adagrad, valid_indices=0.2,
               permute_answ=False, permute_cont=False):

    def select_rnn(x):
        return {
            'RNN': LL.RecurrentLayer,
            'LSTM': LL.LSTMLayer,
            'GRU': LL.GRULayer,
        }.get(x, LL.LSTMLayer)

    # dence = dence + [1]
    RNN = select_rnn(rnn_type)

    #-----------------------------------------------------------------------weights
    tr_variables = {}
    tr_variables['WQ'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WA'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WC'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))
    tr_variables['WTA'] = theano.shared(
        init_std * np.random.randn(cont_sl, emb_size).astype('float32'))
    tr_variables['WTC'] = theano.shared(
        init_std * np.random.randn(cont_sl, emb_size).astype('float32'))
    tr_variables['WAnsw'] = theano.shared(
        init_std * np.random.randn(vocab_size, emb_size).astype('float32'))

    #------------------------------------------------------------------input layers
    layers = [(LL.InputLayer, {'name': 'l_in_q',
                               'shape': (batch_size, 1, quest_wl),
                               'input_var': T.itensor3('l_in_q_')}),
              (LL.InputLayer, {'name': 'l_in_a',
                               'shape': (batch_size, answ_n, answ_wl),
                               'input_var': T.itensor3('l_in_a_')}),
              (LL.InputLayer, {'name': 'l_in_q_pe',
                               'shape': (batch_size, 1, quest_wl, emb_size)}),
              (LL.InputLayer, {'name': 'l_in_a_pe',
                               'shape': (batch_size, answ_n, answ_wl, emb_size)}),
              (LL.InputLayer, {'name': 'l_in_cont',
                               'shape': (batch_size, cont_sl, cont_wl),
                               'input_var': T.itensor3('l_in_cont_')}),
              (LL.InputLayer, {'name': 'l_in_cont_pe',
                               'shape': (batch_size, cont_sl, cont_wl, emb_size)})]

    #------------------------------------------------------------------slice layers
    # l_qs = []
    # l_cas = []
    l_a_names = ['l_a_{}'.format(i) for i in range(answ_n)]
    l_a_pe_names = ['l_a_pe{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.extend([(LL.SliceLayer, {'name': l_a_names[i],
                                        'incoming': 'l_in_a',
                                        'indices': slice(i, i + 1),
                                        'axis': 1})])
    for i in range(answ_n):
        layers.extend([(LL.SliceLayer, {'name': l_a_pe_names[i],
                                        'incoming': 'l_in_a_pe',
                                        'indices': slice(i, i + 1),
                                        'axis': 1})])

    #------------------------------------------------------------------MEMNN layers
    #question----------------------------------------------------------------------
    layers.extend([(EncodingFullLayer, {'name': 'l_emb_f_q',
                                        'incomings': ('l_in_q', 'l_in_q_pe'),
                                        'vocab_size': vocab_size,
                                        'emb_size': emb_size,
                                        'W': tr_variables['WQ'],
                                        'WT': None})])
    l_mem_names = ['ls_mem_n2n_{}'.format(i) for i in range(num_hops)]
    layers.extend([(MemoryLayer, {'name': l_mem_names[0],
                                  'incomings': ('l_in_cont', 'l_in_cont_pe', 'l_emb_f_q'),
                                  'vocab_size': vocab_size,
                                  'emb_size': emb_size,
                                  'A': tr_variables['WA'],
                                  'C': tr_variables['WC'],
                                  'AT': tr_variables['WTA'],
                                  'CT': tr_variables['WTC'],
                                  'nonlin': nonlin})])
    for i in range(1, num_hops):
        if i % 2:
            WC, WA = tr_variables['WA'], tr_variables['WC']
            WTC, WTA = tr_variables['WTA'], tr_variables['WTC']
        else:
            WA, WC = tr_variables['WA'], tr_variables['WC']
            WTA, WTC = tr_variables['WTA'], tr_variables['WTC']
        layers.extend([(MemoryLayer, {'name': l_mem_names[i],
                                      'incomings': ('l_in_cont', 'l_in_cont_pe',
                                                    l_mem_names[i - 1]),
                                      'vocab_size': vocab_size,
                                      'emb_size': emb_size,
                                      'A': WA, 'C': WC, 'AT': WTA, 'CT': WTC,
                                      'nonlin': nonlin})])

    #answers-----------------------------------------------------------------------
    l_emb_f_a_names = ['l_emb_f_a{}'.format(i) for i in range(answ_n)]
    for i in range(answ_n):
        layers.extend([(EncodingFullLayer, {'name': l_emb_f_a_names[i],
                                            'incomings': (l_a_names[i], l_a_pe_names[i]),
                                            'vocab_size': vocab_size,
                                            'emb_size': emb_size,
                                            'W': tr_variables['WAnsw'],
                                            'WT': None})])

    #------------------------------------------------------------concatenate layers
    layers.extend([(LL.ConcatLayer, {'name': 'l_qma_concat',
                                     'incomings': l_mem_names + l_emb_f_a_names})])

    #--------------------------------------------------------------------RNN layers
    layers.extend([(RNN, {'name': 'l_qa_rnn_f',
                          'incoming': 'l_qma_concat',
                          # 'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': False,
                          'only_return_final': False,
                          'grad_clipping': grad_clip})])
    layers.extend([(RNN, {'name': 'l_qa_rnn_b',
                          'incoming': 'l_qma_concat',
                          # 'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': True,
                          'only_return_final': False,
                          'grad_clipping': grad_clip})])
    layers.extend([(LL.SliceLayer, {'name': 'l_qa_rnn_f_sl',
                                    'incoming': 'l_qa_rnn_f',
                                    'indices': slice(-answ_n, None),
                                    'axis': 1})])
    layers.extend([(LL.SliceLayer, {'name': 'l_qa_rnn_b_sl',
                                    'incoming': 'l_qa_rnn_b',
                                    'indices': slice(-answ_n, None),
                                    'axis': 1})])
    layers.extend([(LL.ElemwiseMergeLayer, {'name': 'l_qa_rnn_conc',
                                            'incomings': ('l_qa_rnn_f_sl', 'l_qa_rnn_b_sl'),
                                            'merge_function': T.add})])

    #-----------------------------------------------------------------pooling layer
    # layers.extend([(LL.DimshuffleLayer, {'name': 'l_qa_rnn_conc_',
    #                                      'incoming': 'l_qa_rnn_conc',
    #                                      'pattern': (0, 'x', 1)})])
    layers.extend([(LL.Pool1DLayer, {'name': 'l_qa_pool',
                                     'incoming': 'l_qa_rnn_conc',
                                     'pool_size': pool_size,
                                     'mode': 'max'})])

    #------------------------------------------------------------------dense layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence_l)]
    if dropout:
        layers.extend([(LL.DropoutLayer, {'name': 'l_dence_do', 'p': dropout})])
    for i, d in enumerate(dence_l):
        if i < len(dence_l) - 1:
            nonlin = LN.tanh
        else:
            nonlin = LN.softmax
        layers.extend([(LL.DenseLayer, {'name': l_dence_names[i],
                                        'num_units': d,
                                        'nonlinearity': nonlin})])
        if i < len(dence_l) - 1 and dropout:
            layers.extend([(LL.DropoutLayer, {'name': l_dence_names[i] + 'do',
                                              'p': dropout})])

    if isinstance(valid_indices, np.ndarray) or isinstance(valid_indices, list):
        train_split = TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split = TrainSplit(eval_size=valid_indices, stratify=False)

    if permute_answ or permute_cont:
        batch_iterator_train = PermIterator(batch_size, permute_answ, permute_cont)
    else:
        batch_iterator_train = BatchIterator(batch_size=batch_size)

    def loss(x, t):
        return LO.aggregate(
            LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
        # return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. - 1e-6), t))

    nnet = NeuralNet(
        y_tensor_type=T.ivector,
        layers=layers,
        update=updates,
        update_learning_rate=lr,
        # update_epsilon=1e-7,
        objective_loss_function=loss,
        regression=False,
        verbose=2,
        batch_iterator_train=batch_iterator_train,
        batch_iterator_test=BatchIterator(batch_size=batch_size / 2),
        # batch_iterator_train=BatchIterator(batch_size=batch_size),
        # batch_iterator_test=BatchIterator(batch_size=batch_size),
        # train_split=TrainSplit(eval_size=eval_size)
        train_split=train_split,
        on_batch_finished=[zero_memnn])
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet
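# The hop loop in make_memnn alternates the A/C embedding matrices by hop
# parity, which appears to follow the "adjacent" weight-tying scheme of
# end-to-end memory networks: the output embedding C of hop k is reused as
# the input embedding A of hop k + 1. A plain-Python sketch of that
# alternation (hypothetical names, illustration only):
#
#   WA, WC = 'embedding_A', 'embedding_C'
#   for hop in range(1, num_hops):
#       if hop % 2:
#           WA, WC = WC, WA  # reuse the previous hop's C as this hop's A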
def main():
    c = color_codes()
    patch_size = (15, 15, 15)
    dir_name = '/home/sergivalverde/w/CNN/images/CH16'
    patients = [f for f in sorted(os.listdir(dir_name))
                if os.path.isdir(os.path.join(dir_name, f))]
    names = np.stack([name for name in [
        [os.path.join(dir_name, patient, 'FLAIR_preprocessed.nii.gz')
         for patient in patients],
        [os.path.join(dir_name, patient, 'DP_preprocessed.nii.gz')
         for patient in patients],
        [os.path.join(dir_name, patient, 'T2_preprocessed.nii.gz')
         for patient in patients],
        [os.path.join(dir_name, patient, 'T1_preprocessed.nii.gz')
         for patient in patients],
    ] if name is not None], axis=1)
    seed = np.random.randint(np.iinfo(np.int32).max)

    ''' Here we create an initial net to find conflictive voxels '''
    print(c['c'] + '[' + strftime("%H:%M:%S") + '] ' + c['g'] +
          '<Running iteration ' + c['b'] + '1>' + c['nc'])
    net_name = '/home/sergivalverde/w/CNN/code/CNN1/miccai_challenge2016/deep-challenge2016.init.'
    net = NeuralNet(
        layers=[
            (InputLayer, dict(name='in', shape=(None, 4, 15, 15, 15))),
            (Conv3DDNNLayer, dict(name='conv1_1', num_filters=32,
                                  filter_size=(5, 5, 5), pad='same')),
            (Pool3DDNNLayer, dict(name='avgpool_1', pool_size=2,
                                  stride=2, mode='average_inc_pad')),
            (Conv3DDNNLayer, dict(name='conv2_1', num_filters=64,
                                  filter_size=(5, 5, 5), pad='same')),
            (Pool3DDNNLayer, dict(name='avgpool_2', pool_size=2,
                                  stride=2, mode='average_inc_pad')),
            (DropoutLayer, dict(name='l2drop', p=0.5)),
            (DenseLayer, dict(name='l1', num_units=256)),
            (DenseLayer, dict(name='out', num_units=2,
                              nonlinearity=nonlinearities.softmax)),
        ],
        objective_loss_function=objectives.categorical_crossentropy,
        update=updates.adam,
        update_learning_rate=0.0001,
        on_epoch_finished=[
            SaveWeights(net_name + 'model_weights.pkl', only_best=True, pickle=False),
            SaveTrainingHistory(net_name + 'model_history.pkl'),
            PlotTrainingHistory(net_name + 'training_history.png'),
            EarlyStopping(patience=10)
        ],
        verbose=10,
        max_epochs=50,
        train_split=TrainSplit(eval_size=0.25),
        custom_scores=[('dsc', lambda p, t: 2 * np.sum(p * t[:, 1]) / np.sum(p + t[:, 1]))],
    )

    try:
        net.load_params_from(net_name + 'model_weights.pkl')
    except IOError:
        print(c['c'] + '[' + strftime("%H:%M:%S") + '] ' + c['g'] +
              'Loading the data for ' + c['b'] + 'iteration 1' + c['nc'])
        # Create the data
        (x, y, _) = load_patches(dir_name=dir_name,
                                 use_flair=True,
                                 use_pd=True,
                                 use_t2=True,
                                 use_t1=True,
                                 use_gado=False,
                                 flair_name='FLAIR_preprocessed.nii.gz',
                                 pd_name='DP_preprocessed.nii.gz',
                                 t2_name='T2_preprocessed.nii.gz',
                                 t1_name='T1_preprocessed.nii.gz',
                                 gado_name=None,
                                 mask_name='Consensus.nii.gz',
                                 size=patch_size)
        print('-- Permuting the data')
        np.random.seed(seed)
        x_train = np.random.permutation(np.concatenate(x).astype(dtype=np.float32))
        print('-- Permuting the labels')
        np.random.seed(seed)
        y_train = np.random.permutation(np.concatenate(y).astype(dtype=np.int32))
        y_train = y_train[:, y_train.shape[1] / 2 + 1,
                          y_train.shape[2] / 2 + 1,
                          y_train.shape[3] / 2 + 1]
        print('-- Training vector shape = (' +
              ','.join([str(length) for length in x_train.shape]) + ')')
        print('-- Training labels shape = (' +
              ','.join([str(length) for length in y_train.shape]) + ')')
        print c['c'] + '[' + strftime("%H:%M:%S") + '] ' + c['g'] +\
            'Training (' + c['b'] + 'initial' + c['nc'] + c['g'] + ')' + c['nc']
        # We try to get the last weights to keep improving the net over and over
        net.fit(x_train, y_train)

    ''' Here we get the seeds '''
    print c['c'] + '[' + strftime("%H:%M:%S") + '] ' + c['g'] +\
        '<Looking for seeds>' + c['nc']
    for patient in names:
        output_name = os.path.join('/'.join(patient[0].rsplit('/')[:-1]),
                                   'test.iter1.nii.gz')
        try:
            load_nii(output_name)
            print c['c'] + '[' + strftime("%H:%M:%S") + '] ' \
                + c['g'] + '-- Patient ' + patient[0].rsplit('/')[-2] \
                + ' already done' + c['nc']
        except IOError:
            print c['c'] + '[' + strftime("%H:%M:%S") + '] '\
                + c['g'] + '-- Testing with patient ' + c['b'] \
                + patient[0].rsplit('/')[-2] + c['nc']
            image_nii = load_nii(patient[0])
            image = np.zeros_like(image_nii.get_data())
            for batch, centers in load_patch_batch(patient, 100000, patch_size):
                y_pred = net.predict_proba(batch)
                [x, y, z] = np.stack(centers, axis=1)
                image[x, y, z] = y_pred[:, 1]
            print c['g'] + '-- Saving image ' + c['b'] + output_name + c['nc']
            image_nii.get_data()[:] = image
            image_nii.to_filename(output_name)

    ''' Here we perform the last iteration '''
    print c['c'] + '[' + strftime("%H:%M:%S") + '] ' + c['g'] +\
        '<Running iteration ' + c['b'] + '2>' + c['nc']
    net_name = '/home/sergivalverde/w/CNN/code/CNN1/miccai_challenge2016/deep-challenge2016.final.'
    net = NeuralNet(
        layers=[
            (InputLayer, dict(name='in', shape=(None, 4, 15, 15, 15))),
            (Conv3DDNNLayer, dict(name='conv1_1', num_filters=32,
                                  filter_size=(5, 5, 5), pad='same')),
            (Pool3DDNNLayer, dict(name='avgpool_1', pool_size=2,
                                  stride=2, mode='average_inc_pad')),
            (Conv3DDNNLayer, dict(name='conv2_1', num_filters=64,
                                  filter_size=(5, 5, 5), pad='same')),
            (Pool3DDNNLayer, dict(name='avgpool_2', pool_size=2,
                                  stride=2, mode='average_inc_pad')),
            (DropoutLayer, dict(name='l2drop', p=0.5)),
            (DenseLayer, dict(name='l1', num_units=256)),
            (DenseLayer, dict(name='out', num_units=2,
                              nonlinearity=nonlinearities.softmax)),
        ],
        objective_loss_function=objectives.categorical_crossentropy,
        update=updates.adam,
        update_learning_rate=0.0001,
        on_epoch_finished=[
            SaveWeights(net_name + 'model_weights.pkl', only_best=True, pickle=False),
            SaveTrainingHistory(net_name + 'model_history.pkl'),
            PlotTrainingHistory(net_name + 'training_history.png'),
        ],
        batch_iterator_train=BatchIterator(batch_size=4096),
        verbose=10,
        max_epochs=2000,
        train_split=TrainSplit(eval_size=0.25),
        custom_scores=[('dsc', lambda p, t: 2 * np.sum(p * t[:, 1]) / np.sum(p + t[:, 1]))],
    )

    try:
        net.load_params_from(net_name + 'model_weights.pkl')
    except IOError:
        pass
    print c['c'] + '[' + strftime("%H:%M:%S") + '] '\
        + c['g'] + 'Loading the data for ' + c['b'] + 'iteration 2' + c['nc']
    (x, y, names) = load_patches(dir_name='/home/sergivalverde/w/CNN/images/CH16',
                                 use_flair=True,
                                 use_pd=True,
                                 use_t2=True,
                                 use_t1=True,
                                 use_gado=False,
                                 flair_name='FLAIR_preprocessed.nii.gz',
                                 pd_name='DP_preprocessed.nii.gz',
                                 t2_name='T2_preprocessed.nii.gz',
                                 gado_name=None,
                                 t1_name='T1_preprocessed.nii.gz',
                                 mask_name='Consensus.nii.gz',
                                 size=patch_size,
                                 roi_name='test.iter1.nii.gz')
    print '-- Permuting the data'
    np.random.seed(seed)
    x_train = np.random.permutation(np.concatenate(x).astype(dtype=np.float32))
    print '-- Permuting the labels'
    np.random.seed(seed)
    y_train = np.random.permutation(np.concatenate(y).astype(dtype=np.int32))
    y_train = y_train[:, y_train.shape[1] / 2 + 1,
                      y_train.shape[2] / 2 + 1,
                      y_train.shape[3] / 2 + 1]
    print '-- Training vector shape = (' + \
        ','.join([str(length) for length in x_train.shape]) + ')'
    print '-- Training labels shape = (' + \
        ','.join([str(length) for length in y_train.shape]) + ')'
    print c['c'] + '[' + strftime("%H:%M:%S") + '] '\
        + c['g'] + 'Training (' + c['b'] + 'final' + c['nc'] + c['g'] + ')' + c['nc']
    net.fit(x_train, y_train)
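# The 'dsc' entry in custom_scores above is a soft Dice similarity
# coefficient between predicted foreground probabilities and the positive
# label column. A standalone, runnable numpy sketch of the same formula:
import numpy as np

def soft_dice(prob, target):
    """prob: foreground probabilities, shape (n,); target: binary labels, shape (n,)."""
    return 2.0 * np.sum(prob * target) / np.sum(prob + target)

# soft_dice(np.array([0.9, 0.1, 0.8]), np.array([1, 0, 1]))  # ~0.895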
    output_num_units=1,
    output_nonlinearity=sigmoid,
    objective_loss_function=binary_crossentropy,
    update=adam,
    update_learning_rate=theano.shared(float32(0.0003), borrow=True),
    # update_momentum=theano.shared(float32(0.001), borrow=True),
    update_beta1=0.9,
    update_beta2=0.99,
    update_epsilon=1e-06,
    on_epoch_finished=[
        # AdjustVariable('update_learning_rate', start=0.3, stop=0.05),
        # AdjustVariable('update_momentum', start=0.001, stop=0.00299),
        # EarlyStopping(patience=200),
    ],
    regression=True,
    train_split=TrainSplit(eval_size=0.00),
    y_tensor_type=T.matrix,
    verbose=1,
    batch_iterator_train=BatchIterator(3200),
    max_epochs=100)

# np.random.seed(7)
# net0_clone = clone(net0)
# net0_clone.fit(t1nn_conc_shared.get_value(), y)
# net0_clone.fit(X_encoded_shared.get_value(), y)

cv_by_hand = [(np.where(cvFolds != fold)[0], np.where(cvFolds == fold)[0])
              for fold in np.unique(cvFolds)]
foldPred = np.zeros((t1nn_conc_shared.get_value().shape[0], 1))
bags = 10
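# cv_by_hand above builds (train_idx, valid_idx) index pairs from a
# precomputed fold-label vector. A tiny runnable illustration with
# hypothetical fold labels:
import numpy as np

cvFolds_demo = np.array([0, 1, 2, 0, 1, 2])
folds = [(np.where(cvFolds_demo != f)[0], np.where(cvFolds_demo == f)[0])
         for f in np.unique(cvFolds_demo)]
# folds[0] == (array([1, 2, 4, 5]), array([0, 3]))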
def cascade_model(options):
    """
    3D cascade model using Nolearn and Lasagne

    Inputs:
    - options: model options, including:
      - weights_path: path to where weights should be saved

    Output:
    - nets = list of NeuralNets (CNN1, CNN2)
    """
    # model options
    channels = len(options['modalities'])
    train_split_perc = options['train_split']
    num_epochs = options['max_epochs']
    max_epochs_patience = options['patience']

    # save the model to disk to re-use it: create an experiment folder
    # and organize the experiment
    if not os.path.exists(os.path.join(options['weight_paths'],
                                       options['experiment'])):
        os.mkdir(os.path.join(options['weight_paths'], options['experiment']))
    if not os.path.exists(os.path.join(options['weight_paths'],
                                       options['experiment'], 'nets')):
        os.mkdir(os.path.join(options['weight_paths'],
                              options['experiment'], 'nets'))

    # --------------------------------------------------
    # first model
    # --------------------------------------------------
    layer1 = InputLayer(name='in1', shape=(None, channels) + options['patch_size'])
    layer1 = batch_norm(Conv3DLayer(layer1, name='conv1_1', num_filters=32,
                                    filter_size=3, pad='same'),
                        name='BN1')
    layer1 = Pool3DLayer(layer1, name='avgpool_1', mode='max',
                         pool_size=2, stride=2)
    layer1 = batch_norm(Conv3DLayer(layer1, name='conv2_1', num_filters=64,
                                    filter_size=3, pad='same'),
                        name='BN2')
    layer1 = Pool3DLayer(layer1, name='avgpoo2_1', mode='max',
                         pool_size=2, stride=2)
    layer1 = DropoutLayer(layer1, name='l2drop', p=0.5)
    layer1 = DenseLayer(layer1, name='d_1', num_units=256)
    layer1 = DenseLayer(layer1, name='out', num_units=2,
                        nonlinearity=nonlinearities.softmax)

    # save weights
    net_model = 'model_1'
    net_weights = os.path.join(options['weight_paths'], options['experiment'],
                               'nets', net_model + '.pkl')
    net_history = os.path.join(options['weight_paths'], options['experiment'],
                               'nets', net_model + '_history.pkl')

    net1 = NeuralNet(
        layers=layer1,
        objective_loss_function=objectives.categorical_crossentropy,
        batch_iterator_train=Rotate_batch_Iterator(batch_size=128),
        update=updates.adadelta,
        on_epoch_finished=[
            SaveWeights(net_weights, only_best=True, pickle=False),
            SaveTrainingHistory(net_history),
            EarlyStopping(patience=max_epochs_patience)
        ],
        verbose=options['net_verbose'],
        max_epochs=num_epochs,
        train_split=TrainSplit(eval_size=train_split_perc),
    )

    # --------------------------------------------------
    # second model
    # --------------------------------------------------
    layer2 = InputLayer(name='in2', shape=(None, channels) + options['patch_size'])
    layer2 = batch_norm(Conv3DLayer(layer2, name='conv1_1', num_filters=32,
                                    filter_size=3, pad='same'),
                        name='BN1')
    layer2 = Pool3DLayer(layer2, name='avgpool_1', mode='max',
                         pool_size=2, stride=2)
    layer2 = batch_norm(Conv3DLayer(layer2, name='conv2_1', num_filters=64,
                                    filter_size=3, pad='same'),
                        name='BN2')
    layer2 = Pool3DLayer(layer2, name='avgpoo2_1', mode='max',
                         pool_size=2, stride=2)
    layer2 = DropoutLayer(layer2, name='l2drop', p=0.5)
    layer2 = DenseLayer(layer2, name='d_1', num_units=256)
    layer2 = DenseLayer(layer2, name='out', num_units=2,
                        nonlinearity=nonlinearities.softmax)

    # save weights
    net_model = 'model_2'
    net_weights2 = os.path.join(options['weight_paths'], options['experiment'],
                                'nets', net_model + '.pkl')
    net_history2 = os.path.join(options['weight_paths'], options['experiment'],
                                'nets', net_model + '_history.pkl')

    net2 = NeuralNet(
        layers=layer2,
        objective_loss_function=objectives.categorical_crossentropy,
        batch_iterator_train=Rotate_batch_Iterator(batch_size=128),
        update=updates.adadelta,
        on_epoch_finished=[
            SaveWeights(net_weights2, only_best=True, pickle=False),
            SaveTrainingHistory(net_history2),
            EarlyStopping(patience=max_epochs_patience)
        ],
        verbose=options['net_verbose'],
        max_epochs=num_epochs,
        train_split=TrainSplit(eval_size=train_split_perc),
    )

    return [net1, net2]
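# Hypothetical usage sketch for cascade_model (keys inferred from the reads
# above; all values are illustrative, not from the original source):
#
#   options = {'modalities': ['FLAIR', 'T1'], 'train_split': 0.25,
#              'max_epochs': 200, 'patience': 25,
#              'weight_paths': '/tmp/weights', 'experiment': 'demo',
#              'patch_size': (11, 11, 11), 'net_verbose': 1}
#   net1, net2 = cascade_model(options)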
def test_reproducable(self, TrainSplit, nn):
    X, y = np.random.random((100, 10)), np.repeat([0, 1, 2, 3], 25)
    X_train1, X_valid1, y_train1, y_valid1 = TrainSplit(0.2)(X, y, nn)
    X_train2, X_valid2, y_train2, y_valid2 = TrainSplit(0.2)(X, y, nn)
    assert np.all(X_train1 == X_train2)
    assert np.all(y_valid1 == y_valid2)
def test_stratified(self, TrainSplit, nn):
    X = np.random.random((100, 10))
    y = np.hstack([np.repeat([0, 0, 0], 25), np.repeat([1], 25)])
    X_train, X_valid, y_train, y_valid = TrainSplit(0.2)(X, y, nn)
    assert y_train.sum() == 0.8 * 25
    assert y_valid.sum() == 0.2 * 25
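# The test above asserts that a stratified 20% split preserves the class
# ratio in both partitions. A minimal runnable numpy sketch of stratified
# splitting (an illustration, not nolearn's actual implementation):
import numpy as np

def stratified_split(y, eval_size=0.2):
    train_idx, valid_idx = [], []
    for cls in np.unique(y):
        idx = np.where(y == cls)[0]
        n_valid = int(round(eval_size * len(idx)))
        valid_idx.extend(idx[:n_valid])   # same fraction from every class
        train_idx.extend(idx[n_valid:])
    return np.array(train_idx), np.array(valid_idx)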
def create_net(train_source, test_source, batch_size=128, max_epochs=100,
               train_val_split=False):
    """Create NN."""
    if train_val_split:
        train_val_split = TrainSplit(eval_size=0.2)
    else:
        train_val_split = TrainSplit(eval_size=False)

    batch_iter_train = IndexBatchIterator(train_source, batch_size=batch_size)
    batch_iter_test = IndexBatchIterator(test_source, batch_size=batch_size)
    LF = LayerFactory()

    dense = 1024  # larger (1024 perhaps) would be better

    # `filt2Dsize` is expected as a module-level setting: non-zero selects a
    # 2D convolution over (electrodes x time), zero selects a 1D convolution.
    if filt2Dsize:
        inputLayer = LF(InputLayer, shape=(None, 1, N_ELECTRODES, TIME_POINTS))
        convLayer = LF(Conv2DLayer, num_filters=8,
                       filter_size=(N_ELECTRODES, filt2Dsize))
    else:
        inputLayer = LF(InputLayer, shape=(None, N_ELECTRODES, TIME_POINTS))
        convLayer = LF(Conv1DLayer, num_filters=8, filter_size=1)

    layers = [
        inputLayer,
        LF(DropoutLayer, p=0.5),
        convLayer,
        # Standard fully connected net from now on
        LF(DenseLayer, num_units=dense),
        LF(DropoutLayer, p=0.5),
        LF(DenseLayer, num_units=dense),
        LF(DropoutLayer, p=0.5),
        LF(DenseLayer, layer_name="output", num_units=N_EVENTS,
           nonlinearity=sigmoid)
    ]

    def loss(x, t):
        return aggregate(binary_crossentropy(x, t))

    if filt2Dsize:
        nnet = NeuralNet(y_tensor_type=theano.tensor.matrix,
                         layers=layers,
                         batch_iterator_train=batch_iter_train,
                         batch_iterator_test=batch_iter_test,
                         max_epochs=max_epochs,
                         verbose=0,
                         update=adam,
                         update_learning_rate=0.001,
                         objective_loss_function=loss,
                         regression=True,
                         train_split=train_val_split,
                         **LF.kwargs)
    else:
        nnet = NeuralNet(y_tensor_type=theano.tensor.matrix,
                         layers=layers,
                         batch_iterator_train=batch_iter_train,
                         batch_iterator_test=batch_iter_test,
                         max_epochs=max_epochs,
                         verbose=0,
                         update=nesterov_momentum,
                         update_learning_rate=0.02,
                         update_momentum=0.9,
                         # update=adam,
                         # update_learning_rate=0.001,
                         objective_loss_function=loss,
                         regression=True,
                         train_split=train_val_split,
                         **LF.kwargs)
    return nnet
def main():
    # Parse command line options
    parser = argparse.ArgumentParser(
        description='Test different nets with 3D data.')
    parser.add_argument('--flair', action='store', dest='flair',
                        default='FLAIR_preprocessed.nii.gz')
    parser.add_argument('--pd', action='store', dest='pd',
                        default='DP_preprocessed.nii.gz')
    parser.add_argument('--t2', action='store', dest='t2',
                        default='T2_preprocessed.nii.gz')
    parser.add_argument('--t1', action='store', dest='t1',
                        default='T1_preprocessed.nii.gz')
    parser.add_argument('--output', action='store', dest='output',
                        default='output.nii.gz')
    parser.add_argument('--no-docker', action='store_false', dest='docker',
                        default=True)
    c = color_codes()
    patch_size = (15, 15, 15)
    options = vars(parser.parse_args())
    batch_size = 10000
    min_size = 30

    print(c['c'] + '[' + strftime("%H:%M:%S") + '] ' + c['g'] +
          '<Loading the net ' + c['b'] + '1' + c['nc'] + c['g'] + '>' + c['nc'])
    net_name = '/usr/local/nets/deep-challenge2016.init.model_weights.pkl' if options['docker'] \
        else './deep-challenge2016.init.model_weights.pkl'
    net = NeuralNet(
        layers=[
            (InputLayer, dict(name='in', shape=(None, 4, 15, 15, 15))),
            (Conv3DDNNLayer, dict(name='conv1_1', num_filters=32,
                                  filter_size=(5, 5, 5), pad='same')),
            (Pool3DDNNLayer, dict(name='avgpool_1', pool_size=2,
                                  stride=2, mode='average_inc_pad')),
            (Conv3DDNNLayer, dict(name='conv2_1', num_filters=64,
                                  filter_size=(5, 5, 5), pad='same')),
            (Pool3DDNNLayer, dict(name='avgpool_2', pool_size=2,
                                  stride=2, mode='average_inc_pad')),
            (DropoutLayer, dict(name='l2drop', p=0.5)),
            (DenseLayer, dict(name='l1', num_units=256)),
            (DenseLayer, dict(name='out', num_units=2,
                              nonlinearity=nonlinearities.softmax)),
        ],
        objective_loss_function=objectives.categorical_crossentropy,
        update=updates.adam,
        update_learning_rate=0.0001,
        verbose=10,
        max_epochs=50,
        train_split=TrainSplit(eval_size=0.25),
        custom_scores=[('dsc', lambda p, t: 2 * np.sum(p * t[:, 1]) / np.sum(p + t[:, 1]))],
    )
    net.load_params_from(net_name)

    print(c['c'] + '[' + strftime("%H:%M:%S") + '] ' + c['g'] +
          '<Creating the probability map ' + c['b'] + '1' + c['nc'] +
          c['g'] + '>' + c['nc'])
    names = np.array([options['flair'], options['pd'],
                      options['t2'], options['t1']])
    image_nii = load_nii(options['flair'])
    image1 = np.zeros_like(image_nii.get_data())
    print('0% of data tested', end='\r')
    sys.stdout.flush()
    for batch, centers, percent in load_patch_batch_percent(names, batch_size,
                                                            patch_size):
        y_pred = net.predict_proba(batch)
        print('%f%% of data tested' % percent, end='\r')
        sys.stdout.flush()
        [x, y, z] = np.stack(centers, axis=1)
        image1[x, y, z] = y_pred[:, 1]

    print(c['c'] + '[' + strftime("%H:%M:%S") + '] ' + c['g'] +
          '<Loading the net ' + c['b'] + '2' + c['nc'] + c['g'] + '>' + c['nc'])
    net_name = '/usr/local/nets/deep-challenge2016.final.model_weights.pkl' if options['docker'] \
        else './deep-challenge2016.final.model_weights.pkl'
    net = NeuralNet(
        layers=[
            (InputLayer, dict(name='in', shape=(None, 4, 15, 15, 15))),
            (Conv3DDNNLayer, dict(name='conv1_1', num_filters=32,
                                  filter_size=(5, 5, 5), pad='same')),
            (Pool3DDNNLayer, dict(name='avgpool_1', pool_size=2,
                                  stride=2, mode='average_inc_pad')),
            (Conv3DDNNLayer, dict(name='conv2_1', num_filters=64,
                                  filter_size=(5, 5, 5), pad='same')),
            (Pool3DDNNLayer, dict(name='avgpool_2', pool_size=2,
                                  stride=2, mode='average_inc_pad')),
            (DropoutLayer, dict(name='l2drop', p=0.5)),
            (DenseLayer, dict(name='l1', num_units=256)),
            (DenseLayer, dict(name='out', num_units=2,
                              nonlinearity=nonlinearities.softmax)),
        ],
        objective_loss_function=objectives.categorical_crossentropy,
        update=updates.adam,
        update_learning_rate=0.0001,
        batch_iterator_train=BatchIterator(batch_size=4096),
        verbose=10,
        max_epochs=2000,
        train_split=TrainSplit(eval_size=0.25),
        custom_scores=[('dsc', lambda t, p: 2 * np.sum(t * p[:, 1]) / np.sum(t + p[:, 1]))],
    )
    net.load_params_from(net_name)

    print(c['c'] + '[' + strftime("%H:%M:%S") + '] ' + c['g'] +
          '<Creating the probability map ' + c['b'] + '2' + c['nc'] +
          c['g'] + '>' + c['nc'])
    image2 = np.zeros_like(image_nii.get_data())
    print('0% of data tested', end='\r')
    sys.stdout.flush()
    for batch, centers, percent in load_patch_batch_percent(names, batch_size,
                                                            patch_size):
        y_pred = net.predict_proba(batch)
        print('%f%% of data tested' % percent, end='\r')
        sys.stdout.flush()
        [x, y, z] = np.stack(centers, axis=1)
        image2[x, y, z] = y_pred[:, 1]

    print(c['c'] + '[' + strftime("%H:%M:%S") + '] ' + c['g'] +
          '<Saving to file ' + c['b'] + options['output'] + c['nc'] +
          c['g'] + '>' + c['nc'])
    image = (image1 * image2) > 0.5

    # filter candidates < min_size
    labels, num_labels = ndimage.label(image)
    lesion_list = np.unique(labels)
    num_elements_by_lesion = ndimage.labeled_comprehension(
        image, labels, lesion_list, np.sum, float, 0)
    filt_min_size = num_elements_by_lesion >= min_size
    lesion_list = lesion_list[filt_min_size]
    image = reduce(np.logical_or, map(lambda lab: lab == labels, lesion_list))

    image_nii.get_data()[:] = np.roll(np.roll(image, 1, axis=0), 1, axis=1)
    path = '/'.join(options['t1'].rsplit('/')[:-1])
    outputname = options['output'].rsplit('/')[-1]
    image_nii.to_filename(os.path.join(path, outputname))

    if not options['docker']:
        path = '/'.join(options['output'].rsplit('/')[:-1])
        case = options['output'].rsplit('/')[-1]
        gt = load_nii(os.path.join(path, 'Consensus.nii.gz')).get_data().astype(dtype=np.bool)
        dsc = np.sum(2.0 * np.logical_and(gt, image)) / (np.sum(gt) + np.sum(image))
        print(c['c'] + '[' + strftime("%H:%M:%S") + '] ' + c['g'] +
              '<DSC value for ' + c['c'] + case + c['g'] + ' = ' +
              c['b'] + str(dsc) + c['nc'] + c['g'] + '>' + c['nc'])
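# The post-processing above drops connected components smaller than
# `min_size` voxels from the thresholded probability map. A standalone,
# runnable sketch of the same idea with scipy.ndimage:
import numpy as np
from scipy import ndimage

def filter_small_components(binary_image, min_size=30):
    labels, _ = ndimage.label(binary_image)
    sizes = np.bincount(labels.ravel())       # voxel count per component
    keep = np.where(sizes >= min_size)[0]
    keep = keep[keep != 0]                    # label 0 is the background
    return np.in1d(labels.ravel(), keep).reshape(labels.shape)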
def WalmartMetaBagging(train, train_y, test, train_tfidf, test_tfidf):
    usenn = True
    usexgb = False
    num_runs = 5
    num_classes = 38

    print('1. metabagging with neural_network')
    if usenn:
        # Load Data
        X, y, rbm0, rbm1, rbm2, rbm3 = load_train_data(train, train_y, train_tfidf)
        X_test = load_test_data(test, test_tfidf, rbm0, rbm1, rbm2, rbm3)
        num_features = X.shape[1]
        print(num_classes)
        print(num_features)
        print(train)

        layers0 = [('input', InputLayer),
                   ('dropout0', DropoutLayer),
                   ('dense0', DenseLayer),
                   ('dropout1', DropoutLayer),
                   ('dense1', DenseLayer),
                   ('dropout2', DropoutLayer),
                   ('output', DenseLayer)]

        # The same configuration is built once before the bagging loop and
        # once per run inside it, so factor it into a local constructor.
        def build_net0(num_features):
            return NeuralNet(layers=layers0,
                             input_shape=(None, num_features),
                             dropout0_p=0.05,  # theano.shared(float32(0.1)),
                             dense0_num_units=100,
                             dropout1_p=0.1,  # theano.shared(float32(0.5)),
                             dense1_num_units=200,
                             dropout2_p=0.3,  # theano.shared(float32(0.8)),
                             output_num_units=num_classes,
                             output_nonlinearity=softmax,
                             update=nesterov_momentum,
                             # update_learning_rate=0.005,
                             # update_momentum=0.9,
                             update_learning_rate=theano.shared(float32(0.001)),
                             update_momentum=theano.shared(float32(0.9)),
                             # objective_loss_function=log_loss,
                             train_split=TrainSplit(0.2),
                             verbose=1,
                             max_epochs=250,
                             on_epoch_finished=[
                                 AdjustVariable('update_learning_rate',
                                                start=0.002, stop=0.0001),
                                 AdjustVariable('update_momentum',
                                                start=0.9, stop=0.99),
                                 # AdjustDropout('dropout0_p', start=0.1, stop=0.2),
                                 # AdjustDropout('dropout1_p', start=0.5, stop=0.4),
                                 # AdjustDropout('dropout2_p', start=0.8, stop=0.9)
                             ])

        net0 = build_net0(num_features)
        print(X)
        net0.fit(X, y)
        y_prob = net0.predict_proba(X_test)

        for jj in xrange(num_runs):
            print(jj)
            XX, yy, rbm0, rbm1, rbm2, rbm3 = load_train_data(train, train_y, train_tfidf)
            XX_test = load_test_data(test, test_tfidf, rbm0, rbm1, rbm2, rbm3)
            num_features = X.shape[1]
            net0 = build_net0(num_features)
            y = np.array(y, dtype=np.int32)
            net0.fit(XX, yy)
            y_prob = y_prob + net0.predict_proba(XX_test)
        y_prob = y_prob / (num_runs + 1.0)

        sub = pd.read_csv('../input/sample_submission.csv')
        cols = sub.columns.values.tolist()[1:]
        sub[cols] = pd.DataFrame(np.around(y_prob, decimals=5)).applymap(lambda x: round(x, 5))
        sub.to_csv('nn_metabagging.csv', index=False)

    num_runs = 2
    print('2. metabagging with xgboost')
    if usexgb:
        X, y, rbm1, rbm2, rbm3 = xgb_train_data(train, train_y, train_tfidf)
        X_test = xgb_test_data(test, test_tfidf, rbm1, rbm2, rbm3)
        X_train, X_val, train_label, val_label = train_test_split(
            train, train_y, test_size=0.2)
        X_train = np.array(X_train, dtype=np.float32)
        X_val = np.array(X_val, dtype=np.float32)
        test = np.array(test, dtype=np.float32)
        train_label = np.array(train_label, dtype=np.int32)
        val_label = np.array(val_label, dtype=np.int32)
        xgtrain = xgb.DMatrix(X_train, label=train_label)
        xgval = xgb.DMatrix(X_val, label=val_label)
        # X_test is used for testing
        xgtest = xgb.DMatrix(X_test)

        params = {}
        params["objective"] = 'multi:softprob'
        params["eta"] = 0.1
        params["subsample"] = 0.7
        params["colsample_bytree"] = 0.55
        params["silent"] = 1
        params["max_depth"] = 8
        params["min_child_weight"] = 12
        params["gamma"] = 1
        params["num_class"] = 38
        params["eval_metric"] = 'mlogloss'

        watchlist = [(xgtrain, 'train'), (xgval, 'val')]
        model = xgb.train(list(params.items()), xgtrain, 120, watchlist,
                          early_stopping_rounds=5)
        xgb_pred = model.predict(xgtest)
        for jj in xrange(num_runs):
            print(jj)
            # XX, yy, rbm1, rbm2, rbm3 = xgb_train_data(train, train_y, train_tfidf)
            # XX_test = xgb_test_data(test, test_tfidf, rbm1, rbm2, rbm3)
            model = xgb.train(list(params.items()), xgtrain, 120, watchlist,
                              early_stopping_rounds=5)
            xgb_pred += model.predict(xgtest)
        xgb_pred = xgb_pred / (num_runs + 1.0)

        # from sklearn.ensemble import BaggingClassifier
        # baggs = BaggingClassifier(base_estimator=model, n_estimators=10,
        #                           max_samples=1.0, max_features=1.0,
        #                           bootstrap=True, bootstrap_features=False,
        #                           oob_score=False, warm_start=False,
        #                           n_jobs=1, random_state=None, verbose=0)

        sub = pd.read_csv('../input/sample_submission.csv')
        cols = sub.columns.values.tolist()[1:]
        sub[cols] = pd.DataFrame(np.around(xgb_pred, decimals=5)).applymap(lambda x: round(x, 5))
        sub.to_csv('xgb_metabagging.csv', index=False)
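# Both branches above implement the same "meta-bagging" recipe: train the
# model several times on freshly resampled features and average the
# predicted class probabilities. A tiny runnable numpy sketch of the
# averaging step (hypothetical prediction arrays):
import numpy as np

run_preds = [np.random.rand(4, 38) for _ in range(5)]  # 5 hypothetical runs
bagged = sum(run_preds) / float(len(run_preds))        # averaged probabilities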
l_in = InputLayer(shape=(None, num_features))
l_hidden1 = DenseLayer(l_in, num_units=hidden_units)
l_hidden2 = DropoutLayer(l_hidden1, p=dropout_p)
l_current = l_hidden2
for k in range(hidden_layers - 1):
    l_current = highway_dense(l_current)
    l_current = DropoutLayer(l_current, p=dropout_p)
l_dropout = DropoutLayer(l_current, p=dropout_p)
l_out = DenseLayer(l_dropout, num_units=1, nonlinearity=None)

# ==== Neural network definition ====
net1 = NeuralNet(layers=l_out,
                 update=adadelta,
                 update_rho=0.95,
                 update_learning_rate=1.0,
                 train_split=TrainSplit(eval_size=0),
                 verbose=0,
                 max_epochs=1,
                 regression=True)

# ==== Print out input shape for diagnosis ====
print(train_data.shape)
print(train_targets.shape)

# ==== Train it for n iterations and validate on each iteration ====
for i in range(epochs):
    net1.fit(train_data, train_targets)
    pred = net1.predict(test_data)
    val_auc[i] = np.mean((test_targets - pred) ** 2)
    print(i + 1, "\t", val_auc[i], "\t", min(val_auc), "\t")
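# highway_dense above is assumed to implement a standard highway layer:
# a sigmoid gate t blends a transformed signal h with the untouched input x,
# y = t * h + (1 - t) * x. A minimal runnable numpy sketch of that forward
# pass (hypothetical helper, not this code's actual layer):
import numpy as np

def highway_forward(x, Wh, bh, Wt, bt):
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    h = np.tanh(x.dot(Wh) + bh)   # transform path
    t = sigmoid(x.dot(Wt) + bt)   # transform gate
    return t * h + (1.0 - t) * x  # carry gate passes the input through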
    dropout0_p=0.6,
    dense1_num_units=100,
    dropout1_p=0.6,
    dense2_num_units=100,
    dropout2_p=0.6,
    dense3_num_units=100,
    output_num_units=num_classes,
    output_nonlinearity=softmax,
    # update=adagrad,
    update=nesterov_momentum,
    update_learning_rate=0.3,
    update_momentum=0.8,
    # objective_loss_function=binary_crossentropy,
    train_split=TrainSplit(0.2),
    verbose=1,
    max_epochs=50)

X, y, encoder, scaler = load_train_data(datapath + "train.csv", 0)
print("Fitting Sample 0")
net0.fit(X, y)
for i in range(1, 14):
    print("Loading Sample " + str(i))
    X, y, encoder, scaler1 = load_train_data(datapath + "train.csv", i)
    print("Fitting Sample " + str(i))
    net0.fit(X, y)
print("Fitting Complete")
def cnn(name, cnn_layers, classes, epochs=500, learning_rate=0.0002,
        verbose=1, seed=0, test_size=0.2, data_folder="all",
        oversampling=0, undersampling=0,
        oversampling_ratio=None, undersampling_ratio=None,
        update_func=lasagne.updates.adam, objective_l2=0.0025,
        train_split_eval_size=0.05, output_folder=None):
    # NOTE: `oversampling_ratio`, `undersampling_ratio` and `output_folder`
    # are given None defaults here so the signature is valid Python
    # (non-default arguments may not follow default ones).
    #
    # NOTE: while running the function, the current working directory should
    # be ../name(one of the arguments)/code/ and the dmdt processed data
    # should be in ../name(one of the arguments)/data/data_folder(one of the
    # arguments)/, containing X_2d.npy, a 3D matrix of dmdts with dimensions
    # (#dmdts, height of dmdt, width of dmdt); X_features.npy, a 2D matrix
    # with dimensions (#dmdts, #features); and y.npy, the dmdt labels
    # corresponding to X_2d.npy with dimension (#dmdts,).
    #
    # Arguments:
    # name: parent directory for which the CNN is to be trained, e.g.
    #     ensemble, cnn_with, cnn_without, gdr21, periodic, trans,
    #     ptf_classifier
    # cnn_layers: list of layers making up the CNN. Refer to
    #     https://lasagne.readthedocs.io/en/latest/modules/layers.html
    #     for the different layers which can be used, e.g.:
    #     [
    #      (InputLayer, {'name': 'input', 'shape': (None, X_train.shape[1], X_train.shape[2], X_train.shape[3])}),
    #      (Conv2DLayer, {'name': 'conv2d1', 'num_filters': 64, 'filter_size': (5, 5), 'pad': 0, 'nonlinearity': rectify}),
    #      (MaxPool2DLayer, {'name': 'maxpool1', 'pool_size': (2, 2)}),
    #      (DropoutLayer, {'name': 'dropout1', 'p': 0.1}),
    #      #(Conv2DLayer, {'name': 'conv2d2', 'num_filters': 128, 'filter_size': (5, 5), 'pad': 2, 'nonlinearity': rectify}),
    #      #(MaxPool2DLayer, {'pool_size': (2, 2)}),
    #      #(DropoutLayer, {'p': 0.3}),
    #      #(Conv2DLayer, {'name': 'conv2d3', 'num_filters': 256, 'filter_size': (5, 5), 'pad': 2, 'nonlinearity': rectify}),
    #      #(MaxPool2DLayer, {'pool_size': (2, 2)}),
    #      #(DropoutLayer, {'p': 0.5}),
    #      #(DenseLayer, {'name': 'dense1', 'num_units': 512}),
    #      #(DropoutLayer, {'name': 'dropout2', 'p': 0.5}),
    #      #(DenseLayer, {'name': 'dense2', 'num_units': 512}),
    #      (DenseLayer, {'name': 'output', 'num_units': len(list(set(y))), 'nonlinearity': softmax}),
    #     ]
    # classes: list of the class numbers to be used for training the CNN,
    #     e.g. for the ensemble CNN classes = [1,2,3,4,5,6,7,9,10,11,13,18]
    # data_folder: refer to the 'name' argument details
    # epochs, update_func, learning_rate, objective_l2, train_split_eval_size,
    #     verbose: NeuralNet parameters
    # output_folder: name of the directory in which the results of the trained
    #     CNN will be saved; most of the time it will match the name argument
    # oversampling, undersampling: equal to 1 to oversample or undersample
    #     the training data respectively, else 0
    # oversampling_ratio: refer to the ratio argument in
    #     http://contrib.scikit-learn.org/imbalanced-learn/stable/generated/imblearn.over_sampling.SMOTE.html
    # undersampling_ratio: refer to the ratio argument in
    #     http://contrib.scikit-learn.org/imbalanced-learn/stable/generated/imblearn.under_sampling.RandomUnderSampler.html

    import matplotlib
    matplotlib.use('Agg')
    import os
    import copy
    import generate
    import numpy
    import theano
    import theano.gpuarray
    import pygpu
    from pygpu import gpuarray
    # gpuarray.use("gpu" + str(0))
    # import theano.sandbox.cuda
    # theano.sandbox.cuda.use("gpu" + str(0))
    # theano.gpuarray.use("gpu" + str(0))
    theano.gpuarray.use("cuda" + str(0))
    import lasagne
    from nolearn.lasagne import NeuralNet, objective, TrainSplit, visualize
    from lasagne.nonlinearities import softmax, rectify
    from lasagne.layers import InputLayer
    from lasagne.layers import Conv2DLayer
    from lasagne.layers import MaxPool2DLayer
    from lasagne.layers import DropoutLayer
    from lasagne.layers import DenseLayer
    # f1_score and matthews_corrcoef are used below, so import them here
    from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
    from sklearn.model_selection import train_test_split
    from sklearn import preprocessing
    from util import plot_confusion, plot_misclassifications
    import numpy as np
    from six.moves import cPickle
    import pickle
    from imblearn.over_sampling import SMOTE
    from imblearn.under_sampling import TomekLinks, RandomUnderSampler

    # parameters
    # epochs = 500
    # learning_rate = 0.0002
    # verbose = 1
    # seed = 0
    # classes = [5, 6]  # what are classes
    # test_size = 0.2

    # get data and encode labels
    # X_2d, X_features, y, indices = generate.get_data("all", classes=classes, shuffle=True, seed=seed)
    X_2d, X_features, y, indices = generate.get_data(data_folder,
                                                     classes=classes,
                                                     shuffle=True,
                                                     seed=seed)
    ## sm = SMOTE(random_state=seed)
    ## (f, g, h) = X_2d.shape
    ## X_2d, y = sm.fit_sample(X_2d.reshape(f, g * h), y)
    ## X_2d = X_2d.reshape((X_2d.shape[0], g, h))
    # print(scenario)
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(y)
    y = labelencoder.transform(y).astype(numpy.int32)
    print("Total number of instances: " + str(len(y)))

    # split data (train/test)
    X_train, X_test, y_train, y_test, indices_train, indices_test = \
        train_test_split(X_2d, y, indices, test_size=test_size,
                         random_state=seed)

    if oversampling == 1:
        # sm = SMOTE(random_state=seed)
        sm = SMOTE(random_state=seed, ratio=oversampling_ratio)
        # ratio={2: 1000, 4: 1000, 5: 1000})
        (f, g, h) = X_train.shape
        # imblearn samplers expect 2D input, so flatten each dmdt and
        # restore the image shape afterwards
        X_train, y_train = sm.fit_sample(X_train.reshape(f, g * h), y_train)
        X_train = X_train.reshape((X_train.shape[0], g, h))
    if undersampling == 1:
        rus = RandomUnderSampler(random_state=seed, ratio=undersampling_ratio)
        # ratio={0: 1000, 1: 1000, 3: 1000})
        (ff, gg, hh) = X_train.shape
        X_train, y_train = rus.fit_sample(X_train.reshape(ff, gg * hh), y_train)
        X_train = X_train.reshape((X_train.shape[0], gg, hh))
    # sm = SMOTE(random_state=seed)
    # X_train, y_train = sm.fit_sample(X_train, y_train)

    X_test_plot = copy.deepcopy(X_test)

    # add the singleton channel axis expected by Conv2DLayer:
    # (n, h, w) -> (n, 1, h, w)
    X_train = X_train.reshape((X_train.shape[0], 1,
                               X_train.shape[1], X_train.shape[2]))
    X_test = X_test.reshape((X_test.shape[0], 1,
                             X_test.shape[1], X_test.shape[2]))

    print("Number of training instances: %i" % len(y_train))
    print("Number of test instances: %i" % len(y_test))

    layers = cnn_layers
    # (see the example layer list in the argument notes above)

    net = NeuralNet(
        layers=layers,
        # layers=[('input', InputLayer), ('conv2d1', Conv2DLayer),
        #         ('maxpool1', MaxPool2DLayer), ('dropout1', DropoutLayer),
        #         ('conv2d2', Conv2DLayer), ('conv2d3', Conv2DLayer),
        #         ('dense1', DenseLayer), ('dropout2', DropoutLayer),
        #         ('dense2', DenseLayer), ('output', DenseLayer)],
        max_epochs=epochs,
        update=update_func,
        update_learning_rate=learning_rate,
        objective_l2=objective_l2,
        train_split=TrainSplit(eval_size=train_split_eval_size),
        verbose=verbose,
    )
    net.fit(X_train, y_train)

    preds = net.predict(X_test)
    preds_proba = net.predict_proba(X_test)
    acc = accuracy_score(y_test, preds)
    print("Accuracy: %f" % acc)

    y_test = labelencoder.inverse_transform(y_test)
    preds = labelencoder.inverse_transform(preds)

    # plot misclassifications
    plot_misclassifications(y_test, preds, X_test_plot, indices_test,
                            output_folder + "/misclassifications")

    # save output
    # os.mkdir("cnn_cd")
    numpy.save(output_folder + "/X_test", X_test)
    numpy.save(output_folder + "/y_test", y_test)
    numpy.save(output_folder + "/preds_proba", preds_proba)
    numpy.save(output_folder + "/preds", preds)
    numpy.savetxt(output_folder + "/y_test_cnn.csv", y_test,
                  delimiter=",", fmt='%.4f')
    numpy.savetxt(output_folder + "/preds_cnn.csv", preds,
                  delimiter=",", fmt='%.4f')
    numpy.savetxt(output_folder + "/preds_proba_cnn.csv", preds_proba,
                  delimiter=",", fmt='%.4f')
    plot_confusion(y_test, preds, output_folder + "/confusion_cnn_hpercent.png")

    plt1 = visualize.plot_conv_weights(net.layers_['conv2d1'])
    plt1.savefig(output_folder + "/filters1.png")
    plt1.close()
    plt2 = visualize.plot_conv_weights(net.layers_['conv2d2'])
    plt2.savefig(output_folder + "/filters2.png")
    plt2.close()
    plt3 = visualize.plot_conv_weights(net.layers_['conv2d3'])
    plt3.savefig(output_folder + "/filters3.png")
    plt3.close()

    f = open(output_folder + '/obj.save_cd', 'wb')
    cPickle.dump(net, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
    # f = open('obj.save', 'rb')
    # net = cPickle.load(f)
    # f.close()
    # print(net)

    print("F1 Score: " + str(
        f1_score(y_test.reshape(y_test.shape[0]),
                 preds.reshape(preds.shape[0]), average=None)))
    print("Matthews correlation coefficient (MCC): " + str(
        matthews_corrcoef(y_test.reshape(y_test.shape[0]),
                          preds.reshape(preds.shape[0]))))
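# The resampling branches in cnn() flatten each 2D dmdt to a vector because
# imblearn samplers expect (n_samples, n_features) input, then restore the
# image shape afterwards. A runnable numpy illustration of that round trip:
import numpy as np

imgs = np.arange(2 * 3 * 4).reshape(2, 3, 4)  # (n, h, w)
flat = imgs.reshape(2, 3 * 4)                 # (n, h*w) for the sampler
back = flat.reshape(2, 3, 4)                  # restore after resampling
assert (back == imgs).all()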
def build_model(weights_path, options):
    """
    Build the CNN model. Create the Neural Net object and return it back.

    Inputs:
    - weights_path: path where the net weights are saved.
    - options: several hyper-parameters used to configure the net.

    Output:
    - net: a NeuralNet object
    """
    net_model_name = options['experiment']

    try:
        os.mkdir(os.path.join(weights_path, net_model_name))
    except:
        pass

    net_weights = os.path.join(weights_path, net_model_name,
                               net_model_name + '.pkl')
    net_history = os.path.join(weights_path, net_model_name,
                               net_model_name + '_history.pkl')

    # select hyper-parameters
    t_verbose = options['net_verbose']
    train_split_perc = options['train_split']
    num_epochs = options['max_epochs']
    max_epochs_patience = options['patience']
    early_stopping = EarlyStopping(patience=max_epochs_patience)
    save_weights = SaveWeights(net_weights, only_best=True, pickle=False)
    save_training_history = SaveTrainingHistory(net_history)

    # build the architecture
    ps = options['patch_size'][0]
    num_channels = 1
    fc_conv = 180
    fc_fc = 180
    dropout_conv = 0.5
    dropout_fc = 0.5

    # --------------------------------------------------
    # channel_1: axial
    # --------------------------------------------------
    axial_ch = InputLayer(name='in1', shape=(None, num_channels, ps, ps))
    axial_ch = prelu(batch_norm(Conv2DLayer(axial_ch, name='axial_ch_conv1',
                                            num_filters=20, filter_size=3)),
                     name='axial_ch_prelu1')
    axial_ch = prelu(batch_norm(Conv2DLayer(axial_ch, name='axial_ch_conv2',
                                            num_filters=20, filter_size=3)),
                     name='axial_ch_prelu2')
    axial_ch = MaxPool2DLayer(axial_ch, name='axial_max_pool_1', pool_size=2)
    axial_ch = prelu(batch_norm(Conv2DLayer(axial_ch, name='axial_ch_conv3',
                                            num_filters=40, filter_size=3)),
                     name='axial_ch_prelu3')
    axial_ch = prelu(batch_norm(Conv2DLayer(axial_ch, name='axial_ch_conv4',
                                            num_filters=40, filter_size=3)),
                     name='axial_ch_prelu4')
    axial_ch = MaxPool2DLayer(axial_ch, name='axial_max_pool_2', pool_size=2)
    axial_ch = prelu(batch_norm(Conv2DLayer(axial_ch, name='axial_ch_conv5',
                                            num_filters=60, filter_size=3)),
                     name='axial_ch_prelu5')
    axial_ch = DropoutLayer(axial_ch, name='axial_l1drop', p=dropout_conv)
    axial_ch = DenseLayer(axial_ch, name='axial_d1', num_units=fc_conv)
    axial_ch = prelu(axial_ch, name='axial_prelu_d1')

    # --------------------------------------------------
    # channel_2: coronal
    # --------------------------------------------------
    coronal_ch = InputLayer(name='in2', shape=(None, num_channels, ps, ps))
    coronal_ch = prelu(batch_norm(Conv2DLayer(coronal_ch, name='coronal_ch_conv1',
                                              num_filters=20, filter_size=3)),
                       name='coronal_ch_prelu1')
    coronal_ch = prelu(batch_norm(Conv2DLayer(coronal_ch, name='coronal_ch_conv2',
                                              num_filters=20, filter_size=3)),
                       name='coronal_ch_prelu2')
    coronal_ch = MaxPool2DLayer(coronal_ch, name='coronal_max_pool_1', pool_size=2)
    coronal_ch = prelu(batch_norm(Conv2DLayer(coronal_ch, name='coronal_ch_conv3',
                                              num_filters=40, filter_size=3)),
                       name='coronal_ch_prelu3')
    coronal_ch = prelu(batch_norm(Conv2DLayer(coronal_ch, name='coronal_ch_conv4',
                                              num_filters=40, filter_size=3)),
                       name='coronal_ch_prelu4')
    coronal_ch = MaxPool2DLayer(coronal_ch, name='coronal_max_pool_2', pool_size=2)
    coronal_ch = prelu(batch_norm(Conv2DLayer(coronal_ch, name='coronal_ch_conv5',
                                              num_filters=60, filter_size=3)),
                       name='coronal_ch_prelu5')
    coronal_ch = DropoutLayer(coronal_ch, name='coronal_l1drop', p=dropout_conv)
    coronal_ch = DenseLayer(coronal_ch, name='coronal_d1', num_units=fc_conv)
    coronal_ch = prelu(coronal_ch, name='coronal_prelu_d1')

    # --------------------------------------------------
    # channel_3: saggital
    # --------------------------------------------------
    saggital_ch = InputLayer(name='in3', shape=(None, num_channels, ps, ps))
    saggital_ch = prelu(batch_norm(Conv2DLayer(saggital_ch, name='saggital_ch_conv1',
                                               num_filters=20, filter_size=3)),
                        name='saggital_ch_prelu1')
    saggital_ch = prelu(batch_norm(Conv2DLayer(saggital_ch, name='saggital_ch_conv2',
                                               num_filters=20, filter_size=3)),
                        name='saggital_ch_prelu2')
    saggital_ch = MaxPool2DLayer(saggital_ch, name='saggital_max_pool_1', pool_size=2)
    saggital_ch = prelu(batch_norm(Conv2DLayer(saggital_ch, name='saggital_ch_conv3',
                                               num_filters=40, filter_size=3)),
                        name='saggital_ch_prelu3')
    saggital_ch = prelu(batch_norm(Conv2DLayer(saggital_ch, name='saggital_ch_conv4',
                                               num_filters=40, filter_size=3)),
                        name='saggital_ch_prelu4')
    saggital_ch = MaxPool2DLayer(saggital_ch, name='saggital_max_pool_2', pool_size=2)
    saggital_ch = prelu(batch_norm(Conv2DLayer(saggital_ch, name='saggital_ch_conv5',
                                               num_filters=60, filter_size=3)),
                        name='saggital_ch_prelu5')
    saggital_ch = DropoutLayer(saggital_ch, name='saggital_l1drop', p=dropout_conv)
    saggital_ch = DenseLayer(saggital_ch, name='saggital_d1', num_units=fc_conv)
    saggital_ch = prelu(saggital_ch, name='saggital_prelu_d1')

    # FC layer 540
    layer = ConcatLayer(name='elem_channels',
                        incomings=[axial_ch, coronal_ch, saggital_ch])
    layer = DropoutLayer(layer, name='f1_drop', p=dropout_fc)
    layer = DenseLayer(layer, name='FC1', num_units=540)
    layer = prelu(layer, name='prelu_f1')

    # concatenate channels 540 + 15
    layer = DropoutLayer(layer, name='f2_drop', p=dropout_fc)
    atlas_layer = DropoutLayer(InputLayer(name='in4', shape=(None, 15)),
                               name='Dropout_atlas', p=.2)
    # NOTE: the next assignment overrides the dropout wrapper above, so the
    # atlas input is effectively used without dropout.
    atlas_layer = InputLayer(name='in4', shape=(None, 15))
    layer = ConcatLayer(name='elem_channels2', incomings=[layer, atlas_layer])

    # FC layer 270
    layer = DenseLayer(layer, name='fc_2', num_units=270)
    layer = prelu(layer, name='prelu_f2')

    # FC output 15 (softmax)
    net_layer = DenseLayer(layer, name='out_layer', num_units=15,
                           nonlinearity=softmax)

    net = NeuralNet(
        layers=net_layer,
        objective_loss_function=objectives.categorical_crossentropy,
        update=updates.adam,
        update_learning_rate=0.001,
        on_epoch_finished=[
            save_weights,
            save_training_history,
            early_stopping,
        ],
        verbose=t_verbose,
        max_epochs=num_epochs,
        train_split=TrainSplit(eval_size=train_split_perc),
    )

    if options['load_weights'] == 'True':
        try:
            print " --> loading weights from ", net_weights
            net.load_params_from(net_weights)
        except:
            pass
    return net
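# Hypothetical usage sketch for build_model (keys inferred from the reads
# above; values illustrative only, not from the original source):
#
#   options = {'experiment': 'demo', 'net_verbose': 1, 'train_split': 0.25,
#              'max_epochs': 200, 'patience': 25, 'patch_size': (32, 32),
#              'load_weights': 'False'}
#   net = build_model('/tmp/weights', options)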
def make_grnn(batch_size, emb_size, g_hidden_size, word_n, wc_num, dence,
              wsm_num=1, rnn_type='LSTM', rnn_size=12, dropout_d=0.5,
              # pooling='mean',
              quest_na=4, gradient_steps=-1, valid_indices=None,
              lr=0.05, grad_clip=10):

    def select_rnn(x):
        return {
            'RNN': LL.RecurrentLayer,
            'LSTM': LL.LSTMLayer,
            'GRU': LL.GRULayer,
        }.get(x, LL.LSTMLayer)

    # dence = dence + [1]
    RNN = select_rnn(rnn_type)

    #------------------------------------------------------------------input layers
    layers = [
        (LL.InputLayer, {'name': 'l_in_se_q',
                         'shape': (None, word_n, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_se_a',
                         'shape': (None, quest_na, word_n, emb_size)}),
        (LL.InputLayer, {'name': 'l_in_mask_q',
                         'shape': (None, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_a',
                         'shape': (None, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_ri_q',
                         'shape': (None, word_n)}),
        (LL.InputLayer, {'name': 'l_in_mask_ri_a',
                         'shape': (None, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_wt_q',
                         'shape': (None, word_n, word_n)}),
        (LL.InputLayer, {'name': 'l_in_wt_a',
                         'shape': (None, word_n, quest_na, word_n)}),
        (LL.InputLayer, {'name': 'l_in_act_',
                         'shape': (None, word_n, g_hidden_size)}),
        (LL.InputLayer, {'name': 'l_in_act__',
                         'shape': (None, word_n, word_n, g_hidden_size)}),
    ]

    #------------------------------------------------------------------slice layers
    # l_qs = []
    # l_cas = []
    l_ase_names = ['l_ase_{}'.format(i) for i in range(quest_na)]
    l_amask_names = ['l_amask_{}'.format(i) for i in range(quest_na)]
    l_amask_ri_names = ['l_amask_ri_{}'.format(i) for i in range(quest_na)]
    l_awt_names = ['l_awt_{}'.format(i) for i in range(quest_na)]
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_ase_names[i],
                                        'incoming': 'l_in_se_a',
                                        'indices': i,
                                        'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_amask_names[i],
                                        'incoming': 'l_in_mask_a',
                                        'indices': i,
                                        'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_amask_ri_names[i],
                                        'incoming': 'l_in_mask_ri_a',
                                        'indices': i,
                                        'axis': 1})])
    for i in range(quest_na):
        layers.extend([(LL.SliceLayer, {'name': l_awt_names[i],
                                        'incoming': 'l_in_wt_a',
                                        'indices': i,
                                        'axis': 1})])

    #-------------------------------------------------------------------GRNN layers
    WC = theano.shared(
        np.random.randn(wc_num, g_hidden_size, g_hidden_size).astype('float32'))
    # WC = LI.Normal(0.1)
    WSM = theano.shared(
        np.random.randn(emb_size, g_hidden_size).astype('float32'))
    b = theano.shared(np.ones(g_hidden_size).astype('float32'))
    # b = lasagne.init.Constant(1.0)
    layers.extend([(GRNNLayer, {
        'name': 'l_q_grnn',
        'incomings': ['l_in_se_q', 'l_in_mask_q', 'l_in_wt_q',
                      'l_in_act_', 'l_in_act__'],
        'emb_size': emb_size,
        'hidden_size': g_hidden_size,
        'word_n': word_n,
        'wc_num': wc_num,
        'wsm_num': wsm_num,
        'only_return_final': False,
        'WC': WC, 'WSM': WSM, 'b': b})])
    l_a_grnns_names = ['l_a_grnn_{}'.format(i) for i in range(quest_na)]
    for i, l_a_grnns_name in enumerate(l_a_grnns_names):
        layers.extend([(GRNNLayer, {
            'name': l_a_grnns_name,
            'incomings': [l_ase_names[i], l_amask_names[i], l_awt_names[i],
                          'l_in_act_', 'l_in_act__'],
            'emb_size': emb_size,
            'hidden_size': g_hidden_size,
            'word_n': word_n,
            'wc_num': wc_num,
            'wsm_num': wsm_num,
            'only_return_final': False,
            'WC': WC, 'WSM': WSM, 'b': b})])

    #------------------------------------------------------------concatenate layers
    layers.extend([(LL.ConcatLayer, {
        'name': 'l_qa_concat',
        'incomings': ['l_q_grnn'] + l_a_grnns_names})])
    layers.extend([(LL.ConcatLayer, {
        'name': 'l_qamask_concat',
        'incomings': ['l_in_mask_ri_q'] + l_amask_ri_names})])

    #--------------------------------------------------------------------RNN layers
    layers.extend([(RNN, {'name': 'l_qa_rnn_f',
                          'incoming': 'l_qa_concat',
                          'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': False,
                          'only_return_final': True,
                          'grad_clipping': grad_clip})])
    layers.extend([(RNN, {'name': 'l_qa_rnn_b',
                          'incoming': 'l_qa_concat',
                          'mask_input': 'l_qamask_concat',
                          'num_units': rnn_size,
                          'backwards': True,
                          'only_return_final': True,
                          'grad_clipping': grad_clip})])
    layers.extend([(LL.ElemwiseSumLayer, {
        'name': 'l_qa_rnn_conc',
        'incomings': ['l_qa_rnn_f', 'l_qa_rnn_b']})])

    ##-----------------------------------------------------------------pooling layer
    ## l_qa_pool = layers.extend([(LL.ExpressionLayer, {'name': 'l_qa_pool',
    ##                                                  'incoming': l_qa_rnn_conc,
    ##                                                  'function': lambda X: X.mean(-1),
    ##                                                  'output_shape': 'auto'})])

    #------------------------------------------------------------------dense layers
    l_dence_names = ['l_dence_{}'.format(i) for i, _ in enumerate(dence)]
    if dropout_d:
        layers.extend([(LL.DropoutLayer, {'name': 'l_dence_do' + 'do',
                                          'p': dropout_d})])
    for i, d in enumerate(dence):
        if i < len(dence) - 1:
            nonlin = LN.tanh
        else:
            nonlin = LN.softmax
        layers.extend([(LL.DenseLayer, {'name': l_dence_names[i],
                                        'num_units': d,
                                        'nonlinearity': nonlin})])
        if i < len(dence) - 1 and dropout_d:
            layers.extend([(LL.DropoutLayer, {'name': l_dence_names[i] + 'do',
                                              'p': dropout_d})])

    def loss(x, t):
        return LO.aggregate(
            LO.categorical_crossentropy(T.clip(x, 1e-6, 1. - 1e-6), t))
        # return LO.aggregate(LO.squared_error(T.clip(x, 1e-6, 1. - 1e-6), t))

    if isinstance(valid_indices, np.ndarray) or isinstance(valid_indices, list):
        train_split = TrainSplit_indices(valid_indices=valid_indices)
    else:
        train_split = TrainSplit(eval_size=valid_indices, stratify=False)

    nnet = NeuralNet(
        y_tensor_type=T.ivector,
        layers=layers,
        update=LU.adagrad,
        update_learning_rate=lr,
        # update_epsilon=1e-7,
        objective_loss_function=loss,
        regression=False,
        verbose=2,
        batch_iterator_train=PermIterator(batch_size=batch_size),
        batch_iterator_test=BatchIterator(batch_size=batch_size / 2),
        # batch_iterator_train=BatchIterator(batch_size=batch_size),
        # batch_iterator_test=BatchIterator(batch_size=batch_size),
        # train_split=TrainSplit(eval_size=eval_size)
        train_split=train_split)
    nnet.initialize()
    PrintLayerInfo()(nnet)
    return nnet
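# make_grnn reads the concatenated question/answer sequence with a forward
# and a backward RNN and sums their final states (ElemwiseSumLayer). A toy
# numpy view of that merge (illustrative values only):
import numpy as np

h_forward = np.array([0.2, -0.1, 0.7])   # final state of the forward RNN
h_backward = np.array([0.4, 0.3, -0.2])  # final state of the backward RNN
h_merged = h_forward + h_backward        # what ElemwiseSumLayer computes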
    layers=outputLayer,
    update=updates.nesterov_momentum,
    # update=updates.adam,
    # update=updates.rmsprop,
    update_learning_rate=0.4,
    # update_beta1=0.9,
    # update_beta2=0.999,
    # update_epsilon=1e-8,
    update_momentum=0.9,
    # update_rho=0.9,
    # update_epsilon=1e-06,
    objective_loss_function=objectives.categorical_crossentropy,
    # objective=objectives.categorical_crossentropy,
    batch_iterator_train=BatchIterator(batch_size=batchSize),
    batch_iterator_test=BatchIterator(batch_size=batchSize),
    train_split=TrainSplit(eval_size=0.2, stratify=False),
    use_label_encoder=True,
    # use_label_encoder=False,
    regression=False,
    max_epochs=numberEpochs,
    verbose=1)

# x_fit, x_eval, y_fit, y_eval = cross_validation.train_test_split(xTrain, y, test_size=0.2)
net.fit(xTrain, y)
predictY = net.predict_proba(xTrain)
print(metrics.log_loss(originalY, predictY))

files = [f for f in listdir(testDir) if path.isfile(path.join(testDir, f))]
xTest = np.zeros((len(files), imageSize), dtype='float32')
def train_model(train_samples, train_phenotypes, labels, valid_samples=None, valid_phenotypes=None, generate_valid_set=True, train_sample_flags=None, valid_sample_flags=None, landmark_norm=None, scale=True, ncell=500, nsubset=4096, subset_selection='random', nrun=10, pooling='max', ncell_pooled=None, regression=False, nfilter=2, learning_rate=0.03, momentum=0.9, l2_weight_decay_conv=1e-8, l2_weight_decay_out=1e-8, max_epochs=10, verbose=1, select_filters='consensus', accur_thres=.9, benchmark_scores=False): ''' train_samples: list with input samples, e.g. cytometry samples train_phenotype: phenotype associated with the samples in train_samples labels: labels of measured markers in train_samples ''' # copy the list of samples so that they are not modified in place train_samples = copy.deepcopy(train_samples) if valid_samples is not None: valid_samples = copy.deepcopy(valid_samples) # create dummy single-cell flags if not given if train_sample_flags is None: train_sample_flags = [np.zeros((x.shape[0],1), dtype=int) for x in train_samples] if (valid_samples is not None) and (valid_sample_flags is None): valid_sample_flags = [np.zeros((x.shape[0],1), dtype=int) for x in valid_samples] if landmark_norm is not None: idx_to_normalize = [labels.index(label) for label in landmark_norm] train_samples = landmark_normalization(train_samples, idx_to_normalize) if valid_samples is not None: valid_samples = landmark_normalization(valid_samples, idx_to_normalize) # normalize extreme values # we assume that 0 corresponds to the control class if subset_selection == 'outlier': ctrl_list = [train_samples[i] for i in np.where(np.array(train_phenotypes) == 0)[0]] test_list = [train_samples[i] for i in np.where(np.array(train_phenotypes) == 1)[0]] train_samples = normalize_outliers_to_control(ctrl_list, test_list) if valid_samples is not None: ctrl_list = [valid_samples[i] for i in np.where(np.array(valid_phenotypes) == 0)[0]] test_list = [valid_samples[i] for i in np.where(np.array(valid_phenotypes) == 1)[0]] valid_samples = normalize_outliers_to_control(ctrl_list, test_list) if (valid_samples is None) and (not generate_valid_set): sample_ids = range(len(train_phenotypes)) X_train, id_train, z_train = combine_samples(train_samples, sample_ids, train_sample_flags) elif (valid_samples is None) and generate_valid_set: sample_ids = range(len(train_phenotypes)) X, sample_id, z = combine_samples(train_samples, sample_ids, train_sample_flags) valid_phenotypes = train_phenotypes # split into train-validation partitions eval_folds = 5 kf = StratifiedKFold(sample_id, eval_folds) train_indices, valid_indices = next(iter(kf)) X_train, id_train, z_train = X[train_indices], sample_id[train_indices], z[train_indices] X_valid, id_valid , z_valid = X[valid_indices], sample_id[valid_indices], z[valid_indices] else: sample_ids = range(len(train_phenotypes)) X_train, id_train, z_train = combine_samples(train_samples, sample_ids, train_sample_flags) sample_ids = range(len(valid_phenotypes)) X_valid, id_valid, z_valid = combine_samples(valid_samples, sample_ids, valid_sample_flags) # scale all marker distributions to mu=0, std=1 if scale: scaler = StandardScaler() scaler.fit(X_train) X_train = scaler.transform(X_train) X_train, z_train, id_train = shuffle(X_train, z_train, id_train) train_phenotypes = np.asarray(train_phenotypes) y_train = train_phenotypes[id_train] if (valid_samples is not None) or generate_valid_set: if scale: X_valid = scaler.transform(X_valid) X_valid, z_valid, id_valid = shuffle(X_valid, z_valid, id_valid) 
        valid_phenotypes = np.asarray(valid_phenotypes)
        y_valid = valid_phenotypes[id_valid]

    # number of measured markers
    nmark = X_train.shape[1]

    # generate multi-cell inputs
    if subset_selection == 'outlier':
        # here we assume that class 0 is always the control class
        # and class 1 is the test class
        # TODO: extend for more classes
        x_ctrl_train = X_train[y_train == 0]
        nsubset_ctrl = nsubset / np.sum(train_phenotypes == 0)
        nsubset_biased = nsubset / np.sum(train_phenotypes == 1)
        to_keep = int(0.01 * (X_train.shape[0] - x_ctrl_train.shape[0]))
        X_tr, y_tr = generate_biased_subsets(X_train, train_phenotypes, id_train,
                                             x_ctrl_train, nsubset_ctrl,
                                             nsubset_biased, ncell, to_keep,
                                             id_ctrl=np.where(train_phenotypes == 0)[0],
                                             id_biased=np.where(train_phenotypes == 1)[0])
        if (valid_samples is not None) or generate_valid_set:
            x_ctrl_valid = X_valid[y_valid == 0]
            nsubset_ctrl = nsubset / np.sum(valid_phenotypes == 0)
            nsubset_biased = nsubset / np.sum(valid_phenotypes == 1)
            to_keep = int(0.01 * (X_valid.shape[0] - x_ctrl_valid.shape[0]))
            X_v, y_v = generate_biased_subsets(X_valid, valid_phenotypes, id_valid,
                                               x_ctrl_valid, nsubset_ctrl,
                                               nsubset_biased, ncell, to_keep,
                                               id_ctrl=np.where(valid_phenotypes == 0)[0],
                                               id_biased=np.where(valid_phenotypes == 1)[0])
    # TODO: right now an equal number of subsets is drawn from each sample
    # Do it per phenotype instead?
    elif subset_selection == 'kmeans':
        X_tr, y_tr = generate_subsets(X_train, train_phenotypes, id_train,
                                      nsubset, ncell, k_init=True)
        if (valid_samples is not None) or generate_valid_set:
            X_v, y_v = generate_subsets(X_valid, valid_phenotypes, id_valid,
                                        nsubset / 2, ncell, k_init=True)
    else:
        X_tr, y_tr = generate_subsets(X_train, train_phenotypes, id_train,
                                      nsubset, ncell, k_init=False)
        if (valid_samples is not None) or generate_valid_set:
            X_v, y_v = generate_subsets(X_valid, valid_phenotypes, id_valid,
                                        nsubset / 2, ncell, k_init=False)

    ## neural network configuration ##
    # batch size
    bs = 128

    # the input and convolutional layers
    input_conv_layers = [
        (layers.InputLayer, {'name': 'input', 'shape': (None, nmark, ncell)}),
        (layers.Conv1DLayer, {'name': 'conv',
                              'b': init.Constant(0.),
                              'W': init.Uniform(range=0.01),
                              'num_filters': nfilter,
                              'filter_size': 1})]

    # the pooling layer
    # max-pooling detects cell presence
    # mean-pooling detects cell frequency
    if pooling == 'max':
        if ncell_pooled is None:
            pooling_layers = [(layers.MaxPool1DLayer,
                               {'name': 'maxPool', 'pool_size': ncell})]
        else:
            # keep the `ncell_pooled` highest activations and average them
            pooling_layers = [
                (SelectCellLayer, {'name': 'select', 'num_cell': ncell_pooled}),
                (layers.Pool1DLayer, {'name': 'maxPool',
                                      'pool_size': ncell_pooled,
                                      'mode': 'average_exc_pad'})]
    elif pooling == 'mean':
        pooling_layers = [(layers.Pool1DLayer,
                           {'name': 'meanPool', 'pool_size': ncell,
                            'mode': 'average_exc_pad'})]
    else:
        sys.stderr.write("Undefined pooling type: %s\n" % pooling)
        sys.exit(-1)

    # the output layer
    if not regression:
        n_out = len(np.unique(train_phenotypes))
        output_nonlinearity = T.nnet.softmax
    else:
        n_out = 1
        output_nonlinearity = T.tanh
    output_layers = [(layers.DenseLayer,
                      {'name': 'output', 'num_units': n_out,
                       'W': init.Uniform(range=0.01),
                       'b': init.Constant(0.),
                       'nonlinearity': output_nonlinearity})]

    # combine all the network layers
    layers_0 = input_conv_layers + pooling_layers + output_layers

    # train some neural networks with different parameter configurations
    w_store = dict()
    accuracies = np.empty(nrun)
    for irun in range(nrun):
        if verbose:
            print 'training network: %d' % (irun + 1)
        if (valid_samples is not None) or generate_valid_set:
            # build a convolutional neural network
            net1 = MyNeuralNet(
                layers=layers_0,
                # objective function and weight decay penalties
                objective=weight_decay_objective,
                objective_penalty_conv=l2_weight_decay_conv,
                objective_penalty_output=l2_weight_decay_out,
                # optimization method
                update=nesterov_momentum,
                update_learning_rate=theano.shared(float32(learning_rate)),
                update_momentum=theano.shared(float32(momentum)),
                # batches
                batch_iterator_train=BatchIterator(batch_size=bs),
                batch_iterator_test=BatchIterator(batch_size=bs),
                on_epoch_finished=[EarlyStopping(patience=3)],
                train_split=TrainSplit(eval_size=None),
                regression=regression,
                max_epochs=max_epochs,
                verbose=verbose)

            # train the model
            if regression:
                net1.fit(float32(X_tr), float32(y_tr.reshape(-1, 1)),
                         float32(X_v), float32(y_v.reshape(-1, 1)))
                valid_loss = net1.score(float32(X_v), float32(y_v.reshape(-1, 1)))
                valid_accuracy = -valid_loss
            else:
                net1.fit(float32(X_tr), int32(y_tr),
                         float32(X_v), int32(y_v))
                valid_accuracy = net1.score(float32(X_v), int32(y_v))
        else:
            # build a convolutional neural network without a validation set
            net1 = NeuralNet(
                layers=layers_0,
                # objective function and weight decay penalties
                objective=weight_decay_objective,
                objective_penalty_conv=l2_weight_decay_conv,
                objective_penalty_output=l2_weight_decay_out,
                # optimization method
                update=nesterov_momentum,
                update_learning_rate=theano.shared(float32(learning_rate)),
                update_momentum=theano.shared(float32(momentum)),
                # batches
                batch_iterator_train=BatchIterator(batch_size=bs),
                batch_iterator_test=BatchIterator(batch_size=bs),
                on_epoch_finished=[],
                train_split=TrainSplit(eval_size=None),
                regression=regression,
                max_epochs=max_epochs,
                verbose=verbose)

            # train the model
            if regression:
                net1.fit(float32(X_tr), float32(y_tr.reshape(-1, 1)))
                valid_accuracy = 0
            else:
                net1.fit(float32(X_tr), int32(y_tr))
                valid_accuracy = 0

        # extract the network parameters
        w_store[irun] = net1.get_all_params_values()
        accuracies[irun] = valid_accuracy

    # which filter weights should we return?
    # 'best': return the filter weights of the model with highest validation accuracy
    # 'consensus': return consensus filters based on hierarchical clustering
    # 'consensus_priority': prioritize the consensus filter that corresponds
    #                       to the biggest cluster
    # this option only makes sense if validation samples were provided/generated
    best_net, w_best_net, best_accuracy = None, None, None
    if select_filters == 'best':
        best_net = w_store[np.argmax(accuracies)]
        w_best_net = param_vector(best_net, regression)
        best_accuracy = np.max(accuracies)
        w_cons, cluster_res = compute_consensus_profiles(w_store, accuracies,
                                                         accur_thres, regression,
                                                         prioritize=False)
    elif select_filters == 'consensus':
        w_cons, cluster_res = compute_consensus_profiles(w_store, accuracies,
                                                         accur_thres, regression,
                                                         prioritize=False)
    elif select_filters == 'consensus_priority':
        w_cons, cluster_res = compute_consensus_profiles(w_store, accuracies,
                                                         accur_thres, regression,
                                                         prioritize=True)
    else:
        sys.stderr.write("Undefined option for selecting filters: %s\n" % select_filters)
        sys.exit(-1)

    if (valid_samples is not None) or generate_valid_set:
        X = np.vstack([X_train, X_valid])
        y = np.hstack([y_train, y_valid])
        z = np.vstack([z_train, z_valid])
    else:
        X = X_train
        y = y_train
        z = z_train

    # predict using CellCnn
    if select_filters == 'consensus_priority':
        params = w_cons
        w, b = params[:-2], params[-2]
        x1 = X[y == 1]
        x0 = X[y == 0]
        cnn_pred = np.sum(w.reshape(1, -1) * x1, axis=1) + b
    else:
        cnn_pred = None
    results = {
        'clustering_result': cluster_res,
        'best_net': best_net,
        'w_best_net': w_best_net,
        'selected_filters': w_cons,
        'accuracies': accuracies,
        'best_accuracy': best_accuracy,
        'cnn_pred': cnn_pred,
        'labels': labels,
        'X': X,
        'y': y,
        'z': z}

    if benchmark_scores:
        # note: x1 and x0 are only defined above when
        # select_filters == 'consensus_priority'
        # predict using outlier detection
        outlier_pred = knn_dist_memory_optimized(x1, x0, s=200000)

        # predict using multi-cell input logistic regression
        X_tr_mean = np.sum(X_tr, axis=-1)
        clf = LogisticRegression(C=10000, penalty='l2')
        clf.fit(X_tr_mean, y_tr)
        w_lr, b_lr = clf.coef_, clf.intercept_
        mean_pred = np.sum(w_lr.reshape(1, -1) * x1, axis=1) + b_lr[0]

        # predict using single-cell input logistic regression
        clf_sc = LogisticRegression(C=10000, penalty='l2')
        clf_sc.fit(X, y)
        w_lr, b_lr = clf_sc.coef_, clf_sc.intercept_
        sc_pred = np.sum(w_lr.reshape(1, -1) * x1, axis=1) + b_lr[0]

        # store the predictions
        results['outlier_pred'] = outlier_pred
        results['mean_pred'] = mean_pred
        results['sc_pred'] = sc_pred

    return results
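# A minimal usage sketch for train_model (not from the original source): the
# random data, marker names, and parameter values below are hypothetical, and
# all of train_model's helper functions are assumed to be importable.
np.random.seed(0)
train_samples = [np.random.randn(1000, 5).astype('float32') for _ in range(4)]
train_phenotypes = [0, 0, 1, 1]  # two control and two test samples
labels = ['m1', 'm2', 'm3', 'm4', 'm5']
results = train_model(train_samples, train_phenotypes, labels,
                      ncell=200, nsubset=256, nrun=3, max_epochs=5)
print(results['accuracies'])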
def create_vgg_net():
    '''
    Get pretrained weights from a pkl file. Create the net and use the stored
    weights and biases as parameters for the corresponding layers.
    '''
    # the pickle holds numpy arrays, so open the file in binary mode
    with open("/data/vgg_nolearn_saved_wts_biases.pkl", "rb") as f:
        vgg_layer_data_dict = pickle.load(f)

    vgg_nn = NeuralNet(
        layers=[
            (InputLayer, {'name': 'input', 'shape': (None, 3, 224, 224)}),
            (ConvLayer, {'name': 'conv1', 'num_filters': 96,
                         'filter_size': (7, 7), 'stride': 2,
                         'flip_filters': False,
                         'W': theano.shared(vgg_layer_data_dict['conv1'][0]),
                         'b': theano.shared(vgg_layer_data_dict['conv1'][1])}),
            (NormLayer, {'name': 'norm1', 'alpha': .0001}),
            (PoolLayer, {'name': 'pool1', 'pool_size': (3, 3),
                         'stride': 3, 'ignore_border': False}),
            (ConvLayer, {'name': 'conv2', 'num_filters': 256,
                         'filter_size': (5, 5), 'flip_filters': False,
                         'W': theano.shared(vgg_layer_data_dict['conv2'][0]),
                         'b': theano.shared(vgg_layer_data_dict['conv2'][1])
                         # 'pad': 2,
                         # 'stride': 1
                         }),
            (PoolLayer, {'name': 'pool2', 'pool_size': (2, 2),
                         'stride': 2, 'ignore_border': False}),
            (ConvLayer, {'name': 'conv3', 'num_filters': 512,
                         'filter_size': (3, 3), 'pad': 1,  # 'stride': 1
                         'flip_filters': False,
                         'W': theano.shared(vgg_layer_data_dict['conv3'][0]),
                         'b': theano.shared(vgg_layer_data_dict['conv3'][1])}),
            (ConvLayer, {'name': 'conv4', 'num_filters': 512,
                         'filter_size': (3, 3), 'pad': 1,  # 'stride': 1
                         'flip_filters': False,
                         'W': theano.shared(vgg_layer_data_dict['conv4'][0]),
                         'b': theano.shared(vgg_layer_data_dict['conv4'][1])}),
            (ConvLayer, {'name': 'conv5', 'num_filters': 512,
                         'filter_size': (3, 3), 'pad': 1,  # 'stride': 1
                         'flip_filters': False,
                         'W': theano.shared(vgg_layer_data_dict['conv5'][0]),
                         'b': theano.shared(vgg_layer_data_dict['conv5'][1])}),
            (PoolLayer, {'name': 'pool5', 'pool_size': (3, 3),
                         'stride': 3, 'ignore_border': False}),
            (DenseLayer, {'name': 'fc6', 'num_units': 4096,
                          'W': theano.shared(vgg_layer_data_dict['fc6'][0]),
                          'b': theano.shared(vgg_layer_data_dict['fc6'][1])}),
            (DropoutLayer, {'name': 'drop6', 'p': .5}),
            (DenseLayer, {'name': 'fc7', 'num_units': 4096,
                          'W': theano.shared(vgg_layer_data_dict['fc7'][0]),
                          'b': theano.shared(vgg_layer_data_dict['fc7'][1])}),
            (DropoutLayer, {'name': 'drop7', 'p': .5}),
            (DenseLayer, {'name': 'output', 'num_units': 3,
                          'nonlinearity': softmax}),
        ],
        # optimization method:
        # update=nesterov_momentum,
        # update_learning_rate=0.01,
        # update_momentum=0.9,
        update=sgd,
        update_learning_rate=.05,
        # regression=True,  # flag to indicate we're dealing with a regression problem
        max_epochs=1000,  # we want to train this many epochs
        verbose=1,
        train_split=TrainSplit(eval_size=0.25),
    )
    return vgg_nn
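# A minimal usage sketch (assumed, not from the original source): build the
# net, call initialize() so the pretrained weights are wired in, and run a
# forward pass. The dummy batch is a placeholder; real inputs would be
# preprocessed 224x224 RGB images, and the pkl file must exist at the path
# hard-coded above.
vgg_nn = create_vgg_net()
vgg_nn.initialize()
dummy_batch = np.zeros((1, 3, 224, 224), dtype='float32')
probs = vgg_nn.predict_proba(dummy_batch)  # shape (1, 3), one row per image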
def main():
    pickle_file = '/mnt/Data/uniformsample_04_1k_mirror_rot_128x128_norm.cpickle'
    labels_csvfile = '/mnt/Data/trainLabels.csv'
    train_data, train_labels, test_data, test_labels = make_train_and_test_sets(pickle_file, labels_csvfile)

    train_data = train_data.reshape(-1, 3, IMAGE_SIZE, IMAGE_SIZE)
    train_data = train_data.astype('float32')
    test_data = test_data.reshape(-1, 3, imageWidth, imageWidth)
    test_data = test_data.astype('float32')

    numFeatures = train_data[1].size
    numTrainExamples = train_data.shape[0]
    print 'Features = %d' % (numFeatures)
    print 'Train set = %d' % (numTrainExamples)
    print "training data shape: ", train_data.shape
    print "training labels shape: ", train_labels.shape

    # the input shape must come from the training data
    # (the original used an undefined variable X here)
    layers0 = [
        (InputLayer, {'shape': (None, train_data.shape[1],
                                train_data.shape[2], train_data.shape[3])}),
        (Conv2DLayer, {'num_filters': 32, 'filter_size': 3}),
        (Conv2DLayer, {'num_filters': 32, 'filter_size': 3}),
        (Conv2DLayer, {'num_filters': 32, 'filter_size': 3}),
        (MaxPool2DLayer, {'pool_size': 2}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': 3}),
        (Conv2DLayer, {'num_filters': 64, 'filter_size': 3}),
        (MaxPool2DLayer, {'pool_size': 2}),
        (Conv2DLayer, {'num_filters': 128, 'filter_size': 3}),
        (Conv2DLayer, {'num_filters': 128, 'filter_size': 3}),
        (MaxPool2DLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': 600}),
        (DropoutLayer, {}),
        (DenseLayer, {'num_units': 600}),
        (DenseLayer, {'num_units': 2, 'nonlinearity': softmax}),
    ]

    def regularization_objective(layers, lambda1=0., lambda2=0., *args, **kwargs):
        '''from the nolearn MNIST CNN tutorial'''
        # default loss
        losses = objective(layers, *args, **kwargs)
        # get the layers' weights, but only those that should be regularized
        # (i.e. not the biases)
        weights = get_all_params(layers[-1], regularizable=True)
        # sum of absolute weights for L1
        sum_abs_weights = sum([abs(w).sum() for w in weights])
        # sum of squared weights for L2
        sum_squared_weights = sum([(w ** 2).sum() for w in weights])
        # add the regularization terms to the regular loss
        losses += lambda1 * sum_abs_weights + lambda2 * sum_squared_weights
        return losses

    clf = NeuralNet(
        layers=layers0,
        max_epochs=5,
        # optimization method
        update=nesterov_momentum,
        update_momentum=0.9,
        update_learning_rate=0.0002,
        objective=regularization_objective,
        objective_lambda2=0.0025,
        train_split=TrainSplit(eval_size=0.1),
        verbose=1,
    )

    # load parameters from a pickle file to continue training from previous
    # epochs or from a smaller network
    #clf.load_params_from('params1.pickle')
    #clf.initialize()

    for i in range(100):
        print '****************************** ', i, ' ******************************'
        clf.fit(train_data, train_labels)
        clf.save_params_to('params2.pickle')
        preds = clf.predict(test_data)
        #print sum(preds)
        print "Test data accuracy: ", 1.0 * sum(preds == test_labels) / test_labels.shape[0]
def __init__(self, outputShape, testData, modelSaver):
    self.set_network_specific_settings()
    modelSaver.model = self
    self.net = NeuralNet(
        layers=[
            ('input', layers.InputLayer),
            ('conv1', layers.Conv2DLayer),
            ('pool1', layers.MaxPool2DLayer),
            ('conv2', layers.Conv2DLayer),
            ('pool2', layers.MaxPool2DLayer),
            ('conv3', layers.Conv2DLayer),
            ('conv4', layers.Conv2DLayer),
            ('conv5', layers.Conv2DLayer),
            ('pool3', layers.MaxPool2DLayer),
            ('hidden6', layers.DenseLayer),
            ('dropout1', layers.DropoutLayer),
            ('hidden7', layers.DenseLayer),
            ('dropout2', layers.DropoutLayer),
            ('output', layers.DenseLayer),
        ],
        # variable batch size; channels x rows x columns
        input_shape=(None, Settings.NN_CHANNELS,
                     Settings.NN_INPUT_SHAPE[0], Settings.NN_INPUT_SHAPE[1]),
        conv1_num_filters=96, conv1_filter_size=(11, 11), conv1_stride=(4, 4),
        pool1_pool_size=(5, 5),
        conv2_num_filters=256, conv2_filter_size=(5, 5),
        pool2_pool_size=(3, 3),
        conv3_num_filters=384, conv3_filter_size=(3, 3), conv3_pad=(1, 1),
        conv4_num_filters=384, conv4_filter_size=(3, 3), conv4_pad=(1, 1),
        conv5_num_filters=256, conv5_filter_size=(3, 3), conv5_pad=(1, 1),
        pool3_pool_size=(2, 2),
        hidden6_num_units=4096,
        dropout1_p=0.5,
        hidden7_num_units=4096,
        dropout2_p=0.5,
        output_num_units=outputShape,
        output_nonlinearity=lasagne.nonlinearities.softmax,
        # optimization method:
        update=nesterov_momentum,
        update_learning_rate=theano.shared(utils.to_float32(Settings.NN_START_LEARNING_RATE)),
        update_momentum=theano.shared(utils.to_float32(Settings.NN_START_MOMENTUM)),
        batch_iterator_train=AugmentingLazyBatchIterator(
            Settings.NN_BATCH_SIZE, testData, "train", False,
            newSegmentation=False, loadingSize=(256, 256)),
        batch_iterator_test=LazyBatchIterator(
            Settings.NN_BATCH_SIZE, testData, "valid", False,
            newSegmentation=False, loadingInputShape=Settings.NN_INPUT_SHAPE),
        train_split=TrainSplit(eval_size=0.0),  # we cross-validate on our own
        regression=False,  # classification problem
        on_epoch_finished=[
            AdjustVariable('update_learning_rate',
                           start=Settings.NN_START_LEARNING_RATE, stop=0.0001),
            AdjustVariable('update_momentum',
                           start=Settings.NN_START_MOMENTUM, stop=0.999),
            TrainingHistory("Krizhevsky", str(self), [], modelSaver),
            EarlyStopping(150),
            modelSaver,
        ],
        max_epochs=Settings.NN_EPOCHS,
        verbose=1,
    )
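# AdjustVariable is referenced above but not defined in this snippet. A common
# nolearn-tutorial-style implementation, which linearly anneals a shared
# variable (e.g. the learning rate) over the scheduled epochs, is sketched
# below as an assumption; the project's actual helper may differ.
import numpy as np

class AdjustVariable(object):
    def __init__(self, name, start=0.03, stop=0.001):
        self.name = name
        self.start, self.stop = start, stop
        self.ls = None

    def __call__(self, nn, train_history):
        if self.ls is None:
            # one interpolated value per epoch, from `start` to `stop`
            self.ls = np.linspace(self.start, self.stop, nn.max_epochs)
        epoch = train_history[-1]['epoch']
        new_value = np.cast['float32'](self.ls[epoch - 1])
        getattr(nn, self.name).set_value(new_value)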
net0 = NeuralNet(
    layers=layers0,
    input_shape=(None, num_features),
    dense0_num_units=100,
    dropout0_p=0.5,
    dense1_num_units=100,
    output_num_units=num_classes,
    output_nonlinearity=softmax,
    update=nesterov_momentum,
    #update=adam,
    update_learning_rate=0.08,
    update_momentum=0.2,
    #objective_loss_function=squared_error,
    #objective_loss_function=binary_crossentropy,
    train_split=TrainSplit(0.1),
    verbose=1,
    max_epochs=15)

dfrange = range(0, 15)
shuffle(dfrange)

X, y, encoder, scaler = load_train_data(datapath + "train_fixed_data.csv", 0)
print("Fitting Sample 0")
net0.fit(X, y)

for i in range(1, 14):
    print("Loading Sample " + str(i))
    X, y, encoder, scaler1 = load_train_data(
        datapath + "train_fixed_data.csv", i)
    print("Fitting Sample " + str(i))
    net0.fit(X, y)  # assumed continuation; the original snippet is truncated here
]

#--- Initialise nolearn NN object ---#
net_cnn = nolas.NeuralNet(
    layers=layers_lst,
    # Optimization:
    max_epochs=10,
    update=lasagne.updates.adadelta,
    # Objective:
    objective_loss_function=lasagne.objectives.binary_crossentropy,
    # Batch size & splits:
    train_split=TrainSplit(eval_size=.3),
    batch_iterator_train=BatchIterator(batch_size=10, shuffle=False),
    batch_iterator_test=BatchIterator(batch_size=10, shuffle=False),
    # Custom scores -- these callables take 1) target, 2) predictions:
    custom_scores=[('auc', lambda y_true, y_proba: roc_auc_score(y_true, y_proba[:, 0]))],
    # ... while these take 1) predictions, 2) target:
    scores_train=None,
    scores_valid=None,
    # Misc:
    y_tensor_type=T.imatrix,
    regression=True,
    verbose=1,
)
def test_eval_size_half(self, TrainSplit, nn):
    X, y = np.random.random((100, 10)), np.repeat([0, 1, 2, 3], 25)
    X_train, X_valid, y_train, y_valid = TrainSplit(0.51)(X, y, nn)
    assert len(X_train) + len(X_valid) == 100
    assert len(y_train) + len(y_valid) == 100
    assert len(X_train) > 45
d0, d1, d2, d3, h1, h2, h3 = 0, 0.1, 0.1, 0.1, 60, 45, 25
e = 150
l = 0.02

if m_params['cv']:
    # do cross-validation scoring
    kf = KFold(X.shape[0], n_folds=5, shuffle=True, random_state=1)
    scr = np.zeros([len(kf)])
    oob_pred = np.zeros((X.shape[0], 3))
    for i, (tr_ix, val_ix) in enumerate(kf):
        clf = init_nnet(d0, h1, d1, h2, d2, h3, d3, e, l, 'cv')
        clf.fit(X[tr_ix], y[tr_ix])
        pred = clf.predict_proba(X[val_ix])
        oob_pred[val_ix] = np.array(pred)
        scr[i] = log_loss(y[val_ix], oob_pred[val_ix])
        # scr[i] is the log-loss on the held-out fold
        print('Fold validation score is:', scr[i])
    print(log_loss(y, oob_pred))
    print(oob_pred[1:10])
    oob_filename = '../output/oob_blend_nnet_cvs' + str(np.mean(scr)) + '.p'
    pkl.dump(oob_pred, open(oob_filename, 'wb'))
else:
    clf = init_nnet(d0, h1, d1, h2, d2, h3, d3, e, l, 'cv')
    clf.train_split = TrainSplit(eval_size=0)
    # fit the model
    clf.fit(X, y)
    # submission filename
    subname = "nnet_blend_cvs"
    print("Saving Results.")
    make_submission(clf, X_sub, ids, subname)
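# make_submission is called above but not defined in this snippet. A minimal
# sketch of what it presumably does follows; the pandas dependency, column
# names, and output path are all assumptions.
def make_submission(clf, X_sub, ids, subname):
    import pandas as pd
    preds = clf.predict_proba(X_sub)  # one probability column per class
    sub = pd.DataFrame(preds, columns=['class_1', 'class_2', 'class_3'])
    sub.insert(0, 'id', ids)
    sub.to_csv('../output/%s.csv' % subname, index=False)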
    maxout3_pool_size=2,
    dropout3_p=0.4,
    hidden4_num_units=512,
    hidden4_nonlinearity=very_leaky_rectify,
    output_num_units=2,
    output_nonlinearity=lasagne.nonlinearities.softmax,
    # optimization method:
    update=adagrad,
    #update=nesterov_momentum,
    #update_momentum=theano.shared(np.float32(0.9)),
    update_learning_rate=theano.shared(np.float32(0.013)),
    regression=False,
    max_epochs=2000,
    train_split=TrainSplit(eval_size=0.1),
    #custom_score=('auc', lambda y_true, y_proba: roc_auc_score(y_true, y_proba[:, 1])),
    on_epoch_finished=[
        AdjustVariable('update_learning_rate', start=0.013, stop=0.001),
        #AdjustVariable('update_momentum', start=0.9, stop=0.999),
        EarlyStopping(patience=60),
    ],
    verbose=1)

clf.fit(X_train, y_train)

from sklearn import metrics
y_pred = clf.predict_proba(X_val0)[:, 1]
score = metrics.roc_auc_score(y_val0, y_pred)
print('score on extra set: %s' % score)
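# EarlyStopping is used here and in earlier snippets but never defined. A
# widely used nolearn-style implementation is sketched below as an
# assumption: it tracks the best validation loss, and when no improvement is
# seen for `patience` epochs it restores the best weights and stops training.
class EarlyStopping(object):
    def __init__(self, patience=100):
        self.patience = patience
        self.best_valid = np.inf
        self.best_valid_epoch = 0
        self.best_weights = None

    def __call__(self, nn, train_history):
        current_valid = train_history[-1]['valid_loss']
        current_epoch = train_history[-1]['epoch']
        if current_valid < self.best_valid:
            # remember the best model seen so far
            self.best_valid = current_valid
            self.best_valid_epoch = current_epoch
            self.best_weights = nn.get_all_params_values()
        elif self.best_valid_epoch + self.patience < current_epoch:
            # no improvement for `patience` epochs: restore and stop
            print("Early stopping; best valid loss was %f at epoch %d." %
                  (self.best_valid, self.best_valid_epoch))
            nn.load_params_from(self.best_weights)
            raise StopIteration()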
        # (FeaturePoolLayer, dict(name='l8p', pool_size=2)),
        (DropoutLayer, dict(name='l8drop', p=0.5)),
        (DenseLayer, dict(name='out', num_units=n_classes,
                          nonlinearity=nonlinearities.softmax)),
    ],
    regression=False,
    objective_loss_function=objectives.categorical_crossentropy,
    update=updates.adam,
    update_learning_rate=0.00005,
    # update=updates.rmsprop,
    batch_iterator_train=train_iterator,
    batch_iterator_test=test_iterator,
    train_split=TrainSplit(eval_size=1. / 6),
    on_epoch_finished=[
        save_weights,
        save_training_history,
        plot_training_history,
        save_to_json,
        # early_stopping
    ],
    verbose=10,
    max_epochs=35)


def draw():
    global net
    net.initialize()
    draw_to_file(net, "layout.png", verbose=True)
def __init__(self, valid_indices):
    TrainSplit.__init__(self, eval_size=0)
    self.valid_indices = valid_indices
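# The enclosing class of the __init__ above is not shown. A plausible full
# version (an assumption, with a hypothetical class name) is a TrainSplit
# subclass that holds out exactly the rows listed in valid_indices instead of
# a random eval fraction:
class FixedTrainSplit(TrainSplit):
    def __init__(self, valid_indices):
        TrainSplit.__init__(self, eval_size=0)
        self.valid_indices = valid_indices

    def __call__(self, X, y, net):
        # boolean mask selecting the fixed validation rows
        mask = np.zeros(len(y), dtype=bool)
        mask[self.valid_indices] = True
        # nolearn expects (X_train, X_valid, y_train, y_valid)
        return X[~mask], X[mask], y[~mask], y[mask]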