# Multitask LSTM network: joint in-hospital mortality (ihm), length of
# stay (los), phenotyping (ph) and decompensation (decomp) heads on top
# of a shared LSTM.
def __init__(self, train_raw, test_raw, dim, mode, l2, l1,
             batch_norm, dropout, batch_size,
             ihm_C, los_C, ph_C, decomp_C,
             partition, nbins, **kwargs):

    print "==> not used params in network class:", kwargs.keys()
    self.train_raw = train_raw
    self.test_raw = test_raw
    self.dim = dim
    self.mode = mode
    self.l2 = l2
    self.l1 = l1
    self.batch_norm = batch_norm
    self.dropout = dropout
    self.batch_size = batch_size
    self.ihm_C = ihm_C
    self.los_C = los_C
    self.ph_C = ph_C
    self.decomp_C = decomp_C
    self.nbins = nbins

    if partition == 'log':
        self.get_bin = metrics.get_bin_log
        self.get_estimate = metrics.get_estimate_log
    else:
        assert self.nbins == 10
        self.get_bin = metrics.get_bin_custom
        self.get_estimate = metrics.get_estimate_custom

    self.train_batch_gen = self.get_batch_gen(self.train_raw)
    self.test_batch_gen = self.get_batch_gen(self.test_raw)

    self.input_var = T.tensor3('X')
    self.input_lens = T.ivector('L')

    self.ihm_pos = T.ivector('ihm_pos')
    self.ihm_mask = T.ivector('ihm_mask')
    self.ihm_label = T.ivector('ihm_label')

    self.los_mask = T.imatrix('los_mask')
    self.los_label = T.matrix('los_label')  # for regression
    # self.los_label = T.imatrix('los_label')

    self.ph_label = T.imatrix('ph_label')

    self.decomp_mask = T.imatrix('decomp_mask')
    self.decomp_label = T.imatrix('decomp_label')

    print "==> Building neural network"

    # shared network
    network = layers.InputLayer((None, None, self.train_raw[0][0].shape[1]),
                                input_var=self.input_var)
    if self.dropout > 0:
        network = layers.DropoutLayer(network, p=self.dropout)
    network = layers.LSTMLayer(incoming=network, num_units=dim,
                               only_return_final=False,
                               grad_clipping=10,
                               ingate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)),
                               forgetgate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)),
                               cell=lasagne.layers.Gate(
                                   W_cell=None,
                                   nonlinearity=lasagne.nonlinearities.tanh,
                                   W_in=Orthogonal(), W_hid=Orthogonal()),
                               outgate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)))
    if self.dropout > 0:
        network = layers.DropoutLayer(network, p=self.dropout)

    lstm_output = layers.get_output(network)
    self.params = layers.get_all_params(network, trainable=True)
    self.reg_params = layers.get_all_params(network, regularizable=True)

    # for each example in the minibatch take the output at the last
    # valid time step
    last_outputs = []
    for index in range(self.batch_size):
        last_outputs.append(lstm_output[index, self.input_lens[index] - 1, :])
    last_outputs = T.stack(last_outputs)

    # take the outputs at the 48h mark for the fixed-horizon mortality task
    mid_outputs = []
    for index in range(self.batch_size):
        mid_outputs.append(lstm_output[index, self.ihm_pos[index], :])
    mid_outputs = T.stack(mid_outputs)

    # in-hospital mortality head
    ihm_network = layers.InputLayer((None, dim), input_var=mid_outputs)
    ihm_network = layers.DenseLayer(incoming=ihm_network,
                                    num_units=2, nonlinearity=softmax)
    self.ihm_prediction = layers.get_output(ihm_network)
    self.ihm_det_prediction = layers.get_output(ihm_network,
                                                deterministic=True)
    self.params += layers.get_all_params(ihm_network, trainable=True)
    self.reg_params += layers.get_all_params(ihm_network, regularizable=True)
    self.ihm_loss = (self.ihm_mask * categorical_crossentropy(
        self.ihm_prediction, self.ihm_label)).mean()

    # length-of-stay head (regression)
    los_network = layers.InputLayer((None, None, dim), input_var=lstm_output)
    los_network = layers.ReshapeLayer(los_network, (-1, dim))
    los_network = layers.DenseLayer(incoming=los_network,
                                    num_units=1, nonlinearity=rectify)
    los_network = layers.ReshapeLayer(los_network, (lstm_output.shape[0], -1))
    self.los_prediction = layers.get_output(los_network)
    self.los_det_prediction = layers.get_output(los_network,
                                                deterministic=True)
    self.params += layers.get_all_params(los_network, trainable=True)
    self.reg_params += layers.get_all_params(los_network, regularizable=True)
    self.los_loss = (self.los_mask * squared_error(
        self.los_prediction, self.los_label)).mean(axis=1).mean(axis=0)

    # phenotyping head
    ph_network = layers.InputLayer((None, dim), input_var=last_outputs)
    ph_network = layers.DenseLayer(incoming=ph_network,
                                   num_units=25, nonlinearity=sigmoid)
    self.ph_prediction = layers.get_output(ph_network)
    self.ph_det_prediction = layers.get_output(ph_network, deterministic=True)
    self.params += layers.get_all_params(ph_network, trainable=True)
    self.reg_params += layers.get_all_params(ph_network, regularizable=True)
    self.ph_loss = nn_utils.multilabel_loss(self.ph_prediction, self.ph_label)

    # decompensation head
    decomp_network = layers.InputLayer((None, None, dim),
                                       input_var=lstm_output)
    decomp_network = layers.ReshapeLayer(decomp_network, (-1, dim))
    decomp_network = layers.DenseLayer(incoming=decomp_network,
                                       num_units=2, nonlinearity=softmax)
    decomp_network = layers.ReshapeLayer(decomp_network,
                                         (lstm_output.shape[0], -1, 2))
    # keep only the probability of the positive class
    self.decomp_prediction = layers.get_output(decomp_network)[:, :, 1]
    self.decomp_det_prediction = layers.get_output(
        decomp_network, deterministic=True)[:, :, 1]
    self.params += layers.get_all_params(decomp_network, trainable=True)
    self.reg_params += layers.get_all_params(decomp_network,
                                             regularizable=True)
    self.decomp_loss = nn_utils.multilabel_loss_with_mask(
        self.decomp_prediction, self.decomp_label, self.decomp_mask)

    if self.l2 > 0:
        self.loss_l2 = self.l2 * nn_utils.l2_reg(self.reg_params)
    else:
        self.loss_l2 = T.constant(0)
    if self.l1 > 0:
        self.loss_l1 = self.l1 * nn_utils.l1_reg(self.reg_params)
    else:
        self.loss_l1 = T.constant(0)
    self.reg_loss = self.loss_l1 + self.loss_l2

    # weighted sum of the task losses plus regularization
    self.loss = (ihm_C * self.ihm_loss + los_C * self.los_loss +
                 ph_C * self.ph_loss + decomp_C * self.decomp_loss +
                 self.reg_loss)

    # alternatives that were tried:
    # updates = lasagne.updates.adadelta(self.loss, self.params,
    #                                    learning_rate=0.001)
    # updates = lasagne.updates.momentum(self.loss, self.params,
    #                                    learning_rate=0.00003)
    # updates = lasagne.updates.adam(self.loss, self.params)
    # updates = lasagne.updates.nesterov_momentum(self.loss, self.params,
    #                                             momentum=0.9,
    #                                             learning_rate=0.001)
    updates = lasagne.updates.adam(self.loss, self.params, beta1=0.5,
                                   learning_rate=0.0001)  # from DCGAN paper

    all_inputs = [self.input_var, self.input_lens,
                  self.ihm_pos, self.ihm_mask, self.ihm_label,
                  self.los_mask, self.los_label,
                  self.ph_label,
                  self.decomp_mask, self.decomp_label]

    train_outputs = [self.ihm_prediction, self.los_prediction,
                     self.ph_prediction, self.decomp_prediction,
                     self.loss,
                     self.ihm_loss, self.los_loss,
                     self.ph_loss, self.decomp_loss,
                     self.reg_loss]

    test_outputs = [self.ihm_det_prediction, self.los_det_prediction,
                    self.ph_det_prediction, self.decomp_det_prediction,
                    self.loss,
                    self.ihm_loss, self.los_loss,
                    self.ph_loss, self.decomp_loss,
                    self.reg_loss]

    # compiling theano functions
    if self.mode == 'train':
        print "==> compiling train_fn"
        self.train_fn = theano.function(inputs=all_inputs,
                                        outputs=train_outputs,
                                        updates=updates)

    print "==> compiling test_fn"
    self.test_fn = theano.function(inputs=all_inputs,
                                   outputs=test_outputs)
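
# Hedged sketch (not part of the original class): the two gather loops in the
# multitask __init__ above can be written with Theano's numpy-style advanced
# indexing instead of a Python loop plus T.stack. `gather_timesteps` is a
# hypothetical helper introduced here for illustration only.
def gather_timesteps(outputs, positions, batch_size):
    """Select outputs[i, positions[i], :] for each example i.

    `outputs` is a (batch_size, time, dim) tensor3 and `positions` an int
    vector of per-example time indices; the result is (batch_size, dim),
    matching what the loops build, e.g.
        last_outputs = gather_timesteps(lstm_output, self.input_lens - 1,
                                        self.batch_size)
        mid_outputs = gather_timesteps(lstm_output, self.ihm_pos,
                                       self.batch_size)
    """
    rows = T.arange(batch_size)
    return outputs[rows, positions, :]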
# Single-task LSTM network: binary in-hospital mortality prediction.
def __init__(self, dim, mode, l2, l1, batch_norm, dropout, batch_size,
             input_dim=76, **kwargs):

    print "==> not used params in network class:", kwargs.keys()
    self.dim = dim
    self.mode = mode
    self.l2 = l2
    self.l1 = l1
    self.batch_norm = batch_norm
    self.dropout = dropout
    self.batch_size = batch_size

    self.input_var = T.tensor3('X')
    self.input_lens = T.ivector('L')
    self.target_var = T.ivector('y')
    self.weight = T.vector('w')  # per-example weights for the cross-entropy

    print "==> Building neural network"

    network = layers.InputLayer((None, None, input_dim),
                                input_var=self.input_var)
    network = layers.LSTMLayer(incoming=network, num_units=dim,
                               only_return_final=False,
                               grad_clipping=10,
                               ingate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)),
                               forgetgate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)),
                               cell=lasagne.layers.Gate(
                                   W_cell=None,
                                   nonlinearity=lasagne.nonlinearities.tanh,
                                   W_in=Orthogonal(), W_hid=Orthogonal()),
                               outgate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)))

    lstm_output = layers.get_output(network)
    self.params = layers.get_all_params(network, trainable=True)
    self.reg_params = layers.get_all_params(network, regularizable=True)

    # for each example in the minibatch take the output at the last
    # valid time step
    last_outputs = []
    for index in range(self.batch_size):
        last_outputs.append(lstm_output[index, self.input_lens[index] - 1, :])
    last_outputs = T.stack(last_outputs)

    network = layers.InputLayer(shape=(self.batch_size, self.dim),
                                input_var=last_outputs)
    network = layers.DenseLayer(incoming=network,
                                num_units=2, nonlinearity=softmax)
    self.prediction = layers.get_output(network)
    self.params += layers.get_all_params(network, trainable=True)
    self.reg_params += layers.get_all_params(network, regularizable=True)

    self.loss_ce = (self.weight * categorical_crossentropy(
        self.prediction, self.target_var)).mean()
    if self.l2 > 0:
        self.loss_l2 = self.l2 * nn_utils.l2_reg(self.reg_params)
    else:
        self.loss_l2 = 0
    if self.l1 > 0:
        self.loss_l1 = self.l1 * nn_utils.l1_reg(self.reg_params)
    else:
        self.loss_l1 = 0
    self.loss = self.loss_ce + self.loss_l2 + self.loss_l1

    # alternatives that were tried:
    # updates = lasagne.updates.adadelta(self.loss, self.params,
    #                                    learning_rate=0.001)
    # updates = lasagne.updates.momentum(self.loss, self.params,
    #                                    learning_rate=0.00003)
    # updates = lasagne.updates.adam(self.loss, self.params)
    # updates = lasagne.updates.nesterov_momentum(self.loss, self.params,
    #                                             momentum=0.9,
    #                                             learning_rate=0.001)
    updates = lasagne.updates.adam(self.loss, self.params, beta1=0.5,
                                   learning_rate=0.0001)  # from DCGAN paper

    # compiling theano functions
    if self.mode == 'train':
        print "==> compiling train_fn"
        self.train_fn = theano.function(
            inputs=[self.input_var, self.input_lens,
                    self.target_var, self.weight],
            outputs=[self.prediction, self.loss],
            updates=updates)

    print "==> compiling test_fn"
    self.test_fn = theano.function(
        inputs=[self.input_var, self.input_lens,
                self.target_var, self.weight],
        outputs=[self.prediction, self.loss])
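
# Hedged usage sketch (illustrative only, not in the original file): driving
# the compiled Theano functions of the mortality network above with a dummy
# minibatch. `net` stands for an instance of the class this __init__ belongs
# to; array dtypes must match the symbolic declarations (floatX for X and w,
# int32 for L and y).
def _ihm_smoke_test(net, batch_size, max_len=48, input_dim=76):
    import numpy as np
    X = np.random.rand(batch_size, max_len,
                       input_dim).astype(theano.config.floatX)
    L = np.full(batch_size, max_len, dtype='int32')      # per-example lengths
    y = np.random.randint(0, 2, size=batch_size).astype('int32')
    w = np.ones(batch_size, dtype=theano.config.floatX)  # uniform weights
    prediction, loss = net.train_fn(X, L, y, w)
    print "prediction shape:", prediction.shape, "loss:", float(loss)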
# Single-task two-layer LSTM network: multilabel phenotype classification.
def __init__(self, train_raw, test_raw, dim, mode, l2, l1,
             batch_norm, dropout, batch_size, **kwargs):

    print "==> not used params in network class:", kwargs.keys()
    self.train_raw = train_raw
    self.test_raw = test_raw
    self.dim = dim
    self.mode = mode
    self.l2 = l2
    self.l1 = l1
    self.batch_norm = batch_norm
    self.dropout = dropout
    self.batch_size = batch_size

    self.train_batch_gen = self.get_batch_gen(self.train_raw)
    self.test_batch_gen = self.get_batch_gen(self.test_raw)

    self.input_var = T.tensor3('X')
    self.input_lens = T.ivector('L')
    self.target_var = T.imatrix('y')

    print "==> Building neural network"

    network = layers.InputLayer((None, None, self.train_raw[0][0].shape[1]),
                                input_var=self.input_var)
    if self.dropout > 0:
        network = layers.DropoutLayer(network, p=self.dropout)
    # first LSTM layer (returns the full sequence by default)
    network = layers.LSTMLayer(incoming=network, num_units=dim,
                               grad_clipping=10,
                               ingate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)),
                               forgetgate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)),
                               cell=lasagne.layers.Gate(
                                   W_cell=None,
                                   nonlinearity=lasagne.nonlinearities.tanh,
                                   W_in=Orthogonal(), W_hid=Orthogonal()),
                               outgate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)))
    if self.dropout > 0:
        network = layers.DropoutLayer(network, p=self.dropout)
    # second LSTM layer
    network = layers.LSTMLayer(incoming=network, num_units=dim,
                               only_return_final=False,
                               grad_clipping=10,
                               ingate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)),
                               forgetgate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)),
                               cell=lasagne.layers.Gate(
                                   W_cell=None,
                                   nonlinearity=lasagne.nonlinearities.tanh,
                                   W_in=Orthogonal(), W_hid=Orthogonal()),
                               outgate=lasagne.layers.Gate(
                                   W_in=Orthogonal(), W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)))

    lstm_output = layers.get_output(network)
    self.params = layers.get_all_params(network, trainable=True)
    self.reg_params = layers.get_all_params(network, regularizable=True)

    # for each example in the minibatch take the output at the last
    # valid time step
    last_outputs = []
    for index in range(self.batch_size):
        last_outputs.append(lstm_output[index, self.input_lens[index] - 1, :])
    last_outputs = T.stack(last_outputs)

    network = layers.InputLayer(shape=(self.batch_size, self.dim),
                                input_var=last_outputs)
    if self.dropout > 0:
        network = layers.DropoutLayer(network, p=self.dropout)
    # one sigmoid output per phenotype label
    network = layers.DenseLayer(incoming=network,
                                num_units=train_raw[1][0].shape[0],
                                nonlinearity=sigmoid)
    self.prediction = layers.get_output(network)
    self.det_prediction = layers.get_output(network, deterministic=True)
    self.params += layers.get_all_params(network, trainable=True)
    self.reg_params += layers.get_all_params(network, regularizable=True)

    # multilabel binary cross-entropy: mean over labels, then over examples
    self.loss_multilabel = -(self.target_var * T.log(self.prediction) +
                             (1 - self.target_var) *
                             T.log(1 - self.prediction)).mean(axis=1).mean(axis=0)

    if self.l2 > 0:
        self.loss_l2 = self.l2 * nn_utils.l2_reg(self.reg_params)
    else:
        self.loss_l2 = 0
    if self.l1 > 0:
        self.loss_l1 = self.l1 * nn_utils.l1_reg(self.reg_params)
    else:
        self.loss_l1 = 0
    self.loss = self.loss_multilabel + self.loss_l2 + self.loss_l1

    # alternatives that were tried:
    # updates = lasagne.updates.adadelta(self.loss, self.params,
    #                                    learning_rate=0.001)
    # updates = lasagne.updates.momentum(self.loss, self.params,
    #                                    learning_rate=0.00003)
    # updates = lasagne.updates.adam(self.loss, self.params)
    # updates = lasagne.updates.nesterov_momentum(self.loss, self.params,
    #                                             momentum=0.9,
    #                                             learning_rate=0.001)
    updates = lasagne.updates.adam(self.loss, self.params, beta1=0.5,
                                   learning_rate=0.0001)  # from DCGAN paper

    # compiling theano functions
    if self.mode == 'train':
        print "==> compiling train_fn"
        self.train_fn = theano.function(
            inputs=[self.input_var, self.input_lens, self.target_var],
            outputs=[self.prediction, self.loss],
            updates=updates)

    print "==> compiling test_fn"
    self.test_fn = theano.function(
        inputs=[self.input_var, self.input_lens, self.target_var],
        outputs=[self.det_prediction, self.loss])
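
# Hedged reference sketch (not part of the original class): the multilabel
# cross-entropy that self.loss_multilabel computes symbolically above,
# written out in numpy for clarity. The epsilon clipping is an assumption
# added here to guard against log(0); the symbolic version relies on the
# sigmoid output staying strictly inside (0, 1).
def _multilabel_xent_reference(pred, target, eps=1e-7):
    """pred, target: (batch_size, n_labels) arrays; returns a scalar loss."""
    import numpy as np
    pred = np.clip(pred, eps, 1.0 - eps)
    per_label = -(target * np.log(pred) + (1 - target) * np.log(1 - pred))
    return per_label.mean(axis=1).mean(axis=0)  # labels first, then batch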