# Assumed imports for these snippets (Theano / Lasagne, Python 2); nn_utils
# and metrics are project-local helper modules referenced below.
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.init import Normal, Orthogonal
from lasagne.nonlinearities import rectify, sigmoid, softmax
from lasagne.objectives import categorical_crossentropy, squared_error

def __init__(self, train_raw, test_raw, dim, mode, l2, l1,
             batch_norm, dropout, batch_size,
             ihm_C, los_C, ph_C, decomp_C,
             partition, nbins, **kwargs):
             
     print "==> not used params in network class:", kwargs.keys()
     self.train_raw = train_raw
     self.test_raw = test_raw
     
     self.dim = dim
     self.mode = mode
     self.l2 = l2
     self.l1 = l1
     self.batch_norm = batch_norm
     self.dropout = dropout
     self.batch_size = batch_size
     self.ihm_C = ihm_C
     self.los_C = los_C
     self.ph_C = ph_C
     self.decomp_C = decomp_C
     self.nbins = nbins
     
      # LOS bin helpers: 'log' selects logarithmic length-of-stay bins,
      # otherwise the 10 custom bins defined in the metrics module are used
      if partition == 'log':
          self.get_bin = metrics.get_bin_log
          self.get_estimate = metrics.get_estimate_log
      else:
          assert self.nbins == 10, "custom partition expects exactly 10 bins"
          self.get_bin = metrics.get_bin_custom
          self.get_estimate = metrics.get_estimate_custom
     
     self.train_batch_gen = self.get_batch_gen(self.train_raw)
     self.test_batch_gen = self.get_batch_gen(self.test_raw)    
     
     self.input_var = T.tensor3('X')
     self.input_lens = T.ivector('L')
     
     self.ihm_pos = T.ivector('ihm_pos')
     self.ihm_mask = T.ivector('ihm_mask')
     self.ihm_label = T.ivector('ihm_label')
     
     self.los_mask = T.imatrix('los_mask')
      self.los_label = T.matrix('los_label')     # regression target
      # self.los_label = T.imatrix('los_label')  # classification variant
     
     self.ph_label = T.imatrix('ph_label')
     
     self.decomp_mask = T.imatrix('decomp_mask')
     self.decomp_label = T.imatrix('decomp_label')
     
     print "==> Building neural network"
     
      # common LSTM trunk shared by all four task heads; the input feature
      # count is read off the first training example
      network = layers.InputLayer((None, None, self.train_raw[0][0].shape[1]),
                                  input_var=self.input_var)
     
     if (self.dropout > 0):
         network = layers.DropoutLayer(network, p=self.dropout)
     
      # LSTM trunk: orthogonal input/recurrent weights, peephole weights
      # initialized from a normal with std 0.1, gradients clipped at 10
      network = layers.LSTMLayer(incoming=network, num_units=dim,
                                 only_return_final=False,
                                 grad_clipping=10,
                                ingate=lasagne.layers.Gate(
                                     W_in=Orthogonal(),
                                     W_hid=Orthogonal(),
                                     W_cell=Normal(0.1)),
                                forgetgate=lasagne.layers.Gate(
                                     W_in=Orthogonal(),
                                     W_hid=Orthogonal(),
                                     W_cell=Normal(0.1)),
                                cell=lasagne.layers.Gate(W_cell=None,
                                     nonlinearity=lasagne.nonlinearities.tanh,
                                     W_in=Orthogonal(),
                                     W_hid=Orthogonal()),
                                outgate=lasagne.layers.Gate(
                                     W_in=Orthogonal(),
                                     W_hid=Orthogonal(),
                                     W_cell=Normal(0.1)))
     
     if (self.dropout > 0):
         network = layers.DropoutLayer(network, p=self.dropout)
     
     lstm_output = layers.get_output(network)
     self.params = layers.get_all_params(network, trainable=True)
     self.reg_params = layers.get_all_params(network, regularizable=True)
     
     # for each example in minibatch take the last output
     last_outputs = []
     for index in range(self.batch_size):
         last_outputs.append(lstm_output[index, self.input_lens[index]-1, :])
     last_outputs = T.stack(last_outputs)
     
      # for the in-hospital mortality task, take the output at the fixed
      # 48-hour position
     mid_outputs = []
     for index in range(self.batch_size):
         mid_outputs.append(lstm_output[index, self.ihm_pos[index], :])
     mid_outputs = T.stack(mid_outputs)
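      # Editor's note: both gather loops above unroll into the graph once per
      # batch element; an equivalent vectorized gather (same shapes assumed)
      # would be, e.g.:
      #   last_outputs = lstm_output[T.arange(self.batch_size),
      #                              self.input_lens - 1, :]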
     
     
     # in-hospital mortality related network
     ihm_network = layers.InputLayer((None, dim), input_var=mid_outputs)
     ihm_network = layers.DenseLayer(incoming=ihm_network, num_units=2,
                                    nonlinearity=softmax)
     self.ihm_prediction = layers.get_output(ihm_network)
     self.ihm_det_prediction = layers.get_output(ihm_network, deterministic=True)
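      # deterministic=True disables dropout so evaluation uses the full network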
     self.params += layers.get_all_params(ihm_network, trainable=True)
     self.reg_params += layers.get_all_params(ihm_network, regularizable=True)
     self.ihm_loss = (self.ihm_mask * categorical_crossentropy(self.ihm_prediction, 
                                                       self.ihm_label)).mean()
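      # Note: .mean() divides by the full batch size, so masked-out examples
      # dilute this term; normalizing over valid examples only would look
      # like (assuming ihm_mask is a 0/1 vector):
      #   ce = categorical_crossentropy(self.ihm_prediction, self.ihm_label)
      #   self.ihm_loss = (self.ihm_mask * ce).sum() / T.maximum(self.ihm_mask.sum(), 1)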
     
     
     # length of stay related network
     # Regression
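      # The dense layer is applied to every time step by flattening
      # (batch, time, dim) -> (batch * time, dim) and reshaping the scalar
      # outputs back to (batch, time).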
     los_network = layers.InputLayer((None, None, dim), input_var=lstm_output)
     los_network = layers.ReshapeLayer(los_network, (-1, dim))
     los_network = layers.DenseLayer(incoming=los_network, num_units=1,
                                     nonlinearity=rectify)
     los_network = layers.ReshapeLayer(los_network, (lstm_output.shape[0], -1))
     self.los_prediction = layers.get_output(los_network)
     self.los_det_prediction = layers.get_output(los_network, deterministic=True)
     self.params += layers.get_all_params(los_network, trainable=True)
     self.reg_params += layers.get_all_params(los_network, regularizable=True)
     self.los_loss = (self.los_mask * squared_error(self.los_prediction,
                                                   self.los_label)).mean(axis=1).mean(axis=0)
     
     
     # phenotype related network
     ph_network = layers.InputLayer((None, dim), input_var=last_outputs)
      ph_network = layers.DenseLayer(incoming=ph_network, num_units=25,
                                     nonlinearity=sigmoid)  # 25 phenotype labels
     self.ph_prediction = layers.get_output(ph_network)
     self.ph_det_prediction = layers.get_output(ph_network, deterministic=True)
     self.params += layers.get_all_params(ph_network, trainable=True)
     self.reg_params += layers.get_all_params(ph_network, regularizable=True)
     self.ph_loss = nn_utils.multilabel_loss(self.ph_prediction, self.ph_label)
             
     
     # decompensation related network
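      # same per-time-step reshape pattern as the LOS head, but with a binary
      # softmax; the [:, :, 1] slice below keeps the positive-class
      # probability at each time step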
     decomp_network = layers.InputLayer((None, None, dim), input_var=lstm_output)
     decomp_network = layers.ReshapeLayer(decomp_network, (-1, dim))
     decomp_network = layers.DenseLayer(incoming=decomp_network, num_units=2,
                                    nonlinearity=softmax)
     decomp_network = layers.ReshapeLayer(decomp_network, (lstm_output.shape[0], -1, 2))
     self.decomp_prediction = layers.get_output(decomp_network)[:, :, 1]
     self.decomp_det_prediction = layers.get_output(decomp_network, deterministic=True)[:, :, 1]
     self.params += layers.get_all_params(decomp_network, trainable=True)
     self.reg_params += layers.get_all_params(decomp_network, regularizable=True)
     self.decomp_loss = nn_utils.multilabel_loss_with_mask(self.decomp_prediction,
                                                       self.decomp_label,
                                                       self.decomp_mask)
     
     """
     data = next(self.train_batch_gen)
     print max(data[1])
     print lstm_output.eval({self.input_var:data[0]}).shape
     exit()
     """
     
     
     if self.l2 > 0: 
         self.loss_l2 = self.l2 * nn_utils.l2_reg(self.reg_params)
     else: 
         self.loss_l2 = T.constant(0)
     
     if self.l1 > 0: 
         self.loss_l1 = self.l1 * nn_utils.l1_reg(self.reg_params)
     else: 
         self.loss_l1 = T.constant(0)
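      # nn_utils.l2_reg / l1_reg are project helpers; minimal equivalents
      # (an assumption, not the project's actual implementation) would be:
      #   def l2_reg(params): return sum(T.sum(p ** 2) for p in params)
      #   def l1_reg(params): return sum(T.sum(abs(p)) for p in params)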
     
     self.reg_loss = self.loss_l1 + self.loss_l2
     
     self.loss = (ihm_C * self.ihm_loss + los_C * self.los_loss + 
                  ph_C * self.ph_loss + decomp_C * self.decomp_loss + 
                  self.reg_loss)
           
      # Adam with beta1=0.5 and learning rate 1e-4 (hyperparameters borrowed
      # from the DCGAN paper); adadelta, momentum, and nesterov momentum were
      # tried earlier and abandoned
      updates = lasagne.updates.adam(self.loss, self.params, beta1=0.5,
                                     learning_rate=0.0001)
     
     all_inputs = [self.input_var, self.input_lens,
                   self.ihm_pos, self.ihm_mask, self.ihm_label,
                   self.los_mask, self.los_label,
                   self.ph_label,
                   self.decomp_mask, self.decomp_label]
     
     train_outputs = [self.ihm_prediction, self.los_prediction,
                      self.ph_prediction, self.decomp_prediction,
                      self.loss,
                      self.ihm_loss, self.los_loss,
                      self.ph_loss, self.decomp_loss,
                      self.reg_loss]
                      
     test_outputs = [self.ihm_det_prediction, self.los_det_prediction,
                     self.ph_det_prediction, self.decomp_det_prediction,
                     self.loss,
                     self.ihm_loss, self.los_loss,
                     self.ph_loss, self.decomp_loss,
                     self.reg_loss]
     
     ## compiling theano functions
     if self.mode == 'train':
         print "==> compiling train_fn"
         self.train_fn = theano.function(inputs=all_inputs,
                                         outputs=train_outputs,
                                         updates=updates)
     
     print "==> compiling test_fn"
     self.test_fn = theano.function(inputs=all_inputs,
                                    outputs=test_outputs)
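
      # Usage sketch (editor's illustration, assuming get_batch_gen yields
      # tuples ordered like all_inputs):
      #   data = next(self.train_batch_gen)
      #   (ihm_p, los_p, ph_p, decomp_p, loss, ihm_l, los_l, ph_l,
      #    decomp_l, reg_l) = self.train_fn(*data)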

# ---------------------------------------------------------------------------
# Example #2
# ---------------------------------------------------------------------------
    def __init__(self, dim, mode, l2, l1, batch_norm, dropout,
                 batch_size, input_dim=76, **kwargs):
                
        print "==> not used params in network class:", kwargs.keys()
        
        self.dim = dim
        self.mode = mode
        self.l2 = l2
        self.l1 = l1
        self.batch_norm = batch_norm
        self.dropout = dropout
        self.batch_size = batch_size
        
        self.input_var = T.tensor3('X')
        self.input_lens = T.ivector('L')
        self.target_var = T.ivector('y')
        self.weight = T.vector('w')
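        # per-example weights for the weighted cross-entropy loss below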
        
        print "==> Building neural network"
        network = layers.InputLayer((None, None, input_dim), 
                                    input_var=self.input_var)
        network = layers.LSTMLayer(incoming=network, num_units=dim,
                                   only_return_final=False,
                                   grad_clipping=10,
                                   ingate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)),
                                   forgetgate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)),
                                   cell=lasagne.layers.Gate(W_cell=None,
                                        nonlinearity=lasagne.nonlinearities.tanh,
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal()),
                                   outgate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)))
        lstm_output = layers.get_output(network)
        
        self.params = layers.get_all_params(network, trainable=True)
        self.reg_params = layers.get_all_params(network, regularizable=True)
        
        # for each example in minibatch take the last output
        last_outputs = []
        for index in range(self.batch_size):
            last_outputs.append(lstm_output[index, self.input_lens[index]-1, :])
        last_outputs = T.stack(last_outputs)

        network = layers.InputLayer(shape=(self.batch_size, self.dim), 
                                    input_var=last_outputs)
        network = layers.DenseLayer(incoming=network, num_units=2,
                                    nonlinearity=softmax)
        
        self.prediction = layers.get_output(network)
        self.params += layers.get_all_params(network, trainable=True)
        self.reg_params += layers.get_all_params(network, regularizable=True)
        
        self.loss_ce = (self.weight * categorical_crossentropy(self.prediction, 
                                                self.target_var)).mean()
        if self.l2 > 0:
            self.loss_l2 = self.l2 * nn_utils.l2_reg(self.reg_params)
        else:
            self.loss_l2 = T.constant(0)

        if self.l1 > 0:
            self.loss_l1 = self.l1 * nn_utils.l1_reg(self.reg_params)
        else:
            self.loss_l1 = T.constant(0)
            
        self.loss = self.loss_ce + self.loss_l2 + self.loss_l1
        
        # Adam with beta1=0.5 and learning rate 1e-4 (hyperparameters borrowed
        # from the DCGAN paper); adadelta, momentum, and nesterov momentum were
        # tried earlier and abandoned
        updates = lasagne.updates.adam(self.loss, self.params, beta1=0.5,
                                       learning_rate=0.0001)
        
        ## compiling theano functions
        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var,
                                                    self.input_lens,
                                                    self.target_var,
                                                    self.weight],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)
        
        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var,
                                               self.input_lens,
                                               self.target_var,
                                               self.weight],
                                       outputs=[self.prediction, self.loss])
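
# ---------------------------------------------------------------------------
# Example #3
# ---------------------------------------------------------------------------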
    def __init__(self, train_raw, test_raw, dim, mode, l2, l1,
                 batch_norm, dropout, batch_size, **kwargs):
                
        print "==> not used params in network class:", kwargs.keys()
        self.train_raw = train_raw
        self.test_raw = test_raw
        
        self.dim = dim
        self.mode = mode
        self.l2 = l2
        self.l1 = l1
        self.batch_norm = batch_norm
        self.dropout = dropout
        self.batch_size = batch_size
        
        self.train_batch_gen = self.get_batch_gen(self.train_raw)
        self.test_batch_gen = self.get_batch_gen(self.test_raw)    
        
        self.input_var = T.tensor3('X')
        self.input_lens = T.ivector('L')
        self.target_var = T.imatrix('y')
        
        """
        for i in range(700//self.batch_size):
            ret=next(self.train_batch_gen)
            print len(ret[0])
            print ret[0][0].shape
            print len(ret[1])
            print type(ret[1][0])
            print "---"
        exit()
        """
                
        print "==> Building neural network"
        network = layers.InputLayer((None, None, self.train_raw[0][0].shape[1]), 
                                    input_var=self.input_var)
        
        if (self.dropout > 0):
            network = layers.DropoutLayer(network, p=self.dropout)

        # first of two stacked LSTM layers; dropout is applied between them
        # when enabled
        network = layers.LSTMLayer(incoming=network, num_units=dim,
                                   grad_clipping=10,
                                   ingate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)),
                                   forgetgate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)),
                                   cell=lasagne.layers.Gate(W_cell=None,
                                        nonlinearity=lasagne.nonlinearities.tanh,
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal()),
                                   outgate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)))
        
        if (self.dropout > 0):
            network = layers.DropoutLayer(network, p=self.dropout)
        
        network = layers.LSTMLayer(incoming=network, num_units=dim,
                                   only_return_final=False,
                                   grad_clipping=10,
                                   ingate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)),
                                   forgetgate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)),
                                   cell=lasagne.layers.Gate(W_cell=None,
                                        nonlinearity=lasagne.nonlinearities.tanh,
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal()),
                                   outgate=lasagne.layers.Gate(
                                        W_in=Orthogonal(),
                                        W_hid=Orthogonal(),
                                        W_cell=Normal(0.1)))
              
        lstm_output = layers.get_output(network)
        self.params = layers.get_all_params(network, trainable=True)
        self.reg_params = layers.get_all_params(network, regularizable=True)
        
        """
        data = next(self.train_batch_gen)
        print max(data[1])
        print lstm_output.eval({self.input_var:data[0]}).shape
        exit()
        """
        
        # for each example in minibatch take the last output
        last_outputs = []
        for index in range(self.batch_size):
            last_outputs.append(lstm_output[index, self.input_lens[index]-1, :])
        last_outputs = T.stack(last_outputs)

        """
        data = next(self.train_batch_gen)
        print max(data[1])
        print last_outputs.eval({self.input_var:data[0],
            self.input_lens:data[1],
        }).shape
        exit()
        """
        
        network = layers.InputLayer(shape=(self.batch_size, self.dim), 
                                    input_var=last_outputs)
        if (self.dropout > 0):
            network = layers.DropoutLayer(network, p=self.dropout)
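        # one sigmoid unit per target label; the output width is inferred
        # from the shape of the label vectors in train_raw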
        network = layers.DenseLayer(incoming=network,
                                    num_units=train_raw[1][0].shape[0],
                                    nonlinearity=sigmoid)
        
        self.prediction = layers.get_output(network)
        self.det_prediction = layers.get_output(network, deterministic=True)
        self.params += layers.get_all_params(network, trainable=True)
        self.reg_params += layers.get_all_params(network, regularizable=True)
        
        self.loss_multilabel = -(
            self.target_var * T.log(self.prediction)
            + (1 - self.target_var) * T.log(1 - self.prediction)
        ).mean(axis=1).mean(axis=0)
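        # element-wise binary cross-entropy, averaged over labels (axis=1)
        # and then over the batch; assumes predictions stay strictly inside
        # (0, 1), which the sigmoid guarantees up to numerical underflow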
        
        if self.l2 > 0:
            self.loss_l2 = self.l2 * nn_utils.l2_reg(self.reg_params)
        else:
            self.loss_l2 = T.constant(0)

        if self.l1 > 0:
            self.loss_l1 = self.l1 * nn_utils.l1_reg(self.reg_params)
        else:
            self.loss_l1 = T.constant(0)
            
        self.loss = self.loss_multilabel + self.loss_l2 + self.loss_l1
              
        # Adam with beta1=0.5 and learning rate 1e-4 (hyperparameters borrowed
        # from the DCGAN paper); adadelta, momentum, and nesterov momentum were
        # tried earlier and abandoned
        updates = lasagne.updates.adam(self.loss, self.params, beta1=0.5,
                                       learning_rate=0.0001)
        
        ## compiling theano functions
        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var,
                                                    self.input_lens,
                                                    self.target_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)
        
        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var,
                                               self.input_lens,
                                               self.target_var],
                                       outputs=[self.det_prediction, self.loss])
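
        # Usage sketch (editor's illustration; X_batch, lengths, y_batch are
        # hypothetical names):
        #   preds, loss = self.test_fn(X_batch, lengths, y_batch)
        # det_prediction makes this a dropout-free forward pass.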