Code example #1
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = self.iterations + 1
        lr_t = self.lr / (1. - K.pow(self.beta_1, t))

        shapes = [K.get_variable_shape(p) for p in params]
        # zero init of 1st moment
        ms = [K.zeros(shape) for shape in shapes]
        # zero init of exponentially weighted infinity norm
        us = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + us

        for p, g, m, u in zip(params, grads, ms, us):

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            u_t = K.maximum(self.beta_2 * u, K.abs(g))
            p_t = p - self.get_param_learning_rate_t(p,t,lr_t) * m_t / (u_t + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(u, u_t))

            new_p = p_t
            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates
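
A recurring pattern in the optimizer examples below: K.zeros allocates one zero-initialized accumulator per parameter, shaped like that parameter (here the first moments `ms` and the infinity-norm slots `us`). A minimal standalone sketch of that allocation, with made-up values:

    from keras import backend as K

    params = [K.variable([[1., 2.], [3., 4.]]), K.variable([0.5, 0.5])]
    # one zero slot per parameter; K.get_variable_shape(p) is the older spelling of K.int_shape(p)
    slots = [K.zeros(K.int_shape(p)) for p in params]
    print([K.get_value(s).shape for s in slots])   # [(2, 2), (2,)]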
Code example #2
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        shapes = [K.get_variable_shape(p) for p in params]
        accumulators = [K.zeros(shape) for shape in shapes]
        delta_accumulators = [K.zeros(shape) for shape in shapes]
        self.weights = accumulators + delta_accumulators
        self.updates = []

        for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
            # update accumulator
            new_a = self.rho * a + (1. - self.rho) * K.square(g)
            self.updates.append(K.update(a, new_a))

            # use the new accumulator and the *old* delta_accumulator
            update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)

            new_p = p - get_learning_rate(p, self.lr) * update
            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))

            # update delta_accumulator
            new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
            self.updates.append(K.update(d_a, new_d_a))
        return self.updates
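
For reference, the same Adadelta step written as plain numpy on a single parameter (a sketch, not from the source; `rho`, `epsilon`, and `lr` are assumed scalars, and `lr` plays the role of `get_learning_rate(p, self.lr)`):

    import numpy as np

    def adadelta_step(p, g, a, d_a, lr=1.0, rho=0.95, epsilon=1e-7):
        new_a = rho * a + (1. - rho) * np.square(g)                      # squared-gradient accumulator
        update = g * np.sqrt(d_a + epsilon) / np.sqrt(new_a + epsilon)   # RMS-scaled step
        new_p = p - lr * update
        new_d_a = rho * d_a + (1. - rho) * np.square(update)             # squared-update accumulator
        return new_p, new_a, new_d_a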
Code example #3
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = self.iterations + 1
        lr_t = self.lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))

        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            p_t = p - self.get_param_learning_rate_t(p,t,lr_t) * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            new_p = p_t
            # apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates
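
The corresponding Adam update as plain numpy, for a single parameter (a sketch, not from the source; the per-parameter rate from get_param_learning_rate_t is collapsed into `lr`):

    import numpy as np

    def adam_step(p, g, m, v, t, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7):
        lr_t = lr * np.sqrt(1. - beta_2 ** t) / (1. - beta_1 ** t)   # bias-corrected rate
        m_t = beta_1 * m + (1. - beta_1) * g                         # first moment
        v_t = beta_2 * v + (1. - beta_2) * np.square(g)              # second moment
        p_t = p - lr_t * m_t / (np.sqrt(v_t) + epsilon)
        return p_t, m_t, v_t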
Code example #4
File: trainrnn.py Project: artemyk/ssc
 def get_output(self, train=False):
     x = self.get_input(train)
     x_shape = K.shape(x)
     # pad the input with self.n - 1 zero feature columns, then swap the last two axes
     stacked = K.concatenate([K.reshape(x, (x_shape[0], x_shape[1], 1)),
                              K.zeros((x_shape[0], x_shape[1], self.n - 1))], axis=2)
     return stacked.dimshuffle((0, 2, 1))
Code example #5
    def reset_states(self):
        assert self.stateful, 'Layer must be stateful.'
        input_shape = self.input_shape
        if not input_shape[0]:
            raise Exception('If a RNN is stateful, a complete ' +
                            'input_shape must be provided ' +
                            '(including batch size).')

        if self.return_sequences:
            out_row, out_col, out_filter = self.output_shape[2:]
        else:
            out_row, out_col, out_filter = self.output_shape[1:]

        if hasattr(self, 'states'):
            K.set_value(self.states[0],
                        np.zeros((input_shape[0],
                                  out_row, out_col, out_filter)))
            K.set_value(self.states[1],
                        np.zeros((input_shape[0],
                                  out_row, out_col, out_filter)))
        else:
            self.states = [K.zeros((input_shape[0],
                                    out_row, out_col, out_filter)),
                           K.zeros((input_shape[0],
                                    out_row, out_col, out_filter))]
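
The reset_states pattern above recurs throughout this section: K.zeros creates the persistent state variables once, and later resets refill those same variables with K.set_value(..., np.zeros(...)) so the compiled graph keeps pointing at them. A minimal sketch with an invented shape:

    import numpy as np
    from keras import backend as K

    state = K.zeros((2, 4))               # allocate the state variable once
    K.set_value(state, np.zeros((2, 4)))  # later resets reuse the same variable
    print(K.get_value(state).sum())       # 0.0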
Code example #6
File: recurrent.py Project: lxastro/lxnn
    def build(self):
        f_init = self.get_function('init')
        f_inner_init = self.get_function('inner_init')
        f_forget_bias_init = self.get_function('forget_bias_init')
         
        # numpy matrices
        W_i = f_init((self.input_dim, self.output_dim), name=self.name + '_W_i').get_value()
        U_i = f_inner_init((self.output_dim, self.output_dim), name=self.name + '_U_i').get_value()
        b_i = K.zeros((self.output_dim,), name=self.name + '_b_i').get_value()

        W_f = f_init((self.input_dim, self.output_dim), name=self.name + '_W_f').get_value()
        U_f = f_inner_init((self.output_dim, self.output_dim), name=self.name + '_U_f').get_value()
        b_f = f_forget_bias_init((self.output_dim,), name=self.name + '_b_f').get_value()

        W_c = f_init((self.input_dim, self.output_dim), name=self.name + '_W_c').get_value()
        U_c = f_inner_init((self.output_dim, self.output_dim), name=self.name + '_U_c').get_value()
        b_c = K.zeros((self.output_dim,), name=self.name + '_b_c').get_value()

        W_o = f_init((self.input_dim, self.output_dim), name=self.name + '_W_o').get_value()
        U_o = f_inner_init((self.output_dim, self.output_dim), name=self.name + '_U_o').get_value()
        b_o = K.zeros((self.output_dim,), name=self.name + '_b_o').get_value()
        
        # theano variables
        self.W = theano.shared(numpy.concatenate([W_i, W_f, W_c, W_o], axis=1), name=self.name + '_W' , strict=False)
        self.U = theano.shared(numpy.concatenate([U_i, U_f, U_c, U_o], axis=1), name=self.name + '_U' , strict=False)
        self.b = theano.shared(numpy.concatenate([b_i, b_f, b_c, b_o]), name=self.name + '_b' , strict=False)
        self.params = [self.W, self.U, self.b]
Code example #7
File: cw.py Project: nadvornix/recurrent-nets
    def reset_states(self):
        assert self.stateful, 'Layer must be stateful.'


        input_shape = self.input_shape
        
        (batch_size, tsteps, xsize) = input_shape

        if not input_shape[0]:
            raise Exception('If a RNN is stateful, a complete ' +
                            'input_shape must be provided ' +
                            '(including batch size).')
        if hasattr(self, 'states'):
            K.set_value(self.states[0],
                        np.zeros((batch_size, self.output_dim)))
            K.set_value(self.states[1],
                        np.zeros((1,), dtype="i"))
            K.set_value(self.states[2],
                        np.zeros((batch_size, self.output_dim)))

        else:
            self.states = [K.zeros((batch_size, self.output_dim), name="stateA"),
                        # K.variable(0),
                        # theano.shared(0),
                        K.zeros((1), name="stateB", dtype="int32"),
                        K.zeros((batch_size, self.output_dim), name="stateC"),
                        ]
Code example #8
    def build(self):
        self.W1 = self.init((self.input_dim, self.n_classes), name='{}_W1'.format(self.name))
        self.b1 = K.zeros((self.n_classes,),  name='{}_b1'.format(self.name))
        self.W2 = self.init((self.n_classes, self.input_dim, self.n_outputs_per_class), name='{}_W2'.format(self.name))
        self.b2 = K.zeros((self.n_classes, self.n_outputs_per_class),  name='{}_b2'.format(self.name))

        self.trainable_weights = [self.W1, self.b1, self.W2, self.b2]
Code example #9
File: conv_lstm.py Project: dblN/misc
    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
        nb_input_channel = input_shape[2]

        W_shape = (self.nb_filter, nb_input_channel, self.nb_row, self.nb_col)
        U_shape = (self.nb_filter, self.nb_filter, self.nb_row, self.nb_col)
        C_shape = (self.nb_filter, input_shape[3], input_shape[4])
        b_shape = (self.nb_filter,)

        self.W_i = self.init(W_shape, name="{}_W_i".format(self.name))
        self.U_i = self.inner_init(U_shape, name="{}_U_i".format(self.name))
        self.C_i = self.inner_init(C_shape, name="{}_C_i".format(self.name))
        self.b_i = K.zeros(b_shape, name="{}_b_i".format(self.name))

        self.W_f = self.init(W_shape, name="{}_W_f".format(self.name))
        self.U_f = self.inner_init(U_shape, name="{}_U_f".format(self.name))
        self.C_f = self.inner_init(C_shape, name="{}_C_f".format(self.name))
        self.b_f = self.forget_bias_init(b_shape, name="{}_b_f".format(self.name))

        self.W_c = self.init(W_shape, name="{}_W_c".format(self.name))
        self.U_c = self.inner_init(U_shape, name="{}_U_c".format(self.name))
        self.b_c = K.zeros(b_shape, name="{}_b_c".format(self.name))

        self.W_o = self.init(W_shape, name="{}_W_o".format(self.name))
        self.U_o = self.inner_init(U_shape, name="{}_U_o".format(self.name))
        self.C_o = self.inner_init(C_shape, name="{}_C_o".format(self.name))
        self.b_o = K.zeros(b_shape, name="{}_b_o".format(self.name))

        self.trainable_weights = [self.W_i, self.U_i, self.b_i,
                                  self.W_c, self.U_c, self.b_c,
                                  self.W_f, self.U_f, self.b_f,
                                  self.W_o, self.U_o, self.b_o,
                                  self.C_i, self.C_f, self.C_o]
Code example #10
File: layers.py Project: Libardo1/modeling
    def build(self):
        #print('self.input_shape', self.input_shape)
        n_features = self.input_shape[1]

        self.W1 = self.init((n_features, self.nb_hsm_classes))
        self.b1 = K.zeros((self.nb_hsm_classes,))

        self.W2 = self.init((self.nb_hsm_classes, n_features, self.nb_outputs_per_class))
        self.b2 = K.zeros((self.nb_hsm_classes, self.nb_outputs_per_class))

        self.trainable_weights = [self.W1, self.b1,
                self.W2, self.b2]
        
        self.regularizers = []
        if self.W1_regularizer:
            self.W1_regularizer.set_param(self.W1)
            self.regularizers.append(self.W1_regularizer)
        
        if self.b1_regularizer:
            self.b1_regularizer.set_param(self.b1)
            self.regularizers.append(self.b1_regularizer)

        if self.W2_regularizer:
            self.W2_regularizer.set_param(self.W2)
            self.regularizers.append(self.W2_regularizer)
        
        if self.b2_regularizer:
            self.b2_regularizer.set_param(self.b2)
            self.regularizers.append(self.b2_regularizer)
Code example #11
File: layers.py Project: commaai/research
  def build(self, input_shape):
      self.input_spec = [InputSpec(shape=input_shape)]
      if self.stateful:
          self.reset_states()
      else:
          # initial states: all-zero tensor of shape (output_dim)
          self.states = [None]
      input_dim = input_shape[2]
      self.input_dim = input_dim

      self.V = self.init((self.output_dim, input_dim-self.control_dim),
                         name='{}_V'.format(self.name))
      self.W = self.init((input_dim, self.output_dim),
                         name='{}_W'.format(self.name))
      self.U = self.inner_init((self.output_dim, self.output_dim),
                               name='{}_U'.format(self.name))
      self.b = K.zeros((self.output_dim,), name='{}_b'.format(self.name))
      self.ext_b = K.zeros((input_dim-self.control_dim,), name='{}_ext_b'.format(self.name))

      self.regularizers = []
      if self.W_regularizer:
          self.W_regularizer.set_param(self.W)
          self.regularizers.append(self.W_regularizer)
      if self.U_regularizer:
          self.U_regularizer.set_param(self.U)
          self.regularizers.append(self.U_regularizer)
      if self.b_regularizer:
          self.b_regularizer.set_param(self.b)
          self.regularizers.append(self.b_regularizer)

      self.trainable_weights = [self.W, self.U, self.b, self.V, self.ext_b]

      if self.initial_weights is not None:
          self.set_weights(self.initial_weights)
          del self.initial_weights
Code example #12
    def build(self, input_shape):
        assert len(input_shape) >= 3
        self.input_spec = [InputSpec(shape=input_shape)]

        if not self.layer.built:
            self.layer.build(input_shape)
            self.layer.built = True

        super(AttentionLSTMWrapper, self).build()

        if hasattr(self.attention_vec, '_keras_shape'):
            attention_dim = self.attention_vec._keras_shape[1]
        else:
            raise Exception('Layer could not be built: No information about expected input shape.')

        self.U_a = self.layer.inner_init((self.layer.output_dim, self.layer.output_dim), name='{}_U_a'.format(self.name))
        self.b_a = K.zeros((self.layer.output_dim,), name='{}_b_a'.format(self.name))

        self.U_m = self.layer.inner_init((attention_dim, self.layer.output_dim), name='{}_U_m'.format(self.name))
        self.b_m = K.zeros((self.layer.output_dim,), name='{}_b_m'.format(self.name))

        if self.single_attention_param:
            self.U_s = self.layer.inner_init((self.layer.output_dim, 1), name='{}_U_s'.format(self.name))
            self.b_s = K.zeros((1,), name='{}_b_s'.format(self.name))
        else:
            self.U_s = self.layer.inner_init((self.layer.output_dim, self.layer.output_dim), name='{}_U_s'.format(self.name))
            self.b_s = K.zeros((self.layer.output_dim,), name='{}_b_s'.format(self.name))

        self.trainable_weights = [self.U_a, self.U_m, self.U_s, self.b_a, self.b_m, self.b_s]
Code example #13
    def build(self, input_shape):

        super(AttentionLSTM, self).build(input_shape)

        if hasattr(self.attention_vec, '_keras_shape'):
            attention_dim = self.attention_vec._keras_shape[1]
        else:
            raise Exception('Layer could not be built: No information about expected input shape.')
        self.U_a = self.inner_init((self.output_dim, self.output_dim),
                                   name='{}_U_a'.format(self.name))
        self.b_a = K.zeros((self.output_dim,), name='{}_b_a'.format(self.name))

        # U_m is the weight corresponding to image feature
        self.U_m = self.inner_init((attention_dim, self.output_dim),
                                   name='{}_U_m'.format(self.name))

        # b_m is the bias for the MLP in the calculation of tau
        self.b_m = K.zeros((self.output_dim,), name='{}_b_m'.format(self.name))

        if self.single_attention_param:
            self.U_s = self.inner_init((self.output_dim, 1),
                                       name='{}_U_s'.format(self.name))
            self.b_s = K.zeros((1,), name='{}_b_s'.format(self.name))
        else:
            self.U_s = self.inner_init((self.output_dim, self.output_dim),
                                       name='{}_U_s'.format(self.name))
            self.b_s = K.zeros((self.output_dim,), name='{}_b_s'.format(self.name))

        self.trainable_weights += [self.U_a, self.U_m, self.U_s, self.b_a, self.b_m, self.b_s]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Code example #14
    def build(self, input_shape):
        
        self.input_spec = [InputSpec(shape=shape) for shape in input_shape]
        input_dim =  self.input_spec[0].shape[-1]
        self.W1 = self.init((input_dim, self.n_classes), name='{}_W1'.format(self.name))
        self.b1 = K.zeros((self.n_classes,),  name='{}_b1'.format(self.name))
        self.W2 = self.init((self.n_classes, input_dim, self.n_outputs_per_class), name='{}_W2'.format(self.name))
        self.b2 = K.zeros((self.n_classes, self.n_outputs_per_class),  name='{}_b2'.format(self.name))

        self.trainable_weights = [self.W1, self.b1, self.W2, self.b2]
Code example #15
File: decoders.py Project: AppliedML/seq2seq
 def build(self):
     super(AttentionDecoder, self).build()
     dim = self.input_dim
     hdim = self.hidden_dim
     self.input_length = self.input_shape[-2]
     if not self.input_length:
         raise Exception ('AttentionDecoder requires input_length.')
     self.W_h = self.init((dim, hdim))
     self.b_h = K.zeros((hdim, ))
     self.W_a = self.init((hdim, 1))
     self.b_a = K.zeros((1,))
     self.trainable_weights += [self.W_a, self.b_a, self.W_h, self.b_h]
Code example #16
File: keras_layers.py Project: deepchem/deepchem
    def build(self, input_shape):
        """Initializes trainable weights."""
        x_input_shape, xp_input_shape = input_shape  # Unpack

        n_feat = xp_input_shape[1]

        self.lstm = LSTMStep(n_feat)
        self.q_init = K.zeros([self.n_test, n_feat])
        self.r_init = K.zeros([self.n_test, n_feat])
        self.states_init = self.lstm.get_initial_states([self.n_test, n_feat])

        self.trainable_weights = [self.q_init, self.r_init]
Code example #17
File: temp.py Project: EderSantana/seya
    def build(self):
        input_dim = self.input_shape[1]

        self.W_mean = self.init((input_dim, self.output_dim))
        self.b_mean = K.zeros((self.output_dim,))
        self.W_logsigma = self.init((input_dim, self.output_dim))
        self.b_logsigma = K.zeros((self.output_dim,))

        self.trainable_weights = [self.W_mean, self.b_mean, self.W_logsigma, self.b_logsigma]

        self.regularizers = []
        reg = self.get_variational_regularization(self.get_input())
        self.regularizers.append(reg)
Code example #18
File: cw.py Project: nadvornix/recurrent-nets
    def build(self):
        input_shape = self.input_shape
        input_dim = input_shape[2] # = |x| # works only for stateful? (todo: try)
        self.input_dim = input_dim
        self.input = K.placeholder(input_shape)
        
        # from IPython import embed; embed()

        # output dim = |c| = |h| = |output|
        # input dim = |x|

        if self.stateful:
            self.reset_states()
        else:
            # initial states: 3 all-zero tensors of shape (output_dim)
            self.states = [None, None, None]

        # input_dim x output_dim
        # output dim = 50 = |h|?

        input_dim = self.input_dim
        output_dim = self.output_dim

        n = self.output_dim // len(self.periods)
        
        mask = np.zeros((self.output_dim, self.output_dim))
        period = np.zeros((self.output_dim, ), 'i')

        for i, T in enumerate(self.periods):
            mask[i*n:(i+1)*n, i*n:] = 1
            period[i*n:(i+1)*n] = T

        # from IPython import embed; embed()
        self.mask = K.zeros((self.output_dim, self.output_dim))
        self.period = K.zeros((self.output_dim, ), 'i')

        K.set_value(self.mask, mask)
        K.set_value(self.period, period)

        ## todo: mask & period are shared
        # n: K.zeros is shared by default (?)

        self.hh = self.init((self.output_dim, self.output_dim))
        self.xh = self.init((self.input_dim, self.output_dim))
        self.b = K.zeros((self.output_dim,), name="b")

        self.trainable_weights = [self.hh, self.xh, self.b]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Code example #19
File: stack.py Project: dytmas/seya
    def get_initial_states(self, X):

        batch_size = X.shape[0]
        
        init_r = K.zeros((self.m_length)).dimshuffle('x',0).repeat(batch_size,axis=0)
        init_V = K.zeros((self.n_slots,self.m_length)).dimshuffle('x',0,1).repeat(batch_size,axis=0)
        init_S = K.zeros((self.n_slots)).dimshuffle('x',0).repeat(batch_size,axis=0)
        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)

        itime = K.zeros((1,),dtype=np.int32)
        
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return [init_r , init_V,init_S,itime,init_h,init_c]
Code example #20
File: variational.py Project: jfsantos/seya
    def build(self):
        input_dim = self.input_shape[1]

        self.W_mean = self.init((input_dim, self.output_dim))
        self.b_mean = K.zeros((self.output_dim,))
        self.W_logsigma = self.init((input_dim, self.output_dim))
        self.b_logsigma = K.zeros((self.output_dim,))

        self.params = [self.W_mean, self.b_mean, self.W_logsigma,
                       self.b_logsigma]

        self.regularizers = []
        mean, logsigma = self.get_mean_logsigma()
        self.regularizers.append(GaussianKL(mean, logsigma))
Code example #21
File: rtn.py Project: braingineer/ikelos
 def reset_states(self):
     assert self.stateful, 'Layer must be stateful.'
     input_shape = self.input_spec[0].shape
     if not input_shape[0]:
         raise Exception('If a RNN is stateful, a complete ' +
                         'input_shape must be provided (including batch size).')
     if hasattr(self, 'states'):
         K.set_value(self.states[0],
                     np.zeros((input_shape[0], self.output_dim)))
         K.set_value(self.states[1],
                     np.zeros((input_shape[1], input_shape[0], self.output_dim)))
     else:
         self.states = [K.zeros((input_shape[0], self.output_dim)),
                        K.zeros((input_shape[1], input_shape[0], self.output_dim))]
Code example #22
File: Layers.py Project: shiretzet/PixelRNN
    def build(self, input_shape):
        # Input shape :: (samples, channels, height, width)

        self.input_spec = [InputSpec(shape=input_shape)]
        if self.direction == 'Down':
            dims = self.input_spec[0].shape
            self.shuffeled_dims = (dims[0], dims[3], dims[1], dims[2])
        elif self.direction == 'Right':
            dims = self.input_spec[0].shape
            self.shuffeled_dims = (dims[0], dims[2], dims[1], dims[3])
        else:
            raise Exception('ERROR: Unknown direction')

        input_dim = self.shuffeled_dims[2]
        self.input_dim = input_dim
        self.Shape = (4*self.nb_filter, input_dim, 1, 1)
        self.Shape1 = (4*self.nb_filter, self.nb_filter, 2, 1)
        self.Shape2 = (self.nb_filter, self.shuffeled_dims[3])

        self.W_iof = self.init(self.Shape)
        self.U_iof = self.init(self.Shape1)
        self.b_iof = K.zeros((4*self.nb_filter,))

        self.init_h = K.zeros(self.Shape2)
        self.init_c = K.zeros(self.Shape2)

        if self.stateful:
            self.reset_states()
        else:
            self.states = [None, None]

        self.regularizers = []
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W_iof)
            self.regularizers.append(self.W_regularizer)
        if self.U_regularizer:
            self.U_regularizer.set_param(self.U_iof)
            self.regularizers.append(self.U_regularizer)
        if self.b_regularizer:
            self.b_regularizer.set_param(self.b_iof)
            self.regularizers.append(self.b_regularizer)

        self.trainable_weights = [self.W_iof, self.U_iof, self.b_iof,
                                  self.init_h, self.init_c]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Code example #23
File: lstm.py Project: nadvornix/recurrent-nets
    def build(self):
        input_shape = self.input_shape
        input_dim = input_shape[2] # = |x| # works only for stateful? (todo: try)
        self.input_dim = input_dim
        self.input = K.placeholder(input_shape)
        

        # output dim = |c| = |h| = |output|
        # input dim = |x|

        if self.stateful:
            self.reset_states()
        else:
            # initial states: 2 all-zero tensors of shape (output_dim)
            self.states = [None, None]

        # input_dim x output_dim
        # output dim = 50 = |h|?

        input_dim = self.input_dim
        output_dim = self.output_dim

        self.W_fx = self.init((input_dim, output_dim))
        self.W_fh = self.inner_init((output_dim, output_dim))
        self.b_f = self.forget_bias_init((output_dim, ))

        self.W_ix = self.init((input_dim, output_dim))
        self.W_ih = self.inner_init((output_dim, output_dim))
        self.b_i = K.zeros((output_dim, ))
        
        self.W_cx = self.init((input_dim, output_dim))
        self.W_ch = self.inner_init((output_dim, output_dim))
        self.b_c = K.zeros((output_dim, ))
        
        self.W_ox = self.init((input_dim, output_dim))
        self.W_oh = self.inner_init((output_dim, output_dim))
        self.b_o = K.zeros((output_dim, ))

        self.trainable_weights = [self.W_fx, self.W_fh, self.b_f,
            self.W_ix, self.W_ih, self.b_i,
            self.W_cx, self.W_ch, self.b_c,
            self.W_ox, self.W_oh, self.b_o,
        ]


        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Code example #24
File: layers.py Project: mwalton/deep-q-learning
    def build(self,input_shape):
        self.W = self._conv_layer.W
        if self.dim_ordering == 'th':
            self.W_shape = (self.nb_out_channels, self.nb_filter, self.nb_row, self.nb_col)
        elif self.dim_ordering == 'tf':
            self.W_shape = (self.nb_row, self.nb_col, self.nb_out_channels, self.nb_filter)
        else:
            raise Exception('Invalid dim_ordering: ' + self.dim_ordering)

        self.b = K.zeros((self.nb_out_channels,))
        self.params = [self.b]
        self.regularizers = []

        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)

        if self.b_regularizer:
            self.b_regularizer.set_param(self.b)
            self.regularizers.append(self.b_regularizer)

        if self.activity_regularizer:
            self.activity_regularizer.set_layer(self)
            self.regularizers.append(self.activity_regularizer)

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Code example #25
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))
        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):

            if p.name in self.lr_mult:
                multiplied_lr = lr * self.lr_mult[p.name]
            else:
                multiplied_lr = lr

            v = self.momentum * m - multiplied_lr * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                new_p = p + self.momentum * v - multiplied_lr * g
            else:
                new_p = p + v

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
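
A plain numpy restatement of this SGD step for one parameter (a sketch, not from the source; `lr_mult` stands in for the `self.lr_mult[p.name]` lookup):

    import numpy as np

    def sgd_step(p, g, m, lr=0.01, momentum=0.9, lr_mult=1.0, nesterov=False):
        lr_eff = lr * lr_mult               # per-parameter learning rate
        v = momentum * m - lr_eff * g       # velocity, stored back into the moment slot
        new_p = p + momentum * v - lr_eff * g if nesterov else p + v
        return new_p, v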
Code example #26
File: decoders.py Project: AppliedML/seq2seq
 def get_initial_states(self, X):
     # build an all-zero tensor of shape (samples, hidden_dim)
     initial_state = K.zeros_like(X)  # (samples, input_dim)
     reducer = K.zeros((self.input_dim, self.hidden_dim))
     initial_state = K.dot(initial_state, reducer)  # (samples, hidden_dim)
     initial_states = [initial_state for _ in range(len(self.states))]
     return initial_states
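
The trick here is that K.zeros needs a fully known shape, so the symbolic batch dimension is obtained by multiplying an all-zero copy of the input (K.zeros_like(X), shape (samples, input_dim)) with a concrete zero matrix of shape (input_dim, hidden_dim). A standalone sketch with invented sizes:

    from keras import backend as K

    X = K.placeholder((None, 8))                      # (samples, input_dim), batch size unknown
    reducer = K.zeros((8, 16))                        # (input_dim, hidden_dim)
    initial_state = K.dot(K.zeros_like(X), reducer)   # (samples, hidden_dim), all zeros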
Code example #27
    def __init__(self, input_dim, output_dim,
                 init='glorot_uniform', activation='linear', weights=None,
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 W_constraint=None, b_constraint=None):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        '''
        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)
        self.constraints = [self.W_constraint, self.b_constraint]

        self.initial_weights = weights
        '''
        
        #super(TimeDistributedDense, self).__init__(**kwargs)

    #def build(self):
        

        self.W = self.init((self.input_dim, self.output_dim))
        self.b = K.zeros((self.output_dim,))

        self.params = [self.W, self.b]
Code example #28
File: local.py Project: Abhipray/keras
    def build(self, input_shape):
        input_dim = input_shape[2]
        _, output_length, nb_filter = self.get_output_shape_for(input_shape)

        self.W_shape = (output_length, self.filter_length * input_dim, nb_filter)
        self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
        if self.bias:
            self.b = K.zeros((output_length, self.nb_filter), name='{}_b'.format(self.name))
            self.trainable_weights = [self.W, self.b]
        else:
            self.trainable_weights = [self.W]

        self.regularizers = []
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)
        if self.b_regularizer:
            self.b_regularizer.set_param(self.b)
            self.regularizers.append(self.b_regularizer)
        if self.activity_regularizer:
            self.activity_regularizer.set_layer(self)
            self.regularizers.append(self.activity_regularizer)

        self.constraints = {}
        if self.W_constraint:
            self.constraints[self.W] = self.W_constraint
        if self.b_constraint:
            self.constraints[self.b] = self.b_constraint

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Code example #29
File: CustomDense.py Project: wosel/SAN
    def build(self):
        input_dim = (self.input_shape[1], self.input_shape[2])
        self.W = self.init((self.output_dim[0], input_dim[0]))

        self.b = K.zeros((self.output_dim[0], self.output_dim[1]))

        if self.has_bias:
            print("training bias unit as well")
            self.trainable_weights = [self.W, self.b]
            self.params = [self.W, self.b]
        else:
            self.trainable_weights = [self.W]
            self.params = [self.W]

        self.regularizers = []
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)

        #if self.b_regularizer:
        #    self.b_regularizer.set_param(self.b)
        #    self.regularizers.append(self.b_regularizer)

        if self.activity_regularizer:
            self.activity_regularizer.set_layer(self)
            self.regularizers.append(self.activity_regularizer)

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Code example #30
File: deconv.py Project: berleon/deepdecoder
 def build(self, input_shape):
     stack_size = input_shape[1]
     self.W_shape = (stack_size, self.nb_filter, self.nb_row, self.nb_col)
     w = self.init(self.W_shape)
     self.W = K.variable(K.get_value(w).reshape(self.W_shape))
     self.b = K.zeros((self.nb_filter,))
     self._trainable_weights = [self.W, self.b]
Code example #31
def EnrichedLSTM(sparse_size,
                 vocab_size,
                 max_length,
                 method='init',
                 embedding_size=200,
                 embeddings_dropout=0.2,
                 hidden_size=100,
                 recurrent_dropout=0.0,
                 output_size=2,
                 trainable_records=True,
                 encoding_layer=None):
    # Reading the sparse version of the EHR variables
    input_record = Input(shape=(sparse_size, ), name='ehr_input')

    # Embedding the EHR variables, optionally with pretrained weights
    if encoding_layer is not None:
        ae_weights = encoding_layer.get_weights()
        record_embedding_layer = Dense(units=embedding_size,
                                       weights=ae_weights,
                                       trainable=trainable_records,
                                       name='ehr_embedding')
    else:
        record_embedding_layer = Dense(units=embedding_size,
                                       trainable=trainable_records,
                                       name='ehr_embedding')
    embedded_record = record_embedding_layer(input_record)

    # Building an embedding layer for the free text in the record
    input_text = Input(shape=(max_length, ), name='text_input')
    embedding_layer = Embedding(input_dim=vocab_size,
                                output_dim=embedding_size,
                                mask_zero=True,
                                name='text_embedding')
    text_embedding = embedding_layer(input_text)

    # Setting the activation for the final layer
    if output_size == 1:
        activation = 'sigmoid'
    else:
        activation = 'softmax'

    # Setting up the RNN
    rnn = LSTM(units=hidden_size,
               dropout=embeddings_dropout,
               recurrent_dropout=recurrent_dropout,
               return_sequences=False,
               return_state=True,
               name='rnn')

    # First option: pass the record as the initial state for the RNN
    if method == 'init':
        # Reshaping the record embedding
        reshaped_record = Reshape((1, embedding_size))(embedded_record)

        # Zero state for the RNN layer
        batch_size = K.shape(input_record)[0]
        zero_state = [
            K.zeros((batch_size, hidden_size)),
            K.zeros((batch_size, hidden_size))
        ]

        # Running the record through the RNN first, and then the text
        rec_out, rec_h, rec_c = rnn(reshaped_record, initial_state=zero_state)
        pre_dense, _, _ = rnn(text_embedding, initial_state=[rec_h, rec_c])

    # Second option: concat the RNN output and the record before softmax
    elif method == 'post':
        rnn_output, _, _ = rnn(text_embedding)
        pre_dense = concatenate([embedded_record, rnn_output])

    # Third option: concat the word embeddings and the (repeated) record emb.
    elif method == 'word':
        repeated_record = RepeatVector(max_length)(embedded_record)
        text_embedding = concatenate([text_embedding, repeated_record], 2)
        pre_dense, _, _ = rnn(text_embedding)

    # Adding the final dense layer
    print(pre_dense.shape)
    output = Dense(units=output_size, activation=activation)(pre_dense)

    # Putting everything together
    model = Model([input_record, input_text], output)
    return model
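
A hypothetical call showing how this factory might be wired up (all sizes invented; method='post' sidesteps the symbolic-batch K.zeros state used in the 'init' branch):

    model = EnrichedLSTM(sparse_size=256, vocab_size=20000, max_length=80,
                         method='post', hidden_size=100, output_size=2)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()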
Code example #32
 def fn(x, L_acc, LT_acc):
     x_ = K.zeros((self.nb_actions, self.nb_actions))
     x_ = T.set_subtensor(x_[np.tril_indices(self.nb_actions)], x)
     diag = K.exp(T.diag(x_)) + K.epsilon()
     x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], diag)
     return x_, x_.T
    def get_updates(self, loss1, loss2, loss3, loss4, loss5, loss6, params):
        grads1 = self.get_gradients(loss1, params)
        grads2 = self.get_gradients(loss2, params)

        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.learning_rate
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))
        c1 = self.descent_weight1
        c2 = self.descent_weight2
        ## for split and not multi, specify the split weighting
        c11 = c1   # for CE dense
        c21 = c2   # for l1 dense
        c12 = 1    # for CE conv
        c22 = 4e-1 # for l2 conv

        if self.multi and not self.split: # calculate weighting for the loss functions given (should be default)

            zero = K.variable(0, name='zero')
            one = K.variable(1, name='one')

            flattenedList1 = [K.flatten(x) for x in grads1]
            gradients1 = K.concatenate(flattenedList1)
            flattenedList2 = [K.flatten(x) for x in grads2]
            gradients2 = K.concatenate(flattenedList2)

            grad21 = gradients2 - gradients1
            grad12 = gradients1 - gradients2
            z1 = K.sum(grad21 * gradients2)
            z2 = K.sum(grad12 * gradients1)
            n = K.sum(grad21 * grad21)

            cm1 = z1 / n
            c1 = K.switch(K.equal(K.all(K.equal(gradients1, gradients2)), K.constant(True, dtype=bool)),
                          lambda: one, lambda: cm1)
            cm2 = z2 / n
            c2 = K.switch(K.equal(K.all(K.equal(gradients1, gradients2)), K.constant(True, dtype=bool)),
                          lambda: zero, lambda: cm2)
           
            (c1, c2) = K.switch(c1 < 0, lambda: (zero, one), lambda: (c1, c2))
            (c2, c1) = K.switch(c2 < 0, lambda: (zero, one), lambda: (c2, c1))

        if self.split and self.multi: # calculate weighting for the loss1 given but split in conv/dense and use different loss2 (namely split loss 2 in loss5 and loss6)
            zero = K.variable(0, name='zero')
            one = K.variable(1, name='one')

            grads5 = self.get_gradients(loss5, params) # l1 loss dense 
            grads6= self.get_gradients(loss6, params) # l2 loss conv 

            flattenedList1 = [K.flatten(x) for x in grads1]
            gradients1 = K.concatenate(flattenedList1)
            flattenedList5 = [K.flatten(x) for x in grads5]
            gradients5 = K.concatenate(flattenedList5)
            flattenedList6 = [K.flatten(x) for x in grads6]
            gradients6 = K.concatenate(flattenedList6)

            grad51 = gradients5 - gradients1
            grad15 = gradients1 - gradients5
            z1 = K.sum(grad51 * gradients5)
            z2 = K.sum(grad15 * gradients1)
            n = K.sum(grad51 * grad51)

            cm1 = z1 / n
            c11 = K.switch(K.equal(K.all(K.equal(gradients1, gradients5)), K.constant(True, dtype=bool)),
                          lambda: one, lambda: cm1)
            cm2 = z2 / n
            c21 = K.switch(K.equal(K.all(K.equal(gradients1, gradients5)), K.constant(True, dtype =bool)),lambda: zero, lambda: cm2)

            (c11, c21) = K.switch(c11 < 0, lambda: (zero, one), lambda: (c11, c21))
            (c21, c11) = K.switch(c21 < 0, lambda: (zero, one), lambda: (c21, c11))

            grad61 = gradients6 - gradients1
            grad16 = gradients1 - gradients6
            z1 = K.sum(grad61 * gradients6)
            z2 = K.sum(grad16 * gradients1)
            n = K.sum(grad61 * grad61)

            cm1 = z1 / n
            c12 = K.switch(K.equal(K.all(K.equal(gradients1, gradients6)), K.constant(True, dtype=bool)),
                            lambda: one, lambda: cm1) # for CE conv
            cm2 = z2 / n 
            c22 = K.switch(K.equal(K.all(K.equal(gradients1, gradients6)), K.constant(True, dtype =bool)),
                            lambda: zero, lambda: cm2) # for l2 conv

            (c12, c22) = K.switch(c12 < 0, lambda: (zero, one), lambda: (c12, c22))
            (c22, c12) = K.switch(c22 < 0, lambda: (zero, one), lambda: (c22, c12))

            c1= c11 # for CE dense 
            c2= c21 # for l1 dense 

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms1 = [K.zeros(K.int_shape(p),
              dtype=K.dtype(p),
              name='m_' + str(i))
              for (i, p) in enumerate(params)]
        ms2 = [K.zeros(K.int_shape(p),
                      dtype=K.dtype(p),
                      name='m_' + str(i))
              for (i, p) in enumerate(params)]
        ms6 = [K.zeros(K.int_shape(p),
                      dtype=K.dtype(p),
                      name='m_' + str(i))
              for (i, p) in enumerate(params)]
        vs1 = [K.zeros(K.int_shape(p),
              dtype=K.dtype(p),
              name='v_' + str(i))
              for (i, p) in enumerate(params)]
        vs2 = [K.zeros(K.int_shape(p),
                      dtype=K.dtype(p),
                      name='v_' + str(i))
              for (i, p) in enumerate(params)]
        vs6 = [K.zeros(K.int_shape(p),
                      dtype=K.dtype(p),
                      name='v_' + str(i))
              for (i, p) in enumerate(params)]

        if self.amsgrad:
            vhats1 = [K.zeros(K.int_shape(p),
                     dtype=K.dtype(p),
                     name='vhat_' + str(i))
                     for (i, p) in enumerate(params)]
            vhats2 = [K.zeros(K.int_shape(p),
                            dtype=K.dtype(p),
                            name='vhat_' + str(i))
                    for (i, p) in enumerate(params)]
            vhats6 = [K.zeros(K.int_shape(p),
                            dtype=K.dtype(p),
                            name='vhat_' + str(i))
                    for (i, p) in enumerate(params)]
        else:
            vhats1 = [K.zeros(1, name='vhat_' + str(i))
                     for i in range(len(params))]
            vhats2 = [K.zeros(1, name='vhat_' + str(i))
                      for i in range(len(params))]
            vhats6 = [K.zeros(1, name='vhat_' + str(i))
                      for i in range(len(params))]
        self.weights = [self.iterations] + ms1 + vs1 + vhats1
        if not self.split:  #grads1,2
            for p, g1,g2, m1,v1, vhat1,m2,v2, vhat2 in zip(params, grads1, grads2, ms1, vs1, vhats1, ms2, vs2, vhats2):

                m_t1 = (self.beta_1 * m1) + (1. - self.beta_1) * g1
                m_t2 = (self.beta_1 * m2) + (1. - self.beta_1) * g2
                v_t1 = (self.beta_2 * v1) + (1. - self.beta_2) * K.square(g1)
                v_t2 = (self.beta_2 * v2) + (1. - self.beta_2) * K.square(g2)

                if self.amsgrad:
                    vhat_t1 = K.maximum(vhat1, v_t1)
                    vhat_t2= K.maximum(vhat2, v_t2)
                    p_t = p - lr_t * (c1*(m_t1 / (K.sqrt(vhat_t1) + self.epsilon))+c2*(m_t2 / (K.sqrt(vhat_t2) + self.epsilon)))
                    self.updates.append(K.update(vhat1, vhat_t1))
                    self.updates.append(K.update(vhat2, vhat_t2))
                else:
                    p_t = p - lr_t * (c1*(m_t1 / (K.sqrt(v_t1) + self.epsilon))+c2*(m_t2 / (K.sqrt(v_t2) + self.epsilon)))

                self.updates.append(K.update(m1, m_t1))
                self.updates.append(K.update(m2, m_t2))
                self.updates.append(K.update(v1, v_t1))
                self.updates.append(K.update(v2, v_t2))
                new_p = p_t

                # Apply constraints.
                if getattr(p, 'constraint', None) is not None:
                    new_p = p.constraint(new_p)

                self.updates.append(K.update(p, new_p))
        else: #grads 1,5,6
             for p, g1, g5, g6, m1,v1, vhat1,m5,v5, vhat5, m6,v6, vhat6 in zip(params, grads1, grads5, grads6, ms1, vs1, vhats1, ms2, vs2, vhats2, ms6, vs6, vhats6):

                m_t1 = (self.beta_1 * m1) + (1. - self.beta_1) * g1
                m_t5 = (self.beta_1 * m5) + (1. - self.beta_1) * g5
                m_t6 = (self.beta_1 * m6) + (1. - self.beta_1) * g6
                v_t1 = (self.beta_2 * v1) + (1. - self.beta_2) * K.square(g1)
                v_t5 = (self.beta_2 * v5) + (1. - self.beta_2) * K.square(g5)
                v_t6 = (self.beta_2 * v6) + (1. - self.beta_2) * K.square(g6)

                if g6 == 0: # its a dense param     
                    if self.amsgrad:
                        vhat_t1 = K.maximum(vhat1, v_t1)
                        vhat_t5= K.maximum(vhat5, v_t5)
                        p_t = p - lr_t * (c11*(m_t1 / (K.sqrt(vhat_t1) + self.epsilon))+c21*(m_t5 / (K.sqrt(vhat_t5)+ self.epsilon)))
                        self.updates.append(K.update(vhat1, vhat_t1))
                        self.updates.append(K.update(vhat5, vhat_t5))
                    else:
                        p_t = p - lr_t * (c11*(m_t1 / (K.sqrt(v_t1) + self.epsilon))+c21*(m_t5 / (K.sqrt(v_t5)+ self.epsilon)))

                    self.updates.append(K.update(m1, m_t1))
                    self.updates.append(K.update(v1, v_t1))
                    self.updates.append(K.update(m5, m_t5))
                    self.updates.append(K.update(v5, v_t5))
                    new_p = p_t
                else:  # its a conv param
                    if self.amsgrad:
                        vhat_t1 = K.maximum(vhat1, v_t1)
                        vhat_t6= K.maximum(vhat6, v_t6)
                        p_t = p - lr_t * (c12*(m_t1 / (K.sqrt(vhat_t1) + self.epsilon))+c22*(m_t6 / (K.sqrt(vhat_t6) + self.epsilon)))
                        self.updates.append(K.update(vhat1, vhat_t1))
                        self.updates.append(K.update(vhat6, vhat_t6))
                    else:
                        p_t = p - lr_t * (c12*(m_t1 / (K.sqrt(v_t1) + self.epsilon))+c22*(m_t6 / (K.sqrt(v_t6) + self.epsilon)))

                    self.updates.append(K.update(m1, m_t1))
                    self.updates.append(K.update(v1, v_t1))
                    self.updates.append(K.update(m6, m_t6))
                    self.updates.append(K.update(v6, v_t6))
                    new_p = p_t
                # Apply constraints.
                if getattr(p, 'constraint', None) is not None:
                    new_p = p.constraint(new_p)

                self.updates.append(K.update(p, new_p))
        return self.updates,c1,c2
Code example #34
    def reset_states(self, states=None):
        if not self.stateful:
            raise AttributeError('Layer must be stateful.')
        input_shape = self.input_spec[0].shape
        state_shape = self.compute_output_shape(input_shape)
        if self.return_state:
            state_shape = state_shape[0]
        if self.return_sequences:
            state_shape = state_shape[:1].concatenate(state_shape[2:])
        if None in state_shape:
            raise ValueError('If a RNN is stateful, it needs to know '
                             'its batch size. Specify the batch size '
                             'of your input tensors: \n'
                             '- If using a Sequential model, '
                             'specify the batch size by passing '
                             'a `batch_input_shape` '
                             'argument to your first layer.\n'
                             '- If using the functional API, specify '
                             'the time dimension by passing a '
                             '`batch_shape` argument to your Input layer.\n'
                             'The same thing goes for the number of rows and '
                             'columns.')

        # helper function
        def get_tuple_shape(nb_channels):
            result = list(state_shape)
            if self.cell.data_format == 'channels_first':
                result[1] = nb_channels
            elif self.cell.data_format == 'channels_last':
                result[3] = nb_channels
            else:
                raise KeyError
            return tuple(result)

        # initialize state if None
        if self.states[0] is None:
            if hasattr(self.cell.state_size, '__len__'):
                self.states = [
                    K.zeros(get_tuple_shape(dim))
                    for dim in self.cell.state_size
                ]
            else:
                self.states = [K.zeros(get_tuple_shape(self.cell.state_size))]
        elif states is None:
            if hasattr(self.cell.state_size, '__len__'):
                for state, dim in zip(self.states, self.cell.state_size):
                    K.set_value(state, np.zeros(get_tuple_shape(dim)))
            else:
                K.set_value(self.states[0],
                            np.zeros(get_tuple_shape(self.cell.state_size)))
        else:
            if not isinstance(states, (list, tuple)):
                states = [states]
            if len(states) != len(self.states):
                raise ValueError('Layer ' + self.name + ' expects ' +
                                 str(len(self.states)) + ' states, ' +
                                 'but it received ' + str(len(states)) +
                                 ' state values. Input received: ' +
                                 str(states))
            for index, (value, state) in enumerate(zip(states, self.states)):
                if hasattr(self.cell.state_size, '__len__'):
                    dim = self.cell.state_size[index]
                else:
                    dim = self.cell.state_size
                if value.shape != get_tuple_shape(dim):
                    raise ValueError('State ' + str(index) +
                                     ' is incompatible with layer ' +
                                     self.name + ': expected shape=' +
                                     str(get_tuple_shape(dim)) +
                                     ', found shape=' + str(value.shape))
                # TODO(anjalisridhar): consider batch calls to `set_value`.
                K.set_value(state, value)
Code example #35
    def build(self, input_shape):
        #assert self.output_dim == input_shape[-1]
        self.input_spec = [InputSpec(shape=input_shape)]
        self.middle_length = input_shape[2]
        input_dim = input_shape[3]

        # Attention
        self.W_a = self.init((input_dim + self.output_dim, self.output_dim),
                             name='{}_W_a'.format(self.name))
        self.b_a = K.zeros((self.output_dim, ),
                           name='{}_b_a'.format(self.name))

        # Regular LSTM
        self.input_dim = input_dim

        if self.stateful:
            self.reset_states()
        else:
            # initial states: 2 all-zero tensors of shape (output_dim)
            self.states = [None, None]

        self.W_i = self.init((input_dim, self.output_dim),
                             name='{}_W_i'.format(self.name))
        self.U_i = self.inner_init((self.output_dim, self.output_dim),
                                   name='{}_U_i'.format(self.name))
        self.b_i = K.zeros((self.output_dim, ),
                           name='{}_b_i'.format(self.name))

        self.W_f = self.init((input_dim, self.output_dim),
                             name='{}_W_f'.format(self.name))
        self.U_f = self.inner_init((self.output_dim, self.output_dim),
                                   name='{}_U_f'.format(self.name))
        self.b_f = self.forget_bias_init((self.output_dim, ),
                                         name='{}_b_f'.format(self.name))

        self.W_c = self.init((input_dim, self.output_dim),
                             name='{}_W_c'.format(self.name))
        self.U_c = self.inner_init((self.output_dim, self.output_dim),
                                   name='{}_U_c'.format(self.name))
        self.b_c = K.zeros((self.output_dim, ),
                           name='{}_b_c'.format(self.name))

        self.W_o = self.init((input_dim, self.output_dim),
                             name='{}_W_o'.format(self.name))
        self.U_o = self.inner_init((self.output_dim, self.output_dim),
                                   name='{}_U_o'.format(self.name))
        self.b_o = K.zeros((self.output_dim, ),
                           name='{}_b_o'.format(self.name))

        self.regularizers = []
        if self.W_regularizer:
            self.W_regularizer.set_param(
                K.concatenate([self.W_i, self.W_f, self.W_c, self.W_o]))
            self.regularizers.append(self.W_regularizer)
        if self.U_regularizer:
            self.U_regularizer.set_param(
                K.concatenate([self.U_i, self.U_f, self.U_c, self.U_o]))
            self.regularizers.append(self.U_regularizer)
        if self.b_regularizer:
            self.b_regularizer.set_param(
                K.concatenate([self.b_i, self.b_f, self.b_c, self.b_o]))
            self.regularizers.append(self.b_regularizer)

        self.trainable_weights = [
            self.W_i, self.U_i, self.b_i, self.W_c, self.U_c, self.b_c,
            self.W_f, self.U_f, self.b_f, self.W_o, self.U_o, self.b_o,
            self.W_a, self.b_a
        ]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Code example #36
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.learning_rate
        if self.initial_decay > 0:
            lr *= (1. /
                   (1. +
                    self.decay * K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):

            # Learning rate multipliers
            if self.multipliers:
                multiplier = [
                    mult for mult in self.multipliers if mult in p.name
                ]
            else:
                multiplier = None
            if multiplier:
                new_lr_t = lr_t * self.multipliers[multiplier[0]]
                if self.debug_verbose:
                    print('Setting {} to learning rate {}'.format(
                        multiplier[0], new_lr_t))
                    print(K.get_value(new_lr_t))
            else:
                new_lr_t = lr_t
                if self.debug_verbose:
                    print('No change in learning rate {}'.format(p.name))
                    print(K.get_value(new_lr_t))
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - new_lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - new_lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Code example #37
 def fn(x, P_acc):
     x_ = K.zeros((self.nb_actions, self.nb_actions))
     x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)],
                          x)
     return x_
Code example #38
    def call(self, x, mask=None):
        # TODO: validate input shape

        # The input of this layer is [L, mu, a] in concatenated form. We first split
        # those up.
        idx = 0
        if self.mode == 'full':
            L_flat = x[:, idx:idx +
                       (self.nb_actions * self.nb_actions + self.nb_actions) //
                       2]
            idx += (self.nb_actions * self.nb_actions + self.nb_actions) // 2
        elif self.mode == 'diag':
            L_flat = x[:, idx:idx + self.nb_actions]
            idx += self.nb_actions
        else:
            L_flat = None
        assert L_flat is not None
        mu = x[:, idx:idx + self.nb_actions]
        idx += self.nb_actions
        a = x[:, idx:idx + self.nb_actions]
        idx += self.nb_actions

        if self.mode == 'full':
            # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
            L = None
            LT = None
            if K._BACKEND == 'theano':
                import theano.tensor as T
                import theano

                def fn(x, L_acc, LT_acc):
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.tril_indices(self.nb_actions)],
                                         x)
                    diag = K.exp(T.diag(x_) + K.epsilon())
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)],
                                         diag)
                    return x_, x_.T

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                results, _ = theano.scan(fn=fn,
                                         sequences=L_flat,
                                         outputs_info=outputs_info)
                L, LT = results
            elif K._BACKEND == 'tensorflow':
                import tensorflow as tf

                # Number of elements in a triangular matrix.
                nb_elems = (self.nb_actions * self.nb_actions +
                            self.nb_actions) // 2

                # Create mask for the diagonal elements in L_flat. This is used to exponentiate
                # only the diagonal elements, which is done before gathering.
                diag_indices = [0]
                for row in range(1, self.nb_actions):
                    diag_indices.append(diag_indices[-1] + (row + 1))
                diag_mask = np.zeros(1 + nb_elems)  # +1 for the leading zero
                diag_mask[np.array(diag_indices) + 1] = 1
                diag_mask = K.variable(diag_mask)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a lower triangular matrix L.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1, )), [nb_rows]), 1)
                L_flat = tf.concat(1, [zeros, L_flat])

                # Create mask that can be used to gather elements from L_flat and put them
                # into a lower triangular matrix.
                tril_mask = np.zeros((self.nb_actions, self.nb_actions),
                                     dtype='int32')
                tril_mask[np.tril_indices(self.nb_actions)] = range(
                    1, nb_elems + 1)

                # Finally, process each element of the batch.
                init = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]

                def fn(a, x):
                    # Exponentiate everything. This is much easier than only exponentiating
                    # the diagonal elements, and, usually, the action space is relatively low.
                    x_ = K.exp(x + K.epsilon())
                    # Only keep the diagonal elements.
                    x_ *= diag_mask
                    # Add the original, non-diagonal elements.
                    x_ += x * (1. - diag_mask)
                    # Finally, gather everything into a lower triangular matrix.
                    L_ = tf.gather(x_, tril_mask)
                    return [L_, tf.transpose(L_)]

                tmp = tf.scan(fn, L_flat, initializer=init)
                if isinstance(tmp, (list, tuple)):
                    # TensorFlow 0.10 now returns a tuple of tensors.
                    L, LT = tmp
                else:
                    # Old TensorFlow < 0.10 returns a shared tensor.
                    L = tmp[:, 0, :, :]
                    LT = tmp[:, 1, :, :]
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(
                    K._BACKEND))
            assert L is not None
            assert LT is not None
            P = K.batch_dot(L, LT)
        elif self.mode == 'diag':
            if K._BACKEND == 'theano':
                import theano.tensor as T
                import theano

                def fn(x, P_acc):
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)],
                                         x)
                    return x_

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                P, _ = theano.scan(fn=fn,
                                   sequences=L_flat,
                                   outputs_info=outputs_info)
            elif K._BACKEND == 'tensorflow':
                import tensorflow as tf

                # Create mask that can be used to gather elements from L_flat and put them
                # into a diagonal matrix.
                diag_mask = np.zeros((self.nb_actions, self.nb_actions),
                                     dtype='int32')
                diag_mask[np.diag_indices(self.nb_actions)] = range(
                    1, self.nb_actions + 1)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a lower triangular matrix L.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1, )), [nb_rows]), 1)
                L_flat = tf.concat(1, [zeros, L_flat])

                # Finally, process each element of the batch.
                def fn(a, x):
                    x_ = tf.gather(x, diag_mask)
                    return x_

                P = tf.scan(fn,
                            L_flat,
                            initializer=K.zeros(
                                (self.nb_actions, self.nb_actions)))
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(
                    K._BACKEND))
        assert P is not None
        assert K.ndim(P) == 3

        # Combine a, mu and P into a scalar (over the batches). What we compute here is
        # -.5 * (a - mu)^T * P * (a - mu), where * denotes the dot-product. Unfortunately
        # TensorFlow handles vector * P slightly suboptimal, hence we convert the vectors to
        # 1xd/dx1 matrices and finally flatten the resulting 1x1 matrix into a scalar. All
        # operations happen over the batch size, which is dimension 0.
        prod = K.batch_dot(K.expand_dims(a - mu, dim=1), P)
        prod = K.batch_dot(prod, K.expand_dims(a - mu, dim=-1))
        A = -.5 * K.batch_flatten(prod)
        assert K.ndim(A) == 2
        return A
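For intuition about the gather trick in the TensorFlow branch above, here is a small NumPy sketch (an illustration only, not part of the layer) showing how a flat vector of n*(n+1)/2 entries plus a leading zero is scattered into a lower-triangular matrix via an integer mask:

import numpy as np

n = 3
nb_elems = n * (n + 1) // 2                  # 6 entries for a 3x3 triangle
l_flat = np.array([1., 2., 3., 4., 5., 6.])  # one batch element
padded = np.concatenate([[0.], l_flat])      # index 0 acts as the "empty" slot

tril_mask = np.zeros((n, n), dtype=int)
tril_mask[np.tril_indices(n)] = range(1, nb_elems + 1)

L = padded[tril_mask]   # gathers: [[1., 0., 0.], [2., 3., 0.], [4., 5., 6.]]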
Code example #39
File: optimizer_v1.py Project: z-a-f/keras-1
 def _create_all_weights(self, params):
   shapes = [K.int_shape(p) for p in params]
   accumulators = [K.zeros(shape) for shape in shapes]
   self.weights = accumulators
   return accumulators
Code example #40
File: weightnorm.py Project: mahfujau/GAN
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))

        t = self.iterations + 1
        lr_t = lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (
            1. - K.pow(self.beta_1, t))

        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):

            # if a weight tensor (len > 1) use weight normalized parameterization
            # this is the only part changed w.r.t. keras.optimizers.Adam
            ps = K.get_variable_shape(p)
            if len(ps) > 1:

                # get weight normalization parameters
                V, V_norm, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads(
                    p, g)

                # Adam containers for the 'g' parameter
                V_scaler_shape = K.get_variable_shape(V_scaler)
                m_g = K.zeros(V_scaler_shape)
                v_g = K.zeros(V_scaler_shape)

                # update g parameters
                m_g_t = (self.beta_1 * m_g) + (1. - self.beta_1) * grad_g
                v_g_t = (self.beta_2 *
                         v_g) + (1. - self.beta_2) * K.square(grad_g)
                new_g_param = g_param - lr_t * m_g_t / (K.sqrt(v_g_t) +
                                                        self.epsilon)
                self.updates.append(K.update(m_g, m_g_t))
                self.updates.append(K.update(v_g, v_g_t))

                # update V parameters
                m_t = (self.beta_1 * m) + (1. - self.beta_1) * grad_V
                v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(grad_V)
                new_V_param = V - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
                self.updates.append(K.update(m, m_t))
                self.updates.append(K.update(v, v_t))

                # if there are constraints we apply them to V, not W
                if p in constraints:
                    c = constraints[p]
                    new_V_param = c(new_V_param)

                # wn param updates --> W updates
                add_weightnorm_param_updates(self.updates, new_V_param,
                                             new_g_param, p, V_scaler)

            else:  # do optimization normally
                m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
                v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

                self.updates.append(K.update(m, m_t))
                self.updates.append(K.update(v, v_t))

                new_p = p_t
                # apply constraints
                if p in constraints:
                    c = constraints[p]
                    new_p = c(new_p)
                self.updates.append(K.update(p, new_p))
        return self.updates
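Both weight-normalised optimizers in this file rely on get_weightnorm_params_and_grads and add_weightnorm_param_updates from the same weightnorm.py module. As a reminder, and only as a sketch of the reparameterization those helpers are built around (W = g * V / ||V||, Salimans & Kingma 2016), here is the idea in NumPy; this is an assumption about their role, not the module's code:

import numpy as np

V = np.random.randn(64, 32)                            # direction parameters
g = np.ones(32)                                        # per-output-unit scale
W = g * V / np.linalg.norm(V, axis=0, keepdims=True)   # effective weights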
Code example #41
File: weightnorm.py Project: mahfujau/GAN
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        self.updates = []

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))
            self.updates.append(K.update_add(self.iterations, 1))

        # momentum
        shapes = [K.get_variable_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):

            # if a weight tensor (len > 1) use weight normalized parameterization
            ps = K.get_variable_shape(p)
            if len(ps) > 1:

                # get weight normalization parameters
                V, V_norm, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads(
                    p, g)

                # momentum container for the 'g' parameter
                V_scaler_shape = K.get_variable_shape(V_scaler)
                m_g = K.zeros(V_scaler_shape)

                # update g parameters
                v_g = self.momentum * m_g - lr * grad_g  # velocity
                self.updates.append(K.update(m_g, v_g))
                if self.nesterov:
                    new_g_param = g_param + self.momentum * v_g - lr * grad_g
                else:
                    new_g_param = g_param + v_g

                # update V parameters
                v_v = self.momentum * m - lr * grad_V  # velocity
                self.updates.append(K.update(m, v_v))
                if self.nesterov:
                    new_V_param = V + self.momentum * v_v - lr * grad_V
                else:
                    new_V_param = V + v_v

                # if there are constraints we apply them to V, not W
                if p in constraints:
                    c = constraints[p]
                    new_V_param = c(new_V_param)

                # wn param updates --> W updates
                add_weightnorm_param_updates(self.updates, new_V_param,
                                             new_g_param, p, V_scaler)

            else:  # normal SGD with momentum
                v = self.momentum * m - lr * g  # velocity
                self.updates.append(K.update(m, v))

                if self.nesterov:
                    new_p = p + self.momentum * v - lr * g
                else:
                    new_p = p + v

                # apply constraints
                if p in constraints:
                    c = constraints[p]
                    new_p = c(new_p)

                self.updates.append(K.update(p, new_p))
        return self.updates
Code example #42
    def get_initial_state(self, x):
        input_shape = self.input_spec[0].shape
        init_nb_row = input_shape[self.row_axis]
        init_nb_col = input_shape[self.column_axis]

        base_initial_state = K.zeros_like(x)  # (samples, timesteps) + image_shape
        non_channel_axis = -1 if self.data_format == 'channels_first' else -2
        for _ in range(2):
            base_initial_state = K.sum(base_initial_state, axis=non_channel_axis)
        base_initial_state = K.sum(base_initial_state, axis=1)  # (samples, nb_channels)

        initial_states = []
        states_to_pass = ['r', 'c', 'e']
        nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
        if self.extrap_start_time is not None:
           states_to_pass.append('ahat')  # pass prediction in states so can use as actual for t+1 when extrapolating
           nlayers_to_pass['ahat'] = 1
        for u in states_to_pass:
            for l in range(nlayers_to_pass[u]):
                ds_factor = 2 ** l
                nb_row = init_nb_row // ds_factor
                nb_col = init_nb_col // ds_factor
                if u in ['r', 'c']:
                    stack_size = self.R_stack_sizes[l]
                elif u == 'e':
                    stack_size = 2 * self.stack_sizes[l]
                elif u == 'ahat':
                    stack_size = self.stack_sizes[l]
                output_size = stack_size * nb_row * nb_col  # flattened size

                reducer = K.zeros((input_shape[self.channel_axis], output_size)) # (nb_channels, output_size)
                initial_state = K.dot(base_initial_state, reducer) # (samples, output_size)
                if self.data_format == 'channels_first':
                    output_shp = (-1, stack_size, nb_row, nb_col)
                else:
                    output_shp = (-1, nb_row, nb_col, stack_size)
                initial_state = K.reshape(initial_state, output_shp)
                initial_states += [initial_state]

        if self.multi_task_train:
            # encoder level 0
            output_size = self.lbl_pred_chns[0] * 1 * 1 # flattened size
            reducer = K.zeros((input_shape[self.channel_axis], output_size))
            initial_state = K.dot(base_initial_state, reducer) # (samples, output_size)
            output_shp = (-1, 1, 1, self.lbl_pred_chns[0])  ### Hardcoded for 'channels_last' only
            initial_state = K.reshape(initial_state, output_shp)
            initial_states += [initial_state]
            # encoder level 1
            output_size = self.nb_classes * 1 * 1 # flattened size
            reducer = K.zeros((input_shape[self.channel_axis], output_size))
            initial_state = K.dot(base_initial_state, reducer) # (samples, output_size)
            output_shp = (-1, 1, 1, self.nb_classes)  ### Hardcoded for 'channels_last' only
            initial_state = K.reshape(initial_state, output_shp)
            initial_states += [initial_state]


        if K._BACKEND == 'theano':
            from theano import tensor as T
            # There is a known issue in the Theano scan op when dealing with inputs whose shape is 1 along a dimension.
            # In our case, this is a problem when training on grayscale images, and the below line fixes it.
            initial_states = [T.unbroadcast(init_state, 0, 1) for init_state in initial_states]

        if self.extrap_start_time is not None:
            initial_states += [K.variable(0, int if K.backend() != 'tensorflow' else 'int32')]  # the last state will correspond to the current timestep
        
        return initial_states
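The reducer multiplication above is only a shape trick: the input is zeroed, summed over its spatial and time axes, and multiplied by a zero matrix, which yields an all-zero initial state whose leading dimension is the symbolic batch size. A rough NumPy analogue, assuming channels-last input (illustration only):

import numpy as np

x = np.random.randn(4, 10, 32, 32, 3)                    # (samples, time, H, W, C)
base = np.zeros_like(x).sum(axis=(-2, -3)).sum(axis=1)   # (samples, C), all zeros
reducer = np.zeros((3, 7))                               # (C, output_size)
state = base.dot(reducer).reshape(-1, 1, 1, 7)           # zero state, batch-sized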
Code example #43
num_epochs = 10

# defining the learning rate
lr = 0.1

# building the model

# defining the placeholders to feed the input and target data
input_tensor = K.placeholder(shape=(batch_size, input_dim), dtype='float32')
target_tensor = K.placeholder(shape=(batch_size, 1), dtype='float32')

# defining the weight and the bias variables
weight_variable = K.random_uniform_variable(shape=(input_dim, 1),
                                            low=-1., high=1.,
                                            dtype='float32')
bias_variable = K.zeros(shape=(1, ), dtype='float32')

# defining the sigmoid output tensor
output_tensor = K.dot(input_tensor, weight_variable) + bias_variable
output_tensor = K.sigmoid(output_tensor)

# defining the mean loss tensor
loss_tensor = K.mean(K.binary_crossentropy(target_tensor,
                                           output_tensor))

# getting the gradients of the mean loss with respect to the weight and bias
gradient_tensors = K.gradients(loss=loss_tensor, variables= [weight_variable,
                                                             bias_variable])

# creating the updates based on stochastic gradient descent rule
updates = [(weight_variable, weight_variable - lr * gradient_tensors[0]),
           (bias_variable, bias_variable - lr * gradient_tensors[1])]
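A minimal sketch of how the placeholders, loss and updates defined above could be compiled into a training step with K.function; the data-feeding loop and the iterate_minibatches helper are assumptions added for illustration:

train_step = K.function(inputs=[input_tensor, target_tensor],
                        outputs=[loss_tensor],
                        updates=updates)

for epoch in range(num_epochs):
    for x_batch, y_batch in iterate_minibatches(x_data, y_data, batch_size):
        loss_value = train_step([x_batch, y_batch])[0]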
Code example #44
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr

        completed_updates = K.cast(
            K.tf.floordiv(self.iterations, self.accum_iters), K.floatx())

        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * completed_updates))

        t = completed_updates + 1

        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        # self.iterations incremented after processing a batch
        # batch:              1 2 3 4 5 6 7 8 9
        # self.iterations:    0 1 2 3 4 5 6 7 8
        # update_switch = 1:        x       x     (if accum_iters=4)
        update_switch = K.equal((self.iterations + 1) % self.accum_iters, 0)
        update_switch = K.cast(update_switch, K.floatx())

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        gs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]

        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]

        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat, tg in zip(params, grads, ms, vs, vhats, gs):

            sum_grad = tg + g
            avg_grad = sum_grad / self.accum_iters_float

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * avg_grad
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(avg_grad)

            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(
                    K.update(vhat, (1 - update_switch) * vhat +
                             update_switch * vhat_t))
            else:
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(
                K.update(m, (1 - update_switch) * m + update_switch * m_t))
            self.updates.append(
                K.update(v, (1 - update_switch) * v + update_switch * v_t))
            self.updates.append(K.update(tg, (1 - update_switch) * sum_grad))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(
                K.update(p, (1 - update_switch) * p + update_switch * new_p))
        return self.updates
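The update_switch above gates the accumulated-gradient Adam step: parameters and moment slots are only overwritten on every accum_iters-th batch, while the running gradient sum tg keeps growing in between. A tiny sketch of the schedule, mirroring the comment table (assuming accum_iters = 4):

accum_iters = 4
for iteration in range(9):                       # value of self.iterations per batch
    update_switch = int((iteration + 1) % accum_iters == 0)
    print('batch', iteration + 1, 'update' if update_switch else 'accumulate')
# an update is applied on batches 4 and 8, as in the table above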
    def get_updates(self, loss1, loss2, loss3, loss4, loss5, loss6, params):
        grads1 = self.get_gradients(loss1, params)
        grads2 = self.get_gradients(loss2, params)
        accumulators1 = [K.zeros(K.int_shape(p),
                        dtype=K.dtype(p),
                        name='accumulator_' + str(i))
                        for (i, p) in enumerate(params)]
        accumulators2 = [K.zeros(K.int_shape(p),
                                 dtype=K.dtype(p),
                                 name='accumulator_' + str(i))
                         for (i, p) in enumerate(params)]

        accumulators6 = [K.zeros(K.int_shape(p),
                                 dtype=K.dtype(p),
                                 name='accumulator_' + str(i))
                         for (i, p) in enumerate(params)]

        self.weights = [self.iterations] + accumulators1
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.learning_rate
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                   K.dtype(self.decay))))
        c1 = self.descent_weight1
        c2 = self.descent_weight2
        ## for split without multi, specify the split weighting manually
        c11 = c1  # for CE dense
        c21 = c2  # for l1 dense
        c12 = 1  # for CE conv
        c22 = 4e-1  # for l2 conv

        if self.multi and not self.split: # calculate weighting for the loss functions given (should be default)
            zero = K.variable(0, name='zero')
            one = K.variable(1, name='one')

            flattenedList1 = [K.flatten(x) for x in grads1]
            gradients1 = K.concatenate(flattenedList1)
            flattenedList2 = [K.flatten(x) for x in grads2]
            gradients2 = K.concatenate(flattenedList2)

            grad21 = gradients2 - gradients1
            grad12 = gradients1 - gradients2
            z1 = K.sum(grad21 * gradients2)
            z2 = K.sum(grad12 * gradients1)
            n = K.sum(grad21 * grad21)

            cm1 = z1 / n
            c1 = K.switch(K.equal(K.all(K.equal(gradients1, gradients2)), K.constant(True, dtype=bool)),
                          lambda: one, lambda: cm1)
            cm2 = z2 / n
            c2 = K.switch(K.equal(K.all(K.equal(gradients1, gradients2)), K.constant(True, dtype=bool)),
                          lambda: zero, lambda: cm2)
            (c1, c2) = K.switch(c1 < 0, lambda: (zero, one), lambda: (c1, c2))
            (c2, c1) = K.switch(c2 < 0, lambda: (zero, one), lambda: (c2, c1))

        if self.split and self.multi: # calculate weighting for the loss1 given but split in conv/dense and use different loss2 (namely split loss 2 in loss5 and loss6)
            zero = K.variable(0, name='zero')
            one = K.variable(1, name='one')

            grads5 = self.get_gradients(loss5, params)  # l1 loss dense
            grads6 = self.get_gradients(loss6, params)  # l2 loss conv

            flattenedList1 = [K.flatten(x) for x in grads1]
            gradients1 = K.concatenate(flattenedList1)
            flattenedList5 = [K.flatten(x) for x in grads5]
            gradients5 = K.concatenate(flattenedList5)
            flattenedList6 = [K.flatten(x) for x in grads6]
            gradients6 = K.concatenate(flattenedList6)

            grad51 = gradients5 - gradients1
            grad15 = gradients1 - gradients5
            z1 = K.sum(grad51 * gradients5)
            z2 = K.sum(grad15 * gradients1)
            n = K.sum(grad51 * grad51)

            cm1 = z1 / n
            c11 = K.switch(K.equal(K.all(K.equal(gradients1, gradients5)), K.constant(True, dtype=bool)),
                          lambda: one, lambda: cm1)
            cm2 = z2 / n
            c21 = K.switch(K.equal(K.all(K.equal(gradients1, gradients5)), K.constant(True, dtype =bool)),lambda: zero, lambda: cm2)

            (c11, c21) = K.switch(c11 < 0, lambda: (zero, one), lambda: (c11, c21))
            (c21, c11) = K.switch(c21 < 0, lambda: (zero, one), lambda: (c21, c11))

            grad61 = gradients6 - gradients1
            grad16 = gradients1 - gradients6
            z1 = K.sum(grad61 * gradients6)
            z2 = K.sum(grad16 * gradients1)
            n = K.sum(grad61 * grad61)

            cm1 = z1 / n
            c12 = K.switch(K.equal(K.all(K.equal(gradients1, gradients6)), K.constant(True, dtype=bool)),
                            lambda: one, lambda: cm1) # for CE conv
            cm2 = z2 / n 
            c22 = K.switch(K.equal(K.all(K.equal(gradients1, gradients6)), K.constant(True, dtype =bool)),
                            lambda: zero, lambda: cm2) # for l2 conv

            (c12, c22) = K.switch(c12 < 0, lambda: (zero, one), lambda: (c12, c22))
            (c22, c12) = K.switch(c22 < 0, lambda: (zero, one), lambda: (c22, c12))

            c1 = c11  # for CE dense
            c2 = c21  # for l1 dense

        if not self.split:  #grads1,2
            for p, g1, g2, a1,a2 in zip(params, grads1, grads2, accumulators1, accumulators2):
                # update accumulator
                new_a1 = self.rho * a1 + (1. - self.rho) * K.square(g1)
                new_a2 = self.rho * a2 + (1. - self.rho) * K.square(g2)
                self.updates.append(K.update(a1, new_a1))
                self.updates.append(K.update(a2, new_a2))
                new_p = p - lr * (c1 * (g1 / (K.sqrt(new_a1) + self.epsilon)) +
                                  c2 * (g2 / (K.sqrt(new_a2) + self.epsilon)))

                # Apply constraints.
                if getattr(p, 'constraint', None) is not None:
                    new_p = p.constraint(new_p)

                self.updates.append(K.update(p, new_p))
        else: #grads 1,5,6
            for p, g1, g5, g6, a1,a5, a6  in zip(params, grads1, grads5, grads6, accumulators1, accumulators2, accumulators6):
                
                if g6 == 0:  # it's a dense param
                    # update accumulator
                    new_a1 = self.rho * a1 + (1. - self.rho) * K.square(g1)
                    new_a5 = self.rho * a5 + (1. - self.rho) * K.square(g5)
                    self.updates.append(K.update(a1, new_a1))
                    self.updates.append(K.update(a5, new_a5))
                    new_p = p - lr * (c11 * (g1 / (K.sqrt(new_a1) + self.epsilon)) +
                                      c21 * (g5 / (K.sqrt(new_a5) + self.epsilon)))

                    # Apply constraints.
                    if getattr(p, 'constraint', None) is not None:
                        new_p = p.constraint(new_p)

                    self.updates.append(K.update(p, new_p))
                else:  # it's a conv param
                    # update accumulator
                    new_a1 = self.rho * a1 + (1. - self.rho) * K.square(g1)
                    new_a6 = self.rho * a6 + (1. - self.rho) * K.square(g6)
                    self.updates.append(K.update(a1, new_a1))
                    self.updates.append(K.update(a6, new_a6))
                    new_p = p - lr * (c12 * (g1 / (K.sqrt(new_a1) + self.epsilon)) +
                                      c22 * (g6 / (K.sqrt(new_a6) + self.epsilon)))

                    # Apply constraints.
                    if getattr(p, 'constraint', None) is not None:
                        new_p = p.constraint(new_p)

                    self.updates.append(K.update(p, new_p))

        return self.updates, c1, c2
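The c1/c2 weighting above appears to be the closed-form min-norm combination of two gradients: it projects onto the segment between them and clips the coefficients to [0, 1], so identical gradients give (1, 0) and conflicting gradients get balanced. A NumPy sketch of the same arithmetic, for illustration only:

import numpy as np

def two_loss_weights(g1, g2):
    g1, g2 = g1.ravel(), g2.ravel()
    if np.allclose(g1, g2):
        return 1.0, 0.0
    d = g2 - g1
    n = np.dot(d, d)
    c1 = np.dot(d, g2) / n            # z1 / n in the optimizer above
    c2 = np.dot(-d, g1) / n           # z2 / n; note c1 + c2 == 1
    if c1 < 0:
        return 0.0, 1.0
    if c2 < 0:
        return 1.0, 0.0
    return c1, c2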
Code example #46
	X = np.asarray(X, dtype=np.float32)
	Y = np.asarray(Y, dtype=np.float32)
	return X, Y

N = 100

X_train, Y_train = data_construction(N, length=6, size=5, end_marker=True)		
np.set_printoptions(precision=3)

model = 'D'

## A
if model=='A':
	controller_input = Input(shape=(14,12),name='New_Input')

	MEMORY = Lambda(lambda x: K.zeros(shape=(1,120,40)),name='Memory_0')(controller_input)
	usage_weights = Lambda(lambda x: K.zeros(shape=(1,1,120)),name='Usage_Weights_0')(controller_input)
	read_weights = Lambda(lambda x: K.zeros(shape=(1,14,120)),name='Read_Weights_0')(controller_input)

	controller = LSTM(units=200, activation='tanh',stateful=False, return_sequences=True,name='LSTM_CONTROLLER')(controller_input)
	write_keys = Dense(40, activation='tanh',name='Write_Keys')(controller)
	read_keys = Dense(40, activation='tanh',name='Read_Keys')(controller)
	omegas = Dense(1, activation='sigmoid',name='Omegas')(controller)
	least_usage = Lambda(lambda x: K.one_hot(indices=K.argmax(-x),num_classes=120),name='Least_Usage')(usage_weights)
	omegas_tiled = Lambda(lambda x: K.tile(x,(1,1,120)))(omegas)
	compl_omegas = Lambda(lambda o:  K.ones(shape=(14,120)) - o)(omegas_tiled)
	rd_part = Multiply()([omegas_tiled, read_weights])
	us_part = Multiply()([compl_omegas, least_usage])
	write_weights = Add(name='Write_Weights')([rd_part,us_part])
	writing = Dot(axes=[1,1])([write_weights, write_keys])
	MEMORY = Add(name='Memory')([MEMORY, writing])
    def get_updates(self, loss1, loss2, loss3, loss4, loss5, loss6, params):
        grads1 = self.get_gradients(loss1, params)
        grads2 = self.get_gradients(loss2, params)
        grads5 = self.get_gradients(loss5, params)  # l1 loss dense
        grads6 = self.get_gradients(loss6, params)  # l2 loss conv

        self.updates = [K.update_add(self.iterations, 1)]
        lr = self.learning_rate

        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        c1 = self.descent_weight1
        c2 = self.descent_weight2
        ## for split without multi, specify the split weighting manually
        c11 = c1  # for CE dense
        c21 = c2  # for l1 dense
        c12 = 1  # for CE conv
        c22 = 4e-1  # for l2 conv


        if self.multi and not self.split: # calculate weighting for the loss functions given (default, also in the paper)
            zero = K.variable(0, name='zero')
            one = K.variable(1, name='one')

            flattenedList1 = [K.flatten(x) for x in grads1]
            gradients1 = K.concatenate(flattenedList1)
            flattenedList2 = [K.flatten(x) for x in grads2]
            gradients2 = K.concatenate(flattenedList2)

            grad21 = gradients2 - gradients1
            grad12 = gradients1 - gradients2
            z1 = K.sum(grad21 * gradients2)
            z2 = K.sum(grad12 * gradients1)
            n = K.sum(grad21 * grad21)

            cm1 = z1 / n
            c1 = K.switch(K.equal(K.all(K.equal(gradients1, gradients2)), K.constant(True, dtype=bool)),
                          lambda: one, lambda: cm1)
            cm2 = z2 / n
            c2 = K.switch(K.equal(K.all(K.equal(gradients1, gradients2)), K.constant(True, dtype =bool)),lambda: zero, lambda: cm2)
           
            (c1, c2) = K.switch(c1 < 0, lambda: (zero, one), lambda: (c1, c2))
            (c2, c1) = K.switch(c2 < 0, lambda: (zero, one), lambda: (c2, c1))

        if self.split and self.multi:  # calculate weighting for the loss1 given but split in conv/dense and use different loss2 (namely split loss 2 in loss5 and loss6)
            zero = K.variable(0, name='zero')
            one = K.variable(1, name='one')

            flattenedList1 = [K.flatten(x) for x in grads1]
            gradients1 = K.concatenate(flattenedList1)
            flattenedList5 = [K.flatten(x) for x in grads5]
            gradients5 = K.concatenate(flattenedList5)
            flattenedList6 = [K.flatten(x) for x in grads6]
            gradients6 = K.concatenate(flattenedList6)

            grad51 = gradients5 - gradients1
            grad15 = gradients1 - gradients5
            z1 = K.sum(grad51 * gradients5)
            z2 = K.sum(grad15 * gradients1)
            n = K.sum(grad51 * grad51)

            cm1 = z1 / n
            c11 = K.switch(K.equal(K.all(K.equal(gradients1, gradients5)), K.constant(True, dtype=bool)),
                          lambda: one, lambda: cm1)
            cm2 = z2 / n
            c21 = K.switch(K.equal(K.all(K.equal(gradients1, gradients5)), K.constant(True, dtype =bool)),lambda: zero, lambda: cm2)

            (c11, c21) = K.switch(c11 < 0, lambda: (zero, one), lambda: (c11, c21))
            (c21, c11) = K.switch(c21 < 0, lambda: (zero, one), lambda: (c21, c11))

            grad61 = gradients6 - gradients1
            grad16 = gradients1 - gradients6
            z1 = K.sum(grad61 * gradients6)
            z2 = K.sum(grad16 * gradients1)
            n = K.sum(grad61 * grad61)

            cm1 = z1 / n
            c12 = K.switch(K.equal(K.all(K.equal(gradients1, gradients6)), K.constant(True, dtype=bool)),
                            lambda: one, lambda: cm1) # for CE conv
            cm2 = z2 / n 
            c22 = K.switch(K.equal(K.all(K.equal(gradients1, gradients6)), K.constant(True, dtype =bool)),
                            lambda: zero, lambda: cm2) # for l2 conv

            (c12, c22) = K.switch(c12 < 0, lambda: (zero, one), lambda: (c12, c22))
            (c22, c12) = K.switch(c22 < 0, lambda: (zero, one), lambda: (c22, c12))

            c1 = c11  # for CE dense
            c2 = c21  # for l1 dense

        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape, name='moment_' + str(i))
                   for (i, shape) in enumerate(shapes)]
        self.weights = [self.iterations] + moments
        if not self.split: 
            for p, g1, g2, m in zip(params, grads1, grads2, moments):

                v = self.momentum * m - lr*(c1*g1+c2*g2) # velocity
                self.updates.append(K.update(m, v))

                if self.nesterov:
                    new_p = p + self.momentum * v - lr*(c1*g1+c2*g2)
                else:
                    new_p = p + v

                # Apply constraints.
                if getattr(p, 'constraint', None) is not None:
                    new_p = p.constraint(new_p)

                self.updates.append(K.update(p, new_p))
        else: 
            for p, g1, g5, g6, m in zip(params, grads1, grads5, grads6, moments):

                if g6 == 0:  # it's a dense param
                    v = self.momentum * m - lr*(c11*g1+ c21*g5) # velocity
                    self.updates.append(K.update(m, v))

                    if self.nesterov:
                        new_p = p + self.momentum * v - lr*(c11*g1+ c21*g5) 
                    else:
                        new_p = p + v
                else:  # it's a conv param
                    v = self.momentum * m - lr*(c12*g1+ c22*g6) # velocity
                    self.updates.append(K.update(m, v))
                    
                    if self.nesterov:
                        new_p = p + self.momentum * v - lr*(c12*g1+ c22*g6) 
                    else:
                        new_p = p + v

                # Apply constraints.
                if getattr(p, 'constraint', None) is not None:
                    new_p = p.constraint(new_p)

                self.updates.append(K.update(p, new_p))
        self.c1 = c1
        self.c2 = c2
        return self.updates, c1, c2
Code example #48
File: optimizer_v1.py Project: z-a-f/keras-1
 def _create_all_weights(self, params):
   accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
   self.weights = accumulators
   return accumulators
Code example #49
    return (np.expand_dims(style_mask,
                           axis=0), np.expand_dims(target_mask, axis=0))


# Create tensor variables for images
if K.image_data_format() == 'channels_first':
    shape = (1, num_colors, img_nrows, img_ncols)
else:
    shape = (1, img_nrows, img_ncols, num_colors)

style_image = K.variable(preprocess_image(style_img_path))
target_image = K.placeholder(shape=shape)
if use_content_img:
    content_image = K.variable(preprocess_image(content_img_path))
else:
    content_image = K.zeros(shape=shape)

images = K.concatenate([style_image, target_image, content_image], axis=0)

# Create tensor variables for masks
raw_style_mask, raw_target_mask = load_mask_labels()
style_mask = K.variable(raw_style_mask.astype('float32'))
target_mask = K.variable(raw_target_mask.astype('float32'))
masks = K.concatenate([style_mask, target_mask], axis=0)

# index constants for images and tasks variables
STYLE, TARGET, CONTENT = 0, 1, 2

# Build image model, mask model and use layer outputs as features
# image model as VGG19
image_model = vgg19.VGG19(include_top=False, input_tensor=images)
Code example #50
 def add_zero(x):
     xc = K.zeros((batch_size * h * w, 1))
     x = K.concatenate([x, xc], axis=1)
     return x
Code example #51
 def __init__(self, model, momentum=0.9999):
     self.momentum = momentum
     self.model = model
     self.ema_weights = [K.zeros(K.shape(w)) for w in model.weights]
Code example #52
# please provide the test_function, which takes in the input and target and
# outputs a tuple of (accuracy, prediction)
input_tensor = K.placeholder(shape=(batch_size, input_dim), dtype='float32')
hidden_tensor = input_tensor
target_tensor = K.placeholder(shape=(batch_size, 10), dtype='float32')

weight_variable_list = []
bias_variable_list = []

for i in range(num_layers):
    weight_variable = K.random_uniform_variable(shape=(input_dim,
                                                       num_units[i]),
                                                low=-1.,
                                                high=1.,
                                                dtype='float32')
    bias_variable = K.zeros(shape=(num_units[i], ), dtype='float32')

    weight_variable_list.append(weight_variable)
    bias_variable_list.append(bias_variable)

    hidden_layer_tensor = K.dot(hidden_tensor, weight_variable) + bias_variable
    hidden_layer_tensor = K.relu(hidden_layer_tensor)
    hidden_tensor = hidden_layer_tensor

    input_dim = num_units[i]

weight_variable = K.random_uniform_variable(shape=(input_dim, 10),
                                            low=-1.,
                                            high=1.,
                                            dtype='float32')
bias_variable = K.zeros(shape=(10, ), dtype='float32')
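Following the request in the comment at the top of this snippet, one plausible way to finish the network and build the test_function with K.function; the softmax head and the tensor names below are assumptions, not part of the original:

output_tensor = K.softmax(K.dot(hidden_tensor, weight_variable) + bias_variable)

prediction_tensor = K.argmax(output_tensor, axis=-1)
accuracy_tensor = K.mean(K.cast(K.equal(prediction_tensor,
                                        K.argmax(target_tensor, axis=-1)),
                                'float32'))

test_function = K.function(inputs=[input_tensor, target_tensor],
                           outputs=[accuracy_tensor, prediction_tensor])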
Code example #53
File: driver.py Project: alecgunny/mc_rbm
    # returns reconstructions of the dataset X as computed by the model
    num_batches  = (X.shape[0] - 1) // batch_size + 1
    predictions  = np.zeros((num_batches * batch_size, v_dim))
    for batch_num in range(num_batches):
        predictions[batch_slice(batch_num)] = predict_func(get_batch(X, batch_num))
    return predictions


# load and preprocess data
(X_train, y_train), (X_valid, y_valid) = mnist.load_data()
X_train, X_valid = preprocess(X_train), preprocess(X_valid)
v_dim = X_train.shape[-1]

# build the parameters of the RBM
W = glorot_normal(shape=(v_dim, h_dim), name='W')
a = K.zeros(shape=(v_dim,), name='a')
b = K.zeros(shape=(h_dim,), name='b')
params = [W, a, b]

# now build the model
# first build visible input and map to hidden state probabilities
v   = K.placeholder(ndim=2)
p_h = K.sigmoid(K.dot(v, W) + b)

# now monte carlo sample a few hs from p_h and map back to p(v|h) then average
p_v = 0
for i in range(n_monte_carlo):
    h    = sample_bernoulli(p_h)
    p_v += K.sigmoid(K.dot(h, W.T) + a)
p_v = clip(p_v / n_monte_carlo)
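sample_bernoulli and clip are helpers defined elsewhere in driver.py; a plausible sketch of their behaviour, offered purely as an assumption so the snippet reads self-contained:

def sample_bernoulli(p):
    # assumed: draw 0/1 samples with probability p, inside the Keras graph
    return K.cast(K.less(K.random_uniform(K.shape(p)), p), K.floatx())

def clip(p, eps=1e-6):
    # assumed: keep probabilities away from exact 0 and 1
    return K.clip(p, eps, 1. - eps)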
Code example #54
File: input.py Project: Ojda22/FeatureNet
 def build(self, input, neighbour=None):
     shape = neighbour.shape
     return K.zeros(shape)
Code example #55
 def _create_all_weights(self, params):
     shapes = [backend.int_shape(p) for p in params]
     moments = [backend.zeros(shape) for shape in shapes]
     self.weights = [self.iterations] + moments
     return moments
Code example #56
 def _create_all_weights(self, params):
     shapes = [backend.int_shape(p) for p in params]
     accumulators = [backend.zeros(shape) for shape in shapes]
     delta_accumulators = [backend.zeros(shape) for shape in shapes]
     self.weights = accumulators + delta_accumulators
     return accumulators, delta_accumulators
Code example #57
 def mask(x):
     shape = K.shape(x)
     mask = K.zeros((shape[1], shape[2])) + (-1e15)
     mask = tf.matrix_band_part(mask, 0, -1)  # upper triangle of `mask`
     mask -= tf.matrix_band_part(mask, 0, 0)  # remove diagonal
     return x + mask
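For a length-3 sequence, the mask built above looks like the following (illustrative values; -1e15 stands in for minus infinity so that softmax ignores future positions, while the diagonal and lower triangle are left untouched):

#   [[ 0.,   -1e15, -1e15],
#    [ 0.,    0.,   -1e15],
#    [ 0.,    0.,    0.  ]]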
Code example #58
 def _allocate_var(self, name=None):
     return {w: K.zeros(w.get_shape(), name=name) for w in self.weights}
Code example #59
def hack_loss(y_true, y_pred):
        return K.zeros((1,))
Code example #60
File: train_gm.py Project: GorskiBartosz/MusAE
 def no_loss(self, y_true, y_pred):
     return K.zeros(shape=(1, ))