def get_constants(self, inputs, training=None):
    """Assemble the input and recurrent dropout masks for this layer.

    Args:
        inputs: Input tensor. It is sliced as ``inputs[:, 0, 0]``, so it
            needs at least three axes (presumably
            ``(batch, time, features)`` -- TODO confirm against the caller).
        training: Passed straight through to ``K.in_train_phase``.

    Returns:
        A list ``[dp_mask, rec_dp_mask]``. Each entry is itself a list of
        three items: the results of ``K.in_train_phase`` when the matching
        dropout rate lies strictly between 0 and 1 (and, for the input
        mask only, ``self.implementation`` is non-zero); otherwise three
        values of ``K.cast_to_floatx(1.)``.
    """
    # NOTE(review): the other `get_constants` in this file gates its input
    # mask on `self.implementation == 0`, this one on `!= 0` -- confirm that
    # the difference is intentional.
    mask_count = 3

    def ones_column():
        # All-ones tensor of shape (-1, 1) derived from `inputs[:, 0, 0]`.
        return K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1)))

    def build_masks(ones, rate):
        # One `K.in_train_phase` call per mask, each handed the dropout
        # callable and the all-ones tensor as the alternative.
        def dropped_inputs():
            return K.dropout(ones, rate)

        masks = []
        for _ in range(mask_count):
            masks.append(
                K.in_train_phase(dropped_inputs, ones, training=training))
        return masks

    def identity_masks():
        # Scalar 1.0 placeholders used when no dropout mask is built.
        return [K.cast_to_floatx(1.) for _ in range(mask_count)]

    if self.implementation != 0 and 0 < self.dropout < 1:
        input_dim = K.int_shape(inputs)[-1]
        # Repeat the column `input_dim` times along the last axis.
        ones = K.tile(ones_column(), (1, int(input_dim)))
        dp_mask = build_masks(ones, self.dropout)
    else:
        dp_mask = identity_masks()

    if 0 < self.recurrent_dropout < 1:
        # Repeat the column `self.units` times along the last axis.
        ones = K.tile(ones_column(), (1, self.units))
        rec_dp_mask = build_masks(ones, self.recurrent_dropout)
    else:
        rec_dp_mask = identity_masks()

    return [dp_mask, rec_dp_mask]
def get_updates(self, loss, params):
    """Build the update operations for one optimization step.

    Args:
        loss: Handed to ``self.get_gradients`` together with ``params``.
        params: Sequence of parameters to update. Two zero-initialised
            accumulators are created per parameter with
            ``K.zeros(K.int_shape(p))``.

    Returns:
        ``self.updates``, in this order: the increment of
        ``self.iterations``, the ``(self.m_schedule, new_value)`` pair,
        then for every parameter the updates of its two accumulators
        followed by the update of the parameter itself.

    Besides ``self.updates``, this also assigns ``self.weights`` to
    ``[self.iterations]`` followed by all accumulators.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]
    t = K.cast(self.iterations, K.floatx()) + 1

    def momentum_cache(step):
        # Due to the recommendations in [2], i.e. warming momentum schedule
        decay = K.pow(K.cast_to_floatx(0.96), step * self.schedule_decay)
        return self.beta_1 * (1. - 0.5 * (decay))

    momentum_cache_t = momentum_cache(t)
    momentum_cache_t_1 = momentum_cache(t + 1)

    m_schedule_new = self.m_schedule * momentum_cache_t
    m_schedule_next = (
        self.m_schedule * momentum_cache_t * momentum_cache_t_1)
    self.updates.append((self.m_schedule, m_schedule_new))

    # Two accumulators per parameter, all `m` slots created before all `v`
    # slots.
    shapes = [K.int_shape(p) for p in params]
    m_slots = [K.zeros(shape) for shape in shapes]
    v_slots = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + m_slots + v_slots

    for p, g, m, v in zip(params, grads, m_slots, v_slots):
        # the following equations given in [1]
        g_hat = g / (1. - m_schedule_new)
        m_new = self.beta_1 * m + (1. - self.beta_1) * g
        m_hat = m_new / (1. - m_schedule_next)
        v_new = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
        v_hat = v_new / (1. - K.pow(self.beta_2, t))
        m_bar = (
            (1. - momentum_cache_t) * g_hat + momentum_cache_t_1 * m_hat)

        self.updates.extend([K.update(m, m_new), K.update(v, v_new)])

        new_p = p - self.lr * m_bar / (K.sqrt(v_hat) + self.epsilon)

        # Apply constraints.
        constraint = getattr(p, 'constraint', None)
        if constraint is not None:
            new_p = constraint(new_p)

        self.updates.append(K.update(p, new_p))

    return self.updates
def get_constants(self, inputs, training=None):
    """Assemble the input and recurrent dropout masks for this layer.

    Args:
        inputs: Input tensor; it is reduced with ``K.sum(..., axis=1)``,
            so it needs at least two axes (presumably axis 1 is time --
            TODO confirm against the caller).
        training: Passed straight through to ``K.in_train_phase``.

    Returns:
        A list ``[dp_mask, rec_dp_mask]``. Each entry is itself a list of
        four items: the results of ``K.in_train_phase`` when the matching
        dropout rate lies strictly between 0 and 1 (and, for the input
        mask only, ``self.implementation`` equals 0); otherwise four
        values of ``K.cast_to_floatx(1.)``.
    """
    # NOTE(review): the other `get_constants` in this file gates its input
    # mask on `self.implementation != 0`, this one on `== 0` -- confirm that
    # the difference is intentional.
    mask_count = 4

    def zeros_without_axis_1():
        # Zeros shaped like `inputs` with axis 1 summed away.
        return K.sum(K.zeros_like(inputs), axis=1)

    def build_masks(ones, rate):
        # One `K.in_train_phase` call per mask, each handed the dropout
        # callable and the all-ones tensor as the alternative.
        def dropped_inputs():
            return K.dropout(ones, rate)

        masks = []
        for _ in range(mask_count):
            masks.append(
                K.in_train_phase(dropped_inputs, ones, training=training))
        return masks

    def identity_masks():
        # Scalar 1.0 placeholders used when no dropout mask is built.
        return [K.cast_to_floatx(1.) for _ in range(mask_count)]

    if self.implementation == 0 and 0 < self.dropout < 1:
        ones = zeros_without_axis_1()
        ones += 1
        dp_mask = build_masks(ones, self.dropout)
    else:
        dp_mask = identity_masks()

    if 0 < self.recurrent_dropout < 1:
        # Kernel shape with its last entry replaced by `self.filters`.
        shape = list(self.kernel_shape)
        shape[-1] = self.filters
        # Run the zeros through `self.input_conv` with an all-zero kernel,
        # then add one, so the mask template takes the shape of the
        # convolution output.
        ones = zeros_without_axis_1()
        ones = self.input_conv(ones, K.zeros(shape), padding=self.padding)
        ones += 1.
        rec_dp_mask = build_masks(ones, self.recurrent_dropout)
    else:
        rec_dp_mask = identity_masks()

    return [dp_mask, rec_dp_mask]
def __init__(self, alpha=0.3, **kwargs):
    """Initialise the layer and store its ``alpha`` coefficient.

    Args:
        alpha: Coefficient kept on the instance after conversion with
            ``K.cast_to_floatx``. Defaults to 0.3. (Presumably the slope
            applied to negative inputs -- confirm against ``call``.)
        **kwargs: Forwarded unchanged to the parent class constructor.
    """
    super(LeakyReLU, self).__init__(**kwargs)
    self.supports_masking = True
    # Convert once here so the attribute already holds the backend float
    # type.
    alpha_as_float = K.cast_to_floatx(alpha)
    self.alpha = alpha_as_float
def __init__(self, theta=1.0, **kwargs):
    """Initialise the layer and store its ``theta`` value.

    Args:
        theta: Value kept on the instance after conversion with
            ``K.cast_to_floatx``. Defaults to 1.0. (Presumably the
            activation threshold -- confirm against ``call``.)
        **kwargs: Forwarded unchanged to the parent class constructor.
    """
    super(ThresholdedReLU, self).__init__(**kwargs)
    self.supports_masking = True
    # Convert once here so the attribute already holds the backend float
    # type.
    theta_as_float = K.cast_to_floatx(theta)
    self.theta = theta_as_float
def __init__(self, l1=0., l2=0.):  # pylint: disable=redefined-outer-name
    """Store the two factors as backend floats.

    Args:
        l1: Factor saved as ``self.l1`` after ``K.cast_to_floatx``.
            Defaults to 0. (Presumably the L1 penalty weight -- confirm
            against ``__call__``.)
        l2: Factor saved as ``self.l2`` after ``K.cast_to_floatx``.
            Defaults to 0. (Presumably the L2 penalty weight -- confirm
            against ``__call__``.)
    """
    l1_as_float = K.cast_to_floatx(l1)
    self.l1 = l1_as_float
    l2_as_float = K.cast_to_floatx(l2)
    self.l2 = l2_as_float