Example #1
    def _step(self,
              x_tm1,
              h_tm1, c_tm1, H,
              u_i, u_f, u_o, u_c, w_i, w_f, w_c, w_o, w_x, w_a, v_i, v_f, v_c, v_o, b_i, b_f, b_c, b_o, b_x, b_a):

        # broadcast the previous cell state across the input timesteps
        s_tm1 = K.repeat(c_tm1, self.input_length)
        e = H + s_tm1
        def a(x, states):
            output = K.dot(x, w_a) + b_a
            return output, []
        _, energy, _ = K.rnn(a, e, [], mask=None)
        energy = activations.get('linear')(energy)
        energy = K.permute_dimensions(energy, (2, 0, 1))
        energy = energy[0]
        alpha = K.softmax(energy)                      # attention weights over the input timesteps
        alpha = K.repeat(alpha, self.hidden_dim)
        alpha = K.permute_dimensions(alpha, (0, 2, 1))
        weighted_H = H * alpha

        v = K.sum(weighted_H, axis=1)                  # context vector

        xi_t = K.dot(x_tm1, w_i) + K.dot(v, v_i) + b_i
        xf_t = K.dot(x_tm1, w_f) + K.dot(v, v_f) + b_f
        xc_t = K.dot(x_tm1, w_c) + K.dot(v, v_c) + b_c
        xo_t = K.dot(x_tm1, w_o) + K.dot(v, v_o) + b_o

        i_t = self.inner_activation(xi_t + K.dot(h_tm1, u_i))
        f_t = self.inner_activation(xf_t + K.dot(h_tm1, u_f))
        c_t = f_t * c_tm1 + i_t * self.activation(xc_t + K.dot(h_tm1, u_c))
        o_t = self.inner_activation(xo_t + K.dot(h_tm1, u_o))
        h_t = o_t * self.activation(c_t)

        x_t = K.dot(h_t, w_x) + b_x
        return x_t, h_t, c_t
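Most of the snippets in this collection rely on the shape contract of K.repeat: it takes a 2D tensor of shape (samples, dim) and returns a 3D tensor (samples, n, dim), i.e. each sample is tiled along a new time axis. A minimal standalone check, assuming the usual `from keras import backend as K` import:

import numpy as np
from keras import backend as K

x = K.constant(np.arange(6).reshape(2, 3))  # shape (2, 3): two samples, three features
y = K.repeat(x, 4)                          # shape (2, 4, 3): each sample tiled 4 times along a new axis 1
print(K.int_shape(y))                       # (2, 4, 3)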
Example #2
	def step(self, x, states):
		M = states[0]  # (nb_samples, nb_slots, memory_size)
		h = states[1]  # (nb_samples, memory_size)
		w = states[2]  # (nb_samples, nb_slots)
		#------Memory read--------#
		k = self.W_k(h)  # (nb_samples, memory_size)
		w_hat = T.batched_tensordot(M, k, axes=[(2), (1)])  # (nb_samples, nb_slots)
		beta = K.sigmoid(self.W_b(h))  # (nb_samples, 1)
		beta = K.repeat(beta, self.nb_slots)  # (nb_samples, nb_slots, 1)
		beta = K.squeeze(beta, 2)  # (nb_samples, nb_slots)
		w_hat = softmax(w_hat * beta)  # (nb_samples, nb_slots)
		g = sigmoid(self.W_hg(h))  # (nb_samples, 1)
		g = K.repeat(g, self.nb_slots)  # (nb_samples, nb_slots, 1)
		g = K.squeeze(g, 2)  # (nb_samples, nb_slots)
		w = (1 - g) * w + g * w_hat  # (nb_samples, nb_slots)
		c = T.batched_tensordot(w, M, axes=[(1), (1)])
		h = tanh(self.W_ih(x) + self.W_c(c))
		y = self.W_ho(h)
		#---------Memory write---------#
		v = self.W_v(h)  # (nb_samples, memory_size)
		v = K.repeat(v, 1)
		e = sigmoid(self.W_he(h))  # (nb_samples, nb_slots)
		f = 1 - w * e  # (nb_samples, nb_slots)
		f = K.repeat(f, self.memory_size)  # (nb_samples, memory_size, nb_slots)
		f = K.permute_dimensions(f, (0, 2, 1))  # (nb_samples, nb_slots, memory_size)
		u = w  # (nb_samples, nb_slots)
		u = K.repeat(u, 1)
		uv = T.batched_tensordot(u, v, axes=[(1), (1)])
		M = M * f + uv
		return y, [M, h, w]
Example #3
    def set_batch_function(self, model, input_shape, batch_size, nb_actions, gamma):
        input_dim = np.prod(input_shape)
        samples = K.placeholder(shape=(batch_size, input_dim * 2 + 3))

        S = samples[:, 0 : input_dim]
        a = samples[:, input_dim]
        a = K.cast(a, 'int32')  # action indices must be integers for one_hot
        r = samples[:, input_dim + 1]
        S_prime = samples[:, input_dim + 2 : 2 * input_dim + 2]
        game_over = samples[:, 2 * input_dim + 2 : 2 * input_dim + 3]

        r = K.reshape(r, (batch_size, 1))
        r = K.repeat(r, nb_actions)
        r = K.reshape(r, (batch_size, nb_actions))

        game_over = K.repeat(game_over, nb_actions)
        game_over = K.reshape(game_over, (batch_size, nb_actions))

        S = K.reshape(S, (batch_size, ) + input_shape)
        S_prime = K.reshape(S_prime, (batch_size, ) + input_shape)

        X = K.concatenate([S, S_prime], axis=0)
        Y = model(X)

        Qsa = K.max(Y[batch_size:], axis=1)
        Qsa = K.reshape(Qsa, (batch_size, 1))
        Qsa = K.repeat(Qsa, nb_actions)
        Qsa = K.reshape(Qsa, (batch_size, nb_actions))

        delta = K.reshape(self.one_hot(a, nb_actions), (batch_size, nb_actions))
        targets = (1 - delta) * Y[:batch_size] + delta * (r + gamma * (1 - game_over) * Qsa)

        self.batch_function = K.function(inputs=[samples], outputs=[S, targets])
    def step(self, x, states):

        ytm, stm = states

        # repeat the hidden state to the length of the sequence
        _stm = K.repeat(stm, self.timesteps)

        # now multiply the weight matrix with the repeated hidden state
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities
        # this relates how much other timesteps contributed to this one.
        et = K.dot(activations.tanh(_Wxstm + self._uxpb),
                   K.expand_dims(self.V_a))
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.timesteps)
        at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)
        # ~~~> calculate new hidden state
        # first calculate the "r" gate:

        rt = activations.sigmoid(
            K.dot(ytm, self.W_r)
            + K.dot(stm, self.U_r)
            + K.dot(context, self.C_r)
            + self.b_r)

        # now calculate the "z" gate
        zt = activations.sigmoid(
            K.dot(ytm, self.W_z)
            + K.dot(stm, self.U_z)
            + K.dot(context, self.C_z)
            + self.b_z)

        # calculate the proposal hidden state:
        s_tp = activations.tanh(
            K.dot(ytm, self.W_p)
            + K.dot((rt * stm), self.U_p)
            + K.dot(context, self.C_p)
            + self.b_p)

        # new hidden state:
        st = (1-zt)*stm + zt * s_tp

        yt = activations.softmax(
            K.dot(ytm, self.W_o)
            + K.dot(stm, self.U_o)
            + K.dot(context, self.C_o)
            + self.b_o)

        if self.return_probabilities:
            return at, [yt, st]
        else:
            return yt, [yt, st]
Example #5
    def step(self, x, states):
        """
            LSTM的几个表达式都在这
        :param x:
        :param states: 上个时刻的输出和隐层状态st
        :return:
        """
        ytm, stm = states

        # repeat the hidden state to the length of the sequence
        # repeated n times along the steps dimension: (sample, step, dim)
        _stm = K.repeat(stm, self.timesteps)

        # now multiply the weight matrix with the repeated hidden state
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities
        # this relates how much other timesteps contributed to this one.
        et = K.dot(activations.tanh(_Wxstm + self._uxpb),
                   K.expand_dims(self.V_a))
        # softmax
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.timesteps)
        at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)
        # ~~~> calculate new hidden state
        # first calculate the "r" gate:

        rt = activations.sigmoid(
            K.dot(ytm, self.W_r) + K.dot(stm, self.U_r) +
            K.dot(context, self.C_r) + self.b_r)

        # now calculate the "z" gate
        zt = activations.sigmoid(
            K.dot(ytm, self.W_z) + K.dot(stm, self.U_z) +
            K.dot(context, self.C_z) + self.b_z)

        # calculate the proposal hidden state:
        s_tp = activations.tanh(
            K.dot(ytm, self.W_p) + K.dot((rt * stm), self.U_p) +
            K.dot(context, self.C_p) + self.b_p)

        # new hidden state:
        st = (1 - zt) * stm + zt * s_tp

        yt = activations.softmax(
            K.dot(ytm, self.W_o) + K.dot(stm, self.U_o) +
            K.dot(context, self.C_o) + self.b_o)

        if self.return_probabilities:
            return at, [yt, st]
        else:
            return yt, [yt, st]
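The manual softmax over the time axis used in the two decoder steps above (exp, per-sample sum, K.repeat the sum back over the timesteps, divide) can be sketched in isolation. A minimal check with hypothetical shapes, assuming the standard backend import:

import numpy as np
from keras import backend as K

et = K.constant(np.random.rand(2, 5, 1))   # attention scores, shape (batch, timesteps, 1)
at = K.exp(et)
at_sum = K.sum(at, axis=1)                 # shape (batch, 1)
at = at / K.repeat(at_sum, 5)              # K.repeat lifts the sum back to (batch, timesteps, 1)
# the entries of `at` now sum to one along the time axis for every sample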
Example #6
def sample_z(args):
    k = 5
    local_mu, local_sigma = args
    local_mu = K.repeat(local_mu, k)
    local_sigma = K.repeat(local_sigma, k)
    eps = K.random_normal(shape=(K.shape(local_mu)[0], k,
                                 K.shape(local_mu)[2]),
                          mean=0.,
                          stddev=1.)
    return local_mu + local_sigma * eps
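A hedged usage sketch for sample_z above, wiring it into a Lambda layer; the input layers and the latent size 8 are hypothetical:

from keras.layers import Input, Lambda

mu = Input(shape=(8,))             # hypothetical latent mean, shape (batch, 8)
sigma = Input(shape=(8,))          # hypothetical latent scale, shape (batch, 8)
z = Lambda(sample_z)([mu, sigma])  # shape (batch, 5, 8): k = 5 samples per input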
 def call(self, x):
     w = x[:,0]
     m0 = x[:,9]
     distance = k_b.square(w - m0)
     distance = k_b.sqrt(distance[:,0] + distance[:,1] + distance[:,2])
     distance = k_b.reshape(distance, (-1, 1))
     distance = k_b.repeat(distance, 21)
     m0 = k_b.repeat(m0, 21)
     result = (x-m0)/distance
     return result
Example #8
def correlation_loss(y_true, y_pred):
    # want to maximize correlation
    y_true, y_pred = K.reshape(y_true, (-1, WRAP, 20)), K.reshape(
        y_pred, (-1, WRAP, 20))
    mx = K.repeat(K.mean(y_true, axis=1), WRAP)
    my = K.repeat(K.mean(y_pred, axis=1), WRAP)
    xm, ym = y_true - mx, y_pred - my
    r_num = K.sum(xm * ym, axis=1)
    r_den = K.sum(K.sum(K.square(xm), axis=1) * K.sum(K.square(ym), axis=1))
    r = r_num / r_den
    return 1 - r
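A minimal compile-time usage sketch for correlation_loss; `model` is a hypothetical Keras model whose flattened output reshapes to (-1, WRAP, 20), and the module-level constant WRAP is assumed to be defined:

model.compile(optimizer='adam', loss=correlation_loss)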
Example #9
    def step(self, x, states):

        ytm, stm = states

        # repeat the hidden state to the length of the sequence
        _stm = K.repeat(stm, self.timesteps_e)

        # now multiply the weight matrix with the repeated hidden state
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities
        # this relates how much other timesteps contributed to this one.
        et = K.dot(
            activations.tanh(
                _Wxstm +
                self._uxpb),  # e_ij = a(s_(i-1), h_j); U_a . h_j is precomputed in self._uxpb
            K.expand_dims(self.V_a))
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.timesteps_e)
        at /= at_sum_repeated  # Eq. (6): vector of size (batchsize, timesteps, 1); softmax over the timesteps

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1),
                            axis=1)  # Eq. (5): shape (batchsize, input_dim)
        # ~~~> calculate new hidden state
        # first calculate the "r" gate:

        rt = activations.sigmoid(
            K.dot(ytm, self.W_r) + K.dot(stm, self.U_r) +
            K.dot(context, self.C_r) + self.b_r)  # f_t corresponds to the LSTM forget gate; there is no x_t input here

        # now calculate the "z" gate
        zt = activations.sigmoid(
            K.dot(ytm, self.W_z) + K.dot(stm, self.U_z) +
            K.dot(context, self.C_z) + self.b_z)

        # calculate the proposal hidden state:
        s_tp = activations.tanh(
            K.dot(ytm, self.W_p) + K.dot((rt * stm), self.U_p) +
            K.dot(context, self.C_p) + self.b_p)

        # new hidden state:
        st = (1 - zt) * stm + zt * s_tp

        yt = activations.softmax(
            K.dot(ytm, self.W_o) + K.dot(stm, self.U_o) +
            K.dot(context, self.C_o) + self.b_o)  # h_t

        if self.return_probabilities:
            return at, [yt, st]
        else:
            return yt, [yt, st]
Example #10
 def log_prob(self, x):
     """Given batch of x of shape (batch, samples, dim), returns (batch, samples) values of the
        log probability per sample.
     """
     # log gaussian probability = -1/2 sum[(x-mean)^2/variance]
     variance = K.repeat(K.exp(self.log_var),
                         self.k_samples)  # shape is (batch, samples, dim)
     log_det = K.tile(K.sum(self.log_var, axis=-1, keepdims=True),
                      (1, self.k_samples))  # shape is (batch, samples)
     x_diff = x - K.repeat(self.mean,
                           self.k_samples)  # shape is (batch, samples, dim)
     return -(K.sum((x_diff / variance) * x_diff, axis=-1) + log_det) / 2
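In equation form, the per-sample value returned above is (with the additive constant -D/2 * log(2*pi) dropped, exactly as the code drops it, and sigma_d^2 = exp(log_var_d)):

    \log p(x) = -\tfrac{1}{2}\left(\sum_{d=1}^{D} \frac{(x_d - \mu_d)^2}{\sigma_d^2} + \sum_{d=1}^{D} \log \sigma_d^2\right)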
Example #11
    def call(self, x, mask=None):
        en_seq = x[0]
        de_seq = x[1]
        input_de_times = K.int_shape(de_seq)[-2]

        if len(x) == 3:
            mask = x[2]
            m_en = K.cast(mask, K.floatx())
            en_seq = en_seq * K.expand_dims(m_en, -1)

        if len(x) == 2 and mask is not None:
            # remove padding values
            m_en = K.cast(mask[0], K.floatx())
            en_seq = en_seq * K.expand_dims(m_en, -1)

        # compute alphas
        att_en = K.dot(K.reshape(en_seq, (-1, self.input_dim_en)), self.w_en)
        att_en = K.reshape(att_en, shape=(-1, self.input_en_times * self.units))
        att_en = K.repeat(att_en, input_de_times)
        att_en = K.reshape(att_en, shape=(-1, self.input_en_times * input_de_times, self.units))

        att_de = K.dot(K.reshape(de_seq, (-1, self.input_dim_de)), self.w_de)
        att_de = K.reshape(att_de, shape=(-1, input_de_times, self.units))
        att_de = K.repeat_elements(att_de, self.input_en_times, 1)

        co_m = att_en + att_de
        co_m = K.reshape(co_m, (-1, self.units))

        mu = K.dot(K.tanh(co_m), self.nu)

        if len(x) == 3 or (len(x) == 2 and mask is not None):
            m_en = K.repeat(m_en, input_de_times)
            m_en = K.reshape(m_en, shape=(-1, 1))
            m_en = m_en - 1
            m_en = m_en * REMOVE_FACTOR
            mu = mu + m_en

        mu = K.reshape(mu, shape=(-1, input_de_times, self.input_en_times))
        alphas = K.softmax(mu)

        en_seq = K.reshape(en_seq, shape=(-1, self.input_en_times * self.input_dim_en))
        en_seq = K.repeat(en_seq, input_de_times)
        en_seq = K.reshape(en_seq, shape=(-1, input_de_times, self.input_en_times, self.input_dim_en))

        sum_en = K.sum(en_seq * K.expand_dims(alphas, -1), 2)

        output = K.concatenate([de_seq, sum_en], -1)

        if self.return_alphas:
            return [output, alphas]
        else:
            return output
Example #12
    def estimated(self, state, batch_size):
        # print(state.shape)

        # batch_size = state.shape[0]

        # generator_mag = K.ones((batch_size, 3))
        # ang_ref = K.zeros((batch_size, 1))
        # ref_ang = tf.Variable(tf.zeros((batch_size, 1)))
        # state = K.concatenate([generator_mag, state[:, :6], ang_ref, state[:, 6:]], axis=-1)

        # print(state.shape)
        state_restore = (state + 1) * (max_state - min_state) / 2 + min_state


        V = state_restore[:, :self.num_bus] * 10
        # [k, 9]

        A = state_restore[:, self.num_bus:]
        # [k, 9]
        # print(V.shape, A.shape)
        # P_bus = K.zeros((A.shape[0], 9))
        # [k, 9]
        # Q_bus = K.zeros((A.shape[0], 9))
        # [k, 9]
        # A_
        # print(K.permute_dimensions(K.repeat(A, 9), [0, 2, 1]).shape)
        # print(K.repeat(A, 9).shape)
        A_ = K.permute_dimensions(K.repeat(A, self.num_bus), [0, 2, 1]) - K.repeat(A, self.num_bus)
        G = K.constant(self.G, dtype=tf.float32)
        B = K.constant(self.B, dtype=tf.float32)
        cos_ = K.cos(A_ * pi / 180)
        sin_ = K.sin(A_ * pi / 180)

        term_1_P = G * cos_ + B * sin_
        term_1_Q = G * sin_ - B * cos_
        P_bus = (V * K.batch_dot(V, term_1_P, axes=[1, 2]))
        Q_bus = (V * K.batch_dot(V, term_1_Q, axes=[1, 2]))

        P_idx = [0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13]
        Q_idx = [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]



        batch_estimated_measurement = K.concatenate([self.gather_cols(P_bus, P_idx), self.gather_cols(Q_bus, Q_idx)], axis=1)
        # print(batch_estimated_measurement.shape)
        # batch_estimated_measurement = K.concatenate([P_bus, Q_bus], axis=1)


        ans = (batch_estimated_measurement - min_meas) / (max_meas - min_meas) * 2 - 1
        # print(K.eval(ans))

        return ans
Example #13
def cnn_loss(x, x_decoded_mean):
    #N = tf.convert_to_tensor(DPParam, dtype=tf.float32)

    gamma = tf.convert_to_tensor(DPParam['LPMtx'], dtype=tf.float32)
    N = tf.convert_to_tensor(DPParam['Nvec'], dtype=tf.float32)
    m = tf.convert_to_tensor(DPParam['m'], dtype=tf.float32)
    W = tf.convert_to_tensor(DPParam['B'], dtype=tf.float32)
    v = tf.convert_to_tensor(DPParam['nu'], dtype=tf.float32)

    num_cluster = N.shape[0]
    z_mean_1_last = tf.expand_dims(z_mean, -1)  # bs, latent_dim, 1
    z_mean_1_mid = tf.expand_dims(z_mean, 1)  # bs, 1, latent_dim

    for k in range(num_cluster):
        gamma_k_rep = tf.squeeze(
            K.repeat(tf.expand_dims(gamma[:, k], -1), latent_dim))
        z_k_bar = 1 / N[k] * K.sum(tf.multiply(gamma_k_rep, z_mean),
                                   axis=0)  #(latent_dim, )
        z_k_bar_batch = tf.squeeze(
            K.repeat(tf.expand_dims(z_k_bar, 0), batch_size))
        #tf.transpose(z_k_bar_batch, perm=[1, 0])
        z_k_bar_batch_1_last = tf.expand_dims(z_k_bar_batch,
                                              -1)  # bs, latent_dim, 1
        z_k_bar_batch_1_mid = tf.expand_dims(z_k_bar_batch,
                                             1)  # bs, 1, latent_dim

        # TODO:!
        S_k = 1 / N[k] * K.sum(K.batch_dot(
            tf.multiply(tf.expand_dims(gamma_k_rep, -1),
                        (z_mean_1_last - z_k_bar_batch_1_last)),
            z_mean_1_mid - z_k_bar_batch_1_mid),
                               axis=0)  # (latent_dim, latent_dim)
        temp = tf.linalg.trace(tf.linalg.solve(W[k], S_k))
        temp2 = tf.matmul(tf.expand_dims((z_k_bar - m[k]), 0),
                          tf.linalg.inv(W[k]))
        temp3 = tf.squeeze(
            tf.matmul(temp2, tf.expand_dims((z_k_bar - m[k]), -1)))
        if k == 0:
            e = 0.5 * N[k] * (v[k] * (temp + temp3))
        else:
            e += 0.5 * N[k] * (v[k] * (temp + temp3))

    loss_ = alpha * original_dim * objectives.mean_squared_error(
        K.flatten(x), K.flatten(x_decoded_mean)) - scale * K.sum(
            (z_log_var + 1), axis=-1)
    loss_ = K.sum(loss_, axis=0) + e
    # loss = K.sum(loss_, axis = 0)
    #for i in range(5):
    #    loss_ += N

    #return loss_
    return loss_
Example #14
File: mnist.py Project: wrccrwx/DAC
def myDist(y_pred):
    y_pred1, y_pred2, sth = y_pred
    norm1 = K.sqrt(K.sum(y_pred1**2, axis=1))
    norm1 = K.reshape(norm1, (norm1.shape[0], 1))
    norm1 = K.reshape(K.repeat(norm1, y_pred1.shape[1]), y_pred1.shape)
    y_pred1 = y_pred1 / norm1

    norm2 = K.sqrt(K.sum(y_pred2**2, axis=1))
    norm2 = K.reshape(norm2, (norm2.shape[0], 1))
    norm2 = K.reshape(K.repeat(norm2, y_pred2.shape[1]), y_pred2.shape)
    y_pred2 = y_pred2 / norm2

    return K.switch(K.dot(y_pred1, y_pred2.T) > sth, 1, 0)
Example #15
 def step(self, x, states):
     h_tm1, c_tm1, y_tm1, B, U, H = states
     s = K.dot(c_tm1, self.W_h) + self.b_h
     s = K.repeat(s, self.input_length)
     energy = time_distributed_dense(s + H, self.W_a, self.b_a)
     energy = K.squeeze(energy, 2)
     alpha = K.softmax(energy)
     alpha = K.repeat(alpha, self.input_dim)
     alpha = K.permute_dimensions(alpha, (0, 2, 1))
     weighted_H = H * alpha
     v = K.sum(weighted_H, axis=1)
     y, new_states = super(AttentionDecoder, self).step(v, states[:-1])
     return y, new_states
Example #17
    def call(self, x, mask=None):
        en_seq = x[0]
        de_seq = x[1]
        topics = x[2]
        input_de_times = K.shape(de_seq)[-2]

        # compute alphas
        att_en = K.dot(K.reshape(en_seq, (-1, self.input_dim_en)), self.w_en)
        att_en = K.reshape(att_en,
                           shape=(-1, self.input_en_times * self.units))
        att_en = K.repeat(att_en, input_de_times)
        att_en = K.reshape(att_en,
                           shape=(-1, self.input_en_times * input_de_times,
                                  self.units))

        att_de = K.dot(K.reshape(de_seq, (-1, self.input_dim_de)), self.w_de)
        att_de = K.reshape(att_de, shape=(-1, input_de_times, self.units))
        att_de = K.repeat_elements(att_de, self.input_en_times, 1)

        topics_w = K.dot(topics, K.transpose(self.wt))
        topics_w = K.repeat(topics_w, self.input_en_times * input_de_times)

        # print("Here:", att_de, att_en, topics_w)
        co_m = att_en + att_de + topics_w
        co_m = K.reshape(co_m, (-1, self.units))

        mu = K.dot(K.tanh(co_m), self.nu)

        mu = K.reshape(mu, shape=(-1, input_de_times, self.input_en_times))
        alphas = K.softmax(mu)
        p_gen = K.sigmoid(mu)

        en_seq = K.reshape(en_seq,
                           shape=(-1, self.input_en_times * self.input_dim_en))
        en_seq = K.repeat(en_seq, input_de_times)
        en_seq = K.reshape(en_seq,
                           shape=(-1, input_de_times, self.input_en_times,
                                  self.input_dim_en))

        sum_en = K.sum(en_seq * K.expand_dims(alphas, -1), 2)

        # output = K.concatenate([de_seq, sum_en], -1)
        output = de_seq + sum_en
        if self.return_alphas:
            alphas = K.reshape(alphas,
                               shape=(-1, input_de_times, self.input_en_times))
            p_gen = K.reshape(p_gen,
                              shape=(-1, input_de_times, self.input_en_times))
            return [output] + [alphas] + [p_gen]
        else:
            return output
Example #18
    def call(self, inputs, mask=None):
        if isinstance(inputs, list):
            memory, aspect = inputs
            mask = mask[0]
        else:
            memory = inputs

        attend_weights = []
        batch_size = K.shape(memory)[0]
        time_steps = K.shape(memory)[1]
        e = K.zeros(shape=(batch_size, self.units))
        for h in range(self.n_hop):
            # compute attention weight
            repeat_e = K.repeat(e, time_steps)
            if isinstance(inputs, list):
                repeat_asp = K.repeat(aspect, time_steps)
                inputs_concat = K.concatenate([memory, repeat_asp, repeat_e],
                                              axis=-1)
            else:
                inputs_concat = K.concatenate([memory, repeat_e], axis=-1)
            g = K.squeeze(K.dot(inputs_concat, self.al_w[h]),
                          axis=-1) + self.al_b[h]  # [batch_size, time_steps]
            a = K.exp(g)

            # apply mask after the exp. will be re-normalized next
            if mask is not None:
                a *= K.cast(mask, K.floatx())

            a /= K.cast(
                K.sum(a, axis=-1, keepdims=True) + K.epsilon(), K.floatx())
            attend_weights.append(a)

            # apply attention
            a_expand = K.expand_dims(a)  # [batch_size, time_steps, 1]
            i_AL = K.sum(
                memory * a_expand, axis=1
            )  # [batch_size, hidden], i_AL is the input of gru at time `h`

            # gru implementation
            r = K.sigmoid(K.dot(i_AL, self.gru_wr) +
                          K.dot(e, self.gru_ur))  # reset gate
            z = K.sigmoid(K.dot(i_AL, self.gru_wz) +
                          K.dot(e, self.gru_uz))  # update gate
            _e = K.tanh(K.dot(i_AL, self.gru_wx) + K.dot(r * e, self.gru_wg))
            e = (1 - z) * e + z * _e  # update e

        if self.return_attend_weight:
            return [e, K.concatenate(attend_weights, axis=0)]
        else:
            return e
Example #19
def iwae_loss(y_true, y_pred):
    local_mu = K.repeat(mu, k)
    local_sigma = K.repeat(sigma, k)
    log_posterior = -(n_z / 2) * log2pi - K.sum(
        K.log(1e-8 + local_sigma) +
        0.5 * K.square(z - local_mu) / K.square(1e-8 + local_sigma),
        axis=-1)
    log_prior = -(n_z / 2) * log2pi - K.sum(0.5 * K.square(z), axis=-1)
    log_bernoulli = K.sum(y_true * K.log(y_pred + 1e-8) +
                          (1 - y_true) * K.log(1 - y_pred + 1e-8),
                          axis=-1)
    log_weights = log_bernoulli + log_prior - log_posterior
    importance_weight = K.softmax(log_weights, axis=1)
    return -K.sum(importance_weight * log_weights, axis=-1)
Example #20
    def step(self, x, states):
        """ get the previous hidden state of the decoder from states = [z, s_p]
            alignment model:
                waStm1 = W_a \dot s_{t-1}
                uaHt = U_a \dot h_t
                tmp = tanh(waStm1 + uaHt)
                e_ij = V_a^T * tmp
                vector of length = timestep is: u_t = softmax(e_tj)
        """
        atm1 = x
        ztm1, s_tpm1 = states

        # old hidden state:
        # shape (batchsize, units)
        stm1 = (1 - ztm1) * self.stm2 + ztm1 * s_tpm1

        # shape (batchsize, timesteps, units)
        _stm = K.repeat(stm1, self.timesteps)

        # shape (batchsize, timesteps, output_dim)
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities:
        # self._uxpb has shape (batchsize, timesteps, output_dim)
        # V_a has shape (output_dim, )
        # after K.expand_dims it is (output_dim, 1)
        # therefore et has shape (batchsize, timesteps, 1)
        et = K.dot(activations.tanh(_Wxstm + self._uxpb),
                   K.expand_dims(self.V_a))
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.timesteps)
        at /= at_sum_repeated  # vector of shape (batchsize, timesteps, 1)

        # reset gate:
        rt = activations.sigmoid(
            K.dot(atm1, self.W_r) + K.dot(stm1, self.U_r) + self.b_r)
        # update gate:
        zt = activations.sigmoid(
            K.dot(atm1, self.W_z) + K.dot(stm1, self.U_z) + self.b_z)
        # proposal hidden state:
        s_tp = activations.tanh(
            K.dot(atm1, self.W_p) + K.dot((rt * stm1), self.U_p) + self.b_p)
        yt = activations.softmax(at)

        if self.return_probabilities:
            return at, [zt, s_tp]
        else:
            return yt, [zt, s_tp]
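The alignment model spelled out in the docstring above, written as equations (the code realizes the softmax with K.exp, K.sum and K.repeat):

    e_{tj} = V_a^{\top} \tanh\!\big(W_a\, s_{t-1} + U_a\, h_j\big), \qquad
    \alpha_{tj} = \frac{\exp(e_{tj})}{\sum_{k=1}^{T} \exp(e_{tk})}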
    def step(self, x, states):

    	# obtain elements of the previous time step.
        zt, htm = states
        if self.idx < self.timesteps:
            self.idx += 1

        # ##    ##    ##    equation 1    ##    ##    ##    ##    ## 

        # repeat the hidden state to the length of the sequence
        _htm = K.repeat(htm, self.timesteps)

        # now multiply the weight matrix with the repeated hidden state
        _Wxhtm = K.dot(_htm, self.W_a)

        # calculate the attention probabilities
        # this relates how much other timesteps contributed to this one.
        et = K.dot(activations.tanh(_Wxhtm + self._uxpb),
                   K.expand_dims(self.V_a))


        ##    ##    ##    equation 2     ##    ##    ##    ##    ##
    
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.timesteps)
        at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)


        ##    ##    ##    equation 3    ##    ##    ##    ##    ##    
    
        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)


        # ~~~> calculate new hidden state
        # equation 4  (zt)

        zt = K.concatenate([context, htm], axis=-1)
        zt = activations.tanh(K.dot(zt, self.W_A_combine))
        # print('int_shape: ', K.int_shape(zt))

        htm = activations.tanh(K.dot(self.x_seq[:, self.idx], self.W_s))

        # a switch so that we can return the
        # attention for visualizations
        if self.return_probabilities:
            return at, [zt, htm]
        else:
            return zt, [zt, htm]
 def call(self, x):
     assert isinstance(x, list)
     h1, h2 = x
     c = list()
     for i in range(self.seq_len):
         h2_i = K.repeat(h2[:, i, :], self.seq_len)
         x = K.concatenate([h1, h2_i])
         p = K.tanh(K.dot(x, self.w) + self.b1)
         p = K.softmax(K.dot(p, self.v) + self.b2)
         p = K.squeeze(p, axis=-1)
         p = K.repeat(p, self.embed_len)
         p = K.permute_dimensions(p, (0, 2, 1))
         c_i = K.sum(p * h1, axis=1, keepdims=True)
         c.append(c_i)
     return K.concatenate(c, axis=1)
Example #23
def vae_loss(x,
             x_decoded_mean,
             z,
             z_mean,
             z_log_var,
             u_p,
             theta_p,
             lambda_p,
             alpha=1,
             datatype='sigmoid'):
    Z = tf.transpose(K.repeat(z, n_centroid), [0, 2, 1])
    z_mean_t = tf.transpose(K.repeat(z_mean, n_centroid), [0, 2, 1])
    z_log_var_t = tf.transpose(K.repeat(z_log_var, n_centroid), [0, 2, 1])
    u_tensor3 = tf.tile(tf.expand_dims(u_p, [0]), [batch_size, 1, 1])
    # u_tensor3 = T.repeat(tf.expand_dims(u_p,[0]), batch_size, axis=0)
    # lambda_tensor3 = T.repeat(tf.expand_dims(lambda_p,[0]), batch_size, axis=0)
    lambda_tensor3 = tf.tile(tf.expand_dims(lambda_p, [0]), [batch_size, 1, 1])
    temp_theta_p = tf.expand_dims(theta_p, [0])
    temp_theta_p = tf.expand_dims(temp_theta_p, [0])
    # theta_tensor3 = temp_theta_p * T.ones((batch_size, z_dim, n_centroid))
    theta_tensor3 = tf.tile(temp_theta_p, [batch_size, z_dim, 1])

    #@TODO
    #PROBLEM HERE ? add theta z_dim times for each cluster?
    p_c_z = K.exp(K.sum((K.log(theta_tensor3) - 0.5 * K.log(2 * math.pi * lambda_tensor3) - \
                         K.square(Z - u_tensor3) / (2 * lambda_tensor3)), axis=1)) + 1e-10

    gamma = p_c_z / K.sum(p_c_z, axis=-1, keepdims=True)
    gamma_t = K.repeat(gamma, z_dim)

    if datatype == 'sigmoid':
        loss = alpha * original_dim * objectives.binary_crossentropy(x, x_decoded_mean) \
                   + K.sum(0.5 * gamma_t * (
            z_dim * K.log(math.pi * 2) + K.log(lambda_tensor3) + K.exp(z_log_var_t) / lambda_tensor3 + K.square(
                    z_mean_t - u_tensor3) / lambda_tensor3), axis=(1, 2)) \
                   - 0.5 * K.sum(z_log_var + 1, axis=-1) \
                   - K.sum(K.log(K.repeat_elements(tf.expand_dims(theta_p, [0]), batch_size, 0)) * gamma, axis=-1) \
                   + K.sum(K.log(gamma) * gamma, axis=-1)
    else:
        loss = alpha * original_dim * objectives.mean_squared_error(x, x_decoded_mean) \
               + K.sum(0.5 * gamma_t * (
            z_dim * K.log(math.pi * 2) + K.log(lambda_tensor3) + K.exp(z_log_var_t) / lambda_tensor3 + K.square(
                z_mean_t - u_tensor3) / lambda_tensor3), axis=(1, 2)) \
               - 0.5 * K.sum(z_log_var + 1, axis=-1) \
               - K.sum(K.log(K.repeat_elements(tf.expand_dims(theta_p, [0]), batch_size, 0)) * gamma, axis=-1) \
               + K.sum(K.log(gamma) * gamma, axis=-1)

    return tf.reduce_mean(loss)
    def call(self, inputs, mask=None):
        X, v = inputs
        mask_X, _ = mask

        if self.attend_mode == 'concat':
            concatenated = K.concatenate([X, K.repeat(v, X.shape[1])], axis=-1)
            e = dot_product(concatenated, self.W)
        if self.attend_mode == 'sum':
            e = dot_product(X, self.W) + dot_product(K.expand_dims(v, axis=1),
                                                     self.M)

        if self.bias:
            e += self.b
        e = K.tanh(e)
        e = dot_product(e, self.u)

        a = K.exp(e)
        if mask_X is not None:
            a *= K.cast(mask_X, K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        a = K.expand_dims(a)

        weighted_sum = K.sum(X * a, axis=1)

        if self.return_coefficients:
            return weighted_sum, a
        else:
            return weighted_sum
 def call(self, x, mask=None):
     #print(mask.shape)
     mask = K.cast(mask, 'float32')
     mask = K.repeat(mask, self.repeat_dim)
     #print(mask.shape)
     mask = K.permute_dimensions(mask, (0, 2, 1))
     return x * mask
 def call(self, x, mask=None):
     X = K.repeat(x, self.output_length)
     input_shape = list(self.input_spec[0].shape)
     input_shape = input_shape[:1] + [self.output_length] + input_shape[1:]
     self.input_spec = [InputSpec(shape=tuple(input_shape))]
     if self.stateful or self.state_input or len(self.state_outputs) > 0:
         initial_states = self.states[:]
     else:
         initial_states = self.get_initial_states(X)
     constants = self.get_constants(X)
     y_0 = K.permute_dimensions(X, (1, 0, 2))[0, :, :]
     initial_states += [y_0]
     last_output, outputs, states = K.rnn(self.step, X,
                                          initial_states,
                                          go_backwards=self.go_backwards,
                                          mask=mask,
                                          constants=constants,
                                          unroll=self.unroll,
                                          input_length=self.output_length)
     if self.stateful and not self.state_input:
         self.updates = []
         for i in range(2):
             self.updates.append((self.states[i], states[i]))
     self.states_to_transfer = states
     input_shape.pop(1)
     self.input_spec = [InputSpec(shape=input_shape)]
     return outputs
def time_distributed_dense(x,
                           w,
                           b=None,
                           dropout=None,
                           input_dim=None,
                           output_dim=None,
                           timesteps=None):
    # Apply y.w + b for every temporal slice y of x.
    print(x.shape)
    print(w.shape)
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.shape(w)[1]

    print(output_dim)
    print(timesteps)
    print(input_dim)

    if dropout:
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x *= expanded_dropout_matrix

    x = K.reshape(x, (-1, input_dim))

    x = K.dot(x, w)
    if b:
        x = x + b
    x = K.reshape(x, (-1, timesteps, output_dim))
    return x
Example #28
    def call(self, H, mask=None):
        # energy = self.activation(K.dot(x, self.W0)+self.b0)
        # energy=K.dot(energy, self.W) + self.b
        # energy = K.reshape(energy, (-1, self.input_length))
        # energy = K.softmax(energy)
        # xx = K.batch_dot(energy,x, axes=(1, 1))
        # all=K.concatenate([xx,energy])
        # return all
        #      H_t=K.permute_dimensions(H,(0,2,1))        #H is [none, n, hidden] ; H_t is [none, hidden, n]
        #      temp=self.activation(K.permute_dimensions(K.dot(self.W1,H_t),(1,0,2)))   #tanh(W1 . Ht) was [da, none, n],  transpose to [none, da, n]
        #      temp=K.permute_dimensions(K.dot(self.W2,temp),(1,0,2))          #W2 . tanh(W1 . Ht) was [r, none, n], transpose to [none, r, n]
        H1 = H[:, :, :-1]
        attention_mask = H[:, :, -1]
        adder = (1.0 - tf.cast(attention_mask, tf.float32)) * -10000.0
        H_t = self.activation(K.dot(H1, self.W1))
        temp = K.permute_dimensions(K.dot(H_t, self.W2), (0, 2, 1))  # [?,r.n]
        #temp=K.square(temp)#make dis larger
        temp += K.repeat(adder, self.r)
        A = K.softmax(temp)  # A    [none, r, n]
        M = K.batch_dot(A, H1, axes=(2, 1))  # [none, r, hidden]

        if self.attention_regularizer_weight > 0.0:
            self.add_loss(self._attention_regularizer(A))

        if self.return_attention:
            return [M, A]

        # all=K.concatenate([M,A])  #[none, r, hidden+n]
        return M
def TD(x,
       w,
       b=None,
       dropout=None,
       input_dim=None,
       output_dim=None,
       timesteps=None,
       training=None):
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.shape(w)[1]
    if dropout is not None and 0. < dropout < 1.:
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x
Example #30
0
    def call(self, x):
        assert isinstance(x, list)
        a, b = x
        #print("input shape")
        #print(a.shape)
        #print(b.shape)
        #print("weight shape")
        temp_kernel = self.kernel
        #print(temp_kernel.shape)

        temp_kernel = K.reshape(temp_kernel,
                                (temp_kernel.shape[1], temp_kernel.shape[0]))
        #print(temp_kernel.shape)
        temp_kernel = K.repeat(temp_kernel, max_length)
        #print(temp_kernel.shape)
        ext_kernel = temp_kernel

        #multiplying each time steps of first input with weight
        res1 = Multiply()([a, ext_kernel])

        #multiplying each time steps of second input with weight
        res2 = Multiply()([b, ext_kernel])

        #print(res1.shape)
        #print(res2.shape)

        #computing cosine similarity between each time steps of first input to
        ## each time steps of second input
        out = Dot(axes=2, normalize=True)([res1, res2])
        #print(out.shape)
        return (out)
 def call(self, x, mask=None):
     print("AttentionDecoder.call")
     H = x
     x = K.permute_dimensions(H, (1, 0, 2))[-1, :, :]
     if self.stateful or self.state_input or len(self.state_outputs) > 0:
         initial_states = self.states[:]
     else:
         initial_states = self.get_initial_states(H)
     constants = self.get_constants(H) + [H]
     y_0 = x
     x = K.repeat(x, self.output_length)
     initial_states += [y_0]
     last_output, outputs, states = K.rnn(
         self.step,
         x,
         initial_states,
         go_backwards=self.go_backwards,
         mask=mask,
         constants=constants,
         unroll=self.unroll,
         input_length=self.output_length)
     if self.stateful and not self.state_input:
         self.updates = zip(self.states, states)
     self.states_to_transfer = states
     return outputs
Example #32
    def call(self, inputs, mask=None):
        rep_input1 = K.repeat(
            K.squeeze(inputs[1], axis=1),
            inputs[0].shape[1]) if inputs[1].shape[1] == 1 else inputs[1]
        conca_input = K.concatenate([inputs[0], rep_input1])
        e = K.dot(conca_input, self.wt_mid)
        if self.use_bias:
            e += self.b_mid
        e = K.tanh(e)
        e = dot_product(e, self.wt_out)
        if self.use_bias:
            e += self.b_out

        e = isr(e, self.alpha)
        wt = K.exp(e)

        # apply mask after the exp. will be re-normalized next
        if mask is not None and mask[0] is not None:
            mask = mask[0] if mask[1] is None else mask[0] & mask[1]
            wt *= K.cast(mask, K.floatx())
            # in some cases especially in the early stages of training the sum may be almost zero
            # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
            wt /= K.sum(wt, axis=1, keepdims=True) + EPSILON
        else:
            wt /= K.sum(wt, axis=1, keepdims=True)
            # a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        weighted_input = inputs[0] * K.expand_dims(wt)
        return K.sum(weighted_input, axis=1, keepdims=self.keepdims)
Example #33
def _loss_tensor(y_true, y_pred):
    max_val = K.max(y_pred,axis=-2) #temporal axis!
    max_val = K.repeat(max_val,K.shape(y_pred)[-2])
    print(K.eval(max_val))
    mask = K.cast(K.equal(max_val,y_pred),K.floatx())
    y_pred = mask * y_pred + (1-mask) * y_true
    return squared_hinge(y_true,y_pred)
Example #34
def time_distributed_dense(x, w, b=None, dropout=None,
                           input_dim=None, output_dim=None, timesteps=None):
    '''Apply y.w + b for every temporal slice y of x.
    '''
    if not input_dim:
        # won't work with TensorFlow
        input_dim = K.shape(x)[2]
    if not timesteps:
        # won't work with TensorFlow
        timesteps = K.shape(x)[1]
    if not output_dim:
        # won't work with TensorFlow
        output_dim = K.shape(w)[1]

    if dropout:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x *= expanded_dropout_matrix

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))

    x = K.dot(x, w)
    if b:
        x = x + b
    # reshape to 3D tensor
    x = K.reshape(x, (-1, timesteps, output_dim))
    return x
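A quick shape check for the helper above, with hypothetical dimensions and the bias left as None (this variant truth-tests b directly, so passing a tensor there would not work):

import numpy as np
from keras import backend as K

x = K.constant(np.random.rand(2, 5, 3))   # (batch=2, timesteps=5, input_dim=3)
w = K.constant(np.random.rand(3, 4))      # maps input_dim=3 to output_dim=4
y = time_distributed_dense(x, w, input_dim=3, output_dim=4, timesteps=5)
print(K.int_shape(y))                     # (batch, 5, 4): the same dense map applied at every timestep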
Example #35
 def call(self, x, mask=None):
     H = x
     x = K.permute_dimensions(H, (1, 0, 2))[-1, :, :]
     if self.stateful or self.state_input or len(self.state_outputs) > 0:
         initial_states = self.states[:]
     else:
         initial_states = self.get_initial_states(H)
     constants = self.get_constants(H) + [H]
     y_0 = x
     x = K.repeat(x, self.output_length)
     initial_states += [y_0]
     last_output, outputs, states = K.rnn(self.step,
                                          x,
                                          initial_states,
                                          go_backwards=self.go_backwards,
                                          mask=mask,
                                          constants=constants,
                                          unroll=self.unroll,
                                          input_length=self.output_length)
     if self.stateful and not self.state_input:
         self.updates = []
         for i in range(2):
             self.updates.append((self.states[i], states[i]))
     self.states_to_transfer = states
     return outputs
Example #36
    def call(self, x, mask=None):
        y = K.dot(x, self.att_W)
        if not self.activation:
            if K.backend() == 'theano':
                weights = K.theano.tensor.tensordot(self.att_v, y, axes=[0, 2])
            elif K.backend() == 'tensorflow':
                weights = tf.tensordot(self.att_v, y, axes=[[0], [2]])
        elif self.activation == 'tanh':
            if K.backend() == 'theano':
                weights = K.theano.tensor.tensordot(self.att_v,
                                                    K.tanh(y),
                                                    axes=[0, 2])
            elif K.backend() == 'tensorflow':
                weights = tf.tensordot(self.att_v, K.tanh(y), axes=[[0], [2]])
                # weights = K.tensorflow.python.ops.math_ops.tensordot(self.att_v, K.tanh(y), axes=[0, 2])

        weights = K.softmax(weights)

        out = x * K.permute_dimensions(K.repeat(weights, x.shape[2]),
                                       [0, 2, 1])
        if self.op == 'attsum':
            # out = out.sum(axis=1)
            out = K.sum(out, axis=1)
        elif self.op == 'attmean':
            out = out.sum(axis=1) / mask.sum(axis=1, keepdims=True)
        return K.cast(out, K.floatx())
Example #37
def time_distributed_dense(x, w, b=None, dropout=None,
                           input_dim=None, output_dim=None, timesteps=None, activation='linear'):
    '''Apply y.w + b for every temporal slice y of x.
    '''
    activation = activations.get(activation)

    if not input_dim:
        # won't work with TensorFlow
        input_dim = K.shape(x)[2]
    if not timesteps:
        # won't work with TensorFlow
        timesteps = K.shape(x)[1]
    if not output_dim:
        # won't work with TensorFlow
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))

    x = K.dot(x, w)
    if b:
        x = x + b
    # reshape to 3D tensor
    x = K.reshape(activation(x), (-1, timesteps, output_dim))
    return x
Example #38
def r2_keras(y_true, y_pred):
    y_true, y_pred = K.reshape(y_true, (-1, WRAP, 20)), K.reshape(
        y_pred, (-1, WRAP, 20))
    SS_res = K.sum(K.square(y_true - y_pred), axis=1)
    SS_tot = K.sum(K.square(y_true - K.repeat(K.mean(y_true, axis=1), WRAP)),
                   axis=1)
    return K.mean((1 - SS_res / (SS_tot + K.epsilon())))
Example #39
    def attention_call(self,
                       inputs,
                       cell_states,
                       attended,
                       attention_states,
                       attended_mask,
                       training=None):
        # only one attended sequence (verified in build)
        assert len(attended) == 1
        attended = attended[0]
        attended_mask = attended_mask[0]
        h_cell_tm1 = cell_states[0]

        # compute attention weights
        w = K.repeat(
            K.dot(h_cell_tm1, self.W_a) + self.b_UW,
            K.shape(attended)[1])
        u = K.dot(attended, self.U_a)  # TODO should be done externally of cell
        e = K.exp(K.dot(K.tanh(w + u), self.v_a) + self.b_v)

        if attended_mask is not None:
            e = e * K.cast(K.expand_dims(attended_mask, -1), K.dtype(e))

        # weighted average of attended
        a = e / K.sum(e, axis=1, keepdims=True)
        c = K.sum(a * attended, axis=1, keepdims=False)

        return c, [c]
Example #40
 def loss(y_true, y_pred):
     from plasma.conf import conf
     fac = MaxHingeTarget.fac
     #overall_fac = np.prod(np.array(K.shape(y_pred)[1:]).astype(np.float32))
     overall_fac = K.prod(K.cast(K.shape(y_pred)[1:],K.floatx()))
     max_val = K.max(y_pred,axis=-2) #temporal axis!
     max_val1 = K.repeat(max_val,K.shape(y_pred)[-2])
     mask = K.cast(K.equal(max_val1,y_pred),K.floatx())
     y_pred1 = mask * y_pred + (1-mask) * y_true
     weight_mask = K.mean(y_true,axis=-1)
     weight_mask = K.cast(K.greater(weight_mask,0.0),K.floatx()) #positive label!
     weight_mask = fac*weight_mask + (1 - weight_mask)
     #return weight_mask*squared_hinge(y_true,y_pred1)
     return conf['model']['loss_scale_factor']*overall_fac*weight_mask*hinge(y_true,y_pred1)
def time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.
    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: wether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.
    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x
Example #42
    def call(self, x, mask=None):
        input_shape = self.input_spec[0].shape
        en_seq = x
        x_input = x[:, input_shape[1]-1, :]
        x_input = K.repeat(x_input, input_shape[1])
        initial_states = self.get_initial_states(x_input)

        constants = super(PointerLSTM, self).get_constants(x_input)
        constants.append(en_seq)
        preprocessed_input = self.preprocess_input(x_input)

        last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                             initial_states,
                                             go_backwards=self.go_backwards,
                                             constants=constants,
                                             input_length=input_shape[1])

        print ('outputs')
        print (outputs)
        return outputs
Example #43
    def step(self, x_input, states):
        input_shape = self.input_spec[0].shape
        en_seq = states[-1]
        _, [h, c] = super(PointerLSTM, self).step(x_input, states[:-1])

        # vt*tanh(W1*e+W2*d)
        dec_seq = K.repeat(h, input_shape[1])
        #dec_seq = K.repeat(h, 2)
        print ('dec_seq')
        print (dec_seq)
        Eij = time_distributed_dense(en_seq, self.W1, output_dim=1)
        Dij = time_distributed_dense(dec_seq, self.W2, output_dim=1)
        U = self.vt * tanh(Eij + Dij)
        print ('U')
        print (U)
        U = K.squeeze(U, 2)
        print ('U squeezed')
        print (U)
        # make probability tensor
        pointer = softmax(U)
        return pointer, [h, c]
Example #44
		def step(x, states, weights):
			H = x
			h_tm1, c_tm1 = states
			W1, W2, W3, U, b1, b2, b3 = weights
			input_length = K.shape(x)[1]
			C = K.repeat(c_tm1, input_length)
			_HC = K.concatenate([H, C])
			_HC = K.reshape(_HC, (-1, input_dim + self.hidden_dim))
			energy = K.dot(_HC, W3) + b3
			energy = K.reshape(energy, (-1, input_length))
			energy = K.softmax(energy)
			x = K.batch_dot(energy, H, axes=(1, 1))
			z = K.dot(x, W1) + K.dot(h_tm1, U) + b1
			z0 = z[:, :self.hidden_dim]
			z1 = z[:, self.hidden_dim: 2 * self.hidden_dim]
			z2 = z[:, 2 * self.hidden_dim: 3 * self.hidden_dim]
			z3 = z[:, 3 * self.hidden_dim:]
			i = self.inner_activation(z0)
			f = self.inner_activation(z1)
			c = f * c_tm1 + i * self.activation(z2)
			o = self.inner_activation(z3)
			h = o * self.activation(c)
			y = self.activation(K.dot(h, W2) + b2)
			return y, [h, c]
Example #45
 def call(self, x, mask=None):
     H = x
     x = K.permute_dimensions(H, (1, 0, 2))[-1]
     if self.stateful or self.state_input or len(self.state_outputs) > 0:
         initial_states = self.states[:]
     else:
         initial_states = self.get_initial_states(H)
     constants = self.get_constants(H) + [H]
     y_0 = x
     x = K.repeat(x, self.output_length)
     initial_states += [y_0]
     last_output, outputs, states = K.rnn(self.step, x,
                                          initial_states,
                                          go_backwards=self.go_backwards,
                                          mask=mask,
                                          constants=constants,
                                          unroll=self.unroll,
                                          input_length=self.output_length)
     if self.stateful and not self.state_input:
         self.updates = []
         for i in range(2):
             self.updates.append((self.states[i], states[i]))
     self.states_to_transfer = states
     return outputs
Example #46
 def get_output(self, train=False):
     X = self.get_input(train)
     return K.repeat(X, self.n).dimshuffle(0, 2, 1)
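The .dimshuffle call above is Theano-only; a backend-agnostic sketch of the same repeat-then-transpose (not part of the original example) could use K.permute_dimensions instead:

 def get_output(self, train=False):
     X = self.get_input(train)                                    # (batch, dim)
     return K.permute_dimensions(K.repeat(X, self.n), (0, 2, 1))  # (batch, dim, n)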
vt = K.random_normal_variable((1, latent_dim), mean=0, scale=1)  # Gaussian distribution, (input_seq_length, 1)
print ("vt")
print (vt)

print ("decoder_hidden")
print (decoder_hidden)
#en_seq = Reshape((-1,1,latent_dim))(encoder_outputs) #?,latent_dim
#en_seq =K.squeeze(en_seq,0)
en_seq = encoder_outputs

#en_seq = K.repeat(en_seq, max_encoder_seq_length)
print ("en_seq")
print (en_seq)

#dec_seq = Reshape((-1,1,latent_dim))(decoder_hidden)
dec_seq = K.repeat(decoder_hidden, max_encoder_seq_length)
#dec_seq = Reshape((-1,1,latent_dim))(dec_seq) 
#dec_seq = K.squeeze(dec_seq,0)
print ("dec_seq")
print (dec_seq)

blendW1 = TimeDistributed(Dense(latent_dim))(en_seq)
#blendW1 = TimeDistributed(Dense(latent_dim)(en_seq) #?,input_seq_length,latent_dim
print ('blendW1')
print (blendW1)

#blendW2 = TimeDistributed(Dense(latent_dim),ouput_dim=1)(dec_seq)
blendW2 = TimeDistributed(Dense(latent_dim))(dec_seq)
print ('blendW2')
print (blendW2)
Example #48
 def call(self, x, mask=None):
     mask = K.cast(mask, 'float32')
     mask = K.repeat(mask, self.repeat_dim)
     mask = K.permute_dimensions(mask, (0, 2, 1))
     return x * mask
Example #49
	def build_model(self, input_shape):
		#input shape in None,input_len,hidden_dimension

		input_dim = input_shape[-1]
		output_dim = self.output_dim
		input_length = input_shape[1]
		hidden_dim = self.hidden_dim

		x = Input(batch_shape=input_shape)
		h_tm1 = Input(batch_shape=(input_shape[0], hidden_dim))
		c_tm1 = Input(batch_shape=(input_shape[0], hidden_dim))
		
		W1 = Dense(hidden_dim * 4,
					 kernel_initializer=self.kernel_initializer,
					 kernel_regularizer=self.kernel_regularizer)
		W2 = Dense(output_dim,
					 kernel_initializer=self.kernel_initializer,
					 kernel_regularizer=self.kernel_regularizer)
		W3 = Dense(1,
					 kernel_initializer=self.kernel_initializer,
					 kernel_regularizer=self.kernel_regularizer)
		U = Dense(hidden_dim * 4,
					kernel_initializer=self.kernel_initializer,
					kernel_regularizer=self.kernel_regularizer)

		'''
			1. Lambda() wraps an arbitrary expression as a Keras layer.
			2. It executes the given lambda on its inputs.
			**Parameters**
				>> output_shape: the shape you want the output to have.
				>> mask: optional masking.

			lambda x: K.repeat(x, input_length)
				repeats x across the encoder timesteps;
				input_length is the number of encoder unfoldings and
				x is the previous cell state (c_tm1) being repeated.
		'''
		C = Lambda(lambda x: K.repeat(x, input_length), output_shape=(input_length, input_dim))(c_tm1)
		_xC = concatenate([x, C])
		_xC = Lambda(lambda x: K.reshape(x, (-1, input_dim + hidden_dim)), output_shape=(input_dim + hidden_dim,))(_xC) #essentially transpose

		''' 
			alpha is softmax over input length 
		'''
		alpha = W3(_xC)
		alpha = Lambda(lambda x: K.reshape(x, (-1, input_length)), output_shape=(input_length,))(alpha)
		alpha = Activation('softmax')(alpha)

		_x = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=(1, 1)), output_shape=(input_dim,))([alpha, x])

		z = add([W1(_x), U(h_tm1)])

		z0, z1, z2, z3 = get_slices(z, 4)

		i = Activation(self.recurrent_activation)(z0)
		f = Activation(self.recurrent_activation)(z1)

		c = add([multiply([f, c_tm1]), multiply([i, Activation(self.activation)(z2)])])
		o = Activation(self.recurrent_activation)(z3)
		h = multiply([o, Activation(self.activation)(c)])
		y = Activation(self.activation)(W2(h))

		return Model([x, h_tm1, c_tm1], [y, h, c])
Example #50
	def build_model(self, input_shape):
	
		input_dim = input_shape[-1]
		output_dim = self.output_dim
		input_length = input_shape[1]
		hidden_dim = self.hidden_dim
		print "the input shape is ", input_shape, "hidden shape ", hidden_dim

		# print input_shape
		# print hidden_dim
		# raw_input("Verify Shapes")

		# x = K.variable(np.random.rand(1,input_shape[1],input_shape[2]))

		x = Input(batch_shape=input_shape)

		# Slicing doesn't work
		# slice_layer = Lambda(self.slice,output_shape=(1,hidden_dim))
		# x_tm1 = slice_layer(x)

		#Transposing, forget it.
		# x_tm1 = K.transpose(x_tm1)				#Does not work!
		
		# Let's try flattening inputs instead
		x_tm1 = Lambda(self.custom_flatten, output_shape=(input_shape[0], input_length*hidden_dim))(x)
		# x_tm1 = K.batch_flatten(x)


		h_tm1 = Input(batch_shape=(input_shape[0], hidden_dim))
		c_tm1 = Input(batch_shape=(input_shape[0], hidden_dim))
		
		# h_tm1 = K.variable(np.random.rand(1,hidden_dim))
		# c_tm1 = K.variable(np.random.rand(1,hidden_dim))

		W1 = Dense(hidden_dim * 4,
					 kernel_initializer=self.kernel_initializer,
					 kernel_regularizer=self.kernel_regularizer,
					 use_bias=False,
					 input_shape=(hidden_dim*input_length,),
					 name="W1")
		W2 = Dense(output_dim,
					 kernel_initializer=self.kernel_initializer,
					 kernel_regularizer=self.kernel_regularizer)
		W3 = Dense(1,
					 kernel_initializer=self.kernel_initializer,
					 kernel_regularizer=self.kernel_regularizer,
					 use_bias=False,
					 name="W3")
		U = Dense(hidden_dim * 4,
					kernel_initializer=self.kernel_initializer,
					kernel_regularizer=self.kernel_regularizer,
					use_bias=False,
					name="U")

		# print K.eval(x).shape
		# print K.eval(x_tm1).shape
		# print K.eval(h_tm1).shape
		# raw_input('check the dimenbasipon f0r x and h')
		# print "x_tm1"
		# print K.eval(x_tm1)
		# print K.eval(x_tm1).shape
		# raw_input("Berry Berry Berrifyxxxx")
		# print "W1 dot x_tm1"
		# print K.eval(W1(x_tm1))
		# print K.eval(W1(x_tm1)).shape
		# raw_input("Berry Berry Berrify")

		z = add([W1(x_tm1), U(h_tm1)])	

		z0, z1, z2, z3 = get_slices_custom(z, 4, 4*hidden_dim)

		i = Activation(self.recurrent_activation)(z0)
		f = Activation(self.recurrent_activation)(z1)

		temp1 = multiply([f, c_tm1])
		temp2 = multiply([i, Activation(self.activation)(z2)])

		c = add([temp1, temp2])
		# c = add([multiply([f, c_tm1]), multiply([i, Activation(self.activation)(z2)])])
		o = Activation(self.recurrent_activation)(z3)
		h = multiply([o, Activation(self.activation)(c)])

		# #Treating h as d_i (wrt Pointer Network nomenclature https://arxiv.org/pdf/1506.03134.pdf)

		H = Lambda(lambda x: K.repeat(x, input_length), output_shape=(input_length, input_dim))(h)
		_xH = concatenate([x, H])
		_xH = Lambda(lambda x: K.reshape(x, (-1, input_dim + hidden_dim)), output_shape=(input_dim + hidden_dim,))(_xH)

		# print K.eval(_xH)
		# print K.eval(_xH).shape
		# raw_input("Verify Shapes _xH")

		alpha = W3(_xH)
		alpha = Lambda(lambda x: K.reshape(x, (-1, input_length)), output_shape=(input_length,))(alpha)			#Transpose
		
		alpha = W2(alpha)
		alpha = Activation('softmax')(alpha)
		

		# softer = Lambda(self.custom_soft_max,output_shape=(input_length,))
		# alphas = softer(alpha)
		return Model([x, h_tm1, c_tm1], [alpha, h, c])