Python mat_mul 예제들, tensorflow.python.ops.math_ops.mat_mul Python 예제들

예제 #1

0

파일 보기

    def testFallbackErrorNotVisibleWhenFallbackMethodRaises(self):
        """Check that "_FallbackException" never shows up in the user traceback.

        ``math_ops.mat_mul`` is called with a 1x4 and a 1x6 operand, which is
        expected to raise ``errors.InvalidArgumentError``. The formatted
        traceback of that error must not mention "_FallbackException".
        """
        ctx = context.context()
        ctx.ensure_initialized()

        try:
            math_ops.mat_mul([[1., 1.] * 2], [[1., 1.] * 3])
        except errors.InvalidArgumentError:
            etype, value, tb = sys.exc_info()
            full_exception_text = " ".join(
                traceback.format_exception(etype, value, tb))
        else:
            # Without this branch a call that unexpectedly succeeds would fall
            # through to the assertion below with `full_exception_text`
            # unbound and die with a NameError instead of a test failure.
            self.fail("mat_mul was expected to raise InvalidArgumentError")

        self.assertNotRegex(full_exception_text, "_FallbackException")

예제 #2

0

파일 보기

 def __call__(self, hidden_output, scope=None):
     """Run `hidden_output` and the stored context through two projections.

     `hidden_output` is reshaped to `[1, self.input_size]`, concatenated
     with `self.prevcontext` along axis 1, multiplied by the transpose of
     `self.We2`, passed through `self._activation`, and finally multiplied
     by the transpose of `self.We1`.

     Args:
         hidden_output: Tensor reshaped to `[1, self.input_size]`.
         scope: Optional variable-scope name; the class name is used when
             it is falsy.

     Returns:
         The result of the second matrix product.
     """
     scope_name = scope or type(self).__name__
     with tf.variable_scope(scope_name):
         flat_hidden = tf.reshape(hidden_output, [1, self.input_size])
         joined = array_ops.concat([flat_hidden, self.prevcontext], 1)
         projected = math_ops.mat_mul(joined, self.We2, transpose_b=True)
         activated = self._activation(projected)
         return math_ops.mat_mul(activated, self.We1, transpose_b=True)

예제 #3

0

파일 보기

def _test_fully_connected(tensor_in_sizes, filter_in_sizes, bias_in_size=None):
    """Build one fully connected graph and hand it to the TFLite/TVM comparison.

    Args:
        tensor_in_sizes: Shape of the input placeholder; its first entry is
            kept as the leading dimension when the input is flattened.
        filter_in_sizes: Shape of the constant filter matrix.
        bias_in_size: Optional shape of a bias constant; when truthy a
            `bias_add` is appended after the matrix product.
    """

    def _num_elements(dims):
        # Product of all entries of a shape.
        count = 1
        for dim in dims:
            count *= dim
        return count

    input_elems = _num_elements(tensor_in_sizes)
    filter_elems = _num_elements(filter_in_sizes)

    # Both the input and the filter are filled with 1.0, 2.0, 3.0, ...
    input_values = [float(v) for v in range(1, input_elems + 1)]
    filter_values = [float(v) for v in range(1, filter_elems + 1)]
    assert int(input_elems / tensor_in_sizes[0]) == filter_in_sizes[0], \
        "input size and filter size are mismatched"

    with tf.Graph().as_default():
        in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype='float32')
        in_filter = constant_op.constant(
            filter_values, shape=filter_in_sizes, dtype='float32')

        # Flatten everything after the leading dimension (N H W C -> N H*W*C).
        flattened = array_ops.reshape(in_data, [tensor_in_sizes[0], -1])

        out = math_ops.mat_mul(flattened, in_filter)

        # Optional bias stage.
        if bias_in_size:
            assert bias_in_size[0] == filter_in_sizes[1], "bias and filter size are mismatched"
            bias_values = [float(v) for v in range(1, bias_in_size[0] + 1)]
            in_bias = constant_op.constant(
                bias_values, shape=bias_in_size, dtype='float32')
            out = nn_ops.bias_add(out, in_bias)

        feed = np.reshape(input_values, tensor_in_sizes).astype('float32')
        compare_tflite_with_tvm(feed, 'Placeholder:0', [in_data], [out])

예제 #4

0

파일 보기

    def call(self, inputs):
        """Dense transform whose kernel gradient is scaled by `self.lr_mul`.

        The kernel used is `self.coeff * self.kernel`, passed through an
        identity op whose custom gradient multiplies the incoming gradient
        by `self.lr_mul`.

        Args:
            inputs: Input tensor; rank above 2 goes through `tensordot`,
                otherwise a (sparse or dense) matrix product is used.

        Returns:
            The product plus bias (if `self.use_bias`), passed through
            `self.activation` when one is set.
        """
        @custom_gradient
        def lr_multiplier(x):
            # Forward pass is the identity; only the gradient is rescaled.
            forward = array_ops.identity(x)

            def grad(dy):
                return dy * self.lr_mul

            return forward, grad

        kernel = lr_multiplier(self.coeff * self.kernel)

        rank = len(inputs.shape)
        if rank <= 2:
            inputs = math_ops.cast(inputs, self._compute_dtype)
            if K.is_sparse(inputs):
                outputs = sparse_ops.sparse_tensor_dense_matmul(inputs, kernel)
            else:
                outputs = math_ops.mat_mul(inputs, kernel)
        else:
            # Contract the last input axis with the first kernel axis.
            outputs = standard_ops.tensordot(inputs, kernel, [[rank - 1], [0]])
            # In graph mode, restore the static shape of the result.
            if not context.executing_eagerly():
                leading_dims = inputs.shape.as_list()[:-1]
                outputs.set_shape(leading_dims + [self.units])

        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)
        if self.activation is None:
            return outputs
        return self.activation(outputs)  # pylint: disable=not-callable

예제 #5

0

파일 보기

 def update_context(self, last_output, useDropout_=False):
     """Recompute and store `self.context` from `last_output`.

     `last_output` is concatenated with `self.prevcontext` along axis 1,
     multiplied by the transpose of `self._kernel` and passed through ReLU.

     Args:
         last_output: Tensor concatenated in front of `self.prevcontext`.
         useDropout_: When truthy, dropout with `rate=0.5` is applied to
             the new context.

     Returns:
         The updated `self.context`.
     """
     stacked = array_ops.concat([last_output, self.prevcontext], 1)
     projected = math_ops.mat_mul(stacked, self._kernel, transpose_b=True)
     self.context = tf.nn.relu(projected)
     if not useDropout_:
         return self.context
     self.context = tf.nn.dropout(self.context, rate=0.5)
     return self.context

예제 #6

0

파일 보기

파일: spectral_normalization_core.py 프로젝트: johndpope/StyleGAN

    def call(self, inputs, training=None):
        """Apply a dense transform using a spectrally normalized kernel.

        The base kernel is `self.coeff * self.kernel`; when `self.lr_mul` is
        not 1.0 it is routed through an identity op whose gradient is
        multiplied by `self.lr_mul`. The kernel is then divided by `sigma_W`,
        which is computed from `self.u` with `self.power_iter` rounds of
        power iteration.

        Args:
            inputs: Layer input; converted with `ops.convert_to_tensor`.
            training: Passed through `self._get_training_value`; decides
                whether the stored vector `self.u` is updated.

        Returns:
            The input multiplied by the normalized kernel, plus bias when
            `self.use_bias` is set, passed through `self.activation` when
            one is configured.
        """
        if self.lr_mul == 1.0:
            W = self.coeff * self.kernel
        else:
            @custom_gradient
            def lr_multiplier(x):
                # Identity in the forward pass; gradient scaled by lr_mul.
                y = array_ops.identity(x)
                def grad(dy):
                    return dy * self.lr_mul
                return y, grad
            W = lr_multiplier(self.coeff * self.kernel)

        training = self._get_training_value(training)

        # Update the singular-vector estimates by power iteration.
        # NOTE(review): `v` is only bound inside this loop, so
        # `self.power_iter` must be at least 1 or the `sigma_W` line below
        # raises NameError.
        W_T = array_ops.transpose(W)
        u = array_ops.identity(self.u)
        for i in range(self.power_iter):
            v = nn_impl.l2_normalize(math_ops.matmul(u, W))  # 1 x filters
            u = nn_impl.l2_normalize(math_ops.matmul(v, W_T))
        # Spectral normalization: sigma_W = u W v^T.
        sigma_W = math_ops.matmul(math_ops.matmul(u, W), array_ops.transpose(v))
        # No gradient flows back through the power iteration.
        sigma_W = array_ops.stop_gradient(sigma_W)
        W_bar = W / array_ops.squeeze(sigma_W)

        # Store the new singular-vector estimate, unless `training` is
        # statically known to be False.
        training_value = tf_utils.constant_value(training)
        if training_value is not False:
            def u_update():
                def true_branch():
                    return self._assign_singular_vector(self.u, u)
                def false_branch():
                    return self.u
                return tf_utils.smart_cond(training, true_branch, false_branch)
            self.add_update(u_update)

        # Regular dense computation, using W_bar as the kernel.
        inputs = ops.convert_to_tensor(inputs)
        rank = common_shapes.rank(inputs)
        if rank > 2:
            # Broadcasting is required for the inputs.
            outputs = standard_ops.tensordot(inputs, W_bar, [[rank - 1], [0]])
            # Reshape the output back to the original ndim of the input.
            if not context.executing_eagerly():
                shape = inputs.shape.as_list()
                output_shape = shape[:-1] + [self.units]
                outputs.set_shape(output_shape)
        else:
            inputs = math_ops.cast(inputs, self._compute_dtype)
            outputs = math_ops.mat_mul(inputs, W_bar)
        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)
        if self.activation is not None:
            return self.activation(outputs)  # pylint: disable=not-callable
        return outputs

예제 #7

0

파일 보기

파일: network.py 프로젝트: zy4kamu/collaborator

 def build(self, inputs_shape):
     """Create the factorized kernel and the bias for this cell.

     The kernel is the product of two variables, `left_matrix` of shape
     `[input_depth + num_units, 64]` and `right_matrix` of shape
     `[64, 4 * num_units]`; the bias has shape `[4 * num_units]` and is
     zero-initialized.

     Args:
         inputs_shape: Shape whose entry at index 1 supplies the input
             depth through its `.value` attribute.

     Raises:
         ValueError: If that depth is `None`.
     """
     input_depth = inputs_shape[1].value
     if input_depth is None:
         raise ValueError(
             "Expected inputs.shape[-1] to be known, saw shape: %s" %
             inputs_shape)
     hidden_depth = self._num_units
     left_shape = [input_depth + hidden_depth, 64]
     self._left_matrix = self.add_variable("left_matrix", shape=left_shape)
     right_shape = [64, 4 * self._num_units]
     self._right_matrix = self.add_variable("right_matrix", shape=right_shape)
     self._kernel = math_ops.mat_mul(self._left_matrix, self._right_matrix)
     bias_init = init_ops.zeros_initializer(dtype=self.dtype)
     self._bias = self.add_variable(
         "bias", shape=[4 * self._num_units], initializer=bias_init)

예제 #8

0

파일 보기

파일: base_lib.py 프로젝트: Leg-end/tensorchainer

def dot(x, y):
    """Multiply `x` by `y`, flattening operands of rank above 2.

    When the rank of `x` is known and either operand has rank above 2, `x`
    is flattened to 2-D, `y` is transposed so its second-to-last axis comes
    first and flattened to 2-D, the two are multiplied, and the result is
    reshaped to `x_shape[:-1] + y_shape[:-2] + y_shape[-1:]`. Otherwise a
    sparse or dense matrix product is used directly.

    Args:
        x: Left operand; may be sparse on the direct path.
        y: Right operand.

    Returns:
        The product tensor.
    """
    x_dim = ndim(x)
    y_dim = ndim(y)
    needs_flatten = x_dim is not None and (x_dim > 2 or y_dim > 2)
    if not needs_flatten:
        if is_sparse(x):
            return sparse_ops.sparse_tensor_dense_matmul(x, y)
        return math_ops.mat_mul(x, y)

    x_shape = get_shape(x)
    y_shape = get_shape(y)
    # Permutation that moves y's second-to-last axis to the front.
    y_perm = list(range(y_dim))
    y_perm.insert(0, y_perm.pop(-2))
    x_flat = array_ops.reshape(x, [-1, x_shape[-1]])
    y_moved = array_ops.transpose(y, perm=y_perm)
    y_flat = array_ops.reshape(y_moved, [y_shape[-2], -1])
    product = math_ops.matmul(x_flat, y_flat)
    result_shape = x_shape[:-1] + y_shape[:-2] + y_shape[-1:]
    return array_ops.reshape(product, result_shape)

예제 #9

0

파일 보기

파일: test_forward.py 프로젝트: bddppq/tvm

def _test_fully_connected(tensor_in_sizes, filter_in_sizes, bias_in_size=None):
    """Build one fully connected graph and hand it to the TFLite/TVM comparison.

    Args:
        tensor_in_sizes: Shape of the input placeholder. The NumPy input is
            later transposed with axes `(0, 3, 1, 2)`, so four entries are
            required.
        filter_in_sizes: Shape of the constant filter matrix.
        bias_in_size: Optional shape of a bias constant; when truthy a
            `bias_add` is appended after the matrix product.
    """

    def _num_elements(dims):
        # Product of all entries of a shape.
        count = 1
        for dim in dims:
            count *= dim
        return count

    input_elems = _num_elements(tensor_in_sizes)
    filter_elems = _num_elements(filter_in_sizes)

    # Both the input and the filter are filled with 1.0, 2.0, 3.0, ...
    input_values = [float(v) for v in range(1, input_elems + 1)]
    filter_values = [float(v) for v in range(1, filter_elems + 1)]
    assert int(input_elems / tensor_in_sizes[0]) == filter_in_sizes[0], \
        "input size and filter size are mismatched"

    with tf.Graph().as_default():
        in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype='float32')
        in_filter = constant_op.constant(
            filter_values, shape=filter_in_sizes, dtype='float32')

        # Flatten everything after the leading dimension (N H W C -> N H*W*C).
        flattened = array_ops.reshape(in_data, [tensor_in_sizes[0], -1])

        out = math_ops.mat_mul(flattened, in_filter)

        # Optional bias stage.
        if bias_in_size:
            assert bias_in_size[0] == filter_in_sizes[1], "bias and filter size are mismatched"
            bias_values = [float(v) for v in range(1, bias_in_size[0] + 1)]
            in_bias = constant_op.constant(
                bias_values, shape=bias_in_size, dtype='float32')
            out = nn_ops.bias_add(out, in_bias)

        # The TVM side receives the same data with the last axis moved to
        # position 1.
        tflite_input = np.reshape(input_values, tensor_in_sizes).astype('float32')
        tvm_input = np.transpose(tflite_input, axes=(0, 3, 1, 2))
        compare_tflite_with_tvm(tflite_input, tvm_input,
                                'Placeholder:0', [in_data], [out])

예제 #10

0

파일 보기

    def call(self, inputs, state):
        """Run one step of the gated recurrent cell.

        `inputs` is split column-wise into an item embedding (first
        `self._num_units` columns), a user embedding (next
        `self._num_units` columns) and a time embedding (all remaining
        columns). A sigmoid "user gate" `xi` mixes the item and user
        embeddings, update/reset gates `u` and `r` are computed from the
        concatenation of the state and the gated embeddings, and the new
        state is `(1 - u) * state + u * k` with `k` a tanh candidate.

        The flags `self._modifying_eq1`, `self._modifying_eq2` and
        `self._modifying_eq3` each add the time embedding to the
        corresponding stage.

        Args:
            inputs: 2-D tensor sliced along axis 1 as described above.
            state: Previous hidden state.

        Returns:
            A pair `(new_h, new_h)`: the new hidden state, returned as both
            output and state.
        """
        # User gating

        # batch_size x num_units
        item_embedding = inputs[:, :self._num_units]
        user_embedding = inputs[:, self._num_units:2 * self._num_units]
        time_embedding = inputs[:, 2 * self._num_units:]

        # batch_size x num_units
        user_gate_h = math_ops.matmul(state,
                                      self._user_gate_h_weight,
                                      name="u_gate_h_matmul")
        user_gate_i = math_ops.matmul(item_embedding,
                                      self._user_gate_i_weight,
                                      name="u_gate_i_matmul")
        user_gate_u = math_ops.matmul(user_embedding,
                                      self._user_gate_u_weight,
                                      name="u_gate_u_matmul")

        # batch_size x num_units
        user_gate_h = nn_ops.bias_add(user_gate_h,
                                      self._user_gate_h_bias,
                                      name="u_gate_h_bias_add")
        user_gate_i = nn_ops.bias_add(user_gate_i,
                                      self._user_gate_i_bias,
                                      name="u_gate_i_bias_add")
        user_gate_u = nn_ops.bias_add(user_gate_u,
                                      self._user_gate_u_bias,
                                      name="u_gate_u_bias_add")

        # Terms summed inside the sigmoid of the user gate.
        to_sum_eq1 = [user_gate_h, user_gate_i, user_gate_u]

        # Optionally add a time-embedding term to the user gate.
        if self._modifying_eq1:
            user_gate_t = math_ops.matmul(time_embedding,
                                          self._user_gate_t_weight,
                                          name="u_gate_t_matmul")
            user_gate_t = nn_ops.bias_add(user_gate_t,
                                          self._user_gate_t_bias,
                                          name="u_gate_t_bias_add")
            to_sum_eq1.append(user_gate_t)

        # batch_size x num_units
        xi = math_ops.sigmoid(math_ops.add_n(to_sum_eq1), name="xi")

        # batch_size x num_units
        one_minus_xi = math_ops.subtract(1., xi, name="one_minus_xi")

        # Hidden state gating: xi weights the user embedding, (1 - xi) the
        # item embedding.
        gated_i = math_ops.multiply(one_minus_xi,
                                    item_embedding,
                                    name="gated_i")
        gated_u = math_ops.multiply(xi, user_embedding, name="gated_u")

        # batch_size x 3*num_units
        to_concat_eq2 = [state, gated_i, gated_u]

        # Optionally append the raw time embedding to the gate input.
        if self._modifying_eq2:
            to_concat_eq2.append(time_embedding)

        concat = tf.concat(to_concat_eq2, 1, name="concat_for_eq_two")

        # batch_size x 2*num_units
        u_r = math_ops.mat_mul(concat, self._h_gate_weight, name="matmul_u_r")
        u_r = math_ops.sigmoid(nn_ops.bias_add(u_r, self._h_gate_bias),
                               name="sigmoid_u_r")

        # Split into the update gate `u` and the reset gate `r`.
        # batch_size x num_units
        u, r = array_ops.split(u_r,
                               num_or_size_splits=2,
                               axis=1,
                               name="split_u_r")
        one_minus_u = math_ops.subtract(1., u, name="one_minus_u")

        # Update vector
        # batch_size x num_units
        gated_h = math_ops.multiply(r, state, name="gated_h")

        update_vector_h = math_ops.mat_mul(gated_h,
                                           self._update_vector_h_weight,
                                           name="update_vector_h_matmul")
        update_vector_i = math_ops.mat_mul(gated_i,
                                           self._update_vector_i_weight,
                                           name="update_vector_i_matmul")
        update_vector_u = math_ops.mat_mul(gated_u,
                                           self._update_vector_u_weight,
                                           name="update_vector_u_matmul")

        update_vector_h = nn_ops.bias_add(update_vector_h,
                                          self._update_vector_h_bias,
                                          name="update_vector_h_bias_add")
        update_vector_i = nn_ops.bias_add(update_vector_i,
                                          self._update_vector_i_bias,
                                          name="update_vector_i_bias_add")
        update_vector_u = nn_ops.bias_add(update_vector_u,
                                          self._update_vector_u_bias,
                                          name="update_vector_u_bias_add")

        # Terms summed inside the tanh of the candidate state.
        to_sum_eq3 = [update_vector_h, update_vector_i, update_vector_u]

        # Optionally add a time-embedding term to the candidate state.
        if self._modifying_eq3:
            update_vector_t = math_ops.mat_mul(time_embedding,
                                               self._update_vector_t_weight,
                                               name="update_vector_t_matmul")
            update_vector_t = nn_ops.bias_add(update_vector_t,
                                              self._update_vector_t_bias,
                                              name="update_vector_t_bias_add")
            to_sum_eq3.append(update_vector_t)

        k = math_ops.tanh(math_ops.add_n(to_sum_eq3), name="k")

        # Update hidden state: new_h = (1 - u) * state + u * k
        new_h = math_ops.add(math_ops.multiply(one_minus_u, state),
                             math_ops.multiply(u, k),
                             name="new_h")

        return new_h, new_h

예제 #11

0

파일 보기

 def loss(x):
     """Return the squared difference between the scalar `x @ kernel` and 1.

     `kernel` comes from the enclosing scope; the product is reshaped to a
     scalar before 1.0 is subtracted.
     """
     projection = math_ops.mat_mul(x, kernel)
     residual = array_ops.reshape(projection, []) - array_ops.identity(1.)
     return residual * residual

예제 #12

0

파일 보기

def stlstm_loop(lstm_size,
                input_data,
                nb_classes,
                usePrevGCA=False,
                previousGCA=None,
                iters=2,
                do_norm=False,
                useDropout=False):
    """Build a two-layer ST-LSTM with a global-context (GCA) refinement loop.

    Based on the multi-dimensional LSTM at
    https://github.com/philipperemy/tensorflow-multi-dimensional-lstm/blob/master/md_lstm.py

    The first ST-LSTM layer is run once over all (frame, joint) steps. Then,
    for each of `iters` iterations: per-step informativeness scores are
    computed with that iteration's GCA cell, the second ST-LSTM layer is run
    using those scores, the GCA cell's context is updated from the last
    second-layer output, and a softmax over classes is computed from the
    context.

    Relies on the module-level names `OUT_DIM1` and `JOINTS_ORDER`, which
    are defined outside this function.

    @param lstm_size: pair of hidden sizes; lstm_size[0] for the first layer,
        lstm_size[1] for the second
    @param input_data: the data to process of shape [batch,frames,joints,channels]
    @param nb_classes: number of rows of the `Wc` classification matrix
    @param usePrevGCA: if true, the first GCA cell starts from `previousGCA`
        when that tensor has any non-zero entry
    @param previousGCA: context tensor used when `usePrevGCA` is true
    @param iters: number of GCA iterations (and of GCA cells created)
    @param do_norm: forwarded to both STLSTMCell constructors
    @param useDropout: forwarded to `GCACell.update_context`
    returns (results, gca) - `results` is a list with one softmax output per
        iteration; `gca` is the context of the last GCA cell
    """

    with tf.variable_scope("ST-LSTM", reuse=tf.AUTO_REUSE):
        # Results list
        results = []
        # Create ST-LSTM cells
        cell = STLSTMCell(lstm_size[0],
                          input_shape=input_data.get_shape(),
                          initializer=tf.truncated_normal_initializer,
                          name="layer1",
                          do_norm=do_norm)

        cell2 = STLSTMCell(lstm_size[1],
                           input_shape=tf.TensorShape([lstm_size[0]]),
                           initializer=tf.truncated_normal_initializer,
                           name="layer2",
                           do_norm=do_norm)
        # Create the GCA cells (one per iteration)
        gca_cells = [GCACell(lstm_size, ite) for ite in range(1, iters + 1)]

        # Get the shape of the input (batch_size, x, y, channels)
        # shape = input_data.get_shape().as_list()
        shape = tf.shape(input_data)
        batch_size = shape[0]
        T_dim = shape[1]
        S_dim = shape[2]
        channels = shape[3]
        # Get the number of features (total number of input values per step)
        # features = S_dim * channels

        # The batch size is inferred from the tensor size
        x = tf.reshape(input_data, [batch_size, T_dim, S_dim, channels])

        # Reorder inputs to (t, s, batch_size, features) - t=T_dim, s=S_dim
        x = tf.transpose(x, [1, 2, 0, 3])
        # Reshape to (t*s, batch_size, features)
        x = tf.reshape(x, [-1, batch_size, channels])
        # Split tensor into t*s tensors of size (batch_size , features)
        # x = tf.split(axis=0, num_or_size_splits=T_dim*S_dim, value=x)

        # Create an input tensor array (literally an array of tensors) to use inside the loop
        inputs_ta = tf.TensorArray(dtype=tf.float32,
                                   size=T_dim * S_dim,
                                   name='input_array',
                                   dynamic_size=True,
                                   infer_shape=False)
        inputs_ta = inputs_ta.unstack(x)

        # Create a TensorArray for the order of the joints
        jointsorder_ta = tf.TensorArray(tf.int32,
                                        OUT_DIM1,
                                        clear_after_read=False)
        jointsorder_ta = jointsorder_ta.unstack(tf.constant(JOINTS_ORDER))

        # Function to get the previous joints id (cs_prev,hs_prev)
        # NOTE(review): `w_` is accepted but never used here.
        def get_prevS(t_, w_=1):
            # return S_dim + tf.mod(t_, S_dim) - tf.constant(w_)
            # return t_ - tf.constant(w_)
            return jointsorder_ta.read(tf.mod(t_, OUT_DIM1) - 1)

        # Function to get the previous time id (ct_prev,ht_prev)
        # NOTE(review): the default is S_dim, but both call sites in this
        # function pass OUT_DIM1 explicitly.
        def get_prevT(t_, w_=S_dim):
            # return tf.mod(t_, w_)  # - tf.constant(w_)
            return t_ - w_

        # Initial context: mean over all steps of the first-layer outputs.
        def init_context(output_layer1):
            return tf.reduce_mean(output_layer1.stack(), axis=0)

        # while_loop body that writes the GCA cell's score for step `id_`.
        # `it` and `outputs_ta` are looked up in the enclosing scope when
        # this function runs, i.e. they refer to the current iteration of
        # the `for it in ...` loop below and to the first-layer outputs.
        def process_information(id_, e_ta_):
            gca_ = gca_cells[it - 1]
            e_ta_ = e_ta_.write(id_, gca_(outputs_ta.read(id_)))
            return id_ + 1, e_ta_

        # Controls the initial index
        zero = tf.constant(0)
        # Placeholder binding: `e_sum` is rebound inside the iteration loop
        # below before `body2` (which reads it) is handed to tf.while_loop.
        e_sum = tf.constant(0)

        # Body of the while loop operation that applies the first ST-LSTM layer
        def body1(id_, outputs_ta_, states_ta_):
            # If the current position is less or equal than the width, we are in the first row
            # so we read the zero state we added before.
            # If not, get the sample located at a width distance.
            prevstate_T = tf.cond(
                tf.less_equal(id_, OUT_DIM1),
                lambda: states_ta_.read(T_dim * OUT_DIM1
                                        ),  # first row = zero state
                lambda: states_ta_.read(get_prevT(id_, OUT_DIM1))
            )  # other rows = previous time id (t-1)

            # If it is the first step we read the zero state, if not we read the immediately preceding one
            prevstate_S = tf.cond(
                tf.less(zero, tf.mod(id_, OUT_DIM1)),
                lambda: states_ta_.
                read(get_prevS(id_)
                     ),  # get previous joint state id (j-1) from JOINT ORDER
                lambda: states_ta_.read(T_dim * OUT_DIM1
                                        ))  # first joint - get zero state

            # We build the input state in both dimensions
            current_state = prevstate_S[0], prevstate_T[0], prevstate_S[
                1], prevstate_T[1]
            # Now we calculate the hidden state and the new cell state
            out, state = cell(inputs_ta.read(id_), current_state)
            # We write the output to the output tensor array
            outputs_ta_ = outputs_ta_.write(id_, out)
            # And save the output state to the state tensor array
            states_ta_ = states_ta_.write(id_, state)

            # Return outputs and incremented time step
            return id_ + 1, outputs_ta_, states_ta_  # , outputs_ta2_, states_ta2_

        # Body of the while loop operation that applies the second ST-LSTM layer
        def body2(id_, outputs_ta2_, states_ta2_, e_ta_):
            # Informativeness: this step's score divided by the sum over all
            # steps (`e_sum` is taken from the enclosing scope).
            r = e_ta_.read(id_) / e_sum
            prevstate_T2 = tf.cond(
                tf.less_equal(id_, OUT_DIM1),
                lambda: states_ta2_.read(T_dim * OUT_DIM1
                                         ),  # first row = zero state
                lambda: states_ta2_.read(get_prevT(id_, OUT_DIM1))
            )  # other rows = previous time id (t-1)

            # If it is the first step we read the zero state, if not we read the immediately preceding one
            prevstate_S2 = tf.cond(
                tf.less(zero, tf.mod(id_, OUT_DIM1)),
                lambda: states_ta2_.read(get_prevS(
                    id_)),  # get previous joint state id (j-1)
                lambda: states_ta2_.read(T_dim * OUT_DIM1)
            )  # first joint - get zero state !
            # Process current state and then the new state; the layer input
            # is the first-layer output for the same step.
            current_state2 = prevstate_S2[0], prevstate_T2[0], prevstate_S2[
                1], prevstate_T2[1]
            out2, state2 = cell2(outputs_ta.read(id_), current_state2, r)
            outputs_ta2_ = outputs_ta2_.write(id_, out2)
            states_ta2_ = states_ta2_.write(id_, state2)

            # Return outputs and incremented time step
            return id_ + 1, outputs_ta2_, states_ta2_, e_ta_

        # Loop output condition. The index, given by the time, should be less than the
        # total number of steps (T_dim * S_dim)
        def condition1(id_, outputs_ta_,
                       states_ta_):  # , outputs_ta2_, states_ta2_
            return tf.less(id_, T_dim * S_dim)  # T_dim * S_dim

        def condition(id_, e_ta_):  # , outputs_ta2_, states_ta2_
            return tf.less(id_, T_dim * S_dim)

        def condition2(id_, outputs_ta2_, states_ta2_,
                       e_ta_):  # , outputs_ta2_, states_ta2_
            return tf.less(id_, T_dim * S_dim)

        # Init ST-LSTM1 states and output arrays
        # NOTE(review): these arrays are sized with OUT_DIM1 while the loops
        # run for T_dim * S_dim steps - presumably OUT_DIM1 equals the number
        # of joints; confirm.
        states_ta = tf.TensorArray(dtype=tf.float32,
                                   size=T_dim * OUT_DIM1 + 1,
                                   name='state_array_1',
                                   clear_after_read=False)
        outputs_ta = tf.TensorArray(dtype=tf.float32,
                                    size=T_dim * OUT_DIM1,
                                    name='output_array_1',
                                    clear_after_read=False)
        # initial cell hidden states: last position of the array = LSTMStateTuple filled with zeros
        states_ta = states_ta.write(
            T_dim * OUT_DIM1,
            LSTMStateTuple(tf.zeros([batch_size, lstm_size[0]], tf.float32),
                           tf.zeros([batch_size, lstm_size[0]], tf.float32)))

        # Loop 1: First ST-LSTM layer
        index = tf.constant(0)
        _, outputs_ta, states_ta = tf.while_loop(
            condition1,
            body1, [index, outputs_ta, states_ta],
            parallel_iterations=1)
        for it in range(1, iters + 1):
            # Fresh state/output arrays for the second layer in every iteration.
            states_ta2 = tf.TensorArray(dtype=tf.float32,
                                        size=T_dim * OUT_DIM1 + 1,
                                        name='state_array2',
                                        clear_after_read=False)
            outputs_ta2 = tf.TensorArray(dtype=tf.float32,
                                         size=T_dim * OUT_DIM1,
                                         name='output_array2')

            # initial cell hidden states: last position of the array = LSTMStateTuple filled with zeros
            states_ta2 = states_ta2.write(
                T_dim * OUT_DIM1,
                LSTMStateTuple(
                    tf.zeros([batch_size, lstm_size[1]], tf.float32),
                    tf.zeros([batch_size, lstm_size[1]], tf.float32)))

            # Informativeness tensors
            # e_ta = tf.TensorArray(tf.float32, T_dim * OUT_DIM1, name='e_it{}'.format(it), clear_after_read=False)
            e_ta = tf.TensorArray(tf.float32,
                                  T_dim * OUT_DIM1,
                                  name='e_array',
                                  clear_after_read=False)

            # # Loop 1: First ST-LSTM layer
            # index = tf.constant(0)
            # _, outputs_ta, states_ta = tf.while_loop(condition1, body1, [index, outputs_ta, states_ta],
            #                                          parallel_iterations=1)

            # Initialize context 0: either the supplied previous context (when
            # requested and it has any non-zero entry) or the mean of the
            # first-layer outputs.
            if it == 1:
                if usePrevGCA:
                    initial_context = tf.cond(
                        tf.less(tf.constant(0, dtype=tf.int64),
                                tf.count_nonzero(previousGCA)),
                        lambda: previousGCA, lambda: init_context(outputs_ta))
                    gca_cells[0].set_prevcontext(initial_context)
                else:
                    gca_cells[0].set_prevcontext(init_context(outputs_ta))

            # Process e: one informativeness score per step, then their sum.
            index = tf.constant(0)
            _, e_ta = tf.while_loop(condition,
                                    process_information, [index, e_ta],
                                    parallel_iterations=1)
            e_sum = tf.reduce_sum(e_ta.stack(), axis=0)

            # Loop 2: Second ST-LSTM layer
            index = tf.constant(0)
            _, outputs_ta2, states_ta2, _ = tf.while_loop(
                condition2,
                body2, [index, outputs_ta2, states_ta2, e_ta],
                parallel_iterations=1)

            # Update context from the last second-layer output and hand it
            # to the next iteration's GCA cell, if there is one.
            ctx = gca_cells[it - 1].update_context(
                outputs_ta2.read(S_dim * T_dim - 1), useDropout)
            # it += 1
            if it < iters:
                gca_cells[it].prevcontext = ctx

            # Compute Softmax from context.
            # NOTE(review): "Wc" is requested in every iteration; with the
            # enclosing scope's reuse=tf.AUTO_REUSE this presumably returns
            # the same variable each time - confirm.
            Wc = tf.get_variable("Wc", [nb_classes, lstm_size[0]],
                                 tf.float32,
                                 tf.truncated_normal_initializer(),
                                 trainable=True)
            y = math_ops.mat_mul(Wc,
                                 gca_cells[it - 1].context,
                                 transpose_b=True)
            y = tf.nn.softmax(tf.transpose(y))
            results.append(y)

        # Extract the output tensors from the processed tensor array
        # outputs = outputs_ta2.stack()
        # states = states_ta2.stack()

        # Reshape outputs to match the shape of the input
        # y = tf.reshape(outputs, [T_dim, S_dim, batch_size, lstm_size[0]])   # For outputs_ta
        # states = tf.reshape(states, [T_dim,S_dim,batch_size,2,lstm_size[0]])
        # y = tf.reshape(outputs, [T_dim, S_dim, batch_size, lstm_size[1]])

        # Reorder the dimensions to match the input
        # y = tf.transpose(y, [2, 0, 1, 3])

        # Global Context
        gca = gca_cells[-1].context

        # Return the per-iteration softmax outputs and the final global context
        return results, gca