Example 1
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(K.ones_like(inputs),
                                                        self.dropout,
                                                        training=training,
                                                        count=2)
        if (0 < self.recurrent_dropout < 1
                and self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(states[1]),
                self.recurrent_dropout,
                training=training,
                count=2)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        if 0 < self.dropout < 1.:
            inputs_f = inputs * dp_mask[0]
            inputs_c = inputs * dp_mask[1]
        else:
            inputs_f = inputs
            inputs_c = inputs

        if 0 < self.recurrent_dropout < 1.:
            h_tm1_f = h_tm1 * rec_dp_mask[0]
            h_tm1_c = h_tm1 * rec_dp_mask[1]
        else:
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1

        x_f = self.input_conv(inputs_f,
                              self.kernel_f,
                              self.bias_f,
                              padding=self.padding)
        x_c = self.input_conv(inputs_c,
                              self.kernel_c,
                              self.bias_c,
                              padding=self.padding)
        h_f = self.recurrent_conv(h_tm1_f, self.recurrent_kernel_f)
        h_c = self.recurrent_conv(h_tm1_c, self.recurrent_kernel_c)

        f = self.recurrent_activation(x_f + h_f)
        cf = self.recurrent_activation(x_f + h_f - self.beta)
        ci = self.activation(x_c + h_c)

        c = f * c_tm1 + (1. - cf) * ci

        h = c
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True

        return h, [h, c]
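The update rule in this example couples a forget gate f and a shifted copy cf of the same pre-activation, and the hidden state equals the cell state. Below is a minimal NumPy sketch of that arithmetic (an illustration, not part of the original cell): dense matrix products stand in for input_conv/recurrent_conv, and sigmoid/tanh are assumed for recurrent_activation/activation.

import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def step(x, h_prev, c_prev, W_f, U_f, b_f, W_c, U_c, b_c, beta):
    # shared pre-activation for the forget gate and its shifted copy
    z_f = x @ W_f + h_prev @ U_f + b_f
    f = _sigmoid(z_f)                            # forget gate
    cf = _sigmoid(z_f - beta)                    # shifted gate, used as (1 - cf) on the candidate
    ci = np.tanh(x @ W_c + h_prev @ U_c + b_c)   # candidate state
    c = f * c_prev + (1.0 - cf) * ci
    return c, (c, c)                             # h = c, state = [h, c]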
Example 2
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
                self.dropout,
                training=training,
                count=1)
        if (0 < self.recurrent_dropout < 1 and self._recurrent_masks is None):
            _recurrent_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.recurrent_dropout,
                training=training,
                count=1)
            self._recurrent_masks = _recurrent_mask

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_masks = self._recurrent_masks

        h_tm1 = states[0]  # previous state

        if 0. < self.dropout < 1.:
            inputs *= dp_mask[0]

        if 0. < self.recurrent_dropout < 1.:
            h_tm1 *= rec_dp_masks[0]

        h = K.dot(inputs, self.kernel)
        h = h + (h_tm1 * self.recurrent_kernel)

        if self.use_bias:
            h = K.bias_add(h, self.bias)

        h = self.activation(h)

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, [h]
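Note that the recurrent term above is an element-wise product (h_tm1 * self.recurrent_kernel) rather than a matrix product, so each unit only sees its own previous activation. A minimal NumPy sketch of that recurrence follows (an illustration; the ReLU activation is an assumption).

import numpy as np

def step(x, h_prev, kernel, recurrent_kernel, bias):
    # recurrent_kernel has shape (units,): each unit recurs only onto itself
    h = x @ kernel + h_prev * recurrent_kernel + bias
    return np.maximum(h, 0.0)   # ReLU chosen here purely for illustration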
Example 3
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                K.ones_like(inputs),
                self.dropout,
                training=training,
                count=1)
        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(states[0]),
                self.recurrent_dropout,
                training=training,
                count=1)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state

        if 0 < self.dropout < 1.:
            inputs = inputs * dp_mask[0]

        if 0 < self.recurrent_dropout < 1.:
            h_tm1 = h_tm1 * rec_dp_mask[0]

        u1 = self.input_conv(inputs, self.kernel, self.bias, padding=self.padding)
        u2 = self.recurrent_conv(h_tm1, self.recurrent_kernel)
        u = self.recurrent_activation(u1 + u2)

        h = (1 - u) * h_tm1 + u * inputs
        
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True

        return h, [h]
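This cell computes a single gate u from the input and the previous state and interpolates directly between the previous state and the raw input, so input and state must share a shape. A minimal NumPy sketch of the update (dense products stand in for the convolutions; shapes and activations are assumptions):

import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def step(x, h_prev, W, U, b):
    u = _sigmoid(x @ W + h_prev @ U + b)   # update gate
    return (1.0 - u) * h_prev + u * x      # blend previous state with the raw input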
Example 4
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
                self.dropout,
                training=training,
                count=1)
        if (0 < self.recurrent_dropout < 1
                and self._nested_recurrent_masks is None):
            _nested_recurrent_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.recurrent_dropout,
                training=training,
                count=self.depth)
            self._nested_recurrent_masks = _nested_recurrent_mask

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_masks = self._nested_recurrent_masks

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1:self.depth + 1]  # previous carry states

        if 0. < self.dropout < 1.:
            inputs *= dp_mask[0]

        h, c = self.nested_recurrence(inputs,
                                      hidden_state=h_tm1,
                                      cell_states=c_tm1,
                                      recurrent_masks=rec_dp_masks,
                                      current_depth=0)

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, c
Example 5
    def call(self, inputs, states, training=None):
        samples, inFeatures = states[0].shape
        h_tm1 = states[0]  # previous state
        time_step = states[1]
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
                self.dropout,
                training=training)
        if (0 < self.recurrent_dropout < 1
                and self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.recurrent_dropout,
                training=training)

        dp_mask = self._dropout_mask
        rec_dp_mask = self._recurrent_dropout_mask

        if dp_mask is not None:
            inputs *= dp_mask

        if rec_dp_mask is not None:
            h_tm1 *= rec_dp_mask

        if self.split_method:
            # Update State, module-by-module
            h_mod = []
            unitsPerMod = self.units // self.clock_numPeriods

            def if_true():
                hModule = K.dot(h_tm1[:, s:],
                                self.rec_kernel_c_mod[i]) + K.dot(
                                    inputs, self.kernel_c_mod[i])
                if self.use_bias:
                    hModule = K.bias_add(hModule, self.bias_mod[i])
                if self.recurrent_activation is not None:
                    hModule = self.recurrent_activation(hModule)
                return hModule

            def if_false():
                return hModule

            for i, period in enumerate(self.clock_periods):
                s = i * unitsPerMod
                e = (i + 1) * unitsPerMod
                hModule = h_tm1[:, s:e]
                h_mod.append(
                    tf.cond(K.equal(K.tf.mod(time_step[0][0], period), 0),
                            if_true, if_false))
            hidden = K.concatenate(h_mod)

        else:
            # Update State, all at once, then only use certain updates
            h = K.dot(inputs, self.kernel) + K.dot(
                h_tm1, self.recurrent_kernel_c * self.cw_mask)
            if self.bias is not None:
                h = K.bias_add(h, self.bias)
            if self.recurrent_activation is not None:
                h = self.recurrent_activation(h)

            h = K.switch(K.equal(K.tf.mod(time_step, self.cw_periods), 0), h,
                         h_tm1)
            hidden = h

        # Calculate Output
        output = K.dot(hidden, self.recurrent_kernel_o)
        if self.activation is not None:
            output = self.activation(output)

        # Properly set learning phase on output tensor.
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                output._uses_learning_phase = True
        return output, [hidden, time_step + 1]
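Both branches implement clockwork-style updates: a module (a slice of the hidden units) is recomputed only at time steps that are multiples of its period and otherwise keeps its previous value. A minimal NumPy sketch of the second, all-at-once branch (an illustration; periods is assumed to hold one period per hidden unit):

import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def clockwork_step(x, h_prev, t, W, U, b, periods):
    h_new = _sigmoid(x @ W + h_prev @ U + b)
    active = (t % periods) == 0              # units whose period divides the step index
    return np.where(active, h_new, h_prev)   # inactive units keep their old state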
Example 6
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                K.ones_like(inputs),
                self.dropout,
                training=training,
                count=4)
        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(states[0]),
                self.recurrent_dropout,
                training=training,
                count=4)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        if self.implementation == 1:
            if 0 < self.dropout < 1.:
                inputs_i = inputs * dp_mask[0]
                inputs_f = inputs * dp_mask[1]
                inputs_c = inputs * dp_mask[2]
                inputs_o = inputs * dp_mask[3]
            else:
                inputs_i = inputs
                inputs_f = inputs
                inputs_c = inputs
                inputs_o = inputs
            x_i = K.dot(inputs_i, self.kernel_i)
            x_f = K.dot(inputs_f, self.kernel_f)
            x_c = K.dot(inputs_c, self.kernel_c)
            x_o = K.dot(inputs_o, self.kernel_o)
            if self.use_bias:
                x_i = K.bias_add(x_i, self.bias_i)
                x_f = K.bias_add(x_f, self.bias_f)
                x_c = K.bias_add(x_c, self.bias_c)
                x_o = K.bias_add(x_o, self.bias_o)

            if 0 < self.recurrent_dropout < 1.:
                h_tm1_i = h_tm1 * rec_dp_mask[0]
                h_tm1_f = h_tm1 * rec_dp_mask[1]
                h_tm1_c = h_tm1 * rec_dp_mask[2]
                h_tm1_o = h_tm1 * rec_dp_mask[3]
            else:
                h_tm1_i = h_tm1
                h_tm1_f = h_tm1
                h_tm1_c = h_tm1
                h_tm1_o = h_tm1
            i = self.recurrent_activation(x_i + K.dot(h_tm1_i,
                                                      self.recurrent_kernel_i))
            f = self.recurrent_activation(x_f + K.dot(h_tm1_f,
                                                      self.recurrent_kernel_f))
            c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1_c,
                                                            self.recurrent_kernel_c))
            o = self.recurrent_activation(x_o + K.dot(h_tm1_o,
                                                      self.recurrent_kernel_o))
        else:
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]
            z = K.dot(inputs, self.kernel)
            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]
            z += K.dot(h_tm1, self.recurrent_kernel)
            if self.use_bias:
                z = K.bias_add(z, self.bias)

            z0 = z[:, :self.units]
            z1 = z[:, self.units: 2 * self.units]
            z2 = z[:, 2 * self.units: 3 * self.units]
            z3 = z[:, 3 * self.units:]

            i = self.recurrent_activation(z0)
            f = self.recurrent_activation(z1)
            c = f * c_tm1 + i * self.activation(z2)
            o = self.recurrent_activation(z3)

        h = o * self.activation(c)

        # Apply the output projection
        h = K.dot(h, self.projection)

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, [h, c]
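Apart from the projection applied at the end, this is a standard LSTM step; the projection makes the recurrent state smaller than the cell state. A minimal NumPy sketch of the fused-kernel path followed by the projection (an assumption; gate order i, f, c, o as in the slicing above, biases folded into b):

import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstmp_step(x, h_prev, c_prev, W, U, b, P, units):
    z = x @ W + h_prev @ U + b
    i = _sigmoid(z[:, :units])
    f = _sigmoid(z[:, units:2 * units])
    g = np.tanh(z[:, 2 * units:3 * units])
    o = _sigmoid(z[:, 3 * units:])
    c = f * c_prev + i * g
    h = (o * np.tanh(c)) @ P      # recurrent/output state projected to P.shape[1] units
    return h, (h, c)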
Example 7
    def call(self, inputs, states, time, constants=None, training=None, **kwargs):
        old_vertices, neighbors, mapping, reverse_mapping = constants
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                K.ones_like(inputs),
                self.dropout,
                training=training,
                count=4)
        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(states[0]),
                self.recurrent_dropout,
                training=training,
                count=4)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state
        h_tm2 = kwargs['previous_state'][0]
        c_tm2 = kwargs['previous_state'][1]

        previous_position = reverse_mapping[:, time]
        c = tf.convert_to_tensor(numpy.arange(K.int_shape(previous_position)[0]), dtype=tf.int32)
        previous_position_4_gather = K.tf.stack([c, previous_position], axis=-1)

        ng_rows = K.tf.gather_nd(neighbors, previous_position_4_gather)

        def sum_rows(input_ng):
            ng_row, batch_map = input_ng
            ng_num = K.sum(ng_row, axis=-1)
            batch_ng = K.tf.where(K.equal(ng_row, 1))[:, 0]
            current_positions = K.gather(batch_map, batch_ng)

            def sum_unknown(input_time):
                return tf.cond(input_time < time, lambda: h_tm1, lambda: h_tm2)

            tmp_states = K.map_fn(sum_unknown, current_positions, dtype=tf.float32)
            tmp_states = K.tf.div_no_nan(K.sum(tmp_states, axis=[0, 1]), ng_num)
            return tmp_states

        ngs = K.map_fn(sum_rows, (ng_rows, mapping), dtype=tf.float32)

        if self.implementation == 1:
            if 0 < self.dropout < 1.:
                inputs_i = inputs * dp_mask[0]
                inputs_f = inputs * dp_mask[1]
                inputs_c = inputs * dp_mask[2]
                inputs_o = inputs * dp_mask[3]
            else:
                inputs_i = inputs
                inputs_f = inputs
                inputs_c = inputs
                inputs_o = inputs

            x_i = K.dot(inputs_i, self.W_i)
            x_f = K.dot(inputs_f, self.W_f)
            x_f_avg = K.dot(inputs_f, self.W_f)
            x_c = K.dot(inputs_c, self.W_c)
            x_o = K.dot(inputs_o, self.W_o)

            if self.use_bias:
                x_i = K.bias_add(x_i, self.bias_i)
                x_f = K.bias_add(x_f, self.bias_f)
                x_f_avg = K.bias_add(x_f_avg, self.bias_f)
                x_c = K.bias_add(x_c, self.bias_c)
                x_o = K.bias_add(x_o, self.bias_o)

            if 0 < self.recurrent_dropout < 1.:
                h_tm1_i = h_tm1 * rec_dp_mask[0]
                h_tm1_f = h_tm1 * rec_dp_mask[1]
                h_tm1_c = h_tm1 * rec_dp_mask[2]
                h_tm1_o = h_tm1 * rec_dp_mask[3]
            else:
                h_tm1_i = h_tm1
                h_tm1_f = h_tm1
                h_tm1_c = h_tm1
                h_tm1_o = h_tm1
            i = x_i + K.dot(h_tm1_i, self.U_i) + K.dot(ngs, self.Un_i)
            f_avg = x_f_avg + K.dot(h_tm1, self.Un_f)
            f = x_f + K.dot(h_tm1_f, self.U_f)
            c = x_c + K.dot(h_tm1_c, self.U_c) + K.dot(ngs, self.Un_c)
            o = x_o + K.dot(h_tm1_o, self.U_o) + K.dot(ngs, self.Un_o)

            i = self.recurrent_activation(i)
            f_avg = self.recurrent_activation(f_avg)
            f = self.recurrent_activation(f)
            o = self.recurrent_activation(o)
            c = self.activation(c)
        else:
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]
            z = K.dot(inputs, self.W)
            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]
            if self.use_bias:
                z = K.bias_add(z, self.bias)

            i = z[:, :self.units]
            f_avg = z[:, self.units: 2 * self.units]
            f = z[:, self.units: 2 * self.units]
            c = z[:, 2 * self.units: 3 * self.units]
            o = z[:, 3 * self.units:]

            i += K.dot(h_tm1, self.U_i) + K.dot(ngs, self.Un_i)
            f_avg += K.dot(h_tm1, self.Un_f)
            f += K.dot(h_tm1, self.U_f)
            o += K.dot(h_tm1, self.U_o) + K.dot(ngs, self.Un_o)
            c += K.dot(h_tm1, self.U_c) + K.dot(ngs, self.Un_c)

            i = self.recurrent_activation(i)
            f_avg = self.recurrent_activation(f_avg)
            f = self.recurrent_activation(f)
            o = self.recurrent_activation(o)
            c = self.activation(c)

        def sum_memories(input_ng):
            ng_row, batch_map = input_ng
            ng_num = K.sum(ng_row, axis=-1)
            batch_ng = K.tf.where(K.equal(ng_row, 1))[:, 0]
            current_positions = K.gather(batch_map, batch_ng)

            def sum_unknown_memories(input_time):
                return tf.cond(input_time < time, lambda: f_avg * c_tm1, lambda: f_avg * c_tm2)

            tmp_states = K.map_fn(sum_unknown_memories, current_positions, dtype=tf.float32)
            tmp_states = K.tf.div_no_nan(K.sum(tmp_states, axis=[0, 1]), ng_num)
            return tmp_states

        memory = K.map_fn(sum_memories, (ng_rows, mapping), dtype=tf.float32)
        memory += f * c_tm1 + i * c

        h = o * self.activation(memory)
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, [h, memory]
Example 8
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                K.ones_like(inputs),
                self.dropout,
                training=training,
                count=4)
        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(states[1]),
                self.recurrent_dropout,
                training=training,
                count=4)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        if 0 < self.dropout < 1.:
            inputs_i = inputs * dp_mask[0]
            inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[2]
            inputs_o = inputs * dp_mask[3]
        else:
            inputs_i = inputs
            inputs_f = inputs
            inputs_c = inputs
            inputs_o = inputs

        if 0 < self.recurrent_dropout < 1.:
            h_tm1_i = h_tm1 * rec_dp_mask[0]
            h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[2]
            h_tm1_o = h_tm1 * rec_dp_mask[3]
        else:
            h_tm1_i = h_tm1
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1
            h_tm1_o = h_tm1

        x_i = self.input_conv(inputs_i, w=self.kernel_i,
                              w_1x1=self.kernel_1x1_i, b=self.bias_i,
                              padding=self.padding)
        x_f = self.input_conv(inputs_f, w=self.kernel_f,
                              w_1x1=self.kernel_1x1_f, b=self.bias_f,
                              padding=self.padding)
        x_c = self.input_conv(inputs_c, w=self.kernel_c,
                              w_1x1=self.kernel_1x1_c, b=self.bias_c,
                              padding=self.padding)
        x_o = self.input_conv(inputs_o, w=self.kernel_o,
                              w_1x1=self.kernel_1x1_o, b=self.bias_o,
                              padding=self.padding)
        h_i = self.recurrent_conv(h_tm1_i,
                                  self.recurrent_kernel_i,
                                  self.recurrent_kernel_1x1_i)
        h_f = self.recurrent_conv(h_tm1_f,
                                  self.recurrent_kernel_f,
                                  self.recurrent_kernel_1x1_f)
        h_c = self.recurrent_conv(h_tm1_c,
                                  self.recurrent_kernel_c,
                                  self.recurrent_kernel_1x1_c)
        h_o = self.recurrent_conv(h_tm1_o,
                                  self.recurrent_kernel_o,
                                  self.recurrent_kernel_1x1_o)

        i = self.recurrent_activation(x_i + h_i)
        f = self.recurrent_activation(x_f + h_f)
        c = f * c_tm1 + i * self.activation(x_c + h_c)
        o = self.recurrent_activation(x_o + h_o)
        h = o * self.activation(c)

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, [h, c]
Example 9
    def call(self, inputs, states, training=None):
        h_tm1 = states[0]  # previous memory

        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(K.ones_like(inputs),
                                                        self.dropout,
                                                        training=training,
                                                        count=3)
        if (0 < self.recurrent_dropout < 1
                and self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(h_tm1),
                self.recurrent_dropout,
                training=training,
                count=3)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        if self.implementation == 1:
            if 0. < self.dropout < 1.:
                inputs_z = inputs * dp_mask[0]
                inputs_r = inputs * dp_mask[1]
                inputs_h = inputs * dp_mask[2]
            else:
                inputs_z = inputs
                inputs_r = inputs
                inputs_h = inputs

            x_z = K.dot(inputs_z, self.kernel_z)
            x_r = K.dot(inputs_r, self.kernel_r)
            x_h = K.dot(inputs_h, self.kernel_h)
            if self.use_bias:
                x_z = K.bias_add(x_z, self.input_bias_z)
                x_r = K.bias_add(x_r, self.input_bias_r)
                x_h = K.bias_add(x_h, self.input_bias_h)

            if 0. < self.recurrent_dropout < 1.:
                h_tm1_z = h_tm1 * rec_dp_mask[0]
                h_tm1_r = h_tm1 * rec_dp_mask[1]
                h_tm1_h = h_tm1 * rec_dp_mask[2]
            else:
                h_tm1_z = h_tm1
                h_tm1_r = h_tm1
                h_tm1_h = h_tm1

            recurrent_z = K.dot(h_tm1_z, self.recurrent_kernel_z)
            recurrent_r = K.dot(h_tm1_r, self.recurrent_kernel_r)
            if self.reset_after and self.use_bias:
                recurrent_z = K.bias_add(recurrent_z, self.recurrent_bias_z)
                recurrent_r = K.bias_add(recurrent_r, self.recurrent_bias_r)

            z = self.recurrent_activation(x_z + recurrent_z)
            r = self.recurrent_activation(x_r + recurrent_r)

            # reset gate applied after/before matrix multiplication
            if self.reset_after:
                recurrent_h = K.dot(h_tm1_h, self.recurrent_kernel_h)
                if self.use_bias:
                    recurrent_h = K.bias_add(recurrent_h,
                                             self.recurrent_bias_h)
                recurrent_h = r * recurrent_h
            else:
                recurrent_h = K.dot(r * h_tm1_h, self.recurrent_kernel_h)

            hh = self.activation(x_h + recurrent_h)
        else:
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]

            # inputs projected by all gate matrices at once
            matrix_x = K.dot(inputs, self.kernel)
            if self.use_bias:
                # biases: bias_z_i, bias_r_i, bias_h_i
                matrix_x = K.bias_add(matrix_x, self.input_bias)
            x_z = matrix_x[:, :self.units]
            x_r = matrix_x[:, self.units:2 * self.units]
            x_h = matrix_x[:, 2 * self.units:]

            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]

            if self.reset_after:
                # hidden state projected by all gate matrices at once
                matrix_inner = K.dot(h_tm1, self.recurrent_kernel)
                if self.use_bias:
                    matrix_inner = K.bias_add(matrix_inner,
                                              self.recurrent_bias)
            else:
                # hidden state projected separately for update/reset and new
                matrix_inner = K.dot(h_tm1,
                                     self.recurrent_kernel[:, :2 * self.units])

            recurrent_z = matrix_inner[:, :self.units]
            recurrent_r = matrix_inner[:, self.units:2 * self.units]

            z = self.recurrent_activation(x_z + recurrent_z)
            r = self.recurrent_activation(x_r + recurrent_r)

            if self.reset_after:
                recurrent_h = r * matrix_inner[:, 2 * self.units:]
            else:
                recurrent_h = K.dot(r * h_tm1,
                                    self.recurrent_kernel[:, 2 * self.units:])

            hh = self.activation(x_h + recurrent_h)

        # previous and candidate state mixed by update gate
        h = z * h_tm1 + (1 - z) * hh

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True

        return h, [h]
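The two reset_after branches differ only in where the reset gate is applied: after the recurrent projection (r * (h_tm1 @ U_h)) or before it ((r * h_tm1) @ U_h). A minimal NumPy sketch of both placements (an illustration with biases omitted):

import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gru_step(x, h_prev, W_z, W_r, W_h, U_z, U_r, U_h, reset_after=True):
    z = _sigmoid(x @ W_z + h_prev @ U_z)     # update gate
    r = _sigmoid(x @ W_r + h_prev @ U_r)     # reset gate
    if reset_after:
        hh = np.tanh(x @ W_h + r * (h_prev @ U_h))
    else:
        hh = np.tanh(x @ W_h + (r * h_prev) @ U_h)
    return z * h_prev + (1.0 - z) * hh       # previous and candidate state mixed by z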
Example 10
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
                self.dropout,
                training=training,
                count=4)
        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.recurrent_dropout,
                training=training,
                count=4)
        if (0 < self.zoneout_c < 1 and
                self._zoneout_mask_c is None):
            self._zoneout_mask_c = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.zoneout_c,
                training=training,
                count=1)
            
        if (0 < self.zoneout_h < 1 and
                self._zoneout_mask_h is None):
            self._zoneout_mask_h = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.zoneout_h,
                training=training,
                count=1)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        if self.implementation == 1:
            if 0 < self.dropout < 1.:
                inputs_i = inputs * dp_mask[0]
                inputs_f = inputs * dp_mask[1]
                inputs_c = inputs * dp_mask[2]
                inputs_o = inputs * dp_mask[3]
            else:
                inputs_i = inputs
                inputs_f = inputs
                inputs_c = inputs
                inputs_o = inputs
            x_i = K.dot(inputs_i, self.kernel_i)
            x_f = K.dot(inputs_f, self.kernel_f)
            x_c = K.dot(inputs_c, self.kernel_c)
            x_o = K.dot(inputs_o, self.kernel_o)
            if self.use_bias:
                x_i = K.bias_add(x_i, self.bias_i)
                x_f = K.bias_add(x_f, self.bias_f)
                x_c = K.bias_add(x_c, self.bias_c)
                x_o = K.bias_add(x_o, self.bias_o)

            if 0 < self.recurrent_dropout < 1.:
                h_tm1_i = h_tm1 * rec_dp_mask[0]
                h_tm1_f = h_tm1 * rec_dp_mask[1]
                h_tm1_c = h_tm1 * rec_dp_mask[2]
                h_tm1_o = h_tm1 * rec_dp_mask[3]
            else:
                h_tm1_i = h_tm1
                h_tm1_f = h_tm1
                h_tm1_c = h_tm1
                h_tm1_o = h_tm1
                
            i = self.recurrent_activation(self.ln(x_i + K.dot(h_tm1_i,
                                                              self.recurrent_kernel_i)))
            f = self.recurrent_activation(self.ln(x_f + K.dot(h_tm1_f,
                                                              self.recurrent_kernel_f)))
            c = f * c_tm1 + i * self.activation(self.ln(x_c + K.dot(h_tm1_c,
                                                                    self.recurrent_kernel_c)))
            o = self.recurrent_activation(self.ln(x_o + K.dot(h_tm1_o,
                                                              self.recurrent_kernel_o)))

        h = o * self.activation(self.ln(c))
        
        if 0 < self.dropout + self.recurrent_dropout + self.zoneout_c + self.zoneout_h:
            if training is None:
                h._uses_learning_phase = True
                
        if 0 < self.zoneout_h < 1:
            h = K.in_train_phase(K.dropout(h - h_tm1, self.zoneout_h),
                                 h - h_tm1)
            h = h * (1. - self.zoneout_h) + h_tm1
            
        if 0 < self.zoneout_c < 1:
            c = K.in_train_phase(K.dropout(c - c_tm1, self.zoneout_c),
                                 c - c_tm1)
            c = c * (1. - self.zoneout_c) + c_tm1
        
        return h, [h, c]
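The final two blocks implement zoneout: during training a random subset of units keeps its previous value, while at inference the new and previous values are linearly mixed. A minimal NumPy sketch of that behaviour (an assumption, mirroring the in_train_phase/dropout construction above):

import numpy as np

def zoneout(new, prev, rate, training, rng=np.random):
    if training:
        zoned = rng.random(new.shape) < rate   # units that keep their previous value
        return np.where(zoned, prev, new)
    return (1.0 - rate) * new + rate * prev    # expected value of the training behaviour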
Example 11
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
                self.dropout,
                training=training,
                count=2)
        if (0 < self.recurrent_dropout < 1
                and self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.recurrent_dropout,
                training=training,
                count=2)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        if self.implementation == 1:
            if 0 < self.dropout < 1.:
                inputs_f = inputs * dp_mask[0]
                inputs_c = inputs * dp_mask[1]
            else:
                inputs_f = inputs
                inputs_c = inputs

            x_f = K.dot(inputs_f, self.kernel_f)
            x_c = K.dot(inputs_c, self.kernel_c)

            if self.use_bias:
                x_f = K.bias_add(x_f, self.bias_f)
                x_c = K.bias_add(x_c, self.bias_c)

            if 0 < self.recurrent_dropout < 1.:
                h_tm1_f = h_tm1 * rec_dp_mask[0]
                h_tm1_c = h_tm1 * rec_dp_mask[1]
            else:
                h_tm1_f = h_tm1
                h_tm1_c = h_tm1

            f = self.recurrent_activation(
                x_f + K.dot(h_tm1_f, self.recurrent_kernel_f))
            c = f * c_tm1 + (1. - f) * self.activation(
                x_c + K.dot(h_tm1_c, self.recurrent_kernel_c))
        else:
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]

            z = K.dot(inputs, self.kernel)

            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]

            z += K.dot(h_tm1, self.recurrent_kernel)

            if self.use_bias:
                z = K.bias_add(z, self.bias)

            z0 = z[:, :self.units]
            z1 = z[:, self.units:2 * self.units]

            f = self.recurrent_activation(z0)
            c = f * c_tm1 + (1. - f) * self.activation(z1)

        h = c
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, [h, c]
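In both implementations a single gate f is shared between the old cell state and, via (1 - f), the new candidate, and the hidden output is the cell state itself. A minimal NumPy sketch of this coupled-gate update (an illustration; sigmoid and tanh assumed):

import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def step(x, c_prev, W_f, U_f, b_f, W_c, U_c, b_c):
    f = _sigmoid(x @ W_f + c_prev @ U_f + b_f)
    c = f * c_prev + (1.0 - f) * np.tanh(x @ W_c + c_prev @ U_c + b_c)
    return c, (c, c)   # h = c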
Example 12
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = [_generate_dropout_mask(
                K.ones_like(inputs),
                self.dropout,
                training=training,
                count=1)]
        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = [_generate_dropout_mask(
                K.ones_like(states[1]),
                self.recurrent_dropout,
                training=training,
                count=1)]

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[0]  # previous carry state

        if 0 < self.dropout < 1.:
            #inputs_i = inputs * dp_mask[0]
            #inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[0]
            #inputs_o = inputs * dp_mask[3]
        else:
            #inputs_i = inputs
            #inputs_f = inputs
            inputs_c = inputs
            #inputs_o = inputs

        if 0 < self.recurrent_dropout < 1.:
            #h_tm1_i = h_tm1 * rec_dp_mask[0]
            #h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[0]
            #h_tm1_o = h_tm1 * rec_dp_mask[3]
        else:
            #h_tm1_i = h_tm1
            #h_tm1_f = h_tm1
            h_tm1_c = h_tm1
            #h_tm1_o = h_tm1

        #x_i = self.input_conv(inputs_i, self.kernel_i, self.bias_i,
        #                      padding=self.padding)
        #x_f = self.input_conv(inputs_f, self.kernel_f, self.bias_f,
        #                      padding=self.padding)
        x_c = self.input_conv(inputs_c, self.kernel_c, self.bias_c,
                              padding=self.padding)
        #x_o = self.input_conv(inputs_o, self.kernel_o, self.bias_o,
        #                      padding=self.padding)
        #h_i = self.recurrent_conv(h_tm1_i,
        #                          self.recurrent_kernel_i)
        #h_f = self.recurrent_conv(h_tm1_f,
        #                          self.recurrent_kernel_f)
        h_c = self.recurrent_conv(h_tm1_c,
                                  self.recurrent_kernel_c)
        #h_o = self.recurrent_conv(h_tm1_o,
        #                          self.recurrent_kernel_o)

        #i = self.recurrent_activation(x_i + h_i)
        #f = self.recurrent_activation(x_f + h_f)
        f = self._retention_ratio
        c = f * c_tm1 + (1-f) * self.activation(x_c + h_c)
        #o = self.recurrent_activation(x_o + h_o)
        # h = o * self.activation(c)

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                c._uses_learning_phase = True

        return c, [c, c]
Example 13
    def call(self, inputs, states, training=None):
        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state
        seriesNum = inputs.shape[2]
        dataDim = inputs.shape[3]
        channels = inputs.shape[1]
        # print('=========')
        # print(h_tm1.shape)
        # print(c_tm1.shape)
        if channels == 1:
            inputs_inner = tf.reshape(inputs, (-1, seriesNum, dataDim))
            inputs_inter = tf.reshape(inputs, (-1, seriesNum, dataDim))
            # h_tm1_inner = h_tm1
            # h_tm1_inter = h_tm1
            # c_tm1_inner = c_tm1
            # c_tm1_inter = c_tm1
            # h_tm1_inner = tf.reshape(h_tm1_inner,(-1,seriesNum,dataDim))
            # h_tm1_inter = tf.reshape(h_tm1_inter,(-1,seriesNum,dataDim))
            # c_tm1_inner = tf.reshape(c_tm1_inner,(-1,seriesNum,dataDim))
            # c_tm1_inter = tf.reshape(c_tm1_inter,(-1,seriesNum,dataDim))
        else:
            inputs_inner = inputs[:, 0:1, :, :]
            inputs_inner = tf.reshape(inputs_inner, (-1, seriesNum, dataDim))
            inputs_inter = inputs[:, 1:2, :, :]
            inputs_inter = tf.reshape(inputs_inter, (-1, seriesNum, dataDim))
            # h_tm1_inner = h_tm1[:,0:1,:,:]
            # h_tm1_inter = h_tm1[:,1:2,:,:]
            # c_tm1_inner = c_tm1[:,0:1,:,:]
            # c_tm1_inter = c_tm1[:,1:2,:,:]
            # h_tm1_inner = tf.reshape(h_tm1_inner,(-1,seriesNum,dataDim))
            # h_tm1_inter = tf.reshape(h_tm1_inter,(-1,seriesNum,dataDim))
            # c_tm1_inner = tf.reshape(c_tm1_inner,(-1,seriesNum,dataDim))
            # c_tm1_inter = tf.reshape(c_tm1_inter,(-1,seriesNum,dataDim))
        h_tm1_inner = h_tm1[:, 0:1, :, :]
        h_tm1_inter = h_tm1[:, 1:2, :, :]
        c_tm1_inner = c_tm1[:, 0:1, :, :]
        c_tm1_inter = c_tm1[:, 1:2, :, :]
        h_tm1_inner = tf.reshape(h_tm1_inner, (-1, seriesNum, self.units))
        h_tm1_inter = tf.reshape(h_tm1_inter, (-1, seriesNum, self.units))
        c_tm1_inner = tf.reshape(c_tm1_inner, (-1, seriesNum, self.units))
        c_tm1_inter = tf.reshape(c_tm1_inter, (-1, seriesNum, self.units))
        # print('dot shape')
        # print(inputs_inter.shape)
        # print(self.S_kernel.shape)
        inputs_inter = K.dotSelf(self.S_kernel, inputs_inter)
        # print(inputs_inter.shape)
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            #print('start drop')
            self._dropout_mask = _generate_dropout_mask(
                K.ones_like(inputs_inner),
                self.dropout,
                training=training,
                count=8)
        if (0 < self.recurrent_dropout < 1
                and self._recurrent_dropout_mask is None):
            #print('start recurrent_dropout')
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(c_tm1_inter),
                self.recurrent_dropout,
                training=training,
                count=8)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        if 0 < self.dropout < 1.:
            inner_inputs_i = inputs_inner * dp_mask[0]
            inner_inputs_f = inputs_inner * dp_mask[1]
            inner_inputs_c = inputs_inner * dp_mask[2]
            inner_inputs_o = inputs_inner * dp_mask[3]

            inter_inputs_i = inputs_inter * dp_mask[4]
            inter_inputs_f = inputs_inter * dp_mask[5]
            inter_inputs_c = inputs_inter * dp_mask[6]
            inter_inputs_o = inputs_inter * dp_mask[7]
        else:
            inner_inputs_i = inputs_inner
            inner_inputs_f = inputs_inner
            inner_inputs_c = inputs_inner
            inner_inputs_o = inputs_inner

            inter_inputs_i = inputs_inter
            inter_inputs_f = inputs_inter
            inter_inputs_c = inputs_inter
            inter_inputs_o = inputs_inter
        if 0 < self.recurrent_dropout < 1.:
            inner_h_tm1_i = h_tm1_inner * rec_dp_mask[0]
            inner_h_tm1_f = h_tm1_inner * rec_dp_mask[1]
            inner_h_tm1_c = h_tm1_inner * rec_dp_mask[2]
            inner_h_tm1_o = h_tm1_inner * rec_dp_mask[3]

            inter_h_tm1_i = h_tm1_inter * rec_dp_mask[4]
            inter_h_tm1_f = h_tm1_inter * rec_dp_mask[5]
            inter_h_tm1_c = h_tm1_inter * rec_dp_mask[6]
            inter_h_tm1_o = h_tm1_inter * rec_dp_mask[7]
        else:
            inner_h_tm1_i = h_tm1_inner
            inner_h_tm1_f = h_tm1_inner
            inner_h_tm1_c = h_tm1_inner
            inner_h_tm1_o = h_tm1_inner

            inter_h_tm1_i = h_tm1_inter
            inter_h_tm1_f = h_tm1_inter
            inter_h_tm1_c = h_tm1_inter
            inter_h_tm1_o = h_tm1_inter

        x_i_inner = K.dot(inner_inputs_i, self.inner_kernel_i)
        x_f_inner = K.dot(inner_inputs_f, self.inner_kernel_f)
        x_o_inner = K.dot(inner_inputs_o, self.inner_kernel_o)
        x_c_inner = K.dot(inner_inputs_c, self.inner_kernel_c)

        x_i_inter = K.dot(inter_inputs_i, self.inter_kernel_i)
        x_f_inter = K.dot(inter_inputs_f, self.inter_kernel_f)
        x_o_inter = K.dot(inter_inputs_o, self.inter_kernel_o)
        x_c_inter = K.dot(inter_inputs_c, self.inter_kernel_c)

        h_i_inner = K.dot(inner_h_tm1_i, self.inner_recurrent_kernel_i)
        h_f_inner = K.dot(inner_h_tm1_f, self.inner_recurrent_kernel_f)
        h_o_inner = K.dot(inner_h_tm1_o, self.inner_recurrent_kernel_o)
        h_c_inner = K.dot(inner_h_tm1_c, self.inner_recurrent_kernel_c)

        h_i_inter = K.dot(inter_h_tm1_i, self.inter_recurrent_kernel_i)
        h_f_inter = K.dot(inter_h_tm1_f, self.inter_recurrent_kernel_f)
        h_o_inter = K.dot(inter_h_tm1_o, self.inter_recurrent_kernel_o)
        h_c_inter = K.dot(inter_h_tm1_c, self.inter_recurrent_kernel_c)
        if self.use_bias:
            # x_i_inner = K.bias_add(x_i_inner, self.inner_bias_i)
            # x_f_inner = K.bias_add(x_f_inner, self.inner_bias_f)
            # x_o_inner = K.bias_add(x_o_inner, self.inner_bias_o)
            # x_c_inner = K.bias_add(x_c_inner, self.inner_bias_c)

            # x_i_inter = K.bias_add(x_i_inter, self.inter_bias_i)
            # x_f_inter = K.bias_add(x_f_inter, self.inter_bias_f)
            # x_o_inter = K.bias_add(x_o_inter, self.inter_bias_o)
            # x_c_inter = K.bias_add(x_c_inter, self.inter_bias_c)

            x_i_inner = x_i_inner + self.inner_bias_i
            x_f_inner = x_f_inner + self.inner_bias_f
            x_o_inner = x_o_inner + self.inner_bias_o
            x_c_inner = x_c_inner + self.inner_bias_c

            x_i_inter = x_i_inter + self.inter_bias_i
            x_f_inter = x_f_inter + self.inter_bias_f
            x_o_inter = x_o_inter + self.inter_bias_o
            x_c_inter = x_c_inter + self.inter_bias_c
        inner_i = self.recurrent_activation(x_i_inner + h_i_inner)
        inner_f = self.recurrent_activation(x_f_inner + h_f_inner)
        inner_o = self.recurrent_activation(x_o_inner + h_o_inner)
        inner_c = inner_f * c_tm1_inner + inner_i * self.activation(x_c_inner +
                                                                    h_c_inner)
        inner_h = inner_o * self.activation(inner_c)

        # temp = inner_o + h_o_inter
        # temp2=  inner_o + x_o_inter
        inter_i = self.recurrent_activation(x_i_inter + h_i_inter)
        inter_f = self.recurrent_activation(x_f_inter + h_f_inter)
        inter_o = self.recurrent_activation(x_o_inter + h_o_inter)
        inter_c = inter_f * c_tm1_inter + inter_i * self.activation(x_c_inter +
                                                                    h_c_inter)
        inter_h = inter_o * self.activation(inter_c)
        inner_h = tf.reshape(inner_h,
                             (-1, 1, inner_h.shape[1], inner_h.shape[2]))
        inter_h = tf.reshape(inter_h,
                             (-1, 1, inter_h.shape[1], inter_h.shape[2]))
        inner_c = tf.reshape(inner_c,
                             (-1, 1, inner_c.shape[1], inner_c.shape[2]))
        inter_c = tf.reshape(inter_c,
                             (-1, 1, inter_c.shape[1], inter_c.shape[2]))

        # print('concat')
        # print(inputs.shape)
        # print(inter_i.shape)
        # print(x_c_inter.shape)
        # print(h_c_inter.shape)
        # print(inter_o.shape)
        # print(inner_h.shape)
        # print(inter_h.shape)
        # print(inner_c.shape)
        # print(inter_c.shape)
        h = tf.concat([inner_h, inter_h], 1)
        c = tf.concat([inner_c, inter_c], 1)
        # print('hshape')
        # print(h.shape)
        # print(c.shape)
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True

        return h, [h, c]
Example 14
    def call(self, inputs, states, training=None):
        """We need to reimplmenet `call` entirely rather than reusing that
        from `GRUCell` since there are lots of differences.

        Args:
            inputs: One tensor which is stacked by 3 inputs (x, m, s)
                x and m are of shape (n_batch * input_dim).
                s is of shape (n_batch, 1).
            states: states and other values from the previous step.
                (h_tm1, x_keep_tm1, s_prev_tm1)
        """
        # Get inputs and states
        input_x = inputs[:, :self.true_input_dim]  # inputs x, m, s
        input_m = inputs[:, self.true_input_dim:-1]
        input_s = inputs[:, -1:]
        # Need to add broadcast for time_stamp if using theano backend.
        if K.backend() == 'theano':
            input_s = K.pattern_broadcast(input_s, [False, True])
        h_tm1, x_keep_tm1, s_prev_tm1 = states
        # previous memory ([n_batch * self.units])
        # previous input x ([n_batch * input_dim])
        # and the subtraction term (of delta_t^d in Equation (2))
        # ([n_batch * input_dim])
        input_1m = K.cast_to_floatx(1.) - input_m
        input_d = input_s - s_prev_tm1

        # Get dropout
        if 0. < self.dropout < 1. and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(K.ones_like(input_x),
                                                        self.dropout,
                                                        training=training,
                                                        count=3)
        if (0. < self.recurrent_dropout < 1.
                and self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(h_tm1),
                self.recurrent_dropout,
                training=training,
                count=3)
        dp_mask = self._dropout_mask
        rec_dp_mask = self._recurrent_dropout_mask

        if self.feed_masking:
            if 0. < self.dropout < 1. and self._masking_dropout_mask is None:
                self._masking_dropout_mask = _generate_dropout_mask(
                    K.ones_like(input_m),
                    self.dropout,
                    training=training,
                    count=3)
            m_dp_mask = self._masking_dropout_mask

        # Compute decay if any
        if self.input_decay is not None:
            gamma_di = input_d * self.input_decay_kernel
            if self.use_decay_bias:
                gamma_di = K.bias_add(gamma_di, self.input_decay_bias)
            gamma_di = self.input_decay(gamma_di)
        if self.hidden_decay is not None:
            gamma_dh = K.dot(input_d, self.hidden_decay_kernel)
            if self.use_decay_bias:
                gamma_dh = K.bias_add(gamma_dh, self.hidden_decay_bias)
            gamma_dh = self.hidden_decay(gamma_dh)
        if self.feed_masking and self.masking_decay is not None:
            gamma_dm = input_d * self.masking_decay_kernel
            if self.use_decay_bias:
                gamma_dm = K.bias_add(gamma_dm, self.masking_decay_bias)
            gamma_dm = self.masking_decay(gamma_dm)

        # Get the imputed or decayed input if needed
        # and `x_keep_t` for the next time step

        if self.input_decay is not None:
            x_keep_t = K.switch(input_m, input_x, x_keep_tm1)
            x_t = K.switch(input_m, input_x, gamma_di * x_keep_t)
        elif self.x_imputation == 'forward':
            x_t = K.switch(input_m, input_x, x_keep_tm1)
            x_keep_t = x_t
        elif self.x_imputation == 'zero':
            x_t = K.switch(input_m, input_x, K.zeros_like(input_x))
            x_keep_t = x_t
        elif self.x_imputation == 'raw':
            x_t = input_x
            x_keep_t = x_t
        else:
            raise ValueError('No input decay or invalid x_imputation '
                             '{}.'.format(self.x_imputation))

        # Get decayed hidden if needed
        if self.hidden_decay is not None:
            h_tm1d = gamma_dh * h_tm1
        else:
            h_tm1d = h_tm1

        # Get decayed masking if needed
        if self.feed_masking:
            m_t = input_1m
            if self.masking_decay is not None:
                m_t = gamma_dm * m_t

        # Apply the dropout
        if 0. < self.dropout < 1.:
            x_z, x_r, x_h = x_t * dp_mask[0], x_t * dp_mask[1], x_t * dp_mask[2]
            if self.feed_masking:
                m_z, m_r, m_h = (m_t * m_dp_mask[0], m_t * m_dp_mask[1],
                                 m_t * m_dp_mask[2])
        else:
            x_z, x_r, x_h = x_t, x_t, x_t
            if self.feed_masking:
                m_z, m_r, m_h = m_t, m_t, m_t
        if 0. < self.recurrent_dropout < 1.:
            h_tm1_z, h_tm1_r = (
                h_tm1d * rec_dp_mask[0],
                h_tm1d * rec_dp_mask[1],
            )
        else:
            h_tm1_z, h_tm1_r = h_tm1d, h_tm1d

        # Get z_t, r_t, hh_t
        z_t = K.dot(x_z, self.kernel_z) + K.dot(h_tm1_z,
                                                self.recurrent_kernel_z)
        r_t = K.dot(x_r, self.kernel_r) + K.dot(h_tm1_r,
                                                self.recurrent_kernel_r)
        hh_t = K.dot(x_h, self.kernel_h)
        if self.feed_masking:
            z_t += K.dot(m_z, self.masking_kernel_z)
            r_t += K.dot(m_r, self.masking_kernel_r)
            hh_t += K.dot(m_h, self.masking_kernel_h)
        if self.use_bias:
            z_t = K.bias_add(z_t, self.input_bias_z)
            r_t = K.bias_add(r_t, self.input_bias_r)
            hh_t = K.bias_add(hh_t, self.input_bias_h)
        z_t = self.recurrent_activation(z_t)
        r_t = self.recurrent_activation(r_t)

        if 0. < self.recurrent_dropout < 1.:
            h_tm1_h = r_t * h_tm1d * rec_dp_mask[2]
        else:
            h_tm1_h = r_t * h_tm1d
        hh_t = self.activation(hh_t + K.dot(h_tm1_h, self.recurrent_kernel_h))

        # get h_t
        h_t = z_t * h_tm1 + (1 - z_t) * hh_t
        if 0. < self.dropout + self.recurrent_dropout:
            if training is None:
                h_t._uses_learning_phase = True

        # get s_prev_t
        s_prev_t = K.switch(input_m, K.tile(input_s, [1, self.state_size[-1]]),
                            s_prev_tm1)
        return h_t, [h_t, x_keep_t, s_prev_t]
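The x_imputation branch decides how missing inputs (mask m = 0) are filled before the GRU update: carried forward from the last observed value, zeroed, or left as-is. A minimal NumPy sketch of that switch (an illustration; the decayed-input branch is omitted):

import numpy as np

def impute(x, m, x_keep_prev, mode='forward'):
    observed = m.astype(bool)
    if mode == 'forward':
        x_t = np.where(observed, x, x_keep_prev)   # last observed value fills the gap
    elif mode == 'zero':
        x_t = np.where(observed, x, 0.0)           # missing entries become zero
    elif mode == 'raw':
        x_t = x                                    # keep the raw values unchanged
    else:
        raise ValueError('invalid x_imputation {}'.format(mode))
    return x_t, x_t    # x_keep_t equals x_t in all three branches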
Example 15
    def call(self, inputs, states, training=None):
        h_tm1 = states[0]  # previous memory

        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(K.ones_like(inputs),
                                                        self.dropout,
                                                        training=training,
                                                        count=5)
        if (0 < self.recurrent_dropout < 1
                and self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(h_tm1),
                self.recurrent_dropout,
                training=training,
                count=4)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        if self.implementation == 1:
            if 0. < self.dropout < 1.:
                inputs_z = inputs * dp_mask[0]
                inputs_r = inputs * dp_mask[1]
                inputs_h = inputs * dp_mask[2]
                inputs_myl = inputs * dp_mask[3]
                inputs_myl2 = inputs * dp_mask[4]
            else:
                inputs_z = inputs
                inputs_r = inputs
                inputs_h = inputs
                inputs_myl = inputs
                inputs_myl2 = inputs

            x_z = K.dot(inputs_z, self.kernel_z)
            x_r = K.dot(inputs_r, self.kernel_r)
            x_h = K.dot(inputs_h, self.kernel_h)
            x_myl = K.dot(inputs_myl, self.kernel_myl)
            x_myl2 = K.dot(inputs_myl2, self.kernel_myl2)
            if self.use_bias:
                x_z = K.bias_add(x_z, self.input_bias_z)
                x_r = K.bias_add(x_r, self.input_bias_r)
                x_h = K.bias_add(x_h, self.input_bias_h)
                x_myl = K.bias_add(x_myl, self.input_bias_myl)
                x_myl2 = K.bias_add(x_myl2, self.input_bias_myl2)

            if 0. < self.recurrent_dropout < 1.:
                h_tm1_z = h_tm1 * rec_dp_mask[0]
                h_tm1_r = h_tm1 * rec_dp_mask[1]
                h_tm1_h = h_tm1 * rec_dp_mask[2]
                h_tm1_myl = h_tm1 * rec_dp_mask[3]
            else:
                h_tm1_z = h_tm1
                h_tm1_r = h_tm1
                h_tm1_myl = h_tm1
                h_tm1_h = h_tm1

            recurrent_z = K.dot(h_tm1_z, self.recurrent_kernel_z)
            recurrent_r = K.dot(h_tm1_r, self.recurrent_kernel_r)
            recurrent_myl = K.dot(h_tm1_myl, self.recurrent_kernel_myl)
            if self.reset_after and self.use_bias:
                recurrent_z = K.bias_add(recurrent_z, self.recurrent_bias_z)
                recurrent_r = K.bias_add(recurrent_r, self.recurrent_bias_r)
                recurrent_myl = K.bias_add(recurrent_myl,
                                           self.recurrent_bias_myl)

            z = self.recurrent_activation(x_z + recurrent_z)
            r = self.recurrent_activation(x_r + recurrent_r)
            myl = self.recurrent_activation(x_myl + recurrent_myl)

            # reset gate applied after/before matrix multiplication
            if self.reset_after:  # MyGRU uses this branch
                recurrent_h = K.dot(h_tm1_h, self.recurrent_kernel_h)
                if self.use_bias:
                    recurrent_h = K.bias_add(recurrent_h,
                                             self.recurrent_bias_h)
                recurrent_h = r * recurrent_h
            else:  # default
                recurrent_h = K.dot(r * h_tm1_h, self.recurrent_kernel_h)

            hh = self.activation(x_h + recurrent_h) + myl * x_myl2
        else:
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]

            # inputs projected by all gate matrices at once
            matrix_x = K.dot(inputs, self.kernel)
            if self.use_bias:
                # input biases for the five gates (z, r, h, myl, myl2)
                matrix_x = K.bias_add(matrix_x, self.input_bias)
            x_z = matrix_x[:, :self.units]
            x_r = matrix_x[:, self.units:2 * self.units]
            x_h = matrix_x[:, 2 * self.units:3 * self.units]
            x_myl = matrix_x[:, 3 * self.units:4 * self.units]
            x_myl2 = matrix_x[:, 4 * self.units:5 * self.units]
            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]

            if self.reset_after:
                # hidden state projected by all gate matrices at once
                matrix_inner = K.dot(h_tm1, self.recurrent_kernel)
                if self.use_bias:
                    matrix_inner = K.bias_add(matrix_inner,
                                              self.recurrent_bias)
            else:
                # hidden state projected separately for update/reset and new
                matrix_inner = K.dot(h_tm1,
                                     self.recurrent_kernel[:, :2 * self.units])

            recurrent_z = matrix_inner[:, :self.units]
            recurrent_r = matrix_inner[:, self.units:2 * self.units]
            if self.reset_after:
                recurrent_myl = matrix_inner[:, self.units * 3:4 * self.units]
            else:
                recurrent_myl = K.dot(
                    h_tm1,
                    self.recurrent_kernel[:, 3 * self.units:4 * self.units])

            z = self.recurrent_activation(x_z + recurrent_z)
            r = self.recurrent_activation(x_r + recurrent_r)
            myl = self.recurrent_activation(x_myl + recurrent_myl)

            if self.reset_after:
                recurrent_h = r * matrix_inner[:,
                                               2 * self.units:3 * self.units]
            else:
                recurrent_h = K.dot(
                    r * h_tm1,
                    self.recurrent_kernel[:, 2 * self.units:3 * self.units])

            hh = self.activation(x_h + recurrent_h) + myl * x_myl2

        # previous and candidate state mixed by update gate
        h = z * h_tm1 + (1 - z) * hh  # Keras GRU convention: z gates the previous state
        # h = (1 - z) * h_tm1 + z * hh  # original-paper convention (equivalent, with z inverted)

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True

        # tcell
        for i in range(self.t_cell_num):
            h_tm1 = h
            if (0 < self.t_recurrent_dropout < 1
                    and self.t__recurrent_dropout_mask[i] is None):
                self.t__recurrent_dropout_mask[i] = _generate_dropout_mask(
                    K.ones_like(h_tm1),
                    self.t_recurrent_dropout,
                    training=training,
                    count=3)
            rec_dp_mask = self.t__recurrent_dropout_mask[i]
            if self.implementation == 1:
                if 0. < self.t_recurrent_dropout < 1.:
                    h_tm1_z = h_tm1 * rec_dp_mask[0]
                    h_tm1_r = h_tm1 * rec_dp_mask[1]
                    h_tm1_h = h_tm1 * rec_dp_mask[2]

                else:
                    h_tm1_z = h_tm1
                    h_tm1_r = h_tm1
                    h_tm1_h = h_tm1

                recurrent_z = K.dot(h_tm1_z, self.t_recurrent_kernel_z[i])
                recurrent_r = K.dot(h_tm1_r, self.t_recurrent_kernel_r[i])

                if self.reset_after and self.use_bias:
                    recurrent_z = K.bias_add(recurrent_z,
                                             self.t_recurrent_bias_z[i])
                    recurrent_r = K.bias_add(recurrent_r,
                                             self.t_recurrent_bias_r[i])

                z = self.recurrent_activation(recurrent_z)
                r = self.recurrent_activation(recurrent_r)

                # reset gate applied after/before matrix multiplication
                if self.reset_after:  # TGRU uses this branch
                    recurrent_h = K.dot(h_tm1_h, self.t_recurrent_kernel_h[i])
                    if self.use_bias:
                        recurrent_h = K.bias_add(recurrent_h,
                                                 self.t_recurrent_bias_h[i])
                    recurrent_h = r * recurrent_h
                else:  # default
                    recurrent_h = K.dot(r * h_tm1_h,
                                        self.t_recurrent_kernel_h[i])

                hh = self.activation(recurrent_h)
            else:

                if 0. < self.t_recurrent_dropout < 1.:
                    h_tm1 *= rec_dp_mask[0]

                if self.reset_after:
                    # hidden state projected by all gate matrices at once
                    matrix_inner = K.dot(h_tm1, self.t_recurrent_kernel[i])
                    if self.use_bias:
                        matrix_inner = K.bias_add(matrix_inner,
                                                  self.t_recurrent_bias[i])
                else:
                    # hidden state projected separately for update/reset and new
                    matrix_inner = K.dot(
                        h_tm1, self.t_recurrent_kernel[i][:, :2 * self.units])

                recurrent_z = matrix_inner[:, :self.units]
                recurrent_r = matrix_inner[:, self.units:2 * self.units]

                z = self.recurrent_activation(recurrent_z)
                r = self.recurrent_activation(recurrent_r)

                if self.reset_after:
                    recurrent_h = r * matrix_inner[:, 2 * self.units:3 *
                                                   self.units]
                else:
                    recurrent_h = K.dot(
                        r * h_tm1,
                        self.t_recurrent_kernel[i][:, 2 * self.units:3 * self.units])

                hh = self.activation(recurrent_h)

            # previous and candidate state mixed by update gate
            h = z * h_tm1 + (1 - z) * hh  # Keras GRU convention: z gates the previous state
            # h = (1 - z) * h_tm1 + z * hh  # original-paper convention (equivalent, with z inverted)

            if 0 < self.dropout + self.recurrent_dropout:
                if training is None:
                    h._uses_learning_phase = True

        return h, [h]
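
The snippet above is a custom GRU-style cell: two extra projections (`myl`, `myl2`) modulate the candidate state, and the trailing `t_cell_num` loop applies additional input-free GRU transitions to the mixed state before returning. As a minimal usage sketch (the class name `MyGRUCell` and its constructor arguments are assumptions, not taken from the snippet), such a cell is wired into a model through `keras.layers.RNN` like any other custom cell:

    from keras.layers import Input, RNN, Dense
    from keras.models import Model

    # Hypothetical cell class/arguments; only the attributes used in call()
    # (units, dropout, recurrent_dropout, t_cell_num, ...) are implied above.
    cell = MyGRUCell(units=64, dropout=0.1, recurrent_dropout=0.1, t_cell_num=2)

    x = Input(shape=(None, 32))             # (timesteps, features)
    h = RNN(cell)(x)                        # runs cell.call over the time axis
    y = Dense(10, activation='softmax')(h)
    model = Model(x, y)
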
Esempio n. 16
0
    def call(self, states, training=None):
        h_tm1 = states  # previous memory

        if (0 < self.recurrent_dropout < 1
                and self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(h_tm1),
                self.recurrent_dropout,
                training=training,
                count=3)

        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        if self.implementation == 1:
            if 0. < self.recurrent_dropout < 1.:
                h_tm1_z = h_tm1 * rec_dp_mask[0]
                h_tm1_r = h_tm1 * rec_dp_mask[1]
                h_tm1_h = h_tm1 * rec_dp_mask[2]

            else:
                h_tm1_z = h_tm1
                h_tm1_r = h_tm1
                h_tm1_h = h_tm1

            recurrent_z = K.dot(h_tm1_z, self.recurrent_kernel_z)
            recurrent_r = K.dot(h_tm1_r, self.recurrent_kernel_r)

            if self.reset_after and self.use_bias:
                recurrent_z = K.bias_add(recurrent_z, self.recurrent_bias_z)
                recurrent_r = K.bias_add(recurrent_r, self.recurrent_bias_r)

            z = self.recurrent_activation(recurrent_z)
            r = self.recurrent_activation(recurrent_r)

            # reset gate applied after/before matrix multiplication
            if self.reset_after:  # TGRU uses this branch
                recurrent_h = K.dot(h_tm1_h, self.recurrent_kernel_h)
                if self.use_bias:
                    recurrent_h = K.bias_add(recurrent_h,
                                             self.recurrent_bias_h)
                recurrent_h = r * recurrent_h
            else:  # default
                recurrent_h = K.dot(r * h_tm1_h, self.recurrent_kernel_h)

            hh = self.activation(recurrent_h)
        else:

            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]

            if self.reset_after:
                # hidden state projected by all gate matrices at once
                matrix_inner = K.dot(h_tm1, self.recurrent_kernel)
                if self.use_bias:
                    matrix_inner = K.bias_add(matrix_inner,
                                              self.recurrent_bias)
            else:
                # hidden state projected separately for update/reset and new
                matrix_inner = K.dot(h_tm1,
                                     self.recurrent_kernel[:, :2 * self.units])

            recurrent_z = matrix_inner[:, :self.units]
            recurrent_r = matrix_inner[:, self.units:2 * self.units]

            z = self.recurrent_activation(recurrent_z)
            r = self.recurrent_activation(recurrent_r)

            if self.reset_after:
                recurrent_h = r * matrix_inner[:,
                                               2 * self.units:3 * self.units]
            else:
                recurrent_h = K.dot(
                    r * h_tm1,
                    self.recurrent_kernel[:, 2 * self.units:3 * self.units])

            hh = self.activation(recurrent_h)

        # previous and candidate state mixed by update gate
        h = z * h_tm1 + (1 - z) * hh  # Keras GRU convention: z gates the previous state
        # h = (1 - z) * h_tm1 + z * hh  # original-paper convention (equivalent, with z inverted)

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True

        return h
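
For reference, this input-free variant reduces to a plain GRU transition driven only by the previous state. In the reset_after branch marked above (with use_bias=True), the step is, in the snippet's notation: z = recurrent_activation(h_tm1 · U_z + b_z), r = recurrent_activation(h_tm1 · U_r + b_r), hh = activation(r * (h_tm1 · U_h + b_h)), and finally h = z * h_tm1 + (1 - z) * hh.
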
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(_generate_dropout_ones(
                inputs,
                K.shape(inputs)[-1]),
                                                        self.dropout,
                                                        training=training,
                                                        count=8)
        if (0 < self.recurrent_dropout < 1
                and self._recurrent_dropout_mask is None):
            _recurrent_dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.recurrent_dropout,
                training=training,
                count=8)
            self._recurrent_dropout_mask = _recurrent_dropout_mask

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        if self.implementation == 1:
            if 0 < self.dropout < 1.:
                inputs_0 = inputs * dp_mask[0]
                inputs_1 = inputs * dp_mask[1]
                inputs_2 = inputs * dp_mask[2]
                inputs_3 = inputs * dp_mask[3]
                inputs_4 = inputs * dp_mask[4]
                inputs_5 = inputs * dp_mask[5]
                inputs_6 = inputs * dp_mask[6]
                inputs_7 = inputs * dp_mask[7]
            else:
                inputs_0 = inputs
                inputs_1 = inputs
                inputs_2 = inputs
                inputs_3 = inputs
                inputs_4 = inputs
                inputs_5 = inputs
                inputs_6 = inputs
                inputs_7 = inputs

            x_0 = K.dot(inputs_0, self.kernel_0)
            x_1 = K.dot(inputs_1, self.kernel_1)
            x_2 = K.dot(inputs_2, self.kernel_2)
            x_3 = K.dot(inputs_3, self.kernel_3)
            x_4 = K.dot(inputs_4, self.kernel_4)
            x_5 = K.dot(inputs_5, self.kernel_5)
            x_6 = K.dot(inputs_6, self.kernel_6)
            x_7 = K.dot(inputs_7, self.kernel_7)

            if self.use_bias:
                x_0 = K.bias_add(x_0, self.bias_0)
                x_1 = K.bias_add(x_1, self.bias_1)
                x_2 = K.bias_add(x_2, self.bias_2)
                x_3 = K.bias_add(x_3, self.bias_3)
                x_4 = K.bias_add(x_4, self.bias_4)
                x_5 = K.bias_add(x_5, self.bias_5)
                x_6 = K.bias_add(x_6, self.bias_6)
                x_7 = K.bias_add(x_7, self.bias_7)

            if 0 < self.recurrent_dropout < 1.:
                h_tm1_0 = h_tm1 * rec_dp_mask[0]
                h_tm1_1 = h_tm1 * rec_dp_mask[1]
                h_tm1_2 = h_tm1 * rec_dp_mask[2]
                h_tm1_3 = h_tm1 * rec_dp_mask[3]
                h_tm1_4 = h_tm1 * rec_dp_mask[4]
                h_tm1_5 = h_tm1 * rec_dp_mask[5]
                h_tm1_6 = h_tm1 * rec_dp_mask[6]
                h_tm1_7 = h_tm1 * rec_dp_mask[7]
            else:
                h_tm1_0 = h_tm1
                h_tm1_1 = h_tm1
                h_tm1_2 = h_tm1
                h_tm1_3 = h_tm1
                h_tm1_4 = h_tm1
                h_tm1_5 = h_tm1
                h_tm1_6 = h_tm1
                h_tm1_7 = h_tm1

            # First Layer
            layer1_0 = self.recurrent_activation(
                x_0 + K.dot(h_tm1_0, self.recurrent_kernel_0))
            layer1_1 = self.cell_activation(
                x_1 + K.dot(h_tm1_1, self.recurrent_kernel_1))
            layer1_2 = self.recurrent_activation(
                x_2 + K.dot(h_tm1_2, self.recurrent_kernel_2))
            layer1_3 = self.cell_activation(
                x_3 * K.dot(h_tm1_3, self.recurrent_kernel_3))
            layer1_4 = self.activation(x_4 +
                                       K.dot(h_tm1_4, self.recurrent_kernel_4))
            layer1_5 = self.recurrent_activation(
                x_5 + K.dot(h_tm1_5, self.recurrent_kernel_5))
            layer1_6 = self.activation(x_6 +
                                       K.dot(h_tm1_6, self.recurrent_kernel_6))
            layer1_7 = self.recurrent_activation(
                x_7 + K.dot(h_tm1_7, self.recurrent_kernel_7))

            # Second Layer
            layer2_0 = self.activation(layer1_0 * layer1_1)
            layer2_1 = self.activation(layer1_2 + layer1_3)
            layer2_2 = self.activation(layer1_4 * layer1_5)
            layer2_3 = self.recurrent_activation(layer1_6 + layer1_7)

            # inject the previous cell state
            layer2_0 = self.activation(layer2_0 + c_tm1)

            # Third Layer
            layer3_0_pre = layer2_0 * layer2_1
            c = layer3_0_pre  # create a new cell
            layer3_0 = layer3_0_pre
            layer3_1 = self.activation(layer2_2 + layer2_3)

            # Final Layer
            h = self.activation(layer3_0 * layer3_1)

            if self.projection_units is not None:
                h = self.projection_activation(K.dot(h,
                                                     self.projection_kernel))

        else:
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]
            z = K.dot(inputs, self.kernel)
            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]
            zr = K.dot(h_tm1, self.recurrent_kernel)
            if self.use_bias:
                zr = K.bias_add(zr, self.bias)

            z0 = z[:, :self.units]
            z1 = z[:, self.units:2 * self.units]
            z2 = z[:, 2 * self.units:3 * self.units]
            z3 = z[:, 3 * self.units:4 * self.units]
            z4 = z[:, 4 * self.units:5 * self.units]
            z5 = z[:, 5 * self.units:6 * self.units]
            z6 = z[:, 6 * self.units:7 * self.units]
            z7 = z[:, 7 * self.units:]

            zr0 = zr[:, :self.units]
            zr1 = zr[:, self.units:2 * self.units]
            zr2 = zr[:, 2 * self.units:3 * self.units]
            zr3 = zr[:, 3 * self.units:4 * self.units]
            zr4 = zr[:, 4 * self.units:5 * self.units]
            zr5 = zr[:, 5 * self.units:6 * self.units]
            zr6 = zr[:, 6 * self.units:7 * self.units]
            zr7 = zr[:, 7 * self.units:]

            # First Layer
            layer1_0 = self.recurrent_activation(z0 + zr0)
            layer1_1 = self.cell_activation(z1 + zr1)
            layer1_2 = self.recurrent_activation(z2 + zr2)
            layer1_3 = self.cell_activation(z3 * zr3)
            layer1_4 = self.activation(z4 + zr4)
            layer1_5 = self.recurrent_activation(z5 + zr5)
            layer1_6 = self.activation(z6 + zr6)
            layer1_7 = self.recurrent_activation(z7 + zr7)

            # Second Layer
            layer2_0 = self.activation(layer1_0 * layer1_1)
            layer2_1 = self.activation(layer1_2 + layer1_3)
            layer2_2 = self.activation(layer1_4 * layer1_5)
            layer2_3 = self.recurrent_activation(layer1_6 + layer1_7)

            # inject the previous cell state
            layer2_0 = self.activation(layer2_0 + c_tm1)

            # Third Layer
            layer3_0_pre = layer2_0 * layer2_1
            c = layer3_0_pre
            layer3_0 = layer3_0_pre
            layer3_1 = self.activation(layer2_2 + layer2_3)

            # Final Layer
            h = self.activation(layer3_0 * layer3_1)

            if self.projection_units is not None:
                h = self.projection_activation(K.dot(h,
                                                     self.projection_kernel))

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, [h, c]
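
The 8-kernel cell above does not use named LSTM gates; it evaluates a fixed tree of operations (resembling the NASCell found by neural architecture search): eight input/recurrent projections feed a first layer of activations, adjacent pairs are combined (product or sum) into four second-layer nodes, the previous carry c_tm1 is injected into the first of them, and the third layer yields the new carry and the output. In the snippet's names: c = activation(activation(layer1_0 * layer1_1) + c_tm1) * activation(layer1_2 + layer1_3), and h = activation(c * activation(activation(layer1_4 * layer1_5) + recurrent_activation(layer1_6 + layer1_7))), optionally followed by the learned projection.
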
Esempio n. 19
0
    def call(self, inputs, states, training=None):

        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, K.shape(inputs)[-1] + self.annotation_units),
                self.dropout,
                training=training,
                count=4)

        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.recurrent_dropout,
                training=training,
                count=4)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        # attention mechanism

        # repeat the hidden state to the length of the sequence
        _stm = K.repeat(h_tm1, self.annotation_timesteps)

        # multiply the repeated hidden state by the attention weight matrix
        _Wxstm = K.dot(_stm, self.kernel_w)

        # calculate the attention probabilities
        et = K.dot(activations.tanh(_Wxstm + self._uh), K.expand_dims(self.kernel_v))
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.annotation_timesteps)
        at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.annotations, axes=1), axis=1)

        # append the context vector to the inputs
        inputs = K.concatenate([inputs, context])

        if self.implementation == 1:
            if 0 < self.dropout < 1.:
                inputs_i = inputs * dp_mask[0]
                inputs_f = inputs * dp_mask[1]
                inputs_c = inputs * dp_mask[2]
                inputs_o = inputs * dp_mask[3]
            else:
                inputs_i = inputs
                inputs_f = inputs
                inputs_c = inputs
                inputs_o = inputs

            x_i = K.dot(inputs_i, self.kernel_i)
            x_f = K.dot(inputs_f, self.kernel_f)
            x_c = K.dot(inputs_c, self.kernel_c)
            x_o = K.dot(inputs_o, self.kernel_o)

            if self.use_bias:
                x_i = K.bias_add(x_i, self.bias_i)
                x_f = K.bias_add(x_f, self.bias_f)
                x_c = K.bias_add(x_c, self.bias_c)
                x_o = K.bias_add(x_o, self.bias_o)

            if 0 < self.recurrent_dropout < 1.:
                h_tm1_i = h_tm1 * rec_dp_mask[0]
                h_tm1_f = h_tm1 * rec_dp_mask[1]
                h_tm1_c = h_tm1 * rec_dp_mask[2]
                h_tm1_o = h_tm1 * rec_dp_mask[3]
            else:
                h_tm1_i = h_tm1
                h_tm1_f = h_tm1
                h_tm1_c = h_tm1
                h_tm1_o = h_tm1

            i = self.recurrent_activation(x_i + K.dot(h_tm1_i, self.recurrent_kernel_i))
            f = self.recurrent_activation(x_f + K.dot(h_tm1_f, self.recurrent_kernel_f))
            c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1_c, self.recurrent_kernel_c))
            o = self.recurrent_activation(x_o + K.dot(h_tm1_o, self.recurrent_kernel_o))

        else:
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]
            z = K.dot(inputs, self.kernel)
            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]
            z += K.dot(h_tm1, self.recurrent_kernel)
            if self.use_bias:
                z = K.bias_add(z, self.bias)

            z0 = z[:, :self.units]
            z1 = z[:, self.units: 2 * self.units]
            z2 = z[:, 2 * self.units: 3 * self.units]
            z3 = z[:, 3 * self.units:]

            i = self.recurrent_activation(z0)
            f = self.recurrent_activation(z1)
            c = f * c_tm1 + i * self.activation(z2)
            o = self.recurrent_activation(z3)

        h = o * self.activation(c)
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, [h, c]
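
One note on the attention block in the cell above: the softmax over the scores `et` is computed manually with `K.exp` followed by a sum, which can overflow for large scores. A drop-in alternative for those lines (a sketch, assuming `et` keeps the (batch, timesteps, 1) shape produced above) shifts by the per-sample maximum first and lets broadcasting handle the normalization:

    et = et - K.max(et, axis=1, keepdims=True)   # shift scores for numerical stability
    at = K.exp(et)
    at /= K.sum(at, axis=1, keepdims=True)       # same normalized weights, no K.repeat needed
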
Esempio n. 20
0
    def call(self, inputs, states, training=None):
        h_tm1 = states[0]  # previous memory

        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
                self.dropout,
                training=training,
                count=3)
        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.recurrent_dropout,
                training=training,
                count=3)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        if self.implementation == 1:
            if 0. < self.dropout < 1.:
                inputs_z = inputs * dp_mask[0]
                inputs_r = inputs * dp_mask[1]
                inputs_h = inputs * dp_mask[2]
            else:
                inputs_z = inputs
                inputs_r = inputs
                inputs_h = inputs
                
            x_z = K.dot(inputs_z, self.kernel_z)
            x_r = K.dot(inputs_r, self.kernel_r)
            x_h = K.dot(inputs_h, self.kernel_h)
            if self.use_bias:
                x_z = K.bias_add(x_z, self.bias_z)
                x_r = K.bias_add(x_r, self.bias_r)
                x_h = K.bias_add(x_h, self.bias_h)

            if 0. < self.recurrent_dropout < 1.:
                h_tm1_z = h_tm1 * rec_dp_mask[0]
                h_tm1_r = h_tm1 * rec_dp_mask[1]
                h_tm1_h = h_tm1 * rec_dp_mask[2]
            else:
                h_tm1_z = h_tm1
                h_tm1_r = h_tm1
                h_tm1_h = h_tm1
            
            recurrent_z = K.dot(h_tm1_z, self.recurrent_kernel_z)
            recurrent_r = K.dot(h_tm1_r, self.recurrent_kernel_r)
            
            a_z = self.ln(x_z + recurrent_z)
            a_r = self.ln(x_r + recurrent_r)
            if self.scale:
                a_z *= self.gamma_z
                a_r *= self.gamma_r
            if self.center:
                a_z += self.beta_z
                a_r += self.beta_r
            z = self.recurrent_activation(a_z)
            r = self.recurrent_activation(a_r)
            
            
            recurrent_h = K.dot(r * h_tm1_h, self.recurrent_kernel_h)
            a_h = self.ln(x_h + recurrent_h)
            if self.scale:
                a_h *= self.gamma_h
            if self.center:
                a_h += self.beta_h
            hh = self.activation(a_h)
            
        # only implementation == 1 is handled; implementation 2 is not supported here
        
        h = z * h_tm1 + (1 - z) * hh
        
        if 0 < self.dropout + self.recurrent_dropout + self.zoneout:
            if training is None:
                h._uses_learning_phase = True
                
        if 0 < self.zoneout < 1:
            h = K.in_train_phase(K.dropout(h - h_tm1, self.zoneout),
                                 h - h_tm1)
            h = h * (1. - self.zoneout) + h_tm1
        
        return h, [h]
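
A closing note on the zoneout step at the end of this cell: `K.dropout(h - h_tm1, self.zoneout)` zeroes a random subset of the state delta and rescales the survivors by 1 / (1 - zoneout), so multiplying by (1 - zoneout) afterwards undoes that rescaling. During training the last two lines therefore compute h = mask * h_new + (1 - mask) * h_tm1 with a per-unit mask ~ Bernoulli(1 - zoneout), i.e. each unit either takes its new value or is zoned out to its previous one; at inference they blend deterministically to h = (1 - zoneout) * h_new + zoneout * h_tm1, matching the training-time expectation.
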