Example #1
 def call(self, inputs, **kwargs):
     gate = kb.dot(inputs, self.gate_kernel)
     gate = kb.bias_add(gate, self.gate_bias, data_format="channels_last")
     gate = self.activation(gate)
     new_value = kb.dot(inputs, self.dense_kernel)
     new_value = kb.bias_add(new_value, self.dense_bias, data_format="channels_last")
     return gate * new_value + (1.0 - gate) * inputs
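This highway-style gate mixes the transformed input with the raw input, so the kernels must keep the last dimension unchanged. As a point of reference only, a hypothetical `build()` that would create the four weights used above (the weight names come from the example; shapes and initializers are assumptions):

 def build(self, input_shape):
     # Hypothetical companion build() (assumed shapes/initializers): the gated
     # mix gate * new_value + (1 - gate) * inputs requires square kernels.
     dim = input_shape[-1]
     self.gate_kernel = self.add_weight(name="gate_kernel", shape=(dim, dim),
                                        initializer="glorot_uniform")
     self.gate_bias = self.add_weight(name="gate_bias", shape=(dim,),
                                      initializer="zeros")
     self.dense_kernel = self.add_weight(name="dense_kernel", shape=(dim, dim),
                                         initializer="glorot_uniform")
     self.dense_bias = self.add_weight(name="dense_bias", shape=(dim,),
                                       initializer="zeros")
     super().build(input_shape)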
Example #2
    def call(self, inputs, **kwargs):
        assert isinstance(inputs, list) and len(inputs) == 3
        first, second, features = inputs[0], inputs[1], inputs[2]
        if not self.from_logits:
            first = kb.clip(first, 1e-10, 1.0)
            second = kb.clip(second, 1e-10, 1.0)
            first_, second_ = kb.log(first), kb.log(second)
        else:
            first_, second_ = first, second
        # embedded_features.shape = (M, T, 1)
        if self.use_intermediate_layer:
            features = kb.dot(features, self.first_kernel)
            features = kb.bias_add(features, self.first_bias, data_format="channels_last")
            features = self.intermediate_activation(features)
        embedded_features = kb.dot(features, self.features_kernel)
        embedded_features = kb.bias_add(
            embedded_features, self.features_bias, data_format="channels_last")
        if self.use_dimension_bias:
            tiling_shape = [1] * (kb.ndim(first)-1) + [kb.shape(first)[-1]]
            embedded_features = kb.tile(embedded_features, tiling_shape)
            embedded_features = kb.bias_add(
                embedded_features, self.dimensions_bias, data_format="channels_last")
        sigma = kb.sigmoid(embedded_features)

        result = weighted_sum(first_, second_, sigma,
                              self.first_threshold, self.second_threshold)
        probs = kb.softmax(result)
        if self.return_logits:
            return [probs, result]
        return probs
Example #3
def _time_distributed_dense(w, x, b):
    if K.backend() == 'tensorflow':
        x = K.dot(x, w)
        x = K.bias_add(x, b)
    else:
        print("time_distributed_dense doesn't backend tensorflow")
    return x
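A quick way to see what this helper computes is to run it on dummy tensors; the shapes below are made up for illustration and are not part of the original snippet:

# Usage sketch for the helper above (assumed dummy shapes):
# x is (batch, timesteps, input_dim), w is (input_dim, units), b is (units,).
import numpy as np
from keras import backend as K

x = K.constant(np.random.rand(2, 5, 3))
w = K.constant(np.random.rand(3, 4))
b = K.constant(np.zeros(4))
y = _time_distributed_dense(w, x, b)   # K.dot applies w to every timestep
print(K.int_shape(y))                  # expected: (2, 5, 4)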
Example #4
    def call(self, inputs):
        if self.tied_to is not None:
            outputs = K.conv1d(
               inputs,
               self.tied_to.kernel,
               strides=self.strides[0],
               padding=self.padding,
               data_format=self.data_format,
               dilation_rate=self.dilation_rate[0])
        else:
            # this branch is typically entered when a previously trained model is being loaded again
            outputs = K.conv1d(
               inputs,
               self.learnedKernel,
               strides=self.strides[0],
               padding=self.padding,
               data_format=self.data_format,
               dilation_rate=self.dilation_rate[0])

        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
Example #5
    def call(self, inputs):
        if self.rank == 1:
            outputs = K.conv1d(
                inputs,
                self.kernel,
                strides=self.strides[0],
                padding=self.padding,
                data_format=self.data_format,
                dilation_rate=self.dilation_rate[0])
        if self.rank == 2:
            outputs = K.conv2d(
                inputs,
                self.kernel,
                strides=self.strides,
                padding=self.padding,
                data_format=self.data_format,
                dilation_rate=self.dilation_rate)
        if self.rank == 3:
            outputs = K.conv3d(
                inputs,
                self.kernel,
                strides=self.strides,
                padding=self.padding,
                data_format=self.data_format,
                dilation_rate=self.dilation_rate)

        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
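In this generic convolution `call`, `data_format` is forwarded to `K.bias_add`, which decides whether the bias is broadcast over the last axis (`channels_last`) or over axis 1 (`channels_first`). A small standalone illustration with dummy tensors (the shapes are assumptions, not taken from the layer):

# K.bias_add respects data_format (assumed dummy tensors).
import numpy as np
from keras import backend as K

feat_nhwc = K.constant(np.zeros((1, 8, 8, 16)))   # channels_last layout
feat_nchw = K.constant(np.zeros((1, 16, 8, 8)))   # channels_first layout
bias = K.constant(np.arange(16, dtype="float32"))

out_last = K.bias_add(feat_nhwc, bias, data_format="channels_last")    # added over axis -1
out_first = K.bias_add(feat_nchw, bias, data_format="channels_first")  # added over axis 1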
Example #6
 def call(self, inputs):
     output = K.dot(inputs, self.kernel)
     if self.use_bias:
         output = K.bias_add(output, self.bias)
     if self.activation is not None:
         output = self.activation(output)
     return output
Example #7
    def call(self, inputs):
        # integer division so the channel/filter slices below use integer indices
        filter_in_group = self.filters // self.num_group
        if self.data_format == 'channels_first':
            channel_axis = 1
            input_in_group = self.channel_num // self.num_group
            outputs_list = []
            for i in range(self.num_group):
                outputs = K.conv2d(
                    inputs[:,i*input_in_group:(i+1)*input_in_group,:,:],
                    self.kernel[:, :, :, i*filter_in_group:(i+1)*filter_in_group],
                    strides=self.strides,
                    padding=self.padding,
                    data_format=self.data_format,
                    dilation_rate=self.dilation_rate)

                if self.use_bias:
                    outputs = K.bias_add(
                                         outputs,
                                         self.bias[i*filter_in_group:(i+1)*filter_in_group],
                                         data_format=self.data_format)
                outputs_list.append(outputs)

        elif self.data_format == 'channels_last':
            outputs_list = []
            channel_axis = -1
            input_in_group = self.channel_num // self.num_group
            for i in range(self.num_group):
                outputs = K.conv2d(
                    inputs[:, :, :, i*input_in_group:(i+1)*input_in_group],
                    self.kernel[:, :, :, i*filter_in_group:(i+1)*filter_in_group],
                    strides=self.strides,
                    padding=self.padding,
                    data_format=self.data_format,
                    dilation_rate=self.dilation_rate)

                if self.use_bias:
                    outputs = K.bias_add(
                                         outputs,
                                         self.bias[i*filter_in_group:(i+1)*filter_in_group],
                                         data_format=self.data_format)
                outputs_list.append(outputs)

        outputs = concatenate(outputs_list, axis=channel_axis)
        return outputs
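The grouped convolution above only works when both the input channels and the filters divide evenly by `num_group`; each group convolves its own channel slice with its own kernel slice, and the per-group bias slice is added before concatenation. A quick arithmetic sketch with assumed numbers:

# Shape bookkeeping for grouped convolution (assumed example numbers).
num_group, channel_num, filters = 4, 32, 64
input_in_group = channel_num // num_group    # 8 input channels per group
filter_in_group = filters // num_group       # 16 output channels per group
assert input_in_group * num_group == channel_num
assert filter_in_group * num_group == filters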
Example #8
    def call(self, inputs):
        
        output = self.local_conv3d(inputs,
                                self.kernel,
                                self.kernel_size,
                                self.strides,
                                (self.output_row, self.output_col, self.output_z),
                                self.data_format)

        if self.use_bias:
            output = K.bias_add(output, self.bias,
                                data_format=self.data_format)

        output = self.activation(output)
        return output
Example #9
    def call(self, inputs):
        _, _, filters = self.kernel_shape

        output = K.local_conv2d(inputs,
                                self.kernel,
                                self.kernel_size,
                                self.strides,
                                (self.output_row, self.output_col),
                                self.data_format)

        if self.use_bias:
            if self.data_format == 'channels_first' or self.data_format == 'channels_last':
                output = K.bias_add(output, self.bias, data_format=self.data_format)

        output = self.activation(output)
        return output
Example #10
    def call(self, x):
        # sample from noise distribution
        e_i = K.random_normal((self.input_dim, self.units))
        e_j = K.random_normal((self.units,))

        # We use the factorized Gaussian noise variant from Section 3 of Fortunato et al.
        eW = K.sign(e_i) * (K.sqrt(K.abs(e_i))) * K.sign(e_j) * (K.sqrt(K.abs(e_j)))
        eB = K.sign(e_j) * (K.abs(e_j) ** (1 / 2))

        noise_injected_weights = K.dot(x, self.mu_weight + (self.sigma_weight * eW))
        noise_injected_bias = self.mu_bias + (self.sigma_bias * eB)

        output = K.bias_add(noise_injected_weights, noise_injected_bias)
        if self.activation is not None:
            output = self.activation(output)
        return output
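The comment above refers to the factorized Gaussian noise of Fortunato et al. (NoisyNet), where the weight noise is built from two independent vectors through f(ε) = sign(ε)·sqrt(|ε|) and an outer product. A standalone NumPy sketch of that factorization (sizes are assumptions; this is not the layer's own code, which samples `e_i` as a full matrix instead):

# Factorized Gaussian noise, f(e) = sign(e) * sqrt(|e|), per Fortunato et al.
# (assumed sizes; standalone NumPy sketch).
import numpy as np

def f(eps):
    return np.sign(eps) * np.sqrt(np.abs(eps))

input_dim, units = 3, 4
e_in = np.random.randn(input_dim)
e_out = np.random.randn(units)
weight_noise = np.outer(f(e_in), f(e_out))   # shape (input_dim, units)
bias_noise = f(e_out)                        # shape (units,)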
Example #11
    def call(self, inputs, training=None):
        outputs = K.depthwise_conv2d(
            inputs,
            self.depthwise_kernel,
            strides=self.strides,
            padding=self.padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format)

        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)

        return outputs
Example #12
    def call(self, x, mask=None):
        # size of x :[batch_size, sel_len, attention_dim]
        # size of u :[batch_size, attention_dim]
        # uit = tanh(xW+b)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)

        ait = K.exp(ait)

        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)

        return output
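The masking step above implements a masked softmax: scores are exponentiated, masked positions are zeroed, and the row is renormalised, with `K.epsilon()` guarding against an all-masked row. The same pattern, isolated as a helper (shapes are assumed):

# Masked softmax over the time axis, mirroring the block above
# (assumed shapes: scores and mask are (batch, timesteps)).
from keras import backend as K

def masked_softmax(scores, mask=None):
    weights = K.exp(scores)
    if mask is not None:
        weights *= K.cast(mask, K.floatx())
    weights /= K.cast(K.sum(weights, axis=1, keepdims=True) + K.epsilon(),
                      K.floatx())
    return weights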
Example #13
def time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.
    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.
    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x
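The dropout branch builds a single mask from the first timestep and repeats it across time, so every temporal slice is dropped identically. That mask-sharing idea in isolation (a sketch with assumed shapes, not part of the original function):

# Dropout mask shared across timesteps, as in the branch above
# (assumed shape for x: (batch, timesteps, input_dim)).
from keras import backend as K

def shared_time_dropout_mask(x, rate, timesteps):
    ones = K.ones_like(x[:, 0, :])     # (batch, input_dim), one mask per sample
    mask = K.dropout(ones, rate)       # same mask reused at every timestep
    return K.repeat(mask, timesteps)   # (batch, timesteps, input_dim)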
Example #14
    def call(self, inputs):
        assert self.rank == 2, 'only conv2d supported for now...'
        if self.rank == 2:
            outputs = K.conv2d(
                inputs,
                self.kernel,
                strides=self.strides,
                padding=self.padding,
                data_format=self.data_format,
                dilation_rate=self.dilation_rate)

        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)


        #if self.activation is not None:
        #    assert False,'activation functions not supported'
        #    return self.activation(outputs)
        return outputs
Example #15
 def bias_add(self, b):
     self.c = K.bias_add(self.c, b)
     return self
Example #16
    def call(self, inputs, states, training=None):
        
        # previous memory state for gru
        h_tm1 = states[0]  
        
        # generate our dropout and recurrent dropout masks
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                K.ones_like(inputs),
                self.dropout,
                training=training,
                count=4)
        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(states[0]),
                self.recurrent_dropout,
                training=training,
                count=4)

        # get the dropout mask for input units
        dp_mask = self._dropout_mask
        
        # get the dropout mask for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        ####
        # we need to both attend and align in our attention model
        
        # alignment model
        h_att = K.repeat(h_tm1, self.timestep_dim)
        att = _time_distributed_dense(inputs, 
                                      self.attention_weights, 
                                      self.attention_bias,
                                      input_dim=self.input_dim, 
                                      output_dim=self.units, 
                                      timesteps=self.timestep_dim)
        
        # attention energy
        en = K.dot(h_att, self.attention_recurrent_weights) + att
        attention_ = self.attention_activation(en)
        attention_ = K.squeeze(K.dot(attention_, 
                                     self.attention_recurrent_bias), 2)

        alpha = K.exp(attention_)

        # apply dropout to the attention layer
        if dp_mask is not None:
            alpha *= dp_mask[0]

        alpha /= K.sum(alpha, axis=1, keepdims=True)
        alpha_r = K.repeat(alpha, self.input_dim)
        alpha_r = K.permute_dimensions(alpha_r, (0, 2, 1))

        # make context vector (soft attention after Bahdanau et al.)
        z_hat = inputs * alpha_r
        context_sequence = z_hat
        z_hat = K.sum(z_hat, axis=1)
        ####

        # choose the implementation ... implementation 1 is easier to read :)
        if self.implementation == 1:
            
            # apply dropout
            if 0 < self.dropout < 1.:
                inputs_z = inputs * dp_mask[0]
                inputs_r = inputs * dp_mask[1]
                inputs_h = inputs * dp_mask[2]
            else:
                inputs_z = inputs
                inputs_r = inputs
                inputs_h = inputs
                
            # weight the inputs by the kernel weights
            x_z = K.dot(inputs_z, self.kernel_z)
            x_r = K.dot(inputs_r, self.kernel_r)
            x_h = K.dot(inputs_h, self.kernel_h)
            
            # add biases
            if self.use_bias:
                x_z = K.bias_add(x_z, self.bias_z)
                x_r = K.bias_add(x_r, self.bias_r)
                x_h = K.bias_add(x_h, self.bias_h)

            # apply recurrent dropout
            if 0 < self.recurrent_dropout < 1.:
                h_tm1_z = h_tm1 * rec_dp_mask[0]
                h_tm1_r = h_tm1 * rec_dp_mask[1]
                h_tm1_h = h_tm1 * rec_dp_mask[2]
            else:
                h_tm1_z = h_tm1
                h_tm1_r = h_tm1
                h_tm1_h = h_tm1

            # do the gru gating operations - adding the appropriate attention 
            # term as we go      
            
            # first calculate the recurrent parts
            recurrent_z = K.dot(h_tm1_z, self.recurrent_kernel_z)
            recurrent_r = K.dot(h_tm1_r, self.recurrent_kernel_r)
            
            # if we are using the cudnn form (reset after multiplication) then
            # add applicable recurrent biases here
            if self.reset_after and self.use_bias:
                recurrent_z = K.bias_add(recurrent_z, self.recurrent_bias_z)
                recurrent_r = K.bias_add(recurrent_r, self.recurrent_bias_r)
            
            # add attention to z
            z = x_z + recurrent_z + K.dot(z_hat, self.attention_z)
            z = self.recurrent_activation(z)            
            
            # add attention to r
            r = x_r + recurrent_r + K.dot(z_hat, self.attention_r)
            r = self.recurrent_activation(r)
            
            # manage cudnn compatibility 
            
            # reset gate applied after matrix multiplication
            if self.reset_after:
                
                recurrent_h = K.dot(h_tm1_h, self.recurrent_kernel_h)
                if self.use_bias:
                    recurrent_h = K.bias_add(recurrent_h, self.recurrent_bias_h)
                recurrent_h = r * recurrent_h
            
            # reset gate applied before matrix multiplication
            else:
                recurrent_h = K.dot(r * h_tm1_h, self.recurrent_kernel_h)
                
            # apply attention and activation
            hh = self.activation(x_h + recurrent_h + K.dot(z_hat, self.attention_h))
            
        # implementation 2 involves batching stuff up more and *might* be more
        # efficient (depending on hardware)
        else:
            
            # apply dropout
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]
                
            # weight the inputs by the kernel
            matrix_x = K.dot(inputs, self.kernel)
            
            # apply biases
            if self.use_bias:
                matrix_x = K.bias_add(matrix_x, self.bias)            
            
            # extract the z, r, h parts
            x_z = matrix_x[:, :self.units]
            x_r = matrix_x[:, self.units: 2 * self.units]
            x_h = matrix_x[:, 2 * self.units:]            
            
            # apply recurrent dropout
            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]

            # manage cudnn compatibility 
            
            # reset gate applied after matrix multiplication
            if self.reset_after:
                
                # hidden state projected by all gate matrices at once
                matrix_inner = K.dot(h_tm1, self.recurrent_kernel)
                if self.use_bias:
                    matrix_inner = K.bias_add(matrix_inner, self.recurrent_bias)
           
            # reset gate applied before matrix multiplication
            else:
                # hidden state projected separately for update/reset and new
                matrix_inner = K.dot(h_tm1,
                                     self.recurrent_kernel[:, :2 * self.units])

            recurrent_z = matrix_inner[:, :self.units]
            recurrent_r = matrix_inner[:, self.units: 2 * self.units]

            # apply attention and then the recurrent activation function
            z = self.recurrent_activation(x_z + recurrent_z + K.dot(z_hat, self.attention_z))
            r = self.recurrent_activation(x_r + recurrent_r + K.dot(z_hat, self.attention_r))

            # manage cudnn compatibility 
            
            # reset gate applied after matrix multiplication
            if self.reset_after:
                
                recurrent_h = r * matrix_inner[:, 2 * self.units:]
                
            # reset gate applied before matrix multiplication
            else:
                recurrent_h = K.dot(r * h_tm1,
                                    self.recurrent_kernel[:, 2 * self.units:])

            # apply attention and activation
            hh = self.activation(x_h + recurrent_h + K.dot(z_hat, self.attention_h))               
            

        # get the final hidden state by mixing up the previous and candidate 
        # state in the update gate
        h = z * h_tm1 + (1 - z) * hh
        
        # set the learning phase (not sure what we're doing here - but the 
        # keras gru code does this ...)
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
                
        # return the gru states
        return h, [h]
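The attention block in this cell computes alignment energies, a softmax over timesteps, and a Bahdanau-style context vector `z_hat` as the attention-weighted sum of the inputs. Condensed into a standalone sketch (assumed shapes; the cell's own weights and helpers are not reproduced):

# Bahdanau-style soft-attention context vector, as in the block above
# (assumed shapes: inputs (batch, timesteps, input_dim), scores (batch, timesteps)).
from keras import backend as K

def context_vector(inputs, scores, input_dim):
    alpha = K.exp(scores)
    alpha /= K.sum(alpha, axis=1, keepdims=True)          # softmax over timesteps
    alpha_r = K.permute_dimensions(K.repeat(alpha, input_dim), (0, 2, 1))
    return K.sum(inputs * alpha_r, axis=1)                # (batch, input_dim)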
Example #17
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
                self.dropout,
                training=training,
                count=8)
        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_dropout_mask is None):
            _recurrent_dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.recurrent_dropout,
                training=training,
                count=8)
            self._recurrent_dropout_mask = _recurrent_dropout_mask

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        if self.implementation == 1:
            if 0 < self.dropout < 1.:
                inputs_0 = inputs * dp_mask[0]
                inputs_1 = inputs * dp_mask[1]
                inputs_2 = inputs * dp_mask[2]
                inputs_3 = inputs * dp_mask[3]
                inputs_4 = inputs * dp_mask[4]
                inputs_5 = inputs * dp_mask[5]
                inputs_6 = inputs * dp_mask[6]
                inputs_7 = inputs * dp_mask[7]
            else:
                inputs_0 = inputs
                inputs_1 = inputs
                inputs_2 = inputs
                inputs_3 = inputs
                inputs_4 = inputs
                inputs_5 = inputs
                inputs_6 = inputs
                inputs_7 = inputs

            x_0 = K.dot(inputs_0, self.kernel_0)
            x_1 = K.dot(inputs_1, self.kernel_1)
            x_2 = K.dot(inputs_2, self.kernel_2)
            x_3 = K.dot(inputs_3, self.kernel_3)
            x_4 = K.dot(inputs_4, self.kernel_4)
            x_5 = K.dot(inputs_5, self.kernel_5)
            x_6 = K.dot(inputs_6, self.kernel_6)
            x_7 = K.dot(inputs_7, self.kernel_7)

            if self.use_bias:
                x_0 = K.bias_add(x_0, self.bias_0)
                x_1 = K.bias_add(x_1, self.bias_1)
                x_2 = K.bias_add(x_2, self.bias_2)
                x_3 = K.bias_add(x_3, self.bias_3)
                x_4 = K.bias_add(x_4, self.bias_4)
                x_5 = K.bias_add(x_5, self.bias_5)
                x_6 = K.bias_add(x_6, self.bias_6)
                x_7 = K.bias_add(x_7, self.bias_7)

            if 0 < self.recurrent_dropout < 1.:
                h_tm1_0 = h_tm1 * rec_dp_mask[0]
                h_tm1_1 = h_tm1 * rec_dp_mask[1]
                h_tm1_2 = h_tm1 * rec_dp_mask[2]
                h_tm1_3 = h_tm1 * rec_dp_mask[3]
                h_tm1_4 = h_tm1 * rec_dp_mask[4]
                h_tm1_5 = h_tm1 * rec_dp_mask[5]
                h_tm1_6 = h_tm1 * rec_dp_mask[6]
                h_tm1_7 = h_tm1 * rec_dp_mask[7]
            else:
                h_tm1_0 = h_tm1
                h_tm1_1 = h_tm1
                h_tm1_2 = h_tm1
                h_tm1_3 = h_tm1
                h_tm1_4 = h_tm1
                h_tm1_5 = h_tm1
                h_tm1_6 = h_tm1
                h_tm1_7 = h_tm1

            # First Layer
            layer1_0 = self.recurrent_activation(x_0 + K.dot(h_tm1_0, self.recurrent_kernel_0))
            layer1_1 = self.cell_activation(x_1 + K.dot(h_tm1_1, self.recurrent_kernel_1))
            layer1_2 = self.recurrent_activation(x_2 + K.dot(h_tm1_2, self.recurrent_kernel_2))
            layer1_3 = self.cell_activation(x_3 * K.dot(h_tm1_3, self.recurrent_kernel_3))
            layer1_4 = self.activation(x_4 + K.dot(h_tm1_4, self.recurrent_kernel_4))
            layer1_5 = self.recurrent_activation(x_5 + K.dot(h_tm1_5, self.recurrent_kernel_5))
            layer1_6 = self.activation(x_6 + K.dot(h_tm1_6, self.recurrent_kernel_6))
            layer1_7 = self.recurrent_activation(x_7 + K.dot(h_tm1_7, self.recurrent_kernel_7))

            # Second Layer
            layer2_0 = self.activation(layer1_0 * layer1_1)
            layer2_1 = self.activation(layer1_2 + layer1_3)
            layer2_2 = self.activation(layer1_4 * layer1_5)
            layer2_3 = self.recurrent_activation(layer1_6 + layer1_7)

            # Inject the Cell
            layer2_0 = self.activation(layer2_0 + c_tm1)

            # Third Layer
            layer3_0_pre = layer2_0 * layer2_1
            c = layer3_0_pre  # create a new cell
            layer3_0 = layer3_0_pre
            layer3_1 = self.activation(layer2_2 + layer2_3)

            # Final Layer
            h = self.activation(layer3_0 * layer3_1)

            if self.projection_units is not None:
                h = self.projection_activation(K.dot(h, self.projection_kernel))

        else:
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]
            z = K.dot(inputs, self.kernel)
            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]
            zr = K.dot(h_tm1, self.recurrent_kernel)
            if self.use_bias:
                zr = K.bias_add(zr, self.bias)

            z0 = z[:, :self.units]
            z1 = z[:, self.units: 2 * self.units]
            z2 = z[:, 2 * self.units: 3 * self.units]
            z3 = z[:, 3 * self.units: 4 * self.units]
            z4 = z[:, 4 * self.units: 5 * self.units]
            z5 = z[:, 5 * self.units: 6 * self.units]
            z6 = z[:, 6 * self.units: 7 * self.units]
            z7 = z[:, 7 * self.units:]

            zr0 = zr[:, :self.units]
            zr1 = zr[:, self.units: 2 * self.units]
            zr2 = zr[:, 2 * self.units: 3 * self.units]
            zr3 = zr[:, 3 * self.units: 4 * self.units]
            zr4 = zr[:, 4 * self.units: 5 * self.units]
            zr5 = zr[:, 5 * self.units: 6 * self.units]
            zr6 = zr[:, 6 * self.units: 7 * self.units]
            zr7 = zr[:, 7 * self.units:]

            # First Layer
            layer1_0 = self.recurrent_activation(z0 + zr0)
            layer1_1 = self.cell_activation(z1 + zr1)
            layer1_2 = self.recurrent_activation(z2 + zr2)
            layer1_3 = self.cell_activation(z3 * zr3)
            layer1_4 = self.activation(z4 + zr4)
            layer1_5 = self.recurrent_activation(z5 + zr5)
            layer1_6 = self.activation(z6 + zr6)
            layer1_7 = self.recurrent_activation(z7 + zr7)

            # Second Layer
            layer2_0 = self.activation(layer1_0 * layer1_1)
            layer2_1 = self.activation(layer1_2 + layer1_3)
            layer2_2 = self.activation(layer1_4 * layer1_5)
            layer2_3 = self.recurrent_activation(layer1_6 + layer1_7)

            # Inject the Cell
            layer2_0 = self.activation(layer2_0 + c_tm1)

            # Third Layer
            layer3_0_pre = layer2_0 * layer2_1
            c = layer3_0_pre
            layer3_0 = layer3_0_pre
            layer3_1 = self.activation(layer2_2 + layer2_3)

            # Final Layer
            h = self.activation(layer3_0 * layer3_1)

            if self.projection_units is not None:
                h = self.projection_activation(K.dot(h, self.projection_kernel))

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, [h, c]
Example #18
    def step(self, inputs, states):
        h_tm1 = states[0]
        c_tm1 = states[1]
        dp_mask = states[2]
        rec_dp_mask = states[3]
        x_input = states[4]

        # alignment model
        h_att = K.repeat(h_tm1, self.timestep_dim)
        att = _time_distributed_dense(x_input, self.attention_weights, self.attention_bias,
                                      output_dim=K.int_shape(self.attention_weights)[1])
        attention_ = self.attention_activation(K.dot(h_att, self.attention_recurrent_weights) + att) # energy
        attention_ = K.squeeze(K.dot(attention_, self.attention_recurrent_bias), 2) # energy

        alpha = K.exp(attention_)

        if dp_mask is not None:
            alpha *= dp_mask[0]

        alpha /= K.sum(alpha, axis=1, keepdims=True)
        alpha_r = K.repeat(alpha, self.input_dim)
        alpha_r = K.permute_dimensions(alpha_r, (0, 2, 1))

        # make context vector (soft attention after Bahdanau et al.)
        z_hat = x_input * alpha_r
        context_sequence = z_hat
        z_hat = K.sum(z_hat, axis=1)

        if self.implementation == 2:
            z = K.dot(inputs * dp_mask[0], self.kernel)
            z += K.dot(h_tm1 * rec_dp_mask[0], self.recurrent_kernel)
            z += K.dot(z_hat, self.attention_kernel)

            if self.use_bias:
                z = K.bias_add(z, self.bias)

            z0 = z[:, :self.units]
            z1 = z[:, self.units: 2 * self.units]
            z2 = z[:, 2 * self.units: 3 * self.units]
            z3 = z[:, 3 * self.units:]

            i = self.recurrent_activation(z0)
            f = self.recurrent_activation(z1)
            c = f * c_tm1 + i * self.activation(z2)
            o = self.recurrent_activation(z3)
        else:
            if self.implementation == 0:
                x_i = inputs[:, :self.units]
                x_f = inputs[:, self.units: 2 * self.units]
                x_c = inputs[:, 2 * self.units: 3 * self.units]
                x_o = inputs[:, 3 * self.units:]
            elif self.implementation == 1:
                x_i = K.dot(inputs * dp_mask[0], self.kernel_i) + self.bias_i
                x_f = K.dot(inputs * dp_mask[1], self.kernel_f) + self.bias_f
                x_c = K.dot(inputs * dp_mask[2], self.kernel_c) + self.bias_c
                x_o = K.dot(inputs * dp_mask[3], self.kernel_o) + self.bias_o
            else:
                raise ValueError('Unknown `implementation` mode.')

            i = self.recurrent_activation(x_i + K.dot(h_tm1 * rec_dp_mask[0], self.recurrent_kernel_i)
                                              + K.dot(z_hat, self.attention_i))
            f = self.recurrent_activation(x_f + K.dot(h_tm1 * rec_dp_mask[1], self.recurrent_kernel_f)
                                          + K.dot(z_hat, self.attention_f))
            c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1 * rec_dp_mask[2], self.recurrent_kernel_c)
                                                + K.dot(z_hat, self.attention_c))
            o = self.recurrent_activation(x_o + K.dot(h_tm1 * rec_dp_mask[3], self.recurrent_kernel_o)
                                          + K.dot(z_hat, self.attention_o))
        h = o * self.activation(c)
        if 0 < self.dropout + self.recurrent_dropout:
            h._uses_learning_phase = True

        if self.return_attention:
            return context_sequence, [h, c]
        else:
            return h, [h, c]
Example #19
	def call(self, inputs):
		# Note that the following does not include the linear term of the FM component
		ans1 = K.sum(inputs[0], axis = 1, keepdims = True) 
		ans2 = K.bias_add(K.dot(inputs[1], self.kernel), self.bias)
		#return K.sigmoid(ans1 + ans2 + inputs[2])
		return K.sigmoid(ans1 + ans2)
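The comment notes that the linear term of the factorization-machine part is omitted; `inputs[0]` is presumably a precomputed pairwise-interaction term that is simply summed here. For reference, the usual way to compute that second-order FM term with backend ops (assumed shapes, not taken from this layer):

# Second-order factorization-machine interaction term (assumed input shape:
# embeddings is (batch, num_fields, k)); "square of sum minus sum of squares".
from keras import backend as K

def fm_second_order(embeddings):
    summed_square = K.square(K.sum(embeddings, axis=1))   # (batch, k)
    squared_sum = K.sum(K.square(embeddings), axis=1)     # (batch, k)
    return 0.5 * K.sum(summed_square - squared_sum, axis=1, keepdims=True)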
Example #20
 def call(self, inputs):
     output = K.dot(inputs[0], self.kernel)
     output1 = K.batch_dot(output, inputs[1])
     output2 = K.bias_add(output1, self.bias)
     output2 = self.activation(output2)
     return output2
Example #21
    def call(self, inputs, states, training=None):
        h_tm1 = (states[0] if tf.nest.is_nested(states) else states
                 )  # previous memory

        dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=3)
        rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(h_tm1,
                                                               training,
                                                               count=3)

        if self.use_bias:
            if not self.reset_after:
                input_bias, recurrent_bias = self.bias, None
            else:
                input_bias, recurrent_bias = tf.unstack(self.bias)

        if self.implementation == 1:
            if 0.0 < self.dropout < 1.0:
                inputs_z = inputs * dp_mask[0]
                inputs_r = inputs * dp_mask[1]
                inputs_h = inputs * dp_mask[2]
            else:
                inputs_z = inputs
                inputs_r = inputs
                inputs_h = inputs

            x_z = backend.dot(inputs_z, self.kernel[:, :self.units])
            x_r = backend.dot(inputs_r, self.kernel[:,
                                                    self.units:self.units * 2])
            x_h = backend.dot(inputs_h, self.kernel[:, self.units * 2:])

            if self.use_bias:
                x_z = backend.bias_add(x_z, input_bias[:self.units])
                x_r = backend.bias_add(x_r,
                                       input_bias[self.units:self.units * 2])
                x_h = backend.bias_add(x_h, input_bias[self.units * 2:])

            if 0.0 < self.recurrent_dropout < 1.0:
                h_tm1_z = h_tm1 * rec_dp_mask[0]
                h_tm1_r = h_tm1 * rec_dp_mask[1]
                h_tm1_h = h_tm1 * rec_dp_mask[2]
            else:
                h_tm1_z = h_tm1
                h_tm1_r = h_tm1
                h_tm1_h = h_tm1

            recurrent_z = backend.dot(h_tm1_z,
                                      self.recurrent_kernel[:, :self.units])
            recurrent_r = backend.dot(
                h_tm1_r, self.recurrent_kernel[:, self.units:self.units * 2])
            if self.reset_after and self.use_bias:
                recurrent_z = backend.bias_add(recurrent_z,
                                               recurrent_bias[:self.units])
                recurrent_r = backend.bias_add(
                    recurrent_r, recurrent_bias[self.units:self.units * 2])

            z = self.recurrent_activation(x_z + recurrent_z)
            r = self.recurrent_activation(x_r + recurrent_r)

            # reset gate applied after/before matrix multiplication
            if self.reset_after:
                recurrent_h = backend.dot(
                    h_tm1_h, self.recurrent_kernel[:, self.units * 2:])
                if self.use_bias:
                    recurrent_h = backend.bias_add(
                        recurrent_h, recurrent_bias[self.units * 2:])
                recurrent_h = r * recurrent_h
            else:
                recurrent_h = backend.dot(
                    r * h_tm1_h, self.recurrent_kernel[:, self.units * 2:])

            hh = self.activation(x_h + recurrent_h)
        else:
            if 0.0 < self.dropout < 1.0:
                inputs = inputs * dp_mask[0]

            # inputs projected by all gate matrices at once
            matrix_x = backend.dot(inputs, self.kernel)
            if self.use_bias:
                # biases: bias_z_i, bias_r_i, bias_h_i
                matrix_x = backend.bias_add(matrix_x, input_bias)

            x_z, x_r, x_h = tf.split(matrix_x, 3, axis=-1)

            if self.reset_after:
                # hidden state projected by all gate matrices at once
                matrix_inner = backend.dot(h_tm1, self.recurrent_kernel)
                if self.use_bias:
                    matrix_inner = backend.bias_add(matrix_inner,
                                                    recurrent_bias)
            else:
                # hidden state projected separately for update/reset and new
                matrix_inner = backend.dot(
                    h_tm1, self.recurrent_kernel[:, :2 * self.units])

            recurrent_z, recurrent_r, recurrent_h = tf.split(
                matrix_inner, [self.units, self.units, -1], axis=-1)

            z = self.recurrent_activation(x_z + recurrent_z)
            r = self.recurrent_activation(x_r + recurrent_r)

            if self.reset_after:
                recurrent_h = r * recurrent_h
            else:
                recurrent_h = backend.dot(
                    r * h_tm1, self.recurrent_kernel[:, 2 * self.units:])

            hh = self.activation(x_h + recurrent_h)
        # previous and candidate state mixed by update gate
        h = z * h_tm1 + (1 - z) * hh
        new_state = [h] if tf.nest.is_nested(states) else h
        return h, new_state
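`reset_after` controls whether the reset gate multiplies the previous hidden state before or after the recurrent matrix multiplication; the "after" form matches cuDNN's GRU and is why separate input and recurrent biases are unstacked above. The two candidate-state formulas side by side (a sketch with assumed 2-D tensors):

# Candidate state for the two reset-gate conventions (assumed shapes:
# r and h_tm1 are (batch, units); rk_h is the (units, units) candidate kernel).
from keras import backend as K

def candidate_reset_before(x_h, r, h_tm1, rk_h):
    return K.tanh(x_h + K.dot(r * h_tm1, rk_h))   # classic GRU formulation

def candidate_reset_after(x_h, r, h_tm1, rk_h):
    return K.tanh(x_h + r * K.dot(h_tm1, rk_h))   # cuDNN-compatible formulation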
Example #22
    def call(self, inputs, states, training=None):
        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        z_ = states[0]
        Im_s_ = states[1]
        Re_s_ = states[2]
        omg_ = states[3]

        # only need one column of each matrix since they're all the same
        omg_ = omg_[:, :, 1]
        z_ = z_[:, :, 1]

        inputs_ = inputs[0]
        t_ = inputs[1]

        if 0. < self.dropout < 1.:
            inputs_i = inputs_* dp_mask[0]
            inputs_state = inputs_* dp_mask[1]
            inputs_freq = inputs_* dp_mask[2]
            inputs_g = inputs_* dp_mask[3]
            inputs_omg = inputs_* dp_mask[4]
            inputs_o = inputs_* dp_mask[5]
        else:
            inputs_i = inputs_
            inputs_freq = inputs_
            inputs_state = inputs_
            inputs_g = inputs_
            inputs_omg = inputs_
            inputs_o = inputs_

        x_i = K.dot(inputs_i, self.kernel_i)
        x_freq = K.dot(inputs_freq, self.kernel_freq)
        x_state = K.dot(inputs_state, self.kernel_state)
        x_g = K.dot(inputs_g, self.kernel_g)
        x_omg = K.dot(inputs_omg, self.kernel_omg)

        if self.use_bias:
            x_i = K.bias_add(x_i, self.bias_i)
            x_freq = K.bias_add(x_freq, self.bias_f)
            x_state = K.bias_add(x_state, self.bias_s)
            x_g = K.bias_add(x_g, self.bias_g)
            x_omg = K.bias_add(x_omg, self.bias_omg)

        if 0. < self.recurrent_dropout < 1.:
            z_i = z_ * rec_dp_mask[0]
            z_freq = z_ * rec_dp_mask[1]
            z_state = z_ * rec_dp_mask[2]
            z_g = z_ * rec_dp_mask[3]
            z_omg = z_ * rec_dp_mask[4]
            z_o = z_ * rec_dp_mask[5]
        else:
            z_i = z_
            z_freq = z_
            z_state = z_
            z_g = z_
            z_omg = z_
            z_o = z_

        freq = self.recurrent_activation(x_freq + K.dot(z_freq, self.recurrent_kernel_freq))
        state = self.recurrent_activation(x_state + K.dot(z_state, self.recurrent_kernel_state))
        combined_forget_gate = self.outer_product(freq, state)

        i = self.recurrent_activation(x_i + K.dot(z_i, self.recurrent_kernel_i))

        g = K.tanh(x_g + K.dot(z_g, self.recurrent_kernel_g))

        omega = x_omg + K.dot(z_omg, self.recurrent_kernel_omg)

        real_s = combined_forget_gate * Re_s_ + self.outer_product(i * g, K.cos(omg_ * t_))
        img_s = combined_forget_gate * Im_s_ + self.outer_product(i * g, K.sin(omg_ * t_))

        amplitude = K.sqrt(K.square(real_s) + K.square(img_s))
        # transpose to dimensions (frequency_components, samples, state) for tf.scan
        amplitude = tf.transpose(amplitude, perm=[1, 0, 2])

        def __freq(z_k, inputs_):

            U_k, W_k, V_k, b_k, W_z_k, b_z_k, A_k = inputs_
            o = self.recurrent_activation(K.dot(A_k, U_k) + K.dot(inputs_o, W_k) + K.dot(z_o, V_k) + b_k)
            zz = z_k + o * K.tanh(K.dot(A_k, W_z_k) + b_z_k)

            return tf.stack(zz)

        h = tf.scan(__freq,elems=[self.frequency_kernel_U,self.frequency_kernel_W,
                                     self.frequency_kernel_V,self.freq_bias_o,
                                     self.frequency_kernel_W_z,self.freq_bias_z,
                                     amplitude],initializer=tf.zeros(tf.shape(z_)))
        # get last summation state for final sum
        h = h[-1]
        # make new omega and h matrices to fit size of other stacked matrices
        omega = tf.stack([omega for _ in range(self.state_size[0])], axis=1)
        h = tf.stack([h for _ in range(self.state_size[0])], axis=1)

        return h, [h, img_s, real_s, omega]
Example #23
    def call(self, inputs, **kwargs):
        """
        Applies the layer.

        Args:
            inputs (list): list of inputs with 2 items: node features (matrix of size N x F),
                and graph adjacency matrix (size N x N), where N is the number of nodes in the graph,
                F is the dimensionality of node features

        """
        X = inputs[0]  # Node features (N x F)
        A = inputs[1]  # Adjacency matrix (N x N)
        # Convert A to dense tensor - needed for the mask to work
        # TODO: replace this dense implementation of GraphAttention layer with a sparse implementation
        if K.is_sparse(A):
            A = tf.sparse_tensor_to_dense(A, validate_indices=False)

        # For the GAT model to match that in the paper, we need to ensure that the graph has self-loops,
        # since the neighbourhood of node i in eq. (4) includes node i itself.
        # Adding self-loops to A via setting the diagonal elements of A to 1.0:
        if kwargs.get("add_self_loops", False):
            # get the number of nodes from inputs[1] directly
            N = K.int_shape(inputs[1])[-1]
            if N is not None:
                # create self-loops
                A = tf.linalg.set_diag(A, K.cast(np.ones((N, )),
                                                 dtype="float"))
            else:
                raise ValueError(
                    "{}: need to know number of nodes to add self-loops; obtained None instead"
                    .format(type(self).__name__))

        outputs = []
        for head in range(self.attn_heads):
            kernel = self.kernels[head]  # W in the paper (F x F')
            attention_kernel = self.attn_kernels[
                head]  # Attention kernel a in the paper (2F' x 1)

            # Compute inputs to attention network
            features = K.dot(X, kernel)  # (N x F')

            # Compute feature combinations
            # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_2]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
            attn_for_self = K.dot(
                features, attention_kernel[0])  # (N x 1), [a_1]^T [Wh_i]
            attn_for_neighs = K.dot(
                features, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

            # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]]
            dense = attn_for_self + K.transpose(
                attn_for_neighs)  # (N x N) via broadcasting

            # Add nonlinearity
            dense = LeakyReLU(alpha=0.2)(dense)

            # Mask values before activation (Vaswani et al., 2017)
            # YT: this only works for 'binary' A, not for 'weighted' A!
            # YT: if A does not have self-loops, the node itself will be masked, so A should have self-loops
            # YT: this is ensured by setting the diagonal elements of A tensor to 1 above
            mask = -10e9 * (1.0 - A)
            dense += mask

            # Apply softmax to get attention coefficients
            dense = K.softmax(dense)  # (N x N), Eq. 3 of the paper

            # Apply dropout to features and attention coefficients
            dropout_feat = Dropout(self.in_dropout_rate)(features)  # (N x F')
            dropout_attn = Dropout(self.attn_dropout_rate)(dense)  # (N x N)

            # Linear combination with neighbors' features [YT: see Eq. 4]
            node_features = K.dot(dropout_attn, dropout_feat)  # (N x F')

            if self.use_bias:
                node_features = K.bias_add(node_features, self.biases[head])

            # Add output of attention head to final output
            outputs.append(node_features)

        # Aggregate the heads' output according to the reduction method
        if self.attn_heads_reduction == "concat":
            output = K.concatenate(outputs)  # (N x KF')
        else:
            output = K.mean(K.stack(outputs), axis=0)  # (N x F')

        output = self.activation(output)
        return output
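The per-head attention logits e_ij = LeakyReLU(a_1ᵀWh_i + a_2ᵀWh_j) are formed for all node pairs at once by broadcasting a column vector against a row vector. That broadcasting trick on its own (a sketch; shapes assumed):

# Pairwise attention logits via broadcasting, as in the head loop above
# (assumed shapes: attn_self and attn_neigh are (N, 1) column vectors).
from keras import backend as K

def pairwise_logits(attn_self, attn_neigh):
    # (N, 1) + (1, N) broadcasts to (N, N), with entry (i, j) = attn_self[i] + attn_neigh[j]
    return attn_self + K.transpose(attn_neigh)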
Example #24
    def call(self, inputs):
        x = inputs
        W = self.kernel

        #print("x:", x.shape)
        #print("W:", W.shape)

        weights_norm = tf.norm(W, axis=0, keepdims=True)
        weights = tf.div(W, weights_norm, name="normalize_weights")
        logits = tf.matmul(x, weights)

        #print("self.factormachine:", self.factormachine.shape)
        #Factor machine
        #factors = K.dot(self.factormachine, x )
        #print("self.batch_size: ", self.batch_size)
        #print("self.feature_size: ", self.feature_size)

        features = []
        for ii in range(self.batch_size):
            xi = x[ii]
            wi = self.factormachine[ii]

            feature = tf.multiply(wi, xi)
            feature = K.transpose(feature)

            #print("feature: ", feature.shape)
            features.append(feature)

        feature_machine = []
        for ii in range(self.batch_size):
            #sum_a_keepdims = K.sum(a , axis=-1 , keepdims=True)
            #K.sum()
            sum = K.sum(features[ii], axis=0, keepdims=False)

            #print("sum: ", sum)

            diffs = []
            #print("K.shape(features[ii]): ", K.shape(features[ii]))
            for jj in range(self.feature_size):
                diff = tf.subtract(
                    sum, features[ii]
                    [jj])  #Subtract()([sum, x]) for x in features[ii]]
                diffs.append(diff)

            #print("diffs: ", diffs)
            dots = []
            for jj in range(self.feature_size):
                #dots = [Dot(axes=1)([d, x]) for d, x in zip(diffs, features[ii])]
                dot = tf.multiply(
                    diffs[jj],
                    features[ii][jj])  #K.dot(diffs[jj], features[ii][jj])
                dots.append(dot)

            sum = K.sum(dots, axis=0, keepdims=False)
            feature_machine.append(sum)
            # print("dots: ", dots)
            # print("dots: ", dots[0].shape)

        if self.use_bias:
            output = K.bias_add(logits, self.bias, data_format='channels_last')
        else:
            output = logits

        output = output + feature_machine

        if self.activation is not None:
            output = self.activation(output)
        return output
Example #25
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(K.ones_like(inputs),
                                                        self.dropout,
                                                        training=training,
                                                        count=4)

        if (0 < self.recurrent_dropout < 1
                and self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(states[0]),
                self.recurrent_dropout,
                training=training,
                count=4)

        #generating hidden layer dropout masks
        if (0 < self.hidden_dropout < 1 and self._hidden_dropout_mask is None):
            self._hidden_dropout_mask = _generate_dropout_mask(
                K.ones((self.hidden_units, )),
                self.dropout,
                training=training,
                count=2)


        # print('kernel i shape')
        # print(self.kernel_i.shape)
        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for hidden units
        hidden_dp_mask = self._hidden_dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[
            0]  # previous memory state (should have dimension output_size)
        c_tm1 = states[1]  # previous carry state
        m_tm1 = states[2]  # previous intermediate state

        #        print('h shape')
        #        print(h_tm1.shape)
        #        print('c shape')
        #        print(c_tm1.shape)

        if self.implementation == 1:
            if 0 < self.dropout < 1.:
                inputs_i = inputs * dp_mask[0]
                inputs_f = inputs * dp_mask[1]
                inputs_c = inputs * dp_mask[2]
                inputs_o = inputs * dp_mask[3]
            else:
                inputs_i = inputs
                inputs_f = inputs
                inputs_c = inputs
                inputs_o = inputs
            x_i = K.dot(inputs_i, self.kernel_i)
            x_f = K.dot(inputs_f, self.kernel_f)
            x_c = K.dot(inputs_c, self.kernel_c)
            x_o = K.dot(inputs_o, self.kernel_o)
            #print('x_i shape');
            #print(x_i.shape)
            if self.use_bias:
                x_i = K.bias_add(x_i, self.bias_i)
                x_f = K.bias_add(x_f, self.bias_f)
                x_c = K.bias_add(x_c, self.bias_c)
                x_o = K.bias_add(x_o, self.bias_o)

            if 0 < self.recurrent_dropout < 1.:
                h_tm1_i = h_tm1 * rec_dp_mask[0]
                h_tm1_f = h_tm1 * rec_dp_mask[1]
                h_tm1_c = h_tm1 * rec_dp_mask[2]
                h_tm1_o = h_tm1 * rec_dp_mask[3]
            else:
                h_tm1_i = h_tm1
                h_tm1_f = h_tm1
                h_tm1_c = h_tm1
                h_tm1_o = h_tm1

            #intermediate recurrent inputs
            m_tm1_i = m_tm1
            m_tm1_f = m_tm1
            m_tm1_c = m_tm1
            m_tm1_o = m_tm1

            i = self.recurrent_activation(
                x_i + K.dot(h_tm1_i, self.recurrent_kernel_i) +
                K.dot(m_tm1_i, self.intermediate_kernel_i))
            f = self.recurrent_activation(
                x_f + K.dot(h_tm1_f, self.recurrent_kernel_f) +
                K.dot(m_tm1_f, self.intermediate_kernel_f))
            c = f * c_tm1 + i * self.activation(
                x_c + K.dot(h_tm1_c, self.recurrent_kernel_c) +
                K.dot(m_tm1_c, self.intermediate_kernel_c))
            o = self.recurrent_activation(
                x_o + K.dot(h_tm1_o, self.recurrent_kernel_o) +
                K.dot(m_tm1_o, self.intermediate_kernel_o))
        else:
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]
            z = K.dot(inputs, self.kernel)
            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]
            z += K.dot(h_tm1, self.recurrent_kernel)
            if self.use_bias:
                z = K.bias_add(z, self.bias)

            z0 = z[:, :self.input_units]
            z1 = z[:, self.input_units:2 * self.input_units]
            z2 = z[:, 2 * self.input_units:3 * self.input_units]
            z3 = z[:, 3 * self.input_units:]

            i = self.recurrent_activation(z0)
            f = self.recurrent_activation(z1)
            c = f * c_tm1 + i * self.activation(z2)
            o = self.recurrent_activation(z3)

        #h = o * self.activation(c) + h_tm1
        m = o * self.activation(c)

        #hidden layer 1
        x_hidden1 = K.dot(m, self.kernel_hidden1)
        x_hidden1 = self.rectifier_activation(
            K.bias_add(x_hidden1, self.bias_hidden1))
        #dropout implementation of hidden layer 1
        if 0 < self.hidden_dropout < 1.:
            x_hidden1 = x_hidden1 * hidden_dp_mask[0]

        #hidden layer 2
        x_hidden2 = K.dot(x_hidden1, self.kernel_hidden2)
        x_hidden2 = self.rectifier_activation(
            K.bias_add(x_hidden2, self.bias_hidden2))

        #dropout implementaion of hidden layer 2
        if 0 < self.hidden_dropout < 1.:
            x_hidden2 = x_hidden2 * hidden_dp_mask[1]

        #rectified dense layer
        x_r = K.dot(x_hidden2, self.kernel_r)
        x_r = K.bias_add(x_r, self.bias_r)
        r = self.rectifier_activation(x_r)
        h = r + h_tm1
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, [h, c, m]
Example #26
    def call(self, inputs, states, training=None):
        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        if self.implementation == 1:
            if 0 < self.dropout < 1.:
                inputs_0 = inputs * dp_mask[0]
                inputs_1 = inputs * dp_mask[1]
                inputs_2 = inputs * dp_mask[2]
                inputs_3 = inputs * dp_mask[3]
                inputs_4 = inputs * dp_mask[4]
                inputs_5 = inputs * dp_mask[5]
                inputs_6 = inputs * dp_mask[6]
                inputs_7 = inputs * dp_mask[7]
            else:
                inputs_0 = inputs
                inputs_1 = inputs
                inputs_2 = inputs
                inputs_3 = inputs
                inputs_4 = inputs
                inputs_5 = inputs
                inputs_6 = inputs
                inputs_7 = inputs

            x_0 = K.dot(inputs_0, self.kernel_0)
            x_1 = K.dot(inputs_1, self.kernel_1)
            x_2 = K.dot(inputs_2, self.kernel_2)
            x_3 = K.dot(inputs_3, self.kernel_3)
            x_4 = K.dot(inputs_4, self.kernel_4)
            x_5 = K.dot(inputs_5, self.kernel_5)
            x_6 = K.dot(inputs_6, self.kernel_6)
            x_7 = K.dot(inputs_7, self.kernel_7)

            if self.use_bias:
                x_0 = K.bias_add(x_0, self.bias_0)
                x_1 = K.bias_add(x_1, self.bias_1)
                x_2 = K.bias_add(x_2, self.bias_2)
                x_3 = K.bias_add(x_3, self.bias_3)
                x_4 = K.bias_add(x_4, self.bias_4)
                x_5 = K.bias_add(x_5, self.bias_5)
                x_6 = K.bias_add(x_6, self.bias_6)
                x_7 = K.bias_add(x_7, self.bias_7)

            if 0 < self.recurrent_dropout < 1.:
                h_tm1_0 = h_tm1 * rec_dp_mask[0]
                h_tm1_1 = h_tm1 * rec_dp_mask[1]
                h_tm1_2 = h_tm1 * rec_dp_mask[2]
                h_tm1_3 = h_tm1 * rec_dp_mask[3]
                h_tm1_4 = h_tm1 * rec_dp_mask[4]
                h_tm1_5 = h_tm1 * rec_dp_mask[5]
                h_tm1_6 = h_tm1 * rec_dp_mask[6]
                h_tm1_7 = h_tm1 * rec_dp_mask[7]
            else:
                h_tm1_0 = h_tm1
                h_tm1_1 = h_tm1
                h_tm1_2 = h_tm1
                h_tm1_3 = h_tm1
                h_tm1_4 = h_tm1
                h_tm1_5 = h_tm1
                h_tm1_6 = h_tm1
                h_tm1_7 = h_tm1

            # First Layer
            layer1_0 = self.recurrent_activation(
                x_0 + K.dot(h_tm1_0, self.recurrent_kernel_0))
            layer1_1 = self.cell_activation(
                x_1 + K.dot(h_tm1_1, self.recurrent_kernel_1))
            layer1_2 = self.recurrent_activation(
                x_2 + K.dot(h_tm1_2, self.recurrent_kernel_2))
            layer1_3 = self.cell_activation(
                x_3 * K.dot(h_tm1_3, self.recurrent_kernel_3))
            layer1_4 = self.activation(x_4 +
                                       K.dot(h_tm1_4, self.recurrent_kernel_4))
            layer1_5 = self.recurrent_activation(
                x_5 + K.dot(h_tm1_5, self.recurrent_kernel_5))
            layer1_6 = self.activation(x_6 +
                                       K.dot(h_tm1_6, self.recurrent_kernel_6))
            layer1_7 = self.recurrent_activation(
                x_7 + K.dot(h_tm1_7, self.recurrent_kernel_7))

            # Second Layer
            layer2_0 = self.activation(layer1_0 * layer1_1)
            layer2_1 = self.activation(layer1_2 + layer1_3)
            layer2_2 = self.activation(layer1_4 * layer1_5)
            layer2_3 = self.recurrent_activation(layer1_6 + layer1_7)

            # Inject the Cell
            layer2_0 = self.activation(layer2_0 + c_tm1)

            # Third Layer
            layer3_0_pre = layer2_0 * layer2_1
            c = layer3_0_pre  # create a new cell
            layer3_0 = layer3_0_pre
            layer3_1 = self.activation(layer2_2 + layer2_3)

            # Final Layer
            h = self.activation(layer3_0 * layer3_1)

            if self.projection_units is not None:
                h = self.projection_activation(K.dot(h,
                                                     self.projection_kernel))

        else:
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]
            z = K.dot(inputs, self.kernel)
            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]
            zr = K.dot(h_tm1, self.recurrent_kernel)
            if self.use_bias:
                zr = K.bias_add(zr, self.bias)

            z0 = z[:, :self.units]
            z1 = z[:, self.units:2 * self.units]
            z2 = z[:, 2 * self.units:3 * self.units]
            z3 = z[:, 3 * self.units:4 * self.units]
            z4 = z[:, 4 * self.units:5 * self.units]
            z5 = z[:, 5 * self.units:6 * self.units]
            z6 = z[:, 6 * self.units:7 * self.units]
            z7 = z[:, 7 * self.units:]

            zr0 = zr[:, :self.units]
            zr1 = zr[:, self.units:2 * self.units]
            zr2 = zr[:, 2 * self.units:3 * self.units]
            zr3 = zr[:, 3 * self.units:4 * self.units]
            zr4 = zr[:, 4 * self.units:5 * self.units]
            zr5 = zr[:, 5 * self.units:6 * self.units]
            zr6 = zr[:, 6 * self.units:7 * self.units]
            zr7 = zr[:, 7 * self.units:]

            # First Layer
            layer1_0 = self.recurrent_activation(z0 + zr0)
            layer1_1 = self.cell_activation(z1 + zr1)
            layer1_2 = self.recurrent_activation(z2 + zr2)
            layer1_3 = self.cell_activation(z3 * zr3)
            layer1_4 = self.activation(z4 + zr4)
            layer1_5 = self.recurrent_activation(z5 + zr5)
            layer1_6 = self.activation(z6 + zr6)
            layer1_7 = self.recurrent_activation(z7 + zr7)

            # Second Layer
            layer2_0 = self.activation(layer1_0 * layer1_1)
            layer2_1 = self.activation(layer1_2 + layer1_3)
            layer2_2 = self.activation(layer1_4 * layer1_5)
            layer2_3 = self.recurrent_activation(layer1_6 + layer1_7)

            # Inject the Cell
            layer2_0 = self.activation(layer2_0 + c_tm1)

            # Third Layer
            layer3_0_pre = layer2_0 * layer2_1
            c = layer3_0_pre
            layer3_0 = layer3_0_pre
            layer3_1 = self.activation(layer2_2 + layer2_3)

            # Final Layer
            h = self.activation(layer3_0 * layer3_1)

            if self.projection_units is not None:
                h = self.projection_activation(K.dot(h,
                                                     self.projection_kernel))

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, [h, c]
    def step(self, inputs, states):
        """
        Step function called to compute the next state of the network
        This step function is equal to the regular GRU step function,
        except that the input
        :param x:
        :param states:
        :return:
        """
        h_tm1 = states[0]  # previous memory
        dp_mask = states[1]  # dropout matrices for recurrent units
        rec_dp_mask = states[2]

        if self.implementation == 2:
            matrix_x = K.dot(inputs * dp_mask[0], self.kernel)
            if self.use_bias:
                matrix_x = K.bias_add(matrix_x, self.bias)
            matrix_inner = K.dot(h_tm1 * rec_dp_mask[0],
                                 self.recurrent_kernel[:, :2 * self.units])

            x_z = matrix_x[:, :self.units]
            x_r = matrix_x[:, self.units:2 * self.units]
            recurrent_z = matrix_inner[:, :self.units]
            recurrent_r = matrix_inner[:, self.units:2 * self.units]

            z = self.recurrent_activation(x_z + recurrent_z)
            r = self.recurrent_activation(x_r + recurrent_r)

            x_h = matrix_x[:, 2 * self.units:]
            recurrent_h = K.dot(r * h_tm1 * rec_dp_mask[0],
                                self.recurrent_kernel[:, 2 * self.units:])
            hh = self.activation(x_h + recurrent_h)
        else:
            if self.implementation == 0:
                x_z = inputs[:, :self.units]
                x_r = inputs[:, self.units:2 * self.units]
                x_h = inputs[:, 2 * self.units:]
            elif self.implementation == 1:
                x_z = K.dot(inputs * dp_mask[0], self.kernel_z)
                x_r = K.dot(inputs * dp_mask[1], self.kernel_r)
                x_h = K.dot(inputs * dp_mask[2], self.kernel_h)
                if self.use_bias:
                    x_z = K.bias_add(x_z, self.bias_z)
                    x_r = K.bias_add(x_r, self.bias_r)
                    x_h = K.bias_add(x_h, self.bias_h)
            else:
                raise ValueError('Unknown `implementation` mode.')
            z = self.recurrent_activation(
                x_z + K.dot(h_tm1 * rec_dp_mask[0], self.recurrent_kernel_z))
            r = self.recurrent_activation(
                x_r + K.dot(h_tm1 * rec_dp_mask[1], self.recurrent_kernel_r))

            hh = self.activation(
                x_h +
                K.dot(r * h_tm1 * rec_dp_mask[2], self.recurrent_kernel_h))
        h = z * h_tm1 + (1 - z) * hh
        if 0 < self.dropout + self.recurrent_dropout:
            h._uses_learning_phase = True

        # concatenate hidden state and gate activations for the output
        output = K.concatenate([h, z, r])

        return output, [h]
Exemple #28
0
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
                self.dropout,
                training=training,
                count=2)
        if (0 < self.recurrent_dropout < 1
                and self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.recurrent_dropout,
                training=training,
                count=2)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        if self.implementation == 1:
            if 0 < self.dropout < 1.:
                inputs_f = inputs * dp_mask[0]
                inputs_c = inputs * dp_mask[1]
            else:
                inputs_f = inputs
                inputs_c = inputs

            x_f = K.dot(inputs_f, self.kernel_f)
            x_c = K.dot(inputs_c, self.kernel_c)

            if self.use_bias:
                x_f = K.bias_add(x_f, self.bias_f)
                x_c = K.bias_add(x_c, self.bias_c)

            if 0 < self.recurrent_dropout < 1.:
                h_tm1_f = h_tm1 * rec_dp_mask[0]
                h_tm1_c = h_tm1 * rec_dp_mask[1]
            else:
                h_tm1_f = h_tm1
                h_tm1_c = h_tm1

            f = self.recurrent_activation(
                x_f + K.dot(h_tm1_f, self.recurrent_kernel_f))
            c = f * c_tm1 + (1. - f) * self.activation(
                x_c + K.dot(h_tm1_c, self.recurrent_kernel_c))
        else:
            if 0. < self.dropout < 1.:
                inputs *= dp_mask[0]

            z = K.dot(inputs, self.kernel)

            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]

            z += K.dot(h_tm1, self.recurrent_kernel)

            if self.use_bias:
                z = K.bias_add(z, self.bias)

            z0 = z[:, :self.units]
            z1 = z[:, self.units:2 * self.units]

            f = self.recurrent_activation(z0)
            c = f * c_tm1 + (1. - f) * self.activation(z1)

        h = c
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, [h, c]
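
The cell above keeps a single forget gate, so the carry state is just a convex combination of its previous value and a fresh candidate. A minimal NumPy sketch of that update, assuming sigmoid/tanh for recurrent_activation/activation (the function name and arguments are illustrative only):

    import numpy as np

    def forget_only_update(c_tm1, x_c, h_c, f_logit):
        # c = f * c_tm1 + (1 - f) * tanh(x_c + h_c), mirroring the cell above
        f = 1.0 / (1.0 + np.exp(-f_logit))   # assumed recurrent_activation = sigmoid
        candidate = np.tanh(x_c + h_c)       # assumed activation = tanh
        return f * c_tm1 + (1.0 - f) * candidate

    c_new = forget_only_update(np.zeros(4), np.ones(4), np.zeros(4), np.zeros(4))
    # f = 0.5 everywhere, so c_new = 0.5 * tanh(1.0) ~= 0.38
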
Exemple #29
0
    def call(self, ins):
        u   = ins[0]
        img = ins[1]

        ### g learned by net
        g1 = K.conv2d(img, self.kernel_1, padding='same')
        g1 = K.bias_add(g1, self.bias_1)
        g1 = K.relu(g1)
        g1 = K.conv2d(g1, self.kernel_2, padding='same')
        g1 = K.bias_add(g1, self.bias_2)
        g1 = K.relu(g1)
        g2 = K.pool2d(g1, (2,2), padding='same')
        g2 = K.conv2d(g2, self.kernel_3, padding='same')
        g2 = K.bias_add(g2, self.bias_3)
        g2 = K.relu(g2)
        g2 = K.conv2d(g2, self.kernel_4, padding='same')
        g2 = K.bias_add(g2, self.bias_4)
        g2 = K.relu(g2)
        g3 = K.pool2d(g2, (2,2), padding='same')
        g3 = K.conv2d(g3, self.kernel_5, padding='same')
        g3 = K.bias_add(g3, self.bias_5)
        g3 = K.relu(g3)
        g3 = K.conv2d(g3, self.kernel_6, padding='same')
        g3 = K.bias_add(g3, self.bias_6)
        g3 = K.relu(g3)
        g3 = K.conv2d(g3, self.kernel_7, padding='same')
        g3 = K.bias_add(g3, self.bias_7)
        g3 = K.relu(g3)
        g3 = K.conv2d(g3, self.kernel_8, padding='same')
        g3 = K.bias_add(g3, self.bias_8)
        g3 = K.relu(g3)
        g4 = K.resize_images(g3, 2, 2, data_format='channels_last', interpolation='bilinear')
        g4 = K.concatenate([g2, g4])
        g4 = K.conv2d(g4, self.kernel_9, padding='same')
        g4 = K.bias_add(g4, self.bias_9)
        g4 = K.relu(g4)
        g4 = K.conv2d(g4, self.kernel_10, padding='same')
        g4 = K.bias_add(g4, self.bias_10)
        g4 = K.relu(g4)
        g5 = K.resize_images(g4, 2, 2, data_format='channels_last', interpolation='bilinear')
        g5 = K.concatenate([g1, g5])
        g5 = K.conv2d(g5, self.kernel_11, padding='same')
        g5 = K.bias_add(g5, self.bias_11)
        g5 = K.relu(g5)
        g5 = K.conv2d(g5, self.kernel_12, padding='same')
        g5 = K.bias_add(g5, self.bias_12)
        g  = K.sigmoid(g5)

        ### grad(g)
        g_x = K.conv2d(g, kXC, padding='same')
        g_x = scale(g_x, self.shp, self.rhp, self.dx)
        g_y = K.conv2d(g, kYC, padding='same')
        g_y = scale(g_y, self.shp, self.rhp, self.dy)

        ### transport - upwind
        xp = K.conv2d(u, xKP, padding='same')
        xn = K.conv2d(u, xKN, padding='same')
        yp = K.conv2d(u, xYP, padding='same')
        yn = K.conv2d(u, xYN, padding='same')
        fxp =        K.relu(        g_x)
        fxn = -1.0 * K.relu( -1.0 * g_x)
        fyp =        K.relu(        g_y)
        fyn = -1.0 * K.relu( -1.0 * g_y)
        xpp = fxp*xp
        xnn = fxn*xn
        ypp = fyp*yp
        ynn = fyn*yn
        xterms = xpp + xnn
        xterms = scale(xterms, self.shp, self.rhp, self.dx)
        yterms = ypp + ynn
        yterms = scale(yterms, self.shp, self.rhp, self.dy)
        transport = xterms + yterms

        ### curvature - learned
        grad_u = K.conv2d(u, self.kernel_k1, padding='same')
        norm_grad_u = K.sqrt(   K.epsilon() + K.sum( K.square(grad_u), axis=-1, keepdims=True) )
        grad_u = grad_u / (norm_grad_u + K.epsilon())
        kappa = K.conv2d(grad_u, self.kernel_k2, padding='same')
        curvature = g*kappa*norm_grad_u

        ### balloon
        balloon = g*norm_grad_u

        return u + K.constant(self.dt)*(   curvature * self.alpha \
                                         + transport * self.beta  \
                                         + balloon   * self.gamma )
Exemple #30
0
    def call(self, point_cloud):
        def getDistanceMatrix(x):
            """ Compute pairwise distance matrix for a point cloud

            Input:
            x: tensor (batch_size, n_points, n_features) - point cloud
            
            Returns:
            dists: tensor (batch_size, n_points, n_points) pairwise distances
            """
            part1 = -2 * K.batch_dot(x, K.permute_dimensions(x, (0, 2, 1)))
            part2 = K.permute_dimensions(K.expand_dims(K.sum(x**2, axis=2)),
                                         (0, 2, 1))
            part3 = K.expand_dims(K.sum(x**2, axis=2))
            dists = part1 + part2 + part3
            return dists

        def getKnearest(dists, k):
            """Get indices of k nearest neighbors from distance tensor
            Input:
            dists: (batch_size, n_points, n_points) pairwise distances
            Returns:
            knn_idx: (batch_size, n_points, k) nearest neighbor indices
            """
            _, knn_idx = tf.math.top_k(-dists, k=k)
            return knn_idx

        def getEdgeFeature(point_cloud, nn_idx):
            """Construct the input for the edge convolution
            Input:
            point_cloud: (batch_size, n_points, n_features)
            nn_idx: (batch_size, n_points, n_neighbors)
            Returns:
            edge_features: (batch_size, n_points, k, n_features*2)
            """
            k = nn_idx.get_shape()[-1]

            point_cloud_shape = tf.shape(point_cloud)
            batch_size = point_cloud_shape[0]
            n_points = point_cloud_shape[1]
            n_features = point_cloud_shape[2]

            # Prepare indices to match neighbors in flattened cloud
            idx = K.arange(0, stop=batch_size, step=1) * n_points
            idx = K.reshape(idx, [-1, 1, 1])

            # Flatten cloud and gather neighbors
            flat_cloud = K.reshape(point_cloud, [-1, n_features])
            neighbors = K.gather(flat_cloud, nn_idx + idx)

            # Expand centers to (batch_size, n_points, k, n_features)
            cloud_centers = K.expand_dims(point_cloud, axis=-2)
            cloud_centers = K.tile(cloud_centers, [1, 1, k, 1])

            edge_features = K.concatenate(
                [cloud_centers, neighbors - cloud_centers], axis=-1)
            return edge_features

        def batch_norm(inputs, gamma, beta, dims, ind):
            """ Normalize batch and update moving averages for mean and std
            Input:
              inputs: (batchsize, n_points, k, n_features * 2) - edge_features
              gamma: weight - gamma for batch normalization
              beta: weight - beta for batch normalization
              dims: list - dimensions along which to normalize
              ind: int - indicating which weights to use
            Returns:
             During training:
              normed: (batchsize, n_points, k, n_features * 2) - normalized
                            batch of data using actual batch for normalization
             Else:
              normed_moving: same, but using the updated average values
            """

            # Calculate normalized data, mean and std for batch
            normed, batch_mean, batch_var = K.normalize_batch_in_training(
                x=inputs, gamma=gamma, beta=beta, reduction_axes=dims)

            # Update the moving averages
            self.add_update([
                K.moving_average_update(self.moving_mean[ind], batch_mean,
                                        0.9),
                K.moving_average_update(self.moving_var[ind], batch_var, 0.9)
            ])

            # Calculate normalization using the averages
            normed_moving = K.batch_normalization(x=inputs,
                                                  mean=self.moving_mean[ind],
                                                  var=self.moving_var[ind],
                                                  beta=beta,
                                                  gamma=gamma)

            # If training return normed, else normed_moving
            return K.in_train_phase(normed, normed_moving)

        if self.n_ind:  # get distances according to the given feature indices
            dists = getDistanceMatrix(
                point_cloud[:, :, slice(self.n_ind[0], self.n_ind[1])])
        else:  # get distances according to full feature vector
            dists = getDistanceMatrix(point_cloud)

        knn_idx = getKnearest(dists, self.k)
        edge_features = getEdgeFeature(point_cloud, knn_idx)

        # Create first convolutional block
        output = K.conv2d(edge_features,
                          self.kernel[0], (1, 1),
                          padding='same')
        output = K.bias_add(output, self.bias[0])
        output = batch_norm(output, self.gamma[0], self.beta[0], [0, 1, 2], 0)
        output = K.relu(output)

        # Additional convolutional blocks
        for i in range(1, len(self.n_channel_out)):
            output = K.conv2d(output, self.kernel[i], (1, 1), padding='same')
            output = K.bias_add(output, self.bias[i])
            output = batch_norm(output, self.gamma[i], self.beta[i], [0, 1, 2],
                                i)
            output = K.relu(output)

        output = K.mean(output, axis=-2)
        return output
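
getDistanceMatrix above expands the squared Euclidean distance as ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2. A small NumPy check of that expansion against a brute-force computation (illustrative shapes, not part of the layer):

    import numpy as np

    x = np.random.rand(2, 5, 3)                       # (batch_size, n_points, n_features)
    part1 = -2 * np.einsum('bij,bkj->bik', x, x)      # -2 * x @ x^T per batch
    sq = np.sum(x ** 2, axis=2)
    dists = part1 + sq[:, :, None] + sq[:, None, :]   # same three terms as in the layer
    brute = np.sum((x[:, :, None, :] - x[:, None, :, :]) ** 2, axis=-1)
    assert np.allclose(dists, brute)
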
Exemple #31
0
    def attention(self,
                  pre_q,
                  pre_v,
                  pre_k,
                  out_seq_len: int,
                  d_model: int,
                  training=None):
        """
        Calculates the output of the attention once the affine transformations
        of the inputs are done. Here are the shapes of the arguments:
        :param pre_q: (batch_size, q_seq_len, num_heads, d_model // num_heads)
        :param pre_v: (batch_size, v_seq_len, num_heads, d_model // num_heads)
        :param pre_k: (batch_size, k_seq_len, num_heads, d_model // num_heads)
        :param out_seq_len: the length of the output sequence
        :param d_model: dimensionality of the model (by the paper)
        :param training: Passed by Keras. Should not be defined manually.
          Optional scalar tensor indicating if we're in training
          or inference phase.
        """
        # shaping Q and V into (batch_size, num_heads, seq_len, d_model//heads)
        q = K.permute_dimensions(pre_q, [0, 2, 1, 3])
        v = K.permute_dimensions(pre_v, [0, 2, 1, 3])

        if self.compression_window_size is None:
            k_transposed = K.permute_dimensions(pre_k, [0, 2, 3, 1])
        else:
            # Memory-compressed attention described in paper
            # "Generating Wikipedia by Summarizing Long Sequences"
            # (https://arxiv.org/pdf/1801.10198.pdf)
            # It compresses keys and values using 1D-convolution which reduces
            # the size of Q * K_transposed from roughly seq_len^2
            # to convoluted_seq_len^2. If we use strided convolution with
            # window size = 3 and stride = 3, memory requirements of such
            # memory-compressed attention will be 9 times smaller than
            # that of the original version.
            if self.use_masking:
                raise NotImplementedError(
                    "Masked memory-compressed attention has not "
                    "been implemented yet")
            k = K.permute_dimensions(pre_k, [0, 2, 1, 3])
            k, v = [
                K.reshape(
                    # Step 3: Return the result to its original dimensions
                    # (batch_size, num_heads, seq_len, d_model//heads)
                    K.bias_add(
                        # Step 3: ... and add bias
                        K.conv1d(
                            # Step 2: we "compress" K and V using strided conv
                            K.reshape(
                                # Step 1: we reshape K and V to
                                # (batch + num_heads,  seq_len, d_model//heads)
                                item,
                                (-1, K.int_shape(item)[-2],
                                 d_model // self.num_heads)),
                            kernel,
                            strides=self.compression_window_size,
                            padding='valid',
                            data_format='channels_last'),
                        bias,
                        data_format='channels_last'),
                    # new shape
                    K.concatenate(
                        [K.shape(item)[:2], [-1, d_model // self.num_heads]]))
                for item, kernel, bias in ((k, self.k_conv_kernel,
                                            self.k_conv_bias),
                                           (v, self.v_conv_kernel,
                                            self.v_conv_bias))
            ]
            k_transposed = K.permute_dimensions(k, [0, 1, 3, 2])
        # shaping K into (batch_size, num_heads, d_model//heads, seq_len)
        # for further matrix multiplication
        sqrt_d = K.constant(np.sqrt(d_model // self.num_heads),
                            dtype=K.floatx())
        q_shape = K.int_shape(q)
        k_t_shape = K.int_shape(k_transposed)
        v_shape = K.int_shape(v)
        # before performing batch_dot all tensors are being converted to 3D
        # shape (batch_size * num_heads, rows, cols) to make sure batch_dot
        # performs identically on all backends
        attention_heads = K.reshape(
            K.batch_dot(
                self.apply_dropout_if_needed(K.softmax(
                    self.mask_attention_if_needed(
                        K.batch_dot(
                            K.reshape(q, (-1, ) + q_shape[-2:]),
                            K.reshape(k_transposed,
                                      (-1, ) + k_t_shape[-2:])) / sqrt_d)),
                                             training=training),
                K.reshape(v, (-1, ) + v_shape[-2:])),
            (-1, self.num_heads, q_shape[-2], v_shape[-1]))
        attention_heads_merged = K.reshape(
            K.permute_dimensions(attention_heads, [0, 2, 1, 3]), (-1, d_model))
        attention_out = K.reshape(
            K.dot(attention_heads_merged, self.output_weights),
            (-1, out_seq_len, d_model))
        return attention_out
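
The comment on memory-compressed attention quotes a roughly 9x memory saving for window size 3 and stride 3; that is the window-squared factor you get when the seq_len^2 attention matrix shrinks to (seq_len / 3)^2. A back-of-the-envelope check in plain Python (the sequence length is an arbitrary illustrative value):

    seq_len, window = 1024, 3
    full_entries = seq_len ** 2                  # size of Q * K_transposed without compression
    compressed_len = seq_len // window           # length after the strided conv (stride == window)
    compressed_entries = compressed_len ** 2     # the "convoluted_seq_len^2" from the comment above
    print(full_entries / compressed_entries)     # -> roughly 9
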
Exemple #32
0
    def call(self, inputs, training=None):
        def _l2normalize(v, eps=1e-12):
            return v / (K.sum(v ** 2) ** 0.5 + eps)

        def power_iteration(W, u):
            _u = u
            _v = _l2normalize(K.dot(_u, K.transpose(W)))
            _u = _l2normalize(K.dot(_v, W))
            return _u, _v

        if self.spectral_normalization:
            W_shape = self.kernel.shape.as_list()
            # Flatten the Tensor
            W_reshaped = K.reshape(self.kernel, [-1, W_shape[-1]])
            _u, _v = power_iteration(W_reshaped, self.u)
            # Calculate Sigma
            sigma = K.dot(_v, W_reshaped)
            sigma = K.dot(sigma, K.transpose(_u))
            # normalize it
            W_bar = W_reshaped / sigma
            # reshape weight tensor
            if training in {0, False}:
                W_bar = K.reshape(W_bar, W_shape)
            else:
                with tf.control_dependencies([self.u.assign(_u)]):
                    W_bar = K.reshape(W_bar, W_shape)

            # update weight
            self.kernel = W_bar

        if self.rank == 1:
            outputs = K.conv1d(
                inputs,
                self.kernel,
                strides=self.strides[0],
                padding=self.padding,
                data_format=self.data_format,
                dilation_rate=self.dilation_rate[0])
        if self.rank == 2:
            outputs = K.conv2d(
                inputs,
                self.kernel,
                strides=self.strides,
                padding=self.padding,
                data_format=self.data_format,
                dilation_rate=self.dilation_rate)
        if self.rank == 3:
            outputs = K.conv3d(
                inputs,
                self.kernel,
                strides=self.strides,
                padding=self.padding,
                data_format=self.data_format,
                dilation_rate=self.dilation_rate)

        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
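
The power iteration above estimates the largest singular value (spectral norm) of the flattened kernel, and dividing by it keeps the layer roughly 1-Lipschitz. A hedged NumPy sketch, separate from the layer, showing that a few of the same updates converge to the value reported by SVD:

    import numpy as np

    def l2normalize(v, eps=1e-12):
        return v / (np.sqrt(np.sum(v ** 2)) + eps)

    W = np.random.randn(64, 32)                 # stands in for the flattened kernel W_reshaped
    u = l2normalize(np.random.randn(1, 32))     # stands in for the persistent vector self.u
    for _ in range(5):                          # the layer above runs a single step per call
        v = l2normalize(u @ W.T)
        u = l2normalize(v @ W)
    sigma = (v @ W @ u.T).item()
    print(sigma, np.linalg.svd(W, compute_uv=False)[0])   # both approximate the spectral norm
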
Exemple #33
0
    def call(self, inputs, states, training=None):

        dp_mask = self._dropout_mask
        rec_dp_mask = self._recurrent_dropout_mask
        cont_dp_mask = self._controller_dropout_mask

        h_tm1 = states[0]
        c_tm1 = states[1]
        r_tm1 = states[2]

        if 0 < self.dropout < 1.:

            inputs_i = inputs * dp_mask[0]
            inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[2]
            inputs_o = inputs * dp_mask[3]

        else:

            inputs_i = inputs_f = inputs_c = inputs_o = inputs

        x_i = K.dot(inputs_i, self.kernel_i)
        x_f = K.dot(inputs_f, self.kernel_f)
        x_c = K.dot(inputs_c, self.kernel_c)
        x_o = K.dot(inputs_o, self.kernel_o)

        if self.use_bias:

            x_i = K.bias_add(x_i, self.bias_i)
            x_f = K.bias_add(x_f, self.bias_f)
            x_c = K.bias_add(x_c, self.bias_c)
            x_o = K.bias_add(x_o, self.bias_o)

        if 0 < self.recurrent_dropout < 1.:

            h_tm1_i = h_tm1 * rec_dp_mask[0]
            h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[2]
            h_tm1_o = h_tm1 * rec_dp_mask[3]

        else:

            h_tm1_i = h_tm1_f = h_tm1_c = h_tm1_o = h_tm1

        h_tm1_i = K.dot(h_tm1_i, self.recurrent_kernel_i)
        h_tm1_f = K.dot(h_tm1_f, self.recurrent_kernel_f)
        h_tm1_c = K.dot(h_tm1_c, self.recurrent_kernel_c)
        h_tm1_o = K.dot(h_tm1_o, self.recurrent_kernel_o)

        #memories are fed back as input next cycle
        r_tm1_i = K.dot(r_tm1, self.recurrent_kernel_r)

        i = self.recurrent_activation(x_i + h_tm1_i + r_tm1_i)
        f = self.recurrent_activation(x_f + h_tm1_f)
        c = f * c_tm1 + i * self.activation(x_c + h_tm1_c)
        o = self.recurrent_activation(x_o + h_tm1_o)
        h = o * self.activation(c)

        if 0 < self.controller_dropout < 1.:

            controller_r = h * cont_dp_mask[0]

        else:

            controller_r = h

        #calculate the write weights
        self.controller_ww = K.sigmoid(self.write_gate) * self.controller_wr + \
                    (1 - K.sigmoid(self.write_gate)) * self.controller_wlu

        #calculate read weights and retrieve the appropriate memory
        n_controller_r = K.l2_normalize(controller_r, 1)
        n_memory = K.l2_normalize(self.memory, 1)
        t_n_memory = K.transpose(n_memory)
        mem_cos_similarity = K.dot(n_controller_r, t_n_memory)
        self.controller_wr = K.softmax(mem_cos_similarity)
        r = K.dot(self.controller_wr, self.memory)
        self.reads += 1

        #calculate the usage weights
        self.controller_wu = self.usage_decay * self.controller_wu + \
                            self.controller_wr + self.controller_ww

        #calculate the least used weights
        v, idx = tf.nn.top_k(self.controller_wu, self.controller_wu.shape[1])
        n = min(self.reads, self.memory.shape[1])
        nth_smallest = K.reshape(v[:, -n], (self.batch_size, 1))
        smallest_index = tf.reduce_min(idx[:, -1])
        nth_smallest = tf.matmul(
            nth_smallest, tf.constant(1., shape=(1, self.memory.shape[0])))
        lt = tf.less_equal(self.controller_wu, nth_smallest)
        self.controller_wlu = tf.cast(lt, tf.float32)

        #zero the least used memory location
        #note this is not correct right now: smallest_index is the smallest
        #index of the vector of indices of smallest values over the batch,
        #not the index of the smallest value over the batch
        zero_array = tf.constant([[1.] if i != smallest_index else [0.]
                                  for i in range(self.memory.shape[0])])
        ones_array = tf.ones((1, self.units))
        self.memory = tf.matmul(zero_array, ones_array) * self.memory

        #update the memory
        self.memory = tf.matmul(tf.transpose(self.controller_ww),
                                h) + self.memory

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True

        return r, [h, c, r]
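
The read step above is content-based addressing: a softmax over cosine similarities between the controller output and every memory row, followed by a weighted read. A small NumPy sketch of that addressing (batch size, memory size and width are illustrative):

    import numpy as np

    def cosine_read(controller, memory, eps=1e-8):
        # softmax over cosine similarity, then weighted sum of memory rows
        c = controller / (np.linalg.norm(controller, axis=1, keepdims=True) + eps)
        m = memory / (np.linalg.norm(memory, axis=1, keepdims=True) + eps)
        sim = c @ m.T                                        # (batch, memory_slots)
        e = np.exp(sim - sim.max(axis=1, keepdims=True))
        w_r = e / e.sum(axis=1, keepdims=True)               # read weights, like controller_wr
        return w_r @ memory                                  # read vector, like r above

    r = cosine_read(np.random.rand(2, 8), np.random.rand(16, 8))   # -> shape (2, 8)
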
Exemple #34
0
    def call(self, inputs, training=None):
        input_shape = K.shape(inputs)
        batch_size = input_shape[0]
        if self.data_format == 'channels_first':
            h_axis, w_axis = 2, 3
        else:
            h_axis, w_axis = 1, 2

        height, width = input_shape[h_axis], input_shape[w_axis]
        kernel_h, kernel_w = self.kernel_size
        stride_h, stride_w = self.strides
        if self.output_padding is None:
            out_pad_h = out_pad_w = None
        else:
            out_pad_h, out_pad_w = self.output_padding

        # Infer the dynamic output shape:
        out_height = conv_utils.deconv_length(height,
                                              stride_h, kernel_h,
                                              self.padding,
                                              out_pad_h)
        out_width = conv_utils.deconv_length(width,
                                             stride_w, kernel_w,
                                             self.padding,
                                             out_pad_w)
        if self.data_format == 'channels_first':
            output_shape = (batch_size, self.filters, out_height, out_width)
        else:
            output_shape = (batch_size, out_height, out_width, self.filters)

        # Spectral Normalization
        def _l2normalize(v, eps=1e-12):
            return v / (K.sum(v ** 2) ** 0.5 + eps)

        def power_iteration(W, u):
            # According to the paper, a single power-iteration step is enough.
            _u = u
            _v = _l2normalize(K.dot(_u, K.transpose(W)))
            _u = _l2normalize(K.dot(_v, W))
            return _u, _v

        W_shape = self.kernel.shape.as_list()
        # Flatten the Tensor
        W_reshaped = K.reshape(self.kernel, [-1, W_shape[-1]])
        _u, _v = power_iteration(W_reshaped, self.u)
        # Calculate Sigma
        sigma = K.dot(_v, W_reshaped)
        sigma = K.dot(sigma, K.transpose(_u))
        # normalize it
        W_bar = W_reshaped / sigma
        # reshape weight tensor
        if training in {0, False}:
            W_bar = K.reshape(W_bar, W_shape)
        else:
            with tf.control_dependencies([self.u.assign(_u)]):
                W_bar = K.reshape(W_bar, W_shape)
        self.kernel = W_bar

        outputs = K.conv2d_transpose(
            inputs,
            self.kernel,
            output_shape,
            self.strides,
            padding=self.padding,
            data_format=self.data_format)

        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
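
conv_utils.deconv_length infers the transposed-convolution output size from the input size, stride, kernel size and padding. A plain-Python sketch of the usual formula when output_padding is None (a simplified stand-in, not the Keras utility itself):

    def deconv_length_sketch(dim_size, stride, kernel_size, padding):
        # simplified transposed-conv output length, assuming output_padding=None
        if padding == 'valid':
            return dim_size * stride + max(kernel_size - stride, 0)
        if padding == 'full':
            return dim_size * stride - (stride + kernel_size - 2)
        if padding == 'same':
            return dim_size * stride
        raise ValueError('Unknown padding: ' + str(padding))

    print(deconv_length_sketch(16, 2, 3, 'same'))    # -> 32
    print(deconv_length_sketch(16, 2, 3, 'valid'))   # -> 33
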
Exemple #35
0
 def call(self, x):
     output0 = K.dot(x[0], self.kernel)
     output1 = K.batch_dot(output0, x[1])
     output2 = K.bias_add(output1, self.bias) 
     output3 = self.activation(output2)
     return output3
Exemple #36
0
    def step(self, inputs, states, training=None):
        """Computes the output of a single step. Unlike the vanilla GRU, attention is applied to the
        output, as per https://arxiv.org/pdf/1603.01417.pdf
        ----------
        inputs : (K.Tensor)
            A tensor of shape [batch_size, input_size+1]. The last element of each example is the
            attention score.
        states : (K.Tensor)
            Initial (list) of states
        training : (bool)
            Whether the network is in training mode or not. 

        Returns
        -------
        (K.Tensor)
            The output for the current step, modified by attention

        """
        # Needs question as an input
        x_i, attn_gate = array_ops.split(inputs,
                                         num_or_size_splits=[self.units, 1],
                                         axis=1)
        h_tm1 = states[0]

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        if self.implementation == 1:
            if 0. < self.dropout < 1.:
                inputs_z = x_i * dp_mask[0]
                inputs_r = x_i * dp_mask[1]
                inputs_h = x_i * dp_mask[2]
            else:
                inputs_z = x_i
                inputs_r = x_i
                inputs_h = x_i
            x_z = K.dot(inputs_z, self.kernel_z)
            x_r = K.dot(inputs_r, self.kernel_r)
            x_h = K.dot(inputs_h, self.kernel_h)
            if self.use_bias:
                x_z = K.bias_add(x_z, self.bias_z)
                x_r = K.bias_add(x_r, self.bias_r)
                x_h = K.bias_add(x_h, self.bias_h)

            if 0. < self.recurrent_dropout < 1.:
                h_tm1_z = h_tm1 * rec_dp_mask[0]
                h_tm1_r = h_tm1 * rec_dp_mask[1]
                h_tm1_h = h_tm1 * rec_dp_mask[2]
            else:
                h_tm1_z = h_tm1
                h_tm1_r = h_tm1
                h_tm1_h = h_tm1

            z = self.recurrent_activation(
                x_z + K.dot(h_tm1_z, self.recurrent_kernel_z))
            r = self.recurrent_activation(
                x_r + K.dot(h_tm1_r, self.recurrent_kernel_r))

            hh = self.activation(x_h +
                                 K.dot(r * h_tm1_h, self.recurrent_kernel_h))
        else:
            if 0. < self.dropout < 1.:
                x_i *= dp_mask[0]
            matrix_x = K.dot(x_i, self.kernel)
            if self.use_bias:
                matrix_x = K.bias_add(matrix_x, self.bias)
            if 0. < self.recurrent_dropout < 1.:
                h_tm1 *= rec_dp_mask[0]
            matrix_inner = K.dot(h_tm1,
                                 self.recurrent_kernel[:, :2 * self.units])

            x_z = matrix_x[:, :self.units]
            x_r = matrix_x[:, self.units:2 * self.units]
            recurrent_z = matrix_inner[:, :self.units]
            recurrent_r = matrix_inner[:, self.units:2 * self.units]

            z = self.recurrent_activation(x_z + recurrent_z)
            r = self.recurrent_activation(x_r + recurrent_r)

            x_h = matrix_x[:, 2 * self.units:]
            recurrent_h = K.dot(r * h_tm1,
                                self.recurrent_kernel[:, 2 * self.units:])
            hh = self.activation(x_h + recurrent_h)
        h = z * h_tm1 + (1 - z) * hh

        # Attention modulated output.
        h = attn_gate * h + (1 - attn_gate) * h_tm1

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, [h]
Exemple #37
0
	def call(self, x):
		return K.bias_add(K.dot(x[1], self.kernel) * x[0] + x[1], self.bias)
Exemple #38
0
    def step(self, inputs, states):
        h_tm1 = states[0]  # not used
        c_tm1 = states[1]
        dp_mask = states[2]
        rec_dp_mask = states[3]

        if self.implementation == 2:
            z = K.dot(inputs * dp_mask[0], self.kernel)
            z = z * rec_dp_mask[0]

            z0 = z[:, :self.units]

            if self.use_bias:
                z_bias = K.bias_add(z[:, self.units: self.units * 3], self.bias)
                z_bias = self.recurrent_activation(z_bias)
                z1 = z_bias[:, :self.units]
                z2 = z_bias[:, self.units: 2 * self.units]
            else:
                z1 = z[:, self.units: 2 * self.units]
                z2 = z[:, 2 * self.units: 3 * self.units]

            if self.kernel_dim == 4:
                z3 = z[:, 3 * self.units: 4 * self.units]
            else:
                z3 = None

            f = z1
            r = z2

            c = f * c_tm1 + (1 - f) * z0
            if self.kernel_dim == 4:
                h = r * self.activation(c) + (1 - r) * z3
            else:
                h = r * self.activation(c) + (1 - r) * inputs
        else:
            if self.implementation == 0:
                x_w = inputs[:, :self.units]
                x_f = inputs[:, self.units: 2 * self.units]
                x_r = inputs[:, 2 * self.units: 3 * self.units]
                if self.kernel_dim == 4:
                    x_w_x = inputs[:, 3 * self.units: 4 * self.units]
                else:
                    x_w_x = None
            elif self.implementation == 1:
                x_w = K.dot(inputs * dp_mask[0], self.kernel_w)
                x_f = K.dot(inputs * dp_mask[1], self.kernel_f) + self.bias_f
                x_r = K.dot(inputs * dp_mask[2], self.kernel_r) + self.bias_r

                x_f = self.recurrent_activation(x_f)
                x_r = self.recurrent_activation(x_r)

                if self.kernel_dim == 4:
                    x_w_x = K.dot(inputs * dp_mask[0], self.kernel_p)
                else:
                    x_w_x = None
            else:
                raise ValueError('Unknown `implementation` mode.')

            w = x_w * rec_dp_mask[0]
            f = x_f
            r = x_r

            c = f * c_tm1 + (1 - f) * w
            if self.kernel_dim == 4:
                h = r * self.activation(c) + (1 - r) * x_w_x
            else:
                h = r * self.activation(c) + (1 - r) * inputs

        if 0 < self.dropout + self.recurrent_dropout:
            h._uses_learning_phase = True

        return h, [h, c]
Exemple #39
0
        def step(self, inputs, states):
            h_tm1 = states[0]
            c_tm1 = states[1]
            dp_mask = states[2]
            rec_dp_mask = states[3]

            if self.implementation == 2:
                z = K.dot(inputs * dp_mask[0], self.kernel)
                z += z * K.dot(
                    h_tm1 * rec_dp_mask[0],
                    self.recurrent_kernel)  # applies m instead of h_tm1 to z
                if self.use_bias:
                    z = K.bias_add(z, self.bias)

                z0 = z[:, :self.units]
                z1 = z[:, self.units:2 * self.units]
                z2 = z[:, 2 * self.units:3 * self.units]
                z3 = z[:, 3 * self.units:4 * self.units]
                z4 = z[:, 4 * self.units:]  # just elementwise multiplication, no activation functions

                i = self.recurrent_activation(z0)
                f = self.recurrent_activation(z1)
                c = f * c_tm1 + i * self.activation(z2)
                o = self.recurrent_activation(z3)
            else:
                if self.implementation == 0:
                    x_i = inputs[:, :self.units]
                    x_f = inputs[:, self.units:2 * self.units]
                    x_c = inputs[:, 2 * self.units:3 * self.units]
                    x_o = inputs[:, 3 * self.units:4 * self.units]
                    x_m = inputs[:, 4 * self.units:]
                elif self.implementation == 1:
                    x_i = K.dot(inputs * dp_mask[0],
                                self.kernel_i) + self.bias_i
                    x_f = K.dot(inputs * dp_mask[1],
                                self.kernel_f) + self.bias_f
                    x_c = K.dot(inputs * dp_mask[2],
                                self.kernel_c) + self.bias_c
                    x_o = K.dot(inputs * dp_mask[3],
                                self.kernel_o) + self.bias_o
                    x_m = K.dot(inputs * dp_mask[4],
                                self.kernel_m) + self.bias_m
                else:
                    raise ValueError('Unknown `implementation` mode.')

                m = x_m * K.dot(
                    h_tm1 * rec_dp_mask[4],
                    self.recurrent_kernel_m)  # elementwise multiplication m
                i = self.recurrent_activation(
                    x_i + K.dot(m * rec_dp_mask[0], self.recurrent_kernel_i))
                f = self.recurrent_activation(
                    x_f + K.dot(m * rec_dp_mask[1], self.recurrent_kernel_f))
                c = f * c_tm1 + i * self.activation(
                    x_c + K.dot(m * rec_dp_mask[2], self.recurrent_kernel_c))
                o = self.recurrent_activation(
                    x_o + K.dot(m * rec_dp_mask[3], self.recurrent_kernel_o))
            h = o * self.activation(c)
            if 0 < self.dropout + self.recurrent_dropout:
                h._uses_learning_phase = True
            return h, [h, c]
Exemple #40
0
def layer_without_activation(dense):
    output = K.dot(dense.input, dense.kernel)
    if dense.use_bias:
        output = K.bias_add(output, dense.bias, data_format='channels_last')
    return output
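
A possible usage sketch for the helper above: wiring its output into a backend function to read a Dense layer's pre-activation values (the model and layer here are invented for illustration):

    from keras import backend as K
    from keras.layers import Dense, Input
    from keras.models import Model
    import numpy as np

    inputs = Input((8,))
    dense = Dense(4, activation='relu')
    model = Model(inputs, dense(inputs))

    pre_activation_fn = K.function([model.input], [layer_without_activation(dense)])
    pre_activation = pre_activation_fn([np.random.rand(2, 8)])[0]   # values before the relu
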
Exemple #41
0
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
                self.dropout,
                training=training,
                count=4)
        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.recurrent_dropout,
                training=training,
                count=4)
        if (0 < self.zoneout_c < 1 and
                self._zoneout_mask_c is None):
            self._zoneout_mask_c = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.zoneout_c,
                training=training,
                count=1)
            
        if (0 < self.zoneout_h < 1 and
                self._zoneout_mask_h is None):
            self._zoneout_mask_h = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.zoneout_h,
                training=training,
                count=1)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        if self.implementation == 1:
            if 0 < self.dropout < 1.:
                inputs_i = inputs * dp_mask[0]
                inputs_f = inputs * dp_mask[1]
                inputs_c = inputs * dp_mask[2]
                inputs_o = inputs * dp_mask[3]
            else:
                inputs_i = inputs
                inputs_f = inputs
                inputs_c = inputs
                inputs_o = inputs
            x_i = K.dot(inputs_i, self.kernel_i)
            x_f = K.dot(inputs_f, self.kernel_f)
            x_c = K.dot(inputs_c, self.kernel_c)
            x_o = K.dot(inputs_o, self.kernel_o)
            if self.use_bias:
                x_i = K.bias_add(x_i, self.bias_i)
                x_f = K.bias_add(x_f, self.bias_f)
                x_c = K.bias_add(x_c, self.bias_c)
                x_o = K.bias_add(x_o, self.bias_o)

            if 0 < self.recurrent_dropout < 1.:
                h_tm1_i = h_tm1 * rec_dp_mask[0]
                h_tm1_f = h_tm1 * rec_dp_mask[1]
                h_tm1_c = h_tm1 * rec_dp_mask[2]
                h_tm1_o = h_tm1 * rec_dp_mask[3]
            else:
                h_tm1_i = h_tm1
                h_tm1_f = h_tm1
                h_tm1_c = h_tm1
                h_tm1_o = h_tm1
                
            i = self.recurrent_activation(self.ln(x_i + K.dot(h_tm1_i,
                                                              self.recurrent_kernel_i)))
            f = self.recurrent_activation(self.ln(x_f + K.dot(h_tm1_f,
                                                              self.recurrent_kernel_f)))
            c = f * c_tm1 + i * self.activation(self.ln(x_c + K.dot(h_tm1_c,
                                                                    self.recurrent_kernel_c)))
            o = self.recurrent_activation(self.ln(x_o + K.dot(h_tm1_o,
                                                              self.recurrent_kernel_o)))
        else:
            raise ValueError('Unknown `implementation` mode.')

        h = o * self.activation(self.ln(c))
        
        if 0 < self.dropout + self.recurrent_dropout + self.zoneout_c + self.zoneout_h:
            if training is None:
                h._uses_learning_phase = True
                
        if 0 < self.zoneout_h < 1:
            h = K.in_train_phase(K.dropout(h - h_tm1, self.zoneout_h),
                                 h - h_tm1)
            h = h * (1. - self.zoneout_h) + h_tm1
            
        if 0 < self.zoneout_c < 1:
            c = K.in_train_phase(K.dropout(c - c_tm1, self.zoneout_c),
                                 c - c_tm1)
            c = c * (1. - self.zoneout_c) + c_tm1
        
        return h, [h, c]
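
The two blocks at the end apply zoneout: during training the state delta (h - h_tm1) is dropped unit-wise, and at inference it is scaled by (1 - rate), so each unit keeps its previous value with probability zoneout_h. A minimal NumPy sketch of that rule (names and shapes are illustrative):

    import numpy as np

    def zoneout(prev, new, rate, training):
        # keep each unit's previous value with probability `rate` while training
        if training:
            keep_previous = np.random.rand(*new.shape) < rate
            return np.where(keep_previous, prev, new)
        # inference: deterministic interpolation, mirroring the (1 - rate) scaling above
        return (1.0 - rate) * new + rate * prev

    h = zoneout(np.zeros(4), np.ones(4), 0.1, training=False)   # -> [0.9, 0.9, 0.9, 0.9]
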
Exemple #42
0
    def call(self, inputs):
        # y, M = inputs

        y = inputs

        if self.sync_mode is None or self.sync_mode == 'radial_sync':

            if K.ndim(y) == 4:
                y = K.expand_dims(y, axis=3)
                y = K.repeat_elements(y, rep=self.ndirs, axis=3)

            y_ = bilinear_sampler(y, self.x, self.y, self.nrings, self.ndirs)

            nbatch = K.shape(y)[0]
            nchannels = K.shape(y)[-1]

            if not self.pool:
                # synchronize with sync field

                y = bilinear_sampler(y, self.e_x, self.e_y, self.nrings,
                                     self.ndirs)

                # prepare circular convolution

                y = K.reshape(y, (nbatch * self.sz_y * self.sz_x, self.nrings,
                                  self.ndirs, nchannels))
                # pad it along the dirs axis so that conv2d produces circular
                # convolution along that dimension
                # shape = (nbatch, nv, ndirs, nchannel)
                y = K.concatenate([y, y[:, :, :-1, :]], axis=2)

                # output is N x outmaps x 1 x nrays if filter size is the same as
                # input image size prior padding

                y = K.conv2d(y,
                             self.kernel,
                             strides=(1, 1),
                             padding='valid',
                             data_format='channels_last',
                             dilation_rate=(1, 1))

                y = K.reshape(y, (nbatch, self.sz_y, self.sz_x, 1, self.ndirs,
                                  self.nfilters))

                y = tf.squeeze(y, [3])

                # add contribution of central vertex

                y += K.dot(y_, self.center_kernel)
            else:
                y = K.dot(y_, self.center_kernel)

            if self.use_bias:
                y = K.bias_add(y, self.bias, data_format=None)

            #y = y + center

            if self.activation is not None:
                y = self.activation(y)

            if self.take_max:
                y = K.max(y, axis=2, keepdims=False)

        elif self.sync_mode == 'async':
            if K.ndim(y) == 5:
                y = K.max(y, axis=3, keepdims=False)
            y_ = bilinear_sampler(y, self.x, self.y, self.nrings, self.ndirs)
            if not self.pool:

                nbatch = K.shape(y)[0]
                nchannels = K.shape(y)[-1]

                # pull back the input to the fiber product of the tangent bundle by the frame bundle
                # by the frame transporter

                y = bilinear_sampler(y, self.e_x, self.e_y, self.nrings,
                                     self.ndirs)

                y = K.reshape(y, (nbatch * self.sz_y * self.sz_x, self.nrings,
                                  self.ndirs, nchannels))
                # pad it along the dirs axis so that conv2d produces circular
                # convolution along that dimension
                # shape = (nbatch, nv, ndirs, nchannel)
                y = K.concatenate([y, y[:, :, :-1, :]], axis=2)

                # output is N x outmaps x 1 x nrays if filter size is the same as
                # input image size prior padding

                y = K.conv2d(y,
                             self.kernel,
                             strides=(1, 1),
                             padding='valid',
                             data_format='channels_last',
                             dilation_rate=(1, 1))

                y = K.reshape(y, (nbatch, self.sz_y, self.sz_x, 1, self.ndirs,
                                  self.nfilters))
                # y = K.max(y, axis=2, keepdims=False)
                y = tf.squeeze(y, [3])

                # add contribution of central vertex

                y_ = K.dot(y_, self.center_kernel)
                y_ = K.expand_dims(y_, axis=3)
                y_ = K.repeat_elements(y_, rep=self.ndirs, axis=3)
                y += y_
            else:
                y_ = K.dot(y_, self.center_kernel)
                y_ = K.expand_dims(y_, axis=3)
                y_ = K.repeat_elements(y_, rep=self.ndirs, axis=3)
                y = y_

            if self.use_bias:
                y = K.bias_add(y, self.bias, data_format=None)

            if self.activation is not None:
                y = self.activation(y)

            if self.take_max:
                y = K.max(y, axis=2, keepdims=False)
        return y
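
The concatenation K.concatenate([y, y[:, :, :-1, :]], axis=2) before the conv2d is wrap-padding: a 'valid' convolution over the padded directions axis then behaves like a circular convolution. A tiny 1-D NumPy check of that equivalence (toy signal and kernel, unrelated to the layer's weights):

    import numpy as np

    signal = np.array([1., 2., 3., 4.])
    kernel = np.array([1., -1., 0.5])
    padded = np.concatenate([signal, signal[:-1]])    # same wrap-padding pattern as above
    valid = np.array([np.dot(padded[i:i + len(kernel)], kernel)
                      for i in range(len(signal))])
    circular = np.array([np.dot(np.roll(signal, -i)[:len(kernel)], kernel)
                         for i in range(len(signal))])
    assert np.allclose(valid, circular)
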
Exemple #43
0
    def step(self, inputs, states):
        h_tm1 = states[0]  # previous memory
        dp_mask = states[1]  # dropout matrices for recurrent units
        rec_dp_mask = states[2]
        eye_mask = K.eye(self.num_labels, dtype='float32')
        if self.implementation == 2:
            matrix_x = K.dot(inputs * dp_mask[0], self.kernel)
            if self.use_bias:
                matrix_x = K.bias_add(matrix_x, self.bias)

            # Adding the semi-diagonal mask as mentioned in the paper.
            # This ensures that all the non-diagonal elements of the weight
            # matrix blocks corresponding to the labels are set to zero.
            # Tensor slices cannot be assigned in place, so the mask is built
            # explicitly and applied by elementwise multiplication.
            ones_rest = K.constant(
                1., shape=(self.num_labels, self.units - self.num_labels))
            label_block = K.concatenate([eye_mask, ones_rest], axis=1)
            mask = K.concatenate(
                [K.concatenate([label_block, label_block, label_block], axis=1),
                 K.constant(1., shape=(self.units - self.num_labels,
                                       3 * self.units))],
                axis=0)
            recurrent_kernel = self.recurrent_kernel * mask

            matrix_inner = K.dot(h_tm1 * rec_dp_mask[0],
                                 recurrent_kernel[:, :2 * self.units])

            x_z = matrix_x[:, :self.units]
            x_r = matrix_x[:, self.units:2 * self.units]
            recurrent_z = matrix_inner[:, :self.units]
            recurrent_r = matrix_inner[:, self.units:2 * self.units]

            z = self.recurrent_activation(x_z + recurrent_z)
            r = self.recurrent_activation(x_r + recurrent_r)

            x_h = matrix_x[:, 2 * self.units:]
            recurrent_h = K.dot(r * h_tm1 * rec_dp_mask[0],
                                recurrent_kernel[:, 2 * self.units:])
            hh = self.activation(x_h + recurrent_h)
        else:
            if self.implementation == 0:
                x_z = inputs[:, :self.units]
                x_r = inputs[:, self.units:2 * self.units]
                x_h = inputs[:, 2 * self.units:]
            elif self.implementation == 1:
                x_z = K.dot(inputs * dp_mask[0], self.kernel_z)
                x_r = K.dot(inputs * dp_mask[1], self.kernel_r)
                x_h = K.dot(inputs * dp_mask[2], self.kernel_h)
                if self.use_bias:
                    x_z = K.bias_add(x_z, self.bias_z)
                    x_r = K.bias_add(x_r, self.bias_r)
                    x_h = K.bias_add(x_h, self.bias_h)
            else:
                raise ValueError('Unknown `implementation` mode.')
            z = self.recurrent_activation(
                x_z + K.dot(h_tm1 * rec_dp_mask[0], self.recurrent_kernel_z))
            r = self.recurrent_activation(
                x_r + K.dot(h_tm1 * rec_dp_mask[1], self.recurrent_kernel_r))

            hh = self.activation(
                x_h +
                K.dot(r * h_tm1 * rec_dp_mask[2], self.recurrent_kernel_h))
        h = z * h_tm1 + (1 - z) * hh
        if 0 < self.dropout + self.recurrent_dropout:
            h._uses_learning_phase = True
        return h, [h]
Exemple #44
0
 def call(self, x):
     dot = K.dot(x, self.kernel)
     dot_plus_biais = K.bias_add(dot, self.biais)
     return dot_plus_biais
Exemple #45
0
    def call(self, inputs):
        ent_emb = inputs[0]
        rel_emb = inputs[1]
        adj = tf.SparseTensor(
            K.cast(K.squeeze(inputs[2], axis=0), dtype="int64"),
            K.ones_like(inputs[2][0, :, 0]), (self.node_size, self.node_size))
        sparse_indices = tf.squeeze(inputs[3], axis=0)
        sparse_val = tf.squeeze(inputs[4], axis=0)

        rel_adj = K.cast(K.squeeze(inputs[5], axis=0), dtype="int64")
        rel_adj = tf.SparseTensor(indices=rel_adj,
                                  values=tf.ones_like(rel_adj[:, 0],
                                                      dtype='float32'),
                                  dense_shape=(self.node_size, self.rel_size))
        rel_adj = tf.sparse_softmax(rel_adj)
        rel_features = tf.sparse_tensor_dense_matmul(rel_adj, rel_emb)

        ent_adj = K.cast(K.squeeze(inputs[6], axis=0), dtype="int64")
        ent_adj = tf.SparseTensor(indices=ent_adj,
                                  values=tf.ones_like(ent_adj[:, 0],
                                                      dtype='float32'),
                                  dense_shape=(self.node_size, self.node_size))
        ent_adj = tf.sparse_softmax(ent_adj)
        ent_features = tf.sparse_tensor_dense_matmul(ent_adj, ent_emb)

        features = K.concatenate([ent_features, rel_features])
        outputs = [self.activation(features)]

        for _ in range(self.depth):
            features_list = []
            for head in range(self.attn_heads):
                attention_kernel = self.attn_kernels[head]

                attn_for_rels = tf.SparseTensor(indices=sparse_indices,
                                                values=sparse_val,
                                                dense_shape=(self.triple_size,
                                                             self.rel_size))
                attn_for_rels = tf.squeeze(tf.sparse_tensor_dense_matmul(
                    attn_for_rels, K.dot(rel_emb, attention_kernel[2])),
                                           axis=-1)
                attn_for_rels = tf.SparseTensor(indices=adj.indices,
                                                values=attn_for_rels,
                                                dense_shape=adj.dense_shape)
                attn_for_self = K.dot(features, attention_kernel[0])
                attn_for_neighs = tf.transpose(
                    K.dot(features, attention_kernel[1]), [1, 0])

                att = tf.sparse_add(
                    tf.sparse_add(attn_for_rels, adj * attn_for_self),
                    adj * attn_for_neighs)

                att = tf.SparseTensor(indices=att.indices,
                                      values=tf.nn.leaky_relu(att.values),
                                      dense_shape=att.dense_shape)
                att = tf.sparse_softmax(att)
                new_features = tf.sparse_tensor_dense_matmul(att, features)

                if self.use_bias:
                    new_features = K.bias_add(new_features, self.biases[head])
                features_list.append(new_features)

            if self.attn_heads_reduction == 'concat':
                features = K.concatenate(features_list)
            else:
                features = K.mean(K.stack(features_list), axis=0)

            features = self.activation(features)
            outputs.append(features)
        outputs = K.concatenate(outputs)
        return [outputs, att.indices, att.values]
Exemple #46
0
    def call(self, inputs):
        # y, M = inputs

        y = inputs[0]
        contributors = inputs[1]
        weights = inputs[2]
        angles = inputs[3]

        if self.sync_mode is None or self.sync_mode == 'radial_sync':

            if K.ndim(y) == 3:
                y = K.expand_dims(y, axis=2)
                y = K.repeat_elements(y, rep=self.ndirs, axis=2)

            y_ = y

            nbatch = K.shape(y)[0]
            nchannels = K.shape(y)[-1]

            # synchronize with sync field

            y = window_interpolation_sync(y, contributors, weights, angles)

            # circular convolution

            y = gcnn_conv(y, self.kernel, nbatch, self.nv, self.nrings,
                          self.ndirs, self.nfilters, nchannels)

            # add contribution of central vertex

            y += K.dot(y_, self.center_kernel)

            if self.use_bias:
                y = K.bias_add(y, self.bias, data_format=None)

            #y = y + center

            if self.activation is not None:
                y = self.activation(y)

            if self.take_max:
                y = K.max(y, axis=2, keepdims=False)
        elif self.sync_mode == 'async':

            if K.ndim(y) == 4:
                y = K.max(y, axis=2, keepdims=False)
            y_ = y

            nbatch = K.shape(y)[0]
            nchannels = K.shape(y)[-1]

            # pull back the input to the fiber product of the tangent bundle by the frame bundle
            # by the frame transporter

            y = window_interpolation_async(y, contributors, weights)

            y = gcnn_conv(y, self.kernel, nbatch, self.nv, self.nrings,
                          self.ndirs, self.nfilters, nchannels)

            # add contribution of central vertex
            y_ = K.dot(y_, self.center_kernel)
            y_ = K.expand_dims(y_, axis=2)
            y_ = K.repeat_elements(y_, rep=self.ndirs, axis=2)
            y += y_

            if self.use_bias:
                y = K.bias_add(y, self.bias, data_format=None)

            if self.activation is not None:
                y = self.activation(y)

            if self.take_max:
                y = K.max(y, axis=2, keepdims=False)
        return y