Example #1
    def step(self, x, states):
        # Unpack the neural-stack state (read vector, stack values, stack
        # strengths, timestep), followed by the controller's hidden state.
        r_tm1, V_tm1, s_tm1, time = states[:4]
        h_tm1 = states[4:]

        op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1], axis=-1),
                                       h_tm1)

        # Controller outputs: push strength (d), pop strength (u),
        # value to push (v), and network output (o).
        d_t = K.sigmoid(K.dot(op_t, self.W_d) + self.b_d)
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o)

        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[:, 0],
                                             u_t[:, 0], v_t, time[0], stack=self.stack)

        return o_t, [r_t, V_t, s_t, time] + h_t
 def call(self, x, mask=None):
     # x[0]: (batch_size, input_length, input_dim)
     # x[1]: (batch_size, 1) indices of prepositions
     # Optional: x[2]: (batch_size, input_length - 2)
     assert isinstance(x, (list, tuple))
     encoded_sentence = x[0]
     prep_indices = K.squeeze(x[1], axis=-1)  #(batch_size,)
     batch_indices = K.arange(K.shape(encoded_sentence)[0])  # (batch_size,)
     if self.with_attachment_probs:
         # We're essentially doing K.argmax(x[2]) here, but argmax is not differentiable!
         head_probs = x[2]
         head_probs_padding = K.zeros_like(x[2])[:, :2]  # (batch_size, 2)
         # (batch_size, input_length)
         padded_head_probs = K.concatenate([head_probs, head_probs_padding])
         # (batch_size, 1)
         max_head_probs = K.expand_dims(K.max(padded_head_probs, axis=1))
         # (batch_size, input_length, 1)
         max_head_prob_indices = K.expand_dims(K.equal(padded_head_probs, max_head_probs))
         # (batch_size, input_length, input_dim)
         masked_head_encoding = K.switch(max_head_prob_indices, encoded_sentence, K.zeros_like(encoded_sentence))
         # (batch_size, input_dim)
         head_encoding = K.sum(masked_head_encoding, axis=1)
     else:
         head_indices = prep_indices - 1  # (batch_size,)
         head_encoding = encoded_sentence[batch_indices, head_indices, :]  # (batch_size, input_dim)
     prep_encoding = encoded_sentence[batch_indices, prep_indices, :]  # (batch_size, input_dim)
     child_encoding = encoded_sentence[batch_indices, prep_indices+1, :]  # (batch_size, input_dim)
     '''
     prep_indices = x[1]
     sentence_mask = mask[0]
     if sentence_mask is not None:
         if K.ndim(sentence_mask) > 2:
             # This means this layer came after a Bidirectional layer. Keras has this bug which
             # concatenates input masks instead of output masks.
             # TODO: Fix Bidirectional instead.
             sentence_mask = K.any(sentence_mask, axis=(-2, -1))
     head_encoding, prep_encoding, child_encoding = self.get_split_averages(encoded_sentence, sentence_mask,
                                                                            prep_indices)
     '''
     head_projection = K.dot(head_encoding, self.proj_head)  # (batch_size, proj_dim)
     prep_projection = K.dot(prep_encoding, self.proj_prep)  # (batch_size, proj_dim)
     child_projection = K.dot(child_encoding, self.proj_child)  # (batch_size, proj_dim)
     #(batch_size, proj_dim)
     if self.composition_type == 'HPCT':
         composed_projection = K.tanh(head_projection + prep_projection + child_projection)
     elif self.composition_type == 'HPC':
         prep_child_projection = K.tanh(prep_projection + child_projection)  # (batch_size, proj_dim)
         composed_projection = K.tanh(head_projection + prep_child_projection)
     else:
         # Composition type is HC
         composed_projection = K.tanh(head_projection + child_projection)
     for hidden_layer in self.hidden_layers:
         composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))  # (batch_size, proj_dim)
     # (batch_size, num_classes)
     class_scores = K.dot(composed_projection, self.scorer)
     label_probabilities = K.softmax(class_scores)
     return label_probabilities
Example #3
def staircase_loss(y_true, y_pred, var_a=16.0, cnst=1.0/255.0):
    """ Keras Staircase Loss. Assumes `from keras import backend as K`
    and `import tensorflow as tf` (for `tf.floor`). """
    height = cnst
    width = cnst
    var_x = K.clip(K.abs(y_true - y_pred) - 0.5 * cnst, 0.0, 1.0)
    loss = height*(K.tanh(var_a*((var_x/width)-tf.floor(var_x/width)-0.5)) /
                   (2.0*K.tanh(var_a/2.0)) + 0.5 + tf.floor(var_x/width))
    loss += 1e-10
    return K.mean(loss, axis=-1)
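A quick sanity check of the loss on dummy data (shapes and names here are illustrative, not from the original source):

import numpy as np
from keras import backend as K

# Random predictions/targets in [0, 1]; the loss returns one value per sample.
y_true = K.constant(np.random.rand(4, 8).astype("float32"))
y_pred = K.constant(np.random.rand(4, 8).astype("float32"))
print(K.eval(staircase_loss(y_true, y_pred)).shape)  # (4,)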
Example #4
def logtanh(x, a=1):
    """
    log * tanh

    See Also: arcsinh
    """
    return K.tanh(x) * K.log(2 + a * abs(x))
    def call(self, x, mask=None):
        # eij = K.dot(x, self.W) TF backend doesn't support it

        # features_dim = self.W.shape[0]
        # step_dim = x._keras_shape[1]

        features_dim = self.features_dim
        step_dim = self.step_dim

        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        # print(weighted_input.shape)
        return K.sum(weighted_input, axis=1)
    def call(self, x, mask=None):
        eij = dot_product(x, self.W)

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        weighted_input = x * K.expand_dims(a)

        result = K.sum(weighted_input, axis=1)

        if self.return_attention:
            return [result, a]
        return result
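The masked-softmax-plus-epsilon normalization used above can be checked in isolation; a minimal sketch (all names illustrative):

import numpy as np
from keras import backend as K

scores = K.constant(np.array([[1.0, 2.0, 3.0]], dtype="float32"))
mask = K.constant(np.array([[1.0, 1.0, 0.0]], dtype="float32"))
a = K.exp(K.tanh(scores)) * mask  # masked positions contribute nothing
a = a / (K.sum(a, axis=1, keepdims=True) + K.epsilon())  # safe if all masked
print(K.eval(a))  # weights over the two unmasked positions, summing to ~1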
    def call(self, x):
        assert(K.backend() == 'tensorflow')
        temp = K.permute_dimensions(x, (0, 2, 1))
        for i in range(0, self.attention_depth):
            temp = K.sigmoid(K.dot(temp, self.Ws[i]) + self.bs[i])
        temp = K.permute_dimensions(temp, (0, 2, 1))
        estimated_weight = K.squeeze(K.dot(temp, K.expand_dims(self.Wf, -1)), -1)
        biased_weight = estimated_weight + self.bias
        non_linear_weight = K.tanh(biased_weight)

        # For each hidden state, calculate how much it should contribute
        # to the context vector. This is the main part of attention.
        # The weights are converted to "probabilities" with a softmax:
        # exp(x) / sum(exp(xi)).
        prob = K.exp(non_linear_weight)
        # Compute the total sum for each batch.
        total_sum = K.sum(prob, axis=1, keepdims=True)
        prob /= K.cast(total_sum, K.floatx())

        # Enable this if you want access to internal probabilities.
        # Should only be used for testing that Attention works as expected.
        # return prob

        # Multiply each hidden value by the corresponding probability.
        prob = K.expand_dims(prob, -1)
        new_hidden_values = x * prob
        return K.sum(new_hidden_values, axis=1)
Example #8
 def step(self, x, states):
     r_tm1, V_tm1, s_tm1, time = states[:4]
     h_tm1 = states[4:]

     # Debug helper: prints a tensor's shape at run time and returns a zero
     # scalar, so adding it to any tensor leaves the value unchanged.
     def print_name_shape(name, x):
         return T.cast(K.sum(theano.printing.Print(name)(x.shape)) * 0, "float32")

     r_tm1 = r_tm1 + print_name_shape("out\nr_tm1", r_tm1) + \
                     print_name_shape("V_tm1", V_tm1) + \
                     print_name_shape("s_tm1", s_tm1) + \
                     print_name_shape("x", x) + \
                     print_name_shape("h_tm1_0", h_tm1[0]) + \
                     print_name_shape("h_tm1_1", h_tm1[1])

     op_t, h_t = self._update_controller(T.concatenate([x, r_tm1], axis=-1),
                                         h_tm1)

     op_t = op_t + print_name_shape("afterop_t", op_t)
     ao = K.dot(op_t, self.W_d)
     ao = ao + print_name_shape("ao", ao)
     d_t = K.sigmoid(ao + self.b_d) + print_name_shape("afterop2_t", op_t)
     u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u) + print_name_shape("d_t", op_t)
     v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v) + print_name_shape("u_t", u_t)
     o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o) + print_name_shape("v_t", v_t)

     o_t = o_t + print_name_shape("afterbulk_t", o_t)

     time = time + 1
     V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[:, 0],
                                          u_t[:, 0], v_t, time[0], stack=self.stack)

     V_t = V_t + print_name_shape("o_t", o_t) + \
                 print_name_shape("r_t", r_t) + \
                 print_name_shape("V_t", V_t) + \
                 print_name_shape("s_t", s_t)

     return o_t, [r_t, V_t, s_t, time] + h_t
 def call(self, x, mask=None):
     # Theano backend only: `dimshuffle` is a Theano tensor method.
     eij = K.tanh(K.dot(x, self.W))

     ai = K.exp(eij)
     weights = ai / K.sum(ai, axis=1).dimshuffle(0, 'x')

     weighted_input = x * weights.dimshuffle(0, 1, 'x')
     return weighted_input.sum(axis=1)
Example #10
    def step(self, x, states):
        r_tm1, V_tm1, s_tm1, time = states[:4]
        h_tm1 = states[4:]

        op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1], axis=-1),
                                       h_tm1)

        d_t = K.sigmoid(K.dot(op_t, self.W_d) + self.b_d)
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o)

        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[:, 0],
                                             u_t[:, 0], v_t, time[0], stack=self.stack)

        return o_t, [r_t, V_t, s_t, time] + h_t
Example #11
def tanh(x):
    """
    Tanh activation function.

    >>> tanh(0)
    0.0
    """
    return K.eval(K.tanh(K.variable(x))).tolist()
    def get_similarity(self):
        ''' Specify similarity in configuration under 'similarity_params' -> 'mode'
        If a parameter is needed for the model, specify it in 'similarity_params'

        Example configuration:

        config = {
            ... other parameters ...
            'similarity_params': {
                'mode': 'gesd',
                'gamma': 1,
                'c': 1,
            }
        }

        cosine: dot(a, b) / sqrt(dot(a, a) * dot(b, b))
        polynomial: (gamma * dot(a, b) + c) ^ d
        sigmoid: tanh(gamma * dot(a, b) + c)
        rbf: exp(-gamma * l2_norm(a-b) ^ 2)
        euclidean: 1 / (1 + l2_norm(a - b))
        exponential: exp(-gamma * l2_norm(a - b))
        gesd: euclidean * sigmoid
        aesd: (euclidean + sigmoid) / 2
        l1: -l1_norm(a - b)
        '''

        params = self.similarity_params
        similarity = params['mode']

        axis = lambda a: len(a._keras_shape) - 1
        dot = lambda a, b: K.batch_dot(a, b, axes=axis(a))
        l2_norm = lambda a, b: K.sqrt(K.sum((a - b) ** 2, axis=axis(a), keepdims=True))
        l1_norm = lambda a, b: K.sum(K.abs(a - b), axis=axis(a), keepdims=True)

        if similarity == 'cosine':
            return lambda x: dot(x[0], x[1]) / K.sqrt(dot(x[0], x[0]) * dot(x[1], x[1]))
        elif similarity == 'polynomial':
            return lambda x: (params['gamma'] * dot(x[0], x[1]) + params['c']) ** params['d']
        elif similarity == 'sigmoid':
            return lambda x: K.tanh(params['gamma'] * dot(x[0], x[1]) + params['c'])
        elif similarity == 'rbf':
            return lambda x: K.exp(-1 * params['gamma'] * l2_norm(x[0], x[1]) ** 2)
        elif similarity == 'euclidean':
            return lambda x: 1 / (1 + l2_norm(x[0], x[1]))
        elif similarity == 'l1':
            return lambda x: -l1_norm(x[0], x[1])
        elif similarity == 'exponential':
            return lambda x: K.exp(-1 * params['gamma'] * l2_norm(x[0], x[1]))
        elif similarity == 'gesd':
            euclidean = lambda x: 1 / (1 + l2_norm(x[0], x[1]))
            sigmoid = lambda x: 1 / (1 + K.exp(-1 * params['gamma'] * (dot(x[0], x[1]) + params['c'])))
            return lambda x: euclidean(x) * sigmoid(x)
        elif similarity == 'aesd':
            euclidean = lambda x: 0.5 / (1 + l2_norm(x[0], x[1]))
            sigmoid = lambda x: 0.5 / (1 + K.exp(-1 * params['gamma'] * (dot(x[0], x[1]) + params['c'])))
            return lambda x: euclidean(x) + sigmoid(x)
        else:
            raise Exception('Invalid similarity: {}'.format(similarity))
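A hedged usage sketch, following the configuration format from the docstring (the surrounding class is not part of the snippet, so the wiring below is illustrative):

config = {
    'similarity_params': {
        'mode': 'gesd',
        'gamma': 1,
        'c': 1,
    },
}
# With self.similarity_params set from config['similarity_params']:
# similarity = self.get_similarity()
# score = similarity([question_encoding, answer_encoding])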
 def call(self, x, mask=None):
     # x: (batch_size, input_length, input_dim) where input_length = head_size + 2
     head_encoding = x[:, :-2, :]  # (batch_size, head_size, input_dim)
     prep_encoding = x[:, -2, :]  # (batch_size, input_dim)
     child_encoding = x[:, -1, :]  # (batch_size, input_dim)
     if self.composition_type == 'HPCD':
         # TODO: The following line may not work with TF.
         # (batch_size, head_size, input_dim, 1) * (1, head_size, input_dim, proj_dim)
         head_proj_prod = K.expand_dims(head_encoding) * K.expand_dims(self.dist_proj_head, dim=0)
         head_projection = K.sum(head_proj_prod, axis=2)  # (batch_size, head_size, proj_dim)
     else:
         head_projection = K.dot(head_encoding, self.proj_head)  # (batch_size, head_size, proj_dim)
     prep_projection = K.expand_dims(K.dot(prep_encoding, self.proj_prep), dim=1)  # (batch_size, 1, proj_dim)
     child_projection = K.expand_dims(K.dot(child_encoding, self.proj_child), dim=1)  # (batch_size, 1, proj_dim)
     #(batch_size, head_size, proj_dim)
     if self.composition_type == 'HPCT':
         composed_projection = K.tanh(head_projection + prep_projection + child_projection)
     elif self.composition_type in ('HPC', 'HPCD'):
         prep_child_projection = K.tanh(prep_projection + child_projection)  # (batch_size, 1, proj_dim)
         composed_projection = K.tanh(head_projection + prep_child_projection)
     else:
         # Composition type is HC
         composed_projection = K.tanh(head_projection + child_projection)
     for hidden_layer in self.hidden_layers:
         composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))  # (batch_size, head_size, proj_dim)
     # (batch_size, head_size)
     head_word_scores = K.squeeze(K.dot(composed_projection, self.scorer), axis=-1)
     if mask is None:
         attachment_probabilities = K.softmax(head_word_scores)  # (batch_size, head_size)
     else:
         if K.ndim(mask) > 2:
             # This means this layer came after a Bidirectional layer. Keras has this bug which
             # concatenates input masks instead of output masks.
             # TODO: Fix Bidirectional instead.
             mask = K.any(mask, axis=(-2, -1))
         # We need to do a masked softmax.
         exp_scores = K.exp(head_word_scores)  # (batch_size, head_size)
         head_mask = mask[:, :-2]  # (batch_size, head_size)
         # (batch_size, head_size)
         masked_exp_scores = switch(head_mask, exp_scores, K.zeros_like(head_encoding[:, :, 0]))
         # (batch_size, 1). Adding epsilon to avoid division by 0. But epsilon is float64.
         exp_sum = K.cast(K.expand_dims(K.sum(masked_exp_scores, axis=1) + K.epsilon()), 'float32')
         attachment_probabilities = masked_exp_scores / exp_sum  # (batch_size, head_size)
     return attachment_probabilities
Example #14
    def step(self, x, states):
        h_tild_tm1 = states[0]

        B_U = states[1]
        B_W = states[2]

        if self.consume_less == 'cpu':
            x_i = x[:, :self.output_dim]
            x_f = x[:, self.output_dim: 2 * self.output_dim]
            x_c = x[:, 2 * self.output_dim: 3 * self.output_dim]
            x_o = x[:, 3 * self.output_dim: 4 * self.output_dim]
            x_new = x[:, 4 * self.output_dim:]
        else:
            x_i = K.dot(x * B_W[0], self.W_i) + self.b_i
            x_f = K.dot(x * B_W[1], self.W_f) + self.b_f
            x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
            x_o = K.dot(x * B_W[3], self.W_o) + self.b_o
            x_new = x

        # self.C_tape -> BT, t-1, k
        # self.H_tape -> BT, t-1, k

        # x -> BT, k 
        # h_tild_tm1 -> BT, k       

        if self.H_tape is None:
            self.H_tape = K.zeros_like(h_tild_tm1).dimshuffle((0,'x',1))
            self.C_tape = K.zeros_like(h_tild_tm1).dimshuffle((0,'x',1))

        # s_t -> BT, t-1, 1
        t = K.shape(self.C_tape)[1]

        sum1 = K.dot(self.H_tape, self.W_h)
        sum2 = K.dot(K.repeat_elements(x_new.dimshuffle((0,'x',1)),t, axis=1), self.W_x)
        sum3 = K.dot(K.repeat_elements(h_tild_tm1.dimshuffle((0,'x',1)),t, axis=1), self.W_h_tilde)
        tanhed_sum = K.tanh(sum1 + sum2 + sum3)    
        a_t = K.dot(tanhed_sum, self.v)[:,:,0]
        s_t = K.softmax(a_t)

        h_tilde_t = T.batched_dot(self.H_tape.dimshuffle((0,2,1)), s_t.dimshuffle((0,1,'x')))[:,:,0]
        c_tilde_t = T.batched_dot(self.C_tape.dimshuffle((0,2,1)), s_t.dimshuffle((0,1,'x')))[:,:,0]

        i = self.inner_activation(x_i + K.dot(h_tilde_t * B_U[0], self.U_i))
        f = self.inner_activation(x_f + K.dot(h_tilde_t * B_U[1], self.U_f))
        c_t = f * c_tilde_t + i * self.activation(x_c + K.dot(h_tilde_t * B_U[2], self.U_c))
        o = self.inner_activation(x_o + K.dot(h_tilde_t * B_U[3], self.U_o))

        h_t = o * self.activation(c_t)

        # Add to Tape
        self.C_tape = K.concatenate([self.C_tape, c_t.dimshuffle((0,'x',1))], axis=1)
        self.H_tape = K.concatenate([self.H_tape, h_t.dimshuffle((0,'x',1))], axis=1)

        return h_t, [h_tilde_t]
    def _additive_similarity(self, source, query):
        concatenation = K.concatenate([source, query], axis=2)
        nonlinearity = K.tanh(K.dot(concatenation, self._weights["w_a"]))
        
        # tile the weight vector (1, 1, dim) for each time step and each element of the batch -> (bs, T, dim)
        source_shape = K.shape(source)
        vaeff = K.tile(K.expand_dims(self._weights["v_a"], 0), [source_shape[0], source_shape[1], 1])

        similarity = K.batch_dot(K.permute_dimensions(vaeff, [0, 2, 1]), nonlinearity, axes=[1, 2])
        
        return similarity
    def call(self, x, mask=None):
        e = K.dot(x, self.W)
        if self.bias:
            e += self.b
        e = K.tanh(e)
        e = K.reshape(K.dot(e, self.U), (-1, self.timesteps))
        a = K.exp(e)
        if mask is not None:
            a *= K.cast(mask, K.floatx())
        a_weights = a / K.cast(K.sum(a, axis=-1, keepdims=True) + K.epsilon(), K.floatx())
        weighted_output = x * K.expand_dims(a_weights, axis=-1)

        return [K.mean(weighted_output, axis=1), a_weights]
Example #17
 def get_w(self, x, mask=None):
     input_shape = K.int_shape(x)
     features_dim = self.features_dim
     step_dim = input_shape[1]
     eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))), (-1, step_dim))
     if self.bias:
         eij += self.b[:input_shape[1]]
     eij = K.tanh(eij)
     a = K.exp(eij)
     if mask is not None:
         a *= K.cast(mask, K.floatx())
     a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
     return a
    def call(self, x, mask=None):
        x_transpose = K.permute_dimensions(x, (0,2,1))
        e = K.dot(x_transpose, self.W)
        if self.bias:
            e += self.b
        e = K.tanh(e)
        if not self.simple:
            e = K.permute_dimensions(e, (0,2,1))
            e = K.reshape(K.dot(e, self.V), (-1, self.timesteps))
        else:
            e = K.mean(e, axis=1)
        a = K.exp(e)
        if mask is not None:
            a *= K.cast(mask, K.floatx())
        a_weights = a / K.cast(K.sum(a, axis=-1, keepdims=True) + K.epsilon(), K.floatx())
        weighted_output = x * K.expand_dims(a_weights, axis=-1)

        return [K.sum(weighted_output, axis=1), a_weights]
    def call(self, x, mask=None):
        # size of x :[batch_size, sel_len, attention_dim]
        # size of u :[batch_size, attention_dim]
        # uit = tanh(xW+b)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)

        ait = K.exp(ait)

        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)

        return output
 def call(self, inputs, mask=None):
   if not isinstance(inputs, list) or len(inputs) <= 1:
     raise Exception('BilinearTensorLayer must be called on a list of tensors '
                     '(at least 2). Got: ' + str(inputs))
   e1 = inputs[0]
   e2 = inputs[1]
   batch_size = K.shape(e1)[0]
   k = self.output_dim
   # print([e1,e2])
   feed_forward_product = K.dot(K.concatenate([e1,e2]), self.V)
   # print(feed_forward_product)
   bilinear_tensor_products = [ K.sum((e2 * K.dot(e1, self.W[0])) + self.b, axis=1) ]
   # print(bilinear_tensor_products)
    for i in range(1, k):
     btp = K.sum((e2 * K.dot(e1, self.W[i])) + self.b, axis=1)
     bilinear_tensor_products.append(btp)
   result = K.tanh(K.reshape(K.concatenate(bilinear_tensor_products, axis=0), (batch_size, k)) + feed_forward_product)
   # print(result)
   return result
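This computes the bilinear tensor product of a Neural Tensor Network (Socher et al., 2013): each slice W[i] scores the pair (e1, e2), and the k slice scores are combined with a feed-forward term before the tanh. One slice can be checked standalone (shapes illustrative):

import numpy as np
from keras import backend as K

e1 = K.constant(np.random.rand(2, 5).astype("float32"))
e2 = K.constant(np.random.rand(2, 5).astype("float32"))
W0 = K.variable(np.random.randn(5, 5).astype("float32"))
slice_score = K.sum(e2 * K.dot(e1, W0), axis=1)  # e1^T W0 e2, per sample
print(K.eval(slice_score).shape)  # (2,)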
Example #21
 def call(self, x, mask=None):
     mean = super(IntraAttention, self).call(x, mask)
     # x: (batch_size, input_length, input_dim)
     # mean: (batch_size, input_dim)
     ones = K.expand_dims(K.mean(K.ones_like(x), axis=(0, 2)), dim=0)  # (1, input_length)
     # (batch_size, input_length, input_dim)
     tiled_mean = K.permute_dimensions(K.dot(K.expand_dims(mean), ones), (0, 2, 1))
     if mask is not None:
         if K.ndim(mask) > K.ndim(x):
             # Assuming this is because of the bug in Bidirectional. Temporary fix follows.
             # TODO: Fix Bidirectional.
             mask = K.any(mask, axis=(-2, -1))
         if K.ndim(mask) < K.ndim(x):
             mask = K.expand_dims(mask)
         x = switch(mask, x, K.zeros_like(x))
     # (batch_size, input_length, proj_dim)
     projected_combination = K.tanh(K.dot(x, self.vector_projector) + K.dot(tiled_mean, self.mean_projector))
     scores = K.dot(projected_combination, self.scorer)  # (batch_size, input_length)
     weights = K.softmax(scores)  # (batch_size, input_length)
     attended_x = K.sum(K.expand_dims(weights) * x, axis=1)  # (batch_size, input_dim)
     return attended_x
Example #22
    def call(self, inputs, mask=None):
        if not isinstance(inputs, list) or len(inputs) <= 1:
            raise TypeError('Attention must be called on a list of tensors '
                            '(at least 2). Got: ' + str(inputs))
        # (None(batch), MaxLen(time), spec_dim, embed_dim)
        mix_embed_l = inputs[0]
        # (None(batch), embed_dim)
        spk_embed_l = inputs[1]
        energy = None
        if self.mode == 'dot':
            # (batch, time, spec_dim, embed_dim) batch_dot(3,1) (batch, embed_dim) = (batch, time, spec_dim)
            energy = K.batch_dot(mix_embed_l, spk_embed_l, axes=(3, 1))
        elif self.mode == 'align':
            # (batch, time, spec_dim, embed_dim) dot (embed_dim, align_hidden)
            # -> (batch, time, spec_dim, align_hidden)
            hUa = K.dot(mix_embed_l, self.U_align)
            # (batch, embed_dim) dot (embed_dim, align_hidden)
            # -> (batch, align_hidden)
            sWa = K.dot(spk_embed_l, self.W_align)
            sWa = sWa.dimshuffle(0, 'x', 'x', 1)
            # -> (batch, time, spec_dim, align_hidden)
            tanh_sWahUa = K.tanh(sWa+hUa)
            # -> (batch, time, spec_dim, align_hidden) dot (align_hidden, 1)
            # -> (batch, time, spec_dim, 1)
            energy = K.dot(tanh_sWahUa, self.v_align)
            # -> (batch, time, spec_dim)
            energy = K.reshape(energy, (-1, self.time_step, self.spec_dim))
        else:
            raise ValueError('Unknown merge mode.')

        if self.nonlinearity == 'sigmoid':
            alpha = K.sigmoid(energy)
        elif self.nonlinearity == 'linear':
            alpha = energy
        else:
            raise Exception('Unknown nonlinearity mode for attention:'+self.nonlinearity)
        # (batch, time, spec_dim)
        return alpha
    def call(self, x):
        assert(K.backend() == 'tensorflow')
        # The model is described by the following equations:
        #       estimated_weight_i = dot_product(hidden_state_i, W).
        #       biased_weight = estimated_weight + bias
        #       non_linear_weight = tanh(biased_weight)
        estimated_weight = K.squeeze(K.dot(x, K.expand_dims(self.W, -1)), -1)
        biased_weight = estimated_weight + self.bias
        non_linear_weight = K.tanh(biased_weight)

        # For each hidden state, calculate how much it should contribute
        # to the context vector. This is the main part of attention.
        # The weights are converted to "probabilities" with a softmax:
        # exp(x) / sum(exp(xi)).
        prob = K.exp(non_linear_weight)
        # Compute the total sum for each batch.
        total_sum = K.sum(prob, axis=1, keepdims=True)
        prob /= K.cast(total_sum, K.floatx())

        # Multiply each hidden value by the corresponding probability.
        prob = K.expand_dims(prob, -1)
        new_hidden_values = x * prob
        return K.sum(new_hidden_values, axis=1)
Example #24
 def attention_step(self, x, args):
     ''' Attention step function
     #Arguments
         args: [h_,
                 context, projected_context, 
                 W_h_prj, 
                 w_prj_att, b_att,
                 W_x_h, U_h_h, W_ctx_h, b_h
                 W_x_p, W_h_p, W_ctx_p, b_p].
             h_: (batch_size, dim_hidden)
             context: (batch_size, nb_context, dim_context)
             projected_context: (batch_size, nb_context, dim_projected_context)
                 projected_context = context dot W_ctx_prj + b_ctx_prj
                 calculated before step.
             W_h_prj: (dim_hidden, dim_projected_context)
             w_prj_att: (dim_projected_context, 1)
             b_att: (1,)
             W_x_h: (dim_embedding, dim_hidden)
             U_h_h: (dim_hidden, dim_hidden)
             W_ctx_h: (dim_context, dim_hidden)
             b_h: (dim_hidden,)                
     '''
     assert len(args) == 1 + len(self.contexts) + len(self.params)
     [h_, context, projected_context, 
      W_h_prj, 
      w_prj_att, b_att,
      W_x_h, U_h_h, W_ctx_h, b_h] = args
     
     projected = K.expand_dims(K.dot(h_, W_h_prj), 1) + projected_context
     e = K.dot(K.tanh(projected), w_prj_att) + b_att
     alpha = K.softmax(K.flatten(e))
     weighted_context = K.sum((context * K.expand_dims(alpha)), 1)
     pre_act = K.dot(x, W_x_h) + K.dot(h_, U_h_h) + K.dot(weighted_context, W_ctx_h) + b_h
     h = K.sigmoid(pre_act)

     return h, [alpha, weighted_context], [h]
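This resembles the soft attention of Xu et al.'s "Show, Attend and Tell": the hidden state is projected into the context space, scored against each context position, and a softmax turns the scores into weights. Note that K.flatten(e) flattens across the batch, which appears to assume batch size 1. The scoring step standalone (shapes illustrative):

import numpy as np
from keras import backend as K

projected = K.constant(np.random.randn(1, 7, 16).astype("float32"))
w_prj_att = K.variable(np.random.randn(16, 1).astype("float32"))
e = K.dot(K.tanh(projected), w_prj_att)  # (1, 7, 1) attention energies
alpha = K.softmax(K.flatten(e))          # (7,) weights
print(K.eval(K.sum(alpha)))              # ~1.0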
Example #25
 def call(self, x):
     return self.alpha * K.tanh(self.beta * x)
Example #26
def scaled_tanh(x):
    return K.tanh(x) * 640
Example #27
def tanhNorm(x):
    square_sum = K.sum(K.square(x), axis=-1, keepdims=True)
    dist = K.sqrt(K.maximum(square_sum, K.epsilon()))
    tanh = K.tanh(dist)
    scale = tanh / dist
    return x * scale
Example #28
def sigtan(x: tf.Tensor):
    feat_len = x.shape.as_list()[-1] // 2
    xsig = x[..., :feat_len]
    xtan = x[..., feat_len:]
    return K.sigmoid(xsig) * K.tanh(xtan)
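This splits the feature axis in half and uses one half to gate the other, the gated activation familiar from WaveNet-style blocks. A quick shape check (illustrative):

import numpy as np
from keras import backend as K

x = K.constant(np.random.randn(2, 4, 16).astype("float32"))
print(K.int_shape(sigtan(x)))  # (2, 4, 8): half the input feature dimension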
Example #29
    def __init__(self,
                 model,
                 intensity_range,
                 regularization,
                 input_shape,
                 init_cost,
                 steps,
                 mini_batch,
                 lr,
                 num_classes,
                 channels_first=CHANNELS_FIRST,
                 upsample_size=UPSAMPLE_SIZE,
                 attack_succ_threshold=ATTACK_SUCC_THRESHOLD,
                 patience=PATIENCE,
                 cost_multiplier=COST_MULTIPLIER,
                 reset_cost_to_zero=RESET_COST_TO_ZERO,
                 mask_min=MASK_MIN,
                 mask_max=MASK_MAX,
                 color_min=COLOR_MIN,
                 color_max=COLOR_MAX,
                 img_color=IMG_COLOR,
                 shuffle=SHUFFLE,
                 batch_size=BATCH_SIZE,
                 verbose=VERBOSE,
                 return_logs=RETURN_LOGS,
                 save_last=SAVE_LAST,
                 epsilon=EPSILON,
                 early_stop=EARLY_STOP,
                 early_stop_threshold=EARLY_STOP_THRESHOLD,
                 early_stop_patience=EARLY_STOP_PATIENCE,
                 save_tmp=SAVE_TMP,
                 tmp_dir=TMP_DIR,
                 raw_input_flag=RAW_INPUT_FLAG):

        assert intensity_range in {'imagenet', 'inception', 'mnist', 'raw'}
        assert regularization in {None, 'l1', 'l2'}

        self.model = model
        self.intensity_range = intensity_range
        self.regularization = regularization
        self.input_shape = input_shape
        self.channels_first = channels_first
        self.init_cost = init_cost
        self.steps = steps
        self.mini_batch = mini_batch
        self.lr = lr
        self.num_classes = num_classes
        self.upsample_size = upsample_size
        self.attack_succ_threshold = attack_succ_threshold
        self.patience = patience
        self.cost_multiplier_up = cost_multiplier
        self.cost_multiplier_down = cost_multiplier**1.5
        self.reset_cost_to_zero = reset_cost_to_zero
        self.mask_min = mask_min
        self.mask_max = mask_max
        self.color_min = color_min
        self.color_max = color_max
        self.img_color = img_color
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.verbose = verbose
        self.return_logs = return_logs
        self.save_last = save_last
        self.epsilon = epsilon
        self.early_stop = early_stop
        self.early_stop_threshold = early_stop_threshold
        self.early_stop_patience = early_stop_patience
        self.save_tmp = save_tmp
        self.tmp_dir = tmp_dir
        self.raw_input_flag = raw_input_flag

        if self.channels_first:
            mask_size = np.ceil(
                np.array(input_shape[1:], dtype=float) / upsample_size)
        else:
            mask_size = np.ceil(
                np.array(input_shape[0:2], dtype=float) / upsample_size)
        mask_size = mask_size.astype(int)
        self.mask_size = mask_size
        mask = np.zeros(self.mask_size)
        pattern = np.zeros(input_shape)
        if self.channels_first:
            mask = np.expand_dims(mask, axis=0)
        else:
            mask = np.expand_dims(mask, axis=2)

        mask_tanh = np.zeros_like(mask)
        pattern_tanh = np.zeros_like(pattern)

        # prepare mask related tensors
        self.mask_tanh_tensor = K.variable(mask_tanh)
        mask_tensor_unrepeat = (K.tanh(self.mask_tanh_tensor) /
                                (2 - self.epsilon) + 0.5)
        if self.channels_first:
            mask_tensor_unexpand = K.repeat_elements(mask_tensor_unrepeat,
                                                     rep=self.img_color,
                                                     axis=0)
        else:
            mask_tensor_unexpand = K.repeat_elements(mask_tensor_unrepeat,
                                                     rep=self.img_color,
                                                     axis=2)
        self.mask_tensor = K.expand_dims(mask_tensor_unexpand, axis=0)
        upsample_layer = UpSampling2D(size=(self.upsample_size,
                                            self.upsample_size))
        mask_upsample_tensor_uncrop = upsample_layer(self.mask_tensor)
        uncrop_shape = K.int_shape(mask_upsample_tensor_uncrop)[1:]
        if self.channels_first:
            cropping_layer = Cropping2D(
                cropping=((0, uncrop_shape[1] - self.input_shape[1]),
                          (0, uncrop_shape[2] - self.input_shape[2])))
        else:
            cropping_layer = Cropping2D(
                cropping=((0, uncrop_shape[0] - self.input_shape[0]),
                          (0, uncrop_shape[1] - self.input_shape[1])))
        self.mask_upsample_tensor = cropping_layer(mask_upsample_tensor_uncrop)
        reverse_mask_tensor = (K.ones_like(self.mask_upsample_tensor) -
                               self.mask_upsample_tensor)

        def keras_preprocess(x_input, intensity_range):

            if intensity_range == 'raw':
                x_preprocess = x_input

            elif intensity_range == 'imagenet':
                # 'RGB'->'BGR'
                x_tmp = x_input[..., ::-1]
                # Zero-center by mean pixel
                mean = K.constant([[[103.939, 116.779, 123.68]]])
                x_preprocess = x_tmp - mean

            elif intensity_range == 'inception':
                x_preprocess = (x_input / 255.0 - 0.5) * 2.0

            elif intensity_range == 'mnist':
                x_preprocess = x_input / 255.0

            else:
                raise Exception('unknown intensity_range %s' % intensity_range)

            return x_preprocess

        def keras_reverse_preprocess(x_input, intensity_range):

            if intensity_range == 'raw':
                x_reverse = x_input

            elif intensity_range == 'imagenet':
                # Zero-center by mean pixel
                mean = K.constant([[[103.939, 116.779, 123.68]]])
                x_reverse = x_input + mean
                # 'BGR'->'RGB'
                x_reverse = x_reverse[..., ::-1]

            elif intensity_range == 'inception':
                x_reverse = (x_input / 2 + 0.5) * 255.0

            elif intensity_range == 'mnist':
                x_reverse = x_input * 255.0

            else:
                raise Exception('unknown intensity_range %s' % intensity_range)

            return x_reverse

        # prepare pattern related tensors
        self.pattern_tanh_tensor = K.variable(pattern_tanh)
        self.pattern_raw_tensor = ((K.tanh(self.pattern_tanh_tensor) /
                                    (2 - self.epsilon) + 0.5) * 255.0)

        # prepare input image related tensors
        # ignore clip operation here
        # assume input image is already clipped into valid color range
        input_tensor = K.placeholder(model.input_shape)
        if self.raw_input_flag:
            input_raw_tensor = input_tensor
        else:
            input_raw_tensor = keras_reverse_preprocess(
                input_tensor, self.intensity_range)

        # IMPORTANT: MASK OPERATION IN RAW DOMAIN
        X_adv_raw_tensor = (
            reverse_mask_tensor * input_raw_tensor +
            self.mask_upsample_tensor * self.pattern_raw_tensor)

        X_adv_tensor = keras_preprocess(X_adv_raw_tensor, self.intensity_range)

        output_tensor = model(X_adv_tensor)

        y_true_tensor = K.placeholder(model.output_shape)

        # TODO: remove quick fixes.
        # output of mnist is (batch, 1, 10), so oddly we need to squeeze.
        # The output is also before softmax.
        if self.intensity_range == 'mnist':
            output_tensor = K.softmax(output_tensor)
            output_tensor = K.squeeze(output_tensor, axis=1)
            y_true_tensor = K.squeeze(y_true_tensor, axis=1)
            print(output_tensor.shape)
            print(y_true_tensor.shape)

        self.loss_acc = categorical_accuracy(output_tensor, y_true_tensor)

        self.loss_ce = categorical_crossentropy(output_tensor, y_true_tensor)

        if self.regularization is None:
            self.loss_reg = K.constant(0)
        elif self.regularization == 'l1':
            self.loss_reg = (K.sum(K.abs(self.mask_upsample_tensor)) /
                             self.img_color)
        elif self.regularization == 'l2':
            self.loss_reg = K.sqrt(
                K.sum(K.square(self.mask_upsample_tensor)) / self.img_color)

        cost = self.init_cost
        self.cost_tensor = K.variable(cost)
        self.loss = self.loss_ce + self.loss_reg * self.cost_tensor

        self.opt = Adam(lr=self.lr, beta_1=0.5, beta_2=0.9)
        self.updates = self.opt.get_updates(
            params=[self.pattern_tanh_tensor, self.mask_tanh_tensor],
            loss=self.loss)
        self.train = K.function(
            [input_tensor, y_true_tensor],
            [self.loss_ce, self.loss_reg, self.loss, self.loss_acc],
            updates=self.updates)

Example #30
def triplet_tanh_pn_loss(y_true, y_pred):
    return K.mean(
        K.tanh(y_pred[:, 0, 0]) +
        ((K.constant(1) - K.tanh(y_pred[:, 1, 0])) +
         (K.constant(1) - K.tanh(y_pred[:, 2, 0]))) / K.constant(2))
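A hedged sketch of evaluating this loss (judging by the signs, slot 0 holds the anchor-positive distance and slots 1-2 the negative distances; shapes illustrative):

import numpy as np
from keras import backend as K

y_pred = K.constant(np.random.rand(4, 3, 1).astype("float32"))
y_true = K.zeros_like(y_pred)  # unused by the loss, required by Keras
print(K.eval(triplet_tanh_pn_loss(y_true, y_pred)))  # scalar loss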
    def __init__(
        self,
        model: KERAS_MODEL_TYPE,
        use_logits: bool = False,
        channels_first: bool = False,
        clip_values: Optional["CLIP_VALUES_TYPE"] = None,
        preprocessing_defences: Union["Preprocessor", List["Preprocessor"], None] = None,
        postprocessing_defences: Union["Postprocessor", List["Postprocessor"], None] = None,
        preprocessing: "PREPROCESSING_TYPE" = (0.0, 1.0),
        input_layer: int = 0,
        output_layer: int = 0,
        steps: int = 1000,
        init_cost: float = 1e-3,
        norm: Union[int, float] = 2,
        learning_rate: float = 0.1,
        attack_success_threshold: float = 0.99,
        patience: int = 5,
        early_stop: bool = True,
        early_stop_threshold: float = 0.99,
        early_stop_patience: int = 10,
        cost_multiplier: float = 1.5,
        batch_size: int = 32,
    ):
        """
        Create a Neural Cleanse classifier.

        :param model: Keras model, neural network or other.
        :param use_logits: True if the output of the model are logits; false for probabilities or any other type of
               outputs. Logits output should be favored when possible to ensure attack efficiency.
        :param channels_first: Set channels first or last.
        :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and
               maximum values allowed for features. If floats are provided, these will be used as the range of all
               features. If arrays are provided, each value will be considered the bound for a feature, thus
               the shape of clip values needs to match the total number of features.
        :param preprocessing_defences: Preprocessing defence(s) to be applied by the classifier.
        :param postprocessing_defences: Postprocessing defence(s) to be applied by the classifier.
        :param preprocessing: Tuple of the form `(subtrahend, divisor)` of floats or `np.ndarray` of values to be
               used for data preprocessing. The first value will be subtracted from the input. The input will then
               be divided by the second one.
        :param input_layer: The index of the layer to consider as input for models with multiple input layers. The layer
                            with this index will be considered for computing gradients. For models with only one input
                            layer this values is not required.
        :param output_layer: Which layer to consider as the output when the models has multiple output layers. The layer
                             with this index will be considered for computing gradients. For models with only one output
                             layer this values is not required.
        :param steps: The maximum number of steps to run the Neural Cleanse optimization
        :param init_cost: The initial value for the cost tensor in the Neural Cleanse optimization
        :param norm: The norm to use for the Neural Cleanse optimization, can be 1, 2, or np.inf
        :param learning_rate: The learning rate for the Neural Cleanse optimization
        :param attack_success_threshold: The threshold at which the generated backdoor is successful enough to stop the
                                         Neural Cleanse optimization
        :param patience: How long to wait for changing the cost multiplier in the Neural Cleanse optimization
        :param early_stop: Whether or not to allow early stopping in the Neural Cleanse optimization
        :param early_stop_threshold: How close values need to come to max value to start counting early stop
        :param early_stop_patience: How long to wait to determine early stopping in the Neural Cleanse optimization
        :param cost_multiplier: How much to change the cost in the Neural Cleanse optimization
        :param batch_size: The batch size for optimizations in the Neural Cleanse optimization
        """
        import keras.backend as K
        from keras.losses import categorical_crossentropy
        from keras.metrics import categorical_accuracy

        super().__init__(
            model=model,
            use_logits=use_logits,
            channels_first=channels_first,
            clip_values=clip_values,
            preprocessing_defences=preprocessing_defences,
            postprocessing_defences=postprocessing_defences,
            preprocessing=preprocessing,
            input_layer=input_layer,
            output_layer=output_layer,
            steps=steps,
            init_cost=init_cost,
            norm=norm,
            learning_rate=learning_rate,
            attack_success_threshold=attack_success_threshold,
            early_stop=early_stop,
            early_stop_threshold=early_stop_threshold,
            early_stop_patience=early_stop_patience,
            patience=patience,
            cost_multiplier=cost_multiplier,
            batch_size=batch_size,
        )
        mask = np.random.uniform(size=super().input_shape)
        pattern = np.random.uniform(size=super().input_shape)
        self.epsilon = K.epsilon()

        # Normalize mask between [0, 1]
        self.mask_tensor_raw = K.variable(mask)
        # self.mask_tensor = K.expand_dims(K.tanh(self.mask_tensor_raw) / (2 - self.epsilon) + 0.5, axis=0)
        self.mask_tensor = K.tanh(self.mask_tensor_raw) / (2 - self.epsilon) + 0.5

        # Normalize pattern between [0, 1]
        self.pattern_tensor_raw = K.variable(pattern)
        self.pattern_tensor = K.expand_dims(K.tanh(self.pattern_tensor_raw) / (2 - self.epsilon) + 0.5, axis=0)

        reverse_mask_tensor = K.ones_like(self.mask_tensor) - self.mask_tensor
        input_tensor = K.placeholder(model.input_shape)
        x_adv_tensor = reverse_mask_tensor * input_tensor + self.mask_tensor * self.pattern_tensor

        output_tensor = self.model(x_adv_tensor)
        y_true_tensor = K.placeholder(model.outputs[0].shape.as_list())

        self.loss_acc = categorical_accuracy(output_tensor, y_true_tensor)
        self.loss_ce = categorical_crossentropy(output_tensor, y_true_tensor)

        if self.norm == 1:
            # TODO: change 3 to dynamically set img_color
            self.loss_reg = K.sum(K.abs(self.mask_tensor)) / 3
        elif self.norm == 2:
            self.loss_reg = K.sqrt(K.sum(K.square(self.mask_tensor)) / 3)

        self.cost = self.init_cost
        self.cost_tensor = K.variable(self.cost)
        self.loss_combined = self.loss_ce + self.loss_reg * self.cost_tensor

        try:
            from keras.optimizers import Adam

            self.opt = Adam(lr=self.learning_rate, beta_1=0.5, beta_2=0.9)
        except ImportError:
            from keras.optimizers import adam_v2

            self.opt = adam_v2.Adam(lr=self.learning_rate, beta_1=0.5, beta_2=0.9)
        self.updates = self.opt.get_updates(
            params=[self.pattern_tensor_raw, self.mask_tensor_raw], loss=self.loss_combined
        )
        self.train = K.function(
            [input_tensor, y_true_tensor],
            [self.loss_ce, self.loss_reg, self.loss_combined, self.loss_acc],
            updates=self.updates,
        )
Example #32
def hierarchical_containment_layer_sum_activation(x):
    return K.tanh(K.sum(x, axis=-1, keepdims=True))
Example #33
 def call(self, x):
     # kb is assumed to be keras.backend, imported in the source module.
     e_t = kb.squeeze(kb.tanh(kb.dot(x, self.w) + self.b), axis=-1)
     a_t = kb.softmax(e_t)
     a_t = kb.expand_dims(a_t, axis=-1)
     return kb.sum(x * a_t, axis=1)
Example #34
 def call(self, inputs):
     x = K.permute_dimensions(inputs, (0, 2, 1))
     a = K.softmax(K.tanh(K.dot(x, self.W) + self.b))
     outputs = K.permute_dimensions(a * x, (0, 2, 1))
     outputs = K.sum(outputs, axis=1)
     return outputs
Example #35
    def call(self, inputs, **kwargs):
        pos = keras.activations.relu(inputs)
        neg = self.alpha * K.tanh(-self.beta * keras.activations.relu(-inputs))

        return pos + neg
def mean_distance(y_true, y_pred):
    return K.sqrt(K.mean(K.pow(y_true[:, 2] - K.tanh(y_pred[:, 2]), 2)))
def mixed_loss(target, output):
    # K.binary_crossentropy expects (target, output) in that order.
    loss1 = K.binary_crossentropy(target[:, 0], K.sigmoid(output[:, 0]))
    loss2 = K.binary_crossentropy(target[:, 1], K.sigmoid(output[:, 1]))
    loss3 = mean_squared_error(K.tanh(output[:, 2]), target[:, 2])

    return loss1 + loss2 + loss3
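A sketch of evaluating mixed_loss on dummy data (assumes mean_squared_error comes from keras.losses; shapes illustrative, with columns 0-1 binary targets and column 2 a regression target):

import numpy as np
from keras import backend as K
from keras.losses import mean_squared_error

target = K.constant(np.random.randint(0, 2, (4, 3)).astype("float32"))
output = K.constant(np.random.randn(4, 3).astype("float32"))
print(K.eval(mixed_loss(target, output)))  # per-sample combined loss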
 def call(self, x):
     et = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
     at = K.softmax(et)
     at = K.expand_dims(at, axis=-1)
     output = x * at
     return K.sum(output, axis=1)
Example #39
def NewTanh(x):
    return K.tanh(x)
Example #40
    def call(self, P, **kwargs):
        """
        :param P: inputs
        :return: encoding of inputs P
        """
        ''' Paper notations in the code '''
        # P = P_hw
        # itr_attn = P_itrAtt
        # encoding = P_enc
        # The paper takes inputs to be P(_hw) as an example and then computes the same thing for H,
        # therefore we'll name our inputs P too.

        # Input of encoding is P with shape (batch, p, d). It would be (batch, h, d) for hypothesis
        # Construct alphaP of shape (batch, p, 3*d, p)
        # A = dot(w_itr_att, alphaP)

        # alphaP consists of 3*d rows along 2nd axis
        # 1. up   -> first  d items represent P[i]
        # 2. mid  -> second d items represent P[j]
        # 3. down -> final items represent alpha(P[i], P[j]) which is element-wise product of P[i] and P[j] = P[i]*P[j]

        # If we look at one slice of alphaP we'll see that it has the following elements:
        # ----------------------------------------
        # P[i][0], P[i][0], P[i][0], ... P[i][0]   ▲
        # P[i][1], P[i][1], P[i][1], ... P[i][1]   |
        # P[i][2], P[i][2], P[i][2], ... P[i][2]   |
        # ...                              ...     | up
        #      ...                         ...     |
        #             ...                  ...     |
        # P[i][d], P[i][d], P[i][d], ... P[i][d]   ▼
        # ----------------------------------------
        # P[0][0], P[1][0], P[2][0], ... P[p][0]   ▲
        # P[0][1], P[1][1], P[2][1], ... P[p][1]   |
        # P[0][2], P[1][2], P[2][2], ... P[p][2]   |
        # ...                              ...     | mid
        #      ...                         ...     |
        #             ...                  ...     |
        # P[0][d], P[1][d], P[2][d], ... P[p][d]   ▼
        # ----------------------------------------
        #                                          ▲
        #                                          |
        #                                          |
        #               up * mid                   | down
        #          element-wise product            |
        #                                          |
        #                                          ▼
        # ----------------------------------------

        # For every slice(i) the up part changes its P[i] values
        # The middle part is repeated p times in depth (for every i)
        # So we can get the middle part by doing the following:
        # mid = broadcast(P) -> to get tensor of shape (batch, p, d, p)
        # As we can notice up is the same mid, but with changed axis, so to obtain up from mid we can do:
        # up = swap_axes(mid, axis1=0, axis2=2)

        ''' Alpha '''
        # P                                                     # (batch, p, d)
        mid = broadcast_last_axis(P)                            # (batch, p, d, p)
        up = K.permute_dimensions(mid, pattern=(0, 3, 2, 1))    # (batch, p, d, p)
        alphaP = K.concatenate([up, mid, up * mid], axis=2)     # (batch, p, 3d, p)
        A = K.dot(self.w_itr_att, alphaP)                       # (batch, p, p)

        ''' Self-attention '''
        # P_itr_attn[i] = sum of for j = 1...p:
        #                           s = sum(for k = 1...p:  e^A[k][j]
        #                           ( e^A[i][j] / s ) * P[j]  --> P[j] is the j-th row, while the first part is a number
        # So P_itr_attn is the weighted sum of P
        # SA is column-wise soft-max applied on A
        # P_itr_attn[i] is the sum of all rows of P scaled by i-th row of SA
        SA = softmax(A, axis=2)        # (batch, p, p)
        itr_attn = K.batch_dot(SA, P)  # (batch, p, d)

        ''' Fuse gate '''
        # These layers are considered linear in the official implementation, therefore we apply dropout on each input
        P_concat = K.concatenate([P, itr_attn], axis=2)                         # (batch, p, 2d)
        z = K.tanh(K.dot(DecayingDropout()(P_concat), self.w1) + self.b1)       # (batch, p, d)
        r = K.sigmoid(K.dot(DecayingDropout()(P_concat), self.w2) + self.b2)    # (batch, p, d)
        f = K.sigmoid(K.dot(DecayingDropout()(P_concat), self.w3) + self.b3)    # (batch, p, d)

        encoding = r * P + f * z        # (batch, p, d)
        return encoding                 # (batch, p, d)
Example #41
File: NAC.py  Project: acyrl/NALU
 def call(self, inputs):
     W = K.tanh(self.W_hat) * K.sigmoid(self.M_hat)
     return K.dot(inputs, W)
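This is the Neural Accumulator (NAC) from the NALU paper (Trask et al., 2018): tanh(W_hat) * sigmoid(M_hat) biases the effective weights toward {-1, 0, 1}, so the layer favors learning additions and subtractions of its inputs. The weight construction standalone (illustrative):

import numpy as np
from keras import backend as K

W_hat = K.variable(np.random.randn(4, 2).astype("float32"))
M_hat = K.variable(np.random.randn(4, 2).astype("float32"))
W = K.tanh(W_hat) * K.sigmoid(M_hat)  # entries pushed toward -1, 0, or 1
print(K.eval(W))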
Example #42
def gelu_tanh(x):
    """GELU activation computed with the tanh approximation.
    """
    cdf = 0.5 * (1.0 + K.tanh(
        (np.sqrt(2 / np.pi) * (x + 0.044715 * K.pow(x, 3)))))
    return x * cdf
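The tanh form closely approximates the exact GELU x * Phi(x); a quick numerical comparison (requires scipy; illustrative):

import numpy as np
from scipy.stats import norm

x = np.linspace(-3, 3, 61)
approx = 0.5 * x * (1.0 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3)))
exact = x * norm.cdf(x)
print(np.max(np.abs(approx - exact)))  # small, well below 1e-2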
Example #43
 def call(self, x):
     prog = K.tanh(self.W)
     out = x + prog
     return out
 def call(self, inputs, **kwargs):
     W = K.tanh(self.W_hat) * K.sigmoid(self.M_hat)
     a = K.dot(inputs, W)
     return a
Example #45
def mish(x):
    return x*K.tanh(K.softplus(x))
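Mish (Misra, 2019) is x * tanh(softplus(x)); since it is a plain tensor function, it can be passed directly as a Keras activation (the layer below is illustrative):

from keras.layers import Dense

hidden = Dense(64, activation=mish)  # uses the function defined above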
Example #46
def scaled_hyperbolic_tangent(x):
    # 1.7159 * tanh(2x/3): the scaled tanh recommended in LeCun et al.,
    # "Efficient BackProp", chosen so that f(+/-1) is approximately +/-1.
    return K.tanh((2 / 3) * x) * 1.7159
 def call(self, inputs):
     return inputs * K.tanh(K.softplus(inputs))
Example #48
 def call(self, x):
     weights = K.transpose(K.tanh(K.dot(self.u, self.tinyW)))
     return K.dot(x, weights)
Example #49
 def call(self, inputs):
     a = K.dot(inputs[0], self.kernel)
     y_trans = K.permute_dimensions(inputs[1], (0,2,1))
     b = K.batch_dot(a, y_trans, axes=[2,1])
      print(K.int_shape(b))
     return K.tanh(b)
def gelu(x):
    # tanh approximation of GELU; assumes `import math`.
    c = math.sqrt(2 / math.pi)
    return 0.5 * x * (1 + K.tanh(c * (x + 0.044715 * K.pow(x, 3))))
Example #51
def gelu_tanh(x):
    cdf = 0.5 * (1.0 + K.tanh(
        (np.sqrt(2 / np.pi) * (x + 0.044715 * K.pow(x, 3)))))
    return x * cdf
 def call(self, x):
     u_it = K.tanh(K.dot(x, self.W) + self.b)
     a_it = K.dot(u_it, self.u)
     a_it = K.squeeze(a_it, -1)
     a_it = K.softmax(a_it)
     return a_it
Example #53
 def call(self, x, mask=None):
     # Theano backend only: `dimshuffle` is a Theano tensor method.
     eij = K.tanh(K.dot(x, self.W))
     ai = K.exp(eij)
     weights = ai / K.sum(ai, axis=1).dimshuffle(0, 'x')
     weighted_input = x * weights.dimshuffle(0, 1, 'x')
     return weighted_input.sum(axis=1)
Example #54
def mean_length_error(y_true, y_pred):
    y_true_f = K.sum(K.round(K.flatten(y_true)))
    y_pred_f = K.sum(K.round(K.flatten(y_pred)))
    delta = (y_pred_f - y_true_f)
    return K.mean(K.tanh(delta))
Example #55
 def call(self, inputs, **kwargs):
     results = inputs * K.tanh(K.softplus(inputs))
     return results
Example #56
 def call(self, x):
     tmp = K.tanh(K.dot(x[0], self.w1) + K.dot(x[1], self.w2))
     z = K.sigmoid(K.dot(tmp, self.w3))
     x_new = z * x[0] + (1 - z) * x[1]
     return x_new
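This is a highway-style gate: z interpolates element-wise between the two inputs. The gate standalone (shapes illustrative):

import numpy as np
from keras import backend as K

x0 = K.constant(np.random.randn(2, 8).astype("float32"))
x1 = K.constant(np.random.randn(2, 8).astype("float32"))
w1 = K.variable(np.random.randn(8, 8).astype("float32"))
w2 = K.variable(np.random.randn(8, 8).astype("float32"))
w3 = K.variable(np.random.randn(8, 8).astype("float32"))
z = K.sigmoid(K.dot(K.tanh(K.dot(x0, w1) + K.dot(x1, w2)), w3))
print(K.int_shape(z * x0 + (1 - z) * x1))  # (2, 8)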