Example #1
def time_distributed_dense(x, w, b=None, dropout=None,
                           input_dim=None, output_dim=None, timesteps=None, activation='linear'):
    '''Apply y.w + b for every temporal slice y of x.
    '''
    activation = activations.get(activation)

    if not input_dim:
        # won't work with TensorFlow
        input_dim = K.shape(x)[2]
    if not timesteps:
        # won't work with TensorFlow
        timesteps = K.shape(x)[1]
    if not output_dim:
        # won't work with TensorFlow
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))

    x = K.dot(x, w)
    if b is not None:
        x = x + b
    # reshape to 3D tensor
    x = K.reshape(activation(x), (-1, timesteps, output_dim))
    return x
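A minimal usage sketch for the helper above (assuming the module's own imports, i.e. `from keras import activations, backend as K`; the shapes below are illustrative only):

import numpy as np
from keras import backend as K

# x: (batch=2, timesteps=5, input_dim=8); w: (input_dim=8, output_dim=16)
x = K.variable(np.random.rand(2, 5, 8))
w = K.variable(np.random.rand(8, 16))
b = K.variable(np.zeros(16))

# passing the static dims explicitly avoids the symbolic K.shape fallback,
# which the comments above note does not work with TensorFlow
y = time_distributed_dense(x, w, b, dropout=0.5,
                           input_dim=8, output_dim=16, timesteps=5)
# y: (batch, timesteps, output_dim) == (2, 5, 16)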
Example #2
 def get_constants(self, x):
     constants = []
     if 0 < self.dropout_U < 1:
         ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
         ones = K.concatenate([ones] * self.output_dim, 1)
         B_U = K.in_train_phase(K.dropout(ones, self.dropout_U), ones)
         constants.append(B_U)
     else:
         constants.append(K.cast_to_floatx(1.))
     if self.consume_less == 'cpu' and 0 < self.dropout_W < 1:
         input_shape = self.input_spec[0].shape
         input_dim = input_shape[-1]
         ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
         ones = K.concatenate([ones] * input_dim, 1)
         B_W = K.in_train_phase(K.dropout(ones, self.dropout_W), ones)
         constants.append(B_W)
     else:
         constants.append(K.cast_to_floatx(1.))
     return constants
Example #3
 def dot_product_attention(self, x, seq_len=None, dropout=0.1, training=None):
     q, k, v = x
     logits = tf.matmul(q, k, transpose_b=True)
     if self.bias:
         logits += self.b
     if seq_len is not None:
         logits = self.mask_logits(logits, seq_len)
     weights = tf.nn.softmax(logits, name="attention_weights")
     weights = K.in_train_phase(K.dropout(weights, dropout), weights, training=training)
     x = tf.matmul(weights, v)
     return x
Example #4
    def get_constants(self, x):
        constants = []
        if 0 < self.dropout_U < 1:
            ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
            ones = K.concatenate([ones] * self.output_dim, 1)
            B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones) for _ in range(3)]
            constants.append(B_U)
        else:
            constants.append([K.cast_to_floatx(1.) for _ in range(3)])

        if 0 < self.dropout_W < 1:
            input_shape = self.input_spec[0].shape
            input_dim = input_shape[-1]
            ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
            ones = K.concatenate([ones] * input_dim, 1)
            B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(3)]
            constants.append(B_W)
        else:
            constants.append([K.cast_to_floatx(1.) for _ in range(3)])
        return constants
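For context, a hedged sketch of how a recurrent step typically consumes the three masks built above (the gate weights `W_z`/`U_z` are hypothetical stand-ins; old Keras GRU `step` methods follow this pattern, one mask per gate):

import numpy as np
from keras import backend as K

input_dim, units = 3, 4
W_z = K.variable(np.random.rand(input_dim, units))  # hypothetical input->gate weights
U_z = K.variable(np.random.rand(units, units))      # hypothetical recurrent weights

def step(x_t, states):
    # states: [h_prev, B_U, B_W], where B_U/B_W are the lists from get_constants
    h_prev, B_U, B_W = states
    # update gate only, for illustration; the other gates use masks 1 and 2
    z = K.sigmoid(K.dot(x_t * B_W[0], W_z) + K.dot(h_prev * B_U[0], U_z))
    return z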
Example #5
 def call(self, inputs, **kwargs):
     main_input, embedding_matrix = inputs
     input_shape_tensor = K.shape(main_input)
     last_input_dim = K.int_shape(main_input)[-1]
     emb_input_dim, emb_output_dim = K.int_shape(embedding_matrix)
     projected = K.dot(K.reshape(main_input, (-1, last_input_dim)), self.projection)
     if self.add_biases:
         projected = K.bias_add(projected, self.biases, data_format='channels_last')
     if 0 < self.projection_dropout < 1:
         projected = K.in_train_phase(
             lambda: K.dropout(projected, self.projection_dropout),
             projected,
             training=kwargs.get('training'))
     attention = K.dot(projected, K.transpose(embedding_matrix))
     if self.scaled_attention:
         sqrt_d = K.constant(math.sqrt(emb_output_dim), dtype=K.floatx())
         attention = attention / sqrt_d
     result = K.reshape(self.activation(attention),
                        (input_shape_tensor[0], input_shape_tensor[1], emb_input_dim))
     return result
Example #6
    def call(self, inputs, **kwargs):
        feature_num = self.feature_num
        [v, q] = inputs
        v_proj = K.tanh(K.dot(v, self.v_proj))
        q_proj = K.tanh(K.dot(q, self.q_proj))
        q_proj = K.expand_dims(q_proj, 1)
        q_proj = tf.tile(q_proj, [1, feature_num, 1])
        joint_repr = v_proj * q_proj
        joint_repr = K.dropout(joint_repr, self.drop_rate)

        logit = K.dot(joint_repr, self.linear)
        logit = K.reshape(logit, shape=[-1, feature_num])
        logit = K.softmax(K.l2_normalize(logit))  #[batch,K]
        logit = K.expand_dims(logit, -1)

        self.result = K.sum(logit * v, 1)  #v:[batch,K,4096]
        return self.result
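Note that `K.dropout` in the example above runs unconditionally, so units are dropped at inference time as well. If train-only dropout is wanted, the gated pattern used elsewhere on this page is a drop-in replacement for that line (assuming `call` receives a `training` flag via `kwargs`):

        joint_repr = K.in_train_phase(
            lambda: K.dropout(joint_repr, self.drop_rate),  # train branch
            joint_repr,                                     # inference branch: identity
            training=kwargs.get('training'))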
Example #7
def _time_distributed_dense(x,
                            w,
                            b=None,
                            dropout=None,
                            input_dim=None,
                            output_dim=None,
                            timesteps=None,
                            training=None):
    """Apply `y . w + b` for every temporal slice y of x.
    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.
    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.shape(w)[1]
    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)
    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x
Example #8
    def call(self, inputs, train=True, cache=None):
        assert isinstance(inputs, (list, tuple)) and len(inputs) == 3
        x = inputs[0]
        y = inputs[1]
        bias = inputs[2]

        q = self.q_dense_layer(x)
        k = self.k_dense_layer(y)
        v = self.v_dense_layer(y)

        if cache is not None:
            # Combine cached keys and values with new keys and values.
            k = K.concatenate([cache["k"], k], axis=1)
            v = K.concatenate([cache["v"], v], axis=1)

            # Update cache
            cache["k"] = k
            cache["v"] = v

        # [batch_size, seq_len, hidden_size]

        # Split q, k, v into heads.
        q = self.split_heads(q)
        k = self.split_heads(k)
        v = self.split_heads(v)

        # Scale q to prevent the dot product between q and k from growing too large.
        depth = (self.hidden_size // self.num_heads)
        q *= depth ** -0.5

        # Calculate dot product attention
        logits = K2.matmul(q, K.permute_dimensions(k, (0, 1, 3, 2)))
        logits += bias
        weights = K.softmax(logits, axis=-1)
        if train:
            weights = K.dropout(weights, self.attention_dropout)
        attention_output = K2.matmul(weights, v)

        # Recombine heads --> [batch_size, length, hidden_size]
        attention_output = self.combine_heads(attention_output)

        # Run the combined outputs through another linear projection layer.
        attention_output = self.output_dense_layer(attention_output)
        return attention_output
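`split_heads` and `combine_heads` are not shown in this example; below is a minimal sketch of the usual reshape/transpose implementation (an assumption, not the author's code; it requires a statically known `hidden_size` divisible by `num_heads`):

import tensorflow as tf

def split_heads(x, num_heads):
    # (batch, length, hidden) -> (batch, num_heads, length, hidden // num_heads)
    batch, length = tf.shape(x)[0], tf.shape(x)[1]
    depth = int(x.shape[-1]) // num_heads
    x = tf.reshape(x, [batch, length, num_heads, depth])
    return tf.transpose(x, [0, 2, 1, 3])

def combine_heads(x):
    # (batch, num_heads, length, depth) -> (batch, length, num_heads * depth)
    batch, length = tf.shape(x)[0], tf.shape(x)[2]
    x = tf.transpose(x, [0, 2, 1, 3])
    return tf.reshape(x, [batch, length, -1])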
Example #9
def train(num_classes=100, epochs=100, reps=1):
    (x_train, y_train) = load_data(num_classes,reps)

    model = Sequential()
    model.add(Lambda(lambda x: K.dropout(x, level=0.9), input_shape=input_shape))  # permanent dropout

    model.add(Conv2D(32, kernel_size=(3, 3), kernel_initializer='uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Conv2D(64, (3, 3),  kernel_initializer='uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))    
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    
    model.add(Dense(128,  kernel_initializer='uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    
    model.add(Dense(num_classes, activation='softmax'))
    
#    model.load_weights("saved_bn.hdf5")
    model.summary()

    
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True),
                  metrics=['accuracy'])
    #add callbacks
    tensorboard = TensorBoard(log_dir="logs/{}".format(time()), histogram_freq=10, write_graph=True, write_images=True)
    checkpoint = ModelCheckpoint("saved_bn.hdf5", monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)
    stopper = EarlyStopping(monitor='val_acc', min_delta=0.01, patience=10, verbose=1, mode='auto')
    
    model.fit(x_train, y_train,
        batch_size=100,
        epochs=epochs,
        verbose=1,
        shuffle=True,
        validation_data=(x_train,y_train),
        callbacks=[tensorboard, checkpoint, stopper])
    
    return model
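The permanent-dropout `Lambda` at the top of the model keeps dropout active at prediction time, which allows Monte-Carlo sampling of the predictive distribution. A hedged usage sketch (`x_test` and the 20 passes are hypothetical):

import numpy as np

model = train(num_classes=100, epochs=100)
# x_test: hypothetical held-out inputs; each pass drops a different random mask
preds = np.stack([model.predict(x_test) for _ in range(20)])
mean_pred = preds.mean(axis=0)  # averaged class probabilities
std_pred = preds.std(axis=0)    # spread across passes ~ predictive uncertainty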
Example #10
def time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.
    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.
    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x
Example #11
    def step_do(self, step_in, states):  # defines the computation for one iteration step

        in_value = step_in
        if 0 < self.dropout < 1.:
            self._dropout_mask = K.in_train_phase(
                K.dropout(K.ones_like(step_in), self.dropout),
                K.ones_like(step_in))
            in_value = step_in * self._dropout_mask
        '''    
        g1 = states[0][:, :-18]
        g2 = states[1][:, :-18]
        g3 = in_value[:, :-18]
        
        d1 = K.sigmoid(K.sqrt(K.sum((K.square(g3-g1)),axis=-1,keepdims=True)))
        d2 = K.sigmoid(K.sqrt(K.sum((K.square(g3-g2)),axis=-1,keepdims=True)))
        
        d1 = K.sigmoid(K.sum((K.abs(in_value-states[0])/self.units),axis=-1,keepdims=True))
        d2 = K.sigmoid(K.sum((K.abs(in_value-states[1])/self.units),axis=-1,keepdims=True))
        '''

        d1 = K.sigmoid(states[0] * in_value) / 2
        d2 = K.sigmoid(states[1] * in_value) / 2
        #update=1#K.sigmoid(K.dot(states[0], self.state_kernel)+ K.dot(step_in, self.input_kernel))
        #print('d1.shape',d1.shape)
        state1 = d1 * states[0] + (1 - d1) * in_value
        print('state1.shape', state1.shape)
        state2 = (1 - d2) * states[1] + d2 * in_value

        #outputs = (1-update)*states[0]+update*step_in
        '''
        lt = K.expand_dims(state1,axis=-2)
        st = K.expand_dims(state2,axis=-2)
        outputs = K.concatenate([lt, st], axis=-2)
        
        out1 = K.dot(state1, self.encode_kernel)
        out2 = K.dot(state2, self.encode_kernel)
        '''
        outputs = K.concatenate([state1, state2], axis=-1)
        #outputs = K.relu(outputs)

        return outputs, [state1, state2]
Example #12
def _time_distributed_dense(x,
                            w,
                            b=None,
                            dropout=None,
                            input_dim=None,
                            output_dim=None,
                            timesteps=None,
                            training=None):
    """Apply `y . w + b` for every temporal slice y of x.
    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.
    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # Note: this is arguably a clearer implementation than the one in older Keras;
    # it behaves identically on TensorFlow but is untested on other backends.
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    return x
Example #13
    def _call_attention(self, Key, Value, Query):
        r"""Self-attention: compute a similarity matrix with the similarity
        function, pass it through a softmax, and dot the result with the
        source itself.

        .. math::  A = Softmax(Similarity(Source,Query))
        .. math::  C = A \cdot Source
        """
        if isinstance(self.similarity, Callable):
            sim = self.similarity(Key, Query)
        else:
            sim = getattr(self, self.similarity)(Key, Query)
        sm = activations.softmax(sim)
        if self.dropout_rate:
            sm = K.dropout(sm, self.dropout_rate)
        if isinstance(self.mergfunc, Callable):
            result = self.mergfunc(sm, Value)
        elif isinstance(self.mergfunc, str):
            result = getattr(self, self.mergfunc, self.batch_dot_merg)(sm, Value)
        else:
            result = getattr(self, 'batch_dot_merg')(sm, Value)
        return result
Example #14
        def deconv2d(layer_input,
                     filters,
                     f_size=8,
                     dropout_rate=0,
                     permanent=False):
            """Layers used during upsampling"""
            u = UpSampling2D(size=2)(layer_input)
            u = Conv2D(filters,
                       kernel_size=f_size,
                       strides=1,
                       padding='same',
                       activation='relu')(u)
            if dropout_rate and not permanent:
                u = Dropout(dropout_rate)(u)
            elif dropout_rate and permanent:
                # permanent dropout from my main man fchollet <3
                u = Lambda(lambda x: K.dropout(x, level=dropout_rate))(u)

            u = BatchNormalization(momentum=0.8)(u)
            return u
Example #15
    def call(self, x, mask=None):
        if mask is None:
            mask = T.mean(T.ones_like(x), axis=-1)
        mask = T.cast(mask, T.floatx())

        dr_perc = 0.5
        mask1 = T.dropout(mask, level=dr_perc)
        mask1 = T.clip(mask1, 0, 1)

        mod_smax = T.max(x[:, :, 0] * mask1, axis=1).dimshuffle(0, 'x')
        smax = T.max(x[:, :, 0] * mask,
                     axis=1).dimshuffle(0, 'x')  #(nb_samples, np_features)
        smin = T.min(x[:, :, 0] * mask,
                     axis=1).dimshuffle(0, 'x')  #(nb_samples, np_features)

        #        mod_smax=T.expand_dims(T.max(x[:,:,0]*mask1,axis=1), 1)
        #        smax = T.expand_dims(T.max(x[:,:,0]*mask,axis=1), 1) #(nb_samples, np_features)
        #        smin = T.expand_dims(T.min(x[:,:,0]*mask,axis=1), 1) #(nb_samples, np_features)

        x_rounded = x[:, :, 0] * mask
        sum_unmasked = T.batch_dot(x_rounded, mask,
                                   axes=1)  # (nb_samples,np_features)

        ssum = T.sum(x, axis=-2)  #(nb_samples, np_features)
        rcnt = T.sum(
            mask, axis=-1, keepdims=True
        )  #(nb_samples) # number of unmasked samples in each record
        bag_label = sum_unmasked / rcnt
        smean = ssum / rcnt

        #        # sigmoid weighted mean:
        #        weight_fn=T.reshape(T.transpose(T.tile(T.reshape(T.variable(get_weight_fn(100)),(100,1)),T.shape(x)[0])),(T.shape(x)[0],T.shape(x)[1],1))
        #        weighted_x=weight_fn*x
        #        wsum=T.sum(weighted_x,axis=-2) #(nb_samples, np_features)
        ##        weight_sum=T.reshape(T.batch_dot(T.ones_like(x),weight_fn,axes=1),T.shape(rcnt)) # used T.ones_like(x) instead of x to check if I am seeing the outputs..which helped me debug
        #        wmean=wsum # because the weights are normalized

        #        sofmax=(1/largeNum)*T.log(T.sum(T.exp()))

        #        return bag_label
        return smax  # max voting
Example #16
    def call(self, inputs, mask=None):
        assert len(inputs) == 2
        query = K.bias_add(K.dot(inputs[0], self.W_query), self.bias_query)
        if self.query_act is not None:
            query = self.query_act(query)
        key = K.bias_add(K.dot(inputs[1], self.W_key), self.bias_key)
        if self.key_act is not None:
            key = self.key_act(key)
        value = K.bias_add(K.dot(inputs[1], self.W_value), self.bias_value)
        if self.value_act is not None:
            value = self.value_act(value)

        query = K.reshape(query, shape=(-1, K.int_shape(inputs[0])[1], self.num_attention_heads, self.size_per_head))
        query = K.permute_dimensions(query, pattern=(0,2,1,3))
        key = K.reshape(key, shape=(-1, K.int_shape(inputs[1])[1], self.num_attention_heads, self.size_per_head))
        key = K.permute_dimensions(key, pattern=(0,2,1,3))
        value = K.reshape(value, shape=(-1, K.int_shape(inputs[1])[1], self.num_attention_heads, self.size_per_head))
        value = K.permute_dimensions(value, pattern=(0,2,1,3))

        attention_scores = K.batch_dot(query, key, axes=(3,3))
        attention_scores /= np.sqrt(self.size_per_head)

        if mask is not None and mask != [None, None]:
            mask_q, mask_k = mask
            mask_q = K.cast(mask_q, K.floatx())
            mask_k = K.cast(mask_k, K.floatx())
            mask_q = K.expand_dims(mask_q)
            mask_k = K.expand_dims(mask_k)
            attention_mask = K.batch_dot(mask_q, mask_k, axes=(-1,-1))
            attention_mask = K.expand_dims(attention_mask, axis=1)
            adder = (1 - attention_mask) * (-10000.0)
            attention_scores += adder

        attention_probs = K.softmax(attention_scores, axis=-1)
        attention_probs = K.dropout(attention_probs, self.attention_probs_dropout_prob)

        context = K.batch_dot(attention_probs, value, axes=(3,2))
        context = K.permute_dimensions(context, pattern=(0,2,1,3))
        context = K.reshape(context, shape=(-1, K.int_shape(inputs[0])[1], self.num_attention_heads*self.size_per_head))

        return context
Example #17
def time_distributed_dense(input_tensor,
                           weight,
                           bias=None,
                           timesteps=None,
                           input_dim=None,
                           output_dim=None,
                           dropout=None,
                           training=None):
    """Apply t.weight + bias for every t of timesteps of input
        input_tensor: input tensor shape = (batch num, timestep, input_dim)
        weight: weight tensor = (input_dim, output_dim)
        bias: optional bias
        dropout: dropout value
        training: training phase boolean
    """
    if timesteps is None:
        timesteps = K.shape(input_tensor)[1]
    if input_dim is None:
        input_dim = K.shape(input_tensor)[2]
    if output_dim is None:
        output_dim = K.shape(weight)[1]

    if dropout is not None and 0. < dropout < 1.:
        #apply dropout at every timestep
        ones = K.ones_like(K.reshape(input_tensor[:, 0, :], (-1, input_dim)))
        dropout_tensor = K.dropout(ones, dropout)
        dropout_tensor_with_timestep = K.repeat(dropout_tensor, timesteps)
        input_tensor = K.in_train_phase(input_tensor *
                                        dropout_tensor_with_timestep,
                                        input_tensor,
                                        training=training)

    #collapse timestep and batch num together
    input_tensor = K.reshape(input_tensor, (-1, input_dim))
    input_tensor = K.dot(input_tensor, weight)
    if bias is not None:
        input_tensor = K.bias_add(input_tensor, bias)

    output_tensor = K.reshape(input_tensor, (-1, timesteps, output_dim))
    return output_tensor
Example #18
def _time_distributed_dense(x,
                            w,
                            b=None,
                            dropout=None,
                            input_dim=None,
                            output_dim=None,
                            timesteps=None,
                            training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x
Example #19
def build_model(loss="mse", num_outputs=1):
    model = Sequential()

    model.add(
        ConvLSTM2D(filters=100,
                   kernel_size=(3, 3),
                   input_shape=(None, nx, ny, 1),
                   padding='same',
                   return_sequences=False))
    model.add(BatchNormalization())
    model.add(Lambda(lambda x: K.dropout(x, level=0.75)))

    model.add(
        Dense(
            units=num_outputs,
            kernel_regularizer=regularizers.l2(0.0001),
        ))
    model.add(Activation("linear"))

    model.compile(loss=loss, optimizer='nadam')

    return model
Example #20
    def sample_h_given_x(self, x):
        """
        Draw sample from p(h|x).

        For Bernoulli RBM the conditional probability distribution can be derived to be 
           p(h_j=1|x) = sigmoid(x^T W[:,j] + bh_j).
        """
        h_pre = K.dot(x, self.W) + self.bh          # pre-sigmoid (used in cross-entropy error calculation for better numerical stability)
        #h_sigm = K.sigmoid(h_pre)              # mean of Bernoulli distribution ('p', prob. of variable taking value 1), sometimes called mean-field value
        h_sigm = self.activation(self.scaling_h_given_x * h_pre)

        # drop out noise
        if 0.0 < self.p < 1.0:
            noise_shape = self._get_noise_shape(h_sigm)
            h_sigm = K.in_train_phase(K.dropout(h_sigm, self.p, noise_shape), h_sigm)

        h_samp = random_binomial(shape=h_sigm.shape, n=1, p=h_sigm)
                            # random sample
                            #   \hat{h} = 1,      if p(h=1|x) > uniform(0, 1)
                            #             0,      otherwise

        return h_samp, h_pre, h_sigm
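`random_binomial` is not defined in this snippet; presumably it wraps the backend sampler. A minimal sketch of such a wrapper (an assumption, not the author's code; `n` is accepted for the RBM-style signature, but the backend only supports single-trial draws):

from keras import backend as K

def random_binomial(shape, n=1, p=0.5):
    # one Bernoulli draw per element: 1 with probability p, else 0;
    # p may be a tensor of per-element probabilities, as used above
    assert n == 1
    return K.random_binomial(shape, p=p)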
Example #21
    def time_distributed_dense(self, x, w, b=None, dropout=None,
                           input_dim=None, output_dim=None, timesteps=None):
        '''Apply y.w + b for every temporal slice y of x.
        '''
        self.x = x
        self.w = w
        self.b = b
        self.dropout = dropout
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.timesteps = timesteps
        
        if not input_dim:
            # won't work with TensorFlow
            input_dim = K.shape(x)[2]
        if not timesteps:
            # won't work with TensorFlow
            timesteps = K.shape(x)[1]
        if not output_dim:
            # won't work with TensorFlow
            output_dim = K.shape(w)[1]

        if dropout:
            # apply the same dropout pattern at every timestep
            ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
            dropout_matrix = K.dropout(ones, dropout)
            expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
            x *= expanded_dropout_matrix

        # collapse time dimension and batch dimension together
        x = K.reshape(x, (-1, input_dim))
    
        x = K.dot(x, w)
        if b is not None:
            x = x + b
        # reshape to 3D tensor
        x = K.reshape(x, (-1, timesteps, output_dim))
        return x
Example #22
def time_distributed_dense(x,
                           w,
                           b=None,
                           dropout=None,
                           input_dim=None,
                           output_dim=None,
                           timesteps=None,
                           activation='linear'):
    '''Apply y.w + b for every temporal slice y of x.
    '''
    activation = activations.get(activation)

    if not input_dim:
        # won't work with TensorFlow
        input_dim = K.shape(x)[2]
    if not timesteps:
        # won't work with TensorFlow
        timesteps = K.shape(x)[1]
    if not output_dim:
        # won't work with TensorFlow
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))

    x = K.dot(x, w)
    if b is not None:
        x = x + b
    # reshape to 3D tensor
    x = K.reshape(activation(x), (-1, timesteps, output_dim))
    return x
Example #23
    def call(self, x, mask=None):
        q = K.dot(x, self.W_q)
        k = K.dot(x, self.W_k)
        v = K.dot(x, self.W_v)

        k_t = K.permute_dimensions(k, (0, 2, 1))

        q *= self.depth**(-0.5)  # scaled dot-product

        logit = K.batch_dot(q, k_t)  # [batch_size, q_length, k_length]

        # normalize by taking the softmax
        attention_weight = K.softmax(logit)

        # dropout
        attention_weight = K.dropout(attention_weight, level=self.dropout_rate)

        # gather information from value according to the attention weights
        # [batch_size, q_length, depth]
        attention_output = K.batch_dot(attention_weight, v)
        output = K.dot(attention_output, self.W_o)

        return output
Example #24
    def get_constants(self, x):  # needs further editing
        constants = []
        # if 0 < self.dropout_U < 1:
        #     ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
        #     ones = K.concatenate([ones] * self.output_dim, 1)
        #     B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones) for _ in range(3)]
        #     constants.append(B_U)
        # else:
        #     constants.append([K.cast_to_floatx(1.) for _ in range(3)])

        if 0 < self.dropout_W < 1:
            input_shape = self.input_spec[0].shape
            input_dim = input_shape[-1]
            ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
            ones = K.concatenate([ones] * input_dim, 1)
            B_W = [
                K.in_train_phase(K.dropout(ones, self.dropout_W), ones)
                for _ in range(3)
            ]
            constants.append(B_W)
        else:
            constants.append([K.cast_to_floatx(1.) for _ in range(3)])
        return constants
Example #25
    def step_do(self, step_in, states):  # defines the computation for one iteration step

        in_value = step_in
        if 0 < self.dropout < 1.:
            self._dropout_mask = K.in_train_phase(
                K.dropout(K.ones_like(step_in), self.dropout),
                K.ones_like(step_in))
            in_value = step_in * self._dropout_mask

        d1 = K.sigmoid(
            K.sqrt(
                K.sum((K.square(in_value - states[0]) / self.units),
                      axis=-1,
                      keepdims=True) / 2))
        d2 = K.sigmoid(
            K.sqrt(
                K.sum((K.square(in_value - states[1]) / self.units),
                      axis=-1,
                      keepdims=True) / 2))
        '''
        d1 = K.sigmoid(K.sum((K.abs(in_value-states[0])/self.units),axis=-1,keepdims=True))
        d2 = K.sigmoid(K.sum((K.abs(in_value-states[1])/self.units),axis=-1,keepdims=True))
        '''
        print('d1.shape', d1.shape)
        state1 = d1 * states[0] + (1 - d1) * in_value
        print('state1.shape', state1.shape)
        state2 = (1 - d2) * states[0] + d2 * in_value
        '''
        lt = K.expand_dims(state1,axis=-2)
        st = K.expand_dims(state2,axis=-2)
        outputs = K.concatenate([lt, st], axis=-2)
        '''
        outputs = K.concatenate([state1, state2], axis=-1)

        return outputs, [state1, state2]
Example #26
    def call(self, inputs, **kwargs):
        main_input, embedding_matrix = inputs
        input_shape_tensor = K.shape(main_input)
        last_input_dim = input_shape_tensor[-1]
        print('input_shape_tensor: ', input_shape_tensor)

        embedding_matrix_shape = K.shape(embedding_matrix)
        # vocab_size, hidden_size
        emb_input_dim, emb_output_dim = embedding_matrix_shape[
            -2], embedding_matrix_shape[-1]

        # shape: (main_input_shape[0], hidden_size)
        projected = K.dot(K.reshape(main_input, (-1, last_input_dim)),
                          self.projection)
        if self.add_biases:
            projected = K.bias_add(projected,
                                   self.biases,
                                   data_format='channels_last')
        if 0 < self.projection_dropout < 1:
            projected = K.in_train_phase(
                lambda: K.dropout(projected, self.projection_dropout),
                projected,
                training=kwargs.get('training'))

        # shape: (main_input_shape[0], vocab_size). Calculate with all words in the vocabulary
        attention = K.dot(projected, K.transpose(embedding_matrix))
        if self.scaled_attention:
            # scaled dot-product attention, described in
            # "Attention is all you need" (https://arxiv.org/abs/1706.03762)
            #sqrt_d = math.sqrt(emb_output_dim)
            sqrt_d = K.sqrt(K.cast(emb_output_dim, dtype=K.floatx()))
            attention = attention / sqrt_d
        result = K.reshape(
            self.activation(attention),
            (input_shape_tensor[0], input_shape_tensor[1], emb_input_dim))
        return result
Example #27
 def dropped_inputs():
     return K.dropout(ones, self.recurrent_dropout)
Example #28
 def dropped_inputs():
     return K.dropout(ones, self.dropout)
Example #29
 def dropped_inputs():
     return K.dropout(inputs,
                      self.rate,
                      noise_shape,
                      seed=self.seed)
Example #30
def _dropout(x, level, noise_shape=None, seed=None):
    x = K.dropout(x, level, noise_shape, seed)
    x *= (1. - level)  # compensate for the scaling by the dropout
    return x
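`K.dropout` implements inverted dropout: surviving units are scaled by 1 / (1 - level) so the expected activation is unchanged, and multiplying by (1 - level) undoes that scaling, leaving a plain 0/1 mask. A quick numeric check (illustrative values):

import numpy as np
from keras import backend as K

x = K.variable(np.ones((1000, 100)))
y = K.eval(K.dropout(x, 0.5))   # kept entries are 2.0, mean stays ~1.0
z = K.eval(_dropout(x, 0.5))    # kept entries are 1.0, mean drops to ~0.5
print(y.max(), y.mean())
print(z.max(), z.mean())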
Example #31
 def get_output(self, train=False):
     X = self.get_input(train)
     if self.p > 0.:
         X = K.dropout(X, level=self.p)
     return X
Example #32
 def call(self, x, mask=None):
     if 0. < self.rate < 1.:
         noise_shape = self._get_noise_shape(x)
         x = K.dropout(x, self.rate, noise_shape)
     return x
Example #33
# Train Set
float_data = shuffel(float_data)

# Normalization of train_data
float_data[:, :-1] = dp.zero_mean_normalization(float_data[:, :-1])
data = float_data[:, :-1]

train_data = data[150:]
label_data = float_data[150:, -1]
#data[:150]=K.get_value(K.dropout(data[:150],0.2,None,None))*0.8
#data[:150,],float_data[:150,-1]=verarbeit_meanimp(data[:150,],float_data[:150,-1])

#validation data
# MCAR
train_data_voll = K.dropout(train_data, 0.0001, None, None)
train_data_voll = K.get_value(train_data_voll)

train_data_20_MCAR = K.dropout(train_data, 0.2, None, None)
train_data_20_MCAR = K.get_value(train_data_20_MCAR)
train_data_20_MCAR *= 0.8

train_data_40_MCAR = K.dropout(train_data, 0.4, None, None)
train_data_40_MCAR = K.get_value(train_data_40_MCAR)
train_data_40_MCAR *= 0.6

#MAR
sort_float_data = sorted(float_data[150:], key=lambda x: x[1])
sort_float_data = np.array(sort_float_data)
sort_train_data = sort_float_data[:, :-1]
label_data_MAR = sort_float_data[:, -1]
Example #34
def VGG19(include_top=True,
          weights='imagenet',
          input_tensor=None,
          input_shape=None,
          pooling=None,
          classes=1000, isDrop=False, drop_rate=0.3,
          **kwargs):
    """Instantiates the VGG19 architecture.

    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one specified in your Keras config at `~/.keras/keras.json`.

    # Arguments
        include_top: whether to include the 3 fully-connected
            layers at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor
            (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)`
            (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format)).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 32.
            E.g. `(200, 200, 3)` would be one valid value.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """
    backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
                         ' as true, `classes` should be 1000')
    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=backend.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor
    # Block 1
    x = layers.Conv2D(64, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block1_conv1')(img_input)
    x = layers.Conv2D(64, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block1_conv2')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
    if isDrop:
        x = Lambda(lambda x: K.dropout(x, level=drop_rate))(x)
    # Block 2
    x = layers.Conv2D(128, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block2_conv1')(x)
    x = layers.Conv2D(128, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block2_conv2')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
    if isDrop:
        x = Lambda(lambda x: K.dropout(x, level=drop_rate))(x)
    # Block 3
    x = layers.Conv2D(256, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block3_conv1')(x)
    x = layers.Conv2D(256, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block3_conv2')(x)
    if isDrop:
        x = Lambda(lambda x: K.dropout(x, level=drop_rate))(x)
    x = layers.Conv2D(256, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block3_conv3')(x)
    x = layers.Conv2D(256, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block3_conv4')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block4_conv1')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block4_conv2')(x)
    if isDrop:
        x = Lambda(lambda x: K.dropout(x, level=drop_rate))(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block4_conv3')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block4_conv4')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block5_conv1')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block5_conv2')(x)
    if isDrop:
        x = Lambda(lambda x: K.dropout(x, level=drop_rate))(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block5_conv3')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block5_conv4')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)

    if include_top:
        # Classification block
        x = layers.Flatten(name='flatten')(x)
        x = layers.Dense(4096, activation='relu', name='fc1')(x)
        if isDrop:
            x = Lambda(lambda x: K.dropout(x, level=drop_rate))(x)
        x = layers.Dense(4096, activation='relu', name='fc2')(x)
        x = layers.Dense(classes, activation='softmax', name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input
    # Create model.
    model = models.Model(inputs, x, name='vgg19')

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            weights_path = keras_utils.get_file(
                'vgg19_weights_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH,
                cache_subdir='models',
                file_hash='cbe5617147190e668d6c5d5026f83318')
        else:
            weights_path = keras_utils.get_file(
                'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5',
                WEIGHTS_PATH_NO_TOP,
                cache_subdir='models',
                file_hash='253f8cb515780f3b799900260a226db6')
        model.load_weights(weights_path)
        if backend.backend() == 'theano':
            keras_utils.convert_all_kernels_in_model(model)
    elif weights is not None:
        model.load_weights(weights)

    return model
Example #35
 def output(self, train=False):
     X = self._default_input(train)
     if self.p > 0.:
         if train:
             X = K.dropout(X, level=self.p)
     return X
Example #36
def PermaDropout(rate):
    return Lambda(lambda x: K.dropout(x, level=rate))
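Usage sketch: `PermaDropout` drops into a model like a regular layer but, unlike `Dropout`, stays active at inference, which is what enables Monte-Carlo dropout predictions (layer sizes are illustrative):

from keras.models import Sequential
from keras.layers import Dense

model = Sequential([
    Dense(64, activation='relu', input_shape=(32,)),
    PermaDropout(0.5),  # active in both train and test phases
    Dense(10, activation='softmax'),
])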
Example #37
 def dropped_inputs():
     return K.dropout(ones, self.recurrent_dropout)
Example #38
 def dropped_inputs():
     return K.dropout(inputs, self.rate, noise_shape,
                      seed=self.seed)
Example #39
    X_test /= 255
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    return X_train, Y_train, X_test, Y_test

X_train, Y_train, X_test, Y_test = load_mnist()

model = Sequential()
model.add(Dense(512, input_shape=(784,)))
model.add(Activation('relu'))
#model.add(Dropout(0.2))
model.add(Lambda(lambda x: K.dropout(x, level=0.2)))
model.add(Dense(512))
model.add(Activation('relu'))
#model.add(Dropout(0.2))
model.add(Lambda(lambda x: K.dropout(x, level=0.2)))
model.add(Dense(10))
model.add(Activation('softmax'))

rms = RMSprop()
model.compile(loss='categorical_crossentropy', optimizer=rms)

model.fit(X_train, Y_train,
          batch_size=batch_size, nb_epoch=nb_epoch,
          show_accuracy=True, verbose=2,
          validation_data=(X_test, Y_test))
Example #40
 def call(self, x, mask=None):
     if 0. < self.p < 1.:
         x = K.dropout(x, level=self.p)
     return x
Example #41
 def func(args):
     old, new = args
     pred = K.random_uniform([]) < self.dropout
     ret = K.switch(pred, old, old + K.dropout(new, self.dropout))
     return K.in_train_phase(ret, old + new)
Example #42
 def call(self, x, mask=None):
     if 0. < self.p < 1.:
         x = K.in_train_phase(K.dropout(x, level=self.p), x)
     return x