Code Example #1
    def mycrossentropy(y_true, y_pred, e=0.1):
        # Standard cross-entropy against the true labels.
        loss1 = K.categorical_crossentropy(y_true, y_pred)

        # Cross-entropy against a uniform distribution over the classes;
        # nb_classes must be defined in the enclosing scope.
        loss2 = K.categorical_crossentropy(
            K.ones_like(y_pred) / nb_classes,
            y_pred)

        # Label smoothing: blend the two losses with mixing weight e.
        return (1 - e) * loss1 + e * loss2
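This is a label-smoothing loss: the usual categorical cross-entropy is blended with the cross-entropy against a uniform distribution over the classes, weighted by `e`. A minimal usage sketch, assuming a module-level `nb_classes` and a toy softmax model (both are assumptions, not part of the original snippet):

    from keras import backend as K
    from keras.layers import Dense
    from keras.models import Sequential

    nb_classes = 10  # must be visible to mycrossentropy when the loss graph is built

    model = Sequential([Dense(nb_classes, activation='softmax', input_shape=(784,))])
    model.compile(optimizer='adam', loss=mycrossentropy)  # e keeps its default of 0.1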
Code Example #2
    def get_constants(self, inputs, training=None):
        """Build dropout masks shared across all timesteps: four masks for
        the gate inputs, four for the recurrent state, plus the raw inputs
        appended for later use in the step function."""
        constants = []
        if self.implementation != 0 and 0 < self.dropout < 1:
            input_shape = K.int_shape(inputs)
            input_dim = input_shape[-1]
            ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1)))
            ones = K.tile(ones, (1, int(input_dim)))

            def dropped_inputs():
                return K.dropout(ones, self.dropout)

            dp_mask = [
                K.in_train_phase(dropped_inputs, ones, training=training)
                for _ in range(4)
            ]
            constants.append(dp_mask)
        else:
            constants.append([K.cast_to_floatx(1.) for _ in range(4)])

        if 0 < self.recurrent_dropout < 1:
            ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1)))
            ones = K.tile(ones, (1, self.units))

            def dropped_inputs():
                return K.dropout(ones, self.recurrent_dropout)

            rec_dp_mask = [
                K.in_train_phase(dropped_inputs, ones, training=training)
                for _ in range(4)
            ]
            constants.append(rec_dp_mask)
        else:
            constants.append([K.cast_to_floatx(1.) for _ in range(4)])

        # append the input as well for use later
        constants.append(inputs)
        return constants
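The core idiom above is building a `(batch, dim)` tensor of ones and pushing it through `K.dropout`, which produces one inverted-dropout mask per sample that is then reused for every timestep and every gate; `K.in_train_phase` turns the mask into a no-op at inference time. A minimal standalone sketch of the idiom (shapes and rate are assumptions):

    import numpy as np
    from keras import backend as K

    x = K.variable(np.random.rand(2, 5, 8))             # (batch, timesteps, input_dim)
    ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))  # (batch, 1) of ones
    ones = K.tile(ones, (1, 8))                          # (batch, input_dim)
    mask = K.dropout(ones, 0.5)                          # entries are 0 or 1 / (1 - 0.5) = 2
    print(K.eval(mask))                                  # one mask per sample, shared across timesteps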
Code Example #3
def _time_distributed_dense(x,
                            w,
                            b=None,
                            dropout=None,
                            input_dim=None,
                            output_dim=None,
                            timesteps=None,
                            training=None):
    """Apply `y . w + b` for every temporal slice y of x.
    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.
    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.int_shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x
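This helper applies one dense projection to every timestep by folding the time axis into the batch axis, doing a single matrix multiplication, and unfolding again; the optional dropout draws a single mask and repeats it across timesteps. A minimal call sketch (shapes and weights are assumptions for illustration):

    import numpy as np
    from keras import backend as K

    batch, timesteps, input_dim, output_dim = 2, 5, 8, 3
    x = K.variable(np.random.rand(batch, timesteps, input_dim))
    w = K.variable(np.random.rand(input_dim, output_dim))
    b = K.variable(np.zeros(output_dim))

    y = _time_distributed_dense(x, w, b, input_dim=input_dim,
                                output_dim=output_dim, timesteps=timesteps)
    print(K.eval(y).shape)  # (2, 5, 3): one dense projection per timestep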
Code Example #4
File: Conv2DLSTMP.py  Project: wingsby/rainforecast
    def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = self._generate_dropout_mask(
                K.ones_like(inputs),
                self.dropout,
                training=training,
                count=4)
        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = self._generate_dropout_mask(
                K.ones_like(states[1]),
                self.recurrent_dropout,
                training=training,
                count=4)

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_mask = self._recurrent_dropout_mask

        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state
        # If the incoming batch does not match the batch the masks were
        # built for (in this project: it is exactly half), trim the masks
        # along the batch axis. Note that tf.slice implicitly stacks the
        # list of four masks into a single 5-D tensor first.
        shape = dp_mask[0].shape
        if inputs.shape != shape:
            dp_mask = tf.slice(dp_mask, [0, 0, 0, 0, 0],
                               [4, shape[0] // 2, shape[1], shape[2], shape[3]])

        shape = rec_dp_mask[0].shape
        if inputs.shape[0] != shape[0]:
            rec_dp_mask = tf.slice(rec_dp_mask, [0, 0, 0, 0, 0],
                                   [4, shape[0] // 2, shape[1], shape[2], shape[3]])

        if 0 < self.dropout < 1.:
            inputs_i = inputs * dp_mask[0]
            inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[2]
            inputs_o = inputs * dp_mask[3]
        else:
            inputs_i = inputs
            inputs_f = inputs
            inputs_c = inputs
            inputs_o = inputs

        if 0 < self.recurrent_dropout < 1.:
            h_tm1_i = h_tm1 * rec_dp_mask[0]
            h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[2]
            h_tm1_o = h_tm1 * rec_dp_mask[3]
        else:
            h_tm1_i = h_tm1
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1
            h_tm1_o = h_tm1

        x_i = self.input_conv(inputs_i, self.kernel_i, self.bias_i,
                              padding=self.padding)
        x_f = self.input_conv(inputs_f, self.kernel_f, self.bias_f,
                              padding=self.padding)
        x_c = self.input_conv(inputs_c, self.kernel_c, self.bias_c,
                              padding=self.padding)
        x_o = self.input_conv(inputs_o, self.kernel_o, self.bias_o,
                              padding=self.padding)
        h_i = self.recurrent_conv(h_tm1_i,
                                  self.recurrent_kernel_i)
        h_f = self.recurrent_conv(h_tm1_f,
                                  self.recurrent_kernel_f)
        h_c = self.recurrent_conv(h_tm1_c,
                                  self.recurrent_kernel_c)
        h_o = self.recurrent_conv(h_tm1_o,
                                  self.recurrent_kernel_o)

        i = self.recurrent_activation(x_i + h_i)
        f = self.recurrent_activation(x_f + h_f)
        c = f * c_tm1 + i * self.activation(x_c + h_c)
        o = self.recurrent_activation(x_o + h_o)
        h = o * self.activation(c)

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True

        return h, [h, c]
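The `_generate_dropout_mask` helper is not shown in this excerpt. For reference, the equivalent module-level helper in the Keras codebase looks like the sketch below (this project wraps it as a method, so details may differ):

    def _generate_dropout_mask(ones, rate, training=None, count=1):
        def dropped_inputs():
            return K.dropout(ones, rate)

        if count > 1:
            return [K.in_train_phase(dropped_inputs, ones, training=training)
                    for _ in range(count)]
        return K.in_train_phase(dropped_inputs, ones, training=training)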
Code Example #5
File: model.py  Project: ArneBinder/bert_ner
    def __init__(self,
                 n_classes,
                 input_dims,
                 lr,
                 top_rnns=True,
                 metrics_eval_discard_first_classes=2):
        self.train_history = None
        input = Input(shape=(None, input_dims),
                      dtype='float32',
                      name='bert_encodings')
        X = input
        if top_rnns:
            X = get_bi_lstm()(X)
            X = get_bi_lstm()(X)
        pred = Dense(n_classes, activation='softmax')(X)
        self.model_save = Model(input, pred)
        #logger.debug(f'available training devices:\n{device_lib.list_local_devices()}'.replace('\n', '\n\t'))
        devices = device_lib.list_local_devices()
        # count GPUs from the device list manually, because virtual devices (e.g. XLA_GPU) would otherwise inflate the number
        gpus = len([None for d in devices if d.device_type == 'GPU'])
        if gpus > 1:
            self.model = multi_gpu_model(self.model_save,
                                         gpus=gpus,
                                         cpu_relocation=True)
            logging.info(f"Training using {gpus} GPUs...")
        else:
            self.model = self.model_save
            logging.info("Training using single GPU or CPU...")

        optimizer = Adam(lr=lr)
        self.model.compile(
            loss='categorical_crossentropy',
            optimizer=optimizer,
            metrics=[
                ANDCounter(
                    conditions_and=lambda y_true, y_pred: (
                        y_true,
                        K.round(y_pred),
                        # This condition masks all entries whose gold label is one of the
                        # first `metrics_eval_discard_first_classes` classes, i.e. <PAD>:
                        #   1) gold values for the remaining classes are summed along the class axis
                        #   2) the resulting vector is broadcast back to the original shape (via stack and the number of classes)
                        K.stack([
                            K.sum(y_true[:, :,
                                         metrics_eval_discard_first_classes:],
                                  axis=-1)
                        ] * n_classes,
                                axis=-1),
                    ),
                    name='tp'),
                ANDCounter(
                    conditions_and=lambda y_true, y_pred: (
                        K.abs(y_true - K.ones_like(y_true)),
                        K.round(y_pred),
                        # this condition masks all entries where y_true has class=0, i.e. <PAD> (see above)
                        K.stack([
                            K.sum(y_true[:, :,
                                         metrics_eval_discard_first_classes:],
                                  axis=-1)
                        ] * n_classes,
                                axis=-1),
                    ),
                    name='fp'),
                ANDCounter(
                    conditions_and=lambda y_true, y_pred: (
                        y_true,
                        K.abs(K.round(y_pred) - K.ones_like(y_pred)),
                        # this condition masks all entries where y_true has class=0, i.e. <PAD> (see above)
                        K.stack([
                            K.sum(y_true[:, :,
                                         metrics_eval_discard_first_classes:],
                                  axis=-1)
                        ] * n_classes,
                                axis=-1),
                    ),
                    name='fn'),
                ANDCounter(
                    conditions_and=lambda y_true, y_pred: (
                        y_true,
                        # this condition masks all entries where y_true has class=0, i.e. <PAD> (see above)
                        K.stack([
                            K.sum(y_true[:, :,
                                         metrics_eval_discard_first_classes:],
                                  axis=-1)
                        ] * n_classes,
                                axis=-1),
                    ),
                    name='total_count'),
                'acc',
            ])
        plot_model(self.model, to_file='model.png', show_shapes=True)
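`ANDCounter` is a project-specific metric that is not shown here; from its usage it counts positions where every tensor returned by `conditions_and` is simultaneously nonzero, yielding true-positive, false-positive, false-negative and total counts, from which precision (tp / (tp + fp)), recall (tp / (tp + fn)) and F1 (2 * tp / (2 * tp + fp + fn)) can be derived. A hypothetical, stateless reimplementation for illustration only (the real class in ArneBinder/bert_ner may differ, e.g. by accumulating counts across batches):

    from keras import backend as K

    class ANDCounter(object):
        """Hypothetical sketch: count entries where all condition tensors
        are nonzero at the same position."""

        def __init__(self, conditions_and, name):
            self.conditions_and = conditions_and
            self.__name__ = name  # Keras displays callable metrics by __name__

        def __call__(self, y_true, y_pred):
            conds = self.conditions_and(y_true, y_pred)
            joint = conds[0]
            for c in conds[1:]:
                joint = joint * c  # elementwise product acts as AND on {0, 1} masks
            return K.sum(joint)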