Code example #1
from keras import backend as K

def reweight(y_true,
             y_pred,
             tp_weight=0.3,
             tn_weight=0.3,
             fp_weight=4.0,
             fn_weight=0.7):
    # Get predictions
    y_pred_classes = K.greater_equal(y_pred, 0.5)
    y_pred_classes_float = K.cast(y_pred_classes, K.floatx())

    # Get misclassified examples
    wrongly_classified = K.not_equal(y_true, y_pred_classes_float)
    wrongly_classified_float = K.cast(wrongly_classified, K.floatx())

    # Get correctly classified examples
    correctly_classified = K.equal(y_true, y_pred_classes_float)
    correctly_classified_float = K.cast(correctly_classified, K.floatx())

    # Get tp, tn, fn, fp: a wrong prediction on a positive label is a false
    # negative, and a wrong prediction on a negative label is a false positive
    tp = correctly_classified_float * y_true
    tn = correctly_classified_float * (1 - y_true)
    fn = wrongly_classified_float * y_true
    fp = wrongly_classified_float * (1 - y_true)

    # Get weights
    weight_tensor = tp_weight * tp + fp_weight * fp + tn_weight * tn + fn_weight * fn

    loss = K.binary_crossentropy(y_true, y_pred)
    weighted_loss = loss * weight_tensor
    return weighted_loss
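
A custom loss like this plugs straight into Keras via model.compile, which calls it per batch with (y_true, y_pred). A hedged usage sketch: `model` is a hypothetical compiled binary classifier, and `make_reweight_loss` is a hypothetical wrapper for overriding the default weights.

def make_reweight_loss(fp_weight=4.0, fn_weight=0.7):
    # close over the weights so Keras sees a plain (y_true, y_pred) loss
    def loss_fn(y_true, y_pred):
        return reweight(y_true, y_pred,
                        fp_weight=fp_weight, fn_weight=fn_weight)
    return loss_fn

model.compile(optimizer='adam',
              loss=make_reweight_loss(fp_weight=8.0),
              metrics=['accuracy'])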
Code example #2
    def call(self, x, mask=None):
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)
        # ait = K.dot(uit, self.u)

        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases, especially early in training, the sum may be almost zero,
        # which results in NaNs. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)
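
This snippet assumes a `dot_product` helper defined elsewhere in the layer's module and not shown above. A minimal sketch of what such a helper commonly looks like in Keras attention implementations (an assumption, not the project's verified definition):

from keras import backend as K

def dot_product(x, kernel):
    # On the TensorFlow backend, dotting a 3D tensor with a 1D kernel needs
    # an explicit expand/squeeze; other backends handle it directly.
    if K.backend() == 'tensorflow':
        # (batch, steps, features) . (features, 1) -> (batch, steps)
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    return K.dot(x, kernel)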
Code example #3
    def predict_prob(self,
                     context_tokens_ids,
                     response_tokens_ids,
                     condition_id,
                     temperature=1.0):
        """
        :param context_tokens_ids:      shape == (batch_size, context_size, seq_len), int32
        :param response_tokens_ids:     shape == (batch_size, seq_len), int32
        :param condition_id:            shape == (batch_size, 1), int32
        :param temperature:             float32
        :return:
            tokens_probs:               shape == (batch_size, seq_len - 1, vocab_size), float32
        """
        # remove last token, but keep first token to match seq2seq decoder input's shape
        response_tokens_ids = response_tokens_ids[:, :-1]
        # shape == (batch_size, seq_len - 1)

        init_dec_hs = np.zeros(shape=(context_tokens_ids.shape[0],
                                      self._decoder_depth,
                                      self._params.hidden_layer_dim),
                               dtype=K.floatx())
        # shape == (batch_size, decoder_depth, hidden_layer_dim)

        temperature = np.full_like(condition_id, temperature, dtype=np.float32)
        # shape == (batch_size, 1)

        tokens_probs = self._models['seq2seq'].predict([
            context_tokens_ids, response_tokens_ids, condition_id, init_dec_hs,
            temperature
        ])
        # shape == (batch_size, seq_len - 1, vocab_size)
        return tokens_probs
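
For context, a hedged usage sketch with dummy inputs. The shapes follow the docstring; `model` stands in for an instance of the surrounding class, and the sizes are arbitrary illustration values:

import numpy as np

# batch_size=2, context_size=3, seq_len=10 chosen purely for illustration
context = np.zeros((2, 3, 10), dtype=np.int32)
response = np.zeros((2, 10), dtype=np.int32)
condition = np.zeros((2, 1), dtype=np.int32)

probs = model.predict_prob(context, response, condition, temperature=0.8)
# probs.shape == (2, 9, vocab_size): one distribution per decoder input position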
Code example #4
File: dnn_util.py  Project: ziqizhang/wop
    def call(self, x, mask=None):
        # size of x: [batch_size, seq_len, attention_dim]
        # size of u: [attention_dim, 1]
        # uit = tanh(xW + b)
        # tile W across the batch so tf.matmul sees matching leading dimensions
        uit = K.tile(K.expand_dims(self.W, axis=0), (K.shape(x)[0], 1, 1))
        uit = tf.matmul(x, uit)
        uit = K.tanh(K.bias_add(uit, self.b))
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)

        ait = K.exp(ait)

        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)

        return output
Code example #5
    def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim

        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)
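
All three attention variants above guard the softmax normalization with K.epsilon(). A tiny NumPy illustration of why (values made up): when the mask zeroes out every position in a row, the row sum is 0 and plain division yields NaN, while the epsilon keeps the result finite.

import numpy as np

eps = 1e-7  # K.epsilon() default
a = np.exp(np.array([[0.2, 1.3], [5.0, 2.0]]))
mask = np.array([[0.0, 0.0], [1.0, 1.0]])  # first row fully masked
a *= mask

unsafe = a / a.sum(axis=1, keepdims=True)        # first row: 0/0 -> NaN
safe = a / (a.sum(axis=1, keepdims=True) + eps)  # first row: exactly 0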
Code example #6
    def _get_training_batch_generator(self):
        # set a unique random seed per worker so batches are processed correctly in multi-gpu training
        horovod_seed = self._horovod.rank() if self._horovod else 0
        epoch_id = 0

        while True:  # infinite batch generator
            epoch_id += 1

            for train_batch in get_training_batch(
                    self._training_data,
                    self._params.train_batch_size,
                    random_permute=SHUFFLE_TRAINING_BATCHES,
                    random_seed=RANDOM_SEED * epoch_id + horovod_seed):

                context_tokens_ids, response_tokens_ids, condition_id = train_batch
                # response tokens are wrapped with _start_ and _end_ tokens
                # output shape == (batch_size, seq_len)

                # get input response ids by removing last sequence token (_end_)
                input_response_tokens_ids = response_tokens_ids[:, :-1]
                # output shape == (batch_size, seq_len - 1)

                # get target response ids by removing the first (_start_) token of the sequence
                target_response_tokens_ids = response_tokens_ids[:, 1:]
                # output shape == (batch_size, seq_len - 1)

                # workaround for using sparse_categorical_crossentropy loss
                # see https://github.com/tensorflow/tensorflow/issues/17150#issuecomment-399776510
                target_response_tokens_ids = np.expand_dims(
                    target_response_tokens_ids, axis=-1)
                # output shape == (batch_size, seq_len - 1, 1)

                init_dec_hs = np.zeros(shape=(context_tokens_ids.shape[0],
                                              self._decoder_depth,
                                              self._params.hidden_layer_dim),
                                       dtype=K.floatx())

                yield [
                    context_tokens_ids, input_response_tokens_ids,
                    condition_id, init_dec_hs
                ], target_response_tokens_ids
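
A hedged sketch of how such a generator is typically consumed with the Keras 2 generator API; the model key mirrors code example #3, and steps_per_epoch/epochs are illustrative values:

# Hypothetical usage inside the same class
batch_generator = self._get_training_batch_generator()
self._models['seq2seq'].fit_generator(batch_generator,
                                      steps_per_epoch=1000,
                                      epochs=10)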
Code example #7
    def _decoder(self, tokens_emb_model, condition_emb_model):
        self._logger.info('Building decoder...')

        thought_vector = Input(shape=(self._params.hidden_layer_dim, ),
                               dtype=K.floatx(),
                               name='dec_thought_vector')
        # output shape == (batch_size, hidden_layer_dim)
        response_tokens_ids = tokens_emb_model.inputs[0]
        # output shape == (batch_size, seq_len)
        condition_id = condition_emb_model.inputs[0]
        # output shape == (batch_size, 1)
        temperature = Input(shape=(1, ),
                            dtype='float32',
                            name='dec_temperature')
        # output shape == (batch_size, 1)

        # hardcode the decoder's depth here: a general solution for any number of
        # stacked rnn layers' hidden states is too bulky and we don't need it,
        # so keep it simple, stupid
        self._decoder_depth = 2
        # keep the inputs for the rnn decoder's hidden states globally accessible to
        # all model layers that use them; otherwise you may encounter a keras bug
        # that affects stateful rnn models
        # related discussion: https://github.com/keras-team/keras/issues/9385#issuecomment-365464721
        self._dec_hs_input = Input(shape=(self._decoder_depth,
                                          self._params.hidden_layer_dim),
                                   dtype=K.floatx(),
                                   name='dec_hs')
        # shape == (batch_size, dec_depth, hidden_layer_dim)

        response_tokens_embeddings = tokens_emb_model(response_tokens_ids)
        # output shape == (batch_size, seq_len, token_emb_size)
        condition_embedding = condition_emb_model(condition_id)
        # output shape == (batch_size, cond_emb_size)
        conditioned_tv = Concatenate(name='conditioned_tv')(
            [thought_vector, condition_embedding])
        # output shape == (batch_size, hidden_layer_dim + cond_emb_size)

        # Temporary solution:
        # use a custom lambda function for repeating the vector and set output_shape
        # manually, otherwise the subsequent Concatenate layer won't work
        repeated_conditioned_tv = Lambda(
            function=repeat_vector,
            mask=lambda inputs, inputs_masks: inputs_masks[
                0],  # function to get mask of the first input
            output_shape=(None, self._params.hidden_layer_dim +
                          self._condition_embedding_dim),
            name='repeated_conditioned_tv')(
                [conditioned_tv, response_tokens_ids])
        # output shape == (batch_size, seq_len, hidden_layer_dim + cond_emb_size)

        decoder_input = Concatenate(name='concat_emb_cond_tv')(
            [response_tokens_embeddings, repeated_conditioned_tv])
        # output shape == (batch_size, seq_len, token_emb_size + hidden_layer_dim + cond_emb_size)

        # unpack hidden states to tensors
        dec_hs_0 = Lambda(function=lambda x: x[:, 0, :],
                          output_shape=(self._params.hidden_layer_dim, ),
                          name='dec_hs_0')(self._dec_hs_input)

        dec_hs_1 = Lambda(function=lambda x: x[:, 1, :],
                          output_shape=(self._params.hidden_layer_dim, ),
                          name='dec_hs_1')(self._dec_hs_input)

        outputs_seq_0, updated_hs_seq_0 = self._rnn_class(
            units=self._params.hidden_layer_dim, return_sequences=True, return_state=True, name='decoder_0')\
            (decoder_input, initial_state=dec_hs_0)
        # outputs_seq_0 shape == (batch_size, seq_len, hidden_layer_dim)
        # updated_hs_seq_0 shape == (batch_size, hidden_layer_dim)

        outputs_seq_1, updated_hs_seq_1 = self._rnn_class(
            units=self._params.hidden_layer_dim, return_sequences=True, return_state=True, name='decoder_1')\
            (outputs_seq_0, initial_state=dec_hs_1)
        # outputs_seq_1 shape == (batch_size, seq_len, hidden_layer_dim)
        # updated_hs_seq_1 shape == (batch_size, hidden_layer_dim)

        outputs_dropout = Dropout(
            rate=self._params.dense_dropout_ratio)(outputs_seq_1)
        # output shape == (batch_size, seq_len, hidden_layer_dim)
        tokens_logits = Dense(self._vocab_size)(outputs_dropout)
        # output shape == (batch_size, seq_len, vocab_size)
        tokens_probs = softmax_with_temperature(tokens_logits, temperature)
        # output shape == (batch_size, seq_len, vocab_size)

        # pack updated hidden states into one tensor
        updated_hs = Concatenate(axis=1, name='updated_hs')([
            Reshape((1, self._params.hidden_layer_dim))(updated_hs_seq_0),
            Reshape((1, self._params.hidden_layer_dim))(updated_hs_seq_1)
        ])

        decoder_training_model = Model(inputs=[
            thought_vector, response_tokens_ids, condition_id,
            self._dec_hs_input
        ],
                                       outputs=[tokens_logits],
                                       name='decoder_training_model')

        decoder_model = Model(inputs=[
            thought_vector, response_tokens_ids, condition_id,
            self._dec_hs_input, temperature
        ],
                              outputs=[tokens_probs, updated_hs],
                              name='decoder_model')

        return decoder_training_model, decoder_model
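
The decoder calls `softmax_with_temperature`, which is defined elsewhere in the project and not shown here. A minimal sketch of the usual formulation, assuming logits of shape (batch, seq_len, vocab_size) and a per-sample temperature of shape (batch, 1); this is an assumption about the helper, not its verified source:

from keras import backend as K
from keras.layers import Lambda

def softmax_with_temperature(logits, temperature):
    # Divide logits by temperature before the softmax: higher temperature
    # flattens the distribution, lower temperature sharpens it.
    def _scaled_softmax(args):
        logits, temperature = args
        # expand temperature to (batch, 1, 1) so it broadcasts over seq and vocab
        return K.softmax(logits / K.expand_dims(temperature, axis=-1))
    return Lambda(_scaled_softmax,
                  name='softmax_with_temperature')([logits, temperature])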