Example #1
def hop(scope, last, sentence, sentence_bkg, bkg_iter, bkg_fix,
        doc_len, real_max_len, convert_flag,
        biases_initializer=tf.initializers.zeros(),
        weights_initializer=tf.contrib.layers.xavier_initializer()):
    # One attention hop: attend over the sentence representations, conditioned
    # on the iterated background vector (bkg_iter) and any fixed background
    # vectors (bkg_fix); when 'o' appears in convert_flag, add a residual
    # connection back to bkg_iter. Iterable comes from collections.abc.
    if bkg_iter is None:
        bkg_iter = []
    if bkg_fix is None:
        bkg_fix = []
    if not isinstance(bkg_fix, Iterable):
        bkg_fix = [bkg_fix]
    bkg_fix = list(bkg_fix)
    hidden_size = sentence_bkg.shape[2]

    with tf.variable_scope(scope):
        # Block gradients through the sentence representations except on the
        # last hop, so earlier hops only update the attention parameters.
        sentence = tf.stop_gradient(sentence) \
            if not last else sentence
        sentence_bkg = tf.stop_gradient(sentence_bkg) \
            if not last else sentence_bkg
        alphas = attention(sentence_bkg, [bkg_iter] + bkg_fix, doc_len, real_max_len,
                           biases_initializer=biases_initializer,
                           weights_initializer=weights_initializer)
        # Weighted sum of sentence vectors, flattened to [batch, hidden_size].
        new_bkg = tf.matmul(alphas, sentence)
        new_bkg = tf.reshape(new_bkg, [-1, hidden_size], name='new_bkg')
        if 'o' in convert_flag:
            # residual connection to the previous background vector
            new_bkg = bkg_iter + new_bkg
    return new_bkg
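The attention helper called throughout these examples is not shown in the listing. Below is a minimal sketch, under the assumption of a masked, additive-style scorer that matches the call pattern above: inputs of shape [batch, max_len, hidden], an optional list of conditioning vectors, per-example lengths, and weights returned with shape [batch, 1, max_len] ready for tf.matmul. The names and internals are assumptions, not the actual implementation used in these repositories.

def attention(inputs, queries, lengths, max_len,
              biases_initializer=tf.initializers.zeros(),
              weights_initializer=tf.contrib.layers.xavier_initializer()):
    # Sketch only: score every time step with a small MLP, optionally
    # conditioned on extra query vectors, mask padded positions, and return
    # softmax weights of shape [batch, 1, max_len].
    hidden_size = inputs.shape[2].value
    features = [inputs]
    for q in (queries or []):
        # broadcast each [batch, hidden] conditioning vector over time
        features.append(tf.tile(q[:, None, :], [1, max_len, 1]))
    features = tf.concat(features, axis=2)
    scores = tf.layers.dense(features, hidden_size, activation=tf.tanh,
                             kernel_initializer=weights_initializer,
                             bias_initializer=biases_initializer)
    scores = tf.layers.dense(scores, 1, use_bias=False,
                             kernel_initializer=weights_initializer)
    scores = tf.squeeze(scores, axis=2)                        # [batch, max_len]
    mask = tf.sequence_mask(lengths, max_len, dtype=tf.float32)
    scores += (1.0 - mask) * -1e30                             # mask out padding
    alphas = tf.nn.softmax(scores)
    return alphas[:, None, :]                                  # [batch, 1, max_len]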
Example #2
    def __init__(self, embedding_count_dict, embedding_dim_dict, embedding_features_list, user_behavior_features,
                 activation='PReLU'):
        super(DIN, self).__init__()
        # Init Embedding Layer
        self.embedding_count_dict = embedding_count_dict
        self.embedding_dim_dict = embedding_dim_dict
        self.embedding_layers = dict()

        for feature in embedding_features_list:
            self.embedding_layers[feature] = layers.Embedding(input_dim=embedding_count_dict[feature],
                                                              output_dim=embedding_dim_dict[feature],
                                                              embeddings_initializer='random_uniform')

        # DIN Attention + Sum Pooling
        self.hist_at = attention(alibaba_utils.get_input_dim(embedding_dim_dict, user_behavior_features))
        # Init Fully Connected Layers
        self.fc = tf.keras.Sequential()
        self.fc.add(layers.BatchNormalization())
        self.fc.add(layers.Dense(200, activation="relu"))
        if activation == "Dice":
            self.fc.add(Dice())
        elif activation == "dice":
            self.fc.add(dice(200))
        elif activation == "PReLU":
            self.fc.add(layers.PReLU(alpha_initializer='zeros', weights=None))
        self.fc.add(layers.Dense(80, activation="relu"))
        if activation == "Dice":
            self.fc.add(Dice())
        elif activation == "dice":
            self.fc.add(dice(80))
        elif activation == "PReLU":
            self.fc.add(layers.PReLU(alpha_initializer='zeros', weights=None))
        self.fc.add(layers.Dense(2, activation=None))
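Only the constructor is shown above. The sketch below is a hedged guess at how a call method for this DIN model is usually wired, assuming embedded user, candidate, and behavior features plus a mask over the behavior sequence; the method name, the argument layout, and the way self.hist_at is invoked are assumptions, not the repository's code.

    def call(self, user_feats, candidate_feats, behavior_feats, behavior_mask):
        # Hypothetical forward pass; the real feature lists come from the
        # constructor arguments.
        user_emb = [self.embedding_layers[f](user_feats[f]) for f in user_feats]
        ad_emb = tf.concat(
            [self.embedding_layers[f](candidate_feats[f]) for f in candidate_feats],
            axis=-1)                                  # [batch, dim]
        hist_emb = tf.concat(
            [self.embedding_layers[f](behavior_feats[f]) for f in behavior_feats],
            axis=-1)                                  # [batch, seq_len, dim]
        # attention-weighted sum pooling of the behavior sequence,
        # keyed by the candidate item (call convention assumed)
        hist_pooled = self.hist_at([ad_emb, hist_emb, behavior_mask])
        x = tf.concat(user_emb + [ad_emb, hist_pooled], axis=-1)
        return self.fc(x)                             # [batch, 2] logits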
Example #3
    def attention_flow(self):
        """
        Attention Flow Layer:
        computes Context-to-Query attention and Query-to-Context attention
        :return:
        """
        self.g = attention(self.hidden_size, self.h, self.u)
        if self.use_dropout:
            self.g = tf.nn.dropout(self.g, self.dropout_keep_prob)
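The attention call here follows BiDAF's attention flow. The helper below is a hedged sketch of what such a layer computes, using a simplified dot-product similarity in place of the trilinear scoring from the BiDAF paper; it is an assumption, not the repository's implementation.

def bidaf_attention(hidden_size, h, u):
    # hidden_size kept only to match the call signature; unused in this sketch
    # h: context encodings [batch, c_len, hidden]
    # u: query encodings   [batch, q_len, hidden]
    sim = tf.matmul(h, u, transpose_b=True)                 # [batch, c_len, q_len]
    # Context-to-Query: for each context word, attend over the query words.
    c2q = tf.matmul(tf.nn.softmax(sim), u)                  # [batch, c_len, hidden]
    # Query-to-Context: weight context words by their best match to any query word.
    b = tf.nn.softmax(tf.reduce_max(sim, axis=2))           # [batch, c_len]
    q2c = tf.matmul(b[:, None, :], h)                       # [batch, 1, hidden]
    q2c = tf.tile(q2c, [1, tf.shape(h)[1], 1])              # broadcast over context
    # BiDAF concatenates h, c2q, and the element-wise interactions.
    return tf.concat([h, c2q, h * c2q, h * q2c], axis=-1)   # [batch, c_len, 4*hidden]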
Example #4
File: nscla.py  Project: SleepyBag/bertcls
    def nsc(self, x, max_sen_len, max_doc_len, sen_len, doc_len):
        def lstm(inputs, sequence_length, hidden_size, scope):
            cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2, forget_bias=0.,
                                              initializer=xavier())
            cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2, forget_bias=0.,
                                              initializer=xavier())
            outputs, state = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw, cell_bw=cell_bw, inputs=inputs,
                sequence_length=sequence_length, dtype=tf.float32, scope=scope)
            outputs = tf.concat(outputs, axis=2)
            return outputs, state

        with tf.variable_scope('sentence_layer'):
            # lstm_outputs, _state = lstm(x, sen_len, self.hidden_size, 'lstm')
            # lstm_outputs = tf.reshape(lstm_outputs, [-1, max_sen_len, self.hidden_size])
            lstm_bkg, _state = lstm(x, sen_len, self.hidden_size, 'lstm_bkg')
            lstm_bkg = tf.reshape(lstm_bkg, [-1, max_sen_len, self.hidden_size])
            lstm_outputs = lstm_bkg

            alphas = attention(lstm_bkg, [], sen_len, max_sen_len,
                               biases_initializer=self.biases_initializer,
                               weights_initializer=self.weights_initializer)
            sen_bkg = tf.matmul(alphas, lstm_outputs)
            sen_bkg = tf.reshape(sen_bkg, [-1, self.hidden_size], name='new_bkg')
        # regroup the pooled sentence vectors into documents
        outputs = tf.reshape(sen_bkg, [-1, max_doc_len, self.hidden_size])

        with tf.variable_scope('document_layer'):
            # lstm_outputs, _state = lstm(outputs, doc_len, self.hidden_size, 'lstm')
            lstm_bkg, _state = lstm(outputs, doc_len, self.hidden_size, 'lstm_bkg')
            lstm_outputs = lstm_bkg

            alphas = attention(lstm_bkg, [], doc_len, max_doc_len,
                               biases_initializer=self.biases_initializer,
                               weights_initializer=self.weights_initializer)
            doc_bkg = tf.matmul(alphas, lstm_outputs)
            doc_bkg = tf.reshape(doc_bkg, [-1, self.hidden_size], name='new_bkg')
        outputs = doc_bkg

        with tf.variable_scope('result'):
            d_hats = tf.layers.dense(tf.concat([outputs, self.usr, self.prd], axis=1), self.cls_cnt,
                                     kernel_initializer=self.weights_initializer,
                                     bias_initializer=self.biases_initializer)

        return d_hats
Example #5
    def dnsc(self, x, max_sen_len, max_doc_len, sen_len, doc_len, task_label):
        x = tf.reshape(x, [-1, max_sen_len, self.emb_dim])
        sen_len = tf.reshape(sen_len, [-1])

        def lstm(inputs, sequence_length, hidden_size, scope, init_state):
            init_state_fw, init_state_bw = init_state
            cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2,
                                              forget_bias=0.,
                                              initializer=xavier())
            cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2,
                                              forget_bias=0.,
                                              initializer=xavier())
            outputs, state = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                initial_state_fw=init_state_fw,
                initial_state_bw=init_state_bw,
                inputs=inputs,
                sequence_length=sequence_length,
                dtype=tf.float32,
                scope=scope)
            outputs = tf.concat(outputs, axis=2)
            return outputs, state

        with tf.variable_scope('sentence_layer'):
            # lstm_outputs, _state = lstm(x, sen_len, self.hidden_size, 'lstm')
            # lstm_outputs = tf.reshape(lstm_outputs, [-1, max_sen_len, self.hidden_size])
            # One learned initial LSTM state per task: tile the task label over
            # the document so every sentence gets its task's state, then look it
            # up from the [task_cnt, 2 * hidden_size] table.
            sen_task_label = tf.reshape(
                tf.tile(task_label[:, None], [1, max_doc_len]), [-1])
            sen_init_state = tf.get_variable(
                'sen_init_state', [self.task_cnt, 2 * self.hidden_size])
            sen_init_state = tf.nn.embedding_lookup(sen_init_state,
                                                    sen_task_label)
            # Split the 2 * hidden_size vector into forward and backward
            # (c, h) state tuples.
            sen_init_state_fw = tf.nn.rnn_cell.LSTMStateTuple(
                sen_init_state[:, :self.hidden_size // 2],
                sen_init_state[:, self.hidden_size // 2:self.hidden_size])
            sen_init_state_bw = tf.nn.rnn_cell.LSTMStateTuple(
                sen_init_state[:, self.hidden_size:self.hidden_size * 3 // 2],
                sen_init_state[:, self.hidden_size * 3 // 2:])
            sen_init_state = (sen_init_state_fw, sen_init_state_bw)
            lstm_bkg, _state = lstm(x, sen_len, self.hidden_size, 'lstm_bkg',
                                    sen_init_state)
            lstm_bkg = tf.reshape(lstm_bkg,
                                  [-1, max_sen_len, self.hidden_size])
            lstm_outputs = lstm_bkg

            alphas = attention(lstm_bkg, [],
                               sen_len,
                               max_sen_len,
                               biases_initializer=self.b_init,
                               weights_initializer=self.w_init)
            sen_bkg = alphas @ lstm_outputs
            sen_bkg = tf.reshape(sen_bkg, [-1, self.hidden_size],
                                 name='new_bkg')
        outputs = tf.reshape(sen_bkg, [-1, max_doc_len, self.hidden_size])
        self.alphas = alphas

        with tf.variable_scope('document_layer'):
            # lstm_outputs, _state = lstm(outputs, doc_len, self.hidden_size, 'lstm')
            doc_task_label = task_label
            doc_init_state = tf.get_variable(
                'doc_init_state', [self.task_cnt, 2 * self.hidden_size])
            doc_init_state = tf.nn.embedding_lookup(doc_init_state,
                                                    doc_task_label)
            doc_init_state_fw = tf.nn.rnn_cell.LSTMStateTuple(
                doc_init_state[:, :self.hidden_size // 2],
                doc_init_state[:, self.hidden_size // 2:self.hidden_size])
            doc_init_state_bw = tf.nn.rnn_cell.LSTMStateTuple(
                doc_init_state[:, self.hidden_size:self.hidden_size * 3 // 2],
                doc_init_state[:, self.hidden_size * 3 // 2:])
            doc_init_state = (doc_init_state_fw, doc_init_state_bw)
            lstm_bkg, _state = lstm(outputs, doc_len, self.hidden_size,
                                    'lstm_bkg', doc_init_state)
            lstm_outputs = lstm_bkg

            alphas = attention(lstm_bkg, [],
                               doc_len,
                               max_doc_len,
                               biases_initializer=self.b_init,
                               weights_initializer=self.w_init)
            doc_bkg = alphas @ lstm_outputs
            doc_bkg = tf.reshape(doc_bkg, [-1, self.hidden_size],
                                 name='new_bkg')
        outputs = doc_bkg

        return outputs
Example #6
    def build(self, data_iter, bert_config_file):
        # get the inputs
        with tf.variable_scope('inputs'):
            input_map = data_iter.get_next()
            usrid, prdid, input_x, input_y, doc_len = \
                (input_map['usr'], input_map['prd'],
                 input_map['content'], input_map['rating'],
                 input_map['doc_len'])

            input_x = tf.cast(input_x, tf.int32)
            self.usr = lookup(self.embeddings['usr_emb'],
                              usrid,
                              name='cur_usr_embedding')
            self.prd = lookup(self.embeddings['prd_emb'],
                              prdid,
                              name='cur_prd_embedding')
            # input_x = lookup(self.embeddings['wrd_emb'], input_x, name='cur_wrd_embedding')
            input_x = tf.reshape(input_x, [-1, self.max_doc_len])
            input_mask = tf.sequence_mask(doc_len, self.max_doc_len)
            input_mask = tf.cast(input_mask, tf.int32)

        bert_config = BertConfig.from_json_file(bert_config_file)
        bert = BertModel(bert_config,
                         is_training=True,
                         input_ids=input_x,
                         input_mask=input_mask,
                         token_type_ids=None,
                         use_one_hot_embeddings=False)

        # Combine BERT's pooled [CLS] representation with an attention-pooled
        # summary of the full sequence output.
        pooled_output = bert.get_pooled_output()
        sequence_output = bert.get_sequence_output()
        alphas = attention(sequence_output, None, self.max_doc_len,
                           self.max_doc_len)
        sequence_output = tf.matmul(alphas, sequence_output)
        sequence_output = tf.squeeze(sequence_output, axis=1)
        bert_output = tf.concat([pooled_output, sequence_output], axis=1)

        logits = tf.layers.dense(
            bert_output,
            self.cls_cnt,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.02))
        self.bert_output = bert_output
        self.logits = logits

        # build the process of model
        prediction = tf.argmax(logits, 1, name='prediction')
        self.prediction = prediction

        with tf.variable_scope("loss"):
            sce = tf.nn.softmax_cross_entropy_with_logits_v2
            log_probs = tf.nn.log_softmax(logits)
            self.probs = tf.nn.softmax(logits)
            loss = -tf.reduce_sum(tf.one_hot(
                input_y, self.cls_cnt, dtype=tf.float32) * log_probs,
                                  axis=-1)
            self.loss = tf.reduce_mean(loss)
            # self.loss = sce(logits=logits, labels=tf.one_hot(input_y, self.cls_cnt))
            # self.loss = tf.reduce_mean(self.loss)
            self.total_loss = tf.reduce_sum(loss)

        prediction = tf.argmax(logits, 1, name='prediction')
        with tf.variable_scope("metrics"):
            correct_prediction = tf.equal(prediction, input_y)
            self.correct = correct_prediction
            mse = tf.reduce_sum(tf.square(prediction - input_y), name="mse")
            correct_num = tf.reduce_sum(tf.cast(correct_prediction,
                                                dtype=tf.int32),
                                        name="correct_num")
            accuracy = tf.reduce_sum(tf.cast(correct_prediction, "float"),
                                     name="accuracy")

        return self.total_loss, mse, correct_num, accuracy
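A hedged usage sketch for the build method above. The dataset path, parse_fn, and the BertCls class name are hypothetical stand-ins; the grounded parts are the input keys the method reads ('usr', 'prd', 'content', 'rating', 'doc_len') and the values it returns.

# parse_fn and BertCls are hypothetical stand-ins for the repository's own
# input parser and model class.
dataset = tf.data.TFRecordDataset('train.tfrecord').map(parse_fn).batch(32)
data_iter = dataset.make_initializable_iterator()
model = BertCls()
total_loss, mse, correct_num, accuracy = model.build(data_iter, 'bert_config.json')
train_op = tf.train.AdamOptimizer(learning_rate=2e-5).minimize(model.loss)

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), data_iter.initializer])
    _, loss_val = sess.run([train_op, model.loss])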