def hop(scope, last, sentence, sentence_bkg, bkg_iter, bkg_fix, doc_len, real_max_len,
        convert_flag, biases_initializer=tf.initializers.zeros(),
        weights_initializer=tf.contrib.layers.xavier_initializer()):
    if bkg_iter is None:
        bkg_iter = []
    if bkg_fix is None:
        bkg_fix = []
    if not isinstance(bkg_fix, Iterable):
        bkg_fix = [bkg_fix]
    bkg_fix = list(bkg_fix)
    hidden_size = sentence_bkg.shape[2]

    with tf.variable_scope(scope):
        sentence = tf.stop_gradient(sentence) if not last else sentence
        sentence_bkg = tf.stop_gradient(sentence_bkg) if not last else sentence_bkg

        alphas = attention(sentence_bkg, [bkg_iter] + bkg_fix, doc_len, real_max_len,
                           biases_initializer=biases_initializer,
                           weights_initializer=weights_initializer)
        new_bkg = tf.matmul(alphas, sentence)
        new_bkg = tf.reshape(new_bkg, [-1, hidden_size], name='new_bkg')
        if 'o' in convert_flag:
            new_bkg = bkg_iter + new_bkg

    return new_bkg
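# The attention() helper used by hop() (and by nsc()/dnsc() below) is not included in
# this snippet. The sketch below shows what it is assumed to compute, judging from the
# call sites: additive attention over the time axis, optionally conditioned on extra
# query vectors, masked by sequence length, and returning weights of shape
# [batch, 1, max_len] so that tf.matmul(alphas, inputs) pools to [batch, 1, hidden].
# Function name and internals are illustrative, not the original implementation.
import tensorflow as tf

def attention_sketch(inputs, queries, length, max_len,
                     biases_initializer=tf.initializers.zeros(),
                     weights_initializer=tf.contrib.layers.xavier_initializer()):
    hidden_size = inputs.get_shape().as_list()[-1]
    # per-token projection: [batch, max_len, hidden]
    proj = tf.layers.dense(inputs, hidden_size,
                           kernel_initializer=weights_initializer,
                           bias_initializer=biases_initializer)
    for q in (queries or []):
        if isinstance(q, (list, tuple)) and not q:
            continue  # hop() passes [] when no iterated background vector exists yet
        # each query vector ([batch, hidden]) shifts the per-token scores
        proj += tf.expand_dims(
            tf.layers.dense(q, hidden_size, use_bias=False,
                            kernel_initializer=weights_initializer), axis=1)
    scores = tf.layers.dense(tf.tanh(proj), 1, use_bias=False)   # [batch, max_len, 1]
    scores = tf.transpose(scores, [0, 2, 1])                     # [batch, 1, max_len]
    mask = tf.sequence_mask(length, max_len)[:, None, :]         # [batch, 1, max_len]
    scores = tf.where(mask, scores, tf.fill(tf.shape(scores), -1e30))
    return tf.nn.softmax(scores, axis=-1)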
def __init__(self, embedding_count_dict, embedding_dim_dict, embedding_features_list,
             user_behavior_features, activation='PReLU'):
    super(DIN, self).__init__()
    # Embedding layers, one per categorical feature
    self.embedding_count_dict = embedding_count_dict
    self.embedding_dim_dict = embedding_dim_dict
    self.embedding_layers = dict()
    for feature in embedding_features_list:
        self.embedding_layers[feature] = layers.Embedding(
            input_dim=embedding_count_dict[feature],
            output_dim=embedding_dim_dict[feature],
            embeddings_initializer='random_uniform')
    # DIN attention + sum pooling over the user behavior sequence
    self.hist_at = attention(
        alibaba_utils.get_input_dim(embedding_dim_dict, user_behavior_features))
    # Fully connected tower
    self.fc = tf.keras.Sequential()
    self.fc.add(layers.BatchNormalization())
    self.fc.add(layers.Dense(200, activation="relu"))
    if activation == "Dice":
        self.fc.add(Dice())
    elif activation == "dice":
        self.fc.add(dice(200))
    elif activation == "PReLU":
        self.fc.add(layers.PReLU(alpha_initializer='zeros', weights=None))
    self.fc.add(layers.Dense(80, activation="relu"))
    if activation == "Dice":
        self.fc.add(Dice())
    elif activation == "dice":
        self.fc.add(dice(80))
    elif activation == "PReLU":
        self.fc.add(layers.PReLU(alpha_initializer='zeros', weights=None))
    self.fc.add(layers.Dense(2, activation=None))
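# A hypothetical construction of the DIN model defined above. Feature names, vocabulary
# sizes and embedding dimensions are illustrative; in the original project they come
# from the Alibaba dataset preprocessing (alibaba_utils), which is not shown here.
embedding_count = {'user_id': 100000, 'item_id': 500000, 'cate_id': 1000}
embedding_dim = {'user_id': 32, 'item_id': 64, 'cate_id': 16}
model = DIN(embedding_count_dict=embedding_count,
            embedding_dim_dict=embedding_dim,
            embedding_features_list=['user_id', 'item_id', 'cate_id'],
            user_behavior_features=['item_id', 'cate_id'],  # history features fed to DIN attention
            activation='PReLU')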
def attention_flow(self):
    """
    Attention Flow Layer: computes context-to-query attention and query-to-context
    attention, producing the query-aware context representation g.
    """
    self.g = attention(self.hidden_size, self.h, self.u)
    if self.use_dropout:
        self.g = tf.nn.dropout(self.g, self.dropout_keep_prob)
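# Note that the attention() called here takes (hidden_size, h, u) and returns the fused
# representation g directly, unlike the length-masked helper used by hop()/nsc()/dnsc().
# A minimal BiDAF-style sketch under that assumption (trilinear similarity, C2Q and Q2C
# attention, g = [h; c2q; h*c2q; h*q2c]); illustrative only, not the original code.
import tensorflow as tf

def attention_flow_sketch(hidden_size, h, u):
    # h: [batch, p_len, d] context encoding, u: [batch, q_len, d] question encoding.
    # hidden_size is accepted to match the call above but is not needed by this sketch.
    p_len = tf.shape(h)[1]
    q_len = tf.shape(u)[1]
    h_exp = tf.tile(tf.expand_dims(h, 2), [1, 1, q_len, 1])          # [b, p, q, d]
    u_exp = tf.tile(tf.expand_dims(u, 1), [1, p_len, 1, 1])          # [b, p, q, d]
    sim_in = tf.concat([h_exp, u_exp, h_exp * u_exp], axis=-1)       # [b, p, q, 3d]
    s = tf.squeeze(tf.layers.dense(sim_in, 1, use_bias=False), -1)   # [b, p, q]
    c2q = tf.matmul(tf.nn.softmax(s, axis=-1), u)                    # [b, p, d]
    b = tf.nn.softmax(tf.reduce_max(s, axis=-1), axis=-1)            # [b, p]
    q2c = tf.matmul(tf.expand_dims(b, 1), h)                         # [b, 1, d]
    q2c = tf.tile(q2c, [1, p_len, 1])                                # [b, p, d]
    return tf.concat([h, c2q, h * c2q, h * q2c], axis=-1)            # [b, p, 4d]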
def nsc(self, x, max_sen_len, max_doc_len, sen_len, doc_len):
    def lstm(inputs, sequence_length, hidden_size, scope):
        cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2, forget_bias=0., initializer=xavier())
        cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2, forget_bias=0., initializer=xavier())
        outputs, state = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw, cell_bw=cell_bw,
            inputs=inputs, sequence_length=sequence_length,
            dtype=tf.float32, scope=scope)
        outputs = tf.concat(outputs, axis=2)
        return outputs, state

    with tf.variable_scope('sentence_layer'):
        # lstm_outputs, _state = lstm(x, sen_len, self.hidden_size, 'lstm')
        # lstm_outputs = tf.reshape(lstm_outputs, [-1, max_sen_len, self.hidden_size])
        lstm_bkg, _state = lstm(x, sen_len, self.hidden_size, 'lstm_bkg')
        lstm_bkg = tf.reshape(lstm_bkg, [-1, max_sen_len, self.hidden_size])
        lstm_outputs = lstm_bkg

        alphas = attention(lstm_bkg, [], sen_len, max_sen_len,
                           biases_initializer=self.biases_initializer,
                           weights_initializer=self.weights_initializer)
        sen_bkg = tf.matmul(alphas, lstm_outputs)
        sen_bkg = tf.reshape(sen_bkg, [-1, self.hidden_size], name='new_bkg')
        outputs = tf.reshape(sen_bkg, [-1, max_doc_len, self.hidden_size])

    with tf.variable_scope('document_layer'):
        # lstm_outputs, _state = lstm(outputs, doc_len, self.hidden_size, 'lstm')
        lstm_bkg, _state = lstm(outputs, doc_len, self.hidden_size, 'lstm_bkg')
        lstm_outputs = lstm_bkg

        alphas = attention(lstm_bkg, [], doc_len, max_doc_len,
                           biases_initializer=self.biases_initializer,
                           weights_initializer=self.weights_initializer)
        doc_bkg = tf.matmul(alphas, lstm_outputs)
        doc_bkg = tf.reshape(doc_bkg, [-1, self.hidden_size], name='new_bkg')
        outputs = doc_bkg

    with tf.variable_scope('result'):
        d_hats = tf.layers.dense(tf.concat([outputs, self.usr, self.prd], axis=1), self.cls_cnt,
                                 kernel_initializer=self.weights_initializer,
                                 bias_initializer=self.biases_initializer)

    return d_hats
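# The xavier() used for the LSTM cell initializers above is not defined in this snippet.
# It is assumed to be a module-level shorthand for TensorFlow 1.x's Glorot initializer,
# consistent with the default weights_initializer used by hop(); an assumption, e.g.:
import tensorflow as tf
xavier = tf.contrib.layers.xavier_initializer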
def dnsc(self, x, max_sen_len, max_doc_len, sen_len, doc_len, task_label):
    x = tf.reshape(x, [-1, max_sen_len, self.emb_dim])
    sen_len = tf.reshape(sen_len, [-1])

    def lstm(inputs, sequence_length, hidden_size, scope, init_state):
        init_state_fw, init_state_bw = init_state
        cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2, forget_bias=0., initializer=xavier())
        cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2, forget_bias=0., initializer=xavier())
        outputs, state = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw, cell_bw=cell_bw,
            initial_state_fw=init_state_fw, initial_state_bw=init_state_bw,
            inputs=inputs, sequence_length=sequence_length,
            dtype=tf.float32, scope=scope)
        outputs = tf.concat(outputs, axis=2)
        return outputs, state

    with tf.variable_scope('sentence_layer'):
        # lstm_outputs, _state = lstm(x, sen_len, self.hidden_size, 'lstm')
        # lstm_outputs = tf.reshape(lstm_outputs, [-1, max_sen_len, self.hidden_size])
        sen_task_label = tf.reshape(
            tf.tile(task_label[:, None], [1, max_doc_len]), [-1])
        sen_init_state = tf.get_variable(
            'sen_init_state', [self.task_cnt, 2 * self.hidden_size])
        sen_init_state = tf.nn.embedding_lookup(sen_init_state, sen_task_label)
        sen_init_state_fw = tf.nn.rnn_cell.LSTMStateTuple(
            sen_init_state[:, :self.hidden_size // 2],
            sen_init_state[:, self.hidden_size // 2:self.hidden_size])
        sen_init_state_bw = tf.nn.rnn_cell.LSTMStateTuple(
            sen_init_state[:, self.hidden_size:self.hidden_size * 3 // 2],
            sen_init_state[:, self.hidden_size * 3 // 2:])
        sen_init_state = (sen_init_state_fw, sen_init_state_bw)

        lstm_bkg, _state = lstm(x, sen_len, self.hidden_size, 'lstm_bkg', sen_init_state)
        lstm_bkg = tf.reshape(lstm_bkg, [-1, max_sen_len, self.hidden_size])
        lstm_outputs = lstm_bkg

        alphas = attention(lstm_bkg, [], sen_len, max_sen_len,
                           biases_initializer=self.b_init,
                           weights_initializer=self.w_init)
        sen_bkg = alphas @ lstm_outputs
        sen_bkg = tf.reshape(sen_bkg, [-1, self.hidden_size], name='new_bkg')
        outputs = tf.reshape(sen_bkg, [-1, max_doc_len, self.hidden_size])
        self.alphas = alphas

    with tf.variable_scope('document_layer'):
        # lstm_outputs, _state = lstm(outputs, doc_len, self.hidden_size, 'lstm')
        doc_task_label = task_label
        doc_init_state = tf.get_variable(
            'doc_init_state', [self.task_cnt, 2 * self.hidden_size])
        doc_init_state = tf.nn.embedding_lookup(doc_init_state, doc_task_label)
        doc_init_state_fw = tf.nn.rnn_cell.LSTMStateTuple(
            doc_init_state[:, :self.hidden_size // 2],
            doc_init_state[:, self.hidden_size // 2:self.hidden_size])
        doc_init_state_bw = tf.nn.rnn_cell.LSTMStateTuple(
            doc_init_state[:, self.hidden_size:self.hidden_size * 3 // 2],
            doc_init_state[:, self.hidden_size * 3 // 2:])
        doc_init_state = (doc_init_state_fw, doc_init_state_bw)

        lstm_bkg, _state = lstm(outputs, doc_len, self.hidden_size, 'lstm_bkg', doc_init_state)
        lstm_outputs = lstm_bkg

        alphas = attention(lstm_bkg, [], doc_len, max_doc_len,
                           biases_initializer=self.b_init,
                           weights_initializer=self.w_init)
        doc_bkg = alphas @ lstm_outputs
        doc_bkg = tf.reshape(doc_bkg, [-1, self.hidden_size], name='new_bkg')
        outputs = doc_bkg

    return outputs
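# A tiny illustration of the per-task initial-state splitting used in dnsc() above
# (not original code): each task owns a trainable vector of length 2*hidden, cut into
# four chunks of hidden // 2 that form LSTMStateTuple(c, h) for the forward and
# backward cells, each of which has hidden // 2 units.
import tensorflow as tf

hidden = 4  # illustrative hidden_size
state = tf.reshape(tf.range(2 * hidden, dtype=tf.float32), [1, 2 * hidden])
fw = tf.nn.rnn_cell.LSTMStateTuple(state[:, :hidden // 2], state[:, hidden // 2:hidden])
bw = tf.nn.rnn_cell.LSTMStateTuple(state[:, hidden:hidden * 3 // 2], state[:, hidden * 3 // 2:])
# fw = (c=[[0, 1]], h=[[2, 3]]),  bw = (c=[[4, 5]], h=[[6, 7]])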
def build(self, data_iter, bert_config_file):
    # get the inputs
    with tf.variable_scope('inputs'):
        input_map = data_iter.get_next()
        usrid, prdid, input_x, input_y, doc_len = \
            (input_map['usr'], input_map['prd'], input_map['content'],
             input_map['rating'], input_map['doc_len'])
        input_x = tf.cast(input_x, tf.int32)

        self.usr = lookup(self.embeddings['usr_emb'], usrid, name='cur_usr_embedding')
        self.prd = lookup(self.embeddings['prd_emb'], prdid, name='cur_prd_embedding')
        # input_x = lookup(self.embeddings['wrd_emb'], input_x, name='cur_wrd_embedding')
        input_x = tf.reshape(input_x, [-1, self.max_doc_len])

    input_mask = tf.sequence_mask(doc_len, self.max_doc_len)
    input_mask = tf.cast(input_mask, tf.int32)
    bert_config = BertConfig.from_json_file(bert_config_file)
    bert = BertModel(bert_config, is_training=True,
                     input_ids=input_x, input_mask=input_mask,
                     token_type_ids=None, use_one_hot_embeddings=False)
    pooled_output = bert.get_pooled_output()
    sequence_output = bert.get_sequence_output()

    alphas = attention(sequence_output, None, self.max_doc_len, self.max_doc_len)
    sequence_output = tf.matmul(alphas, sequence_output)
    sequence_output = tf.squeeze(sequence_output, axis=1)

    bert_output = tf.concat([pooled_output, sequence_output], axis=1)
    logits = tf.layers.dense(
        bert_output, self.cls_cnt,
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.02))
    self.bert_output = bert_output
    self.logits = logits

    # build the process of model
    prediction = tf.argmax(logits, 1, name='prediction')
    self.prediction = prediction

    with tf.variable_scope("loss"):
        sce = tf.nn.softmax_cross_entropy_with_logits_v2
        log_probs = tf.nn.log_softmax(logits)
        self.probs = tf.nn.softmax(logits)
        loss = -tf.reduce_sum(
            tf.one_hot(input_y, self.cls_cnt, dtype=tf.float32) * log_probs, axis=-1)
        self.loss = tf.reduce_mean(loss)
        # self.loss = sce(logits=logits, labels=tf.one_hot(input_y, self.cls_cnt))
        # self.loss = tf.reduce_mean(self.loss)
        self.total_loss = tf.reduce_sum(loss)

    with tf.variable_scope("metrics"):
        correct_prediction = tf.equal(prediction, input_y)
        self.correct = correct_prediction
        mse = tf.reduce_sum(tf.square(prediction - input_y), name="mse")
        correct_num = tf.reduce_sum(tf.cast(correct_prediction, dtype=tf.int32), name="correct_num")
        accuracy = tf.reduce_sum(tf.cast(correct_prediction, "float"), name="accuracy")

    return self.total_loss, mse, correct_num, accuracy
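# A hypothetical driver for the graph built above. The optimizer, learning rate,
# iterator initialization and the surrounding model / data_iter / bert_config_file
# objects are assumptions made for the sketch; they are not part of build() itself.
import tensorflow as tf

total_loss, mse, correct_num, accuracy = model.build(data_iter, bert_config_file)
train_op = tf.train.AdamOptimizer(learning_rate=2e-5).minimize(model.loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(data_iter.initializer)  # assumes an initializable Dataset iterator
    while True:
        try:
            _, batch_loss, batch_correct = sess.run([train_op, model.loss, correct_num])
        except tf.errors.OutOfRangeError:  # one pass over the dataset finished
            break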