def fc_model(input_size, h1_size, output_size):
    """Two-layer fully connected model: fc -> batch norm -> ReLU -> fc.

    The 'output' slot is a placeholder to be filled with activations
    during the forward pass.
    """
    model = {}
    model['fc1'] = nn.fc_layer(input_size, h1_size)
    model['bn'] = nn.bn_layer_fc(h1_size)
    model['relu'] = nn.ReLU()
    model['fc2'] = nn.fc_layer(h1_size, output_size)
    model['output'] = None
    return model
def add_dann_loss(self, dom_reps, dom_labels, adapt_rate, weight=1.0,
                  scope=None):
    """DANN-style domain loss: reverse gradients, then classify the domain."""
    dom_reps_grl = flip_gradient(dom_reps, adapt_rate)
    dom_logits = nn_utils.fc_layer(dom_reps_grl, output_dim=2, scope=scope,
                                   reuse=True)
    dom_loss = tf.losses.softmax_cross_entropy(dom_labels, dom_logits,
                                               weights=weight, scope=scope)
    # L2-regularize only the variables under this scope.
    var_list = [
        var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        if scope in var.name
    ]
    reg_loss = self.l2_reg_lambda * sum(
        tf.nn.l2_loss(var) for var in var_list)
    return dom_loss + reg_loss
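
# `flip_gradient` is not defined in this file. Below is a minimal sketch of
# the standard TF1 gradient reversal layer (Ganin & Lempitsky, 2015) that the
# calls above appear to assume: identity on the forward pass, gradients
# negated and scaled by `l` on the backward pass. Treat it as an assumption
# about the missing helper, not the repo's actual implementation.
from tensorflow.python.framework import ops


class FlipGradientBuilder(object):
    """Wraps tf.identity with a gradient override that negates gradients."""

    def __init__(self):
        self.num_calls = 0

    def __call__(self, x, l=1.0):
        # A unique op name per call so each GRL can use its own scale `l`.
        grad_name = "FlipGradient%d" % self.num_calls

        @ops.RegisterGradient(grad_name)
        def _flip_gradients(op, grad):
            return [tf.negative(grad) * l]

        g = tf.get_default_graph()
        with g.gradient_override_map({"Identity": grad_name}):
            y = tf.identity(x)
        self.num_calls += 1
        return y


flip_gradient = FlipGradientBuilder()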
def add_clf_loss(self, cls_reps, cls_labels, num_labels=2, weight=1.0,
                 scope=None):
    """Classification loss with scope-local L2 regularization."""
    cls_logits = nn_utils.fc_layer(cls_reps, output_dim=num_labels,
                                   scope=scope, reuse=True)
    clf_loss = tf.losses.softmax_cross_entropy(cls_labels, cls_logits,
                                               weights=weight, scope=scope)
    var_list = [
        var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        if scope in var.name
    ]
    reg_loss = self.l2_reg_lambda * sum(
        tf.nn.l2_loss(var) for var in var_list)
    return clf_loss + reg_loss
def add_adv_loss(self, src_feat, tar_feat, adapt_rate, attention, mask,
                 batch_length, selective=True, weight=1.0, scope=None):
    """Token-level adversarial domain loss, optionally attention-weighted."""
    # Domain labels: 0 for every source token, 1 for every target token.
    dom_label = tf.concat([
        tf.tile(tf.zeros(1, dtype=tf.int32), [tf.shape(src_feat)[0]]),
        tf.tile(tf.ones(1, dtype=tf.int32), [tf.shape(tar_feat)[0]])
    ], 0)
    feat = tf.concat([src_feat, tar_feat], 0)
    feat = flip_gradient(feat, adapt_rate)  # gradient reversal layer
    dom_fc = nn_utils.fc_layer(feat, output_dim=feat.shape.as_list()[-1],
                               scope=scope + '/fc1', reuse=False)
    dom_logit = nn_utils.fc_layer(dom_fc, output_dim=2,
                                  scope=scope + '/fc2', reuse=False)
    domain_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(dom_label, 2), logits=dom_logit,
        name='dom_classifier')  # (b*m,)
    # Mask out padding tokens before pooling over the sequence.
    domain_loss = tf.reshape(domain_loss, [-1, self.max_len]) * mask  # (b, m)
    if selective:
        # Selective adversarial loss: weight each token by its attention score.
        print('selective adversarial loss')
        domain_loss = (tf.reduce_sum(domain_loss * attention, axis=-1) /
                       tf.cast(batch_length, tf.float32))  # (b,)
    else:
        print('adversarial loss')
        domain_loss = (tf.reduce_sum(domain_loss, axis=-1) /
                       tf.cast(batch_length, tf.float32))  # (b,)
    return weight * tf.reduce_mean(domain_loss)
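
# Usage sketch for add_adv_loss; every tensor name below is hypothetical.
# src_feat/tar_feat are per-token features flattened to (b*m, d); attention
# and mask are (b, m); adapt_rate is the GRL scaling factor, typically
# annealed from 0 toward 1 over training.
#
#   adv_loss = self.add_adv_loss(src_feat, tar_feat, adapt_rate=adapt_rate,
#                                attention=alpha_a, mask=mask,
#                                batch_length=batch_length, selective=True,
#                                weight=1.0, scope='domain')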
def build_eval_op(self):
    """Build the evaluation graph: sentiment and domain predictions."""
    self.reviews = tf.placeholder(
        tf.int32, [None, self.memory_size, self.sent_size], name="reviews")
    self.labels = tf.placeholder(tf.int32, [None, 2], name="labels")
    with tf.variable_scope(self.name):
        self.p_reps, self.p_word_attns, self.p_sent_attns = self.P_net(
            self.reviews, reuse=False)
    with tf.variable_scope('P_net'):
        self.sen_logits = nn_utils.fc_layer(self.p_reps, output_dim=2,
                                            scope="sentiment_classifier",
                                            reuse=False)
        self.sen_predictions = tf.argmax(self.sen_logits, 1,
                                         name="sen_predictions")
        sen_loss = tf.losses.softmax_cross_entropy(
            self.labels, self.sen_logits, weights=1.0,
            scope="sentiment_classifier")
        var_list = [
            var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            if "sentiment_classifier" in var.name
        ]
        reg_loss = self.l2_reg_lambda * sum(
            tf.nn.l2_loss(var) for var in var_list)
        self.loss = sen_loss + reg_loss
        self.dom_logits = nn_utils.fc_layer(self.p_reps, output_dim=2,
                                            scope="domain_classifier",
                                            reuse=False)
        self.dom_predictions = tf.argmax(self.dom_logits, 1,
                                         name="dom_predictions")
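
# Usage sketch (hypothetical feeds): once build_eval_op has run, predictions
# can be fetched from a TF1 session with padded review ids and one-hot labels.
#
#   sen_pred, dom_pred = sess.run(
#       [model.sen_predictions, model.dom_predictions],
#       feed_dict={model.reviews: batch_reviews, model.labels: batch_labels})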
def __call__(self, win_reviews, batch_length, ma_0, mo_0, dropout_rate,
             reuse=False):
    with tf.variable_scope(self.scope, reuse=reuse):
        with tf.variable_scope("Embedding_layer"):
            mask = tf.cast(tf.sign(win_reviews), tf.float32)
            self.input_emb = tf.nn.embedding_lookup(
                self.word2vec, win_reviews)  # (b, m, win, d)
            # NOTE: in TF1, tf.nn.dropout's second argument is keep_prob.
            self.input_emb = tf.nn.dropout(self.input_emb, dropout_rate)

        with tf.variable_scope('LSTM-OTE'):
            fw_cell = tf.contrib.rnn.LSTMCell(self.dim_asp_h)
            bw_cell = tf.contrib.rnn.LSTMCell(self.dim_asp_h)
            outputs, states = tf.nn.bidirectional_dynamic_rnn(
                fw_cell, bw_cell, self.input_emb,
                sequence_length=batch_length, dtype=tf.float32)
            asp_h = tf.concat(outputs, -1)  # (b, m, 2*dim_asp_h)

        with tf.variable_scope('Attention'):
            # Broadcast the global aspect/opinion memories over the batch.
            ma_t = tf.tile(ma_0, [tf.shape(asp_h)[0], 1])  # (b, 2*dim_asp_h)
            mo_t = tf.tile(mo_0, [tf.shape(asp_h)[0], 1])  # (b, 2*dim_opn_h)
            za_list, zo_list = [], []
            ma_list, mo_list = [], []
            ma_list.append(ma_t)  # (b, 2*dim_asp_h)
            mo_list.append(mo_t)  # (b, 2*dim_opn_h)
            for l in range(self.hops):
                # Interact every token with both memories.
                za = tf.concat(
                    [
                        self.tensor_product(asp_h, ma_t, self.Ta, self.Wa,
                                            self.bias_a,
                                            dropout_rate),  # (b, m, dim_rel)
                        self.tensor_product(asp_h, mo_t, self.Tao, self.Wo,
                                            self.bias_o,
                                            dropout_rate)  # (b, m, dim_rel)
                    ], -1)  # (b, m, 2*dim_rel)
                zo = tf.concat(
                    [
                        self.tensor_product(asp_h, mo_t, self.To, self.Wo,
                                            self.bias_o,
                                            dropout_rate),  # (b, m, dim_rel)
                        self.tensor_product(asp_h, ma_t,
                                            tf.transpose(self.Tao, [0, 2, 1]),
                                            self.Wa, self.bias_a,
                                            dropout_rate)  # (b, m, dim_rel)
                    ], -1)  # (b, m, 2*dim_rel)
                za_l = tf.reshape(za, [-1, 2 * self.dim_rel])  # (b*m, 2*dim_rel)
                zo_l = tf.reshape(zo, [-1, 2 * self.dim_rel])  # (b*m, 2*dim_rel)
                za_list.append(za_l)
                zo_list.append(zo_l)
                ea_l = tf.reduce_sum(tf.multiply(za_l, self.va), -1)  # (b*m,)
                eo_l = tf.reduce_sum(tf.multiply(zo_l, self.vo), -1)  # (b*m,)
                ea_l = tf.reshape(ea_l, [-1, self.max_len])  # (b, m)
                eo_l = tf.reshape(eo_l, [-1, self.max_len])  # (b, m)
                alpha_a = tf.expand_dims(
                    nn_utils.mask_softmax(ea_l, axis=1, mask=mask),
                    -1)  # (b, m, 1)
                alpha_o = tf.expand_dims(
                    nn_utils.mask_softmax(eo_l, axis=1, mask=mask),
                    -1)  # (b, m, 1)
                # Residual memory updates with the attention-weighted summaries.
                a_summary = tf.reduce_sum(asp_h * alpha_a, 1)  # (b, 2*dim_asp_h)
                ma_t = ma_t + a_summary
                o_summary = tf.reduce_sum(asp_h * alpha_o, 1)  # (b, 2*dim_asp_h)
                mo_t = mo_t + o_summary
                ma_list.append(ma_t)  # (b, 2*dim_asp_h)
                mo_list.append(mo_t)  # (b, 2*dim_opn_h)
            # Keep the last hop's interaction features as token representations.
            asp_h = za_list[-1]
            opn_h = zo_list[-1]
            asp_h = tf.reshape(asp_h, [-1, self.max_len, 2 * self.dim_rel])
            opn_h = tf.reshape(opn_h, [-1, self.max_len, 2 * self.dim_rel])

        with tf.variable_scope('LSTM-TS'):
            fw_cell = tf.contrib.rnn.LSTMCell(self.dim_ts_h)
            bw_cell = tf.contrib.rnn.LSTMCell(self.dim_ts_h)
            outputs, states = tf.nn.bidirectional_dynamic_rnn(
                fw_cell, bw_cell, asp_h, sequence_length=batch_length,
                dtype=tf.float32)
            ts_h = tf.concat(outputs, -1)  # (b, m, 2*dim_ts_h)

        with tf.variable_scope('FC_layer'):
            asp_h = tf.nn.dropout(asp_h, dropout_rate)  # (b, m, 2*dim_rel)
            opn_h = tf.nn.dropout(opn_h, dropout_rate)  # (b, m, 2*dim_rel)
            ts_h = tf.nn.dropout(ts_h, dropout_rate)  # (b, m, 2*dim_ts_h)
            # Flatten to (b*m, d) for the token-level taggers.
            asp_h = tf.reshape(asp_h, [-1, asp_h.shape.as_list()[-1]])
            opn_h = tf.reshape(opn_h, [-1, opn_h.shape.as_list()[-1]])
            ts_h = tf.reshape(ts_h, [-1, ts_h.shape.as_list()[-1]])
            asp_pred = nn_utils.fc_layer(asp_h, output_dim=self.dim_ote_y,
                                         scope="asp_tagger", reuse=reuse)
            opn_pred = nn_utils.fc_layer(opn_h, output_dim=self.dim_lm_y,
                                         scope="opn_tagger", reuse=reuse)
            ts_pred = nn_utils.fc_layer(ts_h, output_dim=self.dim_ts_y,
                                        scope="ts_tagger", reuse=reuse)
    return (asp_h, ts_h, asp_pred, opn_pred, ts_pred, a_summary, o_summary,
            tf.squeeze(alpha_a, 2), tf.squeeze(alpha_o, 2))
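
# `tensor_product` is not defined in this file. Below is a sketch of one
# plausible implementation, assuming a neural-tensor-network-style
# interaction (Socher et al., 2013) and shapes inferred from the call sites:
# h is (b, m, d), memory m_t is (b, d), T is (dim_rel, d, d), W is
# (2*d, dim_rel), bias is (dim_rel,), and all hidden sizes are an equal d.
# The result is (b, m, dim_rel). This is a guess, not the repo's code.
def tensor_product(self, h, m_t, T, W, bias, dropout_rate):
    # Bilinear term: h_i^T T_r m_t for every token i and relation slice r.
    Tm = tf.einsum('rij,bj->bri', T, m_t)        # (b, dim_rel, d)
    bilinear = tf.einsum('bmi,bri->bmr', h, Tm)  # (b, m, dim_rel)
    # Linear term on the concatenated token and (tiled) memory vectors.
    m = tf.shape(h)[1]
    m_tiled = tf.tile(tf.expand_dims(m_t, 1), [1, m, 1])  # (b, m, d)
    linear = tf.einsum('bmk,kr->bmr', tf.concat([h, m_tiled], -1), W)
    out = tf.tanh(bilinear + linear + bias)  # (b, m, dim_rel)
    # Matches the file's TF1 convention of passing keep_prob to dropout.
    return tf.nn.dropout(out, dropout_rate)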
def fc_model(input_size, output_size):
    """Linear variant: a single fully connected layer, no hidden layer."""
    model = {}
    model['fc1'] = nn.fc_layer(input_size, output_size)
    model['output'] = None
    return model