def run(self):
    """Build the RACL graph (word + domain embeddings) and run training.

    Constructs the aspect/opinion/sentiment heads via ``self.RACL``, the
    three cross-entropy losses plus the AE&OE regularization term, an Adam
    optimizer, and then drives the epoch loop: train (when
    ``self.opt.load == 0``), evaluate on test, evaluate on dev, and log.
    When ``self.opt.load == 1`` a checkpoint is restored and the loop
    breaks after a single evaluation pass.
    """
    # NOTE(review): self.x is assumed to hold padded word-id sequences of
    # shape (batch, max_sentence_len) — confirm against the placeholder defs.
    batch_size = tf.shape(self.x)[0]
    # Per-token inputs = concat of general word embedding and domain embedding.
    inputs_word = tf.nn.embedding_lookup(self.word_embedding, self.x)
    inputs_domain = tf.nn.embedding_lookup(self.domain_embedding, self.x)
    inputs = tf.concat([inputs_word, inputs_domain], -1)
    aspect_prob, opinion_prob, sentiment_prob = self.RACL(inputs, self.position)
    aspect_value = tf.nn.softmax(aspect_prob, -1)
    opinion_value = tf.nn.softmax(opinion_prob, -1)
    senti_value = tf.nn.softmax(sentiment_prob, -1)  # computed but not used below

    # AE & OE Regulation Loss
    # Penalizes tokens whose non-background aspect + opinion probabilities
    # (classes [1:]) sum above 1, normalized by the number of real
    # (non-padding) words.
    reg_cost = tf.reduce_sum(
        tf.maximum(
            0.,
            tf.reduce_sum(aspect_value[:, :, 1:], -1)
            + tf.reduce_sum(opinion_value[:, :, 1:], -1)
            - 1.)) / tf.reduce_sum(self.word_mask)

    # Mask AE & OE Probabilities
    # Broadcast the word mask over the class dimension, then flatten
    # (batch, seq, class) -> (batch*seq, class) for the loss.
    word_mask = tf.tile(tf.expand_dims(self.word_mask, -1),
                        [1, 1, self.opt.class_num])
    aspect_prob = tf.reshape(word_mask * aspect_prob, [-1, self.opt.class_num])
    aspect_label = tf.reshape(self.aspect_y, [-1, self.opt.class_num])
    opinion_prob = tf.reshape(word_mask * opinion_prob, [-1, self.opt.class_num])
    opinion_label = tf.reshape(self.opinion_y, [-1, self.opt.class_num])

    # Relation R4 (Only in Training)
    # In training/validation, the sentiment masks are set to 1.0 only for the aspect terms.
    # In testing, the sentiment masks are set to 1.0 for all words (except padding ones).
    senti_mask = tf.tile(tf.expand_dims(self.senti_mask, -1),
                         [1, 1, self.opt.class_num])

    # Mask SC Probabilities (senti_mask is cast explicitly; word_mask above
    # is presumably already float — TODO confirm placeholder dtypes).
    sentiment_prob = tf.reshape(
        tf.cast(senti_mask, tf.float32) * sentiment_prob,
        [-1, self.opt.class_num])
    sentiment_label = tf.reshape(self.sentiment_y, [-1, self.opt.class_num])

    with tf.name_scope('loss'):
        tv = tf.trainable_variables()  # unused here; kept for parity with the BERT variant
        total_para = count_parameter()
        self.logger.info('>>> total parameter: {}'.format(total_para))
        # Per-head softmax cross-entropy over the flattened (token, class) rows.
        aspect_cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=aspect_prob,
                labels=tf.cast(aspect_label, tf.float32)))
        opinion_cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=opinion_prob,
                labels=tf.cast(opinion_label, tf.float32)))
        sentiment_cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=sentiment_prob,
                labels=tf.cast(sentiment_label, tf.float32)))
        # Total objective: equal-weighted task losses + scaled regularizer.
        cost = aspect_cost + opinion_cost + sentiment_cost \
            + self.opt.reg_scale * reg_cost

    with tf.name_scope('train'):
        global_step = tf.Variable(0, name="tr_global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.opt.learning_rate).minimize(
                cost, global_step=global_step)

    with tf.name_scope('predict'):
        # Un-flatten labels/probabilities back to (batch, seq, class) so the
        # metric code can walk sentences.
        true_ay = tf.reshape(aspect_label,
                             [batch_size, self.opt.max_sentence_len, -1])
        pred_ay = tf.reshape(aspect_prob,
                             [batch_size, self.opt.max_sentence_len, -1])
        true_oy = tf.reshape(opinion_label,
                             [batch_size, self.opt.max_sentence_len, -1])
        pred_oy = tf.reshape(opinion_prob,
                             [batch_size, self.opt.max_sentence_len, -1])
        true_sy = tf.reshape(sentiment_label,
                             [batch_size, self.opt.max_sentence_len, -1])
        pred_sy = tf.reshape(sentiment_prob,
                             [batch_size, self.opt.max_sentence_len, -1])

    saver = tf.train.Saver(max_to_keep=120)
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    with tf.Session() as sess:
        if self.opt.load == 0:
            # Fresh training run: initialize all variables.
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            # Evaluation-only run: restore the latest task checkpoint.
            ckpt = tf.train.get_checkpoint_state('checkpoint/{}'.format(
                self.opt.task))
            saver.restore(sess, ckpt.model_checkpoint_path)
        train_sets = read_data(self.opt.train_path, self.word_id_mapping,
                               self.opt.max_sentence_len)
        dev_sets = read_data(self.opt.dev_path, self.word_id_mapping,
                             self.opt.max_sentence_len)
        # is_testing=True relaxes the sentiment mask to all non-padding words
        # (see the R4 comment above).
        test_sets = read_data(self.opt.test_path, self.word_id_mapping,
                              self.opt.max_sentence_len, is_testing=True)

        # Per-epoch metric history; indexed later by the best-dev epoch.
        aspect_f1_list = []
        opinion_f1_list = []
        sentiment_acc_list = []
        sentiment_f1_list = []
        ABSA_f1_list = []
        dev_metric_list = []
        dev_loss_list = []
        for i in range(self.opt.n_iter):
            'Train'
            tr_loss = 0.
            tr_aloss = 0.
            tr_oloss = 0.
            tr_sloss = 0.
            tr_rloss = 0.
            if self.opt.load == 0:
                epoch_start = time.time()
                for train, num in self.get_batch_data(
                        train_sets, self.opt.batch_size, self.opt.kp1,
                        self.opt.kp2, True, True):
                    tr_eloss, tr_aeloss, tr_oeloss, tr_seloss, tr_reloss, _, step = sess.run(
                        [
                            cost, aspect_cost, opinion_cost, sentiment_cost,
                            reg_cost, optimizer, global_step
                        ],
                        feed_dict=train)
                    # Accumulate batch losses weighted by batch size `num`.
                    tr_loss += tr_eloss * num
                    tr_aloss += tr_aeloss * num
                    tr_oloss += tr_oeloss * num
                    tr_sloss += tr_seloss * num
                    tr_rloss += tr_reloss * num
                # if i >= self.opt.warmup_iter:
                #     saver.save(sess, 'checkpoint/{}/RACL.ckpt'.format(self.opt.task), global_step=i)
                epoch_end = time.time()
                epoch_time = 'Epoch Time: {:.0f}m {:.0f}s'.format(
                    (epoch_end - epoch_start) // 60,
                    (epoch_end - epoch_start) % 60)

            'Test'
            a_preds, a_labels = [], []
            o_preds, o_labels = [], []
            s_preds, s_labels = [], []
            final_mask = []
            # Fixed eval batch size 200, dropout keep-probs forced to 1.0.
            for test, _ in self.get_batch_data(test_sets, 200, 1.0, 1.0):
                _step, t_ay, p_ay, t_oy, p_oy, t_sy, p_sy, e_mask = sess.run(
                    [
                        global_step, true_ay, pred_ay, true_oy, pred_oy,
                        true_sy, pred_sy, self.word_mask
                    ],
                    feed_dict=test)
                a_preds.extend(p_ay)
                a_labels.extend(t_ay)
                o_preds.extend(p_oy)
                o_labels.extend(t_oy)
                s_preds.extend(p_sy)
                s_labels.extend(t_sy)
                final_mask.extend(e_mask)
            aspect_f1, opinion_f1, sentiment_acc, sentiment_f1, ABSA_f1 \
                = get_metric(a_labels, a_preds, o_labels, o_preds,
                             s_labels, s_preds, final_mask, 1)
            aspect_f1_list.append(aspect_f1)
            opinion_f1_list.append(opinion_f1)
            sentiment_acc_list.append(sentiment_acc)
            sentiment_f1_list.append(sentiment_f1)
            ABSA_f1_list.append(ABSA_f1)

            'Dev'
            dev_loss = 0.
            dev_aloss = 0.
            dev_oloss = 0.
            dev_sloss = 0.
            dev_rloss = 0.
            dev_a_preds, dev_a_labels = [], []
            dev_o_preds, dev_o_labels = [], []
            dev_s_preds, dev_s_labels = [], []
            dev_final_mask = []
            for dev, num in self.get_batch_data(dev_sets, 200, 1.0, 1.0):
                dev_eloss, dev_aeloss, dev_oeloss, dev_seloss, dev_reloss, _step, dev_t_ay, dev_p_ay, dev_t_oy, dev_p_oy, dev_t_sy, dev_p_sy, dev_e_mask = \
                    sess.run([cost, aspect_cost, opinion_cost, sentiment_cost,
                              reg_cost, global_step, true_ay, pred_ay, true_oy,
                              pred_oy, true_sy, pred_sy, self.word_mask],
                             feed_dict=dev)
                dev_a_preds.extend(dev_p_ay)
                dev_a_labels.extend(dev_t_ay)
                dev_o_preds.extend(dev_p_oy)
                dev_o_labels.extend(dev_t_oy)
                dev_s_preds.extend(dev_p_sy)
                dev_s_labels.extend(dev_t_sy)
                dev_final_mask.extend(dev_e_mask)
                dev_loss += dev_eloss * num
                dev_aloss += dev_aeloss * num
                dev_oloss += dev_oeloss * num
                dev_sloss += dev_seloss * num
                dev_rloss += dev_reloss * num
            dev_aspect_f1, dev_opinion_f1, dev_sentiment_acc, dev_sentiment_f1, dev_ABSA_f1 \
                = get_metric(dev_a_labels, dev_a_preds, dev_o_labels,
                             dev_o_preds, dev_s_labels, dev_s_preds,
                             dev_final_mask, 1)
            # During warmup, record sentinel values so warmup epochs can
            # never be selected as the best epoch below.
            if i < self.opt.warmup_iter:
                dev_metric_list.append(0.)
                dev_loss_list.append(1000.)
            else:
                dev_metric_list.append(dev_ABSA_f1)
                dev_loss_list.append(dev_loss)
            if self.opt.load == 0:
                # `step` and `epoch_time` only exist on training runs, hence
                # the guard matching the training branch above.
                self.logger.info('\n{:-^80}'.format('Iter' + str(i)))
                self.logger.info(
                    'Train: final loss={:.6f}, aspect loss={:.6f}, opinion loss={:.6f}, sentiment loss={:.6f}, reg loss={:.6f}, step={}'
                    .format(tr_loss, tr_aloss, tr_oloss, tr_sloss, tr_rloss,
                            step))
                self.logger.info(
                    'Dev: final loss={:.6f}, aspect loss={:.6f}, opinion loss={:.6f}, sentiment loss={:.6f}, reg loss={:.6f}, step={}'
                    .format(dev_loss, dev_aloss, dev_oloss, dev_sloss,
                            dev_rloss, step))
                self.logger.info(
                    'Dev: aspect f1={:.4f}, opinion f1={:.4f}, sentiment acc=={:.4f}, sentiment f1=={:.4f}, ABSA f1=={:.4f},'
                    .format(dev_aspect_f1, dev_opinion_f1, dev_sentiment_acc,
                            dev_sentiment_f1, dev_ABSA_f1))
                self.logger.info(
                    'Test: aspect f1={:.4f}, opinion f1={:.4f}, sentiment acc=={:.4f}, sentiment f1=={:.4f}, ABSA f1=={:.4f},'
                    .format(aspect_f1, opinion_f1, sentiment_acc,
                            sentiment_f1, ABSA_f1))
                self.logger.info(
                    'Current Max Metrics Index : {} Current Min Loss Index : {} {}'
                    .format(dev_metric_list.index(max(dev_metric_list)),
                            dev_loss_list.index(min(dev_loss_list)),
                            epoch_time))
            if self.opt.load == 1:
                # Checkpoint-evaluation mode: a single pass is enough.
                break

        self.logger.info('\n{:-^80}'.format('Mission Complete'))
        # Report test metrics at the epoch with the best dev ABSA-F1 ...
        max_dev_index = dev_metric_list.index(max(dev_metric_list))
        self.logger.info('Dev Max Metrics Index: {}'.format(max_dev_index))
        self.logger.info(
            'aspect f1={:.4f}, opinion f1={:.4f}, sentiment acc=={:.4f}, sentiment f1=={:.4f}, ABSA f1=={:.4f},'
            .format(aspect_f1_list[max_dev_index],
                    opinion_f1_list[max_dev_index],
                    sentiment_acc_list[max_dev_index],
                    sentiment_f1_list[max_dev_index],
                    ABSA_f1_list[max_dev_index]))
        # ... and at the epoch with the lowest dev loss.
        min_dev_index = dev_loss_list.index(min(dev_loss_list))
        self.logger.info('Dev Min Loss Index: {}'.format(min_dev_index))
        self.logger.info(
            'aspect f1={:.4f}, opinion f1={:.4f}, sentiment acc=={:.4f}, sentiment f1=={:.4f}, ABSA f1=={:.4f},'
            .format(aspect_f1_list[min_dev_index],
                    opinion_f1_list[min_dev_index],
                    sentiment_acc_list[min_dev_index],
                    sentiment_f1_list[min_dev_index],
                    ABSA_f1_list[min_dev_index]))
def run(self):
    """Build the RACL-BERT graph and run the train/dev/test loop.

    BERT-backed variant of ``run``: inputs come from BERT id/mask/segment
    placeholders via ``self.RACL_BERT``; the aspect loss is double-weighted;
    and training uses two optimizers — AdamWeightDecay at a fixed small LR
    for the BERT variables and Adam with exponential decay for the RACL
    variables — with globally clipped gradients. BERT-large weights are
    initialized from a local checkpoint before the session starts.
    """
    # self.x is unused here; batch size comes from the word mask instead.
    batch_size = tf.shape(self.word_mask)[0]
    aspect_prob, opinion_prob, sentiment_prob = self.RACL_BERT(
        self.bert_input_ids, self.bert_input_mask, self.bert_segment_ids,
        self.position)
    aspect_value = tf.nn.softmax(aspect_prob, -1)
    opinion_value = tf.nn.softmax(opinion_prob, -1)
    senti_value = tf.nn.softmax(sentiment_prob, -1)  # computed but not used below

    # AE & OE Regulation Loss
    # Penalizes tokens whose non-background aspect + opinion probabilities
    # (classes [1:]) sum above 1, normalized by the non-padding word count.
    reg_cost = tf.reduce_sum(
        tf.maximum(
            0.,
            tf.reduce_sum(aspect_value[:, :, 1:], -1)
            + tf.reduce_sum(opinion_value[:, :, 1:], -1)
            - 1.)) / tf.reduce_sum(self.word_mask)

    # Mask AE & OE Probabilities
    word_mask = tf.tile(tf.expand_dims(self.word_mask, -1),
                        [1, 1, self.opt.class_num])
    aspect_prob = tf.reshape(word_mask * aspect_prob, [-1, self.opt.class_num])
    aspect_label = tf.reshape(self.aspect_y, [-1, self.opt.class_num])
    opinion_prob = tf.reshape(word_mask * opinion_prob, [-1, self.opt.class_num])
    opinion_label = tf.reshape(self.opinion_y, [-1, self.opt.class_num])

    # Relation R4 (Only in Training)
    # In training/validation, the sentiment masks are set to 1.0 only for the aspect terms.
    # In testing, the sentiment masks are set to 1.0 for all words (except padding ones).
    senti_mask = tf.tile(tf.expand_dims(self.senti_mask, -1),
                         [1, 1, self.opt.class_num])

    # Mask SC Probabilities
    sentiment_prob = tf.reshape(
        tf.cast(senti_mask, tf.float32) * sentiment_prob,
        [-1, self.opt.class_num])
    sentiment_label = tf.reshape(self.sentiment_y, [-1, self.opt.class_num])

    with tf.name_scope('loss'):
        tv = tf.trainable_variables()
        # Debug dump of the variable list; the index order matters for the
        # tv[:391] / tv[391:] split in the train scope below.
        for idx, v in enumerate(tv):
            print('para {}/{}'.format(idx, len(tv)), v)
        total_para = count_parameter()
        self.logger.info('>>> total parameter: {}'.format(total_para))
        aspect_cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=aspect_prob,
                labels=tf.cast(aspect_label, tf.float32)))
        opinion_cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=opinion_prob,
                labels=tf.cast(opinion_label, tf.float32)))
        sentiment_cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=sentiment_prob,
                labels=tf.cast(sentiment_label, tf.float32)))
        # Aspect loss is double-weighted in this variant (cf. the non-BERT
        # run(), which weights all three heads equally).
        cost = 2 * aspect_cost + opinion_cost + sentiment_cost \
            + self.opt.reg_scale * reg_cost

    with tf.name_scope('train'):
        global_step = tf.Variable(0, name="tr_global_step", trainable=False)
        # Fixed small LR for BERT fine-tuning; decayed LR for RACL layers.
        bert_lr = 0.00001
        mine_lr = self.opt.learning_rate
        mine_lr = tf.train.exponential_decay(mine_lr,
                                             global_step,
                                             decay_steps=self.decay_step,
                                             decay_rate=0.95,
                                             staircase=True)
        # NOTE(review): assumes the first 391 trainable variables are exactly
        # the BERT-large ones (depends on construction order printed above)
        # — confirm if the backbone or head definitions change.
        bert_vars = tv[:391]
        mine_vars = tv[391:]
        bert_opt = bert_optimization.AdamWeightDecayOptimizer(
            learning_rate=bert_lr)
        mine_opt = tf.train.AdamOptimizer(mine_lr)
        # One gradient pass over all variables, clipped jointly, then split
        # back into the two optimizer groups.
        grads = tf.gradients(cost, bert_vars + mine_vars)
        (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
        bert_grads = grads[:391]
        mine_grads = grads[391:]
        # mine_grads = tf.gradients(cost, mine_vars)
        bert_op = bert_opt.apply_gradients(zip(bert_grads, bert_vars))
        # Only the RACL optimizer advances global_step, so the LR decay and
        # logged step count follow the RACL updates.
        mine_op = mine_opt.apply_gradients(zip(mine_grads, mine_vars),
                                           global_step=global_step)
        optimizer = tf.group(bert_op, mine_op)

    with tf.name_scope('predict'):
        # Un-flatten labels/probabilities back to (batch, seq, class) for the
        # metric code.
        true_ay = tf.reshape(aspect_label,
                             [batch_size, self.opt.max_sentence_len, -1])
        pred_ay = tf.reshape(aspect_prob,
                             [batch_size, self.opt.max_sentence_len, -1])
        true_oy = tf.reshape(opinion_label,
                             [batch_size, self.opt.max_sentence_len, -1])
        pred_oy = tf.reshape(opinion_prob,
                             [batch_size, self.opt.max_sentence_len, -1])
        true_sy = tf.reshape(sentiment_label,
                             [batch_size, self.opt.max_sentence_len, -1])
        pred_sy = tf.reshape(sentiment_prob,
                             [batch_size, self.opt.max_sentence_len, -1])

    with tf.name_scope('load-bert-large'):
        # load pre-trained bert-large model
        saver = tf.train.Saver(max_to_keep=120)
        # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
        init_checkpoint = "./bert-large/bert_model.ckpt"
        use_tpu = False  # assigned but unused in this method
        tvars = tf.trainable_variables()
        (assignment_map, initialized_variable_names
         ) = bert_modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        # print(initialized_variable_names)
        # Maps matching variables to the checkpoint so the initializer below
        # picks up pre-trained BERT weights instead of random ones.
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    with tf.Session() as sess:
        if self.opt.load == 0:
            # Fresh run: initializes all variables (BERT ones come from the
            # checkpoint via init_from_checkpoint above).
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            # Evaluation-only run: restore the latest task checkpoint.
            ckpt = tf.train.get_checkpoint_state('checkpoint/{}'.format(
                self.opt.task))
            saver.restore(sess, ckpt.model_checkpoint_path)
        # R4 deactivated
        # is_testing=True on ALL splits: the sentiment mask covers every
        # non-padding word even during training (R4 is switched off here).
        train_sets = read_bert_data(self.opt.train_path,
                                    self.opt.max_sentence_len,
                                    is_testing=True)
        dev_sets = read_bert_data(self.opt.dev_path,
                                  self.opt.max_sentence_len,
                                  is_testing=True)
        test_sets = read_bert_data(self.opt.test_path,
                                   self.opt.max_sentence_len,
                                   is_testing=True)

        # Per-epoch metric history; indexed later by the best-dev epoch.
        aspect_f1_list = []
        opinion_f1_list = []
        sentiment_acc_list = []
        sentiment_f1_list = []
        ABSA_f1_list = []
        dev_metric_list = []
        dev_loss_list = []
        for i in range(self.opt.n_iter):
            'Train'
            tr_loss = 0.
            tr_aloss = 0.
            tr_oloss = 0.
            tr_sloss = 0.
            tr_rloss = 0.
            if self.opt.load == 0:
                epoch_start = time.time()
                for train, num in self.get_batch_data(
                        train_sets, self.opt.batch_size, self.opt.kp1,
                        self.opt.kp2, True, True):
                    tr_eloss, tr_aeloss, tr_oeloss, tr_seloss, tr_reloss, _, step = sess.run(
                        [
                            cost, aspect_cost, opinion_cost, sentiment_cost,
                            reg_cost, optimizer, global_step
                        ],
                        feed_dict=train)
                    # Accumulate batch losses weighted by batch size `num`.
                    tr_loss += tr_eloss * num
                    tr_aloss += tr_aeloss * num
                    tr_oloss += tr_oeloss * num
                    tr_sloss += tr_seloss * num
                    tr_rloss += tr_reloss * num
                # if i >= self.opt.warmup_iter:
                #     saver.save(sess, 'checkpoint/{}/RACL.ckpt'.format(self.opt.task), global_step=i)
                epoch_end = time.time()
                epoch_time = 'Epoch Time: {:.0f}m {:.0f}s'.format(
                    (epoch_end - epoch_start) // 60,
                    (epoch_end - epoch_start) % 60)

            'Test'
            a_preds, a_labels = [], []
            o_preds, o_labels = [], []
            s_preds, s_labels = [], []
            final_mask = []
            # Fixed eval batch size 200, dropout keep-probs forced to 1.0.
            for test, _ in self.get_batch_data(test_sets, 200, 1.0, 1.0):
                _step, t_ay, p_ay, t_oy, p_oy, t_sy, p_sy, e_mask = sess.run(
                    [
                        global_step, true_ay, pred_ay, true_oy, pred_oy,
                        true_sy, pred_sy, self.word_mask
                    ],
                    feed_dict=test)
                a_preds.extend(p_ay)
                a_labels.extend(t_ay)
                o_preds.extend(p_oy)
                o_labels.extend(t_oy)
                s_preds.extend(p_sy)
                s_labels.extend(t_sy)
                final_mask.extend(e_mask)
            aspect_f1, opinion_f1, sentiment_acc, sentiment_f1, ABSA_f1 \
                = get_metric(a_labels, a_preds, o_labels, o_preds,
                             s_labels, s_preds, final_mask, 1)
            aspect_f1_list.append(aspect_f1)
            opinion_f1_list.append(opinion_f1)
            sentiment_acc_list.append(sentiment_acc)
            sentiment_f1_list.append(sentiment_f1)
            ABSA_f1_list.append(ABSA_f1)

            'Dev'
            dev_loss = 0.
            dev_aloss = 0.
            dev_oloss = 0.
            dev_sloss = 0.
            dev_rloss = 0.
            dev_a_preds, dev_a_labels = [], []
            dev_o_preds, dev_o_labels = [], []
            dev_s_preds, dev_s_labels = [], []
            dev_final_mask = []
            for dev, num in self.get_batch_data(dev_sets, 200, 1.0, 1.0):
                dev_eloss, dev_aeloss, dev_oeloss, dev_seloss, dev_reloss, _step, dev_t_ay, dev_p_ay, dev_t_oy, dev_p_oy, dev_t_sy, dev_p_sy, dev_e_mask = \
                    sess.run([cost, aspect_cost, opinion_cost, sentiment_cost,
                              reg_cost, global_step, true_ay, pred_ay, true_oy,
                              pred_oy, true_sy, pred_sy, self.word_mask],
                             feed_dict=dev)
                dev_a_preds.extend(dev_p_ay)
                dev_a_labels.extend(dev_t_ay)
                dev_o_preds.extend(dev_p_oy)
                dev_o_labels.extend(dev_t_oy)
                dev_s_preds.extend(dev_p_sy)
                dev_s_labels.extend(dev_t_sy)
                dev_final_mask.extend(dev_e_mask)
                dev_loss += dev_eloss * num
                dev_aloss += dev_aeloss * num
                dev_oloss += dev_oeloss * num
                dev_sloss += dev_seloss * num
                dev_rloss += dev_reloss * num
            dev_aspect_f1, dev_opinion_f1, dev_sentiment_acc, dev_sentiment_f1, dev_ABSA_f1 \
                = get_metric(dev_a_labels, dev_a_preds, dev_o_labels,
                             dev_o_preds, dev_s_labels, dev_s_preds,
                             dev_final_mask, 1)
            # During warmup, record sentinel values so warmup epochs can
            # never be selected as the best epoch below.
            if i < self.opt.warmup_iter:
                dev_metric_list.append(0.)
                dev_loss_list.append(1000.)
            else:
                dev_metric_list.append(dev_ABSA_f1)
                dev_loss_list.append(dev_loss)
            if self.opt.load == 0:
                # `step` and `epoch_time` only exist on training runs, hence
                # the guard matching the training branch above.
                self.logger.info('\n{:-^80}'.format('Iter' + str(i)))
                self.logger.info(
                    'Train: final loss={:.6f}, aspect loss={:.6f}, opinion loss={:.6f}, sentiment loss={:.6f}, reg loss={:.6f}, step={}'
                    .format(tr_loss, tr_aloss, tr_oloss, tr_sloss, tr_rloss,
                            step))
                self.logger.info(
                    'Dev: final loss={:.6f}, aspect loss={:.6f}, opinion loss={:.6f}, sentiment loss={:.6f}, reg loss={:.6f}, step={}'
                    .format(dev_loss, dev_aloss, dev_oloss, dev_sloss,
                            dev_rloss, step))
                self.logger.info(
                    'Dev: aspect f1={:.4f}, opinion f1={:.4f}, sentiment acc=={:.4f}, sentiment f1=={:.4f}, ABSA f1=={:.4f},'
                    .format(dev_aspect_f1, dev_opinion_f1, dev_sentiment_acc,
                            dev_sentiment_f1, dev_ABSA_f1))
                self.logger.info(
                    'Test: aspect f1={:.4f}, opinion f1={:.4f}, sentiment acc=={:.4f}, sentiment f1=={:.4f}, ABSA f1=={:.4f},'
                    .format(aspect_f1, opinion_f1, sentiment_acc,
                            sentiment_f1, ABSA_f1))
                self.logger.info(
                    'Current Max Metrics Index : {} Current Min Loss Index : {} {}'
                    .format(dev_metric_list.index(max(dev_metric_list)),
                            dev_loss_list.index(min(dev_loss_list)),
                            epoch_time))
            if self.opt.load == 1:
                # Checkpoint-evaluation mode: a single pass is enough.
                break

        self.logger.info('\n{:-^80}'.format('Mission Complete'))
        # Report test metrics at the epoch with the best dev ABSA-F1 ...
        max_dev_index = dev_metric_list.index(max(dev_metric_list))
        self.logger.info('Dev Max Metrics Index: {}'.format(max_dev_index))
        self.logger.info(
            'aspect f1={:.4f}, opinion f1={:.4f}, sentiment acc=={:.4f}, sentiment f1=={:.4f}, ABSA f1=={:.4f},'
            .format(aspect_f1_list[max_dev_index],
                    opinion_f1_list[max_dev_index],
                    sentiment_acc_list[max_dev_index],
                    sentiment_f1_list[max_dev_index],
                    ABSA_f1_list[max_dev_index]))
        # ... and at the epoch with the lowest dev loss.
        min_dev_index = dev_loss_list.index(min(dev_loss_list))
        self.logger.info('Dev Min Loss Index: {}'.format(min_dev_index))
        self.logger.info(
            'aspect f1={:.4f}, opinion f1={:.4f}, sentiment acc=={:.4f}, sentiment f1=={:.4f}, ABSA f1=={:.4f},'
            .format(aspect_f1_list[min_dev_index],
                    opinion_f1_list[min_dev_index],
                    sentiment_acc_list[min_dev_index],
                    sentiment_f1_list[min_dev_index],
                    ABSA_f1_list[min_dev_index]))
# --- Incomplete fragment: mid-epoch train/validate snippet from a separate
# Keras-style pipeline. The enclosing loop/function (defining gen_doc_1,
# gen_doc_2, ii, t0, loss, best_dev_metric, etc.) is outside this view.
# NOTE(review): `.next()` and the bare `print` statement are Python 2 syntax;
# this fragment cannot run under Python 3 as written — confirm intent before
# porting.
batch_x_1, batch_y_1 = gen_doc_1.next()
batch_x_2, batch_y_2 = gen_doc_2.next()
# One auxiliary document-level training step on the two document batches.
doc_model.train_on_batch([batch_x_1, batch_x_2], [batch_y_1, batch_y_2])
tr_time = time() - t0
logger.info('Epoch %d, train: %is' % (ii, tr_time))
# NOTE(review): `loss`, `loss_aspect`, `loss_sentiment` are not assigned in
# the visible fragment — presumably set earlier in the enclosing loop.
print loss, loss_aspect, loss_sentiment
# Predict on the dev split; the third input is a zero placeholder of shape
# (n_samples, overall_maxlen).
y_pred_aspect, y_pred_sentiment = aspect_model.predict(
    [dev_x, dev_y_opinion, np.zeros((len(dev_x), overall_maxlen))])
f_aspect, f_opinion, acc_s, f_s, f_absa \
    = get_metric(dev_y_aspect, y_pred_aspect, dev_y_sentiment,
                 y_pred_sentiment, dev_y_mask, args.train_op)
logger.info(
    'Validation results -- [Aspect f1]: %.4f, [Opinion f1]: %.4f, [Sentiment acc]: %.4f, [Sentiment f1]: %.4f, [Overall f1]: %.4f'
    % (f_aspect, f_opinion, acc_s, f_s, f_absa))
# Track the best overall dev F1; checkpointing only kicks in after epoch 60.
if f_absa > best_dev_metric and ii > 60:
    best_dev_metric = f_absa
    save_model = True
else:
    save_model = False
# Predict on the test split with the same zero placeholder input.
y_pred_aspect, y_pred_sentiment = aspect_model.predict(
    [test_x, test_y_opinion, np.zeros((len(test_x), overall_maxlen))])