class agent():
    def __init__(self, is_training):
        # Placeholders
        self.word = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_word')
        self.pos1 = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_pos1')
        self.pos2 = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_pos2')
        self.length = tf.placeholder(dtype=tf.int32, shape=[None], name='input_length')
        self.mask = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_mask')
        self.label = tf.placeholder(dtype=tf.int32, shape=[None], name='label')
        self.label_for_select = tf.placeholder(dtype=tf.int32, shape=[None], name='label_for_select')
        self.scope = tf.placeholder(dtype=tf.int32, shape=[FLAGS.batch_size + 1], name='scope')
        self.weights = tf.placeholder(dtype=tf.float32, shape=[None])
        self.data_word_vec = np.load(os.path.join(FLAGS.export_path, 'vec.npy'))
        self.reward_holder = tf.placeholder(dtype=tf.float32, shape=[None], name='reward')
        self.action_holder = tf.placeholder(dtype=tf.int32, shape=[None], name='action')

        # Network
        self.embedding = Embedding(is_training, self.data_word_vec, self.word, self.pos1, self.pos2)
        self.encoder = Encoder(is_training, FLAGS.drop_prob)
        self.selector = Selector(FLAGS.num_classes, is_training, FLAGS.drop_prob)
        self.classifier = Classifier(is_training, self.label, self.weights)

        # Forward computation
        self.word_embedding = self.embedding.word_embedding()
        self.pos_embedding = self.embedding.pos_embedding()
        # note: self.embedding is reused to hold the concatenated embedding tensor
        self.embedding = self.embedding.concat_embedding(self.word_embedding, self.pos_embedding)
        self.x = self.encoder.cnn(self.embedding, FLAGS.hidden_size, self.mask, activation=tf.nn.relu)
        self.logit, self.repre = self.selector.no_bag(self.x)
        self.outputvalue = self.classifier.outputvalue(self.logit)
        self.output = self.classifier.output(self.logit)

        # Policy-gradient loss: log-probability of the taken action, weighted by the reward
        self.softmax_outputs = tf.nn.softmax(self.logit)
        self.action_onehot = tf.one_hot(indices=self.action_holder, depth=FLAGS.num_classes, dtype=tf.float32)
        self.step = tf.multiply(self.softmax_outputs, self.action_onehot)
        self.action_outputvalue = tf.reduce_sum(self.step, axis=1)
        self.temp = tf.log(self.action_outputvalue) * self.reward_holder
        # reduce_mean averages the per-sentence losses of the 188 sentences
        self.loss = -tf.reduce_mean(tf.log(self.action_outputvalue) * self.reward_holder)

        # Supervised pre-training loss and optimizer
        self.loss_pre = self.classifier.softmax_cross_entropy(self.logit)
        self.optimizer_pre = tf.train.GradientDescentOptimizer(0.5)
        self.grads_and_vars = self.optimizer_pre.compute_gradients(self.loss_pre)
        self.train_op = self.optimizer_pre.apply_gradients(self.grads_and_vars)

        # Reinforcement-learning optimizer
        self.optimizer = tf.train.AdamOptimizer(0.0001)
        self.train_op_rl = self.optimizer.minimize(self.loss)
        self.tvars = tf.trainable_variables()
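
# A minimal usage sketch (not from the original repo): how the agent's two training
# modes might be driven — supervised pre-training via train_op / loss_pre, then a
# REINFORCE-style update via train_op_rl, feeding sampled actions and their rewards
# into action_holder / reward_holder. The dummy batch, the batch size n, and the
# zero rewards are assumptions; only the placeholder and op names come from the
# agent class above. np, tf, and FLAGS are assumed to be imported at module level.
def _agent_training_sketch():
    with tf.Graph().as_default():
        model = agent(is_training=True)
        n, L = 32, FLAGS.max_length                        # n: assumed mini-batch size

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            feed = {
                model.word: np.zeros((n, L), np.int32),    # token ids
                model.pos1: np.zeros((n, L), np.int32),    # positions w.r.t. entity 1
                model.pos2: np.zeros((n, L), np.int32),    # positions w.r.t. entity 2
                model.mask: np.ones((n, L), np.int32),     # mask for the encoder
                model.label: np.zeros((n,), np.int32),     # distant-supervision labels
                model.weights: np.ones((n,), np.float32),  # loss weights used by the classifier
            }

            # 1) Supervised pre-training step on the distant-supervision labels.
            _, pre_loss = sess.run([model.train_op, model.loss_pre], feed_dict=feed)

            # 2) Sample an action (relation decision) per sentence from the current policy.
            probs = sess.run(model.softmax_outputs, feed_dict=feed)
            actions = np.array([np.random.choice(FLAGS.num_classes, p=p / p.sum()) for p in probs])

            # 3) Policy-gradient step; in the full pipeline the rewards come from the PCNN model.
            feed[model.action_holder] = actions
            feed[model.reward_holder] = np.zeros((n,), np.float32)
            _, rl_loss = sess.run([model.train_op_rl, model.loss], feed_dict=feed)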
class PCNN():
    def __init__(self, is_training):
        # Placeholders
        self.word = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_word')
        self.pos1 = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_pos1')
        self.pos2 = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_pos2')
        self.length = tf.placeholder(dtype=tf.int32, shape=[None], name='input_length')
        self.mask = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_mask')
        self.scope = tf.placeholder(dtype=tf.int32, shape=[FLAGS.batch_size + 1], name='scope')
        self.bag_label = tf.placeholder(dtype=tf.int32, shape=[None], name='bag_label')
        self.sentence_label = tf.placeholder(dtype=tf.int32, shape=[None], name='sentence_label')
        self.label_weights = tf.placeholder(dtype=tf.float32, shape=[FLAGS.batch_size])
        self.data_word_vec = np.load(os.path.join(FLAGS.export_path, 'vec.npy'))

        # Network
        self.embedding = Embedding(is_training, self.data_word_vec, self.word, self.pos1, self.pos2)
        self.encoder = Encoder(is_training, FLAGS.drop_prob)
        self.selector = Selector(FLAGS.num_classes, is_training, FLAGS.drop_prob)
        self.classifier = Classifier(is_training, self.bag_label, self.label_weights)

        # Forward computation
        self.word_embedding = self.embedding.word_embedding()
        self.pos_embedding = self.embedding.pos_embedding()
        # note: self.embedding is reused to hold the concatenated embedding tensor
        self.embedding = self.embedding.concat_embedding(self.word_embedding, self.pos_embedding)
        self.x = self.encoder.pcnn(self.embedding, FLAGS.hidden_size, self.mask, activation=tf.nn.relu)
        self.logit, self.repre = self.selector.attention(self.x, self.scope, self.sentence_label)

        # Used only to judge whether the DS labels or the selected sentences are better; not part of optimization
        self.label_onehot = tf.one_hot(indices=self.bag_label, depth=FLAGS.num_classes, dtype=tf.float32)
        self.bag_loss_temp = tf.nn.softmax_cross_entropy_with_logits(labels=self.label_onehot, logits=self.logit)
        self.bag_loss = tf.reshape(self.bag_loss_temp, [1, -1])
        self.loss_mean = tf.reduce_mean(self.bag_loss)

        # Compute the reward: log-probability assigned to the gold bag label
        self.softmax_output = tf.nn.softmax(self.logit)
        self.reward = tf.log(tf.reduce_sum(self.label_onehot * self.softmax_output, axis=1))
        # self.loss_mine = -tf.reduce_mean(self.reward, axis=0)  # same as the loss below, only without the label weights

        # Loss for gradient descent and prediction outputs
        self.loss = self.classifier.softmax_cross_entropy(self.logit)
        # self.loss_one = self.classifier.softmax_cross_entropy(self.logit_one)
        self.output = self.classifier.output(self.logit)
        # self.output_one = self.classifier.output(self.logit_one)
        self.outputvalue = self.classifier.outputvalue(self.logit)
        self.test_output = tf.argmax(self.logit, 1)               # which relation is predicted
        self.test_outputvalue = tf.reduce_max(self.logit, axis=1)  # score (max logit) of the predicted relation

        # Optimizer
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        tf.summary.scalar('learning_rate', FLAGS.learning_rate)
        self.optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
        self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
        self.train_op = self.optimizer.apply_gradients(self.grads_and_vars, global_step=self.global_step)
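
# A matching sketch (not from the original repo): one gradient step of the bag-level
# PCNN model and reading back the per-bag reward (log-probability of the gold bag
# label) that the agent consumes. The dummy batch and the "two sentences per bag"
# layout are assumptions; placeholder and op names come from the PCNN class above.
# np, tf, and FLAGS are assumed to be imported at module level.
def _pcnn_training_sketch():
    with tf.Graph().as_default():
        pcnn = PCNN(is_training=True)
        B, L = FLAGS.batch_size, FLAGS.max_length
        n = 2 * B                                              # assume two sentences per bag

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            feed = {
                pcnn.word: np.zeros((n, L), np.int32),
                pcnn.pos1: np.zeros((n, L), np.int32),
                pcnn.pos2: np.zeros((n, L), np.int32),
                pcnn.mask: np.ones((n, L), np.int32),
                pcnn.scope: np.arange(0, n + 1, 2, dtype=np.int32),  # bag i = sentences scope[i]:scope[i+1]
                pcnn.bag_label: np.zeros((B,), np.int32),            # one label per bag
                pcnn.sentence_label: np.zeros((n,), np.int32),       # per-sentence labels for the attention selector
                pcnn.label_weights: np.ones((B,), np.float32),
            }

            # One gradient-descent step on the weighted bag-level cross-entropy,
            # plus the reward signal that is handed back to the agent.
            _, loss, rewards = sess.run([pcnn.train_op, pcnn.loss, pcnn.reward], feed_dict=feed)
            return rewards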