class CNNPredictor:
    """Loads a trained controversy CNN ("WikiContrvCNN") and scores raw text.

    Builds the inference graph, restores weights from a checkpoint, and
    exposes `score(docs)` which returns the class-1 ("controversial") logit
    for each document.
    """

    def __init__(self, name="WikiContrvCNN", input_name=None):
        """Build the graph and restore weights.

        name: checkpoint / model name (resolved via cpath).
        input_name: vocabulary cache name; defaults to `name`.
        """
        if input_name is None:
            input_name = name
        self.hp = hyperparams.HPCNN()
        self.sess = init_session()
        # NOTE: attribute is `dropout_prob` but the placeholder is named
        # "dropout_keep_prob"; kept as-is for checkpoint compatibility.
        self.dropout_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.seq_max = self.hp.seq_max
        self.word2idx = cache.load_cache(input_name + ".voca")
        init_emb = cache.load_cache("init_emb_word2vec")
        self.model = CNN("controv", self.seq_max, 2, [2, 3, 4], 128,
                         init_emb, self.hp.embedding_size, self.dropout_prob)
        self.input_text = tf.placeholder(tf.int32,
                                         shape=[None, self.seq_max],
                                         name="comment_input")
        self.sout = self.model.network(self.input_text)
        self.tokenize = lambda x: tokenize(x, set(), False)
        # BUGFIX: run the variable initializer AFTER the graph is built.
        # The original ran it before the model existed, so no variables were
        # initialized by it; restore then happens to paper over that. With the
        # correct order, any variable the checkpoint misses is still defined.
        self.sess.run(tf.global_variables_initializer())
        loader = tf.train.Saver()
        loader.restore(self.sess, cpath.get_model_full_path(name))

    def encode(self, docs):
        """Tokenize each doc, map tokens to vocab ids (OOV for unknowns),
        then truncate/pad to `seq_max`.

        Returns a list of (token_ids, 0) pairs; the 0 is a dummy label so the
        entries fit the (x, y) batch format expected by get_batches_ex.
        """
        data = []
        for doc in docs:
            # word2idx may be any mapping-like cache object; use the
            # membership test rather than .get() to require only
            # __contains__/__getitem__.
            ids = [self.word2idx[tok] if tok in self.word2idx else OOV
                   for tok in self.tokenize(doc)]
            ids = ids[:self.seq_max]
            ids += [PADDING] * (self.seq_max - len(ids))
            data.append((ids, 0))
        return data

    def score(self, docs):
        """Score documents; returns a 1-D array of class-1 logits."""
        inputs = self.encode(docs)
        logit_list = []
        for batch in get_batches_ex(inputs, self.hp.batch_size, 2):
            x, _y = batch  # dummy labels from encode() are ignored
            logits, = self.sess.run([self.sout], feed_dict={
                self.input_text: x,
                self.dropout_prob: 1.0,  # keep-prob 1.0 => no dropout at inference
            })
            logit_list.append(logits)
        # Column 1 is the positive ("controversial") class.
        return np.concatenate(logit_list)[:, 1]
def get_predictor():
    """Build the 'agree' CNN inference graph, restore trained weights,
    and return a `predict(comments)` closure.

    The returned closure takes pre-encoded comments (batch format consumed
    by get_batches_ex with 2 columns collapsed to 1), runs them through the
    network in batches of 512, and returns the concatenated score array.
    """
    dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    cnn = CNN("agree",
              sequence_length=FLAGS.comment_length,
              num_classes=3,
              filter_sizes=[1, 2, 3],
              num_filters=64,
              init_emb=load_local_pickle("init_embedding"),
              embedding_size=FLAGS.embedding_size,
              dropout_prob=dropout_keep_prob)
    input_comment = tf.placeholder(tf.int32,
                                   shape=[None, FLAGS.comment_length],
                                   name="comment_input")
    # (removed dead commented-out duplicate of the line below)
    sout = cnn.network(input_comment)
    sess = init_session()
    batch_size = 512
    path = os.path.join(model_path, "runs", "agree", "model-36570")
    variables = tf.contrib.slim.get_variables_to_restore()
    # Debug aid: list every variable the Saver will try to restore.
    for v in variables:
        print(v.name)
    loader = tf.train.Saver(variables)
    loader.restore(sess, path)

    def predict(comments):
        """Score all comments; returns np.ndarray of per-comment outputs."""
        batches = get_batches_ex(comments, batch_size, 1)
        all_scores = []
        ticker = TimeEstimator(len(batches))
        for batch in batches:
            scores, = sess.run([sout], feed_dict={
                input_comment: batch[0],
                dropout_keep_prob: 1.0,  # inference: disable dropout
            })
            all_scores.append(scores)
            ticker.tick()
        return np.concatenate(all_scores)

    return predict
class ADReaction():
    """Controversy model built on per-comment agreement reactions.

    A shared 3-class "agree" CNN classifies each comment of a document;
    the summed softmax mass in the 'disagree' class (column 2) is mapped
    through a fixed linear layer to a 2-way controversy score.
    """

    def __init__(self, prior, init_emb):
        """Build placeholders, the shared comment CNN, and the loss/score ops.

        prior: stored on the instance; not used in the visible code —
               presumably consumed by a caller (verify).
        init_emb: initial embedding matrix handed to the CNN.
        """
        self.comment_length = FLAGS.comment_length
        self.comment_count = FLAGS.comment_count
        self.embedding_size = FLAGS.embedding_size
        self.prior = prior
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        # Token ids: [batch, comment_count, comment_length]
        self.input_comment = tf.placeholder(
            tf.int32,
            shape=[None, self.comment_count, self.comment_length],
            name="input_reaction")
        self.input_comment_y = tf.placeholder(
            tf.int32,
            shape=[None, self.comment_count],
            name="input_y_comment")  # agree label for comments
        self.input_y = tf.placeholder(tf.int32, shape=[
            None,
        ], name="input_y")  # Controversy Label
        # Shared 3-class comment classifier (classes presumably
        # agree / neutral / disagree — confirm against training code).
        self.cnn = CNN("agree",
                       sequence_length=self.comment_length,
                       num_classes=3,
                       filter_sizes=[1, 2, 3],
                       num_filters=64,
                       init_emb=init_emb,
                       embedding_size=self.embedding_size,
                       dropout_prob=self.dropout_keep_prob)
        # NOTE(review): both `controversy(...)` and the direct
        # `predict_2d(...)` call below invoke `self.cnn.network`, building the
        # comment network twice on the same input — this assumes CNN reuses
        # its variables across calls; confirm, otherwise weights diverge.
        self.score = self.controversy(self.input_comment)
        self.acc = accuracy(self.score, self.input_y, axis=1)
        self.agree_logit = self.predict_2d(self.input_comment)
        self.agree_acc = accuracy(self.agree_logit, self.input_comment_y, axis=2)
        self.agree_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.agree_logit,
                labels=self.input_comment_y))

    def predict(self, comments):
        """Run the shared CNN on a flat batch of comments."""
        # comments : [None, 30]
        logits = self.cnn.network(comments)
        return logits  # logit : [None, 3]

    def predict_2d(self, comments):
        """Classify every comment of every document.

        Flattens [batch, comment_count, comment_length] to a single comment
        batch, runs the CNN, then restores the per-document grouping.
        Returns logits shaped [batch, comment_count, 3].
        """
        flat_comments = tf.reshape(comments, [-1, self.comment_length])
        logits = self.predict(flat_comments)
        formatted_logit = tf.reshape(logits, [-1, self.comment_count, 3])
        return formatted_logit

    def controversy(self, comments):
        """Map per-comment class probabilities to a 2-way controversy score.

        Sums softmax probabilities over the comment axis (an unnormalized
        average of class mass), then applies a fixed, non-trainable linear
        layer that passes through only the 'disagree' column.
        """
        formatted_logit = self.predict_2d(comments)
        # [batch, 3]: total probability mass per class across all comments.
        avg = tf.reduce_sum(tf.nn.softmax(formatted_logit), axis=1)
        ad_weights = [[0, 0], [0, 0], [0, 1]]  # Only using disagreement as signal
        self.W = tf.Variable(ad_weights, trainable=False, dtype=tf.float32, name="ad_W")
        self.b = tf.Variable([0, 0], dtype=tf.float32, name="ad_b")
        score = tf.nn.xw_plus_b(avg, self.W, self.b)  # [None, 2]
        return score

    def get_l2_loss(self):
        """Expose the shared CNN's L2 regularization term."""
        return self.cnn.l2_loss