Example #1
0
class CNNPredictor:
    """Scores documents with a pre-trained CNN controversy classifier (TF1).

    Builds the inference graph, initializes variables, then restores weights
    from the checkpoint named by `name`. `input_name` selects the vocabulary
    cache and defaults to the model name.
    """

    def __init__(self, name="WikiContrvCNN", input_name=None):
        if input_name is None:
            input_name = name
        self.hp = hyperparams.HPCNN()
        self.sess = init_session()
        # Feed 1.0 at inference time to disable dropout.
        self.dropout_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.seq_max = self.hp.seq_max
        self.word2idx = cache.load_cache(input_name + ".voca")
        init_emb = cache.load_cache("init_emb_word2vec")
        self.model = CNN("controv", self.seq_max, 2, [2, 3, 4], 128,
                         init_emb, self.hp.embedding_size, self.dropout_prob)
        self.input_text = tf.placeholder(tf.int32,
                                         shape=[None, self.seq_max],
                                         name="comment_input")
        self.sout = self.model.network(self.input_text)
        self.tokenize = lambda x: tokenize(x, set(), False)

        # BUGFIX: run the initializer AFTER the graph is built. The original
        # ran it before any variables existed, so nothing was initialized.
        # Restore then overwrites initialized values with checkpoint weights.
        self.sess.run(tf.global_variables_initializer())
        loader = tf.train.Saver()
        loader.restore(self.sess, cpath.get_model_full_path(name))

    def encode(self, docs):
        """Convert raw documents into fixed-length id sequences.

        Each doc is tokenized, mapped through the vocabulary (OOV for unknown
        tokens), truncated to `seq_max`, and right-padded with PADDING.
        Returns a list of (ids, 0) pairs; the 0 is a dummy label expected by
        the batching helper.
        """
        data = []
        for doc in docs:
            entry = [self.word2idx.get(token, OOV)
                     for token in self.tokenize(doc)][:self.seq_max]
            # Right-pad up to the fixed sequence length.
            entry += [PADDING] * (self.seq_max - len(entry))
            data.append((entry, 0))
        return data

    def score(self, docs):
        """Return the positive-class ("controversial") logit for each doc."""
        inputs = self.encode(docs)

        def forward_run(inputs):
            # Batch to bound memory; dropout disabled (keep prob 1.0).
            batches = get_batches_ex(inputs, self.hp.batch_size, 2)
            logit_list = []
            for batch in batches:
                x, y = batch  # y is the dummy label from encode(); unused
                logits, = self.sess.run([self.sout],
                                        feed_dict={
                                            self.input_text: x,
                                            self.dropout_prob: 1.0,
                                        })
                logit_list.append(logits)
            return np.concatenate(logit_list)

        # Column 1 is presumably the "controversial" class — TODO confirm
        # against the training label order.
        output = forward_run(inputs)[:, 1]
        return output
Example #2
0
def get_predictor():
    """Build the 3-class 'agree' CNN graph, restore its checkpoint, and
    return a closure that scores batches of pre-encoded comments.

    Returns:
        predict(comments): takes a dataset of encoded comments (as accepted
        by get_batches_ex with one column) and returns the concatenated
        network outputs as a numpy array.
    """
    # Feed 1.0 at inference time to disable dropout.
    dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    cnn = CNN("agree",
              sequence_length=FLAGS.comment_length,
              num_classes=3,
              filter_sizes=[1, 2, 3],
              num_filters=64,
              init_emb=load_local_pickle("init_embedding"),
              embedding_size=FLAGS.embedding_size,
              dropout_prob=dropout_keep_prob)
    input_comment = tf.placeholder(tf.int32,
                                   shape=[None, FLAGS.comment_length],
                                   name="comment_input")
    sout = cnn.network(input_comment)
    sess = init_session()
    batch_size = 512
    # Fixed checkpoint step; update if the model is retrained.
    path = os.path.join(model_path, "runs", "agree", "model-36570")
    variables = tf.contrib.slim.get_variables_to_restore()
    loader = tf.train.Saver(variables)
    loader.restore(sess, path)

    def predict(comments):
        """Score `comments` in fixed-size batches; returns [N, 3] outputs."""
        batches = get_batches_ex(comments, batch_size, 1)
        all_scores = []
        ticker = TimeEstimator(len(batches))
        for batch in batches:
            scores, = sess.run([sout],
                               feed_dict={
                                   input_comment: batch[0],
                                   dropout_keep_prob: 1.0,
                               })
            all_scores.append(scores)
            ticker.tick()

        return np.concatenate(all_scores)

    return predict
Example #3
0
class ADReaction():
    """TF1 graph: predicts document controversy from reactions (comments).

    A shared 3-class 'agree' CNN scores every comment of a document; the
    softmaxed class probabilities are summed over the comment axis and mapped
    through a fixed, non-trainable linear layer that uses only the
    disagreement class as the controversy signal. The per-comment agree task
    also gets its own cross-entropy loss (multi-task training signal).
    """

    def __init__(self, prior, init_emb):
        # `prior` is stored but never read in this visible class —
        # NOTE(review): presumably consumed by callers; confirm.
        # `init_emb` is the initial embedding matrix handed to the CNN.
        self.comment_length = FLAGS.comment_length
        self.comment_count = FLAGS.comment_count
        self.embedding_size = FLAGS.embedding_size
        self.prior = prior
        # Feed 1.0 at inference time to disable dropout.
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        # Token ids: [batch, comment_count, comment_length].
        self.input_comment = tf.placeholder(
            tf.int32,
            shape=[None, self.comment_count, self.comment_length],
            name="input_reaction")

        self.input_comment_y = tf.placeholder(
            tf.int32, shape=[None, self.comment_count],
            name="input_y_comment")  # agree label for comments

        self.input_y = tf.placeholder(tf.int32, shape=[
            None,
        ], name="input_y")  # Controversy Label

        # One CNN instance shared across all comments of all documents.
        self.cnn = CNN("agree",
                       sequence_length=self.comment_length,
                       num_classes=3,
                       filter_sizes=[1, 2, 3],
                       num_filters=64,
                       init_emb=init_emb,
                       embedding_size=self.embedding_size,
                       dropout_prob=self.dropout_keep_prob)
        # Controversy score [batch, 2] and its accuracy against input_y.
        self.score = self.controversy(self.input_comment)
        self.acc = accuracy(self.score, self.input_y, axis=1)
        # Per-comment agree logits [batch, comment_count, 3], with accuracy
        # and sparse cross-entropy loss against input_comment_y.
        self.agree_logit = self.predict_2d(self.input_comment)
        self.agree_acc = accuracy(self.agree_logit,
                                  self.input_comment_y,
                                  axis=2)
        self.agree_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.agree_logit, labels=self.input_comment_y))

    def predict(self, comments):
        """Run the shared CNN on a flat batch of comments.

        comments: token ids [None, comment_length].
        Returns agree logits [None, 3].
        """
        logits = self.cnn.network(comments)
        return logits  # logit : [None, 3]

    def predict_2d(self, comments):
        """Score a [batch, comment_count, comment_length] tensor by
        flattening to a single comment batch and reshaping the logits back
        to [batch, comment_count, 3]."""
        flat_comments = tf.reshape(comments, [-1, self.comment_length])
        logits = self.predict(flat_comments)
        formatted_logit = tf.reshape(logits, [-1, self.comment_count, 3])
        return formatted_logit

    def controversy(self, comments):
        """Aggregate per-comment agree probabilities into a [None, 2]
        controversy score via a frozen linear map."""
        formatted_logit = self.predict_2d(comments)
        # Sum of softmax probabilities over the comment axis: [None, 3].
        avg = tf.reduce_sum(tf.nn.softmax(formatted_logit), axis=1)
        ad_weights = [[0, 0], [0, 0], [0,
                                       1]]  # Only using disagreement as signal
        # Frozen (trainable=False) projection: picks out the disagreement
        # probability mass as the "controversial" score.
        self.W = tf.Variable(ad_weights,
                             trainable=False,
                             dtype=tf.float32,
                             name="ad_W")
        self.b = tf.Variable([0, 0], dtype=tf.float32, name="ad_b")
        score = tf.nn.xw_plus_b(avg, self.W, self.b)  # [None, 2]
        return score

    def get_l2_loss(self):
        """Expose the underlying CNN's L2 regularization term."""
        return self.cnn.l2_loss