Example #1
    def __init__(self, tensor_dict, config, opt=None):
        self.learning_rate = config.learning_rate
        self.l2_reg_lambda = config.l2_reg_lambda
        self.batch_size = config.batch_size
        self.vocab_size = config.vocab_size
        self.embed_size = config.embedding_size
        self.filter_sizes = config.filter_sizes
        self.num_filters = config.num_filters
        self.fc_sizes = config.fc_sizes
        self.num_classes = config.num_classes
        self.keep_prob_for_train = config.keep_prob
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        ## inputs & placeholder
        inputs = tensor_dict["text"]
        labels = tensor_dict["label"]
        self.keep_prob = tf.placeholder_with_default(1.0,
                                                     shape=None,
                                                     name="keep_prob")

        ## sentence embedding
        inputs_embed = nn_layers.cnn_text_embedding(inputs, config.vocab_size,
                                                    config.embedding_size,
                                                    config.filter_sizes,
                                                    config.num_filters,
                                                    self.keep_prob)
        inputs_embed = nn_layers.multi_full_connect(inputs_embed,
                                                    config.fc_sizes,
                                                    activation='relu',
                                                    keep_prob=self.keep_prob)
        logits = nn_layers.full_connect(inputs_embed,
                                        config.num_classes,
                                        name='output_layer')

        ## loss and optim
        self.loss = tf.losses.sparse_softmax_cross_entropy(labels, logits)
        tf.summary.scalar('loss', self.loss)
        if not opt:
            optim = nn_layers.get_optimizer(config.optimizer,
                                            learning_rate=self.learning_rate)
        else:
            optim = opt
        self.train_op = optim.minimize(self.loss, global_step=self.global_step)

        ## score & infers
        self.infers = tf.argmax(logits, 1)
        self.score = tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))
        tf.summary.scalar('score', self.score)

        ## saver & summary
        self.saver = tf.train.Saver(tf.global_variables(),
                                    max_to_keep=config.max_to_keep)
        self.merged_summary = tf.summary.merge_all()
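
The constructor above only builds the graph (loss, train_op, summaries, saver); driving it still takes the usual TF1 session loop. The sketch below is an assumption about how such a model might be run, not part of the original code: `model` stands for an instance of the class this `__init__` belongs to (its name is not shown in the snippet), `num_steps` and `summary_dir` are made-up parameters, and the `tensor_dict` inputs are assumed to come from a tf.data pipeline so nothing has to be fed except `keep_prob`.

    import tensorflow as tf

    def train(model, num_steps, summary_dir):
        """Minimal TF1 training loop for a graph built like the one above."""
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter(summary_dir, sess.graph)
            for _ in range(num_steps):
                # dropout is active only when keep_prob is fed explicitly;
                # the placeholder default of 1.0 keeps evaluation deterministic
                _, step, summary = sess.run(
                    [model.train_op, model.global_step, model.merged_summary],
                    feed_dict={model.keep_prob: model.keep_prob_for_train})
                writer.add_summary(summary, step)
                if step % 1000 == 0:
                    model.saver.save(sess, summary_dir + '/model.ckpt',
                                     global_step=step)
            writer.close()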
Example #2
    def __init__(self, tensor_dict, config, opt=None):
        self.learning_rate = config.learning_rate
        self.l2_reg_lambda = config.l2_reg_lambda
        self.batch_size = config.batch_size
        self.vocab_size = config.vocab_size
        self.embed_size = config.embedding_size
        self.fc_sizes = config.fc_sizes
        self.num_classes = config.num_classes
        self.keep_prob_for_train = config.keep_prob
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        with tf.name_scope('input'):
            self.query_inputs = tensor_dict["query"]
            self.title_inputs_pos = tensor_dict["title_pos"]
            self.title_inputs_neg = tensor_dict["title_neg"]
            self.labels = tf.constant(1,
                                      tf.int32, [self.batch_size, 1],
                                      name="label")
            self.keep_prob = tf.placeholder_with_default(1.0,
                                                         shape=None,
                                                         name="keep_prob")

        with tf.variable_scope('sentence_embedding'):
            query_embed = nn_layers.sparse_text_embedding(
                self.query_inputs, [self.vocab_size, self.embed_size])
            tf.get_variable_scope().reuse_variables()
            title_pos_embed = nn_layers.sparse_text_embedding(
                self.title_inputs_pos, [self.vocab_size, self.embed_size])
            title_neg_embed = nn_layers.sparse_text_embedding(
                self.title_inputs_neg, [self.vocab_size, self.embed_size])

        with tf.variable_scope('sentence_similarity'):
            pos_pair_sim = nn_layers.mlp_similarity(query_embed,
                                                    title_pos_embed,
                                                    self.fc_sizes,
                                                    self.keep_prob)
            tf.get_variable_scope().reuse_variables()
            neg_pair_sim = nn_layers.mlp_similarity(query_embed,
                                                    title_neg_embed,
                                                    self.fc_sizes,
                                                    self.keep_prob)

        with tf.name_scope('predictions'):
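            # a well-trained model ranks the positive pair above the negative one,
            # so sigmoid(pos - neg) is pushed towards the all-ones labels defined above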
            sim_diff = pos_pair_sim - neg_pair_sim
            predictions = tf.sigmoid(sim_diff)
            self.infers = pos_pair_sim

        ## loss and optim
        with tf.name_scope('loss'):
            self.loss = nn_layers.cross_entropy_loss_with_reg(
                self.labels, predictions)
            tf.summary.scalar('loss', self.loss)

        with tf.name_scope('optim'):
            if not opt:
                optim = nn_layers.get_optimizer(
                    config.optimizer, learning_rate=self.learning_rate)
            else:
                optim = opt
            self.train_op = optim.minimize(self.loss,
                                           global_step=self.global_step)

        with tf.name_scope('score'):
            self.correct_num = tf.reduce_sum(
                tf.cast(tf.greater(predictions, 0.5), tf.float32))
            self.wrong_num = tf.reduce_sum(
                tf.cast(tf.less(predictions, 0.5), tf.float32))
            self.score = self.correct_num / (self.wrong_num + 0.0001)
            tf.summary.scalar('score', self.score)

        self.saver = tf.train.Saver(tf.global_variables(),
                                    max_to_keep=config.max_to_keep)
        self.merged = tf.summary.merge_all()
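
The loss in this example comes from nn_layers.cross_entropy_loss_with_reg, whose body is not shown. The sketch below is one plausible reading of that pairwise ranking objective, not the actual implementation: it treats the similarity difference directly as a logit rather than feeding the pre-sigmoided `predictions`, and the L2 term is only an assumption suggested by the otherwise unused `l2_reg_lambda` field.

    import tensorflow as tf

    def pairwise_rank_loss(pos_sim, neg_sim, l2_reg_lambda=0.0):
        """Sigmoid cross-entropy that pushes pos_sim above neg_sim."""
        diff = pos_sim - neg_sim      # > 0 when the positive pair already wins
        labels = tf.ones_like(diff)   # every example is labelled "positive wins"
        loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=diff))
        if l2_reg_lambda > 0:
            l2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
            loss += l2_reg_lambda * l2
        return loss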
Example #3

    def __init__(self, tensor_dict, config, opt=None):
        self.learning_rate = config.learning_rate
        self.l2_reg_lambda = config.l2_reg_lambda
        self.batch_size = config.batch_size
        self.vocab_size = config.vocab_size
        self.embed_size = config.embedding_size
        self.fc_sizes = config.fc_sizes
        self.keep_prob_for_train = config.keep_prob
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        with tf.name_scope('inputs'):
            query = tensor_dict["query"]
            titles = [tensor_dict["title"]]
            titles.append(tensor_dict["title_neg1"])
            titles.append(tensor_dict["title_neg2"])
            titles.append(tensor_dict["title_neg3"])
            titles.append(tensor_dict["title_neg4"])
            labels = tf.constant(0, tf.int64, [self.batch_size], name="label")
            self.keep_prob = tf.placeholder_with_default(1.0,
                                                         shape=None,
                                                         name="keep_prob")

        with tf.variable_scope('sentence_embedding'):
            tmp_embed = nn_layers.sparse_text_embedding(
                query, [self.vocab_size, self.embed_size])
            query_embed = nn_layers.multi_full_connect(tmp_embed,
                                                       config.fc_sizes,
                                                       activation='relu')
            tf.get_variable_scope().reuse_variables()
            titles_embed = []
            for i in range(len(titles)):
                tmp_embed = nn_layers.sparse_text_embedding(
                    titles[i], [self.vocab_size, self.embed_size])
                titles_embed.append(
                    nn_layers.multi_full_connect(tmp_embed,
                                                 config.fc_sizes,
                                                 activation='relu'))

        with tf.variable_scope('sentence_similarity'):
            pairs_sim = []
            for i in range(len(titles)):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                pairs_sim.append(
                    nn_layers.cosine_similarity(query_embed, titles_embed[i]))
            logits = tf.concat(pairs_sim, 1)

        with tf.name_scope('score'):
            self.correct_num = tf.reduce_sum(
                tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))
            self.wrong_num = self.batch_size - self.correct_num
            self.score = self.correct_num / (self.wrong_num + 0.0001)
            self.infers = pairs_sim[0]
            tf.summary.scalar('score', self.score)

        ## loss and optim
        with tf.name_scope('loss'):  # logits: (?, 5)  labels: (1000,)
            self.loss = tf.losses.sparse_softmax_cross_entropy(labels, logits)
            tf.summary.scalar('loss', self.loss)

        with tf.name_scope('optim'):
            if not opt:
                optim = nn_layers.get_optimizer(
                    config.optimizer, learning_rate=self.learning_rate)
            else:
                optim = opt
            self.train_op = optim.minimize(self.loss,
                                           global_step=self.global_step)

        self.saver = tf.train.Saver(tf.global_variables(),
                                    max_to_keep=config.max_to_keep)
        self.merged = tf.summary.merge_all()
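
nn_layers.cosine_similarity is likewise not shown. A plausible stand-in is sketched below, under the assumption that it returns one [batch_size, 1] similarity column per query/title pair, which is what the tf.concat(pairs_sim, 1) above expects. With five candidate titles and the positive always at index 0, the sparse softmax loss turns the ranking task into a 5-way classification.

    import tensorflow as tf

    def cosine_similarity(a, b, eps=1e-8):
        """Row-wise cosine similarity between two [batch, dim] embedding matrices."""
        a_norm = tf.nn.l2_normalize(a, axis=1, epsilon=eps)
        b_norm = tf.nn.l2_normalize(b, axis=1, epsilon=eps)
        # keepdims=True keeps the result as [batch, 1] so the columns concat cleanly
        return tf.reduce_sum(a_norm * b_norm, axis=1, keepdims=True)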