Exemple #1
0
    def load_validation_data(self):
        """Read every validation file up front and build the validation tensors.

        Does nothing (returns ``None`` without touching ``self``) when
        ``self.validation_data_files`` is empty.  Otherwise all lines of the
        data files and of ``self.validation_weight_files`` are read into
        memory, parsed with ``fm_parser`` and stored in
        ``self.validation_data`` as a dict with the keys ``labels``,
        ``weights``, ``feature_ids``, ``ori_ids``, ``feature_vals`` and
        ``feature_poses``.
        """
        if len(self.validation_data_files) == 0:
            return

        def read_stripped_lines(paths):
            # Concatenate the lines of all files, dropping the newline
            # characters that surround each line.
            collected = []
            for path in paths:
                with open(path) as handle:
                    collected += [raw.strip('\n') for raw in handle]
            return collected

        print('Preloading validation data...')
        example_lines = read_stripped_lines(self.validation_data_files)
        weight_lines = read_stripped_lines(self.validation_weight_files)

        example_tensor = tf.constant(example_lines, dtype=tf.string)
        labels, sizes, raw_ids, feature_vals = fm_parser(
            example_tensor, self.vocabulary_size)
        # tf.unique yields the distinct ids plus, for every original entry,
        # its index into that distinct list.
        ori_ids, feature_ids = tf.unique(raw_ids)
        # Running total of `sizes` with a leading 0.
        feature_poses = tf.concat([[0], tf.cumsum(sizes)], 0)

        if weight_lines:
            # NOTE(review): presumably one weight line per data line; the
            # counts are not compared here -- confirm with the file format.
            weight_tensor = tf.constant(weight_lines, dtype=tf.string)
            weights = tf.string_to_number(weight_tensor, tf.float32)
        else:
            # No weight files given: every example gets weight 1.0.
            weights = tf.ones(tf.shape(labels), tf.float32)

        self.validation_data = {
            'labels': labels,
            'weights': weights,
            'feature_ids': feature_ids,
            'ori_ids': ori_ids,
            'feature_vals': feature_vals,
            'feature_poses': feature_poses,
        }
Exemple #2
0
    def predict(self, data_lines):
        """Build the prediction score for the given example lines.

        Args:
            data_lines: Passed unchanged to ``fm_parser`` together with
                ``self.vocabulary_size`` (presumably a string tensor of raw
                example lines -- confirm against callers).

        Returns:
            The first of the two values produced by ``fm_scorer``; the second
            one is discarded.
        """
        # The first parser output (labels) is not needed for prediction.
        _labels, sizes, raw_ids, feature_vals = fm_parser(
            data_lines, self.vocabulary_size)

        # Distinct ids plus, per original entry, its index into that list.
        ori_ids, feature_ids = tf.unique(raw_ids)
        # Running total of `sizes` with a leading 0.
        feature_poses = tf.concat([[0], tf.cumsum(sizes)], 0)
        # Fetch only the rows of `self.vocab_blocks` for ids that occur.
        local_params = tf.nn.embedding_lookup(self.vocab_blocks, ori_ids)

        pred_score, _reg_score = fm_scorer(
            feature_ids, local_params, feature_vals, feature_poses,
            self.factor_lambda, self.bias_lambda)
        return pred_score
Exemple #3
0
    def _shuffle_input(self, thread_idx, train_file_queue, weight_file_queue,
                       ex_q):
        """Build the op that reads, shuffles, parses and enqueues one batch.

        Args:
            thread_idx: Used only to name the enclosing name scope
                (``shuffled_<thread_idx>``).
            train_file_queue: Queue handed to a ``tf.TextLineReader`` to read
                up to ``self.batch_size`` data lines.
            weight_file_queue: Queue read the same way for weight lines, or
                ``None``, in which case every example gets weight 1.0.
            ex_q: Object whose ``enqueue`` receives the list
                ``[labels, weights, feature_ids, ori_ids, feature_vals,
                feature_poses]``.

        Returns:
            Whatever ``ex_q.enqueue`` returns for that list.
        """
        with tf.name_scope("shuffled_%s" % (thread_idx, )):
            line_reader = tf.TextLineReader()
            _, data_lines = line_reader.read_up_to(train_file_queue,
                                                   self.batch_size)

            min_after_dequeue = 3 * self.batch_size
            # Options shared by both shuffle_batch calls below.
            shuffle_options = dict(
                batch_size=self.batch_size,
                capacity=int(min_after_dequeue + self.batch_size * 1.5),
                min_after_dequeue=min_after_dequeue,
                enqueue_many=True,
                allow_smaller_final_batch=True)

            if weight_file_queue is None:
                data_lines_batch = tf.train.shuffle_batch(
                    [data_lines], **shuffle_options)
                weights = tf.ones(tf.shape(data_lines_batch), tf.float32)
            else:
                weight_reader = tf.TextLineReader()
                _, weight_lines = weight_reader.read_up_to(
                    weight_file_queue, self.batch_size)

                # Shuffle data and weight lines together.
                # NOTE(review): this presumes both readers return the same
                # number of lines on every read -- confirm.
                data_lines_batch, weight_lines_batch = tf.train.shuffle_batch(
                    [data_lines, weight_lines], **shuffle_options)
                weights = tf.string_to_number(weight_lines_batch, tf.float32)

            labels, sizes, raw_ids, feature_vals = fm_parser(
                data_lines_batch, self.vocabulary_size)
            # Distinct ids plus, per original entry, its index into that list.
            ori_ids, feature_ids = tf.unique(raw_ids)
            # Running total of `sizes` with a leading 0.
            feature_poses = tf.concat([[0], tf.cumsum(sizes)], 0)

            return ex_q.enqueue([
                labels, weights, feature_ids, ori_ids, feature_vals,
                feature_poses
            ])