Example #1
    def __init__(self, queue_size, epoch_num, vocabulary_size, vocabulary_block_num, hash_feature_id, factor_num,
                 init_value_range, loss_type, optimizer, batch_size, factor_lambda, bias_lambda):
        """Build the FM training graph: shared file queue, sharded vocabulary
        variables, parser/scorer ops, loss, and (optionally) an optimizer."""
        with self.main_ps_device():
            self.file_queue = tf.FIFOQueue(queue_size, [tf.int32, tf.bool, tf.string, tf.string],
                                           shared_name='global_queue')

        with self.default_device():
            # Shared coordination state; use_locking avoids lost updates when
            # several workers increment the counter concurrently.
            self.finished_worker_num = tf.Variable(0)
            self.incre_finished_worker_num = self.finished_worker_num.assign_add(1, use_locking=True)
            self.model_loaded = tf.Variable(False)
            self.set_model_loaded = self.model_loaded.assign(True)
            self.training_stat = []
            self.validation_stat = []
            for i in range(epoch_num):
                self.training_stat.append(ModelStat('training_%d' % i))
                self.validation_stat.append(ModelStat('validation_%d' % i))

            # Placeholders and queue ops used to hand work files to workers.
            self.epoch_id = tf.placeholder(dtype=tf.int32)
            self.is_training = tf.placeholder(dtype=tf.bool)
            self.data_file = tf.placeholder(dtype=tf.string)
            self.weight_file = tf.placeholder(dtype=tf.string)
            self.file_enqueue_op = self.file_queue.enqueue(
                (self.epoch_id, self.is_training, self.data_file, self.weight_file))
            self.file_dequeue_op = self.file_queue.dequeue()
            self.file_close_queue_op = self.file_queue.close()

            # Shard the vocabulary across several variables so it can be
            # distributed over parameter servers; each row stores factor_num
            # factors plus one bias term.
            self.vocab_blocks = []
            vocab_size_per_block = vocabulary_size // vocabulary_block_num + 1
            for i in range(vocabulary_block_num):
                self.vocab_blocks.append(tf.Variable(
                    tf.random_uniform([vocab_size_per_block, factor_num + 1], -init_value_range, init_value_range),
                    name='vocab_block_%d' % i))
            self.file_id = tf.placeholder(dtype=tf.int32)
            labels, weights, ori_ids, feature_ids, feature_vals, feature_poses = fm_ops.fm_parser(self.file_id,
                                                                                                  self.data_file,
                                                                                                  self.weight_file,
                                                                                                  batch_size,
                                                                                                  vocabulary_size,
                                                                                                  hash_feature_id)
            self.example_num = tf.size(labels)
            # Gather only the parameter rows referenced by this batch.
            local_params = tf.nn.embedding_lookup(self.vocab_blocks, ori_ids)
            self.pred_score, reg_score = fm_ops.fm_scorer(feature_ids, local_params, feature_vals, feature_poses,
                                                          factor_lambda, bias_lambda)
            if loss_type == 'logistic':
                self.loss = tf.reduce_sum(
                    weights * tf.nn.sigmoid_cross_entropy_with_logits(logits=self.pred_score, labels=labels))
            elif loss_type == 'mse':
                self.loss = tf.reduce_sum(weights * tf.square(self.pred_score - labels))
            else:
                raise ValueError("Unsupported loss_type '%s'; expected 'logistic' or 'mse'." % loss_type)
            if optimizer is not None:
                self.opt = optimizer.minimize(self.loss + reg_score)
            self.init_vars = tf.global_variables_initializer()
            self.saver = tf.train.Saver(self.vocab_blocks)
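
For context, here is a minimal usage sketch of this constructor. `DistModel` is an assumed name for the class that owns the `__init__` above, and every hyperparameter value is illustrative; only the parameter list itself comes from the example.

# Hypothetical usage; DistModel is an assumed class name and all values
# below are illustrative, not taken from the original project.
import tensorflow as tf

model = DistModel(
    queue_size=64,                # capacity of the shared file FIFOQueue
    epoch_num=10,                 # one training/validation ModelStat per epoch
    vocabulary_size=2 ** 20,
    vocabulary_block_num=8,       # vocabulary sharded across 8 variables
    hash_feature_id=True,
    factor_num=16,                # each row: 16 factors plus one bias column
    init_value_range=0.01,
    loss_type='logistic',
    optimizer=tf.train.GradientDescentOptimizer(0.01),
    batch_size=1024,
    factor_lambda=1e-4,
    bias_lambda=1e-4)

with tf.Session() as sess:
    sess.run(model.init_vars)
    # A coordinator would now push (epoch_id, is_training, data_file,
    # weight_file) tuples through model.file_enqueue_op, and workers would
    # dequeue them and run model.opt.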
Example #2
import os

import tensorflow as tf

import fm_ops  # project-specific module providing the custom FM ops


def predict(sess, predict_files, score_path, model_path, model_version,
            batch_size, vocabulary_size, hash_feature_id):
    """Restore the exported SavedModel and write one score per input line."""
    with sess:
        if not os.path.exists(score_path):
            os.mkdir(score_path)

        export_path = os.path.join(tf.compat.as_bytes(model_path),
                                   tf.compat.as_bytes(model_version))

        meta_graph_def = tf.saved_model.loader.load(
            sess, [tf.saved_model.tag_constants.SERVING], export_path)
        signature = meta_graph_def.signature_def
        signature_key = 'predict_score'
        inputs = signature[signature_key].inputs
        outputs = signature[signature_key].outputs
        ori_ids_tensor_name = inputs['ori_ids'].name
        feature_ids_tensor_name = inputs['feature_ids'].name
        feature_vals_tensor_name = inputs['feature_vals'].name
        feature_pos_tensor_name = inputs['feature_pos'].name
        pred_score_tensor_name = outputs['pred_score'].name

        ori_ids = sess.graph.get_tensor_by_name(ori_ids_tensor_name)
        feature_ids = sess.graph.get_tensor_by_name(feature_ids_tensor_name)
        feature_vals = sess.graph.get_tensor_by_name(feature_vals_tensor_name)
        feature_poses = sess.graph.get_tensor_by_name(feature_pos_tensor_name)
        pred_score = sess.graph.get_tensor_by_name(pred_score_tensor_name)

        # Placeholders that tell the parser op which file to read.
        file_id = tf.placeholder(dtype=tf.int32)
        data_file = tf.placeholder(dtype=tf.string)
        weight_file = tf.placeholder(dtype=tf.string)

        # Build the parser ops once, outside the loop; creating them per file
        # would keep growing the graph.
        labels_t, weights_t, ori_ids_t, feature_ids_t, feature_vals_t, feature_poses_t = fm_ops.fm_parser(
            file_id, data_file, weight_file, batch_size,
            vocabulary_size, hash_feature_id)

        try:
            for fid, fname in enumerate(predict_files):
                score_file = os.path.join(score_path,
                                          os.path.basename(fname) + '.score')
                print('Start processing %s, scores written to %s ...' % (
                    fname, score_file))
                with open(score_file, 'w') as o:
                    while True:
                        # Parse the next batch from the current input file.
                        labels_, weights_, ori_ids_, feature_ids_, feature_vals_, feature_poses_ = sess.run(
                            [
                                labels_t, weights_t, ori_ids_t, feature_ids_t,
                                feature_vals_t, feature_poses_t
                            ],
                            feed_dict={
                                file_id: fid,
                                data_file: fname,
                                weight_file: ''
                            })
                        if len(labels_) == 0:
                            break
                        # Feed the parsed batch into the restored scoring graph.
                        instance_score = sess.run(
                            pred_score, {
                                ori_ids: ori_ids_,
                                feature_ids: feature_ids_,
                                feature_vals: feature_vals_,
                                feature_poses: feature_poses_
                            })
                        for score in instance_score:
                            o.write(str(score) + '\n')
        except tf.errors.OutOfRangeError:
            # The parser signals end of input with OutOfRangeError.
            pass
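
A hedged sketch of how `predict` might be invoked; the file names, directories, and version string are assumptions, not values from the source. Note that `predict` enters the session as a context manager, so the session is closed when it returns.

# Hypothetical invocation; every path and the version string are assumptions.
import tensorflow as tf

sess = tf.Session()
predict(sess,
        predict_files=['data/test_part_0.txt', 'data/test_part_1.txt'],
        score_path='scores',          # created by predict if missing
        model_path='exported_model',  # SavedModel base directory
        model_version='1',            # subdirectory under model_path
        batch_size=1024,
        vocabulary_size=2 ** 20,
        hash_feature_id=True)
# The `with sess:` block inside predict closes the session on exit.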