def load_validation_data(self):
    """Preload and parse the configured validation set into graph tensors.

    Reads every file listed in ``self.validation_data_files`` (and, when
    configured, ``self.validation_weight_files``), parses the lines with
    ``fm_parser``, and stores the resulting tensors in
    ``self.validation_data``.  A no-op when no validation files are set.
    """
    if not self.validation_data_files:
        return
    print('Preloading validation data...')

    def _read_stripped_lines(paths):
        # Gather every line from the given files, trailing newline removed.
        lines = []
        for path in paths:
            with open(path) as fh:
                lines.extend(line.strip('\n') for line in fh.readlines())
        return lines

    data_lines = _read_stripped_lines(self.validation_data_files)
    weight_lines = _read_stripped_lines(self.validation_weight_files)

    labels, sizes, feature_ids, feature_vals = fm_parser(
        tf.constant(data_lines, dtype=tf.string), self.vocabulary_size)
    # Deduplicate feature ids; keep the mapping back to the original ids.
    ori_ids, feature_ids = tf.unique(feature_ids)
    # Prefix-sum of example sizes gives each example's slice boundaries.
    feature_poses = tf.concat([[0], tf.cumsum(sizes)], 0)

    if weight_lines:
        weights = tf.string_to_number(
            tf.constant(weight_lines, dtype=tf.string), tf.float32)
    else:
        # No weight files configured: weight every example equally.
        weights = tf.ones(tf.shape(labels), tf.float32)

    self.validation_data = {
        'labels': labels,
        'weights': weights,
        'feature_ids': feature_ids,
        'ori_ids': ori_ids,
        'feature_vals': feature_vals,
        'feature_poses': feature_poses,
    }
def predict(self, data_lines):
    """Build and return the prediction-score tensor for raw input lines.

    ``data_lines`` is a string tensor of serialized examples; the
    regularization score produced by ``fm_scorer`` is discarded.
    """
    labels, sizes, feature_ids, feature_vals = fm_parser(
        data_lines, self.vocabulary_size)
    # Deduplicate ids so the embedding lookup fetches each row once.
    ori_ids, feature_ids = tf.unique(feature_ids)
    feature_poses = tf.concat([[0], tf.cumsum(sizes)], 0)
    params = tf.nn.embedding_lookup(self.vocab_blocks, ori_ids)
    pred_score, _ = fm_scorer(feature_ids, params, feature_vals,
                              feature_poses, self.factor_lambda,
                              self.bias_lambda)
    return pred_score
def _shuffle_input(self, thread_idx, train_file_queue, weight_file_queue, ex_q):
    """Build one input-pipeline thread: read, shuffle, parse, enqueue.

    Reads batches of lines from ``train_file_queue`` (and optionally
    per-example weights from ``weight_file_queue``), shuffles them, parses
    the batch with ``fm_parser`` and returns the enqueue op that pushes the
    parsed tensors onto ``ex_q``.
    """
    with tf.name_scope("shuffled_%s" % (thread_idx,)):
        data_reader = tf.TextLineReader()
        _, raw_lines = data_reader.read_up_to(train_file_queue,
                                              self.batch_size)

        # Shuffle-queue sizing: keep a few batches buffered for mixing.
        min_after_dequeue = 3 * self.batch_size
        capacity = int(min_after_dequeue + self.batch_size * 1.5)

        if weight_file_queue is None:
            data_lines_batch = tf.train.shuffle_batch(
                [raw_lines], self.batch_size, capacity, min_after_dequeue,
                enqueue_many=True, allow_smaller_final_batch=True)
            # No weight source: every example gets unit weight.
            weights = tf.ones(tf.shape(data_lines_batch), tf.float32)
        else:
            _, raw_weights = tf.TextLineReader().read_up_to(
                weight_file_queue, self.batch_size)
            # Shuffle data and weight lines together so pairs stay aligned.
            data_lines_batch, weight_lines_batch = tf.train.shuffle_batch(
                [raw_lines, raw_weights], self.batch_size, capacity,
                min_after_dequeue, enqueue_many=True,
                allow_smaller_final_batch=True)
            weights = tf.string_to_number(weight_lines_batch, tf.float32)

        labels, sizes, feature_ids, feature_vals = fm_parser(
            data_lines_batch, self.vocabulary_size)
        ori_ids, feature_ids = tf.unique(feature_ids)
        feature_poses = tf.concat([[0], tf.cumsum(sizes)], 0)

        return ex_q.enqueue([
            labels, weights, feature_ids, ori_ids, feature_vals,
            feature_poses
        ])