def _serialize_dataset(self, tasks, is_training, split):
    """Write out (or reuse) tfrecord examples for the specified tasks.

    The dataset name is derived from the sorted task names plus the split
    (and a "_distill" suffix when distillation is enabled), so repeated runs
    with the same task set map to the same tfrecord/metadata files.

    Args:
      tasks: list of task objects; each must provide `.name` and
        `.get_examples(split)`.
      is_training: bool; selects train vs. eval batch size, enables
        shuffling, and multiplies steps by the number of training epochs.
      split: str; dataset split name (e.g. "train", "dev").

    Returns:
      A tuple `(input_fn, steps, sizes)` where `input_fn` builds the input
      pipeline from the tfrecords, `steps` is the number of batches to run,
      and `sizes` maps task name -> number of raw examples for that task.
    """
    dataset_name = "_".join(sorted([task.name for task in tasks]))
    dataset_name += "_" + split
    if self._config.distill:
        dataset_name += "_distill"
    dataset_prefix = os.path.join(
        self._config.preprocessed_data_dir, dataset_name)
    tfrecords_path = dataset_prefix + ".tfrecord"
    metadata_path = dataset_prefix + ".metadata"
    batch_size = (self._config.train_batch_size if is_training
                  else self._config.eval_batch_size)

    utils.log("Loading dataset", dataset_name)
    n_examples = None
    sizes = {}
    # Reuse previously-serialized tfrecords when allowed and the metadata
    # sidecar file exists; the metadata records the example count and
    # per-task sizes so we can skip re-reading the raw data entirely.
    if (self._config.use_tfrecords_if_existing and
        tf.gfile.Exists(metadata_path)):
        utils.log("Using already-written tfrecords")
        metadata = utils.load_json(metadata_path)
        n_examples = metadata["n_examples"]
        sizes = metadata["sizes"]

    if n_examples is None:
        utils.log("Existing tfrecords not found so creating")
        examples = []
        for task in tasks:
            task_examples = task.get_examples(split)
            sizes[task.name] = len(task_examples)
            examples += task_examples
        # Drop the trailing examples so the total divides evenly by
        # batch_size, rather than padding with dummy examples.
        last_index = len(examples) - (len(examples) % batch_size)
        examples = examples[:last_index]
        if is_training:
            random.shuffle(examples)
        n_examples = len(examples)
        assert n_examples % batch_size == 0
        utils.mkdir(tfrecords_path.rsplit("/", 1)[0])
        self.serialize_examples(examples, is_training, tfrecords_path)
        utils.write_json(
            {"n_examples": n_examples, "sizes": sizes}, metadata_path)

    input_fn = self._input_fn_builder(tfrecords_path, is_training)
    if is_training:
        steps = int(n_examples // batch_size * self._config.num_train_epochs)
    else:
        steps = n_examples // batch_size
    return input_fn, steps, sizes
def write_results(config, results):
    """Write out evaluation metrics to disk.

    Persists `results` twice: as a pickle at `config.results_pkl` and as a
    human-readable text summary at `config.results_txt`.

    Args:
      config: object providing `results_txt` and `results_pkl` paths.
      results: iterable of per-trial dicts mapping task name -> dict of
        metric name -> float value.
    """
    utils.log("Writing results to", config.results_txt)
    utils.mkdir(config.results_txt.rsplit("/", 1)[0])
    utils.write_pickle(results, config.results_pkl)
    with tf.gfile.GFile(config.results_txt, "w") as f:
        # Collect one line per (trial, task) and join once at the end to
        # avoid quadratic repeated string concatenation.
        lines = []
        for trial_results in results:
            for task_name, task_results in trial_results.items():
                lines.append(task_name + ": " + " - ".join(
                    "{:}: {:.2f}".format(k, v)
                    for k, v in task_results.items()) + "\n")
        f.write("".join(lines))