Code example #1
    def _serialize_dataset(self, tasks, is_training, split):
        """Writes out tfrecord examples for the specified tasks."""
        dataset_name = "_".join(sorted([task.name for task in tasks]))
        dataset_name += "_" + split
        if self._config.distill:
            dataset_name += "_distill"
        dataset_prefix = os.path.join(self._config.preprocessed_data_dir,
                                      dataset_name)
        tfrecords_path = dataset_prefix + ".tfrecord"
        metadata_path = dataset_prefix + ".metadata"
        batch_size = (self._config.train_batch_size
                      if is_training else self._config.eval_batch_size)

        utils.log("Loading dataset", dataset_name)
        n_examples = None
        sizes = {}
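        # Reuse previously written tfrecords when caching is enabled and a
        # metadata file from an earlier run exists.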
        if (self._config.use_tfrecords_if_existing
                and tf.gfile.Exists(metadata_path)):
            utils.log("Using already-written tfrecords")
            metadata = utils.load_json(metadata_path)
            n_examples = metadata["n_examples"]
            sizes = metadata["sizes"]

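        # Otherwise, gather examples from every task and write fresh tfrecords.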
        if n_examples is None:
            utils.log("Existing tfrecords not found so creating")
            examples = []
            for task in tasks:
                task_examples = task.get_examples(split)
                sizes[task.name] = len(task_examples)
                examples += task_examples
            # Truncate so the example count is an exact multiple of the batch
            # size (an alternative would be to pad the final batch with
            # PaddingInputExample instances instead).
            last_index = len(examples) - (len(examples) % batch_size)
            examples = examples[:last_index]
            if is_training:
                random.shuffle(examples)
            n_examples = len(examples)
            assert n_examples % batch_size == 0
            utils.mkdir(tfrecords_path.rsplit("/", 1)[0])
            self.serialize_examples(examples, is_training, tfrecords_path)
            utils.write_json({
                "n_examples": n_examples,
                "sizes": sizes
            }, metadata_path)

        input_fn = self._input_fn_builder(tfrecords_path, is_training)
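        # One epoch is n_examples // batch_size steps; training runs for
        # num_train_epochs epochs, evaluation for a single pass.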
        if is_training:
            steps = int(n_examples // batch_size *
                        self._config.num_train_epochs)
        else:
            steps = n_examples // batch_size

        return input_fn, steps, sizes
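
The helpers serialize_examples and _input_fn_builder are not shown here. As a rough sketch of what the serialize_examples call might do, the following writes examples to a TFRecord file; the feature names (input_ids, label), the example attributes, and the simplified signature (the original also takes is_training) are assumptions for illustration, not the actual implementation.

import tensorflow as tf

def serialize_examples_sketch(examples, tfrecords_path):
    """Minimal sketch: write examples to a TFRecord file (assumed schema)."""
    with tf.io.TFRecordWriter(tfrecords_path) as writer:
        for example in examples:
            # input_ids / label are hypothetical attributes; the real feature
            # schema depends on each task's example class.
            features = tf.train.Features(feature={
                "input_ids": tf.train.Feature(
                    int64_list=tf.train.Int64List(value=example.input_ids)),
                "label": tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[example.label])),
            })
            writer.write(
                tf.train.Example(features=features).SerializeToString())
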
Code example #2
def write_results(config, results):
  """Write out evaluate metrics to disk."""
  utils.log("Writing results to", config.results_txt)
  utils.mkdir(config.results_txt.rsplit("/", 1)[0])
  utils.write_pickle(results, config.results_pkl)
  with tf.gfile.GFile(config.results_txt, "w") as f:
    results_str = ""
    for trial_results in results:
      for task_name, task_results in trial_results.items():
        results_str += task_name + ": " + " - ".join(
            ["{:}: {:.2f}".format(k, v)
             for k, v in task_results.items()]) + "\n"
    f.write(results_str)
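
A hypothetical invocation, assuming results is a list with one dict per trial that maps task names to metric dicts (the structure implied by the nested loop above); the task names and metric values below are made up:

results = [
    {"cola": {"accuracy": 84.13, "loss": 0.52},
     "mrpc": {"accuracy": 88.90, "loss": 0.31}},
]
write_results(config, results)
# config.results_txt would then contain lines such as:
# cola: accuracy: 84.13 - loss: 0.52
# mrpc: accuracy: 88.90 - loss: 0.31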