    @classmethod
    def setUpClass(cls):
        cache_dir = tf.test.get_temp_dir()

        # Create a dummy file
        dummy_dir = os.path.join(cache_dir, 'dummy')
        dummy_filepath = os.path.join(dummy_dir, 'dummy.txt')

        gfile.MakeDirs(dummy_dir)
        dummy_file_contents = 'hello world'
        with gfile.Open(dummy_filepath, 'w') as f:
            f.write(dummy_file_contents)

        # File containing compressed archives
        input_dir = os.path.join(cache_dir, 'to_extract')
        gfile.MakeDirs(input_dir)

        dl_manager = download_manager.DownloadManager(
            cache_dir=cache_dir,
            mode=util.GenerateMode.REUSE_CACHE_IF_EXISTS,
        )

        cls.dummy_dir = dummy_dir
        cls.dummy_filepath = dummy_filepath
        cls.dummy_file_contents = dummy_file_contents
        cls.input_dir = input_dir
        cls.dl_manager = dl_manager
Example #2
def save_model(model, history):
    if not gfile.Exists(MODEL_DIR):
        gfile.MakeDirs(MODEL_DIR)

    model.save(MODEL_FILE)

    if not gfile.Exists(HISTORY_DIR):
        gfile.MakeDirs(HISTORY_DIR)

    with open(HISTORY_FILE, 'wb') as f:
        pickle.dump(history.history, f)
Example #3
def extract_holdout_model(model):
    game_output_path = OUTPUT_PATH.format(FLAGS.base_dir, 'games', model)
    move_output_path = OUTPUT_PATH.format(FLAGS.base_dir, 'moves', model)
    # Create the directories that will contain the output files.
    gfile.MakeDirs(os.path.dirname(game_output_path))
    gfile.MakeDirs(os.path.dirname(move_output_path))

    with gfile.GFile(game_output_path, 'w') as game_f, \
            gfile.GFile(move_output_path, 'w') as move_f:
        for sgf_name in tqdm(get_sgf_names(model)):
            game_data, move_data = extract_data(sgf_name)
            game_f.write(json.dumps(game_data) + '\n')
            for move_datum in move_data:
                move_f.write(json.dumps(move_datum) + '\n')
Example #4
def prepare_dirs(recreate=False):
    """Prepare config dirs

    When recreate is True, remove any previous execution and recreate the dirs.
    When recreate is False, keep the previous execution as it is.
    """
    experiment_dir = environment.EXPERIMENT_DIR
    tensorboard_dir = environment.TENSORBOARD_DIR
    checkpoints_dir = environment.CHECKPOINTS_DIR

    if recreate:
        message = """
Delete and recreate these dirs:
experiment_dir: {experiment_dir}
tensorboard_dir: {tensorboard_dir}
checkpoints_dir: {checkpoints_dir}
        """.format(experiment_dir=experiment_dir,
                   tensorboard_dir=tensorboard_dir,
                   checkpoints_dir=checkpoints_dir)
    else:
        message = """
Create these dirs if they don't exist:
experiment_dir: {experiment_dir}
tensorboard_dir: {tensorboard_dir}
checkpoints_dir: {checkpoints_dir}
        """.format(experiment_dir=experiment_dir,
                   tensorboard_dir=tensorboard_dir,
                   checkpoints_dir=checkpoints_dir)

    print(message)

    if recreate:
        if gfile.Exists(experiment_dir):
            gfile.DeleteRecursively(experiment_dir)

        if gfile.Exists(tensorboard_dir):
            gfile.DeleteRecursively(tensorboard_dir)

        if gfile.Exists(checkpoints_dir):
            gfile.DeleteRecursively(checkpoints_dir)

    if not gfile.Exists(experiment_dir):
        gfile.MakeDirs(experiment_dir)

    if not gfile.Exists(tensorboard_dir):
        gfile.MakeDirs(tensorboard_dir)

    if not gfile.Exists(checkpoints_dir):
        gfile.MakeDirs(checkpoints_dir)
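
The three Exists/DeleteRecursively/MakeDirs blocks above repeat the same pattern once per directory. A minimal loop-based sketch of the same behavior (the name prepare_dirs_compact is hypothetical; it assumes the same environment constants and gfile import, and omits the informational print):

def prepare_dirs_compact(recreate=False):
    """Loop-based equivalent of prepare_dirs above (sketch)."""
    dirs = [environment.EXPERIMENT_DIR,
            environment.TENSORBOARD_DIR,
            environment.CHECKPOINTS_DIR]
    for d in dirs:
        # On recreate, drop any previous run for this directory first.
        if recreate and gfile.Exists(d):
            gfile.DeleteRecursively(d)
        if not gfile.Exists(d):
            gfile.MakeDirs(d)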
Example #5
  def after_run(self, _run_context, run_values):
    if not self.is_chief or self._done:
      return

    step_done = run_values.results
    if self._active:
      tf.logging.info("Captured full trace at step %s", step_done)
      # Create output directory
      gfile.MakeDirs(self._output_dir)

      # Save run metadata
      trace_path = os.path.join(self._output_dir, "run_meta")
      with gfile.GFile(trace_path, "wb") as trace_file:
        trace_file.write(run_values.run_metadata.SerializeToString())
        tf.logging.info("Saved run_metadata to %s", trace_path)

      # Save timeline
      timeline_path = os.path.join(self._output_dir, "timeline.json")
      with gfile.GFile(timeline_path, "w") as timeline_file:
        tl_info = timeline.Timeline(run_values.run_metadata.step_stats)
        tl_chrome = tl_info.generate_chrome_trace_format(show_memory=True)
        timeline_file.write(tl_chrome)
        tf.logging.info("Saved timeline to %s", timeline_path)

      # Save tfprof op log
      tf.contrib.tfprof.tfprof_logger.write_op_log(
          graph=tf.get_default_graph(),
          log_dir=self._output_dir,
          run_meta=run_values.run_metadata)
      tf.logging.info("Saved op log to %s", self._output_dir)
      self._active = False
      self._done = True

    self._active = (step_done >= self.params["step"])
Example #6
 def begin(self):
   self._iter_count = 0
   self._global_step = tf.train.get_global_step()
   self._pred_dict = graph_utils.get_dict_from_collection("predictions")
   # Create the sample directory
   if self._sample_dir is not None:
     gfile.MakeDirs(self._sample_dir)
Example #7
def get_target_path(request, point_num):
    """Computes the output path for a specific point.

    Args:
      request: ResegmentationRequest proto
      point_num: index of the point of interest within the proto

    Returns:
      path to the output file where resegmentation results will be saved
    """
    # Prepare the output directory.
    output_dir = request.output_directory

    id_a = request.points[point_num].id_a
    id_b = request.points[point_num].id_b

    if request.subdir_digits > 1:
        m = hashlib.md5()
        m.update(str(id_a).encode('utf-8'))
        m.update(str(id_b).encode('utf-8'))
        output_dir = os.path.join(output_dir,
                                  m.hexdigest()[:request.subdir_digits])
    gfile.MakeDirs(output_dir)

    # Terminate early if the output already exists.
    dp = request.points[point_num].point
    target_path = os.path.join(
        output_dir, '%d-%d_at_%d_%d_%d.npz' % (id_a, id_b, dp.x, dp.y, dp.z))
    if gfile.Exists(target_path):
        logging.info('Output already exists: %s', target_path)
        return

    return target_path
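
For illustration, a small sketch of how the sharded subdirectory above is derived; the ids, coordinates, digit count, and base directory are hypothetical, and str(...) is encoded to bytes as in the Python 3 fix above:

import hashlib
import os

id_a, id_b, subdir_digits = 123, 456, 2  # hypothetical values
m = hashlib.md5()
m.update(str(id_a).encode('utf-8'))
m.update(str(id_b).encode('utf-8'))
output_dir = os.path.join('/tmp/reseg', m.hexdigest()[:subdir_digits])
# e.g. '/tmp/reseg/<2 hex chars>/123-456_at_10_20_30.npz'
target_path = os.path.join(output_dir,
                           '%d-%d_at_%d_%d_%d.npz' % (id_a, id_b, 10, 20, 30))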
Example #8
def dump_object(object_to_dump, output_path):

  if not tf.io.gfile.exists(output_path):
    gfile.MakeDirs(os.path.dirname(output_path))
  
  with tf.io.gfile.GFile(output_path, 'w') as wf:
    joblib.dump(object_to_dump, wf)
Example #9
    def _download(self, trial):
        """Downloads a single url given by the trial (thread safe).

        Args:
          trial (UriTrial): Object containing info about download.

        Raises:
          ValueError: If the destination dir is not empty.
        """
        log = util.build_log(prefix=trial.id)

        # Check the download dir is empty
        if (gfile.Exists(trial.output_path)
                and gfile.ListDirectory(trial.output_path)):
            raise ValueError('Download dir {} should be empty'.format(
                trial.output_path))

        gfile.MakeDirs(trial.output_path)

        log('Start downloading...')
        self._backend.download(trial)

        # TODO(epot): Compute the checksum

        # Update the output path
        trial.output_path = get_download_filepath(trial)

        log('Download complete at {}', trial.output_path)
Example #10
def dump_object(object_to_dump, output_path):

    if not gfile.Exists(output_path):
        gfile.MakeDirs(os.path.dirname(output_path))

    with gfile.Open(output_path, 'w') as wf:
        joblib.dump(object_to_dump, wf)
Example #11
    def _prepare(self):
        """ Prepares for evaluation.

        Builds the model with reuse=True, mode=EVAL and preprocesses
        data file(s).
        """
        text_inputter = TextLineInputter(dataset=self._dataset,
                                         data_field_name="eval_features_file",
                                         batch_size=self._batch_size)
        self._eval_feeding_data = text_inputter.make_feeding_data()
        self._model_configs = update_infer_params(  # update inference parameters
            self._model_configs,
            beam_size=self._beam_size,
            maximum_labels_length=self._maximum_labels_length,
            length_penalty=self._length_penalty)
        estimator_spec = model_fn(model_configs=self._model_configs,
                                  mode=ModeKeys.INFER,
                                  dataset=self._dataset,
                                  name=self._model_name,
                                  reuse=True,
                                  verbose=False)
        self._predict_ops = estimator_spec.predictions
        tmp_trans_dir = os.path.join(self._model_configs["model_dir"],
                                     GlobalNames.TMP_TRANS_DIRNAME)
        if not gfile.Exists(tmp_trans_dir):
            gfile.MakeDirs(tmp_trans_dir)
        self._tmp_trans_file_prefix = os.path.join(
            tmp_trans_dir, GlobalNames.TMP_TRANS_FILENAME_PREFIX)
        self._read_ckpt_bleulog()
        self._eval_labels_file = self._dataset.eval_labels_file
        self._check_bleu_script()
        self._estop_patience = 0
        self._best_bleu_score = 0.
Example #12
def main(_argv):
  """Main functions. Runs all anaylses."""
  # pylint: disable=W0212
  tfprof_logger._merge_default_with_oplog = merge_default_with_oplog

  FLAGS.model_dir = os.path.abspath(os.path.expanduser(FLAGS.model_dir))
  output_dir = os.path.join(FLAGS.model_dir, "profile")
  gfile.MakeDirs(output_dir)

  run_meta, graph, op_log = load_metadata(FLAGS.model_dir)

  param_arguments = [
      param_analysis_options(output_dir),
      micro_anaylsis_options(output_dir),
      flops_analysis_options(output_dir),
      device_analysis_options(output_dir),
  ]

  for tfprof_cmd, params in param_arguments:
    model_analyzer.print_model_analysis(
        graph=graph,
        run_meta=run_meta,
        op_log=op_log,
        tfprof_cmd=tfprof_cmd,
        tfprof_options=params)

    if params["dump_to_file"] != "":
      print("Wrote {}".format(params["dump_to_file"]))
Example #13
def main(unused_argv):
    request = inference_flags.request_from_flags()

    if not gfile.Exists(request.segmentation_output_dir):
        gfile.MakeDirs(request.segmentation_output_dir)

    bbox = bounding_box_pb2.BoundingBox()
    text_format.Parse(FLAGS.bounding_box, bbox)

    runner = inference.Runner()
    runner.start(request, with_membrane=FLAGS.with_membrane)
    print('>>>>>>>>>>>>>>>>> FAKE RUN')
    runner.run((bbox.start.z, bbox.start.y, bbox.start.x),
               (bbox.size.z, bbox.size.y, bbox.size.x),
               with_membrane=FLAGS.with_membrane,
               fake=True)
    print('>>>>>>>>>>>>>>>>> REAL RUN')
    runner.run((bbox.start.z, bbox.start.y, bbox.start.x),
               (bbox.size.z, bbox.size.y, bbox.size.x),
               with_membrane=FLAGS.with_membrane)

    counter_path = os.path.join(request.segmentation_output_dir,
                                'counters.txt')
    if not gfile.Exists(counter_path):
        runner.counters.dump(counter_path)
 def copy_latest_checkpoint(self):
     """Copy over the latest checkpoints to the target directory."""
     chkpt = get_latest_checkpoint(self.model_directory)
     logging.info('Got latest checkpoint: %s', chkpt)
     if chkpt is None:
         return None
     # Check if the evaluation meta graph has been copied.
     if self.has_checkpoint() is None:
         # Don't copy temp export folders, e.g. 'temp-01234567/saved_model.pb'
         export_file = gfile.Glob(
             os.path.join(self.model_directory,
                          'export/best_exporter/[0-9]*/saved_model.pb'))[0]
         logging.info('Copying eval export file: %s', export_file)
         target_export_dir = os.path.join(
             self.target_directory, 'export/best_exporter',
             os.path.basename(os.path.dirname(export_file)))
         gfile.MakeDirs(target_export_dir)
         verbose_copy(
             export_file,
             os.path.join(target_export_dir, os.path.basename(export_file)))
     files = gfile.Glob(os.path.join(self.model_directory, chkpt) + '.*')
     logging.info('Copying files: %s', ', '.join(files))
     for fname in files:
         verbose_copy(
             fname,
             os.path.join(self.target_directory, os.path.basename(fname)))
     return chkpt
Example #15
def save_flags():
    gfile.MakeDirs(FLAGS.train_dir)
    with gfile.Open(os.path.join(FLAGS.train_dir, 'flags.%d' % time.time()),
                    'w') as f:
        for mod, flag_list in FLAGS.flags_by_module_dict().items():
            if (mod.startswith('google3.research.neuromancer.tensorflow')
                    or mod.startswith('/')):
                for flag in flag_list:
                    f.write('%s\n' % flag.serialize())
Example #16
def dump_object(object_to_dump, output_path):
    """
      Writes output trained model pipeline to GCS
    """

    if not tf.io.gfile.exists(output_path):
        gfile.MakeDirs(os.path.dirname(output_path))

    with tf.io.gfile.GFile(output_path, 'w') as wf:
        joblib.dump(object_to_dump, wf)
def make_dir(dir_name: str) -> str:
  if gfile.Exists(dir_name):
    if gfile.IsDirectory(dir_name):
      return dir_name
    else:
      logging.fatal(
          'Trying to create directory "%s", but there '
          'is a file with the same name', dir_name)
  gfile.MakeDirs(dir_name)
  return dir_name
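
A brief usage sketch for make_dir above; the path is hypothetical. Repeated calls are safe: the first call creates the directory, later calls simply return it, and a regular file at that path triggers logging.fatal.

log_dir = make_dir('/tmp/experiment/logs')   # created on first call
log_dir = make_dir('/tmp/experiment/logs')   # already a directory, returned as-is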
Example #18
def verify_dirs_exist(dirname):
    '''Verify that the directory exists

    Will recursively create directories as needed.

    Input:
        dirname: str; directory name to create
    '''

    if not exists(dirname):
        gfile.MakeDirs(dirname)
Example #19
def save(saver, sess, logdir, step):
    model_name = 'model.ckpt'
    checkpoint_path = os.path.join(logdir, model_name)
    print('Storing checkpoint to {} ...'.format(logdir), end="")
    sys.stdout.flush()

    if not gfile.Exists(logdir):
        gfile.MakeDirs(logdir)

    saver.save(sess, checkpoint_path, global_step=step)
    print(' Done.')
Example #20
 def begin(self):
   self._global_step = tf.train.get_global_step()
   self._pred_dict = graph_utils.get_dict_from_collection("predictions")
   self._features = graph_utils.get_dict_from_collection("features")
   self._iter_count = 0
   self._eval_str = ""
   self._current_global_step = None
   # Create the evaluation result directory
   if self._evalution_result_dir is not None:
     if not os.path.exists(self._evalution_result_dir):
       gfile.MakeDirs(self._evalution_result_dir)
       # Octal mode: a bare decimal 777 would set unintended permission bits.
       os.chmod(self._evalution_result_dir, 0o777)
Example #21
    def dump(model_config, output_dir):
        """ Dumps model configurations.

        Args:
            model_config: A dict.
            output_dir: A string, the output directory.
        """
        model_config_filename = os.path.join(output_dir, Constants.MODEL_CONFIG_YAML_FILENAME)
        if not gfile.Exists(output_dir):
            gfile.MakeDirs(output_dir)
        with open_file(model_config_filename, mode="w") as file:
            yaml.dump(model_config, file)
Example #22
def main(unused_argv):
    request = inference_flags.request_from_flags()

    if not gfile.Exists(request.segmentation_output_dir):
        gfile.MakeDirs(request.segmentation_output_dir)

    bbox = bounding_box_pb2.BoundingBox()
    text_format.Parse(FLAGS.bounding_box, bbox)

    # start_pos = tuple([int(i) for i in FLAGS.start_pos])
    runner = inference.Runner()

    corner = (bbox.start.z, bbox.start.y, bbox.start.x)
    subvol_size = (bbox.size.z, bbox.size.y, bbox.size.x)
    start_pos = tuple([int(i) for i in FLAGS.start_pos])

    seg_path = storage.segmentation_path(request.segmentation_output_dir,
                                         corner)
    prob_path = storage.object_prob_path(request.segmentation_output_dir,
                                         corner)

    runner.start(request)
    canvas, alignment = runner.make_canvas(corner, subvol_size)
    num_iter = canvas.segment_at(start_pos)

    print('>>', num_iter)

    sel = [
        slice(max(s, 0), e + 1)
        for s, e in zip(canvas._min_pos -
                        canvas._pred_size // 2, canvas._max_pos +
                        canvas._pred_size // 2)
    ]
    mask = canvas.seed[sel] >= canvas.options.segment_threshold
    raw_segmented_voxels = np.sum(mask)

    mask &= canvas.segmentation[sel] <= 0
    actual_segmented_voxels = np.sum(mask)
    canvas._max_id += 1
    canvas.segmentation[sel][mask] = canvas._max_id
    canvas.seg_prob[sel][mask] = storage.quantize_probability(
        expit(canvas.seed[sel][mask]))

    runner.save_segmentation(canvas, alignment, seg_path, prob_path)

    runner.run((bbox.start.z, bbox.start.y, bbox.start.x),
               (bbox.size.z, bbox.size.y, bbox.size.x))

    counter_path = os.path.join(request.segmentation_output_dir,
                                'counters.txt')
    if not gfile.Exists(counter_path):
        runner.counters.dump(counter_path)
Example #23
def create_captcha_dataset(size=100,
                           data_dir='./data/',
                           height=60,
                           width=160,
                           image_format='.png'):
    if gfile.Exists(data_dir):
        gfile.DeleteRecursively(data_dir)
    gfile.MakeDirs(data_dir)
    captcha = ImageCaptcha(width=width, height=height)
    for _ in range(size):
        text = gen_random_text(CAPTCHA_CHARSET, CAPTCHA_LENGTH)
        captcha.write(text, data_dir + text + image_format)
    return None
Example #24
# Assumes `import contextlib`; the yield below implies context-manager use.
@contextlib.contextmanager
def write_handle(path, mode=None):

  if _supports_make_dirs(path):
    gfile.MakeDirs(os.path.dirname(path))

  if mode is None:
    if _supports_binary_writing(path):
      mode = 'wb'
    else:
      mode = 'w'

  handle = gfile.Open(path, mode)
  yield handle
  handle.close()
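
With the contextlib.contextmanager decorator added above, write_handle is used in a with-statement; a minimal sketch with a hypothetical path:

with write_handle('/tmp/outputs/results.txt', mode='w') as f:
    # The parent directory is created on demand; the handle is closed on exit.
    f.write('hello world\n')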
Example #25
    def __init__(self, log_dir):
        """Create a new SummaryWriter.

        Args:
          log_dir: path to record tfevents files in.
        """
        # If needed, create log_dir directory as well as missing parent directories.
        if not gfile.IsDirectory(log_dir):
            gfile.MakeDirs(log_dir)

        self.writer = tf.summary.FileWriter(log_dir, graph=None)
        self.end_summaries = []
        self.step = 0
        self.closed = False
Example #26
def save_yaml(output_dir, config):
    """Save two yaml files.

    1. 'config.yaml' is a duplicate of the python config file, as yaml.
    2. 'meta.yaml' is for the application. Its keys are defined by `PARAMS_FOR_EXPORT`.
    """

    if not gfile.Exists(output_dir):
        gfile.MakeDirs(output_dir)

    config_yaml_path = _save_config_yaml(output_dir, config)
    meta_yaml_path = _save_meta_yaml(output_dir, config)

    return config_yaml_path, meta_yaml_path
Example #27
def dump_object(object_to_dump, output_path):
  """Pickle the object and save to the output_path.

  Args:
    object_to_dump: Python object to be pickled
    output_path: (string) output path which can be Google Cloud Storage

  Returns:
    None
  """

  if not gfile.Exists(output_path):
    gfile.MakeDirs(os.path.dirname(output_path))
  with gfile.Open(output_path, 'w') as wf:
    joblib.dump(object_to_dump, wf)
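
A usage sketch for dump_object above; the estimator and the GCS path are hypothetical, and any picklable object would do:

from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=200)
# The parent "directory" is created first, then the pickled model is written.
dump_object(model, 'gs://my-bucket/models/model.joblib')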
Example #28
    def dump(self, model_dir):
        """Dumps the options to a file in the model directory.

        Args:
          model_dir: Path to the model directory. The options will be
            dumped into a file in this directory.
        """
        gfile.MakeDirs(model_dir)
        options_dict = {
            "model_class": self.model_class,
            "model_params": self.model_params,
        }

        with gfile.GFile(TrainOptions.path(model_dir), "wb") as file:
            file.write(json.dumps(options_dict).encode("utf-8"))
    def _prepare(self):
        """ Prepares for evaluation.

        Builds the model with reuse=True, mode=EVAL and preprocesses
        data file(s).
        """
        features_file = self._dataset["features_file"]
        labels_file = self._dataset["labels_file"]
        vocab_source = self._dataset["vocab_source"]
        vocab_target = self._dataset["vocab_target"]
        self._model_configs = update_infer_params(  # update inference parameters
            self._model_configs,
            beam_size=self._beam_size,
            maximum_labels_length=self._maximum_labels_length,
            length_penalty=self._length_penalty)
        estimator_spec = model_fn(model_configs=self._model_configs,
                                  mode=ModeKeys.INFER,
                                  vocab_source=vocab_source,
                                  vocab_target=vocab_target,
                                  name=self._model_name, reuse=True,
                                  verbose=False)
        self._predict_ops = estimator_spec.predictions
        text_inputter = TextLineInputter(
            line_readers=LineReader(
                data=features_file,
                preprocessing_fn=lambda x: vocab_source.convert_to_idlist(x)),
            padding_id=vocab_source.pad_id,
            batch_size=self._batch_size)
        self._infer_data = text_inputter.make_feeding_data(
            input_fields=estimator_spec.input_fields)
        tmp_trans_dir = os.path.join(self._model_configs["model_dir"], Constants.TMP_TRANS_DIRNAME)
        if not gfile.Exists(tmp_trans_dir):
            gfile.MakeDirs(tmp_trans_dir)
        self._tmp_trans_file_prefix = os.path.join(tmp_trans_dir, Constants.TMP_TRANS_FILENAME_PREFIX)
        self._read_ckpt_bleulog()
        # load references
        self._references = []
        for rfile in access_multiple_files(labels_file):
            with open_file(rfile) as fp:
                if self._char_level:
                    self._references.append(to_chinese_char(fp.readlines()))
                else:
                    self._references.append(fp.readlines())
        self._references = list(map(list, zip(*self._references)))
        with open_file(features_file) as fp:
            self._sources = fp.readlines()
        self._bad_count = 0
        self._best_bleu_score = 0.
Example #30
    def _extract(self, trial):
        """Extract a single file given by the trial.

        Args:
          trial (UriTrial): Object containing info about the extraction.

        Raises:
          ValueError: If the format is incorrect.
        """
        log = util.build_log(prefix=trial.id)

        src = trial.extract_info.path
        dst = trial.output_path

        rar_format = ExtractFormat(ext=TAR_EXT, fn=self._backend.extract_tar)
        zip_format = ExtractFormat(ext=['.zip'], fn=self._backend.extract_zip)
        gz_format = ExtractFormat(ext=['.gz'], fn=self._backend.extract_gzip)
        # Order matters: '.tar.gz' will call _extract_tar while '.gz' will
        # call _extract_gzip.
        extraction_fns = collections.OrderedDict([
            (download_pb2.ExtractInfo.RAR, rar_format),
            (download_pb2.ExtractInfo.ZIP, zip_format),
            (download_pb2.ExtractInfo.GZ, gz_format),
        ])

        # Filetype explicitly defined
        if trial.extract_info.filetype != download_pb2.ExtractInfo.UNKNOWN:
            extract_filetype = trial.extract_info.filetype
            extract_fn = extraction_fns[extract_filetype]
        # Try to infer the filetype from the name
        else:
            for extract_filetype, extract_format in extraction_fns.items():
                if any(src.lower().endswith(ext)
                       for ext in extract_format.ext):
                    extract_fn = extract_format.fn
                    break
            else:  # No break (unrecognized archive)
                raise ValueError(
                    'Unsupported archive file {} for trial {}. If you think this is an '
                    'error, you can try to explicitly define the type in the '
                    'ExtractFileType'.format(src, trial.id))

        log('Extract {} with {}...', src, extract_fn.__name__)
        gfile.MakeDirs(dst)
        extract_fn(src, dst)

        if extract_filetype == download_pb2.ExtractInfo.GZ:
            trial.output_path = get_download_filepath(trial)
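
The extension fallback above relies on the ordered dict so that '.tar.gz' is matched by the tar entry before the plain '.gz' entry. A standalone sketch of that inference, with simplified extension lists standing in for TAR_EXT and the other constants:

import collections

FORMATS = collections.OrderedDict([
    ('tar', ['.tar', '.tar.gz', '.tgz']),   # checked first
    ('zip', ['.zip']),
    ('gz', ['.gz']),
])

def infer_format(filename):
    for name, exts in FORMATS.items():
        if any(filename.lower().endswith(ext) for ext in exts):
            return name
    raise ValueError('Unsupported archive file {}'.format(filename))

print(infer_format('corpus.tar.gz'))  # -> 'tar'
print(infer_format('labels.csv.gz'))  # -> 'gz'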