コード例 #1
0
ファイル: job_executor.py プロジェクト: lkotlewski/bert
def execute_job():
    """Run the prediction job loop, polling the input directory forever.

    Builds the job configuration, prepares directories and logging, and
    instantiates the predictor selected by ``config.prediction``. Each
    iteration then runs the predictor on every non-directory entry of
    ``config.dir_in``. An entry is moved to ``config.dir_success`` once
    processed, or to ``config.dir_error`` if prediction (or the move to
    the success directory) raises. The loop sleeps ``config.interval``
    between iterations and never returns.

    Raises:
        Exception: if ``config.prediction`` is neither ``'BERT'`` nor
            ``'KEYED_VECTORS'``.
    """
    config = JobExecutorConfig()
    make_needed_dirs(config)
    configure_logging(config)

    mode = config.prediction
    if mode == 'BERT':
        predictor_cls = BertMatchPredictor
    elif mode == 'KEYED_VECTORS':
        predictor_cls = KeyedVectorsFormatPredictor
    else:
        raise Exception("Wrong prediction mode")
    match_predictor = predictor_cls()

    while True:
        logging.info("job iteration started")
        source_dir = config.dir_in

        # Snapshot the listing up front: entries are moved out of the
        # directory while they are being processed below.
        pending = []
        for entry in gfile.ListDirectory(source_dir):
            if gfile.IsDirectory(join(source_dir, entry)):
                continue
            pending.append(entry)

        for file_name in pending:
            logging.info(file_name)
            source_path = join(source_dir, file_name)
            try:
                match_predictor.predict(
                    source_dir, file_name, config.dir_result)
                gfile.Rename(
                    source_path, join(config.dir_success, file_name))
            except Exception:
                # Keep the loop alive: record the failure and park the file
                # in the error directory.
                logging.error(traceback.format_exc())
                gfile.Rename(
                    source_path, join(config.dir_error, file_name))

        logging.info("job iteration finished")
        time.sleep(config.interval)
コード例 #2
0
def download(uri, dst_dir):
  """Download the given URI into `dst_dir`.

  The payload is first written to `<filename>.incomplete` inside `dst_dir`
  and renamed to `<filename>` only after it has been fully written.

  Args:
    uri: URI to copy (or download) from.
    dst_dir: path to the directory that will be used.

  Returns:
    The path to the downloaded file.
  """
  # Stream in bounded pieces rather than buffering the whole payload.
  chunk_size = 1024 * 1024

  # The context manager closes the response even if writing fails.
  with urllib.request.urlopen(uri) as response:
    # The file is named after the last '/'-separated segment of the URL
    # that was actually retrieved.
    filename = response.geturl().split('/')[-1]
    incomplete_path = os.path.join(dst_dir, '{}.incomplete'.format(filename))
    dst_path = os.path.join(dst_dir, filename)

    # TODO(epot): Could add a shared tqdm instance across parallel download
    # to display a single shared progression bar.

    # TODO(b/119663674): Add Google Drive support (cf Ryan code)

    with gfile.Open(incomplete_path, 'wb') as f:
      while True:
        chunk = response.read(chunk_size)
        # Write before testing for EOF so that an empty payload still
        # results in one write call, as a single read()/write() would.
        f.write(chunk)
        if not chunk:
          break

  gfile.Rename(incomplete_path, dst_path)

  return dst_path
コード例 #3
0
def use_incomplete_dir(trial):
  """Point the trial at a temporary `.incomplete` path while it is processed.

  This is a generator with a single `yield`. Before yielding,
  `trial.output_path` is replaced by the same path with an '.incomplete'
  suffix. When resumed, the temporary path is renamed to the original one and
  `trial.output_path` is rebuilt from the original path plus whatever
  `util.lchop` returns for the current path and the temporary prefix.

  NOTE(review): presumably meant to be wrapped by a context-manager decorator;
  none is visible on this definition -- confirm at the definition site.

  Args:
    trial: object exposing `id` and an assignable `output_path` string.

  Raises:
    ValueError: if, on resumption, `trial.output_path` no longer starts with
      the temporary path.
  """
  final_path = trial.output_path
  # Should add random string to avoid collision with local download manager ?
  staging_path = trial.output_path + '.incomplete'
  trial.output_path = staging_path

  yield

  current_path = trial.output_path
  if not current_path.startswith(staging_path):
    message = (
        'The output path for {} has been modified to {} and do not match '
        'the original {}'
    ).format(trial.id, current_path, staging_path)
    raise ValueError(message)

  # Publish the results under the original path, then re-point the trial at
  # it, keeping anything that was appended to the temporary path.
  gfile.Rename(staging_path, final_path)
  suffix = util.lchop(trial.output_path, staging_path)
  trial.output_path = final_path + suffix
コード例 #4
0
def atomic_file(path):
    """Save data to a target path via a temporary file and a final rename.

    The caller writes into a local named temporary file. Once the generator
    is resumed, that file is flushed, copied to ``<path>.tmp`` and then
    renamed onto ``path``. Any existing data at the target path will be
    overwritten.

    NOTE(review): presumably meant to be wrapped by a context-manager
    decorator; none is visible on this definition -- confirm.

    Args:
        path: target path at which to save file

    Yields:
        file-like object
    """
    staging_path = '%s.tmp' % path
    scratch = tempfile.NamedTemporaryFile()
    with scratch:
        yield scratch
        scratch.flush()
        # Necessary when the destination is on CNS.
        gfile.Copy(scratch.name, staging_path, overwrite=True)
    # The local temporary file is closed by now; publish the staged copy.
    gfile.Rename(staging_path, path, overwrite=True)