Example #1
def _BuildFileUploadMap(manifest, source_dir, bucket_ref, tmp_dir,
                        max_file_size):
    """Builds a map of files to upload, indexed by their hash.

  This skips already-uploaded files.

  Args:
    manifest: A dict containing the deployment manifest for a single service.
    source_dir: The relative source directory of the service.
    bucket_ref: The GCS bucket reference to upload files into.
    tmp_dir: The path to a temporary directory where generated files may be
      stored. If a file in the manifest is not found in the source directory,
      it will be retrieved from this directory instead.
    max_file_size: int, File size limit per individual file or None if no limit.

  Raises:
    LargeFileError: if one of the files to upload exceeds the maximum App Engine
    file size.

  Returns:
    A dict mapping hashes to file paths that should be uploaded.
  """
    files_to_upload = {}
    storage_client = storage_api.StorageClient()
    ttl = _GetLifecycleDeletePolicy(storage_client, bucket_ref)
    existing_items = set(o.name for o in storage_client.ListBucket(bucket_ref)
                         if _IsTTLSafe(ttl, o))
    skipped_size, total_size = 0, 0
    for rel_path in manifest:
        full_path = os.path.join(source_dir, rel_path)
        # For generated files, the relative path is based on the tmp_dir rather
        # than source_dir. If the file is not in the source directory, look in
        # tmp_dir instead.
        if not os.path.exists(encoding.Encode(full_path, encoding='utf-8')):
            full_path = os.path.join(tmp_dir, rel_path)
        # Perform this check when creating the upload map, so we catch too-large
        # files that have already been uploaded
        size = os.path.getsize(encoding.Encode(full_path, encoding='utf-8'))
        if max_file_size and size > max_file_size:
            raise LargeFileError(full_path, size, max_file_size)

        sha1_hash = manifest[rel_path]['sha1Sum']
        total_size += size
        if sha1_hash in existing_items:
            log.debug('Skipping upload of [{f}]'.format(f=rel_path))
            skipped_size += size
        else:
            files_to_upload[sha1_hash] = full_path
    if total_size:
        log.info('Incremental upload skipped {pct}% of data'.format(
            pct=round(100.0 * skipped_size / total_size, 2)))
    return files_to_upload
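
As a minimal sketch of the same skip-already-uploaded pattern, assuming only the standard library (the helper name and in-memory hashing are illustrative, not the SDK implementation):

import hashlib


def build_upload_map(paths, existing_hashes):
    """Map SHA-1 content hashes to paths, skipping hashes already uploaded."""
    to_upload = {}
    for path in paths:
        with open(path, 'rb') as f:
            # Reading whole files keeps the sketch short; real code would chunk.
            digest = hashlib.sha1(f.read()).hexdigest()
        if digest not in existing_hashes:
            to_upload[digest] = path
    return to_upload

# Files whose hash already appears in the bucket listing are simply dropped:
# to_upload = build_upload_map(['app.yaml', 'main.py'], existing_hashes=set())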
Example #2
  def __init__(self, args, timeout=None, stdin=None, env=None):
    self.args = [encoding.Encode(a, encoding='utf-8') for a in args]
    self.timeout = timeout
    self.stdin = stdin
    self.env = encoding.EncodeEnv(env, encoding='utf-8')
    self.thread = None
    self.p = None
    self.result = None
    self.exc_info = None
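
For orientation, a rough standalone equivalent of this encoding step, under the assumption that encoding.Encode and encoding.EncodeEnv simply UTF-8-encode text values (hypothetical helper, not the SDK API):

def encode_command(args, env=None):
    """UTF-8-encode subprocess arguments and environment values (sketch)."""
    enc_args = [a.encode('utf-8') if isinstance(a, str) else a for a in args]
    enc_env = None
    if env is not None:
        enc_env = {k: (v.encode('utf-8') if isinstance(v, str) else v)
                   for k, v in env.items()}
    return enc_args, enc_env

# enc_args, enc_env = encode_command(['echo', 'héllo'], {'LANG': 'C.UTF-8'})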
Example #3
def BinaryFileReader(path):
  """Opens the given file for binary read for use in a 'with' statement.

  Args:
    path: str, The file path to read from.

  Returns:
    A file-like object opened for read in binary mode.
  """
  return _FileOpener(encoding_util.Encode(path, encoding='utf-8'), 'rb', 'read')
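
A rough standard-library equivalent and usage sketch, assuming the helper ultimately behaves like io.open(path, 'rb') (the file name below is made up):

import io


def binary_file_reader(path):
    """Open a file for binary reading, for use in a 'with' statement."""
    return io.open(path, 'rb')

# Usage sketch:
# with binary_file_reader('service.zip') as f:
#     magic = f.read(4)  # first four bytes of the (hypothetical) archive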
Example #4
def _Exec(args,
          process_holder,
          env=None,
          out_func=None,
          err_func=None,
          in_str=None,
          **extra_popen_kwargs):
  """See Exec docstring."""
  if out_func:
    extra_popen_kwargs['stdout'] = subprocess.PIPE
  if err_func:
    extra_popen_kwargs['stderr'] = subprocess.PIPE
  if in_str:
    extra_popen_kwargs['stdin'] = subprocess.PIPE
  try:
    if args and isinstance(args, list):
      # On Python 2.x on Windows, the first arg can't be unicode. We encode it
      # anyway because there is really nothing else we can do if that happens.
      # https://bugs.python.org/issue19264
      args = [encoding.Encode(a) for a in args]
    p = subprocess.Popen(args, env=_GetToolEnv(env=env), **extra_popen_kwargs)
  except OSError as err:
    if err.errno == errno.EACCES:
      raise PermissionError(err.strerror)
    elif err.errno == errno.ENOENT:
      raise InvalidCommandError(args[0])
    raise
  process_holder.process = p

  if process_holder.signum is not None:
    # This covers the small possibility that process_holder handled a
    # signal when the process was starting but not yet set to
    # process_holder.process.
    if p.poll() is None:
      p.terminate()

  if isinstance(in_str, six.text_type):
    in_str = in_str.encode('utf-8')
  stdout, stderr = list(map(encoding.Decode, p.communicate(input=in_str)))

  if out_func:
    out_func(stdout)
  if err_func:
    err_func(stderr)
  return p.returncode
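
The core pipe-and-decode pattern used here, reduced to the standard library under the assumption that encoding.Decode is plain UTF-8 decoding (a sketch, not the SDK's _Exec):

import subprocess


def run_and_capture(args, in_str=None):
    """Run a command, optionally feed stdin, return (code, stdout, stderr)."""
    p = subprocess.Popen(args,
                         stdin=subprocess.PIPE if in_str else None,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    if isinstance(in_str, str):
        in_str = in_str.encode('utf-8')
    stdout, stderr = p.communicate(input=in_str)
    return p.returncode, stdout.decode('utf-8'), stderr.decode('utf-8')

# code, out, err = run_and_capture(['echo', 'hello'])  # POSIX example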
Example #5
    def ReportMetrics(self, wait_for_report=False):
        """Reports the collected metrics using a separate async process."""
        if not self._metrics:
            return

        temp_metrics_file = tempfile.NamedTemporaryFile(delete=False)
        with temp_metrics_file:
            pickle.dump(self._metrics, temp_metrics_file)
            self._metrics = []

        this_file = encoding.Decode(__file__)
        reporting_script_path = os.path.realpath(
            os.path.join(os.path.dirname(this_file), 'metrics_reporter.py'))
        execution_args = execution_utils.ArgsForPythonTool(
            reporting_script_path, temp_metrics_file.name)
        # On Python 2.x on Windows, the first arg can't be unicode. We encode it
        # anyway because there is really nothing else we can do if that happens.
        # https://bugs.python.org/issue19264
        execution_args = [encoding.Encode(a) for a in execution_args]

        exec_env = os.environ.copy()
        encoding.SetEncodedValue(exec_env, 'PYTHONPATH',
                                 os.pathsep.join(sys.path))

        try:
            p = subprocess.Popen(execution_args,
                                 env=exec_env,
                                 **self._async_popen_args)
            log.debug('Metrics reporting process started...')
        except OSError:
            # This can happen specifically if the Python executable moves between the
            # start of this process and now.
            log.debug('Metrics reporting process failed to start.')
            return
        if wait_for_report:
            # NOTE: p.wait() can cause a deadlock. p.communicate() is recommended.
            # See python docs for more information.
            p.communicate()
            log.debug('Metrics reporting process finished.')
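
The hand-off pattern in isolation: pickle the pending records to a temp file and start a separate reporter process on that file. A sketch with an assumed reporter script name, not the SDK's metrics machinery:

import pickle
import subprocess
import sys
import tempfile


def report_async(metrics, reporter_script='metrics_reporter.py'):
    """Dump metrics to a temp file and launch a reporter process (sketch)."""
    tmp = tempfile.NamedTemporaryFile(delete=False)
    with tmp:
        pickle.dump(metrics, tmp)
    # The child script (hypothetical) would read and then delete tmp.name.
    return subprocess.Popen([sys.executable, reporter_script, tmp.name])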
Example #6
    def _RaiseOnSymlinkLoop(self, full_path):
        """Raise SymlinkLoopError if the given path is a symlink loop."""
        if not os.path.islink(encoding.Encode(full_path, encoding='utf-8')):
            return

        # Does it refer to itself somehow?
        p = os.readlink(full_path)
        targets = set()
        while os.path.islink(p):
            if p in targets:
                raise SymlinkLoopError(
                    'The symlink [{}] refers to itself.'.format(full_path))
            targets.add(p)
            p = os.readlink(p)
        # Does it refer to its containing directory?
        p = os.path.dirname(full_path)
        while p and os.path.basename(p):
            if os.path.samefile(p, full_path):
                raise SymlinkLoopError(
                    'The symlink [{}] refers to its own containing directory.'.
                    format(full_path))
            p = os.path.dirname(p)
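
The self-reference check can be sketched on its own; unlike the method above, this version also resolves relative link targets against the link's directory (illustrative helper, not the SDK code):

import os


def is_symlink_loop(path):
    """Return True if following 'path' revisits an already-seen link."""
    seen = set()
    while os.path.islink(path):
        if path in seen:
            return True
        seen.add(path)
        target = os.readlink(path)
        # Relative targets are interpreted relative to the link's directory.
        path = os.path.normpath(os.path.join(os.path.dirname(path), target))
    return False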
Example #7
def Exec(args,
         env=None,
         no_exit=False,
         out_func=None,
         err_func=None,
         in_str=None,
         **extra_popen_kwargs):
    """Emulates the os.exec* set of commands, but uses subprocess.

  This executes the given command, waits for it to finish, and then exits this
  process with the exit code of the child process.

  Args:
    args: [str], The arguments to execute.  The first argument is the command.
    env: {str: str}, An optional environment for the child process.
    no_exit: bool, True to just return the exit code of the child instead of
      exiting.
    out_func: str->None, a function to call with the stdout of the executed
      process. This can be e.g. log.file_only_logger.debug or log.out.write.
    err_func: str->None, a function to call with the stderr of the executed
      process. This can be e.g. log.file_only_logger.debug or log.err.write.
    in_str: bytes or str, input to send to the subprocess' stdin.
    **extra_popen_kwargs: Any additional kwargs will be passed through directly
      to subprocess.Popen

  Returns:
    int, The exit code of the child if no_exit is True, else this method does
    not return.

  Raises:
    PermissionError: if user does not have execute permission for cloud sdk bin
    files.
    InvalidCommandError: if the command entered cannot be found.
  """
    log.debug('Executing command: %s', args)
    # We use subprocess instead of execv because windows does not support process
    # replacement.  The result of execv on windows is that a new process is
    # started and the original is killed.  When running in a shell, the prompt
    # returns as soon as the parent is killed even though the child is still
    # running.  subprocess waits for the new process to finish before returning.
    env = _GetToolEnv(env=env)

    process_holder = _ProcessHolder()
    with _ReplaceSignal(signal.SIGTERM, process_holder.Handler):
        with _ReplaceSignal(signal.SIGINT, process_holder.Handler):
            if out_func:
                extra_popen_kwargs['stdout'] = subprocess.PIPE
            if err_func:
                extra_popen_kwargs['stderr'] = subprocess.PIPE
            if in_str:
                extra_popen_kwargs['stdin'] = subprocess.PIPE
            try:
                if args and isinstance(args, list):
                    # On Python 2.x on Windows, the first arg can't be unicode.
                    # We encode it anyway because there is really nothing else
                    # we can do if that happens.
                    # https://bugs.python.org/issue19264
                    args = [encoding.Encode(a) for a in args]
                p = subprocess.Popen(args, env=env, **extra_popen_kwargs)
            except OSError as err:
                if err.errno == errno.EACCES:
                    raise PermissionError(err.strerror)
                elif err.errno == errno.ENOENT:
                    raise InvalidCommandError(args[0])
                raise
            process_holder.process = p

            if process_holder.signum is not None:
                # This covers the small possibility that process_holder handled a
                # signal when the process was starting but not yet set to
                # process_holder.process.
                if p.poll() is None:
                    p.terminate()

            if isinstance(in_str, six.text_type):
                in_str = in_str.encode('utf-8')
            stdout, stderr = list(
                map(encoding.Decode, p.communicate(input=in_str)))

            if out_func:
                out_func(stdout)
            if err_func:
                err_func(stderr)
            ret_val = p.returncode

    if no_exit and process_holder.signum is None:
        return ret_val
    sys.exit(ret_val)
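
The signal race the code guards against can be reproduced with a minimal holder object: remember the signal, and terminate the child once it is known. A sketch with a hypothetical ProcessHolder, not the SDK's _ProcessHolder:

import signal
import subprocess


class ProcessHolder(object):
    """Remembers a received signal and terminates the child when possible."""

    def __init__(self):
        self.process = None
        self.signum = None

    def handler(self, signum, unused_frame):
        self.signum = signum
        if self.process is not None:
            self.process.terminate()


holder = ProcessHolder()
old_handler = signal.signal(signal.SIGTERM, holder.handler)
try:
    holder.process = subprocess.Popen(['sleep', '1'])  # POSIX example command
    holder.process.wait()
finally:
    signal.signal(signal.SIGTERM, old_handler)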
Example #8
def RunPredict(model_dir,
               json_instances=None,
               text_instances=None,
               framework='tensorflow',
               signature_name=None):
    """Run ML Engine local prediction."""
    instances = predict_utilities.ReadInstancesFromArgs(
        json_instances, text_instances)
    sdk_root = config.Paths().sdk_root
    if not sdk_root:
        raise LocalPredictEnvironmentError(
            'You must be running an installed Cloud SDK to perform local '
            'prediction.')
    # Inheriting the environment preserves important variables in the child
    # process. In particular, LD_LIBRARY_PATH under linux and PATH under windows
    # could be used to point to non-standard install locations of CUDA and CUDNN.
    # If not inherited, the child process could fail to initialize Tensorflow.
    env = os.environ.copy()
    encoding.SetEncodedValue(env, 'CLOUDSDK_ROOT', sdk_root)
    # We want to use whatever the user's Python was, before the Cloud SDK started
    # changing the PATH. That's where Tensorflow is installed.
    python_executables = files.SearchForExecutableOnPath('python')
    # Need to ensure that ml_sdk is in PYTHONPATH for the import in
    # local_predict to succeed.

    orig_py_path = encoding.GetEncodedValue(env, 'PYTHONPATH') or ''
    if orig_py_path:
        orig_py_path = ':' + orig_py_path
    encoding.SetEncodedValue(
        env, 'PYTHONPATH',
        os.path.join(sdk_root, 'lib', 'third_party', 'ml_sdk') + orig_py_path)
    if not python_executables:
        # This doesn't have to be actionable because things are probably beyond help
        # at this point.
        raise LocalPredictEnvironmentError(
            'Something has gone really wrong; we can\'t find a valid Python '
            'executable on your PATH.')
    # Use python found on PATH or local_python override if set
    python_executable = (properties.VALUES.ml_engine.local_python.Get()
                         or python_executables[0])
    predict_args = ['--model-dir', model_dir, '--framework', framework]
    if signature_name:
        predict_args += ['--signature-name', signature_name]
    # Start local prediction in a subprocess.
    args = [
        encoding.Encode(a)
        for a in ([python_executable, local_predict.__file__] + predict_args)
    ]
    proc = subprocess.Popen(args,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            env=env)

    # Pass the instances to the process that actually runs local prediction.
    for instance in instances:
        proc.stdin.write((json.dumps(instance) + '\n').encode('utf-8'))
    proc.stdin.flush()

    # Get the results for the local prediction.
    output, err = proc.communicate()
    if proc.returncode != 0:
        raise LocalPredictRuntimeError(err)
    if err:
        log.warning(err)

    try:
        return json.loads(output)
    except ValueError:
        raise InvalidReturnValueError(
            'The output for prediction is not in JSON format: ' +
            encoding.Decode(output))
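
The stdin protocol used here, one JSON object per line, can be exercised with a stand-in child process. A sketch only; on POSIX systems 'cat' simply echoes the instances back:

import json
import subprocess


def send_instances(args, instances):
    """Stream newline-delimited JSON to a child and return its raw output."""
    proc = subprocess.Popen(args, stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    payload = ''.join(json.dumps(i) + '\n' for i in instances).encode('utf-8')
    out, err = proc.communicate(payload)
    if proc.returncode != 0:
        raise RuntimeError(err.decode('utf-8'))
    return out.decode('utf-8')

# print(send_instances(['cat'], [{'x': 1}, {'x': 2}]))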
Example #9
def ExecWithStreamingOutput(args,
                            env=None,
                            no_exit=False,
                            out_func=None,
                            err_func=None,
                            in_str=None,
                            **extra_popen_kwargs):
  """Emulates the os.exec* set of commands, but uses subprocess.

  This executes the given command, waits for it to finish, and then exits this
  process with the exit code of the child process. Allows realtime processing of
  stderr and stdout from subprocess using threads.

  Args:
    args: [str], The arguments to execute.  The first argument is the command.
    env: {str: str}, An optional environment for the child process.
    no_exit: bool, True to just return the exit code of the child instead of
      exiting.
    out_func: str->None, a function to call with each line of the stdout of the
      executed process. This can be e.g. log.file_only_logger.debug or
      log.out.write.
    err_func: str->None, a function to call with each line of the stderr of
      the executed process. This can be e.g. log.file_only_logger.debug or
      log.err.write.
    in_str: bytes or str, input to send to the subprocess' stdin.
    **extra_popen_kwargs: Any additional kwargs will be passed through directly
      to subprocess.Popen

  Returns:
    int, The exit code of the child if no_exit is True, else this method does
    not return.

  Raises:
    PermissionError: if user does not have execute permission for cloud sdk bin
    files.
    InvalidCommandError: if the command entered cannot be found.
  """
  log.debug('Executing command: %s', args)
  # We use subprocess instead of execv because windows does not support process
  # replacement.  The result of execv on windows is that a new process is
  # started and the original is killed.  When running in a shell, the prompt
  # returns as soon as the parent is killed even though the child is still
  # running.  subprocess waits for the new process to finish before returning.
  env = _GetToolEnv(env=env)
  process_holder = _ProcessHolder()
  with _ReplaceSignal(signal.SIGTERM, process_holder.Handler):
    with _ReplaceSignal(signal.SIGINT, process_holder.Handler):
      out_handler_func = out_func or log.Print
      err_handler_func = err_func or log.status.Print
      if in_str:
        extra_popen_kwargs['stdin'] = subprocess.PIPE
      try:
        if args and isinstance(args, list):
          # On Python 2.x on Windows, the first arg can't be unicode. We encode
          # it anyway because there is really nothing else we can do if that
          # happens.
          # https://bugs.python.org/issue19264
          args = [encoding.Encode(a) for a in args]
        p = subprocess.Popen(args, env=env, stderr=subprocess.PIPE,
                             stdout=subprocess.PIPE, **extra_popen_kwargs)

        if in_str:
          in_str = six.text_type(in_str).encode('utf-8')
          try:
            p.stdin.write(in_str)
            p.stdin.close()
          except OSError as exc:
            if (exc.errno == errno.EPIPE or
                exc.errno == errno.EINVAL):
              pass  # Obey same conventions as subprocess.communicate()
            else:
              _KillProcIfRunning(p)
              raise OutputStreamProcessingException(exc)

        try:
          with parallel.GetPool(2) as pool:
            std_out_future = pool.ApplyAsync(_ProcessStreamHandler,
                                             (p, False, out_handler_func))
            std_err_future = pool.ApplyAsync(_ProcessStreamHandler,
                                             (p, True, err_handler_func))
            std_out_future.Get()
            std_err_future.Get()
        except Exception as e:
          _KillProcIfRunning(p)
          raise OutputStreamProcessingException(e)

      except OSError as err:
        if err.errno == errno.EACCES:
          raise PermissionError(err.strerror)
        elif err.errno == errno.ENOENT:
          raise InvalidCommandError(args[0])
        raise
      process_holder.process = p

      if process_holder.signum is not None:
        # This covers the small possibility that process_holder handled a
        # signal when the process was starting but not yet set to
        # process_holder.process.
        _KillProcIfRunning(p)

      ret_val = p.returncode

  if no_exit and process_holder.signum is None:
    return ret_val
  sys.exit(ret_val)
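
A minimal sketch of per-line streaming with two reader threads, which is roughly what the parallel pool and _ProcessStreamHandler accomplish here (standard library only; the helper name is illustrative):

import subprocess
import threading


def stream_lines(args, out_func=print, err_func=print):
    """Run a command and forward each stdout/stderr line as it arrives."""
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    def pump(stream, handler):
        for line in iter(stream.readline, b''):
            handler(line.decode('utf-8').rstrip('\n'))
        stream.close()

    threads = [threading.Thread(target=pump, args=(p.stdout, out_func)),
               threading.Thread(target=pump, args=(p.stderr, err_func))]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return p.wait()

# stream_lines(['ping', '-c', '3', 'localhost'])  # POSIX example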