Code example #1
def _DoParallelOperation(num_threads, tasks, method, label, show_progress_bar):
    """Perform the given storage operation in parallel.

  Factors out common work: logging, setting up parallelism, managing a progress
  bar (if necessary).

  Args:
    num_threads: int, the number of threads to use
    tasks: list of arguments to be passed to method, one at a time (each zipped
      up in a tuple with a callback)
    method: a function that takes a single argument: a tuple of a task to do
      and a zero-argument callback to be invoked on completion of the task.
    label: str, the label for the progress bar (if used).
    show_progress_bar: bool, whether to show a progress bar during the
      operation.
  """
    log.debug(label)
    log.debug('Using [%d] threads', num_threads)

    pool = parallel.GetPool(num_threads)
    if show_progress_bar:
        progress_bar = console_io.TickableProgressBar(len(tasks), label)
        callback = progress_bar.Tick
    else:
        progress_bar = console_io.NoOpProgressBar()
        callback = None
    with progress_bar, pool:
        pool.Map(method, list(zip(tasks, itertools.cycle((callback, )))))
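
A minimal usage sketch for the helper above (the names _CopyOne, my_tasks, and the task's Run() method are assumptions, not from the source). Each task is zipped with the progress-bar callback, so `method` receives a (task, callback) tuple and should invoke the callback, if any, once the task finishes:

def _CopyOne(task_and_callback):
    task, callback = task_and_callback
    task.Run()  # assumed task interface
    if callback:
        callback()

_DoParallelOperation(num_threads=8,
                     tasks=my_tasks,
                     method=_CopyOne,
                     label='Copying files',
                     show_progress_bar=True)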
Code example #2
def ExecuteTasks(tasks,
                 num_threads=DEFAULT_NUM_THREADS,
                 progress_bar_label=None):
    """Perform the given storage tasks in parallel.

  Factors out common work: logging, setting up parallelism, managing a progress
  bar (if necessary).

  Args:
    tasks: [Operation], the operations to be executed in parallel.
    num_threads: int, The number of threads to use
    progress_bar_label: str, If set, a progress bar will be shown with this
      label. Otherwise, no progress bar is displayed.
  """
    log.debug(progress_bar_label)
    log.debug('Using [%d] threads', num_threads)

    pool = parallel.GetPool(num_threads)
    if progress_bar_label:
        progress_bar = console_io.TickableProgressBar(len(tasks),
                                                      progress_bar_label)
        callback = progress_bar.Tick
    else:
        progress_bar = console_io.NoOpProgressBar()
        callback = None

    if num_threads == 0:
        with progress_bar:
            for t in tasks:
                t.Execute(callback)
    else:
        with progress_bar, pool:
            pool.Map(lambda task: task.Execute(callback), tasks)
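
ExecuteTasks only requires each task to expose an Execute(callback) method, where callback is the progress-bar tick (or None when no label is given). A sketch of such an Operation, assuming a hypothetical storage client with a Delete method:

class DeleteObjectOperation(object):
    """Hypothetical Operation: deletes one object and ticks the progress bar."""

    def __init__(self, client, url):
        self.client = client
        self.url = url

    def Execute(self, callback=None):
        self.client.Delete(self.url)  # assumed client call
        if callback:
            callback()

ExecuteTasks([DeleteObjectOperation(client, u) for u in urls],
             num_threads=16,
             progress_bar_label='Deleting objects')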
Code example #3
def DirDiff(old_dir, new_dir, diff):
    """Calls diff.AddChange(op, file) on files that changed from old_dir new_dir.

  diff.AddChange() can construct the {'add', 'delete', 'edit'} file operations
  that convert old_dir to match new_dir. Directory differences are ignored.

  Args:
    old_dir: The old directory path name.
    new_dir: The new directory path name.
    diff: A DiffAccumulator instance.

  Returns:
    The return value of the first diff.AddChange() call that returns non-zero
    or None if all diff.AddChange() calls returned zero.
  """
    with TimeIt('GetDirFilesRecursive new files'):
        new_files = GetDirFilesRecursive(new_dir)
    with TimeIt('GetDirFilesRecursive old files'):
        old_files = GetDirFilesRecursive(old_dir)

    def _FileDiff(file):
        """Diffs a file in new_dir and old_dir."""
        new_contents, new_binary = GetFileContents(os.path.join(new_dir, file))
        if not new_binary:
            diff.Validate(file, new_contents)

        if file in old_files:
            old_contents, old_binary = GetFileContents(
                os.path.join(old_dir, file))
            if old_binary == new_binary and old_contents == new_contents:
                return
            return 'edit', file, old_contents, new_contents
        else:
            return 'add', file, None, new_contents

    with parallel.GetPool(16) as pool:
        results = []
        for file in new_files:
            if diff.Ignore(file):
                continue
            result = pool.ApplyAsync(_FileDiff, (file, ))
            results.append(result)

        for result_future in results:
            result = result_future.Get()
            if result:
                op, file, old_contents, new_contents = result
                prune = diff.AddChange(op, file, old_contents, new_contents)
                if prune:
                    return prune

    for file in old_files:
        if diff.Ignore(file):
            continue
        if file not in new_files:
            prune = diff.AddChange('delete', file)
            if prune:
                return prune
    return None
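
DirDiff only calls three methods on the accumulator: Ignore(file), Validate(file, contents), and AddChange(op, file, old_contents=None, new_contents=None), whose non-zero return value prunes the walk. A minimal accumulator sketch built on just those calls (not the SDK's DiffAccumulator):

class PrintingDiffAccumulator(object):
    """Hypothetical accumulator that prints each change."""

    def Ignore(self, file):
        # Skip editor backup files, for example.
        return file.endswith('~')

    def Validate(self, file, contents):
        # Hook for content checks on non-binary new files; no-op here.
        pass

    def AddChange(self, op, file, old_contents=None, new_contents=None):
        print('{} {}'.format(op, file))
        return None  # a non-zero return value would stop DirDiff early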
Code example #4
File: util.py  Project: PinTrees/novelhub
def ListRepositories(args):
    """Lists repositories in a given project.

  If no location value is specified, list repositories across all locations.

  Args:
    args: User input arguments.

  Returns:
    List of repositories.
  """
    project = GetProject(args)
    location = args.location or properties.VALUES.artifacts.location.Get()
    location_list = ar_requests.ListLocations(project)
    if (location and location.lower() not in location_list
            and location != "all"):
        raise ar_exceptions.UnsupportedLocationError(
            "{} is not a valid location. Valid locations are [{}].".format(
                location, ", ".join(location_list)))

    loc_paths = []
    if location and location != "all":
        log.status.Print(
            "Listing items under project {}, location {}.\n".format(
                project, location))
        loc_paths.append("projects/{}/locations/{}".format(project, location))
        buckets = [_GCR_BUCKETS[location]] if location in _GCR_BUCKETS else []
    else:
        log.status.Print(
            "Listing items under project {}, across all locations.\n".format(
                project))
        loc_paths.extend([
            "projects/{}/locations/{}".format(project, loc)
            for loc in location_list
        ])
        buckets = _GCR_BUCKETS.values()

    pool_size = len(loc_paths) if loc_paths else 1
    pool = parallel.GetPool(pool_size)
    page_size = args.page_size
    try:
        pool.Start()
        results = pool.Map(
            lambda x: ar_requests.ListRepositories(x, page_size=page_size),
            loc_paths)
    except parallel.MultiError as e:
        error_set = set(err.content for err in e.errors)
        msg = "\n".join(error_set)
        raise ar_exceptions.ArtifactRegistryError(msg)
    finally:
        pool.Join()

    repos = []
    for sublist in results:
        repos.extend(sublist)
    repos.sort(key=lambda x: x.name.split("/")[-1])

    return repos, buckets, project
Code example #5
def _UploadFilesProcesses(files_to_upload, bucket_ref):
    """Uploads files to App Engine Cloud Storage bucket using processes.

  Args:
    files_to_upload: dict {str: str}, map of checksum to local path
    bucket_ref: storage_api.BucketReference, the reference to the bucket files
      will be placed in.

  Raises:
    MultiError: if one or more errors occurred during file upload.
  """
    tasks = []
    # Have to sort files because the test framework requires a known order for
    # mocked API calls.
    for sha1_hash, path in sorted(files_to_upload.items()):
        tasks.append(FileUploadTask(sha1_hash, path, bucket_ref.ToBucketUrl()))

    num_procs = properties.VALUES.app.num_file_upload_processes.GetInt()
    threads_per_proc = properties.VALUES.app.num_file_upload_threads.GetInt()
    # Sierra is version 10.12.
    if (platforms.OperatingSystem.Current() is platforms.OperatingSystem.MACOSX
            and platform.mac_ver()[0].startswith('10.12')):
        # OS X Sierra has issues with spawning processes in this manner
        if num_procs == 1:
            # num_procs set explicitly to 1 indicates that a user tried to turn off
            # parallelism, so we respect that.
            threads_per_proc = 1
        # Note: OS X (especially Sierra) has issues with multi-process file upload
        # as we had it implemented, so we just *ignore* the number of processes
        # requested and just use threads.
        # This is slightly confusing, but when we resolve the TODO in the below
        # branch of the if statement, this should get fixed.
        threads_per_proc = threads_per_proc or _DEFAULT_NUM_THREADS
        with parallel.GetPool(1, threads_per_proc) as pool:
            results = pool.Map(_UploadFile, tasks)
    elif num_procs > 1:
        # TODO(b/32001924) switch all parallelism to use core.util.parallel
        pool = multiprocessing.Pool(num_procs)
        results = pool.map(_UploadFile, tasks)
        errors = [e for e in results if e]
        pool.close()
        pool.join()
        if errors:
            raise MultiError('during file upload', errors)
    else:
        for task in tasks:
            error = _UploadFile(task)
            if error:
                raise MultiError('during file upload', [error])
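
The calls above imply a worker contract: _UploadFile takes one FileUploadTask and returns a falsy value on success or an error object on failure, which is why the caller can simply collect the truthy results. A stand-in sketch of that contract (not the SDK's implementation; the task attribute names and the _CopyToBucket helper are assumptions):

def _UploadFile(task):
    """Hypothetical worker: returns None on success, an error otherwise."""
    try:
        _CopyToBucket(task.path, task.bucket_url, task.sha1_hash)  # assumed helper
    except Exception as e:  # pylint: disable=broad-except
        return e
    return None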
Code example #6
def UploadFiles(files_to_upload, num_threads=DEFAULT_NUM_THREADS):
  """Upload the given files to the given Cloud Storage URLs.

  Uses the appropriate parallelism (multi-process, multi-thread, both, or
  synchronous).

  Args:
    files_to_upload: list of FileUploadTask
    num_threads: int (optional), the number of threads to use.
  """
  log.debug(u'Uploading:\n' + u'\n'.join(map(str, files_to_upload)))
  log.debug(u'Using [%d] threads', num_threads)

  with parallel.GetPool(num_threads) as pool:
    pool.Map(_UploadFile, files_to_upload)
Code example #7
def DeleteObjects(objects_to_delete, num_threads=DEFAULT_NUM_THREADS):
  """Delete the given Cloud Storage objects.

  Uses the appropriate parallelism (multi-process, multi-thread, both, or
  synchronous).

  Args:
    objects_to_delete: list of ObjectDeleteTask
    num_threads: int (optional), the number of threads to use.
  """
  log.debug(u'Deleting:\n' + u'\n'.join(map(str, objects_to_delete)))
  log.debug(u'Using [%d] threads', num_threads)

  with parallel.GetPool(num_threads) as pool:
    pool.Map(_DeleteObject, objects_to_delete)
Code example #8
def UploadFiles(files_to_upload,
                num_processes=DEFAULT_NUM_PROCESSES,
                threads_per_process=DEFAULT_NUM_THREADS):
    """Upload the given files to the given Cloud Storage URLs.

  Uses the appropriate parallelism (multi-process, multi-thread, both, or
  synchronous).

  Args:
    files_to_upload: list of FileUploadTask
    num_processes: int or None, the number of processes to use (None for one
        process per core).
    threads_per_process: int, the number of threads to use per process.
  """
    log.debug(u'Uploading:\n' + u'\n'.join(map(str, files_to_upload)))
    log.debug(u'Using [%d] processes, [%d] threads per process', num_processes,
              threads_per_process)
    with parallel.GetPool(num_processes, threads_per_process) as pool:
        pool.Map(_UploadFile, files_to_upload)
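
A usage sketch (tasks here is a hypothetical list of FileUploadTask objects); per the docstring, passing num_processes=None requests one process per core:

UploadFiles(tasks, num_processes=None, threads_per_process=8)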
Code example #9
def DeleteObjects(objects_to_delete,
                  num_processes=DEFAULT_NUM_PROCESSES,
                  threads_per_process=DEFAULT_NUM_THREADS):
    """Delete the given Cloud Storage objects.

  Uses the appropriate parallelism (multi-process, multi-thread, both, or
  synchronous).

  Args:
    objects_to_delete: list of ObjectDeleteTask
    num_processes: int or None, the number of processes to use (None for one
        process per core).
    threads_per_process: int, the number of threads to use per process.
  """
    log.debug(u'Deleting:\n' + u'\n'.join(map(str, objects_to_delete)))
    log.debug(u'Using [%d] processes, [%d] threads per process', num_processes,
              threads_per_process)
    with parallel.GetPool(num_processes, threads_per_process) as pool:
        pool.Map(_DeleteObject, objects_to_delete)
Code example #10
def ExecWithStreamingOutput(args,
                            env=None,
                            no_exit=False,
                            out_func=None,
                            err_func=None,
                            in_str=None,
                            **extra_popen_kwargs):
  """Emulates the os.exec* set of commands, but uses subprocess.

  This executes the given command, waits for it to finish, and then exits this
  process with the exit code of the child process. Allows real-time processing
  of stderr and stdout from the subprocess using threads.

  Args:
    args: [str], The arguments to execute.  The first argument is the command.
    env: {str: str}, An optional environment for the child process.
    no_exit: bool, True to just return the exit code of the child instead of
      exiting.
    out_func: str->None, a function to call with each line of the stdout of the
      executed process. This can be e.g. log.file_only_logger.debug or
      log.out.write.
    err_func: str->None, a function to call with each line of the stderr of
      the executed process. This can be e.g. log.file_only_logger.debug or
      log.err.write.
    in_str: bytes or str, input to send to the subprocess' stdin.
    **extra_popen_kwargs: Any additional kwargs will be passed through directly
      to subprocess.Popen

  Returns:
    int, The exit code of the child if no_exit is True, else this method does
    not return.

  Raises:
    PermissionError: if user does not have execute permission for cloud sdk bin
    files.
    InvalidCommandError: if the command entered cannot be found.
  """
  log.debug('Executing command: %s', args)
  # We use subprocess instead of execv because windows does not support process
  # replacement.  The result of execv on windows is that a new process is
  # started and the original is killed.  When running in a shell, the prompt
  # returns as soon as the parent is killed even though the child is still
  # running.  subprocess waits for the new process to finish before returning.
  env = _GetToolEnv(env=env)
  process_holder = _ProcessHolder()
  with _ReplaceSignal(signal.SIGTERM, process_holder.Handler):
    with _ReplaceSignal(signal.SIGINT, process_holder.Handler):
      out_handler_func = out_func or log.Print
      err_handler_func = err_func or log.status.Print
      if in_str:
        extra_popen_kwargs['stdin'] = subprocess.PIPE
      try:
        if args and isinstance(args, list):
          # On Python 2.x on Windows, the first arg can't be unicode. We
          # encode it anyway because there is really nothing else we can do if
          # that happens.
          # https://bugs.python.org/issue19264
          args = [encoding.Encode(a) for a in args]
        p = subprocess.Popen(args, env=env, stderr=subprocess.PIPE,
                             stdout=subprocess.PIPE, **extra_popen_kwargs)

        if in_str:
          in_str = six.text_type(in_str).encode('utf-8')
          try:
            p.stdin.write(in_str)
            p.stdin.close()
          except OSError as exc:
            if (exc.errno == errno.EPIPE or
                exc.errno == errno.EINVAL):
              pass  # Obey same conventions as subprocess.communicate()
            else:
              _KillProcIfRunning(p)
              raise OutputStreamProcessingException(exc)

        try:
          with parallel.GetPool(2) as pool:
            std_out_future = pool.ApplyAsync(_ProcessStreamHandler,
                                             (p, False, out_handler_func))
            std_err_future = pool.ApplyAsync(_ProcessStreamHandler,
                                             (p, True, err_handler_func))
            std_out_future.Get()
            std_err_future.Get()
        except Exception as e:
          _KillProcIfRunning(p)
          raise OutputStreamProcessingException(e)

      except OSError as err:
        if err.errno == errno.EACCES:
          raise PermissionError(err.strerror)
        elif err.errno == errno.ENOENT:
          raise InvalidCommandError(args[0])
        raise
      process_holder.process = p

      if process_holder.signum is not None:
        # This covers the small possibility that process_holder handled a
        # signal when the process was starting but not yet set to
        # process_holder.process.
        _KillProcIfRunning(p)

      ret_val = p.returncode

  if no_exit and process_holder.signum is None:
    return ret_val
  sys.exit(ret_val)
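
A usage sketch with the handlers the docstring suggests; the command is only an illustration, and no_exit=True keeps control of the exit code instead of terminating the current process:

exit_code = ExecWithStreamingOutput(
    ['gcloud', '--version'],
    no_exit=True,
    out_func=log.file_only_logger.debug,
    err_func=log.status.Print)
if exit_code:
    log.warning('Child exited with code %s', exit_code)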