def _DoParallelOperation(num_threads, tasks, method, label, show_progress_bar):
  """Perform the given storage operation in parallel.

  Factors out common work: logging, setting up parallelism, managing a
  progress bar (if necessary).

  Args:
    num_threads: int, the number of threads to use
    tasks: list of arguments to be passed to method, one at a time (each
      zipped up in a tuple with a callback)
    method: a function that takes in a single argument: a tuple of a task to
      do and a zero-argument callback to be done on completion of the task.
    label: str, the label for the progress bar (if used).
    show_progress_bar: bool, whether to show a progress bar during the
      operation.
  """
  log.debug(label)
  log.debug('Using [%d] threads', num_threads)

  pool = parallel.GetPool(num_threads)
  if show_progress_bar:
    progress_bar = console_io.TickableProgressBar(len(tasks), label)
    callback = progress_bar.Tick
  else:
    progress_bar = console_io.NoOpProgressBar()
    callback = None
  with progress_bar, pool:
    pool.Map(method, list(zip(tasks, itertools.cycle((callback,)))))
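
# Hedged sketch (not from the original source) of a `method` suitable for
# _DoParallelOperation. Because each task is zipped with the callback above,
# `method` receives a single (task, callback) tuple; `_DoTaskTuple` and the
# task's Run() method are hypothetical names used only for illustration.
def _DoTaskTuple(task_and_callback):
  task, callback = task_and_callback
  task.Run()  # Assumed: the task object performs its own work.
  if callback:
    callback()  # Ticks the progress bar once per completed task.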
def ExecuteTasks(tasks,
                 num_threads=DEFAULT_NUM_THREADS,
                 progress_bar_label=None):
  """Perform the given storage tasks in parallel.

  Factors out common work: logging, setting up parallelism, managing a
  progress bar (if necessary).

  Args:
    tasks: [Operation], To be executed in parallel.
    num_threads: int, The number of threads to use.
    progress_bar_label: str, If set, a progress bar will be shown with this
      label. Otherwise, no progress bar is displayed.
  """
  log.debug(progress_bar_label)
  log.debug('Using [%d] threads', num_threads)

  pool = parallel.GetPool(num_threads)
  if progress_bar_label:
    progress_bar = console_io.TickableProgressBar(
        len(tasks), progress_bar_label)
    callback = progress_bar.Tick
  else:
    progress_bar = console_io.NoOpProgressBar()
    callback = None

  if num_threads == 0:
    with progress_bar:
      for t in tasks:
        t.Execute(callback)
  else:
    with progress_bar, pool:
      pool.Map(lambda task: task.Execute(callback), tasks)
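
# Minimal, hypothetical task for ExecuteTasks: any object with an
# Execute(callback) method fits. `time` is assumed to live with the module's
# other imports; `_SleepTask` exists only for this sketch.
import time


class _SleepTask(object):
  """Toy task that sleeps, then reports completion via the callback."""

  def __init__(self, seconds):
    self.seconds = seconds

  def Execute(self, callback=None):
    time.sleep(self.seconds)  # Stand-in for real work.
    if callback:
      callback()  # Advances the TickableProgressBar by one tick.


# Example: ExecuteTasks([_SleepTask(1) for _ in range(4)], num_threads=4,
#                       progress_bar_label='Sleeping')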
def DirDiff(old_dir, new_dir, diff):
  """Calls diff.AddChange(op, file) on files that changed from old_dir to new_dir.

  diff.AddChange() can construct the {'add', 'delete', 'edit'} file operations
  that convert old_dir to match new_dir. Directory differences are ignored.

  Args:
    old_dir: The old directory path name.
    new_dir: The new directory path name.
    diff: A DiffAccumulator instance.

  Returns:
    The return value of the first diff.AddChange() call that returns non-zero,
    or None if all diff.AddChange() calls returned zero.
  """
  with TimeIt('GetDirFilesRecursive new files'):
    new_files = GetDirFilesRecursive(new_dir)
  with TimeIt('GetDirFilesRecursive old files'):
    old_files = GetDirFilesRecursive(old_dir)

  def _FileDiff(file):
    """Diffs a file in new_dir and old_dir."""
    new_contents, new_binary = GetFileContents(os.path.join(new_dir, file))
    if not new_binary:
      diff.Validate(file, new_contents)
    if file in old_files:
      old_contents, old_binary = GetFileContents(os.path.join(old_dir, file))
      if old_binary == new_binary and old_contents == new_contents:
        return
      return 'edit', file, old_contents, new_contents
    else:
      return 'add', file, None, new_contents

  with parallel.GetPool(16) as pool:
    results = []
    for file in new_files:
      if diff.Ignore(file):
        continue
      result = pool.ApplyAsync(_FileDiff, (file,))
      results.append(result)
    for result_future in results:
      result = result_future.Get()
      if result:
        op, file, old_contents, new_contents = result
        prune = diff.AddChange(op, file, old_contents, new_contents)
        if prune:
          return prune

  for file in old_files:
    if diff.Ignore(file):
      continue
    if file not in new_files:
      prune = diff.AddChange('delete', file)
      if prune:
        return prune
  return None
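
# Hedged sketch of the DiffAccumulator contract that DirDiff depends on. The
# method names (Ignore, Validate, AddChange) come from the calls above; this
# concrete implementation is hypothetical. `collections` is assumed to be
# imported alongside `os`.
import collections


class _CountingDiffAccumulator(object):
  """Counts add/edit/delete operations and never prunes the walk."""

  def __init__(self):
    self.counts = collections.Counter()

  def Ignore(self, relative_file):
    # Example policy: skip hidden files.
    return os.path.basename(relative_file).startswith('.')

  def Validate(self, relative_file, contents):
    pass  # Hook for checking the contents of non-binary files.

  def AddChange(self, op, relative_file, old_contents=None, new_contents=None):
    self.counts[op] += 1
    return None  # A truthy return value would make DirDiff stop early.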
def ListRepositories(args):
  """Lists repositories in a given project.

  If no location value is specified, list repositories across all locations.

  Args:
    args: User input arguments.

  Returns:
    A tuple of (list of repositories, list of GCR buckets, project).
  """
  project = GetProject(args)
  location = args.location or properties.VALUES.artifacts.location.Get()
  location_list = ar_requests.ListLocations(project)
  if (location and location.lower() not in location_list and
      location != "all"):
    raise ar_exceptions.UnsupportedLocationError(
        "{} is not a valid location. Valid locations are [{}].".format(
            location, ", ".join(location_list)))

  loc_paths = []
  if location and location != "all":
    log.status.Print(
        "Listing items under project {}, location {}.\n".format(
            project, location))
    loc_paths.append("projects/{}/locations/{}".format(project, location))
    buckets = [_GCR_BUCKETS[location]] if location in _GCR_BUCKETS else []
  else:
    log.status.Print(
        "Listing items under project {}, across all locations.\n".format(
            project))
    loc_paths.extend([
        "projects/{}/locations/{}".format(project, loc)
        for loc in location_list
    ])
    buckets = _GCR_BUCKETS.values()

  pool_size = len(loc_paths) if loc_paths else 1
  pool = parallel.GetPool(pool_size)
  page_size = args.page_size
  try:
    pool.Start()
    results = pool.Map(
        lambda x: ar_requests.ListRepositories(x, page_size=page_size),
        loc_paths)
  except parallel.MultiError as e:
    error_set = set(err.content for err in e.errors)
    msg = "\n".join(error_set)
    raise ar_exceptions.ArtifactRegistryError(msg)
  finally:
    pool.Join()

  repos = []
  for sublist in results:
    repos.extend([repo for repo in sublist])
  repos.sort(key=lambda x: x.name.split("/")[-1])
  return repos, buckets, project
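
# Hypothetical example of consuming ListRepositories. `args` is assumed to be
# an argparse-style namespace exposing .location and .page_size, matching the
# attribute accesses above; _PrintRepositoryNames is illustrative only.
def _PrintRepositoryNames(args):
  repos, buckets, project = ListRepositories(args)
  for repo in repos:
    log.status.Print(repo.name.split("/")[-1])
  if buckets:
    log.status.Print("Plus {} GCR bucket(s) in project {}.".format(
        len(buckets), project))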
def _UploadFilesProcesses(files_to_upload, bucket_ref):
  """Uploads files to App Engine Cloud Storage bucket using processes.

  Args:
    files_to_upload: dict {str: str}, map of checksum to local path
    bucket_ref: storage_api.BucketReference, the reference to the bucket
      files will be placed in.

  Raises:
    MultiError: if one or more errors occurred during file upload.
  """
  tasks = []
  # Have to sort files because the test framework requires a known order for
  # mocked API calls.
  for sha1_hash, path in sorted(six.iteritems(files_to_upload)):
    tasks.append(FileUploadTask(sha1_hash, path, bucket_ref.ToBucketUrl()))

  num_procs = properties.VALUES.app.num_file_upload_processes.GetInt()
  threads_per_proc = properties.VALUES.app.num_file_upload_threads.GetInt()
  if (platforms.OperatingSystem.Current() is platforms.OperatingSystem.MACOSX
      and platform.mac_ver()[0].startswith('10.12')):  # Sierra is 10.12
    # OS X Sierra has issues with spawning processes in this manner.
    if num_procs == 1:
      # num_procs set explicitly to 1 indicates that a user tried to turn off
      # parallelism, so we respect that.
      threads_per_proc = 1
    # Note: OS X (especially Sierra) has issues with multi-process file upload
    # as we had it implemented, so we just *ignore* the number of processes
    # requested and just use threads.
    # This is slightly confusing, but when we resolve the TODO in the below
    # branch of the if statement, this should get fixed.
    threads_per_proc = threads_per_proc or _DEFAULT_NUM_THREADS
    with parallel.GetPool(1, threads_per_proc) as pool:
      results = pool.Map(_UploadFile, tasks)
  elif num_procs > 1:
    # TODO(b/32001924) switch all parallelism to use core.util.parallel
    pool = multiprocessing.Pool(num_procs)
    results = pool.map(_UploadFile, tasks)
    errors = [e for e in results if e]
    pool.close()
    pool.join()
    if errors:
      raise MultiError('during file upload', errors)
  else:
    for task in tasks:
      error = _UploadFile(task)
      if error:
        raise MultiError('during file upload', [error])
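
# For the multiprocessing.Pool branch above, _UploadFile must be a picklable
# top-level function that *returns* failures instead of raising them, so that
# pool.map can collect them. A hedged sketch of that shape (the name and body
# are placeholders, not the SDK's implementation):
def _UploadFileSketch(task):
  try:
    pass  # Copy task.path to task.bucket_url, keyed by task.sha1_hash.
  except Exception as e:  # pylint: disable=broad-except
    return e  # Non-None results become MultiError entries.
  return None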
def UploadFiles(files_to_upload, num_threads=DEFAULT_NUM_THREADS):
  """Upload the given files to the given Cloud Storage URLs.

  Uses the appropriate parallelism (multi-thread or synchronous).

  Args:
    files_to_upload: list of FileUploadTask
    num_threads: int (optional), the number of threads to use.
  """
  log.debug(u'Uploading:\n' + u'\n'.join(map(str, files_to_upload)))
  log.debug(u'Using [%d] threads', num_threads)
  with parallel.GetPool(num_threads) as pool:
    pool.Map(_UploadFile, files_to_upload)
def DeleteObjects(objects_to_delete, num_threads=DEFAULT_NUM_THREADS):
  """Delete the given Cloud Storage objects.

  Uses the appropriate parallelism (multi-thread or synchronous).

  Args:
    objects_to_delete: list of ObjectDeleteTask
    num_threads: int (optional), the number of threads to use.
  """
  log.debug(u'Deleting:\n' + u'\n'.join(map(str, objects_to_delete)))
  log.debug(u'Using [%d] threads', num_threads)
  with parallel.GetPool(num_threads) as pool:
    pool.Map(_DeleteObject, objects_to_delete)
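
# Hedged example of driving the two thread-based helpers above. The
# FileUploadTask signature matches its use in _UploadFilesProcesses; the
# ObjectDeleteTask signature is an assumption for this sketch.
def _RefreshBucket(bucket_url):
  uploads = [FileUploadTask('da39a3ee', '/tmp/app.yaml', bucket_url)]
  UploadFiles(uploads, num_threads=8)
  deletes = [ObjectDeleteTask(bucket_url + '/stale-object')]
  DeleteObjects(deletes, num_threads=8)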
def UploadFiles(files_to_upload,
                num_processes=DEFAULT_NUM_PROCESSES,
                threads_per_process=DEFAULT_NUM_THREADS):
  """Upload the given files to the given Cloud Storage URLs.

  Uses the appropriate parallelism (multi-process, multi-thread, both, or
  synchronous).

  Args:
    files_to_upload: list of FileUploadTask
    num_processes: int or None, the number of processes to use (None for one
      process per core).
    threads_per_process: int, the number of threads to use per process.
  """
  log.debug(u'Uploading:\n' + u'\n'.join(map(str, files_to_upload)))
  log.debug(u'Using [%d] processes, [%d] threads per process',
            num_processes, threads_per_process)
  with parallel.GetPool(num_processes, threads_per_process) as pool:
    pool.Map(_UploadFile, files_to_upload)
def DeleteObjects(objects_to_delete,
                  num_processes=DEFAULT_NUM_PROCESSES,
                  threads_per_process=DEFAULT_NUM_THREADS):
  """Delete the given Cloud Storage objects.

  Uses the appropriate parallelism (multi-process, multi-thread, both, or
  synchronous).

  Args:
    objects_to_delete: list of ObjectDeleteTask
    num_processes: int or None, the number of processes to use (None for one
      process per core).
    threads_per_process: int, the number of threads to use per process.
  """
  log.debug(u'Deleting:\n' + u'\n'.join(map(str, objects_to_delete)))
  log.debug(u'Using [%d] processes, [%d] threads per process',
            num_processes, threads_per_process)
  with parallel.GetPool(num_processes, threads_per_process) as pool:
    pool.Map(_DeleteObject, objects_to_delete)
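
# The process/thread variants fan work out across num_processes worker
# processes, each running threads_per_process threads. A hedged sketch of an
# invocation; per the docstring, num_processes=None means one process per
# core.
def _BulkDelete(delete_tasks):
  DeleteObjects(delete_tasks, num_processes=None, threads_per_process=16)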
def ExecWithStreamingOutput(args,
                            env=None,
                            no_exit=False,
                            out_func=None,
                            err_func=None,
                            in_str=None,
                            **extra_popen_kwargs):
  """Emulates the os.exec* set of commands, but uses subprocess.

  This executes the given command, waits for it to finish, and then exits this
  process with the exit code of the child process. Allows realtime processing
  of stderr and stdout from subprocess using threads.

  Args:
    args: [str], The arguments to execute. The first argument is the command.
    env: {str: str}, An optional environment for the child process.
    no_exit: bool, True to just return the exit code of the child instead of
      exiting.
    out_func: str->None, a function to call with each line of the stdout of
      the executed process. This can be e.g. log.file_only_logger.debug or
      log.out.write.
    err_func: str->None, a function to call with each line of the stderr of
      the executed process. This can be e.g. log.file_only_logger.debug or
      log.err.write.
    in_str: bytes or str, input to send to the subprocess' stdin.
    **extra_popen_kwargs: Any additional kwargs will be passed through
      directly to subprocess.Popen.

  Returns:
    int, The exit code of the child if no_exit is True, else this method does
    not return.

  Raises:
    PermissionError: if user does not have execute permission for cloud sdk
      bin files.
    InvalidCommandError: if the command entered cannot be found.
  """
  log.debug('Executing command: %s', args)
  # We use subprocess instead of execv because windows does not support
  # process replacement. The result of execv on windows is that a new process
  # is started and the original is killed. When running in a shell, the prompt
  # returns as soon as the parent is killed even though the child is still
  # running. subprocess waits for the new process to finish before returning.
  env = _GetToolEnv(env=env)
  process_holder = _ProcessHolder()
  with _ReplaceSignal(signal.SIGTERM, process_holder.Handler):
    with _ReplaceSignal(signal.SIGINT, process_holder.Handler):
      out_handler_func = out_func or log.Print
      err_handler_func = err_func or log.status.Print
      if in_str:
        extra_popen_kwargs['stdin'] = subprocess.PIPE
      try:
        if args and isinstance(args, list):
          # On Python 2.x on Windows, the first arg can't be unicode. We
          # encode it anyway because there is really nothing else we can do if
          # that happens.
          # https://bugs.python.org/issue19264
          args = [encoding.Encode(a) for a in args]
        p = subprocess.Popen(args, env=env, stderr=subprocess.PIPE,
                             stdout=subprocess.PIPE, **extra_popen_kwargs)

        if in_str:
          in_str = six.text_type(in_str).encode('utf-8')
          try:
            p.stdin.write(in_str)
            p.stdin.close()
          except OSError as exc:
            if exc.errno == errno.EPIPE or exc.errno == errno.EINVAL:
              pass  # Obey same conventions as subprocess.communicate().
            else:
              _KillProcIfRunning(p)
              raise OutputStreamProcessingException(exc)

        try:
          with parallel.GetPool(2) as pool:
            std_out_future = pool.ApplyAsync(_ProcessStreamHandler,
                                             (p, False, out_handler_func))
            std_err_future = pool.ApplyAsync(_ProcessStreamHandler,
                                             (p, True, err_handler_func))
            std_out_future.Get()
            std_err_future.Get()
        except Exception as e:  # pylint: disable=broad-except
          _KillProcIfRunning(p)
          raise OutputStreamProcessingException(e)
      except OSError as err:
        if err.errno == errno.EACCES:
          raise PermissionError(err.strerror)
        elif err.errno == errno.ENOENT:
          raise InvalidCommandError(args[0])
        raise

      process_holder.process = p
      if process_holder.signum is not None:
        # This covers the small possibility that process_holder handled a
        # signal when the process was starting but not yet set to
        # process_holder.process.
        _KillProcIfRunning(p)

      ret_val = p.returncode

  if no_exit and process_holder.signum is None:
    return ret_val
  sys.exit(ret_val)
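
# Hedged usage sketch: run a child process, stream its stderr through the
# status printer, and capture stdout lines in memory. Only names defined
# above (ExecWithStreamingOutput) and the SDK log module are used;
# _RunAndCapture itself is hypothetical.
def _RunAndCapture(cmd_args):
  captured = []

  def _OnStdoutLine(line):
    captured.append(line)
    log.file_only_logger.debug(line)

  exit_code = ExecWithStreamingOutput(
      cmd_args,
      no_exit=True,  # Return the exit code instead of calling sys.exit.
      out_func=_OnStdoutLine,
      err_func=log.status.Print)
  return exit_code, captured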