def run_atomic(self, atomic_execution_id: str,
               composition_path: [(str, Any)]):
    """Print the status of a single atomic execution.

    The status is obtained from ``self.get_status`` and the header line is
    prefixed with whatever ``create_path_string_prefix`` builds from
    ``composition_path``.
    """
    execution_status = self.get_status(atomic_execution_id)
    header_prefix = create_path_string_prefix(composition_path)
    log_info(f'{header_prefix}Status:')
    report = (
        ('Running:', execution_status.running),
        ('Success:', execution_status.success),
        ('Exit Status:', execution_status.code),
    )
    for label, value in report:
        print(label, value)
def run(self):
    """Stop the execution by asking the controller to delete it.

    The deletion is requested with ``fail_if_running=False`` and
    ``fail_if_deleted=True``. If the controller reports the execution as
    already harvested, that is logged instead of the 'Stopped' message.
    """
    try:
        self.controller.delete_execution(
            execution_id=self.get_execution_id(),
            fail_if_running=False,
            fail_if_deleted=True)
    except ExecutionAlreadyHarvestedException:
        log_info('Process already stopped')
    else:
        # Only reached when the deletion went through without complaints
        log_info('Stopped')
def run_atomic(self, atomic_execution_id: str,
               composition_path: [(str, Any)]):
    """Harvest and then retrieve the output of one atomic execution.

    Log messages mention the composition path when
    ``create_path_string_prefix`` yields a non-empty prefix; the last
    character of that prefix is dropped in the message.
    """
    path_prefix = create_path_string_prefix(composition_path)
    target = f' for {path_prefix[:-1]}' if path_prefix else ''

    log_info(f'Harvesting the output{target}...')
    self.harvest(atomic_execution_id, composition_path)

    log_info(f'Retrieving the output{target}...')
    self.retrieve_output(atomic_execution_id, composition_path)
def publish(self) -> Optional[str]:
    """Return the input id, uploading the tarball first if needed.

    When ``self.input_id`` is already set it is returned untouched.
    Otherwise the id is computed, and the tarball is uploaded only if
    ``self._has_input`` reports the input is missing.
    """
    # Either the user provided an input id, or we asked the controller
    # previously and found the data is there with this input id
    known_id = self.input_id
    if known_id is not None:
        return known_id

    computed_id = self._compute_input_id()
    if self._has_input(computed_id):
        return computed_id

    tarball_size = os.path.getsize(self.tarball.name)
    log_info(f'{tarball_size} input bytes to upload')
    self._put_tarball(computed_id)
    return computed_id
def run(self):
    """Kill instances through the controller after validating the options.

    With ``all_of_them_plz`` set, no instance list may be given and the
    user is asked for confirmation unless ``oh_yeah`` is set. Otherwise a
    non-empty list of instance IDs is required and ``including_idle`` is
    rejected.

    Raises:
        CLIException: on conflicting or missing options, when the user
            declines the confirmation, or when the provider fails to
            terminate some instances.
    """
    user = self.configuration.user
    if self.all_of_them_plz:
        if self.instance_ids is not None:
            raise CLIException('Can\'t specify both a list of instances '
                               'and --all-of-them-plz')
        user_in_message = 'all users' if self.ignore_ownership else user
        log_warning(
            f'Killing all instances running jobs of {user_in_message} '
            'for all projects')
        if not self.oh_yeah:
            answer = input('Are you sure? (yeah/Nope): ')
            if answer != 'yeah':
                raise CLIException('Cancelled by user')
    else:
        # This check guarantees a non-empty list from here on, so no
        # further validation of the IDs is needed before the request
        if not self.instance_ids:
            raise CLIException(
                'You must specify a list of instance IDs with the -i '
                'option. Use `plz list` to get instance IDs')
        if self.including_idle:
            raise CLIException(
                'Option --including-idle only makes sense together with '
                '--all-of-them-plz')
        # The way the API likes it in this case
        self.including_idle = None
        log_info('Killing instances: ' + ' '.join(self.instance_ids))

    try:
        were_there_instances_to_kill = self.controller.kill_instances(
            instance_ids=self.instance_ids,
            force_if_not_idle=self.force_if_not_idle,
            ignore_ownership=self.ignore_ownership,
            including_idle=self.including_idle,
            user=user)
    except ProviderKillingInstancesException as e:
        fails = e.failed_instance_ids_to_messages
        log_error('Error terminating instances: \n' + ''.join(
            f'{instance_id}: {message}\n'
            for instance_id, message in fails.items()))
        # Keep the provider error attached as the cause for debugging
        raise CLIException(
            'Couldn\'t terminate all instances. You can use '
            '--force-if-not-idle for non-idle instances') from e

    if not were_there_instances_to_kill:
        log_warning(
            'Request to kill all instances, yet no instances were found.')
        if not self.including_idle:
            log_warning('Maybe you forgot --including-idle ?')
    log_info('It was a clean job')
def suboperation(self, name: str, f: Callable[..., Any],
                 if_set: bool = True):
    """Run ``f`` as a named step, logging its name and, in debug, its time.

    Args:
        name: message logged before running the step.
        f: callable invoked with no arguments.
        if_set: when false the step is skipped entirely (nothing is
            logged and ``f`` is not called).

    Returns:
        Whatever ``f`` returns, or ``None`` when the step is skipped.
    """
    if not if_set:
        return None
    log_info(name)
    # perf_counter is unaffected by system clock adjustments, unlike
    # time.time, so the measured duration cannot jump or go negative
    start_time = time.perf_counter()
    result = f()
    time_taken = time.perf_counter() - start_time
    if self.configuration.debug:
        log_debug(f'Time taken: {time_taken:.2f}s')
    return result
def harvest(self):
    """Ask the controller to harvest (delete) the finished execution.

    The request is made with ``fail_if_running=True``. If the instance is
    still running, the situation is only logged when ``force_if_running``
    is set; otherwise a ``CLIException`` is raised.
    """
    try:
        self.controller.delete_execution(
            execution_id=self.get_execution_id(),
            fail_if_running=True,
            fail_if_deleted=False)
    except InstanceStillRunningException:
        if not self.force_if_running:
            raise CLIException(
                'Process is still running, run `plz stop` if you want to '
                'terminate it, \nor use --force-if-running (discouraged)')
        log_info('Process is still running')
def run(self):
    """Stop every atomic execution that makes up this execution.

    The composition is fetched from the controller and each atomic
    execution is deleted with ``fail_if_running=False``. When there is
    more than one atomic execution, log messages are prefixed with the
    atomic execution ID followed by ``#``.
    """
    composition = self.controller.get_execution_composition(
        self.get_execution_id())
    atomic_executions = get_all_atomic(composition)
    # Only composite executions need a prefix to tell the messages apart.
    # (Testing for "> 0" inside the loop would always be true.)
    is_composite = len(atomic_executions) > 1
    for atomic_id in atomic_executions:
        message_prefix = atomic_id + '#' if is_composite else ''
        try:
            self.controller.delete_execution(execution_id=atomic_id,
                                             fail_if_running=False,
                                             fail_if_deleted=True)
        except ExecutionAlreadyHarvestedException:
            log_info(message_prefix + 'Process already stopped')
            # Keep going: the remaining atomic executions still need to
            # be stopped even if this one was already harvested
            continue
        log_info(message_prefix + 'Stopped')
def harvest(self, atomic_execution_id: Optional[str] = None,
            composition_path: Optional[List[Tuple[str, Any]]] = None):
    """Ask the controller to harvest (delete) an atomic execution.

    Args:
        atomic_execution_id: execution to harvest; defaults to
            ``self.get_execution_id()``.
        composition_path: path of the execution inside a composition;
            defaults to an empty list.

    Raises:
        CLIException: if the instance is still running, unless
            ``force_if_running`` is set or the composition path is
            non-empty (in which case it is only logged).
    """
    if atomic_execution_id is None:
        atomic_execution_id = self.get_execution_id()
    path = [] if composition_path is None else composition_path
    try:
        self.controller.delete_execution(execution_id=atomic_execution_id,
                                         fail_if_running=True,
                                         fail_if_deleted=False)
    except InstanceStillRunningException:
        tolerate_running = self.force_if_running or len(path) > 0
        if not tolerate_running:
            raise CLIException(
                'Process is still running, run `plz stop` if you want to '
                'terminate it, \nor use --force-if-running (discouraged)')
        log_info('Process is still running')
def retrieve_output(self):
    """Download the output files and unpack them in the output directory.

    The directory is ``self.output_dir`` with ``%e`` replaced by the
    execution ID, joined with ``self.path`` when one is set. An existing
    directory is wiped only when ``force_if_running`` is set; otherwise a
    ``CLIException`` is raised. Each extracted path is printed.
    """
    execution_id = self.get_execution_id()
    output_tarball_bytes = self.controller.get_output_files(
        self.get_execution_id(), path=self.path)

    subpath = '' if self.path is None else self.path
    target_dir = os.path.join(
        self.output_dir.replace('%e', execution_id), subpath)

    try:
        os.makedirs(target_dir)
    except FileExistsError:
        if not self.force_if_running:
            raise CLIException(
                f'The output directory "{target_dir}" '
                'already exists.')
        log_info('Removing existing output directory')
        shutil.rmtree(target_dir)
        os.makedirs(target_dir)

    for extracted_path in untar(output_tarball_bytes, target_dir):
        print(extracted_path)
def run(self):
    """Ping the controller and check it runs the same build as the CLI.

    Raises:
        ExitWithStatusCodeException: with status 1 when the ping response
            is not the expected ``pong`` or when the controller build
            timestamp differs from the one of the CLI.
    """
    response_dict = self.controller.ping(self.ping_timeout)
    reachable = response_dict.get('plz') == 'pong'
    if not reachable:
        log_error('Backend is unreachable')
        raise ExitWithStatusCodeException(1)

    if not self.silent_on_success:
        log_info('Backend is reachable')

    remote_build = response_dict['build_timestamp']
    if remote_build != self.build_timestamp:
        wheel_url = ('https://s3-eu-west-1.amazonaws.com/'
                     f'plz.prodo.ai/plz_cli-0.1.{remote_build}'
                     '-py3-none-any.whl')
        log_error(
            'Version mismatch! The controller is running version\n'
            f'{remote_build}\n'
            'while the cli is running\n'
            f'{self.build_timestamp}\n'
            'You can install the controller version with\n'
            f'pip install {wheel_url}')
        raise ExitWithStatusCodeException(1)
def __enter__(self):
    """Prepare the input data, building a tarball only when necessary.

    Returns ``self`` in every case. No tarball is built when an input id
    is already known, or when the controller returns one for the input
    metadata.

    Raises:
        ValueError: if neither an input id nor a path was given.
    """
    # Nothing to save in the context, we have an input id in the
    # controller and just refer to it
    if self.input_id:
        return self
    if self.path is None:
        raise ValueError('For input data, neither path nor input id were '
                         'given')

    # Try to avoid building the tarball. Look at the maximum modification
    # time in the input, and if we have an input for the timestamp, use
    # that one
    metadata = InputMetadata.of(
        user=self.user,
        project=self.project,
        path=self.path,
        timestamp_millis=self.timestamp_millis)
    input_id = self.controller.get_input_id_or_none(metadata)
    log_debug(f'Input ID from the controller: {input_id}')
    if input_id:
        log_info('Input files not changed according to modification times')
        self.input_id = input_id
        return self

    log_debug('Building the tarball!')
    self.tarball = tempfile.NamedTemporaryFile()
    with tarfile.open(self.tarball.name, mode='w:bz2') as archive:
        for directory, _, filenames in os.walk(self.path):
            for filename in filenames:
                full_path = os.path.join(directory, filename)
                # Members carry only a relative name and a size
                member = tarfile.TarInfo(
                    name=os.path.relpath(full_path, self.path))
                member.size = os.stat(full_path).st_size
                with open(full_path, 'rb') as stream:
                    archive.addfile(member, fileobj=stream)
    return self
def run(self):
    """Kill instances through the controller after validating the options.

    With ``all_of_them_plz`` set, the user is asked for confirmation
    unless ``oh_yeah`` is set, and ``None`` is sent as the instance list.
    Otherwise a non-empty list of instance IDs is required.

    Raises:
        CLIException: when no instance IDs are given, when the user
            declines the confirmation, or when the provider fails to
            terminate some instances.
    """
    if self.all_of_them_plz:
        log_warning('Killing all instances for all users and projects')
        if not self.oh_yeah:
            answer = input('Are you sure? (yeah/Nope): ')
            if answer != 'yeah':
                raise CLIException('Cancelled by user')
        instance_ids_for_controller = None
    else:
        # This check guarantees a non-empty list from here on, so no
        # further validation of the IDs is needed before the request
        if not self.instance_ids:
            raise CLIException(
                'You must specify a list of instance IDs with the -i '
                'option. Use `plz list` to get instance IDs')
        log_info('Killing instances: ' + ' '.join(self.instance_ids))
        instance_ids_for_controller = self.instance_ids

    try:
        were_there_instances_to_kill = self.controller.kill_instances(
            instance_ids=instance_ids_for_controller,
            force_if_not_idle=self.force_if_not_idle)
    except ProviderKillingInstancesException as e:
        fails = e.failed_instance_ids_to_messages
        log_error('Error terminating instances: \n' + ''.join(
            f'{instance_id}: {message}\n'
            for instance_id, message in fails.items()))
        # Keep the provider error attached as the cause for debugging
        raise CLIException(
            'Couldn\'t terminate all instances. You can use '
            '--force-if-not-idle for non-idle instances') from e

    if not were_there_instances_to_kill:
        log_warning(
            'Request to kill all instances, yet no instances were found.')
    log_info('It was a clean job')
def retrieve_output(
        self,
        atomic_execution_id: Optional[str] = None,
        composition_path: Optional[List[Tuple[str, Any]]] = None):
    """Download the output of an atomic execution and unpack it locally.

    Args:
        atomic_execution_id: execution whose output is fetched; defaults
            to ``self.get_execution_id()``.
        composition_path: path of the execution inside a composition;
            defaults to an empty list. Each node becomes one directory
            level named by joining the node's elements with ``-``, and
            the second element of the last node is sent to the controller
            as the integer ``index``.

    Raises:
        CLIException: if the output directory exists, the composition
            path is empty and ``force_if_running`` is not set.
    """
    if atomic_execution_id is None:
        atomic_execution_id = self.get_execution_id()
    if composition_path is None:
        composition_path = []
    in_composition = len(composition_path) > 0
    index = int(composition_path[-1][1]) if in_composition else None

    output_tarball_bytes = self.controller.get_output_files(
        atomic_execution_id, path=self.path, index=index)

    formatted_output_dir = \
        self.output_dir.replace('%e', self.get_execution_id())
    # Node elements are typed as Any, so convert them before joining:
    # str.join raises TypeError on non-string items (e.g. int indices)
    formatted_output_dir = os.path.join(
        formatted_output_dir,
        *('-'.join(str(part) for part in node)
          for node in composition_path),
        self.path if self.path is not None else '')

    try:
        os.makedirs(formatted_output_dir)
    except FileExistsError:
        if in_composition and not self.rewrite_subexecutions:
            log_info('Output directory already present')
            return
        if self.force_if_running or in_composition:
            log_info('Removing existing output directory')
            shutil.rmtree(formatted_output_dir)
            os.makedirs(formatted_output_dir)
        else:
            raise CLIException(
                f'The output directory "{formatted_output_dir}" '
                'already exists.')
    for path in untar(output_tarball_bytes, formatted_output_dir):
        print(path)
def display_logs(self, execution_id: str, print_interrupt_message=False):
    """Stream the logs of the execution to standard output.

    ``self.since`` selects the starting point: ``None`` means the current
    time, ``'start'`` means from the beginning, an integer-like value is
    used as a timestamp, and anything else is parsed as a date.

    Args:
        execution_id: ID shown in the hint printed on interruption.
        print_interrupt_message: whether to log, on ``KeyboardInterrupt``,
            how to resume streaming the logs.

    Raises:
        KeyboardInterrupt: re-raised after printing a newline (and the
            optional hint) when the user interrupts the stream.
    """
    log_info('Streaming logs...')
    # For the since argument, pass an integer to the backend. Or nothing
    # in case we want to log from the start (so the default is different
    # in the cli --current time-- and the backend --start time--). That's
    # the easiest way to code it, as passing a datetime object to the
    # backend would require passing the timezone and doing timezone
    # calculations in the backend. This way all timezone-dependent
    # calculations are done in the cli and the backend uses whatever
    # timestamp we pass.
    if self.since is None:
        # Default: show since the current time
        since_timestamp = str(int(time.time()))
    elif self.since == 'start':
        # Log from the beginning, that's the default for the backend
        since_timestamp = None
    else:
        try:
            since_timestamp = str(int(self.since))
        except ValueError:
            parsed = dateutil.parser.parse(self.since)
            # datetime.timestamp() honours the timezone of aware
            # datetimes and treats naive ones as local time, whereas
            # mktime(timetuple()) always assumed local time. Microseconds
            # are dropped, as timetuple() did.
            since_timestamp = str(
                int(parsed.replace(microsecond=0).timestamp()))
    byte_lines = self.controller.get_logs(self.get_execution_id(),
                                          since=since_timestamp)
    try:
        for byte_line in byte_lines:
            print(byte_line.decode('utf-8'), end='', flush=True)
    except KeyboardInterrupt:
        print()
        if print_interrupt_message:
            log_info('Your program is still running. '
                     'To stream the logs, type:\n\n'
                     f' plz logs {execution_id}\n')
        raise
    print()
def display_logs(self, execution_id: str, print_interrupt_message=False):
    """Stream the logs of the execution to standard output.

    When the execution is made of exactly one atomic execution its logs
    are printed line by line; otherwise printing is delegated to
    ``self._print_logs_for_composite``.

    Raises:
        KeyboardInterrupt: re-raised after printing a newline (and,
            if ``print_interrupt_message`` is set, a hint on how to
            resume streaming) when the user interrupts the stream.
    """
    log_info('Streaming logs...')
    since_timestamp = self._compute_since_timestamp()
    atomic_executions = get_all_atomic(
        self.controller.get_execution_composition(execution_id))
    try:
        if len(atomic_executions) != 1:
            self._print_logs_for_composite(atomic_executions,
                                           since_timestamp)
        else:
            raw_lines = self.controller.get_logs(self.get_execution_id(),
                                                 since=since_timestamp)
            for raw_line in raw_lines:
                print(raw_line.decode('utf-8'), end='', flush=True)
    except KeyboardInterrupt:
        print()
        if print_interrupt_message:
            log_info('Your program is still running. '
                     'To stream the logs, type:\n\n'
                     f' plz logs {self.get_execution_id()}\n')
        raise
    print()
def follow_execution(self, was_start_ok: bool):
    """Follow a started execution: stream logs, harvest, fetch results.

    Args:
        was_start_ok: whether starting the command succeeded; when false
            the failure is reported through the CLIException path below.

    Returns:
        The exit status code when the execution succeeded; ``None`` when
        detached or when the user interrupted the log stream.

    Raises:
        ExitWithStatusCodeException: when a CLIException occurs while
            checking the start or displaying the logs.
        CLIException: when the execution is still reported as running
            after harvesting, or when it failed.
    """
    log_info(f'Execution ID is: {self.execution_id}')
    # In detached mode there is nothing to follow
    if self.detach:
        return
    retrieve_output_operation = RetrieveOutputOperation(
        self.configuration,
        output_dir=self.output_dir,
        execution_id=self.execution_id,
        force_if_running=False,
        path=None)
    cancelled = False
    try:
        if not was_start_ok:
            raise CLIException('The command failed.')
        logs = LogsOperation(self.configuration,
                             execution_id=self.execution_id,
                             since='start')
        logs.display_logs(self.execution_id, print_interrupt_message=True)
    except CLIException as e:
        e.print(self.configuration)
        raise ExitWithStatusCodeException(e.exit_code)
    except KeyboardInterrupt:
        # Remember the interruption so that nothing is harvested below
        cancelled = True
    finally:
        # Runs on the CLIException path as well (cancelled is still
        # False there), before ExitWithStatusCodeException propagates
        if not cancelled:
            self.suboperation('Harvesting the output...',
                              retrieve_output_operation.harvest)
    if cancelled:
        return
    retrieve_measures_operation = RetrieveMeasuresOperation(
        self.configuration, execution_id=self.execution_id, summary=True)
    self.suboperation('Retrieving summary of measures (if present)...',
                      retrieve_measures_operation.retrieve_measures)
    show_status_operation = ShowStatusOperation(
        self.configuration, execution_id=self.execution_id)
    status = show_status_operation.get_status()
    if status.running:
        raise CLIException(
            'Execution has not finished. This should not happen.'
            ' Please report it.')
    elif status.success:
        log_info('Execution succeeded.')
        # The output is retrieved only for successful executions
        self.suboperation('Retrieving the output...',
                          retrieve_output_operation.retrieve_output)
        log_info('Done and dusted.')
        return status.code
    else:
        raise CLIException(
            f'Execution failed with an exit status of {status.code}.',
            exit_code=status.code)
def run(self):
    """Harvest the execution and then retrieve its output.

    Each step is announced with a log message before it runs.
    """
    steps = (
        ('Harvesting the output...', self.harvest),
        ('Retrieving the output...', self.retrieve_output),
    )
    for announcement, action in steps:
        log_info(announcement)
        action()
def run(self):
    """Print whether the execution is running, succeeded, and its code."""
    current = self.get_status()
    log_info('Status:')
    report = (
        ('Running:', current.running),
        ('Success:', current.success),
        ('Exit Status:', current.code),
    )
    for label, value in report:
        print(label, value)