예제 #1
0
 def run_atomic(self, atomic_execution_id: str,
                composition_path: [(str, Any)]):
     """Print the status of a single atomic execution.

     The header line is prefixed with whatever
     ``create_path_string_prefix`` returns for ``composition_path``.

     :param atomic_execution_id: ID passed to ``self.get_status``
     :param composition_path: path handed to
         ``create_path_string_prefix`` to build the header prefix
     """
     execution_status = self.get_status(atomic_execution_id)
     header_prefix = create_path_string_prefix(composition_path)
     log_info(f'{header_prefix}Status:')
     # One output line per status field, in a fixed order
     for label, attribute in (('Running:', 'running'),
                              ('Success:', 'success'),
                              ('Exit Status:', 'code')):
         print(label, getattr(execution_status, attribute))
예제 #2
0
 def run(self):
     """Ask the controller to delete (stop) this execution.

     Deletion is requested with ``fail_if_running=False`` and
     ``fail_if_deleted=True``. An ``ExecutionAlreadyHarvestedException``
     is reported as an already stopped process instead of being raised.
     """
     try:
         self.controller.delete_execution(
             execution_id=self.get_execution_id(),
             fail_if_running=False,
             fail_if_deleted=True)
     except ExecutionAlreadyHarvestedException:
         log_info('Process already stopped')
     else:
         log_info('Stopped')
예제 #3
0
 def run_atomic(self, atomic_execution_id: str,
                composition_path: [(str, Any)]):
     """Harvest and then retrieve the output of one atomic execution.

     :param atomic_execution_id: ID forwarded to ``self.harvest`` and
         ``self.retrieve_output``
     :param composition_path: path forwarded to both calls, and used to
         build the suffix of the log messages
     """
     path_prefix = create_path_string_prefix(composition_path)
     # A non-empty prefix is mentioned in the messages without its last
     # character
     message_suffix = f' for {path_prefix[:-1]}' if path_prefix else ''

     log_info(f'Harvesting the output{message_suffix}...')
     self.harvest(atomic_execution_id, composition_path)

     log_info(f'Retrieving the output{message_suffix}...')
     self.retrieve_output(atomic_execution_id, composition_path)
예제 #4
0
    def publish(self) -> Optional[str]:
        """Make sure the input is available and return its input ID.

        If ``self.input_id`` is already set (given by the user, or found
        in the controller earlier) it is returned as is. Otherwise the ID
        is computed and the tarball is uploaded, unless ``_has_input``
        reports that the input is already there.

        :return: the input ID
        """
        if self.input_id is not None:
            return self.input_id

        computed_id = self._compute_input_id()
        if self._has_input(computed_id):
            return computed_id

        tarball_size = os.path.getsize(self.tarball.name)
        log_info(f'{tarball_size} input bytes to upload')
        self._put_tarball(computed_id)
        return computed_id
예제 #5
0
    def run(self):
        """Kill instances: either an explicit list, or all of them.

        With ``all_of_them_plz`` set, no instance list may be given and
        the user is asked for confirmation unless ``oh_yeah`` is set.
        Otherwise a non-empty list of instance IDs is required,
        ``including_idle`` is rejected, and it is reset to ``None``
        before calling the controller.

        :raises CLIException: on inconsistent options, when the user
            cancels, or when the provider fails to terminate some
            instances (the provider exception is chained as the cause)
        """
        user = self.configuration.user
        if self.all_of_them_plz:
            if self.instance_ids is not None:
                raise CLIException('Can\'t specify both a list of instances '
                                   'and --all-of-them-plz')
            user_in_message = 'all users' if self.ignore_ownership else user
            log_warning(
                f'Killing all instances running jobs of {user_in_message} '
                'for all projects')
            if not self.oh_yeah:
                answer = input('Are you sure? (yeah/Nope): ')
                if answer != 'yeah':
                    raise CLIException('Cancelled by user')
        else:
            # This check covers both a missing and an empty list, so no
            # further "no instance IDs" validation is needed afterwards
            if not self.instance_ids:
                raise CLIException(
                    'You must specify a list of instance IDs with the -i '
                    'option. Use `plz list` to get instance IDs')
            if self.including_idle:
                raise CLIException(
                    'Option --including-idle only makes sense together with '
                    '--all-of-them-plz')
            # The way the API likes it in this case
            self.including_idle = None
            log_info('Killing instances: ' + ' '.join(self.instance_ids))

        try:
            were_there_instances_to_kill = self.controller.kill_instances(
                instance_ids=self.instance_ids,
                force_if_not_idle=self.force_if_not_idle,
                ignore_ownership=self.ignore_ownership,
                including_idle=self.including_idle,
                user=user)
        except ProviderKillingInstancesException as e:
            fails = e.failed_instance_ids_to_messages
            log_error('Error terminating instances: \n' + ''.join(
                f'{instance_id}: {message}\n'
                for instance_id, message in fails.items()))
            # Keep the provider error attached as the explicit cause
            raise CLIException(
                'Couldn\'t terminate all instances. You can use '
                '--force-if-not-idle for non-idle instances') from e

        if not were_there_instances_to_kill:
            log_warning(
                'Request to kill all instances, yet no instances were found.')
            if not self.including_idle:
                log_warning('Maybe you forgot --including-idle ?')

        log_info('It was a clean job')
예제 #6
0
 def suboperation(self,
                  name: str,
                  f: Callable[..., Any],
                  if_set: bool = True):
     """Run ``f`` as a named step, timing it.

     :param name: message logged before ``f`` is called
     :param f: callable invoked without arguments
     :param if_set: when false, nothing is logged or called
     :return: whatever ``f`` returns, or ``None`` when the step is skipped
     """
     if if_set:
         log_info(name)
         started_at = time.time()
         outcome = f()
         elapsed = time.time() - started_at
         # The timing is only reported in debug mode
         if self.configuration.debug:
             log_debug('Time taken: %.2fs' % elapsed)
         return outcome
     return None
예제 #7
0
 def harvest(self):
     """Ask the controller to delete this execution if it has finished.

     Deletion is requested with ``fail_if_running=True`` and
     ``fail_if_deleted=False``.

     :raises CLIException: if the instance is still running and
         ``force_if_running`` is not set
     """
     try:
         self.controller.delete_execution(
             execution_id=self.get_execution_id(),
             fail_if_running=True,
             fail_if_deleted=False)
     except InstanceStillRunningException:
         if not self.force_if_running:
             raise CLIException(
                 'Process is still running, run `plz stop` if you want to '
                 'terminate it, \nor use --force-if-running (discouraged)')
         # Forced: report it and carry on
         log_info('Process is still running')
예제 #8
0
 def run(self):
     """Stop every atomic execution that makes up this execution.

     The composition of the execution is fetched from the controller and
     each atomic execution in it is deleted with ``fail_if_running=False``
     and ``fail_if_deleted=True``. An atomic execution that was already
     harvested is reported and skipped, so the remaining ones are still
     stopped.
     """
     composition = self.controller.get_execution_composition(
         self.get_execution_id())
     atomic_executions = get_all_atomic(composition)
     # Messages carry the atomic execution ID only when there are several
     # executions to tell apart
     is_composite = len(atomic_executions) > 1
     for e in atomic_executions:
         message_prefix = e + '#' if is_composite else ''
         try:
             self.controller.delete_execution(execution_id=e,
                                              fail_if_running=False,
                                              fail_if_deleted=True)
         except ExecutionAlreadyHarvestedException:
             log_info(message_prefix + 'Process already stopped')
             # Keep going: the other atomic executions may still be running
             continue
         log_info(message_prefix + 'Stopped')
예제 #9
0
 def harvest(self,
             atomic_execution_id: Optional[str] = None,
             composition_path: Optional[List[Tuple[str, Any]]] = None):
     """Ask the controller to delete an execution if it has finished.

     :param atomic_execution_id: execution to delete; defaults to
         ``self.get_execution_id()``
     :param composition_path: path of the execution inside a
         composition; defaults to an empty path
     :raises CLIException: if the instance is still running, the path is
         empty and ``force_if_running`` is not set
     """
     target_id = (self.get_execution_id()
                  if atomic_execution_id is None else atomic_execution_id)
     path = [] if composition_path is None else composition_path
     try:
         self.controller.delete_execution(execution_id=target_id,
                                          fail_if_running=True,
                                          fail_if_deleted=False)
     except InstanceStillRunningException:
         # A still-running execution is tolerated when forced, or when
         # it sits inside a composition (non-empty path)
         if not (self.force_if_running or len(path) > 0):
             raise CLIException(
                 'Process is still running, run `plz stop` if you want to '
                 'terminate it, \nor use --force-if-running (discouraged)')
         log_info('Process is still running')
예제 #10
0
 def retrieve_output(self):
     """Download the output files of the execution and unpack them.

     The target directory is ``self.output_dir`` with ``%e`` replaced by
     the execution ID, joined with ``self.path`` when one is set. Every
     path yielded by ``untar`` is printed.

     :raises CLIException: if the target directory already exists and
         ``force_if_running`` is not set
     """
     execution_id = self.get_execution_id()
     tarball_bytes = self.controller.get_output_files(
         self.get_execution_id(), path=self.path)

     base_dir = self.output_dir.replace('%e', execution_id)
     target_dir = os.path.join(
         base_dir, '' if self.path is None else self.path)

     try:
         os.makedirs(target_dir)
     except FileExistsError:
         if not self.force_if_running:
             raise CLIException(
                 f'The output directory "{target_dir}" '
                 'already exists.')
         # Forced: start over from an empty directory
         log_info('Removing existing output directory')
         shutil.rmtree(target_dir)
         os.makedirs(target_dir)

     for extracted_path in untar(tarball_bytes, target_dir):
         print(extracted_path)
예제 #11
0
 def run(self):
     """Ping the controller and compare its build timestamp with ours.

     :raises ExitWithStatusCodeException: with status 1 when the reply
         does not carry ``'plz': 'pong'``, or when the controller's
         ``build_timestamp`` differs from ``self.build_timestamp``
     """
     reply = self.controller.ping(self.ping_timeout)
     if reply.get('plz') != 'pong':
         log_error('Backend is unreachable')
         raise ExitWithStatusCodeException(1)

     if not self.silent_on_success:
         log_info('Backend is reachable')

     controller_build_timestamp = reply['build_timestamp']
     if controller_build_timestamp == self.build_timestamp:
         return

     log_error(
         'Version mismatch! The controller is running version\n'
         f'{controller_build_timestamp}\n'
         'while the cli is running\n'
         f'{self.build_timestamp}\n'
         'You can install the controller version with\n'
         'pip install https://s3-eu-west-1.amazonaws.com/'
         f'plz.prodo.ai/plz_cli-0.1.{controller_build_timestamp}'
         '-py3-none-any.whl')
     raise ExitWithStatusCodeException(1)
예제 #12
0
    def __enter__(self):
        """Prepare the input data, building a tarball only when needed.

        Nothing is built when an input ID is already set, or when the
        controller returns an input ID for the metadata of ``self.path``.
        Otherwise every file under ``self.path`` is added to a bz2
        tarball stored in a named temporary file (``self.tarball``).

        :return: ``self``
        :raises ValueError: if neither an input ID nor a path was given
        """
        # The controller already holds the input; we only refer to it
        if self.input_id:
            return self

        if self.path is None:
            raise ValueError('For input data, neither path nor input id were '
                             'given')

        metadata = InputMetadata.of(
            user=self.user,
            project=self.project,
            path=self.path,
            timestamp_millis=self.timestamp_millis)
        # Try to avoid building the tarball: if the controller has an
        # input for this metadata (which includes the timestamp), reuse it
        known_input_id = self.controller.get_input_id_or_none(metadata)
        log_debug(f'Input ID from the controller: {known_input_id}')
        if known_input_id:
            log_info('Input files not changed according to modification times')
            self.input_id = known_input_id
            return self

        log_debug('Building the tarball!')
        self.tarball = tempfile.NamedTemporaryFile()
        with tarfile.open(self.tarball.name, mode='w:bz2') as archive:
            for directory, _, file_names in os.walk(self.path):
                for file_name in file_names:
                    file_path = os.path.join(directory, file_name)
                    member_name = os.path.relpath(file_path, self.path)
                    member_size = os.stat(file_path).st_size
                    with open(file_path, 'rb') as stream:
                        # Only the name and the size are set on the entry
                        member = tarfile.TarInfo(name=member_name)
                        member.size = member_size
                        archive.addfile(member, fileobj=stream)
        return self
예제 #13
0
    def run(self):
        """Kill instances: either an explicit list, or all of them.

        With ``all_of_them_plz`` set the user is asked for confirmation
        (unless ``oh_yeah`` is set) and ``None`` is sent to the controller
        as the instance list. Otherwise a non-empty list of instance IDs
        is required and sent as is.

        :raises CLIException: when no instance IDs are given, when the
            user cancels, or when the provider fails to terminate some
            instances (the provider exception is chained as the cause)
        """
        if self.all_of_them_plz:
            log_warning('Killing all instances for all users and projects')
            if not self.oh_yeah:
                answer = input('Are you sure? (yeah/Nope): ')
                if answer != 'yeah':
                    raise CLIException('Cancelled by user')
            instance_ids_for_controller = None
        else:
            # This check covers both a missing and an empty list, so no
            # further "no instance IDs" validation is needed afterwards
            if not self.instance_ids:
                raise CLIException(
                    'You must specify a list of instance IDs with the -i '
                    'option. Use `plz list` to get instance IDs')
            log_info('Killing instances: ' + ' '.join(self.instance_ids))
            instance_ids_for_controller = self.instance_ids

        try:
            were_there_instances_to_kill = self.controller.kill_instances(
                instance_ids=instance_ids_for_controller,
                force_if_not_idle=self.force_if_not_idle)
        except ProviderKillingInstancesException as e:
            fails = e.failed_instance_ids_to_messages
            log_error('Error terminating instances: \n' + ''.join(
                f'{instance_id}: {message}\n'
                for instance_id, message in fails.items()))
            # Keep the provider error attached as the explicit cause
            raise CLIException(
                'Couldn\'t terminate all instances. You can use '
                '--force-if-not-idle for non-idle instances') from e

        if not were_there_instances_to_kill:
            log_warning(
                'Request to kill all instances, yet no instances were found.')

        log_info('It was a clean job')
예제 #14
0
    def retrieve_output(
            self,
            atomic_execution_id: Optional[str] = None,
            composition_path: Optional[List[Tuple[str, Any]]] = None):
        """Download the output files of an execution and unpack them.

        The target directory is ``self.output_dir`` with ``%e`` replaced
        by ``self.get_execution_id()``, followed by one component per node
        of the composition path (its items joined with ``-``) and by
        ``self.path`` when set. Every path yielded by ``untar`` is
        printed.

        :param atomic_execution_id: execution whose output is fetched;
            defaults to ``self.get_execution_id()``
        :param composition_path: path of the execution inside a
            composition; defaults to an empty path
        :raises CLIException: if the target directory exists, the path is
            empty and ``force_if_running`` is not set
        """
        if atomic_execution_id is None:
            atomic_execution_id = self.get_execution_id()
        if composition_path is None:
            composition_path = []
        in_composition = len(composition_path) > 0

        # The index sent to the controller comes from the last path node
        index = int(composition_path[-1][1]) if in_composition else None
        tarball_bytes = self.controller.get_output_files(
            atomic_execution_id, path=self.path, index=index)

        root_dir = self.output_dir.replace('%e', self.get_execution_id())
        node_dirs = ['-'.join(node) for node in composition_path]
        target_dir = os.path.join(
            root_dir, *node_dirs, '' if self.path is None else self.path)

        try:
            os.makedirs(target_dir)
        except FileExistsError:
            if in_composition and not self.rewrite_subexecutions:
                log_info('Output directory already present')
                return
            if not (self.force_if_running or in_composition):
                raise CLIException(
                    f'The output directory "{target_dir}" '
                    'already exists.')
            log_info('Removing existing output directory')
            shutil.rmtree(target_dir)
            os.makedirs(target_dir)

        for extracted_path in untar(tarball_bytes, target_dir):
            print(extracted_path)
예제 #15
0
 def display_logs(self, execution_id: str, print_interrupt_message=False):
     """Stream the logs of the execution to standard output.

     :param execution_id: ID shown in the hint printed on interruption
     :param print_interrupt_message: whether to print that hint when the
         streaming is interrupted from the keyboard
     :raises KeyboardInterrupt: re-raised after handling the interruption
     """
     log_info('Streaming logs...')
     # The backend receives the "since" argument as an integer timestamp,
     # or nothing at all to log from the start (so the defaults differ:
     # current time in the cli, start time in the backend). Passing a
     # datetime object instead would require passing the timezone and
     # doing timezone calculations in the backend. This way all
     # timezone-dependent calculations are done in the cli and the
     # backend uses whatever timestamp we pass.
     if self.since is None:
         # Default: show since the current time
         since_timestamp = str(int(time.time()))
     elif self.since == 'start':
         # Log from the beginning, that's the default for the backend
         since_timestamp = None
     else:
         try:
             epoch_seconds = int(self.since)
         except ValueError:
             # Not an integer: parse it as a date
             parsed_since = dateutil.parser.parse(self.since)
             epoch_seconds = int(time.mktime(parsed_since.timetuple()))
         since_timestamp = str(epoch_seconds)

     log_stream = self.controller.get_logs(self.get_execution_id(),
                                           since=since_timestamp)
     try:
         for raw_line in log_stream:
             print(raw_line.decode('utf-8'), end='', flush=True)
     except KeyboardInterrupt:
         print()
         if print_interrupt_message:
             log_info('Your program is still running. '
                      'To stream the logs, type:\n\n'
                      f'        plz logs {execution_id}\n')
         raise
     else:
         print()
예제 #16
0
    def display_logs(self, execution_id: str, print_interrupt_message=False):
        """Stream the logs of an execution, atomic or composite.

        :param execution_id: ID used to fetch the execution composition
        :param print_interrupt_message: whether to print a hint on how to
            resume streaming when interrupted from the keyboard
        :raises KeyboardInterrupt: re-raised after handling the
            interruption
        """
        log_info('Streaming logs...')
        since_timestamp = self._compute_since_timestamp()

        composition = self.controller.get_execution_composition(execution_id)
        atomic_executions = get_all_atomic(composition)

        try:
            if len(atomic_executions) != 1:
                self._print_logs_for_composite(atomic_executions,
                                               since_timestamp)
            else:
                # Exactly one atomic execution: stream its lines directly
                log_stream = self.controller.get_logs(
                    self.get_execution_id(), since=since_timestamp)
                for raw_line in log_stream:
                    print(raw_line.decode('utf-8'), end='', flush=True)
        except KeyboardInterrupt:
            print()
            if print_interrupt_message:
                log_info('Your program is still running. '
                         'To stream the logs, type:\n\n'
                         f'        plz logs {self.get_execution_id()}\n')
            raise
        else:
            print()
예제 #17
0
    def follow_execution(self, was_start_ok: bool):
        """Follow a started execution: logs, harvest, measures, output.

        Returns right after logging the execution ID when ``self.detach``
        is set. Otherwise the logs are streamed from the start, the
        execution is harvested, a summary of measures is retrieved and,
        if the execution succeeded, its output is retrieved.

        :param was_start_ok: whether starting the command succeeded; when
            false a ``CLIException`` is raised inside the ``try`` block
            and handled there
        :return: ``status.code`` when the execution succeeded; ``None``
            when detached or when interrupted from the keyboard
        :raises ExitWithStatusCodeException: when a ``CLIException`` is
            raised while checking the start or displaying the logs
        :raises CLIException: when the execution is still reported as
            running after harvesting, or when it failed
        """
        log_info(f'Execution ID is: {self.execution_id}')

        if self.detach:
            return
        # Created up front as it is used both for harvesting (in the
        # ``finally`` clause below) and for retrieving the output
        retrieve_output_operation = RetrieveOutputOperation(
            self.configuration,
            output_dir=self.output_dir,
            execution_id=self.execution_id,
            force_if_running=False,
            path=None)

        cancelled = False
        try:
            if not was_start_ok:
                raise CLIException('The command failed.')
            logs = LogsOperation(self.configuration,
                                 execution_id=self.execution_id,
                                 since='start')
            logs.display_logs(self.execution_id, print_interrupt_message=True)
        except CLIException as e:
            e.print(self.configuration)
            raise ExitWithStatusCodeException(e.exit_code)
        except KeyboardInterrupt:
            # Interrupting the log stream only stops following: nothing
            # is harvested and the method returns below
            cancelled = True
        finally:
            # Runs on every exit path of the ``try`` statement, including
            # while the ExitWithStatusCodeException raised above
            # propagates; skipped only after a keyboard interrupt
            if not cancelled:
                self.suboperation('Harvesting the output...',
                                  retrieve_output_operation.harvest)

        if cancelled:
            return

        retrieve_measures_operation = RetrieveMeasuresOperation(
            self.configuration, execution_id=self.execution_id, summary=True)
        self.suboperation('Retrieving summary of measures (if present)...',
                          retrieve_measures_operation.retrieve_measures)

        show_status_operation = ShowStatusOperation(
            self.configuration, execution_id=self.execution_id)
        status = show_status_operation.get_status()
        if status.running:
            raise CLIException(
                'Execution has not finished. This should not happen.'
                ' Please report it.')
        elif status.success:
            log_info('Execution succeeded.')
            self.suboperation('Retrieving the output...',
                              retrieve_output_operation.retrieve_output)
            log_info('Done and dusted.')
            return status.code
        else:
            # The exit status of the execution is carried by the exception
            raise CLIException(
                f'Execution failed with an exit status of {status.code}.',
                exit_code=status.code)
예제 #18
0
 def run(self):
     """Harvest the execution and then retrieve its output."""
     for message, step_name in (
             ('Harvesting the output...', 'harvest'),
             ('Retrieving the output...', 'retrieve_output')):
         log_info(message)
         getattr(self, step_name)()
예제 #19
0
 def run(self):
     """Print whether the execution is running, succeeded, and its code."""
     execution_status = self.get_status()
     log_info('Status:')
     # One output line per status field, in a fixed order
     for label, attribute in (('Running:', 'running'),
                              ('Success:', 'success'),
                              ('Exit Status:', 'code')):
         print(label, getattr(execution_status, attribute))