Example #1
    def _calculate_metafeatures_with_limits(self, time_limit):
        res = None
        time_limit = max(time_limit, 1)
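        # Compute meta-features in a subprocess capped by memory and wall-time
        # limits; res stays None if the limits are hit or an error occurs.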
        try:
            context = multiprocessing.get_context(self.pynisher_context)
            preload_modules(context)
            safe_mf = pynisher.enforce_limits(mem_in_mb=self.memory_limit,
                                              wall_time_in_s=int(time_limit),
                                              grace_period_in_s=30,
                                              context=context,
                                              logger=self.logger)(
                                                  self._calculate_metafeatures)
            res = safe_mf()
        except Exception as e:
            self.logger.error('Error getting metafeatures: %s', str(e))

        return res
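
This example follows the pre-1.0 pynisher API used by auto-sklearn: enforce_limits(...) builds a wrapper, calling the wrapped function runs it in a subprocess under the given limits, and the call returns None when a limit is violated (the reason is exposed through exit_status). Below is a minimal, self-contained sketch of that pattern; the function, the limits, and the 'fork' context are illustrative only (fork is POSIX-only):

import multiprocessing
import time

import pynisher


def slow_function():
    time.sleep(10)
    return 42


# 'fork' keeps the sketch short but is POSIX-only; the examples on this page
# pick the context via self.pynisher_context instead.
context = multiprocessing.get_context('fork')

safe_fn = pynisher.enforce_limits(
    wall_time_in_s=2,       # wall-clock limit for the subprocess
    mem_in_mb=512,          # memory limit for the subprocess
    context=context,
)(slow_function)

result = safe_fn()          # runs slow_function in a subprocess
print(result)               # None: the wall-time limit was hit
print(safe_fn.exit_status)  # pynisher.TimeoutException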
Example #2
    def _calculate_metafeatures_with_limits(self, time_limit):
        res = None
        time_limit = max(time_limit, 1)
        try:
            context = multiprocessing.get_context(self.pynisher_context)
            preload_modules(context)
            safe_mf = pynisher.enforce_limits(
                mem_in_mb=self.memory_limit,
                wall_time_in_s=int(time_limit),
                grace_period_in_s=30,
                context=context,
                logger=self.logger)(_calculate_metafeatures)
            res = safe_mf(data_feat_type=self.datamanager.feat_type,
                          data_info_task=self.datamanager.info['task'],
                          x_train=self.datamanager.data['X_train'],
                          y_train=self.datamanager.data['Y_train'],
                          basename=self.dataset_name,
                          watcher=self.watcher,
                          logger_=self.logger)
        except Exception as e:
            self.logger.error('Error getting metafeatures: %s', str(e))

        return res
Example #3
    def run(
        self,
        config: Configuration,
        instance: Optional[str] = None,
        cutoff: Optional[float] = None,
        seed: int = 12345,
        budget: float = 0.0,
        instance_specific: Optional[str] = None,
    ) -> Tuple[StatusType, float, float, Dict[str, Union[int, float, str, Dict, List, Tuple]]]:

        # Additional information of each of the tae executions
        # Defined upfront for mypy
        additional_run_info: TYPE_ADDITIONAL_INFO = {}

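        # Fresh multiprocessing context plus a queue through which the target
        # algorithm reports its results back to this process.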
        context = multiprocessing.get_context(self.pynisher_context)
        preload_modules(context)
        queue = context.Queue()

        if not (instance_specific is None or instance_specific == '0'):
            raise ValueError(instance_specific)
        init_params = {'instance': instance}
        if self.init_params is not None:
            init_params.update(self.init_params)

        if self.port is None:
            logger: Union[logging.Logger, PickableLoggerAdapter] = logging.getLogger("pynisher")
        else:
            logger = get_named_client_logger(
                name="pynisher",
                port=self.port,
            )
        arguments = dict(
            logger=logger,
            wall_time_in_s=cutoff,
            mem_in_mb=self.memory_limit,
            capture_output=True,
            context=context,
        )

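        # Dummy configurations are passed in as plain ints; real configurations
        # carry a config_id from which num_run is derived.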
        if isinstance(config, int):
            num_run = self.initial_num_run
        else:
            num_run = config.config_id + self.initial_num_run

        obj_kwargs = dict(
            queue=queue,
            config=config,
            backend=self.backend,
            port=self.port,
            metric=self.metric,
            seed=self.autosklearn_seed,
            num_run=num_run,
            scoring_functions=self.scoring_functions,
            output_y_hat_optimization=self.output_y_hat_optimization,
            include=self.include,
            exclude=self.exclude,
            disable_file_output=self.disable_file_output,
            instance=instance,
            init_params=init_params,
            budget=budget,
            budget_type=self.budget_type,
            additional_components=autosklearn.pipeline.components.base._addons,
        )

        if self.resampling_strategy != 'test':
            obj_kwargs['resampling_strategy'] = self.resampling_strategy
            obj_kwargs['resampling_strategy_args'] = self.resampling_strategy_args

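        # Launch the target algorithm inside a pynisher-limited subprocess;
        # any exception raised here is reported as a crash.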
        try:
            obj = pynisher.enforce_limits(**arguments)(self.ta)
            obj(**obj_kwargs)
        except Exception as e:
            exception_traceback = traceback.format_exc()
            error_message = repr(e)
            additional_run_info.update({
                'traceback': exception_traceback,
                'error': error_message
            })
            return StatusType.CRASHED, self.worst_possible_result, 0.0, additional_run_info

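        # Decide the outcome of the run from the pynisher exit status.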
        if obj.exit_status in (pynisher.TimeoutException, pynisher.MemorylimitException):
            # Even if the pynisher thinks that a timeout or memout occurred,
            # it can be that the target algorithm wrote something into the queue
            #  - then we treat it as a successful run
            try:
                info = autosklearn.evaluation.util.read_queue(queue)
                result = info[-1]['loss']
                status = info[-1]['status']
                additional_run_info = info[-1]['additional_run_info']

                if obj.stdout:
                    additional_run_info['subprocess_stdout'] = obj.stdout
                if obj.stderr:
                    additional_run_info['subprocess_stderr'] = obj.stderr

                if obj.exit_status is pynisher.TimeoutException:
                    additional_run_info['info'] = 'Run stopped because of timeout.'
                elif obj.exit_status is pynisher.MemorylimitException:
                    additional_run_info['info'] = 'Run stopped because of memout.'

                if status in [StatusType.SUCCESS, StatusType.DONOTADVANCE]:
                    cost = result
                else:
                    cost = self.worst_possible_result

            except Empty:
                info = None
                if obj.exit_status is pynisher.TimeoutException:
                    status = StatusType.TIMEOUT
                    additional_run_info = {'error': 'Timeout'}
                elif obj.exit_status is pynisher.MemorylimitException:
                    status = StatusType.MEMOUT
                    additional_run_info = {
                        "error": "Memout (used more than {} MB).".format(self.memory_limit)
                    }
                else:
                    raise ValueError(obj.exit_status)
                cost = self.worst_possible_result

        elif obj.exit_status is TAEAbortException:
            info = None
            status = StatusType.ABORT
            cost = self.worst_possible_result
            additional_run_info = {'error': 'Your configuration of '
                                            'auto-sklearn does not work!',
                                   'exit_status': _encode_exit_status(obj.exit_status),
                                   'subprocess_stdout': obj.stdout,
                                   'subprocess_stderr': obj.stderr,
                                   }

        else:
            try:
                info = autosklearn.evaluation.util.read_queue(queue)
                result = info[-1]['loss']
                status = info[-1]['status']
                additional_run_info = info[-1]['additional_run_info']

                if obj.exit_status == 0:
                    cost = result
                else:
                    status = StatusType.CRASHED
                    cost = self.worst_possible_result
                    additional_run_info['info'] = 'Run treated as crashed ' \
                                                  'because the pynisher exit ' \
                                                  'status %s is unknown.' % \
                                                  str(obj.exit_status)
                    additional_run_info['exit_status'] = _encode_exit_status(obj.exit_status)
                    additional_run_info['subprocess_stdout'] = obj.stdout
                    additional_run_info['subprocess_stderr'] = obj.stderr
            except Empty:
                info = None
                additional_run_info = {
                    'error': 'Result queue is empty',
                    'exit_status': _encode_exit_status(obj.exit_status),
                    'subprocess_stdout': obj.stdout,
                    'subprocess_stderr': obj.stderr,
                    'exitcode': obj.exitcode
                }
                status = StatusType.CRASHED
                cost = self.worst_possible_result

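        # Without a budget, a DONOTADVANCE result is as good as a success.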
        if (
            (self.budget_type is None or budget == 0)
            and status == StatusType.DONOTADVANCE
        ):
            status = StatusType.SUCCESS

        if not isinstance(additional_run_info, dict):
            additional_run_info = {'message': additional_run_info}

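        # For iterative resampling strategies, attach the recorded learning
        # curves (and their runtimes) to the additional run info.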
        if (
            info is not None
            and self.resampling_strategy in ('holdout-iterative-fit', 'cv-iterative-fit')
            and status != StatusType.CRASHED
        ):
            learning_curve = autosklearn.evaluation.util.extract_learning_curve(info)
            learning_curve_runtime = autosklearn.evaluation.util.extract_learning_curve(
                info, 'duration'
            )
            if len(learning_curve) > 1:
                additional_run_info['learning_curve'] = learning_curve
                additional_run_info['learning_curve_runtime'] = learning_curve_runtime

            train_learning_curve = autosklearn.evaluation.util.extract_learning_curve(
                info, 'train_loss'
            )
            if len(train_learning_curve) > 1:
                additional_run_info['train_learning_curve'] = train_learning_curve
                additional_run_info['learning_curve_runtime'] = learning_curve_runtime

            if self._get_validation_loss:
                validation_learning_curve = autosklearn.evaluation.util.extract_learning_curve(
                    info, 'validation_loss',
                )
                if len(validation_learning_curve) > 1:
                    additional_run_info['validation_learning_curve'] = \
                        validation_learning_curve
                    additional_run_info[
                        'learning_curve_runtime'] = learning_curve_runtime

            if self._get_test_loss:
                test_learning_curve = autosklearn.evaluation.util.extract_learning_curve(
                    info, 'test_loss',
                )
                if len(test_learning_curve) > 1:
                    additional_run_info['test_learning_curve'] = test_learning_curve
                    additional_run_info[
                        'learning_curve_runtime'] = learning_curve_runtime

        if isinstance(config, int):
            origin = 'DUMMY'
            config_id = config
        else:
            origin = getattr(config, 'origin', 'UNKNOWN')
            config_id = config.config_id
        additional_run_info['configuration_origin'] = origin

        runtime = float(obj.wall_clock_time)

        autosklearn.evaluation.util.empty_queue(queue)
        self.logger.info("Finished evaluating configuration %d" % config_id)
        return status, cost, runtime, additional_run_info
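
The run method communicates with the target algorithm through the queue rather than through return values: the wrapped function puts result dicts on the queue, read_queue drains them, and the last entry is taken as the final result. A minimal sketch of that contract follows, with a hypothetical toy_target standing in for self.ta (the real evaluator does far more, and the StatusType import path may differ across SMAC versions):

import multiprocessing

import pynisher
from smac.tae import StatusType


def toy_target(queue, config):
    # Stand-in for the real evaluator: report a result through the queue
    # using the keys that run() reads back.
    queue.put({
        'loss': 0.25,
        'status': StatusType.SUCCESS,
        'additional_run_info': {},
    })


context = multiprocessing.get_context('fork')
queue = context.Queue()

obj = pynisher.enforce_limits(
    wall_time_in_s=30,
    mem_in_mb=1024,
    context=context,
)(toy_target)
obj(queue=queue, config=None)

if obj.exit_status == 0:
    info = queue.get(timeout=1)
    print(info['loss'])  # 0.25, with a SUCCESS status alongside it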