Ejemplo n.º 1
0
    def run_cv(self, config: Configuration, scenario: ASlibScenario, folds=10):
        '''
            cross-validate one configuration over the splits of a scenario

            For every fold index 1..folds the scenario is split with
            scenario.get_split, the pipeline is fit on the training part,
            schedules are predicted for the test part and the validation
            statistics are merged into one overall Stats object.

            Arguments
            ---------
            config: Configuration
                parameter configuration handed to self.fit and self.predict
            scenario: autofolio.data.aslib_scenario.ASlibScenario
                aslib scenario at hand
            folds: int
                number of cv-splits

            Returns
            -------
            the value of cv_stat.show() over all folds, multiplied by -1 if
            scenario.maximize[0] is set. If a ValueError is raised along the
            way, the traceback is printed and the returned value is
            10 * scenario.algorithm_cutoff_time instead.
        '''
        try:
            # only runtime scenarios carry a meaningful cutoff for the stats
            is_runtime = scenario.performance_type[0] == "runtime"
            cutoff = scenario.algorithm_cutoff_time if is_runtime else 0
            cv_stat = Stats(runtime_cutoff=cutoff)

            for fold_id in range(1, folds + 1):
                self.logger.info("CV-Iteration: %d" % fold_id)
                test_scenario, training_scenario = scenario.get_split(
                    indx=fold_id)

                pipeline, presolver, chooser = self.fit(
                    scenario=training_scenario, config=config)
                schedules = self.predict(
                    test_scenario, config, pipeline, presolver, chooser)

                validator = Validator()
                perf_type = scenario.performance_type[0]
                if perf_type == "runtime":
                    validate = validator.validate_runtime
                elif perf_type == "solution_quality":
                    validate = validator.validate_quality
                else:
                    raise ValueError("Unknown performance_type[0]")

                fold_stats = validate(
                    schedules=schedules, test_scenario=test_scenario)
                cv_stat.merge(stat=fold_stats)

            self.logger.info(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
            self.logger.info("CV Stats")
            par10 = cv_stat.show()
        except ValueError:
            traceback.print_exc()
            # sign is chosen so that the flip below always yields
            # +10 * cutoff as the returned penalty
            factor = -10 if scenario.maximize[0] else 10
            par10 = scenario.algorithm_cutoff_time * factor

        if not scenario.maximize[0]:
            return par10
        # maximization scenarios are reported with flipped sign
        return par10 * -1
Ejemplo n.º 2
0
    def run_fold(self, config: Configuration, scenario: ASlibScenario, fold: int):
        '''
            run a given fold of cross validation

            The scenario is split with scenario.get_split(indx=fold), the
            pipeline is fit on the training part, schedules are predicted
            for the test part and then validated.

            Arguments
            ---------
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand
            config: Configuration
                parameter configuration to use for preprocessing
            fold: int
                fold id

            Returns
            -------
            Stats()
                result of Validator.validate_runtime or
                Validator.validate_quality on the test split

            Raises
            ------
            ValueError
                if scenario.performance_type[0] is neither "runtime" nor
                "solution_quality"
        '''
        self.logger.info("CV-Iteration: %d", fold)

        test_scenario, training_scenario = scenario.get_split(indx=fold)

        feature_pre_pipeline, pre_solver, selector = self.fit(
            scenario=training_scenario, config=config)

        schedules = self.predict(
            test_scenario, config, feature_pre_pipeline, pre_solver, selector)

        val = Validator()
        # read once; the error branch previously referenced an undefined
        # bare name `performance_type` and died with a NameError
        performance_type = scenario.performance_type[0]
        if performance_type == "runtime":
            stats = val.validate_runtime(
                schedules=schedules, test_scenario=test_scenario)
        elif performance_type == "solution_quality":
            stats = val.validate_quality(
                schedules=schedules, test_scenario=test_scenario)
        else:
            raise ValueError("Unknown: %s" % (performance_type))

        return stats
Ejemplo n.º 3
0
    def run_fold(self, config: Configuration, scenario:ASlibScenario, fold:int, test_scenario=None, return_fit:bool=False):
        '''
            fit on training data and validate on one fold or on given test data

            Arguments
            ---------
            scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario at hand; used as a whole for training if
                <test_scenario> is given, otherwise split via get_split
            config: Configuration
                parameter configuration passed to self.fit and self.predict
            fold: int
                fold id; only used if <test_scenario> is None
            test_scenario: aslib_scenario.aslib_scenario.ASlibScenario
                aslib scenario with test data for validation;
                generated from <scenario> if None
            return_fit: bool
                if True, the fit components and the schedules are returned
                in addition to the statistics

            Returns
            -------
            Stats()
                validation result for the test data

            (pre_pipeline, pre_solver, selector):
                only present if return_fit is True
                the three components returned by self.fit

            schedule:
                only present if return_fit is True
                the result of self.predict on the test data
                (documented upstream as dict of
                string -> list of (solver, cutoff) pairs)

            Raises
            ------
            ValueError
                if scenario.performance_type[0] is neither "runtime" nor
                "solution_quality"
        '''

        if test_scenario is not None:
            # explicit test data: train on the complete scenario
            self.logger.info("Validation on test data")
            training_scenario = scenario
        else:
            self.logger.info("CV-Iteration: %d" % (fold))
            test_scenario, training_scenario = scenario.get_split(indx=fold)

        pipeline, presolver, chooser = self.fit(
            scenario=training_scenario, config=config)
        schedules = self.predict(
            test_scenario, config, pipeline, presolver, chooser)

        validator = Validator()
        perf_type = scenario.performance_type[0]
        if perf_type == "runtime":
            validate = validator.validate_runtime
        elif perf_type == "solution_quality":
            validate = validator.validate_quality
        else:
            raise ValueError("Unknown: %s" % perf_type)

        stats = validate(
            schedules=schedules,
            test_scenario=test_scenario,
            train_scenario=training_scenario)

        if not return_fit:
            return stats
        return stats, (pipeline, presolver, chooser), schedules
Ejemplo n.º 4
0
def main():
    """Validate algorithm-selection predictions against an ASlib scenario.

    Command-line entry point. Reads the scenario and the predictions CSV,
    selects for every instance the algorithm with the smallest "predicted"
    value, turns the selections into schedules and prints the statistics
    computed by ``Validator``.

    Raises:
        ValueError: if the scenario's first performance type is neither
            "runtime" nor "solution_quality".
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Validate the algorithm selection performance of the "
        "predictions made using test-as-auto-sklearn using "
        "autofolio.validation.validate.Validator.")

    parser.add_argument('scenario', help="The ASlib scenario")
    parser.add_argument('predictions',
                        help="The predictions file, from "
                        "test-as-auto-sklearn")

    parser.add_argument('--config',
                        help="A (yaml) config file which "
                        "specifies options controlling the learner behavior")

    logging_utils.add_logging_options(parser)
    args = parser.parse_args()
    logging_utils.update_logging(args)

    logger.info("Loading ASlib scenario")

    scenario = ASlibScenario()
    scenario.read_scenario(args.scenario)

    if args.config is not None:
        logger.info("Loading yaml config file")
        # NOTE: the original used yaml.load(open(...)): that leaks the file
        # handle, and calling yaml.load without a Loader is rejected by
        # PyYAML >= 6 (and is unsafe on older versions). The config is
        # plain option data, so safe_load is sufficient.
        with open(args.config) as config_file:
            # an empty file parses to None; treat it like "no options"
            config = yaml.safe_load(config_file) or {}
    else:
        config = {}

    # default to all feature groups of the scenario, both when no config
    # was given and when the config does not mention the option
    if 'allowed_feature_groups' not in config:
        # a flat list of group names (the original wrapped the keys view
        # in a one-element list)
        config['allowed_feature_groups'] = list(
            scenario.feature_group_dict.keys())

    # either way, update the scenario with the features used during training
    scenario.used_feature_groups = config['allowed_feature_groups']

    logger.info("Reading predictions")
    predictions = pd.read_csv(args.predictions)

    logger.info(
        "Selecting the algorithm with smallest prediction for each instance")

    algorithm_selections = pandas_utils.get_group_extreme(
        predictions, "predicted", ex_type="min", group_fields="instance_id")

    logger.info("Creating the schedules for the validator")

    schedules = parallel.apply_df_simple(algorithm_selections, _get_schedule,
                                         scenario.algorithm_cutoff_time)

    schedules = utils.merge_dicts(*schedules)

    val = Validator()
    performance_type = scenario.performance_type[0]

    if performance_type == "runtime":
        stats = val.validate_runtime(schedules=schedules,
                                     test_scenario=scenario)

    elif performance_type == "solution_quality":
        stats = val.validate_quality(schedules=schedules,
                                     test_scenario=scenario)

    else:
        msg = "Unknown performance type: {}".format(performance_type)
        raise ValueError(msg)

    logger.info("=== RESULTS ===")
    stats.show()