def fit(self, pipeline_config, run_result_dir):
        """Read a benchmark run's 'instance.info' file and build a DataManager.

        Arguments:
            pipeline_config {dict} -- the full pipeline config (not used here)
            run_result_dir {str} -- directory containing the 'instance.info' file

        Returns:
            dict -- {'instance_info': parsed instance info,
                     'data_manager': DataManager with problem_type set}
        """
        instance_file_config_parser = ConfigFileParser([
            ConfigOption(name='path', type='directory', required=True),
            ConfigOption(name='is_classification', type=to_bool,
                         required=True),
            ConfigOption(name='is_multilabel', type=to_bool, required=True),
            ConfigOption(name='num_features', type=int, required=True),
            ConfigOption(name='categorical_features',
                         # BUG FIX: was type=bool, but bool("False") is True;
                         # use to_bool like the other boolean options above.
                         type=to_bool,
                         required=True,
                         list=True),
            ConfigOption(
                name='instance_shape',
                # parse the literal, then validate that it really is a tuple
                type=[ast.literal_eval, lambda x: isinstance(x, tuple)],
                required=True)
        ])
        instance_info = instance_file_config_parser.read(
            os.path.join(run_result_dir, 'instance.info'))
        instance_info = instance_file_config_parser.set_defaults(instance_info)

        dm = DataManager()
        if instance_info["is_multilabel"]:
            dm.problem_type = ProblemType.FeatureMultilabel
        elif instance_info["is_classification"]:
            dm.problem_type = ProblemType.FeatureClassification
        else:
            # BUG FIX: the original assigned FeatureClassification in both the
            # elif and the else branch, so non-classification (regression)
            # instances were mis-typed.
            dm.problem_type = ProblemType.FeatureRegression

        return {'instance_info': instance_info, 'data_manager': dm}
Example #2
0
    def get_pipeline_config(self,
                            throw_error_if_invalid=True,
                            **pipeline_config):
        """Complete a partial pipeline config with defaults for every option.

        Keyword Arguments:
            throw_error_if_invalid {bool} -- Throw an error if invalid config option is defined (default: {True})

        Returns:
            dict -- the full config for the pipeline, containing values for all options
        """
        parser = ConfigFileParser(self.get_pipeline_config_options())
        pipeline_config = parser.set_defaults(
            pipeline_config, throw_error_if_invalid=throw_error_if_invalid)

        # Validate cross-option conditions, e.g. max_budget > min_budget.
        # A failing condition either aborts or is merely logged, depending
        # on throw_error_if_invalid.
        for condition in self.get_pipeline_config_conditions():
            try:
                condition(pipeline_config)
            except Exception as exc:
                if throw_error_if_invalid:
                    raise
                print(exc)
                traceback.print_exc()

        return pipeline_config
Example #3
0
    def get_pipeline_config(self,
                            throw_error_if_invalid=True,
                            **pipeline_config):
        """Fill in defaults for all pipeline config options and check conditions.

        Keyword Arguments:
            throw_error_if_invalid {bool} -- raise on invalid options or failed
                conditions instead of only logging them (default: {True})

        Returns:
            dict -- the completed pipeline config
        """
        options = self.get_pipeline_config_options()
        conditions = self.get_pipeline_config_conditions()

        config_parser = ConfigFileParser(options)
        pipeline_config = config_parser.set_defaults(
            pipeline_config, throw_error_if_invalid=throw_error_if_invalid)

        # Each condition callable raises if the now-complete config violates it.
        for check in conditions:
            try:
                check(pipeline_config)
            except Exception as error:
                if throw_error_if_invalid:
                    raise
                print(error)
                traceback.print_exc()

        return pipeline_config
Example #4
0
 def get_pipeline_config_options(self):
     """Config options for dataset download location and data loading."""
     return [
         ConfigOption(
             "default_dataset_download_dir",
             default=ConfigFileParser.get_autonet_home(),
             type='directory',
             info="Directory default datasets will be downloaded to."),
         ConfigOption("dataloader_worker", default=1, type=int),
         ConfigOption("dataloader_cache_size_mb", default=0, type=int),
     ]
 def get_pipeline_config_options(self):
     """Config options describing where benchmark instances are located."""
     instances = ConfigOption("instances",
                              default=None,
                              type='directory',
                              required=True)
     instance_slice = ConfigOption("instance_slice", default=None, type=str)
     dataset_root = ConfigOption("dataset_root",
                                 default=ConfigFileParser.get_autonet_home(),
                                 type='directory')
     return [instances, instance_slice, dataset_root]
    def read_data(self, path, y):
        """Load image data referenced by *path*.

        Arguments:
            path -- None, an already-loaded dataset, or a path string
            y -- labels belonging to *path* (returned unchanged when the data
                 is already loaded)

        Returns:
            tuple -- (X, Y, description); description is the resolved path, or
                     a repr of the preloaded data truncated to 300 characters
        """
        # Nothing to load.
        if path is None:
            return None, None, None

        # Data was passed in directly rather than as a path on disk.
        if not isinstance(path, str):
            return path, y, str(path)[:300]

        # Resolve relative paths against the autonet home directory.
        if not os.path.isabs(path):
            path = os.path.abspath(
                os.path.join(ConfigFileParser.get_autonet_home(), path))

        if not os.path.exists(path):
            raise ValueError('Path ' + str(path) + ' is not a valid path.')

        image_manager = ImageManager()
        # NOTE(review): is_classification is hard-coded to True here — confirm
        # this reader is only ever used for classification datasets.
        image_manager.read_data(path, is_classification=True)

        return image_manager.X_train, image_manager.Y_train, path
Example #7
0
 def get_pipeline_config_options(self):
     """Config options controlling how image datasets are located and read."""
     options = []
     options.append(ConfigOption(name="file_extensions",
                                 default=['.png', '.jpg', '.JPEG', '.pgm'],
                                 type=str,
                                 list=True))
     options.append(ConfigOption(
         name="images_shape",
         default=[3, 32, 32],
         type=int,
         list=True,
         info="Image size input to the networks, images will be rescaled to this."))
     options.append(ConfigOption(
         name="images_root_folders",
         default=[ConfigFileParser.get_autonet_home()],
         type='directory',
         list=True,
         info="Directory relative to which image paths are given."))
     options.append(ConfigOption(name="max_class_size", default=None, type=int))
     return options
Example #8
0
 def get_autonet_config_file_parser(self):
     """Return a ConfigFileParser over this pipeline's config options."""
     options = self.pipeline.get_pipeline_config_options()
     return ConfigFileParser(options)
    parser.add_argument("--host_config", default=None, help="Override some configs according to host specifics.")
    parser.add_argument('benchmark', help='The benchmark to run')
    args = parser.parse_args()

    # NOTE(review): presumably '-' is reserved as a separator somewhere the
    # run id is embedded (e.g. worker naming) — confirm against run_benchmark.
    assert "-" not in args.run_id, "The run id must not contain a minus"
    
    benchmark_config_file = args.benchmark
    host_config_file = args.host_config

    benchmark = Benchmark()
    config_parser = benchmark.get_benchmark_config_file_parser()

    # Host config entries override benchmark config entries on key collisions.
    benchmark_config = config_parser.read(benchmark_config_file)
    benchmark_config.update(config_parser.read(host_config_file))

    if (args.result_dir is not None):
        # Result dir is resolved relative to the autonet home directory.
        benchmark_config['result_dir'] = os.path.join(ConfigFileParser.get_autonet_home(), args.result_dir)

    if (args.partial_benchmark is not None):
        # Up to three comma-separated parts:
        # instance_slice,autonet_config_slice,run_number_range
        split = args.partial_benchmark.split(',')
        if (len(split) > 0):
            benchmark_config['instance_slice'] = split[0]
        if (len(split) > 1):
            benchmark_config['autonet_config_slice'] = split[1]
        if (len(split) > 2):
            benchmark_config['run_number_range'] = split[2]

    benchmark_config['run_id'] = args.run_id
    benchmark_config['task_id'] = args.task_id    
    benchmark.run_benchmark(**benchmark_config)
Example #10
0
 def get_benchmark_config_file_parser(self):
     """Build a ConfigFileParser for the benchmark pipeline's options."""
     pipeline_options = self.benchmark_pipeline.get_pipeline_config_options()
     return ConfigFileParser(pipeline_options)
    # A run id range is given either as "a-b" (inclusive on both ends)
    # or as a single id, which becomes a one-element range.
    if "-" in args.run_id_range:
        run_id_range = range(int(args.run_id_range.split("-")[0]),
                             int(args.run_id_range.split("-")[1]) + 1)
    else:
        run_id_range = range(int(args.run_id_range),
                             int(args.run_id_range) + 1)

    benchmark_config_file = args.benchmark
    host_config_file = args.host_config

    benchmark = Benchmark()
    config_parser = benchmark.get_benchmark_config_file_parser()

    # Host config entries override benchmark config entries on key collisions.
    benchmark_config = config_parser.read(benchmark_config_file)
    benchmark_config.update(config_parser.read(host_config_file))

    if (args.result_dir is not None):
        # Result dir is resolved relative to the autonet home directory.
        benchmark_config['result_dir'] = os.path.join(
            ConfigFileParser.get_autonet_home(), args.result_dir)

    # Forward the remaining visualization options from the command line.
    benchmark_config['run_id_range'] = run_id_range
    benchmark_config['plot_logs'] = args.plot_logs.split(
        ",") if args.plot_logs is not None else list()
    benchmark_config['only_finished_runs'] = args.only_finished_runs
    benchmark_config['output_folder'] = args.output_folder
    benchmark_config['scale_uncertainty'] = args.scale_uncertainty
    benchmark_config['agglomeration'] = args.agglomeration
    benchmark_config['font_size'] = args.font_size

    benchmark.visualize_benchmark(**benchmark_config)
Example #12
0
                        default=[],
                        nargs="+",
                        type=str,
                        help="Additional args specified in template")
    parser.add_argument(
        "runscript", help="The script template used to submit job on cluster.")
    parser.add_argument('benchmark', help='The benchmark to run')
    args = parser.parse_args()

    # parse the runscript template
    with open(args.runscript, "r") as f:
        runscript_template = list(f)
    # Drop a trailing ".template" suffix (9 characters) from the script name.
    runscript_name = os.path.basename(
        args.runscript if not args.runscript.endswith(".template") else args.
        runscript[:-9])
    autonet_home = ConfigFileParser.get_autonet_home()
    # The template declares its host config on a "#HOST_CONFIG <path>" line
    # (13 = len("#HOST_CONFIG ")); a relative path is resolved against
    # autonet_home.
    host_config_orig = [
        l[13:] for l in runscript_template if l.startswith("#HOST_CONFIG ")
    ][0].strip()
    host_config_file = os.path.join(
        autonet_home, host_config_orig
    ) if not os.path.isabs(host_config_orig) else host_config_orig

    # parse template args: "#TEMPLATE_ARGUMENT <name> <default>" lines
    # (19 = len("#TEMPLATE_ARGUMENT "))
    runscript_template_args = [
        l[19:].strip().split() for l in runscript_template
        if l.startswith("#TEMPLATE_ARGUMENT ")
    ]
    parsed_template_args = dict()
    for variable_name, default in runscript_template_args:
        try: