def __init__(self, max_iter_num, dataset_id, ns_num, **kwargs):
    """Load the data for this problem and set up its search.

    Args:
        max_iter_num: Requested number of iterations. It is forwarded to
            ``get_data`` and afterwards capped by the number of labels
            that were actually loaded.
        dataset_id: Dataset identifier forwarded to ``get_data``.
        ns_num: Forwarded to ``get_data`` as ``max_ns_num``.
        **kwargs: Optional settings. ``use_log`` (default ``True``),
            ``shuffle`` (default ``False``) and ``vw_format`` (default
            ``True``) are forwarded to ``get_data``; ``fixed_hp_config``
            (default ``{}``) is stored on the instance. Every keyword is
            additionally merged into ``self._problem_info``.
    """
    # Keep the option as the plain value. Wrapping it in a one-element
    # tuple would make it truthy even when the caller passes
    # use_log=False, so the flag could never be switched off.
    use_log = kwargs.get("use_log", True)
    shuffle = kwargs.get("shuffle", False)
    vw_format = kwargs.get("vw_format", True)
    print("dataset_id", dataset_id)
    self.vw_examples, self.Y = get_data(
        max_iter_num,
        dataset_id=dataset_id,
        vw_format=vw_format,
        max_ns_num=ns_num,
        shuffle=shuffle,
        use_log=use_log,
    )
    # Never iterate past the number of examples that were loaded.
    self.max_iter_num = min(max_iter_num, len(self.Y))
    self._problem_info = {
        "max_iter_num": self.max_iter_num,
        "dataset_id": dataset_id,
        "ns_num": ns_num,
    }
    # Caller-supplied keywords are recorded too and override the
    # defaults above when the keys collide.
    self._problem_info.update(kwargs)
    self._fixed_hp_config = kwargs.get("fixed_hp_config", {})
    # The namespace layout is derived from the first example only.
    self.namespace_feature_dim = AutoVW.get_ns_feature_dim_from_vw_example(
        self.vw_examples[0])
    self._raw_namespaces = list(self.namespace_feature_dim.keys())
    self._setup_search()
def __init__(self, max_iter_num, dataset_id, ns_num, **kwargs):
    """Initialise the parent problem, then derive the problem id and search.

    Args:
        max_iter_num: Forwarded unchanged to the parent constructor.
        dataset_id: Forwarded unchanged to the parent constructor.
        ns_num: Forwarded unchanged to the parent constructor.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(max_iter_num, dataset_id, ns_num, **kwargs)
    # NOTE(review): the imported name is not used in this method --
    # confirm whether the import is only wanted for its side effects.
    from flaml.onlineml import VowpalWabbitTrial

    # Namespaces are read off the first loaded example.
    first_example = self.vw_examples[0]
    ns_feature_dim = AutoVW.get_ns_feature_dim_from_vw_example(first_example)
    self._raw_namespaces = [*ns_feature_dim.keys()]

    self._info_key_list = [
        "dataset_id",
        "max_iter_num",
        "ns_num",
        "shuffle",
        "use_log",
    ]
    # Keys absent from the problem info show up as the literal 'None'.
    id_parts = []
    for info_key in self._info_key_list:
        id_parts.append(str(self._problem_info.get(info_key, 'None')))
    self.problem_id = 'vw-ns-interaction-' + '_'.join(id_parts)

    self._setup_search()
    logger.info('search space %s %s', self._search_space, self.problem_id)
def test_supervised_vw_tune_namespace(self):
    """Run AutoVW through the online learning loop and print the mean loss."""
    # basic experiment setting
    vw_oml_problem_args, vw_online_aml_problem = get_vw_tuning_problem()
    autovw_settings = dict(
        max_live_model_num=5,
        search_space=vw_online_aml_problem.search_space,
        init_config=vw_online_aml_problem.init_config,
        min_resource_lease='auto',
        random_seed=2345,
    )
    autovw = AutoVW(**autovw_settings)

    iteration_budget = vw_online_aml_problem.max_iter_num
    examples = vw_online_aml_problem.vw_examples
    # Use the loss named in the fixed config, defaulting to "squared".
    loss_name = vw_oml_problem_args["fixed_hp_config"].get(
        "loss_function", "squared")

    cumulative_loss_list = online_learning_loop(
        iteration_budget,
        examples,
        autovw,
        loss_func=loss_name,
    )
    mean_loss = sum(cumulative_loss_list) / len(cumulative_loss_list)
    print('final average loss:', mean_loss)
def __init__(self, max_iter_num, dataset_id, ns_num, **kwargs):
    """Load the data for this problem and set up its search.

    Args:
        max_iter_num: Requested number of iterations. It is forwarded to
            ``get_data`` and afterwards capped by the number of labels
            that were actually loaded.
        dataset_id: Dataset identifier forwarded to ``get_data``.
        ns_num: Forwarded to ``get_data`` as ``max_ns_num``.
        **kwargs: Optional settings. ``use_log`` (default ``True``),
            ``shuffle`` (default ``False``) and ``vw_format`` (default
            ``True``) are forwarded to ``get_data``; ``fixed_hp_config``
            (default ``{}``) is stored on the instance. Every keyword is
            additionally merged into ``self._problem_info``.
    """
    # No trailing comma here: a trailing comma turns the value into a
    # one-element tuple, which is truthy even for use_log=False.
    use_log = kwargs.get('use_log', True)
    shuffle = kwargs.get('shuffle', False)
    vw_format = kwargs.get('vw_format', True)
    print('dataset_id', dataset_id)
    self.vw_examples, self.Y = get_data(max_iter_num,
                                        dataset_id=dataset_id,
                                        vw_format=vw_format,
                                        max_ns_num=ns_num,
                                        shuffle=shuffle,
                                        use_log=use_log)
    # Never iterate past the number of examples that were loaded.
    self.max_iter_num = min(max_iter_num, len(self.Y))
    self._problem_info = {
        'max_iter_num': self.max_iter_num,
        'dataset_id': dataset_id,
        'ns_num': ns_num,
    }
    # Caller-supplied keywords are recorded too and override the
    # defaults above when the keys collide.
    self._problem_info.update(kwargs)
    self._fixed_hp_config = kwargs.get('fixed_hp_config', {})
    # The namespace layout is derived from the first example only.
    self.namespace_feature_dim = AutoVW.get_ns_feature_dim_from_vw_example(
        self.vw_examples[0])
    self._raw_namespaces = list(self.namespace_feature_dim.keys())
    self._setup_search()
} # setup configs for the experiments to run alg_dic = {} for alg_name in args.method_list: if alg_name in method_data.keys(): alg_args = method_data[alg_name] alg_alias = '_'.join([alg_name, exp_config]) if 'is_naive' not in alg_args or not alg_args['is_naive']: autovw_args = auto_alg_common_args.copy() autovw_args.update(alg_args['config']) # use the method_name+current_exp_config as the alias for the algorithm logger.info('alg_alias %s %s', alg_alias, alg_args) logger.info('trial runner config %s %s', alg_args['config'], autovw_args) alg_dic[alg_alias] = AutoVW(**autovw_args) else: vw_args = fixed_hp_config.copy() if 'config' in alg_args and alg_args[ 'config'] is not None: vw_args.update(alg_args['config']) alg_dic[alg_alias] = pyvw.vw(**vw_args) else: print('alg name not in methods.yaml') NotImplementedError # convert method names from input to the names in alg_dic for alg_name, alg in alg_dic.items(): max_iter_num = vw_online_aml_problem.max_iter_num time_start = time.time() print('----------running', alg_name, '-----------') ### get result file name