def run_automl(input_dir, output_dir, data_name, time_budget, running_on_codalab):
    """Run the AutoML stacking manager on a single competition dataset.

    Parameters
    ----------
    input_dir : str
        Directory holding the competition-format input data.
    output_dir : str
        Directory where the manager writes its predictions.
    data_name : str
        Basename of the dataset inside ``input_dir``.
    time_budget : number
        Wall-clock budget handed to the manager (units defined by the
        competition harness — presumably seconds; confirm against caller).
    running_on_codalab : bool
        When True, plotting is disabled (no display on the server).

    Exceptions are caught and printed rather than propagated so that a
    single failing dataset does not abort a multi-dataset run.
    """
    print('input_dir = "%s"' % input_dir)
    print('output_dir = "%s"' % output_dir)
    print('data_name = "%s"' % data_name)
    print('time_budget = %s' % time_budget)
    try:
        # Default experiment parameters; the manager consumes this dict.
        exp = experiment.exp_param_defaults(dict())
        mgr = managers.FixedLearnersStackingManager(
            input_dir=input_dir, output_dir=output_dir,
            basename=data_name, time_budget=time_budget,
            compute_quantum=None, plot=not running_on_codalab,
            n_folds=5,
            overhead_memory=constants.OVERHEAD,
            cgroup_soft_limit=constants.CGROUP_SOFT_LIMIT,
            cgroup_hard_limit=constants.CGROUP_HARD_LIMIT,
            exp=exp)
        mgr.communicate()
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; narrow to Exception and log the traceback instead.
        traceback.print_exc()
def timing_triple_cloud(): execfile('picloud_venture_credentials.py') exp_params = experiment.exp_param_defaults({}) exp_params['intermediate_iter'] = 1 exp_params['max_initial_run_time'] = 30 exp_params['max_burn_time'] = 30 exp_params['max_sample_time'] = 30 exp_params['n_samples'] = 25 print experiment.exp_params_to_str(exp_params) data = scipy.io.loadmat("../data/irm_synth/irm_synth_20.mat", squeeze_me=True) observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat)) missing = list(zip(data['test_i'].flat, data['test_j'].flat, data['test_v'].flat)) data = {'observations' : observed, 'missing' : missing} model = models.product_IRM model_params = {'D' : 1, 'alpha' : 1, 'symmetric' : True} # Timing run print 'Timing' job_id = cloud.call(experiment.network_cv_timing_run, data, model, exp_params, model_params, _max_runtime=5, _env=cloud_environment) time_per_mh_iter = cloud.result(job_id)['time_per_mh_iter'] # Live run print 'Live' exp_params['intermediate_iter'] = max(1, int(round(0.9 * exp_params['max_sample_time'] / (exp_params['n_samples'] * time_per_mh_iter)))) job_id = cloud.call(experiment.network_cv_single_run, data, model, exp_params, model_params, _max_runtime=5, _env=cloud_environment) cloud.join(job_id) print cloud.result(job_id)
def timing_run_local(): exp_params = experiment.exp_param_defaults({}) exp_params['intermediate_iter'] = 1 exp_params['max_initial_run_time'] = 30 print experiment.exp_params_to_str(exp_params) data = scipy.io.loadmat("../data/irm_synth/irm_synth_20.mat", squeeze_me=True) observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat)) missing = list(zip(data['test_i'].flat, data['test_j'].flat, data['test_v'].flat)) data = {'observations' : observed, 'missing' : missing} model = models.product_IRM model_params = {'D' : 1, 'alpha' : 1, 'symmetric' : True} print experiment.network_cv_timing_run(data, model, exp_params, model_params)
def fold(unused=None): execfile('picloud_venture_credentials.py') data_file = '../data/irm_synth/irm_synth_20.mat' data_dir = '../data/irm_synth/' model = models.product_IRM model_params = {'D' : 1, 'alpha' : 1, 'symmetric' : True} exp_params = experiment.exp_param_defaults({}) exp_params['intermediate_iter'] = 1 exp_params['max_initial_run_time'] = 20 exp_params['max_burn_time'] = 10 exp_params['max_sample_time'] = 20 exp_params['n_samples'] = 25 exp_params['n_restarts'] = 3 print experiment.exp_params_to_str(exp_params) print experiment.network_cv_fold(data_file, data_dir, model, exp_params, model_params)
def timing_run_cloud(): execfile('picloud_venture_credentials.py') exp_params = experiment.exp_param_defaults({}) exp_params['intermediate_iter'] = 1 exp_params['max_initial_run_time'] = 30 print experiment.exp_params_to_str(exp_params) data = scipy.io.loadmat("../data/irm_synth/irm_synth_20.mat", squeeze_me=True) observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat)) missing = list(zip(data['test_i'].flat, data['test_j'].flat, data['test_v'].flat)) data = {'observations' : observed, 'missing' : missing} model = models.product_IRM model_params = {'D' : 1, 'alpha' : 1, 'symmetric' : True} job_id = cloud.call(experiment.network_cv_timing_run, data, model, exp_params, model_params, _max_runtime=5, _env=cloud_environment) cloud.join(job_id) print cloud.result(job_id)
def run_automl(input_dir, output_dir, data_name, time_budget, running_on_codalab):
    """Run the AutoML stacking manager on a single competition dataset.

    NOTE(review): this definition appears twice in the file; at import time
    the later definition shadows the earlier one — consider deleting one.

    Parameters
    ----------
    input_dir : str
        Directory holding the competition-format input data.
    output_dir : str
        Directory where the manager writes its predictions.
    data_name : str
        Basename of the dataset inside ``input_dir``.
    time_budget : number
        Wall-clock budget handed to the manager (units defined by the
        competition harness — presumably seconds; confirm against caller).
    running_on_codalab : bool
        When True, plotting is disabled (no display on the server).

    Exceptions are caught and printed rather than propagated so that a
    single failing dataset does not abort a multi-dataset run.
    """
    print('input_dir = "%s"' % input_dir)
    print('output_dir = "%s"' % output_dir)
    print('data_name = "%s"' % data_name)
    print('time_budget = %s' % time_budget)
    try:
        # Default experiment parameters; the manager consumes this dict.
        exp = experiment.exp_param_defaults(dict())
        mgr = managers.FixedLearnersStackingManager(
            input_dir=input_dir, output_dir=output_dir,
            basename=data_name, time_budget=time_budget,
            compute_quantum=None, plot=not running_on_codalab,
            n_folds=5,
            overhead_memory=constants.OVERHEAD,
            cgroup_soft_limit=constants.CGROUP_SOFT_LIMIT,
            cgroup_hard_limit=constants.CGROUP_HARD_LIMIT,
            exp=exp)
        mgr.communicate()
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; narrow to Exception and log the traceback instead.
        traceback.print_exc()
def timing_triple_local(): exp_params = experiment.exp_param_defaults({}) exp_params['intermediate_iter'] = 1 exp_params['max_initial_run_time'] = 30 exp_params['max_burn_time'] = 30 exp_params['max_sample_time'] = 30 exp_params['n_samples'] = 25 print experiment.exp_params_to_str(exp_params) data = scipy.io.loadmat("../data/irm_synth/irm_synth_20.mat", squeeze_me=True) observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat)) missing = list(zip(data['test_i'].flat, data['test_j'].flat, data['test_v'].flat)) data = {'observations' : observed, 'missing' : missing} model = models.product_IRM model_params = {'D' : 1, 'alpha' : 1, 'symmetric' : True} # Timing run time_per_mh_iter = experiment.network_cv_timing_run(data, model, exp_params, model_params)['time_per_mh_iter'] # Live run exp_params['intermediate_iter'] = max(1, int(round(0.9 * exp_params['max_sample_time'] / (exp_params['n_samples'] * time_per_mh_iter)))) print experiment.network_cv_single_run(data, model, exp_params, model_params)