def test_make_serializable(self):
    """Check JSON validity detection and stripping of non-serializable values.

    ``is_valid_json`` must accept a dict of plain values and reject one that
    holds an estimator instance; ``make_serializable`` must return the input
    dict without the entry whose value is an estimator.
    """
    plain = {'x': ['i am serializable', 0.1]}
    with_estimator = {'x': RandomForestClassifier()}
    assert functions.is_valid_json(plain)
    assert not functions.is_valid_json(with_estimator)

    mixed = {
        'x': ['i am serializable', 0.1],
        'y': RandomForestClassifier(),
    }
    expected = {'x': ['i am serializable', 0.1]}
    assert functions.make_serializable(mixed) == expected
def search_base_learner(id):
    """Create base learners from a base learner origin via a search and queue them.

    The JSON request body selects the search through ``method``:

    * ``'grid'``: ``source`` must define ``param_grid``; every combination
      of a ``ParameterGrid`` built from it is tried.
    * ``'random'``: ``source`` must define ``param_distributions``; a
      ``ParameterSampler`` draws ``n_iter`` settings from it.

    For each setting, a base learner is stored with status ``'queued'`` and a
    ``generate_meta_features`` job is enqueued, unless the estimator rejects
    the parameters or a base learner with identical hyperparameters already
    exists for this origin (both cases are skipped).

    Args:
        id: ID of the base learner origin to search over.

    Returns:
        JSON response containing the serialized form of every base learner
        that was newly created by this call.

    Raises:
        exceptions.UserError: If ``method`` is neither ``'grid'`` nor
            ``'random'``, if the origin does not exist (raised with 404), or
            if the origin is not final.
    """
    path = functions.get_path_from_query_string(request)
    req_body = request.get_json()

    if req_body['method'] == 'grid':
        param_grid = functions.import_object_from_string_code(
            req_body['source'], 'param_grid')
        iterator = ParameterGrid(param_grid)
    elif req_body['method'] == 'random':
        param_distributions = functions.import_object_from_string_code(
            req_body['source'], 'param_distributions')
        iterator = ParameterSampler(param_distributions,
                                    n_iter=req_body['n_iter'])
    else:
        raise exceptions.UserError('{} not a valid search method'.format(
            req_body['method']))

    with functions.DBContextManager(path) as session:
        base_learner_origin = session.query(
            models.BaseLearnerOrigin).filter_by(id=id).first()
        if base_learner_origin is None:
            raise exceptions.UserError(
                'Base learner origin {} not found'.format(id), 404)
        if not base_learner_origin.final:
            raise exceptions.UserError(
                'Base learner origin {} is not final'.format(id))

        learners = []
        for params in iterator:
            est = base_learner_origin.return_estimator()
            try:
                est.set_params(**params)
            except Exception as e:
                # Best effort: a setting the estimator rejects is reported
                # and skipped so the rest of the search still runs.
                print(repr(e))
                continue
            hyperparameters = functions.make_serializable(est.get_params())

            base_learners = session.query(models.BaseLearner).\
                filter_by(base_learner_origin_id=id,
                          hyperparameters=hyperparameters).all()
            if base_learners:  # already exists
                continue

            base_learner = models.BaseLearner(hyperparameters,
                                              'queued',
                                              base_learner_origin)

            # Commit before enqueueing so the job can refer to the new ID.
            session.add(base_learner)
            session.commit()
            with Connection(get_redis_connection()):
                rqtasks.generate_meta_features.delay(path, base_learner.id)
            learners.append(base_learner)

        # Build a real list: on Python 3 ``map`` returns a lazy iterator,
        # which the JSON encoder cannot serialize.
        return jsonify([learner.serialize for learner in learners])
def create_new_stacked_ensemble():
    """List all stacked ensembles (GET) or create and queue a new one (POST).

    For POST, the JSON request body must provide:

    * ``base_learner_ids``: IDs of the base learners to stack; every one
      must exist and have ``job_status == 'finished'``.
    * ``base_learner_origin_id``: ID of the base learner origin whose
      estimator is used as the secondary learner.
    * ``secondary_learner_hyperparameters_source``: source code defining
      ``params``, applied to the secondary learner's estimator.

    Returns:
        JSON response with the serialized list of stacked ensembles (GET) or
        the serialized newly created stacked ensemble (POST).

    Raises:
        exceptions.UserError: If a requested base learner is missing or
            unfinished, if the base learner origin does not exist (raised
            with 404), or if an identical stacked ensemble already exists.
    """
    path = functions.get_path_from_query_string(request)
    # Only POST carries a body. Parsing it for GET is unnecessary and, on
    # Flask versions that reject non-JSON content types in get_json(),
    # would turn a plain listing request into an error.
    req_body = request.get_json() if request.method == 'POST' else None

    with functions.DBContextManager(path) as session:
        if request.method == 'GET':
            return jsonify(
                list(
                    map(lambda x: x.serialize,
                        session.query(models.StackedEnsemble).all())))

        if request.method == 'POST':
            base_learners = session.query(models.BaseLearner).\
                filter(models.BaseLearner.id.in_(
                    req_body['base_learner_ids'])).all()
            if len(base_learners) != len(req_body['base_learner_ids']):
                raise exceptions.UserError('Not all base learners found')
            for learner in base_learners:
                if learner.job_status != 'finished':
                    raise exceptions.UserError(
                        'Not all base learners have finished')

            base_learner_origin = session.query(models.BaseLearnerOrigin).\
                filter_by(id=req_body['base_learner_origin_id']).first()
            if base_learner_origin is None:
                raise exceptions.UserError(
                    'Base learner origin {} not '
                    'found'.format(req_body['base_learner_origin_id']), 404)

            # Retrieve full hyperparameters: apply the user's params on top
            # of the estimator's own, then read the complete set back.
            est = base_learner_origin.return_estimator()
            params = functions.import_object_from_string_code(
                req_body['secondary_learner_hyperparameters_source'],
                'params')
            est.set_params(**params)
            hyperparameters = functions.make_serializable(est.get_params())

            # Reject an exact duplicate (same origin, same secondary
            # hyperparameters, same set of base learners).
            stacked_ensembles = session.query(models.StackedEnsemble).\
                filter_by(
                    base_learner_origin_id=req_body['base_learner_origin_id'],
                    secondary_learner_hyperparameters=hyperparameters,
                    base_learner_ids=sorted(
                        [bl.id for bl in base_learners])).all()
            if stacked_ensembles:
                raise exceptions.UserError('Stacked ensemble exists')

            stacked_ensemble = models.StackedEnsemble(
                secondary_learner_hyperparameters=hyperparameters,
                base_learners=base_learners,
                base_learner_origin=base_learner_origin,
                job_status='queued')

            # Commit before enqueueing so the job can refer to the new ID.
            session.add(stacked_ensemble)
            session.commit()

            with Connection(get_redis_connection()):
                rqtasks.evaluate_stacked_ensemble.delay(
                    path, stacked_ensemble.id)

            return jsonify(stacked_ensemble.serialize)
def create_base_learner(id):
    """Create one base learner from a base learner origin and queue it.

    The JSON request body must contain ``source``, code defining ``params``
    that is applied to the origin's estimator. The resulting full set of
    hyperparameters identifies the base learner; the ``source`` is also
    appended to the origin's ``description['single_searches']`` list.

    Args:
        id: ID of the base learner origin.

    Returns:
        JSON response with the serialized new base learner.

    Raises:
        exceptions.UserError: If the origin does not exist (raised with
            404), is not final, or already has a base learner with the same
            hyperparameters.
    """
    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        origin = session.query(
            models.BaseLearnerOrigin).filter_by(id=id).first()
        if origin is None:
            raise exceptions.UserError(
                'Base learner origin {} not found'.format(id), 404)
        if not origin.final:
            raise exceptions.UserError(
                'Base learner origin {} is not final'.format(id))

        req_body = request.get_json()

        # Apply the user's params, then read back the complete set.
        estimator = origin.return_estimator()
        source = req_body['source']
        user_params = functions.import_object_from_string_code(
            source, 'params')
        estimator.set_params(**user_params)
        hyperparameters = functions.make_serializable(estimator.get_params())

        duplicates = session.query(models.BaseLearner).filter_by(
            base_learner_origin_id=id,
            hyperparameters=hyperparameters).all()
        if duplicates:
            raise exceptions.UserError(
                'Base learner exists with given hyperparameters')

        base_learner = models.BaseLearner(hyperparameters, 'queued', origin)

        # Record the source on the origin. The augmented assignment is kept
        # so the description receives an item assignment, not only an
        # in-place list mutation.
        if 'single_searches' not in origin.description:
            origin.description['single_searches'] = []
        origin.description['single_searches'] += [source]

        session.add(base_learner)
        session.add(origin)
        session.commit()

        with Connection(get_redis_connection()):
            rqtasks.generate_meta_features.delay(path, base_learner.id)

        return jsonify(base_learner.serialize)
def start_automated_run(path, automated_run_id):
    """Run the automated hyperparameter search stored under the given ID.

    Marks the run as ``'started'``, seeds a Bayesian optimizer with the
    finished base learners that already lie in the search space, runs the
    optimization and marks the run ``'finished'``. On any failure the run is
    marked ``'errored'``, the error details are stored in its description
    and the exception is re-raised.

    Args:
        path (str): Path to Xcessiv notebook
        automated_run_id (str): Automated Run ID

    Raises:
        exceptions.UserError: If no automated run has the given ID.
    """
    with functions.DBContextManager(path) as session:
        automated_run = session.query(
            models.AutomatedRun).filter_by(id=automated_run_id).first()
        if not automated_run:
            raise exceptions.UserError(
                'Automated run {} does not exist'.format(automated_run_id))

        automated_run.job_id = get_current_job().id
        automated_run.job_status = 'started'
        session.add(automated_run)
        session.commit()

        try:
            module = functions.import_string_code_as_module(
                automated_run.source)
            # Default seed is 8 unless the user module sets random_state.
            random_state = getattr(module, 'random_state', 8)
            assert module.metric_to_optimize in \
                automated_run.base_learner_origin.metric_generators
            origin = automated_run.base_learner_origin

            # Parameters that are fixed for this run (not being searched)
            base_estimator = origin.return_estimator()
            base_estimator.set_params(**module.default_params)
            default_params = functions.make_serializable(
                base_estimator.get_params())
            non_searchable_params = {
                key: val
                for key, val in iteritems(default_params)
                if key not in module.pbounds
            }

            # Finished base learners whose fixed parameters all match
            existing_base_learners = [
                learner for learner in origin.base_learners
                if learner.job_status == 'finished'
                and not any(learner.hyperparameters[key] != val
                            for key, val in iteritems(non_searchable_params))
            ]

            # Collect known points to initialize the optimizer with
            target = []
            initialization_dict = {key: [] for key in module.pbounds}
            for learner in existing_base_learners:
                # A learner with a non-numerical searchable value is skipped
                if not all(
                        isinstance(learner.hyperparameters[key],
                                   numbers.Number)
                        for key in module.pbounds):
                    continue
                for key in module.pbounds:
                    initialization_dict[key].append(
                        learner.hyperparameters[key])
                target.append(
                    learner.individual_score[module.metric_to_optimize])

            if module.invert_metric:
                initialization_dict['target'] = [-score for score in target]
            else:
                initialization_dict['target'] = target
            print('{} existing in initialization dictionary'.format(
                len(initialization_dict['target'])))

            # Function the optimizer will maximize
            func_to_optimize = return_func_to_optimize(
                path, session, origin, module.default_params,
                module.metric_to_optimize, module.invert_metric,
                set(module.integers))

            # Bayesian optimization itself
            bo = BayesianOptimization(func_to_optimize, module.pbounds)
            bo.initialize(initialization_dict)
            np.random.seed(random_state)
            bo.maximize(**module.maximize_config)

            automated_run.job_status = 'finished'
            session.add(automated_run)
            session.commit()

        except:  # noqa: E722 - record every failure, then re-raise it
            session.rollback()
            error_type, error_value = sys.exc_info()[:2]
            automated_run.job_status = 'errored'
            automated_run.description['error_type'] = repr(error_type)
            automated_run.description['error_value'] = repr(error_value)
            automated_run.description['error_traceback'] = \
                traceback.format_exception(*sys.exc_info())
            session.add(automated_run)
            session.commit()
            raise
def func_to_optimize(**params):
    """Evaluate one hyperparameter setting and return the optimized metric.

    Builds an estimator from the enclosing scope's origin and default
    parameters, overrides them with ``params`` and looks for a matching
    base learner:

    * finished learner found: its stored score is returned directly;
    * unfinished learner found: the score is computed but no meta-features
      are saved and its status is not set to ``'finished'``;
    * none found: a new learner is created with status ``'started'``,
      evaluated, saved and marked ``'finished'``.

    The returned score is negated when ``invert_metric`` is set. On any
    failure during evaluation the learner is marked ``'errored'``, the error
    details are stored in its description and the exception is re-raised.
    """
    base_estimator = base_learner_origin.return_estimator()
    base_estimator.set_params(**default_params)
    # Hyperparameters declared as integers are converted with int()
    params = {
        key: int(val) if key in integers else val
        for key, val in iteritems(params)
    }
    base_estimator.set_params(**params)
    hyperparameters = functions.make_serializable(base_estimator.get_params())

    # Look for an existing base learner with these exact hyperparameters
    base_learner = session.query(models.BaseLearner).filter_by(
        base_learner_origin_id=base_learner_origin.id,
        hyperparameters=hyperparameters).first()

    calculate_only = False
    if not base_learner:
        # Nothing stored yet: create the learner and evaluate it below
        base_learner = models.BaseLearner(hyperparameters,
                                          'started',
                                          base_learner_origin)
        base_learner.job_id = get_current_job().id
        session.add(base_learner)
        session.commit()
    elif base_learner.job_status == 'finished':
        # Already evaluated: reuse the stored result
        stored_score = base_learner.individual_score[metric_to_optimize]
        return -stored_score if invert_metric else stored_score
    else:
        # Exists but unfinished: compute the result without saving
        calculate_only = True

    try:
        est = base_learner.return_estimator()
        extraction = session.query(models.Extraction).first()
        X, y = extraction.return_train_dataset()
        return_splits_iterable = functions.import_object_from_string_code(
            extraction.meta_feature_generation['source'],
            'return_splits_iterable')

        meta_features_list = []
        trues_list = []
        for train_index, test_index in return_splits_iterable(X, y):
            X_train = X[train_index]
            X_test = X[test_index]
            y_train = y[train_index]
            y_test = y[test_index]
            est = est.fit(X_train, y_train)
            generator_name = \
                base_learner.base_learner_origin.meta_feature_generator
            meta_features_list.append(getattr(est, generator_name)(X_test))
            trues_list.append(y_test)
        meta_features = np.concatenate(meta_features_list, axis=0)
        y_true = np.concatenate(trues_list)

        metric_sources = base_learner.base_learner_origin.metric_generators
        for key in metric_sources:
            metric_generator = functions.import_object_from_string_code(
                metric_sources[key], 'metric_generator')
            base_learner.individual_score[key] = metric_generator(
                y_true, meta_features)

        # Saving happens only for learners created by this call
        if not calculate_only:
            meta_features_path = base_learner.meta_features_path(path)
            meta_features_dir = os.path.dirname(meta_features_path)
            if not os.path.exists(meta_features_dir):
                os.makedirs(meta_features_dir)

            np.save(meta_features_path, meta_features, allow_pickle=False)
            base_learner.job_status = 'finished'
            base_learner.meta_features_exists = True
            session.add(base_learner)
            session.commit()

        score = base_learner.individual_score[metric_to_optimize]
        return -score if invert_metric else score

    except:  # noqa: E722 - record every failure, then re-raise it
        session.rollback()
        error_type, error_value = sys.exc_info()[:2]
        base_learner.job_status = 'errored'
        base_learner.description['error_type'] = repr(error_type)
        base_learner.description['error_value'] = repr(error_value)
        base_learner.description['error_traceback'] = \
            traceback.format_exception(*sys.exc_info())
        session.add(base_learner)
        session.commit()
        raise
def start_naive_bayes(automated_run, session, path):
    """Run a Bayesian optimization search for the given automated run.

    Seeds the optimizer with the finished base learners of the run's origin
    that already lie in the search space, runs the optimization and marks
    the run ``'finished'``.

    Args:
        automated_run (xcessiv.models.AutomatedRun): Automated run object

        session: Valid SQLAlchemy session

        path (str, unicode): Path to project folder
    """
    module = functions.import_string_code_as_module(automated_run.source)
    # Default seed is 8 unless the user module sets random_state.
    random_state = getattr(module, 'random_state', 8)
    assert module.metric_to_optimize in \
        automated_run.base_learner_origin.metric_generators
    origin = automated_run.base_learner_origin

    # Parameters that are fixed for this run (not being searched)
    base_estimator = origin.return_estimator()
    base_estimator.set_params(**module.default_params)
    default_params = functions.make_serializable(base_estimator.get_params())
    non_searchable_params = {
        key: val
        for key, val in iteritems(default_params)
        if key not in module.pbounds
    }

    # Finished base learners whose fixed parameters all match
    existing_base_learners = [
        learner for learner in origin.base_learners
        if learner.job_status == 'finished'
        and not any(learner.hyperparameters[key] != val
                    for key, val in iteritems(non_searchable_params))
    ]

    # Collect known points to initialize the optimizer with
    target = []
    initialization_dict = {key: [] for key in module.pbounds}
    for learner in existing_base_learners:
        # A learner with a non-numerical searchable value is skipped
        if not all(
                isinstance(learner.hyperparameters[key], numbers.Number)
                for key in module.pbounds):
            continue
        for key in module.pbounds:
            initialization_dict[key].append(learner.hyperparameters[key])
        target.append(learner.individual_score[module.metric_to_optimize])

    if module.invert_metric:
        initialization_dict['target'] = [-score for score in target]
    else:
        initialization_dict['target'] = target
    print('{} existing in initialization dictionary'.format(
        len(initialization_dict['target'])))

    # Function the optimizer will maximize
    func_to_optimize = return_func_to_optimize(
        path, session, origin, module.default_params,
        module.metric_to_optimize, module.invert_metric,
        set(module.integers))

    # Bayesian optimization itself
    bo = BayesianOptimization(func_to_optimize, module.pbounds)
    bo.initialize(initialization_dict)
    np.random.seed(random_state)
    bo.maximize(**module.maximize_config)

    automated_run.job_status = 'finished'
    session.add(automated_run)
    session.commit()