Example #1
def test_make_serializable(self):
    assert functions.is_valid_json({'x': ['i am serializable', 0.1]})
    assert not functions.is_valid_json({'x': RandomForestClassifier()})
    assert functions.make_serializable({
        'x': ['i am serializable', 0.1],
        'y': RandomForestClassifier()
    }) == {
        'x': ['i am serializable', 0.1]
    }
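For context, a minimal sketch of what is_valid_json and make_serializable could look like, assuming a simple json.dumps round-trip check; the actual xcessiv.functions implementations may differ:

import json

def is_valid_json(x):
    # Treat an object as serializable if json.dumps accepts it
    try:
        json.dumps(x)
        return True
    except (TypeError, OverflowError):
        return False

def make_serializable(json_doc):
    # Keep only the top-level entries that survive a JSON round trip,
    # matching the behavior asserted in the test above
    return dict((key, val) for key, val in json_doc.items()
                if is_valid_json(val))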
Example #2
def search_base_learner(id):
    """Creates a set of base learners from base learner origin using grid search
    and queues them up
    """
    path = functions.get_path_from_query_string(request)
    req_body = request.get_json()
    if req_body['method'] == 'grid':
        param_grid = functions.import_object_from_string_code(
            req_body['source'], 'param_grid')
        iterator = ParameterGrid(param_grid)
    elif req_body['method'] == 'random':
        param_distributions = functions.import_object_from_string_code(
            req_body['source'], 'param_distributions')
        iterator = ParameterSampler(param_distributions,
                                    n_iter=req_body['n_iter'])
    else:
        raise exceptions.UserError('{} not a valid search method'.format(
            req_body['method']))

    with functions.DBContextManager(path) as session:
        base_learner_origin = session.query(
            models.BaseLearnerOrigin).filter_by(id=id).first()
        if base_learner_origin is None:
            raise exceptions.UserError(
                'Base learner origin {} not found'.format(id), 404)

        if not base_learner_origin.final:
            raise exceptions.UserError(
                'Base learner origin {} is not final'.format(id))

        learners = []
        for params in iterator:
            est = base_learner_origin.return_estimator()
            try:
                est.set_params(**params)
            except Exception as e:
                print(repr(e))
                continue

            hyperparameters = functions.make_serializable(est.get_params())

            base_learners = session.query(models.BaseLearner).\
                filter_by(base_learner_origin_id=id,
                          hyperparameters=hyperparameters).all()
            if base_learners:  # already exists
                continue

            base_learner = models.BaseLearner(hyperparameters, 'queued',
                                              base_learner_origin)

            session.add(base_learner)
            session.commit()
            with Connection(get_redis_connection()):
                rqtasks.generate_meta_features.delay(path, base_learner.id)
            learners.append(base_learner)
        return jsonify(list(map(lambda x: x.serialize, learners)))
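As a usage sketch, hypothetical JSON bodies for this endpoint; the keys follow the handler above, and the source string is user-supplied Python code that must define param_grid (grid search) or param_distributions (random search). All concrete values are placeholders:

# Hypothetical payload for the 'grid' method
grid_request = {
    'method': 'grid',
    'source': "param_grid = {'n_estimators': [10, 100], 'max_depth': [3, 5]}"
}

# Hypothetical payload for the 'random' method; n_iter caps the number of samples
random_request = {
    'method': 'random',
    'n_iter': 20,
    'source': ("from scipy.stats import randint\n"
               "param_distributions = {'n_estimators': randint(10, 200)}")
}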
Example #3
def create_new_stacked_ensemble():
    """Lists all stacked ensembles (GET) or creates a new stacked ensemble
    from finished base learners and queues it for evaluation (POST)
    """
    path = functions.get_path_from_query_string(request)
    req_body = request.get_json()

    with functions.DBContextManager(path) as session:
        if request.method == 'GET':
            return jsonify(
                list(
                    map(lambda x: x.serialize,
                        session.query(models.StackedEnsemble).all())))

        if request.method == 'POST':
            base_learners = session.query(models.BaseLearner).\
                filter(models.BaseLearner.id.in_(req_body['base_learner_ids'])).all()
            if len(base_learners) != len(req_body['base_learner_ids']):
                raise exceptions.UserError('Not all base learners found')
            for learner in base_learners:
                if learner.job_status != 'finished':
                    raise exceptions.UserError(
                        'Not all base learners have finished')

            base_learner_origin = session.query(models.BaseLearnerOrigin).\
                filter_by(id=req_body['base_learner_origin_id']).first()
            if base_learner_origin is None:
                raise exceptions.UserError(
                    'Base learner origin {} not '
                    'found'.format(req_body['base_learner_origin_id']), 404)

            # Retrieve full hyperparameters
            est = base_learner_origin.return_estimator()
            params = functions.import_object_from_string_code(
                req_body['secondary_learner_hyperparameters_source'], 'params')
            est.set_params(**params)
            hyperparameters = functions.make_serializable(est.get_params())

            stacked_ensembles = session.query(models.StackedEnsemble).\
                filter_by(base_learner_origin_id=req_body['base_learner_origin_id'],
                          secondary_learner_hyperparameters=hyperparameters,
                          base_learner_ids=sorted([bl.id for bl in base_learners])).all()
            if stacked_ensembles:
                raise exceptions.UserError('Stacked ensemble exists')

            stacked_ensemble = models.StackedEnsemble(
                secondary_learner_hyperparameters=hyperparameters,
                base_learners=base_learners,
                base_learner_origin=base_learner_origin,
                job_status='queued')

            session.add(stacked_ensemble)
            session.commit()

            with Connection(get_redis_connection()):
                rqtasks.evaluate_stacked_ensemble.delay(
                    path, stacked_ensemble.id)

            return jsonify(stacked_ensemble.serialize)
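As a usage sketch, a hypothetical POST body for this endpoint; the key names come from the handler above, the ids are placeholders, and the hyperparameters source must define a dict named params:

# Hypothetical POST body for creating a stacked ensemble
ensemble_request = {
    'base_learner_ids': [1, 2, 3],    # must all be finished base learners
    'base_learner_origin_id': 4,      # origin of the secondary learner
    'secondary_learner_hyperparameters_source': "params = {'C': 1.0}"
}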
Example #4
def create_base_learner(id):
    """This creates a single base learner from a base learner origin and queues it up"""
    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        base_learner_origin = session.query(
            models.BaseLearnerOrigin).filter_by(id=id).first()
        if base_learner_origin is None:
            raise exceptions.UserError(
                'Base learner origin {} not found'.format(id), 404)

        if not base_learner_origin.final:
            raise exceptions.UserError(
                'Base learner origin {} is not final'.format(id))

        req_body = request.get_json()

        # Retrieve full hyperparameters
        est = base_learner_origin.return_estimator()
        hyperparameters = functions.import_object_from_string_code(
            req_body['source'], 'params')
        est.set_params(**hyperparameters)
        hyperparameters = functions.make_serializable(est.get_params())

        base_learners = session.query(models.BaseLearner).\
            filter_by(base_learner_origin_id=id,
                      hyperparameters=hyperparameters).all()
        if base_learners:
            raise exceptions.UserError(
                'Base learner exists with given hyperparameters')

        base_learner = models.BaseLearner(hyperparameters, 'queued',
                                          base_learner_origin)

        # Record the source used for this single search on the origin
        if 'single_searches' not in base_learner_origin.description:
            base_learner_origin.description['single_searches'] = []
        base_learner_origin.description['single_searches'] += [req_body['source']]

        session.add(base_learner)
        session.add(base_learner_origin)
        session.commit()

        with Connection(get_redis_connection()):
            rqtasks.generate_meta_features.delay(path, base_learner.id)

        return jsonify(base_learner.serialize)
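Similarly, a hypothetical body for create_base_learner; the source string is Python code that must bind a dict named params, as read by import_object_from_string_code above:

# Hypothetical body for creating a single base learner
single_request = {
    'source': "params = {'n_estimators': 100, 'max_depth': 5}"
}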
Example #5
def start_automated_run(path, automated_run_id):
    """Starts automated run. This will automatically create
    base learners until the run finishes or errors out.

    Args:
        path (str): Path to Xcessiv notebook

        automated_run_id (str): Automated Run ID
    """
    with functions.DBContextManager(path) as session:
        automated_run = session.query(models.AutomatedRun).filter_by(id=automated_run_id).first()
        if not automated_run:
            raise exceptions.UserError('Automated run {} '
                                       'does not exist'.format(automated_run_id))
        automated_run.job_id = get_current_job().id
        automated_run.job_status = 'started'

        session.add(automated_run)
        session.commit()

        try:
            module = functions.import_string_code_as_module(automated_run.source)
            random_state = module.random_state if hasattr(module, 'random_state') else 8
            assert module.metric_to_optimize in automated_run.base_learner_origin.metric_generators

            # get non-searchable parameters
            base_estimator = automated_run.base_learner_origin.return_estimator()
            base_estimator.set_params(**module.default_params)
            default_params = functions.make_serializable(base_estimator.get_params())
            non_searchable_params = dict((key, val) for key, val in iteritems(default_params)
                                         if key not in module.pbounds)

            # get already calculated base learners in search space
            existing_base_learners = []
            for base_learner in automated_run.base_learner_origin.base_learners:
                if not base_learner.job_status == 'finished':
                    continue
                in_search_space = True
                for key, val in iteritems(non_searchable_params):
                    if base_learner.hyperparameters[key] != val:
                        in_search_space = False
                        break  # If no match, move on to the next base learner
                if in_search_space:
                    existing_base_learners.append(base_learner)

            # build initialize dictionary
            target = []
            initialization_dict = dict((key, list()) for key in module.pbounds.keys())
            for base_learner in existing_base_learners:
                # check if base learner's searchable hyperparameters are all numerical
                all_numerical = True
                for key in module.pbounds.keys():
                    if not isinstance(base_learner.hyperparameters[key], numbers.Number):
                        all_numerical = False
                        break
                if not all_numerical:
                    continue  # if there is a non-numerical hyperparameter, skip this.

                for key in module.pbounds.keys():
                    initialization_dict[key].append(base_learner.hyperparameters[key])
                target.append(base_learner.individual_score[module.metric_to_optimize])
            initialization_dict['target'] = target if not module.invert_metric \
                else list(map(lambda x: -x, target))
            print('{} existing base learners in initialization dictionary'.format(
                len(initialization_dict['target'])))

            # Create function to be optimized
            func_to_optimize = return_func_to_optimize(
                path, session, automated_run.base_learner_origin, module.default_params,
                module.metric_to_optimize, module.invert_metric, set(module.integers)
            )

            # Create Bayes object
            bo = BayesianOptimization(func_to_optimize, module.pbounds)

            bo.initialize(initialization_dict)

            np.random.seed(random_state)

            bo.maximize(**module.maximize_config)

            automated_run.job_status = 'finished'
            session.add(automated_run)
            session.commit()

        except Exception:
            session.rollback()
            automated_run.job_status = 'errored'
            automated_run.description['error_type'] = repr(sys.exc_info()[0])
            automated_run.description['error_value'] = repr(sys.exc_info()[1])
            automated_run.description['error_traceback'] = \
                traceback.format_exception(*sys.exc_info())
            session.add(automated_run)
            session.commit()
            raise
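For reference, a hedged sketch of what an automated run's source string might define; every name below is read by start_automated_run above, while the concrete values are illustrative placeholders:

# Illustrative automated-run source; all values are placeholders
random_state = 8                        # optional, defaults to 8 in the code above
metric_to_optimize = 'Accuracy'         # must be a key of the origin's metric_generators
invert_metric = False                   # True when lower metric values are better
integers = ['max_depth']                # hyperparameters rounded to int before set_params
default_params = {'n_estimators': 100}  # non-searchable defaults applied first
pbounds = {'max_depth': (2, 10)}        # search bounds passed to BayesianOptimization
maximize_config = {'init_points': 2, 'n_iter': 10}  # kwargs for bo.maximize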
Example #6
    def func_to_optimize(**params):
        base_estimator = base_learner_origin.return_estimator()
        base_estimator.set_params(**default_params)
        # For integer hyperparameters, make sure they are rounded off
        params = dict((key, val) if key not in integers else (key, int(val))
                      for key, val in iteritems(params))
        base_estimator.set_params(**params)
        hyperparameters = functions.make_serializable(base_estimator.get_params())

        # Check whether a base learner with these hyperparameters already exists
        base_learner = session.query(models.BaseLearner).\
            filter_by(base_learner_origin_id=base_learner_origin.id,
                      hyperparameters=hyperparameters).first()

        calculate_only = False

        # If base learner exists and has finished, just return its result
        if base_learner and base_learner.job_status == 'finished':
            if invert_metric:
                return -base_learner.individual_score[metric_to_optimize]
            else:
                return base_learner.individual_score[metric_to_optimize]

        # else if base learner exists but is unfinished, just calculate the result without storing
        elif base_learner and base_learner.job_status != 'finished':
            calculate_only = True

        # else if base learner does not exist, create it
        else:
            base_learner = models.BaseLearner(hyperparameters,
                                              'started',
                                              base_learner_origin)
            base_learner.job_id = get_current_job().id
            session.add(base_learner)
            session.commit()

        try:
            est = base_learner.return_estimator()
            extraction = session.query(models.Extraction).first()
            X, y = extraction.return_train_dataset()
            return_splits_iterable = functions.import_object_from_string_code(
                extraction.meta_feature_generation['source'],
                'return_splits_iterable'
            )

            meta_features_list = []
            trues_list = []
            for train_index, test_index in return_splits_iterable(X, y):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                est = est.fit(X_train, y_train)
                meta_features_list.append(
                    getattr(est, base_learner.base_learner_origin.
                            meta_feature_generator)(X_test)
                )
                trues_list.append(y_test)
            meta_features = np.concatenate(meta_features_list, axis=0)
            y_true = np.concatenate(trues_list)

            for key in base_learner.base_learner_origin.metric_generators:
                metric_generator = functions.import_object_from_string_code(
                    base_learner.base_learner_origin.metric_generators[key],
                    'metric_generator'
                )
                base_learner.individual_score[key] = metric_generator(y_true, meta_features)

            # Persist meta-features and mark as finished only when storing this learner
            if not calculate_only:
                meta_features_path = base_learner.meta_features_path(path)

                if not os.path.exists(os.path.dirname(meta_features_path)):
                    os.makedirs(os.path.dirname(meta_features_path))

                np.save(meta_features_path, meta_features, allow_pickle=False)
                base_learner.job_status = 'finished'
                base_learner.meta_features_exists = True
                session.add(base_learner)
                session.commit()

            if invert_metric:
                return -base_learner.individual_score[metric_to_optimize]
            else:
                return base_learner.individual_score[metric_to_optimize]

        except Exception:
            session.rollback()
            base_learner.job_status = 'errored'
            base_learner.description['error_type'] = repr(sys.exc_info()[0])
            base_learner.description['error_value'] = repr(sys.exc_info()[1])
            base_learner.description['error_traceback'] = \
                traceback.format_exception(*sys.exc_info())
            session.add(base_learner)
            session.commit()
            raise
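A note on how this closure is consumed: BayesianOptimization calls it with one keyword argument per pbounds key, so a hypothetical probe (max_depth is a placeholder key) looks like:

# Hypothetical probe issued by the optimizer during maximize()
score = func_to_optimize(max_depth=5.3)  # rounded to int(5.3) if 'max_depth' is in integers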
Example #7
def start_naive_bayes(automated_run, session, path):
    """Starts naive bayes automated run

    Args:
        automated_run (xcessiv.models.AutomatedRun): Automated run object

        session: Valid SQLAlchemy session

        path (str, unicode): Path to project folder
    """
    module = functions.import_string_code_as_module(automated_run.source)
    random_state = module.random_state if hasattr(module, 'random_state') else 8
    assert module.metric_to_optimize in automated_run.base_learner_origin.metric_generators

    # get non-searchable parameters
    base_estimator = automated_run.base_learner_origin.return_estimator()
    base_estimator.set_params(**module.default_params)
    default_params = functions.make_serializable(base_estimator.get_params())
    non_searchable_params = dict((key, val)
                                 for key, val in iteritems(default_params)
                                 if key not in module.pbounds)

    # get already calculated base learners in search space
    existing_base_learners = []
    for base_learner in automated_run.base_learner_origin.base_learners:
        if not base_learner.job_status == 'finished':
            continue
        in_search_space = True
        for key, val in iteritems(non_searchable_params):
            if base_learner.hyperparameters[key] != val:
                in_search_space = False
                break  # If no match, move on to the next base learner
        if in_search_space:
            existing_base_learners.append(base_learner)

    # build initialize dictionary
    target = []
    initialization_dict = dict((key, list()) for key in module.pbounds.keys())
    for base_learner in existing_base_learners:
        # check if base learner's searchable hyperparameters are all numerical
        all_numerical = True
        for key in module.pbounds.keys():
            if not isinstance(base_learner.hyperparameters[key],
                              numbers.Number):
                all_numerical = False
                break
        if not all_numerical:
            continue  # if there is a non-numerical hyperparameter, skip this.

        for key in module.pbounds.keys():
            initialization_dict[key].append(base_learner.hyperparameters[key])
        target.append(base_learner.individual_score[module.metric_to_optimize])
    initialization_dict['target'] = target if not module.invert_metric \
        else list(map(lambda x: -x, target))
    print('{} existing base learners in initialization dictionary'.format(
        len(initialization_dict['target'])))

    # Create function to be optimized
    func_to_optimize = return_func_to_optimize(
        path, session, automated_run.base_learner_origin,
        module.default_params, module.metric_to_optimize, module.invert_metric,
        set(module.integers))

    # Create Bayes object
    bo = BayesianOptimization(func_to_optimize, module.pbounds)

    bo.initialize(initialization_dict)

    np.random.seed(random_state)

    bo.maximize(**module.maximize_config)

    automated_run.job_status = 'finished'
    session.add(automated_run)
    session.commit()