Example #1
def get_automated_runs():
    """Return all automated runs"""
    path = functions.get_path_from_query_string(request)

    if request.method == 'GET':
        with functions.DBContextManager(path) as session:
            automated_runs = session.query(models.AutomatedRun).all()
            return jsonify(list(map(lambda x: x.serialize, automated_runs)))

    if request.method == 'POST':
        req_body = request.get_json()
        with functions.DBContextManager(path) as session:
            base_learner_origin = None

            if req_body['category'] in ('bayes', 'greedy_ensemble_search'):
                base_learner_origin = session.query(models.BaseLearnerOrigin).\
                    filter_by(id=req_body['base_learner_origin_id']).first()
                if base_learner_origin is None:
                    raise exceptions.UserError(
                        'Base learner origin {} not found'.format(
                            req_body['base_learner_origin_id']), 404)
                if not base_learner_origin.final:
                    raise exceptions.UserError(
                        'Base learner origin {} is not final'.format(
                            req_body['base_learner_origin_id']))

            elif req_body['category'] == 'tpot':
                pass

            else:
                raise exceptions.UserError('Automated run category'
                                           ' {} not recognized'.format(
                                               req_body['category']))

            # Check for any syntax errors
            module = functions.import_string_code_as_module(req_body['source'])
            del module

            automated_run = models.AutomatedRun(req_body['source'], 'queued',
                                                req_body['category'],
                                                base_learner_origin)

            session.add(automated_run)
            session.commit()

            with Connection(get_redis_connection()):
                rqtasks.start_automated_run.delay(path, automated_run.id)

            return jsonify(automated_run.serialize)
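
A minimal client-side sketch of calling this view over HTTP. The route name and
server address are assumptions; only the `path` query-string parameter and the
JSON body shape are taken from the handler above:

import requests

BASE = 'http://localhost:1994'  # assumed server address

# GET: list all automated runs of the notebook at ./MyProject
runs = requests.get(BASE + '/automated-runs/',  # hypothetical route
                    params={'path': 'MyProject'}).json()

# POST: queue a Bayesian-optimization run against a final base learner origin
new_run = requests.post(
    BASE + '/automated-runs/',
    params={'path': 'MyProject'},
    json={
        'category': 'bayes',
        'base_learner_origin_id': 1,
        'source': '...',  # user-supplied configuration module, elided here
    },
).json()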
Example #2
def specific_base_learner_origin(id):
    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        base_learner_origin = session.query(models.BaseLearnerOrigin).filter_by(id=id).first()
        if base_learner_origin is None:
            raise exceptions.UserError('Base learner origin {} not found'.format(id), 404)

        if request.method == 'GET':
            return jsonify(base_learner_origin.serialize)

        if request.method == 'PATCH':
            if base_learner_origin.final:
                raise exceptions.UserError('Cannot modify a final base learner origin')
            req_body = request.get_json()

            modifiable_attr = ('meta_feature_generator', 'name', 'source',
                               'metric_generators')
            for attr in modifiable_attr:
                if attr in req_body:
                    setattr(base_learner_origin, attr, req_body[attr])

            session.add(base_learner_origin)
            session.commit()
            return jsonify(base_learner_origin.serialize)

        if request.method == 'DELETE':
            base_learner_origin.cleanup(path)
            session.delete(base_learner_origin)
            session.commit()
            return jsonify(message='Deleted base learner origin')
Example #3
def start_automated_run(id):
    """This starts an automated run using the passed in source code for configuration"""
    path = functions.get_path_from_query_string(request)
    req_body = request.get_json()
    with functions.DBContextManager(path) as session:
        base_learner_origin = session.query(models.BaseLearnerOrigin).filter_by(id=id).first()
        if base_learner_origin is None:
            raise exceptions.UserError('Base learner origin {} not found'.format(id), 404)

        if not base_learner_origin.final:
            raise exceptions.UserError('Base learner origin {} is not final'.format(id))

        # Check for any syntax errors
        module = functions.import_string_code_as_module(req_body['source'])
        del module

        automated_run = models.AutomatedRun(req_body['source'],
                                            'queued',
                                            base_learner_origin)

        session.add(automated_run)
        session.commit()

        with Connection(get_redis_connection()):
            rqtasks.start_automated_run.delay(path, automated_run.id)

        return jsonify(automated_run.serialize)
Example #4
def export_stacked_ensemble(id):
    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        stacked_ensemble = session.query(
            models.StackedEnsemble).filter_by(id=id).first()
        if stacked_ensemble is None:
            raise exceptions.UserError(
                'Stacked ensemble {} not found'.format(id), 404)

        extraction = session.query(models.Extraction).first()

        if request.method == 'POST':
            req_body = request.get_json()
            if req_body['type'] == 'package':
                stacked_ensemble.export_as_package(
                    os.path.join(path, req_body['name']),
                    extraction.meta_feature_generation['source'])
            elif req_body['type'] == 'file':
                if not req_body['name'].endswith('.py'):
                    req_body['name'] += '.py'
                stacked_ensemble.export_as_file(
                    os.path.join(path, req_body['name']),
                    extraction.meta_feature_generation['source'])
            return jsonify(
                message='Stacked ensemble successfully '
                'exported as {} in {}'.format(req_body['name'], path))
Example #5
def export_stacked_ensemble_as_base_learner_origin(id):
    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        stacked_ensemble = session.query(
            models.StackedEnsemble).filter_by(id=id).first()
        if stacked_ensemble is None:
            raise exceptions.UserError(
                'Stacked ensemble {} not found'.format(id), 404)

        extraction = session.query(models.Extraction).first()

        if request.method == 'POST':
            source = stacked_ensemble.export_as_code(
                extraction.meta_feature_generation['source'])

            new_base_learner_origin = models.BaseLearnerOrigin(
                source=source,
                name='Xcessiv Ensemble',
                meta_feature_generator=(
                    stacked_ensemble.base_learner_origin.meta_feature_generator))

            session.add(new_base_learner_origin)
            session.commit()
            return jsonify(new_base_learner_origin.serialize)
Example #6
def verify_base_learner_origin(id):
    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        base_learner_origin = session.query(
            models.BaseLearnerOrigin).filter_by(id=id).first()
        if base_learner_origin is None:
            raise exceptions.UserError(
                'Base learner origin {} not found'.format(id), 404)

        if request.method == 'POST':
            req_body = request.get_json()
            if base_learner_origin.final:
                raise exceptions.UserError('Base learner origin {} '
                                           'is already final'.format(id))
            base_learner = base_learner_origin.return_estimator()
            validation_results, hyperparameters = functions.verify_estimator_class(
                base_learner, base_learner_origin.meta_feature_generator,
                base_learner_origin.metric_generators, req_body['dataset'])
            base_learner_origin.validation_results = {
                req_body['dataset']: validation_results
            }
            base_learner_origin.hyperparameters = hyperparameters
            session.add(base_learner_origin)
            session.commit()
            return jsonify(base_learner_origin.serialize)
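
The values of metric_generators are themselves source strings, each expected to
define a function named metric_generator(y_true, probas) (see
generate_meta_features further below). A sketch of one such source string; the
choice of metric is an assumption:

metric_source = """
from sklearn.metrics import accuracy_score

def metric_generator(y_true, probas):
    # probas comes from the origin's meta_feature_generator, e.g. predict_proba
    return accuracy_score(y_true, probas.argmax(axis=1))
"""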
Example #7
def base_learner_origins_view():
    path = functions.get_path_from_query_string(request)

    if request.method == 'GET':
        with functions.DBContextManager(path) as session:
            base_learner_origins = session.query(models.BaseLearnerOrigin).all()
            return jsonify(list(map(lambda x: x.serialize, base_learner_origins)))

    if request.method == 'POST':  # Create new base learner origin
        req_body = request.get_json()
        new_base_learner_origin = models.BaseLearnerOrigin(**req_body)

        with functions.DBContextManager(path) as session:
            session.add(new_base_learner_origin)
            session.commit()
            return jsonify(new_base_learner_origin.serialize)
Example #8
def confirm_base_learner_origin(id):
    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        base_learner_origin = session.query(models.BaseLearnerOrigin).filter_by(id=id).first()
        if base_learner_origin is None:
            raise exceptions.UserError('Base learner origin {} not found'.format(id), 404)

        if request.method == 'GET':
            if base_learner_origin.final:
                raise exceptions.UserError('Base learner origin {} '
                                           'is already final'.format(id))
            if not base_learner_origin.validation_results:
                raise exceptions.UserError('Base learner origin {} has not yet been '
                                           'verified on a dataset'.format(id))
            base_learner = base_learner_origin.return_estimator()
            validation_results, hyperparameters = functions.verify_estimator_class(
                base_learner,
                base_learner_origin.meta_feature_generator,
                base_learner_origin.metric_generators,
                base_learner_origin.validation_results['dataset']
            )
            base_learner_origin.validation_results = {
                'dataset': base_learner_origin.validation_results['dataset'],
                'metrics': validation_results
            }
            base_learner_origin.hyperparameters = hyperparameters
            base_learner_origin.final = True
            session.add(base_learner_origin)
            session.commit()
            return jsonify(base_learner_origin.serialize)
Example #9
def get_automated_runs():
    """Return all automated runs"""
    path = functions.get_path_from_query_string(request)

    if request.method == 'GET':
        with functions.DBContextManager(path) as session:
            automated_runs = session.query(models.AutomatedRun).all()
            return jsonify(list(map(lambda x: x.serialize, automated_runs)))
Example #10
def extraction_test_dataset():
    path = functions.get_path_from_query_string(request)

    if request.method == 'GET':
        with functions.DBContextManager(path) as session:
            extraction = session.query(models.Extraction).first()
            return jsonify(extraction.test_dataset)

    if request.method == 'PATCH':
        req_body = request.get_json()
        with functions.DBContextManager(path) as session:
            extraction = session.query(models.Extraction).first()
            for key, value in six.iteritems(req_body):
                extraction.test_dataset[key] = value
            session.add(extraction)
            session.commit()
            return jsonify(extraction.test_dataset)
Example #11
def search_base_learner(id):
    """Creates a set of base learners from base learner origin using grid search
    and queues them up
    """
    path = functions.get_path_from_query_string(request)
    req_body = request.get_json()
    if req_body['method'] == 'grid':
        param_grid = functions.import_object_from_string_code(
            req_body['source'], 'param_grid')
        iterator = ParameterGrid(param_grid)
    elif req_body['method'] == 'random':
        param_distributions = functions.import_object_from_string_code(
            req_body['source'], 'param_distributions')
        iterator = ParameterSampler(param_distributions,
                                    n_iter=req_body['n_iter'])

    else:
        raise exceptions.UserError('{} not a valid search method'.format(
            req_body['method']))

    with functions.DBContextManager(path) as session:
        base_learner_origin = session.query(
            models.BaseLearnerOrigin).filter_by(id=id).first()
        if base_learner_origin is None:
            raise exceptions.UserError(
                'Base learner origin {} not found'.format(id), 404)

        if not base_learner_origin.final:
            raise exceptions.UserError(
                'Base learner origin {} is not final'.format(id))

        learners = []
        for params in iterator:
            est = base_learner_origin.return_estimator()
            try:
                est.set_params(**params)
            except Exception as e:
                print(repr(e))
                continue

            hyperparameters = functions.make_serializable(est.get_params())

            base_learners = session.query(models.BaseLearner).\
                filter_by(base_learner_origin_id=id,
                          hyperparameters=hyperparameters).all()
            if base_learners:  # already exists
                continue

            base_learner = models.BaseLearner(hyperparameters, 'queued',
                                              base_learner_origin)

            session.add(base_learner)
            session.commit()
            with Connection(get_redis_connection()):
                rqtasks.generate_meta_features.delay(path, base_learner.id)
            learners.append(base_learner)
        return jsonify(list(map(lambda x: x.serialize, learners)))
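
Here req_body['source'] is imported as a module: for the 'grid' method it must
define a module-level param_grid, and for 'random' a param_distributions (with
'n_iter' supplied in the request body). A sketch of what such source strings
might contain; the hyperparameter names assume an SVC-like estimator:

# Hypothetical source for req_body['method'] == 'grid'
grid_source = """
param_grid = {
    'C': [0.1, 1.0, 10.0],
    'kernel': ['linear', 'rbf'],
}
"""

# Hypothetical source for req_body['method'] == 'random'
random_source = """
from scipy.stats import uniform
param_distributions = {
    'C': uniform(loc=0.1, scale=10.0),
}
"""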
Example #12
def verify_extraction_test_dataset():
    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        extraction = session.query(models.Extraction).first()

    X, y = extraction.return_test_dataset()

    return jsonify(functions.verify_dataset(X, y))
Example #13
def create_new_stacked_ensemble():
    path = functions.get_path_from_query_string(request)
    req_body = request.get_json()

    with functions.DBContextManager(path) as session:
        if request.method == 'GET':
            return jsonify(
                list(
                    map(lambda x: x.serialize,
                        session.query(models.StackedEnsemble).all())))

        if request.method == 'POST':
            base_learners = session.query(models.BaseLearner).\
                filter(models.BaseLearner.id.in_(req_body['base_learner_ids'])).all()
            if len(base_learners) != len(req_body['base_learner_ids']):
                raise exceptions.UserError('Not all base learners found')
            for learner in base_learners:
                if learner.job_status != 'finished':
                    raise exceptions.UserError(
                        'Not all base learners have finished')

            base_learner_origin = session.query(models.BaseLearnerOrigin).\
                filter_by(id=req_body['base_learner_origin_id']).first()
            if base_learner_origin is None:
                raise exceptions.UserError(
                    'Base learner origin {} not '
                    'found'.format(req_body['base_learner_origin_id']), 404)

            # Retrieve full hyperparameters
            est = base_learner_origin.return_estimator()
            params = functions.import_object_from_string_code(
                req_body['secondary_learner_hyperparameters_source'], 'params')
            est.set_params(**params)
            hyperparameters = functions.make_serializable(est.get_params())

            stacked_ensembles = session.query(models.StackedEnsemble).\
                filter_by(base_learner_origin_id=req_body['base_learner_origin_id'],
                          secondary_learner_hyperparameters=hyperparameters,
                          base_learner_ids=sorted([bl.id for bl in base_learners])).all()
            if stacked_ensembles:
                raise exceptions.UserError('Stacked ensemble exists')

            stacked_ensemble = models.StackedEnsemble(
                secondary_learner_hyperparameters=hyperparameters,
                base_learners=base_learners,
                base_learner_origin=base_learner_origin,
                job_status='queued')

            session.add(stacked_ensemble)
            session.commit()

            with Connection(get_redis_connection()):
                rqtasks.evaluate_stacked_ensemble.delay(
                    path, stacked_ensemble.id)

            return jsonify(stacked_ensemble.serialize)
Example #14
def verify_extraction_meta_feature_generation():
    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        extraction = session.query(models.Extraction).first()

    if extraction.meta_feature_generation['method'] == 'cv':
        raise exceptions.UserError('Xcessiv will use cross-validation to'
                                   ' generate meta-features')

    X_holdout, y_holdout = extraction.return_holdout_dataset()

    return jsonify(functions.verify_dataset(X_holdout, y_holdout))
Example #15
def start_automated_run(path, automated_run_id):
    """Starts automated run. This will automatically create
    base learners until the run finishes or errors out.

    Args:
        path (str): Path to Xcessiv notebook

        automated_run_id (str): Automated Run ID
    """
    with functions.DBContextManager(path) as session:
        automated_run = session.query(
            models.AutomatedRun).filter_by(id=automated_run_id).first()
        if not automated_run:
            raise exceptions.UserError(
                'Automated run {} '
                'does not exist'.format(automated_run_id))
        automated_run.job_id = get_current_job().id
        automated_run.job_status = 'started'

        session.add(automated_run)
        session.commit()

        try:
            if automated_run.category == 'bayes':
                automatedruns.start_naive_bayes(automated_run, session, path)

            elif automated_run.category == 'tpot':
                automatedruns.start_tpot(automated_run, session, path)

            elif automated_run.category == 'greedy_ensemble_search':
                automatedruns.start_greedy_ensemble_search(
                    automated_run, session, path)

            else:
                raise Exception(
                    'Something went wrong. Invalid category for automated run')

            automated_run.job_status = 'finished'
            session.add(automated_run)
            session.commit()

        except:
            session.rollback()
            automated_run.job_status = 'errored'
            automated_run.description['error_type'] = repr(sys.exc_info()[0])
            automated_run.description['error_value'] = repr(sys.exc_info()[1])
            automated_run.description['error_traceback'] = \
                traceback.format_exception(*sys.exc_info())
            session.add(automated_run)
            session.commit()
            raise
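
The .delay(...) calls in the view functions above imply that task functions
like this one are wrapped with RQ's job decorator, so that calling .delay
enqueues them on a Redis-backed queue. A sketch of that wiring; the queue name
and timeout are assumed values, not confirmed by these examples:

from rq.decorators import job

@job('default', timeout=86400)  # assumed queue name and timeout
def start_automated_run(path, automated_run_id):
    ...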
Example #16
def verify_full_extraction():
    """This is an experimental endpoint to simultaneously verify data
    statistics and extraction for training, test, and holdout datasets.
    With this, the other three verification methods will no longer be
    necessary.
    """
    path = functions.get_path_from_query_string(request)

    if request.method == 'POST':
        rqtasks.extraction_data_statistics(path)

    with functions.DBContextManager(path) as session:
        extraction = session.query(models.Extraction).first()
        return jsonify(extraction.data_statistics)
Example #17
def specific_stacked_ensemble(id):
    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        stacked_ensemble = session.query(models.StackedEnsemble).filter_by(id=id).first()
        if stacked_ensemble is None:
            raise exceptions.UserError('Stacked ensemble {} not found'.format(id), 404)

        if request.method == 'GET':
            return jsonify(stacked_ensemble.serialize)

        if request.method == 'DELETE':
            session.delete(stacked_ensemble)
            session.commit()
            return jsonify(message='Deleted stacked ensemble')
Example #18
def get_base_learners():
    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        base_learners = session.query(models.BaseLearner).all()

        if request.method == 'GET':
            return jsonify(list(map(lambda x: x.serialize, base_learners)))

        if request.method == 'DELETE':  # Go crazy and delete everything
            for base_learner in base_learners:
                base_learner.delete_meta_features(path)
                session.delete(base_learner)
            session.commit()
            return jsonify(message='Deleted all base learners')
Example #19
def specific_automated_run(id):
    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        automated_run = session.query(models.AutomatedRun).filter_by(id=id).first()
        if automated_run is None:
            raise exceptions.UserError('Automated run {} not found'.format(id), 404)

        if request.method == 'GET':
            return jsonify(automated_run.serialize)

        if request.method == 'DELETE':
            session.delete(automated_run)
            session.commit()
            return jsonify(message='Deleted automated run')
Example #20
def create_base_learner(id):
    """This creates a single base learner from a base learner origin and queues it up"""
    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        base_learner_origin = session.query(
            models.BaseLearnerOrigin).filter_by(id=id).first()
        if base_learner_origin is None:
            raise exceptions.UserError(
                'Base learner origin {} not found'.format(id), 404)

        if not base_learner_origin.final:
            raise exceptions.UserError(
                'Base learner origin {} is not final'.format(id))

        req_body = request.get_json()

        # Retrieve full hyperparameters
        est = base_learner_origin.return_estimator()
        hyperparameters = functions.import_object_from_string_code(
            req_body['source'], 'params')
        est.set_params(**hyperparameters)
        hyperparameters = functions.make_serializable(est.get_params())

        base_learners = session.query(models.BaseLearner).\
            filter_by(base_learner_origin_id=id,
                      hyperparameters=hyperparameters).all()
        if base_learners:
            raise exceptions.UserError(
                'Base learner exists with given hyperparameters')

        base_learner = models.BaseLearner(hyperparameters, 'queued',
                                          base_learner_origin)

        if 'single_searches' not in base_learner_origin.description:
            base_learner_origin.description['single_searches'] = []
        base_learner_origin.description['single_searches'] += [req_body['source']]

        session.add(base_learner)
        session.add(base_learner_origin)
        session.commit()

        with Connection(get_redis_connection()):
            rqtasks.generate_meta_features.delay(path, base_learner.id)

        return jsonify(base_learner.serialize)
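
For this endpoint, req_body['source'] must define a dict named params, which is
applied on top of the origin's estimator via set_params. A sketch of such a
source string; the hyperparameter names are illustrative assumptions:

source = """
params = {
    'n_estimators': 200,
    'max_depth': 5,
}
"""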
Example #21
def specific_base_learner(id):
    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        base_learner = session.query(models.BaseLearner).filter_by(id=id).first()
        if base_learner is None:
            raise exceptions.UserError('Base learner {} not found'.format(id), 404)

        if request.method == 'GET':
            return jsonify(base_learner.serialize)

        if request.method == 'DELETE':
            base_learner.cleanup(path)
            session.delete(base_learner)
            session.commit()
            return jsonify(message='Deleted base learner')
Example #22
def test_creation(self):
    rv = self.app.post(
        '/ensemble/',
        data=json.dumps(
            {
                'ensemble_name': self.test_location
            }
        ),
        content_type='application/json'
    )
    assert rv.status_code == 200
    assert os.path.exists(os.path.join(self.test_location,
                                       app.config['XCESSIV_NOTEBOOK_NAME']))
    with functions.DBContextManager(self.test_location) as session:
        extraction = session.query(models.Extraction).all()
        assert len(extraction) == 1
        assert extraction[0].main_dataset == constants.DEFAULT_EXTRACTION_MAIN_DATASET
        assert extraction[0].test_dataset == constants.DEFAULT_EXTRACTION_TEST_DATASET
        assert extraction[0].meta_feature_generation == \
            constants.DEFAULT_EXTRACTION_META_FEATURE_GENERATION
Example #23
def create_new_ensemble():
    req_body = request.get_json()
    ensemble_name = req_body['ensemble_name']

    if os.path.exists(ensemble_name):
        return jsonify(message="File/folder already exists"), 400

    os.makedirs(ensemble_name)
    xcessiv_notebook_path = os.path.join(ensemble_name, app.config['XCESSIV_NOTEBOOK_NAME'])
    sqlite_url = 'sqlite:///{}'.format(xcessiv_notebook_path)
    engine = create_engine(sqlite_url)

    models.Base.metadata.create_all(engine)

    # Initialize
    extraction = models.Extraction()
    with functions.DBContextManager(ensemble_name) as session:
        session.add(extraction)
        session.commit()

    return jsonify(message="Xcessiv notebook created")
Example #24
def generate_meta_features(path, base_learner_id):
    """Generates meta-features for specified base learner

    After generation of meta-features, the file is saved into the meta-features folder

    Args:
        path (str): Path to Xcessiv notebook

        base_learner_id (str): Base learner ID
    """
    with functions.DBContextManager(path) as session:
        base_learner = session.query(models.BaseLearner).filter_by(id=base_learner_id).first()
        if not base_learner:
            raise exceptions.UserError('Base learner {} '
                                       'does not exist'.format(base_learner_id))

        base_learner.job_id = get_current_job().id
        base_learner.job_status = 'started'

        session.add(base_learner)
        session.commit()

        try:
            est = base_learner.return_estimator()
            extraction = session.query(models.Extraction).first()
            X, y = extraction.return_train_dataset()
            return_splits_iterable = functions.import_object_from_string_code(
                extraction.meta_feature_generation['source'],
                'return_splits_iterable'
            )

            meta_features_list = []
            trues_list = []
            for train_index, test_index in return_splits_iterable(X, y):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                est = est.fit(X_train, y_train)
                meta_features_list.append(
                    getattr(est, base_learner.base_learner_origin.
                            meta_feature_generator)(X_test)
                )
                trues_list.append(y_test)
            meta_features = np.concatenate(meta_features_list, axis=0)
            y_true = np.concatenate(trues_list)

            for key in base_learner.base_learner_origin.metric_generators:
                metric_generator = functions.import_object_from_string_code(
                    base_learner.base_learner_origin.metric_generators[key],
                    'metric_generator'
                )
                base_learner.individual_score[key] = metric_generator(y_true, meta_features)

            meta_features_path = base_learner.meta_features_path(path)

            if not os.path.exists(os.path.dirname(meta_features_path)):
                os.makedirs(os.path.dirname(meta_features_path))

            np.save(meta_features_path, meta_features, allow_pickle=False)
            base_learner.job_status = 'finished'
            base_learner.meta_features_exists = True
            session.add(base_learner)
            session.commit()

        except:
            session.rollback()
            base_learner.job_status = 'errored'
            base_learner.description['error_type'] = repr(sys.exc_info()[0])
            base_learner.description['error_value'] = repr(sys.exc_info()[1])
            base_learner.description['error_traceback'] = \
                traceback.format_exception(*sys.exc_info())
            session.add(base_learner)
            session.commit()
            raise
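
The loop above is standard out-of-fold prediction: each fold's test rows are
predicted by a model that never saw them during fitting, and the per-fold
predictions are stacked into a full meta-feature matrix. A self-contained
sketch of the same technique; the dataset, estimator, and CV scheme are
placeholders:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

X, y = load_iris(return_X_y=True)
est = LogisticRegression(max_iter=1000)

meta_features_list, trues_list = [], []
for train_index, test_index in StratifiedKFold(n_splits=5).split(X, y):
    est.fit(X[train_index], y[train_index])
    # predict_proba plays the role of meta_feature_generator above
    meta_features_list.append(est.predict_proba(X[test_index]))
    trues_list.append(y[test_index])

meta_features = np.concatenate(meta_features_list, axis=0)
y_true = np.concatenate(trues_list)
print(meta_features.shape, y_true.shape)  # (150, 3) (150,)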
Example #25
def evaluate_stacked_ensemble(path, ensemble_id):
    """Evaluates the ensemble and updates the database when finished/

    Args:
        path (str): Path to Xcessiv notebook

        ensemble_id (str): Ensemble ID
    """
    with functions.DBContextManager(path) as session:
        stacked_ensemble = session.query(models.StackedEnsemble).filter_by(
            id=ensemble_id).first()
        if not stacked_ensemble:
            raise exceptions.UserError('Stacked ensemble {} '
                                       'does not exist'.format(ensemble_id))

        stacked_ensemble.job_id = get_current_job().id
        stacked_ensemble.job_status = 'started'

        session.add(stacked_ensemble)
        session.commit()

        try:
            meta_features_list = []
            for base_learner in stacked_ensemble.base_learners:
                mf = np.load(base_learner.meta_features_path(path))
                if len(mf.shape) == 1:
                    mf = mf.reshape(-1, 1)
                meta_features_list.append(mf)

            secondary_features = np.concatenate(meta_features_list, axis=1)

            # Get data
            extraction = session.query(models.Extraction).first()
            return_splits_iterable = functions.import_object_from_string_code(
                extraction.meta_feature_generation['source'],
                'return_splits_iterable'
            )
            X, y = extraction.return_train_dataset()

            # Reorder X and y to match the row order of the concatenated out-of-fold meta-features
            indices_list = [test_index for train_index, test_index in return_splits_iterable(X, y)]
            indices = np.concatenate(indices_list)
            X, y = X[indices], y[indices]

            est = stacked_ensemble.return_secondary_learner()

            return_splits_iterable_stacked_ensemble = functions.import_object_from_string_code(
                extraction.stacked_ensemble_cv['source'],
                'return_splits_iterable'
            )
            preds = []
            trues_list = []
            for train_index, test_index in return_splits_iterable_stacked_ensemble(secondary_features, y):
                X_train, X_test = secondary_features[train_index], secondary_features[test_index]
                y_train, y_test = y[train_index], y[test_index]
                est = est.fit(X_train, y_train)
                preds.append(
                    getattr(est, stacked_ensemble.base_learner_origin.
                            meta_feature_generator)(X_test)
                )
                trues_list.append(y_test)
            preds = np.concatenate(preds, axis=0)
            y_true = np.concatenate(trues_list)

            for key in stacked_ensemble.base_learner_origin.metric_generators:
                metric_generator = functions.import_object_from_string_code(
                    stacked_ensemble.base_learner_origin.metric_generators[key],
                    'metric_generator'
                )
                stacked_ensemble.individual_score[key] = metric_generator(y_true, preds)

            stacked_ensemble.job_status = 'finished'
            session.add(stacked_ensemble)
            session.commit()

        except:
            session.rollback()
            stacked_ensemble.job_status = 'errored'
            stacked_ensemble.description['error_type'] = repr(sys.exc_info()[0])
            stacked_ensemble.description['error_value'] = repr(sys.exc_info()[1])
            stacked_ensemble.description['error_traceback'] = \
                traceback.format_exception(*sys.exc_info())
            session.add(stacked_ensemble)
            session.commit()
            raise
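
Both tasks import a user-defined return_splits_iterable from stored source
code. The contract such a source string has to satisfy is a function taking
(X, y) and yielding (train_index, test_index) pairs; a sketch, with the
concrete CV scheme as an assumption:

splits_source = """
from sklearn.model_selection import StratifiedKFold

def return_splits_iterable(X, y):
    return StratifiedKFold(n_splits=5, shuffle=True, random_state=8).split(X, y)
"""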
Example #26
def start_automated_run(path, automated_run_id):
    """Starts automated run. This will automatically create
    base learners until the run finishes or errors out.

    Args:
        path (str): Path to Xcessiv notebook

        automated_run_id (str): Automated Run ID
    """
    with functions.DBContextManager(path) as session:
        automated_run = session.query(models.AutomatedRun).filter_by(id=automated_run_id).first()
        if not automated_run:
            raise exceptions.UserError('Automated run {} '
                                       'does not exist'.format(automated_run_id))
        automated_run.job_id = get_current_job().id
        automated_run.job_status = 'started'

        session.add(automated_run)
        session.commit()

        try:
            module = functions.import_string_code_as_module(automated_run.source)
            random_state = getattr(module, 'random_state', 8)
            assert module.metric_to_optimize in automated_run.base_learner_origin.metric_generators

            # get non-searchable parameters
            base_estimator = automated_run.base_learner_origin.return_estimator()
            base_estimator.set_params(**module.default_params)
            default_params = functions.make_serializable(base_estimator.get_params())
            non_searchable_params = dict((key, val) for key, val in iteritems(default_params)
                                         if key not in module.pbounds)

            # get already calculated base learners in search space
            existing_base_learners = []
            for base_learner in automated_run.base_learner_origin.base_learners:
                if not base_learner.job_status == 'finished':
                    continue
                in_search_space = True
                for key, val in iteritems(non_searchable_params):
                    if base_learner.hyperparameters[key] != val:
                        in_search_space = False
                        break  # If no match, move on to the next base learner
                if in_search_space:
                    existing_base_learners.append(base_learner)

            # build initialize dictionary
            target = []
            initialization_dict = dict((key, list()) for key in module.pbounds.keys())
            for base_learner in existing_base_learners:
                # check if base learner's searchable hyperparameters are all numerical
                all_numerical = True
                for key in module.pbounds.keys():
                    if not isinstance(base_learner.hyperparameters[key], numbers.Number):
                        all_numerical = False
                        break
                if not all_numerical:
                    continue  # if there is a non-numerical hyperparameter, skip this.

                for key in module.pbounds.keys():
                    initialization_dict[key].append(base_learner.hyperparameters[key])
                target.append(base_learner.individual_score[module.metric_to_optimize])
            initialization_dict['target'] = target if not module.invert_metric \
                else list(map(lambda x: -x, target))
            print('{} existing in initialization dictionary'.
                  format(len(initialization_dict['target'])))

            # Create function to be optimized
            func_to_optimize = return_func_to_optimize(
                path, session, automated_run.base_learner_origin, module.default_params,
                module.metric_to_optimize, module.invert_metric, set(module.integers)
            )

            # Create Bayes object
            bo = BayesianOptimization(func_to_optimize, module.pbounds)

            bo.initialize(initialization_dict)

            np.random.seed(random_state)

            bo.maximize(**module.maximize_config)

            automated_run.job_status = 'finished'
            session.add(automated_run)
            session.commit()

        except:
            session.rollback()
            automated_run.job_status = 'errored'
            automated_run.description['error_type'] = repr(sys.exc_info()[0])
            automated_run.description['error_value'] = repr(sys.exc_info()[1])
            automated_run.description['error_traceback'] = \
                traceback.format_exception(*sys.exc_info())
            session.add(automated_run)
            session.commit()
            raise
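
For the 'bayes' category, automated_run.source is imported as a module and must
expose the names read above (pbounds, default_params, metric_to_optimize, and
so on). A sketch of such a configuration source; all concrete values are
illustrative assumptions:

bayes_source = """
metric_to_optimize = 'Accuracy'  # must be a key of the origin's metric_generators
invert_metric = False            # set True when lower is better (e.g. a loss)
random_state = 8                 # optional; defaults to 8 above
integers = ['max_depth']         # hyperparameters treated as integers in the search
default_params = {'n_estimators': 200}   # non-searchable parameters
pbounds = {'max_depth': (2, 12)}         # numeric search bounds per hyperparameter
maximize_config = {'init_points': 2, 'n_iter': 10}
"""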
Example #27
def extraction_data_statistics(path):
    """ Generates data statistics for the given data extraction setup stored
    in Xcessiv notebook.

    This lives in rqtasks.py but is not set up as a job yet; for now it is
    called directly while the Javascript side is still being worked out.

    Args:
        path (str, unicode): Path to xcessiv notebook
    """
    with functions.DBContextManager(path) as session:
        extraction = session.query(models.Extraction).first()
        X, y = extraction.return_main_dataset()
        functions.verify_dataset(X, y)

        if extraction.test_dataset['method'] == 'split_from_main':
            X, X_test, y, y_test = train_test_split(
                X,
                y,
                test_size=extraction.test_dataset['split_ratio'],
                random_state=extraction.test_dataset['split_seed'],
                stratify=y
            )
        elif extraction.test_dataset['method'] == 'source':
            if 'source' not in extraction.test_dataset or not extraction.test_dataset['source']:
                raise exceptions.UserError('Source is empty')

            extraction_code = extraction.test_dataset["source"]
            extraction_function = functions.\
                import_object_from_string_code(extraction_code, "extract_test_dataset")
            X_test, y_test = extraction_function()
        else:
            X_test, y_test = None, None

        # test base learner cross-validation
        extraction_code = extraction.meta_feature_generation['source']
        return_splits_iterable = functions.import_object_from_string_code(
            extraction_code,
            'return_splits_iterable'
        )
        number_of_splits = 0
        test_indices = []
        try:
            for train_idx, test_idx in return_splits_iterable(X, y):
                number_of_splits += 1
                test_indices.append(test_idx)
        except Exception as e:
            raise exceptions.UserError('User code exception', exception_message=str(e))

        # preparation before testing stacked ensemble cross-validation
        test_indices = np.concatenate(test_indices)
        X, y = X[test_indices], y[test_indices]

        # test stacked ensemble cross-validation
        extraction_code = extraction.stacked_ensemble_cv['source']
        return_splits_iterable = functions.import_object_from_string_code(
            extraction_code,
            'return_splits_iterable'
        )
        number_of_splits_stacked_cv = 0
        try:
            for train_idx, test_idx in return_splits_iterable(X, y):
                number_of_splits_stacked_cv += 1
        except Exception as e:
            raise exceptions.UserError('User code exception', exception_message=str(e))

        data_stats = dict()
        data_stats['train_data_stats'] = functions.verify_dataset(X, y)
        if X_test is not None:
            data_stats['test_data_stats'] = functions.verify_dataset(X_test, y_test)
        else:
            data_stats['test_data_stats'] = None
        data_stats['holdout_data_stats'] = {'number_of_splits': number_of_splits}
        data_stats['stacked_ensemble_cv_stats'] = {'number_of_splits': number_of_splits_stacked_cv}

        extraction.data_statistics = data_stats

        session.add(extraction)
        session.commit()
Example #28
def extraction_data_statistics(path):
    """ Generates data statistics for the given data extraction setup stored
    in Xcessiv notebook.

    This lives in rqtasks.py but is not set up as a job yet; for now it is
    called directly while the Javascript side is still being worked out.

    Args:
        path (str, unicode): Path to xcessiv notebook
    """
    with functions.DBContextManager(path) as session:
        extraction = session.query(models.Extraction).first()
        X, y = extraction.return_main_dataset()
        functions.verify_dataset(X, y)

        if extraction.test_dataset['method'] == 'split_from_main':
            X, X_test, y, y_test = train_test_split(
                X,
                y,
                test_size=extraction.test_dataset['split_ratio'],
                random_state=extraction.test_dataset['split_seed'],
                stratify=y
            )
        elif extraction.test_dataset['method'] == 'source':
            if 'source' not in extraction.test_dataset or not extraction.test_dataset['source']:
                raise exceptions.UserError('Source is empty')

            extraction_code = extraction.test_dataset["source"]
            extraction_function = functions.\
                import_object_from_string_code(extraction_code, "extract_test_dataset")
            X_test, y_test = extraction_function()
        else:
            X_test, y_test = None, None

        if extraction.meta_feature_generation['method'] == 'holdout_split':
            X, X_holdout, y, y_holdout = train_test_split(
                X,
                y,
                test_size=extraction.meta_feature_generation['split_ratio'],
                random_state=extraction.meta_feature_generation['seed'],
                stratify=y
            )
        elif extraction.meta_feature_generation['method'] == 'holdout_source':
            if 'source' not in extraction.meta_feature_generation or \
                    not extraction.meta_feature_generation['source']:
                raise exceptions.UserError('Source is empty')

            extraction_code = extraction.meta_feature_generation["source"]
            extraction_function = functions.\
                import_object_from_string_code(extraction_code,
                                               "extract_holdout_dataset")
            X_holdout, y_holdout = extraction_function()
        else:
            X_holdout, y_holdout = None, None

        data_stats = dict()
        data_stats['train_data_stats'] = functions.verify_dataset(X, y)
        if X_test is not None:
            data_stats['test_data_stats'] = functions.verify_dataset(X_test, y_test)
        else:
            data_stats['test_data_stats'] = None
        if X_holdout is not None:
            data_stats['holdout_data_stats'] = functions.verify_dataset(X_holdout, y_holdout)
        else:
            data_stats['holdout_data_stats'] = None

        extraction.data_statistics = data_stats

        session.add(extraction)
        session.commit()
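
The 'source'-based branches expect user code defining an extraction function
that returns an (X, y) pair. A sketch of such a source string for the holdout
case; the dataset is a placeholder:

holdout_source = """
from sklearn.datasets import load_digits

def extract_holdout_dataset():
    X, y = load_digits(return_X_y=True)
    return X, y
"""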