예제 #1
0
def run_experiment(project_id: int, engine: str, model: object,
                   target_col: str, train_data: pd.DataFrame,
                   test_data: pd.DataFrame, metrics: dict) -> int:
    """Register an experiment, store its artifacts and trigger its pipeline.

    Steps, in order:
      1. Validate ``engine`` against the registered engines and make sure
         ``project_id`` is an integer.
      2. Insert a row into the ``experiment`` table.
      3. Store the model, metrics and both datasets in the registry under
         the path ``"<project_id>-<experiment_id>"``.
      4. Trigger the ``"<engine>-pipeline"`` DAG and record the outcome in
         the experiment's ``status`` column.

    Args:
        project_id: Id of the owning project. Coerced with ``int()`` so that
            only an integer is ever placed in the SQL statement.
        engine: Name of the engine; must be one of the registered engines.
        model: Model object stored in the registry under key ``'pre-model'``.
        target_col: Target column name forwarded to the pipeline.
        train_data: Dataset stored in the registry under key ``'train'``.
        test_data: Dataset stored in the registry under key ``'test'``.
        metrics: Metrics stored in the registry under key ``'metrics'``.

    Returns:
        The value returned by ``Database.write`` for the INSERT, which is
        used throughout as the experiment id.

    Raises:
        ValueError: If ``project_id`` cannot be converted to an integer.
        Exception: If ``engine`` is not registered, or if the pipeline
            reports that the DAG was not triggered.
    """
    _ENGINES = ['sklearn']
    if engine not in _ENGINES:
        raise Exception(
            f'Engine not registered, must be one of the following: {",".join(_ENGINES)}'
        )

    # The id is interpolated into SQL below; refuse anything that is not an
    # integer *before* touching the database so arbitrary text can never
    # reach the statement.
    try:
        project_id = int(project_id)
    except (TypeError, ValueError) as err:
        raise ValueError(
            f'project_id must be an integer, got {project_id!r}'
        ) from err

    database = Database()
    # `engine` is safe to interpolate: it was checked against _ENGINES above.
    query = f"INSERT INTO experiment (project_id, engine) VALUES ('{project_id}', '{engine}')"
    experiment_id = database.write(query=query)

    # Every artifact of this experiment lives under the same registry path.
    artifact_path = f"{project_id}-{experiment_id}"
    registry = Registry()
    registry.put_model(path=artifact_path, key='pre-model', model=model)
    registry.put_metrics(path=artifact_path, key='metrics', metrics=metrics)
    registry.put_dataset(path=artifact_path, key='test', dataset=test_data)
    registry.put_dataset(path=artifact_path, key='train', dataset=train_data)

    pipeline = Pipeline()
    conf = {
        'experiment_id': experiment_id,
        'project_id': project_id,
        'target_col': target_col
    }
    triggered = pipeline.trigger_dag(dag_id=f'{engine}-pipeline',
                                     data=dict(conf=conf))

    # Persist the outcome first, then surface a failure to the caller.
    status = 'submitted' if triggered else 'submission-failed'
    query = f"UPDATE experiment SET status = '{status}' WHERE id = {experiment_id}"
    database.write(query=query)
    if not triggered:
        raise Exception('Pipeline failed to run experiment')

    return experiment_id
예제 #2
0
def create_project(name: str) -> int:
    """Insert a new row into the ``project`` table.

    Args:
        name: Project name stored in the ``name`` column.

    Returns:
        The value returned by ``Database.write`` for the INSERT
        (presumably the new row's id -- confirm against ``Database``).
    """
    # Double embedded single quotes (standard SQL string-literal escaping) so
    # a name such as "O'Brien" no longer terminates the literal early, which
    # previously broke the statement and allowed SQL injection.
    # NOTE(review): backends that also treat backslash as an escape character
    # need more than this; switch to bound parameters if Database.write
    # supports them.
    escaped_name = name.replace("'", "''")
    query = f"INSERT INTO project (name) VALUES ('{escaped_name}')"
    database = Database()
    return database.write(query=query)