def run_experiment(
    project_id: int,
    engine: str,
    model: object,
    target_col: str,
    train_data: pd.DataFrame,
    test_data: pd.DataFrame,
    metrics: dict,
) -> int:
    """Record a new experiment, store its artifacts and trigger its pipeline.

    Steps, in order:

    1. Validate ``engine`` against the locally registered engines.
    2. Insert a row into the ``experiment`` table; the value returned by
       ``Database.write`` for that insert is used as the experiment id.
    3. Store the model, metrics, test set and train set in the registry
       under the path ``"<project_id>-<experiment_id>"``.
    4. Trigger the DAG named ``"<engine>-pipeline"`` with the experiment id,
       project id and target column as its configuration.
    5. Write the resulting status (``'submitted'`` or
       ``'submission-failed'``) back to the experiment row.

    Args:
        project_id: Identifier of the project the experiment belongs to.
        engine: Name of the engine; must be one of the registered engines.
        model: Model object stored in the registry under key ``'pre-model'``.
        target_col: Target column name forwarded to the pipeline.
        train_data: Dataset stored in the registry under key ``'train'``.
        test_data: Dataset stored in the registry under key ``'test'``.
        metrics: Metrics stored in the registry under key ``'metrics'``.

    Returns:
        The experiment id produced by the insert.

    Raises:
        ValueError: If ``engine`` is not a registered engine.
        RuntimeError: If the pipeline reports that the DAG was not triggered
            (the experiment row is marked ``'submission-failed'`` first).
    """
    _ENGINES = ['sklearn']
    if engine not in _ENGINES:
        raise ValueError(
            f'Engine not registered, must be one of the following: {",".join(_ENGINES)}'
        )

    database = Database()
    # project_id lands inside a quoted SQL literal, so double any single quote
    # to keep it from terminating the literal early. For integer ids the text
    # is unchanged. engine needs no escaping: it was checked against _ENGINES.
    # NOTE(review): if Database.write supports bound parameters, prefer them.
    project_literal = str(project_id).replace("'", "''")
    query = (
        "INSERT INTO experiment (project_id, engine) "
        f"VALUES ('{project_literal}', '{engine}')"
    )
    experiment_id = database.write(query=query)

    # Every artifact of this experiment is stored under the same registry path.
    artifact_path = f"{project_id}-{experiment_id}"
    registry = Registry()
    registry.put_model(path=artifact_path, key='pre-model', model=model)
    registry.put_metrics(path=artifact_path, key='metrics', metrics=metrics)
    registry.put_dataset(path=artifact_path, key='test', dataset=test_data)
    registry.put_dataset(path=artifact_path, key='train', dataset=train_data)

    pipeline = Pipeline()
    conf = {
        'experiment_id': experiment_id,
        'project_id': project_id,
        'target_col': target_col,
    }
    triggered = pipeline.trigger_dag(dag_id=f'{engine}-pipeline', data=dict(conf=conf))

    # Persist the outcome before raising so the row never stays without a status.
    status = 'submitted' if triggered else 'submission-failed'
    query = f"UPDATE experiment SET status = '{status}' WHERE id = {experiment_id}"
    database.write(query=query)

    if not triggered:
        raise RuntimeError('Pipeline failed to run experiment')
    return experiment_id
def create_project(name: str) -> int:
    """Insert a new row into the ``project`` table.

    Args:
        name: Project name stored in the ``name`` column.

    Returns:
        Whatever ``Database.write`` returns for the insert; annotated as an
        int (presumably the new project's id — confirm against ``Database``).
    """
    # name is free-form text placed inside a quoted SQL literal. Doubling
    # single quotes (standard SQL escaping) stops a name such as "O'Brien"
    # from breaking the statement or injecting SQL. Names without quotes
    # produce exactly the same query as before.
    # NOTE(review): if Database.write supports bound parameters, prefer them.
    name_literal = str(name).replace("'", "''")
    query = f"INSERT INTO project (name) VALUES ('{name_literal}')"
    database = Database()
    return database.write(query=query)