def training():
    """Serve the training form and create a training experiment on submit.

    When the submitted form validates, a new experiment is generated from
    the form's ``num_trials``, ``data_type`` and ``matrix_size`` fields
    (each converted with ``int``), flagged as a training experiment owned
    by the current user, and stored together with every generated
    (trial, matrix) pair. The client is then redirected to the
    ``instructions`` endpoint for that experiment. Otherwise the training
    template is rendered with the form.
    """
    form = ExperimentForm()
    if not form.validate_on_submit():
        return render_template(
            'training.html', title='Training', form=form, user=g.user)

    experiment = models.Experiment()
    generation_settings = {
        'num_trials': int(form.num_trials.data),
        'data_type': int(form.data_type.data),
        'matrix_size': int(form.matrix_size.data),
    }
    trial_matrix_pairs = experiment.generate(**generation_settings)

    experiment.user_id = g.user.id
    experiment.training = True

    session = db.session
    session.add(experiment)
    for trial, matrix in trial_matrix_pairs:
        session.add(trial)
        session.add(matrix)
    session.commit()

    target = url_for('instructions', experiment_id=str(experiment.id))
    return redirect(target)
def test_default_generate(self):
    """generate() called with no arguments returns 60 items."""
    experiment = models.Experiment()
    generated = experiment.generate()
    self.assertEqual(len(generated), 60)
def test_generate_3(self):
    """generate() with 5 trials, 'alpha' data and size 4 returns 20 items."""
    experiment = models.Experiment()
    generated = experiment.generate(
        num_trials=5,
        data_type='alpha',
        matrix_size=4,
    )
    self.assertEqual(len(generated), 20)
def post(self):
    """Queue a new experiment and schedule one run per pipeline description.

    For every (pipeline description, pipeline run id) pair in the request
    body a Celery task is sent by name with ``eta`` set to the requested
    start time. The run and each of its steps are then stored with status
    "PENDING" and committed. The experiment record itself is stored after
    all runs have been handled.

    Returns:
        A tuple of the experiment dict (including its "pipeline_runs"
        list) and the HTTP status code 201.
    """
    # TODO: possibly use marshal() on the request body
    #       https://flask-restplus.readthedocs.io/en/stable/api.html#flask_restplus.marshal
    #       to make sure the default values etc. are filled in.
    payload = request.get_json()

    # TODO: maybe we can expect a datetime (in the schema) so we do not
    #       have to parse it here.
    #       https://flask-restplus.readthedocs.io/en/stable/api.html#flask_restplus.fields.DateTime
    scheduled_start = datetime.fromisoformat(payload["scheduled_start"])

    scheduled_runs = []
    run_spec = payload["pipeline_run_spec"]
    descriptions_with_ids = zip(
        payload["pipeline_descriptions"], payload["pipeline_run_ids"]
    )

    # TODO: This can be made more efficient, since the pipeline is the
    #       same for all pipeline runs. The only difference is the
    #       parameters. So all the jobs could be created in batch.
    for description, run_id in descriptions_with_ids:
        # The spec dict is updated in place and then expanded into the
        # pipeline constructor.
        run_spec["pipeline_description"] = description
        pipeline = construct_pipeline(**run_spec)

        # Celery object bound to the Flask application.
        celery = make_celery(current_app)
        experiment_uuid = payload["experiment_uuid"]
        project_uuid = payload["project_uuid"]

        # The task is addressed by name rather than imported, because of
        # circular imports.
        task = celery.send_task(
            "app.core.tasks.start_non_interactive_pipeline_run",
            eta=scheduled_start,
            kwargs={
                "experiment_uuid": experiment_uuid,
                "project_uuid": project_uuid,
                "pipeline_description": pipeline.to_dict(),
                "run_config": run_spec["run_config"],
            },
        )

        # NOTE: only relevant if a result backend is configured. The task
        # returns nothing, so its result is dropped to let the backend
        # release the resources associated with it.
        task.forget()

        run_record = {
            "experiment_uuid": experiment_uuid,
            "run_uuid": task.id,
            "pipeline_run_id": run_id,
            "pipeline_uuid": pipeline.properties["uuid"],
            "project_uuid": project_uuid,
            "status": "PENDING",
        }
        db.session.add(models.NonInteractiveRun(**run_record))

        # TODO: this code is also in `namespace_runs`. Could potentially
        #       be put in a function for modularity.
        # Every step of the pipeline starts out as "PENDING".
        step_records = [
            models.NonInteractiveRunPipelineStep(
                experiment_uuid=experiment_uuid,
                run_uuid=task.id,
                step_uuid=step.properties["uuid"],
                status="PENDING",
            )
            for step in pipeline.steps
        ]
        db.session.bulk_save_objects(step_records)
        db.session.commit()

        run_record["pipeline_steps"] = step_records
        scheduled_runs.append(run_record)

    experiment = {
        "experiment_uuid": payload["experiment_uuid"],
        "project_uuid": payload["project_uuid"],
        "pipeline_uuid": payload["pipeline_uuid"],
        "scheduled_start": scheduled_start,
        "total_number_of_pipeline_runs": len(scheduled_runs),
    }
    db.session.add(models.Experiment(**experiment))
    db.session.commit()

    experiment["pipeline_runs"] = scheduled_runs
    return experiment, 201
def post(self):
    """Queue a new experiment and schedule one run per pipeline description.

    Each (pipeline description, pipeline run id) pair from the request
    body results in a Celery task sent by name with ``eta`` set to the
    requested start time, plus a stored run and stored steps, all with
    status 'PENDING'. The experiment record is stored once every run has
    been handled.

    Returns:
        A tuple of the experiment dict (including its 'pipeline_runs'
        list) and the HTTP status code 201.
    """
    # TODO: possibly use marshal() on the request body
    #       https://flask-restplus.readthedocs.io/en/stable/api.html#flask_restplus.marshal
    #       to make sure the default values etc. are filled in.
    body = request.get_json()

    # TODO: maybe we can expect a datetime (in the schema) so we do not
    #       have to parse it here.
    #       https://flask-restplus.readthedocs.io/en/stable/api.html#flask_restplus.fields.DateTime
    scheduled_start = datetime.fromisoformat(body['scheduled_start'])

    created_runs = []
    spec = body['pipeline_run_spec']
    pairs = zip(body['pipeline_descriptions'], body['pipeline_run_ids'])

    # TODO: This can be made more efficient, since the pipeline is the
    #       same for all pipeline runs. The only difference is the
    #       parameters. So all the jobs could be created in batch.
    for description, run_id in pairs:
        # The spec is mutated in place before being expanded into the
        # pipeline constructor.
        spec['pipeline_description'] = description
        pipeline = construct_pipeline(**spec)

        # Celery object bound to the Flask application.
        celery = make_celery(current_app)
        experiment_uuid = body['experiment_uuid']
        job_kwargs = dict(
            experiment_uuid=experiment_uuid,
            pipeline_description=pipeline.to_dict(),
            run_config=spec['run_config'],
        )

        # Sent by name instead of importing the task function, because
        # of circular imports.
        async_result = celery.send_task(
            'app.core.tasks.start_non_interactive_pipeline_run',
            eta=scheduled_start,
            kwargs=job_kwargs)
        run_uuid = async_result.id

        run_entry = {
            'experiment_uuid': experiment_uuid,
            'run_uuid': run_uuid,
            'pipeline_run_id': run_id,
            'pipeline_uuid': pipeline.properties['uuid'],
            'status': 'PENDING',
        }
        db.session.add(models.NonInteractiveRun(**run_entry))

        # TODO: this code is also in `namespace_runs`. Could potentially
        #       be put in a function for modularity.
        # Every step of the pipeline starts out as 'PENDING'.
        step_entries = [
            models.NonInteractiveRunPipelineStep(
                experiment_uuid=experiment_uuid,
                run_uuid=run_uuid,
                step_uuid=step.properties['uuid'],
                status='PENDING',
            )
            for step in pipeline.steps
        ]
        db.session.bulk_save_objects(step_entries)
        db.session.commit()

        run_entry['pipeline_steps'] = step_entries
        created_runs.append(run_entry)

    experiment = {
        'experiment_uuid': body['experiment_uuid'],
        'pipeline_uuid': body['pipeline_uuid'],
        'scheduled_start': scheduled_start,
    }
    db.session.add(models.Experiment(**experiment))
    db.session.commit()

    experiment['pipeline_runs'] = created_runs
    return experiment, 201
def post(self):
    """Queues a new experiment.

    The experiment record is written first. Then, for every
    (pipeline definition, pipeline run id) pair in the request body, a
    run with a pre-generated task id and its steps are stored with status
    "PENDING". Celery tasks are only sent after all runs have been
    written, and only if no error was collected along the way.

    Returns:
        On success, a tuple of the experiment dict (including its
        "pipeline_runs" list) and the HTTP status code 201.
        If an ``errors.ImageNotFound`` was raised while locking the
        environment images, the collected messages are logged, the stored
        runs and steps are updated, and a tuple of a ``{"message": ...}``
        dict and the HTTP status code 500 is returned.
    """
    # TODO: possibly use marshal() on the post_data. Note that we
    #       have moved over to using flask_restx
    #       https://flask-restx.readthedocs.io/en/stable/api.html#flask_restx.marshal
    #       to make sure the default values etc. are filled in.
    post_data = request.get_json()

    # TODO: maybe we can expect a datetime (in the schema) so we
    #       do not have to parse it here. Again note that we are now
    #       using flask_restx
    #       https://flask-restx.readthedocs.io/en/stable/api.html#flask_restx.fields.DateTime
    scheduled_start = post_data["scheduled_start"]
    scheduled_start = datetime.fromisoformat(scheduled_start)

    experiment = {
        "experiment_uuid": post_data["experiment_uuid"],
        "project_uuid": post_data["project_uuid"],
        "pipeline_uuid": post_data["pipeline_uuid"],
        "scheduled_start": scheduled_start,
        "total_number_of_pipeline_runs": len(post_data["pipeline_definitions"]),
    }
    db.session.add(models.Experiment(**experiment))
    db.session.commit()

    pipeline_runs = []
    pipeline_run_spec = post_data["pipeline_run_spec"]
    env_uuid_docker_id_mappings = None
    # this way we write the entire exp to db, but avoid
    # launching any run (celery task) if we detected a problem
    experiment_creation_error_messages = []
    tasks_to_launch = []

    # TODO: This can be made more efficient, since the pipeline
    #       is the same for all pipeline runs. The only
    #       difference is the parameters. So all the jobs could
    #       be created in batch.
    for pipeline_definition, id_ in zip(
        post_data["pipeline_definitions"], post_data["pipeline_run_ids"]
    ):
        pipeline_run_spec["pipeline_definition"] = pipeline_definition
        pipeline = construct_pipeline(**post_data["pipeline_run_spec"])

        # specify the task_id beforehand to avoid race conditions
        # between the task and its presence in the db
        task_id = str(uuid.uuid4())

        non_interactive_run = {
            "experiment_uuid": post_data["experiment_uuid"],
            "run_uuid": task_id,
            "pipeline_run_id": id_,
            "pipeline_uuid": pipeline.properties["uuid"],
            "project_uuid": post_data["project_uuid"],
            "status": "PENDING",
        }
        db.session.add(models.NonInteractivePipelineRun(**non_interactive_run))
        # need to flush because otherwise the bulk insertion of
        # pipeline steps will lead to foreign key errors
        # https://docs.sqlalchemy.org/en/13/orm/persistence_techniques.html#bulk-operations-caveats
        db.session.flush()

        # TODO: this code is also in `namespace_runs`. Could
        #       potentially be put in a function for modularity.
        # Set an initial value for the status of the pipeline
        # steps that will be run.
        pipeline_steps = [
            models.PipelineRunStep(
                run_uuid=task_id,
                step_uuid=step.properties["uuid"],
                status="PENDING",
            )
            for step in pipeline.steps
        ]
        db.session.bulk_save_objects(pipeline_steps)
        db.session.commit()

        non_interactive_run["pipeline_steps"] = pipeline_steps
        pipeline_runs.append(non_interactive_run)

        # get docker ids of images to use and make it so that the
        # images will not be deleted in case they become
        # outdated by an environment rebuild
        # compute it only once because this way we are guaranteed
        # that the mappings will be the same for all runs, having
        # a new environment build terminate while submitting the
        # different runs won't affect the experiment
        if env_uuid_docker_id_mappings is None:
            try:
                env_uuid_docker_id_mappings = lock_environment_images_for_run(
                    task_id,
                    post_data["project_uuid"],
                    pipeline.get_environments(),
                )
            except errors.ImageNotFound as e:
                experiment_creation_error_messages.append(
                    f"Pipeline was referencing environments for "
                    f"which an image does not exist, {e}"
                )
        else:
            # Later runs reuse the mappings obtained for the first run.
            # NOTE(review): presumably lock_environment_images_for_run
            # records the mappings for the run it is called with, which
            # is why they are only written here for subsequent runs --
            # confirm against that helper.
            image_mappings = [
                models.PipelineRunImageMapping(
                    run_uuid=task_id,
                    orchest_environment_uuid=env_uuid,
                    docker_img_id=docker_id,
                )
                for env_uuid, docker_id in env_uuid_docker_id_mappings.items()
            ]
            db.session.bulk_save_objects(image_mappings)
            db.session.commit()

        if not experiment_creation_error_messages:
            # prepare the args for the task
            run_config = pipeline_run_spec["run_config"]
            run_config["env_uuid_docker_id_mappings"] = env_uuid_docker_id_mappings
            celery_job_kwargs = {
                "experiment_uuid": post_data["experiment_uuid"],
                "project_uuid": post_data["project_uuid"],
                "pipeline_definition": pipeline.to_dict(),
                "run_config": run_config,
            }

            # Due to circular imports we use the task name instead
            # of importing the function directly.
            tasks_to_launch.append(
                {
                    "name": "app.core.tasks.start_non_interactive_pipeline_run",
                    "eta": scheduled_start,
                    "kwargs": celery_job_kwargs,
                    "task_id": task_id,
                }
            )

    experiment["pipeline_runs"] = pipeline_runs

    if not experiment_creation_error_messages:
        # Create Celery object with the Flask context
        celery = make_celery(current_app)
        for task in tasks_to_launch:
            res = celery.send_task(**task)
            # NOTE: this is only if a backend is configured.
            # The task does not return anything. Therefore we can
            # forget its result and make sure that the Celery
            # backend releases resources (for storing and
            # transmitting results) associated to the task.
            res.forget()

        return experiment, 201

    current_app.logger.error("\n".join(experiment_creation_error_messages))

    # simple way to update both in memory objects
    # and the db while avoiding multiple update statements
    # (1 for each object)
    for pipeline_run in experiment["pipeline_runs"]:
        # Each run is a plain dict, so the status must be set by key;
        # attribute assignment on a dict raises AttributeError.
        # NOTE(review): runs are marked "SUCCESS" while their steps are
        # marked "FAILURE"; kept as in the original -- confirm intent.
        pipeline_run["status"] = "SUCCESS"
        for step in pipeline_run["pipeline_steps"]:
            step.status = "FAILURE"

        models.PipelineRunStep.query.filter_by(
            run_uuid=pipeline_run["run_uuid"]
        ).update({"status": "FAILURE"})

    models.NonInteractivePipelineRun.query.filter_by(
        experiment_uuid=post_data["experiment_uuid"]
    ).update({"status": "SUCCESS"})
    db.session.commit()

    return {
        "message": (
            "Failed to create experiment because not all referenced "
            "environments are available."
        )
    }, 500