Example #1
0
def test_create_task_output_from_task_throws_when_there_are_entries(
    tmp_path: Path, faker: Faker
):
    """A schema with a required entry must raise ``ValueError``.

    Nothing is written into ``tmp_path`` by this test, and the outputs file
    name is a random one generated by ``faker``.
    """
    required_entry_schema = {"some_output": {"required": True}}
    task_output_schema = TaskOutputDataSchema.parse_obj(required_entry_schema)

    # only the creation of the output data is expected to fail
    with pytest.raises(ValueError):
        TaskOutputData.from_task_output(
            schema=task_output_schema,
            output_folder=tmp_path,
            output_file_ext=faker.file_name(),
        )
Example #2
0
async def parse_output_data(
    db_engine: Engine, job_id: str, data: TaskOutputData
) -> None:
    """Write the outputs of a dask task to the output ports of its node.

    The service, user, project and node identifiers are extracted from
    ``job_id``. Each ``(key, value)`` entry of ``data`` is then set on the
    output port named ``key``; ``FileUrl`` values are replaced by their
    ``url`` before being set.
    """
    job_id_parts = parse_dask_job_id(job_id)
    service_key, service_version, user_id, project_id, node_id = job_id_parts
    logger.debug(
        "parsing output %s of dask task for %s:%s of user %s on project '%s' and node '%s'",
        json_dumps(data, indent=2),
        service_key,
        service_version,
        user_id,
        project_id,
        node_id,
    )

    ports = await _create_node_ports(
        db_engine=db_engine,
        user_id=user_id,
        project_id=project_id,
        node_id=node_id,
    )
    for port_key, port_value in data.items():
        # file outputs are transferred as their plain url
        value_to_transfer: Optional[links.ItemValue] = (
            port_value.url if isinstance(port_value, FileUrl) else port_value
        )
        # the outputs are awaited anew for every port
        node_outputs = await ports.outputs
        await node_outputs[port_key].set_value(value_to_transfer)
async def test_failed_or_aborted_task_cleans_output_files(
    scheduler: BaseCompScheduler,
    minimal_app: FastAPI,
    user_id: PositiveInt,
    aiopg_engine: Iterator[aiopg.sa.engine.Engine],  # type: ignore
    mocked_dask_client_send_task: mock.MagicMock,
    published_project: PublishedProject,
    state: RunningState,
    mocked_clean_task_output_fct: mock.MagicMock,
    mocked_scheduler_task: None,
):
    """Completing a task with ``state`` must call the output clean-up once.

    The completion handler of the scheduler is invoked directly with a
    hand-made ``TaskStateEvent`` for the first task of the published project.
    The task state stored in the DB is then expected to equal ``state``.
    """
    # the project is published: its comp services are in PUBLISHED state
    first_task = published_project.tasks[0]
    project_uuid = published_project.project.uuid

    # artificially build the completion event the scheduler would receive
    completion_event = TaskStateEvent(
        job_id=generate_dask_job_id(
            "simcore/service/comp/pytest/fake",
            "12.34.55",
            user_id,
            project_uuid,
            first_task.node_id,
        ),
        msg=TaskOutputData.parse_obj({"output_1": "some fake data"}).json(),
        state=state,
    )
    await cast(DaskScheduler, scheduler)._on_task_completed(completion_event)

    await assert_comp_tasks_state(
        aiopg_engine,
        project_uuid,
        [first_task.node_id],
        exp_state=state,
    )
    mocked_clean_task_output_fct.assert_called_once()
Example #4
0
    def fake_sidecar_fct(
        docker_auth: DockerBasicAuth,
        service_key: str,
        service_version: str,
        input_data: TaskInputData,
        output_data_keys: TaskOutputDataSchema,
        log_file_url: AnyUrl,
        command: List[str],
        expected_annotations: Dict[str, Any],
    ) -> TaskOutputData:
        """Fake sidecar that listens for cancellation messages.

        Checks the annotations of the current task, then iterates over the
        messages of the ``TaskCancelEvent`` topic. Raises
        ``asyncio.CancelledError`` as soon as a message whose ``job_id``
        equals the key of the current task is received.
        """
        cancellation_sub = Sub(TaskCancelEvent.topic_name())
        # get the task data
        current_worker = get_worker()
        # NOTE: the local must stay named `task`, the f-string below prints it
        task = current_worker.tasks.get(current_worker.get_current_task())
        assert task is not None
        print(f"--> task {task=} started")
        assert task.annotations == expected_annotations
        # sleep a bit in case someone is aborting us
        print("--> waiting for task to be aborted...")
        for msg in cancellation_sub:
            assert msg
            print(f"--> received cancellation msg: {msg=}")
            received_event = TaskCancelEvent.parse_raw(msg)  # type: ignore
            assert received_event
            if received_event.job_id != task.key:
                # not for us, keep listening
                continue
            print("--> raising cancellation error now")
            raise asyncio.CancelledError("task cancelled")

        return TaskOutputData.parse_obj({"some_output_key": 123})
Example #5
0
def test_create_task_output_from_task_throws_when_there_are_missing_files(
    tmp_path: Path, faker: Faker
):
    """A schema with a required file output must raise ``ValueError``.

    Nothing is written into ``tmp_path`` by this test, so the file named by
    the ``mapping`` entry is never created here.
    """
    required_file_schema = {
        "required_file_output": {
            "required": True,
            "url": "s3://some_file_url",
            "mapping": "the_output_filename",
        },
    }
    task_output_schema = TaskOutputDataSchema.parse_obj(required_file_schema)

    # only the creation of the output data is expected to fail
    with pytest.raises(ValueError):
        TaskOutputData.from_task_output(
            schema=task_output_schema,
            output_folder=tmp_path,
            output_file_ext=faker.file_name(),
        )
Example #6
0
    def fake_remote_fct(
        docker_auth: DockerBasicAuth,
        service_key: str,
        service_version: str,
        input_data: TaskInputData,
        output_data_keys: TaskOutputDataSchema,
        log_file_url: AnyUrl,
        command: List[str],
    ) -> TaskOutputData:
        """Fake remote function that can be cancelled through an event.

        Sets the ``_DASK_EVENT_NAME`` event, then waits up to 10 seconds on
        the cancel event named after the current task key. Raises
        ``TaskCancelledError`` when that event is set, otherwise returns a
        fixed output.
        """
        # get the task data
        current_worker = get_worker()
        # NOTE: the local must stay named `task`, the f-string below prints it
        task = current_worker.tasks.get(current_worker.get_current_task())
        assert task is not None
        print(f"--> task {task=} started")
        abort_requested = Event(TaskCancelEventName.format(task.key))
        # tell the client we are started
        Event(_DASK_EVENT_NAME).set()
        # sleep a bit in case someone is aborting us
        print("--> waiting for task to be aborted...")
        abort_requested.wait(timeout=10)
        if not abort_requested.is_set():
            return TaskOutputData.parse_obj({"some_output_key": 123})

        # NOTE: asyncio.CancelledError is not propagated back to the client...
        print("--> raising cancellation error now")
        raise TaskCancelledError
async def test_parse_output_data(
    aiopg_engine: aiopg.sa.engine.Engine,  # type: ignore
    published_project: PublishedProject,
    user_id: UserID,
    fake_io_schema: Dict[str, Dict[str, str]],
    fake_task_output_data: TaskOutputData,
    mocker: MockerFixture,
):
    """``parse_output_data`` must call ``Port.set_value`` with every output value.

    ``FileUrl`` outputs are expected to be passed as their ``url`` only.
    """
    # need some fakes set in the DB: the schema is set with empty outputs
    sleeper_task: CompTaskAtDB = published_project.tasks[1]
    empty_outputs = {}
    await set_comp_task_outputs(
        aiopg_engine, sleeper_task.node_id, fake_io_schema, empty_outputs
    )
    # mock the set_value function so we can test it is called correctly
    set_value_mock = mocker.patch("simcore_sdk.node_ports_v2.port.Port.set_value")

    # test
    sleeper_image = sleeper_task.image
    dask_job_id = generate_dask_job_id(
        sleeper_image.name,
        sleeper_image.tag,
        user_id,
        published_project.project.uuid,
        sleeper_task.node_id,
    )
    await parse_output_data(aiopg_engine, dask_job_id, fake_task_output_data)

    # the FileUrl types are converted to a pure url
    expected_values = {}
    for key, output in fake_task_output_data.items():
        expected_values[key] = output.url if isinstance(output, FileUrl) else output
    expected_calls = [mock.call(value) for value in expected_values.values()]
    set_value_mock.assert_has_calls(expected_calls)
Example #8
0
    async def _retrieve_output_data(
        self,
        task_volumes: TaskSharedVolumes,
        integration_version: version.Version,
    ) -> TaskOutputData:
        """Build the task output data from the outputs folder and upload its files.

        The outputs are read with ``TaskOutputData.from_task_output`` using
        ``outputs.json`` when ``integration_version`` is greater than
        ``LEGACY_INTEGRATION_VERSION`` and ``output.json`` otherwise. Every
        ``FileUrl`` output is then pushed to its ``url``, all uploads being
        awaited together.

        Raises:
            ServiceBadFormattedOutputError: when a ``ValueError`` or a
                ``ValidationError`` is raised inside the ``try`` block.
        """
        try:
            await self._publish_sidecar_log("Retrieving output data...")
            files_in_outputs_folder = list(task_volumes.outputs_folder.rglob("*"))
            logger.debug(
                "following files are located in output folder %s:\n%s",
                task_volumes.outputs_folder,
                pformat(files_in_outputs_folder),
            )
            logger.debug(
                "following outputs will be searched for:\n%s",
                self.output_data_keys.json(indent=1),
            )

            # the name of the outputs file depends on the integration version
            if integration_version > LEGACY_INTEGRATION_VERSION:
                outputs_file_name = "outputs.json"
            else:
                outputs_file_name = "output.json"
            output_data = TaskOutputData.from_task_output(
                self.output_data_keys,
                task_volumes.outputs_folder,
                outputs_file_name,
            )

            upload_tasks = []
            for output_params in output_data.values():
                if not isinstance(output_params, FileUrl):
                    # only file outputs need to be uploaded
                    continue
                assert (  # nosec
                    output_params.file_mapping
                ), f"{output_params.json(indent=1)} expected resolved in TaskOutputData.from_task_output"

                src_path = task_volumes.outputs_folder / output_params.file_mapping
                upload = push_file_to_remote(
                    src_path, output_params.url, self._publish_sidecar_log
                )
                upload_tasks.append(upload)
            await asyncio.gather(*upload_tasks)

            await self._publish_sidecar_log("All the output data were uploaded.")
            logger.info("retrieved outputs data:\n%s", output_data.json(indent=1))
            return output_data

        except (ValueError, ValidationError) as exc:
            raise ServiceBadFormattedOutputError(
                service_key=self.service_key,
                service_version=self.service_version,
                exc=exc,
            ) from exc
Example #9
0
def test_create_task_output_from_task_does_not_throw_when_there_are_optional_entries(
    tmp_path: Path, faker: Faker
):
    """A schema with only a non-required entry must produce empty output data.

    Nothing is written into ``tmp_path`` by this test.
    """
    optional_entry_schema = {"some_output": {"required": False}}
    task_output_schema = TaskOutputDataSchema.parse_obj(optional_entry_schema)

    task_output_data = TaskOutputData.from_task_output(
        schema=task_output_schema,
        output_folder=tmp_path,
        output_file_ext=faker.file_name(),
    )

    # no exception was raised and no entry was created
    assert len(task_output_data) == 0
Example #10
0
 def fake_remote_fct(
     docker_auth: DockerBasicAuth,
     service_key: str,
     service_version: str,
     input_data: TaskInputData,
     output_data_keys: TaskOutputDataSchema,
     log_file_url: AnyUrl,
     command: List[str],
 ) -> TaskOutputData:
     """Fake remote function that waits on an event, then succeeds or fails.

     Waits up to 5 seconds on the ``_DASK_EVENT_NAME`` event. Raises
     ``ValueError`` when the enclosing ``fail_remote_fct`` is truthy,
     otherwise returns a fixed output.
     """
     # wait here until the client allows us to continue
     Event(_DASK_EVENT_NAME).wait(timeout=5)
     if not fail_remote_fct:
         return TaskOutputData.parse_obj({"some_output_key": 123})
     raise ValueError("We fail because we're told to!")
Example #11
0
    def fake_sidecar_fct(
        docker_auth: DockerBasicAuth,
        service_key: str,
        service_version: str,
        input_data: TaskInputData,
        output_data_keys: TaskOutputDataSchema,
        log_file_url: AnyUrl,
        command: List[str],
    ) -> TaskOutputData:
        """Fake sidecar that checks a current task exists and returns a fixed output."""
        # get the task data
        current_worker = get_worker()
        current_task_key = current_worker.get_current_task()
        current_task = current_worker.tasks.get(current_task_key)
        assert current_task is not None

        return TaskOutputData.parse_obj({"some_output_key": 123})
Example #12
0
def test_create_task_output_from_task_does_not_throw_when_there_are_optional_missing_files(
    tmp_path: Path, faker: Faker
):
    """A schema with only a non-required file output must produce empty output data.

    Nothing is written into ``tmp_path`` by this test, so the file named by
    the ``mapping`` entry is never created here.
    """
    optional_file_schema = {
        "optional_file_output": {
            "required": False,
            "url": "s3://some_file_url",
            "mapping": "the_output_filename",
        },
    }
    task_output_schema = TaskOutputDataSchema.parse_obj(optional_file_schema)

    task_output_data = TaskOutputData.from_task_output(
        schema=task_output_schema,
        output_folder=tmp_path,
        output_file_ext=faker.file_name(),
    )

    # no exception was raised and no entry was created
    assert len(task_output_data) == 0
Example #13
0
    async def _on_task_completed(self, event: TaskStateEvent) -> None:
        """Handle the completion event of a task.

        On ``RunningState.SUCCESS`` the event message is parsed as
        ``TaskOutputData`` and handed to ``parse_output_data``; for any other
        state ``clean_task_output_and_log_files_if_invalid`` is called. The
        task state is then stored, a ``service_stopped`` instrumentation
        message is published and the scheduler is woken up.
        """
        logger.debug("received task completion: %s", event)
        (
            service_key,
            service_version,
            user_id,
            project_id,
            node_id,
        ) = parse_dask_job_id(event.job_id)

        assert event.state in COMPLETED_STATES  # nosec

        logger.info(
            "task %s completed with state: %s\n%s",
            event.job_id,
            f"{event.state.value}".lower(),
            event.msg,
        )
        if event.state == RunningState.SUCCESS:
            # we need to parse the results
            assert event.msg  # nosec
            task_outputs = TaskOutputData.parse_raw(event.msg)
            await parse_output_data(self.db_engine, event.job_id, task_outputs)
        else:
            # we need to remove any invalid files in the storage
            await clean_task_output_and_log_files_if_invalid(
                self.db_engine, user_id, project_id, node_id
            )

        comp_tasks_repo = CompTasksRepository(self.db_engine)
        await comp_tasks_repo.set_project_tasks_state(
            project_id, [node_id], event.state
        )
        # instrumentation
        await self.rabbitmq_client.publish_message(
            InstrumentationRabbitMessage(
                metrics="service_stopped",
                user_id=user_id,
                project_id=project_id,
                node_id=node_id,
                service_uuid=node_id,
                service_type=NodeClass.COMPUTATIONAL,
                service_key=service_key,
                service_tag=service_version,
                result=event.state,
            )
        )
        self._wake_up_scheduler_now()
Example #14
0
 def fake_sidecar_fct(
     docker_auth: DockerBasicAuth,
     service_key: str,
     service_version: str,
     input_data: TaskInputData,
     output_data_keys: TaskOutputDataSchema,
     log_file_url: AnyUrl,
     command: List[str],
     expected_annotations: Dict[str, Any],
 ) -> TaskOutputData:
     """Fake sidecar that sleeps one second, checks the task annotations and returns a fixed output."""
     # sleep a bit in case someone is aborting us
     time.sleep(1)
     # get the task data
     current_worker = get_worker()
     current_task_key = current_worker.get_current_task()
     current_task = current_worker.tasks.get(current_task_key)
     assert current_task is not None
     assert current_task.annotations == expected_annotations
     return TaskOutputData.parse_obj({"some_output_key": 123})
Example #15
0
def test_create_task_output_from_task_with_optional_fields_as_required(
    tmp_path: Path, optional_fields_set: bool, faker: Faker
):
    """Check which entries end up in the output data for every schema example.

    For each example of ``TaskOutputDataSchema`` fake outputs are created in
    ``tmp_path``. An entry must be present in the resulting output data when
    it is required or when ``optional_fields_set`` is true, and absent
    otherwise.
    """
    schema_examples = TaskOutputDataSchema.Config.schema_extra["examples"]
    for schema_example in schema_examples:
        task_output_schema = TaskOutputDataSchema.parse_obj(schema_example)
        outputs_file_name = _create_fake_outputs(
            task_output_schema, tmp_path, optional_fields_set, faker
        )
        task_output_data = TaskOutputData.from_task_output(
            schema=task_output_schema,
            output_folder=tmp_path,
            output_file_ext=outputs_file_name,
        )
        assert task_output_data

        for key, value in task_output_schema.items():
            if value.required or optional_fields_set:
                assert task_output_data.get(key) is not None
            else:
                # optional entry that was not set
                assert task_output_data.get(key) is None
Example #16
0
    def fake_sidecar_fct(
        docker_auth: DockerBasicAuth,
        service_key: str,
        service_version: str,
        input_data: TaskInputData,
        output_data_keys: TaskOutputDataSchema,
        log_file_url: AnyUrl,
        command: List[str],
        expected_annotations,
    ) -> TaskOutputData:
        """Fake sidecar that validates its task and command, then waits on an event.

        Asserts the annotations of the current task and that ``command`` is
        ``["run"]``, waits up to 5 seconds on the ``_DASK_EVENT_NAME`` event
        and returns a fixed output.
        """
        # get the task data
        current_worker = get_worker()
        current_task_key = current_worker.get_current_task()
        current_task = current_worker.tasks.get(current_task_key)
        assert current_task is not None
        assert current_task.annotations == expected_annotations
        assert command == ["run"]
        distributed.Event(_DASK_EVENT_NAME).wait(timeout=5)

        return TaskOutputData.parse_obj({"some_output_key": 123})
Example #17
0
    def fake_remote_fct(
        docker_auth: DockerBasicAuth,
        service_key: str,
        service_version: str,
        input_data: TaskInputData,
        output_data_keys: TaskOutputDataSchema,
        log_file_url: AnyUrl,
        command: List[str],
    ) -> TaskOutputData:
        """Fake remote function that publishes one message per event topic.

        A publisher is created for the state, progress and log topics, one
        message is put on each, then the ``_DASK_START_EVENT`` event is set
        and a fixed output is returned.
        """
        announcements = (
            (TaskStateEvent, "my name is state"),
            (TaskProgressEvent, "my name is progress"),
            (TaskLogEvent, "my name is logs"),
        )
        # all the publishers are created before anything is published
        publishers = [
            distributed.Pub(event_class.topic_name())
            for event_class, _ in announcements
        ]
        for publisher, (_, message) in zip(publishers, announcements):
            publisher.put(message)
        # tell the client we are done
        Event(name=_DASK_START_EVENT).set()

        return TaskOutputData.parse_obj({"some_output_key": 123})
Example #18
0
def ubuntu_task(request: FixtureRequest,
                ftp_server: List[URL]) -> ServiceExampleParam:
    """Creates a console task that checks for the expected files and errors in case they are missing.

    The task is a bash command line, built as a list of shell commands joined
    with ``&&``, that:
      - lists the input, output and log folders,
      - fails if one of the expected input files or the inputs json file is missing,
      - checks for the presence (legacy integration) or absence (newer integration)
        of the legacy service log file,
      - finally writes the outputs json file and the output files.

    Arguments:
        request: its ``param`` is parsed as the service integration version
        ftp_server: the urls of the files used as file inputs; the first one is
            also used to derive the output file and log file urls

    Returns:
        the service parameters together with the expected output data and logs
    """
    integration_version = version.Version(request.param)
    print("Using service integration:", integration_version)
    # defines the inputs of the task: 4 plain values, then for every ftp file
    # one file input without mapping and one file input with a file mapping
    input_data = TaskInputData.parse_obj({
        "input_1": 23,
        "input_23": "a string input",
        "the_input_43": 15.0,
        "the_bool_input_54": False,
        **{
            f"some_file_input_{index+1}": FileUrl(url=f"{file}")
            for index, file in enumerate(ftp_server)
        },
        **{
            f"some_file_input_with_mapping{index+1}": FileUrl(url=f"{file}",
                                                              file_mapping=f"{index+1}/some_file_input")
            for index, file in enumerate(ftp_server)
        },
    })
    # check in the console that the expected files are present in the expected INPUT folder (set as ${INPUT_FOLDER} in the service)
    file_names = [file.path for file in ftp_server]
    list_of_commands = [
        "echo User: $(id $(whoami))",
        "echo Inputs:",
        "ls -tlah -R ${INPUT_FOLDER}",
        "echo Outputs:",
        "ls -tlah -R ${OUTPUT_FOLDER}",
        "echo Logs:",
        "ls -tlah -R ${LOG_FOLDER}",
    ]
    # every input file must exist (exit 1 otherwise), then its contents are echoed
    list_of_commands += [
        f"(test -f ${{INPUT_FOLDER}}/{file} || (echo ${{INPUT_FOLDER}}/{file} does not exists && exit 1))"
        for file in file_names
    ] + [f"echo $(cat ${{INPUT_FOLDER}}/{file})" for file in file_names]

    # the name of the inputs json file depends on the integration version
    input_json_file_name = ("inputs.json"
                            if integration_version > LEGACY_INTEGRATION_VERSION
                            else "input.json")

    # the inputs json file must exist (exit 1 otherwise), then its contents
    # are echoed and the task sleeps for a random 1 to 4 seconds
    list_of_commands += [
        f"(test -f ${{INPUT_FOLDER}}/{input_json_file_name} || (echo ${{INPUT_FOLDER}}/{input_json_file_name} file does not exists && exit 1))",
        f"echo $(cat ${{INPUT_FOLDER}}/{input_json_file_name})",
        f"sleep {randint(1,4)}",
    ]

    # defines the expected outputs
    jsonable_outputs = {
        "pytest_string": "is quite an amazing feat",
        "pytest_integer": 432,
        "pytest_float": 3.2,
        "pytest_bool": False,
    }
    # both file outputs use the same url, derived from the first ftp url
    output_file_url = next(iter(ftp_server)).with_path("output_file")
    # all the outputs (plain values and files) are marked as required
    expected_output_keys = TaskOutputDataSchema.parse_obj({
        **{k: {
            "required": True
        }
           for k in jsonable_outputs.keys()},
        **{
            "pytest_file": {
                "required": True,
                "mapping": "a_outputfile",
                "url": f"{output_file_url}",
            },
            "pytest_file_with_mapping": {
                "required": True,
                "mapping": "subfolder/a_outputfile",
                "url": f"{output_file_url}",
            },
        },
    })
    expected_output_data = TaskOutputData.parse_obj({
        **jsonable_outputs,
        **{
            "pytest_file": {
                "url": f"{output_file_url}",
                "file_mapping": "a_outputfile",
            },
            "pytest_file_with_mapping": {
                "url": f"{output_file_url}",
                "file_mapping": "subfolder/a_outputfile",
            },
        },
    })
    # the double quotes are escaped since the json is echoed from the shell
    jsonized_outputs = json.dumps(jsonable_outputs).replace('"', '\\"')
    # the name of the outputs json file depends on the integration version
    output_json_file_name = ("outputs.json" if
                             integration_version > LEGACY_INTEGRATION_VERSION
                             else "output.json")

    # check for the log file: with the legacy version the file must exist,
    # with a newer version the test is negated (the file must NOT exist)
    list_of_commands += [
        "echo $(ls -tlah ${LOG_FOLDER})",
        f"(test {'!' if integration_version > LEGACY_INTEGRATION_VERSION else ''} -f ${{LOG_FOLDER}}/{LEGACY_SERVICE_LOG_FILE_NAME} || (echo ${{LOG_FOLDER}}/{LEGACY_SERVICE_LOG_FILE_NAME} file does {'' if integration_version > LEGACY_INTEGRATION_VERSION else 'not'} exists && exit 1))",
    ]
    # with the legacy version, the output of every command defined so far is
    # appended to the legacy log file
    if integration_version == LEGACY_INTEGRATION_VERSION:
        list_of_commands = [
            f"{c} >> ${{LOG_FOLDER}}/{LEGACY_SERVICE_LOG_FILE_NAME}"
            for c in list_of_commands
        ]
    # set the final command to generate the output file(s) (files and json output)
    # NOTE: these are added after the legacy redirection above, so they are not redirected
    list_of_commands += [
        f"echo {jsonized_outputs} > ${{OUTPUT_FOLDER}}/{output_json_file_name}",
        "echo 'some data for the output file' > ${OUTPUT_FOLDER}/a_outputfile",
        "mkdir -p ${OUTPUT_FOLDER}/subfolder",
        "echo 'some data for the output file' > ${OUTPUT_FOLDER}/subfolder/a_outputfile",
    ]

    # the log file url is derived from the first ftp url
    log_file_url = parse_obj_as(
        AnyUrl, f"{next(iter(ftp_server)).with_path('log.dat')}")

    return ServiceExampleParam(
        docker_basic_auth=DockerBasicAuth(server_address="docker.io",
                                          username="******",
                                          password=""),
        #
        # NOTE: we use sleeper because it defines a user
        # that can write in outputs and the
        # sidecar can remove the outputs dirs
        #
        service_key="itisfoundation/sleeper",
        service_version="2.1.2",
        # all the commands run in a single bash call and stop at the first failure
        command=[
            "/bin/bash",
            "-c",
            " && ".join(list_of_commands),
        ],
        input_data=input_data,
        output_data_keys=expected_output_keys,
        log_file_url=log_file_url,
        expected_output_data=expected_output_data,
        expected_logs=[
            '{"input_1": 23, "input_23": "a string input", "the_input_43": 15.0, "the_bool_input_54": false}',
            "This is the file contents of 'file_1'",
            "This is the file contents of 'file_2'",
            "This is the file contents of 'file_3'",
        ],
        integration_version=integration_version,
    )
Example #19
0
        ),
        pytest.param(
            RebootState(
                RunningState.STARTED,
                ComputationalBackendTaskResultsNotReadyError(
                    job_id="fake_job_id"),
                RunningState.STARTED,
                RunningState.STARTED,
                RunningState.STARTED,
            ),
            id="reboot with running tasks",
        ),
        pytest.param(
            RebootState(
                RunningState.SUCCESS,
                TaskOutputData.parse_obj({"whatever_output": 123}),
                RunningState.SUCCESS,
                RunningState.SUCCESS,
                RunningState.SUCCESS,
            ),
            id="reboot with completed tasks",
        ),
    ],
)
async def test_handling_scheduling_after_reboot(
    mocked_scheduler_task: None,
    mocked_dask_client: mock.MagicMock,
    aiopg_engine: aiopg.sa.engine.Engine,  # type: ignore
    running_project: RunningProject,
    scheduler: BaseCompScheduler,
    minimal_app: FastAPI,