Example 1
    def _helper_run_job(
        self,
        input_parameters: Optional[Union[Dict, str, Path]] = None,
        test_job: bool = False,
        track_status: bool = False,
        name: Optional[str] = None,
    ) -> "Job":
        """
        Helper function to create and run a new real or test job.

        Args:
            input_parameters: Either a dictionary of workflow parameters or the
                file path to a JSON file containing them.
            test_job: If True, runs a test query (search for available imagery
                based on your data parameters) instead of a real job.
            track_status: If True, automatically calls job.track_status, which
                queries the job status every 30 seconds.
            name: The job name. Optional, by default the workflow name is assigned.

        Returns:
            The spawned real or test job object.
        """
        if input_parameters is None:
            raise ValueError(
                "Provide the job parameters, e.g. via workflow.construct_parameters()!"
            )

        if isinstance(input_parameters, (str, Path)):
            logger.info("Loading job parameters from JSON file.")
            with open(input_parameters) as src:
                input_parameters = json.load(src)

        if test_job:
            input_parameters = input_parameters.copy()  # type: ignore
            input_parameters.update({"config": {"mode": "DRY_RUN"}})  # type: ignore
            logger.info("+++++++++++++++++++++++++++++++++")
            logger.info("Running this job as Test Query...")
            logger.info("+++++++++++++++++++++++++++++++++")

        logger.info(f"Selected input_parameters: {input_parameters}")

        if name is None:
            name = self._info["name"]
        name = f"{name}_py"  # Temporary marker to recognize Python API usage.
        url = (f"{self.auth._endpoint()}/projects/{self.project_id}/"
               f"workflows/{self.workflow_id}/jobs?name={name}")
        response_json = self.auth._request(request_type="POST",
                                           url=url,
                                           data=input_parameters)
        job_json = response_json["data"]
        logger.info(f"Created and running new job: {job_json['id']}.")
        job = Job(
            self.auth,
            job_id=job_json["id"],
            project_id=self.project_id,
        )

        if track_status:
            job.track_status()
        return job
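
A minimal usage sketch (an assumption, not part of the source): driving the helper from an instance of the workflow class it belongs to. The parameter dict is a hypothetical placeholder; construct_parameters and track_status are only known from the docstring and error message above.

# Hypothetical driver code; `workflow` is an instance of the class that
# defines _helper_run_job, and the parameter contents are placeholders.
input_parameters = {"some_block:1": {"limit": 1}}

# Dry run: the helper injects {"config": {"mode": "DRY_RUN"}} and queries
# for available imagery instead of running the real workflow.
test_job = workflow._helper_run_job(input_parameters, test_job=True)

# Real run; track_status polls the job status every 30 seconds.
job = workflow._helper_run_job(input_parameters, track_status=True)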
Example 2
    def _helper_run_parallel_jobs(
        self,
        input_parameters_list: Optional[List[dict]] = None,
        max_concurrent_jobs: int = 10,
        test_job: bool = False,
        name: Optional[str] = None,
    ) -> "JobCollection":
        """
        Helper function to create and run parallel real or test jobs.

        Args:
            input_parameters_list: List of dictionaries of input parameters.
            max_concurrent_jobs: Maximum number of parallel jobs that can be triggered.
            test_job: If set, runs a test query (search for available imagery based on your data parameters).
            name: The job name. Optional, by default the workflow name is assigned.

        Returns:
            The JobCollection of the spawned real or test jobs.

        Raises:
            ValueError: When max_concurrent_jobs exceeds the maximum concurrent
                jobs allowed by the project settings.
        """
        if input_parameters_list is None:
            raise ValueError(
                "Provide the job parameters via `input_parameters_list`."
                " You can use workflow.construct_parallel_parameters()!")

        if test_job:
            input_parameters_list = copy.deepcopy(input_parameters_list)
            for input_parameters in input_parameters_list:
                input_parameters.update({"config": {"mode": "DRY_RUN"}})
            logger.info("+++++++++++++++++++++++++++++++++")
            logger.info("Running the jobs as Test Queries...")
            logger.info("+++++++++++++++++++++++++++++++++")

        if name is None:
            name = self._info["name"]

        jobs_list = []
        job_nr = 0

        if max_concurrent_jobs > self.max_concurrent_jobs:
            logger.error(
                f"Maximum concurrent jobs {max_concurrent_jobs} greater "
                f"than project settings {self.max_concurrent_jobs}. "
                "Use project.update_project_settings to change this value.")
            raise ValueError("Too many concurrent jobs!")

        # Run the jobs in sequential batches of at most max_concurrent_jobs.
        batches = [
            input_parameters_list[pos:pos + max_concurrent_jobs] for pos in
            range(0, len(input_parameters_list), max_concurrent_jobs)
        ]
        for batch in batches:
            batch_jobs = []
            for params in batch:
                logger.info(f"Selected input_parameters: {params}.")

                # Temporary marker to recognize Python API usage.
                job_name = f"{name}_{job_nr}_py"

                url = (f"{self.auth._endpoint()}/projects/{self.project_id}/"
                       f"workflows/{self.workflow_id}/jobs?name={job_name}")
                response_json = self.auth._request(request_type="POST",
                                                   url=url,
                                                   data=params)
                job_json = response_json["data"]
                logger.info(f"Created and running new job: {job_json['id']}.")
                job = Job(
                    self.auth,
                    job_id=job_json["id"],
                    project_id=self.project_id,
                )
                batch_jobs.append(job)
                job_nr += 1

            # Track until all jobs in the batch are finished.
            for job in batch_jobs:
                try:
                    job.track_status(report_time=20)
                except ValueError as e:
                    if str(e) == "Job has failed! See the above log.":
                        logger.warning("Skipping failed job...")
                    else:
                        raise
            jobs_list.extend(batch_jobs)

        job_collection = JobCollection(self.auth,
                                       project_id=self.project_id,
                                       jobs=jobs_list)
        return job_collection
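
Again a hedged sketch (not from the source) of how the parallel helper might be driven; the parameter dicts are placeholders, and the concurrency value assumes the project settings allow at least 2 concurrent jobs.

# Hypothetical driver code for _helper_run_parallel_jobs.
params_list = [{"some_block:1": {"limit": 1}} for _ in range(5)]

# Runs the jobs in sequential batches of at most 2; each batch is tracked
# to completion (failed jobs are logged and skipped) before the next starts.
collection = workflow._helper_run_parallel_jobs(
    input_parameters_list=params_list,
    max_concurrent_jobs=2,
    name="parallel_example",
)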