def _helper_run_job(
    self,
    input_parameters: Union[Dict, str, Path] = None,
    test_job: bool = False,
    track_status: bool = False,
    name: str = None,
) -> "Job":
    """
    Helper function to create and run a new real or test job.

    Args:
        input_parameters: Either a dictionary of workflow parameters or a file path
            (str or Path) to a json file containing them.
        test_job: If set, runs a test query (search for available imagery based on
            your data parameters).
        track_status: Automatically attaches workflow.track_status which queries
            the job status every 30 seconds.
        name: The job name. Optional, by default the workflow name is assigned.

    Returns:
        The spawned real or test job object.
    """
    if input_parameters is None:
        raise ValueError(
            "Select the job_parameters, use workflow.construct_parameters()!"
        )

    # A str or Path is treated as a file path to a json file of job parameters.
    if isinstance(input_parameters, (str, Path)):
        with open(input_parameters) as src:
            input_parameters = json.load(src)
            logger.info("Loading job parameters from json file.")

    if test_job:
        # A DRY_RUN config turns the job into a test query instead of a real run.
        input_parameters = input_parameters.copy()  # type: ignore
        input_parameters.update({"config": {"mode": "DRY_RUN"}})  # type: ignore
        logger.info("+++++++++++++++++++++++++++++++++")
        logger.info("Running this job as Test Query...")
        logger.info("+++++++++++++++++++++++++++++++++")

    logger.info(f"Selected input_parameters: {input_parameters}")

    if name is None:
        name = self._info["name"]
    name = f"{name}_py"  # Temporary recognition of python API usage.
    url = (
        f"{self.auth._endpoint()}/projects/{self.project_id}/"
        f"workflows/{self.workflow_id}/jobs?name={name}"
    )
    response_json = self.auth._request(
        request_type="POST", url=url, data=input_parameters
    )
    job_json = response_json["data"]
    logger.info(f"Created and running new job: {job_json['id']}.")
    job = Job(
        self.auth,
        job_id=job_json["id"],
        project_id=self.project_id,
    )

    if track_status:
        job.track_status()
    return job
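
# Usage sketch (illustrative, not part of the original module): a direct call to the
# helper above. In the SDK this private helper is normally reached through public
# run/test job methods; the `workflow` object, the block/task name and the parameter
# values here are assumptions for illustration only.
def _example_single_job(workflow):
    # Parameters may be passed as a dict (as shown) or as a str/Path pointing to a
    # json file, in which case the helper loads them from disk.
    params = {
        "example-data-block:1": {
            "time": "2021-01-01T00:00:00Z/2021-01-31T23:59:59Z",
            "limit": 1,
        }
    }
    # test_job=True only triggers a DRY_RUN query (no processing is run);
    # track_status=True would block until the spawned job finishes.
    return workflow._helper_run_job(
        input_parameters=params, test_job=True, track_status=False, name="example"
    )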
def _helper_run_parallel_jobs(
    self,
    input_parameters_list: List[dict] = None,
    max_concurrent_jobs: int = 10,
    test_job: bool = False,
    name: str = None,
) -> "JobCollection":
    """
    Helper function to create and run parallel real or test jobs.

    Args:
        input_parameters_list: List of dictionaries of input parameters, one per job.
        max_concurrent_jobs: Maximum number of parallel jobs that can be triggered.
        test_job: If set, runs a test query (search for available imagery based on
            your data parameters).
        name: The job name. Optional, by default the workflow name is assigned.

    Returns:
        The spawned real or test job collection object.

    Raises:
        ValueError: When max_concurrent_jobs is greater than the maximum number of
            concurrent jobs set in the project settings.
    """
    if input_parameters_list is None:
        raise ValueError(
            "Provide the job parameters via `input_parameters_list`."
            " You can use workflow.construct_parallel_parameters()!"
        )

    if test_job:
        # A DRY_RUN config turns each job into a test query instead of a real run.
        input_parameters_list = copy.deepcopy(input_parameters_list)
        for input_parameters in input_parameters_list:
            input_parameters.update({"config": {"mode": "DRY_RUN"}})  # type: ignore
        logger.info("+++++++++++++++++++++++++++++++++")
        logger.info("Running this job as Test Query...")
        logger.info("+++++++++++++++++++++++++++++++++")

    if name is None:
        name = self._info["name"]

    jobs_list = []
    job_nr = 0

    if max_concurrent_jobs > self.max_concurrent_jobs:
        logger.error(
            f"Maximum concurrent jobs {max_concurrent_jobs} greater "
            f"than project settings {self.max_concurrent_jobs}. "
            "Use project.update_project_settings to change this value."
        )
        raise ValueError("Too many concurrent jobs!")

    # Run all jobs in parallel batches of max_concurrent_jobs (max. 10).
    batches = [
        input_parameters_list[pos : pos + max_concurrent_jobs]
        for pos in range(0, len(input_parameters_list), max_concurrent_jobs)
    ]
    for batch in batches:
        batch_jobs = []
        for params in batch:
            logger.info(f"Selected input_parameters: {params}.")

            job_name = f"{name}_{job_nr}_py"  # Temporary recognition of python API usage.
            url = (
                f"{self.auth._endpoint()}/projects/{self.project_id}/"
                f"workflows/{self.workflow_id}/jobs?name={job_name}"
            )
            response_json = self.auth._request(
                request_type="POST", url=url, data=params
            )
            job_json = response_json["data"]
            logger.info(f"Created and running new job: {job_json['id']}")
            job = Job(
                self.auth,
                job_id=job_json["id"],
                project_id=self.project_id,
            )
            batch_jobs.append(job)
            job_nr += 1

        # Track until all jobs in the batch are finished.
        for job in batch_jobs:
            try:
                job.track_status(report_time=20)
            except ValueError as e:
                if str(e) == "Job has failed! See the above log.":
                    logger.warning("Skipping failed job...")
                else:
                    raise
        jobs_list.extend(batch_jobs)

    job_collection = JobCollection(
        self.auth, project_id=self.project_id, jobs=jobs_list
    )
    return job_collection
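
# Usage sketch (illustrative, not part of the original module): running several
# parameter sets through the parallel helper above. The `workflow` object, the
# block/task name and the parameter dictionaries are assumptions for illustration.
def _example_parallel_jobs(workflow, aoi_geometries):
    # One input_parameters dict per job, e.g. one AOI geometry per job.
    input_parameters_list = [
        {"example-data-block:1": {"intersects": geometry, "limit": 1}}
        for geometry in aoi_geometries
    ]
    # Jobs are triggered in batches of at most max_concurrent_jobs; each batch is
    # tracked to completion before the next batch starts, and failed jobs are skipped.
    return workflow._helper_run_parallel_jobs(
        input_parameters_list=input_parameters_list,
        max_concurrent_jobs=5,
        test_job=False,
        name="example_parallel",
    )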