def _helper_run_job(
    self,
    input_parameters: Optional[Union[Dict, str, Path]] = None,
    test_job: bool = False,
    track_status: bool = False,
    name: Optional[str] = None,
) -> "Job":
    """
    Helper function to create and run a new real or test job.

    Args:
        input_parameters: Either a dictionary of workflow parameters or a
            filepath to a parameters json.
        test_job: If set, runs a test query (search for available imagery
            based on your data parameters).
        track_status: Automatically attaches workflow.track_status, which
            queries the job status every 30 seconds.
        name: The job name. Optional, by default the workflow name is assigned.

    Returns:
        The spawned real or test job object.
    """
    if input_parameters is None:
        raise ValueError(
            "Select the job_parameters, use workflow.construct_parameters()!"
        )

    if isinstance(input_parameters, (str, Path)):
        with open(input_parameters) as src:
            input_parameters = json.load(src)
        logger.info("Loading job parameters from json file.")

    if test_job:
        input_parameters = input_parameters.copy()  # type: ignore
        input_parameters.update({"config": {"mode": "DRY_RUN"}})  # type: ignore
        logger.info("+++++++++++++++++++++++++++++++++")
        logger.info("Running this job as Test Query...")
        logger.info("+++++++++++++++++++++++++++++++++")

    logger.info(f"Selected input_parameters: {input_parameters}")

    if name is None:
        name = self._info["name"]
    name = f"{name}_py"  # Temporary recognition of python API usage.
    url = (f"{self.auth._endpoint()}/projects/{self.project_id}/"
           f"workflows/{self.workflow_id}/jobs?name={name}")
    response_json = self.auth._request(request_type="POST",
                                       url=url,
                                       data=input_parameters)
    job_json = response_json["data"]
    logger.info(f"Created and running new job: {job_json['id']}.")
    job = Job(
        self.auth,
        job_id=job_json["id"],
        project_id=self.project_id,
    )
    if track_status:
        job.track_status()
    return job
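# Usage sketch (illustrative, not part of the library): this helper is reached
# through the public Workflow.test_job / Workflow.run_job wrappers. The
# `workflow` and `aoi` names below are assumed to exist in the caller's scope.
def _example_run_job(workflow, aoi):
    # Build parameters, dry-run them as a test query, then spawn and track
    # the real job.
    params = workflow.construct_parameters(geometry=aoi, geometry_operation="bbox")
    workflow.test_job(input_parameters=params)
    return workflow.run_job(input_parameters=params, track_status=True)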
def get_jobs(self,
             return_json: bool = False,
             test_jobs: bool = True,
             real_jobs: bool = True) -> Union[JobCollection, List[Dict]]:
    """
    Get all jobs in the project as a JobCollection or json.

    Use Workflow().get_jobs() to get a JobCollection with the jobs associated
    with a specific workflow.

    Args:
        return_json: If true, returns the job info jsons instead of a JobCollection.
        test_jobs: Return test jobs (test queries).
        real_jobs: Return real jobs.

    Returns:
        All job objects in a JobCollection, or alternatively the job info as json.
    """
    url = f"{self.auth._endpoint()}/projects/{self.project_id}/jobs"
    response_json = self.auth._request(request_type="GET", url=url)
    jobs_json = filter_jobs_on_mode(response_json["data"], test_jobs, real_jobs)

    logger.info(f"Got {len(jobs_json)} jobs in project {self.project_id}.")
    if return_json:
        return jobs_json
    else:
        jobs = [
            Job(self.auth, job_id=job["id"], project_id=self.project_id)
            for job in tqdm(jobs_json)
        ]
        jobcollection = JobCollection(auth=self.auth,
                                      project_id=self.project_id,
                                      jobs=jobs)
        return jobcollection
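# Usage sketch (assumes an initialized Project instance named `project`):
# fetch only the real (non test-query) jobs as a JobCollection, or the raw
# job metadata as a list of dicts.
def _example_get_jobs(project):
    jobcollection = project.get_jobs(test_jobs=False)
    jobs_info = project.get_jobs(return_json=True)
    return jobcollection, jobs_info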
def initialize_job(job_id: str) -> "Job":
    """
    Returns a Job object (has to exist on UP42).

    Args:
        job_id: The UP42 job_id.
    """
    if _auth is None:
        raise RuntimeError("Not authenticated, call up42.authenticate() first")
    job = Job(auth=_auth, job_id=job_id, project_id=str(_auth.project_id))
    logger.info(f"Initialized {job}")
    return job
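# Usage sketch (credentials and ids are placeholders): authenticate first,
# then re-attach to an existing job by its id.
def _example_initialize_job():
    import up42  # assumes the up42-py package is installed
    up42.authenticate(project_id="your-project-id", project_api_key="your-api-key")
    return up42.initialize_job(job_id="your-job-id")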
def job(ctx, job_id):
    """
    Get job status, results and more.
    """
    ctx.obj = Job(ctx.obj, project_id=ctx.obj.project_id, job_id=job_id)
    if not os.environ.get("UP42_JOB_ID"):
        logger.info(
            "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
        logger.info("Run the following command to persist this job id:")
        logger.info(f"export UP42_JOB_ID={job_id}")
        logger.info(
            "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
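# Sketch of the environment-variable persistence pattern hinted at above
# (illustrative only): a later CLI invocation can recover the job id from
# UP42_JOB_ID instead of requiring it as an argument.
import os

def _example_resolve_job_id(cli_argument=None):
    # Prefer an explicit argument, fall back to the persisted variable.
    return cli_argument or os.environ.get("UP42_JOB_ID")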
def initialize_jobcollection(job_ids: List[str]) -> "JobCollection":
    """
    Returns a JobCollection object (the referenced jobs have to exist on UP42).

    Args:
        job_ids: List of UP42 job_ids.
    """
    if _auth is None:
        raise RuntimeError("Not authenticated, call up42.authenticate() first")
    jobs = [
        Job(auth=_auth, job_id=job_id, project_id=str(_auth.project_id))
        for job_id in job_ids
    ]
    jobcollection = JobCollection(
        auth=_auth, project_id=str(_auth.project_id), jobs=jobs
    )
    logger.info(f"Initialized {jobcollection}")
    return jobcollection
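# Usage sketch (job ids are placeholders): batch several existing jobs into
# one JobCollection, e.g. to handle all of their results in one place.
def _example_initialize_jobcollection():
    import up42  # assumes the up42-py package is installed
    up42.authenticate(project_id="your-project-id", project_api_key="your-api-key")
    return up42.initialize_jobcollection(job_ids=["job-id-1", "job-id-2"])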
def _helper_run_parallel_jobs(
    self,
    input_parameters_list: Optional[List[dict]] = None,
    max_concurrent_jobs: int = 10,
    test_job: bool = False,
    name: Optional[str] = None,
) -> "JobCollection":
    """
    Helper function to create and run parallel real or test jobs.

    Args:
        input_parameters_list: List of dictionaries of input parameters.
        max_concurrent_jobs: Maximum number of parallel jobs that can be triggered.
        test_job: If set, runs test queries (search for available imagery
            based on your data parameters).
        name: The job name. Optional, by default the workflow name is assigned.

    Returns:
        The spawned real or test jobs as a JobCollection.

    Raises:
        ValueError: When max_concurrent_jobs is greater than the
            max_concurrent_jobs set in the project settings.
    """
    if input_parameters_list is None:
        raise ValueError(
            "Provide the job parameters via `input_parameters_list`."
            " You can use workflow.construct_parallel_parameters()!")

    if test_job:
        input_parameters_list = copy.deepcopy(input_parameters_list)
        for input_parameters in input_parameters_list:
            input_parameters.update({"config": {"mode": "DRY_RUN"}})  # type: ignore
        logger.info("+++++++++++++++++++++++++++++++++")
        logger.info("Running this job as Test Query...")
        logger.info("+++++++++++++++++++++++++++++++++")

    if name is None:
        name = self._info["name"]

    jobs_list = []
    job_nr = 0

    if max_concurrent_jobs > self.max_concurrent_jobs:
        logger.error(
            f"Maximum concurrent jobs {max_concurrent_jobs} greater "
            f"than project settings {self.max_concurrent_jobs}. "
            "Use project.update_project_settings to change this value.")
        raise ValueError("Too many concurrent jobs!")

    # Run the jobs in parallel batches of at most max_concurrent_jobs.
    batches = [
        input_parameters_list[pos:pos + max_concurrent_jobs]
        for pos in range(0, len(input_parameters_list), max_concurrent_jobs)
    ]
    for batch in batches:
        batch_jobs = []
        for params in batch:
            logger.info(f"Selected input_parameters: {params}.")

            job_name = (
                f"{name}_{job_nr}_py"  # Temporary recognition of python API usage.
            )
            url = (f"{self.auth._endpoint()}/projects/{self.project_id}/"
                   f"workflows/{self.workflow_id}/jobs?name={job_name}")
            response_json = self.auth._request(request_type="POST",
                                               url=url,
                                               data=params)
            job_json = response_json["data"]
            logger.info(f"Created and running new job: {job_json['id']}")
            job = Job(
                self.auth,
                job_id=job_json["id"],
                project_id=self.project_id,
            )
            batch_jobs.append(job)
            job_nr += 1

        # Track until all jobs in the batch are finished.
        for job in batch_jobs:
            try:
                job.track_status(report_time=20)
            except ValueError as e:
                if str(e) == "Job has failed! See the above log.":
                    logger.warning("Skipping failed job...")
                else:
                    raise
        jobs_list.extend(batch_jobs)

    job_collection = JobCollection(self.auth,
                                   project_id=self.project_id,
                                   jobs=jobs_list)
    return job_collection
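# Usage sketch (illustrative, not part of the library): this helper is reached
# through the public Workflow.run_jobs_parallel / Workflow.test_jobs_parallel
# wrappers. The `workflow` and `aois` names below are assumed to exist in the
# caller's scope.
def _example_run_parallel_jobs(workflow, aois):
    # One parameter dict per AOI, run in batches of at most 5 concurrent jobs.
    params_list = workflow.construct_parallel_parameters(
        geometries=aois, geometry_operation="bbox"
    )
    return workflow.run_jobs_parallel(
        input_parameters_list=params_list, max_concurrent_jobs=5
    )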