def run(self):
    if self.run_on_yarn:
        # Dump job as a local json file for the yarn launcher
        job_name = f"job-{self.config.name}.json"
        with open(job_name, "w") as file:
            json.dump(self.job, file, indent=4)

        # Launch job on yarn
        pex_path = self.config.upload_pex_cpu()
        with skein.Client() as skein_client:
            LOGGER.info(f"Submitting job {self.config.name}")
            app_id = submit(
                skein_client=skein_client,
                module_name="deepr.cli.main",
                additional_files=[job_name],
                archive_hdfs=pex_path,
                args=["from_config", job_name, "-", "run"],
                env_vars=self.config.get_env_vars(),
                hadoop_file_systems=self.config.hadoop_file_systems,
                memory=self.config.memory,
                name=self.config.name,
                num_cores=self.config.num_cores,
            )
            report = skein_client.application_report(app_id)
            LOGGER.info(f"TRACKING_URL: {report.tracking_url}")
            mlflow.clear_run()
    else:
        LOGGER.info("Not running on yarn.")
        job = from_config(self.job)
        job.run()
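Both branches above rely on a config-driven factory: the job is kept as a JSON-serializable dict and only instantiated (via `from_config`) right before it runs, either on the cluster or locally. The snippet below is a minimal, self-contained sketch of that pattern; the `SleepJob` class, the registry, and the config keys are hypothetical illustrations, not the library's actual implementation.

import json
import time


class SleepJob:
    """Toy job used only for this illustration."""

    def __init__(self, seconds: float):
        self.seconds = seconds

    def run(self):
        time.sleep(self.seconds)


# Hypothetical registry mapping a config's "type" field to a job class.
_JOBS = {"SleepJob": SleepJob}


def from_config_sketch(config: dict):
    """Instantiate a job from a plain dict (illustrative stand-in for from_config)."""
    params = dict(config)
    cls = _JOBS[params.pop("type")]
    return cls(**params)


# A dict like this is what the launcher dumps to JSON before submission,
# so the very same file can be re-read and executed on the cluster.
job_config = {"type": "SleepJob", "seconds": 0.1}
print(json.dumps(job_config, indent=4))
from_config_sketch(job_config).run()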
def run(self):
    sampled = list(self.sampler)
    for idx, params in enumerate(sampled):
        LOGGER.info(f"Launching job with params: {params}")

        # Update macro params with sampled values
        macros = deepcopy(self.macros)
        macros["params"] = {**macros["params"], **params}
        assert_no_macros(macros["params"])

        # Parse config and run job
        parsed = parse_config(self.job, macros)
        job = from_config(parsed)
        if not isinstance(job, base.Job):
            raise TypeError(f"Expected type Job but got {type(job)}")
        job.run()
        mlflow.clear_run()

        # Sleep so that time-based parameters differ between consecutive runs
        if idx + 1 < len(sampled):
            LOGGER.info("Sleeping 2 seconds before next experiment\n")
            time.sleep(2)
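For context, the loop above assumes a sampler that yields one parameter dict per experiment, which is then merged into the `params` macro before the config is parsed. Below is a minimal sketch of that sampling-and-merge step, with a hypothetical `grid_sampler` helper and made-up parameter names; it is only meant to illustrate the pattern, not the library's sampler implementation.

from copy import deepcopy
from itertools import product


def grid_sampler(grid):
    """Yield one params dict per combination of the grid values (hypothetical helper)."""
    keys = sorted(grid)
    for values in product(*(grid[key] for key in keys)):
        yield dict(zip(keys, values))


# Base macros, mirroring the self.macros["params"] structure used above.
macros = {"params": {"learning_rate": 0.01, "batch_size": 64}}

for params in grid_sampler({"learning_rate": [0.001, 0.01], "batch_size": [32, 64]}):
    merged = deepcopy(macros)
    merged["params"] = {**merged["params"], **params}
    print(merged["params"])  # each run would parse its job config with these values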