Esempio n. 1
0
    def run(self):
        """Run the job, either submitted to yarn or in the current process.

        When ``self.run_on_yarn`` is falsy, the job is built from ``self.job``
        with ``from_config`` and run directly.

        Otherwise ``self.job`` is dumped to ``job-<config name>.json`` in the
        current working directory, the result of
        ``self.config.upload_pex_cpu()`` is passed as the archive, and the
        application is submitted through a ``skein.Client``. The tracking URL
        of the submitted application is logged and ``mlflow.clear_run()`` is
        called once the client has been closed.
        """
        # Local execution: nothing to serialize or submit.
        if not self.run_on_yarn:
            LOGGER.info("Not running on yarn.")
            from_config(self.job).run()
            return

        # The submitted process reads the job definition from this json file,
        # which is shipped along with the application as an additional file.
        job_file = f"job-{self.config.name}.json"
        with open(job_file, "w") as handle:
            json.dump(self.job, handle, indent=4)

        archive = self.config.upload_pex_cpu()
        with skein.Client() as client:
            LOGGER.info(f"Submitting job {self.config.name}")
            submit_kwargs = {
                "skein_client": client,
                "module_name": "deepr.cli.main",
                "additional_files": [job_file],
                "archive_hdfs": archive,
                "args": ["from_config", job_file, "-", "run"],
                "env_vars": self.config.get_env_vars(),
                "hadoop_file_systems": self.config.hadoop_file_systems,
                "memory": self.config.memory,
                "name": self.config.name,
                "num_cores": self.config.num_cores,
            }
            application_id = submit(**submit_kwargs)
            app_report = client.application_report(application_id)
            LOGGER.info(f"TRACKING_URL: {app_report.tracking_url}")
        mlflow.clear_run()
Esempio n. 2
0
    def run(self):
        """Run one job per set of parameters produced by ``self.sampler``.

        For each sampled set of parameters, a deep copy of ``self.macros`` has
        its ``"params"`` entry overridden with the sampled values, the job
        config is parsed against those macros, and the resulting job is built
        and run. ``mlflow.clear_run()`` is called after every job.

        Raises:
            TypeError: if the object built from the parsed config is not a
                ``base.Job``.
        """
        all_params = list(self.sampler)
        last_position = len(all_params) - 1

        for position, sampled_params in enumerate(all_params):
            LOGGER.info(f"Launching job with params: {sampled_params}")

            # Work on a copy so that self.macros is left untouched; sampled
            # values take precedence over the existing params.
            run_macros = deepcopy(self.macros)
            run_macros["params"] = {**run_macros["params"], **sampled_params}
            assert_no_macros(run_macros["params"])

            # Build the job from the config parsed with the updated macros
            job = from_config(parse_config(self.job, run_macros))
            if not isinstance(job, base.Job):
                raise TypeError(f"Expected type Job but got {type(job)}")
            job.run()
            mlflow.clear_run()

            # Pause between experiments (never after the last one) so that
            # time-based parameters differ from one run to the next.
            if position < last_position:
                LOGGER.info("Sleeping 2 seconds before next experiment\n")
                time.sleep(2)