def train(args):
    """Train the model selected by ``args.model`` and upload its zipped export.

    Reads ``args.context``, ``args.cache_dir``, ``args.workload_id`` and
    ``args.model``. The model's resource status is reported as started before
    training, as succeeded right after training returns, and as failed if any
    ``Exception`` is raised during training or the subsequent zip/upload.

    On failure the error is logged and the process exits with status 1.
    """
    ctx = Context(
        s3_path=args.context,
        cache_dir=args.cache_dir,
        workload_id=args.workload_id,
    )
    package.install_packages(ctx.python_packages, ctx.storage)
    model = ctx.models_id_map[args.model]

    logger.info("Training")

    # Shared by both failure handlers; formatted inside the handler so the
    # model name lookup happens at the same point as before.
    failure_hint = "An error occurred, see `cortex logs -v model {}` for more details."

    with util.Tempdir(ctx.cache_dir) as work_dir:
        training_dir = os.path.join(work_dir, "model_dir")
        ctx.upload_resource_status_start(model)

        try:
            estimator_impl, _ = ctx.get_estimator_impl(model["name"])
            train_util.train(model["name"], estimator_impl, ctx, training_dir)
            ctx.upload_resource_status_success(model)

            logger.info("Caching")
            logger.info("Caching model " + model["name"])

            # Zip the estimator export found under the training directory and
            # upload the archive under the model's storage key.
            archive_path = os.path.join(work_dir, "model.zip")
            export_dir = os.path.join(training_dir, "export", "estimator")
            util.zip_dir(export_dir, archive_path)
            ctx.storage.upload_file(archive_path, model["key"])

            util.log_job_finished(ctx.workload_id)
        except CortexException as e:
            ctx.upload_resource_status_failed(model)
            e.wrap("error")
            logger.error(str(e))
            logger.exception(failure_hint.format(model["name"]))
            sys.exit(1)
        except Exception:
            ctx.upload_resource_status_failed(model)
            logger.exception(failure_hint.format(model["name"]))
            sys.exit(1)
def build(args):
    """Build the Python packages listed in ``args.python_packages``.

    ``args.python_packages`` is a comma-separated string of package ids; each
    id is looked up in ``ctx.pp_id_map``. The resource status of all selected
    packages is reported as started before the build, as failed if the build
    (or the job-finished logging) raises an ``Exception``, and as succeeded
    otherwise. Errors are logged; this function does not exit the process.
    """
    ctx = Context(
        s3_path=args.context,
        cache_dir=args.cache_dir,
        workload_id=args.workload_id,
    )

    package_ids = args.python_packages.split(",")
    packages = [ctx.pp_id_map[package_id] for package_id in package_ids]
    # build_packages receives the packages keyed by their "name" field.
    packages_by_name = {pkg["name"]: pkg for pkg in packages}

    ctx.upload_resource_status_start(*packages)
    try:
        build_packages(packages_by_name, ctx.storage)
        util.log_job_finished(ctx.workload_id)
    except CortexException as e:
        e.wrap("error")
        logger.exception(e)
        ctx.upload_resource_status_failed(*packages)
    except Exception as e:
        logger.exception(e)
        ctx.upload_resource_status_failed(*packages)
    else:
        # Kept in the else clause so an error while reporting success is not
        # caught by the handlers above.
        ctx.upload_resource_status_success(*packages)