def build_packages(python_packages, bucket):
    # Download each package's source (or requirements.txt) from S3 and record the
    # pip argument that will be used to build/install it
    cmd_partial = {}
    build_order = get_build_order(python_packages)
    for package_name in build_order:
        python_package = python_packages[package_name]
        if package_name == "requirements.txt":
            requirements_path = os.path.join(LOCAL_PACKAGE_PATH, package_name)
            aws.download_file_from_s3(python_package["src_key"], requirements_path, bucket)
            cmd_partial[package_name] = "-r " + requirements_path
        else:
            aws.download_and_extract_zip(python_package["src_key"], LOCAL_PACKAGE_PATH, bucket)
            cmd_partial[package_name] = os.path.join(LOCAL_PACKAGE_PATH, package_name)

    logger.info("Setting up packages")

    restricted_packages = get_restricted_packages()

    # Build wheels for each package and verify that no wheel pins a restricted
    # (cortex-managed) dependency to a version other than the required one
    for package_name in build_order:
        package_wheel_path = os.path.join(WHEELHOUSE_PATH, package_name)
        requirement = cmd_partial[package_name]
        logger.info("Building: {}".format(package_name))
        completed_process = run(
            "pip3 wheel -w {} {}".format(package_wheel_path, requirement).split()
        )
        if completed_process.returncode != 0:
            raise UserException("creating wheels", package_name)

        for wheelname in os.listdir(package_wheel_path):
            name_split = wheelname.split("-")
            dist_name, version = name_split[0], name_split[1]
            expected_version = restricted_packages.get(dist_name, None)
            if expected_version is not None and version != expected_version:
                raise UserException(
                    "when installing {}, found {}=={} but cortex requires {}=={}".format(
                        package_name, dist_name, version, dist_name, expected_version
                    )
                )

    logger.info("Validating packages")

    # Validate the wheels by installing them from the local wheelhouse only (no index access)
    for package_name in build_order:
        requirement = cmd_partial[package_name]
        logger.info("Installing: {}".format(package_name))
        completed_process = run(
            "pip3 install --no-index --find-links={} {}".format(
                os.path.join(WHEELHOUSE_PATH, package_name), requirement
            ).split()
        )
        if completed_process.returncode != 0:
            raise UserException("installing package", package_name)

    logger.info("Caching built packages")

    # Upload each built wheelhouse to S3 so workloads can install without rebuilding
    for package_name in build_order:
        aws.compress_zip_and_upload(
            os.path.join(WHEELHOUSE_PATH, package_name),
            python_packages[package_name]["package_key"],
            bucket,
        )

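# "run" is used above but not defined in this excerpt. A minimal sketch of the assumed
# behavior: a thin wrapper around subprocess.run that executes the given argument list and
# returns the CompletedProcess so callers can inspect .returncode. This is an illustrative
# assumption, not the repo's actual helper; the name _run_sketch is made up to avoid
# shadowing the real implementation.
import subprocess

def _run_sketch(cmd_args):
    # cmd_args is a list of strings, e.g. the result of "pip3 wheel ...".split()
    return subprocess.run(cmd_args)
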
def install_packages(python_packages, bucket):
    build_order = get_build_order(python_packages)

    for package_name in build_order:
        python_package = python_packages[package_name]
        aws.download_and_extract_zip(
            python_package["package_key"], os.path.join(WHEELHOUSE_PATH, package_name), bucket
        )

    if "requirements.txt" in python_packages:
        aws.download_file_from_s3(
            python_packages["requirements.txt"]["src_key"], "/requirements.txt", bucket
        )

    for package_name in build_order:
        cmd = package_name
        if package_name == "requirements.txt":
            cmd = "-r /requirements.txt"

        completed_process = run(
            "pip3 install --no-cache-dir --no-index --find-links={} {}".format(
                os.path.join(WHEELHOUSE_PATH, package_name), cmd
            ).split()
        )
        if completed_process.returncode != 0:
            raise UserException("installing package", package_name)

    util.rm_file("/requirements.txt")
    util.rm_dir(WHEELHOUSE_PATH)

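# Hypothetical usage sketch (illustration only, not part of the module). The python_packages
# argument is assumed to be a dict keyed by package name, where each entry carries the S3 keys
# referenced above: "src_key" (the uploaded source) and "package_key" (the cached wheelhouse
# archive). The key paths and bucket name below are made up for the example.
def _example_package_workflow():
    python_packages = {
        "requirements.txt": {
            "src_key": "python_packages/requirements.txt/src",
            "package_key": "python_packages/requirements.txt/package.zip",
        },
        "my_package": {
            "src_key": "python_packages/my_package/src.zip",
            "package_key": "python_packages/my_package/package.zip",
        },
    }
    build_packages(python_packages, "my-cortex-bucket")    # build wheels and cache them in S3
    install_packages(python_packages, "my-cortex-bucket")  # later: install from the cached wheelhouses
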
def __init__(self, **kwargs):
    if "cache_dir" in kwargs:
        self.cache_dir = kwargs["cache_dir"]
    elif "local_path" in kwargs:
        local_path_dir = os.path.dirname(os.path.abspath(kwargs["local_path"]))
        self.cache_dir = os.path.join(local_path_dir, "cache")
    else:
        raise ValueError("cache_dir must be specified (or inferred from local_path)")

    util.mkdir_p(self.cache_dir)

    # Load the serialized context from a local msgpack file, an in-memory object, or S3
    if "local_path" in kwargs:
        ctx_raw = util.read_msgpack(kwargs["local_path"])
        self.ctx = _deserialize_raw_ctx(ctx_raw)
    elif "obj" in kwargs:
        self.ctx = kwargs["obj"]
    elif "s3_path" in kwargs:
        local_ctx_path = os.path.join(self.cache_dir, "context.json")
        bucket, key = aws.deconstruct_s3_path(kwargs["s3_path"])
        aws.download_file_from_s3(key, local_ctx_path, bucket)
        ctx_raw = util.read_msgpack(local_ctx_path)
        self.ctx = _deserialize_raw_ctx(ctx_raw)
    else:
        raise ValueError("invalid context args: " + str(kwargs))

    self.workload_id = kwargs.get("workload_id")

    self.id = self.ctx["id"]
    self.key = self.ctx["key"]
    self.cortex_config = self.ctx["cortex_config"]
    self.dataset_version = self.ctx["dataset_version"]
    self.root = self.ctx["root"]
    self.raw_dataset_key = self.ctx["raw_dataset_key"]
    self.status_prefix = self.ctx["status_prefix"]
    self.app = self.ctx["app"]
    self.environment = self.ctx["environment"]
    self.raw_features = self.ctx["raw_features"]
    self.transformed_features = self.ctx["transformed_features"]
    self.transformers = self.ctx["transformers"]
    self.aggregators = self.ctx["aggregators"]
    self.aggregates = self.ctx["aggregates"]
    self.constants = self.ctx["constants"]
    self.models = self.ctx["models"]
    self.apis = self.ctx["apis"]
    self.training_datasets = {k: v["dataset"] for k, v in self.models.items()}

    self.bucket = self.cortex_config["bucket"]
    self.region = self.cortex_config["region"]

    self.api_version = self.cortex_config["api_version"]
    if self.api_version != consts.CORTEX_VERSION:
        raise ValueError(
            "API version mismatch (Context: {}, Image: {})".format(
                self.api_version, consts.CORTEX_VERSION
            )
        )

    self.features = util.merge_dicts_overwrite(
        self.raw_features, self.transformed_features  # self.aggregates
    )

    self.values = util.merge_dicts_overwrite(self.aggregates, self.constants)

    self.raw_feature_names = list(self.raw_features.keys())
    self.transformed_feature_names = list(self.transformed_features.keys())
    self.feature_names = list(self.features.keys())

    # Internal caches
    self._transformer_impls = {}
    self._aggregator_impls = {}
    self._model_impls = {}

    # This affects TensorFlow S3 access
    os.environ["AWS_REGION"] = self.region

    # Id map
    self.rf_id_map = ResourceMap(self.raw_features)
    self.ag_id_map = ResourceMap(self.aggregates)
    self.tf_id_map = ResourceMap(self.transformed_features)
    self.td_id_map = ResourceMap(self.training_datasets)
    self.models_id_map = ResourceMap(self.models)
    self.apis_id_map = ResourceMap(self.apis)
    self.constants_id_map = ResourceMap(self.constants)

    self.id_map = util.merge_dicts_overwrite(
        self.rf_id_map,
        self.ag_id_map,
        self.tf_id_map,
        self.td_id_map,
        self.models_id_map,
        self.apis_id_map,
        self.constants_id_map,
    )

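# Hypothetical construction examples (illustration only), assuming the __init__ above belongs
# to the Context class referenced by the surrounding code. Per that __init__, a context can be
# built from a serialized context in S3 ("s3_path", with an explicit "cache_dir"), from a local
# msgpack file ("local_path", with cache_dir inferred), or from an already-deserialized dict
# ("obj"). The paths and workload id below are made up for the example.
def _example_context_init():
    ctx_from_s3 = Context(
        s3_path="s3://my-bucket/apps/my-app/contexts/context.msgpack",
        cache_dir="/mnt/context",
        workload_id="abc123",
    )
    ctx_from_file = Context(local_path="/mnt/context/context.msgpack")
    return ctx_from_s3, ctx_from_file
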
def get_file(self, impl_key, cache_impl_path):
    if not os.path.isfile(cache_impl_path):
        aws.download_file_from_s3(impl_key, cache_impl_path, self.bucket)
    return cache_impl_path
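
# Hypothetical usage of get_file (illustration only): fetch an implementation file from the
# context's S3 bucket into the local cache directory, downloading it only if it is not already
# cached. The S3 key and filename below are made up for the example.
def _example_get_transformer_impl(ctx):
    impl_key = "transformers/my_transformer.py"
    cache_path = os.path.join(ctx.cache_dir, "my_transformer.py")
    return ctx.get_file(impl_key, cache_path)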