Example 1
def build_packages(python_packages, bucket):
    cmd_partial = {}
    build_order = get_build_order(python_packages)
    for package_name in build_order:
        python_package = python_packages[package_name]
        if package_name == "requirements.txt":
            requirements_path = os.path.join(LOCAL_PACKAGE_PATH, package_name)
            aws.download_file_from_s3(python_package["src_key"],
                                      requirements_path, bucket)
            cmd_partial[package_name] = "-r " + requirements_path
        else:
            aws.download_and_extract_zip(python_package["src_key"],
                                         LOCAL_PACKAGE_PATH, bucket)
            cmd_partial[package_name] = os.path.join(LOCAL_PACKAGE_PATH,
                                                     package_name)

    logger.info("Setting up packages")

    restricted_packages = get_restricted_packages()

    for package_name in build_order:
        package_wheel_path = os.path.join(WHEELHOUSE_PATH, package_name)
        requirement = cmd_partial[package_name]
        logger.info("Building: {}".format(package_name))
        completed_process = run("pip3 wheel -w {} {}".format(
            package_wheel_path, requirement).split())

        if completed_process.returncode != 0:
            raise UserException("creating wheels", package_name)

        for wheelname in os.listdir(package_wheel_path):
            name_split = wheelname.split("-")
            dist_name, version = name_split[0], name_split[1]
            expected_version = restricted_packages.get(dist_name, None)
            if expected_version is not None and version != expected_version:
                raise UserException(
                    "when installing {}, found {}=={} but cortex requires {}=={}"
                    .format(package_name, dist_name, version, dist_name,
                            expected_version))

    logger.info("Validating packages")

    for package_name in build_order:
        requirement = cmd_partial[package_name]
        logger.info("Installing: {}".format(package_name))
        completed_process = run(
            "pip3 install --no-index --find-links={} {}".format(
                os.path.join(WHEELHOUSE_PATH, package_name),
                requirement).split())
        if completed_process.returncode != 0:
            raise UserException("installing package", package_name)

    logger.info("Caching built packages")

    for package_name in build_order:
        aws.compress_zip_and_upload(
            os.path.join(WHEELHOUSE_PATH, package_name),
            python_packages[package_name]["package_key"],
            bucket,
        )
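
The version check above relies on the wheel filename convention from PEP 427,
{distribution}-{version}(-{build})?-{python tag}-{abi tag}-{platform tag}.whl,
so splitting on "-" yields the distribution name and version as the first two
fields. A minimal standalone sketch of that parsing step (the filename below
is illustrative):

def parse_wheel_name(wheelname):
    # PEP 427: {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl
    name_split = wheelname.split("-")
    return name_split[0], name_split[1]

# parse_wheel_name("requests-2.21.0-py2.py3-none-any.whl")
# -> ("requests", "2.21.0")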
Example 2
def install_packages(python_packages, bucket):
    build_order = get_build_order(python_packages)

    for package_name in build_order:
        python_package = python_packages[package_name]
        aws.download_and_extract_zip(
            python_package["package_key"],
            os.path.join(WHEELHOUSE_PATH, package_name), bucket)

    if "requirements.txt" in python_packages:
        aws.download_file_from_s3(
            python_packages["requirements.txt"]["src_key"],
            "/requirements.txt", bucket)

    for package_name in build_order:
        cmd = package_name
        if package_name == "requirements.txt":
            cmd = "-r /requirements.txt"

        completed_process = run(
            "pip3 install --no-cache-dir --no-index --find-links={} {}".format(
                os.path.join(WHEELHOUSE_PATH, package_name), cmd).split())
        if completed_process.returncode != 0:
            raise UserException("installing package", package_name)

    util.rm_file("/requirements.txt")
    util.rm_dir(WHEELHOUSE_PATH)
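
The helpers util.rm_file and util.rm_dir are not shown in these examples; a
plausible minimal implementation (an assumption, not the project's actual
code) would simply ignore missing paths:

import os
import shutil

def rm_file(path):
    # Delete a file if it exists; do nothing otherwise
    if os.path.isfile(path):
        os.remove(path)

def rm_dir(path):
    # Delete a directory tree if it exists; do nothing otherwise
    if os.path.isdir(path):
        shutil.rmtree(path)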
Example 3
def build_packages(python_packages, bucket):
    cmd_partial = {}
    build_order = get_build_order(python_packages)
    for package_name in build_order:
        python_package = python_packages[package_name]
        if package_name == "requirements.txt":
            requirements_path = os.path.join(LOCAL_PACKAGE_PATH, package_name)
            aws.download_file_from_s3(python_package["src_key"],
                                      requirements_path, bucket)
            cmd_partial[package_name] = "-r " + requirements_path
        else:
            aws.download_and_extract_zip(python_package["src_key"],
                                         LOCAL_PACKAGE_PATH, bucket)
            cmd_partial[package_name] = os.path.join(LOCAL_PACKAGE_PATH,
                                                     package_name)

    logger.info("Setting up packages")

    for package_name in build_order:
        requirement = cmd_partial[package_name]
        logger.info("Building package {}".format(package_name))
        completed_process = run("pip3 wheel -w {} {}".format(
            os.path.join(WHEELHOUSE_PATH, package_name), requirement).split())
        if completed_process.returncode != 0:
            raise UserException("creating wheels", package_name)

    logger.info("Validating packages")

    for package_name in build_order:
        requirement = cmd_partial[package_name]
        logger.info("Installing package {}".format(package_name))
        completed_process = run(
            "pip3 install --no-index --find-links={} {}".format(
                os.path.join(WHEELHOUSE_PATH, package_name),
                requirement).split())
        if completed_process.returncode != 0:
            raise UserException("installing package", package_name)

    logger.info("Caching built packages")

    for package_name in build_order:
        aws.compress_zip_and_upload(
            os.path.join(WHEELHOUSE_PATH, package_name),
            python_packages[package_name]["package_key"],
            bucket,
        )
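
Each of the build and install examples calls run(...) and then inspects
.returncode, which matches the interface of subprocess.run. A minimal sketch
of such a wrapper, assuming (it is not shown here) that run is a thin layer
over the standard library:

import subprocess

def run(args):
    # Execute the command; return the CompletedProcess so the caller can
    # check returncode (no exception is raised on a non-zero exit)
    return subprocess.run(args)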
Example 4
    def __init__(self, **kwargs):
        if "cache_dir" in kwargs:
            self.cache_dir = kwargs["cache_dir"]
        elif "local_path" in kwargs:
            local_path_dir = os.path.dirname(
                os.path.abspath(kwargs["local_path"]))
            self.cache_dir = os.path.join(local_path_dir, "cache")
        else:
            raise ValueError(
                "cache_dir must be specified (or inferred from local_path)")
        util.mkdir_p(self.cache_dir)

        if "local_path" in kwargs:
            ctx_raw = util.read_msgpack(kwargs["local_path"])
            self.ctx = _deserialize_raw_ctx(ctx_raw)
        elif "obj" in kwargs:
            self.ctx = kwargs["obj"]
        elif "s3_path":
            local_ctx_path = os.path.join(self.cache_dir, "context.json")
            bucket, key = aws.deconstruct_s3_path(kwargs["s3_path"])
            aws.download_file_from_s3(key, local_ctx_path, bucket)
            ctx_raw = util.read_msgpack(local_ctx_path)
            self.ctx = _deserialize_raw_ctx(ctx_raw)
        else:
            raise ValueError("invalid context args: " + kwargs)

        self.workload_id = kwargs.get("workload_id")

        self.id = self.ctx["id"]
        self.key = self.ctx["key"]
        self.cortex_config = self.ctx["cortex_config"]
        self.dataset_version = self.ctx["dataset_version"]
        self.root = self.ctx["root"]
        self.raw_dataset_key = self.ctx["raw_dataset_key"]
        self.status_prefix = self.ctx["status_prefix"]
        self.app = self.ctx["app"]
        self.environment = self.ctx["environment"]
        self.raw_features = self.ctx["raw_features"]
        self.transformed_features = self.ctx["transformed_features"]
        self.transformers = self.ctx["transformers"]
        self.aggregators = self.ctx["aggregators"]
        self.aggregates = self.ctx["aggregates"]
        self.constants = self.ctx["constants"]
        self.models = self.ctx["models"]
        self.apis = self.ctx["apis"]
        self.training_datasets = {
            k: v["dataset"]
            for k, v in self.models.items()
        }

        self.bucket = self.cortex_config["bucket"]
        self.region = self.cortex_config["region"]
        self.api_version = self.cortex_config["api_version"]

        if self.api_version != consts.CORTEX_VERSION:
            raise ValueError(
                "API version mismatch (Context: {}, Image: {})".format(
                    self.api_version, consts.CORTEX_VERSION))

        self.features = util.merge_dicts_overwrite(
            self.raw_features,
            self.transformed_features  # self.aggregates
        )

        self.values = util.merge_dicts_overwrite(self.aggregates,
                                                 self.constants)

        self.raw_feature_names = list(self.raw_features.keys())
        self.transformed_feature_names = list(self.transformed_features.keys())
        self.feature_names = list(self.features.keys())

        # Internal caches
        self._transformer_impls = {}
        self._aggregator_impls = {}
        self._model_impls = {}

        # This affects Tensorflow S3 access
        os.environ["AWS_REGION"] = self.region

        # Id map
        self.rf_id_map = ResourceMap(self.raw_features)
        self.ag_id_map = ResourceMap(self.aggregates)
        self.tf_id_map = ResourceMap(self.transformed_features)
        self.td_id_map = ResourceMap(self.training_datasets)
        self.models_id_map = ResourceMap(self.models)
        self.apis_id_map = ResourceMap(self.apis)
        self.constants_id_map = ResourceMap(self.constants)

        self.id_map = util.merge_dicts_overwrite(
            self.rf_id_map,
            self.ag_id_map,
            self.tf_id_map,
            self.td_id_map,
            self.models_id_map,
            self.apis_id_map,
            self.constants_id_map,
        )
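
Judging from the kwargs handling above, this constructor accepts exactly one
of obj, local_path, or s3_path, and cache_dir is required unless it can be
inferred from local_path. A usage sketch, assuming the enclosing class is
named Context and using a hypothetical S3 path:

ctx = Context(
    s3_path="s3://my-bucket/apps/my-app/context.msgpack",  # hypothetical
    cache_dir="/mnt/context_cache",
    workload_id="wl-123",  # optional
)
print(ctx.id, ctx.api_version)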
Example 5
    def get_file(self, impl_key, cache_impl_path):
        # Download from S3 only if the file is not already cached locally
        if not os.path.isfile(cache_impl_path):
            aws.download_file_from_s3(impl_key, cache_impl_path, self.bucket)
        return cache_impl_path
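
get_file implements a download-once cache: the object is fetched from S3 only
when the local copy is missing, and the cached path is returned either way. A
usage sketch with a hypothetical key, assuming self.cache_dir exists as in
Example 4:

impl_path = ctx.get_file(
    "apps/my-app/transformers/my_transformer.py",  # hypothetical S3 key
    os.path.join(ctx.cache_dir, "my_transformer.py"),
)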