Example no. 1
 def download_file(self, key, local_path):
     try:
         util.mkdir_p(os.path.dirname(local_path))
         self.s3.download_file(self.bucket, key, local_path)
         return local_path
     except Exception as e:
         raise CortexException("bucket " + self.bucket, "key " + key) from e
Example no. 2
 def _get_dir(self, prefix, local_dir):
     prefix = util.add_suffix_unless_present(prefix, "/")
     util.mkdir_p(local_dir)
     for key in self._get_matching_s3_keys_generator(prefix):
         rel_path = util.remove_prefix_if_present(key, prefix)
         local_dest_path = os.path.join(local_dir, rel_path)
         self.download_file(key, local_dest_path)
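The _get_matching_s3_keys_generator helper is not shown; a plausible sketch, assuming self.s3 is a boto3 S3 client, uses a paginator so listings beyond the 1,000-key page limit still work:

 def _get_matching_s3_keys_generator(self, prefix=""):
     # Yield every object key under prefix, one page at a time.
     paginator = self.s3.get_paginator("list_objects_v2")
     for page in paginator.paginate(Bucket=self.bucket, Prefix=prefix):
         for obj in page.get("Contents", []):
             yield obj["Key"]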
Example no. 3
def download_dir_from_s3(prefix, local_dir, bucket, client_config={}):
    prefix = util.add_suffix_unless_present(prefix, "/")
    util.mkdir_p(local_dir)
    for key in get_matching_s3_keys_generator(bucket, prefix, client_config=client_config):
        rel_path = util.remove_prefix_if_present(key, prefix)
        local_dest_path = os.path.join(local_dir, rel_path)
        download_file_from_s3(key, local_dest_path, bucket, client_config=client_config)
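The two string helpers used above are self-describing; minimal sketches (assumptions, not the actual util code):

def add_suffix_unless_present(s, suffix):
    # "data/v1" -> "data/v1/", while "data/v1/" is returned unchanged.
    return s if s.endswith(suffix) else s + suffix

def remove_prefix_if_present(s, prefix):
    # Strip prefix from the front of s when it is there.
    return s[len(prefix):] if s.startswith(prefix) else s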
Example no. 4
def download_file_from_s3(key, local_path, bucket, client_config={}):
    try:
        util.mkdir_p(os.path.dirname(local_path))
        s3 = s3_client(client_config)
        s3.download_file(bucket, key, local_path)
        return local_path
    except Exception as e:
        raise CortexException("bucket " + bucket, "key " + key) from e
Example no. 5
 def download_file(self, key, local_path):
     util.mkdir_p(os.path.dirname(local_path))
     try:
         self.s3.download_file(self.bucket, key, local_path)
         return local_path
     except Exception as e:
         raise CortexException(
             'key "{}" in bucket "{}" could not be accessed; '.format(
                 key, self.bucket) +
             "it may not exist, or you may not have sufficient permissions"
         ) from e
Example no. 6
    def write_files(self, files, base_path):
        if os.path.isdir(base_path):
            shutil.rmtree(base_path)
        for filepath, contents in files.items():  # iteritems() was Python 2 only
            filename = os.path.join(base_path, filepath)
            file_dir = os.path.dirname(filename)

            util.mkdir_p(file_dir)
            util.logger.debug("Writing out file %s" % filepath)
            with open(filename, 'w') as f:
                f.write(contents)

        util.logger.info("All done writing out input data")
Example no. 7
 def __init__(self, ids_or_urls=[], options={}):
     util.mkdir_p(self.test_cases_path())
     if not os.path.isfile(http_client.certs_path()):
         msg = ("You seem to have deleted the file of certificates "
                "that shipped with this repo. It should exist "
                "at %s" % http_client.certs_path())
         raise error.StripeError(msg)
     if ids_or_urls == []:
         util.logger.info('No test case supplied. Randomly choosing among defaults.')
         ids_or_urls = [SystemRandom().choice(self.DEFAULT_TEST_CASES)]
     self.test_cases = [TestCase(self, token) for token in ids_or_urls]
     self.options = options
     headers = {
         'User-Agent': 'Stripe TestHarness/%s' % (self.VERSION,),
     }
     self.http_client = http_client.new_default_http_client(headers=headers, verify_ssl_certs=True)
Example no. 8
def train(model_name, model_impl, ctx, model_dir):
    model = ctx.models[model_name]

    util.mkdir_p(model_dir)
    util.rm_dir(model_dir)

    tf_lib.set_logging_verbosity(ctx.environment["log_level"]["tensorflow"])

    run_config = tf.estimator.RunConfig(
        tf_random_seed=model["training"]["tf_random_seed"],
        save_summary_steps=model["training"]["save_summary_steps"],
        save_checkpoints_secs=model["training"]["save_checkpoints_secs"],
        save_checkpoints_steps=model["training"]["save_checkpoints_steps"],
        log_step_count_steps=model["training"]["log_step_count_steps"],
        keep_checkpoint_max=model["training"]["keep_checkpoint_max"],
        keep_checkpoint_every_n_hours=model["training"]
        ["keep_checkpoint_every_n_hours"],
        model_dir=model_dir,
    )

    train_input_fn = generate_input_fn(model_name, ctx, "training")
    eval_input_fn = generate_input_fn(model_name, ctx, "evaluation")
    serving_input_fn = generate_json_serving_input_fn(model_name, ctx)
    exporter = tf.estimator.FinalExporter("estimator",
                                          serving_input_fn,
                                          as_text=False)

    dataset_metadata = aws.read_json_from_s3(model["dataset"]["metadata_key"],
                                             ctx.bucket)
    train_num_steps = model["training"]["num_steps"]
    if model["training"]["num_epochs"]:
        train_num_steps = (
            math.ceil(dataset_metadata["dataset_size"] *
                      model["data_partition_ratio"]["training"] /
                      float(model["training"]["batch_size"])) *
            model["training"]["num_epochs"])

    train_spec = tf.estimator.TrainSpec(train_input_fn,
                                        max_steps=train_num_steps)

    eval_num_steps = model["evaluation"]["num_steps"]
    if model["evaluation"]["num_epochs"]:
        eval_num_steps = (
            math.ceil(dataset_metadata["dataset_size"] *
                      model["data_partition_ratio"]["evaluation"] /
                      float(model["evaluation"]["batch_size"])) *
            model["evaluation"]["num_epochs"])

    eval_spec = tf.estimator.EvalSpec(
        eval_input_fn,
        steps=eval_num_steps,
        exporters=[exporter],
        name="estimator-eval",
        start_delay_secs=model["evaluation"]["start_delay_secs"],
        throttle_secs=model["evaluation"]["throttle_secs"],
    )

    model_config = ctx.model_config(model["name"])
    tf_lib.add_tf_types(model_config)

    try:
        estimator = model_impl.create_estimator(run_config, model_config)
    except Exception as e:
        raise UserRuntimeException("model " + model_name) from e

    if model["type"] == "regression":
        estimator = tf.contrib.estimator.add_metrics(
            estimator, get_regression_eval_metrics)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    return model_dir
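The epoch-to-step conversion above is plain arithmetic; with illustrative numbers only (10,000 rows, an 80% training partition, batch size 64, 3 epochs):

import math

steps_per_epoch = math.ceil(10000 * 0.8 / float(64))  # ceil(125.0) = 125
train_num_steps = steps_per_epoch * 3                 # 375 steps in total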
Example no. 9
def download_and_extract_zip(key, local_dir, bucket, client_config={}):
    util.mkdir_p(local_dir)
    local_zip = os.path.join(local_dir, "zip.zip")
    download_file_from_s3(key, local_zip, bucket, client_config=client_config)
    util.extract_zip(local_zip, delete_zip_file=True)
Example no. 10
 def download_and_unzip(self, key, local_dir):
     util.mkdir_p(local_dir)
     local_zip = os.path.join(local_dir, "zip.zip")
     self.download_file(key, local_zip)
     util.extract_zip(local_zip, delete_zip_file=True)
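util.extract_zip is not shown in these examples; a minimal sketch of what it presumably does, built on the standard zipfile module:

import os
import zipfile

def extract_zip(zip_path, dest_dir=None, delete_zip_file=False):
    # Extract next to the archive by default, optionally deleting the
    # archive afterwards. The real util helper may differ.
    dest_dir = dest_dir or os.path.dirname(zip_path)
    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(dest_dir)
    if delete_zip_file:
        os.remove(zip_path)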
Example no. 11
    def __init__(self, **kwargs):
        if "cache_dir" in kwargs:
            self.cache_dir = kwargs["cache_dir"]
        elif "local_path" in kwargs:
            local_path_dir = os.path.dirname(
                os.path.abspath(kwargs["local_path"]))
            self.cache_dir = os.path.join(local_path_dir, "cache")
        else:
            raise ValueError(
                "cache_dir must be specified (or inferred from local_path)")
        util.mkdir_p(self.cache_dir)

        if "local_path" in kwargs:
            ctx_raw = util.read_msgpack(kwargs["local_path"])
            self.ctx = _deserialize_raw_ctx(ctx_raw)
        elif "obj" in kwargs:
            self.ctx = kwargs["obj"]
        elif "raw_obj" in kwargs:
            ctx_raw = kwargs["raw_obj"]
            self.ctx = _deserialize_raw_ctx(ctx_raw)
        elif "s3_path":
            local_ctx_path = os.path.join(self.cache_dir, "context.msgpack")
            bucket, key = S3.deconstruct_s3_path(kwargs["s3_path"])
            S3(bucket, client_config={}).download_file(key, local_ctx_path)
            ctx_raw = util.read_msgpack(local_ctx_path)
            self.ctx = _deserialize_raw_ctx(ctx_raw)
        else:
            raise ValueError("invalid context args: " + kwargs)

        self.workload_id = kwargs.get("workload_id")

        self.id = self.ctx["id"]
        self.key = self.ctx["key"]
        self.cortex_config = self.ctx["cortex_config"]
        self.dataset_version = self.ctx["dataset_version"]
        self.root = self.ctx["root"]
        self.raw_dataset = self.ctx["raw_dataset"]
        self.status_prefix = self.ctx["status_prefix"]
        self.app = self.ctx["app"]
        self.environment = self.ctx["environment"]
        self.python_packages = self.ctx["python_packages"]
        self.raw_columns = self.ctx["raw_columns"]
        self.transformed_columns = self.ctx["transformed_columns"]
        self.transformers = self.ctx["transformers"]
        self.aggregators = self.ctx["aggregators"]
        self.aggregates = self.ctx["aggregates"]
        self.constants = self.ctx["constants"]
        self.models = self.ctx["models"]
        self.apis = self.ctx["apis"]
        self.training_datasets = {
            k: v["dataset"]
            for k, v in self.models.items()
        }

        self.api_version = self.cortex_config["api_version"]

        if "local_storage_path" in kwargs:
            self.storage = LocalStorage(base_dir=kwargs["local_storage_path"])
        else:
            self.storage = S3(
                bucket=self.cortex_config["bucket"],
                region=self.cortex_config["region"],
                client_config={},
            )

        if self.api_version != consts.CORTEX_VERSION:
            raise ValueError(
                "API version mismatch (Context: {}, Image: {})".format(
                    self.api_version, consts.CORTEX_VERSION))

        self.columns = util.merge_dicts_overwrite(
            self.raw_columns,
            self.transformed_columns  # self.aggregates
        )

        self.values = util.merge_dicts_overwrite(self.aggregates,
                                                 self.constants)

        self.raw_column_names = list(self.raw_columns.keys())
        self.transformed_column_names = list(self.transformed_columns.keys())
        self.column_names = list(self.columns.keys())

        # Internal caches
        self._transformer_impls = {}
        self._aggregator_impls = {}
        self._model_impls = {}

        # This affects Tensorflow S3 access
        os.environ["AWS_REGION"] = self.cortex_config.get("region", "")

        # Id map
        self.pp_id_map = ResourceMap(self.python_packages)
        self.rf_id_map = ResourceMap(self.raw_columns)
        self.ag_id_map = ResourceMap(self.aggregates)
        self.tf_id_map = ResourceMap(self.transformed_columns)
        self.td_id_map = ResourceMap(self.training_datasets)
        self.models_id_map = ResourceMap(self.models)
        self.apis_id_map = ResourceMap(self.apis)
        self.constants_id_map = ResourceMap(self.constants)

        self.id_map = util.merge_dicts_overwrite(
            self.pp_id_map,
            self.rf_id_map,
            self.ag_id_map,
            self.tf_id_map,
            self.td_id_map,
            self.models_id_map,
            self.apis_id_map,
            self.constants_id_map,
        )
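S3.deconstruct_s3_path splits an s3:// URL into a (bucket, key) pair; a plausible sketch (an assumption, not the actual implementation):

def deconstruct_s3_path(s3_path):
    # "s3://my-bucket/some/key" -> ("my-bucket", "some/key")
    path = s3_path[len("s3://"):]
    bucket, _, key = path.partition("/")
    return bucket, key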
Example no. 12
def train(model_name, estimator_impl, ctx, model_dir):
    model = ctx.models[model_name]

    util.mkdir_p(model_dir)
    util.rm_dir(model_dir)

    tf_lib.set_logging_verbosity(ctx.environment["log_level"]["tensorflow"])

    run_config = tf.estimator.RunConfig(
        tf_random_seed=model["training"]["tf_random_seed"],
        save_summary_steps=model["training"]["save_summary_steps"],
        save_checkpoints_secs=model["training"]["save_checkpoints_secs"],
        save_checkpoints_steps=model["training"]["save_checkpoints_steps"],
        log_step_count_steps=model["training"]["log_step_count_steps"],
        keep_checkpoint_max=model["training"]["keep_checkpoint_max"],
        keep_checkpoint_every_n_hours=model["training"]["keep_checkpoint_every_n_hours"],
        model_dir=model_dir,
    )

    train_input_fn = generate_input_fn(model_name, ctx, "training", estimator_impl)
    eval_input_fn = generate_input_fn(model_name, ctx, "evaluation", estimator_impl)
    serving_input_fn = generate_json_serving_input_fn(model_name, ctx, estimator_impl)
    exporter = tf.estimator.FinalExporter("estimator", serving_input_fn, as_text=False)

    train_num_steps = model["training"]["num_steps"]
    dataset_metadata = ctx.get_metadata(model["dataset"]["id"])
    if model["training"]["num_epochs"]:
        train_num_steps = (
            math.ceil(dataset_metadata["training_size"] / float(model["training"]["batch_size"]))
            * model["training"]["num_epochs"]
        )

    train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=train_num_steps)

    eval_num_steps = model["evaluation"]["num_steps"]
    if model["evaluation"]["num_epochs"]:
        eval_num_steps = (
            math.ceil(dataset_metadata["eval_size"] / float(model["evaluation"]["batch_size"]))
            * model["evaluation"]["num_epochs"]
        )

    eval_spec = tf.estimator.EvalSpec(
        eval_input_fn,
        steps=eval_num_steps,
        exporters=[exporter],
        name="estimator-eval",
        start_delay_secs=model["evaluation"]["start_delay_secs"],
        throttle_secs=model["evaluation"]["throttle_secs"],
    )

    model_config = ctx.model_config(model_name)

    try:
        tf_estimator = estimator_impl.create_estimator(run_config, model_config)
    except Exception as e:
        raise UserRuntimeException("model " + model_name) from e

    target_col_name = util.get_resource_ref(model["target_column"])
    if ctx.get_inferred_column_type(target_col_name) == consts.COLUMN_TYPE_FLOAT:
        tf_estimator = tf.contrib.estimator.add_metrics(tf_estimator, get_regression_eval_metrics)

    tf.estimator.train_and_evaluate(tf_estimator, train_spec, eval_spec)

    return model_dir
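A hypothetical call site for train; every name below is illustrative, and estimator_impl only needs to expose create_estimator(run_config, model_config) as used above:

model_dir = train(
    model_name="dnn",             # hypothetical; must be a key in ctx.models
    estimator_impl=my_estimator,  # hypothetical user module
    ctx=ctx,                      # a Context instance as constructed above
    model_dir="/mnt/model",       # hypothetical scratch directory
)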