def install_packages(python_packages, bucket):
    """Download pre-built Python packages from S3 and pip-install them offline.

    Each package's wheelhouse archive is extracted under WHEELHOUSE_PATH, then
    packages are installed in dependency order with --no-index so pip only uses
    the local wheels. Temporary files are removed afterwards.

    Args:
        python_packages: Mapping of package name -> metadata dict (must contain
            "package_key"; the special "requirements.txt" entry carries "src_key").
        bucket: S3 bucket name to download from.

    Raises:
        UserException: if pip exits non-zero for any package.
    """
    build_order = get_build_order(python_packages)

    # Fetch every package's wheelhouse archive into its own directory.
    for package_name in build_order:
        python_package = python_packages[package_name]
        aws.download_and_extract_zip(
            python_package["package_key"],
            os.path.join(WHEELHOUSE_PATH, package_name),
            bucket,
        )

    if "requirements.txt" in python_packages:
        aws.download_file_from_s3(
            python_packages["requirements.txt"]["src_key"], "/requirements.txt", bucket
        )

    for package_name in build_order:
        # The requirements.txt pseudo-package installs from the downloaded file;
        # everything else installs by name from its local wheelhouse.
        if package_name == "requirements.txt":
            install_args = ["-r", "/requirements.txt"]
        else:
            install_args = [package_name]

        # Build argv as a list (not str.format(...).split()) so a WHEELHOUSE_PATH
        # or package name containing whitespace cannot corrupt the command line.
        completed_process = run(
            [
                "pip3",
                "install",
                "--no-cache-dir",
                "--no-index",
                "--find-links=" + os.path.join(WHEELHOUSE_PATH, package_name),
            ]
            + install_args
        )
        if completed_process.returncode != 0:
            raise UserException("installing package", package_name)

    # Best-effort cleanup of the downloaded artifacts.
    util.rm_file("/requirements.txt")
    util.rm_dir(WHEELHOUSE_PATH)
def train(model_name, model_impl, ctx, model_dir):
    """Train and evaluate a TensorFlow estimator for the named model.

    Args:
        model_name: Key into ctx.models identifying the model to train.
        model_impl: User-provided module exposing create_estimator(run_config, model_config).
        ctx: Workload context providing models, environment, bucket, and model_config().
        model_dir: Local directory for checkpoints, summaries, and exports.

    Returns:
        model_dir (the same path that was passed in).

    Raises:
        UserRuntimeException: if the user's create_estimator raises.
    """
    model = ctx.models[model_name]

    # Ensure the directory's parents exist, then clear stale contents so training
    # starts fresh.
    # NOTE(review): mkdir_p before rm_dir looks inverted — presumably rm_dir only
    # removes the leaf directory; confirm against util's semantics.
    util.mkdir_p(model_dir)
    util.rm_dir(model_dir)

    tf_lib.set_logging_verbosity(ctx.environment["log_level"]["tensorflow"])

    run_config = tf.estimator.RunConfig(
        tf_random_seed=model["training"]["tf_random_seed"],
        save_summary_steps=model["training"]["save_summary_steps"],
        save_checkpoints_secs=model["training"]["save_checkpoints_secs"],
        save_checkpoints_steps=model["training"]["save_checkpoints_steps"],
        log_step_count_steps=model["training"]["log_step_count_steps"],
        keep_checkpoint_max=model["training"]["keep_checkpoint_max"],
        keep_checkpoint_every_n_hours=model["training"]["keep_checkpoint_every_n_hours"],
        model_dir=model_dir,
    )

    train_input_fn = generate_input_fn(model_name, ctx, "training")
    eval_input_fn = generate_input_fn(model_name, ctx, "evaluation")
    serving_input_fn = generate_json_serving_input_fn(model_name, ctx)
    exporter = tf.estimator.FinalExporter("estimator", serving_input_fn, as_text=False)

    dataset_metadata = aws.read_json_from_s3(model["dataset"]["metadata_key"], ctx.bucket)

    # num_epochs (when set) overrides num_steps: convert epochs to steps using the
    # partition's share of the dataset and the batch size.
    train_num_steps = model["training"]["num_steps"]
    if model["training"]["num_epochs"]:
        train_num_steps = (
            math.ceil(
                dataset_metadata["dataset_size"]
                * model["data_partition_ratio"]["training"]
                / float(model["training"]["batch_size"])
            )
            * model["training"]["num_epochs"]
        )
    train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=train_num_steps)

    eval_num_steps = model["evaluation"]["num_steps"]
    if model["evaluation"]["num_epochs"]:
        eval_num_steps = (
            math.ceil(
                dataset_metadata["dataset_size"]
                * model["data_partition_ratio"]["evaluation"]
                / float(model["evaluation"]["batch_size"])
            )
            * model["evaluation"]["num_epochs"]
        )
    eval_spec = tf.estimator.EvalSpec(
        eval_input_fn,
        steps=eval_num_steps,
        exporters=[exporter],
        name="estimator-eval",
        start_delay_secs=model["evaluation"]["start_delay_secs"],
        throttle_secs=model["evaluation"]["throttle_secs"],
    )

    # Use model_name directly (model was looked up by this key, so it equals
    # model["name"]) — consistent with the sibling train() implementation.
    model_config = ctx.model_config(model_name)
    tf_lib.add_tf_types(model_config)

    try:
        estimator = model_impl.create_estimator(run_config, model_config)
    except Exception as e:
        raise UserRuntimeException("model " + model_name) from e

    # Regression models get extra eval metrics (e.g. RMSE) attached.
    if model["type"] == "regression":
        estimator = tf.contrib.estimator.add_metrics(estimator, get_regression_eval_metrics)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    return model_dir
def train(model_name, estimator_impl, ctx, model_dir):
    """Run tf.estimator train-and-evaluate for the named model.

    Builds the RunConfig, input/serving functions, and train/eval specs from the
    model's configuration, creates the user's estimator, and trains it.

    Args:
        model_name: Key into ctx.models for the model to train.
        estimator_impl: User module exposing create_estimator(run_config, model_config).
        ctx: Workload context (models, environment, metadata, model_config, ...).
        model_dir: Local directory for checkpoints and exports.

    Returns:
        model_dir, unchanged.

    Raises:
        UserRuntimeException: if the user's create_estimator raises.
    """
    model = ctx.models[model_name]

    # Make sure parent directories exist, then wipe any leftover state.
    util.mkdir_p(model_dir)
    util.rm_dir(model_dir)

    tf_lib.set_logging_verbosity(ctx.environment["log_level"]["tensorflow"])

    training_cfg = model["training"]
    evaluation_cfg = model["evaluation"]

    run_config = tf.estimator.RunConfig(
        tf_random_seed=training_cfg["tf_random_seed"],
        save_summary_steps=training_cfg["save_summary_steps"],
        save_checkpoints_secs=training_cfg["save_checkpoints_secs"],
        save_checkpoints_steps=training_cfg["save_checkpoints_steps"],
        log_step_count_steps=training_cfg["log_step_count_steps"],
        keep_checkpoint_max=training_cfg["keep_checkpoint_max"],
        keep_checkpoint_every_n_hours=training_cfg["keep_checkpoint_every_n_hours"],
        model_dir=model_dir,
    )

    train_input_fn = generate_input_fn(model_name, ctx, "training", estimator_impl)
    eval_input_fn = generate_input_fn(model_name, ctx, "evaluation", estimator_impl)
    serving_input_fn = generate_json_serving_input_fn(model_name, ctx, estimator_impl)
    exporter = tf.estimator.FinalExporter("estimator", serving_input_fn, as_text=False)

    dataset_metadata = ctx.get_metadata(model["dataset"]["id"])

    def epochs_to_steps(section_cfg, size_key):
        # num_epochs overrides num_steps: steps-per-epoch times epoch count.
        per_epoch = math.ceil(dataset_metadata[size_key] / float(section_cfg["batch_size"]))
        return per_epoch * section_cfg["num_epochs"]

    train_num_steps = training_cfg["num_steps"]
    if training_cfg["num_epochs"]:
        train_num_steps = epochs_to_steps(training_cfg, "training_size")
    train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=train_num_steps)

    eval_num_steps = evaluation_cfg["num_steps"]
    if evaluation_cfg["num_epochs"]:
        eval_num_steps = epochs_to_steps(evaluation_cfg, "eval_size")
    eval_spec = tf.estimator.EvalSpec(
        eval_input_fn,
        steps=eval_num_steps,
        exporters=[exporter],
        name="estimator-eval",
        start_delay_secs=evaluation_cfg["start_delay_secs"],
        throttle_secs=evaluation_cfg["throttle_secs"],
    )

    model_config = ctx.model_config(model_name)
    try:
        tf_estimator = estimator_impl.create_estimator(run_config, model_config)
    except Exception as e:
        raise UserRuntimeException("model " + model_name) from e

    # Float-typed targets are treated as regression: attach regression metrics.
    target_col_name = util.get_resource_ref(model["target_column"])
    if ctx.get_inferred_column_type(target_col_name) == consts.COLUMN_TYPE_FLOAT:
        tf_estimator = tf.contrib.estimator.add_metrics(tf_estimator, get_regression_eval_metrics)

    tf.estimator.train_and_evaluate(tf_estimator, train_spec, eval_spec)

    return model_dir