def _setup(self, pipeline_name, monitor_info):
    """Set up and launch the RESTful serving stack (uWSGI + nginx).

    Creates a shared temp directory for the brokers' unix sockets, builds
    the uWSGI and nginx configurations from ``self._params``, and starts
    both brokers.

    :param pipeline_name: name of the pipeline this component belongs to
    :param monitor_info: monitoring handle passed through to the uWSGI broker
    """
    # Shared scratch dir holding the unix sockets; uwsgi and nginx may run
    # as different users, hence the permissive 0o777 mode.
    target_path = tempfile.mkdtemp(dir=ComponentConstants.TMP_RESTFUL_ROOT,
                                   prefix=ComponentConstants.TMP_RESTFUL_DIR_PREFIX)
    os.chmod(target_path, 0o777)

    shared_conf = {
        SharedConstants.TARGET_PATH_KEY: target_path,
        SharedConstants.SOCK_FILENAME_KEY: UwsgiConstants.SOCK_FILENAME,
        SharedConstants.STATS_SOCK_FILENAME_KEY: UwsgiConstants.STATS_SOCK_FILENAME
    }

    log_format = self._params.get(ComponentConstants.LOG_FORMAT_KEY,
                                  ComponentConstants.DEFAULT_LOG_FORMAT)
    log_level_param = self._params.get(ComponentConstants.LOG_LEVEL_KEY,
                                       ComponentConstants.DEFAULT_LOG_LEVEL).lower()
    # Unrecognized level names fall back to INFO.
    log_level = constants.LOG_LEVELS.get(log_level_param, logging.INFO)
    self._logger.debug("log_level_param: {}, log_level: {}, level_constants: {}"
                       .format(log_level_param, log_level, constants.LOG_LEVELS))

    stats_reporting_interval_sec = self._params.get(
        ComponentConstants.STATS_REPORTING_INTERVAL_SEC,
        ComponentConstants.DEFAULT_STATS_REPORTING_INTERVAL_SEC)

    # Resolve the raw model-path parameter into the actual model filepath
    # (in place, so the uwsgi entry point sees the resolved value too).
    model_filepath_key = java_mapping.RESERVED_KEYS[ComponentConstants.INPUT_MODEL_TAG_NAME]
    self._params[model_filepath_key] = ModelEnv(self._params[model_filepath_key]).model_filepath

    uwsgi_entry_point_conf = {
        UwsgiConstants.RESTFUL_COMP_MODULE_KEY: self.__module__,
        UwsgiConstants.RESTFUL_COMP_CLS_KEY: self.__class__.__name__,
        ComponentConstants.LOG_FORMAT_KEY: log_format,
        ComponentConstants.LOG_LEVEL_KEY: log_level,
        ComponentConstants.STATS_REPORTING_INTERVAL_SEC: stats_reporting_interval_sec,
        UwsgiConstants.PARAMS_KEY: self._params,
        UwsgiConstants.PIPELINE_NAME_KEY: pipeline_name,
        UwsgiConstants.MODEL_PATH_KEY: self._params[model_filepath_key],
        ComponentConstants.UWSGI_DISABLE_LOGGING_KEY:
            parameter.str2bool(self._params.get(ComponentConstants.UWSGI_DISABLE_LOGGING_KEY,
                                                ComponentConstants.DEFAULT_UWSGI_DISABLE_LOGGING)),
        ComponentConstants.METRICS_KEY: Metric.metrics()
    }
    self._logger.debug("uwsgi_entry_point_conf: {}".format(uwsgi_entry_point_conf))

    nginx_conf = {
        ComponentConstants.HOST_KEY: ComponentConstants.DEFAULT_HOST,
        ComponentConstants.PORT_KEY: self._params[ComponentConstants.PORT_KEY],
        # Keep the nginx access log only when running at DEBUG level.
        NginxConstants.DISABLE_ACCESS_LOG_KEY: log_level != logging.DEBUG
    }
    self._logger.debug("nginx_conf: {}".format(nginx_conf))

    # NOTE(review): the original computed this twice (identically, before and
    # after the warning) — once is enough.
    self._dry_run = parameter.str2bool(self._params.get(ComponentConstants.DRY_RUN_KEY,
                                                        ComponentConstants.DEFAULT_DRY_RUN))
    if self._dry_run:
        self._logger.warning("\n\n" + 80 * '#' + "\n" + 25 * " " + "Running in DRY RUN mode\n" + 80 * '#')

    self._wsgi_broker = UwsgiBroker(self._ml_engine, self._dry_run) \
        .setup_and_run(shared_conf, uwsgi_entry_point_conf, monitor_info)

    self._nginx_broker = NginxBroker(self._ml_engine, self._dry_run) \
        .setup_and_run(shared_conf, nginx_conf)
def _init_params(self, parent_data_objs):
    """Initialize training parameters from ``self._params`` and parents.

    ``parent_data_objs`` is expected to be a (train, validation, test)
    triple of datasets.
    """
    self._output_model_filepath = self._params['output_model_filepath']

    self._train_set, valid_set, test_set = parent_data_objs
    self._print_statistics_info(self._train_set, valid_set, test_set)
    self._num_features = len(self._train_set[0][0])

    # S3 locations: fall back to the session's default bucket and to
    # well-known kmeans paths when not configured explicitly.
    self._bucket_name = self._params.get('bucket_name') or Session().default_bucket()
    self._data_location = self._params.get('data_location') or 'training/kmeans/data'

    configured_output = self._params.get('output_location')
    self._output_location = (
        's3://{}/{}'.format(self._bucket_name, configured_output)
        if configured_output
        else 's3://{}/training/kmeans/output'.format(self._bucket_name)
    )

    self._skip_s3_dataset_uploading = str2bool(self._params.get('skip_s3_dataset_uploading'))

    # SageMaker training-job knobs, each with a default.
    self._instance_count = self._params.get('instance_count', 1)
    self._instance_type = self._params.get('instance_type', 'ml.c4.xlarge')
    self._volume_size_in_gb = self._params.get('volume_size_in_gb', 50)
    self._hyper_parameter_k = self._params.get('hyper_parameter_k', 10)
    self._epochs = self._params.get('epochs', 1)
    self._mini_batch_size = self._params.get('mini_batch_size', 500)
    self._max_runtime_in_seconds = self._params.get('max_runtime_in_seconds', 86400)
def _init_params(self, parent_data_objs): self._dataset_s3_url = parent_data_objs[0] self._local_model_filepath = self._params['local_model_filepath'] if not self._local_model_filepath or not os.path.isfile( self._local_model_filepath): self._logger.info("Input model is empty! Skip prediction!") return False self._bucket_name = self._params.get('bucket_name') if not self._bucket_name: self._bucket_name = self._sagemaker_session.default_bucket() self._model_s3_filepath = self._params.get('model_s3_filepath') self._results_s3_location = self._params.get('results_s3_location') if not self._results_s3_location: bucket_name, input_rltv_path = AwsHelper.s3_url_parse( self._dataset_s3_url) self._results_s3_location = "s3://{}/prediction/results".format( bucket_name) self._skip_s3_model_uploading = str2bool( self._params.get('skip_s3_model_uploading')) self._instance_type = self._params.get('instance_type', 'ml.m4.xlarge') self._instance_count = self._params.get('instance_count', 1) return True
def _materialize(self, parent_data_objs, user_data):
    """Upload the configured local file to S3 and return its S3 URL.

    :param parent_data_objs: unused by this component
    :param user_data: unused by this component
    :return: single-element list holding the uploaded dataset's S3 URL
    """
    local_filepath = self._params['local_filepath']

    # Fall back to the session's default bucket when none was configured.
    bucket = self._params.get('bucket_name')
    if not bucket:
        bucket = Session().default_bucket()

    remote_filepath = self._params.get('remote_filepath')
    skip_uploading = str2bool(self._params.get('skip_uploading'))

    helper = AwsHelper(self._logger)
    dataset_s3_url = helper.upload_file(local_filepath, bucket, remote_filepath, skip_uploading)
    return [dataset_s3_url]