Esempio n. 1
0
    def _setup(self, pipeline_name, monitor_info):
        """Build the serving configuration and start the uWSGI and nginx brokers.

        A fresh temporary directory is created under
        ``ComponentConstants.TMP_RESTFUL_ROOT``, three configuration dicts
        (shared, uWSGI entry point, nginx) are assembled from ``self._params``,
        and the values returned by ``UwsgiBroker.setup_and_run`` and
        ``NginxBroker.setup_and_run`` are stored on ``self._wsgi_broker`` and
        ``self._nginx_broker``.

        Side effects:
            * The reserved model-filepath entry of ``self._params`` is replaced
              by ``ModelEnv(<original value>).model_filepath``.
            * ``self._dry_run`` is set from the dry-run parameter.

        :param pipeline_name: stored in the uWSGI entry point configuration
            under ``UwsgiConstants.PIPELINE_NAME_KEY``.
        :param monitor_info: forwarded unchanged to
            ``UwsgiBroker.setup_and_run``.
        """
        target_path = tempfile.mkdtemp(dir=ComponentConstants.TMP_RESTFUL_ROOT,
                                       prefix=ComponentConstants.TMP_RESTFUL_DIR_PREFIX)
        # mkdtemp creates the directory as owner-only; open it up explicitly.
        # NOTE(review): presumably needed so the uWSGI/nginx processes can
        # reach the socket files inside it -- confirm before tightening.
        os.chmod(target_path, 0o777)

        # Settings handed to both brokers.
        shared_conf = {
            SharedConstants.TARGET_PATH_KEY: target_path,
            SharedConstants.SOCK_FILENAME_KEY: UwsgiConstants.SOCK_FILENAME,
            SharedConstants.STATS_SOCK_FILENAME_KEY: UwsgiConstants.STATS_SOCK_FILENAME
        }

        log_format = self._params.get(ComponentConstants.LOG_FORMAT_KEY, ComponentConstants.DEFAULT_LOG_FORMAT)

        # The level is looked up by its lower-cased name; names missing from
        # constants.LOG_LEVELS fall back to logging.INFO.
        log_level_param = self._params.get(ComponentConstants.LOG_LEVEL_KEY, ComponentConstants.DEFAULT_LOG_LEVEL).lower()
        log_level = constants.LOG_LEVELS.get(log_level_param, logging.INFO)
        self._logger.debug("log_level_param: {}, log_level: {}, level_constants: {}"
                           .format(log_level_param, log_level, constants.LOG_LEVELS))

        stats_reporting_interval_sec = self._params.get(ComponentConstants.STATS_REPORTING_INTERVAL_SEC,
                                                        ComponentConstants.DEFAULT_STATS_REPORTING_INTERVAL_SEC)

        # Overwrite the model parameter in place with the path resolved by
        # ModelEnv, so everything reading self._params afterwards sees it.
        model_filepath_key = java_mapping.RESERVED_KEYS[ComponentConstants.INPUT_MODEL_TAG_NAME]
        self._params[model_filepath_key] = ModelEnv(self._params[model_filepath_key]).model_filepath

        uwsgi_entry_point_conf = {
            UwsgiConstants.RESTFUL_COMP_MODULE_KEY: self.__module__,
            UwsgiConstants.RESTFUL_COMP_CLS_KEY: self.__class__.__name__,
            ComponentConstants.LOG_FORMAT_KEY: log_format,
            ComponentConstants.LOG_LEVEL_KEY: log_level,
            ComponentConstants.STATS_REPORTING_INTERVAL_SEC: stats_reporting_interval_sec,
            UwsgiConstants.PARAMS_KEY: self._params,
            UwsgiConstants.PIPELINE_NAME_KEY: pipeline_name,
            UwsgiConstants.MODEL_PATH_KEY: self._params[model_filepath_key],
            ComponentConstants.UWSGI_DISABLE_LOGGING_KEY:
                parameter.str2bool(self._params.get(ComponentConstants.UWSGI_DISABLE_LOGGING_KEY,
                                                    ComponentConstants.DEFAULT_UWSGI_DISABLE_LOGGING)),
            ComponentConstants.METRICS_KEY: Metric.metrics()
        }
        self._logger.debug("uwsgi_entry_point_conf: {}".format(uwsgi_entry_point_conf))

        # The port is read by direct subscript, so it has no default here.
        # The nginx access log is disabled unless the level is DEBUG.
        nginx_conf = {
            ComponentConstants.HOST_KEY: ComponentConstants.DEFAULT_HOST,
            ComponentConstants.PORT_KEY: self._params[ComponentConstants.PORT_KEY],
            NginxConstants.DISABLE_ACCESS_LOG_KEY: log_level != logging.DEBUG
        }
        self._logger.debug("nginx_conf: {}".format(nginx_conf))

        # Evaluate the dry-run flag once; both brokers receive the same value.
        self._dry_run = parameter.str2bool(self._params.get(ComponentConstants.DRY_RUN_KEY,
                                                            ComponentConstants.DEFAULT_DRY_RUN))
        if self._dry_run:
            self._logger.warning("\n\n" + 80 * '#' + "\n" + 25 * " " + "Running in DRY RUN mode\n" + 80 * '#')

        self._wsgi_broker = UwsgiBroker(self._ml_engine, self._dry_run) \
            .setup_and_run(shared_conf, uwsgi_entry_point_conf, monitor_info)

        self._nginx_broker = NginxBroker(self._ml_engine, self._dry_run) \
            .setup_and_run(shared_conf, nginx_conf)
    def _init_params(self, parent_data_objs):
        """Read the training parameters from ``self._params`` into attributes.

        :param parent_data_objs: a 3-item sequence unpacked as
            (train set, validation set, test set). Only the train set is kept
            on the instance; all three are passed to
            ``self._print_statistics_info``.
        """
        params = self._params

        # Mandatory parameter: accessed by subscript, no fallback.
        self._output_model_filepath = params['output_model_filepath']

        self._train_set, validation_set, testing_set = parent_data_objs
        self._print_statistics_info(self._train_set, validation_set, testing_set)

        # Feature count is the length of train_set[0][0].
        # NOTE(review): presumably the first sample's feature vector -- confirm.
        self._num_features = len(self._train_set[0][0])

        # A missing or empty bucket name falls back to the session's default.
        self._bucket_name = params.get('bucket_name') or Session().default_bucket()

        self._data_location = params.get('data_location') or 'training/kmeans/data'

        # The output location is always an s3:// URL inside the chosen bucket.
        custom_output_location = params.get('output_location')
        if custom_output_location:
            self._output_location = 's3://{}/{}'.format(self._bucket_name, custom_output_location)
        else:
            self._output_location = 's3://{}/training/kmeans/output'.format(self._bucket_name)

        self._skip_s3_dataset_uploading = str2bool(params.get('skip_s3_dataset_uploading'))

        # Remaining settings, each with its fallback value.
        self._instance_count = params.get('instance_count', 1)
        self._instance_type = params.get('instance_type', 'ml.c4.xlarge')
        self._volume_size_in_gb = params.get('volume_size_in_gb', 50)
        self._hyper_parameter_k = params.get('hyper_parameter_k', 10)
        self._epochs = params.get('epochs', 1)
        self._mini_batch_size = params.get('mini_batch_size', 500)
        self._max_runtime_in_seconds = params.get('max_runtime_in_seconds', 86400)
    def _init_params(self, parent_data_objs):
        """Read the prediction parameters from ``self._params`` into attributes.

        :param parent_data_objs: sequence whose first item is stored as the
            dataset S3 url.
        :return: ``False`` when the local model filepath is empty or does not
            point to an existing file (initialisation stops early in that
            case), ``True`` otherwise.
        """
        self._dataset_s3_url = parent_data_objs[0]

        # Mandatory parameter: accessed by subscript, no fallback.
        self._local_model_filepath = self._params['local_model_filepath']
        if not self._local_model_filepath or not os.path.isfile(
                self._local_model_filepath):
            self._logger.info("Input model is empty! Skip prediction!")
            return False

        # A missing or empty bucket name falls back to the session's default.
        self._bucket_name = self._params.get('bucket_name')
        if not self._bucket_name:
            self._bucket_name = self._sagemaker_session.default_bucket()

        self._model_s3_filepath = self._params.get('model_s3_filepath')

        self._results_s3_location = self._params.get('results_s3_location')
        if not self._results_s3_location:
            # The default results location uses the bucket parsed from the
            # dataset url, not self._bucket_name. The second item returned by
            # the parser is not needed here.
            bucket_name, _ = AwsHelper.s3_url_parse(self._dataset_s3_url)
            self._results_s3_location = "s3://{}/prediction/results".format(
                bucket_name)

        self._skip_s3_model_uploading = str2bool(
            self._params.get('skip_s3_model_uploading'))

        self._instance_type = self._params.get('instance_type', 'ml.m4.xlarge')
        self._instance_count = self._params.get('instance_count', 1)

        return True
Esempio n. 4
0
    def _materialize(self, parent_data_objs, user_data):
        """Pass a local file to ``AwsHelper.upload_file`` and return the result.

        Reads ``local_filepath`` (mandatory), ``bucket_name``,
        ``remote_filepath`` and ``skip_uploading`` from ``self._params``. A
        missing or empty bucket name falls back to
        ``Session().default_bucket()``.

        :param parent_data_objs: not used by this method.
        :param user_data: not used by this method.
        :return: single-element list holding the value returned by
            ``AwsHelper.upload_file`` (presumably the dataset's S3 url).
        """
        params = self._params

        source_path = params['local_filepath']
        target_bucket = params.get('bucket_name') or Session().default_bucket()
        target_path = params.get('remote_filepath')
        skip_upload = str2bool(params.get('skip_uploading'))

        uploader = AwsHelper(self._logger)
        return [uploader.upload_file(source_path, target_bucket, target_path, skip_upload)]