Example #1
def _get_metadata(options):
    code_dir = Path(options.code_dir)
    if options.model_config is None:
        raise DrumCommonException(
            "You must have a file with the name {} in the directory {}. \n"
            "You don't. \nWhat you do have is these files: \n{} ".format(
                MODEL_CONFIG_FILENAME, code_dir, os.listdir(code_dir)))
    return options.model_config
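
Every example on this page raises DrumCommonException to surface a user-facing error. For orientation, a minimal sketch of such an exception type, assuming it is a plain Exception subclass (consistent with how it is raised and caught here; the real class lives in the datarobot-drum package and may carry extra behavior):

# Minimal sketch; the real DrumCommonException is defined in datarobot-drum.
class DrumCommonException(Exception):
    """Raised by DRUM when user input or configuration is invalid."""

try:
    raise DrumCommonException("missing model config file")  # hypothetical message
except DrumCommonException as e:
    print("error:", e)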
Example #2
    def run(self):
        try:
            if self.options.docker and self.run_mode != RunMode.PUSH:
                ret = self._run_inside_docker(self.options, self.run_mode,
                                              self.raw_arguments)
                if ret:
                    raise DrumCommonException(
                        "Error from docker process: {}".format(ret))
                else:
                    return
        except DrumCommonException as e:
            self.logger.error(e)
            raise
        except AttributeError as e:
            # In some parsers, options.docker does not exist
            if "docker" not in str(e):
                raise e

        self._print_welcome_header()

        if self.run_mode in [RunMode.SERVER, RunMode.SCORE]:
            self._run_fit_and_predictions_pipelines_in_mlpiper()
        elif self.run_mode == RunMode.FIT:
            self.run_fit()
        elif self.run_mode == RunMode.PERF_TEST:
            CMRunTests(self.options, self.run_mode).performance_test()
        elif self.run_mode == RunMode.VALIDATION:
            CMRunTests(self.options, self.run_mode).validation_test()
        elif self.run_mode == RunMode.NEW:
            self._generate_template()
        elif self.run_mode == RunMode.PUSH:
            options, run_mode, raw_arguments = setup_validation_options(
                copy.deepcopy(self.options))
            validation_runner = CMRunner(self.runtime)
            validation_runner.options = options
            validation_runner.run_mode = run_mode
            validation_runner.raw_arguments = raw_arguments
            validation_runner.run()
            print(
                "Your model was successfully validated locally! Now we will add it into DataRobot"
            )
            drum_push(self.options)
        else:
            error_message = "{} mode is not implemented".format(self.run_mode)
            print(error_message)
            raise DrumCommonException(error_message)
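
The run() method above dispatches on self.run_mode. A hypothetical minimal RunMode enum, inferred only from the branches in this example (the real enum in datarobot-drum may define more members or different values):

from enum import Enum

class RunMode(Enum):
    SERVER = "server"
    SCORE = "score"
    FIT = "fit"
    PERF_TEST = "perf"
    VALIDATION = "validation"
    NEW = "new"
    PUSH = "push"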
Example #3
def setup_validation_options(options):
    model_config = _get_metadata(options)
    if model_config["type"] == "training":
        return _setup_training_validation(model_config, options)
    elif model_config["type"] == "inference":
        return _setup_inference_validation(model_config, options)
    else:
        raise DrumCommonException("Unsupported type")
Example #4
    def predict_unstructured(self, data, **kwargs):
        def _r_is_character(r_val):
            _is_character = ro.r("is.character")
            return bool(_is_character(r_val))

        def _r_is_raw(r_val):
            _is_raw = ro.r("is.raw")
            return bool(_is_raw(r_val))

        def _r_is_null(r_val):
            return r_val == ro.rinterface.NULL

        def _cast_r_to_py(r_val):
            # TODO: consider checking type against rpy2 proxy object like: isinstance(list_data_kwargs, ro.vectors.ListVector)
            # instead of calling R interpreter
            if _r_is_null(r_val):
                return None
            elif _r_is_raw(r_val):
                return bytes(r_val)
            elif _r_is_character(r_val):
                # Any scalar value is returned from R as a one-element vector,
                # so take its first element.
                return str(r_val[0])
            else:
                raise DrumCommonException(
                    "Can not convert R value {} type {}".format(
                        r_val, type(r_val)))

        def _rlist_to_dict(rlist):
            if _r_is_null(rlist):
                return None
            return {str(k): _cast_r_to_py(v) for k, v in rlist.items()}

        data_binary_or_text = data

        if UnstructuredDtoKeys.QUERY in kwargs:
            kwargs[UnstructuredDtoKeys.QUERY] = ro.ListVector(
                kwargs[UnstructuredDtoKeys.QUERY])

        # if data_binary_or_text is str it will be auto converted into R character type;
        # otherwise if it is bytes, manually convert it into byte vector (raw)
        r_data_binary_or_text = data_binary_or_text
        if isinstance(data_binary_or_text, bytes):
            r_data_binary_or_text = ro.vectors.ByteVector(data_binary_or_text)

        kwargs_filtered = {k: v for k, v in kwargs.items() if v is not None}
        list_data_kwargs = r_handler.predict_unstructured(
            model=self._model, data=r_data_binary_or_text, **kwargs_filtered)
        if isinstance(list_data_kwargs, ro.vectors.ListVector):
            ret = _cast_r_to_py(list_data_kwargs[0]), _rlist_to_dict(
                list_data_kwargs[1])
        else:
            raise DrumCommonException(
                "Wrong type returned in unstructured mode: {}".format(
                    type(list_data_kwargs)))

        return ret
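
For orientation, here are the rpy2 conversions used above in isolation. A small sketch, assuming rpy2 and an R runtime are installed; the keys and values are made up:

import rpy2.robjects as ro

rlist = ro.ListVector({"mimetype": "text/plain", "charset": "utf8"})
for k, v in rlist.items():
    # each scalar comes back from R as a one-element character vector
    print(str(k), str(v[0]))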
Example #5
def setup_validation_options(options):
    model_config = get_metadata(options)
    validate_config_fields(model_config, ModelMetadataKeys.VALIDATION)
    if model_config["type"] == "training":
        return _setup_training_validation(model_config, options)
    elif model_config["type"] == "inference":
        return _setup_inference_validation(model_config, options)
    else:
        raise DrumCommonException("Unsupported type")
Example #6
def _convert_target_type(unconverted_target_type):
    if unconverted_target_type == TargetType.REGRESSION.value:
        return dr_client.TARGET_TYPE.REGRESSION
    elif unconverted_target_type == TargetType.BINARY.value:
        return dr_client.TARGET_TYPE.BINARY
    elif unconverted_target_type == TargetType.ANOMALY.value:
        return dr_client.enums.CUSTOM_MODEL_TARGET_TYPE.ANOMALY
    raise DrumCommonException(
        "Unsupported target type {}".format(unconverted_target_type))
Example #7
    def predict(self, data, model, **kwargs):
        # Checking whether positive/negative class labels were provided
        # is done in the base class.
        super(SKLearnPredictor, self).predict(data, model, **kwargs)

        if self.target_type.value in TargetType.CLASSIFICATION.value:
            if hasattr(model, "classes_"):
                if set(str(label) for label in model.classes_) != set(
                        str(label) for label in self.class_labels):
                    error_message = "Wrong class labels {}. Use class labels detected by sklearn model: {}".format(
                        self.class_labels, model.classes_)
                    raise DrumCommonException(error_message)
                labels_to_use = model.classes_
            else:
                labels_to_use = self.class_labels
            predictions = model.predict_proba(data)
            if predictions.shape[1] == 1:
                if self.target_type == TargetType.MULTICLASS:
                    raise DrumCommonException(
                        "Target type '{}' predictions must return the "
                        "probability distribution for all class labels".format(
                            self.target_type))
                predictions = np.concatenate((1 - predictions, predictions),
                                             axis=1)
            if predictions.shape[1] != len(labels_to_use):
                raise DrumCommonException(
                    "Target type '{}' predictions must return the "
                    "probability distribution for all class labels. "
                    "Expected {} columns, but recieved {}".format(
                        self.target_type, len(labels_to_use),
                        predictions.shape[1]))
            predictions = pd.DataFrame(predictions, columns=labels_to_use)
        elif self.target_type in [TargetType.REGRESSION, TargetType.ANOMALY]:
            predictions = pd.DataFrame(
                [float(prediction) for prediction in model.predict(data)],
                columns=[REGRESSION_PRED_COLUMN],
            )
        else:
            raise DrumCommonException(
                "Target type '{}' is not supported by '{}' predictor".format(
                    self.target_type.value, self.__class__.__name__))

        return predictions
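
When predict_proba returns a single positive-class column, the code above rebuilds the full two-column distribution. A self-contained illustration of that step:

import numpy as np

preds = np.array([[0.9], [0.2], [0.5]])            # shape (3, 1), positive class only
both = np.concatenate((1 - preds, preds), axis=1)  # prepend the negative-class column
print(both)               # [[0.1 0.9] [0.8 0.2] [0.5 0.5]]
print(both.sum(axis=1))   # every row sums to 1.0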
Example #8
    def load_custom_hooks(self):
        custom_file_paths = list(
            Path(self._model_dir).rglob("{}.py".format(CUSTOM_FILE_NAME)))
        assert len(custom_file_paths) <= 1

        if len(custom_file_paths) == 0:
            print("No {}.py file detected in {}".format(
                CUSTOM_FILE_NAME, self._model_dir))
            return

        custom_file_path = custom_file_paths[0]
        print("Detected {} .. trying to load hooks".format(custom_file_path))
        sys.path.insert(0, os.path.dirname(custom_file_path))

        try:
            custom_module = __import__(CUSTOM_FILE_NAME)
            if self._target_type == TargetType.UNSTRUCTURED:
                for hook in CustomHooks.ALL_PREDICT_UNSTRUCTURED:
                    self._custom_hooks[hook] = getattr(custom_module, hook,
                                                       None)

                if self._custom_hooks[CustomHooks.SCORE_UNSTRUCTURED] is None:
                    raise DrumCommonException(
                        "In '{}' mode hook '{}' must be provided.".format(
                            TargetType.UNSTRUCTURED.value,
                            CustomHooks.SCORE_UNSTRUCTURED,
                        ))
            else:
                for hook in CustomHooks.ALL_PREDICT_FIT_STRUCTURED:
                    self._custom_hooks[hook] = getattr(custom_module, hook,
                                                       None)

            if self._custom_hooks.get(CustomHooks.INIT):
                # noinspection PyCallingNonCallable
                self._custom_hooks[CustomHooks.INIT](code_dir=self._model_dir)

            self._logger.debug("Hooks loaded: {}".format(self._custom_hooks))
        except ImportError as e:
            self._logger.error("Could not load hooks: {}".format(e))
            raise DrumCommonException(
                "\n\n{}\n"
                "Failed loading hooks from [{}] : {}".format(
                    RUNNING_LANG_MSG, custom_file_path, e))
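
The hook loading above boils down to importing a module by name and probing it with getattr. A standalone sketch of the pattern; the directory and hook name below are hypothetical:

import sys

sys.path.insert(0, "/path/to/code_dir")  # hypothetical code directory
custom_module = __import__("custom")     # imports custom.py from that directory
score_hook = getattr(custom_module, "score", None)
if score_hook is None:
    raise DrumCommonException("hook 'score' must be provided.")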
Example #9
def _read_structured_input(filename):
    try:
        if filename.endswith(".mtx"):
            return pd.DataFrame.sparse.from_spmatrix(mmread(filename))
        if filename.endswith(".arrow"):
            with open(filename, "rb") as file:
                return pyarrow.ipc.deserialize_pandas(file.read())
        return pd.read_csv(filename)
    except pd.errors.ParserError as e:
        raise DrumCommonException("Pandas failed to read input csv file: {}".format(filename))
Example #10
    def _load_model_via_hook(self):
        self._logger.debug("Load model hook will be used to load the model")
        # noinspection PyCallingNonCallable
        model = self._custom_hooks[CustomHooks.LOAD_MODEL](self._model_dir)
        if model:
            self._logger.debug("Model was successfully loaded by load hook")
            return model
        else:
            raise DrumCommonException(
                "Model loading hook failed to load model")
Example #11
def drum_push(options):
    model_config = _read_metadata(options.code_dir)

    if model_config["type"] == "training":
        _push_training(model_config, options.code_dir)

    elif model_config["type"] == "inference":
        _push_inference(model_config, options.code_dir)
    else:
        raise DrumCommonException("Unsupported type")
Example #12
    def raise_multiple_custom_files(py_paths, r_paths):
        files_found = py_paths + r_paths
        error_mes = (
            "Multiple custom.py/R files were identified in the code directory's subdirectories.\n"
            "If using the output directory option, select a directory that does not contain additional "
            "output directories or code directories.\n\n"
            "The following custom model files were found:\n")
        error_mes += "\n".join([str(path) for path in files_found])
        self.logger.error(error_mes)
        raise DrumCommonException(error_mes)
Example #13
def validate_config_fields(model_config, *fields):
    missing_sections = []
    for f in fields:
        if f not in model_config:
            missing_sections.append(f)

    if missing_sections:
        raise DrumCommonException(
            "The following keys are missing in {} file.\n"
            "Missing keys: {}".format(MODEL_CONFIG_FILENAME, missing_sections))
Example #14
    def _materialize(self, parent_data_objs, user_data):
        model_api = base_api_blueprint()

        @model_api.route("/health/", methods=["GET"])
        def health():
            return {"message": "OK"}, HTTP_200_OK

        @model_api.route("/predict/", methods=["POST"])
        def predict():
            logger.debug("Entering predict() endpoint")

            self._stats_collector.enable()
            self._stats_collector.mark("start")

            response, response_status = self.do_predict(logger=logger)

            self._stats_collector.mark("finish")
            self._stats_collector.disable()
            return response, response_status

        @model_api.route("/stats/", methods=["GET"])
        def stats():
            mem_info = self._memory_monitor.collect_memory_info()
            ret_dict = {"mem_info": mem_info._asdict()}

            self._stats_collector.round()
            ret_dict["time_info"] = {}
            for name in self._stats_collector.get_report_names():
                d = self._stats_collector.dict_report(name)
                ret_dict["time_info"][name] = d
            self._stats_collector.stats_reset()
            return ret_dict, HTTP_200_OK

        @model_api.errorhandler(Exception)
        def handle_exception(e):
            logger.exception(e)
            return {
                "message": "ERROR: {}".format(e)
            }, HTTP_500_INTERNAL_SERVER_ERROR

        app = get_flask_app(model_api)
        logging.getLogger("werkzeug").setLevel(logger.getEffectiveLevel())

        host = self._params.get("host", None)
        port = self._params.get("port", None)
        try:
            app.run(host, port, threaded=False)
        except OSError as e:
            raise DrumCommonException("{}: host: {}; port: {}".format(
                e, host, port))

        if self._stats_collector:
            self._stats_collector.print_reports()

        return []
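
Once app.run() is serving, the three routes registered above can be exercised with any HTTP client. A hypothetical session, assuming the server listens on localhost:6788 and that do_predict accepts a CSV upload (the exact request format is not shown in this example):

import requests

base = "http://localhost:6788"  # hypothetical host/port from self._params
print(requests.get(base + "/health/").json())   # {"message": "OK"}
with open("input.csv", "rb") as f:              # hypothetical payload
    print(requests.post(base + "/predict/", files={"X": f}).status_code)
print(requests.get(base + "/stats/").json())    # memory and timing report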
Example #15
    def _detect_model_artifact_file(self):
        # No model was loaded, so there is no load-model hook; fall back to the artifact predictors
        all_supported_extensions = set(p.artifact_extension
                                       for p in self._artifact_predictors)
        self._logger.debug(
            "Supported suffixes: {}".format(all_supported_extensions))
        model_artifact_file = None

        for filename in os.listdir(self._model_dir):
            path = os.path.join(self._model_dir, filename)
            if os.path.isdir(path):
                continue

            if any(
                    filename.endswith(extension)
                    for extension in all_supported_extensions):
                if model_artifact_file:
                    raise DrumCommonException(
                        "\n\n{}\n"
                        "Multiple serialized model files found. Remove extra artifacts "
                        "or overwrite custom.load_model()".format(
                            RUNNING_LANG_MSG))
                model_artifact_file = path

        if not model_artifact_file:
            files_list = os.listdir(self._model_dir)
            files_list_str = " | ".join(files_list)
            raise DrumCommonException(
                "\n\n{}\n"
                "Could not find a model artifact file in {} supported by the default predictors.\n"
                "They support filenames with the following extensions: {}.\n"
                "If your artifact is not supported by a default predictor, implement the custom.load_model hook.\n"
                "Files found: {}".format(
                    RUNNING_LANG_MSG,
                    self._model_dir,
                    list(all_supported_extensions),
                    files_list_str,
                ))

        self._logger.debug(
            "model_artifact_file: {}".format(model_artifact_file))
        return model_artifact_file
Example #16
    def _load_via_predictors(self, model_artifact_file):

        model = None
        pred_that_support_artifact = []
        for pred in self._artifact_predictors:
            if pred.is_artifact_supported(model_artifact_file):
                pred_that_support_artifact.append(pred)

            if pred.can_load_artifact(model_artifact_file):
                try:
                    model = pred.load_model_from_artifact(model_artifact_file)
                except Exception as exc:
                    raise type(exc)(
                        "Could not load model from artifact file: {}".format(exc)
                    ).with_traceback(sys.exc_info()[2]) from None
                break

        if not model:
            if len(pred_that_support_artifact) > 0:
                framework_err = """
                    The following frameworks support this model artifact
                    but could not load the model. Check if requirements are missing
                """

                for pred in pred_that_support_artifact:
                    framework_err += "\nFramework: {}, requirements: {}".format(
                        pred.name, pred.framework_requirements()
                    )

                raise DrumCommonException(textwrap.dedent(framework_err))
            else:
                raise DrumCommonException(
                    "\n\n{}\n"
                    "Could not load model from artifact file {}."
                    " No builtin support for this model was detected".format(
                        RUNNING_LANG_MSG, model_artifact_file
                    )
                )

        self._model = model
        return model
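
The raise type(exc)(...).with_traceback(...) from None idiom above re-raises the original exception type with a clearer message while preserving the traceback and suppressing exception chaining. A self-contained illustration:

import sys

try:
    try:
        {}["missing"]
    except Exception as exc:
        raise type(exc)(
            "Could not load model from artifact file: {}".format(exc)
        ).with_traceback(sys.exc_info()[2]) from None
except KeyError as e:
    print(type(e).__name__, e)  # still a KeyError, with the augmented message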
Example #17
    def predict_unstructured(self, data, **kwargs):
        str_or_tuple = self._model_adapter.predict_unstructured(
            model=self._model, data=data, **kwargs)
        if isinstance(str_or_tuple, (str, bytes, type(None))):
            ret = str_or_tuple, None
        elif isinstance(str_or_tuple, tuple):
            ret = str_or_tuple
        else:
            raise DrumCommonException(
                "Wrong type returned in unstructured mode: {}".format(
                    type(str_or_tuple)))
        return ret
Example #18
    def predict(self, data, model, **kwargs):
        # Checking whether positive/negative class labels were provided
        # is done in the base class.
        super(XGBoostPredictor, self).predict(data, model, **kwargs)

        import xgboost

        xgboost_native = False
        if isinstance(model, xgboost.core.Booster):
            xgboost_native = True
            data = xgboost.DMatrix(data)

        if self.target_type.value in TargetType.CLASSIFICATION.value:
            if xgboost_native:
                predictions = model.predict(data)
                if self.target_type == TargetType.BINARY:
                    negative_preds = 1 - predictions
                    predictions = np.concatenate((negative_preds.reshape(
                        -1, 1), predictions.reshape(-1, 1)),
                                                 axis=1)
                else:
                    if predictions.shape[1] != len(self.class_labels):
                        raise DrumCommonException(
                            "Target type '{}' predictions must return the "
                            "probability distribution for all class labels".
                            format(self.target_type))

            else:
                predictions = model.predict_proba(data)
            predictions = pd.DataFrame(predictions, columns=self.class_labels)
        elif self.target_type in [TargetType.REGRESSION, TargetType.ANOMALY]:
            preds = model.predict(data)
            predictions = pd.DataFrame(data=preds,
                                       columns=[REGRESSION_PRED_COLUMN])
        else:
            raise DrumCommonException(
                "Target type '{}' is not supported by '{}' predictor".format(
                    self.target_type.value, self.__class__.__name__))

        return predictions
Example #19
def _resolve_incoming_unstructured_data(in_data, mimetype, charset):
    ret_mimetype = mimetype if mimetype is not None and mimetype != "" else MIMETYPE_TEXT_DEFAULT
    ret_charset = charset if charset is not None else CHARSET_DEFAULT

    if not isinstance(in_data, bytes):
        raise DrumCommonException("bytes data is expected, received {}".format(type(in_data)))

    if _is_text_mimetype(ret_mimetype):
        ret_data = in_data.decode(ret_charset)
    else:
        ret_data = in_data

    return ret_data, ret_mimetype, ret_charset
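
A quick illustration of the decode branch above, assuming _is_text_mimetype (not shown here) returns True for text/* mimetypes:

raw = "héllo".encode("utf8")
data, mimetype, charset = _resolve_incoming_unstructured_data(raw, "text/plain", "utf8")
print(type(data), data)  # <class 'str'> héllo
data, mimetype, charset = _resolve_incoming_unstructured_data(raw, "application/octet-stream", "utf8")
print(type(data))        # <class 'bytes'>, passed through undecoded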
Example #20
    def predict(self, data, model, **kwargs):
        import torch
        from torch.autograd import Variable

        # Checking whether positive/negative class labels were provided
        # is done in the base class.
        super(PyTorchPredictor, self).predict(data, model, **kwargs)
        data = Variable(
            torch.from_numpy(
                data.values if type(data) != np.ndarray else data).type(
                    torch.FloatTensor))
        with torch.no_grad():
            predictions = model(data).cpu().data.numpy()
        if self.target_type.value in TargetType.CLASSIFICATION.value:
            if predictions.shape[1] == 1:
                if self.target_type == TargetType.MULTICLASS:
                    if len(self.class_labels) > 2:
                        raise DrumCommonException(
                            "Target type '{}' predictions must return the "
                            "probability distribution for all class labels".
                            format(self.target_type))
                    pos_label = self.class_labels[1]
                    neg_label = self.class_labels[0]
                else:
                    pos_label = self.positive_class_label
                    neg_label = self.negative_class_label
                predictions = pd.DataFrame(predictions, columns=[pos_label])
                predictions[neg_label] = 1 - predictions[pos_label]
            else:
                predictions = pd.DataFrame(predictions,
                                           columns=self.class_labels)
        elif self.target_type in [TargetType.REGRESSION, TargetType.ANOMALY]:
            predictions = pd.DataFrame(predictions,
                                       columns=[REGRESSION_PRED_COLUMN])
        else:
            raise DrumCommonException(
                "Target type '{}' is not supported by '{}' predictor".format(
                    self.target_type.value, self.__class__.__name__))
        return predictions
Example #21
def _push_inference(model_config, code_dir, token=None, endpoint=None):
    dr_client.Client(token=token, endpoint=endpoint)
    if ModelMetadataKeys.MODEL_ID in model_config:
        model_id = model_config[ModelMetadataKeys.MODEL_ID]
    else:
        create_params = dict(
            name=model_config[ModelMetadataKeys.NAME],
            target_type=_convert_target_type(
                model_config[ModelMetadataKeys.TARGET_TYPE]),
            target_name=model_config[ModelMetadataKeys.INFERENCE_MODEL]
            ["targetName"],
            description=model_config.get(ModelMetadataKeys.DESCRIPTION,
                                         "Pushed from DRUM"),
        )
        if model_config[
                ModelMetadataKeys.TARGET_TYPE] == TargetType.BINARY.value:
            create_params.update(
                dict(
                    positive_class_label=model_config[
                        ModelMetadataKeys.INFERENCE_MODEL].get(
                            "positiveClassLabel"),
                    negative_class_label=model_config[
                        ModelMetadataKeys.INFERENCE_MODEL].get(
                            "negativeClassLabel"),
                    prediction_threshold=model_config[
                        ModelMetadataKeys.INFERENCE_MODEL].get(
                            "predictionThreshold"),
                ))
        elif model_config[
                ModelMetadataKeys.TARGET_TYPE] == TargetType.MULTICLASS.value:
            class_labels = model_config[ModelMetadataKeys.INFERENCE_MODEL].get(
                "classLabels")
            class_labels_file = model_config[
                ModelMetadataKeys.INFERENCE_MODEL].get("classLabelsFile")
            if not ((class_labels is None) ^ (class_labels_file is None)):
                raise DrumCommonException(
                    "Multiclass inference models must specify either classLabels or classLabelsFile"
                )
            if class_labels_file:
                with open(class_labels_file) as f:
                    class_labels = f.read().split(os.linesep)
            create_params.update(dict(class_labels=class_labels))
        model_id = dr_client.CustomInferenceModel.create(**create_params).id
    dr_client.CustomModelVersion.create_clean(
        custom_model_id=model_id,
        base_environment_id=model_config[ModelMetadataKeys.ENVIRONMENT_ID],
        folder_path=code_dir,
        is_major_update=model_config.get(ModelMetadataKeys.MAJOR_VERSION,
                                         True),
    )
    _print_model_started_dialogue(model_id)
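
The (a is None) ^ (b is None) test above enforces exactly-one-of semantics for classLabels and classLabelsFile. In isolation:

for class_labels, class_labels_file in [(None, None), (["a", "b"], "labels.txt")]:
    if not ((class_labels is None) ^ (class_labels_file is None)):
        print("rejected:", class_labels, class_labels_file)
# rejected: None None             -- neither provided
# rejected: ['a', 'b'] labels.txt -- both provided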
Example #22
    def _resolve_target_type(self):
        if self.run_mode == RunMode.NEW:
            return

        target_type_options = getattr(self.options, "target_type", None)
        target_type_options = (None if target_type_options is None else
                               TargetType(target_type_options))
        target_type_model_config = None

        if self.options.model_config is not None:
            target_type_model_config = TargetType(
                self.options.model_config["targetType"])

        if target_type_options is None and target_type_model_config is None:
            raise DrumCommonException(
                "Target type is missing. It must be provided in --target-type argument, {} env var or model config file."
                .format(ArgumentOptionsEnvVars.TARGET_TYPE))
        elif (all([target_type_options, target_type_model_config])
              and target_type_options != target_type_model_config):
            raise DrumCommonException(
                "Target type provided in --target-type argument doesn't match target type from model config file. "
                "Use either one of them or make them match.")
        else:
            self.target_type = (target_type_options if target_type_options
                                is not None else target_type_model_config)

        if self.target_type != TargetType.UNSTRUCTURED:
            if getattr(self.options, "query", None):
                raise DrumCommonException(
                    "--query argument can be used only with --target-type unstructured"
                )
            if getattr(self.options, "content_type", None):
                raise DrumCommonException(
                    "--content-type argument can be used only with --target-type unstructured"
                )
        else:
            if self.options.content_type is None:
                self.options.content_type = "text/plain; charset=utf8"
Example #23
    def _resolve_target_type(self):
        target_type_options = None
        target_type_model_config = None

        if hasattr(self.options,
                   "target_type") and self.options.target_type is not None:
            target_type_options = TargetType(self.options.target_type)

        if self.options.model_config is not None:
            target_type_model_config = TargetType(
                self.options.model_config["targetType"])

        if self.run_mode not in [RunMode.NEW]:
            if target_type_options is None and target_type_model_config is None:
                raise DrumCommonException(
                    "Target type is missing. It must be provided in either --target-type argument or model config file."
                )
            elif (all([target_type_options, target_type_model_config])
                  and target_type_options != target_type_model_config):
                raise DrumCommonException(
                    "Target type provided in --target-type argument doesn't match target type from model config file."
                    "Use either one of them or make them match.")
            else:
                self.target_type = (target_type_options if target_type_options
                                    is not None else target_type_model_config)

        if self.target_type != TargetType.UNSTRUCTURED:
            if getattr(self.options, "query", None):
                raise DrumCommonException(
                    "--query argument can be used only with --target-type unstructured"
                )
            if getattr(self.options, "content_type", None):
                raise DrumCommonException(
                    "--content-type argument can be used only with --target-type unstructured"
                )
        else:
            if self.options.content_type is None:
                self.options.content_type = "text/plain; charset=utf8"
Example #24
def drum_push(options):
    model_config = get_metadata(options)

    if model_config["type"] == "training":
        validate_config_fields(model_config, ModelMetadataKeys.ENVIRONMENT_ID)
        _push_training(model_config, options.code_dir)
    elif model_config["type"] == "inference":
        validate_config_fields(model_config, ModelMetadataKeys.ENVIRONMENT_ID,
                               ModelMetadataKeys.INFERENCE_MODEL)
        validate_config_fields(model_config[ModelMetadataKeys.INFERENCE_MODEL],
                               "targetName")
        _push_inference(model_config, options.code_dir)
    else:
        raise DrumCommonException("Unsupported type")
Example #25
def possibly_intuit_order(input_data_file, target_data_file=None, target_col_name=None):
    if target_data_file:
        assert target_col_name is None

        y = pd.read_csv(target_data_file, index_col=False).sample(
            1000, random_state=1, replace=True
        )
        classes = np.unique(y.iloc[:, 0])
    else:
        assert target_data_file is None
        df = pd.read_csv(input_data_file)
        if target_col_name not in df.columns:
            e = "The column '{}' does not exist in your dataframe. \nThe columns in your dataframe are these: {}".format(
                target_col_name, list(df.columns)
            )
            print(e, file=sys.stderr)
            raise DrumCommonException(e)
        classes = np.unique(df[target_col_name].sample(1000, random_state=1, replace=True))
    if len(classes) == 2:
        return classes
    elif len(classes) == 1:
        raise DrumCommonException("Only one target label was provided, please revise training data")
    return None, None
Example #26
def _read_metadata(code_dir):
    code_dir = Path(code_dir)
    if not code_dir.joinpath(CONFIG_FILENAME).exists():
        raise DrumCommonException(
            "You must have a file with the name {} in the directory {}. \n"
            "You don't. \nWhat you do have is these files: \n{} ".format(
                CONFIG_FILENAME, code_dir, os.listdir(code_dir)))
    with open(code_dir.joinpath(CONFIG_FILENAME)) as f:
        try:
            model_config = load(f.read(), schema).data
        except YAMLError as e:
            print(e)
            raise SystemExit()
    return model_config
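
The load(..., schema).data call suggests a schema-validated YAML parser in the style of strictyaml. A hypothetical minimal schema for such a config file, assuming that library; the field names are illustrative:

from strictyaml import Map, Str, load

schema = Map({"name": Str(), "type": Str(), "targetType": Str()})
yaml_text = "name: my-model\ntype: inference\ntargetType: regression"
model_config = load(yaml_text, schema).data
print(model_config["type"])  # inference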
Example #27
    def _wait_for_server_to_start(self):
        while True:
            try:
                response = requests.get(self._url_server_address)
                if response.ok:
                    break
            except Exception:
                pass

            time.sleep(1)
            self._timeout = self._timeout - 1
            if self._timeout == 0:
                error_message = "Error: server failed to start while running performance testing"
                print(error_message)
                raise DrumCommonException(error_message)
Example #28
def _cast_r_to_py(r_val):
    # TODO: consider checking type against rpy2 proxy object like: isinstance(list_data_kwargs, ro.vectors.ListVector)
    # instead of calling R interpreter
    if _r_is_null(r_val):
        return None
    elif _r_is_raw(r_val):
        return bytes(r_val)
    elif _r_is_character(r_val):
        # Any scalar value is returned from R as a one-element vector,
        # so take its first element.
        return str(r_val[0])
    else:
        raise DrumCommonException(
            "Can not convert R value {} type {}".format(
                r_val, type(r_val)))
Example #29
    def _find_predictor_to_use(self):
        self._predictor_to_use = None
        for pred in self._artifact_predictors:
            if pred.can_use_model(self._model):
                self._predictor_to_use = pred
                break

        if not self._predictor_to_use and not self._custom_hooks[CustomHooks.SCORE]:
            raise DrumCommonException(
                "\n\n{}\n"
                "Could not find any framework to handle loaded model and a {} "
                "hook is not provided".format(RUNNING_LANG_MSG, CustomHooks.SCORE)
            )

        self._logger.debug("Predictor to use: {}".format(self._predictor_to_use.name))
Example #30
        def raise_no_language(custom_language):
            custom_language = "None" if custom_language is None else custom_language.value
            error_mes = (
                "Cannot detect language from custom.py/R files.\n"
                "Detected custom-file language: {}.\n"
                "The code directory must contain either a custom.py/R file\n"
                "or a Python file using the drum_autofit() wrapper.".format(
                    custom_language, ))
            all_files_message = "\n\nFiles (first 100) found in {}:\n{}\n".format(
                code_dir_abspath,
                "\n".join(sorted(os.listdir(code_dir_abspath))[0:100]))

            error_mes += all_files_message
            self.logger.error(error_mes)
            raise DrumCommonException(error_mes)