class PythonPredictor(BaseLanguagePredictor):
    """Predictor that serves a custom Python model through ``PythonModelAdapter``."""

    def __init__(self):
        super(PythonPredictor, self).__init__()
        self._model_adapter = None
        self._mlops = None

    def configure(self, params):
        """Build the model adapter, load the custom hooks and the model artifact.

        Raises:
            Exception: if the adapter returns ``None`` for the model.
        """
        super(PythonPredictor, self).configure(params)
        self._model_adapter = PythonModelAdapter(model_dir=self._custom_model_path)
        # The model directory is put on sys.path before the hooks are loaded.
        sys.path.append(self._custom_model_path)
        self._model_adapter.load_custom_hooks()
        self._model = self._model_adapter.load_model_from_artifact()
        if self._model is None:
            raise Exception("Failed to load model")

    def predict(self, input_filename):
        """Run the adapter's ``predict`` on ``input_filename`` and report timing.

        The class-label keyword arguments are forwarded only when both labels
        are set. The elapsed time (in milliseconds) is passed to
        ``self.monitor`` together with the input and the predictions.

        Returns:
            Whatever ``PythonModelAdapter.predict`` returns.
        """
        kwargs = {}
        # Compare against None explicitly: a falsy label (0, "", False) is
        # still a label and must be forwarded.
        if self._positive_class_label is not None and self._negative_class_label is not None:
            kwargs[POSITIVE_CLASS_LABEL_ARG_KEYWORD] = self._positive_class_label
            kwargs[NEGATIVE_CLASS_LABEL_ARG_KEYWORD] = self._negative_class_label

        # perf_counter is monotonic, so the measured duration is not affected
        # by system clock adjustments the way time.time() would be.
        start_predict = time.perf_counter()
        predictions = self._model_adapter.predict(
            input_filename,
            model=self._model,
            unstructured_mode=self._unstructured_mode,
            **kwargs
        )
        end_predict = time.perf_counter()
        execution_time_ms = (end_predict - start_predict) * 1000

        # TODO: call monitor only if we are in structured mode
        self.monitor(input_filename, predictions, execution_time_ms)
        return predictions
class PythonPredictor(BaseLanguagePredictor):
    """Predictor that serves a custom Python model through ``PythonModelAdapter``."""

    def __init__(self):
        super(PythonPredictor, self).__init__()
        self._model_adapter = None

    def configure(self, params):
        """Build the model adapter, load the custom hooks and the model artifact.

        Raises:
            Exception: if the adapter returns ``None`` for the model.
        """
        super(PythonPredictor, self).configure(params)
        self._model_adapter = PythonModelAdapter(model_dir=self._custom_model_path)
        # The model directory is put on sys.path before the hooks are loaded.
        sys.path.append(self._custom_model_path)
        self._model_adapter.load_custom_hooks()
        self._model = self._model_adapter.load_model_from_artifact()
        if self._model is None:
            raise Exception("Failed to load model")

    def predict(self, df):
        """Run the adapter's ``predict`` on ``df`` with the loaded model.

        The class-label keyword arguments are forwarded only when both labels
        are set.

        Returns:
            Whatever ``PythonModelAdapter.predict`` returns.
        """
        kwargs = {}
        # Compare against None explicitly: a falsy label (0, "", False) is
        # still a label and must be forwarded.
        if self._positive_class_label is not None and self._negative_class_label is not None:
            kwargs[POSITIVE_CLASS_LABEL_ARG_KEYWORD] = self._positive_class_label
            kwargs[NEGATIVE_CLASS_LABEL_ARG_KEYWORD] = self._negative_class_label
        predictions = self._model_adapter.predict(data=df, model=self._model, **kwargs)
        return predictions
def test_add_to_one_happy(self):
    """Binary predictions whose two columns sum to 1 per row are accepted.

    The test passes when ``_validate_predictions`` returns without raising.
    """
    pos, neg = "poslabel", "neglabel"
    predictions = pd.DataFrame({pos: [0.1, 0.2, 0.3], neg: [0.9, 0.8, 0.7]})
    model_adapter = PythonModelAdapter(model_dir=None, target_type=TargetType.BINARY)
    model_adapter._validate_predictions(to_validate=predictions, class_labels=[pos, neg])
def test_add_to_one_sad(self):
    """Binary predictions of (1, -1), (1, 0), (1, 0) must be rejected.

    ``_validate_predictions`` is expected to raise ``ValueError`` for this
    frame, whose first row sums to 0 and contains a negative value.
    """
    pos, neg = "poslabel", "neglabel"
    predictions = pd.DataFrame({pos: [1, 1, 1], neg: [-1, 0, 0]})
    model_adapter = PythonModelAdapter(model_dir=None, target_type=TargetType.BINARY)
    with pytest.raises(ValueError):
        model_adapter._validate_predictions(to_validate=predictions, class_labels=[pos, neg])
def configure(self, params):
    """Build the model adapter, load the custom hooks and the model artifact.

    Raises:
        Exception: if the adapter returns ``None`` for the model.
    """
    super(PythonPredictor, self).configure(params)

    adapter = PythonModelAdapter(model_dir=self._custom_model_path)
    self._model_adapter = adapter
    # The model directory is put on sys.path before the hooks are loaded.
    sys.path.append(self._custom_model_path)
    adapter.load_custom_hooks()

    loaded_model = adapter.load_model_from_artifact()
    self._model = loaded_model
    if loaded_model is None:
        raise Exception("Failed to load model")
class PythonFit(ConnectableComponent):
    """Pipeline component that fits a custom Python model via ``PythonModelAdapter``."""

    def __init__(self, engine):
        super(PythonFit, self).__init__(engine)
        # All settings start unset. configure() assigns every one of them
        # except ``estimator``, which this class never sets.
        self.target_name = None
        self.output_dir = None
        self.estimator = None
        self.positive_class_label = None
        self.negative_class_label = None
        self.class_labels = None
        self.custom_model_path = None
        self.input_filename = None
        self.weights = None
        self.sparse_column_file = None
        self.weights_filename = None
        self.target_filename = None
        self._model_adapter = None
        self.num_rows = None
        self.parameter_file = None

    def configure(self, params):
        """Copy the fit settings out of the component params and load the hooks.

        Keys read with ``[...]`` are required (a missing one raises
        ``KeyError``); keys read with ``.get`` default to ``None``.
        """
        super(PythonFit, self).configure(params)
        settings = self._params

        self.custom_model_path = settings["__custom_model_path__"]
        self.input_filename = settings["inputFilename"]
        self.sparse_column_file = settings["sparse_column_file"]
        self.target_name = settings.get("targetColumn")
        self.output_dir = settings["outputDir"]
        self.positive_class_label = settings.get("positiveClassLabel")
        self.negative_class_label = settings.get("negativeClassLabel")
        self.class_labels = settings.get("classLabels")
        self.weights = settings["weights"]
        self.weights_filename = settings["weightsFilename"]
        self.target_filename = settings.get("targetFilename")
        self.num_rows = settings["numRows"]
        self.parameter_file = settings.get("parameterFile")

        adapter = PythonModelAdapter(self.custom_model_path)
        self._model_adapter = adapter
        # The model directory is put on sys.path before the hooks are loaded.
        sys.path.append(self.custom_model_path)
        adapter.load_custom_hooks()

    def _materialize(self, parent_data_objs, user_data):
        """Preprocess the training data, fit the model and check the artifact.

        Neither ``parent_data_objs`` nor ``user_data`` is used.

        Returns:
            An empty list.
        """
        prepared = shared_fit_preprocessing(self)
        X, y, class_order, row_weights, parameters = prepared

        fit_options = {
            "output_dir": self.output_dir,
            "class_order": class_order,
            "row_weights": row_weights,
            "parameters": parameters,
        }
        self._model_adapter.fit(X, y, **fit_options)

        make_sure_artifact_is_small(self.output_dir)
        return []
def configure(self, params):
    """Read the model path and class labels from the params, then load the model.

    ``__custom_model_path__`` is required; both class labels default to
    ``None`` when absent.

    Raises:
        Exception: if the adapter returns ``None`` for the model.
    """
    super(PythonPredictor, self).configure(params)
    settings = self._params

    self.custom_model_path = settings["__custom_model_path__"]
    self.positive_class_label = settings.get("positiveClassLabel")
    self.negative_class_label = settings.get("negativeClassLabel")

    adapter = PythonModelAdapter(model_dir=self.custom_model_path)
    self._model_adapter = adapter
    # The model directory is put on sys.path before the hooks are loaded.
    sys.path.append(self.custom_model_path)
    adapter.load_custom_hooks()

    loaded_model = adapter.load_model_from_artifact()
    self.model = loaded_model
    if loaded_model is None:
        raise Exception("Failed to load model")
def test_add_to_one_happy(self):
    """Predictions whose two label columns sum to 1 per row are accepted.

    The test passes when ``_validate_predictions`` returns without raising.
    """
    pos, neg = "poslabel", "neglabel"
    predictions = pd.DataFrame({pos: [0.1, 0.2, 0.3], neg: [0.9, 0.8, 0.7]})
    model_adapter = PythonModelAdapter(model_dir=None)
    model_adapter._validate_predictions(
        to_validate=predictions,
        positive_class_label=pos,
        negative_class_label=neg,
    )
class PythonPredictor(BaseLanguagePredictor):
    """Predictor that serves a custom Python model through ``PythonModelAdapter``."""

    def __init__(self):
        super(PythonPredictor, self).__init__()
        self._model_adapter = None
        self._mlops = None

    def configure(self, params):
        """Build the model adapter, load the custom hooks and the model artifact.

        Raises:
            Exception: if the adapter returns ``None`` for the model.
        """
        super(PythonPredictor, self).configure(params)
        self._model_adapter = PythonModelAdapter(
            model_dir=self._custom_model_path, target_type=self._target_type
        )
        # The model directory is put on sys.path before the hooks are loaded.
        sys.path.append(self._custom_model_path)
        self._model_adapter.load_custom_hooks()
        self._model = self._model_adapter.load_model_from_artifact()
        if self._model is None:
            raise Exception("Failed to load model")

    def predict(self, input_filename):
        """Run the adapter's ``predict`` on ``input_filename`` and report timing.

        The target type is always forwarded; the class-label keyword arguments
        are forwarded only when both labels are set. The elapsed time (in
        milliseconds) is passed to ``self.monitor`` together with the input and
        the predictions.

        Returns:
            Whatever ``PythonModelAdapter.predict`` returns.
        """
        kwargs = {TARGET_TYPE_ARG_KEYWORD: self._target_type}
        # Compare against None explicitly: a falsy label (0, "", False) is
        # still a label and must be forwarded.
        if self._positive_class_label is not None and self._negative_class_label is not None:
            kwargs[POSITIVE_CLASS_LABEL_ARG_KEYWORD] = self._positive_class_label
            kwargs[NEGATIVE_CLASS_LABEL_ARG_KEYWORD] = self._negative_class_label

        # perf_counter is monotonic, so the measured duration is not affected
        # by system clock adjustments the way time.time() would be.
        start_predict = time.perf_counter()
        predictions = self._model_adapter.predict(input_filename, model=self._model, **kwargs)
        end_predict = time.perf_counter()
        execution_time_ms = (end_predict - start_predict) * 1000

        # TODO: call monitor only if we are in structured mode
        self.monitor(input_filename, predictions, execution_time_ms)
        return predictions

    def predict_unstructured(self, data, **kwargs):
        """Run the adapter's unstructured prediction and normalise the result.

        Returns:
            The adapter's result unchanged when it is a tuple; otherwise
            ``(result, None)`` when it is ``str``, ``bytes`` or ``None``.

        Raises:
            DrumCommonException: if the adapter returns any other type.
        """
        str_or_tuple = self._model_adapter.predict_unstructured(
            model=self._model, data=data, **kwargs
        )
        if isinstance(str_or_tuple, (str, bytes, type(None))):
            ret = str_or_tuple, None
        elif isinstance(str_or_tuple, tuple):
            ret = str_or_tuple
        else:
            raise DrumCommonException(
                "Wrong type returned in unstructured mode: {}".format(type(str_or_tuple))
            )
        return ret
def test_add_to_one_sad(self):
    """Predictions of (1, -1), (1, 0), (1, 0) must be rejected.

    ``_validate_predictions`` is expected to raise ``ValueError`` for this
    frame, whose first row sums to 0 and contains a negative value.
    """
    pos, neg = "poslabel", "neglabel"
    predictions = pd.DataFrame({pos: [1, 1, 1], neg: [-1, 0, 0]})
    model_adapter = PythonModelAdapter(model_dir=None)
    with pytest.raises(ValueError):
        model_adapter._validate_predictions(
            to_validate=predictions,
            positive_class_label=pos,
            negative_class_label=neg,
        )
class PythonPredictor(BaseLanguagePredictor):
    """Language predictor that delegates to a ``PythonModelAdapter``."""

    def __init__(self):
        super(PythonPredictor, self).__init__()
        self._model_adapter = None
        self._mlops = None

    def configure(self, params):
        """Build the model adapter, load the custom hooks and the model artifact.

        Raises:
            Exception: if the adapter returns ``None`` for the model.
        """
        super(PythonPredictor, self).configure(params)

        adapter = PythonModelAdapter(model_dir=self._code_dir, target_type=self._target_type)
        self._model_adapter = adapter
        # The code directory is put on sys.path before the hooks are loaded.
        sys.path.append(self._code_dir)
        adapter.load_custom_hooks()

        loaded_model = adapter.load_model_from_artifact()
        self._model = loaded_model
        if loaded_model is None:
            raise Exception("Failed to load model")

    @property
    def supported_payload_formats(self):
        """Payload formats reported by the model adapter."""
        return self._model_adapter.supported_payload_formats

    def model_info(self):
        """Return the base predictor's model info updated with the adapter's."""
        info = super(PythonPredictor, self).model_info()
        info.update(self._model_adapter.model_info())
        return info

    def has_read_input_data_hook(self):
        """Forward the adapter's answer on whether a read-input-data hook exists."""
        return self._model_adapter.has_read_input_data_hook()

    def _predict(self, **kwargs):
        """Add target type and label arguments, then call the adapter's ``predict``.

        The positive/negative labels are forwarded only when neither is
        ``None``; ``class_labels`` only when it is truthy.
        """
        kwargs[TARGET_TYPE_ARG_KEYWORD] = self._target_type

        positive = self._positive_class_label
        negative = self._negative_class_label
        if positive is not None and negative is not None:
            kwargs[POSITIVE_CLASS_LABEL_ARG_KEYWORD] = positive
            kwargs[NEGATIVE_CLASS_LABEL_ARG_KEYWORD] = negative

        if self._class_labels:
            kwargs[CLASS_LABELS_ARG_KEYWORD] = self._class_labels

        return self._model_adapter.predict(model=self._model, **kwargs)

    def transform(self, **kwargs):
        """Call the adapter's ``transform`` with the loaded model."""
        return self._model_adapter.transform(model=self._model, **kwargs)

    def predict_unstructured(self, data, **kwargs):
        """Run the adapter's unstructured prediction and normalise the result.

        Returns:
            The adapter's result unchanged when it is a tuple; otherwise
            ``(result, None)`` when it is ``str``, ``bytes`` or ``None``.

        Raises:
            DrumCommonException: if the adapter returns any other type.
        """
        result = self._model_adapter.predict_unstructured(
            model=self._model, data=data, **kwargs
        )
        if isinstance(result, tuple):
            return result
        if isinstance(result, (str, bytes, type(None))):
            return result, None
        raise DrumCommonException(
            "Wrong type returned in unstructured mode: {}".format(type(result))
        )
def configure(self, params):
    """Copy the fit settings out of the component params and load the hooks.

    Keys read with ``[...]`` are required (a missing one raises ``KeyError``);
    keys read with ``.get`` default to ``None``.
    """
    super(PythonFit, self).configure(params)
    settings = self._params

    self.custom_model_path = settings["__custom_model_path__"]
    self.input_filename = settings["inputFilename"]
    self.target_name = settings.get("targetColumn")
    self.output_dir = settings["outputDir"]
    self.positive_class_label = settings.get("positiveClassLabel")
    self.negative_class_label = settings.get("negativeClassLabel")
    self.weights = settings["weights"]
    self.weights_filename = settings["weightsFilename"]
    self.target_filename = settings.get("targetFilename")
    self.num_rows = settings["numRows"]

    adapter = PythonModelAdapter(self.custom_model_path)
    self._model_adapter = adapter
    # The model directory is put on sys.path before the hooks are loaded.
    sys.path.append(self.custom_model_path)
    adapter.load_custom_hooks()
class PythonPredictor(ConnectableComponent):
    """Pipeline component that scores a DataFrame with a custom Python model."""

    def __init__(self, engine):
        super(PythonPredictor, self).__init__(engine)
        self.model = None
        self.positive_class_label = None
        self.negative_class_label = None
        self.custom_model_path = None
        self._model_adapter = None

    def configure(self, params):
        """Read the model path and class labels from the params, then load the model.

        ``__custom_model_path__`` is required; both class labels default to
        ``None`` when absent.

        Raises:
            Exception: if the adapter returns ``None`` for the model.
        """
        super(PythonPredictor, self).configure(params)
        self.custom_model_path = self._params["__custom_model_path__"]
        self.positive_class_label = self._params.get("positiveClassLabel")
        self.negative_class_label = self._params.get("negativeClassLabel")
        self._model_adapter = PythonModelAdapter(model_dir=self.custom_model_path)
        # The model directory is put on sys.path before the hooks are loaded.
        sys.path.append(self.custom_model_path)
        self._model_adapter.load_custom_hooks()
        self.model = self._model_adapter.load_model_from_artifact()
        if self.model is None:
            raise Exception("Failed to load model")

    def _materialize(self, parent_data_objs, user_data):
        """Score the first parent data object with the loaded model.

        The class-label keyword arguments are forwarded only when both labels
        are set. ``user_data`` is not used.

        Returns:
            A one-element list holding the adapter's predictions.
        """
        df = parent_data_objs[0]
        kwargs = {}
        # Compare against None explicitly: a falsy label (0, "", False) is
        # still a label and must be forwarded.
        if self.positive_class_label is not None and self.negative_class_label is not None:
            kwargs[POSITIVE_CLASS_LABEL_ARG_KEYWORD] = self.positive_class_label
            kwargs[NEGATIVE_CLASS_LABEL_ARG_KEYWORD] = self.negative_class_label
        predictions = self._model_adapter.predict(data=df, model=self.model, **kwargs)
        return [predictions]
def test_regression_predictions_header(self):
    """Regression output is validated by its column header.

    A frame with a ``Predictions`` column must pass validation; the same
    values under ``other_name`` must make ``_validate_predictions`` raise
    ``ValueError``.
    """
    adapter = PythonModelAdapter(model_dir=None, target_type=TargetType.REGRESSION)

    df = pd.DataFrame({"Predictions": [0.1, 0.2, 0.3]})
    adapter._validate_predictions(to_validate=df, class_labels=None)

    # Build the bad frame outside pytest.raises so that only the validation
    # call itself can satisfy the expected ValueError.
    df = pd.DataFrame({"other_name": [0.1, 0.2, 0.3]})
    with pytest.raises(ValueError):
        adapter._validate_predictions(to_validate=df, class_labels=None)
def test_class_labels(self):
    """Binary output columns must match the class labels handed to validation.

    Columns named after the supplied labels must pass; the same frame
    validated against the labels ``"yes"``/``"no"`` must raise ``ValueError``.
    """
    positive_label = "poslabel"
    negative_label = "neglabel"
    adapter = PythonModelAdapter(model_dir=None, target_type=TargetType.BINARY)

    df = pd.DataFrame({positive_label: [0.1, 0.2, 0.3], negative_label: [0.9, 0.8, 0.7]})
    adapter._validate_predictions(
        to_validate=df,
        class_labels=[positive_label, negative_label],
    )

    # Build the frame outside pytest.raises so that only the validation call
    # itself can satisfy the expected ValueError.
    df = pd.DataFrame({positive_label: [0.1, 0.2, 0.3], negative_label: [0.9, 0.8, 0.7]})
    with pytest.raises(ValueError):
        adapter._validate_predictions(
            to_validate=df,
            class_labels=["yes", "no"],
        )
class PythonFit(ConnectableComponent):
    """Pipeline component that samples training data and fits a custom Python model."""

    def __init__(self, engine):
        super(PythonFit, self).__init__(engine)
        # All settings start unset. configure() assigns every one of them
        # except ``estimator``, which this class never sets.
        self.target_name = None
        self.output_dir = None
        self.estimator = None
        self.positive_class_label = None
        self.negative_class_label = None
        self.custom_model_path = None
        self.input_filename = None
        self.weights = None
        self.weights_filename = None
        self.target_filename = None
        self._model_adapter = None
        self.num_rows = None

    def configure(self, params):
        """Copy the fit settings out of the component params and load the hooks.

        Keys read with ``[...]`` are required (a missing one raises
        ``KeyError``); keys read with ``.get`` default to ``None``.
        """
        super(PythonFit, self).configure(params)
        self.custom_model_path = self._params["__custom_model_path__"]
        self.input_filename = self._params["inputFilename"]
        self.target_name = self._params.get("targetColumn")
        self.output_dir = self._params["outputDir"]
        self.positive_class_label = self._params.get("positiveClassLabel")
        self.negative_class_label = self._params.get("negativeClassLabel")
        self.weights = self._params["weights"]
        self.weights_filename = self._params["weightsFilename"]
        self.target_filename = self._params.get("targetFilename")
        self.num_rows = self._params["numRows"]
        self._model_adapter = PythonModelAdapter(self.custom_model_path)
        # The model directory is put on sys.path before the hooks are loaded.
        sys.path.append(self.custom_model_path)
        self._model_adapter.load_custom_hooks()

    def _materialize(self, parent_data_objs, user_data):
        """Read the training data, sample it, fit the model and check the artifact.

        ``num_rows`` is resolved in place: ``"ALL"`` becomes the number of rows
        in the input file, anything else is converted with ``int``. Neither
        ``parent_data_objs`` nor ``user_data`` is used.

        Returns:
            An empty list.

        Raises:
            AssertionError: if the target file does not have exactly one column
                or its row count differs from the training data.
            ValueError: if ``weights`` names a column that is not in the
                sampled features.
        """
        df = pd.read_csv(self.input_filename)

        if self.num_rows == "ALL":
            self.num_rows = len(df)
        else:
            self.num_rows = int(self.num_rows)

        if self.target_filename:
            y_unsampled = pd.read_csv(self.target_filename, index_col=False)
            assert len(y_unsampled.columns) == 1, "target file must have exactly one column"
            # Features and target are sampled separately with the same seed,
            # which only pairs the same rows when both inputs have the same
            # length. Check this BEFORE sampling: after sampling both always
            # have num_rows rows, so the comparison could never fail.
            assert len(df) == len(y_unsampled), (
                "target file must have the same number of rows as the training data"
            )
            X = df.sample(self.num_rows, random_state=1)
            y = y_unsampled.sample(self.num_rows, random_state=1).iloc[:, 0]
        else:
            X = df.drop(self.target_name, axis=1).sample(
                self.num_rows, random_state=1, replace=True
            )
            y = df[self.target_name].sample(self.num_rows, random_state=1, replace=True)

        if self.weights_filename:
            # NOTE(review): the weights are sampled with replace=True, while
            # the target-file branch above samples X without replacement, so
            # the drawn rows may not line up in that combination - confirm.
            row_weights = pd.read_csv(self.weights_filename).sample(
                self.num_rows, random_state=1, replace=True
            )
        elif self.weights:
            if self.weights not in X.columns:
                raise ValueError(
                    "The column name {} is not one of the columns in "
                    "your training data".format(self.weights)
                )
            row_weights = X[self.weights]
        else:
            row_weights = None

        class_order = (
            [self.negative_class_label, self.positive_class_label]
            if self.negative_class_label
            else None
        )
        self._model_adapter.fit(
            X, y, output_dir=self.output_dir, class_order=class_order, row_weights=row_weights
        )
        make_sure_artifact_is_small(self.output_dir)
        return []