def predict(self, model_conf, data_sources, data_sinks, model, args_dict):
    """Serve a prediction in one of three modes selected by ``args_dict``.

    * ``"test_key"`` present: find the row of the ``"batch_input"`` data
      source whose ``myid`` equals the key and return its predicted variety.
    * ``"sepal.length"`` missing or ``None``: predict for every row of
      ``"batch_input"``, write the rows plus a ``pred`` column to the
      ``"predictions"`` sink, and return ``{"status": "ok"}``.
    * otherwise: treat ``args_dict`` as a single observation and return its
      predicted variety.

    ``model_conf`` is accepted but not used here.

    Raises:
        ValueError: if ``"test_key"`` is given but no row has that ``myid``.
    """
    if "test_key" in args_dict:
        # URI param example (a URI param is part of args_dict just like any
        # other input).
        key = args_dict["test_key"]
        logger.info(
            "Got the uri parameter (ID) %s. Looking up input data and predicting...",
            key,
        )
        df = data_sources["batch_input"].get_dataframe()
        df["myid"] = df["myid"].apply(str)
        # The column was just cast to str, so cast the key as well; otherwise
        # a non-string key could never match any row.
        matches = df.loc[df["myid"] == str(key)]
        if matches.empty:
            # Fail with a message naming the ID instead of letting the model
            # choke on an empty input frame.
            raise ValueError(
                f"No row with myid {key!r} found in data source 'batch_input'"
            )
        X = order_columns(matches)
        # If several rows share the ID, the first one wins.
        y = model.predict(X.drop("myid", axis=1))[0]
        return {"iris_variety": y}

    if "sepal.length" not in args_dict or args_dict["sepal.length"] is None:
        # Batch prediction example
        logger.info("Doing batch prediction")
        X = order_columns(data_sources["batch_input"].get_dataframe())
        # "myid" is dropped only for the model call; it stays in the output.
        y = model.predict(X.drop("myid", axis=1))
        X["pred"] = y
        data_sinks["predictions"].put_dataframe(X)
        return {"status": "ok"}

    # "Normal" prediction example
    logger.info('Doing "normal" prediction')
    # index=[0] builds a one-row frame from the scalar values in args_dict.
    X = order_columns(pd.DataFrame(args_dict, index=[0]))
    y = model.predict(X)[0]
    return {"iris_variety": y}
def create_trained_model(
    self, model_conf, data_sources, data_sinks, old_model=None
):
    """Fit a new decision tree on the ``"petals"`` data source.

    The ``variety`` column is the target; all remaining columns, passed
    through ``order_columns``, are the features. ``model_conf``,
    ``data_sinks`` and ``old_model`` are accepted but not used here.

    Returns:
        The fitted ``tree.DecisionTreeClassifier``.
    """
    training_frame = data_sources["petals"].get_dataframe()
    features = order_columns(training_frame.drop("variety", axis=1))
    target = training_frame["variety"]

    classifier = tree.DecisionTreeClassifier()
    classifier.fit(features, target)
    return classifier
def test_trained_model(self, model_conf, data_sources, data_sinks, model):
    """Score ``model`` against the ``"petals_test"`` data source.

    The ``variety`` column is the ground truth; the remaining columns,
    passed through ``order_columns``, are fed to ``model.predict``.
    ``model_conf`` and ``data_sinks`` are accepted but not used here.

    Returns:
        A dict with ``"accuracy"`` and ``"confusion_matrix"`` (the latter
        converted with ``.tolist()``).
    """
    holdout = data_sources["petals_test"].get_dataframe()
    features = order_columns(holdout.drop("variety", axis=1))
    expected = holdout["variety"]
    predicted = model.predict(features)

    return {
        "accuracy": accuracy_score(expected, predicted),
        "confusion_matrix": confusion_matrix(expected, predicted).tolist(),
    }
def create_trained_model(self, model_conf, data_sources, data_sinks, old_model=None):
    """Fit a decision tree on ``"petals"`` and add entries to the training report.

    The ``variety`` column is the target; the remaining columns, passed
    through ``order_columns``, are the features. ``model_conf``,
    ``data_sinks`` and ``old_model`` are accepted but not used here.

    Returns:
        The fitted ``tree.DecisionTreeClassifier``.
    """
    petals = data_sources["petals"].get_dataframe()
    inputs = order_columns(petals.drop("variety", axis=1))
    labels = petals["variety"]

    fitted_tree = tree.DecisionTreeClassifier()
    fitted_tree.fit(inputs, labels)

    # Extra info (numbers, strings, dicts, lists) can be saved in the
    # training report (JSON metadata).
    report("meaning_of_life", 42)
    # When a dataframe is passed, its summary is added to the training report.
    report("train_data_used", petals)

    return fitted_tree