def test_log_servingperf(config):
    """Log a sample serving-performance record through ArangoPipe.

    The record carries fixed rmse/r2/mae values and a reporting window that
    spans the 30 days ending at the moment of the call. It is logged against
    the "Wine_Elastic_Net_Regression" deployment tag.

    Args:
        config: connection configuration passed to ``ArangoPipe``.

    Returns:
        The value returned by ``ArangoPipe.log_serving_perf``.
    """
    window_end = datetime.datetime.now()
    window_start = window_end - datetime.timedelta(days=30)

    serving_metrics = {
        "rmse": 0.822242,
        "r2": 0.12678,
        "mae": 0.62787,
        "from_date": str(window_start),
        "to_date": str(window_end),
    }

    pipe = ArangoPipe(config=config)
    return pipe.log_serving_perf(
        serving_metrics, "Wine_Elastic_Net_Regression", "prvileged user"
    )
class TestArangopipe(unittest.TestCase):
    """Integration tests for ArangoPipe against a local ArangoDB instance.

    Each test runs a sequence of ArangoPipe / ArangoPipeAdmin calls and fails
    if any of them raises. ``setUp`` connects to ``localhost:8529`` and
    registers a project; ``tearDown`` calls ``delete_arangomldb``.
    """

    def setUp(self):
        """Create the admin and client handles, then register the project."""
        self.config = ArangoPipeConfig()
        self.config.set_dbconnection(hostname="localhost", port=8529,
                                     root_user="******",
                                     root_user_password="******")
        self.admin = ArangoPipeAdmin(config=self.config)
        self.ap = ArangoPipe(config=self.config)
        self.provision_project()

    def _assert_steps_succeed(self, failure_message, *steps):
        """Run ``steps`` in order and fail the test if any of them raises.

        The traceback of the first failing step is printed to stdout, and the
        test then fails with ``failure_message``. Previously the message was
        attached to an ``assertTrue`` that could never fail, so it was never
        reported.

        Args:
            failure_message: text reported when a step raises.
            *steps: zero-argument callables executed in the given order.
        """
        err_raised = False
        try:
            for step in steps:
                step()
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still abort the test run.
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
        self.assertFalse(err_raised, failure_message)

    # ------------------------------------------------------------------
    # Building blocks shared by the tests below
    # ------------------------------------------------------------------

    def provision_project(self):
        """Register the test project; fail the test if registration raises."""
        proj_info = {"name": "Wine-Quality-Regression-Modelling"}
        self._assert_steps_succeed(
            'Exception raised while provisioning project',
            lambda: self.admin.register_project(proj_info))

    def register_dataset(self):
        """Register the ``wine_dataset`` dataset."""
        ds_info = {"name": "wine_dataset",
                   "description": "Wine quality ratings",
                   "source": "UCI ML Repository"}
        self.ap.register_dataset(ds_info)

    def lookup_dataset(self):
        """Look up the ``wine_dataset`` dataset."""
        self.ap.lookup_dataset("wine_dataset")

    def lookup_featureset(self):
        """Look up the ``wine_no_transformations`` featureset."""
        self.ap.lookup_featureset("wine_no_transformations")

    def register_model(self):
        """Register the ``elastic_net_wine_model`` model."""
        model_info = {"name": "elastic_net_wine_model",
                      "type": "elastic net regression"}
        self.ap.register_model(model_info)

    def lookup_model(self):
        """Look up the ``elastic_net_wine_model`` model."""
        self.ap.lookup_model("elastic_net_wine_model")

    def log_run(self):
        """Log one run linking the registered dataset, featureset and model.

        The dataset, featureset and model must already be registered, since
        their ``_key`` values are read from the lookup results.
        """
        run_id = "0ef73d9edf08487793c77a1742f4033e"
        ds_reg = self.ap.lookup_dataset("wine_dataset")
        fs_reg = self.ap.lookup_featureset("wine_no_transformations")
        model_reg = self.ap.lookup_model("elastic_net_wine_model")
        model_params = {"l1_ratio": 0.1, "alpha": 0.2, "run_id": run_id}
        model_perf = {"rmse": 0.7836984021909766,
                      "r2": 0.20673590971167466,
                      "mae": 0.6142020452688988,
                      "run_id": run_id,
                      "timestamp": "2019-06-06 12:52:11.190048"}
        run_info = {"dataset": ds_reg["_key"],
                    "featureset": fs_reg["_key"],
                    "run_id": run_id,
                    "model": model_reg["_key"],
                    "model-params": model_params,
                    "model-perf": model_perf,
                    "pipeline": "Wine-Regression-Pipeline",
                    "project": "Wine-Quality-Assessment",
                    "deployment_tag": "Wine_Elastic_Net_Regression",
                    "tag": "wine regression model test 1"}
        self.ap.log_run(run_info)

    def provision_deployment(self):
        """Register the ``Wine_Elastic_Net_Regression`` deployment."""
        self.admin.register_deployment("Wine_Elastic_Net_Regression")

    def register_featureset(self):
        """Register the wine featureset against the ``wine_dataset`` dataset."""
        fs_info = {"fixed acidity": "float64",
                   "volatile acidity": "float64",
                   "citric acid": "float64",
                   "residual sugar": "float64",
                   "chlorides": "float64",
                   "free sulfur dioxide": "float64",
                   "total sulfur dioxide": "float64",
                   "density": "float64",
                   "pH": "float64",
                   "sulphates": "float64",
                   "alcohol": "float64",
                   "quality": "int64",
                   "name": "wine_no_transformations"}
        ds_reg = self.ap.lookup_dataset("wine_dataset")
        self.ap.register_featureset(fs_info, ds_reg["_key"])

    def log_servingperf(self):
        """Log serving performance for the 30 days ending now."""
        to_date = datetime.datetime.now()
        from_date = to_date - datetime.timedelta(days=30)
        ex_servingperf = {"rmse": 0.822242, "r2": 0.12678, "mae": 0.62787,
                          "from_date": str(from_date),
                          "to_date": str(to_date)}
        dep_tag = "Wine_Elastic_Net_Regression"
        user_id = "prvileged user"
        self.ap.log_serving_perf(ex_servingperf, dep_tag, user_id)

    def _load_housing_frame(self):
        """Read ``covariate_shift/cal_housing.csv`` located next to this file."""
        ds_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               "covariate_shift/cal_housing.csv")
        return pd.read_csv(ds_path)

    def dataset_shift_positive(self):
        """Return the shift score between the two halves split at lat -119."""
        df = self._load_housing_frame()
        df1 = df.query("lat <= -119")
        df2 = df.query("lat > -119")
        rfd = RF_DatasetShiftDetector()
        score = rfd.detect_dataset_shift(df1, df2)
        print("Detaset shift score : ", score)
        return score

    def dataset_shift_negative(self):
        """Return the shift score between a subset and an exact copy of it."""
        df = self._load_housing_frame()
        df1 = df.query("lat <= -119")
        df2 = df1.copy()
        rfd = RF_DatasetShiftDetector()
        score = rfd.detect_dataset_shift(df1, df2)
        print("Detaset shift score : ", score)
        return score

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_register_dataset(self):
        self._assert_steps_succeed(
            'Exception raised while registering dataset',
            self.register_dataset)

    def test_lookup_dataset(self):
        self._assert_steps_succeed(
            'Exception raised while looking up dataset',
            self.register_dataset,
            self.lookup_dataset)

    def test_register_featureset(self):
        self._assert_steps_succeed(
            'Exception raised while registering featureset',
            self.register_dataset,
            self.register_featureset)

    def test_lookup_featureset(self):
        # The message previously said "registering featureset" (copy-paste).
        self._assert_steps_succeed(
            'Exception raised while looking up featureset',
            self.register_dataset,
            self.register_featureset,
            self.lookup_featureset)

    def test_register_model(self):
        self._assert_steps_succeed(
            'Exception raised while registering model',
            self.register_model)

    def test_lookup_model(self):
        self._assert_steps_succeed(
            'Exception raised while looking up model',
            self.register_model,
            self.lookup_model)

    def test_log_run(self):
        self._assert_steps_succeed(
            'Exception raised while logging performance',
            self.register_dataset,
            self.register_featureset,
            self.register_model,
            self.log_run)

    def test_provision_deployment(self):
        self._assert_steps_succeed(
            'Exception raised while provisioning deployment',
            self.register_dataset,
            self.register_featureset,
            self.register_model,
            self.log_run,
            self.provision_deployment)

    def test_log_serving_performance(self):
        self._assert_steps_succeed(
            'Exception raised while logging serving performance',
            self.register_dataset,
            self.register_featureset,
            self.register_model,
            self.log_run,
            self.provision_deployment,
            self.log_servingperf)

    def test_dataset_shift_positive(self):
        score = self.dataset_shift_positive()
        self.assertGreater(score, 0.8)

    def test_dataset_shift_negative(self):
        score = self.dataset_shift_negative()
        self.assertLess(score, 0.6)

    def tearDown(self):
        """Call ``delete_arangomldb`` and release the handles."""
        self.admin.delete_arangomldb()
        self.ap = None
        self.admin = None
def generate_runs(clean=False):
    """Populate ArangoPipe with one logged run per generated dataset.

    For every tuple yielded by ``dataset_generator()`` this function fits a
    Lasso model with a randomly drawn ``alpha``, scores it on the validation
    and test splits, and then registers a dataset, featureset, model, run,
    deployment and serving-performance record tagged with the next value
    from ``period_string_generator()``.

    Args:
        clean: when true, call ``delete_arangomldb``, ``create_db`` and
            ``create_enterprise_ml_graph`` on the admin handle before
            generating any data.
    """
    connection = ArangoPipeConfig()
    connection.set_dbconnection(
        hostname="localhost",
        port=8529,
        root_user="******",
        root_user_password="******",
    )
    admin = ArangoPipeAdmin(config=connection)
    ap = ArangoPipe(config=connection)

    if clean:
        admin.delete_arangomldb()
        admin.create_db()
        admin.create_enterprise_ml_graph()

    admin.register_project({"name": "Home_Value_Assessor"})

    periods = period_string_generator()
    ds_info = {"description": "Housing Price Data"}
    # NOTE(review): ``data`` is not defined in this function; presumably a
    # module-level DataFrame -- confirm.
    featureset = {
        column: str(dtype) for column, dtype in data.dtypes.to_dict().items()
    }

    for dataset_number, data_tuple in enumerate(dataset_generator(), start=1):
        logger.info("Processing Dataset:" + str(dataset_number))
        aperiod = next(periods)

        # Tuple layout: train/test features, train/test targets, then the
        # validation features and targets.
        X_train, X_test, y_train, y_test, X_val, y_val = (
            data_tuple[position] for position in range(6)
        )

        alpha_random = np.random.uniform(0.0005, 0.001)
        lasso = linear_model.Lasso(alpha=alpha_random)
        lasso.fit(X_train, y_train)

        # Validation metrics are what gets logged with the run.
        val_rmse, val_mae, val_r2 = eval_metrics(y_val, lasso.predict(X_val))
        run_id = str(uuid.uuid4())
        model_perf = {
            "rmse": val_rmse,
            "r2": val_r2,
            "mae": val_mae,
            "run_id": run_id,
            "timestamp": str(dt.datetime.now()),
        }

        # Test-split metrics are logged as serving performance.
        serving_rmse, serving_mae, serving_r2 = eval_metrics(
            y_test, lasso.predict(X_test)
        )
        ex_servingperf = {
            "rmse": serving_rmse,
            "r2": serving_r2,
            "mae": serving_mae,
            "period": aperiod,
        }

        deployment_tag = "Deployment_HPE_" + aperiod
        dataset_tag = "Housing_Dataset_" + aperiod
        pipeline_tag = "Pipeline_HPE" + aperiod
        feature_pipeline_tag = "Feature Pipeline HPE" + aperiod

        # The same two dicts are re-tagged and re-registered on each pass.
        ds_info.update(
            name=dataset_tag,
            tag=dataset_tag,
            source="Housing Price Operational Data Store",
        )
        featureset.update(
            generated_by=feature_pipeline_tag,
            name="log_transformed_house_value",
            tag=dataset_tag,
        )

        ds_reg = ap.register_dataset(ds_info)
        fs_reg = ap.register_featureset(featureset, ds_reg["_key"])

        model_info = {
            "name": "Housing Regression Model",
            "type": "LASSO regression",
            "tag": "model_period:" + aperiod,
        }
        model_reg = ap.register_model(model_info, project="Home_Value_Assessor")

        # NOTE(review): "project" below differs from the registered project
        # name "Home_Value_Assessor" -- confirm whether this is intended.
        run_info = {
            "dataset": ds_reg["_key"],
            "featureset": fs_reg["_key"],
            "run_id": run_id,
            "model": model_reg["_key"],
            "model-params": {"alpha": alpha_random, "run_id": run_id},
            "model-perf": model_perf,
            "pipeline": pipeline_tag,
            "project": "Housing Price Assessor",
            "tag_for_deployment": True,
            "deployment_tag": deployment_tag,
        }
        ap.log_run(run_info)

        admin.register_deployment(deployment_tag)
        ap.log_serving_perf(
            ex_servingperf, deployment_tag, "Arangopipe Test Data Generator"
        )

    return