def verify_install():
    """Smoke-test an Arangopipe installation end to end.

    Builds a managed-service connection, looks up a dataset that does not
    exist (exercising the graceful-miss path), then registers a test
    project.  A traceback is printed -- but not raised -- if project
    registration fails, and a confirmation message is printed at the end.
    """
    #mshost: "5366b66b7d19.arangodb.cloud"
    config = ArangoPipeConfig()
    msc = ManagedServiceConnParam()
    conn_params = {msc.DB_SERVICE_HOST: "d874fc3f1fa5.arangodb.cloud",
                   msc.DB_SERVICE_END_POINT: "apmdb",
                   msc.DB_SERVICE_NAME: "createDB",
                   msc.DB_SERVICE_PORT: 8529,
                   msc.DB_CONN_PROTOCOL: 'https',
                   msc.DB_NOTIFICATION_EMAIL: 'somebody@some_company.com'}
    config = config.create_connection_config(conn_params)
    admin = ArangoPipeAdmin(reuse_connection=False, config=config)
    ap_config = admin.get_config()
    ap = ArangoPipe(config=ap_config)
    # A lookup for a missing dataset should complete without raising.
    ap.lookup_dataset("non existent dataset")
    try:
        proj_info = {"name": "Wine-Quality-Regression-Modelling"}
        proj_reg = admin.register_project(proj_info)
    except Exception:
        # Fixed: was a bare ``except:``, which also swallowed
        # SystemExit/KeyboardInterrupt.  Best-effort behavior is preserved.
        print('-' * 60)
        traceback.print_exc(file=sys.stdout)
        print('-' * 60)
    print("Installation of Arangopipe API verified !")
    return
def arango_pipe_connections(conn_params, reuse_prev_connection=True):
    """Yield an ``(admin, pipe)`` pair built from *conn_params*.

    Generator used as a context manager by callers (``with ... as (admin,
    ap)``); presumably wrapped with ``contextlib.contextmanager`` where it
    is declared -- confirm at the definition site.

    :param conn_params: managed-service connection parameter mapping.
    :param reuse_prev_connection: forwarded to ``ArangoPipeAdmin``.
    """
    connection_cfg = ArangoPipeConfig().create_connection_config(conn_params)
    pipe_admin = ArangoPipeAdmin(reuse_connection=reuse_prev_connection,
                                 config=connection_cfg)
    pipe = ArangoPipe(config=pipe_admin.get_config())
    yield pipe_admin, pipe
def setUp(self):
    """Provision a fresh (non-reused) Arangopipe connection and the test project."""
    service_cfg = self.test_cfg['arangodb']
    param_names = (self.mscp.DB_SERVICE_HOST,
                   self.mscp.DB_SERVICE_END_POINT,
                   self.mscp.DB_SERVICE_NAME,
                   self.mscp.DB_SERVICE_PORT,
                   self.mscp.DB_CONN_PROTOCOL)
    connection_settings = {name: service_cfg[name] for name in param_names}
    pipe_config = ArangoPipeConfig().create_connection_config(connection_settings)
    self.admin = ArangoPipeAdmin(reuse_connection=False, config=pipe_config)
    self.ap = ArangoPipe(config=self.admin.get_config())
    self.provision_project()
    return
def setUp(self):
    """Build a managed-service connection (non-persistent, no reuse) and provision the project."""
    #mshost: "5366b66b7d19.arangodb.cloud"
    msc = ManagedServiceConnParam()
    arango_cfg = self.test_cfg['arangodb']
    settings = {key: arango_cfg[key]
                for key in (msc.DB_SERVICE_HOST,
                            msc.DB_SERVICE_END_POINT,
                            msc.DB_SERVICE_NAME,
                            msc.DB_SERVICE_PORT,
                            msc.DB_CONN_PROTOCOL)}
    settings[msc.DB_NOTIFICATION_EMAIL] = 'somebody@some_company.com'
    self.config = ArangoPipeConfig().create_connection_config(settings)
    self.admin = ArangoPipeAdmin(reuse_connection=False,
                                 config=self.config,
                                 persist_conn=False)
    self.ap = ArangoPipe(config=self.admin.get_config())
    self.provision_project()
    return
def test_using_deleted_database(self):
    """Exercise recovery from a stale (deleted-database) connection.

    Deletes the arangopipe database, reuses the persisted connection, and
    -- if constructing ``ArangoPipe`` from it fails -- provisions a fresh
    connection from the test config and performs a lookup with it.
    """
    # NOTE(review): err_raised is never set to True anywhere, so the final
    # assertFalse is vacuous; kept for interface/behavior compatibility.
    err_raised = False
    print("Running the test using a stale connection... ")
    self.delete_arangopipe_db()
    new_admin = ArangoPipeAdmin(reuse_connection=True)
    ap_config = new_admin.get_config()
    try:
        ap = ArangoPipe(config=ap_config)
    except Exception:
        print("Stale connection identified...")
        print("Using a new connection...")
        mscp = ManagedServiceConnParam()
        conn_config = ArangoPipeConfig()
        cfg = self.test_cfg['arangodb']
        # Fixed: the original mixed the local ``mscp`` (dict keys) with
        # ``self.mscp`` (config lookups); the local instance is now used
        # consistently for both.
        conn_params = {mscp.DB_SERVICE_HOST: cfg[mscp.DB_SERVICE_HOST],
                       mscp.DB_SERVICE_END_POINT: cfg[mscp.DB_SERVICE_END_POINT],
                       mscp.DB_SERVICE_NAME: cfg[mscp.DB_SERVICE_NAME],
                       mscp.DB_SERVICE_PORT: cfg[mscp.DB_SERVICE_PORT],
                       mscp.DB_CONN_PROTOCOL: cfg[mscp.DB_CONN_PROTOCOL]}
        conn_config = conn_config.create_connection_config(conn_params)
        admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
        ap_config = admin.get_config()
        ap = ArangoPipe(config=ap_config)
        print("Using new connection to look up a non existent dataset...")
        ap.lookup_dataset("non existent dataset")
    self.assertFalse(err_raised)
    return
from arangopipe.arangopipe_storage.arangopipe_config import ArangoPipeConfig


def eval_metrics(actual, pred):
    """Return the tuple (rmse, mae, r2) for the given actual vs. predicted values."""
    # mean_squared_error / mean_absolute_error / r2_score are presumably
    # sklearn.metrics imports made elsewhere in this file -- confirm.
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2


if __name__ == "__main__":
    # Script entry point: reuse the persisted Arangopipe connection and
    # prepare the wine-quality data for model training.
    warnings.filterwarnings("ignore")
    np.random.seed(40)  # fixed seed for a reproducible train/test split
    admin = ArangoPipeAdmin(reuse_connection=True)
    the_config = admin.get_config()
    ap = ArangoPipe(config=the_config)
    # Read the wine-quality csv file (make sure you're running this from the root of MLflow!)
    wine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "wine-quality.csv")
    data = pd.read_csv(wine_path)
    # Look up previously registered dataset/featureset metadata in Arangopipe.
    ds_reg = ap.lookup_dataset("wine dataset")
    fs_reg = ap.lookup_featureset("wine_no_transformations")
    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)
    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
def generate_runs(clean=False):
    """Populate the test database with a series of synthetic model runs.

    For each (train, test, val) split produced by ``dataset_generator()``,
    fits a Lasso model with a randomly drawn alpha, evaluates it, and
    registers the dataset, featureset, model, run, deployment and serving
    performance in Arangopipe under the "Home_Value_Assessor" project.

    :param clean: if True, drop and recreate the ML database and graph first.
    """
    cfg = read_data()
    mscp = ManagedServiceConnParam()
    #delete_users()
    #delete_arangopipe_db()
    conn_config = ArangoPipeConfig()
    # Connection parameters (including root credentials) come from the
    # config mapping returned by read_data().
    conn_params = { mscp.DB_SERVICE_HOST : cfg['arangodb'][mscp.DB_SERVICE_HOST], \
                    mscp.DB_USER_NAME : cfg['arangodb'][mscp.DB_USER_NAME],\
                    mscp.DB_PASSWORD : cfg['arangodb'][mscp.DB_PASSWORD],\
                    mscp.DB_NAME : cfg['arangodb'][mscp.DB_NAME], \
                    mscp.DB_ROOT_USER : cfg['arangodb'][mscp.DB_ROOT_USER],\
                    mscp.DB_ROOT_USER_PASSWORD : cfg['arangodb'][mscp.DB_ROOT_USER_PASSWORD],\
                    mscp.DB_SERVICE_END_POINT : cfg['arangodb'][mscp.DB_SERVICE_END_POINT],\
                    mscp.DB_SERVICE_NAME : cfg['arangodb'][mscp.DB_SERVICE_NAME],\
                    mscp.DB_SERVICE_PORT : cfg['arangodb'][mscp.DB_SERVICE_PORT],\
                    mscp.DB_CONN_PROTOCOL : cfg['arangodb'][mscp.DB_CONN_PROTOCOL]}
    conn_config = conn_config.create_connection_config(conn_params)
    admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
    ap_config = admin.get_config()
    ap = ArangoPipe(config=ap_config)
    if clean:
        admin.delete_arangomldb()
        admin.create_db()
        admin.create_enterprise_ml_graph()
    proj_info = {"name": "Home_Value_Assessor"}
    proj_reg = admin.register_project(proj_info)
    # period yields one period label ("tag" suffix) per generated dataset.
    period = period_string_generator()
    ds_info = {"description": "Housing Price Data"}
    # NOTE(review): ``data`` is a module-level frame defined elsewhere in
    # this file (not a parameter) -- confirm at module scope.
    featureset = data.dtypes.to_dict()
    featureset = {k: str(featureset[k]) for k in featureset}
    count = 1
    for data_tuple in dataset_generator():
        logger.info("Processing Dataset:" + str(count))
        count = count + 1
        aperiod = next(period)
        # data_tuple layout: (X_train, X_test, y_train, y_test, X_val, y_val)
        X_train = data_tuple[0]
        X_test = data_tuple[1]
        y_train = data_tuple[2]
        y_test = data_tuple[3]
        X_val = data_tuple[4]
        y_val = data_tuple[5]
        # Random regularization strength per run, in [0.0005, 0.001).
        alpha_random = np.random.uniform(0.0005, 0.001)
        lrm = linear_model.Lasso(alpha=alpha_random)
        lrm.fit(X_train, y_train)
        # Validation metrics become the run's model performance record.
        predicted_val = lrm.predict(X_val)
        (rmse, mae, r2) = eval_metrics(y_val, predicted_val)
        ruuid = uuid.uuid4()
        model_perf = {"rmse": rmse, "r2": r2, "mae": mae, "run_id": str(ruuid), \
                      "timestamp": str(dt.datetime.now())}
        # Test-set metrics become the deployment's serving performance record.
        serving_pred = lrm.predict(X_test)
        (rmse, mae, r2) = eval_metrics(y_test, serving_pred)
        ex_servingperf = {"rmse": rmse, "r2": r2, "mae": mae,\
                          "period" : aperiod}
        deployment_tag = "Deployment_HPE_" + aperiod
        dataset_tag = "Housing_Dataset_" + aperiod
        pipeline_tag = "Pipeline_HPE" + aperiod
        feature_pipeline_tag = "Feature Pipeline HPE" + aperiod
        ds_info["name"] = dataset_tag
        ds_info["tag"] = dataset_tag
        ds_info["source"] = "Housing Price Operational Data Store"
        featureset["generated_by"] = feature_pipeline_tag
        featureset["name"] = "log_transformed_house_value_" + str(ruuid)
        featureset["tag"] = dataset_tag
        ds_reg = ap.register_dataset(ds_info)
        fs_reg = ap.register_featureset(featureset, ds_reg["_key"])
        model_tag = "model_period:" + aperiod
        model_name = "Housing Regression Model_" + str(ruuid)
        model_info = {"name": model_name,\
                      "type": "LASSO regression", "tag": model_tag}
        model_reg = ap.register_model(model_info, project="Home_Value_Assessor")
        model_params = {"alpha": alpha_random, "run_id": str(ruuid)}
        run_info = {"dataset" : ds_reg["_key"],\
                    "featureset": fs_reg["_key"],\
                    "run_id": str(ruuid),\
                    "model": model_reg["_key"],\
                    "model-params": model_params,\
                    "model-perf": model_perf,\
                    "pipeline" : pipeline_tag,\
                    "project": "Housing Price Assessor",
                    "tag_for_deployment": True,\
                    "deployment_tag": deployment_tag}
        ap.log_run(run_info)
        admin.register_deployment(deployment_tag)
        user_id = "Arangopipe Test Data Generator"
        ap.log_serving_perf(ex_servingperf, deployment_tag, user_id)
    print("Done loading data into the test database!")
    return
def run_driver():
    """Train a linear-regression model on the CH dataset and log the run to Arangopipe.

    Trains for 100 epochs with Adam + MSE loss, evaluates on the test
    loader, then registers project, model, dataset, featureset and run
    (with the training-loss schedule) in Arangopipe and reads the logged
    performance back as a sanity check.
    """
    params = {'batch_size': 128, 'shuffle': True, 'num_workers': 6}
    trng_dataset = CH_Dataset()
    test_dataset = CH_Dataset(train=False)
    # NOTE(review): ``data`` here is presumably ``torch.utils.data``
    # imported elsewhere in this file -- confirm.
    training_generator = data.DataLoader(trng_dataset, **params)
    test_generator = data.DataLoader(test_dataset, **params)
    input_size = trng_dataset.input_size
    output_size = trng_dataset.output_size
    m = CH_LinearRegression(inputSize=input_size, outputSize=output_size)
    cost_func = nn.MSELoss()
    learning_rate = 0.1
    optimizer = torch.optim.Adam(m.parameters(), lr=learning_rate)
    all_losses = []
    test_pred_list = []
    test_acts_list = []
    num_epochs = 100
    loss_sched = {}  # epoch -> mean batch loss (sampled below)
    for e in range(num_epochs):
        batch_losses = []
        for ix, (Xb, yb) in enumerate(training_generator):
            _X = Variable(Xb).float()
            _y = Variable(yb).float()
            #==========Forward pass===============
            preds = m(_X)
            preds = torch.flatten(preds)
            loss = cost_func(preds, _y)
            #==========backward pass==============
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            batch_losses.append(loss.item())
            all_losses.append(loss.item())
        # RMSE-style summary of this epoch's batch losses.
        mbl = sqrt(np.mean(batch_losses))
        # NOTE(review): loss schedule is sampled every 5th epoch; grouping
        # reconstructed from collapsed source -- confirm against original.
        if e % 5 == 0:
            print("training loss: " + str(mbl))
            loss_sched[e] = mbl
    # prepares model for inference when trained with a dropout layer
    # print(m.training)
    # m.eval()
    # print(m.training)
    test_batch_losses = []
    test_pred_list = []
    test_acts_list = []
    for _X, _y in test_generator:
        _X = Variable(_X).float()
        _y = Variable(_y).float()
        #apply model
        test_preds = m(_X)
        test_preds = torch.flatten(test_preds)
        test_loss = cost_func(test_preds, _y)
        test_pred_list.extend(test_preds.detach().numpy().ravel())
        test_acts_list.extend(_y.numpy().ravel())
        test_batch_losses.append(test_loss.item())
        # print("Batch loss: {}".format(test_loss.item()))
    tmbl = sqrt(np.mean(test_batch_losses))
    print("test loss: " + str(tmbl))
    # Store experiment results in Arangopipe
    conn_config = ArangoPipeConfig()
    msc = ManagedServiceConnParam()
    test_cfg = get_test_config()
    conn_params = { msc.DB_SERVICE_HOST : test_cfg['arangodb'][msc.DB_SERVICE_HOST], \
                    msc.DB_SERVICE_END_POINT : test_cfg['arangodb'][msc.DB_SERVICE_END_POINT],\
                    msc.DB_SERVICE_NAME : test_cfg['arangodb'][msc.DB_SERVICE_NAME],\
                    msc.DB_SERVICE_PORT : test_cfg['arangodb'][msc.DB_SERVICE_PORT],\
                    msc.DB_CONN_PROTOCOL : test_cfg['arangodb'][msc.DB_CONN_PROTOCOL]}
    # conn_params = { msc.DB_SERVICE_HOST : "localhost", \
    #                 msc.DB_SERVICE_END_POINT : "apmdb",\
    #                 msc.DB_SERVICE_NAME : "createDB",\
    #                 msc.DB_SERVICE_PORT : 8529,\
    #                 msc.DB_CONN_PROTOCOL : 'http',\
    #                 msc.DB_NOTIFICATION_EMAIL : 'somebody@some_company.com'}
    conn_config = conn_config.create_connection_config(conn_params)
    proj_info = {"name": "Housing_Price_Estimation_Project"}
    admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
    proj_reg = admin.register_project(proj_info)
    ap_config = admin.get_config()
    ap = ArangoPipe(config=ap_config)
    ruuid = str(uuid.uuid4().int)
    model_name = "pytorch-linear-reg" + "_dev_run_" + ruuid
    model_info = {"name": model_name, "type": "model-development"}
    model_reg = ap.register_model(model_info,
                                  project="Housing_Price_Estimation_Project")
    ds_info = trng_dataset.get_dataset()
    ds_reg = ap.register_dataset(ds_info)
    fs = trng_dataset.get_featureset()
    fs_reg = ap.register_featureset(fs, ds_reg["_key"])
    model_params = {"optimizer": "Adam", "training_epochs": 100,\
                    "batch_size": 128, "learning_rate": learning_rate,\
                    "run_id": ruuid}
    # The loss schedule dict is serialized with jsonpickle so it round-trips
    # through the document store.
    model_perf = {"training_loss_schedule": jsonpickle.encode(loss_sched),\
                  "run_id": ruuid, "timestamp": str(datetime.datetime.now())}
    run_tag = "Housing-Price-Pytorch-Experiment" + "_dev_run_" + ruuid
    run_info = {"dataset" : ds_reg["_key"],\
                "featureset": fs_reg["_key"],\
                "run_id": ruuid,\
                "model": model_reg["_key"],\
                "model-params": model_params,\
                "model-perf": model_perf,\
                "tag": run_tag,\
                "project": "Housing Price Estimation Project"}
    ap.log_run(run_info)
    # Read back the logged performance record as a sanity check.
    mp = ap.lookup_modelperf(run_tag)
    print(
        "A look up of the loss schedule for this experiment in Arangopipe yields:"
    )
    print(str(mp["training_loss_schedule"]))
    return
class TestArangopipe(unittest.TestCase): def __init__(self, *args, **kwargs): super(TestArangopipe, self).__init__(*args, **kwargs) self.test_cfg = self.get_test_config() return def setUp(self): #mshost: "5366b66b7d19.arangodb.cloud" config = ArangoPipeConfig() msc = ManagedServiceConnParam() conn_params = { msc.DB_SERVICE_HOST : self.test_cfg['arangodb'][msc.DB_SERVICE_HOST], \ msc.DB_SERVICE_END_POINT : self.test_cfg['arangodb'][msc.DB_SERVICE_END_POINT],\ msc.DB_SERVICE_NAME : self.test_cfg['arangodb'][msc.DB_SERVICE_NAME],\ msc.DB_SERVICE_PORT : self.test_cfg['arangodb'][msc.DB_SERVICE_PORT],\ msc.DB_CONN_PROTOCOL : self.test_cfg['arangodb'][msc.DB_CONN_PROTOCOL],\ msc.DB_NOTIFICATION_EMAIL : 'somebody@some_company.com'} config = config.create_connection_config(conn_params) self.config = config self.admin = ArangoPipeAdmin(reuse_connection = False,\ config= self.config, persist_conn= False) ap_config = self.admin.get_config() self.ap = ArangoPipe(config = ap_config) self.provision_project() return def get_test_config(self): file_name = os.path.join(os.path.dirname(__file__), "../test_config/test_datagen_config.yaml") with open(file_name, "r") as file_descriptor: test_cfg = yaml.load(file_descriptor, Loader=yaml.FullLoader) return test_cfg def provision_project(self): err_raised = False try: proj_info = {"name": "Wine-Quality-Regression-Modelling"} proj_reg = self.admin.register_project(proj_info) except: err_raised = True print('-'*60) traceback.print_exc(file=sys.stdout) print('-'*60) self.assertTrue(err_raised, 'Exception raised while provisioning project') #cls.assertFalse(err_raised, ) return def register_dataset(self): ds_info = {"name": "wine_dataset", "description": "Wine quality ratings", "source": "UCI ML Repository"} ds_reg = self.ap.register_dataset(ds_info) return def lookup_dataset(self): ds_reg = self.ap.lookup_dataset("wine_dataset") return def lookup_featureset(self): fs_reg = self.ap.lookup_featureset("wine_no_transformations") return def 
register_model(self): model_info = {"name": "elastic_net_wine_model", "type": "elastic net regression"} model_reg = self.ap.register_model(model_info) return def link_models(self): model_info1 = {"name": "elastic_net_wine_model1", "type": "elastic net regression1"} model_reg1 = self.ap.register_model(model_info1) model_info2 = {"name": "elastic_net_wine_model2", "type": "elastic net regression2"} model_reg2 = self.ap.register_model(model_info2) model_info3 = {"name": "elastic_net_wine_model3", "type": "elastic net regression3"} model_reg3 = self.ap.register_model(model_info3) self.ap.link_entities(model_reg1['_id'], model_reg2['_id']) updated_model_info = self.ap.lookup_model(model_info1["name"]) print("Updated model:") print(updated_model_info) print("Adding another model link") self.ap.link_entities(model_reg1['_id'], model_reg3['_id']) updated_model_info = self.ap.lookup_model(model_info1["name"]) print("Updated model:") print(updated_model_info) added_str = updated_model_info['related_models'] added_links = added_str.split(",") link_added = len(added_links) == 2 self.assertTrue(link_added, 'Exception raised while linking models') return def lookup_model(self): model_reg = self.ap.lookup_model("elastic_net_wine_model") return def log_run(self): ds_reg = self.ap.lookup_dataset("wine_dataset") fs_reg = self.ap.lookup_featureset("wine_no_transformations") model_reg = self.ap.lookup_model("elastic_net_wine_model") model_params = {"l1_ratio": 0.1, "alpha": 0.2, "run_id": "0ef73d9edf08487793c77a1742f4033e"} model_perf = {"rmse": 0.7836984021909766, "r2": 0.20673590971167466, "mae": 0.6142020452688988, "run_id": "0ef73d9edf08487793c77a1742f4033e", "timestamp": "2019-06-06 12:52:11.190048"} run_info = {"dataset": ds_reg["_key"], "featureset": fs_reg["_key"], "run_id": "0ef73d9edf08487793c77a1742f4033e", "model": model_reg["_key"], "model-params": model_params, "model-perf": model_perf, "pipeline": "Wine-Regression-Pipeline", "project": "Wine-Quality-Assessment", 
"deployment_tag": "Wine_Elastic_Net_Regression", "tag": "wine regression model test 1"} self.ap.log_run(run_info) return def provision_deployment(self): ret = self.admin.register_deployment("Wine_Elastic_Net_Regression") return def register_featureset(self): fs_info = {"fixed acidity": "float64", "volatile acidity": "float64", "citric acid": "float64", "residual sugar": "float64", "chlorides": "float64", "free sulfur dioxide": "float64", "total sulfur dioxide": "float64", "density": "float64", "pH": "float64", "sulphates": "float64", "alcohol": "float64", "quality": "int64", "name": "wine_no_transformations" } ds_reg = self.ap.lookup_dataset("wine_dataset") fs_reg = self.ap.register_featureset(fs_info, ds_reg["_key"]) return def log_servingperf(self): to_date = datetime.datetime.now() from_date = to_date - datetime.timedelta(days=30) ex_servingperf = {"rmse": 0.822242, "r2": 0.12678, "mae": 0.62787, "from_date": str(from_date), "to_date": str(to_date)} dep_tag = "Wine_Elastic_Net_Regression" user_id = "prvileged user" ret = self.ap.log_serving_perf(ex_servingperf, dep_tag, user_id) return def dataset_shift_positive(self): ds_path = os.path.join(os.path.dirname( os.path.abspath(__file__)), "cal_housing.csv") df = pd.read_csv(ds_path) req_cols = df.columns.tolist() df = df[req_cols] df1 = df.query("lat <= -119") df2 = df.query("lat > -119") rfd = RF_DatasetShiftDetector() score = rfd.detect_dataset_shift(df1, df2) print("Detaset shift score : ", score) return score def dataset_shift_negative(self): ds_path = os.path.join(os.path.dirname( os.path.abspath(__file__)), "cal_housing.csv") df = pd.read_csv(ds_path) req_cols = df.columns.tolist() df = df[req_cols] df1 = df.query("lat <= -119") df2 = df1.copy() rfd = RF_DatasetShiftDetector() score = rfd.detect_dataset_shift(df1, df2) print("Detaset shift score : ", score) return score def vertex_add_to_arangopipe(self): self.admin.add_vertex_to_arangopipe('test_vertex_1') return def test_arangopipe_vertex_add(self): 
self.vertex_add_to_arangopipe() self.assertTrue(self.admin.has_vertex('test_vertex_1')) return def vertex_remove_from_arangopipe(self): self.admin.add_vertex_to_arangopipe('test_vertex_t1') self.admin.remove_vertex_from_arangopipe('test_vertex_t1', purge=True) return def test_arangopipe_vertex_remove(self): self.vertex_remove_from_arangopipe() self.assertFalse(self.admin.has_vertex('test_vertex_t1')) return def test_register_dataset(self): err_raised = False try: self.register_dataset() except: err_raised = True print('-'*60) traceback.print_exc(file=sys.stdout) print('-'*60) self.assertTrue(err_raised, 'Exception raised while registering dataset') self.assertFalse(err_raised) return def test_reregister_dataset(self): err_raised = False try: self.register_dataset() self.register_dataset() except: err_raised = True print('-'*60) traceback.print_exc(file=sys.stdout) print('-'*60) self.assertTrue(err_raised, 'Exception raised while registering dataset') self.assertFalse(err_raised) return def test_lookup_dataset(self): err_raised = False try: self.register_dataset() self.lookup_dataset() except: err_raised = True print('-'*60) traceback.print_exc(file=sys.stdout) print('-'*60) self.assertTrue(err_raised, 'Exception raised while looking up dataset') self.assertFalse(err_raised) return def test_register_featureset(self): err_raised = False try: self.register_dataset() self.register_featureset() except: err_raised = True print('-'*60) traceback.print_exc(file=sys.stdout) print('-'*60) self.assertTrue(err_raised, 'Exception raised while registering featureset') self.assertFalse(err_raised) return def test_reregister_featureset(self): err_raised = False try: self.register_dataset() self.register_featureset() self.register_featureset() except: err_raised = True print('-'*60) traceback.print_exc(file=sys.stdout) print('-'*60) self.assertTrue(err_raised, 'Exception raised while registering featureset') self.assertFalse(err_raised) return def test_lookup_featureset(self): 
err_raised = False try: self.register_dataset() self.register_featureset() self.lookup_featureset() except: err_raised = True print('-'*60) traceback.print_exc(file=sys.stdout) print('-'*60) self.assertTrue(err_raised, 'Exception raised while registering featureset') self.assertFalse(err_raised) return def test_register_model(self): err_raised = False try: self.register_model() except: err_raised = True print('-'*60) traceback.print_exc(file=sys.stdout) print('-'*60) self.assertTrue(err_raised, 'Exception raised while registering model') self.assertFalse(err_raised) return def test_reregister_model(self): err_raised = False try: self.register_model() self.register_model() except: err_raised = True print('-'*60) traceback.print_exc(file=sys.stdout) print('-'*60) self.assertTrue(err_raised, 'Exception raised while registering model') self.assertFalse(err_raised) return def test_lookup_model(self): err_raised = False try: self.register_model() self.lookup_model() except: err_raised = True print('-'*60) traceback.print_exc(file=sys.stdout) print('-'*60) self.assertTrue(err_raised, 'Exception raised while looking up model') self.assertFalse(err_raised) return def test_log_run(self): err_raised = False try: self.register_dataset() self.register_featureset() self.register_model() self.log_run() except: err_raised = True print('-'*60) traceback.print_exc(file=sys.stdout) print('-'*60) self.assertTrue(err_raised, 'Exception raised while logging performance') self.assertFalse(err_raised) return def test_link_models(self): err_raised = False try: self.link_models() except: err_raised = True print('-'*60) traceback.print_exc(file=sys.stdout) print('-'*60) self.assertTrue(err_raised, 'Exception raised while provisioning deployment') self.assertFalse(err_raised) def test_provision_deployment(self): err_raised = False try: self.register_dataset() self.register_featureset() self.register_model() self.log_run() self.provision_deployment() except: err_raised = True print('-'*60) 
traceback.print_exc(file=sys.stdout) print('-'*60) self.assertTrue(err_raised, 'Exception raised while provisioning deployment') self.assertFalse(err_raised) return def test_log_serving_performance(self): err_raised = False try: self.register_dataset() self.register_featureset() self.register_model() self.log_run() self.provision_deployment() self.log_servingperf() except: err_raised = True print('-'*60) traceback.print_exc(file=sys.stdout) print('-'*60) self.assertTrue(err_raised, 'Exception raised while logging serving performance') self.assertFalse(err_raised) return def test_dataset_shift_positive(self): score = self.dataset_shift_positive() self.assertTrue(score > 0.8) return def test_dataset_shift_negative(self): score = self.dataset_shift_negative() self.assertTrue(score < 0.6) return def add_edge_to_arangopipe(self): self.admin.add_vertex_to_arangopipe('test_vertex_s') self.admin.add_vertex_to_arangopipe('test_vertex_d') self.admin.add_edge_definition_to_arangopipe('test_edge_col', 'test_edge', 'test_vertex_s', 'test_vertex_d') return def test_arangopipe_edge_add(self): self.add_edge_to_arangopipe() self.assertTrue(self.admin.has_edge('test_edge_col')) return def remove_edge_from_arangopipe(self): self.admin.add_vertex_to_arangopipe('test_vertex_s1') self.admin.add_vertex_to_arangopipe('test_vertex_d1') self.admin.add_edge_definition_to_arangopipe('test_edge_col', 'test_edge_1', 'test_vertex_s1', 'test_vertex_d1') self.admin.remove_edge_definition_from_arangopipe( 'test_edge_1', purge=True) return def test_arangopipe_edge_remove(self): self.remove_edge_from_arangopipe() self.assertFalse(self.admin.has_edge('test_edge_1')) return def add_vertex_node(self): ni = None self.admin.add_vertex_to_arangopipe('test_vertex_s2') sd = {'name': "sample doc"} ni = self.ap.insert_into_vertex_type('test_vertex_s2', sd) return ni def test_arangopipe_vertex_node_add(self): ni = self.add_vertex_node() self.assertIsNotNone(ni) return def add_edge_link(self): ei = None 
self.admin.add_vertex_to_arangopipe('test_vertex_s3') self.admin.add_vertex_to_arangopipe('test_vertex_s4') sd = {'name': "sample doc"} v1 = self.ap.insert_into_vertex_type('test_vertex_s3', sd) v2 = self.ap.insert_into_vertex_type('test_vertex_s4', sd) self.admin.add_edge_definition_to_arangopipe('test_edge_col', 'test_edge', 'test_vertex_s3', 'test_vertex_s4') ei = self.ap.insert_into_edge_type('test_edge_col', v1, v2) return ei def test_arangopipe_edge_link_add(self): ei = self.add_edge_link() self.assertIsNotNone(ei) return def test_export(self): file_path = '/tmp/arangopipe_config.yaml' self.config.export_cfg(file_path) file_exists = os.path.exists(file_path) self.assertTrue(file_exists) return def test_import(self): file_path = '/tmp/arangopipe_config.yaml' self.config.export_cfg(file_path) cc = self.config.create_config(file_path) self.assertTrue(len(cc) > 0) return def test_connection_manager(self): msc = ManagedServiceConnParam() conn_params = { msc.DB_SERVICE_HOST : "arangoml.arangodb.cloud", \ msc.DB_SERVICE_END_POINT : "createDB",\ msc.DB_SERVICE_NAME : "createDB",\ msc.DB_SERVICE_PORT : 8529,\ msc.DB_CONN_PROTOCOL : 'https'} with arango_pipe_connections(conn_params, False) as (ap_admin, ap): proj_info = {"name": "Python With Generator Admin test"} proj_reg = ap_admin.register_project(proj_info) print("Done with test!") return
class TestArangopipe(unittest.TestCase):
    """Integration tests for the ArangoPipe ML-metadata API.

    setUp provisions a brand-new connection (reuse_connection=False) and a
    test project before every test, so each test starts from known state.
    Connection parameters come from the YAML test configuration file.
    """

    def __init__(self, *args, **kwargs):
        super(TestArangopipe, self).__init__(*args, **kwargs)
        # Loaded once per TestCase instance; reused by setUp for every test.
        self.test_cfg = self.get_test_config()
        self.mscp = ManagedServiceConnParam()
        return

    def setUp(self):
        """Create a fresh ArangoPipe connection and provision the test project."""
        conn_config = ArangoPipeConfig()
        conn_params = {
            self.mscp.DB_SERVICE_HOST: self.test_cfg['arangodb'][self.mscp.DB_SERVICE_HOST],
            # Root credentials intentionally left disabled for these tests:
            # self.mscp.DB_ROOT_USER: self.test_cfg['arangodb'][self.mscp.DB_ROOT_USER],
            # self.mscp.DB_ROOT_USER_PASSWORD: self.test_cfg['arangodb'][self.mscp.DB_ROOT_USER_PASSWORD],
            self.mscp.DB_SERVICE_END_POINT: self.test_cfg['arangodb'][self.mscp.DB_SERVICE_END_POINT],
            self.mscp.DB_SERVICE_NAME: self.test_cfg['arangodb'][self.mscp.DB_SERVICE_NAME],
            self.mscp.DB_SERVICE_PORT: self.test_cfg['arangodb'][self.mscp.DB_SERVICE_PORT],
            self.mscp.DB_CONN_PROTOCOL: self.test_cfg['arangodb'][self.mscp.DB_CONN_PROTOCOL]}
        conn_config = conn_config.create_connection_config(conn_params)
        # reuse_connection=False: force a new provisioned database per run.
        self.admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
        the_config = self.admin.get_config()
        self.ap = ArangoPipe(config=the_config)
        self.provision_project()
        return

    def get_test_config(self):
        """Load ArangoDB connection settings from the test YAML file."""
        file_name = os.path.join(os.path.dirname(__file__),
                                 "../test_config/test_datagen_config.yaml")
        with open(file_name, "r") as file_descriptor:
            test_cfg = yaml.load(file_descriptor, Loader=yaml.FullLoader)
        return test_cfg

    def provision_project(self):
        """Register the test project; fail the calling test on any exception."""
        err_raised = False
        try:
            proj_info = {"name": "Wine-Quality-Regression-Modelling"}
            proj_reg = self.admin.register_project(proj_info)
        except:
            # Deliberate catch-all: any failure flags the test via err_raised.
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,
                            'Exception raised while provisioning project')

        self.assertFalse(err_raised)
        return

    def register_dataset(self):
        """Helper: register the wine dataset used by the workflow tests."""
        ds_info = {"name": "wine_dataset",
                   "description": "Wine quality ratings",
                   "source": "UCI ML Repository"}
        ds_reg = self.ap.register_dataset(ds_info)
        return

    def lookup_dataset(self):
        ds_reg = self.ap.lookup_dataset("wine_dataset")
        return

    def lookup_featureset(self):
        fs_reg = self.ap.lookup_featureset("wine_no_transformations")
        return

    def register_model(self):
        model_info = {"name": "elastic_net_wine_model",
                      "type": "elastic net regression"}
        model_reg = self.ap.register_model(model_info)
        return

    def lookup_model(self):
        model_reg = self.ap.lookup_model("elastic_net_wine_model")
        return

    def log_run(self):
        """Helper: log a model run linking dataset, featureset and model."""
        ds_reg = self.ap.lookup_dataset("wine_dataset")
        fs_reg = self.ap.lookup_featureset("wine_no_transformations")
        model_reg = self.ap.lookup_model("elastic_net_wine_model")
        model_params = {"l1_ratio": 0.1,
                        "alpha": 0.2,
                        "run_id": "0ef73d9edf08487793c77a1742f4033e"}
        model_perf = {"rmse": 0.7836984021909766,
                      "r2": 0.20673590971167466,
                      "mae": 0.6142020452688988,
                      "run_id": "0ef73d9edf08487793c77a1742f4033e",
                      "timestamp": "2019-06-06 12:52:11.190048"}
        run_info = {"dataset": ds_reg["_key"],
                    "featureset": fs_reg["_key"],
                    "run_id": "0ef73d9edf08487793c77a1742f4033e",
                    "model": model_reg["_key"],
                    "model-params": model_params,
                    "model-perf": model_perf,
                    "pipeline": "Wine-Regression-Pipeline",
                    "project": "Wine-Quality-Assessment",
                    "deployment_tag": "Wine_Elastic_Net_Regression",
                    "tag": "wine regression model test 1"}
        self.ap.log_run(run_info)
        return

    def provision_deployment(self):
        ret = self.admin.register_deployment("Wine_Elastic_Net_Regression")
        return

    def register_featureset(self):
        """Helper: register the untransformed wine featureset for the dataset."""
        fs_info = {"fixed acidity": "float64",
                   "volatile acidity": "float64",
                   "citric acid": "float64",
                   "residual sugar": "float64",
                   "chlorides": "float64",
                   "free sulfur dioxide": "float64",
                   "total sulfur dioxide": "float64",
                   "density": "float64",
                   "pH": "float64",
                   "sulphates": "float64",
                   "alcohol": "float64",
                   "quality": "int64",
                   "name": "wine_no_transformations"}
        ds_reg = self.ap.lookup_dataset("wine_dataset")
        fs_reg = self.ap.register_featureset(fs_info, ds_reg["_key"])
        return

    def log_servingperf(self):
        """Helper: log 30 days of (synthetic) serving performance numbers."""
        to_date = datetime.datetime.now()
        from_date = to_date - datetime.timedelta(days=30)
        ex_servingperf = {"rmse": 0.822242,
                          "r2": 0.12678,
                          "mae": 0.62787,
                          "from_date": str(from_date),
                          "to_date": str(to_date)}
        dep_tag = "Wine_Elastic_Net_Regression"
        user_id = "prvileged user"
        ret = self.ap.log_serving_perf(ex_servingperf, dep_tag, user_id)
        return

    def dataset_shift_positive(self):
        """Split the housing data on latitude so the halves genuinely differ."""
        ds_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               "cal_housing.csv")
        df = pd.read_csv(ds_path)
        req_cols = df.columns.tolist()
        df = df[req_cols]
        df1 = df.query("lat <= -119")
        df2 = df.query("lat > -119")
        rfd = RF_DatasetShiftDetector()
        score = rfd.detect_dataset_shift(df1, df2)
        print("Detaset shift score : ", score)
        return score

    def dataset_shift_negative(self):
        """Compare a partition with a copy of itself: no shift expected."""
        ds_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               "cal_housing.csv")
        df = pd.read_csv(ds_path)
        req_cols = df.columns.tolist()
        df = df[req_cols]
        df1 = df.query("lat <= -119")
        df2 = df1.copy()
        rfd = RF_DatasetShiftDetector()
        score = rfd.detect_dataset_shift(df1, df2)
        print("Detaset shift score : ", score)
        return score

    def torch_test(self):
        print("Running test for pytorch...")
        run_driver()
        print("Pytorch test completed!")
        return

    def test_torch_workflow(self):
        err_raised = False
        try:
            self.torch_test()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,
                            'Exception raised while provisioning project')

        self.assertFalse(err_raised)
        return

    def vertex_add_to_arangopipe(self):
        self.admin.add_vertex_to_arangopipe('test_vertex_1')
        return

    def test_arangopipe_vertex_add(self):
        self.vertex_add_to_arangopipe()
        self.assertTrue(self.admin.has_vertex('test_vertex_1'))
        return

    def vertex_remove_from_arangopipe(self):
        self.admin.add_vertex_to_arangopipe('test_vertex_t1')
        self.admin.remove_vertex_from_arangopipe('test_vertex_t1', purge=True)
        return

    def test_arangopipe_vertex_remove(self):
        self.vertex_remove_from_arangopipe()
        self.assertFalse(self.admin.has_vertex('test_vertex_t1'))
        return

    def test_register_dataset(self):
        err_raised = False
        try:
            self.register_dataset()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,
                            'Exception raised while registering dataset')

        self.assertFalse(err_raised)
        return

    def test_lookup_dataset(self):
        err_raised = False
        try:
            self.register_dataset()
            self.lookup_dataset()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,
                            'Exception raised while looking up dataset')

        self.assertFalse(err_raised)
        return

    def test_register_featureset(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,
                            'Exception raised while registering featureset')

        self.assertFalse(err_raised)
        return

    def test_lookup_featureset(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.lookup_featureset()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,
                            'Exception raised while registering featureset')

        self.assertFalse(err_raised)
        return

    def test_register_model(self):
        err_raised = False
        try:
            self.register_model()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,
                            'Exception raised while registering model')

        self.assertFalse(err_raised)
        return

    def test_lookup_model(self):
        err_raised = False
        try:
            self.register_model()
            self.lookup_model()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,
                            'Exception raised while looking up model')

        self.assertFalse(err_raised)
        return

    def test_log_run(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.register_model()
            self.log_run()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,
                            'Exception raised while logging performance')

        self.assertFalse(err_raised)
        return

    def test_provision_deployment(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.register_model()
            self.log_run()
            self.provision_deployment()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,
                            'Exception raised while provisioning deployment')

        self.assertFalse(err_raised)
        return

    def test_log_serving_performance(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.register_model()
            self.log_run()
            self.provision_deployment()
            self.log_servingperf()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,
                            'Exception raised while logging serving performance')

        self.assertFalse(err_raised)
        return

    def test_dataset_shift_positive(self):
        score = self.dataset_shift_positive()
        self.assertTrue(score > 0.8)
        return

    def test_dataset_shift_negative(self):
        score = self.dataset_shift_negative()
        self.assertTrue(score < 0.6)
        return

    def add_edge_to_arangopipe(self):
        """Helper: create two vertex types and an edge definition between them."""
        self.admin.add_vertex_to_arangopipe('test_vertex_s')
        self.admin.add_vertex_to_arangopipe('test_vertex_d')
        self.admin.add_edge_definition_to_arangopipe('test_col', 'test_edge',
                                                     'test_vertex_s',
                                                     'test_vertex_d')
        return

    def test_arangopipe_edge_add(self):
        self.add_edge_to_arangopipe()
        self.assertTrue(self.admin.has_edge('test_col'))
        return

    def remove_edge_from_arangopipe(self):
        """Helper: create an edge definition, then remove it (purging data)."""
        self.admin.add_vertex_to_arangopipe('test_vertex_s1')
        self.admin.add_vertex_to_arangopipe('test_vertex_d1')
        self.admin.add_edge_definition_to_arangopipe('test_col', 'test_edge_1',
                                                     'test_vertex_s1',
                                                     'test_vertex_d1')
        self.admin.remove_edge_definition_from_arangopipe('test_edge_1',
                                                          purge=True)
        return

    def test_arangopipe_edge_remove(self):
        self.remove_edge_from_arangopipe()
        self.assertFalse(self.admin.has_edge('test_edge_1'))
        return

    def add_vertex_node(self):
        """Helper: insert a sample document into a fresh vertex type.

        Returns the inserted node (or None if insertion did not happen).
        """
        ni = None
        self.admin.add_vertex_to_arangopipe('test_vertex_s2')
        sd = {'name': "sample doc"}
        ni = self.ap.insert_into_vertex_type('test_vertex_s2', sd)
        return ni

    def test_arangopipe_vertex_node_add(self):
        ni = self.add_vertex_node()
        self.assertIsNotNone(ni)
        return

    def add_edge_link(self):
        """Helper: insert two vertices and link them with an edge document.

        Returns the inserted edge (or None if insertion did not happen).
        """
        ei = None
        self.admin.add_vertex_to_arangopipe('test_vertex_s3')
        self.admin.add_vertex_to_arangopipe('test_vertex_s4')
        sd = {'name': "sample doc"}
        v1 = self.ap.insert_into_vertex_type('test_vertex_s3', sd)
        v2 = self.ap.insert_into_vertex_type('test_vertex_s4', sd)
        self.admin.add_edge_definition_to_arangopipe('test_col', 'test_edge',
                                                     'test_vertex_s3',
                                                     'test_vertex_s4')
        ei = self.ap.insert_into_edge_type('test_col', v1, v2)
        return ei

    def test_arangopipe_edge_link_add(self):
        ei = self.add_edge_link()
        self.assertIsNotNone(ei)
        return
conn_config = ArangoPipeConfig() msc = ManagedServiceConnParam() conn_params = { msc.DB_SERVICE_HOST : "localhost", \ msc.DB_SERVICE_END_POINT : "apmdb",\ msc.DB_SERVICE_NAME : "createDB",\ msc.DB_SERVICE_PORT : 8529, msc.DB_CONN_PROTOCOL : 'http'} conn_config = conn_config.create_connection_config(conn_params) admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config) proj_reg = admin.register_project(proj_info) warnings.filterwarnings("ignore") np.random.seed(40) ap_config = admin.get_config() ap = ArangoPipe(config=ap_config) # Read the wine-quality csv file (make sure you're running this from the root of MLflow!) wine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "wine-quality.csv") data = pd.read_csv(wine_path) ds_info = {"name" : "wine dataset",\ "description": "Wine quality ratings","source": "UCI ML Repository" } ds_reg = ap.register_dataset(ds_info) featureset = data.dtypes.to_dict() featureset = {k: str(featureset[k]) for k in featureset} featureset["name"] = "wine_no_transformations" fs_reg = ap.register_featureset(featureset, ds_reg["_key"]) model_info = { "name": "elastic_net_wine_model", "type": "elastic net regression"