Example #1
def verify_install():
    #mshost: "5366b66b7d19.arangodb.cloud"
    config = ArangoPipeConfig()
    msc = ManagedServiceConnParam()
    conn_params = { msc.DB_SERVICE_HOST : "d874fc3f1fa5.arangodb.cloud", \
                    msc.DB_SERVICE_END_POINT : "apmdb",\
                    msc.DB_SERVICE_NAME : "createDB",\
                    msc.DB_SERVICE_PORT : 8529,\
                    msc.DB_CONN_PROTOCOL : 'https',\
                    msc.DB_NOTIFICATION_EMAIL : 'somebody@some_company.com'}

    config = config.create_connection_config(conn_params)
    admin = ArangoPipeAdmin(reuse_connection=False, config=config)
    ap_config = admin.get_config()
    ap = ArangoPipe(config=ap_config)
    ap.lookup_dataset("non existent dataset")
    try:
        proj_info = {"name": "Wine-Quality-Regression-Modelling"}
        proj_reg = admin.register_project(proj_info)
    except:
        print('-' * 60)
        traceback.print_exc(file=sys.stdout)
        print('-' * 60)

    print("Installation of Arangopipe API verified !")

    return
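The snippet above omits its imports. Below is a minimal driver sketch; the arangopipe module paths are an assumption, following the package layout used in Example #6.

import sys
import traceback

# Assumed module paths (see Example #6); adjust if your arangopipe layout differs.
from arangopipe.arangopipe_storage.arangopipe_api import ArangoPipe
from arangopipe.arangopipe_storage.arangopipe_admin_api import ArangoPipeAdmin
from arangopipe.arangopipe_storage.arangopipe_config import ArangoPipeConfig
from arangopipe.arangopipe_storage.managed_service_conn_parameters import ManagedServiceConnParam

if __name__ == "__main__":
    verify_install()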
Example #2
from contextlib import contextmanager

# Wrapped with @contextmanager so it can be used in a `with` statement,
# as in test_connection_manager (Example #9).
@contextmanager
def arango_pipe_connections(conn_params, reuse_prev_connection=True):
    mdb_config = ArangoPipeConfig()
    mdb_config = mdb_config.create_connection_config(conn_params)
    admin = ArangoPipeAdmin(reuse_connection=reuse_prev_connection,
                            config=mdb_config)
    ap_config = admin.get_config()
    ap = ArangoPipe(config=ap_config)
    yield admin, ap
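A usage sketch for the context manager above, mirroring test_connection_manager in Example #9 (the connection values are taken from that test):

msc = ManagedServiceConnParam()
conn_params = { msc.DB_SERVICE_HOST : "arangoml.arangodb.cloud", \
                msc.DB_SERVICE_END_POINT : "createDB",\
                msc.DB_SERVICE_NAME : "createDB",\
                msc.DB_SERVICE_PORT : 8529,\
                msc.DB_CONN_PROTOCOL : 'https'}

with arango_pipe_connections(conn_params, False) as (admin, ap):
    proj_info = {"name": "Python With Generator Admin test"}
    proj_reg = admin.register_project(proj_info)
    print("Done with test!")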
Example #3
    def setUp(self):
        conn_config = ArangoPipeConfig()
        conn_params = { self.mscp.DB_SERVICE_HOST : self.test_cfg['arangodb'][self.mscp.DB_SERVICE_HOST], \
                #self.mscp.DB_ROOT_USER : self.test_cfg['arangodb'][self.mscp.DB_ROOT_USER],\
                #self.mscp.DB_ROOT_USER_PASSWORD : self.test_cfg['arangodb'][self.mscp.DB_ROOT_USER_PASSWORD],\
                self.mscp.DB_SERVICE_END_POINT : self.test_cfg['arangodb'][self.mscp.DB_SERVICE_END_POINT],\
                self.mscp.DB_SERVICE_NAME : self.test_cfg['arangodb'][self.mscp.DB_SERVICE_NAME],\
                self.mscp.DB_SERVICE_PORT : self.test_cfg['arangodb'][self.mscp.DB_SERVICE_PORT],\
                self.mscp.DB_CONN_PROTOCOL : self.test_cfg['arangodb'][self.mscp.DB_CONN_PROTOCOL]}

        conn_config = conn_config.create_connection_config(conn_params)
        self.admin = ArangoPipeAdmin(reuse_connection=False,
                                     config=conn_config)
        the_config = self.admin.get_config()
        self.ap = ArangoPipe(config=the_config)
        self.provision_project()

        return
Example #4
    def setUp(self):
        #mshost: "5366b66b7d19.arangodb.cloud"
        config = ArangoPipeConfig()
        msc = ManagedServiceConnParam()
        conn_params = { msc.DB_SERVICE_HOST : self.test_cfg['arangodb'][msc.DB_SERVICE_HOST], \
                        msc.DB_SERVICE_END_POINT : self.test_cfg['arangodb'][msc.DB_SERVICE_END_POINT],\
                        msc.DB_SERVICE_NAME : self.test_cfg['arangodb'][msc.DB_SERVICE_NAME],\
                        msc.DB_SERVICE_PORT : self.test_cfg['arangodb'][msc.DB_SERVICE_PORT],\
                        msc.DB_CONN_PROTOCOL : self.test_cfg['arangodb'][msc.DB_CONN_PROTOCOL],\
                        msc.DB_NOTIFICATION_EMAIL : 'somebody@some_company.com'}

        config = config.create_connection_config(conn_params)
        self.config = config
        self.admin = ArangoPipeAdmin(reuse_connection=False,
                                     config=self.config, persist_conn=False)
        ap_config = self.admin.get_config()
        self.ap = ArangoPipe(config=ap_config)
        self.provision_project()
        return
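A possible companion tearDown, not part of the original excerpt; it assumes admin.delete_arangomldb() drops the test database, as it is used in Example #7.

    def tearDown(self):
        # Hypothetical cleanup sketch: drop the database created in setUp,
        # using the admin call that Example #7 uses for the same purpose.
        self.admin.delete_arangomldb()
        return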
Example #5
    def test_using_deleted_database(self):
        err_raised = False
        print("Running the test using a stale connection... ")
        self.delete_arangopipe_db()
        new_admin = ArangoPipeAdmin(reuse_connection=True)
        ap_config = new_admin.get_config()

        try:
            ap = ArangoPipe(config=ap_config)
        except Exception:
            print("Stale connection identified...")
            print("Using a new connection...")
            mscp = ManagedServiceConnParam()
            conn_config = ArangoPipeConfig()
            conn_params = { mscp.DB_SERVICE_HOST : self.test_cfg['arangodb'][mscp.DB_SERVICE_HOST], \
                        mscp.DB_SERVICE_END_POINT : self.test_cfg['arangodb'][mscp.DB_SERVICE_END_POINT],\
                        mscp.DB_SERVICE_NAME : self.test_cfg['arangodb'][mscp.DB_SERVICE_NAME],\
                        mscp.DB_SERVICE_PORT : self.test_cfg['arangodb'][mscp.DB_SERVICE_PORT],\
                        mscp.DB_CONN_PROTOCOL : self.test_cfg['arangodb'][mscp.DB_CONN_PROTOCOL]}
            conn_config = conn_config.create_connection_config(conn_params)
            admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
            ap_config = admin.get_config()
            ap = ArangoPipe(config=ap_config)

        print("Using new connection to look up a non existent dataset...")
        ap.lookup_dataset("non existent dataset")
        self.assertFalse(err_raised)

        return
Example #6
# Imports assumed by this excerpt; the arangopipe module paths follow the
# package layout used in these examples, the rest are standard scientific Python.
import os
import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

from arangopipe.arangopipe_storage.arangopipe_api import ArangoPipe
from arangopipe.arangopipe_storage.arangopipe_admin_api import ArangoPipeAdmin
from arangopipe.arangopipe_storage.arangopipe_config import ArangoPipeConfig


def eval_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2
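As a quick aside, the metric formulas used in eval_metrics() can be checked on toy data with a self-contained snippet:

import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Toy inputs; the three printed values are rmse, mae and r2,
# computed exactly as in eval_metrics() above.
actual = np.array([3.0, 5.0, 7.0])
pred = np.array([2.5, 5.0, 8.0])
print(np.sqrt(mean_squared_error(actual, pred)),
      mean_absolute_error(actual, pred),
      r2_score(actual, pred))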


if __name__ == "__main__":
    warnings.filterwarnings("ignore")
    np.random.seed(40)
    admin = ArangoPipeAdmin(reuse_connection=True)
    the_config = admin.get_config()
    ap = ArangoPipe(config=the_config)
    # Read the wine-quality csv file (make sure you're running this from the root of MLflow!)
    wine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "wine-quality.csv")
    data = pd.read_csv(wine_path)

    ds_reg = ap.lookup_dataset("wine dataset")
    fs_reg = ap.lookup_featureset("wine_no_transformations")

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
Example #7
def generate_runs(clean=False):
    cfg = read_data()
    mscp = ManagedServiceConnParam()

    #delete_users()
    #delete_arangopipe_db()
    conn_config = ArangoPipeConfig()

    conn_params = { mscp.DB_SERVICE_HOST : cfg['arangodb'][mscp.DB_SERVICE_HOST], \
                    mscp.DB_USER_NAME : cfg['arangodb'][mscp.DB_USER_NAME],\
                    mscp.DB_PASSWORD : cfg['arangodb'][mscp.DB_PASSWORD],\
                    mscp.DB_NAME : cfg['arangodb'][mscp.DB_NAME], \
                    mscp.DB_ROOT_USER : cfg['arangodb'][mscp.DB_ROOT_USER],\
                    mscp.DB_ROOT_USER_PASSWORD : cfg['arangodb'][mscp.DB_ROOT_USER_PASSWORD],\
                    mscp.DB_SERVICE_END_POINT : cfg['arangodb'][mscp.DB_SERVICE_END_POINT],\
                    mscp.DB_SERVICE_NAME : cfg['arangodb'][mscp.DB_SERVICE_NAME],\
                    mscp.DB_SERVICE_PORT : cfg['arangodb'][mscp.DB_SERVICE_PORT],\
                    mscp.DB_CONN_PROTOCOL : cfg['arangodb'][mscp.DB_CONN_PROTOCOL]}

    conn_config = conn_config.create_connection_config(conn_params)
    admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
    ap_config = admin.get_config()
    ap = ArangoPipe(config=ap_config)

    if clean:
        admin.delete_arangomldb()
        admin.create_db()
        admin.create_enterprise_ml_graph()

    proj_info = {"name": "Home_Value_Assessor"}
    proj_reg = admin.register_project(proj_info)

    period = period_string_generator()
    ds_info = {"description": "Housing Price Data"}
    featureset = data.dtypes.to_dict()
    featureset = {k: str(featureset[k]) for k in featureset}
    count = 1

    for data_tuple in dataset_generator():
        logger.info("Processing Dataset:" + str(count))
        count = count + 1
        aperiod = next(period)
        X_train = data_tuple[0]
        X_test = data_tuple[1]
        y_train = data_tuple[2]
        y_test = data_tuple[3]
        X_val = data_tuple[4]
        y_val = data_tuple[5]
        alpha_random = np.random.uniform(0.0005, 0.001)
        lrm = linear_model.Lasso(alpha=alpha_random)
        lrm.fit(X_train, y_train)
        predicted_val = lrm.predict(X_val)
        (rmse, mae, r2) = eval_metrics(y_val, predicted_val)
        ruuid = uuid.uuid4()
        model_perf = {"rmse": rmse, "r2": r2, "mae": mae, "run_id": str(ruuid), \
                      "timestamp": str(dt.datetime.now())}
        serving_pred = lrm.predict(X_test)
        (rmse, mae, r2) = eval_metrics(y_test, serving_pred)
        ex_servingperf = {"rmse": rmse, "r2": r2, "mae": mae,\
                      "period" : aperiod}
        deployment_tag = "Deployment_HPE_" + aperiod
        dataset_tag = "Housing_Dataset_" + aperiod
        pipeline_tag = "Pipeline_HPE" + aperiod
        feature_pipeline_tag = "Feature Pipeline HPE" + aperiod
        ds_info["name"] = dataset_tag
        ds_info["tag"] = dataset_tag
        ds_info["source"] = "Housing Price Operational Data Store"
        featureset["generated_by"] = feature_pipeline_tag
        featureset["name"] = "log_transformed_house_value_" + str(ruuid)
        featureset["tag"] = dataset_tag

        ds_reg = ap.register_dataset(ds_info)
        fs_reg = ap.register_featureset(featureset, ds_reg["_key"])
        model_tag = "model_period:" + aperiod
        model_name = "Housing Regression Model_" + str(ruuid)
        model_info = {"name": model_name,\
                      "type": "LASSO regression", "tag": model_tag}
        model_reg = ap.register_model(model_info,
                                      project="Home_Value_Assessor")
        model_params = {"alpha": alpha_random, "run_id": str(ruuid)}
        run_info = {"dataset" : ds_reg["_key"],\
                    "featureset": fs_reg["_key"],\
                    "run_id": str(ruuid),\
                    "model": model_reg["_key"],\
                    "model-params": model_params,\
                    "model-perf": model_perf,\
                    "pipeline" : pipeline_tag,\
                    "project": "Housing Price Assessor",
                    "tag_for_deployment": True,\
                    "deployment_tag": deployment_tag}
        ap.log_run(run_info)
        admin.register_deployment(deployment_tag)
        user_id = "Arangopipe Test Data Generator"
        ap.log_serving_perf(ex_servingperf, deployment_tag, user_id)

    print("Done loading data into the test database!")

    return
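generate_runs() depends on helpers defined elsewhere in the source file (read_data, dataset_generator, eval_metrics, a data frame named data, and a logger). Purely as an illustration, a hypothetical period_string_generator consistent with how next(period) is consumed above could look like this:

import itertools

def period_string_generator():
    # Hypothetical stand-in, not the original helper: yields consecutive
    # period labels ("period_1", "period_2", ...) for the next(period) calls.
    for i in itertools.count(1):
        yield "period_" + str(i)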
Example #8
def run_driver():

    params = {'batch_size': 128, 'shuffle': True, 'num_workers': 6}
    trng_dataset = CH_Dataset()
    test_dataset = CH_Dataset(train=False)
    training_generator = data.DataLoader(trng_dataset, **params)
    test_generator = data.DataLoader(test_dataset, **params)
    input_size = trng_dataset.input_size
    output_size = trng_dataset.output_size

    m = CH_LinearRegression(inputSize=input_size, outputSize=output_size)
    cost_func = nn.MSELoss()
    learning_rate = 0.1
    optimizer = torch.optim.Adam(m.parameters(), lr=learning_rate)
    all_losses = []
    test_pred_list = []
    test_acts_list = []
    num_epochs = 100
    loss_sched = {}
    for e in range(num_epochs):
        batch_losses = []
        for ix, (Xb, yb) in enumerate(training_generator):
            _X = Variable(Xb).float()

            _y = Variable(yb).float()
            #==========Forward pass===============
            preds = m(_X)
            preds = torch.flatten(preds)
            loss = cost_func(preds, _y)

            #==========backward pass==============

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())
            all_losses.append(loss.item())

        mbl = sqrt(np.mean(batch_losses))

        if e % 5 == 0:
            print("training loss: " + str(mbl))
            loss_sched[e] = mbl

    # prepares model for inference when trained with a dropout layer


#    print(m.training)
#    m.eval()
#    print(m.training)

    test_batch_losses = []
    test_pred_list = []
    test_acts_list = []
    for _X, _y in test_generator:

        _X = Variable(_X).float()
        _y = Variable(_y).float()

        #apply model
        test_preds = m(_X)
        test_preds = torch.flatten(test_preds)
        test_loss = cost_func(test_preds, _y)
        test_pred_list.extend(test_preds.detach().numpy().ravel())
        test_acts_list.extend(_y.numpy().ravel())

        test_batch_losses.append(test_loss.item())
    # print("Batch loss: {}".format(test_loss.item()))

    tmbl = sqrt(np.mean(test_batch_losses))
    print("test loss: " + str(tmbl))

    # Store experiment results in Arangopipe
    conn_config = ArangoPipeConfig()
    msc = ManagedServiceConnParam()
    test_cfg = get_test_config()
    conn_params = { msc.DB_SERVICE_HOST : test_cfg['arangodb'][msc.DB_SERVICE_HOST], \
                    msc.DB_SERVICE_END_POINT : test_cfg['arangodb'][msc.DB_SERVICE_END_POINT],\
                    msc.DB_SERVICE_NAME : test_cfg['arangodb'][msc.DB_SERVICE_NAME],\
                    msc.DB_SERVICE_PORT : test_cfg['arangodb'][msc.DB_SERVICE_PORT],\
                    msc.DB_CONN_PROTOCOL : test_cfg['arangodb'][msc.DB_CONN_PROTOCOL]}
    #    conn_params = { msc.DB_SERVICE_HOST : "localhost", \
    #                        msc.DB_SERVICE_END_POINT : "apmdb",\
    #                        msc.DB_SERVICE_NAME : "createDB",\
    #                        msc.DB_SERVICE_PORT : 8529,\
    #                        msc.DB_CONN_PROTOCOL : 'http',\
    #                        msc.DB_NOTIFICATION_EMAIL : 'somebody@some_company.com'}

    conn_config = conn_config.create_connection_config(conn_params)
    proj_info = {"name": "Housing_Price_Estimation_Project"}
    admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
    proj_reg = admin.register_project(proj_info)
    ap_config = admin.get_config()
    ap = ArangoPipe(config=ap_config)
    ruuid = str(uuid.uuid4().int)
    model_name = "pytorch-linear-reg" + "_dev_run_" + ruuid
    model_info = {"name": model_name, "type": "model-development"}
    model_reg = ap.register_model(model_info,
                                  project="Housing_Price_Estimation_Project")
    ds_info = trng_dataset.get_dataset()
    ds_reg = ap.register_dataset(ds_info)
    fs = trng_dataset.get_featureset()
    fs_reg = ap.register_featureset(fs, ds_reg["_key"])

    model_params = {"optimizer": "Adam", "training_epochs": 100,\
                    "batch_size": 128, "learning_rate": learning_rate,\
                    "run_id": ruuid}
    model_perf = {"training_loss_schedule": jsonpickle.encode(loss_sched),\
                  "run_id": ruuid, "timestamp":    str(datetime.datetime.now())}
    run_tag = "Housing-Price-Pytorch-Experiment" + "_dev_run_" + ruuid
    run_info = {"dataset" : ds_reg["_key"],\
                    "featureset": fs_reg["_key"],\
                    "run_id": ruuid,\
                    "model": model_reg["_key"],\
                    "model-params": model_params,\
                    "model-perf": model_perf,\
                    "tag": run_tag,\
                    "project": "Housing Price Estimation Project"}
    ap.log_run(run_info)
    mp = ap.lookup_modelperf(run_tag)
    print(
        "A look up of the loss schedule for this experiment in Arangopipe yields:"
    )
    print(str(mp["training_loss_schedule"]))

    return
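run_driver() assumes CH_Dataset, CH_LinearRegression, get_test_config and the usual torch/numpy imports are defined elsewhere. A minimal sketch of what the model class might look like, given how it is constructed and called above (a single linear layer); the real definition may differ:

import torch.nn as nn

class CH_LinearRegression(nn.Module):
    # Sketch only: maps inputSize features to outputSize targets with one
    # fully connected layer, matching how run_driver() constructs the model.
    def __init__(self, inputSize, outputSize):
        super(CH_LinearRegression, self).__init__()
        self.linear = nn.Linear(inputSize, outputSize)

    def forward(self, x):
        return self.linear(x)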
Example #9
class TestArangopipe(unittest.TestCase):
    
    def __init__(self, *args, **kwargs):
        super(TestArangopipe, self).__init__(*args, **kwargs)
        self.test_cfg = self.get_test_config()
        
        return
    
    def setUp(self):
        #mshost: "5366b66b7d19.arangodb.cloud"
        config = ArangoPipeConfig()
        msc = ManagedServiceConnParam()
        conn_params = { msc.DB_SERVICE_HOST : self.test_cfg['arangodb'][msc.DB_SERVICE_HOST], \
                        msc.DB_SERVICE_END_POINT : self.test_cfg['arangodb'][msc.DB_SERVICE_END_POINT],\
                        msc.DB_SERVICE_NAME : self.test_cfg['arangodb'][msc.DB_SERVICE_NAME],\
                        msc.DB_SERVICE_PORT : self.test_cfg['arangodb'][msc.DB_SERVICE_PORT],\
                        msc.DB_CONN_PROTOCOL : self.test_cfg['arangodb'][msc.DB_CONN_PROTOCOL],\
                        msc.DB_NOTIFICATION_EMAIL : 'somebody@some_company.com'}
        
        config = config.create_connection_config(conn_params)
        self.config = config
        self.admin = ArangoPipeAdmin(reuse_connection = False,\
                                     config= self.config, persist_conn= False)
        ap_config = self.admin.get_config()
        self.ap = ArangoPipe(config = ap_config)
        self.provision_project()
        return
    
    def get_test_config(self):
        file_name = os.path.join(os.path.dirname(__file__),
                                     "../test_config/test_datagen_config.yaml")
        with open(file_name, "r") as file_descriptor:
            test_cfg = yaml.load(file_descriptor, Loader=yaml.FullLoader)
        
        return test_cfg
    
    def provision_project(self):
        err_raised = False
        try:
            proj_info = {"name": "Wine-Quality-Regression-Modelling"}
            proj_reg = self.admin.register_project(proj_info)
        except:
            err_raised = True
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            self.assertTrue(err_raised,
                            'Exception raised while provisioning project')

        #cls.assertFalse(err_raised, )
        return
    
    def register_dataset(self):
        ds_info = {"name": "wine_dataset",
                   "description": "Wine quality ratings",
                   "source": "UCI ML Repository"}
        ds_reg = self.ap.register_dataset(ds_info)
        return

    def lookup_dataset(self):
        ds_reg = self.ap.lookup_dataset("wine_dataset")
        return

    def lookup_featureset(self):
        fs_reg = self.ap.lookup_featureset("wine_no_transformations")
        return

    def register_model(self):

        model_info = {"name": "elastic_net_wine_model",
                      "type": "elastic net regression"}
        model_reg = self.ap.register_model(model_info)
        return
    
    def link_models(self):

        model_info1 = {"name": "elastic_net_wine_model1",
                      "type": "elastic net regression1"}
        model_reg1 = self.ap.register_model(model_info1)
        
        model_info2 = {"name": "elastic_net_wine_model2",
                      "type": "elastic net regression2"}
        model_reg2 = self.ap.register_model(model_info2)
        
        model_info3 = {"name": "elastic_net_wine_model3",
                      "type": "elastic net regression3"}
        model_reg3 = self.ap.register_model(model_info3)
        
        self.ap.link_entities(model_reg1['_id'], model_reg2['_id'])
        updated_model_info = self.ap.lookup_model(model_info1["name"])
        print("Updated model:")
        print(updated_model_info)
        print("Adding another model link")
        self.ap.link_entities(model_reg1['_id'], model_reg3['_id'])
        updated_model_info = self.ap.lookup_model(model_info1["name"])
        print("Updated model:")
        print(updated_model_info)
        added_str = updated_model_info['related_models']
        added_links = added_str.split(",")
        link_added = len(added_links) == 2
        self.assertTrue(link_added,
                            'Exception raised while linking models')
        
        
        return

    def lookup_model(self):

        model_reg = self.ap.lookup_model("elastic_net_wine_model")
        return

    def log_run(self):

        ds_reg = self.ap.lookup_dataset("wine_dataset")
        fs_reg = self.ap.lookup_featureset("wine_no_transformations")
        model_reg = self.ap.lookup_model("elastic_net_wine_model")
        model_params = {"l1_ratio": 0.1, "alpha": 0.2,
                        "run_id": "0ef73d9edf08487793c77a1742f4033e"}
        model_perf = {"rmse": 0.7836984021909766, "r2": 0.20673590971167466,
                      "mae": 0.6142020452688988, "run_id": "0ef73d9edf08487793c77a1742f4033e",
                      "timestamp": "2019-06-06 12:52:11.190048"}
        run_info = {"dataset": ds_reg["_key"],
                    "featureset": fs_reg["_key"],
                    "run_id": "0ef73d9edf08487793c77a1742f4033e",
                    "model": model_reg["_key"],
                    "model-params": model_params,
                    "model-perf": model_perf,
                    "pipeline": "Wine-Regression-Pipeline",
                    "project": "Wine-Quality-Assessment",
                    "deployment_tag": "Wine_Elastic_Net_Regression",
                    "tag": "wine regression model test 1"}
        self.ap.log_run(run_info)
        return

    def provision_deployment(self):

        ret = self.admin.register_deployment("Wine_Elastic_Net_Regression")

        return

    def register_featureset(self):

        fs_info = {"fixed acidity": "float64",
                   "volatile acidity": "float64",
                   "citric acid": "float64",
                   "residual sugar": "float64",
                   "chlorides": "float64",
                   "free sulfur dioxide": "float64",
                   "total sulfur dioxide": "float64",
                   "density": "float64",
                   "pH": "float64",
                   "sulphates": "float64",
                   "alcohol": "float64",
                   "quality": "int64",
                   "name": "wine_no_transformations"
                   }
        ds_reg = self.ap.lookup_dataset("wine_dataset")
        fs_reg = self.ap.register_featureset(fs_info, ds_reg["_key"])
        return

    def log_servingperf(self):
        to_date = datetime.datetime.now()
        from_date = to_date - datetime.timedelta(days=30)
        ex_servingperf = {"rmse": 0.822242, "r2": 0.12678, "mae": 0.62787,
                          "from_date": str(from_date), "to_date": str(to_date)}
        dep_tag = "Wine_Elastic_Net_Regression"
        user_id = "prvileged user"
        ret = self.ap.log_serving_perf(ex_servingperf, dep_tag, user_id)

        return

    def dataset_shift_positive(self):
        ds_path = os.path.join(os.path.dirname(
            os.path.abspath(__file__)), "cal_housing.csv")
        df = pd.read_csv(ds_path)
        req_cols = df.columns.tolist()
        df = df[req_cols]
        df1 = df.query("lat <= -119")
        df2 = df.query("lat > -119")
        rfd = RF_DatasetShiftDetector()
        score = rfd.detect_dataset_shift(df1, df2)
        print("Detaset shift score : ", score)

        return score

    def dataset_shift_negative(self):
        ds_path = os.path.join(os.path.dirname(
            os.path.abspath(__file__)), "cal_housing.csv")
        df = pd.read_csv(ds_path)
        req_cols = df.columns.tolist()
        df = df[req_cols]
        df1 = df.query("lat <= -119")
        df2 = df1.copy()
        rfd = RF_DatasetShiftDetector()
        score = rfd.detect_dataset_shift(df1, df2)
        print("Detaset shift score : ", score)

        return score

    def vertex_add_to_arangopipe(self):
        self.admin.add_vertex_to_arangopipe('test_vertex_1')

        return

    def test_arangopipe_vertex_add(self):
        self.vertex_add_to_arangopipe()
        self.assertTrue(self.admin.has_vertex('test_vertex_1'))

        return

    def vertex_remove_from_arangopipe(self):
        self.admin.add_vertex_to_arangopipe('test_vertex_t1')
        self.admin.remove_vertex_from_arangopipe('test_vertex_t1', purge=True)

        return

    def test_arangopipe_vertex_remove(self):
        self.vertex_remove_from_arangopipe()
        self.assertFalse(self.admin.has_vertex('test_vertex_t1'))

        return

    def test_register_dataset(self):
        err_raised = False
        try:
            self.register_dataset()
        except:
            err_raised = True
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            self.assertTrue(err_raised,
                            'Exception raised while registering dataset')
        self.assertFalse(err_raised)
        return
    def test_reregister_dataset(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_dataset()
        except:
            err_raised = True
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            self.assertTrue(err_raised,
                            'Exception raised while registering dataset')
        self.assertFalse(err_raised)
        return

    def test_lookup_dataset(self):
        err_raised = False
        try:
            self.register_dataset()
            self.lookup_dataset()
        except:
            err_raised = True
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            self.assertTrue(err_raised,
                            'Exception raised while looking up dataset')
        self.assertFalse(err_raised)
        return

    def test_register_featureset(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
        except:
            err_raised = True
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            self.assertTrue(err_raised,
                            'Exception raised while registering featureset')
        self.assertFalse(err_raised)
        return
    
    def test_reregister_featureset(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.register_featureset()
        except:
            err_raised = True
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            self.assertTrue(err_raised,
                            'Exception raised while registering featureset')
        self.assertFalse(err_raised)
        return

    def test_lookup_featureset(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.lookup_featureset()
        except:
            err_raised = True
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            self.assertTrue(err_raised,
                            'Exception raised while looking up featureset')
        self.assertFalse(err_raised)
        return

    def test_register_model(self):
        err_raised = False
        try:
            self.register_model()
        except:
            err_raised = True
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            self.assertTrue(err_raised,
                            'Exception raised while registering model')
        self.assertFalse(err_raised)
        return
    
    def test_reregister_model(self):
        err_raised = False
        try:
            self.register_model()
            self.register_model()
        except:
            err_raised = True
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            self.assertTrue(err_raised,
                            'Exception raised while registering model')
        self.assertFalse(err_raised)
        return

    def test_lookup_model(self):
        err_raised = False
        try:
            self.register_model()
            self.lookup_model()
        except:
            err_raised = True
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            self.assertTrue(err_raised,
                            'Exception raised while looking up model')
        self.assertFalse(err_raised)
        return

    def test_log_run(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.register_model()
            self.log_run()
        except:
            err_raised = True
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            self.assertTrue(err_raised,
                            'Exception raised while logging performance')
        self.assertFalse(err_raised)
        return

    def test_link_models(self):
        err_raised = False
        try:
            self.link_models()
 

        except:
            err_raised = True
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            self.assertTrue(err_raised,
                            'Exception raised while linking models')
        self.assertFalse(err_raised)
        
    def test_provision_deployment(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.register_model()
            self.log_run()
            self.provision_deployment()

        except:
            err_raised = True
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            self.assertTrue(err_raised,
                            'Exception raised while provisioning deployment')
        self.assertFalse(err_raised)
        return

    def test_log_serving_performance(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.register_model()
            self.log_run()
            self.provision_deployment()
            self.log_servingperf()

        except:
            err_raised = True
            print('-'*60)
            traceback.print_exc(file=sys.stdout)
            print('-'*60)
            self.assertTrue(err_raised,
                            'Exception raised while logging serving performance')
        self.assertFalse(err_raised)
        return

    def test_dataset_shift_positive(self):

        score = self.dataset_shift_positive()

        self.assertTrue(score > 0.8)
        return

    def test_dataset_shift_negative(self):

        score = self.dataset_shift_negative()

        self.assertTrue(score < 0.6)
        return

    def add_edge_to_arangopipe(self):
        self.admin.add_vertex_to_arangopipe('test_vertex_s')
        self.admin.add_vertex_to_arangopipe('test_vertex_d')
        self.admin.add_edge_definition_to_arangopipe('test_edge_col', 'test_edge',
                                                     'test_vertex_s', 'test_vertex_d')
        return

    def test_arangopipe_edge_add(self):
        self.add_edge_to_arangopipe()
        self.assertTrue(self.admin.has_edge('test_edge_col'))

        return

    def remove_edge_from_arangopipe(self):
        self.admin.add_vertex_to_arangopipe('test_vertex_s1')
        self.admin.add_vertex_to_arangopipe('test_vertex_d1')
        self.admin.add_edge_definition_to_arangopipe('test_edge_col', 'test_edge_1',
                                                     'test_vertex_s1', 'test_vertex_d1')
        self.admin.remove_edge_definition_from_arangopipe(
            'test_edge_1', purge=True)

        return

    def test_arangopipe_edge_remove(self):
        self.remove_edge_from_arangopipe()
        self.assertFalse(self.admin.has_edge('test_edge_1'))

        return

    def add_vertex_node(self):
        ni = None
        self.admin.add_vertex_to_arangopipe('test_vertex_s2')
        sd = {'name': "sample doc"}
        ni = self.ap.insert_into_vertex_type('test_vertex_s2', sd)

        return ni

    def test_arangopipe_vertex_node_add(self):
        ni = self.add_vertex_node()
        self.assertIsNotNone(ni)
        return

    def add_edge_link(self):
        ei = None
        self.admin.add_vertex_to_arangopipe('test_vertex_s3')
        self.admin.add_vertex_to_arangopipe('test_vertex_s4')
        sd = {'name': "sample doc"}
        v1 = self.ap.insert_into_vertex_type('test_vertex_s3', sd)
        v2 = self.ap.insert_into_vertex_type('test_vertex_s4', sd)
        self.admin.add_edge_definition_to_arangopipe('test_edge_col', 'test_edge',
                                                     'test_vertex_s3', 'test_vertex_s4')
        ei = self.ap.insert_into_edge_type('test_edge_col', v1, v2)

        return ei

    def test_arangopipe_edge_link_add(self):
        ei = self.add_edge_link()
        self.assertIsNotNone(ei)
        return
    
    def test_export(self):
        file_path = '/tmp/arangopipe_config.yaml'
        self.config.export_cfg(file_path)
        file_exists = os.path.exists(file_path)
        self.assertTrue(file_exists)
        
        return
    
    def test_import(self):
        file_path = '/tmp/arangopipe_config.yaml'
        self.config.export_cfg(file_path)
        cc = self.config.create_config(file_path)
        self.assertTrue(len(cc) > 0)
        
        return
    
    def test_connection_manager(self):
        msc = ManagedServiceConnParam()
        conn_params = { msc.DB_SERVICE_HOST : "arangoml.arangodb.cloud", \
                        msc.DB_SERVICE_END_POINT : "createDB",\
                        msc.DB_SERVICE_NAME : "createDB",\
                        msc.DB_SERVICE_PORT : 8529,\
                        msc.DB_CONN_PROTOCOL : 'https'}
        
        
        with arango_pipe_connections(conn_params, False) as (ap_admin, ap):
             proj_info = {"name": "Python With Generator Admin test"}
             proj_reg = ap_admin.register_project(proj_info)
             print("Done with test!")
        
        return
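To run the test class above directly, the standard unittest entry point can be appended (it is not shown in the original excerpt):

if __name__ == "__main__":
    unittest.main()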
Example #10
class TestArangopipe(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super(TestArangopipe, self).__init__(*args, **kwargs)
        self.test_cfg = self.get_test_config()
        self.mscp = ManagedServiceConnParam()

        return

    def setUp(self):
        conn_config = ArangoPipeConfig()
        conn_params = { self.mscp.DB_SERVICE_HOST : self.test_cfg['arangodb'][self.mscp.DB_SERVICE_HOST], \
                #self.mscp.DB_ROOT_USER : self.test_cfg['arangodb'][self.mscp.DB_ROOT_USER],\
                #self.mscp.DB_ROOT_USER_PASSWORD : self.test_cfg['arangodb'][self.mscp.DB_ROOT_USER_PASSWORD],\
                self.mscp.DB_SERVICE_END_POINT : self.test_cfg['arangodb'][self.mscp.DB_SERVICE_END_POINT],\
                self.mscp.DB_SERVICE_NAME : self.test_cfg['arangodb'][self.mscp.DB_SERVICE_NAME],\
                self.mscp.DB_SERVICE_PORT : self.test_cfg['arangodb'][self.mscp.DB_SERVICE_PORT],\
                self.mscp.DB_CONN_PROTOCOL : self.test_cfg['arangodb'][self.mscp.DB_CONN_PROTOCOL]}

        conn_config = conn_config.create_connection_config(conn_params)
        self.admin = ArangoPipeAdmin(reuse_connection=False,
                                     config=conn_config)
        the_config = self.admin.get_config()
        self.ap = ArangoPipe(config=the_config)
        self.provision_project()

        return

    def get_test_config(self):
        file_name = os.path.join(os.path.dirname(__file__),
                                 "../test_config/test_datagen_config.yaml")
        with open(file_name, "r") as file_descriptor:
            test_cfg = yaml.load(file_descriptor, Loader=yaml.FullLoader)

        return test_cfg

    def provision_project(self):
        err_raised = False
        try:
            proj_info = {"name": "Wine-Quality-Regression-Modelling"}
            proj_reg = self.admin.register_project(proj_info)
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,\
                            'Exception raised while provisioning project')

        self.assertFalse(err_raised)
        return

    def register_dataset(self):
        ds_info = {"name" : "wine_dataset",\
                   "description": "Wine quality ratings",\
                   "source": "UCI ML Repository" }
        ds_reg = self.ap.register_dataset(ds_info)
        return

    def lookup_dataset(self):
        ds_reg = self.ap.lookup_dataset("wine_dataset")
        return

    def lookup_featureset(self):
        fs_reg = self.ap.lookup_featureset("wine_no_transformations")
        return

    def register_model(self):

        model_info = {"name": "elastic_net_wine_model", \
                  "type": "elastic net regression"}
        model_reg = self.ap.register_model(model_info)
        return

    def lookup_model(self):

        model_reg = self.ap.lookup_model("elastic_net_wine_model")
        return

    def log_run(self):

        ds_reg = self.ap.lookup_dataset("wine_dataset")
        fs_reg = self.ap.lookup_featureset("wine_no_transformations")
        model_reg = self.ap.lookup_model("elastic_net_wine_model")
        model_params = { "l1_ratio": 0.1, "alpha": 0.2,\
                        "run_id": "0ef73d9edf08487793c77a1742f4033e"}
        model_perf = { "rmse": 0.7836984021909766, "r2": 0.20673590971167466,\
                       "mae": 0.6142020452688988, "run_id": "0ef73d9edf08487793c77a1742f4033e",\
                       "timestamp": "2019-06-06 12:52:11.190048"}
        run_info = {"dataset" : ds_reg["_key"],\
                       "featureset": fs_reg["_key"],\
                       "run_id": "0ef73d9edf08487793c77a1742f4033e",\
                       "model": model_reg["_key"],\
                       "model-params": model_params,\
                       "model-perf": model_perf,\
                       "pipeline" : "Wine-Regression-Pipeline",\
                       "project": "Wine-Quality-Assessment",\
                       "deployment_tag": "Wine_Elastic_Net_Regression",\
                       "tag": "wine regression model test 1"}
        self.ap.log_run(run_info)
        return

    def provision_deployment(self):

        ret = self.admin.register_deployment("Wine_Elastic_Net_Regression")

        return

    def register_featureset(self):

        fs_info = {"fixed acidity": "float64",\
                   "volatile acidity": "float64",\
                   "citric acid": "float64",\
                   "residual sugar": "float64",\
                   "chlorides": "float64",\
                   "free sulfur dioxide": "float64",\
                   "total sulfur dioxide": "float64",\
                   "density": "float64",\
                   "pH": "float64",\
                   "sulphates": "float64",\
                   "alcohol": "float64",\
                   "quality": "int64",\
                   "name": "wine_no_transformations"
                   }
        ds_reg = self.ap.lookup_dataset("wine_dataset")
        fs_reg = self.ap.register_featureset(fs_info, ds_reg["_key"])
        return

    def log_servingperf(self):
        to_date = datetime.datetime.now()
        from_date = to_date - datetime.timedelta(days=30)
        ex_servingperf = {"rmse": 0.822242, "r2": 0.12678, "mae": 0.62787,\
                          "from_date": str(from_date), "to_date": str(to_date)}
        dep_tag = "Wine_Elastic_Net_Regression"
        user_id = "prvileged user"
        ret = self.ap.log_serving_perf(ex_servingperf, dep_tag, user_id)

        return

    def dataset_shift_positive(self):
        ds_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               "cal_housing.csv")
        df = pd.read_csv(ds_path)
        req_cols = df.columns.tolist()
        df = df[req_cols]
        df1 = df.query("lat <= -119")
        df2 = df.query("lat > -119")
        rfd = RF_DatasetShiftDetector()
        score = rfd.detect_dataset_shift(df1, df2)
        print("Detaset shift score : ", score)

        return score

    def dataset_shift_negative(self):
        ds_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               "cal_housing.csv")
        df = pd.read_csv(ds_path)
        req_cols = df.columns.tolist()
        df = df[req_cols]
        df1 = df.query("lat <= -119")
        df2 = df1.copy()
        rfd = RF_DatasetShiftDetector()
        score = rfd.detect_dataset_shift(df1, df2)
        print("Detaset shift score : ", score)

        return score

    def torch_test(self):
        print("Running test for pytorch...")
        run_driver()
        print("Pytorch test completed!")
        return

    def test_torch_workflow(self):
        err_raised = False
        try:
            self.torch_test()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,\
                            'Exception raised while running the pytorch workflow')

        self.assertFalse(err_raised)

        return

    def vertex_add_to_arangopipe(self):
        self.admin.add_vertex_to_arangopipe('test_vertex_1')

        return

    def test_arangopipe_vertex_add(self):
        self.vertex_add_to_arangopipe()
        self.assertTrue(self.admin.has_vertex('test_vertex_1'))

        return

    def vertex_remove_from_arangopipe(self):
        self.admin.add_vertex_to_arangopipe('test_vertex_t1')
        self.admin.remove_vertex_from_arangopipe('test_vertex_t1', purge=True)

        return

    def test_arangopipe_vertex_remove(self):
        self.vertex_remove_from_arangopipe()
        self.assertFalse(self.admin.has_vertex('test_vertex_t1'))

        return

    def test_register_dataset(self):
        err_raised = False
        try:
            self.register_dataset()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,\
                            'Exception raised while registering dataset')
        self.assertFalse(err_raised)
        return

    def test_lookup_dataset(self):
        err_raised = False
        try:
            self.register_dataset()
            self.lookup_dataset()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,\
                            'Exception raised while looking up dataset')
        self.assertFalse(err_raised)
        return

    def test_register_featureset(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,\
                            'Exception raised while registering featureset')
        self.assertFalse(err_raised)
        return

    def test_lookup_featureset(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.lookup_featureset()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,\
                            'Exception raised while looking up featureset')
        self.assertFalse(err_raised)
        return

    def test_register_model(self):
        err_raised = False
        try:
            self.register_model()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,\
                            'Exception raised while registering model')
        self.assertFalse(err_raised)
        return

    def test_lookup_model(self):
        err_raised = False
        try:
            self.register_model()
            self.lookup_model()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,\
                            'Exception raised while looking up model')
        self.assertFalse(err_raised)
        return

    def test_log_run(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.register_model()
            self.log_run()
        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,\
                            'Exception raised while logging performance')
        self.assertFalse(err_raised)
        return

    def test_provision_deployment(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.register_model()
            self.log_run()
            self.provision_deployment()

        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,\
                            'Exception raised while provisioning deployment')
        self.assertFalse(err_raised)
        return

    def test_log_serving_performance(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.register_model()
            self.log_run()
            self.provision_deployment()
            self.log_servingperf()

        except:
            err_raised = True
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.assertTrue(err_raised,\
                            'Exception raised while logging serving performance')
        self.assertFalse(err_raised)
        return

    def test_dataset_shift_positive(self):

        score = self.dataset_shift_positive()

        self.assertTrue(score > 0.8)
        return

    def test_dataset_shift_negative(self):

        score = self.dataset_shift_negative()

        self.assertTrue(score < 0.6)
        return

    def add_edge_to_arangopipe(self):
        self.admin.add_vertex_to_arangopipe('test_vertex_s')
        self.admin.add_vertex_to_arangopipe('test_vertex_d')
        self.admin.add_edge_definition_to_arangopipe('test_col', 'test_edge',\
                                                     'test_vertex_s', 'test_vertex_d')
        return

    def test_arangopipe_edge_add(self):
        self.add_edge_to_arangopipe()
        self.assertTrue(self.admin.has_edge('test_col'))

        return

    def remove_edge_from_arangopipe(self):
        self.admin.add_vertex_to_arangopipe('test_vertex_s1')
        self.admin.add_vertex_to_arangopipe('test_vertex_d1')
        self.admin.add_edge_definition_to_arangopipe('test_col', 'test_edge_1',\
                                                     'test_vertex_s1', 'test_vertex_d1')
        self.admin.remove_edge_definition_from_arangopipe('test_edge_1',
                                                          purge=True)

        return

    def test_arangopipe_edge_remove(self):
        self.remove_edge_from_arangopipe()
        self.assertFalse(self.admin.has_edge('test_edge_1'))

        return

    def add_vertex_node(self):
        ni = None
        self.admin.add_vertex_to_arangopipe('test_vertex_s2')
        sd = {'name': "sample doc"}
        ni = self.ap.insert_into_vertex_type('test_vertex_s2', sd)

        return ni

    def test_arangopipe_vertex_node_add(self):
        ni = self.add_vertex_node()
        self.assertIsNotNone(ni)
        return

    def add_edge_link(self):
        ei = None
        self.admin.add_vertex_to_arangopipe('test_vertex_s3')
        self.admin.add_vertex_to_arangopipe('test_vertex_s4')
        sd = {'name': "sample doc"}
        v1 = self.ap.insert_into_vertex_type('test_vertex_s3', sd)
        v2 = self.ap.insert_into_vertex_type('test_vertex_s4', sd)
        self.admin.add_edge_definition_to_arangopipe('test_col','test_edge',\
                                                'test_vertex_s3', 'test_vertex_s4')
        ei = self.ap.insert_into_edge_type('test_col', v1, v2)

        return ei

    def test_arangopipe_edge_link_add(self):
        ei = self.add_edge_link()
        self.assertIsNotNone(ei)
        return
Example #11
    conn_config = ArangoPipeConfig()
    msc = ManagedServiceConnParam()
    conn_params = { msc.DB_SERVICE_HOST : "localhost", \
                    msc.DB_SERVICE_END_POINT : "apmdb",\
                    msc.DB_SERVICE_NAME : "createDB",\
                    msc.DB_SERVICE_PORT : 8529,
                    msc.DB_CONN_PROTOCOL : 'http'}

    conn_config = conn_config.create_connection_config(conn_params)
    admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
    proj_reg = admin.register_project(proj_info)

    warnings.filterwarnings("ignore")
    np.random.seed(40)
    ap_config = admin.get_config()
    ap = ArangoPipe(config=ap_config)

    # Read the wine-quality csv file (make sure you're running this from the root of MLflow!)
    wine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "wine-quality.csv")
    data = pd.read_csv(wine_path)
    ds_info = {"name" : "wine dataset",\
                   "description": "Wine quality ratings","source": "UCI ML Repository" }
    ds_reg = ap.register_dataset(ds_info)
    featureset = data.dtypes.to_dict()
    featureset = {k: str(featureset[k]) for k in featureset}
    featureset["name"] = "wine_no_transformations"
    fs_reg = ap.register_featureset(featureset, ds_reg["_key"])
    model_info = {
        "name": "elastic_net_wine_model",
        "type": "elastic net regression"