Example #1
    def setUp(self):
        self.config = ArangoPipeConfig()
        self.config.set_dbconnection(hostname = "localhost", port = 8529,\
                                     root_user = "******",\
                                     root_user_password = "******")
        self.admin = ArangoPipeAdmin(config = self.config)
        self.ap = ArangoPipe(config = self.config)
        self.provision_project()
def run_tests():
    try:
        config = ArangoPipeConfig()
        config.set_dbconnection(hostname = "localhost", port = 8529,\
                                root_user = "******", root_user_password = "******")
        clean(config)
        test_provision_project(config)
        test_dataset_registration(config)
        test_dataset_lookup(config)
        test_featureset_registration(config)
        test_featureset_lookup(config)
        test_model_registration(config)
        test_model_lookup(config)
        test_log_run(config)
        test_provision_deployment(config)
        test_log_servingperf(config)

    except:
        print("Failures encountered, exceptions occured")
        raise
    print("Arangopipe test cases verified, tests successfull!")
    return
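
The run_tests driver above calls helpers such as clean and the individual test_* functions, which are defined elsewhere in the same module and are not part of this snippet. A minimal sketch of what one such helper could look like, built only from ArangoPipe calls that appear in the other examples (the function name and body are illustrative assumptions, not the library's own test code):

def test_dataset_lookup(config):
    # Hypothetical helper: register a dataset, then look it up by name.
    ap = ArangoPipe(config = config)
    ds_info = {"name": "wine_dataset",
               "description": "Wine quality ratings",
               "source": "UCI ML Repository"}
    ap.register_dataset(ds_info)
    ds_reg = ap.lookup_dataset("wine_dataset")
    assert ds_reg is not None
    return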
Example #3
class TestArangopipe(unittest.TestCase):
        
    def setUp(self):
        self.config = ArangoPipeConfig()
        self.config.set_dbconnection(hostname = "localhost", port = 8529,\
                                root_user = "******",\
                                root_user_password = "******")
        self.admin = ArangoPipeAdmin(config = self.config)
        self.ap = ArangoPipe(config = self.config)
        self.provision_project()

    def provision_project(self):
        err_raised = False
        try:
            proj_info = {"name": "Wine-Quality-Regression-Modelling"}
            proj_reg = self.admin.register_project(proj_info)
        except:
            err_raised = True
            print ('-'*60)
            traceback.print_exc(file=sys.stdout)
            print ('-'*60)
            self.assertTrue(err_raised,\
                            'Exception raised while provisioning project')
        
        self.assertFalse(err_raised)
        return
    
    def register_dataset(self):
        ds_info = {"name" : "wine_dataset",\
                   "description": "Wine quality ratings",\
                   "source": "UCI ML Repository" }
        ds_reg = self.ap.register_dataset(ds_info)
        return
    
    def lookup_dataset(self):
        ds_reg = self.ap.lookup_dataset("wine_dataset")
        return
    
    def lookup_featureset(self):
        fs_reg = self.ap.lookup_featureset("wine_no_transformations")
        return
    
    def register_model(self):
   
        model_info = {"name": "elastic_net_wine_model", \
                  "type": "elastic net regression"}
        model_reg = self.ap.register_model(model_info)
        return
    
    def lookup_model(self):
        
        model_reg = self.ap.lookup_model("elastic_net_wine_model")
        return
    
    def log_run(self):

        ds_reg = self.ap.lookup_dataset("wine_dataset")
        fs_reg = self.ap.lookup_featureset("wine_no_transformations")
        model_reg = self.ap.lookup_model("elastic_net_wine_model")
        model_params = {"l1_ratio": 0.1, "alpha": 0.2,\
                        "run_id": "0ef73d9edf08487793c77a1742f4033e"}
        model_perf = {"rmse": 0.7836984021909766, "r2": 0.20673590971167466,\
                      "mae": 0.6142020452688988, "run_id": "0ef73d9edf08487793c77a1742f4033e",\
                      "timestamp": "2019-06-06 12:52:11.190048"}
        run_info = {"dataset" : ds_reg["_key"],\
                    "featureset": fs_reg["_key"],\
                    "run_id": "0ef73d9edf08487793c77a1742f4033e",\
                    "model": model_reg["_key"],\
                    "model-params": model_params,\
                    "model-perf": model_perf,\
                    "pipeline" : "Wine-Regression-Pipeline",\
                    "project": "Wine-Quality-Assessment",\
                    "deployment_tag": "Wine_Elastic_Net_Regression",\
                    "tag": "wine regression model test 1"}
        self.ap.log_run(run_info)
        return
    
    def provision_deployment(self):
    
        ret = self.admin.register_deployment("Wine_Elastic_Net_Regression")
    
        return
        
    
    def register_featureset(self):
        
        fs_info = {"fixed acidity": "float64",\
                   "volatile acidity": "float64",\
                   "citric acid": "float64",\
                   "residual sugar": "float64",\
                   "chlorides": "float64",\
                   "free sulfur dioxide": "float64",\
                   "total sulfur dioxide": "float64",\
                   "density": "float64",\
                   "pH": "float64",\
                   "sulphates": "float64",\
                   "alcohol": "float64",\
                   "quality": "int64",\
                   "name": "wine_no_transformations"
                   }
        ds_reg = self.ap.lookup_dataset("wine_dataset")
        fs_reg = self.ap.register_featureset(fs_info, ds_reg["_key"])
        return
    
    def log_servingperf(self):
        to_date = datetime.datetime.now()
        from_date = to_date - datetime.timedelta(days = 30)
        ex_servingperf = {"rmse": 0.822242, "r2": 0.12678, "mae": 0.62787,\
                          "from_date": str(from_date), "to_date": str(to_date)}
        dep_tag = "Wine_Elastic_Net_Regression"
        user_id = "prvileged user"
        ret = self.ap.log_serving_perf(ex_servingperf, dep_tag, user_id)
       
        return
    
    def dataset_shift_positive(self):
        ds_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "covariate_shift/cal_housing.csv")
        df = pd.read_csv(ds_path)
        req_cols = df.columns.tolist()
        df = df[req_cols]
        df1 = df.query("lat <= -119")
        df2 = df.query("lat > -119")
        rfd = RF_DatasetShiftDetector()
        score = rfd.detect_dataset_shift(df1, df2)
        print ("Detaset shift score : ", score)
        
        return score

    def dataset_shift_negative(self):
        ds_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "covariate_shift/cal_housing.csv")
        df = pd.read_csv(ds_path)
        req_cols = df.columns.tolist()
        df = df[req_cols]
        df1 = df.query("lat <= -119")
        df2 = df1.copy()
        rfd = RF_DatasetShiftDetector()
        score = rfd.detect_dataset_shift(df1, df2)
        print ("Detaset shift score : ", score)
    
        return score
    
    def test_register_dataset(self):
        err_raised = False
        try:
            self.register_dataset()
        except:
            err_raised = True
            print ('-'*60)
            traceback.print_exc(file=sys.stdout)
            print ('-'*60)
            self.assertTrue(err_raised,\
                            'Exception raised while registering dataset')
        self.assertFalse(err_raised)
        return
    
    def test_lookup_dataset(self):
        err_raised = False
        try:
            self.register_dataset()
            self.lookup_dataset()
        except:
            err_raised = True
            print ('-'*60)
            traceback.print_exc(file=sys.stdout)
            print ('-'*60)
            self.assertTrue(err_raised,\
                            'Exception raised while looking up dataset')
        self.assertFalse(err_raised)
        return
    
    def test_register_featureset(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
        except:
            err_raised = True
            print ('-'*60)
            traceback.print_exc(file=sys.stdout)
            print ('-'*60)
            self.assertTrue(err_raised,\
                            'Exception raised while registering featureset')
        self.assertFalse(err_raised)
        return
    
    def test_lookup_featureset(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.lookup_featureset()
        except:
            err_raised = True
            print ('-'*60)
            traceback.print_exc(file=sys.stdout)
            print ('-'*60)
            self.assertTrue(err_raised,\
                            'Exception raised while looking up featureset')
        self.assertFalse(err_raised)
        return
    
    
    
    def test_register_model(self):
        err_raised = False
        try:
            self.register_model()
        except:
            err_raised = True
            print ('-'*60)
            traceback.print_exc(file=sys.stdout)
            print ('-'*60)
            self.assertTrue(err_raised,\
                            'Exception raised while registering model')
        self.assertFalse(err_raised)
        return
    
    def test_lookup_model(self):
        err_raised = False
        try:
            self.register_model()
            self.lookup_model()
        except:
            err_raised = True
            print ('-'*60)
            traceback.print_exc(file=sys.stdout)
            print ('-'*60)
            self.assertTrue(err_raised,\
                            'Exception raised while looking up model')
        self.assertFalse(err_raised)
        return
    
    def test_log_run(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.register_model()
            self.log_run()
        except:
            err_raised = True
            print ('-'*60)
            traceback.print_exc(file=sys.stdout)
            print ('-'*60)
            self.assertTrue(err_raised,\
                            'Exception raised while logging the run')
        self.assertFalse(err_raised)
        return
    
    def test_provision_deployment(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.register_model()
            self.log_run()
            self.provision_deployment()
        
        except:
            err_raised = True
            print ('-'*60)
            traceback.print_exc(file=sys.stdout)
            print ('-'*60)
            self.assertTrue(err_raised,\
                            'Exception raised while provisioning deployment')
        self.assertFalse(err_raised)
        return
    
    def test_log_serving_performance(self):
        err_raised = False
        try:
            self.register_dataset()
            self.register_featureset()
            self.register_model()
            self.log_run()
            self.provision_deployment()
            self.log_servingperf()
        
        except:
            err_raised = True
            print ('-'*60)
            traceback.print_exc(file=sys.stdout)
            print ('-'*60)
            self.assertTrue(err_raised,\
                            'Exception raised while logging serving performance')
        self.assertFalse(err_raised)
        return
    
    def test_dataset_shift_positive(self):
        
        score = self.dataset_shift_positive()
        
        self.assertTrue(score > 0.8)
        return
    
    def test_dataset_shift_negative(self):

        score = self.dataset_shift_negative()
        

        self.assertTrue(score < 0.6)
        return
        

    
    def tearDown(self):
        #pass
        self.admin.delete_arangomldb()
        self.ap = None
        self.admin = None
        return
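
The test class above can be driven with the standard unittest runner; a minimal entry point (the surrounding module layout is an assumption) is:

if __name__ == "__main__":
    # Discover and run all test_* methods of TestArangopipe.
    unittest.main()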
Example #4
import datetime
import os
import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

from arangopipe.arangopipe_admin_api import ArangoPipeAdmin
from arangopipe.arangopipe_api import ArangoPipe  # import path assumed; it may differ across arangopipe releases
from arangopipe.arangopipe_config import ArangoPipeConfig


def eval_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2


if __name__ == "__main__":
    warnings.filterwarnings("ignore")
    np.random.seed(40)
    conn_config = ArangoPipeConfig()
    conn_config.set_dbconnection(hostname = "localhost", port = 8529,\
                                root_user = "******", root_user_password = "******")
    ap = ArangoPipe(config=conn_config)
    # Read the wine-quality csv file (make sure you're running this from the root of MLflow!)
    wine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "wine-quality.csv")
    data = pd.read_csv(wine_path)

    ds_reg = ap.lookup_dataset("wine dataset")
    fs_reg = ap.lookup_featureset("wine_no_transformations")

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
Example #5
    def get_config(self):
        apc = ArangoPipeConfig()
        return apc.get_cfg()
def generate_runs(clean=False):
    conn_config = ArangoPipeConfig()
    conn_config.set_dbconnection(hostname = "localhost", port = 8529,\
                                root_user = "******", root_user_password = "******")
    admin = ArangoPipeAdmin(config=conn_config)
    ap = ArangoPipe(config=conn_config)

    if clean:
        admin.delete_arangomldb()
        admin.create_db()
        admin.create_enterprise_ml_graph()

    proj_info = {"name": "Home_Value_Assessor"}
    proj_reg = admin.register_project(proj_info)

    period = period_string_generator()
    ds_info = {"description": "Housing Price Data"}
    featureset = data.dtypes.to_dict()
    featureset = {k: str(featureset[k]) for k in featureset}
    count = 1

    for data_tuple in dataset_generator():
        logger.info("Processing Dataset:" + str(count))
        count = count + 1
        aperiod = next(period)
        X_train = data_tuple[0]
        X_test = data_tuple[1]
        y_train = data_tuple[2]
        y_test = data_tuple[3]
        X_val = data_tuple[4]
        y_val = data_tuple[5]
        alpha_random = np.random.uniform(0.0005, 0.001)
        lrm = linear_model.Lasso(alpha=alpha_random)
        lrm.fit(X_train, y_train)
        predicted_val = lrm.predict(X_val)
        (rmse, mae, r2) = eval_metrics(y_val, predicted_val)
        ruuid = uuid.uuid4()
        model_perf = {"rmse": rmse, "r2": r2, "mae": mae, "run_id": str(ruuid), \
                      "timestamp": str(dt.datetime.now())}
        serving_pred = lrm.predict(X_test)
        (rmse, mae, r2) = eval_metrics(y_test, serving_pred)
        ex_servingperf = {"rmse": rmse, "r2": r2, "mae": mae,\
                      "period" : aperiod}
        deployment_tag = "Deployment_HPE_" + aperiod
        dataset_tag = "Housing_Dataset_" + aperiod
        pipeline_tag = "Pipeline_HPE" + aperiod
        feature_pipeline_tag = "Feature Pipeline HPE" + aperiod
        ds_info["name"] = dataset_tag
        ds_info["tag"] = dataset_tag
        ds_info["source"] = "Housing Price Operational Data Store"
        featureset["generated_by"] = feature_pipeline_tag
        featureset["name"] = "log_transformed_house_value"
        featureset["tag"] = dataset_tag

        ds_reg = ap.register_dataset(ds_info)
        fs_reg = ap.register_featureset(featureset, ds_reg["_key"])
        model_tag = "model_period:" + aperiod
        model_info = {"name": "Housing Regression Model",\
                      "type": "LASSO regression", "tag": model_tag}
        model_reg = ap.register_model(model_info,
                                      project="Home_Value_Assessor")
        model_params = {"alpha": alpha_random, "run_id": str(ruuid)}
        run_info = {"dataset" : ds_reg["_key"],\
                    "featureset": fs_reg["_key"],\
                    "run_id": str(ruuid),\
                    "model": model_reg["_key"],\
                    "model-params": model_params,\
                    "model-perf": model_perf,\
                    "pipeline" : pipeline_tag,\
                    "project": "Housing Price Assessor",
                    "tag_for_deployment": True,\
                    "deployment_tag": deployment_tag}
        ap.log_run(run_info)
        admin.register_deployment(deployment_tag)
        user_id = "Arangopipe Test Data Generator"
        ap.log_serving_perf(ex_servingperf, deployment_tag, user_id)

    return
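
generate_runs relies on several names defined elsewhere in the script: data (a pandas DataFrame of housing records), eval_metrics, logger, period_string_generator and dataset_generator, with train_test_split assumed to come from sklearn.model_selection. The sketch below suggests one plausible shape for the two generators, using simple numbered period labels and random train/test/validation splits of the DataFrame; the implementations, and the target column name medianHouseValue, are illustrative assumptions rather than the original code.

def period_string_generator():
    # Yields an endless sequence of period labels: "period_1", "period_2", ...
    period_number = 1
    while True:
        yield "period_" + str(period_number)
        period_number = period_number + 1

def dataset_generator(num_periods = 12):
    # Yields (X_train, X_test, y_train, y_test, X_val, y_val) tuples, one per
    # simulated period, by resampling the module-level DataFrame `data`.
    # The target column name used here is an assumption for illustration.
    for _ in range(num_periods):
        sample = data.sample(frac = 0.5)
        X = sample.drop(columns = ["medianHouseValue"])
        y = sample["medianHouseValue"]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
        X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.25)
        yield (X_train, X_test, y_train, y_test, X_val, y_val)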
Example #7
def run_driver():

    params = {'batch_size': 128, 'shuffle': True, 'num_workers': 6}
    trng_dataset = CH_Dataset()
    test_dataset = CH_Dataset(train=False)
    training_generator = data.DataLoader(trng_dataset, **params)
    test_generator = data.DataLoader(test_dataset, **params)
    input_size = trng_dataset.input_size
    output_size = trng_dataset.output_size

    m = CH_LinearRegression(inputSize=input_size, outputSize=output_size)
    cost_func = nn.MSELoss()
    learning_rate = 0.1
    optimizer = torch.optim.Adam(m.parameters(), lr=learning_rate)
    all_losses = []
    test_pred_list = []
    test_acts_list = []
    num_epochs = 100
    loss_sched = {}
    for e in range(num_epochs):
        batch_losses = []
        for ix, (Xb, yb) in enumerate(training_generator):
            _X = Variable(Xb).float()

            _y = Variable(yb).float()
            #==========Forward pass===============
            preds = m(_X)
            preds = torch.flatten(preds)
            loss = cost_func(preds, _y)

            #==========backward pass==============

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())
            all_losses.append(loss.item())

        mbl = sqrt(np.mean(batch_losses))

        if e % 5 == 0:
            print("training loss: " + str(mbl))
            loss_sched[e] = mbl

    # prepares model for inference when trained with a dropout layer
#    print(m.training)
#    m.eval()
#    print(m.training)

    test_batch_losses = []
    test_pred_list = []
    test_acts_list = []
    for _X, _y in test_generator:

        _X = Variable(_X).float()
        _y = Variable(_y).float()

        #apply model
        test_preds = m(_X)
        test_preds = torch.flatten(test_preds)
        test_loss = cost_func(test_preds, _y)
        test_pred_list.extend(test_preds.detach().numpy().ravel())
        test_acts_list.extend(_y.numpy().ravel())

        test_batch_losses.append(test_loss.item())
    # print("Batch loss: {}".format(test_loss.item()))

    tmbl = sqrt(np.mean(test_batch_losses))
    print("test loss: " + str(tmbl))

    # Store experiment results in Arangopipe
    conn_config = ArangoPipeConfig()
    conn_config.set_dbconnection(hostname = "localhost", port = 8529,\
                                root_user = "******", root_user_password = "******")
    proj_info = {"name": "Housing_Price_Estimation_Project"}
    admin = ArangoPipeAdmin(config=conn_config)
    proj_reg = admin.register_project(proj_info)
    ap = ArangoPipe(conn_config)
    ruuid = str(uuid.uuid4().int)
    model_name = "pytorch-linear-reg" + "_dev_run_" + ruuid
    model_info = {"name": model_name, "type": "model-development"}
    model_reg = ap.register_model(model_info,
                                  project="Housing_Price_Estimation_Project")
    ds_info = trng_dataset.get_dataset()
    ds_reg = ap.register_dataset(ds_info)
    fs = trng_dataset.get_featureset()
    fs_reg = ap.register_featureset(fs, ds_reg["_key"])

    model_params = {"optimizer": "Adam", "training_epochs": 100,\
                    "batch_size": 128, "learning_rate": learning_rate,\
                    "run_id": ruuid}
    model_perf = {"training_loss_schedule": jsonpickle.encode(loss_sched),\
                  "run_id": ruuid, "timestamp":    str(datetime.datetime.now())}
    run_tag = "Housing-Price-Pytorch-Experiment" + "_dev_run_" + ruuid
    run_info = {"dataset" : ds_reg["_key"],\
                    "featureset": fs_reg["_key"],\
                    "run_id": ruuid,\
                    "model": model_reg["_key"],\
                    "model-params": model_params,\
                    "model-perf": model_perf,\
                    "tag": run_tag,\
                    "project": "Housing Price Estimation Project"}
    ap.log_run(run_info)
    mp = ap.lookup_modelperf(run_tag)
    print(
        "A look up of the loss schedule for this experiment in Arangopipe yields:"
    )
    print(str(mp["training_loss_schedule"]))

    return
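
run_driver depends on two classes defined elsewhere in the project: CH_Dataset, a torch Dataset over the housing data that also exposes input_size, output_size, get_dataset() and get_featureset(), and CH_LinearRegression. A minimal sketch of the model class, inferred from how it is constructed and called above (the constructor argument names match the call site; the body itself is an assumption):

import torch.nn as nn

class CH_LinearRegression(nn.Module):
    # A single linear layer mapping the housing features to the predicted target.
    def __init__(self, inputSize, outputSize):
        super(CH_LinearRegression, self).__init__()
        self.linear = nn.Linear(inputSize, outputSize)

    def forward(self, x):
        return self.linear(x)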