Ejemplo n.º 1
0
def clean(config):
    """Call ``delete_arangomldb()`` on an admin object built from ``config``.

    Args:
        config: Connection configuration forwarded unchanged to
            ``ArangoPipeAdmin(config=...)``.

    Returns:
        None.
    """
    ArangoPipeAdmin(config=config).delete_arangomldb()
Ejemplo n.º 2
0
class TestArangopipe(unittest.TestCase):
    """Exercise the ``ArangoPipe`` / ``ArangoPipeAdmin`` calls end to end.

    ``setUp`` builds a connection configuration for ``localhost:8529``,
    creates the admin and client objects and registers a project.
    ``tearDown`` calls ``delete_arangomldb()`` on the admin object, so each
    test provisions whatever it needs itself.

    The public helpers (``register_dataset``, ``log_run``, ...) perform one
    Arangopipe call each; the ``test_*`` methods chain them and fail with a
    descriptive message if any step raises.
    """

    def setUp(self):
        """Create the config, admin and client objects, then the project."""
        self.config = ArangoPipeConfig()
        self.config.set_dbconnection(
            hostname="localhost",
            port=8529,
            root_user="******",
            root_user_password="******",
        )
        self.admin = ArangoPipeAdmin(config=self.config)
        self.ap = ArangoPipe(config=self.config)
        self.provision_project()

    def tearDown(self):
        """Delete the database used by the test and drop the handles."""
        self.admin.delete_arangomldb()
        self.ap = None
        self.admin = None

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _run_checked(self, failure_message, *steps):
        """Run ``steps`` in order and fail the test if any of them raises.

        Args:
            failure_message: Text reported through ``self.fail`` when a step
                raises.
            *steps: Zero-argument callables, executed in the given order.
        """
        try:
            for step in steps:
                step()
        except Exception:
            # Same diagnostics as before: the traceback goes to stdout
            # between two separator lines. ``Exception`` (rather than a bare
            # ``except``) lets KeyboardInterrupt/SystemExit propagate.
            print('-' * 60)
            traceback.print_exc(file=sys.stdout)
            print('-' * 60)
            self.fail(failure_message)

    def _load_cal_housing(self):
        """Read ``covariate_shift/cal_housing.csv`` located next to this file.

        Returns:
            A DataFrame holding every column of the CSV.
        """
        ds_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            "covariate_shift/cal_housing.csv",
        )
        df = pd.read_csv(ds_path)
        # Selecting the full column list yields a new frame, as before.
        return df[df.columns.tolist()]

    # ------------------------------------------------------------------
    # Single-step helpers (one Arangopipe call each)
    # ------------------------------------------------------------------

    def provision_project(self):
        """Register the project; fail the test if registration raises."""
        proj_info = {"name": "Wine-Quality-Regression-Modelling"}
        self._run_checked(
            'Exception raised while provisioning project',
            lambda: self.admin.register_project(proj_info),
        )

    def register_dataset(self):
        """Register the ``wine_dataset`` dataset."""
        ds_info = {
            "name": "wine_dataset",
            "description": "Wine quality ratings",
            "source": "UCI ML Repository",
        }
        self.ap.register_dataset(ds_info)

    def lookup_dataset(self):
        """Look up the ``wine_dataset`` dataset."""
        self.ap.lookup_dataset("wine_dataset")

    def register_featureset(self):
        """Register the ``wine_no_transformations`` featureset.

        The featureset is linked to the ``_key`` of the looked-up
        ``wine_dataset`` entry, so the dataset must be registered first.
        """
        fs_info = {
            "fixed acidity": "float64",
            "volatile acidity": "float64",
            "citric acid": "float64",
            "residual sugar": "float64",
            "chlorides": "float64",
            "free sulfur dioxide": "float64",
            "total sulfur dioxide": "float64",
            "density": "float64",
            "pH": "float64",
            "sulphates": "float64",
            "alcohol": "float64",
            "quality": "int64",
            "name": "wine_no_transformations",
        }
        ds_reg = self.ap.lookup_dataset("wine_dataset")
        self.ap.register_featureset(fs_info, ds_reg["_key"])

    def lookup_featureset(self):
        """Look up the ``wine_no_transformations`` featureset."""
        self.ap.lookup_featureset("wine_no_transformations")

    def register_model(self):
        """Register the ``elastic_net_wine_model`` model."""
        model_info = {
            "name": "elastic_net_wine_model",
            "type": "elastic net regression",
        }
        self.ap.register_model(model_info)

    def lookup_model(self):
        """Look up the ``elastic_net_wine_model`` model."""
        self.ap.lookup_model("elastic_net_wine_model")

    def log_run(self):
        """Log one run that ties together the dataset, featureset and model.

        Requires the dataset, featureset and model to be registered already,
        because their ``_key`` values are read from the lookup results.
        """
        run_id = "0ef73d9edf08487793c77a1742f4033e"
        ds_reg = self.ap.lookup_dataset("wine_dataset")
        fs_reg = self.ap.lookup_featureset("wine_no_transformations")
        model_reg = self.ap.lookup_model("elastic_net_wine_model")
        model_params = {"l1_ratio": 0.1, "alpha": 0.2, "run_id": run_id}
        model_perf = {
            "rmse": 0.7836984021909766,
            "r2": 0.20673590971167466,
            "mae": 0.6142020452688988,
            "run_id": run_id,
            "timestamp": "2019-06-06 12:52:11.190048",
        }
        run_info = {
            "dataset": ds_reg["_key"],
            "featureset": fs_reg["_key"],
            "run_id": run_id,
            "model": model_reg["_key"],
            "model-params": model_params,
            "model-perf": model_perf,
            "pipeline": "Wine-Regression-Pipeline",
            # NOTE(review): differs from the project name registered in
            # provision_project ("Wine-Quality-Regression-Modelling");
            # kept as-is -- confirm whether this is intentional.
            "project": "Wine-Quality-Assessment",
            "deployment_tag": "Wine_Elastic_Net_Regression",
            "tag": "wine regression model test 1",
        }
        self.ap.log_run(run_info)

    def provision_deployment(self):
        """Register the ``Wine_Elastic_Net_Regression`` deployment."""
        self.admin.register_deployment("Wine_Elastic_Net_Regression")

    def log_servingperf(self):
        """Log serving performance covering the 30 days up to now."""
        to_date = datetime.datetime.now()
        from_date = to_date - datetime.timedelta(days=30)
        ex_servingperf = {
            "rmse": 0.822242,
            "r2": 0.12678,
            "mae": 0.62787,
            "from_date": str(from_date),
            "to_date": str(to_date),
        }
        dep_tag = "Wine_Elastic_Net_Regression"
        user_id = "prvileged user"  # spelling kept: this value is sent as data
        self.ap.log_serving_perf(ex_servingperf, dep_tag, user_id)

    def dataset_shift_positive(self):
        """Score dataset shift between the two halves of a ``lat`` split.

        Rows with ``lat <= -119`` are compared against rows with
        ``lat > -119``.

        Returns:
            The value returned by ``detect_dataset_shift``.
        """
        df = self._load_cal_housing()
        df1 = df.query("lat <= -119")
        df2 = df.query("lat > -119")
        rfd = RF_DatasetShiftDetector()
        score = rfd.detect_dataset_shift(df1, df2)
        print("Detaset shift score : ", score)
        return score

    def dataset_shift_negative(self):
        """Score dataset shift between a subset and an identical copy of it.

        Rows with ``lat <= -119`` are compared against a copy of themselves.

        Returns:
            The value returned by ``detect_dataset_shift``.
        """
        df = self._load_cal_housing()
        df1 = df.query("lat <= -119")
        df2 = df1.copy()
        rfd = RF_DatasetShiftDetector()
        score = rfd.detect_dataset_shift(df1, df2)
        print("Detaset shift score : ", score)
        return score

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_register_dataset(self):
        """Registering a dataset must not raise."""
        self._run_checked(
            'Exception raised while registering dataset',
            self.register_dataset,
        )

    def test_lookup_dataset(self):
        """Looking up a registered dataset must not raise."""
        self._run_checked(
            'Exception raised while looking up dataset',
            self.register_dataset,
            self.lookup_dataset,
        )

    def test_register_featureset(self):
        """Registering a featureset for a registered dataset must not raise."""
        self._run_checked(
            'Exception raised while registering featureset',
            self.register_dataset,
            self.register_featureset,
        )

    def test_lookup_featureset(self):
        """Looking up a registered featureset must not raise."""
        self._run_checked(
            'Exception raised while looking up featureset',
            self.register_dataset,
            self.register_featureset,
            self.lookup_featureset,
        )

    def test_register_model(self):
        """Registering a model must not raise."""
        self._run_checked(
            'Exception raised while registering model',
            self.register_model,
        )

    def test_lookup_model(self):
        """Looking up a registered model must not raise."""
        self._run_checked(
            'Exception raised while looking up model',
            self.register_model,
            self.lookup_model,
        )

    def test_log_run(self):
        """Logging a run after registering its parts must not raise."""
        self._run_checked(
            'Exception raised while logging performance',
            self.register_dataset,
            self.register_featureset,
            self.register_model,
            self.log_run,
        )

    def test_provision_deployment(self):
        """Registering a deployment for a logged run must not raise."""
        self._run_checked(
            'Exception raised while provisioning deployment',
            self.register_dataset,
            self.register_featureset,
            self.register_model,
            self.log_run,
            self.provision_deployment,
        )

    def test_log_serving_performance(self):
        """Logging serving performance for a deployment must not raise."""
        self._run_checked(
            'Exception raised while logging serving performance',
            self.register_dataset,
            self.register_featureset,
            self.register_model,
            self.log_run,
            self.provision_deployment,
            self.log_servingperf,
        )

    def test_dataset_shift_positive(self):
        """The two sides of the ``lat`` split must score above 0.8."""
        score = self.dataset_shift_positive()
        self.assertGreater(score, 0.8)

    def test_dataset_shift_negative(self):
        """A subset compared with its own copy must score below 0.6."""
        score = self.dataset_shift_negative()
        self.assertLess(score, 0.6)
Ejemplo n.º 3
0
def generate_runs(clean=False):
    """Fit one Lasso model per generated dataset and record it in Arangopipe.

    For each tuple yielded by ``dataset_generator()`` the function fits a
    ``linear_model.Lasso`` with a randomly drawn ``alpha``, evaluates it on
    the validation and test splits with ``eval_metrics``, and then registers
    the dataset, featureset and model, logs the run, registers a deployment
    and logs the serving performance.

    Args:
        clean: When true, call ``delete_arangomldb()``, ``create_db()`` and
            ``create_enterprise_ml_graph()`` on the admin object before any
            run is generated.

    Returns:
        None.
    """
    settings = ArangoPipeConfig()
    settings.set_dbconnection(
        hostname="localhost",
        port=8529,
        root_user="******",
        root_user_password="******",
    )
    admin = ArangoPipeAdmin(config=settings)
    ap = ArangoPipe(config=settings)

    if clean:
        admin.delete_arangomldb()
        admin.create_db()
        admin.create_enterprise_ml_graph()

    admin.register_project({"name": "Home_Value_Assessor"})

    periods = period_string_generator()
    # Both dicts are created once and updated in place on every iteration.
    ds_info = {"description": "Housing Price Data"}
    featureset = {
        column: str(dtype) for column, dtype in data.dtypes.to_dict().items()
    }

    for dataset_no, data_tuple in enumerate(dataset_generator(), start=1):
        logger.info("Processing Dataset:" + str(dataset_no))
        period_label = next(periods)
        # Expected order: train/test features, train/test targets, then the
        # validation features and targets.
        X_train, X_test, y_train, y_test, X_val, y_val = (
            data_tuple[position] for position in range(6)
        )

        # Fit with a random regularisation strength and score on validation.
        alpha_random = np.random.uniform(0.0005, 0.001)
        lasso = linear_model.Lasso(alpha=alpha_random)
        lasso.fit(X_train, y_train)
        val_rmse, val_mae, val_r2 = eval_metrics(y_val, lasso.predict(X_val))
        run_id = str(uuid.uuid4())
        model_perf = {
            "rmse": val_rmse,
            "r2": val_r2,
            "mae": val_mae,
            "run_id": run_id,
            "timestamp": str(dt.datetime.now()),
        }

        # Score on the test split; this is what gets logged as serving perf.
        srv_rmse, srv_mae, srv_r2 = eval_metrics(y_test, lasso.predict(X_test))
        ex_servingperf = {
            "rmse": srv_rmse,
            "r2": srv_r2,
            "mae": srv_mae,
            "period": period_label,
        }

        deployment_tag = "Deployment_HPE_" + period_label
        dataset_tag = "Housing_Dataset_" + period_label
        pipeline_tag = "Pipeline_HPE" + period_label
        feature_pipeline_tag = "Feature Pipeline HPE" + period_label

        ds_info.update({
            "name": dataset_tag,
            "tag": dataset_tag,
            "source": "Housing Price Operational Data Store",
        })
        featureset.update({
            "generated_by": feature_pipeline_tag,
            "name": "log_transformed_house_value",
            "tag": dataset_tag,
        })

        ds_reg = ap.register_dataset(ds_info)
        fs_reg = ap.register_featureset(featureset, ds_reg["_key"])
        model_info = {
            "name": "Housing Regression Model",
            "type": "LASSO regression",
            "tag": "model_period:" + period_label,
        }
        model_reg = ap.register_model(model_info, project="Home_Value_Assessor")

        run_info = {
            "dataset": ds_reg["_key"],
            "featureset": fs_reg["_key"],
            "run_id": run_id,
            "model": model_reg["_key"],
            "model-params": {"alpha": alpha_random, "run_id": run_id},
            "model-perf": model_perf,
            "pipeline": pipeline_tag,
            # NOTE(review): the project registered above is
            # "Home_Value_Assessor"; this differing value is kept as-is --
            # confirm whether it is intentional.
            "project": "Housing Price Assessor",
            "tag_for_deployment": True,
            "deployment_tag": deployment_tag,
        }
        ap.log_run(run_info)
        admin.register_deployment(deployment_tag)
        ap.log_serving_perf(
            ex_servingperf, deployment_tag, "Arangopipe Test Data Generator"
        )