예제 #1
0
    def test_using_deleted_database(self):
        """Check that a usable connection is obtained after the DB is deleted.

        The test calls ``self.delete_arangopipe_db()`` and then creates an
        admin with ``reuse_connection=True``.  If building an ``ArangoPipe``
        from that admin's configuration raises, a new connection is
        provisioned from ``self.test_cfg['arangodb']``.  The test passes only
        if a dataset lookup through the resulting ``ArangoPipe`` does not
        raise.
        """
        # Local import: only needed to report a failing lookup.
        import traceback

        err_raised = False
        print("Running the test using a stale connection... ")
        self.delete_arangopipe_db()
        new_admin = ArangoPipeAdmin(reuse_connection=True)
        ap_config = new_admin.get_config()

        try:
            ap = ArangoPipe(config=ap_config)
        except Exception:
            print("Stale connection identified...")
            print("Using a new connection...")
            mscp = ManagedServiceConnParam()
            db_cfg = self.test_cfg['arangodb']
            # The same parameter object supplies both the keys and the
            # lookups, so the mapping cannot drift from the test config.
            param_names = (
                mscp.DB_SERVICE_HOST,
                mscp.DB_SERVICE_END_POINT,
                mscp.DB_SERVICE_NAME,
                mscp.DB_SERVICE_PORT,
                mscp.DB_CONN_PROTOCOL,
            )
            conn_params = {name: db_cfg[name] for name in param_names}
            conn_config = ArangoPipeConfig().create_connection_config(
                conn_params)
            admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
            ap_config = admin.get_config()
            ap = ArangoPipe(config=ap_config)

        print("Using new connection to look up a non existent dataset...")
        try:
            ap.lookup_dataset("non existent dataset")
        except Exception:
            # Record the failure so the assertion below is meaningful; the
            # traceback is printed because the assertion message alone would
            # hide where the lookup failed.
            err_raised = True
            traceback.print_exc()
        self.assertFalse(
            err_raised,
            "Looking up a dataset with the new connection raised an exception")
예제 #2
0
def verify_install():
    """Smoke-test an Arangopipe installation against a managed service.

    Builds a connection configuration for the hard-coded managed-service
    host, creates an ``ArangoPipeAdmin`` and an ``ArangoPipe`` from it,
    performs a dataset lookup and registers a project.  If the registration
    raises, the traceback is printed to stdout; the success message is
    printed only when the registration completed.
    """
    config = ArangoPipeConfig()
    msc = ManagedServiceConnParam()
    conn_params = {
        msc.DB_SERVICE_HOST: "d874fc3f1fa5.arangodb.cloud",
        msc.DB_SERVICE_END_POINT: "apmdb",
        msc.DB_SERVICE_NAME: "createDB",
        msc.DB_SERVICE_PORT: 8529,
        msc.DB_CONN_PROTOCOL: 'https',
        msc.DB_NOTIFICATION_EMAIL: 'somebody@some_company.com',
    }

    config = config.create_connection_config(conn_params)
    admin = ArangoPipeAdmin(reuse_connection=False, config=config)
    ap_config = admin.get_config()
    ap = ArangoPipe(config=ap_config)
    ap.lookup_dataset("non existent dataset")
    try:
        proj_info = {"name": "Wine-Quality-Regression-Modelling"}
        admin.register_project(proj_info)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # ``SystemExit`` still propagate.
        print('-' * 60)
        traceback.print_exc(file=sys.stdout)
        print('-' * 60)
    else:
        # Claim success only when the registration did not fail.
        print("Installation of Arangopipe API verified !")
예제 #3
0
    def setUp(self):
        """Prepare ``self.admin`` for a test.

        Calls ``self.delete_users()`` and ``self.delete_arangopipe_db()``,
        loads the test configuration into ``self.test_cfg`` and creates an
        ``ArangoPipeAdmin`` (no connection reuse, no persisted connection)
        from the ``'arangodb'`` section of that configuration.
        """
        self.delete_users()
        self.delete_arangopipe_db()
        fresh_config = ArangoPipeConfig()

        self.test_cfg = self.get_test_config()

        mscp = self.mscp
        db_section = self.test_cfg['arangodb']
        # Connection parameters, copied key-for-key from the test config.
        wanted = (
            mscp.DB_SERVICE_HOST,
            mscp.DB_USER_NAME,
            mscp.DB_PASSWORD,
            mscp.DB_NAME,
            mscp.DB_ROOT_USER,
            mscp.DB_ROOT_USER_PASSWORD,
            mscp.DB_SERVICE_END_POINT,
            mscp.DB_SERVICE_NAME,
            mscp.DB_SERVICE_PORT,
            mscp.DB_CONN_PROTOCOL,
        )
        conn_params = {key: db_section[key] for key in wanted}

        self.admin = ArangoPipeAdmin(
            reuse_connection=False,
            config=fresh_config.create_connection_config(conn_params),
            persist_conn=False)
예제 #4
0
def arango_pipe_connections(conn_params, reuse_prev_connection=True):
    """Yield an ``(admin, ap)`` pair built from *conn_params*.

    A connection configuration is created from *conn_params*, an
    ``ArangoPipeAdmin`` is built from it (reusing a previous connection when
    *reuse_prev_connection* is true) and an ``ArangoPipe`` is built from the
    admin's configuration.  The pair is yielded exactly once.

    NOTE(review): presumably wrapped by a fixture/context-manager decorator
    that is outside this excerpt -- confirm.
    """
    connection_cfg = ArangoPipeConfig().create_connection_config(conn_params)
    admin = ArangoPipeAdmin(config=connection_cfg,
                            reuse_connection=reuse_prev_connection)
    pipe = ArangoPipe(config=admin.get_config())
    yield admin, pipe
    def setUp(self):
        """Create ``self.admin`` and ``self.ap`` and provision the project.

        Connection parameters are copied from ``self.test_cfg['arangodb']``;
        a new (non-reused) admin connection is created from them, an
        ``ArangoPipe`` is built from the admin's configuration and
        ``self.provision_project()`` is called.
        """
        fresh_config = ArangoPipeConfig()
        mscp = self.mscp
        db_section = self.test_cfg['arangodb']
        # Root-user credentials are intentionally not part of the parameters.
        wanted = (
            mscp.DB_SERVICE_HOST,
            mscp.DB_SERVICE_END_POINT,
            mscp.DB_SERVICE_NAME,
            mscp.DB_SERVICE_PORT,
            mscp.DB_CONN_PROTOCOL,
        )
        conn_params = {key: db_section[key] for key in wanted}

        self.admin = ArangoPipeAdmin(
            reuse_connection=False,
            config=fresh_config.create_connection_config(conn_params))
        self.ap = ArangoPipe(config=self.admin.get_config())
        self.provision_project()
예제 #6
0
 def setUp(self):
     """Create ``self.config``, ``self.admin`` and ``self.ap`` for a test.

     Connection parameters come from ``self.test_cfg['arangodb']`` plus a
     fixed notification e-mail address.  The admin is created without
     connection reuse and without persisting the connection, after which
     ``self.provision_project()`` is called.
     """
     fresh_config = ArangoPipeConfig()
     msc = ManagedServiceConnParam()
     db_section = self.test_cfg['arangodb']
     wanted = (
         msc.DB_SERVICE_HOST,
         msc.DB_SERVICE_END_POINT,
         msc.DB_SERVICE_NAME,
         msc.DB_SERVICE_PORT,
         msc.DB_CONN_PROTOCOL,
     )
     conn_params = {key: db_section[key] for key in wanted}
     # The notification address is not taken from the test config.
     conn_params[msc.DB_NOTIFICATION_EMAIL] = 'somebody@some_company.com'

     self.config = fresh_config.create_connection_config(conn_params)
     self.admin = ArangoPipeAdmin(reuse_connection=False,
                                  config=self.config,
                                  persist_conn=False)
     self.ap = ArangoPipe(config=self.admin.get_config())
     self.provision_project()
예제 #7
0
    def create_db(self, db_srv_host, db_srv_port, db_serv_name,
                  db_end_point, db_dbName, db_user_name, db_password,
                  db_conn_protocol):
        """Connect ``self.db`` to a managed-service database.

        When ``self.reuse_connection`` is false, a database is requested from
        the managed-service endpoint
        ``<protocol>://<host>:<port>/_db/_system/<end point>/<service name>``
        and the returned database name and credentials, together with the
        service parameters, are stored in ``self.cfg['arangodb']``.

        When ``self.reuse_connection`` is true, the database name and
        credentials are read from ``self.cfg`` (if
        ``self.use_supp_config_to_reconnect`` is set) or from a freshly
        constructed ``ArangoPipeConfig``, which then replaces ``self.config``
        and ``self.cfg``.

        Args:
            db_srv_host: Host name of the managed service.
            db_srv_port: Port of the managed service.
            db_serv_name: Service name appended to the endpoint URL.
            db_end_point: End point segment of the endpoint URL.
            db_dbName: Preferred database name (may be empty).
            db_user_name: Preferred user name (may be empty).
            db_password: Preferred password (may be empty).
            db_conn_protocol: URL scheme, e.g. ``http`` or ``https``.

        Returns:
            None.  On HTTP 409 or 400 from the endpoint an error is logged
            and the method returns without setting ``self.db``.

        Raises:
            AssertionError: If the endpoint answers with any other status
                than 200.
        """
        host_connection = (db_conn_protocol + "://" + db_srv_host + ":" +
                           str(db_srv_port))
        logger.debug("Connection reuse: %s", self.reuse_connection)

        if not self.reuse_connection:
            API_ENDPOINT = (host_connection + "/_db/_system/" + db_end_point +
                            "/" + db_serv_name)
            print("API endpoint: " + API_ENDPOINT)

            if db_dbName:
                logger.info("DB name preferrence: %s", db_dbName)
            if db_user_name:
                logger.info("DB user name preferrence: %s", db_user_name)
            if db_password:
                # Only the presence of a password is logged, never its value.
                logger.info(
                    "Password preference for managed connection was indicated !"
                )

            api_data = {
                self.mscp.DB_NAME: db_dbName,
                self.mscp.DB_USER_NAME: db_user_name,
                self.mscp.DB_PASSWORD: db_password,
            }
            logger.info("Requesting a managed service database...")

            # NOTE(review): verify=False disables TLS certificate checking
            # for this request.  Left unchanged because the deployment's
            # certificates are not visible here -- confirm whether it is
            # still required.
            r = requests.post(url=API_ENDPOINT, json=api_data, verify=False)

            if r.status_code in (409, 400):
                logger.error(
                    "It appears that you are attempting to connect using "
                    "existing connection information. So either set "
                    "reuse_connection = True when you create ArangoPipeAdmin "
                    "or recreate a connection config and try again!")
                return

            if r.status_code != 200:
                # Raised explicitly (instead of ``assert``) so the check is
                # not stripped when Python runs with -O; the exception type
                # callers see is unchanged.
                raise AssertionError(
                    "Managed DB endpoint is unavailable !, reason: " +
                    str(r.reason) + " err code: " + str(r.status_code))

            result = json.loads(r.text)
            logger.info("Managed service database was created !")
            ms_dbName = result['dbName']
            ms_user_name = result['username']
            ms_password = result['password']

            # Remember what was provisioned so the connection can be reused.
            arango_cfg = self.cfg['arangodb']
            arango_cfg[self.mscp.DB_NAME] = ms_dbName
            arango_cfg[self.mscp.DB_USER_NAME] = ms_user_name
            arango_cfg[self.mscp.DB_PASSWORD] = ms_password
            arango_cfg[self.mscp.DB_SERVICE_HOST] = db_srv_host
            arango_cfg[self.mscp.DB_SERVICE_NAME] = db_serv_name
            arango_cfg[self.mscp.DB_SERVICE_END_POINT] = db_end_point
            arango_cfg[self.mscp.DB_SERVICE_PORT] = db_srv_port
            arango_cfg[self.mscp.DB_CONN_PROTOCOL] = db_conn_protocol

        elif self.use_supp_config_to_reconnect:
            ms_dbName = self.cfg['arangodb'][self.mscp.DB_NAME]
            ms_user_name = self.cfg['arangodb'][self.mscp.DB_USER_NAME]
            ms_password = self.cfg['arangodb'][self.mscp.DB_PASSWORD]

        else:
            disk_cfg = ArangoPipeConfig()
            pcfg = disk_cfg.get_cfg()  # persisted config values
            ms_dbName = pcfg['arangodb'][self.mscp.DB_NAME]
            ms_user_name = pcfg['arangodb'][self.mscp.DB_USER_NAME]
            ms_password = pcfg['arangodb'][self.mscp.DB_PASSWORD]
            self.config = disk_cfg
            self.cfg = disk_cfg.cfg

        # Connect to the database with the resolved credentials and keep the
        # resulting API wrapper on the instance.
        print("Host Connection: " + str(host_connection))
        client = ArangoClient(
            hosts=host_connection,
            http_client=CustomHTTPClient(username=ms_user_name,
                                         password=ms_password))

        self.db = client.db(ms_dbName, ms_user_name, ms_password)
예제 #8
0
 def create_config(self):
     """Return a newly constructed ``ArangoPipeConfig``."""
     return ArangoPipeConfig()
예제 #9
0
def generate_runs(clean=False):
    """Fit one Lasso model per generated dataset and log each run.

    Connection parameters are read from the ``'arangodb'`` section returned
    by ``read_data()``.  For every tuple produced by ``dataset_generator()``
    a Lasso model with a random ``alpha`` is fitted, evaluated on the
    validation and test splits, and the dataset, featureset, model, run,
    deployment and serving performance are registered through the
    Arangopipe API.

    Args:
        clean: When true, the admin's database is deleted and recreated and
            the enterprise ML graph is created before any data is loaded.
    """
    cfg = read_data()
    mscp = ManagedServiceConnParam()

    conn_config = ArangoPipeConfig()

    db_section = cfg['arangodb']
    wanted = (
        mscp.DB_SERVICE_HOST,
        mscp.DB_USER_NAME,
        mscp.DB_PASSWORD,
        mscp.DB_NAME,
        mscp.DB_ROOT_USER,
        mscp.DB_ROOT_USER_PASSWORD,
        mscp.DB_SERVICE_END_POINT,
        mscp.DB_SERVICE_NAME,
        mscp.DB_SERVICE_PORT,
        mscp.DB_CONN_PROTOCOL,
    )
    conn_params = {key: db_section[key] for key in wanted}

    conn_config = conn_config.create_connection_config(conn_params)
    admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
    ap = ArangoPipe(config=admin.get_config())

    if clean:
        admin.delete_arangomldb()
        admin.create_db()
        admin.create_enterprise_ml_graph()

    project_name = "Home_Value_Assessor"
    admin.register_project({"name": project_name})

    period = period_string_generator()
    ds_info = {"description": "Housing Price Data"}
    # Column name -> dtype name, taken from the module-level ``data`` frame.
    featureset = {
        column: str(dtype)
        for column, dtype in data.dtypes.to_dict().items()
    }

    for count, data_tuple in enumerate(dataset_generator(), start=1):
        logger.info("Processing Dataset:" + str(count))
        aperiod = next(period)
        # Positions 0..5: train/test features, train/test targets,
        # validation features, validation targets.
        X_train, X_test, y_train, y_test, X_val, y_val = (
            data_tuple[i] for i in range(6))

        alpha_random = np.random.uniform(0.0005, 0.001)
        lrm = linear_model.Lasso(alpha=alpha_random)
        lrm.fit(X_train, y_train)

        val_rmse, val_mae, val_r2 = eval_metrics(y_val, lrm.predict(X_val))
        ruuid = uuid.uuid4()
        run_id = str(ruuid)
        model_perf = {
            "rmse": val_rmse,
            "r2": val_r2,
            "mae": val_mae,
            "run_id": run_id,
            "timestamp": str(dt.datetime.now()),
        }

        srv_rmse, srv_mae, srv_r2 = eval_metrics(y_test, lrm.predict(X_test))
        ex_servingperf = {
            "rmse": srv_rmse,
            "r2": srv_r2,
            "mae": srv_mae,
            "period": aperiod,
        }

        deployment_tag = "Deployment_HPE_" + aperiod
        dataset_tag = "Housing_Dataset_" + aperiod
        pipeline_tag = "Pipeline_HPE" + aperiod
        feature_pipeline_tag = "Feature Pipeline HPE" + aperiod

        # The same dict objects are reused (and overwritten) every iteration.
        ds_info.update(name=dataset_tag,
                       tag=dataset_tag,
                       source="Housing Price Operational Data Store")
        featureset.update(generated_by=feature_pipeline_tag,
                          name="log_transformed_house_value_" + run_id,
                          tag=dataset_tag)

        ds_reg = ap.register_dataset(ds_info)
        fs_reg = ap.register_featureset(featureset, ds_reg["_key"])

        model_info = {
            "name": "Housing Regression Model_" + run_id,
            "type": "LASSO regression",
            "tag": "model_period:" + aperiod,
        }
        model_reg = ap.register_model(model_info, project=project_name)

        run_info = {
            "dataset": ds_reg["_key"],
            "featureset": fs_reg["_key"],
            "run_id": run_id,
            "model": model_reg["_key"],
            "model-params": {"alpha": alpha_random, "run_id": run_id},
            "model-perf": model_perf,
            "pipeline": pipeline_tag,
            "project": "Housing Price Assessor",
            "tag_for_deployment": True,
            "deployment_tag": deployment_tag,
        }
        ap.log_run(run_info)
        admin.register_deployment(deployment_tag)
        ap.log_serving_perf(ex_servingperf, deployment_tag,
                            "Arangopipe Test Data Generator")

    print("Done loading data into the test database!")
예제 #10
0
def run_driver():
    """Train a linear regression with PyTorch and log the run in Arangopipe.

    The model is trained with Adam on ``CH_Dataset()`` for a fixed number of
    epochs; the root of the mean batch loss is printed and recorded every
    fifth epoch.  The model is then evaluated on ``CH_Dataset(train=False)``
    and the project, model, dataset, featureset and run (including the loss
    schedule) are registered through the Arangopipe API.  Finally the stored
    loss schedule is looked up again and printed.
    """
    params = {'batch_size': 128, 'shuffle': True, 'num_workers': 6}
    trng_dataset = CH_Dataset()
    test_dataset = CH_Dataset(train=False)
    training_generator = data.DataLoader(trng_dataset, **params)
    test_generator = data.DataLoader(test_dataset, **params)
    input_size = trng_dataset.input_size
    output_size = trng_dataset.output_size

    m = CH_LinearRegression(inputSize=input_size, outputSize=output_size)
    cost_func = nn.MSELoss()
    learning_rate = 0.1
    optimizer = torch.optim.Adam(m.parameters(), lr=learning_rate)
    num_epochs = 100
    loss_sched = {}  # epoch -> sqrt(mean batch loss), every fifth epoch
    for e in range(num_epochs):
        batch_losses = []
        for Xb, yb in training_generator:
            # Tensors take part in autograd directly; the deprecated
            # ``Variable`` wrapper is a no-op and is not needed.
            _X = Xb.float()
            _y = yb.float()

            # ==========Forward pass===============
            preds = torch.flatten(m(_X))
            loss = cost_func(preds, _y)

            # ==========backward pass==============
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())

        mbl = sqrt(np.mean(batch_losses))

        if e % 5 == 0:
            print("training loss: " + str(mbl))
            loss_sched[e] = mbl

    # NOTE(review): m.eval() is deliberately not called here, matching the
    # original; it would matter only if the model had dropout/batch-norm
    # layers -- confirm against CH_LinearRegression.
    test_batch_losses = []
    # Evaluation needs no gradients, so skip building the autograd graph.
    with torch.no_grad():
        for _X, _y in test_generator:
            _X = _X.float()
            _y = _y.float()

            test_preds = torch.flatten(m(_X))
            test_loss = cost_func(test_preds, _y)
            test_batch_losses.append(test_loss.item())

    tmbl = sqrt(np.mean(test_batch_losses))
    print("test loss: " + str(tmbl))

    # Store experiment results in Arangopipe
    conn_config = ArangoPipeConfig()
    msc = ManagedServiceConnParam()
    test_cfg = get_test_config()
    db_section = test_cfg['arangodb']
    wanted = (
        msc.DB_SERVICE_HOST,
        msc.DB_SERVICE_END_POINT,
        msc.DB_SERVICE_NAME,
        msc.DB_SERVICE_PORT,
        msc.DB_CONN_PROTOCOL,
    )
    conn_params = {key: db_section[key] for key in wanted}

    conn_config = conn_config.create_connection_config(conn_params)
    proj_info = {"name": "Housing_Price_Estimation_Project"}
    admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
    admin.register_project(proj_info)
    ap_config = admin.get_config()
    ap = ArangoPipe(config=ap_config)
    ruuid = str(uuid.uuid4().int)
    model_name = "pytorch-linear-reg" + "_dev_run_" + ruuid
    model_info = {"name": model_name, "type": "model-development"}
    model_reg = ap.register_model(model_info,
                                  project="Housing_Price_Estimation_Project")
    ds_info = trng_dataset.get_dataset()
    ds_reg = ap.register_dataset(ds_info)
    fs = trng_dataset.get_featureset()
    fs_reg = ap.register_featureset(fs, ds_reg["_key"])

    # Log the hyperparameters that were actually used above instead of
    # separately hard-coded copies that could drift from them.
    model_params = {
        "optimizer": "Adam",
        "training_epochs": num_epochs,
        "batch_size": params['batch_size'],
        "learning_rate": learning_rate,
        "run_id": ruuid,
    }
    model_perf = {
        "training_loss_schedule": jsonpickle.encode(loss_sched),
        "run_id": ruuid,
        "timestamp": str(datetime.datetime.now()),
    }
    run_tag = "Housing-Price-Pytorch-Experiment" + "_dev_run_" + ruuid
    run_info = {
        "dataset": ds_reg["_key"],
        "featureset": fs_reg["_key"],
        "run_id": ruuid,
        "model": model_reg["_key"],
        "model-params": model_params,
        "model-perf": model_perf,
        "tag": run_tag,
        "project": "Housing Price Estimation Project",
    }
    ap.log_run(run_info)
    mp = ap.lookup_modelperf(run_tag)
    print(
        "A look up of the loss schedule for this experiment in Arangopipe yields:"
    )
    print(str(mp["training_loss_schedule"]))
예제 #11
0
 def get_config(self):
     """Return ``get_cfg()`` of a newly constructed ``ArangoPipeConfig``."""
     return ArangoPipeConfig().get_cfg()
예제 #12
0
from arangopipe.arangopipe_storage.arangopipe_admin_api import ArangoPipeAdmin
from arangopipe.arangopipe_storage.arangopipe_config import ArangoPipeConfig
from arangopipe.arangopipe_storage.managed_service_conn_parameters import ManagedServiceConnParam


def eval_metrics(actual, pred):
    """Return ``(rmse, mae, r2)`` for predictions *pred* against *actual*.

    The RMSE is the square root of ``mean_squared_error``; the other two
    values come from ``mean_absolute_error`` and ``r2_score``.
    """
    mse = mean_squared_error(actual, pred)
    return (
        np.sqrt(mse),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )


# Script entry point: build an Arangopipe connection to a managed-service
# endpoint on localhost and register the project.
# NOTE(review): the script may continue beyond this excerpt.
if __name__ == "__main__":

    proj_info = {"name": "Wine-Quality-Regression-Modelling"}
    conn_config = ArangoPipeConfig()
    msc = ManagedServiceConnParam()
    # Connection parameters: plain HTTP to localhost on port 8529.
    conn_params = { msc.DB_SERVICE_HOST : "localhost", \
                    msc.DB_SERVICE_END_POINT : "apmdb",\
                    msc.DB_SERVICE_NAME : "createDB",\
                    msc.DB_SERVICE_PORT : 8529,
                    msc.DB_CONN_PROTOCOL : 'http'}

    conn_config = conn_config.create_connection_config(conn_params)
    # reuse_connection=False: do not reuse a previously created connection.
    admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
    proj_reg = admin.register_project(proj_info)

    # Silence all warnings and fix the NumPy random seed.
    warnings.filterwarnings("ignore")
    np.random.seed(40)
    # The ArangoPipe client is built from the admin's configuration.
    ap_config = admin.get_config()
    ap = ArangoPipe(config=ap_config)