예제 #1
0
def validate_conn_params(conn_params):
    """Check that all mandatory connection parameters are present.

    Every required key is checked (not just the first missing one) and a
    message is printed for each key that is absent from ``conn_params``.

    Args:
        conn_params: container queried with ``in`` for the key constants
            exposed by ``ManagedServiceConnParam``.

    Returns:
        True when every required key is present, False otherwise.
    """
    msc = ManagedServiceConnParam()
    # (key constant, label used in the printed message), in reporting order.
    required = (
        (msc.DB_SERVICE_HOST, "Service host"),
        (msc.DB_SERVICE_END_POINT, "Service end point"),
        (msc.DB_SERVICE_NAME, "Service name"),
        (msc.DB_SERVICE_PORT, "Service port"),
        (msc.DB_CONN_PROTOCOL, "Service connection protocol"),
    )

    all_present = True
    for key, label in required:
        if key in conn_params:
            continue
        print(label + " information not provided!, please provide")
        all_present = False

    return all_present
예제 #2
0
def verify_install():
    """Smoke-test an Arangopipe installation against a managed service.

    Builds a connection configuration from hard-coded parameters, creates an
    ``ArangoPipeAdmin`` and an ``ArangoPipe`` from it, looks up a dataset
    name that is not expected to exist, and then tries to register a project.

    A failure while registering the project is reported by printing the
    traceback to stdout; it is not re-raised.

    NOTE(review): the closing "verified" message is printed even when the
    project registration failed -- confirm whether that is intended.
    """
    #mshost: "5366b66b7d19.arangodb.cloud"
    config = ArangoPipeConfig()
    msc = ManagedServiceConnParam()
    conn_params = {
        msc.DB_SERVICE_HOST: "d874fc3f1fa5.arangodb.cloud",
        msc.DB_SERVICE_END_POINT: "apmdb",
        msc.DB_SERVICE_NAME: "createDB",
        msc.DB_SERVICE_PORT: 8529,
        msc.DB_CONN_PROTOCOL: 'https',
        msc.DB_NOTIFICATION_EMAIL: 'somebody@some_company.com',
    }

    config = config.create_connection_config(conn_params)
    admin = ArangoPipeAdmin(reuse_connection=False, config=config)
    ap_config = admin.get_config()
    ap = ArangoPipe(config=ap_config)
    ap.lookup_dataset("non existent dataset")
    try:
        proj_info = {"name": "Wine-Quality-Regression-Modelling"}
        admin.register_project(proj_info)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate to the caller.
        print('-' * 60)
        traceback.print_exc(file=sys.stdout)
        print('-' * 60)

    print("Installation of Arangopipe API verified !")

    return
예제 #3
0
def delete_users():
    """Delete every database user that is not on the protected list.

    Connection details and root credentials are read from the configuration
    returned by ``read_data()``. The users are listed through the ``_system``
    database and each user whose name is not protected is deleted.

    Raises:
        Exception: if the root user name or root password is missing from
            the configuration.
    """
    cfg = read_data()
    mscp = ManagedServiceConnParam()
    print("Deleting users before test !")
    protected = ['_system', 'root', 'rajiv', 'node2vec_db_admin', 'susr']

    arango_cfg = cfg['arangodb']
    protocol = arango_cfg[mscp.DB_CONN_PROTOCOL]
    srv_host = arango_cfg[mscp.DB_SERVICE_HOST]
    port = arango_cfg[mscp.DB_SERVICE_PORT]

    try:
        root_user = arango_cfg[mscp.DB_ROOT_USER]
        root_user_password = arango_cfg[mscp.DB_ROOT_USER_PASSWORD]
    except KeyError as missing:
        print("Root credentials are unvailable, try again "
              "with a new connection and credentials for root provided")
        print("Credential information that is missing : " + missing.args[0])
        raise Exception("Key error associated with missing " +
                        missing.args[0])

    host_connection = "".join([protocol, "://", srv_host, ":", str(port)])
    http_client = CustomHTTPClient(username=root_user,
                                   password=root_user_password)
    client = ArangoClient(hosts=host_connection, http_client=http_client)
    sys_db = client.db('_system',
                       username=root_user,
                       password=root_user_password)

    # Collect all user names first, then delete the unprotected ones.
    user_names = [entry['username'] for entry in sys_db.users()]
    for name in user_names:
        if name not in protected:
            sys_db.delete_user(name)

    return
예제 #4
0
 def __init__(self, config):
     """Store the configuration, then call ``init_graph`` and ``heart_beat``.

     Args:
         config: object whose ``get_cfg()`` result is kept as ``self.cfg``.
     """
     self.cfg = config.get_cfg()
     # Both handles start out unset.
     self.emlg = self.db = None
     self.mscp = ManagedServiceConnParam()
     self.init_graph()
     self.heart_beat()
예제 #5
0
    def test_using_deleted_database(self):
        """Use a reused connection after the database has been deleted.

        The arangopipe database is deleted, then an ``ArangoPipe`` is built
        from a reused-connection admin. If that raises, a fresh connection is
        built from the test configuration instead. Finally a dataset name
        that is not expected to exist is looked up.
        """
        err_raised = False
        print("Running the test using a stale connection... ")
        self.delete_arangopipe_db()
        stale_admin = ArangoPipeAdmin(reuse_connection=True)
        ap_config = stale_admin.get_config()

        try:
            ap = ArangoPipe(config=ap_config)
        except Exception:
            print("Stale connection identified...")
            print("Using a new connection...")
            mscp = ManagedServiceConnParam()
            conn_config = ArangoPipeConfig()
            section = self.test_cfg['arangodb']
            src = self.mscp
            conn_params = {
                mscp.DB_SERVICE_HOST: section[src.DB_SERVICE_HOST],
                mscp.DB_SERVICE_END_POINT: section[src.DB_SERVICE_END_POINT],
                mscp.DB_SERVICE_NAME: section[src.DB_SERVICE_NAME],
                mscp.DB_SERVICE_PORT: section[src.DB_SERVICE_PORT],
                mscp.DB_CONN_PROTOCOL: section[src.DB_CONN_PROTOCOL],
            }
            conn_config = conn_config.create_connection_config(conn_params)
            fresh_admin = ArangoPipeAdmin(reuse_connection=False,
                                          config=conn_config)
            ap_config = fresh_admin.get_config()
            ap = ArangoPipe(config=ap_config)

        print("Using new connection to look up a non existent dataset...")
        ap.lookup_dataset("non existent dataset")
        # err_raised is never set to True above, so this always passes when
        # the lookup itself did not raise.
        self.assertFalse(err_raised)

        return
예제 #6
0
 def test_connection_manager(self):
     """Register a project via the ``arango_pipe_connections`` context manager."""
     msc = ManagedServiceConnParam()
     conn_params = {
         msc.DB_SERVICE_HOST: "arangoml.arangodb.cloud",
         msc.DB_SERVICE_END_POINT: "createDB",
         msc.DB_SERVICE_NAME: "createDB",
         msc.DB_SERVICE_PORT: 8529,
         msc.DB_CONN_PROTOCOL: 'https',
     }

     # The context manager yields an (admin, pipe) pair; only the admin is
     # used here.
     with arango_pipe_connections(conn_params, False) as (ap_admin, ap):
         ap_admin.register_project(
             {"name": "Python With Generator Admin test"})
         print("Done with test!")

     return
예제 #7
0
 def setUp(self):
     """Create a fresh admin and pipe from the test configuration.

     The connection parameters are copied from ``self.test_cfg['arangodb']``,
     a fixed notification e-mail is added, and the resulting objects are
     stored on the test case before ``provision_project`` is called.
     """
     config = ArangoPipeConfig()
     msc = ManagedServiceConnParam()
     section = self.test_cfg['arangodb']
     copied_keys = (
         msc.DB_SERVICE_HOST,
         msc.DB_SERVICE_END_POINT,
         msc.DB_SERVICE_NAME,
         msc.DB_SERVICE_PORT,
         msc.DB_CONN_PROTOCOL,
     )
     conn_params = {key: section[key] for key in copied_keys}
     conn_params[msc.DB_NOTIFICATION_EMAIL] = 'somebody@some_company.com'

     self.config = config.create_connection_config(conn_params)
     self.admin = ArangoPipeAdmin(reuse_connection=False,
                                  config=self.config,
                                  persist_conn=False)
     self.ap = ArangoPipe(config=self.admin.get_config())
     self.provision_project()
     return
예제 #8
0
def delete_arangopipe_db():
    """Delete the ``arangopipe`` database if it exists.

    Connection details and root credentials are read from the configuration
    returned by ``read_data()``. The check and the deletion are performed
    through the ``_system`` database.

    A ``DatabaseListError`` raised while checking or deleting is reported on
    stdout and not re-raised.

    NOTE(review): only ``DatabaseListError`` is handled; a failure of
    ``delete_database`` itself presumably raises a different exception type
    and would propagate -- confirm against the driver documentation.

    Raises:
        Exception: if the root user name or root password is missing from
            the configuration (chained to the original ``KeyError``).
    """
    print("Deleting arangopipe database before test !")
    cfg = read_data()
    mscp = ManagedServiceConnParam()
    arango_cfg = cfg['arangodb']
    protocol = arango_cfg[mscp.DB_CONN_PROTOCOL]
    srv_host = arango_cfg[mscp.DB_SERVICE_HOST]
    port = arango_cfg[mscp.DB_SERVICE_PORT]
    try:
        root_user = arango_cfg[mscp.DB_ROOT_USER]
        root_user_password = arango_cfg[mscp.DB_ROOT_USER_PASSWORD]
    except KeyError as k:
        msg = "Root credentials are unvailable, try again " + \
             "with a new connection and credentials for root provided"
        print(msg)
        print("Credential information that is missing : " + k.args[0])
        raise Exception("Key error associated with missing " +
                        k.args[0]) from k
    host_connection = protocol + "://" + srv_host + ":" + str(port)

    client = ArangoClient(hosts=host_connection,
                          http_client=CustomHTTPClient(
                              username=root_user,
                              password=root_user_password))
    sys_db = client.db('_system',
                       username=root_user,
                       password=root_user_password)
    try:
        if sys_db.has_database("arangopipe"):
            print(
                "Before starting the test, cleaning up arangopipe instances..."
            )
            sys_db.delete_database("arangopipe")
        else:
            print("Test Prep: The database arangopipe does not exist !")

    except DatabaseListError as err:
        # The builtin print has no ``error`` attribute; the previous
        # ``print.error(err)`` raised AttributeError inside this handler.
        print(err)
        print("Error code: " + str(err.error_code) + " received !")
        print("Error Message: " + str(err.error_message))

    return
예제 #9
0
    def __init__(self, reuse_connection=True, config=None, persist_conn=True):
        """Resolve the connection settings, create the DB and the graph.

        Args:
            reuse_connection: when true, a missing ``config`` is replaced by
                the result of ``self.create_config()``; when false a
                ``config`` must be supplied.
            config: connection configuration object, or None.
            persist_conn: when true and the database was created, the
                configuration is written out with ``config.dump_data()``.

        Raises:
            AssertionError: ``reuse_connection`` is false and ``config`` is
                None.
            Exception: a mandatory connection key is missing from the
                ``'arangodb'`` section of the configuration.
        """
        self.reuse_connection = reuse_connection
        self.db = None
        self.emlg = None
        self.config = None
        self.cfg = None
        self.mscp = ManagedServiceConnParam()
        self.use_supp_config_to_reconnect = False

        if not reuse_connection:
            assert config is not None,\
                   "You must provide connection information for new connections"
            self.config = config
            self.cfg = config.cfg
        else:
            if config is None:
                self.config = self.create_config()
                self.cfg = self.config.get_cfg()
                self.use_supp_config_to_reconnect = False
            else:
                self.config = config
                self.cfg = config.cfg
                self.use_supp_config_to_reconnect = True

            logger.info(
                "If a config is provided, it will be used for setting up the connection"
            )

        # Mandatory settings: any missing key aborts construction.
        try:
            required = self.cfg['arangodb']
            db_serv_host = required[self.mscp.DB_SERVICE_HOST]
            db_serv_port = required[self.mscp.DB_SERVICE_PORT]
            db_end_point = required[self.mscp.DB_SERVICE_END_POINT]
            db_serv_name = required[self.mscp.DB_SERVICE_NAME]
        except KeyError as missing:
            logger.error("Connection information is missing : " +
                         missing.args[0])
            logger.error(
                "Please try again after providing the missing information !")
            raise Exception("Key error associated with missing " +
                            missing.args[0])

        section = self.cfg['arangodb']

        # Optional connection preferences; each falls back to a default.
        if 'dbName' in section:
            db_dbName = section[self.mscp.DB_NAME]
            logger.info("DB name for connection: " + str(db_dbName))
        else:
            db_dbName = ''

        if 'username' in section:
            db_user_name = section[self.mscp.DB_USER_NAME]
            logger.info("user name for connection: " + str(db_user_name))
        else:
            db_user_name = ''

        if 'password' in section:
            logger.info("A specific password was requested !")
            db_password = section[self.mscp.DB_PASSWORD]
        else:
            db_password = ''

        protocol_key = self.mscp.DB_CONN_PROTOCOL
        db_conn_protocol = (section[protocol_key]
                            if protocol_key in section else "http")

        replication_key = self.mscp.DB_REPLICATION_FACTOR
        db_replication_factor = (section[replication_key]
                                 if replication_key in section else None)

        if self.mscp.DB_ROOT_USER in section:
            logger.info("A root user was specified, persisting...")

        if self.mscp.DB_ROOT_USER_PASSWORD in section:
            logger.info("A root user password was specified, persisting...")

        self.create_db(db_serv_host, db_serv_port,
                       db_serv_name, db_end_point,
                       db_dbName, db_user_name, db_password, db_conn_protocol)

        # Without a database there is nothing to provision or persist.
        if self.db is None:
            return

        self.create_enterprise_ml_graph(db_replication_factor)
        if persist_conn:
            self.config.dump_data()

        return
예제 #10
0
 def __init__(self):
     """Start with no configuration loaded."""
     # Configuration is unset at construction time.
     self.cfg = None
     # Presumably supplies the connection-parameter key constants (DB_*)
     # used to index the configuration -- confirm against the class.
     self.mscp = ManagedServiceConnParam()
예제 #11
0
def generate_runs(clean=False):
    """Populate the test database with one logged run per generated dataset.

    A connection is built from the configuration returned by ``read_data()``.
    For every tuple yielded by ``dataset_generator()`` a Lasso model is
    fitted, evaluated on the validation and test splits, and the dataset,
    feature set, model, run, deployment and serving performance are
    registered through the Arangopipe API.

    Args:
        clean: when true, the database is deleted and recreated (together
            with the graph) before any data is loaded.

    NOTE(review): the model is registered under project
    "Home_Value_Assessor" while the run info carries
    "Housing Price Assessor" -- confirm whether the mismatch is intended.
    """
    cfg = read_data()
    mscp = ManagedServiceConnParam()

    conn_config = ArangoPipeConfig()
    arango_cfg = cfg['arangodb']
    copied_keys = (
        mscp.DB_SERVICE_HOST,
        mscp.DB_USER_NAME,
        mscp.DB_PASSWORD,
        mscp.DB_NAME,
        mscp.DB_ROOT_USER,
        mscp.DB_ROOT_USER_PASSWORD,
        mscp.DB_SERVICE_END_POINT,
        mscp.DB_SERVICE_NAME,
        mscp.DB_SERVICE_PORT,
        mscp.DB_CONN_PROTOCOL,
    )
    conn_params = {key: arango_cfg[key] for key in copied_keys}

    conn_config = conn_config.create_connection_config(conn_params)
    admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
    ap = ArangoPipe(config=admin.get_config())

    if clean:
        admin.delete_arangomldb()
        admin.create_db()
        admin.create_enterprise_ml_graph()

    admin.register_project({"name": "Home_Value_Assessor"})

    period = period_string_generator()
    ds_info = {"description": "Housing Price Data"}
    # Column name -> dtype name; reused (and updated) for every run below.
    featureset = {
        column: str(dtype)
        for column, dtype in data.dtypes.to_dict().items()
    }

    for count, data_tuple in enumerate(dataset_generator(), start=1):
        logger.info("Processing Dataset:" + str(count))
        aperiod = next(period)
        X_train, X_test = data_tuple[0], data_tuple[1]
        y_train, y_test = data_tuple[2], data_tuple[3]
        X_val, y_val = data_tuple[4], data_tuple[5]

        alpha_random = np.random.uniform(0.0005, 0.001)
        lrm = linear_model.Lasso(alpha=alpha_random)
        lrm.fit(X_train, y_train)

        # Validation metrics are stored with the run.
        (rmse, mae, r2) = eval_metrics(y_val, lrm.predict(X_val))
        ruuid = uuid.uuid4()
        run_id = str(ruuid)
        model_perf = {
            "rmse": rmse,
            "r2": r2,
            "mae": mae,
            "run_id": run_id,
            "timestamp": str(dt.datetime.now()),
        }

        # Test-split metrics are logged as serving performance.
        (rmse, mae, r2) = eval_metrics(y_test, lrm.predict(X_test))
        ex_servingperf = {
            "rmse": rmse,
            "r2": r2,
            "mae": mae,
            "period": aperiod,
        }

        deployment_tag = "Deployment_HPE_" + aperiod
        dataset_tag = "Housing_Dataset_" + aperiod
        pipeline_tag = "Pipeline_HPE" + aperiod
        feature_pipeline_tag = "Feature Pipeline HPE" + aperiod

        ds_info.update({
            "name": dataset_tag,
            "tag": dataset_tag,
            "source": "Housing Price Operational Data Store",
        })
        featureset.update({
            "generated_by": feature_pipeline_tag,
            "name": "log_transformed_house_value_" + run_id,
            "tag": dataset_tag,
        })

        ds_reg = ap.register_dataset(ds_info)
        fs_reg = ap.register_featureset(featureset, ds_reg["_key"])

        model_info = {
            "name": "Housing Regression Model_" + run_id,
            "type": "LASSO regression",
            "tag": "model_period:" + aperiod,
        }
        model_reg = ap.register_model(model_info,
                                      project="Home_Value_Assessor")

        run_info = {
            "dataset": ds_reg["_key"],
            "featureset": fs_reg["_key"],
            "run_id": run_id,
            "model": model_reg["_key"],
            "model-params": {"alpha": alpha_random, "run_id": run_id},
            "model-perf": model_perf,
            "pipeline": pipeline_tag,
            "project": "Housing Price Assessor",
            "tag_for_deployment": True,
            "deployment_tag": deployment_tag,
        }
        ap.log_run(run_info)
        admin.register_deployment(deployment_tag)
        ap.log_serving_perf(ex_servingperf, deployment_tag,
                            "Arangopipe Test Data Generator")

    print("Done loading data into the test database!")

    return
예제 #12
0
def run_driver():
    """Train a linear regression model and log the experiment in Arangopipe.

    The model is trained for a fixed number of epochs with Adam and an MSE
    loss; the square root of the mean batch loss is printed and recorded
    every fifth epoch. The model is then evaluated on the test loader, and
    the project, model, dataset, feature set and run are registered through
    the Arangopipe API. Finally the stored loss schedule is looked up again
    and printed.

    NOTE(review): the model is registered under project
    "Housing_Price_Estimation_Project" while the run info carries
    "Housing Price Estimation Project" -- confirm whether that is intended.
    """
    params = {'batch_size': 128, 'shuffle': True, 'num_workers': 6}
    trng_dataset = CH_Dataset()
    test_dataset = CH_Dataset(train=False)
    training_generator = data.DataLoader(trng_dataset, **params)
    test_generator = data.DataLoader(test_dataset, **params)

    m = CH_LinearRegression(inputSize=trng_dataset.input_size,
                            outputSize=trng_dataset.output_size)
    cost_func = nn.MSELoss()
    learning_rate = 0.1
    optimizer = torch.optim.Adam(m.parameters(), lr=learning_rate)
    num_epochs = 100
    all_losses = []
    loss_sched = {}

    # ---- training ----
    for e in range(num_epochs):
        batch_losses = []
        for Xb, yb in training_generator:
            features = Variable(Xb).float()
            targets = Variable(yb).float()

            # forward pass
            preds = torch.flatten(m(features))
            loss = cost_func(preds, targets)

            # backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_value = loss.item()
            batch_losses.append(loss_value)
            all_losses.append(loss_value)

        mbl = sqrt(np.mean(batch_losses))

        if e % 5 == 0:
            print("training loss: " + str(mbl))
            loss_sched[e] = mbl

    # ---- evaluation on the test loader ----
    test_batch_losses = []
    test_pred_list = []
    test_acts_list = []
    for Xb, yb in test_generator:
        features = Variable(Xb).float()
        targets = Variable(yb).float()

        test_preds = torch.flatten(m(features))
        test_loss = cost_func(test_preds, targets)
        test_pred_list.extend(test_preds.detach().numpy().ravel())
        test_acts_list.extend(targets.numpy().ravel())
        test_batch_losses.append(test_loss.item())

    tmbl = sqrt(np.mean(test_batch_losses))
    print("test loss: " + str(tmbl))

    # ---- store experiment results in Arangopipe ----
    conn_config = ArangoPipeConfig()
    msc = ManagedServiceConnParam()
    test_cfg = get_test_config()
    section = test_cfg['arangodb']
    copied_keys = (
        msc.DB_SERVICE_HOST,
        msc.DB_SERVICE_END_POINT,
        msc.DB_SERVICE_NAME,
        msc.DB_SERVICE_PORT,
        msc.DB_CONN_PROTOCOL,
    )
    conn_params = {key: section[key] for key in copied_keys}

    conn_config = conn_config.create_connection_config(conn_params)
    admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
    admin.register_project({"name": "Housing_Price_Estimation_Project"})
    ap = ArangoPipe(config=admin.get_config())

    ruuid = str(uuid.uuid4().int)
    model_info = {
        "name": "pytorch-linear-reg_dev_run_" + ruuid,
        "type": "model-development",
    }
    model_reg = ap.register_model(model_info,
                                  project="Housing_Price_Estimation_Project")
    ds_reg = ap.register_dataset(trng_dataset.get_dataset())
    fs_reg = ap.register_featureset(trng_dataset.get_featureset(),
                                    ds_reg["_key"])

    model_params = {
        "optimizer": "Adam",
        "training_epochs": num_epochs,
        "batch_size": params['batch_size'],
        "learning_rate": learning_rate,
        "run_id": ruuid,
    }
    model_perf = {
        "training_loss_schedule": jsonpickle.encode(loss_sched),
        "run_id": ruuid,
        "timestamp": str(datetime.datetime.now()),
    }
    run_tag = "Housing-Price-Pytorch-Experiment_dev_run_" + ruuid
    run_info = {
        "dataset": ds_reg["_key"],
        "featureset": fs_reg["_key"],
        "run_id": ruuid,
        "model": model_reg["_key"],
        "model-params": model_params,
        "model-perf": model_perf,
        "tag": run_tag,
        "project": "Housing Price Estimation Project",
    }
    ap.log_run(run_info)

    mp = ap.lookup_modelperf(run_tag)
    print(
        "A look up of the loss schedule for this experiment in Arangopipe yields:"
    )
    print(str(mp["training_loss_schedule"]))

    return
    def __init__(self, *args, **kwargs):
        """Initialise the base class, then load the test configuration.

        All positional and keyword arguments are forwarded unchanged to the
        parent initialiser.
        """
        super(TestArangopipe, self).__init__(*args, **kwargs)
        # Loaded once per test-case instance.
        self.test_cfg = self.get_test_config()
        self.mscp = ManagedServiceConnParam()
예제 #14
0
from arangopipe.arangopipe_storage.arangopipe_config import ArangoPipeConfig
from arangopipe.arangopipe_storage.managed_service_conn_parameters import ManagedServiceConnParam


def eval_metrics(actual, pred):
    """Return ``(rmse, mae, r2)`` for predictions against actual values.

    Args:
        actual: ground-truth target values.
        pred: predicted values to compare against ``actual``.

    Returns:
        A 3-tuple: square root of the mean squared error, mean absolute
        error, and the r2 score, in that order.
    """
    return (
        np.sqrt(mean_squared_error(actual, pred)),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )


if __name__ == "__main__":

    proj_info = {"name": "Wine-Quality-Regression-Modelling"}
    conn_config = ArangoPipeConfig()
    msc = ManagedServiceConnParam()
    conn_params = { msc.DB_SERVICE_HOST : "localhost", \
                    msc.DB_SERVICE_END_POINT : "apmdb",\
                    msc.DB_SERVICE_NAME : "createDB",\
                    msc.DB_SERVICE_PORT : 8529,
                    msc.DB_CONN_PROTOCOL : 'http'}

    conn_config = conn_config.create_connection_config(conn_params)
    admin = ArangoPipeAdmin(reuse_connection=False, config=conn_config)
    proj_reg = admin.register_project(proj_info)

    warnings.filterwarnings("ignore")
    np.random.seed(40)
    ap_config = admin.get_config()
    ap = ArangoPipe(config=ap_config)