def test(cas_session, iris_dataset):
    """Train, register, publish and score an Iris classifier end to end.

    A gradient boosting model is trained in CAS and saved as an ASTORE, and
    a second gradient boosting model is trained locally with scikit-learn.
    Both are registered with ``register_model``, published with
    ``publish_model(..., 'maslocal', replace=True)`` and scored against the
    first row of the uploaded table.  The label predicted by the two
    published modules must agree.

    The test is currently skipped unconditionally, so nothing below the
    ``pytest.skip`` call is executed.
    """
    pytest.skip('Re-enable once MAS publish no longer hangs.')

    cas_session.loadactionset('decisiontree')

    tbl = cas_session.upload(iris_dataset).casTable
    features = list(tbl.columns[tbl.columns != TARGET])

    # Fit a gradient boosting model in CAS and output an ASTORE
    tbl.decisiontree.gbtreetrain(target=TARGET,
                                 inputs=features,
                                 savestate='model_table')
    astore = cas_session.CASTable('model_table')

    # Imported locally so that the module can be collected without sklearn.
    from sklearn.ensemble import GradientBoostingClassifier

    X = iris_dataset.drop(TARGET, axis=1)
    y = iris_dataset[TARGET]
    sk_model = GradientBoostingClassifier()
    sk_model.fit(X, y)

    sas_model = register_model(astore, SAS_MODEL_NAME, PROJECT_NAME, force=True)
    sk_model = register_model(sk_model, SCIKIT_MODEL_NAME, PROJECT_NAME, input=X)

    # Publish to MAS
    sas_module = publish_model(sas_model, 'maslocal', replace=True)
    sk_module = publish_model(sk_model, 'maslocal', replace=True)

    # Pass a row of data to MAS and receive the predicted result.
    first_row = tbl.head(1)
    result = sas_module.score(first_row)

    # The result must unpack into exactly five fields.  Only ``species`` is
    # used below; the other fields get distinct throwaway names (the original
    # bound the same name twice in this unpacking).
    _p0, _p1, _p2, species, _warning = result

    result2 = sk_module.predict(first_row)
    assert result2 in ('setosa', 'virginica', 'versicolor')

    # SAS model may have CHAR variable that's padded with spaces.
    assert species.strip() == result2

    # The values returned by predict_proba must sum to 1 (to 5 decimals).
    result3 = sk_module.predict_proba(first_row)
    assert round(sum(result3), 5) == 1
def test(cas_session, boston_dataset):
    """Check that a CAS GLM and a scikit-learn LinearRegression agree in MAS.

    The data set is uploaded to CAS, where a ``glm`` model targeting
    ``'Price'`` is fitted and saved as an ASTORE.  A scikit-learn
    ``LinearRegression`` is fitted on the same data locally.  Both models
    are registered, published with ``publish_model(..., 'maslocal',
    replace=True)`` and asked to score the first row of the CAS table; the
    two float results must match when rounded to 5 decimals.
    """
    cas_session.loadactionset('regression')

    cas_table = cas_session.upload(boston_dataset).casTable
    is_input = cas_table.columns != 'Price'
    input_columns = cas_table.columns[is_input]

    # Fit a linear regression model in CAS and output an ASTORE
    cas_table.glm(
        target='Price',
        inputs=list(input_columns),
        savestate='model_table',
    )
    astore_table = cas_session.CASTable('model_table')

    from sklearn.linear_model import LinearRegression

    X = boston_dataset.drop('Price', axis=1)
    y = boston_dataset['Price']
    estimator = LinearRegression()
    estimator.fit(X, y)

    registered_sas = register_model(astore_table, SAS_MODEL_NAME, PROJECT_NAME,
                                    force=True)
    registered_sk = register_model(estimator, SCIKIT_MODEL_NAME, PROJECT_NAME,
                                   input=X)

    # Publish to MAS
    sas_module = publish_model(registered_sas, 'maslocal', replace=True)
    sk_module = publish_model(registered_sk, 'maslocal', replace=True)

    # Pass a row of data to MAS and receive the predicted result.
    row = cas_table.head(1)

    sas_prediction = sas_module.score(row)
    assert isinstance(sas_prediction, float)

    sk_prediction = sk_module.predict(row)
    assert isinstance(sk_prediction, float)

    # Both engines must produce the same value to 5 decimal places.
    assert round(sas_prediction, 5) == round(sk_prediction, 5)
def test_register_model(self, iris_dataset):
    """Register a scikit-learn gradient boosting classifier and check metadata.

    Skipped when ``sklearn`` cannot be imported.  The classifier is fitted
    on ``iris_dataset`` with ``'Species'`` as the target, registered under
    ``self.MODEL_NAME`` / ``self.PROJECT_NAME`` and the returned model's
    name, project, function, algorithm and tool are verified.
    """
    pytest.importorskip('sklearn')

    from sasctl import register_model
    from sklearn.ensemble import GradientBoostingClassifier

    target_column = 'Species'
    inputs = iris_dataset.drop(target_column, axis=1)
    labels = iris_dataset[target_column]

    classifier = GradientBoostingClassifier()
    classifier.fit(inputs, labels)

    registered = register_model(
        classifier,
        self.MODEL_NAME,
        self.PROJECT_NAME,
        input=inputs,
        force=True,
    )

    # Identity of the registered model
    assert registered.name == self.MODEL_NAME
    assert registered.projectName == self.PROJECT_NAME

    # Metadata reported for the registered model
    assert registered.function.lower() == 'classification'
    assert registered.algorithm.lower() == 'gradient boosting'
    assert registered.tool.lower().startswith('python')
def test_register_model(self, cas_session, boston_dataset):
    """Register a CAS ``glm`` ASTORE and check the resulting model metadata.

    The data set is uploaded to CAS, a ``glm`` model targeting ``'Price'``
    is fitted with its state saved to ``'model_table'``, and that table is
    registered under ``self.MODEL_NAME`` / ``self.PROJECT_NAME``.  The
    returned model's name, project, function, algorithm and target variable
    are then verified.
    """
    from sasctl import register_model

    target_column = 'Price'

    # Upload the data to CAS
    cas_table = cas_session.upload(boston_dataset).casTable

    # Create the model
    cas_session.loadactionset('regression')
    input_columns = cas_table.columns[cas_table.columns != target_column]
    cas_table.glm(
        target=target_column,
        inputs=list(input_columns),
        savestate='model_table',
    )
    astore_table = cas_session.CASTable('model_table')

    registered = register_model(
        astore_table,
        self.MODEL_NAME,
        self.PROJECT_NAME,
        force=True,
    )

    # Identity of the registered model
    assert registered.name == self.MODEL_NAME
    assert registered.projectName == self.PROJECT_NAME

    # Metadata reported for the registered model
    assert registered.function.lower() == 'prediction'
    assert registered.algorithm.lower() == 'linear regression'
    assert registered.targetVariable.lower() == 'price'
def test_register_model(self, boston_dataset):
    """Register a scikit-learn gradient boosting regressor and check metadata.

    Skipped when ``sklearn`` cannot be imported.  The regressor is fitted
    on ``boston_dataset`` with ``'Price'`` as the target, registered under
    ``self.MODEL_NAME`` / ``self.PROJECT_NAME`` and the returned model's
    name, project, function, algorithm, target level and tool are verified.
    """
    pytest.importorskip('sklearn')

    from sasctl import register_model
    from sklearn.ensemble import GradientBoostingRegressor

    target_column = 'Price'
    inputs = boston_dataset.drop(target_column, axis=1)
    prices = boston_dataset[target_column]

    regressor = GradientBoostingRegressor()
    regressor.fit(inputs, prices)

    registered = register_model(
        regressor,
        self.MODEL_NAME,
        self.PROJECT_NAME,
        input=inputs,
        force=True,
    )

    # Identity of the registered model
    assert registered.name == self.MODEL_NAME
    assert registered.projectName == self.PROJECT_NAME

    # Metadata reported for the registered model
    assert registered.function.lower() == 'prediction'
    assert registered.algorithm.lower() == 'gradient boosting'
    assert registered.targetLevel.lower() == 'interval'
    assert registered.tool.lower().startswith('python')
#!/usr/bin/env python
# encoding: utf-8
#
# Copyright © 2019, SAS Institute Inc., Cary, NC, USA. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

"""Fit a scikit-learn logistic regression on Iris and register it.

The model is registered as 'Logistic Regression' in the 'Iris' project
while a ``Session`` to the host is open.
"""

import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LogisticRegression

from sasctl import Session, register_model

# Load the Iris data and wrap it in DataFrames with readable column names.
raw = datasets.load_iris()
X = pd.DataFrame(raw.data, columns=['SepalLength', 'SepalWidth',
                                    'PetalLength', 'PetalWidth'])
y = pd.DataFrame(raw.target, columns=['Species'], dtype='category')

# Replace the integer category codes with the species names.
# ``rename_categories`` is used instead of assigning to ``.cat.categories``
# because that setter was deprecated in pandas 1.5 and removed in 2.0.
y['Species'] = y['Species'].cat.rename_categories(raw.target_names)

model = LogisticRegression()
model.fit(X, y)

# Host and credentials are placeholders; replace them before running.
with Session('example.com', user='******', password='******'):
    register_model(model, 'Logistic Regression', project='Iris', force=True)
# Build a one-row sample of the model output: the fifth column of ``table``
# (index 4) plus three columns named 'P_set', 'P_vers' and 'P_virg'.
outputs = pd.DataFrame(columns=[str(table.columns.to_list()[4]),
                                'P_set', 'P_vers', 'P_virg'])
# Hard-coded placeholder row.
# model.predict_proba(inputs[:1])
outputs.loc[len(outputs)] = ['virginica', 0.5, 0.5, .5]

### DON'T DO THAT IN PRODUCTION
model_exists = model_repository.get_model(modelname, refresh=False)

# model_repository.delete_model(modelname)

# ``is None`` is the identity test; ``== None`` would go through ``__eq__``
# of whatever object ``get_model`` returns.
if model_exists is None:
    print('Creating new model')
    register_model(
        model=model,
        name=modelname,
        project=project,
        input=inputs,  ## somehow using a pd.df bug but SASdf don't
        force=True)
else:
    print('Model exists, creting new version')
    # Remove the existing model's contents before registering again with
    # version='latest'.
    model_repository.delete_model_contents(modelname)
    register_model(model=model,
                   name=modelname,
                   project=project,
                   input=inputs,
                   force=True,
                   version='latest')

### adding extra files
### not needed but good practice
path = Path.cwd()
# Read the local CSV file into a Pandas DataFrame.
df = pd.read_csv('/home/viya/data/Los_Angeles_house_prices.csv')

# Split the frame: the target column (the median home value) becomes y and
# every remaining column becomes a model input in X.
target = 'medv'
y = df[target]
X = df.drop(target, axis=1)

# Train the scikit-learn gradient boosting regressor on the full data set.
model = GradientBoostingRegressor()
model.fit(X, y)

# Everything that talks to Viya happens while the session is open.
with Session('dsasspre.org', 'robinswu', 'password'):
    model_name = 'GB Regression'
    project_name = 'Los Angeles Housing'

    # Store the fitted model in SAS Model Manager; X describes the inputs.
    register_model(
        model,
        model_name,
        project_name,
        input=X,
        force=True,
    )

    # Push the registered model to the real-time scoring engine.
    module = publish_model(
        model_name,
        'maslocal',
        replace=True,
    )

    # Score the first training record with the published module.
    x = X.iloc[0, :]
    result = module.predict(x)
    print(result)
s = Session('hostname', 'username', 'password')

# The register_model task will attempt to extract the necessary metadata from the provided ASTORE file or Python model.
# However, if this doesn't work for your model or you need to specify different metadata, you can provide it as a
# dictionary instead.  For a full list of parameters that can be specified see the documentation here:
# https://developer.sas.com/apis/rest/DecisionManagement/#schemamodel
model_info = {
    'name': 'Custom Model',
    'description': 'This model is for demonstration purposes only.',
    'scoreCodeType': 'Python',
    'algorithm': 'Other'
}

# This Python file is uploaded as one of the model files.  Opening it in a
# ``with`` block guarantees the handle is closed once registration has
# finished (or failed) instead of being left open.
with open(__file__) as this_file:
    # To include the contents of the model itself, simply provide the information for each file in a list.
    files = [
        # Files can be added to the model by specifying a name of the file and its contents
        dict(name='greetings.txt', file='Hello World!'),

        # You can also specify file-like object to be included.  Here we upload this Python file itself to the model.
        # In addition, the optional `role` parameter can be used to assign a File Role to the file in Model Manager.
        dict(name=__file__, file=this_file, role='Score code'),

        # The files also need not be simple text.  Here we create a simple Python datetime object, pickle it, and then
        # include the binary file with the model.
        dict(name='datetime.pkl', file=pickle.dumps(datetime.now()))
    ]

    model = register_model(model_info,
                           name=model_info['name'],
                           project='Examples',
                           files=files,
                           force=True)
# Reference to the CAS table named by ``astore_table``.
astore = conn.CASTable(astore_table)

#### connecting from SASCTL
# NOTE(review): verify_ssl=False is passed explicitly here; confirm this is
# acceptable for the target environment.
s = Session(host, user, password, verify_ssl=False)

model_exists = model_repository.get_model(modelname, refresh=False)

# model_repository.delete_model(modelname)

# ``is None`` is the identity test; ``== None`` would go through ``__eq__``
# of whatever object ``get_model`` returns.
if model_exists is None:
    print('Creating new model')
    model = register_model(astore, modelname, project, force=True)
else:
    print('Model exists, creting new version')
    # The existing model is deleted before registering again with
    # version='latest'.
    # NOTE(review): ``model`` is only bound in the branch above; confirm
    # later code does not rely on it after this branch.
    model_repository.delete_model(modelname)
    register_model(model=astore,
                   name=modelname,
                   project=project,
                   force=True,
                   version='latest')

path = Path.cwd()

##############################
######## adding files ########
# Load the Iris data set and convert into a Pandas data frame.
raw = datasets.load_iris()
X = pd.DataFrame(raw.data, columns=['SepalLength', 'SepalWidth',
                                    'PetalLength', 'PetalWidth'])
y = pd.DataFrame(raw.target, columns=['Species'], dtype='category')

# Replace the integer category codes with the species names.
# ``rename_categories`` is used instead of assigning to ``.cat.categories``
# because that setter was deprecated in pandas 1.5 and removed in 2.0.
y['Species'] = y['Species'].cat.rename_categories(raw.target_names)

# Fit a sci-kit learn model
model = LogisticRegression()
model.fit(X, y)

# Establish a session with Viya
with Session('hostname', 'username', 'password'):
    model_name = 'Iris Regression'

    # Register the model in Model Manager
    register_model(model,
                   model_name,
                   input=X,         # Use X to determine model inputs
                   project='Iris',  # Register in "Iris" project
                   force=True)      # Create project if it doesn't exist

    # Publish the model to the real-time scoring engine
    module = publish_model(model_name, 'maslocal')

    # Select the first row of training data
    x = X.iloc[0, :]

    # Call the published module and score the record; the row is expanded
    # so that each column is passed as a keyword argument.
    result = module.score(**x)
    print(result)