def create_files_from_astore(table):
    """Generate files for importing a model from an ASTORE.

    Parameters
    ----------
    table : swat.CASTable
        The CAS table containing the ASTORE.

    Returns
    -------
    dict
        Dictionary of filename: content pairs.

    """
    if swat is None:
        raise RuntimeError("The 'swat' package is required to work with "
                           "ASTORE models.")

    if not isinstance(table, swat.CASTable):
        raise ValueError("Parameter 'table' should be an instance of '%r' but "
                         "received '%r'." % (swat.CASTable, table))

    sess = table.session.get_connection()
    sess.loadactionset('astore')

    result = sess.astore.describe(rstore=table, epcode=True)

    # Raise an error if the describe action fails.
    if result.status_code != 0:
        raise RuntimeError(result)

    # Model Manager expects a 0-byte ASTORE file.  The actual ASTORE will be
    # retrieved from CAS during model publish.
    astore = bytes()

    astore_key = result.Key.Key[0].strip()

    # Remove the "Keep" SAS code from the CAS/EP code so the full input table
    # plus the output columns are returned.  This is required for the Model
    # Manager performance charts and tests to work.
    keepstart = result.epcode.find("Keep")
    keepend = result.epcode.find(";", keepstart)
    ep_ds2 = result.epcode[0:keepstart] + result.epcode[keepend + 1:]

    package_ds2 = _generate_package_code(result)
    model_properties = _get_model_properties(result)
    input_vars = [
        get_variable_properties(var)
        for var in result.InputVariables.itertuples()
    ]
    output_vars = [
        get_variable_properties(var)
        for var in result.OutputVariables.itertuples()
    ]
    astore_filename = '_' + uuid.uuid4().hex[:25].upper()

    # Copy the ASTORE table to the ModelStore.
    # Raise an error if the action fails.
    with swat.options(exception_on_severity=2):
        table.save(name=astore_filename, caslib='ModelStore', replace=True)

    file_metadata = [{'role': 'analyticStore', 'name': ''},
                     {'role': 'score', 'name': 'dmcas_epscorecode.sas'}]

    astore_metadata = [{'name': astore_filename,
                        'caslib': 'ModelStore',
                        'uri': '/dataTables/dataSources/cas~fs~cas-shared-default~fs~'
                               'ModelStore/tables/{}'.format(astore_filename),
                        'key': astore_key}]

    return {
        'dmcas_packagescorecode.sas': '\n'.join(package_ds2),
        'dmcas_epscorecode.sas': ep_ds2,
        astore_filename: astore,
        'ModelProperties.json': model_properties,
        'fileMetadata.json': file_metadata,
        'AstoreMetadata.json': astore_metadata,
        'inputVar.json': input_vars,
        'outputVar.json': output_vars
    }
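
# A minimal usage sketch for create_files_from_astore().  The host, port,
# credentials, caslib, and table name below are hypothetical placeholders, not
# part of this module; an actual CAS server holding an ASTORE table is
# required for this to run.
def _example_create_files_from_astore():
    """Illustrate generating Model Manager import files from an ASTORE."""
    conn = swat.CAS('cas.example.com', 5570, 'username', 'password')
    astore_table = conn.CASTable('my_astore', caslib='casuser')

    files = create_files_from_astore(astore_table)

    # The returned dict maps filenames to content.  Note the ASTORE entry
    # itself is a 0-byte placeholder; the real blob stays in CAS.
    for name, content in files.items():
        print(name, type(content))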
def update_model_performance(data, model, label, refresh=True):
    """Upload data for calculating model performance metrics.

    Model performance and data distributions can be tracked over time by
    designating one or more tables that contain input data and target values.
    Performance metrics can be updated by uploading a data set for a new time
    period and executing the performance definition.

    Parameters
    ----------
    data : pandas.DataFrame
        The input data and actual target values for one time period.
    model : str or dict
        The name or id of the model, or a dictionary representation of the
        model.
    label : str
        The time period the data is from.  Should be unique and will be
        displayed on performance charts.  Examples: 'Q1', '2019', 'APR2019'.
    refresh : bool, optional
        Whether to execute the performance definition and refresh results
        with the new data.

    Returns
    -------
    CASTable
        The CAS table containing the performance data.

    See Also
    --------
    :meth:`model_management.create_performance_definition <.ModelManagement.create_performance_definition>`

    .. versionadded:: v1.3

    """
    from .services import model_management as mm

    try:
        import swat
    except ImportError:
        raise RuntimeError("The 'swat' package is required to save model "
                           "performance data.")

    # Default to True
    refresh = True if refresh is None else refresh

    model_obj = mr.get_model(model)

    if model_obj is None:
        raise ValueError("Model '%s' was not found." % model)

    project = mr.get_project(model_obj.projectId)

    if project.get('function', '').lower() not in ('prediction',
                                                   'classification'):
        raise ValueError("Performance monitoring is currently supported for "
                         "regression and binary classification projects.  "
                         "Received project with '%s' function.  Should be "
                         "'Prediction' or 'Classification'."
                         % project.get('function'))
    elif project.get('targetLevel', '').lower() not in ('interval', 'binary'):
        raise ValueError("Performance monitoring is currently supported for "
                         "regression and binary classification projects.  "
                         "Received project with '%s' target level.  Should be "
                         "'Interval' or 'Binary'."
                         % project.get('targetLevel'))
    elif project.get('predictionVariable', '') == '':
        raise ValueError("Project '%s' does not have a prediction variable "
                         "specified." % project)

    # Find the performance definition for the model.
    # As of Viya 3.4 there is no way to search by model or project.
    perf_def = None
    for p in mm.list_performance_definitions():
        if model_obj.id in p.modelIds:
            perf_def = p
            break

    if perf_def is None:
        raise ValueError("Unable to find a performance definition for model "
                         "'%s'" % model)

    # Check where performance data sets should be uploaded
    cas_id = perf_def['casServerId']
    caslib = perf_def['dataLibrary']
    table_prefix = perf_def['dataPrefix']

    # All input variables must be present
    missing_cols = [col for col in perf_def.inputVariables
                    if col not in data.columns]
    if missing_cols:
        raise ValueError("The following columns were expected but not found "
                         "in the data set: %s" % ', '.join(missing_cols))

    # If CAS is not executing the model then the output variables must also
    # be provided.
    if not perf_def.scoreExecutionRequired:
        missing_cols = [col for col in perf_def.outputVariables
                        if col not in data.columns]
        if missing_cols:
            raise ValueError("The following columns were expected but not "
                             "found in the data set: %s"
                             % ', '.join(missing_cols))

    sess = current_session()
    url = '{}://{}/{}-http/'.format(sess._settings['protocol'],
                                    sess.hostname,
                                    cas_id)
    regex = r'{}_(\d+)_.*_{}'.format(table_prefix, model_obj.id)

    # Save the current setting before overwriting
    orig_sslreqcert = os.environ.get('SSLREQCERT')

    # If SSL connections to microservices are not being verified, don't
    # attempt to verify connections to CAS.  Most likely certs are not in
    # place.
    if not sess.verify:
        os.environ['SSLREQCERT'] = 'no'

    # Upload the performance data to CAS
    with swat.CAS(url,
                  username=sess.username,
                  password=sess._settings['password']) as s:
        s.setsessopt(messagelevel='warning')

        with swat.options(exception_on_severity=2):
            caslib_info = s.table.tableinfo(caslib=caslib)

        all_tables = getattr(caslib_info, 'TableInfo', None)
        if all_tables is not None:
            # Find tables with similar names
            perf_tables = all_tables.Name.str.extract(regex,
                                                      flags=re.IGNORECASE,
                                                      expand=False)

            # Get the last-used sequence number
            last_seq = perf_tables.dropna().astype(int).max()
            next_seq = 1 if math.isnan(last_seq) else last_seq + 1
        else:
            next_seq = 1

        table_name = '{prefix}_{sequence}_{label}_{model}'.format(
            prefix=table_prefix,
            sequence=next_seq,
            label=label,
            model=model_obj.id)

        with swat.options(exception_on_severity=2):
            # Table must be promoted so performance jobs can access it.
            tbl = s.upload(data, casout=dict(name=table_name,
                                             caslib=caslib,
                                             promote=True)).casTable

    # Restore the original value.  If SSLREQCERT was not set before, remove
    # it again so the 'no' override does not leak out of this function.
    if orig_sslreqcert is not None:
        os.environ['SSLREQCERT'] = orig_sslreqcert
    else:
        os.environ.pop('SSLREQCERT', None)

    # Execute the definition if requested
    if refresh:
        mm.execute_performance_definition(perf_def)

    return tbl
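
# A short, hedged example of feeding one period of data to
# update_model_performance().  'MyModel', the label, and the CSV path are
# hypothetical; an authenticated sasctl Session is assumed, and a performance
# definition must already exist for the model
# (see model_management.create_performance_definition).
def _example_update_model_performance():
    """Illustrate uploading quarterly performance data for a model."""
    import pandas as pd

    # Input data and actual target values for the period
    df = pd.read_csv('q1_scores.csv')

    # Uploads the table and, by default, executes the performance definition
    tbl = update_model_performance(df, 'MyModel', 'Q1')
    print(tbl)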
def create_package_from_astore(table):
    """Create an importable model package from an ASTORE.

    Parameters
    ----------
    table : swat.CASTable
        The CAS table containing the ASTORE.

    Returns
    -------
    BytesIO
        A byte stream representing a ZIP archive which can be imported.

    See Also
    --------
    :meth:`model_repository.import_model_from_zip <.ModelRepository.import_model_from_zip>`

    """
    if swat is None:
        raise RuntimeError("The 'swat' package is required to work with "
                           "ASTORE models.")

    if not isinstance(table, swat.CASTable):
        raise ValueError("Parameter 'table' should be an instance of '%r' but "
                         "received '%r'." % (swat.CASTable, table))

    sess = table.session.get_connection()
    sess.loadactionset('astore')

    result = sess.astore.describe(rstore=table, epcode=True)

    # Raise an error if the describe action fails before attempting the
    # download.
    if result.status_code != 0:
        raise RuntimeError(result)

    astore = sess.astore.download(rstore=table)
    if not hasattr(astore, 'blob'):
        raise ValueError("Failed to download binary data for ASTORE '%s'."
                         % table.name)
    astore = bytes(astore.blob)  # Convert from SWAT blob type

    astore_key = result.Key.Key[0].strip()

    ep_ds2 = result.epcode
    package_ds2 = _generate_package_code(result)
    model_properties = _get_model_properties(result)
    input_vars = [
        get_variable_properties(var)
        for var in result.InputVariables.itertuples()
    ]
    output_vars = [
        get_variable_properties(var)
        for var in result.OutputVariables.itertuples()
    ]
    astore_filename = '_' + uuid.uuid4().hex[:25].upper()

    # Copy the ASTORE table to the ModelStore.
    # Raise an error if the action fails.
    with swat.options(exception_on_severity=2):
        table.save(name=astore_filename, caslib='ModelStore', replace=True)

    file_metadata = [{'role': 'analyticStore', 'name': ''},
                     {'role': 'score', 'name': 'dmcas_packagescorecode.sas'}]

    astore_metadata = [{'name': astore_filename,
                        'caslib': 'ModelStore',
                        'uri': '/dataTables/dataSources/cas~fs~cas-shared-default~fs~'
                               'ModelStore/tables/{}'.format(astore_filename),
                        'key': astore_key}]

    zip_file = _build_zip_from_files({
        'dmcas_packagescorecode.sas': '\n'.join(package_ds2),
        'dmcas_epscorecode.sas': ep_ds2,
        astore_filename: astore,
        'ModelProperties.json': model_properties,
        'fileMetadata.json': file_metadata,
        'AstoreMetadata.json': astore_metadata,
        'inputVar.json': input_vars,
        'outputVar.json': output_vars
    })

    return zip_file
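
# Sketch of the end-to-end flow referenced in the docstring above: build a
# package from an ASTORE, then import it with
# model_repository.import_model_from_zip.  The connection details and the
# model/project names are hypothetical placeholders.
def _example_create_package_from_astore():
    """Illustrate packaging an ASTORE and importing it into Model Manager."""
    from sasctl.services import model_repository as model_repo

    conn = swat.CAS('cas.example.com', 5570, 'username', 'password')
    astore_table = conn.CASTable('my_astore', caslib='casuser')

    zip_file = create_package_from_astore(astore_table)
    model_repo.import_model_from_zip('My Model', 'My Project', zip_file)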
def _build_zip_from_files(files):
    """Create an in-memory ZIP archive containing the given files.

    Parameters
    ----------
    files : dict
        Dictionary of filename: content pairs.  Content for '.json' filenames
        is serialized with `json.dump`; `bytes` content is written as-is; all
        other content is written as text.

    Returns
    -------
    io.BytesIO
        A byte stream representing the ZIP archive.

    """
    # Create a temp folder
    folder = tempfile.mkdtemp()

    try:
        for name, content in files.items():
            filename = os.path.join(folder, name)

            if name.lower().endswith('.json'):
                # Serialize dicts/lists to JSON files
                with open(filename, 'w') as f:
                    json.dump(content, f, indent=1)
            elif isinstance(content, bytes):
                # Write binary content (e.g. an ASTORE blob) as-is
                with open(filename, 'wb') as f:
                    f.write(content)
            else:
                # Write score code and other text content
                with open(filename, 'w') as f:
                    f.write(content)

        filenames = os.listdir(folder)

        with zipfile.ZipFile(os.path.join(folder, 'model.zip'), 'w') as z:
            for file in filenames:
                z.write(os.path.join(folder, file), file)

        # Need to return the ZIP file data but also need to ensure the
        # directory is cleaned up.
        # Read the bytes from disk and return an in-memory "file".
        with open(os.path.join(folder, 'model.zip'), 'rb') as z:
            return io.BytesIO(z.read())
    finally:
        shutil.rmtree(folder)
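
# A tiny, self-contained check of the helper above: JSON-serializable content
# goes to .json files, bytes are written as-is, and everything else is written
# as text.  The filenames here are arbitrary examples.
def _example_build_zip_from_files():
    """Illustrate the filename: content mapping the helper expects."""
    data = _build_zip_from_files({
        'ModelProperties.json': {'name': 'example'},
        'dmcas_packagescorecode.sas': 'data out; set in; run;',
        '_EXAMPLEASTORE': b'\x00',
    })

    # BytesIO is a valid file-like input for ZipFile
    with zipfile.ZipFile(data) as z:
        print(z.namelist())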