Example #1
0
def create_files_from_astore(table):
    """Generate files for importing a model from an ASTORE.

    Parameters
    ----------
    table : swat.CASTable
        The CAS table containing the ASTORE.

    Returns
    -------
    dict
        Dictionary of filename: content pairs.

    Raises
    ------
    RuntimeError
        If the `swat` package is unavailable or the `astore.describe`
        action fails.
    ValueError
        If `table` is not a CASTable.

    """
    if swat is None:
        raise RuntimeError("The 'swat' package is required to work with "
                           "ASTORE models.")

    if not isinstance(table, swat.CASTable):
        raise ValueError("Parameter 'table' should be an instance of '%r' but "
                         "received '%r'." % (swat.CASTable, table))

    sess = table.session.get_connection()
    sess.loadactionset('astore')

    result = sess.astore.describe(rstore=table, epcode=True)

    # Raise error if describe action failed - check BEFORE consuming any of
    # the action results below.
    if result.status_code != 0:
        raise RuntimeError(result)

    # Model Manager expects a 0-byte ASTORE file.  Will retrieve actual ASTORE
    # from CAS during model publish.
    astore = bytes()

    astore_key = result.Key.Key[0].strip()

    # Remove "Keep" sas code from CAS/EP code so full table plus output are
    # returned. This is so the MM performance charts and test work.
    keepstart = result.epcode.find("Keep")
    if keepstart == -1:
        # No "Keep" statement present; use the EP code unchanged.  (Without
        # this guard, find() returning -1 would silently corrupt the code.)
        ep_ds2 = result.epcode
    else:
        keepend = result.epcode.find(";", keepstart)
        ep_ds2 = result.epcode[:keepstart] + result.epcode[keepend + 1:]

    package_ds2 = _generate_package_code(result)
    model_properties = _get_model_properties(result)
    input_vars = [
        get_variable_properties(var)
        for var in result.InputVariables.itertuples()
    ]
    output_vars = [
        get_variable_properties(var)
        for var in result.OutputVariables.itertuples()
    ]
    astore_filename = '_' + uuid.uuid4().hex[:25].upper()

    # Copy the ASTORE table to the ModelStore.
    # Raise an error if the action fails
    with swat.options(exception_on_severity=2):
        table.save(name=astore_filename, caslib='ModelStore', replace=True)

    file_metadata = [{
        'role': 'analyticStore',
        'name': ''
    }, {
        'role': 'score',
        'name': 'dmcas_epscorecode.sas'
    }]

    astore_metadata = [{
        'name': astore_filename,
        'caslib': 'ModelStore',
        'uri': '/dataTables/dataSources/cas~fs~cas-shared-default~fs~'
               'ModelStore/tables/{}'.format(astore_filename),
        'key': astore_key
    }]

    return {
        'dmcas_packagescorecode.sas': '\n'.join(package_ds2),
        'dmcas_epscorecode.sas': ep_ds2,
        astore_filename: astore,
        'ModelProperties.json': model_properties,
        'fileMetadata.json': file_metadata,
        'AstoreMetadata.json': astore_metadata,
        'inputVar.json': input_vars,
        'outputVar.json': output_vars
    }
Example #2
0
def update_model_performance(data, model, label, refresh=True):
    """Upload data for calculating model performance metrics.

    Model performance and data distributions can be tracked over time by
    designating one or more tables that contain input data and target values.
    Performance metrics can be updated by uploading a data set for a new time
    period and executing the performance definition.

    Parameters
    ----------
    data : Dataframe
    model : str or dict
        The name or id of the model, or a dictionary representation of
        the model.
    label : str
        The time period the data is from.  Should be unique and will be
        displayed on performance charts.  Examples: 'Q1', '2019', 'APR2019'.
    refresh : bool, optional
        Whether to execute the performance definition and refresh results with
        the new data.

    Returns
    -------
    CASTable
        The CAS table containing the performance data.

    See Also
    --------
     :meth:`model_management.create_performance_definition <.ModelManagement.create_performance_definition>`

    .. versionadded:: v1.3

    """
    from .services import model_management as mm
    try:
        import swat
    except ImportError:
        raise RuntimeError("The 'swat' package is required to save model "
                           "performance data.")

    # Default to true
    refresh = True if refresh is None else refresh

    model_obj = mr.get_model(model)

    if model_obj is None:
        # %-format the message; passing extra args to ValueError does NOT
        # interpolate them, it just makes a tuple.
        raise ValueError('Model %s was not found.' % model)

    project = mr.get_project(model_obj.projectId)

    # Performance monitoring only makes sense for prediction/classification
    # projects with an interval or binary target and a prediction variable.
    if project.get('function',
                   '').lower() not in ('prediction', 'classification'):
        raise ValueError(
            "Performance monitoring is currently supported for "
            "regression and binary classification projects.  "
            "Received project with '%s' function.  Should be "
            "'Prediction' or 'Classification'." % project.get('function'))
    elif project.get('targetLevel', '').lower() not in ('interval', 'binary'):
        raise ValueError(
            "Performance monitoring is currently supported for "
            "regression and binary classification projects.  "
            "Received project with '%s' target level.  Should be "
            "'Interval' or 'Binary'." % project.get('targetLevel'))
    elif project.get('predictionVariable', '') == '':
        raise ValueError("Project '%s' does not have a prediction variable "
                         "specified." % project)

    # Find the performance definition for the model
    # As of Viya 3.4, no way to search by model or project
    perf_def = None
    for p in mm.list_performance_definitions():
        if model_obj.id in p.modelIds:
            perf_def = p
            break

    if perf_def is None:
        raise ValueError("Unable to find a performance definition for model "
                         "'%s'" % model)

    # Check where performance datasets should be uploaded
    cas_id = perf_def['casServerId']
    caslib = perf_def['dataLibrary']
    table_prefix = perf_def['dataPrefix']

    # All input variables must be present
    missing_cols = [
        col for col in perf_def.inputVariables if col not in data.columns
    ]
    if missing_cols:
        raise ValueError(
            "The following columns were expected but not found in "
            "the data set: %s" % ', '.join(missing_cols))

    # If CAS is not executing the model then the output variables must also be
    # provided
    if not perf_def.scoreExecutionRequired:
        missing_cols = [
            col for col in perf_def.outputVariables if col not in data.columns
        ]
        if missing_cols:
            raise ValueError(
                "The following columns were expected but not found in the data "
                "set: %s" % ', '.join(missing_cols))

    sess = current_session()
    url = '{}://{}/{}-http/'.format(sess._settings['protocol'], sess.hostname,
                                    cas_id)
    # Matches existing performance tables and captures their sequence number.
    regex = r'{}_(\d)_.*_{}'.format(table_prefix, model_obj.id)

    # Save the current setting before overwriting
    orig_sslreqcert = os.environ.get('SSLREQCERT')

    # If SSL connections to microservices are not being verified, don't attempt
    # to verify connections to CAS - most likely certs are not in place.
    if not sess.verify:
        os.environ['SSLREQCERT'] = 'no'

    try:
        # Upload the performance data to CAS
        with swat.CAS(url,
                      username=sess.username,
                      password=sess._settings['password']) as s:

            s.setsessopt(messagelevel='warning')

            with swat.options(exception_on_severity=2):
                caslib_info = s.table.tableinfo(caslib=caslib)

            all_tables = getattr(caslib_info, 'TableInfo', None)
            if all_tables is not None:
                # Find tables with similar names
                perf_tables = all_tables.Name.str.extract(regex,
                                                          flags=re.IGNORECASE,
                                                          expand=False)

                # Get last-used sequence number
                last_seq = perf_tables.dropna().astype(int).max()
                next_seq = 1 if math.isnan(last_seq) else last_seq + 1
            else:
                next_seq = 1

            table_name = '{prefix}_{sequence}_{label}_{model}'.format(
                prefix=table_prefix,
                sequence=next_seq,
                label=label,
                model=model_obj.id)

            with swat.options(exception_on_severity=2):
                # Table must be promoted so performance jobs can access.
                tbl = s.upload(data,
                               casout=dict(name=table_name,
                                           caslib=caslib,
                                           promote=True)).casTable
    finally:
        # Restore the environment even if the upload fails.
        if orig_sslreqcert is not None:
            os.environ['SSLREQCERT'] = orig_sslreqcert
        elif not sess.verify:
            # Variable was unset before we overrode it; remove it again.
            os.environ.pop('SSLREQCERT', None)

    # Execute the definition if requested
    if refresh:
        mm.execute_performance_definition(perf_def)

    return tbl
Example #3
0
def create_package_from_astore(table):
    """Create an importable model package from an ASTORE.

    Parameters
    ----------
    table : swat.CASTable
        The CAS table containing the ASTORE.

    Returns
    -------
    BytesIO
        A byte stream representing a ZIP archive which can be imported.

    Raises
    ------
    RuntimeError
        If the `swat` package is unavailable or the `astore.describe`
        action fails.
    ValueError
        If `table` is not a CASTable or the ASTORE binary cannot be
        downloaded.

    See Also
    --------
    :meth:`model_repository.import_model_from_zip <.ModelRepository.import_model_from_zip>`

    """
    if swat is None:
        raise RuntimeError("The 'swat' package is required to work with "
                           "ASTORE models.")

    # Explicit exception instead of ``assert`` so the check survives running
    # Python with -O (and matches create_files_from_astore).
    if not isinstance(table, swat.CASTable):
        raise ValueError("Parameter 'table' should be an instance of '%r' but "
                         "received '%r'." % (swat.CASTable, table))

    sess = table.session.get_connection()
    sess.loadactionset('astore')

    result = sess.astore.describe(rstore=table, epcode=True)

    # Raise error if describe action fails - before spending time downloading
    # the ASTORE binary.
    if result.status_code != 0:
        raise RuntimeError(result)

    astore = sess.astore.download(rstore=table)
    if not hasattr(astore, "blob"):
        raise ValueError("Failed to download binary data for ASTORE '%s'." %
                         astore)

    astore = bytes(astore.blob)  # Convert from SWAT blob type

    astore_key = result.Key.Key[0].strip()
    ep_ds2 = result.epcode
    package_ds2 = _generate_package_code(result)
    model_properties = _get_model_properties(result)
    input_vars = [
        get_variable_properties(var)
        for var in result.InputVariables.itertuples()
    ]
    output_vars = [
        get_variable_properties(var)
        for var in result.OutputVariables.itertuples()
    ]
    astore_filename = '_' + uuid.uuid4().hex[:25].upper()

    # Copy the ASTORE table to the ModelStore.
    # Raise an error if the action fails
    with swat.options(exception_on_severity=2):
        table.save(name=astore_filename, caslib='ModelStore', replace=True)

    file_metadata = [{
        'role': 'analyticStore',
        'name': ''
    }, {
        'role': 'score',
        'name': 'dmcas_packagescorecode.sas'
    }]

    astore_metadata = [{
        'name': astore_filename,
        'caslib': 'ModelStore',
        'uri': '/dataTables/dataSources/cas~fs~cas-shared-default~fs~'
               'ModelStore/tables/{}'.format(astore_filename),
        'key': astore_key
    }]

    zip_file = _build_zip_from_files({
        'dmcas_packagescorecode.sas': '\n'.join(package_ds2),
        'dmcas_epscorecode.sas': ep_ds2,
        astore_filename: astore,
        'ModelProperties.json': model_properties,
        'fileMetadata.json': file_metadata,
        'AstoreMetadata.json': astore_metadata,
        'inputVar.json': input_vars,
        'outputVar.json': output_vars
    })

    return zip_file
Example #4
0
def create_package_from_astore(table):
    """Create an importable model package (ZIP) from an ASTORE.

    Parameters
    ----------
    table : swat.CASTable
        The CAS table containing the ASTORE.

    Returns
    -------
    BytesIO
        A byte stream representing a ZIP archive which can be imported.

    Raises
    ------
    RuntimeError
        If the `swat` package is unavailable or the `astore.describe`
        action fails.
    ValueError
        If `table` is not a CASTable.

    """
    if swat is None:
        raise RuntimeError("The 'swat' package is required to work with ASTORE models.")

    # Explicit exception instead of ``assert`` so the check survives running
    # Python with -O.
    if not isinstance(table, swat.CASTable):
        raise ValueError("Parameter 'table' should be an instance of '%r' but "
                         "received '%r'." % (swat.CASTable, table))

    sess = table.session.get_connection()
    sess.loadactionset('astore')

    result = sess.astore.describe(rstore=table, epcode=True)

    # Raise error if describe action fails - before spending time downloading
    # the ASTORE binary.
    if result.status_code != 0:
        raise RuntimeError(result)

    astore = sess.astore.download(rstore=table).blob
    astore = bytes(astore)      # Convert from SWAT blob type

    astore_key = result.Key.Key[0].strip()
    ds2 = _generate_package_code(result)
    model_properties = _get_model_properties(result)
    input_vars = [get_variable_properties(var) for var in result.InputVariables.itertuples()]
    output_vars = [get_variable_properties(var) for var in result.OutputVariables.itertuples()]
    astore_filename = '_' + uuid.uuid4().hex[:25].upper()

    # Copy the ASTORE table to the ModelStore.
    # Raise an error if the action fails
    with swat.options(exception_on_severity=2):
        table.save(name=astore_filename, caslib='ModelStore', replace=True)

    file_metadata = [{'role': 'analyticStore', 'name': ''},
                     {'role': 'score', 'name': 'dmcas_packagescorecode.sas'}]

    astore_metadata = [{'name': astore_filename,
                        'caslib': 'ModelStore',
                        'uri': '/dataTables/dataSources/cas~fs~cas-shared-default~fs~ModelStore/tables/{}'.format(astore_filename),
                        'key': astore_key}]

    # Create the temp folder BEFORE entering the try block: if mkdtemp() is
    # inside the try and raises, the finally clause would reference an
    # unbound `folder` and mask the original error with a NameError.
    folder = tempfile.mkdtemp()
    try:
        # Closure for easily adding JSON files
        def json_file(data, filename):
            filename = os.path.join(folder, filename)
            with open(filename, 'w') as f:
                json.dump(data, f, indent=1)

        filename = os.path.join(folder, 'dmcas_packagescorecode.sas')
        with open(filename, 'w') as f:
            f.write('\n'.join(ds2))

        filename = os.path.join(folder, astore_filename)
        with open(filename, 'wb') as f:
            f.write(astore)

        json_file(model_properties, 'ModelProperties.json')
        json_file(file_metadata, 'fileMetadata.json')
        json_file(astore_metadata, 'AstoreMetadata.json')
        json_file(input_vars, 'inputVar.json')
        json_file(output_vars, 'outputVar.json')

        # Capture the file list before model.zip is created so the archive
        # does not include itself.
        files = os.listdir(folder)

        with zipfile.ZipFile(os.path.join(folder, 'model.zip'), 'w') as z:
            for file in files:
                z.write(os.path.join(folder, file), file)

        # Need to return the ZIP file data but also need to ensure the directory is cleaned up.
        # Read the bytes from disk and return an in memory "file".
        with open(os.path.join(folder, 'model.zip'), 'rb') as z:
            return io.BytesIO(z.read())

    finally:
        shutil.rmtree(folder)