Exemple #1
0
    def test_create_project(self):
        """Creating a project should add exactly one entry to the project list."""
        # Snapshot how many projects exist before the test creates one
        existing = mr.list_projects()
        assert isinstance(existing, list)
        count_before = len(existing)

        # The new project is created in the default repository, referenced by its id
        repo_id = mr.default_repository().get('id')
        created = mr.create_project(self.project_name, repo_id)
        assert isinstance(created, RestObj)

        # Query again: the listing must now contain one more project
        count_after = len(mr.list_projects())
        assert count_after == count_before + 1
Exemple #2
0
def register_model(model,
                   name,
                   project,
                   repository=None,
                   input=None,
                   version='latest',
                   files=None,
                   force=False):
    """Register a model in the model repository.

    Parameters
    ----------
    model : swat.CASTable or sklearn.BaseEstimator or dict
        The model to register.  If an instance of ``swat.CASTable`` the table is assumed to hold an ASTORE, which will
        be downloaded and used to construct the model to register.  If a scikit-learn estimator, the model will be
        pickled and uploaded to the registry and score code will be generated for publishing the model to MAS.
        Anything else must be a dictionary of model metadata, which is passed to the repository as-is.
    name : str
        Designated name for the model in the repository.
    project : str or dict
        The name or id of the project, or a dictionary representation of the project.
    repository : str or dict, optional
        The name or id of the repository, or a dictionary representation of the repository.  If omitted, the default
        repository will be used.
    input : optional
        Forwarded as ``input_types`` to ``from_pickle`` when score code is generated for a scikit-learn model.  Not
        used for the other model types.
    version : {'new', 'latest', int}, optional
        Version number of the project in which the model should be created.  Only forwarded when the model is
        imported from a CAS table.
    files : list, optional
        Additional content to attach to the model after it has been created.  Entries that are dictionaries are
        expanded into keyword arguments for ``add_model_content``; any other entry is passed as a single positional
        argument.  Not used when the model is a CAS table.  The list supplied by the caller is never modified.
    force : bool, optional
        Create dependencies such as projects and repositories if they do not already exist.

    Returns
    -------
    model : RestObj
        The newly registered model as an instance of ``RestObj``

    Raises
    ------
    ValueError
        If the project does not exist and `force` is false, or if the repository cannot be found.
    AssertionError
        If `model` is not one of the supported types, or the repository does not return a ``RestObj``.

    Notes
    -----
    If the specified model is a CAS table the model data and metadata will be written to a temporary zip file and then
    imported using model_repository.import_model_from_zip.

    If the specified model is from the Scikit-Learn package, the model will be created using
    model_repository.create_model and any additional files will be uploaded as content.

    """

    # TODO: Create new version if model already exists
    # TODO: Allow file info to be specified
    # TODO: Performance stats

    # Work on a copy: the scikit-learn branch appends generated files and must not alter the caller's list.
    files = list(files) if files else []

    # Find the project if it already exists
    existing_project = mr.get_project(project) if project is not None else None

    # A missing project is only created when the caller opted in through `force`.
    create_project = existing_project is None and bool(force)

    if existing_project is None and not create_project:
        raise ValueError("Project '{}' not found".format(project))

    # Remember what was asked for so a failed lookup can be reported by name.
    requested_repository = repository
    if requested_repository is None:
        repository = mr.default_repository()
    else:
        repository = mr.get_repository(requested_repository)

    # Unable to find the repo.
    if repository is None:
        if requested_repository is None:
            raise ValueError('Unable to find the default repository')
        raise ValueError(
            "Unable to find repository '{}'".format(requested_repository))

    # If model is a CASTable then assume it holds an ASTORE model.
    # Import these via a ZIP file.  The type is matched by name rather than with isinstance().
    if 'swat.cas.table.CASTable' in str(type(model)):
        package = utils.create_package_from_astore(model)

        if create_project:
            project = mr.create_project(project, repository)

        return mr.import_model_from_zip(name,
                                        project,
                                        package,
                                        version=version)

    # If the model is an scikit-learn model, generate the model dictionary from it and pickle the model for storage
    if all(hasattr(model, attr) for attr in ['_estimator_type', 'get_params']):
        # Pickle the model so we can store it
        model_pkl = pickle.dumps(model)
        files.append({
            'name': 'model.pkl',
            'file': model_pkl,
            'role': 'Python Pickle'
        })

        # Extract model properties
        model = _sklearn_to_dict(model)
        model['name'] = name

        # Generate PyMAS wrapper
        try:
            mas_module = from_pickle(model_pkl,
                                     'predict',
                                     input_types=input,
                                     array_input=True)
            assert isinstance(mas_module, PyMAS)

            # Include score code files from ESP and MAS
            files.append({
                'name': 'dmcas_packagescorecode.sas',
                'file': mas_module.score_code(),
                'role': 'Score Code'
            })
            files.append({
                'name': 'dmcas_espscorecode.sas',
                'file': mas_module.score_code(dest='ESP'),
                'role': 'Score Code'
            })

            model['inputVariables'] = [
                var.as_model_metadata() for var in mas_module.variables
                if not var.out
            ]
            model['outputVariables'] = [
                var.as_model_metadata() for var in mas_module.variables
                if var.out
            ]
        except ValueError:
            # PyMAS creation failed, most likely because input data wasn't provided
            warnings.warn(
                'Unable to determine input/output variables.  Model variables will not be specified.'
            )

    else:
        # Otherwise, the model better be a dictionary of metadata
        assert isinstance(model, dict)

    if create_project:
        # Seed the new project with the model's variables so the two agree.
        variables = model.get('inputVariables', []) + model.get(
            'outputVariables', [])
        target_level = 'Interval' if model.get(
            'function') == 'Regression' else None
        project = mr.create_project(project,
                                    repository,
                                    variables=variables,
                                    targetLevel=target_level)

    model = mr.create_model(model, project)

    assert isinstance(model, RestObj)

    # Upload any additional files
    for content in files:
        if isinstance(content, dict):
            mr.add_model_content(model, **content)
        else:
            mr.add_model_content(model, content)

    return model