Example #1
0
def test_modelpipeline_classmethod_constructor(mock_future,
                                               container_response_stub):
    """Check that ``ModelPipeline.from_existing`` rebuilds a pipeline
    whose attributes mirror the arguments of the stubbed container.

    ``mock_future`` is requested only for its fixture side effects; it is
    not referenced in the body.
    """
    container = container_response_stub
    mock_client = mock.Mock()
    mock_client.scripts.get_containers.return_value = container
    mock_client.credentials.get.return_value = Response({'name': 'Token'})

    resources = {
        'REQUIRED_CPU': 1000,
        'REQUIRED_MEMORY': 9999,
        'REQUIRED_DISK_SPACE': -20
    }

    # test everything is working fine
    mp = _model.ModelPipeline.from_existing(1, 1, client=mock_client)
    assert isinstance(mp, _model.ModelPipeline)
    assert mp.dependent_variable == [container.arguments['TARGET_COLUMN']]
    assert mp.primary_key == container.arguments['PRIMARY_KEY']
    excluded = container.arguments.get('EXCLUDE_COLS', None)
    # The conditional must be parenthesized: ``a == b if c else None``
    # parses as ``(a == b) if c else None``, which would turn a missing
    # value into ``assert None`` and fail the test.
    assert mp.excluded_columns == (excluded.split() if excluded else None)
    assert mp.model == container.arguments['MODEL']
    assert mp.calibration == container.arguments['CALIBRATION']
    assert mp.cv_params == json.loads(container.arguments['CVPARAMS'])
    assert mp.parameters == json.loads(container.arguments['PARAMS'])
    assert mp.job_resources == resources
    # The last six characters are dropped to match the ' Train' suffix
    # that ``from_existing`` strips from the container name.
    assert mp.model_name == container.name[:-6]
    assert mp.notifications == {
        camel_to_snake(key): val
        for key, val in container.notifications.items()
    }
    deps = container.arguments.get('DEPENDENCIES', None)
    # Same precedence issue as for the excluded columns above.
    assert mp.dependencies == (deps.split() if deps else None)
    assert mp.git_token_name == 'Token'
Example #2
0
    def __init__(self, json_data, snake_case=True, headers=None):
        """Populate the response from a decoded JSON mapping.

        Parameters
        ----------
        json_data : dict or None
            Decoded payload. ``None`` yields a response with no entries.
        snake_case : bool, optional
            When true, top-level keys are passed through
            ``camel_to_snake`` before being stored.
        headers : mapping, optional
            When given, it is stored along with the values found under
            ``X-RateLimit-Remaining`` and ``X-RateLimit-Limit``.
        """
        self.json_data = json_data
        if headers is not None:
            # Only set when headers are supplied; nested responses are
            # built without them.
            self.headers = headers
            self.calls_remaining = headers.get('X-RateLimit-Remaining')
            self.rate_limit = headers.get('X-RateLimit-Limit')

        # Converted entries are staged here and applied in one go below.
        pending = {}
        entries = json_data.items() if json_data is not None else ()
        for raw_key, raw_value in entries:
            name = camel_to_snake(raw_key) if snake_case else raw_key

            if isinstance(raw_value, dict):
                # Nested mappings keep their keys untouched.
                wrapped = Response(raw_value, False)
            elif isinstance(raw_value, list):
                wrapped = [
                    Response(item) if isinstance(item, dict) else item
                    for item in raw_value
                ]
            else:
                wrapped = raw_value

            pending[name] = wrapped

        self.update(pending)
        # Touch self.__dict__ last so that a payload key cannot replace
        # the update method before it has been called.
        self.__dict__.update(pending)
Example #3
0
def test_camel_to_snake():
    """Check ``camel_to_snake`` against a handful of known conversions."""
    inputs = ('CAMELCase', 'camelCase', 'CamelCase', 'c__amel')
    expected = ('camel_case', 'camel_case', 'camel_case', 'c__amel')
    for word, want in zip(inputs, expected):
        assert camel_to_snake(word) == want
Example #4
0
def name_and_type_doc(name, prop, child, level, optional=False):
    """ Build the "name : type" line of a parameter's doc string.

    The line is indented four spaces per ``level`` and, below the top
    level, prefixed with "- ". ", optional" is appended when ``optional``
    is true and "::" when ``child`` is truthy. This is intended to be
    combined with another doc string element that gives a description
    of the parameter.
    """
    type_label = property_type(prop)
    param_name = camel_to_snake(name)

    prefix = " " * (level * 4)
    if level > 0:
        prefix += "- "

    suffix = ""
    if optional:
        suffix += ", optional"
    if child:
        suffix += "::"

    return "{}{} : {}{}".format(prefix, param_name, type_label, suffix)
Example #5
0
def parse_param_body(parameter):
    """ Turn the schema properties of a body parameter into a list of
    argument dictionaries, one per property, which can be used to add
    the parameter to a dynamically generated function.
    """
    schema = parameter['schema']
    properties = schema['properties']
    required_names = schema.get('required', [])

    def build_argument(raw_name, prop):
        # Describe one schema property as an argument dictionary.
        python_name = camel_to_snake(raw_name)
        mandatory = raw_name in required_names
        doc_lines = docs_from_property(raw_name, prop, properties, 0,
                                       not mandatory)
        return {
            "name": python_name,
            "in": "body",
            "required": mandatory,
            "doc": "\n".join(doc_lines) + "\n",
        }

    return [build_argument(raw_name, prop)
            for raw_name, prop in properties.items()]
Example #6
0
def parse_param(param):
    """ Convert one parameter description into a list of argument
    dictionaries which can be used to add the parameter to a
    dynamically generated function.

    A parameter located in the body expands to whatever
    ``parse_param_body`` returns; any other parameter yields a
    single-element list.
    """
    location = param['in']
    if location == 'body':
        return list(parse_param_body(param))

    return [{
        "name": camel_to_snake(param['name']),
        "in": location,
        "required": param['required'],
        "doc": doc_from_param(param),
    }]
Example #7
0
def doc_from_param(param):
    """ Build the doc string fragment for one parameter.

    The fragment is a "name : type[, optional]" line followed, when the
    parameter has a description, by that description wrapped to 79
    columns with a four-space indent. It is intended to be joined with
    other fragments to form the parameter section of a function's
    docstring.
    """
    name = camel_to_snake(param['name'])
    type_name = param['type']
    description = param.get('description')
    if param["required"]:
        optional_suffix = ""
    else:
        optional_suffix = ", optional"

    if description:
        pad = "    "
        body = textwrap.fill(description,
                             width=79,
                             initial_indent=pad,
                             subsequent_indent=pad) + "\n"
    else:
        body = ""

    head = "{} : {}{}\n".format(name, type_name, optional_suffix)
    return head + body
Example #8
0
    def __init__(self, json_data, snake_case=True, headers=None):
        """Populate the response from a decoded JSON mapping.

        Parameters
        ----------
        json_data : dict or None
            Decoded payload. ``None`` is accepted and yields a response
            with no entries.
        snake_case : bool, optional
            When true, top-level keys are passed through
            ``camel_to_snake`` before being stored.
        headers : mapping, optional
            When given, it is stored along with the values found under
            ``X-RateLimit-Remaining`` and ``X-RateLimit-Limit``.
        """
        self.json_data = json_data
        if headers is not None:
            # this circumvents recursive calls
            self.headers = headers
            self.calls_remaining = headers.get('X-RateLimit-Remaining')
            self.rate_limit = headers.get('X-RateLimit-Limit')

        # Stage all converted entries first and apply them once at the end.
        # Writing to self.__dict__ inside the loop would let a payload key
        # named "update" shadow the update method and break the next
        # iteration.
        self_updates = {}

        if json_data is not None:
            for key, v in json_data.items():
                if snake_case:
                    key = camel_to_snake(key)

                if isinstance(v, dict):
                    # Nested mappings keep their keys untouched.
                    val = Response(v, False)
                elif isinstance(v, list):
                    val = [
                        Response(o) if isinstance(o, dict) else o for o in v
                    ]
                else:
                    val = v

                self_updates[key] = val

        self.update(self_updates)
        # Update self.__dict__ at the end to avoid replacing the update method.
        self.__dict__.update(self_updates)
Example #9
0
    def from_existing(cls, train_job_id, train_run_id='latest', client=None):
        """Build a :class:`ModelPipeline` from the IDs of an existing model

        Parameters
        ----------
        train_job_id : int
            The ID of the CivisML job in the Civis Platform
        train_run_id : int or string, optional
            Location of the model run, either

            * an explicit run ID,
            * "latest" : The most recent run
            * "active" : The run designated by the training job's
              "active build" parameter
        client : :class:`~civis.APIClient`, optional
            If not provided, an :class:`~civis.APIClient` object will be
            created from the :envvar:`CIVIS_API_KEY`.

        Returns
        -------
        :class:`~civis.ml.ModelPipeline`
            A :class:`~civis.ml.ModelPipeline` which refers to
            a previously-trained model

        Raises
        ------
        ValueError
            If looking up the job and run fails with a 404 API error.

        Examples
        --------
        >>> from civis.ml import ModelPipeline
        >>> model = ModelPipeline.from_existing(job_id)
        >>> model.train_result_.metrics['roc_auc']
        0.843
        """
        def split_if_set(value):
            # Whitespace-split a non-empty string; pass anything falsy
            # through unchanged.
            return value.split() if value else value

        train_job_id = int(train_job_id)  # Convert np.int to int
        if client is None:
            client = APIClient(resources='all')
        train_run_id = _decode_train_run(train_job_id, train_run_id, client)
        try:
            future = ModelFuture(train_job_id, train_run_id, client=client)
            container = client.scripts.get_containers(train_job_id)
        except CivisAPIError as api_err:
            if api_err.status_code != 404:
                raise
            template = ('There is no Civis Platform job with '
                        'script ID {} and run ID {}!')
            msg = template.format(train_job_id, train_run_id)
            six.raise_from(ValueError(msg), api_err)

        job_args = container.arguments

        # Older templates used "WORKFLOW" instead of "MODEL"
        model = job_args.get('MODEL', job_args.get('WORKFLOW'))
        targets = job_args['TARGET_COLUMN'].split()
        primary_key = job_args.get('PRIMARY_KEY')
        parameters = json.loads(job_args.get('PARAMS', "{}"))
        cv_parameters = json.loads(job_args.get('CVPARAMS', "{}"))
        calibration = job_args.get('CALIBRATION')
        excluded_columns = split_if_set(job_args.get('EXCLUDE_COLS', None))
        cpu = job_args.get('REQUIRED_CPU')
        memory = job_args.get('REQUIRED_MEMORY')
        disk = job_args.get('REQUIRED_DISK_SPACE')

        model_name = container.name
        train_suffix = ' Train'
        if model_name.endswith(train_suffix):
            # Strip object-applied suffix
            model_name = model_name[:len(model_name) - len(train_suffix)]

        notifications = dict(
            (camel_to_snake(key), val)
            for key, val in container.notifications.items()
        )
        dependencies = split_if_set(job_args.get('DEPENDENCIES', None))

        git_token_name = job_args.get('GIT_CRED', None)
        if git_token_name:
            # The stored argument is handed to the credentials endpoint
            # and replaced by the name of the credential it returns.
            credential = client.credentials.get(git_token_name)
            git_token_name = credential.name

        pipeline = cls(
            model=model,
            dependent_variable=targets,
            primary_key=primary_key,
            model_name=model_name,
            parameters=parameters,
            cross_validation_parameters=cv_parameters,
            calibration=calibration,
            excluded_columns=excluded_columns,
            client=client,
            cpu_requested=cpu,
            disk_requested=disk,
            memory_requested=memory,
            notifications=notifications,
            dependencies=dependencies,
            git_token_name=git_token_name,
            verbose=job_args.get('DEBUG', False),
        )
        pipeline.train_result_ = future

        # Set prediction template corresponding to training template
        train_template_id = int(container['from_template_id'])
        predict_template_id = _PRED_TEMPLATES.get(train_template_id)
        if predict_template_id is None:
            # Unknown training template: warn and fall back to the
            # largest known prediction template ID.
            warnings.warn('Model %s was trained with a newer version of '
                          'CivisML than is available in the API client '
                          'version %s. Please update your API client version. '
                          'Attempting to use an older version of the '
                          'prediction code. Prediction will either fail '
                          'immediately or succeed.'
                          % (train_job_id, __version__), RuntimeWarning)
            predict_template_id = max(_PRED_TEMPLATES.values())
        pipeline.predict_template_id = predict_template_id

        return pipeline