def test_modelpipeline_classmethod_constructor(mock_future,
                                               container_response_stub):
    """Check that ``ModelPipeline.from_existing`` rebuilds a pipeline.

    A mocked client returns ``container_response_stub`` for the training
    container and a credential named ``'Token'``; every attribute of the
    resulting pipeline is then compared to the container's arguments.

    ``mock_future`` is requested as a fixture but not referenced in the
    body -- presumably it patches out the model future; confirm against
    the fixture definition.
    """
    container = container_response_stub
    mock_client = mock.Mock()
    mock_client.scripts.get_containers.return_value = container
    mock_client.credentials.get.return_value = Response({'name': 'Token'})
    resources = {
        'REQUIRED_CPU': 1000,
        'REQUIRED_MEMORY': 9999,
        'REQUIRED_DISK_SPACE': -20,
    }

    # test everything is working fine
    mp = _model.ModelPipeline.from_existing(1, 1, client=mock_client)
    assert isinstance(mp, _model.ModelPipeline)
    assert mp.dependent_variable == [container.arguments['TARGET_COLUMN']]
    assert mp.primary_key == container.arguments['PRIMARY_KEY']

    # The conditional expression binds more loosely than ``==``, so the
    # expected value must be parenthesized; otherwise a missing argument
    # turns the statement into ``assert None`` and fails unconditionally.
    excluded = container.arguments.get('EXCLUDE_COLS', None)
    assert mp.excluded_columns == (excluded.split() if excluded else None)

    assert mp.model == container.arguments['MODEL']
    assert mp.calibration == container.arguments['CALIBRATION']
    assert mp.cv_params == json.loads(container.arguments['CVPARAMS'])
    assert mp.parameters == json.loads(container.arguments['PARAMS'])
    assert mp.job_resources == resources
    # The last six characters of the container name are dropped
    # (presumably the " Train" suffix -- confirm against the stub).
    assert mp.model_name == container.name[:-6]
    assert mp.notifications == {
        camel_to_snake(key): val
        for key, val in container.notifications.items()
    }

    deps = container.arguments.get('DEPENDENCIES', None)
    assert mp.dependencies == (deps.split() if deps else None)
    assert mp.git_token_name == 'Token'
def __init__(self, json_data, snake_case=True, headers=None):
    """Populate this response from decoded JSON and optional headers.

    Parameters
    ----------
    json_data : dict or None
        Decoded JSON payload. ``None`` yields a response with no items.
    snake_case : bool, optional
        If True, each top-level key is passed through ``camel_to_snake``
        before being stored.
    headers : mapping, optional
        When given, stored on the instance along with the values of the
        ``X-RateLimit-Remaining`` and ``X-RateLimit-Limit`` entries.

    Notes
    -----
    Nested dicts become ``Response(value, False)``; dicts found inside
    lists become ``Response(item)``. Every converted item is stored both
    through ``self.update`` and as an instance attribute.
    """
    self.json_data = json_data
    if headers is not None:
        # this circumvents recursive calls
        self.headers = headers
        self.calls_remaining = headers.get('X-RateLimit-Remaining')
        self.rate_limit = headers.get('X-RateLimit-Limit')

    # Gather every converted item first; they are applied in one go below.
    converted = {}
    pairs = json_data.items() if json_data is not None else ()
    for raw_key, raw_value in pairs:
        name = camel_to_snake(raw_key) if snake_case else raw_key
        if isinstance(raw_value, dict):
            converted[name] = Response(raw_value, False)
        elif isinstance(raw_value, list):
            converted[name] = [
                Response(item) if isinstance(item, dict) else item
                for item in raw_value
            ]
        else:
            converted[name] = raw_value

    self.update(converted)
    # Touch self.__dict__ last so that a key called "update" cannot
    # replace the update method before it has been used.
    self.__dict__.update(converted)
def test_camel_to_snake():
    """Check ``camel_to_snake`` against a table of (input, expected)."""
    expectations = (
        ('CAMELCase', 'camel_case'),
        ('camelCase', 'camel_case'),
        ('CamelCase', 'camel_case'),
        ('c__amel', 'c__amel'),
    )
    for given, wanted in expectations:
        assert camel_to_snake(given) == wanted
def name_and_type_doc(name, prop, child, level, optional=False):
    """Build the ``name : type`` line of a parameter's doc entry.

    The result is meant to be combined with a separate doc string
    element that carries the parameter's description.

    Parameters
    ----------
    name : str
        Parameter name; converted with ``camel_to_snake``.
    prop
        Property definition handed to ``property_type``.
    child
        If truthy, ``"::"`` is appended to the line.
    level : int
        Nesting depth. Each level adds four spaces of indent, and any
        level above zero also adds a ``"- "`` bullet.
    optional : bool, optional
        If True, ``", optional"`` follows the type.

    Returns
    -------
    str
    """
    type_text = property_type(prop)
    snake = camel_to_snake(name)
    padding = " " * level * 4

    bullet = ""
    if level > 0:
        bullet = "- "

    trailer = ""
    if child:
        trailer = "::"

    optional_text = ""
    if optional:
        optional_text = ", optional"

    return "{}{}{} : {}{}{}".format(
        padding, bullet, snake, type_text, optional_text, trailer)
def parse_param_body(parameter):
    """Expand a body parameter's schema into argument dictionaries.

    Each entry of ``parameter['schema']['properties']`` yields one dict
    with the keys ``name`` (snake-cased), ``in`` (always ``"body"``),
    ``required`` and ``doc``; these can be used to add the parameter to
    a dynamically generated function.

    Parameters
    ----------
    parameter : dict
        Must contain ``schema`` with a ``properties`` mapping; the
        schema's ``required`` list is optional.

    Returns
    -------
    list of dict
    """
    schema = parameter['schema']
    properties = schema['properties']
    required_names = schema.get('required', [])

    def describe(prop_name, prop):
        # One argument dict per property of the body schema.
        snake = camel_to_snake(prop_name)
        needed = prop_name in required_names
        lines = docs_from_property(prop_name, prop, properties, 0,
                                   not needed)
        return {"name": snake,
                "in": "body",
                "required": needed,
                "doc": "\n".join(lines) + "\n"}

    return [describe(prop_name, prop)
            for prop_name, prop in properties.items()]
def parse_param(param):
    """Turn one parameter definition into a list of argument dicts.

    Body parameters (``param['in'] == 'body'``) are expanded through
    ``parse_param_body``; any other parameter yields exactly one dict
    with the keys ``name``, ``in``, ``required`` and ``doc``.

    Parameters
    ----------
    param : dict
        Needs ``in``; non-body parameters also need ``name`` and
        ``required``.

    Returns
    -------
    list of dict
    """
    location = param['in']
    if location == 'body':
        return list(parse_param_body(param))

    snake = camel_to_snake(param['name'])
    needed = param['required']
    return [{"name": snake,
             "in": location,
             "required": needed,
             "doc": doc_from_param(param)}]
def doc_from_param(param):
    """Render the docstring fragment for a single parameter.

    The fragment is a ``name : type[, optional]`` header line followed,
    when the parameter has a description, by that description wrapped
    to 79 columns with a four-space indent. Fragments are intended to
    be joined into the parameter section of a function's docstring.

    Parameters
    ----------
    param : dict
        Needs ``name``, ``type`` and ``required``; ``description`` is
        optional.

    Returns
    -------
    str
    """
    snake = camel_to_snake(param['name'])
    type_text = param['type']
    description = param.get('description')
    suffix = ", optional" if not param["required"] else ""

    if description:
        pad = " " * 4
        body = textwrap.fill(description,
                             initial_indent=pad,
                             subsequent_indent=pad,
                             width=79) + "\n"
    else:
        body = ""

    header = "{} : {}{}\n".format(snake, type_text, suffix)
    return header + body
def __init__(self, json_data, snake_case=True, headers=None):
    """Populate this response from decoded JSON and optional headers.

    Parameters
    ----------
    json_data : dict or None
        Decoded JSON payload. ``None`` is accepted and yields a
        response with no items.
    snake_case : bool, optional
        If True, each top-level key is passed through ``camel_to_snake``
        before being stored.
    headers : mapping, optional
        When given, stored on the instance along with the values of the
        ``X-RateLimit-Remaining`` and ``X-RateLimit-Limit`` entries.

    Notes
    -----
    Nested dicts become ``Response(value, False)``; dicts found inside
    lists become ``Response(item)``. Every converted item is stored both
    through ``self.update`` and as an instance attribute.
    """
    self.json_data = json_data
    if headers is not None:
        # this circumvents recursive calls
        self.headers = headers
        self.calls_remaining = headers.get('X-RateLimit-Remaining')
        self.rate_limit = headers.get('X-RateLimit-Limit')

    # Collect everything first instead of writing key by key: putting a
    # key named "update" into self.__dict__ mid-loop would shadow the
    # update method and break the next iteration's self.update call.
    self_updates = {}
    if json_data is not None:
        for key, v in json_data.items():
            if snake_case:
                key = camel_to_snake(key)

            if isinstance(v, dict):
                val = Response(v, False)
            elif isinstance(v, list):
                val = [Response(o) if isinstance(o, dict) else o
                       for o in v]
            else:
                val = v
            self_updates[key] = val

    self.update(self_updates)
    # Update self.__dict__ last so the update method is still intact
    # when it is called just above.
    self.__dict__.update(self_updates)
def from_existing(cls, train_job_id, train_run_id='latest', client=None):
    """Build a :class:`ModelPipeline` from an already-trained model.

    Parameters
    ----------
    train_job_id : int
        The ID of the CivisML job in the Civis Platform
    train_run_id : int or string, optional
        Which run of the job to use, either

        * an explicit run ID,
        * "latest" : The most recent run
        * "active" : The run designated by the training job's
          "active build" parameter
    client : :class:`~civis.APIClient`, optional
        If not provided, an :class:`~civis.APIClient` object will be
        created from the :envvar:`CIVIS_API_KEY`.

    Returns
    -------
    :class:`~civis.ml.ModelPipeline`
        A :class:`~civis.ml.ModelPipeline` which refers to
        a previously-trained model

    Raises
    ------
    ValueError
        If the API answers with status 404 for the requested job / run.

    Warns
    -----
    RuntimeWarning
        If the training template has no known prediction template; the
        largest known prediction template ID is used instead.

    Examples
    --------
    >>> from civis.ml import ModelPipeline
    >>> model = ModelPipeline.from_existing(job_id)
    >>> model.train_result_.metrics['roc_auc']
    0.843
    """
    def split_if_set(value):
        # Whitespace-split a non-empty string; pass falsy values through.
        return value.split() if value else value

    train_job_id = int(train_job_id)  # Convert np.int to int
    if client is None:
        client = APIClient(resources='all')
    train_run_id = _decode_train_run(train_job_id, train_run_id, client)

    try:
        fut = ModelFuture(train_job_id, train_run_id, client=client)
        container = client.scripts.get_containers(train_job_id)
    except CivisAPIError as api_err:
        if api_err.status_code == 404:
            msg = ('There is no Civis Platform job with '
                   'script ID {} and run ID {}!'.format(train_job_id,
                                                        train_run_id))
            six.raise_from(ValueError(msg), api_err)
        raise

    script_args = container.arguments

    # Older templates used "WORKFLOW" instead of "MODEL"
    model = script_args.get('MODEL', script_args.get('WORKFLOW'))
    dependent_variable = script_args['TARGET_COLUMN'].split()
    primary_key = script_args.get('PRIMARY_KEY')
    parameters = json.loads(script_args.get('PARAMS', "{}"))
    cross_validation_parameters = json.loads(
        script_args.get('CVPARAMS', "{}"))
    calibration = script_args.get('CALIBRATION')
    excluded_columns = split_if_set(script_args.get('EXCLUDE_COLS', None))

    cpu_requested = script_args.get('REQUIRED_CPU')
    memory_requested = script_args.get('REQUIRED_MEMORY')
    disk_requested = script_args.get('REQUIRED_DISK_SPACE')

    # Strip object-applied suffix
    model_name = container.name
    if model_name.endswith(' Train'):
        model_name = model_name[:-len(' Train')]

    notifications = {}
    for key, val in container.notifications.items():
        notifications[camel_to_snake(key)] = val

    dependencies = split_if_set(script_args.get('DEPENDENCIES', None))

    # The stored argument identifies a credential; the pipeline is
    # given that credential's name.
    git_token_name = script_args.get('GIT_CRED', None)
    if git_token_name:
        git_token_name = client.credentials.get(git_token_name).name

    pipeline = cls(model=model,
                   dependent_variable=dependent_variable,
                   primary_key=primary_key,
                   model_name=model_name,
                   parameters=parameters,
                   cross_validation_parameters=cross_validation_parameters,
                   calibration=calibration,
                   excluded_columns=excluded_columns,
                   client=client,
                   cpu_requested=cpu_requested,
                   disk_requested=disk_requested,
                   memory_requested=memory_requested,
                   notifications=notifications,
                   dependencies=dependencies,
                   git_token_name=git_token_name,
                   verbose=script_args.get('DEBUG', False))
    pipeline.train_result_ = fut

    # Set prediction template corresponding to training template
    template_id = int(container['from_template_id'])
    p_id = _PRED_TEMPLATES.get(template_id)
    if p_id is None:
        warnings.warn('Model %s was trained with a newer version of '
                      'CivisML than is available in the API client '
                      'version %s. Please update your API client version. '
                      'Attempting to use an older version of the '
                      'prediction code. Prediction will either fail '
                      'immediately or succeed.'
                      % (train_job_id, __version__), RuntimeWarning)
        p_id = max(_PRED_TEMPLATES.values())
    pipeline.predict_template_id = p_id

    return pipeline