def execute_module_step(module, step, return_dict=True, **kwargs):
    """Run a single step of a module and return the outputs it produced.

    Parameters
    ----------
    module : str or object
        Value handed to `get_module` to look up the module.  If it has a
        `name` attribute, that name is used in the not-found error message;
        otherwise `str(module)` is used.
    step : str or object
        The step to run.  If the value has an `id` attribute that id is
        placed in the request URL, otherwise the value itself is used.
    return_dict : bool, optional
        When True (the default) the outputs are returned as an ordered
        mapping of output name to value.  When False only the values are
        returned, as if calling another Python function.
    **kwargs
        Step inputs.  Each keyword becomes one name/value pair in the
        request body.

    Returns
    -------
    OrderedDict or tuple or object
        The name -> value mapping when `return_dict` is True.  Otherwise a
        tuple of the values, or the bare value when there is exactly one.

    Raises
    ------
    ValueError
        If `get_module` returns None for `module`.

    """
    if hasattr(module, 'name'):
        label = module.name
    else:
        label = str(module)

    found = get_module(module)
    if found is None:
        raise ValueError("Module '{}' was not found.".format(label))

    module_id = found.id
    step_id = step.id if hasattr(step, 'id') else step

    inputs = [{'name': key, 'value': value} for key, value in kwargs.items()]
    response = post(
        SERVICE_ROOT + '/modules/{}/steps/{}'.format(module_id, step_id),
        json={'inputs': inputs})

    # Collapse the list of name/value pair dictionaries into a single
    # mapping, keeping the order in which the outputs were returned.
    results = OrderedDict()
    for item in response.get('outputs', []):
        key = item['name']
        value = item.get('value')

        # Remove padding from CHAR columns
        if isinstance(value, str):
            value = value.strip()

        results[key] = value

    if return_dict:
        return results

    # Positional-style result: a lone output is unwrapped from the tuple.
    values = tuple(results.values())
    return values[0] if len(values) == 1 else values
def import_model_from_zip(name, project, file, description=None, version='latest'):
    """Import a model into a project by uploading a ZIP archive.

    Parameters
    ----------
    name : str
        Name to give the imported model.
    project : str or dict
        Value handed to `get_project` to look up the target project.
    file : file_like
        Object whose `read()` result is sent as the request body.
    description : str, optional
        Description sent with the request.
    version : str, optional
        Sent as the `versionOption` query parameter.  Defaults to 'latest'.

    Returns
    -------
    The result of the POST request to the `/models` endpoint.

    Raises
    ------
    ValueError
        If `get_project` returns None for `project`.

    """
    # TODO: Allow import into folder if no project is given
    # TODO: Create new version if model already exists

    # Keep the caller's value in `project` so the error message below can
    # report what was actually searched for instead of the `None` result.
    project_obj = get_project(project)

    if project_obj is None:
        raise ValueError('Project `%s` could not be found.' % str(project))

    params = {
        'name': name,
        'description': description,
        'type': 'ZIP',
        'projectId': project_obj.id,
        'versionOption': version
    }

    # NOTE(review): the query string is assembled by hand, so values are not
    # URL-encoded and a `None` description is sent as the text "None".
    # Left unchanged here; confirm what the service expects before altering.
    params = '&'.join(['{}={}'.format(k, v) for k, v in params.items()])

    r = post(ROOT_PATH + '/models#octetStream',
             data=file.read(),
             params=params,
             headers={'Content-Type': 'application/octet-stream'})
    return r
def create_folder(name, parent=None, description=None):
    """Create a new folder.

    Parameters
    ----------
    name : str
        The name of the new folder
    parent : str or dict, optional
        The parent folder for this folder, if any.  Can be a folder name,
        id, or dict response from get_folder
    description : str, optional
        A description of the folder

    Returns
    -------
    The result of the POST request to the `/folders` endpoint.

    """
    # `get_folder` is always consulted, even when no parent was given.
    parent_folder = get_folder(parent)

    if parent_folder:
        parent_ref = parent_folder.id
    else:
        parent_ref = None

    payload = {
        'name': name,
        'description': description,
        'folderType': 'folder',
        'parentFolderUri': parent_ref,
    }
    content_type = 'application/vnd.sas.content.folder+json'

    return post(_SERVICE_ROOT + '/folders',
                json=payload,
                headers={'Content-Type': content_type})
def create_performance_definition(model, library_name, table_name, name=None, description=None, cas_server=None):
    """Create a performance task for a model.

    Parameters
    ----------
    model : str or dict
        Value handed to `get_model` to look up the model.
    library_name : str
        Sent as `dataLibrary` in the request.
    table_name : str
        Sent as `dataTable` in the request.
    name : str, optional
        Name of the definition.  Defaults to the model name followed by
        ' Performance'.
    description : str, optional
        Description of the definition.  Defaults to
        'Performance definition for model ' followed by the model name.
    cas_server : str, optional
        Sent as `casServerId`.  Defaults to 'cas-shared-default'.

    Returns
    -------
    The result of the POST request to the `/performanceTasks` endpoint.

    Raises
    ------
    ValueError
        If the model's project is missing any of the `targetVariable`,
        `targetLevel` or `predictionVariable` properties.

    """
    from .model_repository import get_model, get_project

    model = get_model(model)
    project = get_project(model.projectId)

    # Performance data cannot be captured unless certain project properties
    # have been configured.
    required_properties = ('targetVariable', 'targetLevel', 'predictionVariable')
    for prop in required_properties:
        if getattr(project, prop, None) is None:
            raise ValueError("Project %s must have the '%s' property set."
                             % (project.name, prop))

    if not name:
        name = model.name + ' Performance'
    if not description:
        description = 'Performance definition for model ' + model.name
    if not cas_server:
        cas_server = 'cas-shared-default'

    request = {
        'projectId': project.id,
        'modelIds': [model.id],
        'name': name,
        'description': description,
        'casServerId': cas_server,
        'resultLibrary': 'ModelPerformanceData',
        'dataLibrary': library_name,
        'dataTable': table_name
    }

    def project_variable_names(role):
        # Names of the project-level variables that carry the given role.
        return [var.get('name')
                for var in project.get('variables', [])
                if var.get('role') == role]

    # If model doesn't specify input/output variables, try to pull from
    # project definition
    if len(model.get('inputVariables', [])) > 0:
        input_names = [var.get('name') for var in model['inputVariables']]
        output_names = [var.get('name') for var in model['outputVariables']]
    else:
        input_names = project_variable_names('input')
        output_names = project_variable_names('output')

    request['inputVariables'] = input_names
    request['outputVariables'] = output_names

    content_type = 'application/vnd.sas.models.performance.task+json'
    return post(SERVICE_ROOT + '/performanceTasks',
                json=request,
                headers={'Content-Type': content_type})
def create_project(project, repository, **kwargs):
    """Create a new project inside a repository.

    Parameters
    ----------
    project : str or dict
        Either the name of the new project or a dictionary of project
        properties.  A dictionary is copied; the caller's object is not
        modified.
    repository : str or dict
        Value handed to `get_repository` to look up the repository.  Its
        `id` and `folderId` are written into the project definition.
    **kwargs
        Additional project properties.  Applied last, so they override
        values from `project` and from the repository.

    Returns
    -------
    The result of the POST request to the `/projects` endpoint.

    Raises
    ------
    ValueError
        If `get_repository` returns None for `repository`.

    """
    if isinstance(project, str):
        definition = {'name': project}
    else:
        # Work on a copy so the caller's dictionary is not silently changed.
        definition = dict(project)

    repo = get_repository(repository)
    if repo is None:
        # Fail with a clear message instead of a TypeError from `None['id']`.
        raise ValueError(
            "Repository '{}' could not be found.".format(repository))

    definition['repositoryId'] = repo['id']
    definition['folderId'] = repo['folderId']
    definition.update(kwargs)

    return post(
        ROOT_PATH + '/projects',
        json=definition,
        headers={'Content-Type': 'application/vnd.sas.models.project+json'})
def create_file(file, folder=None, filename=None, expiration=None):
    """Create a new file on the server by uploading a local file.

    Parameters
    ----------
    file : str or file_like
        Path to the file to upload or a file-like object.
    folder : str or dict, optional
        Name or id of a folder, or folder information as returned by
        :func:`.get_folder`.
    filename : str, optional
        Name to assign to the uploaded file.  When `file` is a path this
        defaults to the last path component with its extension removed;
        otherwise it is required.
    expiration : datetime, optional
        A timestamp that indicates when to expire the file.  Currently
        accepted but not sent to the server.

    Returns
    -------
    RestObj
        A dictionary containing the file attributes.

    Raises
    ------
    ValueError
        If `file` is not a path and no `filename` was given, or if `folder`
        was given but could not be found.

    """
    if isinstance(file, six.string_types):
        if not filename:
            base_name = os.path.split(file)[1]
            filename = os.path.splitext(base_name)[0]

        with open(file, 'rb') as handle:
            content = handle.read()
    else:
        if filename is None:
            raise ValueError(
                '`filename` must be specified if `file` is not a path.')
        content = file.read()

    query = {}

    if folder is not None:
        target = get_folder(folder)
        if target is None:
            raise ValueError("Folder '%s' could not be found." % folder)
        query['parentFolderUri'] = get_link(target, 'self')['href']

    if expiration is not None:
        # TODO: add 'expirationTimeStamp' to params.  Need to determine
        #       correct format
        pass

    return post(_SERVICE_ROOT + '/files#multipartUpload',
                files={filename: content},
                params=query)
def add_model_content(model, file, name=None, role=None):
    """Upload a file and attach it to a model as content.

    Parameters
    ----------
    model : str or dict
        A model id, a dictionary containing an `id` key, or any other value
        that `get_model` can resolve to a model.
    file : file_like or object
        The content to upload.  Passed through unchanged as the value of
        the multipart upload.
    name : str, optional
        Name of the content.  When omitted, the base name of `file.name` is
        used if `file` has a string `name` attribute.
    role : str, optional
        Sent as the `role` form field.

    Returns
    -------
    The result of the POST request to the model's `/contents` endpoint.

    Raises
    ------
    ValueError
        If the model cannot be found, or if no `name` was given and one
        cannot be derived from `file`.

    """
    import os

    if is_uuid(model):
        model_id = model
    elif isinstance(model, dict) and 'id' in model:
        model_id = model['id']
    else:
        model_obj = get_model(model)
        if model_obj is None:
            # Fail with a clear message instead of a TypeError from
            # subscripting `None`.
            raise ValueError("Model '{}' was not found.".format(model))
        model_id = model_obj['id']

    if name is None:
        # Without a name the upload would be keyed by `None`, producing a
        # nameless multipart field.  Fall back to the name of the file
        # object where one is available (non-string names, such as file
        # descriptors, are not usable).
        file_name = getattr(file, 'name', None)
        if isinstance(file_name, str) and file_name:
            name = os.path.basename(file_name)

    if name is None:
        raise ValueError(
            '`name` must be specified if it cannot be determined from '
            '`file`.')

    metadata = {'role': role, 'name': name}

    return post(ROOT_PATH + '/models/{}/contents'.format(model_id),
                files={name: file},
                data=metadata)
def create_module(name=None, description=None, source=None, language='python', scope='public'):
    """Create a new module from source code.

    Parameters
    ----------
    name : str
        Sent as the `id` of the module.
    description : str
        Description of the module.
    source : str
        The source code of the module.  Required; converted with `str()`
        before being sent.
    language : str { 'python', 'ds2' }
        Language of `source`.  Determines the `type` sent in the request.
    scope : str { 'public', 'private' }
        Sent as the `scope` of the module.

    Returns
    -------
    The result of the POST request to the `/modules` endpoint.

    Raises
    ------
    ValueError
        If `source` is None or `language` is not a recognized value.

    """
    if source is None:
        raise ValueError('The `source` parameter is required.')
    source = str(source)

    # Content type reported to the service for each supported language.
    known_languages = (
        ('python', 'text/x-python'),
        ('ds2', 'text/vnd.sas.source.ds2'),
    )
    for candidate, content_type in known_languages:
        if language == candidate:
            break
    else:
        raise ValueError('Unrecognized source code language `%s`.' % language)

    payload = {
        'id': name,
        'type': content_type,
        'description': description,
        'source': source,
        'scope': scope
    }

    response = post(SERVICE_ROOT + '/modules', json=payload)
    return response
def publish_model(model, destination, name=None, force=False):
    """Publish a model that is already registered in the model repository.

    Unlike the model_publish service, this does not require code to be
    specified.

    Parameters
    ----------
    model : str or dict
        Value handed to `get_model` to look up the model.
    destination : str
        Sent as `destinationName` in the request.
    name : str, optional
        Name to publish the model under.  Defaults to the registered name
        of the model.  Either way the value is passed through
        `_publish_name` before being sent.
    force : bool, optional
        Sent as the `force` query parameter.  Defaults to False.

    Returns
    -------
    The result of the POST request to the `/publish` endpoint.

    Raises
    ------
    ValueError
        If `get_model` returns None for `model`.

    """
    from .model_repository import get_model, get_model_link

    model_obj = get_model(model)

    if model_obj is None:
        model_name = model.name if hasattr(model, 'name') else str(model)
        raise ValueError("Model '{}' was not found.".format(model_name))

    model_uri = get_model_link(model_obj, 'self')

    # TODO: Verify allowed formats by destination type.
    # As of 19w04 MAS throws HTTP 500 if name is in invalid format.
    #
    # Previously a name was computed here and then discarded, so the `name`
    # parameter had no effect.  An explicit `name` now overrides the
    # registered model name; with the default of None the request is the
    # same as before.
    # NOTE(review): assumes `_publish_name` is the right normalisation for
    # a caller-supplied name as well -- confirm.
    publish_name = _publish_name(name or model_obj.get('name'))

    request = {
        "name": model_obj.get('name'),
        "notes": model_obj.get('description'),
        "modelContents": [{
            "modelName": publish_name,
            "sourceUri": model_uri.get('uri'),
            "publishLevel": "model"
        }],
        "destinationName": destination
    }

    r = post(
        SERVICE_ROOT + '/publish',
        json=request,
        params=dict(force=force),
        headers={
            'Content-Type':
                'application/vnd.sas.models.publishing.request.asynchronous+json'
        })
    return r
def publish_model(model, destination, name=None, code=None, notes=None):
    """Publish a model, sending its score code with the request.

    Parameters
    ----------
    model : str or dict
        Value handed to `get_model` to look up the model.
    destination : str
        Sent as `destinationName` in the request.
    name : str, optional
        Name of the publish request.  Defaults to the model's name.
    code : str, optional
        The score code to publish.  When omitted, the model's `scoreCode`
        link is looked up and, if present, its target is retrieved and used.
    notes : str, optional
        Sent as `notes` in the request.

    Returns
    -------
    The result of the POST request to the `/models` endpoint.

    """
    from .model_repository import get_model, get_model_link

    # Maps the lower-cased `scoreCodeType` of the model to the `codeType`
    # value sent in the request.
    code_types = {
        'ds2package': 'ds2',
        'datastep': 'datastep',
        '': ''
    }

    model = get_model(model)
    self_link = get_model_link(model, 'self')

    # Get score code from registry if no code specified
    if code is None:
        score_link = get_model_link(model, 'scoreCode', True)
        if score_link:
            code = get(score_link['href'])

    request = {
        'name': name or model.get('name'),
        'notes': notes,
        'destinationName': destination,
    }

    score_code_type = model.get('scoreCodeType', '').lower()
    contents = {
        'modelName': model.get('name'),
        'modelId': model.get('id'),
        'sourceUri': self_link.get('href'),
        'publishLevel': 'model',  # ?? What are the options?
        'codeType': code_types[score_code_type],
        'codeUri': '',  # ?? Not needed if code is specified?
        'code': code
    }
    request['modelContents'] = [contents]

    content_type = 'application/vnd.sas.models.publishing.request+json'
    return post(ROOT_PATH + '/models',
                json=request,
                headers={'Content-Type': content_type})
def create_project(name, description=None, image=None):
    """Create a new project.

    Parameters
    ----------
    name : str
        Name of the project.
    description : str
        Description of the project.
    image : str
        URI of an image to use as the project avatar

    Returns
    -------
    RestObj

    """
    payload = dict(name=name, description=description, imageUri=image)
    request_headers = {'Content-Type': 'application/vnd.sas.project+json'}

    return post(ROOT_PATH + '/projects',
                json=payload,
                headers=request_headers)
def parse_documents(documents, caslib=None, id_column=None, text_column=None,
                    description=None, standard_entities=False,
                    noun_groups=False, min_doc_count=10, concept_model=None,
                    output_postfix=None, spell_check=False,
                    override_list=None, stop_list=None, start_list=None,
                    synonym_list=None, language='en'):
    """Performs natural language parsing on the input data.

    Creates a text parsing job that executes asynchronously.  There are two
    different interactions for parsing: parsing documents in CAS tables and
    parsing documents that are uploaded directly.

    Parameters
    ----------
    documents : str or dict or list_like:
        Documents to parse.  May be either the URI to a CAS table where the
        documents are currently stored, or an iterable of strings
        containing the documents' text.
    caslib : str or dict, optional
        URI of a caslib in which the documents will be stored.  Required if
        `documents` is a list of strings.
    id_column : str, optional
        The column in `documents` that contains a unique id for each
        document.  Required if `documents` is a CAS table URI.
    text_column : str, optional
        The column in `documents` that contains the document text to
        parse.  Required if `documents` is a CAS table URI.
    description : str, optional
        Description to add to the text parsing job.
    standard_entities : bool, optional
        Sent as `includeStandardEntities` in the job request.
    noun_groups : bool, optional
        Sent as `includeNounGroups` in the job request.
    min_doc_count : int, optional
        Minimum number of documents in which a term must appear to be
        kept.  Defaults to 10.
    concept_model : str or dict, optional
        URI of a table containing the concept LITI binaries to apply during
        parsing.
    output_postfix : str, optional
        Text to be added to the end of all output table names.
    spell_check : bool, optional
        Whether spell checking should be performed during parsing.
    override_list : str or dict, optional
        URI of a table containing overrides for the keep and drop terms.
    stop_list : str or dict, optional
        Sent as `stopListUri` in the job request.
    start_list : str or dict, optional
        Sent as `startListUri` in the job request.
    synonym_list : str or dict, optional
        Sent as `synonymListUri` in the job request.
    language : str, optional
        Two letter `ISO 639-1 <https://en.wikipedia.org/wiki/ISO_639>`_
        code indicating the source language.  Defaults to 'en'.

    Returns
    -------
    RestObj
        The submitted job

    Raises
    ------
    TypeError
        If `documents` is None.

    See Also
    --------
    :func:`.cas_management.get_caslib`
    :func:`.cas_management.get_table`

    """
    if documents is None:
        raise TypeError('`documents` cannot be None.')

    # A dict or string is treated as a reference to an existing table;
    # anything else is treated as the raw documents themselves.
    if isinstance(documents, (dict, six.string_types)):
        payload = {
            "inputUri": uri_as_str(documents),
            "documentIdVariable": id_column,
            "textVariable": text_column,
            "version": 1
        }
    else:
        payload = {
            'caslibUri': uri_as_str(caslib),
            'documents': documents,
            'version': 1
        }

    # Settings shared by both kinds of request.
    payload["description"] = description
    payload["language"] = language
    payload["includeStandardEntities"] = standard_entities
    payload["includeNounGroups"] = noun_groups
    payload["startListUri"] = uri_as_str(start_list)
    payload["stopListUri"] = uri_as_str(stop_list)
    payload["synonymListUri"] = uri_as_str(synonym_list)
    payload["minimumDocumentCount"] = min_doc_count
    payload["conceptModelUri"] = uri_as_str(concept_model)
    payload["outputTableNamePostfix"] = output_postfix
    payload["enableSpellChecking"] = spell_check
    payload["overrideListUri"] = uri_as_str(override_list)

    # Optional fields are not ignored if None so explicitly remove before
    # sending.
    payload = {key: value
               for key, value in payload.items()
               if value is not None}

    url = _SERVICE_ROOT + '/jobs'

    # Update URL if passing in raw documents.
    if 'documents' in payload:
        url += '#data'
        content_type = \
            'application/vnd.sas.text.parsing.job.request.documents+json'
    else:
        content_type = 'application/vnd.sas.text.parsing.job.request+json'

    headers = {
        'Content-Type': content_type,
        'Accept': 'application/vnd.sas.text.parsing.job+json'
    }

    return post(url, json=payload, headers=headers)
def create_model(model, project, description=None, modeler=None, function=None,
                 algorithm=None, tool=None, is_champion=False, properties=None,
                 **kwargs):
    """Create a new model inside a project.

    Parameters
    ----------
    model : str or dict
        Either the name of the new model or a dictionary of model
        properties.  A dictionary is copied; the caller's object is not
        modified.
    project : str or dict
        Value handed to `get_project` to look up the project that will
        contain the model.
    description : str, optional
        Description of the model.  Falls back to `model['description']`.
    modeler : str, optional
        Name of the user that created the model.  Current user name will be
        used if unspecified.
    function : str, optional
        Falls back to `model['function']`.
    algorithm : str, optional
        Falls back to `model['algorithm']`.
    tool : str, optional
        Falls back to `model['tool']`.
    is_champion : bool, optional
        Whether the model is the champion.  Falls back to
        `model['champion']`.  The `role` of the model is set to 'Champion'
        or 'Challenger' accordingly.
    properties : dict, optional
        Custom properties, converted to a list of name/value pairs.  Only
        used when `model` does not already contain a `properties` entry.
    **kwargs
        Currently unused.

    Returns
    -------
    The result of the POST request to the `/models` endpoint.

    Raises
    ------
    TypeError
        If `model` is neither a string nor a dictionary.
    ValueError
        If `get_project` returns None for `project`.

    Notes
    -----
    Other model fields that may be supplied through the `model` dictionary
    include: scoreCodeType, trainTable,
    classificationEventProbabilityVariableName,
    classificationTargetEventValue, location, targetVariable,
    suggestedChampion, retrainable, immutable, modelVersionName,
    inputVariables and outputVariables.

    """
    if isinstance(model, str):
        model = {'name': model}
    elif isinstance(model, dict):
        # Work on a copy so the caller's dictionary is not silently changed.
        model = dict(model)
    else:
        # Explicit check rather than `assert`, which is stripped under -O.
        raise TypeError(
            '`model` must be a str or dict, not %s.' % type(model).__name__)

    # Avoid a mutable default argument.
    if properties is None:
        properties = {}

    p = get_project(project)
    if p is None:
        raise ValueError("Unable to find project '%s'" % project)

    model['projectId'] = p['id']
    model['modeler'] = modeler or current_session().user
    model['description'] = description or model.get('description')
    model['function'] = function or model.get('function')
    model['algorithm'] = algorithm or model.get('algorithm')
    model['tool'] = tool or model.get('tool')
    model['champion'] = is_champion or model.get('champion')
    model['role'] = 'Champion' if model.get('champion', False) else 'Challenger'

    # An existing `properties` entry on the model takes precedence.
    model.setdefault('properties',
                     [{'name': k, 'value': v} for k, v in properties.items()])

    # TODO: add kwargs (pop)
    # model.update(kwargs)

    return post(
        ROOT_PATH + '/models',
        json=model,
        headers={'Content-Type': 'application/vnd.sas.models.model+json'})