def create_execution(self, origin_resource, args=None, wait_time=3,
                     retries=10):
    """Creates an execution from a `script` or a list of `scripts`.

    :param origin_resource: script id/dict or a list of them
    :param args: dict of extra creation arguments sent to the API
    :param wait_time: seconds between readiness checks
    :param retries: number of readiness checks before giving up
    :return: the API response for the created execution
    """
    create_args = {}
    if args is not None:
        create_args.update(args)
    if isinstance(origin_resource, (str, dict)):
        # single script
        scripts = [origin_resource]
    else:
        scripts = origin_resource
    # An empty list would pass the `all(...)` check below vacuously and
    # crash later with an obscure IndexError on `script_ids[0]`;
    # fail early with a clear message instead.
    if not scripts:
        raise Exception("A script id or a list of them is needed to create"
                        " a script execution. An empty list found.")
    try:
        script_ids = [get_script_id(script) for script in scripts]
    except TypeError:
        raise Exception("A script id or a list of them is needed to create"
                        " a script execution. %s found." %
                        get_resource_type(origin_resource))
    if all(get_resource_type(script_id) == SCRIPT_PATH
           for script_id in script_ids):
        # wait until every script is ready before creating the execution
        for script in scripts:
            check_resource(script,
                           query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)
    else:
        raise Exception("A script id or a list of them is needed to create"
                        " a script execution. %s found." %
                        get_resource_type(origin_resource))
    if len(scripts) > 1:
        create_args.update({"scripts": script_ids})
    else:
        create_args.update({"script": script_ids[0]})
    body = json.dumps(create_args)
    return self._create(self.execution_url, body)
def create_anomaly_score(self, anomaly, input_data=None, args=None,
                         wait_time=3, retries=10):
    """Creates a new anomaly score.

    Waits until the anomaly detector is ready, then posts the input data
    to the anomaly score endpoint.
    """
    resource_type = get_resource_type(anomaly)
    if resource_type != ANOMALY_PATH:
        raise Exception("An anomaly detector id is needed to create an"
                        " anomaly score. %s found." % resource_type)
    anomaly_id = get_anomaly_id(anomaly)
    check_resource(anomaly_id, query_string=TINY_RESOURCE,
                   wait_time=wait_time, retries=retries,
                   raise_on_error=True, api=self)
    payload = dict(args) if args is not None else {}
    payload["input_data"] = {} if input_data is None else input_data
    payload["anomaly"] = anomaly_id
    return self._create(self.anomaly_score_url, json.dumps(payload),
                        verify=self.verify)
def get_fields(self, resource):
    """Retrieve fields used by a resource.

    Returns a dictionary with the fields that uses
    the resource keyed by Id.
    """
    if isinstance(resource, dict) and 'resource' in resource:
        resource_id = resource['resource']
    elif isinstance(resource, str) and \
            get_resource_type(resource) in RESOURCES_WITH_FIELDS:
        resource_id = resource
        resource = self.retrieve_resource(resource,
                                          query_string=ALL_FIELDS)
    else:
        LOGGER.error("Wrong resource id")
        return None
    # Try to extract the fields information from the local resource dict;
    # if the structure lacks it, refresh the resource remotely by id.
    try:
        return get_fields(resource)
    except KeyError:
        refreshed = self._get("%s%s" % (self.url, resource_id))
        return get_fields(refreshed)
def create_correlation(self, dataset, args=None, wait_time=3, retries=10):
    """Creates a correlation from a `dataset`.

    Waits until the dataset is ready before issuing the request.
    """
    resource_type = get_resource_type(dataset)
    if resource_type != DATASET_PATH:
        raise Exception("A dataset id is needed to create a"
                        " correlation. %s found." % resource_type)
    dataset_id = get_dataset_id(dataset)
    check_resource(dataset_id, query_string=TINY_RESOURCE,
                   wait_time=wait_time, retries=retries,
                   raise_on_error=True, api=self)
    payload = dict(args) if args is not None else {}
    payload["dataset"] = dataset_id
    return self._create(self.correlation_url, json.dumps(payload))
def create_projection(self, pca, input_data=None, args=None,
                      wait_time=3, retries=10):
    """Creates a new projection.

    The pca parameter can be a pca resource or ID
    """
    resource_type = get_resource_type(pca)
    if resource_type != PCA_PATH:
        raise Exception("A PCA resource id is needed"
                        " to create a projection. %s found." %
                        resource_type)
    pca_id = get_resource_id(pca)
    if pca_id is not None:
        check_resource(pca_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    payload = dict(args) if args is not None else {}
    payload["input_data"] = {} if input_data is None else input_data
    if pca_id is not None:
        payload["pca"] = pca_id
    return self._create(self.projection_url, json.dumps(payload),
                        verify=self.verify)
def create_topic_distribution(self, topic_model, input_data=None,
                              args=None, wait_time=3, retries=10):
    """Creates a new topic distribution.

    Waits until the topic model is ready, then posts the input data.
    """
    topic_model_id = get_topic_model_id(topic_model)
    if topic_model_id is None:
        resource_type = get_resource_type(topic_model)
        raise Exception("A topic model id is needed to create a"
                        " topic distribution. %s found." % resource_type)
    check_resource(topic_model_id, query_string=TINY_RESOURCE,
                   wait_time=wait_time, retries=retries,
                   raise_on_error=True, api=self)
    payload = dict(args) if args is not None else {}
    payload.update({
        "input_data": {} if input_data is None else input_data,
        "topicmodel": topic_model_id})
    return self._create(self.topic_distribution_url, json.dumps(payload),
                        verify=self.verify_prediction)
def check_model_fields(model):
    """Checks the model structure to see whether it contains the required
    fields information

    Returns True when the structure either carries all the fields used by
    the model (or, lacking a `model_fields` list, when the `fields_meta`
    counters show the fields set is complete), False otherwise.
    """
    # Each resource type keeps its fields under a different inner key
    # (defaults to 'model' when the type is not listed in FIELDS_PARENT).
    inner_key = FIELDS_PARENT.get(get_resource_type(model), 'model')
    if check_model_structure(model, inner_key):
        # unwrap the API envelope, if present
        model = model.get('object', model)
        fields = model.get("fields", model.get(inner_key, {}).get('fields'))
        input_fields = model.get("input_fields")
        # models only need model_fields to work. The rest of resources will
        # need all fields to work
        model_fields = list(model.get(inner_key, {}).get( \
            'model_fields', {}).keys())
        # fusions don't have input fields
        if input_fields is None and inner_key != "fusion":
            return False
        if not model_fields:
            # no explicit model_fields list: fall back on the fields_meta
            # counters to decide whether the fields info is complete
            fields_meta = model.get('fields_meta', \
                model.get(inner_key, {}).get('fields_meta', {}))
            try:
                return fields_meta['count'] == fields_meta['total']
            except KeyError:
                # stored old models will not have the fields_meta info, so
                # we return True to avoid failing in this case
                return True
        else:
            if fields is None:
                return False
            # every field used by the model must appear in the fields dict
            return all([field_id in list(fields.keys()) \
                for field_id in model_fields])
    return False
def create_forecast(self, time_series, input_data=None, args=None,
                    wait_time=3, retries=10):
    """Creates a new forecast.

    Waits until the time series model is ready before issuing the request.
    """
    time_series_id = get_time_series_id(time_series)
    resource_type = get_resource_type(time_series_id)
    # guard clause: equivalent (by De Morgan) to the positive check
    if resource_type != TIME_SERIES_PATH or time_series_id is None:
        raise Exception("A time series model id is needed to create a"
                        " forecast. %s found." % resource_type)
    check_resource(time_series_id, query_string=TINY_RESOURCE,
                   wait_time=wait_time, retries=retries,
                   raise_on_error=True, api=self)
    payload = dict(args) if args is not None else {}
    payload["input_data"] = {} if input_data is None else input_data
    # the guard above already ensures the id is not None here
    payload["timeseries"] = time_series_id
    return self._create(self.forecast_url, json.dumps(payload),
                        verify=self.verify_prediction)
def create_association_set(self, association, input_data=None, args=None,
                           wait_time=3, retries=10):
    """Creates a new association set.

    :param association: association resource id or dict
    :param input_data: dict of input values for the association set
    :param args: dict of extra creation arguments sent to the API
    :param wait_time: seconds between readiness checks
    :param retries: number of readiness checks before giving up
    """
    association_id = None
    resource_type = get_resource_type(association)
    if resource_type == ASSOCIATION_PATH:
        association_id = get_association_id(association)
        check_resource(association_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    else:
        # fixed error-message grammar ("A association" -> "An association")
        raise Exception("An association id is needed to create an"
                        " association set. %s found." % resource_type)
    if input_data is None:
        input_data = {}
    create_args = {}
    if args is not None:
        create_args.update(args)
    create_args.update({"input_data": input_data})
    create_args.update({"association": association_id})
    body = json.dumps(create_args)
    return self._create(self.association_set_url, body, verify=self.verify)
def create_centroid(self, cluster, input_data=None, args=None,
                    wait_time=3, retries=10):
    """Creates a new centroid.

    Waits until the cluster is ready, then posts the input data.
    """
    resource_type = get_resource_type(cluster)
    if resource_type != CLUSTER_PATH:
        raise Exception("A cluster id is needed to create a"
                        " centroid. %s found." % resource_type)
    cluster_id = get_cluster_id(cluster)
    check_resource(cluster_id, query_string=TINY_RESOURCE,
                   wait_time=wait_time, retries=retries,
                   raise_on_error=True, api=self)
    payload = dict(args) if args is not None else {}
    payload["input_data"] = {} if input_data is None else input_data
    payload["cluster"] = cluster_id
    return self._create(self.centroid_url, json.dumps(payload),
                        verify=self.verify)
def delete(self, resource, **kwargs):
    """Method to delete resources

    Dispatches to the type-specific deleter registered in ``self.deleters``.

    :param resource: resource id or structure to delete
    :raises ValueError: when `resource` is not a known resource type
    """
    try:
        resource_type = get_resource_type(resource)
        deleter = self.deleters[resource_type]
    except KeyError:
        raise ValueError("%s is not a resource." % resource)
    # The call is kept outside the `try` so a KeyError raised inside the
    # deleter itself is no longer masked as a "not a resource" error.
    return deleter(resource, **kwargs)
def create_model(self, origin_resource, args=None, wait_time=3, retries=10):
    """Creates a model from an origin_resource.

    Uses a remote resource to create a new model using the
    arguments in `args`.
    The allowed remote resources can be:
        - dataset
        - list of datasets
        - cluster
    In the case of using cluster id as origin_resource, a centroid
    must also be provided in the args argument. The first centroid
    is used otherwise.
    """
    create_args = {}
    if args is not None:
        create_args.update(args)
    if isinstance(origin_resource, list):
        # mutidatasets
        create_args = self._set_create_from_datasets_args(
            origin_resource, args=create_args, wait_time=wait_time,
            retries=retries)
    else:
        resource_type = get_resource_type(origin_resource)
        # model from cluster and centroid
        if resource_type == CLUSTER_PATH:
            cluster_id = get_cluster_id(origin_resource)
            cluster = check_resource(cluster_id,
                                     query_string=TINY_RESOURCE,
                                     wait_time=wait_time,
                                     retries=retries,
                                     raise_on_error=True, api=self)
            if 'centroid' not in create_args:
                try:
                    # default to the first centroid when none is given
                    centroid = list(cluster['object'][
                        'cluster_models'].keys())[0]
                    create_args.update({'centroid': centroid})
                except KeyError:
                    raise KeyError("Failed to generate the model. A "
                                   "centroid id is needed in the args "
                                   "argument to generate a model from "
                                   "a cluster.")
            create_args.update({'cluster': cluster_id})
        elif resource_type == DATASET_PATH:
            create_args = self._set_create_from_datasets_args(
                origin_resource, args=create_args, wait_time=wait_time,
                retries=retries)
        else:
            # fixed copy-pasted error message: it previously said
            # "to create a dataset" in this model-creation method
            raise Exception("A dataset, list of dataset ids"
                            " or cluster id plus centroid id are needed"
                            " to create a"
                            " model. %s found." % resource_type)
    body = json.dumps(create_args)
    return self._create(self.model_url, body)
def check_model_structure(model, inner_key=None):
    """Checks the model structure to see if it contains all the main
    expected keys

    :param model: resource dictionary to inspect
    :param inner_key: key holding the model substructure; derived from the
        resource type when None
    :return: True when the structure looks complete
    """
    if inner_key is None:
        inner_key = FIELDS_PARENT.get(get_resource_type(model), 'model')
    if not isinstance(model, dict):
        return False
    if model.get('resource') is None:
        return False
    if 'object' in model and inner_key in model['object']:
        return True
    return inner_key in model
def create_script(self, source_code=None, args=None, wait_time=3,
                  retries=10):
    """Creates a whizzml script from its source code.

    The `source_code` parameter can be a:
        {script ID}: the ID for an existing whizzml script
        {path}: the path to a file containing the source code
        {string} : the string containing the source code for the script
    """
    if source_code is None:
        raise Exception('A valid code string'
                        ' or a script id must be provided.')
    create_args = dict(args) if args is not None else {}
    resource_type = get_resource_type(source_code)
    if resource_type == SCRIPT_PATH:
        # cloning an existing script
        script_id = get_script_id(source_code)
        if script_id:
            check_resource(script_id, query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)
            create_args["origin"] = script_id
    elif isinstance(source_code, str):
        if is_url(source_code):
            # fetch code and creation args from a remote script
            script_args = retrieve_script_args(source_code)
            source_code = script_args.get("source_code")
            create_args.update(json.loads(script_args.get("json")))
        else:
            # treat the string as a path when it points to a file
            try:
                if os.path.exists(source_code):
                    with open(source_code) as code_file:
                        source_code = code_file.read()
            except IOError:
                raise IOError("Could not open the source code file %s." %
                              source_code)
        create_args["source_code"] = source_code
    else:
        raise Exception("A script id or a valid source code"
                        " is needed to create a"
                        " script. %s found." % resource_type)
    return self._create(self.script_url, json.dumps(create_args))
def get(self, resource, **kwargs):
    """Method to get resources

    Dispatches to the type-specific getter registered in ``self.getters``
    and, when `finished` is truthy (the default), waits for the resource
    to reach a finished state via ``self.ok``.

    :raises ValueError: when `resource` is not a known resource type
    """
    finished = kwargs.get('finished', True)
    get_kwargs = filter_kwargs(kwargs, ['finished'])
    try:
        resource_type = get_resource_type(resource)
        getter = self.getters[resource_type]
    except KeyError:
        raise ValueError("%s is not a resource or ID." % resource)
    # The call is kept outside the `try` so a KeyError raised inside the
    # getter itself is no longer masked as a "not a resource" error.
    resource_info = getter(resource, **get_kwargs)
    if finished:
        ok_kwargs = filter_kwargs(kwargs, ['query_string'])
        ok_kwargs.update({"error_retries": 5})
        self.ok(resource_info, **ok_kwargs)
    return resource_info
def create_prediction(self, model, input_data=None, args=None,
                      wait_time=3, retries=10):
    """Creates a new prediction.

    The model parameter can be:
        - a simple tree model
        - a simple logistic regression model
        - an ensemble
        - a deepnet
        - a linear regression
        - a fusion

    Note that the old `by_name` argument has been deprecated.
    """
    resource_type = get_resource_type(model)
    if resource_type not in SUPERVISED_PATHS:
        raise Exception("A supervised model resource id is needed"
                        " to create a prediction. %s found." %
                        resource_type)
    model_id = get_resource_id(model)
    if model_id is not None:
        check_resource(model_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    payload = dict(args) if args is not None else {}
    payload["input_data"] = {} if input_data is None else input_data
    if model_id is not None:
        payload["model"] = model_id
    return self._create(self.prediction_url, json.dumps(payload),
                        verify=self.verify_prediction)
def create_library(self, source_code=None, args=None, wait_time=3,
                   retries=10):
    """Creates a whizzml library from its source code.

    The `source_code` parameter can be a:
        {library ID}: the ID for an existing whizzml library
        {path}: the path to a file containing the source code
        {string} : the string containing the source code for the library
    """
    if source_code is None:
        raise Exception('A valid code string'
                        ' or a library id must be provided.')
    create_args = dict(args) if args is not None else {}
    resource_type = get_resource_type(source_code)
    if resource_type == LIBRARY_PATH:
        # cloning an existing library
        library_id = get_library_id(source_code)
        if library_id:
            check_resource(library_id, query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)
            create_args["origin"] = library_id
    elif isinstance(source_code, str):
        # treat the string as a path when it points to a file
        try:
            if os.path.exists(source_code):
                with open(source_code) as code_file:
                    source_code = code_file.read()
        except IOError:
            raise IOError("Could not open the source code file %s." %
                          source_code)
        create_args["source_code"] = source_code
    else:
        raise Exception("A library id or a valid source code"
                        " is needed to create a"
                        " library. %s found." % resource_type)
    return self._create(self.library_url, json.dumps(create_args))
def get_fields(resource):
    """Returns the field information in a resource dictionary structure

    :param resource: resource dictionary (as returned by the API)
    :return: dict of field descriptions keyed by field id; an empty dict
        when the resource type carries no fields information
    :raises ValueError: when `resource` is not a valid resource structure
    """
    try:
        resource_type = get_resource_type(resource)
    except ValueError:
        raise ValueError("Unknown resource structure. Failed to find"
                         " a valid resource dictionary as argument.")
    # `fields` was previously left unbound for resource types outside
    # RESOURCES_WITH_FIELDS, causing an UnboundLocalError at return;
    # default to an empty dict instead.
    fields = {}
    if resource_type in RESOURCES_WITH_FIELDS:
        resource = resource.get('object', resource)
        # fields structure
        if resource_type in FIELDS_PARENT:
            fields = resource[FIELDS_PARENT[resource_type]].get('fields', {})
        else:
            fields = resource.get('fields', {})
        if resource_type == SAMPLE_PATH:
            # samples store fields as a list; re-key them by field id
            fields = {field['id']: field for field in fields}
    return fields
def __init__(self, fields, objective_id=None, data_locale=None,
             missing_tokens=None, terms=False, categories=False,
             numerics=False):
    """Initializes the fields-handling attributes from a `fields` dict.

    Sets up, among others: `fields`, `inverted_fields`, `input_fields`,
    `model_fields`, `data_locale` and `missing_tokens`. Optionally builds
    the terms/items/categories structures used for terms expansion.

    NOTE(review): assumes `fields` maps field ids to field descriptors
    with `column_number`, `optype`, `preferred`, etc. — the exact schema
    is defined by the API responses, not visible here.
    """
    if isinstance(fields, dict):
        try:
            self.objective_id = objective_id
            self.uniquify_varnames(fields)
            self.inverted_fields = invert_dictionary(fields)
            self.fields = {}
            self.fields.update(fields)
            # `input_fields` may already be set (presumably by a subclass
            # before calling this initializer — verify against callers);
            # only compute it when missing or empty
            if not (hasattr(self, "input_fields") and self.input_fields):
                # all fields sorted by column number, excluding the
                # objective field
                self.input_fields = [field_id for field_id, field in \
                    sorted(list(self.fields.items()),
                           key=lambda x: x[1].get("column_number")) \
                    if not self.objective_id or \
                    field_id != self.objective_id]
            self.model_fields = {}
            self.datetime_parents = []
            # model fields: the preferred input fields only
            self.model_fields.update(
                dict([(field_id, field) for field_id, field in \
                    list(self.fields.items()) if field_id in \
                    self.input_fields and \
                    self.fields[field_id].get("preferred", True)]))
            # if any of the model fields is a generated datetime field
            # we need to add the parent datetime field
            self.model_fields = self.add_datetime_parents()
            self.data_locale = data_locale
            self.missing_tokens = missing_tokens
            if self.data_locale is None:
                self.data_locale = DEFAULT_LOCALE
            if self.missing_tokens is None:
                self.missing_tokens = DEFAULT_MISSING_TOKENS
            if terms:
                # adding text and items information to handle terms
                # expansion
                self.term_forms = {}
                self.tag_clouds = {}
                self.term_analysis = {}
                self.items = {}
                self.item_analysis = {}
            if categories:
                self.categories = {}
            if terms or categories or numerics:
                self.add_terms(categories, numerics)
            if self.objective_id is not None and \
                    hasattr(self, "resource_id") and self.resource_id and \
                    get_resource_type(self.resource_id) != ENSEMBLE_PATH:
                # Only for models. Ensembles need their own logic
                self.regression = \
                    (not hasattr(self, "boosting") or not self.boosting) \
                    and self.fields[self.objective_id][ \
                        'optype'] == NUMERIC \
                    or (hasattr(self, "boosting") and self.boosting and \
                        self.boosting.get("objective_class") is None)
        except KeyError:
            raise Exception("Wrong field structure.")
def create_dataset(self, origin_resource, args=None, wait_time=3,
                   retries=10):
    """Creates a remote dataset.

    Uses a remote resource to create a new dataset using the
    arguments in `args`.
    The allowed remote resources can be:
        - source
        - dataset
        - list of datasets
        - cluster
    In the case of using cluster id as origin_resources, a centroid
    must also be provided in the args argument. The first centroid
    is used otherwise.
    If `wait_time` is higher than 0 then the dataset creation
    request is not sent until the `source` has been created successfuly.
    """
    create_args = {}
    if args is not None:
        create_args.update(args)
    if isinstance(origin_resource, list):
        # mutidatasets
        create_args = self._set_create_from_datasets_args(
            origin_resource, args=create_args, wait_time=wait_time,
            retries=retries, key="origin_datasets")
    else:
        # dataset from source
        resource_type = get_resource_type(origin_resource)
        if resource_type == SOURCE_PATH:
            source_id = get_source_id(origin_resource)
            if source_id:
                check_resource(source_id,
                               query_string=TINY_RESOURCE,
                               wait_time=wait_time,
                               retries=retries,
                               raise_on_error=True, api=self)
                create_args.update({"source": source_id})
        # dataset from dataset
        elif resource_type == DATASET_PATH:
            create_args = self._set_create_from_datasets_args(
                origin_resource, args=create_args, wait_time=wait_time,
                retries=retries, key="origin_dataset")
        # dataset from cluster and centroid
        elif resource_type == CLUSTER_PATH:
            cluster_id = get_cluster_id(origin_resource)
            cluster = check_resource(cluster_id,
                                     query_string=TINY_RESOURCE,
                                     wait_time=wait_time,
                                     retries=retries,
                                     raise_on_error=True, api=self)
            if 'centroid' not in create_args:
                try:
                    # default to the first centroid when none is given
                    centroid = list(cluster['object']
                                    ['cluster_datasets_ids'].keys())[0]
                    create_args.update({'centroid': centroid})
                except KeyError:
                    # fixed: this message's string literal was physically
                    # broken across two source lines (invalid syntax)
                    raise KeyError("Failed to generate the dataset. A "
                                   "centroid id is needed in the args "
                                   "argument to generate a dataset from "
                                   "a cluster.")
            create_args.update({'cluster': cluster_id})
        else:
            raise Exception("A source, dataset, list of dataset ids"
                            " or cluster id plus centroid id are needed"
                            " to create a"
                            " dataset. %s found." % resource_type)
    body = json.dumps(create_args)
    return self._create(self.dataset_url, body)