def create_correlation(self, dataset, args=None, wait_time=3, retries=10):
    """Creates a correlation from a `dataset`.

    The `dataset` argument may be a dataset id or a dataset resource
    structure; any other resource type raises an exception.
    """
    resource_type = get_resource_type(dataset)
    if resource_type != DATASET_PATH:
        raise Exception("A dataset id is needed to create a"
                        " correlation. %s found." % resource_type)
    dataset_id = get_dataset_id(dataset)
    # wait until the origin dataset is finished before requesting
    check_resource(dataset_id, query_string=TINY_RESOURCE,
                   wait_time=wait_time, retries=retries,
                   raise_on_error=True, api=self)
    correlation_args = {} if args is None else dict(args)
    correlation_args["dataset"] = dataset_id
    return self._create(self.correlation_url, json.dumps(correlation_args))
def create_anomaly_score(self, anomaly, input_data=None, args=None, wait_time=3, retries=10):
    """Creates a new anomaly score.

    Requires an anomaly detector id (or resource structure) and the
    input data to score.
    """
    resource_type = get_resource_type(anomaly)
    if resource_type != ANOMALY_PATH:
        raise Exception("An anomaly detector id is needed to create an"
                        " anomaly score. %s found." % resource_type)
    anomaly_id = get_anomaly_id(anomaly)
    # wait until the anomaly detector is finished before requesting
    check_resource(anomaly_id, query_string=TINY_RESOURCE,
                   wait_time=wait_time, retries=retries,
                   raise_on_error=True, api=self)
    score_args = {} if args is None else dict(args)
    score_args["input_data"] = {} if input_data is None else input_data
    score_args["anomaly"] = anomaly_id
    return self._create(self.anomaly_score_url, json.dumps(score_args),
                        verify=self.verify)
def create_forecast(self, time_series, input_data=None, args=None, wait_time=3, retries=10):
    """Creates a new forecast from a time series model."""
    time_series_id = get_time_series_id(time_series)
    resource_type = get_resource_type(time_series_id)
    # De Morgan inversion of the original positive check
    if resource_type != TIME_SERIES_PATH or time_series_id is None:
        raise Exception("A time series model id is needed to create a"
                        " forecast. %s found." % resource_type)
    check_resource(time_series_id, query_string=TINY_RESOURCE,
                   wait_time=wait_time, retries=retries,
                   raise_on_error=True, api=self)
    forecast_args = {} if args is None else dict(args)
    forecast_args["input_data"] = {} if input_data is None else input_data
    if time_series_id is not None:
        forecast_args["timeseries"] = time_series_id
    return self._create(self.forecast_url, json.dumps(forecast_args),
                        verify=self.verify_prediction)
def create_anomaly_score(self, anomaly, input_data=None, args=None, wait_time=3, retries=10):
    """Creates a new anomaly score."""
    resource_type = get_resource_type(anomaly)
    if resource_type == ANOMALY_PATH:
        anomaly_id = get_anomaly_id(anomaly)
        # make sure the anomaly detector is finished before scoring
        check_resource(anomaly_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    else:
        raise Exception("An anomaly detector id is needed to create an"
                        " anomaly score. %s found." % resource_type)
    scoring_args = {}
    if args is not None:
        scoring_args.update(args)
    scoring_args.update({
        "input_data": input_data if input_data is not None else {},
        "anomaly": anomaly_id})
    return self._create(self.anomaly_score_url, json.dumps(scoring_args),
                        verify=self.verify)
def create_topic_distribution(self, topic_model, input_data=None, args=None, wait_time=3, retries=10):
    """Creates a new topic distribution."""
    topic_model_id = get_topic_model_id(topic_model)
    if topic_model_id is None:
        resource_type = get_resource_type(topic_model)
        raise Exception("A topic model id is needed to create a"
                        " topic distribution. %s found." % resource_type)
    # wait until the topic model is finished before requesting
    check_resource(topic_model_id, query_string=TINY_RESOURCE,
                   wait_time=wait_time, retries=retries,
                   raise_on_error=True, api=self)
    create_args = {} if args is None else dict(args)
    create_args["input_data"] = {} if input_data is None else input_data
    create_args["topicmodel"] = topic_model_id
    return self._create(self.topic_distribution_url,
                        json.dumps(create_args),
                        verify=self.verify_prediction)
def create_association_set(self, association, input_data=None, args=None, wait_time=3, retries=10):
    """Creates a new association set.

    The `association` argument can be an association id or resource
    structure; `input_data` holds the values to match against.
    Raises an exception for any other resource type.
    """
    association_id = None
    resource_type = get_resource_type(association)
    if resource_type == ASSOCIATION_PATH:
        association_id = get_association_id(association)
        # wait until the association is finished before requesting
        check_resource(association_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    else:
        # fixed grammar in the error message ("A association" -> "An")
        raise Exception("An association id is needed to create an"
                        " association set. %s found." % resource_type)
    if input_data is None:
        input_data = {}
    create_args = {}
    if args is not None:
        create_args.update(args)
    create_args.update({"input_data": input_data})
    create_args.update({"association": association_id})
    body = json.dumps(create_args)
    return self._create(self.association_set_url, body, verify=self.verify)
def create_centroid(self, cluster, input_data=None, args=None, wait_time=3, retries=10):
    """Creates a new centroid prediction from a cluster."""
    resource_type = get_resource_type(cluster)
    if resource_type != CLUSTER_PATH:
        raise Exception("A cluster id is needed to create a"
                        " centroid. %s found." % resource_type)
    cluster_id = get_cluster_id(cluster)
    # wait until the cluster is finished before requesting
    check_resource(cluster_id, query_string=TINY_RESOURCE,
                   wait_time=wait_time, retries=retries,
                   raise_on_error=True, api=self)
    centroid_args = {} if args is None else dict(args)
    centroid_args["input_data"] = {} if input_data is None else input_data
    centroid_args["cluster"] = cluster_id
    return self._create(self.centroid_url, json.dumps(centroid_args),
                        verify=self.verify)
def create_projection(self, pca, input_data=None, args=None, wait_time=3, retries=10):
    """Creates a new projection.

    The pca parameter can be a PCA resource or its id.
    """
    resource_type = get_resource_type(pca)
    if resource_type != PCA_PATH:
        raise Exception("A PCA resource id is needed"
                        " to create a projection. %s found." %
                        resource_type)
    pca_id = get_resource_id(pca)
    if pca_id is not None:
        # wait until the PCA is finished before requesting
        check_resource(pca_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    projection_args = {} if args is None else dict(args)
    projection_args["input_data"] = {} if input_data is None else input_data
    if pca_id is not None:
        projection_args["pca"] = pca_id
    return self._create(self.projection_url, json.dumps(projection_args),
                        verify=self.verify)
def create_script(self, source_code=None, args=None, wait_time=3, retries=10):
    """Creates a whizzml script from its source code.

    The `source_code` parameter can be a:
        {script ID}: the ID for an existing whizzml script
        {path}: the path to a file containing the source code
        {string} : the string containing the source code for the script
    """
    payload = {} if args is None else dict(args)
    if source_code is None:
        raise Exception('A valid code string'
                        ' or a script id must be provided.')
    resource_type = get_resource_type(source_code)
    if resource_type == SCRIPT_PATH:
        # cloning an existing script
        script_id = get_script_id(source_code)
        if script_id:
            check_resource(script_id, query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)
            payload["origin"] = script_id
    elif isinstance(source_code, basestring):
        if is_url(source_code):
            # the source code (and extra args) live behind a URL
            script_args = retrieve_script_args(source_code)
            source_code = script_args.get("source_code")
            payload.update(json.loads(script_args.get("json")))
        else:
            try:
                # a path to an existing file is read; any other string
                # is used as the source code itself
                if os.path.exists(source_code):
                    with open(source_code) as code_file:
                        source_code = code_file.read()
            except IOError:
                raise IOError("Could not open the source code file %s." %
                              source_code)
        payload["source_code"] = source_code
    else:
        raise Exception("A script id or a valid source code"
                        " is needed to create a"
                        " script. %s found." % resource_type)
    return self._create(self.script_url, json.dumps(payload))
def create_prediction(self, model, input_data=None, args=None, wait_time=3, retries=10, by_name=True):
    """Creates a new prediction from a model or an ensemble.

    The `by_name` argument is deprecated (never read here) and will
    be removed.
    """
    ensemble_id = None
    model_id = None
    resource_type = get_resource_type(model)
    if resource_type == ENSEMBLE_PATH:
        ensemble_id = get_ensemble_id(model)
        if ensemble_id is not None:
            check_resource(ensemble_id, query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)
    elif resource_type == MODEL_PATH:
        model_id = get_model_id(model)
        check_resource(model_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    else:
        raise Exception("A model or ensemble id is needed to create a"
                        " prediction. %s found." % resource_type)
    prediction_args = {} if args is None else dict(args)
    prediction_args["input_data"] = {} if input_data is None else input_data
    if ensemble_id is None:
        prediction_args["model"] = model_id
    else:
        prediction_args["ensemble"] = ensemble_id
    return self._create(self.prediction_url, json.dumps(prediction_args),
                        verify=self.verify_prediction)
def create_execution(self, origin_resource, args=None, wait_time=3, retries=10):
    """Creates an execution from a `script` or a list of `scripts`."""
    create_args = {} if args is None else dict(args)
    if isinstance(origin_resource, (basestring, dict)):
        # a single script, given as an id or a resource structure
        scripts = [origin_resource]
    else:
        scripts = origin_resource
    try:
        script_ids = [get_script_id(item) for item in scripts]
    except TypeError:
        raise Exception("A script id or a list of them is needed to create"
                        " a script execution. %s found." %
                        get_resource_type(origin_resource))
    valid = all(get_resource_type(script_id) == SCRIPT_PATH
                for script_id in script_ids)
    if not valid:
        raise Exception("A script id or a list of them is needed to create"
                        " a script execution. %s found." %
                        get_resource_type(origin_resource))
    for item in scripts:
        # wait until every script is finished before requesting
        check_resource(item, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    if len(scripts) > 1:
        create_args["scripts"] = script_ids
    else:
        create_args["script"] = script_ids[0]
    return self._create(self.execution_url, json.dumps(create_args))
def error_counts(self, dataset, raise_on_error=True):
    """Returns the ids of the fields that contain errors and their number.

    The dataset argument can be either a dataset resource structure
    or a dataset id (that will be used to retrieve the associated
    remote resource).
    """
    errors_dict = {}
    needs_fetch = not isinstance(dataset, dict) or 'object' not in dataset
    if needs_fetch:
        # only an id was given: retrieve the full resource first
        check_resource_type(dataset, DATASET_PATH,
                            message="A dataset id is needed.")
        dataset_id = get_dataset_id(dataset)
        dataset = check_resource(dataset_id, self.get_dataset,
                                 raise_on_error=raise_on_error)
        if not raise_on_error and dataset['error'] is not None:
            dataset_id = None
    else:
        dataset_id = get_dataset_id(dataset)
    if dataset_id:
        errors = dataset.get('object', {}).get('status', {}).get(
            'field_errors', {})
        errors_dict = dict((field_id, errors[field_id]['total'])
                           for field_id in errors)
    return errors_dict
def retrieve_resource(self, resource_id, query_string=None, check_local_fn=None, retries=None):
    """Retrieves resource info either from the local repo or from
    the remote server.

    :param resource_id: id of the resource to retrieve
    :param query_string: extra filter for the remote request
    :param check_local_fn: optional predicate; a locally stored copy is
        only used when this returns True for it
    :param retries: retries for the remote `check_resource` call
    """
    if query_string is None:
        query_string = ''
    if self.storage is not None:
        try:
            stored_resource = os.path.join(self.storage,
                                           resource_id.replace("/", "_"))
            with open(stored_resource) as resource_file:
                resource = json.loads(resource_file.read())
            # we check that the stored resource has the information
            # needed (for instance, input_fields for predicting)
            if check_local_fn is None or check_local_fn(resource):
                return resource
        except ValueError:
            # bug fix: the original message left the %s placeholder
            # unfilled; include the offending file path
            raise ValueError("The file %s contains no JSON" %
                             stored_resource)
        except IOError:
            # no local copy: fall through to the remote request
            pass
    if self.auth == '?username=;api_key=;':
        raise ValueError("The credentials information is missing. This"
                         " information is needed to download resource %s"
                         " for the first time and store it locally for further"
                         " use. Please export BIGML_USERNAME"
                         " and BIGML_API_KEY." % resource_id)
    api_getter = self.getters[get_resource_type(resource_id)]
    resource = check_resource(resource_id, api_getter, query_string,
                              retries=retries)
    return resource
def error_counts(self, dataset, raise_on_error=True):
    """Returns the ids of the fields that contain errors and their number.

    The dataset argument can be either a dataset resource structure
    or a dataset id (that will be used to retrieve the associated
    remote resource).
    """
    errors_dict = {}
    # idiom fix: `'object' not in dataset` instead of `not ... in ...`
    if not isinstance(dataset, dict) or 'object' not in dataset:
        check_resource_type(dataset, DATASET_PATH,
                            message="A dataset id is needed.")
        dataset_id = get_dataset_id(dataset)
        dataset = check_resource(dataset_id, self.get_dataset,
                                 raise_on_error=raise_on_error)
        if not raise_on_error and dataset['error'] is not None:
            dataset_id = None
    else:
        dataset_id = get_dataset_id(dataset)
    if dataset_id:
        errors = dataset.get('object', {}).get('status', {}).get(
            'field_errors', {})
        for field_id in errors:
            errors_dict[field_id] = errors[field_id]['total']
    return errors_dict
def create_prediction(self, model, input_data=None, args=None, wait_time=3, retries=10):
    """Creates a new prediction from any supervised model resource:
    tree model, logistic regression, ensemble, deepnet, linear
    regression or fusion.

    The old `by_name` argument has been deprecated.
    """
    resource_type = get_resource_type(model)
    if resource_type not in SUPERVISED_PATHS:
        raise Exception("A supervised model resource id is needed"
                        " to create a prediction. %s found." %
                        resource_type)
    model_id = get_resource_id(model)
    if model_id is not None:
        # wait until the model is finished before requesting
        check_resource(model_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    create_args = {} if args is None else dict(args)
    create_args["input_data"] = {} if input_data is None else input_data
    if model_id is not None:
        create_args["model"] = model_id
    return self._create(self.prediction_url, json.dumps(create_args),
                        verify=self.verify_prediction)
def create_prediction(self, model, input_data=None, args=None, wait_time=3, retries=10, by_name=True):
    """Creates a new prediction from a model or an ensemble.

    The `by_name` argument is deprecated and will be removed.
    """
    resource_type = get_resource_type(model)
    ensemble_id = None
    model_id = None
    if resource_type == ENSEMBLE_PATH:
        ensemble_id = get_ensemble_id(model)
        if ensemble_id is not None:
            check_resource(ensemble_id, query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)
    elif resource_type == MODEL_PATH:
        model_id = get_model_id(model)
        check_resource(model_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    else:
        raise Exception("A model or ensemble id is needed to create a"
                        " prediction. %s found." % resource_type)
    create_args = dict(args) if args is not None else {}
    create_args["input_data"] = input_data if input_data is not None else {}
    key, value = (("ensemble", ensemble_id) if ensemble_id is not None
                  else ("model", model_id))
    create_args[key] = value
    return self._create(self.prediction_url, json.dumps(create_args),
                        verify=self.verify_prediction)
def check_resource(self, resource, query_string='', wait_time=1):
    """Checks the status of a resource.

    Delegates to the module-level `check_resource` helper, bound to
    this API connection.
    """
    kwargs = {"query_string": query_string,
              "wait_time": wait_time,
              "api": self}
    return check_resource(resource, **kwargs)
def create_library(self, source_code=None, args=None, wait_time=3, retries=10):
    """Creates a whizzml library from its source code.

    The `source_code` parameter can be a:
        {library ID}: the ID for an existing whizzml library
        {path}: the path to a file containing the source code
        {string} : the string containing the source code for the library
    """
    create_args = dict(args) if args is not None else {}
    if source_code is None:
        raise Exception('A valid code string'
                        ' or a library id must be provided.')
    resource_type = get_resource_type(source_code)
    if resource_type == LIBRARY_PATH:
        # cloning an existing library
        library_id = get_library_id(source_code)
        if library_id:
            check_resource(library_id, query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)
            create_args["origin"] = library_id
    elif isinstance(source_code, basestring):
        try:
            # a path to an existing file is read; any other string is
            # used as the source code itself
            if os.path.exists(source_code):
                with open(source_code) as code_file:
                    source_code = code_file.read()
        except IOError:
            raise IOError("Could not open the source code file %s." %
                          source_code)
        create_args["source_code"] = source_code
    else:
        raise Exception("A library id or a valid source code"
                        " is needed to create a"
                        " library. %s found." % resource_type)
    return self._create(self.library_url, json.dumps(create_args))
def create_execution(self, origin_resource, args=None, wait_time=3, retries=10):
    """Creates an execution from a `script` or a list of `scripts`."""
    execution_args = dict(args) if args is not None else {}
    single = (isinstance(origin_resource, basestring) or
              isinstance(origin_resource, dict))
    scripts = [origin_resource] if single else origin_resource
    try:
        script_ids = [get_script_id(script) for script in scripts]
    except TypeError:
        raise Exception("A script id or a list of them is needed to create"
                        " a script execution. %s found." %
                        get_resource_type(origin_resource))
    if all([get_resource_type(script_id) == SCRIPT_PATH
            for script_id in script_ids]):
        for script in scripts:
            # wait until every script is finished before requesting
            check_resource(script, query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)
    else:
        raise Exception("A script id or a list of them is needed to create"
                        " a script execution. %s found." %
                        get_resource_type(origin_resource))
    if len(scripts) == 1:
        execution_args["script"] = script_ids[0]
    else:
        execution_args["scripts"] = script_ids
    return self._create(self.execution_url, json.dumps(execution_args))
def create_library(self, source_code=None, args=None, wait_time=3, retries=10):
    """Creates a whizzml library from its source code.

    The `source_code` parameter can be a:
        {library ID}: the ID for an existing whizzml library
        {path}: the path to a file containing the source code
        {string} : the string containing the source code for the library
    """
    lib_args = {}
    if args is not None:
        lib_args.update(args)
    if source_code is None:
        raise Exception('A valid code string'
                        ' or a library id must be provided.')
    resource_type = get_resource_type(source_code)
    if resource_type == LIBRARY_PATH:
        # an existing library id: clone it
        library_id = get_library_id(source_code)
        if library_id:
            check_resource(library_id, query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)
            lib_args.update({"origin": library_id})
    elif isinstance(source_code, basestring):
        try:
            if os.path.exists(source_code):
                # read the file at that path as the source code
                with open(source_code) as handle:
                    source_code = handle.read()
        except IOError:
            raise IOError("Could not open the source code file %s." %
                          source_code)
        lib_args.update({"source_code": source_code})
    else:
        raise Exception("A library id or a valid source code"
                        " is needed to create a"
                        " library. %s found." % resource_type)
    return self._create(self.library_url, json.dumps(lib_args))
def create_model(self, origin_resource, args=None, wait_time=3, retries=10):
    """Creates a model from an origin_resource.

    Uses a remote resource to create a new model using the
    arguments in `args`.
    The allowed remote resources can be:
        - dataset
        - list of datasets
        - cluster
    In the case of using cluster id as origin_resource, a centroid must
    also be provided in the args argument. The first centroid is used
    otherwise.
    """
    create_args = {}
    if args is not None:
        create_args.update(args)
    if isinstance(origin_resource, list):
        # multidatasets
        create_args = self._set_create_from_datasets_args(
            origin_resource, args=create_args, wait_time=wait_time,
            retries=retries)
    else:
        resource_type = get_resource_type(origin_resource)
        # model from cluster and centroid
        if resource_type == CLUSTER_PATH:
            cluster_id = get_cluster_id(origin_resource)
            cluster = check_resource(cluster_id,
                                     query_string=TINY_RESOURCE,
                                     wait_time=wait_time,
                                     retries=retries,
                                     raise_on_error=True, api=self)
            # idiom fix: `not in` instead of `not ... in ...`
            if 'centroid' not in create_args:
                try:
                    # list() keeps this working on Python 3, where
                    # dict.keys() views are not indexable
                    centroid = list(cluster['object'][
                        'cluster_models'].keys())[0]
                    create_args.update({'centroid': centroid})
                except KeyError:
                    raise KeyError("Failed to generate the model. A "
                                   "centroid id is needed in the args "
                                   "argument to generate a model from "
                                   "a cluster.")
            create_args.update({'cluster': cluster_id})
        elif resource_type == DATASET_PATH:
            create_args = self._set_create_from_datasets_args(
                origin_resource, args=create_args, wait_time=wait_time,
                retries=retries)
        else:
            raise Exception("A dataset, list of dataset ids"
                            " or cluster id plus centroid id are needed"
                            " to create a"
                            " dataset. %s found." % resource_type)
    body = json.dumps(create_args)
    return self._create(self.model_url, body)
def create_model(self, origin_resource, args=None, wait_time=3, retries=10):
    """Creates a model from an origin_resource.

    Uses a remote resource to create a new model using the
    arguments in `args`.
    The allowed remote resources can be:
        - dataset
        - list of datasets
        - cluster
    In the case of using cluster id as origin_resource, a centroid must
    also be provided in the args argument. The first centroid is used
    otherwise.
    """
    create_args = {}
    if args is not None:
        create_args.update(args)
    if isinstance(origin_resource, list):
        # multidatasets
        create_args = self._set_create_from_datasets_args(
            origin_resource, args=create_args, wait_time=wait_time,
            retries=retries)
    else:
        resource_type = get_resource_type(origin_resource)
        # model from cluster and centroid
        if resource_type == CLUSTER_PATH:
            cluster_id = get_cluster_id(origin_resource)
            cluster = check_resource(cluster_id,
                                     query_string=TINY_RESOURCE,
                                     wait_time=wait_time,
                                     retries=retries,
                                     raise_on_error=True, api=self)
            if 'centroid' not in create_args:
                try:
                    # list() keeps this working on Python 3, where
                    # dict.keys() views are not indexable
                    centroid = list(cluster['object'][
                        'cluster_models'].keys())[0]
                    create_args.update({'centroid': centroid})
                except KeyError:
                    raise KeyError("Failed to generate the model. A "
                                   "centroid id is needed in the args "
                                   "argument to generate a model from "
                                   "a cluster.")
            create_args.update({'cluster': cluster_id})
        elif resource_type == DATASET_PATH:
            create_args = self._set_create_from_datasets_args(
                origin_resource, args=create_args, wait_time=wait_time,
                retries=retries)
        else:
            raise Exception("A dataset, list of dataset ids"
                            " or cluster id plus centroid id are needed"
                            " to create a"
                            " dataset. %s found." % resource_type)
    body = json.dumps(create_args)
    return self._create(self.model_url, body)
def create_prediction(self, model, input_data=None, args=None, wait_time=3, retries=10):
    """Creates a new prediction from any supervised model resource
    (tree model, logistic regression, ensemble, deepnet, linear
    regression or fusion).

    The old `by_name` argument has been deprecated.
    """
    resource_type = get_resource_type(model)
    if resource_type not in SUPERVISED_PATHS:
        raise Exception("A supervised model resource id is needed"
                        " to create a prediction. %s found." %
                        resource_type)
    model_id = get_resource_id(model)
    if model_id is not None:
        check_resource(model_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    if input_data is None:
        input_data = {}
    prediction_args = {}
    if args is not None:
        prediction_args.update(args)
    prediction_args.update({"input_data": input_data})
    if model_id is not None:
        prediction_args.update({"model": model_id})
    return self._create(self.prediction_url, json.dumps(prediction_args),
                        verify=self.verify_prediction)
def create_projection(self, pca, input_data=None, args=None, wait_time=3, retries=10):
    """Creates a new projection from a PCA resource or its id."""
    resource_type = get_resource_type(pca)
    if resource_type != PCA_PATH:
        raise Exception("A PCA resource id is needed"
                        " to create a projection. %s found." %
                        resource_type)
    pca_id = get_resource_id(pca)
    if pca_id is not None:
        # wait until the PCA is finished before requesting
        check_resource(pca_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    if input_data is None:
        input_data = {}
    body_args = {}
    if args is not None:
        body_args.update(args)
    body_args.update({"input_data": input_data})
    if pca_id is not None:
        body_args.update({"pca": pca_id})
    return self._create(self.projection_url, json.dumps(body_args),
                        verify=self.verify)
def create_statistical_test(self, dataset, args=None, wait_time=3, retries=10):
    """Creates a statistical test from a `dataset`."""
    resource_type = get_resource_type(dataset)
    if resource_type != DATASET_PATH:
        raise Exception("A dataset id is needed to create a"
                        " statistical test. %s found." % resource_type)
    dataset_id = get_dataset_id(dataset)
    # wait until the origin dataset is finished before requesting
    check_resource(dataset_id, query_string=TINY_RESOURCE,
                   wait_time=wait_time, retries=retries,
                   raise_on_error=True, api=self)
    test_args = dict(args) if args is not None else {}
    test_args["dataset"] = dataset_id
    return self._create(self.statistical_test_url, json.dumps(test_args))
def create_topic_distribution(self, topic_model, input_data=None, args=None, wait_time=3, retries=10):
    """Creates a new topic distribution."""
    topic_model_id = get_topic_model_id(topic_model)
    if topic_model_id is not None:
        check_resource(topic_model_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    else:
        resource_type = get_resource_type(topic_model)
        raise Exception("A topic model id is needed to create a"
                        " topic distribution. %s found." % resource_type)
    distribution_args = dict(args) if args is not None else {}
    distribution_args.update({
        "input_data": {} if input_data is None else input_data,
        "topicmodel": topic_model_id})
    return self._create(self.topic_distribution_url,
                        json.dumps(distribution_args),
                        verify=self.verify_prediction)
def create_dataset(self, origin_resource, args=None, wait_time=3, retries=10):
    """Creates a remote dataset.

    Uses a remote resource to create a new dataset using the
    arguments in `args`.
    The allowed remote resources can be:
        - source
        - dataset
        - list of datasets
        - cluster
    In the case of using cluster id as origin_resource, a centroid
    must also be provided in the args argument. The first centroid
    is used otherwise.
    If `wait_time` is higher than 0 then the dataset creation
    request is not sent until the `source` has been created
    successfuly.
    """
    create_args = {}
    if args is not None:
        create_args.update(args)
    if isinstance(origin_resource, list):
        # multidatasets
        create_args = self._set_create_from_datasets_args(
            origin_resource, args=create_args, wait_time=wait_time,
            retries=retries, key="origin_datasets")
    else:
        # dataset from source
        resource_type = get_resource_type(origin_resource)
        if resource_type == SOURCE_PATH:
            source_id = get_source_id(origin_resource)
            if source_id:
                check_resource(source_id, query_string=TINY_RESOURCE,
                               wait_time=wait_time, retries=retries,
                               raise_on_error=True, api=self)
                create_args.update({"source": source_id})
        # dataset from dataset
        elif resource_type == DATASET_PATH:
            create_args = self._set_create_from_datasets_args(
                origin_resource, args=create_args, wait_time=wait_time,
                retries=retries, key="origin_dataset")
        # dataset from cluster and centroid
        elif resource_type == CLUSTER_PATH:
            cluster_id = get_cluster_id(origin_resource)
            cluster = check_resource(cluster_id,
                                     query_string=TINY_RESOURCE,
                                     wait_time=wait_time,
                                     retries=retries,
                                     raise_on_error=True, api=self)
            if 'centroid' not in create_args:
                try:
                    # list() keeps this working on Python 3, where
                    # dict.keys() views are not indexable
                    centroid = list(cluster['object'][
                        'cluster_datasets_ids'].keys())[0]
                    create_args.update({'centroid': centroid})
                except KeyError:
                    # bug fix: this string literal was physically broken
                    # across two lines; reconstructed as one message
                    raise KeyError("Failed to generate the dataset. A "
                                   "centroid id is needed in the args "
                                   "argument to generate a dataset from "
                                   "a cluster.")
            create_args.update({'cluster': cluster_id})
        else:
            raise Exception("A source, dataset, list of dataset ids"
                            " or cluster id plus centroid id are needed"
                            " to create a"
                            " dataset. %s found." % resource_type)
    body = json.dumps(create_args)
    return self._create(self.dataset_url, body)
def create_prediction(self, model, input_data=None, args=None, wait_time=3, retries=10):
    """Creates a new prediction.

    The model parameter can be:
        - a simple tree model
        - a simple logistic regression model
        - an ensemble
        - a deepnet
    Note that the old `by_name` argument has been deprecated.
    """
    resource_type = get_resource_type(model)
    resource_key = None
    resource_id = None
    if resource_type == ENSEMBLE_PATH:
        resource_key = "ensemble"
        resource_id = get_ensemble_id(model)
        if resource_id is not None:
            check_resource(resource_id, query_string=TINY_RESOURCE,
                           wait_time=wait_time, retries=retries,
                           raise_on_error=True, api=self)
    elif resource_type == MODEL_PATH:
        resource_key = "model"
        resource_id = get_model_id(model)
        check_resource(resource_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    elif resource_type == LOGISTIC_REGRESSION_PATH:
        resource_key = "logisticregression"
        resource_id = get_logistic_regression_id(model)
        check_resource(resource_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    elif resource_type == DEEPNET_PATH:
        resource_key = "deepnet"
        resource_id = get_deepnet_id(model)
        check_resource(resource_id, query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)
    else:
        raise Exception("A model or ensemble id is needed to create a"
                        " prediction. %s found." % resource_type)
    create_args = {} if args is None else dict(args)
    create_args["input_data"] = {} if input_data is None else input_data
    # only one branch above can set the id; skip the key when it is None
    if resource_id is not None:
        create_args[resource_key] = resource_id
    return self._create(self.prediction_url, json.dumps(create_args),
                        verify=self.verify_prediction)
def create_dataset(self, origin_resource, args=None, wait_time=3, retries=10):
    """Creates a remote dataset.

    Uses a remote resource to create a new dataset using the
    arguments in `args`.
    The allowed remote resources can be:
        - source
        - dataset
        - list of datasets
        - cluster
    In the case of using cluster id as origin_resource, a centroid
    must also be provided in the args argument. The first centroid
    is used otherwise.
    If `wait_time` is higher than 0 then the dataset creation
    request is not sent until the `source` has been created
    successfuly.
    """
    create_args = {}
    if args is not None:
        create_args.update(args)
    if isinstance(origin_resource, list):
        # multidatasets
        create_args = self._set_create_from_datasets_args(
            origin_resource, args=create_args, wait_time=wait_time,
            retries=retries, key="origin_datasets")
    else:
        # dataset from source
        resource_type = get_resource_type(origin_resource)
        if resource_type == SOURCE_PATH:
            source_id = get_source_id(origin_resource)
            if source_id:
                check_resource(source_id, query_string=TINY_RESOURCE,
                               wait_time=wait_time, retries=retries,
                               raise_on_error=True, api=self)
                create_args.update({"source": source_id})
        # dataset from dataset
        elif resource_type == DATASET_PATH:
            create_args = self._set_create_from_datasets_args(
                origin_resource, args=create_args, wait_time=wait_time,
                retries=retries, key="origin_dataset")
        # dataset from cluster and centroid
        elif resource_type == CLUSTER_PATH:
            cluster_id = get_cluster_id(origin_resource)
            cluster = check_resource(cluster_id,
                                     query_string=TINY_RESOURCE,
                                     wait_time=wait_time,
                                     retries=retries,
                                     raise_on_error=True, api=self)
            # idiom fix: `not in` instead of `not ... in ...`
            if 'centroid' not in create_args:
                try:
                    # list() keeps this working on Python 3, where
                    # dict.keys() views are not indexable
                    centroid = list(cluster['object'][
                        'cluster_datasets_ids'].keys())[0]
                    create_args.update({'centroid': centroid})
                except KeyError:
                    # bug fix: this string literal was physically broken
                    # across two lines; reconstructed as one message
                    raise KeyError("Failed to generate the dataset. A "
                                   "centroid id is needed in the args "
                                   "argument to generate a dataset from "
                                   "a cluster.")
            create_args.update({'cluster': cluster_id})
        else:
            raise Exception("A source, dataset, list of dataset ids"
                            " or cluster id plus centroid id are needed"
                            " to create a"
                            " dataset. %s found." % resource_type)
    body = json.dumps(create_args)
    return self._create(self.dataset_url, body)