def error_counts(self, dataset, raise_on_error=True):
    """Map the id of every field that contains errors to its error total.

    `dataset` can be either a dataset resource structure (a dict holding
    an 'object' key) or a dataset id. A dataset id is validated and then
    handed to `check_resource` together with `self.get_dataset` to obtain
    the associated remote resource.

    An empty dict is returned when no usable dataset id is available,
    which includes the case where `raise_on_error` is false and the
    retrieved resource carries a non-None 'error' entry.
    """
    already_a_resource = isinstance(dataset, dict) and 'object' in dataset
    if already_a_resource:
        dataset_id = get_dataset_id(dataset)
    else:
        check_resource_type(dataset, DATASET_PATH,
                            message="A dataset id is needed.")
        dataset_id = get_dataset_id(dataset)
        dataset = check_resource(dataset_id, self.get_dataset,
                                 raise_on_error=raise_on_error)
        # With raise_on_error disabled a failed retrieval is reported in
        # the resource itself; dropping the id skips the error lookup.
        if not raise_on_error and dataset['error'] is not None:
            dataset_id = None

    if not dataset_id:
        return {}

    status = dataset.get('object', {}).get('status', {})
    field_errors = status.get('field_errors', {})
    return {field_id: field_errors[field_id]['total']
            for field_id in field_errors}
def error_counts(self, dataset, raise_on_error=True):
    """Return a dict of field id -> number of errors for a dataset.

    The `dataset` argument is either a dataset resource structure (a dict
    with an 'object' key) or a dataset id. In the second case the id is
    type-checked and passed to `check_resource` along with
    `self.get_dataset` to retrieve the associated remote resource.

    The result is empty when there is no dataset id to work with, e.g.
    when `raise_on_error` is false and the retrieved resource has a
    non-None 'error' value.
    """
    needs_retrieval = not isinstance(dataset, dict) or 'object' not in dataset
    if needs_retrieval:
        check_resource_type(dataset, DATASET_PATH,
                            message="A dataset id is needed.")
        dataset_id = get_dataset_id(dataset)
        dataset = check_resource(dataset_id, self.get_dataset,
                                 raise_on_error=raise_on_error)
        retrieval_failed = (not raise_on_error
                            and dataset['error'] is not None)
        if retrieval_failed:
            # Clearing the id makes the function fall through to the
            # empty result instead of inspecting a faulty resource.
            dataset_id = None
    else:
        dataset_id = get_dataset_id(dataset)

    counts = {}
    if dataset_id:
        resource_body = dataset.get('object', {})
        field_errors = resource_body.get('status', {}).get('field_errors', {})
        for field_id in field_errors:
            counts[field_id] = field_errors[field_id]['total']
    return counts
def create_correlation(self, dataset, args=None, wait_time=3, retries=10):
    """Create a correlation from a `dataset`.

    `dataset` must resolve to the dataset resource type; anything else
    raises an `Exception` naming the resource type that was found.
    `args`, when given, supplies extra creation arguments; the "dataset"
    key is always set to the dataset id. `wait_time` and `retries` are
    forwarded to `check_resource`.

    Returns the result of `self._create` called with the correlation url
    and the JSON-encoded arguments.
    """
    resource_type = get_resource_type(dataset)
    if resource_type != DATASET_PATH:
        raise Exception("A dataset id is needed to create a"
                        " correlation. %s found." % resource_type)

    dataset_id = get_dataset_id(dataset)
    check_resource(dataset_id, query_string=TINY_RESOURCE,
                   wait_time=wait_time, retries=retries,
                   raise_on_error=True, api=self)

    # Build a fresh dict so the caller's `args` is never modified.
    create_args = {}
    if args is not None:
        create_args.update(args)
    create_args["dataset"] = dataset_id

    payload = json.dumps(create_args)
    return self._create(self.correlation_url, payload)
def download_dataset(self, dataset, filename=None, retries=10):
    """Download the dataset contents to a csv file or file object.

    `dataset` is type-checked as a dataset reference and its id is
    extracted. `filename` and `retries` are forwarded to
    `self._download`.

    Returns the result of `self._download`, or None when no dataset id
    could be extracted.
    """
    check_resource_type(dataset, DATASET_PATH,
                        message="A dataset id is needed.")
    dataset_id = get_dataset_id(dataset)
    if not dataset_id:
        return None

    download_url = "%s%s%s" % (self.url, dataset_id, DOWNLOAD_DIR)
    return self._download(download_url, filename=filename, retries=retries)
def delete_dataset(self, dataset):
    """Delete a dataset.

    `dataset` is type-checked as a dataset reference and its id is
    extracted.

    Returns the result of `self._delete` on the dataset url, or None
    when no dataset id could be extracted.
    """
    check_resource_type(dataset, DATASET_PATH,
                        message="A dataset id is needed.")
    dataset_id = get_dataset_id(dataset)
    if not dataset_id:
        return None

    dataset_url = "%s%s" % (self.url, dataset_id)
    return self._delete(dataset_url)
def update_dataset(self, dataset, changes):
    """Update a dataset.

    `dataset` is type-checked as a dataset reference and its id is
    extracted. `changes` is JSON-encoded and sent as the update body.

    Returns the result of `self._update`, or None when no dataset id
    could be extracted (in which case `changes` is not serialized).
    """
    check_resource_type(dataset, DATASET_PATH,
                        message="A dataset id is needed.")
    dataset_id = get_dataset_id(dataset)
    if not dataset_id:
        return None

    payload = json.dumps(changes)
    dataset_url = "%s%s" % (self.url, dataset_id)
    return self._update(dataset_url, payload)
def get_dataset(self, dataset, query_string=""):
    """Retrieve a dataset.

    The `dataset` parameter should be a string containing the dataset id
    or the dict returned by create_dataset.

    A dataset is an evolving object that is processed until it reaches
    the FINISHED or FAULTY state, so the returned dict encloses the
    dataset values and state info available at the time of the call.

    `query_string` is forwarded to `self._get`. None is returned when no
    dataset id could be extracted.
    """
    check_resource_type(dataset, DATASET_PATH,
                        message="A dataset id is needed.")
    dataset_id = get_dataset_id(dataset)
    if not dataset_id:
        return None

    dataset_url = "%s%s" % (self.url, dataset_id)
    return self._get(dataset_url, query_string=query_string)
def get_dataset(self, dataset, query_string=''):
    """Fetch a dataset resource.

    `dataset` should be either a string with the dataset id or the dict
    returned by create_dataset.

    Because a dataset keeps being processed until it reaches the FINISHED
    or FAULTY state, the dict that comes back holds the dataset values
    and state info as they were when this method was called.

    `query_string` is passed through to `self._get`. When no dataset id
    can be extracted the method returns None.
    """
    check_resource_type(dataset, DATASET_PATH,
                        message="A dataset id is needed.")
    dataset_id = get_dataset_id(dataset)
    if dataset_id:
        resource_url = "%s%s" % (self.url, dataset_id)
        return self._get(resource_url, query_string=query_string)
    return None
def create_statistical_test(self, dataset, args=None, wait_time=3,
                            retries=10):
    """Create a statistical test from a `dataset`.

    `dataset` must resolve to the dataset resource type; anything else
    raises an `Exception` naming the resource type that was found.
    `args`, when given, supplies extra creation arguments; the "dataset"
    key is always set to the dataset id. `wait_time` and `retries` are
    forwarded to `check_resource`.

    Returns the result of `self._create` called with the statistical
    test url and the JSON-encoded arguments.
    """
    resource_type = get_resource_type(dataset)
    if resource_type != DATASET_PATH:
        raise Exception("A dataset id is needed to create a"
                        " statistical test. %s found." % resource_type)

    dataset_id = get_dataset_id(dataset)
    check_resource(dataset_id, query_string=TINY_RESOURCE,
                   wait_time=wait_time, retries=retries,
                   raise_on_error=True, api=self)

    # Build a fresh dict so the caller's `args` is never modified.
    create_args = {}
    if args is not None:
        create_args.update(args)
    create_args["dataset"] = dataset_id

    payload = json.dumps(create_args)
    return self._create(self.statistical_test_url, payload)