Exemple #1
0
    def error_counts(self, dataset, raise_on_error=True):
        """Maps every field id that has errors to its total error count.

           `dataset` may be a dataset resource structure (a dict holding
           an 'object' key), which is inspected as given, or any other
           dataset reference, which is validated with
           `check_resource_type` and then retrieved through
           `check_resource` using `self.get_dataset`.

           `raise_on_error` is forwarded to `check_resource`. When it is
           false and the retrieved resource carries a non-None 'error'
           entry, an empty dict is returned.

           The result is read from the `total` entry of each item found
           under object -> status -> field_errors, and is empty when no
           dataset id is available or no field errors are listed.

        """
        has_resource_body = isinstance(dataset, dict) and 'object' in dataset
        if has_resource_body:
            dataset_id = get_dataset_id(dataset)
        else:
            # Only a reference was given: validate it and fetch the resource.
            check_resource_type(dataset, DATASET_PATH,
                                message="A dataset id is needed.")
            dataset_id = get_dataset_id(dataset)
            dataset = check_resource(dataset_id, self.get_dataset,
                                     raise_on_error=raise_on_error)
            retrieval_failed = dataset['error'] is not None
            if not raise_on_error and retrieval_failed:
                # Drop the id so that no counts are extracted below.
                dataset_id = None

        if not dataset_id:
            return {}

        status = dataset.get('object', {}).get('status', {})
        field_errors = status.get('field_errors', {})
        return {field_id: field_errors[field_id]['total']
                for field_id in field_errors}
    def create_correlation(self, dataset, args=None, wait_time=3, retries=10):
        """Creates a correlation from a `dataset`.

           `dataset` must resolve to the dataset resource type; any other
           type raises an Exception naming the type that was found.

           `args` is an optional set of extra creation arguments. It is
           copied, never modified, and its 'dataset' entry (if any) is
           overridden by the id of the given dataset.

           `wait_time` and `retries` are forwarded to `check_resource`,
           which is called on the dataset before the creation request is
           sent.

           Returns whatever `self._create` returns for the JSON-encoded
           arguments posted to `self.correlation_url`.

        """
        found_type = get_resource_type(dataset)
        if found_type != DATASET_PATH:
            raise Exception(
                "A dataset id is needed to create a correlation. %s found."
                % found_type)

        dataset_id = get_dataset_id(dataset)
        check_resource(dataset_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time,
                       retries=retries,
                       raise_on_error=True,
                       api=self)

        # Work on a copy so the caller's arguments are left untouched.
        payload = {}
        if args is not None:
            payload.update(args)
        payload["dataset"] = dataset_id

        return self._create(self.correlation_url, json.dumps(payload))
Exemple #3
0
    def delete_dataset(self, dataset):
        """Deletes a dataset.

           `dataset` is validated with `check_resource_type` against the
           dataset resource type and resolved to an id with
           `get_dataset_id`.

           Returns the result of `self._delete` on the dataset URL, or
           None when no dataset id could be obtained.

        """
        check_resource_type(dataset, DATASET_PATH,
                            message="A dataset id is needed.")
        dataset_id = get_dataset_id(dataset)
        if not dataset_id:
            return None
        resource_url = "%s%s" % (self.url, dataset_id)
        return self._delete(resource_url)
Exemple #4
0
    def update_dataset(self, dataset, changes):
        """Updates a dataset.

           `dataset` is validated with `check_resource_type` against the
           dataset resource type and resolved to an id with
           `get_dataset_id`. `changes` is serialized to JSON and used as
           the body of the update request.

           Returns the result of `self._update` on the dataset URL, or
           None when no dataset id could be obtained.

        """
        check_resource_type(dataset, DATASET_PATH,
                            message="A dataset id is needed.")
        dataset_id = get_dataset_id(dataset)
        if not dataset_id:
            return None
        payload = json.dumps(changes)
        resource_url = "%s%s" % (self.url, dataset_id)
        return self._update(resource_url, payload)
Exemple #5
0
    def download_dataset(self, dataset, filename=None, retries=10):
        """Downloads the dataset contents through `self._download`.

           `dataset` is validated with `check_resource_type` against the
           dataset resource type and resolved to an id with
           `get_dataset_id`. `filename` and `retries` are forwarded
           unchanged to `self._download`.

           Returns the result of `self._download` on the dataset download
           URL, or None when no dataset id could be obtained.

        """
        check_resource_type(dataset, DATASET_PATH,
                            message="A dataset id is needed.")
        dataset_id = get_dataset_id(dataset)
        if not dataset_id:
            return None
        download_url = "%s%s%s" % (self.url, dataset_id, DOWNLOAD_DIR)
        return self._download(download_url,
                              filename=filename,
                              retries=retries)
Exemple #6
0
    def get_dataset(self, dataset, query_string=''):
        """Retrieves a dataset.

           The dataset parameter should be a string containing the
           dataset id or the dict returned by create_dataset.
           A dataset is an evolving object that keeps being processed
           until it reaches the FINISHED or FAULTY state, so the returned
           dict encloses the dataset values and state info available at
           the time of the call.

           `query_string` is forwarded unchanged to `self._get`. None is
           returned when no dataset id could be obtained.
        """
        check_resource_type(dataset, DATASET_PATH,
                            message="A dataset id is needed.")
        dataset_id = get_dataset_id(dataset)
        if not dataset_id:
            return None
        resource_url = "%s%s" % (self.url, dataset_id)
        return self._get(resource_url, query_string=query_string)