Example #1
    def __create_or_update(self, url=None, csv_file=None, json_file=None,
                           schema=None, na_values=None, perish=0,
                           dataset_id=None):
        result = None
        error = 'url, csv_file, json_file or schema required'

        try:
            if schema or url or csv_file or json_file:
                if dataset_id is None:
                    dataset = Dataset()
                    dataset.save()
                else:
                    # updating: reuse the dataset and drop its old observations
                    dataset = Dataset.find_one(dataset_id)
                    Observation.delete_all(dataset)

                if schema:
                    dataset.import_schema(schema)

                na_values = safe_json_loads(na_values) if na_values else []

                if url:
                    dataset.import_from_url(url, na_values=na_values)
                elif csv_file:
                    dataset.import_from_csv(csv_file, na_values=na_values)
                elif json_file:
                    dataset.import_from_json(json_file)

                result = {Dataset.ID: dataset.dataset_id}

            perish = parse_int(perish)
            # schedule deletion only if a dataset was actually created
            if perish and result:
                dataset.delete(countdown=perish)
        except urllib2.URLError:
            error = 'could not load: %s' % url
        except IOError:
            error = 'could not get a filehandle for: %s' % csv_file
        except JSONError as e:
            error = str(e)

        self.set_response_params(result, success_status_code=201)

        return self._dump_or_error(result, error)
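
A note on the helpers: `safe_json_loads` and `parse_int` are not shown in this snippet. A minimal sketch of what they might look like, assuming `safe_json_loads` wraps JSON parse failures in the `JSONError` caught above and `parse_int` falls back to a default on unparseable input (hypothetical implementations, not the project's own):

import json

class JSONError(Exception):
    # Assumed exception name, matching the except clause above.
    pass

def safe_json_loads(value):
    # Wrap json.loads so callers see a single JSONError on bad input.
    try:
        return json.loads(value)
    except (TypeError, ValueError) as e:
        raise JSONError('JSON could not be parsed: %s' % e)

def parse_int(value, default=0):
    # Coerce value to an int; fall back to default on bad input.
    try:
        return int(value)
    except (TypeError, ValueError):
        return default
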
Example #2
    def create(self, url=None, csv_file=None, json_file=None, schema=None,
               na_values=None, perish=0):
        """Create a dataset by URL, CSV or schema file.

        If `url` is provided, create a dataset by downloading a CSV from that
        URL. If `url` is not provided and `csv_file` is provided, create a
        dataset with the data in the passed `csv_file`. If both `url` and
        `csv_file` are provided, `csv_file` is ignored; if neither is
        provided, a passed `json_file` is read instead. If `schema` is
        supplied, an empty dataset is created with the associated column
        structure.

        .. note::

            The following words are reserved and will be slugified by adding
            underscores (or multiple underscores to ensure uniqueness) if used
            as column names:

                - all
                - and
                - case
                - date
                - default
                - in
                - not
                - or
                - sum
                - years

        :param url: A URL to load a CSV file from. The URL must point to a CSV
            file.
        :param csv_file: An uploaded CSV file to read from.
        :param json_file: An uploaded JSON file to read from.
        :param schema: An SDF schema file (JSON).
        :param na_values: A JSON list of values to interpret as missing data.
        :param perish: Number of seconds after which to delete the dataset.

        :returns: An error message if `url`, `csv_file`, or `schema` are not
            provided. An error message if an improperly formatted value raises
            a ValueError, e.g. an improperly formatted CSV file. An error
            message if the URL could not be loaded. Otherwise returns a JSON
            string with the dataset ID of the newly created dataset.  Note that
            the dataset will not be fully loaded until its state is set to
            ready.
        """
        result = None
        error = 'url, csv_file, json_file or schema required'

        try:
            if schema or url or csv_file or json_file:
                dataset = Dataset()
                dataset.save()

                if schema:
                    dataset.import_schema(schema)
                na_values = safe_json_loads(na_values) if na_values else []

                if url:
                    dataset.import_from_url(url, na_values=na_values)
                elif csv_file:
                    dataset.import_from_csv(csv_file, na_values=na_values)
                elif json_file:
                    dataset.import_from_json(json_file)

                result = {Dataset.ID: dataset.dataset_id}

            perish = parse_int(perish)
            # schedule deletion only if a dataset was actually created
            if perish and result:
                dataset.delete(countdown=perish)
        except urllib2.URLError:
            error = 'could not load: %s' % url
        except IOError:
            error = 'could not get a filehandle for: %s' % csv_file
        except JSONError as e:
            error = str(e)

        self.set_response_params(result, success_status_code=201)
        return self._dump_or_error(result, error)
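
For context, a hypothetical call against this method; `datasets` (a controller instance) and the example URL are stand-ins, not part of the project:

# Hypothetical usage: `datasets` is an instance of the controller above.
result = datasets.create(
    url='http://example.com/data.csv',  # must point to a CSV file
    na_values='["n/a", ""]',            # JSON list of missing-data markers
    perish=3600)                        # delete the dataset after one hour
# On success, result is a JSON string containing the new dataset's ID.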
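
The reserved-word note in the docstring describes appending underscores until a column name is both non-reserved and unique. A sketch of that behavior (a hypothetical helper illustrating the documented rule, not the project's actual slugifier):

RESERVED_WORDS = ['all', 'and', 'case', 'date', 'default', 'in', 'not',
                  'or', 'sum', 'years']

def slugify_reserved(column, taken=()):
    # Append underscores until the name is neither reserved nor already taken.
    slug = column
    while slug in RESERVED_WORDS or slug in taken:
        slug += '_'
    return slug

print(slugify_reserved('sum'))            # -> sum_
print(slugify_reserved('sum', ['sum_']))  # -> sum__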