Example #1
0
    def test_create_async_sets_calculation_status(self):
        # Flow under test: import the large fixture, wait on the dataset,
        # post a formula, check the calculation is reported as pending, then
        # wait for the calculation and check its column is in the schema.
        fixture_url = '%s%s' % (
            self._local_fixture_prefix(), 'good_eats_huge.csv')
        created = create_dataset_from_url(fixture_url, allow_local_file=True)
        self.dataset_id = created.dataset_id

        # presumably blocks until the import has finished -- confirm in helper
        self._wait_for_dataset_state(self.dataset_id)

        post_result = json.loads(self._post_formula())

        # The POST response is a dict whose success entry mentions the
        # dataset ID.
        self.assertTrue(isinstance(post_result, dict))
        self.assertTrue(self.controller.SUCCESS in post_result)
        success_entry = post_result[self.controller.SUCCESS]
        self.assertTrue(self.dataset_id in success_entry)

        # Only the first calculation listed for the dataset is inspected.
        calculations = json.loads(self.controller.show(self.dataset_id))
        first_calculation = calculations[0]

        self.assertTrue(isinstance(first_calculation, dict))
        self.assertTrue(Calculation.STATE in first_calculation)
        self.assertEqual(first_calculation[Calculation.STATE],
                         Calculation.STATE_PENDING)

        self._wait_for_calculation_ready(self.dataset_id, self.name)

        # Re-read the dataset so the schema reflects the finished calculation.
        column_names = Dataset.find_one(self.dataset_id).schema.keys()

        self.assertTrue(self.name in column_names)
Example #2
0
    def test_create_async_not_ready(self):
        # The formula is posted right after the import is started, with no
        # wait in between, so the dataset is expected to still be not ready.
        fixture_url = '%s%s' % (
            self._local_fixture_prefix(), 'good_eats_huge.csv')
        created = create_dataset_from_url(fixture_url, allow_local_file=True)
        self.dataset_id = created.dataset_id
        post_result = json.loads(self._post_formula())
        dataset = Dataset.find_one(self.dataset_id)

        self.assertFalse(dataset.is_ready)
        self.assertTrue(isinstance(post_result, dict))
        self.assertTrue(DATASET_ID not in post_result)

        self._wait_for_dataset_state(self.dataset_id)

        # NOTE(review): `dataset` is the object fetched before the wait; it is
        # not re-fetched here -- confirm that checking this snapshot is the
        # intent.
        schema_columns = dataset.schema.keys()
        self.assertFalse(self.name in schema_columns)
Example #3
0
    def create(self, url=None, csv_file=None, json_file=None, schema=None,
               perish=0):
        """Create a dataset from a URL, CSV file, JSON file or schema file.

        Exactly one source is used, chosen in this order of precedence:
        `url`, `csv_file`, `json_file`, `schema`. If `url` is provided, create
        a dataset by downloading a CSV from that URL. If `url` is not provided
        and `csv_file` is provided, create a dataset with the data in the
        passed `csv_file`. If both `url` and `csv_file` are provided,
        `csv_file` is ignored. If `schema` is supplied (and no other source
        is), an empty dataset is created with the associated column structure.

        .. note::

            The following words are reserved and will be slugified by adding
            underscores (or multiple underscores to ensure uniqueness) if used
            as column names:

                - all
                - and
                - case
                - date
                - default
                - in
                - not
                - or
                - sum
                - years

        :param url: A URL to load a CSV file from. The URL must point to a CSV
            file.
        :param csv_file: An uploaded CSV file to read from.
        :param json_file: An uploaded JSON file to read from.
        :param schema: A SDF schema file (JSON)
        :param perish: Number of seconds after which to delete the dataset.
            Parsed with `parse_int`; if the parsed value is falsy (as with the
            default) or no dataset was created, no deletion is scheduled.

        :returns: An error message if none of `url`, `csv_file`, `json_file`
            or `schema` is provided. An error message if an improperly
            formatted value raises a ValueError, e.g. an improperly formatted
            CSV file. An error message if the URL could not be loaded.
            Otherwise returns a JSON string with the dataset ID of the newly
            created dataset.  Note that the dataset will not be fully loaded
            until its state is set to ready.
        """
        result = None
        # Default error, reported when no source argument yields a dataset.
        error = 'url, csv_file or schema required'

        try:
            dataset = None
            if url:
                dataset = create_dataset_from_url(url)
            elif csv_file:
                dataset = create_dataset_from_csv(csv_file)
            elif json_file:
                dataset = create_dataset_from_json(json_file)
            elif schema:
                dataset = create_dataset_from_schema(schema)
            if dataset:
                result = {Dataset.ID: dataset.dataset_id}

            perish = parse_int(perish, None)
            # Only schedule deletion when a dataset object actually exists;
            # otherwise a `perish` value sent without any source would
            # dereference None and raise instead of returning the error above.
            if perish and dataset is not None:
                dataset.delete(countdown=perish)
        except urllib2.URLError:
            error = 'could not load: %s' % url
        except IOError:
            # NOTE(review): this message always names `csv_file`, whichever
            # source branch raised the IOError.
            error = 'could not get a filehandle for: %s' % csv_file

        self.set_response_params(result, success_status_code=201)
        return self.dump_or_error(result, error)