def test_create_async_sets_calculation_status(self):
    """Posting a formula to a freshly loaded dataset reports the
    calculation as pending, then the column appears once it is ready.
    """
    fixture_url = '%s%s' % (self._local_fixture_prefix(),
                            'good_eats_huge.csv')
    self.dataset_id = create_dataset_from_url(
        fixture_url, allow_local_file=True).dataset_id
    self._wait_for_dataset_state(self.dataset_id)

    # posting the formula should acknowledge success for this dataset
    post_result = json.loads(self._post_formula())
    self.assertTrue(isinstance(post_result, dict))
    self.assertTrue(self.controller.SUCCESS in post_result)
    self.assertTrue(self.dataset_id in post_result[self.controller.SUCCESS])

    # immediately after the post the calculation is still pending
    shown = json.loads(self.controller.show(self.dataset_id))[0]
    self.assertTrue(isinstance(shown, dict))
    self.assertTrue(Calculation.STATE in shown)
    self.assertEqual(shown[Calculation.STATE], Calculation.STATE_PENDING)

    # once ready, the new column shows up in the dataset schema
    self._wait_for_calculation_ready(self.dataset_id, self.name)
    dataset = Dataset.find_one(self.dataset_id)
    self.assertTrue(self.name in dataset.schema.keys())
def test_create_async_not_ready(self):
    """Posting a formula before the dataset finishes loading does not
    attach it: no dataset ID in the response and no new schema column.
    """
    fixture_url = '%s%s' % (self._local_fixture_prefix(),
                            'good_eats_huge.csv')
    self.dataset_id = create_dataset_from_url(
        fixture_url, allow_local_file=True).dataset_id

    # post the formula without waiting for the dataset to become ready
    post_result = json.loads(self._post_formula())

    dataset = Dataset.find_one(self.dataset_id)
    self.assertFalse(dataset.is_ready)
    self.assertTrue(isinstance(post_result, dict))
    self.assertFalse(DATASET_ID in post_result)

    # even after the dataset loads, the stale snapshot has no new column
    self._wait_for_dataset_state(self.dataset_id)
    self.assertFalse(self.name in dataset.schema.keys())
def create(self, url=None, csv_file=None, json_file=None, schema=None,
           perish=0):
    """Create a dataset from a URL, an uploaded CSV/JSON file, or a schema.

    If `url` is provided, create a dataset by downloading a CSV from that
    URL. If `url` is not provided and `csv_file` is provided, create a
    dataset with the data in the passed `csv_file`. If both `url` and
    `csv_file` are provided, `csv_file` is ignored. If `schema` is
    supplied, an empty dataset is created with the associated column
    structure.

    .. note::

        The following words are reserved and will be slugified by adding
        underscores (or multiple underscores to ensure uniqueness) if used
        as column names:

        - all
        - and
        - case
        - date
        - default
        - in
        - not
        - or
        - sum
        - years

    :param url: A URL to load a CSV file from. The URL must point to a
        CSV file.
    :param csv_file: An uploaded CSV file to read from.
    :param json_file: An uploaded JSON file to read from.
    :param schema: A SDF schema file (JSON).
    :param perish: Number of seconds after which to delete the dataset.

    :returns: An error message if `url`, `csv_file`, or `schema` are not
        provided. An error message if an improperly formatted value raises
        a ValueError, e.g. an improperly formatted CSV file. An error
        message if the URL could not be loaded. Otherwise returns a JSON
        string with the dataset ID of the newly created dataset. Note that
        the dataset will not be fully loaded until its state is set to
        ready.
    """
    result = None
    error = 'url, csv_file or schema required'

    try:
        # Dispatch on the first input supplied, in priority order:
        # URL beats uploaded CSV beats uploaded JSON beats schema.
        dataset = None
        for value, factory in ((url, create_dataset_from_url),
                               (csv_file, create_dataset_from_csv),
                               (json_file, create_dataset_from_json),
                               (schema, create_dataset_from_schema)):
            if value:
                dataset = factory(value)
                break

        if dataset:
            result = {Dataset.ID: dataset.dataset_id}

            # Schedule deferred deletion only for a positive integer TTL.
            ttl = parse_int(perish, None)
            if ttl:
                dataset.delete(countdown=ttl)
    except urllib2.URLError:
        error = 'could not load: %s' % url
    except IOError:
        error = 'could not get a filehandle for: %s' % csv_file

    self.set_response_params(result, success_status_code=201)

    return self.dump_or_error(result, error)