def __create_or_update(self, url=None, csv_file=None, json_file=None,
                       schema=None, na_values=None, perish=0,
                       dataset_id=None):
    """Create a new dataset, or update an existing one, from a source.

    One data source should be supplied: a `url` to download a CSV from,
    an uploaded `csv_file`, an uploaded `json_file`, or a `schema`
    (which creates an empty dataset with the given column structure).

    :param url: A URL to load a CSV file from.
    :param csv_file: An uploaded CSV file to read from.
    :param json_file: An uploaded JSON file to read from.
    :param schema: A SDF schema file (JSON).
    :param na_values: A JSON list of values to interpret as missing
        data.  Defaults to `None` (no extra NA markers); previously the
        default was a shared mutable `[]`.
    :param perish: Number of seconds after which to delete the dataset.
    :param dataset_id: If given, update this existing dataset (all of
        its observations are deleted first); otherwise create and save
        a new dataset.

    :returns: A JSON string with the dataset ID on success, otherwise
        an error message (no source given, URL unreachable, file not
        readable, or invalid `na_values` JSON).
    """
    result = None
    error = 'url, csv_file or schema required'

    try:
        if schema or url or csv_file or json_file:
            if dataset_id is None:
                dataset = Dataset()
                dataset.save()
            else:
                dataset = Dataset.find_one(dataset_id)
                # Updating replaces the data wholesale, so drop the
                # existing observations before re-importing.
                Observation.delete_all(dataset)

            if schema:
                dataset.import_schema(schema)

            # Decode the JSON-encoded NA-marker list; a falsy value
            # (None/''/[]) becomes [] so downstream importers see the
            # same value the old [] default produced.
            na_values = safe_json_loads(na_values) if na_values else []

            if url:
                dataset.import_from_url(url, na_values=na_values)
            elif csv_file:
                dataset.import_from_csv(csv_file, na_values=na_values)
            elif json_file:
                dataset.import_from_json(json_file)

            result = {Dataset.ID: dataset.dataset_id}

            # Schedule deferred deletion only for a positive countdown.
            perish = parse_int(perish)
            if perish:
                dataset.delete(countdown=perish)
    except urllib2.URLError:
        error = 'could not load: %s' % url
    except IOError:
        error = 'could not get a filehandle for: %s' % csv_file
    except JSONError as e:
        error = str(e)

    self.set_response_params(result, success_status_code=201)

    return self._dump_or_error(result, error)
def create(self, url=None, csv_file=None, json_file=None, schema=None,
           na_values=None, perish=0):
    """Create a dataset by URL, CSV or schema file.

    If `url` is provided, create a dataset by downloading a CSV from
    that URL. If `url` is not provided and `csv_file` is provided,
    create a dataset with the data in the passed `csv_file`. If both
    `url` and `csv_file` are provided, `csv_file` is ignored. If
    `schema` is supplied, an empty dataset is created with the
    associated column structure.

    .. note::

        The following words are reserved and will be slugified by
        adding underscores (or multiple underscores to ensure
        uniqueness) if used as column names:

        - all
        - and
        - case
        - date
        - default
        - in
        - not
        - or
        - sum
        - years

    :param url: A URL to load a CSV file from. The URL must point to a
        CSV file.
    :param csv_file: An uploaded CSV file to read from.
    :param json_file: An uploaded JSON file to read from.
    :param schema: A SDF schema file (JSON)
    :param na_values: A JSON list of values to interpret as missing
        data.
    :param perish: Number of seconds after which to delete the dataset.

    :returns: An error message if `url`, `csv_file`, or `schema` are
        not provided. An error message if an improperly formatted value
        raises a ValueError, e.g. an improperly formatted CSV file. An
        error message if the URL could not be loaded. Otherwise returns
        a JSON string with the dataset ID of the newly created dataset.
        Note that the dataset will not be fully loaded until its state
        is set to ready.
    """
    # Delegate to the shared create/update implementation with no
    # dataset_id so a fresh dataset is created.  This removes the
    # near-duplicate import logic and gains the JSONError handling for
    # a malformed `na_values` string that the inlined copy lacked.
    # `na_values or []` maps the new None default back to the [] the
    # old signature passed along, keeping callers' behavior unchanged.
    return self.__create_or_update(
        url=url, csv_file=csv_file, json_file=json_file, schema=schema,
        na_values=na_values or [], perish=perish)