def __parse_query_args(self, limit, order_by, query, select,
                       distinct=None, dataset=None):
    limit = parse_int(limit, 0)
    query = self.__parse_query(query)
    select = self.__parse_select(select)

    return QueryArgs(query=query, select=select, distinct=distinct,
                     limit=limit, order_by=order_by, dataset=dataset)
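# Illustrative only: a sketch of how a method on the same controller class
# might consume the QueryArgs produced by __parse_query_args, using only
# calls that appear elsewhere in this file (dataset.count() and
# dataset.dframe()). The method name `example_row_count`, the `dataset`
# argument, and the literal parameter values are placeholders.
def example_row_count(self, dataset):
    query_args = self.__parse_query_args(
        limit='10', order_by='-age', query='{"age": {"$gt": 30}}',
        select='{"name": 1, "age": 1}')

    num_rows = dataset.count(query_args)  # count the matching rows
    dframe = dataset.dframe(query_args)   # or materialize them as a DataFrame

    return num_rows, dframe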
def action(dataset, select=select, limit=limit):
    if not dataset.is_ready:
        raise ArgumentError('dataset is not finished importing')

    if select is None:
        raise ArgumentError('no select')

    limit = parse_int(limit, 0)

    if select == self.SELECT_ALL_FOR_SUMMARY:
        select = None

    return dataset.summarize(dataset, query, select, group,
                             limit=limit, order_by=order_by)
def action(dataset, limit=limit, query=query, select=select):
    limit = parse_int(limit, 0)
    query = self.__parse_query(query)
    select = self.__parse_select(select)
    query_args = QueryArgs(query=query,
                           select=select,
                           distinct=distinct,
                           limit=limit,
                           order_by=order_by)

    if count:
        return dataset.count(query_args)
    else:
        dframe = dataset.dframe(query_args, index=index)

    if distinct:
        return sorted(dframe[0].tolist())

    return self.__dataframe_as_content_type(content_type, dframe)
def action(dataset, query=query, select=select, limit=limit):
    if not dataset.is_ready:
        raise ArgumentError('dataset is not finished importing')

    limit = parse_int(limit, 0)
    query = self.__parse_query(query)
    select = self.__parse_select(select, required=True)
    groups = dataset.split_groups(group)

    # validate that each grouping column exists in the dataset
    for c in groups:
        valid_column(dataset, c)

    # if select is passed, append the grouping columns to it
    if select:
        select.update(dict(zip(groups, [1] * len(groups))))

    query_args = QueryArgs(query=query, select=select, limit=limit,
                           order_by=order_by)
    dframe = dataset.dframe(query_args)

    return dataset.summarize(dframe, groups=groups,
                             no_cache=query or select, flat=flat)
def __create_or_update(self, url=None, csv_file=None, json_file=None,
                       schema=None, na_values=[], perish=0, dataset_id=None):
    result = None
    error = 'url, csv_file or schema required'

    try:
        if schema or url or csv_file or json_file:
            if dataset_id is None:
                dataset = Dataset()
                dataset.save()
            else:
                dataset = Dataset.find_one(dataset_id)
                Observation.delete_all(dataset)

            if schema:
                dataset.import_schema(schema)

            na_values = safe_json_loads(na_values)

            if url:
                dataset.import_from_url(url, na_values=na_values)
            elif csv_file:
                dataset.import_from_csv(csv_file, na_values=na_values)
            elif json_file:
                dataset.import_from_json(json_file)

            result = {Dataset.ID: dataset.dataset_id}

            perish = parse_int(perish)

            if perish:
                dataset.delete(countdown=perish)
    except urllib2.URLError:
        error = 'could not load: %s' % url
    except IOError:
        error = 'could not get a filehandle for: %s' % csv_file
    except JSONError as e:
        error = str(e)

    self.set_response_params(result, success_status_code=201)

    return self._dump_or_error(result, error)
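# Illustrative only: a sketch of how a public endpoint on the same controller
# might delegate to __create_or_update. The wrapper name `reset_or_update`
# and its exposure as an HTTP route are assumptions, not shown above. Passing
# a `dataset_id` takes the update path (existing observations are deleted and
# the data re-imported); omitting it creates a new dataset.
def reset_or_update(self, dataset_id=None, url=None, csv_file=None,
                    json_file=None, schema=None, na_values=[], perish=0):
    return self.__create_or_update(
        url=url, csv_file=csv_file, json_file=json_file, schema=schema,
        na_values=na_values, perish=perish, dataset_id=dataset_id)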
def show(self, dataset_id, query=None, select=None, distinct=None, limit=0,
         order_by=None, format=None, callback=False):
    """Return rows for `dataset_id`, matching the passed parameters.

    Retrieve the dataset by ID, then limit that data using the optional
    `query`, `select` and `limit` parameters. Order the results using
    `order_by`, if passed.

    :param dataset_id: The dataset ID of the dataset to return.
    :param select: If passed, restrict the returned columns; may be 'all'
        or a MongoDB JSON query.
    :param distinct: A field to return distinct results for.
    :param query: If passed, restrict results to rows matching this query.
    :param limit: If passed, limit the rows to this number.
    :param order_by: If passed, order the result using this column.
    :param format: Format of output data, 'json' or 'csv'.
    :param callback: A JSONP callback function to wrap the result in.

    :returns: An error message if `dataset_id` does not exist or the JSON
        for `query` or `select` is improperly formatted. Otherwise a JSON
        string of the rows matching the parameters.
    """
    limit = parse_int(limit, 0)
    content_type = self.CSV if format == self.CSV else self.JSON

    def _action(dataset):
        dframe = dataset.dframe(
            query=query, select=select,
            distinct=distinct, limit=limit, order_by=order_by)

        if distinct:
            return sorted(dframe[0].tolist())

        if content_type == self.CSV:
            return dframe.to_csv_as_string()
        else:
            return dframe.to_jsondict()

    return self._safe_get_and_call(
        dataset_id, _action, callback=callback, content_type=content_type)
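# Illustrative only: a client-side sketch of calling show() over HTTP. The
# base URL, the /datasets/<id> route, and the example dataset ID are
# assumptions; query-string parameters are assumed to map directly onto the
# keyword arguments documented above.
import json
import requests

dataset_id = '0123456789abcdef'  # hypothetical dataset ID
params = {
    'query': json.dumps({'age': {'$gt': 30}}),    # MongoDB-style row filter
    'select': json.dumps({'name': 1, 'age': 1}),  # columns to return
    'limit': 10,
    'order_by': 'age',
    'format': 'json',
}
response = requests.get(
    'http://localhost:8080/datasets/%s' % dataset_id, params=params)
print(response.json())  # rows matching the query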
def action(dataset):
    row = Observation.find_one(dataset, parse_int(index))

    if row:
        return row.clean_record
def action(dataset):
    dataset.delete_observation(parse_int(index))

    return {
        self.SUCCESS: 'Deleted row with index "%s".' % index,
        Dataset.ID: dataset_id}
def create(self, url=None, csv_file=None, json_file=None, schema=None,
           na_values=[], perish=0):
    """Create a dataset by URL, CSV or schema file.

    If `url` is provided, create a dataset by downloading a CSV from that
    URL. If `url` is not provided and `csv_file` is provided, create a
    dataset with the data in the passed `csv_file`. If both `url` and
    `csv_file` are provided, `csv_file` is ignored. If `schema` is
    supplied, an empty dataset is created with the associated column
    structure.

    .. note::

        The following words are reserved and will be slugified by adding
        underscores (or multiple underscores to ensure uniqueness) if used
        as column names:

            - all
            - and
            - case
            - date
            - default
            - in
            - not
            - or
            - sum
            - years

    :param url: A URL to load a CSV file from. The URL must point to a CSV
        file.
    :param csv_file: An uploaded CSV file to read from.
    :param json_file: An uploaded JSON file to read from.
    :param schema: A SDF schema file (JSON).
    :param na_values: A JSON list of values to interpret as missing data.
    :param perish: Number of seconds after which to delete the dataset.

    :returns: An error message if `url`, `csv_file`, or `schema` are not
        provided. An error message if an improperly formatted value raises
        a ValueError, e.g. an improperly formatted CSV file. An error
        message if the URL could not be loaded. Otherwise returns a JSON
        string with the dataset ID of the newly created dataset. Note that
        the dataset will not be fully loaded until its state is set to
        ready.
    """
    result = None
    error = 'url, csv_file or schema required'

    try:
        if schema or url or csv_file or json_file:
            dataset = Dataset()
            dataset.save()

            if schema:
                dataset.import_schema(schema)

            if na_values:
                na_values = safe_json_loads(na_values)

            if url:
                dataset.import_from_url(url, na_values=na_values)
            elif csv_file:
                dataset.import_from_csv(csv_file, na_values=na_values)
            elif json_file:
                dataset.import_from_json(json_file)

            result = {Dataset.ID: dataset.dataset_id}

            perish = parse_int(perish)

            if perish:
                dataset.delete(countdown=perish)
    except urllib2.URLError:
        error = 'could not load: %s' % url
    except IOError:
        error = 'could not get a filehandle for: %s' % csv_file

    self.set_response_params(result, success_status_code=201)

    return self._dump_or_error(result, error)
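# Illustrative only: a client-side sketch of calling create() over HTTP. The
# base URL and the POST /datasets route are assumptions; form fields are
# assumed to map onto the keyword arguments documented above.
import requests

# Create a dataset from a remote CSV and delete it automatically after an
# hour; na_values is the JSON list of missing-data markers described above.
response = requests.post('http://localhost:8080/datasets', data={
    'url': 'http://example.com/data.csv',
    'na_values': '["n/a", "-"]',
    'perish': 3600,
})
print(response.json())  # a JSON document keyed by Dataset.ID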
def action(dataset, data=data):
    data = safe_json_loads(data)
    dataset.update_observation(parse_int(index), data)

    return self._success('Updated row with index "%s".' % index, dataset_id)
def action(dataset):
    dataset.delete_observation(parse_int(index))

    return self._success('Deleted row with index "%s".' % index, dataset_id)
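# Illustrative only: a client-side sketch of exercising the row update and
# delete actions above. The PUT/DELETE /datasets/<dataset_id>/row/<index>
# route, base URL, and dataset ID are assumptions, not confirmed by the code
# shown here.
import json
import requests

row_url = 'http://localhost:8080/datasets/0123456789abcdef/row/4'

# Update row 4 with new values for selected columns.
requests.put(row_url, data={'data': json.dumps({'age': 31})})

# Delete row 4.
requests.delete(row_url)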