def _get_data(self, select, query, order_by, limit, distinct, format,
              callback, count, index):
    """
    Validate the query options and request this dataset's data.

    Options that are falsy are left out of the request entirely.

    * select - list of column names; turned into a {column: 1} mapping
      and serialized with safe_json_dumps
    * query - dict; serialized with safe_json_dumps
    * order_by, distinct, callback - strings, passed through unchanged
    * limit - int; serialized with safe_json_dumps
    * format - string that must be one of self.DATA_FORMATS
    * count, index - sent as True when truthy

    Raises PyBambooException when an option has the wrong type or when
    *format* is not listed in self.DATA_FORMATS.  Returns whatever the
    connection's make_api_request() returns.
    """
    params = {}

    if select:
        if not isinstance(select, list):
            raise PyBambooException('select must be a list of strings.')
        params['select'] = safe_json_dumps(
            {column: 1 for column in select},
            PyBambooException('select is not JSON-serializable.'))

    if query:
        if not isinstance(query, dict):
            raise PyBambooException('query must be a dict.')
        params['query'] = safe_json_dumps(
            query, PyBambooException('query is not JSON-serializable.'))

    if order_by:
        if not isinstance(order_by, basestring):
            raise PyBambooException('order_by must be a string.')
        params['order_by'] = order_by

    if format:
        if not isinstance(format, basestring):
            raise PyBambooException('format must be a string.')
        if format not in self.DATA_FORMATS:
            # Wrap the operand in a 1-tuple: if DATA_FORMATS is itself a
            # tuple, a bare "%s" % DATA_FORMATS would unpack it and raise
            # TypeError instead of the intended PyBambooException.
            raise PyBambooException('format must be one of: %s.'
                                    % (self.DATA_FORMATS,))
        params['format'] = format

    if distinct:
        if not isinstance(distinct, basestring):
            raise PyBambooException('distinct must be a string.')
        params['distinct'] = distinct

    if callback:
        if not isinstance(callback, basestring):
            raise PyBambooException('callback must be a string.')
        params['callback'] = callback

    if limit:
        if not isinstance(limit, int):
            raise PyBambooException('limit must be an int.')
        params['limit'] = safe_json_dumps(
            limit, PyBambooException('limit is not JSON-serializable.'))

    if count:
        params['count'] = bool(count)
    if index:
        params['index'] = bool(index)

    return self._connection.make_api_request(
        'GET', '/datasets/%s' % self._id, params=params)
def merge(cls, datasets, connection=None):
    """
    Create a new dataset that is a row-wise merge of those in *datasets*.

    Every item of *datasets* must be a Dataset instance, otherwise a
    PyBambooException is raised.  When *connection* is None a Connection
    with default options is created.

    Returns the new merged Dataset, or False when the response carries
    no 'id'.
    """
    connection = Connection() if connection is None else connection

    # TODO: allow list of dataset_ids?
    dataset_ids = []
    for candidate in datasets:
        if not isinstance(candidate, Dataset):
            raise PyBambooException(
                'Datasets need to be instances of Dataset.')
        dataset_ids.append(candidate.id)

    payload = {
        'dataset_ids': safe_json_dumps(
            dataset_ids,
            PyBambooException('datasets is not JSON-serializable.')),
    }
    response = connection.make_api_request(
        'POST', '/datasets/merge', data=payload)

    if 'id' not in response.keys():
        # this is never reached...
        # see TestDataset.test_merge_fail()
        return False
    return Dataset(response['id'], connection=connection)
def _resample(self, date_column, interval, how, query, format):
    """
    Validate the resampling options and request a resample of this dataset.

    * date_column - required, non-empty string
    * interval - required, non-empty string naming a frequency
    * how - optional string
    * format - optional string
    * query - optional dict; serialized with safe_json_dumps

    Raises PyBambooException when a required option is missing or any
    option has the wrong type.  Returns whatever the connection's
    make_api_request() returns.
    """
    if not (date_column and isinstance(date_column, basestring)):
        raise PyBambooException('date_column must be a string.')
    if not (interval and isinstance(interval, basestring)):
        raise PyBambooException('interval must be a string '
                                'representing a frequency. cf. '
                                'http://pytseries.sourceforge.net/'
                                'core.constants.html#date-frequencies')

    params = {'date_column': date_column, 'interval': interval}

    if how:
        if not isinstance(how, basestring):
            raise PyBambooException('how must be a string.')
        params['how'] = how

    if format:
        if not isinstance(format, basestring):
            raise PyBambooException('format must be a string.')
        params['format'] = format

    if query:
        if not isinstance(query, dict):
            raise PyBambooException('query must be a dict.')
        not_serializable = PyBambooException(
            'query is not JSON-serializable.')
        params['query'] = safe_json_dumps(query, not_serializable)

    endpoint = '/datasets/%s/resample' % self._id
    return self._connection.make_api_request('GET', endpoint, params=params)
def _get_summary(self, select, groups, query, order_by, limit, callback):
    """
    Validate the summary options and request this dataset's summary.

    * select - the string 'all' (passed through as is) or a list of
      column names, which is turned into a {column: 1} mapping and
      serialized with safe_json_dumps
    * groups - optional list of strings, sent comma-joined as 'group'
    * query - optional dict; serialized with safe_json_dumps
    * order_by, callback - optional strings
    * limit - optional int; serialized with safe_json_dumps

    Raises PyBambooException when an option has the wrong type.  Returns
    whatever the connection's make_api_request() returns.
    """
    # TODO: check input params
    selection = select
    if select != 'all':
        if not isinstance(select, list):
            raise PyBambooException('select must be a list of strings.')
        selection = safe_json_dumps(
            {column: 1 for column in select},
            PyBambooException('select is not JSON-serializable.'))
    params = {'select': selection}

    if groups is not None:
        if not isinstance(groups, list):
            raise PyBambooException('groups must be a list of strings.')
        params['group'] = ','.join(groups)

    if query is not None:
        if not isinstance(query, dict):
            raise PyBambooException('query must be a dict.')
        params['query'] = safe_json_dumps(
            query, PyBambooException('query is not JSON-serializable.'))

    if order_by:
        if not isinstance(order_by, basestring):
            raise PyBambooException('order_by must be a string.')
        params['order_by'] = order_by

    if limit:
        if not isinstance(limit, int):
            raise PyBambooException('limit must be an int.')
        params['limit'] = safe_json_dumps(
            limit, PyBambooException('limit is not JSON-serializable.'))

    if callback:
        if not isinstance(callback, basestring):
            raise PyBambooException('callback must be a string.')
        params['callback'] = callback

    endpoint = '/datasets/%s/summary' % self._id
    return self._connection.make_api_request('GET', endpoint, params=params)
def row(self, action=None, index=None, payload=None):
    """
    Show, edit or delete a single row of this dataset.

    * action - one of 'show', 'edit' or 'delete'
    * index - int index of the row, used in the request path
    * payload - only used for 'edit'; serialized with safe_json_dumps
      and sent as the 'data' field

    Raises PyBambooException for an unknown action or a non-int index.
    Returns whatever the connection's make_api_request() returns.
    """
    if action not in ('show', 'delete', 'edit'):
        raise PyBambooException('Row action must be show|edit|delete.')
    if not isinstance(index, int):
        raise PyBambooException('index must be an int.')

    # Only an edit carries a request body.
    if action == 'edit':
        not_serializable = PyBambooException(
            'payload is not JSON-serializable')
        data = {"data": safe_json_dumps(payload, not_serializable)}
    else:
        data = None

    verbs = {'show': 'GET', 'delete': 'DELETE', 'edit': 'PUT'}
    endpoint = '/datasets/%s/row/%d' % (self._id, index)
    return self._connection.make_api_request(
        verbs.get(action), endpoint, data=data)
def _add_calculations(self, path, content, json):
    """
    Upload calculation formulae for this dataset.

    The formulae may be supplied in one of three ways:

    * json - an object that is serialized with safe_json_dumps and
      wrapped in a StringIO; when given it replaces *content*
    * content - used directly as the upload body
    * path - path to a local file, opened only when neither *json* nor
      *content* is given

    Raises PyBambooException when all three are None.  Returns True when
    the response contains no 'error' key, False otherwise.
    """
    if path is None and content is None and json is None:
        raise PyBambooException('JSON fomulae must be provided as '
                                'JSON, path or content.')

    if json is not None:
        json_data = safe_json_dumps(json, PyBambooException('formulae are '
                                                            'not JSON-serial'
                                                            'izable'))
        content = StringIO.StringIO(json_data)

    # Track only the file opened here: caller-supplied content is not ours
    # to close, but a handle we open must not outlive the request.
    opened = None
    if content is not None:
        data = content
    else:
        opened = data = open(path)

    try:
        response = self._connection.make_api_request(
            'POST', '/datasets/%s/calculations' % self._id,
            files={'json_file': ('data.json', data)})
    finally:
        if opened is not None:
            opened.close()

    return 'error' not in response
def update_data(self, rows):
    """
    Updates this dataset with the rows given in {column: value} format.
    Any unspecified columns will result in n/a values.

    *rows* must be a non-empty list of dicts, otherwise a
    PyBambooException is raised.  The rows are serialized with
    safe_json_dumps and sent as the 'update' field.  Returns True when
    the response contains an 'id' key.
    """
    if not isinstance(rows, list):
        raise PyBambooException('rows must be a list of dictionaries')
    if not rows:
        raise PyBambooException(
            'rows must contain at least one row dictionary')
    if not all(isinstance(entry, dict) for entry in rows):
        raise PyBambooException('rows must be a list of dictionaries')

    not_serializable = PyBambooException('rows is not JSON-serializable')
    payload = {'update': safe_json_dumps(rows, not_serializable)}
    endpoint = '/datasets/%s' % self._id
    response = self._connection.make_api_request(
        'PUT', endpoint, data=payload)
    return 'id' in response.keys()
def __init__(self, dataset_id=None, url=None, path=None, content=None,
             data_format='csv', schema_path=None, schema_content=None,
             na_values=None, connection=None, reset=False):
    """
    Create a new pybamboo.Dataset from one of the following:
        * dataset_id - the id of an existing bamboo.Dataset
        * url - url to a .csv file
        * path - path to a local .csv or .json file
        * content - a CSV or JSON string
        * data_format - whether path or content is csv | json
        * schema_path - path to a JSON SDF schema
        * schema_content - a JSON SDF string

    Further options:
        * na_values - list, tuple or set; stored on self.NA_VALUES and
          sent serialized (via safe_json_dumps) as 'na_values'
        * connection - a pybamboo.Connection object. If this is not
          supplied one will be created automatically with the default
          options.
        * reset - when true, the current self._id is sent along as
          'dataset_id'; this assumes self._id is already set before the
          call (TODO confirm against callers)

    *dataset_id* takes precedence over *url*, which takes precedence
    over the file/content/schema upload.  Files opened here from *path*
    or *schema_path* are closed once the request has completed.

    Raises PyBambooException when no source is supplied, when
    *data_format* is not in self.DATA_FORMATS, or when *na_values* has
    the wrong type.
    """
    sources = (dataset_id, url, path, content, schema_path, schema_content)
    if all(source is None for source in sources):
        raise PyBambooException(
            'Must supply dataset_id, url, content, schema or file path.')

    if data_format not in self.DATA_FORMATS:
        raise PyBambooException('Illegal data_format: %s. data_format'
                                ' must be one of %s' %
                                (data_format, self.DATA_FORMATS))

    req_data = {}
    if reset:
        req_data['dataset_id'] = self._id

    if na_values is not None:
        if not isinstance(na_values, (list, tuple, set)):
            raise PyBambooException('N/A values must be a list.')
        self.NA_VALUES = na_values
        req_data['na_values'] = safe_json_dumps(
            na_values,
            PyBambooException('na_values are not JSON-serializable'))

    self._connection = Connection() if connection is None else connection

    if dataset_id is not None:
        # TODO: check if this dataset exists?
        self._id = dataset_id
        return

    if url is not None:
        # TODO: check valid url?
        req_data['url'] = url
        self._id = self._connection.make_api_request(
            'POST', '/datasets', req_data).get('id')
        return

    # files might be overloaded by schema or path/content
    files = {}
    # Handles opened here (as opposed to caller-supplied content) so they
    # can be closed even when a later open() or the request itself fails.
    opened = []
    try:
        if schema_path is not None or schema_content is not None:
            # TODO: check for bad file stuff?
            if schema_content is not None:
                schema_data = schema_content
            else:
                schema_data = open(schema_path)
                opened.append(schema_data)
            files['schema'] = ('data.schema.json', schema_data)

        if path is not None or content is not None:
            # TODO: check for bad file stuff?
            if content is not None:
                data = content
            else:
                data = open(path)
                opened.append(data)
            files['%s_file' % data_format] = ('data.%s' % data_format, data)

        self._id = self._connection.make_api_request(
            'POST', '/datasets', files=files, data=req_data).get('id')
    finally:
        for handle in opened:
            handle.close()