def test_path_arg(self):
    """Positional and keyword URL path arguments yield the same grouped filter."""
    base = '/formhandler/%s/formhandler/sales?group=product&col=city&val=Bangalore'
    # The expected frame does not depend on the URL variant: compute it once.
    bangalore = self.sales[self.sales['city'] == 'Bangalore']
    expected = bangalore.groupby('product')['sales'].sum().reset_index()
    for variant in ('path_arg', 'path_kwarg'):
        actual = pd.DataFrame(self.get(base % variant).json())
        afe(actual, expected, check_like=True)
def test_chart(self):
    """Chart rendering: SVG output honours width/height; vega specs embed the data."""
    r = self.get('/formhandler/chart', data={
        '_format': 'svg', 'chart': 'barplot', 'x': 'देश', 'y': 'sales', 'dpi': 72,
        'width': 500, 'height': 300,
    })
    # The SVG root's viewBox reflects the requested width x height
    tree = etree.fromstring(r.text.encode('utf-8'))
    eq_(tree.get('viewBox'), '0 0 500 300')
    # TODO: expand on test cases
    # Check spec, data for vega, vega-lite, vegam formats
    base = '/formhandler/chart?_format={}'
    data = pd.DataFrame(self.get(base.format('json')).json())
    for fmt in {'vega', 'vega-lite', 'vegam'}:
        r = self.get(base.format(fmt))
        # The response is a JS snippet; extract the last `...})(<json>})` call's
        # argument and re-close the object to parse the embedded spec
        var = json.loads(re.findall(r'}\)\((.*?)}\)', r.text)[-1] + '}')
        var = var['spec']
        if 'fromjson' in var:
            # vegam-style spec: data lives under fromjson[0]; blank it out so the
            # spec itself can be compared against the stored YAML fixture
            df = var['fromjson'][0]['data']
            var['fromjson'][0]['data'] = '__DATA__'
        else:
            # vega/vega-lite: pop the inline data; it may be a list of datasets
            df = var.pop('data')
            df = (df[0] if isinstance(df, list) else df)['values']
        yaml_path = os.path.join(folder, '{}.yaml'.format(fmt))
        spec = gramex.cache.open(yaml_path, 'yaml')
        # Embedded data matches the JSON endpoint; remaining spec matches fixture
        afe(pd.DataFrame(df), data)
        self.assertDictEqual(var, spec)
def test_save(self):
    """gramex.cache.save() writes a frame in several formats; verify the round-trip.

    Each format is written to a temp file, re-read via gramex.cache.open, and
    compared against the source frame. `ignore_keyword` checks that unknown
    keyword arguments are tolerated.
    """
    path = os.path.join(cache_folder, 'data.csv')
    data = pd.read_csv(path, encoding='utf-8')
    config = {
        'csv': dict(index=False, ignore_keyword=1),
        'xlsx': dict(index=False, sheet_name='Sheet1', ignore_keyword=1),
        'html': dict(index=False, escape=False, ignore_keyword=1),
        'hdf': dict(index=False, key='data', format='fixed', ignore_keyword=1),
        'json': dict(orient='records', ignore_keyword=1),
        # 'stata': dict(index=False),
        # cannot test since it doesn't support unicode
    }
    for ext, kwargs in config.items():
        target = os.path.join(cache_folder, 'killme.' + ext)
        gramex.cache.save(data, target, **kwargs)
        try:
            result = gramex.cache.open(target)
            if ext == 'html':
                # read_html-style result is a list of tables; take the first
                result = result[0]
            elif ext == 'json':
                # BUG FIX: was `result = pd.DataFrame(data)`, which compared
                # `data` with itself and made the JSON round-trip check vacuous.
                # Convert the re-read JSON records into a frame instead.
                result = pd.DataFrame(result)
            afe(result, data)
        finally:
            os.remove(target)
def test_transform(self):
    """Transforms are applied on load and their hash is part of the cache key."""
    # Check that transform function is applied and used as a cache key
    cache = {}
    path = os.path.join(cache_folder, 'data.csv')
    data = gramex.cache.open(path, 'csv', transform=len, _cache=cache)
    eq_(data, len(pd.read_csv(path)))  # noqa - ignore encoding
    cache_key = (path, 'csv', hashfn(len), frozenset([]))
    self.assertIn(cache_key, cache)

    def transform2(d):
        return d['a'].sum()

    data = gramex.cache.open(path, 'csv', transform=transform2, _cache=cache)
    eq_(data, pd.read_csv(path)['a'].sum())  # noqa - ignore encoding
    cache_key = (path, 'csv', hashfn(transform2), frozenset([]))
    self.assertIn(cache_key, cache)
    # Check that non-callable transforms are ignored but used as cache key
    data = gramex.cache.open(path, 'csv', transform='ignore', _cache=cache)
    afe(data, pd.read_csv(path))  # noqa - ignore encoding
    cache_key = (path, 'csv', hashfn('ignore'), frozenset([]))
    self.assertIn(cache_key, cache)
    # Check that temporary caches are hashed by function: each lambda is a
    # distinct object, so the second call is a cache miss and sees the new v
    v = 1
    data = gramex.cache.open(path, 'csv', lambda x: v, _cache=cache)
    eq_(data, 1)
    v = 2
    data = gramex.cache.open(path, 'csv', lambda x: v, _cache=cache)
    eq_(data, 2)
def check(reload):
    # Helper (closure): `path` and `expected` come from the enclosing test.
    # Open via the cache; `_reload_status=True` also returns whether the file
    # was actually re-read from disk, which must match the `reload` flag.
    result, reloaded = gramex.cache.open(path, 'csv', _reload_status=True,
                                         encoding='utf-8')
    eq_(reloaded, reload)
    afe(result, expected)
def test_insert_new_file(self):
    """insert() creates missing CSV / XLSX / HDF files and appends the rows."""
    new_files = [
        {'url': os.path.join(folder, 'insert.csv'), 'encoding': 'utf-8'},
        {'url': os.path.join(folder, 'insert.xlsx'), 'sheet_name': 'test'},
        {'url': os.path.join(folder, 'insert.hdf'), 'key': 'test'},
    ]
    for conf in new_files:
        # Start from a clean slate and register the file for teardown cleanup
        if os.path.exists(conf['url']):
            os.remove(conf['url'])
        self.tmpfiles.append(conf['url'])
        gramex.data.insert(args=self.insert_rows, **conf)
        # Check if added rows are correct
        try:
            actual = gramex.data.filter(**conf)
        except ValueError:
            # TODO: This is a temporary fix for NumPy 1.16.2, Tables 3.4.4
            # https://github.com/pandas-dev/pandas/issues/24839
            if conf['url'].endswith('.hdf') and pd.np.__version__.startswith('1.16'):
                raise SkipTest('Ignore NumPy 1.16.2 / PyTables 3.4.4 quirk')
            # BUG FIX: previously an unrelated ValueError was silently swallowed
            # here, leaving `actual` undefined and causing a confusing NameError
            # below. Re-raise anything that is not the known quirk.
            raise
        expected = pd.DataFrame(self.insert_rows)
        actual['sales'] = actual['sales'].astype(float)
        expected['sales'] = expected['sales'].astype(float)
        afe(actual, expected, check_like=True)
def test_insert_new_file(self):
    """insert() creates missing CSV / XLSX / HDF files and appends the rows."""
    configs = (
        dict(url=os.path.join(folder, 'insert.csv'), encoding='utf-8'),
        dict(url=os.path.join(folder, 'insert.xlsx'), sheet_name='test'),
        dict(url=os.path.join(folder, 'insert.hdf'), key='test'),
    )
    # The inserted rows are the same for every target format
    expected = pd.DataFrame(self.insert_rows)
    expected['sales'] = expected['sales'].astype(float)
    for conf in configs:
        target = conf['url']
        # Remove any leftover file and register it for teardown cleanup
        if os.path.exists(target):
            os.remove(target)
        self.tmpfiles.append(target)
        gramex.data.insert(args=self.insert_rows, **conf)
        # Re-read the file and verify the inserted rows round-trip intact
        actual = gramex.data.filter(**conf)
        actual['sales'] = actual['sales'].astype(float)
        afe(actual, expected, check_like=True)
def check(q, result, **kwargs):
    """Run gramex.ml.translate against the mock API and compare with `result`.

    `result` is a sequence of rows whose first four entries are
    (source, target, q, t).
    """
    kwargs.update(api='mock')
    actual = gramex.ml.translate(*q, **kwargs)
    rows = [{'source': row[0], 'target': row[1], 'q': row[2], 't': row[3]}
            for row in result]
    expected = pd.DataFrame(rows)
    # Indexes are implementation detail; align before comparing
    actual.index = expected.index
    afe(actual, expected, check_like=True)
def test_date_comparison(self):
    """?date>=... filters a datetime column and round-trips through xlsx."""
    data = gramex.cache.open(os.path.join(folder, 'sales.xlsx'), 'xlsx',
                             sheet_name='dates')
    for dt in ('2018-01-10', '2018-01-20T15:34Z'):
        url = '/formhandler/dates?date>=%s&_format=xlsx' % dt
        actual = pd.read_excel(BytesIO(self.get(url).content))
        # NOTE(review): the URL uses `date>=` but the expectation uses strict `>`.
        # This matches FormHandler's operator convention presumably -- confirm
        # that `col>=val` means "greater than" before changing either side.
        expected = data[data['date'] > pd.to_datetime(dt)]
        expected.index = actual.index
        afe(actual, expected, check_like=True)
def test_open_jsondata(self):
    """gramex.cache.open loads .jsondata files as frames and caches by file state."""
    path = os.path.join(cache_folder, 'data.jsondata')
    expected = pd.read_json(path)

    def check(reload):
        # _reload_status=True also reports whether the file was re-read from disk
        result, reloaded = gramex.cache.open(path, 'jsondata', _reload_status=True)
        eq_(reloaded, reload)
        afe(result, expected)

    self.check_file_cache(path, check)
    # Without an explicit callback, the .jsondata extension is auto-detected
    afe(gramex.cache.open(path), gramex.cache.open(path, 'jsondata'))
def test_date_comparison(self):
    """Date filters work and JSON output serialises datetimes as ISO-8601 UTC."""
    data = gramex.cache.open(os.path.join(folder, 'sales.xlsx'), 'xlsx',
                             sheet_name='dates')
    for dt in ('2018-01-10', '2018-01-20T15:34Z'):
        url = '/formhandler/dates?date>=%s' % dt
        r = self.get(url, params={'_format': 'json', '_meta': 'y'})
        # Check ISO output: every date must parse with an explicit 'Z' UTC format
        pd.to_datetime(pd.DataFrame(r.json())['date'],
                       format='%Y-%m-%dT%H:%M:%S.%fZ')
        actual = pd.read_excel(
            BytesIO(self.get(url, params={'_format': 'xlsx'}).content))
        # Strip tz from the cutoff so it compares with the sheet's naive dates
        expected = data[data['date'] > pd.to_datetime(dt).tz_localize(None)]
        expected.index = actual.index
        afe(actual, expected, check_like=True)
def check_insert_db(self, url, dbname):
    """insert() into a DB: row counts, metadata, primary keys, autoincrement IDs."""
    self.db.add(dbname)
    # Insert 2 rows in the EMPTY database with a primary key
    rows = self.insert_rows.copy()
    rows['primary_key'] = [1, 2]
    meta = {}
    inserted = gramex.data.insert(url, meta, args=rows, table='test_insert',
                                  id='primary_key')
    eq_(inserted, 2, 'insert() returns # of records added')
    # metadata has no filters applied, and no columns ignored
    eq_(meta['filters'], [])
    eq_(meta['ignored'], [])
    # Actual data created has the same content, factoring in type conversion
    actual = gramex.data.filter(url, table='test_insert')
    expected = pd.DataFrame(rows)
    for df in [actual, expected]:
        df['sales'] = df['sales'].astype(float)
    afe(actual, expected, check_like=True)
    # Check if it created a primary key
    engine = sa.create_engine(url)
    insp = sa.inspect(engine)
    ok_('primary_key' in
        insp.get_pk_constraint('test_insert')['constrained_columns'])
    # Inserting duplicate keys raises an Exception
    with assert_raises(sa.exc.IntegrityError):
        gramex.data.insert(url, args=rows, table='test_insert', id='primary_key')
    # Inserting a single row returns meta['data']['inserted'] with the primary key
    rows = {'primary_key': [3], 'देश': ['भारत'], 'city': ['London'], 'sales': ['']}
    inserted = gramex.data.insert(url, meta, args=rows, table='test_insert',
                                  id='primary_key')
    eq_(inserted, 1, 'insert() returns # of records added')
    eq_(meta['inserted'], [{'primary_key': 3}])
    # Adding multiple primary keys via id= is supported
    rows = {'a': [1, 2], 'b': [True, False], 'x': [3, None], 'y': [None, 'y']}
    inserted = gramex.data.insert(url, meta, args=rows, table='t2', id=['a', 'b'])
    eq_(inserted, 2, 'insert() returns # of records added')
    eq_(insp.get_pk_constraint('t2')['constrained_columns'], ['a', 'b'],
        'multiple primary keys are created')
    # Multiple primary keys are returned
    rows = {'a': [3], 'b': [True]}
    inserted = gramex.data.insert(url, meta, args=rows, table='t2', id=['a', 'b'])
    eq_(meta['inserted'], [{'a': 3, 'b': True}])
    # Primary keys not specified in input (AUTO INCREMENT) are returned
    gramex.data.alter(url, 't3', columns={
        'id': {'type': 'int', 'primary_key': True, 'autoincrement': True},
        'x': 'varchar(10)'
    })
    # Single inserts return the ID
    gramex.data.insert(url, meta, args={'x': ['a']}, table='t3')
    eq_(meta['inserted'], [{'id': 1}])
    gramex.data.insert(url, meta, args={'x': ['b']}, table='t3')
    eq_(meta['inserted'], [{'id': 2}])
def test_insert_file(self):
    """insert() appends rows to an existing Excel file without altering prior data."""
    data = gramex.cache.open(self.insert_file, 'xlsx')
    gramex.data.insert(url=self.insert_file, args=self.insert_rows)
    new_data = gramex.cache.open(self.insert_file, 'xlsx')
    # Check original data is identical
    afe(data, new_data.head(len(data)))
    # Check if added rows are correct
    added_rows = pd.DataFrame(self.insert_rows)
    added_rows['sales'] = added_rows['sales'].astype(float)
    # FIX: `pd.np` was deprecated in pandas 0.25 and removed in 2.0.
    # float('nan') is equivalent here (missing 'growth' for new rows).
    added_rows['growth'] = float('nan')
    added_rows.index = new_data.tail(2).index
    afe(new_data.tail(2), added_rows, check_like=True)
def test_open_csv(self):
    """gramex.cache.open loads CSV files as frames and caches by file state."""
    path = os.path.join(cache_folder, 'data.csv')
    expected = pd.read_csv(path, encoding='utf-8')

    def check(reload):
        # _reload_status=True also reports whether the file was re-read from disk
        result, reloaded = gramex.cache.open(path, 'csv', _reload_status=True,
                                             encoding='utf-8')
        eq_(reloaded, reload)
        afe(result, expected)

    self.check_file_cache(path, check)
    # Without an explicit callback, the .csv extension is auto-detected
    afe(gramex.cache.open(path), gramex.cache.open(path, 'csv'))
def test_args(self):
    """url:, sheet_name:, table: and query: accept request-argument formatting."""
    # url: and sheet_name: accepts query formatting for files
    url = '/formhandler/arg-url?path=sales&sheet=sales&ext=excel'
    afe(pd.DataFrame(self.get(url).json()), self.sales, check_like=True)
    url = '/formhandler/arg-url?path=sales&sheet=census'
    census = gramex.cache.open(os.path.join(folder, 'sales.xlsx'),
                               sheet_name='census')
    afe(pd.DataFrame(self.get(url).json()), census, check_like=True)
    # url: and table: accept query formatting for SQLAlchemy
    url = '/formhandler/arg-table?db=formhandler&table=sales'
    afe(pd.DataFrame(self.get(url).json()), self.sales, check_like=True)
    # url: and table: accept query formatting for SQLAlchemy
    # TODO: In Python 2, unicode keys don't work well on Tornado. So use safe keys
    key, val = ('product', '芯片') if six.PY2 else ('देश', 'भारत')
    url = '/formhandler/arg-query?db=formhandler&col=%s&val=%s' % (key, val)
    actual = pd.DataFrame(self.get(url).json())
    expected = self.sales[self.sales[key] == val]
    expected.index = actual.index
    afe(actual, expected, check_like=True)
    # Files with ../ etc should be skipped
    self.check('/formhandler/arg-url?path=../sales', code=500, text='KeyError')
    # Test that the ?skip= parameter is used to find the table.
    self.check('/formhandler/arg-table?db=formhandler&table=sales&skip=ab',
               code=500, text='NoSuchTableError')
    # Spaces are ignored in SQLAlchemy query. So ?skip= will be a missing key
    self.check('/formhandler/arg-table?db=formhandler&table=sales&skip=a b',
               code=500, text='KeyError')
def test_file(self):
    """filter() on a file URL: transforms apply; bad URLs/engines raise."""
    self.check_filter(url=sales_file)
    # Non-callable transforms are ignored on both sides, so the two loads match
    # even though the transform strings ('2.1' vs '2.2') differ
    afe(
        gramex.data.filter(url=sales_file, transform='2.1', sheet_name='dummy'),
        gramex.cache.open(sales_file, 'excel', transform='2.2',
                          sheet_name='dummy'),
    )
    self.check_filter(
        url=sales_file,
        transform=lambda d: d[d['sales'] > 100],
        df=self.sales[self.sales['sales'] > 100],
    )
    # Unknown engine, missing file and unsupported file type raise distinct errors
    with assert_raises(ValueError):
        gramex.data.filter(url='', engine='nonexistent')
    with assert_raises(OSError):
        gramex.data.filter(url='nonexistent')
    with assert_raises(TypeError):
        gramex.data.filter(url=os.path.join(folder, 'test_cache_module.py'))
def check_insert_db(self, url, dbname):
    """insert() into a DB table: row count, round-trip content, primary key."""
    self.db.add(dbname)
    rows = self.insert_rows.copy()
    rows['index'] = [1, 2]  # create a primary key
    inserted = gramex.data.insert(url, args=rows, table='test_insert', id='index')
    eq_(inserted, 2)
    # query table here
    actual = gramex.data.filter(url, table='test_insert')
    expected = pd.DataFrame(rows)
    for df in [actual, expected]:
        # Normalise numeric type: drivers may return sales as str/decimal
        df['sales'] = df['sales'].astype(float)
    afe(actual, expected, check_like=True)
    # Check if it created a primary key
    engine = sa.create_engine(url)
    insp = sa.inspect(engine)
    ok_('index' in insp.get_pk_constraint('test_insert')['constrained_columns'])
    # Inserting duplicate keys raises an Exception
    with assert_raises(sa.exc.IntegrityError):
        gramex.data.insert(url, args=rows, table='test_insert', id='index')
def test_add_handler_get(self):
    """FunctionHandler GET mapping: path args, query params, type hints, defaults."""
    self.check('/func/total/40/2', text='42.0')
    self.check('/func/total/40/2?items=10', text='52.0')
    self.check('/func/total/40/2?items=10&items=10', text='62.0')
    self.check('/func/name_age/johndoe/age/42', text='johndoe is 42 years old.')
    # Defaults apply when no arguments are given
    self.check('/func/name_age', text='alpha is 10 years old.')
    self.check('/func/name_age?name=johndoe&age=42', text='johndoe is 42 years old.')
    # In case of multiple kwargs, the last parameter is picked
    self.check('/func/name_age?name=x&name=y&age=1&age=2', text='y is 2 years old.')
    # When type hints are violated:
    self.check('/func/hints?name=johndoe&age=42.3', code=500)
    # When multiple arguments are passed:
    self.check('/func/total?items=1&items=2&items=3', text='6.0')
    self.check('/func/multilist?items=1&items=2&items=3&start=1', text='7.0')
    # Positional args with types
    self.check('/func/strtotal?items=a&items=b&items=c', text='abc')
    # Test native types. Note: "i=false" won't work -- use "i=" since it's a np.bool8
    # Note: datetimes must be quoted, since they'll be read as JSON usually.
    self.check(
        '/func/nativetypes?a=3&b=1.5&c=false&d=d&e=null&f=3&g=1.5&h=h&i=',
        text=''.join([
            '3', '1.5', 'false', 'd', '', '3', '1.5', 'h', 'false',
            '"2020-01-01T00:00:00+00:00"', '{"a":3,"b":1.5}', '[3,1.5]'
        ]))
    self.check('/func/greet', text='Hello, Stranger!')
    self.check('/func/greet?name=gramex', text='Hello, gramex!')
    self.check('/func/multilist?items=1&items=2&items=3&start=1', text='7.0')
    # DataFrame return values serialise to JSON records
    sales = self.check('/func/sales').json()
    afe(pd.DataFrame(sales), gramex.cache.open('sales.xlsx', rel=True))
    # Content-Type is inferred from the requested extension
    self.check('/func/content/003.json', text='{"x":3}',
               headers={'Content-Type': 'application/json'})
    self.check('/func/content/003.txt', text='x=3',
               headers={'Content-Type': 'text/plain'})
def test_download_excel(self):
    """download() serialises a single frame, or a dict of frames, to xlsx."""
    single = gramex.data.download(self.dummy, format='xlsx')
    afe(pd.read_excel(io.BytesIO(single)), self.dummy)
    multi = gramex.data.download({'dummy': self.dummy, 'sales': self.sales},
                                 format='xlsx')
    # sheet_name=None loads every sheet, keyed by sheet name
    sheets = pd.read_excel(io.BytesIO(multi), sheet_name=None)
    afe(sheets['dummy'], self.dummy)
    afe(sheets['sales'], self.sales)
def check_alter(self, url, id=999, age=4.5):
    """alter() adds typed columns with default/nullable, and creates new tables."""
    # Add a new column of types str, int, float.
    # Also test default, nullable
    gramex.data.alter(url, table='sales', columns={
        'id': {'type': 'int'},
        'email': {'type': 'varchar(99)', 'nullable': True, 'default': 'none'},
        'age': {'type': 'float', 'nullable': False, 'default': age},
    })
    # New tables also support primary_key, autoincrement
    gramex.data.alter(url, table='new', columns={
        'id': {'type': 'int', 'primary_key': True, 'autoincrement': True},
        'email': {'type': 'varchar(99)', 'nullable': True, 'default': 'none'},
        'age': {'type': 'float', 'nullable': False, 'default': age},
    })
    # Reflect the actual schema back from the database
    engine = sa.create_engine(url)
    meta = sa.MetaData(bind=engine)
    meta.reflect()
    # Test types
    for table in (meta.tables['sales'], meta.tables['new']):
        eq_(table.columns.id.type.python_type, int)
        # eq_(table.columns.id.nullable, True)
        eq_(table.columns.email.type.python_type, str)
        # eq_(table.columns.email.nullable, True)
        eq_(table.columns.age.type.python_type, float)
        eq_(table.columns.age.nullable, False)
    # sales: insert and test row for default and types
    gramex.data.insert(url, table='sales', args={'id': [id]})
    result = gramex.data.filter(url, table='sales', args={'id': [id]})
    eq_(len(result), 1)
    eq_(result['id'].iloc[0], id)
    eq_(result['email'].iloc[0], 'none')
    eq_(result['age'].iloc[0], age)
    # new: test types -- autoincrement ids start at 1, email takes its default
    gramex.data.insert(url, table='new', args={'age': [3.0, 4.0]})
    afe(gramex.data.filter(url, table='new'), pd.DataFrame([
        {'id': 1, 'email': 'none', 'age': 3.0},
        {'id': 2, 'email': 'none', 'age': 4.0},
    ]))
def test_download_html(self):
    """download() emits UTF-8 HTML tables for one frame or an ordered dict of frames."""
    # Note: In Python 2, pd.read_html returns .columns.inferred_type=mixed
    # instead of unicode. So check column type only in PY3 not PY2
    single = gramex.data.download(self.dummy, format='html')
    frame = pd.read_html(io.BytesIO(single), encoding='utf-8')[0]
    afe(frame, self.dummy, check_column_type=six.PY3)
    multi = gramex.data.download(
        AttrDict([('dummy', self.dummy), ('sales', self.sales)]), format='html')
    frames = pd.read_html(io.BytesIO(multi), encoding='utf-8')
    afe(frames[0], self.dummy, check_column_type=six.PY3)
    afe(frames[1], self.sales, check_column_type=six.PY3)
def test_download_json(self):
    """download() emits JSON for one frame or a dict of frames; content round-trips."""
    single = gramex.data.download(self.dummy, format='json')
    afe(pd.read_json(io.BytesIO(single)), self.dummy, check_like=True)
    multi = gramex.data.download({'dummy': self.dummy, 'sales': self.sales},
                                 format='json')
    parsed = json.loads(multi, object_pairs_hook=AttrDict)

    def from_json(key):
        text = json.dumps(parsed[key])
        # PY2 returns str (binary). PY3 returns str (unicode). Ensure it's binary
        if isinstance(text, six.text_type):
            text = text.encode('utf-8')
        return pd.read_json(io.BytesIO(text))

    afe(from_json('dummy'), self.dummy, check_like=True)
    afe(from_json('sales'), self.sales, check_like=True)
def test_download_csv(self):
    """download() emits BOM-prefixed CSV; multiple frames become named sections."""
    out = gramex.data.download(self.dummy, format='csv')
    # Output starts with a UTF-8 BOM (utf-8-sig) for Excel compatibility
    ok_(out.startswith(''.encode('utf-8-sig')))
    afe(pd.read_csv(io.BytesIO(out), encoding='utf-8'), self.dummy)
    out = gramex.data.download(AttrDict([
        ('dummy', self.dummy),
        ('sales', self.sales),
    ]), format='csv')
    lines = out.splitlines(True)
    # First section: a 'dummy' title line, then that frame's CSV
    # (slice bounds assume dummy's known row count -- see fixture)
    eq_(lines[0], 'dummy\n'.encode('utf-8-sig'))
    actual = pd.read_csv(io.BytesIO(b''.join(lines[1:4])), encoding='utf-8')
    afe(actual, self.dummy)
    # Second section follows a separator line: 'sales' title, then its CSV
    eq_(lines[5], 'sales\n'.encode('utf-8'))
    actual = pd.read_csv(io.BytesIO(b''.join(lines[6:])), encoding='utf-8')
    afe(actual, self.sales)
def test_upload(self):
    """DriveHandler end-to-end: upload, metadata, download, DELETE and PUT flows.

    NOTE: the steps are strictly order-dependent -- each section builds on the
    database state left by the previous one.
    """
    ok_(os.path.isfile(self.dbpath))
    data = gramex.data.filter(self.con, table='drive', args={})
    # The drive table must expose all metadata columns
    cols = ('id', 'file', 'ext', 'path', 'size', 'mime', 'user_id', 'user_role',
            'tag', 'cat')
    for col in cols:
        ok_(col in data.columns, col)
    self.check_upload(dict(file='userdata.csv'))
    # If upload filename has a path, only the basename is considered
    # Extension is stored in lowercase even if filename is in uppercase
    self.check_upload(dict(file='dir/image.JPG'))
    # If filename is repeated, it's extended randomly
    data = self.check_upload(dict(file='userdata.csv'))
    path = data.path.iloc[0]
    ok_(path != 'userdata.csv')
    ok_(path.startswith('userdata'))
    ok_(path.endswith('.csv'))
    # If filename has weird characters, it's hyphenated
    data = self.check_upload(dict(file='userdata.csv', name='β x.csv'))
    eq_(data.path.iloc[0], '--x.csv')
    # If content-type is available, it's used. Else it's guessed
    data = self.check_upload(dict(file='userdata.csv', mime='text/plain'))
    eq_(data.mime.iloc[0], 'text/plain')
    data = self.check_upload(dict(file='userdata.csv'))
    eq_(data.mime.iloc[0], guess_type('userdata.csv')[0])
    # Large files fail
    self.check_upload(dict(file='gramex.yaml'), code=REQUEST_ENTITY_TOO_LARGE,
                      check=False)
    # .yaml disallowed because of allow
    self.check_upload(dict(file='gramextest.yaml'), code=UNSUPPORTED_MEDIA_TYPE,
                      check=False)
    # .py disallowed because of exclude (though allow allows it)
    self.check_upload(dict(file='server.py'), code=UNSUPPORTED_MEDIA_TYPE,
                      check=False)
    # Multi-uploads are supported, with tags
    self.check_upload(dict(file='userdata.csv', tag='t1'),
                      dict(file='actors.csv', tag='t2'))
    r = requests.post(self.url, files=(
        ('file', ('x.csv', open('userdata.csv', 'rb'))),
        ('file', ('y.csv', open('userdata.csv', 'rb'))),
    ), data={'tag': ['t1'], 'cat': ['c1', 'c2', 'c3'], 'rand': ['x', 'y']})
    eq_(r.status_code, OK)
    data = gramex.data.filter(self.con, table='drive').sort_values('id').tail(2)
    # If there are insufficient tags, they become empty strings
    eq_(data.tag.tolist(), ['t1', ''])
    # If there are more tags, they're truncated
    eq_(data.cat.tolist(), ['c1', 'c2'])
    # If there are irrelevant fields, they're ignored
    ok_('rand' not in data.columns)
    # ?id=..&_download downloads the file
    data = self.check_upload(dict(file='dir/index.html'))
    r = requests.get(self.url, params={'_download': '', 'id': data.id.iloc[0]})
    eq_(r.headers['Content-Disposition'], 'attachment; filename="index.html"')
    # TODO: FormHandler returns Content-Type using _format,
    # so don't check for Content-Type
    # eq_(r.headers['Content-Type'], 'text/html')
    # Serves file with correct length despite unicode
    eq_(int(r.headers['Content-Length']), os.stat('dir/index.html').st_size)
    # If the ID is invalid, raises a NOT FOUND
    r = requests.get(self.url, params={'_download': '', 'id': 9999})
    eq_(r.status_code, NOT_FOUND)
    # If there are 2 IDs, it doesn't download
    r = requests.get(self.url, params={'_download': '', 'id': [0, 1]})
    eq_(r.headers['Content-Type'], 'application/json')
    # User attributes are captured on all files
    user = {'id': 'X', 'role': 'Y'}
    data = self.check_upload(dict(file='userdata.csv'), dict(file='actors.csv'),
                             user=user)
    for index in range(2):
        eq_(data.user_id.iloc[index], 'X')
        eq_(data.user_role.iloc[index], 'Y')
    # DELETE ?id=... deletes the specified file
    data = gramex.data.filter(self.con, table='drive')
    indices = (0, 3, 6)
    for index in indices:
        r = requests.delete(self.url, params={'id': [data.id.iloc[index]]})
    data2 = gramex.data.filter(self.con, table='drive')
    eq_(len(data2), len(data) - len(indices))
    for index in indices:
        # Entry is removed from the database
        ok_(data.id.iloc[index] not in data2.id.values)
        # File is removed from the file system
        ok_(not os.path.exists(
            os.path.join(self.kwargs.path, data.path.iloc[index])))
    # DELETE without ?id= does not delete
    r = requests.delete(self.url)
    eq_(r.status_code, BAD_REQUEST)
    # PUT w/o file upload updates file, mime, ext, tags, etc.
    # NOT path, size, date, user_*
    data = gramex.data.filter(self.con, table='drive')
    params = {
        'id': data.id.iloc[0], 'file': 'a.x', 'ext': '.x', 'mime': 'text/x',
        'tag': 't', 'cat': 'c', 'path': 'a.x', 'size': 100, 'date': 100,
        'user_id': 'A', 'user_role': 'B'
    }
    r = requests.put(self.url, params=params)
    eq_(r.status_code, OK)
    data2 = gramex.data.filter(self.con, table='drive')
    new = data2[data2.id == data.id.iloc[0]].iloc[0]
    old = data[data.id == data.id.iloc[0]].iloc[0]
    for field in ('id', 'file', 'mime', 'ext', 'tag', 'cat'):
        eq_(new[field], params[field])
    for field in ('path', 'size', 'date', 'user_id', 'user_role'):
        eq_(new[field], old[field])
    # ... but with file upload updates size, date and user attributes
    params['id'] = data.id.iloc[1]
    files = (
        ('file', ('dir/text.txt', open('dir/text.txt', 'rb'))),
        # Even if multiple files are PUT, only the 1st is considered
        ('file', ('userdata', open('userdata.csv', 'rb'))),
    )
    secret = gramex.service.app.settings['cookie_secret']
    user = {'id': 'AB', 'role': 'CD'}
    r = requests.put(self.url, params=params, files=files, headers={
        'X-Gramex-User': create_signed_value(secret, 'user', json.dumps(user))
    })
    eq_(r.status_code, OK)
    data2 = gramex.data.filter(self.con, table='drive')
    new = data2[data2.id == data.id.iloc[1]].iloc[0]
    old = data[data.id == data.id.iloc[1]].iloc[0]
    for field in ('id', 'file', 'mime', 'ext', 'tag', 'cat'):
        eq_(new[field], params[field])
    eq_(new['path'], old['path'])
    eq_(new['size'], os.stat('dir/text.txt').st_size)
    # Upload timestamp is refreshed to "now" (2-second tolerance)
    ok_(time.time() - 2 <= new['date'] <= time.time())
    eq_(new['user_id'], user['id'])
    eq_(new['user_role'], user['role'])
    # Actual files are overwritten
    eq_(new['size'],
        os.stat(os.path.join(self.kwargs.path, new['path'])).st_size)
    # TEST: Nothing changes if ID is missing, even if file is present
    params['id'] = -1
    data = gramex.data.filter(self.con, table='drive')
    r = requests.put(self.url, params=params, files=files)
    data2 = gramex.data.filter(self.con, table='drive')
    afe(data, data2)
    # The modify: works even though we have a download override.
    # It sets the 'm' column to 'OK'
    data = pd.DataFrame(requests.get(self.url).json())
    ok_((data['m'] == 'OK').all())
def check_filter_db(self, dbname, url, na_position, sum_na=True):
    """filter() on a DB URL: table/query/queryfile/transform combos, args, groupby."""
    self.db.add(dbname)
    df = self.sales[self.sales['sales'] > 100]
    kwargs = {'na_position': na_position, 'sum_na': sum_na}
    self.check_filter(url=url, table='sales', **kwargs)
    self.check_filter(url=url, table='sales',
                      transform=lambda d: d[d['sales'] > 100], df=df, **kwargs)
    self.check_filter(url=url, table='sales',
                      query='SELECT * FROM sales WHERE sales > 100',
                      df=df, **kwargs)
    # queryfile takes precedence over the (deliberately wrong) inline query
    self.check_filter(url=url, table='sales',
                      query='SELECT * FROM sales WHERE sales > 999999',
                      queryfile=os.path.join(folder, 'sales-query.sql'),
                      df=df, **kwargs)
    self.check_filter(url=url, table=['sales', 'sales'],
                      query='SELECT * FROM sales WHERE sales > 100',
                      transform=lambda d: d[d['growth'] < 0.5],
                      df=df[df['growth'] < 0.5], **kwargs)
    self.check_filter(url=url,
                      query='SELECT * FROM sales WHERE sales > 100',
                      transform=lambda d: d[d['growth'] < 0.5],
                      df=df[df['growth'] < 0.5], **kwargs)
    self.check_filter(url=url, table='sales',
                      query='SELECT * FROM sales WHERE sales > 100',
                      transform=lambda d: d[d['growth'] < 0.5],
                      df=df[df['growth'] < 0.5], **kwargs)
    # Check both parameter substitutions -- {} formatting and : substitution
    afe(gramex.data.filter(url=url, table='{x}', args={'x': ['sales']}),
        self.sales)
    actual = gramex.data.filter(
        url=url, table='{兴}',
        args={
            '兴': ['sales'],
            'col': ['growth'],
            'val': [0],
            'city': ['South Plainfield'],
        },
        query='SELECT * FROM {兴} WHERE {col} > :val AND city = :city',
    )
    expected = self.sales[(self.sales['growth'] > 0) &
                          (self.sales['city'] == 'South Plainfield')]
    eqframe(actual, expected)
    # _by= _sort= _c=agg(s)
    by = ['product']
    aggs = ['growth|sum', 'sales|sum']
    sort = ['sales|sum']
    expected = df.groupby(by).agg(AttrDict([agg.split('|') for agg in aggs]))
    self.flatten_sort(expected, sort, sum_na, *aggs)
    params = {'_by': by, '_c': aggs, '_sort': sort, 'sales>': [100]}
    actual = gramex.data.filter(url, table='sales', args=params)
    eqframe(actual, expected)
    # Test invalid parameters
    with assert_raises(ValueError):
        gramex.data.filter(url=url, table=1,
                           query='SELECT * FROM sales WHERE sales > 100')
    with assert_raises(ValueError):
        gramex.data.filter(url=url, table={},
                           query='SELECT * FROM sales WHERE sales > 100')
    # Arguments with spaces raise an Exception
    with assert_raises(Exception):
        gramex.data.filter(url=url, table='{x}', args={'x': ['a b']})
    with assert_raises(Exception):
        gramex.data.filter(url=url, table='{x}',
                           args={'x': ['sales'], 'p': ['a b']},
                           query='SELECT * FROM {x} WHERE {p} > 0')
def eqframe(actual, expected, **options):
    '''Like assert_frame_equal / afe, but ignores the index.

    Note: mutates `expected` by overwriting its index with `actual`'s.
    '''
    expected.index = actual.index
    afe(actual, expected, **options)
def check(expected, **params):
    """Fetch /formhandler/dir with `params`; compare to `expected` (index ignored)."""
    response = self.get('/formhandler/dir', params=params)
    actual = pd.DataFrame(response.json())
    expected.index = actual.index
    afe(actual, expected, check_like=True)
def eq(args, expected):
    """GET `url` with `args`; compare the (optionally keyed) JSON to `expected`."""
    payload = self.get(url, params=args).json()
    # When `key` is set, the payload is a dict and the frame lives under it
    actual = pd.DataFrame(payload[key] if key else payload)
    expected.index = actual.index
    if len(expected) > 0:
        afe(actual, expected, check_like=True)
def test_download(self):
    """FormHandler _format downloads: content, Content-Type, Content-Disposition."""
    # Modelled on testlib.test_data.TestDownload
    big = self.sales[self.sales['sales'] > 100]
    by_growth = self.sales.sort_values('growth')
    big.index = range(len(big))
    by_growth.index = range(len(by_growth))
    out = self.get('/formhandler/file?_format=html')
    # Note: In Python 2, pd.read_html returns .columns.inferred_type=mixed
    # instead of unicode. So check column type only in PY3 not PY2
    afe(pd.read_html(out.content, encoding='utf-8')[0], self.sales,
        check_column_type=six.PY3)
    # HTML renders inline: no attachment disposition
    eq_(out.headers['Content-Type'], 'text/html;charset=UTF-8')
    eq_(out.headers.get('Content-Disposition'), None)
    out = self.get('/formhandler/file-multi?_format=html')
    result = pd.read_html(BytesIO(out.content), encoding='utf-8')
    afe(result[0], big, check_column_type=six.PY3)
    afe(result[1], by_growth, check_column_type=six.PY3)
    eq_(out.headers['Content-Type'], 'text/html;charset=UTF-8')
    eq_(out.headers.get('Content-Disposition'), None)
    out = self.get('/formhandler/file?_format=xlsx')
    afe(pd.read_excel(BytesIO(out.content)), self.sales)
    eq_(out.headers['Content-Type'], xlsx_mime_type)
    eq_(out.headers['Content-Disposition'], 'attachment;filename=data.xlsx')
    # Multi-frame xlsx: one sheet per dataset
    out = self.get('/formhandler/file-multi?_format=xlsx')
    result = pd.read_excel(BytesIO(out.content), sheet_name=None)
    afe(result['big'], big)
    afe(result['by-growth'], by_growth)
    eq_(out.headers['Content-Type'], xlsx_mime_type)
    eq_(out.headers['Content-Disposition'], 'attachment;filename=data.xlsx')
    out = self.get('/formhandler/file?_format=csv')
    # CSV downloads begin with a UTF-8 BOM for Excel compatibility
    ok_(out.content.startswith(''.encode('utf-8-sig')))
    afe(pd.read_csv(BytesIO(out.content), encoding='utf-8'), self.sales)
    eq_(out.headers['Content-Type'], 'text/csv;charset=UTF-8')
    eq_(out.headers['Content-Disposition'], 'attachment;filename=data.csv')
    # Multi-frame CSV: section title line, frame rows, separator, next section
    out = self.get('/formhandler/file-multi?_format=csv')
    lines = out.content.splitlines(True)
    eq_(lines[0], 'big\n'.encode('utf-8-sig'))
    actual = pd.read_csv(BytesIO(b''.join(lines[1:len(big) + 2])),
                         encoding='utf-8')
    afe(actual, big)
    eq_(lines[len(big) + 3], 'by-growth\n'.encode('utf-8'))
    actual = pd.read_csv(BytesIO(b''.join(lines[len(big) + 4:])),
                         encoding='utf-8')
    afe(actual, by_growth)
    eq_(out.headers['Content-Type'], 'text/csv;charset=UTF-8')
    eq_(out.headers['Content-Disposition'], 'attachment;filename=data.csv')
    # ?_download= overrides the attachment filename for every format
    for fmt in ['csv', 'html', 'json', 'xlsx']:
        out = self.get('/formhandler/file?_format=%s&_download=test.%s' %
                       (fmt, fmt))
        eq_(out.headers['Content-Disposition'],
            'attachment;filename=test.%s' % fmt)
        out = self.get(
            '/formhandler/file-multi?_format=%s&_download=test.%s' % (fmt, fmt))
        eq_(out.headers['Content-Disposition'],
            'attachment;filename=test.%s' % fmt)
def eq(self, url, expected):
    """GET `url` as CSV and compare with `expected` after resetting its index.

    Note: mutates `expected` by replacing its index with 0..n-1.
    """
    content = self.get(url).content
    actual = pd.read_csv(BytesIO(content), encoding='utf-8')
    expected.index = range(len(expected))
    afe(actual, expected, check_column_type=six.PY3)