Example #1
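These snippets are collected from Gramex's test suite. They assume a shared set of imports and fixtures, roughly like the following (a hedged reconstruction; the original test modules may use different aliases or paths):

    import io, json, os, re, time
    import pandas as pd
    import requests
    import six
    import sqlalchemy as sa
    import gramex.cache, gramex.data, gramex.ml
    from io import BytesIO
    from http.client import (OK, NOT_FOUND, BAD_REQUEST,
                             REQUEST_ENTITY_TOO_LARGE, UNSUPPORTED_MEDIA_TYPE)
    from lxml import etree
    from mimetypes import guess_type
    from nose.tools import eq_, ok_, assert_raises
    from orderedattrdict import AttrDict
    from tornado.web import create_signed_value
    from unittest import SkipTest
    # afe asserts that two DataFrames are equal; older pandas exposed it under
    # pandas.util.testing, newer pandas under pandas.testing
    from pandas.testing import assert_frame_equal as afe
    # folder, cache_folder, sales_file, xlsx_mime_type, hashfn and the various
    # self.* fixtures are defined elsewhere in the test suite (not shown)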
 def test_path_arg(self):
     url = '/formhandler/%s/formhandler/sales?group=product&col=city&val=Bangalore'
     for sub_url in ['path_arg', 'path_kwarg']:
         actual = pd.DataFrame(self.get(url % sub_url).json())
         expected = self.sales[self.sales['city'] == 'Bangalore'].groupby('product')
         expected = expected['sales'].sum().reset_index()
         afe(actual, expected, check_like=True)
Example #2
 def test_chart(self):
     r = self.get('/formhandler/chart',
                  data={
                      '_format': 'svg',
                      'chart': 'barplot',
                      'x': 'देश',
                      'y': 'sales',
                      'dpi': 72,
                      'width': 500,
                      'height': 300,
                  })
     tree = etree.fromstring(r.text.encode('utf-8'))
     eq_(tree.get('viewBox'), '0 0 500 300')
     # TODO: expand on test cases
     # Check spec, data for vega, vega-lite, vegam formats
     base = '/formhandler/chart?_format={}'
     data = pd.DataFrame(self.get(base.format('json')).json())
     for fmt in {'vega', 'vega-lite', 'vegam'}:
         r = self.get(base.format(fmt))
         var = json.loads(re.findall(r'}\)\((.*?)}\)', r.text)[-1] + '}')
         var = var['spec']
         if 'fromjson' in var:
             df = var['fromjson'][0]['data']
             var['fromjson'][0]['data'] = '__DATA__'
         else:
             df = var.pop('data')
             df = (df[0] if isinstance(df, list) else df)['values']
         yaml_path = os.path.join(folder, '{}.yaml'.format(fmt))
         spec = gramex.cache.open(yaml_path, 'yaml')
         afe(pd.DataFrame(df), data)
         self.assertDictEqual(var, spec)
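Why the re.findall above works: the vega/vega-lite/vegam endpoints are assumed to return a JS snippet that passes the spec object as the final argument of a function call, i.e. something shaped like "})({...})". A minimal sketch of the extraction (the embedded script here is illustrative, not Gramex's exact output):

    import json
    import re

    text = 'window.onload = (function (v) { render(v) })({"spec": {"width": 500}})'
    # The lazy capture stops before the final "})", so the closing brace it
    # consumed is appended back before parsing
    var = json.loads(re.findall(r'}\)\((.*?)}\)', text)[-1] + '}')
    assert var == {'spec': {'width': 500}}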
Example #3
 def test_save(self):
     path = os.path.join(cache_folder, 'data.csv')
     data = pd.read_csv(path, encoding='utf-8')
     config = {
         'csv': dict(index=False, ignore_keyword=1),
         'xlsx': dict(index=False, sheet_name='Sheet1', ignore_keyword=1),
         'html': dict(index=False, escape=False, ignore_keyword=1),
         'hdf': dict(index=False,
                     key='data',
                     format='fixed',
                     ignore_keyword=1),
         'json': dict(orient='records', ignore_keyword=1),
         # 'stata': dict(index=False),   # cannot test since it doesn't support unicode
     }
     for ext, kwargs in config.items():
         target = os.path.join(cache_folder, 'killme.' + ext)
         gramex.cache.save(data, target, **kwargs)
         try:
             result = gramex.cache.open(target)
             if ext == 'html':
                 result = result[0]
             elif ext == 'json':
                 result = pd.DataFrame(result)
             afe(result, data)
         finally:
             os.remove(target)
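A hedged round-trip sketch of the gramex.cache.save / gramex.cache.open pattern this test exercises (the file name and frame are illustrative):

    df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']})
    # save() dispatches on the extension and forwards keyword args to the writer
    gramex.cache.save(df, 'out.xlsx', index=False, sheet_name='Sheet1')
    # open() likewise picks its loader from the extension
    afe(gramex.cache.open('out.xlsx'), df)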
Example #4
    def test_transform(self):
        # Check that transform function is applied and used as a cache key
        cache = {}
        path = os.path.join(cache_folder, 'data.csv')

        data = gramex.cache.open(path, 'csv', transform=len, _cache=cache)
        eq_(data, len(pd.read_csv(path)))  # noqa - ignore encoding
        cache_key = (path, 'csv', hashfn(len), frozenset([]))
        self.assertIn(cache_key, cache)

        def transform2(d):
            return d['a'].sum()

        data = gramex.cache.open(path,
                                 'csv',
                                 transform=transform2,
                                 _cache=cache)
        eq_(data, pd.read_csv(path)['a'].sum())  # noqa - ignore encoding
        cache_key = (path, 'csv', hashfn(transform2), frozenset([]))
        self.assertIn(cache_key, cache)

        # Check that non-callable transforms are ignored but used as cache key
        data = gramex.cache.open(path, 'csv', transform='ignore', _cache=cache)
        afe(data, pd.read_csv(path))  # noqa - ignore encoding
        cache_key = (path, 'csv', hashfn('ignore'), frozenset([]))
        self.assertIn(cache_key, cache)

        # Check that temporary caches are hashed by function
        v = 1
        data = gramex.cache.open(path, 'csv', lambda x: v, _cache=cache)
        eq_(data, 1)
        v = 2
        data = gramex.cache.open(path, 'csv', lambda x: v, _cache=cache)
        eq_(data, 2)
Example #5
 def check(reload):
     result, reloaded = gramex.cache.open(path,
                                          'csv',
                                          _reload_status=True,
                                          encoding='utf-8')
     eq_(reloaded, reload)
     afe(result, expected)
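What a harness like self.check_file_cache (see Examples #10 and #14 below) presumably does with this check(): assert that gramex.cache.open re-reads only when the file changes. A hedged sketch of that assumed behavior:

    check(reload=True)     # first open: the file is read from disk
    check(reload=False)    # second open: served from cache, no re-read
    os.utime(path, None)   # touch the file to bump its mtime
    check(reload=True)     # the cache notices the change and re-reads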
Example #6
 def test_insert_new_file(self):
     new_files = [
         {
             'url': os.path.join(folder, 'insert.csv'),
             'encoding': 'utf-8'
         },
         {
             'url': os.path.join(folder, 'insert.xlsx'),
             'sheet_name': 'test'
         },
         {
             'url': os.path.join(folder, 'insert.hdf'),
             'key': 'test'
         },
     ]
     for conf in new_files:
         if os.path.exists(conf['url']):
             os.remove(conf['url'])
         self.tmpfiles.append(conf['url'])
         gramex.data.insert(args=self.insert_rows, **conf)
         # Check if added rows are correct
         try:
             actual = gramex.data.filter(**conf)
         except ValueError:
             # TODO: This is a temporary fix for NumPy 1.16.2, Tables 3.4.4
             # https://github.com/pandas-dev/pandas/issues/24839
             if conf['url'].endswith('.hdf') and pd.np.__version__.startswith('1.16'):
                 raise SkipTest('Ignore NumPy 1.16.2 / PyTables 3.4.4 quirk')
             # Any other ValueError is a genuine failure: re-raise it
             raise
         expected = pd.DataFrame(self.insert_rows)
         actual['sales'] = actual['sales'].astype(float)
         expected['sales'] = expected['sales'].astype(float)
         afe(actual, expected, check_like=True)
Example #7
 def test_insert_new_file(self):
     new_files = [
         {
             'url': os.path.join(folder, 'insert.csv'),
             'encoding': 'utf-8'
         },
         {
             'url': os.path.join(folder, 'insert.xlsx'),
             'sheet_name': 'test'
         },
         {
             'url': os.path.join(folder, 'insert.hdf'),
             'key': 'test'
         },
     ]
     for conf in new_files:
         if os.path.exists(conf['url']):
             os.remove(conf['url'])
         self.tmpfiles.append(conf['url'])
         gramex.data.insert(args=self.insert_rows, **conf)
         # Check if added rows are correct
         actual = gramex.data.filter(**conf)
         expected = pd.DataFrame(self.insert_rows)
         actual['sales'] = actual['sales'].astype(float)
         expected['sales'] = expected['sales'].astype(float)
         afe(actual, expected, check_like=True)
Example #8
 def check(q, result, **kwargs):
     kwargs['api'] = 'mock'
     actual = gramex.ml.translate(*q, **kwargs)
     expected = pd.DataFrame([
         {'source': item[0], 'target': item[1], 'q': item[2], 't': item[3]}
         for item in result
     ])
     actual.index = expected.index
     afe(actual, expected, check_like=True)
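The call shape implied by check() above: positional strings to translate plus keyword options, returning a DataFrame. A hedged sketch (the source/target keyword names are assumptions, not verified against gramex.ml):

    out = gramex.ml.translate('Apple', 'Orange', source='en', target='de', api='mock')
    # one row per input, with columns: source, target, q (query), t (translation)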
Example #9
 def test_date_comparison(self):
     data = gramex.cache.open(os.path.join(folder, 'sales.xlsx'),
                              'xlsx',
                              sheet_name='dates')
     for dt in ('2018-01-10', '2018-01-20T15:34Z'):
         url = '/formhandler/dates?date>=%s&_format=xlsx' % dt
         actual = pd.read_excel(BytesIO(self.get(url).content))
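         # FormHandler parses ?date>=X as "date > X": the trailing "=" merely
         # separates key from value (>~= would mean >=), hence the strict > below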
         expected = data[data['date'] > pd.to_datetime(dt)]
         expected.index = actual.index
         afe(actual, expected, check_like=True)
Example #10
    def test_open_jsondata(self):
        path = os.path.join(cache_folder, 'data.jsondata')
        expected = pd.read_json(path)

        def check(reload):
            result, reloaded = gramex.cache.open(path, 'jsondata', _reload_status=True)
            eq_(reloaded, reload)
            afe(result, expected)

        self.check_file_cache(path, check)
        afe(gramex.cache.open(path), gramex.cache.open(path, 'jsondata'))
Example #11
 def test_date_comparison(self):
     data = gramex.cache.open(os.path.join(folder, 'sales.xlsx'), 'xlsx', sheet_name='dates')
     for dt in ('2018-01-10', '2018-01-20T15:34Z'):
         url = '/formhandler/dates?date>=%s' % dt
         r = self.get(url, params={'_format': 'json', '_meta': 'y'})
         # Check ISO output
         pd.to_datetime(pd.DataFrame(r.json())['date'], format='%Y-%m-%dT%H:%M:%S.%fZ')
         actual = pd.read_excel(BytesIO(self.get(url, params={'_format': 'xlsx'}).content))
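         # '2018-01-20T15:34Z' parses as timezone-aware, but the Excel dates are
         # naive, so the timezone is dropped before comparing: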
         expected = data[data['date'] > pd.to_datetime(dt).tz_localize(None)]
         expected.index = actual.index
         afe(actual, expected, check_like=True)
Example #12
    def check_insert_db(self, url, dbname):
        self.db.add(dbname)

        # Insert 2 rows in the EMPTY database with a primary key
        rows = self.insert_rows.copy()
        rows['primary_key'] = [1, 2]
        meta = {}
        inserted = gramex.data.insert(url, meta, args=rows, table='test_insert', id='primary_key')
        eq_(inserted, 2, 'insert() returns # of records added')
        # metadata has no filters applied, and no columns ignored
        eq_(meta['filters'], [])
        eq_(meta['ignored'], [])
        # Actual data created has the same content, factoring in type conversion
        actual = gramex.data.filter(url, table='test_insert')
        expected = pd.DataFrame(rows)
        for df in [actual, expected]:
            df['sales'] = df['sales'].astype(float)
        afe(actual, expected, check_like=True)
        # Check if it created a primary key
        engine = sa.create_engine(url)
        insp = sa.inspect(engine)
        ok_('primary_key' in insp.get_pk_constraint('test_insert')['constrained_columns'])
        # Inserting duplicate keys raises an Exception
        with assert_raises(sa.exc.IntegrityError):
            gramex.data.insert(url, args=rows, table='test_insert', id='primary_key')

        # Inserting a single row returns meta['data']['inserted'] with the primary key
        rows = {'primary_key': [3], 'देश': ['भारत'], 'city': ['London'], 'sales': ['']}
        inserted = gramex.data.insert(url, meta, args=rows, table='test_insert', id='primary_key')
        eq_(inserted, 1, 'insert() returns # of records added')
        eq_(meta['inserted'], [{'primary_key': 3}])

        # Adding multiple primary keys via id= is supported
        rows = {'a': [1, 2], 'b': [True, False], 'x': [3, None], 'y': [None, 'y']}
        inserted = gramex.data.insert(url, meta, args=rows, table='t2', id=['a', 'b'])
        eq_(inserted, 2, 'insert() returns # of records added')
        eq_(insp.get_pk_constraint('t2')['constrained_columns'], ['a', 'b'],
            'multiple primary keys are created')
        # Multiple primary keys are returned
        rows = {'a': [3], 'b': [True]}
        inserted = gramex.data.insert(url, meta, args=rows, table='t2', id=['a', 'b'])
        eq_(meta['inserted'], [{'a': 3, 'b': True}])

        # Primary keys not specified in the input (AUTO INCREMENT) are returned
        gramex.data.alter(url, 't3', columns={
            'id': {'type': 'int', 'primary_key': True, 'autoincrement': True},
            'x': 'varchar(10)'
        })
        # Single inserts return the ID
        gramex.data.insert(url, meta, args={'x': ['a']}, table='t3')
        eq_(meta['inserted'], [{'id': 1}])
        gramex.data.insert(url, meta, args={'x': ['b']}, table='t3')
        eq_(meta['inserted'], [{'id': 2}])
Example #13
 def test_insert_file(self):
     data = gramex.cache.open(self.insert_file, 'xlsx')
     gramex.data.insert(url=self.insert_file, args=self.insert_rows)
     new_data = gramex.cache.open(self.insert_file, 'xlsx')
     # Check original data is identical
     afe(data, new_data.head(len(data)))
     # Check if added rows are correct
     added_rows = pd.DataFrame(self.insert_rows)
     added_rows['sales'] = added_rows['sales'].astype(float)
     added_rows['growth'] = pd.np.nan
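     # Note: pd.np is the old pandas alias for numpy (removed in pandas 1.0);
     # newer code would use numpy.nan directly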
     added_rows.index = new_data.tail(2).index
     afe(new_data.tail(2), added_rows, check_like=True)
Example #14
    def test_open_csv(self):
        path = os.path.join(cache_folder, 'data.csv')
        expected = pd.read_csv(path, encoding='utf-8')

        def check(reload):
            result, reloaded = gramex.cache.open(path, 'csv', _reload_status=True,
                                                 encoding='utf-8')
            eq_(reloaded, reload)
            afe(result, expected)

        self.check_file_cache(path, check)
        afe(gramex.cache.open(path), gramex.cache.open(path, 'csv'))
Example #15
    def test_args(self):
        # url: and sheet_name: accepts query formatting for files
        url = '/formhandler/arg-url?path=sales&sheet=sales&ext=excel'
        afe(pd.DataFrame(self.get(url).json()), self.sales, check_like=True)
        url = '/formhandler/arg-url?path=sales&sheet=census'
        census = gramex.cache.open(os.path.join(folder, 'sales.xlsx'),
                                   sheet_name='census')
        afe(pd.DataFrame(self.get(url).json()), census, check_like=True)
        # url: and table: accept query formatting for SQLAlchemy
        url = '/formhandler/arg-table?db=formhandler&table=sales'
        afe(pd.DataFrame(self.get(url).json()), self.sales, check_like=True)

        # url: and table: accept query formatting for SQLAlchemy
        # TODO: In Python 2, unicode keys don't work well on Tornado. So use safe keys
        key, val = ('product', '芯片') if six.PY2 else ('देश', 'भारत')
        url = '/formhandler/arg-query?db=formhandler&col=%s&val=%s' % (key,
                                                                       val)
        actual = pd.DataFrame(self.get(url).json())
        expected = self.sales[self.sales[key] == val]
        expected.index = actual.index
        afe(actual, expected, check_like=True)

        # Files with ../ etc should be skipped
        self.check('/formhandler/arg-url?path=../sales',
                   code=500,
                   text='KeyError')
        # Test that the ?skip= parameter is used to find the table.
        self.check('/formhandler/arg-table?db=formhandler&table=sales&skip=ab',
                   code=500,
                   text='NoSuchTableError')
        # Spaces are ignored in SQLAlchemy query. So ?skip= will be a missing key
        self.check(
            '/formhandler/arg-table?db=formhandler&table=sales&skip=a b',
            code=500,
            text='KeyError')
Example #16
 def test_file(self):
     self.check_filter(url=sales_file)
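     # Non-callable transforms are ignored when loading but still key the cache
     # (see test_transform above), so '2.1' and '2.2' load the same data: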
     afe(
         gramex.data.filter(url=sales_file, transform='2.1', sheet_name='dummy'),
         gramex.cache.open(sales_file, 'excel', transform='2.2', sheet_name='dummy'),
     )
     self.check_filter(
         url=sales_file,
         transform=lambda d: d[d['sales'] > 100],
         df=self.sales[self.sales['sales'] > 100],
     )
     with assert_raises(ValueError):
         gramex.data.filter(url='', engine='nonexistent')
     with assert_raises(OSError):
         gramex.data.filter(url='nonexistent')
     with assert_raises(TypeError):
         gramex.data.filter(url=os.path.join(folder, 'test_cache_module.py'))
Example #17
 def check_insert_db(self, url, dbname):
     self.db.add(dbname)
     rows = self.insert_rows.copy()
     rows['index'] = [1, 2]  # create a primary key
     inserted = gramex.data.insert(url, args=rows, table='test_insert', id='index')
     eq_(inserted, 2)
     # query table here
     actual = gramex.data.filter(url, table='test_insert')
     expected = pd.DataFrame(rows)
     for df in [actual, expected]:
         df['sales'] = df['sales'].astype(float)
     afe(actual, expected, check_like=True)
     # Check if it created a primary key
     engine = sa.create_engine(url)
     insp = sa.inspect(engine)
     ok_('index' in insp.get_pk_constraint('test_insert')['constrained_columns'])
     # Inserting duplicate keys raises an Exception
     with assert_raises(sa.exc.IntegrityError):
         gramex.data.insert(url, args=rows, table='test_insert', id='index')
Example #18
 def test_add_handler_get(self):
     self.check('/func/total/40/2', text='42.0')
     self.check('/func/total/40/2?items=10', text='52.0')
     self.check('/func/total/40/2?items=10&items=10', text='62.0')
     self.check('/func/name_age/johndoe/age/42',
                text='johndoe is 42 years old.')
     self.check('/func/name_age', text='alpha is 10 years old.')
     self.check('/func/name_age?name=johndoe&age=42',
                text='johndoe is 42 years old.')
     # In case of multiple kwargs, the last parameter is picked
     self.check('/func/name_age?name=x&name=y&age=1&age=2',
                text='y is 2 years old.')
     # When type hints are violated:
     self.check('/func/hints?name=johndoe&age=42.3', code=500)
     # When multiple arguments are passed:
     self.check('/func/total?items=1&items=2&items=3', text='6.0')
     self.check('/func/multilist?items=1&items=2&items=3&start=1',
                text='7.0')
     # Positional args with types
     self.check('/func/strtotal?items=a&items=b&items=c', text='abc')
     # Test native types. Note: "i=false" won't work -- use "i=" since it's a np.bool8
     # Note: datetimes must be quoted, since they'll be read as JSON usually.
     self.check(
         '/func/nativetypes?a=3&b=1.5&c=false&d=d&e=null&f=3&g=1.5&h=h&i=',
         text=''.join([
             '3', '1.5', 'false', 'd', '', '3', '1.5', 'h', 'false',
             '"2020-01-01T00:00:00+00:00"', '{"a":3,"b":1.5}', '[3,1.5]'
         ]))
     self.check('/func/greet', text='Hello, Stranger!')
     self.check('/func/greet?name=gramex', text='Hello, gramex!')
     self.check('/func/multilist?items=1&items=2&items=3&start=1',
                text='7.0')
     sales = self.check('/func/sales').json()
     afe(pd.DataFrame(sales), gramex.cache.open('sales.xlsx', rel=True))
     self.check('/func/content/003.json',
                text='{"x":3}',
                headers={'Content-Type': 'application/json'})
     self.check('/func/content/003.txt',
                text='x=3',
                headers={'Content-Type': 'text/plain'})
Example #19
    def test_download_excel(self):
        out = gramex.data.download(self.dummy, format='xlsx')
        afe(pd.read_excel(io.BytesIO(out)), self.dummy)

        out = gramex.data.download({'dummy': self.dummy, 'sales': self.sales}, format='xlsx')
        result = pd.read_excel(io.BytesIO(out), sheet_name=None)
        afe(result['dummy'], self.dummy)
        afe(result['sales'], self.sales)
Example #20
 def check_alter(self, url, id=999, age=4.5):
     # Add a new column of types str, int, float.
     # Also test default, nullable
     gramex.data.alter(url, table='sales', columns={
         'id': {'type': 'int'},
         'email': {'type': 'varchar(99)', 'nullable': True, 'default': 'none'},
         'age': {'type': 'float', 'nullable': False, 'default': age},
     })
     # New tables also support primary_key, autoincrement
     gramex.data.alter(url, table='new', columns={
         'id': {'type': 'int', 'primary_key': True, 'autoincrement': True},
         'email': {'type': 'varchar(99)', 'nullable': True, 'default': 'none'},
         'age': {'type': 'float', 'nullable': False, 'default': age},
     })
     engine = sa.create_engine(url)
     meta = sa.MetaData(bind=engine)
     meta.reflect()
     # Test types
     for table in (meta.tables['sales'], meta.tables['new']):
         eq_(table.columns.id.type.python_type, int)
         # eq_(table.columns.id.nullable, True)
         eq_(table.columns.email.type.python_type, str)
         # eq_(table.columns.email.nullable, True)
         eq_(table.columns.age.type.python_type, float)
         eq_(table.columns.age.nullable, False)
     # sales: insert and test row for default and types
     gramex.data.insert(url, table='sales', args={'id': [id]})
     result = gramex.data.filter(url, table='sales', args={'id': [id]})
     eq_(len(result), 1)
     eq_(result['id'].iloc[0], id)
     eq_(result['email'].iloc[0], 'none')
     eq_(result['age'].iloc[0], age)
     # new: test types
     gramex.data.insert(url, table='new', args={'age': [3.0, 4.0]})
     afe(gramex.data.filter(url, table='new'), pd.DataFrame([
         {'id': 1, 'email': 'none', 'age': 3.0},
         {'id': 2, 'email': 'none', 'age': 4.0},
     ]))
Example #21
    def test_download_html(self):
        # Note: In Python 2, pd.read_html returns .columns.inferred_type=mixed
        # instead of unicode. So check column type only in PY3, not PY2
        out = gramex.data.download(self.dummy, format='html')
        result = pd.read_html(io.BytesIO(out), encoding='utf-8')[0]
        afe(result, self.dummy, check_column_type=six.PY3)

        out = gramex.data.download(AttrDict([('dummy', self.dummy),
                                             ('sales', self.sales)]),
                                   format='html')
        result = pd.read_html(io.BytesIO(out), encoding='utf-8')
        afe(result[0], self.dummy, check_column_type=six.PY3)
        afe(result[1], self.sales, check_column_type=six.PY3)
Example #22
    def test_download_json(self):
        out = gramex.data.download(self.dummy, format='json')
        afe(pd.read_json(io.BytesIO(out)), self.dummy, check_like=True)

        out = gramex.data.download({'dummy': self.dummy, 'sales': self.sales}, format='json')
        result = json.loads(out, object_pairs_hook=AttrDict)

        def from_json(key):
            s = json.dumps(result[key])
            # PY2 returns str (binary). PY3 returns str (unicode). Ensure it's binary
            if isinstance(s, six.text_type):
                s = s.encode('utf-8')
            return pd.read_json(io.BytesIO(s))

        afe(from_json('dummy'), self.dummy, check_like=True)
        afe(from_json('sales'), self.sales, check_like=True)
Example #23
    def test_download_csv(self):
        out = gramex.data.download(self.dummy, format='csv')
        ok_(out.startswith(''.encode('utf-8-sig')))
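        # ''.encode('utf-8-sig') is just the UTF-8 BOM (b'\xef\xbb\xbf'): the
        # check asserts the CSV download starts with a BOM so Excel reads it as UTF-8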
        afe(pd.read_csv(io.BytesIO(out), encoding='utf-8'), self.dummy)

        out = gramex.data.download(AttrDict([
            ('dummy', self.dummy),
            ('sales', self.sales),
        ]), format='csv')
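        # Multi-frame CSV layout implied by the slicing below: each frame is
        # preceded by its name on a line of its own, with a blank line between frames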
        lines = out.splitlines(True)
        eq_(lines[0], 'dummy\n'.encode('utf-8-sig'))
        actual = pd.read_csv(io.BytesIO(b''.join(lines[1:4])), encoding='utf-8')
        afe(actual, self.dummy)

        eq_(lines[5], 'sales\n'.encode('utf-8'))
        actual = pd.read_csv(io.BytesIO(b''.join(lines[6:])), encoding='utf-8')
        afe(actual, self.sales)
Example #24
    def test_upload(self):
        ok_(os.path.isfile(self.dbpath))
        data = gramex.data.filter(self.con, table='drive', args={})
        cols = ('id', 'file', 'ext', 'path', 'size', 'mime', 'user_id',
                'user_role', 'tag', 'cat')
        for col in cols:
            ok_(col in data.columns, col)

        self.check_upload(dict(file='userdata.csv'))
        # If the uploaded filename has a path, only the basename is considered
        # Extension is stored in lowercase even if filename is in uppercase
        self.check_upload(dict(file='dir/image.JPG'))

        # If filename is repeated, it's extended randomly
        data = self.check_upload(dict(file='userdata.csv'))
        path = data.path.iloc[0]
        ok_(path != 'userdata.csv')
        ok_(path.startswith('userdata'))
        ok_(path.endswith('.csv'))

        # If filename has weird characters, it's hyphenated
        data = self.check_upload(dict(file='userdata.csv', name='β x.csv'))
        eq_(data.path.iloc[0], '--x.csv')

        # If content-type is available, it's used. Else it's guessed
        data = self.check_upload(dict(file='userdata.csv', mime='text/plain'))
        eq_(data.mime.iloc[0], 'text/plain')
        data = self.check_upload(dict(file='userdata.csv'))
        eq_(data.mime.iloc[0], guess_type('userdata.csv')[0])

        # Large files fail
        self.check_upload(dict(file='gramex.yaml'),
                          code=REQUEST_ENTITY_TOO_LARGE,
                          check=False)
        # .yaml disallowed because of allow
        self.check_upload(dict(file='gramextest.yaml'),
                          code=UNSUPPORTED_MEDIA_TYPE,
                          check=False)
        # .py disallowed because of exclude (though allow allows it)
        self.check_upload(dict(file='server.py'),
                          code=UNSUPPORTED_MEDIA_TYPE,
                          check=False)

        # Multi-uploads are supported, with tags
        self.check_upload(dict(file='userdata.csv', tag='t1'),
                          dict(file='actors.csv', tag='t2'))
        r = requests.post(self.url,
                          files=(
                              ('file', ('x.csv', open('userdata.csv', 'rb'))),
                              ('file', ('y.csv', open('userdata.csv', 'rb'))),
                          ),
                          data={
                              'tag': ['t1'],
                              'cat': ['c1', 'c2', 'c3'],
                              'rand': ['x', 'y']
                          })
        eq_(r.status_code, OK)
        data = gramex.data.filter(self.con,
                                  table='drive').sort_values('id').tail(2)
        # If there are insufficient tags, they become empty strings
        eq_(data.tag.tolist(), ['t1', ''])
        # If there are more tags, they're truncated
        eq_(data.cat.tolist(), ['c1', 'c2'])
        # If there are irrelevant fields, they're ignored
        ok_('rand' not in data.columns)

        # ?id=..&_download downloads the file
        data = self.check_upload(dict(file='dir/index.html'))
        r = requests.get(self.url,
                         params={
                             '_download': '',
                             'id': data.id.iloc[0]
                         })
        eq_(r.headers['Content-Disposition'],
            'attachment; filename="index.html"')
        # TODO: FormHandler returns Content-Type using _format, so don't check for Content-Type
        # eq_(r.headers['Content-Type'], 'text/html')
        # Serves file with correct length despite unicode
        eq_(int(r.headers['Content-Length']),
            os.stat('dir/index.html').st_size)
        # If the ID is invalid, raises a NOT FOUND
        r = requests.get(self.url, params={'_download': '', 'id': 9999})
        eq_(r.status_code, NOT_FOUND)
        # If there are 2 IDs, it doesn't download
        r = requests.get(self.url, params={'_download': '', 'id': [0, 1]})
        eq_(r.headers['Content-Type'], 'application/json')

        # User attributes are captured on all files
        user = {'id': 'X', 'role': 'Y'}
        data = self.check_upload(dict(file='userdata.csv'),
                                 dict(file='actors.csv'),
                                 user=user)
        for index in range(2):
            eq_(data.user_id.iloc[index], 'X')
            eq_(data.user_role.iloc[index], 'Y')

        # DELETE ?id=... deletes the specified file
        data = gramex.data.filter(self.con, table='drive')
        indices = (0, 3, 6)
        for index in indices:
            r = requests.delete(self.url, params={'id': [data.id.iloc[index]]})
        data2 = gramex.data.filter(self.con, table='drive')
        eq_(len(data2), len(data) - len(indices))
        for index in indices:
            # Entry is removed from the database
            ok_(data.id.iloc[index] not in data2.id.values)
            # File is removed from the file system
            ok_(not os.path.exists(
                os.path.join(self.kwargs.path, data.path.iloc[index])))

        # DELETE without ?id= does not delete
        r = requests.delete(self.url)
        eq_(r.status_code, BAD_REQUEST)

        # PUT w/o file upload updates file, mime, ext, tags, etc.
        # NOT path, size, date, user_*
        data = gramex.data.filter(self.con, table='drive')
        params = {
            'id': data.id.iloc[0],
            'file': 'a.x',
            'ext': '.x',
            'mime': 'text/x',
            'tag': 't',
            'cat': 'c',
            'path': 'a.x',
            'size': 100,
            'date': 100,
            'user_id': 'A',
            'user_role': 'B'
        }
        r = requests.put(self.url, params=params)
        eq_(r.status_code, OK)
        data2 = gramex.data.filter(self.con, table='drive')
        new = data2[data2.id == data.id.iloc[0]].iloc[0]
        old = data[data.id == data.id.iloc[0]].iloc[0]
        for field in ('id', 'file', 'mime', 'ext', 'tag', 'cat'):
            eq_(new[field], params[field])
        for field in ('path', 'size', 'date', 'user_id', 'user_role'):
            eq_(new[field], old[field])
        # ... but with file upload updates size, date and user attributes
        params['id'] = data.id.iloc[1]
        files = (
            ('file', ('dir/text.txt', open('dir/text.txt', 'rb'))),
            # Even if multiple files are PUT, only the 1st is considered
            ('file', ('userdata', open('userdata.csv', 'rb'))),
        )
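        # create_signed_value is tornado.web's cookie-signing helper: signing the
        # user JSON with the app's cookie_secret lets X-Gramex-User be trusted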
        secret = gramex.service.app.settings['cookie_secret']
        user = {'id': 'AB', 'role': 'CD'}
        r = requests.put(self.url,
                         params=params,
                         files=files,
                         headers={
                             'X-Gramex-User':
                             create_signed_value(secret, 'user',
                                                 json.dumps(user))
                         })
        eq_(r.status_code, OK)
        data2 = gramex.data.filter(self.con, table='drive')
        new = data2[data2.id == data.id.iloc[1]].iloc[0]
        old = data[data.id == data.id.iloc[1]].iloc[0]
        for field in ('id', 'file', 'mime', 'ext', 'tag', 'cat'):
            eq_(new[field], params[field])
        eq_(new['path'], old['path'])
        eq_(new['size'], os.stat('dir/text.txt').st_size)
        ok_(time.time() - 2 <= new['date'] <= time.time())
        eq_(new['user_id'], user['id'])
        eq_(new['user_role'], user['role'])
        # Actual files are overwritten
        eq_(new['size'],
            os.stat(os.path.join(self.kwargs.path, new['path'])).st_size)

        # TEST: Nothing changes if ID is missing, even if file is present
        params['id'] = -1
        data = gramex.data.filter(self.con, table='drive')
        r = requests.put(self.url, params=params, files=files)
        data2 = gramex.data.filter(self.con, table='drive')
        afe(data, data2)

        # The modify: works even though we have a download override.
        # It sets the 'm' column to 'OK'
        data = pd.DataFrame(requests.get(self.url).json())
        ok_((data['m'] == 'OK').all())
Example #25
    def check_filter_db(self, dbname, url, na_position, sum_na=True):
        self.db.add(dbname)
        df = self.sales[self.sales['sales'] > 100]
        kwargs = {'na_position': na_position, 'sum_na': sum_na}
        self.check_filter(url=url, table='sales', **kwargs)
        self.check_filter(url=url,
                          table='sales',
                          transform=lambda d: d[d['sales'] > 100],
                          df=df,
                          **kwargs)
        self.check_filter(url=url,
                          table='sales',
                          query='SELECT * FROM sales WHERE sales > 100',
                          df=df,
                          **kwargs)
        self.check_filter(url=url,
                          table='sales',
                          query='SELECT * FROM sales WHERE sales > 999999',
                          queryfile=os.path.join(folder, 'sales-query.sql'),
                          df=df,
                          **kwargs)
        self.check_filter(url=url,
                          table=['sales', 'sales'],
                          query='SELECT * FROM sales WHERE sales > 100',
                          transform=lambda d: d[d['growth'] < 0.5],
                          df=df[df['growth'] < 0.5],
                          **kwargs)
        self.check_filter(url=url,
                          query='SELECT * FROM sales WHERE sales > 100',
                          transform=lambda d: d[d['growth'] < 0.5],
                          df=df[df['growth'] < 0.5],
                          **kwargs)
        self.check_filter(url=url,
                          table='sales',
                          query='SELECT * FROM sales WHERE sales > 100',
                          transform=lambda d: d[d['growth'] < 0.5],
                          df=df[df['growth'] < 0.5],
                          **kwargs)
        # Check both parameter substitutions -- {} formatting and : substitution
        afe(gramex.data.filter(url=url, table='{x}', args={'x': ['sales']}),
            self.sales)
        actual = gramex.data.filter(
            url=url,
            table='{兴}',
            args={
                '兴': ['sales'],
                'col': ['growth'],
                'val': [0],
                'city': ['South Plainfield'],
            },
            query='SELECT * FROM {兴} WHERE {col} > :val AND city = :city',
        )
        expected = self.sales[(self.sales['growth'] > 0)
                              & (self.sales['city'] == 'South Plainfield')]
        eqframe(actual, expected)

        # Aggregation: _by= grouping columns, _c= col|agg specs, _sort= sort keys
        by = ['product']
        aggs = ['growth|sum', 'sales|sum']
        sort = ['sales|sum']
        expected = df.groupby(by).agg(
            AttrDict([agg.split('|') for agg in aggs]))
        self.flatten_sort(expected, sort, sum_na, *aggs)
        params = {'_by': by, '_c': aggs, '_sort': sort, 'sales>': [100]}
        actual = gramex.data.filter(url, table='sales', args=params)
        eqframe(actual, expected)

        # Test invalid parameters
        with assert_raises(ValueError):
            gramex.data.filter(url=url,
                               table=1,
                               query='SELECT * FROM sales WHERE sales > 100')
        with assert_raises(ValueError):
            gramex.data.filter(url=url,
                               table={},
                               query='SELECT * FROM sales WHERE sales > 100')

        # Arguments with spaces raise an Exception
        with assert_raises(Exception):
            gramex.data.filter(url=url, table='{x}', args={'x': ['a b']})
        with assert_raises(Exception):
            gramex.data.filter(url=url,
                               table='{x}',
                               args={
                                   'x': ['sales'],
                                   'p': ['a b']
                               },
                               query='SELECT * FROM {x} WHERE {p} > 0')
Example #26
def eqframe(actual, expected, **kwargs):
    '''Same as assert_frame_equal or afe, but does not compare index'''
    expected.index = actual.index
    afe(actual, expected, **kwargs)
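A hedged usage note: filters and SQL round-trips renumber rows, so eqframe re-aligns expected.index onto actual.index before comparing values. For example (the table and args are illustrative):

    eqframe(
        gramex.data.filter(url, table='sales', args={'city': ['Hyderabad']}),
        sales[sales['city'] == 'Hyderabad'],
    )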
Example #27
 def check(expected, **params):
     actual = pd.DataFrame(
         self.get('/formhandler/dir', params=params).json())
     expected.index = actual.index
     afe(actual, expected, check_like=True)
Example #28
 def eq(args, expected):
     result = self.get(url, params=args).json()
     actual = pd.DataFrame(result[key] if key else result)
     expected.index = actual.index
     if len(expected) > 0:
         afe(actual, expected, check_like=True)
Example #29
    def test_download(self):
        # Modelled on testlib.test_data.TestDownload
        big = self.sales[self.sales['sales'] > 100]
        by_growth = self.sales.sort_values('growth')
        big.index = range(len(big))
        by_growth.index = range(len(by_growth))

        out = self.get('/formhandler/file?_format=html')
        # Note: In Python 2, pd.read_html returns .columns.inferred_type=mixed
        # instead of unicode. So check column type only in PY3, not PY2
        afe(pd.read_html(out.content, encoding='utf-8')[0],
            self.sales,
            check_column_type=six.PY3)
        eq_(out.headers['Content-Type'], 'text/html;charset=UTF-8')
        eq_(out.headers.get('Content-Disposition'), None)

        out = self.get('/formhandler/file-multi?_format=html')
        result = pd.read_html(BytesIO(out.content), encoding='utf-8')
        afe(result[0], big, check_column_type=six.PY3)
        afe(result[1], by_growth, check_column_type=six.PY3)
        eq_(out.headers['Content-Type'], 'text/html;charset=UTF-8')
        eq_(out.headers.get('Content-Disposition'), None)

        out = self.get('/formhandler/file?_format=xlsx')
        afe(pd.read_excel(BytesIO(out.content)), self.sales)
        eq_(out.headers['Content-Type'], xlsx_mime_type)
        eq_(out.headers['Content-Disposition'],
            'attachment;filename=data.xlsx')

        out = self.get('/formhandler/file-multi?_format=xlsx')
        result = pd.read_excel(BytesIO(out.content), sheet_name=None)
        afe(result['big'], big)
        afe(result['by-growth'], by_growth)
        eq_(out.headers['Content-Type'], xlsx_mime_type)
        eq_(out.headers['Content-Disposition'],
            'attachment;filename=data.xlsx')

        out = self.get('/formhandler/file?_format=csv')
        ok_(out.content.startswith(''.encode('utf-8-sig')))
        afe(pd.read_csv(BytesIO(out.content), encoding='utf-8'), self.sales)
        eq_(out.headers['Content-Type'], 'text/csv;charset=UTF-8')
        eq_(out.headers['Content-Disposition'], 'attachment;filename=data.csv')

        out = self.get('/formhandler/file-multi?_format=csv')
        lines = out.content.splitlines(True)
        eq_(lines[0], 'big\n'.encode('utf-8-sig'))
        actual = pd.read_csv(BytesIO(b''.join(lines[1:len(big) + 2])),
                             encoding='utf-8')
        afe(actual, big)
        eq_(lines[len(big) + 3], 'by-growth\n'.encode('utf-8'))
        actual = pd.read_csv(BytesIO(b''.join(lines[len(big) + 4:])),
                             encoding='utf-8')
        afe(actual, by_growth)
        eq_(out.headers['Content-Type'], 'text/csv;charset=UTF-8')
        eq_(out.headers['Content-Disposition'], 'attachment;filename=data.csv')

        for fmt in ['csv', 'html', 'json', 'xlsx']:
            out = self.get('/formhandler/file?_format=%s&_download=test.%s' %
                           (fmt, fmt))
            eq_(out.headers['Content-Disposition'],
                'attachment;filename=test.%s' % fmt)
            out = self.get(
                '/formhandler/file-multi?_format=%s&_download=test.%s' %
                (fmt, fmt))
            eq_(out.headers['Content-Disposition'],
                'attachment;filename=test.%s' % fmt)
Example #30
 def eq(self, url, expected):
     out = self.get(url).content
     actual = pd.read_csv(BytesIO(out), encoding='utf-8')
     expected.index = range(len(expected))
     afe(actual, expected, check_column_type=six.PY3)