def test_find_using_columns(self):
    """find/find_one must return only the requested subset of columns."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=False,
                               auto_save=False, persistent=False, debug=False)
    self.pddb.load(table_names=self.tname)
    self.pddb.insert(self.tname,
                     {c: '%s_%d' % (c, i) for (i, c) in enumerate(self.cols)})
    # Query the same record three ways, each with a different column selection
    row_a = self.pddb.find_one(self.tname, columns=['A'])
    row_b = self.pddb.find_one(self.tname, columns='B')
    row_abc = self.pddb.find(self.tname, columns=['A', 'B', 'C'],
                             astype='dict')[0]
    self.assertEqual(sorted(row_a.keys()), ['A'])
    self.assertEqual(sorted(row_b.keys()), ['B'])
    self.assertEqual(sorted(row_abc.keys()), ['A', 'B', 'C'])
    self.pddb.drop_all()
    self.pddb = None
def test_find_where_in(self):
    """A list-valued where condition matches any record whose value is in it."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=False)
    inserted = []
    for i in range(10):
        row = {c: '%s_%d' % (c, i) for c in self.cols}
        self.pddb.insert(self.tname, row)
        inserted.append(row)
    # Match only the first five records via a membership condition
    first_col = self.cols[0]
    criteria = {first_col: ['%s_%d' % (first_col, i) for i in range(5)]}
    found = self.pddb.find(self.tname, where=criteria, columns=self.cols,
                           astype='dict')
    self.assertEqual(inserted[:5], found)
    self.pddb.drop_all()
    self.pddb = None
def test_save_then_drop_all(self):
    """Saving a persistent database writes a CSV; drop_all removes it.

    Verifies both the on-disk CSV contents (header row plus one record, with
    the internal id column appended) and that drop_all() deletes the
    database directory.
    """
    test_name = self.id().lower()
    schema = {self.tname: self.cols}
    self.pddb = PandasDatabase(test_name, dynamic_schema=False, auto_load=False,
                               auto_save=False, persistent=True, debug=False)
    self.pddb.load(table_schemas=schema)
    self.pddb.insert(self.tname,
                     {c: '%s_%d' % (c, i) for (i, c) in enumerate(self.cols)})
    self.pddb.save()
    # Expected CSV: the user columns plus the internal id column, then the
    # inserted record followed by its generated id
    header_expected = ','.join(
        self.cols) + ',%s\n' % PandasDatabase._id_colname
    record_expected_regex = header_expected + \
        ','.join(['%s_%d' % (c, i) for (i, c) in enumerate(self.cols)]) + ',.+?\n'
    with open(
            os.path.join(self.pddb.root_dir, test_name, self.tname + '.csv'),
            'r') as f:
        record_csv = f.read()
    self.assertTrue(os.path.exists(test_name))
    self.pddb.drop_all()
    self.assertRegex(record_csv, record_expected_regex)
    self.assertFalse(os.path.exists(test_name))
    # The original called drop_all() a second time here; it was a no-op on an
    # already-dropped database and has been removed.
    self.pddb = None
def test_defer_save_wait(self):
    """Deferred saving flushes to disk only after the save_wait period."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, astype='dict',
                               auto_load=False, auto_save=True,
                               deferred_save=True, persistent=True, debug=False)
    self.pddb.load(table_names=self.tname)
    for i in range(self.pddb.save_queue_max):
        self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for c in self.cols})
    csv_path = os.path.join(db_name.lower(), self.tname + '.csv')
    # Nothing should be on disk yet: the save is deferred
    self.assertFalse(os.path.exists(csv_path))
    # Give an extra second to actually save the file to disk
    time.sleep(self.pddb.save_wait + 1)
    self.assertTrue(os.path.exists(csv_path))
    self.pddb.drop_all()
    self.pddb = None
def test_single_delete_record(self):
    """Deleting by condition removes exactly the one matching record."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=False,
                               auto_save=False, persistent=False, debug=False)
    self.pddb.load(table_names=self.tname)
    for i in range(10):
        self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for c in self.cols})
    # Delete the record inserted on the first iteration, keyed on one column
    key_col = self.cols[0]
    deleted = self.pddb.delete(self.tname,
                               where={key_col: '%s_%d' % (key_col, 0)})
    remaining = self.pddb.find(self.tname)
    self.assertEqual(1, len(deleted))
    self.assertEqual(9, len(remaining))
    self.assertFalse('A_0' in remaining['A'].values)
    self.pddb.drop_all()
    self.pddb = None
def test_astype(self):
    """Type aliases for astype behave like their string spellings."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=False)
    for i in range(10):
        self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for c in self.cols})
    # Default output equals the explicit 'dataframe' spelling
    self.assertEqual(str(self.pddb.find(self.tname)),
                     str(self.pddb.find(self.tname, astype='dataframe')))
    # The dict and str type objects alias 'dict' and 'json' respectively
    self.assertEqual(str(self.pddb.find(self.tname, astype=dict)),
                     str(self.pddb.find(self.tname, astype='dict')))
    self.assertEqual(str(self.pddb.find(self.tname, astype=str)),
                     str(self.pddb.find(self.tname, astype='json')))
    # find_one cannot produce a dataframe and must raise
    self.assertRaisesRegex(
        RuntimeError, '.*',
        lambda: self.pddb.find_one(self.tname, astype='dataframe'))
    self.pddb.drop_all()
    self.pddb = None
def test_drop_table(self):
    """Tables can be dropped one at a time or as a list in a single call."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=False)
    expected_tables = []
    for i in range(10):
        table = '%s_%d' % (self.tname, i)
        expected_tables.append(table)
        self.pddb.insert(table,
                         record={c: '%s_%d' % (c, i) for c in self.cols})
    self.assertEqual(sorted(self.pddb.get_table_names()),
                     sorted(expected_tables))
    # Drop the first five individually
    for i in range(5):
        table = '%s_%d' % (self.tname, i)
        expected_tables.remove(table)
        self.pddb.drop(table)
    self.assertEqual(sorted(self.pddb.get_table_names()),
                     sorted(expected_tables))
    # Drop the remainder with one list-valued call
    self.pddb.drop(expected_tables)
    self.assertEqual(len(self.pddb.get_table_names()), 0)
    self.pddb.drop_all()
    self.pddb = None
def test_many_upsert_with_fixed_schema(self):
    """Upsert inserts new records and then updates them by __id__."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=False, auto_load=False,
                               auto_save=False, persistent=False, debug=False)
    self.pddb.load(table_schemas={self.tname: self.cols})
    expected = []
    for i in range(10):
        # First upsert inserts; keep the returned record (with its __id__)
        inserted = self.pddb.upsert(
            self.tname,
            record={c: '%s_%d' % (c, i) for c in self.cols},
            astype='dict')[0]
        # Rewrite every column and upsert again, keyed on the record id
        for c in self.cols:
            inserted[c] = '%s_%d' % (c, -i)
        self.pddb.upsert(self.tname, record=inserted,
                         where={'__id__': inserted['__id__']})
        expected.append(inserted)
    self.assertEqual(self.pddb.find(self.tname, astype='dict'), expected)
    self.pddb.drop_all()
    self.pddb = None
def test_single_upsert_with_fixed_schema(self):
    """Upsert acts as insert without a match and as update with one."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=False, astype='dict',
                               auto_load=False, auto_save=False,
                               persistent=False, debug=False)
    self.pddb.load(table_schemas={self.tname: self.cols})
    # Insert path: no matching record exists yet
    inserted = self.pddb.upsert(
        self.tname,
        record={c: '%s_%d' % (c, i) for (i, c) in enumerate(self.cols)})[0]
    rid = inserted['__id__']
    self.assertEqual(inserted,
                     self.pddb.find_one(self.tname, where={'__id__': rid}))
    # Update path: the __id__ condition now matches the inserted record
    updated = self.pddb.upsert(
        self.tname,
        record={c: '%s_%d' % (c, -i) for (i, c) in enumerate(self.cols)},
        where={'__id__': rid})[0]
    fetched = self.pddb.find(self.tname, where={'__id__': rid})[0]
    self.assertNotEqual(inserted, updated)
    self.assertEqual(fetched, updated)
    self.pddb.drop_all()
    self.pddb = None
def test_find_one(self):
    """find_one returns the same record that insert reported."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=True)
    row = {'col_name': 'A_1'}
    from_insert = self.pddb.insert(self.tname, row, columns='col_name',
                                   astype='dict')
    from_findone = self.pddb.find_one(self.tname, where=row,
                                      columns='col_name', astype='dict')
    # Compare JSON serializations so key ordering and types are normalized
    self.assertEqual(json.dumps(row), json.dumps(from_insert))
    self.assertEqual(json.dumps(row), json.dumps(from_findone))
    self.pddb.drop_all()
    self.pddb = None
def test_create_database(self):
    """Creating a persistent database creates its directory on disk."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=False, auto_load=False,
                               auto_save=False, persistent=True, debug=False)
    self.assertTrue(os.path.exists(db_name.lower()))
    # Remove the directory manually, then let drop_all clean up state
    rmtree(db_name.lower())
    self.pddb.drop_all()
    self.pddb = None
def test_find_one_none(self):
    """find_one with an unmatched condition returns an empty JSON record."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=True)
    found = self.pddb.find_one(self.tname, where={'my_cond': None},
                               astype='json')
    self.assertEqual(found, json.dumps(dict()))
    self.pddb.drop_all()
    self.pddb = None
def test_single_rowgen(self):
    """A registered row generator supplies default values for inserts.

    Loads the table with a rowgen and verifies that an insert with no
    explicit record produces exactly the generated row.
    """
    test_name = self.id().lower()
    self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=False,
                               auto_save=False, persistent=False, debug=False)
    newrower = lambda: {c: '%s_%d' % (c, 0) for c in self.cols}
    self.pddb.load(table_rowgens={self.tname: newrower})
    row = newrower()
    record = self.pddb.insert(self.tname, columns=self.cols, astype='dict')
    self.assertEqual(row, record)
    self.pddb.drop_all()
    # Reset the shared handle like every other test so tearDownClass does not
    # touch a dropped database (this line was missing in the original).
    self.pddb = None
def test_illegal_column_name(self):
    """Inserting a column name that fails the regex raises ValueError."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, astype=list,
                               auto_load=True, auto_save=False,
                               persistent=False, debug=False)
    bad_record = {'col*name': 'A_1'}
    self.assertRaisesRegex(
        ValueError,
        'Column names must match the following regex: ".+"',
        lambda: self.pddb.insert(self.tname, bad_record))
    self.pddb.drop_all()
    self.pddb = None
def test_find_regex(self):
    """A compiled regex condition matches against column values."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=False)
    for i in range(10):
        self.pddb.insert(self.tname, record={'col_name': str(i)})
    # Digits 1 through 5 should match; 0 and 6-9 should not
    matches = self.pddb.find(self.tname,
                             where={'col_name': re.compile(r'[1-5]')})
    self.assertEqual(len(matches), 5)
    self.pddb.drop_all()
    self.pddb = None
def test_upsert_to_insert_with_where(self):
    """When upsert inserts, its where conditions become part of the record."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=True)
    rows = self.pddb.upsert(self.tname, record={'col_name_1': '1'},
                            where={'col_name_2': '2'},
                            columns=['col_name_1', 'col_name_2'],
                            astype='dict')
    self.assertEqual(rows[0], {'col_name_1': '1', 'col_name_2': '2'})
    self.pddb.drop_all()
    self.pddb = None
def test_create_table_with_fixed_schema(self):
    """Loading a fixed schema creates the columns plus the internal id."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=False, astype=list,
                               auto_load=False, auto_save=False,
                               persistent=False, debug=False)
    self.pddb.load(table_schemas={self.tname: self.cols})
    actual_cols = set(self.pddb._db[self.tname].columns)
    expected_cols = set(['__id__'] + [c for c in self.cols])
    self.assertEqual(actual_cols, expected_cols)
    self.pddb.drop_all()
    self.pddb = None
def test_unicode_conversion(self):
    """Unicode keys and values are stored as native str."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=True)
    record = self.pddb.insert(self.tname, record={u'col_name_1': u'1'},
                              astype='dict')
    # Every key and value of the stored record must be exactly type str
    items = list(record.keys()) + list(record.values())
    self.assertTrue(all(type(x) == str for x in items))
    self.pddb.drop_all()
    self.pddb = None
def _authenticate(self, callback_success=None, callback_failure=None, token=None,
                  _request_fallback=None, **callback_success_kwargs):
    ''' Authenticate user with the user-provided token.

    The token is taken from the `Token` cookie when present, otherwise from
    the request data (or the `token` argument). On success the result of
    `callback_success` is returned, HMAC-encoded when the token's security
    level requires it; on failure `callback_failure` is called with an HTTP
    status code and an error message.
    '''
    # Default callbacks: plain 200/err responses
    callback_success = callback_success or \
        (lambda: bottle.HTTPResponse(status=200, body='OK'))
    callback_failure = callback_failure or \
        (lambda code, err: bottle.HTTPResponse(status=code, body=err))
    callback_success_kwargs = callback_success_kwargs or dict()
    # Try to retrieve the token from the cookies first
    token_cookie = bottle.request.get_cookie('Token')
    # If no token was found, retrieve it from the request data
    if not token_cookie:
        request_dict = PandasDatabase._request(bottle.request,
                                               request_fallback=_request_fallback)
        token = token or request_dict.get('Token')
    else:
        token = str(token_cookie)
    # Verify that the token is provided in the request
    if token is None:
        msg = 'Auth error: "Token" field must be present as part of the request.'
        self._print(msg)
        return callback_failure(400, msg)
    # Validate the provided token against the token store
    token_record = self.pddb.find_one(
        'bottleship_tokens', where={'Token': token}, astype='dict')
    if not token_record or time.time() > float(token_record.get('Expiry', '0')):
        msg = 'Auth error: Provided token does not exist or has expired.'
        self._print(msg)
        return callback_failure(403, msg)
    # Retrieve the user record that the token belongs to
    user_record = self.pddb.find_one(
        'bottleship_users', where={'Username': token_record.get('Username')},
        astype='dict')
    # If callback accepts it as an argument, add bottleship_user_record
    # NOTE(review): inspect.getargspec was removed in Python 3.11 — consider
    # inspect.getfullargspec or inspect.signature.
    arg_spec = inspect.getargspec(callback_success)
    if 'bottleship_user_record' in arg_spec.args:
        callback_success_kwargs['bottleship_user_record'] = user_record
    # If token requires only plaintext security, we're done
    if 'plaintext' in token_record.get('SecurityLevel'):
        return callback_success(**callback_success_kwargs)
    # Depending on the security level, the data might need to be encrypted or signed
    elif 'hmac' in token_record.get('SecurityLevel'):
        key = token_record.get('Key')
        code, data = 200, callback_success(**callback_success_kwargs)
        # HTTPResponse bodies are re-encoded in place; bare payloads encoded directly
        if isinstance(data, bottle.HTTPResponse):
            code, data = data.status_code, data_encode(data.body, key)
        elif data:
            data = data_encode(data, key)
        return bottle.HTTPResponse(body=data, status=code)
    # NOTE(review): security levels other than plaintext/hmac fall through and
    # implicitly return None — confirm this is intended.
def test_create_table_with_dynamic_schema(self):
    """Dynamic schema grows columns as records with new keys are inserted."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=False)
    # Insert one single-column record per configured column
    for i, col in enumerate(self.cols):
        self.pddb.insert(self.tname, {col: str(i)})
    actual_cols = set(self.pddb._db[self.tname].columns)
    expected_cols = set(['__id__'] + [c for c in self.cols])
    self.assertEqual(actual_cols, expected_cols)
    self.pddb.drop_all()
    self.pddb = None
def test_unicode_conversion(self):
    """Unicode keys and values are converted to native str on insert."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=True)
    stored = self.pddb.insert(self.tname, record={u'col_name_1': u'1'},
                              astype='dict')
    self.assertTrue(
        all(type(item) == str
            for item in list(stored.keys()) + list(stored.values())))
    self.pddb.drop_all()
    self.pddb = None
def test_fixed_schema_fail_column(self):
    """Inserting a column missing from a fixed schema raises ValueError."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=False, auto_load=False,
                               auto_save=False, persistent=False, debug=False)
    self.pddb.load(table_schemas={self.tname: self.cols})
    self.assertRaisesRegex(
        ValueError,
        'Column "D" does not exist in schema for table "%s"' % self.tname,
        lambda: self.pddb.insert(self.tname, record={'D': '0'}))
    self.pddb.drop_all()
    self.pddb = None
def test_illegal_column_name(self):
    """A column name containing illegal characters must be rejected."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, astype=list,
                               auto_load=True, auto_save=False,
                               persistent=False, debug=False)
    # '*' is not a legal column-name character
    attempt = lambda: self.pddb.insert(self.tname, {'col*name': 'A_1'})
    self.assertRaisesRegex(
        ValueError,
        'Column names must match the following regex: ".+"',
        attempt)
    self.pddb.drop_all()
    self.pddb = None
def test_upsert_to_insert_with_conflict(self):
    """Upsert refuses to insert when rowgen defaults violate where_not."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=True)
    # The rowgen default conflicts with the where_not condition below
    self.pddb.load(table_rowgens={self.tname: lambda: {'col_name_2': '2'}})
    attempt = lambda: self.pddb.upsert(self.tname,
                                       record={'col_name_1': '1'},
                                       where_not={'col_name_2': '2'})
    self.assertRaisesRegex(
        ValueError,
        'Cannot insert new record because default '
        'values conflict with conditions provided: {.+}',
        attempt)
    self.pddb.drop_all()
    self.pddb = None
def test_fixed_schema_fail_column(self):
    """A fixed schema must reject records containing unknown columns."""
    db_name = self.id().lower()
    schema = {self.tname: self.cols}
    self.pddb = PandasDatabase(db_name, dynamic_schema=False, auto_load=False,
                               auto_save=False, persistent=False, debug=False)
    self.pddb.load(table_schemas=schema)
    # Column 'D' is not part of the schema
    attempt = lambda: self.pddb.insert(self.tname, record={'D': '0'})
    self.assertRaisesRegex(
        ValueError,
        'Column "D" does not exist in schema for table "%s"' % self.tname,
        attempt)
    self.pddb.drop_all()
    self.pddb = None
def logout(self, token=None, cookie_only=True, _request_fallback=None): ''' Expire a given token immediately. Parameters ---------- token : str Token to immediately expire. This will be retrieved from the header cookies or from the request depending on the value of parameter `cookie_only`. cookie_only : bool If true, only retrieve Token from the header cookies. This is to prevent malicious users to log out other users; if this method is exposed in the application\'s API, this parameter should always be True (which is the default behavior). _request_fallback : dict Used for testing purposes. The parameter `Token` can also be passed to this method as items in the `_request_fallback` dictionary. ''' # Try to retrieve the token from the cookies first token_cookie = bottle.request.get_cookie('Token') # If no token was found, retrieve it from the request data if not token_cookie and not cookie_only: request_dict = PandasDatabase._request(bottle.request, request_fallback=_request_fallback) token = token or request_dict.get('Token') else: token = str(token_cookie) # Verify that the token is provided in the request if token is None: msg = 'Auth error: "Token" field must be present as part of the request.' self._print(msg) return bottle.HTTPResponse(status=400, body=msg) # Validate the provided token against the token store token_record = self.pddb.find_one( 'bottleship_tokens', where={'Token': token}, astype='dict') if not token_record or time.time() > float(token_record.get('Expiry', '0')): msg = 'Auth error: Provided token does not exist or has expired.' self._print(msg) return bottle.HTTPResponse(status=400, body=msg) # Expire token record in the database self.pddb.upsert('bottleship_tokens', where={'Token': token}, record={'Expiry': '0'}) res = bottle.HTTPResponse(status=200, body='OK') res.set_cookie('Token', '', path='/', expires=0) return res
def test_upsert_to_insert_with_conflict(self):
    """Inserting via upsert must fail if defaults contradict where_not."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=True)
    newrower = lambda: {'col_name_2': '2'}
    self.pddb.load(table_rowgens={self.tname: newrower})
    self.assertRaisesRegex(
        ValueError,
        'Cannot insert new record because default '
        'values conflict with conditions provided: {.+}',
        lambda: self.pddb.upsert(self.tname, record={'col_name_1': '1'},
                                 where_not={'col_name_2': '2'}))
    self.pddb.drop_all()
    self.pddb = None
def test_type_cast(self):
    """auto_cast coerces non-string keys and values to str on insert."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, auto_cast=True,
                               persistent=False, debug=False)
    mixed = {1: 'a', 'col_name': 1, 'b': 0.5}
    stringified = {str(k): str(v) for k, v in mixed.items()}
    stored = self.pddb.insert(self.tname, record=mixed, astype='dict')
    # Every stored key and value must be exactly type str
    self.assertTrue(
        all(type(x) == str
            for x in list(stored.keys()) + list(stored.values())))
    # Aside from the generated id, the record equals its stringified form
    stored.pop(PandasDatabase._id_colname)
    self.assertEqual(stored, stringified)
    self.pddb.drop_all()
    self.pddb = None
def test_type_cast(self):
    """With auto_cast enabled, mixed-type records are stored as strings."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, auto_cast=True,
                               persistent=False, debug=False)
    dict_mix = {1: 'a', 'col_name': 1, 'b': 0.5}
    dict_str = {str(key): str(val) for key, val in dict_mix.items()}
    record = self.pddb.insert(self.tname, record=dict_mix, astype='dict')
    items = list(record.keys()) + list(record.values())
    self.assertTrue(all(type(item) == str for item in items))
    record.pop(PandasDatabase._id_colname)
    self.assertEqual(record, dict_str)
    self.pddb.drop_all()
    self.pddb = None
def test_defer_save_wait(self):
    """With deferred_save, data reaches disk only after save_wait elapses."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, astype='dict',
                               auto_load=False, auto_save=True,
                               deferred_save=True, persistent=True,
                               debug=False)
    self.pddb.load(table_names=self.tname)
    for i in range(self.pddb.save_queue_max):
        self.pddb.insert(self.tname,
                         {c: '%s_%d' % (c, i) for c in self.cols})
    table_csv = os.path.join(db_name.lower(), self.tname + '.csv')
    self.assertFalse(os.path.exists(table_csv))
    # Give an extra second to actually save the file to disk
    time.sleep(self.pddb.save_wait + 1)
    self.assertTrue(os.path.exists(table_csv))
    self.pddb.drop_all()
    self.pddb = None
def test_find_where_in(self):
    """find with a list condition returns records whose value is in the list."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=False)
    record_store = []
    for i in range(10):
        row = {col: '%s_%d' % (col, i) for col in self.cols}
        self.pddb.insert(self.tname, row)
        record_store.append(row)
    key = self.cols[0]
    wanted_values = ['%s_%d' % (key, i) for i in range(5)]
    results = self.pddb.find(self.tname, where={key: wanted_values},
                             columns=self.cols, astype='dict')
    self.assertEqual(record_store[:5], results)
    self.pddb.drop_all()
    self.pddb = None
def test_find_one(self):
    """The record reported by insert is the one find_one retrieves."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=True)
    source = {'col_name': 'A_1'}
    inserted = self.pddb.insert(self.tname, source, columns='col_name',
                                astype='dict')
    retrieved = self.pddb.find_one(self.tname, where=source,
                                   columns='col_name', astype='dict')
    # JSON round-trips normalize ordering for the comparison
    self.assertEqual(json.dumps(source), json.dumps(inserted))
    self.assertEqual(json.dumps(source), json.dumps(retrieved))
    self.pddb.drop_all()
    self.pddb = None
def test_astype(self):
    """String and type spellings of astype are interchangeable."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=True,
                               auto_save=False, persistent=False, debug=False)
    for i in range(10):
        self.pddb.insert(self.tname,
                         {col: '%s_%d' % (col, i) for col in self.cols})
    default_out = str(self.pddb.find(self.tname))
    self.assertEqual(default_out,
                     str(self.pddb.find(self.tname, astype='dataframe')))
    self.assertEqual(str(self.pddb.find(self.tname, astype=dict)),
                     str(self.pddb.find(self.tname, astype='dict')))
    self.assertEqual(str(self.pddb.find(self.tname, astype=str)),
                     str(self.pddb.find(self.tname, astype='json')))
    # Requesting a dataframe from find_one is an error
    self.assertRaisesRegex(
        RuntimeError, '.*',
        lambda: self.pddb.find_one(self.tname, astype='dataframe'))
    self.pddb.drop_all()
    self.pddb = None
def test_many_insert_with_fixed_schema(self):
    """Each insert into a fixed schema returns a record carrying its __id__."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=False, auto_load=False,
                               auto_save=False, persistent=False, debug=False)
    self.pddb.load(table_schemas={self.tname: self.cols})
    expected = []
    for i in range(10):
        row = {c: '%s_%d' % (c, i) for c in self.cols}
        stored = self.pddb.insert(self.tname, row)
        # Carry the generated id over so rows compare equal to the db state
        row['__id__'] = stored.loc['__id__']
        expected.append(row)
    self.assertEqual(self.pddb.find(self.tname, astype='dict'), expected)
    self.pddb.drop_all()
    self.pddb = None
def test_find_using_columns(self):
    """Column selection accepts a single name or a list of names."""
    db_name = self.id().lower()
    self.pddb = PandasDatabase(db_name, dynamic_schema=True, auto_load=False,
                               auto_save=False, persistent=False, debug=False)
    self.pddb.load(table_names=self.tname)
    self.pddb.insert(self.tname,
                     {col: '%s_%d' % (col, i)
                      for (i, col) in enumerate(self.cols)})
    one_col = self.pddb.find_one(self.tname, columns=['A'])
    str_col = self.pddb.find_one(self.tname, columns='B')
    three_cols = self.pddb.find(self.tname, columns=['A', 'B', 'C'],
                                astype='dict')[0]
    self.assertEqual(sorted(list(one_col.keys())), ['A'])
    self.assertEqual(sorted(list(str_col.keys())), ['B'])
    self.assertEqual(sorted(list(three_cols.keys())), ['A', 'B', 'C'])
    self.pddb.drop_all()
    self.pddb = None
def test_save_then_drop_all(self):
    """save() persists a CSV with header and record; drop_all() removes it.

    Reads the written CSV back, checks it against a regex built from the
    inserted record (user columns then the internal id column), and verifies
    the database directory disappears after drop_all().
    """
    test_name = self.id().lower()
    schema = {self.tname: self.cols}
    self.pddb = PandasDatabase(test_name, dynamic_schema=False, auto_load=False,
                               auto_save=False, persistent=True, debug=False)
    self.pddb.load(table_schemas=schema)
    self.pddb.insert(self.tname,
                     {c: '%s_%d' % (c, i) for (i, c) in enumerate(self.cols)})
    self.pddb.save()
    # Header is the user columns plus the internal id column
    header_expected = ','.join(self.cols) + ',%s\n' % PandasDatabase._id_colname
    record_expected_regex = header_expected + \
        ','.join(['%s_%d' % (c, i) for (i, c) in enumerate(self.cols)]) + ',.+?\n'
    with open(os.path.join(self.pddb.root_dir, test_name,
                           self.tname + '.csv'), 'r') as f:
        record_csv = f.read()
    self.assertTrue(os.path.exists(test_name))
    self.pddb.drop_all()
    self.assertRegex(record_csv, record_expected_regex)
    self.assertFalse(os.path.exists(test_name))
    # The original invoked drop_all() a second time here; that call was a
    # no-op on an already-dropped database and has been removed.
    self.pddb = None
def setUpClass(cls):
    # Shared fixture for the server-method tests: one database bound to a
    # bottle app with default write permissions, plus a sample record.
    cls.pddb = PandasDatabase('TestPandasDatabaseServerMethods', debug=True)
    cls.pddb_app = cls.pddb.bind_bottle_routes(default_permissions='w')
    # Sample record reused across the HTTP round-trip tests
    cls.record = {'A': '1', 'B': '2', 'C': '3'}
def login(self, username=None, password=None, _request_fallback=None):
    ''' Log in an existing user.

    Parameters
    ----------
    username : str
        Username to login. It can also be passed as the value of key `Username` as
        part of the GET or POST request.
    password : str
        Plaintext password for this username. It can also be passed as the value of
        key `Password` as part of the GET or POST request. If the requested security
        level requires it, password must be signed/encrypted.
    _request_fallback : dict
        Used for testing purposes. The parameters `Username` and `Password` can also
        be passed to this method as items in the `_request_fallback` dictionary.

    Returns
    -------
    response : bottle.HTTPResponse
        Status code and body will determine if the login was successful. If
        successful, the body will contain the user record in JSON format.

    Examples
    --------
    >>> app = BottleShip()
    >>> res = app.login("john", "1234")
    >>> print(res.status_code, res.body)
    403 Login error: Provided password does not match records for that username or username does not exist.
    '''
    request_dict = PandasDatabase._request(bottle.request,
                                           request_fallback=_request_fallback)
    # If data and token are provided, then this must be secure data transfer
    secure_data = False
    if 'Data' in request_dict and 'Token' in request_dict:
        secure_data = True
        request_dict, err_msg = self._read_secure_json(request_dict)
        if err_msg:
            self._print(err_msg)
            return bottle.HTTPResponse(status=400, body=err_msg)
    # Cleanup information from the request: normalize keys/values to str and
    # drop keys that are not legal column names
    request_dict = {tos(req_k): tos(req_v)
                    for req_k, req_v in request_dict.items()
                    if re.match(PandasDatabase._colname_rgx, tos(req_k))}
    # Verify username and password
    username = username or request_dict.get('Username')
    password = password or request_dict.get('Password', '')
    auth_header = bottle.request.get_header('Authorization')
    if auth_header:  # If auth is available in the headers, take that
        username, password = bottle.parse_auth(auth_header)
    error_msg = self._error_username_password(username, password)
    if error_msg:
        self._print(error_msg)
        return bottle.HTTPResponse(status=400, body=error_msg)
    # Look for existing user record
    user_record = self.pddb.find_one(
        'bottleship_users', where={'Username': username}, astype='dict')
    if not user_record:
        # Same message as a bad password so usernames cannot be enumerated
        msg = ('Login error: Provided password does not match records for that username or '
               'username does not exist.')
        self._print(msg)
        return bottle.HTTPResponse(status=403, body=msg)
    # Make sure that the security level is supported
    security_level = request_dict.get(
        'SecurityLevel', user_record.get('SecurityLevel', self.allowed_security[0]))
    if 'ipaddr' in user_record.get('SecurityLevel') and 'ipaddr' not in security_level:
        security_level += '+ipaddr'  # Force IP address verification if registration requests it
    user_record['SecurityLevel'] = security_level
    if security_level not in self.allowed_security:
        msg = 'Login error: Security level must be one of: %r' % list(self.allowed_security)
        self._print(msg)
        res = bottle.HTTPResponse(status=400, body=msg)
        return res
    elif not secure_data and ('hmac' in security_level or 'rsa' in security_level):
        msg = ('Login error: Security level requested requires secure data transfer but '
               'plaintext was used instead')
        self._print(msg)
        res = bottle.HTTPResponse(status=400, body=msg)
        return res
    # Verify user password
    # NOTE(review): passwords compare via str(hash(password)); Python's builtin
    # hash is not a password hash and is randomized per process for str since
    # hash randomization — consider a real KDF (e.g. hashlib.scrypt/pbkdf2).
    if 'Password' in user_record and user_record.get('Password') != str(hash(password)):
        msg = ('Login error: Provided password does not match records for that username or '
               'username does not exist.')
        self._print(msg)
        return bottle.HTTPResponse(status=403, body=msg)
    # Get user's IP address from request
    ip_addr = bottle.request.environ.get('REMOTE_ADDR', '')
    if ip_addr != user_record.get('RemoteIpAddr'):
        if 'ipaddr' in security_level:
            msg = 'Login error: Registration IP address does not match login attempt.'
            self._print(msg)
            return bottle.HTTPResponse(status=403, body=msg)
        else:
            user_record['RemoteIpAddr'] = ip_addr
    # Provide user with a temporary token
    token_key = str(request_dict.get('Key') if secure_data else user_record.get('Key'))
    token_record = self._gen_token(username, security_level=security_level, key=token_key)
    user_record['Token'] = token_record['Token']
    # Update the user record
    user_cond = {'Username': username}
    user_record['Key'] = token_record.get('Key')
    user_record['LastLogin'] = str(time.time())
    user_record = self.pddb.upsert('bottleship_users', record=user_record,
                                   where=user_cond, astype='dict')[0]
    # Depending on the security level, we may need to encrypt or sign the data
    user_record_json = self._dump_user_record(security_level, user_record)
    res = bottle.HTTPResponse(status=200, body=user_record_json)
    res.set_cookie('Token', token_record['Token'], path='/',
                   expires=int(float(token_record.get('Expiry'))))
    return res
def register(self, username=None, password=None, user_info=None):
    ''' Register a new user.

    Parameters
    ----------
    username : str
        Username to register. Must be unique in the application. It can also be
        passed as the value of key `Username` as part of the GET or POST request.
    password : str
        Plaintext password for this username. It can also be passed as the value of
        key `Password` as part of the GET or POST request.
    user_info : dict
        Dictionary containing any additional information about this user. The key
        `RemoteIpAddr` will be added to this dictionary with the value provided by
        `bottle.request.environ.get("REMOTE_ADDR")` prior to matching the user
        against the whitelist and blacklist parameters given to the constructor of
        this class. The parameters `username` and `password` can also be passed to
        this method as items in the `user_info` dictionary. Any key-value pairs not
        described above that are passed as part of the GET or POST request will be
        added to this dictionary. If the requested security level requires it, all
        user info including username and password must be serialized and
        signed/encrypted into a field named `Data` as a json string. In that case,
        the single-use `Token` provided by the key exchange must also be provided.

    Returns
    -------
    response : bottle.HTTPResponse
        Status code and body will determine if the login was successful. If
        successful, the body will contain the user record in JSON format.

    Examples
    --------
    >>> app = BottleShip()
    >>> app.register("john", "1234").body
    '{"Username": "******", "Password": "******", "__id__": "2c849965-251f-4b5d-8a27-77f86fa9e0e3", "RemoteIpAddr": null}'
    '''
    request_dict = PandasDatabase._request(bottle.request, request_fallback=user_info)
    # If data and token are provided, then this must be secure data transfer
    secure_data = False
    if 'Data' in request_dict and 'Token' in request_dict:
        secure_data = True
        request_dict, err_msg = self._read_secure_json(request_dict)
        if err_msg:
            self._print(err_msg)
            return bottle.HTTPResponse(status=400, body=err_msg)
    # Cleanup information from the request: normalize keys/values to str and
    # drop keys that are not legal column names
    request_dict = {tos(req_k): tos(req_v)
                    for req_k, req_v in request_dict.items()
                    if re.match(PandasDatabase._colname_rgx, tos(req_k))}
    # Verify username and password
    username = username or request_dict.get('Username')
    password = password or request_dict.get('Password', '')
    auth_header = bottle.request.get_header('Authorization')
    if auth_header:  # If auth is available in the headers, take that
        username, password = bottle.parse_auth(auth_header)
    error_msg = self._error_username_password(username, password)
    if error_msg:
        self._print(error_msg)
        return bottle.HTTPResponse(status=400, body=error_msg)
    # Look for existing user record and, if any, reject registration
    user_record = self.pddb.find_one(
        'bottleship_users', where={'Username': username}, astype='dict')
    if user_record:
        msg = 'Register error: Provided username already exists in the database.'
        self._print(msg)
        return bottle.HTTPResponse(status=400, body=msg)
    # Get the user requested security level or default
    security_level = request_dict.get('SecurityLevel', self.allowed_security[0])
    request_dict['SecurityLevel'] = security_level
    if security_level not in self.allowed_security:
        # NOTE(review): message says "Login error" inside the register flow —
        # confirm whether it should read "Register error".
        msg = 'Login error: Security level must be one of: %r' % list(self.allowed_security)
        self._print(msg)
        res = bottle.HTTPResponse(status=400, body=msg)
        return res
    elif not secure_data and ('hmac' in security_level or 'rsa' in security_level):
        msg = ('Login error: Security level requested requires secure data transfer but '
               'plaintext was used instead')
        self._print(msg)
        res = bottle.HTTPResponse(status=400, body=msg)
        return res
    # Get user's IP address from request
    request_dict['RemoteIpAddr'] = bottle.request.environ.get('REMOTE_ADDR', '')
    # Insert the hashed password into user's record
    # NOTE(review): str(hash(password)) is not a secure password hash and is
    # randomized per process — consider hashlib.scrypt/pbkdf2_hmac.
    if password is not None:
        request_dict['Password'] = str(hash(password))
    # Validate the user against our rules
    if not self._check_user(request_dict):
        msg = 'User does not meet the requirements.'
        self._print(msg)
        return bottle.HTTPResponse(status=403, body=msg)
    # Insert or update the user record
    user_cond = {'Username': username}
    user_record = self.pddb.upsert('bottleship_users', record=request_dict,
                                   where=user_cond, astype='dict')[0]
    # Depending on the security level, we may need to encrypt or sign the data
    user_record_json = self._dump_user_record(security_level, user_record)
    # Return the inserted user record
    return bottle.HTTPResponse(status=200, body=user_record_json)
class TestPandasDatabaseMethods(unittest2.TestCase):  # pylint: disable=invalid-name,too-many-public-methods,protected-access
    """Integration-style unit tests for PandasDatabase.

    Each test builds its own database named after the test id, exercises
    one feature (insert/upsert/find/delete, schema handling, type casting,
    deferred saving, persistence), then tears it down with ``drop_all()``.
    """

    @classmethod
    def setUpClass(cls):
        # Shared fixtures reused by every test: a handle slot, the default
        # column names and the default table name.
        cls.pddb = None
        cls.cols = ['A', 'B', 'C']
        cls.tname = 'table_name'

    @classmethod
    def tearDownClass(cls):
        # Clean up on-disk artifacts if the last test left a persistent DB.
        if cls.pddb is not None and cls.pddb.persistent:
            cls.pddb.drop_all()

    def test_create_database(self):
        """A persistent database creates its directory on construction."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=False, auto_load=False, auto_save=False, persistent=True, debug=False)
        self.assertTrue(os.path.exists(test_name.lower()))
        rmtree(test_name.lower())
        self.pddb.drop_all()
        self.pddb = None

    def test_find_one(self):
        """insert() and find_one() round-trip the same single record."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=True)
        record = {'col_name': 'A_1'}
        record_from_insert = self.pddb.insert(self.tname, record, columns='col_name', astype='dict')
        record_from_findone = self.pddb.find_one(self.tname, where=record, columns='col_name', astype='dict')
        # Compare via JSON serialization so dict ordering does not matter.
        json_from_record = json.dumps(record)
        json_from_findone = json.dumps(record_from_findone)
        json_from_insert = json.dumps(record_from_insert)
        self.assertEqual(json_from_record, json_from_insert)
        self.assertEqual(json_from_record, json_from_findone)
        self.pddb.drop_all()
        self.pddb = None

    def test_find_one_none(self):
        """find_one() with no match returns an empty dict (as JSON)."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=True)
        record = self.pddb.find_one(self.tname, where={'my_cond': None}, astype='json')
        self.assertEqual(record, json.dumps(dict()))
        self.pddb.drop_all()
        self.pddb = None

    def test_create_table_with_fixed_schema(self):
        """Loading a fixed schema creates the declared columns plus __id__."""
        test_name = self.id().lower()
        schema = {self.tname: self.cols}
        self.pddb = PandasDatabase(test_name, dynamic_schema=False, astype=list, auto_load=False, auto_save=False, persistent=False, debug=False)
        self.pddb.load(table_schemas=schema)
        db_cols = set(self.pddb._db[self.tname].columns)
        cols_with_id = set(['__id__'] + [c for c in self.cols])
        self.assertEqual(db_cols, cols_with_id)
        self.pddb.drop_all()
        self.pddb = None

    def test_single_insert_with_fixed_schema(self):
        """An inserted record is retrievable by its generated __id__."""
        test_name = self.id().lower()
        schema = {self.tname: self.cols}
        self.pddb = PandasDatabase(test_name, dynamic_schema=False, astype='dict', auto_load=False, auto_save=False, persistent=False, debug=False)
        self.pddb.load(table_schemas=schema)
        record = {c: '%s_%d' % (c, i) for (i, c) in enumerate(self.cols)}
        record = self.pddb.insert(self.tname, record)
        record_id = record['__id__']
        record_db = self.pddb.find(self.tname, where={'__id__': record_id})[0]
        self.assertEqual(record, record_db)
        self.pddb.drop_all()
        self.pddb = None

    def test_many_insert_with_fixed_schema(self):
        """Ten inserts come back from find() in insertion order."""
        test_name = self.id().lower()
        schema = {self.tname: self.cols}
        self.pddb = PandasDatabase(test_name, dynamic_schema=False, auto_load=False, auto_save=False, persistent=False, debug=False)
        self.pddb.load(table_schemas=schema)
        test_record_store = []
        for i in range(10):
            record = {c: '%s_%d' % (c, i) for c in self.cols}
            record_db = self.pddb.insert(self.tname, record)
            # Default astype here is a pandas Series, hence .loc access.
            record_id = record_db.loc['__id__']
            record['__id__'] = record_id
            test_record_store.append(record)
        rows = self.pddb.find(self.tname, astype='dict')
        self.assertEqual(rows, test_record_store)
        self.pddb.drop_all()
        self.pddb = None

    def test_single_upsert_with_fixed_schema(self):
        """upsert() inserts when absent and updates in place when matched."""
        test_name = self.id().lower()
        schema = {self.tname: self.cols}
        self.pddb = PandasDatabase(test_name, dynamic_schema=False, astype='dict', auto_load=False, auto_save=False, persistent=False, debug=False)
        self.pddb.load(table_schemas=schema)
        # Test insert first
        record = {c: '%s_%d' % (c, i) for (i, c) in enumerate(self.cols)}
        record = self.pddb.upsert(self.tname, record=record)[0]
        record_id = record['__id__']
        record_db = self.pddb.find_one(self.tname, where={'__id__': record_id})
        self.assertEqual(record, record_db)
        # Test update second
        record_new = {c: '%s_%d' % (c, -i) for (i, c) in enumerate(self.cols)}
        record_new = self.pddb.upsert(self.tname, record=record_new, where={'__id__': record_id})[0]
        record_db = self.pddb.find(self.tname, where={'__id__': record_id})[0]
        self.assertNotEqual(record, record_new)
        self.assertEqual(record_db, record_new)
        self.pddb.drop_all()
        self.pddb = None

    def test_many_upsert_with_fixed_schema(self):
        """Repeated upserts by __id__ update each row rather than duplicate it."""
        test_name = self.id().lower()
        schema = {self.tname: self.cols}
        self.pddb = PandasDatabase(test_name, dynamic_schema=False, auto_load=False, auto_save=False, persistent=False, debug=False)
        self.pddb.load(table_schemas=schema)
        test_record_store = []
        for i in range(10):
            record = {c: '%s_%d' % (c, i) for c in self.cols}
            record_new = self.pddb.upsert(self.tname, record=record, astype='dict')[0]
            for c in self.cols:
                record_new[c] = '%s_%d' % (c, -i)
            self.pddb.upsert(self.tname, record=record_new, where={'__id__': record_new['__id__']})
            test_record_store.append(record_new)
        rows = self.pddb.find(self.tname, astype='dict')
        self.assertEqual(rows, test_record_store)
        self.pddb.drop_all()
        self.pddb = None

    def test_create_table_with_dynamic_schema(self):
        """With dynamic_schema, new columns appear as records introduce them."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=False)
        for i, c in enumerate(self.cols):
            record = {c: str(i)}
            self.pddb.insert(self.tname, record)
        PandasDatabase_cols = set(self.pddb._db[self.tname].columns)
        cols_with_id = set(['__id__'] + [c for c in self.cols])
        self.assertEqual(PandasDatabase_cols, cols_with_id)
        self.pddb.drop_all()
        self.pddb = None

    def test_astype(self):
        """String and type-object astype spellings are interchangeable."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=False)
        for i in range(10):
            self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for c in self.cols})
        self.assertEqual(str(self.pddb.find(self.tname)), str(self.pddb.find(self.tname, astype='dataframe')))
        self.assertEqual(str(self.pddb.find(self.tname, astype=dict)), str(self.pddb.find(self.tname, astype='dict')))
        self.assertEqual(str(self.pddb.find(self.tname, astype=str)), str(self.pddb.find(self.tname, astype='json')))
        # find_one cannot return a dataframe, so this must raise.
        self.assertRaisesRegex(RuntimeError, '.*', lambda: self.pddb.find_one(self.tname, astype='dataframe'))
        self.pddb.drop_all()
        self.pddb = None

    def test_bytes_conversion(self):
        """bytes keys and values are normalized to str on insert."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=True)
        record = self.pddb.insert(self.tname, record={b'col_name_1': b'1'}, astype='dict')
        self.assertTrue(all([type(x) == str for x in list(record.keys()) + list(record.values())]))
        self.pddb.drop_all()
        self.pddb = None

    def test_unicode_conversion(self):
        """unicode keys and values are normalized to str on insert."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=True)
        record = self.pddb.insert(self.tname, record={u'col_name_1': u'1'}, astype='dict')
        self.assertTrue(all([type(x) == str for x in list(record.keys()) + list(record.values())]))
        self.pddb.drop_all()
        self.pddb = None

    def test_illegal_column_name(self):
        """Column names that fail the allowed-name regex are rejected."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, astype=list, auto_load=True, auto_save=False, persistent=False, debug=False)
        record = {'col*name': 'A_1'}
        insert_function = lambda: self.pddb.insert(self.tname, record)
        self.assertRaisesRegex(ValueError, 'Column names must match the following regex: ".+"', insert_function)
        self.pddb.drop_all()
        self.pddb = None

    def test_find_regex(self):
        """A compiled regex in a where clause matches cell values."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=False)
        for i in range(10):
            self.pddb.insert(self.tname, record={'col_name': str(i)})
        rows = self.pddb.find(self.tname, where={'col_name': re.compile(r'[1-5]')})
        self.assertEqual(len(rows), 5)
        self.pddb.drop_all()
        self.pddb = None

    def test_type_cast(self):
        """With auto_cast, non-string keys and values are stored as str."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, auto_cast=True, persistent=False, debug=False)
        dict_mix = {1: 'a', 'col_name': 1, 'b': 0.5}
        dict_str = {str(k): str(v) for k, v in dict_mix.items()}
        record = self.pddb.insert(self.tname, record=dict_mix, astype='dict')
        self.assertTrue(all([type(x) == str for x in list(record.keys()) + list(record.values())]))
        # Drop the generated id before comparing against the expected dict.
        record.pop(PandasDatabase._id_colname)
        self.assertEqual(record, dict_str)
        self.pddb.drop_all()
        self.pddb = None

    def test_upsert_to_insert_with_where(self):
        """When upsert inserts, the where-clause values seed the new row."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=True)
        record = self.pddb.upsert(self.tname, record={'col_name_1': '1'}, where={'col_name_2': '2'}, columns=['col_name_1', 'col_name_2'], astype='dict')
        self.assertEqual(record[0], {'col_name_1': '1', 'col_name_2': '2'})
        self.pddb.drop_all()
        self.pddb = None

    def test_upsert_to_insert_with_conflict(self):
        """Row-generator defaults that violate where_not make upsert raise."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=True)
        newrower = lambda: {'col_name_2': '2'}
        self.pddb.load(table_rowgens={self.tname: newrower})
        insert_function = lambda: self.pddb.upsert(self.tname, record={'col_name_1': '1'}, where_not={'col_name_2': '2'})
        self.assertRaisesRegex(ValueError, 'Cannot insert new record because default '
                               'values conflict with conditions provided: {.+}', insert_function)
        self.pddb.drop_all()
        self.pddb = None

    def test_create_table_with_upper_case(self):
        """Table names keep working regardless of case (row generator path)."""
        test_name = self.id().lower()
        tname = self.tname.upper()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=False, auto_save=False, persistent=False, debug=False)
        newrower = lambda: {c: '%s_%d' % (c, 0) for c in self.cols}
        self.pddb.load(table_rowgens={tname: newrower})
        row = newrower()
        record = self.pddb.insert(tname, columns=self.cols, astype='dict')
        self.assertEqual(row, record)
        self.pddb.drop_all()
        self.pddb = None

    def test_find_using_columns(self):
        """find()/find_one() project results onto the requested columns."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=False, auto_save=False, persistent=False, debug=False)
        self.pddb.load(table_names=self.tname)
        self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for (i, c) in enumerate(self.cols)})
        record_A = self.pddb.find_one(self.tname, columns=['A'])
        record_B = self.pddb.find_one(self.tname, columns='B')
        record_ABC = self.pddb.find(self.tname, columns=['A', 'B', 'C'], astype='dict')[0]
        cols_A = sorted(list(record_A.keys()))
        cols_B = sorted(list(record_B.keys()))
        cols_ABC = sorted(list(record_ABC.keys()))
        self.assertEqual(cols_A, ['A'])
        self.assertEqual(cols_B, ['B'])
        self.assertEqual(cols_ABC, ['A', 'B', 'C'])
        self.pddb.drop_all()
        self.pddb = None

    def test_find_where_in(self):
        """A list value in a where clause behaves like SQL's IN operator."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=False)
        record_store = []
        for i in range(10):
            record = {c: '%s_%d' % (c, i) for c in self.cols}
            self.pddb.insert(self.tname, record)
            record_store.append(record)
        search_for = {self.cols[0]: ['%s_%d' % (self.cols[0], i) for i in range(5)]}
        results = self.pddb.find(self.tname, where=search_for, columns=self.cols, astype='dict')
        self.assertEqual(record_store[:5], results)
        self.pddb.drop_all()
        self.pddb = None

    def test_single_delete_record(self):
        """delete() removes exactly the matching row and returns it."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=False, auto_save=False, persistent=False, debug=False)
        self.pddb.load(table_names=self.tname)
        for i in range(10):
            self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for c in self.cols})
        c = self.cols[0]
        firstrecord = {c: '%s_%d' % (c, 0)}
        delrows = self.pddb.delete(self.tname, where=firstrecord)
        allrows = self.pddb.find(self.tname)
        allvalA = allrows['A'].values
        self.assertEqual(1, len(delrows))
        self.assertEqual(9, len(allrows))
        self.assertFalse('A_0' in allvalA)
        self.pddb.drop_all()
        self.pddb = None

    def test_single_rowgen(self):
        """insert() with no record uses the table's row generator defaults."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=False, auto_save=False, persistent=False, debug=False)
        newrower = lambda: {c: '%s_%d' % (c, 0) for c in self.cols}
        self.pddb.load(table_rowgens={self.tname: newrower})
        row = newrower()
        record = self.pddb.insert(self.tname, columns=self.cols, astype='dict')
        self.assertEqual(row, record)
        self.pddb.drop_all()

    def test_fixed_schema_fail_column(self):
        """With a fixed schema, inserting an unknown column raises."""
        test_name = self.id().lower()
        schema = {self.tname: self.cols}
        self.pddb = PandasDatabase(test_name, dynamic_schema=False, auto_load=False, auto_save=False, persistent=False, debug=False)
        self.pddb.load(table_schemas=schema)
        insert_function = lambda: self.pddb.insert(self.tname, record={'D': '0'})
        self.assertRaisesRegex(ValueError, 'Column "D" does not exist in schema for table "%s"' % self.tname, insert_function)
        self.pddb.drop_all()
        self.pddb = None

    def test_drop_table(self):
        """drop() accepts a single table name or a list of names."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=False)
        all_tables = []
        for i in range(10):
            tname = '%s_%d' % (self.tname, i)
            all_tables.append(tname)
            self.pddb.insert(tname, record={c: '%s_%d' % (c, i) for c in self.cols})
        tnames = self.pddb.get_table_names()
        self.assertEqual(sorted(tnames), sorted(all_tables))
        # Drop the first five one at a time.
        for i in range(5):
            tname = '%s_%d' % (self.tname, i)
            all_tables.remove(tname)
            self.pddb.drop(tname)
        tnames = self.pddb.get_table_names()
        self.assertEqual(sorted(tnames), sorted(all_tables))
        # Drop the remainder in one call.
        self.pddb.drop(all_tables)
        self.assertEqual(len(self.pddb.get_table_names()), 0)
        self.pddb.drop_all()
        self.pddb = None

    def test_save_then_drop_all(self):
        """save() writes a parseable CSV; drop_all() deletes the directory."""
        test_name = self.id().lower()
        schema = {self.tname: self.cols}
        self.pddb = PandasDatabase(test_name, dynamic_schema=False, auto_load=False, auto_save=False, persistent=True, debug=False)
        self.pddb.load(table_schemas=schema)
        self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for (i, c) in enumerate(self.cols)})
        self.pddb.save()
        # Expected CSV: declared columns plus the id column, then one row
        # whose id (unknown up front) is matched by the regex.
        header_expected = ','.join(self.cols) + ',%s\n' % PandasDatabase._id_colname
        record_expected_regex = header_expected + \
            ','.join(['%s_%d' % (c, i) for (i, c) in enumerate(self.cols)]) + ',.+?\n'
        with open(os.path.join(self.pddb.root_dir, test_name, self.tname + '.csv'), 'r') as f:
            record_csv = f.read()
        self.assertTrue(os.path.exists(test_name))
        self.pddb.drop_all()
        self.assertRegex(record_csv, record_expected_regex)
        self.assertFalse(os.path.exists(test_name))
        self.pddb.drop_all()
        self.pddb = None

    def test_defer_save_queue_max(self):
        """Deferred save flushes to disk once the queue limit is exceeded."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, astype='dict', auto_load=False, auto_save=True, deferred_save=True, persistent=True, debug=False)
        self.pddb.load(table_names=self.tname)
        i = 0
        for i in range(self.pddb.save_queue_max):
            self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for c in self.cols})
        # Nothing on disk yet: the queue has not exceeded its maximum.
        self.assertFalse(os.path.exists(os.path.join(test_name.lower(), self.tname + '.csv')))
        self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for c in self.cols})
        time.sleep(1)  # Give an extra second to actually save the file to disk
        self.assertTrue(os.path.exists(os.path.join(test_name.lower(), self.tname + '.csv')))
        self.pddb.drop_all()

    def test_defer_save_wait(self):
        """Deferred save also flushes after the save_wait interval elapses."""
        test_name = self.id().lower()
        self.pddb = PandasDatabase(test_name, dynamic_schema=True, astype='dict', auto_load=False, auto_save=True, deferred_save=True, persistent=True, debug=False)
        self.pddb.load(table_names=self.tname)
        for i in range(self.pddb.save_queue_max):
            self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for c in self.cols})
        self.assertFalse(os.path.exists(os.path.join(test_name.lower(), self.tname + '.csv')))
        time.sleep(self.pddb.save_wait + 1)  # Give an extra second to actually save the file to disk
        self.assertTrue(os.path.exists(os.path.join(test_name.lower(), self.tname + '.csv')))
        self.pddb.drop_all()
        self.pddb = None

# NOTE(review): the triple-quote below opens a module-level string that runs
# to the end of the file and contains a duplicate (auto-formatted) copy of
# this test class. It is dead code — presumably a formatter before/after
# leftover; confirm with the author and consider removing it.
'''
class TestPandasDatabaseMethods(unittest2.TestCase): # pylint: disable=invalid-name,too-many-public-methods,protected-access @classmethod def setUpClass(cls): cls.pddb = None cls.cols = ['A', 'B', 'C'] cls.tname = 'table_name' @classmethod def tearDownClass(cls): if cls.pddb is not None and cls.pddb.persistent: cls.pddb.drop_all() def test_create_database(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=False, auto_load=False, auto_save=False, persistent=True, debug=False) self.assertTrue(os.path.exists(test_name.lower())) rmtree(test_name.lower()) self.pddb.drop_all() self.pddb = None def test_find_one(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=True) record = {'col_name': 'A_1'} record_from_insert = self.pddb.insert(self.tname, record, columns='col_name', astype='dict') record_from_findone = self.pddb.find_one(self.tname, where=record, columns='col_name', astype='dict') json_from_record = json.dumps(record) json_from_findone = json.dumps(record_from_findone) json_from_insert = json.dumps(record_from_insert) self.assertEqual(json_from_record, json_from_insert) self.assertEqual(json_from_record, json_from_findone) self.pddb.drop_all() self.pddb = None def test_find_one_none(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=True) record = self.pddb.find_one(self.tname, where={'my_cond': None}, astype='json') self.assertEqual(record, json.dumps(dict())) self.pddb.drop_all() self.pddb = None def test_create_table_with_fixed_schema(self): test_name = self.id().lower() schema = {self.tname: self.cols} self.pddb = PandasDatabase(test_name, dynamic_schema=False, astype=list, auto_load=False, auto_save=False, persistent=False, debug=False) self.pddb.load(table_schemas=schema) db_cols = set(self.pddb._db[self.tname].columns) 
cols_with_id = set(['__id__'] + [c for c in self.cols]) self.assertEqual(db_cols, cols_with_id) self.pddb.drop_all() self.pddb = None def test_single_insert_with_fixed_schema(self): test_name = self.id().lower() schema = {self.tname: self.cols} self.pddb = PandasDatabase(test_name, dynamic_schema=False, astype='dict', auto_load=False, auto_save=False, persistent=False, debug=False) self.pddb.load(table_schemas=schema) record = {c: '%s_%d' % (c, i) for (i, c) in enumerate(self.cols)} record = self.pddb.insert(self.tname, record) record_id = record['__id__'] record_db = self.pddb.find(self.tname, where={'__id__': record_id})[0] self.assertEqual(record, record_db) self.pddb.drop_all() self.pddb = None def test_many_insert_with_fixed_schema(self): test_name = self.id().lower() schema = {self.tname: self.cols} self.pddb = PandasDatabase(test_name, dynamic_schema=False, auto_load=False, auto_save=False, persistent=False, debug=False) self.pddb.load(table_schemas=schema) test_record_store = [] for i in range(10): record = {c: '%s_%d' % (c, i) for c in self.cols} record_db = self.pddb.insert(self.tname, record) record_id = record_db.loc['__id__'] record['__id__'] = record_id test_record_store.append(record) rows = self.pddb.find(self.tname, astype='dict') self.assertEqual(rows, test_record_store) self.pddb.drop_all() self.pddb = None def test_single_upsert_with_fixed_schema(self): test_name = self.id().lower() schema = {self.tname: self.cols} self.pddb = PandasDatabase(test_name, dynamic_schema=False, astype='dict', auto_load=False, auto_save=False, persistent=False, debug=False) self.pddb.load(table_schemas=schema) # Test insert first record = {c: '%s_%d' % (c, i) for (i, c) in enumerate(self.cols)} record = self.pddb.upsert(self.tname, record=record)[0] record_id = record['__id__'] record_db = self.pddb.find_one(self.tname, where={'__id__': record_id}) self.assertEqual(record, record_db) # Test update second record_new = {c: '%s_%d' % (c, -i) for (i, c) in 
enumerate(self.cols)} record_new = self.pddb.upsert(self.tname, record=record_new, where={'__id__': record_id})[0] record_db = self.pddb.find(self.tname, where={'__id__': record_id})[0] self.assertNotEqual(record, record_new) self.assertEqual(record_db, record_new) self.pddb.drop_all() self.pddb = None def test_many_upsert_with_fixed_schema(self): test_name = self.id().lower() schema = {self.tname: self.cols} self.pddb = PandasDatabase(test_name, dynamic_schema=False, auto_load=False, auto_save=False, persistent=False, debug=False) self.pddb.load(table_schemas=schema) test_record_store = [] for i in range(10): record = {c: '%s_%d' % (c, i) for c in self.cols} record_new = self.pddb.upsert(self.tname, record=record, astype='dict')[0] for c in self.cols: record_new[c] = '%s_%d' % (c, -i) self.pddb.upsert(self.tname, record=record_new, where={'__id__': record_new['__id__']}) test_record_store.append(record_new) rows = self.pddb.find(self.tname, astype='dict') self.assertEqual(rows, test_record_store) self.pddb.drop_all() self.pddb = None def test_create_table_with_dynamic_schema(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=False) for i, c in enumerate(self.cols): record = {c: str(i)} self.pddb.insert(self.tname, record) PandasDatabase_cols = set(self.pddb._db[self.tname].columns) cols_with_id = set(['__id__'] + [c for c in self.cols]) self.assertEqual(PandasDatabase_cols, cols_with_id) self.pddb.drop_all() self.pddb = None def test_astype(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=False) for i in range(10): self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for c in self.cols}) self.assertEqual(str(self.pddb.find(self.tname)), str(self.pddb.find(self.tname, astype='dataframe'))) self.assertEqual(str(self.pddb.find(self.tname, astype=dict)), 
str(self.pddb.find(self.tname, astype='dict'))) self.assertEqual(str(self.pddb.find(self.tname, astype=str)), str(self.pddb.find(self.tname, astype='json'))) self.assertRaisesRegex( RuntimeError, '.*', lambda: self.pddb.find_one(self.tname, astype='dataframe')) self.pddb.drop_all() self.pddb = None def test_bytes_conversion(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=True) record = self.pddb.insert(self.tname, record={b'col_name_1': b'1'}, astype='dict') self.assertTrue( all([ type(x) == str for x in list(record.keys()) + list(record.values()) ])) self.pddb.drop_all() self.pddb = None def test_unicode_conversion(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=True) record = self.pddb.insert(self.tname, record={u'col_name_1': u'1'}, astype='dict') self.assertTrue( all([ type(x) == str for x in list(record.keys()) + list(record.values()) ])) self.pddb.drop_all() self.pddb = None def test_illegal_column_name(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, astype=list, auto_load=True, auto_save=False, persistent=False, debug=False) record = {'col*name': 'A_1'} insert_function = lambda: self.pddb.insert(self.tname, record) self.assertRaisesRegex( ValueError, 'Column names must match the following regex: ".+"', insert_function) self.pddb.drop_all() self.pddb = None def test_find_regex(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=False) for i in range(10): self.pddb.insert(self.tname, record={'col_name': str(i)}) rows = self.pddb.find(self.tname, where={'col_name': re.compile(r'[1-5]')}) self.assertEqual(len(rows), 5) self.pddb.drop_all() self.pddb = None def test_type_cast(self): test_name = 
self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, auto_cast=True, persistent=False, debug=False) dict_mix = {1: 'a', 'col_name': 1, 'b': 0.5} dict_str = {str(k): str(v) for k, v in dict_mix.items()} record = self.pddb.insert(self.tname, record=dict_mix, astype='dict') self.assertTrue( all([ type(x) == str for x in list(record.keys()) + list(record.values()) ])) record.pop(PandasDatabase._id_colname) self.assertEqual(record, dict_str) self.pddb.drop_all() self.pddb = None def test_upsert_to_insert_with_where(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=True) record = self.pddb.upsert(self.tname, record={'col_name_1': '1'}, where={'col_name_2': '2'}, columns=['col_name_1', 'col_name_2'], astype='dict') self.assertEqual(record[0], {'col_name_1': '1', 'col_name_2': '2'}) self.pddb.drop_all() self.pddb = None def test_upsert_to_insert_with_conflict(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=True) newrower = lambda: {'col_name_2': '2'} self.pddb.load(table_rowgens={self.tname: newrower}) insert_function = lambda: self.pddb.upsert(self.tname, record={'col_name_1': '1'}, where_not= {'col_name_2': '2'}) self.assertRaisesRegex( ValueError, 'Cannot insert new record because default ' 'values conflict with conditions provided: {.+}', insert_function) self.pddb.drop_all() self.pddb = None def test_create_table_with_upper_case(self): test_name = self.id().lower() tname = self.tname.upper() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=False, auto_save=False, persistent=False, debug=False) newrower = lambda: {c: '%s_%d' % (c, 0) for c in self.cols} self.pddb.load(table_rowgens={tname: newrower}) row = newrower() record = self.pddb.insert(tname, columns=self.cols, astype='dict') 
self.assertEqual(row, record) self.pddb.drop_all() self.pddb = None def test_find_using_columns(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=False, auto_save=False, persistent=False, debug=False) self.pddb.load(table_names=self.tname) self.pddb.insert( self.tname, {c: '%s_%d' % (c, i) for (i, c) in enumerate(self.cols)}) record_A = self.pddb.find_one(self.tname, columns=['A']) record_B = self.pddb.find_one(self.tname, columns='B') record_ABC = self.pddb.find(self.tname, columns=['A', 'B', 'C'], astype='dict')[0] cols_A = sorted(list(record_A.keys())) cols_B = sorted(list(record_B.keys())) cols_ABC = sorted(list(record_ABC.keys())) self.assertEqual(cols_A, ['A']) self.assertEqual(cols_B, ['B']) self.assertEqual(cols_ABC, ['A', 'B', 'C']) self.pddb.drop_all() self.pddb = None def test_find_where_in(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=False) record_store = [] for i in range(10): record = {c: '%s_%d' % (c, i) for c in self.cols} self.pddb.insert(self.tname, record) record_store.append(record) search_for = { self.cols[0]: ['%s_%d' % (self.cols[0], i) for i in range(5)] } results = self.pddb.find(self.tname, where=search_for, columns=self.cols, astype='dict') self.assertEqual(record_store[:5], results) self.pddb.drop_all() self.pddb = None def test_single_delete_record(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=False, auto_save=False, persistent=False, debug=False) self.pddb.load(table_names=self.tname) for i in range(10): self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for c in self.cols}) c = self.cols[0] firstrecord = {c: '%s_%d' % (c, 0)} delrows = self.pddb.delete(self.tname, where=firstrecord) allrows = self.pddb.find(self.tname) allvalA = allrows['A'].values self.assertEqual(1, len(delrows)) self.assertEqual(9, len(allrows)) 
self.assertFalse('A_0' in allvalA) self.pddb.drop_all() self.pddb = None def test_single_rowgen(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=False, auto_save=False, persistent=False, debug=False) newrower = lambda: {c: '%s_%d' % (c, 0) for c in self.cols} self.pddb.load(table_rowgens={self.tname: newrower}) row = newrower() record = self.pddb.insert(self.tname, columns=self.cols, astype='dict') self.assertEqual(row, record) self.pddb.drop_all() def test_fixed_schema_fail_column(self): test_name = self.id().lower() schema = {self.tname: self.cols} self.pddb = PandasDatabase(test_name, dynamic_schema=False, auto_load=False, auto_save=False, persistent=False, debug=False) self.pddb.load(table_schemas=schema) insert_function = lambda: self.pddb.insert(self.tname, record={'D': '0'}) self.assertRaisesRegex( ValueError, 'Column "D" does not exist in schema for table "%s"' % self.tname, insert_function) self.pddb.drop_all() self.pddb = None def test_drop_table(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, auto_load=True, auto_save=False, persistent=False, debug=False) all_tables = [] for i in range(10): tname = '%s_%d' % (self.tname, i) all_tables.append(tname) self.pddb.insert(tname, record={c: '%s_%d' % (c, i) for c in self.cols}) tnames = self.pddb.get_table_names() self.assertEqual(sorted(tnames), sorted(all_tables)) for i in range(5): tname = '%s_%d' % (self.tname, i) all_tables.remove(tname) self.pddb.drop(tname) tnames = self.pddb.get_table_names() self.assertEqual(sorted(tnames), sorted(all_tables)) self.pddb.drop(all_tables) self.assertEqual(len(self.pddb.get_table_names()), 0) self.pddb.drop_all() self.pddb = None def test_save_then_drop_all(self): test_name = self.id().lower() schema = {self.tname: self.cols} self.pddb = PandasDatabase(test_name, dynamic_schema=False, auto_load=False, auto_save=False, persistent=True, debug=False) 
self.pddb.load(table_schemas=schema) self.pddb.insert( self.tname, {c: '%s_%d' % (c, i) for (i, c) in enumerate(self.cols)}) self.pddb.save() header_expected = ','.join( self.cols) + ',%s\n' % PandasDatabase._id_colname record_expected_regex = header_expected + \ ','.join(['%s_%d' % (c, i) for (i, c) in enumerate(self.cols)]) + ',.+?\n' with open( os.path.join(self.pddb.root_dir, test_name, self.tname + '.csv'), 'r') as f: record_csv = f.read() self.assertTrue(os.path.exists(test_name)) self.pddb.drop_all() self.assertRegex(record_csv, record_expected_regex) self.assertFalse(os.path.exists(test_name)) self.pddb.drop_all() self.pddb = None def test_defer_save_queue_max(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, astype='dict', auto_load=False, auto_save=True, deferred_save=True, persistent=True, debug=False) self.pddb.load(table_names=self.tname) i = 0 for i in range(self.pddb.save_queue_max): self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for c in self.cols}) self.assertFalse( os.path.exists(os.path.join(test_name.lower(), self.tname + '.csv'))) self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for c in self.cols}) time.sleep(1) # Give an extra second to actually save the file to disk self.assertTrue( os.path.exists(os.path.join(test_name.lower(), self.tname + '.csv'))) self.pddb.drop_all() def test_defer_save_wait(self): test_name = self.id().lower() self.pddb = PandasDatabase(test_name, dynamic_schema=True, astype='dict', auto_load=False, auto_save=True, deferred_save=True, persistent=True, debug=False) self.pddb.load(table_names=self.tname) for i in range(self.pddb.save_queue_max): self.pddb.insert(self.tname, {c: '%s_%d' % (c, i) for c in self.cols}) self.assertFalse( os.path.exists(os.path.join(test_name.lower(), self.tname + '.csv'))) time.sleep(self.pddb.save_wait + 1) # Give an extra second to actually save the file to disk self.assertTrue( os.path.exists(os.path.join(test_name.lower(), self.tname + 
'.csv'))) self.pddb.drop_all() self.pddb = None '''