def test_query_with_non_unique_keys(self, mock_connect):
    """The generated query orders by a non-unique, non-primary index column."""
    inst = Postgres(self.source, OPTIONS)
    inst.conn, inst.cursor = connect(self.source)
    mock_cursor = mock_connect.return_value.cursor.return_value
    mock_cursor.fetchall.return_value = [
        {
            'attname': 'idx3',
            'indisunique': False,
            'indisprimary': False,
            'indnatts': 1,
            'indexrelid': 'idx3123',
        },
    ]

    schema, table = 'public', 'test'
    inckey, incval = '', ''
    max_value = 100
    state = None

    keys = key_strategy(inst.get_table_metadata(SQL_GET_KEYS, schema, table))
    result = get_query(schema, table, inckey, incval, keys, max_value, state)

    self.assertEqual(result, 'SELECT * FROM "public"."test" ORDER BY "idx3"')
def test_recover_from_state(self, mock_connect, mock_execute, _, __):
    """continues to read a table from the saved state"""
    tables = [
        {'value': 'public.test1'},
        {'value': 'public.test2'},
        {'value': 'public.test3'},
    ]
    last_index = 1
    self.source['state'] = {'last_index': last_index, 'max_value': 100}
    inst = Postgres(self.source, OPTIONS)
    inst.tables = tables
    cursor_return_value = mock_connect.return_value.cursor.return_value
    cursor_return_value.fetchall.return_value = [
        {'id': 101}, {'id': 102}, {'id': 103},
    ]

    inst.read()

    first_query = mock_execute.call_args_list[0][0][0]
    # assertIn reports the needle and haystack on failure, unlike
    # assertTrue(x in y) which only prints "False is not true".
    self.assertIn("\"inckey\" >= 'incval' AND \"inckey\" <= '100'",
                  first_query)
    # Reading resumed at last_index, i.e. the second table.
    self.assertIn('FROM "public"."test2"', first_query)
def test_connect_auth_error(self, mock_connect):
    """An authentication failure is wrapped in a PanoplyException."""
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'schema.foo'}]
    mock_connect.side_effect = psycopg2.OperationalError(
        'authentication failed')
    with self.assertRaises(PanoplyException):
        inst.get_tables()
def test_read(self, m):
    '''reads a table from the database and validates that each row has a
    __tablename and __schemaname column'''
    mock_recs = [
        {'id': 1, 'col1': 'foo1', 'col2': 'bar1'},
        {'id': 2, 'col1': 'foo2', 'col2': 'bar2'},
        {'id': 3, 'col1': 'foo3', 'col2': 'bar3'},
    ]
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'my_schema.foo_bar'}]
    m.return_value.cursor.return_value.fetchall.return_value = mock_recs

    rows = inst.read()

    self.assertEqual(len(rows), len(mock_recs))
    # Every returned row must be tagged with its origin table and schema.
    for row in rows:
        self.assertEqual(row['__tablename'], 'foo_bar')
        self.assertEqual(row['__schemaname'], 'my_schema')
def test_connect_other_error(self, mock_connect):
    """Unexpected operational errors are re-raised untouched."""
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'schema.foo'}]
    mock_connect.side_effect = psycopg2.OperationalError(
        'something unexpected')
    with self.assertRaises(psycopg2.OperationalError):
        inst.get_tables()
def test_read_retries(self, mock_connect):
    """read() retries the connection MAX_RETRIES times before giving up."""
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'my_schema.foo_bar'}]
    mock_connect.side_effect = psycopg2.DatabaseError('TestRetiresError')

    with self.assertRaises(psycopg2.DatabaseError):
        inst.read()

    self.assertEqual(mock_connect.call_count,
                     postgres.source.MAX_RETRIES)
def test_incremental(self, mock_connect):
    """The incremental key/value pair ends up in the WHERE clause."""
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'schema.foo'}]

    inst.read()

    expected_query = ('DECLARE cur CURSOR FOR '
                      'SELECT * FROM "schema"."foo" WHERE inckey > \'incval\'')
    execute_mock = mock_connect.return_value.cursor.return_value.execute
    execute_mock.assert_has_calls([mock.call(expected_query)], True)
def test_reset_query_on_error(self):
    """A failed execute() resets the stream so the query starts over."""
    inst = Postgres(self.source, OPTIONS)
    failing_cursor = mock.Mock()
    failing_cursor.execute.side_effect = psycopg2.DatabaseError('oh noes!')
    inst.cursor = failing_cursor

    with self.assertRaises(psycopg2.DatabaseError):
        inst.execute('SELECT 1')

    # The self.loaded variable should have been reset to 0 in order to
    # reset the query and start from the beginning, and the broken
    # cursor must be discarded.
    self.assertEqual(inst.loaded, 0)
    self.assertEqual(inst.cursor, None)
def test_batch_size(self, mock_connect, mock_execute):
    """A custom __batchSize is honored in the FETCH FORWARD statement."""
    custom_batch_size = 42
    self.source['__batchSize'] = custom_batch_size
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'my_schema.foo_bar'}]
    mock_cursor = mock_connect.return_value.cursor.return_value
    mock_cursor.fetchall.return_value = self.mock_recs

    inst.read()

    # The second executed statement is the batch fetch itself.
    second_query = mock_execute.call_args_list[1][0][0]
    self.assertTrue(
        second_query.startswith('FETCH FORWARD %s' % custom_batch_size))
def test_read(self, mock_connect):
    '''reads a table from the database and validates that each row has a
    __tablename and __schemaname column'''
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'my_schema.foo_bar'}]
    mock_cursor = mock_connect.return_value.cursor.return_value
    mock_cursor.fetchall.return_value = self.mock_recs

    rows = inst.read()

    self.assertEqual(len(rows), len(self.mock_recs))
    # Every returned row must be tagged with its origin table and schema.
    for row in rows:
        self.assertEqual(row['__tablename'], 'foo_bar')
        self.assertEqual(row['__schemaname'], 'my_schema')
def test_no_state_for_empty_results(self, mock_connect, mock_state):
    '''before returning a batch of data, the sources state should be
    reported as well as having the state ID appended to each data
    object'''
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'my_schema.foo_bar'}]
    mock_cursor = mock_connect.return_value.cursor.return_value
    # Every batch comes back empty, so no state should ever be reported.
    mock_cursor.fetchall.side_effect = [[], []]

    inst.read()

    mock_state.assert_not_called()
def test_recover_from_state(self, mock_connect, mock_execute):
    ''' continues to read a table from the saved state '''
    table_offset = 100
    self.source['state'] = {'my_schema.foo_bar': table_offset}
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'my_schema.foo_bar'}]
    mock_cursor = mock_connect.return_value.cursor.return_value
    mock_cursor.fetchall.return_value = self.mock_recs

    inst.read()

    first_query = mock_execute.call_args_list[0][0][0]
    # The query should use `OFFSET` to skip already collected data
    self.assertTrue(first_query.endswith('OFFSET %s' % table_offset))
    # Three records were returned so the loaded count should be OFFSET + 3
    self.assertEqual(inst.loaded, table_offset + len(self.mock_recs))
def test_no_port(self, mock_connect):
    """Without an explicit port the address is passed through as a DSN."""
    source = {
        "addr": "test.database.name/foobar",
        "user": "******",
        "password": "******",
        "tables": [{'value': 'schema.foo'}],
    }
    inst = Postgres(source, OPTIONS)

    inst.read()

    mock_connect.assert_called_with(
        dsn="postgres://test.database.name/foobar",
        user=source['user'],
        password=source['password'],
        connect_timeout=postgres.source.CONNECT_TIMEOUT)
def test_custom_port(self, m):
    """A port embedded in the address is split into a separate argument."""
    source = {
        "addr": "test.database.name:5439/foobar",
        "user": "******",
        "password": "******",
        "tables": [{'value': 'schema.foo'}],
    }
    inst = Postgres(source, OPTIONS)

    inst.read()

    m.assert_called_with(host="test.database.name",
                         port=5439,
                         user=source['user'],
                         password=source['password'],
                         dbname="foobar")
def test_connection_parameters(self, mock_connect):
    """Query-string options in the address survive into the DSN."""
    source = {
        "addr": "test.database:5439/foobar?sslmode=verify-full",
        "user": "******",
        "password": "******",
        "tables": [{'value': 'schema.foo'}],
    }
    inst = Postgres(source, OPTIONS)

    inst.read()

    mock_connect.assert_called_with(
        dsn="postgres://test.database:5439/foobar?sslmode=verify-full",
        user=source['user'],
        password=source['password'],
        connect_timeout=postgres.source.CONNECT_TIMEOUT)
def test_schema_name(self, mock_connect):
    '''Test schema name is used when queries and that both schema and
    table names are wrapped in enclosing quotes'''
    source = {
        "addr": "test.database.name/foobar",
        "user": "******",
        "password": "******",
        "tables": [{'value': 'schema.foo'}],
    }
    inst = Postgres(source, OPTIONS)

    inst.read()

    execute_mock = mock_connect.return_value.cursor.return_value.execute
    execute_mock.assert_has_calls(
        [mock.call('DECLARE cur CURSOR FOR SELECT * FROM "schema"."foo"')],
        True)
def test_custom_port(self, mock_connect):
    """A port in the address becomes a separate connect() argument."""
    source = {
        "addr": "test.database.name:5439/foobar",
        "user": "******",
        "password": "******",
        "tables": [{'value': 'schema.foo'}],
    }
    inst = Postgres(source, OPTIONS)

    inst.read()

    mock_connect.assert_called_with(
        host="test.database.name",
        port=5439,
        user=source['user'],
        password=source['password'],
        dbname="foobar",
        connect_timeout=postgres.source.CONNECT_TIMEOUT)
def test_get_tables(self, m):
    '''gets the list of tables from the database'''
    # Notice 'name' here is only for validation of expected result.
    # It is not a field that returns in the actual query results
    mock_tables = [
        {'table_schema': 'dbo', 'table_name': 'testNoUnique',
         'table_type': 'BASE TABLE', 'name': 'dbo.testNoUnique'},
        {'table_schema': 'dbo', 'table_name': 'testNoIndex',
         'table_type': 'BASE TABLE', 'name': 'dbo.testNoIndex'},
        {'table_schema': 'SalesLT', 'table_name': 'Customer',
         'table_type': 'BASE TABLE', 'name': 'SalesLT.Customer'},
        {'table_schema': 'SalesLT', 'table_name': 'ProductModel',
         'table_type': 'BASE TABLE', 'name': 'SalesLT.ProductModel'},
        {'table_schema': 'mySchema', 'table_name': 'someTable',
         'table_type': 'VIEW', 'name': 'mySchema.someTable (VIEW)'},
    ]
    inst = Postgres(self.source, OPTIONS)
    m.return_value.cursor.return_value.fetchall.return_value = mock_tables

    tables = inst.get_tables()

    self.assertEqual(len(tables), len(mock_tables))
    for table, mtable in zip(tables, mock_tables):
        expected_value = '{}.{}'.format(mtable["table_schema"],
                                        mtable["table_name"])
        self.assertEqual(table['name'], mtable['name'])
        self.assertEqual(table['value'], expected_value)
def test_remove_state_from_source(self):
    ''' once extracted, the state object is removed from the source '''
    state = {'my_schema.foo_bar': 1}
    self.source['state'] = state

    inst = Postgres(self.source, OPTIONS)

    # State object should have been extracted and saved on the stream
    self.assertEqual(inst.saved_state, state)
    # No state key should be inside the source definition
    self.assertIsNone(inst.source.get('state', None))
def test_reports_state(self, mock_connect, mock_state):
    '''before returning a batch of data, the sources state should be
    reported as well as having the state ID appended to each data
    object'''
    inst = Postgres(self.source, OPTIONS)
    table_name = 'my_schema.foo_bar'
    inst.tables = [{'value': table_name}]
    mock_cursor = mock_connect.return_value.cursor.return_value
    mock_cursor.fetchall.side_effect = [self.mock_recs, []]

    rows = inst.read()

    state_id = rows[0]['__state']
    msg = 'State ID is not the same in all rows!'
    for row in rows:
        self.assertEqual(row['__state'], state_id, msg)
    # State function was called with relevant table name and row count
    mock_state.assert_called_with(state_id,
                                  {table_name: len(self.mock_recs)})
def test_read_from_other_schema(self, mock_connect, mock_metadata, __):
    """Each table's schema/table pair is forwarded to the metadata lookup."""
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [
        {'value': 'my_schema.foo_bar'},
        {'value': 'your_schema.bar_foo'},
    ]
    mock_cursor = mock_connect.return_value.cursor.return_value
    # One single-record batch followed by an empty batch, per table.
    mock_cursor.fetchall.side_effect = [self.mock_recs[:1], []] * 2

    expected = [
        ('my_schema', 'foo_bar'),
        ('my_schema', 'foo_bar'),
        ('your_schema', 'bar_foo'),
        ('your_schema', 'bar_foo'),
    ]
    for expected_schema, expected_table in expected:
        inst.read()
        _, schema, table = mock_metadata.call_args[0]
        self.assertEqual(schema, expected_schema)
        self.assertEqual(table, expected_table)
def test_remove_state_from_source(self):
    """ once extracted, the state object is removed from the source """
    last_index = 3
    self.source['state'] = {'last_index': last_index}

    inst = Postgres(self.source, OPTIONS)

    # The saved index must be restored onto the instance...
    self.assertEqual(inst.index, last_index)
    # ...and no state key should remain inside the source definition.
    self.assertIsNone(inst.source.get('state', None))
def test_read_end_stream(self, m):
    '''reads the entire table from the database and validates that the
    stream returns None to indicate the end'''
    mock_recs = [
        {'id': 1, 'col1': 'foo1', 'col2': 'bar1'},
        {'id': 2, 'col1': 'foo2', 'col2': 'bar2'},
        {'id': 3, 'col1': 'foo3', 'col2': 'bar3'},
    ]
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'my_schema.foo_bar'}]
    m.return_value.cursor.return_value.fetchall.side_effect = [
        mock_recs, [],
    ]

    # A full batch, then an empty batch, then None marks the end.
    self.assertEqual(len(inst.read()), len(mock_recs))
    self.assertEqual(inst.read(), [])
    self.assertEqual(inst.read(), None)
def test_get_table_metadata(self, mock_connect, mock_execute):
    """Schema and table names are quoted before the ::regclass cast."""
    stream = Postgres(self.source, OPTIONS)
    stream.conn, stream.cursor = connect(self.source)
    cases = [('public', 'Errors'), ('public', 'Notifications')]
    for i, (schema, table) in enumerate(cases):
        stream.get_table_metadata(SQL_GET_KEYS, schema, table)
        # make sure tables and schema are properly escaped when
        # casting to regclass (as it implicitly converts to lowercase)
        query = mock_execute.call_args_list[i][0][0]
        self.assertIn('\'"%s"."%s"\'::regclass' % (schema, table), query)
def test_query_with_no_keys(self, mock_connect, _):
    """Without index keys the query falls back to ordering by the
    table's columns plus the incremental key."""
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'my_schema.foo_bar'}]
    cursor_return_value = mock_connect.return_value.cursor.return_value
    cursor_return_value.fetchall.side_effect = [
        [],  # no index keys for this table
        [
            {'attname': 'id', 'data_type': 'integer'},
            {'attname': 'name', 'data_type': 'text'},
        ],
        [],
    ]
    cursor_execute = mock_connect.return_value.cursor.return_value.execute
    cursor_execute.return_value = lambda *args: None

    inst.read()

    # Extract the positional query string of each execute() call.
    queries = [call_args[0]
               for call_args, _kwargs in cursor_execute.call_args_list]
    # BUGFIX: the original used filter(), which on Python 3 returns a
    # lazy iterator that cannot be indexed with [0]; a list
    # comprehension keeps the result subscriptable.
    declares = [q for q in queries if 'DECLARE' in q]
    expected = 'DECLARE cur ' \
               'CURSOR FOR SELECT * FROM "my_schema"."foo_bar" ' \
               "WHERE (\"inckey\" >= 'incval' AND \"inckey\" <= '100') " \
               'ORDER BY "id","inckey"'
    self.assertEqual(declares[0], expected)
def test_retry_with_last_values(self, mock_connect, mock_metadata, _):
    """After a mid-read failure, the cursor is re-declared starting
    from the last fetched row's key values."""
    mock_metadata.side_effect = lambda *args: [
        {'attname': 'col1', 'indisunique': True, 'indisprimary': True},
        {'attname': 'col2', 'indisunique': True, 'indisprimary': True},
    ]
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'my_schema.foo_bar'}]
    inst.batch_size = 1
    cursor_execute = mock_connect.return_value.cursor.return_value.execute
    # Third execute() raises, forcing the retry path.
    cursor_execute.side_effect = [
        lambda *args: None,
        lambda *args: None,
        psycopg2.DatabaseError('TestRetriesError'),
        lambda *args: None,
        lambda *args: None,
    ]
    cursor_return_value = mock_connect.return_value.cursor.return_value
    cursor_return_value.fetchall.return_value = self.mock_recs

    # First read no error
    inst.read()
    # Raise retry error
    inst.read()

    # Extract the positional query string of each execute() call.
    queries = [call_args[0]
               for call_args, _kwargs in cursor_execute.call_args_list]
    # BUGFIX: the original used filter(), which on Python 3 returns a
    # lazy iterator that cannot be indexed with [1]; a list
    # comprehension keeps the result subscriptable.
    declares = [q for q in queries if 'DECLARE' in q]
    # Second DECLARATION of cursor should start from last row fetched
    self.assertIn('WHERE ("col1","col2") >= (\'foo3\',\'bar3\')',
                  declares[1])
def test_read_end_stream(self, mock_connect):
    '''reads the entire table from the database and validates that the
    stream returns None to indicate the end'''
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'my_schema.foo_bar'}]
    mock_cursor = mock_connect.return_value.cursor.return_value
    mock_cursor.fetchall.side_effect = [self.mock_recs, []]

    # A full batch, then an empty batch, then None marks the end.
    self.assertEqual(len(inst.read()), len(self.mock_recs))
    self.assertEqual(inst.read(), [])
    self.assertEqual(inst.read(), None)
def test_connect_error(self, m):
    """A connection failure during read() raises PanoplyException."""
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'schema.foo'}]
    m.side_effect = psycopg2.OperationalError('Mock Error')
    with self.assertRaises(PanoplyException):
        inst.read()
def test_read_end_stream(self, mock_connect, mock_execute, mock_metadata,
                         _):
    """reads the entire table from the database and validates that the
    stream returns None to indicate the end.

    The three tables behave identically, so the per-table checks are
    data-driven instead of copy-pasted three times; assertIn replaces
    assertTrue(x in y) for informative failure messages.
    """
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [
        {'value': 'public.table1'},
        {'value': 'public.table2'},
        {'value': 'public.table3'},
    ]
    mock_metadata.side_effect = [
        [{'attname': 'col1'}],
        [{'attname': 'col2'}],
        [{'attname': 'col3'}],
    ]
    # Each table yields one full batch followed by an empty batch.
    cursor_return_value = mock_connect.return_value.cursor.return_value
    cursor_return_value.fetchall.side_effect = [
        self.mock_recs, [],
        self.mock_recs, [],
        self.mock_recs, [],
    ]

    # Per table, execute() is called three times: the DECLARE for the
    # cursor, the FETCH of the full batch, and the FETCH that comes
    # back empty — hence the stride of 3 in call_args_list.
    for t, (table, key) in enumerate([('table1', 'col1'),
                                      ('table2', 'col2'),
                                      ('table3', 'col3')]):
        base = t * 3

        # First read of this table returns a full batch.
        result = inst.read()
        self.assertEqual(len(result), len(self.mock_recs))
        declare_query = mock_execute.call_args_list[base][0][0]
        expected_query = 'FROM "public"."%s" ' \
                         'WHERE ("inckey" >= \'incval\' ' \
                         'AND "inckey" <= \'100\') ' \
                         'ORDER BY "%s","inckey"' % (table, key)
        self.assertIn(expected_query, declare_query)
        self.assertIn('FETCH FORWARD',
                      mock_execute.call_args_list[base + 1][0][0])

        # Second read of this table returns an empty batch.
        result = inst.read()
        self.assertEqual(result, [])
        self.assertIn('FETCH FORWARD',
                      mock_execute.call_args_list[base + 2][0][0])

    # After all tables are exhausted, read() signals the end with None.
    end = inst.read()
    self.assertEqual(end, None)