def test_read_from_other_schema(self, mock_connect, __):
    """each non-empty batch carries the schema and table it was read from

    Two tables living in different schemas are configured and ``read()``
    is called once per entry in the expectation list. Whenever a call
    returns rows, the first row's ``__schemaname`` / ``__tablename`` must
    match that entry; empty results are not inspected.
    """
    source = Postgres(self.source, OPTIONS)
    source.tables = [
        {'value': 'my_schema.foo_bar'},
        {'value': 'your_schema.bar_foo'},
    ]

    # fetchall hands back a single record, then an empty batch - twice over
    one_then_empty = [self.mock_recs[:1], []]
    cursor = mock_connect.return_value.cursor.return_value
    cursor.fetchall.side_effect = one_then_empty * 2

    expectations = [
        ('my_schema', 'foo_bar'),
        (None, None),
        ('your_schema', 'bar_foo'),
    ]
    for want_schema, want_table in expectations:
        batch = source.read()
        if not batch:
            # nothing to inspect for an empty result
            continue

        first_row = batch[0]
        got_schema = first_row['__schemaname']
        got_table = first_row['__tablename']
        self.assertEqual(got_schema, want_schema)
        self.assertEqual(got_table, want_table)
def test_recover_from_state(self, mock_connect, mock_execute):
    """continues to read a table from the saved state

    The source is given a saved state with ``last_index`` 1, so the first
    executed query must target the second configured table
    (``public.test2``) and contain the ``"id" >= '1'`` filter.
    """
    tables = [
        {'value': 'public.test1'},
        {'value': 'public.test2'},
        {'value': 'public.test3'},
    ]
    last_index = 1
    self.source['state'] = {'last_index': last_index}

    inst = Postgres(self.source, OPTIONS)
    inst.tables = tables

    cursor_return_value = mock_connect.return_value.cursor.return_value
    cursor_return_value.fetchall.return_value = [
        {'id': 101},
        {'id': 102},
        {'id': 103},
    ]

    inst.read()

    # First positional argument of the first recorded execute call.
    first_query = mock_execute.call_args_list[0][0][0]

    # assertIn prints both operands on failure, unlike assertTrue(a in b).
    self.assertIn("\"id\" >= '1'", first_query)
    self.assertIn('FROM "public"."test2"', first_query)
def test_read_retries(self, mock_connect):
    """a failing connection is attempted MAX_RETRIES times, then re-raised

    The patched connect always raises ``psycopg2.DatabaseError``;
    ``read()`` must propagate that error and the connect mock must have
    been called exactly ``MAX_RETRIES`` times.
    """
    source = Postgres(self.source, OPTIONS)
    source.tables = [{'value': 'my_schema.foo_bar'}]

    # Installed only after the instance exists, as in the original flow.
    connection_failure = psycopg2.DatabaseError('TestRetriesError')
    mock_connect.side_effect = connection_failure

    self.assertRaises(psycopg2.DatabaseError, source.read)
    self.assertEqual(mock_connect.call_count, MAX_RETRIES)
def test_invalid_inckey(self, mock_connect):
    """an undefined-column error is surfaced as PostgresInckeyError

    A mock connector is injected whose ``cursor.execute`` raises
    ``psycopg2.errors.UndefinedColumn``; ``read()`` is expected to raise
    ``PostgresInckeyError``. ``mock_connect`` is not referenced by the
    body of this test.
    """
    source = Postgres(self.source, OPTIONS)
    source.tables = [{'value': 'public.table1'}]

    fake_connector = mock.Mock()
    undefined_column = psycopg2.errors.UndefinedColumn(
        'column does not exist'
    )
    fake_connector.cursor.execute.side_effect = undefined_column
    source.connector = fake_connector

    self.assertRaises(PostgresInckeyError, source.read)
def test_incremental(self, mock_connect):
    """the cursor is declared with the incremental key filter and ordering

    After one ``read()`` the cursor's ``execute`` must have been called
    with the exact DECLARE statement below (other calls are allowed, in
    any order).
    """
    source = Postgres(self.source, OPTIONS)
    source.tables = [{'value': 'schema.foo'}]
    source.read()

    declare_stmt = (
        'DECLARE cur CURSOR FOR '
        'SELECT * FROM "schema"."foo" '
        'WHERE "id" >= \'1\' ORDER BY "id"'
    )
    cursor = mock_connect.return_value.cursor.return_value
    cursor.execute.assert_has_calls(
        [mock.call(declare_stmt)],
        any_order=True,
    )
def test_use_inckey_as_index(self, mock_connect):
    """after a read, the instance keys equal the configured inckey

    ``inst.keys`` is expected to be a one-element list holding
    ``self.source['inckey']``.
    """
    source = Postgres(self.source, OPTIONS)
    source.tables = [{'value': 'my_schema.foo_bar'}]

    cursor = mock_connect.return_value.cursor.return_value
    cursor.fetchall.return_value = [{'id': 1, 'col1': 1}]

    source.read()

    wanted_keys = [self.source['inckey']]
    self.assertEqual(source.keys, wanted_keys)
def test_batch_size(self, mock_connect, mock_execute):
    """a custom ``__batchSize`` on the source is used in the FETCH query

    The second recorded execute call must start with
    ``FETCH FORWARD <batch size>``.
    """
    batch_size = 42
    self.source['__batchSize'] = batch_size

    source = Postgres(self.source, OPTIONS)
    source.tables = [{'value': 'my_schema.foo_bar'}]

    cursor = mock_connect.return_value.cursor.return_value
    cursor.fetchall.return_value = self.mock_recs

    source.read()

    # First positional argument of the second recorded execute call.
    fetch_query = mock_execute.call_args_list[1][0][0]
    expected_prefix = 'FETCH FORWARD {}'.format(batch_size)
    self.assertTrue(fetch_query.startswith(expected_prefix))
def test_read(self, mock_connect):
    """reads a table from the database and validates that each row has a
    __tablename, __schemaname and __databasename column"""
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'my_schema.foo_bar'}]

    cursor_return_value = mock_connect.return_value.cursor.return_value
    cursor_return_value.fetchall.return_value = self.mock_recs

    rows = inst.read()

    # Every mocked record must come back.
    self.assertEqual(len(rows), len(self.mock_recs))

    # Iterate the rows directly instead of indexing through range(len()).
    for row in rows:
        self.assertEqual(row['__tablename'], 'foo_bar')
        self.assertEqual(row['__schemaname'], 'my_schema')
        self.assertEqual(row['__databasename'], self.source['db_name'])
def test_no_state_for_empty_results(self, mock_connect, mock_state):
    """no state is reported when the read fetches only empty batches"""
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'my_schema.foo_bar'}]

    # Every fetch from the mocked cursor comes back empty.
    result_order = [[], []]
    cursor_return_value = mock_connect.return_value.cursor.return_value
    cursor_return_value.fetchall.side_effect = result_order

    inst.read()

    # With nothing fetched, the state function must not have been called.
    mock_state.assert_not_called()
def test_retry_cursor_from_last_value(self, mock_connect):
    """a cursor restarted after an error resumes from the last key values

    With no inckey and keys ``col1`` / ``col2``, the first DECLARE has no
    WHERE clause. The third ``execute`` call is configured to raise
    ``psycopg2.DatabaseError``; the DECLARE issued afterwards must filter
    on the key values of the last mocked record (``foo3`` / ``bar3``).
    """
    source = Postgres(self.source, OPTIONS)
    source.tables = [{'value': 'my_schema.foo_bar'}]
    source.inckey = None
    source.incval = None
    source.keys = ['col1', 'col2']

    rows = [
        {'id': 1, 'col1': 'foo1', 'col2': 'bar1'},
        {'id': 2, 'col1': 'foo2', 'col2': 'bar2'},
        {'id': 3, 'col1': 'foo3', 'col2': 'bar3'},
    ]

    def noop(*args):
        return None

    # With an iterable side_effect, Mock returns each non-exception item
    # as the call's result and raises items that are exception instances.
    cursor = mock_connect.return_value.cursor.return_value
    cursor.execute.side_effect = [
        noop,
        noop,
        psycopg2.DatabaseError('TestRetriesError'),
        noop,
        noop,
    ]
    cursor.fetchall.return_value = rows

    expected_first_declare = (
        'DECLARE cur CURSOR FOR '
        'SELECT * FROM "my_schema"."foo_bar" '
        'ORDER BY "col1", "col2"'
    )
    expected_retry_declare = (
        'DECLARE cur CURSOR FOR '
        'SELECT * FROM "my_schema"."foo_bar" '
        'WHERE "col1" >= \'foo3\' AND "col2" >= \'bar3\' '
        'ORDER BY "col1", "col2"'
    )

    # First read: no error
    source.read()
    first_declare = cursor.execute.call_args_list[0][0][0]

    # Second read: hits the configured error and restarts the cursor
    source.read()
    retry_declare = cursor.execute.call_args_list[-2][0][0]

    self.assertEqual(first_declare, expected_first_declare)
    self.assertEqual(retry_declare, expected_retry_declare)
def test_reports_state(self, mock_connect, mock_state):
    """before returning a batch of data, the sources state should be
    reported as well as having the state ID appended to each data object"""
    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'my_schema.foo_bar'}]

    # One batch of records followed by an empty batch.
    result_order = [self.mock_recs, []]
    cursor_return_value = mock_connect.return_value.cursor.return_value
    cursor_return_value.fetchall.side_effect = result_order

    rows = inst.read()

    # The first row's state ID is the reference for the whole batch.
    state_id = rows[0]['__state']
    state_obj = {'last_index': 0}

    msg = 'State ID is not the same in all rows!'
    for row in rows:
        self.assertEqual(row['__state'], state_id, msg)

    # The state function was last called with the shared state ID and
    # the expected state object.
    mock_state.assert_called_with(state_id, state_obj)
def test_read_end_stream(self, mock_connect, mock_execute):
    """reads the entire table from the database and validates that the
    stream returns None to indicate the end

    For each of the three configured tables the mocked cursor yields one
    batch of records followed by an empty batch. Per table, three execute
    calls are checked: the query selecting from that table, then two
    ``FETCH FORWARD`` queries. A final ``read()`` must return ``None``.
    """
    table_names = ['table1', 'table2', 'table3']

    inst = Postgres(self.source, OPTIONS)
    inst.tables = [{'value': 'public.' + name} for name in table_names]

    # A fresh empty list per table, so no batch object is shared.
    result_order = []
    for _ in table_names:
        result_order.extend([self.mock_recs, []])

    cursor_return_value = mock_connect.return_value.cursor.return_value
    cursor_return_value.fetchall.side_effect = result_order

    fetch_query = 'FETCH FORWARD'
    executed = mock_execute.call_args_list

    for position, name in enumerate(table_names):
        # Three execute calls are checked per table: indices 3n .. 3n+2.
        base = 3 * position
        select_query = (
            'FROM "public"."{}" '
            'WHERE "id" >= \'1\' '
            'ORDER BY "id"'
        ).format(name)

        # First read of the table returns the mocked records.
        result = inst.read()
        self.assertEqual(len(result), len(self.mock_recs))
        self.assertIn(select_query, executed[base][0][0])
        self.assertIn(fetch_query, executed[base + 1][0][0])

        # Second read of the table comes back empty.
        result = inst.read()
        self.assertEqual(result, [])
        self.assertIn(fetch_query, executed[base + 2][0][0])

    # Once every table is exhausted the stream signals its end with None.
    end = inst.read()
    self.assertIsNone(end)