def test_catalog(self):
    """Interrupt a FULL_TABLE sync of CHICKEN and COW, then resume it.

    The first ``do_sync`` runs with ``singer_write_message_no_cow`` installed
    as ``singer.write_message`` and is expected to raise while COW is being
    synced (presumably the writer raises on COW records -- it is defined
    elsewhere in this file).  The state captured from that run is fed into
    a second ``do_sync`` which must resume with COW and then sync CHICKEN.
    """
    singer.write_message = singer_write_message_no_cow

    with get_test_connection() as conn:
        conn.autocommit = True

        catalog = tap_oracle.do_discovery(get_test_conn_config(), [])

        cow_stream = [s for s in catalog.streams if s.table == 'COW'][0]
        cow_stream = select_all_of_stream(cow_stream)
        cow_stream = set_replication_method_for_stream(cow_stream, 'FULL_TABLE')

        chicken_stream = [s for s in catalog.streams if s.table == 'CHICKEN'][0]
        chicken_stream = select_all_of_stream(chicken_stream)
        chicken_stream = set_replication_method_for_stream(chicken_stream, 'FULL_TABLE')

        cur = conn.cursor()

        cow_rec = {'NAME': 'betty', 'colour': 'blue'}
        insert_record(cur, 'COW', cow_rec)

        chicken_rec = {'NAME': 'fred', 'colour': 'red'}
        insert_record(cur, 'CHICKEN', chicken_rec)

        state = {}

        # Initialise the flag so that a sync which unexpectedly succeeds
        # fails the assertion below instead of raising UnboundLocalError.
        blew_up_on_cow = False

        # this will sync the CHICKEN but then blow up on the COW
        try:
            tap_oracle.do_sync(get_test_conn_config(), catalog, None, state)
        except Exception:
            blew_up_on_cow = True

        self.assertTrue(blew_up_on_cow)

        self.assertEqual(9, len(CAUGHT_MESSAGES))

        # chicken messages
        self.assertIsInstance(CAUGHT_MESSAGES[0], singer.SchemaMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[2], singer.ActivateVersionMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[3], singer.RecordMessage)
        self.assertEqual('CHICKEN', CAUGHT_MESSAGES[3].stream)
        self.assertIsInstance(CAUGHT_MESSAGES[4], singer.ActivateVersionMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[5], singer.StateMessage)
        self.assertEqual(None, singer.get_currently_syncing(CAUGHT_MESSAGES[5].value))

        # cow messages
        self.assertIsInstance(CAUGHT_MESSAGES[6], singer.SchemaMessage)
        self.assertEqual("COW", CAUGHT_MESSAGES[6].stream)
        self.assertIsInstance(CAUGHT_MESSAGES[7], singer.StateMessage)

        # The last state emitted before the failure is what the resume uses.
        old_state = CAUGHT_MESSAGES[7].value
        self.assertEqual("ROOT-COW", old_state.get('currently_syncing'))
        self.assertIsInstance(CAUGHT_MESSAGES[8], singer.ActivateVersionMessage)

        # run another do_sync which will resume with COW but then also do chicken
        singer.write_message = singer_write_message_ok
        CAUGHT_MESSAGES.clear()
        tap_oracle.do_sync(get_test_conn_config(), catalog, None, old_state)

        self.assertEqual(10, len(CAUGHT_MESSAGES))

        # cow messages
        self.assertIsInstance(CAUGHT_MESSAGES[0], singer.SchemaMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
        self.assertEqual("ROOT-COW", singer.get_currently_syncing(CAUGHT_MESSAGES[1].value))
        self.assertIsInstance(CAUGHT_MESSAGES[2], singer.RecordMessage)
        self.assertEqual('COW', CAUGHT_MESSAGES[2].stream)
        self.assertIsInstance(CAUGHT_MESSAGES[3], singer.ActivateVersionMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[4], singer.StateMessage)
        self.assertEqual(None, singer.get_currently_syncing(CAUGHT_MESSAGES[4].value))

        # chicken messages
        self.assertIsInstance(CAUGHT_MESSAGES[5], singer.SchemaMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[6], singer.StateMessage)
        self.assertEqual("ROOT-CHICKEN", singer.get_currently_syncing(CAUGHT_MESSAGES[6].value))
        self.assertIsInstance(CAUGHT_MESSAGES[7], singer.RecordMessage)
        self.assertEqual('CHICKEN', CAUGHT_MESSAGES[7].stream)
        self.assertIsInstance(CAUGHT_MESSAGES[8], singer.ActivateVersionMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[9], singer.StateMessage)
        self.assertEqual(None, singer.get_currently_syncing(CAUGHT_MESSAGES[9].value))
def test_catalog(self):
    """Interrupt a Postgres FULL_TABLE sync of CHICKEN and COW, then resume.

    The first ``do_sync`` is expected to raise part-way through COW (the
    record writer ``singer_write_message_no_cow`` is defined elsewhere in
    this file).  The last state emitted before the failure is passed to a
    second ``do_sync``; the assertions check that COW keeps its table
    version and an ``xmin`` bookmark until the table is finished.
    """
    global COW_RECORD_COUNT

    singer.write_message = singer_write_message_no_cow
    pg_common.write_schema_message = singer_write_message_ok

    conn_config = get_test_connection_config()
    streams = tap_postgres.do_discovery(conn_config)

    cow_stream = [s for s in streams if s['table_name'] == 'COW'][0]
    self.assertIsNotNone(cow_stream)
    cow_stream = select_all_of_stream(cow_stream)
    cow_stream = set_replication_method_for_stream(cow_stream, 'FULL_TABLE')

    chicken_stream = [s for s in streams if s['table_name'] == 'CHICKEN'][0]
    self.assertIsNotNone(chicken_stream)
    chicken_stream = select_all_of_stream(chicken_stream)
    chicken_stream = set_replication_method_for_stream(chicken_stream, 'FULL_TABLE')

    with get_test_connection() as conn:
        conn.autocommit = True
        cur = conn.cursor()

        cow_rec = {'name': 'betty', 'colour': 'blue'}
        insert_record(cur, 'COW', cow_rec)

        cow_rec = {'name': 'smelly', 'colour': 'brow'}
        insert_record(cur, 'COW', cow_rec)

        cow_rec = {'name': 'pooper', 'colour': 'green'}
        insert_record(cur, 'COW', cow_rec)

        chicken_rec = {'name': 'fred', 'colour': 'red'}
        insert_record(cur, 'CHICKEN', chicken_rec)

    state = {}

    # Initialise the flag so that a sync which unexpectedly succeeds fails
    # the assertion below instead of raising UnboundLocalError.
    blew_up_on_cow = False

    # this will sync the CHICKEN but then blow up on the COW
    try:
        tap_postgres.do_sync(get_test_connection_config(), {'streams': streams}, None, state)
    except Exception:
        blew_up_on_cow = True

    self.assertTrue(blew_up_on_cow)

    self.assertEqual(14, len(CAUGHT_MESSAGES))

    # chicken messages
    self.assertEqual(CAUGHT_MESSAGES[0]['type'], 'SCHEMA')
    self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
    self.assertIsNone(CAUGHT_MESSAGES[1].value['bookmarks']['postgres-public-CHICKEN'].get('xmin'))

    self.assertIsInstance(CAUGHT_MESSAGES[2], singer.ActivateVersionMessage)
    new_version = CAUGHT_MESSAGES[2].version

    self.assertIsInstance(CAUGHT_MESSAGES[3], singer.RecordMessage)
    self.assertEqual('CHICKEN', CAUGHT_MESSAGES[3].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[4], singer.StateMessage)
    # xmin is set while we are processing the full table replication
    self.assertIsNotNone(CAUGHT_MESSAGES[4].value['bookmarks']['postgres-public-CHICKEN']['xmin'])

    self.assertIsInstance(CAUGHT_MESSAGES[5], singer.ActivateVersionMessage)
    self.assertEqual(CAUGHT_MESSAGES[5].version, new_version)

    self.assertIsInstance(CAUGHT_MESSAGES[6], singer.StateMessage)
    self.assertEqual(None, singer.get_currently_syncing(CAUGHT_MESSAGES[6].value))
    # xmin is cleared at the end of the full table replication
    self.assertIsNone(CAUGHT_MESSAGES[6].value['bookmarks']['postgres-public-CHICKEN']['xmin'])

    # cow messages
    self.assertEqual(CAUGHT_MESSAGES[7]['type'], 'SCHEMA')
    self.assertEqual("COW", CAUGHT_MESSAGES[7]['stream'])

    self.assertIsInstance(CAUGHT_MESSAGES[8], singer.StateMessage)
    self.assertIsNone(CAUGHT_MESSAGES[8].value['bookmarks']['postgres-public-COW'].get('xmin'))
    self.assertEqual("postgres-public-COW", CAUGHT_MESSAGES[8].value['currently_syncing'])

    self.assertIsInstance(CAUGHT_MESSAGES[9], singer.ActivateVersionMessage)
    cow_version = CAUGHT_MESSAGES[9].version

    self.assertIsInstance(CAUGHT_MESSAGES[10], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[10].record['name'], 'betty')
    self.assertEqual('COW', CAUGHT_MESSAGES[10].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[11], singer.StateMessage)
    # xmin is set while we are processing the full table replication
    self.assertIsNotNone(CAUGHT_MESSAGES[11].value['bookmarks']['postgres-public-COW']['xmin'])

    self.assertEqual(CAUGHT_MESSAGES[12].record['name'], 'smelly')
    self.assertEqual('COW', CAUGHT_MESSAGES[12].stream)

    # The last message caught before the failure carries the resume state.
    old_state = CAUGHT_MESSAGES[13].value

    # run another do_sync
    singer.write_message = singer_write_message_ok
    CAUGHT_MESSAGES.clear()
    COW_RECORD_COUNT = 0
    tap_postgres.do_sync(get_test_connection_config(), {'streams': streams}, None, old_state)

    self.assertEqual(CAUGHT_MESSAGES[0]['type'], 'SCHEMA')
    self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)

    # because we were interrupted, we do not switch versions
    self.assertEqual(CAUGHT_MESSAGES[1].value['bookmarks']['postgres-public-COW']['version'], cow_version)
    self.assertIsNotNone(CAUGHT_MESSAGES[1].value['bookmarks']['postgres-public-COW']['xmin'])
    self.assertEqual("postgres-public-COW", singer.get_currently_syncing(CAUGHT_MESSAGES[1].value))

    self.assertIsInstance(CAUGHT_MESSAGES[2], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[2].record['name'], 'smelly')
    self.assertEqual('COW', CAUGHT_MESSAGES[2].stream)

    # after each record: a state that still carries xmin and currently_syncing,
    # because we are not yet done with the COW table
    self.assertIsInstance(CAUGHT_MESSAGES[3], singer.StateMessage)
    self.assertIsNotNone(CAUGHT_MESSAGES[3].value['bookmarks']['postgres-public-COW']['xmin'])
    self.assertEqual(singer.get_currently_syncing(CAUGHT_MESSAGES[3].value), 'postgres-public-COW')

    self.assertIsInstance(CAUGHT_MESSAGES[4], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[4].record['name'], 'pooper')
    self.assertEqual('COW', CAUGHT_MESSAGES[4].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[5], singer.StateMessage)
    self.assertIsNotNone(CAUGHT_MESSAGES[5].value['bookmarks']['postgres-public-COW']['xmin'])
    self.assertEqual(singer.get_currently_syncing(CAUGHT_MESSAGES[5].value), 'postgres-public-COW')

    # the table version is re-activated once the full table replication is finished
    self.assertIsInstance(CAUGHT_MESSAGES[6], singer.ActivateVersionMessage)
    self.assertEqual(CAUGHT_MESSAGES[6].version, cow_version)

    self.assertIsInstance(CAUGHT_MESSAGES[7], singer.StateMessage)
    self.assertIsNone(singer.get_currently_syncing(CAUGHT_MESSAGES[7].value))
    # NOTE(review): this checks CHICKEN's xmin although the messages above
    # belong to COW -- confirm whether 'postgres-public-COW' was intended.
    self.assertIsNone(CAUGHT_MESSAGES[7].value['bookmarks']['postgres-public-CHICKEN']['xmin'])
def test_catalog(self):
    """Interrupt the initial full-table phase of a LOG_BASED COW sync, then resume.

    The first ``do_sync`` is expected to raise after two COW records have
    been caught (the record writer ``singer_write_message_no_cow`` is
    defined elsewhere in this file).  The second ``do_sync`` starts from the
    last emitted state and must keep the same ``lsn`` and table version
    while the ``xmin`` bookmark advances and is finally cleared.
    """
    global COW_RECORD_COUNT

    singer.write_message = singer_write_message_no_cow
    pg_common.write_schema_message = singer_write_message_ok

    conn_config = get_test_connection_config()
    streams = tap_postgres.do_discovery(conn_config)

    cow_stream = [s for s in streams if s['table_name'] == 'COW'][0]
    self.assertIsNotNone(cow_stream)
    cow_stream = select_all_of_stream(cow_stream)
    cow_stream = set_replication_method_for_stream(cow_stream, 'LOG_BASED')

    with get_test_connection() as conn:
        conn.autocommit = True
        cur = conn.cursor()

        cow_rec = {'name': 'betty', 'colour': 'blue'}
        insert_record(cur, 'COW', cow_rec)

        cow_rec = {'name': 'smelly', 'colour': 'brow'}
        insert_record(cur, 'COW', cow_rec)

        cow_rec = {'name': 'pooper', 'colour': 'green'}
        insert_record(cur, 'COW', cow_rec)

    state = {}

    # Initialise the flag so that a sync which unexpectedly succeeds fails
    # the assertion below instead of raising UnboundLocalError.
    blew_up_on_cow = False

    # the initial phase of cows logical replication will be a full table.
    # it will sync the first record and then blow up on the 2nd record
    try:
        tap_postgres.do_sync(get_test_connection_config(), {'streams': streams}, None, state)
    except Exception:
        blew_up_on_cow = True

    self.assertTrue(blew_up_on_cow)

    self.assertEqual(7, len(CAUGHT_MESSAGES))

    self.assertEqual(CAUGHT_MESSAGES[0]['type'], 'SCHEMA')

    self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
    bookmark = CAUGHT_MESSAGES[1].value['bookmarks']['postgres-public-COW']
    self.assertIsNone(bookmark.get('xmin'))
    self.assertIsNotNone(bookmark.get('lsn'))
    end_lsn = bookmark.get('lsn')

    self.assertIsInstance(CAUGHT_MESSAGES[2], singer.ActivateVersionMessage)
    new_version = CAUGHT_MESSAGES[2].version

    self.assertIsInstance(CAUGHT_MESSAGES[3], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[3].record, {'colour': 'blue', 'id': 1, 'name': 'betty'})
    self.assertEqual('COW', CAUGHT_MESSAGES[3].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[4], singer.StateMessage)
    bookmark = CAUGHT_MESSAGES[4].value['bookmarks']['postgres-public-COW']
    # xmin is set while we are processing the full table replication
    self.assertIsNotNone(bookmark['xmin'])
    self.assertEqual(bookmark['lsn'], end_lsn)

    self.assertEqual(CAUGHT_MESSAGES[5].record['name'], 'smelly')
    self.assertEqual('COW', CAUGHT_MESSAGES[5].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[6], singer.StateMessage)
    last_xmin = CAUGHT_MESSAGES[6].value['bookmarks']['postgres-public-COW']['xmin']
    old_state = CAUGHT_MESSAGES[6].value

    # run another do_sync, should get the remaining record which effectively
    # finishes the initial full_table replication portion of the logical replication
    singer.write_message = singer_write_message_ok
    COW_RECORD_COUNT = 0
    CAUGHT_MESSAGES.clear()
    tap_postgres.do_sync(get_test_connection_config(), {'streams': streams}, None, old_state)

    self.assertEqual(8, len(CAUGHT_MESSAGES))

    self.assertEqual(CAUGHT_MESSAGES[0]['type'], 'SCHEMA')

    self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
    bookmark = CAUGHT_MESSAGES[1].value['bookmarks']['postgres-public-COW']
    self.assertEqual(bookmark.get('xmin'), last_xmin)
    self.assertEqual(bookmark.get('lsn'), end_lsn)
    self.assertEqual(bookmark.get('version'), new_version)

    self.assertIsInstance(CAUGHT_MESSAGES[2], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[2].record, {'colour': 'brow', 'id': 2, 'name': 'smelly'})
    self.assertEqual('COW', CAUGHT_MESSAGES[2].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[3], singer.StateMessage)
    bookmark = CAUGHT_MESSAGES[3].value['bookmarks']['postgres-public-COW']
    # Previously assertTrue(xmin, last_xmin), which used last_xmin as the
    # failure message and therefore only checked truthiness.
    self.assertEqual(bookmark.get('xmin'), last_xmin)
    self.assertEqual(bookmark.get('lsn'), end_lsn)
    self.assertEqual(bookmark.get('version'), new_version)

    self.assertIsInstance(CAUGHT_MESSAGES[4], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[4].record['name'], 'pooper')
    self.assertEqual('COW', CAUGHT_MESSAGES[4].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[5], singer.StateMessage)
    bookmark = CAUGHT_MESSAGES[5].value['bookmarks']['postgres-public-COW']
    self.assertGreater(bookmark.get('xmin'), last_xmin)
    self.assertEqual(bookmark.get('lsn'), end_lsn)
    self.assertEqual(bookmark.get('version'), new_version)

    self.assertIsInstance(CAUGHT_MESSAGES[6], singer.ActivateVersionMessage)
    self.assertEqual(CAUGHT_MESSAGES[6].version, new_version)

    self.assertIsInstance(CAUGHT_MESSAGES[7], singer.StateMessage)
    bookmark = CAUGHT_MESSAGES[7].value['bookmarks']['postgres-public-COW']
    self.assertIsNone(bookmark.get('xmin'))
    self.assertEqual(bookmark.get('lsn'), end_lsn)
    self.assertEqual(bookmark.get('version'), new_version)
def test_catalog(self):
    """Run a LOG_BASED sync of CHICKEN twice and compare the bookmarks.

    The first ``do_sync`` starts from an empty state; the second one is
    given the final state of the first run after one more row has been
    inserted.  The second run must report a larger ``scn`` and the same
    table ``version``.
    """
    singer.write_message = singer_write_message

    with get_test_connection() as conn:
        conn.autocommit = True

        catalog = tap_oracle.do_discovery(get_test_conn_config(), [])
        chicken_candidates = [s for s in catalog.streams if s.table == 'CHICKEN']
        # select every column, drop NO_SYNC again, then switch to logminer
        chicken_stream = set_replication_method_for_stream(
            unselect_column(select_all_of_stream(chicken_candidates[0]), 'NO_SYNC'),
            'LOG_BASED')

        cur = conn.cursor()

        base_date = datetime.date(1996, 6, 6)
        rec_1 = {
            '"none_column"': None,
            '"our_number_10_2"': decimal.Decimal('100.11'),
            '"our_binary_float"': 1234567.8901234,
            '"our_date"': base_date,
            '"name-char-explicit-byte"': 'name-char-explicit-byte I',
        }
        insert_record(cur, 'CHICKEN', rec_1)

        rec_2 = copy.deepcopy(rec_1)
        rec_2.update({
            '"size_number_4_0"': 101,
            '"our_number_10_2"': decimal.Decimal('101.11') + 1,
            '"our_binary_float"': 1234567.8901234 + 1,
            '"our_date"': base_date + datetime.timedelta(days=1),
        })
        insert_record(cur, 'CHICKEN', rec_2)

        # first run, starting from an empty state
        original_state = {}
        tap_oracle.do_sync(get_test_conn_config(), catalog, None, original_state)

        # message order asserted for the first run
        first_run_types = [
            singer.SchemaMessage,
            singer.StateMessage,
            singer.ActivateVersionMessage,
            singer.RecordMessage,
            singer.RecordMessage,
            singer.ActivateVersionMessage,
            singer.StateMessage,
        ]
        self.assertEqual(7, len(CAUGHT_MESSAGES))
        for caught, expected_type in zip(CAUGHT_MESSAGES, first_run_types):
            self.assertIsInstance(caught, expected_type)

        state = CAUGHT_MESSAGES[6].value
        bookmark = state.get('bookmarks', {}).get(chicken_stream.tap_stream_id, {})
        version = bookmark.get('version')
        scn = bookmark.get('scn')

        self.assertIsNotNone(version)
        self.assertIsNotNone(scn)
        for activate_index in (2, 5):
            self.assertEqual(CAUGHT_MESSAGES[activate_index].version, version)

        # run another do_sync
        CAUGHT_MESSAGES.clear()

        rec_3 = copy.deepcopy(rec_2)
        rec_3.update({
            '"size_number_4_0"': 102,
            '"our_number_10_2"': decimal.Decimal('101.11') + 3,
            '"our_binary_float"': 1234567.8901234 + 2,
            '"our_date"': base_date + datetime.timedelta(days=2),
        })
        insert_record(cur, 'CHICKEN', rec_3)

        # this sync should activate logminer because of the scn in state
        tap_oracle.do_sync(get_test_conn_config(), catalog, None, state)

        second_run_types = [
            singer.SchemaMessage,
            singer.RecordMessage,
            singer.StateMessage,
        ]
        self.assertEqual(3, len(CAUGHT_MESSAGES))
        for caught, expected_type in zip(CAUGHT_MESSAGES, second_run_types):
            self.assertIsInstance(caught, expected_type)

        new_bookmark = CAUGHT_MESSAGES[2].value.get('bookmarks', {}).get(
            chicken_stream.tap_stream_id, {})
        new_scn = new_bookmark.get('scn')
        new_version = new_bookmark.get('version')

        # the scn must have advanced while the table version is unchanged
        self.assertGreater(new_scn, scn)
        self.assertEqual(version, new_version)
def test_catalog(self):
    """Sync CHICKEN with FULL_TABLE replication and verify every column value.

    Two rows covering the numeric, temporal and character column types are
    inserted, synced, and compared against the expected record
    representation.  A second ``do_sync`` is then run and must emit a
    larger table version than the first.
    """
    singer.write_message = singer_write_message

    with get_test_connection() as conn:
        conn.autocommit = True

        catalog = tap_oracle.do_discovery(get_test_conn_config(), [])
        chicken_stream = [s for s in catalog.streams if s.table == 'CHICKEN'][0]
        chicken_stream = select_all_of_stream(chicken_stream)

        # unselect the NO_SYNC column
        chicken_stream = unselect_column(chicken_stream, 'NO_SYNC')
        chicken_stream = set_replication_method_for_stream(chicken_stream, 'FULL_TABLE')

        cur = conn.cursor()

        our_date = datetime.date(1996, 6, 6)
        our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
        nyc_tz = pytz.timezone('America/New_York')
        our_ts_tz_edt = nyc_tz.localize(datetime.datetime(1997, 3, 3, 3, 3, 3, 722184))
        our_ts_tz_utc = datetime.datetime(1997, 3, 3, 3, 3, 3, 722184, pytz.UTC)
        california_tz = pytz.timezone('America/Los_Angeles')
        our_ts_local = california_tz.localize(datetime.datetime(2018, 4, 1, 10, 0, 0, 0))
        our_float = decimal.Decimal('1234567.890123456789012345678901234567890123456789')
        our_real = our_float
        our_double_precision = our_float

        rec_1 = {
            '"none_column"': None,
            'NO_SYNC': 666,  # should not sync this column
            '"size_number"': 1E-6,
            '"size_number_*"': 100.12345,
            '"size_number_4"': 100.12345,
            '"size_number_4_0"': 100.12345,
            '"size_number_*_0"': 2**128,  # 39 Decimal Places in pow(2)
            '"size_number_*_38"': 1E-6,
            '"size_number_10_-1"': 311.12345,
            '"size_number_integer"': 400.12345,
            '"size_number_int"': 500.12345,
            '"size_number_smallint"': 50000.12345,
            '"our_number_10_2"': decimal.Decimal('100.11'),
            '"our_number_38_4"': decimal.Decimal('99999999999999999.99991'),
            '"our_double_precision"': our_double_precision,
            '"our_real"': our_real,
            '"our_float"': our_float,
            '"our_binary_float"': 1234567.8901234,
            '"our_binary_double"': 1234567.8901234,
            '"our_nan"': float('nan'),
            '"our_+_infinity"': float('+inf'),
            '"our_-_infinity"': float('-inf'),
            '"our_date"': our_date,
            '"our_ts"': our_ts,
            '"our_ts_tz_edt"': our_ts_tz_edt,
            '"our_ts_tz_utc"': our_ts_tz_utc,
            '"our_ts_tz_local"': our_ts_local,
            '"name-char-explicit-byte"': 'name-char-explicit-byte I',
            '"name-char-explicit-char"': 'name-char-explicit-char I',
            'NAME_NCHAR': 'name-nchar I',
            '"name-nvarchar2"': 'name-nvarchar2 I',
            '"name-varchar-explicit-byte"': 'name-varchar-explicit-byte I',
            '"name-varchar-explicit-char"': 'name-varchar-explicit-char I',
            '"name-varchar2-explicit-byte"': 'name-varchar2-explicit-byte I',
            '"name-varchar2-explicit-char"': 'name-varchar2-explicit-char I'
        }
        insert_record(cur, 'CHICKEN', rec_1)

        rec_2 = copy.deepcopy(rec_1)
        rec_2.update({
            '"size_number_4_0"': 101,
            '"our_number_10_2"': decimal.Decimal('101.11') + 1,
            '"our_double_precision"': our_double_precision + 1,
            '"our_date"': our_date + datetime.timedelta(days=1),
            'NAME_NCHAR': 'name-nchar II'
        })
        insert_record(cur, 'CHICKEN', rec_2)

        state = {}
        tap_oracle.do_sync(get_test_conn_config(), catalog, None, state)

        # messages: Schema, State, ActivateVersion, Record, Record, ActivateVersion, State
        self.assertEqual(7, len(CAUGHT_MESSAGES))
        self.assertIsInstance(CAUGHT_MESSAGES[0], singer.SchemaMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[2], singer.ActivateVersionMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[3], singer.RecordMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[4], singer.RecordMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[5], singer.ActivateVersionMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[6], singer.StateMessage)

        # The state is taken from the first state message of the run.
        state = CAUGHT_MESSAGES[1].value
        version = state.get('bookmarks', {}).get(chicken_stream.tap_stream_id, {}).get('version')

        self.assertIsNotNone(version)
        self.assertEqual(CAUGHT_MESSAGES[2].version, version)
        self.assertEqual(CAUGHT_MESSAGES[5].version, version)

        # NOTE(review): some character values are expected back with a
        # trailing space that was not inserted -- presumably fixed-width
        # column padding; confirm against the table definition.
        expected_rec_1 = {
            'ID': 1,
            'none_column': None,
            'size_number': '0.000001',
            'size_number_*': '100.12345',
            'size_number_4': 100,
            'size_number_4_0': 100,
            'size_number_*_0': 2**128,
            'size_number_*_38': '0.000001',
            'size_number_10_-1': 310,
            'size_number_integer': 400,
            'size_number_int': 500,
            'size_number_smallint': 50000,
            'our_number_10_2': '100.11',
            'our_number_38_4': '99999999999999999.9999',
            'our_double_precision': '1234567.8901234567890123456789012345679',
            'our_float': '1234567.8901234567890123456789012345679',
            'our_real': '1234567.890123456789',
            'our_binary_float': 1234567.875,
            'our_binary_double': 1234567.890123,
            'our_+_infinity': float('+inf'),
            'our_-_infinity': float('-inf'),
            'our_date': '1996-06-06T00:00:00.00+00:00',
            'our_ts': '1997-02-02T02:02:02.722184+00:00',
            'our_ts_tz_edt': '1997-03-03T03:03:03.722184-05:00',
            'our_ts_tz_utc': '1997-03-03T03:03:03.722184+00:00',
            'our_ts_tz_local': '2018-04-01T17:00:00.000000+00:00',
            'name-char-explicit-byte': 'name-char-explicit-byte I ',
            'name-char-explicit-char': 'name-char-explicit-char I ',
            'NAME_NCHAR': 'name-nchar I ',
            'name-nvarchar2': 'name-nvarchar2 I',
            'name-varchar-explicit-byte': 'name-varchar-explicit-byte I',
            'name-varchar-explicit-char': 'name-varchar-explicit-char I',
            'name-varchar2-explicit-byte': 'name-varchar2-explicit-byte I',
            'name-varchar2-explicit-char': 'name-varchar2-explicit-char I'
        }

        # NaN never compares equal, so check it separately and drop it
        # before comparing the rest of the record.
        self.assertTrue(math.isnan(CAUGHT_MESSAGES[3].record.get('our_nan')))
        CAUGHT_MESSAGES[3].record.pop('our_nan')
        self.assertEqual(CAUGHT_MESSAGES[3].record, expected_rec_1)

        # Copy rather than alias, so the first expectation is not mutated.
        expected_rec_2 = dict(expected_rec_1)
        expected_rec_2.update({
            'ID': 2,
            'size_number_4_0': 101,
            'our_number_10_2': '102.11',
            'our_double_precision': '1234568.890123456789012345679',
            'our_date': '1996-06-07T00:00:00.00+00:00',
            'NAME_NCHAR': 'name-nchar II '
        })

        self.assertTrue(math.isnan(CAUGHT_MESSAGES[4].record.get('our_nan')))
        CAUGHT_MESSAGES[4].record.pop('our_nan')
        self.assertEqual(CAUGHT_MESSAGES[4].record, expected_rec_2)

        # run another do_sync
        CAUGHT_MESSAGES.clear()
        tap_oracle.do_sync(get_test_conn_config(), catalog, None, state)

        self.assertEqual(6, len(CAUGHT_MESSAGES))
        self.assertIsInstance(CAUGHT_MESSAGES[0], singer.SchemaMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[2], singer.RecordMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[3], singer.RecordMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[4], singer.ActivateVersionMessage)
        self.assertIsInstance(CAUGHT_MESSAGES[5], singer.StateMessage)

        # the second run must emit a larger table version than the first
        nascent_version = CAUGHT_MESSAGES[1].value.get('bookmarks', {}).get(
            chicken_stream.tap_stream_id, {}).get('version')
        self.assertGreater(nascent_version, version)
def test_catalog(self):
    """Interrupt the initial full-table phase of a LOG_BASED COW sync, then resume.

    The first ``do_sync`` is expected to raise after two COW records have
    been caught (the record writer ``singer_write_message_no_cow`` is
    defined elsewhere in this file).  The second ``do_sync`` starts from the
    last emitted state and must keep the same ``scn`` and table version
    while the ``ORA_ROWSCN`` bookmark advances and is finally cleared.
    """
    global COW_RECORD_COUNT

    singer.write_message = singer_write_message_no_cow

    conn_config = get_test_conn_config()
    catalog = tap_oracle.do_discovery(conn_config, [])

    cow_stream = [s for s in catalog.streams if s.table == 'COW'][0]
    self.assertIsNotNone(cow_stream)
    cow_stream = select_all_of_stream(cow_stream)
    cow_stream = set_replication_method_for_stream(cow_stream, 'LOG_BASED')

    with get_test_connection() as conn:
        conn.autocommit = True
        cur = conn.cursor()

        cow_rec = {'name': 'betty', 'colour': 'blue'}
        insert_record(cur, 'COW', cow_rec)

        cow_rec = {'name': 'smelly', 'colour': 'brow'}
        insert_record(cur, 'COW', cow_rec)

        cow_rec = {'name': 'pooper', 'colour': 'green'}
        insert_record(cur, 'COW', cow_rec)

    state = {}

    # Initialise the flag so that a sync which unexpectedly succeeds fails
    # the assertion below instead of raising UnboundLocalError.
    blew_up_on_cow = False

    # the initial phase of cows logical replication will be a full table.
    # it will sync the first record and then blow up on the 2nd record
    try:
        tap_oracle.do_sync(get_test_conn_config(), catalog, None, state)
    except Exception:
        blew_up_on_cow = True

    self.assertTrue(blew_up_on_cow)

    self.assertEqual(7, len(CAUGHT_MESSAGES))

    self.assertIsInstance(CAUGHT_MESSAGES[0], singer.SchemaMessage)

    self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
    self.assertEqual(CAUGHT_MESSAGES[1].value['currently_syncing'], 'ROOT-COW')
    bookmark = CAUGHT_MESSAGES[1].value['bookmarks']['ROOT-COW']
    self.assertIsNotNone(bookmark['version'])
    self.assertEqual(bookmark['last_replication_method'], 'LOG_BASED')
    self.assertIsNone(bookmark.get('ORA_ROWSCN'))
    self.assertIsNotNone(bookmark.get('scn'))

    end_scn = bookmark.get('scn')
    first_version = bookmark.get('version')

    self.assertIsInstance(CAUGHT_MESSAGES[2], singer.ActivateVersionMessage)
    self.assertEqual(CAUGHT_MESSAGES[2].version, first_version)

    self.assertIsInstance(CAUGHT_MESSAGES[3], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[3].record, {
        'NAME': 'betty',
        'ID': 1,
        'COLOUR': 'blue'
    })
    self.assertEqual('ROOT-COW', CAUGHT_MESSAGES[3].stream)
    self.assertEqual(first_version, CAUGHT_MESSAGES[3].version)

    self.assertIsInstance(CAUGHT_MESSAGES[4], singer.StateMessage)
    bookmark = CAUGHT_MESSAGES[4].value['bookmarks']['ROOT-COW']
    # ORA_ROWSCN is set while we are processing the full table replication
    self.assertIsNotNone(bookmark['ORA_ROWSCN'])
    self.assertEqual(bookmark['scn'], end_scn)
    self.assertEqual(first_version, bookmark['version'])

    self.assertEqual(CAUGHT_MESSAGES[5].record['NAME'], 'smelly')
    self.assertEqual('ROOT-COW', CAUGHT_MESSAGES[5].stream)
    self.assertEqual(first_version, CAUGHT_MESSAGES[5].version)

    self.assertIsInstance(CAUGHT_MESSAGES[6], singer.StateMessage)
    bookmark = CAUGHT_MESSAGES[6].value['bookmarks']['ROOT-COW']
    self.assertEqual(first_version, bookmark['version'])
    last_ora_rowscn = bookmark['ORA_ROWSCN']
    old_state = CAUGHT_MESSAGES[6].value

    # run another do_sync, should get the remaining record which effectively
    # finishes the initial full_table replication portion of the logical replication
    singer.write_message = singer_write_message_ok
    COW_RECORD_COUNT = 0
    CAUGHT_MESSAGES.clear()
    tap_oracle.do_sync(get_test_conn_config(), catalog, None, old_state)

    self.assertEqual(8, len(CAUGHT_MESSAGES))

    self.assertIsInstance(CAUGHT_MESSAGES[0], singer.SchemaMessage)

    self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
    bookmark = CAUGHT_MESSAGES[1].value['bookmarks']['ROOT-COW']
    self.assertEqual(bookmark.get('ORA_ROWSCN'), last_ora_rowscn)
    self.assertEqual(bookmark.get('scn'), end_scn)
    self.assertEqual(bookmark.get('version'), first_version)

    self.assertIsInstance(CAUGHT_MESSAGES[2], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[2].record, {
        'COLOUR': 'brow',
        'ID': 2,
        'NAME': 'smelly'
    })
    self.assertEqual('ROOT-COW', CAUGHT_MESSAGES[2].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[3], singer.StateMessage)
    bookmark = CAUGHT_MESSAGES[3].value['bookmarks']['ROOT-COW']
    # Previously assertTrue(ORA_ROWSCN, last_ora_rowscn), which used the
    # second argument as the failure message and only checked truthiness.
    self.assertEqual(bookmark.get('ORA_ROWSCN'), last_ora_rowscn)
    self.assertEqual(bookmark.get('scn'), end_scn)
    self.assertEqual(bookmark.get('version'), first_version)

    self.assertIsInstance(CAUGHT_MESSAGES[4], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[4].record['NAME'], 'pooper')
    self.assertEqual('ROOT-COW', CAUGHT_MESSAGES[4].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[5], singer.StateMessage)
    bookmark = CAUGHT_MESSAGES[5].value['bookmarks']['ROOT-COW']
    self.assertGreater(bookmark.get('ORA_ROWSCN'), last_ora_rowscn)
    self.assertEqual(bookmark.get('scn'), end_scn)
    self.assertEqual(bookmark.get('version'), first_version)

    self.assertIsInstance(CAUGHT_MESSAGES[6], singer.ActivateVersionMessage)
    self.assertEqual(CAUGHT_MESSAGES[6].version, first_version)

    self.assertIsInstance(CAUGHT_MESSAGES[7], singer.StateMessage)
    bookmark = CAUGHT_MESSAGES[7].value['bookmarks']['ROOT-COW']
    self.assertIsNone(bookmark.get('ORA_ROWSCN'))
    self.assertEqual(bookmark.get('scn'), end_scn)
    self.assertEqual(bookmark.get('version'), first_version)
def test_catalog(self, mock_connect, use_secondary):
    """Interrupt, then resume, the initial full-table phase of a LOG_BASED COW sync.

    Three COW rows are inserted and a first ``do_sync`` runs with
    ``singer.write_message`` replaced by ``singer_write_message_no_cow``;
    the test expects that run to raise after seven messages were captured.
    The last captured state is then fed to a second ``do_sync`` running
    with ``singer_write_message_ok``, which is expected to emit eight
    messages while keeping the same ``lsn`` and ``version`` bookmarks.

    Args:
        mock_connect: Mock whose recorded calls are compared against the
            expected connection keyword arguments.
        use_secondary: When truthy, the expected host and port are the
            ``secondary_host`` / ``secondary_port`` entries of the
            connection config instead of ``host`` / ``port``.
    """
    global COW_RECORD_COUNT

    def cow_bookmark(index):
        # Bookmark dict for public-COW carried by the state message at `index`.
        return CAUGHT_MESSAGES[index].value['bookmarks']['public-COW']

    singer.write_message = singer_write_message_no_cow
    pg_common.write_schema_message = singer_write_message_ok

    conn_config = get_test_connection_config(use_secondary=use_secondary)
    streams = tap_postgres.do_discovery(conn_config)

    # Assert that we connected to the correct database
    expected_connection = {
        'application_name': unittest.mock.ANY,
        'dbname': unittest.mock.ANY,
        'user': unittest.mock.ANY,
        'password': unittest.mock.ANY,
        'connect_timeout': unittest.mock.ANY,
        'host': conn_config['secondary_host'] if use_secondary else conn_config['host'],
        'port': conn_config['secondary_port'] if use_secondary else conn_config['port'],
    }
    mock_connect.assert_called_once_with(**expected_connection)
    mock_connect.reset_mock()

    cow_stream = [s for s in streams if s['table_name'] == 'COW'][0]
    assert cow_stream is not None
    cow_stream = select_all_of_stream(cow_stream)
    cow_stream = set_replication_method_for_stream(cow_stream, 'LOG_BASED')

    cow_recs = [
        {
            'name': 'betty',
            'colour': 'blue',
            'timestamp_ntz': '2020-09-01 10:40:59',
            'timestamp_tz': '2020-09-01 00:50:59+02',
        },
        {
            'name': 'smelly',
            'colour': 'brow',
            'timestamp_ntz': '2020-09-01 10:40:59 BC',
            'timestamp_tz': '2020-09-01 00:50:59+02 BC',
        },
        {
            'name': 'pooper',
            'colour': 'green',
            'timestamp_ntz': '30000-09-01 10:40:59',
            'timestamp_tz': '10000-09-01 00:50:59+02',
        },
    ]
    conn = get_test_connection()
    try:
        conn.autocommit = True
        with conn.cursor() as cur:
            for cow_rec in cow_recs:
                insert_record(cur, 'COW', cow_rec)
    finally:
        # Close the connection even when an insert fails.
        conn.close()

    blew_up_on_cow = False
    state = {}
    # The initial phase of COW's logical replication is a full table sync.
    # This run is expected to raise part-way through; the assertions below
    # expect two COW records to have been captured before the failure.
    try:
        tap_postgres.do_sync(
            get_test_connection_config(use_secondary=use_secondary),
            {'streams': streams}, None, state)
    except Exception:
        # Broad on purpose: the exception type raised by the write stub is
        # not visible from this test.
        blew_up_on_cow = True

    assert blew_up_on_cow is True
    mock_connect.assert_called_with(**expected_connection)
    mock_connect.reset_mock()

    assert len(CAUGHT_MESSAGES) == 7

    assert CAUGHT_MESSAGES[0]['type'] == 'SCHEMA'
    assert isinstance(CAUGHT_MESSAGES[1], singer.StateMessage)
    assert cow_bookmark(1).get('xmin') is None
    assert cow_bookmark(1).get('lsn') is not None
    end_lsn = cow_bookmark(1).get('lsn')

    assert isinstance(CAUGHT_MESSAGES[2], singer.ActivateVersionMessage)
    new_version = CAUGHT_MESSAGES[2].version

    assert isinstance(CAUGHT_MESSAGES[3], singer.RecordMessage)
    assert CAUGHT_MESSAGES[3].record == {
        'colour': 'blue',
        'id': 1,
        'name': 'betty',
        'timestamp_ntz': '2020-09-01T10:40:59+00:00',
        'timestamp_tz': '2020-08-31T22:50:59+00:00',
    }
    assert CAUGHT_MESSAGES[3].stream == 'public-COW'

    assert isinstance(CAUGHT_MESSAGES[4], singer.StateMessage)
    # xmin is set while we are processing the full table replication
    assert cow_bookmark(4)['xmin'] is not None
    assert cow_bookmark(4)['lsn'] == end_lsn

    assert CAUGHT_MESSAGES[5].record == {
        'colour': 'brow',
        'id': 2,
        'name': 'smelly',
        'timestamp_ntz': '9999-12-31T23:59:59.999000+00:00',
        'timestamp_tz': '9999-12-31T23:59:59.999000+00:00',
    }
    assert CAUGHT_MESSAGES[5].stream == 'public-COW'

    assert isinstance(CAUGHT_MESSAGES[6], singer.StateMessage)
    last_xmin = cow_bookmark(6)['xmin']
    old_state = CAUGHT_MESSAGES[6].value

    # Run another do_sync from the interrupted state; it should emit the
    # remaining rows, which effectively finishes the initial full_table
    # portion of the logical replication.
    singer.write_message = singer_write_message_ok
    COW_RECORD_COUNT = 0
    CAUGHT_MESSAGES.clear()
    tap_postgres.do_sync(
        get_test_connection_config(use_secondary=use_secondary),
        {'streams': streams}, None, old_state)

    mock_connect.assert_called_with(**expected_connection)
    mock_connect.reset_mock()

    assert len(CAUGHT_MESSAGES) == 8

    assert CAUGHT_MESSAGES[0]['type'] == 'SCHEMA'

    assert isinstance(CAUGHT_MESSAGES[1], singer.StateMessage)
    assert cow_bookmark(1).get('xmin') == last_xmin
    assert cow_bookmark(1).get('lsn') == end_lsn
    assert cow_bookmark(1).get('version') == new_version

    assert isinstance(CAUGHT_MESSAGES[2], singer.RecordMessage)
    assert CAUGHT_MESSAGES[2].record == {
        'colour': 'brow',
        'id': 2,
        'name': 'smelly',
        'timestamp_ntz': '9999-12-31T23:59:59.999000+00:00',
        'timestamp_tz': '9999-12-31T23:59:59.999000+00:00',
    }
    assert CAUGHT_MESSAGES[2].stream == 'public-COW'

    assert isinstance(CAUGHT_MESSAGES[3], singer.StateMessage)
    # Compare explicitly: `assert value, last_xmin` would only test
    # truthiness and use last_xmin as the failure message.
    assert cow_bookmark(3).get('xmin') == last_xmin
    assert cow_bookmark(3).get('lsn') == end_lsn
    assert cow_bookmark(3).get('version') == new_version

    assert isinstance(CAUGHT_MESSAGES[4], singer.RecordMessage)
    assert CAUGHT_MESSAGES[4].record == {
        'colour': 'green',
        'id': 3,
        'name': 'pooper',
        'timestamp_ntz': '9999-12-31T23:59:59.999000+00:00',
        'timestamp_tz': '9999-12-31T23:59:59.999000+00:00',
    }
    assert CAUGHT_MESSAGES[4].stream == 'public-COW'

    assert isinstance(CAUGHT_MESSAGES[5], singer.StateMessage)
    assert cow_bookmark(5).get('xmin') > last_xmin
    assert cow_bookmark(5).get('lsn') == end_lsn
    assert cow_bookmark(5).get('version') == new_version

    assert isinstance(CAUGHT_MESSAGES[6], singer.ActivateVersionMessage)
    assert CAUGHT_MESSAGES[6].version == new_version

    # The final state no longer carries an xmin bookmark for COW.
    assert isinstance(CAUGHT_MESSAGES[7], singer.StateMessage)
    assert cow_bookmark(7).get('xmin') is None
    assert cow_bookmark(7).get('lsn') == end_lsn
    assert cow_bookmark(7).get('version') == new_version
def test_catalog(self, mock_connect, use_secondary):
    """Interrupt a FULL_TABLE sync of CHICKEN and COW, then resume it.

    One CHICKEN row and three COW rows are inserted. The first ``do_sync``
    runs with ``singer.write_message`` replaced by
    ``singer_write_message_no_cow`` and is expected to raise after fourteen
    messages were captured: a complete CHICKEN sync followed by a partial
    COW sync. The last captured state is then fed to a second ``do_sync``
    running with ``singer_write_message_ok``, which is expected to resume
    COW under the same table version.

    Args:
        mock_connect: Mock whose recorded calls are compared against the
            expected connection keyword arguments.
        use_secondary: When truthy, the expected host and port are the
            ``secondary_host`` / ``secondary_port`` entries of the
            connection config instead of ``host`` / ``port``.
    """
    global COW_RECORD_COUNT

    def bookmark(index, stream_id):
        # Bookmark dict for `stream_id` carried by the state message at `index`.
        return CAUGHT_MESSAGES[index].value['bookmarks'][stream_id]

    singer.write_message = singer_write_message_no_cow
    pg_common.write_schema_message = singer_write_message_ok

    conn_config = get_test_connection_config(use_secondary=use_secondary)
    streams = tap_postgres.do_discovery(conn_config)

    # Assert that we connected to the correct database
    expected_connection = {
        'application_name': unittest.mock.ANY,
        'dbname': unittest.mock.ANY,
        'user': unittest.mock.ANY,
        'password': unittest.mock.ANY,
        'connect_timeout': unittest.mock.ANY,
        'host': conn_config['secondary_host'] if use_secondary else conn_config['host'],
        'port': conn_config['secondary_port'] if use_secondary else conn_config['port'],
    }
    mock_connect.assert_called_once_with(**expected_connection)
    mock_connect.reset_mock()

    cow_stream = [s for s in streams if s['table_name'] == 'COW'][0]
    assert cow_stream is not None
    cow_stream = select_all_of_stream(cow_stream)
    cow_stream = set_replication_method_for_stream(cow_stream, 'FULL_TABLE')

    chicken_stream = [s for s in streams if s['table_name'] == 'CHICKEN'][0]
    assert chicken_stream is not None
    chicken_stream = select_all_of_stream(chicken_stream)
    chicken_stream = set_replication_method_for_stream(
        chicken_stream, 'FULL_TABLE')

    cow_recs = [
        {'name': 'betty', 'colour': 'blue'},
        {'name': 'smelly', 'colour': 'brow'},
        {'name': 'pooper', 'colour': 'green'},
    ]
    chicken_rec = {'name': 'fred', 'colour': 'red'}
    conn = get_test_connection()
    try:
        conn.autocommit = True
        with conn.cursor() as cur:
            for cow_rec in cow_recs:
                insert_record(cur, 'COW', cow_rec)
            insert_record(cur, 'CHICKEN', chicken_rec)
    finally:
        # Close the connection even when an insert fails.
        conn.close()

    state = {}
    blew_up_on_cow = False

    # this will sync the CHICKEN but then blow up on the COW
    try:
        tap_postgres.do_sync(
            get_test_connection_config(use_secondary=use_secondary),
            {'streams': streams}, None, state)
    except Exception:
        # Broad on purpose: the exception type raised by the write stub is
        # not visible from this test.
        blew_up_on_cow = True

    assert blew_up_on_cow
    mock_connect.assert_called_with(**expected_connection)
    mock_connect.reset_mock()

    assert len(CAUGHT_MESSAGES) == 14

    # chicken messages
    assert CAUGHT_MESSAGES[0]['type'] == 'SCHEMA'
    assert isinstance(CAUGHT_MESSAGES[1], singer.StateMessage)
    assert bookmark(1, 'public-CHICKEN').get('xmin') is None

    assert isinstance(CAUGHT_MESSAGES[2], singer.ActivateVersionMessage)
    new_version = CAUGHT_MESSAGES[2].version

    assert isinstance(CAUGHT_MESSAGES[3], singer.RecordMessage)
    assert CAUGHT_MESSAGES[3].stream == 'public-CHICKEN'

    assert isinstance(CAUGHT_MESSAGES[4], singer.StateMessage)
    # xmin is set while we are processing the full table replication
    assert bookmark(4, 'public-CHICKEN')['xmin'] is not None

    assert isinstance(CAUGHT_MESSAGES[5], singer.ActivateVersionMessage)
    assert CAUGHT_MESSAGES[5].version == new_version

    assert isinstance(CAUGHT_MESSAGES[6], singer.StateMessage)
    assert singer.get_currently_syncing(CAUGHT_MESSAGES[6].value) is None
    # xmin is cleared at the end of the full table replication
    assert bookmark(6, 'public-CHICKEN')['xmin'] is None

    # cow messages
    assert CAUGHT_MESSAGES[7]['type'] == 'SCHEMA'
    assert CAUGHT_MESSAGES[7]['stream'] == "public-COW"

    assert isinstance(CAUGHT_MESSAGES[8], singer.StateMessage)
    assert bookmark(8, 'public-COW').get('xmin') is None
    assert CAUGHT_MESSAGES[8].value['currently_syncing'] == "public-COW"

    assert isinstance(CAUGHT_MESSAGES[9], singer.ActivateVersionMessage)
    cow_version = CAUGHT_MESSAGES[9].version

    assert isinstance(CAUGHT_MESSAGES[10], singer.RecordMessage)
    assert CAUGHT_MESSAGES[10].record['name'] == 'betty'
    assert CAUGHT_MESSAGES[10].stream == 'public-COW'

    assert isinstance(CAUGHT_MESSAGES[11], singer.StateMessage)
    # xmin is set while we are processing the full table replication
    assert bookmark(11, 'public-COW')['xmin'] is not None

    assert CAUGHT_MESSAGES[12].record['name'] == 'smelly'
    assert CAUGHT_MESSAGES[12].stream == 'public-COW'

    # The last captured message supplies the state to resume from.
    old_state = CAUGHT_MESSAGES[13].value

    # run another do_sync
    singer.write_message = singer_write_message_ok
    CAUGHT_MESSAGES.clear()
    COW_RECORD_COUNT = 0

    tap_postgres.do_sync(
        get_test_connection_config(use_secondary=use_secondary),
        {'streams': streams}, None, old_state)

    mock_connect.assert_called_with(**expected_connection)
    mock_connect.reset_mock()

    assert CAUGHT_MESSAGES[0]['type'] == 'SCHEMA'
    assert isinstance(CAUGHT_MESSAGES[1], singer.StateMessage)

    # because we were interrupted, we do not switch versions
    assert bookmark(1, 'public-COW')['version'] == cow_version
    assert bookmark(1, 'public-COW')['xmin'] is not None
    assert singer.get_currently_syncing(CAUGHT_MESSAGES[1].value) == "public-COW"

    assert isinstance(CAUGHT_MESSAGES[2], singer.RecordMessage)
    assert CAUGHT_MESSAGES[2].record['name'] == 'smelly'
    assert CAUGHT_MESSAGES[2].stream == 'public-COW'

    assert isinstance(CAUGHT_MESSAGES[3], singer.StateMessage)
    # we still have an xmin for COW because we are not yet done with the COW table
    assert bookmark(3, 'public-COW')['xmin'] is not None
    assert singer.get_currently_syncing(CAUGHT_MESSAGES[3].value) == 'public-COW'

    assert isinstance(CAUGHT_MESSAGES[4], singer.RecordMessage)
    assert CAUGHT_MESSAGES[4].record['name'] == 'pooper'
    assert CAUGHT_MESSAGES[4].stream == 'public-COW'

    assert isinstance(CAUGHT_MESSAGES[5], singer.StateMessage)
    assert bookmark(5, 'public-COW')['xmin'] is not None
    assert singer.get_currently_syncing(CAUGHT_MESSAGES[5].value) == 'public-COW'

    # Once the table is finished: activate version, then a state message
    # with nothing currently syncing.
    assert isinstance(CAUGHT_MESSAGES[6], singer.ActivateVersionMessage)
    assert CAUGHT_MESSAGES[6].version == cow_version

    assert isinstance(CAUGHT_MESSAGES[7], singer.StateMessage)
    assert singer.get_currently_syncing(CAUGHT_MESSAGES[7].value) is None
    assert bookmark(7, 'public-CHICKEN')['xmin'] is None
def test_catalog(self):
    """Interrupt, then resume, the initial full-table phase of a LOG_BASED COW sync.

    Three COW rows are inserted and a first ``do_sync`` runs with
    ``singer.write_message`` replaced by ``singer_write_message_no_cow``;
    the test expects that run to raise after seven messages were captured.
    The last captured state is then fed to a second ``do_sync`` running
    with ``singer_write_message_ok``, which is expected to emit eight
    messages while keeping the same ``lsn`` and ``version`` bookmarks.
    """
    global COW_RECORD_COUNT

    def cow_bookmark(index):
        # Bookmark dict for COW carried by the state message at `index`.
        return CAUGHT_MESSAGES[index].value["bookmarks"]["postgres-public-COW"]

    singer.write_message = singer_write_message_no_cow
    pg_common.write_schema_message = singer_write_message_ok

    conn_config = get_test_connection_config()
    conn_config["emit_state_every_n_rows"] = 1
    streams = tap_postgres.do_discovery(conn_config)

    cow_stream = [s for s in streams if s["table_name"] == "COW"][0]
    self.assertIsNotNone(cow_stream)
    cow_stream = select_all_of_stream(cow_stream)
    cow_stream = set_replication_method_for_stream(cow_stream, "LOG_BASED")

    cow_recs = [
        {"name": "betty", "colour": "blue"},
        {"name": "smelly", "colour": "brow"},
        {"name": "pooper", "colour": "green"},
    ]
    with get_test_connection() as conn:
        conn.autocommit = True
        cur = conn.cursor()
        for cow_rec in cow_recs:
            insert_record(cur, "COW", cow_rec)

    state = {}
    # Initialise the flag so that a run which does NOT raise fails the
    # assertion below instead of hitting an unbound local.
    blew_up_on_cow = False
    # The initial phase of COW's logical replication is a full table sync.
    # This run is expected to raise part-way through; the assertions below
    # expect two COW records to have been captured before the failure.
    try:
        tap_postgres.do_sync(conn_config, {"streams": streams}, None, state)
    except Exception:
        # Broad on purpose: the exception type raised by the write stub is
        # not visible from this test.
        blew_up_on_cow = True

    self.assertTrue(blew_up_on_cow)

    self.assertEqual(7, len(CAUGHT_MESSAGES))

    self.assertEqual(CAUGHT_MESSAGES[0]["type"], "SCHEMA")
    self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
    self.assertIsNone(cow_bookmark(1).get("xmin"))
    self.assertIsNotNone(cow_bookmark(1).get("lsn"))
    end_lsn = cow_bookmark(1).get("lsn")

    self.assertIsInstance(CAUGHT_MESSAGES[2], singer.ActivateVersionMessage)
    new_version = CAUGHT_MESSAGES[2].version

    self.assertIsInstance(CAUGHT_MESSAGES[3], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[3].record, {
        "colour": "blue",
        "id": 1,
        "name": "betty",
    })
    self.assertEqual("COW", CAUGHT_MESSAGES[3].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[4], singer.StateMessage)
    # xmin is set while we are processing the full table replication
    self.assertIsNotNone(cow_bookmark(4)["xmin"])
    self.assertEqual(cow_bookmark(4)["lsn"], end_lsn)

    self.assertEqual(CAUGHT_MESSAGES[5].record["name"], "smelly")
    self.assertEqual("COW", CAUGHT_MESSAGES[5].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[6], singer.StateMessage)
    last_xmin = cow_bookmark(6)["xmin"]
    old_state = CAUGHT_MESSAGES[6].value

    # Run another do_sync from the interrupted state; it should emit the
    # remaining rows, which effectively finishes the initial full_table
    # replication portion of the logical replication.
    singer.write_message = singer_write_message_ok
    COW_RECORD_COUNT = 0
    CAUGHT_MESSAGES.clear()
    tap_postgres.do_sync(conn_config, {"streams": streams}, None, old_state)

    self.assertEqual(8, len(CAUGHT_MESSAGES))

    self.assertEqual(CAUGHT_MESSAGES[0]["type"], "SCHEMA")

    self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
    self.assertEqual(cow_bookmark(1).get("xmin"), last_xmin)
    self.assertEqual(cow_bookmark(1).get("lsn"), end_lsn)
    self.assertEqual(cow_bookmark(1).get("version"), new_version)

    self.assertIsInstance(CAUGHT_MESSAGES[2], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[2].record, {
        "colour": "brow",
        "id": 2,
        "name": "smelly",
    })
    self.assertEqual("COW", CAUGHT_MESSAGES[2].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[3], singer.StateMessage)
    # assertEqual, not assertTrue: assertTrue(value, last_xmin) would treat
    # last_xmin as the failure message and compare nothing.
    self.assertEqual(cow_bookmark(3).get("xmin"), last_xmin)
    self.assertEqual(cow_bookmark(3).get("lsn"), end_lsn)
    self.assertEqual(cow_bookmark(3).get("version"), new_version)

    self.assertIsInstance(CAUGHT_MESSAGES[4], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[4].record["name"], "pooper")
    self.assertEqual("COW", CAUGHT_MESSAGES[4].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[5], singer.StateMessage)
    self.assertGreater(cow_bookmark(5).get("xmin"), last_xmin)
    self.assertEqual(cow_bookmark(5).get("lsn"), end_lsn)
    self.assertEqual(cow_bookmark(5).get("version"), new_version)

    self.assertIsInstance(CAUGHT_MESSAGES[6], singer.ActivateVersionMessage)
    self.assertEqual(CAUGHT_MESSAGES[6].version, new_version)

    # The final state no longer carries an xmin bookmark for COW.
    self.assertIsInstance(CAUGHT_MESSAGES[7], singer.StateMessage)
    self.assertIsNone(cow_bookmark(7).get("xmin"))
    self.assertEqual(cow_bookmark(7).get("lsn"), end_lsn)
    self.assertEqual(cow_bookmark(7).get("version"), new_version)
def test_catalog(self):
    """Interrupt a FULL_TABLE sync of CHICKEN and COW, then resume it.

    One CHICKEN row and three COW rows are inserted. The first ``do_sync``
    runs with ``singer.write_message`` replaced by
    ``singer_write_message_no_cow`` and is expected to raise after fourteen
    messages were captured: a complete CHICKEN sync followed by a partial
    COW sync. The last captured state is then fed to a second ``do_sync``
    running with ``singer_write_message_ok``, which is expected to resume
    COW under the same table version.
    """
    global COW_RECORD_COUNT

    def bookmark(index, stream_id):
        # Bookmark dict for `stream_id` carried by the state message at `index`.
        return CAUGHT_MESSAGES[index].value["bookmarks"][stream_id]

    singer.write_message = singer_write_message_no_cow
    pg_common.write_schema_message = singer_write_message_ok

    conn_config = get_test_connection_config()
    conn_config["emit_state_every_n_rows"] = 1
    streams = tap_postgres.do_discovery(conn_config)

    cow_stream = [s for s in streams if s["table_name"] == "COW"][0]
    self.assertIsNotNone(cow_stream)
    cow_stream = select_all_of_stream(cow_stream)
    cow_stream = set_replication_method_for_stream(cow_stream, "FULL_TABLE")

    chicken_stream = [s for s in streams if s["table_name"] == "CHICKEN"][0]
    self.assertIsNotNone(chicken_stream)
    chicken_stream = select_all_of_stream(chicken_stream)
    chicken_stream = set_replication_method_for_stream(
        chicken_stream, "FULL_TABLE")

    cow_recs = [
        {"name": "betty", "colour": "blue"},
        {"name": "smelly", "colour": "brow"},
        {"name": "pooper", "colour": "green"},
    ]
    chicken_rec = {"name": "fred", "colour": "red"}
    with get_test_connection() as conn:
        conn.autocommit = True
        cur = conn.cursor()
        for cow_rec in cow_recs:
            insert_record(cur, "COW", cow_rec)
        insert_record(cur, "CHICKEN", chicken_rec)

    state = {}
    # Initialise the flag so that a run which does NOT raise fails the
    # assertion below instead of hitting an unbound local.
    blew_up_on_cow = False
    # this will sync the CHICKEN but then blow up on the COW
    try:
        tap_postgres.do_sync(conn_config, {"streams": streams}, None, state)
    except Exception:
        # Broad on purpose: the exception type raised by the write stub is
        # not visible from this test.
        blew_up_on_cow = True

    self.assertTrue(blew_up_on_cow)

    self.assertEqual(14, len(CAUGHT_MESSAGES))

    # chicken messages
    self.assertEqual(CAUGHT_MESSAGES[0]["type"], "SCHEMA")
    self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)
    self.assertIsNone(bookmark(1, "postgres-public-CHICKEN").get("xmin"))

    self.assertIsInstance(CAUGHT_MESSAGES[2], singer.ActivateVersionMessage)
    new_version = CAUGHT_MESSAGES[2].version

    self.assertIsInstance(CAUGHT_MESSAGES[3], singer.RecordMessage)
    self.assertEqual("CHICKEN", CAUGHT_MESSAGES[3].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[4], singer.StateMessage)
    # xmin is set while we are processing the full table replication
    self.assertIsNotNone(bookmark(4, "postgres-public-CHICKEN")["xmin"])

    self.assertIsInstance(CAUGHT_MESSAGES[5], singer.ActivateVersionMessage)
    self.assertEqual(CAUGHT_MESSAGES[5].version, new_version)

    self.assertIsInstance(CAUGHT_MESSAGES[6], singer.StateMessage)
    self.assertIsNone(singer.get_currently_syncing(CAUGHT_MESSAGES[6].value))
    # xmin is cleared at the end of the full table replication
    self.assertIsNone(bookmark(6, "postgres-public-CHICKEN")["xmin"])

    # cow messages
    self.assertEqual(CAUGHT_MESSAGES[7]["type"], "SCHEMA")
    self.assertEqual("COW", CAUGHT_MESSAGES[7]["stream"])

    self.assertIsInstance(CAUGHT_MESSAGES[8], singer.StateMessage)
    self.assertIsNone(bookmark(8, "postgres-public-COW").get("xmin"))
    self.assertEqual("postgres-public-COW",
                     CAUGHT_MESSAGES[8].value["currently_syncing"])

    self.assertIsInstance(CAUGHT_MESSAGES[9], singer.ActivateVersionMessage)
    cow_version = CAUGHT_MESSAGES[9].version

    self.assertIsInstance(CAUGHT_MESSAGES[10], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[10].record["name"], "betty")
    self.assertEqual("COW", CAUGHT_MESSAGES[10].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[11], singer.StateMessage)
    # xmin is set while we are processing the full table replication
    self.assertIsNotNone(bookmark(11, "postgres-public-COW")["xmin"])

    self.assertEqual(CAUGHT_MESSAGES[12].record["name"], "smelly")
    self.assertEqual("COW", CAUGHT_MESSAGES[12].stream)

    # The last captured message supplies the state to resume from.
    old_state = CAUGHT_MESSAGES[13].value

    # run another do_sync
    singer.write_message = singer_write_message_ok
    CAUGHT_MESSAGES.clear()
    COW_RECORD_COUNT = 0

    tap_postgres.do_sync(conn_config, {"streams": streams}, None, old_state)

    self.assertEqual(CAUGHT_MESSAGES[0]["type"], "SCHEMA")
    self.assertIsInstance(CAUGHT_MESSAGES[1], singer.StateMessage)

    # because we were interrupted, we do not switch versions
    self.assertEqual(bookmark(1, "postgres-public-COW")["version"], cow_version)
    self.assertIsNotNone(bookmark(1, "postgres-public-COW")["xmin"])
    self.assertEqual(
        "postgres-public-COW",
        singer.get_currently_syncing(CAUGHT_MESSAGES[1].value))

    self.assertIsInstance(CAUGHT_MESSAGES[2], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[2].record["name"], "smelly")
    self.assertEqual("COW", CAUGHT_MESSAGES[2].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[3], singer.StateMessage)
    # we still have an xmin for COW because we are not yet done with the COW table
    self.assertIsNotNone(bookmark(3, "postgres-public-COW")["xmin"])
    self.assertEqual(
        singer.get_currently_syncing(CAUGHT_MESSAGES[3].value),
        "postgres-public-COW")

    self.assertIsInstance(CAUGHT_MESSAGES[4], singer.RecordMessage)
    self.assertEqual(CAUGHT_MESSAGES[4].record["name"], "pooper")
    self.assertEqual("COW", CAUGHT_MESSAGES[4].stream)

    self.assertIsInstance(CAUGHT_MESSAGES[5], singer.StateMessage)
    self.assertIsNotNone(bookmark(5, "postgres-public-COW")["xmin"])
    self.assertEqual(
        singer.get_currently_syncing(CAUGHT_MESSAGES[5].value),
        "postgres-public-COW")

    # Once the table is finished: activate version, then a state message
    # with nothing currently syncing.
    self.assertIsInstance(CAUGHT_MESSAGES[6], singer.ActivateVersionMessage)
    self.assertEqual(CAUGHT_MESSAGES[6].version, cow_version)

    self.assertIsInstance(CAUGHT_MESSAGES[7], singer.StateMessage)
    self.assertIsNone(singer.get_currently_syncing(CAUGHT_MESSAGES[7].value))
    self.assertIsNone(bookmark(7, "postgres-public-CHICKEN")["xmin"])