def create_view(view_name, dest_conn=None):
    """
    Create a view over D_TABLE from a sample JSON row of ``view_name``,
    unless the view already exists.

    The column list is derived from the keys of the sampled row's
    ``fields_json['f1']`` object.

    :param view_name: table to sample from; also name of destination view
    :param dest_conn: destination connection
    :return: True if view created, False if not
    """
    view_count_sql = view_count_query(view_name)
    view_count = get_rows(view_count_sql, dest_conn)[0].count
    if view_count != 0:
        # View already exists; nothing to do
        return False

    sample_rows = sample_rows_query(view_name)
    rows = get_rows(sample_rows, dest_conn)
    if not rows:
        # No data to sample column names from
        return False

    print(rows[0])
    sample_json = rows[0].fields_json['f1']
    # One "fields #>> ..." expression per JSON key; join() replaces the
    # original quadratic += loop and the trailing ", " slice-trim.
    fields = ", ".join(
        "fields #>> '{f1,%s}' as %s" % (k, k) for k in sample_json
    )
    print('Creating view %s' % view_name)
    view_sql = view_create_query(D_TABLE, fields, view_name)
    execute(view_sql, dest_conn)
    return True
def test_get_rows_with_parameters(pgtestdb_test_tables, pgtestdb_conn, test_table_data):
    """Positional and named parameter binding both select the same row."""
    # parameters=None is already exercised by the other tests
    expected = [test_table_data[0]]

    # Bind by index
    rows = get_rows("SELECT * FROM src where ID = %s", pgtestdb_conn,
                    parameters=(1, ))
    assert rows == expected

    # Bind by name
    rows = get_rows("SELECT * FROM src where ID = %(identifier)s",
                    pgtestdb_conn, parameters={'identifier': 1})
    assert rows == expected
def test_copy_table_rows_on_error(test_tables, testdb_conn, test_table_data):
    """Rows violating the unique constraint are reported via on_error."""
    # Seed dest with a clashing id so the first copied row fails
    seed_sql = """
       INSERT INTO dest (id)
       VALUES (
         1
         )""".strip()
    execute(seed_sql, testdb_conn)

    caught = []
    copy_table_rows('src', testdb_conn, testdb_conn, target='dest',
                    on_error=caught.extend)

    rows = get_rows("SELECT * FROM dest", testdb_conn)

    # First row should have been captured as a (row, exception) pair
    bad_row, exc = caught[0]
    assert bad_row.id == 1
    assert "unique" in str(exc).lower()

    # Remaining rows were inserted normally
    assert rows[1:] == test_table_data[1:]
def test_insert_rows_on_error(pgtestdb_conn, pgtestdb_test_tables, pgtestdb_insert_sql, test_table_data, commit_chunks):
    """Duplicate inserts fail the unique check and are collected by on_error."""
    # Parameterized to run both with and without commit_chunks
    target_sql = pgtestdb_insert_sql.replace('src', 'dest')

    # Inserting the whole data set twice guarantees primary key clashes
    doubled = test_table_data * 2

    collected = []
    executemany(target_sql, pgtestdb_conn, doubled,
                on_error=collected.extend, commit_chunks=commit_chunks)

    rows = get_rows("SELECT * FROM dest", pgtestdb_conn)
    assert rows == test_table_data

    # The full set of duplicated rows should appear among the failures
    bad_rows, excs = zip(*collected)
    assert set(bad_rows) == set(test_table_data)
    assert all(['unique' in str(e).lower() for e in excs])
def test_copy_rows_with_dict_row_factory(pgtestdb_conn, pgtestdb_test_tables, pgtestdb_insert_sql, test_table_data):
    """copy_rows with dict_row_factory passes dicts through the transform."""
    select_sql = "SELECT * FROM src"
    insert_sql = """
      INSERT INTO dest (id, value, simple_text, utf8_text, day, date_time)
      VALUES (
        %(id)s, %(value)s, %(simple_text)s, %(utf8_text)s, %(day)s,
        %(date_time)s
        );
      """

    expected = [
        (1001, 1.234, 'TEXT', 'Öæ° z',
         date(2018, 12, 7), datetime(2018, 12, 7, 13, 1, 59)),
        (1002, 2.234, 'TEXT', 'Öæ° z',
         date(2018, 12, 8), datetime(2018, 12, 8, 13, 1, 59)),
        (1003, 2.234, 'TEXT', 'Öæ° z',
         date(2018, 12, 9), datetime(2018, 12, 9, 13, 1, 59)),
    ]

    copy_rows(select_sql, pgtestdb_conn, insert_sql, pgtestdb_conn,
              transform=transform_modify_dict, row_factory=dict_row_factory)

    assert get_rows("SELECT * FROM dest", pgtestdb_conn) == expected
def test_get_rows_with_parameters(test_tables, testdb_conn, test_table_data):
    """Positional ('?') parameter binding returns only the matching row."""
    # parameters=None is already exercised by the other tests
    rows = get_rows("SELECT * FROM src where ID = ?", testdb_conn,
                    parameters=(1, ))
    assert len(rows) == 1
    assert rows[0].id == 1
def test_load_named_tuples(testdb_conn, test_tables, test_table_data):
    """load() inserts a list of named tuples into the target table."""
    load('dest', testdb_conn, test_table_data)

    assert get_rows("SELECT * FROM dest", testdb_conn) == test_table_data
def get_rows_func(query, conn):
    """
    Fetch all rows for ``query`` using the driver appropriate to ``engine``.

    For MySQL a raw cursor is used; otherwise etlhelper's get_rows.

    :param query: SQL text to execute
    :param conn: open database connection
    :return: sequence of result rows
    """
    if engine == ENGINES['mysql']:
        cursor = conn.cursor()
        try:
            cursor.execute(query)
            return cursor.fetchall()
        finally:
            # Original leaked the cursor; always release it
            cursor.close()
    return get_rows(query, conn)
def test_copy_table_rows_happy_path(test_tables, testdb_conn, test_table_data):
    """copy_table_rows copies every row from src into dest."""
    copy_table_rows('src', testdb_conn, testdb_conn, target='dest')

    assert get_rows("SELECT * FROM dest", testdb_conn) == test_table_data
def test_load_named_tuples_chunk_size(pgtestdb_conn, pgtestdb_test_tables, test_table_data, chunk_size):
    """load() gives identical results regardless of chunk_size."""
    load('dest', pgtestdb_conn, test_table_data, chunk_size=chunk_size)

    assert get_rows("SELECT * FROM dest", pgtestdb_conn) == test_table_data
def test_get_rows_with_transform(pgtestdb_test_tables, pgtestdb_conn):
    """A transform callable may change both row shape and row count."""
    def keep_large_ids(rows):
        # Keeps only ids > 1 and reduces each row to its id
        return [row.id for row in rows if row.id > 1]

    rows = get_rows("SELECT * FROM src", pgtestdb_conn,
                    transform=keep_large_ids)
    assert rows == [2, 3]
def test_execute_happy_path(pgtestdb_test_tables, pgtestdb_conn):
    """execute() runs a statement; DELETE leaves the table empty."""
    execute("DELETE FROM src;", pgtestdb_conn)

    assert get_rows('SELECT * FROM src;', pgtestdb_conn) == []
def test_copy_table_rows_happy_path_fast_true(test_tables, testdb_conn, testdb_conn2, test_table_data):
    """Copy succeeds with separate source and destination connections."""
    # The ODBC driver requires distinct connections for source and
    # destination, even when both point at the same database.
    copy_table_rows('src', testdb_conn, testdb_conn2, target='dest')

    assert get_rows("SELECT * FROM dest", testdb_conn) == test_table_data
def test_load_dicts(pgtestdb_conn, pgtestdb_test_tables, test_table_data):
    """load() accepts dictionaries as well as named tuples."""
    as_dicts = [row._asdict() for row in test_table_data]

    load('dest', pgtestdb_conn, as_dicts)

    assert get_rows("SELECT * FROM dest", pgtestdb_conn) == test_table_data
def test_copy_rows_happy_path(test_tables, testdb_conn, test_table_data):
    """copy_rows moves every row from src to dest unchanged."""
    copy_rows("SELECT * FROM src", testdb_conn,
              INSERT_SQL.format(tablename='dest'), testdb_conn)

    assert get_rows("SELECT * FROM dest", testdb_conn) == test_table_data
def copy_src_to_dest(): delete_sql = table_delete_query(D_TABLE) # USE THIS TO CLEAR DESTINATION FOR IDEMPOTENCE src_conn = get_source_connection() print('Connected to source') dest_conn = get_destination_connection() print('Connected to destination') s_tables = [] if DCLEAR: execute(delete_sql, dest_conn) print('Cleared destination') if engine == ENGINES['mysql']: mysql_copy_src_to_dest(src_conn, s_tables, dest_conn) else: if S_TABLES == '__all__': tables_query = get_tables_query() rows = get_rows(tables_query, src_conn) for row in rows: s_tables.append(row.tablename) else: s_tables = S_TABLES.split(",") for S_DB_TABLE in s_tables: record_count = get_rows("select count(*) from {0}".format(S_DB_TABLE), src_conn)[0].count offset = 0 limit = 50000 print('Copying {0} records from {1}'.format(record_count, S_DB_TABLE)) while record_count > 0: select_sql = table_select_query(S_DB_TABLE, src_conn, offset=offset, limit=limit) print('Table: {} | Records remaining: {} | Limit: {} | Offset: {} '.format(S_DB_TABLE, record_count,limit, offset)) insert_sql = table_insert_query(D_TABLE) copy_rows(select_sql, src_conn, insert_sql, dest_conn) record_count -= limit offset += limit create_view(S_DB_TABLE, dest_conn) refresh_mat_views(dest_conn)
def test_execute_with_params(pgtestdb_test_tables, pgtestdb_conn, test_table_data):
    """execute() binds parameters; only the targeted row is deleted."""
    execute("DELETE FROM src WHERE id = %s;", pgtestdb_conn, parameters=[1])

    remaining = get_rows('SELECT * FROM src;', pgtestdb_conn)
    assert remaining == test_table_data[1:]
def test_copy_rows_happy_path_fast_true(test_tables, testdb_conn, testdb_conn2, test_table_data):
    """copy_rows succeeds with separate source and destination connections."""
    # The ODBC driver requires distinct connections for source and
    # destination, even when both point at the same database.
    copy_rows("SELECT * FROM src", testdb_conn,
              INSERT_SQL.format(tablename='dest'), testdb_conn2)

    assert get_rows("SELECT * FROM dest", testdb_conn) == test_table_data
def test_copy_table_rows_happy_path(test_tables, testdb_conn, test_table_data):
    """copy_table_rows copies src to dest; normalise dates for comparison."""
    copy_table_rows('src', testdb_conn, testdb_conn, target='dest')

    rows = get_rows("SELECT * FROM dest", testdb_conn)

    # Convert returned DAY / DATE_TIME values back to native classes
    normalised = [(*row[:4], row.DAY.date(), row.DATE_TIME) for row in rows]
    assert normalised == test_table_data
def test_insert_rows_chunked(pgtestdb_conn, pgtestdb_test_tables, pgtestdb_insert_sql, test_table_data, monkeypatch, chunk_size):
    """executemany inserts all rows correctly at various chunk sizes."""
    monkeypatch.setattr('etlhelper.etl.CHUNKSIZE', chunk_size)
    target_sql = pgtestdb_insert_sql.replace('src', 'dest')

    executemany(target_sql, pgtestdb_conn, test_table_data)

    assert get_rows("SELECT * FROM dest", pgtestdb_conn) == test_table_data
def refresh_mat_views(dest_conn):
    """
    Refresh materialized views, provided the destination database defines
    a refresh_matviews() function.

    :param dest_conn: destination connection
    :return: True if materialized views were refreshed, False otherwise
    """
    count_query = "SELECT count(*)FROM information_schema.routines where routine_name = 'refresh_matviews';"
    # Only call the function if it exists in the destination schema
    if get_rows(count_query, dest_conn)[0].count > 0:
        print('Refreshing materialized views')
        execute("select * from refresh_matviews();", dest_conn)
        print('Done')
        return True
    return False
def test_copy_rows_happy_path(test_tables, testdb_conn, test_table_data):
    """copy_rows moves all rows; normalise returned dates for comparison."""
    copy_rows("SELECT * FROM src", testdb_conn,
              INSERT_SQL.format(tablename='dest'), testdb_conn)

    rows = get_rows("SELECT * FROM dest", testdb_conn)

    # Convert returned DAY / DATE_TIME values back to native classes
    normalised = [(*row[:4], row.DAY.date(), row.DATE_TIME) for row in rows]
    assert normalised == test_table_data
def test_insert_rows_happy_path(pgtestdb_conn, pgtestdb_test_tables, pgtestdb_insert_sql, test_table_data, commit_chunks):
    """executemany inserts all rows, with or without commit_chunks."""
    # Parameterized to run both with and without commit_chunks
    target_sql = pgtestdb_insert_sql.replace('src', 'dest')

    executemany(target_sql, pgtestdb_conn, test_table_data,
                commit_chunks=commit_chunks)

    assert get_rows("SELECT * FROM dest", pgtestdb_conn) == test_table_data
def test_copy_rows_happy_path(test_tables, testdb_conn, test_table_data):
    """copy_rows copies all rows; Oracle returns date columns as datetime."""
    copy_rows("SELECT * FROM src", testdb_conn,
              INSERT_SQL.format(tablename='dest'), testdb_conn)

    rows = get_rows("SELECT * FROM dest", testdb_conn)

    # Promote plain dates in the expected data to midnight datetimes,
    # since Oracle hands dates back as datetime objects
    for i, expected_row in enumerate(test_table_data):
        promoted = tuple(
            datetime.combine(item, datetime.min.time())
            if isinstance(item, date) and not isinstance(item, datetime)
            else item
            for item in expected_row
        )
        assert rows[i] == promoted
def test_copy_rows_happy_path(test_tables, testdb_conn, test_table_data):
    """copy_rows copies all rows; parse date strings back to native types."""
    copy_rows("SELECT * FROM src", testdb_conn,
              INSERT_SQL.format(tablename='dest'), testdb_conn)

    rows = get_rows("SELECT * FROM dest", testdb_conn)

    # day and date_time come back as strings; restore native classes
    restored = [
        (*row[:4],
         dt.datetime.strptime(row.day, '%Y-%m-%d').date(),
         dt.datetime.strptime(row.date_time, '%Y-%m-%d %H:%M:%S'))
        for row in rows
    ]
    assert restored == test_table_data
def test_copy_rows_happy_path_deprecated_tables_fast_true(
        test_deprecated_tables, testdb_conn, testdb_conn2, test_table_data):
    """Deprecated tables trigger a fast_executemany warning, then copy OK."""
    # ODBC driver needs distinct source and destination connections,
    # even when both point at the same database.
    select_sql = "SELECT * FROM src"
    insert_sql = INSERT_SQL.format(tablename='dest')

    with pytest.warns(UserWarning) as warnings:
        copy_rows(select_sql, testdb_conn, insert_sql, testdb_conn2)

    # Exactly one warning about the fast path falling back
    assert len(warnings) == 1
    assert str(
        warnings[0].message).startswith("fast_executemany execution failed")

    assert get_rows("SELECT * FROM dest", testdb_conn) == test_table_data
def test_load_named_tuples(testdb_conn, test_tables, test_table_data):
    """load() works with named tuples despite Oracle's column renaming."""
    # Compare as plain tuples because Oracle upper-cases the column names
    expected = [tuple(row) for row in test_table_data]

    load('dest', testdb_conn, test_table_data)

    rows = get_rows("SELECT * FROM dest", testdb_conn)

    # Convert returned DAY / DATE_TIME values back to native classes
    normalised = [(*row[:4], row.DAY.date(), row.DATE_TIME) for row in rows]
    assert normalised == expected
def test_get_rows_with_modify_dict(pgtestdb_conn, pgtestdb_test_tables):
    """dict_row_factory plus transform_modify_dict yields modified dicts."""
    expected = [{
        'date_time': datetime(2018, 12, 7, 13, 1, 59),
        'day': date(2018, 12, 7),
        'id': 1001,
        'simple_text': 'TEXT',
        'utf8_text': 'Öæ° z',
        'value': 1.234,
    }]

    rows = get_rows("SELECT * FROM src LIMIT 1", pgtestdb_conn,
                    row_factory=dict_row_factory,
                    transform=transform_modify_dict)

    assert rows == expected
def test_copy_table_rows_on_error(test_tables, testdb_conn, test_table_data):
    """Primary-key clashes are reported through on_error (Oracle flavour)."""
    # Seed dest so the first copied row violates the unique constraint
    seed_sql = """
       INSERT INTO dest (id, day, date_time)
       VALUES (
         1,
         TO_DATE('2003/05/03 21:02:44', 'yyyy/mm/dd hh24:mi:ss'),
         TO_DATE('2003/05/03 21:02:44', 'yyyy/mm/dd hh24:mi:ss')
         )""".strip()
    execute(seed_sql, testdb_conn)

    caught = []
    copy_table_rows('src', testdb_conn, testdb_conn, target='dest',
                    on_error=caught.extend)

    rows = get_rows("SELECT * FROM dest", testdb_conn)

    # Convert returned DAY / DATE_TIME values back to native classes
    normalised = [(*row[:4], row.DAY.date(), row.DATE_TIME) for row in rows]

    # First row should have been captured; Oracle upper-cases the
    # column names, hence row.ID rather than row.id
    bad_row, exc = caught[0]
    assert bad_row.ID == 1
    assert "unique" in str(exc).lower()

    # Remaining rows were inserted normally
    assert normalised[1:] == test_table_data[1:]
def test_get_rows_happy_path(pgtestdb_test_tables, pgtestdb_conn, test_table_data):
    """get_rows returns the full contents of src."""
    rows = get_rows("SELECT * FROM src", pgtestdb_conn)
    assert rows == test_table_data