def run(env, run_pipeline, sql_templates_path, bigquery):
    """Seed lake tables, run the lake-to-pii users query, and return the rows.

    Two tree users (one per client) and two matching users are created with
    an ingestion timestamp one second after "now". The
    ``lake_to_pii.users.sql`` template is rendered with a checkpoint window
    of ``[now, now + 1s]`` and handed to ``run_pipeline``.

    Args:
        env: Mapping whose ``'env'`` entry is passed as the ``--env`` option.
        run_pipeline: Callable invoked as
            ``run_pipeline(Runner, RuntimeOptions(...), seeds)``.
        sql_templates_path: Directory containing ``lake_to_pii.users.sql``.
        bigquery: Client object exposing ``query(sql)``.

    Returns:
        Whatever ``bigquery.query`` returns for the ``pii.users`` rows whose
        ``ingestion_timestamp`` is at or after the window start.
    """
    # The formatted strings below are labelled "UTC", so take the clock
    # reading in UTC rather than in the machine's local time zone.
    window_open = datetime.datetime.now(datetime.timezone.utc)
    window_close = window_open + datetime.timedelta(seconds=1)
    start = window_open.strftime('%Y-%m-%d %H:%M:%S') + ' UTC'
    end = window_close.strftime('%Y-%m-%d %H:%M:%S') + ' UTC'

    # ``end`` already carries its " UTC" suffix; use it as-is so the seeded
    # timestamp matches the checkpoint value exactly (no "UTC UTC").
    FactoryRegistry.create_multiple(
        LakeTreeUserFactory, 2,
        [{
            'icentris_client': 'bluesun',
            'leo_eid': 'z/1970/01/01/00/00/0000000000001-0000001',
            'ingestion_timestamp': end
        }, {
            'icentris_client': 'worldventures',
            'leo_eid': 'z/1970/01/01/00/00/0000000000001-0000001',
            'ingestion_timestamp': end
        }])

    # Each user row points at the tree user created at the same position.
    FactoryRegistry.create_multiple(LakeUserFactory, 2, [
        {
            'tree_user_id':
            FactoryRegistry.registry['LakeTreeUserFactory'][0]['id'],
            'icentris_client':
            FactoryRegistry.registry['LakeTreeUserFactory'][0]
            ['icentris_client']
        },
        {
            'tree_user_id':
            FactoryRegistry.registry['LakeTreeUserFactory'][1]['id'],
            'icentris_client':
            FactoryRegistry.registry['LakeTreeUserFactory'][1]
            ['icentris_client']
        },
    ])

    # The last three registries are read without being created here; they
    # are expected to have been populated before this function is called.
    seeds = [('lake', [
        ('tree_users', FactoryRegistry.registry['LakeTreeUserFactory']),
        ('users', FactoryRegistry.registry['LakeUserFactory']),
        ('tree_user_types',
         FactoryRegistry.registry['LakeTreeUserTypeFactory']),
        ('pyr_rank_definitions',
         FactoryRegistry.registry['LakePyrRankDefinitionFactory']),
        ('tree_user_statuses',
         FactoryRegistry.registry['LakeTreeUserStatusFactory']),
    ])]

    checkpoint = {
        'first_ingestion_timestamp': start,
        'last_ingestion_timestamp': end
    }

    sql = parse_template(f'{sql_templates_path}/lake_to_pii.users.sql',
                         **checkpoint)

    run_pipeline(Runner,
                 RuntimeOptions(['--env', env['env'], '--query', sql]),
                 seeds)

    rs = bigquery.query(
        f'select * from pii.users WHERE ingestion_timestamp >= "{start}"')
    return rs
def test_normalize_user_type_params():
    """WorldVenturesNormalizeUserType honours custom in/out field names.

    A tree user whose ``'foo'`` field is ``'Distributor'`` is run through
    the DoFn configured with ``in_name='foo'`` and ``out_name='bar'``; the
    output row must carry ``'Distributor'`` under ``'bar'``.
    """
    # Start from a clean registry entry so index 0 is the row created here.
    if 'LakeTreeUserFactory' in FactoryRegistry.registry:
        del FactoryRegistry.registry['LakeTreeUserFactory']

    # ``client`` is not defined in this function; it is expected to exist
    # at module level.
    FactoryRegistry.create(LakeTreeUserFactory, icentris_client=client)
    tree_user = FactoryRegistry.registry['LakeTreeUserFactory'][0]
    tree_user['foo'] = 'Distributor'

    expected_rows = [{'bar': 'Distributor'}]

    with TestPipeline() as pipeline:
        normalizer = WorldVenturesNormalizeUserType(in_name='foo',
                                                    out_name='bar')
        normalized = (
            pipeline
            | beam.Create([tree_user])
            | beam.ParDo(normalizer))
        # Keep only the field under test so the comparison is exact.
        projected = normalized | beam.Map(lambda row: {'bar': row['bar']})
        # Attached inside the ``with`` so it is part of the pipeline run.
        assert_that(projected, equal_to(expected_rows))
def test_order_type_autoship():
    """An order of type ``'Autoship'`` keeps that type after normalization."""
    # Drop earlier rows so index 0 below is the order created here.
    for factory_name in ('LakeTreeUserFactory', 'LakeTreeOrderFactory'):
        if factory_name in FactoryRegistry.registry:
            del FactoryRegistry.registry[factory_name]

    # ``client`` is not defined in this function; it is expected to exist
    # at module level.
    FactoryRegistry.create(LakeTreeUserFactory, icentris_client=client)
    FactoryRegistry.create(LakeTreeOrderFactory, icentris_client=client)

    autoship_order = FactoryRegistry.registry['LakeTreeOrderFactory'][0]
    autoship_order['type'] = 'Autoship'

    expected_rows = [{'type': 'Autoship'}]

    with TestPipeline() as pipeline:
        normalized = (
            pipeline
            | beam.Create([autoship_order])
            | beam.ParDo(WorldVenturesNormalizeOrderType()))
        # Keep only the field under test so the comparison is exact.
        projected = normalized | beam.Map(lambda row: {'type': row['type']})
        # Attached inside the ``with`` so it is part of the pipeline run.
        assert_that(projected, equal_to(expected_rows))
def test_order_type_distributor_wholesale():
    """A ``'Distributor'`` order is normalized to type ``'Wholesale'``.

    The order is given ``created`` and ``order_date`` values of
    ``'2020-04-17 14:03:50'`` before being passed through
    WorldVenturesNormalizeOrderType.
    """
    # Drop earlier rows so index 0 below is the order created here.
    for factory_name in ('LakeTreeUserFactory', 'LakeTreeOrderFactory'):
        if factory_name in FactoryRegistry.registry:
            del FactoryRegistry.registry[factory_name]

    # ``client`` is not defined in this function; it is expected to exist
    # at module level.
    FactoryRegistry.create(LakeTreeUserFactory, icentris_client=client)
    FactoryRegistry.create(LakeTreeOrderFactory, icentris_client=client)

    distributor_order = FactoryRegistry.registry['LakeTreeOrderFactory'][0]
    overrides = (
        ('type', 'Distributor'),
        ('created', '2020-04-17 14:03:50'),
        ('order_date', '2020-04-17 14:03:50'),
    )
    for field, value in overrides:
        distributor_order[field] = value

    expected_rows = [{'type': 'Wholesale'}]

    with TestPipeline() as pipeline:
        normalized = (
            pipeline
            | beam.Create([distributor_order])
            | beam.ParDo(WorldVenturesNormalizeOrderType()))
        # Keep only the field under test so the comparison is exact.
        projected = normalized | beam.Map(lambda row: {'type': row['type']})
        # Attached inside the ``with`` so it is part of the pipeline run.
        assert_that(projected, equal_to(expected_rows))
def test_orders_transform():
    """WorldVenturesStagingOrdersTransform emits type ``'Wholesale'``.

    The input order has no sponsor, a ``client_user_type`` of
    ``'Distributor'``, a ``client_status`` copied from the first order
    status row, and ``created``/``order_date`` of ``'2020-04-17 14:03:50'``.
    """
    # Only the user and order registries are reset; the order-status
    # registry is left as-is before a status row is added to it.
    for factory_name in ('LakeTreeUserFactory', 'LakeTreeOrderFactory'):
        if factory_name in FactoryRegistry.registry:
            del FactoryRegistry.registry[factory_name]

    # ``client`` is not defined in this function; it is expected to exist
    # at module level.
    for factory in (LakeTreeUserFactory, LakeTreeOrderFactory,
                    LakeTreeOrderStatusFactory):
        FactoryRegistry.create(factory, icentris_client=client)

    staged_order = FactoryRegistry.registry['LakeTreeOrderFactory'][0]
    order_status = FactoryRegistry.registry['LakeTreeOrderStatusFactory'][0]

    staged_order['sponsor_id'] = None
    staged_order['client_user_type'] = 'Distributor'
    staged_order['client_status'] = order_status['description']
    staged_order['created'] = '2020-04-17 14:03:50'
    staged_order['order_date'] = '2020-04-17 14:03:50'

    expected_rows = [{'type': 'Wholesale'}]

    with TestPipeline() as pipeline:
        transformed = (
            pipeline
            | beam.Create([staged_order])
            | WorldVenturesStagingOrdersTransform())
        # Keep only the field under test so the comparison is exact.
        projected = transformed | beam.Map(lambda row: {'type': row['type']})
        # Attached inside the ``with`` so it is part of the pipeline run.
        assert_that(projected, equal_to(expected_rows))
def run(env, run_pipeline, sql_templates_path, bigquery):
    """Seed contact-related lake tables, run the staging query, return rows.

    Six lake tables are seeded (tree users, contacts, contact categories,
    the contact/category join, contact emails, contact phone numbers) for
    the ``bluesun`` and ``worldventures`` clients. The
    ``lake_to_staging.contacts.sql`` template is rendered with a checkpoint
    running from 1970-01-01 to 2019-01-01 and handed to ``run_pipeline``.

    Args:
        env: Mapping whose ``'env'`` entry is passed as the ``--env`` option.
        run_pipeline: Callable invoked as
            ``run_pipeline(Runner, RuntimeOptions(...), seeds)``.
        sql_templates_path: Directory containing
            ``lake_to_staging.contacts.sql``.
        bigquery: Client object exposing ``query(sql)``.

    Returns:
        Whatever ``bigquery.query`` returns for the ``staging.contacts``
        rows with ``ingestion_timestamp >= "2019-01-01 00:00:00"``.
    """
    # Most rows share this event id / timestamp, which equal the
    # checkpoint's upper bound.
    base_eid = 'z/2019/01/01/00/00/0000000000000-0000000'
    base_ts = '2019-01-01 00:00:00.0 UTC'

    def lake_row(client, eid=base_eid, ts=base_ts, **columns):
        """Return a fresh seed dict: ``columns`` first, then bookkeeping."""
        return {
            **columns,
            'icentris_client': client,
            'leo_eid': eid,
            'ingestion_timestamp': ts,
        }

    # In every table the final row(s) carry a later eid and/or timestamp
    # than the checkpoint's upper bound.
    # NOTE(review): presumably to verify they are excluded - confirm
    # against the SQL template.
    tree_users = [
        lake_row('bluesun', id=1),
        lake_row('bluesun', id=2),
        lake_row('worldventures', id=1),
        lake_row('worldventures', id=2),
        lake_row('bluesun',
                 'z/2019/03/01/00/00/0000000000000-0000000',
                 '2019-03-01 00:00:00.0 UTC',
                 id=1),
    ]
    FactoryRegistry.create_multiple(LakeTreeUserFactory, len(tree_users),
                                    tree_users)

    contacts = [
        lake_row('bluesun', id=1, tree_user_id=1),
        lake_row('bluesun', id=2, tree_user_id=2),
        lake_row('worldventures', id=1, tree_user_id=1),
        lake_row('worldventures', id=2, tree_user_id=2),
        lake_row('worldventures',
                 'z/2019/02/01/00/00/0000000000000-0000000',
                 '2019-02-01 00:00:00.0 UTC',
                 id=2, tree_user_id=2),
        # Same eid as the base rows, but a later timestamp.
        lake_row('bluesun',
                 ts='2019-03-01 00:00:01.0 UTC',
                 id=1, tree_user_id=1),
    ]
    FactoryRegistry.create_multiple(LakePyrContactsFactory, len(contacts),
                                    contacts)

    categories = [
        lake_row('bluesun', id=1, category_name='bluesun One'),
        lake_row('bluesun', id=2, category_name='bluesun Two'),
        lake_row('worldventures', id=1, category_name='WV One'),
        lake_row('bluesun',
                 'z/2020/01/01/00/00/0000000000000-0000000',
                 '2020-01-01 00:00:00.0 UTC',
                 id=1, category_name='bluesun One-2020'),
    ]
    FactoryRegistry.create_multiple(LakePyrContactCategoriesFactory,
                                    len(categories), categories)

    contact_category_links = [
        lake_row('bluesun', id=1, contact_id=1, contact_category_id=1),
        lake_row('bluesun', id=2, contact_id=2, contact_category_id=1),
        lake_row('worldventures', id=3, contact_id=1, contact_category_id=1),
        lake_row('worldventures', id=4, contact_id=2, contact_category_id=1),
        lake_row('bluesun',
                 'z/2019/04/01/00/00/0000000000000-0000000',
                 '2019-04-01 00:00:00.0 UTC',
                 id=5, contact_id=1, contact_category_id=2),
    ]
    FactoryRegistry.create_multiple(LakePyrContactsContactCategoriesFactory,
                                    len(contact_category_links),
                                    contact_category_links)

    emails = [
        lake_row('bluesun', id=1, contact_id=1, email='*****@*****.**'),
        lake_row('bluesun', id=2, contact_id=2, email='*****@*****.**'),
        lake_row('worldventures', id=3, contact_id=1,
                 email='*****@*****.**'),
        lake_row('worldventures', id=4, contact_id=2,
                 email='*****@*****.**'),
        lake_row('bluesun',
                 'z/2019/05/01/00/00/0000000000000-0000000',
                 '2019-05-01 00:00:01.0 UTC',
                 id=5, contact_id=1, email='*****@*****.**'),
    ]
    FactoryRegistry.create_multiple(LakePyrContactEmailsFactory, len(emails),
                                    emails)

    phone_numbers = [
        lake_row('bluesun', id=1, contact_id=1,
                 phone_number='111-111-1111'),
        lake_row('bluesun', id=2, contact_id=2,
                 phone_number='222-222-2222'),
        lake_row('worldventures', id=3, contact_id=1,
                 phone_number='1-111-111-1111'),
        lake_row('worldventures', id=4, contact_id=2,
                 phone_number='2-222-222-2222'),
        lake_row('bluesun',
                 'z/2019/06/01/00/00/0000000000000-0000000',
                 '2019-06-01 00:00:00.0 UTC',
                 id=5, contact_id=1, phone_number='333-333-3333'),
    ]
    FactoryRegistry.create_multiple(LakePyrContactPhoneNumbersFactory,
                                    len(phone_numbers), phone_numbers)

    # Lake table name -> registry key holding the rows to load into it.
    table_factories = (
        ('tree_users', 'LakeTreeUserFactory'),
        ('pyr_contacts', 'LakePyrContactsFactory'),
        ('pyr_contact_categories', 'LakePyrContactCategoriesFactory'),
        ('pyr_contacts_contact_categories',
         'LakePyrContactsContactCategoriesFactory'),
        ('pyr_contact_emails', 'LakePyrContactEmailsFactory'),
        ('pyr_contact_phone_numbers', 'LakePyrContactPhoneNumbersFactory'),
    )
    lake_tables = [(table, FactoryRegistry.registry[factory_key])
                   for table, factory_key in table_factories]
    seeds = [('lake', lake_tables)]

    checkpoint = {
        'first_ingestion_timestamp': '1970-01-01 00:00:00.0 UTC',
        'last_ingestion_timestamp': '2019-01-01 00:00:00.0 UTC',
        'first_eid': 'z/1970/01/01/00/00/0000000000000-0000000',
        'last_eid': 'z/2019/01/01/00/00/0000000000000-0000000',
    }
    template_file = f'{sql_templates_path}/lake_to_staging.contacts.sql'
    sql = parse_template(template_file, **checkpoint)

    options = RuntimeOptions(['--env', env['env'], '--query', sql])
    run_pipeline(Runner, options, seeds)

    # str() of this datetime renders as "2019-01-01 00:00:00" in the query.
    cutoff = datetime.datetime.fromisoformat('2019-01-01 00:00:00')
    return bigquery.query(
        f'select * from staging.contacts WHERE ingestion_timestamp >= "{cutoff}"')