Exemple #1
0
def run(env, run_pipeline, sql_templates_path, bigquery):
    """Seed lake user fixtures, run the lake_to_pii.users query, return rows.

    Two tree users (clients 'bluesun' and 'worldventures') and one lake user
    per tree user are registered, then passed as 'lake' dataset seeds together
    with the registry entries for user types, rank definitions and user
    statuses. Those three entries are only read here, so they are presumably
    populated elsewhere (e.g. by a fixture) -- verify against the caller.

    Args:
        env: Mapping whose 'env' entry is forwarded as the ``--env`` option.
        run_pipeline: Callable invoked as
            ``run_pipeline(Runner, RuntimeOptions(...), seeds)``.
        sql_templates_path: Directory containing ``lake_to_pii.users.sql``.
        bigquery: Object exposing ``query(sql)``.

    Returns:
        Whatever ``bigquery.query`` returns for the rows of ``pii.users``
        whose ``ingestion_timestamp`` is at or after the checkpoint start.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    # The strings below are labelled "UTC", so take the wall clock in UTC
    # rather than in the machine's local zone.
    now = datetime.datetime.now(datetime.timezone.utc)
    start = now.strftime(timestamp_format) + ' UTC'
    end = (now + datetime.timedelta(seconds=1)).strftime(
        timestamp_format) + ' UTC'

    # `end` already carries the ' UTC' suffix; use it as-is so the seeded
    # ingestion_timestamp matches the checkpoint value exactly.
    FactoryRegistry.create_multiple(
        LakeTreeUserFactory, 2, [{
            'icentris_client': 'bluesun',
            'leo_eid': 'z/1970/01/01/00/00/0000000000001-0000001',
            'ingestion_timestamp': end
        }, {
            'icentris_client': 'worldventures',
            'leo_eid': 'z/1970/01/01/00/00/0000000000001-0000001',
            'ingestion_timestamp': end
        }])

    # One lake user per tree user, linked by id and sharing its client.
    tree_users = FactoryRegistry.registry['LakeTreeUserFactory']
    FactoryRegistry.create_multiple(LakeUserFactory, 2, [
        {
            'tree_user_id': tree_users[index]['id'],
            'icentris_client': tree_users[index]['icentris_client']
        }
        for index in (0, 1)
    ])

    seeds = [('lake',
              [('tree_users', FactoryRegistry.registry['LakeTreeUserFactory']),
               ('users', FactoryRegistry.registry['LakeUserFactory']),
               ('tree_user_types',
                FactoryRegistry.registry['LakeTreeUserTypeFactory']),
               ('pyr_rank_definitions',
                FactoryRegistry.registry['LakePyrRankDefinitionFactory']),
               ('tree_user_statuses',
                FactoryRegistry.registry['LakeTreeUserStatusFactory'])])]

    checkpoint = {
        'first_ingestion_timestamp': start,
        'last_ingestion_timestamp': end
    }

    sql = parse_template(f'{sql_templates_path}/lake_to_pii.users.sql',
                         **checkpoint)

    run_pipeline(Runner, RuntimeOptions(['--env', env['env'], '--query', sql]),
                 seeds)

    # `start` is generated above (never user input), so interpolating it
    # into the query text is safe here.
    return bigquery.query(
        f'select * from pii.users WHERE ingestion_timestamp >= "{start}"')
def test_normalize_user_type_params():
    """Check WorldVenturesNormalizeUserType with custom in/out field names.

    A tree user whose 'foo' field is 'Distributor' is run through the DoFn
    configured with ``in_name='foo'`` and ``out_name='bar'``; the output
    element is expected to carry 'Distributor' under 'bar'.
    """
    factory_key = 'LakeTreeUserFactory'

    # Drop any previously registered tree users before creating a fresh one.
    if factory_key in FactoryRegistry.registry:
        del FactoryRegistry.registry[factory_key]

    FactoryRegistry.create(LakeTreeUserFactory, icentris_client=client)
    tree_user = FactoryRegistry.registry[factory_key][0]
    tree_user['foo'] = 'Distributor'

    with TestPipeline() as pipeline:
        seeded = pipeline | beam.Create([tree_user])
        normalized = seeded | beam.ParDo(
            WorldVenturesNormalizeUserType(in_name='foo', out_name='bar'))
        # Keep only the output field so the comparison ignores other keys.
        projected = normalized | beam.Map(lambda row: {'bar': row['bar']})
        assert_that(projected, equal_to([{'bar': 'Distributor'}]))
def test_order_type_autoship():
    """Check that an order of type 'Autoship' keeps that type.

    A freshly created order is given ``type='Autoship'`` and passed through
    WorldVenturesNormalizeOrderType; the output 'type' must be 'Autoship'.
    """
    # Drop any previously registered tree users and orders.
    for stale_key in ('LakeTreeUserFactory', 'LakeTreeOrderFactory'):
        if stale_key in FactoryRegistry.registry:
            del FactoryRegistry.registry[stale_key]

    FactoryRegistry.create(LakeTreeUserFactory, icentris_client=client)
    FactoryRegistry.create(LakeTreeOrderFactory, icentris_client=client)
    tree_order = FactoryRegistry.registry['LakeTreeOrderFactory'][0]
    tree_order['type'] = 'Autoship'

    with TestPipeline() as pipeline:
        seeded = pipeline | beam.Create([tree_order])
        normalized = seeded | beam.ParDo(WorldVenturesNormalizeOrderType())
        # Keep only 'type' so the comparison ignores other keys.
        projected = normalized | beam.Map(lambda row: {'type': row['type']})
        assert_that(projected, equal_to([{'type': 'Autoship'}]))
def test_order_type_distributor_wholesale():
    """Check that a 'Distributor' order is normalized to 'Wholesale'.

    The order is given ``type='Distributor'`` plus fixed 'created' and
    'order_date' values, then passed through WorldVenturesNormalizeOrderType;
    the output 'type' must be 'Wholesale'.
    """
    # Drop any previously registered tree users and orders.
    for stale_key in ('LakeTreeUserFactory', 'LakeTreeOrderFactory'):
        if stale_key in FactoryRegistry.registry:
            del FactoryRegistry.registry[stale_key]

    FactoryRegistry.create(LakeTreeUserFactory, icentris_client=client)
    FactoryRegistry.create(LakeTreeOrderFactory, icentris_client=client)

    placed_at = '2020-04-17 14:03:50'
    tree_order = FactoryRegistry.registry['LakeTreeOrderFactory'][0]
    tree_order['type'] = 'Distributor'
    tree_order['created'] = placed_at
    tree_order['order_date'] = placed_at

    with TestPipeline() as pipeline:
        seeded = pipeline | beam.Create([tree_order])
        normalized = seeded | beam.ParDo(WorldVenturesNormalizeOrderType())
        # Keep only 'type' so the comparison ignores other keys.
        projected = normalized | beam.Map(lambda row: {'type': row['type']})
        assert_that(projected, equal_to([{'type': 'Wholesale'}]))
def test_orders_transform():
    """Check the 'type' produced by WorldVenturesStagingOrdersTransform.

    An order with no sponsor, ``client_user_type='Distributor'``, a
    ``client_status`` copied from the first registered order status, and
    fixed 'created'/'order_date' values is run through the composite
    transform; the output 'type' must be 'Wholesale'.
    """
    # Drop any previously registered tree users and orders. The order status
    # registry entry is not reset here.
    for stale_key in ('LakeTreeUserFactory', 'LakeTreeOrderFactory'):
        if stale_key in FactoryRegistry.registry:
            del FactoryRegistry.registry[stale_key]

    FactoryRegistry.create(LakeTreeUserFactory, icentris_client=client)
    FactoryRegistry.create(LakeTreeOrderFactory, icentris_client=client)
    FactoryRegistry.create(LakeTreeOrderStatusFactory, icentris_client=client)

    tree_order = FactoryRegistry.registry['LakeTreeOrderFactory'][0]
    order_status = FactoryRegistry.registry['LakeTreeOrderStatusFactory'][0]
    placed_at = '2020-04-17 14:03:50'

    tree_order['sponsor_id'] = None
    tree_order['client_user_type'] = 'Distributor'
    tree_order['client_status'] = order_status['description']
    tree_order['created'] = placed_at
    tree_order['order_date'] = placed_at

    with TestPipeline() as pipeline:
        seeded = pipeline | beam.Create([tree_order])
        staged = seeded | WorldVenturesStagingOrdersTransform()
        # Keep only 'type' so the comparison ignores other keys.
        projected = staged | beam.Map(lambda row: {'type': row['type']})
        assert_that(projected, equal_to([{'type': 'Wholesale'}]))
def run(env, run_pipeline, sql_templates_path, bigquery):
    """Seed lake contact fixtures, run the staging contacts query, return rows.

    Registers tree users, contacts, contact categories, contact/category
    links, contact emails and contact phone numbers for the 'bluesun' and
    'worldventures' clients, hands them to ``run_pipeline`` as seeds for the
    'lake' dataset along with the rendered ``lake_to_staging.contacts.sql``
    query, and finally reads back ``staging.contacts``.

    Args:
        env: Mapping whose 'env' entry is forwarded as the ``--env`` option.
        run_pipeline: Callable invoked as
            ``run_pipeline(Runner, RuntimeOptions(...), seeds)``.
        sql_templates_path: Directory containing
            ``lake_to_staging.contacts.sql``.
        bigquery: Object exposing ``query(sql)``.

    Returns:
        Whatever ``bigquery.query`` returns for the rows of
        ``staging.contacts`` with ``ingestion_timestamp`` at or after
        2019-01-01 00:00:00.
    """
    bluesun = 'bluesun'
    worldventures = 'worldventures'
    masked_email = '*****@*****.**'

    # Most fixture rows share these January 2019 values.
    jan_eid = 'z/2019/01/01/00/00/0000000000000-0000000'
    jan_ts = '2019-01-01 00:00:00.0 UTC'

    def row(client, eid=jan_eid, ts=jan_ts, **fields):
        """Build one override dict: the given fields first, followed by the
        client, leo_eid and ingestion_timestamp entries."""
        return {
            **fields,
            'icentris_client': client,
            'leo_eid': eid,
            'ingestion_timestamp': ts,
        }

    FactoryRegistry.create_multiple(LakeTreeUserFactory, 5, [
        row(bluesun, id=1),
        row(bluesun, id=2),
        row(worldventures, id=1),
        row(worldventures, id=2),
        row(bluesun, id=1,
            eid='z/2019/03/01/00/00/0000000000000-0000000',
            ts='2019-03-01 00:00:00.0 UTC'),
    ])

    FactoryRegistry.create_multiple(LakePyrContactsFactory, 6, [
        row(bluesun, id=1, tree_user_id=1),
        row(bluesun, id=2, tree_user_id=2),
        row(worldventures, id=1, tree_user_id=1),
        row(worldventures, id=2, tree_user_id=2),
        row(worldventures, id=2, tree_user_id=2,
            eid='z/2019/02/01/00/00/0000000000000-0000000',
            ts='2019-02-01 00:00:00.0 UTC'),
        # This row keeps the January leo_eid; only its timestamp differs.
        row(bluesun, id=1, tree_user_id=1,
            ts='2019-03-01 00:00:01.0 UTC'),
    ])

    FactoryRegistry.create_multiple(LakePyrContactCategoriesFactory, 4, [
        row(bluesun, id=1, category_name='bluesun One'),
        row(bluesun, id=2, category_name='bluesun Two'),
        row(worldventures, id=1, category_name='WV One'),
        row(bluesun, id=1, category_name='bluesun One-2020',
            eid='z/2020/01/01/00/00/0000000000000-0000000',
            ts='2020-01-01 00:00:00.0 UTC'),
    ])

    FactoryRegistry.create_multiple(
        LakePyrContactsContactCategoriesFactory, 5, [
            row(bluesun, id=1, contact_id=1, contact_category_id=1),
            row(bluesun, id=2, contact_id=2, contact_category_id=1),
            row(worldventures, id=3, contact_id=1, contact_category_id=1),
            row(worldventures, id=4, contact_id=2, contact_category_id=1),
            row(bluesun, id=5, contact_id=1, contact_category_id=2,
                eid='z/2019/04/01/00/00/0000000000000-0000000',
                ts='2019-04-01 00:00:00.0 UTC'),
        ])

    FactoryRegistry.create_multiple(LakePyrContactEmailsFactory, 5, [
        row(bluesun, id=1, contact_id=1, email=masked_email),
        row(bluesun, id=2, contact_id=2, email=masked_email),
        row(worldventures, id=3, contact_id=1, email=masked_email),
        row(worldventures, id=4, contact_id=2, email=masked_email),
        row(bluesun, id=5, contact_id=1, email=masked_email,
            eid='z/2019/05/01/00/00/0000000000000-0000000',
            ts='2019-05-01 00:00:01.0 UTC'),
    ])

    FactoryRegistry.create_multiple(LakePyrContactPhoneNumbersFactory, 5, [
        row(bluesun, id=1, contact_id=1, phone_number='111-111-1111'),
        row(bluesun, id=2, contact_id=2, phone_number='222-222-2222'),
        row(worldventures, id=3, contact_id=1,
            phone_number='1-111-111-1111'),
        row(worldventures, id=4, contact_id=2,
            phone_number='2-222-222-2222'),
        row(bluesun, id=5, contact_id=1, phone_number='333-333-3333',
            eid='z/2019/06/01/00/00/0000000000000-0000000',
            ts='2019-06-01 00:00:00.0 UTC'),
    ])

    # (lake table name, registry key) pairs, in seeding order.
    lake_tables = [
        ('tree_users', 'LakeTreeUserFactory'),
        ('pyr_contacts', 'LakePyrContactsFactory'),
        ('pyr_contact_categories', 'LakePyrContactCategoriesFactory'),
        ('pyr_contacts_contact_categories',
         'LakePyrContactsContactCategoriesFactory'),
        ('pyr_contact_emails', 'LakePyrContactEmailsFactory'),
        ('pyr_contact_phone_numbers', 'LakePyrContactPhoneNumbersFactory'),
    ]
    seeds = [('lake', [(table, FactoryRegistry.registry[registry_key])
                       for table, registry_key in lake_tables])]

    template_path = f'{sql_templates_path}/lake_to_staging.contacts.sql'
    sql = parse_template(
        template_path,
        first_ingestion_timestamp='1970-01-01 00:00:00.0 UTC',
        last_ingestion_timestamp=jan_ts,
        first_eid='z/1970/01/01/00/00/0000000000000-0000000',
        last_eid=jan_eid)

    options = RuntimeOptions(['--env', env['env'], '--query', sql])
    run_pipeline(Runner, options, seeds)

    cutoff = datetime.datetime.fromisoformat('2019-01-01 00:00:00')
    return bigquery.query(
        f'select * from staging.contacts WHERE ingestion_timestamp >= "{cutoff}"')