Example #1
def down(client):
    client = BigQueryMigration(client)
    dataset = client.dataset(dataset_name)

    table = client.client.get_table(dataset.table(table_name))
    orig_schema = table.schema
    new_schema = orig_schema.copy()

    if new_schema[1].name == 'commission_user_id':
        new_schema.pop(1)
        client.delete_table(table)
        client.create_table(name=table_name,
                            project=client.client.project,
                            schema=new_schema,
                            dataset=dataset,
                            partition={
                                'type': 'range',
                                'field': 'client_partition_id',
                                'start': 1,
                                'end': 100,
                                'interval': 1
                            },
                            clustering_fields=[
                                'leo_eid:STRING',
                                'ingestion_timestamp:TIMESTAMP'
                            ])

    return dataset
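
For reference, the partition dict passed to create_table above (and reused throughout these examples) presumably corresponds to integer-range partitioning in the underlying google-cloud-bigquery client. BigQueryMigration is a project-specific wrapper, so the sketch below only illustrates the native calls it likely wraps; build_range_partitioned_table is an illustrative name, not part of the wrapper.

from google.cloud import bigquery

def build_range_partitioned_table(project, dataset_id, table_id, schema):
    # Native equivalent of partition={'type': 'range', 'field': 'client_partition_id',
    # 'start': 1, 'end': 100, 'interval': 1}: one partition per id value in [1, 100).
    table = bigquery.Table(f'{project}.{dataset_id}.{table_id}', schema=schema)
    table.range_partitioning = bigquery.RangePartitioning(
        field='client_partition_id',
        range_=bigquery.PartitionRange(start=1, end=100, interval=1),
    )
    # The native API expects bare column names for clustering; the ':TYPE'
    # suffix seen in the wrapper's clustering_fields is not part of it.
    table.clustering_fields = ['leo_eid', 'ingestion_timestamp']
    return table

# e.g. bigquery.Client().create_table(build_range_partitioned_table(...))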
Example #2
def up(client):
    migration = BigQueryMigration(client)

    dataset = migration.dataset(
        dataset_name
    )  # use me if you are NOT creating a new dataset. -- ndg 2/5/20

    clusters = migration.default_clustering_fields

    del clusters[0]

    clusters.insert(0, 'client_wrench_id:STRING')

    schema = [
        bigquery.SchemaField('entity_id', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField("tree_user_id", "INTEGER", mode="REQUIRED"),
        bigquery.SchemaField("prediction", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("client_wrench_id", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("expirement_name", "STRING"),
        bigquery.SchemaField("processing_datetime", "DATETIME")
    ]

    migration.create_table(name=table_name,
                           project=migration.client.project,
                           schema=schema,
                           dataset=dataset,
                           partition={'type': 'time'},
                           clustering_fields=clusters)

    return dataset
Example #3
def up(client):
    migration = BigQueryMigration(client)

    dataset = migration.create_dataset(dataset_name)

    parent_dataset = migration.dataset('pyr_bluesun_{}'.format(client.env))
    tbls = migration.client.list_tables(parent_dataset)

    clusters = migration.default_clustering_fields

    clusters.insert(0, 'icentris_client:STRING')

    for item in tbls:
        tbl = migration.client.get_table(item.reference)

        orig = tbl.schema

        new = orig.copy()

        migration.create_table(name=tbl.table_id,
                               project=migration.client.project,
                               schema=new,
                               dataset=dataset,
                               partition={'type': 'time'},
                               clustering_fields=clusters)

    return dataset
Example #4
def up(client):
    client = BigQueryMigration(client)
    dataset = client.dataset(dataset_name)

    table = client.client.get_table(dataset.table(table_name))
    orig_schema = table.schema
    new_schema = orig_schema.copy()

    new_schema.insert(
        1,
        bigquery.SchemaField('commission_user_id', 'INTEGER', mode='REQUIRED'))

    client.delete_table(table)
    client.create_table(
        name=table_name,
        project=client.client.project,
        schema=new_schema,
        dataset=dataset,
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'],
        partition={
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1
        })
    return dataset
Example #5
def up(client):
    client = BigQueryMigration(client)
    dataset = client.dataset(dataset_name)

    product_reviews = [
        bigquery.SchemaField('client_partition_id', 'INTEGER',
                             mode='REQUIRED'),
        bigquery.SchemaField('client_wrench_id', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('icentris_client', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('site_id', 'INTEGER', mode='REQUIRED'),
        bigquery.SchemaField('user_id', 'INTEGER', mode='REQUIRED'),
        bigquery.SchemaField('tree_user_id', 'INTEGER'),
        bigquery.SchemaField('product_code', 'STRING'),
        bigquery.SchemaField('product_name', 'STRING'),
        bigquery.SchemaField('product_description', 'STRING'),
        bigquery.SchemaField('product_short_description', 'STRING'),
        bigquery.SchemaField('product_available_on', 'DATETIME'),
        bigquery.SchemaField('product_discontinued_on', 'DATETIME'),
        bigquery.SchemaField('product_slug', 'STRING'),
        bigquery.SchemaField('product_avg_rating', 'NUMERIC'),
        bigquery.SchemaField('product_reviews_count', 'INTEGER'),
        bigquery.SchemaField('review_name', 'STRING'),
        bigquery.SchemaField('location', 'STRING'),
        bigquery.SchemaField('rating', 'INTEGER'),
        bigquery.SchemaField('title', 'STRING'),
        bigquery.SchemaField('review', 'STRING'),
        bigquery.SchemaField('approved', 'BOOLEAN'),
        bigquery.SchemaField('created_at', 'DATETIME'),
        bigquery.SchemaField('updated_at', 'DATETIME'),
        bigquery.SchemaField('ip_address', 'STRING'),
        bigquery.SchemaField('show_identifier', 'BOOLEAN'),
    ]

    client.create_table(
        name=table_name,
        project=client.client.project,
        schema=product_reviews,
        dataset=dataset,
        partition={
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1
        },
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])
    return dataset
Example #6
def up(client):
    migration = BigQueryMigration(client)

    global dataset_name
    dataset_name = dataset_name.format(client.env)
    wv_ds = migration.create_dataset(dataset_name)
    bs_ds = migration.dataset('pyr_bluesun_{}'.format(client.env))

    ls = client.list_tables(bs_ds)
    for tbl in ls:
        tbl_ref = bs_ds.table(tbl.table_id)
        tbl = client.get_table(tbl_ref)
        migration.create_table(name=tbl_ref.table_id,
                               project=tbl_ref.project,
                               dataset=wv_ds,
                               schema=tbl.schema)

    return wv_ds
Example #7
def up(client):
    client = BigQueryMigration(client)

    dataset = client.create_dataset('staging')
    for tbl, schema in schemas.items():
        clusters = ['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP']

        client.create_table(name=tbl,
                            project=client.client.project,
                            schema=schema,
                            dataset=dataset,
                            partition={
                                'type': 'range',
                                'field': 'client_partition_id',
                                'start': 1,
                                'end': 100,
                                'interval': 1
                            },
                            clustering_fields=clusters)
    return dataset
Example #8
def up(client):
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)

    table = migration.client.get_table(dataset.table(table_name))
    orig_schema = table.schema
    new_schema = orig_schema.copy()

    del new_schema[1]
    new_schema.insert(
        0, bigquery.SchemaField('dag_id', 'STRING', mode='REQUIRED'))

    migration.delete_table(table)
    migration.create_table(name=table_name,
                           project=migration.client.project,
                           schema=new_schema,
                           dataset=dataset)
    return dataset
Example #9
def up(client):
    client = BigQueryMigration(client)
    dataset = client.dataset(dataset_name)

    flat_site_visitors = [
        bigquery.SchemaField('client_partition_id', 'INTEGER',
                             mode='REQUIRED'),
        bigquery.SchemaField('client_wrench_id', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('icentris_client', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('site_id', 'INTEGER', mode='REQUIRED'),
        bigquery.SchemaField('user_id', 'INTEGER', mode='REQUIRED'),
        bigquery.SchemaField('tree_user_id', 'INTEGER'),
        bigquery.SchemaField('visitor_id', 'STRING'),
        bigquery.SchemaField('last_visit_date', 'DATETIME'),
        bigquery.SchemaField('visit_count', 'INTEGER'),
        bigquery.SchemaField('ipaddress', 'STRING'),
        bigquery.SchemaField('browser_agent', 'STRING'),
        bigquery.SchemaField('created_at', 'DATETIME'),
        bigquery.SchemaField('site_template_id', 'INTEGER'),
        bigquery.SchemaField('active', 'INTEGER'),
        bigquery.SchemaField('third_party_tracking_company', 'STRING'),
        bigquery.SchemaField('tracking_code', 'STRING'),
        bigquery.SchemaField('owner_name', 'STRING'),
        bigquery.SchemaField('email', 'STRING'),
        bigquery.SchemaField('story', 'STRING'),
        bigquery.SchemaField('avatar_file_name', 'STRING')
    ]

    client.create_table(
        name=table_name,
        project=client.client.project,
        schema=flat_site_visitors,
        dataset=dataset,
        partition={
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1
        },
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])
    return dataset
Example #10
def up(client):
    client = BigQueryMigration(client)
    dataset = client.dataset(dataset_name)

    # In order to use clustering in BigQuery, the table must also be partitioned.
    # Even though TIMESTAMP is supported as a partition type,
    # partitioning can only be done by date, not datetime. Furthermore, if a
    # TIMESTAMP column is used, the partition granularity is still daily,
    # so this table is created without partitioning or clustering.
    checkpoint_schema = [
        bigquery.SchemaField("table", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("leo_eid", "STRING",
                             mode="REQUIRED"),  # Partition
        bigquery.SchemaField("checkpoint", "TIMESTAMP",
                             mode="REQUIRED"),  # Partition
    ]

    client.create_table(name=table_name,
                        project=client.client.project,
                        schema=checkpoint_schema,
                        dataset=dataset)
    return dataset
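
As the comment above notes, clustering here requires a partitioned table, and partitioning on a TIMESTAMP column is truncated to day granularity, which is why the checkpoint table is created without either. If daily granularity were acceptable, a partitioned and clustered variant could be built with the native client roughly as follows (a sketch only, independent of the BigQueryMigration wrapper):

from google.cloud import bigquery

def build_partitioned_checkpoint_table(full_table_id, schema):
    # Illustrative sketch: day-granularity partitioning on the 'checkpoint'
    # TIMESTAMP column, with clustering on the 'table' and 'leo_eid' columns.
    table = bigquery.Table(full_table_id, schema=schema)
    table.time_partitioning = bigquery.TimePartitioning(
        type_=bigquery.TimePartitioningType.DAY,
        field='checkpoint',
    )
    table.clustering_fields = ['table', 'leo_eid']
    return table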
Example #11
def down(client):
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)

    table = migration.client.get_table(dataset.table(table_name))
    orig_schema = table.schema
    new_schema = orig_schema.copy()

    if new_schema[0].name == 'dag_id':
        new_schema.pop(0)

        new_schema.insert(
            1, bigquery.SchemaField('leo_eid', 'STRING', mode="NULLABLE"))

        migration.delete_table(table)
        migration.create_table(name=table_name,
                               project=migration.client.project,
                               schema=new_schema,
                               dataset=dataset)

    return dataset
Example #12
def up(client):
    migration = BigQueryMigration(client)

    dataset = migration.create_dataset(
        dataset_name
    )  # use me if you are creating a new dataset. -- ndg 2/5/20

    for tbl, schema in schemas.items():
        migration.create_table(
            name=tbl,
            project=migration.client.project,
            schema=schema,
            dataset=dataset,
            partition={
                'type': 'range',
                'field': 'client_partition_id',
                'start': 1,
                'end': 100,
                'interval': 1
            },
            clustering_fields=migration.default_clustering_fields)
    return dataset
Example #13
def up(client):
    migration = BigQueryMigration(client)
    name = dataset_name + '_{}'.format(client.env)

    with PosixPath('/workspace/bigquery/migrations/bluesun_schema.json').open(
            mode='r') as f:
        tbls = json.loads(f.read())

    dataset = migration.create_dataset(name)

    for tbl, raw in tbls.items():
        schema = []
        for f in raw['fields']:
            schema.append(bigquery.SchemaField(f['name'], f['type'],
                                               f['mode']))

        migration.create_table(name=tbl,
                               project=migration.client.project,
                               schema=schema,
                               dataset=dataset)

    return dataset
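
Example #13 rebuilds each SchemaField by hand from the name/type/mode keys in the JSON dump. Assuming those entries follow the standard BigQuery schema field representation, SchemaField.from_api_repr does the same thing and also preserves nested fields on RECORD columns, which the three-argument constructor above would drop; a minimal sketch:

from google.cloud import bigquery

def schema_from_fields(raw_fields):
    # raw_fields is the list stored under each table's 'fields' key in the JSON file.
    return [bigquery.SchemaField.from_api_repr(f) for f in raw_fields]

# Inside the loop of Example #13 this would replace the inner for-loop:
# schema = schema_from_fields(raw['fields'])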
Example #14
def up(client):
    client = BigQueryMigration(client)
    dataset = client.create_dataset('pii')

    pii_schema = [
        bigquery.SchemaField('client_partition_id', 'INTEGER',
                             mode='REQUIRED'),
        bigquery.SchemaField('client_wrench_id', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField("icentris_client", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("tree_user_id", "INTEGER"),
        bigquery.SchemaField("first_name", "STRING"),
        bigquery.SchemaField("last_name", "STRING"),
        bigquery.SchemaField("company_name", "STRING"),
        bigquery.SchemaField("email", "STRING"),
        bigquery.SchemaField("phone", "STRING"),
        bigquery.SchemaField("mobile_phone", "STRING"),
        bigquery.SchemaField("street", "STRING"),
        bigquery.SchemaField("city", "STRING"),
        bigquery.SchemaField("state", "STRING"),
        bigquery.SchemaField("country", "STRING"),
        bigquery.SchemaField("birth_date", "DATE"),
        bigquery.SchemaField("gender", "STRING")
    ]

    client.create_table(
        name='users',
        project=client.client.project,
        schema=pii_schema,
        dataset=dataset,
        partition={
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1
        },
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])
    return dataset
Example #15
def up(client):
    client = BigQueryMigration(client)
    dataset = client.dataset(dataset_name)

    schema = [
        bigquery.SchemaField("icentris_client", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("partition_id", "INTEGER", mode="REQUIRED"),
        bigquery.SchemaField("wrench_id", "STRING", mode="REQUIRED")
    ]

    tbl = client.create_table(name=table_name,
                              project=client.client.project,
                              schema=schema,
                              dataset=dataset)

    client.client.insert_rows(
        client.client.get_table(tbl),
        [('monat', 1, '2c889143-9169-436a-b610-48c8fe31bb87'),
         ('worldventures', 2, 'd7d3e26f-d105-4816-825d-d5858b9cf0d1'),
         ('naturessunshine', 3, '16bcfb48-153a-4c7d-bb65-19074d9edb17')])

    return dataset
Example #16
def up(client):
    client = BigQueryMigration(client)
    dataset = client.dataset(dataset_name)

    contacts = [
        bigquery.SchemaField('client_partition_id', 'INTEGER',
                             mode='REQUIRED'),
        bigquery.SchemaField('client_wrench_id', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('icentris_client', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('id', 'INTEGER', mode='REQUIRED'),
        bigquery.SchemaField('owner_tree_user_id', 'INTEGER'),
        bigquery.SchemaField('owner_user_id', 'INTEGER'),
        bigquery.SchemaField('tree_user_id', 'INTEGER'),
        bigquery.SchemaField('user_id', 'INTEGER'),
        bigquery.SchemaField('type', 'INTEGER'),
        bigquery.SchemaField('level_of_interest', 'INTEGER'),
        bigquery.SchemaField('first_name', 'STRING'),
        bigquery.SchemaField('last_name', 'STRING'),
        bigquery.SchemaField('birthday', 'DATE'),
        bigquery.SchemaField('created_at', 'DATETIME'),
        bigquery.SchemaField('updated_at', 'DATETIME'),
        bigquery.SchemaField('is_downline', 'BOOLEAN'),
        bigquery.SchemaField('opt_in', 'BOOLEAN'),
        bigquery.SchemaField('info', 'STRING'),
        bigquery.SchemaField('avatar_file_name', 'STRING'),
        bigquery.SchemaField('avatar_content_type', 'STRING'),
        bigquery.SchemaField('avatar_file_size', 'INTEGER'),
        bigquery.SchemaField('avatar_updated_at', 'DATETIME'),
        bigquery.SchemaField('addresses',
                             'RECORD',
                             mode='REPEATED',
                             fields=[
                                 bigquery.SchemaField('address1', 'STRING'),
                                 bigquery.SchemaField('address2', 'STRING'),
                                 bigquery.SchemaField('city', 'STRING'),
                                 bigquery.SchemaField('state', 'STRING'),
                                 bigquery.SchemaField('postal_code', 'STRING'),
                                 bigquery.SchemaField('country', 'STRING'),
                             ]),
        bigquery.SchemaField('emails',
                             'RECORD',
                             mode='REPEATED',
                             fields=[
                                 bigquery.SchemaField('email', 'STRING'),
                                 bigquery.SchemaField('preferred', 'BOOLEAN'),
                                 bigquery.SchemaField('undeliverable_count',
                                                      'INTEGER'),
                                 bigquery.SchemaField('spam_reported_count',
                                                      'INTEGER'),
                                 bigquery.SchemaField('source', 'STRING'),
                             ]),
        bigquery.SchemaField(
            'phone_numbers',
            'RECORD',
            mode='REPEATED',
            fields=[
                bigquery.SchemaField('phone_number', 'STRING'),
                bigquery.SchemaField('label', 'STRING'),
                bigquery.SchemaField('unformatted_phone_number', 'STRING'),
                bigquery.SchemaField('source', 'STRING'),
                bigquery.SchemaField('dialing_code', 'STRING'),
            ]),
        bigquery.SchemaField('categories',
                             'RECORD',
                             mode='REPEATED',
                             fields=[
                                 bigquery.SchemaField('category', 'STRING'),
                                 bigquery.SchemaField('created_at',
                                                      'DATETIME'),
                                 bigquery.SchemaField('updated_at',
                                                      'DATETIME'),
                             ]),
    ]

    client.create_table(
        name=table_name,
        project=client.client.project,
        schema=contacts,
        partition={
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1
        },
        dataset=dataset,
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])
    return dataset
Example #17
def up(client):
    client = BigQueryMigration(client)
    dataset = client.dataset(dataset_name)

    commissisions_schema = [
        bigquery.SchemaField('icentris_client', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('tree_user_id', 'INTEGER', mode='REQUIRED'),
        bigquery.SchemaField('client_user_id', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField(
            "commissisions",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("currency_code", "STRING"),
                bigquery.SchemaField('earnings', 'INTEGER'),
                bigquery.SchemaField('previous_balance', 'INTEGER'),
                bigquery.SchemaField('balance_forward', 'INTEGER'),
                bigquery.SchemaField('fee', 'INTEGER'),
                bigquery.SchemaField('total', 'INTEGER'),
                bigquery.SchemaField('checksum', 'STRING'),
            ]
        ),
        bigquery.SchemaField(
            "details",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField('source_amount', 'INTEGER'),
                bigquery.SchemaField('percentage', 'INTEGER'),
                bigquery.SchemaField('commission_amount', 'INTEGER'),
                bigquery.SchemaField('level', 'INTEGER'),
                bigquery.SchemaField('paid_level', 'INTEGER')
            ]
        ),
        bigquery.SchemaField(
            "bonuses",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField('period_type', 'STRING'),
                bigquery.SchemaField('description', 'STRING')
            ]
        ),
        bigquery.SchemaField(
            "runs",
            "RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField('period', 'STRING'),
                bigquery.SchemaField('period_type', 'STRING'),
                bigquery.SchemaField('description', 'STRING'),
                bigquery.SchemaField('run_status', 'STRING'),
                bigquery.SchemaField('plan', 'STRING'),
            ]
        ),
        bigquery.SchemaField('created_date', 'DATETIME'),
        bigquery.SchemaField('modified_date', 'DATETIME')
    ]
    client.create_table(name=table_name,
                        project=client.client.project,
                        schema=commissisions_schema,
                        partition={'type': 'range',
                                   'field': 'client_partition_id',
                                   'start': 1,
                                   'end': 100,
                                   'interval': 1},
                        dataset=dataset,
                        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])

    return dataset