Ejemplo n.º 1
0
def up(client):
    """Create the ``dataset_name`` dataset and mirror the bluesun tables into it.

    Every table listed in the ``pyr_bluesun_<env>`` dataset is re-created in
    the new dataset under the same table id with a copy of its schema, a
    ``{'type': 'time'}`` partition spec, and clustering fields made of
    ``icentris_client:STRING`` followed by the migration's defaults.

    Args:
        client: Object wrapped by ``BigQueryMigration``; its ``env`` attribute
            selects which ``pyr_bluesun_<env>`` dataset is read.

    Returns:
        Whatever ``migration.create_dataset(dataset_name)`` returned.
    """
    migration = BigQueryMigration(client)

    dataset = migration.create_dataset(dataset_name)

    parent_dataset = migration.dataset('pyr_bluesun_{}'.format(client.env))
    tbls = migration.client.list_tables(parent_dataset)

    # Build a fresh list instead of calling insert() on
    # migration.default_clustering_fields: an in-place insert would leave the
    # extra entry on the migration's own list, where it would leak into later
    # uses and pile up if this migration ran more than once.
    clusters = ['icentris_client:STRING'] + list(
        migration.default_clustering_fields)

    for item in tbls:
        # Fetch the full table for each listed item so its schema can be read.
        tbl = migration.client.get_table(item.reference)

        migration.create_table(name=tbl.table_id,
                               project=migration.client.project,
                               schema=list(tbl.schema),
                               dataset=dataset,
                               partition={'type': 'time'},
                               clustering_fields=clusters)

    return dataset
def up(client):
    """Create the env-specific dataset and copy the bluesun table schemas into it.

    Side effect: the module-level ``dataset_name`` is rebound to the result
    of formatting it with ``client.env``.

    Args:
        client: Object exposing ``env``, ``list_tables`` and ``get_table``;
            it is also wrapped in a ``BigQueryMigration``.

    Returns:
        Whatever ``migration.create_dataset`` returned for the formatted name.
    """
    global dataset_name

    migration = BigQueryMigration(client)
    dataset_name = dataset_name.format(client.env)

    target = migration.create_dataset(dataset_name)
    source = migration.dataset('pyr_bluesun_{}'.format(client.env))

    for listed in client.list_tables(source):
        # Resolve the listed entry to a reference in the source dataset, then
        # fetch the full table to obtain its schema.
        ref = source.table(listed.table_id)
        full_table = client.get_table(ref)
        migration.create_table(ref.table_id,
                               ref.project,
                               target,
                               schema=full_table.schema)

    return target
def up(client):
    """Create the ``staging`` dataset and one table per entry in ``schemas``.

    Each table is created with a range partition spec on
    ``client_partition_id`` (start 1, end 100, interval 1) and the clustering
    fields ``leo_eid:STRING`` and ``ingestion_timestamp:TIMESTAMP``.

    Args:
        client: Object wrapped by ``BigQueryMigration``.

    Returns:
        Whatever ``create_dataset('staging')`` returned.
    """
    migration = BigQueryMigration(client)
    staging = migration.create_dataset('staging')

    for table_name, table_schema in schemas.items():
        # The partition dict and clustering list are built anew for every
        # table, so no two create_table calls share the same objects.
        migration.create_table(
            name=table_name,
            project=migration.client.project,
            schema=table_schema,
            dataset=staging,
            partition=dict(type='range',
                           field='client_partition_id',
                           start=1,
                           end=100,
                           interval=1),
            clustering_fields=['leo_eid:STRING',
                               'ingestion_timestamp:TIMESTAMP'],
        )

    return staging
Ejemplo n.º 4
0
def up(client):
    """Create the ``<dataset_name>_<env>`` dataset from the bluesun schema file.

    The JSON file at ``/workspace/bigquery/migrations/bluesun_schema.json``
    is read as a mapping of table name to a definition holding a ``fields``
    list. Only ``name``, ``type`` and ``mode`` are read from each field
    entry; all three keys must be present.

    Args:
        client: Object wrapped by ``BigQueryMigration``; its ``env`` attribute
            forms the dataset name suffix.

    Returns:
        Whatever ``migration.create_dataset`` returned.
    """
    migration = BigQueryMigration(client)
    target_name = dataset_name + '_{}'.format(client.env)

    schema_path = PosixPath(
        '/workspace/bigquery/migrations/bluesun_schema.json')
    with schema_path.open(mode='r') as handle:
        table_defs = json.load(handle)

    dataset = migration.create_dataset(target_name)

    for table_name, definition in table_defs.items():
        # Translate the raw field dicts into SchemaField objects, passing
        # name, type and mode positionally.
        columns = [
            bigquery.SchemaField(col['name'], col['type'], col['mode'])
            for col in definition['fields']
        ]

        migration.create_table(name=table_name,
                               project=migration.client.project,
                               schema=columns,
                               dataset=dataset)

    return dataset
def up(client):
    """Create the ``dataset_name`` dataset and one table per entry in ``schemas``.

    Each table is created with a range partition spec on
    ``client_partition_id`` (start 1, end 100, interval 1) and the
    migration's default clustering fields.

    Args:
        client: Object wrapped by ``BigQueryMigration``.

    Returns:
        Whatever ``migration.create_dataset(dataset_name)`` returned.
    """
    migration = BigQueryMigration(client)

    # Original author's note (ndg 2/5/20): use create_dataset when the
    # migration is creating a new dataset.
    dataset = migration.create_dataset(dataset_name)

    for table_name, table_schema in schemas.items():
        # A new partition spec dict is built for every table.
        partition_spec = {
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1,
        }
        migration.create_table(
            name=table_name,
            project=migration.client.project,
            schema=table_schema,
            dataset=dataset,
            partition=partition_spec,
            clustering_fields=migration.default_clustering_fields,
        )

    return dataset
Ejemplo n.º 6
0
def up(client):
    """Create the ``pii`` dataset and its ``users`` table.

    The table schema has three REQUIRED columns (``client_partition_id``,
    ``client_wrench_id``, ``icentris_client``) followed by the personal-data
    columns, which are declared without an explicit mode. The table is
    created with a range partition spec on ``client_partition_id`` (start 1,
    end 100, interval 1) and the clustering fields ``leo_eid:STRING`` and
    ``ingestion_timestamp:TIMESTAMP``.

    Args:
        client: Object wrapped by ``BigQueryMigration``.

    Returns:
        Whatever ``create_dataset('pii')`` returned.
    """
    migration = BigQueryMigration(client)
    dataset = migration.create_dataset('pii')

    required_columns = (
        ('client_partition_id', 'INTEGER'),
        ('client_wrench_id', 'STRING'),
        ('icentris_client', 'STRING'),
    )
    optional_columns = (
        ('tree_user_id', 'INTEGER'),
        ('first_name', 'STRING'),
        ('last_name', 'STRING'),
        ('company_name', 'STRING'),
        ('email', 'STRING'),
        ('phone', 'STRING'),
        ('mobile_phone', 'STRING'),
        ('street', 'STRING'),
        ('city', 'STRING'),
        ('state', 'STRING'),
        ('country', 'STRING'),
        ('birth_date', 'DATE'),
        ('gender', 'STRING'),
    )

    # Column order matters: required columns first, then the optional ones.
    pii_schema = [
        bigquery.SchemaField(column, kind, mode='REQUIRED')
        for column, kind in required_columns
    ]
    pii_schema.extend(
        bigquery.SchemaField(column, kind)
        for column, kind in optional_columns
    )

    range_partition = {
        'type': 'range',
        'field': 'client_partition_id',
        'start': 1,
        'end': 100,
        'interval': 1,
    }
    migration.create_table(
        name='users',
        project=migration.client.project,
        schema=pii_schema,
        dataset=dataset,
        partition=range_partition,
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'],
    )

    return dataset
Ejemplo n.º 7
0
def up(client):
    """Create the dataset named by the module-level ``dataset_name``.

    Args:
        client: Object wrapped by ``BigQueryMigration``.

    Returns:
        Whatever ``create_dataset(dataset_name)`` returned.
    """
    return BigQueryMigration(client).create_dataset(dataset_name)