def down(client):
    """Rebuild the table without the commission_user_id column (reverts up())."""
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    schema = table.schema.copy()
    # Only drop when the column is actually in position 1, so re-running
    # this down migration is harmless.
    if schema[1].name == 'commission_user_id':
        schema.pop(1)

    # Recreate from scratch: delete, then create with the reduced schema.
    migration.delete_table(table)
    migration.create_table(
        name=table_name,
        project=migration.client.project,
        schema=schema,
        dataset=dataset,
        partition={
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1,
        },
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])
    return dataset
def up(client):
    """Create the predictions table: time-partitioned, clustered on client_wrench_id."""
    migration = BigQueryMigration(client)
    # use me if you are NOT creating a new dataset. -- ndg 2/5/20
    dataset = migration.dataset(dataset_name)

    # Replace the first default clustering field with the wrench id.
    clusters = migration.default_clustering_fields
    del clusters[0]
    clusters.insert(0, 'client_wrench_id:STRING')

    # NOTE(review): 'expirement_name' is misspelled, but it is the live
    # column name — renaming it here would change the table schema.
    field_specs = [
        ('entity_id', 'STRING', 'REQUIRED'),
        ('tree_user_id', 'INTEGER', 'REQUIRED'),
        ('prediction', 'STRING', 'REQUIRED'),
        ('client_wrench_id', 'STRING', 'REQUIRED'),
        ('expirement_name', 'STRING', 'NULLABLE'),
        ('processing_datetime', 'DATETIME', 'NULLABLE'),
    ]
    schema = [bigquery.SchemaField(n, t, mode=m) for n, t, m in field_specs]

    migration.create_table(name=table_name,
                           project=migration.client.project,
                           schema=schema,
                           dataset=dataset,
                           partition={'type': 'time'},
                           clustering_fields=clusters)
    return dataset
def up(client):
    """Copy every table definition from the env's bluesun dataset into a
    freshly created dataset, clustered with icentris_client first."""
    migration = BigQueryMigration(client)
    dataset = migration.create_dataset(dataset_name)
    parent_dataset = migration.dataset('pyr_bluesun_{}'.format(client.env))

    clusters = migration.default_clustering_fields
    clusters.insert(0, 'icentris_client:STRING')

    for item in migration.client.list_tables(parent_dataset):
        source = migration.client.get_table(item.reference)
        migration.create_table(name=source.table_id,
                               project=migration.client.project,
                               schema=source.schema.copy(),
                               dataset=dataset,
                               partition={'type': 'time'},
                               clustering_fields=clusters)
    return dataset
def up(client):
    """Insert a REQUIRED commission_user_id column at position 1 and rebuild."""
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    schema = table.schema.copy()
    schema.insert(
        1, bigquery.SchemaField('commission_user_id', 'INTEGER',
                                mode='REQUIRED'))

    # Drop and recreate rather than patching the schema in place.
    migration.delete_table(table)
    migration.create_table(
        name=table_name,
        project=migration.client.project,
        schema=schema,
        dataset=dataset,
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'],
        partition={
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1,
        })
    return dataset
def up(client):
    """Create the product reviews table, range-partitioned per client."""
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)

    # (name, type, mode) triples; mode 'NULLABLE' is SchemaField's default.
    field_specs = [
        ('client_partition_id', 'INTEGER', 'REQUIRED'),
        ('client_wrench_id', 'STRING', 'REQUIRED'),
        ('icentris_client', 'STRING', 'REQUIRED'),
        ('site_id', 'INTEGER', 'REQUIRED'),
        ('user_id', 'INTEGER', 'REQUIRED'),
        ('tree_user_id', 'INTEGER', 'NULLABLE'),
        ('product_code', 'STRING', 'NULLABLE'),
        ('product_name', 'STRING', 'NULLABLE'),
        ('product_description', 'STRING', 'NULLABLE'),
        ('product_short_description', 'STRING', 'NULLABLE'),
        ('product_available_on', 'DATETIME', 'NULLABLE'),
        ('product_discontinued_on', 'DATETIME', 'NULLABLE'),
        ('product_slug', 'STRING', 'NULLABLE'),
        ('product_avg_rating', 'NUMERIC', 'NULLABLE'),
        ('product_reviews_count', 'INTEGER', 'NULLABLE'),
        ('review_name', 'STRING', 'NULLABLE'),
        ('location', 'STRING', 'NULLABLE'),
        ('rating', 'INTEGER', 'NULLABLE'),
        ('title', 'STRING', 'NULLABLE'),
        ('review', 'STRING', 'NULLABLE'),
        ('approved', 'BOOLEAN', 'NULLABLE'),
        ('created_at', 'DATETIME', 'NULLABLE'),
        ('updated_at', 'DATETIME', 'NULLABLE'),
        ('ip_address', 'STRING', 'NULLABLE'),
        ('show_identifier', 'BOOLEAN', 'NULLABLE'),
    ]
    product_reviews = [
        bigquery.SchemaField(n, t, mode=m) for n, t, m in field_specs
    ]

    migration.create_table(
        name=table_name,
        project=migration.client.project,
        schema=product_reviews,
        dataset=dataset,
        partition={
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1,
        },
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])
    return dataset
def up(client):
    """Clone every bluesun table's schema into a new env-suffixed dataset.

    Fixes two inconsistencies with the sibling migrations in this file:
    the original called ``client.list_tables`` / ``client.get_table`` on the
    raw argument, while every other migration routes BigQuery calls through
    ``migration.client``; and it passed the target dataset positionally,
    silently relying on ``create_table``'s parameter order — keywords make
    the intent explicit.
    """
    migration = BigQueryMigration(client)

    # dataset_name is a module-level template (e.g. 'name_{}'); bake in the
    # environment once for the whole module.
    global dataset_name
    dataset_name = dataset_name.format(client.env)

    wv_ds = migration.create_dataset(dataset_name)
    bs_ds = migration.dataset('pyr_bluesun_{}'.format(client.env))

    for item in migration.client.list_tables(bs_ds):
        tbl_ref = bs_ds.table(item.table_id)
        tbl = migration.client.get_table(tbl_ref)
        migration.create_table(name=tbl_ref.table_id,
                               project=tbl_ref.project,
                               dataset=wv_ds,
                               schema=tbl.schema)
    return wv_ds
def up(client):
    """Create the staging dataset and one table per entry in `schemas`."""
    migration = BigQueryMigration(client)
    dataset = migration.create_dataset('staging')

    for tbl_name, schema in schemas.items():
        # Fresh partition/cluster literals per iteration, mirroring the
        # original (no shared mutable state between create_table calls).
        migration.create_table(
            name=tbl_name,
            project=migration.client.project,
            schema=schema,
            dataset=dataset,
            partition={'type': 'range',
                       'field': 'client_partition_id',
                       'start': 1,
                       'end': 100,
                       'interval': 1},
            clustering_fields=['leo_eid:STRING',
                               'ingestion_timestamp:TIMESTAMP'])
    return dataset
def up(client):
    """Drop the column at index 1, prepend a REQUIRED dag_id, and rebuild."""
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    schema = table.schema.copy()
    del schema[1]  # remove the column previously at position 1
    schema.insert(
        0, bigquery.SchemaField('dag_id', 'STRING', mode='REQUIRED'))

    migration.delete_table(table)
    migration.create_table(name=table_name,
                           project=migration.client.project,
                           schema=schema,
                           dataset=dataset)
    return dataset
def up(client):
    """Create the flat site visitors table, range-partitioned per client."""
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)

    # (name, type, mode) triples; mode 'NULLABLE' is SchemaField's default.
    field_specs = [
        ('client_partition_id', 'INTEGER', 'REQUIRED'),
        ('client_wrench_id', 'STRING', 'REQUIRED'),
        ('icentris_client', 'STRING', 'REQUIRED'),
        ('site_id', 'INTEGER', 'REQUIRED'),
        ('user_id', 'INTEGER', 'REQUIRED'),
        ('tree_user_id', 'INTEGER', 'NULLABLE'),
        ('visitor_id', 'STRING', 'NULLABLE'),
        ('last_visit_date', 'DATETIME', 'NULLABLE'),
        ('visit_count', 'INTEGER', 'NULLABLE'),
        ('ipaddress', 'STRING', 'NULLABLE'),
        ('browser_agent', 'STRING', 'NULLABLE'),
        ('created_at', 'DATETIME', 'NULLABLE'),
        ('site_template_id', 'INTEGER', 'NULLABLE'),
        ('active', 'INTEGER', 'NULLABLE'),
        ('third_party_tracking_company', 'STRING', 'NULLABLE'),
        ('tracking_code', 'STRING', 'NULLABLE'),
        ('owner_name', 'STRING', 'NULLABLE'),
        ('email', 'STRING', 'NULLABLE'),
        ('story', 'STRING', 'NULLABLE'),
        ('avatar_file_name', 'STRING', 'NULLABLE'),
    ]
    flat_site_visitors = [
        bigquery.SchemaField(n, t, mode=m) for n, t, m in field_specs
    ]

    migration.create_table(
        name=table_name,
        project=migration.client.project,
        schema=flat_site_visitors,
        dataset=dataset,
        partition={
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1,
        },
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])
    return dataset
def up(client):
    """Create the checkpoint bookkeeping table (no partitioning or clustering).

    Original author's note (truncated in the source): clusters in BigQuery
    must live inside partitions, and although TIMESTAMP is a supported
    partition type, partitioning can only be done by date, not datetime.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)

    checkpoint_schema = [
        bigquery.SchemaField('table', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('leo_eid', 'STRING', mode='REQUIRED'),       # Partition
        bigquery.SchemaField('checkpoint', 'TIMESTAMP', mode='REQUIRED'), # Partition
    ]

    migration.create_table(name=table_name,
                           project=migration.client.project,
                           schema=checkpoint_schema,
                           dataset=dataset)
    return dataset
def down(client):
    """Revert up(): drop the leading dag_id and re-add a NULLABLE leo_eid."""
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)
    table = migration.client.get_table(dataset.table(table_name))

    schema = table.schema.copy()
    # Pop only when the up() migration actually ran; the insert/rebuild
    # below is unconditional, matching the original control flow.
    if schema[0].name == 'dag_id':
        schema.pop(0)
    schema.insert(
        1, bigquery.SchemaField('leo_eid', 'STRING', mode="NULLABLE"))

    migration.delete_table(table)
    migration.create_table(name=table_name,
                           project=migration.client.project,
                           schema=schema,
                           dataset=dataset)
    return dataset
def up(client):
    """Create a new dataset with one range-partitioned table per schema."""
    migration = BigQueryMigration(client)
    # use me if you are creating a new dataset. -- ndg 2/5/20
    dataset = migration.create_dataset(dataset_name)

    for tbl_name, schema in schemas.items():
        migration.create_table(
            name=tbl_name,
            project=migration.client.project,
            schema=schema,
            dataset=dataset,
            partition={
                'type': 'range',
                'field': 'client_partition_id',
                'start': 1,
                'end': 100,
                'interval': 1,
            },
            clustering_fields=migration.default_clustering_fields)
    return dataset
def up(client):
    """Create an env-suffixed dataset and one table per entry in
    bluesun_schema.json."""
    migration = BigQueryMigration(client)
    name = dataset_name + '_{}'.format(client.env)

    schema_path = PosixPath('/workspace/bigquery/migrations/bluesun_schema.json')
    with schema_path.open(mode='r') as fp:
        tbls = json.loads(fp.read())

    dataset = migration.create_dataset(name)
    for tbl_name, raw in tbls.items():
        # Rebuild SchemaField objects from the raw JSON field descriptors.
        schema = [
            bigquery.SchemaField(field['name'], field['type'], field['mode'])
            for field in raw['fields']
        ]
        migration.create_table(name=tbl_name,
                               project=migration.client.project,
                               schema=schema,
                               dataset=dataset)
    return dataset
def up(client):
    """Create the pii dataset and its users table."""
    migration = BigQueryMigration(client)
    dataset = migration.create_dataset('pii')

    # (name, type, mode) triples; mode 'NULLABLE' is SchemaField's default.
    field_specs = [
        ('client_partition_id', 'INTEGER', 'REQUIRED'),
        ('client_wrench_id', 'STRING', 'REQUIRED'),
        ('icentris_client', 'STRING', 'REQUIRED'),
        ('tree_user_id', 'INTEGER', 'NULLABLE'),
        ('first_name', 'STRING', 'NULLABLE'),
        ('last_name', 'STRING', 'NULLABLE'),
        ('company_name', 'STRING', 'NULLABLE'),
        ('email', 'STRING', 'NULLABLE'),
        ('phone', 'STRING', 'NULLABLE'),
        ('mobile_phone', 'STRING', 'NULLABLE'),
        ('street', 'STRING', 'NULLABLE'),
        ('city', 'STRING', 'NULLABLE'),
        ('state', 'STRING', 'NULLABLE'),
        ('country', 'STRING', 'NULLABLE'),
        ('birth_date', 'DATE', 'NULLABLE'),
        ('gender', 'STRING', 'NULLABLE'),
    ]
    pii_schema = [bigquery.SchemaField(n, t, mode=m) for n, t, m in field_specs]

    migration.create_table(
        name='users',
        project=migration.client.project,
        schema=pii_schema,
        dataset=dataset,
        partition={
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1,
        },
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])
    return dataset
def up(client):
    """Create the client-mapping table and seed it with the known clients."""
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)

    schema = [
        bigquery.SchemaField("icentris_client", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("partition_id", "INTEGER", mode="REQUIRED"),
        bigquery.SchemaField("wrench_id", "STRING", mode="REQUIRED"),
    ]
    tbl = migration.create_table(name=table_name,
                                 project=migration.client.project,
                                 schema=schema,
                                 dataset=dataset)

    # Seed rows: (icentris_client, partition_id, wrench_id).
    seed_rows = [
        ('monat', 1, '2c889143-9169-436a-b610-48c8fe31bb87'),
        ('worldventures', 2, 'd7d3e26f-d105-4816-825d-d5858b9cf0d1'),
        ('naturessunshine', 3, '16bcfb48-153a-4c7d-bb65-19074d9edb17'),
    ]
    migration.client.insert_rows(migration.client.get_table(tbl), seed_rows)
    return dataset
def up(client):
    """Create the contacts table with nested address/email/phone/category records."""
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)

    # Nested RECORD sub-schemas, named for readability.
    address_fields = [
        bigquery.SchemaField('address1', 'STRING'),
        bigquery.SchemaField('address2', 'STRING'),
        bigquery.SchemaField('city', 'STRING'),
        bigquery.SchemaField('state', 'STRING'),
        bigquery.SchemaField('postal_code', 'STRING'),
        bigquery.SchemaField('country', 'STRING'),
    ]
    email_fields = [
        bigquery.SchemaField('email', 'STRING'),
        bigquery.SchemaField('preferred', 'BOOLEAN'),
        bigquery.SchemaField('undeliverable_count', 'INTEGER'),
        bigquery.SchemaField('spam_reported_count', 'INTEGER'),
        bigquery.SchemaField('source', 'STRING'),
    ]
    phone_fields = [
        bigquery.SchemaField('phone_number', 'STRING'),
        bigquery.SchemaField('label', 'STRING'),
        bigquery.SchemaField('unformatted_phone_number', 'STRING'),
        bigquery.SchemaField('source', 'STRING'),
        bigquery.SchemaField('dialing_code', 'STRING'),
    ]
    category_fields = [
        bigquery.SchemaField('category', 'STRING'),
        bigquery.SchemaField('created_at', 'DATETIME'),
        bigquery.SchemaField('updated_at', 'DATETIME'),
    ]

    contacts = [
        bigquery.SchemaField('client_partition_id', 'INTEGER', mode='REQUIRED'),
        bigquery.SchemaField('client_wrench_id', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('icentris_client', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('id', 'INTEGER', mode='REQUIRED'),
        bigquery.SchemaField('owner_tree_user_id', 'INTEGER'),
        bigquery.SchemaField('owner_user_id', 'INTEGER'),
        bigquery.SchemaField('tree_user_id', 'INTEGER'),
        bigquery.SchemaField('user_id', 'INTEGER'),
        bigquery.SchemaField('type', 'INTEGER'),
        bigquery.SchemaField('level_of_interest', 'INTEGER'),
        bigquery.SchemaField('first_name', 'STRING'),
        bigquery.SchemaField('last_name', 'STRING'),
        bigquery.SchemaField('birthday', 'DATE'),
        bigquery.SchemaField('created_at', 'DATETIME'),
        bigquery.SchemaField('updated_at', 'DATETIME'),
        bigquery.SchemaField('is_downline', 'BOOLEAN'),
        bigquery.SchemaField('opt_in', 'BOOLEAN'),
        bigquery.SchemaField('info', 'STRING'),
        bigquery.SchemaField('avatar_file_name', 'STRING'),
        bigquery.SchemaField('avatar_content_type', 'STRING'),
        bigquery.SchemaField('avatar_file_size', 'INTEGER'),
        bigquery.SchemaField('avatar_updated_at', 'DATETIME'),
        bigquery.SchemaField('addresses', 'RECORD', mode='REPEATED',
                             fields=address_fields),
        bigquery.SchemaField('emails', 'RECORD', mode='REPEATED',
                             fields=email_fields),
        bigquery.SchemaField('phone_numbers', 'RECORD', mode='REPEATED',
                             fields=phone_fields),
        bigquery.SchemaField('categories', 'RECORD', mode='REPEATED',
                             fields=category_fields),
    ]

    migration.create_table(
        name=table_name,
        project=migration.client.project,
        schema=contacts,
        partition={
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1,
        },
        dataset=dataset,
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])
    return dataset
def up(client):
    """Create the commissions table with nested commission/detail/bonus/run records.

    Bug fixed: the original passed ``schema=contacts`` — a name not defined
    anywhere in this function (copy-paste from the contacts migration) —
    which raises NameError before any table is created.  The schema built
    here is now passed instead.
    """
    migration = BigQueryMigration(client)
    dataset = migration.dataset(dataset_name)

    # NOTE(review): 'commissisions' is misspelled, but it is the live column
    # name; renaming it would change the table schema, so it is kept as-is.
    commissisions_schema = [
        bigquery.SchemaField('icentris_client', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('tree_user_id', 'INTEGER', mode='REQUIRED'),
        bigquery.SchemaField('client_user_id', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField(
            'commissisions', 'RECORD', mode='REPEATED',
            fields=[
                bigquery.SchemaField('currency_code', 'STRING'),
                bigquery.SchemaField('earnings', 'INTEGER'),
                bigquery.SchemaField('previous_balance', 'INTEGER'),
                bigquery.SchemaField('balance_forward', 'INTEGER'),
                bigquery.SchemaField('fee', 'INTEGER'),
                bigquery.SchemaField('total', 'INTEGER'),
                bigquery.SchemaField('checksum', 'STRING'),
            ]),
        bigquery.SchemaField(
            'details', 'RECORD', mode='REPEATED',
            fields=[
                bigquery.SchemaField('source_amount', 'INTEGER'),
                bigquery.SchemaField('percentage', 'INTEGER'),
                bigquery.SchemaField('commission_amount', 'INTEGER'),
                bigquery.SchemaField('level', 'INTEGER'),
                bigquery.SchemaField('paid_level', 'INTEGER'),
            ]),
        bigquery.SchemaField(
            'bonuses', 'RECORD', mode='REPEATED',
            fields=[
                bigquery.SchemaField('period_type', 'STRING'),
                bigquery.SchemaField('description', 'STRING'),
            ]),
        bigquery.SchemaField(
            'runs', 'RECORD', mode='REPEATED',
            fields=[
                bigquery.SchemaField('period', 'STRING'),
                bigquery.SchemaField('period_type', 'STRING'),
                bigquery.SchemaField('description', 'STRING'),
                bigquery.SchemaField('run_status', 'STRING'),
                bigquery.SchemaField('plan', 'STRING'),
            ]),
        bigquery.SchemaField('created_date', 'DATETIME'),
        bigquery.SchemaField('modified_date', 'DATETIME'),
    ]

    # TODO(review): the range partition field 'client_partition_id' is not
    # present in this schema — confirm the partition spec against BigQuery
    # before relying on this migration.
    migration.create_table(
        name=table_name,
        project=migration.client.project,
        schema=commissisions_schema,  # was: contacts (NameError)
        partition={
            'type': 'range',
            'field': 'client_partition_id',
            'start': 1,
            'end': 100,
            'interval': 1,
        },
        dataset=dataset,
        clustering_fields=['leo_eid:STRING', 'ingestion_timestamp:TIMESTAMP'])
    return dataset