def select_insert(self, source_table_id, destination_table_id, query_field,
                  prefix='  ', fg='yellow'):
    query = 'SELECT {query_field} FROM {dataset_id}.{source_table_id}'.format(
        query_field=query_field,
        dataset_id=self._dataset_ref.dataset_id,
        source_table_id=source_table_id)
    destination_table = self.dataset.table(destination_table_id)
    job_config = QueryJobConfig()
    job_config.use_legacy_sql = False
    job_config.use_query_cache = False
    # WRITE_TRUNCATE replaces the destination table contents with the query result.
    job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
    job_config.destination = destination_table
    job = self._client.query(query, job_config)
    echo('Inserting... {0}'.format(job.job_id), prefix=prefix, fg=fg,
         no_color=self.no_color)
    echo('    {0}'.format(job.query), prefix=prefix, fg=fg,
         no_color=self.no_color)
    job.result()
    assert job.state == 'DONE'
    error_result = job.error_result
    if error_result:
        raise RuntimeError(job.errors)

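# Usage sketch (hypothetical table IDs; `action` stands for a configured table
# action bound to a dataset): copy two columns into a truncated destination.
#
#   action.select_insert(source_table_id='events',
#                        destination_table_id='events_migrated',
#                        query_field='user_id, created_at')
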
def plan_intersection_destroy(self, source, target, prefix='  ', fg='red'):
    count, datasets = self.get_intersection_datasets(target, source)
    _logger.debug('Destroy datasets: {0}'.format(datasets))
    for dataset in datasets:
        echo('- {0}'.format(dataset.dataset_id), prefix=prefix, fg=fg,
             no_color=self.no_color)
        echo()
    return count

def _destroy(self, model, prefix='  ', fg='red'):
    table = BigQueryTable.to_table(self._dataset_ref, model)
    echo('Destroying... {0}'.format(table.path), prefix=prefix, fg=fg,
         no_color=self.no_color)
    self._client.delete_table(table)
    echo()

def plan_add(self, source, target, prefix='  ', fg='green'):
    count, datasets = self.get_add_datasets(source, target)
    _logger.debug('Add datasets: {0}'.format(datasets))
    for dataset in datasets:
        echo('+ {0}'.format(dataset.dataset_id), prefix=prefix, fg=fg,
             no_color=self.no_color)
        echo_dump(dataset, prefix=prefix + '    ', fg=fg,
                  no_color=self.no_color)
        echo()
    return count

def _add(self, model, prefix='  ', fg='green'):
    table = BigQueryTable.to_table(self._dataset_ref, model)
    echo('Adding... {0}'.format(table.path), prefix=prefix, fg=fg,
         no_color=self.no_color)
    echo_dump(model, prefix=prefix + '    ', fg=fg, no_color=self.no_color)
    self._client.create_table(table)
    echo()

def get_dataset(self, dataset_id):
    dataset_ref = self._client.dataset(dataset_id)
    dataset = None
    try:
        dataset = self._client.get_dataset(dataset_ref)
        echo('Load dataset: ' + dataset.path)
        dataset = BigQueryDataset.from_dataset(dataset)
    except NotFound:
        _logger.info('Dataset {0} not found.'.format(dataset_id))
    return dataset

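# Usage sketch (hypothetical dataset ID): returns a BigQueryDataset model, or
# None when the dataset does not exist remotely.
#
#   model = action.get_dataset('warehouse')
#   if model is None:
#       pass  # nothing exists remotely to diff against
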
def plan_destroy(self, source, target, prefix='  ', fg='red'):
    count, tables = self.get_destroy_tables(source, target)
    _logger.debug('Destroy tables: {0}'.format(tables))
    for table in tables:
        echo('- {0}'.format(table.table_id), prefix=prefix, fg=fg,
             no_color=self.no_color)
        echo()
    return count

def _export(self, output_dir, dataset_id):
    dataset = self.get_dataset(dataset_id)
    if dataset:
        data = dump(dataset)
        _logger.debug(data)
        export_path = os.path.join(output_dir,
                                   '{0}.yml'.format(dataset.dataset_id))
        echo('Export dataset config: {0}'.format(export_path))
        with codecs.open(export_path, 'wb', 'utf-8') as f:
            f.write(data)
    return dataset

def _export(self, output_dir, table_id):
    table = self.get_table(table_id)
    data = dump(table)
    _logger.debug(data)
    export_path = os.path.join(output_dir, '{0}.yml'.format(table.table_id))
    echo('Export table config: {0}'.format(export_path))
    with codecs.open(export_path, 'wb', 'utf-8') as f:
        f.write(data)
    return table

def get_table(self, table_id):
    table_ref = self.dataset.table(table_id)
    table = None
    try:
        table = self._client.get_table(table_ref)
        echo('Load table: ' + table.path)
        table = BigQueryTable.from_table(table)
    except NotFound:
        _logger.info('Table {0} not found.'.format(table_id))
    return table

def create_temporary_table(self, model):
    tmp_table_model = copy.deepcopy(model)
    # Dashes are not valid in table IDs, so normalize the UUID with underscores.
    tmp_table_id = str(uuid.uuid4()).replace('-', '_')
    tmp_table_model.table_id = tmp_table_id
    tmp_table = BigQueryTable.to_table(self._dataset_ref, tmp_table_model)
    echo('    Creating temporary table... {0}'.format(tmp_table.path),
         fg='yellow', no_color=self.no_color)
    self._client.create_table(tmp_table)
    return tmp_table_model

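# Usage sketch (hypothetical model): the returned model keeps the source
# model's schema and settings but carries the generated ID, e.g.
#
#   tmp = action.create_temporary_table(table_model)
#   tmp.table_id  # e.g. '550e8400_e29b_41d4_a716_446655440000'
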
def plan_change(self, source, target, prefix='  ', fg='yellow'):
    count, datasets = self.get_change_datasets(source, target)
    _logger.debug('Change datasets: {0}'.format(datasets))
    for dataset in datasets:
        echo('~ {0}'.format(dataset.dataset_id), prefix=prefix, fg=fg,
             no_color=self.no_color)
        source_dataset = next(
            (s for s in source if s.dataset_id == dataset.dataset_id), None)
        echo_ndiff(source_dataset, dataset, prefix=prefix + '    ', fg=fg)
        echo()
    return count

def _add(self, model, prefix='  ', fg='green'):
    dataset = BigQueryDataset.to_dataset(self._client.project, model)
    echo('Adding... {0}'.format(dataset.path), prefix=prefix, fg=fg,
         no_color=self.no_color)
    echo_dump(model, prefix=prefix + '    ', fg=fg, no_color=self.no_color)
    self._client.create_dataset(dataset)
    # Access entries are applied with a follow-up update after creation.
    self._client.update_dataset(dataset, ['access_entries'])
    echo()

def plan_change(self, source, target, prefix='  ', fg='yellow'):
    count, tables = self.get_change_tables(source, target)
    _logger.debug('Change tables: {0}'.format(tables))
    for table in tables:
        echo('~ {0}'.format(table.table_id), prefix=prefix, fg=fg,
             no_color=self.no_color)
        source_table = next(
            (s for s in source if s.table_id == table.table_id), None)
        echo_ndiff(source_table, table, prefix=prefix + '    ', fg=fg)
        echo()
    return count

def plan_add(self, source, target, prefix='  ', fg='green'):
    count, tables = self.get_add_tables(source, target)
    _logger.debug('Add tables: {0}'.format(tables))
    for table in tables:
        echo('+ {0}'.format(table.table_id), prefix=prefix, fg=fg,
             no_color=self.no_color)
        echo_dump(table, prefix=prefix + '    ', fg=fg, no_color=self.no_color)
        echo()
    return count

def _change(self, source_model, target_model, prefix='  ', fg='yellow'):
    dataset = BigQueryDataset.to_dataset(self._client.project, target_model)
    echo('Changing... {0}'.format(dataset.path), prefix=prefix, fg=fg,
         no_color=self.no_color)
    echo_ndiff(source_model, target_model, prefix=prefix + '    ', fg=fg)
    source_labels = source_model.labels
    if source_labels:
        # Labels that exist remotely but not in the target config are set to
        # None so the update call deletes them.
        labels = dataset.labels.copy()
        for k, v in iteritems(source_labels):
            if k not in labels:
                labels[k] = None
        dataset.labels = labels
    self._client.update_dataset(dataset, [
        'friendly_name',
        'description',
        'default_table_expiration_ms',
        'labels',
        'access_entries',
    ])
    echo()

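# Label reconciliation sketch (hypothetical values): given
#
#   remote labels: {'env': 'prod', 'owner': 'data-team'}
#   target labels: {'env': 'prod'}
#
# the patched update sends {'env': 'prod', 'owner': None}; the BigQuery API
# removes a label whose value is updated to None.
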
def backup(self, source_table_id, prefix='  ', fg='yellow'):
    source_table = self.dataset.table(source_table_id)
    backup_table_id = 'backup_{source_table_id}_{timestamp}'.format(
        source_table_id=source_table_id,
        timestamp=datetime.utcnow().strftime('%Y%m%d%H%M%S%f'))
    backup_table = self.backup_dataset.table(backup_table_id)
    job_config = CopyJobConfig()
    job_config.create_disposition = CreateDisposition.CREATE_IF_NEEDED
    job = self._client.copy_table(source_table, backup_table,
                                  job_config=job_config)
    echo('Backing up... {0}'.format(job.job_id), prefix=prefix, fg=fg,
         no_color=self.no_color)
    job.result()
    assert job.state == 'DONE'
    error_result = job.error_result
    if error_result:
        raise RuntimeError(job.errors)

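# Usage sketch (hypothetical table ID): copies the table into the backup
# dataset under a microsecond-timestamped name, e.g.
#
#   action.backup('events')
#   # -> backup_events_20240101123045000000 in the backup dataset
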
def _change(self, source_model, target_model, prefix='  ', fg='yellow'):
    table = BigQueryTable.to_table(self._dataset_ref, target_model)
    echo('Changing... {0}'.format(table.path), prefix=prefix, fg=fg,
         no_color=self.no_color)
    echo_ndiff(source_model, target_model, prefix=prefix + '    ', fg=fg)
    source_labels = source_model.labels
    if source_labels:
        # As with datasets, labels removed from the target config are set to
        # None so the update call deletes them.
        labels = table.labels.copy()
        for k, v in iteritems(source_labels):
            if k not in labels:
                labels[k] = None
        table.labels = labels
    if target_model.partitioning_type != source_model.partitioning_type:
        # A partitioning change cannot be migrated via select-insert.
        assert self._migration_mode not in [
            SchemaMigrationMode.SELECT_INSERT,
            SchemaMigrationMode.SELECT_INSERT_BACKUP], \
            'Migration mode: `{0}` not supported.'.format(
                self._migration_mode.value)
    target_schema_exclude_description = target_model.schema_exclude_description()
    source_schema_exclude_description = source_model.schema_exclude_description()
    if target_schema_exclude_description != source_schema_exclude_description or \
            target_model.partitioning_type != source_model.partitioning_type:
        # Structural schema or partitioning changes require a table migration.
        self.migrate(source_model, target_model)
    if target_schema_exclude_description == source_schema_exclude_description and \
            target_model.schema != source_model.schema:
        # Only field descriptions differ; patch them in place.
        self.update_schema_description(target_model)
    self._client.update_table(table, [
        'friendly_name',
        'description',
        'expires',
        'view_use_legacy_sql',
        'view_query',
        'labels',
    ])
    echo()

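# Decision sketch for the change flow above (a hedged reading, assuming
# schema_exclude_description() strips field descriptions before comparing):
#
#   - partitioning_type changed           -> migrate() (select-insert modes refused)
#   - schema changed beyond descriptions  -> migrate()
#   - only field descriptions changed     -> update_schema_description()
#   - metadata only (name, expiry, view,
#     labels)                             -> update_table() patch
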
def apply(ctx, conf_dir, auto_approve, dataset, exclude_dataset, mode,
          backup_dataset):
    # TODO: Implement the auto-approve option
    add_counts, change_counts, destroy_counts = [], [], []
    with ThreadPoolExecutor(max_workers=ctx.obj['parallelism']) as e:
        dataset_action = DatasetAction(e,
                                       project=ctx.obj['project'],
                                       credential_file=ctx.obj['credential_file'],
                                       no_color=not ctx.obj['color'],
                                       debug=ctx.obj['debug'])
        source_datasets = [d for d in as_completed(
            dataset_action.list_datasets(dataset, exclude_dataset)) if d]
        target_datasets = list_local_datasets(conf_dir, dataset,
                                              exclude_dataset)
        echo('------------------------------------------------------------------------')
        echo()
        fs = []
        add_count, add_fs = dataset_action.add(source_datasets,
                                               target_datasets)
        add_counts.append(add_count)
        fs.extend(add_fs)
        change_count, change_fs = dataset_action.change(source_datasets,
                                                        target_datasets)
        change_counts.append(change_count)
        fs.extend(change_fs)
        destroy_count, destroy_fs = dataset_action.destroy(source_datasets,
                                                           target_datasets)
        destroy_counts.append(destroy_count)
        fs.extend(destroy_fs)
        as_completed(fs)

        fs = []
        for d in target_datasets:
            target_tables = list_local_tables(conf_dir, d.dataset_id)
            if target_tables is None:
                continue
            table_action = TableAction(e, d.dataset_id,
                                       migration_mode=mode,
                                       backup_dataset_id=backup_dataset,
                                       project=ctx.obj['project'],
                                       credential_file=ctx.obj['credential_file'],
                                       no_color=not ctx.obj['color'],
                                       debug=ctx.obj['debug'])
            source_tables = [t for t in as_completed(
                table_action.list_tables()) if t]
            if target_tables or source_tables:
                echo('------------------------------------------------------------------------')
                echo()
            add_count, add_fs = table_action.add(source_tables, target_tables)
            add_counts.append(add_count)
            fs.extend(add_fs)
            change_count, change_fs = table_action.change(source_tables,
                                                          target_tables)
            change_counts.append(change_count)
            fs.extend(change_fs)
            destroy_count, destroy_fs = table_action.destroy(source_tables,
                                                             target_tables)
            destroy_counts.append(destroy_count)
            fs.extend(destroy_fs)
        as_completed(fs)

    if not any(chain.from_iterable([add_counts, change_counts,
                                    destroy_counts])):
        echo(msg.MESSAGE_SUMMARY_NO_CHANGE)
        echo()
    else:
        echo(msg.MESSAGE_APPLY_SUMMARY.format(
            sum(add_counts), sum(change_counts), sum(destroy_counts)))
        echo()

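# Hypothetical invocation (CLI name and option spellings inferred from the
# parameters above, not confirmed by the source):
#
#   $ bqdm apply --mode select_insert --backup-dataset backup ./conf
#
# `mode` selects the SchemaMigrationMode used for table changes, and
# `backup_dataset` is where backups are written when a backup-taking mode runs.
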
def plan(ctx, conf_dir, detailed_exitcode, dataset, exclude_dataset):
    echo(msg.MESSAGE_PLAN_HEADER)
    add_counts, change_counts, destroy_counts = [], [], []
    with ThreadPoolExecutor(max_workers=ctx.obj['parallelism']) as e:
        dataset_action = DatasetAction(e,
                                       project=ctx.obj['project'],
                                       credential_file=ctx.obj['credential_file'],
                                       no_color=not ctx.obj['color'],
                                       debug=ctx.obj['debug'])
        source_datasets = [d for d in as_completed(
            dataset_action.list_datasets(dataset, exclude_dataset)) if d]
        target_datasets = list_local_datasets(conf_dir, dataset,
                                              exclude_dataset)
        echo('------------------------------------------------------------------------')
        echo()
        add_counts.append(dataset_action.plan_add(source_datasets,
                                                  target_datasets))
        change_counts.append(dataset_action.plan_change(source_datasets,
                                                        target_datasets))
        destroy_counts.append(dataset_action.plan_destroy(source_datasets,
                                                          target_datasets))
        for d in target_datasets:
            target_tables = list_local_tables(conf_dir, d.dataset_id)
            if target_tables is None:
                continue
            table_action = TableAction(e, d.dataset_id,
                                       project=ctx.obj['project'],
                                       credential_file=ctx.obj['credential_file'],
                                       no_color=not ctx.obj['color'],
                                       debug=ctx.obj['debug'])
            source_tables = [t for t in as_completed(
                table_action.list_tables()) if t]
            if target_tables or source_tables:
                echo('------------------------------------------------------------------------')
                echo()
            add_counts.append(table_action.plan_add(source_tables,
                                                    target_tables))
            change_counts.append(table_action.plan_change(source_tables,
                                                          target_tables))
            destroy_counts.append(table_action.plan_destroy(source_tables,
                                                            target_tables))

    if not any(chain.from_iterable([add_counts, change_counts,
                                    destroy_counts])):
        echo(msg.MESSAGE_SUMMARY_NO_CHANGE)
        echo()
    else:
        echo(msg.MESSAGE_PLAN_SUMMARY.format(
            sum(add_counts), sum(change_counts), sum(destroy_counts)))
        echo()
        if detailed_exitcode:
            sys.exit(2)

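# Hypothetical invocation (CLI name and option spellings inferred from the
# parameters above, not confirmed by the source):
#
#   $ bqdm plan --detailed-exitcode --dataset sales ./conf
#
# With detailed_exitcode set, the command exits with status 2 when the plan
# contains at least one change, echoing `terraform plan -detailed-exitcode`.
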
def apply_destroy(ctx, conf_dir, auto_approve, dataset, exclude_dataset):
    # TODO: Implement the auto-approve option
    destroy_counts = []
    with ThreadPoolExecutor(max_workers=ctx.obj['parallelism']) as e:
        dataset_action = DatasetAction(e,
                                       project=ctx.obj['project'],
                                       credential_file=ctx.obj['credential_file'],
                                       no_color=not ctx.obj['color'],
                                       debug=ctx.obj['debug'])
        source_datasets = [d for d in as_completed(
            dataset_action.list_datasets(dataset, exclude_dataset)) if d]
        target_datasets = list_local_datasets(conf_dir, dataset,
                                              exclude_dataset)
        echo('------------------------------------------------------------------------')
        echo()
        fs = []
        for d in target_datasets:
            table_action = TableAction(e, d.dataset_id,
                                       project=ctx.obj['project'],
                                       credential_file=ctx.obj['credential_file'],
                                       no_color=not ctx.obj['color'],
                                       debug=ctx.obj['debug'])
            source_tables = [t for t in as_completed(
                table_action.list_tables()) if t]
            if source_tables:
                echo('------------------------------------------------------------------------')
                echo()
            destroy_count, destroy_fs = table_action.destroy(source_tables, [])
            destroy_counts.append(destroy_count)
            fs.extend(destroy_fs)
        as_completed(fs)

        fs = []
        destroy_count, destroy_fs = dataset_action.intersection_destroy(
            source_datasets, target_datasets)
        destroy_counts.append(destroy_count)
        fs.extend(destroy_fs)
        as_completed(fs)

    if not any(destroy_counts):
        echo(msg.MESSAGE_SUMMARY_NO_CHANGE)
        echo()
    else:
        echo(msg.MESSAGE_APPLY_DESTROY_SUMMARY.format(sum(destroy_counts)))
        echo()

def _destroy(self, model, prefix='  ', fg='red'):
    dataset = BigQueryDataset.to_dataset(self._client.project, model)
    echo('Destroying... {0}'.format(dataset.path), prefix=prefix, fg=fg,
         no_color=self.no_color)
    self._client.delete_dataset(dataset)
    echo()