def select_insert(self,
                   source_table_id,
                   destination_table_id,
                   query_field,
                   prefix='    ',
                   fg='yellow'):
     query = 'SELECT {query_field} FROM {dataset_id}.{source_table_id}'.format(
         query_field=query_field,
         dataset_id=self._dataset_ref.dataset_id,
         source_table_id=source_table_id)
     destination_table = self.dataset.table(destination_table_id)
     job_config = QueryJobConfig()
     job_config.use_legacy_sql = False
     job_config.use_query_cache = False
     job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
     job_config.destination = destination_table
     job = self._client.query(query, job_config)
     echo('Inserting... {0}'.format(job.job_id),
          prefix=prefix,
          fg=fg,
          no_color=self.no_color)
     echo('  {0}'.format(job.query),
          prefix=prefix,
          fg=fg,
          no_color=self.no_color)
     job.result()
     assert job.state == 'DONE'
     error_result = job.error_result
     if error_result:
         raise RuntimeError(job.errors)
 def plan_intersection_destroy(self, source, target, prefix='  ', fg='red'):
     count, datasets = self.get_intersection_datasets(target, source)
     _logger.debug('Destroy datasets: {0}'.format(datasets))
     for dataset in datasets:
         echo('- {0}'.format(dataset.dataset_id),
              prefix=prefix, fg=fg, no_color=self.no_color)
         echo()
     return count
 def _destroy(self, model, prefix='  ', fg='red'):
     table = BigQueryTable.to_table(self._dataset_ref, model)
     echo('Destroying... {0}'.format(table.path),
          prefix=prefix,
          fg=fg,
          no_color=self.no_color)
     self._client.delete_table(table)
     echo()
 def plan_add(self, source, target, prefix='  ', fg='green'):
     count, datasets = self.get_add_datasets(source, target)
     _logger.debug('Add datasets: {0}'.format(datasets))
     for dataset in datasets:
         echo('+ {0}'.format(dataset.dataset_id),
              prefix=prefix, fg=fg, no_color=self.no_color)
         echo_dump(dataset, prefix=prefix + '  ', fg=fg, no_color=self.no_color)
         echo()
     return count
 def _add(self, model, prefix='  ', fg='green'):
     table = BigQueryTable.to_table(self._dataset_ref, model)
     echo('Adding... {0}'.format(table.path),
          prefix=prefix,
          fg=fg,
          no_color=self.no_color)
     echo_dump(model, prefix=prefix + '  ', fg=fg, no_color=self.no_color)
     self._client.create_table(table)
     echo()
 def get_dataset(self, dataset_id):
     dataset_ref = self._client.dataset(dataset_id)
     dataset = None
     try:
         dataset = self._client.get_dataset(dataset_ref)
         echo('Load dataset: ' + dataset.path)
         dataset = BigQueryDataset.from_dataset(dataset)
     except NotFound:
         _logger.info('Dataset {0} not found.'.format(dataset_id))
     return dataset
 def plan_destroy(self, source, target, prefix='  ', fg='red'):
     count, tables = self.get_destroy_tables(source, target)
     _logger.debug('Destroy tables: {0}'.format(tables))
     for table in tables:
         echo('- {0}'.format(table.table_id),
              prefix=prefix,
              fg=fg,
              no_color=self.no_color)
         echo()
     return count
 def _export(self, output_dir, dataset_id):
     dataset = self.get_dataset(dataset_id)
     if dataset:
         data = dump(dataset)
         _logger.debug(data)
         export_path = os.path.join(output_dir, '{0}.yml'.format(dataset.dataset_id))
         echo('Export dataset config: {0}'.format(export_path))
         with codecs.open(export_path, 'wb', 'utf-8') as f:
             f.write(data)
     return dataset
 def _export(self, output_dir, table_id):
     table = self.get_table(table_id)
     data = dump(table)
     _logger.debug(data)
     export_path = os.path.join(output_dir,
                                '{0}.yml'.format(table.table_id))
     echo('Export table config: {0}'.format(export_path))
     with codecs.open(export_path, 'wb', 'utf-8') as f:
         f.write(data)
     return table
 def get_table(self, table_id):
     table_ref = self.dataset.table(table_id)
     table = None
     try:
         table = self._client.get_table(table_ref)
         echo('Load table: ' + table.path)
         table = BigQueryTable.from_table(table)
     except NotFound:
         _logger.info('Table {0} not found.'.format(table_id))
     return table
 def create_temporary_table(self, model):
     tmp_table_model = copy.deepcopy(model)
     tmp_table_id = str(uuid.uuid4()).replace('-', '_')
     tmp_table_model.table_id = tmp_table_id
     tmp_table = BigQueryTable.to_table(self._dataset_ref, tmp_table_model)
     echo('    Creating temporary table... {0}'.format(tmp_table.path),
          fg='yellow',
          no_color=self.no_color)
     self._client.create_table(tmp_table)
     return tmp_table_model
 def plan_change(self, source, target, prefix='  ', fg='yellow'):
     count, datasets = self.get_change_datasets(source, target)
     _logger.debug('Change datasets: {0}'.format(datasets))
     for dataset in datasets:
         echo('~ {0}'.format(dataset.dataset_id),
              prefix=prefix, fg=fg, no_color=self.no_color)
         source_dataset = next((s for s in source if s.dataset_id == dataset.dataset_id), None)
         echo_ndiff(source_dataset, dataset, prefix=prefix + '  ', fg=fg)
         echo()
     return count
 def _add(self, model, prefix='  ', fg='green'):
     dataset = BigQueryDataset.to_dataset(self._client.project, model)
     echo('Adding... {0}'.format(dataset.path),
          prefix=prefix, fg=fg, no_color=self.no_color)
     echo_dump(model, prefix=prefix + '  ', fg=fg, no_color=self.no_color)
     self._client.create_dataset(dataset)
     self._client.update_dataset(dataset, [
         'access_entries'
     ])
     echo()
 def plan_change(self, source, target, prefix='  ', fg='yellow'):
     count, tables = self.get_change_tables(source, target)
     _logger.debug('Change tables: {0}'.format(tables))
     for table in tables:
         echo('~ {0}'.format(table.table_id),
              prefix=prefix,
              fg=fg,
              no_color=self.no_color)
         source_table = next(
             (s for s in source if s.table_id == table.table_id), None)
         echo_ndiff(source_table, table, prefix=prefix + '  ', fg=fg)
         echo()
     return count
 def plan_add(self, source, target, prefix='  ', fg='green'):
     count, tables = self.get_add_tables(source, target)
     _logger.debug('Add tables: {0}'.format(tables))
     for table in tables:
         echo('+ {0}'.format(table.table_id),
              prefix=prefix,
              fg=fg,
              no_color=self.no_color)
         echo_dump(table,
                   prefix=prefix + '  ',
                   fg=fg,
                   no_color=self.no_color)
         echo()
     return count
 def _change(self, source_model, target_model, prefix='  ', fg='yellow'):
     dataset = BigQueryDataset.to_dataset(self._client.project, target_model)
     echo('Changing... {0}'.format(dataset.path),
          prefix=prefix, fg=fg, no_color=self.no_color)
     echo_ndiff(source_model, target_model, prefix=prefix + '  ', fg=fg)
     source_labels = source_model.labels
     if source_labels:
         labels = dataset.labels.copy()
         for k, v in iteritems(source_labels):
             if k not in labels.keys():
                 labels[k] = None
         dataset.labels = labels
     self._client.update_dataset(dataset, [
         'friendly_name',
         'description',
         'default_table_expiration_ms',
         'labels',
         'access_entries'
     ])
     echo()
 def backup(self, source_table_id, prefix='    ', fg='yellow'):
     source_table = self.dataset.table(source_table_id)
     backup_table_id = 'backup_{source_table_id}_{timestamp}'.format(
         source_table_id=source_table_id,
         timestamp=datetime.utcnow().strftime('%Y%m%d%H%M%S%f'))
     backup_table = self.backup_dataset.table(backup_table_id)
     job_config = CopyJobConfig()
     job_config.create_disposition = CreateDisposition.CREATE_IF_NEEDED
     job = self._client.copy_table(source_table,
                                   backup_table,
                                   job_config=job_config)
     echo('Backing up... {0}'.format(job.job_id),
          prefix=prefix,
          fg=fg,
          no_color=self.no_color)
     job.result()
     assert job.state == 'DONE'
     error_result = job.error_result
     if error_result:
         raise RuntimeError(job.errors)
 def _change(self, source_model, target_model, prefix='  ', fg='yellow'):
     table = BigQueryTable.to_table(self._dataset_ref, target_model)
     echo('Changing... {0}'.format(table.path),
          prefix=prefix,
          fg=fg,
          no_color=self.no_color)
     echo_ndiff(source_model, target_model, prefix=prefix + '  ', fg=fg)
     source_labels = source_model.labels
     if source_labels:
         labels = table.labels.copy()
         for k, v in iteritems(source_labels):
             if k not in labels.keys():
                 labels[k] = None
         table.labels = labels
     if target_model.partitioning_type != source_model.partitioning_type:
         assert self._migration_mode not in [
             SchemaMigrationMode.SELECT_INSERT,
             SchemaMigrationMode.SELECT_INSERT_BACKUP],\
             'Migration mode: `{0}` not supported.'.format(self._migration_mode.value)
     target_schema_exclude_description = target_model.schema_exclude_description()
     source_schema_exclude_description = source_model.schema_exclude_description()
     if target_schema_exclude_description != source_schema_exclude_description or \
             target_model.partitioning_type != source_model.partitioning_type:
         self.migrate(source_model, target_model)
     if target_schema_exclude_description == source_schema_exclude_description and \
             target_model.schema != source_model.schema:
         self.update_schema_description(target_model)
     self._client.update_table(table, [
         'friendly_name',
         'description',
         'expires',
         'view_use_legacy_sql',
         'view_query',
         'labels',
     ])
     echo()
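
The _change() method above delegates description-only schema updates to self.update_schema_description(), which is not included in this excerpt. Below is a minimal sketch of what such a method could look like, assuming that only column descriptions differ (the schema_exclude_description() comparison above already rules out name/type/mode changes); the method name comes from the call site, the body is an assumption.

 def update_schema_description(self, target_model):
     # hedged sketch: only descriptions changed, so sending the 'schema' field alone
     # is enough; names, types and modes are identical by the time this is reached
     table = BigQueryTable.to_table(self._dataset_ref, target_model)
     echo('    Updating schema description... {0}'.format(table.path),
          fg='yellow',
          no_color=self.no_color)
     self._client.update_table(table, ['schema'])
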
Example #19
def apply(ctx, conf_dir, auto_approve, dataset, exclude_dataset, mode, backup_dataset):
    # TODO Impl auto-approve option
    add_counts, change_counts, destroy_counts = [], [], []
    with ThreadPoolExecutor(max_workers=ctx.obj['parallelism']) as e:
        dataset_action = DatasetAction(e, project=ctx.obj['project'],
                                       credential_file=ctx.obj['credential_file'],
                                       no_color=not ctx.obj['color'],
                                       debug=ctx.obj['debug'])
        source_datasets = [f.result() for f in as_completed(dataset_action.list_datasets(
            dataset, exclude_dataset)) if f.result()]
        target_datasets = list_local_datasets(conf_dir, dataset, exclude_dataset)
        echo('------------------------------------------------------------------------')
        echo()

        fs = []
        add_count, add_fs = dataset_action.add(source_datasets, target_datasets)
        add_counts.append(add_count)
        fs.extend(add_fs)
        change_count, change_fs = dataset_action.change(source_datasets, target_datasets)
        change_counts.append(change_count)
        fs.extend(change_fs)
        destroy_count, destroy_fs = dataset_action.destroy(source_datasets, target_datasets)
        destroy_counts.append(destroy_count)
        fs.extend(destroy_fs)
        # wait for every submitted job and surface any exception raised in a worker
        for f in as_completed(fs):
            f.result()

        fs = []
        for d in target_datasets:
            target_tables = list_local_tables(conf_dir, d.dataset_id)
            if target_tables is None:
                continue
            table_action = TableAction(e, d.dataset_id,
                                       migration_mode=mode,
                                       backup_dataset_id=backup_dataset,
                                       project=ctx.obj['project'],
                                       credential_file=ctx.obj['credential_file'],
                                       no_color=not ctx.obj['color'],
                                       debug=ctx.obj['debug'])
            source_tables = [f.result() for f in
                             as_completed(table_action.list_tables()) if f.result()]
            if target_tables or source_tables:
                echo('------------------------------------------------------------------------')
                echo()
                add_count, add_fs = table_action.add(source_tables, target_tables)
                add_counts.append(add_count)
                fs.extend(add_fs)
                change_count, change_fs = table_action.change(source_tables, target_tables)
                change_counts.append(change_count)
                fs.extend(change_fs)
                destroy_count, destroy_fs = table_action.destroy(source_tables, target_tables)
                destroy_counts.append(destroy_count)
                fs.extend(destroy_fs)
        # wait for every submitted job and surface any exception raised in a worker
        for f in as_completed(fs):
            f.result()

    if not any(chain.from_iterable([add_counts, change_counts, destroy_counts])):
        echo(msg.MESSAGE_SUMMARY_NO_CHANGE)
        echo()
    else:
        echo(msg.MESSAGE_APPLY_SUMMARY.format(
            sum(add_counts), sum(change_counts), sum(destroy_counts)))
        echo()
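
apply() above unpacks (count, futures) pairs from DatasetAction.add/change/destroy and the matching TableAction methods, none of which appear in this excerpt. A hedged sketch of the add() side, reusing get_add_datasets() and _add() from the class snippet above and submitting the work to the executor handed in from the CLI; the attribute name self._executor is an assumption, not taken from the source.

def add(self, source, target):
    # hedged sketch of a DatasetAction method: classify, submit each _add to the
    # thread pool, and hand the futures back so apply() can aggregate the count
    # and wait on the futures
    count, datasets = self.get_add_datasets(source, target)
    futures = [self._executor.submit(self._add, d) for d in datasets]  # _executor is assumed
    return count, futures

change() and destroy() would presumably follow the same (count, futures) contract, built on get_change_datasets()/_change() and the corresponding destroy helpers.
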
Example #20
def plan(ctx, conf_dir, detailed_exitcode, dataset, exclude_dataset):
    echo(msg.MESSAGE_PLAN_HEADER)

    add_counts, change_counts, destroy_counts = [], [], []
    with ThreadPoolExecutor(max_workers=ctx.obj['parallelism']) as e:
        dataset_action = DatasetAction(e, project=ctx.obj['project'],
                                       credential_file=ctx.obj['credential_file'],
                                       no_color=not ctx.obj['color'],
                                       debug=ctx.obj['debug'])
        source_datasets = [f.result() for f in as_completed(dataset_action.list_datasets(
            dataset, exclude_dataset)) if f.result()]
        target_datasets = list_local_datasets(conf_dir, dataset, exclude_dataset)
        echo('------------------------------------------------------------------------')
        echo()

        add_counts.append(dataset_action.plan_add(source_datasets, target_datasets))
        change_counts.append(dataset_action.plan_change(source_datasets, target_datasets))
        destroy_counts.append(dataset_action.plan_destroy(source_datasets, target_datasets))

        for d in target_datasets:
            target_tables = list_local_tables(conf_dir, d.dataset_id)
            if target_tables is None:
                continue
            table_action = TableAction(e, d.dataset_id,
                                       project=ctx.obj['project'],
                                       credential_file=ctx.obj['credential_file'],
                                       no_color=not ctx.obj['color'],
                                       debug=ctx.obj['debug'])
            source_tables = [f.result() for f in
                             as_completed(table_action.list_tables()) if f.result()]
            if target_tables or source_tables:
                echo('------------------------------------------------------------------------')
                echo()
                add_counts.append(table_action.plan_add(source_tables, target_tables))
                change_counts.append(table_action.plan_change(source_tables, target_tables))
                destroy_counts.append(table_action.plan_destroy(source_tables, target_tables))

    if not any(chain.from_iterable([add_counts, change_counts, destroy_counts])):
        echo(msg.MESSAGE_SUMMARY_NO_CHANGE)
        echo()
    else:
        echo(msg.MESSAGE_PLAN_SUMMARY.format(
            sum(add_counts), sum(change_counts), sum(destroy_counts)))
        echo()
        if detailed_exitcode:
            sys.exit(2)
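
plan() and apply() both lean on get_add_*/get_change_*/get_destroy_* helpers that this excerpt does not include. A sketch of the change case consumed by plan_change(), under the assumption that the model classes implement value equality so != can detect a drifted definition:

def get_change_datasets(self, source, target):
    # hedged sketch of a DatasetAction method: a dataset counts as changed when both
    # sides define the same dataset_id but the local definition (target) no longer
    # matches the remote one (source)
    source_by_id = {s.dataset_id: s for s in source}
    datasets = [t for t in target
                if t.dataset_id in source_by_id and source_by_id[t.dataset_id] != t]
    return len(datasets), datasets
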
Example #21
def apply_destroy(ctx, conf_dir, auto_approve, dataset, exclude_dataset):
    # TODO Impl auto-approve option
    destroy_counts = []
    with ThreadPoolExecutor(max_workers=ctx.obj['parallelism']) as e:
        dataset_action = DatasetAction(e, project=ctx.obj['project'],
                                       credential_file=ctx.obj['credential_file'],
                                       no_color=not ctx.obj['color'],
                                       debug=ctx.obj['debug'])
        source_datasets = [f.result() for f in as_completed(dataset_action.list_datasets(
            dataset, exclude_dataset)) if f.result()]
        target_datasets = list_local_datasets(conf_dir, dataset, exclude_dataset)
        echo('------------------------------------------------------------------------')
        echo()

        fs = []
        for d in target_datasets:
            table_action = TableAction(e, d.dataset_id,
                                       project=ctx.obj['project'],
                                       credential_file=ctx.obj['credential_file'],
                                       no_color=not ctx.obj['color'],
                                       debug=ctx.obj['debug'])
            source_tables = [f.result() for f in
                             as_completed(table_action.list_tables()) if f.result()]
            if source_tables:
                echo('------------------------------------------------------------------------')
                echo()
                destroy_count, destroy_fs = table_action.destroy(source_tables, [])
                destroy_counts.append(destroy_count)
                fs.extend(destroy_fs)
        # wait for every submitted job and surface any exception raised in a worker
        for f in as_completed(fs):
            f.result()

        fs = []
        destroy_count, destroy_fs = dataset_action.intersection_destroy(
            source_datasets, target_datasets)
        destroy_counts.append(destroy_count)
        fs.extend(destroy_fs)
        # wait for every submitted job and surface any exception raised in a worker
        for f in as_completed(fs):
            f.result()

    if not any(destroy_counts):
        echo(msg.MESSAGE_SUMMARY_NO_CHANGE)
        echo()
    else:
        echo(msg.MESSAGE_APPLY_DESTROY_SUMMARY.format(sum(destroy_counts)))
        echo()
 def _destroy(self, model, prefix='  ', fg='red'):
     dataset = BigQueryDataset.to_dataset(self._client.project, model)
     echo('Destroying... {0}'.format(dataset.path),
          prefix=prefix, fg=fg, no_color=self.no_color)
     self._client.delete_dataset(dataset)
     echo()
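
plan_intersection_destroy() and the intersection_destroy() call in apply_destroy() both depend on get_intersection_datasets(), which is also missing from this excerpt. A minimal sketch of what it plausibly returns: the remote datasets whose dataset_id also appears in the local configuration, i.e. the set that apply destroy is allowed to remove; matching on dataset_id alone is an assumption made for illustration.

 def get_intersection_datasets(self, target, source):
     # hedged sketch: datasets present both locally (target) and remotely (source)
     target_ids = {t.dataset_id for t in target}
     datasets = [s for s in source if s.dataset_id in target_ids]
     return len(datasets), datasets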