class BigtableTableCreateOperator(BaseOperator, BigtableValidationMixin):
    """
    Creates the table in the Cloud Bigtable instance.

    When the table already exists, its column families are compared with the
    requested ones. Matching families are treated as success; any difference
    fails the task with an ``AirflowException``. The task also fails when the
    target instance cannot be found.

    For more details about creating table have a look at the reference:
    https://googleapis.github.io/google-cloud-python/latest/bigtable/table.html#google.cloud.bigtable.table.Table.create

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:BigtableTableCreateOperator`

    :type instance_id: str
    :param instance_id: The ID of the Cloud Bigtable instance that will
        hold the new table.
    :type table_id: str
    :param table_id: The ID of the table to be created.
    :type project_id: str
    :param project_id: Optional, the ID of the GCP project. If set to None or missing,
        the default project_id from the GCP connection is used.
    :type initial_split_keys: list
    :param initial_split_keys: (Optional) list of row keys in bytes that will be used to
        initially split the table into several tablets.
    :type column_families: dict
    :param column_families: (Optional) A map of columns to create.
        The key is the column_id str and the value is a
        :class:`google.cloud.bigtable.column_family.GarbageCollectionRule`
    """
    REQUIRED_ATTRIBUTES = ('instance_id', 'table_id')
    template_fields = ['project_id', 'instance_id', 'table_id']

    @apply_defaults
    def __init__(self,
                 instance_id,
                 table_id,
                 project_id=None,
                 initial_split_keys=None,
                 column_families=None,
                 *args, **kwargs):
        self.project_id = project_id
        self.instance_id = instance_id
        self.table_id = table_id
        # Falsy values (None, empty containers) are normalised to fresh empty containers.
        self.initial_split_keys = initial_split_keys if initial_split_keys else []
        self.column_families = column_families if column_families else {}
        self._validate_inputs()
        self.hook = BigtableHook()
        # Populated by execute() once the instance has been looked up.
        self.instance = None
        super(BigtableTableCreateOperator, self).__init__(*args, **kwargs)

    def _compare_column_families(self):
        """
        Check whether the existing table has exactly the requested column families.

        Every mismatch is reported through ``self.log.error``.

        :return: True when both the set of column family ids and the garbage
            collection rule of each family match, False otherwise.
        :rtype: bool
        """
        expected_families = self.column_families
        remote_families = self.hook.get_column_families_for_table(self.instance, self.table_id)

        if set(remote_families) != set(expected_families):
            self.log.error("Table '%s' has different set of Column Families", self.table_id)
            self.log.error("Expected: %s", expected_families.keys())
            self.log.error("Actual: %s", remote_families.keys())
            return False

        # The two mappings are keyed by the same column ids but hold different values:
        # the local `self.column_families` maps a column_id to a GarbageCollectionRule,
        # while the remote mapping holds ColumnFamily objects whose `gc_rule`
        # attribute is the value to compare against.
        # For more information about ColumnFamily please refer to the documentation:
        # https://googleapis.github.io/google-cloud-python/latest/bigtable/column-family.html#google.cloud.bigtable.column_family.ColumnFamily
        for family_id, remote_family in remote_families.items():
            if remote_family.gc_rule != expected_families[family_id]:
                self.log.error("Column Family '%s' differs for table '%s'.", family_id, self.table_id)
                return False

        return True

    def execute(self, context):
        """
        Create the table, tolerating an already existing table with matching column families.

        :param context: Airflow task context (not used by this operator).
        :raises AirflowException: when the instance does not exist, or when the
            table already exists with different column families.
        """
        instance = self.hook.get_instance(project_id=self.project_id,
                                          instance_id=self.instance_id)
        self.instance = instance
        if not instance:
            raise AirflowException(
                "Dependency: instance '{}' does not exist in project '{}'.".format(
                    self.instance_id, self.project_id))

        try:
            self.hook.create_table(
                instance=instance,
                table_id=self.table_id,
                initial_split_keys=self.initial_split_keys,
                column_families=self.column_families
            )
        except google.api_core.exceptions.AlreadyExists:
            if self._compare_column_families():
                self.log.info("The table '%s' already exists. Consider it as created",
                              self.table_id)
            else:
                raise AirflowException(
                    "Table '{}' already exists with different Column Families.".format(
                        self.table_id))
class BigtableTableCreateOperator(BaseOperator, BigtableValidationMixin):
    """
    Creates the table in the Cloud Bigtable instance.

    An already existing table is accepted as long as its column families are
    identical to the requested ones; otherwise the task fails with an
    ``AirflowException``. A missing instance fails the task as well.

    For more details about creating table have a look at the reference:
    https://googleapis.github.io/google-cloud-python/latest/bigtable/table.html#google.cloud.bigtable.table.Table.create

    :type instance_id: str
    :param instance_id: The ID of the Cloud Bigtable instance that will
        hold the new table.
    :type table_id: str
    :param table_id: The ID of the table to be created.
    :type project_id: str
    :param project_id: Optional, the ID of the GCP project. If set to None or missing,
        the default project_id from the GCP connection is used.
    :type initial_split_keys: list
    :param initial_split_keys: (Optional) list of row keys in bytes that will be used to
        initially split the table into several tablets.
    :type column_families: dict
    :param column_families: (Optional) A map of columns to create.
        The key is the column_id str and the value is a GarbageCollectionRule
    """
    REQUIRED_ATTRIBUTES = ('instance_id', 'table_id')
    template_fields = ['project_id', 'instance_id', 'table_id']

    @apply_defaults
    def __init__(self,
                 instance_id,
                 table_id,
                 project_id=None,
                 initial_split_keys=None,
                 column_families=None,
                 *args, **kwargs):
        self.project_id = project_id
        self.instance_id = instance_id
        self.table_id = table_id
        # None and empty containers both end up as a fresh empty container.
        self.initial_split_keys = initial_split_keys if initial_split_keys else []
        self.column_families = column_families if column_families else {}
        self._validate_inputs()
        self.hook = BigtableHook()
        # Set by execute() after the instance lookup.
        self.instance = None
        super(BigtableTableCreateOperator, self).__init__(*args, **kwargs)

    def _compare_column_families(self):
        """
        Tell whether the table found remotely matches the requested column families.

        Differences are logged with ``self.log.error`` before returning.

        :return: True if the column family ids and each family's garbage
            collection rule are equal, False otherwise.
        :rtype: bool
        """
        wanted = self.column_families
        existing = self.hook.get_column_families_for_table(self.instance, self.table_id)

        same_ids = set(existing) == set(wanted)
        if not same_ids:
            self.log.error("Table '%s' has different set of Column Families", self.table_id)
            self.log.error("Expected: %s", wanted.keys())
            self.log.error("Actual: %s", existing.keys())
            return False

        # Local `self.column_families` maps a column_id to a GarbageCollectionRule.
        # The remote mapping uses the same keys but stores ColumnFamily objects,
        # so the rule has to be read from their `gc_rule` attribute.
        # For more information about ColumnFamily please refer to the documentation:
        # https://googleapis.github.io/google-cloud-python/latest/bigtable/column-family.html#google.cloud.bigtable.column_family.ColumnFamily
        for column_id, column_family in existing.items():
            if column_family.gc_rule != wanted[column_id]:
                self.log.error("Column Family '%s' differs for table '%s'.", column_id, self.table_id)
                return False

        return True

    def execute(self, context):
        """
        Create the table in the configured instance.

        :param context: Airflow task context (not used by this operator).
        :raises AirflowException: if the instance is missing, or if the table
            already exists with column families that differ from the requested ones.
        """
        self.instance = self.hook.get_instance(project_id=self.project_id,
                                               instance_id=self.instance_id)
        if not self.instance:
            message = "Dependency: instance '{}' does not exist in project '{}'.".format(
                self.instance_id, self.project_id)
            raise AirflowException(message)

        try:
            self.hook.create_table(
                instance=self.instance,
                table_id=self.table_id,
                initial_split_keys=self.initial_split_keys,
                column_families=self.column_families
            )
        except google.api_core.exceptions.AlreadyExists:
            families_match = self._compare_column_families()
            if not families_match:
                raise AirflowException(
                    "Table '{}' already exists with different Column Families.".format(
                        self.table_id))
            self.log.info("The table '%s' already exists. Consider it as created", self.table_id)
class TestBigtableHookDefaultProjectId(unittest.TestCase):
    # Tests of BigtableHook created with GoogleCloudBaseHook.__init__ replaced by
    # `mock_base_gcp_hook_default_project_id`. `BigtableHook._get_client` is patched in
    # every test, so only the calls the hook makes on the (mocked) client are checked.
    # The assertions expect 'example-project' when no project_id is passed and the
    # explicitly passed project_id otherwise.

    def setUp(self):
        with mock.patch('airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook.__init__',
                        new=mock_base_gcp_hook_default_project_id):
            self.bigtable_hook_default_project_id = BigtableHook(gcp_conn_id='test')

    @staticmethod
    def _stub_instance(get_client, exists):
        # Make the instance produced by the patched `_get_client` report `exists`
        # from its `exists()` method. Returns the mocks standing in for
        # `client.instance` and `instance.exists` so callers can assert on them.
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        instance_exists_method.return_value = exists
        return instance_method, instance_exists_method

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_instance(self, get_client):
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=True)
        res = self.bigtable_hook_default_project_id.get_instance(instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='example-project')
        self.assertIsNotNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_instance_overridden_project_id(self, get_client):
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=True)
        res = self.bigtable_hook_default_project_id.get_instance(
            project_id='new-project',
            instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='new-project')
        self.assertIsNotNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_instance_no_instance(self, get_client):
        # A non-existing instance must be reported as None.
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=False)
        res = self.bigtable_hook_default_project_id.get_instance(instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='example-project')
        self.assertIsNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_instance(self, get_client):
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=True)
        delete_method = instance_method.return_value.delete
        res = self.bigtable_hook_default_project_id.delete_instance(instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        delete_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='example-project')
        self.assertIsNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_instance_overridden_project_id(self, get_client):
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=True)
        delete_method = instance_method.return_value.delete
        res = self.bigtable_hook_default_project_id.delete_instance(
            project_id='new-project',
            instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        delete_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='new-project')
        self.assertIsNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_instance_no_instance(self, get_client):
        # Nothing may be deleted when the instance does not exist.
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=False)
        delete_method = instance_method.return_value.delete
        self.bigtable_hook_default_project_id.delete_instance(instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        delete_method.assert_not_called()
        get_client.assert_called_once_with(project_id='example-project')

    @mock.patch('google.cloud.bigtable.instance.Instance.create')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_create_instance(self, get_client, instance_create):
        operation = mock.Mock()
        # Configure what `operation.result()` returns; `result.return_value` is the
        # attribute the mock actually consults when `result` is called.
        operation.result.return_value = Instance(instance_id=CBT_INSTANCE, client=get_client)
        instance_create.return_value = operation
        res = self.bigtable_hook_default_project_id.create_instance(
            instance_id=CBT_INSTANCE,
            main_cluster_id=CBT_CLUSTER,
            main_cluster_zone=CBT_ZONE)
        get_client.assert_called_once_with(project_id='example-project')
        instance_create.assert_called_once_with(clusters=mock.ANY)
        self.assertEqual(res.instance_id, 'instance')

    @mock.patch('google.cloud.bigtable.instance.Instance.create')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_create_instance_overridden_project_id(self, get_client, instance_create):
        operation = mock.Mock()
        # See test_create_instance: this sets the return value of `operation.result()`.
        operation.result.return_value = Instance(instance_id=CBT_INSTANCE, client=get_client)
        instance_create.return_value = operation
        res = self.bigtable_hook_default_project_id.create_instance(
            project_id='new-project',
            instance_id=CBT_INSTANCE,
            main_cluster_id=CBT_CLUSTER,
            main_cluster_zone=CBT_ZONE)
        get_client.assert_called_once_with(project_id='new-project')
        instance_create.assert_called_once_with(clusters=mock.ANY)
        self.assertEqual(res.instance_id, 'instance')

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_table(self, get_client):
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=True)
        table_delete_method = instance_method.return_value.table.return_value.delete
        self.bigtable_hook_default_project_id.delete_table(
            instance_id=CBT_INSTANCE,
            table_id=CBT_TABLE)
        get_client.assert_called_once_with(project_id='example-project')
        instance_exists_method.assert_called_once_with()
        table_delete_method.assert_called_once_with()

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_table_overridden_project_id(self, get_client):
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=True)
        table_delete_method = instance_method.return_value.table.return_value.delete
        self.bigtable_hook_default_project_id.delete_table(
            project_id='new-project',
            instance_id=CBT_INSTANCE,
            table_id=CBT_TABLE)
        get_client.assert_called_once_with(project_id='new-project')
        instance_exists_method.assert_called_once_with()
        table_delete_method.assert_called_once_with()

    # The remaining tests hand the hook a ready-made Instance, so the hook is expected
    # not to call `_get_client` at all; the patched mock therefore needs no configuration.

    @mock.patch('google.cloud.bigtable.table.Table.create')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_create_table(self, get_client, create):
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE,
            client=client)
        self.bigtable_hook_default_project_id.create_table(
            instance=instance,
            table_id=CBT_TABLE)
        get_client.assert_not_called()
        create.assert_called_once_with([], {})

    @mock.patch('google.cloud.bigtable.cluster.Cluster.update')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_update_cluster(self, get_client, update):
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE,
            client=client)
        self.bigtable_hook_default_project_id.update_cluster(
            instance=instance,
            cluster_id=CBT_CLUSTER,
            nodes=4)
        get_client.assert_not_called()
        update.assert_called_once_with()

    @mock.patch('google.cloud.bigtable.table.Table.list_column_families')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_list_column_families(self, get_client, list_column_families):
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE,
            client=client)
        self.bigtable_hook_default_project_id.get_column_families_for_table(
            instance=instance,
            table_id=CBT_TABLE)
        get_client.assert_not_called()
        list_column_families.assert_called_once_with()

    @mock.patch('google.cloud.bigtable.table.Table.get_cluster_states')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_cluster_states(self, get_client, get_cluster_states):
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE,
            client=client)
        self.bigtable_hook_default_project_id.get_cluster_states_for_table(
            instance=instance,
            table_id=CBT_TABLE)
        get_client.assert_not_called()
        get_cluster_states.assert_called_once_with()
class TestBigtableHookDefaultProjectId(unittest.TestCase):
    # Exercises BigtableHook built while GoogleCloudBaseHook.__init__ is swapped for
    # `mock_base_gcp_hook_default_project_id`. Each test patches
    # `BigtableHook._get_client`, so the checks are limited to how the hook uses the
    # mocked client. Per the assertions, 'example-project' is expected when
    # project_id is omitted and the passed value when it is given.

    def setUp(self):
        with mock.patch('airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook.__init__',
                        new=mock_base_gcp_hook_default_project_id):
            self.bigtable_hook_default_project_id = BigtableHook(gcp_conn_id='test')

    @staticmethod
    def _stub_instance(get_client, exists):
        # Set the value returned by `exists()` on the instance obtained through the
        # patched `_get_client`, and hand back the `client.instance` and
        # `instance.exists` mocks for later assertions.
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        instance_exists_method.return_value = exists
        return instance_method, instance_exists_method

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_instance(self, get_client):
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=True)
        res = self.bigtable_hook_default_project_id.get_instance(instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='example-project')
        self.assertIsNotNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_instance_overridden_project_id(self, get_client):
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=True)
        res = self.bigtable_hook_default_project_id.get_instance(
            project_id='new-project',
            instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='new-project')
        self.assertIsNotNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_instance_no_instance(self, get_client):
        # The hook must return None for an instance that does not exist.
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=False)
        res = self.bigtable_hook_default_project_id.get_instance(instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='example-project')
        self.assertIsNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_instance(self, get_client):
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=True)
        delete_method = instance_method.return_value.delete
        res = self.bigtable_hook_default_project_id.delete_instance(instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        delete_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='example-project')
        self.assertIsNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_instance_overridden_project_id(self, get_client):
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=True)
        delete_method = instance_method.return_value.delete
        res = self.bigtable_hook_default_project_id.delete_instance(
            project_id='new-project',
            instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        delete_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='new-project')
        self.assertIsNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_instance_no_instance(self, get_client):
        # A missing instance must not trigger a delete call.
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=False)
        delete_method = instance_method.return_value.delete
        self.bigtable_hook_default_project_id.delete_instance(instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        delete_method.assert_not_called()
        get_client.assert_called_once_with(project_id='example-project')

    @mock.patch('google.cloud.bigtable.instance.Instance.create')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_create_instance(self, get_client, instance_create):
        operation = mock.Mock()
        # `result.return_value` is what a call to `operation.result()` yields.
        operation.result.return_value = Instance(instance_id=CBT_INSTANCE, client=get_client)
        instance_create.return_value = operation
        res = self.bigtable_hook_default_project_id.create_instance(
            instance_id=CBT_INSTANCE,
            main_cluster_id=CBT_CLUSTER,
            main_cluster_zone=CBT_ZONE)
        get_client.assert_called_once_with(project_id='example-project')
        instance_create.assert_called_once_with(clusters=mock.ANY)
        self.assertEqual(res.instance_id, 'instance')

    @mock.patch('google.cloud.bigtable.instance.Instance.create')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_create_instance_overridden_project_id(self, get_client, instance_create):
        operation = mock.Mock()
        # `result.return_value` is what a call to `operation.result()` yields.
        operation.result.return_value = Instance(instance_id=CBT_INSTANCE, client=get_client)
        instance_create.return_value = operation
        res = self.bigtable_hook_default_project_id.create_instance(
            project_id='new-project',
            instance_id=CBT_INSTANCE,
            main_cluster_id=CBT_CLUSTER,
            main_cluster_zone=CBT_ZONE)
        get_client.assert_called_once_with(project_id='new-project')
        instance_create.assert_called_once_with(clusters=mock.ANY)
        self.assertEqual(res.instance_id, 'instance')

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_table(self, get_client):
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=True)
        table_delete_method = instance_method.return_value.table.return_value.delete
        self.bigtable_hook_default_project_id.delete_table(
            instance_id=CBT_INSTANCE,
            table_id=CBT_TABLE)
        get_client.assert_called_once_with(project_id='example-project')
        instance_exists_method.assert_called_once_with()
        table_delete_method.assert_called_once_with()

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_table_overridden_project_id(self, get_client):
        instance_method, instance_exists_method = self._stub_instance(get_client, exists=True)
        table_delete_method = instance_method.return_value.table.return_value.delete
        self.bigtable_hook_default_project_id.delete_table(
            project_id='new-project',
            instance_id=CBT_INSTANCE,
            table_id=CBT_TABLE)
        get_client.assert_called_once_with(project_id='new-project')
        instance_exists_method.assert_called_once_with()
        table_delete_method.assert_called_once_with()

    # From here on the hook receives an Instance object directly and is expected never
    # to call `_get_client`, so the patched mock is left unconfigured.

    @mock.patch('google.cloud.bigtable.table.Table.create')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_create_table(self, get_client, create):
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE,
            client=client)
        self.bigtable_hook_default_project_id.create_table(
            instance=instance,
            table_id=CBT_TABLE)
        get_client.assert_not_called()
        create.assert_called_once_with([], {})

    @mock.patch('google.cloud.bigtable.cluster.Cluster.update')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_update_cluster(self, get_client, update):
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE,
            client=client)
        self.bigtable_hook_default_project_id.update_cluster(
            instance=instance,
            cluster_id=CBT_CLUSTER,
            nodes=4)
        get_client.assert_not_called()
        update.assert_called_once_with()

    @mock.patch('google.cloud.bigtable.table.Table.list_column_families')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_list_column_families(self, get_client, list_column_families):
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE,
            client=client)
        self.bigtable_hook_default_project_id.get_column_families_for_table(
            instance=instance,
            table_id=CBT_TABLE)
        get_client.assert_not_called()
        list_column_families.assert_called_once_with()

    @mock.patch('google.cloud.bigtable.table.Table.get_cluster_states')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_cluster_states(self, get_client, get_cluster_states):
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE,
            client=client)
        self.bigtable_hook_default_project_id.get_cluster_states_for_table(
            instance=instance,
            table_id=CBT_TABLE)
        get_client.assert_not_called()
        get_cluster_states.assert_called_once_with()