class BigtableTableWaitForReplicationSensor(BaseSensorOperator, BigtableValidationMixin):
    """
    Sensor that checks whether a Cloud Bigtable table is replicated on all of its clusters.

    A missing instance or a missing table does not raise: the check just reports
    ``False`` and logs which dependency is absent.

    For more details about cluster states for a table, have a look at the reference:
    https://googleapis.github.io/google-cloud-python/latest/bigtable/table.html#google.cloud.bigtable.table.Table.get_cluster_states

    :type project_id: str
    :param project_id: The ID of the GCP project.
    :type instance_id: str
    :param instance_id: The ID of the Cloud Bigtable instance.
    :type table_id: str
    :param table_id: The ID of the table to check replication status.
    """
    REQUIRED_ATTRIBUTES = ('project_id', 'instance_id', 'table_id')
    template_fields = ['project_id', 'instance_id', 'table_id']

    @apply_defaults
    def __init__(self, project_id, instance_id, table_id, *args, **kwargs):
        """
        Store the identifiers, validate them and create the Bigtable hook.

        Any extra positional/keyword arguments are forwarded to the parent
        constructor.
        """
        # The identifiers are assigned first; presumably ``_validate_inputs``
        # (from the mixin) reads them via REQUIRED_ATTRIBUTES -- confirm there.
        self.project_id = project_id
        self.instance_id = instance_id
        self.table_id = table_id
        self._validate_inputs()
        self.hook = BigtableHook()
        super(BigtableTableWaitForReplicationSensor, self).__init__(*args, **kwargs)

    def poke(self, context):
        """
        Run one replication check.

        :param context: Execution context supplied by the caller; unused here.
        :return: ``True`` when every cluster state returned for the table equals
            the READY replication state (also when no states are returned);
            ``False`` when the instance or table is missing or any cluster is
            not ready yet.
        :rtype: bool
        """
        instance = self.hook.get_instance(self.project_id, self.instance_id)
        if not instance:
            self.log.info("Dependency: instance '%s' does not exist.", self.instance_id)
            return False

        try:
            states_by_cluster = self.hook.get_cluster_states_for_table(instance, self.table_id)
        except google.api_core.exceptions.NotFound:
            self.log.info(
                "Dependency: table '%s' does not exist in instance '%s'.", self.table_id, self.instance_id)
            return False

        expected = ClusterState(enums.Table.ClusterState.ReplicationState.READY)

        # Gather every lagging cluster so that each one is reported, not just the first.
        pending_clusters = [
            cluster_id
            for cluster_id, state in states_by_cluster.items()
            if state != expected
        ]
        for cluster_id in pending_clusters:
            self.log.info("Table '%s' is not yet replicated on cluster '%s'.", self.table_id, cluster_id)

        if pending_clusters:
            return False

        self.log.info("Table '%s' is replicated.", self.table_id)
        return True
Beispiel #2
0
class BigtableTableWaitForReplicationSensor(BaseSensorOperator,
                                            BigtableValidationMixin):
    """
    Sensor that checks whether a Cloud Bigtable table is replicated on all of its clusters.

    A missing instance or a missing table does not raise: the check just reports
    ``False`` and logs which dependency is absent.

    For more details about cluster states for a table, have a look at the reference:
    https://googleapis.github.io/google-cloud-python/latest/bigtable/table.html#google.cloud.bigtable.table.Table.get_cluster_states

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:BigtableTableWaitForReplicationSensor`

    :type instance_id: str
    :param instance_id: The ID of the Cloud Bigtable instance.
    :type table_id: str
    :param table_id: The ID of the table to check replication status.
    :type project_id: str
    :param project_id: Optional, the ID of the GCP project.
    """
    REQUIRED_ATTRIBUTES = ('instance_id', 'table_id')
    template_fields = ['project_id', 'instance_id', 'table_id']

    @apply_defaults
    def __init__(self, instance_id, table_id, project_id=None, *args, **kwargs):
        """
        Store the identifiers, validate them and create the Bigtable hook.

        Any extra positional/keyword arguments are forwarded to the parent
        constructor.
        """
        # The identifiers are assigned first; presumably ``_validate_inputs``
        # (from the mixin) reads them via REQUIRED_ATTRIBUTES -- confirm there.
        self.project_id = project_id
        self.instance_id = instance_id
        self.table_id = table_id
        self._validate_inputs()
        self.hook = BigtableHook()
        super(BigtableTableWaitForReplicationSensor, self).__init__(
            *args, **kwargs)

    def poke(self, context):
        """
        Run one replication check.

        :param context: Execution context supplied by the caller; unused here.
        :return: ``True`` when every cluster state returned for the table equals
            the READY replication state (also when no states are returned);
            ``False`` when the instance or table is missing or any cluster is
            not ready yet.
        :rtype: bool
        """
        instance = self.hook.get_instance(
            project_id=self.project_id, instance_id=self.instance_id)
        if not instance:
            self.log.info(
                "Dependency: instance '%s' does not exist.", self.instance_id)
            return False

        try:
            states_by_cluster = self.hook.get_cluster_states_for_table(
                instance=instance, table_id=self.table_id)
        except google.api_core.exceptions.NotFound:
            self.log.info(
                "Dependency: table '%s' does not exist in instance '%s'.",
                self.table_id, self.instance_id)
            return False

        expected = ClusterState(
            enums.Table.ClusterState.ReplicationState.READY)

        # Gather every lagging cluster so that each one is reported, not just
        # the first.
        pending_clusters = [
            cluster_id
            for cluster_id, state in states_by_cluster.items()
            if state != expected
        ]
        for cluster_id in pending_clusters:
            self.log.info(
                "Table '%s' is not yet replicated on cluster '%s'.",
                self.table_id, cluster_id)

        if pending_clusters:
            return False

        self.log.info("Table '%s' is replicated.", self.table_id)
        return True
Beispiel #3
0
class TestBigtableHookDefaultProjectId(unittest.TestCase):
    """
    Tests for ``BigtableHook`` when the base hook provides a default project id.

    ``BigtableHook._get_client`` is patched in every test, so no real Bigtable
    client is built. Tests without an explicit ``project_id`` expect
    ``'example-project'`` to reach ``_get_client``; the
    ``*_overridden_project_id`` tests expect the explicitly passed id instead.
    Tests that hand a ready-made ``Instance`` to the hook expect
    ``_get_client`` not to be called at all.
    """

    def setUp(self):
        """Build the hook with the base hook's ``__init__`` replaced by the test stub."""
        with mock.patch(
                'airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook.__init__',
                new=mock_base_gcp_hook_default_project_id):
            self.bigtable_hook_default_project_id = BigtableHook(
                gcp_conn_id='test')

    @mock.patch(
        'airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_instance(self, get_client):
        """An existing instance is returned, looked up with the default project."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        instance_exists_method.return_value = True
        res = self.bigtable_hook_default_project_id.get_instance(
            instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='example-project')
        self.assertIsNotNone(res)

    @mock.patch(
        'airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_instance_overridden_project_id(self, get_client):
        """An explicit ``project_id`` is the one passed to ``_get_client``."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        instance_exists_method.return_value = True
        res = self.bigtable_hook_default_project_id.get_instance(
            project_id='new-project', instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='new-project')
        self.assertIsNotNone(res)

    @mock.patch(
        'airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_instance_no_instance(self, get_client):
        """``get_instance`` returns ``None`` when the instance does not exist."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        instance_exists_method.return_value = False
        res = self.bigtable_hook_default_project_id.get_instance(
            instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='example-project')
        self.assertIsNone(res)

    @mock.patch(
        'airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_instance(self, get_client):
        """An existing instance is deleted, using the default project."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        instance_exists_method.return_value = True
        delete_method = instance_method.return_value.delete
        res = self.bigtable_hook_default_project_id.delete_instance(
            instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        delete_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='example-project')
        self.assertIsNone(res)

    @mock.patch(
        'airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_instance_overridden_project_id(self, get_client):
        """An existing instance is deleted, using the explicitly passed project."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        instance_exists_method.return_value = True
        delete_method = instance_method.return_value.delete
        res = self.bigtable_hook_default_project_id.delete_instance(
            project_id='new-project', instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        delete_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='new-project')
        self.assertIsNone(res)

    @mock.patch(
        'airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_instance_no_instance(self, get_client):
        """``delete`` is not invoked when the instance does not exist."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        instance_exists_method.return_value = False
        delete_method = instance_method.return_value.delete
        self.bigtable_hook_default_project_id.delete_instance(
            instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        delete_method.assert_not_called()
        get_client.assert_called_once_with(project_id='example-project')

    @mock.patch('google.cloud.bigtable.instance.Instance.create')
    @mock.patch(
        'airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_create_instance(self, get_client, instance_create):
        """``create_instance`` creates the instance under the default project."""
        operation = mock.Mock()
        # ``result`` is a method of the operation mock, so its return value
        # has to be configured through ``result.return_value``.
        operation.result.return_value = Instance(instance_id=CBT_INSTANCE,
                                                 client=get_client)
        instance_create.return_value = operation
        res = self.bigtable_hook_default_project_id.create_instance(
            instance_id=CBT_INSTANCE,
            main_cluster_id=CBT_CLUSTER,
            main_cluster_zone=CBT_ZONE)
        get_client.assert_called_once_with(project_id='example-project')
        instance_create.assert_called_once_with(clusters=mock.ANY)
        self.assertEqual(res.instance_id, 'instance')

    @mock.patch('google.cloud.bigtable.instance.Instance.create')
    @mock.patch(
        'airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_create_instance_overridden_project_id(self, get_client,
                                                   instance_create):
        """``create_instance`` creates the instance under the explicit project."""
        operation = mock.Mock()
        # ``result`` is a method of the operation mock, so its return value
        # has to be configured through ``result.return_value``.
        operation.result.return_value = Instance(instance_id=CBT_INSTANCE,
                                                 client=get_client)
        instance_create.return_value = operation
        res = self.bigtable_hook_default_project_id.create_instance(
            project_id='new-project',
            instance_id=CBT_INSTANCE,
            main_cluster_id=CBT_CLUSTER,
            main_cluster_zone=CBT_ZONE)
        get_client.assert_called_once_with(project_id='new-project')
        instance_create.assert_called_once_with(clusters=mock.ANY)
        self.assertEqual(res.instance_id, 'instance')

    @mock.patch(
        'airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_table(self, get_client):
        """``delete_table`` deletes the table, using the default project."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        table_delete_method = instance_method.return_value.table.return_value.delete
        instance_exists_method.return_value = True
        self.bigtable_hook_default_project_id.delete_table(
            instance_id=CBT_INSTANCE, table_id=CBT_TABLE)
        get_client.assert_called_once_with(project_id='example-project')
        instance_exists_method.assert_called_once_with()
        table_delete_method.assert_called_once_with()

    @mock.patch(
        'airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_table_overridden_project_id(self, get_client):
        """``delete_table`` deletes the table, using the explicit project."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        table_delete_method = instance_method.return_value.table.return_value.delete
        instance_exists_method.return_value = True
        self.bigtable_hook_default_project_id.delete_table(
            project_id='new-project',
            instance_id=CBT_INSTANCE,
            table_id=CBT_TABLE)
        get_client.assert_called_once_with(project_id='new-project')
        instance_exists_method.assert_called_once_with()
        table_delete_method.assert_called_once_with()

    # The remaining tests pass an already-built ``Instance`` to the hook and
    # assert that ``_get_client`` is never called, so nothing is configured on
    # the ``get_client`` mock.

    @mock.patch('google.cloud.bigtable.table.Table.create')
    @mock.patch(
        'airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_create_table(self, get_client, create):
        """``create_table`` calls ``Table.create`` without building a client."""
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE, client=client)
        self.bigtable_hook_default_project_id.create_table(instance=instance,
                                                           table_id=CBT_TABLE)
        get_client.assert_not_called()
        create.assert_called_once_with([], {})

    @mock.patch('google.cloud.bigtable.cluster.Cluster.update')
    @mock.patch(
        'airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_update_cluster(self, get_client, update):
        """``update_cluster`` calls ``Cluster.update`` without building a client."""
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE, client=client)
        self.bigtable_hook_default_project_id.update_cluster(
            instance=instance, cluster_id=CBT_CLUSTER, nodes=4)
        get_client.assert_not_called()
        update.assert_called_once_with()

    @mock.patch('google.cloud.bigtable.table.Table.list_column_families')
    @mock.patch(
        'airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_list_column_families(self, get_client, list_column_families):
        """Column families are listed via ``Table.list_column_families`` only."""
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE, client=client)
        self.bigtable_hook_default_project_id.get_column_families_for_table(
            instance=instance, table_id=CBT_TABLE)
        get_client.assert_not_called()
        list_column_families.assert_called_once_with()

    @mock.patch('google.cloud.bigtable.table.Table.get_cluster_states')
    @mock.patch(
        'airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_cluster_states(self, get_client, get_cluster_states):
        """Cluster states are fetched via ``Table.get_cluster_states`` only."""
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE, client=client)
        self.bigtable_hook_default_project_id.get_cluster_states_for_table(
            instance=instance, table_id=CBT_TABLE)
        get_client.assert_not_called()
        get_cluster_states.assert_called_once_with()
class TestBigtableHookDefaultProjectId(unittest.TestCase):
    """
    Tests for ``BigtableHook`` when the base hook provides a default project id.

    ``BigtableHook._get_client`` is patched in every test, so no real Bigtable
    client is built. Tests without an explicit ``project_id`` expect
    ``'example-project'`` to reach ``_get_client``; the
    ``*_overridden_project_id`` tests expect the explicitly passed id instead.
    Tests that hand a ready-made ``Instance`` to the hook expect
    ``_get_client`` not to be called at all.
    """

    def setUp(self):
        """Build the hook with the base hook's ``__init__`` replaced by the test stub."""
        with mock.patch('airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook.__init__',
                        new=mock_base_gcp_hook_default_project_id):
            self.bigtable_hook_default_project_id = BigtableHook(gcp_conn_id='test')

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_instance(self, get_client):
        """An existing instance is returned, looked up with the default project."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        instance_exists_method.return_value = True
        res = self.bigtable_hook_default_project_id.get_instance(
            instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='example-project')
        self.assertIsNotNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_instance_overridden_project_id(self, get_client):
        """An explicit ``project_id`` is the one passed to ``_get_client``."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        instance_exists_method.return_value = True
        res = self.bigtable_hook_default_project_id.get_instance(
            project_id='new-project',
            instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='new-project')
        self.assertIsNotNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_instance_no_instance(self, get_client):
        """``get_instance`` returns ``None`` when the instance does not exist."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        instance_exists_method.return_value = False
        res = self.bigtable_hook_default_project_id.get_instance(
            instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='example-project')
        self.assertIsNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_instance(self, get_client):
        """An existing instance is deleted, using the default project."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        instance_exists_method.return_value = True
        delete_method = instance_method.return_value.delete
        res = self.bigtable_hook_default_project_id.delete_instance(
            instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        delete_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='example-project')
        self.assertIsNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_instance_overridden_project_id(self, get_client):
        """An existing instance is deleted, using the explicitly passed project."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        instance_exists_method.return_value = True
        delete_method = instance_method.return_value.delete
        res = self.bigtable_hook_default_project_id.delete_instance(
            project_id='new-project', instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        delete_method.assert_called_once_with()
        get_client.assert_called_once_with(project_id='new-project')
        self.assertIsNone(res)

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_instance_no_instance(self, get_client):
        """``delete`` is not invoked when the instance does not exist."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        instance_exists_method.return_value = False
        delete_method = instance_method.return_value.delete
        self.bigtable_hook_default_project_id.delete_instance(
            instance_id=CBT_INSTANCE)
        instance_method.assert_called_once_with('instance')
        instance_exists_method.assert_called_once_with()
        delete_method.assert_not_called()
        get_client.assert_called_once_with(project_id='example-project')

    @mock.patch('google.cloud.bigtable.instance.Instance.create')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_create_instance(self, get_client, instance_create):
        """``create_instance`` creates the instance under the default project."""
        operation = mock.Mock()
        # ``result`` is a method of the operation mock, so its return value
        # has to be configured through ``result.return_value``.
        operation.result.return_value = Instance(instance_id=CBT_INSTANCE, client=get_client)
        instance_create.return_value = operation
        res = self.bigtable_hook_default_project_id.create_instance(
            instance_id=CBT_INSTANCE,
            main_cluster_id=CBT_CLUSTER,
            main_cluster_zone=CBT_ZONE)
        get_client.assert_called_once_with(project_id='example-project')
        instance_create.assert_called_once_with(clusters=mock.ANY)
        self.assertEqual(res.instance_id, 'instance')

    @mock.patch('google.cloud.bigtable.instance.Instance.create')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_create_instance_overridden_project_id(self, get_client, instance_create):
        """``create_instance`` creates the instance under the explicit project."""
        operation = mock.Mock()
        # ``result`` is a method of the operation mock, so its return value
        # has to be configured through ``result.return_value``.
        operation.result.return_value = Instance(instance_id=CBT_INSTANCE, client=get_client)
        instance_create.return_value = operation
        res = self.bigtable_hook_default_project_id.create_instance(
            project_id='new-project',
            instance_id=CBT_INSTANCE,
            main_cluster_id=CBT_CLUSTER,
            main_cluster_zone=CBT_ZONE)
        get_client.assert_called_once_with(project_id='new-project')
        instance_create.assert_called_once_with(clusters=mock.ANY)
        self.assertEqual(res.instance_id, 'instance')

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_table(self, get_client):
        """``delete_table`` deletes the table, using the default project."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        table_delete_method = instance_method.return_value.table.return_value.delete
        instance_exists_method.return_value = True
        self.bigtable_hook_default_project_id.delete_table(
            instance_id=CBT_INSTANCE,
            table_id=CBT_TABLE)
        get_client.assert_called_once_with(project_id='example-project')
        instance_exists_method.assert_called_once_with()
        table_delete_method.assert_called_once_with()

    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_delete_table_overridden_project_id(self, get_client):
        """``delete_table`` deletes the table, using the explicit project."""
        instance_method = get_client.return_value.instance
        instance_exists_method = instance_method.return_value.exists
        table_delete_method = instance_method.return_value.table.return_value.delete
        instance_exists_method.return_value = True
        self.bigtable_hook_default_project_id.delete_table(
            project_id='new-project',
            instance_id=CBT_INSTANCE,
            table_id=CBT_TABLE)
        get_client.assert_called_once_with(project_id='new-project')
        instance_exists_method.assert_called_once_with()
        table_delete_method.assert_called_once_with()

    # The remaining tests pass an already-built ``Instance`` to the hook and
    # assert that ``_get_client`` is never called, so nothing is configured on
    # the ``get_client`` mock.

    @mock.patch('google.cloud.bigtable.table.Table.create')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_create_table(self, get_client, create):
        """``create_table`` calls ``Table.create`` without building a client."""
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE,
            client=client)
        self.bigtable_hook_default_project_id.create_table(
            instance=instance,
            table_id=CBT_TABLE)
        get_client.assert_not_called()
        create.assert_called_once_with([], {})

    @mock.patch('google.cloud.bigtable.cluster.Cluster.update')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_update_cluster(self, get_client, update):
        """``update_cluster`` calls ``Cluster.update`` without building a client."""
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE,
            client=client)
        self.bigtable_hook_default_project_id.update_cluster(
            instance=instance,
            cluster_id=CBT_CLUSTER,
            nodes=4)
        get_client.assert_not_called()
        update.assert_called_once_with()

    @mock.patch('google.cloud.bigtable.table.Table.list_column_families')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_list_column_families(self, get_client, list_column_families):
        """Column families are listed via ``Table.list_column_families`` only."""
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE,
            client=client)
        self.bigtable_hook_default_project_id.get_column_families_for_table(
            instance=instance, table_id=CBT_TABLE)
        get_client.assert_not_called()
        list_column_families.assert_called_once_with()

    @mock.patch('google.cloud.bigtable.table.Table.get_cluster_states')
    @mock.patch('airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook._get_client')
    def test_get_cluster_states(self, get_client, get_cluster_states):
        """Cluster states are fetched via ``Table.get_cluster_states`` only."""
        client = mock.Mock(Client)
        instance = google.cloud.bigtable.instance.Instance(
            instance_id=CBT_INSTANCE,
            client=client)
        self.bigtable_hook_default_project_id.get_cluster_states_for_table(
            instance=instance, table_id=CBT_TABLE)
        get_client.assert_not_called()
        get_cluster_states.assert_called_once_with()