Example #1
0
    def test_run_incompatible_schema(self, spark_session):
        """Expect ``hook.run`` to raise ``ValueError`` for the mocked schema.

        The client's ``sql`` method is replaced by a mock reporting
        ``feature1`` as ``text`` and ``feature2`` as ``bigint``; running the
        hook against the dataframe built below must fail with the
        schema-incompatibility message.
        """
        # arrange
        mocked_schema = [
            {"column_name": "feature1", "type": "text"},
            {"column_name": "feature2", "type": "bigint"},
        ]
        client = CassandraClient(host=["mock"], keyspace="dummy_keyspace")
        client.sql = MagicMock(return_value=mocked_schema)  # type: ignore

        select_statement = "select 'abc' as feature1, 1 as feature2"
        dataframe = spark_session.sql(select_statement)

        schema_hook = CassandraTableSchemaCompatibilityHook(client, "table")

        # act and assert
        expected_message = "There's a schema incompatibility between"
        with pytest.raises(ValueError, match=expected_message):
            schema_hook.run(dataframe)
Example #2
0
    def test_run_compatible_schema(self, spark_session):
        """Expect ``hook.run`` to hand back the input dataframe unchanged.

        The client's ``sql`` method is replaced by a mock reporting
        ``feature1`` as ``text`` and ``feature2`` as ``int``; the value
        returned by the hook must compare equal to the dataframe passed in.
        """
        # arrange
        mocked_schema = [
            {"column_name": "feature1", "type": "text"},
            {"column_name": "feature2", "type": "int"},
        ]
        client = CassandraClient(host=["mock"], keyspace="dummy_keyspace")
        client.sql = MagicMock(return_value=mocked_schema)  # type: ignore

        select_statement = "select 'abc' as feature1, 1 as feature2"
        dataframe = spark_session.sql(select_statement)

        schema_hook = CassandraTableSchemaCompatibilityHook(client, "table")

        # act
        returned = schema_hook.run(dataframe)

        # assert
        assert returned == dataframe
Example #3
0
    def test_cassandra_get_schema(self,
                                  cassandra_client: CassandraClient) -> None:
        """Check the query text that ``get_schema`` passes to ``sql``.

        ``sql`` is mocked, so only the first positional argument of its last
        call is inspected; it is compared with the expected statement after
        both strings go through ``sanitize_string``.
        """
        # arrange
        mocked_rows = [
            {"column_name": "feature1", "type": "text"},
            {"column_name": "feature2", "type": "bigint"},
        ]
        cassandra_client.sql = MagicMock(  # type: ignore
            return_value=mocked_rows)

        table_name = "table"
        expected_query = (
            "SELECT column_name, type FROM system_schema.columns "
            "WHERE keyspace_name = 'dummy_keyspace' "
            f"AND table_name = '{table_name}';"
        )

        # act
        cassandra_client.get_schema(table_name)
        last_call = cassandra_client.sql.call_args
        issued_query = last_call[0][0]  # first positional argument

        # assert
        assert sanitize_string(issued_query) == sanitize_string(expected_query)
Example #4
0
    def test_cassandra_create_table(
        self,
        cassandra_client: CassandraClient,
        cassandra_feature_set: List[Dict[str, Any]],
    ) -> None:
        """Check the statement that ``create_table`` passes to ``sql``.

        ``sql`` is mocked, so only the first positional argument of its last
        call is inspected; it is compared with the expected ``CREATE TABLE``
        statement after both strings go through ``sanitize_string``.
        """
        # arrange
        expected_query = """
            CREATE TABLE dummy_keyspace.dummy_table
            (id int, rent_per_month float, PRIMARY KEY (id));
                """
        table_name = "dummy_table"
        id_column = {"column_name": "id", "type": "int", "primary_key": True}
        rent_column = {
            "column_name": "rent_per_month",
            "type": "float",
            "primary_key": False,
        }
        columns: List[CassandraColumn] = [id_column, rent_column]

        cassandra_client.sql = MagicMock()  # type: ignore

        # act
        cassandra_client.create_table(columns, table_name)
        last_call = cassandra_client.sql.call_args
        issued_query = last_call[0][0]  # first positional argument

        # assert
        assert sanitize_string(issued_query) == sanitize_string(expected_query)
Example #5
0
    def test_cassandra_client_sql(
        self,
        cassandra_client: CassandraClient,
        cassandra_feature_set: List[Dict[str, Any]],
    ) -> None:
        """Check the shape of what the mocked ``sql`` call returns.

        ``sql`` is replaced by a mock returning ``cassandra_feature_set``;
        the result must be a list whose elements are all dicts.
        """
        statement = "select feature1, feature2 from cassandra_feature_set"
        cassandra_client.sql = MagicMock(  # type: ignore
            return_value=cassandra_feature_set)

        # first call: the container type
        rows = cassandra_client.sql(statement)
        assert isinstance(rows, list)

        # second call: the type of every element
        for row in cassandra_client.sql(statement):
            assert isinstance(row, dict)
Example #6
0
    def test_conn(self, cassandra_client: CassandraClient) -> None:
        """Check that the provided client starts without a session.

        Args:
            cassandra_client: client supplied by the ``cassandra_client``
                fixture; it is used as-is, with no connection opened here.
        """
        # act
        start_conn = cassandra_client._session

        # assert
        assert start_conn is None
 def __init__(self) -> None:
     """Build a Cassandra client from ``CassandraConfig`` and pass it up.

     The configuration object is kept on ``self._db_config``; its host,
     keyspace, username and password are used to create the
     ``CassandraClient`` handed to the parent initializer.
     """
     self._db_config = CassandraConfig()
     config = self._db_config

     client = CassandraClient(
         host=[config.host],  # the client takes a list of hosts
         keyspace=config.keyspace,  # type: ignore
         user=config.username,
         password=config.password,
     )
     super(CassandraMigration, self).__init__(client)
Example #8
0
    def test_cassandra_without_session(
            self, cassandra_client: CassandraClient) -> None:
        """Expect ``RuntimeError`` from every query method without a session.

        ``sql``, ``create_table`` and ``get_schema`` are each called on the
        provided client and must all raise with the same "no session"
        message.

        Args:
            cassandra_client: client supplied by the ``cassandra_client``
                fixture; no session is opened on it in this test.
        """
        # The same message is expected from all three entry points.
        no_session_message = "There's no session available for this query."

        columns = [
            {
                "column_name": "id",
                "type": "int",
                "primary_key": True,
            },
            {
                "column_name": "rent_per_month",
                "type": "float",
                "primary_key": False,
            },
        ]

        with pytest.raises(RuntimeError, match=no_session_message):
            cassandra_client.sql(
                query="select feature1, feature2 from cassandra_feature_set")

        with pytest.raises(RuntimeError, match=no_session_message):
            cassandra_client.create_table(columns, "test")

        with pytest.raises(RuntimeError, match=no_session_message):
            cassandra_client.get_schema("test")
Example #9
0
def cassandra_client() -> CassandraClient:
    """Return a ``CassandraClient`` for host ``"mock"`` and ``dummy_keyspace``.

    NOTE(review): presumably registered as a pytest fixture by a decorator
    outside this view; confirm against the full file.
    """
    hosts = ["mock"]
    key_space = "dummy_keyspace"
    return CassandraClient(cassandra_host=hosts,
                           cassandra_key_space=key_space)