Example #1
0
 def test_poke(self, mock_hook):
     """poke() must ask the (mocked) Cassandra hook whether table 't' exists."""
     sensor_kwargs = dict(
         task_id='test_task',
         cassandra_conn_id='cassandra_default',
         table='t',
     )
     CassandraTableSensor(**sensor_kwargs).poke(None)
     mock_hook.return_value.table_exists.assert_called_once_with('t')
 def setUp(self):
     """Build a throwaway DAG and the table sensor shared by the tests."""
     default_args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
     self.dag = DAG('test_dag_id', default_args=default_args)
     self.sensor = CassandraTableSensor(
         table='t',
         task_id='test_task',
         cassandra_conn_id='cassandra_default',
         dag=self.dag,
     )
Example #3
0
    def test_poke(self, mock_hook):
        """poke() must report the table as present and consult the hook exactly once."""
        sensor = CassandraTableSensor(
            task_id='test_task',
            cassandra_conn_id=TEST_CASSANDRA_CONN_ID,
            table=TEST_CASSANDRA_TABLE,
        )

        assert sensor.poke({})

        mock_hook.assert_called_once_with(TEST_CASSANDRA_CONN_ID)
        mock_hook.return_value.table_exists.assert_called_once_with(TEST_CASSANDRA_TABLE)
Example #4
0
    def test_poke_should_succeed_for_table_with_mentioned_keyspace(
            self, mock_hook):
        """A 'keyspace.table'-qualified name is forwarded to the hook unchanged."""
        sensor = CassandraTableSensor(
            task_id='test_task',
            cassandra_conn_id=TEST_CASSANDRA_CONN_ID,
            table=TEST_CASSANDRA_TABLE_WITH_KEYSPACE,
        )

        assert sensor.poke({})

        mock_hook.assert_called_once_with(TEST_CASSANDRA_CONN_ID)
        mock_hook.return_value.table_exists.assert_called_once_with(
            TEST_CASSANDRA_TABLE_WITH_KEYSPACE)
Example #5
0
    def test_poke_should_return_false_for_non_existing_table(self, mock_hook):
        """When the hook says the table is missing, poke() must return False."""
        mock_hook.return_value.table_exists.return_value = False
        sensor = CassandraTableSensor(
            task_id='test_task',
            cassandra_conn_id=TEST_CASSANDRA_CONN_ID,
            table=TEST_CASSANDRA_TABLE,
        )

        assert not sensor.poke({})

        mock_hook.assert_called_once_with(TEST_CASSANDRA_CONN_ID)
        mock_hook.return_value.table_exists.assert_called_once_with(
            TEST_CASSANDRA_TABLE)
class TestCassandraTableSensor(unittest.TestCase):
    """Unit tests for CassandraTableSensor wired into a minimal test DAG."""

    def setUp(self):
        """Create the DAG and the sensor instance reused by every test."""
        self.dag = DAG(
            'test_dag_id',
            default_args={'owner': 'airflow', 'start_date': DEFAULT_DATE},
        )
        self.sensor = CassandraTableSensor(
            task_id='test_task',
            cassandra_conn_id='cassandra_default',
            dag=self.dag,
            table='t',
        )

    @patch("airflow.contrib.hooks.cassandra_hook.CassandraHook.table_exists")
    def test_poke(self, mock_table_exists):
        """poke() must delegate the existence check to CassandraHook.table_exists."""
        self.sensor.poke(None)
        mock_table_exists.assert_called_once_with('t')
def cassandra_to_avro():
    """Build a DAG fragment: wait for the Cassandra table, then run the Spark
    job that copies it to Avro (``table_sensor >> load_and_save_using_spark``).

    The two inner callables are un-wired (their ``@task`` decorators and call
    sites are commented out) pure-Python alternatives to the Spark job.
    """
    # @task
    def load_from_cassandra() -> List[Tuple[str, str]]:
        """Read (title, description) pairs from the ``videos`` table."""
        conn: Connection = Connection.get_connection_from_secrets('local_cassandra')
        auth_provider = PlainTextAuthProvider(username=conn.login, password=conn.password)
        cluster: Cluster = Cluster([conn.host], conn.port, auth_provider=auth_provider)
        session: Session = cluster.connect(conn.schema)
        rows: ResultSet = session.execute("SELECT title, description FROM videos")
        result = [(row[0], row[1]) for row in rows]
        print(result)
        return result

    # @task
    def write_to_hdfs(rows: List[Tuple[str, str]]):
        """Serialize *rows* to a local Avro file and upload it to HDFS."""
        conn: Connection = Connection.get_connection_from_secrets('local_hdfs')
        uri = conn.get_uri()
        # Strip embedded credentials ("http://user:pass@host") from the URI.
        # Raw string avoids the invalid "\w" escape-sequence warning.
        pat = re.compile(r"http://(\w+(:\w+)?)?@")
        print(conn.get_uri())

        uri = pat.sub("http://", uri)
        print(uri)
        print(conn.login)
        client = InsecureClient(uri, user=conn.login)
        sch = avro.schema.make_avsc_object({
            'type': 'record',
            'name': 'Video',
            'fields': [
                {'type': {'type': 'string', 'avro.java.string': 'String'}, 'name': 'title'},
                {'type': ["null", {'type': 'string', 'avro.java.string': 'String'}], 'name': 'description'},
            ]
        })
        local_file_name = 'videos.avro'
        writer = DataFileWriter(open(local_file_name, "wb"), DatumWriter(), sch)
        try:
            for row in rows:
                print(row)
                writer.append({"title": row[0], "description": row[1]})
        finally:
            # Ensure the underlying file handle is released even if append fails.
            writer.close()
        client.upload('/tmp/videos.avro', local_file_name)

    load_and_save_using_spark = SparkSubmitOperator(
        task_id="cassandra_to_avro_spark",
        conn_id="spark_local",
        name="cassandra_to_avro_spark",
        application="dags/cassandra_to_avro_spark.py",
        packages="org.apache.spark:spark-avro_2.12:3.1.1,com.datastax.spark:spark-cassandra-connector_2.12:3.0.0",
    )

    # ctx = get_current_context()
    table_sensor = CassandraTableSensor(
        task_id="cassandra_table_sensor",
        cassandra_conn_id='local_cassandra',
        table="killrvideo.videos",
    )

    # load = load_from_cassandra()
    # write_to_hdfs(load)
    table_sensor >> load_and_save_using_spark
Example #8
0
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Example Airflow DAG that checks whether a Cassandra table and a record exist,
using `CassandraTableSensor` and `CassandraRecordSensor`.
"""
from datetime import datetime

from airflow.models import DAG
from airflow.providers.apache.cassandra.sensors.record import CassandraRecordSensor
from airflow.providers.apache.cassandra.sensors.table import CassandraTableSensor

# [START howto_operator_cassandra_sensors]
with DAG(
    dag_id='example_cassandra_operator',
    schedule_interval=None,
    start_date=datetime(2021, 1, 1),
    default_args={'table': 'keyspace_name.table_name'},
    catchup=False,
    tags=['example'],
) as dag:
    # Both sensors pick up their table name from default_args above.
    table_sensor = CassandraTableSensor(task_id="cassandra_table_sensor")

    record_sensor = CassandraRecordSensor(
        task_id="cassandra_record_sensor",
        keys={"p1": "v1", "p2": "v2"},
    )
# [END howto_operator_cassandra_sensors]
Example #9
0
# Default arguments applied to every task in the example DAG below.
args = {'owner': 'Airflow'}

with DAG(
    dag_id='example_cassandra_operator',
    default_args=args,
    schedule_interval=None,
    start_date=days_ago(2),
    tags=['example'],
) as dag:
    # [START howto_operator_cassandra_table_sensor]
    table_sensor = CassandraTableSensor(
        task_id="cassandra_table_sensor",
        cassandra_conn_id="cassandra_default",
        table="keyspace_name.table_name",
    )
    # [END howto_operator_cassandra_table_sensor]

    # [START howto_operator_cassandra_record_sensor]
    record_sensor = CassandraRecordSensor(
        task_id="cassandra_record_sensor",
        cassandra_conn_id="cassandra_default",
        table="keyspace_name.table_name",
        keys={"p1": "v1", "p2": "v2"},
    )
    # [END howto_operator_cassandra_record_sensor]