def test_poke(self, mock_hook):
    sensor = CassandraTableSensor(
        task_id='test_task',
        cassandra_conn_id='cassandra_default',
        table='t',
    )
    sensor.poke(None)

    mock_hook.return_value.table_exists.assert_called_once_with('t')
def test_poke(self, mock_hook):
    sensor = CassandraTableSensor(
        task_id='test_task',
        cassandra_conn_id=TEST_CASSANDRA_CONN_ID,
        table=TEST_CASSANDRA_TABLE,
    )

    exists = sensor.poke(dict())

    assert exists
    mock_hook.return_value.table_exists.assert_called_once_with(TEST_CASSANDRA_TABLE)
    mock_hook.assert_called_once_with(TEST_CASSANDRA_CONN_ID)
def test_poke_should_succeed_for_table_with_mentioned_keyspace(self, mock_hook):
    sensor = CassandraTableSensor(
        task_id='test_task',
        cassandra_conn_id=TEST_CASSANDRA_CONN_ID,
        table=TEST_CASSANDRA_TABLE_WITH_KEYSPACE,
    )

    exists = sensor.poke(dict())

    assert exists
    mock_hook.return_value.table_exists.assert_called_once_with(TEST_CASSANDRA_TABLE_WITH_KEYSPACE)
    mock_hook.assert_called_once_with(TEST_CASSANDRA_CONN_ID)
def test_poke_should_return_false_for_non_existing_table(self, mock_hook):
    mock_hook.return_value.table_exists.return_value = False

    sensor = CassandraTableSensor(
        task_id='test_task',
        cassandra_conn_id=TEST_CASSANDRA_CONN_ID,
        table=TEST_CASSANDRA_TABLE,
    )

    exists = sensor.poke(dict())

    assert not exists
    mock_hook.return_value.table_exists.assert_called_once_with(TEST_CASSANDRA_TABLE)
    mock_hook.assert_called_once_with(TEST_CASSANDRA_CONN_ID)
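# The three test methods above rely on module-level constants and on a @patch
# decorator that injects `mock_hook`; neither appears in the snippets themselves.
# A minimal sketch of that scaffolding, assuming the sensor module imports
# CassandraHook (the patch target and constant values here are assumptions):
import unittest
from unittest.mock import patch

from airflow.providers.apache.cassandra.sensors.table import CassandraTableSensor

# Assumed values; the snippets above only reference these names.
TEST_CASSANDRA_CONN_ID = 'cassandra_default'
TEST_CASSANDRA_TABLE = 't'
TEST_CASSANDRA_TABLE_WITH_KEYSPACE = 'keyspace.t'


class TestCassandraTableSensor(unittest.TestCase):
    # Patching CassandraHook where the sensor module imports it makes the
    # mock class the first argument of each decorated test method.
    @patch("airflow.providers.apache.cassandra.sensors.table.CassandraHook")
    def test_poke(self, mock_hook):
        sensor = CassandraTableSensor(
            task_id='test_task',
            cassandra_conn_id=TEST_CASSANDRA_CONN_ID,
            table=TEST_CASSANDRA_TABLE,
        )
        # The mocked hook's table_exists() returns a truthy MagicMock.
        assert sensor.poke(dict())
        mock_hook.assert_called_once_with(TEST_CASSANDRA_CONN_ID)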
class TestCassandraTableSensor(unittest.TestCase):
    def setUp(self):
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG('test_dag_id', default_args=args)
        self.sensor = CassandraTableSensor(
            task_id='test_task',
            cassandra_conn_id='cassandra_default',
            dag=self.dag,
            table='t',
        )

    @patch("airflow.contrib.hooks.cassandra_hook.CassandraHook.table_exists")
    def test_poke(self, mock_table_exists):
        self.sensor.poke(None)
        mock_table_exists.assert_called_once_with('t')
def cassandra_to_avro():
    # @task
    def load_from_cassandra() -> List[Tuple[str, str]]:
        conn: Connection = Connection.get_connection_from_secrets('local_cassandra')
        auth_provider = PlainTextAuthProvider(username=conn.login, password=conn.password)
        cluster: Cluster = Cluster([conn.host], conn.port, auth_provider=auth_provider)
        session: Session = cluster.connect(conn.schema)
        rows: ResultSet = session.execute("SELECT title, description FROM videos")
        result = list(map(lambda row: (row[0], row[1]), rows))
        print(result)
        return result

    # @task
    def write_to_hdfs(rows: List[Tuple[str, str]]):
        conn: Connection = Connection.get_connection_from_secrets('local_hdfs')
        uri = conn.get_uri()
        # Strip embedded credentials from the URI before handing it to the HDFS client.
        # Use a raw string so the \w escapes reach the regex engine intact.
        pat = re.compile(r"http://(\w+(:\w+)?)?@")
        print(conn.get_uri())
        uri = pat.sub("http://", uri)
        print(uri)
        print(conn.login)
        client = InsecureClient(uri, user=conn.login)

        sch = avro.schema.make_avsc_object({
            'type': 'record',
            'name': 'Video',
            'fields': [
                {'type': {'type': 'string', 'avro.java.string': 'String'}, 'name': 'title'},
                {'type': ["null", {'type': 'string', 'avro.java.string': 'String'}], 'name': 'description'},
            ],
        })
        local_file_name = 'videos.avro'
        writer = DataFileWriter(open(local_file_name, "wb"), DatumWriter(), sch)
        for row in rows:
            print(row)
            writer.append({"title": row[0], "description": row[1]})
        writer.close()
        client.upload('/tmp/videos.avro', local_file_name)

    load_and_save_using_spark = SparkSubmitOperator(
        task_id="cassandra_to_avro_spark",
        conn_id="spark_local",
        name="cassandra_to_avro_spark",
        application="dags/cassandra_to_avro_spark.py",
        packages="org.apache.spark:spark-avro_2.12:3.1.1,com.datastax.spark:spark-cassandra-connector_2.12:3.0.0",
    )

    # ctx = get_current_context()
    table_sensor = CassandraTableSensor(
        task_id="cassandra_table_sensor",
        cassandra_conn_id='local_cassandra',
        table="killrvideo.videos",
    )

    # load = load_from_cassandra()
    # write_to_hdfs(load)
    table_sensor >> load_and_save_using_spark
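# The SparkSubmitOperator above points at dags/cassandra_to_avro_spark.py but that
# file is not shown. A minimal sketch of what it might contain, given the
# spark-cassandra-connector and spark-avro packages listed in the operator
# (the connection host and output path here are assumptions):
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName("cassandra_to_avro_spark")
    # Assumed Cassandra host; spark-cassandra-connector reads this setting.
    .config("spark.cassandra.connection.host", "localhost")
    .getOrCreate()
)

# Read the same killrvideo.videos table the CassandraTableSensor waits on.
videos = (
    spark.read.format("org.apache.spark.sql.cassandra")
    .options(keyspace="killrvideo", table="videos")
    .load()
    .select("title", "description")
)

# The "avro" format is supplied by the spark-avro package; the path is an assumption.
videos.write.format("avro").mode("overwrite").save("/tmp/videos_avro")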
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Example Airflow DAG showing how to check whether a Cassandra table and a record
in it exist, using `CassandraTableSensor` and `CassandraRecordSensor`.
"""
from datetime import datetime

from airflow.models import DAG
from airflow.providers.apache.cassandra.sensors.record import CassandraRecordSensor
from airflow.providers.apache.cassandra.sensors.table import CassandraTableSensor

# [START howto_operator_cassandra_sensors]
with DAG(
    dag_id='example_cassandra_operator',
    schedule_interval=None,
    start_date=datetime(2021, 1, 1),
    default_args={'table': 'keyspace_name.table_name'},
    catchup=False,
    tags=['example'],
) as dag:
    table_sensor = CassandraTableSensor(task_id="cassandra_table_sensor")
    record_sensor = CassandraRecordSensor(
        task_id="cassandra_record_sensor",
        keys={"p1": "v1", "p2": "v2"},
    )
# [END howto_operator_cassandra_sensors]
args = {
    'owner': 'Airflow',
}

with DAG(
    dag_id='example_cassandra_operator',
    default_args=args,
    schedule_interval=None,
    start_date=days_ago(2),
    tags=['example'],
) as dag:
    # [START howto_operator_cassandra_table_sensor]
    table_sensor = CassandraTableSensor(
        task_id="cassandra_table_sensor",
        cassandra_conn_id="cassandra_default",
        table="keyspace_name.table_name",
    )
    # [END howto_operator_cassandra_table_sensor]

    # [START howto_operator_cassandra_record_sensor]
    record_sensor = CassandraRecordSensor(
        task_id="cassandra_record_sensor",
        cassandra_conn_id="cassandra_default",
        table="keyspace_name.table_name",
        keys={
            "p1": "v1",
            "p2": "v2",
        },
    )
    # [END howto_operator_cassandra_record_sensor]
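# Both sensors delegate their checks to CassandraHook. A minimal sketch of
# performing the same table and record checks directly through the hook
# (the connection id, table, and key values mirror the example above):
from airflow.providers.apache.cassandra.hooks.cassandra import CassandraHook

hook = CassandraHook(cassandra_conn_id="cassandra_default")

# table_exists() accepts "keyspace.table"; the keyspace may be omitted if it
# is configured on the Airflow connection.
print(hook.table_exists("keyspace_name.table_name"))

# record_exists() checks for a row matching the given primary-key values,
# which is what CassandraRecordSensor polls for.
print(hook.record_exists("keyspace_name.table_name", {"p1": "v1", "p2": "v2"}))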