def test_named_hive_partition_sensor(self):
    """Running the sensor against an existing static partition succeeds."""
    sensor = NamedHivePartitionSensor(
        task_id='hive_partition_check',
        partition_names=["airflow.static_babynames_partitioned/ds={{ds}}"],
        dag=self.dag,
    )
    sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_poke_non_existing(self):
    """poke() returns False for a partition that does not exist."""
    missing = f"{self.database}.{self.table}/{self.partition_by}={self.next_day}"
    sensor = NamedHivePartitionSensor(
        partition_names=[missing],
        task_id='test_poke_non_existing',
        poke_interval=1,
        hook=self.hook,
        dag=self.dag,
    )
    self.assertFalse(sensor.poke(None))
def test_times_out_on_nonexistent_partition(self):
    """A partition that never appears must trip AirflowSensorTimeout."""
    with self.assertRaises(AirflowSensorTimeout):
        sensor = NamedHivePartitionSensor(
            task_id='hive_partition_check',
            partition_names=[
                "airflow.static_babynames_partitioned/ds={{ds}}",
                "airflow.static_babynames_partitioned/ds=nonexistent",
            ],
            poke_interval=0.1,
            timeout=1,
            dag=self.dag,
        )
        sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_named_hive_partition_sensor_parses_partitions_with_periods(self):
    """Periods inside a partition value must survive parsing intact.

    Only the first '.' separates schema from table; later periods belong
    to the partition value itself.
    """
    name = NamedHivePartitionSensor.parse_partition_name(
        partition="schema.table/part1=this.can.be.an.issue/part2=ok")
    self.assertEqual(name[0], "schema")
    self.assertEqual(name[1], "table")
    # Bug fix: the expected value must mirror the input spec ("part2=ok");
    # the old expectation "part2=this_should_be_ok" could never match.
    self.assertEqual(name[2], "part1=this.can.be.an.issue/part2=ok")
def test_poke_non_existing(self):
    """poke() is False when the metastore reports the partition missing."""
    metastore = self.hook.metastore.__enter__()
    metastore.check_for_named_partition.return_value = False
    missing = f"{self.database}.{self.table}/{self.partition_by}={self.next_day}"
    sensor = NamedHivePartitionSensor(
        partition_names=[missing],
        task_id='test_poke_non_existing',
        poke_interval=1,
        hook=self.hook,
        dag=self.dag,
    )
    self.assertFalse(sensor.poke(None))
    metastore.check_for_named_partition.assert_called_with(
        self.database, self.table, f"{self.partition_by}={self.next_day}")
def test_succeeds_on_one_partition(self):
    """The sensor run completes when the mocked partition check passes."""
    metastore_hook = MockHiveMetastoreHook()
    metastore_hook.check_for_named_partition = mock.MagicMock(return_value=True)
    sensor = NamedHivePartitionSensor(
        task_id='hive_partition_check',
        partition_names=["airflow.static_babynames_partitioned/ds={{ds}}"],
        dag=self.dag,
        hook=metastore_hook,
    )
    sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
    # The templated {{ds}} should render to the DAG's execution date.
    metastore_hook.check_for_named_partition.assert_called_once_with(
        'airflow', 'static_babynames_partitioned', 'ds=2015-01-01')
def test_parse_partition_name_default(self):
    """When no schema is given, the parser falls back to 'default'."""
    table = 'users'
    partition = 'ds=2016-01-01/state=IT'
    schema_out, table_out, partition_out = (
        NamedHivePartitionSensor.parse_partition_name(f'{table}/{partition}'))
    self.assertEqual('default', schema_out)
    self.assertEqual(table, table_out)
    self.assertEqual(partition, partition_out)
def test_parse_partition_name_default(self):
    """A schema-less partition name parses with schema 'default'."""
    table = 'users'
    partition = 'ds=2016-01-01/state=IT'
    parsed = NamedHivePartitionSensor.parse_partition_name(f'{table}/{partition}')
    assert parsed[0] == 'default'
    assert parsed[1] == table
    assert parsed[2] == partition
def test_poke_existing(self):
    """poke() is True when the metastore reports the partition present."""
    metastore = self.hook.metastore.__enter__()
    metastore.check_for_named_partition.return_value = True
    existing = f"{self.database}.{self.table}/{self.partition_by}={DEFAULT_DATE_DS}"
    sensor = NamedHivePartitionSensor(
        partition_names=[existing],
        task_id='test_poke_existing',
        poke_interval=1,
        hook=self.hook,
        dag=self.dag,
    )
    assert sensor.poke(None)
    metastore.check_for_named_partition.assert_called_with(
        self.database, self.table, f"{self.partition_by}={DEFAULT_DATE_DS}")
def test_times_out_on_nonexistent_partition(self):
    """With the partition check mocked to False, the sensor must time out."""
    with self.assertRaises(AirflowSensorTimeout):
        metastore_hook = MockHiveMetastoreHook()
        metastore_hook.check_for_named_partition = mock.MagicMock(return_value=False)
        sensor = NamedHivePartitionSensor(
            task_id='hive_partition_check',
            partition_names=[
                "airflow.static_babynames_partitioned/ds={{ds}}",
                "airflow.static_babynames_partitioned/ds=nonexistent",
            ],
            poke_interval=0.1,
            timeout=1,
            dag=self.dag,
            hook=metastore_hook,
        )
        sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_parse_partition_name_correct(self):
    """A fully qualified 'schema.table/partition' name round-trips."""
    schema, table = 'default', 'users'
    partition = 'ds=2016-01-01/state=IT'
    parsed = NamedHivePartitionSensor.parse_partition_name(
        f'{schema}.{table}/{partition}')
    self.assertEqual(schema, parsed[0])
    self.assertEqual(table, parsed[1])
    self.assertEqual(partition, parsed[2])
def test_get_classpath(self):
    """SensorInstance.get_classpath resolves the import path for sensors
    defined both in a provider package and in core airflow."""
    provider_sensor = NamedHivePartitionSensor(
        partition_names=['test_partition'], task_id='meta_partition_test_1')
    expected_provider_path = (
        "airflow.providers.apache.hive.sensors.named_hive_partition.NamedHivePartitionSensor"
    )
    assert SensorInstance.get_classpath(provider_sensor) == expected_provider_path

    def noop_callable():
        return

    core_sensor = PythonSensor(python_callable=noop_callable, task_id='python_sensor_test')
    assert SensorInstance.get_classpath(core_sensor) == "airflow.sensors.python.PythonSensor"
def test_parse_partition_name_incorrect(self):
    """A name lacking the '/' separator must raise ValueError."""
    with self.assertRaises(ValueError):
        NamedHivePartitionSensor.parse_partition_name('incorrect.name')
def test_parses_partitions_with_periods(self):
    """Periods inside partition values survive parsing; only the first '.'
    splits schema from table."""
    schema, table, partition = NamedHivePartitionSensor.parse_partition_name(
        partition="schema.table/part1=this.can.be.an.issue/part2=ok")
    assert schema == "schema"
    assert table == "table"
    assert partition == "part1=this.can.be.an.issue/part2=ok"