def execute(self, context):
    """
    Call the SparkJDBCHook to run the provided spark job.

    :param context: Airflow task context (required by the operator
        interface; not used by this implementation).
    """
    # Every ``self._*`` attribute captured at operator construction time
    # is forwarded unchanged to the hook.
    self._hook = SparkJDBCHook(
        spark_app_name=self._spark_app_name,
        spark_conn_id=self._spark_conn_id,
        spark_conf=self._spark_conf,
        spark_py_files=self._spark_py_files,
        spark_files=self._spark_files,
        spark_jars=self._spark_jars,
        num_executors=self._num_executors,
        executor_cores=self._executor_cores,
        executor_memory=self._executor_memory,
        driver_memory=self._driver_memory,
        verbose=self._verbose,
        keytab=self._keytab,
        principal=self._principal,
        cmd_type=self._cmd_type,
        jdbc_table=self._jdbc_table,
        jdbc_conn_id=self._jdbc_conn_id,
        jdbc_driver=self._jdbc_driver,
        metastore_table=self._metastore_table,
        jdbc_truncate=self._jdbc_truncate,
        save_mode=self._save_mode,
        save_format=self._save_format,
        batch_size=self._batch_size,
        fetch_size=self._fetch_size,
        num_partitions=self._num_partitions,
        partition_column=self._partition_column,
        lower_bound=self._lower_bound,
        upper_bound=self._upper_bound,
        create_table_column_types=self._create_table_column_types)
    # Launch the spark-submit based JDBC transfer job.
    self._hook.submit_jdbc_job()
def test_build_jdbc_arguments(self):
    """A valid config produces the full spark-to-jdbc argument list."""
    # Given
    hook = SparkJDBCHook(**self._config)

    # When
    cmd = hook._build_jdbc_application_arguments(
        hook._resolve_jdbc_connection())

    # Then
    expected_jdbc_arguments = [
        '-cmdType', 'spark_to_jdbc',
        '-url', 'jdbc:postgresql://localhost:5432/default',
        '-user', 'user',
        '-password', 'supersecret',
        '-metastoreTable', 'hiveMcHiveFace',
        '-jdbcTable', 'tableMcTableFace',
        '-jdbcDriver', 'org.postgresql.Driver',
        '-batchsize', '100',
        '-fetchsize', '200',
        '-numPartitions', '10',
        '-partitionColumn', 'columnMcColumnFace',
        '-lowerBound', '10',
        '-upperBound', '20',
        '-saveMode', 'append',
        '-saveFormat', 'parquet',
        # NOTE(review): the implicit string concatenation below joins
        # "CHAR(64)," directly to "comments" with no space — confirm this
        # matches the fixture in self._config before "fixing" it.
        '-createTableColumnTypes', 'columnMcColumnFace INTEGER(100), name CHAR(64),'
                                   'comments VARCHAR(1024)'
    ]
    # assertEquals is a deprecated alias removed in Python 3.12; use
    # assertEqual instead.
    self.assertEqual(expected_jdbc_arguments, cmd)
def test_resolve_jdbc_connection(self):
    """Resolving ``jdbc-default`` yields its connection attributes."""
    # Given
    hook = SparkJDBCHook(jdbc_conn_id='jdbc-default')

    # When
    resolved = hook._resolve_jdbc_connection()

    # Then
    self.assertEqual(
        resolved,
        {
            'url': 'localhost:5432',
            'schema': 'default',
            'conn_prefix': 'jdbc:postgresql://',
            'user': '******',
            'password': '******',
        },
    )
def test_build_jdbc_arguments_invalid(self):
    """An invalid config must be rejected when building arguments."""
    # Given
    hook = SparkJDBCHook(**self._invalid_config)

    # Expect Exception — the original test merely called the method, so a
    # raised exception ERRORED the test and no exception silently PASSED
    # it; assertRaises makes the stated expectation actually enforced.
    # NOTE(review): asserting the broad Exception type because the exact
    # exception class raised by the hook is not visible from this file —
    # narrow it once confirmed.
    with self.assertRaises(Exception):
        hook._build_jdbc_application_arguments(
            hook._resolve_jdbc_connection())