Example #1
    def execute(self, context):
        """
        Create a SparkJDBCHook (a SparkSubmitHook subclass) and use it to
        submit the provided Spark JDBC job
        """
        self._hook = SparkJDBCHook(
            spark_app_name=self._spark_app_name,
            spark_conn_id=self._spark_conn_id,
            spark_conf=self._spark_conf,
            spark_py_files=self._spark_py_files,
            spark_files=self._spark_files,
            spark_jars=self._spark_jars,
            num_executors=self._num_executors,
            executor_cores=self._executor_cores,
            executor_memory=self._executor_memory,
            driver_memory=self._driver_memory,
            verbose=self._verbose,
            keytab=self._keytab,
            principal=self._principal,
            cmd_type=self._cmd_type,
            jdbc_table=self._jdbc_table,
            jdbc_conn_id=self._jdbc_conn_id,
            jdbc_driver=self._jdbc_driver,
            metastore_table=self._metastore_table,
            jdbc_truncate=self._jdbc_truncate,
            save_mode=self._save_mode,
            save_format=self._save_format,
            batch_size=self._batch_size,
            fetch_size=self._fetch_size,
            num_partitions=self._num_partitions,
            partition_column=self._partition_column,
            lower_bound=self._lower_bound,
            upper_bound=self._upper_bound,
            create_table_column_types=self._create_table_column_types)
        self._hook.submit_jdbc_job()
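The operator is a thin wrapper: execute() just forwards its constructor arguments to the hook and calls submit_jdbc_job(). A minimal usage sketch follows, assuming the provider import path (older Airflow releases expose the operator as airflow.contrib.operators.spark_jdbc_operator.SparkJDBCOperator instead); the DAG id and schedule are illustrative.

from datetime import datetime

from airflow import DAG
from airflow.providers.apache.spark.operators.spark_jdbc import SparkJDBCOperator

with DAG(
    dag_id="spark_jdbc_example",       # hypothetical DAG id
    start_date=datetime(2024, 1, 1),
    schedule_interval=None,            # run on manual trigger only
) as dag:
    spark_to_jdbc_job = SparkJDBCOperator(
        task_id="spark_to_jdbc_job",
        cmd_type="spark_to_jdbc",            # write a metastore table out over JDBC
        spark_conn_id="spark-default",       # Spark connection id
        jdbc_conn_id="jdbc-default",         # JDBC connection id, as in the tests below
        metastore_table="hiveMcHiveFace",
        jdbc_table="tableMcTableFace",
        jdbc_driver="org.postgresql.Driver",
    )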
Example #2
    def test_build_jdbc_arguments(self):
        # Given
        hook = SparkJDBCHook(**self._config)

        # When
        cmd = hook._build_jdbc_application_arguments(hook._resolve_jdbc_connection())

        # Then
        expected_jdbc_arguments = [
            '-cmdType', 'spark_to_jdbc',
            '-url', 'jdbc:postgresql://localhost:5432/default',
            '-user', 'user',
            '-password', 'supersecret',
            '-metastoreTable', 'hiveMcHiveFace',
            '-jdbcTable', 'tableMcTableFace',
            '-jdbcDriver', 'org.postgresql.Driver',
            '-batchsize', '100',
            '-fetchsize', '200',
            '-numPartitions', '10',
            '-partitionColumn', 'columnMcColumnFace',
            '-lowerBound', '10',
            '-upperBound', '20',
            '-saveMode', 'append',
            '-saveFormat', 'parquet',
            # NB: implicit string concatenation -- the expected value has no
            # space between 'CHAR(64),' and 'comments'
            '-createTableColumnTypes', 'columnMcColumnFace INTEGER(100), name CHAR(64),'
                                       'comments VARCHAR(1024)'
        ]
        self.assertEqual(expected_jdbc_arguments, cmd)
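The self._config fixture the hook is built from is not shown in this excerpt. Below is a plausible reconstruction, inferred from the expected argument list above; every key and value here is an assumption, not the original fixture.

# Hypothetical reconstruction of the unshown self._config fixture,
# inferred from the expected arguments; the keys mirror the SparkJDBCHook
# keyword arguments used in Example #1.
_config = {
    'cmd_type': 'spark_to_jdbc',
    'jdbc_conn_id': 'jdbc-default',
    'metastore_table': 'hiveMcHiveFace',
    'jdbc_table': 'tableMcTableFace',
    'jdbc_driver': 'org.postgresql.Driver',
    'batch_size': 100,
    'fetch_size': 200,
    'num_partitions': 10,
    'partition_column': 'columnMcColumnFace',
    'lower_bound': '10',
    'upper_bound': '20',
    'save_mode': 'append',
    'save_format': 'parquet',
    'create_table_column_types': 'columnMcColumnFace INTEGER(100), name CHAR(64),'
                                 'comments VARCHAR(1024)',
}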
Example #3
    def test_resolve_jdbc_connection(self):
        # Given
        hook = SparkJDBCHook(jdbc_conn_id='jdbc-default')
        expected_connection = {
            'url': 'localhost:5432',
            'schema': 'default',
            'conn_prefix': 'jdbc:postgresql://',
            'user': 'user',
            'password': 'supersecret'
        }

        # When
        connection = hook._resolve_jdbc_connection()

        # Then
        self.assertEqual(connection, expected_connection)
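For the resolution to return this dict, an Airflow connection with conn_id 'jdbc-default' must exist. A sketch of a matching connection object follows; the conn_prefix extra is inferred from the resolved dict, and in practice the connection is usually registered via the Airflow UI, CLI, or an AIRFLOW_CONN_* environment variable rather than constructed in code.

from airflow.models import Connection

# Sketch of a connection that would resolve to the dict above
# (illustrative only; normally registered via UI/CLI/env var, not in code).
jdbc_default = Connection(
    conn_id='jdbc-default',
    conn_type='jdbc',
    host='localhost',
    port=5432,                    # host and port combine into 'url'
    schema='default',
    login='user',                 # surfaces as connection['user']
    password='supersecret',       # surfaces as connection['password']
    extra='{"conn_prefix": "jdbc:postgresql://"}',  # inferred extra field
)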
Example #4
    def test_build_jdbc_arguments_invalid(self):
        # Given
        hook = SparkJDBCHook(**self._invalid_config)

        # Expect Exception: building arguments from an invalid config should
        # raise (the type is asserted loosely here, since the concrete error
        # raised by the hook's validation is not shown in this excerpt)
        with self.assertRaises(Exception):
            hook._build_jdbc_application_arguments(hook._resolve_jdbc_connection())
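For comparison, the same check in pytest style. This is a sketch: invalid_config is a hypothetical fixture, the hook import path is assumed, and Exception again stands in for whatever the hook's validation actually raises.

import pytest

from airflow.providers.apache.spark.hooks.spark_jdbc import SparkJDBCHook  # path assumed

def test_build_jdbc_arguments_invalid(invalid_config):  # hypothetical fixture
    # Given an invalid configuration, building the JDBC application
    # arguments is expected to raise.
    hook = SparkJDBCHook(**invalid_config)
    with pytest.raises(Exception):
        hook._build_jdbc_application_arguments(hook._resolve_jdbc_connection())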