def setUp(self):
     self._upload_dataframe()
     args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
     self.dag = DAG('test_dag_id', default_args=args)
     self.database = 'airflow'
     self.table = 'hive_server_hook'
     self.hql = """
     CREATE DATABASE IF NOT EXISTS {{ params.database }};
     USE {{ params.database }};
     DROP TABLE IF EXISTS {{ params.table }};
     CREATE TABLE IF NOT EXISTS {{ params.table }} (
         a int,
         b int)
     ROW FORMAT DELIMITED
     FIELDS TERMINATED BY ',';
     LOAD DATA LOCAL INPATH '{{ params.csv_path }}'
     OVERWRITE INTO TABLE {{ params.table }};
     """
     self.columns = ['{}.a'.format(self.table),
                     '{}.b'.format(self.table)]
     self.hook = HiveMetastoreHook()
     t = HiveOperator(
         task_id='HiveHook_' + str(random.randint(1, 10000)),
         params={
             'database': self.database,
             'table': self.table,
             'csv_path': self.local_path
         },
         hive_cli_conn_id='hive_cli_default',
         hql=self.hql, dag=self.dag)
     t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
           ignore_ti_state=True)
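These setUp fixtures are excerpts from a larger test module, so the names they use (DEFAULT_DATE, DAG, HiveOperator, HiveMetastoreHook, self._upload_dataframe, self.local_path) are defined elsewhere. A minimal sketch of that surrounding context, assuming Airflow 1.x-era import paths and a pandas-based CSV helper; the class name, helper body, and file path are assumptions, not taken from the excerpt:

import datetime
import random
import unittest

import pandas as pd

from airflow import DAG, configuration
from airflow.hooks.hive_hooks import HiveMetastoreHook      # import path varies by Airflow version
from airflow.operators.hive_operator import HiveOperator    # import path varies by Airflow version

# The mocked beeline test further down pins the execution date to
# 2015-01-01T00:00:00+00:00, so DEFAULT_DATE is assumed to be that date.
DEFAULT_DATE = datetime.datetime(2015, 1, 1)


class HiveEnvironmentTest(unittest.TestCase):
    """Hypothetical test class that the setUp fixtures shown here would belong to."""

    def _upload_dataframe(self):
        # Assumed helper: write a two-column CSV matching the (a int, b int)
        # table schema so LOAD DATA LOCAL INPATH has a file to ingest.
        df = pd.DataFrame({'a': [1, 2], 'b': [1, 2]})
        self.local_path = '/tmp/to_tst_file.csv'
        df.to_csv(self.local_path, header=False, index=False)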
 def setUp(self):
     args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
     self.dag = DAG('test_dag_id', default_args=args)
     self.next_day = (DEFAULT_DATE +
                      datetime.timedelta(days=1)).isoformat()[:10]
     self.database = 'airflow'
     self.partition_by = 'ds'
     self.table = 'static_babynames_partitioned'
     self.hql = """
     CREATE DATABASE IF NOT EXISTS {{ params.database }};
     USE {{ params.database }};
     DROP TABLE IF EXISTS {{ params.table }};
     CREATE TABLE IF NOT EXISTS {{ params.table }} (
         state string,
         year string,
         name string,
         gender string,
         num int)
     PARTITIONED BY ({{ params.partition_by }} string);
     ALTER TABLE {{ params.table }}
     ADD PARTITION({{ params.partition_by }}='{{ ds }}');
     """
     self.hook = HiveMetastoreHook()
     t = HiveOperator(
         task_id='HiveHook_' + str(random.randint(1, 10000)),
         params={
             'database': self.database,
             'table': self.table,
             'partition_by': self.partition_by
         },
         hive_cli_conn_id='hive_cli_default',
         hql=self.hql, dag=self.dag)
     t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
           ignore_ti_state=True)
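This fixture registers a single partition for {{ ds }}, which renders as 2015-01-01 for DEFAULT_DATE. A natural follow-up assertion uses the HiveMetastoreHook created above; a hedged sketch, assuming check_for_partition(schema, table, partition_spec) is available on the hook. The test itself is not part of the original excerpt:

def test_partition_was_added(self):
    # Hypothetical follow-up test: the ALTER TABLE above added ds='2015-01-01'.
    partition_spec = "{}='{}'".format(self.partition_by,
                                      DEFAULT_DATE.strftime('%Y-%m-%d'))
    self.assertTrue(
        self.hook.check_for_partition(self.database, self.table, partition_spec))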
 def setUp(self):
     configuration.load_test_config()
     args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
     self.dag = DAG('test_dag_id', default_args=args)
     self.next_day = (DEFAULT_DATE +
                      datetime.timedelta(days=1)).isoformat()[:10]
     self.database = 'airflow'
     self.partition_by = 'ds'
     self.table = 'static_babynames_partitioned'
     self.hql = """
     CREATE DATABASE IF NOT EXISTS {{ params.database }};
     USE {{ params.database }};
     DROP TABLE IF EXISTS {{ params.table }};
     CREATE TABLE IF NOT EXISTS {{ params.table }} (
         state string,
         year string,
         name string,
         gender string,
         num int)
     PARTITIONED BY ({{ params.partition_by }} string);
     ALTER TABLE {{ params.table }}
     ADD PARTITION({{ params.partition_by }}='{{ ds }}');
     """
     self.hook = HiveMetastoreHook()
     t = HiveOperator(
         task_id='HiveHook_' + str(random.randint(1, 10000)),
         params={
             'database': self.database,
             'table': self.table,
             'partition_by': self.partition_by
         },
         hive_cli_conn_id='beeline_default',
         hql=self.hql, dag=self.dag)
     t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
           ignore_ti_state=True)
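This variant targets the beeline_default connection rather than hive_cli_default. If that connection is not already defined in the metadata database, it can be registered roughly as follows; the field values are illustrative assumptions (in particular the use_beeline extra, which is what makes the Hive CLI hook shell out to beeline), not values taken from the excerpt:

from airflow import settings
from airflow.models import Connection

# Hypothetical registration of a beeline-flavoured Hive CLI connection.
session = settings.Session()
session.add(Connection(
    conn_id='beeline_default',
    conn_type='hive_cli',
    host='localhost',
    port=10000,
    schema='default',
    extra='{"use_beeline": true, "auth": ""}'))
session.commit()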
def test_beeline(self, mock_popen, mock_temp_dir):
    mock_subprocess = MockSubProcess()
    mock_popen.return_value = mock_subprocess
    mock_temp_dir.return_value = "tst"

    hive_cmd = [
        'beeline', '-u', '"jdbc:hive2://localhost:10000/default"',
        '-hiveconf', 'airflow.ctx.dag_id=test_dag_id', '-hiveconf',
        'airflow.ctx.task_id=beeline_hql', '-hiveconf',
        'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
        '-hiveconf', 'airflow.ctx.dag_run_id=', '-hiveconf',
        'airflow.ctx.dag_owner=airflow', '-hiveconf',
        'airflow.ctx.dag_email=', '-hiveconf',
        'mapreduce.job.queuename=airflow', '-hiveconf',
        'mapred.job.queue.name=airflow', '-hiveconf',
        'tez.queue.name=airflow', '-hiveconf',
        'mapred.job.name=test_job_name', '-f',
        '/tmp/airflow_hiveop_tst/tmptst'
    ]

    op = HiveOperator(task_id='beeline_hql',
                      hive_cli_conn_id='hive_cli_default',
                      hql=self.hql,
                      dag=self.dag,
                      mapred_job_name="test_job_name")
    op.run(start_date=DEFAULT_DATE,
           end_date=DEFAULT_DATE,
           ignore_ti_state=True)
    mock_popen.assert_called_with(hive_cmd,
                                  stdout=mock_subprocess.PIPE,
                                  stderr=mock_subprocess.STDOUT,
                                  cwd="/tmp/airflow_hiveop_tst",
                                  close_fds=True)
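The mocked test above depends on two patch decorators (one replacing subprocess.Popen, one controlling the temporary directory name) and on a MockSubProcess stand-in defined elsewhere in the test module. A plausible sketch of that stand-in, assuming the hook iterates over the child's stdout and checks its return code; the exact class body is an assumption, not shown in the excerpt:

from io import BytesIO


class MockSubProcess:
    # Hypothetical stand-in for the Popen handle returned by the patched call.
    # PIPE/STDOUT mirror subprocess.PIPE and subprocess.STDOUT so that the
    # assert_called_with comparison above succeeds.
    PIPE = -1
    STDOUT = -2

    def __init__(self, returncode=0):
        self.returncode = returncode
        self.stdout = BytesIO(b'')   # run_cli reads the command's output from here

    def wait(self):
        return self.returncode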
 def setUp(self):
     configuration.load_test_config()
     self._upload_dataframe()
     args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
     self.dag = DAG('test_dag_id', default_args=args)
     self.database = 'airflow'
     self.table = 'hive_server_hook'
     self.hql = """
     CREATE DATABASE IF NOT EXISTS {{ params.database }};
     USE {{ params.database }};
     DROP TABLE IF EXISTS {{ params.table }};
     CREATE TABLE IF NOT EXISTS {{ params.table }} (
         a int,
         b int)
     ROW FORMAT DELIMITED
     FIELDS TERMINATED BY ',';
     LOAD DATA LOCAL INPATH '{{ params.csv_path }}'
     OVERWRITE INTO TABLE {{ params.table }};
     """
     self.columns = ['{}.a'.format(self.table),
                     '{}.b'.format(self.table)]
     self.hook = HiveMetastoreHook()
     t = HiveOperator(
         task_id='HiveHook_' + str(random.randint(1, 10000)),
         params={
             'database': self.database,
             'table': self.table,
             'csv_path': self.local_path
         },
         hive_cli_conn_id='beeline_default',
         hql=self.hql, dag=self.dag)
     t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
           ignore_ti_state=True)
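The table name hive_server_hook and the self.columns list suggest that later assertions in this class read the loaded rows back over HiveServer2. A hedged sketch of such a test, assuming HiveServer2Hook and its hiveserver2_default connection are available; the test itself is not part of the original excerpt:

from airflow.hooks.hive_hooks import HiveServer2Hook    # import path varies by Airflow version


def test_records_round_trip(self):
    # Hypothetical follow-up test: read back the rows loaded from the CSV.
    hook = HiveServer2Hook()
    query = 'SELECT * FROM {}.{}'.format(self.database, self.table)
    records = hook.get_records(query, schema=self.database)
    self.assertTrue(len(records) > 0)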
 def test_beeline(self):
     t = HiveOperator(task_id='beeline_hql',
                      hive_cli_conn_id='hive_cli_default',
                      hql=self.hql,
                      dag=self.dag)
     t.run(start_date=DEFAULT_DATE,
           end_date=DEFAULT_DATE,
           ignore_ti_state=True)
 def test_hive_queues(self):
     t = HiveOperator(
         task_id='test_hive_queues', hql=self.hql,
         mapred_queue='default', mapred_queue_priority='HIGH',
         mapred_job_name='airflow.test_hive_queues',
         dag=self.dag)
     t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
           ignore_ti_state=True)
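mapred_queue, mapred_queue_priority and mapred_job_name never appear in the HQL; the Hive CLI hook forwards them as -hiveconf settings on the generated command line, as the mocked beeline invocation earlier shows for the queue and job name. Illustratively, for the arguments used in this test (the priority key is an assumption; the other keys match that mocked command):

# Hypothetical extra -hiveconf pairs produced for this operator's settings.
expected_extra_hiveconf = [
    '-hiveconf', 'mapreduce.job.queuename=default',
    '-hiveconf', 'mapred.job.queue.name=default',
    '-hiveconf', 'tez.queue.name=default',
    '-hiveconf', 'mapreduce.job.priority=HIGH',
    '-hiveconf', 'mapred.job.name=airflow.test_hive_queues',
]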
 def test_hive(self):
     t = HiveOperator(task_id='basic_hql', hql=self.hql, dag=self.dag)
     t.run(start_date=DEFAULT_DATE,
           end_date=DEFAULT_DATE,
           ignore_ti_state=True)
 def test_beeline(self):
     t = HiveOperator(
         task_id='beeline_hql', hive_cli_conn_id='beeline_default',
         hql=self.hql, dag=self.dag)
     t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
           ignore_ti_state=True)