def test_construct_ingest_query(self):
        operator = HiveToDruidTransfer(
            task_id='hive_to_druid',
            dag=self.dag,
            **self.hook_config
        )

        provided_index_spec = operator.construct_ingest_query(
            **self.index_spec_config
        )

        expected_index_spec = {
            "hadoopDependencyCoordinates": self.hook_config['hadoop_dependency_coordinates'],
            "type": "index_hadoop",
            "spec": {
                "dataSchema": {
                    "metricsSpec": self.hook_config['metric_spec'],
                    "granularitySpec": {
                        "queryGranularity": self.hook_config['query_granularity'],
                        "intervals": self.hook_config['intervals'],
                        "type": "uniform",
                        "segmentGranularity": self.hook_config['segment_granularity'],
                    },
                    "parser": {
                        "type": "string",
                        "parseSpec": {
                            "columns": self.index_spec_config['columns'],
                            "dimensionsSpec": {
                                "dimensionExclusions": [],
                                "dimensions": self.index_spec_config['columns'],
                                "spatialDimensions": []
                            },
                            "timestampSpec": {
                                "column": self.hook_config['ts_dim'],
                                "format": "auto"
                            },
                            "format": "tsv"
                        }
                    },
                    "dataSource": self.hook_config['druid_datasource']
                },
                "tuningConfig": {
                    "type": "hadoop",
                    "jobProperties": {
                        "mapreduce.job.user.classpath.first": "false",
                        "mapreduce.map.output.compress": "false",
                        "mapreduce.output.fileoutputformat.compress": "false",
                    },
                    "partitionsSpec": {
                        "type": "hashed",
                        "targetPartitionSize": self.hook_config['target_partition_size'],
                        "numShards": self.hook_config['num_shards'],
                    },
                },
                "ioConfig": {
                    "inputSpec": {
                        "paths": self.index_spec_config['static_path'],
                        "type": "static"
                    },
                    "type": "hadoop"
                }
            }
        }

        # Make sure the constructed query matches what we expect
        self.assertEqual(provided_index_spec, expected_index_spec)

    def test_construct_ingest_query(self):
        operator = HiveToDruidTransfer(
            task_id='hive_to_druid',
            dag=self.dag,
            **self.hook_config
        )

        provided_index_spec = operator.construct_ingest_query(
            **self.index_spec_config
        )

        expected_index_spec = {
            "hadoopDependencyCoordinates": self.hook_config['hadoop_dependency_coordinates'],
            "type": "index_hadoop",
            "spec": {
                "dataSchema": {
                    "metricsSpec": self.hook_config['metric_spec'],
                    "granularitySpec": {
                        "queryGranularity": self.hook_config['query_granularity'],
                        "intervals": self.hook_config['intervals'],
                        "type": "uniform",
                        "segmentGranularity": self.hook_config['segment_granularity'],
                    },
                    "parser": {
                        "type": "string",
                        "parseSpec": {
                            "columns": self.index_spec_config['columns'],
                            "dimensionsSpec": {
                                "dimensionExclusions": [],
                                "dimensions": self.index_spec_config['columns'],
                                "spatialDimensions": []
                            },
                            "timestampSpec": {
                                "column": self.hook_config['ts_dim'],
                                "format": "auto"
                            },
                            "format": "tsv"
                        }
                    },
                    "dataSource": self.hook_config['druid_datasource']
                },
                "tuningConfig": {
                    "type": "hadoop",
                    "jobProperties": self.hook_config['job_properties'],
                    "partitionsSpec": {
                        "type": "hashed",
                        "targetPartitionSize": self.hook_config['target_partition_size'],
                        "numShards": self.hook_config['num_shards'],
                    },
                },
                "ioConfig": {
                    "inputSpec": {
                        "paths": self.index_spec_config['static_path'],
                        "type": "static"
                    },
                    "type": "hadoop"
                }
            }
        }

        # Make sure the constructed query matches what we expect
        self.assertEqual(provided_index_spec, expected_index_spec)
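
A note on the fixtures: both tests above read self.dag, self.hook_config, and self.index_spec_config, which are assumed to be created in the test class's setUp and are not shown in these snippets. The sketch below is a hypothetical setUp with purely illustrative values: the key names mirror what the tests access, but the concrete values are made up, it assumes the usual "from airflow import DAG" and "from datetime import datetime" imports, and whether job_properties is accepted as a constructor argument depends on the Airflow version.

    def setUp(self):
        # Hypothetical fixture -- values are illustrative only.
        args = {'owner': 'airflow', 'start_date': datetime(2017, 1, 1)}
        self.dag = DAG('hive_to_druid', default_args=args)

        # Keyword arguments forwarded to HiveToDruidTransfer(**self.hook_config)
        self.hook_config = {
            'sql': 'SELECT * FROM hive_table',
            'druid_datasource': 'hive_to_druid',
            'ts_dim': 'timedimension_column',
            'metric_spec': [{'name': 'count', 'type': 'count'}],
            'hadoop_dependency_coordinates': 'org.apache.hadoop:hadoop-client:2.7.3',
            'intervals': '2017-01-01/2017-02-01',
            'num_shards': -1,
            'target_partition_size': 1833,
            'query_granularity': 'month',
            'segment_granularity': 'week',
            'job_properties': {
                'mapreduce.job.user.classpath.first': 'false',
                'mapreduce.map.output.compress': 'false',
                'mapreduce.output.fileoutputformat.compress': 'false',
            },
        }

        # Keyword arguments forwarded to construct_ingest_query(**self.index_spec_config)
        self.index_spec_config = {
            'static_path': '/apps/warehouse/hive/some_table',
            'columns': ['country', 'segment', 'amount'],
        }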
Example #3
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from airflow.operators.hive_to_druid import HiveToDruidTransfer
from airflow import DAG
from datetime import datetime

args = {
    'owner': 'qi_wang',
    'start_date': datetime(2015, 4, 4),
}

dag = DAG("test_druid", default_args=args)


HiveToDruidTransfer(
    task_id="load_dummy_test",
    sql="select * from qi.druid_test_dataset_w_platform_1 limit 10;",
    druid_datasource="airflow_test",
    ts_dim="ds",
    dag=dag,
)
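
A quick way to sanity-check this example DAG, not part of the original snippet, is to pull the task back out of the DAG and inspect a couple of its attributes. The assertions below are a hedged sketch that assumes the Airflow 1.x HiveToDruidTransfer stores druid_datasource and ts_dim as instance attributes.

# Hypothetical sanity checks, run in the same module as the DAG above.
task = dag.get_task("load_dummy_test")
assert task.druid_datasource == "airflow_test"
assert task.ts_dim == "ds"

# A single local run of the task can then be triggered with the Airflow 1.x CLI:
#   airflow test test_druid load_dummy_test 2015-04-04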