def exec_and_compare_hive_and_impala_hs2(self, stmt, compare=lambda x, y: x == y):
    """Execute one statement over HS2 on Hive and on Impala and check the results.

    The statement is first run against HIVE_HS2_HOST_PORT using the 'SASL'
    transport, then against IMPALAD_HS2_HOST_PORT using the 'NOSASL'
    transport. The ``data`` attribute of each execution result is what gets
    compared.

    Args:
        stmt: the query string to execute on both services.
        compare: callable invoked as ``compare(impala_results, hive_results)``;
            its return value is asserted to be truthy. Defaults to plain
            equality.

    Raises:
        AssertionError: if either result is None, or if ``compare`` returns
            a falsy value.
    """
    # execute_using_jdbc takes a Query object rather than a raw string, so
    # wrap the statement first. The same Query is reused for both services.
    jdbc_query = Query()
    jdbc_query.query_str = stmt

    # Hive goes first.
    hive_config = JdbcQueryExecConfig(impalad=HIVE_HS2_HOST_PORT, transport='SASL')
    hive_response = execute_using_jdbc(jdbc_query, hive_config)
    hive_results = hive_response.data

    # Then Impala.
    impala_config = JdbcQueryExecConfig(impalad=IMPALAD_HS2_HOST_PORT,
                                        transport='NOSASL')
    impala_response = execute_using_jdbc(jdbc_query, impala_config)
    impala_results = impala_response.data

    # Both sides must have produced data before the comparison is meaningful.
    assert not (impala_results is None or hive_results is None)
    assert compare(impala_results, hive_results)
def exec_and_compare_hive_and_impala_hs2(self, stmt):
    """Compare Hive and Impala results when executing the same statement over HS2.

    The statement is run against HIVE_HS2_HOST_PORT and then against
    IMPALAD_HS2_HOST_PORT. The ``data`` attribute of each execution result is
    compared element by element.

    Args:
        stmt: the query string to execute on both services.

    Raises:
        AssertionError: if either result is None, if the two results have a
            different number of elements, or if any pair of corresponding
            elements differs.
    """
    # execute_using_jdbc expects a Query object. Convert the query string into
    # a Query object.
    query = Query()
    query.query_str = stmt

    # Run the statement targeting Hive.
    exec_opts = JdbcQueryExecConfig(impalad=HIVE_HS2_HOST_PORT)
    hive_results = execute_using_jdbc(query, exec_opts).data

    # Run the statement targeting Impala.
    exec_opts = JdbcQueryExecConfig(impalad=IMPALAD_HS2_HOST_PORT)
    impala_results = execute_using_jdbc(query, exec_opts).data

    # Compare the results.
    assert impala_results is not None, "Impala returned no result data"
    assert hive_results is not None, "Hive returned no result data"

    # Materialise both sides so their lengths can be checked regardless of the
    # concrete iterable type.
    impala_rows = list(impala_results)
    hive_rows = list(hive_results)

    # zip() stops at the shorter input, so without this check extra or missing
    # trailing rows on either side would go unnoticed.
    assert len(impala_rows) == len(hive_rows), (
        "Result size mismatch: Impala returned %d, Hive returned %d"
        % (len(impala_rows), len(hive_rows)))

    for index, (impala, hive) in enumerate(zip(impala_rows, hive_rows)):
        assert impala == hive, (
            "Result mismatch at index %d: Impala=%r, Hive=%r"
            % (index, impala, hive))
def construct_queries(self, test_vector, scale_factor):
    """Transform the workload's query map into a list of Query objects.

    One Query is built for every (name, query string) entry in
    ``self._query_map``. Each Query is tagged with this workload's name
    (``self._name``) and with the given scale factor and test vector.

    Args:
        test_vector: passed through unchanged as each Query's ``test_vector``.
            NOTE(review): the expected type is not visible here -- confirm
            against callers.
        scale_factor (str): e.g. "300gb"; passed through unchanged as each
            Query's ``scale_factor``.

    Returns:
        list of Query: one per entry in ``self._query_map``, in the map's
        iteration order. Empty when the map is empty.
    """
    # .items() is used instead of .iteritems(): the latter exists only on
    # Python 2 dicts, while .items() works on both Python 2 and Python 3.
    return [
        Query(name=query_name,
              query_str=query_str,
              workload=self._name,
              scale_factor=scale_factor,
              test_vector=test_vector)
        for query_name, query_str in self._query_map.items()
    ]