예제 #1
0
 def generate_schedule_item(self):
     '''Create and pickle a default schedule item when a new one is due.

     A new item is built only if self.should_generate_new_item() is true. Its run
     name is the current local time followed by DEFAULT_RUN_NAME, its profile is
     ImpalaNestedTypesProfile when NESTED_TYPES_MODE is set and DefaultProfile
     otherwise, and its time limit is RUN_TIME_LIMIT. After saving the item,
     self.time_last_generated is updated.

     Whether or not an item was generated, the method ends with sleep(2).
     '''
     if self.should_generate_new_item():
         if NESTED_TYPES_MODE:
             query_profile = ImpalaNestedTypesProfile()
         else:
             query_profile = DefaultProfile()

         timestamp = strftime("%Y-%b-%d-%H:%M:%S", localtime())
         run_name = '{0}-{1}'.format(timestamp, DEFAULT_RUN_NAME)

         new_item = ScheduleItem(
             run_name=run_name,
             query_profile=query_profile,
             time_limit_sec=RUN_TIME_LIMIT)
         new_item.save_pickle()

         self.time_last_generated = time.time()
         LOG.info('Generated Schedule Item')

     # Runs on every call, not only when an item was generated.
     sleep(2)
예제 #2
0
파일: queries.py 프로젝트: yx91490/impala-1
def generate_random_queries(impala, random_db):
    """Yield an endless stream of random Query objects for one database.

    'impala' is the Impala service object; a cursor opened on it is used to list
    and describe every table in 'random_db'. Each yielded Query carries the
    generated SQL text in .sql and 'random_db' in .db_name.

    The generator never terminates by itself; the caller decides when to stop
    consuming it."""
    with impala.cursor(db_name=random_db) as db_cursor:
        table_defs = []
        for table_name in db_cursor.list_table_names():
            table_defs.append(db_cursor.describe_table(table_name))

    statement_source = QueryGenerator(DefaultProfile())
    sql_writer = SqlWriter.create()

    while True:
        statement = statement_source.generate_statement(table_defs)
        sql_text = sql_writer.write_query(statement)

        random_query = Query()
        random_query.sql = sql_text
        random_query.db_name = random_db
        yield random_query
예제 #3
0
def test_func_tree_contains_funcs():
    """
    Exercise QueryGenerator._func_tree_contains_funcs() on two small trees: a
    single parentless And, and an And whose two arguments are the same Equals node.
    """
    generator = QueryGenerator(DefaultProfile())

    # Tree with a single node: an And with no parent.
    lone_and = And.create_from_args(Boolean(True), Boolean(True))
    lone_and.parent = None
    assert generator._func_tree_contains_funcs(lone_and, [And])
    assert not generator._func_tree_contains_funcs(lone_and, [Or])

    # Tree with an And at the root and an Equals below it; the search starts at
    # the Equals node in each assertion.
    child_equals = Equals.create_from_args(Boolean(True), Boolean(True))
    root_and = And.create_from_args(child_equals, child_equals)
    child_equals.parent = root_and
    root_and.parent = None
    assert generator._func_tree_contains_funcs(child_equals, [And])
    assert generator._func_tree_contains_funcs(child_equals, [Equals])
    assert not generator._func_tree_contains_funcs(child_equals, [Or])
예제 #4
0
파일: job.py 프로젝트: yx91490/impala-1
 def __init__(self,
              query_profile,
              job_id,
              run_name='default',
              time_limit_sec=24 * 3600,
              git_command=None,
              parent_job=None):
   '''Set up the initial state of a job.

   Args:
     query_profile: stored as self.query_profile. A falsy value is replaced by
       ImpalaNestedTypesProfile() when NESTED_TYPES_MODE is set, and by
       DefaultProfile() otherwise.
     job_id: stored as self.job_id.
     run_name: stored as self.job_name.
     time_limit_sec: number of seconds added to the current time to obtain
       self.target_stop_time. Defaults to 24 hours.
     git_command: handed to ImpalaDockerEnv to build self.impala_env.
     parent_job: stored as self.parent_job.
   '''
   # Identity of the job.
   self.job_id = job_id
   self.job_name = run_name
   self.parent_job = parent_job
   self.git_hash = ''

   # Query profile: use the caller's if given, else the mode-dependent default.
   if query_profile:
     self.query_profile = query_profile
   elif NESTED_TYPES_MODE:
     self.query_profile = ImpalaNestedTypesProfile()
   else:
     self.query_profile = DefaultProfile()

   if NESTED_TYPES_MODE:
     self.flatten_dialect = 'POSTGRESQL'
   else:
     self.flatten_dialect = None

   # Connections start out unset.
   self.ref_connection = None
   self.test_connection = None

   # Timing. The clock is read separately for the start and the target stop time.
   self.start_time = time()
   self.stop_time = None
   self.target_stop_time = time() + time_limit_sec

   # Result bookkeeping.
   self.result_list = []
   self.num_queries_executed = 0
   self.num_queries_returned_correct_data = 0

   self.impala_env = ImpalaDockerEnv(git_command)
예제 #5
0
def start_run():
    '''Receive a POST request and generate a schedule item from its form data.

    A form containing 'time_limit' comes from the custom_run form: a DefaultProfile
    is created and its bounds, weights, probabilities and flags are overwritten with
    the submitted values. Any other form schedules a run based on a previous one,
    using an unmodified DefaultProfile, a 24 hour time limit and the submitted
    'report_id' as the parent job.

    The resulting ScheduleItem is stored with save_pickle().

    Returns:
        'fail' when the request method is not POST, otherwise 'success'.
    '''
    if request.method != 'POST':
        return 'fail'

    form = request.form

    if 'time_limit' in form:
        # Only the custom_run form sends time_limit, so this is a custom run and
        # the profile is built from the submitted values.
        new_profile = DefaultProfile()

        # Bounds: (profile key, '_from' form field, '_to' form field)
        bound_fields = (
            ('MAX_NESTED_QUERY_COUNT',
             'max_nested_query_count_from', 'max_nested_query_count_to'),
            ('MAX_NESTED_EXPR_COUNT',
             'max_nested_expr_count_from', 'max_nested_expr_count_to'),
            ('SELECT_ITEM_COUNT',
             'select_item_count_from', 'select_item_count_to'),
            ('WITH_TABLE_COUNT',
             'with_table_count_from', 'with_table_count_to'),
            ('TABLE_COUNT',
             'table_count_from', 'table_count_to'),
            ('ANALYTIC_LEAD_LAG_OFFSET',
             'analytic_lead_lag_offset_from', 'analytic_lead_lag_offset_to'),
            ('ANALYTIC_WINDOW_OFFSET',
             'analytic_window_offset_from', 'analytic_window_offset_to'),
        )
        for bound_key, from_key, to_key in bound_fields:
            new_profile._bounds[bound_key] = (
                int(form[from_key]), int(form[to_key]))

        # Integer weights: (weight group, entry in the group, form field)
        weight_fields = (
            # Select Item Category
            ('SELECT_ITEM_CATEGORY', 'AGG', 'select_agg'),
            ('SELECT_ITEM_CATEGORY', 'ANALYTIC', 'select_analytic'),
            ('SELECT_ITEM_CATEGORY', 'BASIC', 'select_basic'),
            # Types
            ('TYPES', Boolean, 'types_boolean'),
            ('TYPES', Char, 'types_char'),
            ('TYPES', Decimal, 'types_decimal'),
            ('TYPES', Float, 'types_float'),
            ('TYPES', Int, 'types_int'),
            ('TYPES', Timestamp, 'types_timestamp'),
            # Join
            ('JOIN', 'INNER', 'join_inner'),
            ('JOIN', 'LEFT', 'join_left'),
            ('JOIN', 'RIGHT', 'join_right'),
            ('JOIN', 'FULL_OUTER', 'join_full_outer'),
            ('JOIN', 'CROSS', 'join_cross'),
        )
        for group, entry, form_key in weight_fields:
            new_profile._weights[group][entry] = int(form[form_key])

        # Float probabilities: (probability group, entry in the group, form field)
        probability_fields = (
            # Optional Query Clauses Probabilities
            ('OPTIONAL_QUERY_CLAUSES', 'WITH', 'optional_with'),
            ('OPTIONAL_QUERY_CLAUSES', 'FROM', 'optional_from'),
            ('OPTIONAL_QUERY_CLAUSES', 'WHERE', 'optional_where'),
            ('OPTIONAL_QUERY_CLAUSES', 'GROUP_BY', 'optional_group_by'),
            ('OPTIONAL_QUERY_CLAUSES', 'HAVING', 'optional_having'),
            ('OPTIONAL_QUERY_CLAUSES', 'UNION', 'optional_union'),
            ('OPTIONAL_QUERY_CLAUSES', 'ORDER_BY', 'optional_order_by'),
            # Optional Analytic Clauses Probabilities
            ('OPTIONAL_ANALYTIC_CLAUSES', 'PARTITION_BY',
             'optional_analytic_partition_by'),
            ('OPTIONAL_ANALYTIC_CLAUSES', 'ORDER_BY',
             'optional_analytic_order_by'),
            ('OPTIONAL_ANALYTIC_CLAUSES', 'WINDOW',
             'optional_analytic_window'),
            # Misc Probabilities
            ('MISC', 'INLINE_VIEW', 'misc_inline_view'),
            ('MISC', 'SELECT_DISTINCT', 'misc_select_distinct'),
            ('MISC', 'SCALAR_SUBQUERY', 'misc_scalar_subquery'),
            ('MISC', 'UNION_ALL', 'misc_union_all'),
        )
        for group, entry, form_key in probability_fields:
            new_profile._probabilities[group][entry] = float(form[form_key])

        # Analytic Designs: a flag is set exactly when its field is present in
        # the form. (flag name, form field)
        design_fields = (
            ('TOP_LEVEL_QUERY_WITHOUT_LIMIT',
             'analytic_designs_top_level_no_limit'),
            ('DETERMINISTIC_ORDER_BY',
             'analytic_designs_deterministic_order_by'),
            ('NO_ORDER_BY', 'analytic_designs_no_order_by'),
            ('ONLY_SELECT_ITEM', 'analytic_designs_only_select_item'),
            ('UNBOUNDED_WINDOW', 'analytic_designs_unbounded_window'),
            ('RANK_FUNC', 'analytic_designs_rank_func'),
        )
        for design, form_key in design_fields:
            new_profile._flags['ANALYTIC_DESIGNS'][design] = form_key in form

        # NOTE(review): git_command is taken verbatim from the form; confirm how
        # it is consumed downstream.
        schedule_item = ScheduleItem(
            run_name=form['run_name'],
            query_profile=new_profile,
            time_limit_sec=int(form['time_limit']),
            git_command=form['git_command'],
            parent_job='')
    else:
        # Run based on previous run
        schedule_item = ScheduleItem(
            run_name=form['run_name'],
            query_profile=DefaultProfile(),
            time_limit_sec=24 * 3600,  # Default time limit is 24 hours
            git_command=form['git_command'],
            parent_job=form['report_id'])

    schedule_item.save_pickle()

    return 'success'
예제 #6
0
def start_run():
  '''Receive a POST request and generate a schedule item from its form data.

  If the form has a 'time_limit' entry (sent only by the custom_run form), a
  DefaultProfile is created and its bounds, weights, probabilities and flags are
  replaced by the submitted values. Otherwise the run is based on a previous one:
  an unmodified DefaultProfile, a 24 hour time limit and the submitted 'report_id'
  as the parent job.

  The ScheduleItem is persisted with save_pickle().

  Returns:
    'fail' when the request method is not POST, otherwise 'success'.
  '''
  if request.method != 'POST':
    return 'fail'

  submitted = request.form

  if 'time_limit' in submitted:
    # This is a custom run because time_limit is present only in the custom_run
    # form. The profile is populated from the submitted values.
    new_profile = DefaultProfile()

    # Bounds: (profile key, '_from' form field, '_to' form field)
    bounds_table = (
        ('MAX_NESTED_QUERY_COUNT',
         'max_nested_query_count_from', 'max_nested_query_count_to'),
        ('MAX_NESTED_EXPR_COUNT',
         'max_nested_expr_count_from', 'max_nested_expr_count_to'),
        ('SELECT_ITEM_COUNT',
         'select_item_count_from', 'select_item_count_to'),
        ('WITH_TABLE_COUNT',
         'with_table_count_from', 'with_table_count_to'),
        ('TABLE_COUNT',
         'table_count_from', 'table_count_to'),
        ('ANALYTIC_LEAD_LAG_OFFSET',
         'analytic_lead_lag_offset_from', 'analytic_lead_lag_offset_to'),
        ('ANALYTIC_WINDOW_OFFSET',
         'analytic_window_offset_from', 'analytic_window_offset_to'),
    )
    for name, from_field, to_field in bounds_table:
      new_profile._bounds[name] = (
          int(submitted[from_field]), int(submitted[to_field]))

    # Integer weights: (weight group, entry in the group, form field)
    weights_table = (
        # Select Item Category
        ('SELECT_ITEM_CATEGORY', 'AGG', 'select_agg'),
        ('SELECT_ITEM_CATEGORY', 'ANALYTIC', 'select_analytic'),
        ('SELECT_ITEM_CATEGORY', 'BASIC', 'select_basic'),
        # Types
        ('TYPES', Boolean, 'types_boolean'),
        ('TYPES', Char, 'types_char'),
        ('TYPES', Decimal, 'types_decimal'),
        ('TYPES', Float, 'types_float'),
        ('TYPES', Int, 'types_int'),
        ('TYPES', Timestamp, 'types_timestamp'),
        # Join
        ('JOIN', 'INNER', 'join_inner'),
        ('JOIN', 'LEFT', 'join_left'),
        ('JOIN', 'RIGHT', 'join_right'),
        ('JOIN', 'FULL_OUTER', 'join_full_outer'),
        ('JOIN', 'CROSS', 'join_cross'),
    )
    for section, item, field_name in weights_table:
      new_profile._weights[section][item] = int(submitted[field_name])

    # Float probabilities: (probability group, entry in the group, form field)
    probabilities_table = (
        # Optional Query Clauses Probabilities
        ('OPTIONAL_QUERY_CLAUSES', 'WITH', 'optional_with'),
        ('OPTIONAL_QUERY_CLAUSES', 'FROM', 'optional_from'),
        ('OPTIONAL_QUERY_CLAUSES', 'WHERE', 'optional_where'),
        ('OPTIONAL_QUERY_CLAUSES', 'GROUP_BY', 'optional_group_by'),
        ('OPTIONAL_QUERY_CLAUSES', 'HAVING', 'optional_having'),
        ('OPTIONAL_QUERY_CLAUSES', 'UNION', 'optional_union'),
        ('OPTIONAL_QUERY_CLAUSES', 'ORDER_BY', 'optional_order_by'),
        # Optional Analytic Clauses Probabilities
        ('OPTIONAL_ANALYTIC_CLAUSES', 'PARTITION_BY',
         'optional_analytic_partition_by'),
        ('OPTIONAL_ANALYTIC_CLAUSES', 'ORDER_BY', 'optional_analytic_order_by'),
        ('OPTIONAL_ANALYTIC_CLAUSES', 'WINDOW', 'optional_analytic_window'),
        # Misc Probabilities
        ('MISC', 'INLINE_VIEW', 'misc_inline_view'),
        ('MISC', 'SELECT_DISTINCT', 'misc_select_distinct'),
        ('MISC', 'SCALAR_SUBQUERY', 'misc_scalar_subquery'),
        ('MISC', 'UNION_ALL', 'misc_union_all'),
    )
    for section, item, field_name in probabilities_table:
      new_profile._probabilities[section][item] = float(submitted[field_name])

    # Analytic Designs: a flag is set exactly when its field is present in the
    # form. (flag name, form field)
    designs_table = (
        ('TOP_LEVEL_QUERY_WITHOUT_LIMIT', 'analytic_designs_top_level_no_limit'),
        ('DETERMINISTIC_ORDER_BY', 'analytic_designs_deterministic_order_by'),
        ('NO_ORDER_BY', 'analytic_designs_no_order_by'),
        ('ONLY_SELECT_ITEM', 'analytic_designs_only_select_item'),
        ('UNBOUNDED_WINDOW', 'analytic_designs_unbounded_window'),
        ('RANK_FUNC', 'analytic_designs_rank_func'),
    )
    for flag_name, field_name in designs_table:
      new_profile._flags['ANALYTIC_DESIGNS'][flag_name] = field_name in submitted

    # NOTE(review): git_command is taken verbatim from the form; confirm how it
    # is consumed downstream.
    schedule_item = ScheduleItem(
        run_name=submitted['run_name'],
        query_profile=new_profile,
        time_limit_sec=int(submitted['time_limit']),
        git_command=submitted['git_command'],
        parent_job='')
  else:
    # Run based on previous run
    schedule_item = ScheduleItem(
        run_name=submitted['run_name'],
        query_profile=DefaultProfile(),
        time_limit_sec=24 * 3600,  # Default time limit is 24 hours
        git_command=submitted['git_command'],
        parent_job=submitted['report_id'])

  schedule_item.save_pickle()

  return 'success'