예제 #1
0
  def __init__(self, impala_connection, reference_connection):
    self.reference_db_type = reference_connection.db_type

    self.impala_cursor = impala_connection.create_cursor()
    self.reference_cursor = reference_connection.create_cursor()

    self.impala_sql_writer = SqlWriter.create(dialect=impala_connection.db_type)
    self.reference_sql_writer = SqlWriter.create(dialect=reference_connection.db_type)

    # At this time the connection will be killed and ther comparison result will be
    # timeout.
    self.query_timeout_seconds = 3 * 60
예제 #2
0
 def __init__(self, query_profile, ref_conn, test_conn):
   '''query_profile should be an instance of one of the profiles in query_profile.py'''
   self.query_profile = query_profile
   self.ref_conn = ref_conn
   self.test_conn = test_conn
   self.ref_sql_writer = SqlWriter.create(dialect=ref_conn.db_type)
   self.test_sql_writer = SqlWriter.create(dialect=test_conn.db_type)
   with ref_conn.cursor() as ref_cursor:
     with test_conn.cursor() as test_cursor:
       self.common_tables = DbCursor.describe_common_tables([ref_cursor, test_cursor])
       if not self.common_tables:
         raise Exception("Unable to find a common set of tables in both databases")
 def __init__(self, query_profile, ref_conn, test_conn):
   '''query_profile should be an instance of one of the profiles in query_profile.py'''
   self.query_profile = query_profile
   self.ref_conn = ref_conn
   self.test_conn = test_conn
   self.ref_sql_writer = SqlWriter.create(dialect=ref_conn.db_type)
   self.test_sql_writer = SqlWriter.create(dialect=test_conn.db_type)
   with ref_conn.cursor() as ref_cursor:
     with test_conn.cursor() as test_cursor:
       self.common_tables = DbCursor.describe_common_tables([ref_cursor, test_cursor])
       if not self.common_tables:
         raise Exception("Unable to find a common set of tables in both databases")
예제 #4
0
  def __init__(self, ref_connection, test_connection):
    '''test/ref_connection arguments should be an instance of DbConnection'''
    ref_cursor = ref_connection.create_cursor()
    test_cursor = test_connection.create_cursor()

    self.ref_connection = ref_connection
    self.ref_sql_writer = SqlWriter.create(dialect=ref_connection.db_type)
    self.test_connection = test_connection
    self.test_sql_writer = SqlWriter.create(dialect=test_connection.db_type)

    self.query_executor = QueryExecutor(
        [ref_cursor, test_cursor],
        [self.ref_sql_writer, self.test_sql_writer],
        query_timeout_seconds=(3 * 60))
예제 #5
0
    def __init__(self, impala_connection, reference_connection):
        self.reference_db_type = reference_connection.db_type

        self.impala_cursor = impala_connection.create_cursor()
        self.reference_cursor = reference_connection.create_cursor()

        self.impala_sql_writer = SqlWriter.create(
            dialect=impala_connection.db_type)
        self.reference_sql_writer = SqlWriter.create(
            dialect=reference_connection.db_type)

        # At this time the connection will be killed and ther comparison result will be
        # timeout.
        self.query_timeout_seconds = 3 * 60
예제 #6
0
    def __init__(self, ref_connection, test_connection):
        '''test/ref_connection arguments should be an instance of DbConnection'''
        ref_cursor = ref_connection.create_cursor()
        test_cursor = test_connection.create_cursor()

        self.ref_connection = ref_connection
        self.ref_sql_writer = SqlWriter.create(dialect=ref_connection.db_type)
        self.test_connection = test_connection
        self.test_sql_writer = SqlWriter.create(
            dialect=test_connection.db_type)

        self.query_executor = QueryExecutor(
            [ref_cursor, test_cursor],
            [self.ref_sql_writer, self.test_sql_writer],
            query_timeout_seconds=(3 * 60))
예제 #7
0
  def __init__(self, query_profile, ref_conn,
      test_conn, query_timeout_seconds, flatten_dialect=None):
    '''test/ref_conn arguments should be an instance of DbConnection'''
    ref_cursor = ref_conn.cursor()
    test_cursor = test_conn.cursor()

    self.ref_conn = ref_conn
    self.ref_sql_writer = SqlWriter.create(
        dialect=ref_conn.db_type, nulls_order_asc=query_profile.nulls_order_asc())
    self.test_conn = test_conn
    self.test_sql_writer = SqlWriter.create(dialect=test_conn.db_type)

    self.query_executor = QueryExecutor(
        [ref_cursor, test_cursor],
        [self.ref_sql_writer, self.test_sql_writer],
        query_timeout_seconds=query_timeout_seconds,
        flatten_dialect=flatten_dialect)
예제 #8
0
  def __init__(self, query_profile, ref_conn,
      test_conn, query_timeout_seconds, flatten_dialect=None):
    '''test/ref_conn arguments should be an instance of DbConnection'''
    ref_cursor = ref_conn.cursor()
    test_cursor = test_conn.cursor()

    self.ref_conn = ref_conn
    self.ref_sql_writer = SqlWriter.create(
        dialect=ref_conn.db_type, nulls_order_asc=query_profile.nulls_order_asc())
    self.test_conn = test_conn
    self.test_sql_writer = SqlWriter.create(dialect=test_conn.db_type)

    self.query_executor = QueryExecutor(
        [ref_cursor, test_cursor],
        [self.ref_sql_writer, self.test_sql_writer],
        query_timeout_seconds=query_timeout_seconds,
        flatten_dialect=flatten_dialect)
예제 #9
0
파일: queries.py 프로젝트: yx91490/impala-1
def generate_random_queries(impala, random_db):
    """Generator function to produce random queries. 'impala' is the Impala service
  object. random_db is the name of the database that queries should be
  generated for."""
    with impala.cursor(db_name=random_db) as cursor:
        tables = [cursor.describe_table(t) for t in cursor.list_table_names()]
    query_generator = QueryGenerator(DefaultProfile())
    model_translator = SqlWriter.create()
    while True:
        query_model = query_generator.generate_statement(tables)
        sql = model_translator.write_query(query_model)
        query = Query()
        query.sql = sql
        query.db_name = random_db
        yield query
예제 #10
0
def generate_random_queries(impala, random_db):
  """Generator function to produce random queries. 'impala' is the Impala service
  object. random_db is the name of the database that queries should be
  generated for."""
  with impala.cursor(db_name=random_db) as cursor:
    tables = [cursor.describe_table(t) for t in cursor.list_table_names()]
  query_generator = QueryGenerator(DefaultProfile())
  model_translator = SqlWriter.create()
  while True:
    query_model = query_generator.generate_statement(tables)
    sql = model_translator.write_query(query_model)
    query = Query()
    query.sql = sql
    query.db_name = random_db
    yield query
예제 #11
0
    def search(self, number_of_test_queries, stop_on_result_mismatch,
               stop_on_crash):
        if exists(self.query_shelve_path):
            # Ensure a clean shelve will be created
            remove(self.query_shelve_path)

        start_time = time()
        impala_sql_writer = SqlWriter.create(dialect=IMPALA)
        reference_sql_writer = SqlWriter.create(
            dialect=self.reference_connection.db_type)
        query_result_comparator = QueryResultComparator(
            self.impala_connection, self.reference_connection)
        query_generator = QueryGenerator()
        query_count = 0
        queries_resulted_in_data_count = 0
        mismatch_count = 0
        query_timeout_count = 0
        known_error_count = 0
        impala_crash_count = 0
        last_error = None
        repeat_error_count = 0
        with open(self.query_log_path, 'w') as impala_query_log:
            impala_query_log.write('--\n' '-- Stating new run\n' '--\n')
            while number_of_test_queries > query_count:
                query = query_generator.create_query(self.common_tables)
                impala_sql = impala_sql_writer.write_query(query)
                if 'FULL OUTER JOIN' in impala_sql and self.reference_connection.db_type == MYSQL:
                    # Not supported by MySQL
                    continue

                query_count += 1
                LOG.info('Running query #%s', query_count)
                impala_query_log.write(impala_sql + ';\n')
                result = query_result_comparator.compare_query_results(query)
                if result.query_resulted_in_data:
                    queries_resulted_in_data_count += 1
                if result.error:
                    # TODO: These first two come from psycopg2, the postgres driver. Maybe we should
                    #       try a different driver? Or maybe the usage of the driver isn't correct.
                    #       Anyhow ignore these failures.
                    if 'division by zero' in result.error \
                        or 'out of range' in result.error \
                        or 'Too much data' in result.error:
                        LOG.debug('Ignoring error: %s', result.error)
                        query_count -= 1
                        continue

                    if result.is_known_error:
                        known_error_count += 1
                    elif result.query_timed_out:
                        query_timeout_count += 1
                    else:
                        mismatch_count += 1
                        with closing(open_shelve(
                                self.query_shelve_path)) as query_shelve:
                            query_shelve[str(query_count)] = query

                    print('---Impala Query---\n')
                    print(
                        impala_sql_writer.write_query(query, pretty=True) +
                        '\n')
                    print('---Reference Query---\n')
                    print(
                        reference_sql_writer.write_query(query, pretty=True) +
                        '\n')
                    print('---Error---\n')
                    print(result.error + '\n')
                    print('------\n')

                    if 'Could not connect' in result.error \
                        or "Couldn't open transport for" in result.error:
                        # if stop_on_crash:
                        #   break
                        # Assume Impala crashed and try restarting
                        impala_crash_count += 1
                        LOG.info('Restarting Impala')
                        call([
                            join(getenv('IMPALA_HOME'),
                                 'bin/start-impala-cluster.py'),
                            '--log_dir=%s' % getenv('LOG_DIR', "/tmp/")
                        ])
                        self.impala_connection.reconnect()
                        query_result_comparator.impala_cursor = self.impala_connection.create_cursor(
                        )
                        result = query_result_comparator.compare_query_results(
                            query)
                        if result.error:
                            LOG.info('Restarting Impala')
                            call([
                                join(getenv('IMPALA_HOME'),
                                     'bin/start-impala-cluster.py'),
                                '--log_dir=%s' % getenv('LOG_DIR', "/tmp/")
                            ])
                            self.impala_connection.reconnect()
                            query_result_comparator.impala_cursor = self.impala_connection.create_cursor(
                            )
                        else:
                            break

                    if stop_on_result_mismatch and \
                        not (result.is_known_error or result.query_timed_out):
                        break

                    if last_error == result.error \
                        and not (result.is_known_error or result.query_timed_out):
                        repeat_error_count += 1
                        if repeat_error_count == self.ABORT_ON_REPEAT_ERROR_COUNT:
                            break
                    else:
                        last_error = result.error
                        repeat_error_count = 0
                else:
                    if result.query_resulted_in_data:
                        LOG.info('Results matched (%s rows)',
                                 result.impala_row_count)
                    else:
                        LOG.info('Query did not produce meaningful data')
                    last_error = None
                    repeat_error_count = 0

            return SearchResults(query_count, queries_resulted_in_data_count,
                                 mismatch_count, query_timeout_count,
                                 known_error_count, impala_crash_count,
                                 time() - start_time)
예제 #12
0
  def search(self, number_of_test_queries, stop_on_result_mismatch, stop_on_crash):
    if exists(self.query_shelve_path):
      # Ensure a clean shelve will be created
      remove(self.query_shelve_path)

    start_time = time()
    impala_sql_writer = SqlWriter.create(dialect=IMPALA)
    reference_sql_writer = SqlWriter.create(
        dialect=self.reference_connection.db_type)
    query_result_comparator = QueryResultComparator(
        self.impala_connection, self.reference_connection)
    query_generator = QueryGenerator()
    query_count = 0
    queries_resulted_in_data_count = 0
    mismatch_count = 0
    query_timeout_count = 0
    known_error_count = 0
    impala_crash_count = 0
    last_error = None
    repeat_error_count = 0
    with open(self.query_log_path, 'w') as impala_query_log:
      impala_query_log.write(
         '--\n'
         '-- Stating new run\n'
         '--\n')
      while number_of_test_queries > query_count:
        query = query_generator.create_query(self.common_tables)
        impala_sql = impala_sql_writer.write_query(query)
        if 'FULL OUTER JOIN' in impala_sql and self.reference_connection.db_type == MYSQL:
          # Not supported by MySQL
          continue

        query_count += 1
        LOG.info('Running query #%s', query_count)
        impala_query_log.write(impala_sql + ';\n')
        result = query_result_comparator.compare_query_results(query)
        if result.query_resulted_in_data:
          queries_resulted_in_data_count += 1
        if result.error:
          # TODO: These first two come from psycopg2, the postgres driver. Maybe we should
          #       try a different driver? Or maybe the usage of the driver isn't correct.
          #       Anyhow ignore these failures.
          if 'division by zero' in result.error \
              or 'out of range' in result.error \
              or 'Too much data' in result.error:
            LOG.debug('Ignoring error: %s', result.error)
            query_count -= 1
            continue

          if result.is_known_error:
            known_error_count += 1
          elif result.query_timed_out:
            query_timeout_count += 1
          else:
            mismatch_count += 1
            with closing(open_shelve(self.query_shelve_path)) as query_shelve:
              query_shelve[str(query_count)] = query

          print('---Impala Query---\n')
          print(impala_sql_writer.write_query(query, pretty=True) + '\n')
          print('---Reference Query---\n')
          print(reference_sql_writer.write_query(query, pretty=True) + '\n')
          print('---Error---\n')
          print(result.error + '\n')
          print('------\n')

          if 'Could not connect' in result.error \
              or "Couldn't open transport for" in result.error:
            # if stop_on_crash:
            #   break
            # Assume Impala crashed and try restarting
            impala_crash_count += 1
            LOG.info('Restarting Impala')
            call([join(getenv('IMPALA_HOME'), 'bin/start-impala-cluster.py'),
                            '--log_dir=%s' % getenv('LOG_DIR', "/tmp/")])
            self.impala_connection.reconnect()
            query_result_comparator.impala_cursor = self.impala_connection.create_cursor()
            result = query_result_comparator.compare_query_results(query)
            if result.error:
              LOG.info('Restarting Impala')
              call([join(getenv('IMPALA_HOME'), 'bin/start-impala-cluster.py'),
                              '--log_dir=%s' % getenv('LOG_DIR', "/tmp/")])
              self.impala_connection.reconnect()
              query_result_comparator.impala_cursor = self.impala_connection.create_cursor()
            else:
              break

          if stop_on_result_mismatch and \
              not (result.is_known_error or result.query_timed_out):
            break

          if last_error == result.error \
              and not (result.is_known_error or result.query_timed_out):
            repeat_error_count += 1
            if repeat_error_count == self.ABORT_ON_REPEAT_ERROR_COUNT:
              break
          else:
            last_error = result.error
            repeat_error_count = 0
        else:
          if result.query_resulted_in_data:
            LOG.info('Results matched (%s rows)', result.impala_row_count)
          else:
            LOG.info('Query did not produce meaningful data')
          last_error = None
          repeat_error_count = 0

      return SearchResults(
          query_count,
          queries_resulted_in_data_count,
          mismatch_count,
          query_timeout_count,
          known_error_count,
          impala_crash_count,
          time() - start_time)
예제 #13
0
def sql_writer(request):
  """
  Return a SqlWriter object that is torn down at the end of each test.
  """
  yield SqlWriter.create(dialect=request.param)
예제 #14
0
def sql_writer(request):
    """
  Return a SqlWriter object that is torn down at the end of each test.
  """
    yield SqlWriter.create(dialect=request.param)
예제 #15
0
def sql_writer():
    """
  Return a SqlWriter object that is torn down at the end of each test.
  """
    # TODO: Later, we can parametrize on dialect, but for now, this is just PoC.
    yield SqlWriter.create(dialect='IMPALA')