Beispiel #1
0
    def _concurrently_copy_table(self, src_table):
        """
    Given a Table object, create another Table with the same schema and return the new
    Table object.  The schema will be created in both the test and reference databases.

    The data is then copied in both the ref and test databases using threads.
    """
        with test_conn.cursor() as test_cursor:
            test_cursor.execute('SHOW CREATE TABLE {0}'.format(src_table.name))
            (create_table_sql, ) = test_cursor.fetchall()[0]
            new_table_name = src_table.name + self.COPY_TABLE_SUFFIX
            create_table_sql = create_table_sql.replace(
                src_table.name, new_table_name, 1)
            test_cursor.drop_table(new_table_name)
            test_cursor.execute(create_table_sql)
            new_table = test_cursor.describe_table(new_table_name)
        with ref_conn.cursor() as ref_cursor:
            ref_cursor.drop_table(new_table_name)
            ref_cursor.create_table(new_table)

        copy_select_query = Query()
        copy_select_query.select_clause = SelectClause(
            [SelectItem(col) for col in src_table.cols])
        copy_select_query.from_clause = FromClause(src_table)

        if new_table.primary_keys:
            conflict_action = InsertClause.CONFLICT_ACTION_IGNORE
        else:
            conflict_action = InsertClause.CONFLICT_ACTION_DEFAULT

        table_copy_statement = InsertStatement(
            insert_clause=InsertClause(new_table,
                                       conflict_action=conflict_action),
            select_query=copy_select_query,
            execution=StatementExecutionMode.DML_SETUP)

        result = self.query_result_comparator.compare_query_results(
            table_copy_statement)
        if result.error:
            raise Exception('setup SQL to copy table failed: {0}'.format(
                result.error))
        self._dml_table_size = result.modified_rows_count

        return new_table
Beispiel #2
0
    def generate_statement(self, tables, dml_table):
        """
    Return a randomly generated INSERT statement.

    tables should be a list of Table objects. A typical source of such a list comes from
    db_connection.DbCursor.describe_common_tables(). This list describes the possible
    "sources" of the INSERT's WITH and FROM/WHERE clauses.

    dml_table is a required Table object. The INSERT will be into this table.
    """
        if not (isinstance(tables, list) and len(tables) > 0 and all(
            (isinstance(t, Table) for t in tables))):
            raise Exception('tables must be a not-empty list of Table objects')

        if not isinstance(dml_table, Table):
            raise Exception('dml_table must be a Table')

        self.select_stmt_generator = QueryGenerator(self.profile)

        if dml_table.primary_keys:
            insert_statement = InsertStatement(
                conflict_action=InsertStatement.CONFLICT_ACTION_IGNORE)
        else:
            insert_statement = InsertStatement(
                conflict_action=InsertStatement.CONFLICT_ACTION_DEFAULT)

        insert_statement.execution = StatementExecutionMode.DML_TEST

        # Choose whether this is a
        #   INSERT INTO table SELECT/VALUES
        # or
        #   INSERT INTO table (col1, col2, ...) SELECT/VALUES
        # If the method returns None, it's the former.
        insert_column_list = self.profile.choose_insert_column_list(dml_table)
        insert_statement.insert_clause = InsertClause(
            dml_table, column_list=insert_column_list)
        # We still need to internally track the columns we're inserting. Keep in mind None
        # means "all" without an explicit column list. Since we've already created the
        # InsertClause object though, we can fill this in for ourselves.
        if insert_column_list is None:
            insert_column_list = dml_table.cols
        insert_item_data_types = [col.type for col in insert_column_list]

        # Decide whether this is INSERT VALUES or INSERT SELECT
        insert_source_clause = self.profile.choose_insert_source_clause()

        if issubclass(insert_source_clause, Query):
            # Use QueryGenerator()'s public interface to generate the SELECT.
            select_query = self.select_stmt_generator.generate_statement(
                tables, select_item_data_types=insert_item_data_types)
            # To avoid many loss-of-precision errors, explicitly cast the SelectItems. The
            # generator's type system is not near sophisticated enough to know how random
            # expressions will be implicitly casted in the databases. This requires less work
            # to implement. IMPALA-4693 considers alternative approaches.
            self._cast_select_items(select_query, insert_column_list)
            insert_statement.with_clause = deepcopy(select_query.with_clause)
            select_query.with_clause = None
            insert_statement.select_query = select_query
        elif issubclass(insert_source_clause, ValuesClause):
            insert_statement.values_clause = self._generate_values_clause(
                insert_column_list)
        else:
            raise Exception('unsupported INSERT source clause: {0}'.format(
                insert_source_clause))
        return insert_statement
Beispiel #3
0
         'FROM fake_table'),
        select_item_counts={
            'items': 1,
            'basic_items': 0,
            'agg_items': 0,
            'analytic_items': 1,
        },
    ),
]

INSERT_QUERY_TEST_CASES = [
    InsertStatementTest(
        testid='insert into table select cols',
        query=InsertStatement(
            insert_clause=InsertClause(KUDU_TABLE),
            select_query=FakeQuery(
                select_clause=FakeSelectClause(*SIMPLE_TABLE.cols),
                from_clause=FromClause(SIMPLE_TABLE)),
        ),
        impala_query_string=('INSERT INTO kudu_table\n'
                             'SELECT\n'
                             'fake_table.int_col,\n'
                             'TRIM(fake_table.char_col)\n'
                             'FROM fake_table'),
        postgres_query_string=('INSERT INTO kudu_table\n'
                               'SELECT\n'
                               'fake_table.int_col,\n'
                               'fake_table.char_col\n'
                               'FROM fake_table'),
    ),
    InsertStatementTest(
        testid='insert into table column permutations select cols',
  def generate_statement(self, tables, dml_table):
    """
    Return a randomly generated INSERT or UPSERT statement. Note that UPSERTs are very
    similar to INSERTs, which is why this generator handles both.

    tables should be a list of Table objects. A typical source of such a list comes from
    db_connection.DbCursor.describe_common_tables(). This list describes the possible
    "sources" of the INSERT/UPSERT's WITH and FROM/WHERE clauses.

    dml_table is a required Table object. The INSERT/UPSERT will be into this table.
    """
    if not (isinstance(tables, list) and len(tables) > 0 and
            all((isinstance(t, Table) for t in tables))):
      raise Exception('tables must be a not-empty list of Table objects')

    if not isinstance(dml_table, Table):
      raise Exception('dml_table must be a Table')

    self.select_stmt_generator = QueryGenerator(self.profile)

    insert_statement = InsertStatement(execution=StatementExecutionMode.DML_TEST)

    # Choose whether this is a
    #   INSERT/UPSERT INTO table SELECT/VALUES
    # or
    #   INSERT/UPSERT INTO table (col1, col2, ...) SELECT/VALUES
    # If the method returns None, it's the former.
    insert_column_list = self.profile.choose_insert_column_list(dml_table)

    if dml_table.primary_keys:
      # Having primary keys implies the table is a Kudu table, which makes it subject to
      # both INSERTs (with automatic ignoring of primary key duplicates) and UPSERTs.
      conflict_action = self.profile.choose_insert_vs_upsert()
    else:
      conflict_action = InsertClause.CONFLICT_ACTION_DEFAULT
    insert_statement.insert_clause = InsertClause(
        dml_table, column_list=insert_column_list, conflict_action=conflict_action)
    # We still need to internally track the columns we're inserting. Keep in mind None
    # means "all" without an explicit column list. Since we've already created the
    # InsertClause object though, we can fill this in for ourselves.
    if insert_column_list is None:
      insert_column_list = dml_table.cols
    insert_item_data_types = [col.type for col in insert_column_list]

    # Decide whether this is INSERT/UPSERT VALUES or INSERT/UPSERT SELECT
    insert_source_clause = self.profile.choose_insert_source_clause()

    if issubclass(insert_source_clause, Query):
      # Use QueryGenerator()'s public interface to generate the SELECT.
      select_query = self.select_stmt_generator.generate_statement(
          tables, select_item_data_types=insert_item_data_types)
      # To avoid many loss-of-precision errors, explicitly cast the SelectItems. The
      # generator's type system is not near sophisticated enough to know how random
      # expressions will be implicitly casted in the databases. This requires less work
      # to implement. IMPALA-4693 considers alternative approaches.
      self._cast_select_items(select_query, insert_column_list)
      insert_statement.with_clause = deepcopy(select_query.with_clause)
      select_query.with_clause = None
      insert_statement.select_query = select_query
    elif issubclass(insert_source_clause, ValuesClause):
      insert_statement.values_clause = self._generate_values_clause(insert_column_list)
    else:
      raise Exception('unsupported INSERT/UPSERT source clause: {0}'.format(
          insert_source_clause))
    return insert_statement