Esempio n. 1
0
 def table_exprs(self):
   '''Return the table_exprs declared within this FROM block.

      The result is a TableExprList holding the table_expr of each entry in
      self.join_clauses, with this block's own self.table_expr appended last.
   '''
   joined_exprs = [clause.table_expr for clause in self.join_clauses]
   declared = TableExprList(joined_exprs)
   declared.append(self.table_expr)
   return declared
Esempio n. 2
0
 def table_exprs(self):
     '''Return the table_exprs declared within this FROM block.

     Gathers the table_expr of every entry in self.join_clauses, in iteration
     order, and then adds this block's own self.table_expr at the end.
     '''
     declared = TableExprList()
     for clause in self.join_clauses:
         declared.append(clause.table_expr)
     declared.append(self.table_expr)
     return declared
Esempio n. 3
0
    def describe_common_tables(cursors):
        '''Return a TableExprList of the Table objects shared by every given cursor.

        Only table names listed by all cursors are considered, in sorted order. For
        each name, the description from the first cursor is the reference. The table
        is left out (with a debug log) as soon as a later cursor's description has no
        columns, a different column count, different primary key names, or a column
        whose name or type differs from the reference at the same position.
        '''
        shared_names = None
        for cursor in cursors:
            names = set(cursor.list_table_names())
            shared_names = names if shared_names is None else shared_names & names

        tables = TableExprList()
        for table_name in sorted(shared_names):
            reference = None
            for cursor in cursors:
                candidate = cursor.describe_table(table_name)
                if reference is None:
                    # The first description seen becomes the baseline for the rest.
                    reference = candidate
                    continue
                if not candidate.cols:
                    LOG.debug('%s has no remaining columns', table_name)
                    break
                if len(reference.cols) != len(candidate.cols):
                    LOG.debug(
                        'Ignoring table %s.'
                        ' It has a different number of columns across databases.',
                        table_name)
                    break
                if reference.primary_key_names != candidate.primary_key_names:
                    LOG.debug(
                        'Ignoring table {name} because of differing primary keys: '
                        '{common_table_keys} vs. {table_keys}'.format(
                            name=table_name,
                            common_table_keys=reference.primary_key_names,
                            table_keys=candidate.primary_key_names))
                    break
                # Locate the first positional column pair that disagrees, if any.
                differing = next(
                    ((left, right)
                     for left, right in izip(reference.cols, candidate.cols)
                     if not (left.name == right.name
                             and left.type == right.type)),
                    None)
                if differing is not None:
                    left, right = differing
                    LOG.debug(
                        'Ignoring table %s. It has different columns %s vs %s.'
                        % (table_name, left, right))
                    break
            else:
                # The cursor loop finished without a break: nothing disagreed.
                tables.append(reference)

        return tables
Esempio n. 4
0
 def table_exprs(self):
   """
   Collect the table expressions contributed by this query's WITH clause.

   Per the original notes this is only a partial implementation (described there as an
   abstractproperty): the clauses that declare table expressions differ across query
   types, but any supported query may carry a WITH clause, so that part is handled here.
   """
   collected = TableExprList([])
   if not self.with_clause:
     # No WITH clause, so there is nothing to contribute.
     return collected
   collected.extend(self.with_clause.table_exprs)
   return collected
Esempio n. 5
0
 def table_exprs(self):
   """
   Return a TableExprList with the table expressions of the WITH clause, if there is one.

   The original notes call this an abstractproperty with a *partial* implementation:
   which clauses declare table expressions depends on the query type, while the WITH
   clause is common to all supported queries and is therefore covered here.
   """
   with_exprs = TableExprList([])
   if not self.with_clause:
     # Without a WITH clause the list stays empty.
     return with_exprs
   with_exprs.extend(self.with_clause.table_exprs)
   return with_exprs
Esempio n. 6
0
  def describe_common_tables(cursors):
    '''Build a TableExprList of the Table objects that all given cursors agree on.

       Table names are intersected across the cursors and visited in sorted order.
       The first cursor's description of a table is the baseline; the table is
       skipped (with a debug log) when a later description has no columns, a
       different number of columns, different primary key names, or a column whose
       name or type does not match the baseline column at the same position.
    '''
    names_in_all = None
    for cursor in cursors:
      current_names = set(cursor.list_table_names())
      if names_in_all is None:
        names_in_all = current_names
        continue
      names_in_all &= current_names
    ordered_names = sorted(names_in_all)

    tables = TableExprList()
    for table_name in ordered_names:
      baseline = None
      for cursor in cursors:
        described = cursor.describe_table(table_name)
        if baseline is None:
          baseline = described
        elif not described.cols:
          LOG.debug('%s has no remaining columns', table_name)
          break
        elif len(baseline.cols) != len(described.cols):
          LOG.debug('Ignoring table %s.'
              ' It has a different number of columns across databases.', table_name)
          break
        elif baseline.primary_key_names != described.primary_key_names:
          LOG.debug(
              'Ignoring table {name} because of differing primary keys: '
              '{common_table_keys} vs. {table_keys}'.format(
                  name=table_name, common_table_keys=baseline.primary_key_names,
                  table_keys=described.primary_key_names))
          break
        else:
          for left, right in izip(baseline.cols, described.cols):
            if left.name == right.name and left.type == right.type:
              continue
            LOG.debug('Ignoring table %s. It has different columns %s vs %s.' %
                (table_name, left, right))
            break
          else:
            # Every column pair matched; move on to the next cursor.
            continue
          # A column pair differed; stop checking this table.
          break
      else:
        # No cursor triggered a break, so the table is common to all of them.
        tables.append(baseline)

    return tables
def test_use_nested_width_subquery():
  """
  Check that a profile whose use_nested_with() returns False produces nested queries
  that carry no WITH clause.
  """

  class MockQueryProfile(DefaultProfile):
    """
    DefaultProfile variant that reports use_nested_with as False while steering the
    QueryGenerator towards nested queries, WITH clauses and inline views.
    """

    def __init__(self):
      super(MockQueryProfile, self).__init__()

      # Pin the nested query count bounds so that nested queries get created.
      self._bounds['MAX_NESTED_QUERY_COUNT'] = (4, 4)

      probabilities = self._probabilities
      # Use WITH clauses whenever possible.
      probabilities['OPTIONAL_QUERY_CLAUSES']['WITH'] = 1
      # Create inline views whenever possible.
      probabilities['MISC']['INLINE_VIEW'] = 1

    def use_nested_with(self):
      return False

  generator = QueryGenerator(MockQueryProfile())

  # Two tables, each with a single Int column, for the generator to draw from.
  available_tables = TableExprList()
  for table_name, col_name in (("right_table", "right_col"),
                               ("left_table", "left_col")):
    table = Table(table_name)
    table.add_col(Column(table_name, col_name, Int))
    available_tables.append(table)

  # None of the nested queries may have a WITH clause.
  generated_query = generator.create_query(available_tables)
  for subquery in generated_query.nested_queries:
    assert subquery.with_clause is None
  def describe_common_tables(db_connections):
    '''Find and return a TableExprList containing Table objects that the given connections
       have in common.

       A table qualifies when its name is listed by every connection and every later
       connection's description of it has columns, has the same number of columns as
       the first connection's description, and matches it column by column on both
       name and type. The description returned by the first connection is the one
       placed in the result. Tables that do not qualify are skipped with a debug log
       message.
    '''
    # Intersect the table names reported by each connection.
    common_table_names = None
    for db_connection in db_connections:
      table_names = set(db_connection.list_table_names())
      if common_table_names is None:
        common_table_names = table_names
      else:
        common_table_names &= table_names
    common_table_names = sorted(common_table_names)

    tables = TableExprList()
    for table_name in common_table_names:
      common_table = None
      mismatch = False
      for db_connection in db_connections:
        table = db_connection.describe_table(table_name)
        if common_table is None:
          # The first description seen is the reference for the other connections.
          common_table = table
          continue
        if not table.cols:
          LOG.debug('%s has no remaining columns', table_name)
          mismatch = True
          break
        if len(common_table.cols) != len(table.cols):
          LOG.debug('Ignoring table %s.'
              ' It has a different number of columns across databases.', table_name)
          mismatch = True
          break
        for left, right in izip(common_table.cols, table.cols):
          # The parentheses matter: "not" binds tighter than "and". Without them a
          # column pair was rejected only when the names differed AND the types
          # matched, so type-only differences slipped through.
          if not (left.name == right.name and left.type == right.type):
            LOG.debug('Ignoring table %s. It has different columns %s vs %s.',
                table_name, left, right)
            mismatch = True
            break
        if mismatch:
          break
      if not mismatch:
        tables.append(common_table)

    return tables
def test_hive_create_equality_only_joins():
  """
  Check that, with a HiveProfile-based profile, the relational join condition built by
  the QueryGenerator is an Equals function.
  """

  class FakeHiveQueryProfile(HiveProfile):
    """
    HiveProfile subclass with fixed choices, so that the outcome of this test does not
    depend on randomness.
    """

    def choose_join_condition_count(self):
      """
      Use exactly one operator in the JOIN condition.
      """
      return 1

    def choose_conjunct_disjunct_fill_ratio(self):
      """
      Leave no room for AND or OR operators.
      """
      return 0

    def choose_relational_func_fill_ratio(self):
      """
      Make every operator a relational one.
      """
      return 1

  def single_table_list(table_name, col_name):
    # Build a TableExprList holding one table that has one Int column.
    table = Table(table_name)
    table.add_col(Column(table_name, col_name, Int))
    table_exprs = TableExprList()
    table_exprs.append(table)
    return table_exprs

  generator = QueryGenerator(FakeHiveQueryProfile())

  # One table per side, each offering a single joinable Int column.
  right_exprs = single_table_list("right_table", "right_col")
  left_exprs = single_table_list("left_table", "left_col")

  # The root predicate of the join condition must be an Equals function.
  join_condition = generator._create_relational_join_condition(right_exprs, left_exprs)
  assert isinstance(join_condition, Equals)
Esempio n. 10
0
def test_use_nested_width_subquery():
  """
  Verify that overriding DefaultProfile.use_nested_with to return False keeps WITH
  clauses out of the nested queries of a generated statement.
  """

  class MockQueryProfile(DefaultProfile):
    """
    Profile that turns nested WITH clauses off and nudges the QueryGenerator into
    producing nested queries, WITH clauses and inline views.
    """

    def __init__(self):
      super(MockQueryProfile, self).__init__()

      # Fix the nested query count bounds so that nested queries are produced.
      self._bounds['MAX_NESTED_QUERY_COUNT'] = (4, 4)

      # Always take the WITH clause and inline view options when they are possible.
      probabilities = self._probabilities
      probabilities['OPTIONAL_QUERY_CLAUSES']['WITH'] = 1
      probabilities['MISC']['INLINE_VIEW'] = 1

    def use_nested_with(self):
      return False

  generator = QueryGenerator(MockQueryProfile())

  # Two source tables, each with one Int column.
  source_tables = TableExprList()

  right = Table("right_table")
  right.add_col(Column("right_table", "right_col", Int))
  source_tables.append(right)

  left = Table("left_table")
  left.add_col(Column("left_table", "left_col", Int))
  source_tables.append(left)

  # Every nested query of the generated statement must lack a WITH clause.
  statement = generator.generate_statement(source_tables)
  for subquery in statement.nested_queries:
    assert subquery.with_clause is None
Esempio n. 11
0
# multiple FakeQueries.
SIMPLE_TABLE = FakeTable('fake_table', [
    FakeColumn('int_col', Int),  # referenced below as SIMPLE_TABLE.cols[0]
    FakeColumn('char_col', Char),
])

# Same two columns as SIMPLE_TABLE, declared as 'kudu_table' and with int_col flagged as
# a primary key.
KUDU_TABLE = FakeTable('kudu_table', [
    FakeColumn('int_col', Int, is_primary_key=True),
    FakeColumn('char_col', Char),
])

# This can't be used inline because we need its table expressions later.
# It wraps a single WithClauseInlineView, built with the label 'with_view' (presumably
# the view's name -- confirm against WithClauseInlineView), around a FakeQuery that
# selects SIMPLE_TABLE's first column from SIMPLE_TABLE.
SIMPLE_WITH_CLAUSE = WithClause(
    TableExprList([
        WithClauseInlineView(
            FakeQuery(select_clause=FakeSelectClause(SIMPLE_TABLE.cols[0]),
                      from_clause=FromClause(SIMPLE_TABLE)), 'with_view')
    ]))

# All tests involving SELECT queries should be written to use this data set.
SELECT_QUERY_TEST_CASES = [
    QueryTest(
        testid='select col from table',
        query=FakeQuery(
            select_clause=FakeSelectClause(*SIMPLE_TABLE.cols),
            from_clause=FromClause(SIMPLE_TABLE),
        ),
        impala_query_string=('SELECT\n'
                             'fake_table.int_col,\n'
                             'TRIM(fake_table.char_col)\n'
                             'FROM fake_table'),
Esempio n. 12
0
 def visible_table_exprs(self):
     '''Return a TableExprList of this FROM block's table_exprs that are visible.

     Only entries of self.table_exprs whose is_visible attribute is truthy are
     kept; per the original note these are the ones that may be referenced in
     other clauses such as SELECT or WHERE.
     '''
     visible = [expr for expr in self.table_exprs if expr.is_visible]
     return TableExprList(visible)