예제 #1
0
def apply_sql(query: str, output_name: Optional[str],
              found: Dict[str, beam.PCollection]) -> Tuple[str, PValue]:
    """Applies a SqlTransform with the given sql and queried PCollections.

    On success, the output is additionally bound to an attribute named after
    the output variable on the __main__ module and registered with
    ``ib.watch``.

    Args:
      query: The SQL query executed in the magic.
      output_name: (optional) The output variable name in __main__ module.
      found: The PCollections with variable names found to be used in the
        query.

    Returns:
      A Tuple[str, PValue]. First str value is the output variable name in
      __main__ module (auto-generated if not provided). Second PValue is
      most likely a PCollection, depending on the query.

    Raises:
      Exception: Any error raised while applying the SQL. It is first
        reported through ``on_error`` and then re-raised, so that the
        function never falls through and implicitly returns None (which
        would break callers unpacking the annotated tuple).
    """
    output_name = _generate_output_name(output_name, query, found)
    query, sql_source = _build_query_components(query, found)
    try:
        output = sql_source | SqlTransform(query)
        # Declare a variable with the output_name and output value in the
        # __main__ module so that the user can use the output smoothly.
        setattr(importlib.import_module('__main__'), output_name, output)
        ib.watch({output_name: output})
        _LOGGER.info(
            "The output PCollection variable is %s with element_type %s",
            output_name, pformat_namedtuple(output.element_type))
        return output_name, output
    except (KeyboardInterrupt, SystemExit):
        # Never intercept user interrupts or interpreter shutdown.
        raise
    except Exception as e:
        on_error('Error when applying the Beam SQL: %s', e)
        # Propagate the failure: if on_error only reports the problem
        # without raising, falling through here would return None and
        # violate the declared Tuple[str, PValue] return contract.
        raise
예제 #2
0
 def test_pformat_namedtuple_with_unnamed_fields(self):
     """Checks pformat_namedtuple output for OptionalUnionType.

     The formatted string must match one of two accepted renderings that
     differ only in the order of the Union parameters.
     """
     # Parameters of a Union type can be in any order, so accept both.
     int_first = 'OptionalUnionType(unnamed: typing.Union[int, str, NoneType])'
     str_first = 'OptionalUnionType(unnamed: typing.Union[str, int, NoneType])'
     formatted = pformat_namedtuple(OptionalUnionType)
     self.assertIn(formatted, (int_first, str_first))
예제 #3
0
def apply_sql(
    query: str,
    output_name: Optional[str],
    found: Dict[str, beam.PCollection],
    run: bool = True) -> Tuple[str, Union[PValue, SqlNode], SqlChain]:
  """Builds the SQL components for a query and, if requested, applies them.

  Args:
    query: The SQL query executed in the magic.
    output_name: (optional) The output variable name in __main__ module.
    found: The PCollections with variable names found to be used in the query.
    run: When True, a SqlTransform is applied to the built source and the
      result is exposed through create_var_in_main. When False, nothing is
      applied and the chain's current node is returned instead.

  Returns:
    A 3-tuple. The first item is the output variable name. The second item
    is the applied output when run is True, otherwise chain.current. The
    third item is the chain returned by _build_query_components.

  Raises:
    Any error raised while applying the SQL. Errors other than
    KeyboardInterrupt and SystemExit are reported through on_error with the
    formatted traceback before being re-raised.
  """
  var_name = _generate_output_name(output_name, query, found)
  sql, source, chain = _build_query_components(query, found, var_name, run)

  # Nothing to apply when not running: hand back the tracked node only.
  if not run:
    return var_name, chain.current, chain

  try:
    applied = source | SqlTransform(sql)
    # Expose the result under var_name in the __main__ module so that the
    # user can keep working with it directly.
    var_name, applied = create_var_in_main(var_name, applied)
    _LOGGER.info(
        "The output PCollection variable is %s with element_type %s",
        var_name,
        pformat_namedtuple(applied.element_type))
  except (KeyboardInterrupt, SystemExit):
    # Let interrupts and interpreter exits pass through unreported.
    raise
  except:  # pylint: disable=bare-except
    on_error('Error when applying the Beam SQL: %s', traceback.format_exc())
    raise
  # Both handlers above re-raise, so this is reached only on success.
  return var_name, applied, chain
예제 #4
0
 def test_pformat_namedtuple(self):
     """Checks the string pformat_namedtuple produces for ANamedTuple."""
     expected = 'ANamedTuple(a: int, b: str)'
     self.assertEqual(expected, pformat_namedtuple(ANamedTuple))
예제 #5
0
 def test_pformat_namedtuple(self):
     """Checks the string pformat_namedtuple produces for ANamedTuple."""
     rendered = pformat_namedtuple(ANamedTuple)
     self.assertEqual('ANamedTuple(a: int, b: str)', rendered)