예제 #1
0
  def test_get_execute_parameters(self, mock_notebook_item):
    # Checks the config dict built by bq._get_execute_parameters, first with an
    # output config and then with an empty one. mock_notebook_item is the mock
    # whose return_value supplies the Query (its patch target is declared
    # outside this block).
    query_sql = 'SELECT @column FROM publicdata.samples.wikipedia where endpoint=@endpoint'
    mock_notebook_item.return_value = google.datalab.bigquery.Query(query_sql)

    transformation_config = {'query': 'foo_query'}
    output_config = {'table': 'foo_table', 'mode': 'foo_mode'}
    parameters_config = [
      {'type': 'STRING', 'name': 'endpoint', 'value': 'Interact2'},
      {'type': 'INTEGER', 'name': 'column', 'value': '1234'},
    ]

    # Output config present: its table and mode should appear in the result.
    actual = bq._get_execute_parameters(
        'foo_load_task', transformation_config, output_config, parameters_config)
    expected = {
      'type': 'pydatalab.bq.execute',
      'sql': query_sql,
      'up_stream': ['foo_load_task'],
      'table': 'foo_table',
      'mode': 'foo_mode',
      'parameters': parameters_config,
    }
    self.assertDictEqual(actual, expected)

    # Empty output config: no table or mode keys are expected.
    actual = bq._get_execute_parameters(
        'foo_load_task', transformation_config, {}, parameters_config)
    expected = {
      'type': 'pydatalab.bq.execute',
      'sql': query_sql,
      'up_stream': ['foo_load_task'],
      'parameters': parameters_config,
    }
    self.assertDictEqual(actual, expected)
예제 #2
0
    def test_get_execute_parameters(self, mock_notebook_item):
        # Drives bq._get_execute_parameters through a series of input/output
        # config combinations and checks each resulting config with
        # assertExecuteConfigEquals. mock_notebook_item is the mock whose
        # return_value supplies the Query (its patch target is declared
        # outside this block).
        wikipedia_sql = (
            'SELECT @column\n'
            'FROM publicdata.samples.wikipedia\n'
            'WHERE endpoint=@endpoint')
        mock_notebook_item.return_value = google.datalab.bigquery.Query(
            wikipedia_sql)

        transformation_config = {'query': 'foo_query'}
        output_config = {
            'table': 'foo_table_%(_ts_month)s',
            'mode': 'foo_mode',
        }
        parameters_config = [
            {'type': 'STRING', 'name': 'endpoint', 'value': 'Interact2'},
            {'type': 'INTEGER', 'name': 'column', 'value': '1234'},
        ]

        # Empty input config: output table and mode are expected unchanged.
        actual = bq._get_execute_parameters(
            'foo_load_task', {}, transformation_config, output_config,
            parameters_config)
        expected = {
            'type': 'pydatalab.bq.execute',
            'up_stream': ['foo_load_task'],
            'sql': wikipedia_sql,
            'table': 'foo_table_%(_ts_month)s',
            'mode': 'foo_mode',
        }
        self.assertExecuteConfigEquals(actual, expected, parameters_config)

        # Empty input config and no parameters at all (None).
        actual = bq._get_execute_parameters(
            'foo_load_task', {}, transformation_config, output_config, None)
        expected = {
            'type': 'pydatalab.bq.execute',
            'up_stream': ['foo_load_task'],
            'sql': wikipedia_sql,
            'table': 'foo_table_%(_ts_month)s',
            'mode': 'foo_mode',
        }
        self.assertExecuteConfigEquals(actual, expected, None)

        # Empty input and empty output configs: only type, up_stream and sql.
        actual = bq._get_execute_parameters(
            'foo_load_task', {}, transformation_config, {}, parameters_config)
        expected = {
            'type': 'pydatalab.bq.execute',
            'up_stream': ['foo_load_task'],
            'sql': wikipedia_sql,
        }
        self.assertExecuteConfigEquals(actual, expected, parameters_config)

        # Shared test input config with an empty output config: the query is
        # expected to be prefixed with a `WITH input AS (...)` clause that
        # selects from `test_table`.
        actual = bq._get_execute_parameters(
            'foo_load_task', TestCases.test_input_config,
            transformation_config, {}, parameters_config)
        expected = {
            'type': 'pydatalab.bq.execute',
            'up_stream': ['foo_load_task'],
            'sql': ('WITH input AS (\n'
                    '  SELECT * FROM `test_table`\n'
                    ')\n'
                    '\n'
                    'SELECT @column\n'
                    'FROM publicdata.samples.wikipedia\n'
                    'WHERE endpoint=@endpoint'),
        }
        self.assertExecuteConfigEquals(actual, expected, parameters_config)

        # Input config without a table and without data_source: the expected
        # data_source is 'input'. A copy is edited so the shared fixture on
        # TestCases is not modified.
        input_config = TestCases.test_input_config.copy()
        del input_config['table']
        actual = bq._get_execute_parameters(
            'foo_load_task', input_config, transformation_config, {},
            parameters_config)
        expected = {
            'type': 'pydatalab.bq.execute',
            'up_stream': ['foo_load_task'],
            'sql': wikipedia_sql,
            'data_source': 'input',
            'path': 'test_path_%(_ts_month)s',
            'schema': 'test_schema',
            'source_format': 'csv',
            'csv_options': {
                'delimiter': ';',
                'quote': '"',
                'skip': 9,
                'strict': False,
            },
        }
        self.assertExecuteConfigEquals(actual, expected, parameters_config)

        # Same table-less input config, now with an explicit data_source.
        input_config['data_source'] = 'foo_data_source'
        actual = bq._get_execute_parameters(
            'foo_load_task', input_config, transformation_config, {},
            parameters_config)
        expected = {
            'type': 'pydatalab.bq.execute',
            'up_stream': ['foo_load_task'],
            'sql': wikipedia_sql,
            'data_source': 'foo_data_source',
            'path': 'test_path_%(_ts_month)s',
            'schema': 'test_schema',
            'source_format': 'csv',
            'csv_options': {
                'delimiter': ';',
                'quote': '"',
                'skip': 9,
                'strict': False,
            },
        }
        self.assertExecuteConfigEquals(actual, expected, parameters_config)

        # Input with a table and a query that reads from `input`, with no
        # upstream task (None): no up_stream key is expected, and the
        # %(_ds)s placeholder in the table name is expected as {{ ds }}.
        mock_notebook_item.return_value = google.datalab.bigquery.Query(
            'SELECT @column\n'
            'FROM input\n'
            'WHERE endpoint=@endpoint')
        input_config = {
            'path': 'test_path_%(_ds)s',
            'table': 'test_table_%(_ds)s',
        }
        actual = bq._get_execute_parameters(
            None, input_config, transformation_config, {}, parameters_config)
        expected = {
            'type': 'pydatalab.bq.execute',
            'sql': ('WITH input AS (\n'
                    '  SELECT * FROM `test_table_{{ ds }}`\n'
                    ')\n'
                    '\n'
                    'SELECT @column\n'
                    'FROM input\n'
                    'WHERE endpoint=@endpoint'),
        }
        self.assertExecuteConfigEquals(actual, expected, parameters_config)
예제 #3
0
    def test_get_execute_parameters(self, mock_notebook_item):
        # Checks the config dict built by bq._get_execute_parameters for three
        # cases: with an output config, with an empty output config, and with
        # an input config that names an explicit data_source.
        # mock_notebook_item is the mock whose return_value supplies the Query
        # (its patch target is declared outside this block).

        # Adding newlines to the query to mimic actual usage of %%bq query ...
        query_sql = (
            'SELECT @column\n'
            'FROM publicdata.samples.wikipedia\n'
            'WHERE endpoint=@endpoint')
        mock_notebook_item.return_value = google.datalab.bigquery.Query(
            query_sql)

        transformation_config = {'query': 'foo_query'}
        output_config = {'table': 'foo_table', 'mode': 'foo_mode'}
        parameters_config = [{
            'type': 'STRING',
            'name': 'endpoint',
            'value': 'Interact2'
        }, {
            'type': 'INTEGER',
            'name': 'column',
            'value': '1234'
        }]

        # With output config: table and mode are expected in the result.
        actual_execute_config = bq._get_execute_parameters(
            'foo_load_task', {}, transformation_config, output_config,
            parameters_config)
        expected_execute_config = {
            'type': 'pydatalab.bq.execute',
            'up_stream': ['foo_load_task'],
            'sql': query_sql,
            'table': 'foo_table',
            'mode': 'foo_mode',
            'parameters': parameters_config
        }
        self.assertDictEqual(actual_execute_config, expected_execute_config)

        # With empty output config
        actual_execute_config = bq._get_execute_parameters(
            'foo_load_task', {}, transformation_config, {}, parameters_config)
        expected_execute_config = {
            'type': 'pydatalab.bq.execute',
            'up_stream': ['foo_load_task'],
            'sql': query_sql,
            'parameters': parameters_config
        }
        self.assertDictEqual(actual_execute_config, expected_execute_config)

        # With input config and explicit data_source. Work on a copy: adding
        # the key to TestCases.test_input_config itself would leak into every
        # other test that reads the shared fixture.
        input_config = TestCases.test_input_config.copy()
        input_config['data_source'] = 'foo_data_source'
        actual_execute_config = bq._get_execute_parameters(
            'foo_load_task', input_config, transformation_config, {},
            parameters_config)
        expected_execute_config = {
            'type': 'pydatalab.bq.execute',
            'up_stream': ['foo_load_task'],
            'sql': query_sql,
            'data_source': 'foo_data_source',
            'path': 'test_path',
            'schema': 'test_schema',
            'source_format': 'csv',
            'csv_options': {
                'delimiter': ';',
                'quote': '"',
                'skip': 9,
                'strict': False
            },
            'parameters': parameters_config
        }
        self.assertDictEqual(actual_execute_config, expected_execute_config)
예제 #4
0
  def test_get_execute_parameters(self, mock_notebook_item):
    # Drives bq._get_execute_parameters through a series of input/output config
    # combinations and checks each resulting config with assertExecuteConfigEquals.
    # mock_notebook_item is the mock whose return_value supplies the Query (its
    # patch target is declared outside this block).
    wikipedia_sql = 'SELECT @column\nFROM publicdata.samples.wikipedia\nWHERE endpoint=@endpoint'
    mock_notebook_item.return_value = google.datalab.bigquery.Query(wikipedia_sql)

    transformation_config = {'query': 'foo_query'}
    output_config = {'table': 'foo_table_%(_ts_month)s', 'mode': 'foo_mode'}
    parameters_config = [
      {'type': 'STRING', 'name': 'endpoint', 'value': 'Interact2'},
      {'type': 'INTEGER', 'name': 'column', 'value': '1234'},
    ]

    # Empty input config: output table and mode are expected unchanged.
    actual = bq._get_execute_parameters('foo_load_task', {}, transformation_config,
                                        output_config, parameters_config)
    expected = {
      'type': 'pydatalab.bq.execute',
      'up_stream': ['foo_load_task'],
      'sql': wikipedia_sql,
      'table': 'foo_table_%(_ts_month)s',
      'mode': 'foo_mode',
    }
    self.assertExecuteConfigEquals(actual, expected, parameters_config)

    # Empty input config and no parameters at all (None).
    actual = bq._get_execute_parameters('foo_load_task', {}, transformation_config,
                                        output_config, None)
    expected = {
      'type': 'pydatalab.bq.execute',
      'up_stream': ['foo_load_task'],
      'sql': wikipedia_sql,
      'table': 'foo_table_%(_ts_month)s',
      'mode': 'foo_mode',
    }
    self.assertExecuteConfigEquals(actual, expected, None)

    # Empty input and empty output configs: only type, up_stream and sql.
    actual = bq._get_execute_parameters('foo_load_task', {}, transformation_config,
                                        {}, parameters_config)
    expected = {
      'type': 'pydatalab.bq.execute',
      'up_stream': ['foo_load_task'],
      'sql': wikipedia_sql,
    }
    self.assertExecuteConfigEquals(actual, expected, parameters_config)

    # Shared test input config with an empty output config: the query is expected
    # to be prefixed with a `WITH input AS (...)` clause selecting from `test_table`.
    actual = bq._get_execute_parameters('foo_load_task', TestCases.test_input_config,
                                        transformation_config, {}, parameters_config)
    expected = {
      'type': 'pydatalab.bq.execute',
      'up_stream': ['foo_load_task'],
      'sql': ('WITH input AS (\n'
              '  SELECT * FROM `test_table`\n'
              ')\n'
              '\n'
              'SELECT @column\n'
              'FROM publicdata.samples.wikipedia\n'
              'WHERE endpoint=@endpoint'),
    }
    self.assertExecuteConfigEquals(actual, expected, parameters_config)

    # Input config without a table and without data_source: the expected
    # data_source is 'input'. A copy is edited so the shared fixture on TestCases
    # is not modified.
    input_config = TestCases.test_input_config.copy()
    del input_config['table']
    actual = bq._get_execute_parameters('foo_load_task', input_config,
                                        transformation_config, {}, parameters_config)
    expected = {
      'type': 'pydatalab.bq.execute',
      'up_stream': ['foo_load_task'],
      'sql': wikipedia_sql,
      'data_source': 'input',
      'path': 'test_path_%(_ts_month)s',
      'schema': 'test_schema',
      'source_format': 'csv',
      'csv_options': {
        'delimiter': ';',
        'quote': '"',
        'skip': 9,
        'strict': False,
      },
    }
    self.assertExecuteConfigEquals(actual, expected, parameters_config)

    # Same table-less input config, now with an explicit data_source.
    input_config['data_source'] = 'foo_data_source'
    actual = bq._get_execute_parameters('foo_load_task', input_config,
                                        transformation_config, {}, parameters_config)
    expected = {
      'type': 'pydatalab.bq.execute',
      'up_stream': ['foo_load_task'],
      'sql': wikipedia_sql,
      'data_source': 'foo_data_source',
      'path': 'test_path_%(_ts_month)s',
      'schema': 'test_schema',
      'source_format': 'csv',
      'csv_options': {
        'delimiter': ';',
        'quote': '"',
        'skip': 9,
        'strict': False,
      },
    }
    self.assertExecuteConfigEquals(actual, expected, parameters_config)

    # Input with a table and a query that reads from `input`, with no upstream
    # task (None): no up_stream key is expected, and the %(_ds)s placeholder in
    # the table name is expected as {{ ds }}.
    mock_notebook_item.return_value = google.datalab.bigquery.Query(
        'SELECT @column\n'
        'FROM input\n'
        'WHERE endpoint=@endpoint')
    input_config = {
      'path': 'test_path_%(_ds)s',
      'table': 'test_table_%(_ds)s',
    }
    actual = bq._get_execute_parameters(None, input_config, transformation_config,
                                        {}, parameters_config)
    expected = {
      'type': 'pydatalab.bq.execute',
      'sql': ('WITH input AS (\n'
              '  SELECT * FROM `test_table_{{ ds }}`\n'
              ')\n'
              '\n'
              'SELECT @column\n'
              'FROM input\n'
              'WHERE endpoint=@endpoint'),
    }
    self.assertExecuteConfigEquals(actual, expected, parameters_config)