def test_get_execute_parameters(self, mock_notebook_item):
    """Check the execute config built from transformation, output and parameters.

    ``mock_notebook_item`` is set up to return a parameterized query. The dict
    returned by ``bq._get_execute_parameters`` is then compared against the
    expected config, first with a populated output config and then with an
    empty one.
    """
    query_text = 'SELECT @column FROM publicdata.samples.wikipedia where endpoint=@endpoint'
    mock_notebook_item.return_value = google.datalab.bigquery.Query(query_text)

    transformation_config = {'query': 'foo_query'}
    output_config = {'table': 'foo_table', 'mode': 'foo_mode'}
    parameters_config = [
        {'type': 'STRING', 'name': 'endpoint', 'value': 'Interact2'},
        {'type': 'INTEGER', 'name': 'column', 'value': '1234'},
    ]

    # Keys expected in the result no matter what the output config holds.
    common_expected = {
        'type': 'pydatalab.bq.execute',
        'sql': query_text,
        'up_stream': ['foo_load_task'],
        'parameters': parameters_config,
    }

    # Populated output config: 'table' and 'mode' are expected in the result.
    actual = bq._get_execute_parameters(
        'foo_load_task', transformation_config, output_config, parameters_config)
    self.assertDictEqual(actual, dict(common_expected, table='foo_table', mode='foo_mode'))

    # With empty output config
    actual = bq._get_execute_parameters(
        'foo_load_task', transformation_config, {}, parameters_config)
    self.assertDictEqual(actual, common_expected)
def test_get_execute_parameters(self, mock_notebook_item):
    """Exercise ``bq._get_execute_parameters`` over input/output/parameter combinations.

    Every scenario calls the function and checks the result with
    ``assertExecuteConfigEquals``, passing along the parameters config that
    was given to the call.
    """
    base_sql = 'SELECT @column\nFROM publicdata.samples.wikipedia\nWHERE endpoint=@endpoint'
    mock_notebook_item.return_value = google.datalab.bigquery.Query(base_sql)

    transformation_config = {'query': 'foo_query'}
    output_config = {'table': 'foo_table_%(_ts_month)s', 'mode': 'foo_mode'}
    parameters_config = [
        {'type': 'STRING', 'name': 'endpoint', 'value': 'Interact2'},
        {'type': 'INTEGER', 'name': 'column', 'value': '1234'},
    ]

    def expected_with(**extra):
        # Fresh expected config per scenario: the common keys plus overrides.
        config = {
            'type': 'pydatalab.bq.execute',
            'up_stream': ['foo_load_task'],
            'sql': base_sql,
        }
        config.update(extra)
        return config

    def with_input(table, body):
        # NOTE(review): the exact whitespace of the expected WITH clause is
        # assumed here - confirm against the SQL the query object expands to.
        return 'WITH input AS (\n  SELECT * FROM `%s`\n)\n\n%s' % (table, body)

    def external_source(data_source):
        # Expected keys when the input config carries no table.
        return expected_with(
            data_source=data_source,
            path='test_path_%(_ts_month)s',
            schema='test_schema',
            source_format='csv',
            csv_options={'delimiter': ';', 'quote': '"', 'skip': 9, 'strict': False},
        )

    # Empty input config
    actual = bq._get_execute_parameters(
        'foo_load_task', {}, transformation_config, output_config, parameters_config)
    self.assertExecuteConfigEquals(
        actual,
        expected_with(table='foo_table_%(_ts_month)s', mode='foo_mode'),
        parameters_config)

    # Empty input and parameters config
    actual = bq._get_execute_parameters(
        'foo_load_task', {}, transformation_config, output_config, None)
    self.assertExecuteConfigEquals(
        actual,
        expected_with(table='foo_table_%(_ts_month)s', mode='foo_mode'),
        None)

    # Empty input and empty output configs
    actual = bq._get_execute_parameters(
        'foo_load_task', {}, transformation_config, {}, parameters_config)
    self.assertExecuteConfigEquals(actual, expected_with(), parameters_config)

    # Empty output config with the shared input config: the expected SQL is
    # the query wrapped with an `input` sub-query over the input table.
    actual = bq._get_execute_parameters(
        'foo_load_task', TestCases.test_input_config, transformation_config, {},
        parameters_config)
    self.assertExecuteConfigEquals(
        actual,
        expected_with(sql=with_input('test_table', base_sql)),
        parameters_config)

    # With no table, and implicit data_source
    input_config = TestCases.test_input_config.copy()
    del input_config['table']
    actual = bq._get_execute_parameters(
        'foo_load_task', input_config, transformation_config, {}, parameters_config)
    self.assertExecuteConfigEquals(actual, external_source('input'), parameters_config)

    # With no table, and explicit data_source
    input_config['data_source'] = 'foo_data_source'
    actual = bq._get_execute_parameters(
        'foo_load_task', input_config, transformation_config, {}, parameters_config)
    self.assertExecuteConfigEquals(
        actual, external_source('foo_data_source'), parameters_config)

    # With table and implicit sub-query
    input_sql = 'SELECT @column\nFROM input\nWHERE endpoint=@endpoint'
    mock_notebook_item.return_value = google.datalab.bigquery.Query(input_sql)
    input_config = {
        'path': 'test_path_%(_ds)s',
        'table': 'test_table_%(_ds)s',
    }
    actual = bq._get_execute_parameters(
        None, input_config, transformation_config, {}, parameters_config)
    expected = {
        'type': 'pydatalab.bq.execute',
        'sql': with_input('test_table_{{ ds }}', input_sql),
    }
    self.assertExecuteConfigEquals(actual, expected, parameters_config)
def test_get_execute_parameters(self, mock_notebook_item):
    """Check the execute config for empty and populated input/output configs.

    ``mock_notebook_item`` is set up to return a parameterized query. The dict
    returned by ``bq._get_execute_parameters`` is compared with the expected
    config for three cases: a populated output config, an empty output config,
    and a populated input config with an explicit ``data_source``.
    """
    # Adding newlines to the query to mimic actual usage of %%bq query ...
    sql = 'SELECT @column\nFROM publicdata.samples.wikipedia\nWHERE endpoint=@endpoint'
    mock_notebook_item.return_value = google.datalab.bigquery.Query(sql)

    transformation_config = {'query': 'foo_query'}
    output_config = {'table': 'foo_table', 'mode': 'foo_mode'}
    parameters_config = [
        {'type': 'STRING', 'name': 'endpoint', 'value': 'Interact2'},
        {'type': 'INTEGER', 'name': 'column', 'value': '1234'},
    ]

    # Empty input config, populated output config
    actual_execute_config = bq._get_execute_parameters(
        'foo_load_task', {}, transformation_config, output_config, parameters_config)
    expected_execute_config = {
        'type': 'pydatalab.bq.execute',
        'up_stream': ['foo_load_task'],
        'sql': sql,
        'table': 'foo_table',
        'mode': 'foo_mode',
        'parameters': parameters_config,
    }
    self.assertDictEqual(actual_execute_config, expected_execute_config)

    # With empty output config
    actual_execute_config = bq._get_execute_parameters(
        'foo_load_task', {}, transformation_config, {}, parameters_config)
    expected_execute_config = {
        'type': 'pydatalab.bq.execute',
        'up_stream': ['foo_load_task'],
        'sql': sql,
        'parameters': parameters_config,
    }
    self.assertDictEqual(actual_execute_config, expected_execute_config)

    # With a populated input config and an explicit data_source. Work on a
    # copy: TestCases.test_input_config is shared class-level state, and
    # adding 'data_source' to it in place would leak into other tests.
    input_config = TestCases.test_input_config.copy()
    input_config['data_source'] = 'foo_data_source'
    actual_execute_config = bq._get_execute_parameters(
        'foo_load_task', input_config, transformation_config, {}, parameters_config)
    expected_execute_config = {
        'type': 'pydatalab.bq.execute',
        'up_stream': ['foo_load_task'],
        'sql': sql,
        'data_source': 'foo_data_source',
        'path': 'test_path',
        'schema': 'test_schema',
        'source_format': 'csv',
        'csv_options': {'delimiter': ';', 'quote': '"', 'skip': 9, 'strict': False},
        'parameters': parameters_config,
    }
    self.assertDictEqual(actual_execute_config, expected_execute_config)
def test_get_execute_parameters(self, mock_notebook_item):
    """Verify the execute config produced for several input/output/parameter setups.

    Each scenario goes through the local ``check`` helper, which calls
    ``bq._get_execute_parameters`` and hands the result, the expected config
    and the parameters to ``assertExecuteConfigEquals``.
    """
    plain_sql = '\n'.join([
        'SELECT @column',
        'FROM publicdata.samples.wikipedia',
        'WHERE endpoint=@endpoint',
    ])
    mock_notebook_item.return_value = google.datalab.bigquery.Query(plain_sql)

    transformation_config = {'query': 'foo_query'}
    table_output = {'table': 'foo_table_%(_ts_month)s', 'mode': 'foo_mode'}
    parameters_config = [
        {'type': 'STRING', 'name': 'endpoint', 'value': 'Interact2'},
        {'type': 'INTEGER', 'name': 'column', 'value': '1234'},
    ]
    csv_options = {'delimiter': ';', 'quote': '"', 'skip': 9, 'strict': False}

    def check(load_task_id, input_cfg, output_cfg, parameters, expected):
        actual = bq._get_execute_parameters(
            load_task_id, input_cfg, transformation_config, output_cfg, parameters)
        self.assertExecuteConfigEquals(actual, expected, parameters)

    # Empty input config
    check('foo_load_task', {}, table_output, parameters_config, {
        'type': 'pydatalab.bq.execute',
        'up_stream': ['foo_load_task'],
        'sql': plain_sql,
        'table': 'foo_table_%(_ts_month)s',
        'mode': 'foo_mode',
    })

    # Empty input and parameters config
    check('foo_load_task', {}, table_output, None, {
        'type': 'pydatalab.bq.execute',
        'up_stream': ['foo_load_task'],
        'sql': plain_sql,
        'table': 'foo_table_%(_ts_month)s',
        'mode': 'foo_mode',
    })

    # Empty input and empty output configs
    check('foo_load_task', {}, {}, parameters_config, {
        'type': 'pydatalab.bq.execute',
        'up_stream': ['foo_load_task'],
        'sql': plain_sql,
    })

    # Empty output config with the shared input config: the query is expected
    # to be prefixed with an `input` sub-query over the input table.
    # NOTE(review): the whitespace layout of the WITH clause below is
    # assumed - confirm against the SQL the query object expands to.
    check('foo_load_task', TestCases.test_input_config, {}, parameters_config, {
        'type': 'pydatalab.bq.execute',
        'up_stream': ['foo_load_task'],
        'sql': '\n'.join([
            'WITH input AS (',
            '  SELECT * FROM `test_table`',
            ')',
            '',
            plain_sql,
        ]),
    })

    # With no table, and implicit data_source
    input_config = TestCases.test_input_config.copy()
    del input_config['table']
    check('foo_load_task', input_config, {}, parameters_config, {
        'type': 'pydatalab.bq.execute',
        'up_stream': ['foo_load_task'],
        'sql': plain_sql,
        'data_source': 'input',
        'path': 'test_path_%(_ts_month)s',
        'schema': 'test_schema',
        'source_format': 'csv',
        'csv_options': dict(csv_options),
    })

    # With no table, and explicit data_source
    input_config['data_source'] = 'foo_data_source'
    check('foo_load_task', input_config, {}, parameters_config, {
        'type': 'pydatalab.bq.execute',
        'up_stream': ['foo_load_task'],
        'sql': plain_sql,
        'data_source': 'foo_data_source',
        'path': 'test_path_%(_ts_month)s',
        'schema': 'test_schema',
        'source_format': 'csv',
        'csv_options': dict(csv_options),
    })

    # With table and implicit sub-query
    input_sql = '\n'.join([
        'SELECT @column',
        'FROM input',
        'WHERE endpoint=@endpoint',
    ])
    mock_notebook_item.return_value = google.datalab.bigquery.Query(input_sql)
    input_config = {
        'path': 'test_path_%(_ds)s',
        'table': 'test_table_%(_ds)s',
    }
    # NOTE(review): same assumption about the WITH clause layout as above.
    check(None, input_config, {}, parameters_config, {
        'type': 'pydatalab.bq.execute',
        'sql': '\n'.join([
            'WITH input AS (',
            '  SELECT * FROM `test_table_{{ ds }}`',
            ')',
            '',
            input_sql,
        ]),
    })