def test_datetimeformat(self):
    """Verify which 'datetimeformat' hints load_job_config() accepts.

    Double-check against BigQuery's documentation before adding
    anything else in here; 'YYYY-MM-DD HH:MI:SS' and
    'YYYY-MM-DD HH24:MI:SS' were the only formats accepted by
    BigQuery as of this writing.
    """
    # Formats expected to be rejected with NotImplementedError;
    # any format not listed is expected to load cleanly.
    should_raise = {
        'YYYY-MM-DD HH12:MI AM': True,
        'MM/DD/YY HH24:MI': True,
    }
    processing_instructions = ProcessingInstructions(
        fail_if_dont_understand=True,
        fail_if_cant_handle_hint=True,
        fail_if_row_invalid=True)
    for datetimeformat in DATETIME_CASES:
        # subTest() makes it obvious which format failed instead of
        # aborting the whole loop at the first bad case.
        with self.subTest(datetimeformat=datetimeformat):
            records_format = DelimitedRecordsFormat(
                variant='bigquery',
                hints={
                    'datetimeformat': datetimeformat,
                })
            load_plan = RecordsLoadPlan(
                processing_instructions=processing_instructions,
                records_format=records_format)
            unhandled_hints = set(records_format.hints.keys())
            try:
                load_job_config(unhandled_hints, load_plan)
            except NotImplementedError:
                # .get() with a False default so a raising format that
                # is missing from should_raise surfaces the real
                # NotImplementedError rather than a confusing KeyError.
                if not should_raise.get(datetimeformat, False):
                    raise
def test_timeonlyformat(self):
    """Verify which 'timeonlyformat' hints load_job_config() accepts.

    Double-check against BigQuery's documentation before adding
    anything else in here; HH:MI:SS was the only format accepted
    by BigQuery as of this writing.
    """
    # Formats expected to be rejected with NotImplementedError;
    # any format not listed is expected to load cleanly.
    should_raise = {
        'HH:MI:SS': False,
        'HH24:MI:SS': False,
        'HH12:MI AM': True,
    }
    processing_instructions = ProcessingInstructions(
        fail_if_dont_understand=True,
        fail_if_cant_handle_hint=True,
        fail_if_row_invalid=True)
    for timeonlyformat in TIMEONLY_CASES:
        # subTest() makes it obvious which format failed instead of
        # aborting the whole loop at the first bad case.
        with self.subTest(timeonlyformat=timeonlyformat):
            records_format = DelimitedRecordsFormat(
                variant='bigquery',
                hints={
                    'timeonlyformat': timeonlyformat,
                })
            load_plan = RecordsLoadPlan(
                processing_instructions=processing_instructions,
                records_format=records_format)
            unhandled_hints = set(records_format.hints.keys())
            try:
                load_job_config(unhandled_hints, load_plan)
            except NotImplementedError:
                # .get() with a False default so a raising format that
                # is missing from should_raise surfaces the real
                # NotImplementedError rather than a confusing KeyError.
                if not should_raise.get(timeonlyformat, False):
                    raise
def test_load_job_config_vertica(self):
    """The vertica variant's record-terminator hint is unsupported and
    must be rejected when fail_if_cant_handle_hint is set."""
    fmt = DelimitedRecordsFormat(variant='vertica')
    instructions = ProcessingInstructions(fail_if_dont_understand=True,
                                          fail_if_cant_handle_hint=True,
                                          fail_if_row_invalid=True)
    plan = RecordsLoadPlan(processing_instructions=instructions,
                           records_format=fmt)
    pending_hints = set(fmt.hints.keys())
    expected_message = (r"Implement hint record-terminator='\\x02' "
                        "or try again with fail_if_cant_handle_hint=False")
    with self.assertRaisesRegex(NotImplementedError, expected_message):
        load_job_config(pending_hints, plan)
def test_load_job_config_unknown_quoting(self):
    """An unrecognized 'quoting' hint value must raise NotImplementedError."""
    fmt = DelimitedRecordsFormat(variant='bigquery',
                                 hints={'quoting': 'blah'})
    instructions = ProcessingInstructions(fail_if_dont_understand=True,
                                          fail_if_cant_handle_hint=True,
                                          fail_if_row_invalid=True)
    plan = RecordsLoadPlan(processing_instructions=instructions,
                           records_format=fmt)
    pending_hints = set(fmt.hints.keys())
    with self.assertRaises(NotImplementedError):
        load_job_config(pending_hints, plan)
def test_load_job_config_no_bzip_support(self):
    """BZIP compression is unsupported and must be rejected with a
    NotImplementedError naming the offending hint."""
    fmt = DelimitedRecordsFormat(variant='bigquery',
                                 hints={'compression': 'BZIP'})
    instructions = ProcessingInstructions(fail_if_dont_understand=True,
                                          fail_if_cant_handle_hint=True,
                                          fail_if_row_invalid=True)
    plan = RecordsLoadPlan(processing_instructions=instructions,
                           records_format=fmt)
    pending_hints = set(fmt.hints.keys())
    expected_message = (r"Implement hint compression='BZIP' "
                        "or try again with fail_if_cant_handle_hint=False")
    with self.assertRaisesRegex(NotImplementedError, expected_message):
        load_job_config(pending_hints, plan)
def test_load_job_config_permissive(self):
    """With fail_if_row_invalid=False, the generated load config should
    tolerate bad rows (high maxBadRecords) for the bigquery variant."""
    fmt = DelimitedRecordsFormat(variant='bigquery')
    instructions = ProcessingInstructions(fail_if_dont_understand=True,
                                          fail_if_cant_handle_hint=True,
                                          fail_if_row_invalid=False)
    plan = RecordsLoadPlan(processing_instructions=instructions,
                           records_format=fmt)
    pending_hints = set(fmt.hints.keys())
    config = load_job_config(pending_hints, plan)
    expected = {
        'allowJaggedRows': True,
        'allowQuotedNewlines': True,
        'autodetect': False,
        'createDisposition': 'CREATE_NEVER',
        'destinationTableProperties': {},
        'encoding': 'UTF-8',
        'fieldDelimiter': ',',
        'ignoreUnknownValues': False,
        'maxBadRecords': 999999,
        'quote': '"',
        'schemaUpdateOptions': None,
        'skipLeadingRows': '1',
        'sourceFormat': 'CSV',
        'writeDisposition': 'WRITE_APPEND'
    }
    self.assertEqual(config.to_api_repr()['load'], expected)
def test_load_job_config_unsupported_datetimeformattz(self):
    """A datetimeformattz value BigQuery can't handle must be rejected
    with a NotImplementedError naming the offending hint."""
    fmt = DelimitedRecordsFormat(
        variant='bigquery',
        hints={'datetimeformattz': 'MM/DD/YY HH:MI:SSOF'})
    instructions = ProcessingInstructions(fail_if_dont_understand=True,
                                          fail_if_cant_handle_hint=True,
                                          fail_if_row_invalid=True)
    plan = RecordsLoadPlan(processing_instructions=instructions,
                           records_format=fmt)
    pending_hints = set(fmt.hints.keys())
    expected_message = (r"Implement hint datetimeformattz='MM/DD/YY HH:MI:SSOF' "
                        "or try again with fail_if_cant_handle_hint=False")
    with self.assertRaisesRegex(NotImplementedError, expected_message):
        load_job_config(pending_hints, plan)
def test_load_job_config_parquet(self):
    """Parquet input should produce a strict PARQUET load config:
    no bad records allowed, unknown values ignored."""
    fmt = ParquetRecordsFormat()
    instructions = ProcessingInstructions(fail_if_dont_understand=True,
                                          fail_if_cant_handle_hint=True,
                                          fail_if_row_invalid=True)
    plan = RecordsLoadPlan(processing_instructions=instructions,
                           records_format=fmt)
    # Parquet carries no delimited-format hints, so nothing to track.
    pending_hints = set()
    config = load_job_config(pending_hints, plan)
    expected = {
        'allowJaggedRows': False,
        'autodetect': False,
        'createDisposition': 'CREATE_NEVER',
        'destinationTableProperties': {},
        'ignoreUnknownValues': True,
        'maxBadRecords': 0,
        'schemaUpdateOptions': None,
        'sourceFormat': 'PARQUET',
        'writeDisposition': 'WRITE_APPEND'
    }
    self.assertEqual(expected, config.to_api_repr()['load'])