def test_generate_header_fields_from_schema_none_mode(self):
    """Checks header generation for STRING fields declared without a mode.

    Two single-field schemas are converted in turn: one whose field uses a
    non-reserved name ('field') and one whose field uses a reserved name
    ('AA'). Each conversion must produce a header holding exactly one info
    entry for that field and no format entries.
    """
    for field_name in ('field', 'AA'):
        table_schema = bigquery.TableSchema()
        # No `mode` is passed on purpose: this is the case under test.
        string_field = bigquery.TableFieldSchema(
            name=field_name,
            type=bigquery_util.TableFieldConstants.TYPE_STRING,
            description='desc')
        table_schema.fields.append(string_field)

        actual_header = schema_converter.generate_header_fields_from_schema(
            table_schema)

        wanted_infos = OrderedDict()
        wanted_infos[field_name] = Info(
            field_name, 1, 'String', 'desc', None, None)
        wanted_header = vcf_header_io.VcfHeader(
            infos=wanted_infos, formats=OrderedDict())
        self.assertEqual(actual_header, wanted_header)
def test_add_format_fields_reserved_field_schema_compatibility(self):
    """Checks handling of a 'GQ' call sub-field declared as STRING.

    Converting a schema whose repeated calls record contains such a field
    must raise ValueError by default. Calling `_add_format_fields` directly
    with `allow_incompatible_schema=True` must instead record the field as a
    String format entry.
    """
    field_constants = bigquery_util.TableFieldConstants

    gq_as_string = bigquery.TableFieldSchema(
        name='GQ',
        type=field_constants.TYPE_STRING,
        mode=field_constants.MODE_NULLABLE,
        description='desc')
    calls_field = bigquery.TableFieldSchema(
        name=bigquery_util.ColumnKeyConstants.CALLS,
        type=field_constants.TYPE_RECORD,
        mode=field_constants.MODE_REPEATED,
        description='One record for each call.')
    calls_field.fields.append(gq_as_string)

    conflicting_schema = bigquery.TableSchema()
    conflicting_schema.fields.append(calls_field)

    # Default behaviour: the conflicting definition is rejected.
    with self.assertRaises(ValueError):
        schema_converter.generate_header_fields_from_schema(
            conflicting_schema)

    # Opt-in behaviour: the field is accepted as declared in the schema.
    collected_formats = OrderedDict()
    schema_converter._add_format_fields(
        calls_field, collected_formats, allow_incompatible_schema=True)

    wanted_formats = OrderedDict()
    wanted_formats['GQ'] = Format('GQ', 1, 'String', 'desc')
    self.assertEqual(collected_formats, wanted_formats)
def _write_vcf_meta_info(input_table,
                         representative_header_file,
                         allow_incompatible_schema):
    # type: (str, str, bool) -> None
    """Writes VCF meta information derived from a BigQuery table schema.

    Args:
      input_table: Table identifier handed to `_get_schema` to obtain the
        schema.
      representative_header_file: Destination handed to
        `vcf_header_io.WriteVcfHeaderFn`.
      allow_incompatible_schema: Forwarded unchanged to
        `schema_converter.generate_header_fields_from_schema`.
    """
    table_schema = _get_schema(input_table)
    vcf_header = schema_converter.generate_header_fields_from_schema(
        table_schema, allow_incompatible_schema)
    header_writer = vcf_header_io.WriteVcfHeaderFn(representative_header_file)
    header_writer.process(vcf_header, _VCF_VERSION_LINE)
def test_schema_to_vcf_header_to_schema(self):
    """Round-trips the sample schema through a VCF header and back.

    The fields extracted from the reconstructed schema must equal the fields
    extracted from the schema the round trip started with.
    """
    source_schema = bigquery_schema_util.get_sample_table_schema()

    # Schema -> header.
    vcf_header = schema_converter.generate_header_fields_from_schema(
        source_schema)

    # Header -> schema.
    variant_factory = processed_variant.ProcessedVariantFactory(vcf_header)
    rebuilt_schema = schema_converter.generate_schema_from_header_fields(
        vcf_header, variant_factory)

    self.assertEqual(_get_fields_from_schema(rebuilt_schema),
                     _get_fields_from_schema(source_schema))
def test_generate_header_fields_from_schema_schema_compatibility(self):
    """Checks handling of an 'AA' column declared as INTEGER.

    The conversion must raise ValueError by default. With
    `allow_incompatible_schema=True` it must succeed and report the field as
    an Integer info entry, with no format entries.
    """
    integer_aa = bigquery.TableFieldSchema(
        name='AA',
        type=bigquery_util.TableFieldConstants.TYPE_INTEGER,
        mode=bigquery_util.TableFieldConstants.MODE_NULLABLE,
        description='desc')
    conflicting_schema = bigquery.TableSchema()
    conflicting_schema.fields.append(integer_aa)

    # Default behaviour: the conflicting definition is rejected.
    with self.assertRaises(ValueError):
        schema_converter.generate_header_fields_from_schema(
            conflicting_schema)

    # Opt-in behaviour: the schema's own type is carried into the header.
    actual_header = schema_converter.generate_header_fields_from_schema(
        conflicting_schema, allow_incompatible_schema=True)

    wanted_infos = OrderedDict()
    wanted_infos['AA'] = Info('AA', 1, 'Integer', 'desc', None, None)
    wanted_header = vcf_header_io.VcfHeader(infos=wanted_infos,
                                            formats=OrderedDict())
    self.assertEqual(actual_header, wanted_header)
def test_generate_header_fields_from_schema_date_type(self):
    """Checks that a 'Date' typed column contributes nothing to the header.

    A schema containing only such a column must convert to a header with
    empty infos and empty formats.
    """
    partition_column = bigquery.TableFieldSchema(
        name='partition_date_please_ignore',
        type='Date',
        mode=bigquery_util.TableFieldConstants.MODE_NULLABLE,
        description='Column required by BigQuery partitioning logic.')
    table_schema = bigquery.TableSchema()
    table_schema.fields.append(partition_column)

    actual_header = schema_converter.generate_header_fields_from_schema(
        table_schema)

    empty_header = vcf_header_io.VcfHeader(infos=OrderedDict(),
                                           formats=OrderedDict())
    self.assertEqual(actual_header, empty_header)
def test_generate_header_fields_from_schema(self):
    """Checks the header generated from the sample table schema.

    The header must contain the info entries AF, AA, IFR and IS and the
    format entries FB and GQ, in that order, with the counts and types
    listed below.
    """
    # (key, count, type) for each expected entry; all share 'desc'.
    info_specs = [
        ('AF', 'A', 'Float'),
        ('AA', 1, 'String'),
        ('IFR', '.', 'Float'),
        ('IS', 1, 'String'),
    ]
    format_specs = [
        ('FB', 1, 'String'),
        ('GQ', 1, 'Integer'),
    ]

    actual_header = schema_converter.generate_header_fields_from_schema(
        bigquery_schema_util.get_sample_table_schema())

    wanted_infos = OrderedDict(
        (key, createInfo(key, count, kind, 'desc', None, None))
        for key, count, kind in info_specs)
    wanted_formats = OrderedDict(
        (key, createFormat(key, count, kind, 'desc'))
        for key, count, kind in format_specs)
    wanted_header = vcf_header_io.VcfHeader(infos=wanted_infos,
                                            formats=wanted_formats)
    self.assertEqual(actual_header, wanted_header)
def test_generate_header_fields_from_schema_invalid_description(self):
    """Checks that a newline inside a field description is flattened.

    A STRING field whose description contains a line break must appear in
    the header with the line break replaced by a single space.
    """
    field_name = 'invalid_description'
    multiline_field = bigquery.TableFieldSchema(
        name=field_name,
        type=bigquery_util.TableFieldConstants.TYPE_STRING,
        mode=bigquery_util.TableFieldConstants.MODE_NULLABLE,
        description='Desc\nThis is added intentionally.')
    table_schema = bigquery.TableSchema()
    table_schema.fields.append(multiline_field)

    actual_header = schema_converter.generate_header_fields_from_schema(
        table_schema)

    wanted_infos = OrderedDict()
    wanted_infos[field_name] = Info(
        'invalid_description', 1, 'String',
        'Desc This is added intentionally.', None, None)
    wanted_header = vcf_header_io.VcfHeader(infos=wanted_infos,
                                            formats=OrderedDict())
    self.assertEqual(actual_header, wanted_header)
def test_vcf_header_to_schema_to_vcf_header(self):
    """Round-trips a VCF header through a BigQuery schema and back.

    The header rebuilt from the generated schema must equal the header the
    round trip started with.
    """
    # (key, type) for each entry; every entry uses count '.' and 'desc'.
    info_specs = [('I1', 'String'), ('IA', 'Integer')]
    format_specs = [('F1', 'String'), ('F2', 'Integer'), ('FU', 'Float')]

    source_infos = OrderedDict(
        (key, createInfo(key, '.', kind, 'desc', None, None))
        for key, kind in info_specs)
    source_formats = OrderedDict(
        (key, createFormat(key, '.', kind, 'desc'))
        for key, kind in format_specs)
    source_header = vcf_header_io.VcfHeader(infos=source_infos,
                                            formats=source_formats)

    # Header -> schema.
    variant_factory = processed_variant.ProcessedVariantFactory(source_header)
    table_schema = schema_converter.generate_schema_from_header_fields(
        source_header, variant_factory)

    # Schema -> header.
    rebuilt_header = schema_converter.generate_header_fields_from_schema(
        table_schema)

    self.assertEqual(source_header, rebuilt_header)
def test_generate_header_fields_from_schema_with_annotation(self):
    """Checks the header generated from the sample schema with annotations.

    Besides the AF, AA, IFR and IS info entries, the header must contain a
    CSQ info entry whose description carries the annotation format
    ('Consequence|IMPACT'), plus the FB and GQ format entries.
    """
    annotated_schema = bigquery_schema_util.get_sample_table_schema(
        with_annotation_fields=True)
    actual_header = schema_converter.generate_header_fields_from_schema(
        annotated_schema)

    # Insertion order matters: it is part of the OrderedDict comparison.
    wanted_infos = OrderedDict()
    wanted_infos['AF'] = Info(
        'AF', field_counts['A'], 'Float', 'desc', None, None)
    wanted_infos['CSQ'] = Info(
        'CSQ', field_counts['.'], 'String',
        'desc Format: Consequence|IMPACT', None, None)
    wanted_infos['AA'] = Info('AA', 1, 'String', 'desc', None, None)
    wanted_infos['IFR'] = Info(
        'IFR', field_counts['.'], 'Float', 'desc', None, None)
    wanted_infos['IS'] = Info('IS', 1, 'String', 'desc', None, None)

    wanted_formats = OrderedDict()
    wanted_formats['FB'] = parser._Format('FB', 0, 'Flag', 'desc')
    wanted_formats['GQ'] = parser._Format('GQ', 1, 'Integer', 'desc')

    wanted_header = vcf_header_io.VcfHeader(infos=wanted_infos,
                                            formats=wanted_formats)
    self.assertEqual(actual_header, wanted_header)