예제 #1
0
    def test_generate_header_fields_from_schema_none_mode(self):
        """Checks header generation for columns that declare no mode.

        The same scenario runs twice: first with the arbitrary column name
        'field', then with 'AA' (which the original test treats as the
        reserved-field case). Each time a single STRING column without a mode
        must produce exactly one String INFO entry with count 1 and no FORMAT
        entries.
        """
        for column_name in ('field', 'AA'):
            # Build a one-column schema; `mode` is intentionally left unset.
            modeless_column = bigquery.TableFieldSchema(
                name=column_name,
                type=bigquery_util.TableFieldConstants.TYPE_STRING,
                description='desc')
            table_schema = bigquery.TableSchema()
            table_schema.fields.append(modeless_column)

            actual_header = (
                bigquery_vcf_schema_converter.generate_header_fields_from_schema(
                    table_schema))

            expected_infos = OrderedDict()
            expected_infos[column_name] = Info(
                column_name, 1, 'String', 'desc', None, None)
            expected_header = vcf_header_io.VcfHeader(
                infos=expected_infos, formats=OrderedDict())
            self.assertEqual(actual_header, expected_header)
예제 #2
0
    def test_add_format_fields_reserved_field_schema_compatibility(self):
        """Checks handling of a 'GQ' call field declared as STRING.

        With default settings, converting a schema that contains such a field
        must raise ValueError. When `_add_format_fields` is called with
        `allow_incompatible_schema=True`, the field is accepted and recorded
        as a String FORMAT entry.
        """
        string_gq_field = bigquery.TableFieldSchema(
            name='GQ',
            type=bigquery_util.TableFieldConstants.TYPE_STRING,
            mode=bigquery_util.TableFieldConstants.MODE_NULLABLE,
            description='desc')
        calls_record = bigquery.TableFieldSchema(
            name=bigquery_util.ColumnKeyConstants.CALLS,
            type=bigquery_util.TableFieldConstants.TYPE_RECORD,
            mode=bigquery_util.TableFieldConstants.MODE_REPEATED,
            description='One record for each call.')
        calls_record.fields.append(string_gq_field)

        conflicting_schema = bigquery.TableSchema()
        conflicting_schema.fields.append(calls_record)

        # Strict conversion is expected to reject the schema.
        with self.assertRaises(ValueError):
            bigquery_vcf_schema_converter.generate_header_fields_from_schema(
                conflicting_schema)

        # Lenient mode populates the supplied mapping in place.
        lenient_formats = OrderedDict()
        bigquery_vcf_schema_converter._add_format_fields(
            calls_record, lenient_formats, allow_incompatible_schema=True)

        expected_formats = OrderedDict()
        expected_formats['GQ'] = Format('GQ', 1, 'String', 'desc')
        self.assertEqual(lenient_formats, expected_formats)
예제 #3
0
    def test_schema_to_vcf_header_to_schema(self):
        """Round-trips the sample schema through a VCF header and back.

        The fields extracted from the reconstructed schema must equal the
        fields extracted from the schema the round trip started with.
        """
        original_schema = bigquery_schema_util.get_sample_table_schema()

        # Schema -> header.
        header = bigquery_vcf_schema_converter.generate_header_fields_from_schema(
            original_schema)

        # Header -> schema.
        variant_factory = processed_variant.ProcessedVariantFactory(header)
        reconstructed_schema = (
            bigquery_vcf_schema_converter.generate_schema_from_header_fields(
                header, variant_factory))

        reconstructed_fields = _get_fields_from_schema(reconstructed_schema)
        original_fields = _get_fields_from_schema(original_schema)
        self.assertEqual(reconstructed_fields, original_fields)
예제 #4
0
    def test_generate_header_fields_from_schema_schema_compatibility(self):
        """Checks handling of an 'AA' column declared as INTEGER.

        Conversion with default settings must raise ValueError. With
        `allow_incompatible_schema=True` the column is accepted and appears
        as an Integer INFO entry with count 1.
        """
        integer_aa_column = bigquery.TableFieldSchema(
            name='AA',
            type=bigquery_util.TableFieldConstants.TYPE_INTEGER,
            mode=bigquery_util.TableFieldConstants.MODE_NULLABLE,
            description='desc')
        schema_conflict = bigquery.TableSchema()
        schema_conflict.fields.append(integer_aa_column)

        # Strict conversion is expected to reject the schema.
        with self.assertRaises(ValueError):
            bigquery_vcf_schema_converter.generate_header_fields_from_schema(
                schema_conflict)

        # Lenient conversion keeps the declared type.
        actual_header = (
            bigquery_vcf_schema_converter.generate_header_fields_from_schema(
                schema_conflict, allow_incompatible_schema=True))

        expected_infos = OrderedDict()
        expected_infos['AA'] = Info('AA', 1, 'Integer', 'desc', None, None)
        expected_header = vcf_header_io.VcfHeader(
            infos=expected_infos, formats=OrderedDict())
        self.assertEqual(actual_header, expected_header)
예제 #5
0
def _write_vcf_meta_info(input_table, representative_header_file,
                         allow_incompatible_schema):
    # type: (str, str, bool) -> None
    """Emits VCF meta-information derived from a BigQuery table schema.

    Args:
      input_table: Table identifier handed to `_get_schema`.
      representative_header_file: Destination passed to
        `vcf_header_io.WriteVcfHeaderFn`.
      allow_incompatible_schema: Forwarded unchanged to
        `generate_header_fields_from_schema`.
    """
    table_schema = _get_schema(input_table)
    header_fields = (
        bigquery_vcf_schema_converter.generate_header_fields_from_schema(
            table_schema, allow_incompatible_schema))

    # The header fields are emitted together with the VCF version line.
    header_writer = vcf_header_io.WriteVcfHeaderFn(representative_header_file)
    header_writer.process(header_fields, _VCF_VERSION_LINE)
예제 #6
0
    def test_generate_header_fields_from_schema_date_type(self):
        """Checks that a lone 'Date' column yields an empty header.

        The schema holds a single nullable column of type 'Date'; the
        generated header must contain neither INFO nor FORMAT entries.
        """
        partition_column = bigquery.TableFieldSchema(
            name='partition_date_please_ignore',
            type='Date',
            mode=bigquery_util.TableFieldConstants.MODE_NULLABLE,
            description='Column required by BigQuery partitioning logic.')
        date_only_schema = bigquery.TableSchema()
        date_only_schema.fields.append(partition_column)

        actual_header = (
            bigquery_vcf_schema_converter.generate_header_fields_from_schema(
                date_only_schema))

        no_infos = OrderedDict()
        no_formats = OrderedDict()
        empty_header = vcf_header_io.VcfHeader(infos=no_infos,
                                               formats=no_formats)
        self.assertEqual(actual_header, empty_header)
예제 #7
0
    def test_generate_header_fields_from_schema(self):
        """Checks the header generated from the sample table schema.

        The expected header has INFO entries AF, AA, IFR and IS and FORMAT
        entries FB and GQ, in that order.
        """
        sample_schema = bigquery_schema_util.get_sample_table_schema()
        actual_header = (
            bigquery_vcf_schema_converter.generate_header_fields_from_schema(
                sample_schema))

        # Insertion order matters: OrderedDict equality is order-sensitive.
        expected_infos = OrderedDict()
        expected_infos['AF'] = Info(
            'AF', field_counts['A'], 'Float', 'desc', None, None)
        expected_infos['AA'] = Info('AA', 1, 'String', 'desc', None, None)
        expected_infos['IFR'] = Info(
            'IFR', field_counts['.'], 'Float', 'desc', None, None)
        expected_infos['IS'] = Info('IS', 1, 'String', 'desc', None, None)

        expected_formats = OrderedDict()
        expected_formats['FB'] = parser._Format('FB', 0, 'Flag', 'desc')
        expected_formats['GQ'] = parser._Format('GQ', 1, 'Integer', 'desc')

        expected_header = vcf_header_io.VcfHeader(
            infos=expected_infos, formats=expected_formats)
        self.assertEqual(actual_header, expected_header)
예제 #8
0
    def test_generate_header_fields_from_schema_invalid_description(self):
        """Checks that a newline in a column description is flattened.

        The column description contains a line break; the INFO entry in the
        generated header is expected to carry the same text with the line
        break replaced by a single space.
        """
        multiline_column = bigquery.TableFieldSchema(
            name='invalid_description',
            type=bigquery_util.TableFieldConstants.TYPE_STRING,
            mode=bigquery_util.TableFieldConstants.MODE_NULLABLE,
            description='Desc\nThis is added intentionally.')
        table_schema = bigquery.TableSchema()
        table_schema.fields.append(multiline_column)

        actual_header = (
            bigquery_vcf_schema_converter.generate_header_fields_from_schema(
                table_schema))

        expected_infos = OrderedDict()
        expected_infos['invalid_description'] = Info(
            'invalid_description', 1, 'String',
            'Desc This is added intentionally.', None, None)
        expected_header = vcf_header_io.VcfHeader(
            infos=expected_infos, formats=OrderedDict())
        self.assertEqual(actual_header, expected_header)