def _add_info_fields(field, infos, allow_incompatible_schema=False):
    # type: (bigquery.TableFieldSchema, Dict[str, _Info], bool) -> None
    """Adds the INFO header entry (or entries) derived from `field` to `infos`.

    Three cases are handled:
      * The alternate bases column is delegated to
        `_add_info_fields_from_alternate_bases`.
      * A column whose name is a key of `vcf_reserved_fields.INFO_FIELDS` is
        checked with `_validate_reserved_field` and takes its number and type
        from the reserved definition, unless `allow_incompatible_schema` is
        set.
      * Any other column takes its number and type from the BigQuery mode and
        type of the field.

    Args:
      field: The BigQuery schema field to convert.
      infos: Mapping from field name to INFO entry; updated in place.
      allow_incompatible_schema: If true, reserved fields are not validated
        and are handled like any non-reserved field.
    """
    if field.name == bigquery_util.ColumnKeyConstants.ALTERNATE_BASES:
        _add_info_fields_from_alternate_bases(field, infos,
                                              allow_incompatible_schema)
        return

    reserved_fields = vcf_reserved_fields.INFO_FIELDS
    # Test membership on the mapping itself; copying the keys into a list
    # first would turn every lookup into a linear scan.
    if field.name in reserved_fields and not allow_incompatible_schema:
        reserved_definition = reserved_fields[field.name]
        _validate_reserved_field(field, reserved_definition)
        # Prefer the description stored in the schema; fall back to the
        # reserved one when the schema has none.
        infos[field.name] = vcf_header_io.CreateInfoField(
            field.name, reserved_definition.num, reserved_definition.type,
            _remove_special_characters(field.description
                                       or reserved_definition.desc))
    else:
        infos[field.name] = vcf_header_io.CreateInfoField(
            field.name,
            bigquery_util.get_vcf_num_from_bigquery_schema(
                field.mode, field.type),
            bigquery_util.get_vcf_type_from_bigquery_type(field.type),
            _remove_special_characters(field.description))
def _add_format_fields(schema, formats, allow_incompatible_schema=False):
    # type: (bigquery.TableFieldSchema, Dict[str, _Format], bool) -> None
    """Adds a FORMAT header entry to `formats` for each sub-field of `schema`.

    Sub-fields named in `_CONSTANT_CALL_FIELDS` are skipped. A sub-field whose
    name is a key of `vcf_reserved_fields.FORMAT_FIELDS` is checked with
    `_validate_reserved_field` and takes its number and type from the reserved
    definition, unless `allow_incompatible_schema` is set. Every other
    sub-field takes its number and type from its BigQuery mode and type.

    Args:
      schema: The BigQuery schema field whose `fields` are converted.
      formats: Mapping from field name to FORMAT entry; updated in place.
      allow_incompatible_schema: If true, reserved fields are not validated
        and are handled like any non-reserved field.
    """
    # Looked up once and tested directly; copying the keys into a list on
    # every iteration would make each membership test a linear scan.
    reserved_fields = vcf_reserved_fields.FORMAT_FIELDS
    for field in schema.fields:
        if field.name in _CONSTANT_CALL_FIELDS:
            continue
        if field.name in reserved_fields and not allow_incompatible_schema:
            reserved_definition = reserved_fields[field.name]
            _validate_reserved_field(field, reserved_definition)
            # Prefer the description stored in the schema; fall back to the
            # reserved one when the schema has none.
            formats[field.name] = vcf_header_io.CreateFormatField(
                field.name, reserved_definition.num,
                reserved_definition.type,
                _remove_special_characters(field.description
                                           or reserved_definition.desc))
        else:
            formats[field.name] = vcf_header_io.CreateFormatField(
                field.name,
                bigquery_util.get_vcf_num_from_bigquery_schema(
                    field.mode, field.type),
                bigquery_util.get_vcf_type_from_bigquery_type(field.type),
                _remove_special_characters(field.description))
Beispiel #3
0
 def test_get_vcf_type_from_bigquery_type(self):
   """Checks the BigQuery-to-VCF type mapping and that 'DUMMY' is rejected."""
   vcf_types = vcf_header_io.VcfHeaderFieldTypeConstants
   bq_types = bigquery_util.TableFieldConstants
   # (BigQuery type passed in, VCF header type expected back).
   cases = [
       (bq_types.TYPE_INTEGER, vcf_types.INTEGER),
       (bq_types.TYPE_FLOAT, vcf_types.FLOAT),
       (bq_types.TYPE_BOOLEAN, vcf_types.FLAG),
       (bq_types.TYPE_STRING, vcf_types.STRING),
   ]
   for bq_type, expected_vcf_type in cases:
     self.assertEqual(
         expected_vcf_type,
         bigquery_util.get_vcf_type_from_bigquery_type(bq_type))

   # A type name outside the known BigQuery types must raise.
   with self.assertRaises(ValueError):
     bigquery_util.get_vcf_type_from_bigquery_type('DUMMY')
def _validate_reserved_field_type(field_schema, reserved_definition):
  """Checks that a schema field's VCF type matches its reserved definition.

  Args:
    field_schema: The BigQuery schema field; its `type` is converted to a VCF
      type before comparison.
    reserved_definition: The reserved field definition providing the expected
      `type`.

  Raises:
    ValueError: If the converted type differs from the reserved type.
  """
  actual_type = bigquery_util.get_vcf_type_from_bigquery_type(
      field_schema.type)
  expected_type = reserved_definition.type
  if actual_type == expected_type:
    return
  message = 'The type of field {} is different from the VCF spec: {} vs {}.'
  raise ValueError(
      message.format(field_schema.name, actual_type, expected_type))
def _add_info_fields_from_alternate_bases(schema,
                                          infos,
                                          allow_incompatible_schema=False):
    # type: (bigquery.TableFieldSchema, Dict[str, _Info], bool) -> None
    """Registers in `infos` the fields nested under the alternate bases record.

    Sub-fields named in `_CONSTANT_ALTERNATE_BASES_FIELDS` are skipped. Each
    remaining sub-field falls into one of three groups:
      * A `Record` sub-field is considered an annotation field and is stored
        as a string whose description comes from
        `_get_annotation_description`.
      * A sub-field whose name is a key of `vcf_reserved_fields.INFO_FIELDS`
        takes its number and type from the reserved definition, unless
        `allow_incompatible_schema` is set. Only its type is validated: the
        mode check is skipped because the mode (NULLABLE) of a field inside
        alternate bases is expected to differ from the mode (REPEATED) in the
        reserved definition.
      * Any other sub-field uses the alternate-allele field count and the VCF
        type converted from its BigQuery type.
    """
    for sub_field in schema.fields:
        name = sub_field.name
        if name in _CONSTANT_ALTERNATE_BASES_FIELDS:
            continue

        if sub_field.type == bigquery_util.TableFieldConstants.TYPE_RECORD:
            # Nested record: handled as an annotation field.
            num = parser.field_counts[vcfio.MISSING_FIELD_VALUE]
            vcf_type = bigquery_util._VcfHeaderTypeConstants.STRING
            desc = _remove_special_characters(
                _get_annotation_description(sub_field))
        elif (name in vcf_reserved_fields.INFO_FIELDS
              and not allow_incompatible_schema):
            reserved = vcf_reserved_fields.INFO_FIELDS.get(name)
            # Type-only validation; the mode is intentionally not checked.
            _validate_reserved_field_type(sub_field, reserved)
            num = reserved.num
            vcf_type = reserved.type
            desc = _remove_special_characters(
                sub_field.description or reserved.desc)
        else:
            num = parser.field_counts[vcfio.FIELD_COUNT_ALTERNATE_ALLELE]
            vcf_type = bigquery_util.get_vcf_type_from_bigquery_type(
                sub_field.type)
            desc = _remove_special_characters(sub_field.description)

        infos[name] = _Info(id=name,
                            num=num,
                            type=vcf_type,
                            desc=desc,
                            source=None,
                            version=None)