Example #1
0
 def _gen_annotation_name_key_pairs(self, annot_field):
     #  type: (str) -> (str, str)
     """Yields `(annotation name, type header key)` pairs for `annot_field`.

     The annotation names are extracted by
     `annotation_parser.extract_annotation_names` from the `DESC` entry of
     the INFO header stored under `annot_field` in `self._header_fields`.
     Each name is paired with the key returned by
     `infer_headers_util.get_inferred_annotation_type_header_key` for that
     field and name.

     Args:
       annot_field: Key of the annotation field in
         `self._header_fields.infos`.

     Yields:
       Two-element tuples `(name, type_key)`, one per extracted name, in the
       order the names are returned by the parser.
     """
     # NOTE(review): this is a generator, so the return type in the type
     # comment above presumably should be an iterable of (str, str) tuples
     # rather than a single pair -- confirm and update the type comment.
     description = self._header_fields.infos[annot_field][
         _HeaderKeyConstants.DESC]
     for annotation_name in annotation_parser.extract_annotation_names(
             description):
         yield (
             annotation_name,
             infer_headers_util.get_inferred_annotation_type_header_key(
                 annot_field, annotation_name))
Example #2
0
    def add_annotation_data(self, proc_var, annotation_field_name, data):
        # type: (ProcessedVariant, str, List[str]) -> None
        """Parses annotation strings and attaches them to the matching ALTs.

        Every element of `data` is parsed with an `annotation_parser.Parser`
        built from the reference bases of `proc_var`, the alternate bases of
        its `_alternate_datas`, and the annotation names registered for
        `annotation_field_name`. The parser returns the index of the matched
        alternate together with a name -> value map. Each value, except the
        one stored under `annotation_parser.ANNOTATION_ALT`, is converted
        with `VCF_TYPE_TO_PY` according to the VCF type looked up for that
        annotation; empty values become `None`. The resulting map is then
        appended to `_info[annotation_field_name]` of the matched alternate,
        and the field name is recorded in its `annotation_field_names`.

        Strings that raise `annotation_parser.AnnotationParserException` are
        skipped: a warning is logged and, depending on the concrete
        exception type, one of the mismatch counters is incremented.

        The only assumption about `proc_var` is that its `_alternate_datas`
        has been initialized with valid `AlternateBaseData` objects.

        Args:
          proc_var: The object to which the annotations are being added.
          annotation_field_name: The name of the annotation INFO field,
            e.g., ANN or CSQ.
          data: The value of the field split on comma. A single element
            looks something like (taken from an Ensembl VEP run):

            G|upstream_gene_variant|MODIFIER|PSMF1|ENSG00000125818|...

            where '|' is the separator. The first element identifies the
            allele (one of the ALTs) the annotation refers to; the remaining
            elements follow the format given in the header description of
            `annotation_field_name`, e.g.,

            Allele|Consequence|IMPACT|SYMBOL|Gene|...
        """
        alternate_bases = [
            alt.alternate_bases for alt in proc_var._alternate_datas]
        field_parser = annotation_parser.Parser(
            proc_var.reference_bases, alternate_bases,
            self._annotation_names_map[annotation_field_name],
            self._use_allele_num, self._minimal_match)
        for raw_annotation in data:
            try:
                alt_index, parsed = field_parser.parse_and_match_alt(
                    raw_annotation)
                # Convert values in place; only existing keys are
                # reassigned, so iterating over `items()` stays valid.
                for key, raw_value in parsed.items():
                    if key == annotation_parser.ANNOTATION_ALT:
                        continue
                    header_key = (
                        infer_headers_util
                        .get_inferred_annotation_type_header_key(
                            annotation_field_name, key))
                    vcf_type = self._vcf_type_from_annotation_header(
                        annotation_field_name, header_key)
                    if raw_value:
                        parsed[key] = VCF_TYPE_TO_PY[vcf_type](raw_value)
                    else:
                        parsed[key] = None
                self._alt_match_counter.inc()
                matched_alt = proc_var._alternate_datas[alt_index]
                matched_alt._info.setdefault(
                    annotation_field_name, []).append(parsed)
                matched_alt.annotation_field_names.add(annotation_field_name)
            except annotation_parser.AnnotationParserException as error:
                logging.warning(
                    'Parsing of annotation field %s failed at reference %s start %d: '
                    '%s',
                    annotation_field_name,
                    proc_var.reference_name,
                    proc_var.start,
                    str(error))
                # Count the failure by its concrete exception type; other
                # parser exceptions are only logged.
                if isinstance(error, annotation_parser.AnnotationAltNotFound):
                    self._alt_mismatch_counter.inc()
                elif isinstance(error, annotation_parser.AlleleNumMissing):
                    self._allele_num_missing_counter.inc()
                elif isinstance(
                        error, annotation_parser.InvalidAlleleNumValue):
                    self._allele_num_incorrect_counter.inc()
                elif isinstance(
                        error, annotation_parser.AmbiguousAnnotationAllele):
                    self._alt_minimal_ambiguous_counter.inc()