예제 #1
0
    def _parse_hit(cls, hit):
        """Extract the SnpEff annotations of a single hit.

        Returns None when ``hit`` has no 'snpeff' key. Otherwise returns the
        annotations found under ``hit['snpeff']['ann']`` (passed through
        ``listify``), with every annotation dict modified in place:

        - 'genomic_allele' is copied from ``hit['allele']``.
        - 'coding_allele' is inferred from the annotation's 'hgvs_c' value
          (None is passed on when that key is absent).
        - 'effect', an '&'-separated string, is replaced by the 'effects'
          list; an annotation without 'effect' gets ``['']``.
        - 'hgvs_p', when present, is replaced by the result of
          ``parse_prot_change``.
        """
        # Sometimes (e.g. rs203319) a SNP has two alleles, one with a SnpEff
        # annotation and the other without, so this key can be missing.
        if 'snpeff' not in hit:
            return None

        # The myvariant client sometimes returns a list of annotation
        # dictionaries and sometimes a single annotation dictionary, so the
        # value is normalised to a list before iterating.
        snpeff_entries = listify(hit['snpeff']['ann'])

        for entry in snpeff_entries:
            entry['genomic_allele'] = hit['allele']
            entry['coding_allele'] = infer_annotated_allele(
                entry.get('hgvs_c'))

            # Store the split effects first, then drop the raw string.
            entry['effects'] = entry.get('effect', '').split('&')
            entry.pop('effect', None)

            if 'hgvs_p' in entry:
                raw_protein_change = entry['hgvs_p']
                entry['hgvs_p'] = parse_prot_change(raw_protein_change)

        return snpeff_entries
예제 #2
0
    def _parse_hit(cls, hit):
        """Build one flat annotation dict with the HGVS data of a hit.

        The dict starts with the hit's '_id' (stored as 'myvariant_hgvs_g')
        and the allele inferred from that id ('genomic_allele'). It is then
        updated with whatever the ClinVar, SnpEff, EVS and EMV HGVS parsers
        return for the hit, in that order, so later sources overwrite keys
        set by earlier ones.
        """
        variant_id = hit['_id']
        hgvs_annotation = {
            'myvariant_hgvs_g': variant_id,
            'genomic_allele': infer_annotated_allele(variant_id),
        }

        # Order matters: each update may overwrite keys from the previous one.
        source_parsers = (
            cls._parse_hgvs_from_clinvar,
            cls._parse_hgvs_from_snpeff,
            cls._parse_hgvs_from_evs,
            cls._parse_hgvs_from_emv,
        )
        for parse_source in source_parsers:
            hgvs_annotation.update(parse_source(hit))

        return hgvs_annotation
예제 #3
0
    def _parse_hit(cls, hit):
        """Extract the ClinVar RCV annotations of a single hit.

        Returns an empty list when ``hit`` has no 'clinvar' key. Otherwise
        returns the entries found under ``hit['clinvar']['rcv']`` (passed
        through ``listify``), with every entry dict modified in place:

        - 'clinical_significance' is replaced by a 'clinical_significances'
          list (empty when the original key is absent).
        - 'conditions' is listified, 'condition_names' is added, and each
          condition's 'identifiers' are replaced by the parsed identifiers.
        - 'url' and 'variant_url' are built from the entry's 'accession' and
          the variant's 'variant_id'.
        - The parsed 'preferred_name' information is merged into the entry.
        - 'prot_change', when present, goes through ``parse_prot_change``.
        - 'genomic_allele' and 'coding_allele' are added.
        - Every variant-level ClinVar field except 'rcv' is copied onto the
          entry, with nested dicts flattened one level as '<key>_<subkey>'.
        """
        if 'clinvar' not in hit:
            return []

        clinvar = hit['clinvar']
        rcv_entries = listify(clinvar['rcv'])

        for rcv in rcv_entries:
            # Replace the significance string with a list of significances.
            rcv['clinical_significances'] = (
                cls._split_significance_string(rcv['clinical_significance'])
                if 'clinical_significance' in rcv
                else []
            )
            rcv.pop('clinical_significance', None)

            rcv['conditions'] = listify(rcv['conditions'])
            rcv['condition_names'] = cls._extract_condition_names(
                rcv['conditions'])

            for cond in rcv['conditions']:
                cond.update(cls._parse_condition_identifiers(cond))
                cond.pop('identifiers', None)

            rcv['url'] = cls._url(rcv['accession'])
            rcv['variant_url'] = cls._variant_url(clinvar['variant_id'])

            rcv.update(cls._parse_preferred_name(rcv['preferred_name']))

            if 'prot_change' in rcv:
                rcv['prot_change'] = parse_prot_change(rcv['prot_change'])

            rcv['genomic_allele'] = hit['allele']
            rcv['coding_allele'] = infer_annotated_allele(
                rcv.get('cds_change', ''))

            # Copy the variant info to each particular RCV entry. This runs
            # last, so variant-level values overwrite same-named entry keys.
            for field, value in clinvar.items():
                if field == 'rcv':
                    continue

                if isinstance(value, dict):
                    # Flatten the dicts one level
                    rcv.update({
                        '{}_{}'.format(field, subfield): subvalue
                        for subfield, subvalue in value.items()
                    })
                else:
                    rcv[field] = value

        return rcv_entries
예제 #4
0
    def _parse_annotation(cls, hits):
        """Parse a group of hits into their combined annotations.

        Each hit gets an 'allele' key (inferred from its '_id') and is then
        handed to ``cls._parse_hit``. Empty results are discarded; if every
        remaining result is a list, the lists are merged into one flat list.
        The outcome goes through ``cls._parse_annotations_hook`` and is
        returned, or None is returned when it is empty.
        """
        # Gather the hits (i.e. different alleles) in the same order always.
        ordered_hits = sorted(hits, key=itemgetter('_id'))

        for hit in ordered_hits:
            hit['allele'] = infer_annotated_allele(hit['_id'])

        parsed = [cls._parse_hit(hit) for hit in ordered_hits]

        # Some hits do not contain any relevant annotation and parse to an
        # empty value; those have to be removed explicitly.
        non_empty = list(filter(None, parsed))

        # When every result is itself a list, merge them into a flat list.
        if all(isinstance(result, list) for result in non_empty):
            non_empty = [item for group in non_empty for item in group]

        # Let subclasses do some optional extra parsing here.
        final_annotations = cls._parse_annotations_hook(non_empty)

        return final_annotations or None
예제 #5
0
def test_infer_annotated_allele(cds_change, expected_allele):
    """Check that the allele inferred from cds_change is the expected one."""
    inferred_allele = infer_annotated_allele(cds_change)
    assert inferred_allele == expected_allele