def _parse_hit(cls, hit):
    """Build the list of SnpEff annotations found in a single hit.

    Every annotation dictionary under ``hit['snpeff']['ann']`` is modified
    in place:

    - 'genomic_allele' is copied from ``hit['allele']``.
    - 'coding_allele' is whatever ``infer_annotated_allele`` returns for
      the annotation's 'hgvs_c' value (``None`` is passed if it is absent).
    - 'effects' is the 'effect' string split on '&'; the 'effect' key
      itself is removed. A missing 'effect' yields ``['']``.
    - 'hgvs_p', when present, is replaced by ``parse_prot_change`` of it.

    Returns ``None`` when the hit has no 'snpeff' key, otherwise the list
    of annotation dictionaries.
    """
    # A SNP can have two alleles of which only one carries a SnpEff
    # annotation (e.g. rs203319), so some hits lack this key altogether.
    if 'snpeff' not in hit:
        return None

    # The myvariant client sometimes returns a single annotation dictionary
    # and sometimes a list of them; always work with a list.
    snpeff_entries = listify(hit['snpeff']['ann'])

    for entry in snpeff_entries:
        entry['genomic_allele'] = hit['allele']
        entry['coding_allele'] = infer_annotated_allele(entry.get('hgvs_c'))

        # Swap the '&'-joined 'effect' string for an 'effects' list.
        effect_string = entry.get('effect', '')
        entry['effects'] = effect_string.split('&')
        entry.pop('effect', None)

        if 'hgvs_p' not in entry:
            continue
        entry['hgvs_p'] = parse_prot_change(entry['hgvs_p'])

    return snpeff_entries
def _parse_hit(cls, hit):
    """Assemble a single annotation dictionary for one hit.

    The dictionary starts with 'myvariant_hgvs_g' (the hit's '_id') and
    'genomic_allele' (``infer_annotated_allele`` applied to that same id).
    It is then updated with the results of the ``_parse_hgvs_from_*``
    helpers in the order clinvar, snpeff, evs, emv, so on a key clash the
    later helper wins.

    Returns the resulting dictionary.
    """
    annotation = {
        'myvariant_hgvs_g': hit['_id'],
        'genomic_allele': infer_annotated_allele(hit['_id']),
    }

    # Order matters: each update may override keys set by a previous one.
    hgvs_parsers = (
        cls._parse_hgvs_from_clinvar,
        cls._parse_hgvs_from_snpeff,
        cls._parse_hgvs_from_evs,
        cls._parse_hgvs_from_emv,
    )
    for parse_hgvs in hgvs_parsers:
        annotation.update(parse_hgvs(hit))

    return annotation
def _parse_hit(cls, hit):
    """Build one annotation per ClinVar RCV entry found in a single hit.

    Each entry under ``hit['clinvar']['rcv']`` is modified in place:

    - 'clinical_significance' is replaced by a 'clinical_significances'
      list (empty when the original key is absent).
    - 'conditions' is stored as a list, 'condition_names' is derived from
      it, and each condition is updated with its parsed identifiers while
      its 'identifiers' key is dropped.
    - 'url' and 'variant_url' are built from the entry's 'accession' and
      the hit's ClinVar 'variant_id'.
    - The entry is updated with the parsed 'preferred_name' info, and a
      'prot_change' value, if present afterwards, is run through
      ``parse_prot_change``.
    - 'genomic_allele' is copied from ``hit['allele']`` and 'coding_allele'
      is inferred from 'cds_change' (an empty string is used if missing).
    - Finally, every variant-level key of ``hit['clinvar']`` other than
      'rcv' is copied onto the entry; dictionary values are flattened one
      level into '<key>_<subkey>' keys.

    Returns an empty list when the hit has no 'clinvar' key, otherwise the
    list of RCV entries.
    """
    if 'clinvar' not in hit:
        return []

    rcv_entries = listify(hit['clinvar']['rcv'])

    for rcv in rcv_entries:
        # Turn the single significance string into a list of significances.
        if 'clinical_significance' not in rcv:
            rcv['clinical_significances'] = []
        else:
            significance_text = rcv['clinical_significance']
            rcv['clinical_significances'] = \
                cls._split_significance_string(significance_text)
            del rcv['clinical_significance']

        condition_list = listify(rcv['conditions'])
        rcv['conditions'] = condition_list
        rcv['condition_names'] = cls._extract_condition_names(condition_list)
        for condition in condition_list:
            condition.update(cls._parse_condition_identifiers(condition))
            condition.pop('identifiers', None)

        rcv['url'] = cls._url(rcv['accession'])
        rcv['variant_url'] = cls._variant_url(hit['clinvar']['variant_id'])

        # Must run before the 'prot_change' check below, which looks at the
        # entry after this update.
        rcv.update(cls._parse_preferred_name(rcv['preferred_name']))

        if 'prot_change' in rcv:
            rcv['prot_change'] = parse_prot_change(rcv['prot_change'])

        rcv['genomic_allele'] = hit['allele']
        rcv['coding_allele'] = \
            infer_annotated_allele(rcv.get('cds_change', ''))

        # Copy the variant-level info onto this particular RCV entry.
        for key, value in hit['clinvar'].items():
            if key == 'rcv':
                continue

            if isinstance(value, dict):
                # Flatten nested dictionaries one level.
                for subkey, subvalue in value.items():
                    rcv['{}_{}'.format(key, subkey)] = subvalue
            else:
                rcv[key] = value

    return rcv_entries
def _parse_annotation(cls, hits):
    """Parse a group of hits into the final annotation value.

    The hits are processed sorted by '_id'. Each hit dictionary is modified
    in place: an 'allele' key is added, inferred from its '_id', before any
    hit is handed to ``cls._parse_hit``. Falsy parse results are discarded,
    and if every remaining result is a list they are merged into one flat
    list. The outcome is then passed through
    ``cls._parse_annotations_hook``.

    Returns the hook's result when it is truthy, otherwise ``None``.
    """
    # Always visit the hits (i.e. the different alleles) in the same order.
    ordered_hits = sorted(hits, key=itemgetter('_id'))
    for hit in ordered_hits:
        hit['allele'] = infer_annotated_allele(hit['_id'])

    parsed = [cls._parse_hit(hit) for hit in ordered_hits]

    # Some hits hold no relevant annotation and parse to an empty value;
    # those have to be dropped explicitly.
    non_empty = list(filter(None, parsed))

    # Per-hit lists of annotations are merged into a single flat list.
    every_item_is_list = all(isinstance(item, list) for item in non_empty)
    if every_item_is_list:
        non_empty = list(chain.from_iterable(non_empty))

    # Subclasses may do some optional extra parsing here.
    result = cls._parse_annotations_hook(non_empty)
    return result if result else None
def test_infer_annotated_allele(cds_change, expected_allele):
    """Check that the allele inferred from ``cds_change`` is the expected one.

    Both arguments are presumably supplied by a parametrization that is not
    visible alongside this definition.
    """
    inferred_allele = infer_annotated_allele(cds_change)
    assert inferred_allele == expected_allele