def _make_vmc_allele(a):
    """Build a VMC Location and Allele from a single CAR allele dict.

    ``a`` is a dict (from CAR json) for a single genomicAllele or
    transcriptAllele.  It must provide:

    * ``"referenceSequence"``: a "/"-separated string; its last component
      is used as the key into ``self._refseqmapper``.
    * ``"coordinates"``: a non-empty list of dicts with ``"start"``,
      ``"end"`` and ``"allele"`` entries.  Only the first entry is used.

    Returns the tuple ``(ir, sequence_id, location, allele)``.  Nothing is
    added to a bundle here; that is left to the caller.

    Raises ``KeyError`` for a missing key or unknown reference sequence and
    ``IndexError`` when ``"coordinates"`` is empty.

    Note: ``self`` is a free variable resolved from the enclosing scope.
    """
    car_rsid = a["referenceSequence"].split("/")[-1]
    ir = self._refseqmapper[car_rsid]
    sequence_id = get_vmc_sequence_identifier(ir)

    # N.B. Double check CA coordinate semantics
    # If HGVS like re: insertions, then end -= 1 below
    coordinate_sets = a["coordinates"]
    if len(coordinate_sets) > 1:
        # NOTE(review): "resp[@id]" looks like an interpolation that was
        # never wired up; the text is kept verbatim because no such value is
        # visible in this function.
        _logger.warning(
            "More than one coordinate set for resp[@id]; using only first")
    coords = coordinate_sets[0]

    # start is shifted down by one while end is kept as-is -- presumably a
    # 1-based-inclusive to interbase conversion; see the N.B. above.
    interval = models.Interval(start=coords["start"] - 1, end=coords["end"])
    location = models.Location(sequence_id=sequence_id, interval=interval)
    location.id = computed_id(location)

    allele = models.Allele(location_id=location.id, state=coords["allele"])
    allele.id = computed_id(allele)

    return (ir, sequence_id, location, allele)
def from_hgvs(hgvs_string):
    """Convert an HGVS variant string into a single-allele VMC bundle.

    The string is parsed with the parser from ``_get_hgvs_parser()``.  One
    Location and one Allele are built from the parsed position and edit,
    then wrapped in a ``models.Vmcbundle`` with empty genotypes and
    haplotypes.

    Supported edit types are ``ins``, ``sub``, ``del`` and ``delins``.

    Returns the result of ``ppj(bundle)`` (presumably a pretty-printed JSON
    rendering -- confirm against ``ppj``).

    Raises ``ValueError`` for intronic positions and for unsupported edit
    types.
    """
    hp = _get_hgvs_parser()
    sv = hp.parse_hgvs_variant(hgvs_string)

    ir = models.Identifier(namespace="NCBI", accession=sv.ac)
    # NOTE(review): the sequence id is hard-coded, so every variant maps to
    # the same sequence regardless of ``sv.ac``.  The original author left
    # the lookup ``get_vmc_sequence_id(ir)`` commented out here.
    sequence_id = "VMC:GS_Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO"

    pos = sv.posedit.pos
    if isinstance(pos, hgvs.location.BaseOffsetInterval):
        if pos.start.is_intronic or pos.end.is_intronic:
            raise ValueError("Intronic HGVS variants are not supported")

    edit_type = sv.posedit.edit.type
    if edit_type == 'ins':
        # Zero-width interval: start and end are both the HGVS start base.
        interval = models.Interval(start=pos.start.base, end=pos.start.base)
    elif edit_type in ('sub', 'del', 'delins'):
        # start is shifted down by one, end kept -- presumably converting
        # 1-based inclusive HGVS coordinates to interbase.
        interval = models.Interval(start=pos.start.base - 1, end=pos.end.base)
    else:
        raise ValueError(
            "HGVS variant type {} is unsupported".format(edit_type))

    location = models.Location(sequence_id=sequence_id, interval=interval)
    location.id = computed_id(location)

    # A falsy ``alt`` (e.g. None) becomes the empty string.
    state = sv.posedit.edit.alt or ''
    allele = models.Allele(location_id=location.id, state=state)
    allele.id = computed_id(allele)

    bundle = models.Vmcbundle(
        alleles={allele.id: allele.as_dict()},
        genotypes={},
        haplotypes={},
        identifiers={sequence_id: [ir.as_dict()]},
        locations={location.id: location.as_dict()},
        meta={"version": "0.1"},
    )

    return ppj(bundle)
import datetime
import json

from vmc import models, computed_id, serialize

# Worked example: build an Interval, a Location and an Allele, checking the
# serialized form and computed id at each step.  The module-level names
# (i, l, a, locations) are kept as-is.

# Interval
i = models.Interval(start=42, end=42)
assert serialize(i) == "<Interval|42|42>"
assert i.as_dict() == {"end": 42, "start": 42}

# Location
l = models.Location(sequence_id="VMC:GS_01234", interval=i)
assert serialize(l) == "<Location|VMC:GS_01234|<Interval|42|42>>"
# The id is assigned after construction, from the object's computed id.
l.id = computed_id(l)
assert l.id == "VMC:GL_OUqODzxryILUEDmv7uF8R8NwREJAx7gN"
assert l.as_dict() == {
    "id": "VMC:GL_OUqODzxryILUEDmv7uF8R8NwREJAx7gN",
    "interval": {"end": 42, "start": 42},
    "sequence_id": "VMC:GS_01234",
}

locations = {l.id: l.as_dict()}

# Allele
a = models.Allele(location_id=l.id, state="A")
assert serialize(a) == "<Allele|VMC:GL_OUqODzxryILUEDmv7uF8R8NwREJAx7gN|A>"
a.id = computed_id(a)
assert a.id == "VMC:GA_xTR0mmMviMLoAI9SwmDMFYr_AZczkjyU"
def test_Genotype(test):
    """Check a Genotype built from ``test["in"]``.

    ``test`` is a mapping with an ``"in"`` dict of constructor keyword
    arguments and an ``"out"`` dict holding the expected ``"serialize"``
    and ``"computed_id"`` values.
    """
    genotype = vmc.models.Genotype(**test["in"])
    expected = test["out"]

    assert vmc.serialize(genotype) == expected["serialize"]
    assert vmc.computed_id(genotype) == expected["computed_id"]
def test_Interval(test):
    """Check an Interval built from ``test["in"]``.

    ``test`` is a mapping with an ``"in"`` dict of constructor keyword
    arguments and an ``"out"`` dict holding the expected ``"serialize"``
    and ``"computed_id"`` values.
    """
    # Build the model this test is named for; the previous version
    # constructed a Location here, which looks like a copy-paste slip.
    o = vmc.models.Interval(**test["in"])
    assert test["out"]["serialize"] == vmc.serialize(o)
    # NOTE(review): kept from the original -- confirm that computed_id()
    # supports Interval objects and that the fixture provides this value.
    assert test["out"]["computed_id"] == vmc.computed_id(o)
def build_loc(seq_id, start):
    """Create a Location on ``seq_id`` spanning ``start`` to ``start + 1``.

    The Location's id is set from its computed id, and that id is returned.
    """
    loc = models.Location(
        sequence_id=seq_id,
        interval=models.Interval(start=start, end=start + 1),
    )
    loc.id = computed_id(loc)
    return loc.id
def build_allele(loc_id, state):
    """Create an Allele with the given location id and state.

    The Allele's id is set from its computed id, and that id is returned.
    """
    new_allele = models.Allele(state=state, location_id=loc_id)
    new_allele.id = computed_id(new_allele)
    return new_allele.id