Beispiel #1
0
    def validate(self, var, strict=None):
        """Run the intrinsic checks on a parsed variant, raising on failure.

        For n. and c. variants the transcript bounds are checked first (a
        c. variant is projected to n. with ``self.vm.c_to_n``).  When that
        check does not return ``ValidationLevel.VALID`` the method raises if
        ``hgvs.global_config.mapping.strict_bounds`` is set; otherwise it logs
        a warning and returns ``True`` without running any further check.
        After that, the CDS-bound check and the reference check run in order.

        :param var: parsed variant (:class:`hgvs.sequencevariant.SequenceVariant`)
        :param strict: when truthy, a result ``>= ValidationLevel.WARNING``
            raises; otherwise only a result ``>= ValidationLevel.ERROR`` does.
            ``None`` defers to ``self.strict``.
        :returns: ``True`` if no check raised
        :raises HGVSInvalidVariantError: carrying the failing check's message
        """
        assert isinstance(var, hgvs.sequencevariant.SequenceVariant), \
            "variant must be a parsed HGVS sequence variant object"

        if strict is None:
            strict = self.strict
        if strict:
            fail_level = ValidationLevel.WARNING
        else:
            fail_level = ValidationLevel.ERROR

        # Only transcript-based variants get the bounds check.
        if var.type == "n":
            var_n = var
        elif var.type == "c":
            var_n = self.vm.c_to_n(var)
        else:
            var_n = None

        if var_n is not None:
            bounds_res, bounds_msg = self._n_within_transcript_bounds(var_n)
            if bounds_res != ValidationLevel.VALID:
                if hgvs.global_config.mapping.strict_bounds:
                    raise HGVSInvalidVariantError(bounds_msg)
                _logger.warning(
                    "{}: Variant outside transcript bounds; no validation provided".format(var))
                # no other checking performed
                return True

        for check in (self._c_within_cds_bound, self._ref_is_valid):
            level, message = check(var)
            if level >= fail_level:
                raise HGVSInvalidVariantError(message)

        return True
Beispiel #2
0
 def validate(self, var, strict=None):
     """Check the reference and then the CDS bounds of a parsed variant.

     :param var: parsed variant (:class:`hgvs.sequencevariant.SequenceVariant`)
     :param strict: when truthy, a result ``>= ValidationLevel.WARNING``
         raises; otherwise only a result ``>= ValidationLevel.ERROR`` does.
         ``None`` defers to ``self.strict``.
     :returns: ``True`` if neither check raised
     :raises HGVSInvalidVariantError: carrying the failing check's message
     """
     assert isinstance(var, hgvs.sequencevariant.SequenceVariant), \
         "variant must be a parsed HGVS sequence variant object"

     if strict is None:
         strict = self.strict
     fail_level = ValidationLevel.ERROR if not strict else ValidationLevel.WARNING

     # Guard clauses: the second check only runs if the first one passed.
     ref_level, ref_msg = self._ref_is_valid(var)
     if ref_level >= fail_level:
         raise HGVSInvalidVariantError(ref_msg)

     cds_level, cds_msg = self._c_within_cds_bound(var)
     if cds_level >= fail_level:
         raise HGVSInvalidVariantError(cds_msg)

     return True
Beispiel #3
0
    def n_to_c(self, var_n):
        """Project a parsed n. variant onto c. coordinates of the same transcript.

        The mapper is fetched with the transcript as both ``tx_ac`` and
        ``alt_ac`` and ``alt_aln_method="transcript"``.  ``var_n.fill_ref`` is
        called on the input before the position is mapped.

        :param hgvs.sequencevariant.SequenceVariant var_n: a variant object
        :returns: variant object (:class:`hgvs.sequencevariant.SequenceVariant`)
        :raises HGVSInvalidVariantError: if var_n is not of type "n"
        :raises HGVSUnsupportedOperationError: if the edit is not a
            NARefAlt, Dup or Inv

        """

        if var_n.type != "n":
            raise HGVSInvalidVariantError("Expected n. variant; got " + str(var_n))
        if self._validator:
            self._validator.validate(var_n)
        var_n.fill_ref(self.hdp)

        mapper = self._fetch_TranscriptMapper(
            tx_ac=var_n.ac, alt_ac=var_n.ac, alt_aln_method="transcript")
        pos_c = mapper.n_to_c(var_n.posedit.pos)

        supported_edits = (hgvs.edit.NARefAlt, hgvs.edit.Dup, hgvs.edit.Inv)
        if not isinstance(var_n.posedit.edit, supported_edits):
            raise HGVSUnsupportedOperationError("Only NARefAlt/Dup/Inv types are currently implemented")
        # The edit is coordinate-independent here, so a copy is reused as-is.
        edit_c = copy.deepcopy(var_n.posedit.edit)

        posedit_c = hgvs.posedit.PosEdit(pos_c, edit_c)
        var_c = hgvs.sequencevariant.SequenceVariant(ac=var_n.ac, type="c", posedit=posedit_c)
        if self.replace_reference:
            self._replace_reference(var_c)
        return var_c
Beispiel #4
0
    def c_to_g(self, var_c, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln_method):
        """Project a parsed c. variant onto the reference sequence ``alt_ac``
        as a g. variant, using the given alignment method (the default is
        taken from ``hgvs.global_config.mapping.alt_aln_method``).

        ``var_c.fill_ref`` is called on the input before the position is
        mapped; the edit is converted with ``_convert_edit_check_strand``
        using the mapper's strand.

        :param hgvs.sequencevariant.SequenceVariant var_c: a variant object
        :param str alt_ac: a reference sequence accession (e.g., NC_000001.11)
        :param str alt_aln_method: the alignment method; valid values depend on data source
        :returns: variant object (:class:`hgvs.sequencevariant.SequenceVariant`)
        :raises HGVSInvalidVariantError: if var_c is not of type "c"

        """

        if var_c.type != "c":
            raise HGVSInvalidVariantError("Expected a cDNA (c.); got " + str(var_c))
        if self._validator:
            self._validator.validate(var_c)
        var_c.fill_ref(self.hdp)

        mapper = self._fetch_TranscriptMapper(
            tx_ac=var_c.ac, alt_ac=alt_ac, alt_aln_method=alt_aln_method)
        pos_g = mapper.c_to_g(var_c.posedit.pos)
        edit_g = self._convert_edit_check_strand(mapper.strand, var_c.posedit.edit)

        posedit_g = hgvs.posedit.PosEdit(pos_g, edit_g)
        var_g = hgvs.sequencevariant.SequenceVariant(ac=alt_ac, type="g", posedit=posedit_g)
        if self.replace_reference:
            self._replace_reference(var_g)
        return var_g
Beispiel #5
0
    def g_to_c(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln_method):
        """Project a parsed g. variant onto transcript ``tx_ac`` as a c.
        variant, using the given alignment method (the default is taken from
        ``hgvs.global_config.mapping.alt_aln_method``).

        ``var_g.fill_ref`` is called on the input before the position is
        mapped; the edit is converted with ``_convert_edit_check_strand``
        using the mapper's strand.

        :param hgvs.sequencevariant.SequenceVariant var_g: a variant object
        :param str tx_ac: a transcript accession (e.g., NM_012345.6 or ENST012345678)
        :param str alt_aln_method: the alignment method; valid values depend on data source
        :returns: variant object (:class:`hgvs.sequencevariant.SequenceVariant`) using CDS coordinates
        :raises HGVSInvalidVariantError: if var_g is not of type "g"

        """

        if var_g.type != "g":
            raise HGVSInvalidVariantError("Expected a g. variant; got " + str(var_g))
        if self._validator:
            self._validator.validate(var_g)
        var_g.fill_ref(self.hdp)

        mapper = self._fetch_TranscriptMapper(
            tx_ac=tx_ac, alt_ac=var_g.ac, alt_aln_method=alt_aln_method)
        pos_c = mapper.g_to_c(var_g.posedit.pos)
        edit_c = self._convert_edit_check_strand(mapper.strand, var_g.posedit.edit)

        posedit_c = hgvs.posedit.PosEdit(pos_c, edit_c)
        var_c = hgvs.sequencevariant.SequenceVariant(ac=tx_ac, type="c", posedit=posedit_c)
        if self.replace_reference:
            self._replace_reference(var_c)
        return var_c
Beispiel #6
0
    def c_to_p(self, var_c, pro_ac=None):
        """
        Translate a c. SequenceVariant into a p. SequenceVariant on the given protein accession
        Author: Rudy Rico

        Reference data is built with ``RefTranscriptData``, the altered
        sequence(s) with ``AltSeqBuilder``, and each of those is turned into
        a protein variant with ``AltSeqToHgvsp``.  Only the first resulting
        protein variant is returned.

        :param SequenceVariant var_c: hgvsc tag
        :param str pro_ac: protein accession
        :rtype: hgvs.sequencevariant.SequenceVariant
        :raises HGVSInvalidVariantError: if var_c is not of type "c"

        """

        if var_c.type != "c":
            raise HGVSInvalidVariantError("Expected a cDNA (c.) variant; got " + str(var_c))
        if self._validator:
            self._validator.validate(var_c)

        reference_data = RefTranscriptData(self.hdp, var_c.ac, pro_ac)
        all_alt_data = altseqbuilder.AltSeqBuilder(var_c, reference_data).build_altseq()

        # TODO: handle case where you get 2+ alt sequences back;
        # every alt sequence is converted, but only the first result is used.
        var_ps = [
            altseq_to_hgvsp.AltSeqToHgvsp(reference_data, alt_data).build_hgvsp()
            for alt_data in all_alt_data
        ]
        var_p = var_ps[0]

        if self.add_gene_symbol:
            self._update_gene_symbol(var_p, var_c.gene)

        return var_p
Beispiel #7
0
    def c_to_n(self, var_c):
        """Project a parsed c. variant onto n. coordinates of the same transcript.

        The mapper is fetched with the transcript as both ``tx_ac`` and
        ``alt_ac`` and ``alt_aln_method="transcript"``.  ``var_c.fill_ref`` is
        called on the input before the position is mapped.

        :param hgvs.sequencevariant.SequenceVariant var_c: a variant object
        :returns: variant object (:class:`hgvs.sequencevariant.SequenceVariant`)
        :raises HGVSInvalidVariantError: if var_c is not of type "c"
        :raises HGVSUnsupportedOperationError: if the edit is not a
            NARefAlt, Dup or Inv

        """

        if var_c.type != "c":
            raise HGVSInvalidVariantError("Expected a cDNA (c.); got " + str(var_c))
        if self._validator:
            self._validator.validate(var_c)
        var_c.fill_ref(self.hdp)

        mapper = self._fetch_AlignmentMapper(
            tx_ac=var_c.ac, alt_ac=var_c.ac, alt_aln_method="transcript")
        pos_n = mapper.c_to_n(var_c.posedit.pos)

        supported_edits = (hgvs.edit.NARefAlt, hgvs.edit.Dup, hgvs.edit.Inv)
        if not isinstance(var_c.posedit.edit, supported_edits):
            raise HGVSUnsupportedOperationError(
                "Only NARefAlt/Dup/Inv types are currently implemented")
        # The edit is coordinate-independent here, so a copy is reused as-is.
        edit_n = copy.deepcopy(var_c.posedit.edit)

        posedit_n = hgvs.posedit.PosEdit(pos_n, edit_n)
        var_n = hgvs.sequencevariant.SequenceVariant(ac=var_c.ac, type="n", posedit=posedit_n)
        if self.replace_reference:
            self._replace_reference(var_n)
        if self.add_gene_symbol:
            self._update_gene_symbol(var_n, var_c.gene)
        return var_n
Beispiel #8
0
    def g_to_c(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln_method):
        """Project a parsed g. variant onto transcript ``tx_ac`` as a c.
        variant, using the given alignment method (the default is taken from
        ``hgvs.global_config.mapping.alt_aln_method``).

        When the mapped position is flagged uncertain (the variant lies at an
        alignment gap), the edit is rebuilt from the altered sequence instead
        of being converted.  The returned position's ``uncertain`` flag is
        always copied from the input position.

        :param hgvs.sequencevariant.SequenceVariant var_g: a variant object
        :param str tx_ac: a transcript accession (e.g., NM_012345.6 or ENST012345678)
        :param str alt_aln_method: the alignment method; valid values depend on data source
        :returns: variant object (:class:`hgvs.sequencevariant.SequenceVariant`) using CDS coordinates
        :raises HGVSInvalidVariantError: if var_g is not of type "g"

        """

        if var_g.type != "g":
            raise HGVSInvalidVariantError("Expected a g. variant; got " + str(var_g))
        if self._validator:
            self._validator.validate(var_g)
        var_g.fill_ref(self.hdp)

        mapper = self._fetch_AlignmentMapper(
            tx_ac=tx_ac, alt_ac=var_g.ac, alt_aln_method=alt_aln_method)
        pos_c = mapper.g_to_c(var_g.posedit.pos)

        if pos_c.uncertain:
            # variant at alignment gap
            pos_g = mapper.c_to_g(pos_c)
            altered = self._get_altered_sequence(mapper.strand, pos_g, var_g)
            edit_c = hgvs.edit.NARefAlt(ref='', alt=altered)
        else:
            edit_c = self._convert_edit_check_strand(mapper.strand, var_g.posedit.edit)
            if edit_c.type == 'ins':
                # An exonic insertion whose mapped interval spans more than
                # one base is narrowed by one base on each side.
                if pos_c.start.offset == 0 and pos_c.end.offset == 0 and pos_c.end - pos_c.start > 1:
                    pos_c.start.base += 1
                    pos_c.end.base -= 1
                    edit_c.ref = ''

        pos_c.uncertain = var_g.posedit.pos.uncertain
        posedit_c = hgvs.posedit.PosEdit(pos_c, edit_c)
        var_c = hgvs.sequencevariant.SequenceVariant(ac=tx_ac, type="c", posedit=posedit_c)
        if self.replace_reference:
            self._replace_reference(var_c)
        return var_c
Beispiel #9
0
    def n_to_g(self, var_n, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln_method):
        """Project a parsed n. variant onto the reference sequence ``alt_ac``
        as a g. variant, using the given alignment method (the default is
        taken from ``hgvs.global_config.mapping.alt_aln_method``).

        When the mapped position is flagged uncertain (the variant lies at an
        alignment gap), the edit is rebuilt from the altered sequence instead
        of being converted.  The returned position's ``uncertain`` flag is
        always copied from the input position.

        :param hgvs.sequencevariant.SequenceVariant var_n: a variant object
        :param str alt_ac: a reference sequence accession (e.g., NC_000001.11)
        :param str alt_aln_method: the alignment method; valid values depend on data source
        :returns: variant object (:class:`hgvs.sequencevariant.SequenceVariant`)
        :raises HGVSInvalidVariantError: if var_n is not of type "n"

        """

        if var_n.type != "n":
            raise HGVSInvalidVariantError("Expected a n. variant; got " + str(var_n))
        if self._validator:
            self._validator.validate(var_n)
        var_n.fill_ref(self.hdp)

        mapper = self._fetch_AlignmentMapper(
            tx_ac=var_n.ac, alt_ac=alt_ac, alt_aln_method=alt_aln_method)
        pos_g = mapper.n_to_g(var_n.posedit.pos)

        if pos_g.uncertain:
            # variant at alignment gap
            pos_n = mapper.g_to_n(pos_g)
            altered = self._get_altered_sequence(mapper.strand, pos_n, var_n)
            edit_g = hgvs.edit.NARefAlt(ref='', alt=altered)
        else:
            edit_g = self._convert_edit_check_strand(mapper.strand, var_n.posedit.edit)
            if edit_g.type == 'ins':
                # An insertion whose mapped interval spans more than one base
                # is narrowed by one base on each side.
                if pos_g.end - pos_g.start > 1:
                    pos_g.start.base += 1
                    pos_g.end.base -= 1
                    edit_g.ref = ''

        pos_g.uncertain = var_n.posedit.pos.uncertain
        posedit_g = hgvs.posedit.PosEdit(pos_g, edit_g)
        var_g = hgvs.sequencevariant.SequenceVariant(ac=alt_ac, type="g", posedit=posedit_g)
        if self.replace_reference:
            self._replace_reference(var_g)
        return var_g
Beispiel #10
0
 def g_to_t(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln_method):
     """Project a g. variant onto transcript ``tx_ac``, yielding a c.
     variant when the mapper reports a coding transcript and an n. variant
     otherwise.

     The projection itself is delegated to ``VariantMapper.g_to_c`` or
     ``VariantMapper.g_to_n``.

     :param var_g: a parsed g. variant
     :param str tx_ac: a transcript accession
     :param str alt_aln_method: the alignment method
     :returns: the variant produced by the selected delegate
     :raises HGVSInvalidVariantError: if var_g is not of type "g"
     """
     if var_g.type != "g":
         raise HGVSInvalidVariantError("Expected a g. variant; got " + str(var_g))
     if self._validator:
         self._validator.validate(var_g)
     var_g.fill_ref(self.hdp)

     mapper = self._fetch_TranscriptMapper(
         tx_ac=tx_ac, alt_ac=var_g.ac, alt_aln_method=alt_aln_method)
     # Pick the delegate first, then make a single call.
     project = VariantMapper.g_to_c if mapper.is_coding_transcript else VariantMapper.g_to_n
     return project(self, var_g=var_g, tx_ac=tx_ac, alt_aln_method=alt_aln_method)
Beispiel #11
0
 def t_to_g(self, var_t, alt_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln_method):
     """Project a transcript variant (c. or n.) onto the reference sequence
     ``alt_ac`` as a g. variant.

     The projection is delegated to ``VariantMapper.c_to_g`` for c.
     variants and to ``VariantMapper.n_to_g`` for n. variants.

     :param var_t: a parsed c. or n. variant
     :param str alt_ac: a reference sequence accession
     :param str alt_aln_method: the alignment method
     :returns: the variant produced by the selected delegate
     :raises HGVSInvalidVariantError: if var_t is neither of type "c" nor "n"
     """
     # Tuple membership rather than ``in "cn"``: on a string, ``in`` is a
     # substring test and would also let "" and "cn" through.
     if var_t.type not in ("c", "n"):
         raise HGVSInvalidVariantError("Expected a c. or n. variant; got " + str(var_t))
     if self._validator:
         self._validator.validate(var_t)
     var_t.fill_ref(self.hdp)

     # Dispatch on the variant's own type.  Choosing by whether the
     # transcript is coding would send an n. variant on a coding transcript
     # to c_to_g, which rejects anything that is not of type "c".  Each
     # delegate fetches the mapper it needs, so none is fetched here.
     if var_t.type == "c":
         return VariantMapper.c_to_g(self, var_c=var_t, alt_ac=alt_ac, alt_aln_method=alt_aln_method)
     return VariantMapper.n_to_g(self, var_n=var_t, alt_ac=alt_ac, alt_aln_method=alt_aln_method)
Beispiel #12
0
    def g_to_n(self, var_g, tx_ac, alt_aln_method=hgvs.global_config.mapping.alt_aln_method):
        """Given a parsed g. variant, return a n. variant on the specified
        transcript using the specified alignment method (default is
        "splign" from NCBI).

        When the mapped position is flagged uncertain (variant at an
        alignment gap), the edit is rebuilt from the altered sequence rather
        than converted.  The returned position's ``uncertain`` flag is always
        copied from the input position.

        :param hgvs.sequencevariant.SequenceVariant var_g: a variant object
        :param str tx_ac: a transcript accession (e.g., NM_012345.6 or ENST012345678)
        :param str alt_aln_method: the alignment method; valid values depend on data source
        :returns: variant object (:class:`hgvs.sequencevariant.SequenceVariant`) using transcript (n.) coordinates
        :raises HGVSInvalidVariantError: if var_g is not of type "g"

        """

        if not (var_g.type == "g"):
            raise HGVSInvalidVariantError("Expected a g. variant; got " + str(var_g))
        if self._validator:
            self._validator.validate(var_g)
        mapper = self._fetch_AlignmentMapper(
            tx_ac=tx_ac, alt_ac=var_g.ac, alt_aln_method=alt_aln_method)

        # With strict bounds disabled, a minus-strand variant whose genomic
        # interval is out of the mapper's bounds is first left-normalized on
        # the genomic sequence; the rest of the method uses that result.
        if (mapper.strand == -1
            and not hgvs.global_config.mapping.strict_bounds
            and not mapper.g_interval_is_inbounds(var_g.posedit.pos)):
            _logger.info("Renormalizing out-of-bounds minus strand variant on genomic sequence")
            var_g = self.left_normalizer.normalize(var_g)

        # fill_ref runs after the optional renormalization above, so it acts
        # on whichever variant object is mapped below.
        var_g.fill_ref(self.hdp)
        pos_n = mapper.g_to_n(var_g.posedit.pos)
        if not pos_n.uncertain:
            edit_n = self._convert_edit_check_strand(mapper.strand, var_g.posedit.edit)
            # An insertion with zero offsets on both ends whose mapped
            # interval spans more than one base is narrowed by one base on
            # each side and its ref is cleared.
            if edit_n.type == 'ins' and pos_n.start.offset == 0 and pos_n.end.offset == 0 and pos_n.end - pos_n.start > 1:
                pos_n.start.base += 1
                pos_n.end.base -= 1
                edit_n.ref = ''
        else:
            # variant at alignment gap
            pos_g = mapper.n_to_g(pos_n)
            edit_n = hgvs.edit.NARefAlt(
                ref='', alt=self._get_altered_sequence(mapper.strand, pos_g, var_g))
        pos_n.uncertain = var_g.posedit.pos.uncertain
        var_n = hgvs.sequencevariant.SequenceVariant(
            ac=tx_ac, type="n", posedit=hgvs.posedit.PosEdit(pos_n, edit_n))
        # The reference is only replaced when the n. interval lies within
        # 0 <= start.base and end.base < mapper.tgt_len.
        if (self.replace_reference
            and var_n.posedit.pos.start.base >= 0
            and var_n.posedit.pos.end.base < mapper.tgt_len):
            self._replace_reference(var_n)
        if self.add_gene_symbol:
            self._update_gene_symbol(var_n, var_g.gene)
        return var_n
Beispiel #13
0
    def _fetch_bounded_seq(self, var, start, end, boundary):
        """Fetch reference sequence for ``var.ac`` from the hgvs data provider,
        clamped to ``boundary``.

        Coordinates are 0-based and the interval is half open.

        :param var: variant whose accession (``var.ac``) is queried
        :param start: requested start, raised to ``boundary[0]`` if lower
        :param end: requested end, lowered to ``boundary[1]`` if higher
        :param boundary: pair of lower and upper limits
        :returns: the fetched sequence, or ``""`` when the clamped interval is empty
        :raises HGVSInvalidVariantError: if fewer bases than requested come back
        """

        lo = max(start, boundary[0])
        hi = min(end, boundary[1])
        if lo >= hi:
            return ""

        fetched = self.hdp.get_seq(var.ac, lo, hi)

        # A short read means the interval runs past the end of the sequence.
        if len(fetched) < hi - lo:
            raise HGVSInvalidVariantError("Variant span is outside sequence bounds ({var})".format(var=var))

        return fetched
Beispiel #14
0
    def t_to_p(self, var_t):
        """Return a protein variant for a c. variant, or the string
        "non-coding" for an n. variant.

        CAUTION: other x_to_y methods always hand back SequenceVariant
        instances; this one hands back a plain string for type ``n``.  That
        is a convenience for callers that loop over ``relevant_transcripts``,
        project with ``g_to_t``, and then want a protein representation
        wherever the transcript is coding.

        :param var_t: a parsed c. or n. variant
        :returns: result of ``self.c_to_p(var_t)`` for c., ``"non-coding"`` for n.
        :raises HGVSInvalidVariantError: for any other variant type
        """
        kind = var_t.type
        if kind == "n":
            return "non-coding"
        if kind != "c":
            raise HGVSInvalidVariantError(
                "Expected a coding (c.) or non-coding (n.) variant; got " + str(var_t))
        return self.c_to_p(var_t)
Beispiel #15
0
    def c_to_p(self, var_c, pro_ac=None):
        """
        Converts a c. SequenceVariant to a p. SequenceVariant on the specified protein accession
        Author: Rudy Rico

        :param SequenceVariant var_c: hgvsc tag
        :param str pro_ac: protein accession; when ``None`` it is looked up
            from the data provider for the transcript, falling back to the
            first accession returned for the translated protein sequence
        :rtype: hgvs.sequencevariant.SequenceVariant
        :raises HGVSInvalidVariantError: if var_c is not of type "c"
        :raises HGVSDataNotAvailableError: if the transcript's identity info
            or sequence cannot be obtained

        """

        @attr.s(slots=True)
        class RefTranscriptData(object):
            transcript_sequence = attr.ib()
            aa_sequence = attr.ib()
            cds_start = attr.ib()
            cds_stop = attr.ib()
            protein_accession = attr.ib()

            @classmethod
            def setup_transcript_data(cls, hdp, tx_ac, pro_ac):
                """helper for generating RefTranscriptData for c_to_p"""
                # Use the tx_ac parameter rather than reaching into the
                # enclosing scope for var_c.ac.
                tx_info = hdp.get_tx_identity_info(tx_ac)
                tx_seq = hdp.get_seq(tx_ac)

                if tx_info is None or tx_seq is None:
                    raise HGVSDataNotAvailableError("Missing transcript data for accession: {}".format(tx_ac))

                # use 1-based hgvs coords
                cds_start = tx_info["cds_start_i"] + 1
                cds_stop = tx_info["cds_end_i"]

                # Pad the CDS with "N" to a whole number of codons so
                # biopython won't complain during the conversion.  The
                # previous code joined the result of list.extend(), which is
                # None, so it raised TypeError instead of padding.
                tx_seq_to_translate = tx_seq[cds_start - 1:cds_stop]
                remainder = len(tx_seq_to_translate) % 3
                if remainder:
                    tx_seq_to_translate += "N" * (3 - remainder)

                tx_seq_cds = Seq(tx_seq_to_translate)
                protein_seq = str(tx_seq_cds.translate())

                if pro_ac is None:
                    # get_acs... will always return at least the MD5_ accession
                    pro_ac = (hdp.get_pro_ac_for_tx_ac(tx_ac) or hdp.get_acs_for_protein_seq(protein_seq)[0])

                return cls(tx_seq, protein_seq, cds_start, cds_stop, pro_ac)

        if not (var_c.type == "c"):
            raise HGVSInvalidVariantError("Expected a cDNA (c.); got " + str(var_c))
        if self._validator:
            self._validator.validate(var_c)
        reference_data = RefTranscriptData.setup_transcript_data(self.hdp, var_c.ac, pro_ac)
        builder = altseqbuilder.AltSeqBuilder(var_c, reference_data)

        # TODO: handle case where you get 2+ alt sequences back;
        # currently get list of 1 element loop structure implemented
        # to handle this, but doesn't really do anything currently.
        all_alt_data = builder.build_altseq()

        var_ps = []
        for alt_data in all_alt_data:
            builder = altseq_to_hgvsp.AltSeqToHgvsp(reference_data, alt_data)
            var_p = builder.build_hgvsp()
            var_ps.append(var_p)

        # Only the first protein variant is returned.
        var_p = var_ps[0]

        return var_p
Beispiel #16
0
    def normalize(self, var):
        """Perform sequence variants normalization for single variant

        c. variants are converted to n. for normalization and converted back
        afterwards.

        :param var: parsed variant (:class:`hgvs.sequencevariant.SequenceVariant`)
        :returns: one of

            * ``var`` itself when it has no posedit, no position, or an
              uncertain posedit;
            * a deep copy of ``var`` when the edit type is "identity";
            * the original variant when its coordinates are out of bounds
              and ``hgvs.global_config.mapping.strict_bounds`` is off;
            * otherwise a new, normalized variant.
        :raises HGVSUnsupportedOperationError: for protein variants,
            conversion edits, and n./r. variants with intronic offsets
        :raises HGVSInvalidVariantError: when coordinates are out of bounds
            and ``strict_bounds`` is on
        """
        assert isinstance(
            var, hgvs.sequencevariant.SequenceVariant
        ), "variant must be a parsed HGVS sequence variant object"

        # keep a shallow reference to the original variant, to be returned
        # as-is under certain circumstances
        orig_var = var

        if self.validator:
            self.validator.validate(var)

        init_met = False
        # NOTE(review): this tests the posedit object itself against
        # AARefAlt; ``var.posedit.edit`` may have been intended. Left
        # unchanged because fixing it would alter which protein variants are
        # returned as-is versus rejected below; confirm before changing.
        if var.posedit is not None and isinstance(var.posedit,
                                                  hgvs.edit.AARefAlt):
            init_met = var.posedit.init_met

        if var.posedit is None or var.posedit.uncertain or init_met or var.posedit.pos is None:
            return var

        # Type of the variant as passed in; ``var`` may be rebound to its
        # n. projection below.
        var_type = var.type

        if var_type == "p":
            raise HGVSUnsupportedOperationError(
                "Unsupported normalization of protein level variants: {0}".
                format(var))
        if var.posedit.edit.type == "con":
            # ``.format`` (not ``, format``): the comma passed the raw
            # template and a second argument to the exception instead of
            # the formatted message.
            raise HGVSUnsupportedOperationError(
                "Unsupported normalization of conversion variants: {0}".
                format(var))

        var.fill_ref(self.hdp)

        if var.posedit.edit.type == "identity":
            var_norm = copy.deepcopy(var)
            return var_norm

        # For c. variants normalization, first convert to n. variant
        # and perform normalization at the n. level, then convert the
        # normalized n. variant back to c. variant.
        if var_type == "c":
            var = self.vm.c_to_n(var)

        # Tuple membership: ``in "nr"`` on a string is a substring test.
        if var.type in ("n", "r"):
            if var.posedit.pos.start.offset != 0 or var.posedit.pos.end.offset != 0:
                raise HGVSUnsupportedOperationError(
                    "Normalization of intronic variants is not supported")

        def is_valid_pos(ac, pos):
            # tests whether the sequence position actually exists
            # This is *way* janky.
            # TODO: push functionality to hdp which can implement differently
            # based on capabilities of sequence backend
            try:
                s = self.hdp.get_seq(ac, pos - 1, pos)  # 0-based!
                return s != ""
            except HGVSDataNotAvailableError as e:
                # Bad Request indicates that we got to NCBI, but the request
                # was invalid.
                return "Bad Request" not in str(e)

        if var.posedit.pos.start.base < 0 or not is_valid_pos(
                var.ac, var.posedit.pos.end.base):
            if hgvs.global_config.mapping.strict_bounds:
                raise HGVSInvalidVariantError(
                    f"{var}: coordinates are out-of-bounds")
            _logger.warning(
                f"{var}: coordinates are out-of-bounds; returning as-is")
            return orig_var

        # restrict var types to those that use sequence start (i.e., not c.)
        assert var.type in ("g", "m", "n", "r"), "Internal Error: variant must be of type g, m, n, r"

        bound_s, bound_e = self._get_boundary(var)
        boundary = (bound_s, bound_e)
        start, end, (ref, alt) = self._normalize_alleles(var, boundary)

        ref_len = len(ref)
        alt_len = len(alt)

        # Generate normalized variant
        if alt_len == ref_len:
            ref_start = start
            ref_end = end - 1
            # inversion
            if ref_len > 1 and ref == reverse_complement(alt):
                edit = hgvs.edit.Inv(ref=ref)
            # ident
            elif ref_len == 0 and alt_len == 0:
                ref_start = ref_end
                edit = hgvs.edit.NARefAlt(ref=ref, alt=alt)
            # substitution or delins
            else:
                edit = hgvs.edit.NARefAlt(ref=ref, alt=alt)
        elif alt_len < ref_len:
            # del or delins
            ref_start = start
            ref_end = end - 1
            edit = hgvs.edit.NARefAlt(ref=ref,
                                      alt=None if alt_len == 0 else alt)
        else:
            # alt_len > ref_len: ins or dup
            if ref_len == 0:
                # Fetch the sequence adjacent to the insertion point, on the
                # side given by the shuffle direction, to tell ins from dup.
                if self.shuffle_direction == 3:
                    adj_seq = self._fetch_bounded_seq(var, start - alt_len - 1,
                                                      end - 1, 0, boundary)
                else:
                    adj_seq = self._fetch_bounded_seq(var, start - 1,
                                                      start + alt_len - 1, 0,
                                                      boundary)
                # ins
                if alt != adj_seq:
                    ref_start = start - 1
                    ref_end = end
                    edit = hgvs.edit.NARefAlt(ref=None, alt=alt)
                # dup
                else:
                    if self.shuffle_direction == 3:
                        ref_start = start - alt_len
                        ref_end = end - 1
                        edit = hgvs.edit.Dup(ref=alt)
                    else:
                        ref_start = start
                        ref_end = start + alt_len - 1
                        edit = hgvs.edit.Dup(ref=alt)
            # delins
            else:
                ref_start = start
                ref_end = end - 1
                edit = hgvs.edit.NARefAlt(ref=ref, alt=alt)

        # ensure the start is not 0
        if ref_start == 0:
            ref = self._fetch_bounded_seq(var, 0, 1, 0, boundary)
            alt = alt + ref
            edit = hgvs.edit.NARefAlt(ref=ref, alt=alt)
            ref_start = 1
            ref_end = 1

        # ensure the end is not outside of reference sequence
        tgt_len = self._get_tgt_length(var)
        if ref_end == tgt_len + 1:
            ref = self._fetch_bounded_seq(var, tgt_len - 1, tgt_len, 0,
                                          boundary)
            alt = ref + alt
            edit = hgvs.edit.NARefAlt(ref=ref, alt=alt)
            ref_start = tgt_len
            ref_end = tgt_len

        var_norm = copy.deepcopy(var)
        var_norm.posedit.edit = edit
        var_norm.posedit.pos.start.base = ref_start
        var_norm.posedit.pos.end.base = ref_end

        if var_type == "c":
            var_norm = self.vm.n_to_c(var_norm)

        return var_norm