Ejemplo n.º 1
0
def delins_to_repeat(variant, sequences):
    """
    Build a "repeat" variant out of a deletion insertion variant.

    The repeat unit and its initial count are obtained from
    `seq_present_before`. The start of the variant is then moved to the
    left for as long as the reference slice right before it equals the
    repeat unit, and the count is increased accordingly.

    :param variant: Variant model; it is not modified.
    :param sequences: Mapping that includes the "reference" sequence.
    :return: Deep copy of the variant with the type set to "repeat", the
        start position (and any truthy location shifts) decreased, and the
        inserted part replaced by the repeat unit and its count.
    """
    repeat_variant = copy.deepcopy(variant)
    inserted_sequence = get_inserted_sequence(variant, sequences)
    repeat_seq, repeat_number = seq_present_before(
        sequences["reference"],
        inserted_sequence,
        get_start(variant["location"]),
        get_end(variant["location"]),
    )

    unit_length = len(repeat_seq)
    start = get_start(variant)
    reference = sequences["reference"]

    # Extend to the left one repeat unit at a time.
    # NOTE(review): the bound check uses the original start only, not the
    # start minus `shift_left` -- confirm this is intended.
    shift_left = unit_length
    while (start - unit_length > 0
           and reference[start - shift_left - unit_length:
                         start - shift_left] == repeat_seq):
        shift_left += unit_length

    repeat_number += shift_left // unit_length

    location = repeat_variant["location"]
    location["start"]["position"] -= shift_left
    repeat_variant["type"] = "repeat"
    repeat_variant["inserted"] = [{
        "sequence": repeat_seq,
        "source": "description",
        "repeat_number": {
            "value": repeat_number
        },
    }]

    # Only shifts that are present and non-zero are adjusted.
    for side in ("start", "end"):
        if location[side].get("shift"):
            location[side]["shift"] -= shift_left
    return repeat_variant
Ejemplo n.º 2
0
def is_duplication(variant, sequences):
    """
    Tell whether the inserted sequence equals the reference slice of the same
    length that ends at the variant start.

    An inserted sequence shorter than the variant location length is never
    reported as a duplication.

    Note that it works only in the context of the `de_to_hgvs` function flow.

    :param variant: Variant model.
    :param sequences: Mapping that includes the "reference" sequence.
    :return: True for a duplication, False otherwise.
    """
    inserted_sequence = get_inserted_sequence(variant, sequences)
    if len(inserted_sequence) < get_location_length(variant):
        return False

    start = get_start(variant)
    preceding = sequences["reference"][start - len(inserted_sequence):start]
    return preceding == inserted_sequence
Ejemplo n.º 3
0
def de_variants_clean(variants, sequences=None):
    """
    Apply the 3' rule to delins variants, get rid of equals, and substitute
    any slices relative to the observed sequence.

    Only "inversion" and "deletion_insertion" variants are kept; variants of
    any other type are left out of the output. Every returned variant is a
    deep copy and the input variants are not modified.

    :param variants: Iterable of variant models.
    :param sequences: Mapping of sequences; "reference" is read for deletion
        insertions that are pure deletions or pure insertions.
    :return: List with the cleaned variants.
    """
    new_variants = []
    for variant in variants:
        variant_type = variant.get("type")
        if variant_type == "inversion":
            new_variants.append(copy.deepcopy(variant))
        elif variant_type == "deletion_insertion":
            # Work on the copy, so that the caller's variant is not altered
            # when the observed slices are substituted.
            new_variant = copy.deepcopy(variant)
            new_variant["inserted"] = update_inserted_with_sequences(
                new_variant["inserted"], sequences)
            inserted_sequence = get_inserted_sequence(new_variant, sequences)
            location_length = get_location_length(variant["location"])

            shift3 = 0
            shift5 = 0
            if location_length and not inserted_sequence:
                # Something is deleted and nothing is inserted: roll the
                # deleted range over the reference.
                shift5, shift3 = roll(
                    sequences["reference"],
                    variant["location"]["start"]["position"] + 1,
                    variant["location"]["end"]["position"],
                )
            elif not location_length and inserted_sequence:
                # Nothing is deleted and something is inserted: roll the
                # inserted range over the reference with the insertion
                # applied.
                start = get_start(variant)
                end = get_end(variant)
                rolled_sequence = (
                    sequences["reference"][:start] +
                    inserted_sequence +
                    sequences["reference"][end:])
                shift5, shift3 = roll(
                    rolled_sequence,
                    start + 1,
                    end + len(inserted_sequence),
                )
                if shift3:
                    # The inserted sequence is read again at the shifted
                    # position.
                    inserted_rolled_sequence = rolled_sequence[
                        start + shift3:end + shift3 + len(inserted_sequence)]
                    new_variant["inserted"] = [{
                        "sequence": inserted_rolled_sequence,
                        "source": "description"
                    }]

            shift = shift3 + shift5
            new_variant["location"]["start"]["position"] += shift3
            new_variant["location"]["start"]["shift"] = shift
            new_variant["location"]["end"]["position"] += shift3
            new_variant["location"]["end"]["shift"] = shift
            new_variants.append(new_variant)

    return new_variants
Ejemplo n.º 4
0
def de_to_hgvs(variants, sequences=None):
    """
    Convert the description extractor variants to an HGVS format (e.g., a
    deletion insertion of one nucleotide is converted to a substitution).

    A single "equal" variant is returned as a copy without its "location".
    Otherwise the variants are first passed through `de_variants_clean`;
    inversions are copied as they are and every deletion insertion is
    converted by the first rule that matches it. Other variant types are
    left out of the output.

    :param variants: Sequence of variant models.
    :param sequences: Mapping of sequences, handed over to the helpers.
    :return: List with the converted variants.
    """
    if len(variants) == 1 and variants[0].get("type") == "equal":
        lone_equal = copy.deepcopy(variants[0])
        del lone_equal["location"]
        return [lone_equal]

    hgvs_variants = []
    for variant in de_variants_clean(variants, sequences):
        kind = variant.get("type")
        if kind == "inversion":
            hgvs_variants.append(copy.deepcopy(variant))
            continue
        if kind != "deletion_insertion":
            continue

        inserted_sequence = get_inserted_sequence(variant, sequences)
        # The order of the rules matters: the first match wins.
        if not len(inserted_sequence):
            converted = delins_to_del(variant)
        elif (get_location_length(variant["location"]) ==
              len(inserted_sequence) == 1):
            converted = delins_to_substitution(variant, sequences)
        elif is_repeat(variant, sequences):
            converted = delins_to_repeat(variant, sequences)
        elif is_duplication(variant, sequences):
            converted = delins_to_duplication(variant, sequences)
        elif get_start(variant["location"]) == get_end(variant["location"]):
            converted = delins_to_insertion(variant)
        else:
            converted = delins_to_delins(variant)
        hgvs_variants.append(converted)

    return hgvs_variants
Ejemplo n.º 5
0
def _location_in_same_intron(location, exons):
    """
    Check the bisection indexes of the location start and end in `exons`.

    True is returned when both indexes are equal and even.
    NOTE(review): presumably `exons` is a sorted flat list of exon
    boundaries, so that an even index means "outside of any exon" --
    confirm against the callers.

    :param location: Location model.
    :param exons: Sorted list of positions.
    :return: True or False.
    """
    left_i = bisect.bisect_right(exons, get_start(location))
    right_i = bisect.bisect_left(exons, get_end(location))
    return left_i == right_i and left_i % 2 == 0
Ejemplo n.º 6
0
def delins_to_duplication(variant, sequences):
    """
    Build a "duplication" variant out of a deletion insertion variant.

    The start position is moved to the left by the length of the inserted
    sequence and the "inserted" entry is removed.

    :param variant: Variant model; it is not modified.
    :param sequences: Mapping of sequences, used to get the inserted sequence.
    :return: Deep copy of the variant converted to a duplication.
    """
    duplication = copy.deepcopy(variant)
    duplicated_length = len(get_inserted_sequence(variant, sequences))
    location = duplication["location"]
    location["start"]["position"] = get_start(location) - duplicated_length
    del duplication["inserted"]
    duplication["type"] = "duplication"
    return duplication
Ejemplo n.º 7
0
def update_inserted_with_sequences(inserted, sequences):
    """
    Replace the inserted parts that refer to the observed sequence with the
    actual sequence slices.

    Parts with another source are passed through as they are (the same
    objects, not copies). A part with the "observed" source becomes a
    "description" part holding the slice of `sequences["observed"]` at its
    location; when that slice is empty the part is dropped.

    :param inserted: Iterable of inserted part models.
    :param sequences: Mapping of sequences; "observed" is read when needed.
    :return: New list with the updated inserted parts.
    """
    updated = []
    for part in inserted:
        if part["source"] != "observed":
            updated.append(part)
            continue
        location = part["location"]
        observed_slice = sequences["observed"][
            get_start(location):get_end(location)]
        if observed_slice:
            updated.append(
                {"sequence": observed_slice, "source": "description"})
    return updated
Ejemplo n.º 8
0
def to_exon_positions(variants, exons, cds):
    """
    Select the deletion insertion variants and move their boundaries onto
    entries of the exons list.

    The exons list is first combined with the CDS via `_get_cds_into_exons`.
    A variant is kept only when it is a "deletion_insertion" with a location,
    it is not reported by `_location_in_same_intron`, and its start and end
    are not both at or before the first entry of the exons list. For a kept
    variant, a boundary whose bisection index in the exons list is even (and
    within the list) is replaced by the entry at that index.
    NOTE(review): presumably the exons list is a sorted flat list of exon
    boundaries, so an even index means the boundary is outside any exon --
    confirm against `_get_cds_into_exons`.

    :param variants: Iterable of variant models; they are not modified.
    :param exons: Exons, as expected by `_get_cds_into_exons`.
    :param cds: CDS, as expected by `_get_cds_into_exons`.
    :return: List with deep copies of the kept variants.
    """
    exons = _get_cds_into_exons(exons, cds)
    adjusted_variants = []
    for variant in variants:
        if variant.get("type") != "deletion_insertion":
            continue
        if not variant.get("location"):
            continue
        if _location_in_same_intron(variant["location"], exons):
            continue
        if get_start(variant) <= exons[0] and get_end(variant) <= exons[0]:
            continue

        n_v = copy.deepcopy(variant)
        # The start is handled before the end, as two separate lookups.
        for side, read_position in (("start", get_start), ("end", get_end)):
            boundary_i = bisect.bisect(exons, read_position(n_v))
            if boundary_i % 2 == 0 and boundary_i < len(exons):
                n_v["location"][side]["position"] = exons[boundary_i]
        adjusted_variants.append(n_v)

    return adjusted_variants
Ejemplo n.º 9
0
def is_repeat(variant, sequences):
    """
    Tell whether `seq_present_before` reports more than one repeat unit for
    the inserted sequence of the variant.

    Note that it works only in the context of the `de_to_hgvs` function flow.

    :param variant: Variant model.
    :param sequences: Mapping that includes the "reference" sequence.
    :return: True when the repeat number is greater than one.
    """
    candidate = get_inserted_sequence(variant, sequences)
    # Only the repeat number is of interest here, not the repeat unit.
    _, occurrences = seq_present_before(
        sequences["reference"],
        candidate,
        get_start(variant["location"]),
        get_end(variant["location"]),
    )
    return occurrences > 1
Ejemplo n.º 10
0
def _splice_site_removal(location, exons):
    """
    Check whether the bisection indexes of the location start and end in
    `exons` differ by exactly one.

    For a sorted `exons` list and a start before the end, this means that
    exactly one entry of `exons` lies strictly between the start and the end.
    NOTE(review): presumably `exons` is a sorted flat list of exon
    boundaries, so that one enclosed boundary corresponds to a removed
    splice site -- confirm against the callers.

    :param location: Location model.
    :param exons: Sorted list of positions.
    :return: True or False (always a bool, never None).
    """
    start_i = bisect.bisect_right(exons, get_start(location))
    end_i = bisect.bisect_left(exons, get_end(location))
    return end_i - start_i == 1
Ejemplo n.º 11
0
def get_inserted_sequence(insertion, sequences):
    """
    Slice the sequence that an insertion refers to.

    :param insertion: Model with a "source" key, which selects the entry of
        `sequences`, and a "location" that provides the slice boundaries.
    :param sequences: Mapping from source names to sequences.
    :return: The slice of the source sequence between the location start
        and end.
    """
    source_sequence = sequences[insertion["source"]]
    begin = get_start(insertion["location"])
    finish = get_end(insertion["location"])
    return source_sequence[begin:finish]