def de_to_hgvs(variants, sequences=None):
    """
    Translate description extractor variants into their HGVS counterparts
    (e.g., a deletion insertion of one nucleotide becomes a substitution).

    A lone "equal" variant is returned as a copy without its location.
    Otherwise the variants are first passed through `de_variants_clean`;
    inversions are copied as they are and every deletion insertion is
    converted to a more specific variant type. Variants of any other type
    are left out of the result.
    """
    if len(variants) == 1 and variants[0].get("type") == "equal":
        equal_variant = copy.deepcopy(variants[0])
        del equal_variant["location"]
        return [equal_variant]

    converted = []
    for cleaned in de_variants_clean(variants, sequences):
        variant_type = cleaned.get("type")
        if variant_type == "inversion":
            converted.append(copy.deepcopy(cleaned))
        elif variant_type == "deletion_insertion":
            converted.append(_delins_to_hgvs(cleaned, sequences))
    return converted


def _delins_to_hgvs(variant, sequences):
    """
    Convert one cleaned deletion insertion to the first variant type that
    matches. The checks are order dependent: the repeat and duplication
    predicates are only meaningful after the earlier cases are ruled out.
    """
    inserted_length = len(get_inserted_sequence(variant, sequences))
    if inserted_length == 0:
        # Nothing is inserted, so only the deletion remains.
        return delins_to_del(variant)
    if get_location_length(variant["location"]) == inserted_length == 1:
        # One nucleotide replaced by one nucleotide.
        return delins_to_substitution(variant, sequences)
    if is_repeat(variant, sequences):
        return delins_to_repeat(variant, sequences)
    if is_duplication(variant, sequences):
        return delins_to_duplication(variant, sequences)
    if get_start(variant["location"]) == get_end(variant["location"]):
        # Empty location with a non-empty inserted sequence.
        return delins_to_insertion(variant)
    return delins_to_delins(variant)
def delins_to_repeat(variant, sequences):
    """
    Build a repeat variant from a deletion insertion one.

    The repeat unit and its initial count are obtained from
    `seq_present_before`. The start of the location is then moved leftwards
    (towards lower positions) over the consecutive copies of the unit found
    in the reference, and the count is raised by the number of units that
    were stepped over. A copy of the variant is returned; the input variant
    is not modified here.
    """
    repeat_variant = copy.deepcopy(variant)
    inserted_sequence = get_inserted_sequence(variant, sequences)
    repeat_unit, repeat_count = seq_present_before(
        sequences["reference"],
        inserted_sequence,
        get_start(variant["location"]),
        get_end(variant["location"]),
    )
    unit_length = len(repeat_unit)
    start = get_start(variant)
    reference = sequences["reference"]

    # The shift always covers at least one unit; it grows by one unit for
    # each additional copy found right before the already covered region.
    shift_left = unit_length
    # This guard does not depend on the loop state, so it is checked once.
    if start - unit_length > 0:
        while (reference[start - shift_left - unit_length:start - shift_left]
               == repeat_unit):
            shift_left += unit_length
    repeat_count += shift_left // unit_length

    location = repeat_variant["location"]
    location["start"]["position"] -= shift_left
    repeat_variant["type"] = "repeat"
    repeat_variant["inserted"] = [{
        "sequence": repeat_unit,
        "source": "description",
        "repeat_number": {"value": repeat_count},
    }]
    # Only non-zero shifts are adjusted for the new start position.
    for side in ("start", "end"):
        point = location[side]
        if point.get("shift"):
            point["shift"] -= shift_left
    return repeat_variant
def delins_to_duplication(variant, sequences):
    """
    Build a duplication variant from a deletion insertion one.

    The start position is moved back by the length of the inserted
    sequence, the "inserted" entry is dropped, and the type is set to
    "duplication". A copy of the variant is returned.
    """
    duplication = copy.deepcopy(variant)
    duplicated_length = len(get_inserted_sequence(variant, sequences))
    location = duplication["location"]
    location["start"]["position"] = get_start(location) - duplicated_length
    del duplication["inserted"]
    duplication["type"] = "duplication"
    return duplication
def is_duplication(variant, sequences):
    """
    Check whether the inserted sequence is equal to the part of the
    reference sequence located right before the start of the variant.

    Note that it works only in the context of the `de_to_hgvs` function flow.
    """
    inserted = get_inserted_sequence(variant, sequences)
    inserted_length = len(inserted)
    # An insert shorter than the affected location is never a duplication.
    if inserted_length < get_location_length(variant):
        return False
    start = get_start(variant)
    preceding = sequences["reference"][start - inserted_length:start]
    return preceding == inserted
def de_variants_clean(variants, sequences=None):
    """
    Apply the 3' rule to delins variants, get rid of equals, and substitute
    any slices relative to the observed sequence.

    The input variants are not modified: every returned variant is an
    independent deep copy, and the substituted "inserted" content is stored
    on the copy only.

    :arg list variants: Variant models; only the "inversion" and
        "deletion_insertion" types are kept, any other type is dropped.
    :arg dict sequences: Passed on to the helpers; its "reference" entry is
        read whenever a pure deletion or a pure insertion is encountered.
    :returns list: The kept variants. Deletion insertions additionally get
        their start and end positions moved by the 3' shift and receive a
        "shift" value (sum of both shifts) on both location ends.
    """
    new_variants = []
    for variant in variants:
        variant_type = variant.get("type")
        if variant_type == "inversion":
            new_variants.append(copy.deepcopy(variant))
        elif variant_type == "deletion_insertion":
            # Copy first, so that the update of the inserted part below does
            # not leak into the variant owned by the caller.
            new_variant = copy.deepcopy(variant)
            new_variant["inserted"] = update_inserted_with_sequences(
                new_variant["inserted"], sequences)
            inserted_sequence = get_inserted_sequence(new_variant, sequences)

            # The original (unshifted) location is used for all the reads.
            location = variant["location"]
            location_length = get_location_length(location)
            shift3 = 0
            shift5 = 0
            if location_length and not inserted_sequence:
                # Pure deletion: roll the deleted range over the reference.
                shift5, shift3 = roll(
                    sequences["reference"],
                    location["start"]["position"] + 1,
                    location["end"]["position"],
                )
            elif not location_length and inserted_sequence:
                # Pure insertion: roll the inserted part within the sequence
                # obtained after applying the insertion.
                start = get_start(variant)
                end = get_end(variant)
                inserted_length = len(inserted_sequence)
                rolled_sequence = (sequences["reference"][:start] +
                                   inserted_sequence +
                                   sequences["reference"][end:])
                shift5, shift3 = roll(
                    rolled_sequence,
                    start + 1,
                    end + inserted_length,
                )
                if shift3:
                    # After shifting, the inserted content is whatever is
                    # found at the new position in the rolled sequence.
                    inserted_rolled_sequence = rolled_sequence[
                        start + shift3:end + shift3 + inserted_length]
                    new_variant["inserted"] = [{
                        "sequence": inserted_rolled_sequence,
                        "source": "description"
                    }]

            shift = shift3 + shift5
            for side in ("start", "end"):
                new_variant["location"][side]["position"] += shift3
                new_variant["location"][side]["shift"] = shift
            new_variants.append(new_variant)

    return new_variants
def is_repeat(variant, sequences):
    """
    Tell whether the inserted sequence of the variant forms a repeat, i.e.,
    whether `seq_present_before` reports a repeat number greater than one.

    Note that it works only in the context of the `de_to_hgvs` function flow.
    """
    inserted = get_inserted_sequence(variant, sequences)
    location = variant["location"]
    _, repeat_count = seq_present_before(
        sequences["reference"],
        inserted,
        get_start(location),
        get_end(location),
    )
    return repeat_count > 1