Exemple #1
0
def check_if_there_is_allele_with_4_diff_values(fam_d, alleles_names, invalid_cases):
    """
    Reject a family without parents when no allele has 4 different values in the children.

    When there are no parents, the algorithm relies on the assumption that at least one
    allele shows 4 different values among the children. A family that does not satisfy it
    is not an "invalid" input, but it can not be analyzed, so the case ('6', 'All') is
    appended to 'invalid_cases'.

    :param fam_d: family dict, accessed as fam_d[member][allele name]
                  (children only, according to the condition in the call to this function)
    :param alleles_names: names of the alleles to go over
    :param invalid_cases: list of rejected cases; a tuple is appended to it in place
    """
    for name in alleles_names:
        values_in_allele = Als()
        for child_data in fam_d.values():
            child_values = child_data[name]
            if not any(child_values):  # no data for this child in this allele, nothing to merge
                continue
            # accumulate the values of this child with those of the previous children,
            # e.g. accumulated: [02, 03], child: [02:01, 04] -> [02, 03, 04]
            # note: the merging may keep the low-res value (02 instead of 02:01);
            # it does not matter here, because only the amount of values is needed
            values_in_allele = values_in_allele.merge(child_values)

        if len(values_in_allele) == 4:
            # one allele with 4 different values is enough, no need to check the others
            return

    # no parents, and no allele with 4 different values: the algorithm can not be executed
    invalid_cases.append(('6', 'All'))
Exemple #2
0
 def __init__(self, alleles_names):
     """
     Create the two haplotypes, each as a dict that maps every allele name to its own
     empty Als (= special list for alleles): {A: [], B: [], ...}
     :param alleles_names: names of the alleles, used as the keys of both dicts
     """
     self.hap1, self.hap2 = {}, {}
     for name in alleles_names:
         # a separate Als object for each haplotype, so they never share a list
         self.hap1[name], self.hap2[name] = Als(), Als()
Exemple #3
0
def create_merged_allele_values(children, al_name):
    """
    Collect, into one Als, the different values that the children have in a specific allele.

    The accumulation is done with 'Als.merge', child after child.
    Example (from the original documentation): _child_ 1 has A: [02:01, 03:04],
    _child_ 2 has A: [05, 03], _child_ 3 has A: [02, 07] -> [02:01, 03:04, 05, 07]

    :param children: children dict, accessed as children[child][allele name]
    :param al_name: name of the specific allele
    :return: Als with the merged values
    """
    merged = Als()
    for child_data in children.values():
        merged = merged.merge(child_data[al_name])
    return merged
Exemple #4
0
def check_too_much_alleles(fam_d, alleles_names, invalid_cases):
    """
    Check whether the family has too many values in an allele (more than 4).

    For every allele, the values of all the family members are merged (with 'Als.merge');
    each allele that ends up with more than 4 values appends the case ('4', 'All').

    :param fam_d: family dict, accessed as fam_d[member][allele name]; members: F, M, 1 ...
    :param alleles_names: names of the alleles to go over
    :param invalid_cases: list of rejected cases; tuples are appended to it in place
    """
    for name in alleles_names:
        family_values = Als()
        for member_data in fam_d.values():
            member_values = member_data[name]
            if any(member_values):  # skip members without data in this allele
                family_values = family_values.merge(member_values)

        if len(family_values) > 4:
            invalid_cases.append(('4', 'All'))  # Too many alleles_names
Exemple #5
0
def convert_data_to_Als(fam_dict):
    """
    Convert, in place, the alleles data of a whole family from a list to an Als
    (like a list, just adjusted to alleles. see 'Als' class documentation).

    Only the first two items of each original list are copied into the new Als.

    :param fam_dict: family dict, {member (F, M, 1 ...): {allele name (A, B ...): values}}
    """
    for member_data in fam_dict.values():
        for name, raw_values in member_data.items():
            first, second = raw_values[0], raw_values[1]
            converted = Als()
            converted.extend([first, second])
            # the key already exists, so replacing its value while iterating is safe
            member_data[name] = converted
def remove_data_if_just_one_allele_full(h1, h2):
    """
    GRIMM cannot handle an allele that has data in one haplotype but no data in the
    second haplotype, for example: A*02:01+A*ZZZZ (ZZZZ means empty).
    So for an allele like this, the data is deleted from the full haplotype too
    (its value is replaced by a new, empty Als).

    The two haplotypes are compared per allele name, so the result does not depend on
    the order of the keys in the two dicts. An allele that exists in 'h1' only is left
    as is, because there is nothing to compare it with.

    - pay attention: this function is called after the function 'duplicate_hap_if_one_empty',
      because otherwise, we might delete all the data from the haplotypes

    :param h1: first haplotype, dict {allele name: Als}; may be changed in place
    :param h2: second haplotype, dict {allele name: Als}; may be changed in place
    """
    for al_name, value1 in h1.items():
        if al_name not in h2:
            continue
        value2 = h2[al_name]

        is_empty1, is_empty2 = value1.empty_Als(), value2.empty_Als()
        # only existing keys are re-assigned, so changing 'h1' while iterating it is safe
        if is_empty1 and not is_empty2:
            h2[al_name] = Als()
        elif is_empty2 and not is_empty1:
            h1[al_name] = Als()
Exemple #7
0
def validate(hap_1, hap_2, member, is_serology):  # todo: check it !!
    """
    Check that two haplotypes are consistent with the data of a person
    (2 from one parent if compare to parent, and 1 from each parent if compare to _child_).

    Both haplotypes are first converted with 'gl_string_to_dict'.
    - not serology: for every allele, the two values of the member have to match the two
      haplotype values (compared with 'is_equal'), in one of the two possible orders.
    - serology: for every allele, the member has no data, or both haplotype values are
      among the member values.

    :param hap_1: first haplotype
    :param hap_2: second haplotype
    :param member: family member, dict {allele name: values}
    :param is_serology: flag, if serology, the validate is checked in another way
    :return: True if consistency, False otherwise
    """
    hap_1 = gl_string_to_dict(hap_1)
    hap_2 = gl_string_to_dict(hap_2)

    if is_serology:
        # serology data. could be more than 2 options in the values of an allele
        alleles_ok = []
        for allele_name, allele_values in member.items():
            from_hap1, from_hap2 = hap_1[allele_name], hap_2[allele_name]
            options = Als()
            options.extend(allele_values)

            if not options:  # []
                consistent = True
            elif not any(options):  # ["", ""]
                consistent = True
            else:
                consistent = from_hap1 in options and from_hap2 in options
            alleles_ok.append(consistent)

        # every allele is examined before the decision, also after a failure
        return all(alleles_ok)

    for allele_name, allele_values in member.items():
        member_first, member_second = allele_values[0], allele_values[1]
        from_hap1, from_hap2 = hap_1[allele_name], hap_2[allele_name]

        same_order = is_equal(member_first, from_hap1) and is_equal(
            member_second, from_hap2)
        swapped_order = is_equal(member_first, from_hap2) and is_equal(
            member_second, from_hap1)
        if same_order or swapped_order:
            continue
        return False  # this allele does not fit in any order
    return True
Exemple #8
0
def divide_alleles_to_2_groups(dict_children_one_allele):
    """
    Divide the alleles of 3 children or more to 2 groups (one for each parent).
    For example, c1:[01, 02], c2:[02, 03], c3:[01, 04], so -> par1: 01~03, par2: 02~04

    There are 2 cases:
        1. easy case: there is a homozygous _child_ (reported by 'check_if_exist_homoz'), so
        his allele is put in both groups, and then the alleles of the children are inserted
        in some order (no matter how) to the groups, until each group is of size 2.
        for example: c1:[01, 01], c2:[01, 02], c3:[02, 03]
        -->(iter1) par1: 01, par2: 01  -->(iter2) par1: 01~02, par2: 01  -->(iter3) par1: 01~02, par2: 01~03
        2. difficult case: no homozygous _child_, so the alleles of the first _child_ are
        split between the groups, and the others are handled by
        'divide_2_alleles_to_non_empty_groups'

    :param dict_children_one_allele: children dict, that contains data about one allele only
    :return: 2 groups (each one is an Als)
    """
    group_a, group_b = Als(), Als()

    has_homoz, homoz_value = check_if_exist_homoz(dict_children_one_allele)

    if not has_homoz:
        # 'pending' is handed to 'divide_2_alleles_to_non_empty_groups' on every call, to let us
        # know about alleles that did not succeed to be inserted in the first time; after the
        # others are inserted, the first of them is tried again.
        # more explanations in the documentation of 'divide_2_alleles_to_non_empty_groups'
        pending = []
        for child_alleles in dict_children_one_allele.values():
            if len(group_a) == 0 and len(group_b) == 0:
                # first insertion, the order does not matter
                group_a.append(child_alleles[0])
                group_b.append(child_alleles[1])
                continue
            divide_2_alleles_to_non_empty_groups(group_a, group_b,
                                                 child_alleles, pending)
        if pending:
            divide_2_alleles_to_non_empty_groups(group_a, group_b, pending[0], [])
        return group_a, group_b

    # a _child_ has e.g. [01, 01], so each parent has '01'
    group_a.append(homoz_value)
    group_b.append(homoz_value)
    for child_alleles in dict_children_one_allele.values():
        for value in child_alleles:
            if value in group_a or value in group_b:
                continue  # already placed in one of the groups
            if len(group_a) < 2:
                group_a.append(value)
            elif len(group_b) < 2:
                group_b.append(value)
        if len(group_a) == 2 and len(group_b) == 2:  # the groups are full
            break

    return group_a, group_b