def child_ls_merge(child_ls, child_d, types):
    """
    Combine the alleles of several children into one merged list
    (ex. [01:03, 08, 01, 08] -> [01:03, 08]).

    @param child_ls: keys of the children to combine
    @param child_d: dictionary of the children; child_d[child][types] is the
        allele list of that child for the given type
    @param types: the allele type to merge (A/B/C/DR/DB)
    @return: merged list
    """
    merged = Als()
    for child_key in child_ls:
        child_alleles = child_d[child_key][types]
        # a child whose alleles are all empty/falsy contributes nothing
        if not any(child_alleles):
            continue
        merged = merged.merge(child_alleles)
    return merged
def list_to_dict(family_ids, family_als, als_names):
    """
    Build a per-member dictionary from the list representation of a family.

    :param family_ids: identifiers of the family members (F, M, 1, 2...)
    :param family_als: allele data of the family; family_als[i][j] holds the
        two alleles of member i for the j-th allele type
    :param als_names: names of the allele types (A, B, C, DR, DQ)
    :return: dict of the form {member: {allele type: Als with the two alleles}}
    """
    fam_dict = {}
    for member_idx, member in enumerate(family_ids):
        member_data = {}
        fam_dict[member] = member_data
        # the number of allele types is always read from the first member's row
        for type_idx in range(len(family_als[0])):
            pair = Als()
            source_pair = family_als[member_idx][type_idx]
            pair.append(source_pair[0])
            pair.append(source_pair[1])
            member_data[als_names[type_idx]] = pair
    return fam_dict
def _has_invalid_part(single_al):
    """Return True when the allele holds something other than digits or uppercase letters."""
    if ":" in single_al:
        # each colon-separated field is checked on its own; empty fields are tolerated
        return any(
            not (part.isnumeric() or part.isupper() or part == '')
            for part in single_al.split(":")
        )
    return not (single_al.isnumeric() or single_al.isupper())


def is_valid(fam_d, al_types, par_num):
    """
    Run the validation tests on the allele data of a family.

    The checks are performed in this order:
      1. every non-blank allele consists of numeric/uppercase fields only
      2. the family has at least two members
      3. when par_num is 2, the family has at least one member besides the two parents
      4. for every allele type, the merged alleles of the family are at most 4
      5. when par_num is 2 and the parents ('F' and 'M') have 4 non-empty alleles
         of a type, every other member's alleles of that type pass `sub_lst`
         against the parents' alleles

    @param fam_d: dict of the family; fam_d[member][type] is the allele list
    @param al_types: the allele types to check
    @param par_num: number of parents in the data
    @return: 0 if valid. string with error description otherwise.
    """
    invalid_char_msg = "There is an allele which contains an invalid character. " \
                       "(It must contain numbers or uppercase only)."

    # 1. character validation
    for types in al_types:
        for member in fam_d:
            for single_al in fam_d[member][types]:
                if single_al in ("", " "):
                    continue  # blank entries are skipped
                if _has_invalid_part(single_al):
                    return invalid_char_msg

    # 2 + 3. family size
    member_count = len(fam_d)
    two_parents = par_num == 2
    if member_count < 2:
        return "Missing data about the family (less than two people)."
    if member_count == 2 and two_parents:
        return "Missing data about the family - there are no children."

    # 4. number of different alleles per type
    for types in al_types:
        family_als = Als()
        for member in fam_d:  # F, M, 1, 2...
            member_als = fam_d[member][types]
            if any(member_als):  # not empty
                family_als = family_als.merge(member_als)
        if len(family_als) > 4:
            return "Too many alleles_names in the family."

    # 5. children against fully known parents
    if two_parents:
        for types in al_types:
            fm_als = fam_d['F'][types] + fam_d['M'][types]
            for member in fam_d:
                if member == 'F' or member == 'M':
                    continue
                if len(fm_als) == 4 and all(fm_als):
                    if not fam_d[member][types].sub_lst(fm_als):
                        return "There is an allele in a _child_ that does not exist in the parents."
    return 0
def data_pc_22(al_p, al_c, j, fm_in, fm_out, types):
    """
    Handle the case in which both the parent chromosome and the child hold
    two alleles of the given type.

    Nothing happens unless `al_p.match_par(al_c)` reports a match value of 1
    (1 pair same, 1 pair different). In that case the shared allele is kept
    in the parent chromosome (with the higher resolution of the two), the
    other parent allele is removed from it, and the shared allele is removed
    from the child. Both lists are modified in place.

    @param al_p: alleles of the parent chromosome the child inherited
    @param al_c: alleles of the child
    @param j: index of the current parent inside fm_in / fm_out
    @param fm_in: the chromosomes the child inherited, one per parent
    @param fm_out: the other chromosome of each parent
    @param types: the allele type being processed
    """
    match, ind_c = al_p.match_par(al_c)
    if match != 1:
        return

    shared_pos = al_p.index_a(al_c[ind_c])
    # emb chromo with high res between par and child
    al_p[shared_pos] = high_res(al_p[shared_pos], al_c[ind_c])

    al_p2 = Als()
    al_p2.append(al_p[1 - shared_pos])
    al_p.remove_a(al_p2[0])  # remove from par the different allele
    al_c.remove_a(al_p[0])  # remove from child the same allele

    other = 1 - j

    # the second chromo in cur par
    c2_inpar = None
    if fm_out[j]:
        c2_inpar = fm_out[j][types]

    # the second chromo in other par (the one the child inherited)
    c2_outpar = None
    if fm_in[other]:
        c2_outpar = fm_in[other][types]

    # used to avoid a case of incorrect deletion
    checker = None
    if fm_out[other]:
        checker = fm_out[other][types]

    if c2_inpar and c2_outpar:
        # add or remove allele to the connected chromo
        add_or_remove(al_p, al_p2, c2_inpar, c2_outpar, checker)
def div_4als(spec_d):
    """
    Split the 4 alleles of the children into 2 groups, while keeping the
    logic of inheritance from the parents.
    for example : 01+02, 02+03, 03+04 --> [01, 03] [02, 04]

    @param spec_d: dict with the allele data of the children
    @return: the 2 groups
    """
    gr0, gr1 = Als(), Als()
    is_ex, homoz = exist_homoz_d(spec_d)

    if not is_ex:
        # No homozygous child.
        # "pending" is handed to div_2als on every call; if anything is left
        # in it after the loop, its first entry gets one more pass.
        # e.g. first run: gr0[01], gr1[02], als = [03, 04] -> cannot be embedded yet
        #      second run: gr0[01, 03], gr1[02] -> now als can be embedded
        pending = []
        for position, als in enumerate(spec_d.values()):
            if position:
                gr0, gr1 = div_2als(gr0, gr1, als, pending)
            else:
                # the first child seeds the two groups
                gr0.append(als[0])
                gr1.append(als[1])
        if pending:
            gr0, gr1 = div_2als(gr0, gr1, pending[0], [])
        return gr0, gr1

    # There is a homozygous allele: it belongs to both groups.
    for group in (gr0, gr1):
        group.append(homoz)
    for als in spec_d.values():
        for al in als:
            if not (al in gr0 or al in gr1):
                # al is in no group yet: add it to a group holding fewer than 2 alleles
                if len(gr0) < 2:
                    gr0.append(al)
                elif len(gr1) < 2:
                    gr1.append(al)
            if len(gr0) == 2 and len(gr1) == 2:  # the groups are full
                break
    return gr0, gr1
def emb_wp(chF, chM, child_d, al_types):
    """
    Decide which chromosome of each parent a child is embedded in, for the
    case that parents exist.

    The result is a two-element list, initialised to [0, 0]. An entry is set
    to 1 or 2 when a chromosome could be chosen for that parent and stays 0
    otherwise (e.g. chF2, chM1 -> [2, 1]).

    Side effect: when an allele of the child is matched against a parent
    whose two chromosomes are full, that allele is deleted from
    child_d[types] (the dict is modified in place).

    @param chF: father chromosome object (its ch1 / ch2 dicts are read)
    @param chM: mother chromosome object (its ch1 / ch2 dicts are read)
    @param child_d: dict of the child, mapping allele type -> allele list
    @param al_types: names of the allele types (A, B ...)
    @return: list with the embedding of the child
    """
    i = 0  # index into emb_FM of the parent currently processed
    m_before = False
    flag = 0
    first = True  # TODO: why define as True but after as 0/1?
    emb_FM = [0, 0]  # emb_FM signs which chromo of parents the child emb. if chF2, chM1: [2,1]
    pars = [chF, chM]
    if not (empty_dict(chM.ch1) or empty_dict(chM.ch2)):  # if M full and F empty, begin with M
        pars = [chM, chF]
        m_before = True  # sign the opposite list. in the end of code - flip it
    for par in pars:
        was_emb = False
        if empty_dict(par.ch1) and empty_dict(par.ch2):
            # nothing is known about this parent
            emb_FM[i] = 1  # emb (random) in first chromo
            was_emb = True  # TODO: something here looks strange (not used)
        elif not (empty_dict(par.ch1) or empty_dict(par.ch2)):  # 2 chrom of par are full
            d1 = par.ch1
            d2 = par.ch2
            for types in al_types:
                if not was_emb and len(d1[types]) == len(d2[types]) == 1 and not \
                        equal_al(d1[types][0], d2[types][0]):  # not embedded, par has singles and not homozygous
                    par_pair = Als()
                    par_pair.extend([d1[types][0], d2[types][0]])
                    match, m_par = child_d[types].match_par(par_pair)
                    if len(child_d[types]) == 2 and match == 1:
                        emb_FM[i] = m_par + 1
                        was_emb = True
                        del_al = child_d[types][0] if child_d[types][0] in par_pair else child_d[types][1]  # TODO: this the reason for creating copied dict
                        del child_d[types][child_d[types].index_a(del_al)]  # remove the emb allele
                    elif len(child_d[types]) == 1:  # child has 1 option to this allele (the first allele was embedded)
                        ind = par_pair.index_a(child_d[types][0])
                        if ind != -1:
                            emb_FM[i] = ind + 1
                            was_emb = True
            # NOTE(review): `first` only ever holds True or 1 in this function,
            # so `first == 0` never evaluates true and this retry branch looks
            # unreachable - confirm the intended initial/updated values.
            if not was_emb and flag != 1 and first == 0:
                flag = 1
                i = i - 1
                pars.append(par)  # add to more one running on this parents, to success the emb
        else:
            # 1 chrom full, one empty (in this case we can embed only in the
            # empty chrom, if the full one is contradictory; if the full one
            # is compatible we can't know certainly)
            full_d = par.ch1 if not empty_dict(par.ch1) else par.ch2
            full_ind = 0 if not empty_dict(par.ch1) else 1
            for types in al_types:
                if not was_emb and not child_d[types].sub_lst(full_d[types]):
                    # 2 - full_ind is the 1-based number of the empty chromosome
                    emb_FM[i] = 2 - full_ind
                    was_emb = True
        first = 1
        i += 1
    if m_before:  # indexes opposite
        emb_FM.reverse()
    if flag == 1:  # and emb_FM[0] != 0:
        emb_FM.reverse()
    return emb_FM
def __init__(self, al_types):
    """
    Create the two chromosomes (ch1, ch2); each maps every allele type in
    al_types to its own new Als instance.
    """
    self.ch1, self.ch2 = {}, {}
    for al in al_types:
        for chromosome in (self.ch1, self.ch2):
            chromosome[al] = Als()
def add_data_child(child, chF, chM, emb_FM, al_types, d_chi):
    """
    Embed the allele data of one child into the parent chromosomes it
    inherited.

    The allele lists of the parent chromosomes and of d_chi[child] are
    modified in place.

    @param child: key of the child inside d_chi
    @param chF: father chromosomes (passed to common_chr, returned on success)
    @param chM: mother chromosomes (passed to common_chr, returned on success)
    @param emb_FM: embedding of the child, passed to common_chr
        (presumably the result of emb_wp - confirm against the caller)
    @param al_types: names of the allele types to process
    @param d_chi: dict of the children; d_chi[child][types] is the allele list
    @return: (chF, chM) on success, (None, None) when the alleles of the child
        contradict the parent chromosome
    """
    f_in, f_out, m_in, m_out = common_chr(chF, chM, emb_FM)  # f_in/m_in = chromos the child inherited
    fm_in = [f_in, m_in]
    fm_out = [f_out, m_out]
    # if who_fuller(f_in, m_in) == 2:
    #     fm_in, fm_out = [m_in, f_in], [m_out, f_out]
    j = 0  # j = which iteration in pars loop (index of chr_par inside fm_in)
    for chr_par in fm_in:
        if not chr_par:  # TODO: Im not sure it could be True (maybe yes..)
            j += 1
            continue
        for types in al_types:
            al_c = d_chi[child][types]
            al_p = fm_in[j][types]  # same chromosome as chr_par, since j is its index
            if al_c.is_empty_a():
                continue
            if al_p.is_empty_a():
                # the parent has no data for this type; a child pair with two
                # equal entries is first reduced to a single entry
                if len(al_c) == 2 and al_c[0] == al_c[1]:
                    al_c.remove_a(al_c[1])
                chr_par[types] = copy.deepcopy(al_c)  # emb data from child to par
            elif len(al_c) == len(al_p) == 1:  # par and child has 1 chromo
                if al_c != al_p:
                    return None, None
                al_p[0] = high_res(al_p[0], al_c[0])  # emb chromo with high res between par and child
            elif len(al_c) == 2 and len(al_p) == 1:  # child has 2, par has 1
                # e.g. par: 01 , child: 01:08+02 ; so- par: 01:08, child: 02
                if al_p[0] in al_c:
                    al_p[0] = high_res(al_p[0], al_c[al_c.index_a(al_p[0])])
                    al_c.remove_a(al_p[0])
                else:
                    return None, None
            elif len(al_c) == len(al_p) == 2:  # par and child has 2 chromos
                data_pc_22(al_p, al_c, j, fm_in, fm_out, types)
            elif len(al_c) == 1 and len(al_p) == 2:
                if al_c[0] not in al_p:
                    return None, None
                if equal_al(al_p[0], al_p[1]) and al_p[0] != al_p[1]:
                    if len(al_c[0]) == 2 and equal_al(al_p[0], al_c[0]):  # e.g 01:02, 01:03  # TODO: it's not a good example! because they aren't equal
                        continue
                ind_p = al_p.index_a(al_c[0])
                al_p[ind_p] = high_res(al_p[ind_p], al_c[0])  # high res  # TODO: why not chr_par[types] = ... ?
                # keep only the allele shared with the child in this chromosome;
                # the removed one is handed to add_or_remove below
                al_p2 = Als()
                al_p2.append(al_p[1 - ind_p])
                al_p.remove_a(al_p2[0])
                c2_inpar = fm_out[j][types] if fm_out[j] else None  # the second chromo in cur par
                c2_outpar = fm_in[1 - j][types] if fm_in[1 - j] else None  # the second chromo in other par (the one the child inherited)
                # try
                checker = fm_out[1 - j][types] if fm_out[1 - j] else None  # avoid case of incorrect deletion
                if c2_inpar and c2_outpar:
                    add_or_remove(al_p, al_p2, c2_inpar, c2_outpar, checker)  # add or remove allele to the connected chromo
        j += 1
    return chF, chM