def test_ehh_decay():
    """Check ``ehh_decay`` on a 5-variant x 4-haplotype example.

    The same expected decay must be produced for a biallelic encoding and
    for a multiallelic encoding of the same sharing pattern (only allele
    identity between haplotypes matters, not the allele codes themselves).
    """
    biallelic = [
        [0, 0, 1, 1],
        [0, 0, 1, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 0],
        [0, 1, 0, 0],
    ]
    # with multiallelics
    multiallelic = [
        [0, 0, 2, 2],
        [0, 0, 1, 1],
        [0, 0, 0, 3],
        [0, 0, 0, 0],
        [0, 4, 0, 0],
    ]

    # Four haplotypes give six distinct pairs; each entry is the share of
    # pairs still identical from the first row down to that row.
    expected = [2 / 6, 2 / 6, 1 / 6, 1 / 6, 0]

    for haplotypes in (biallelic, multiallelic):
        actual = ehh_decay(haplotypes)
        assert_array_equal(expected, actual)
def get_ehh_decay(haparray, haplotype_pos, core_left, core_right, core_haps, flank):
    """Compute EHH decay on both sides of a core region, per core haplotype.

    The flanking haplotype arrays and their positions are obtained from
    ``extract_haplotype_array``. For every ``(label, members)`` pair in
    ``core_haps`` the columns listed in ``members`` are selected (in sorted
    order) and ``allel.ehh_decay`` is run separately on the left and right
    flanks.

    Returns:
        A ``pandas.DataFrame`` with one row per flanking position (left
        positions followed by right positions) and one column per core
        haplotype label.
    """
    # Get the haplotype arrays
    pos_left, haps_left, pos_right, haps_right = extract_haplotype_array(
        haparray, haplotype_pos, core_left, core_right, flank)

    # Get the positions of interest
    flank_positions = np.concatenate((pos_left, pos_right), axis=0)

    # Calculate the EHH: one row per core haplotype, filled in below.
    ehh_table = pd.DataFrame(columns=flank_positions)
    for label, members in core_haps:
        carrier_columns = sorted(members)
        left_block = haps_left.take(carrier_columns, axis=1)
        right_block = haps_right.take(carrier_columns, axis=1)

        # The left flank is fed in reversed variant order and the result is
        # flipped back so that it lines up with ``pos_left`` again
        # (presumably so decay is measured moving away from the core).
        decay_left = allel.ehh_decay(left_block[::-1])[::-1]
        decay_right = allel.ehh_decay(right_block)

        ehh_table.loc[label, :] = np.concatenate(
            (decay_left, decay_right), axis=0)

    return ehh_table.transpose()
def test_ehh_decay():
    """Check ``ehh_decay`` against hand-computed values for a small example."""
    # Rows are variants, columns are the four haplotypes.
    haplotypes = [
        [0, 0, 1, 1],
        [0, 0, 1, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 0],
        [0, 1, 0, 0],
    ]
    # Out of the six haplotype pairs: two are identical through the first
    # two rows, one through the next two, and none once the last row is
    # included.
    expected = [2 / 6, 2 / 6, 1 / 6, 1 / 6, 0]
    assert_array_equal(expected, ehh_decay(haplotypes))
# plot pdf = PdfPages("%s/%s_%s.sel_EHHdecay.pdf" % (outdir,outcode,l_nom)) fig = plt.figure(figsize=(8,8)) ax3 = plt.subplot(2, 1, 1) for i,clui in enumerate(np.append(clu_list_ids_fil,np.append("no_wt","no_alt"))): clu_key = "cluster_"+str(clui) print("EHH %s" % clu_key) # which variants include in the cluster-wise analysis of selection? clu_sambool = np.isin(range(0,oc_haploty_hap_seg.n_haplotypes),test_elements=popdich_clu[clu_key]) clu_sambool = np.logical_and(clu_sambool,rmv_miss_bool) # calculate actual EHH clu_ehh_up_i = allel.ehh_decay(h=oc_haploty_hap_seg.subset(sel0=clu_varbool_up,sel1=clu_sambool)) clu_ehh_do_i = allel.ehh_decay(h=oc_haploty_hap_seg.subset(sel0=clu_varbool_do,sel1=clu_sambool)) clu_ehh_i = np.concatenate((clu_ehh_up_i[::-1],clu_ehh_do_i)) clu_ehh_i_ar = np.trapz(clu_ehh_i) ehh_above_start = clu_ehh_pos.compress(clu_ehh_i > ehh_above_thr)[0] ehh_above_end = clu_ehh_pos.compress(clu_ehh_i > ehh_above_thr)[-1] ehh_below_start = clu_ehh_pos.compress(clu_ehh_i < ehh_below_thr)[0] ehh_below_end = clu_ehh_pos.compress(clu_ehh_i < ehh_below_thr)[-1] # lab is data clu_lab = "%s, n=%i, a=%.3f\nEHH>%.2f: %i bp %i-%i\nEHH<%.2f: %i bp %i-%i" % ( clu_key, len(popdich_clu[clu_key]),clu_ehh_i_ar, ehh_above_thr, ehh_above_end-ehh_above_start, ehh_above_start, ehh_above_end, ehh_below_thr, ehh_below_end-ehh_below_start, ehh_below_start, ehh_below_end )