def runtest(infile, L, nboots, snpfreq, submitted):
    """Run one five-taxon D-statistic test and pickle the results to disk.

    Reads the loci in `infile`, builds per-locus site-pattern counts for the
    taxon set `L`, computes the pooled D12, D1 and D2 statistics, bootstraps
    over loci to obtain a standard deviation and Z-score for each statistic,
    and writes the result list to the file ".save.<submitted>".

    Parameters:
        infile: path of the loci file; loci are delimited by "|".
        L: the tip taxa under test (per the original comment); passed
            unchanged to makeSNP and to IUA / IUAfreq.
        nboots: number of bootstrap replicates to draw.
        snpfreq: truthy to count patterns with IUAfreq, falsy to use IUA;
            also forwarded to makeSNP.
        submitted: job identifier; used as the suffix of the output file
            name and stored in the result list.

    Returns:
        None.  The result list is pickled instead of returned (the original
        author notes this prevents multiprocessing from freezing on large
        returns).  Its layout is:
            [L, D12, Z12, D1, Z1, D2, Z2, number of loci,
             total ABBBA, BABBA, ABBAA, BABAA, ABABA, BAABA,
             pdisc, submitted,
             ABBBAloci, BABBAloci, ABBAAloci, BABAAloci, ABABAloci, BAABAloci,
             BB12, BB1, BB2]

    Raises:
        IndexError: if `infile` contains no "|"-terminated locus.
    """
    # progress trace: echo the taxon set being tested
    print(L)

    # split the file into loci; whatever follows the final "|" is discarded.
    # The context manager closes the handle (the original left it open).
    with open(infile) as handle:
        loci = handle.read().strip().split("|")[:-1]
    # presumably so the first locus starts with a newline like the others
    loci[0] = "\n" + loci[0]

    # mapping of Locus5 objects holding the data for the tip taxa in L
    Ldict = makeSNP(L, snpfreq, loci)

    # calculate the discordant site patterns of every locus
    count_patterns = IUAfreq if snpfreq else IUA
    for key in Ldict:
        Ldict[key] = count_patterns(Ldict[key], L)
    locs = [Ldict[key] for key in Ldict]

    def dstat(left, right):
        # pooled D: per-locus (left - right) summed, over (left + right) summed
        top = sum([getattr(loc, left) - getattr(loc, right) for loc in locs])
        bottom = sum([getattr(loc, left) + getattr(loc, right) for loc in locs])
        if bottom > 0:
            return float(top) / bottom
        return 0.

    def total(pattern):
        # total count of one site pattern across all loci
        return sum([getattr(loc, pattern) for loc in locs])

    def zscore(stat, std):
        # |statistic / bootstrap std|, or 0 when the std is not positive
        if std > 0:
            return abs(stat / std)
        return 0.

    # final pooled statistics
    D12 = dstat("abbba", "babba")
    D1 = dstat("abbaa", "babaa")
    D2 = dstat("ababa", "baaba")

    # proportion of loci with any non-zero per-locus D.  With no loci the
    # division raises ZeroDivisionError, which the original `except
    # ValueError` did not catch; both are handled here.
    try:
        ndisc = len([loc for loc in locs
                     if any([loc.D12(), loc.D1(), loc.D2()])])
        pdisc = ndisc / float(len(Ldict))
    except (ValueError, ZeroDivisionError):
        pdisc = 0.0

    # bootstrap: resample loci with replacement, recompute the statistics
    BB12, BB1, BB2 = [], [], []
    for _ in xrange(nboots):
        which = iter(sample_wr(xrange(len(Ldict)), len(Ldict)))
        # bootfreq is used for either value of snpfreq; the alternative
        # bootfixed call was already disabled in the original code
        bb12, bb1, bb2 = bootfreq(Ldict, which)
        BB12.append(bb12)
        BB1.append(bb1)
        BB2.append(bb2)
    STD12 = numpy.std(BB12)
    STD1 = numpy.std(BB1)
    STD2 = numpy.std(BB2)

    # stats out
    Z12 = zscore(D12, STD12)
    Z1 = zscore(D1, STD1)
    Z2 = zscore(D2, STD2)

    # locus numbers grouped by the sign of each per-locus statistic
    ABBBAloci = [loc.number for loc in locs if loc.D12() > 0]
    BABBAloci = [loc.number for loc in locs if loc.D12() < 0]
    ABBAAloci = [loc.number for loc in locs if loc.D1() > 0]
    BABAAloci = [loc.number for loc in locs if loc.D1() < 0]
    ABABAloci = [loc.number for loc in locs if loc.D2() > 0]
    BAABAloci = [loc.number for loc in locs if loc.D2() < 0]

    ret = [
        L,
        D12, Z12,
        D1, Z1,
        D2, Z2,
        len(Ldict),
        total("abbba"),
        total("babba"),
        total("abbaa"),
        total("babaa"),
        total("ababa"),
        total("baaba"),
        pdisc,
        submitted,
        ABBBAloci, BABBAloci,
        ABBAAloci, BABAAloci,
        ABABAloci, BAABAloci,
        BB12, BB1, BB2,
    ]

    # pickle to prevent multiprocessing from freezing on large returns;
    # the file is closed explicitly so the data is flushed before returning
    with open(".save." + str(submitted), 'wb') as out:
        pickle.dump(ret, out)
def runtest(infile, L, nboots, snpfreq, submitted, noterminals):
    """Run one five-taxon "foil" test and return its statistics.

    Reads the loci in `infile`, builds per-locus site-pattern counts for the
    taxon set `L`, computes the pooled DFO, DIL, DFI and DOL statistics, and
    bootstraps over loci to obtain a standard deviation and Z-score for each.

    Parameters:
        infile: path of the loci file; loci are delimited by "|".
        L: the tip taxa under test (per the original comment); passed
            unchanged to makeSNP and to IUA / IUAfreq.
        nboots: number of bootstrap replicates to draw.
        snpfreq: truthy to count patterns with IUAfreq, falsy to use IUA;
            also forwarded to makeSNP.
        submitted: job identifier, echoed back in the result list.
        noterminals: forwarded unchanged to makeSNP.

    Returns:
        A list laid out as:
            [L, DFO, ZFO, DIL, ZIL, DFI, ZFI, DOL, ZOL, number of loci,
             total BABBA, ABBBA, BABAA, ABBAA, BAABA, ABABA,
                   BBBAA, BBABA, AABAA, AAABA, BAAAA, ABAAA,
             pdisc, submitted, BBFO, BBIL, BBFI, BBOL]
        where pdisc is currently always 0.0 and the BB* entries are the
        lists of bootstrap replicates.

    Raises:
        IndexError: if `infile` contains no "|"-terminated locus.
    """
    # progress trace: echo the taxon set being tested
    print(L)

    # split the file into loci; whatever follows the final "|" is discarded.
    # The context manager closes the handle (the original left it open).
    with open(infile) as handle:
        loci = handle.read().strip().split("|")[:-1]
    # presumably so the first locus starts with a newline like the others
    loci[0] = "\n" + loci[0]

    # mapping of Locusfoil objects holding the data for the tip taxa in L
    Ldict = makeSNP(L, snpfreq, loci, noterminals)

    # calculate the discordant site patterns of every locus
    count_patterns = IUAfreq if snpfreq else IUA
    for key in Ldict:
        Ldict[key] = count_patterns(Ldict[key], L)
    locs = [Ldict[key] for key in Ldict]

    def side(loc, names):
        # sum of four pattern counts of one locus, added left to right
        first, second, third, fourth = [getattr(loc, name) for name in names]
        return first + second + third + fourth

    def dstat(left, right):
        # pooled statistic: per-locus (left - right) summed over all loci,
        # divided by per-locus (left + right) summed over all loci
        top = sum([side(loc, left) - side(loc, right) for loc in locs])
        bottom = sum([side(loc, left) + side(loc, right) for loc in locs])
        if bottom > 0:
            return float(top) / bottom
        return 0.

    def total(pattern):
        # total count of one site pattern across all loci
        return sum([getattr(loc, pattern) for loc in locs])

    def zscore(stat, std):
        # |statistic / bootstrap std|, or 0 when the std is not positive
        if std > 0:
            return abs(stat / std)
        return 0.

    # final pooled statistics: (patterns added, patterns subtracted)
    DFO = dstat(("babaa", "bbbaa", "ababa", "aaaba"),
                ("baaba", "bbaba", "abbaa", "aabaa"))
    DIL = dstat(("abbaa", "bbbaa", "baaba", "aaaba"),
                ("ababa", "bbaba", "babaa", "aabaa"))
    DFI = dstat(("babaa", "babba", "ababa", "abaaa"),
                ("abbaa", "abbba", "baaba", "baaaa"))
    DOL = dstat(("baaba", "babba", "abbaa", "abaaa"),
                ("ababa", "abbba", "babaa", "baaaa"))

    # TODO: the proportion of discordant loci is not computed for this test;
    # the value below is a placeholder kept so the result layout is stable
    pdisc = 0.0

    # bootstrap: resample loci with replacement, recompute the statistics
    BBFO, BBIL, BBFI, BBOL = [], [], [], []
    for _ in xrange(nboots):
        which = iter(sample_wr(xrange(len(Ldict)), len(Ldict)))
        bbfo, bbil, bbfi, bbol = bootfreq(Ldict, which)
        BBFO.append(bbfo)
        BBIL.append(bbil)
        BBFI.append(bbfi)
        BBOL.append(bbol)
    STDfo = numpy.std(BBFO)
    STDil = numpy.std(BBIL)
    STDfi = numpy.std(BBFI)
    STDol = numpy.std(BBOL)

    # stats out
    ZFO = zscore(DFO, STDfo)
    ZIL = zscore(DIL, STDil)
    ZFI = zscore(DFI, STDfi)
    ZOL = zscore(DOL, STDol)

    return [
        L,
        DFO, ZFO,
        DIL, ZIL,
        DFI, ZFI,
        DOL, ZOL,
        len(Ldict),
        total("babba"),
        total("abbba"),
        total("babaa"),
        total("abbaa"),
        total("baaba"),
        total("ababa"),
        total("bbbaa"),
        total("bbaba"),
        total("aabaa"),
        total("aaaba"),
        total("baaaa"),
        total("abaaa"),
        pdisc,
        submitted,
        BBFO, BBIL, BBFI, BBOL,
    ]
def runtest(infile, L, nboots, snpfreq, submitted):
    """Run one five-taxon D-statistic test and pickle the results to disk.

    Reads the loci in `infile`, builds per-locus site-pattern counts for the
    taxon set `L`, computes the pooled D12, D1 and D2 statistics, bootstraps
    over loci to obtain a standard deviation and Z-score for each statistic,
    and writes the result list to the file ".save.<submitted>".

    Parameters:
        infile: path of the loci file; loci are delimited by "|".
        L: the tip taxa under test (per the original comment); passed
            unchanged to makeSNP and to IUA / IUAfreq.
        nboots: number of bootstrap replicates to draw.
        snpfreq: truthy to count patterns with IUAfreq, falsy to use IUA;
            also forwarded to makeSNP.
        submitted: job identifier; used as the suffix of the output file
            name and stored in the result list.

    Returns:
        None.  The result list is pickled instead of returned (the original
        author notes this prevents multiprocessing from freezing on large
        returns).  Its layout is:
            [L, D12, Z12, D1, Z1, D2, Z2, number of loci,
             total ABBBA, BABBA, ABBAA, BABAA, ABABA, BAABA,
             pdisc, submitted,
             ABBBAloci, BABBAloci, ABBAAloci, BABAAloci, ABABAloci, BAABAloci,
             BB12, BB1, BB2]

    Raises:
        IndexError: if `infile` contains no "|"-terminated locus.
    """
    # progress trace: echo the taxon set being tested
    print(L)

    # split the file into loci; whatever follows the final "|" is discarded.
    # The context manager closes the handle (the original left it open).
    with open(infile) as source:
        loci = source.read().strip().split("|")[:-1]
    # presumably so the first locus starts with a newline like the others
    loci[0] = "\n" + loci[0]

    # mapping of Locus5 objects holding the data for the tip taxa in L
    Ldict = makeSNP(L, snpfreq, loci)

    # calculate the discordant site patterns of every locus
    count_patterns = IUAfreq if snpfreq else IUA
    for key in Ldict:
        Ldict[key] = count_patterns(Ldict[key], L)
    locs = [Ldict[key] for key in Ldict]

    def dstat(left, right):
        # pooled D: per-locus (left - right) summed, over (left + right) summed
        top = sum([getattr(loc, left) - getattr(loc, right) for loc in locs])
        bottom = sum([getattr(loc, left) + getattr(loc, right) for loc in locs])
        if bottom > 0:
            return float(top) / bottom
        return 0.

    def total(pattern):
        # total count of one site pattern across all loci
        return sum([getattr(loc, pattern) for loc in locs])

    def zscore(stat, std):
        # |statistic / bootstrap std|, or 0 when the std is not positive
        if std > 0:
            return abs(stat / std)
        return 0.

    # final pooled statistics
    D12 = dstat("abbba", "babba")
    D1 = dstat("abbaa", "babaa")
    D2 = dstat("ababa", "baaba")

    # proportion of loci with any non-zero per-locus D.  With no loci the
    # division raises ZeroDivisionError, which the original `except
    # ValueError` did not catch; both are handled here.
    try:
        ndisc = len([loc for loc in locs
                     if any([loc.D12(), loc.D1(), loc.D2()])])
        pdisc = ndisc / float(len(Ldict))
    except (ValueError, ZeroDivisionError):
        pdisc = 0.0

    # bootstrap: resample loci with replacement, recompute the statistics
    BB12, BB1, BB2 = [], [], []
    for _ in xrange(nboots):
        which = iter(sample_wr(xrange(len(Ldict)), len(Ldict)))
        # bootfreq is used for either value of snpfreq; the alternative
        # bootfixed call was already disabled in the original code
        bb12, bb1, bb2 = bootfreq(Ldict, which)
        BB12.append(bb12)
        BB1.append(bb1)
        BB2.append(bb2)
    STD12 = numpy.std(BB12)
    STD1 = numpy.std(BB1)
    STD2 = numpy.std(BB2)

    # stats out
    Z12 = zscore(D12, STD12)
    Z1 = zscore(D1, STD1)
    Z2 = zscore(D2, STD2)

    # locus numbers grouped by the sign of each per-locus statistic
    ABBBAloci = [loc.number for loc in locs if loc.D12() > 0]
    BABBAloci = [loc.number for loc in locs if loc.D12() < 0]
    ABBAAloci = [loc.number for loc in locs if loc.D1() > 0]
    BABAAloci = [loc.number for loc in locs if loc.D1() < 0]
    ABABAloci = [loc.number for loc in locs if loc.D2() > 0]
    BAABAloci = [loc.number for loc in locs if loc.D2() < 0]

    ret = [
        L,
        D12, Z12,
        D1, Z1,
        D2, Z2,
        len(Ldict),
        total("abbba"),
        total("babba"),
        total("abbaa"),
        total("babaa"),
        total("ababa"),
        total("baaba"),
        pdisc,
        submitted,
        ABBBAloci, BABBAloci,
        ABBAAloci, BABAAloci,
        ABABAloci, BAABAloci,
        BB12, BB1, BB2,
    ]

    # pickle to prevent multiprocessing from freezing on large returns;
    # the file is closed explicitly so the data is flushed before returning
    with open(".save." + str(submitted), 'wb') as out:
        pickle.dump(ret, out)
def runtest(infile, L, nboots, snpfreq, submitted, noterminals):
    """Run one five-taxon "foil" test and return its statistics.

    Reads the loci in `infile`, builds per-locus site-pattern counts for the
    taxon set `L`, computes the pooled DFO, DIL, DFI and DOL statistics, and
    bootstraps over loci to obtain a standard deviation and Z-score for each.

    Parameters:
        infile: path of the loci file; loci are delimited by "|".
        L: the tip taxa under test (per the original comment); passed
            unchanged to makeSNP and to IUA / IUAfreq.
        nboots: number of bootstrap replicates to draw.
        snpfreq: truthy to count patterns with IUAfreq, falsy to use IUA;
            also forwarded to makeSNP.
        submitted: job identifier, echoed back in the result list.
        noterminals: forwarded unchanged to makeSNP.

    Returns:
        A list laid out as:
            [L, DFO, ZFO, DIL, ZIL, DFI, ZFI, DOL, ZOL, number of loci,
             total BABBA, ABBBA, BABAA, ABBAA, BAABA, ABABA,
                   BBBAA, BBABA, AABAA, AAABA, BAAAA, ABAAA,
             pdisc, submitted, BBFO, BBIL, BBFI, BBOL]
        where pdisc is currently always 0.0 and the BB* entries are the
        lists of bootstrap replicates.

    Raises:
        IndexError: if `infile` contains no "|"-terminated locus.
    """
    # progress trace: echo the taxon set being tested
    print(L)

    # split the file into loci; whatever follows the final "|" is discarded.
    # The context manager closes the handle (the original left it open).
    with open(infile) as source:
        loci = source.read().strip().split("|")[:-1]
    # presumably so the first locus starts with a newline like the others
    loci[0] = "\n" + loci[0]

    # mapping of Locusfoil objects holding the data for the tip taxa in L
    Ldict = makeSNP(L, snpfreq, loci, noterminals)

    # calculate the discordant site patterns of every locus
    count_patterns = IUAfreq if snpfreq else IUA
    for key in Ldict:
        Ldict[key] = count_patterns(Ldict[key], L)
    locs = [Ldict[key] for key in Ldict]

    def side(loc, names):
        # sum of four pattern counts of one locus, added left to right
        first, second, third, fourth = [getattr(loc, name) for name in names]
        return first + second + third + fourth

    def dstat(left, right):
        # pooled statistic: per-locus (left - right) summed over all loci,
        # divided by per-locus (left + right) summed over all loci
        top = sum([side(loc, left) - side(loc, right) for loc in locs])
        bottom = sum([side(loc, left) + side(loc, right) for loc in locs])
        if bottom > 0:
            return float(top) / bottom
        return 0.

    def total(pattern):
        # total count of one site pattern across all loci
        return sum([getattr(loc, pattern) for loc in locs])

    def zscore(stat, std):
        # |statistic / bootstrap std|, or 0 when the std is not positive
        if std > 0:
            return abs(stat / std)
        return 0.

    # final pooled statistics: (patterns added, patterns subtracted)
    DFO = dstat(("babaa", "bbbaa", "ababa", "aaaba"),
                ("baaba", "bbaba", "abbaa", "aabaa"))
    DIL = dstat(("abbaa", "bbbaa", "baaba", "aaaba"),
                ("ababa", "bbaba", "babaa", "aabaa"))
    DFI = dstat(("babaa", "babba", "ababa", "abaaa"),
                ("abbaa", "abbba", "baaba", "baaaa"))
    DOL = dstat(("baaba", "babba", "abbaa", "abaaa"),
                ("ababa", "abbba", "babaa", "baaaa"))

    # TODO: the proportion of discordant loci is not computed for this test;
    # the value below is a placeholder kept so the result layout is stable
    pdisc = 0.0

    # bootstrap: resample loci with replacement, recompute the statistics
    BBFO, BBIL, BBFI, BBOL = [], [], [], []
    for _ in xrange(nboots):
        which = iter(sample_wr(xrange(len(Ldict)), len(Ldict)))
        bbfo, bbil, bbfi, bbol = bootfreq(Ldict, which)
        BBFO.append(bbfo)
        BBIL.append(bbil)
        BBFI.append(bbfi)
        BBOL.append(bbol)
    STDfo = numpy.std(BBFO)
    STDil = numpy.std(BBIL)
    STDfi = numpy.std(BBFI)
    STDol = numpy.std(BBOL)

    # stats out
    ZFO = zscore(DFO, STDfo)
    ZIL = zscore(DIL, STDil)
    ZFI = zscore(DFI, STDfi)
    ZOL = zscore(DOL, STDol)

    return [
        L,
        DFO, ZFO,
        DIL, ZIL,
        DFI, ZFI,
        DOL, ZOL,
        len(Ldict),
        total("babba"),
        total("abbba"),
        total("babaa"),
        total("abbaa"),
        total("baaba"),
        total("ababa"),
        total("bbbaa"),
        total("bbaba"),
        total("aabaa"),
        total("aaaba"),
        total("baaaa"),
        total("abaaa"),
        pdisc,
        submitted,
        BBFO, BBIL, BBFI, BBOL,
    ]