Esempio n. 1
0
def runtest(infile, L, nboots, snpfreq, submitted):
    """Compute the D12, D1 and D2 statistics for one test and pickle the result.

    The loci in `infile` are turned into per-locus objects by `makeSNP`,
    site-pattern counts are filled in by `IUAfreq` or `IUA`, the three
    statistics are computed over all loci, and `nboots` bootstrap
    replicates are drawn to obtain a standard deviation and a Z-score for
    each statistic.

    Parameters:
        infile: path of the loci file; loci are delimited by "|".
        L: taxon set under test; passed unchanged to `makeSNP`, `IUAfreq`
            and `IUA`.
        nboots: number of bootstrap replicates to draw.
        snpfreq: passed to `makeSNP`; when truthy the patterns are counted
            with `IUAfreq`, otherwise with `IUA`.
        submitted: identifier of this test; stored in the result and used
            as the suffix of the output file name.

    Returns:
        None. The result is pickled to the file ".save.<submitted>"
        (relative to the current working directory) as a list:
        [L, D12, Z12, D1, Z1, D2, Z2, number of loci,
         total abbba, total babba, total abbaa, total babaa,
         total ababa, total baaba, pdisc, submitted,
         ABBBAloci, BABBAloci, ABBAAloci, BABAAloci, ABABAloci, BAABAloci,
         BB12, BB1, BB2].

    Raises:
        IndexError: if `infile` contains no "|" delimiter, because the list
            of loci is then empty.
    """
    # Progress output: show which taxon set is being processed.
    print(L)

    # Split the file into loci on "|"; whatever follows the last "|" is
    # dropped.  Reading inside `with` guarantees the handle is closed.
    with open(infile) as handle:
        loci = handle.read().strip().split("|")[:-1]
    # NOTE(review): strip() removed any leading newline of the file, so this
    # presumably restores it to make the first locus look like the others
    # for makeSNP -- confirm against makeSNP.
    loci[0] = "\n" + loci[0]

    # Dict of per-locus objects holding the data for the taxa in L.
    Ldict = makeSNP(L, snpfreq, loci)

    # Fill in the discordant site-pattern counts of every locus.
    count_patterns = IUAfreq if snpfreq else IUA
    for loc in Ldict:
        Ldict[loc] = count_patterns(Ldict[loc], L)

    # Same order as iterating over Ldict, without repeated key lookups.
    records = [Ldict[key] for key in Ldict]

    # Each statistic is sum(left - right) / sum(left + right) over all loci,
    # reported as 0. when the denominator is not positive.
    dft_12 = sum([rec.abbba - rec.babba for rec in records])
    dbt_12 = sum([rec.abbba + rec.babba for rec in records])
    D12 = float(dft_12) / dbt_12 if dbt_12 > 0 else 0.

    dft_1 = sum([rec.abbaa - rec.babaa for rec in records])
    dbt_1 = sum([rec.abbaa + rec.babaa for rec in records])
    D1 = float(dft_1) / dbt_1 if dbt_1 > 0 else 0.

    dft_2 = sum([rec.ababa - rec.baaba for rec in records])
    dbt_2 = sum([rec.ababa + rec.baaba for rec in records])
    D2 = float(dft_2) / dbt_2 if dbt_2 > 0 else 0.

    # Proportion of loci for which at least one per-locus statistic is
    # non-zero.  With no loci the division raises ZeroDivisionError (not the
    # ValueError the original code guarded against); ValueError is still
    # caught so existing behaviour is kept.
    try:
        ndiscordant = len([
            rec for rec in records
            if any([rec.D12(), rec.D1(), rec.D2()])
        ])
        pdisc = ndiscordant / float(len(Ldict))
    except (ValueError, ZeroDivisionError):
        pdisc = 0.0

    # Bootstrap: resample locus indices and recompute the three statistics.
    # bootfreq is used whether or not snpfreq is set; the original code had
    # the bootfixed alternative disabled.
    nloci = len(Ldict)
    BB12 = []
    BB1 = []
    BB2 = []
    for _ in xrange(nboots):
        which = iter(sample_wr(xrange(nloci), nloci))
        bb12, bb1, bb2 = bootfreq(Ldict, which)
        BB12.append(bb12)
        BB1.append(bb1)
        BB2.append(bb2)
    STD12 = numpy.std(BB12)
    STD1 = numpy.std(BB1)
    STD2 = numpy.std(BB2)

    # Z-score = |D / bootstrap standard deviation|, 0. when the standard
    # deviation is not positive.
    Z12 = abs(D12 / STD12) if STD12 > 0 else 0.
    Z1 = abs(D1 / STD1) if STD1 > 0 else 0.
    Z2 = abs(D2 / STD2) if STD2 > 0 else 0.

    # Locus numbers grouped by the sign of each per-locus statistic.
    ABBBAloci = [rec.number for rec in records if rec.D12() > 0]
    BABBAloci = [rec.number for rec in records if rec.D12() < 0]
    ABBAAloci = [rec.number for rec in records if rec.D1() > 0]
    BABAAloci = [rec.number for rec in records if rec.D1() < 0]
    ABABAloci = [rec.number for rec in records if rec.D2() > 0]
    BAABAloci = [rec.number for rec in records if rec.D2() < 0]

    ret = [
        L, D12, Z12, D1, Z1, D2, Z2,
        len(Ldict),
        sum([rec.abbba for rec in records]),
        sum([rec.babba for rec in records]),
        sum([rec.abbaa for rec in records]),
        sum([rec.babaa for rec in records]),
        sum([rec.ababa for rec in records]),
        sum([rec.baaba for rec in records]),
        pdisc, submitted,
        ABBBAloci, BABBAloci,
        ABBAAloci, BABAAloci,
        ABABAloci, BAABAloci,
        BB12, BB1, BB2,
    ]

    # The result is pickled to disk instead of returned, to prevent
    # multiprocessing from freezing on large return values.
    with open(".save." + str(submitted), 'wb') as out:
        pickle.dump(ret, out)
Esempio n. 2
0
def _pattern_ratio(records, left, right):
    """Return sum(left - right) / sum(left + right) over `records`.

    `left` and `right` are tuples of site-pattern attribute names; for every
    record the named attributes are added up per side.  The result is 0.
    when the denominator is not positive.
    """
    top = sum([
        sum([getattr(rec, name) for name in left]) -
        sum([getattr(rec, name) for name in right])
        for rec in records
    ])
    bottom = sum([
        sum([getattr(rec, name) for name in left]) +
        sum([getattr(rec, name) for name in right])
        for rec in records
    ])
    if bottom > 0:
        return float(top) / bottom
    return 0.


def runtest(infile, L, nboots, snpfreq, submitted, noterminals):
    """Compute the DFO, DIL, DFI and DOL statistics for one test.

    The loci in `infile` are turned into per-locus objects by `makeSNP`,
    site-pattern counts are filled in by `IUAfreq` or `IUA`, the four
    statistics are computed over all loci, and `nboots` bootstrap
    replicates are drawn to obtain a standard deviation and a Z-score for
    each statistic.

    Parameters:
        infile: path of the loci file; loci are delimited by "|".
        L: taxon set under test; passed unchanged to `makeSNP`, `IUAfreq`
            and `IUA`.
        nboots: number of bootstrap replicates to draw.
        snpfreq: passed to `makeSNP`; when truthy the patterns are counted
            with `IUAfreq`, otherwise with `IUA`.
        submitted: identifier of this test; returned unchanged in the result.
        noterminals: passed unchanged to `makeSNP`.

    Returns:
        A list:
        [L, DFO, ZFO, DIL, ZIL, DFI, ZFI, DOL, ZOL, number of loci,
         total babba, total abbba, total babaa, total abbaa, total baaba,
         total ababa, total bbbaa, total bbaba, total aabaa, total aaaba,
         total baaaa, total abaaa, pdisc, submitted,
         BBFO, BBIL, BBFI, BBOL]
        where pdisc is currently always 0.0.

    Raises:
        IndexError: if `infile` contains no "|" delimiter, because the list
            of loci is then empty.
    """
    # Progress output: show which taxon set is being processed.
    print(L)

    # Split the file into loci on "|"; whatever follows the last "|" is
    # dropped.  Reading inside `with` guarantees the handle is closed.
    with open(infile) as handle:
        loci = handle.read().strip().split("|")[:-1]
    # NOTE(review): strip() removed any leading newline of the file, so this
    # presumably restores it to make the first locus look like the others
    # for makeSNP -- confirm against makeSNP.
    loci[0] = "\n" + loci[0]

    # Dict of per-locus objects holding the data for the taxa in L.
    Ldict = makeSNP(L, snpfreq, loci, noterminals)

    # Fill in the discordant site-pattern counts of every locus.
    count_patterns = IUAfreq if snpfreq else IUA
    for loc in Ldict:
        Ldict[loc] = count_patterns(Ldict[loc], L)

    # Same order as iterating over Ldict, without repeated key lookups.
    records = [Ldict[key] for key in Ldict]

    # Each statistic contrasts one group of four site patterns with another.
    DFO = _pattern_ratio(records,
                         ("babaa", "bbbaa", "ababa", "aaaba"),
                         ("baaba", "bbaba", "abbaa", "aabaa"))
    DIL = _pattern_ratio(records,
                         ("abbaa", "bbbaa", "baaba", "aaaba"),
                         ("ababa", "bbaba", "babaa", "aabaa"))
    DFI = _pattern_ratio(records,
                         ("babaa", "babba", "ababa", "abaaa"),
                         ("abbaa", "abbba", "baaba", "baaaa"))
    DOL = _pattern_ratio(records,
                         ("baaba", "babba", "abbaa", "abaaa"),
                         ("ababa", "abbba", "babaa", "baaaa"))

    # TODO: the proportion of discordant loci is not computed for this test
    # yet; it is always reported as 0.0.
    pdisc = 0.0

    # Bootstrap: resample locus indices and recompute the four statistics.
    nloci = len(Ldict)
    BBFO = []
    BBIL = []
    BBFI = []
    BBOL = []
    for _ in xrange(nboots):
        which = iter(sample_wr(xrange(nloci), nloci))
        bbfo, bbil, bbfi, bbol = bootfreq(Ldict, which)
        BBFO.append(bbfo)
        BBIL.append(bbil)
        BBFI.append(bbfi)
        BBOL.append(bbol)
    STDfo = numpy.std(BBFO)
    STDil = numpy.std(BBIL)
    STDfi = numpy.std(BBFI)
    STDol = numpy.std(BBOL)

    # Z-score = |D / bootstrap standard deviation|, 0. when the standard
    # deviation is not positive.
    ZFO = abs(DFO / STDfo) if STDfo > 0 else 0.
    ZIL = abs(DIL / STDil) if STDil > 0 else 0.
    ZFI = abs(DFI / STDfi) if STDfi > 0 else 0.
    ZOL = abs(DOL / STDol) if STDol > 0 else 0.

    # Totals of each site pattern over all loci, in the output order.
    reported = ("babba", "abbba", "babaa", "abbaa", "baaba", "ababa",
                "bbbaa", "bbaba", "aabaa", "aaaba", "baaaa", "abaaa")
    totals = [sum([getattr(rec, name) for rec in records])
              for name in reported]

    return (
        [L, DFO, ZFO, DIL, ZIL, DFI, ZFI, DOL, ZOL, len(Ldict)]
        + totals
        + [pdisc, submitted, BBFO, BBIL, BBFI, BBOL]
    )
Esempio n. 3
0
def runtest(infile, L, nboots, snpfreq, submitted):
    """Compute the D12, D1 and D2 statistics for one test and pickle the result.

    The loci in `infile` are turned into per-locus objects by `makeSNP`,
    site-pattern counts are filled in by `IUAfreq` or `IUA`, the three
    statistics are computed over all loci, and `nboots` bootstrap
    replicates are drawn to obtain a standard deviation and a Z-score for
    each statistic.

    Parameters:
        infile: path of the loci file; loci are delimited by "|".
        L: taxon set under test; passed unchanged to `makeSNP`, `IUAfreq`
            and `IUA`.
        nboots: number of bootstrap replicates to draw.
        snpfreq: passed to `makeSNP`; when truthy the patterns are counted
            with `IUAfreq`, otherwise with `IUA`.
        submitted: identifier of this test; stored in the result and used
            as the suffix of the output file name.

    Returns:
        None. The result is pickled to the file ".save.<submitted>"
        (relative to the current working directory) as a list:
        [L, D12, Z12, D1, Z1, D2, Z2, number of loci,
         total abbba, total babba, total abbaa, total babaa,
         total ababa, total baaba, pdisc, submitted,
         ABBBAloci, BABBAloci, ABBAAloci, BABAAloci, ABABAloci, BAABAloci,
         BB12, BB1, BB2].

    Raises:
        IndexError: if `infile` contains no "|" delimiter, because the list
            of loci is then empty.
    """
    # Progress output: show which taxon set is being processed.
    print(L)

    # Split the file into loci on "|"; whatever follows the last "|" is
    # dropped.  Reading inside `with` guarantees the handle is closed.
    with open(infile) as handle:
        loci = handle.read().strip().split("|")[:-1]
    # NOTE(review): strip() removed any leading newline of the file, so this
    # presumably restores it to make the first locus look like the others
    # for makeSNP -- confirm against makeSNP.
    loci[0] = "\n" + loci[0]

    # Dict of per-locus objects holding the data for the taxa in L.
    Ldict = makeSNP(L, snpfreq, loci)

    # Fill in the discordant site-pattern counts of every locus.
    count_patterns = IUAfreq if snpfreq else IUA
    for loc in Ldict:
        Ldict[loc] = count_patterns(Ldict[loc], L)

    # Same order as iterating over Ldict, without repeated key lookups.
    records = [Ldict[key] for key in Ldict]

    # Each statistic is sum(left - right) / sum(left + right) over all loci,
    # reported as 0. when the denominator is not positive.
    dft_12 = sum([rec.abbba - rec.babba for rec in records])
    dbt_12 = sum([rec.abbba + rec.babba for rec in records])
    D12 = float(dft_12) / dbt_12 if dbt_12 > 0 else 0.

    dft_1 = sum([rec.abbaa - rec.babaa for rec in records])
    dbt_1 = sum([rec.abbaa + rec.babaa for rec in records])
    D1 = float(dft_1) / dbt_1 if dbt_1 > 0 else 0.

    dft_2 = sum([rec.ababa - rec.baaba for rec in records])
    dbt_2 = sum([rec.ababa + rec.baaba for rec in records])
    D2 = float(dft_2) / dbt_2 if dbt_2 > 0 else 0.

    # Proportion of loci for which at least one per-locus statistic is
    # non-zero.  With no loci the division raises ZeroDivisionError (not the
    # ValueError the original code guarded against); ValueError is still
    # caught so existing behaviour is kept.
    try:
        ndiscordant = len([
            rec for rec in records
            if any([rec.D12(), rec.D1(), rec.D2()])
        ])
        pdisc = ndiscordant / float(len(Ldict))
    except (ValueError, ZeroDivisionError):
        pdisc = 0.0

    # Bootstrap: resample locus indices and recompute the three statistics.
    # bootfreq is used whether or not snpfreq is set; the original code had
    # the bootfixed alternative disabled.
    nloci = len(Ldict)
    BB12 = []
    BB1 = []
    BB2 = []
    for _ in xrange(nboots):
        which = iter(sample_wr(xrange(nloci), nloci))
        bb12, bb1, bb2 = bootfreq(Ldict, which)
        BB12.append(bb12)
        BB1.append(bb1)
        BB2.append(bb2)
    STD12 = numpy.std(BB12)
    STD1 = numpy.std(BB1)
    STD2 = numpy.std(BB2)

    # Z-score = |D / bootstrap standard deviation|, 0. when the standard
    # deviation is not positive.
    Z12 = abs(D12 / STD12) if STD12 > 0 else 0.
    Z1 = abs(D1 / STD1) if STD1 > 0 else 0.
    Z2 = abs(D2 / STD2) if STD2 > 0 else 0.

    # Locus numbers grouped by the sign of each per-locus statistic.
    ABBBAloci = [rec.number for rec in records if rec.D12() > 0]
    BABBAloci = [rec.number for rec in records if rec.D12() < 0]
    ABBAAloci = [rec.number for rec in records if rec.D1() > 0]
    BABAAloci = [rec.number for rec in records if rec.D1() < 0]
    ABABAloci = [rec.number for rec in records if rec.D2() > 0]
    BAABAloci = [rec.number for rec in records if rec.D2() < 0]

    ret = [
        L, D12, Z12, D1, Z1, D2, Z2,
        len(Ldict),
        sum([rec.abbba for rec in records]),
        sum([rec.babba for rec in records]),
        sum([rec.abbaa for rec in records]),
        sum([rec.babaa for rec in records]),
        sum([rec.ababa for rec in records]),
        sum([rec.baaba for rec in records]),
        pdisc, submitted,
        ABBBAloci, BABBAloci,
        ABBAAloci, BABAAloci,
        ABABAloci, BAABAloci,
        BB12, BB1, BB2,
    ]

    # The result is pickled to disk instead of returned, to prevent
    # multiprocessing from freezing on large return values.
    with open(".save." + str(submitted), 'wb') as out:
        pickle.dump(ret, out)
Esempio n. 4
0
def _pattern_ratio(records, left, right):
    """Return sum(left - right) / sum(left + right) over `records`.

    `left` and `right` are tuples of site-pattern attribute names; for every
    record the named attributes are added up per side.  The result is 0.
    when the denominator is not positive.
    """
    top = sum([
        sum([getattr(rec, name) for name in left]) -
        sum([getattr(rec, name) for name in right])
        for rec in records
    ])
    bottom = sum([
        sum([getattr(rec, name) for name in left]) +
        sum([getattr(rec, name) for name in right])
        for rec in records
    ])
    if bottom > 0:
        return float(top) / bottom
    return 0.


def runtest(infile, L, nboots, snpfreq, submitted, noterminals):
    """Compute the DFO, DIL, DFI and DOL statistics for one test.

    The loci in `infile` are turned into per-locus objects by `makeSNP`,
    site-pattern counts are filled in by `IUAfreq` or `IUA`, the four
    statistics are computed over all loci, and `nboots` bootstrap
    replicates are drawn to obtain a standard deviation and a Z-score for
    each statistic.

    Parameters:
        infile: path of the loci file; loci are delimited by "|".
        L: taxon set under test; passed unchanged to `makeSNP`, `IUAfreq`
            and `IUA`.
        nboots: number of bootstrap replicates to draw.
        snpfreq: passed to `makeSNP`; when truthy the patterns are counted
            with `IUAfreq`, otherwise with `IUA`.
        submitted: identifier of this test; returned unchanged in the result.
        noterminals: passed unchanged to `makeSNP`.

    Returns:
        A list:
        [L, DFO, ZFO, DIL, ZIL, DFI, ZFI, DOL, ZOL, number of loci,
         total babba, total abbba, total babaa, total abbaa, total baaba,
         total ababa, total bbbaa, total bbaba, total aabaa, total aaaba,
         total baaaa, total abaaa, pdisc, submitted,
         BBFO, BBIL, BBFI, BBOL]
        where pdisc is currently always 0.0.

    Raises:
        IndexError: if `infile` contains no "|" delimiter, because the list
            of loci is then empty.
    """
    # Progress output: show which taxon set is being processed.
    print(L)

    # Split the file into loci on "|"; whatever follows the last "|" is
    # dropped.  Reading inside `with` guarantees the handle is closed.
    with open(infile) as handle:
        loci = handle.read().strip().split("|")[:-1]
    # NOTE(review): strip() removed any leading newline of the file, so this
    # presumably restores it to make the first locus look like the others
    # for makeSNP -- confirm against makeSNP.
    loci[0] = "\n" + loci[0]

    # Dict of per-locus objects holding the data for the taxa in L.
    Ldict = makeSNP(L, snpfreq, loci, noterminals)

    # Fill in the discordant site-pattern counts of every locus.
    count_patterns = IUAfreq if snpfreq else IUA
    for loc in Ldict:
        Ldict[loc] = count_patterns(Ldict[loc], L)

    # Same order as iterating over Ldict, without repeated key lookups.
    records = [Ldict[key] for key in Ldict]

    # Each statistic contrasts one group of four site patterns with another.
    DFO = _pattern_ratio(records,
                         ("babaa", "bbbaa", "ababa", "aaaba"),
                         ("baaba", "bbaba", "abbaa", "aabaa"))
    DIL = _pattern_ratio(records,
                         ("abbaa", "bbbaa", "baaba", "aaaba"),
                         ("ababa", "bbaba", "babaa", "aabaa"))
    DFI = _pattern_ratio(records,
                         ("babaa", "babba", "ababa", "abaaa"),
                         ("abbaa", "abbba", "baaba", "baaaa"))
    DOL = _pattern_ratio(records,
                         ("baaba", "babba", "abbaa", "abaaa"),
                         ("ababa", "abbba", "babaa", "baaaa"))

    # TODO: the proportion of discordant loci is not computed for this test
    # yet; it is always reported as 0.0.
    pdisc = 0.0

    # Bootstrap: resample locus indices and recompute the four statistics.
    nloci = len(Ldict)
    BBFO = []
    BBIL = []
    BBFI = []
    BBOL = []
    for _ in xrange(nboots):
        which = iter(sample_wr(xrange(nloci), nloci))
        bbfo, bbil, bbfi, bbol = bootfreq(Ldict, which)
        BBFO.append(bbfo)
        BBIL.append(bbil)
        BBFI.append(bbfi)
        BBOL.append(bbol)
    STDfo = numpy.std(BBFO)
    STDil = numpy.std(BBIL)
    STDfi = numpy.std(BBFI)
    STDol = numpy.std(BBOL)

    # Z-score = |D / bootstrap standard deviation|, 0. when the standard
    # deviation is not positive.
    ZFO = abs(DFO / STDfo) if STDfo > 0 else 0.
    ZIL = abs(DIL / STDil) if STDil > 0 else 0.
    ZFI = abs(DFI / STDfi) if STDfi > 0 else 0.
    ZOL = abs(DOL / STDol) if STDol > 0 else 0.

    # Totals of each site pattern over all loci, in the output order.
    reported = ("babba", "abbba", "babaa", "abbaa", "baaba", "ababa",
                "bbbaa", "bbaba", "aabaa", "aaaba", "baaaa", "abaaa")
    totals = [sum([getattr(rec, name) for rec in records])
              for name in reported]

    return (
        [L, DFO, ZFO, DIL, ZIL, DFI, ZFI, DOL, ZOL, len(Ldict)]
        + totals
        + [pdisc, submitted, BBFO, BBIL, BBFI, BBOL]
    )