def calcfst(pops, posdict, gtdict, L, snp=True, hud=True):
    """Compute pairwise FST between populations for every replicate.

    As a side effect, prints ``thetapi() / L`` for each population of each
    replicate.

    Parameters
    ----------
    pops : sequence of int
        Number of haplotypes (rows of each genotype matrix) belonging to
        each population, in row order.
    posdict : dict
        Maps a replicate key to its array of site positions.
    gtdict : dict
        Maps a replicate key to a 2-D numpy genotype array
        (rows = haplotypes, columns = sites).  Keys must be convertible
        with ``int()`` because they are used as row indices of the result.
    L : number
        Divisor applied to pi before it is printed.
    snp : bool
        If True, a single randomly chosen site is analysed per replicate;
        otherwise every site is used.
    hud : bool
        If True the statistic is ``fst.hsm()``, otherwise ``fst.slatkin()``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(posdict), n_pairs)``; columns follow the order
        produced by ``itertools.combinations`` over the population indices.
    """
    n_pops = len(pops)
    fstarray = np.zeros([len(posdict), n_pops * (n_pops - 1) // 2])

    # Row indices of every population inside a genotype matrix.
    popdict = {}
    offset = 0
    for pix, size in enumerate(pops):
        popdict[pix] = list(range(offset, offset + size))
        offset += size

    for r in gtdict:
        positions = np.asarray(posdict[r])
        if snp:
            pos_snp = [np.random.choice(positions)]
            # Keep a length-1 index array even when the position is
            # duplicated, so the genotype slice below stays 2-D.
            gt_snp = np.where(positions == pos_snp[0])[0][:1]
        else:
            pos_snp = list(positions)
            # Previously left undefined on this branch (NameError).
            gt_snp = np.arange(len(pos_snp))

        # Observed FST for every pair of populations.
        fst_obs = []
        for x, y in combinations(popdict, 2):
            popX = gtdict[r][popdict[x]]
            popY = gtdict[r][popdict[y]]
            geno_fst = np.vstack([popX, popY])[:, gt_snp]
            gtpop_fst = [
                ''.join(str(allele) for allele in row).encode()
                for row in geno_fst
            ]
            sdfst = simData()
            sdfst.assign_sep(pos_snp, gtpop_fst)
            f1 = fst(sdfst, [popX.shape[0], popY.shape[0]])
            fst_obs.append(f1.hsm() if hud else f1.slatkin())

        # Per-population pi, reported on stdout only.
        for z in popdict:
            geno = gtdict[r][popdict[z]][:, gt_snp]
            gtpop_pi = [
                ''.join(str(allele) for allele in row).encode()
                for row in geno
            ]
            sdpop = simData()
            sdpop.assign_sep(pos_snp, gtpop_pi)
            pi = polySIM(sdpop).thetapi()
            print("pop {} pi:{}".format(z, pi / L))

        fstarray[int(r), :] = fst_obs
    return fstarray
def permtest(gtdict, posdict, pops, n_perm, fstarray):
    """Permutation test for observed FST values.

    One replicate is picked at random from ``gtdict``.  For each of
    ``n_perm`` permutations two groups of ``pops[0]`` haplotypes are drawn
    (with replacement, via ``np.random.randint``) from all ``sum(pops)``
    rows, and ``fst.hsm()`` is computed between them.

    Parameters
    ----------
    gtdict : dict
        Maps a replicate key to a 2-D numpy genotype array.
    posdict : dict
        Maps a replicate key to its site positions.
    pops : sequence of int
        Haplotypes per population.
    n_perm : int
        Number of permutations.
    fstarray : iterable
        Observed values; each element ``f`` is compared against the
        permutation distribution with ``f > distribution``.

    Returns
    -------
    list
        ``1 - count / n_perm`` for every element of ``fstarray``, where
        ``count`` is the number of ``True`` entries in ``f > distribution``.
    """
    nhap = sum(pops)
    # dict views are not indexable on Python 3; random.choice needs a sequence.
    r = random.choice(list(gtdict.keys()))

    # FST under random permutations.
    # NOTE(review): both groups are drawn with size pops[0] -- confirm that
    # the second should not be pops[1].
    fst_t = []
    for _ in range(n_perm):
        popX = gtdict[r][np.random.randint(0, nhap, pops[0])]
        popY = gtdict[r][np.random.randint(0, nhap, pops[0])]
        sdfst = simData()
        geno_fst = np.vstack([popX, popY])
        gtpop_fst = [''.join(str(allele) for allele in row) for row in geno_fst]
        sdfst.assign_sep(posdict[r], gtpop_fst)
        f1 = fst(sdfst, [popX.shape[0], popY.shape[0]])
        fst_t.append(f1.hsm())

    # Mark significant FST.
    fst_tnp = np.array(fst_t)
    Fstdist = [len(np.where(f > fst_tnp)[0]) for f in fstarray]
    return [1 - (f / float(n_perm)) for f in Fstdist]
Example #3
0
 def testShared(self):
     """``fst.shared(0, 1)`` on a 2+2 sample must equal ``[0.5]``."""
     sites = [(0.1, "0011"), (0.2, "1100"), (0.3, "0100"), (0.4, "1101"),
              (0.5, "0101")]
     table = simData()
     table.assign(sites)
     analysis = fst(table, [2, 2])
     observed = analysis.shared(0, 1)
     self.assertEqual(observed, [0.5])
Example #4
0
def calcfst(pops, posdict, gtdict):
    """Compute pairwise Slatkin FST between populations for every replicate.

    Parameters
    ----------
    pops : sequence of int
        Number of haplotypes (rows of each genotype matrix) per population,
        in row order.
    posdict : dict
        Maps a replicate key to its site positions.
    gtdict : dict
        Maps a replicate key to a 2-D numpy genotype array.  Keys must be
        convertible with ``int()``; they index the rows of the result.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(posdict), n_pairs)``.  Columns are ordered
        (1,0), (2,0), (2,1), ... i.e. every pair ``(i, j)`` with ``i > j``.
    """
    n_pops = len(pops)
    # Integer division: a float dimension makes np.zeros raise on Python 3.
    fstarray = np.zeros([len(posdict), n_pops * (n_pops - 1) // 2])

    # Row indices of every population inside a genotype matrix.
    popiix = []
    ix = 0
    for p in pops:
        popiix.append(list(range(ix, ix + p)))
        ix += p

    # Observed FST
    for r in gtdict:
        fst_obs = []
        for i in range(n_pops):
            for j in range(i):
                popX = gtdict[r][popiix[i]]
                popY = gtdict[r][popiix[j]]
                sdfst = simData()
                geno_fst = np.vstack([popX, popY])
                gtpop_fst = [
                    ''.join(str(allele) for allele in row) for row in geno_fst
                ]
                sdfst.assign_sep(posdict[r], gtpop_fst)
                f1 = fst(sdfst, [popX.shape[0], popY.shape[0]])
                fst_obs.append(f1.slatkin())
        fstarray[int(r), :] = fst_obs
    return fstarray
Example #5
0
 def testShared(self):
     """``fst.shared(0, 1)`` on a 2+2 sample (bytes genotypes) must equal ``[0.5]``."""
     sites = [
         (0.1, b"0011"),
         (0.2, b"1100"),
         (0.3, b"0100"),
         (0.4, b"1101"),
         (0.5, b"0101"),
     ]
     table = simData()
     table.assign(sites)
     analysis = fst(table, [2, 2])
     observed = analysis.shared(0, 1)
     self.assertEqual(observed, [0.5])
Example #6
0
 def testPriv(self):
     """``fst.priv(0, 1)`` on a 2+2 sample must equal ``{0: [0.3], 1: [0.4]}``."""
     sites = [(0.1, "0011"), (0.2, "1100"), (0.3, "0100"), (0.4, "1101"),
              (0.5, "0101")]
     table = simData()
     table.assign(sites)
     analysis = fst(table, [2, 2])
     observed = analysis.priv(0, 1)
     self.assertEqual(observed, {0: [0.3], 1: [0.4]})
Example #7
0
 def testFixed(self):
     """``fst.fixed(0, 1)`` on a 2+2 sample must equal ``[0.1, 0.2]``."""
     sites = [(0.1, "0011"), (0.2, "1100"), (0.3, "0100"), (0.4, "1101"),
              (0.5, "0101")]
     table = simData()
     table.assign(sites)
     analysis = fst(table, [2, 2])
     observed = analysis.fixed(0, 1)
     self.assertEqual(observed, [0.1, 0.2])
Example #8
0
 def testExceptionPriv(self):
     """Asking ``fst.priv`` for a partition index that does not exist must raise."""
     with self.assertRaises(RuntimeError):
         sites = [(0.1, "0011"), (0.2, "1100"), (0.3, "0100"),
                  (0.4, "1101"), (0.5, "0101")]
         table = simData()
         table.assign(sites)
         analysis = fst(table, [2, 2])
         # Only partitions 0 and 1 exist, so index 2 is out of range.
         analysis.priv(2, 1)
Example #9
0
 def testExceptionPriv(self):
     """Asking ``fst.priv`` for a partition index that does not exist must raise."""
     with self.assertRaises(RuntimeError):
         sites = [
             (0.1, b"0011"),
             (0.2, b"1100"),
             (0.3, b"0100"),
             (0.4, b"1101"),
             (0.5, b"0101"),
         ]
         table = simData()
         table.assign(sites)
         analysis = fst(table, [2, 2])
         # Only partitions 0 and 1 exist, so index 2 is out of range.
         analysis.priv(2, 1)
Example #10
0
 def testFixed(self):
     """``fst.fixed(0, 1)`` on a 2+2 sample (bytes genotypes) must equal ``[0.1, 0.2]``."""
     sites = [
         (0.1, b"0011"),
         (0.2, b"1100"),
         (0.3, b"0100"),
         (0.4, b"1101"),
         (0.5, b"0101"),
     ]
     table = simData()
     table.assign(sites)
     analysis = fst(table, [2, 2])
     observed = analysis.fixed(0, 1)
     self.assertEqual(observed, [0.1, 0.2])
Example #11
0
 def testPriv(self):
     """``fst.priv(0, 1)`` on a 2+2 sample (bytes genotypes) must equal ``{0: [0.3], 1: [0.4]}``."""
     sites = [
         (0.1, b"0011"),
         (0.2, b"1100"),
         (0.3, b"0100"),
         (0.4, b"1101"),
         (0.5, b"0101"),
     ]
     table = simData()
     table.assign(sites)
     analysis = fst(table, [2, 2])
     observed = analysis.priv(0, 1)
     self.assertEqual(observed, {0: [0.3], 1: [0.4]})
Example #12
0
 def testException1(self):
     """Constructing ``fst`` with partition sizes exceeding the sample must raise."""
     with self.assertRaises(RuntimeError):
         sites = [(0.1, "0011"), (0.2, "1100"), (0.3, "0100"),
                  (0.4, "1101"), (0.5, "0101")]
         table = simData()
         table.assign(sites)
         # The partition sizes sum to 5, more than the total sample size.
         # libsequence throws a SeqException here, which gets translated
         # to a RuntimeError.
         fst(table, [2, 3])
Example #13
0
 def testException1(self):
     """Constructing ``fst`` with partition sizes exceeding the sample must raise."""
     with self.assertRaises(RuntimeError):
         sites = [
             (0.1, b"0011"),
             (0.2, b"1100"),
             (0.3, b"0100"),
             (0.4, b"1101"),
             (0.5, b"0101"),
         ]
         table = simData()
         table.assign(sites)
         # The partition sizes sum to 5, more than the total sample size.
         # libsequence throws a SeqException here, which gets translated
         # to a RuntimeError.
         fst(table, [2, 3])
Example #14
0
def make_simData(g):
    """
    Build a :class:`libsequence.polytable.simData` from msprime output.

    Every variant yielded by ``g.variants(as_bytes=True)`` contributes one
    ``(position, genotypes)`` pair.

    :param g: The output from msprime

    :return: A simData constructed from the list of pairs.

    .. note:: Thanks to Jerome Kelleher for pointing out the quick implementation using msprime >= 0.4.0.

    Example:

    >>> import msprime as msp
    >>> from libsequence.msprime import make_simData
    >>> g = msp.simulate(sample_size = 10,Ne=1e6, recombination_rate=1e-8,mutation_rate=1e-8,length=1e4)
    >>> s = make_simData(g)
    """
    site_pairs = []
    for variant in g.variants(as_bytes=True):
        site_pairs.append((variant.position, variant.genotypes))
    return simData(site_pairs)
Example #15
0
def calc_otherstats(positions, gtdict, popsizelist):
    """Summarise per-population statistics across replicates.

    Parameters
    ----------
    positions : sequence
        ``positions[rep]`` holds the mutation positions of replicate ``rep``
        (0-based).
    gtdict : dict
        Genotype matrices keyed by the 1-based replicate number as a string
        (``'1'``, ``'2'``, ...); rows are haplotypes.
    popsizelist : sequence of int
        Number of haplotypes per population, in row order.

    Returns
    -------
    tuple
        ``(garudH12, garudH1, garudH21, sfs_summ, hap_summ, derivedfreq)``,
        each a list with one entry per population averaged over replicates.
    """
    sel_pos = 0.5  # need -Sp 0.5 and -Smark

    # Row ranges occupied by each population.
    pop_rows = []
    first = 0
    for size in popsizelist:
        pop_rows.append(range(first, first + size))
        first += size

    h12_reps, h1_reps, h21_reps = [], [], []
    sfs_reps, hap_reps, derived_reps = [], [], []
    for rep in range(len(gtdict)):
        h12_row, h1_row, h21_row = [], [], []
        sfs_row, hap_row, derived_row = [], [], []
        for rows in pop_rows:
            gtarray = gtdict[str(rep + 1)][rows]
            sd = simData()
            hapstrings = [''.join(map(str, hap)) for hap in gtarray]
            sd.assign_sep(positions[rep], hapstrings)
            # Garud et al. 2015 haplotype statistics
            garud = garudStats(sd)
            h12_row.append(garud['H12'])
            h1_row.append(garud['H1'])
            h21_row.append(garud['H2H1'])
            # site frequency spectrum and derived-allele frequency
            sfsarray, derived = site_freqspec_fx(gtarray, sel_pos)
            sfs_row.append(sfsarray)
            # haplotype configuration
            hap_row.append(haploconfig_fx(gtarray))
            derived_row.append(derived)
        h12_reps.append(h12_row)
        h1_reps.append(h1_row)
        h21_reps.append(h21_row)
        sfs_reps.append(sfs_row)
        hap_reps.append(hap_row)
        derived_reps.append(derived_row)

    # Transpose replicate-major lists to population-major and average.
    garudH12 = [np.mean(per_pop) for per_pop in zip(*h12_reps)]
    garudH1 = [np.mean(per_pop) for per_pop in zip(*h1_reps)]
    garudH21 = [np.mean(per_pop) for per_pop in zip(*h21_reps)]
    sfs_summ = [
        np.mean(per_pop, axis=0) / size
        for size, per_pop in zip(popsizelist, zip(*sfs_reps))
    ]
    hap_summ = [np.mean(per_pop, axis=0) for per_pop in zip(*hap_reps)]

    # Most common haplotype configurations (computed but not returned).
    stacked = [np.vstack(per_pop) for per_pop in zip(*hap_reps)]
    hapconfig_common = []
    for config in stacked:
        uniq, hapfreq = hapconfigfx(config)
        hapconfig_common.append(zip(uniq, hapfreq))

    derivedfreq = [np.mean(per_pop) for per_pop in zip(*derived_reps)]
    return (garudH12, garudH1, garudH21, sfs_summ, hap_summ, derivedfreq)
Example #16
0
def hapbaxVmig_stats(gtdict, posdict, demesizelist, sp, origcount, sel, mig):
    """Report resistant-haplotype statistics for the first deme.

    For each replicate, the haplotypes of the first deme carrying a non-zero
    allele at the selected position ``sp`` are collected.  Counts of origins,
    alleles and distinct haplotypes, haplotype diversity, evenness and
    libsequence summary statistics are printed to stdout.  Replicates in
    which ``sp`` matches more than one position are skipped; replicates with
    no resistant haplotype contribute nothing.

    Parameters
    ----------
    gtdict : dict
        Genotype matrices (2-D numpy arrays, rows = haplotypes) keyed by the
        0-based replicate number as a string.  Not modified.
    posdict : dict
        Site positions per replicate, same keys as ``gtdict``.
    demesizelist : sequence of int
        Deme sizes; only ``demesizelist[0]`` (rows of the first deme) is used.
    sp : float
        Position of the selected site.
    origcount : sequence
        ``origcount[int(rep)]`` is printed as the number of origins.
    sel, mig
        Only echoed in the printed report.

    Returns
    -------
    tuple
        ``(mean, standard error)`` of the number of distinct resistant
        haplotypes per replicate; ``(nan, nan)`` if no replicate had any.
    """
    hapbax = []
    for rep in range(len(gtdict.keys())):
        rep = str(rep)
        smark = np.where(posdict[rep] == sp)[0]
        if len(smark) > 1:
            print("\nSkipping rep {}, smark gt 1\n".format(rep))
            continue
        piix = gtdict[rep][0:demesizelist[0]]  # rows of the first pop
        riix = np.where(piix[:, smark] > 0)[0]  # rows carrying the selected
        # Test emptiness, not truthiness of the indices: ``riix.any()`` is
        # False when the only resistant haplotype sits in row 0.
        if riix.size == 0:
            continue

        hapr = gtdict[rep][riix]  # integer-array indexing returns a copy
        uniqhaps_IBS = np.array(
            [np.array(x) for x in set(tuple(x) for x in hapr)])
        hapfreq_IBS = np.array(
            [len(hapr[np.all(hapr == x, axis=1)]) for x in uniqhaps_IBS],
            dtype=int)
        uallel = hapr[:, smark]
        puallel = gtdict[rep][:, smark]
        # Change all origins to 1 even if >1, so haplotypes that differ only
        # by origin label collapse into one (sites that are IBS).
        hapr[:, smark] = 1
        uniqhaps = np.array(
            [np.array(x) for x in set(tuple(x) for x in hapr)])
        hapfreq = np.array(
            [len(hapr[np.all(hapr == x, axis=1)]) for x in uniqhaps],
            dtype=int)
        print("\n#rep {}".format(rep))
        print("\nsel: {}\nmig: {}".format(sel, mig))
        print("#number of ORIGINS: {}".format(origcount[int(rep)]))
        print("#number of unique resistant ALLELES across ALL pops: {}".
              format(len(np.unique(puallel[puallel > 0]))))
        print(
            "#number of unique resistant ALLELES in SAMPLE pop: {}".format(
                len(np.unique(uallel[uallel > 0]))))
        if uniqhaps_IBS.shape[0] > uniqhaps.shape[0]:
            print("#number of resistant HAPLOTYPES (IBS) in SAMPLE pop: "
                  "{}, frequencies {}".format(len(uniqhaps_IBS),
                                              hapfreq_IBS))
            print(
                "#number of hidden resistant HAPLOTYPES (IBS): {}".format(
                    uniqhaps_IBS.shape[0] - uniqhaps.shape[0]))
        print("#number of observable resistant HAPLOTYPES in SAMPLE pop: "
              "{}, frequencies {}".format(len(uniqhaps), hapfreq))
        hapbax.append(len(uniqhaps))

        # Full haplotype configuration: C[k-1] = number of haplotypes
        # observed exactly k times.
        n_res = sum(hapfreq)
        C_freq, C_count = np.unique(hapfreq, return_counts=True)
        C = np.zeros(piix.shape[0])
        C[C_freq - 1] = C_count
        # haplotype diversity
        Hd = 1 - sum([(((i + 1) / float(n_res))**2) * c
                      for i, c in enumerate(C)])
        M = max(np.nonzero(C)[0]) + 1  # greatest non-zero position
        K = sum(C)  # number of haps
        # evenness from Chattopadhyay 2007
        lambda_e = sum([(float(hf) / sum(hapfreq))**2 for hf in hapfreq])
        Ds = 1.0 / lambda_e
        Ev = Ds / uniqhaps.shape[0]
        rAfreq = riix.shape[0] / float(piix.shape[0])
        com = "#stats_r: Hd:{}\tKhaps:{}\tMaxAbsFreq:{}\tEv:{}\trFreq:{}\n"
        print(com.format(Hd, K, M, Ev, rAfreq))

        # popgen stats resistant
        gtpopr = [''.join(str(allele) for allele in row) for row in hapr]
        sdpopr = simData()
        sdpopr.assign_sep(posdict[rep], gtpopr)
        pspopr = polySIM(sdpopr)
        theta_r = pspopr.thetaw()
        tajd_r = pspopr.tajimasd()
        hprime_r = pspopr.hprime()
        pi_r = pspopr.thetapi()
        garudStats_r = garudStats(sdpopr)  # garud 2015

        # popgen stats all.  ``piix`` is a slice view of the caller's array,
        # so work on a copy instead of overwriting the selected site there.
        allhaps = piix.copy()
        allhaps[:, smark] = 1
        gtpopa = [''.join(str(allele) for allele in row) for row in allhaps]
        sdpopa = simData()
        sdpopa.assign_sep(posdict[rep], gtpopa)
        pspopa = polySIM(sdpopa)
        theta_a = pspopa.thetaw()
        tajd_a = pspopa.tajimasd()
        hprime_a = pspopa.hprime()
        pi_a = pspopa.thetapi()
        garudStats_a = garudStats(sdpopa)  # garud 2015
        print("#popgen_r: theta_w:{}\ttheta_pi:{}\ttajD:{}\tfaywuH:{}".
              format(theta_r, pi_r, tajd_r, hprime_r))
        print("#popgen_all: theta_w:{}\ttheta_pi:{}\ttajD:{}\tfaywuH:{}".
              format(theta_a, pi_a, tajd_a, hprime_a))
        print("#garud2015_r: H12:{}\tH1:{}\tH2H1:{}".format(
            garudStats_r['H12'], garudStats_r['H1'], garudStats_r['H2H1']))
        print("#garud2015_all: H12:{}\tH1:{}\tH2H1:{}\n".format(
            garudStats_a['H12'], garudStats_a['H1'], garudStats_a['H2H1']))

    if not hapbax:
        return (np.nan, np.nan)
    hapbaxm = np.mean(hapbax)
    # Standard error of the mean is std / sqrt(n), not std / n.
    hapbaxSE = np.std(hapbax) / np.sqrt(len(hapbax))
    return (hapbaxm, hapbaxSE)
def hapmigVsel_stats(gtdict, posdict, demesizelist, sp, origcount, sel, mig):
    """Summarise resistant haplotypes of the first deme across replicates.

    For each replicate, the haplotypes of the first deme carrying a non-zero
    allele at the selected position ``sp`` are collected and a report
    (origins, alleles, distinct haplotypes, diversity, evenness, libsequence
    summary statistics) is printed.  Replicates in which ``sp`` matches more
    than one position are skipped entirely.

    Parameters
    ----------
    gtdict : dict
        Genotype matrices (2-D numpy arrays, rows = haplotypes) keyed by the
        0-based replicate number as a string.  Not modified.
    posdict : dict
        Site positions per replicate, same keys as ``gtdict``.
    demesizelist : sequence of int
        Deme sizes; only ``demesizelist[0]`` is used.
    sp : float
        Position of the selected site.
    origcount : sequence
        ``origcount[int(rep)]`` is printed as the number of origins.
    sel, mig
        Only echoed in the printed report.

    Returns
    -------
    tuple
        ``(Piplot, Rfreq, rarray, rstr, p_dist)``:

        * ``Piplot`` -- total count of non-resistant haplotypes followed by
          the column sums of the per-replicate haplotype configurations;
        * ``Rfreq`` -- mean resistant frequency repeated ``len(Piplot)`` times;
        * ``rarray`` -- ``demesizelist[0] - sum(avg)`` followed by the
          averaged, descending haplotype frequencies ``avg``;
        * ``rstr`` -- labels ``"R0"``, ``"R1"``, ... matching ``rarray``;
        * ``p_dist`` -- ``np.unique(..., return_counts=True)`` of pairwise
          differences among resistant haplotypes.
    """
    # izip_longest was renamed to zip_longest in Python 3.
    zip_longest = (getattr(itertools, "zip_longest", None)
                   or itertools.izip_longest)

    rfreq = []  # frequency of resistant allele
    Rplot = np.array([], dtype=np.int64).reshape(0, demesizelist[0])
    Splot = []
    pdist = []
    for rep in range(len(gtdict.keys())):
        rep = str(rep)
        smark = np.where(posdict[rep] == sp)[0]
        if len(smark) > 1:
            print("\nSkipping rep {}, smark gt 1\n".format(rep))
            continue
        piix = gtdict[rep][0:demesizelist[0]]  # rows of the first pop
        riix = np.where(piix[:, smark] > 0)[0]  # rows carrying the selected
        # Test emptiness, not truthiness of the indices: ``riix.any()`` is
        # False when the only resistant haplotype sits in row 0.
        if riix.size:
            hapr = gtdict[rep][riix]  # integer-array indexing returns a copy
            uniqhaps_IBS = np.array(
                [np.array(x) for x in set(tuple(x) for x in hapr)])
            hapfreq_IBS = np.array(
                [len(hapr[np.all(hapr == x, axis=1)]) for x in uniqhaps_IBS],
                dtype=int)
            uallel = hapr[:, smark]
            puallel = gtdict[rep][:, smark]
            # Change all origins to 1 even if >1, so haplotypes that differ
            # only by origin label collapse into one (sites that are IBS).
            hapr[:, smark] = 1
            uniqhaps = np.array(
                [np.array(x) for x in set(tuple(x) for x in hapr)])
            hapfreq = np.array(
                [len(hapr[np.all(hapr == x, axis=1)]) for x in uniqhaps],
                dtype=int)
            print("\n#rep {}".format(rep))
            print("\nsel: {}\nmig: {}".format(sel, mig))
            print("#number of ORIGINS: {}".format(origcount[int(rep)]))
            print("#number of unique resistant ALLELES across ALL pops: {}".
                  format(len(np.unique(puallel[puallel > 0]))))
            print(
                "#number of unique resistant ALLELES in SAMPLE pop: {}".format(
                    len(np.unique(uallel[uallel > 0]))))
            if uniqhaps_IBS.shape[0] > uniqhaps.shape[0]:
                print("#number of resistant HAPLOTYPES (IBS) in SAMPLE pop: "
                      "{}, frequencies {}".format(len(uniqhaps_IBS),
                                                  hapfreq_IBS))
                print(
                    "#number of hidden resistant HAPLOTYPES (IBS): {}".format(
                        uniqhaps_IBS.shape[0] - uniqhaps.shape[0]))
            print("#number of observable resistant HAPLOTYPES in SAMPLE pop:"
                  "{}, frequencies {}".format(len(uniqhaps), hapfreq))

            # Full haplotype configuration: C[k-1] = number of haplotypes
            # observed exactly k times.
            n_res = sum(hapfreq)
            C_freq, C_count = np.unique(hapfreq, return_counts=True)
            C = np.zeros(piix.shape[0])
            C[C_freq - 1] = C_count
            # haplotype diversity
            Hd = 1 - sum([(((i + 1) / float(n_res))**2) * c
                          for i, c in enumerate(C)])
            M = max(np.nonzero(C)[0]) + 1  # greatest non-zero position
            K = sum(C)  # number of haps
            # evenness from Chattopadhyay 2007
            lambda_e = sum([(float(hf) / sum(hapfreq))**2 for hf in hapfreq])
            Ds = 1.0 / lambda_e
            Ev = Ds / uniqhaps.shape[0]
            print("#stats_r: Hd:{}\tKhaps:{}\tMaxAbsFreq:{}\tEv:{}\n".format(
                Hd, K, M, Ev))

            # pairwise differences among resistant haplotypes
            pdist.extend([
                np.count_nonzero(a != b) for i, a in enumerate(hapr)
                for j, b in enumerate(hapr) if j > i
            ])

            # popgen stats resistant
            gtpopr = [''.join(str(allele) for allele in row) for row in hapr]
            sdpopr = simData()
            sdpopr.assign_sep(posdict[rep], gtpopr)
            pspopr = polySIM(sdpopr)
            theta_r = pspopr.thetaw()
            tajd_r = pspopr.tajimasd()
            hprime_r = pspopr.hprime()
            pi_r = pspopr.thetapi()
            garudStats_r = garudStats(sdpopr)  # garud 2015

            # popgen stats all.  ``piix`` is a slice view of the caller's
            # array, so work on a copy instead of overwriting it in place.
            allhaps = piix.copy()
            allhaps[:, smark] = 1
            gtpopa = [
                ''.join(str(allele) for allele in row) for row in allhaps
            ]
            sdpopa = simData()
            sdpopa.assign_sep(posdict[rep], gtpopa)
            pspopa = polySIM(sdpopa)
            theta_a = pspopa.thetaw()
            tajd_a = pspopa.tajimasd()
            hprime_a = pspopa.hprime()
            pi_a = pspopa.thetapi()
            garudStats_a = garudStats(sdpopa)  # garud 2015
            print("#popgen_r: theta_w:{}\ttheta_pi:{}\ttajD:{}\tfaywuH:{}".
                  format(theta_r, pi_r, tajd_r, hprime_r))
            print("#popgen_all: theta_w:{}\ttheta_pi:{}\ttajD:{}\tfaywuH:{}".
                  format(theta_a, pi_a, tajd_a, hprime_a))
            print("#garud2015_r: H12:{}\tH1:{}\tH2H1:{}".format(
                garudStats_r['H12'], garudStats_r['H1'], garudStats_r['H2H1']))
            print("#garud2015_all: H12:{}\tH1:{}\tH2H1:{}\n".format(
                garudStats_a['H12'], garudStats_a['H1'], garudStats_a['H2H1']))
        else:
            # No resistant haplotype: empty configuration for this replicate.
            C = np.zeros(piix.shape[0])

        Rplot = np.vstack((Rplot, C))
        Splot.append(piix.shape[0] - riix.shape[0])
        rfreq.append(riix.shape[0] / float(piix.shape[0]))

    # plot of singletons, doubletons ...
    Piplot = np.append(np.sum(Splot), np.sum(Rplot, axis=0))
    Rfreq = np.repeat(np.mean(rfreq), len(Piplot))
    # for pi plot: per replicate, the observed frequency classes in
    # descending order
    pichart = [(np.nonzero(row)[0][::-1] + 1) for row in Rplot if row.size]
    avgresist = [np.average(i) for i in zip_longest(*pichart, fillvalue=0)]
    rstr = ["R" + str(nx) for nx in range(0, len(avgresist) + 1)]
    rarray = np.array(avgresist)
    rarray = np.insert(rarray, 0, demesizelist[0] - sum(avgresist))
    # pairwise diff
    p_dist = np.unique(pdist, return_counts=True)

    return (Piplot, Rfreq, rarray, rstr, p_dist)
Example #18
0
# For every sample, bin element 0 of each entry of i[0] into consecutive
# half-open windows [start, start+0.1), for as long as start < 3.
for i in samples:
    windows = []
    start = 0
    while start < 3:
        ##We will only look at neutral mutations, which are element 0 of each sample
        window = [j[0] for j in i[0] if (j[0] >=start and j[0] < start+0.1)]
        windows.append(window)
        start += 0.1
    ##We now have a full set of windows that we can do something with
    print (len(windows))  ##There should be 30, and many will be empty


# ### Using [pylibseq](https://github.com/molpopgen/pylibseq)

# In[31]:

from libsequence.windows import Windows
from libsequence.polytable import simData
# Same windowing as the manual loop, delegated to pylibseq's Windows class.
for i in samples:
    ##We need to convert our list of tuples
    ##into types that pylibseq/libsequence understand:
    # NOTE(review): the arguments 0.1, 0.1, 0, 3 are presumably window size,
    # step, start and end -- confirm against the Windows signature.
    windows = Windows(simData(i[0]),0.1,0.1,0,3)
    ##Now, you can analyze the windows, etc.
    print(len(windows))


# Well, the pylibseq version is clearly more compact.  Of course, you can/should abstract the pure Python version into a standalone function.
# 
# Why would you ever use the manual version?  It can save you memory.  The pylibseq version constructs an iterable list of windows, meaning that there is an object allocated for each window.  In the manual version above, we grew a list of objects, but we could just as easily have processed each window and let it go out of scope.
Example #19
0
# Display BigTable, which is defined earlier, outside this excerpt.
print(BigTable)


# ## Summary statistics from samples
# 
# We will use the [pylibseq](http://molpopgen.github.io/pylibseq/) package to calculate summary statistics.  pylibseq is a Python wrapper around [libsequence](http://molpopgen.github.io/libsequence/).

# In[15]:

import libsequence.polytable as polyt
import libsequence.summstats as sstats

#Convert neutral mutations (element 0 of each sample) into libsequence
#"SimData" objects, which are intended to handle binary (0/1) data like
#what comes out of these simulations
n = [polyt.simData(i[0]) for i in samples]

#Create "factories" for calculating the summary stats
an = [sstats.polySIM(i) for i in n]

##Collect a bunch of summary stats into a pandas.DataFrame, one row per
##sample, with columns 'thetapi', 'npoly' and 'thetaw':
NeutralMutStats = pandas.DataFrame([ {'thetapi':i.thetapi(),'npoly':i.numpoly(),'thetaw':i.thetaw()} for i in an ])

# Bare expression: shows the DataFrame when run as a notebook cell.
NeutralMutStats


# ### The average $\pi$ under the model
# 
# Under the BGS model, the expectation of $\pi$ is $E[\pi]=\pi_0e^{-\frac{U}{2sh+r}}$, where $U$ is the mutation rate to strongly-deleterious variants, $\pi_0$ is the value expected in the absence of BGS (_i.e._ $\pi_0 = \theta = 4N_e\mu$), $s$ and $h$ are the selection and dominance coefficients, and $r$ is the recombination rate.
# 
# Note that the definition of $U$ is _per diploid_, meaning twice the per gamete rate. (See Hudson and Kaplan (1995) PMC1206891 for details).
Example #20
0
# In[30]:

# For every sample, bin element 0 of each entry of i[0] into consecutive
# half-open windows [start, start + 0.1), for as long as start < 3.
for i in samples:
    windows = []
    start = 0
    while start < 3:
        ##We will only look at neutral mutations, which are element 0 of each sample
        window = [j[0] for j in i[0] if (j[0] >= start and j[0] < start + 0.1)]
        windows.append(window)
        start += 0.1
    ##We now have a full set of windows that we can do something with
    print(len(windows))  ##There should be 30, and many will be empty

# ### Using [pylibseq](https://github.com/molpopgen/pylibseq)

# In[31]:

from libsequence.windows import Windows
from libsequence.polytable import simData
# Same windowing as the manual loop, delegated to pylibseq's Windows class.
for i in samples:
    ##We need to convert our list of tuples
    ##into types that pylibseq/libsequence understand:
    # NOTE(review): the arguments 0.1, 0.1, 0, 3 are presumably window size,
    # step, start and end -- confirm against the Windows signature.
    windows = Windows(simData(i[0]), 0.1, 0.1, 0, 3)
    ##Now, you can analyze the windows, etc.
    print(len(windows))

# Well, the pylibseq version is clearly more compact.  Of course, you can/should abstract the pure Python version into a standalone function.
#
# Why would you ever use the manual version?  It can save you memory.  The pylibseq version constructs an iterable list of windows, meaning that there is an object allocated for each window.  In the manual version above, we grew a list of objects, but we could just as easily have processed each window and let it go out of scope.