def test_from_file(self):
    """
    Loading spectrum from file.

    Writes a random 3x3 Spectrum (with comment lines) to disk, reads it back
    and checks data, mask, folded flag and comments.  This is done for the
    default file format and for the old format (``foldmaskinfo=False``),
    and finally for a folded spectrum carrying an extra masked entry.
    """
    commentsin = ['comment 1', 'comment 2']
    filename = 'test.fs'
    data = numpy.random.rand(3, 3)

    fsin = dadi.Spectrum(data)

    # First pass: default format.  Second pass: old file format.
    for format_kwargs in ({}, {'foldmaskinfo': False}):
        fsin.to_file(filename, comment_lines=commentsin, **format_kwargs)

        # Read the file.
        fsout, commentsout = dadi.Spectrum.from_file(filename,
                                                     return_comments=True)
        os.remove(filename)

        # Ensure that fs was read correctly.
        self.assertTrue(numpy.allclose(fsout.data, fsin.data))
        self.assertTrue(numpy.all(fsout.mask == fsin.mask))
        self.assertEqual(fsout.folded, fsin.folded)
        # Ensure comments were read correctly.
        for ii, line in enumerate(commentsin):
            self.assertEqual(line, commentsout[ii])

    #
    # Now test a file with folding and masking
    #
    fsin = dadi.Spectrum(data).fold()
    fsin.mask[0, 1] = True
    fsin.to_file(filename)

    fsout = dadi.Spectrum.from_file(filename)
    os.remove(filename)

    # Ensure that fs was read correctly.
    self.assertTrue(numpy.allclose(fsout.data, fsin.data))
    self.assertTrue(numpy.all(fsout.mask == fsin.mask))
    self.assertEqual(fsout.folded, fsin.folded)
def intersect_masks(m1, m2):
    """
    Versions of m1 and m2 that are masked where either m1 or m2 were masked.

    If neither m1 nor m2 is a masked array, m1 and m2 are returned unchanged
    (the very same objects).  Otherwise both are wrapped as dadi.Spectrum
    objects that carry identical, independently copied, masks.

    m1, m2: Arrays (masked or not) to be given a common mask.

    Returns the tuple (m1, m2).
    """
    ma = numpy.ma
    if not (ma.isMaskedArray(m1) or ma.isMaskedArray(m2)):
        # Nothing is masked: hand the inputs straight back.
        return m1, m2

    # Imported locally, and only on the path that needs it, so that plain
    # (unmasked) inputs never trigger the dadi import.
    import dadi

    joint_mask = ma.mask_or(ma.getmask(m1), ma.getmask(m2))
    # Each result gets its own copy so later edits to one mask do not leak
    # into the other.
    m1 = dadi.Spectrum(m1, mask=joint_mask.copy())
    m2 = dadi.Spectrum(m2, mask=joint_mask.copy())
    return m1, m2
def _generate_dadi_fs(neu_fs, nonneu_fs, output):
    """
    Description:
        Writes the neutral and non-neutral frequency spectra as dadi files.

    Arguments:
        neu_fs numpy.ndarray: Frequency spectrum for neutral mutations.
        nonneu_fs numpy.ndarray: Frequency spectrum for non-neutral mutations.
        output list: Names of output files; output[0] receives the neutral
                     spectrum and output[1] the non-neutral one.
    """
    # Build both Spectrum objects before touching the filesystem.
    spectra = [dadi.Spectrum(raw_fs) for raw_fs in (neu_fs, nonneu_fs)]
    for index, spectrum in enumerate(spectra):
        spectrum.to_file(output[index])
def test_unfolding(self):
    """
    Fold and then unfold a Spectrum with unusual masking, checking that the
    folded flag, the total data and the mask come out as expected.
    """
    ns = (3, 4)
    last_row = ns[0] - 1
    last_col = ns[1] - 1

    # We add some unusual masking.
    fs = dadi.Spectrum(numpy.random.uniform(size=ns))
    fs.mask[0, 1] = fs.mask[1, 1] = True

    folded = fs.fold()
    unfolded = folded.unfold()

    # Check that it was properly recorded
    self.assertFalse(unfolded.folded)

    # Check that no data was lost
    for derived in (folded, unfolded):
        self.assertAlmostEqual(fs.data.sum(), derived.data.sum())

    # Note that fs.sum() need not be equal to folded.sum(), if fs had
    # some masked values.
    self.assertAlmostEqual(folded.sum(), unfolded.sum())

    # Check that the proper entries are masked: each originally masked
    # entry, followed by the entry at the opposite position of the array.
    expected_masked = [
        (0, 1),
        (last_row, last_col - 1),
        (1, 1),
        (last_row - 1, last_col - 1),
    ]
    for entry in expected_masked:
        self.assertTrue(unfolded.mask[entry])
def dadi_multiSFS(dd, pops, proj, unfold, outdir, prefix, dtype):
    """
    Build the joint (multi-population) SFS and write it in dadi and
    fastsimcoal2 formats.

    dd: dadi data dictionary passed to Spectrum.from_data_dict.
    pops: Population names; also joined with "-" to form the dadi file name.
    proj: Projection sizes, one per population.
    unfold: Passed as ``polarized`` to from_data_dict.
    outdir: Directory holding the "dadi" and "fastsimcoal2" sub-directories
            (both must already exist; nothing here creates them).
    prefix: Prefix of the fastsimcoal2 "<prefix>_MSFS.obs" file.
    dtype: If equal to "int", bin values are rounded to integers.

    Returns the path of the dadi file that was written.
    """
    print("Doing multiSFS for all pops")
    dadi_multi_filename = os.path.join(outdir, "dadi", "-".join(pops) + ".sfs")
    fsc_multi_filename = os.path.join(outdir, "fastsimcoal2",
                                      prefix + "_MSFS.obs")

    ## Get the multiSFS
    fs = dadi.Spectrum.from_data_dict(dd, pops, proj, polarized=unfold)

    ## Do int bins rather than float
    if dtype == "int":
        rounded = np.rint(np.array(fs.data))
        fs = dadi.Spectrum(rounded,
                           data_folded=fs.folded,
                           mask=fs.mask,
                           fill_value=0,
                           dtype=int)

    ## Write out the dadi file
    fs.to_file(dadi_multi_filename)

    ## Convert to fsc multiSFS format
    deme_line = "{}\t{}\n".format(len(pops), " ".join(str(x) for x in proj))
    with open(fsc_multi_filename, 'w') as outfile:
        outfile.write(
            "1 observations. No. of demes and sample sizes are on next line.\n"
        )
        outfile.write(deme_line)
        # The second line of the dadi file just written is copied over
        # verbatim as the fsc data line.
        with open(dadi_multi_filename) as infile:
            data_line = infile.readlines()[1]
            outfile.write(data_line)
            outfile.write("\n")
    return dadi_multi_filename
def dadi_oneD_sfs_per_pop(dd, pops, proj, unfold, outdir, prefix, dtype):
    """
    Write a 1D SFS per population in dadi and fastsimcoal2 formats.

    dd: dadi data dictionary passed to Spectrum.from_data_dict.
    pops: Population names.
    proj: Projection sizes, indexed in parallel with pops.
    unfold: Passed as ``polarized``; also selects the "D" (True) or "M"
            (False) tag in the fastsimcoal2 file name.
    outdir: Directory holding the "dadi" and "fastsimcoal2" sub-directories.
    prefix: Accepted but not used by this function.
    dtype: If equal to "int", bin values are rounded to integers.
    """
    dadi_dir = os.path.join(outdir, "dadi")
    fsc_dir = os.path.join(outdir, "fastsimcoal2")
    if unfold:
        M_or_D = "D"
    else:
        M_or_D = "M"

    for i, pop in enumerate(pops):
        print("Doing 1D sfs - {}".format(pop))
        pop_proj = proj[i]
        dadi_sfs_file = os.path.join(dadi_dir,
                                     "{}-{}.sfs".format(pop, pop_proj))

        fs = dadi.Spectrum.from_data_dict(dd, [pop], [pop_proj],
                                          mask_corners=True,
                                          polarized=unfold)

        ## Do int bins rather than float
        if dtype == "int":
            rounded = np.rint(np.array(fs.data))
            fs = dadi.Spectrum(rounded,
                               data_folded=fs.folded,
                               mask=fs.mask,
                               fill_value=0,
                               dtype=int)

        fs.to_file(dadi_sfs_file)

        ## Convert each 1D sfs to fsc format
        fsc_oneD_filename = os.path.join(
            fsc_dir, pop + "_{}AFpop0.obs".format(M_or_D))
        column_labels = ["d0_" + str(x) for x in range(pop_proj + 1)]
        with open(fsc_oneD_filename, 'w') as outfile:
            outfile.write("1 observation\n")
            outfile.write("\t".join(column_labels) + "\n")
            ## Grab the fs data (second line) from the dadi sfs
            with open(dadi_sfs_file) as infile:
                outfile.write(infile.readlines()[1])
                outfile.write("\n")
def compare_msprime_dadi_OutOfAfrica(input_fids, output_path, sample_size=20):
    """
    Plot a dadi Out-of-Africa model spectrum against summed msprime spectra.

    input_fids: Paths of dadi-format SFS files; their spectra are summed.
    output_path: Where the comparison figure is saved.
    sample_size: Each summed spectrum is (sample_size + 1) x (sample_size + 1).

    NOTE(review): the model is always evaluated for sample sizes
    [20, 20, 20], so a sample_size other than 20 presumably yields
    mismatched shapes in the comparison plot -- confirm with callers.
    """
    # For parameter reference
    # p0 = [nuAf, nuB, nuEu0, nuEu, nuAs0, nuAs, mAfB, mAfEu, mAfAs, mEuAs,
    #       TAf, TB, TEuAs]
    OoA_popt = [
        1.68, 0.287, 0.129, 3.74, 0.070, 7.29, 3.65, 0.44, 0.28, 1.40, 0.607,
        0.396, 0.058
    ]
    OoA_pts_l = [30, 40, 50]
    OoA_ns = [20, 20, 20]

    # Evaluate the three-population model, then drop the third population.
    extrapolated_model = dadi.Numerics.make_extrap_func(OoA_func)
    OoA_model = extrapolated_model(OoA_popt, OoA_ns, OoA_pts_l)
    OoA_model = OoA_model.marginalize([2])

    # Start from an all-zero joint spectrum and accumulate every input file.
    n_bins = sample_size + 1
    msprime_joint_sfs = dadi.Spectrum([[0] * n_bins] * n_bins)
    for fid in input_fids:
        msprime_joint_sfs += dadi.Spectrum.from_file(fid)

    fig = plt.figure(219033)
    fig.clear()
    dadi.Plotting.plot_2d_comp_multinom(OoA_model,
                                        msprime_joint_sfs,
                                        vmin=1,
                                        resid_range=50,
                                        show=False)
    fig.savefig(output_path)
def test_1d_ic(self):
    """
    The extrapolated standard neutral model for 17 samples should match a
    spectrum proportional to 1/i to within atol=1e-3.
    """
    # This is just the standard neutral model
    func_ex = dadi.Numerics.make_extrap_log_func(dadi.Demographics1D.snm)
    fs = func_ex([], (17, ), [100, 120, 140])

    answer = dadi.Spectrum(1. / numpy.arange(18))

    self.assertTrue(numpy.ma.allclose(fs, answer, atol=1e-3))
def parse_fold_sfs(sfs, sampleSize = 0, fold=True, maskSingletons=False):
    """
    Wrap a list of SFS counts as a dadi.Spectrum with the boundary entries
    masked.

    sfs: List of counts starting at the singleton class (the zero class is
         prepended here).
    sampleSize: Sample size.  With fold=True it must exceed len(sfs); the
                spectrum is padded with sampleSize - len(sfs) masked zeros.
                With fold=False it must equal len(sfs) + 1.
    fold: If True, sfs must already be a folded SFS!
    maskSingletons: If True, the singleton entry is masked as well.

    Returns the dadi.Spectrum, or -- as before -- an error *string* when
    sampleSize is inconsistent with len(sfs).
    """
    n_sfs = len(sfs)
    # Entry 0 is always masked; entry 1 (singletons) only on request.
    # The flags are compared with == True, exactly as before, so that
    # non-boolean arguments keep selecting the same branch.
    n_head = 2 if maskSingletons == True else 1
    head_mask = [True] * n_head + [False] * (n_sfs + 1 - n_head)

    if fold == True:
        addZeroLength = sampleSize - n_sfs
        if addZeroLength < 1:
            return("Wrong use of parse_fold_sfs function!")
        data = [0.] + sfs + [0.] * addZeroLength
        mask = head_mask + [True] * addZeroLength
        return dadi.Spectrum(data, data_folded=True, mask=mask)

    if sampleSize != n_sfs + 1:
        return("sampleSize not equal 1+length(sfs)!")
    data = [0.] + sfs + [0.]
    # The data has len(sfs) + 2 entries, so the mask needs a final entry
    # too (previously it was one short, which numpy.ma rejects).  The last
    # entry is the appended zero class and is masked like the first one.
    mask = head_mask + [True]
    return dadi.Spectrum(data, data_folded=False, mask=mask)
def test_1d_stationary(self):
    """
    A two-epoch model left to equilibrate should match the stationary
    spectrum for the new population size to within atol=1e-2.
    """
    func_ex = dadi.Numerics.\
            make_extrap_log_func(dadi.Demographics1D.two_epoch)
    # We let a two-epoch model equilibrate for tau=10, which should
    # eliminate almost all traces of the size change.
    fs = func_ex((0.5, 10), (17, ), [40, 50, 60])
    answer = dadi.Spectrum(0.5 / numpy.arange(18))

    self.assertTrue(numpy.ma.allclose(fs, answer, atol=1e-2))
def ts_to_dadi_sfs(ts_path,
                   out_path,
                   out_path_nonvariant,
                   sample_size=20,
                   mask_file=None):
    '''
    Convert a tree sequence into two-population joint SFS files for dadi.

    ts_path: Path of the tree sequence to load.  The part of its base name
             before the first "." is used as the chromosome name when
             selecting rows of the mask file.
    out_path: Output file for the joint SFS.
    out_path_nonvariant: Output file for the same SFS with its [0, 0] entry
                         set to the number of non-variant sites.
    sample_size: Number of haplotype columns belonging to the first
                 population; the remaining columns form the second.
    mask_file: Optional tab-separated, header-less file whose first three
               columns are chromosome, interval start and interval end.
               SNPs inside these intervals are dropped and the interval
               lengths are subtracted from the sequence length.
    '''
    ts = tskit.load(ts_path)
    haps = ts.genotype_matrix()
    total_length = ts.sequence_length

    # Masking
    # NOTE(review): sized by the mutation count but used to index the rows
    # of the genotype matrix; presumably these agree for the inputs used.
    in_mask = np.full(ts.get_num_mutations(), False)
    if mask_file:
        mask_table = pd.read_csv(mask_file, sep="\t", header=None)
        chrom = ts_path.split("/")[-1].split(".")[0]
        sub = mask_table[mask_table[0] == chrom]
        mask_ints = pd.IntervalIndex.from_arrays(sub[1], sub[2])
        snp_locs = [int(x.site.position) for x in ts.variants()]
        # IntervalIndex.contains is element-wise in current pandas (one
        # boolean per interval); reduce it to "inside any interval".
        # np.any also accepts the single boolean older pandas returned.
        tmp_bool = [bool(np.any(mask_ints.contains(x))) for x in snp_locs]
        in_mask = np.logical_or(in_mask, tmp_bool)
        total_length -= np.sum(mask_ints.length)
    retain = np.logical_not(in_mask)
    haps_pops_joint = np.array(haps[retain, :])

    # Break up the haplotypes into separate populations based on sample_size
    haps_pop0_joint = haps_pops_joint[:, :sample_size]
    haps_pop1_joint = haps_pops_joint[:, sample_size:]

    genotypes_pop0_joint = allel.HaplotypeArray(haps_pop0_joint).to_genotypes(
        ploidy=2)
    allele_counts_pop0_joint = genotypes_pop0_joint.count_alleles()
    genotypes_pop1_joint = allel.HaplotypeArray(haps_pop1_joint).to_genotypes(
        ploidy=2)
    allele_counts_pop1_joint = genotypes_pop1_joint.count_alleles()

    sfs_joint = allel.joint_sfs(allele_counts_pop0_joint[:, 1],
                                allele_counts_pop1_joint[:, 1])
    num_sites = np.sum(sfs_joint)
    sfs_joint = dadi.Spectrum(sfs_joint)
    sfs_joint.to_file(out_path)
    # need to get the number of nonvariant sites for the [0,0] entry
    sfs_joint[0, 0] = total_length - num_sites
    sfs_joint.to_file(out_path_nonvariant)
def test_marginalize(self):
    """
    Marginalizing over one axis must agree with summing the data over that
    axis, and must commute with folding.
    """
    ns = (7, 8, 6)

    fs = dadi.Spectrum(numpy.random.uniform(size=ns))
    folded = fs.fold()

    marg1 = fs.marginalize([1])
    # Do manual marginalization.
    manual = dadi.Spectrum(fs.data.sum(axis=1))

    # Check that these are equal in the unmasked entries.
    self.assertTrue(
        numpy.allclose(numpy.where(marg1.mask, 0, marg1.data),
                       numpy.where(manual.mask, 0, manual.data)))

    # Check folded Spectrum objects. I should get the same result if I
    # marginalize then fold, as if I fold then marginalize.
    mf1 = marg1.fold()
    mf2 = folded.marginalize([1])
    self.assertTrue(numpy.allclose(mf1, mf2))
def marginalB(self):
    """
    Marginal 1D frequency spectrum for B locus.

    Each entry self[fAB, :, faB], summed over its middle axis, is added to
    bin fAB + faB of the result.  Only bins 0 .. ns - 1 receive
    contributions, where ns = self.shape[0] - 1.  The extrapolation
    attributes extrap_x and extrap_t are copied onto the result.
    """
    ns = self.shape[0] - 1
    marg = dadi.Spectrum(np.zeros(ns + 1))
    for n_AB in range(ns):
        # Keep the combined count n_AB + n_aB strictly below ns.
        for n_aB in range(ns - n_AB):
            count_B = n_AB + n_aB
            marg[count_B] += self[n_AB, :, n_aB].sum()
    marg.extrap_x = self.extrap_x
    marg.extrap_t = self.extrap_t
    return marg
def test_folded_slices(self):
    """
    Slices of a Spectrum must report the same folded status as the
    Spectrum they were taken from.
    """
    ns = (3, 4)
    fs1 = dadi.Spectrum(numpy.random.rand(*ns))
    folded1 = fs1.fold()

    # assertEqual performs the same == comparison as before, so a
    # non-boolean folded attribute is still rejected.
    self.assertEqual(fs1[:].folded, False)
    self.assertEqual(folded1[:].folded, True)

    self.assertEqual(fs1[0].folded, False)
    self.assertEqual(folded1[1].folded, True)

    self.assertEqual(fs1[:, 0].folded, False)
    self.assertEqual(folded1[:, 1].folded, True)
def test_masked_folding(self):
    """
    Test folding when some entries of the spectrum are masked.

    The masked entries themselves, and the entry one of them folds onto,
    must all be masked in the folded spectrum.
    """
    data = numpy.zeros((5, 6))
    fs = dadi.Spectrum(data)
    # This folds to an entry that will already be masked.
    fs.mask[1, 2] = True
    # This folds to (1,1), which needs to be masked.
    fs.mask[3, 4] = True
    ff = fs.fold()
    # Ensure that all those are masked.
    for entry in [(1, 2), (3, 4), (1, 1)]:
        self.assertTrue(ff.mask[entry])
def test_to_file(self):
    """
    Saving spectrum to file.

    Only checks that writing succeeds, once in the default format and once
    with foldmaskinfo=False; the file is removed after each write.
    """
    comments = ['comment 1', 'comment 2']
    filename = 'test.fs'
    fs = dadi.Spectrum(numpy.random.rand(3, 3))

    for format_kwargs in ({}, {'foldmaskinfo': False}):
        fs.to_file(filename, comment_lines=comments, **format_kwargs)
        os.remove(filename)
def _from_phi_1D_direct(phi, n, xx, mask_corners=True, het_ascertained=None):
    """
    Compute sample Spectrum from population frequency distribution phi.

    phi: Population frequency distribution, defined on the grid xx.
    n: Sample size; rounded to the nearest integer before use.
    xx: One-dimensional grid on which phi is defined.
    mask_corners: Passed through to the dadi.Spectrum constructor.
    het_ascertained: If equal to 'xx', every weight is additionally
                     multiplied by xx * (1 - xx).

    See from_phi for explanation of arguments.
    """
    n = round(n)
    weight_by_het = het_ascertained == 'xx'

    data = np.zeros(n + 1)
    for derived in range(n + 1):
        # Binomial weight for seeing `derived` copies out of n at each xx.
        weight = scipy.special.comb(n, derived)
        weight = weight * xx**derived * (1 - xx)**(n - derived)
        if weight_by_het:
            weight *= xx * (1 - xx)
        data[derived] = trapz(weight * phi, xx)

    return dadi.Spectrum(data, mask_corners=mask_corners)
def _fold(spectrum):
    """
    Fold a square 2D spectrum by adding it to its transpose.

    The input is first wrapped as a dadi.Spectrum.  If entry [1, 2] of its
    mask is already set, the spectrum is treated as already folded: a
    message is printed and the wrapped spectrum is returned untouched.
    Otherwise the spectrum plus its transpose is returned, with the
    diagonal halved (it was counted twice), and with row 0, column 0 and,
    for every row ii, the columns from ii + 1 and from
    len(spectrum) - 1 - ii onwards masked.
    """
    spectrum = dadi.Spectrum(spectrum)
    if spectrum.mask[1, 2]:
        # Call syntax gives the same output on Python 2 and Python 3.
        print("error: trying to fold a spectrum that is already folded")
        return spectrum

    spectrum = spectrum + np.transpose(spectrum)
    size = len(spectrum)
    for ii in range(size):
        spectrum[ii, ii] /= 2

    spectrum.mask[0, :] = True
    spectrum.mask[:, 0] = True
    for ii in range(size):
        spectrum.mask[ii, ii + 1:] = True
        spectrum.mask[ii, size - 1 - ii:] = True
    return spectrum
def _from_phi_1D_direct_dphi_directly(n,
                                      xx,
                                      mask_corners=True,
                                      het_ascertained=None):
    """
    Compute derivative of the sample Spectrum with respect to the
    population frequency distribution phi.

    n: Sample size; rounded to the nearest integer before use, as the
       related from-phi helpers do, so a float-valued n is accepted.
    xx: One-dimensional grid on which phi is defined.
    mask_corners: Passed through to the dadi.Spectrum constructor.
    het_ascertained: If equal to 'xx', every weight is additionally
                     multiplied by xx * (1 - xx).
    """
    n = round(n)
    # For example with 20 samples there are 21 elements: entry 0 holds the
    # mutations seen in 0 samples.
    data = np.zeros(n + 1)
    for ii in range(0, n + 1):
        factorx = scipy.special.comb(n, ii) * xx**ii * (1 - xx)**(n - ii)
        if het_ascertained == 'xx':
            factorx *= xx * (1 - xx)
        # Same integral as the non-derivative version, without the phi
        # factor.
        data[ii] = trapz(factorx, xx)
    return dadi.Spectrum(data, mask_corners=mask_corners)
def test_ambiguous_folding(self):
    """
    Test folding when the minor allele is ambiguous.
    """
    data = numpy.zeros((4, 4))
    # Both these entries correspond to an allele seen in 3 of 6 samples.
    # So the minor allele is ambiguous. In this case, we average the two
    # possible assignments.
    data[0, 3] = 1
    data[3, 0] = 3
    fs = dadi.Spectrum(data)
    ff = fs.fold()

    correct = numpy.zeros((4, 4))
    correct[0, 3] = correct[3, 0] = 2
    self.assertTrue(numpy.allclose(correct, ff.data))
def _from_phi_1D_direct_dphi_directly(n,
                                      xx,
                                      mask_corners=True,
                                      het_ascertained=None):
    """
    Compute derivative of the sample Spectrum with respect to the
    population frequency distribution phi.

    n: Sample size; rounded to the nearest integer before use.
    xx: One-dimensional grid on which phi is defined.
    mask_corners: Passed through to the dadi.Spectrum constructor.
    het_ascertained: If equal to 'xx', every weight is additionally
                     multiplied by xx * (1 - xx).

    See from_phi for explanation of arguments.
    """
    n = round(n)
    weight_by_het = het_ascertained == 'xx'

    data = np.zeros(n + 1)
    for derived in range(n + 1):
        # Binomial weight for seeing `derived` copies out of n at each xx.
        weight = scipy.special.comb(n, derived)
        weight = weight * xx**derived * (1 - xx)**(n - derived)
        if weight_by_het:
            weight *= xx * (1 - xx)
        data[derived] = trapz(weight, xx)

    return dadi.Spectrum(data, mask_corners=mask_corners)
def test_folding(self):
    """
    Folding a 2D spectrum.
    """
    data = numpy.reshape(numpy.arange(12), (3, 4))
    fs = dadi.Spectrum(data)
    ff = fs.fold()

    # Ensure no SNPs have gotten lost.
    self.assertAlmostEqual(fs.sum(), ff.sum(), 6)
    self.assertAlmostEqual(fs.data.sum(), ff.data.sum(), 6)
    # Ensure that the empty entries are actually empty.
    self.assertTrue(numpy.all(ff.data[::-1] == numpy.tril(ff.data[::-1])))

    # This turns out to be the correct result.
    correct = numpy.tri(4)[::-1][-3:] * 11
    self.assertTrue(numpy.allclose(correct, ff.data))
def test_projection(self):
    """
    Projecting a Spectrum to smaller sample sizes: data conservation,
    the equilibrium shape, mask propagation, and commuting with folding.
    """
    # Test that projecting a multi-dimensional Spectrum succeeds
    ns = (7, 8, 6)
    fs = dadi.Spectrum(numpy.random.uniform(size=ns))
    p = fs.project([3, 4, 5])
    # Also that we don't lose any data
    self.assertAlmostEqual(fs.data.sum(), p.data.sum())

    # Check that when I project an equilibrium spectrum, I get back an
    # equilibrium spectrum
    fs = dadi.Spectrum(1. / numpy.arange(100))
    p = fs.project([17])
    self.assertTrue(
        numpy.allclose(p[1:-1], 1. / numpy.arange(1, len(p) - 1)))

    # Check that masked values are propagated correctly.
    fs = dadi.Spectrum(1. / numpy.arange(20))
    # All values with 3 or fewer observed should be masked.
    fs.mask[3] = True
    p = fs.project([10])
    self.assertTrue(numpy.all(p.mask[:4]))

    # Check that masked values are propagated correctly.
    fs = dadi.Spectrum(1. / numpy.arange(20))
    fs.mask[-3] = True
    # Here it is the last three entries that should be masked.
    p = fs.project([10])
    self.assertTrue(numpy.all(p.mask[-3:]))

    # A more complicated two dimensional projection problem...
    fs = dadi.Spectrum(numpy.random.uniform(size=(9, 7)))
    fs.mask[2, 3] = True
    p = fs.project([4, 4])
    self.assertTrue(numpy.all(p.mask[:3, 1:4]))

    # Test that projecting a folded multi-dimensional Spectrum succeeds
    # Should get the same result if I fold then project as if I project
    # then fold.
    ns = (7, 8, 6)
    fs = dadi.Spectrum(numpy.random.uniform(size=ns))
    fs.mask[2, 3, 1] = True
    folded = fs.fold()

    p = fs.project([3, 4, 5])
    pf1 = p.fold()
    pf2 = folded.project([3, 4, 5])

    # Check equality
    self.assertTrue(numpy.all(pf1.mask == pf2.mask))
    self.assertTrue(numpy.allclose(pf1.data, pf2.data))
def msprime_to_dadi_simulation(path, seed, org, chrom, sample_size=20):
    '''
    Simulate two populations with msprime and save their joint SFS for dadi.

    path: Output file for the dadi-format joint SFS.
    seed: Random seed handed to msprime.simulate.
    org: "<species>_<model>" string.  Everything before the last "_" names
         an attribute of stdpopsim (the species); the final piece names the
         model attribute on that species, which is called with no arguments.
    chrom: Key into the species' genome.chromosomes.
    sample_size: Number of samples drawn from each of populations 0 and 1.
    '''
    org_parts = org.split('_')
    species = getattr(stdpopsim, '_'.join(org_parts[:-1]))
    chrom = species.genome.chromosomes[chrom]
    model = getattr(species, org_parts[-1])()

    pop0_samples = [msprime.Sample(population=0, time=0)] * sample_size
    pop1_samples = [msprime.Sample(population=1, time=0)] * sample_size
    ts_pops_joint = msprime.simulate(
        samples=pop0_samples + pop1_samples,
        recombination_map=chrom.recombination_map(),
        mutation_rate=chrom.default_mutation_rate,
        random_seed=seed,
        **model.asdict())
    haps_pops_joint = np.array(ts_pops_joint.genotype_matrix())

    # Split the haplotype columns by population (the first sample_size
    # columns are population 0) and count alleles in each.
    allele_counts = []
    for pop_haps in (haps_pops_joint[:, :sample_size],
                     haps_pops_joint[:, sample_size:]):
        genotypes = allel.HaplotypeArray(pop_haps).to_genotypes(ploidy=2)
        allele_counts.append(genotypes.count_alleles())

    # Column 1 of each allele-count table is used for the joint SFS.
    sfs_joint = allel.joint_sfs(allele_counts[0][:, 1],
                                allele_counts[1][:, 1])
    sfs_joint = dadi.Spectrum(sfs_joint)
    sfs_joint.to_file(path)
def _from_phi_1D_direct_dphi_analytical(n,
                                        xx,
                                        dfactor,
                                        mask_corners=True,
                                        het_ascertained=None):
    """
    Compute sample Spectrum from population frequency distribution phi
    (analytical-derivative variant).

    n: Sample size; rounded to the nearest integer before use.
    xx: One-dimensional grid.
    dfactor: Array whose first differences scale each data entry.
    mask_corners: Passed through to the dadi.Spectrum constructor.
    het_ascertained: If equal to 'xx', factorx is multiplied by
                     xx * (1 - xx).

    See from_phi for explanation of arguments.

    NOTE(review): the original author marked this function "test failed"
    (see the string just below).  As written, factorx and double_delta_xx
    are computed but never used, and data starts as zeros and is only ever
    multiplied in place -- treat this function as unfinished.
    """
    """ test failed """
    n = round(n)
    delta_dfactor = np.diff(dfactor)
    double_delta_xx = np.diff(xx, 2)
    # NOTE(review): length n here, whereas the sibling helpers allocate
    # n + 1 entries -- confirm which is intended.
    data = np.zeros(n)
    for ii in range(0, n):
        factorx = scipy.special.comb(n, ii) * xx**ii * (1 - xx)**(n - ii)
        if het_ascertained == 'xx':
            factorx *= xx * (1 - xx)
        # data[ii] = trapz(factorx, double_delta_xx/2)
        # data[ii] *= double_delta_xx/2
        # NOTE(review): the right-hand side is an array (np.diff output)
        # being folded into a single element of data.
        data[ii] *= delta_dfactor / 2
    return dadi.Spectrum(data, mask_corners=mask_corners)
def alt_mut_mech_sample_spectrum(ns):
    """
    alternate mutation mechanism, mutations inserted at [1,1]
    turns out that changing population size does not affect the
    distribution of mutations entering the population this way
    we implement Jenkins et al (2014) exact solution
    this is for neutral spectrum only, for selected spectrum, integrate as
    above with lam = 1

    ns - number of sampled individuals from the population

    Returns a (ns + 1) x (ns + 1) dadi.Spectrum.  Entries with ii, jj >= 1
    and ii + jj < ns are filled in; column 0, row 0 and every entry with
    ii + jj >= ns are masked.
    """
    fs = np.zeros((ns + 1, ns + 1))
    # Float literal so the prefactor is never truncated by integer
    # division (as Python 2 would do for 2 * ns / (ns - 2)).
    prefactor = 2. * ns / (ns - 2) if ns > 2 else 0.
    for ii in range(1, ns):
        for jj in range(1, ns):
            if ii + jj < ns:
                # ns - na of the original formula, with na = ns - ii - jj.
                k = ii + jj
                fs[ii, jj] = prefactor / ((k - 1) * k * (k + 1))
    fs = dadi.Spectrum(fs)
    # Write to the Spectrum's own mask rather than to the mask of a
    # temporary slice.
    fs.mask[:, 0] = True
    fs.mask[0, :] = True
    for ii in range(len(fs)):
        fs.mask[ii, ns - ii:] = True
    return fs
def sfs_from_binomial(mutdf,
                      sub,
                      cutoff=1,
                      samples=10000,
                      maxd=2000,
                      mind=0,
                      mode='MyAnn',
                      germ=False):
    """
    Build a 1D Spectrum from binomially resampled predicted frequencies.

    mutdf: DataFrame with SampleFreq, Depth, PredFreq and `mode` columns.
    sub: Value the `mode` column must equal for a row to be kept.
    cutoff: SampleFreq threshold; rows below it are kept when germ is
            False, rows at or above it when germ is True.
    samples: Passed to get_binom; the result has samples + 1 entries.
    maxd, mind: Exclusive upper and lower bounds on Depth.
    mode: Name of the annotation column compared against sub.
    germ: Selects which side of cutoff is kept (see cutoff).

    Rows with PredFreq <= 1e-6 are always dropped.
    """
    # The two modes differ only in which side of the cutoff they keep.
    if germ:
        freq_filter = mutdf.SampleFreq >= cutoff
    else:
        freq_filter = mutdf.SampleFreq < cutoff

    selected = mutdf[freq_filter
                     & (mutdf.Depth > mind)
                     & (mutdf.Depth < maxd)
                     & (mutdf[mode] == sub)
                     & (mutdf.PredFreq > 1e-6)]
    resampled = selected.PredFreq.apply(get_binom, samples=samples)
    sfvc = resampled.apply(np.around).value_counts()

    # Counts for every class 0..samples, with 0 where a class is absent.
    afs = [sfvc[i] if i in sfvc.index else 0 for i in range(0, samples + 1)]
    return dadi.Spectrum(afs)
def dadi_to_fsc_sfs(sfs_files, dadi_out_path, fsc_out_path, sample_size=20):
    """
    Sum several dadi 2D SFS files and write the total in dadi and
    fastsimcoal2 joint-SFS formats.

    sfs_files: Paths of dadi-format SFS files to add together.
    dadi_out_path: Where the summed dadi-format spectrum is written.
    fsc_out_path: Where the fastsimcoal2-format table is written.
    sample_size: Each axis of the spectrum has sample_size + 1 entries.
    """
    row_size = sample_size + 1

    ## get data sfs files and make them into one joint sfs and save
    msprime_joint_sfs = dadi.Spectrum([[0] * row_size] * row_size)
    for fid in sfs_files:
        msprime_joint_sfs += dadi.Spectrum.from_file(fid)
    msprime_joint_sfs.to_file(dadi_out_path)

    ## convert dadi 2D sfs to FSC 2D sfs
    ## NB: FSC joint format file names look like this:
    ## <prefix>_jointMAFpop1_0.obs
    ## Where the first pop specified is listed in the rows and the second
    ## pop specified is listed in the columns.
    ## Column headers are d0_0 .. d0_n, row headers d1_0 .. d1_n.
    column_headers = ["d0_" + str(x) for x in range(row_size)]
    row_headers = ["d1_" + str(x) for x in range(row_size)]

    with open(fsc_out_path, 'w') as outfile:
        outfile.write("1 observation\n")
        outfile.write("\t" + "\t".join(column_headers) + "\n")

        with open(dadi_out_path) as infile:
            ## The second line of the dadi-style sfs contains the data
            row_data = infile.readlines()[1].split()

        ## Slice the flat data into chunks of one row each
        rows = []
        for start in range(0, len(row_data), row_size):
            rows.append(row_data[start:start + row_size])

        ## Write out each row to the file
        for i, row_head in enumerate(row_headers):
            outfile.write(row_head + "\t" + " ".join(rows[i]) + "\n")
# Script fragment: model, fs, multinom_LL_AB, scalingTheta, outputFile,
# outdir, modelName, ns and LhoodCalc are defined outside this excerpt.

# Multinomial log-likelihood of the data fs under the model, as computed by
# dadi, and dadi's optimal scaling of the model to the data.
dadi_ll_msmc_model = dadi.Inference.ll_multinom(model, fs )
optimalthetaFromDadi = dadi.Inference.optimal_sfs_scaling(model, fs)
#
# Two tab-separated lines are written: column names, then the values.
header='\t'.join(str(x) for x in ("dadiLL","AnnabelLL","NancTheta","dadiOptimalTheta"))
output='\t'.join(str(x) for x in (dadi_ll_msmc_model,multinom_LL_AB,scalingTheta,optimalthetaFromDadi))
outputFile.write(('{0}\n{1}\n').format(header,output))
outputFile.close()
########## plot an image: ############
#import pylab
import matplotlib.pyplot as plt
#fig=plt.figure(1)
#pylab.ion()
outputFigure=str(str(outdir)+"/"+str(modelName)+".expSFS.DadiScaling.figure.png")
dadi.Plotting.plot_1d_comp_multinom(model, fs)
# NOTE(review): this uses the name `pyplot`, while the import just above
# binds `plt`; presumably `pyplot` is imported elsewhere in the script --
# confirm, otherwise this line raises NameError.
pyplot.title((modelName))
plt.savefig(outputFigure)
######### this is currently a crappy way to do this; improve if going deeper down this path #########
############## pulling from grid search, this is the best-fit sfs (relative to theta =1 ) from dadi with T = 35 for AL ##########
# The block below repeats the likelihood / optimal-theta report above for a
# hard-coded 21-entry spectrum, which is folded before use.
modelName="bestFitDadiModel.T35.fromGridSearch"
model = dadi.Spectrum([0, 0.78349086, 0.51350478 ,0.38226656, 0.30918362 ,0.26505468, 0.23705724 ,0.21899399, 0.20767940, 0.20143653, 0.09971952,0,0,0,0,0,0,0,0,0,0]).fold() # this is from R in my grid search for CA ; is w/in 1 pt of MLE with T = 35 gen
# Normalise the model to proportions for the hand-written likelihood.
model_freq_fold = model/sum(model)
outputFile=open(str(outdir)+"/"+str(modelName)+".LLs.andOptimalTheta.txt","w")
multinom_LL_AB= LhoodCalc(model_freq_fold,fs,ns/2)
dadi_ll_msmc_model = dadi.Inference.ll_multinom(model, fs )
optimalthetaFromDadi = dadi.Inference.optimal_sfs_scaling(model, fs)
#
# NOTE(review): scalingTheta is not recomputed for this second model; the
# value from earlier in the script is written again -- confirm intended.
header='\t'.join(str(x) for x in ("dadiLL","AnnabelLL","NancTheta","dadiOptimalTheta"))
output='\t'.join(str(x) for x in (dadi_ll_msmc_model,multinom_LL_AB,scalingTheta,optimalthetaFromDadi))
outputFile.write(('{0}\n{1}\n').format(header,output))
outputFile.close()
import pickle
import pylab
#############################################################################
# Script fragment (Python 2 syntax): numpy and dadi are imported outside
# this excerpt.
#
# Each input file is read as a list of lines: line index 1 holds the sample
# size, and line indices 3 and 5 each hold one space-separated SFS
# (presumably synonymous and non-synonymous, going by the S / NS variable
# names -- confirm against the file format).
print "Load SFS data"
f1 = open("Lyrata_SFS.txt", "r")
raw1 = f1.readlines()
f1.close()
sampleSize1 = int(raw1[1])
# [:-1] drops the last character of the line (the trailing newline).
sfs_NS1 = numpy.array(raw1[5][:-1].split(" "), dtype='float64').tolist()
lensfs = len(sfs_NS1)
# Prepend the zero class and pad with zeros so that the list has
# sampleSize1 + 1 entries.
sfs_NS1 = [0.] + sfs_NS1 + numpy.zeros(shape=(sampleSize1-lensfs,)).tolist()
# Folded spectrum; entry 0 and the zero padding are masked, the observed
# entries are not ("noMask" = singletons left unmasked).
sfs_NS1_noMask = dadi.Spectrum(sfs_NS1, data_folded=True, mask = [True]*1 + [False]*(lensfs) + [True]*(sampleSize1-lensfs))
sfs_S1 = numpy.array(raw1[3][:-1].split(" "), dtype='float64').tolist()
lensfs = len(sfs_S1)
sfs_S1 = [0.] + sfs_S1 + numpy.zeros(shape=(sampleSize1-lensfs,)).tolist()
sfs_S1_noMask = dadi.Spectrum(sfs_S1, data_folded=True, mask = [True]*1 + [False]*(lensfs) + [True]*(sampleSize1-lensfs))
# Same procedure for the second input file.
f2 = open("Thaliana_SFS.txt", "r")
raw2 = f2.readlines()
f2.close()
sampleSize2 = int(raw2[1])
sfs_NS2 = numpy.array(raw2[5][:-1].split(" "), dtype='float64').tolist()
lensfs = len(sfs_NS2)
sfs_NS2 = [0.] + sfs_NS2 + numpy.zeros(shape=(sampleSize2-lensfs,)).tolist()
sfs_NS2_noMask = dadi.Spectrum(sfs_NS2, data_folded=True, mask = [True]*1 + [False]*(lensfs) + [True]*(sampleSize2-lensfs))
# Mask singletons!!