def getSFSfromSLiM(inp):
    """Tally fixations and build a site frequency spectrum from SLiM text.

    ``inp`` is split on newlines.  Empty lines and lines whose first
    character is ``'m'`` or ``'g'`` are ignored.  Every remaining line is
    split on single spaces and the integers in columns 6 and 7 (0-based) are
    compared:

    * column 7 > column 6: counted as a fixation, but only when column 6 is
      greater than 10000;
    * column 7 < column 6: column 7 is recorded as a polymorphism frequency;
    * equal values are ignored.

    NOTE(review): columns 6 and 7 look like "generation of origin" and
    "fixation generation / allele count" respectively -- confirm against the
    SLiM output format that feeds this function.

    Returns:
        ``(None, None)`` when ``inp`` is empty, otherwise
        ``(fixations, sfs)`` where ``sfs`` is the result of
        ``SFS.SFS_from_all_frequencies(frequencies, 20)``.
    """
    if len(inp) == 0:
        return None, None

    n_fixed = 0
    frequencies = []
    for line in inp.split('\n'):
        # Skip blank lines and the lines flagged by a leading 'm' / 'g'.
        if not line or line[0] in ('m', 'g'):
            continue

        fields = line.split(' ')
        col7 = int(fields[7])
        col6 = int(fields[6])

        if col7 > col6:
            # Assuming Ne = 1000, burn-in is 10,000
            if col6 > 10000:
                n_fixed += 1
        elif col7 < col6:
            frequencies.append(col7)

    return n_fixed, SFS.SFS_from_all_frequencies(frequencies, 20)
def orgPolyDict(organ_mutations, N):
    """Build a nested ``{organ: {mutation_type: SFS}}`` dictionary.

    Args:
        organ_mutations: mapping from an organ key to an iterable of mutation
            records.  For each record, item 1 is used as the grouping key and
            item 7 is converted with ``int()`` and collected as a frequency.
        N: forwarded unchanged as the second argument of
            ``sfs_tools.SFS_from_all_frequencies``.

    Returns:
        A dict keyed like ``organ_mutations``; each value maps a grouping key
        to ``sfs_tools.SFS_from_all_frequencies(frequencies, N)``.  An organ
        with no records maps to an empty dict.
    """
    spectra_by_organ = {}
    for organ in organ_mutations.keys():
        # Gather the raw frequencies of this organ, grouped by record[1].
        freqs_by_type = {}
        for record in organ_mutations[organ]:
            freqs_by_type.setdefault(record[1], []).append(int(record[7]))

        spectra_by_organ[organ] = {
            mut_type: sfs_tools.SFS_from_all_frequencies(freqs, N)
            for mut_type, freqs in freqs_by_type.items()
        }
    return spectra_by_organ
def get_sfs_dict_from_sample(slim_input):
    """Summarise one gzipped SLiM output file into per-element SFS data.

    The file is read line by line (each line stripped) and handed to
    ``ts.slim(..., fixed=True, give_genomes=True)``.  Twenty genome keys are
    then drawn with ``random.choice`` (i.e. with replacement), the number of
    sampled genomes carrying each mutation is counted, and for every organ the
    non-zero counts are grouped by ``m[1]`` and turned into a spectrum with
    ``sfs_tools.SFS_from_all_frequencies(counts, 20)``.  The result is merged
    with the fixed-mutation data and element lengths through ``orgFixDict``,
    ``combinePolyFix`` and ``combineElements``.

    Args:
        slim_input: path of the gzip-compressed file to read.

    Returns:
        ``[None, None]`` when the parsed object reports ``sanity`` as falsy,
        otherwise ``[x.name, elDict]`` where ``elDict`` is the value returned
        by ``combineElements``.

    Side effects:
        Prints ``'processed ' + x.name`` on success.
    """
    n_sampled = 20  # genomes drawn, also the sample size given to the SFS

    # Use a context manager so the gzip handle is closed deterministically
    # (the previous one-liner leaked the open file object).
    with gzip.open(slim_input) as handle:
        data = [line.strip() for line in handle]

    x = ts.slim(data, fixed=True, give_genomes=True)
    if not x.sanity:
        return [None, None]

    genomes = x.genome_dict()
    # The result was never used here; the call is kept in case it has side
    # effects on ``x`` -- TODO confirm and drop.
    x.mutations_dict()
    lengthDict = parseLengths(x.organ_lengths())

    # Build the key list once instead of once per draw.
    # NOTE(review): random.choice samples WITH replacement, so a genome can be
    # counted more than once -- confirm this is intended.
    genome_ids = list(genomes.keys())
    individuals = [random.choice(genome_ids) for _ in range(n_sampled)]

    muts_by_organ = x.organ_mutations()

    # Number of sampled genomes carrying each mutation.
    sampled_counts = Counter()
    for genome_id in individuals:
        for mut_id in genomes[genome_id]:
            sampled_counts[mut_id] += 1

    polyDict = {}
    for organ in muts_by_organ.keys():
        counts_by_type = {}
        for mut in muts_by_organ[organ]:
            count = sampled_counts[mut[0]]
            if count == 0:
                # Mutation absent from the sample: not part of the SFS.
                continue
            counts_by_type.setdefault(mut[1], []).append(count)
        polyDict[organ] = {
            mut_type: sfs_tools.SFS_from_all_frequencies(counts, n_sampled)
            for mut_type, counts in counts_by_type.items()
        }

    fixedDict = x.organ_fixed(threshold=int(x.N) * 20)
    fixD = orgFixDict(fixedDict)
    polyfix = combinePolyFix(polyDict, fixD)
    elDict = combineElements(polyfix, lengthDict)
    print('processed ' + x.name)
    return [x.name, elDict]
def get_sfs_dict_from_sample(slim_input):
    """Summarise one SLiM run into per-element SFS and fixation data.

    ``slim_input`` is passed straight to
    ``ts.slim(..., fixed=True, give_genomes=True)``.  Twenty genome keys are
    drawn with ``random.choice`` (i.e. with replacement), the number of
    sampled genomes carrying each mutation is counted, and for every organ the
    non-zero counts are grouped by ``m[1]`` and turned into a spectrum with
    ``sfs_tools.SFS_from_all_frequencies(counts, 20)``.  The result is merged
    with the fixed-mutation data and element lengths through ``orgFixDict``,
    ``combinePolyFix`` and ``combineElements``.

    Args:
        slim_input: whatever ``ts.slim`` accepts as its first argument.

    Returns:
        ``[None, None]`` when the parsed object reports ``sanity`` as falsy,
        otherwise ``[x.name, elDict]`` where ``elDict`` is the value returned
        by ``combineElements``.

    Side effects:
        Prints ``'processed ' + x.name`` on success.
    """
    n_sampled = 20  # genomes drawn, also the sample size given to the SFS

    x = ts.slim(slim_input, fixed=True, give_genomes=True)
    if not x.sanity:
        return [None, None]

    genomes = x.genome_dict()
    # The result was never used here; the call is kept in case it has side
    # effects on ``x`` -- TODO confirm and drop.
    x.mutations_dict()
    lengthDict = parseLengths(x.organ_lengths())

    # Build the key list once instead of once per draw.
    # NOTE(review): random.choice samples WITH replacement, so a genome can be
    # counted more than once -- confirm this is intended.
    genome_ids = list(genomes.keys())
    individuals = [random.choice(genome_ids) for _ in range(n_sampled)]

    muts_by_organ = x.organ_mutations()

    # Number of sampled genomes carrying each mutation.
    sampled_counts = Counter()
    for genome_id in individuals:
        for mut_id in genomes[genome_id]:
            sampled_counts[mut_id] += 1

    polyDict = {}
    for organ in muts_by_organ.keys():
        counts_by_type = {}
        for mut in muts_by_organ[organ]:
            count = sampled_counts[mut[0]]
            if count == 0:
                # Mutation absent from the sample: not part of the SFS.
                continue
            counts_by_type.setdefault(mut[1], []).append(count)
        polyDict[organ] = {
            mut_type: sfs_tools.SFS_from_all_frequencies(counts, n_sampled)
            for mut_type, counts in counts_by_type.items()
        }

    # Single, int-converted threshold (replaces the unused ``thresh`` local
    # that was computed without int() and then ignored).
    threshold = int(x.N) * 10
    fixedDict = x.organ_fixed(threshold=threshold)
    fixD = orgFixDict(fixedDict)
    polyfix = combinePolyFix(polyDict, fixD)
    elDict = combineElements(polyfix, lengthDict)
    print('processed ' + x.name)
    return [x.name, elDict]
def _parse_alleles(field):
    """Split a comma-separated allele field into ints; None if it starts with '.'."""
    parts = field.split(',')
    if parts[0] == '.':
        return None
    # A real list (not a lazy ``map``) because the result is read several times.
    return [int(part) for part in parts]


def sfsFromFreq(chunk, minCoverage=0):
    """Accumulate site frequency spectra and divergence from frequency records.

    Each record in ``chunk`` is stripped and split on whitespace.  The columns
    used (0-based) are:

    * 2 -- integer compared with ``minCoverage``; records with a value
      ``<= minCoverage`` are skipped;
    * 3 -- ``'.'`` for a non-variant site, otherwise a float; variant sites
      with a value below 0.0002 are skipped (the original comments describe
      this as the HWE check);
    * 5, 7, 9 -- comma-separated integer allele fields for the "cast", "fam"
      and "rat" samples; a record is skipped if any of them starts with
      ``'.'``;
    * 4, 6, 8 -- CpG flags for the same three samples; a site counts as
      non-CpG when none of them equals ``'1'``.

    Blank records are skipped.

    Args:
        chunk: iterable of text records.
        minCoverage: records whose column 2 is ``<=`` this value are ignored.

    Returns:
        ``(allSitesSFS, ncpgSitesSFS, divList)`` where the two spectra come
        from ``site_frequency_spectrum.SFS_from_all_frequencies(values, 20)``
        and ``divList`` is
        ``[fam_div_all, rat_div_all, fam_div_ncpg, rat_div_ncpg]``.
    """
    sfs_all = []
    sfs_ncpg = []
    rat_div_all = 0
    fam_div_all = 0
    rat_div_ncpg = 0
    fam_div_ncpg = 0

    for record in chunk:
        freq = record.strip().split()
        if not freq:
            # Blank line (e.g. a trailing newline): nothing to parse.
            continue
        if int(freq[2]) <= minCoverage:
            continue
        # '.' marks a non-variant site, which needs no HWE filtering; variant
        # sites are dropped when their value falls below the cut-off.
        if freq[3] != '.' and float(freq[3]) < 0.0002:
            continue

        cast_alleles = _parse_alleles(freq[5])
        if cast_alleles is None:
            continue
        fam_alleles = _parse_alleles(freq[7])
        if fam_alleles is None:
            continue
        rat_alleles = _parse_alleles(freq[9])
        if rat_alleles is None:
            continue

        alleleFreq = getAlleleFreq(cast_alleles)
        sfs_all.append(alleleFreq)

        rat_div = divergent(cast_alleles, rat_alleles, out_alleles=1)
        fam_div = divergent(cast_alleles, fam_alleles, out_alleles=2)
        # The truthiness guard skips None/0 results from ``divergent``.
        if rat_div:
            rat_div_all += rat_div
        if fam_div:
            fam_div_all += fam_div

        if '1' not in (freq[4], freq[6], freq[8]):
            sfs_ncpg.append(alleleFreq)
            # Same arguments as above, so the divergence values are reused
            # rather than recomputed.
            if rat_div:
                rat_div_ncpg += rat_div
            if fam_div:
                fam_div_ncpg += fam_div

    divList = [fam_div_all, rat_div_all, fam_div_ncpg, rat_div_ncpg]
    allSitesSFS = site_frequency_spectrum.SFS_from_all_frequencies(sfs_all, 20)
    ncpgSitesSFS = site_frequency_spectrum.SFS_from_all_frequencies(
        sfs_ncpg, 20)
    return allSitesSFS, ncpgSitesSFS, divList