def summariseChunk(chunk, ncpg=False):
    """Merge the per-row SFS and divergence strings of a chunk into single spectra."""
    all_SFS_list = [list(map(int, i.split(':'))) for i in list(chunk[4])]
    ncpg_SFS_list = [list(map(int, i.split(':'))) for i in list(chunk[5])]
    div_list = [list(map(int, i.split(':'))) for i in list(chunk[6])]
    all_SFS = all_SFS_list[0]
    ncpg_SFS = ncpg_SFS_list[0]
    div = div_list[0]
    for i in range(1, len(all_SFS_list)):
        all_SFS = SFS_tools.merge_SFS(all_SFS, all_SFS_list[i])
        ncpg_SFS = SFS_tools.merge_SFS(ncpg_SFS, ncpg_SFS_list[i])
        div = SFS_tools.merge_SFS(div, div_list[i])
    if ncpg:
        return ncpg_SFS, div[2:]  # last two divergence entries are the non-CpG sites
    else:
        return all_SFS, div[:2]  # first two divergence entries are all sites
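# Every function in this file leans on merge_SFS from the local SFS/SFS_tools
# modules, which is never defined here. A minimal sketch of what it presumably
# does (an assumption about the local module, not its actual implementation):
# element-wise addition of two spectra, padding the shorter one with zeros.
def merge_SFS_sketch(sfs_a, sfs_b):
    merged = [0] * max(len(sfs_a), len(sfs_b))
    for idx, count in enumerate(sfs_a):
        merged[idx] += count
    for idx, count in enumerate(sfs_b):
        merged[idx] += count
    return merged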
def get_sfs_dicts(file_name):
    """Read a file of [region, sfs] lines and merge all the spectra per region."""
    temp_sfs_dict = {}
    out_sfs_dict = {"name": file_name}
    with open(file_name, "r") as x:
        for i in x:
            y = i.strip("\n").split("[")
            region = y[1].strip(", ").strip("''")
            # this doozy pulls the processed SFS out of the bracketed list of values
            region_sfs = list(map(int, y[2].strip("]]").split(",")))
            if region not in temp_sfs_dict:
                temp_sfs_dict[region] = [region_sfs]
            else:
                temp_sfs_dict[region].append(region_sfs)
    for key in temp_sfs_dict:
        sfs_list = temp_sfs_dict[key]
        grand_sfs = sfs_list[0]
        for temp in sfs_list[1:]:
            grand_sfs = merge_SFS(grand_sfs, temp)
        out_sfs_dict[key] = grand_sfs
    return out_sfs_dict
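# Usage sketch for get_sfs_dicts. The parser above implies input lines that
# look like the repr of a [region, sfs] pair; the file name and values here
# are made up for illustration:
#   ['u.101-111', [0, 12, 5, 3]]
#   ['u.101-111', [0, 8, 4, 1]]
# sfs_by_region = get_sfs_dicts('windows.sfs.txt')
# sfs_by_region['u.101-111']  # -> [0, 20, 9, 4] once both lines are merged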
def combineElements(polyfix, lengths):
    """Merge the per-mutation-type spectra of each element into one SFS each."""
    elDict = {}
    for i in polyfix:
        sfs = []
        try:
            for j in polyfix[i]:
                if i == 'g1' and j == 'm12':  # for the case of exons: keep synonymous sites separate
                    elDict['syn'] = polyfix[i][j]
                    continue
                if len(sfs) == 0:
                    sfs = polyfix[i][j]
                else:
                    sfs = sfs_tools.merge_SFS(sfs, polyfix[i][j])
        except KeyError:
            continue
        elDict[i] = sfs
    for i in elDict:
        # the zero class is everything in the element that is not segregating
        zero = lengths[i] - sum(elDict[i])
        try:
            elDict[i][0] = zero
        except IndexError:  # empty SFS for this element; skip it rather than abort
            continue
    return elDict
def mergeTheSFS(SFS):
    """Collapse a list of spectra into a single merged SFS."""
    sfs = SFS[0]
    for i in SFS[1:]:
        sfs = merge_SFS(sfs, i)
    return sfs
def mergeManySFS(spectra):
    """Merge an iterable of spectra into one SFS."""
    sfs = None
    for i in spectra:
        sfs = i if sfs is None else SFS.merge_SFS(sfs, i)
    return sfs
def get_full_sfs(sfs_dict_raw):
    """Merge every replicate SFS in each region into a single grand SFS."""
    grand = {}
    for i in sfs_dict_raw:
        full_sfs = sfs_dict_raw[i][0]
        for s in sfs_dict_raw[i][1:]:
            if not s:  # skip empty spectra
                continue
            full_sfs = SFS.merge_SFS(full_sfs, s)
        grand[i] = full_sfs
    return grand
import argparse
import glob
import subprocess

import SFS


def main():
    parser = argparse.ArgumentParser(
        description="Extract the SFS from a bunch of merged SLiM output files")
    parser.add_argument("-i", "--input", required=True, dest="input", type=str,
                        help="The name of the directory containing the SLiM output")
    parser.add_argument("-o", "--output", required=True, dest="output", type=str,
                        help="The name of the output file you want to write to")
    args = parser.parse_args()

    output = []
    for m in range(1, 9):  # mutation types m1-m8
        print('m' + str(m))
        full_sfs = []
        for i in glob.glob(args.input + '/R*'):
            # pull the lines for this mutation type out of the gzipped replicate
            process = subprocess.Popen(['zgrep', 'm' + str(m), i],
                                       stdout=subprocess.PIPE).communicate()[0]
            fixations, sfs = getSFSfromSLiM(process)
            if fixations is None:
                continue
            # fold fixed differences into the last bin and zero the invariant class
            sfs[-1] += fixations
            sfs[0] = 0
            if len(full_sfs) == 0:
                full_sfs = sfs
            else:
                full_sfs = SFS.merge_SFS(full_sfs, sfs)
        output.append(['m' + str(m), full_sfs])

    txt = open(args.output, 'w')
    for i in output:
        txt.write(i[0] + '\n')
        txt.write(' '.join(map(str, i[1])) + '\n')
    txt.close()
import argparse
import gzip

import pandas as pd

import SFS_tools


def main():
    parser = argparse.ArgumentParser(
        description="Combine all the sfs files coming out of the "
        "sfs_from_slim_update_bootstrap.py script")
    parser.add_argument("-i", "--input", required=True, dest="input", type=str,
                        help="The name of the file that contains the sfs files")
    parser.add_argument("-o", "--output", required=True, dest="output", type=str,
                        help="The name of the output file")
    args = parser.parse_args()

    sfs_dict = {}
    for i in gzip.open(args.input, 'rt'):
        z = i.split('[')
        region = z[1].strip("'").replace("'", '')
        sfs_temp = list(map(int, z[2].replace(']', '').replace(',', '').strip().split(' ')))
        try:
            sfs_dict[region].append(sfs_temp)
        except KeyError:
            sfs_dict[region] = [sfs_temp]

    data = []
    for i in sfs_dict:
        sfs = sfs_dict[i][0]
        for j in sfs_dict[i][1:]:
            sfs = SFS_tools.merge_SFS(sfs, j)
        # region names look like 'u.101-111': stream (u/d), then the window span
        stream = i.split('.')[0]
        dist = list(map(int, i.replace(',', '').split('.')[1].split('-')))
        mult = -1 if stream == 'u' else 1  # upstream windows get negative distances
        mid = mult * sum(dist) / 2
        data.append([mid, SFS_tools.pi(sfs), SFS_tools.tajima(sfs)])
    pd.DataFrame(data, columns=['dist', 'pi', 'TD']).to_csv(args.output)
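# SFS_tools.pi above computes nucleotide diversity from a spectrum. A textbook
# sketch of that statistic, assuming an unfolded SFS indexed by derived-allele
# count in a sample of n = len(sfs) - 1 chromosomes (this is an assumption
# about the local module, not its actual code):
def pi_sketch(sfs):
    n = len(sfs) - 1
    # average pairwise differences: sum_i i*(n-i)*sfs[i] / C(n, 2)
    pairwise = sum(i * (n - i) * count for i, count in enumerate(sfs))
    return 2.0 * pairwise / (n * (n - 1))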
def get_boot_sfs(sfs_dict_raw):
    """Build one bootstrap replicate: resample each region's spectra and merge them."""
    boot = {}
    for i in sfs_dict_raw:
        sample = [sfs_dict_raw[i][j] for j in bootstrap_sample(len(sfs_dict_raw[i]))]
        boot_sfs = sample[0]
        for s in sample[1:]:
            boot_sfs = SFS.merge_SFS(boot_sfs, s)
        boot[i] = boot_sfs
    return boot
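# bootstrap_sample is not defined in this file. A minimal sketch of the helper
# get_boot_sfs appears to expect (an assumption): n indices drawn uniformly
# with replacement.
import random

def bootstrap_sample_sketch(n):
    return [random.randint(0, n - 1) for _ in range(n)]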
import argparse
import gzip
import pickle
import random

import SFS_tools


def main():
    parser = argparse.ArgumentParser(
        description="Take the dict of SFS for different SLiM runs and make a "
        "composite SFS or bootstrapped SFS")
    parser.add_argument("-i", "--input", required=True, dest="input", type=str,
                        help="Give the name of the gzipped pickle file")
    parser.add_argument("-o", "--output", required=True, dest="output", type=str,
                        help="the name of the output")
    parser.add_argument("-b", "--boots", required=False, action='store_true',
                        default=False, help="Do you want a bootstrapped SFS?")
    args = parser.parse_args()

    pickle_jar = pickle.load(gzip.open(args.input, "rb"))
    names = [p for p in pickle_jar if p]
    if args.boots:
        # resample the run names with replacement for one bootstrap replicate
        names = [random.choice(names) for _ in range(len(names))]

    bigDict = {}
    for n in names:
        z = pickle_jar[n]
        for j in z:
            if not z[j]:
                continue
            if j not in bigDict:
                bigDict[j] = z[j]
            else:
                bigDict[j] = SFS_tools.merge_SFS(z[j], bigDict[j])

    output = open(args.output, 'w')
    for i in bigDict:
        output.write(i + '\n' + ' '.join(map(str, bigDict[i])) + '\n')
    output.close()
def combineMany(SFSs):
    """Merge a list of colon-separated SFS strings into one spectrum."""
    sfs = list(map(int, SFSs[0].split(':')))
    for s in SFSs[1:]:
        sfs = SFS_tools.merge_SFS(sfs, list(map(int, s.split(':'))))
    return sfs
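# Usage sketch for combineMany with made-up spectra; each input string is a
# colon-separated SFS as produced elsewhere in this pipeline:
# combineMany(['0:5:2:1', '0:3:1:0'])  # -> [0, 8, 3, 1]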