# Excerpt: tail of the header classification, then the per-contig comparison
# of the plasma, buffy and csf groups against control.
#
# What the statements on the line below do, in order:
#   * a header whose integer value is <= 63 has its column index appended to
#     cols['control']; any header not matched by the chain is reported on
#     stderr;
#   * in the other branch, the row is stored as data[p[0]] = map(int, p[1:]);
#   * for every contig and every sample group in cols, means[contig][sample]
#     holds roblib.mean() of that group's columns and std[contig][sample]
#     holds TWICE roblib.stdev() of them;
#   * an "ALL" row (contig plus the four group means, tab-separated) is
#     printed when mean - 2*stdev of plasma, buffy and csf each exceed
#     mean + 2*stdev of control.
#
# NOTE(review): the line's whitespace has been collapsed, so block nesting is
# not recoverable from the text alone, and everything after the first '#' on
# the line is lexically a comment. Restore the indentation before running.
# NOTE(review): the earlier branches of the if/elif chain and the condition
# the second `else` pairs with are outside this excerpt - presumably a
# header-row test; confirm against the full script.
# NOTE(review): on Python 3, data.keys() returns a view with no .sort()
# method and map() returns an iterator that cannot be indexed, so
# `allcontigs.sort()` and `data[contig][i]` would both raise.
# `allcontigs = sorted(data)` and `list(map(int, p[1:]))` behave the same on
# Python 2 and 3.
# NOTE(review): column indices in cols are used to index data[contig], which
# is built from p[1:]; confirm the indices are relative to p[1:] and not to p.
elif int(headers[i]) <= 63: cols['control'].append(i) else: sys.stderr.write("Don't understand header {} at column {}\n".format(headers[i], i)) else: data[p[0]] = map(int, p[1:]) allcontigs = data.keys() allcontigs.sort() # calculate the mean and stdev for each group and each contig means = {} std = {} for contig in allcontigs: means[contig] = {} std[contig] = {} for sample in cols: testdata = [data[contig][i] for i in cols[sample]] means[contig][sample] = roblib.mean(testdata) std[contig][sample] = 2 * roblib.stdev(testdata) # test the NS vs Control if means[contig]['plasma'] - std[contig]['plasma'] > means[contig]['control'] + std[contig]['control'] and \ means[contig]['buffy'] - std[contig]['buffy'] > means[contig]['control'] + std[contig]['control'] and \ means[contig]['csf'] - std[contig]['csf'] > means[contig]['control'] + std[contig]['control']: print("\t".join(map(str, ["ALL", contig, means[contig]['plasma'], means[contig]['buffy'], means[contig]['csf'], means[contig]['control']])))
# Tally one input row, then write a Beta-distribution summary for every row.
#
# NOTE(review): this excerpt begins mid-script and its indentation was lost.
# The first five statements read the per-row list `p`, so they presumably
# belong inside a row-reading loop whose header precedes the excerpt;
# re-indent them to match when splicing. `p`, `hcols`, `ncols`, `total`,
# `data`, `notzero`, `np` and `roblib` are all defined outside this excerpt.

# Build real lists. On Python 3, map() and filter() return one-shot
# iterators, so the original len(filter(...)) raised TypeError. List
# comprehensions give the same values on Python 2 and Python 3.
points = [float(value) for value in p[hcols:]]
nz = [value for value in points if notzero(value)]
psum = sum(points)
total += psum
# Per row: [number of values accepted by notzero(), sum of all values].
data[p[0]] = [len(nz), psum]

# now calculate the mean and stdev based on the beta distribution
xvalues = set()  # not read within this excerpt; kept in case later code uses it
betad = {}  # accepted-value count -> (mean, stdev) of the sampled distribution
for p in data:
    if data[p][0] not in betad:
        # NOTE(review): the message reports count-based parameters, whereas
        # the draw below is parameterised by the row sum and the grand total.
        sys.stderr.write("alpha:" + str(data[p][0]) + " beta: " + str((ncols - data[p][0]) + 1) + "\n")
        # samples = np.random.beta(data[p][0]+1, ncols-data[p][0], 1000)
        samples = np.random.beta(data[p][1] + 1, total - data[p][1], 100000)
        # Cached by count, so rows sharing a count reuse the first row's stats.
        betad[data[p][0]] = (roblib.mean(samples), roblib.stdev(samples))

# One output line per row: row name, count / ncols, row sum / total. The
# cached mean and stdev are appended only the first time a given
# count / ncols value is written.
seen = set()
for p in data:
    x = str(1.0 * data[p][0] / ncols)
    sys.stdout.write(p + "\t" + str(x) + "\t" + str(1.0 * data[p][1] / total))
    if x not in seen:
        sys.stdout.write("\t" + str(betad[data[p][0]][0]) + "\t" + str(betad[data[p][0]][1]))
        seen.add(x)
    sys.stdout.write("\n")
# Excerpt: trim the per-position coverage list, then report its mean,
# standard deviation and excess kurtosis.
#
# What the statements on the line below do, in order:
#   * report on stderr that pu.reference_pos could not be added to;
#   * `end` defaults to args.l + 1 and becomes args.e + 1 when args.e is
#     truthy; `start` defaults to 1 and becomes args.s when args.s is truthy;
#     coverage is sliced with [0:end] and with [start:];
#   * av = mean(coverage), st = stdev(coverage);
#   * k = sum((j - av)**4) / (len(coverage) * st**4) - 3, i.e. the fourth
#     central moment over st**4, minus 3;
#   * when args.r is truthy, print a header row if args.c is set and then one
#     tab-separated row: args.f, args.r, av, st, k.
#
# NOTE(review): the line's whitespace has been collapsed, so block nesting is
# not recoverable from the text alone, and everything after the first '#' on
# the line is lexically a comment. Restore the indentation before running.
# NOTE(review): the excerpt starts inside an unseen construct and stops at a
# dangling `if args.c:`; the remainder of the else-branch is not visible.
# NOTE(review): because of the truthiness tests, args.s == 0 or args.e == 0
# is treated the same as "not supplied".
# NOTE(review): the division fails if coverage is empty after trimming or if
# st is 0 (every position has the same depth) - assuming stdev() returns 0
# rather than raising in that case; confirm which stdev() is in scope.
# NOTE(review): the index `i` from enumerate() is never used.
sys.stderr.write("Can't add to position {}\n".format( pu.reference_pos)) # here we trim the coverage array to make the math easier! start = 1 end = args.l + 1 if args.e: end = args.e + 1 coverage = coverage[0:end] if args.s: start = args.s coverage = coverage[start:] # calculate the average over coverage av = mean(coverage) st = stdev(coverage) k = 0 for i, j in enumerate(coverage): k += (j - av)**4 k = k / (len(coverage) * (st**4)) k -= 3 if args.r: if args.c: print("Filename\tReference\tAverage\tStDev\tKurtosis") print(f"{args.f}\t{args.r}\t{av}\t{st}\t{k}") else: if args.c:
# Excerpt: last branch of the header classification, then the per-contig
# comparison of the plasma, buffy and csf groups against control.
#
# What the statements on the line below do, in order:
#   * report on stderr a header that could not be classified, with its
#     column index;
#   * in the `else` branch, store the row as data[p[0]] = map(int, p[1:]);
#   * for every contig and every sample group in cols, store
#     roblib.mean() of that group's columns in means[contig][sample] and
#     2 * roblib.stdev() of them in std[contig][sample];
#   * print a tab-separated "ALL" row (contig plus the plasma, buffy, csf and
#     control means) when mean - 2*stdev of each of plasma, buffy and csf is
#     greater than mean + 2*stdev of control.
#
# NOTE(review): the line's whitespace has been collapsed, so block nesting is
# not recoverable from the text alone, and everything after the first '#' on
# the line is lexically a comment. Restore the indentation before running.
# NOTE(review): the condition the `else` pairs with is outside this excerpt -
# presumably a header-row test; confirm against the full script.
# NOTE(review): on Python 3, data.keys() returns a view with no .sort()
# method and map() returns an iterator that cannot be indexed, so
# `allcontigs.sort()` and `data[contig][i]` would both raise.
# `allcontigs = sorted(data)` and `list(map(int, p[1:]))` behave the same on
# Python 2 and 3.
# NOTE(review): column indices in cols are used to index data[contig], which
# is built from p[1:]; confirm the indices are relative to p[1:] and not to p.
sys.stderr.write( "Don't understand header {} at column {}\n".format( headers[i], i)) else: data[p[0]] = map(int, p[1:]) allcontigs = data.keys() allcontigs.sort() # calculate the mean and stdev for each group and each contig means = {} std = {} for contig in allcontigs: means[contig] = {} std[contig] = {} for sample in cols: testdata = [data[contig][i] for i in cols[sample]] means[contig][sample] = roblib.mean(testdata) std[contig][sample] = 2 * roblib.stdev(testdata) # test the NS vs Control if means[contig]['plasma'] - std[contig]['plasma'] > means[contig]['control'] + std[contig]['control'] and \ means[contig]['buffy'] - std[contig]['buffy'] > means[contig]['control'] + std[contig]['control'] and \ means[contig]['csf'] - std[contig]['csf'] > means[contig]['control'] + std[contig]['control']: print("\t".join( map(str, [ "ALL", contig, means[contig]['plasma'], means[contig]['buffy'], means[contig]['csf'], means[contig]['control'] ])))
# Parse and tally one input row, then write a Beta-distribution summary for
# every row.
#
# NOTE(review): this excerpt begins mid-script and its indentation was lost.
# The statements down to `data[p[0]] = ...` read the current line `l`, so
# they presumably belong inside a line-reading loop whose header precedes the
# excerpt; re-indent them to match when splicing. `l`, `hcols`, `ncols`,
# `total`, `data`, `notzero`, `np` and `roblib` are all defined outside this
# excerpt.

p = l.strip().split("\t")
# Track the widest row seen so far (counted in tab-separated fields).
if len(p) > ncols:
    ncols = len(p)

# Build real lists. On Python 3, map() and filter() return one-shot
# iterators, so the original len(filter(...)) raised TypeError. List
# comprehensions give the same values on Python 2 and Python 3.
points = [float(value) for value in p[hcols:]]
nz = [value for value in points if notzero(value)]
psum = sum(points)
total += psum
# Per row: [number of values accepted by notzero(), sum of all values].
data[p[0]] = [len(nz), psum]

# now calculate the mean and stdev based on the beta distribution
xvalues = set()  # not read within this excerpt; kept in case later code uses it
betad = {}  # accepted-value count -> (mean, stdev) of the sampled distribution
for p in data:
    if data[p][0] not in betad:
        # NOTE(review): the message reports count-based parameters, whereas
        # the draw below is parameterised by the row sum and the grand total.
        sys.stderr.write("alpha:" + str(data[p][0]) + " beta: " + str((ncols - data[p][0]) + 1) + "\n")
        # samples = np.random.beta(data[p][0]+1, ncols-data[p][0], 1000)
        samples = np.random.beta(data[p][1] + 1, total - data[p][1], 100000)
        # Cached by count, so rows sharing a count reuse the first row's stats.
        betad[data[p][0]] = (roblib.mean(samples), roblib.stdev(samples))

# One output line per row: row name, count / ncols, row sum / total. The
# cached mean and stdev are appended only the first time a given
# count / ncols value is written.
seen = set()
for p in data:
    x = str(1.0 * data[p][0] / ncols)
    sys.stdout.write(p + "\t" + str(x) + "\t" + str(1.0 * data[p][1] / total))
    if x not in seen:
        sys.stdout.write("\t" + str(betad[data[p][0]][0]) + "\t" + str(betad[data[p][0]][1]))
        seen.add(x)
    sys.stdout.write("\n")