def summary(cvsfile, write_func, *args):
    """Write per-column summary statistics for a table of rows.

    For every name in ``cvsfile.fieldnames`` the non-empty, non-"NA" values
    are collected, converted to float and summarised as sum, mean, median,
    standard deviation and a cutoff (mean + std), each formatted with two
    decimals.  A column that cannot be summarised gets "NA" in every slot.

    Args:
        cvsfile: Iterable of dict-like rows that also exposes a
            ``fieldnames`` attribute (presumably a ``csv.DictReader`` --
            confirm against the caller).
        write_func: Callable taking a single string; it is called once for
            the header line and once per column.
        *args: Accepted for interface compatibility; ignored.
    """
    fsnames = cvsfile.fieldnames
    data_dict = dict((f, []) for f in fsnames)

    for row in cvsfile:
        for f in fsnames:
            # .get() rather than setdefault(): reading a cell must not add
            # keys to the caller's row.
            v = row.get(f)
            if v and v != "NA":
                data_dict[f].append(v)

    write_func("colnum:colname\tsum,mean,median,std,cutoff\n")
    for (i, f) in enumerate(fsnames):
        try:
            # Build a real list: the values are traversed several times
            # below, and map() yields a one-shot iterator on Python 3.
            v_array = [float(x) for x in data_dict[f]]
            v_sum = "%.2f" % sum(v_array)
            v_mean = "%.2f" % mean(v_array)
            v_median = "%.2f" % median(v_array)
            # NOTE(review): std and cutoff are derived from the already
            # rounded mean/std strings, exactly as before, so the output
            # stays byte-compatible with earlier runs.
            v_std = "%.2f" % std(v_array, float(v_mean))
            v_cutoff = "%.2f" % (float(v_mean) + float(v_std))
        except (ValueError, ZeroDivisionError):
            # ValueError: the column holds non-numeric text.
            # ZeroDivisionError: presumably raised by the statistics helpers
            # (defined outside this block) when the column has no usable
            # values -- report it as NA instead of aborting the summary.
            (v_sum, v_mean, v_median, v_std, v_cutoff) = ["NA"] * 5
        write_func("%d:%s\t%s,%s,%s,%s,%s\n"
                   % (i, f, v_sum, v_mean, v_median, v_std, v_cutoff))
def summary(cvsfile, write_func, *args):
    """Write per-column summary statistics for a table of rows.

    For every name in ``cvsfile.fieldnames`` the non-empty, non-"NA" values
    are collected, converted to float and summarised as sum, mean, median,
    standard deviation and a cutoff (mean + std), each formatted with two
    decimals.  A column that cannot be summarised gets "NA" in every slot.

    Args:
        cvsfile: Iterable of dict-like rows that also exposes a
            ``fieldnames`` attribute (presumably a ``csv.DictReader`` --
            confirm against the caller).
        write_func: Callable taking a single string; it is called once for
            the header line and once per column.
        *args: Accepted for interface compatibility; ignored.
    """
    fsnames = cvsfile.fieldnames
    data_dict = dict((f, []) for f in fsnames)

    for row in cvsfile:
        for f in fsnames:
            # .get() rather than setdefault(): reading a cell must not add
            # keys to the caller's row.
            v = row.get(f)
            if v and v != "NA":
                data_dict[f].append(v)

    write_func("colnum:colname\tsum,mean,median,std,cutoff\n")
    for (i, f) in enumerate(fsnames):
        try:
            # Build a real list: the values are traversed several times
            # below, and map() yields a one-shot iterator on Python 3.
            v_array = [float(x) for x in data_dict[f]]
            v_sum = "%.2f" % sum(v_array)
            v_mean = "%.2f" % mean(v_array)
            v_median = "%.2f" % median(v_array)
            # NOTE(review): std and cutoff are derived from the already
            # rounded mean/std strings, exactly as before, so the output
            # stays byte-compatible with earlier runs.
            v_std = "%.2f" % std(v_array, float(v_mean))
            v_cutoff = "%.2f" % (float(v_mean) + float(v_std))
        except (ValueError, ZeroDivisionError):
            # ValueError: the column holds non-numeric text.
            # ZeroDivisionError: presumably raised by the statistics helpers
            # (defined outside this block) when the column has no usable
            # values -- report it as NA instead of aborting the summary.
            (v_sum, v_mean, v_median, v_std, v_cutoff) = ["NA"] * 5
        write_func("%d:%s\t%s,%s,%s,%s,%s\n"
                   % (i, f, v_sum, v_mean, v_median, v_std, v_cutoff))
# Time-stamp: <2011-01-31 17:17:00 Tao Liu>
# Simple normalization script for a bedGraph file from the
# wig2bedGraphBins.py script.  Every score that is not "NA" is replaced by
# its z-score, (score - mean) / std, computed over all non-"NA" scores;
# "NA" rows are echoed unchanged.  Output goes to stdout.

import re
import sys

from taolib.CoreLib.BasicStat.Func import mean, std

if len(sys.argv) < 2:
    sys.stderr.write("normalize bg file from wig2bedGraphBins.py, output zscores.\nneed 1 para: %s <bedGraph with fixed bins>\n" % sys.argv[0])
    sys.exit(1)

rows = []    # [first three columns joined by tabs, raw score string] per line
values = []  # numeric scores only, used for the mean and std

# The with-block closes the input even if a line fails to parse.
with open(sys.argv[1]) as fhd:
    for (idx, line) in enumerate(fhd):
        fs = line.rstrip().split()
        if not fs:
            # Blank lines carry no data; skip them instead of crashing.
            continue
        if len(fs) < 4:
            sys.stderr.write("line %d of %s has fewer than 4 columns\n"
                             % (idx + 1, sys.argv[1]))
            sys.exit(1)
        rows.append(["\t".join(fs[:3]), fs[3]])
        if fs[3] != "NA":
            values.append(float(fs[3]))

# With no numeric score at all there is nothing to normalize: every row is
# "NA" and is echoed below without m or s ever being read.
if values:
    m = mean(values)
    s = std(values, n_mean=m)
    if s == 0:
        # A zero spread makes the z-score undefined; stop before emitting
        # partial output rather than dying on a division by zero mid-file.
        sys.stderr.write("cannot compute zscores: standard deviation of the scores is 0\n")
        sys.exit(1)

# Single-argument print(...) behaves identically as a Python 2 print
# statement and as a Python 3 function call.
for (region, score) in rows:
    if score != "NA":
        print("%s\t%.6f" % (region, (float(score) - m) / s))
    else:
        print(region + "\t" + score)
# Simple normalization script for a bedGraph file from the
# wig2bedGraphBins.py script.  Every score that is not "NA" is replaced by
# its z-score, (score - mean) / std, computed over all non-"NA" scores;
# "NA" rows are echoed unchanged.  Output goes to stdout.

import re
import sys

from taolib.CoreLib.BasicStat.Func import mean, std

if len(sys.argv) < 2:
    sys.stderr.write(
        "normalize bg file from wig2bedGraphBins.py, output zscores.\nneed 1 para: %s <bedGraph with fixed bins>\n"
        % sys.argv[0])
    sys.exit(1)

rows = []    # [first three columns joined by tabs, raw score string] per line
values = []  # numeric scores only, used for the mean and std

# The with-block closes the input even if a line fails to parse.
with open(sys.argv[1]) as fhd:
    for (idx, line) in enumerate(fhd):
        fs = line.rstrip().split()
        if not fs:
            # Blank lines carry no data; skip them instead of crashing.
            continue
        if len(fs) < 4:
            sys.stderr.write("line %d of %s has fewer than 4 columns\n"
                             % (idx + 1, sys.argv[1]))
            sys.exit(1)
        rows.append(["\t".join(fs[:3]), fs[3]])
        if fs[3] != "NA":
            values.append(float(fs[3]))

# With no numeric score at all there is nothing to normalize: every row is
# "NA" and is echoed below without m or s ever being read.
if values:
    m = mean(values)
    s = std(values, n_mean=m)
    if s == 0:
        # A zero spread makes the z-score undefined; stop before emitting
        # partial output rather than dying on a division by zero mid-file.
        sys.stderr.write("cannot compute zscores: standard deviation of the scores is 0\n")
        sys.exit(1)

# Single-argument print(...) behaves identically as a Python 2 print
# statement and as a Python 3 function call.
for (region, score) in rows:
    if score != "NA":
        print("%s\t%.6f" % (region, (float(score) - m) / s))
    else:
        print(region + "\t" + score)