Example #1
0
def summary(cvsfile, write_func, *args):
    """Write per-column summary statistics of a delimited file.

    Every row of ``cvsfile`` is read once.  For each column listed in
    ``cvsfile.fieldnames`` the values that are neither empty nor "NA" are
    collected, then one line per column is emitted through ``write_func``
    (after a header line) holding the sum, mean, median, standard deviation
    and cutoff (mean + std), each formatted with two decimals.  A column
    whose statistics cannot be computed is reported as "NA" for all five.

    Arguments:
    cvsfile    : iterable of dict-like rows that also exposes ``fieldnames``
                 (presumably a csv.DictReader -- confirm against caller)
    write_func : callable accepting a single string
    *args      : accepted for call compatibility and ignored
    """
    fsnames = cvsfile.fieldnames
    data_dict = dict((f, []) for f in fsnames)
    # print "\n".join(map( lambda x:":".join(map(str,x)) ,enumerate(fsnames))  )
    for row in cvsfile:
        for f in fsnames:
            # .get() reads the cell without inserting missing keys into the row.
            v = row.get(f)
            if v and v != "NA":
                data_dict[f].append(v)
    write_func("colnum:colname\tsum,mean,median,std,cutoff\n")
    for (i, f) in enumerate(fsnames):
        try:
            # Build a real list: the values are traversed by sum, mean,
            # median and std, and a Python 3 map() object could only be
            # consumed once.
            v_array = [float(x) for x in data_dict[f]]
            # Keep full precision while computing; round only when formatting,
            # so std and cutoff are not derived from an already-rounded mean.
            n_mean = mean(v_array)
            n_std = std(v_array, n_mean)
            stats = (sum(v_array), n_mean, median(v_array), n_std,
                     n_mean + n_std)
            fields = ["%.2f" % s for s in stats]
        except (ValueError, ZeroDivisionError):
            # ValueError: the column holds non-numeric text.
            # ZeroDivisionError: presumably raised by the stat helpers for a
            # column with no usable values (helpers are not visible here).
            fields = ["NA"] * 5
        write_func("%d:%s\t%s\n" % (i, f, ",".join(fields)))
Example #2
0
def summary(cvsfile, write_func, *args):
    """Report sum, mean, median, std and cutoff for every column.

    The rows of ``cvsfile`` are consumed once; for each name in
    ``cvsfile.fieldnames`` the cells that are non-empty and not "NA" are
    gathered.  A header line followed by one line per column is then passed
    to ``write_func``.  The cutoff is mean + std.  All numbers are printed
    with two decimals; a column whose statistics cannot be computed gets
    "NA" in all five positions.

    Arguments:
    cvsfile    : iterable of dict-like rows with a ``fieldnames`` attribute
                 (presumably a csv.DictReader -- confirm against caller)
    write_func : callable that takes one string
    *args      : extra positional arguments, ignored
    """
    fsnames = cvsfile.fieldnames
    data_dict = dict((name, []) for name in fsnames)
    #print "\n".join(map( lambda x:":".join(map(str,x)) ,enumerate(fsnames))  )
    for record in cvsfile:
        for name in fsnames:
            # Plain lookup; the row itself is left unmodified.
            cell = record.get(name)
            if cell and cell != "NA":
                data_dict[name].append(cell)
    write_func("colnum:colname\tsum,mean,median,std,cutoff\n")
    for (colnum, name) in enumerate(fsnames):
        try:
            # A list (not a map object) so the numbers can be walked by each
            # of sum/mean/median/std; a Python 3 map() is exhausted after the
            # first pass.
            numbers = [float(cell) for cell in data_dict[name]]
            col_mean = mean(numbers)
            # Pass the unrounded mean so std is not skewed by 2-decimal
            # rounding; rounding happens only in the "%.2f" formatting below.
            col_std = std(numbers, col_mean)
            v_sum = "%.2f" % sum(numbers)
            v_mean = "%.2f" % col_mean
            v_median = "%.2f" % median(numbers)
            v_std = "%.2f" % col_std
            v_cutoff = "%.2f" % (col_mean + col_std)
        except (ValueError, ZeroDivisionError):
            # ValueError: a cell is not numeric.  ZeroDivisionError:
            # presumably what the stat helpers raise for a column without
            # usable values (helpers are defined elsewhere -- confirm).
            (v_sum, v_mean, v_median, v_std, v_cutoff) = ["NA"] * 5
        write_func("%d:%s\t%s,%s,%s,%s,%s\n" %
                   (colnum, name, v_sum, v_mean, v_median, v_std, v_cutoff))
Example #3
0
# Time-stamp: <2011-01-31 17:17:00 Tao Liu>

# simple normalization script for a bedGraph file from wig2bedGraphBins.py script

import re
import sys
from taolib.CoreLib.BasicStat.Func import mean,std

# Usage check: exactly one input path is needed.
if len(sys.argv) < 2:
    sys.stderr.write("normalize bg file from wig2bedGraphBins.py, output zscores.\nneed 1 para: %s <bedGraph with fixed bins>\n" % sys.argv[0])
    sys.exit(1)

# d keeps every input row, in order, as [first three columns, raw 4th column];
# v keeps only the numeric 4th-column values used for the mean and std.
d = []
v = []
# The with-block closes the input file as soon as it has been read.
with open(sys.argv[1]) as fhd:
    for i in fhd:
        fs = i.rstrip().split()
        if not fs:
            # Blank line: nothing to normalize, and fs[3] would fail.
            continue
        d.append( ["\t".join(fs[:3]),fs[3]] )
        if fs[3] != "NA":
            v.append(float(fs[3]))

# Statistics are only needed (and only computed) when at least one numeric
# value exists; otherwise every row is "NA" and is echoed unchanged below.
if v:
    m = mean(v)
    s = std(v,n_mean=m)
    if s == 0:
        # Dividing by a zero std would raise ZeroDivisionError part-way
        # through the output; stop up front with an explanation instead.
        sys.stderr.write("all values in %s are identical (std is 0), zscores are undefined\n" % sys.argv[1])
        sys.exit(1)

# Emit rows in input order: z-score for numeric rows, "NA" rows untouched.
# A parenthesised single-argument print is valid on both Python 2 and 3.
for (i,j) in d:
    if j!="NA":
        print("%s\t%.6f" % (i,(float(j)-m)/s))
    else:
        print(i+"\t"+j)
Example #4
0
# simple normalization script for a bedGraph file from wig2bedGraphBins.py script

import re
import sys
from taolib.CoreLib.BasicStat.Func import mean, std

# Usage check: the bedGraph path is the only required argument.
if len(sys.argv) < 2:
    sys.stderr.write(
        "normalize bg file from wig2bedGraphBins.py, output zscores.\nneed 1 para: %s <bedGraph with fixed bins>\n"
        % sys.argv[0])
    sys.exit(1)

# d: all rows in input order as [tab-joined first three columns, raw score].
# v: the numeric scores only, used to compute the mean and std.
d = []
v = []
# Read inside a with-block so the file handle is closed deterministically.
with open(sys.argv[1]) as fhd:
    for line in fhd:
        fs = line.rstrip().split()
        if not fs:
            # Skip blank lines; indexing fs[3] on them would raise IndexError.
            continue
        d.append(["\t".join(fs[:3]), fs[3]])
        if fs[3] != "NA":
            v.append(float(fs[3]))

# With no numeric value there is nothing to normalize: every row is "NA"
# and is passed through as-is, so the statistics are skipped entirely.
if v:
    m = mean(v)
    s = std(v, n_mean=m)
    if s == 0:
        # A zero std makes the z-score division fail; report it cleanly
        # before any output is produced.
        sys.stderr.write(
            "all values in %s are identical (std is 0), zscores are undefined\n"
            % sys.argv[1])
        sys.exit(1)

# Output keeps the input order; "NA" rows are echoed unchanged.
# print(...) with one argument behaves the same on Python 2 and Python 3.
for (region, score) in d:
    if score != "NA":
        print("%s\t%.6f" % (region, (float(score) - m) / s))
    else:
        print(region + "\t" + score)