def make_output_and(cov, control_cov, out_prefix, window):
    """Dump coverage around a splice site to a text file and plot it as a PDF.

    Writes ``<out_prefix>_raw.txt`` with one tab-separated row per position
    (offset, main coverage, control coverage), then draws both series as
    points into ``<out_prefix>.pdf``.

    Args:
        cov: list of coverage values; index ``window // 2`` is the splice
            site itself (offset 0).
        control_cov: list of control coverage values, indexed the same way.
        out_prefix: prefix for both output file names.
        window: total width of the region; offsets run from
            ``-(window // 2)`` to ``window // 2`` inclusive.

    Raises:
        IndexError: if either list is shorter than ``2 * (window // 2) + 1``.
    """
    # Floor-divide once. The old "-window/2" rounded towards minus infinity
    # on Python 2, so an odd window started one position too early and read
    # cov[-1] (the *last* element); on Python 3 it produced a float that
    # range() rejects.
    half = window // 2
    offsets = list(range(-half, half + 1))

    # dump raw counts to file
    with open('%s_raw.txt' % out_prefix, 'w') as raw_out:
        for offset in offsets:
            raw_out.write('%d\t%e\t%e\n' % (offset,
                                            cov[half + offset],
                                            control_cov[half + offset]))

    # make plot data structures: the offsets are repeated once per series
    # NOTE(review): the data frame columns only line up when
    # len(cov) == len(control_cov) == len(offsets) -- confirm with callers.
    splice_i = ro.IntVector(offsets * 2)
    cov_r = ro.FloatVector(cov + control_cov)
    labels = ro.StrVector(['Main'] * len(cov) + ['Control'] * len(control_cov))
    df = ro.DataFrame({'splice_i': splice_i, 'cov': cov_r, 'label': labels})

    # construct plot
    gp = (ggplot2.ggplot(df)
          + ggplot2.aes_string(x='splice_i', y='cov', colour='label')
          + ggplot2.geom_point()
          + ggplot2.scale_x_continuous('Position relative to splice site')
          + ggplot2.scale_y_continuous('Coverage')
          + ggplot2.scale_colour_discrete(''))

    # plot to file; always close the device so a failed render does not
    # leave it open
    grdevices.pdf(file='%s.pdf' % out_prefix)
    try:
        gp.plot()
    finally:
        grdevices.dev_off()
def make_output_and(cov, control_cov, out_prefix, window):
    """Dump coverage around a splice site to a text file and plot it as a PDF.

    Writes ``<out_prefix>_raw.txt`` with one tab-separated row per position
    (offset, main coverage, control coverage), then draws both series as
    points into ``<out_prefix>.pdf``.

    Args:
        cov: list of coverage values; index ``window // 2`` is the splice
            site itself (offset 0).
        control_cov: list of control coverage values, indexed the same way.
        out_prefix: prefix for both output file names.
        window: total width of the region; offsets run from
            ``-(window // 2)`` to ``window // 2`` inclusive.

    Raises:
        IndexError: if either list is shorter than ``2 * (window // 2) + 1``.
    """
    # Floor-divide once. The old "-window / 2" rounded towards minus
    # infinity on Python 2, so an odd window started one position too early
    # and read cov[-1] (the *last* element); on Python 3 it produced a float
    # that range() rejects.
    half = window // 2
    offsets = list(range(-half, half + 1))

    # dump raw counts to file
    with open("%s_raw.txt" % out_prefix, "w") as raw_out:
        for offset in offsets:
            row = (offset, cov[half + offset], control_cov[half + offset])
            raw_out.write("%d\t%e\t%e\n" % row)

    # make plot data structures: the offsets are repeated once per series
    # NOTE(review): the data frame columns only line up when
    # len(cov) == len(control_cov) == len(offsets) -- confirm with callers.
    splice_i = ro.IntVector(offsets * 2)
    cov_r = ro.FloatVector(cov + control_cov)
    labels = ro.StrVector(["Main"] * len(cov) + ["Control"] * len(control_cov))
    df = ro.DataFrame({"splice_i": splice_i, "cov": cov_r, "label": labels})

    # construct plot
    gp = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x="splice_i", y="cov", colour="label")
        + ggplot2.geom_point()
        + ggplot2.scale_x_continuous("Position relative to splice site")
        + ggplot2.scale_y_continuous("Coverage")
        + ggplot2.scale_colour_discrete("")
    )

    # plot to file; always close the device so a failed render does not
    # leave it open
    grdevices.pdf(file="%s.pdf" % out_prefix)
    try:
        gp.plot()
    finally:
        grdevices.dev_off()
def make_output_and(te_tss_cov, control_te_tss_cov, out_prefix, upstream, downstream):
    """Write per-TE coverage tables and scatter plots around the TSS.

    For every key of ``te_tss_cov`` that passes the whitelist below, writes
    ``<out_prefix>_raw/<name>_<family>.txt`` (offset, main coverage, control
    coverage, tab-separated) and ``<out_prefix>_plot/<name>_<family>.pdf``.
    Both output directories are deleted and recreated on every call.

    Args:
        te_tss_cov: dict mapping a ``(name, family)`` pair to a list of
            coverage values; index ``upstream`` is offset 0.
        control_te_tss_cov: dict with the same keys holding control coverage.
        out_prefix: prefix for the ``_raw`` and ``_plot`` directories.
        upstream: number of positions before offset 0.
        downstream: number of positions after offset 0.
    """
    # Whitelists of the first and second key component that get reported.
    # Built once here instead of being re-created for every key, twice.
    # NOTE(review): 'n' and '*' look like aggregate/wildcard keys -- confirm.
    wanted_names = ('n', '*', 'HERVH-int', 'L2a', 'AluSx', 'AluJb', 'MIRb',
                    'LTR7')
    wanted_families = ('n', '*', 'LINE/L1', 'SINE/Alu', 'LTR/ERV1',
                       'LTR/ERVL-MaLR', 'LINE/L2', 'LTR/ERVL', 'SINE/MIR',
                       'DNA/hAT-Charlie', 'LTR/ERVK', 'DNA/TcMar-Tigger')
    selected = [te for te in te_tss_cov
                if te[0] in wanted_names and te[1] in wanted_families]

    def file_stem(te):
        # '/' in a name or family would otherwise create a sub-directory
        return '%s_%s' % (te[0].replace('/', '_'), te[1].replace('/', '_'))

    offsets = list(range(-upstream, downstream + 1))

    # clean raw counts dir
    raw_dir = '%s_raw' % out_prefix
    if os.path.isdir(raw_dir):
        shutil.rmtree(raw_dir)
    os.mkdir(raw_dir)

    # dump raw counts to file
    for te in selected:
        main_cov = te_tss_cov[te]
        control_cov = control_te_tss_cov[te]
        with open('%s/%s.txt' % (raw_dir, file_stem(te)), 'w') as raw_out:
            for i in offsets:
                raw_out.write('%d\t%e\t%e\n' % (i, main_cov[upstream + i],
                                                control_cov[upstream + i]))

    # clean plot dirs
    plot_dir = '%s_plot' % out_prefix
    if os.path.isdir(plot_dir):
        shutil.rmtree(plot_dir)
    os.mkdir(plot_dir)

    # make data structures shared by every plot: offsets and labels are
    # repeated once for the main series and once for the control series
    tss_i = ro.IntVector(offsets * 2)
    labels = ro.StrVector(['Main'] * len(offsets) + ['Control'] * len(offsets))

    for te in selected:
        cov = ro.FloatVector(te_tss_cov[te] + control_te_tss_cov[te])
        df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

        # construct full plot
        gp = (ggplot2.ggplot(df)
              + ggplot2.aes_string(x='tss_i', y='cov', colour='label')
              + ggplot2.geom_point()
              + ggplot2.scale_x_continuous('TSS index')
              + ggplot2.scale_y_continuous('Coverage')
              + ggplot2.scale_colour_discrete(''))

        # plot to file; always close the device, even if rendering fails
        grdevices.pdf(file='%s/%s.pdf' % (plot_dir, file_stem(te)))
        try:
            gp.plot()
        finally:
            grdevices.dev_off()
def main():
    """Re-plot a raw coverage table as smoothed main/control curves.

    Command line: ``[options] <raw file>``. The raw file holds one
    whitespace-separated row per position: offset, main coverage, control
    coverage. The plot is written to ``<out_prefix>_and.pdf``.
    """
    usage = 'usage: %prog [options] <raw file>'
    parser = OptionParser(usage)
    parser.add_option('-d', dest='downstream', default=2000, type='int', help='TSS downstream [Default: %default]')
    parser.add_option('-o', dest='out_prefix', default='tss', help='Output prefix [Default: %default]')
    parser.add_option('-u', dest='upstream', default=5000, type='int', help='TSS upstream [Default: %default]')
    parser.add_option('--ymax', dest='ymax', default=None, type='float', help='Y-coordinate limit [Default: %default]')
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide raw file')
    raw_file = args[0]

    # collect data
    coords = []
    main_cov = []
    control_cov = []
    with open(raw_file) as raw_in:
        for line in raw_in:
            a = line.split()
            if not a:
                # tolerate blank lines (e.g. a trailing newline)
                continue
            coords.append(int(a[0]))
            main_cov.append(float(a[1]))
            control_cov.append(float(a[2]))

    # The x axis is rebuilt from -u/-d rather than read from the file, so
    # the row count has to match; otherwise R either rejects the data frame
    # or silently recycles columns into a misaligned plot.
    num_pos = options.upstream + options.downstream + 1
    if len(coords) != num_pos:
        parser.error('Raw file has %d rows but -u %d / -d %d imply %d'
                     % (len(coords), options.upstream, options.downstream,
                        num_pos))

    # data structures: positions are spelled out once per series so every
    # column has the same length instead of relying on R's recycling
    positions = list(range(-options.upstream, options.downstream + 1))
    tss_i = ro.IntVector(positions * 2)
    labels = ro.StrVector(['Main'] * num_pos + ['Control'] * num_pos)
    cov = ro.FloatVector(main_cov + control_cov)

    df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

    # plot
    gp = (ggplot2.ggplot(df)
          + ggplot2.aes_string(x='tss_i', y='cov', colour='label')
          + ggplot2.geom_smooth(method='loess', size=1, span=0.2, se=False)
          + ggplot2.scale_x_continuous('TSS Position')
          + ggplot2.scale_colour_discrete('')
          + ggplot2.theme_bw())

    if options.ymax is None:
        gp += ggplot2.scale_y_continuous('Coverage')
    else:
        gp += ggplot2.scale_y_continuous('Coverage', limits=ro.FloatVector([0, options.ymax]))

    # save to file; always close the device, even if rendering fails
    grdevices.pdf(file='%s_and.pdf' % options.out_prefix)
    try:
        gp.plot()
    finally:
        grdevices.dev_off()
def make_output_and(tss_cov, control_tss_cov, out_prefix, upstream, downstream, zoom=1000):
    """Dump TSS coverage to a text file and draw a full and a zoomed plot.

    Writes ``<out_prefix>_raw.txt`` (offset, main coverage, control coverage,
    tab-separated), ``<out_prefix>_full.pdf`` covering every offset, and
    ``<out_prefix>_zoom.pdf`` with the x axis limited to ``[-zoom, zoom]``.

    Args:
        tss_cov: list of coverage values; index ``upstream`` is offset 0.
        control_tss_cov: list of control coverage values, indexed the same way.
        out_prefix: prefix for all three output file names.
        upstream: number of positions before offset 0.
        downstream: number of positions after offset 0.
        zoom: integer half-width of the zoomed x axis. Defaults to 1000, the
            value that used to be hard-coded.

    Raises:
        IndexError: if either list is shorter than
            ``upstream + downstream + 1``.
    """
    offsets = list(range(-upstream, downstream + 1))

    # dump raw counts to file
    with open('%s_raw.txt' % out_prefix, 'w') as raw_out:
        for i in offsets:
            raw_out.write('%d\t%e\t%e\n' % (i, tss_cov[upstream + i],
                                            control_tss_cov[upstream + i]))

    # make plot data structures: the offsets are repeated once per series
    # NOTE(review): the data frame columns only line up when both coverage
    # lists have exactly upstream + downstream + 1 entries -- confirm.
    tss_i = ro.IntVector(offsets * 2)
    cov = ro.FloatVector(tss_cov + control_tss_cov)
    labels = ro.StrVector(['Main'] * len(tss_cov) + ['Control'] * len(control_tss_cov))
    df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

    # The two plots differ only in their x scale and output file name.
    variants = (
        ('full', ggplot2.scale_x_continuous('TSS index')),
        ('zoom', ggplot2.scale_x_continuous(
            'TSS index', limits=ro.IntVector([-zoom, zoom]))),
    )

    for suffix, x_scale in variants:
        # construct plot
        gp = (ggplot2.ggplot(df)
              + ggplot2.aes_string(x='tss_i', y='cov', colour='label')
              + ggplot2.geom_point()
              + x_scale
              + ggplot2.scale_y_continuous('Coverage')
              + ggplot2.scale_colour_discrete(''))

        # plot to file; always close the device, even if rendering fails
        grdevices.pdf(file='%s_%s.pdf' % (out_prefix, suffix))
        try:
            gp.plot()
        finally:
            grdevices.dev_off()
def make_output_and(te_tss_cov, control_te_tss_cov, out_prefix, upstream, downstream):
    """Write per-TE coverage tables and scatter plots around the TSS.

    For every key of ``te_tss_cov`` that passes the whitelist below, writes
    ``<out_prefix>_raw/<name>_<family>.txt`` (offset, main coverage, control
    coverage, tab-separated) and ``<out_prefix>_plot/<name>_<family>.pdf``.
    Both output directories are deleted and recreated on every call.

    Args:
        te_tss_cov: dict mapping a ``(name, family)`` pair to a list of
            coverage values; index ``upstream`` is offset 0.
        control_te_tss_cov: dict with the same keys holding control coverage.
        out_prefix: prefix for the ``_raw`` and ``_plot`` directories.
        upstream: number of positions before offset 0.
        downstream: number of positions after offset 0.
    """
    # Whitelists of the first and second key component that get reported.
    # Built once here instead of being re-created for every key, twice.
    # NOTE(review): 'n' and '*' look like aggregate/wildcard keys -- confirm.
    wanted_names = ('n', '*', 'HERVH-int', 'L2a', 'AluSx', 'AluJb', 'MIRb', 'LTR7')
    wanted_families = ('n', '*', 'LINE/L1', 'SINE/Alu', 'LTR/ERV1', 'LTR/ERVL-MaLR',
                       'LINE/L2', 'LTR/ERVL', 'SINE/MIR', 'DNA/hAT-Charlie',
                       'LTR/ERVK', 'DNA/TcMar-Tigger')

    def is_wanted(te):
        return te[0] in wanted_names and te[1] in wanted_families

    def file_stem(te):
        # '/' in a name or family would otherwise create a sub-directory
        return '%s_%s' % (te[0].replace('/', '_'), te[1].replace('/', '_'))

    offsets = list(range(-upstream, downstream + 1))
    num_pos = len(offsets)

    # clean raw counts dir
    raw_dir = '%s_raw' % out_prefix
    if os.path.isdir(raw_dir):
        shutil.rmtree(raw_dir)
    os.mkdir(raw_dir)

    # dump raw counts to file
    for te in te_tss_cov:
        if not is_wanted(te):
            continue
        with open('%s/%s.txt' % (raw_dir, file_stem(te)), 'w') as raw_out:
            for i in offsets:
                raw_out.write('%d\t%e\t%e\n' % (i, te_tss_cov[te][upstream+i], control_te_tss_cov[te][upstream+i]))

    # clean plot dirs
    plot_dir = '%s_plot' % out_prefix
    if os.path.isdir(plot_dir):
        shutil.rmtree(plot_dir)
    os.mkdir(plot_dir)

    # make data structures shared by every plot: offsets and labels are
    # repeated once for the main series and once for the control series
    tss_i = ro.IntVector(offsets * 2)
    labels = ro.StrVector(['Main']*num_pos + ['Control']*num_pos)

    for te in te_tss_cov:
        if not is_wanted(te):
            continue

        cov = ro.FloatVector(te_tss_cov[te] + control_te_tss_cov[te])
        df = ro.DataFrame({'tss_i':tss_i, 'cov':cov, 'label':labels})

        # construct full plot
        gp = (ggplot2.ggplot(df) +
              ggplot2.aes_string(x='tss_i', y='cov', colour='label') +
              ggplot2.geom_point() +
              ggplot2.scale_x_continuous('TSS index') +
              ggplot2.scale_y_continuous('Coverage') +
              ggplot2.scale_colour_discrete(''))

        # plot to file; always close the device, even if rendering fails
        grdevices.pdf(file='%s/%s.pdf' % (plot_dir, file_stem(te)))
        try:
            gp.plot()
        finally:
            grdevices.dev_off()
def main():
    """Re-plot a raw coverage table as smoothed main/control curves.

    Command line: ``[options] <raw file>``. The raw file holds one
    whitespace-separated row per position: offset, main coverage, control
    coverage. The plot is written to ``<out_prefix>_and.pdf``.
    """
    usage = 'usage: %prog [options] <raw file>'
    parser = OptionParser(usage)
    parser.add_option('-d',
                      dest='downstream',
                      default=2000,
                      type='int',
                      help='TSS downstream [Default: %default]')
    parser.add_option('-o',
                      dest='out_prefix',
                      default='tss',
                      help='Output prefix [Default: %default]')
    parser.add_option('-u',
                      dest='upstream',
                      default=5000,
                      type='int',
                      help='TSS upstream [Default: %default]')
    parser.add_option('--ymax',
                      dest='ymax',
                      default=None,
                      type='float',
                      help='Y-coordinate limit [Default: %default]')
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide raw file')
    raw_file = args[0]

    # collect data
    coords = []
    main_cov = []
    control_cov = []
    with open(raw_file) as raw_in:
        for line in raw_in:
            a = line.split()
            if not a:
                # tolerate blank lines (e.g. a trailing newline)
                continue
            coords.append(int(a[0]))
            main_cov.append(float(a[1]))
            control_cov.append(float(a[2]))

    # The x axis is rebuilt from -u/-d rather than read from the file, so
    # the row count has to match; otherwise R either rejects the data frame
    # or silently recycles columns into a misaligned plot.
    num_pos = options.upstream + options.downstream + 1
    if len(coords) != num_pos:
        parser.error('Raw file has %d rows but -u %d / -d %d imply %d' %
                     (len(coords), options.upstream, options.downstream,
                      num_pos))

    # data structures: positions are spelled out once per series so every
    # column has the same length instead of relying on R's recycling
    positions = list(range(-options.upstream, options.downstream + 1))
    tss_i = ro.IntVector(positions * 2)
    labels = ro.StrVector(['Main'] * num_pos + ['Control'] * num_pos)
    cov = ro.FloatVector(main_cov + control_cov)

    df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

    # plot
    gp = (ggplot2.ggplot(df) +
          ggplot2.aes_string(x='tss_i', y='cov', colour='label') +
          ggplot2.geom_smooth(method='loess', size=1, span=0.2, se=False) +
          ggplot2.scale_x_continuous('TSS Position') +
          ggplot2.scale_colour_discrete('') +
          ggplot2.theme_bw())

    if options.ymax is None:
        gp += ggplot2.scale_y_continuous('Coverage')
    else:
        gp += ggplot2.scale_y_continuous('Coverage',
                                         limits=ro.FloatVector(
                                             [0, options.ymax]))

    # save to file; always close the device, even if rendering fails
    grdevices.pdf(file='%s_and.pdf' % options.out_prefix)
    try:
        gp.plot()
    finally:
        grdevices.dev_off()
samples = pd.concat((samplebs, samplebg)) # re-index to avoid duplicate row.names in Rdf samples.index = npy.arange(len(samples)) samplesgrouped = samples.groupby(['model']) variances = samplesgrouped['Zweighted'].aggregate(npy.var) print variances print variances['BG'] / variances['BS'] print estimatesum(samples) print samplesgrouped['Zweighted'].aggregate(estimatesum) print trueZnsum # grdevices.png(file="sampled-Z.png", width=4, height=3, units="in", res=300) rsamples = com.convert_to_r_dataframe(samples) pp = ggplot2.ggplot(rsamples) + \ ggplot2.aes_string(x='Z', color='factor(model)') + \ ggplot2.scale_colour_discrete(name="model") + \ ggplot2.geom_density() + \ ggplot2.scale_x_log10() # ggplot2.scale_x_continuous(limits=FloatVector((0, 1))) pp.plot() # grdevices.dev_off() def makeestimate(sampler, numsamples, **kwargs): samples = sample(sampler, numsamples, **kwargs) return estimatesum(samples['Zweighted']) def makeestimates(sampler, numsamples, numestimates, **kwargs): estimates = [ makeestimate(sampler, numsamples, **kwargs)
logging.debug('True sum: %s', trueZnsum) emdf = pd.DataFrame({ 'BSdists' : distsbs, 'BGdists' : distsbg, 'truesums' : truesums, 'varratios' : varratios, }) # Plot sampled Z logging.info('Plotting sampled Zn') grdevices.png(file="sampled-Z.png", width=4, height=3, units="in", res=300) rsamples = com.convert_to_r_dataframe(samples) pp = ggplot2.ggplot(rsamples) + \ ggplot2.aes_string(x='Z', color='factor(model)') + \ ggplot2.scale_colour_discrete(name="model") + \ ggplot2.geom_density() + \ ggplot2.scale_x_log10() # ggplot2.scale_x_continuous(limits=FloatVector((0, 1))) pp.plot() grdevices.dev_off() # Plot likelihood ratios logging.info('Plotting likelihood ratios from binding site samples') grdevices.png(file="sampled-ratios.png", width=4, height=3, units="in", res=300) rsamplesbs = com.convert_to_r_dataframe(samples[samples['model'] == 'BS']) pp = ggplot2.ggplot(rsamplesbs) + \ ggplot2.aes_string(x='ir') + \ ggplot2.geom_density() + \ ggplot2.scale_x_log10()