def plot_boxplot_r(self, filename=None, n_highest_weights=1, n_highest_weights_for_quantity=1, weight_threshold=None, logy=False):
    """Draw one R boxplot per quantity in self.values_from_mr and overlay selected points.

    Each quantity is drawn with plot_one_boxplot_r; coloured points are then
    added on top of every box (one box per row of the value array):

    * yellow -- columns returned by get_index_of_component_weights_over_threshold
      for this quantity (only when weight_threshold is given)
    * blue   -- columns returned by get_index_of_weights_over_threshold
      (only when weight_threshold is given)
    * green  -- columns returned by get_index_of_n_highest_component_weights
      (only when n_highest_weights_for_quantity > 0)
    * red    -- columns returned by get_index_of_n_highest_weights
      (only when n_highest_weights > 0)

    If filename is given, the plots go into that file as pdf; the device is
    closed even when plotting fails. If logy is True, 'y' is passed to
    plot_one_boxplot_r as the log-axis string.
    """
    from rpy import r
    logstring = ''
    if logy:
        logstring = 'y'
    if filename is not None:
        r.pdf(file=filename)
    try:
        for var, values in self.values_from_mr.iteritems():
            plot_one_boxplot_r(values, var, logstring)
            # View a 1-D array as a single row so rows can be indexed uniformly below.
            if values.ndim == 1:
                v = resize(values, (1, values.size))
            else:
                v = values
            nrows = v.shape[0]
            ivar = self.get_index_for_quantity(var)
            # The index lookups below do not depend on the row, so they are done
            # once per quantity (assumes these getters have no side effects).
            if weight_threshold is not None:
                comp_iw = self.get_index_of_component_weights_over_threshold(ivar, weight_threshold)
                all_iw = self.get_index_of_weights_over_threshold(weight_threshold)
                for i in range(0, nrows):
                    # NOTE(review): the row slice i:(i+size) is kept exactly as in the
                    # original; confirm it is not meant to be the single row i.
                    if comp_iw.size > 0:
                        r.points(i+1, v[i:(i+comp_iw.size), comp_iw], col='yellow', cex=0.5)
                    if all_iw.size > 0:
                        r.points(i+1, v[i:(i+all_iw.size), all_iw], col='blue', cex=0.5)
            if n_highest_weights_for_quantity > 0:
                top_comp = self.get_index_of_n_highest_component_weights(ivar, n_highest_weights_for_quantity)
                for i in range(0, nrows):
                    r.points(i+1, v[i, top_comp], col='green', cex=0.5)
            if n_highest_weights > 0:
                top_all = self.get_index_of_n_highest_weights(n_highest_weights)
                for i in range(0, nrows):
                    r.points(i+1, v[i, top_all], col='red', cex=0.5)
    finally:
        # Always release the pdf device, otherwise the file stays open in R.
        if filename is not None:
            r.dev_off()
def main():
    """Read two numeric columns from a tab-separated file and scatter-plot them to a PDF.

    Command line (sys.argv): 1 = input file, 2 = output PDF, 3 and 4 = 1-based
    column numbers, 5 = plot title, 6 = x-axis label, 7 = y-axis label.

    Blank lines, lines starting with '#', and lines where either column is
    missing or not numeric are skipped. The value "na" (any case) is read as
    NaN. The plot is drawn when at least one line was usable. Plotting errors
    are reported through stop_err.
    """
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        # Column numbers arrive 1-based; convert to 0-based field indexes.
        columns = int(sys.argv[3]) - 1, int(sys.argv[4]) - 1
    except (IndexError, ValueError):
        # NOTE(review): relies on stop_err terminating the program -- confirm.
        stop_err("Columns not specified, your query does not contain a column of numerical data.")
    title = sys.argv[5]
    xlab = sys.argv[6]
    ylab = sys.argv[7]

    matrix = []
    total_lines = 0
    skipped_lines = 0
    # Details of the first rejected line; collected but not reported in this function.
    first_invalid_line = 0
    invalid_value = ''
    invalid_column = 0

    with open(in_fname) as infile:
        for i, line in enumerate(infile):
            total_lines = i + 1
            line = line.rstrip('\r\n')
            row = []
            valid = bool(line) and not line.startswith('#')
            if valid:
                fields = line.split("\t")
                for column in columns:
                    try:
                        val = fields[column]
                        if val.lower() == "na":
                            row.append(float("nan"))
                        else:
                            row.append(float(val))
                    except (IndexError, ValueError):
                        valid = False
                        if not first_invalid_line:
                            try:
                                invalid_value = fields[column]
                            except IndexError:
                                invalid_value = ''
                            invalid_column = column + 1
                        break
            if valid:
                matrix.append(row)
            else:
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = i + 1

    # Compare against the line COUNT (last index + 1); comparing against the last
    # index rejected input that had exactly one usable row.
    if skipped_lines < total_lines:
        try:
            r.pdf(out_fname, 8, 8)
            try:
                r.plot(array(matrix), type="p", main=title, xlab=xlab, ylab=ylab, col="blue", pch=19)
            finally:
                # Close the device even if the plot call fails.
                r.dev_off()
        except Exception:
            stop_err("%s" % str(sys.exc_info()[1]))
def plots(regression_o, getData_o):
    """Render the data set with R as a scatter plot plus fitted line, and as a boxplot.

    The scatter plot (with a red line built from regression_o['(Intercept)']
    and regression_o['y']) goes to 'car_regress.pdf'; the boxplot goes to
    'car_hist.pdf'. Returns both file names in that order.
    """
    scatter_pdf, box_pdf = 'car_regress.pdf', 'car_hist.pdf'

    # Scatter plot with the regression line on top.
    r.pdf(scatter_pdf)
    r.plot(getData_o, ylab='dist', xlab='speed')
    intercept = regression_o['(Intercept)']
    slope = regression_o['y']
    r.abline(intercept, slope, col='red')
    r.dev_off()

    # Boxplot of the same data.
    r.pdf(box_pdf)
    r.boxplot(getData_o, names=['dist', 'speed'])
    r.dev_off()

    return scatter_pdf, box_pdf
def output(self):
    """Write the correlation table to self.of and plot one of its columns to a PDF.

    The table has a 'p_value' header row built from self.p_value_list and one
    row per entry of self.result_array, labelled with consecutive df values
    starting at self.df_lower. Column self.column of the table is then plotted
    against df (self.df_lower .. self.df_upper) into 'p_value_cor.pdf'.
    """
    self.matrix = array(self.result_array)

    header = '\t'.join(map(str, self.p_value_list))
    self.of.write('p_value\t%s\n' % header)

    for offset, cor_row in enumerate(self.result_array):
        # Stringify every cell so the row can be tab-joined.
        cells = '\t'.join([str(cell) for cell in cor_row])
        self.of.write('%d\t%s\n' % (self.df_lower + offset, cells))

    r.pdf('p_value_cor.pdf')
    # Select a single column to plot.
    selected = self.matrix[:, self.column]
    label = self.p_value_list[self.column]
    df_axis = range(self.df_lower, self.df_upper + 1)
    r.plot(df_axis, selected, type='o', pch='*', xlab='df',
           ylab='correlation', main='p_value: %s' % label)
    r.dev_off()
def plot(self):
    """Tally node degrees of self.graph and plot the log-log degree distribution.

    A node's degree is its in-degree plus its out-degree. The tallies are
    accumulated into self.degree_dict, then log(degree) is plotted against
    log(fraction of nodes with that degree) into 'degree_distribution.pdf'.
    """
    nodes = self.graph.node_list()
    node_total = len(nodes)
    counts = self.degree_dict
    for node in nodes:
        k = self.graph.inc_degree(node) + self.graph.out_degree(node)
        counts[k] = counts.get(k, 0) + 1

    r.pdf('degree_distribution.pdf')
    log_k = []
    log_pk = []
    for k in counts:
        log_k.append(r.log(k))
        log_pk.append(r.log(float(counts[k]) / node_total))
    r.plot(log_k, log_pk, type='p', xlab='log(k)', ylab='log(p(k))')
    r.dev_off()
def main(args):
    """Chain BLAST searches over consecutive source files and plot homolog set sizes.

    For every consecutive pair of files from get_src_files(args.get('in')),
    the first is blasted against the second. From the second pair onwards the
    query is first reduced (via the external reduce_fasta_file.py) to the ids
    that produced hits in the previous round. All parsed results are merged
    with integrate_all_homologs, and a histogram of the set sizes
    (len(list) + 1 for each entry) is written to 'hist_size_homol_sets.pdf'.

    Raises subprocess.CalledProcessError if reduce_fasta_file.py exits with a
    non-zero status. If no homologous sets are found, the histogram is skipped.
    """
    import subprocess

    sourcefiles = get_src_files(args.get('in'))
    hashofhos = None
    parsedfiles = []
    for queryfile, subjectfile in zip(sourcefiles, sourcefiles[1:]):
        if hashofhos:
            idfile = 'keepids.tmp'
            with open(idfile, 'w') as fw:
                for hit_id in hashofhos:
                    fw.write(hit_id + '\n')
            outfile = 'red_' + get_basename(queryfile) + '.aa'
            # Argument list instead of a shell string: file names containing
            # spaces or shell metacharacters are passed through untouched, and
            # a failing helper is no longer silently ignored.
            subprocess.check_call(['reduce_fasta_file.py', '-f', queryfile,
                                   '-i', idfile, '-o', outfile])
            queryfile = outfile
        blastout = blast(queryfile, subjectfile)
        parsedfile, hashofhos = parse_blastout(blastout, args)
        parsedfiles.append(parsedfile)
        infomsg("hits: %s" % len(hashofhos))

    Homologs = integrate_all_homologs(parsedfiles, args)

    # stats: size of each set = listed members + 1
    set_sizes = [len(orthlist) + 1 for _sid, orthlist in Homologs.iteritems()]
    if not set_sizes:
        # max() of an empty list would raise; there is nothing to draw anyway.
        infomsg("no homologous sets found, skipping histogram")
        return

    from rpy import r
    outfile = 'hist_size_homol_sets.pdf'
    title = 'Size of Homologous Sets'
    x = 'number of homologs'
    y = 'frequency'
    r.pdf(outfile)
    try:
        r.hist(set_sizes, xlab=x, ylab=y, main=title, col='grey', breaks=max(set_sizes))
    finally:
        # Close the device even if hist() fails.
        r.dev_off()
def plot_values_as_boxplot_r(values_dict, filename=None, logy=False):
    """Create a set of boxplots (using R), one plot per variable in values_dict
    (dictionary of variable name and values (1- or 2-D array)), one box per row.
    If filename is given, the plot goes into that file as pdf; the pdf device is
    closed even if drawing one of the plots fails.
    If 'logy' is True, the y-axis is plotted on the log scale.
    """
    from rpy import r
    logstring = ''
    if logy:
        logstring = 'y'
    if filename is not None:
        r.pdf(file=filename)
    try:
        for var, values in values_dict.iteritems():
            plot_one_boxplot_r(values, var, logstring)
    finally:
        # Release the device on every exit path so the file is not left open in R.
        if filename is not None:
            r.dev_off()
def main( args ):
    """Chain BLAST searches over consecutive source files and plot homolog set sizes.

    For every consecutive pair of files from get_src_files(args.get('in')),
    the first is blasted against the second. From the second pair onwards the
    query is first reduced (via the external reduce_fasta_file.py) to the ids
    that produced hits in the previous round. All parsed results are merged
    with integrate_all_homologs, and a histogram of the set sizes
    (len(list) + 1 for each entry) is written to 'hist_size_homol_sets.pdf'.

    Raises subprocess.CalledProcessError if reduce_fasta_file.py exits with a
    non-zero status. If no homologous sets are found, the histogram is skipped.
    """
    import subprocess

    sourcefiles = get_src_files( args.get('in') )
    hashofhos = None
    parsedfiles = []
    for queryfile, subjectfile in zip( sourcefiles, sourcefiles[1:] ):
        if hashofhos:
            idfile = 'keepids.tmp'
            with open( idfile, 'w' ) as fw:
                for hit_id in hashofhos:
                    fw.write( hit_id + '\n' )
            outfile = 'red_' + get_basename(queryfile) + '.aa'
            # Argument list instead of a shell string: file names containing
            # spaces or shell metacharacters are passed through untouched, and
            # a failing helper is no longer silently ignored.
            subprocess.check_call( ['reduce_fasta_file.py', '-f', queryfile,
                                    '-i', idfile, '-o', outfile] )
            queryfile = outfile
        blastout = blast( queryfile, subjectfile )
        parsedfile, hashofhos = parse_blastout( blastout, args )
        parsedfiles.append( parsedfile )
        infomsg( "hits: %s" % len(hashofhos) )

    Homologs = integrate_all_homologs( parsedfiles, args )

    # stats: size of each set = listed members + 1
    set_sizes = [len(orthlist) + 1 for _sid, orthlist in Homologs.iteritems()]
    if not set_sizes:
        # max() of an empty list would raise; there is nothing to draw anyway.
        infomsg( "no homologous sets found, skipping histogram" )
        return

    from rpy import r
    outfile = 'hist_size_homol_sets.pdf'
    title = 'Size of Homologous Sets'
    x = 'number of homologs'
    y = 'frequency'
    r.pdf( outfile )
    try:
        r.hist(set_sizes, xlab=x, ylab=y, main=title, col='grey', breaks=max(set_sizes))
    finally:
        # Close the device even if hist() fails.
        r.dev_off()
def __init__(self, bedfile, dir, win, ma, out, ymin=0, ymax=0):
    """Load the bed file, run the analysis and write '<out>.ConservationPlot.pdf'.

    bedfile -- passed to Bed.__init__
    dir     -- stored as self.dir
    win     -- half-width of the plotted window; the x axis is range(-win, win+1).
               self.win is stored as win + ma.
    ma      -- window for the moving average
    out     -- prefix of the output PDF name
    ymin, ymax -- y-axis limits; a falsy value (the default 0) means "derive from
               self.mscore": smallest positive score - 0.05, and largest score + 0.05.

    Plotting is best-effort: any ordinary exception raised while importing rpy,
    running self.Run() or plotting is reported on stderr/stdout and swallowed.
    KeyboardInterrupt and SystemExit are no longer swallowed, and the PDF
    device is closed even when the run or the plot fails.
    """
    self.L = 17  # number of letters per line
    Bed.__init__(self, bedfile)
    self.dir = dir
    self.win = win + ma
    self.ma = ma  # window for moving average
    try:
        from rpy import r
        r.pdf(out + '.ConservationPlot.pdf')
        try:
            # presumably Run() fills self.mscore, which is read below -- confirm
            self.Run()
            if not ymin:
                ymin = self.mscore[self.mscore > 0].min() - 0.05
            if not ymax:
                ymax = self.mscore.max() + 0.05
            r.plot(range(-1*win, win+1), self.mscore, type='l',
                   xlab='Distance from the Center of Enriched Regions',
                   ylab='Conservation Score', lwd=3, ylim=(ymin, ymax))
        finally:
            # Do not leave the device (and the output file) open on failure.
            r.dev_off()
    except Exception:
        sys.stderr.write('error import r using rpy, will not generate phastCons plot' + '\n')
        sys.stdout.write('%s %s\n' % (sys.exc_info()[0], sys.exc_info()[1]))
def plot(self):
    """Write three PDF line plots, one per p-value cut-off (0.05, 0.025, 0.01).

    Each plot shows self.dataset_no on the x axis against the matching
    percentage series (self.per_05, self.per_025, self.per_01).
    """
    # (output file, attribute holding the series, plot title)
    panels = (
        ("per_p_value05.pdf", 'per_05',
         'p_value: 0.05. #edges compared with correlation cut_off 0.6'),
        ("per_p_value025.pdf", 'per_025',
         'p_value: 0.025. #edges compared with correlation cut_off 0.6'),
        ("per_p_value01.pdf", 'per_01',
         'p_value: 0.01. #edges compared with correlation cut_off 0.6'),
    )
    for pdf_name, series_attr, heading in panels:
        r.pdf(pdf_name)
        r.plot(self.dataset_no, getattr(self, series_attr), type='o', pch='*',
               xlab='dataset no.', ylab='percentage', main=heading)
        r.dev_off()
def plot(self, filename, list_to_plot, main_lab, xlab):
    """Write a histogram of list_to_plot to the PDF file `filename`.

    Break points are the integers 0 .. max(list_to_plot); main_lab is the
    plot title and xlab the x-axis label.
    """
    upper = max(list_to_plot)
    r.pdf(filename)
    bin_edges = range(upper + 1)
    r.hist(list_to_plot, breaks=bin_edges, las=1, main=main_lab, xlab=xlab)
    r.dev_off()
# Make the CanonicalProperties try: cp = CanonicalProperties(options.muninn_log_file, options.which) except CanonicalException, e: print parser.error(e) # Store all the plotting data data = [] # Print which is used print "Using:", cp.fullname # Plot the required output r.pdf(options.output, width=options.width, height=options.height) r.par(cex=options.cex) inv_beta = arange(options.inv_beta_min, options.inv_beta_max, 0.01) beta = 1.0/inv_beta lnZ = vectorize(cp.lnZ)(beta) r.plot(inv_beta, lnZ, type='l', xlab=r("expression(beta**-1)"), ylab=r("""expression(paste("ln ", Z(beta)))""")) data.append((cp.number, "lnZ", (inv_beta, lnZ))) betaF = vectorize(cp.betaF)(beta) r.plot(inv_beta, betaF, type='l', xlab=r("expression(beta**-1)"), ylab=r("expression(F(beta) * beta)")) data.append((cp.number, "betaF", (inv_beta, betaF))) S = vectorize(cp.S)(beta) r.plot(inv_beta, S, type='l', xlab=r("expression(beta**-1)"), ylab=r("expression(S(beta) / k[B])"))
def plot(outfile, data, out_format='png'):
    """Plot per-problem CPU time for every planner series in `data` using R.

    data is a non-empty list of dicts, one per problem. Each dict has the keys
    'problem' (x-axis label), 'translate' and 'preprocessing' (times), plus one
    key per series; the series names are taken from the first dict. A series
    value is either a time or the string 'no-plan'.

    The 'translate' times are drawn as a line. Every series is drawn as points
    at series time + translate + preprocessing; 'no-plan' entries are placed at
    -1000, i.e. below the visible y range, so they do not show.

    outfile    -- output file name
    out_format -- 'png' or 'pdf'

    Raises Exception for an unrecognised format and ValueError for empty data,
    both before any graphics device is opened. The device is closed even if
    plotting fails.
    """
    if out_format not in ('png', 'pdf'):
        raise Exception('Unrecognised format: ' + str(out_format))
    if not data:
        raise ValueError('No data to plot')

    # Width grows with the number of problems; never let it round down to zero.
    w = max(1, int(round(len(data) / 4.0)))
    if out_format == 'png':
        r.png(outfile, width=w * 100, height=1000, res=72)
    else:
        r.pdf(outfile, width=w, height=10)

    try:
        print("total: " + str(len(data)))

        points = {
            'translate': [float(dat['translate']) for dat in data],
            'preprocessing': [float(dat['preprocessing']) for dat in data],
        }
        # Everything that is not a bookkeeping column is a planner series.
        series = [k for k in data[0]
                  if k not in ("problem", 'translate', 'preprocessing')]
        for k in series:
            points[k] = []

        xlabels = []
        for index, dat in enumerate(data):
            for k in series:
                if dat[k] != 'no-plan':
                    points[k].append(float(dat[k]) +
                                     points['translate'][index] +
                                     points['preprocessing'][index])
                else:
                    # Off-canvas marker for "no plan found".
                    points[k].append(-1000)
            xlabels.append(dat['problem'])

        max_value = max(max(points[k]) for k in series)
        yrange = (0, max_value)

        legend_labels = []
        x = list(range(1, len(points['translate']) + 1))
        # Placeholder points below the y range: sets up an empty canvas.
        y = [-1000 for _ in x]

        r.par(mar=(7, 5, 4, 2))
        r.plot(x, y, main='', xlab="", ylab='', xaxt='n', yaxt='n',
               pch=0, ylim=yrange, mgp=(5, 1, 0))
        r.mtext("Problem", side=1, line=5)
        r.mtext("CPU Time (s)", side=2, line=3)

        pch_start = 1
        pch_index = pch_start

        # "translate" is drawn as a line; "preprocessing" is not drawn on its
        # own since it is insignificant.
        r.lines(x, points['translate'], lty=1)

        # Planner output: `series` already excludes 'translate' and
        # 'preprocessing', so no further filtering is needed here.
        for k in series:
            r.points(x, points[k], pch=pch_index)
            pch_index += 1
            legend_labels.append("FD+" + k.upper())

        # put x-axis labels
        for i, label in enumerate(xlabels):
            r.axis(side=1, at=i + 1, labels=label, las=2)

        # put y-axis labels
        base, step = get_y_step(max_value)
        print("base: " + str(base) + " -- step: " + str(step))
        y = base
        for _ in range(0, step):
            r.axis(side=2, at=y, labels=str(y), las=2)
            y += base

        # legend
        r.legend(1, max_value, legend_labels,
                 pch=list(range(pch_start, pch_index)))
    finally:
        r.dev_off()
type='float', help="ymin in the plot") parser.add_option("--ymax", dest="ymax", default=0, type='float', help="ymax in the plot") (options, args) = parser.parse_args() if len(sys.argv) < 2: parser.print_help() sys.exit() if options.make: MakeBinary(sys.argv[1]) sys.exit() for bed in args: out = bed.split('.')[0] phastCons(bed, options.dir, options.range, options.win, out, options.ymin, options.ymax) ''' from rpy import r self = phastCons('bedfile', '/misc/iris/acct/weili/database/humanhg17_May2004/phastCons', 3000, 250) r.pdf('ConservationPlot.pdf') r.plot(range(-3000, 3001), self.mscore, type = 'l', xlab = 'Distance from Center of Binding Sites', ylab = 'Conservation Score', lwd= 4, xlim = (-2000, 2000), ylim = (0.10, 0.25)) r.dev_off() '''
def plot(outfile, data, out_format='png'):
    """Plot per-problem CPU time for every planner series in `data` using R.

    data is a non-empty list of dicts, one per problem. Each dict has the keys
    'problem' (x-axis label), 'translate' and 'preprocessing' (times), plus one
    key per series; the series names are taken from the first dict. A series
    value is either a time or the string 'no-plan'.

    The 'translate' times are drawn as a line. Every series is drawn as points
    at series time + translate + preprocessing; 'no-plan' entries are placed at
    -1000, i.e. below the visible y range, so they do not show.

    outfile    -- output file name
    out_format -- 'png' or 'pdf'

    Raises Exception for an unrecognised format and ValueError for empty data,
    both before any graphics device is opened. The device is closed even if
    plotting fails.
    """
    if out_format not in ('png', 'pdf'):
        raise Exception('Unrecognised format: ' + str(out_format))
    if not data:
        raise ValueError('No data to plot')

    # Width grows with the number of problems; never let it round down to zero.
    w = max(1, int(round(len(data) / 4.0)))
    if out_format == 'png':
        r.png(outfile, width=w * 100, height=1000, res=72)
    else:
        r.pdf(outfile, width=w, height=10)

    try:
        print("total: " + str(len(data)))

        points = {
            'translate': [float(dat['translate']) for dat in data],
            'preprocessing': [float(dat['preprocessing']) for dat in data],
        }
        # Everything that is not a bookkeeping column is a planner series.
        series = [k for k in data[0]
                  if k not in ("problem", 'translate', 'preprocessing')]
        for k in series:
            points[k] = []

        xlabels = []
        for index, dat in enumerate(data):
            for k in series:
                if dat[k] != 'no-plan':
                    points[k].append(float(dat[k]) +
                                     points['translate'][index] +
                                     points['preprocessing'][index])
                else:
                    # Off-canvas marker for "no plan found".
                    points[k].append(-1000)
            xlabels.append(dat['problem'])

        max_value = max(max(points[k]) for k in series)
        yrange = (0, max_value)

        legend_labels = []
        x = list(range(1, len(points['translate']) + 1))
        # Placeholder points below the y range: sets up an empty canvas.
        y = [-1000 for _ in x]

        r.par(mar=(7, 5, 4, 2))
        r.plot(x, y, main='', xlab="", ylab='', xaxt='n', yaxt='n',
               pch=0, ylim=yrange, mgp=(5, 1, 0))
        r.mtext("Problem", side=1, line=5)
        r.mtext("CPU Time (s)", side=2, line=3)

        pch_start = 1
        pch_index = pch_start

        # "translate" is drawn as a line; "preprocessing" is not drawn on its
        # own since it is insignificant.
        r.lines(x, points['translate'], lty=1)

        # Planner output: `series` already excludes 'translate' and
        # 'preprocessing', so no further filtering is needed here.
        for k in series:
            r.points(x, points[k], pch=pch_index)
            pch_index += 1
            legend_labels.append("FD+" + k.upper())

        # put x-axis labels
        for i, label in enumerate(xlabels):
            r.axis(side=1, at=i + 1, labels=label, las=2)

        # put y-axis labels
        base, step = get_y_step(max_value)
        print("base: " + str(base) + " -- step: " + str(step))
        y = base
        for _ in range(0, step):
            r.axis(side=2, at=y, labels=str(y), las=2)
            y += base

        # legend
        r.legend(1, max_value, legend_labels,
                 pch=list(range(pch_start, pch_index)))
    finally:
        r.dev_off()