Beispiel #1
0
 def plot_boxplot_r(self, filename=None, n_highest_weights=1, n_highest_weights_for_quantity=1, weight_threshold=None, logy=False):
     """Draw one R boxplot per entry of self.values_from_mr and mark selected values on it.

     For every (name, values) pair, plot_one_boxplot_r draws the plot; then, for
     each row of the values array, r.points overlays:
       * yellow / blue points at the indices returned by
         get_index_of_component_weights_over_threshold /
         get_index_of_weights_over_threshold (only if weight_threshold is given),
       * green points at get_index_of_n_highest_component_weights(...),
       * red points at get_index_of_n_highest_weights(...).

     Arguments:
         filename -- if not None, the plots go into this file as pdf.
         n_highest_weights -- passed to get_index_of_n_highest_weights;
             a value <= 0 disables the red points.
         n_highest_weights_for_quantity -- passed to
             get_index_of_n_highest_component_weights; <= 0 disables the green points.
         weight_threshold -- if not None, passed to the two *_over_threshold getters.
         logy -- if true, 'y' is handed to plot_one_boxplot_r as its third argument
             (presumably selecting a log-scale y-axis).
     """
     from rpy import r
     logstring = ''
     if logy:
         logstring = 'y'

     if filename is not None:
         r.pdf(file=filename)

     # The device must be closed even if a plotting call fails, otherwise the
     # pdf stays open (and incomplete) in the R session.
     try:
         for var, values in self.values_from_mr.iteritems():
             plot_one_boxplot_r(values, var, logstring)
             # Handle a 1-D array as a 2-D array with a single row.
             if values.ndim == 1:
                 v = resize(values, (1, values.size))
             else:
                 v = values
             n_rows = v.shape[0]
             ivar = self.get_index_for_quantity(var)
             if weight_threshold is not None:
                 # These index sets do not depend on the row, so look them up once.
                 iw_comp = self.get_index_of_component_weights_over_threshold(ivar, weight_threshold)
                 iw_all = self.get_index_of_weights_over_threshold(weight_threshold)
                 for i in range(n_rows):
                     # NOTE(review): the row slice i:(i+size) differs from the plain
                     # row index v[i, ...] used for the green/red points below --
                     # confirm this is intended. Kept unchanged here.
                     if iw_comp.size > 0:
                         r.points(i+1, v[i:(i+iw_comp.size),iw_comp], col='yellow', cex=0.5)
                     if iw_all.size > 0:
                         r.points(i+1, v[i:(i+iw_all.size),iw_all], col='blue', cex=0.5)
             if n_highest_weights_for_quantity > 0:
                 idx_comp = self.get_index_of_n_highest_component_weights(ivar, n_highest_weights_for_quantity)
                 for i in range(n_rows):
                     r.points(i+1, v[i,idx_comp], col='green', cex=0.5)
             if n_highest_weights > 0:
                 idx_all = self.get_index_of_n_highest_weights(n_highest_weights)
                 for i in range(n_rows):
                     r.points(i+1, v[i,idx_all], col='red', cex=0.5)
     finally:
         if filename is not None:
             r.dev_off()
Beispiel #2
0
def main():
    """Read two numeric columns from a tab-separated file and scatter-plot them to a PDF via R.

    Command-line arguments (sys.argv):
        1 -- input file name
        2 -- output PDF file name
        3, 4 -- 1-based numbers of the two columns to plot
        5, 6, 7 -- plot title, x-axis label, y-axis label

    Blank lines, lines starting with '#', and lines where either selected
    column is missing or not numeric are skipped. The value "NA" (any case)
    is accepted and stored as NaN. The plot is drawn only if at least one
    valid row was read. Errors are reported through stop_err().
    """
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        columns = int( sys.argv[3] ) - 1, int( sys.argv[4] ) - 1
    except ( IndexError, ValueError ):
        stop_err( "Columns not specified, your query does not contain a column of numerical data." )
    title = sys.argv[5]
    xlab = sys.argv[6]
    ylab = sys.argv[7]

    matrix = []
    # Diagnostics about skipped input; collected here but not reported by this block.
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ''
    invalid_column = 0

    # 'with' guarantees the input file is closed (requires Python >= 2.6).
    with open( in_fname ) as in_file:
        for i, line in enumerate( in_file ):
            line = line.rstrip( '\r\n' )
            if not line or line.startswith( '#' ):
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = i + 1
                continue

            fields = line.split( "\t" )
            row = []
            for column in columns:
                try:
                    val = fields[column]
                    if val.lower() == "na":
                        row.append( float( "nan" ) )
                    else:
                        row.append( float( val ) )
                except ( IndexError, ValueError ):
                    # Column missing (IndexError) or not numeric (ValueError).
                    skipped_lines += 1
                    if not first_invalid_line:
                        first_invalid_line = i + 1
                        try:
                            invalid_value = fields[column]
                        except IndexError:
                            invalid_value = ''
                        invalid_column = column + 1
                    break
            else:
                # Reached only when both columns were parsed without a break.
                matrix.append( row )

    # The former test 'skipped_lines < i' compared against the last 0-based line
    # index, so e.g. a file with a single valid line was never plotted. Having
    # at least one valid row is the actual requirement.
    if matrix:
        try:
            r.pdf( out_fname, 8, 8 )
            r.plot( array( matrix ), type="p", main=title, xlab=xlab, ylab=ylab, col="blue", pch=19 )
            r.dev_off()
        except Exception as exc:
            stop_err( "%s" % str( exc ) )
Beispiel #3
0
def plots(regression_o, getData_o):
    """Write two PDFs with R: the data with a regression line, and a boxplot of the data.

    regression_o is indexed with '(Intercept)' and 'y' to obtain the two
    arguments of R's abline. Returns the two file names as a tuple
    (regression plot first, boxplot second).
    """
    regression_pdf, boxplot_pdf = 'car_regress.pdf', 'car_hist.pdf'

    # First file: the data plus the fitted line in red.
    r.pdf(regression_pdf)
    r.plot(getData_o, ylab='dist', xlab='speed')
    intercept = regression_o['(Intercept)']
    slope = regression_o['y']
    r.abline(intercept, slope, col='red')
    r.dev_off()

    # Second file: boxplot of the same data.
    r.pdf(boxplot_pdf)
    r.boxplot(getData_o, names=['dist', 'speed'])
    r.dev_off()

    return regression_pdf, boxplot_pdf
Beispiel #4
0
	def output(self):
		"""Write the result table to self.of and plot one of its columns to 'p_value_cor.pdf'.

		The first line written holds the p-values; each following line holds a
		df value (counting up from self.df_lower) and one row of
		self.result_array. The plotted column is selected by self.column.
		"""
		self.matrix = array(self.result_array)
		header_cells = [str(p_value) for p_value in self.p_value_list]
		self.of.write('p_value\t%s\n'%'\t'.join(header_cells))
		for offset, row in enumerate(self.result_array):
			# values are converted to strings so they can be joined
			cells = [str(value) for value in row]
			self.of.write('%d\t%s\n'%(self.df_lower + offset, '\t'.join(cells)))
		r.pdf('p_value_cor.pdf')
		# pick the column to plot, together with its p-value for the title
		selected = self.column
		column_values = self.matrix[:,selected]
		title_p_value = self.p_value_list[selected]
		x_values = range(self.df_lower, self.df_upper+1)
		r.plot(x_values, column_values, type='o', pch='*', xlab='df', ylab='correlation', main='p_value: %s'%title_p_value)
		r.dev_off()
Beispiel #5
0
	def plot(self):
		"""Count node degrees into self.degree_dict and plot log(p(k)) against log(k) to a PDF.

		The degree of a node is its in-degree plus its out-degree; p(k) is the
		count for degree k divided by the number of nodes. Output goes to
		'degree_distribution.pdf'.
		"""
		nodes = self.graph.node_list()
		total = len(nodes)
		counts = self.degree_dict
		for node in nodes:
			k = self.graph.inc_degree(node) + self.graph.out_degree(node)
			counts[k] = counts.get(k, 0) + 1
		r.pdf('degree_distribution.pdf')
		# one (log k, log p(k)) pair per observed degree
		log_pairs = [(r.log(k), r.log(float(counts[k])/total)) for k in counts]
		log_k = [pair[0] for pair in log_pairs]
		log_p = [pair[1] for pair in log_pairs]
		r.plot(log_k, log_p, type='p', xlab='log(k)', ylab='log(p(k))')
		r.dev_off()
def main(args):
    """Chain BLAST runs over consecutive source files and plot the sizes of the homolog sets.

    The files returned by get_src_files(args.get('in')) are processed pairwise
    (file i against file i+1). From the second pair on, the query file is first
    reduced with the external 'reduce_fasta_file.py' to the ids of the previous
    round's hits. All parsed outputs are merged by integrate_all_homologs(), and
    a histogram of (len(list) + 1) per entry is written to
    'hist_size_homol_sets.pdf'. Nothing is plotted if there are no entries.
    """
    import subprocess

    sourcefiles = get_src_files(args.get('in'))
    hashofhos = None
    parsedfiles = []
    for queryfile, subjectfile in zip(sourcefiles, sourcefiles[1:]):

        if hashofhos:
            idfile = 'keepids.tmp'
            # 'with' closes the id file even if writing fails (Python >= 2.6).
            with open(idfile, 'w') as fw:
                fw.writelines(seq_id + '\n' for seq_id in hashofhos)
            outfile = 'red_' + get_basename(queryfile) + '.aa'
            # Argument list instead of an interpolated shell string: file names
            # containing spaces or shell metacharacters are passed through intact.
            subprocess.call(['reduce_fasta_file.py',
                             '-f', queryfile, '-i', idfile, '-o', outfile])
            queryfile = outfile

        blastout = blast(queryfile, subjectfile)
        parsedfile, hashofhos = parse_blastout(blastout, args)
        parsedfiles.append(parsedfile)
        infomsg("hits: %s" % len(hashofhos))

    Homologs = integrate_all_homologs(parsedfiles, args)

    # stats: set size is the list length plus one
    no = [len(orthlist) + 1 for orthlist in Homologs.values()]
    if not no:
        # max() of an empty list would raise; there is nothing to plot anyway.
        infomsg("no homologous sets found, skipping histogram")
        return

    from rpy import r
    outfile = 'hist_size_homol_sets.pdf'
    title = 'Size of Homologous Sets'
    x = 'number of homologs'
    y = 'frequency'
    r.pdf(outfile)
    r.hist(no, xlab=x, ylab=y, main=title, col='grey', breaks=max(no))
    r.dev_off()
Beispiel #7
0
def plot_values_as_boxplot_r(values_dict, filename=None, logy=False):
    """Create a set of boxplots (using R), one plot per variable in values_dict (dictionary of
    variable name and values (1- or 2-D array)), one box per row.
    If filename is given, the plot goes into that file as pdf. If 'logy' is True, the y-axis
    is plotted on the log scale.
    """
    from rpy import r
    logstring = 'y' if logy else ''

    if filename is not None:
        r.pdf(file=filename)

    # Close the pdf device even when a plot fails, so it is not left open
    # in the R session.
    try:
        for var, values in values_dict.iteritems():
            plot_one_boxplot_r(values, var, logstring)
    finally:
        if filename is not None:
            r.dev_off()
def main( args ):
	"""Chain BLAST runs over consecutive source files and plot the sizes of the homolog sets.

	The files returned by get_src_files(args.get('in')) are processed pairwise
	(file i against file i+1). From the second pair on, the query file is first
	reduced with the external 'reduce_fasta_file.py' to the ids of the previous
	round's hits. All parsed outputs are merged by integrate_all_homologs(), and
	a histogram of (len(list) + 1) per entry is written to
	'hist_size_homol_sets.pdf'. Nothing is plotted if there are no entries.
	"""
	import subprocess

	sourcefiles = get_src_files( args.get('in') )
	hashofhos = None
	parsedfiles = []
	for queryfile, subjectfile in zip( sourcefiles, sourcefiles[1:] ):

		if hashofhos:
			idfile = 'keepids.tmp'
			# 'with' closes the id file even if writing fails (Python >= 2.6).
			with open( idfile, 'w' ) as fw:
				fw.writelines( seq_id + '\n' for seq_id in hashofhos )
			outfile = 'red_' + get_basename(queryfile) + '.aa'
			# Argument list instead of an interpolated shell string: file names
			# containing spaces or shell metacharacters are passed through intact.
			subprocess.call( ['reduce_fasta_file.py', '-f', queryfile, '-i', idfile, '-o', outfile] )
			queryfile = outfile

		blastout = blast( queryfile, subjectfile )
		parsedfile, hashofhos = parse_blastout( blastout, args )
		parsedfiles.append( parsedfile )
		infomsg( "hits: %s" %len(hashofhos) )

	Homologs = integrate_all_homologs( parsedfiles, args )

	# stats: set size is the list length plus one
	no = [len(orthlist) + 1 for orthlist in Homologs.values()]
	if not no:
		# max() of an empty list would raise; there is nothing to plot anyway.
		infomsg( "no homologous sets found, skipping histogram" )
		return

	from rpy import r
	outfile = 'hist_size_homol_sets.pdf'
	title = 'Size of Homologous Sets'
	x = 'number of homologs'
	y = 'frequency'
	r.pdf( outfile )
	r.hist(no, xlab=x, ylab=y, main=title, col='grey', breaks=max(no))
	r.dev_off()
Beispiel #9
0
    def __init__(self, bedfile, dir, win, ma, out, ymin=0, ymax=0):
        """Initialise from a bed file, run the analysis and write '<out>.ConservationPlot.pdf'.

        bedfile -- passed on to Bed.__init__.
        dir     -- stored as self.dir.
        win     -- half-width of the plotted x-range (-win .. win); self.win is win + ma.
        ma      -- window for the moving average.
        out     -- prefix of the output PDF name.
        ymin, ymax -- y-axis limits; a false value (the default 0) means "derive
                   from self.mscore" (smallest positive score - 0.05 / largest score + 0.05).

        Plotting is best effort: if rpy cannot be imported, or anything fails
        while running/plotting, a message is printed and construction still
        succeeds. As before, self.Run() is only invoked when rpy is importable.
        """
        self.L = 17  # number of letters per line
        Bed.__init__(self, bedfile)
        self.dir = dir
        self.win = win + ma
        self.ma = ma  # window for moving average

        try:
            from rpy import r
        except Exception:
            # rpy may fail with errors other than ImportError (e.g. R not found).
            sys.stderr.write('error import r using rpy, will not generate phastCons plot\n')
            sys.stdout.write('%s %s\n' % sys.exc_info()[:2])
            return

        # 'except Exception' (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate; failures here are no longer misreported as an
        # import problem.
        try:
            r.pdf(out + '.ConservationPlot.pdf')
            try:
                self.Run()
                if not ymin:
                    ymin = self.mscore[self.mscore > 0].min() - 0.05
                if not ymax:
                    ymax = self.mscore.max() + 0.05

                r.plot(range(-1*win, win+1), self.mscore, type='l',
                       xlab='Distance from the Center of Enriched Regions',
                       ylab='Conservation Score', lwd=3, ylim=(ymin, ymax))
            finally:
                # Close the device once it has been opened, even after a failure.
                r.dev_off()
        except Exception:
            sys.stderr.write('error while generating phastCons plot\n')
            sys.stdout.write('%s %s\n' % sys.exc_info()[:2])
Beispiel #10
0
	def plot(self):
		"""Write one PDF per p-value level (0.05, 0.025, 0.01) plotting the percentages against self.dataset_no.

		For level 0.<suffix> the data come from self.per_<suffix> and the file is
		named 'per_p_value<suffix>.pdf'.
		"""
		for suffix in ('05', '025', '01'):
			r.pdf("per_p_value%s.pdf" % suffix)
			percentages = getattr(self, 'per_' + suffix)
			r.plot(self.dataset_no, percentages, type='o', pch='*', xlab='dataset no.',
				ylab='percentage', main='p_value: 0.%s. #edges compared with correlation cut_off 0.6' % suffix)
			r.dev_off()
Beispiel #11
0
 def plot(self, filename, list_to_plot, main_lab, xlab):
     """Write a histogram of list_to_plot to the PDF file 'filename' using R.

     The breaks are the integers 0 .. max(list_to_plot); main_lab and xlab are
     the plot title and x-axis label.
     """
     largest = max(list_to_plot)
     r.pdf(filename)
     unit_breaks = range(largest + 1)
     r.hist(list_to_plot, breaks=unit_breaks, las=1, main=main_lab, xlab=xlab)
     r.dev_off()
Beispiel #12
0
    # Make the CanonicalProperties
    try:
        cp = CanonicalProperties(options.muninn_log_file, options.which)
    except CanonicalException, e:
        print parser.error(e)

    # Store all the plotting data
    data = []

    # Print which is used
    print "Using:", cp.fullname


    # Plot the required output
    r.pdf(options.output, width=options.width, height=options.height)      
    r.par(cex=options.cex)

    inv_beta = arange(options.inv_beta_min, options.inv_beta_max, 0.01)
    beta = 1.0/inv_beta

    lnZ = vectorize(cp.lnZ)(beta)
    r.plot(inv_beta, lnZ, type='l', xlab=r("expression(beta**-1)"), ylab=r("""expression(paste("ln ", Z(beta)))"""))
    data.append((cp.number, "lnZ", (inv_beta, lnZ)))

    betaF = vectorize(cp.betaF)(beta)
    r.plot(inv_beta, betaF, type='l', xlab=r("expression(beta**-1)"), ylab=r("expression(F(beta) * beta)"))
    data.append((cp.number, "betaF", (inv_beta, betaF)))

    S = vectorize(cp.S)(beta)
    r.plot(inv_beta, S, type='l', xlab=r("expression(beta**-1)"), ylab=r("expression(S(beta) / k[B])"))
Beispiel #13
0
def plot(outfile, data, out_format='png'):
    """Plot per-problem CPU times for every series in ``data`` with R and save them to ``outfile``.

    ``data`` is a sequence of dicts. Every dict supplies 'problem' (x-axis
    label) plus 'translate' and 'preprocessing' (convertible to float). Each
    remaining key of ``data[0]`` names a series; its value is either
    convertible to float or the string 'no-plan'. A series point is its own
    value plus the translate and preprocessing values of the same entry;
    'no-plan' points are placed at -1000, outside the plotted y-range
    (0, max_value). The translate values are drawn as a line.

    ``out_format`` selects the R device, 'png' or 'pdf'; any other value raises
    ``Exception``.
    """
    # Reject a bad format before doing any work (and before touching R).
    if out_format not in ('png', 'pdf'):
        raise Exception('Unrecognised format: ' + str(out_format))

    print("total: " + str(len(data)))

    # Everything that can fail on malformed data is prepared *before* a
    # graphics device is opened, so a bad record cannot leave a device open.
    points = {'translate': [], 'preprocessing': []}
    for dat in data:
        points['translate'].append(float(dat['translate']))
        points['preprocessing'].append(float(dat['preprocessing']))

    series = [k for k in data[0]
              if k not in ('problem', 'translate', 'preprocessing')]
    for k in series:
        points[k] = []

    xlabels = []
    for index, dat in enumerate(data):
        for k in series:
            if dat[k] != 'no-plan':
                points[k].append(float(dat[k]) +
                                 points['translate'][index] +
                                 points['preprocessing'][index])
            else:
                points[k].append(-1000)
        xlabels.append(dat['problem'])

    max_value = max(max(points[k]) for k in series)
    yrange = (0, max_value)

    x = list(range(1, len(data) + 1))
    y = [-1000] * len(x)

    w = int(round(len(data)/4.0))
    if out_format == 'png':
        r.png(outfile, width=w*100, height=1000, res=72)
    else:
        r.pdf(outfile, width=w, height=10)

    try:
        r.par(mar=(7,5,4,2))
        # Dummy points below the y-range: this call only sets up the plot area.
        r.plot(x, y, main='', xlab="", ylab='',
               xaxt='n', yaxt='n', pch=0, ylim=yrange,
               mgp=(5,1,0))
        r.mtext("Problem", side=1, line=5)
        r.mtext("CPU Time (s)", side=2, line=3)

        r.lines(x, points['translate'], lty=1)
        # 'preprocessing' is deliberately not drawn on its own (insignificant).

        # Planner output: one plotting symbol per series. 'series' never
        # contains 'translate' or 'preprocessing' (filtered above), so the
        # former (misspelled) per-item check was dead and is dropped.
        pch_start = 1
        pch_index = pch_start
        legend_labels = []
        for k in series:
            r.points(x, points[k], pch=pch_index)
            pch_index += 1
            legend_labels.append("FD+" + k.upper())

        # put x-axis labels
        for position, label in enumerate(xlabels):
            r.axis(side=1, at=position+1, labels=label, las=2)

        # put y-axis labels
        base, step = get_y_step(max_value)
        print("base: " + str(base) + " -- step: " + str(step))
        y = base
        for i in range(0, step):
            r.axis(side=2, at=y, labels=str(y), las=2)
            y += base

        # legend
        r.legend(1, max_value, legend_labels,
                 pch=list(range(pch_start, pch_index)))
    finally:
        # Always close the device that was opened above.
        r.dev_off()
Beispiel #14
0
                      type='float',
                      help="ymin in the plot")

    parser.add_option("--ymax",
                      dest="ymax",
                      default=0,
                      type='float',
                      help="ymax in the plot")

    (options, args) = parser.parse_args()

    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit()

    if options.make:
        MakeBinary(sys.argv[1])
        sys.exit()

    for bed in args:
        out = bed.split('.')[0]
        phastCons(bed, options.dir, options.range, options.win, out,
                  options.ymin, options.ymax)
'''
from rpy import r
self = phastCons('bedfile', '/misc/iris/acct/weili/database/humanhg17_May2004/phastCons', 3000, 250)
r.pdf('ConservationPlot.pdf')
r.plot(range(-3000, 3001), self.mscore, type = 'l', xlab = 'Distance from Center of Binding Sites', ylab = 'Conservation Score', lwd= 4, xlim = (-2000, 2000), ylim = (0.10, 0.25))
r.dev_off()
'''
Beispiel #15
0
def plot(outfile, data, out_format='png'):
    """Plot per-problem CPU times for every series in ``data`` with R and save them to ``outfile``.

    ``data`` is a sequence of dicts. Every dict supplies 'problem' (x-axis
    label) plus 'translate' and 'preprocessing' (convertible to float). Each
    remaining key of ``data[0]`` names a series; its value is either
    convertible to float or the string 'no-plan'. A series point is its own
    value plus the translate and preprocessing values of the same entry;
    'no-plan' points are placed at -1000, outside the plotted y-range
    (0, max_value). The translate values are drawn as a line.

    ``out_format`` selects the R device, 'png' or 'pdf'; any other value raises
    ``Exception``.
    """
    # Reject a bad format before doing any work (and before touching R).
    if out_format not in ('png', 'pdf'):
        raise Exception('Unrecognised format: ' + str(out_format))

    print("total: " + str(len(data)))

    # Everything that can fail on malformed data is prepared *before* a
    # graphics device is opened, so a bad record cannot leave a device open.
    points = {'translate': [], 'preprocessing': []}
    for dat in data:
        points['translate'].append(float(dat['translate']))
        points['preprocessing'].append(float(dat['preprocessing']))

    series = [
        k for k in data[0]
        if k not in ('problem', 'translate', 'preprocessing')
    ]
    for k in series:
        points[k] = []

    xlabels = []
    for index, dat in enumerate(data):
        for k in series:
            if dat[k] != 'no-plan':
                points[k].append(float(dat[k]) +
                                 points['translate'][index] +
                                 points['preprocessing'][index])
            else:
                points[k].append(-1000)
        xlabels.append(dat['problem'])

    max_value = max(max(points[k]) for k in series)
    yrange = (0, max_value)

    x = list(range(1, len(data) + 1))
    y = [-1000] * len(x)

    w = int(round(len(data) / 4.0))
    if out_format == 'png':
        r.png(outfile, width=w * 100, height=1000, res=72)
    else:
        r.pdf(outfile, width=w, height=10)

    try:
        r.par(mar=(7, 5, 4, 2))
        # Dummy points below the y-range: this call only sets up the plot area.
        r.plot(x,
               y,
               main='',
               xlab="",
               ylab='',
               xaxt='n',
               yaxt='n',
               pch=0,
               ylim=yrange,
               mgp=(5, 1, 0))
        r.mtext("Problem", side=1, line=5)
        r.mtext("CPU Time (s)", side=2, line=3)

        r.lines(x, points['translate'], lty=1)
        # 'preprocessing' is deliberately not drawn on its own (insignificant).

        # Planner output: one plotting symbol per series. 'series' never
        # contains 'translate' or 'preprocessing' (filtered above), so the
        # former (misspelled) per-item check was dead and is dropped.
        pch_start = 1
        pch_index = pch_start
        legend_labels = []
        for k in series:
            r.points(x, points[k], pch=pch_index)
            pch_index += 1
            legend_labels.append("FD+" + k.upper())

        # put x-axis labels
        for position, label in enumerate(xlabels):
            r.axis(side=1, at=position + 1, labels=label, las=2)

        # put y-axis labels
        base, step = get_y_step(max_value)
        print("base: " + str(base) + " -- step: " + str(step))
        y = base
        for i in range(0, step):
            r.axis(side=2, at=y, labels=str(y), las=2)
            y += base

        # legend
        r.legend(1,
                 max_value,
                 legend_labels,
                 pch=list(range(pch_start, pch_index)))
    finally:
        # Always close the device that was opened above.
        r.dev_off()