コード例 #1
0
def funcion(dato,opciones):
    """Evaluate a normal distribution through R and optionally plot it.

    ``opciones`` may hold any of three sections, each a mapping:

    * ``"Cuantiles"`` (keys ``"Probabilidad"``, ``"Media"``,
      ``u"Desviación"``, ``u"DirecciónCola"``): the result of ``r.qnorm``
      is stored under ``"cuantiles"``.
    * ``"Probabilidades"`` (keys ``"Valores"``, ``"Media"``,
      ``u"Desviación"``, ``u"DirecciónCola"``): the result of ``r.pnorm``
      is stored under ``"probabilidades"``.
    * ``u"Gráfica"`` (keys ``"Tipografica"``, ``"Media"``,
      ``u"Desviación"``): the density (``"Densidad"``) or the cumulative
      probability (any other value) is drawn to a temporary PNG whose
      path is stored under ``"ruta"``.

    A ``u"DirecciónCola"`` of ``'izquierda'`` selects the lower tail; any
    other value selects the upper tail.  ``dato`` is not used.

    Returns the dictionary with the results of the requested sections.
    """
    import os
    import tempfile
    from rpy import r

    diccionario = {}

    if "Cuantiles" in opciones:
        seccion = opciones["Cuantiles"]
        diccionario["cuantiles"] = r.qnorm(
            [float(seccion["Probabilidad"])],
            mean=float(seccion["Media"]),
            sd=float(seccion[u"Desviación"]),
            lower_tail=(seccion[u"DirecciónCola"] == 'izquierda'))

    if "Probabilidades" in opciones:
        seccion = opciones["Probabilidades"]
        diccionario["probabilidades"] = r.pnorm(
            [float(seccion["Valores"])],
            mean=float(seccion["Media"]),
            sd=float(seccion[u"Desviación"]),
            lower_tail=(seccion[u"DirecciónCola"] == 'izquierda'))

    if u"Gráfica" in opciones:
        seccion = opciones[u"Gráfica"]
        media = float(seccion["Media"])
        desviacion = float(seccion[u"Desviación"])
        if seccion["Tipografica"] == "Densidad":
            etiquetay = "Densidad"
            mifuncion = r.dnorm
        else:
            etiquetay = "Probabilidad acumulada"
            mifuncion = r.pnorm

        # mkstemp picks a unique name, so two plots can never overwrite
        # each other the way a random 5-digit suffix could.
        descriptor, nombrefichero = tempfile.mkstemp(prefix="driza", suffix=".png")
        os.close(descriptor)  # R reopens the file by name
        diccionario["ruta"] = nombrefichero

        r.png(nombrefichero)
        try:
            # Cover mean +/- 3.291 standard deviations; for mean 0 and
            # deviation 1 this is the fixed range used previously.
            lista = r.seq(media - 3.291 * desviacion,
                          media + 3.291 * desviacion, length=100)
            titulo = r.expression(r.paste(
                "Normal Distribution: ", "mu", " = %g, " % media,
                "sigma", " = %g" % desviacion))
            r.plot(lista, mifuncion(lista, mean=media, sd=desviacion),
                   xlab="x", ylab=etiquetay, main=titulo, type="l")
            r.abline(h=0, col="gray")
        finally:
            # Always release the graphics device, even if plotting failed.
            r.dev_off()
    return diccionario
コード例 #2
0
ファイル: GenePair.py プロジェクト: polyactis/annot
	def scatter_plot(self, gene_id_list, output_fname='/tmp/scatter_plot.ps'):
		"""
		02-15-06
			1st gene is regarded as X, all others genes are treated as Y

		Genes missing from self.gene_id2expr_array are reported on stderr and
		left out of both the plot and its legend.

		Returns output_fname when a PostScript plot was written, or None when
		fewer than two of the requested genes are available (one X gene and
		at least one Y gene are needed).
		"""
		vector_list = []
		#gene_id_list may contain some inexistent genes
		real_gene_id_list = []
		for gene_id in gene_id_list:
			if gene_id in self.gene_id2expr_array:
				real_gene_id_list.append(gene_id)
				vector_list.append(self.gene_id2expr_array[gene_id])
			else:
				sys.stderr.write("%s doesn't appear in the dataset\n"%(gene_id))

		if len(real_gene_id_list)<2:
			#nothing to plot against; bail out before opening a device
			if real_gene_id_list:
				sys.stderr.write("only %s is available, at least two genes are needed\n"%(real_gene_id_list[0]))
			return None

		r.postscript("%s"%output_fname)
		try:
			axis_range = self.get_min_max(vector_list)
			#colour 1 is reserved for the X gene, the Y genes start at colour 2
			no_of_curves = 2
			r.plot(vector_list[0], vector_list[1], xlab='value of %s'%real_gene_id_list[0], xlim=axis_range, ylim=axis_range, \
				ylab='other genes values', col=no_of_curves)
			for i in range(2, len(vector_list)):
				no_of_curves += 1
				r.points(vector_list[0], vector_list[i], col=no_of_curves)
			#label only the genes actually drawn so names and colours line up
			r.legend(axis_range[1], axis_range[1], real_gene_id_list, col=range(1, no_of_curves+1), lty=1, xjust=1)
		finally:
			#close the device even when R raised
			r.dev_off()
		return output_fname
コード例 #3
0
 def plot_boxplot_r(self, filename=None, n_highest_weights=1, n_highest_weights_for_quantity=1, weight_threshold=None, logy=False):
     """Draw one R boxplot per entry of self.values_from_mr and overlay points.

     Each variable is drawn with plot_one_boxplot_r; 1-D value arrays are
     treated as a single row. For every row, points are then added at the
     column indices returned by the index helpers on self:

     * yellow / blue -- component weights / weights over weight_threshold
       (only when weight_threshold is given),
     * green -- the n_highest_weights_for_quantity highest component weights,
     * red -- the n_highest_weights highest weights.

     If filename is given the output goes to that PDF file; logy requests a
     logarithmic y axis.
     """
     from rpy import r
     log_axes = 'y' if logy else ''
     to_file = filename is not None

     if to_file:
         r.pdf(file=filename)

     for var, values in self.values_from_mr.iteritems():
         plot_one_boxplot_r(values, var, log_axes)
         # work on a 2-D view: one row per box
         v = resize(values, (1, values.size)) if values.ndim == 1 else values
         ivar = self.get_index_for_quantity(var)
         n_rows = v.shape[0]

         if weight_threshold is not None:
             for row in range(n_rows):
                 selected = self.get_index_of_component_weights_over_threshold(ivar, weight_threshold)
                 if selected.size > 0:
                     r.points(row+1, v[row:(row+selected.size),selected], col='yellow', cex=0.5)
                 selected = self.get_index_of_weights_over_threshold(weight_threshold)
                 if selected.size > 0:
                     r.points(row+1, v[row:(row+selected.size),selected], col='blue', cex=0.5)

         if n_highest_weights_for_quantity > 0:
             for row in range(n_rows):
                 top = self.get_index_of_n_highest_component_weights(ivar, n_highest_weights_for_quantity)
                 r.points(row+1, v[row,top], col='green', cex=0.5)

         if n_highest_weights > 0:
             for row in range(n_rows):
                 top = self.get_index_of_n_highest_weights(n_highest_weights)
                 r.points(row+1, v[row,top], col='red', cex=0.5)

     if to_file:
         r.dev_off()
コード例 #4
0
def main():
    """Draw a PDF scatter plot of two numeric columns of a tab-separated file.

    Reads from ``sys.argv``: input file, output PDF, the two 1-based column
    numbers, the plot title, the x label and the y label.

    Blank lines, lines starting with ``#`` and lines whose selected fields
    are missing or non-numeric are skipped; ``NA`` (any case) becomes NaN.
    The plot is produced only if at least one line yielded a usable row.
    Failures are reported through ``stop_err``.
    """
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        columns = int( sys.argv[3] ) - 1, int( sys.argv[4] ) - 1
    except ( IndexError, ValueError ):
        stop_err( "Columns not specified, your query does not contain a column of numerical data." )
    title = sys.argv[5]
    xlab = sys.argv[6]
    ylab = sys.argv[7]

    matrix = []
    skipped_lines = 0
    # Details of the first rejected line; recorded, but nothing in this
    # function reports them.
    first_invalid_line = 0
    invalid_value = ''
    invalid_column = 0
    total_lines = 0

    with open( in_fname ) as handle:
        for i, line in enumerate( handle ):
            total_lines = i + 1
            line = line.rstrip( '\r\n' )
            if not line or line.startswith( '#' ):
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = i + 1
                continue

            fields = line.split( "\t" )
            row = []
            for column in columns:
                try:
                    val = fields[column]
                    if val.lower() == "na":
                        row.append( float( "nan" ) )
                    else:
                        row.append( float( val ) )
                except ( IndexError, ValueError ):
                    skipped_lines += 1
                    if not first_invalid_line:
                        first_invalid_line = i + 1
                        try:
                            invalid_value = fields[column]
                        except IndexError:
                            invalid_value = ''
                        invalid_column = column + 1
                    break
            else:
                # Reached only when every selected column parsed.
                matrix.append( row )

    # Compare against the number of lines read. The index of the last line
    # is one less, which wrongly rejected e.g. a file with one valid line.
    if skipped_lines < total_lines:
        try:
            r.pdf( out_fname, 8, 8 )
            try:
                r.plot( array( matrix ), type="p", main=title, xlab=xlab, ylab=ylab, col="blue", pch=19 )
            finally:
                # Release the device even if plotting failed.
                r.dev_off()
        except Exception as exc:
            stop_err( "%s" % str( exc ) )
コード例 #5
0
ファイル: pf_mon.py プロジェクト: michalliu/icecream
def drawCpuUsage(d,xat,xlbs,pic):
	"""Render CPU usage samples as a PNG line chart through R.

	d    -- non-empty sequence of usage values, one per sample (max(d) is
	        taken, so an empty sequence raises ValueError)
	xat  -- x positions of the x-axis ticks
	xlbs -- labels for those ticks
	pic  -- path of the PNG file to write

	The y axis is fixed to 0-100 with a tick every 10%. Samples above 40 are
	annotated in red with their value, and the maximum is shown above the
	plot (red when over 50, green otherwise).
	"""
	data=r.c(d)

	pch = 22         # point drawn as a square
	lty = 1          # solid line
	col = "blue"     # line color
	ltype = "o"      # points joined by lines, see http://stat.ethz.ch/R-manual/R-devel/library/graphics/html/plot.html
	ylim = (0,100)   # y domain
	xaxis=1
	yaxis=2
	vertical_text=2
	horizontal_text=1
	text_size=0.8
	yat=list(range(0,110,10))
	ylbs=["%d%%"%x for x in yat]
	legend_x=1
	legend_y=100

	# for r.text method
	warn_val=40      # samples over this value get a text label
	# R plots the i-th sample at x = i counting from 1, so shift Python's
	# 0-based index; otherwise each label sits one sample to the left.
	flagged=[(idx+1,val) for idx,val in enumerate(d) if val > warn_val]
	xtext=[x for x,val in flagged]          # text x pos
	ytext=[val for x,val in flagged]        # text y pos
	labtext=["%d%%"%val for x,val in flagged] # label for text

	# summary
	dmax=max(d)
	sumry="max %.2f%%" % dmax
	sumrycol="red" if dmax>50 else "green"

	# plot
	r.png(pic, width=900,height=450*0.6)
	try:
		r.plot(data, type=ltype, col=col, ylim=ylim, pch=pch, lty=lty, axes=False, ann=False)

		# label the samples over the warning value
		# http://stat.ethz.ch/R-manual/R-devel/library/graphics/html/text.html
		if xtext:
			# skip the call when nothing qualifies rather than hand R empty vectors
			r.text(xtext, ytext, labels=labtext, pos=3, cex=0.8, col="red")

		# summary text
		r.mtext(sumry, side=3, cex=1, col=sumrycol)

		# axis
		r.axis(xaxis, las=vertical_text, at=xat, lab=r.c(xlbs))
		r.axis(yaxis, las=horizontal_text, at=yat, lab=r.c(ylbs))

		r.box()

		# titles
		r.title(main="CPU Sampling")
		r.title(xlab="Time")
		r.title(ylab="CPU Usage")

		# legend
		r.legend(legend_x, legend_y, r.c("trunk"), col=col, cex=text_size, pch=pch, lty=lty)
	finally:
		# close the PNG device even if a drawing call failed
		r.dev_off()
コード例 #6
0
	def plot(self, outputfname, fix_index_ls, parameter_list, var_index, variant_ls, parameter_index2label, y_axis_ls, y_label):
		"""
		Plot y_axis_ls against variant_ls into a PNG file.

		The file name is '<outputfname>_<y_label>2<label of var_index>.png'.
		The title names the varying parameter and the values of the two
		fixed parameters given by fix_index_ls[0] and fix_index_ls[1].
		Progress is reported on stderr.
		"""
		var_label = parameter_index2label[var_index]
		png_fname = '%s_%s2%s.png'%(outputfname, y_label, var_label)
		sys.stderr.write('Plotting %s'%png_fname)
		r.png(png_fname)
		first_fixed = fix_index_ls[0]
		second_fixed = fix_index_ls[1]
		title = '%s vs %s (%s=%s, %s=%s)'%(y_label, parameter_index2label[var_index],
			parameter_index2label[first_fixed], parameter_list[first_fixed],
			parameter_index2label[second_fixed], parameter_list[second_fixed])
		r.plot(variant_ls, y_axis_ls, main=title, xlab=parameter_index2label[var_index], ylab=y_label)
		r.dev_off()
		sys.stderr.write('Done.\n')
コード例 #7
0
	def draw_tfbs_similarity_ls_histogram(self, tfbs_similarity_ls, output_fname):
		"""
		Draw a histogram of tfbs_similarity_ls into the PNG file output_fname.

		Lists with 10 or fewer entries are not drawn; a notice is written to
		stderr instead.
		"""
		sys.stderr.write("Drawing histogram for tfbs_similarity_ls...")
		if len(tfbs_similarity_ls)<=10:
			#not enough values for a meaningful histogram
			sys.stderr.write("too short: %s, aborted\n"%tfbs_similarity_ls)
			return
		r.png('%s'%output_fname)
		r.hist(tfbs_similarity_ls, main='histogram',xlab='tfbs_similarity',ylab='freq')
		r.dev_off()
		sys.stderr.write("Done.\n")
コード例 #8
0
ファイル: gene_stat_plot.py プロジェクト: polyactis/annot
	def hist_plot(self, dict, filename, xlabel, ylabel):
		"""
		Plot, for every key of dict, the number of items stored under it.

		Keys go on the x axis and len(value) on the y axis, drawn as vertical
		lines (type 'h') into the PNG file filename.
		"""
		r.png('%s'%filename)
		#one pass over the mapping keeps keys and sizes aligned
		pairs = [(key, len(value)) for (key, value) in dict.iteritems()]
		x_list = [key for (key, size) in pairs]
		y_list = [size for (key, size) in pairs]
		title = '%s v.s. %s'%(ylabel, xlabel)
		r.plot(x_list, y_list, type='h', xlab=xlabel, ylab=ylabel, main=title)
		r.dev_off()
コード例 #9
0
ファイル: GenePair.py プロジェクト: polyactis/annot
	def plot(self, vector_list, gene_id_list):
		"""
		Draw every vector of vector_list into the PostScript file self.plot_file.

		Each vector is turned into x/y lists by self.xy_list_return and handed
		to self._plot with shared axis ranges; a legend labelled with
		gene_id_list is added at the top right corner of those ranges.
		self.no_of_curves is reset here and presumably advanced by
		self._plot -- TODO confirm.
		"""
		self.no_of_curves = 0
		x_upper = len(vector_list[0])
		x_range = (1, x_upper)
		y_range = self.get_min_max(vector_list)
		r.postscript("%s"%self.plot_file)
		for expr_vector in vector_list:
			(xs, ys) = self.xy_list_return(expr_vector)
			self._plot(xs, ys, x_range, y_range)

		curve_colours = range(1, self.no_of_curves+1)
		r.legend(x_range[1], y_range[1], gene_id_list, col=curve_colours, lty=1, pch='*', xjust=1)
		r.dev_off()
コード例 #10
0
ファイル: barras.py プロジェクト: nesaro/driza
def funcionprincipal(dato, variable, opciones):
    """Draw a bar chart of ``variable`` into a temporary PNG file.

    The values come from ``dato.query(variable)``. ``opciones`` is not used.

    Returns ``{"Diagrama de Barras": {"ruta": <path of the PNG>}}``.
    """
    import os
    import tempfile
    from rpy import r #pylint: disable=import-error

    lista = dato.query(variable)

    # mkstemp picks a unique name, so concurrent or repeated plots cannot
    # overwrite each other as they could with a random 5-digit suffix.
    descriptor, nombrefichero = tempfile.mkstemp(prefix="driza", suffix=".png")
    os.close(descriptor)  # R reopens the file by name

    r.png(nombrefichero)
    try:
        r.barplot(lista, main = variable, axisnames = True, axes = True)
    finally:
        # Always release the graphics device, even if plotting failed.
        r.dev_off()
    return {"Diagrama de Barras": {"ruta": nombrefichero}}
コード例 #11
0
ファイル: compare_random.py プロジェクト: pombredanne/biopsy
def create_p_value_boxplot_eps(best_p_values, filename):
    """Write a PostScript boxplot with one box per key of ``best_p_values``.

    Boxes are ordered by sorted key and labelled with ``str(key)``; the
    graphics device is closed even if plotting fails.
    """
    from rpy import r
    r.postscript(filename, horizontal=False, height=4.5, width=6, pointsize=10)
    try:
        sample_sizes = sorted(best_p_values.keys())
        scores = [best_p_values.get(size) for size in sample_sizes]
        labels = [str(size) for size in sample_sizes]
        r.boxplot(scores, names=labels, xlab="sample size", ylab="p-score")
    finally:
        r.dev_off()
コード例 #12
0
def funcionprincipal(dato,variable,opciones):
    """Draw a histogram of ``variable`` into a temporary PNG file.

    The values come from ``dato.query(variable)``; the number of classes is
    ``int(opciones[u"NúmeroIntervalos"])``.

    Returns ``{"Histograma": {"ruta": <path of the PNG>}}``.
    """
    import os
    import tempfile
    from rpy import r

    lista = dato.query(variable)
    # Read the option before opening the device so a missing or malformed
    # value cannot leave a device open.
    intervalos = int(opciones[u"NúmeroIntervalos"])

    # mkstemp picks a unique name, so concurrent or repeated plots cannot
    # overwrite each other as they could with a random 5-digit suffix.
    descriptor, nombrefichero = tempfile.mkstemp(prefix="driza", suffix=".png")
    os.close(descriptor)  # R reopens the file by name

    r.png(nombrefichero)
    try:
        r.hist(lista, main=variable, xlab=variable, nclass=intervalos)
    finally:
        # Always release the graphics device, even if plotting failed.
        r.dev_off()
    return {"Histograma": {"ruta": nombrefichero}}
コード例 #13
0
ファイル: compare_random.py プロジェクト: JohnReid/biopsy
def create_p_value_boxplot_eps(best_p_values, filename):
    """Draw the values of ``best_p_values`` as boxplots in a PostScript file.

    One box is drawn per key, in ascending key order, labelled with the
    key's string form. The device is always closed, even on error.
    """
    from rpy import r
    r.postscript(filename, horizontal=False, height=4.5, width=6, pointsize=10)
    try:
        ordered_keys = list(best_p_values.keys())
        ordered_keys.sort()
        boxes = []
        box_names = []
        for key in ordered_keys:
            boxes.append(best_p_values.get(key))
            box_names.append(str(key))
        r.boxplot(
            boxes,
            names=box_names,
            xlab="sample size", ylab="p-score")
    finally:
        r.dev_off()
コード例 #14
0
ファイル: gene_stat_plot.py プロジェクト: polyactis/annot
	def hist_plot_ratio(self, dict1, dict2, filename, xlabel, ylabel):
		"""
		Plot, per key, the share of items that belong to dict1.

		For every key found in either mapping the ratio
		len(dict1[key]) / (len(dict1[key]) + len(dict2[key])) is drawn as a
		vertical line (type 'h') into the PNG file filename; a key missing
		from one mapping counts as an empty list there. Keys with no items in
		either mapping have no defined ratio and are left out.
		"""
		x_list = []
		y_list = []
		keys = Set(dict1.keys()).union( Set(dict2.keys()) )
		for key in keys:
			count1 = len(dict1.get(key, []))
			count2 = len(dict2.get(key, []))
			total = count1 + count2
			if total == 0:
				#0/0 is undefined; skip instead of raising ZeroDivisionError
				continue
			x_list.append(key)
			y_list.append(float(count1)/total)
		#open the device only once the data is ready, and always close it
		r.png('%s'%filename)
		try:
			r.plot(x_list, y_list, type='h', xlab=xlabel, ylab=ylabel, main='%s v.s. %s'%(ylabel, xlabel))
		finally:
			r.dev_off()
コード例 #15
0
ファイル: ex2.py プロジェクト: franapoli/pyleaf
def plots(regression_o, getData_o):
    """Write two PDFs with R: the data with a regression line, and a boxplot.

    ``regression_o`` supplies the line through its ``'(Intercept)'`` and
    ``'y'`` entries; ``getData_o`` is the dataset handed to R.

    Returns the two file names, regression plot first.
    """
    regression_pdf = 'car_regress.pdf'
    boxplot_pdf = 'car_hist.pdf'

    # scatter plot plus fitted line
    r.pdf(regression_pdf)
    r.plot(getData_o, ylab='dist', xlab='speed')
    intercept = regression_o['(Intercept)']
    slope = regression_o['y']
    r.abline(intercept, slope, col='red')
    r.dev_off()

    # boxplot of both columns
    r.pdf(boxplot_pdf)
    r.boxplot(getData_o, names=['dist', 'speed'])
    r.dev_off()

    return (regression_pdf, boxplot_pdf)
コード例 #16
0
ファイル: pf_mon.py プロジェクト: michalliu/icecream
def test():
	"""Draw a small fixed sample chart to test.png to try out the rpy calls."""
	samples = [1.25,3.45,6.75,20.2,9.9]
	tick_labels = ("a","b","c","d","e")
	line_colour = "blue"
	square = 22

	data = r.c(samples)
	# draw image using rpy
	r.png("test.png", width=300,height=300)

	# bare plot first; axes and titles are added by hand below
	r.plot(data, type="o", col=line_colour, ylim=(0,100), pch=square, lty=1, axes=False, ann=False)
	r.axis(1, at=(1,2,3,4,5), lab=r.c(*tick_labels))
	r.axis(2, las=1, at=(0,50,100))
	r.box()

	for title_kwargs in ({"main": "CPU usage sampling result"}, {"xlab": "Time"}, {"ylab": "CPU"}):
		r.title(**title_kwargs)

	r.legend(1,100,r.c("trunk"), cex=0.8, col=r.c(line_colour), pch=square, lty=1)
	r.dev_off()
コード例 #17
0
ファイル: stat_plot.py プロジェクト: polyactis/annot
	def plot(self):
		"""
		Plot the curves of stat_plot_data for three fixed options and one
		varying option into the PNG file self.ofname.

		Rows are selected where the first three entries of
		self.option_num_dict match their label=value pairs and tag equals
		self.tag, ordered by the label of the fourth entry. Every row is
		passed to self._plot, then a legend is added and the device closed.
		"""
		#this function deals with 3 fixed parameters and 1 varying parameter
		#NOTE(review): the query is assembled by string interpolation; labels,
		#values and tag are presumably trusted configuration -- confirm.
		self.curs.execute("select distinct %s, %s, %s, %s, tag from\
			stat_plot_data where %s=%s and %s=%s and %s=%s and tag='%s' order by %s \
			"%(self.option_num_dict[0].label, self.option_num_dict[1].label, self.option_num_dict[2].label,\
			self.option_num_dict[3].label, self.option_num_dict[0].label, self.option_num_dict[0].value, \
			self.option_num_dict[1].label, self.option_num_dict[1].value, self.option_num_dict[2].label, \
			self.option_num_dict[2].value, self.tag, self.option_num_dict[3].label))
		rows = self.curs.fetchall()
		r.png('%s'%self.ofname)
		for row in rows:
			#position 0,1,2 are fixed values, 3 is varying value, 4 is the tag value.
			self._plot(row)
		#add the legend
		#x_range, y_range, varying_list and no_of_curves are read from self;
		#presumably self._plot maintains them -- TODO confirm
		r.legend(self.x_range[1], self.y_range[1], self.varying_list, col=range(1, self.no_of_curves+1), lty=1, pch='*', xjust=1)
		r.dev_off()
コード例 #18
0
ファイル: p_value_cor.py プロジェクト: polyactis/annot
	def output(self):
		"""
		Write the result table to self.of and plot one of its columns.

		The table is tab-separated: a 'p_value' header row listing
		self.p_value_list, then one row per entry of self.result_array
		prefixed with its degree of freedom, counted up from self.df_lower.
		Column self.column is then plotted against the degrees of freedom
		self.df_lower..self.df_upper into 'p_value_cor.pdf'.
		"""
		self.matrix = array(self.result_array)
		header_cells = [str(p_value) for p_value in self.p_value_list]
		self.of.write('p_value\t%s\n'%'\t'.join(header_cells))
		df = self.df_lower
		for correlations in self.result_array:
			#strings can be 'join'ed, which makes the row easy to output
			cells = [str(cor) for cor in correlations]
			self.of.write('%d\t%s\n'%(df, '\t'.join(cells)))
			df = df + 1
		r.pdf('p_value_cor.pdf')
		#select a column to plot
		selected_column = self.matrix[:,self.column]
		title = 'p_value: %s'%self.p_value_list[self.column]
		df_list = range(self.df_lower, self.df_upper+1)
		r.plot(df_list, selected_column, type='o', pch='*', xlab='df', ylab='correlation', main=title)
		r.dev_off()
コード例 #19
0
ファイル: scale_free.py プロジェクト: polyactis/annot
	def plot(self):
		"""
		Plot the degree distribution of self.graph on log-log axes.

		Each node's degree (incoming plus outgoing) is counted into
		self.degree_dict; log(degree) is then plotted against
		log(fraction of nodes with that degree) into
		'degree_distribution.pdf'.
		"""
		vertex_list = self.graph.node_list()
		number_of_nodes = len(vertex_list)
		for vertex in vertex_list:
			degree = self.graph.inc_degree(vertex) + self.graph.out_degree(vertex)
			self.degree_dict[degree] = self.degree_dict.get(degree, 0) + 1
		r.pdf('degree_distribution.pdf')
		#build both coordinates in one pass so they stay aligned
		points = [(r.log(degree), r.log(float(self.degree_dict[degree])/number_of_nodes))
			for degree in self.degree_dict]
		x_list = [log_k for (log_k, log_p) in points]
		y_list = [log_p for (log_k, log_p) in points]
		r.plot(x_list, y_list, type='p', xlab='log(k)', ylab='log(p(k))')
		r.dev_off()
コード例 #20
0
ファイル: gibbs.py プロジェクト: lindberg-m/pymotif
    def __print_entropies(self, entropies):

        """
        This is for debugging purposes.

        Plots ``entropies`` (x label "Iterations", y label "Entropy") into
        the PostScript file DEBUG_ENTROPIES_FILE. If rpy cannot be loaded a
        notice is printed and nothing is plotted.
        """

        try:
            from rpy import r
        except Exception:
            # Best-effort debugging aid: any failure to load rpy is reported
            # and skipped, but KeyboardInterrupt/SystemExit still propagate.
            print("Could not import rpy module")
            return

        r.postscript(DEBUG_ENTROPIES_FILE)
        try:
            r.plot(entropies, type='b', xlab="Iterations", ylab="Entropy")
        finally:
            # Do not leave the device open if plotting failed.
            r.dev_off()
コード例 #21
0
ファイル: DrawHistStd.py プロジェクト: polyactis/annot
	def transform_one_file(self, src_pathname, delimiter, outputdir, b_instance, threshold, type, no_of_valids):
		"""
		08-09-05
			add type
		08-29-05
			add no_of_valids to cut genes with too few valid values
		"""
		reader = csv.reader(file(src_pathname), delimiter=delimiter)
		filename = os.path.basename(src_pathname)
		output_filename = os.path.join(outputdir, filename)
		std_list = []
		for row in reader:
			gene_id = row[0]
			new_row = []
			mask_ls = []
			for i in range(1, len(row)):
				if row[i] == 'NA':
					new_row.append(1e20)
					mask_ls.append(1)
				elif row[i] == '':
					#ignore empty entry
					continue
				else:
					value = float(row[i])
					if type==1:
						if value<=10:
							value = 10
						value = math.log(value)
					new_row.append(value)
					mask_ls.append(0)
			ma_array = array(new_row, mask=mask_ls)
			if self.debug:
				print "The data vector is ",ma_array
				print "Its mask is ", ma_array.mask()
			if len(ma_array.compressed())>=no_of_valids:	#at least two samples, otherwise, correlation can't be calculated
				#08-29-05	no_of_valids controls not too many NA's, which is for graph_modeling
				std = MLab.std(ma_array.compressed())	#disregard the NAs
				if self.debug:
					print "std is ",std
					raw_input("Continue?(Y/n)")
				std_list.append(std)
		del reader
		if len(std_list)>100:
			r.png('%s.png'%output_filename)
			r.hist(std_list, main='histogram',xlab='std',ylab='freq')
			r.dev_off()
コード例 #22
0
ファイル: ddispersion.py プロジェクト: nesaro/driza
def funcionprincipal(dato,variables,opciones):
    """Draw a scatter-plot matrix of two variables into a temporary PNG file.

    The first two entries of ``variables`` are looked up with
    ``dato.query`` and passed to R's ``pairs``. ``opciones`` is not used.

    Returns ``{u"Diagrama de dispersión": {"ruta": <path of the PNG>}}``.
    """
    import os
    import tempfile
    from rpy import r  #pylint: disable=import-error

    lista1 = dato.query(variables[0])
    lista2 = dato.query(variables[1])

    # mkstemp picks a unique name, so concurrent or repeated plots cannot
    # overwrite each other as they could with a random 5-digit suffix.
    descriptor, nombrefichero = tempfile.mkstemp(prefix="driza", suffix=".png")
    os.close(descriptor)  # R reopens the file by name

    r.png(nombrefichero)
    try:
        r.pairs([lista1,lista2])
    finally:
        # Always release the graphics device, even if plotting failed.
        r.dev_off()
    return {u"Diagrama de dispersión": {"ruta": nombrefichero}}
コード例 #23
0
def main(args):
    """Chain BLAST searches over consecutive source files and plot set sizes.

    Each source file is searched against the next one. From the second
    search on, the query is first reduced (through the external
    ``reduce_fasta_file.py``) to the ids that produced hits in the previous
    search. The parsed results are merged with ``integrate_all_homologs``
    and a histogram of the homolog set sizes is written to
    ``hist_size_homol_sets.pdf``; the histogram is skipped when there are
    no homolog sets.
    """
    sourcefiles = get_src_files(args.get('in'))
    hashofhos = None
    parsedfiles = []
    for i in range(len(sourcefiles) - 1):

        queryfile = sourcefiles[i]
        subjectfile = sourcefiles[i + 1]

        if hashofhos:
            # Keep only the sequences that had hits in the previous round.
            idfile = 'keepids.tmp'
            with open(idfile, 'w') as fw:
                for seq_id in hashofhos.keys():
                    fw.write(seq_id + '\n')
            outfile = 'red_' + get_basename(queryfile) + '.aa'
            os.system('reduce_fasta_file.py -f %s -i %s -o %s' %
                      (queryfile, idfile, outfile))
            queryfile = outfile

        blastout = blast(queryfile, subjectfile)
        parsedfile, hashofhos = parse_blastout(blastout, args)
        parsedfiles.append(parsedfile)
        infomsg("hits: %s" % len(hashofhos))

    Homologs = integrate_all_homologs(parsedfiles, args)

    # stats: each set counts its homologs plus one
    no = [len(orthlist) + 1 for sid, orthlist in Homologs.iteritems()]

    if not no:
        # max() of an empty list would raise; there is nothing to draw.
        infomsg("no homologous sets found, histogram skipped")
        return

    from rpy import r
    outfile = 'hist_size_homol_sets.pdf'
    title = 'Size of Homologous Sets'
    x = 'number of homologs'
    y = 'frequency'
    r.pdf(outfile)
    try:
        r.hist(no, xlab=x, ylab=y, main=title, col='grey', breaks=max(no))
    finally:
        # Close the PDF device even if the histogram call failed.
        r.dev_off()
コード例 #24
0
def plot_values_as_boxplot_r(values_dict, filename=None, logy=False):
    """Draw one R boxplot per variable of ``values_dict``.

    ``values_dict`` maps a variable name to its values (1- or 2-D array);
    each entry is drawn by ``plot_one_boxplot_r``. When ``filename`` is
    given the plots are written to that PDF file. ``logy`` requests a
    logarithmic y axis.
    """
    from rpy import r
    log_axes = 'y' if logy else ''
    write_pdf = filename is not None

    if write_pdf:
        r.pdf(file=filename)

    for name, data in values_dict.iteritems():
        plot_one_boxplot_r(data, name, log_axes)

    if write_pdf:
        r.dev_off()
コード例 #25
0
ファイル: Stats.py プロジェクト: Rfam/rfam-website
    def plot(self, hardcopy = None):
        """Draw the q-value diagnostic plot with R's ``qvalue`` package.

        The stored p-values, pi0, q-values and lambda are assembled into an
        R object of class "qvalue" and passed to ``qplot``. When
        ``hardcopy`` is given the plot is written to that PNG file.
        """
        if hardcopy:
            R.png(hardcopy, width=1024, height=768, type="cairo")

        R.require('qvalue')

        # build a qobj: export each attribute under its R-side name
        exports = (("pval", "mPValues"), ("pi0", "mPi0"),
                   ("qval", "mQValues"), ("lambda", "mLambda"))
        for r_name, attribute in exports:
            R.assign( r_name, getattr(self, attribute) )

        statements = (
            """qobj <-list( pi0=pi0, qvalues=qval, pvalues=pval, lambda=lambda)""",
            """ class(qobj) <- "qvalue" """,
            """qplot(qobj)""",
        )
        for statement in statements:
            R(statement)

        if hardcopy:
            R.dev_off()
コード例 #26
0
def main( args ):
	"""Run BLAST between consecutive source files and plot homolog set sizes.

	Source file i is searched against source file i+1. After the first
	search, the query is cut down (through the external
	reduce_fasta_file.py) to the ids that had hits in the previous search.
	The parsed outputs are merged by integrate_all_homologs and the sizes
	of the resulting sets are drawn as a histogram into
	'hist_size_homol_sets.pdf'. Without any homolog set no histogram is
	drawn.
	"""
	sourcefiles = get_src_files( args.get('in') )
	hashofhos = None
	parsedfiles = []
	for i in range( len(sourcefiles)-1 ):

		queryfile = sourcefiles[i]
		subjectfile = sourcefiles[i+1]

		if hashofhos:
			# restrict the query to sequences that had hits last round
			idfile = 'keepids.tmp'
			fw = open( idfile, 'w' )
			try:
				for seq_id in hashofhos.keys():
					fw.write( seq_id + '\n')
			finally:
				# close the id list even if a write failed
				fw.close()
			outfile = 'red_' + get_basename(queryfile) + '.aa'
			os.system( 'reduce_fasta_file.py -f %s -i %s -o %s' %(queryfile,idfile,outfile) )
			queryfile = outfile

		blastout = blast( queryfile, subjectfile )
		parsedfile, hashofhos = parse_blastout( blastout, args )
		parsedfiles.append( parsedfile )
		infomsg( "hits: %s" %len(hashofhos) )

	Homologs = integrate_all_homologs( parsedfiles, args )

	# stats: each set counts its homologs plus one
	no = []
	for sid, orthlist in Homologs.iteritems():
		no.append( len(orthlist) + 1 )

	if len(no) == 0:
		# max() of an empty list would raise; nothing to draw
		infomsg( "no homologous sets found, histogram skipped" )
		return

	from rpy import r
	outfile = 'hist_size_homol_sets.pdf'
	title = 'Size of Homologous Sets'
	x = 'number of homologs'
	y = 'frequency'
	r.pdf( outfile )
	try:
		r.hist(no, xlab=x, ylab=y, main=title, col='grey', breaks=max(no))
	finally:
		# close the PDF device even if the histogram call failed
		r.dev_off()
コード例 #27
0
	def draw_hist_gene_freq(self,  files, frequency_presence_vector_gene_id_ls, exponent, output_dir):
		"""
		12-23-05
		12-26-05 if it's not empty, then draw it
		12-26-05 add an enrich_index_no_of_genes_filename_ls
		01-05-06 have >10 items, then draw it

		Each row of frequency_presence_vector_gene_id_ls starts with a
		frequency, followed by one presence flag per dataset; the last
		element is not read here (presumably the gene id -- TODO confirm).
		For every dataset the frequencies of the rows flagged 1 are
		collected. Datasets with more than 10 of them get a histogram in
		<output_dir>/<file>.png.

		A tab-separated 'enrich_index.csv' is also written to output_dir
		with one sorted row per dataset: the sum of frequency**exponent,
		the number of collected frequencies and the file name.
		"""
		sys.stderr.write("Drawing gene frequency histogram for each dataset...\n")
		#one frequency list per dataset
		dataset_index_gene_freq_ls = [[] for i in range(len(files))]
		for row in frequency_presence_vector_gene_id_ls:
			frequency = row[0]
			for i in range(1, len(row)-1):
				if row[i] == 1:
					dataset_index_gene_freq_ls[i-1].append(frequency)	#WATCH i-1

		#12-26-05
		enrich_index_no_of_genes_filename_ls = []

		for i in range(len(files)):
			sys.stderr.write("%s\t%s"%('\x08'*20, i))
			output_fname = os.path.join(output_dir, files[i])
			gene_freq_ls = dataset_index_gene_freq_ls[i]
			#12-26-05
			enrich_index = sum([math.pow(frequency, exponent) for frequency in gene_freq_ls])
			enrich_index_no_of_genes_filename_ls.append([enrich_index, len(gene_freq_ls), files[i]])

			if len(gene_freq_ls)>10:	#01-05-06 have >10 items, then draw it
				r.png("%s.png"%output_fname)
				try:
					r.hist(gene_freq_ls, main='histogram',xlab='gene frequency',ylab='no of genes', labels=r.TRUE)
				finally:
					#close the device even if the histogram call failed
					r.dev_off()

		#12-26-05
		enrich_index_no_of_genes_filename_ls.sort()
		enrich_index_output_fname = os.path.join(output_dir, 'enrich_index.csv')
		#close the file explicitly instead of relying on garbage collection
		outf = open(enrich_index_output_fname, 'w')
		try:
			writer = csv.writer(outf, delimiter ='\t')
			for row in enrich_index_no_of_genes_filename_ls:
				writer.writerow(row)
		finally:
			outf.close()

		sys.stderr.write('Done.\n')
コード例 #28
0
    def plot(self, hardcopy=None):
        """Plot the stored q-value results through R's ``qplot``.

        ``hardcopy``, when given, is the PNG file the plot is written to.
        The p-values, pi0, q-values and lambda held on this object are
        exported to R and combined there into an object of class "qvalue".
        """
        to_png = bool(hardcopy)
        if to_png:
            R.png(hardcopy, width=1024, height=768, type="cairo")

        R.require('qvalue')

        # build a qobj from the values exported below
        p_values = self.mPValues
        R.assign("pval", p_values)
        pi_zero = self.mPi0
        R.assign("pi0", pi_zero)
        q_values = self.mQValues
        R.assign("qval", q_values)
        lambdas = self.mLambda
        R.assign("lambda", lambdas)

        build_list = """qobj <-list( pi0=pi0, qvalues=qval, pvalues=pval, lambda=lambda)"""
        set_class = """ class(qobj) <- "qvalue" """
        draw = """qplot(qobj)"""
        R(build_list)
        R(set_class)
        R(draw)

        if to_png:
            R.dev_off()
コード例 #29
0
    def __init__(self, bedfile, dir, win, ma, out, ymin=0, ymax=0):
        """Load the BED file, run the analysis and draw the conservation plot.

        ``win`` is the half-width of the plotted region and ``ma`` the
        moving-average window (``self.win`` stores their sum). The plot is
        written to ``<out>.ConservationPlot.pdf``. ``ymin``/``ymax`` set
        the y range; a false value (the default 0) derives that bound from
        ``self.mscore`` with a 0.05 margin.

        Any failure while importing rpy, running or plotting is reported
        on stderr/stdout and otherwise ignored.
        """
        self.L = 17  # number of letters per line
        Bed.__init__(self, bedfile)
        self.dir = dir
        self.win = win + ma
        self.ma = ma  # window for moving average

        try:
            from rpy import r
            r.pdf(out + '.ConservationPlot.pdf')
            try:
                self.Run()
                if not ymin:
                    ymin = self.mscore[self.mscore > 0].min() - 0.05
                if not ymax:
                    ymax = self.mscore.max() + 0.05

                r.plot(range(-1*win, win+1), self.mscore, type = 'l', xlab = \
                       'Distance from the Center of Enriched Regions', \
                       ylab = 'Conservation Score', lwd= 3,  ylim = (ymin, ymax))
            finally:
                # Close the PDF device even when Run() or plotting failed.
                r.dev_off()
        except Exception:
            # Best effort: report and carry on, but let KeyboardInterrupt
            # and SystemExit propagate.
            sys.stderr.write('error import r using rpy, will not generate phastCons plot\n')
            sys.stdout.write('%s %s\n' % (sys.exc_info()[0], sys.exc_info()[1]))
コード例 #30
0
ファイル: stat_plot.py プロジェクト: polyactis/annot
	def single_plot(self):
		"""
		Plot the share of true positives against their count into the PNG
		file self.ofname, for four fixed options and one varying option.

		Rows come from stat_plot_data, filtered on the first four entries of
		self.option_num_dict and on self.tag, ordered by the label of the
		fifth. With self.based_on_clusters the *_m columns are used. tp1 is
		added to the true positives when self.l1 is set, otherwise to the
		false positives.
		"""
		#this function deals with 4 fixed parameters and 1 varying parameter
		r.png('%s'%self.ofname)
		self.curs.execute("select  tp, tp_m, tp1, tp1_m, tn, fp, fp_m, fn from\
			stat_plot_data where %s=%s and %s=%s and %s=%s and %s=%s and tag='%s' order by %s \
			"%(self.option_num_dict[0].label, self.option_num_dict[0].value, \
			self.option_num_dict[1].label, self.option_num_dict[1].value, self.option_num_dict[2].label, \
			self.option_num_dict[2].value, self.option_num_dict[3].label, self.option_num_dict[3].value, \
			self.tag, self.option_num_dict[4].label))
		x_list = []
		y_list = []
		#columns arrive in the order of the select list above
		for (tp, tp_m, tp1, tp1_m, tn, fp, fp_m, fn) in self.curs.fetchall():
			if self.based_on_clusters:
				#using the tp_m, tp1_m and fp_m
				tp, tp1, fp = tp_m, tp1_m, fp_m
			if self.l1:
				#tp1 is counted as true positive
				tp += tp1
			else:
				#tp1 is counted as false positive
				fp += tp1
			x_list.append(tp)
			y_list.append(float(tp)/(tp+fp))

		title = '%s'%(self.option_num_dict[4].label)
		r.plot(x_list, y_list, type='o',pch='*',xlab='consistent predictions',xlim=self.x_range,ylim=self.y_range, \
				ylab='percentage', main=title, col=1)
		r.dev_off()
コード例 #31
0
ファイル: DrawHistCor.py プロジェクト: polyactis/annot
	def transform_one_file(self, src_pathname, delimiter, outputdir, b_instance, threshold, type, no_of_valids):
		"""
		08-09-05
			add type
		08-29-05
			add no_of_valids to cut genes with too few valid values
		01-05-06
			deal with blank files

		Reads the fourth column of every row after the first of the
		delimited file src_pathname as a float. When more than 100 values
		were read, a histogram of them is written to
		<outputdir>/<basename of src_pathname>.png. b_instance, threshold,
		type and no_of_valids are not used here.
		"""
		reader = csv.reader(open(src_pathname), delimiter=delimiter)
		output_filename = os.path.join(outputdir, os.path.basename(src_pathname))
		#01-05-06 the first row is skipped; a blank file simply yields no values
		cor_list = [float(row[3]) for (row_no, row) in enumerate(reader) if row_no > 0]
		del reader
		if len(cor_list)>100:
			r.png('%s.png'%output_filename)
			r.hist(cor_list, main='histogram',xlab='cor',ylab='freq')
			r.dev_off()
コード例 #32
0
	def plot(self):
		"""
		Write one PDF per p-value level (0.05, 0.025, 0.01).

		Each file 'per_p_value<NN>.pdf' plots the matching self.per_<NN>
		series against self.dataset_no.
		"""
		levels = (('05', '0.05'), ('025', '0.025'), ('01', '0.01'))
		for (suffix, p_value) in levels:
			r.pdf("per_p_value%s.pdf"%suffix)
			r.plot(self.dataset_no, getattr(self, 'per_%s'%suffix), type='o', pch='*', xlab='dataset no.',\
				ylab='percentage', main='p_value: %s. #edges compared with correlation cut_off 0.6'%p_value)
			r.dev_off()
コード例 #33
0
    def plotBundle(self, bundleD, full_filename, colorsD=None, bundlePointsD=None, legendL=None, title=None, y_max=None):
        """Plot a bundle of time series as lines into a PNG file.

        bundleD       -- maps a bundle id to its series; must not be empty
        full_filename -- PNG file to write
        colorsD       -- optional map of bundle id to colour (default: R rainbow)
        bundlePointsD -- optional map of bundle id to a second series, drawn
                         as thin lines with points
        legendL       -- legend labels (default: the sorted bundle ids)
        title         -- plot title (default 'data')
        y_max         -- upper limit of the y axis (default 0.4)

        The x axis is built in steps of 0.5 ("time in hours after
        transfection") and is as long as the shortest series of bundleD;
        longer series are cut to that length. A failure while plotting is
        reported on stdout and otherwise ignored.
        """
        if y_max is None:
            y_max = 0.4

        bundleIdL = sorted(bundleD.keys())

        if legendL is None:
            legendL = list(bundleIdL)

        if title is None:
            title = 'data'

        if colorsD is None:
            colorsL = r.rainbow(len(bundleIdL))
            if not isinstance(colorsL, (list, tuple)):
                # rpy hands back a bare string for a length-one vector;
                # zipping it would pair the id with its first character.
                colorsL = [colorsL]
            colorsD = dict(zip(bundleIdL, colorsL))

        colorsL = [colorsD[x] for x in bundleIdL]

        # common length of all series; every line is drawn over this range
        time_min = min([len(bundleD[x]) for x in bundleIdL])
        timeVec = [0.5 * x for x in range(time_min)]

        try:
            r.png(full_filename, width=800, height=600)
            try:
                # widen the right margin and allow drawing there for the legend
                oldPar = r.par(xpd = True, mar = [x + y for (x,y) in zip(r.par()['mar'], [0,0,0,6])])

                print('plot %s' % full_filename)
                r.plot(timeVec, timeVec,
                       type='n',
                       main=title, ylim=(0, y_max),
                       xlab="time in hours after transfection", ylab="Relative Cell Counts",
                       pch=20, lwd=1, lty = 1,
                       cex=1.0, cex_lab=1.2, cex_main=1.5)

                for bundleId in bundleIdL:

                    if bundlePointsD is not None:
                        r.points(timeVec, bundlePointsD[bundleId],
                                 col=colorsD[bundleId], pch=20,
                                 lwd=1)
                        r.lines(timeVec, bundlePointsD[bundleId],
                                col=colorsD[bundleId],
                                lwd=1, lty = 1)

                    # cut to the common length so x and y always match
                    r.lines(timeVec, bundleD[bundleId][:time_min],
                            col=colorsD[bundleId],
                            lwd=3, lty = 1)

                r.legend(max(timeVec) * 1.1, y_max, legend=legendL, fill=colorsL, cex=1.0, bg= 'whitesmoke')
                r.par(oldPar)
                r.grid(col="darkgrey")
            finally:
                # close the device exactly once, and only if it was opened
                r.dev_off()
        except Exception:
            print(full_filename + ' has not been printed.')

        return
コード例 #34
0
    def generateCountsGraph(self, counts, sitename, widthpx=648, resol=72):
        '''
            Generate a graph file of count records via R.
            Graphs *all* of the counts records contained in counts List.

            counts   -- records with a ``datetime`` and a ``c1`` attribute
            sitename -- site name shown in the plot title
            widthpx  -- image width in pixels
            resol    -- resolution used to convert pixels to R's size units

            The aspect ratio and the y limits are read from the 'data'
            section of self.config.

            Returns an open binary file object: the generated image in a
            temporary file (which is not deleted here), or the configured
            'nodatapng' image when counts is empty. The caller closes it.
        '''
        import os
        from rpy import r as robj

        # Calculate graph image information
        widthpx = int(widthpx)
        imgwidth = int( float(widthpx) / float(resol) )
        ratio = float(self.config.get('data','graphratio'))
        # a plain int; a stray trailing comma used to make this a 1-tuple
        imgheight = int( (float(widthpx) * ratio) / float(resol) )

        ctm = []   # record times, seconds since the epoch
        cts = []   # counts
        for cr in counts:
            ctm.append( time.mktime(cr.datetime.timetuple()) )
            cts.append(cr.c1)

        if len(cts) == 0:
            logobject.debug("DataHandler.generateCountsGraph(): No data. Generating proper error image...")
            return open(self.config.get('data','nodatapng'), 'rb')

        # Read the configuration before opening the device so a bad value
        # cannot leave a device open.
        ymin = int(self.config.get('data','counts.graph.ylim.min'))
        ymax = int(self.config.get('data','counts.graph.ylim.max'))

        # Only create the temporary file when there is something to draw.
        (fd, tmpgraphfile) = mkstemp()
        os.close(fd)  # R reopens the file by name; do not leak the descriptor
        logobject.debug("DataHandler.generateCountsGraph(): Temp graph filename = %s" % tmpgraphfile)

        robj.bitmap(tmpgraphfile,
                    type = "png256",
                    width = imgwidth,
                    height = imgheight,
                    res = resol,
                    )
        try:
            robj.plot(ctm, cts,
                      col="black",
                      main="Counts: %s" % sitename,
                      xlab="Time: (secs since 1970)",
                      ylab="Counts/min",
                      type="l",
                      ylim=(ymin,ymax)
                      )
        finally:
            # Close the device even if plotting failed.
            robj.dev_off()
        logobject.debug("DataHandler.generateCountsGraph(): OK: What is our tempfile? = %s" % tmpgraphfile )
        # image data: open in binary mode
        return open(tmpgraphfile, 'rb')
コード例 #35
0
ファイル: context_specific.py プロジェクト: polyactis/annot
 def plot(self, filename, list_to_plot, main_lab, xlab):
     """Draw a histogram of list_to_plot into the PDF file filename.

     Bin edges are the integers 0..max(list_to_plot); main_lab is the
     title and xlab the x-axis label.
     """
     largest = max(list_to_plot)
     r.pdf(filename)
     bin_edges = range(largest + 1)
     r.hist(list_to_plot, breaks=bin_edges, las=1, main=main_lab, xlab=xlab)
     r.dev_off()
コード例 #36
0
ファイル: report.py プロジェクト: herry13/fdt
def plot(outfile, data, out_format='png'):
    """Draw the planner timing chart for ``data`` into ``outfile`` using R.

    Each entry of ``data`` is a mapping that must provide the keys
    ``'problem'``, ``'translate'`` and ``'preprocessing'``.  Every other key
    of the *first* entry is treated as a planner series.  A series value is
    either the string ``'no-plan'`` or something ``float()`` accepts.

    The plotted value for a planner is its own time plus that problem's
    translate and preprocessing times.  ``'no-plan'`` entries are stored as
    -1000, which lies below the plotted y range ``(0, max_value)``.

    Args:
        outfile: path handed to the R graphics device.
        data: non-empty sequence of per-problem mappings (see above).
        out_format: ``'png'`` or ``'pdf'``.

    Raises:
        Exception: if ``out_format`` is neither ``'png'`` nor ``'pdf'``.
    """
    w = int(round(len(data) / 4.0))

    if out_format == 'png':
        r.png(outfile, width=w * 100, height=1000, res=72)
    elif out_format == 'pdf':
        r.pdf(outfile, width=w, height=10)
    else:
        raise Exception('Unrecognised format: ' + str(out_format))

    # A graphics device is open from here on; the finally clause makes sure
    # it is closed even when the data turns out to be malformed.
    try:
        print("total: " + str(len(data)))

        points = {
            'translate': [float(dat['translate']) for dat in data],
            'preprocessing': [float(dat['preprocessing']) for dat in data],
        }

        # Iterating the mapping directly yields its keys on both Python 2 and
        # Python 3 (dict.iteritems() does not exist on Python 3).
        series = [k for k in data[0]
                  if k not in ('problem', 'translate', 'preprocessing')]
        for k in series:
            points[k] = []

        xlabels = []
        for index, dat in enumerate(data):
            for k in series:
                if dat[k] != 'no-plan':
                    points[k].append(float(dat[k])
                                     + points['translate'][index]
                                     + points['preprocessing'][index])
                else:
                    # Sentinel below the y range: the point is not visible.
                    points[k].append(-1000)
            xlabels.append(dat['problem'])

        max_value = max(max(points[k]) for k in series)
        yrange = (0, max_value)

        # Empty frame: every y is the off-screen sentinel, so this call only
        # sets up the coordinate system without drawing visible points.
        x = list(range(1, len(points['translate']) + 1))
        y = [-1000] * len(x)
        r.par(mar=(7, 5, 4, 2))
        r.plot(x,
               y,
               main='',
               xlab="",
               ylab='',
               xaxt='n',
               yaxt='n',
               pch=0,
               ylim=yrange,
               mgp=(5, 1, 0))
        r.mtext("Problem", side=1, line=5)
        r.mtext("CPU Time (s)", side=2, line=3)

        # Translate time is drawn as a line; preprocessing time is not
        # plotted on its own because it is insignificant.
        r.lines(x, points['translate'], lty=1)

        # One point symbol per planner.  ``series`` never contains
        # 'translate' or 'preprocessing', so no further filtering is needed.
        pch_start = 1
        pch_index = pch_start
        legend_labels = []
        for k in series:
            r.points(x, points[k], pch=pch_index)
            pch_index += 1
            legend_labels.append("FD+" + k.upper())

        # x-axis: one tick per problem, labelled with the problem name.
        for position, label in zip(x, xlabels):
            r.axis(side=1, at=position, labels=label, las=2)

        # y-axis: ``step`` ticks spaced ``base`` apart, starting at ``base``.
        base, step = get_y_step(max_value)
        print("base: " + str(base) + " -- step: " + str(step))
        tick = base
        for _ in range(step):
            r.axis(side=2, at=tick, labels=str(tick), las=2)
            tick += base

        r.legend(1,
                 max_value,
                 legend_labels,
                 pch=list(range(pch_start, pch_index)))
    finally:
        r.dev_off()
コード例 #37
0
ファイル: analysis.py プロジェクト: amrhamedp/hop
 def r_dev_off():
     """Call ``dev_off`` on the ``r`` object in scope and discard the result."""
     shutdown_device = r.dev_off
     shutdown_device()
コード例 #38
0
ファイル: analysis.py プロジェクト: amrhamedp/hop
def kill_R():
    """Manual last resort to kill the R quartz() window.

    Imports rpy on demand and calls ``dev_off`` on its ``r`` object.
    """
    import rpy

    rpy.r.dev_off()
コード例 #39
0
ファイル: data.py プロジェクト: bnl-sdcc/griddev
    def generateCountsGraph2(
        self,
        counts,
        sitename,
        widthpx=648,
        resol=72,
    ):
        '''
        Generate a counts-over-time graph via R and return it as an open file.

        Graphs *all* of the counts records contained in the counts list.
        The timestamps are converted to POSIXct inside R, since rpy does not
        convert dates/times automatically.

        counts   -- iterable of records exposing .c1 (the count) and
                    .datetime (rendered with str(); the R side parses it with
                    the format "%Y-%m-%d %H:%M:%S").
        sitename -- site name, shown in the plot title.
        widthpx  -- requested image width; converted with int().
        resol    -- resolution passed to R's bitmap() as res.  The device
                    width and height are widthpx / resol and
                    widthpx * graphratio / resol, truncated to integers.

        Reads 'graphratio', 'counts.graph.ylim.min', 'counts.graph.ylim.max'
        and 'nodatapng' from the 'data' section of self.config.

        Returns a file object opened in binary mode: the freshly rendered
        image, or the configured 'nodatapng' image when counts is empty.
        The rendered temp file is not deleted here.
        '''
        import os

        log = logging.getLogger()
        log.info('Generating graph for %d counts from site %s' %
                 (len(counts), sitename))

        from rpy import r as robj

        # Calculate graph image information
        ratio = float(self.config.get('data', 'graphratio'))
        widthpx = int(widthpx)
        imgwidth = int(float(widthpx) / float(resol))
        imgheight = int(((float(widthpx) * ratio) / float(resol)))
        resol = int(resol)

        # Unpack CountsRecords into counts and timestamps, e.g.
        #   cr.datetime = "2008-02-11 12:07:08.112117", cr.c1 = 5440
        cts = [cr.c1 for cr in counts]
        ctm = [str(cr.datetime) for cr in counts]
        log.debug("Got list of %d counts." % len(cts))

        if not cts:
            # Nothing to plot: hand back the configured placeholder image
            # without creating a temp file that nobody would use.
            log.debug("No data. Generating proper error image...")
            return open(self.config.get('data', 'nodatapng'), 'rb')

        # Get unused file/name to put image data into.  R writes to the path
        # itself, so the descriptor from mkstemp() is closed right away
        # instead of being leaked.
        (fd, tmpgraphfile) = mkstemp()
        os.close(fd)
        log.debug("Temp graph filename = %s" % tmpgraphfile)

        robj.assign('rcts', cts)
        robj.assign('rctm', ctm)

        # Convert timestamps to POSIXct objects within R.
        robj(
            '''rctmpct <- as.POSIXct(strptime(rctm, format="%Y-%m-%d %H:%M:%S"))'''
        )
        cmdstring = 'bitmap( "%s", type="png256", width=%s, height=%s, res=%s)' % (
            tmpgraphfile, imgwidth, imgheight, resol)
        log.debug("R cmdstring is %s" % cmdstring)
        robj(cmdstring)
        log.debug("Completed R command string %s" % cmdstring)

        ymin = int(self.config.get('data', 'counts.graph.ylim.min'))
        ymax = int(self.config.get('data', 'counts.graph.ylim.max'))
        xlabel = " %s -- %s " % (ctm[0], ctm[-1])

        # Hand the free-text labels to R as variables rather than splicing
        # them into the command: a quote or backslash in the site name would
        # otherwise break (or alter) the R expression.
        robj.assign('rmain', "Counts: %s" % sitename)
        robj.assign('rxlab', "Dates:  %s" % xlabel)
        cmdstring = 'plot( rctmpct, rcts, col="black",main=rmain, xlab=rxlab,ylab="Counts/min",type="l", ylim=c(%d,%d) )' % (
            ymin, ymax)
        log.debug("R cmdstring is %s" % cmdstring)
        robj(cmdstring)
        log.debug("Completed R command string %s" % cmdstring)
        robj.dev_off()

        # Pull written image and return to caller (binary mode: it is a PNG).
        log.debug("OK: What is our tempfile? = %s" % tmpgraphfile)
        return open(tmpgraphfile, 'rb')
コード例 #40
0
def main():
    """Correct rates read from stdin using a linear fit on neutral regions.

    Steps, as performed below:

    1. Read the neutral-rates table named by ``--input-filename-neutral``
       with ``readRates`` and fit ``lm(y ~ x)`` in R, where x and y are
       columns 2 and 3 of that table.
    2. Plot the neutral data with the fitted confidence and prediction
       bands, overlay the stdin data in red, and write the figure to
       ``(output_filename_pattern % "fit") + ".png"``.
    3. Compute ``uncorrected`` (column 3 divided by the mean neutral rate)
       and ``corrected`` (column 3 divided by the fitted value) inside R and
       plot them against each other into
       ``(output_filename_pattern % "correction") + ".png"``.

    Raises:
        ValueError: if no neutral-rates file was supplied.
        IOError: if stdin is empty.
    """
    parser = E.OptionParser( version = "%prog version: $Id: rates2rates.py 2781 2009-09-10 11:33:14Z andreas $", usage = globals()["__doc__"])

    parser.add_option( "--output-filename-pattern", dest="output_filename_pattern", type="string",
                      help="pattern for additional output files [%default]."  )

    parser.add_option( "--input-filename-neutral", dest="input_filename_neutral", type="string",
                      help="a tab-separated file with rates and G+C content in neutrally evolving regions [%default]."  )

    parser.set_defaults(
        input_filename_neutral = None,
        output_filename_pattern = "%s",
        normalize = True,
        hardcopy = None,
        )

    (options, args) = E.Start( parser, add_csv_options = True )

    if not options.input_filename_neutral:
        raise ValueError( "please supply a file with neutral rates." )

    lines = options.stdin.readlines()
    if len(lines) == 0:
        raise IOError ( "no input" )

    from rpy import r as R

    R.png( options.output_filename_pattern % "fit" + ".png", width=1024, height=768, type="cairo")
    # Close the neutral-rates file as soon as it has been parsed instead of
    # leaving the handle to the garbage collector.
    with open( options.input_filename_neutral, "r" ) as neutral_file:
        matrix, headers = readRates( neutral_file )
    R.assign("matrix", matrix)
    R.assign("headers", headers)
    nref = R( """length( matrix[,1] )""" )

    # ``dat`` and ``mod`` are created as variables inside R; only the fitted
    # model is needed back in Python (for the coefficients in the caption).
    R("""dat <- data.frame(x = matrix[,2], y = matrix[,3])""")
    mod = R("""mod <- lm( y ~ x, dat)""")

    R("""plot( matrix[,2], matrix[,3], cex=%s, col="blue", pch="o", xlab="%s", ylab="%s" %s)""" % (0.3, headers[1], headers[2], "") )
    R("""new <- data.frame(x = seq( min(matrix[,2]), max(matrix[,2]), (max(matrix[,2]) - min(matrix[,2])) / 100))""")
    R("""predict(mod, new, se.fit = TRUE)""")
    R("""pred.w.plim <- predict(mod, new, interval="prediction")""")
    R("""pred.w.clim <- predict(mod, new, interval="confidence")""")
    R("""matpoints(new$x,cbind(pred.w.clim, pred.w.plim[,-1]), lty=c(1,2,2,3,3), type="l")""")
    R.mtext(
        "y = %f * x + %f, r=%6.4f, n=%i" % (mod["coefficients"]["x"], 
                                            mod["coefficients"]["(Intercept)"], 
                                            R("""cor( dat )[2]"""), 
                                            nref ),
        3,
        cex = 1.0)

    R("""mean_rate <- mean( matrix[,3] )""")

    data_matrix, data_headers = readRates( lines )
    R.assign("data_matrix", data_matrix)
    R.assign("data_headers", data_headers)

    R("""points( data_matrix[,2], data_matrix[,3], cex=%s, col="red", pch="o" %s)""" % (0.3, "") )
    R("""topred <- data.frame( x = data_matrix[,2] )""")
    R("""corrected_rates <- predict( mod, topred, se.fit = TRUE )""")
    # Both vectors stay inside R, where the second plot reads them by name.
    R("""uncorrected <- data_matrix[,3] / mean_rate """)
    R("""corrected <- as.vector(data_matrix[,3] / corrected_rates$fit)""")
    R.dev_off()

    R.png( options.output_filename_pattern % "correction" + ".png", width=1024, height=768, type="cairo")
    R("""plot( uncorrected, corrected, cex=%s, col="blue", pch="o", xlab="uncorrected rate", ylab="corrected rate" %s)""" % (0.3, "") )
    R.dev_off()

    E.Stop()