Example #1
0
	def draw_tfbs_similarity_ls_histogram(self, tfbs_similarity_ls, output_fname):
		"""
		Draw a histogram of tfbs_similarity_ls into the PNG file output_fname.

		The histogram is only drawn when the sequence holds more than 10
		values; otherwise the values themselves are echoed to stderr and no
		file is produced. Progress messages are written to stderr.
		Returns None.
		"""
		sys.stderr.write("Drawing histogram for tfbs_similarity_ls...")
		if len(tfbs_similarity_ls)>10:
			r.png('%s'%(output_fname,))
			try:
				r.hist(tfbs_similarity_ls, main='histogram',xlab='tfbs_similarity',ylab='freq')
			finally:
				#close the device even if hist() raises, so it is not left open
				r.dev_off()
			sys.stderr.write("Done.\n")
		else:
			#wrap in a 1-tuple: a bare tuple operand would be unpacked by % and raise TypeError
			sys.stderr.write("too short: %s, aborted\n"%(tfbs_similarity_ls,))
Example #2
0
def funcionprincipal(dato,variable,opciones):
    """Draw a histogram of one variable and report where the image was saved.

    Parameters:
        dato: object exposing ``query(variable)``; its result is handed to
            R's ``hist`` as the data to plot.
        variable: name passed to ``dato.query`` and used as both the plot
            title and the x-axis label.
        opciones: mapping holding the key ``u"NúmeroIntervalos"``; its value
            is converted with ``int`` and passed to ``hist`` as ``nclass``.

    Returns:
        ``{"Histograma": {"ruta": <path of the PNG file>}}``.

    Raises:
        KeyError / ValueError: if the interval count is missing or is not
            convertible to ``int`` (raised before any file is created).
    """
    import os
    import tempfile
    from rpy import r

    lista = dato.query(variable)
    # Validate the option first so a bad value cannot leave a device open.
    intervalos = int(opciones[u"NúmeroIntervalos"])

    # mkstemp picks a unique name and creates the file atomically, unlike a
    # random number appended to a fixed prefix, which can collide.
    # NOTE(review): the original comment suggests the temporary directory
    # should come from the configuration; "/tmp" is still hard-coded here.
    descriptor, nombrefichero = tempfile.mkstemp(prefix="driza", suffix=".png", dir="/tmp")
    os.close(descriptor)

    r.png(nombrefichero)
    try:
        r.hist(lista,main=variable,xlab=variable, nclass=intervalos)
    finally:
        # Always close the graphics device, even when hist() fails.
        r.dev_off()
    return {"Histograma": {"ruta": nombrefichero}}
Example #3
0
def test():
    """Plot an R histogram of normal samples together with density curves.

    Draws 500 samples from a normal distribution (mean 0, sd 1), bins them
    with R's ``hist`` and plots, through the pg* calls, the binned density,
    a +/- band around it, the three curves returned by ``bootdensity`` and
    a reference curve from ``gaussian.gaussian``.
    """
    samples = N.random.normal(0.0, 1.0, 500)
    r_hist = r.hist(samples)
    edges = N.array(r_hist['breaks'])
    # Same length as the edges; the last slot is left at zero.
    density = N.zeros(edges.shape, N.float)
    density[:-1] = r_hist['density']
    xdens, ydens, ydenslow, ydenshigh = bootdensity(samples, -5.0, 5.0, 100, 0.95)
    # 1000 evenly spaced abscissae starting at -5.0 with step 0.01.
    grid = (N.arange(1000) / 500.0 - 1.0) * 5.0
    reference = gaussian.gaussian(grid, 1.0, 0.0, 1.0)
    # Half-width of the band drawn around the histogram.
    # NOTE(review): the constant 1000 differs from the sample count (500) - confirm it is intended.
    band = 2*(N.sqrt(density*1000)/1000.0)

    pgaqt()
    pgsetup()
    pgenv(-5.0, 5.0, 0.0, 0.5)
    pgbin(edges, density, False)

    pgxsci('yellow')
    pgbin(edges, density-band, False)
    pgbin(edges, density+band, False)

    pgxsci('black')
    pgline(xdens, ydens)
    # Disabled: per-replicate curves.
    #pgxsci('lightgray')
    #for i in range(100):
    #    pgline(xdens, ydensboot[i])

    pgxsci('green')
    pgline(xdens, ydenslow)
    pgxsci('red')
    pgline(xdens, ydenshigh)

    pgxsci('blue')
    pgline(grid, reference)
    pgclos()
Example #4
0
	def transform_one_file(self, src_pathname, delimiter, outputdir, b_instance, threshold, type, no_of_valids):
		"""
		Draw a histogram of the per-row standard deviations of one data file.

		Each row of src_pathname is read as [gene_id, value, value, ...]:
			'NA' cells are stored as a masked placeholder,
			empty cells are dropped,
			other cells are parsed as float; when type==1 the value is
			floored at 10 and replaced by its natural logarithm.
		A row contributes its standard deviation (over the unmasked values)
		when it has at least no_of_valids unmasked values. If more than 100
		rows contribute, the histogram is written to
		<outputdir>/<basename of src_pathname>.png.

		b_instance and threshold are accepted but not used in this method.
		Reads self.debug; when true, intermediate values are printed and the
		method waits for input after each row.

		08-09-05
			add type
		08-29-05
			add no_of_valids to cut genes with too few valid values
		"""
		filename = os.path.basename(src_pathname)
		output_filename = os.path.join(outputdir, filename)
		std_list = []
		inf = open(src_pathname)
		try:
			reader = csv.reader(inf, delimiter=delimiter)
			for row in reader:
				if not row:
					#blank line, nothing to measure
					continue
				new_row = []
				mask_ls = []
				for cell in row[1:]:	#row[0] is the gene id
					if cell == 'NA':
						new_row.append(1e20)	#placeholder value, hidden by the mask
						mask_ls.append(1)
					elif cell == '':
						#ignore empty entry
						continue
					else:
						value = float(cell)
						if type==1:
							if value<=10:
								value = 10
							value = math.log(value)
						new_row.append(value)
						mask_ls.append(0)
				ma_array = array(new_row, mask=mask_ls)
				if self.debug:
					#single-argument print(...) emits the same text under the print statement and the print function
					print("The data vector is  %s"%(ma_array,))
					print("Its mask is  %s"%(ma_array.mask(),))
				valid_values = ma_array.compressed()	#disregard the NAs
				if len(valid_values)>=no_of_valids:
					#08-29-05	no_of_valids controls not too many NA's, which is for graph_modeling
					std = MLab.std(valid_values)
					if self.debug:
						print("std is  %s"%(std,))
						raw_input("Continue?(Y/n)")
					std_list.append(std)
		finally:
			#close the input explicitly instead of relying on garbage collection
			inf.close()
		if len(std_list)>100:
			r.png('%s.png'%output_filename)
			try:
				r.hist(std_list, main='histogram',xlab='std',ylab='freq')
			finally:
				#close the device even if hist() raises
				r.dev_off()
Example #5
0
def main(args):
    """Chain BLAST searches over consecutive source files and plot set sizes.

    For every adjacent pair (file i, file i+1) of the list returned by
    ``get_src_files(args.get('in'))``, file i is blasted against file i+1.
    When the previous round produced a non-empty hit table, the query file
    is first reduced by the external ``reduce_fasta_file.py`` script to the
    ids of that table. The parsed results are merged with
    ``integrate_all_homologs`` and a histogram of ``len(list) + 1`` per
    entry is written to ``hist_size_homol_sets.pdf``.

    Parameters:
        args: object providing ``get('in')``; it is also passed unchanged to
            ``parse_blastout`` and ``integrate_all_homologs``.

    Raises:
        OSError: if ``reduce_fasta_file.py`` cannot be executed.
    """
    import subprocess

    sourcefiles = get_src_files(args.get('in'))
    hashofhos = None
    parsedfiles = []
    for i in range(len(sourcefiles) - 1):

        queryfile = sourcefiles[i]
        subjectfile = sourcefiles[i + 1]

        if hashofhos:
            # Keep only the sequences listed in the previous hit table.
            idfile = 'keepids.tmp'
            fw = open(idfile, 'w')
            try:
                for hit_id in hashofhos.keys():
                    fw.write(hit_id + '\n')
            finally:
                fw.close()
            outfile = 'red_' + get_basename(queryfile) + '.aa'
            # Argument list instead of an interpolated shell string: file
            # names containing spaces or shell metacharacters stay intact.
            status = subprocess.call(['reduce_fasta_file.py', '-f', queryfile,
                                      '-i', idfile, '-o', outfile])
            if status != 0:
                infomsg("reduce_fasta_file.py exited with status %s" % status)
            queryfile = outfile

        blastout = blast(queryfile, subjectfile)
        parsedfile, hashofhos = parse_blastout(blastout, args)
        parsedfiles.append(parsedfile)
        infomsg("hits: %s" % len(hashofhos))

    Homologs = integrate_all_homologs(parsedfiles, args)

    # stats: one size per entry (presumably the +1 counts the key sequence
    # itself - confirm against integrate_all_homologs)
    no = [len(orthlist) + 1 for orthlist in Homologs.values()]
    if not no:
        # max() below would raise ValueError on an empty list.
        infomsg("no homologous sets found, histogram skipped")
        return

    from rpy import r
    outfile = 'hist_size_homol_sets.pdf'
    title = 'Size of Homologous Sets'
    x = 'number of homologs'
    y = 'frequency'
    r.pdf(outfile)
    try:
        r.hist(no, xlab=x, ylab=y, main=title, col='grey', breaks=max(no))
    finally:
        # Close the device even if hist() raises.
        r.dev_off()
Example #6
0
def main( args ):
	"""Chain BLAST searches over consecutive source files and plot set sizes.

	For every adjacent pair (file i, file i+1) of the list returned by
	``get_src_files(args.get('in'))``, file i is blasted against file i+1.
	When the previous round produced a non-empty hit table, the query file
	is first reduced by the external ``reduce_fasta_file.py`` script to the
	ids of that table. The parsed results are merged with
	``integrate_all_homologs`` and a histogram of ``len(list) + 1`` per
	entry is written to ``hist_size_homol_sets.pdf``.

	Parameters:
		args: object providing ``get('in')``; it is also passed unchanged to
			``parse_blastout`` and ``integrate_all_homologs``.

	Raises:
		OSError: if ``reduce_fasta_file.py`` cannot be executed.
	"""
	import subprocess

	sourcefiles = get_src_files( args.get('in') )
	hashofhos = None
	parsedfiles = []
	for i in range( len(sourcefiles)-1 ):

		queryfile = sourcefiles[i]
		subjectfile = sourcefiles[i+1]

		if hashofhos:
			# Keep only the sequences listed in the previous hit table.
			idfile = 'keepids.tmp'
			fw = open( idfile, 'w' )
			try:
				for hit_id in hashofhos.keys():
					fw.write( hit_id + '\n' )
			finally:
				fw.close()
			outfile = 'red_' + get_basename(queryfile) + '.aa'
			# Argument list instead of an interpolated shell string: file
			# names containing spaces or shell metacharacters stay intact.
			status = subprocess.call( ['reduce_fasta_file.py', '-f', queryfile, '-i', idfile, '-o', outfile] )
			if status != 0:
				infomsg( "reduce_fasta_file.py exited with status %s" %status )
			queryfile = outfile

		blastout = blast( queryfile, subjectfile )
		parsedfile, hashofhos = parse_blastout( blastout, args )
		parsedfiles.append( parsedfile )
		infomsg( "hits: %s" %len(hashofhos) )

	Homologs = integrate_all_homologs( parsedfiles, args )

	# stats: one size per entry (presumably the +1 counts the key sequence
	# itself - confirm against integrate_all_homologs)
	no = [len(orthlist) + 1 for orthlist in Homologs.values()]
	if not no:
		# max() below would raise ValueError on an empty list.
		infomsg( "no homologous sets found, histogram skipped" )
		return

	from rpy import r
	outfile = 'hist_size_homol_sets.pdf'
	title = 'Size of Homologous Sets'
	x = 'number of homologs'
	y = 'frequency'
	r.pdf( outfile )
	try:
		r.hist(no, xlab=x, ylab=y, main=title, col='grey', breaks=max(no))
	finally:
		# Close the device even if hist() raises.
		r.dev_off()
	def draw_hist_gene_freq(self,  files, frequency_presence_vector_gene_id_ls, exponent, output_dir):
		"""
		Draw one gene-frequency histogram per dataset and write enrich_index.csv.

		files: dataset file names; files[i] names the i-th dataset.
		frequency_presence_vector_gene_id_ls: rows whose first item is a
			frequency, followed by one presence flag per dataset (1 means
			present). The last item of each row is skipped (presumably the
			gene id, judging by the parameter name).
		exponent: every frequency is raised to this power before being summed
			into the dataset's enrich index.
		output_dir: directory that receives <file>.png and enrich_index.csv.

		enrich_index.csv is tab-delimited with one row per dataset,
		[enrich index, number of genes, file name], sorted ascending.
		Progress is reported on stderr. Returns None.

		12-23-05
		12-26-05 if it's not empty, then draw it
		12-26-05 add an enrich_index_no_of_genes_filename_ls
		01-05-06 have >10 items, then draw it
		"""
		sys.stderr.write("Drawing gene frequency histogram for each dataset...\n")
		#one frequency list per dataset
		dataset_index_gene_freq_ls = [[] for _ in files]
		for row in frequency_presence_vector_gene_id_ls:
			frequency = row[0]
			#row[1:-1] holds the presence flags; flag k belongs to dataset k
			for dataset_index, present in enumerate(row[1:-1]):
				if present == 1:
					dataset_index_gene_freq_ls[dataset_index].append(frequency)

		#12-26-05
		enrich_index_no_of_genes_filename_ls = []

		for i, fname in enumerate(files):
			gene_freq_ls = dataset_index_gene_freq_ls[i]
			sys.stderr.write("%s\t%s"%('\x08'*20, i))
			output_fname = os.path.join(output_dir, fname)
			#12-26-05
			enrich_index = sum([math.pow(freq, exponent) for freq in gene_freq_ls])
			enrich_index_no_of_genes_filename_ls.append([enrich_index, len(gene_freq_ls), fname])

			if len(gene_freq_ls)>10:	#01-05-06 have >10 items, then draw it
				r.png("%s.png"%output_fname)
				try:
					r.hist(gene_freq_ls, main='histogram',xlab='gene frequency',ylab='no of genes', labels=r.TRUE)
				finally:
					#close the device even if hist() raises
					r.dev_off()

		#12-26-05
		enrich_index_no_of_genes_filename_ls.sort()
		enrich_index_output_fname = os.path.join(output_dir, 'enrich_index.csv')
		outf = open(enrich_index_output_fname, 'w')
		try:
			writer = csv.writer(outf, delimiter ='\t')
			writer.writerows(enrich_index_no_of_genes_filename_ls)
		finally:
			#close explicitly so the rows are flushed without relying on garbage collection
			outf.close()

		sys.stderr.write('Done.\n')
Example #8
0
	def transform_one_file(self, src_pathname, delimiter, outputdir, b_instance, threshold, type, no_of_valids):
		"""
		Draw a histogram of the values found in column index 3 of one file.

		The first row of src_pathname is skipped; for every later row,
		row[3] is parsed as float. The histogram is written to
		<outputdir>/<basename of src_pathname>.png only when more than 100
		values were read, so blank or short files produce no plot.

		b_instance, threshold, type and no_of_valids are accepted but not
		used in this method. Returns None.

		08-09-05
			add type
		08-29-05
			add no_of_valids to cut genes with too few valid values
		01-05-06
			deal with blank files
		"""
		filename = os.path.basename(src_pathname)
		output_filename = os.path.join(outputdir, filename)
		cor_list = []
		inf = open(src_pathname)	#open() instead of file(), which newer Pythons no longer provide
		try:
			reader = csv.reader(inf, delimiter=delimiter)
			for counter, row in enumerate(reader):
				if counter>0:	#01-05-06 the first row is skipped
					cor_list.append(float(row[3]))
		finally:
			#close the input explicitly instead of relying on garbage collection
			inf.close()
		if len(cor_list)>100:
			r.png('%s.png'%output_filename)
			try:
				r.hist(cor_list, main='histogram',xlab='cor',ylab='freq')
			finally:
				#close the device even if hist() raises
				r.dev_off()
Example #9
0
 def plot(self, filename, list_to_plot, main_lab, xlab):
     """Write a histogram of list_to_plot to the PDF file filename.

     The bin breaks are every integer from 0 up to max(list_to_plot), so the
     values are expected to be integers. main_lab is the plot title and xlab
     the x-axis label.

     Raises ValueError for an empty list_to_plot and TypeError when the
     maximum is not an integer; both are raised before the PDF is opened.
     """
     max_length = max(list_to_plot)
     # Build the breaks first so bad input fails before a device is opened.
     breaks = range(max_length + 1)
     r.pdf(filename)
     try:
         r.hist(list_to_plot, breaks=breaks, las=1, main=main_lab, xlab=xlab)
     finally:
         # Close the device even if hist() raises.
         r.dev_off()
Example #10
0
from AlgoDPA import *
from rpy import r
import sys, sqlite3, scipy, pickle
import matplotlib.pyplot as plt

# Extend the module search path with the shared resources directory
# (presumably needed to resolve classes referenced by the pickles - confirm).
sys.path.append('/media/FreeAgent GoFlex Drive/research/attacks/resources')

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
# The files are opened in `with` blocks so the handles are closed promptly.
with open('../serial/[50].p', 'rb') as key_file:
    dpaKey1 = pickle.load(key_file)
with open('../serial/[12].p', 'rb') as key_file:
    dpaKey2 = pickle.load(key_file)

# Flatten every list stored in each object's `peaks` mapping.
peaks1 = [y for x in dpaKey1.peaks.values() for y in x]
peaks2 = [y for x in dpaKey2.peaks.values() for y in x]

# Combined values of both objects, plotted as one histogram.
pp = peaks1+peaks2

#plt.hist(pp)
r.png('output.png')
try:
    r.hist(pp)
finally:
    # Close the device even if hist() raises.
    r.dev_off()
#plt.savefig('../graphs/[50,12]_combinedHypoCurrentDistribution.png')
      pch=21, col='blue', bg='lightblue', type='o') 


# Second plot: squares of 1..10 as a blue line, cubes overlaid in red.
second_plot_args = dict(main='My second plot', xlab='x', ylab='y', type='l', col='blue')
x = range(1, 11)
y = [i * i for i in x]
z = [i * i * i for i in x]
r.plot(x, y, **second_plot_args)
r.lines(x, z, col='red')

# Cosine curve on 0..50 in steps of 0.1, written to cosine.png.
import math
r.png('cosine.png')
x = r.seq(0, 50, by=0.1)
y = [math.cos(i) for i in x]
r.plot(x, y, main='COS(X)', xlab='x', ylab='cos(x)', type='l', col='blue')
r.dev_off()

# Histogram of a small list with repeated values.
x = list(range(10)) + list(range(3, 6)) + list(range(5, 10))
r.hist(x, main='A histogram', xlab='x', col='lightblue')

# Adjust the plotting area: same curves as the second plot, with the y axis
# widened to the range of both series.
from rpy import r
x = range(1, 11)
y = [i * i for i in x]
z = [i * i * i for i in x]
r.plot(x, y, ylim=r.range(y, z), **second_plot_args)
r.lines(x, z, col='red')