def draw_tfbs_similarity_ls_histogram(self, tfbs_similarity_ls, output_fname):
    """
    Draw a histogram of tfbs_similarity_ls into a PNG file.

    tfbs_similarity_ls: sequence of values handed to hist().
    output_fname: path handed to png() as the output file.

    The histogram is only drawn when there are more than 10 values;
    otherwise a "too short" message is written to stderr and nothing is
    plotted. Progress messages go to stderr. Returns None.

    `r` is taken from the enclosing module (presumably the rpy R interface;
    confirm against the file's imports).
    """
    sys.stderr.write("Drawing histogram for tfbs_similarity_ls...")
    if len(tfbs_similarity_ls) > 10:
        r.png('%s' % (output_fname,))
        try:
            r.hist(tfbs_similarity_ls, main='histogram', xlab='tfbs_similarity', ylab='freq')
        finally:
            # Close the device even when hist() fails, so later plots do not
            # end up on a stale device.
            r.dev_off()
        sys.stderr.write("Done.\n")
    else:
        # Wrap in a 1-tuple: '%' would unpack a bare tuple argument and raise
        # TypeError instead of printing it.
        sys.stderr.write("too short: %s, aborted\n" % (tfbs_similarity_ls,))
def funcionprincipal(dato,variable,opciones):
    """
    Draw a histogram of one variable with R and report where it was saved.

    dato: object whose query(variable) returns the values to plot.
    variable: passed to dato.query(); also used as the plot title and
        x-axis label.
    opciones: mapping; the number-of-intervals entry (see the key used
        below) is converted with int() and passed to hist() as nclass.

    Returns a dict of the form {"Histograma": {"ruta": <path of the PNG>}}.
    """
    import os
    import tempfile
    from rpy import r

    valores = dato.query(variable)

    # mkstemp creates a uniquely named file atomically. The previous
    # "/tmp/driza<random int>.png" name could collide between runs or be
    # pre-created by another user of /tmp.
    # Original note: "temporary directory of the config" -- presumably the
    # directory should come from the configuration; /tmp is kept for now.
    descriptor, nombrefichero = tempfile.mkstemp(prefix="driza", suffix=".png", dir="/tmp")
    os.close(descriptor)  # R reopens the path itself; only the name is needed

    r.png(nombrefichero)
    try:
        r.hist(valores, main=variable, xlab=variable,
               nclass=int(opciones[u"NúmeroIntervalos"]))
    finally:
        # Close the device even if the option lookup or hist() fails.
        r.dev_off()

    return {"Histograma": {"ruta": nombrefichero}}
def test():
    """
    Plot a histogram of 500 standard-normal samples with reference curves.

    The samples are histogrammed with r.hist(); the binned density is drawn
    together with a +/- band around it, the curves returned by bootdensity()
    and a curve from gaussian.gaussian(). Output is graphical only; returns
    None.

    The pg* functions, bootdensity and gaussian come from the enclosing
    module (presumably PGPLOT-style wrappers and a bootstrap density
    estimator -- confirm against the file's imports).
    """
    a = N.random.normal(0.0, 1.0, 500)
    hist = r.hist(a)
    breaks = N.array(hist['breaks'])
    # dhist has one entry per break; the densities fill all but the last
    # slot, which stays 0. Builtin float replaces the N.float alias (removed
    # from recent NumPy releases) and yields the same dtype.
    dhist = N.zeros(breaks.shape, float)
    dhist[:-1] = hist['density']
    xdens, ydens, ydenslow, ydenshigh = bootdensity(a, -5.0, 5.0, 100, 0.95)
    # 1000 points running from -5.0 up to just below 5.0
    xg = (N.arange(1000) / 500.0 - 1.0) * 5.0
    yg = gaussian.gaussian(xg, 1.0, 0.0, 1.0)
    # Half-width of the band drawn around the binned density.
    # NOTE(review): the factor 1000 is hard-coded although 500 samples are
    # drawn -- confirm this is the intended normalisation.
    band = 2 * (N.sqrt(dhist * 1000) / 1000.0)

    pgaqt()
    pgsetup()
    pgenv(-5.0, 5.0, 0.0, 0.5)
    pgbin(breaks, dhist, False)
    pgxsci('yellow')
    pgbin(breaks, dhist - band, False)
    pgbin(breaks, dhist + band, False)
    pgxsci('black')
    pgline(xdens, ydens)
    #pgxsci('lightgray')
    #for i in range(100):
    #    pgline(xdens, ydensboot[i])
    pgxsci('green')
    pgline(xdens, ydenslow)
    pgxsci('red')
    pgline(xdens, ydenshigh)
    pgxsci('blue')
    pgline(xg, yg)
    pgclos()
def transform_one_file(self, src_pathname, delimiter, outputdir, b_instance, threshold, type, no_of_valids):
    """
    08-09-05
        add type
    08-29-05
        add no_of_valids to cut genes with too few valid values

    Compute the standard deviation of every row of a delimited file and,
    when more than 100 rows qualify, draw a histogram of those values to
    <outputdir>/<basename of src_pathname>.png.

    Column 0 of each row is skipped; in the remaining columns 'NA' becomes
    a masked entry, empty strings are dropped and everything else is parsed
    with float(). When type == 1 each value is floored at 10 and replaced by
    its natural logarithm. Rows with fewer than no_of_valids unmasked values
    are ignored. Blank lines are skipped.

    b_instance and threshold are not used by this method. self.debug is
    read; when true, intermediate values are written to stdout and the
    method waits for input after each qualifying row. Returns None.
    """
    import sys

    filename = os.path.basename(src_pathname)
    output_filename = os.path.join(outputdir, filename)
    std_list = []

    # open() + finally instead of a bare file(): the handle is closed
    # deterministically even when a row fails to parse.
    inf = open(src_pathname)
    try:
        for row in csv.reader(inf, delimiter=delimiter):
            if not row:
                # blank line: no id column and no values to measure
                continue
            new_row = []
            mask_ls = []
            for cell in row[1:]:
                if cell == 'NA':
                    # placeholder value, hidden by the mask
                    new_row.append(1e20)
                    mask_ls.append(1)
                elif cell == '':
                    #ignore empty entry
                    continue
                else:
                    value = float(cell)
                    if type == 1:
                        value = math.log(max(value, 10))
                    new_row.append(value)
                    mask_ls.append(0)
            ma_array = array(new_row, mask=mask_ls)
            if self.debug:
                # sys.stdout.write reproduces the former print output
                # byte for byte (two spaces before the value).
                sys.stdout.write("The data vector is  %s\n" % (ma_array,))
                sys.stdout.write("Its mask is  %s\n" % (ma_array.mask(),))
            valid_values = ma_array.compressed()    #disregard the NAs
            if len(valid_values) >= no_of_valids:
                #at least two samples, otherwise, correlation can't be calculated
                #08-29-05 no_of_valids controls not too many NA's, which is for graph_modeling
                std = MLab.std(valid_values)
                if self.debug:
                    sys.stdout.write("std is  %s\n" % (std,))
                    raw_input("Continue?(Y/n)")
                std_list.append(std)
    finally:
        inf.close()

    if len(std_list) > 100:
        r.png('%s.png' % output_filename)
        try:
            r.hist(std_list, main='histogram', xlab='std', ylab='freq')
        finally:
            # never leave the graphics device open after a failed hist()
            r.dev_off()
def main(args):
    """
    Chain BLAST searches over consecutive source files and plot set sizes.

    args: mapping-like object; args.get('in') is handed to get_src_files()
        and args itself is forwarded to parse_blastout() and
        integrate_all_homologs().

    Every source file is blasted against its successor. When the previous
    round produced hits, the query file is first reduced to those hit ids:
    the ids are written to 'keepids.tmp' and the external script
    reduce_fasta_file.py writes the reduced file 'red_<basename>.aa'.
    After all rounds the parsed results are integrated and a histogram of
    the homologous set sizes is written to 'hist_size_homol_sets.pdf'.
    Returns None.
    """
    import subprocess

    sourcefiles = get_src_files(args.get('in'))
    hashofhos = None
    parsedfiles = []
    for queryfile, subjectfile in zip(sourcefiles[:-1], sourcefiles[1:]):
        if hashofhos:
            idfile = 'keepids.tmp'
            fw = open(idfile, 'w')
            try:
                for hit_id in hashofhos.keys():
                    fw.write(hit_id + '\n')
            finally:
                fw.close()
            outfile = 'red_' + get_basename(queryfile) + '.aa'
            # Argument list instead of a shell string: file names containing
            # spaces or shell metacharacters are passed through unchanged.
            status = subprocess.call(['reduce_fasta_file.py', '-f', queryfile,
                                      '-i', idfile, '-o', outfile])
            if status != 0:
                # still best effort, but no longer silent
                infomsg("reduce_fasta_file.py exited with status %s" % status)
            queryfile = outfile
        blastout = blast(queryfile, subjectfile)
        parsedfile, hashofhos = parse_blastout(blastout, args)
        parsedfiles.append(parsedfile)
        infomsg("hits: %s" % len(hashofhos))

    Homologs = integrate_all_homologs(parsedfiles, args)

    # stats: each set is its list of entries plus one
    # (presumably the key sequence itself -- confirm)
    no = [len(orthlist) + 1 for _sid, orthlist in Homologs.iteritems()]
    if not no:
        # max() of an empty list would raise; there is nothing to plot
        infomsg("no homologous sets found, histogram skipped")
        return

    from rpy import r
    outfile = 'hist_size_homol_sets.pdf'
    title = 'Size of Homologous Sets'
    x = 'number of homologs'
    y = 'frequency'
    r.pdf(outfile)
    try:
        r.hist(no, xlab=x, ylab=y, main=title, col='grey', breaks=max(no))
    finally:
        # close the PDF device even if hist() fails
        r.dev_off()
def main( args ):
    """
    Chain BLAST searches over consecutive source files and plot set sizes.

    args: mapping-like object; args.get('in') is handed to get_src_files()
        and args itself is forwarded to parse_blastout() and
        integrate_all_homologs().

    Every source file is blasted against its successor. When the previous
    round produced hits, the query file is first reduced to those hit ids:
    the ids are written to 'keepids.tmp' and the external script
    reduce_fasta_file.py writes the reduced file 'red_<basename>.aa'.
    After all rounds the parsed results are integrated and a histogram of
    the homologous set sizes is written to 'hist_size_homol_sets.pdf'.
    Returns None.
    """
    import subprocess

    sourcefiles = get_src_files(args.get('in'))
    hashofhos = None
    parsedfiles = []
    for queryfile, subjectfile in zip(sourcefiles[:-1], sourcefiles[1:]):
        if hashofhos:
            idfile = 'keepids.tmp'
            fw = open(idfile, 'w')
            try:
                for hit_id in hashofhos.keys():
                    fw.write(hit_id + '\n')
            finally:
                fw.close()
            outfile = 'red_' + get_basename(queryfile) + '.aa'
            # Argument list instead of a shell string: file names containing
            # spaces or shell metacharacters are passed through unchanged.
            status = subprocess.call(['reduce_fasta_file.py', '-f', queryfile,
                                      '-i', idfile, '-o', outfile])
            if status != 0:
                # still best effort, but no longer silent
                infomsg("reduce_fasta_file.py exited with status %s" % status)
            queryfile = outfile
        blastout = blast(queryfile, subjectfile)
        parsedfile, hashofhos = parse_blastout(blastout, args)
        parsedfiles.append(parsedfile)
        infomsg("hits: %s" % len(hashofhos))

    Homologs = integrate_all_homologs(parsedfiles, args)

    # stats: each set is its list of entries plus one
    # (presumably the key sequence itself -- confirm)
    no = [len(orthlist) + 1 for _sid, orthlist in Homologs.iteritems()]
    if not no:
        # max() of an empty list would raise; there is nothing to plot
        infomsg("no homologous sets found, histogram skipped")
        return

    from rpy import r
    outfile = 'hist_size_homol_sets.pdf'
    title = 'Size of Homologous Sets'
    x = 'number of homologs'
    y = 'frequency'
    r.pdf(outfile)
    try:
        r.hist(no, xlab=x, ylab=y, main=title, col='grey', breaks=max(no))
    finally:
        # close the PDF device even if hist() fails
        r.dev_off()
def draw_hist_gene_freq(self, files, frequency_presence_vector_gene_id_ls, exponent, output_dir):
    """
    12-23-05
    12-26-05
        if it's not empty, then draw it
    12-26-05
        add an enrich_index_no_of_genes_filename_ls
    01-05-06
        have >10 items, then draw it

    Collect, per dataset, the frequencies of the rows present in it, draw
    one histogram per dataset and write a summary table.

    files: dataset file names; entry i names dataset i and its output files.
    frequency_presence_vector_gene_id_ls: rows whose first item is a
        frequency, followed by one flag per dataset (1 = present); the last
        item of each row is not inspected (presumably the gene id, judging
        by the parameter name).
    exponent: each frequency is raised to this power before being summed
        into the dataset's enrich index.
    output_dir: directory receiving '<file>.png' for every dataset with more
        than 10 frequencies, and 'enrich_index.csv' (tab-delimited rows of
        enrich index, number of frequencies, file name; sorted ascending).

    Progress is written to stderr. Returns None.
    """
    sys.stderr.write("Drawing gene frequency histogram for each dataset...\n")
    #one frequency list per dataset, in the order of files
    dataset_index_gene_freq_ls = [[] for _ in files]
    for row in frequency_presence_vector_gene_id_ls:
        frequency = row[0]
        for i in range(1, len(row)-1):
            if row[i] == 1:
                dataset_index_gene_freq_ls[i-1].append(frequency)   #WATCH i-1

    #12-26-05
    enrich_index_no_of_genes_filename_ls = []
    for i, fname in enumerate(files):
        sys.stderr.write("%s\t%s"%('\x08'*20, i))
        gene_freq_ls = dataset_index_gene_freq_ls[i]
        output_fname = os.path.join(output_dir, fname)
        #12-26-05
        enrich_index = sum([math.pow(freq, exponent) for freq in gene_freq_ls])
        enrich_index_no_of_genes_filename_ls.append([enrich_index, len(gene_freq_ls), fname])
        if len(gene_freq_ls) > 10:   #01-05-06 have >10 items, then draw it
            r.png("%s.png"%output_fname)
            try:
                r.hist(gene_freq_ls, main='histogram', xlab='gene frequency', ylab='no of genes', labels=r.TRUE)
            finally:
                # close the device even when hist() fails
                r.dev_off()

    #12-26-05
    enrich_index_no_of_genes_filename_ls.sort()
    enrich_index_output_fname = os.path.join(output_dir, 'enrich_index.csv')
    # Close the file explicitly; dropping the writer alone does not
    # guarantee that the data has been flushed to disk.
    outf = open(enrich_index_output_fname, 'w')
    try:
        writer = csv.writer(outf, delimiter='\t')
        writer.writerows(enrich_index_no_of_genes_filename_ls)
    finally:
        outf.close()
    sys.stderr.write('Done.\n')
def transform_one_file(self, src_pathname, delimiter, outputdir, b_instance, threshold, type, no_of_valids):
    """
    08-09-05
        add type
    08-29-05
        add no_of_valids to cut genes with too few valid values
    01-05-06
        deal with blank files

    Read the fourth column of a delimited file and, when more than 100
    values were read, draw their histogram to
    <outputdir>/<basename of src_pathname>.png.

    The first row is skipped (presumably a header -- confirm) and blank
    lines are ignored; every other row must have a float in column 3.
    b_instance, threshold, type and no_of_valids are not used by this
    method. Returns None.
    """
    filename = os.path.basename(src_pathname)
    output_filename = os.path.join(outputdir, filename)
    cor_list = []

    # open() + finally instead of a bare file(): the handle is closed
    # deterministically even when a row fails to parse.
    inf = open(src_pathname)
    try:
        for index, row in enumerate(csv.reader(inf, delimiter=delimiter)):
            #01-05-06 first row is never data; a blank line has no column 3
            if index == 0 or not row:
                continue
            cor_list.append(float(row[3]))
    finally:
        inf.close()

    if len(cor_list) > 100:
        r.png('%s.png' % output_filename)
        try:
            r.hist(cor_list, main='histogram', xlab='cor', ylab='freq')
        finally:
            # never leave the graphics device open after a failed hist()
            r.dev_off()
def plot(self, filename, list_to_plot, main_lab, xlab):
    """
    Write a histogram of list_to_plot to a PDF file.

    filename: path handed to pdf() as the output file.
    list_to_plot: non-empty sequence of values; its maximum must be an
        integer because the bin breaks are 0, 1, ..., max.
    main_lab: plot title.
    xlab: x-axis label.

    Raises ValueError when list_to_plot is empty. Returns None.
    """
    if len(list_to_plot) == 0:
        # Fail before a device is opened, with a message naming the cause.
        raise ValueError("plot(): list_to_plot is empty, nothing to draw")
    max_length = max(list_to_plot)
    breaks = list(range(max_length + 1))
    r.pdf(filename)
    try:
        r.hist(list_to_plot, breaks=breaks, las=1, main=main_lab, xlab=xlab)
    finally:
        # Close the PDF device even when hist() rejects the data.
        r.dev_off()
# Combine the peak values of two pickled DPA results and draw their
# histogram with R into 'output.png'.
from AlgoDPA import *
from rpy import r
import sys
import sqlite3
import scipy
import pickle
import matplotlib.pyplot as plt

# NOTE(review): this path is added after AlgoDPA has already been imported;
# if AlgoDPA lives in that directory the append has to move above the import.
sys.path.append('/media/FreeAgent GoFlex Drive/research/attacks/resources')


def _load_pickle(path):
    """Return the object pickled at *path*, closing the file afterwards."""
    # pickle.load can execute arbitrary code: only use on trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


dpaKey1 = _load_pickle('../serial/[50].p')
dpaKey2 = _load_pickle('../serial/[12].p')

# .peaks is a mapping whose values are iterables of peak values; flatten
# them into one list per key
peaks1 = [y for x in dpaKey1.peaks.values() for y in x]
peaks2 = [y for x in dpaKey2.peaks.values() for y in x]
pp = peaks1+peaks2

#plt.hist(pp)
r.png('output.png')
try:
    r.hist(pp)
finally:
    # close the PNG device even when hist() fails
    r.dev_off()
#plt.savefig('../graphs/[50,12]_combinedHypoCurrentDistribution.png')
pch=21, col='blue', bg='lightblue', type='o') # second plot x = range(1,11) y = [i**2 for i in x] z = [i**3 for i in x] r.plot(x, y, main='My second plot', xlab='x', ylab='y', type='l', col='blue') r.lines(x, z, col='red') # cosine function, save to file import math r.png('cosine.png') x = r.seq(0,50, by=0.1) y = [math.cos(i) for i in x] r.plot(x, y, main='COS(X)', xlab='x', ylab='cos(x)', type='l', col='blue') r.dev_off() # histogram x = range(10) + range(3,6) + range(5,10) r.hist(x, main='A histogram', xlab='x', col='lightblue') # adust plotting area from rpy import r x = range(1,11) y = [i**2 for i in x] z = [i**3 for i in x] r.plot(x, y, main='My second plot', xlab='x', ylab='y', type='l', col='blue', ylim=r.range(y,z)) r.lines(x, z, col='red')