def main():
    """Plot two tab-separated numeric columns of a file as an R scatterplot PDF.

    Command line: in_file out_pdf col_x col_y title xlab ylab
    (column numbers are 1-based; "NA" values become NaN points).
    Lines that are blank, comments, or non-numeric in either column are
    skipped; the first offending location is remembered for diagnostics.
    """
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        # Convert the user's 1-based column numbers to 0-based indices.
        columns = int( sys.argv[3] ) - 1, int( sys.argv[4] ) - 1
    except (IndexError, ValueError):
        # was a bare except: only missing/non-integer arguments belong here
        stop_err( "Columns not specified, your query does not contain a column of numerical data." )
    title = sys.argv[5]
    xlab = sys.argv[6]
    ylab = sys.argv[7]

    matrix = []               # rows of [x, y] floats that parsed cleanly
    skipped_lines = 0
    first_invalid_line = 0    # 1-based line number of the first bad line
    invalid_value = ''
    invalid_column = 0
    i = 0
    # open() in a with-block replaces the old file() call, which leaked the handle.
    with open( in_fname ) as in_file:
        for i, line in enumerate( in_file ):
            valid = True
            line = line.rstrip( '\r\n' )
            if line and not line.startswith( '#' ):
                row = []
                fields = line.split( "\t" )
                for column in columns:
                    try:
                        val = fields[column]
                        if val.lower() == "na":
                            row.append( float( "nan" ) )  # R plots NaN as a missing point
                        else:
                            row.append( float( val ) )
                    except (IndexError, ValueError):
                        # Missing column or non-numeric value: skip this line,
                        # recording the first offending location.
                        valid = False
                        skipped_lines += 1
                        if not first_invalid_line:
                            first_invalid_line = i + 1
                            try:
                                invalid_value = fields[column]
                            except IndexError:
                                invalid_value = ''
                            invalid_column = column + 1
                        break
            else:
                # Blank and comment lines count as skipped too.
                valid = False
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = i + 1
            if valid:
                matrix.append( row )

    if skipped_lines < i:  # at least some usable data was collected
        try:
            r.pdf( out_fname, 8, 8 )
            r.plot( array(matrix), type="p", main=title, xlab=xlab, ylab=ylab, col="blue", pch=19 )
            r['dev.off']()
        except Exception as exc:  # was py2-only "except Exception, exc"
            stop_err( "%s" % str( exc ) )
def plotIDR(output_file, input_prefixes):
    '''Create IDR consistency plots (one multi-panel PDF).

    Port of the R script batch-consistency-plot.r from the IDR package.

    output_file -- path of the PDF to write.
    input_prefixes -- one prefix per pairwise comparison; for each prefix
        the saved R workspaces "<prefix>-uri.sav" and "<prefix>-em.sav"
        are loaded (presumably written by an earlier IDR analysis step --
        TODO confirm against the caller).

    NOTE(review): the R snippets below are filled in via ``% locals()``,
    so the local names ``i``, ``npair``, ``legend`` and ``input_prefix``
    are part of the behavior -- do not rename them.
    '''
    dirname = os.path.dirname(__file__)
    # Helper R functions (get.ez.tt.all, plot.uri.group, ...) are defined here.
    R.source(os.path.join(dirname, "WrapperIDR.r"))
    # degrees of freedom used by get.uri.matched below
    R('''df.txt = 10''')
    # Accumulators in the R session, one slot per comparison.
    R('''uri.list <- list()
    uri.list.match <- list()
    ez.list <- list()
    legend.txt <- c()
    em.output.list <- list()
    uri.output.list <- list()''')
    npair = len(input_prefixes)
    for x, input_prefix in enumerate(input_prefixes):
        # Loading these .sav files defines `uri.output` and `em.output` in R.
        R.load(input_prefix + "-uri.sav")
        R.load(input_prefix + "-em.sav")
        i = x + 1  # R lists are 1-based
        R('''uri.output.list[[%(i)i]] <- uri.output;
        em.output.list[[%(i)i]] <- em.output;
        # reverse =T for error rate;''' % locals())
        R('''ez.list[[%(i)i]] <- get.ez.tt.all(em.output,
            uri.output.list[[%(i)i]]$data12.enrich$merge1,
            uri.output.list[[%(i)i]]$data12.enrich$merge2);''' % locals())
        R('''
        # URI for all peaks
        uri.list[[%(i)i]] <- uri.output$uri.n;
        # URI for matched peaks
        uri.match <- get.uri.matched(em.output$data.pruned, df=df.txt);
        uri.list.match[[%(i)i]] <- uri.match$uri.n;
        ''' % locals())
        # Legend entry maps the panel index back to the input prefix.
        legend = "%(i)i = %(input_prefix)s" % locals()
        R('''
        legend.txt[%(i)i] <- '%(legend)s';
        ''' % locals())
    R.pdf(output_file)
    # 2x3 grid of panels on one page.
    R('''par(mfcol=c(2,3), mar=c(5,6,4,2)+0.1)''')
    R('''plot.uri.group(uri.list, NULL, file.name=NULL, c(1:%(npair)i), title.txt="all peaks");
    plot.uri.group(uri.list.match, NULL, file.name=NULL, c(1:%(npair)i), title.txt="matched peaks");
    plot.ez.group(ez.list, plot.dir=NULL, file.name=NULL, legend.txt=c(1:%(npair)i), y.lim=c(0, 0.6));
    plot(0, 1, type="n", xlim=c(0,1), ylim=c(0,1), xlab="", ylab="", xaxt="n", yaxt="n");
    legend(0, 1, legend.txt, cex=0.6);''' % locals())
    R["dev.off"]()
def plotIDR( output_file, input_prefixes ):
    '''Create IDR consistency plots (one multi-panel PDF).

    Port of the R script batch-consistency-plot.r from the IDR package.

    output_file -- path of the PDF to write.
    input_prefixes -- one prefix per pairwise comparison; for each prefix
        the saved R workspaces "<prefix>-uri.sav" and "<prefix>-em.sav"
        are loaded (presumably written by an earlier IDR analysis step --
        TODO confirm against the caller).

    NOTE(review): this file contains two near-identical definitions of
    plotIDR; the later one shadows the earlier at import time -- consider
    deduplicating. Also, the R snippets below are filled in via
    ``% locals()``, so the local names ``i``, ``npair``, ``legend`` and
    ``input_prefix`` are part of the behavior -- do not rename them.
    '''
    dirname = os.path.dirname(__file__)
    # Helper R functions (get.ez.tt.all, plot.uri.group, ...) are defined here.
    R.source(os.path.join( dirname, "WrapperIDR.r"))
    # degrees of freedom used by get.uri.matched below
    R('''df.txt = 10''')
    # Accumulators in the R session, one slot per comparison.
    R('''uri.list <- list()
    uri.list.match <- list()
    ez.list <- list()
    legend.txt <- c()
    em.output.list <- list()
    uri.output.list <- list()''')
    npair = len(input_prefixes)
    for x, input_prefix in enumerate(input_prefixes):
        # Loading these .sav files defines `uri.output` and `em.output` in R.
        R.load( input_prefix + "-uri.sav" )
        R.load( input_prefix + "-em.sav" )
        i = x + 1  # R lists are 1-based
        R( '''uri.output.list[[%(i)i]] <- uri.output;
        em.output.list[[%(i)i]] <- em.output;
        # reverse =T for error rate;''' % locals())
        R('''ez.list[[%(i)i]] <- get.ez.tt.all(em.output,
            uri.output.list[[%(i)i]]$data12.enrich$merge1,
            uri.output.list[[%(i)i]]$data12.enrich$merge2);''' % locals())
        R('''
        # URI for all peaks
        uri.list[[%(i)i]] <- uri.output$uri.n;
        # URI for matched peaks
        uri.match <- get.uri.matched(em.output$data.pruned, df=df.txt);
        uri.list.match[[%(i)i]] <- uri.match$uri.n;
        ''' % locals() )
        # Legend entry maps the panel index back to the input prefix.
        legend = "%(i)i = %(input_prefix)s" % locals()
        R('''
        legend.txt[%(i)i] <- '%(legend)s';
        '''% locals())
    R.pdf( output_file )
    # 2x3 grid of panels on one page.
    R('''par(mfcol=c(2,3), mar=c(5,6,4,2)+0.1)''')
    R('''plot.uri.group(uri.list, NULL, file.name=NULL, c(1:%(npair)i), title.txt="all peaks");
    plot.uri.group(uri.list.match, NULL, file.name=NULL, c(1:%(npair)i), title.txt="matched peaks");
    plot.ez.group(ez.list, plot.dir=NULL, file.name=NULL, legend.txt=c(1:%(npair)i), y.lim=c(0, 0.6));
    plot(0, 1, type="n", xlim=c(0,1), ylim=c(0,1), xlab="", ylab="", xaxt="n", yaxt="n");
    legend(0, 1, legend.txt, cex=0.6);''' % locals())
    R["dev.off"]()
def run(self):
    """Render this cluster's visualization PDF: fragment plots for every
    10x dataset, then one triangle plot per edge row in the cluster."""
    cluster_edges = load_edges(self.options)
    selected = cluster_edges.loc[cluster_edges["cluster"] == self.cluster]

    from rpy2.robjects import r
    r.pdf(self.outpaths(final=False)["visualization"])

    for sample, dataset in sorted(self.options.iter_10xdatasets()):
        graphing.plot_frags(selected, self.options, sample, dataset)
    for _, edge_row in selected.iterrows():
        plot_triangles(edge_row, self.options)

    r["dev.off"]()
def visualize_frags(outdir, graphs, options):
    """Write one fragment-plot PDF per breakend graph into `outdir`.

    outdir  -- output directory (created if missing).
    graphs  -- iterable of networkx graphs; each connected component is
               assumed to be a simple path whose two endpoints have
               degree 1. NOTE(review): `ends` is indexed [0] and [1], so
               a component with != 2 degree-1 nodes would break -- confirm.
    options -- project options object providing iter_10xdatasets().
    """
    from rpy2.robjects import r
    utilities.ensure_dir(outdir)
    for i, graph in enumerate(graphs):
        # One PDF per input graph.
        r.pdf(os.path.join(outdir, "fragments.cluster_{}.pdf".format(i)))
        for component in networkx.connected_components(graph):
            subgraph = graph.subgraph(component)
            # Path endpoints: the degree-1 nodes (degree_iter => networkx 1.x API).
            ends = [node for node,degree in subgraph.degree_iter() if degree==1]
            # Take the first simple path between the two endpoints.
            breakends = [node for node in list(networkx.shortest_simple_paths(subgraph, ends[0], ends[1]))[0]]
            # breakends = [breakend_from_label(node) for node in breakends]
            # Keep every other node along the path plus the final node.
            breakends = breakends[:-1:2] + breakends[-1:]
            # print ")"*100, breakends
            for sample, dataset in sorted(options.iter_10xdatasets()):
                plot_frags(breakends, options, sample, dataset)
                # plot_frags(breakpoints, options, sample, dataset)
        # Close this graph's PDF device before starting the next one.
        r["dev.off"]()
def val(self):
    """Estimate value functions with b-splines and compare the two policies.

    Writes three diagnostic PDFs next to ``self.out_dir`` (fitted curves,
    absolute difference, percent difference), then re-evaluates both fits
    on the observed 2013 school ranks and prints the implied change in
    producer surplus.

    Returns the summed difference (policy B minus policy A) over the
    2013 ranks.
    """
    plot_dir = os.path.dirname(self.out_dir)

    # Evaluate both spline fits on a fine grid of ranks.
    rank_grid = pd.DataFrame({'OverallRank': np.linspace(1, 194, 1000)})
    fit_no_info = self.spline_est(self.policy_a['value'], rank_grid)
    fit_info = self.spline_est(self.policy_b['value'], rank_grid)

    # Overlay the fitted curves and the raw value points.
    r.pdf(os.path.join(plot_dir, 'value.pdf'))
    r.plot(rank_grid['OverallRank'], fit_no_info, type='l', xlab='Rank_M', ylab='V(Rank)')
    r.lines(rank_grid['OverallRank'], fit_info, col='red')
    r.points(self.policy_a['value']['OverallRank'], self.policy_a['value']['val'], col='black')
    r.points(self.policy_b['value']['OverallRank'], self.policy_b['value']['val'], col='red')
    r.legend('topright', np.array(['No Info', 'Info']), lty=np.array([1, 1]), col=np.array(['black', 'red']))
    r('dev.off()')

    # Absolute gap between the two fitted value functions.
    gap = np.array(fit_info) - np.array(fit_no_info)
    r.pdf(os.path.join(plot_dir, 'value_diff.pdf'))
    r.plot(rank_grid['OverallRank'], gap, type='l', xlab='Rank', ylab='V(Rank|info=1) - V(Rank|info=0)')
    r.abline(h=0, lty=2)
    r('dev.off()')

    # Same gap expressed relative to the no-info baseline.
    gap = (np.array(fit_info) - np.array(fit_no_info)) / np.array(fit_no_info)
    r.pdf(os.path.join(plot_dir, 'value_percent_diff.pdf'))
    r.plot(rank_grid['OverallRank'], gap, type='l', xlab='Rank', ylab='(V(Rank|info=1) - V(Rank|info=0)) / V(Rank|info=0)')
    r.abline(h=0, lty=2)
    r('dev.off()')

    # Re-evaluate both fits at the observed 2013 ranks from the raw data.
    csv_path = dirname(dirname(__file__))
    csv_path = join(csv_path, 'data', 'lawData.csv')
    law_data = pd.read_csv(csv_path)
    ranks_2013 = deepcopy(law_data.loc[law_data['year'] == 2013, 'OverallRank'])
    ranks_2013 = pd.DataFrame({'OverallRank': np.array(ranks_2013)})
    fit_no_info = self.spline_est(self.policy_a['value'], ranks_2013)
    fit_info = self.spline_est(self.policy_b['value'], ranks_2013)

    surplus_change = np.sum(np.array(fit_info) - np.array(fit_no_info))
    pct_change = surplus_change / np.sum(fit_no_info)
    print(" - Change in Producer Surplus: {0}".format(surplus_change))
    print(" - Percent change in Producer Surplus: {0}".format(pct_change))
    return surplus_change
#r.barplot(py2ri(qc_df.num_reads)) # r.plot(py2ri(qc_df)) if __name__ == "__main__": print "Plotting" from rpy2.robjects.lib import ggplot2 from rpy2.robjects import r from rpy2.robjects.packages import importr scales = importr('scales') iris = r('iris') r.pdf("/home/yarden/jaen/Musashi/rtest.pdf") iris_py = pandas.read_csv("/home/yarden/iris.csv") iris_py = iris_py.rename(columns={"Name": "Species"}) corrs = [] from scipy.stats import spearmanr for species in set(iris_py.Species): entries = iris_py[iris_py["Species"] == species] c = spearmanr(entries["SepalLength"], entries["SepalWidth"]) print "c: ", c # compute r.cor(x, y) and divide up by Species # Assume we get a vector of length Species saying what the # correlation is for each Species' Petal Length/Width p = ggplot2.ggplot(iris) + \ ggplot2.geom_point(ggplot2.aes_string(x="Sepal.Length", y="Sepal.Width")) + \