Python par Examples

Programming Language: Python

Namespace/Package Name: rpy2.robjects.r

Method/Function: par

Examples at hotexamples.com: 13

Python par - 13 examples found. These are the top-rated real-world Python examples of rpy2.robjects.r.par, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: r_tools.py Project: TomLippincott/python

def plot_dendrogram(target, source, env):
    """
    Draw an average-linkage dendrogram of a matrix into a PNG bitmap.

    The matrix is loaded with ``npz_to_df`` from ``source[0]``; the image is
    written to ``target[0]``.  The last source is read for an options mapping:
    when its ``DIRECT`` entry is true the matrix is treated as a distance
    matrix (``as.dist``), otherwise distances are computed with ``dist``.

    :param target: sequence of output nodes; only ``target[0].rstr()`` is used
    :param source: sequence of input nodes; ``source[0]`` is the matrix file
        and ``source[-1]`` yields the options mapping
    :param env: unused (presumably required by the build-tool calling
        convention -- TODO confirm)
    :return: None
    """
    args = source[-1].read()
    mat = npz_to_df(source[0].rstr())
    modules["grDevices"].bitmap(target[0].rstr(), width=50 * mat.nrow, height=50 * mat.nrow, units="px", type="png256")
    # Everything drawn after the device is opened is guarded so that the
    # device is closed even when clustering or plotting fails.
    try:
        if args.get("DIRECT", False):
            dm = r["as.dist"](mat)
        else:
            dm = r["dist"](mat)
        hc = r["hclust"](dm, method="average")
        d = r["as.dendrogram"](hc)
        r.par(mar=FloatVector([5, 5, 0, 25]))
        # The point-size expression reduces to 30 for any non-zero row count;
        # it is kept verbatim so an empty matrix still fails the same way.
        r.par(**{"ps" : ((60 * mat.nrow) / mat.nrow) / 2.0, "lwd" : 3})
        r["plot"](d, horiz=True, xlab="Cosine distance")
    finally:
        modules["grDevices"].dev_off()
    return None

Example #2

Show file

File: plots.py Project: cancerit/pyCRISPRcleanR

 def plot_segments(cbs_fc, cbs_normfc, outdir='./'):
     """
     Plot raw and normalised segmentations, one pair per chromosome, to a PDF.

     Each page holds two stacked panels (``mfrow = c(2, 1)``): the raw fold
     changes on top and the CRISPRcleanR-normalised fold changes below.

     :param cbs_fc: raw fold changes; mapping of chromosome name to a 3-tuple
         whose last item is handed to ``dnacopy.plotSample``
     :param cbs_normfc: normalised fold changes; same layout as ``cbs_fc`` and
         must contain every chromosome name present in ``cbs_fc``
     :param outdir: directory in which the PDF is created
     :return: None
     """
     pdf_prm = {'file': "{}/09_Raw_vs_postCRISPRcleanR_segmentation_fold_changes.pdf".format(outdir),
                'width': 7.5, 'height': 7.5}
     grdevices.pdf(**pdf_prm)
     # Close the PDF device even if a plot call or a chromosome lookup fails,
     # otherwise the device stays open and the file is left unfinished.
     try:
         r.par(mfrow=r.c(2, 1))
         for chr_name, (_, _, cnseg_raw) in cbs_fc.items():
             (_, _, cnseg_norm) = cbs_normfc[chr_name]
             # plot raw fold changes
             plot_prm = {'main': "raw_FCs_chr{}".format(chr_name), 'xlab': 'sgRNA_Index',
                         'ylab': 'FCs'}
             dnacopy.plotSample(cnseg_raw, **plot_prm)
             # plot normalised fold changes
             plot_prm = {'main': "CRISPRcleanR_FCs_chr{}".format(chr_name), 'xlab': 'sgRNA_Index',
                         'ylab': 'FCs'}
             dnacopy.plotSample(cnseg_norm, **plot_prm)
     finally:
         grdevices.dev_off()

Example #3

Show file

File: r_plot.py Project: yarden/biorpy

def plot_qc_percents(qc_df):
    """
    Plot percentage parts of pipeline QC file.

    Draws two side-by-side dot charts on the current R device: mapping
    percentages (mapped / unique / ribo) on the left and per-region
    percentages (exons, CDS, UTRs, introns) on the right.

    :param qc_df: pandas DataFrame with a ``sample`` column plus the
        ``percent_*`` columns named below; missing values are plotted as 0
    :return: None
    """
    # Record NA values as 0
    qc_df = qc_df.fillna(0).set_index("sample")

    def draw_dotchart(columns):
        # Both panels share every styling argument; only the columns differ.
        r.dotchart(convert_to_r_matrix(qc_df[columns]),
                   xlab="Percent reads",
                   lcolor="black",
                   pch=19,
                   gcolor="darkblue",
                   cex=0.8)

    r.par(mfrow=np.array([1,2]))
    # options() hands back the previous settings.  They used to be restored
    # immediately, which cancelled scipen=10 before anything was drawn; the
    # restore now happens once plotting is finished.
    previous_opts = r.options(scipen=10)
    try:
        r.par(bty="n", lwd=1.7, lty=2)
        draw_dotchart(["percent_mapped",
                       "percent_unique",
                       "percent_ribo"])
        r.par(bty="n")
        draw_dotchart(["percent_exons",
                       "percent_cds",
                       "percent_3p_utr",
                       "percent_5p_utr",
                       "percent_introns"])
    finally:
        r.options(previous_opts)

Example #4

Show file

def draw_survival_curves(feature,
                         surv,
                         assignment=None,
                         filename='tmp.png',
                         show=False,
                         title=True,
                         labels=None,
                         colors=['blue', 'red'],
                         ann=None,
                         show_legend=True,
                         q=.25,
                         std=None):
    """
    Draw survival curves for ``feature`` into a PNG file, one panel per
    distinct value of ``assignment``.

    :param feature: pandas Series to stratify on; boolean values are mapped
        to the strings 'True'/'False', and real-valued features with more
        than 10 distinct values are binned with ``to_quants``
    :param surv: survival data indexed with ``.ix[:, 'days']`` and
        ``.ix[:, 'event']`` (exact container type not visible here)
    :param assignment: optional Series splitting the samples into panels;
        when None a single panel is drawn
    :param filename: path of the PNG to create
    :param show: when true, return ``Show(filename)``
    :param title: unused by this function
    :param labels: legend labels; derived from ``feature`` in some cases
    :param colors: curve colours; the default list is never mutated here,
        only rebound
    :param ann: 'p' to annotate each panel with the log-rank p-value, any
        other non-None value is drawn as text, None for no annotation
    :param show_legend: True for a legend inside the plot area, 'out' for a
        legend in a widened right margin, anything else for no legend
    :param q: quantile passed to ``to_quants`` and used in the bin labels
    :param std: passed through to ``to_quants``
    :return: ``Show(filename)`` when ``show`` is true, otherwise None

    NOTE(review): ``.ix`` was removed in pandas 1.0; this code presumably
    targets an older pandas -- confirm before modernising the indexing.
    """
    if assignment is None:
        num_panels = 1
        assignment = feature.map(lambda s: 1)
        name = lambda v: str(feature.name) if feature.name is not None else ''
    else:
        num_panels = len(assignment.unique())
        name = lambda v: str(assignment.name) + ' = ' + str(v)
    if (labels is None) and ((len(feature) / feature.nunique()) > 10):
        labels = r.sort(r.c(*feature.unique()))  # R sorts bad
        colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black']
    if feature.dtype == 'bool':
        feature = feature.map({True: 'True', False: 'False'})

    r.png(filename=filename, width=200 * (num_panels + 1), height=300, res=75)

    # From here on a graphics device is open; the try/finally guarantees it
    # is closed even when model fitting or plotting raises.
    try:
        fmla = robjects.Formula('Surv(days, event) ~ feature')
        r.par(mfrow=r.c(1, num_panels))
        r.par(mar=r.c(4, 5, 4, 1))
        r.par(xpd=True)

        if (get_vec_type(feature) == 'real') and (len(feature.unique()) > 10):
            colors = ['blue', 'orange', 'red']
            if q == .5:
                labels = ['Bottom 50%', 'Top 50%']
            else:
                labels = [
                    'Bottom {}%'.format(int(q * 100)), 'Normal',
                    'Top {}%'.format(int(q * 100))
                ]

        # Final colour vector, shared by the curves and the legend.
        ls = r.c(*colors)

        def plot_me(sub_f, label):
            # Draw one panel: fit, plot the curves, add title and annotation.
            if (get_vec_type(sub_f) == 'real') and (len(sub_f.unique()) > 10):
                sub_f = to_quants(sub_f, q=q, std=std)

            m = get_cox_ph(surv, sub_f, formula=fmla)
            r_data = m.rx2('call')[2]
            p = log_rank(sub_f, surv)['p']

            r.plot(survival.survfit(fmla, r_data),
                   lty=1,
                   col=ls,
                   lwd=4,
                   cex=1.25,
                   xlab='Years to Event',
                   ylab='Survival')
            r.title(label, cex=3.)
            if ann == 'p':
                r.text(.2, 0, labels='logrank p = {0:.1e}'.format(p), pos=4)
            elif ann is not None:
                r.text(0, labels=ann, pos=4)

        if show_legend == 'out':
            r.par(xpd=True, mar=r.c(4, 5, 5, 8))
        for value in sorted(assignment.ix[feature.index].dropna().unique()):
            f = feature.ix[assignment[assignment == value].index]
            if len(f.unique()) > 1:
                plot_me(f, name(value))

        # ``== True`` is deliberate: the string 'out' is truthy but must
        # take the other branch.
        if show_legend == True:
            # Uses the last panel's group (``value`` from the loop above) to
            # decide where the legend fits.
            mean_s = surv.ix[:, 'event'].ix[assignment[assignment ==
                                                       value].index].mean()
            if mean_s < .5:
                r.legend(surv.ix[:, 'days'].max() * .05 / 365.,
                         .45,
                         labels,
                         lty=1,
                         col=ls,
                         lwd=3,
                         bty='o')
            else:
                r.legend(surv.ix[:, 'days'].max() * .4 / 365,
                         .9,
                         labels,
                         lty=1,
                         col=ls,
                         lwd=3,
                         bty='o')
        elif show_legend == 'out':
            r.legend(surv.ix[:, 'days'].max() * 1.1 / 365,
                     .9,
                     labels,
                     lty=1,
                     col=ls,
                     lwd=3,
                     bty='o')
    finally:
        r('dev.off()')
    if show:
        return Show(filename)

Example #5

Show file

File: R_Wrappers.py Project: Krysia/TCGA

def draw_survival_curves(feature, surv, assignment=None, filename='tmp.png', show=False,
                        title=True, labels=None, colors=['blue', 'red'], ann=None,
                        show_legend=True, q=.25, std=None):
    """
    Draw survival curves for ``feature`` into a PNG file, one panel per
    distinct value of ``assignment``.

    :param feature: pandas Series to stratify on; boolean values are mapped
        to the strings 'True'/'False', and real-valued features with more
        than 10 distinct values are binned with ``to_quants``
    :param surv: survival data indexed with ``.ix[:, 'days']`` and
        ``.ix[:, 'event']`` (exact container type not visible here)
    :param assignment: optional Series splitting the samples into panels;
        when None a single panel is drawn
    :param filename: path of the PNG to create
    :param show: when true, return ``Show(filename)``
    :param title: unused by this function
    :param labels: legend labels; derived from ``feature`` in some cases
    :param colors: curve colours; the default list is never mutated here,
        only rebound
    :param ann: 'p' to annotate each panel with the log-rank p-value, any
        other non-None value is drawn as text, None for no annotation
    :param show_legend: True for a legend inside the plot area, 'out' for a
        legend in a widened right margin, anything else for no legend
    :param q: quantile passed to ``to_quants`` and used in the bin labels
    :param std: passed through to ``to_quants``
    :return: ``Show(filename)`` when ``show`` is true, otherwise None

    NOTE(review): ``.ix`` was removed in pandas 1.0; this code presumably
    targets an older pandas -- confirm before modernising the indexing.
    """
    if assignment is None:
        num_panels = 1
        assignment = feature.map(lambda s: 1)
        name = lambda v: str(feature.name) if feature.name is not None else ''
    else:
        num_panels = len(assignment.unique())
        name = lambda v: str(assignment.name) + ' = ' + str(v)
    if (labels is None) and ((len(feature) / feature.nunique()) > 10):
        labels = r.sort(r.c(*feature.unique()))  # R sorts bad
        colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black']
    if feature.dtype == 'bool':
        feature = feature.map({True: 'True', False: 'False'})

    r.png(filename=filename, width=200 * (num_panels + 1), height=300, res=75)

    # From here on a graphics device is open; the try/finally guarantees it
    # is closed even when model fitting or plotting raises.
    try:
        fmla = robjects.Formula('Surv(days, event) ~ feature')
        r.par(mfrow=r.c(1, num_panels))
        r.par(mar=r.c(4, 5, 4, 1))
        r.par(xpd=True)

        if (get_vec_type(feature) == 'real') and (len(feature.unique()) > 10):
            colors = ['blue', 'orange', 'red']
            if q == .5:
                labels = ['Bottom 50%', 'Top 50%']
            else:
                labels = ['Bottom {}%'.format(int(q * 100)), 'Normal', 'Top {}%'.format(int(q * 100))]

        # Final colour vector, shared by the curves and the legend.
        ls = r.c(*colors)

        def plot_me(sub_f, label):
            # Draw one panel: fit, plot the curves, add title and annotation.
            if (get_vec_type(sub_f) == 'real') and (len(sub_f.unique()) > 10):
                sub_f = to_quants(sub_f, q=q, std=std)

            m = get_cox_ph(surv, sub_f, formula=fmla)
            r_data = m.rx2('call')[2]
            p = log_rank(sub_f, surv)['p']

            r.plot(survival.survfit(fmla, r_data), lty=1, col=ls, lwd=4, cex=1.25,
                   xlab='Years to Event', ylab='Survival')
            r.title(label, cex=3.)
            if ann == 'p':
                r.text(.2, 0, labels='logrank p = {0:.1e}'.format(p), pos=4)
            elif ann is not None:
                r.text(0, labels=ann, pos=4)

        if show_legend == 'out':
            r.par(xpd=True, mar=r.c(4, 5, 5, 8))
        for value in sorted(assignment.ix[feature.index].dropna().unique()):
            f = feature.ix[assignment[assignment == value].index]
            if len(f.unique()) > 1:
                plot_me(f, name(value))

        # ``== True`` is deliberate: the string 'out' is truthy but must
        # take the other branch.
        if show_legend == True:
            # Uses the last panel's group (``value`` from the loop above) to
            # decide where the legend fits.
            mean_s = surv.ix[:, 'event'].ix[assignment[assignment == value].index].mean()
            if mean_s < .5:
                r.legend(surv.ix[:, 'days'].max() * .05 / 365., .45, labels,
                         lty=1, col=ls, lwd=3, bty='o')
            else:
                r.legend(surv.ix[:, 'days'].max() * .4 / 365, .9, labels,
                         lty=1, col=ls, lwd=3, bty='o')
        elif show_legend == 'out':
            r.legend(surv.ix[:, 'days'].max() * 1.1 / 365, .9, labels,
                     lty=1, col=ls, lwd=3, bty='o')
    finally:
        r('dev.off()')
    if show:
        return Show(filename)

Example #6

Show file

File: r_table2scatter.py Project: CGATOxford/cgat

def main(argv=None):
    """Compute statistics and draw R plots for a table of numeric columns.

    The table is read from ``--file`` (or from stdin), lines starting with
    ``#`` are dropped, and the remainder is loaded into R under the name
    ``matrix`` via ``readTable``.  Every ``--stats`` method is then written
    to stdout and every ``--plot`` method is drawn, either to the
    ``--hardcopy`` device or to the default R device, in which case an
    interactive console is started afterwards.

    :param argv: command line; defaults to ``sys.argv``.
        NOTE(review): the value is not forwarded to ``E.Start`` in this
        function, so it appears to have no effect -- confirm.
    :raises IOError: when the input is empty and ``--fail-on-empty`` is
        in effect (the default)
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: r_table2scatter.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to take from table. Choices are 'all', 'all-but-first' or a ','-separated list of columns.")

    # optparse only expands the lower-case token "%default".
    parser.add_option("--logscale", dest="logscale", type="string",
                      help="log-transform one or both axes [default=%default].")

    parser.add_option("-a", "--hardcopy", dest="hardcopy", type="string",
                      help="write hardcopy to file [default=%default].",
                      metavar="FILE")

    parser.add_option("-f", "--file", dest="input_filename", type="string",
                      help="filename with table data [default=%default].",
                      metavar="FILE")

    parser.add_option("-2", "--file2", dest="input_filename2", type="string",
                      help="additional data file [default=%default].",
                      metavar="FILE")

    parser.add_option("-s", "--stats", dest="statistics", type="choice",
                      choices=("correlation", "spearman", "pearson", "count"),
                      help="statistical quantities to compute [default=%default]",
                      action="append")

    parser.add_option("-p", "--plot", dest="plot", type="choice",
                      choices=("scatter", "pairs", "panel", "bar", "bar-stacked",
                               "bar-besides", "1_vs_x", "matched", "boxplot", "scatter+marginal",
                               "scatter-regression"),
                      help="plots to plot [default=%default]",
                      action="append")

    parser.add_option("-t", "--threshold", dest="threshold", type="float",
                      help="min threshold to use for counting method [default=%default].")

    parser.add_option("-o", "--colours", dest="colours", type="int",
                      help="column with colour information [default=%default].")

    parser.add_option("-l", "--plot-labels", dest="labels", type="string",
                      help="column labels for x and y in matched plots [default=%default].")

    parser.add_option("-d", "--add-diagonal", dest="add_diagonal", action="store_true",
                      help="add diagonal to plot [default=%default].")

    parser.add_option("-e", "--plot-legend", dest="legend", type="int",
                      help="column with legend [default=%default].")

    parser.add_option("-r", "--options", dest="r_options", type="string",
                      help="R plotting options [default=%default].")

    parser.add_option("--format", dest="format", type="choice",
                      choices=("full", "sparse"),
                      help="output format [default=%default].")

    parser.add_option("--title", dest="title", type="string",
                      help="""plot title [default=%default].""")

    parser.add_option("", "--xrange", dest="xrange", type="string",
                      help="x viewing range of plot [default=%default].")

    parser.add_option("", "--yrange", dest="yrange", type="string",
                      help="y viewing range of plot[default=%default].")

    parser.add_option("--allow-empty-file", dest="fail_on_empty", action="store_false",
                      help="do not fail on empty input [default=%default].")

    parser.add_option("--fail-on-empty", dest="fail_on_empty", action="store_true",
                      help="fail on empty input [default=%default].")

    # The default key must match the option's dest ("add_diagonal"); the
    # former key "diagonal" configured an attribute nothing reads.
    parser.set_defaults(
        hardcopy=None,
        input_filename="",
        input_filename2=None,
        columns="all",
        logscale=None,
        statistics=[],
        plot=[],
        threshold=0.0,
        labels="x,y",
        colours=None,
        add_diagonal=False,
        legend=None,
        title=None,
        xrange=None,
        yrange=None,
        r_options="",
        fail_on_empty=True,
        format="full")

    (options, args) = E.Start(parser)

    if len(args) == 1 and not options.input_filename:
        options.input_filename = args[0]

    if options.columns not in ("all", "all-but-first"):
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    # Column numbers are 1-based on the command line, 0-based internally.
    if options.colours:
        options.colours -= 1
    if options.legend:
        options.legend -= 1

    # After the shift above, colour column 1 is stored as 0, so a plain
    # truth test would wrongly report "no colours"; test against None.
    has_colours = options.colours is not None

    table = {}
    headers = []

    # read data matrix
    if options.input_filename:
        lines = IOTools.openFile(options.input_filename, "r").readlines()
    else:
        # note: this will not work for interactive viewing, but
        # creating hardcopy plots works.
        lines = sys.stdin.readlines()

    lines = [x for x in lines if x[0] != "#"]

    if len(lines) == 0:
        if options.fail_on_empty:
            raise IOError("no input")
        E.warn("empty input")
        E.Stop()
        return

    matrix, headers, colours, legend = readTable(lines,
                                                 "matrix",
                                                 take_columns=options.columns,
                                                 headers=True,
                                                 colours=options.colours,
                                                 row_names=options.legend)

    if options.input_filename2:
        # read another matrix (should be of the same format).  The second
        # table is read from its own file; it used to be rebuilt from the
        # first file's lines.
        lines2 = IOTools.openFile(options.input_filename2, "r").readlines()
        lines2 = [x for x in lines2 if x[0] != "#"]
        matrix2, headers2, colours2, legend2 = readTable(
            lines2,
            "matrix2",
            take_columns=options.columns,
            headers=True,
            colours=options.colours,
            row_names=options.legend)

    R.assign("headers", headers)

    ndata = R("""length( matrix[,1] )""")[0]

    if options.loglevel >= 1:
        options.stdlog.write("# read matrix: %ix%i\n" % (len(headers), ndata))

    if colours:
        R.assign("colours", colours)

    for method in options.statistics:

        if method == "correlation":
            cor = R.cor(matrix, use="pairwise.complete.obs")
            writeMatrix(sys.stdout, cor, headers=headers, format="%5.2f")

        elif method == "pearson":
            options.stdout.write("\t".join(("var1",
                                            "var2",
                                            "coeff",
                                            "passed",
                                            "pvalue",
                                            "n",
                                            "method",
                                            "alternative")) + "\n")
            for x in range(len(headers) - 1):
                for y in range(x + 1, len(headers)):
                    try:
                        result = R(
                            """cor.test( matrix[,%i], matrix[,%i] )""" % (x + 1, y + 1))
                    except rpy.RPyException as msg:
                        E.warn("correlation not computed for columns %i(%s) and %i(%s): %s" % (
                            x, headers[x], y, headers[y], msg))
                        options.stdout.write("%s\t%s\t%s\t%s\t%s\t%i\t%s\t%s\n" %
                                             (headers[x], headers[y],
                                              "na",
                                              "na",
                                              "na",
                                              0,
                                              "na",
                                              "na"))

                    else:
                        options.stdout.write(
                            "%s\t%s\t%6.4f\t%s\t%e\t%i\t%s\t%s\n" %
                            (headers[x], headers[y],
                             result.rx2('estimate').rx2(
                                 'cor')[0],
                             Stats.getSignificance(
                                 float(result.rx2('p.value')[0])),
                             result.rx2('p.value')[0],
                             result.rx2('parameter').rx2(
                                 'df')[0],
                             result.rx2('method')[0],
                             result.rx2('alternative')[0]))

        elif method == "spearman":
            # NOTE(review): this branch indexes the result with plain
            # [...] while the pearson branch uses .rx2(), and the header
            # has 7 columns for 8 written values -- verify against the
            # rpy version in use.
            options.stdout.write("\t".join(("var1", "var2",
                                            "coeff",
                                            "passed",
                                            "pvalue",
                                            "method",
                                            "alternative")) + "\n")
            for x in range(len(headers) - 1):
                for y in range(x + 1, len(headers)):
                    result = R(
                        """cor.test( matrix[,%i], matrix[,%i], method='spearman')""" % (x + 1, y + 1))
                    options.stdout.write(
                        "%s\t%s\t%6.4f\t%s\t%e\t%i\t%s\t%s\n" %
                        (headers[x], headers[y],
                         result['estimate']['rho'],
                         Stats.getSignificance(float(result['p.value'])),
                         result['p.value'],
                         result['parameter']['df'],
                         result['method'],
                         result['alternative']))

        elif method == "count":
            # number of shared elements > threshold
            m, r, c = MatlabTools.ReadMatrix(open(options.input_filename, "r"),
                                             take=options.columns,
                                             headers=True)
            mask = numpy.greater(m, options.threshold)
            counts = numpy.dot(numpy.transpose(mask), mask)
            writeMatrix(options.stdout, counts, headers=c, format="%i")

    if options.plot:

        # remove columns that are completely empty
        if "pairs" in options.plot:
            colsums = R('''colSums( is.na(matrix ))''')
            take = [x for x in range(len(colsums)) if colsums[x] != ndata]
            if take:
                # NOTE(review): ``take`` holds the columns that are kept,
                # although the message says "removing"; and ``legend`` is
                # rebuilt from the already filtered ``headers`` -- both
                # left unchanged, confirm the intent.
                E.warn("removing empty columns %s before plotting" % str(take))
                matrix = R.subset(matrix, select=[x + 1 for x in take])
                R.assign("""matrix""", matrix)
                headers = [headers[x] for x in take]
                if legend:
                    legend = [headers[x] for x in take]

        # extra_options is a string of ", key=value" pairs that is spliced
        # into the R plotting calls below.
        if options.r_options:
            extra_options = ", %s" % options.r_options
        else:
            extra_options = ""

        if options.legend is not None and len(legend):
            extra_options += ", legend=c('%s')" % "','".join(legend)

        if options.labels:
            xlabel, ylabel = options.labels.split(",")
            extra_options += ", xlab='%s', ylab='%s'" % (xlabel, ylabel)
        else:
            xlabel, ylabel = "", ""

        if has_colours:
            extra_options += ", col=colours"

        if options.logscale:
            extra_options += ", log='%s'" % options.logscale

        if options.xrange:
            extra_options += ", xlim=c(%f,%f)" % tuple(
                map(float, options.xrange.split(",")))

        if options.yrange:
            extra_options += ", ylim=c(%f,%f)" % tuple(
                map(float, options.yrange.split(",")))

        if options.hardcopy:
            if options.hardcopy.endswith(".eps"):
                R.postscript(options.hardcopy)
            elif options.hardcopy.endswith(".png"):
                R.png(options.hardcopy, width=1024, height=768, type="cairo")
            elif options.hardcopy.endswith(".jpg"):
                # The R graphics device is called jpeg(); there is no jpg().
                R.jpeg(options.hardcopy, width=1024, height=768, type="cairo")

        for method in options.plot:

            # Large data sets are drawn with smaller dot symbols.
            if ndata < 1000:
                point_size = "1"
                pch = "o"
            else:
                point_size = "0.5"
                pch = "."

            # NOTE(review): several branches below append to extra_options,
            # so additions carry over to later plot methods in this loop.
            if method == "scatter":
                R("""plot( matrix[,1], matrix[,2], cex=%s, pch="o" %s)""" % (
                    point_size, extra_options))

            elif method == "scatter-regression":
                R("""plot( matrix[,1], matrix[,2], cex=%s, pch="o" %s)""" % (
                    point_size, extra_options))
                R(
                    """dat <- data.frame(x = matrix[,1], y = matrix[,2])""")
                R(
                    """new <- data.frame(x = seq( min(matrix[,1]), max(matrix[,1]), (max(matrix[,1]) - min(matrix[,1])) / 100))""")
                mod = R("""mod <- lm( y ~ x, dat)""")
                R("""predict(mod, new, se.fit = TRUE)""")
                R("""pred.w.plim <- predict(mod, new, interval="prediction")""")
                R("""pred.w.clim <- predict(mod, new, interval="confidence")""")
                R(
                    """matpoints(new$x,cbind(pred.w.clim, pred.w.plim[,-1]), lty=c(1,2,2,3,3), type="l")""")
                # NOTE(review): dict-style access into the lm result --
                # verify against the rpy version in use.
                R.mtext(
                    "y = %f * x + %f, r=%6.4f, n=%i" % (mod["coefficients"]["x"],
                                                        mod["coefficients"][
                                                            "(Intercept)"],
                                                        R("""cor( dat )[2]"""),
                                                        ndata),
                    3,
                    cex=1.0)

            elif method == "pairs":
                if options.add_diagonal:
                    R(
                        """panel.hist <- function( x,y,...  ) { points(x,y,...); abline(0,1); }""")
                else:
                    R(
                        """panel.hist <- function( x,y,...  ) { points(x,y,...); }""")

                # There used to be a argument na_action="na.omit", but
                # removed this as there appeared error messages saying
                # "na.action is not a graphical parameter" and the
                # plots showed occasionally the wrong scale.
                # cex=point_size also caused trouble (error message:
                # "X11 used font size 8 when 2 was requested" or
                # similar)
                if has_colours:
                    R.pairs(matrix,
                            pch=pch,
                            col=colours,
                            main=options.title,
                            panel="panel.hist",
                            labels=headers,
                            cex_labels=2.0)
                else:
                    R.pairs(matrix,
                            pch=pch,
                            panel="panel.hist",
                            main=options.title,
                            labels=headers,
                            cex_labels=2.0)

            elif method == "boxplot":
                extra_options += ",main='%s'" % options.title

                # set vertical orientation
                if max([len(x) for x in headers]) > 40 / len(headers):
                    # remove xlabel:
                    extra_options = re.sub(", xlab='[^']+'", "", extra_options)
                    extra_options += ", names.arg=headers, las=2"
                    R(
                        """op <- par(mar=c(11,4,4,2))""")  # the 10 allows the names.arg below the barplot

                R("""boxplot( matrix %s)""" % extra_options)

            elif method == "bar" or method == "bar-stacked":
                if not has_colours:
                    extra_options += ", col=rainbow(5)"

                # set vertical orientation
                if max([len(x) for x in headers]) > 40 / len(headers):
                    # remove xlabel:
                    extra_options = re.sub(", xlab='[^']+'", "", extra_options)
                    extra_options += ", names.arg=headers, las=2"
                    R(
                        """op <- par(mar=c(11,4,4,2))""")  # the 10 allows the names.arg below the barplot

                R("""barplot(as.matrix(matrix), %s)""" % extra_options)

            elif method == "bar-besides":
                if not has_colours:
                    extra_options += ", col=rainbow(%i)" % ndata

                # set vertical orientation
                if max([len(x) for x in headers]) > 40 / len(headers):
                    # remove xlabel:
                    extra_options = re.sub(", xlab='[^']+'", "", extra_options)
                    extra_options += ", names.arg=headers, las=2"
                    R(
                        """op <- par(mar=c(11,4,4,2))""")  # the 10 allows the names.arg below the barplot

                R("""barplot(as.matrix(matrix), beside=TRUE %s)""" %
                  extra_options)

            elif method == "scatter+marginal":

                if options.title:
                    # set the size of the outer margins - the title needs to be added at the end
                    # after plots have been created
                    R.par(oma=R.c(0, 0, 4, 0))

                R("""matrix""")
                R("""
x <- matrix[,1];
y <- matrix[,2];
xhist <- hist(x, breaks=20, plot=FALSE);
yhist <- hist(y, breaks=20, plot=FALSE);
top <- max(c(xhist$counts, yhist$counts));
nf <- layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), respect=TRUE );
par(mar=c(3,3,1,1)) ;
plot(x, y, cex=%s, pch="o" %s) ;
par(mar=c(0,3,1,1)) ;
barplot(xhist$counts, axes=FALSE, ylim=c(0, top), space=0 ) ;
par(mar=c(3,0,1,1)) ;
title(main='%s');
barplot(yhist$counts, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE ) ;
title(main='%s');
""" % (point_size, extra_options, xlabel, ylabel))

                if options.title:
                    R.mtext(options.title, 3, outer=True, line=1, cex=1.5)

            elif method in ("panel", "1_vs_x", "matched"):

                # Collect the (x, y) column index pairs to plot.
                if method == "panel":
                    pairs = []
                    for x in range(len(headers) - 1):
                        for y in range(x + 1, len(headers)):
                            pairs.append((x, y))

                elif method == "1_vs_x":
                    pairs = []
                    for x in range(1, len(headers)):
                        pairs.append((0, x))

                # print matching columns
                elif method == "matched":
                    pairs = []
                    for x in range(len(headers) - 1):
                        for y in range(x + 1, len(headers)):
                            if headers[x] == headers[y]:
                                pairs.append((x, y))
                                break

                # Grid dimensions large enough to hold one panel per pair.
                w = int(math.ceil(math.sqrt(len(pairs))))
                h = int(math.ceil(float(len(pairs)) / w))

                PosInf = 1e300000
                NegInf = -1e300000

                xlabel, ylabel = options.labels.split(",")

                R("""layout(matrix(seq(1,%i), %i, %i, byrow = TRUE))""" %
                  (w * h, w, h))
                for a, b in pairs:
                    # NOTE(review): new_matrix is never used afterwards, and
                    # membership tests cannot detect NaN -- kept unchanged,
                    # confirm whether it can be dropped.
                    new_matrix = [x for x in zip(
                        list(matrix[a].values())[0],
                        list(matrix[b].values())[0])
                                  if x[0] not in (float("nan"), PosInf, NegInf) and
                                  x[1] not in (float("nan"), PosInf, NegInf)]
                    try:
                        R("""plot(matrix[,%i], matrix[,%i], main='%s versus %s', cex=0.5, pch=".", xlab='%s', ylab='%s' )""" % (
                            a + 1, b + 1, headers[b], headers[a], xlabel, ylabel))
                    except rpy.RException as msg:
                        print("could not plot %s versus %s: %s" % (headers[b], headers[a], msg))

        if options.hardcopy:
            R['dev.off']()

    E.info("matrix added as >matrix< in R.")

    if not options.hardcopy:
        if options.input_filename:
            interpreter = code.InteractiveConsole(globals())
            interpreter.interact()
        else:
            E.info(
                "can not start new interactive session as input has come from stdin.")

    E.Stop()

Example #7

Show file

File: r_compare_distributions.py Project: CGATOxford/cgat

def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Compares two distributions of values with a statistical test run in R
    (``--method``) and, unless ``--skip-plot`` is given, draws a 2x2 panel
    of plots: a boxplot, a quantile-quantile plot and relative/absolute
    frequency histograms.

    The first distribution is read from ``--infile1`` or, if that is not
    given, from stdin.  The second is read from ``--infile2`` or, with
    ``--norm-test``, drawn with ``R.rnorm`` using the mean and standard
    deviation of the first.  Positional arguments are forwarded to the R
    test function; those of the form ``key=value`` are passed as keyword
    arguments.

    The test result and the items of ``Stats.Summary`` for both
    distributions are written to ``options.stdout`` as tab-separated
    key/value lines.

    :param argv: command line arguments, defaults to ``sys.argv``.
    :raises ValueError: if a paired test is requested for two samples of
        different size.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: r_compare_distributions.py 2782 2009-09-10 11:40:29Z andreas $")

    parser.add_option("-m", "--method", dest="method", type="choice",
                      help="method to use: ks=Kolmogorov-Smirnov, mwu=Mann-WhitneyU, shapiro=Shapiro-Wilk, paired-mwu=paired Mann-WhitneyU, paired-t=paired t-test [default=%default]",
                      choices=("ks", "mwu", "shapiro", "paired-mwu", "paired-t"))
    parser.add_option("-a", "--hardcopy", dest="hardcopy", type="string",
                      help="write hardcopy to file.", metavar="FILE")
    parser.add_option("-1", "--infile1", dest="filename_input1", type="string",
                      help="input filename for distribution 1.")
    parser.add_option("-2", "--infile2", dest="filename_input2", type="string",
                      help="input filename for distribution 2.")
    parser.add_option("--plot-legend", dest="legend", type="string",
                      help="legend for histograms.")
    parser.add_option("-f", "--infile-map", dest="filename_input_map", type="string",
                      help="input filename for mapping categories to values.")
    parser.add_option("-n", "--norm-test", dest="norm_test", action="store_true",
                      help="""test if a set of values is normally distributed. Mean and variance
                       are calculated from the data.""")
    parser.add_option("-b", "--num-bins", dest="num_bins", type="int",
                      help="""number of bins (for plotting purposes only).""")
    parser.add_option("--bin-size", dest="bin_size", type="float",
                      help="""bin size for plot.""")
    parser.add_option("--min-value", dest="min_value", type="float",
                      help="""minimum_value for plot.""")
    parser.add_option("--max-value", dest="max_value", type="float",
                      help="""maximum_value for plot.""")
    parser.add_option("--skip-plot", dest="plot", action="store_false",
                      help="""skipping plotting.""")
    parser.add_option("--header-names", dest="header", type="string",
                      help="""header of value column [default=%default].""")
    parser.add_option("--title", dest="title", type="string",
                      help="""plot title [default=%default].""")

    parser.set_defaults(
        method="ks",
        filename_input1=None,
        filename_input2=None,
        filename_input_map=None,
        legend=None,
        norm_test=False,
        num_bins=0,
        legend_range="2,2",
        bin_size=None,
        min_value=None,
        max_value=None,
        plot=True,
        header="value",
        title=None,
    )

    # NOTE(review): argv is not forwarded to E.Start here, so an explicit
    # *argv* has no visible effect - confirm whether E.Start accepts it.
    (options, args) = E.Start(parser,
                              add_pipe_options=True)

    # positional arguments are handed on to the R test function;
    # "key=value" arguments become keyword arguments.
    kwargs = {}
    xargs = []
    for arg in args:
        if "=" in arg:
            # split on the first "=" only so that values may contain "="
            key, value = arg.split("=", 1)
            kwargs[key] = value
        else:
            xargs.append(arg)

    if options.legend:
        options.legend = options.legend.split(",")

    map_category2value = {}
    if options.filename_input_map:
        with open(options.filename_input_map, "r") as map_file:
            map_category2value = IOTools.ReadMap(map_file,
                                                 map_functions=(str, float))
        f = str
    else:
        f = float

    if options.filename_input1:
        infile1 = IOTools.openFile(options.filename_input1, "r")
    else:
        infile1 = sys.stdin

    try:
        values1, errors1 = IOTools.ReadList(infile1,
                                            map_function=f,
                                            map_category=map_category2value)
    finally:
        # only close what was opened here, never stdin
        if options.filename_input1:
            infile1.close()

    if errors1 and options.loglevel >= 3:
        options.stdlog.write("# errors in input1: %s\n" %
                             ";".join(map(str, errors1)))

    if options.norm_test:
        mean = R.mean(values1)
        stddev = R.sd(values1)
        options.stdlog.write("# creating %i samples from normal distribution with mean %f and stddev %f\n" % (
            len(values1), mean, stddev))

        values2 = R.rnorm(len(values1), mean, stddev)
        errors2 = ()
    else:
        with open(options.filename_input2, "r") as infile2:
            values2, errors2 = IOTools.ReadList(infile2,
                                                map_function=f,
                                                map_category=map_category2value)

    if errors2 and options.loglevel >= 3:
        options.stdlog.write("# errors in input2: %s\n" %
                             ";".join(map(str, errors2)))

    if options.loglevel >= 1:
        options.stdlog.write("# ninput1=%i, nerrors1=%i, ninput2=%i, nerrors2=%i\n" % (len(values1), len(errors1),
                                                                                       len(values2), len(errors2)))

    if options.method in ("paired-mwu", "paired-t"):
        if len(values1) != len(values2):
            raise ValueError(
                "number of values must be equal for paired tests.")

    # open the graphics device only if something will be plotted; it is
    # closed under the same condition at the end.
    if options.plot and options.hardcopy:
        R.png(options.hardcopy, width=1024, height=768)

    if options.method == "ks":
        result = R.ks_test(values1, values2, *xargs, **kwargs)
    elif options.method == "mwu":
        result = R.wilcox_test(
            values1, values2, paired=False, correct=True, *xargs, **kwargs)
    elif options.method == "paired-mwu":
        result = R.wilcox_test(
            values1, values2, paired=True, correct=True, *xargs, **kwargs)
    elif options.method == "paired-t":
        result = R.t_test(values1, values2, paired=True, *xargs, **kwargs)
    elif options.method == "shapiro":
        if len(values1) > 5000:
            E.warn(
                "shapiro-wilk test only accepts < 5000 values, a random sample has been created.")
            values1 = random.sample(values1, 5000)
        result = R.shapiro_test(values1, *xargs, **kwargs)

    if options.plot:
        R.assign("v1", values1)
        R.assign("v2", values2)

        if options.title:
            # set the size of the outer margins - the title needs to be added at the end
            # after plots have been created
            R.par(oma=R.c(0, 0, 4, 0))

        R.layout(R.matrix((1, 2, 3, 4), 2, 2, byrow=True))

        R.boxplot(values1, values2, col=('white', 'red'), main="Boxplot")
        R("""qqplot( v1, v2, main ='Quantile-quantile plot' ); lines( c(0,1), c(0,1) );""")

        # compute breaks:

        min_value = min(min(values1), min(values2))
        if options.min_value is not None:
            min_value = min(min_value, options.min_value)

        max_value = max(max(values1), max(values2))
        if options.max_value is not None:
            max_value = max(max_value, options.max_value)

        # compare against None so that an explicit limit of 0 is honoured
        range_given = (options.min_value is not None or
                       options.max_value is not None)

        extra_options = ""
        if options.num_bins and not range_given:
            extra_options += ", breaks=%i" % options.num_bins

        elif options.num_bins and range_given:
            # the breaks have to span [min_value, max_value]: hist()
            # rejects data lying outside the outermost breaks. The last
            # break is max_value itself to avoid a floating point shortfall.
            bin_size = float((max_value - min_value)) / options.num_bins
            breaks = [
                min_value + x * bin_size for x in range(options.num_bins)]
            breaks.append(max_value)
            extra_options += ", breaks=c(%s)" % ",".join(map(str, breaks))

        elif options.bin_size is not None:
            num_bins = int(((max_value - min_value) / options.bin_size)) + 1
            breaks = [
                min_value + x * options.bin_size for x in range(num_bins + 1)]
            extra_options += ", breaks=c(%s)" % ",".join(map(str, breaks))

        R("""h1 <- hist( v1, freq=FALSE,           density=20, main='Relative frequency histogram' %s)""" %
          extra_options)
        R("""h2 <- hist( v2, freq=FALSE, add=TRUE, density=20, col='red', offset=0.5, angle=135 %s)""" %
          extra_options)
        if options.legend:
            R("""legend( ( max(c(h1$breaks[-1], h2$breaks[-1])) - min(c(h1$breaks[1], h2$breaks[1]) ) ) / 2,
            max( max(h1$density), max(h2$density)) / 2, c('%s'), fill=c('white','red'))""" % (
                "','".join(options.legend)))

        R("""h1 <- hist( v1, freq=TRUE,            density=20, main='Absolute frequency histogram' %s)""" %
          extra_options)
        R("""h2 <- hist( v2, freq=TRUE,  add=TRUE, density=20, col='red', offset=0.5, angle=135 %s )""" %
          extra_options)
        if options.legend:
            R("""legend( ( max(c(h1$breaks[-1], h2$breaks[-1])) - min(c(h1$breaks[1], h2$breaks[1]) ) ) / 2,
            max( max(h1$counts), max(h2$counts)) / 2, c('%s'), fill=c('white','red'))""" % (
                "','".join(options.legend)))

        if options.title:
            R.mtext(options.title, 3, outer=True, line=1, cex=1.5)

    if options.loglevel >= 1:
        options.stdout.write("## Results for %s\n" % result['method'])

    options.stdout.write("%s\t%s\n" % ("key", options.header))

    for key in list(result.keys()):
        if key == "data.name":
            continue
        options.stdout.write("\t".join((key, str(result[key]))) + "\n")

    # summary lines are suffixed with 1 and 2 for the two distributions
    stat = Stats.Summary(values1)
    for key, value in list(stat.items()):
        options.stdout.write("%s1\t%s\n" % (str(key), str(value)))

    stat = Stats.Summary(values2)
    for key, value in list(stat.items()):
        options.stdout.write("%s2\t%s\n" % (str(key), str(value)))

    if options.plot and options.hardcopy:
        R.dev_off()

    E.Stop()

Example #8

Show file

def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Compares two distributions of values with a statistical test run in R
    (``--method``) and, unless ``--skip-plot`` is given, draws a 2x2 panel
    of plots: a boxplot, a quantile-quantile plot and relative/absolute
    frequency histograms.

    The first distribution is read from ``--infile1`` or, if that is not
    given, from stdin.  The second is read from ``--infile2`` or, with
    ``--norm-test``, drawn with ``R.rnorm`` using the mean and standard
    deviation of the first.  Positional arguments are forwarded to the R
    test function; those of the form ``key=value`` are passed as keyword
    arguments.

    The test result and the items of ``Stats.Summary`` for both
    distributions are written to ``options.stdout`` as tab-separated
    key/value lines.

    :param argv: command line arguments, defaults to ``sys.argv``.
    :raises ValueError: if a paired test is requested for two samples of
        different size.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: r_compare_distributions.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    parser.add_option(
        "-m",
        "--method",
        dest="method",
        type="choice",
        help=
        "method to use: ks=Kolmogorov-Smirnov, mwu=Mann-WhitneyU, shapiro=Shapiro-Wilk, paired-mwu=paired Mann-WhitneyU, paired-t=paired t-test [default=%default]",
        choices=("ks", "mwu", "shapiro", "paired-mwu", "paired-t"))
    parser.add_option("-a",
                      "--hardcopy",
                      dest="hardcopy",
                      type="string",
                      help="write hardcopy to file.",
                      metavar="FILE")
    parser.add_option("-1",
                      "--infile1",
                      dest="filename_input1",
                      type="string",
                      help="input filename for distribution 1.")
    parser.add_option("-2",
                      "--infile2",
                      dest="filename_input2",
                      type="string",
                      help="input filename for distribution 2.")
    parser.add_option("--plot-legend",
                      dest="legend",
                      type="string",
                      help="legend for histograms.")
    parser.add_option("-f",
                      "--infile-map",
                      dest="filename_input_map",
                      type="string",
                      help="input filename for mapping categories to values.")
    parser.add_option(
        "-n",
        "--norm-test",
        dest="norm_test",
        action="store_true",
        help=
        """test if a set of values is normally distributed. Mean and variance
                       are calculated from the data.""")
    parser.add_option("-b",
                      "--num-bins",
                      dest="num_bins",
                      type="int",
                      help="""number of bins (for plotting purposes only).""")
    parser.add_option("--bin-size",
                      dest="bin_size",
                      type="float",
                      help="""bin size for plot.""")
    parser.add_option("--min-value",
                      dest="min_value",
                      type="float",
                      help="""minimum_value for plot.""")
    parser.add_option("--max-value",
                      dest="max_value",
                      type="float",
                      help="""maximum_value for plot.""")
    parser.add_option("--skip-plot",
                      dest="plot",
                      action="store_false",
                      help="""skipping plotting.""")
    parser.add_option("--header-names",
                      dest="header",
                      type="string",
                      help="""header of value column [default=%default].""")
    parser.add_option("--title",
                      dest="title",
                      type="string",
                      help="""plot title [default=%default].""")

    parser.set_defaults(
        method="ks",
        filename_input1=None,
        filename_input2=None,
        filename_input_map=None,
        legend=None,
        norm_test=False,
        num_bins=0,
        legend_range="2,2",
        bin_size=None,
        min_value=None,
        max_value=None,
        plot=True,
        header="value",
        title=None,
    )

    # NOTE(review): argv is not forwarded to E.Start here, so an explicit
    # *argv* has no visible effect - confirm whether E.Start accepts it.
    (options, args) = E.Start(parser, add_pipe_options=True)

    # positional arguments are handed on to the R test function;
    # "key=value" arguments become keyword arguments.
    kwargs = {}
    xargs = []
    for arg in args:
        if "=" in arg:
            # split on the first "=" only so that values may contain "="
            key, value = arg.split("=", 1)
            kwargs[key] = value
        else:
            xargs.append(arg)

    if options.legend:
        options.legend = options.legend.split(",")

    map_category2value = {}
    if options.filename_input_map:
        with open(options.filename_input_map, "r") as map_file:
            map_category2value = IOTools.ReadMap(map_file,
                                                 map_functions=(str, float))
        f = str
    else:
        f = float

    if options.filename_input1:
        infile1 = IOTools.openFile(options.filename_input1, "r")
    else:
        infile1 = sys.stdin

    try:
        values1, errors1 = IOTools.ReadList(infile1,
                                            map_function=f,
                                            map_category=map_category2value)
    finally:
        # only close what was opened here, never stdin
        if options.filename_input1:
            infile1.close()

    if errors1 and options.loglevel >= 3:
        options.stdlog.write("# errors in input1: %s\n" %
                             ";".join(map(str, errors1)))

    if options.norm_test:
        mean = R.mean(values1)
        stddev = R.sd(values1)
        options.stdlog.write(
            "# creating %i samples from normal distribution with mean %f and stddev %f\n"
            % (len(values1), mean, stddev))

        values2 = R.rnorm(len(values1), mean, stddev)
        errors2 = ()
    else:
        with open(options.filename_input2, "r") as infile2:
            values2, errors2 = IOTools.ReadList(
                infile2,
                map_function=f,
                map_category=map_category2value)

    if errors2 and options.loglevel >= 3:
        options.stdlog.write("# errors in input2: %s\n" %
                             ";".join(map(str, errors2)))

    if options.loglevel >= 1:
        options.stdlog.write(
            "# ninput1=%i, nerrors1=%i, ninput2=%i, nerrors2=%i\n" %
            (len(values1), len(errors1), len(values2), len(errors2)))

    if options.method in ("paired-mwu", "paired-t"):
        if len(values1) != len(values2):
            raise ValueError(
                "number of values must be equal for paired tests.")

    # open the graphics device only if something will be plotted; it is
    # closed under the same condition at the end.
    if options.plot and options.hardcopy:
        R.png(options.hardcopy, width=1024, height=768)

    if options.method == "ks":
        result = R.ks_test(values1, values2, *xargs, **kwargs)
    elif options.method == "mwu":
        result = R.wilcox_test(values1,
                               values2,
                               paired=False,
                               correct=True,
                               *xargs,
                               **kwargs)
    elif options.method == "paired-mwu":
        result = R.wilcox_test(values1,
                               values2,
                               paired=True,
                               correct=True,
                               *xargs,
                               **kwargs)
    elif options.method == "paired-t":
        result = R.t_test(values1, values2, paired=True, *xargs, **kwargs)
    elif options.method == "shapiro":
        if len(values1) > 5000:
            E.warn(
                "shapiro-wilk test only accepts < 5000 values, a random sample has been created."
            )
            values1 = random.sample(values1, 5000)
        result = R.shapiro_test(values1, *xargs, **kwargs)

    if options.plot:
        R.assign("v1", values1)
        R.assign("v2", values2)

        if options.title:
            # set the size of the outer margins - the title needs to be added at the end
            # after plots have been created
            R.par(oma=R.c(0, 0, 4, 0))

        R.layout(R.matrix((1, 2, 3, 4), 2, 2, byrow=True))

        R.boxplot(values1, values2, col=('white', 'red'), main="Boxplot")
        R("""qqplot( v1, v2, main ='Quantile-quantile plot' ); lines( c(0,1), c(0,1) );"""
          )

        # compute breaks:

        min_value = min(min(values1), min(values2))
        if options.min_value is not None:
            min_value = min(min_value, options.min_value)

        max_value = max(max(values1), max(values2))
        if options.max_value is not None:
            max_value = max(max_value, options.max_value)

        # compare against None so that an explicit limit of 0 is honoured
        range_given = (options.min_value is not None
                       or options.max_value is not None)

        extra_options = ""
        if options.num_bins and not range_given:
            extra_options += ", breaks=%i" % options.num_bins

        elif options.num_bins and range_given:
            # the breaks have to span [min_value, max_value]: hist()
            # rejects data lying outside the outermost breaks. The last
            # break is max_value itself to avoid a floating point shortfall.
            bin_size = float((max_value - min_value)) / options.num_bins
            breaks = [
                min_value + x * bin_size for x in range(options.num_bins)
            ]
            breaks.append(max_value)
            extra_options += ", breaks=c(%s)" % ",".join(map(str, breaks))

        elif options.bin_size is not None:
            num_bins = int(((max_value - min_value) / options.bin_size)) + 1
            breaks = [
                min_value + x * options.bin_size for x in range(num_bins + 1)
            ]
            extra_options += ", breaks=c(%s)" % ",".join(map(str, breaks))

        R("""h1 <- hist( v1, freq=FALSE,           density=20, main='Relative frequency histogram' %s)"""
          % extra_options)
        R("""h2 <- hist( v2, freq=FALSE, add=TRUE, density=20, col='red', offset=0.5, angle=135 %s)"""
          % extra_options)
        if options.legend:
            R("""legend( ( max(c(h1$breaks[-1], h2$breaks[-1])) - min(c(h1$breaks[1], h2$breaks[1]) ) ) / 2,
            max( max(h1$density), max(h2$density)) / 2, c('%s'), fill=c('white','red'))"""
              % ("','".join(options.legend)))

        R("""h1 <- hist( v1, freq=TRUE,            density=20, main='Absolute frequency histogram' %s)"""
          % extra_options)
        R("""h2 <- hist( v2, freq=TRUE,  add=TRUE, density=20, col='red', offset=0.5, angle=135 %s )"""
          % extra_options)
        if options.legend:
            R("""legend( ( max(c(h1$breaks[-1], h2$breaks[-1])) - min(c(h1$breaks[1], h2$breaks[1]) ) ) / 2,
            max( max(h1$counts), max(h2$counts)) / 2, c('%s'), fill=c('white','red'))"""
              % ("','".join(options.legend)))

        if options.title:
            R.mtext(options.title, 3, outer=True, line=1, cex=1.5)

    if options.loglevel >= 1:
        options.stdout.write("## Results for %s\n" % result['method'])

    options.stdout.write("%s\t%s\n" % ("key", options.header))

    for key in list(result.keys()):
        if key == "data.name":
            continue
        options.stdout.write("\t".join((key, str(result[key]))) + "\n")

    # summary lines are suffixed with 1 and 2 for the two distributions
    stat = Stats.Summary(values1)
    for key, value in list(stat.items()):
        options.stdout.write("%s1\t%s\n" % (str(key), str(value)))

    stat = Stats.Summary(values2)
    for key, value in list(stat.items()):
        options.stdout.write("%s2\t%s\n" % (str(key), str(value)))

    if options.plot and options.hardcopy:
        R.dev_off()

    E.Stop()

Example #9

Show file

File: r_table2scatter.py Project: wangdi2014/cgat

def main(argv=None):

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: r_table2scatter.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    parser.add_option(
        "-c",
        "--columns",
        dest="columns",
        type="string",
        help=
        "columns to take from table. Choices are 'all', 'all-but-first' or a ','-separated list of columns."
    )

    parser.add_option(
        "--logscale",
        dest="logscale",
        type="string",
        help="log-transform one or both axes [default=%Default].")

    parser.add_option("-a",
                      "--hardcopy",
                      dest="hardcopy",
                      type="string",
                      help="write hardcopy to file [default=%default].",
                      metavar="FILE")

    parser.add_option("-f",
                      "--file",
                      dest="input_filename",
                      type="string",
                      help="filename with table data [default=%default].",
                      metavar="FILE")

    parser.add_option("-2",
                      "--file2",
                      dest="input_filename2",
                      type="string",
                      help="additional data file [default=%default].",
                      metavar="FILE")

    parser.add_option(
        "-s",
        "--stats",
        dest="statistics",
        type="choice",
        choices=("correlation", "spearman", "pearson", "count"),
        help="statistical quantities to compute [default=%default]",
        action="append")

    parser.add_option("-p",
                      "--plot",
                      dest="plot",
                      type="choice",
                      choices=("scatter", "pairs", "panel", "bar",
                               "bar-stacked", "bar-besides", "1_vs_x",
                               "matched", "boxplot", "scatter+marginal",
                               "scatter-regression"),
                      help="plots to plot [default=%default]",
                      action="append")

    parser.add_option(
        "-t",
        "--threshold",
        dest="threshold",
        type="float",
        help="min threshold to use for counting method [default=%default].")

    parser.add_option(
        "-o",
        "--colours",
        dest="colours",
        type="int",
        help="column with colour information [default=%default].")

    parser.add_option(
        "-l",
        "--plot-labels",
        dest="labels",
        type="string",
        help="column labels for x and y in matched plots [default=%default].")

    parser.add_option("-d",
                      "--add-diagonal",
                      dest="add_diagonal",
                      action="store_true",
                      help="add diagonal to plot [default=%default].")

    parser.add_option("-e",
                      "--plot-legend",
                      dest="legend",
                      type="int",
                      help="column with legend [default=%default].")

    parser.add_option("-r",
                      "--options",
                      dest="r_options",
                      type="string",
                      help="R plotting options [default=%default].")

    parser.add_option("--format",
                      dest="format",
                      type="choice",
                      choices=("full", "sparse"),
                      help="output format [default=%default].")

    parser.add_option("--title",
                      dest="title",
                      type="string",
                      help="""plot title [default=%default].""")

    parser.add_option("",
                      "--xrange",
                      dest="xrange",
                      type="string",
                      help="x viewing range of plot [default=%default].")

    parser.add_option("",
                      "--yrange",
                      dest="yrange",
                      type="string",
                      help="y viewing range of plot[default=%default].")

    parser.add_option("--allow-empty-file",
                      dest="fail_on_empty",
                      action="store_false",
                      help="do not fail on empty input [default=%default].")

    parser.add_option("--fail-on-empty",
                      dest="fail_on_empty",
                      action="store_true",
                      help="fail on empty input [default=%default].")

    parser.set_defaults(hardcopy=None,
                        input_filename="",
                        input_filename2=None,
                        columns="all",
                        logscale=None,
                        statistics=[],
                        plot=[],
                        threshold=0.0,
                        labels="x,y",
                        colours=None,
                        diagonal=False,
                        legend=None,
                        title=None,
                        xrange=None,
                        yrange=None,
                        r_options="",
                        fail_on_empty=True,
                        format="full")

    (options, args) = E.Start(parser)

    if len(args) == 1 and not options.input_filename:
        options.input_filename = args[0]

    if options.columns not in ("all", "all-but-first"):
        options.columns = [int(x) - 1 for x in options.columns.split(",")]

    if options.colours:
        options.colours -= 1
    if options.legend:
        options.legend -= 1

    table = {}
    headers = []

    # read data matrix
    if options.input_filename:
        lines = IOTools.openFile(options.input_filename, "r").readlines()
    else:
        # note: this will not work for interactive viewing, but
        # creating hardcopy plots works.
        lines = sys.stdin.readlines()

    lines = [x for x in lines if x[0] != "#"]

    if len(lines) == 0:
        if options.fail_on_empty:
            raise IOError("no input")
        E.warn("empty input")
        E.Stop()
        return

    matrix, headers, colours, legend = readTable(lines,
                                                 "matrix",
                                                 take_columns=options.columns,
                                                 headers=True,
                                                 colours=options.colours,
                                                 row_names=options.legend)

    if options.input_filename2:
        # read another matrix (should be of the same format.
        matrix2, headers2, colours2, legend2 = readTable(
            lines,
            "matrix2",
            take_columns=options.columns,
            headers=True,
            colours=options.colours,
            row_names=options.legend)

    R.assign("headers", headers)

    ndata = R("""length( matrix[,1] )""")[0]

    if options.loglevel >= 1:
        options.stdlog.write("# read matrix: %ix%i\n" % (len(headers), ndata))

    if colours:
        R.assign("colours", colours)

    for method in options.statistics:

        if method == "correlation":
            cor = R.cor(matrix, use="pairwise.complete.obs")
            writeMatrix(sys.stdout, cor, headers=headers, format="%5.2f")

        elif method == "pearson":
            options.stdout.write("\t".join(("var1", "var2", "coeff", "passed",
                                            "pvalue", "n", "method",
                                            "alternative")) + "\n")
            for x in range(len(headers) - 1):
                for y in range(x + 1, len(headers)):
                    try:
                        result = R("""cor.test( matrix[,%i], matrix[,%i] )""" %
                                   (x + 1, y + 1))
                    except rpy.RPyException as msg:
                        E.warn(
                            "correlation not computed for columns %i(%s) and %i(%s): %s"
                            % (x, headers[x], y, headers[y], msg))
                        options.stdout.write(
                            "%s\t%s\t%s\t%s\t%s\t%i\t%s\t%s\n" %
                            (headers[x], headers[y], "na", "na", "na", 0, "na",
                             "na"))

                    else:
                        options.stdout.write(
                            "%s\t%s\t%6.4f\t%s\t%e\t%i\t%s\t%s\n" %
                            (headers[x], headers[y],
                             result.rx2('estimate').rx2('cor')[0],
                             Stats.getSignificance(
                                 float(result.rx2('p.value')[0])),
                             result.rx2('p.value')[0],
                             result.rx2('parameter').rx2('df')[0],
                             result.rx2('method')[0],
                             result.rx2('alternative')[0]))

        elif method == "spearman":
            options.stdout.write("\t".join(("var1", "var2", "coeff", "passed",
                                            "pvalue", "method",
                                            "alternative")) + "\n")
            for x in range(len(headers) - 1):
                for y in range(x + 1, len(headers)):
                    result = R(
                        """cor.test( matrix[,%i], matrix[,%i], method='spearman')"""
                        % (x + 1, y + 1))
                    options.stdout.write(
                        "%s\t%s\t%6.4f\t%s\t%e\t%i\t%s\t%s\n" %
                        (headers[x], headers[y], result['estimate']['rho'],
                         Stats.getSignificance(float(result['p.value'])),
                         result['p.value'], result['parameter']['df'],
                         result['method'], result['alternative']))

        elif method == "count":
            # number of shared elements > threshold
            m, r, c = MatlabTools.ReadMatrix(open(options.input_filename, "r"),
                                             take=options.columns,
                                             headers=True)
            mask = numpy.greater(m, options.threshold)
            counts = numpy.dot(numpy.transpose(mask), mask)
            writeMatrix(options.stdout, counts, headers=c, format="%i")

    if options.plot:

        # remove columns that are completely empty
        if "pairs" in options.plot:
            colsums = R('''colSums( is.na(matrix ))''')
            take = [x for x in range(len(colsums)) if colsums[x] != ndata]
            if take:
                E.warn("removing empty columns %s before plotting" % str(take))
                matrix = R.subset(matrix, select=[x + 1 for x in take])
                R.assign("""matrix""", matrix)
                headers = [headers[x] for x in take]
                if legend:
                    legend = [headers[x] for x in take]

        if options.r_options:
            extra_options = ", %s" % options.r_options
        else:
            extra_options = ""

        if options.legend is not None and len(legend):
            extra_options += ", legend=c('%s')" % "','".join(legend)

        if options.labels:
            xlabel, ylabel = options.labels.split(",")
            extra_options += ", xlab='%s', ylab='%s'" % (xlabel, ylabel)
        else:
            xlabel, ylabel = "", ""

        if options.colours:
            extra_options += ", col=colours"

        if options.logscale:
            extra_options += ", log='%s'" % options.logscale

        if options.xrange:
            extra_options += ", xlim=c(%f,%f)" % tuple(
                map(float, options.xrange.split(",")))

        if options.yrange:
            extra_options += ", ylim=c(%f,%f)" % tuple(
                map(float, options.yrange.split(",")))

        if options.hardcopy:
            if options.hardcopy.endswith(".eps"):
                R.postscript(options.hardcopy)
            elif options.hardcopy.endswith(".png"):
                R.png(options.hardcopy, width=1024, height=768, type="cairo")
            elif options.hardcopy.endswith(".jpg"):
                R.jpg(options.hardcopy, width=1024, height=768, type="cairo")

        for method in options.plot:

            if ndata < 100:
                point_size = "1"
                pch = "o"
            elif ndata < 1000:
                point_size = "1"
                pch = "o"
            else:
                point_size = "0.5"
                pch = "."

            if method == "scatter":
                R("""plot( matrix[,1], matrix[,2], cex=%s, pch="o" %s)""" %
                  (point_size, extra_options))

            if method == "scatter-regression":
                R("""plot( matrix[,1], matrix[,2], cex=%s, pch="o" %s)""" %
                  (point_size, extra_options))
                dat = R(
                    """dat <- data.frame(x = matrix[,1], y = matrix[,2])""")
                R("""new <- data.frame(x = seq( min(matrix[,1]), max(matrix[,1]), (max(matrix[,1]) - min(matrix[,1])) / 100))"""
                  )
                mod = R("""mod <- lm( y ~ x, dat)""")
                R("""predict(mod, new, se.fit = TRUE)""")
                R("""pred.w.plim <- predict(mod, new, interval="prediction")"""
                  )
                R("""pred.w.clim <- predict(mod, new, interval="confidence")"""
                  )
                R("""matpoints(new$x,cbind(pred.w.clim, pred.w.plim[,-1]), lty=c(1,2,2,3,3), type="l")"""
                  )
                R.mtext("y = %f * x + %f, r=%6.4f, n=%i" %
                        (mod["coefficients"]["x"],
                         mod["coefficients"]["(Intercept)"],
                         R("""cor( dat )[2]"""), ndata),
                        3,
                        cex=1.0)

            elif method == "pairs":
                if options.add_diagonal:
                    R("""panel.hist <- function( x,y,...  ) { points(x,y,...); abline(0,1); }"""
                      )
                else:
                    R("""panel.hist <- function( x,y,...  ) { points(x,y,...); }"""
                      )

                # There used to be a argument na_action="na.omit", but
                # removed this as there appeared error messages saying
                # "na.action is not a graphical parameter" and the
                # plots showed occasionally the wrong scale.
                # cex=point_size also caused trouble (error message:
                # "X11 used font size 8 when 2 was requested" or
                # similar)
                if options.colours:
                    R.pairs(matrix,
                            pch=pch,
                            col=colours,
                            main=options.title,
                            panel="panel.hist",
                            labels=headers,
                            cex_labels=2.0)
                else:
                    R.pairs(matrix,
                            pch=pch,
                            panel="panel.hist",
                            main=options.title,
                            labels=headers,
                            cex_labels=2.0)

            elif method == "boxplot":
                extra_options += ",main='%s'" % options.title

                # set vertical orientation
                if max([len(x) for x in headers]) > 40 / len(headers):
                    # remove xlabel:
                    extra_options = re.sub(", xlab='[^']+'", "", extra_options)
                    extra_options += ", names.arg=headers, las=2"
                    R("""op <- par(mar=c(11,4,4,2))"""
                      )  # the 10 allows the names.arg below the barplot

                R("""boxplot( matrix %s)""" % extra_options)

            elif method == "bar" or method == "bar-stacked":
                if not options.colours:
                    extra_options += ", col=rainbow(5)"

                # set vertical orientation
                if max([len(x) for x in headers]) > 40 / len(headers):
                    # remove xlabel:
                    extra_options = re.sub(", xlab='[^']+'", "", extra_options)
                    extra_options += ", names.arg=headers, las=2"
                    R("""op <- par(mar=c(11,4,4,2))"""
                      )  # the 10 allows the names.arg below the barplot

                R("""barplot(as.matrix(matrix), %s)""" % extra_options)

            elif method == "bar-besides":
                if not options.colours:
                    extra_options += ", col=rainbow(%i)" % ndata

                # set vertical orientation
                if max([len(x) for x in headers]) > 40 / len(headers):
                    # remove xlabel:
                    extra_options = re.sub(", xlab='[^']+'", "", extra_options)
                    extra_options += ", names.arg=headers, las=2"
                    R("""op <- par(mar=c(11,4,4,2))"""
                      )  # the 10 allows the names.arg below the barplot

                R("""barplot(as.matrix(matrix), beside=TRUE %s)""" %
                  extra_options)

            elif method == "scatter+marginal":

                if options.title:
                    # set the size of the outer margins - the title needs to be added at the end
                    # after plots have been created
                    R.par(oma=R.c(0, 0, 4, 0))

                R("""matrix""")
                R("""
x <- matrix[,1];
y <- matrix[,2];
xhist <- hist(x, breaks=20, plot=FALSE);
yhist <- hist(y, breaks=20, plot=FALSE);
top <- max(c(xhist$counts, yhist$counts));
nf <- layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), respect=TRUE );
par(mar=c(3,3,1,1)) ;
plot(x, y, cex=%s, pch="o" %s) ;
par(mar=c(0,3,1,1)) ;
barplot(xhist$counts, axes=FALSE, ylim=c(0, top), space=0 ) ;
par(mar=c(3,0,1,1)) ;
title(main='%s');
barplot(yhist$counts, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE ) ;
title(main='%s');
""" % (point_size, extra_options, xlabel, ylabel))

                if options.title:
                    R.mtext(options.title, 3, outer=True, line=1, cex=1.5)

            elif method in ("panel", "1_vs_x", "matched"):

                if method == "panel":
                    pairs = []
                    for x in range(len(headers) - 1):
                        for y in range(x + 1, len(headers)):
                            pairs.append((x, y))

                elif method == "1_vs_x":
                    pairs = []
                    for x in range(1, len(headers)):
                        pairs.append((0, x))

                # print matching columns
                elif method == "matched":
                    pairs = []
                    for x in range(len(headers) - 1):
                        for y in range(x + 1, len(headers)):
                            if headers[x] == headers[y]:
                                pairs.append((x, y))
                                break

                w = int(math.ceil(math.sqrt(len(pairs))))
                h = int(math.ceil(float(len(pairs)) / w))

                PosInf = 1e300000
                NegInf = -1e300000

                xlabel, ylabel = options.labels.split(",")

                R("""layout(matrix(seq(1,%i), %i, %i, byrow = TRUE))""" %
                  (w * h, w, h))
                for a, b in pairs:
                    new_matrix = [
                        x for x in zip(
                            list(matrix[a].values())[0],
                            list(matrix[b].values())[0])
                        if x[0] not in (float("nan"), PosInf, NegInf)
                        and x[1] not in (float("nan"), PosInf, NegInf)
                    ]
                    try:
                        R("""plot(matrix[,%i], matrix[,%i], main='%s versus %s', cex=0.5, pch=".", xlab='%s', ylab='%s' )"""
                          % (a + 1, b + 1, headers[b], headers[a], xlabel,
                             ylabel))
                    except rpy.RException as msg:
                        print("could not plot %s versus %s: %s" %
                              (headers[b], headers[a], msg))

        if options.hardcopy:
            R['dev.off']()

    E.info("matrix added as >matrix< in R.")

    if not options.hardcopy:
        if options.input_filename:
            interpreter = code.InteractiveConsole(globals())
            interpreter.interact()
        else:
            E.info(
                "can not start new interactive session as input has come from stdin."
            )

    E.Stop()

Example #10

Show file

File: r_table2scatter.py Project: BioinformaticsArchive/cgat

                # set vertical orientation
                if max( [len(x) for x in headers] ) > 40 / len(headers):
                    # remove xlabel:
                    extra_options = re.sub( ", xlab='[^']+'", "", extra_options )
                    extra_options += ", names.arg=headers, las=2"
                    R("""op <- par(mar=c(11,4,4,2))""") # the 10 allows the names.arg below the barplot

                R("""barplot(as.matrix(matrix), beside=TRUE %s)""" % extra_options)

            elif method == "scatter+marginal":

                if options.title:
                    # set the size of the outer margins - the title needs to be added at the end
                    # after plots have been created
                    R.par(oma=R.c(0,0,4,0) )                     

                R( """matrix""" )
                R( """
x <- matrix[,1];
y <- matrix[,2];
xhist <- hist(x, breaks=20, plot=FALSE);
yhist <- hist(y, breaks=20, plot=FALSE);
top <- max(c(xhist$counts, yhist$counts));
nf <- layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), respect=TRUE );
par(mar=c(3,3,1,1)) ;
plot(x, y, cex=%s, pch="o" %s) ;
par(mar=c(0,3,1,1)) ;
barplot(xhist$counts, axes=FALSE, ylim=c(0, top), space=0 ) ;
par(mar=c(3,0,1,1)) ;
title(main='%s');

Example #11

Show file

File: r_table2scatter.py Project: zpeng1989/cgat

                if max([len(x) for x in headers]) > 40 / len(headers):
                    # remove xlabel:
                    extra_options = re.sub(", xlab='[^']+'", "", extra_options)
                    extra_options += ", names.arg=headers, las=2"
                    R("""op <- par(mar=c(11,4,4,2))"""
                      )  # the 10 allows the names.arg below the barplot

                R("""barplot(as.matrix(matrix), beside=TRUE %s)""" %
                  extra_options)

            elif method == "scatter+marginal":

                if options.title:
                    # set the size of the outer margins - the title needs to be added at the end
                    # after plots have been created
                    R.par(oma=R.c(0, 0, 4, 0))

                R("""matrix""")
                R("""
x <- matrix[,1];
y <- matrix[,2];
xhist <- hist(x, breaks=20, plot=FALSE);
yhist <- hist(y, breaks=20, plot=FALSE);
top <- max(c(xhist$counts, yhist$counts));
nf <- layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), respect=TRUE );
par(mar=c(3,3,1,1)) ;
plot(x, y, cex=%s, pch="o" %s) ;
par(mar=c(0,3,1,1)) ;
barplot(xhist$counts, axes=FALSE, ylim=c(0, top), space=0 ) ;
par(mar=c(3,0,1,1)) ;
title(main='%s');

Example #12

Show file

File: iCLIP_bam2heatmap.py Project: jefflee1103/iCLIPlib

def main(argv=None):
    """Script main: build, normalise and optionally plot a read-density matrix.

    Parses command line options in sys.argv, unless *argv* is given.

    The steps, in order:

    1. If a GTF file is given, collect per-transcript total exon length,
       5'/3' UTR length and first-exon length (used for sorting rows and
       for plot annotations).
    2. Pick a signal getter (wig files, bed file, or the bam file given as
       the first positional argument).
    3. Read the raw matrix from ``--use-matrix`` or compute it with
       ``get_matrix``; optionally crop columns and divide by ``--norm-mat``.
    4. Normalise, sort, compress and renormalise the matrix and write it
       out as tab-separated text.
    5. Unless ``--no-plot`` is given, draw the matrix as a heatmap through
       rpy2, overlaying the requested annotations.

    Returns:
        1 if the bam file cannot be opened, 0 if rpy2 cannot be imported
        when a plot was requested, otherwise None.

    Raises:
        ValueError: if no GTF file is given although the matrix has to be
            computed, or if the normalisation matrix does not match the
            raw matrix.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-g", "--gtf-file", dest="gtf", type="string",
                      help="GTF containing gene annotations")
    parser.add_option("-s", "--sort", dest="sort", type="choice",
                      default="length",
                      choices=sort_choices,
                      help="Property to sort rows by. Choices are %s"
                           % ", ".join(sort_choices))
    parser.add_option("-b", "--bin-size", dest="bin_size", type="int",
                      default=25,
                      help="Size of window over which to sum reads")
    parser.add_option("-u", "--upstream-window", dest="us_win", type="int",
                      default=500,
                      help="Amount of sequence upstream of alignment point (less introns)")
    parser.add_option("-d", "--downstream-window", dest="ds_win", type="int",
                      default=None,
                      help="Amount of sequence downstream of alignment point (default longest segment)")
    parser.add_option("-a", "--align-at", dest="align_at", type="choice",
                      default="start",
                      choices=align_choices,
                      help="Where to align genes/transcripts at. Choices are %s"
                           % ", ".join(align_choices))
    parser.add_option("-H", "--height", dest="height", type="int",
                      default=None,
                      help="Number of rows in output matrix/height of plot in px")
    parser.add_option("-w", "--width", dest="width", type="int",
                      default=None,
                      help="Number of columns in output/width of plot in px "
                           "default based on bin size")
    parser.add_option("-n", "--normalize", dest="normalize", type="choice",
                      default="none",
                      choices=norm_choices,
                      help="Row normalization to apply. Choices are: %s"
                           % ", ".join(norm_choices))
    parser.add_option("-r", "--renormalize", dest="renormalize", type="choice",
                      default="none",
                      choices=norm_choices,
                      help="Row normalization to apply after row/column compression")
    parser.add_option("--no-plot", dest="plot", action="store_false",
                      default=True,
                      help="Do not output plot - compute matrix only")
    parser.add_option("--use-matrix", dest="use_matrix", type="string",
                      default=None,
                      help="Use existing matrix")
    parser.add_option("--annotations", dest="annotations", type="choice",
                      action="append",
                      choices=annotation_choices,
                      help="Add annotations to the output plot")
    parser.add_option("--reverse-strand", dest="rstrand", action="store_true",
                      default=False,
                      help="Find reads on reverse strand")
    parser.add_option("-f", "--feature", dest="feature", type="choice",
                      choices=["gene", "transcript"],
                      default="gene",
                      help="use genes or transcripts")
    parser.add_option("--quantile", dest="quantile", type="float",
                      default=0.99,
                      help="Quantile to use in quantile normalization")
    parser.add_option("-o", "--outfile-prefix", dest="outfile_pattern", type="string",
                      default=None,
                      help="base of names for output files")
    parser.add_option("-c", "--crop", dest="crop", type="string",
                      default=None,
                      help="crop view to a certain range on the xaxis. Specify like "
                           "-500:1000")
    parser.add_option("--format", dest="format", type="string",
                      default="png",
                      help="Output format, use valid R graphics device")
    parser.add_option("--plus-wig", dest="plus_wig", type="string",
                      help="Use this wig for plus strand info rather than bam file")
    parser.add_option("--minus-wig", dest="minus_wig", type="string",
                      help="Use this wig for minus strand info rather than bam file")
    parser.add_option("--bed", dest="bed", type="string",
                      help="Use this bed for signal(must be indexed)")
    parser.add_option("--norm-mat", dest="norm_mat", type="string",
                      help="Use this matrix for normalizing (e.g. RNA data)")
    parser.add_option("--sort-order-file", dest="sort_file", type="string",
                      default=None,
                      help="Two column file containing gene names in the first "
                           "column and a numeric value to sort on in the second")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if options.plot and (options.height is None):
        options.height = 100

    if options.gtf:

        f = IOTools.openFile(options.gtf)
        if options.feature == "gene":
            gtf_iterator = GTF.flat_gene_iterator(GTF.iterator(f))
        else:
            gtf_iterator = GTF.transcript_iterator(GTF.iterator(f))

        lengths = dict()
        utr3_lengths = dict()
        utr5_lengths = dict()
        first_exon_lengths = dict()
        for transcript in gtf_iterator:
            transcript_id = transcript[0].transcript_id

            exons = GTF.asRanges(transcript, "exon")
            lengths[transcript_id] = sum([e[1] - e[0] for e in exons])

            utrs = GTF.asRanges(transcript, "UTR")
            coding = Intervals.truncate(exons, utrs)
            coding.sort()

            # UTR intervals left of the first coding interval / right of the
            # last one, in genomic coordinates
            utr5 = [utr for utr in utrs if utr[1] <= coding[0][0]]
            utr3 = [utr for utr in utrs if utr[0] >= coding[-1][-1]]

            # on the minus strand the genomic order is reversed
            if transcript[0].strand == "-":
                utr3, utr5 = utr5, utr3

            if transcript[0].strand == "+" or len(exons) == 1:
                first_exon_lengths[transcript_id] = \
                    exons[0][1] - exons[0][0]
            else:
                first_exon_lengths[transcript_id] = \
                    exons[-1][1] - exons[-1][0]

            utr3_lengths[transcript_id] = sum(
                [e[1] - e[0] for e in utr3])

            utr5_lengths[transcript_id] = sum(
                [e[1] - e[0] for e in utr5])

        # the iterator has been consumed; release the input handle
        f.close()

        lengths = pandas.Series(lengths)
        utr3_lengths = pandas.Series(utr3_lengths)
        utr5_lengths = pandas.Series(utr5_lengths)
        first_exon_lengths = pandas.Series(first_exon_lengths)

    else:
        # without annotation there is nothing to sort by or to annotate with
        options.sort = "none"
        options.annotations = None

    if options.plus_wig:
        getter = iCLIP.make_getter(plus_wig=options.plus_wig,
                                   minus_wig=options.minus_wig)
    elif options.bed:
        getter = iCLIP.make_getter(bedfile=options.bed)
    else:
        try:
            getter = iCLIP.make_getter(bamfile=args[0])
        except IOError:
            E.error("Cannot open bamfile %s" % args[0])
            return 1
        except IndexError:
            # no positional argument given
            getter = None

    if options.use_matrix:
        raw_matrix = pandas.read_csv(options.use_matrix,
                                     sep="\t",
                                     index_col=0)
        raw_matrix.columns = raw_matrix.columns.astype("int")
    else:
        # ``lengths`` is only available when a GTF file was parsed above;
        # fail with a clear message instead of an UnboundLocalError.
        if not options.gtf:
            raise ValueError(
                "A GTF file (--gtf-file) is required unless --use-matrix "
                "is given")
        raw_matrix = get_matrix(getter, lengths, options)

    if options.crop:
        crop_from, crop_to = map(int, options.crop.split(":"))
        raw_matrix = raw_matrix.loc[:, crop_from:crop_to]

    if options.norm_mat:
        norm_matrix = pandas.read_csv(options.norm_mat,
                                      sep="\t",
                                      index_col=0)
        norm_matrix.columns = norm_matrix.columns.astype("int")

        if options.crop:
            norm_matrix = norm_matrix.loc[:, crop_from:crop_to]

        if all(norm_matrix.columns == raw_matrix.columns) and \
           all(raw_matrix.index.isin(norm_matrix.index.values)):
            norm_matrix = norm_matrix.loc[raw_matrix.index]
            # replace zeros by the smallest positive value to avoid
            # division by zero
            norm_matrix = norm_matrix.replace(
                0, norm_matrix[norm_matrix > 0].min().min())
            raw_matrix = raw_matrix/norm_matrix
            norm_matrix = None

        else:
            raise ValueError("Incompatible normalisation matrix")

    normalized_matrix = normalize(raw_matrix, options.normalize,
                                  quantile=options.quantile)

    if options.sort == "length":
        sorter = lengths
    elif options.sort == "3utr":
        sorter = utr3_lengths
    elif options.sort == "5utr":
        sorter = utr5_lengths
    elif options.sort == "first-exon":
        sorter = first_exon_lengths
    elif options.sort == "manual":
        sorter = pandas.read_csv(options.sort_file, sep="\t",
                                 index_col=0, usecols=[0, 1])
        sorter = sorter[sorter.columns[0]]
    elif options.sort == "none":
        # keep the input order: descending sort of a reversed-index range
        sorter = pandas.Series(range(raw_matrix.shape[0]),
                               index=raw_matrix.index[::-1])

    sorter = sorter[sorter.index.isin(normalized_matrix.index)]
    sorter = sorter.sort_values(ascending=False)
    sorted_matrix = normalized_matrix.loc[sorter.index.values]

    compress_matrix = iCLIP.compress_matrix(sorted_matrix,
                                            ncols=options.width,
                                            nrows=options.height)

    renormalized_matrix = normalize(compress_matrix, options.renormalize,
                                    quantile=options.quantile)

    if renormalized_matrix is raw_matrix and options.use_matrix is not None:
        E.info("Input and output matrices are identical, no matrix output")
    else:
        if options.outfile_pattern:
            mat_outfile = IOTools.openFile(
                options.outfile_pattern + ".matrix.tsv.gz", "w")
        else:
            mat_outfile = options.stdout

        renormalized_matrix.to_csv(mat_outfile, sep="\t")

        # only close handles opened here, never the shared stdout
        if options.outfile_pattern:
            mat_outfile.close()

    if options.plot:

        # Plotting is best effort: any failure to load rpy2 (missing
        # package, missing R installation, ...) skips the plot.
        try:
            from rpy2.robjects import r as R
            from rpy2 import robjects as ro
        except Exception:
            E.info("No rpy2. Not plotting image")
            return 0

        from rpy2.robjects.numpy2ri import numpy2ri
        ro.conversion.py2ri = numpy2ri
        ro.numpy2ri.activate()

        # NOTE(review): the file name always ends in ".png", whatever
        # graphics device --format selects - confirm this is intended.
        if options.outfile_pattern:
            plot_outfile = options.outfile_pattern + ".png"
        else:
            plot_outfile = "bam2heatmap_out.png"

        c = R["c"]

        # one pixel per matrix cell plus 72px for the margins set below
        R[options.format](plot_outfile,
                          width=renormalized_matrix.shape[1] + 72,
                          height=renormalized_matrix.shape[0] + 72,
                          unit="px",
                          res=72)
        R.par(mai=c(1, 0.5, 0, 0.5))
        cols = R["colorRampPalette"](c("white", "blue"))(50)
        bases = renormalized_matrix.columns.values.astype("int")
        groups = renormalized_matrix.index.values.astype("int")
        # .values replaces DataFrame.as_matrix(), removed in pandas 1.0
        mat = renormalized_matrix.values
        # cap the signal at the upper end of the colour scale
        mat[mat >= 1] = 1

        R.image(bases, groups, R.t(mat),
                zlim=c(0, 1),
                raster=True,
                col=cols,
                xlab="Base",
                yaxt="n")

        def _sort_and_compress_annotation(anno):
            """Order *anno* like the matrix rows and compress to plot height."""
            sorted_anno = anno.loc[sorter.index]
            comp_anno = iCLIP.compress_matrix(
                sorted_anno, renormalized_matrix.shape[0])
            return comp_anno

        if options.annotations:
            ends = _sort_and_compress_annotation(lengths)
            starts = pandas.Series(0, index=renormalized_matrix.index)

            if options.align_at == "end":
                starts, ends = -1 * ends, starts

            if "start" in options.annotations:
                R.lines(starts.values, starts.index.values, col="black", pch=".")
            if "end" in options.annotations:
                R.lines(ends.values, ends.index.values,
                        pch=".", col="black")
            if "5utr" in options.annotations:
                utr5s = _sort_and_compress_annotation(utr5_lengths)
                utr5s = starts + utr5s
                R.lines(utr5s.values, utr5s.index.values, col="orange", pch=".")
            if "3utr" in options.annotations:
                utr3s = _sort_and_compress_annotation(utr3_lengths)
                utr3s = ends - utr3s
                R.lines(utr3s.values, utr3s.index.values, col="orange", pch=".")

        R["dev.off"]()

    # write footer and output benchmark information.
    E.Stop()

Example #13

Show file

File: r_plot.py Project: yarden/biorpy

def plot_qc_reads(qc_df):
    """
    Plot number of reads part of a pipeline QC file.

    Draws one point per sample and metric with ggplot2 (via rpy2): the
    read counts are shown on a log10-scaled axis, the coordinates are
    flipped and each metric gets its own facet, stacked in one column.

    qc_df is a pandas DataFrame that must provide the columns "sample",
    "num_reads", "num_mapped", "num_unique_mapped" and "num_junctions";
    missing values are plotted as 0.

    Side effect: sets the R option ``scipen`` to 4 and changes the
    default ggplot2 theme; neither is restored afterwards.
    """
    # Metrics to plot; the order also fixes the factor levels and thereby
    # the order of the facets.
    value_columns = ["num_reads",
                     "num_mapped",
                     "num_unique_mapped",
                     "num_junctions"]
    # Record NA values as 0
    qc_df = qc_df.fillna(0)
    qc_df = qc_df[["sample"] + value_columns]
    # Long format: one row per (sample, variable, value)
    melted_qc = pandas.melt(qc_df, id_vars=["sample"])
    qc_r = conversion_pydataframe(melted_qc)
    labels = robj.StrVector(tuple(value_columns))
    variable_i = qc_r.names.index('variable')
    qc_r[variable_i] = robj.FactorVector(qc_r[variable_i],
                                         levels=labels)
    ggplot2.theme_set(ggplot2.theme_bw(12))
    scales = importr("scales")
    r.options(scipen=4)
    p = ggplot2.ggplot(qc_r) + \
        ggplot2.geom_point(aes_string(x="sample", y="value")) + \
        ggplot2.scale_y_continuous(trans=scales.log10_trans(),
                                   breaks=scales.trans_breaks("log10",
                                                              robj.r('function(x) 10^x')),
                                   labels=scales.trans_format("log10",
                                                              robj.r('math_format(10^.x)'))) + \
        r.xlab("CLIP-Seq samples") + \
        r.ylab("No. reads") + \
        ggplot2.coord_flip() + \
        ggplot2.facet_wrap(Formula("~ variable"), ncol=1) + \
        theme(**{"panel.grid.major.x": element_blank(),
                 "panel.grid.minor.x": element_blank(),
                 "panel.grid.major.y": theme_line(size=0.5,colour="grey66",linetype=3)})
    p.plot()