예제 #1
0
파일: plotting.py 프로젝트: yarden/biorpy
def boxPlot(dict_, keysInOrder=None, *args, **kwdargs):
    """Draw a box plot with one box per entry of ``dict_``.

    Args:
        dict_: mapping from group key to the values of that group; each
            value is wrapped in a ``robj.FloatVector``.
        keysInOrder: keys to plot, in plotting order. When falsy (``None``
            or empty) the sorted keys of ``dict_`` are used.
        *args, **kwdargs: forwarded unchanged to ``r.boxplot``.

    Returns:
        Whatever ``r.boxplot`` returns.
    """
    if not keysInOrder:
        keysInOrder = sorted(dict_.keys())

    # Single pass over the keys: (vector, tag) for every group, where the
    # tag is the key prefixed with "X:".
    tagged_groups = [
        (robj.FloatVector(dict_[group_key]), "X:" + str(group_key))
        for group_key in keysInOrder
    ]
    groups = TaggedList(
        [vector for vector, _ in tagged_groups],
        tags=[tag for _, tag in tagged_groups],
    )

    return r.boxplot(groups, names=keysInOrder, *args, **kwdargs)
예제 #2
0
파일: examples.py 프로젝트: yarden/biorpy
def boxPlot(dict_, keysInOrder=None, *args, **kwdargs):
    """Outline of producing a box plot through rpy2.

    NOTE: the original author marked this as a general outline that still
    needs a little work before it actually works.

    Args:
        dict_: mapping from group key to the values of that group.
        keysInOrder: keys to plot, in order; when falsy, ``dict_.keys()``
            is used as-is (no sorting is applied).
        *args, **kwdargs: forwarded unchanged to ``robj.r.boxplot``.

    Returns:
        Whatever ``robj.r.boxplot`` returns.
    """
    keysInOrder = keysInOrder if keysInOrder else dict_.keys()

    # One FloatVector per group, tagged with the key prefixed by "X:".
    groups = TaggedList([])
    for group_key in keysInOrder:
        group_values = robj.FloatVector(dict_[group_key])
        groups.append(group_values, "X:" + str(group_key))

    return robj.r.boxplot(groups, names=robj.StrVector(keysInOrder),
                          *args, **kwdargs)
예제 #3
0
   def bmatrix(self, nrow=100, ncol=10):
       """Create an ``nrow`` x ``ncol`` matrix and fill it cell by cell.

       Args:
           nrow: number of rows (default 100).
           ncol: number of columns (default 10).

       Returns:
           The matrix built by ``self.Bmatrix`` after every cell
           ``(row_i, col_i)`` has been set to ``row_i + col_i * 100``.
       """
       # create a numerical matrix of size nrow x ncol filled with NAs
       m = self.Bmatrix(NA_Real, nrow=nrow, ncol=ncol)

       # fill the matrix; indices run from 1 to nrow / ncol inclusive.
       # ``range`` is used instead of ``xrange`` so the loop also runs on
       # Python 3, where ``xrange`` no longer exists.
       for row_i in range(1, nrow+1):
           for col_i in range(1, ncol+1):
               m.rx[TaggedList((row_i, col_i))] = row_i + col_i * 100
       return m
예제 #4
0
파일: util.py 프로젝트: ConnorJacobs/ymp
def R(code="", **kwargs):
    """Execute R code.

    The R code given as a string is wrapped in an anonymous R function
    whose parameters are the names of the keyword arguments; that function
    is then called with the keyword arguments. The code therefore sees the
    values as local variables and the R global environment stays clean.

    The function requires rpy2 to be installed.

    Args:
        code (str): R code to be executed
        **kwargs (dict): variables made available to the R code; values
            that are ``Namedlist`` instances are converted to rpy2
            ``TaggedList`` objects so they arrive in R as named lists
    Returns:
        Value of the last R statement. An R ``NULL`` is returned as
        ``None``; a truthy result of length 1 is reduced to its single
        element.
    Raises:
        ValueError: if rpy2 cannot be imported.

    Example::

        R("1*1", input=input)
    """
    try:
        import rpy2.robjects as robjects
        from rpy2.rlike.container import TaggedList
        from rpy2.rinterface import RNULLType
    except ImportError as exc:
        # Keep ValueError for existing callers, but preserve the cause.
        raise ValueError("Python 3 package rpy2 needs to be installed to use "
                         "the R function.") from exc

    activate_R()

    # translate Namedlists into rpy2's TaggedList to have named lists in R
    for key, value in list(kwargs.items()):
        if isinstance(value, Namedlist):
            pairs = list(value.allitems())
            kwargs[key] = TaggedList([item for _, item in pairs],
                                     [name for name, _ in pairs])

    # NOTE(review): ``stepout=2`` presumably refers to a caller frame, so
    # this call is kept directly inside R() - confirm before moving it.
    code = snake_format(textwrap.dedent(code), stepout=2)
    # wrap code in function to preserve clean global env and execute
    rfunc = robjects.r("function({}){{ {} }}".format(",".join(kwargs), code))
    rval = rfunc(**kwargs)

    # Reduce vectors of length 1 to scalar, as implicit in R.
    if isinstance(rval, RNULLType):
        rval = None
    if rval and len(rval) == 1:
        return rval[0]
    return rval
예제 #5
0
def build_model(data):
    """Build a ``tsglm`` model from ``data.casos``.

    The model specification is a ``TaggedList`` with ``past_obs=52`` and
    ``past_mean=3``; the distribution is passed as ``'nbinom'``
    (presumably negative binomial - confirm against ``tsglm``).

    Args:
        data: object exposing the series to model as ``data.casos``.

    Returns:
        Whatever ``tsglm`` returns.
    """
    series = data.casos
    model_spec = TaggedList([52, 3], tags=['past_obs', 'past_mean'])
    return tsglm(series, model=model_spec, distr='nbinom')
예제 #6
0
 def c(*args):
     """Collect the positional arguments into an untagged ``TaggedList``."""
     combined = TaggedList(args)
     return combined
예제 #7
0
def sequence_logo(pwm_or_seq,
                  path: Path = None,
                  width=369,
                  height=149,
                  dpi=72,
                  legend=False,
                  renumerate=True,
                  title: str = None,
                  **kwargs):
    """Build a sequence logo with ggseqlogo (R) and optionally save it.

    The input may be a Position Weight Matrix (PWM), a list of aligned
    sequences, or a dict of those (one facet per key).

    Args:
        pwm_or_seq: list of sequences or PWM matrix or dict where
            keys are names of facets and values are lists or PWMs
        path: where the file should be saved; nothing is saved when falsy
        width: width in pixels
        height: height in pixels
        dpi: the DPI of the plotting device
        legend: whether and where the legend should be shown; a falsy
            value hides the legend
        renumerate:
            change the labels of x axis to reflect relative position
            to the modified (central) residue (15-aa sequence is assumed)
        title: the title of the plot
        **kwargs: forwarded unchanged to ``ggseqlogo``

    Returns:
        The assembled plot object.
    """
    gglogo = importr("ggseqlogo")
    ggplot2 = importr("ggplot2")

    # Convert plain Python containers into their R-side counterparts;
    # anything else is handed to ggseqlogo untouched.
    logo_input = pwm_or_seq
    if isinstance(logo_input, list):
        logo_input = StrVector(logo_input)
    elif isinstance(logo_input, dict):
        logo_input = TaggedList(logo_input.values(), logo_input.keys())

    theme_options = {}
    theme_options['legend.position'] = legend or 'none'
    theme_options['legend.title'] = ggplot2.element_blank()
    theme_options['legend.text'] = ggplot2.element_text(size=14)
    theme_options['legend.key.size'] = r.unit(0.2, 'in')
    theme_options['plot.title'] = ggplot2.element_text(hjust=0.5, size=16)
    theme_options['axis.title.y'] = ggplot2.element_text(size=16)
    theme_options['text'] = ggplot2.element_text(size=20)
    theme_options['plot.margin'] = r.unit([0.03, 0.045, -0.2, 0.06], 'in')

    logo = GG(gglogo.ggseqlogo(logo_input, **kwargs))
    plot = logo + ggplot2.theme(**theme_options) + ggplot2.labs(y='bits')

    if renumerate:
        # 15 positions: ticks 1..15 relabelled as -7..7 around the centre.
        positions = IntVector(range(1, 16))
        relative_labels = IntVector(range(-7, 8))
        plot += ggplot2.scale_x_continuous(breaks=positions,
                                           labels=relative_labels)

    if title:
        plot += ggplot2.ggtitle(title)

    if path:
        # Pixel dimensions are converted to inches for ggsave.
        # NOTE(review): the plot object is not passed explicitly, so ggsave
        # presumably saves ggplot2's most recent plot - confirm.
        ggplot2.ggsave(str(path),
                       width=width / dpi,
                       height=height / dpi,
                       dpi=dpi,
                       units='in',
                       bg='transparent')

    return plot
예제 #8
0
def facetedGGSeqLogo(logodata, chars, plotfile, width, height,
        ncol=None, char_colors=AA_COLORS_FG, xlabelsrotate=True):
    """Creates faceted logo plot.

    Designed to show several measurements on the same site
    side-by-side, potentially for many sites. Each site
    must have the same set of measurements.

    The panel of logo plots is faceted on `logodata['facetlabel']`;
    within a facet, each character stack is labeled by
    `logodata['stacklabel']` and shows the characters at the
    heights given in the corresponding row.

    Args:
        `logodata` (pandas DataFrame)
            Contains data to plot. Should have the columns
            `facetlabel`, `stacklabel`, and a column giving the
            height of each character in `chars`.
        `chars` (list)
            Letters for which we plot heights.
        `plotfile` (str)
            Name of created plot. An existing file of this name
            is removed first.
        `width` (float)
            Width of plot in inches.
        `height` (float)
            Height of plot in inches.
        `ncol` (int or `None`)
            Number of columns in faceted plot. If `None`, use
            as many as needed to plot everything in one row.
        `char_colors` (dict)
            Values give color for every character in `chars`.
        `xlabelsrotate` (bool)
            Do we rotate the x-labels?

    Raises:
        AssertionError if `char_colors` or `logodata` lack entries
        for `chars`; RuntimeError if `plotfile` does not exist after
        plotting.

    Here is an example that creates two facets each with
    two stacks for the characters `A` and `C`:

    >>> logodata = pandas.read_csv(io.StringIO(
    ...     '''facetlabel  stacklabel   A   C
    ...            site-1       BF520 0.8 0.2
    ...            site-1       BG505 0.9 0.1
    ...            site-2       BF520 0.4 0.6
    ...            site-2       BG505 0.5 0.5'''),
    ...     delim_whitespace=True, index_col=False)
    >>> plotfile = '_facetedGGSeqLogo_test_plot.png'
    >>> facetedGGSeqLogo(logodata,
    ...         chars=['A', 'C'],
    ...         plotfile=plotfile,
    ...         width=3, height=2.5
    ...         )
    >>> os.path.isfile(plotfile)
    True

    Here is the plot created by the code block above:

    .. image:: _static/_facetedGGSeqLogo_test_plot.png
       :width: 40%
       :align: center

    """
    # Remove any stale output so the existence check at the end really
    # reflects this call.
    if os.path.isfile(plotfile):
        os.remove(plotfile)

    assert set(chars).issubset(set(char_colors.keys())), \
            "`char_colors` not defined for all chars"

    # keep only the needed columns, in a fixed order
    required_cols = ['facetlabel', 'stacklabel'] + chars
    assert set(required_cols).issubset(set(logodata.columns)), \
            "df lacks required columns"
    logodata = logodata[required_cols]

    facets = logodata['facetlabel'].unique()
    stacks = logodata['stacklabel'].unique()
    n_stacks = len(stacks)
    if ncol is None:
        ncol = len(facets)

    # one matrix per facet: rows are `chars`, columns are `stacks`
    facet_matrices = []
    for f in facets:
        # `f` is referenced by name from the query string below
        per_facet = logodata.query('facetlabel == @f')
        per_facet = per_facet.drop('facetlabel', axis=1)
        # stacks missing from this facet become all-zero rows
        per_facet = per_facet.set_index('stacklabel').reindex(stacks).fillna(0)
        heights = per_facet.values.ravel()
        facet_matrices.append(
                r.matrix(heights, ncol=n_stacks, dimnames=[chars, stacks]))
    matrices = ListVector(TaggedList(facet_matrices,
            tags=facets.astype('str')))

    # make the plot
    with warnings.catch_warnings():
        warnings.simplefilter(SHOW_WARNINGS)
        plot_func = _RFUNCS.facetedGGSeqLogo
        plot_func(
                matrices=matrices,
                plotfile=plotfile,
                ncol=ncol,
                width=width,
                height=height,
                xname='',
                xlabels=stacks,
                xlabelsrotate=xlabelsrotate,
                xline=True,
                yname='',
                chars=StrVector(chars),
                char_colors=StrVector([char_colors[ch] for ch in chars])
                )

    if os.path.isfile(plotfile):
        return
    raise RuntimeError("failed to create {0}".format(plotfile))