def boxPlot(dict_, keysInOrder=None, *args, **kwdargs):
    """Draw an R box plot with one box per selected key of ``dict_``.

    Args:
        dict_: mapping whose values are sequences of numbers; each selected
            value is wrapped in an ``robj.FloatVector``.
        keysInOrder: keys to plot, in plotting order. When falsy (``None``
            or empty) all keys of ``dict_`` are used, sorted.
        *args: extra positional arguments forwarded to ``r.boxplot``.
        **kwdargs: extra keyword arguments forwarded to ``r.boxplot``.

    Returns:
        Whatever ``r.boxplot`` returns.
    """
    if not keysInOrder:
        keysInOrder = sorted(dict_.keys())
    # Materialise once: the keys are walked twice (data, then labels), which
    # would silently yield no labels for a one-shot iterable.
    keysInOrder = list(keysInOrder)

    groups = TaggedList([])
    for key in keysInOrder:
        groups.append(robj.FloatVector(dict_[key]), "X:" + str(key))

    # Hand R an explicit character vector instead of relying on rpy2's
    # implicit conversion of a Python list; str() mirrors the tag building
    # above so non-string keys are labelled consistently.
    labels = robj.StrVector([str(key) for key in keysInOrder])
    return r.boxplot(groups, *args, names=labels, **kwdargs)
def boxPlot(dict_, keysInOrder=None, *args, **kwdargs):
    """Draw an R box plot (via rpy2) with one box per selected key.

    NOTE: the original author marked this as a general outline that
    "needs a little work to actually work"; treat it as a starting point.

    Args:
        dict_: mapping whose values are sequences of numbers; each selected
            value is wrapped in an ``robj.FloatVector``.
        keysInOrder: keys to plot, in plotting order. When falsy (``None``
            or empty) all keys of ``dict_`` are used in the mapping's own
            iteration order.
        *args: extra positional arguments forwarded to ``robj.r.boxplot``.
        **kwdargs: extra keyword arguments forwarded to ``robj.r.boxplot``.

    Returns:
        Whatever ``robj.r.boxplot`` returns.
    """
    if not keysInOrder:
        keysInOrder = dict_.keys()
    # Materialise once: on Python 3 ``dict.keys()`` is a view (and callers
    # may pass any iterable), while the keys are needed twice below.
    keysInOrder = list(keysInOrder)

    groups = TaggedList([])
    for key in keysInOrder:
        groups.append(robj.FloatVector(dict_[key]), "X:" + str(key))

    # Stringify explicitly, as the tags above already do, so that building
    # the label vector does not depend on the keys being ``str``.
    labels = robj.StrVector([str(key) for key in keysInOrder])
    return robj.r.boxplot(groups, *args, names=labels, **kwdargs)
def bmatrix(self, nrow=100, ncol=10):
    """Build an ``nrow`` x ``ncol`` matrix whose cells encode their position.

    The matrix is created with ``self.Bmatrix``, initially filled with
    ``NA_Real``; every cell ``(row, col)`` is then assigned
    ``row + col * 100``.

    Args:
        nrow: number of rows (default 100).
        ncol: number of columns (default 10).

    Returns:
        The filled matrix object produced by ``self.Bmatrix``.
    """
    # Start from a matrix of the requested size filled with NAs.
    m = self.Bmatrix(NA_Real, nrow=nrow, ncol=ncol)
    # Indices start at 1. ``range`` is used instead of ``xrange``, which
    # does not exist on Python 3 (``range`` works on both).
    for row_i in range(1, nrow + 1):
        for col_i in range(1, ncol + 1):
            m.rx[TaggedList((row_i, col_i))] = row_i + col_i * 100
    return m
def R(code="", **kwargs):
    """Execute R code and return the value of its last statement.

    The code is wrapped in an anonymous R function whose parameters are the
    names given in ``kwargs``; that function is then called with the
    corresponding values, so the injected variables are visible to the code
    without being written to R's global environment.

    The function requires rpy2 to be installed.

    Args:
        code (str): R code to be executed.
        **kwargs (dict): variables to make available to the R code.
            ``Namedlist`` values are translated to rpy2 ``TaggedList``
            objects so that they arrive in R as named lists.

    Returns:
        ``None`` when R returned ``NULL``; the single element when the
        result has length one (mirroring R, where scalars are length-one
        vectors); otherwise the result object as returned by rpy2.

    Raises:
        ValueError: if rpy2 cannot be imported.

    >>> R("1*1", input=input)
    """
    try:
        import rpy2.robjects as robjects
        from rpy2.rlike.container import TaggedList
        from rpy2.rinterface import RNULLType
    except ImportError as err:
        raise ValueError("Python 3 package rpy2 needs to be installed to use "
                         "the R function.") from err

    activate_R()

    # translate Namedlists into rpy2's TaggedList to have named lists in R
    for key, value in list(kwargs.items()):
        if isinstance(value, Namedlist):
            pairs = list(value.allitems())
            kwargs[key] = TaggedList([item for _, item in pairs],
                                     [tag for tag, _ in pairs])

    # NOTE(review): ``stepout`` presumably counts stack frames relative to
    # this call, so keep this call directly inside ``R`` -- confirm against
    # ``snake_format`` before moving it into a helper.
    code = snake_format(textwrap.dedent(code), stepout=2)

    # wrap code in function to preserve clean global env and execute
    wrapper_src = "function({}){{ {} }}".format(",".join(kwargs), code)
    rval = robjects.r(wrapper_src)(**kwargs)

    if isinstance(rval, RNULLType):
        return None
    # Reduce vectors of length 1 to scalar, as implicit in R. Compare with
    # None explicitly rather than truth-testing the result: truthiness is
    # not length-based for every possible result type (e.g. NumPy arrays,
    # should a converter producing them be active).
    if rval is not None and len(rval) == 1:
        return rval[0]
    return rval
def build_model(data):
    """Fit a ``tsglm`` model to ``data.casos`` and return it.

    The model specification is a tagged list with ``past_obs=52`` and
    ``past_mean=3``, and the distribution is ``'nbinom'``.

    NOTE(review): ``tsglm`` is presumably the R ``tscount`` function exposed
    through rpy2, and ``casos`` presumably a case-count series -- confirm
    against the module's imports and callers.
    """
    series = data.casos
    model_spec = TaggedList([52, 3], tags=['past_obs', 'past_mean'])
    return tsglm(series, model=model_spec, distr='nbinom')
def c(*args):
    """Collect all positional arguments, in order, into a ``TaggedList``.

    No tags are supplied, so tagging is left to ``TaggedList``'s default.
    """
    values = args
    return TaggedList(values)
def sequence_logo(pwm_or_seq, path: Path = None, width=369, height=149, dpi=72,
                  legend=False, renumerate=True, title: str = None, **kwargs):
    """Generate a sequence logo with ggseqlogo (R) and optionally save it.

    The logo is built from a Position Weight Matrix (PWM) or a list of
    aligned sequences; if ``path`` is provided, the plot is also written to
    that file.

    Args:
        pwm_or_seq: list of sequences or PWM matrix
            or dict where keys are names of facets and values are lists or PWMs
        path: where the file should be saved (nothing is saved when falsy)
        renumerate: change the labels of x axis to reflect relative position
            to the modified (central) residue (15-aa sequence is assumed)
        width: width in pixels
        height: height in pixels
        dpi: the DPI of the plotting device
        legend: whether and where the legend should be shown; a falsy value
            hides the legend, any other value is used as ``legend.position``
        title: the title of the plot
        **kwargs: forwarded to ``ggseqlogo``

    Returns:
        The plot object (``GG``) with theme, labels and optional scale and
        title applied.
    """
    gglogo = importr("ggseqlogo")
    ggplot2 = importr("ggplot2")

    if isinstance(pwm_or_seq, list):
        pwm_or_seq = StrVector(pwm_or_seq)
    elif isinstance(pwm_or_seq, dict):
        # Facet names become the tags of the list handed to ggseqlogo.
        pwm_or_seq = TaggedList(pwm_or_seq.values(), pwm_or_seq.keys())

    theme_options = {
        'legend.position': legend or 'none',
        'legend.title': ggplot2.element_blank(),
        'legend.text': ggplot2.element_text(size=14),
        'legend.key.size': r.unit(0.2, 'in'),
        'plot.title': ggplot2.element_text(hjust=0.5, size=16),
        'axis.title.y': ggplot2.element_text(size=16),
        'text': ggplot2.element_text(size=20),
        'plot.margin': r.unit([0.03, 0.045, -0.2, 0.06], 'in'),
    }

    plot = (
        GG(gglogo.ggseqlogo(pwm_or_seq, **kwargs))
        + ggplot2.theme(**theme_options)
        + ggplot2.labs(y='bits')
    )

    if renumerate:
        # Positions 1..15 are relabelled -7..7 around the central residue.
        plot += ggplot2.scale_x_continuous(
            breaks=IntVector(range(1, 14 + 2)),
            labels=IntVector(range(-7, 7 + 1)),
        )
    if title:
        plot += ggplot2.ggtitle(title)

    if path:
        # Pass the plot explicitly: without ``plot=`` ggsave falls back to
        # ggplot2's global ``last_plot()``, which is only implicitly the
        # plot built here. Pixel sizes are converted to inches via ``dpi``.
        ggplot2.ggsave(
            str(path),
            plot=plot,
            width=width / dpi,
            height=height / dpi,
            dpi=dpi,
            units='in',
            bg='transparent',
        )
    return plot
def facetedGGSeqLogo(logodata, chars, plotfile, width, height, ncol=None,
                     char_colors=AA_COLORS_FG, xlabelsrotate=True):
    """Draw a panel of logo plots, one facet per `facetlabel`.

    Intended for showing several measurements of the same site side by side,
    possibly for many sites. Every site should have the same set of
    measurements; a measurement missing from a facet is drawn with all
    heights equal to zero.

    Within each facet there is one character stack per value of
    `logodata['stacklabel']`, with each character drawn at the height
    given in its column.

    Args:
        `logodata` (pandas DataFrame)
            Data to plot. Needs the columns `facetlabel`, `stacklabel`,
            and one height column for each character in `chars`.
        `chars` (list)
            Letters for which we plot heights.
        `plotfile` (str)
            Name of created plot. An existing file of that name is
            removed first.
        `width` (float)
            Width of plot in inches.
        `height` (float)
            Height of plot in inches.
        `ncol` (int or `None`)
            Number of columns in faceted plot. If `None`, use as many
            as needed to plot everything in one row.
        `char_colors` (dict)
            Values give color for every character in `chars`.
        `xlabelsrotate` (bool)
            Do we rotate the x-labels?

    Raises:
        AssertionError: if `char_colors` lacks a character in `chars` or
            `logodata` lacks a required column.
        RuntimeError: if `plotfile` does not exist after plotting.

    Here is an example that creates two facets each with two stacks
    for the characters `A` and `C`:

    >>> logodata = pandas.read_csv(io.StringIO(
    ...     '''facetlabel  stacklabel   A   C
    ...        site-1      BF520      0.8 0.2
    ...        site-1      BG505      0.9 0.1
    ...        site-2      BF520      0.4 0.6
    ...        site-2      BG505      0.5 0.5'''),
    ...     delim_whitespace=True, index_col=False)
    >>> plotfile = '_facetedGGSeqLogo_test_plot.png'
    >>> facetedGGSeqLogo(logodata,
    ...                  chars=['A', 'C'],
    ...                  plotfile=plotfile,
    ...                  width=3, height=2.5
    ...                  )
    >>> os.path.isfile(plotfile)
    True

    Here is the plot created by the code block above:

    .. image:: _static/_facetedGGSeqLogo_test_plot.png
       :width: 40%
       :align: center
    """
    # Remove any stale output so the existence check at the end is meaningful.
    if os.path.isfile(plotfile):
        os.remove(plotfile)

    assert set(chars) <= set(char_colors.keys()), \
        "`char_colors` not defined for all chars"

    # Keep only the needed columns, in a fixed order.
    required_cols = ['facetlabel', 'stacklabel'] + chars
    assert set(logodata.columns) >= set(required_cols), \
        "df lacks required columns"
    logodata = logodata[required_cols]

    facets = logodata['facetlabel'].unique()
    stacks = logodata['stacklabel'].unique()
    if ncol is None:
        ncol = len(facets)

    # One matrix per facet: rows are characters, columns are stacks.
    n_stacks = len(stacks)
    per_facet = []
    for f in facets:  # `f` is referenced by name in the query string below
        heights = logodata.query('facetlabel == @f')
        heights = heights.drop('facetlabel', axis=1)
        heights = heights.set_index('stacklabel')
        heights = heights.reindex(stacks).fillna(0)
        per_facet.append(
            r.matrix(
                heights.values.ravel(),
                ncol=n_stacks,
                dimnames=[chars, stacks],
            )
        )
    facet_names = facets.astype('str')
    matrices = ListVector(TaggedList(per_facet, tags=facet_names))

    # Make the plot under the module's warning policy.
    with warnings.catch_warnings():
        warnings.simplefilter(SHOW_WARNINGS)
        _RFUNCS.facetedGGSeqLogo(
            matrices=matrices,
            plotfile=plotfile,
            ncol=ncol,
            width=width,
            height=height,
            xname='',
            xlabels=stacks,
            xlabelsrotate=xlabelsrotate,
            xline=True,
            yname='',
            chars=StrVector(chars),
            char_colors=StrVector([char_colors[ch] for ch in chars]),
        )

    if os.path.isfile(plotfile):
        return
    raise RuntimeError("failed to create {0}".format(plotfile))