def fetch(self, alias):
    """
    Fetch the documentation page associated with a given alias.

    For S4 classes, the class name is *often* suffixed with '-class'.
    For example, the alias to the documentation for the class
    AnnotatedDataFrame in the package Biobase is
    'AnnotatedDataFrame-class'.

    :param alias: alias looked up in the `rd_alias_meta` table
    :return: a `Page` wrapping the object fetched from the package's
      help database, tagged with the type stored in `rd_meta`
    :raises HelpNotFoundError: when no row matches `alias`
    """
    def by_name(vector, item_name):
        # Positional lookup of a named element through the 'names' slot.
        return vector[vector.do_slot('names').index(item_name)]

    cursor = self._dbcon.execute(
        'SELECT rd_meta_rowid, alias FROM rd_alias_meta WHERE alias=?',
        (alias, ))
    alias_rows = cursor.fetchall()
    if len(alias_rows) == 0:
        raise HelpNotFoundError("No help could be fetched",
                                topic=alias,
                                package=self.__package_name)

    # Only the first matching alias is used. Its rowid comes straight
    # from the database, so this query yields exactly one row.
    cursor = self._dbcon.execute(
        'SELECT file, name, type FROM rd_meta WHERE rowid=?',
        (alias_rows[0][0], ))
    meta_row = cursor.fetchall()[0]

    # The key is the stored file name minus its last three characters.
    rkey = StrSexpVector((meta_row[0][:-3], ))
    page_type = meta_row[2]
    rpath = StrSexpVector((os.path.join(self.package_path,
                                        'help',
                                        self.__package_name + '.rdb'),))
    rdx_variables = by_name(self._rdx, 'variables')

    # R function that takes one argument and does nothing with it.
    devnull_func = rinterface.baseenv['eval'](
        rinterface.parse('function(x) {}'))

    fetched = _lazyload_dbfetch(by_name(rdx_variables, rkey[0]),
                                rpath,
                                by_name(self._rdx, "compressed"),
                                devnull_func)
    return Page(fetched, _type=page_type)
def fetch(self, key):
    """
    Fetch the documentation page associated with a given key.

    - for S4 classes, the class name is *often* prefixed with 'class.'.
      For example, the key to the documentation for the class
      AnnotatedDataFrame in the package Biobase is
      'class.AnnotatedDataFrame'.

    :param key: name of an entry in the 'variables' element of the
      package's help index
    :return: a `Page` built from the fetched object
    :raises HelpNotFoundError: when `key` is not among the names of
      the 'variables' element
    """
    variables = self._rdx.rx2('variables')
    if key not in variables.names:
        raise HelpNotFoundError("No help could be fetched",
                                topic=key,
                                package=self.__package_name)

    rkey = StrSexpVector(rinterface.StrSexpVector((key, )))
    rdb_file = os.path.join(self.package_path,
                            'help',
                            self.__package_name + '.rdb')
    rpath = StrSexpVector((rdb_file, ))

    # R function that takes one argument and does nothing with it.
    devnull_func = rinterface.baseenv['eval'](
        rinterface.parse('function(x) {}'))

    fetched = lazyload_dbfetch(variables.rx(rkey)[0],
                               rpath,
                               self._rdx.rx2("compressed"),
                               devnull_func)
    return Page(fetched)
def convert_to_r_posixct(obj):
    """
    Convert DatetimeIndex or np.datetime array to R POSIXct using
    m8[s] format.

    Parameters
    ----------
    obj : source pandas object (one of [DatetimeIndex, np.datetime])

    Returns
    -------
    An R POSIXct vector (rpy2.robjects.vectors.POSIXct)
    """
    import time
    from rpy2.rinterface import StrSexpVector

    # The 'i8' view is divided by 1E9 (m8[ns] -> m8[s]).
    seconds = robj.vectors.FloatSexpVector(obj.values.view('i8') / 1E9)
    as_posixct = robj.baseenv.get('as.POSIXct')

    # Date string for timestamp 0, used as the origin of `seconds`.
    epoch_day = time.strftime("%Y-%m-%d", time.gmtime(0))
    origin = StrSexpVector([epoch_day, ])

    # Use the zone name carried by `obj` when there is one, else 'UTC'.
    if hasattr(obj, 'tz') and hasattr(obj.tz, 'zone'):
        zone_name = obj.tz.zone
    else:
        zone_name = 'UTC'
    tz = StrSexpVector([zone_name])

    # The values are sent as UTC; the zone is then set through 'tzone'.
    result = as_posixct(seconds, origin=origin, tz=StrSexpVector(['UTC']))
    result.do_slot_assign('tzone', tz)
    return result
def __init__(self, package_name, package_path = None):
    """
    Load the alias information and the help index of an R package.

    :param package_name: name of the R package
    :param package_path: installation path of the package; when None
      it is obtained from `packages.get_packagepath(package_name)`

    Sets `class2methods` (class name -> list of names taken from the
    aliases whose value starts with 'class.'), `object2alias`
    (value -> alias name for all other entries) and `_rdx` (the
    converted content of '<package_name>.rdx').
    """
    self.__package_name = package_name
    if package_path is None:
        package_path = packages.get_packagepath(package_name)
    self.__package_path = package_path

    # FIXME: handle the case of missing "aliases.rds"
    rpath = StrSexpVector((os.path.join(package_path,
                                        'help',
                                        self.__aliases_info), ))
    rds = StrSexpVector(readRDS(rpath))

    class2methods = {}
    object2alias = {}
    # zip() instead of itertools.izip(): izip exists only on Python 2,
    # while zip() iterates the same pairs on both Python 2 and 3.
    for k, v in zip(rds.do_slot('names'), rds):
        if v.startswith("class."):
            classname = v[len("class."):]
            # Keep only the part of the alias name before the first ','.
            class2methods.setdefault(classname, []).append(k.split(',')[0])
        else:
            object2alias[v] = k
    self.class2methods = class2methods
    self.object2alias = object2alias

    rpath = StrSexpVector((os.path.join(package_path,
                                        'help',
                                        package_name + '.rdx'), ))
    self._rdx = conversion.ri2py(readRDS(rpath))
def __init__(self, package_name, package_path=None):
    """
    Read a package's help aliases and help index.

    :param package_name: name of the R package
    :param package_path: path to the installed package; resolved with
      `packages.get_packagepath(package_name)` when None

    After the call, `class2methods` maps each class name (alias values
    of the form 'class.<name>') to a list of alias-name prefixes,
    `object2alias` maps every other alias value to its alias name, and
    `_rdx` holds the converted content of '<package_name>.rdx'.
    """
    self.__package_name = package_name
    if package_path is None:
        package_path = packages.get_packagepath(package_name)
    self.__package_path = package_path

    help_dir = os.path.join(package_path, 'help')

    # FIXME: handle the case of missing "aliases.rds"
    rpath = StrSexpVector((os.path.join(help_dir, self.__aliases_info), ))
    rds = StrSexpVector(readRDS(rpath))

    class_prefix = "class."
    class2methods = {}
    object2alias = {}
    # The builtin zip() replaces itertools.izip(), which was removed in
    # Python 3; the pairs visited are the same.
    for alias_name, target in zip(rds.do_slot('names'), rds):
        if not target.startswith(class_prefix):
            object2alias[target] = alias_name
            continue
        classname = target[len(class_prefix):]
        # Only the text before the first ',' of the alias name is kept.
        method_name = alias_name.split(',')[0]
        class2methods.setdefault(classname, []).append(method_name)
    self.class2methods = class2methods
    self.object2alias = object2alias

    rpath = StrSexpVector((os.path.join(help_dir, package_name + '.rdx'), ))
    self._rdx = conversion.ri2py(readRDS(rpath))
def py2ri_categoryseries(obj):
    """
    Convert a pandas categorical series into an integer vector carrying
    the attributes of an R factor.

    :param obj: series exposing the `.cat` accessor (`categories`,
      `codes`, `ordered`)
    :return: an `IntSexpVector` of the codes shifted by one, with the
      'levels' slot set to the categories and the R class set to
      ('ordered', 'factor') or ('factor',)
    :raises ValueError: if any category is not a `str`
    """
    for c in obj.cat.categories:
        if not isinstance(c, str):
            raise ValueError('Converting pandas "Category" series to R factor is only possible when categories are strings.')
    # Codes are shifted by one (0-based codes -> 1-based positions).
    # NOTE(review): a code of -1 becomes 0 here; confirm how missing
    # values should be represented.
    res = IntSexpVector([x + 1 for x in obj.cat.codes])
    res.do_slot_assign('levels', StrSexpVector(obj.cat.categories))
    # The class names are passed as ONE sequence. Passing them as
    # separate positional arguments, or as a bare string, does not
    # describe a vector of class names.
    if obj.cat.ordered:
        res.rclass = StrSexpVector(('ordered', 'factor'))
    else:
        res.rclass = StrSexpVector(('factor', ))
    return res
def populate_metaRd_db(package_name: str, dbcon,
                       package_path: typing.Optional[str] = None) -> None:
    """
    Populate a database with the meta-information
    associated with an R package: version, description, title, and
    aliases (those are what the R help system is organised around).

    - package_name: a string
    - dbcon: a database connection
    - package_path: path the R package installation (default: None)
    """
    if package_path is None:
        package_path = get_packagepath(package_name)

    # One row in `package`, built from four DESCRIPTION fields.
    package_meta = readRDS(
        StrSexpVector((os.path.join(package_path, __package_meta),)))
    desc = package_meta[package_meta.do_slot('names').index('DESCRIPTION')]
    package_row = tuple(
        desc[desc.do_slot('names').index(field)]
        for field in ('Package', 'Title', 'Version', 'Description'))
    package_rowid = dbcon.execute('insert into package values (?,?,?,?)',
                                  package_row).lastrowid

    # One row in `rd_meta` per help page, plus one row in
    # `rd_alias_meta` per alias of that page.
    rd_meta = readRDS(
        StrSexpVector((os.path.join(package_path, __rd_meta),)))
    column_names = rd_meta.do_slot("names")
    (file_col, name_col, type_col,
     title_col, encoding_col, alias_col) = [
        rd_meta[column_names.index(column)]
        for column in ('File', 'Name', 'Type',
                       'Title', 'Encoding', 'Aliases')]

    # The number of rows is the length of the first column.
    for row_i in range(len(rd_meta[0])):
        inserted = dbcon.execute(
            'insert into rd_meta values (?,?,?,?,?,?,?)',
            (row_i,
             file_col[row_i],
             name_col[row_i],
             type_col[row_i],
             title_col[row_i],
             encoding_col[row_i],
             package_rowid))
        rd_rowid = inserted.lastrowid
        for alias in alias_col[row_i]:
            dbcon.execute('insert into rd_alias_meta values (?,?)',
                          (rd_rowid, alias))
def numpy_O_py2ri(o):
    """
    Convert a sequence of Python objects for R.

    :param o: iterable of objects
    :return: `StrSexpVector(o)` when every element is a string
      (str, bytes, or Python 2 unicode); otherwise the result of
      `conversion.py2ri(list(o))`
    """
    # `unicode` only exists on Python 2. Referring to it directly on
    # Python 3 raised NameError as soon as an element was neither str
    # nor bytes, so the fallback branch below could never be reached.
    try:
        string_types = (str, bytes, unicode)  # noqa: F821 (Python 2)
    except NameError:
        string_types = (str, bytes)

    if all(isinstance(x, string_types) for x in o):
        res = StrSexpVector(o)
    else:
        res = conversion.py2ri(list(o))
    return res
def py2rpy_pandasseries(obj):
    """
    Convert a pandas series to an R vector, dispatching on its dtype.

    - dtype name 'O': converted to a vector of strings, with a warning
    - dtype name 'category': converted through `py2rpy_categoryseries`
      and wrapped in a `FactorVector`
    - any datetime64 dtype: rebuilt from its date/time fields with
      `ISOdatetime` and wrapped in a `POSIXct`
    - dtype equal to `dt_O_type`: all non-None elements must share one
      type, which selects the vector class
    - anything else: converted with the numpy converter

    The index is then stored as 'names' (one-dimensional input) or
    'dimnames'.

    :raises ValueError: when a `dt_O_type` series mixes element types
    """
    dtype_name = obj.dtype.name
    if dtype_name == 'O':
        warnings.warn('Element "%s" is of dtype "O" and converted '
                      'to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif dtype_name == 'category':
        res = FactorVector(py2rpy_categoryseries(obj))
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        tzname = obj.dt.tz.zone if obj.dt.tz else ''
        d = [IntVector([getattr(x, field) for x in obj])
             for field in ('year', 'month', 'day', 'hour', 'minute')]
        # Seconds are floats so that microseconds are kept.
        d.append(
            FloatSexpVector([x.second + x.microsecond * 1e-6 for x in obj]))
        # TODO: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(ISOdatetime(*d, tz=StrSexpVector([tzname])))
    elif obj.dtype == dt_O_type:
        # Find the single type shared by all elements that are not None.
        homogeneous_type = None
        for x in obj.values:
            if x is None:
                continue
            if homogeneous_type is None:
                homogeneous_type = type(x)
            elif type(x) is not homogeneous_type:
                raise ValueError('Series can only be of one type, or None.')
        # TODO: Could this be merged with obj.type.name == 'O' case above ?
        constructors = {
            int: IntVector,
            bool: BoolVector,
            None: BoolVector,
            str: StrVector,
            bytes: numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        }
        res = constructors[homogeneous_type](obj)
    else:
        # converted as a numpy array
        func = numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        # current conversion as performed by numpy
        res = func(obj)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # force into an R vector
            res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim != 1:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    else:
        res.do_slot_assign('names',
                           StrVector(tuple(str(x) for x in obj.index)))
    return res
def _sexp_from_seq(seq, tz_info_getter, isodatetime_columns):
    """
    return a POSIXct vector from a sequence of time.struct_time
    elements.

    :param seq: sequence of elements to convert
    :param tz_info_getter: callable returning the time zone
      information of one element
    :param isodatetime_columns: callable turning `seq` into the
      positional arguments of `POSIXct._ISOdatetime`
    :raises ValueError: when the elements do not all share the same
      time zone information
    """
    tz_info = None
    tz_count = 0
    for elt in conversion.noconversion(seq):
        elt_tz = tz_info_getter(elt)
        if tz_info is None:
            tz_info, tz_count = elt_tz, 1
            continue
        if not (tz_info == elt_tz):
            # different time zones
            # TODO: create a list of time zones with tz_count times
            # tz_info, add the current tz_info and append further.
            raise ValueError(
                'Sequences of dates with different time zones not '
                'yet allowed.')
        tz_count += 1

    if tz_info is None:
        # No time zone information found: fall back on `tzname[0]`.
        tz_info = tzname[0]

    # We could use R's as.POSIXct instead of ISOdatetime
    # since as.POSIXct is used by it anyway, but the overall
    # interface for dates and conversion between formats
    # is not exactly straightforward. Someone with more
    # time should look into this.
    columns = isodatetime_columns(seq)
    return POSIXct._ISOdatetime(*columns, tz=StrSexpVector((tz_info, )))
def numpy_O_py2rpy(o):
    """
    Convert a sequence of Python objects for R.

    :param o: iterable of objects
    :return: a `StrSexpVector` when every element is a `str`, a
      `ByteSexpVector` when every element is `bytes`, otherwise the
      result of `conversion.py2rpy(list(o))`
    """
    if all(isinstance(item, str) for item in o):
        return StrSexpVector(o)
    if all(isinstance(item, bytes) for item in o):
        return ByteSexpVector(o)
    # Mixed or other element types: use the general conversion.
    return conversion.py2rpy(list(o))
def set_accessors(cls, cls_name, where, acs):
    """
    Attach accessors backed by R methods to a Python class.

    :param cls: Python class receiving the attributes
    :param cls_name: R class name used as the method signature
    :param where: package name, looked up as 'package:<where>'; when
      None, `rinterface.globalenv` is used
    :param acs: iterable of
      (r_name, python_name, as_property, docstring) tuples; a
      `python_name` of None means "use `r_name`"
    """
    # set accessors (to be abandonned for the metaclass above ?)
    if where is None:
        where = rinterface.globalenv
    else:
        where = StrSexpVector(("package:" + str(where), ))

    def _make_accessor(r_func, doc):
        # `r_func` is bound per call. A lambda written directly in the
        # loop below would look the name up when invoked, so every
        # accessor would end up calling the LAST method of the loop.
        def accessor(self):
            return r_func(self)
        accessor.__doc__ = doc
        return accessor

    signature = StrSexpVector((cls_name, ))
    for r_name, python_name, as_property, docstring in acs:
        if python_name is None:
            python_name = r_name
        r_meth = getmethod(StrSexpVector((r_name, )),
                           signature=signature,
                           where=where)
        r_meth = conversion.rpy2py(r_meth)
        if as_property:
            setattr(cls, python_name,
                    property(r_meth, None, None, doc=docstring))
        else:
            setattr(cls, python_name, _make_accessor(r_meth, docstring))
def read(path):
    """
    Read an RDS file with R's `readRDS`.

    The file is read under a local converter in which `SexpS4` objects
    go through `ri2ro_s4`.

    :param path: path of the file to read
    :return: the object itself when it is a `lmermod.LMerMod` or a
      `glmermod.GLMerMod`; otherwise the result of
      `robjects.conversion.ri2py` on it
    """
    # NOTE(review): activate() is called on every read; confirm that
    # this side effect is wanted here.
    pandas2ri.activate()
    rpath = StrSexpVector((path, ))

    my_converter = Converter('lme4-aware converter',
                             template=default_converter)
    my_converter.ri2ro.register(SexpS4, ri2ro_s4)

    with localconverter(my_converter):
        # The path is passed as an argument rather than pasted into a
        # string of R code: a quote or a backslash in `path` (e.g. a
        # Windows path) would otherwise break, or alter, the R
        # expression being evaluated.
        obj = robjects.r['readRDS'](rpath)

    if isinstance(obj, (lmermod.LMerMod, glmermod.GLMerMod)):
        return obj
    return robjects.conversion.ri2py(obj)
def __init__(self, package_name, package_path = None):
    """
    Build the in-memory help metadata for an R package.

    :param package_name: name of the R package
    :param package_path: installation path of the package; when None
      it is obtained from `packages.get_packagepath(package_name)`

    An in-memory SQLite database is created and populated with
    `create_metaRd_db` / `populate_metaRd_db`, then stored as
    `_dbcon`. The content of 'help/<package_name>.rdx' is stored as
    `_rdx`.
    """
    self.__package_name = package_name
    if package_path is None:
        package_path = packages.get_packagepath(package_name)
    self.__package_path = package_path

    # The connection is stored on the instance only once populated.
    connection = sqlite3.connect(':memory:')
    create_metaRd_db(connection)
    populate_metaRd_db(package_name, connection,
                       package_path = package_path)
    self._dbcon = connection

    rdx_file = os.path.join(package_path, 'help', package_name + '.rdx')
    self._rdx = readRDS(StrSexpVector((rdx_file, )))
def __new__(mcs, name, bases, cls_dict):
    """
    Create a class whose accessors are backed by R methods.

    The class namespace may define:

    - `__rname__`: R class name (defaults to the Python class name)
    - `__accessors__`: iterable of
      (rname, where, python_name, as_property, docstring) tuples.
      `where` is a package name (None: `rinterface.globalenv`), and a
      `python_name` of None means "use `rname`".

    Each accessor is added to the namespace as a property or as a
    one-argument method before the class is created.
    """
    cls_rname = cls_dict.get('__rname__', name)
    accessors = cls_dict.get('__accessors__', [])

    def _make_accessor(r_func, doc):
        # `r_func` is bound per call. A lambda written directly in the
        # loop below would look the name up when invoked, so every
        # accessor would end up calling the LAST method of the loop.
        def accessor(self):
            return r_func(self)
        accessor.__doc__ = doc
        return accessor

    for rname, where, python_name, as_property, docstring in accessors:
        if where is None:
            where = rinterface.globalenv
        else:
            where = StrSexpVector(('package:%s' % where, ))

        if python_name is None:
            python_name = rname

        signature = StrSexpVector((cls_rname, ))
        r_meth = getmethod(StrSexpVector((rname, )),
                           signature=signature,
                           where=where)
        r_meth = conversion.rpy2py(r_meth)
        if as_property:
            cls_dict[python_name] = property(r_meth, None, None,
                                             doc=docstring)
        else:
            cls_dict[python_name] = _make_accessor(r_meth, docstring)

    return type.__new__(mcs, name, bases, cls_dict)
def __init__(self, obj,
             levels = rinterface.MissingArg,
             labels = rinterface.MissingArg,
             exclude = rinterface.MissingArg,
             ordered = rinterface.MissingArg):
    """
    Build a factor from `obj`.

    :param obj: a `Sexp`, or any object accepted by `StrSexpVector`
    :param levels: passed to `self._factor`
    :param labels: passed to `self._factor`
    :param exclude: passed to `self._factor`
    :param ordered: passed to `self._factor`
    """
    # Anything that is not already an R object becomes a string vector.
    if not isinstance(obj, Sexp):
        obj = StrSexpVector(obj)

    factor_args = dict(levels = levels,
                       labels = labels,
                       exclude = exclude,
                       ordered = ordered)
    r_factor = self._factor(obj, **factor_args)

    # Take over the underlying R object, then set up the delegators.
    self.__sexp__ = r_factor.__sexp__
    self.ro = VectorOperationsDelegator(self)
    self.rx = ExtractDelegator(self)
    self.rx2 = DoubleExtractDelegator(self)
def __init__(self, seq):
    """
    Build the vector from an R object or from a sequence of
    `time.struct_time` elements.

    :param seq: a `Sexp`, or an iterable of `struct_time`
    :raises ValueError: when an element of a non-`Sexp` `seq` is not a
      `struct_time`
    """
    if isinstance(seq, Sexp):
        # `super(self, Vector)(seq)` had its arguments swapped and
        # called the super proxy itself, which always raised TypeError.
        # Delegate to the initializer of `Vector` explicitly instead.
        Vector.__init__(self, seq)
        return

    for elt in seq:
        if not isinstance(elt, struct_time):
            raise ValueError('All elements must inherit from time.struct_time')

    as_posixlt = baseenv_ri['as.POSIXlt']
    # Date string for timestamp 0, used as the origin of the values.
    origin = StrSexpVector([time.strftime("%Y-%m-%d", time.gmtime(0)),])
    rvec = FloatSexpVector([mktime(x) for x in seq])
    sexp = as_posixlt(rvec, origin = origin)
    self.__sexp__ = sexp.__sexp__
def pages(topic):
    """
    Get help pages corresponding to a given topic.

    Every package reported by `packages._packages` for each library
    path in `packages._libpaths()` is searched.

    :param topic: topic passed to `Package.fetch`
    :return: tuple of the pages found (empty when there is none)
    """
    res = list()
    for path in packages._libpaths():
        for name in packages._packages(**{'all.available': True,
                                          'lib.loc': StrSexpVector((path,))}):
            # FIXME: what if the same package is installed
            #        at different locations ?
            pack = Package(name)
            try:
                page = pack.fetch(topic)
            except HelpNotFoundError:
                # No page for this topic in this package: try the next.
                # (`except X, e` was Python-2-only syntax.)
                continue
            res.append(page)
    # The pages were collected but never handed back to the caller.
    return tuple(res)
def _iter_formatted(self, max_items=9):
    """
    Iterate over the formatted elements of the vector.

    With fewer than `max_items` elements, all of them are formatted.
    Otherwise only the first and the last `max_items // 2` elements
    are, separated by a '...' item.

    :param max_items: threshold from which the output is shortened
    :return: generator of `self.repr_format_elt(...)` results
    """
    ln = len(self)
    if ln == 0:
        return
    half_items = max_items // 2
    if ln < max_items:
        str_vec = StrVector(as_character(self))
    else:
        # The indices go through `self.rx`, i.e. R extraction, which
        # is 1-based: the head is 1..half_items and the tail runs up
        # to `ln` INCLUDED. The previous ranges stopped the head at
        # half_items - 2 and left out the last element.
        head = IntSexpVector(tuple(range(1, half_items + 1)))
        tail = IntSexpVector(tuple(range(ln - half_items + 1, ln + 1)))
        str_vec = r_concat(
            as_character(self.rx(head)),
            StrSexpVector(['...']),
            as_character(self.rx(tail)))
    for str_elt in str_vec:
        yield self.repr_format_elt(str_elt)
def _iter_formatted(self, max_items=9):
    """
    Iterate over the formatted elements of the vector.

    With fewer than `max_items` elements, all of them are formatted.
    Otherwise only the first and the last `max_items // 2` elements
    are, separated by a '...' item. An empty vector yields nothing.

    :param max_items: threshold from which the output is shortened
    :return: generator of `self.repr_format_elt(...)` results
    """
    ln = len(self)
    # The emptiness test comes first: a width was previously computed
    # as 52 / len(self) before it (and never used), which raised
    # ZeroDivisionError for an empty vector.
    if ln == 0:
        return
    half_items = max_items // 2
    if ln < max_items:
        str_vec = StrVector(as_character(self))
    else:
        # The indices go through `self.rx`, i.e. R extraction, which
        # is 1-based: the head is 1..half_items and the tail runs up
        # to `ln` INCLUDED. The previous ranges stopped the head at
        # half_items - 2 and left out the last element.
        head = IntSexpVector(tuple(range(1, half_items + 1)))
        tail = IntSexpVector(tuple(range(ln - half_items + 1, ln + 1)))
        str_vec = r_concat(
            as_character(self.rx(head)),
            StrSexpVector(['...']),
            as_character(self.rx(tail)))
    for str_elt in str_vec:
        yield self.repr_format_elt(str_elt)
def __init__(self, obj,
             levels=rinterface.MissingArg,
             labels=rinterface.MissingArg,
             exclude=rinterface.MissingArg,
             ordered=rinterface.MissingArg):
    """
    Build a factor from `obj`.

    :param obj: a `Sexp`, or any object accepted by `StrSexpVector`
    :param levels: passed to `self._factor`
    :param labels: passed to `self._factor`
    :param exclude: passed to `self._factor`
    :param ordered: passed to `self._factor`

    When `obj` already has the R class 'factor' and none of the four
    options is given, it is used as is; otherwise `self._factor` is
    called.
    """
    if not isinstance(obj, Sexp):
        obj = StrSexpVector(obj)
    # `levels` is part of the test: it used to be left out, so an
    # explicit `levels` was silently ignored for an existing factor.
    options = (levels, labels, exclude, ordered)
    if ('factor' in obj.rclass) and \
       all(p is rinterface.MissingArg for p in options):
        res = obj
    else:
        res = self._factor(obj,
                           levels=levels,
                           labels=labels,
                           exclude=exclude,
                           ordered=ordered)
    super(FactorVector, self).__init__(res)
def py2ri_pandasseries(obj):
    """
    Convert a pandas series to an R vector, dispatching on its dtype.

    - dtype name 'O': converted to a vector of strings, with a warning
    - dtype name 'category': converted through `py2ri_categoryseries`
      and wrapped in a `FactorVector`
    - any datetime64 dtype: rebuilt from its date/time fields with
      `ISOdatetime` and wrapped in a `POSIXct`
    - anything else: converted with the numpy converter

    The index is then stored as 'names' (one-dimensional input) or
    'dimnames'.
    """
    # The test is on the dtype of `obj`. It used to read
    # `numpy.dtype.name`, an attribute of the dtype CLASS, which can
    # never be equal to a string.
    # NOTE(review): numpy appears to name the object dtype 'object'
    # rather than 'O'; confirm whether this branch can be reached.
    if obj.dtype.name == 'O':
        warnings.warn(
            'Element "%s" is of dtype "O" and converted to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif obj.dtype.name == 'category':
        res = py2ri_categoryseries(obj)
        res = FactorVector(res)
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        tzname = obj.dt.tz.zone if obj.dt.tz else ''
        # NOTE(review): seconds are whole numbers here, so any
        # sub-second part of the timestamps is not transmitted.
        d = [IntVector([x.year for x in obj]),
             IntVector([x.month for x in obj]),
             IntVector([x.day for x in obj]),
             IntVector([x.hour for x in obj]),
             IntVector([x.minute for x in obj]),
             IntVector([x.second for x in obj])]
        res = ISOdatetime(*d, tz=StrSexpVector([tzname]))
        # FIXME: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(res)
    else:
        # converted as a numpy array
        func = numpy2ri.converter.py2ri.registry[numpy.ndarray]
        # current conversion as performed by numpy
        res = func(obj)
        if len(obj.shape) == 1:
            if (obj.dtype != dt_O_type):
                # force into an R vector
                res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(x) for x in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2ri(obj.index)))
    return res
def __init__(self, obj,
             levels=rinterface.MissingArg,
             labels=rinterface.MissingArg,
             exclude=rinterface.MissingArg,
             ordered=rinterface.MissingArg):
    """
    Build a factor from `obj`.

    :param obj: a `Sexp`, or any object accepted by `StrSexpVector`
    :param levels: passed to `self._factor`
    :param labels: passed to `self._factor`
    :param exclude: passed to `self._factor`
    :param ordered: passed to `self._factor`

    When `obj` already has the R class 'factor' and none of the four
    options is given, it is used as is; otherwise `self._factor` is
    called. The underlying R object is then taken over and the
    `ro`, `rx` and `rx2` delegators are set up.
    """
    if not isinstance(obj, Sexp):
        obj = StrSexpVector(obj)
    # `levels` is part of the test: it used to be left out, so an
    # explicit `levels` was silently ignored for an existing factor.
    options = (levels, labels, exclude, ordered)
    if ('factor' in obj.rclass) and \
       all(p is rinterface.MissingArg for p in options):
        res = obj
    else:
        res = self._factor(obj,
                           levels=levels,
                           labels=labels,
                           exclude=exclude,
                           ordered=ordered)
    self.__sexp__ = res.__sexp__
    self.ro = VectorOperationsDelegator(self)
    self.rx = ExtractDelegator(self)
    self.rx2 = DoubleExtractDelegator(self)
def tuple_str(tpl):
    """ Return `tpl` wrapped in a `StrSexpVector`. """
    return StrSexpVector(tpl)
def getclassdef(cls_name, cls_packagename):
    """
    Get the definition of an R class through `getClassDef`.

    :param cls_name: name of the R class
    :param cls_packagename: second argument given to `getClassDef`
    :return: a `ClassRepresentation` whose `__rname__` is `cls_name`
    """
    get_class_def = methods_env['getClassDef']
    r_definition = get_class_def(StrSexpVector((cls_name,)),
                                 StrSexpVector((cls_packagename, )))
    representation = ClassRepresentation(r_definition)
    representation.__rname__ = cls_name
    return representation
def get_classnames(packname):
    """
    Return, as a tuple, the result of R's `getClasses` called with
    `where` set to 'package:<packname>'.
    """
    get_classes = methods_env['getClasses']
    where = StrSexpVector(("package:%s" % packname, ))
    return tuple(get_classes(where = where))
def __new__(mcs, name, bases, cls_dict):
    """
    Create a Python class mirroring an R S4 class.

    The class namespace may define:

    - `__rname__`: R class name (defaults to the Python class name)
    - `__rpackagename__`: R package defining the class (default None)
    - `__attr_translation__`: mapping R slot name -> Python name
    - `__meth_translation__`: mapping generated method name -> Python
      name

    The namespace receives a `__doc__`, one read-only property per
    slot of the class definition, and one entry per method found by
    `findMethods` for each generic returned by `getGenerics`.
    """
    cls_rname = cls_dict.get('__rname__', name)
    cls_rpackagename = cls_dict.get('__rpackagename__', None)
    cls_attr_translation = cls_dict.get('__attr_translation__', {})
    cls_meth_translation = cls_dict.get('__meth_translation__', {})

    cls_def = getclassdef(cls_rname, cls_rpackagename)

    # documentation / help
    if cls_rpackagename is None:
        cls_dict['__doc__'] = "Undocumented class from the R workspace."
    else:
        pack_help = rhelp.Package(cls_rpackagename)
        page_help = None
        # R's classes are sometimes documented with a suffix '-class':
        # try that alias first, then the bare class name.
        for alias in (cls_def.__rname__ + "-class", cls_def.__rname__):
            try:
                page_help = pack_help.fetch(alias)
            except rhelp.HelpNotFoundError:
                pass
            if page_help is not None:
                break
        if page_help is None:
            cls_dict['__doc__'] = 'Unable to fetch R documentation for the class'
        else:
            cls_dict['__doc__'] = ''.join(page_help.to_docstring())

    for r_slot_name in cls_def.slots:
        # FIXME: sanity check on the slot name
        # The translation only changes the name of the Python
        # attribute; the slot is still read under its R name.
        py_slot_name = cls_attr_translation.get(r_slot_name, r_slot_name)
        # FIXME: isolate the slot documentation and have it here
        # The slot name is bound as a default argument: a plain closure
        # over the loop variable would make every property read the
        # LAST slot of the loop.
        cls_dict[py_slot_name] = property(
            lambda self, _slot=r_slot_name: self.do_slot(_slot),
            None, None, None)

    # Now tackle the methods
    all_generics = methods_env['getGenerics']()
    findmethods = methods_env['findMethods']

    # does not seem elegant, but there is probably nothing else to do
    # than loop across all generics
    r_cls_rname = StrSexpVector((cls_rname, ))
    for funcname in all_generics:
        all_methods = findmethods(StrSexpVector((funcname, )),
                                  classes = r_cls_rname)
        # skip if no methods (issue #301). R's findMethods() result
        # does not have an attribute "names" if of length zero.
        if len(all_methods) == 0:
            continue
        # all_methods contains all method/signature pairs
        # having the class we are considering somewhere in the signature
        # (the R/S4 systems allows multiple dispatch)
        # The loop variable must not be called `name`: that would
        # overwrite the class name given to type.__new__() below.
        for signature_name, meth in zip(all_methods.do_slot("names"),
                                        all_methods):
            # R/S4 is storing each method/signature as a string,
            # with the argument type separated by the character '#'
            # We will re-use that name for the Python name
            # (no multiple dispatch in python, the method name
            # will not be enough), replacing the '#'s with '__'s.
            signature = signature_name.split("#")
            meth_name = '__'.join(signature)
            # function names ending with '<-' indicate that the function
            # is a setter of some sort. We reflect that by adding a 'set_'
            # prefix to the Python name (and of course remove the
            # suffix '<-').
            if funcname.endswith('<-'):
                meth_name = 'set_' + funcname[:-2] + '__' + meth_name
            else:
                meth_name = funcname + '__' + meth_name
            # finally replace remaining '.'s in the Python name with '_'s
            meth_name = meth_name.replace('.', '_')

            # FIXME: sanity check on the function name
            meth_name = cls_meth_translation.get(meth_name, meth_name)

            # FIXME: isolate the slot documentation and have it here
            if meth_name in cls_dict:
                # NOTE(review): `Error` is not defined in the visible
                # part of the file; confirm that it is in scope.
                raise Error("Duplicated attribute/method name.")
            cls_dict[meth_name] = meth

    return type.__new__(mcs, name, bases, cls_dict)
import sys
from types import SimpleNamespace
from rpy2.robjects.robject import RObjectMixin
import rpy2.rinterface as rinterface
from rpy2.rinterface import StrSexpVector
from rpy2.robjects import help as rhelp
from rpy2.robjects import conversion

# R's `::` operator, taken from the base environment and used just
# below to get `getMethod` out of the package 'methods'.
_get_exported_value = rinterface.baseenv['::']
getmethod = _get_exported_value('methods', 'getMethod')
require = rinterface.baseenv.find('require')
# R's require() is called for 'methods', with quiet=TRUE, at import time.
require(StrSexpVector(('methods', )), quiet = rinterface.BoolSexpVector((True, )))


class RS4(RObjectMixin, rinterface.SexpS4):
    """ Python representation of an R instance of class 'S4'. """

    def slotnames(self):
        """ Return the 'slots' defined for this object """
        # NOTE(review): `methods_env` is not defined in the visible part
        # of the file; presumably it is set elsewhere in the module.
        return methods_env['slotNames'](self)

    def do_slot(self, name):
        # The slot is read through the parent class, then passed
        # through `conversion.rpy2py`.
        return conversion.rpy2py(super(RS4, self).do_slot(name))

    @staticmethod
    def isclass(name):
        """ Return whether the given name is a defined class. """
        name = conversion.py2rpy(name)
        # NOTE(review): the visible source stops here, before anything
        # is returned; the rest of this method is outside this view.
def __init__(self, obj):
    """
    Build the vector from `obj`, which is first wrapped in a
    `StrSexpVector` and then handed to the parent initializer.
    """
    str_sexp = StrSexpVector(obj)
    super(StrVector, self).__init__(str_sexp)
import itertools
from rpy2.robjects.robject import RObjectMixin
import rpy2.rinterface as rinterface
from rpy2.rinterface import StrSexpVector
import help as rhelp
import conversion

# R functions looked up by name in the base environment.
getmethod = rinterface.baseenv.get("getMethod")
require = rinterface.baseenv.get('require')
# R's require() is called for 'methods', with quiet=TRUE, at import time.
require(StrSexpVector(('methods', )), quiet=rinterface.BoolSexpVector((True, )))


class RS4(RObjectMixin, rinterface.SexpS4):
    """ Python representation of an R instance of class 'S4'. """

    def slotnames(self):
        """ Return the 'slots' defined for this object """
        # NOTE(review): `methods_env` is not defined in the visible part
        # of the file; presumably it is set elsewhere in the module.
        return methods_env['slotNames'](self)

    def do_slot(self, name):
        # The slot is read through the parent class, then passed
        # through `conversion.ri2py`.
        return conversion.ri2py(super(RS4, self).do_slot(name))

    @staticmethod
    def isclass(name):
        """ Return whether the given name is a defined class. """
        name = conversion.py2ri(name)
        # First element of the result of R's isClass().
        return methods_env['isClass'](name)[0]

    def validobject(self, test=False, complete=False):
        """ Return whether the instance is 'valid' for its class. """
        # NOTE(review): only the docstring is visible; the body of this
        # method is outside this view.
def from_length(length):
    """
    Create a list of given length

    :param length: second argument given to `ListVector._vector`
    :return: the new "list" vector, passed through `conversion.rpy2py`
    """
    mode = StrSexpVector(("list", ))
    r_list = ListVector._vector(mode, length)
    return conversion.rpy2py(r_list)