def numpy2ri(o):
    """Convert a numpy array into an rpy2.rinterface-level R structure.

    The conversion is chosen from the array's dtype kind:

    * kinds listed in ``_kinds`` are flattened in column-major order and
      rebuilt through the ``array`` entry of ``rinterface.baseenv``;
    * kind ``"O"`` (array of Python objects) is converted as a Python list;
    * kind ``"V"`` with named fields is converted field by field and passed
      to the ``data.frame`` entry of ``ro.baseenv``.

    Raises:
        ValueError: for non-native byte order, unsigned integer arrays,
            void arrays without field names, or any other dtype kind.
    """
    dtype = o.dtype
    if not dtype.isnative:
        raise ValueError("Cannot pass numpy arrays with non-native byte "
                         "orders at the moment.")

    kind = dtype.kind

    # Most types map onto R arrays.
    if kind in _kinds:
        # "F" requests column-major flattening.
        flat = SexpVector(o.ravel("F"), _kinds[kind])
        shape = SexpVector(o.shape, INTSXP)
        # FIXME: no dimnames ?
        # FIXME: optimize what is below needed/possible ?
        #        (other ways to create R arrays ?)
        return rinterface.baseenv['array'](flat, dim=shape)

    # R does not support unsigned types.
    if kind == "u":
        raise ValueError("Cannot convert numpy array of unsigned values "
                         "-- R does not have unsigned integers.")

    # Array-of-PyObject is treated like a Python list.
    if kind == "O":
        return conversion.py2ri(list(o))

    # Record arrays map onto R data frames.
    if kind == "V":
        field_names = dtype.names
        if field_names is None:
            raise ValueError("Nothing can be done for this numpy array "
                             "type %s at the moment." % (dtype,))
        columns = tuple((field_name, conversion.py2ri(o[field_name]))
                        for field_name in field_names)
        return ro.baseenv["data.frame"].rcall(columns, ro.globalenv)

    # It should be impossible to get here.
    raise ValueError("Unknown numpy array type.")
def py2ri_pandasseries(obj):
    """Convert a pandas Series into an R-side vector object.

    * ``category`` series go through ``py2ri_categoryseries`` and are
      wrapped in a ``FactorVector``.
    * Series whose dtype equals ``dt_datetime64ns_type`` are broken into
      year/month/day/hour/minute/second integer vectors, combined with
      ``ISOdatetime`` and wrapped in ``POSIXct``.
    * Everything else is handed to the converter registered for
      ``numpy.ndarray`` and, unless the dtype equals ``dt_O_type``, forced
      into a vector with ``as_vector``.

    The series index is finally stored, as strings, in the ``names`` slot
    of the result.
    """
    if obj.dtype.name == 'category':
        res = FactorVector(py2ri_categoryseries(obj))
    elif obj.dtype == dt_datetime64ns_type:
        # time series: one integer vector per calendar/clock field
        parts = [
            IntVector([getattr(stamp, field) for stamp in obj])
            for field in ('year', 'month', 'day',
                          'hour', 'minute', 'second')
        ]
        # FIXME: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(ISOdatetime(*parts))
    else:
        # converted as a numpy array, using the currently registered
        # numpy conversion
        ndarray_converter = numpy2ri.converter.py2ri.registry[numpy.ndarray]
        res = ndarray_converter(obj)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # force into an R vector
            res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(label) for label in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2ri(obj.index)))
    return res
def py2rpy_pandasseries(obj):
    """Convert a pandas Series into an R-side vector object.

    Branches are tried in this order:

    1. dtype name ``'O'``: warn and build a ``StrVector``.
    2. dtype name ``'category'``: ``py2rpy_categoryseries`` then
       ``FactorVector``.
    3. any datetime64 dtype: split into date/time fields (seconds carry the
       microsecond fraction), combine with ``ISOdatetime`` using the series
       time zone name (``''`` when there is none) and wrap in ``POSIXct``.
    4. dtype equal to ``dt_O_type``: all non-``None`` values must share one
       exact type, which selects the vector constructor.
    5. otherwise: the converter registered for ``numpy.ndarray``, followed
       by ``as_vector`` unless the dtype equals ``dt_O_type``.

    The series index is finally stored, as strings, in the ``names`` slot.

    Raises:
        ValueError: if an object series mixes element types.
    """
    dtype_name = obj.dtype.name
    # NOTE(review): numpy reports the object dtype's name as 'object', so
    # this first branch looks unreachable for plain object series - confirm.
    if dtype_name == 'O':
        warnings.warn('Element "%s" is of dtype "O" and converted '
                      'to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif dtype_name == 'category':
        res = FactorVector(py2rpy_categoryseries(obj))
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        tz = obj.dt.tz
        tzname = tz.zone if tz else ''
        parts = [
            IntVector([getattr(stamp, field) for stamp in obj])
            for field in ('year', 'month', 'day', 'hour', 'minute')
        ]
        parts.append(
            FloatSexpVector([stamp.second + stamp.microsecond * 1e-6
                             for stamp in obj])
        )
        # TODO: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(ISOdatetime(*parts, tz=StrSexpVector([tzname])))
    elif obj.dtype == dt_O_type:
        element_type = None
        for value in obj.values:
            if value is None:
                continue
            if element_type is None:
                # first non-None value fixes the expected type
                element_type = type(value)
            elif type(value) is not element_type:
                raise ValueError('Series can only be of one type, or None.')
        # TODO: Could this be merged with obj.type.name == 'O' case above ?
        constructors = {
            int: IntVector,
            bool: BoolVector,
            None: BoolVector,
            str: StrVector,
            bytes: numpy2ri.converter.py2rpy.registry[numpy.ndarray],
        }
        res = constructors[element_type](obj)
    else:
        # converted as a numpy array, using the currently registered
        # numpy conversion
        ndarray_converter = numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        res = ndarray_converter(obj)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # force into an R vector
            res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(label) for label in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res
def py2ri_pandasseries(obj):
    """Convert a pandas Series into an R-side vector object.

    Series whose dtype compares equal to ``'<M8[ns]'`` are broken into
    year/month/day/hour/minute/second integer vectors, combined with
    ``ISOdatetime`` and wrapped in ``POSIXct``. Any other series has its
    underlying values converted with ``numpy2ri.numpy2ri``.

    The series index is finally stored, as strings, in the ``names`` slot
    of the result.
    """
    if obj.dtype == '<M8[ns]':
        # time series: one integer vector per calendar/clock field
        parts = [
            IntVector([getattr(stamp, field) for stamp in obj])
            for field in ('year', 'month', 'day',
                          'hour', 'minute', 'second')
        ]
        # FIXME: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(ISOdatetime(*parts))
    else:
        # converted as a numpy array
        res = numpy2ri.numpy2ri(obj.values)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(label) for label in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2ri(obj.index)))
    return res
def _convert_to_r_array(x, force_2d=False): # see <https://bitbucket.org/rpy2/rpy2/src/62be5f3e447776bcb808f48cb68cbbcd8a75d4d3/rpy/robjects/numpy2ri.py> # line 76-82 # copy to avoid any problem. x = np.asarray(x).copy() if force_2d: assert x.ndim in {1, 2} # this is necessary for y variable. otherwise, it would fail for cv.glmnet. if x.ndim == 1: x = x[:, np.newaxis] if x.dtype == np.float64: vec = SexpVector(x.ravel("F"), REALSXP) elif x.dtype == np.int64: vec = SexpVector(x.ravel("F"), INTSXP) else: raise RuntimeError('not implemented for this dtype {}'.format(x.dtype)) dim = SexpVector(x.shape, INTSXP) return rinterface.baseenv['array'](vec, dim=dim)
def py2rpy_pandasseries(obj):
    """Convert a pandas Series into an R-side vector object.

    Branches are tried in this order:

    1. dtype name ``'O'``: warn and build a ``StrVector``.
    2. dtype name ``'category'``: ``py2rpy_categoryseries`` then
       ``FactorVector``.
    3. any datetime64 dtype: split into integer year/month/day/hour/
       minute/second vectors, combine with ``ISOdatetime`` using the series
       time zone name (``''`` when there is none) and wrap in ``POSIXct``.
    4. otherwise: the converter registered for ``numpy.ndarray``, followed
       by ``as_vector`` unless the dtype equals ``dt_O_type``.

    The series index is finally stored, as strings, in the ``names`` slot.
    """
    # Look at the dtype of the series being converted. The earlier test read
    # `numpy.dtype.name`, i.e. the attribute on the dtype class itself, which
    # says nothing about `obj`.
    # NOTE(review): numpy reports the object dtype's name as 'object', so
    # this branch may still not fire for plain object series; switching to a
    # kind-based test would change what such series convert to - confirm the
    # intended behaviour before doing so.
    if obj.dtype.name == 'O':
        warnings.warn('Element "%s" is of dtype "O" and converted to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif obj.dtype.name == 'category':
        res = py2rpy_categoryseries(obj)
        res = FactorVector(res)
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        tzname = obj.dt.tz.zone if obj.dt.tz else ''
        # NOTE(review): seconds are passed as integers, so the sub-second
        # part of each timestamp is not forwarded.
        d = [IntVector([x.year for x in obj]),
             IntVector([x.month for x in obj]),
             IntVector([x.day for x in obj]),
             IntVector([x.hour for x in obj]),
             IntVector([x.minute for x in obj]),
             IntVector([x.second for x in obj])]
        res = ISOdatetime(*d, tz=StrSexpVector([tzname]))
        # FIXME: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(res)
    else:
        # converted as a numpy array
        func = numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        # current conversion as performed by numpy
        res = func(obj)
        if len(obj.shape) == 1:
            if obj.dtype != dt_O_type:
                # force into an R vector
                res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(x) for x in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res
def py2rpy_pandasseries(obj):
    """Convert a pandas Series into an R-side vector object.

    Branches are tried in this order:

    1. dtype name ``'O'``: warn and build a ``StrVector``.
    2. dtype name ``'category'``: ``py2rpy_categoryseries`` then
       ``FactorVector``.
    3. any datetime64 dtype: split into date/time fields (seconds carry the
       microsecond fraction), combine with ``ISOdatetime`` using the series
       time zone name (``''`` when there is none) and wrap in ``POSIXct``.
    4. dtype type ``str``: the ``str`` entry of ``_PANDASTYPE2RPY2``.
    5. dtype name in ``integer_array_types``: the ``int`` entry of
       ``_PANDASTYPE2RPY2``, then ``as_vector``.
    6. dtype equal to ``dt_O_type``: non-``None`` values must share one
       exact type (missing values are tolerated); that type selects the
       ``_PANDASTYPE2RPY2`` entry.
    7. otherwise: the converter registered for ``numpy.ndarray`` applied to
       ``obj.values``, then ``as_vector``.

    The series index is finally stored, as strings, in the ``names`` slot.

    Raises:
        ValueError: if an object series mixes element types.
    """
    dtype_name = obj.dtype.name
    # NOTE(review): numpy reports the object dtype's name as 'object', so
    # this first branch looks unreachable for plain object series - confirm.
    if dtype_name == 'O':
        warnings.warn('Element "%s" is of dtype "O" and converted '
                      'to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif dtype_name == 'category':
        res = FactorVector(py2rpy_categoryseries(obj))
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        tz = obj.dt.tz
        tzname = tz.zone if tz else ''
        parts = [
            IntVector([getattr(stamp, field) for stamp in obj])
            for field in ('year', 'month', 'day', 'hour', 'minute')
        ]
        parts.append(
            FloatSexpVector([stamp.second + stamp.microsecond * 1e-6
                             for stamp in obj])
        )
        # TODO: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(ISOdatetime(*parts, tz=StrSexpVector([tzname])))
    elif obj.dtype.type == str:
        res = _PANDASTYPE2RPY2[str](obj)
    elif obj.dtype.name in integer_array_types:
        res = _PANDASTYPE2RPY2[int](obj)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # force into an R vector
            res = as_vector(res)
    elif obj.dtype == dt_O_type:
        element_type = None
        for value in obj.values:
            if value is None:
                continue
            if element_type is None:
                # first non-None value fixes the expected type
                element_type = type(value)
                continue
            if type(value) is element_type:
                continue
            # A differing type is only accepted for missing values.
            float_nan = isinstance(value, float) and math.isnan(value)
            if float_nan or pandas.isna(value):
                continue
            raise ValueError(
                'Series can only be of one type, or None '
                '(and here we have %s and %s). If happening with '
                'a pandas DataFrame the method infer_objects() '
                'will normalize data types before conversion.'
                % (element_type, type(value)))
        # TODO: Could this be merged with obj.type.name == 'O' case above ?
        res = _PANDASTYPE2RPY2[element_type](obj)
    else:
        # converted as a numpy array, using the currently registered
        # numpy conversion
        ndarray_converter = numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        res = ndarray_converter(obj.values)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # force into an R vector
            res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(label) for label in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res
def simple_string_vector_repr(self):
    """Return the sole element of a length-1 vector, else the old repr.

    Vectors of any other length are delegated to
    ``SexpVector.__old_repr__``.
    """
    return self[0] if len(self) == 1 else SexpVector.__old_repr__(self)
def simple_string_vector_repr(self):
    """Represent a one-element vector by that element itself.

    Any other length falls back to ``SexpVector.__old_repr__``.
    """
    if len(self) != 1:
        return SexpVector.__old_repr__(self)
    return self[0]