def py2rpy_pandasseries(obj):
    """Convert a pandas Series into an rpy2 R vector.

    The conversion is selected from ``obj.dtype``:

    - dtype name ``'O'``: a warning is issued and the values are passed
      to ``StrVector``;
    - dtype name ``'category'``: ``py2rpy_categoryseries`` followed by
      ``FactorVector``;
    - any datetime64 dtype: the year/month/day/hour/minute/second
      components of each element are passed to ``ISOdatetime`` and the
      result is wrapped in ``POSIXct``;
    - ``dt_O_type``: every value that is not ``None`` must have the same
      Python type, and that type selects the vector constructor;
    - anything else: the converter registered for ``numpy.ndarray`` in
      ``numpy2ri``.

    The index of the Series is then stored in the ``names`` slot of the
    result (or in ``dimnames`` when ``obj.ndim`` is not 1).

    Args:
        obj: The pandas Series to convert.

    Returns:
        The R object built by the selected constructor.

    Raises:
        ValueError: If a ``dt_O_type`` Series mixes several Python types.
        KeyError: If the common type of a ``dt_O_type`` Series has no
            entry in the constructor table.
    """
    if obj.dtype.name == 'O':
        # NOTE(review): numpy reports the name of its object dtype as
        # 'object', so this branch may never be taken - confirm.
        # The name is wrapped in a 1-tuple so that a tuple-valued Series
        # name is formatted as a whole rather than unpacked by '%'.
        warnings.warn('Element "%s" is of dtype "O" and converted '
                      'to R vector of strings.' % (obj.name,))
        res = StrVector(obj)
    elif obj.dtype.name == 'category':
        res = py2rpy_categoryseries(obj)
        res = FactorVector(res)
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        tz = obj.dt.tz
        if tz:
            # pytz exposes the zone name as `zone`, zoneinfo as `key`;
            # other tzinfo objects (e.g. datetime.timezone.utc) only
            # through str(). The last fallback is best effort: the
            # string may not be a name known to R.
            tzname = (getattr(tz, 'zone', None)
                      or getattr(tz, 'key', None)
                      or str(tz))
        else:
            tzname = ''
        # Materialise the elements once rather than re-iterating the
        # Series for each of the six components.
        stamps = list(obj)
        d = [
            IntVector([x.year for x in stamps]),
            IntVector([x.month for x in stamps]),
            IntVector([x.day for x in stamps]),
            IntVector([x.hour for x in stamps]),
            IntVector([x.minute for x in stamps]),
            FloatSexpVector([x.second + x.microsecond * 1e-6
                             for x in stamps])
        ]
        res = ISOdatetime(*d, tz=StrSexpVector([tzname]))
        # TODO: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(res)
    elif (obj.dtype == dt_O_type):
        # Find the single Python type shared by all values, ignoring None.
        homogeneous_type = None
        for x in obj.values:
            if x is None:
                continue
            if homogeneous_type is None:
                homogeneous_type = type(x)
                continue
            if type(x) is not homogeneous_type:
                raise ValueError('Series can only be of one type, or None.')
        # TODO: Could this be merged with obj.type.name == 'O' case above ?
        # A Series holding only None values (or no value at all) keeps
        # homogeneous_type at None and is mapped to BoolVector.
        res = {
            int: IntVector,
            bool: BoolVector,
            None: BoolVector,
            str: StrVector,
            bytes: numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        }[homogeneous_type](obj)
    else:
        # converted as a numpy array
        func = numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        # current conversion as performed by numpy
        res = func(obj)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # force into an R vector
            res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(x) for x in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res
def _is_missing_scalar(x):
    """Return True when *x* is a scalar missing-value marker (NaN, NA, ...)."""
    if isinstance(x, float):
        return math.isnan(x)
    flag = pandas.isna(x)
    # pandas.isna() returns an array for list-like input; such elements
    # are data, not placeholders.
    return isinstance(flag, bool) and flag


def py2rpy_pandasseries(obj):
    """Convert a pandas Series into an rpy2 R vector.

    The conversion is selected from ``obj.dtype``:

    - dtype name ``'O'``: a warning is issued and the values are passed
      to ``StrVector``;
    - dtype name ``'category'``: ``py2rpy_categoryseries`` followed by
      ``FactorVector``;
    - any datetime64 dtype: the year/month/day/hour/minute/second
      components of each element are passed to ``ISOdatetime`` and the
      result is wrapped in ``POSIXct``;
    - ``obj.dtype.type`` is ``str``: ``_PANDASTYPE2RPY2[str]``;
    - dtype name listed in ``integer_array_types``:
      ``_PANDASTYPE2RPY2[int]``;
    - ``dt_O_type``: every value that is neither ``None`` nor a missing
      marker must have the same Python type, and that type selects the
      entry of ``_PANDASTYPE2RPY2`` to use;
    - anything else: the converter registered for ``numpy.ndarray`` in
      ``numpy2ri``, applied to ``obj.values``.

    The index of the Series is then stored in the ``names`` slot of the
    result (or in ``dimnames`` when ``obj.ndim`` is not 1).

    Args:
        obj: The pandas Series to convert.

    Returns:
        The R object built by the selected constructor.

    Raises:
        ValueError: If a ``dt_O_type`` Series mixes several Python types.
        KeyError: If the common type of a ``dt_O_type`` Series has no
            entry in ``_PANDASTYPE2RPY2``.
    """
    if obj.dtype.name == 'O':
        # NOTE(review): numpy reports the name of its object dtype as
        # 'object', so this branch may never be taken - confirm.
        # The name is wrapped in a 1-tuple so that a tuple-valued Series
        # name is formatted as a whole rather than unpacked by '%'.
        warnings.warn('Element "%s" is of dtype "O" and converted '
                      'to R vector of strings.' % (obj.name,))
        res = StrVector(obj)
    elif obj.dtype.name == 'category':
        res = py2rpy_categoryseries(obj)
        res = FactorVector(res)
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        tz = obj.dt.tz
        if tz:
            # pytz exposes the zone name as `zone`, zoneinfo as `key`;
            # other tzinfo objects (e.g. datetime.timezone.utc) only
            # through str(). The last fallback is best effort: the
            # string may not be a name known to R.
            tzname = (getattr(tz, 'zone', None)
                      or getattr(tz, 'key', None)
                      or str(tz))
        else:
            tzname = ''
        # Materialise the elements once rather than re-iterating the
        # Series for each of the six components.
        stamps = list(obj)
        d = [IntVector([x.year for x in stamps]),
             IntVector([x.month for x in stamps]),
             IntVector([x.day for x in stamps]),
             IntVector([x.hour for x in stamps]),
             IntVector([x.minute for x in stamps]),
             FloatSexpVector([x.second + x.microsecond * 1e-6
                              for x in stamps])]
        res = ISOdatetime(*d, tz=StrSexpVector([tzname]))
        # TODO: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(res)
    elif obj.dtype.type == str:
        res = _PANDASTYPE2RPY2[str](obj)
    elif obj.dtype.name in integer_array_types:
        res = _PANDASTYPE2RPY2[int](obj)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # force into an R vector
            res = as_vector(res)
    elif (obj.dtype == dt_O_type):
        homogeneous_type = None
        # Type of the first missing marker met before any real value;
        # only used when the Series holds nothing but None and markers.
        placeholder_type = None
        for x in obj.values:
            if x is None:
                continue
            if homogeneous_type is None:
                if _is_missing_scalar(x):
                    # A leading NaN/NA must not decide the type: missing
                    # markers are accepted after a real value, so they
                    # are accepted before one as well.
                    if placeholder_type is None:
                        placeholder_type = type(x)
                    continue
                homogeneous_type = type(x)
                continue
            if ((type(x) is not homogeneous_type)
                and not
                ((isinstance(x, float) and math.isnan(x))
                 or pandas.isna(x))):
                raise ValueError(
                    'Series can only be of one type, or None '
                    '(and here we have %s and %s). If happening with '
                    'a pandas DataFrame the method infer_objects() '
                    'will normalize data types before conversion.' %
                    (homogeneous_type, type(x)))
        if homogeneous_type is None:
            # Only None and/or missing markers: use the type of the
            # first marker, or None when there was no marker either.
            homogeneous_type = placeholder_type
        # TODO: Could this be merged with obj.type.name == 'O' case above ?
        res = _PANDASTYPE2RPY2[homogeneous_type](obj)
    else:
        # converted as a numpy array
        func = numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        # current conversion as performed by numpy
        res = func(obj.values)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # force into an R vector
            res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(x) for x in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res