Beispiel #1
0
def numpy2ri(o):
    """Convert a numpy array into an rpy2.rinterface-level R structure.

    Dispatch is done on ``o.dtype.kind``:

    * kinds listed in ``_kinds`` become an R array built by R's ``array``
      function (values flattened in column-major order, dimensions taken
      from ``o.shape``);
    * kind ``"O"`` (array of Python objects) is converted like a Python
      list through ``conversion.py2ri``;
    * kind ``"V"`` with named fields is turned into an R ``data.frame``,
      one column per field.

    Raises:
        ValueError: if the array has a non-native byte order, holds
            unsigned values, is a void array without field names, or has
            a dtype kind not handled above.
    """
    dtype = o.dtype
    if not dtype.isnative:
        raise ValueError("Cannot pass numpy arrays with non-native byte orders at the moment.")

    kind = dtype.kind

    # Most types map onto R arrays.
    if kind in _kinds:
        # "F" requests column-major flattening.
        flat = SexpVector(o.ravel("F"), _kinds[kind])
        dims = SexpVector(o.shape, INTSXP)
        # FIXME: no dimnames ?
        # FIXME: is going through R's array() needed, or are there other
        #        ways to create R arrays ?
        return rinterface.baseenv['array'](flat, dim=dims)

    # R does not support unsigned types.
    if kind == "u":
        raise ValueError("Cannot convert numpy array of unsigned values -- R does not have unsigned integers.")

    # Array-of-PyObject is treated like a Python list.
    if kind == "O":
        return conversion.py2ri(list(o))

    # Record arrays map onto R data frames.
    if kind == "V":
        if dtype.names is None:
            raise ValueError("Nothing can be done for this numpy array type %s at the moment." % (o.dtype,))
        columns = tuple(
            (field_name, conversion.py2ri(o[field_name]))
            for field_name in dtype.names
        )
        return ro.baseenv["data.frame"].rcall(columns, ro.globalenv)

    # It should be impossible to get here.
    raise ValueError("Unknown numpy array type.")
Beispiel #2
0
def py2ri_pandasseries(obj):
    """Convert a pandas Series into an R object labelled with its index.

    * ``category`` dtype: converted with ``py2ri_categoryseries`` and
      wrapped in a ``FactorVector``.
    * dtype equal to ``dt_datetime64ns_type``: the year/month/day/hour/
      minute/second components are passed to ``ISOdatetime`` and the
      result is wrapped in ``POSIXct``.
    * anything else: converted with the converter registered for
      ``numpy.ndarray``; a one-dimensional, non-object result is then
      forced into an R vector.

    The stringified index is stored in the ``names`` slot (or the
    converted index in ``dimnames`` when ``obj.ndim`` is not 1).
    """
    if obj.dtype.name == 'category':
        res = FactorVector(py2ri_categoryseries(obj))
    elif obj.dtype == dt_datetime64ns_type:
        # Time series: split every timestamp into its calendar fields.
        years = IntVector([ts.year for ts in obj])
        months = IntVector([ts.month for ts in obj])
        days = IntVector([ts.day for ts in obj])
        hours = IntVector([ts.hour for ts in obj])
        minutes = IntVector([ts.minute for ts in obj])
        seconds = IntVector([ts.second for ts in obj])
        iso = ISOdatetime(years, months, days, hours, minutes, seconds)
        # FIXME: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(iso)
    else:
        # Fall back on the conversion currently registered for numpy arrays.
        ndarray_converter = numpy2ri.converter.py2ri.registry[numpy.ndarray]
        res = ndarray_converter(obj)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # Force into an R vector.
            res = as_vector(res)

    # The pandas "index" plays the role of "names" in R.
    if obj.ndim == 1:
        labels = tuple(map(str, obj.index))
        res.do_slot_assign('names', StrVector(labels))
    else:
        res.do_slot_assign('dimnames', SexpVector(conversion.py2ri(obj.index)))
    return res
Beispiel #3
0
def py2rpy_pandasseries(obj):
    """Convert a pandas Series into an R vector labelled with its index.

    Branches, tested in this order:

    * dtype name ``'O'``: a warning is issued and the series becomes a
      ``StrVector``.
    * ``category`` dtype: ``py2rpy_categoryseries`` wrapped in a
      ``FactorVector``.
    * any datetime64 dtype: calendar fields (seconds include the
      microsecond fraction) are passed to ``ISOdatetime`` together with
      the timezone name, and the result is wrapped in ``POSIXct``.
    * dtype equal to ``dt_O_type``: all non-``None`` values must share one
      exact type, which selects the vector constructor.
    * otherwise: the converter registered for ``numpy.ndarray`` is used
      and a one-dimensional, non-object result is forced into an R vector.

    Raises:
        ValueError: if an object series mixes several Python types.
    """
    if obj.dtype.name == 'O':
        warnings.warn('Element "%s" is of dtype "O" and converted '
                      'to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif obj.dtype.name == 'category':
        res = FactorVector(py2rpy_categoryseries(obj))
    elif is_datetime64_any_dtype(obj.dtype):
        # Time series.
        # NOTE(review): `.zone` looks like a pytz-style attribute; other
        # tzinfo implementations may not expose it -- confirm.
        tzname = obj.dt.tz.zone if obj.dt.tz else ''
        years = IntVector([ts.year for ts in obj])
        months = IntVector([ts.month for ts in obj])
        days = IntVector([ts.day for ts in obj])
        hours = IntVector([ts.hour for ts in obj])
        minutes = IntVector([ts.minute for ts in obj])
        # Keep sub-second precision by folding microseconds into seconds.
        seconds = FloatSexpVector(
            [ts.second + ts.microsecond * 1e-6 for ts in obj]
        )
        iso = ISOdatetime(years, months, days, hours, minutes, seconds,
                          tz=StrSexpVector([tzname]))
        # TODO: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(iso)
    elif obj.dtype == dt_O_type:
        # Find the single Python type shared by all non-None values.
        seen_type = None
        for value in obj.values:
            if value is None:
                continue
            if seen_type is None:
                seen_type = type(value)
            elif type(value) is not seen_type:
                raise ValueError('Series can only be of one type, or None.')
        # TODO: Could this be merged with obj.type.name == 'O' case above ?
        # A series holding only None keeps seen_type at None and is built
        # as a BoolVector.
        builders = {
            int: IntVector,
            bool: BoolVector,
            None: BoolVector,
            str: StrVector,
            bytes: numpy2ri.converter.py2rpy.registry[numpy.ndarray],
        }
        res = builders[seen_type](obj)
    else:
        # Fall back on the conversion currently registered for numpy arrays.
        ndarray_converter = numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        res = ndarray_converter(obj)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # Force into an R vector.
            res = as_vector(res)

    # The pandas "index" plays the role of "names" in R.
    if obj.ndim == 1:
        labels = tuple(map(str, obj.index))
        res.do_slot_assign('names', StrVector(labels))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res
Beispiel #4
0
def py2ri_pandasseries(obj):
    """Convert a pandas Series into an R object labelled with its index.

    A series whose dtype equals ``'<M8[ns]'`` is decomposed into
    year/month/day/hour/minute/second integer vectors, passed to
    ``ISOdatetime`` and wrapped in ``POSIXct``. Any other series is
    converted from its underlying values with ``numpy2ri.numpy2ri``.

    The stringified index is stored in the ``names`` slot (or the
    converted index in ``dimnames`` when ``obj.ndim`` is not 1).
    """
    if obj.dtype == '<M8[ns]':
        # Time series: split every timestamp into its calendar fields.
        years = IntVector([ts.year for ts in obj])
        months = IntVector([ts.month for ts in obj])
        days = IntVector([ts.day for ts in obj])
        hours = IntVector([ts.hour for ts in obj])
        minutes = IntVector([ts.minute for ts in obj])
        seconds = IntVector([ts.second for ts in obj])
        iso = ISOdatetime(years, months, days, hours, minutes, seconds)
        # FIXME: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(iso)
    else:
        # Converted as a numpy array.
        res = numpy2ri.numpy2ri(obj.values)

    # The pandas "index" plays the role of "names" in R.
    if obj.ndim == 1:
        labels = tuple(map(str, obj.index))
        res.do_slot_assign('names', StrVector(labels))
    else:
        res.do_slot_assign('dimnames', SexpVector(conversion.py2ri(obj.index)))
    return res
Beispiel #5
0
def _convert_to_r_array(x, force_2d=False):
    # see <https://bitbucket.org/rpy2/rpy2/src/62be5f3e447776bcb808f48cb68cbbcd8a75d4d3/rpy/robjects/numpy2ri.py>
    # line 76-82
    # copy to avoid any problem.
    x = np.asarray(x).copy()
    if force_2d:
        assert x.ndim in {1, 2}
        # this is necessary for y variable. otherwise, it would fail for cv.glmnet.
        if x.ndim == 1:
            x = x[:, np.newaxis]
    if x.dtype == np.float64:
        vec = SexpVector(x.ravel("F"), REALSXP)
    elif x.dtype == np.int64:
        vec = SexpVector(x.ravel("F"), INTSXP)
    else:
        raise RuntimeError('not implemented for this dtype {}'.format(x.dtype))
    dim = SexpVector(x.shape, INTSXP)
    return rinterface.baseenv['array'](vec, dim=dim)
Beispiel #6
0
def py2rpy_pandasseries(obj):
    """Convert a pandas Series into an R vector labelled with its index.

    Branches, tested in this order:

    * dtype name ``'O'``: a warning is issued and the series becomes a
      ``StrVector``.
    * ``category`` dtype: ``py2rpy_categoryseries`` wrapped in a
      ``FactorVector``.
    * any datetime64 dtype: the calendar fields are passed to
      ``ISOdatetime`` together with the timezone name, and the result is
      wrapped in ``POSIXct``.
    * otherwise: the converter registered for ``numpy.ndarray`` is used
      and a one-dimensional, non-object result is forced into an R vector.

    The stringified index is stored in the ``names`` slot (or the
    converted index in ``dimnames`` when ``obj.ndim`` is not 1).
    """
    # The test must look at the dtype of *this* series; the class-level
    # attribute `numpy.dtype.name` is a descriptor and never equals 'O'.
    if obj.dtype.name == 'O':
        warnings.warn('Element "%s" is of dtype "O" and converted to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif obj.dtype.name == 'category':
        res = py2rpy_categoryseries(obj)
        res = FactorVector(res)
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        # NOTE(review): `.zone` looks like a pytz-style attribute; other
        # tzinfo implementations may not expose it -- confirm.
        tzname = obj.dt.tz.zone if obj.dt.tz else ''
        d = [IntVector([x.year for x in obj]),
             IntVector([x.month for x in obj]),
             IntVector([x.day for x in obj]),
             IntVector([x.hour for x in obj]),
             IntVector([x.minute for x in obj]),
             IntVector([x.second for x in obj])]
        res = ISOdatetime(*d, tz=StrSexpVector([tzname]))
        # FIXME: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(res)
    else:
        # converted as a numpy array
        func = numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        # current conversion as performed by numpy
        res = func(obj)
        if len(obj.shape) == 1:
            if (obj.dtype != dt_O_type):
                # force into an R vector
                res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(x) for x in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res
Beispiel #7
0
def py2rpy_pandasseries(obj):
    """Convert a pandas Series into an R vector labelled with its index.

    Branches, tested in this order:

    * dtype name ``'O'``: a warning is issued and the series becomes a
      ``StrVector``.
    * ``category`` dtype: ``py2rpy_categoryseries`` wrapped in a
      ``FactorVector``.
    * any datetime64 dtype: calendar fields (seconds include the
      microsecond fraction) are passed to ``ISOdatetime`` together with
      the timezone name, and the result is wrapped in ``POSIXct``.
    * dtype whose ``type`` is ``str``: built with ``_PANDASTYPE2RPY2[str]``.
    * dtype name in ``integer_array_types``: built with
      ``_PANDASTYPE2RPY2[int]`` and, when one-dimensional and not of
      object dtype, forced into an R vector.
    * dtype equal to ``dt_O_type``: the type of the first non-``None``
      value selects the constructor in ``_PANDASTYPE2RPY2``; later values
      of another type are tolerated only when they are NaN/NA.
    * otherwise: the converter registered for ``numpy.ndarray`` is applied
      to ``obj.values``; a one-dimensional, non-object result is forced
      into an R vector.

    Raises:
        ValueError: if an object series mixes several Python types.
    """
    if obj.dtype.name == 'O':
        warnings.warn('Element "%s" is of dtype "O" and converted '
                      'to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif obj.dtype.name == 'category':
        res = FactorVector(py2rpy_categoryseries(obj))
    elif is_datetime64_any_dtype(obj.dtype):
        # Time series.
        # NOTE(review): `.zone` looks like a pytz-style attribute; other
        # tzinfo implementations may not expose it -- confirm.
        tzname = obj.dt.tz.zone if obj.dt.tz else ''
        years = IntVector([ts.year for ts in obj])
        months = IntVector([ts.month for ts in obj])
        days = IntVector([ts.day for ts in obj])
        hours = IntVector([ts.hour for ts in obj])
        minutes = IntVector([ts.minute for ts in obj])
        # Keep sub-second precision by folding microseconds into seconds.
        seconds = FloatSexpVector(
            [ts.second + ts.microsecond * 1e-6 for ts in obj]
        )
        iso = ISOdatetime(years, months, days, hours, minutes, seconds,
                          tz=StrSexpVector([tzname]))
        # TODO: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(iso)
    elif obj.dtype.type == str:
        res = _PANDASTYPE2RPY2[str](obj)
    elif obj.dtype.name in integer_array_types:
        res = _PANDASTYPE2RPY2[int](obj)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # Force into an R vector.
            res = as_vector(res)
    elif obj.dtype == dt_O_type:
        # The first non-None value fixes the expected element type.
        seen_type = None
        for value in obj.values:
            if value is None:
                continue
            value_type = type(value)
            if seen_type is None:
                seen_type = value_type
            elif value_type is not seen_type:
                # Missing-value markers are allowed next to the main type.
                is_missing = ((isinstance(value, float) and math.isnan(value))
                              or pandas.isna(value))
                if not is_missing:
                    raise ValueError(
                        'Series can only be of one type, or None '
                        '(and here we have %s and %s). If happening with '
                        'a pandas DataFrame the method infer_objects() '
                        'will normalize data types before conversion.' %
                        (seen_type, value_type))
        # TODO: Could this be merged with obj.type.name == 'O' case above ?
        res = _PANDASTYPE2RPY2[seen_type](obj)
    else:
        # Fall back on the conversion currently registered for numpy arrays,
        # applied to the underlying values.
        ndarray_converter = numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        res = ndarray_converter(obj.values)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # Force into an R vector.
            res = as_vector(res)

    # The pandas "index" plays the role of "names" in R.
    if obj.ndim == 1:
        labels = tuple(map(str, obj.index))
        res.do_slot_assign('names', StrVector(labels))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res
Beispiel #8
0
def simple_string_vector_repr(self):
    """Return the sole element of a length-1 vector as its repr.

    Vectors of any other length are delegated to
    ``SexpVector.__old_repr__``.
    """
    if len(self) != 1:
        return SexpVector.__old_repr__(self)
    return self[0]
Beispiel #9
0
def simple_string_vector_repr(self):
    """Represent a one-element vector by that element itself.

    Any other length is handed to ``SexpVector.__old_repr__``.
    """
    has_single_item = len(self) == 1
    return self[0] if has_single_item else SexpVector.__old_repr__(self)