Esempio n. 1
0
def _map(f, arr, na_mask=False, na_value=np.nan, dtype=object):
    from pandas.core.series import Series

    if not len(arr):
        return np.ndarray(0, dtype=dtype)

    if isinstance(arr, Series):
        arr = arr.values
    if not isinstance(arr, np.ndarray):
        arr = np.asarray(arr, dtype=object)
    if na_mask:
        mask = isnull(arr)
        try:
            result = lib.map_infer_mask(arr, f, mask.view(np.uint8))
        except (TypeError, AttributeError):
            def g(x):
                try:
                    return f(x)
                except (TypeError, AttributeError):
                    return na_value
            return _map(g, arr, dtype=dtype)
        if na_value is not np.nan:
            np.putmask(result, mask, na_value)
            if result.dtype == object:
                result = lib.maybe_convert_objects(result)
        return result
    else:
        return lib.map_infer(arr, f)
Esempio n. 2
0
    def _format_strings(self):
        """
        Return one formatted string per element of ``self.values``.

        Elements that ``com.is_float`` accepts and that are not null go
        through the float formatter (``self.float_format``, else
        ``print_config.float_format``, else a ``'% .<precision>g'``
        template).  Every other element is rendered by ``self.formatter``
        (default ``com.pprint_thing``) and prefixed with a single space.
        """
        float_format = self.float_format
        if float_format is None:
            float_format = print_config.float_format
            if float_format is None:
                template = '%% .%dg' % print_config.precision

                def float_format(x):
                    return template % x

        formatter = self.formatter
        if formatter is None:
            formatter = com.pprint_thing

        def _format(x):
            if self.na_rep is None or not lib.checknull(x):
                # object dtype
                return '%s' % formatter(x)
            # null value: None keeps its own name, the rest use na_rep
            return 'None' if x is None else self.na_rep

        vals = self.values
        float_mask = lib.map_infer(vals, com.is_float) & notnull(vals)

        # Non-float entries always receive the one-space prefix.
        return [float_format(v) if is_flt else ' %s' % _format(v)
                for is_flt, v in zip(float_mask, vals)]
Esempio n. 3
0
def _map(f, arr, na_mask=False, na_value=np.nan):
    if isinstance(arr, Series):
        arr = arr.values
    if not isinstance(arr, np.ndarray):
        arr = np.asarray(arr, dtype=object)
    if na_mask:
        mask = isnull(arr)
        try:
            result = lib.map_infer_mask(arr, f, mask.view(np.uint8))
        except (TypeError, AttributeError):

            def g(x):
                try:
                    return f(x)
                except (TypeError, AttributeError):
                    return na_value

            return _map(g, arr)
        if na_value is not np.nan:
            np.putmask(result, mask, na_value)
            if result.dtype == object:
                result = lib.maybe_convert_objects(result)
        return result
    else:
        return lib.map_infer(arr, f)
Esempio n. 4
0
def str_get_dummies(arr, sep='|'):
    """
    Build a frame of 0/1 indicator columns from ``sep``-delimited strings.

    Every distinct non-empty token found after splitting the elements on
    ``sep`` becomes a column; a cell is 1 when that token occurs in the
    corresponding element.  Missing values are treated as empty strings.

    Parameters
    ----------
    sep : string, default "|"
        String to split on.

    Returns
    -------
    dummies : DataFrame

    Raises
    ------
    TypeError
        If ``arr`` is an Index.

    Examples
    --------
    >>> Series(['a|b', 'a', 'a|c']).str.get_dummies()
       a  b  c
    0  1  1  0
    1  1  0  0
    2  1  0  1

    >>> Series(['a|b', np.nan, 'a|c']).str.get_dummies()
       a  b  c
    0  1  1  0
    1  0  0  0
    2  1  0  1

    See Also
    --------
    pandas.get_dummies
    """
    from pandas.core.frame import DataFrame
    from pandas.core.index import Index

    # GH9980, Index.str does not support get_dummies() as it returns a frame
    if isinstance(arr, Index):
        raise TypeError(
            "get_dummies is not supported for string methods on Index")

    # TODO remove this hack?
    filled = arr.fillna('')
    try:
        wrapped = sep + filled + sep
    except TypeError:
        wrapped = sep + filled.astype(str) + sep

    seen = set()
    for pieces in wrapped.str.split(sep):
        seen.update(pieces)
    seen.discard("")
    labels = sorted(seen)

    indicator = np.empty((len(wrapped), len(labels)), dtype=np.int64)

    # Each element is wrapped in sep on both sides, so a plain substring
    # test for sep + label + sep matches whole tokens only.
    for col, label in enumerate(labels):
        needle = sep + label + sep
        indicator[:, col] = lib.map_infer(wrapped.values,
                                          lambda text: needle in text)
    return DataFrame(indicator, wrapped.index, labels)
Esempio n. 5
0
def str_get_dummies(arr, sep='|'):
    """
    Build a frame of 0/1 indicator columns from ``sep``-delimited strings.

    Every distinct non-empty token found after splitting the elements on
    ``sep`` becomes a column; a cell is 1 when that token occurs in the
    corresponding element.  Missing values are treated as empty strings.

    Parameters
    ----------
    sep : string, default "|"
        String to split on.

    Returns
    -------
    dummies : DataFrame

    Raises
    ------
    TypeError
        If ``arr`` is an Index.

    Examples
    --------
    >>> Series(['a|b', 'a', 'a|c']).str.get_dummies()
       a  b  c
    0  1  1  0
    1  1  0  0
    2  1  0  1

    >>> Series(['a|b', np.nan, 'a|c']).str.get_dummies()
       a  b  c
    0  1  1  0
    1  0  0  0
    2  1  0  1

    See Also
    --------
    pandas.get_dummies
    """
    from pandas.core.frame import DataFrame
    from pandas.core.index import Index

    # GH9980, Index.str does not support get_dummies() as it returns a frame
    if isinstance(arr, Index):
        raise TypeError("get_dummies is not supported for string methods on "
                        "Index")

    # TODO remove this hack?
    filled = arr.fillna('')
    try:
        wrapped = sep + filled + sep
    except TypeError:
        wrapped = sep + filled.astype(str) + sep

    seen = set()
    for pieces in wrapped.str.split(sep):
        seen.update(pieces)
    seen.discard("")
    labels = sorted(seen)

    indicator = np.empty((len(wrapped), len(labels)), dtype=np.int64)

    # Each element is wrapped in sep on both sides, so a plain substring
    # test for sep + label + sep matches whole tokens only.
    for col, label in enumerate(labels):
        needle = sep + label + sep
        indicator[:, col] = lib.map_infer(wrapped.values,
                                          lambda text: needle in text)
    return DataFrame(indicator, wrapped.index, labels)
Esempio n. 6
0
def _dt_box_array(arr, offset=None, tz=None):
    if arr is None:
        return arr

    if not isinstance(arr, np.ndarray):
        return arr

    boxfunc = lambda x: Timestamp(x, offset=offset, tz=tz)
    return lib.map_infer(arr, boxfunc)
Esempio n. 7
0
def _dt_box_array(arr, offset=None, tz=None):
    if arr is None:
        return arr

    if not isinstance(arr, np.ndarray):
        return arr

    boxfunc = lambda x: Timestamp(x, offset=offset, tz=tz)
    return lib.map_infer(arr, boxfunc)
Esempio n. 8
0
def auto_map(arr, f, otherargs, n_results=1, required="all"):
    """
    Map ``f`` over ``arr`` with extra arguments taken from ``otherargs``.

    When every entry of ``otherargs`` is a scalar (``np.isscalar``), ``f``
    is called as ``f(value, *otherargs)`` for each element and a single
    Series indexed like ``arr`` is returned.  Otherwise the work is
    delegated to ``map_iter_args`` over ``azip(*otherargs)`` and each
    resulting column is wrapped in a Series.

    Parameters
    ----------
    arr : object exposing ``.index``
        Values to map over.
    f : callable
    otherargs : sequence
        Additional arguments for ``f``.
    n_results : int, default 1
        Forwarded to ``map_iter_args``; when 1, the single Series is
        returned instead of a one-element list.
    required : "all" or sequence, default "all"
        Forwarded to ``map_iter_args``; "all" is expanded to every position
        of ``otherargs``.

    Returns
    -------
    Series, or list of Series when ``n_results`` is not 1.
    """
    from pandas.core.series import Series

    if all(np.isscalar(extra) for extra in otherargs):
        def call_with_extras(value):
            return f(value, *otherargs)

        mapped = lib.map_infer(arr, call_with_extras)
        return Series(mapped, index=arr.index, copy=False)

    n_otherargs = len(otherargs)
    if required == "all":
        required = list(range(n_otherargs))

    columns = map_iter_args(arr, f, azip(*otherargs), n_otherargs, required, n_results)
    wrapped = [Series(col, index=arr.index, copy=False) for col in columns]
    return wrapped[0] if n_results == 1 else wrapped
Esempio n. 9
0
def auto_map(arr, f, otherargs, n_results=1, required='all'):
    """
    Map ``f`` over ``arr`` with extra arguments taken from ``otherargs``.

    When every entry of ``otherargs`` is a scalar (``np.isscalar``), ``f``
    is called as ``f(value, *otherargs)`` for each element and a single
    Series indexed like ``arr`` is returned.  Otherwise the work is
    delegated to ``map_iter_args`` over ``azip(*otherargs)`` and each
    resulting column is wrapped in a Series.

    Parameters
    ----------
    arr : object exposing ``.index``
        Values to map over.
    f : callable
    otherargs : sequence
        Additional arguments for ``f``.
    n_results : int, default 1
        Forwarded to ``map_iter_args``; when 1, the single Series is
        returned instead of a one-element list.
    required : 'all' or sequence, default 'all'
        Forwarded to ``map_iter_args``; 'all' is expanded to every position
        of ``otherargs``.

    Returns
    -------
    Series, or list of Series when ``n_results`` is not 1.
    """
    from pandas.core.series import Series

    if all(np.isscalar(extra) for extra in otherargs):
        def call_with_extras(value):
            return f(value, *otherargs)

        mapped = lib.map_infer(arr, call_with_extras)
        return Series(mapped, index=arr.index, copy=False)

    n_otherargs = len(otherargs)
    if required == 'all':
        required = list(range(n_otherargs))

    columns = map_iter_args(arr, f, azip(*otherargs), n_otherargs, required,
                            n_results)
    wrapped = [Series(col, index=arr.index, copy=False) for col in columns]
    return wrapped[0] if n_results == 1 else wrapped
Esempio n. 10
0
    def _format_strings(self, use_unicode=False):
        """
        Return one formatted string per element of ``self.values``.

        Elements that ``com.is_float`` accepts and that are not null go
        through the float formatter (``self.float_format``, else
        ``print_config.float_format``, else a ``"% .<precision>g"``
        template).  Every other element is rendered by the object formatter
        and prefixed with a single space.

        Parameters
        ----------
        use_unicode : bool, default False
            Selects the default object formatter used when
            ``self.formatter`` is None: ``_stringify`` with
            ``print_config.encoding`` when true, plain ``str`` otherwise.
        """
        float_format = self.float_format
        if float_format is None:
            float_format = print_config.float_format
            if float_format is None:
                template = "%% .%dg" % print_config.precision

                def float_format(x):
                    return template % x

        formatter = self.formatter
        if formatter is None:
            if use_unicode:
                def formatter(x):
                    return _stringify(x, print_config.encoding)
            else:
                formatter = str

        def _format(x):
            if self.na_rep is None or not lib.checknull(x):
                # object dtype
                return "%s" % formatter(x)
            # null value: None keeps its own name, the rest use na_rep
            return "None" if x is None else self.na_rep

        vals = self.values
        float_mask = lib.map_infer(vals, com.is_float) & notnull(vals)

        # Non-float entries always receive the one-space prefix.
        return [float_format(v) if is_flt else " %s" % _format(v)
                for is_flt, v in zip(float_mask, vals)]
Esempio n. 11
0
def str_get_dummies(arr, sep='|'):
    """
    Build a frame of 0/1 indicator columns from ``sep``-delimited strings.

    Every distinct non-empty token found after splitting the elements on
    ``sep`` becomes a column; a cell is 1 when that token occurs in the
    corresponding element.  Missing values are treated as empty strings.

    Examples
    --------
    >>> Series(['a|b', 'a', 'a|c']).str.get_dummies()
       a  b  c
    0  1  1  0
    1  1  0  0
    2  1  0  1

    >>> pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies()
       a  b  c
    0  1  1  0
    1  0  0  0
    2  1  0  1

    See also ``pd.get_dummies``.

    """
    from pandas.core.frame import DataFrame

    # TODO remove this hack?
    filled = arr.fillna('')
    try:
        wrapped = sep + filled + sep
    except TypeError:
        wrapped = sep + filled.astype(str) + sep

    seen = set()
    for pieces in wrapped.str.split(sep):
        seen.update(pieces)
    seen.discard("")
    labels = sorted(seen)

    indicator = np.empty((len(wrapped), len(labels)), dtype=np.int64)

    # Each element is wrapped in sep on both sides, so a plain substring
    # test for sep + label + sep matches whole tokens only.
    for col, label in enumerate(labels):
        needle = sep + label + sep
        indicator[:, col] = lib.map_infer(wrapped.values,
                                          lambda text: needle in text)
    return DataFrame(indicator, wrapped.index, labels)
Esempio n. 12
0
def str_get_dummies(arr, sep='|'):
    """
    Build a frame of 0/1 indicator columns from ``sep``-delimited strings.

    Every distinct non-empty token found after splitting the elements on
    ``sep`` becomes a column; a cell is 1 when that token occurs in the
    corresponding element.  Missing values are treated as empty strings.

    Examples
    --------
    >>> Series(['a|b', 'a', 'a|c']).str.get_dummies()
       a  b  c
    0  1  1  0
    1  1  0  0
    2  1  0  1

    >>> pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies()
       a  b  c
    0  1  1  0
    1  0  0  0
    2  1  0  1

    See also ``pd.get_dummies``.

    """
    from pandas.core.frame import DataFrame

    # TODO remove this hack?
    filled = arr.fillna('')
    try:
        wrapped = sep + filled + sep
    except TypeError:
        wrapped = sep + filled.astype(str) + sep

    seen = set()
    for pieces in wrapped.str.split(sep):
        seen.update(pieces)
    seen.discard("")
    labels = sorted(seen)

    indicator = np.empty((len(wrapped), len(labels)), dtype=np.int64)

    # Each element is wrapped in sep on both sides, so a plain substring
    # test for sep + label + sep matches whole tokens only.
    for col, label in enumerate(labels):
        needle = sep + label + sep
        indicator[:, col] = lib.map_infer(wrapped.values,
                                          lambda text: needle in text)
    return DataFrame(indicator, wrapped.index, labels)
Esempio n. 13
0
    def _format_strings(self):
        """
        Return one formatted string per element of ``self.values``.

        Elements that ``com.is_float`` accepts and that are not null go
        through the float formatter (``self.float_format``, else the
        "display.float_format" option, else a ``'% .<precision>g'`` template
        built from "display.precision").  Every other element is rendered by
        ``self.formatter`` (default: ``com.pprint_thing`` escaping tab, CR
        and LF) and prefixed with a single space.
        """
        float_format = self.float_format
        if float_format is None:
            float_format = get_option("display.float_format")
            if float_format is None:
                template = '%% .%dg' % get_option("display.precision")

                def float_format(x):
                    return template % x

        formatter = self.formatter
        if formatter is None:
            def formatter(x):
                return com.pprint_thing(x, escape_chars=('\t', '\r', '\n'))

        def _format(x):
            if self.na_rep is None or not lib.checknull(x):
                # object dtype
                return '%s' % formatter(x)
            # null value: None keeps its own name, the rest use na_rep
            return 'None' if x is None else self.na_rep

        vals = self.values
        float_mask = lib.map_infer(vals, com.is_float) & notnull(vals)

        # Non-float entries always receive the one-space prefix.
        return [float_format(v) if is_flt else ' %s' % _format(v)
                for is_flt, v in zip(float_mask, vals)]
Esempio n. 14
0
    def _format_strings(self):
        """
        Return one formatted string per element of ``self.values``.

        Elements that ``com.is_float`` accepts and that are not null go
        through the float formatter (``self.float_format``, else the
        "print.float_format" option, else a ``'% .<precision>g'`` template
        built from "print.precision").  Every other element is rendered by
        ``self.formatter`` (default: ``com.pprint_thing`` escaping tab, CR
        and LF) and prefixed with a single space.
        """
        float_format = self.float_format
        if float_format is None:
            float_format = get_option("print.float_format")
            if float_format is None:
                template = '%% .%dg' % get_option("print.precision")

                def float_format(x):
                    return template % x

        formatter = self.formatter
        if formatter is None:
            def formatter(x):
                return com.pprint_thing(x, escape_chars=('\t', '\r', '\n'))

        def _format(x):
            if self.na_rep is None or not lib.checknull(x):
                # object dtype
                return '%s' % formatter(x)
            # null value: None keeps its own name, the rest use na_rep
            return 'None' if x is None else self.na_rep

        vals = self.values
        float_mask = lib.map_infer(vals, com.is_float) & notnull(vals)

        # Non-float entries always receive the one-space prefix.
        return [float_format(v) if is_flt else ' %s' % _format(v)
                for is_flt, v in zip(float_mask, vals)]
Esempio n. 15
0
    def _convert_to_indexer(self, obj, axis=0):
        """
        Convert indexing key into something we can use to do actual fancy
        indexing on an ndarray

        Examples
        ix[:5] -> slice(0, 5)
        ix[[1,2,3]] -> [1,2,3]
        ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

        Going by Zen of Python?
        "In the face of ambiguity, refuse the temptation to guess."
        raise AmbiguousIndexError with integer labels?
        - No, prefer label-based indexing

        Parameters
        ----------
        obj : scalar, slice or list-like
            The key to translate.
        axis : int, default 0
            Axis of ``self.obj`` whose labels are consulted.

        Returns
        -------
        Depending on the key: ``obj`` itself, the result of
        ``labels.get_loc``, a slice, a boolean indexer from
        ``_check_bool_indexer``, or an indexer array.

        Raises
        ------
        KeyError
            If a requested label is not found on the axis.
        """
        labels = self.obj._get_axis(axis)
        is_int_index = _is_integer_index(labels)

        # An integer key is handed back untouched when the axis labels are
        # not an integer index.
        if com.is_integer(obj) and not is_int_index:
            return obj

        # Try the key as a single label first; KeyError/TypeError falls
        # through to the slice / list-like handling below.
        try:
            return labels.get_loc(obj)
        except (KeyError, TypeError):
            pass

        if isinstance(obj, slice):
            ltype = labels.inferred_type

            # Float-labelled axes use _is_int_slice, everything else
            # _is_index_slice, to decide whether the bounds are integer-like.
            if ltype == 'floating':
                int_slice = _is_int_slice(obj)
            else:
                # floats that are within tolerance of int used
                int_slice = _is_index_slice(obj)

            null_slice = obj.start is None and obj.stop is None
            # could have integers in the first level of the MultiIndex
            position_slice = (int_slice and not ltype == 'integer'
                              and not isinstance(labels, MultiIndex))

            start, stop = obj.start, obj.stop

            # last ditch effort: if we are mixed and have integers
            # NOTE(review): i and j bound here are never read -- whenever
            # they are set, the slice_locs call below rebinds them.  The
            # get_loc calls matter only for the KeyError they raise when a
            # bound is not a label.
            try:
                if 'mixed' in ltype and int_slice:
                    if start is not None:
                        i = labels.get_loc(start)
                    if stop is not None:
                        j = labels.get_loc(stop)
                    # both bounds exist as labels: slice by label instead
                    position_slice = False
            except KeyError:
                # A missing bound keeps position_slice as computed above,
                # except on 'mixed-integer-float' axes where it propagates.
                if ltype == 'mixed-integer-float':
                    raise

            if null_slice or position_slice:
                # use the slice object as given
                slicer = obj
            else:
                # Label-based slice.  If slice_locs fails and the bounds are
                # index-like, fall back to the raw slice -- unless the axis
                # is integer-labelled, in which case the error propagates.
                try:
                    i, j = labels.slice_locs(start, stop)
                    slicer = slice(i, j, obj.step)
                except Exception:
                    if _is_index_slice(obj):
                        if labels.inferred_type == 'integer':
                            raise
                        slicer = obj
                    else:
                        raise

            return slicer

        elif _is_list_like(obj):
            if com._is_bool_indexer(obj):
                # boolean keys are checked against the labels and returned
                objarr = _check_bool_indexer(labels, obj)
                return objarr
            else:
                if isinstance(obj, Index):
                    objarr = obj.values
                else:
                    objarr = _asarray_tuplesafe(obj)

                # If have integer labels, defer to label-based indexing
                if _is_integer_dtype(objarr) and not is_int_index:
                    # negative entries are shifted by len(labels)
                    if labels.inferred_type != 'integer':
                        objarr = np.where(objarr < 0,
                                          len(labels) + objarr, objarr)
                    return objarr

                # this is not the most robust, but...
                # Non-tuple keys against a MultiIndex are looked up on the
                # first level only.
                if (isinstance(labels, MultiIndex)
                        and not isinstance(objarr[0], tuple)):
                    # NOTE(review): `level` is not read again in this method
                    # beyond the reindex call on the next line.
                    level = 0
                    _, indexer = labels.reindex(objarr, level=level)

                    # `check` is what gets tested for -1 entries below
                    check = labels.levels[0].get_indexer(objarr)
                else:
                    level = None
                    # XXX
                    if labels.is_unique:
                        indexer = check = labels.get_indexer(objarr)
                    else:
                        # Non-unique labels: OR together, for every key, the
                        # element-wise equality of that key with the labels.
                        mask = np.zeros(len(labels), dtype=bool)
                        lvalues = labels.values
                        for x in objarr:
                            # ugh
                            to_or = lib.map_infer(lvalues, x.__eq__)
                            if not to_or.any():
                                raise KeyError('%s not in index' % str(x))
                            mask |= to_or

                        # positions of every label that matched some key
                        indexer = check = mask.nonzero()[0]

                # -1 entries in `check` mark keys that were not found
                mask = check == -1
                if mask.any():
                    raise KeyError('%s not in index' % objarr[mask])

                return indexer
        else:
            # neither slice nor list-like: retry the label lookup and let
            # any error propagate this time
            return labels.get_loc(obj)
Esempio n. 16
0
 def _box_values(self, values):
     f = lambda x: Period(ordinal=x, freq=self.freq)
     return lib.map_infer(values, f)
Esempio n. 17
0
def _get_ordinals(data, freq):
    f = lambda x: Period(x, freq=freq).ordinal
    if isinstance(data[0], Period):
        return period.extract_ordinals(data, freq)
    else:
        return lib.map_infer(data, f)
Esempio n. 18
0
    def get_chunk(self, rows=None):
        """
        Read the next batch of lines and assemble them into a DataFrame.

        Parameters
        ----------
        rows : int or None, default None
            Passed straight to ``self._get_lines``; presumably the number of
            rows to read -- TODO confirm against ``_get_lines``.

        Returns
        -------
        DataFrame, or its only column when ``self.squeeze`` is truthy and
        the frame has exactly one column.

        Raises
        ------
        ValueError
            If ``rows`` is given while ``self.skip_footer`` is truthy, or if
            the number of parsed columns differs from ``len(self.columns)``.
        StopIteration
            Re-raised from ``self._get_lines`` when ``self._first_chunk`` is
            false.
        """
        if rows is not None and self.skip_footer:
            raise ValueError('skip_footer not supported for iteration')

        # An exhausted source on the very first read yields an empty frame
        # rather than StopIteration.
        try:
            content = self._get_lines(rows)
        except StopIteration:
            if self._first_chunk:
                content = []
            else:
                raise

        # done with first read, next time raise StopIteration
        self._first_chunk = False

        # Nothing was read: return an empty frame whose index is an empty
        # Index / MultiIndex matching the configured index_col.
        if len(content) == 0:  # pragma: no cover
            if self.index_col is not None:
                if np.isscalar(self.index_col):
                    index = Index([], name=self.index_name)
                else:
                    index = MultiIndex.from_arrays([[]] * len(self.index_col),
                                                   names=self.index_name)
            else:
                index = Index([])

            return DataFrame(index=index, columns=self.columns)

        # Transposed object array of the rows; presumably one array per
        # column -- verify against lib.to_object_array.
        zipped_content = list(lib.to_object_array(content).T)

        if not self._has_complex_date_col and self.index_col is not None:
            index = self._get_simple_index(zipped_content)
            index = self._agg_index(index)
        else:
            # default positional index, one entry per row read
            index = Index(np.arange(len(content)))

        col_len, zip_len = len(self.columns), len(zipped_content)
        if col_len != zip_len:
            # NOTE(review): this initial value is always overwritten below.
            row_num = -1
            # Find the first row whose length differs from the expected
            # column count so the error message can point at it.
            for (i, l) in enumerate(content):
                if len(l) != col_len:
                    break

            footers = 0
            if self.skip_footer:
                footers = self.skip_footer
            # offset of the offending row relative to self.pos
            row_num = self.pos - (len(content) - i + footers)

            msg = ('Expecting %d columns, got %d in row %d' %
                   (col_len, zip_len, row_num))
            raise ValueError(msg)

        data = dict((k, v) for k, v in izip(self.columns, zipped_content))

        # apply converters
        for col, f in self.converters.iteritems():
            # an integer key that is not itself a column name is treated as
            # a position into self.columns
            if isinstance(col, int) and col not in self.columns:
                col = self.columns[col]
            data[col] = lib.map_infer(data[col], f)

        columns = list(self.columns)
        if self.parse_dates is not None:
            data, columns = self._process_date_conversion(data)

        data = _convert_to_ndarrays(data, self.na_values, self.verbose)

        df = DataFrame(data=data, columns=columns, index=index)
        # With a complex date column the index can only be built after the
        # date conversion, so the frame is rebuilt around the new index.
        if self._has_complex_date_col and self.index_col is not None:
            if not self._name_processed:
                self.index_name = self._get_index_name(list(columns))
                self._name_processed = True
            data = dict(((k, v) for k, v in df.iteritems()))
            index = self._get_complex_date_index(data,
                                                 col_names=columns,
                                                 parse_dates=False)
            index = self._agg_index(index, False)
            data = dict(((k, v.values) for k, v in data.iteritems()))
            df = DataFrame(data=data, columns=columns, index=index)

        if self.squeeze and len(df.columns) == 1:
            return df[df.columns[0]]
        return df
Esempio n. 19
0
 def _box_values(self, values):
     """
     apply box func to passed values
     """
     return lib.map_infer(values, self._box_func)
Esempio n. 20
0
    def get_chunk(self, rows=None):
        """
        Read the next batch of lines and assemble them into a DataFrame.

        Parameters
        ----------
        rows : int or None, default None
            Passed straight to ``self._get_lines``; presumably the number of
            rows to read -- TODO confirm against ``_get_lines``.

        Returns
        -------
        DataFrame, or its only column when ``self.squeeze`` is truthy and
        the frame has exactly one column.

        Raises
        ------
        ValueError
            If ``rows`` is given while ``self.skip_footer`` is truthy.
        StopIteration
            Re-raised from ``self._get_lines`` when ``self._first_chunk`` is
            false.
        """
        if rows is not None and self.skip_footer:
            raise ValueError("skip_footer not supported for iteration")

        # An exhausted source on the very first read yields an empty frame
        # rather than StopIteration.
        try:
            content = self._get_lines(rows)
        except StopIteration:
            if self._first_chunk:
                content = []
            else:
                raise

        # done with first read, next time raise StopIteration
        self._first_chunk = False

        columns = list(self.orig_columns)
        if len(content) == 0:  # pragma: no cover
            # Nothing was read: return an empty frame, dropping the index
            # column(s) from the data columns.
            if self.index_col is not None:
                if np.isscalar(self.index_col):
                    index = Index([], name=self.index_name)
                    columns.pop(self.index_col)
                else:
                    index = MultiIndex.from_arrays(
                        [[]] * len(self.index_col), names=self.index_name)
                    # Pop from the highest position down: removing a lower
                    # position first would shift the ones after it and drop
                    # the wrong columns.  Assumes non-negative positions.
                    for n in sorted(self.index_col, reverse=True):
                        columns.pop(n)
            else:
                index = Index([])

            return DataFrame(index=index, columns=columns)

        alldata = self._rows_to_cols(content)
        data = self._exclude_implicit_index(alldata)

        # apply converters
        for col, f in self.converters.iteritems():
            # an integer key that is not itself a column name is treated as
            # a position into self.orig_columns
            if isinstance(col, int) and col not in self.orig_columns:
                col = self.orig_columns[col]
            data[col] = lib.map_infer(data[col], f)

        data = _convert_to_ndarrays(data, self.na_values, self.verbose)

        if self.parse_dates is not None:
            data, columns = self._process_date_conversion(data)

        if self.index_col is None:
            # default positional index, one entry per row read
            numrows = len(content)
            index = Index(np.arange(numrows))

        elif not self._has_complex_date_col:
            index = self._get_simple_index(alldata, columns)
            index = self._agg_index(index)

        elif self._has_complex_date_col:
            # index names are resolved once, on the first chunk that needs
            # them
            if not self._name_processed:
                self.index_name = self._explicit_index_names(list(columns))
                self._name_processed = True
            index = self._get_complex_date_index(data, columns)
            index = self._agg_index(index, False)

        df = DataFrame(data=data, columns=columns, index=index)

        if self.squeeze and len(df.columns) == 1:
            return df[df.columns[0]]
        return df
Esempio n. 21
0
    def _convert_to_indexer(self, obj, axis=0):
        """
        Convert indexing key into something we can use to do actual fancy
        indexing on an ndarray

        Examples
        ix[:5] -> slice(0, 5)
        ix[[1,2,3]] -> [1,2,3]
        ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

        Going by Zen of Python?
        "In the face of ambiguity, refuse the temptation to guess."
        raise AmbiguousIndexError with integer labels?
        - No, prefer label-based indexing

        Parameters
        ----------
        obj : scalar, slice or list-like
            The key to translate.
        axis : int, default 0
            Axis of ``self.obj`` whose labels are consulted.

        Returns
        -------
        Depending on the key: ``obj`` itself, the result of
        ``labels.get_loc``, a slice, a boolean indexer from
        ``_check_bool_indexer``, or an indexer array.

        Raises
        ------
        KeyError
            If a requested label is not found on the axis.
        """
        labels = self.obj._get_axis(axis)
        is_int_index = _is_integer_index(labels)

        # An integer key is handed back untouched when the axis labels are
        # not an integer index.
        if com.is_integer(obj) and not is_int_index:
            return obj

        # Try the key as a single label first; KeyError/TypeError falls
        # through to the slice / list-like handling below.
        try:
            return labels.get_loc(obj)
        except (KeyError, TypeError):
            pass

        if isinstance(obj, slice):
            ltype = labels.inferred_type

            # Float-labelled axes use _is_int_slice, everything else
            # _is_index_slice, to decide whether the bounds are integer-like.
            if ltype == 'floating':
                int_slice = _is_int_slice(obj)
            else:
                # floats that are within tolerance of int used
                int_slice = _is_index_slice(obj)

            null_slice = obj.start is None and obj.stop is None
            # could have integers in the first level of the MultiIndex
            position_slice = (int_slice
                              and not ltype == 'integer'
                              and not isinstance(labels, MultiIndex))

            start, stop = obj.start, obj.stop

            # last ditch effort: if we are mixed and have integers
            # NOTE(review): i and j bound here are never read -- whenever
            # they are set, the slice_locs call below rebinds them.  The
            # get_loc calls matter only for the KeyError they raise when a
            # bound is not a label.
            try:
                if 'mixed' in ltype and int_slice:
                    if start is not None:
                        i = labels.get_loc(start)
                    if stop is not None:
                        j = labels.get_loc(stop)
                    # both bounds exist as labels: slice by label instead
                    position_slice = False
            except KeyError:
                # A missing bound keeps position_slice as computed above,
                # except on 'mixed-integer-float' axes where it propagates.
                if ltype == 'mixed-integer-float':
                    raise

            if null_slice or position_slice:
                # use the slice object as given
                slicer = obj
            else:
                # Label-based slice.  If slice_locs fails and the bounds are
                # index-like, fall back to the raw slice -- unless the axis
                # is integer-labelled, in which case the error propagates.
                try:
                    i, j = labels.slice_locs(start, stop)
                    slicer = slice(i, j, obj.step)
                except Exception:
                    if _is_index_slice(obj):
                        if labels.inferred_type == 'integer':
                            raise
                        slicer = obj
                    else:
                        raise

            return slicer

        elif _is_list_like(obj):
            if com._is_bool_indexer(obj):
                # boolean keys are checked against the labels and returned
                objarr = _check_bool_indexer(labels, obj)
                return objarr
            else:
                if isinstance(obj, Index):
                    objarr = obj.values
                else:
                    objarr = _asarray_tuplesafe(obj)

                # If have integer labels, defer to label-based indexing
                if _is_integer_dtype(objarr) and not is_int_index:
                    # negative entries are shifted by len(labels)
                    if labels.inferred_type != 'integer':
                        objarr = np.where(objarr < 0,
                                          len(labels) + objarr, objarr)
                    return objarr

                # this is not the most robust, but...
                # Non-tuple keys against a MultiIndex are looked up on the
                # first level only.
                if (isinstance(labels, MultiIndex) and
                    not isinstance(objarr[0], tuple)):
                    # NOTE(review): `level` is not read again in this method
                    # beyond the reindex call on the next line.
                    level = 0
                    _, indexer = labels.reindex(objarr, level=level)

                    # `check` is what gets tested for -1 entries below
                    check = labels.levels[0].get_indexer(objarr)
                else:
                    level = None
                    # XXX
                    if labels.is_unique:
                        indexer = check = labels.get_indexer(objarr)
                    else:
                        # Non-unique labels: OR together, for every key, the
                        # element-wise equality of that key with the labels.
                        mask = np.zeros(len(labels), dtype=bool)
                        lvalues = labels.values
                        for x in objarr:
                            # ugh
                            to_or = lib.map_infer(lvalues, x.__eq__)
                            if not to_or.any():
                                raise KeyError('%s not in index' % str(x))
                            mask |= to_or

                        # positions of every label that matched some key
                        indexer = check = mask.nonzero()[0]

                # -1 entries in `check` mark keys that were not found
                mask = check == -1
                if mask.any():
                    raise KeyError('%s not in index' % objarr[mask])

                return indexer
        else:
            # neither slice nor list-like: retry the label lookup and let
            # any error propagate this time
            return labels.get_loc(obj)
Esempio n. 22
0
def _map(f, arr):
    if not isinstance(arr, np.ndarray):
        arr = np.asarray(arr, dtype=object)
    return lib.map_infer(arr, f)
Esempio n. 23
0
 def _box_values(self, values):
     """
     Map ``self._box_func`` over ``values`` with ``lib.map_infer`` and
     return the result.
     """
     import pandas.lib as lib
     box = self._box_func
     return lib.map_infer(values, box)
Esempio n. 24
0
 def _have_unicode(self):
     """
     Return whether any element of ``self.values`` is a ``unicode``
     instance.
     """
     def _is_unicode(item):
         return isinstance(item, unicode)

     return lib.map_infer(self.values, _is_unicode).any()
Esempio n. 25
0
 def _box_values(self, values):
     f = lambda x: Period(ordinal=x, freq=self.freq)
     return lib.map_infer(values, f)
Esempio n. 26
0
 def _box_values(self, values):
     """
     Pass every element of ``values`` to ``lib.Timestamp`` via
     ``lib.map_infer`` and return the mapped array.
     """
     to_timestamp = lib.Timestamp
     return lib.map_infer(values, to_timestamp)
Esempio n. 27
0
 def _box_values(self, values):
     """
     Pass every element of ``values`` to ``lib.Timestamp`` via
     ``lib.map_infer`` and return the mapped array.
     """
     to_timestamp = lib.Timestamp
     return lib.map_infer(values, to_timestamp)
Esempio n. 28
0
 def _get_object_index(self):
     """
     Return an object-dtype ``Index`` holding one ``Timestamp`` per entry of
     ``self.asi8``, each built with ``self.offset`` and ``self.tz``.
     """
     def _box(stamp):
         return Timestamp(stamp, offset=self.offset, tz=self.tz)

     return Index(lib.map_infer(self.asi8, _box), dtype=object)
Esempio n. 29
0
 def _have_unicode(self):
     """
     Return whether any element of ``self.values`` is a ``unicode``
     instance.
     """
     def _is_unicode(item):
         return isinstance(item, unicode)

     return lib.map_infer(self.values, _is_unicode).any()
Esempio n. 30
0
def _map(f, arr):
    if not isinstance(arr, np.ndarray):
        arr = np.asarray(arr, dtype=object)
    return lib.map_infer(arr, f)
 def _box_values(self, values):
     """
     Map ``self._box_func`` over ``values`` with ``lib.map_infer`` and
     return the result.
     """
     import pandas.lib as lib
     box = self._box_func
     return lib.map_infer(values, box)
Esempio n. 32
0
 def _box_values(self, values):
     """
     apply box func to passed values
     """
     return lib.map_infer(values, self._box_func)
Esempio n. 33
0
 def _get_object_index(self):
     """
     Return an object-dtype ``Index`` holding one ``Timestamp`` per entry of
     ``self.asi8``, each built with ``self.offset`` and ``self.tz``.
     """
     def _box(stamp):
         return Timestamp(stamp, offset=self.offset, tz=self.tz)

     return Index(lib.map_infer(self.asi8, _box), dtype=object)
Esempio n. 34
0
File: period.py Progetto: DT021/wau
def _get_ordinals(data, freq):
    f = lambda x: Period(x, freq=freq).ordinal
    if isinstance(data[0], Period):
        return period.extract_ordinals(data, freq)
    else:
        return lib.map_infer(data, f)
Esempio n. 35
0
    def get_chunk(self, rows=None):
        """
        Read the next batch of lines and assemble them into a DataFrame.

        Parameters
        ----------
        rows : int or None, default None
            Passed straight to ``self._get_lines``; presumably the number of
            rows to read -- TODO confirm against ``_get_lines``.

        Returns
        -------
        DataFrame, or its only column when ``self.squeeze`` is truthy and
        the frame has exactly one column.

        Raises
        ------
        ValueError
            If ``rows`` is given while ``self.skip_footer`` is truthy, or if
            the number of parsed columns differs from ``len(self.columns)``.
        StopIteration
            Re-raised from ``self._get_lines`` when ``self._first_chunk`` is
            false.
        """
        if rows is not None and self.skip_footer:
            raise ValueError('skip_footer not supported for iteration')

        # An exhausted source on the very first read yields an empty frame
        # rather than StopIteration.
        try:
            content = self._get_lines(rows)
        except StopIteration:
            if self._first_chunk:
                content = []
            else:
                raise

        # done with first read, next time raise StopIteration
        self._first_chunk = False

        # Nothing was read: return an empty frame whose index is an empty
        # Index / MultiIndex matching the configured index_col.
        if len(content) == 0: # pragma: no cover
            if self.index_col is not None:
                if np.isscalar(self.index_col):
                    index = Index([], name=self.index_name)
                else:
                    index = MultiIndex.from_arrays([[]] * len(self.index_col),
                                                   names=self.index_name)
            else:
                index = Index([])

            return DataFrame(index=index, columns=self.columns)

        # Transposed object array of the rows; presumably one array per
        # column -- verify against lib.to_object_array.
        zipped_content = list(lib.to_object_array(content).T)

        if not self._has_complex_date_col and self.index_col is not None:
            index = self._get_simple_index(zipped_content)
            index = self._agg_index(index)
        else:
            # default positional index, one entry per row read
            index = Index(np.arange(len(content)))

        col_len, zip_len = len(self.columns), len(zipped_content)
        if col_len != zip_len:
            # NOTE(review): this initial value is always overwritten below.
            row_num = -1
            # Find the first row whose length differs from the expected
            # column count so the error message can point at it.
            for (i, l) in enumerate(content):
                if len(l) != col_len:
                    break

            footers = 0
            if self.skip_footer:
                footers = self.skip_footer
            # offset of the offending row relative to self.pos
            row_num = self.pos - (len(content) - i + footers)

            msg = ('Expecting %d columns, got %d in row %d' %
                   (col_len, zip_len, row_num))
            raise ValueError(msg)

        data = dict((k, v) for k, v in izip(self.columns, zipped_content))

        # apply converters
        for col, f in self.converters.iteritems():
            # an integer key that is not itself a column name is treated as
            # a position into self.columns
            if isinstance(col, int) and col not in self.columns:
                col = self.columns[col]
            data[col] = lib.map_infer(data[col], f)

        columns = list(self.columns)
        if self.parse_dates is not None:
            data, columns = self._process_date_conversion(data)

        data = _convert_to_ndarrays(data, self.na_values, self.verbose)

        df = DataFrame(data=data, columns=columns, index=index)
        # With a complex date column the index can only be built after the
        # date conversion, so the frame is rebuilt around the new index.
        if self._has_complex_date_col and self.index_col is not None:
            if not self._name_processed:
                self.index_name = self._get_index_name(list(columns))
                self._name_processed = True
            data = dict(((k, v) for k, v in df.iteritems()))
            index = self._get_complex_date_index(data, col_names=columns,
                                                 parse_dates=False)
            index = self._agg_index(index, False)
            data = dict(((k, v.values) for k, v in data.iteritems()))
            df = DataFrame(data=data, columns=columns, index=index)

        if self.squeeze and len(df.columns) == 1:
            return df[df.columns[0]]
        return df