コード例 #1
0
ファイル: reshape.py プロジェクト: woodworker2017/cudf
def concat(objs, axis=0, ignore_index=False, sort=None):
    """Concatenate DataFrames, Series, or Indices along an axis.

    Parameters
    ----------
    objs : list of DataFrame, Series, or Index
        Objects to combine. For ``axis=0`` every object must have the
        same type; for ``axis=1`` Series and DataFrames may be mixed.
    axis : {0/'index', 1/'columns'}, default 0
        The axis to concatenate along.
    ignore_index : bool, default False
        Set True to ignore the index of the *objs* and provide a
        default range index instead. In this function it is only
        forwarded to ``DataFrame._concat`` (row-wise DataFrame input).
    sort : bool or None, default None
        Not supported yet; any value other than ``None``/``False`` raises.

    Returns
    -------
    A new object of like type with rows from each object in ``objs``
    (a DataFrame when ``axis=1``). When ``objs`` holds exactly one
    element, that element itself is returned without copying.

    Raises
    ------
    NotImplementedError
        If ``sort`` is neither ``None`` nor ``False``.
    ValueError
        If ``objs`` is empty, ``axis`` is not recognised, the objects
        are of mixed types for ``axis=0``, or their type is unsupported.
    """
    if sort not in (None, False):
        raise NotImplementedError("sort parameter is not yet supported")

    if not objs:
        raise ValueError("Need at least one object to concatenate")

    # no-op for single object
    if len(objs) == 1:
        return objs[0]

    typs = {type(o) for o in objs}
    allowed_typs = {Series, DataFrame}

    param_axis = _axis_map.get(axis, None)
    if param_axis is None:
        # Report the value the caller passed; the failed lookup result
        # (``param_axis``) is always None here and would be useless.
        raise ValueError(
            '`axis` must be 0 / "index" or 1 / "columns", got: {0}'.format(
                axis))
    axis = param_axis

    # when axis is 1 (column) we can concat with Series and Dataframes
    if axis == 1:
        assert typs.issubset(allowed_typs)
        df = DataFrame()
        # pandas labels unnamed Series 0, 1, 2, ... in order of appearance,
        # so keep a dedicated 0-based counter rather than deriving the
        # label from the position in ``objs``.
        unnamed_count = 0
        for o in objs:
            if isinstance(o, Series):
                name = o.name
                if name is None:
                    name = unnamed_count
                    unnamed_count += 1
                df[name] = o
            else:
                for col in o.columns:
                    df[col] = o[col]
        return df

    if len(typs) > 1:
        raise ValueError("`concat` expects all objects to be of the same "
                         "type. Got mix of %r." % [t.__name__ for t in typs])
    typ = next(iter(typs))

    if typ is DataFrame:
        return DataFrame._concat(objs, axis=axis, ignore_index=ignore_index)
    elif typ is Series:
        return Series._concat(objs, axis=axis)
    elif issubclass(typ, Index):
        return Index._concat(objs)
    else:
        raise ValueError("Unknown type %r" % typ)
コード例 #2
0
ファイル: reshape.py プロジェクト: vuule/cudf
def concat(objs, axis=0, ignore_index=False, sort=None):
    """Concatenate DataFrames, Series, or Indices along an axis.

    Parameters
    ----------
    objs : list of DataFrame, Series, or Index
        Objects to combine; ``None`` entries are skipped. For ``axis=0``
        every remaining object must have the same type; for ``axis=1``
        Series and DataFrames may be mixed.
    axis : {0/'index', 1/'columns'}, default 0
        The axis to concatenate along.
    ignore_index : bool, default False
        Set True to ignore the index of the *objs* and provide a
        default range index instead. In this function it is only
        forwarded to ``DataFrame._concat`` (row-wise DataFrame input).
    sort : bool or None, default None
        Not supported yet; any value other than ``None``/``False`` raises.

    Returns
    -------
    A new object of like type with rows from each object in ``objs``
    (a DataFrame when ``axis=1``). When only one non-``None`` object
    remains, that object itself is returned without copying.

    Raises
    ------
    NotImplementedError
        If ``sort`` is neither ``None`` nor ``False``, or if ``axis=1``
        would produce two columns with the same name.
    ValueError
        If ``objs`` is empty or contains only ``None``, ``axis`` is not
        recognised, the objects are of mixed types for ``axis=0``, or
        their type is unsupported.
    """
    if sort not in (None, False):
        raise NotImplementedError("sort parameter is not yet supported")

    if not objs:
        raise ValueError("Need at least one object to concatenate")

    # Filtering builds a new list, so the in-place replacement of Series
    # by frames in the axis=1 branch never touches the caller's list.
    objs = [obj for obj in objs if obj is not None]

    # Return for single object
    if len(objs) == 1:
        return objs[0]

    if len(objs) == 0:
        raise ValueError("All objects passed were None")

    typs = {type(o) for o in objs}
    allowed_typs = {Series, DataFrame}

    param_axis = _axis_map.get(axis, None)
    if param_axis is None:
        # Report the value the caller passed; the failed lookup result
        # (``param_axis``) is always None here and would be useless.
        raise ValueError(
            '`axis` must be 0 / "index" or 1 / "columns", got: {0}'.format(
                axis
            )
        )
    axis = param_axis

    # when axis is 1 (column) we can concat with Series and Dataframes
    if axis == 1:
        assert typs.issubset(allowed_typs)
        df = DataFrame()

        # Turn every Series into a one-column frame; unnamed Series are
        # labelled 0, 1, 2, ... in order of appearance.
        sr_name = 0
        for idx, o in enumerate(objs):
            if isinstance(o, Series):
                name = o.name
                if name is None:
                    name = sr_name
                    sr_name += 1
                objs[idx] = o.to_frame(name=name)

        for idx, o in enumerate(objs):
            if idx == 0:
                # The result takes the index of the first object.
                df.index = o.index
            for col in o._data.names:
                if col in df._data:
                    # Adjacent literals instead of backslash continuations,
                    # which leaked source indentation into the message.
                    raise NotImplementedError(
                        "A Column with duplicate name found: {0}, cuDF "
                        "doesn't support having multiple columns with "
                        "same names yet.".format(col)
                    )
                df[col] = o._data[col]

        result_columns = objs[0].columns
        for o in objs[1:]:
            result_columns = result_columns.append(o.columns)

        df.columns = result_columns
        return df

    if len(typs) > 1:
        raise ValueError(
            "`concat` expects all objects to be of the same "
            "type. Got mix of %r." % [t.__name__ for t in typs]
        )
    typ = next(iter(typs))

    if typ is DataFrame:
        return DataFrame._concat(objs, axis=axis, ignore_index=ignore_index)
    elif typ is Series:
        return Series._concat(objs, axis=axis)
    elif typ is cudf.MultiIndex:
        return cudf.MultiIndex._concat(objs)
    elif issubclass(typ, Index):
        return Index._concat(objs)
    else:
        raise ValueError("Unknown type %r" % typ)
コード例 #3
0
ファイル: reshape.py プロジェクト: cambricorp/cudf
def concat(objs, axis=0, ignore_index=False, sort=None):
    """Concatenate DataFrames, Series, or Indices along an axis.

    Parameters
    ----------
    objs : list of DataFrame, Series, or Index
        Objects to combine; ``None`` entries are skipped.
    axis : {0/'index', 1/'columns'}, default 0
        The axis to concatenate along.
    ignore_index : bool, default False
        Set True to ignore the index of the *objs* and provide a
        default range index instead.
    sort : bool, default None
        Sort non-concatenation axis if it is not already aligned.
        In this function the value is only forwarded to
        ``DataFrame._concat`` on the row-wise DataFrame path.

    Returns
    -------
    A new object of like type with rows from each object in ``objs``.
    When only one non-``None`` object remains, a copy of it is returned.

    Raises
    ------
    ValueError
        If ``objs`` is empty or contains only ``None``, an object has an
        unsupported type, ``axis`` is not recognised, or the objects mix
        types other than Series and DataFrame.
    NotImplementedError
        If ``axis=1`` would produce two columns with the same name.

    Examples
    --------
    Combine two ``Series``.

    >>> import cudf
    >>> s1 = cudf.Series(['a', 'b'])
    >>> s2 = cudf.Series(['c', 'd'])
    >>> s1
    0    a
    1    b
    dtype: object
    >>> s2
    0    c
    1    d
    dtype: object
    >>> cudf.concat([s1, s2])
    0    a
    1    b
    0    c
    1    d
    dtype: object

    Clear the existing index and reset it in the
    result by setting the ``ignore_index`` option to ``True``.

    >>> cudf.concat([s1, s2], ignore_index=True)
    0    a
    1    b
    2    c
    3    d
    dtype: object

    Combine two DataFrame objects with identical columns.

    >>> df1 = cudf.DataFrame([['a', 1], ['b', 2]],
    ...                    columns=['letter', 'number'])
    >>> df1
      letter  number
    0      a       1
    1      b       2
    >>> df2 = cudf.DataFrame([['c', 3], ['d', 4]],
    ...                    columns=['letter', 'number'])
    >>> df2
      letter  number
    0      c       3
    1      d       4
    >>> cudf.concat([df1, df2])
      letter  number
    0      a       1
    1      b       2
    0      c       3
    1      d       4

    Combine DataFrame objects with overlapping columns and return
    everything. Columns outside the intersection will
    be filled with ``null`` values.

    >>> df3 = cudf.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],
    ...                    columns=['letter', 'number', 'animal'])
    >>> df3
      letter  number animal
    0      c       3    cat
    1      d       4    dog
    >>> cudf.concat([df1, df3], sort=False)
      letter  number animal
    0      a       1   None
    1      b       2   None
    0      c       3    cat
    1      d       4    dog

    Combine ``DataFrame`` objects horizontally along the
    x axis by passing in ``axis=1``.

    >>> df4 = cudf.DataFrame([['bird', 'polly'], ['monkey', 'george']],
    ...                    columns=['animal', 'name'])
    >>> df4
       animal    name
    0    bird   polly
    1  monkey  george
    >>> cudf.concat([df1, df4], axis=1)
      letter  number  animal    name
    0      a       1    bird   polly
    1      b       2  monkey  george
    """

    if not objs:
        raise ValueError("No objects to concatenate")

    # Filtering builds a new list, so later in-place normalisation of
    # ``objs`` operates on this copy rather than the caller's list.
    objs = [obj for obj in objs if obj is not None]

    # Return for single object
    if len(objs) == 1:
        return _concat_single_object(objs[0], ignore_index)

    if len(objs) == 0:
        raise ValueError("All objects passed were None")

    # Retrieve the base types of `objs`. In order to support sub-types
    # and object wrappers, we use `isinstance()` instead of comparing
    # types directly
    typs = set()
    for o in objs:
        # One exclusive chain: a MultiIndex must register exactly one
        # type. With a separate ``if`` for the Index check, a MultiIndex
        # subclass added two entries, and a MultiIndex that is not an
        # Index subclass fell through to the ValueError below.
        if isinstance(o, cudf.MultiIndex):
            typs.add(cudf.MultiIndex)
        elif issubclass(type(o), Index):
            typs.add(type(o))
        elif isinstance(o, DataFrame):
            typs.add(DataFrame)
        elif isinstance(o, Series):
            typs.add(Series)
        else:
            raise ValueError(f"cannot concatenate object of type {type(o)}")

    allowed_typs = {Series, DataFrame}

    param_axis = _axis_map.get(axis, None)
    if param_axis is None:
        # Report the value the caller passed; the failed lookup result
        # (``param_axis``) is always None here and would be useless.
        raise ValueError(
            '`axis` must be 0 / "index" or 1 / "columns", got: {0}'.format(
                axis
            )
        )
    axis = param_axis

    # when axis is 1 (column) we can concat with Series and Dataframes
    if axis == 1:

        assert typs.issubset(allowed_typs)
        df = DataFrame()
        _normalize_series_and_dataframe(objs, axis=axis)

        objs, match_index = _align_objs(objs)

        for idx, o in enumerate(objs):
            if not ignore_index and idx == 0:
                # The result takes the index of the first object.
                df.index = o.index
            for col in o._data.names:
                if col in df._data:
                    # Adjacent literals instead of backslash continuations,
                    # which leaked source indentation into the message.
                    raise NotImplementedError(
                        "A Column with duplicate name found: {0}, cuDF "
                        "doesn't support having multiple columns with "
                        "same names yet.".format(col)
                    )
                df[col] = o._data[col]

        result_columns = objs[0].columns
        for o in objs[1:]:
            result_columns = result_columns.append(o.columns)

        df.columns = result_columns.unique()
        if ignore_index:
            df.index = None
            return df
        elif not match_index:
            # ``_align_objs`` reported that the input indexes did not
            # match, so the combined frame is sorted by index.
            return df.sort_index()
        else:
            return df

    if len(typs) > 1:
        if allowed_typs == typs:
            # This block of code will run when `objs` has
            # both Series & DataFrame kind of inputs.
            _normalize_series_and_dataframe(objs, axis=axis)
            typ = DataFrame
        else:
            raise ValueError(
                "`concat` cannot concatenate objects of "
                "types: %r." % sorted([t.__name__ for t in typs])
            )
    else:
        typ = next(iter(typs))

    if typ is DataFrame:
        # Frames with shape (0, 0) contribute nothing; drop them first.
        objs = [obj for obj in objs if obj.shape != (0, 0)]
        if len(objs) == 0:
            # If objs is empty, that indicates all of
            # objs are empty dataframes.
            return cudf.DataFrame()
        elif len(objs) == 1:
            return _concat_single_object(objs[0], ignore_index)
        else:
            return DataFrame._concat(
                objs, axis=axis, ignore_index=ignore_index, sort=sort
            )
    elif typ is Series:
        return Series._concat(
            objs, axis=axis, index=None if ignore_index else True
        )
    elif typ is cudf.MultiIndex:
        return cudf.MultiIndex._concat(objs)
    elif issubclass(typ, Index):
        return Index._concat(objs)
    else:
        raise ValueError(f"cannot concatenate object of type {typ}")


def _concat_single_object(obj, ignore_index):
    """Return a copy of the only object left to concatenate.

    With ``ignore_index`` set, the copy is a ``cudf.DataFrame`` built from
    a deep copy of ``obj._data`` and a fresh ``RangeIndex``; otherwise it
    is ``obj.copy()``.
    """
    if ignore_index:
        # NOTE(review): this builds a DataFrame even when ``obj`` is a
        # Series or Index -- confirm that is the intended result type.
        return cudf.DataFrame(
            data=obj._data.copy(deep=True),
            index=cudf.RangeIndex(len(obj)),
        )
    return obj.copy()