Esempio n. 1
0
def _get_dummy_frame(data, column):
    """
    Construct 1-0 dummy variables for the distinct values of one column.

    Parameters
    ----------
    data : DataFrame
        Must support ``data[column]`` and expose an ``index``.
    column : label
        Column of `data` whose values are turned into dummies.

    Returns
    -------
    dummies : DataFrame
        One column per factor level, named ``'<column>.<level>'``, indexed
        like `data`.
    """
    from pandas import Factor
    factor = Factor(data[column])
    # Pick whole rows of the identity matrix (row i is the indicator vector
    # of level i).  Without axis=0, ``take`` indexes the *flattened* matrix
    # and yields a 1-D array of scalars instead of an (n_obs, n_levels) one.
    dummy_mat = np.eye(len(factor.levels)).take(factor.labels, axis=0)
    dummy_cols = ['%s.%s' % (column, v) for v in factor.levels]
    dummies = DataFrame(dummy_mat, index=data.index, columns=dummy_cols)

    return dummies
Esempio n. 2
0
def make_column_dummies(data, column, prefix=False, prefix_sep='_'):
    """
    Build 1-0 indicator columns, one per distinct value of ``data[column]``.

    Parameters
    ----------
    data : DataFrame
        Must support ``data[column].values`` and expose an ``index``.
    column : label
        Column of `data` to expand into dummies.
    prefix : bool, default False
        If true, each dummy column is named
        ``<column><prefix_sep><level>``; otherwise the factor levels
        themselves are used as column names.
    prefix_sep : string, default '_'
        Separator placed between the column name and the level when
        `prefix` is true.

    Returns
    -------
    dummies : DataFrame
        Indicator matrix indexed like `data`.
    """
    from pandas import Factor

    encoded = Factor(data[column].values)
    levels = encoded.levels

    # Row i of the identity matrix is the indicator vector of level i, so
    # selecting rows by label expands every observation into its dummy row
    # (assumes Factor.labels holds integer positions into Factor.levels).
    identity = np.eye(len(levels))
    indicator = identity.take(encoded.labels, axis=0)

    if not prefix:
        names = levels
    else:
        names = ['%s%s%s' % (column, prefix_sep, str(level))
                 for level in levels]

    return DataFrame(indicator, index=data.index, columns=names)
Esempio n. 3
0
def make_axis_dummies(frame, axis='minor', transform=None):
    """
    Build 1-0 dummy variables from the labels of one level of the
    frame's index.

    Parameters
    ----------
    frame : DataFrame
        Its ``index`` must expose ``levels`` and ``labels`` sequences
        (presumably a two-level hierarchical index -- confirm with callers).
    axis : {'major', 'minor'}, default 'minor'
        'major' selects level 0 and 'minor' level 1; any other value is
        used directly as the level position.
    transform : function, default None
        Function mapped over the level's items before the dummies are
        built; dummies are then made for the distinct mapped values.
        For example, "day of week" dummies for a time series regression:
            make_axis_dummies(panel, axis='major',
                              transform=lambda d: d.weekday())

    Returns
    -------
    dummies : DataFrame
        Indexed like `frame`; column names are the chosen level's items,
        or the distinct transformed values when `transform` is given.
    """
    from pandas import Factor

    level_pos = {'major' : 0, 'minor' : 1}.get(axis, axis)

    level_items = frame.index.levels[level_pos]
    codes = frame.index.labels[level_pos]

    if transform is not None:
        # Re-factor on the transformed values so that items mapping to the
        # same result share a single dummy column.
        refactored = Factor(level_items.map(transform).take(codes))
        codes, level_items = refactored.labels, refactored.levels

    # Each observation receives the identity-matrix row of its code.
    indicator = np.eye(len(level_items), dtype=float).take(codes, axis=0)

    return DataFrame(indicator, columns=level_items, index=frame.index)