def make_column_dummies(data, column, prefix=False, prefix_sep="_"):
    """
    Construct 1-0 dummy variables from the values of one column

    Parameters
    ----------
    data : DataFrame-like
        Must support ``data[column].values`` and expose ``data.index``
    column : label
        Column of `data` whose values are turned into dummies
    prefix : boolean, default False
        If true, each dummy column is named
        ``<column><prefix_sep><level>``; otherwise the levels themselves
        are used as the column names
    prefix_sep : string, default "_"
        Separator placed between the column label and the level when
        `prefix` is true

    Returns
    -------
    dummies : DataFrame
        One column per level reported by ``Factor.from_array``, indexed
        like `data`
    """
    # Imported locally, as in the rest of this module's dummy helpers.
    from pandas import Factor

    categories = Factor.from_array(data[column].values)
    levels = categories.levels
    codes = categories.labels

    # Row i of the identity matrix is the indicator vector for level i, so
    # selecting rows by code yields one indicator row per observation.
    # NOTE(review): a code of -1 would select the last row here -- confirm
    # whether Factor can emit -1 (e.g. for missing values).
    identity = np.eye(len(levels))
    indicator = identity.take(codes, axis=0)

    names = levels
    if prefix:
        names = ["%s%s%s" % (column, prefix_sep, str(level))
                 for level in levels]

    return DataFrame(indicator, index=data.index, columns=names)
def make_column_dummies(data, column, prefix=False, prefix_sep='_'):
    """
    Build a frame of indicator (dummy) columns for a single column of `data`

    Parameters
    ----------
    data : DataFrame-like
        Object providing ``data[column].values`` and ``data.index``
    column : label
        Name of the column to expand
    prefix : boolean, default False
        Whether to prepend the column label (plus `prefix_sep`) to each
        level when naming the output columns
    prefix_sep : string, default '_'
        Text inserted between the column label and the level

    Returns
    -------
    dummies : DataFrame
        Same index as `data`; one column for each level found by
        ``Factor.from_array``
    """
    from pandas import Factor

    def _prefixed(level):
        # Name used for a level's column when prefixing is requested.
        return '%s%s%s' % (column, prefix_sep, str(level))

    factor = Factor.from_array(data[column].values)
    levels, labels = factor.levels, factor.labels

    # Pick, for every observation, the identity-matrix row of its label.
    one_hot = np.eye(len(levels)).take(labels, axis=0)

    columns = [_prefixed(level) for level in levels] if prefix else levels
    return DataFrame(one_hot, index=data.index, columns=columns)
def make_axis_dummies(frame, axis='minor', transform=None):
    """
    Build 1-0 indicator columns from the labels of one index level

    Parameters
    ----------
    frame : DataFrame-like
        Its index must expose ``levels`` and ``labels`` sequences
        (a hierarchical index)
    axis : {'major', 'minor'}, default 'minor'
        'major' selects index level 0 and 'minor' level 1; any other value
        is used directly as the position of the level
    transform : function, default None
        Applied to the level's items before the dummies are built, so that
        items mapping to the same result share a column. For "day of week"
        dummies in a time series regression, for instance:

        make_axis_dummies(panel, axis='major',
                          transform=lambda d: d.weekday())

    Returns
    -------
    dummies : DataFrame
        Indexed like `frame`; columns are the items of the chosen level,
        or the distinct transformed values when `transform` is given
    """
    from pandas import Factor

    # Known axis names resolve to a level position; anything else is
    # passed through unchanged.
    level_number = {'major': 0, 'minor': 1}.get(axis, axis)

    index = frame.index
    items = index.levels[level_number]
    labels = index.labels[level_number]

    if transform is not None:
        # Expand the transformed items to one value per row, then
        # re-factorize so rows are grouped by the transformed value.
        per_row = items.map(transform).take(labels)
        factor = Factor.from_array(per_row)
        items, labels = factor.levels, factor.labels

    # NOTE(review): a label of -1 would select the last identity row --
    # confirm whether the index / Factor labels can contain -1.
    indicator = np.eye(len(items), dtype=float).take(labels, axis=0)
    return DataFrame(indicator, columns=items, index=index)
def make_axis_dummies(frame, axis='minor', transform=None):
    """
    Create dummy (1-0) variables keyed on one level of a frame's index

    Parameters
    ----------
    frame : DataFrame-like
        Object whose ``index`` provides ``levels`` and ``labels``
    axis : {'major', 'minor'}, default 'minor'
        Index level to use: 'major' is position 0, 'minor' is position 1.
        Values other than these two names are used as the position itself
    transform : function, default None
        Optional function mapped over the level's items first. Example,
        producing "day of week" dummies for a time series regression:

        make_axis_dummies(panel, axis='major',
                          transform=lambda d: d.weekday())

    Returns
    -------
    dummies : DataFrame
        Shares `frame`'s index. Column names come from the chosen level
        (or from the transformed values when `transform` is supplied)
    """
    from pandas import Factor

    axis_positions = {'major': 0, 'minor': 1}
    position = axis_positions.get(axis, axis)

    columns = frame.index.levels[position]
    codes = frame.index.labels[position]

    if transform is not None:
        # Map the items, spread them out to row order, and factorize again
        # so the codes refer to the transformed values.
        regrouped = Factor.from_array(columns.map(transform).take(codes))
        codes = regrouped.labels
        columns = regrouped.levels

    # Each row of the result is the identity-matrix row for that row's code.
    eye = np.eye(len(columns), dtype=float)
    return DataFrame(eye.take(codes, axis=0), columns=columns,
                     index=frame.index)