def _generate_labels(self, inputs: container.DataFrame) -> None:
    """Build the per-column value -> integer code tables from ``inputs``.

    The result is stored on ``self._labels`` as a dict keyed by column
    *position* (not column name); each entry maps a distinct value of that
    column to a positive integer, numbered in the order ``Series.unique()``
    returns the values.  Any table left over from a previous call is
    discarded.

    Args:
        inputs: Frame whose columns supply the values to be encoded.
    """
    self._labels = {}
    # ``DataFrame.iteritems()`` was removed in pandas 2.0; ``items()`` yields
    # the same (name, Series) pairs and is available in older releases too.
    for col_idx, (_, col) in enumerate(inputs.items()):
        # Code 0 is reserved for labels never seen while fitting, so the
        # known labels are numbered starting from 1.
        self._labels[col_idx] = {
            value: code for code, value in enumerate(col.unique(), start=1)
        }
def _encode_labels(self, inputs: container.DataFrame) -> container.DataFrame:
    """Replace every value in ``inputs`` with its integer code.

    Columns are matched to the tables in ``self._labels`` by position.  A
    value that has no entry in its column's table is encoded as 0.

    Args:
        inputs: Frame to encode.  It is modified in place.

    Returns:
        The same ``inputs`` object, with each column replaced by its codes.

    Raises:
        KeyError: If ``self._labels`` has no table for a column position.
    """
    # ``isetitem`` (pandas >= 1.5) swaps in a new column, so the result takes
    # the dtype of the codes.  Since pandas 2.0 ``iloc[:, i] = ...`` instead
    # writes into the existing array and keeps the old dtype; it is used only
    # as a fallback for older pandas versions that lack ``isetitem``.
    replace_column = getattr(inputs, "isetitem", None)

    # ``DataFrame.iteritems()`` was removed in pandas 2.0; ``items()`` yields
    # the same (name, Series) pairs.
    for col_idx, (_, col) in enumerate(inputs.items()):
        # Look the column's table up once rather than once per cell.
        mapping = self._labels[col_idx]
        encoded = [mapping.get(value, 0) for value in col]
        if replace_column is not None:
            replace_column(col_idx, encoded)
        else:
            inputs.iloc[:, col_idx] = encoded
    return inputs