def process_ordinal_input(data, key, drop_percent=None, drop_val=np.nan,
                          nac=False, _print=print):
    '''Label-encode a categorical column so that it holds ordinal codes.

    "Ordinal" here means categorical ordinal: the column starts out
    categorical and is converted to integer codes, then stored back
    with the pandas 'category' dtype.

    Parameters
    ----------
    data : pandas DataFrame
        BPt formatted df. Must contain a column with `key`

    key : str
        Column key of the column to process within `data` input.

    drop_percent : float
        % to drop. When truthy, `cat_drop_by_percent` is applied to
        `data` before encoding; otherwise that step is skipped.

        (default = None)

    drop_val : NaN or int
        If a row needs to be dropped, replace with drop_val.
        Passed on to both `cat_drop_by_percent` and `get_non_drop`.

        (default = np.nan)

    nac : bool
        When truthy, the returned encoder is given a `nan_val`
        attribute. If the string 'nan' is one of the fitted classes,
        `nan_val` is its encoded value; otherwise 'nan' is appended to
        the encoder's `classes_` and `nan_val` is the index of that
        new last entry.

        (default = False)

    _print : callable
        Print-like function handed to `cat_drop_by_percent`.

        (default = print)

    Returns
    ----------
    pandas DataFrame
        The post-processed BPt formatted input df.

    sklearn LabelEncoder
        The sklearn labelencoder object mapping input to
        transformed ordinal label
    '''

    # Optional first pass: categorical dropping by percent
    if drop_percent:
        data = cat_drop_by_percent(data, key, drop_percent, drop_val, _print)

    # Restrict the encoding to the rows that get_non_drop hands back
    kept, kept_subjects = get_non_drop(data, key, drop_val)

    # Fit the encoder on the kept values and overwrite the column with
    # the resulting codes, stored as a categorical dtype
    encoder = LabelEncoder()
    codes = encoder.fit_transform(kept[key])
    kept[key] = codes
    kept[key] = kept[key].astype('category')

    # Merge the encoded rows back into the full frame
    data = put_non_drop_back(data, kept_subjects, kept)

    # Nothing more to do unless a NaN code was requested
    if not nac:
        return data, encoder

    try:
        # 'nan' was already seen during fitting, re-use its code
        encoder.nan_val = encoder.transform(['nan'])[0]

    except ValueError:
        # 'nan' is unknown to the encoder, but a NaN value is still
        # needed, so register it as an extra trailing class
        extended = list(encoder.classes_)
        extended.append('nan')
        encoder.classes_ = np.array(extended)
        encoder.nan_val = len(encoder.classes_) - 1

    return data, encoder