def _resolve_estimator_factory(fit_type, fit_framework):
    """Return a ``features -> estimator`` callable for the requested combo.

    The combination is validated here, before any training data is touched,
    and only the backend that is actually selected gets imported, so a
    missing optional backend does not break the other frameworks.

    Raises:
        ValueError: if the (fit_framework, fit_type) pair is not supported.
    """
    combo = (fit_framework, fit_type)

    if combo == ('sklearn', 'classifier'):
        from rep.estimators import SklearnClassifier
        from sklearn.ensemble import GradientBoostingClassifier
        # Gradient boosting with default settings.
        return lambda features: SklearnClassifier(
            GradientBoostingClassifier(), features=features)

    if combo == ('sklearn', 'regressor'):
        from rep.estimators import SklearnRegressor
        from sklearn.ensemble import GradientBoostingRegressor
        # Gradient boosting with default settings.
        return lambda features: SklearnRegressor(
            GradientBoostingRegressor(), features=features)

    if combo == ('neural', 'regressor'):
        from rep.estimators.neurolab import NeurolabRegressor
        return lambda features: NeurolabRegressor(features=features)

    if combo == ('xgboost', 'regressor'):
        # This import was previously commented out, which made this branch
        # fail with NameError.
        from rep.estimators import XGBoostRegressor
        return lambda features: XGBoostRegressor(features=features)

    if combo == ('theanets', 'regressor'):
        from rep.estimators.theanets import TheanetsRegressor
        return lambda features: TheanetsRegressor(features=features)

    raise ValueError('No correct combo of fit_type and fit_framework found')


def fit_categorical_labels(df_train, df_test, df_labels, fit_type='regressor',
                           fit_framework='theanets', labels_list=None):
    """Fit one model per label column and predict those columns for df_test.

    For every column of ``df_labels`` the estimator is fitted on
    ``df_train`` against that column and then used to predict ``df_test``.
    The per-column predictions are assembled into a DataFrame (one column
    per label column, in the same order) which is passed, together with
    ``labels_list``, to ``gather_dummy_predictions``.

    Args:
        df_train: training features; its column names are handed to the
            estimator as the feature list.
        df_test: rows to predict; passed unchanged to ``predict``.
        df_labels: DataFrame whose columns are the individual targets.
        fit_type: ``'regressor'`` or ``'classifier'``.
        fit_framework: ``'sklearn'``, ``'neural'``, ``'xgboost'`` or
            ``'theanets'``. Only ``'sklearn'`` supports ``'classifier'``.
        labels_list: forwarded as-is to ``gather_dummy_predictions``.

    Returns:
        Whatever ``gather_dummy_predictions`` returns for the assembled
        prediction frame.

    Raises:
        ValueError: if the fit_type / fit_framework pair is unsupported.
    """
    # Validate the combination before touching any data, so an unsupported
    # pair fails with ValueError rather than an unbound estimator later on.
    make_estimator = _resolve_estimator_factory(fit_type, fit_framework)
    sk = make_estimator(df_train.columns.values)

    label_columns = df_labels.columns.values
    prediction_array = np.empty((len(df_test), len(label_columns)))

    # NOTE(review): the same estimator object is re-fitted for each column;
    # this assumes fit() retrains from scratch -- confirm for each backend.
    for i, column in enumerate(label_columns):
        # Fit against a single target column, then predict the test rows.
        sk.fit(df_train, df_labels[column])
        prediction_array[:, i] = np.squeeze(sk.predict(df_test))

    df_predict = pd.DataFrame(prediction_array, columns=label_columns)
    return gather_dummy_predictions(df_predict, labels_list)