format= "%(asctime)s :: %(filename)s:%(lineno)s :: %(funcName)s() :: %(message)s" ) logger = logging.getLogger('userMovement_rf') data_path = '../../allan_data/DataPredictMovement_half.p' x, y = np.load(data_path) df = pd.DataFrame( x, columns=[f"{cha}{qrt}" for cha in "CSGB" for qrt in range(1, 9)]) df = df[['B7', 'B3', 'G7']] x = df.values del df # x = x.astype(float) logger.info(f"Loaded data") jn = pushbulletNotifier.JobNotification(devices="phone") jn.send(message="Started CV for RF grid with just 3 features: B7, B3 and G7.") processes = 12 try: x_re, x_va, y_re, y_va = model_selection.train_test_split(x, y, test_size=0.2, stratify=y) logger.info(f"Split data in to training set and validation set.") pipe = Pipeline([('rf', RandomForestClassifier(criterion='entropy', class_weight=None))]) param_grid = { 'rf__n_estimators': np.arange(40, 100, 10), 'rf__max_depth': np.arange(13, 20)
from sklearn import pipeline # from imblearn import over_sampling # from imblearn import pipeline as imb_pipeline # from imblearn import metrics as imb_metrics # import warnings # noqa # warnings.simplefilter("ignore", category=DeprecationWarning) # warnings.simplefilter("ignore", category=mpl.cbook.mplDeprecation) # warnings.simplefilter("ignore", category=UserWarning) # **************************************************************************** # * Instantiate Pushbullet notifier * # **************************************************************************** pbn = pushbulletNotifier.JobNotification() # **************************************************************************** # * Settings for cross validation * # **************************************************************************** k_folds = 7 n_jobs = 40 cv_args = dict( scoring='roc_auc', # noqa cv=k_folds, # noqa verbose=49, # noqa refit=True, # noqa n_jobs=n_jobs, # noqa # pre_dispatch = 2 * n_jobs, # noqa