def main(params): # Read params and make variables from text inputs = forest.read_params(params) for i in inputs: exec("{0} = str({1})").format(i, inputs[i]) # Check that variables were specified in params try: str_check = sample_txt, target_col, var_txt max_trees = int(max_trees) step = int(step) except NameError as e: print '' missing_var = str(e).split("'")[1] msg = "Variable '%s' not specified in param file:\n%s" % (missing_var, params) raise NameError(msg) # Raise an error if var_txt doesn't exist. Otherwise, just read it in if not os.path.exists(var_txt): print '' msg = 'var_text path specified does not exist:\n%s\n\n' % var_txt raise IOError(msg) df_var = pd.read_csv(var_txt, sep='\t', index_col='var_name') df_train = pd.read_csv(sample_txt, sep='\t', index_col='obs_id') predict_cols = sorted( np.unique( [c for c in df_train.columns for v in df_var.index if v in c])) x_train = df_train.reindex(columns=predict_cols) y_train = df_train[target_col] out_dir = os.path.dirname(sample_txt) test(out_dir, x_train, y_train, max_trees, step)
def main(params, constant_vars=[], silent=False, return_results=True): # Read params and make variables from text inputs = forest.read_params(params, silent=silent) for i in inputs: exec ("{0} = str({1})").format(i, inputs[i]) # Check that variables were specified in params try: str_check = sample_txt, target_col, var_txt max_trees = int(max_trees) step = int(step) except NameError as e: print '' missing_var = str(e).split("'")[1] msg = "Variable '%s' not specified in param file:\n%s" % (missing_var, params) raise NameError(msg) # Try to read var_txt if not os.path.exists(var_txt): print '' msg = 'var_text path specified does not exist:\n%s\n\n' % var_txt raise IOError(msg) df_var = pd.read_csv(var_txt, sep='\t', index_col='var_name') if 'constant_vars' in inputs: constant_vars = sorted([i.strip() for i in constant_vars.split(',')]) df_train = pd.read_csv(sample_txt, sep='\t', index_col='obs_id') predict_cols = sorted(np.unique([c for c in df_train.columns for v in df_var.index if v in c] + constant_vars)) y_train = df_train[target_col] x_train = df_train.reindex(columns=predict_cols) out_dir = os.path.dirname(sample_txt) scores = test(out_dir, x_train, y_train, max_trees, step, silent=silent) shutil.copy2(var_txt, out_dir) if return_results: return scores
def main(params, n_pieces=False, ydims=None, constant_vars=None, year='', agg_method=None): t0 = time.time() print 'Predicting Random Forest... %s\n' % time.ctime(t0) # Set optional params to default: split_predictors = False # Read params and make variables from text inputs = forest.read_params(params) for i in inputs: exec("{0} = str({1})").format(i, inputs[i]) # Check that variables were specified in params try: nodata = int(nodata) str_check = train_params, rf_path, mask_path, out_dir except NameError as e: missing_var = str(e).split("'")[1] msg = "Variable '%s' not specified in param file:\n%s" % (missing_var, params) raise NameError(msg) # Raise an error if the var_txt path doesn't exist. Otherwise, just read it in train_dict = forest.read_params(train_params) train_txt_bn = os.path.basename(train_dict['var_txt'][:-1]) if 'var_txt' not in locals(): var_txt = os.path.join(os.path.dirname(rf_path), train_txt_bn) if not os.path.exists(var_txt): print '' msg = 'Could not find var_txt:\n%s\n' % var_txt raise IOError(msg) df_var = pd.read_csv(var_txt, sep='\t', index_col='var_name') # Make sure vars are sorted alphabetically since they were for training pred_vars = sorted(df_var.index) df_var = df_var.reindex(pred_vars) '''if 'constant_vars' in inputs: constant_vars = parse_constant_vars(constant_vars) #year = constant_vars['YEAR'] year = 2012 pred_constants = sorted(constant_vars.keys()) else: df_var.search_str = [s.format(2007) for s in df_var.search_str]''' #out_dir = os.path.dirname(out_raster) if not os.path.exists(out_dir): os.mkdir(out_dir) else: print ('WARNING: out_dir already exists:\n%s\nAny existing files ' + \ 'will be overwritten...\n') % out_dir new_params = os.path.join(out_dir, os.path.basename(params)) shutil.copy2(params, new_params.replace('.txt', '_%s.txt' % year)) # Load the Random Forest model print 'Loading the RandomForest model from \n%s... \n%s\n' % ( rf_path, time.ctime(time.time())) if not os.path.exists(rf_path): raise IOError('%s does not exist' % rf_path) with open(rf_path) as f: rf_model = pickle.load(f) n_features = rf_model.n_features_ n_vars = len(df_var.index.tolist()) if 'constant_vars' in inputs: n_vars += len(pred_constants) if n_features != n_vars: print df_var.index.tolist() + pred_constants sys.exit(('\nKeyError: Number of features of the random forest model does not match the number of variables in df_var.' +\ '\nNumber of features of the model: {0} \nNumber of variables in var_txt: {1}' + \ '\nCheck that all predictors for used in var_txt to train the model are in this var_txt ' +\ '\nPath of Random Forest model: {2}\nPath of var_txt: {3}').format(n_features, n_vars, rf_path, var_txt)) #""" if 'agg_method' in inputs: agg_method = inputs['agg_method'] # Get mask and raster info ds = gdal.Open(mask_path) ar = ds.ReadAsArray() nodata_mask = ar != 0 xsize = ds.RasterXSize ysize = ds.RasterYSize tx = ds.GetGeoTransform() prj = ds.GetProjection() driver = gdal.GetDriverByName('gtiff') ul_x, x_res, x_rot, ul_y, y_rot, y_res = tx # Predict #print 'Predicting with %s processors... %s' % (rf_model.n_jobs, time.ctime(time.time())) t1 = time.time() predict_pieces = [] if 'n_tiles' not in inputs: print 'n_tiles not specified. Using default: 25 x 15 ...\n' n_tiles = 25, 15 else: n_tiles = [int(i) for i in n_tiles.split(',')] if 'n_tiles' in inputs: df_tiles, df_tiles_rc, tile_size = stem.get_tiles( n_tiles, xsize, ysize, tx) empty_tiles = [] ar_out = np.full((ysize, xsize), nodata, dtype=np.uint8) tile_dir = os.path.join(out_dir, 'predict_tiles') if not os.path.isdir(tile_dir): os.mkdir(tile_dir) for i, (ind, tile_coords) in enumerate(df_tiles.iterrows()): print 'Predicting for tile %s of %s...' % (i + 1, len(df_tiles)) t1 = time.time() coords = tile_coords[['ul_x', 'ul_y', 'lr_x', 'lr_y']].tolist() tsa_ar, tsa_off = mosaic.extract_kernel(ds, 1, coords, tx, xsize, ysize, nodata=nodata) tsa_mask = tsa_ar == 0 if tsa_mask.all(): print 'Tile %s empty. Skipping...' % ind continue tsa_ar[tsa_mask] = nodata # Get the ids of TSAs this kernel covers tsa_ids = np.unique(tsa_ar) #tsa_strs = ['0' + str(tsa) for tsa in tsa_ids if tsa!=nodata] tsa_strs = [str(tsa) for tsa in tsa_ids if tsa != nodata] array_shape = tsa_ar.shape # Get an array of predictors where each column is a flattened 2D array of a # single predictor variable temp_nodata = -9999 ar_predictors = stem.get_predictors(df_var, tx, tsa_strs, tsa_ar, coords, tsa_mask, temp_nodata, 1) nodata_mask = ~np.any(ar_predictors == temp_nodata, axis=1) predictors = ar_predictors[nodata_mask] t2 = time.time() if agg_method == 'mode': args = [] for dt in rf_model.estimators_: args.append([dt, predictors]) pool = Pool(rf_model.n_jobs) t3 = time.time() dt_predictions = np.vstack( pool.map(forest.par_predict_from_dt, args, 1)) print 'Prediction time: %.1f minutes' % ( (time.time() - t3) / 60) t3 = time.time() predictions = stem.mode(dt_predictions, axis=0) print 'Aggregation time: %.1f minutes' % ( (time.time() - t3) / 60) del dt_predictions t3 = time.time() pool.close() pool.join() print 'Closing time: %.1f minutes' % ((time.time() - t3) / 60) else: predictions = rf_model.predict(ar_predictors[nodata_mask]) print 'Prediction time: %.1f minutes' % ((time.time() - t2) / 60) ar_tile = np.full(ar_predictors.shape[0], nodata, dtype=np.uint8) ar_tile[nodata_mask] = predictions.astype(np.uint8) ul_r, lr_r, ul_c, lr_c = df_tiles_rc.ix[ind] ar_out[ul_r:lr_r, ul_c:lr_c] = ar_tile.reshape(array_shape) tx_tile = tile_coords.ul_x, x_res, x_rot, tile_coords.ul_y, y_rot, y_res mosaic.array_to_raster(ar_tile.reshape(array_shape), tx_tile, prj, driver, os.path.join(tile_dir, 'tile_%s.tif' % ind), dtype=gdal.GDT_Byte, nodata=nodata) print 'Total time for this piece: %.1f minutes\n' % ( (time.time() - t1) / 60) #del ar_predictors, nodata_mask, ar_prediction''' #ar_prediction = np.concatenate(predict_pieces) #del predict_pieces '''ar_out = np.full((ysize, xsize), nodata, dtype=np.uint8) for ind, tile_coords in df_tiles_rc.iterrows(): if ind in empty_tiles: continue ul_r, lr_r, ul_c, lr_c = tile_coords tile_file = os.path.join(tile_dir, 'tile_%s.tif' % ind) if not os.path.exists(tile_file): continue ds_t = gdal.Open(tile_file) ar_tile = ds_t.ReadAsArray() t_ulx = df_tiles.ix[ind, ['ul_x', 'ul_y']] ar_out[ul_r : lr_r, ul_c : lr_c] = ar_tile''' else: ar_predictors, nodata_mask = forest.get_predictors(df_var, nodata) # If the predictions are too large (i.e. cause memory errors), split the predictor array into pieces and predict # separately, then stack them back together if split_predictors: split_predictors = int(split_predictors) predictions = [] for i, p in enumerate( np.array_split(ar_predictors, split_predictors)): t1 = time.time() print '\nPredicting for %s of %s pieces of the final array...' % ( i + 1, split_predictors) predictions.append(rf_model.predict(p)) print '%.1f minutes' % ((time.time() - t1) / 60) predictions = np.concatenate(predictions) print '' else: print 'Predicting in one chunk...' predictions = rf_model.predict(ar_predictors) ar_prediction = np.full(nodata_mask.shape[0], nodata, dtype=np.float32) ar_prediction[nodata_mask] = predictions del ar_predictors, predictions # Save the prediction array to disk stamp = os.path.basename(out_dir) out_path = os.path.join(out_dir, '%s_rf_vote.tif' % stamp) #ar_prediction = ar_prediction.reshape(ysize, xsize) if constant_vars: out_path = out_path.replace('.tif', '_yr%s.tif' % year) forest.array_to_raster(ar_out, tx, prj, driver, out_path, gdal.GDT_Byte, nodata) #""" # Delete the tiles shutil.rmtree(tile_dir) ds = None '''stamp = os.path.basename(out_dir) path = os.path.join(out_dir, 'final_%s_yr2011.tif' % stamp) stamp = os.path.basename(os.path.dirname(path)) ds = gdal.Open(path) ar_prediction = ds.ReadAsArray() ds = None#''' if 'test_params' in inputs: #df_test = pd.read_csv(test_samples, sep='\t', index_col='obs_id') print '\nEvaluating the model...' t1 = time.time() test_dict = forest.read_params(test_params) for i in test_dict: exec("{0} = str({1})").format(i, test_dict[i]) if 'n_trials' in test_dict: n_trials = int(n_trials) else: 'n_trials not specified. Setting default to 50...\n' n_trials = 50 if 'year' in test_dict: year = int(year) else: year = None cell_size = [int(i) for i in cell_size.split(',')] n_per_cell = int(n_per_cell) param_bn = os.path.basename(test_params) shutil.copy2( test_params, os.path.join(out_dir, param_bn.replace('.txt', '_%s.txt' % year))) df, samples, roc_curves = evaluate_ebird(sample_txt, ar_prediction, tx, cell_size, target_col, n_per_cell, n_trials, year) if len(roc_curves) > 0: for fpr, tpr, thresholds in roc_curves: plt.plot(fpr, tpr, 'k', alpha=.1) out_png = os.path.join(out_dir, '{0}_roc_curve_{1}.png'.format(stamp, year)) plt.savefig(out_png) if 'lc_path' in test_dict: '''df_lc = evaluate_by_lc(samples, ar_prediction, lc_path, target_col) out_txt = os.path.join('/vol/v2/stem/ebird/results/performance_by_lc', '{0}_eval_{1}_land_cover.txt'.format(stamp, year)) df_lc.to_csv(out_txt, sep='\t')''' #df_samples = pd.read_csv(sample_txt, sep='\t', index_col='obs_id') df_lc = evaluate_by_lc(samples, ar_prediction, lc_path, target_col) out_txt = os.path.join( out_dir, '{0}_eval_{1}_land_cover_all_samples.txt'.format(stamp, year)) df_lc.to_csv(out_txt, sep='\t') if 'inventory_txt' in test_dict: score_cols = sorted(df.columns) df_inv = pd.read_csv(inventory_txt, sep='\t', index_col='stamp') for col in score_cols: score_mean = df[col].mean() df_inv.ix[stamp, col] = score_mean print 'Average %s: %2.3f' % (col.upper(), score_mean) df_inv.to_csv(inventory_txt, sep='\t') out_txt = os.path.join(out_dir, '{0}_eval_{1}.txt'.format(stamp, year)) df.to_csv(out_txt, sep='\t', index=False) samples.to_csv(out_txt.replace('.txt', '_samples.txt'), sep='\t') print '\nTotal eval time: %.1f minutes\n' % ((time.time() - t1) / 60) else: print '\nEither "test_samples" or "inventory_txt" was not specified.' +\ ' This model will not be evaluated...' print '\nTotal runtime: %.1f minutes' % ((time.time() - t0) / 60) return out_path
def main(params): # Read params and make variables from text inputs = forest.read_params(params) for i in inputs: exec("{0} = str({1})").format(i, inputs[i]) # Check that variables were specified in params try: str_check = sample_txt, target_col, var_txt, out_dir except NameError as e: print '' missing_var = str(e).split("'")[1] msg = "Variable '%s' not specified in param file:\n%s" % (missing_var, params) raise NameError(msg) # Make optional numeric arguments numeric if 'n_trees' in locals(): n_trees = int(n_trees) else: n_trees = 50 if 'n_jobs' in locals(): n_jobs = int(n_jobs) else: n_jobs = 12 if 'max_depth' in locals(): max_depth = int(max_depth) else: max_depth = None # Raise an error if var_txt doesn't exist. Otherwise, just read it in if not os.path.exists(var_txt): print '' msg = 'var_text path specified does not exist:\n%s\n\n' % var_txt raise IOError(msg) df_var = pd.read_csv(var_txt, sep='\t', index_col='var_name') # Make the output directory now = datetime.now() date_str = str(now.date()).replace('-', '') time_str = str(now.time()).replace(':', '')[:4] stamp = '{0}_{1}_{2}'.format('susceptibility', date_str, time_str) out_dir = os.path.join(out_dir, stamp) os.makedirs( out_dir ) # With a timestamp in dir, no need to check if it already exists shutil.copy2(params, out_dir) #Copy the params so the parameters used are saved #shutil.copy2(var_txt, out_dir) # Read in training samples df_train = pd.read_csv(sample_txt, sep='\t', index_col='obs_id') # Check that df_train has exactly the same columns as variables specified in df_vars train_columns = df_train.columns.tolist() unmatched_vars = [v for v in df_var.index if v not in train_columns] if len(unmatched_vars) != 0: unmatched_str = '\n'.join(unmatched_vars) msg = 'Columns not in sample_txt but specified in params:\n' + unmatched_str raise NameError(msg) # Sort the predictors in alphabetical order so that train columns can be in the same order as the predict array when # predicting later on predict_cols = sorted( np.unique( [c for c in df_train.columns for v in df_var.index if v in c])) df_var = df_var.sort_index() x_train = df_train.reindex(columns=predict_cols) y_train = df_train[target_col] rf_model = forest.train_rf_regressor(x_train, y_train, ntrees=n_trees, njobs=n_jobs, max_depth=max_depth) df_var['importance'] = rf_model.feature_importances_ rf_path = os.path.join(out_dir, 'regressor_model_%s' % stamp) forest.save_rfmodel(rf_model, rf_path) oob_score = round(rf_model.oob_score_, 3) out_var_txt = os.path.join(out_dir, os.path.basename(var_txt)) df_var.to_csv(out_var_txt, sep='\t') # Record params in inventory text file df_inv = pd.read_csv(inventory_txt, sep='\t') col_str = re.sub('[\]\[\'\"]', '', str(predict_cols)) raster_res = sample_txt.split('_')[-2].replace('m', '') df_inv = df_inv.append(pd.DataFrame([[ stamp, oob_score, '', '', '', '', len(df_train), raster_res, col_str ]], columns=df_inv.columns), ignore_index=True) existing_models = fnmatch.filter(os.listdir(os.path.dirname(out_dir)), 'susc*') df_inv = df_inv[df_inv.stamp.isin(existing_models)] df_inv.to_csv(inventory_txt, sep='\t', index=False) print 'Random Forest Regressor model written to:\n', rf_path print '\nOOB score: ', oob_score print 'Relative importance:' print df_var.importance
def main(params): t0 = time.time() inputs = read_params(params) for var in inputs: exec("{0} = str({1})").format(var, inputs[var]) out_dir = os.path.dirname(out_txt) if not os.path.exists(out_dir): print 'WARNING: output directory does not exist. Creating new directory:\n', out_dir os.makedirs(out_dir) # Make optional numeric arguments numeric if 'data_band' in locals(): data_band = int(data_band) else: data_band = 1 '''if 'nodata' in locals(): nodata = int(nodata) else: nodata = None''' if 'pct_train' in locals(): pct_train = float(pct_train) else: pct_train = None # Check that all required params were specified try: bin_list = [b.split(':') for b in bins.split(',')] bins = [(int(mn), int(mx)) for mn, mx in bin_list] n_samples = int(n_samples) nodata = int(nodata) str_check = raster_path, col_name, out_txt except NameError as e: missing_var = str(e).split("'")[1] msg = "Variable '%s' not specified in param file:\n%s" % (missing_var, params) raise NameError(msg) # Get training and testing samples df_train, df_test, raster_res = forest.get_stratified_sample( raster_path, col_name, data_band, n_samples, bins, pct_train, nodata) df_train['obs_id'] = df_train.index # Write samples to text file now = datetime.now() date_str = str(now.date()).replace('-', '') time_str = str(now.time()).replace(':', '')[:4] stamp = '{0}_{1}_{2}_{3}m'.format(len(df_train), date_str, time_str, int(raster_res)) out_txt = out_txt.replace('.txt', stamp + '.txt') bn = os.path.basename(out_txt) out_dir = os.path.join(os.path.dirname(out_txt), bn[:-4]) out_txt = os.path.join(out_dir, bn) if not os.path.exists(out_dir): os.mkdir(out_dir) df_train.to_csv(out_txt, sep='\t', index=False) print 'Samples written to:\n', out_txt, '\n' if 'var_txt' in locals(): df_var = pd.read_csv(var_txt, sep='\t', index_col='var_name') df_predictors = df_train.copy() df_predictors = forest.sample_predictors(df_predictors, df_var, nodata) df_predictors.to_csv(out_txt.replace('.txt', '_predictors.txt'), sep='\t', index=False) # If pct train was specified, then there should be some testing samples so write them to disk if pct_train: df_test['obs_id'] = df_test.index test_txt = out_txt.replace('%s.txt' % stamp, '_test_%s.txt' % stamp) df_test.to_csv(test_txt, sep='\t', index=False) print 'Test samples written to:\n', test_txt, '\n' if 'test_n_trees_params' in locals(): if not 'var_txt' in locals(): print 'Cannot test number of trees because no predictors were sampled. Try specifying a var_txt path.' x_train = df_predictors[df_var.index] y_train = df_predictors[col_name] test_params = forest.read_params(test_n_trees_params) max_trees = int(test_params['max_trees'].replace('"', '')) step = int(test_params['step'].replace('"', '')) test.test(out_dir, x_train, y_train, max_trees, step) print 'Total time for sampling predictors: %.1f seconds' % (time.time() - t0)
def main(params): # Read params and make variables from text inputs = forest.read_params(params) for i in inputs: #import pdb; pdb.set_trace() exec("{0} = str({1})").format(i, inputs[i]) # Check that variables were specified in params try: str_check = sample_txt, target_col, var_txt, out_dir except NameError as e: print '' missing_var = str(e).split("'")[1] msg = "Variable '%s' not specified in param file:\n%s" % (missing_var, params) raise NameError(msg) # Make optional numeric arguments numeric if 'n_trees' in locals(): n_trees = int(n_trees) else: n_trees = 200 if 'n_jobs' in locals(): n_jobs = int(n_jobs) else: n_jobs = 1 if 'max_depth' in locals(): max_depth = int(max_depth) else: max_depth = None # Raise an error if var_txt doesn't exist. Otherwise, just read it in if not os.path.exists(var_txt): print '' msg = 'var_text path specified does not exist:\n%s\n\n' % var_txt raise IOError(msg) df_var = pd.read_csv(var_txt, sep='\t', index_col='var_name') # Make the output directory now = datetime.now() date_str = str(now.date()).replace('-', '') time_str = str(now.time()).replace(':', '')[:4] if not 'out_dirname' in locals(): out_dirname = target_col stamp = '{0}_{1}_{2}'.format(out_dirname, date_str, time_str) out_dir = os.path.join(out_dir, stamp) os.makedirs( out_dir ) # With a timestamp in dir, no need to check if it already exists shutil.copy2(params, out_dir) #Copy the params so the parameters used are saved shutil.copy2(sample_txt, out_dir) # Read in training samples df_train = pd.read_csv(sample_txt, sep='\t', index_col='obs_id') # Check that df_train has exactly the same columns as variables specified in df_vars train_columns = df_train.columns.tolist() unmatched_vars = [v for v in df_var.index if v not in train_columns] if len(unmatched_vars) != 0: unmatched_str = '\n'.join(unmatched_vars) msg = 'Columns not in sample_txt but specified in params:\n' + unmatched_str raise NameError(msg) # Sort the predictors in alphabetical order so that train columns can be in the same order as the predict array when # predicting later on predict_cols = sorted( np.unique([c for c in df_train.columns if c in df_var.index])) predict_cols = [c for c in predict_cols if c in df_var.index] if target_col in predict_cols: predict_cols.remove(target_col) df_var = df_var.sort_index() if 'constant_vars' in inputs: constant_vars = sorted([i.strip() for i in constant_vars.split(',')]) unmatched_vars = [v for v in constant_vars if v not in train_columns] if len(unmatched_vars) != 0: unmatched_str = '\n'.join(unmatched_vars) msg = 'Columns not in sample_txt but specified in params:\n' + unmatched_str raise NameError(msg) predict_cols += constant_vars x_train = df_train.reindex(columns=predict_cols) y_train = df_train[target_col] rf_model = forest.train_rf_regressor(x_train, y_train, ntrees=n_trees, njobs=n_jobs, max_depth=max_depth) if 'constant_vars' in inputs: for v in constant_vars: df_var = df_var.append(pd.Series(name=v)) importance = rf_model.feature_importances_ df_var['importance'] = importance df_var['rank'] = [ int(r) for r in df_var.importance.rank(method='first', ascending=False) ] out_txt = os.path.join(out_dir, '%s_importance.txt' % stamp) rf_path = os.path.join(out_dir, 'regressor_model_%s' % stamp) forest.save_rfmodel(rf_model, rf_path) oob_score = round(rf_model.oob_score_, 3) out_var_txt = os.path.join(out_dir, os.path.basename(var_txt)) df_var.to_csv(out_var_txt, sep='\t') # Record params in inventory text file if 'inventory_txt' in inputs: df_inv = pd.read_csv(inventory_txt, sep='\t') cols = df_inv.columns try: res = int(re.search('[0-9]{1,2}', out_dirname).group()) except: res = None df_inv = df_inv.append(pd.DataFrame([{ 'stamp': stamp, 'temporal_res': res, 'oob_score': oob_score, 'auc': None, 'rmse': None, 'rmse_n': None, 'rmse_p': None, 'n_samples': len(df_train), 'n_trees': n_trees }]), ignore_index=True) df_inv = df_inv.reindex(columns=cols) existing_models = fnmatch.filter(os.listdir(os.path.dirname(out_dir)), '*res*') df_inv = df_inv[df_inv.stamp.isin(existing_models)] df_inv.to_csv(inventory_txt, sep='\t', index=False) print 'Random Forest Regressor model written to:\n', rf_path print '\nOOB score: ', oob_score print 'Relative importance:' print df_var.importance.sort_values(ascending=False)