def simqso_test():
    """Run the random forest regression example on the simulated quasar catalog.

    Loads ``brightqsos_sim_2k_new.hdf5``, renames the ``obsFlux_<band>`` and
    ``obsFluxErr_<band>`` columns to ``<band>`` and ``sigma_<band>``, passes
    the catalog through ``qs.prepare_flux_ratio_catalog`` and then calls
    ``rf.rf_reg_example`` with ``z`` as the regression label. Results are
    saved under the name ``rf_sim_sdssw1w2``.
    """
    # ----------------------------------------------------------------------
    # Feature matrix
    # ----------------------------------------------------------------------
    catalog = pd.read_hdf(
        '../class_photoz/data/brightqsos_sim_2k_new.hdf5', 'data')

    # SDSS ugriz plus WISE W1/W2. The 2MASS bands (TMASS_j, TMASS_h, TMASS_k)
    # and WISE_w3 are not included.
    bands = ['SDSS_u', 'SDSS_g', 'SDSS_r', 'SDSS_i', 'SDSS_z',
             'WISE_w1', 'WISE_w2']

    # Collect every flux / flux-error rename into one mapping; columns that
    # are absent from the frame are left untouched by ``rename``.
    column_map = {}
    for band in bands:
        column_map['obsFlux_' + band] = band
        column_map['obsFluxErr_' + band] = 'sigma_' + band
    catalog.rename(columns=column_map, inplace=True)

    # Treat infinite values as missing data.
    catalog.replace(np.inf, np.nan, inplace=True)

    # The feature list returned by the helper is discarded: a hand-picked
    # list is used below instead.
    # NOTE(review): presumably this helper adds the flux-ratio columns
    # ('ug', 'gr', ...) referenced below -- confirm against ``qs``.
    catalog, _ = qs.prepare_flux_ratio_catalog(catalog, bands)

    # ----------------------------------------------------------------------
    # Random forest regression
    # ----------------------------------------------------------------------
    feature_names = ['SDSS_i', 'WISE_w1',
                     'ug', 'gr', 'ri', 'iz', 'zw1', 'w1w2']
    target = 'z'
    seed = 1

    rf_params = dict(
        n_estimators=300,
        max_depth=20,
        min_samples_split=4,
        n_jobs=2,
        random_state=seed,
    )

    rf.rf_reg_example(catalog, feature_names, target, rf_params, seed,
                      save=True, save_filename='rf_sim_sdssw1w2')
def dr7dr12_test():
    """Run the random forest regression example on the DR7/DR12 quasar catalog.

    Loads ``DR7DR12Q_clean_flux_cat.hdf5``, keeps rows with
    ``0 < Z_VI < 10``, passes the catalog through
    ``qs.prepare_flux_ratio_catalog`` and calls ``rf.rf_reg_example`` with
    ``Z_VI`` as the regression label. The number of rows used is printed
    before the run, and results are saved under the name ``rf_sdssw1w2``.
    """
    # ----------------------------------------------------------------------
    # Preparing the feature matrix
    # ----------------------------------------------------------------------
    df_train = pd.read_hdf(
        '../class_photoz/data/DR7DR12Q_clean_flux_cat.hdf5', 'data')

    # SDSS ugriz plus WISE W1/W2. The 2MASS bands (TMASS_j, TMASS_h, TMASS_k)
    # and WISE_w3 are not included.
    passband_names = [
        'SDSS_u', 'SDSS_g', 'SDSS_r', 'SDSS_i', 'SDSS_z',
        'WISE_w1', 'WISE_w2',
    ]

    # Treat infinite values as missing data.
    df_train.replace(np.inf, np.nan, inplace=True)

    # Restrict to rows whose Z_VI value lies strictly between 0 and 10.
    df_train = df_train.query('0 < Z_VI < 10')

    # The feature list returned by the helper is discarded: a hand-picked
    # list is used below instead.
    # NOTE(review): presumably this helper adds the flux-ratio columns
    # ('ug', 'gr', ...) referenced below -- confirm against ``qs``.
    df_train, _ = qs.prepare_flux_ratio_catalog(df_train, passband_names)

    # ----------------------------------------------------------------------
    # Random forest regression
    # ----------------------------------------------------------------------
    features = ['SDSS_i', 'WISE_w1', 'ug', 'gr', 'ri', 'iz', 'zw1', 'w1w2']
    label = 'Z_VI'
    rand_state = 1

    params = {
        'n_estimators': 300,
        'max_depth': 20,
        'min_samples_split': 2,
        'n_jobs': 2,
        'random_state': rand_state,
    }

    # Function-call form of print: identical output on Python 2 for a single
    # argument, and valid syntax on Python 3 (the statement form is not).
    print(df_train.shape[0])

    rf.rf_reg_example(df_train, features, label, params, rand_state,
                      save=True, save_filename='rf_sdssw1w2')
def test_example():
    """Run the random forest regression example on a bright DR7/DR12 subset.

    Loads ``DR7DR12Q_clean_flux_cat.hdf5``, keeps rows with
    ``0.0 < Z_VI < 10`` and ``PSFMAG_I < 18.5``, passes the catalog through
    ``qs.prepare_flux_ratio_catalog`` and calls ``rf.rf_reg_example`` with
    ``z`` as the regression label. Results are saved under the name ``test``.
    """
    # ----------------------------------------------------------------------
    # Feature matrix
    # ----------------------------------------------------------------------
    catalog = pd.read_hdf(
        '../class_photoz/data/DR7DR12Q_clean_flux_cat.hdf5', 'data')

    # SDSS ugriz plus WISE W1/W2. The 2MASS bands (TMASS_j, TMASS_h, TMASS_k)
    # and WISE_w3 are not included.
    bands = ['SDSS_u', 'SDSS_g', 'SDSS_r', 'SDSS_i', 'SDSS_z',
             'WISE_w1', 'WISE_w2']

    # Treat infinite values as missing data, then apply the selection cut
    # in place.
    catalog.replace(np.inf, np.nan, inplace=True)
    catalog.query('10 > Z_VI > 0.0 and PSFMAG_I < 18.5', inplace=True)

    # The feature list returned by the helper is discarded: a hand-picked
    # list is used below instead.
    # NOTE(review): presumably this helper adds the flux-ratio columns
    # ('ug', 'gr', ...) referenced below -- confirm against ``qs``.
    catalog, _ = qs.prepare_flux_ratio_catalog(catalog, bands)

    # ----------------------------------------------------------------------
    # Random forest regression
    # ----------------------------------------------------------------------
    feature_names = ['SDSS_i', 'WISE_w1',
                     'ug', 'gr', 'ri', 'iz', 'zw1', 'w1w2']

    # NOTE(review): the selection above filters on 'Z_VI', and the sibling
    # DR7/DR12 run uses 'Z_VI' as its label, yet the label here is 'z'.
    # Confirm this catalog really has a 'z' column and that it is the
    # intended regression target.
    target = 'z'
    seed = 1

    rf_params = dict(
        n_estimators=200,
        max_depth=25,
        min_samples_split=2,
        n_jobs=2,
        random_state=seed,
    )

    rf.rf_reg_example(catalog, feature_names, target, rf_params, seed,
                      save=True, save_filename='test')