Ejemplo n.º 1
0
def simqso_test():
    """Run ``rf.rf_reg_example`` on the ``brightqsos_sim_2k_new`` catalog.

    Steps, in order:

    1. Read the ``'data'`` table from the HDF5 file.
    2. Rename ``obsFlux_<band>`` to ``<band>`` and ``obsFluxErr_<band>`` to
       ``sigma_<band>`` for every passband in use.
    3. Replace ``np.inf`` entries with NaN.
    4. Pass the frame through ``qs.prepare_flux_ratio_catalog``.
    5. Call ``rf.rf_reg_example`` with a fixed feature list, the label
       column ``'z'`` and a single parameter set, passing ``save=True`` and
       ``save_filename='rf_sim_sdssw1w2'``.
    """
    # ----------------------------------------------------------------------
    # Feature matrix
    # ----------------------------------------------------------------------
    catalog_path = '../class_photoz/data/brightqsos_sim_2k_new.hdf5'
    df_train = pd.read_hdf(catalog_path, 'data')

    # TMASS_j / TMASS_h / TMASS_k and WISE_w3 are not part of this band set.
    passband_names = ['SDSS_u', 'SDSS_g', 'SDSS_r', 'SDSS_i', 'SDSS_z',
                      'WISE_w1', 'WISE_w2']

    # Collect every flux / flux-error rename and apply them in one pass.
    column_map = {}
    for band in passband_names:
        column_map['obsFlux_' + band] = band
        column_map['obsFluxErr_' + band] = 'sigma_' + band
    df_train.rename(columns=column_map, inplace=True)

    df_train.replace(np.inf, np.nan, inplace=True)
    # Optional magnitude cut, currently disabled:
    # df_train.query('obsMag_SDSS_i <= 18.5',inplace=True)

    # Only the returned frame is kept; the feature list handed back by the
    # helper is discarded in favour of the explicit selection below.
    df_train, _ = qs.prepare_flux_ratio_catalog(df_train, passband_names)

    # ----------------------------------------------------------------------
    # Random forest regression
    # ----------------------------------------------------------------------
    rand_state = 1
    label = 'z'
    features = ['SDSS_i', 'WISE_w1', 'ug', 'gr', 'ri', 'iz', 'zw1', 'w1w2']

    params = dict(
        n_estimators=300,
        max_depth=20,
        min_samples_split=4,
        n_jobs=2,
        random_state=rand_state,
    )

    rf.rf_reg_example(df_train, features, label, params, rand_state,
                      save=True, save_filename='rf_sim_sdssw1w2')
Ejemplo n.º 2
0
def dr7dr12_test():
    """Run ``rf.rf_reg_example`` on the ``DR7DR12Q_clean_flux_cat`` catalog.

    Reads the ``'data'`` table from the HDF5 file, replaces ``np.inf``
    entries with NaN, keeps only rows with ``0 < Z_VI < 10``, passes the
    frame through ``qs.prepare_flux_ratio_catalog``, prints the number of
    remaining rows and finally calls ``rf.rf_reg_example`` with the label
    column ``'Z_VI'``, passing ``save=True`` and
    ``save_filename='rf_sdssw1w2'``.
    """
    # --------------------------------------------------------------------------
    # Preparing the feature matrix
    # --------------------------------------------------------------------------
    df_train = pd.read_hdf('../class_photoz/data/DR7DR12Q_clean_flux_cat.hdf5',
                           'data')

    # TMASS_j / TMASS_h / TMASS_k and WISE_w3 are not part of this band set.
    passband_names = [
        'SDSS_u', 'SDSS_g', 'SDSS_r', 'SDSS_i', 'SDSS_z',
        'WISE_w1', 'WISE_w2',
    ]

    df_train.replace(np.inf, np.nan, inplace=True)
    df_train = df_train.query('0 < Z_VI < 10')

    # Optional magnitude cut, currently disabled:
    # df_train.query('SDSS_mag_i <= 18.5',inplace=True)

    # Only the returned frame is kept; the feature list handed back by the
    # helper is discarded in favour of the explicit selection below.
    df_train, _ = qs.prepare_flux_ratio_catalog(df_train, passband_names)

    # --------------------------------------------------------------------------
    # Random forest regression
    # --------------------------------------------------------------------------
    features = ['SDSS_i', 'WISE_w1', 'ug', 'gr', 'ri', 'iz', 'zw1', 'w1w2']
    label = 'Z_VI'
    rand_state = 1

    params = {
        'n_estimators': 300,
        'max_depth': 20,
        'min_samples_split': 2,
        'n_jobs': 2,
        'random_state': rand_state,
    }

    # Call form of print: valid on Python 3 and, with a single argument,
    # prints the same text on Python 2.
    print(df_train.shape[0])

    rf.rf_reg_example(df_train,
                      features,
                      label,
                      params,
                      rand_state,
                      save=True,
                      save_filename='rf_sdssw1w2')
def test_example():
    """Run ``rf.rf_reg_example`` on a cut of ``DR7DR12Q_clean_flux_cat``.

    Reads the ``'data'`` table from the HDF5 file, replaces ``np.inf``
    entries with NaN, keeps rows matching
    ``10 > Z_VI > 0.0 and PSFMAG_I < 18.5``, passes the frame through
    ``qs.prepare_flux_ratio_catalog`` and calls ``rf.rf_reg_example`` with
    the label column ``'z'``, passing ``save=True`` and
    ``save_filename='test'``.
    """
    # ----------------------------------------------------------------------
    # Feature matrix
    # ----------------------------------------------------------------------
    catalog_path = '../class_photoz/data/DR7DR12Q_clean_flux_cat.hdf5'
    # Alternative input, currently disabled:
    # '../class_photoz/data/brightqsos_sim_2k.hdf5'
    df_train = pd.read_hdf(catalog_path, 'data')

    # TMASS_j / TMASS_h / TMASS_k and WISE_w3 are not part of this band set.
    passband_names = ['SDSS_u', 'SDSS_g', 'SDSS_r', 'SDSS_i', 'SDSS_z',
                      'WISE_w1', 'WISE_w2']

    df_train.replace(np.inf, np.nan, inplace=True)
    df_train.query('10 > Z_VI > 0.0 and PSFMAG_I < 18.5', inplace=True)

    # Only the returned frame is kept; the feature list handed back by the
    # helper is discarded in favour of the explicit selection below.
    df_train, _ = qs.prepare_flux_ratio_catalog(df_train, passband_names)

    # Optional down-sampling, currently disabled:
    # df_train = df_train.sample(frac=0.5)

    # ----------------------------------------------------------------------
    # Random forest regression
    # ----------------------------------------------------------------------
    rand_state = 1
    # NOTE(review): the row filter above selects on 'Z_VI' while the label
    # is 'z' -- confirm this catalog really has a 'z' column.
    label = 'z'
    features = ['SDSS_i', 'WISE_w1', 'ug', 'gr', 'ri', 'iz', 'zw1', 'w1w2']

    params = dict(
        n_estimators=200,
        max_depth=25,
        min_samples_split=2,
        n_jobs=2,
        random_state=rand_state,
    )

    rf.rf_reg_example(df_train, features, label, params, rand_state,
                      save=True, save_filename='test')