def f1003_luminosity_by_estimated_redshift(input: Input, **kw):
    """Per-channel luminosity amplitude using the *estimated* redshift (f600).

    Joins the aggregate flux stats (f000) and the predicted redshift (f600)
    onto the metadata, converts the predicted redshift to a distance, and
    scales each channel's flux amplitude by distance squared.
    Returns a frame with object_id plus luminosity_est_diff_ch0..5.
    """
    flux_agg = common.load_feature("f000")
    z_est = common.load_feature("f600")

    merged = pd.merge(input.meta, z_est, on='object_id', how='left')
    merged = pd.merge(merged, flux_agg, on='object_id', how='left')

    # distance from the predicted redshift (Mpc -> Gpc)
    merged['Mpc'] = merged['hostgal_z_predicted'].apply(z2pc)
    merged['Gpc'] = merged['Mpc'] / 1000.0

    made = []
    for ch in range(6):
        diff_col = 'flux_diff_ch{}'.format(ch)
        lum_col = 'luminosity_diff_ch{}_estimated'.format(ch)
        merged[diff_col] = (merged['max(flux)_ch{}'.format(ch)]
                            - merged['min(flux)_ch{}'.format(ch)])
        # luminosity ~ flux amplitude * distance^2
        merged[lum_col] = merged[diff_col] * merged['Gpc'] * merged['Gpc']
        made.append(lum_col)

    final_names = ['luminosity_est_diff_ch{}'.format(ch) for ch in range(6)]
    merged.rename(columns=dict(zip(made, final_names)), inplace=True)
    return merged[['object_id'] + final_names]
def f1002_detected_to_falltime_ratio(input: Input, **kw):
    """Per-channel ratio of the (max-flux -> last-detected) span to the
    overall detected span ('delta').

    Merges f050 (overall detected span) and f053 (per-channel fall span)
    into the metadata and divides channel-wise.
    """
    meta = pd.merge(input.meta, common.load_feature("f050"),
                    on='object_id', how='left')
    meta = pd.merge(meta, common.load_feature("f053"),
                    on='object_id', how='left')

    out_cols = []
    for ch in range(6):
        src = 'delta(max(flux), last(detected))_ch{}'.format(ch)
        dst = src + '_to_delta_ratio'
        meta[dst] = meta[src] / meta['delta']
        out_cols.append(dst)
    return meta[['object_id'] + out_cols]
def _make_df(input_feature_list: List[str],
             remove_galactic_test_data: bool = True,
             drop_features: List[str] = None):
    """Build train/test matrices for redshift regression.

    Merges each feature table in `input_feature_list` onto the metadata,
    then splits rows into:
      * x_train: rows with a known spectroscopic redshift (hostgal_specz > 0),
      * x_test:  rows without hostgal_specz (optionally restricted to
                 extragalactic objects via hostgal_photoz > 0).
    `target` and `hostgal_specz` are removed from the feature matrices;
    hostgal_specz becomes the regression target y_train.

    Returns (x_train, x_test, y_train), all indexed by object_id.
    """
    df = common.load_metadata()
    for f in tqdm(input_feature_list):
        df = pd.merge(df, common.load_feature(f), on='object_id', how='left')

    if drop_features is not None:
        df.drop(drop_features, axis=1, inplace=True)

    df.set_index('object_id', inplace=True)

    # .copy() so the in-place drops below act on real frames, not views of
    # `df` (the original sliced without copying, which raises pandas
    # SettingWithCopyWarning and can make inplace drops unreliable).
    x_train = df[df.hostgal_specz > 0.0].copy()
    if remove_galactic_test_data:
        # keep only extragalactic test objects (photo-z available)
        x_test = df[df.hostgal_specz.isnull() & (df.hostgal_photoz > 0.0)].copy()
    else:
        x_test = df[df.hostgal_specz.isnull()].copy()

    x_train.drop('target', axis=1, inplace=True)
    x_test.drop('target', axis=1, inplace=True)

    y_train = x_train.hostgal_specz
    x_train.drop('hostgal_specz', axis=1, inplace=True)
    x_test.drop('hostgal_specz', axis=1, inplace=True)
    return x_train, x_test, y_train
def _setup(self, df, features, drop, cache_path=None, use_cache=False) -> pd.DataFrame:
    """Merge the given feature tables into `df`, optionally via a feather cache.

    If `use_cache` and the cache file loads, it is returned directly.
    Otherwise each feature in `features` is left-merged on object_id,
    columns listed in `drop` (that actually exist) are removed, and the
    result is written back to the cache when enabled.
    """
    if use_cache and cache_path is not None:
        try:
            print('load from cache: {}'.format(cache_path))
            return pd.read_feather(cache_path)
        except Exception as e:
            # Best-effort cache: if loading fails for any reason, rebuild.
            # (Was a bare `except:` which also swallowed KeyboardInterrupt.)
            print('cache load failed ({}); rebuilding'.format(e))

    for f in tqdm(features):
        tmp = common.load_feature(f)
        # normalize the join key dtype so merges don't silently upcast
        tmp['object_id'] = tmp['object_id'].astype(np.int32)
        df = pd.merge(df, tmp, on='object_id', how='left')

    if drop is not None:
        # drop only columns that are actually present
        drop_ = [d for d in drop if d in df]
        print('dropped: {}'.format(drop_))
        df.drop(drop_, axis=1, inplace=True)

    # use the same guard as the read path (was truthiness of cache_path)
    if use_cache and cache_path is not None:
        df.to_feather(cache_path)
    return df
def save_v4():
    """Export the v4 feature table (f513, f515, f517 joined on metadata)."""
    base = common.load_metadata()[['object_id', 'target']]
    for feature_name in ('f513', 'f515', 'f517'):
        base = pd.merge(base, common.load_feature(feature_name),
                        on='object_id', how='left')
    _save(base, 'features_nyanp_all_v4_{}')
def f517_blending_salts():
    """Blend SALT2 fit parameters from three fits (f500, f515, f516) into an
    inverse-variance weighted average per parameter, saved as feature f517.

    For each parameter p the weight of fit m is 1/err(m,p)^2, so fits with
    smaller reported errors dominate the average.
    """
    meta = common.load_metadata()
    f500 = common.load_feature('f500')
    f515 = common.load_feature('f515')
    f516 = common.load_feature('f516')

    df = pd.merge(meta[['object_id', 'target', 'hostgal_photoz', 'ddf']],
                  f500, on='object_id', how='left')
    df = pd.merge(df, f515, on='object_id', how='left')
    df = pd.merge(df, f516, on='object_id', how='left')

    fit_prefixes = [
        'sn_salt2_',
        'salt2-extended_p_sn3_salt2-extended_',
        'salt2_p_sn3_salt2_'
    ]
    params = ['x0', 't0', 'z', 'c', 'x1']

    for p in params:
        print('param: {}'.format(p))
        # inverse-variance weights: w = 1 / err^2
        weight_cols = []
        weighted_cols = []
        for m in fit_prefixes:
            w = 'w_{}{}'.format(p, m)
            err = df['{}{}_err'.format(m, p)]
            df[w] = 1 / (err * err)
            weight_cols.append(w)
            df[w + '_s'] = df[w] * df[m + p]
            weighted_cols.append(w + '_s')

        avg_col = 'salt2-{}-weighted-avg'.format(p)
        df[avg_col] = df[weighted_cols].sum(axis=1)
        df['tmp'] = df[weight_cols].sum(axis=1)
        df[avg_col] = df[avg_col] / df['tmp']

        # clean up the scratch columns before the next parameter
        df.drop('tmp', axis=1, inplace=True)
        df.drop(weighted_cols, axis=1, inplace=True)
        df.drop(weight_cols, axis=1, inplace=True)

    common.save_feature(
        df[['object_id'] + ['salt2-{}-weighted-avg'.format(p) for p in params]],
        'f517')
def f1000_salt2_normalized_chisq(input: Input, **kw):
    """SALT2 chi-square normalized by the number of light-curve observations.

    Merges the SALT2 fit results (f500) into the metadata, counts the
    observations per object from the light curve, and divides.
    """
    meta = pd.merge(input.meta, common.load_feature("f500"),
                    on='object_id', how='left')

    n_obs = input.lc.groupby('object_id')['mjd'].count().reset_index()
    n_obs.columns = ['object_id', 'n_observed']
    meta = pd.merge(meta, n_obs, on='object_id', how='left')

    meta['sn_salt2_chisq_norm'] = meta['sn_salt2_chisq'] / meta['n_observed']
    return meta[['object_id', 'sn_salt2_chisq_norm']]
def f701_redshift_difference():
    """Save (hostgal_photoz - predicted redshift) as feature f701."""
    # ensure the prediction feature (f601) exists before loading it
    f601_estimate_redshift()
    predicted = common.load_feature("f601")
    meta = common.load_metadata()

    merged = pd.merge(meta[['object_id', 'hostgal_photoz']], predicted,
                      on='object_id', how='left')
    merged['hostgal_photoz_predicted_diff'] = (
        merged['hostgal_photoz'] - merged['hostgal_z_predicted'])

    common.save_feature(merged[['object_id', 'hostgal_photoz_predicted_diff']],
                        "f701")
def f1085_luminosity_diff_within_snr3(input: Input, **kw):
    """Per-channel luminosity amplitude restricted to SNR>3 flux points.

    Uses the estimated redshift (f600) to compute a distance, then scales
    the SNR>3 flux amplitude (f1083 max minus f1084 min) by distance squared.
    """
    z_est = common.load_feature('f600')
    snr3_max = common.load_feature('f1083')
    snr3_min = common.load_feature('f1084')

    df = pd.merge(input.meta, z_est, on='object_id', how='left')
    df = pd.merge(df, snr3_max, on='object_id', how='left')
    df = pd.merge(df, snr3_min, on='object_id', how='left')

    # distance from the predicted redshift (Mpc -> Gpc)
    df['Mpc'] = df['hostgal_z_predicted'].apply(z2pc)
    df['Gpc'] = df['Mpc'] / 1000.0

    out_cols = []
    for ch in range(6):
        diff_col = 'snr3_flux_diff_ch{}'.format(ch)
        lum_col = 'snr3_luminosity_diff_ch{}'.format(ch)
        df[diff_col] = (df['snr3_max(flux)_ch{}'.format(ch)]
                        - df['snr3_min(flux)_ch{}'.format(ch)])
        # luminosity ~ flux amplitude * distance^2
        df[lum_col] = df[diff_col] * df['Gpc'] * df['Gpc']
        out_cols.append(lum_col)
    return df[['object_id'] + out_cols]
def f1010_redshift_difference_perch():
    """Save per-channel luminosity amplitude (from the f603 redshift estimate)
    as feature f1010.

    NOTE(review): despite the name, this computes luminosity differences per
    channel, not a redshift difference; the name is kept for compatibility.
    """
    df = common.load_metadata()
    df = pd.merge(df, common.load_feature('f603'), on='object_id', how='left')
    df = pd.merge(df, common.load_feature('f000'), on='object_id', how='left')

    # distance from the predicted redshift (Mpc -> Gpc)
    df['Mpc'] = df['hostgal_z_predicted'].apply(z2pc)
    df['Gpc'] = df['Mpc'] / 1000.0

    out_cols = []
    for ch in range(6):
        diff_col = 'flux_diff_ch{}'.format(ch)
        lum_col = 'luminosity_diff_ch{}'.format(ch)
        df[diff_col] = (df['max(flux)_ch{}'.format(ch)]
                        - df['min(flux)_ch{}'.format(ch)])
        # luminosity ~ flux amplitude * distance^2
        df[lum_col] = df[diff_col] * df['Gpc'] * df['Gpc']
        out_cols.append(lum_col)
    common.save_feature(df[['object_id'] + out_cols], "f1010")
def save_v1():
    """Assemble the v1 feature table and export three variants via `_save`.

    Builds one wide frame from the metadata plus every feature table listed
    in `features`, then saves:
      * the hand-picked `best_subset_v1` columns  ('nyanp_feat_v1_{}'),
      * the top-16 columns `best16_v1`            ('nyanp_feat_v1_{}_top16'),
      * the full frame, with out-of-fold columns prefixed 'xxx_' and the
        raw metadata columns dropped                ('features_nyanp_all_v1_{}').
    """
    # feature-table IDs to merge, in merge order
    features = ['f000', 'f001', 'f002', 'f010', 'f026', 'f050', 'f051',
                'f052', 'f053', 'f054', 'f061', 'f063', 'f100', 'f1000',
                'f1001', 'f1002', 'f1003', 'f1004', 'f1005', 'f1006', 'f101',
                'f1010', 'f102', 'f103', 'f104', 'f106', 'f107', 'f108',
                'f1080', 'f1081', 'f1082', 'f1083', 'f1085', 'f1086', 'f1087',
                'f1088', 'f1089', 'f109', 'f110', 'f140', 'f141', 'f142',
                'f143', 'f144', 'f150', 'f151', 'f152', 'f153', 'f200',
                'f201', 'f202', 'f203', 'f204', 'f205', 'f300', 'f301',
                'f302', 'f303', 'f304', 'f305', 'f306', 'f307', 'f308',
                'f309', 'f310', 'f311', 'f330', 'f340', 'f350', 'f370',
                'f400', 'f500', 'f505', 'f506', 'f507', 'f600', 'f701']
    # hand-selected subset of output columns for the main v1 export
    best_subset_v1 = ['object_id', 'hostgal_photoz_err', 'distmod', 'mwebv',
                      'mean(flux)_ch0', 'mean(flux)_ch1', 'mean(flux)_ch2',
                      'mean(flux)_ch3', 'mean(flux)_ch4', 'mean(flux)_ch5',
                      'max(flux)_ch0', 'max(flux)_ch1', 'min(flux)_ch0',
                      'min(flux)_ch1', 'min(flux)_ch2', 'min(flux)_ch3',
                      'min(flux)_ch4', 'min(flux)_ch5', 'median(flux)_ch0',
                      'median(flux)_ch1', 'median(flux)_ch2',
                      'median(flux)_ch3', 'median(flux)_ch4',
                      'median(flux)_ch5', 'std(flux)_ch0', 'std(flux)_ch1',
                      'std(flux)_ch4', 'std(flux)_ch5',
                      'timescale_th0.35_max_ch0', 'timescale_th0.35_max_ch1',
                      'timescale_th0.35_max_ch2', 'timescale_th0.35_max_ch3',
                      'timescale_th0.35_max_ch4', 'timescale_th0.35_max_ch5',
                      'diff(max(flux))_0_1', 'diff(max(flux))_1_2',
                      'diff(max(flux))_2_3', 'diff(max(flux))_3_4',
                      'diff(max(flux))_4_5', 'mean(detected)_ch1',
                      'mean(detected)_ch2', 'mean(detected)_ch3',
                      'mean(detected)_ch5', 'std(detected)_ch3',
                      'std(detected)_ch5', 'diff(max(flux))_0_3',
                      'diff(max(flux))_1_4', 'diff(max(flux))_2_5',
                      'timescale_th0.5_min_ch0', 'timescale_th0.5_min_ch1',
                      'timescale_th0.5_min_ch2', 'timescale_th0.5_min_ch4',
                      'timescale_th0.5_min_ch5', 'mean(flux)', 'max(flux)',
                      'min(flux)', 'timescale_th0.35_min_ch0',
                      'timescale_th0.35_min_ch1', 'timescale_th0.35_min_ch2',
                      'timescale_th0.35_min_ch4', 'timescale_th0.35_min_ch5',
                      'timescale_th0.15_max_ch0',
                      'timescale_th0.15_max_ch1', 'timescale_th0.15_max_ch2',
                      'timescale_th0.15_max_ch3', 'timescale_th0.15_max_ch4',
                      'timescale_th0.15_max_ch5', 'max(flux_slope)_ch0',
                      'max(flux_slope)_ch1', 'max(flux_slope)_ch2',
                      'max(flux_slope)_ch3', 'max(flux_slope)_ch4',
                      'max(flux_slope)_ch5', 'min(flux_slope)_ch0',
                      'min(flux_slope)_ch1', 'min(flux_slope)_ch2',
                      'min(flux_slope)_ch3', 'min(flux_slope)_ch4',
                      'min(flux_slope)_ch5', 'flux__c3__lag_1_ch0',
                      'flux__c3__lag_1_ch1', 'flux__c3__lag_1_ch2',
                      'flux__c3__lag_1_ch3', 'flux__c3__lag_1_ch4',
                      'flux__c3__lag_1_ch5', 'flux__autocorrelation__lag_1_ch0',
                      'flux__autocorrelation__lag_1_ch1',
                      'flux__autocorrelation__lag_1_ch2',
                      'flux__autocorrelation__lag_1_ch3',
                      'flux__autocorrelation__lag_1_ch4',
                      'flux__autocorrelation__lag_1_ch5', 'delta',
                      'max(astropy.lombscargle.power)_ch0',
                      'max(astropy.lombscargle.power)_ch1',
                      'max(astropy.lombscargle.power)_ch2',
                      'max(astropy.lombscargle.power)_ch3',
                      'max(astropy.lombscargle.power)_ch4',
                      'max(astropy.lombscargle.power)_ch5',
                      'astropy.lombscargle.timescale_ch0',
                      'astropy.lombscargle.timescale_ch1',
                      'astropy.lombscargle.timescale_ch2',
                      'astropy.lombscargle.timescale_ch3',
                      'astropy.lombscargle.timescale_ch4',
                      'astropy.lombscargle.timescale_ch5',
                      'diff(max(flux))_0_4', 'diff(max(flux))_1_5',
                      'diff(max(flux))_0_5', 'diff(min(flux))_0_1',
                      'diff(min(flux))_2_3', 'diff(min(flux))_4_5',
                      'amp(flux)_ch0/ch1', 'amp(flux)_ch1/ch2',
                      'amp(flux)_ch2/ch3', 'amp(flux)_ch3/ch4',
                      'amp(flux)_ch4/ch5', 'amp(flux)_ch0/ch2',
                      'amp(flux)_ch1/ch3', 'amp(flux)_ch2/ch4',
                      'amp(flux)_ch3/ch5', 'amp(flux)_ch0/ch3',
                      'amp(flux)_ch1/ch4', 'amp(flux)_ch2/ch5',
                      'amp(flux)_ch0/ch4', 'amp(flux)_ch1/ch5',
                      'amp(flux)_ch0/ch5',
                      'delta(max(flux), last(detected))',
                      'delta(first(detected), max(flux))',
                      'delta(max(flux), last(detected))_ch1',
                      'delta(max(flux), last(detected))_ch2',
                      'delta(max(flux), last(detected))_ch3',
                      'delta(max(flux), last(detected))_ch4',
                      'delta(max(flux), last(detected))_ch5',
                      'delta(first(detected), max(flux))_ch1',
                      'delta(first(detected), max(flux))_ch2',
                      'delta(first(detected), max(flux))_ch3',
                      'delta(first(detected), max(flux))_ch4',
                      'delta(first(detected), max(flux))_ch5',
                      'detected_median(flux)_ch1', 'detected_median(flux)_ch2',
                      'detected_median(flux)_ch3', 'detected_median(flux)_ch4',
                      'detected_median(flux)_ch5',
                      'detected_diff(median(flux))_0_1',
                      'detected_diff(median(flux))_1_2',
                      'detected_diff(median(flux))_2_3',
                      'detected_diff(median(flux))_3_4',
                      'detected_diff(median(flux))_4_5',
                      '0__fft_coefficient__coeff_0__attr_"abs"',
                      '0__fft_coefficient__coeff_1__attr_"abs"',
                      '0__kurtosis', '0__skewness',
                      '1__fft_coefficient__coeff_0__attr_"abs"',
                      '1__fft_coefficient__coeff_1__attr_"abs"',
                      '1__kurtosis', '1__skewness',
                      '2__fft_coefficient__coeff_1__attr_"abs"',
                      '2__kurtosis', '2__skewness',
                      '3__fft_coefficient__coeff_0__attr_"abs"',
                      '3__kurtosis', '3__skewness',
                      '4__fft_coefficient__coeff_0__attr_"abs"',
                      '4__fft_coefficient__coeff_1__attr_"abs"',
                      '4__kurtosis', '4__skewness',
                      '5__fft_coefficient__coeff_0__attr_"abs"',
                      '5__fft_coefficient__coeff_1__attr_"abs"',
                      '5__kurtosis', '5__skewness', 'hostgal_z_predicted',
                      'sn_salt2_chisq', 'sn_salt2_z', 'sn_salt2_t0',
                      'sn_salt2_x0', 'sn_salt2_x1', 'sn_salt2_c',
                      'sn_salt2_z_err', 'sn_salt2_t0_err', 'sn_salt2_x0_err',
                      'sn_salt2_x1_err', 'sn_salt2_c_err',
                      'luminosity_est_diff_ch0', 'luminosity_est_diff_ch1',
                      'luminosity_est_diff_ch2', 'luminosity_est_diff_ch3',
                      'luminosity_est_diff_ch4', 'luminosity_est_diff_ch5']
    # top-16 columns (by importance, presumably — verify) for the compact export
    best16_v1 = ['object_id', 'sn_salt2_c', 'delta', 'sn_salt2_x1', 'distmod',
                 'luminosity_est_diff_ch4', 'luminosity_est_diff_ch5',
                 'luminosity_est_diff_ch3', 'luminosity_est_diff_ch2',
                 'hostgal_photoz_err', 'hostgal_z_predicted',
                 'luminosity_est_diff_ch0', 'luminosity_est_diff_ch1',
                 'sn_salt2_chisq', 'sn_salt2_z', 'amp(flux)_ch3/ch5',
                 '0__skewness']

    base = common.load_metadata()[['object_id', 'distmod',
                                   'hostgal_photoz_err', 'mwebv', 'target']]
    for f in tqdm(features):
        tmp = common.load_feature(f)
        # f1080's single column is renamed to avoid clashing with f050's 'delta'
        if f == 'f1080':
            tmp.columns = ['object_id', 'delta_SNR3']
        # f1010's columns get an '_estimated' suffix to avoid clashing with
        # other luminosity_diff_ch* columns
        if f == 'f1010':
            tmp.columns = ['object_id'] + [c + '_estimated'
                                           for c in tmp.columns.tolist()[1:]]
        # guard against duplicate column names across feature tables;
        # the print gives context before the assert fires
        for c in tmp:
            if c == 'object_id':
                continue
            if c in base:
                print('{} is already in base(f): {}, {}'.format(
                    c, f, base.columns.tolist()))
            assert c not in base
        tmp['object_id'] = tmp['object_id'].astype(np.int32)
        base = pd.merge(base, tmp, on='object_id', how='left')

    # -> yuval
    _save(base[best_subset_v1+['target']], 'nyanp_feat_v1_{}')
    _save(base[best16_v1+['target']], 'nyanp_feat_v1_{}_top16')

    # add prefix to oof features
    xlist = [
        'hostgal_z_predicted',
        'hostgal_photoz_predicted_diff',
        'luminosity_est_diff_ch0',
        'luminosity_est_diff_ch1',
        'luminosity_est_diff_ch2',
        'luminosity_est_diff_ch3',
        'luminosity_est_diff_ch4',
        'luminosity_est_diff_ch5',
        'luminosity_diff_ch0_estimated',
        'luminosity_diff_ch1_estimated',
        'luminosity_diff_ch2_estimated',
        'luminosity_diff_ch3_estimated',
        'luminosity_diff_ch4_estimated',
        'luminosity_diff_ch5_estimated'
    ]
    renames = {x: 'xxx_' + x for x in xlist}
    base.rename(columns=renames, inplace=True)

    # -> mamas
    _save(base.drop(['distmod', 'hostgal_photoz_err', 'mwebv'], axis=1),
          'features_nyanp_all_v1_{}')
def save_v2():
    """Export the v2 feature table: metadata targets inner-joined with f509."""
    df = common.load_metadata()[['object_id', 'target']]
    # inner join: keep only objects that have an f509 row
    df = pd.merge(df, common.load_feature('f509'), on='object_id', how='inner')
    _save(df, 'features_nyanp_all_v2_{}')