def f1003_luminosity_by_estimated_redshift(input: Input, **kw):
    """Per-channel luminosity amplitude using the *estimated* redshift (f600).

    Joins the aggregate flux stats (f000) and the predicted redshift (f600)
    onto the metadata, converts the predicted redshift to a distance, and
    scales each channel's flux amplitude by distance squared.
    Returns a frame with object_id plus luminosity_est_diff_ch0..5.
    """
    flux_agg = common.load_feature("f000")
    z_est = common.load_feature("f600")

    merged = pd.merge(input.meta, z_est, on='object_id', how='left')
    merged = pd.merge(merged, flux_agg, on='object_id', how='left')

    # distance from the predicted redshift (Mpc -> Gpc)
    merged['Mpc'] = merged['hostgal_z_predicted'].apply(z2pc)
    merged['Gpc'] = merged['Mpc'] / 1000.0

    made = []
    for ch in range(6):
        diff_col = 'flux_diff_ch{}'.format(ch)
        lum_col = 'luminosity_diff_ch{}_estimated'.format(ch)
        merged[diff_col] = (merged['max(flux)_ch{}'.format(ch)]
                            - merged['min(flux)_ch{}'.format(ch)])
        # luminosity ~ flux amplitude * distance^2
        merged[lum_col] = merged[diff_col] * merged['Gpc'] * merged['Gpc']
        made.append(lum_col)

    final_names = ['luminosity_est_diff_ch{}'.format(ch) for ch in range(6)]
    merged.rename(columns=dict(zip(made, final_names)), inplace=True)
    return merged[['object_id'] + final_names]
def f1002_detected_to_falltime_ratio(input: Input, **kw):
    """Per-channel ratio of the (max-flux -> last-detected) span to the
    overall detected span ('delta').

    Merges f050 (overall detected span) and f053 (per-channel fall span)
    into the metadata and divides channel-wise.
    """
    meta = pd.merge(input.meta, common.load_feature("f050"),
                    on='object_id', how='left')
    meta = pd.merge(meta, common.load_feature("f053"),
                    on='object_id', how='left')

    out_cols = []
    for ch in range(6):
        src = 'delta(max(flux), last(detected))_ch{}'.format(ch)
        dst = src + '_to_delta_ratio'
        meta[dst] = meta[src] / meta['delta']
        out_cols.append(dst)
    return meta[['object_id'] + out_cols]
def _make_df(input_feature_list: List[str],
             remove_galactic_test_data: bool = True,
             drop_features: List[str] = None):
    """Build train/test matrices for redshift regression.

    Merges each feature table in `input_feature_list` onto the metadata,
    then splits rows into:
      * x_train: rows with a known spectroscopic redshift (hostgal_specz > 0),
      * x_test:  rows without hostgal_specz (optionally restricted to
                 extragalactic objects via hostgal_photoz > 0).
    `target` and `hostgal_specz` are removed from the feature matrices;
    hostgal_specz becomes the regression target y_train.

    Returns (x_train, x_test, y_train), all indexed by object_id.
    """
    df = common.load_metadata()
    for f in tqdm(input_feature_list):
        df = pd.merge(df, common.load_feature(f), on='object_id', how='left')

    if drop_features is not None:
        df.drop(drop_features, axis=1, inplace=True)

    df.set_index('object_id', inplace=True)

    # .copy() so the in-place drops below act on real frames, not views of
    # `df` (the original sliced without copying, which raises pandas
    # SettingWithCopyWarning and can make inplace drops unreliable).
    x_train = df[df.hostgal_specz > 0.0].copy()
    if remove_galactic_test_data:
        # keep only extragalactic test objects (photo-z available)
        x_test = df[df.hostgal_specz.isnull() & (df.hostgal_photoz > 0.0)].copy()
    else:
        x_test = df[df.hostgal_specz.isnull()].copy()

    x_train.drop('target', axis=1, inplace=True)
    x_test.drop('target', axis=1, inplace=True)

    y_train = x_train.hostgal_specz
    x_train.drop('hostgal_specz', axis=1, inplace=True)
    x_test.drop('hostgal_specz', axis=1, inplace=True)
    return x_train, x_test, y_train
def _setup(self, df, features, drop, cache_path=None, use_cache=False) -> pd.DataFrame:
    """Merge the given feature tables into `df`, optionally via a feather cache.

    If `use_cache` and the cache file loads, it is returned directly.
    Otherwise each feature in `features` is left-merged on object_id,
    columns listed in `drop` (that actually exist) are removed, and the
    result is written back to the cache when enabled.
    """
    if use_cache and cache_path is not None:
        try:
            print('load from cache: {}'.format(cache_path))
            return pd.read_feather(cache_path)
        except Exception as e:
            # Best-effort cache: if loading fails for any reason, rebuild.
            # (Was a bare `except:` which also swallowed KeyboardInterrupt.)
            print('cache load failed ({}); rebuilding'.format(e))

    for f in tqdm(features):
        tmp = common.load_feature(f)
        # normalize the join key dtype so merges don't silently upcast
        tmp['object_id'] = tmp['object_id'].astype(np.int32)
        df = pd.merge(df, tmp, on='object_id', how='left')

    if drop is not None:
        # drop only columns that are actually present
        drop_ = [d for d in drop if d in df]
        print('dropped: {}'.format(drop_))
        df.drop(drop_, axis=1, inplace=True)

    # use the same guard as the read path (was truthiness of cache_path)
    if use_cache and cache_path is not None:
        df.to_feather(cache_path)
    return df
def save_v4():
    """Export the v4 feature table (f513, f515, f517 joined on metadata)."""
    base = common.load_metadata()[['object_id', 'target']]
    for feature_name in ('f513', 'f515', 'f517'):
        base = pd.merge(base, common.load_feature(feature_name),
                        on='object_id', how='left')
    _save(base, 'features_nyanp_all_v4_{}')
def f517_blending_salts():
    """Blend SALT2 fit parameters from three fits (f500, f515, f516) into an
    inverse-variance weighted average per parameter, saved as feature f517.

    For each parameter p the weight of fit m is 1/err(m,p)^2, so fits with
    smaller reported errors dominate the average.
    """
    meta = common.load_metadata()
    f500 = common.load_feature('f500')
    f515 = common.load_feature('f515')
    f516 = common.load_feature('f516')

    df = pd.merge(meta[['object_id', 'target', 'hostgal_photoz', 'ddf']],
                  f500, on='object_id', how='left')
    df = pd.merge(df, f515, on='object_id', how='left')
    df = pd.merge(df, f516, on='object_id', how='left')

    fit_prefixes = [
        'sn_salt2_',
        'salt2-extended_p_sn3_salt2-extended_',
        'salt2_p_sn3_salt2_'
    ]
    params = ['x0', 't0', 'z', 'c', 'x1']

    for p in params:
        print('param: {}'.format(p))
        # inverse-variance weights: w = 1 / err^2
        weight_cols = []
        weighted_cols = []
        for m in fit_prefixes:
            w = 'w_{}{}'.format(p, m)
            err = df['{}{}_err'.format(m, p)]
            df[w] = 1 / (err * err)
            weight_cols.append(w)
            df[w + '_s'] = df[w] * df[m + p]
            weighted_cols.append(w + '_s')

        avg_col = 'salt2-{}-weighted-avg'.format(p)
        df[avg_col] = df[weighted_cols].sum(axis=1)
        df['tmp'] = df[weight_cols].sum(axis=1)
        df[avg_col] = df[avg_col] / df['tmp']

        # clean up the scratch columns before the next parameter
        df.drop('tmp', axis=1, inplace=True)
        df.drop(weighted_cols, axis=1, inplace=True)
        df.drop(weight_cols, axis=1, inplace=True)

    common.save_feature(
        df[['object_id'] + ['salt2-{}-weighted-avg'.format(p) for p in params]],
        'f517')
def f1000_salt2_normalized_chisq(input: Input, **kw):
    """SALT2 chi-square normalized by the number of light-curve observations.

    Merges the SALT2 fit results (f500) into the metadata, counts the
    observations per object from the light curve, and divides.
    """
    meta = pd.merge(input.meta, common.load_feature("f500"),
                    on='object_id', how='left')

    n_obs = input.lc.groupby('object_id')['mjd'].count().reset_index()
    n_obs.columns = ['object_id', 'n_observed']
    meta = pd.merge(meta, n_obs, on='object_id', how='left')

    meta['sn_salt2_chisq_norm'] = meta['sn_salt2_chisq'] / meta['n_observed']
    return meta[['object_id', 'sn_salt2_chisq_norm']]
def f701_redshift_difference():
    """Save (hostgal_photoz - predicted redshift) as feature f701."""
    # ensure the prediction feature (f601) exists before loading it
    f601_estimate_redshift()
    predicted = common.load_feature("f601")
    meta = common.load_metadata()

    merged = pd.merge(meta[['object_id', 'hostgal_photoz']], predicted,
                      on='object_id', how='left')
    merged['hostgal_photoz_predicted_diff'] = (
        merged['hostgal_photoz'] - merged['hostgal_z_predicted'])

    common.save_feature(merged[['object_id', 'hostgal_photoz_predicted_diff']],
                        "f701")
def f1085_luminosity_diff_within_snr3(input: Input, **kw):
    """Per-channel luminosity amplitude restricted to SNR>3 flux points.

    Uses the estimated redshift (f600) to compute a distance, then scales
    the SNR>3 flux amplitude (f1083 max minus f1084 min) by distance squared.
    """
    z_est = common.load_feature('f600')
    snr3_max = common.load_feature('f1083')
    snr3_min = common.load_feature('f1084')

    df = pd.merge(input.meta, z_est, on='object_id', how='left')
    df = pd.merge(df, snr3_max, on='object_id', how='left')
    df = pd.merge(df, snr3_min, on='object_id', how='left')

    # distance from the predicted redshift (Mpc -> Gpc)
    df['Mpc'] = df['hostgal_z_predicted'].apply(z2pc)
    df['Gpc'] = df['Mpc'] / 1000.0

    out_cols = []
    for ch in range(6):
        diff_col = 'snr3_flux_diff_ch{}'.format(ch)
        lum_col = 'snr3_luminosity_diff_ch{}'.format(ch)
        df[diff_col] = (df['snr3_max(flux)_ch{}'.format(ch)]
                        - df['snr3_min(flux)_ch{}'.format(ch)])
        # luminosity ~ flux amplitude * distance^2
        df[lum_col] = df[diff_col] * df['Gpc'] * df['Gpc']
        out_cols.append(lum_col)
    return df[['object_id'] + out_cols]
def f1010_redshift_difference_perch():
    """Save per-channel luminosity amplitude (from the f603 redshift estimate)
    as feature f1010.

    NOTE(review): despite the name, this computes luminosity differences per
    channel, not a redshift difference; the name is kept for compatibility.
    """
    df = common.load_metadata()
    df = pd.merge(df, common.load_feature('f603'), on='object_id', how='left')
    df = pd.merge(df, common.load_feature('f000'), on='object_id', how='left')

    # distance from the predicted redshift (Mpc -> Gpc)
    df['Mpc'] = df['hostgal_z_predicted'].apply(z2pc)
    df['Gpc'] = df['Mpc'] / 1000.0

    out_cols = []
    for ch in range(6):
        diff_col = 'flux_diff_ch{}'.format(ch)
        lum_col = 'luminosity_diff_ch{}'.format(ch)
        df[diff_col] = (df['max(flux)_ch{}'.format(ch)]
                        - df['min(flux)_ch{}'.format(ch)])
        # luminosity ~ flux amplitude * distance^2
        df[lum_col] = df[diff_col] * df['Gpc'] * df['Gpc']
        out_cols.append(lum_col)
    common.save_feature(df[['object_id'] + out_cols], "f1010")
def save_v1():
    """Assemble the v1 feature table and export three variants via `_save`.

    Builds one wide frame from the metadata plus every feature table listed
    in `features`, then saves:
      * the hand-picked `best_subset_v1` columns  ('nyanp_feat_v1_{}'),
      * the top-16 columns `best16_v1`            ('nyanp_feat_v1_{}_top16'),
      * the full frame, with out-of-fold columns prefixed 'xxx_' and the
        raw metadata columns dropped                ('features_nyanp_all_v1_{}').
    """
    # feature-table IDs to merge, in merge order
    features = ['f000', 'f001', 'f002', 'f010', 'f026', 'f050', 'f051',
                'f052', 'f053', 'f054', 'f061', 'f063', 'f100', 'f1000',
                'f1001', 'f1002', 'f1003', 'f1004', 'f1005', 'f1006', 'f101',
                'f1010', 'f102', 'f103', 'f104', 'f106', 'f107', 'f108',
                'f1080', 'f1081', 'f1082', 'f1083', 'f1085', 'f1086', 'f1087',
                'f1088', 'f1089', 'f109', 'f110', 'f140', 'f141', 'f142',
                'f143', 'f144', 'f150', 'f151', 'f152', 'f153', 'f200',
                'f201', 'f202', 'f203', 'f204', 'f205', 'f300', 'f301',
                'f302', 'f303', 'f304', 'f305', 'f306', 'f307', 'f308',
                'f309', 'f310', 'f311', 'f330', 'f340', 'f350', 'f370',
                'f400', 'f500', 'f505', 'f506', 'f507', 'f600', 'f701']
    # hand-selected subset of output columns for the main v1 export
    best_subset_v1 = ['object_id', 'hostgal_photoz_err', 'distmod', 'mwebv',
                      'mean(flux)_ch0', 'mean(flux)_ch1', 'mean(flux)_ch2',
                      'mean(flux)_ch3', 'mean(flux)_ch4', 'mean(flux)_ch5',
                      'max(flux)_ch0', 'max(flux)_ch1', 'min(flux)_ch0',
                      'min(flux)_ch1', 'min(flux)_ch2', 'min(flux)_ch3',
                      'min(flux)_ch4', 'min(flux)_ch5', 'median(flux)_ch0',
                      'median(flux)_ch1', 'median(flux)_ch2',
                      'median(flux)_ch3', 'median(flux)_ch4',
                      'median(flux)_ch5', 'std(flux)_ch0', 'std(flux)_ch1',
                      'std(flux)_ch4', 'std(flux)_ch5',
                      'timescale_th0.35_max_ch0', 'timescale_th0.35_max_ch1',
                      'timescale_th0.35_max_ch2', 'timescale_th0.35_max_ch3',
                      'timescale_th0.35_max_ch4', 'timescale_th0.35_max_ch5',
                      'diff(max(flux))_0_1', 'diff(max(flux))_1_2',
                      'diff(max(flux))_2_3', 'diff(max(flux))_3_4',
                      'diff(max(flux))_4_5', 'mean(detected)_ch1',
                      'mean(detected)_ch2', 'mean(detected)_ch3',
                      'mean(detected)_ch5', 'std(detected)_ch3',
                      'std(detected)_ch5', 'diff(max(flux))_0_3',
                      'diff(max(flux))_1_4', 'diff(max(flux))_2_5',
                      'timescale_th0.5_min_ch0', 'timescale_th0.5_min_ch1',
                      'timescale_th0.5_min_ch2', 'timescale_th0.5_min_ch4',
                      'timescale_th0.5_min_ch5', 'mean(flux)', 'max(flux)',
                      'min(flux)', 'timescale_th0.35_min_ch0',
                      'timescale_th0.35_min_ch1', 'timescale_th0.35_min_ch2',
                      'timescale_th0.35_min_ch4', 'timescale_th0.35_min_ch5',
                      'timescale_th0.15_max_ch0',
                      'timescale_th0.15_max_ch1', 'timescale_th0.15_max_ch2',
                      'timescale_th0.15_max_ch3', 'timescale_th0.15_max_ch4',
                      'timescale_th0.15_max_ch5', 'max(flux_slope)_ch0',
                      'max(flux_slope)_ch1', 'max(flux_slope)_ch2',
                      'max(flux_slope)_ch3', 'max(flux_slope)_ch4',
                      'max(flux_slope)_ch5', 'min(flux_slope)_ch0',
                      'min(flux_slope)_ch1', 'min(flux_slope)_ch2',
                      'min(flux_slope)_ch3', 'min(flux_slope)_ch4',
                      'min(flux_slope)_ch5', 'flux__c3__lag_1_ch0',
                      'flux__c3__lag_1_ch1', 'flux__c3__lag_1_ch2',
                      'flux__c3__lag_1_ch3', 'flux__c3__lag_1_ch4',
                      'flux__c3__lag_1_ch5', 'flux__autocorrelation__lag_1_ch0',
                      'flux__autocorrelation__lag_1_ch1',
                      'flux__autocorrelation__lag_1_ch2',
                      'flux__autocorrelation__lag_1_ch3',
                      'flux__autocorrelation__lag_1_ch4',
                      'flux__autocorrelation__lag_1_ch5', 'delta',
                      'max(astropy.lombscargle.power)_ch0',
                      'max(astropy.lombscargle.power)_ch1',
                      'max(astropy.lombscargle.power)_ch2',
                      'max(astropy.lombscargle.power)_ch3',
                      'max(astropy.lombscargle.power)_ch4',
                      'max(astropy.lombscargle.power)_ch5',
                      'astropy.lombscargle.timescale_ch0',
                      'astropy.lombscargle.timescale_ch1',
                      'astropy.lombscargle.timescale_ch2',
                      'astropy.lombscargle.timescale_ch3',
                      'astropy.lombscargle.timescale_ch4',
                      'astropy.lombscargle.timescale_ch5',
                      'diff(max(flux))_0_4', 'diff(max(flux))_1_5',
                      'diff(max(flux))_0_5', 'diff(min(flux))_0_1',
                      'diff(min(flux))_2_3', 'diff(min(flux))_4_5',
                      'amp(flux)_ch0/ch1', 'amp(flux)_ch1/ch2',
                      'amp(flux)_ch2/ch3', 'amp(flux)_ch3/ch4',
                      'amp(flux)_ch4/ch5', 'amp(flux)_ch0/ch2',
                      'amp(flux)_ch1/ch3', 'amp(flux)_ch2/ch4',
                      'amp(flux)_ch3/ch5', 'amp(flux)_ch0/ch3',
                      'amp(flux)_ch1/ch4', 'amp(flux)_ch2/ch5',
                      'amp(flux)_ch0/ch4', 'amp(flux)_ch1/ch5',
                      'amp(flux)_ch0/ch5',
                      'delta(max(flux), last(detected))',
                      'delta(first(detected), max(flux))',
                      'delta(max(flux), last(detected))_ch1',
                      'delta(max(flux), last(detected))_ch2',
                      'delta(max(flux), last(detected))_ch3',
                      'delta(max(flux), last(detected))_ch4',
                      'delta(max(flux), last(detected))_ch5',
                      'delta(first(detected), max(flux))_ch1',
                      'delta(first(detected), max(flux))_ch2',
                      'delta(first(detected), max(flux))_ch3',
                      'delta(first(detected), max(flux))_ch4',
                      'delta(first(detected), max(flux))_ch5',
                      'detected_median(flux)_ch1', 'detected_median(flux)_ch2',
                      'detected_median(flux)_ch3', 'detected_median(flux)_ch4',
                      'detected_median(flux)_ch5',
                      'detected_diff(median(flux))_0_1',
                      'detected_diff(median(flux))_1_2',
                      'detected_diff(median(flux))_2_3',
                      'detected_diff(median(flux))_3_4',
                      'detected_diff(median(flux))_4_5',
                      '0__fft_coefficient__coeff_0__attr_"abs"',
                      '0__fft_coefficient__coeff_1__attr_"abs"',
                      '0__kurtosis', '0__skewness',
                      '1__fft_coefficient__coeff_0__attr_"abs"',
                      '1__fft_coefficient__coeff_1__attr_"abs"',
                      '1__kurtosis', '1__skewness',
                      '2__fft_coefficient__coeff_1__attr_"abs"',
                      '2__kurtosis', '2__skewness',
                      '3__fft_coefficient__coeff_0__attr_"abs"',
                      '3__kurtosis', '3__skewness',
                      '4__fft_coefficient__coeff_0__attr_"abs"',
                      '4__fft_coefficient__coeff_1__attr_"abs"',
                      '4__kurtosis', '4__skewness',
                      '5__fft_coefficient__coeff_0__attr_"abs"',
                      '5__fft_coefficient__coeff_1__attr_"abs"',
                      '5__kurtosis', '5__skewness', 'hostgal_z_predicted',
                      'sn_salt2_chisq', 'sn_salt2_z', 'sn_salt2_t0',
                      'sn_salt2_x0', 'sn_salt2_x1', 'sn_salt2_c',
                      'sn_salt2_z_err', 'sn_salt2_t0_err', 'sn_salt2_x0_err',
                      'sn_salt2_x1_err', 'sn_salt2_c_err',
                      'luminosity_est_diff_ch0', 'luminosity_est_diff_ch1',
                      'luminosity_est_diff_ch2', 'luminosity_est_diff_ch3',
                      'luminosity_est_diff_ch4', 'luminosity_est_diff_ch5']
    # top-16 columns (by importance, presumably — verify) for the compact export
    best16_v1 = ['object_id', 'sn_salt2_c', 'delta', 'sn_salt2_x1', 'distmod',
                 'luminosity_est_diff_ch4', 'luminosity_est_diff_ch5',
                 'luminosity_est_diff_ch3', 'luminosity_est_diff_ch2',
                 'hostgal_photoz_err', 'hostgal_z_predicted',
                 'luminosity_est_diff_ch0', 'luminosity_est_diff_ch1',
                 'sn_salt2_chisq', 'sn_salt2_z', 'amp(flux)_ch3/ch5',
                 '0__skewness']

    base = common.load_metadata()[['object_id', 'distmod',
                                   'hostgal_photoz_err', 'mwebv', 'target']]
    for f in tqdm(features):
        tmp = common.load_feature(f)
        # f1080's single column is renamed to avoid clashing with f050's 'delta'
        if f == 'f1080':
            tmp.columns = ['object_id', 'delta_SNR3']
        # f1010's columns get an '_estimated' suffix to avoid clashing with
        # other luminosity_diff_ch* columns
        if f == 'f1010':
            tmp.columns = ['object_id'] + [c + '_estimated'
                                           for c in tmp.columns.tolist()[1:]]
        # guard against duplicate column names across feature tables;
        # the print gives context before the assert fires
        for c in tmp:
            if c == 'object_id':
                continue
            if c in base:
                print('{} is already in base(f): {}, {}'.format(
                    c, f, base.columns.tolist()))
            assert c not in base
        tmp['object_id'] = tmp['object_id'].astype(np.int32)
        base = pd.merge(base, tmp, on='object_id', how='left')

    # -> yuval
    _save(base[best_subset_v1+['target']], 'nyanp_feat_v1_{}')
    _save(base[best16_v1+['target']], 'nyanp_feat_v1_{}_top16')

    # add prefix to oof features
    xlist = [
        'hostgal_z_predicted',
        'hostgal_photoz_predicted_diff',
        'luminosity_est_diff_ch0',
        'luminosity_est_diff_ch1',
        'luminosity_est_diff_ch2',
        'luminosity_est_diff_ch3',
        'luminosity_est_diff_ch4',
        'luminosity_est_diff_ch5',
        'luminosity_diff_ch0_estimated',
        'luminosity_diff_ch1_estimated',
        'luminosity_diff_ch2_estimated',
        'luminosity_diff_ch3_estimated',
        'luminosity_diff_ch4_estimated',
        'luminosity_diff_ch5_estimated'
    ]
    renames = {x: 'xxx_' + x for x in xlist}
    base.rename(columns=renames, inplace=True)

    # -> mamas
    _save(base.drop(['distmod', 'hostgal_photoz_err', 'mwebv'], axis=1),
          'features_nyanp_all_v1_{}')
def save_v2():
    """Export the v2 feature table: metadata targets inner-joined with f509."""
    df = common.load_metadata()[['object_id', 'target']]
    # inner join: keep only objects that have an f509 row
    df = pd.merge(df, common.load_feature('f509'), on='object_id', how='inner')
    _save(df, 'features_nyanp_all_v2_{}')