def wrapper(*args, **kwargs):
    """Run the wrapped feature function, resolving any prerequisite
    feature first, then persist the result via common.save_feature.

    Failures are reported (colored message + traceback) but swallowed so
    one broken feature does not abort the whole extraction batch.
    """
    try:
        print('start : {}'.format(func.__name__))
        started_at = time.time()
        meta = kwargs['input'].meta
        # Resolve the prerequisite feature only when it is declared and
        # not already present in the metadata frame.
        needs_dependency = (required_feature is not None
                            and _top(required_feature) not in meta)
        if needs_dependency:
            kwargs['input'].meta = requires(meta,
                                            required_feature,
                                            required_feature_in,
                                            kwargs['target_dir'],
                                            debug=kwargs['debug'])
        result = func(*args, **kwargs)
        # Dump a CSV copy alongside the feather file in debug mode.
        with_csv = bool(kwargs['debug'])
        common.save_feature(result.reset_index(drop=True), name, with_csv)
        print('end : {} (time: {})'.format(func.__name__,
                                           time.time() - started_at))
        gc.collect()
    except Exception as exc:
        # Best-effort: log and continue with the remaining features.
        print(colored('error on function {}'.format(func.__name__), 'red'))
        print(type(exc))
        traceback.print_exc()
def f701_redshift_difference():
    """Feature f701: difference between the catalog photometric redshift
    and the model-predicted redshift from feature f601."""
    # Make sure the f601 estimate exists before loading it.
    f601_estimate_redshift()
    predicted = common.load_feature("f601")
    meta = common.load_metadata()
    merged = pd.merge(meta[['object_id', 'hostgal_photoz']],
                      predicted,
                      on='object_id',
                      how='left')
    merged['hostgal_photoz_predicted_diff'] = (
        merged['hostgal_photoz'] - merged['hostgal_z_predicted'])
    common.save_feature(
        merged[['object_id', 'hostgal_photoz_predicted_diff']], "f701")
def merge_features():
    """Concatenate the per-chunk feather files of every feature listed in
    ``features_to_run`` into one deduplicated file per feature."""
    chunk_dir = Path('features_all/')
    for feature in features_to_run:
        print('Merge feature {}'.format(feature))
        files = [str(f) for f in chunk_dir.glob('f{}_*.f'.format(feature))]
        print('Total {} files found'.format(len(files)))
        # Read every chunk (tqdm gives per-file progress), then stack them.
        merged = pd.concat(
            [pd.read_feather(path) for path in tqdm(files)]
        ).reset_index(drop=True)
        # Chunk ranges may overlap, so drop duplicated rows before saving.
        merged.drop_duplicates(inplace=True)
        common.save_feature(merged, 'f{}'.format(feature))
def _make_redshift_feature(params,
                           src_features: List[str],
                           feature_id: str,
                           nfolds: int,
                           remove_galactic_test_data: bool = True,
                           drop_features: List[str] = None):
    """Build and persist a redshift-estimate feature.

    Assembles train/test matrices from ``src_features``, fits the
    redshift estimator with ``params`` over ``nfolds`` folds, and saves
    the resulting feature under ``feature_id``.
    """
    train_x, test_x, train_y = _make_df(src_features,
                                        remove_galactic_test_data,
                                        drop_features)
    estimated = _estimate_redshift(params, train_x, test_x, train_y,
                                   nfolds=nfolds)
    common.save_feature(estimated, feature_id)
def f517_blending_salts():
    """Feature f517: inverse-variance weighted blend of the SALT2 fit
    parameters coming from three fit variants (f500, f515, f516).

    For each SALT2 parameter the three estimates are averaged with
    weights 1/err^2, yielding one ``salt2-<param>-weighted-avg`` column.
    """
    meta = common.load_metadata()
    df = pd.merge(meta[['object_id', 'target', 'hostgal_photoz', 'ddf']],
                  common.load_feature('f500'),
                  on='object_id',
                  how='left')
    for feature_id in ('f515', 'f516'):
        df = pd.merge(df, common.load_feature(feature_id),
                      on='object_id', how='left')

    # Column-name prefixes of the three fit variants.
    prefix = [
        'sn_salt2_', 'salt2-extended_p_sn3_salt2-extended_',
        'salt2_p_sn3_salt2_'
    ]
    params = ['x0', 't0', 'z', 'c', 'x1']
    for p in params:
        print('param: {}'.format(p))
        # Inverse-variance weighting: w = 1/err^2 per variant.
        weights = []
        weighted_sum = []
        for m in prefix:
            col = 'w_{}{}'.format(p, m)
            err = df['{}{}_err'.format(m, p)]
            df[col] = 1 / (err * err)
            weights.append(col)
            df[col + '_s'] = df[col] * df[m + p]
            weighted_sum.append(col + '_s')
        avg_col = 'salt2-{}-weighted-avg'.format(p)
        df[avg_col] = df[weighted_sum].sum(axis=1)
        df['tmp'] = df[weights].sum(axis=1)
        df[avg_col] = df[avg_col] / df['tmp']
        # Remove the scratch columns before the next parameter.
        df.drop('tmp', axis=1, inplace=True)
        df.drop(weighted_sum, axis=1, inplace=True)
        df.drop(weights, axis=1, inplace=True)

    common.save_feature(
        df[['object_id'] +
           ['salt2-{}-weighted-avg'.format(p) for p in params]], 'f517')
def f1010_redshift_difference_perch():
    """Feature f1010: per-channel luminosity amplitude, i.e. the flux
    range of each passband scaled by the squared predicted distance."""
    meta = common.load_metadata()
    for feature_id in ('f603', 'f000'):
        meta = pd.merge(meta, common.load_feature(feature_id),
                        on='object_id', how='left')

    # Predicted redshift -> distance (Mpc via z2pc, then Gpc).
    meta['Mpc'] = meta['hostgal_z_predicted'].apply(z2pc)
    meta['Gpc'] = meta['Mpc'] / 1000.0

    features = []
    for ch in range(6):
        flux_col = 'flux_diff_ch{}'.format(ch)
        lum_col = 'luminosity_diff_ch{}'.format(ch)
        meta[flux_col] = (meta['max(flux)_ch{}'.format(ch)] -
                          meta['min(flux)_ch{}'.format(ch)])
        # Flux range times distance squared ~ intrinsic luminosity range.
        meta[lum_col] = meta[flux_col] * meta['Gpc'] * meta['Gpc']
        features.append(lum_col)

    common.save_feature(meta[['object_id'] + features], "f1010")
# Final entry of the fit-parameter table; the opening of this dict
# (presumably ``parameters = {``) is defined earlier, outside this chunk.
516: {
    'source': 'salt2',
    'normalize': False,
    'snr': 3,
    'zbounds': 'default',
    'clip_bounds': True,
    't_bounds': True,
},
}

# Script entry point. Expects argv: <type> <data_index> <skip> <end>.
if len(sys.argv) < 5:
    print(sys.argv)
    raise RuntimeError('Specify Data Index')

# NOTE(review): ``type`` shadows the builtin; it indexes ``parameters``.
type = int(sys.argv[1])
data_index = int(sys.argv[2])
skip = int(sys.argv[3])
end = int(sys.argv[4])

lc = common.load_partial_lightcurve(data_index)
# Output id encodes parameter set, lightcurve chunk and row window.
dst_id = 'f{}_{}_{}_{}'.format(type, data_index, skip, end)
param = parameters[type]
print('param: {}'.format(param))
print('data_index: {}, skip: {}, end: {}, dst_id: {}'.format(data_index, skip,
                                                             end, dst_id))
meta = common.load_metadata()
# Restrict to extragalactic objects (hostgal_photoz > 0) before fitting.
meta = meta[meta.hostgal_photoz > 0].reset_index(drop=True)
dst = extract_features(meta, lc, skip=skip, end=end, **param)
common.save_feature(dst, dst_id,
                    with_csv_dump=config.USE_FIRST_CHUNK_FOR_TEMPLATE_FITTING)