def wrapper(*args, **kwargs):
    """Run the wrapped feature function, resolving any prerequisite
    feature first, then persist the result via common.save_feature.

    Failures are reported (colored message + traceback) but swallowed so
    one broken feature does not abort the whole extraction batch.
    """
    try:
        print('start : {}'.format(func.__name__))
        started_at = time.time()
        meta = kwargs['input'].meta
        # Resolve the prerequisite feature only when it is declared and
        # not already present in the metadata frame.
        needs_dependency = (required_feature is not None
                            and _top(required_feature) not in meta)
        if needs_dependency:
            kwargs['input'].meta = requires(meta,
                                            required_feature,
                                            required_feature_in,
                                            kwargs['target_dir'],
                                            debug=kwargs['debug'])
        result = func(*args, **kwargs)
        # Dump a CSV copy alongside the feather file in debug mode.
        with_csv = bool(kwargs['debug'])
        common.save_feature(result.reset_index(drop=True), name, with_csv)
        print('end : {} (time: {})'.format(func.__name__,
                                           time.time() - started_at))
        gc.collect()
    except Exception as exc:
        # Best-effort: log and continue with the remaining features.
        print(colored('error on function {}'.format(func.__name__), 'red'))
        print(type(exc))
        traceback.print_exc()
def f701_redshift_difference():
    """Feature f701: difference between the catalog photometric redshift
    and the model-predicted redshift from feature f601."""
    # Make sure the f601 estimate exists before loading it.
    f601_estimate_redshift()
    predicted = common.load_feature("f601")
    meta = common.load_metadata()
    merged = pd.merge(meta[['object_id', 'hostgal_photoz']],
                      predicted,
                      on='object_id',
                      how='left')
    merged['hostgal_photoz_predicted_diff'] = (
        merged['hostgal_photoz'] - merged['hostgal_z_predicted'])
    common.save_feature(
        merged[['object_id', 'hostgal_photoz_predicted_diff']], "f701")
def merge_features():
    """Concatenate the per-chunk feather files of every feature listed in
    ``features_to_run`` into one deduplicated file per feature."""
    chunk_dir = Path('features_all/')
    for feature in features_to_run:
        print('Merge feature {}'.format(feature))
        files = [str(f) for f in chunk_dir.glob('f{}_*.f'.format(feature))]
        print('Total {} files found'.format(len(files)))
        # Read every chunk (tqdm gives per-file progress), then stack them.
        merged = pd.concat(
            [pd.read_feather(path) for path in tqdm(files)]
        ).reset_index(drop=True)
        # Chunk ranges may overlap, so drop duplicated rows before saving.
        merged.drop_duplicates(inplace=True)
        common.save_feature(merged, 'f{}'.format(feature))
def _make_redshift_feature(params,
                           src_features: List[str],
                           feature_id: str,
                           nfolds: int,
                           remove_galactic_test_data: bool = True,
                           drop_features: List[str] = None):
    """Build and persist a redshift-estimate feature.

    Assembles train/test matrices from ``src_features``, fits the
    redshift estimator with ``params`` over ``nfolds`` folds, and saves
    the resulting feature under ``feature_id``.
    """
    train_x, test_x, train_y = _make_df(src_features,
                                        remove_galactic_test_data,
                                        drop_features)
    estimated = _estimate_redshift(params, train_x, test_x, train_y,
                                   nfolds=nfolds)
    common.save_feature(estimated, feature_id)
def f517_blending_salts():
    """Feature f517: inverse-variance weighted blend of the SALT2 fit
    parameters coming from three fit variants (f500, f515, f516).

    For each SALT2 parameter the three estimates are averaged with
    weights 1/err^2, yielding one ``salt2-<param>-weighted-avg`` column.
    """
    meta = common.load_metadata()
    df = pd.merge(meta[['object_id', 'target', 'hostgal_photoz', 'ddf']],
                  common.load_feature('f500'),
                  on='object_id',
                  how='left')
    for feature_id in ('f515', 'f516'):
        df = pd.merge(df, common.load_feature(feature_id),
                      on='object_id', how='left')

    # Column-name prefixes of the three fit variants.
    prefix = [
        'sn_salt2_', 'salt2-extended_p_sn3_salt2-extended_',
        'salt2_p_sn3_salt2_'
    ]
    params = ['x0', 't0', 'z', 'c', 'x1']
    for p in params:
        print('param: {}'.format(p))
        # Inverse-variance weighting: w = 1/err^2 per variant.
        weights = []
        weighted_sum = []
        for m in prefix:
            col = 'w_{}{}'.format(p, m)
            err = df['{}{}_err'.format(m, p)]
            df[col] = 1 / (err * err)
            weights.append(col)
            df[col + '_s'] = df[col] * df[m + p]
            weighted_sum.append(col + '_s')
        avg_col = 'salt2-{}-weighted-avg'.format(p)
        df[avg_col] = df[weighted_sum].sum(axis=1)
        df['tmp'] = df[weights].sum(axis=1)
        df[avg_col] = df[avg_col] / df['tmp']
        # Remove the scratch columns before the next parameter.
        df.drop('tmp', axis=1, inplace=True)
        df.drop(weighted_sum, axis=1, inplace=True)
        df.drop(weights, axis=1, inplace=True)

    common.save_feature(
        df[['object_id'] +
           ['salt2-{}-weighted-avg'.format(p) for p in params]], 'f517')
def f1010_redshift_difference_perch():
    """Feature f1010: per-channel luminosity amplitude, i.e. the flux
    range of each passband scaled by the squared predicted distance."""
    meta = common.load_metadata()
    for feature_id in ('f603', 'f000'):
        meta = pd.merge(meta, common.load_feature(feature_id),
                        on='object_id', how='left')

    # Predicted redshift -> distance (Mpc via z2pc, then Gpc).
    meta['Mpc'] = meta['hostgal_z_predicted'].apply(z2pc)
    meta['Gpc'] = meta['Mpc'] / 1000.0

    features = []
    for ch in range(6):
        flux_col = 'flux_diff_ch{}'.format(ch)
        lum_col = 'luminosity_diff_ch{}'.format(ch)
        meta[flux_col] = (meta['max(flux)_ch{}'.format(ch)] -
                          meta['min(flux)_ch{}'.format(ch)])
        # Flux range times distance squared ~ intrinsic luminosity range.
        meta[lum_col] = meta[flux_col] * meta['Gpc'] * meta['Gpc']
        features.append(lum_col)

    common.save_feature(meta[['object_id'] + features], "f1010")
# Final entry of the fit-parameter table; the opening of this dict
# (presumably ``parameters = {``) is defined earlier, outside this chunk.
516: {
    'source': 'salt2',
    'normalize': False,
    'snr': 3,
    'zbounds': 'default',
    'clip_bounds': True,
    't_bounds': True,
},
}

# Script entry point. Expects argv: <type> <data_index> <skip> <end>.
if len(sys.argv) < 5:
    print(sys.argv)
    raise RuntimeError('Specify Data Index')

# NOTE(review): ``type`` shadows the builtin; it indexes ``parameters``.
type = int(sys.argv[1])
data_index = int(sys.argv[2])
skip = int(sys.argv[3])
end = int(sys.argv[4])

lc = common.load_partial_lightcurve(data_index)
# Output id encodes parameter set, lightcurve chunk and row window.
dst_id = 'f{}_{}_{}_{}'.format(type, data_index, skip, end)
param = parameters[type]
print('param: {}'.format(param))
print('data_index: {}, skip: {}, end: {}, dst_id: {}'.format(data_index, skip,
                                                             end, dst_id))
meta = common.load_metadata()
# Restrict to extragalactic objects (hostgal_photoz > 0) before fitting.
meta = meta[meta.hostgal_photoz > 0].reset_index(drop=True)
dst = extract_features(meta, lc, skip=skip, end=end, **param)
common.save_feature(dst, dst_id,
                    with_csv_dump=config.USE_FIRST_CHUNK_FOR_TEMPLATE_FITTING)