Code example #1

import logging

import pandas as pd
import yaml

# DBHandler, FileHandler, get_param_names and the parsed `options` object are
# assumed to be provided by the surrounding project; they are not defined in
# this snippet.

def main():

    dbh = DBHandler(options.db_config_file, options.db_config_name)
    dbh.return_df = False
    fh = FileHandler(s3_bucket=options.bucket)

    with open(options.example_config_file) as f:
        setups = yaml.load(f, Loader=yaml.FullLoader)

    for setup in setups['examples']:

        output_file = '{}/{}-{}.csv'.format(
            setup['output_dir'], setup['starttime'].strftime('%Y%m%dT%H%M%S'),
            setup['endtime'].strftime('%Y%m%dT%H%M%S'))

        # Read in data and extract data arrays
        logging.info("Reading input data for {} - {}...".format(
            setup['starttime'], setup['endtime']))

        features, meta_params, labels, all_params = get_param_names(
            options.param_config_file)

        data = fh.read_data(
            [setup['dataset_file']],
            options,
            return_meta=True,
            starttime=setup['starttime'],
            endtime=setup['endtime'])[0]
        X, y, meta = data

        model = fh.load_model(setup['model_file'])
        scaler = fh.load_model(setup['scaler_file'])

        logging.info('Predicting with {} samples...'.format(len(X)))
        y_pred = model.predict(X)

        # Combine the metadata with the inverse-scaled features, attach
        # geometry from the db, then append predictions and true labels.
        df = pd.DataFrame(meta, columns=options.meta_params)
        X_inv = pd.DataFrame(scaler.inverse_transform(X), columns=X.columns)
        df = pd.concat(
            [df.reset_index(drop=True),
             X_inv.reset_index(drop=True)], axis=1)
        df = dbh.get_geom_for_dataset_rows(df)
        df['y_pred'] = y_pred
        df['y'] = y
        fh.df_to_csv(df, output_file)
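
Both examples read their settings from a module-level `options` object that is never defined in the snippets (the two even disagree on attribute names, e.g. `db_config_file` vs. `db_config_filename`). The sketch below shows one plausible way to populate it with argparse; every flag name and default is inferred from the attribute accesses above and should be treated as an assumption, not the project's real CLI.

import argparse

def parse_options():
    # Hypothetical reconstruction: flag names are inferred from the
    # `options.<attr>` accesses in the examples, not from the real project.
    parser = argparse.ArgumentParser()
    parser.add_argument('--db_config_file', default='cnf/db.ini')
    parser.add_argument('--db_config_name', default='production')
    parser.add_argument('--bucket', default=None,
                        help='S3 bucket passed to FileHandler')
    parser.add_argument('--example_config_file', default='cnf/examples.yaml')
    parser.add_argument('--param_config_file', default='cnf/params.yaml')
    parser.add_argument('--meta_params', nargs='+',
                        default=['point_in_time', 'weather_parameter'])
    return parser.parse_args()

options = parse_options()
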
Code example #2

import datetime as dt
import logging

import pandas as pd

# As in example #1, DBHandler, FileHandler, get_param_names and `options` are
# assumed to come from the surrounding project.

def main():

    # Read in data and extract data arrays
    logging.info("Reading input data for {} - {}...".format(
        options.starttime, options.endtime))

    dbh = DBHandler(options.db_config_filename, options.db_config_name)
    dbh.return_df = False
    fh = FileHandler(s3_bucket=options.bucket)

    starttime = dt.datetime.strptime(options.starttime, "%Y-%m-%dT%H:%M:%S")
    endtime = dt.datetime.strptime(options.endtime, "%Y-%m-%dT%H:%M:%S")

    features, meta_params, labels, all_params = get_param_names(
        options.param_config_filename)

    scaler = fh.load_model(options.scaler_file)

    # TODO change to read from operational data
    data = pd.DataFrame(dbh.get_dataset(all_params), columns=all_params)

    # TODO use the original id stored in the db. The id is used to assign
    # predicted classes to a storm object when saving to the db. Since
    # operational data is not yet available, dummy ids are used.
    data.loc[:, 'id'] = 0

    # Keep only wind gust rows; other weather parameters are ignored here
    data = data.loc[data['weather_parameter'] == 'WindGust']

    # Add ISO week number (Series.dt.week was removed in pandas 2.0)
    data['point_in_time'] = pd.to_datetime(data['point_in_time'], utc=True)
    data['week'] = data['point_in_time'].dt.isocalendar().week

    X = data.loc[:, features]
    X = scaler.transform(X)

    model = fh.load_model(options.model_file)

    logging.info('Predicting with {} {} samples...'.format(
        options.model, len(X)))
    y_pred = model.predict(X)

    # Save to db
    logging.info('Saving...')
    dbh.save_classes(data.loc[:, 'id'], y_pred)
    logging.info('done.')
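
Both examples unpack four lists from `get_param_names`, whose definition is not shown. A minimal sketch follows, assuming the parameter config is a YAML file with `features`, `meta_params` and `labels` lists; the file layout and the ordering of `all_params` are assumptions made for illustration.

import yaml

def get_param_names(config_filename):
    # Hypothetical sketch: the real config layout is unknown. A YAML file
    # with 'features', 'meta_params' and 'labels' lists is assumed here.
    with open(config_filename) as f:
        params = yaml.safe_load(f)
    features = params['features']
    meta_params = params['meta_params']
    labels = params['labels']
    # all_params is the full column list used when fetching a dataset
    # from the db (this ordering is a guess).
    all_params = meta_params + features + labels
    return features, meta_params, labels, all_params

Under this assumption, `dbh.get_dataset(all_params)` in example #2 returns rows whose columns line up with `all_params`, which is why the result can be wrapped directly in `pd.DataFrame(..., columns=all_params)`.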