def main():
    """Produce a prediction CSV for every example in the YAML config.

    For each example: loads the dataset, model and scaler named in the
    example, predicts, joins the metadata with the inverse-scaled
    features and database geometry, and writes the result to a
    timestamped CSV file under the example's output_dir.

    Reads configuration from the module-level ``options`` object
    (db config, S3 bucket, example/param config files, meta_params).
    """
    dbh = DBHandler(options.db_config_file, options.db_config_name)
    dbh.return_df = False
    fh = FileHandler(s3_bucket=options.bucket)

    # NOTE(review): yaml.load with FullLoader is acceptable only for a
    # trusted, local config file — never for untrusted input.
    with open(options.example_config_file) as f:
        setups = yaml.load(f.read(), Loader=yaml.FullLoader)

    # Parameter names come from the param config file, not from any
    # individual example — read them once instead of once per loop
    # iteration (the call was loop-invariant).
    features, meta_params, labels, all_params = get_param_names(
        options.param_config_file)

    for setup in setups['examples']:
        output_file = '{}/{}-{}.csv'.format(
            setup['output_dir'],
            setup['starttime'].strftime('%Y%m%dT%H%M%S'),
            setup['endtime'].strftime('%Y%m%dT%H%M%S'))

        # Read in data and extract data arrays
        logging.info("Reading input data for {} - {}...".format(
            setup['starttime'], setup['endtime']))

        data = fh.read_data(
            [setup['dataset_file']],
            options,
            return_meta=True,
            starttime=setup['starttime'],
            endtime=setup['endtime'])[0]
        X, y, meta = data

        model = fh.load_model(setup['model_file'])
        scaler = fh.load_model(setup['scaler_file'])

        logging.info('Predicting with {} samples...'.format(len(X)))
        y_pred = model.predict(X)

        # Rebuild a human-readable frame: metadata columns, features in
        # their original (inverse-scaled) units, then geometry from the db.
        df = pd.DataFrame(meta, columns=options.meta_params)
        X_inv = pd.DataFrame(scaler.inverse_transform(X), columns=X.columns)
        df = pd.concat(
            [df.reset_index(drop=True), X_inv.reset_index(drop=True)],
            axis=1)
        df = dbh.get_geom_for_dataset_rows(df)
        df['y_pred'] = y_pred
        df['y'] = y

        fh.df_to_csv(df, output_file)
def main():
    """Predict storm classes for the configured time range and save to db.

    Loads the dataset from the database, filters to WindGust rows,
    derives the ISO week feature, scales the features, predicts with
    the configured model and stores the predicted classes via
    ``DBHandler.save_classes``.

    Reads configuration from the module-level ``options`` object
    (db config, S3 bucket, time range, param config, model/scaler files).
    """
    # Read in data and extract data arrays
    logging.info("Reading input data for {} - {}...".format(
        options.starttime, options.endtime))

    dbh = DBHandler(options.db_config_filename, options.db_config_name)
    dbh.return_df = False
    fh = FileHandler(s3_bucket=options.bucket)

    # NOTE(review): parsed but currently unused below — kept because
    # strptime also validates the option format; presumably these will
    # bound the dataset query once operational data is read (see TODO).
    starttime = dt.datetime.strptime(options.starttime, "%Y-%m-%dT%H:%M:%S")
    endtime = dt.datetime.strptime(options.endtime, "%Y-%m-%dT%H:%M:%S")

    features, meta_params, labels, all_params = get_param_names(
        options.param_config_filename)

    scaler = fh.load_model(options.scaler_file)

    # TODO change to read from operational data
    data = pd.DataFrame(dbh.get_dataset(all_params), columns=all_params)

    # TODO use original id stored in db. The id is used to assign predicted
    # classes to a storm object (while saving to db). As far as we do not
    # have operational data, dummy ids are used.
    data.loc[:, 'id'] = 0
    data = data.loc[data['weather_parameter'] == 'WindGust']

    # Add week. Series.dt.week was deprecated in pandas 1.1 and removed
    # in pandas 2.0; dt.isocalendar().week is the supported equivalent
    # (same ISO 8601 week number).
    data['point_in_time'] = pd.to_datetime(data['point_in_time'], utc=True)
    data['week'] = data['point_in_time'].dt.isocalendar().week

    X = data.loc[:, features]
    X = scaler.transform(X)

    model = fh.load_model(options.model_file)

    logging.info('Predicting with {} {} samples...'.format(
        options.model, len(X)))
    y_pred = model.predict(X)

    # Save to db
    logging.info('Saving...')
    dbh.save_classes(data.loc[:, 'id'], y_pred)

    logging.info('done.')