# Overlay measured GHI, the pvlib clear-sky GHI, and the points on which the
# ML labels (ml_clear) and the NSRDB labels (nsrdb_clear) agree or disagree.
vis = visualize.Visualizer()
vis.add_line_ser(test.df['GHI'], 'GHI')
vis.add_line_ser(test.df['Clearsky GHI pvlib'], 'GHI_cs')
vis.add_circle_ser(test.df[ml_clear & ~nsrdb_clear]['GHI'], 'ML clear only')
vis.add_circle_ser(test.df[~ml_clear & nsrdb_clear]['GHI'], 'NSRDB clear only')
vis.add_circle_ser(test.df[ml_clear & nsrdb_clear]['GHI'], 'Both clear')
vis.show()


# In[51]:

# Store the second column of predict_proba as 'probas' and plot it against
# the time series.
probas = clf.predict_proba(test.df[feature_cols].values)
test.df['probas'] = 0  # placeholder; overwritten on the next line
test.df['probas'] = probas[:, 1]
visualize.plot_ts_slider_highligther(test.df, prob='probas')


## 15 min freq ground data

# Load the ground measurements and move the index to MST.
ground = cs_detection.ClearskyDetection.read_pickle('abq_ground_1.pkl.gz')
ground.df.index = ground.df.index.tz_convert('MST')

# Build the test set: restrict the date range, add the 'tfn' column, and keep
# only rows whose timestamp minute is a multiple of 15.
test = cs_detection.ClearskyDetection(ground.df)
test.trim_dates('10-01-2015', '10-17-2015')
test.time_from_solar_noon('Clearsky GHI pvlib', 'tfn')
test.df = test.df[test.df.index.minute % 15 == 0]
pred = test.iter_predict_daily(
    feature_cols, 'GHI', 'Clearsky GHI pvlib', clf, 5,
    multiproc=True, by_day=True,
).astype(bool)

# Reindex the training labels onto a regular 15-minute grid over the same
# dates; timestamps introduced by the reindex are filled with False.
train2 = cs_detection.ClearskyDetection(train.df)
train2.trim_dates('10-01-2015', '10-17-2015')
train2.df = train2.df.reindex(
    pd.date_range(
        start=train2.df.index[0],
        end=train2.df.index[-1],
        freq='15min',
    )
)
train2.df['sky_status'] = train2.df['sky_status'].fillna(False)

nsrdb_clear = train2.df['sky_status']
# NOTE(review): 'sky_status iter' is presumably written onto test.df by
# iter_predict_daily above -- confirm against cs_detection.
ml_clear = test.df['sky_status iter']

# Same comparison plot as above, now for the 15-minute ground data.
vis = visualize.Visualizer()
vis.add_line_ser(test.df['GHI'], 'GHI')
vis.add_line_ser(test.df['Clearsky GHI pvlib'], 'GHI_cs')
vis.add_circle_ser(test.df[ml_clear & ~nsrdb_clear]['GHI'], 'ML clear only')
vis.add_circle_ser(test.df[~ml_clear & nsrdb_clear]['GHI'], 'NSRDB clear only')
vis.add_circle_ser(test.df[ml_clear & nsrdb_clear]['GHI'], 'Both clear')
vis.show()

probas = clf.predict_proba(test.df[feature_cols].values)
test.df[(test.df['sky_status pvlib'] == 1) & (~pred)]['GHI'], 'PVLib clear only') vis.add_circle_ser(test.df[(test.df['sky_status pvlib'] == 1) & (pred)]['GHI'], 'ML+PVLib clear only') # vis.add_line_ser(test.df['abs_ideal_ratio_diff'] * 100) # In[118]: vis.show() # In[119]: probas = clf.predict_proba(test.df[feature_cols].values) test.df['probas'] = 0 test.df['probas'] = probas[:, 1] visualize.plot_ts_slider_highligther(test.df, prob='probas') # In[120]: ground = cs_detection.ClearskyDetection.read_pickle('srrl_ground_1.pkl.gz') # In[121]: ground.df.index = ground.df.index.tz_convert('MST') # In[122]: ground.trim_dates('10-01-2011', '10-08-2011') # In[123]:
# F1 score of pred against the 'sky_status' column.
print(metrics.f1_score(test.df['sky_status'].values, pred))


# In[104]:

print(metrics.accuracy_score(test.df['sky_status'], pred))

# Plot predicted probabilities for the data from 10-01-2015 onward
# (no end date is passed to trim_dates).
test2 = cs_detection.ClearskyDetection(test.df)
test2.trim_dates('10-01-2015', None)
probas = clf.predict_proba(test2.df[feature_cols].values)
test2.df['probas'] = 0  # placeholder; overwritten on the next line
test2.df['probas'] = probas[:, 1]
visualize.plot_ts_slider_highligther(test2.df, prob='probas')


# # Train on all NSRDB data, test various freq of ground data

# In[105]:

# Recompute the window metrics on the full NSRDB frame, then fit the
# classifier on it.
train = cs_detection.ClearskyDetection(nsrdb.df)
train.scale_model('GHI', 'Clearsky GHI pvlib', 'sky_status')
utils.calc_all_window_metrics(
    train.df, 3,
    meas_col='GHI', model_col='Clearsky GHI pvlib', overwrite=True,
)
clf.fit(train.df[feature_cols].values, train.df[target_cols].values.flatten())


# In[106]:

# Bar trace of the fitted classifier's feature importances, one per feature.
bar = go.Bar(x=feature_cols, y=clf.feature_importances_)