import importlib import pandas as pd import API.dalmp_Dayzer as dalmp importlib.reload(dalmp) realDalmp = dalmp.dalmp_real("PJMISO", "51288", "dalmp", "2019-03-20", "2019-04-23", hourly=True) realDalmp = pd.melt(realDalmp, id_vars='flow_date', value_vars=["he" + str(i) for i in range(1, 25)], value_name="DALMP") realDalmp = realDalmp.rename(columns={'variable': "hour", }) \ .assign(hour=lambda df: df.hour.str[2:].astype(int)) \ .sort_values(by=["flow_date", "hour"]) predictDalmp = dalmp.dalmp_pred("PJMISO", "51288", "dalmp", "2019-03-20", "2019-04-23", hourly=True) predictDalmp = predictDalmp.dropna() \ .sort_values(by=["flow_date", "update_time"]) \
import statsmodels.api as sm
from scipy.stats import shapiro
from statsmodels.graphics.gofplots import ProbPlot

#importlib.reload(dalmp)

# Product and date window used by the rest of this script.
product = products.prodInfo("PDA")
startdate = "2019-03-20"
enddate = "2019-05-13"

''' Accuracy: MAE,MAPE,MSE '''

# Fetch the realised DALMP twice over the same window: once with
# hourly=True and once with hourly=False.
realDalmp = dalmp.dalmp_real(
    product.iso(),
    product.nodeid(),
    "dalmp",
    startdate,
    enddate,
    hourly=True,
)
realDalmpAgg = dalmp.dalmp_real(
    product.iso(),
    product.nodeid(),
    "dalmp",
    startdate,
    enddate,
    hourly=False,
)

# Reshape the wide he1..he24 columns into long form: one row per
# (flow_date, variable) pair with the value stored under "DALMP".
realDalmp = realDalmp.melt(
    id_vars='flow_date',
    value_vars=[f"he{hour}" for hour in range(1, 25)],
    value_name="DALMP",
)
def getDalmpPred(method, timestamp, product, flow_date):
    """Return a DALMP prediction for ``flow_date`` using the named method.

    Args:
        method: Name of the prediction method. Recognised values are
            ``'DayzerHourly'``, ``'DayzerHourly_adjusted'``,
            ``'DayzerDaily'``, ``'DayzerDaily_adjusted'``, ``'allshort'``,
            ``'alllong'``, ``'truevalue'``, ``'LH'``, ``'linearensemble'``
            and ``'MLemsemble'``.
        timestamp: Deal timestamp; only used by the ``DayzerHourly*``
            methods, where it is passed as ``deal_datetime``.
        product: Object exposing ``iso()``, ``nodeid()`` and ``peaktype()``.
        flow_date: Flow date; used as both start and end date for the
            range-based data sources.

    Returns:
        The column selected from the underlying data source (on-peak or
        off-peak depending on ``product.peaktype()``), optionally scaled.
        ``'allshort'`` / ``'alllong'`` return a one-element Series keyed
        ``'onpeak_pred'`` holding ``-inf`` / ``+inf`` regardless of the
        product's peak type. An unknown ``method`` prints a message and
        returns ``None``.
    """
    iso = product.iso()
    node_id = product.nodeid()
    data_type = "dalmp"
    is_onpeak = product.peaktype() == "onpeak"

    # Column names depend only on the product's peak type.
    pred_col = "onpeak_pred" if is_onpeak else "offpeak_pred"
    avg_col = "onpeak_avg" if is_onpeak else "offpeak_avg"

    # Multiplicative factors applied by the "*_adjusted" methods.
    # NOTE(review): origin of these constants is not visible here.
    adjust_value = 0.8795 if is_onpeak else 0.9036

    # NOTE(review): the like-hour source is always queried for this node
    # name, whatever `product` is -- confirm this is intended.
    lh_node_name = "WESTERN HUB"

    def dayzer_closest():
        # Dayzer prediction closest to the deal timestamp.
        frame = dalmp_dayzer.dalmp_pred_closest(iso=iso,
                                                node_id=node_id,
                                                data_type=data_type,
                                                flow_date=flow_date,
                                                deal_datetime=timestamp)
        return frame[pred_col]

    def dayzer_vintage():
        # Dayzer vintage prediction for the single flow date.
        frame = dalmp_dayzer.dalmp_pred_vintage(iso=iso,
                                                node_id=node_id,
                                                data_type=data_type,
                                                start_date=flow_date,
                                                end_date=flow_date)
        return frame[pred_col]

    def like_hour(column):
        # Like-hour prediction for the single flow date.
        frame = dalmp_likehour.dalmp_LH(iso, node_id, lh_node_name,
                                        flow_date, flow_date)
        return frame[column]

    if method == 'DayzerHourly':
        return dayzer_closest()
    elif method == 'DayzerHourly_adjusted':
        return dayzer_closest() * adjust_value
    elif method == 'DayzerDaily':
        return dayzer_vintage()
    elif method == 'DayzerDaily_adjusted':
        return dayzer_vintage() * adjust_value
    elif method == 'allshort':
        # np.inf rather than np.Inf: the capitalised alias was removed in
        # NumPy 2.0.
        return pd.DataFrame({'onpeak_pred': [-np.inf]}).iloc[0]
    elif method == 'alllong':
        return pd.DataFrame({'onpeak_pred': [np.inf]}).iloc[0]
    elif method == 'truevalue':
        realised = dalmp_dayzer.dalmp_real(iso=iso,
                                           node_id=node_id,
                                           data_type=data_type,
                                           start_date=flow_date,
                                           end_date=flow_date,
                                           hourly=False)
        return realised[avg_col]
    elif method == 'LH':
        return like_hour(avg_col)
    elif method == 'linearensemble':
        lh_pred = like_hour(avg_col)
        dayzer_pred = dayzer_vintage()
        # If either source has no rows, return that empty result as-is
        # instead of blending.
        if lh_pred.shape[0] == 0:
            return lh_pred
        elif dayzer_pred.shape[0] == 0:
            return dayzer_pred
        else:
            return (lh_pred * 0.4 + dayzer_pred * 0.6)
    elif method == 'MLemsemble':
        # NOTE(review): this branch reads "peak"/"offpeak" from the
        # like-hour frame, whereas 'LH' and 'linearensemble' read
        # "onpeak_avg"/"offpeak_avg" -- confirm which columns exist.
        lh_pred = like_hour("peak" if is_onpeak else "offpeak")
        dayzer_pred = dayzer_vintage()
        return lh_pred * 0.4 + dayzer_pred * 0.6
    else:
        print("Prediction method not found!")
        return None
}) \ .query('is_block != "Y"') \ .assign(flow_date=lambda df: pd.to_datetime(df.strip_begin).dt.to_pydatetime(), deal_time=lambda df: pd.to_datetime(df.deal_time).dt.to_pydatetime()) \ .pipe(util.select,['flow_date','deal_time','order_price','volume','type']) \ .query('flow_date >= @datePivotStart') \ .query('flow_date <= @datePivotEnd') \ .sort_values('deal_time')\ .assign(flow_date=lambda df: df.flow_date.dt.strftime('%Y-%m-%d')) peakType = "onpeak_avg" if product.peaktype() == "onpeak" else "offpeak_avg" histDalmp = dalmp_dayzer.dalmp_real(product.iso(), product.nodeid(), "dalmp", dateStart, dateEnd)[["flow_date", peakType]] \ .rename(columns={peakType: 'settleDalmp', 'flow_date': 'flowDate'}) histDalmp.flowDate = histDalmp.flowDate.astype(str) predict_dalmp = btu.getDalmpPred("DayzerDaily_adjusted", "", product, dateStart) iso = product.iso() node_id = product.nodeid() data_type = "dalmp" peak_type = product.peaktype() peak_adjust_value = 0.8795
def performance(product, tickDataFrame):
    """Summarise per-day PnL and long/short win rates for a set of ticks.

    Args:
        product: Object exposing ``iso()``, ``nodeid()`` and ``peaktype()``.
        tickDataFrame: DataFrame with at least the columns ``flowDate``,
            ``volume`` and ``notionalValue``. Rows with ``volume > 0`` are
            labelled "Long", all others "Short".

    Returns:
        ``None`` when ``tickDataFrame`` has no rows. Otherwise a tuple
        ``(dailyResult, winRate)`` where ``dailyResult`` has the columns
        ``flowDate, volume_long, volume_short, count_long, count_short,
        PnL_long, PnL_short`` and ``winRate`` is
        ``{'long': <rate>, 'short': <rate>}`` (a side with no trades gets
        ``0``).
    """
    if tickDataFrame.shape[0] == 0:
        return

    isOnpeak = product.peaktype() == "onpeak"
    peakType = "onpeak_avg" if isOnpeak else "offpeak_avg"
    # Dayzer prediction column that matches the product's peak type.
    predCol = "onpeak_pred" if isOnpeak else "offpeak_pred"

    dailyResult = tickDataFrame \
        .assign(type=np.where(tickDataFrame.volume > 0, "Long", "Short")) \
        .pipe(util.select, ["flowDate", "type", "volume", "notionalValue"])

    startFlowDate = min(dailyResult.flowDate)
    endFlowDate = max(dailyResult.flowDate)

    # Settled DALMP per flow date.
    histDalmp = dalmp_dayzer.dalmp_real(product.iso(), product.nodeid(),
                                        "dalmp", startFlowDate,
                                        endFlowDate)[["flow_date", peakType]] \
        .rename(columns={peakType: 'settleDalmp', 'flow_date': 'flowDate'})
    histDalmp["flowDate"] = histDalmp["flowDate"].astype(str)

    # Dayzer vintage prediction per flow date.
    dayzerDalmp = dalmp_dayzer \
        .dalmp_pred_vintage(product.iso(), product.nodeid(), "dalmp",
                            startFlowDate, endFlowDate) \
        .pipe(util.select, ['flow_date', predCol]) \
        .rename(columns={'flow_date': 'flowDate'})
    dayzerDalmp["flowDate"] = dayzerDalmp["flowDate"].astype(str)

    # Like-hour prediction per flow date.
    # NOTE(review): the node name is fixed to "WESTERN HUB" whatever
    # `product` is -- confirm this is intended.
    likehourDalmp = dalmp_likehour \
        .dalmp_LH(product.iso(), product.nodeid(), "WESTERN HUB",
                  startFlowDate, endFlowDate) \
        .pipe(util.select, ['flow_date', peakType]) \
        .rename(columns={peakType: 'likehour_pred', 'flow_date': 'flowDate'})
    likehourDalmp["flowDate"] = likehourDalmp["flowDate"].astype(str)

    # Join on the string-typed flowDate key. pred_delta uses the column
    # actually selected above; a hard-coded `onpeak_pred` fails for
    # off-peak products because that column is not selected for them.
    dailyResult = dailyResult \
        .merge(histDalmp, how='left', on='flowDate') \
        .merge(likehourDalmp, how='left', on='flowDate') \
        .merge(dayzerDalmp, how='left', on='flowDate') \
        .assign(pred_delta=lambda df: df['likehour_pred'] - df[predCol],
                PnL=lambda df: df.notionalValue + df.volume * df.settleDalmp)

    # A trade "wins" when its PnL is strictly positive; a NaN PnL (no
    # settle price merged in) compares False and so counts as a non-win.
    winRate = dailyResult \
        .assign(win=np.where(dailyResult['PnL'] > 0, 1, 0)) \
        .pipe(util.select, ["type", "win"]) \
        .groupby('type') \
        .agg(['sum', 'count']) \
        .reset_index()
    winRate.columns = ['type', 'win', 'total']
    winRate = winRate.assign(winrate=lambda df: df.win / df.total)

    def sideWinRate(side):
        # Win rate for one side, or 0 when that side has no trades.
        # `.iloc[0]` avoids float(<Series>), deprecated in pandas 2.1+.
        rates = winRate.loc[winRate['type'] == side, 'winrate']
        return 0 if rates.shape[0] == 0 else float(rates.iloc[0])

    winRate = {'long': sideWinRate('Long'), 'short': sideWinRate('Short')}

    dailyResult = dailyResult \
        .groupby(['flowDate', 'type']) \
        .agg({'volume': ['sum', 'count'], 'PnL': ['sum']}) \
        .reset_index()
    dailyResult.columns = ["flowDate", "type", "volume", "count", "PnL"]

    outputColumns = [
        "flowDate", "volume_long", "volume_short", "count_long",
        "count_short", "PnL_long", "PnL_short"
    ]
    sides = set(dailyResult['type'])
    if sides == {'Short'}:
        # Only short trades: fill the long columns with zeros.
        dailyResult = dailyResult \
            .rename(columns={"volume": "volume_short",
                             'count': 'count_short',
                             'PnL': 'PnL_short'}) \
            .assign(volume_long=0, count_long=0, PnL_long=0) \
            .pipe(util.select, outputColumns)
    elif sides == {'Long'}:
        # Only long trades: fill the short columns with zeros.
        dailyResult = dailyResult \
            .rename(columns={"volume": "volume_long",
                             'count': 'count_long',
                             'PnL': 'PnL_long'}) \
            .assign(volume_short=0, count_short=0, PnL_short=0) \
            .pipe(util.select, outputColumns)
    else:
        # Both sides present: pivot puts "Long" before "Short" under each
        # of volume/count/PnL, which matches outputColumns' order.
        dailyResult = dailyResult.pivot(index='flowDate',
                                        columns='type').reset_index()
        dailyResult.columns = outputColumns

    return dailyResult, winRate