Example #1
0
import importlib
import pandas as pd
import API.dalmp_Dayzer as dalmp

# Re-import the Dayzer API module so edits made to it during an interactive
# session are picked up.
importlib.reload(dalmp)

# Realised day-ahead LMP for one node over the study window, requested at
# hourly resolution.
realDalmp = dalmp.dalmp_real(
    "PJMISO", "51288", "dalmp", "2019-03-20", "2019-04-23", hourly=True
)

# Wide -> long: the he1..he24 columns become (variable, DALMP) rows keyed by
# flow_date.
realDalmp = realDalmp.melt(
    id_vars='flow_date',
    value_vars=["he{}".format(i) for i in range(1, 25)],
    value_name="DALMP",
)

# "heNN" labels -> integer hour, then order chronologically.
realDalmp = (
    realDalmp
    .rename(columns={'variable': "hour"})
    .assign(hour=lambda df: df.hour.str[2:].astype(int))
    .sort_values(by=["flow_date", "hour"])
)

# Predictions for the same node and window, also requested hourly.
predictDalmp = dalmp.dalmp_pred(
    "PJMISO", "51288", "dalmp", "2019-03-20", "2019-04-23", hourly=True
)

predictDalmp = predictDalmp.dropna() \
    .sort_values(by=["flow_date", "update_time"]) \
Example #2
0
import statsmodels.api as sm
from scipy.stats import shapiro
from statsmodels.graphics.gofplots import ProbPlot

#importlib.reload(dalmp)

# Product under study and the date window used for every query below.
product = products.prodInfo("PDA")
startdate = "2019-03-20"
enddate = "2019-05-13"
'''
Accuracy:
MAE,MAPE,MSE
'''
# Realised DALMP for the product's node, requested with hourly=True.
realDalmp = dalmp.dalmp_real(
    product.iso(), product.nodeid(), "dalmp", startdate, enddate, hourly=True
)

# Same query with hourly=False.
realDalmpAgg = dalmp.dalmp_real(
    product.iso(), product.nodeid(), "dalmp", startdate, enddate, hourly=False
)

# Wide -> long: the he1..he24 columns become (variable, DALMP) rows keyed by
# flow_date.
realDalmp = realDalmp.melt(
    id_vars='flow_date',
    value_vars=["he{}".format(i) for i in range(1, 25)],
    value_name="DALMP",
)
def getDalmpPred(method, timestamp, product, flow_date):
    """Return the DALMP prediction for ``flow_date`` using the named method.

    Parameters
    ----------
    method : str
        Prediction source. Recognised values:

        * ``'DayzerHourly'`` / ``'DayzerHourly_adjusted'`` --
          ``dalmp_dayzer.dalmp_pred_closest`` for ``flow_date`` and
          ``timestamp``; the ``_adjusted`` variant multiplies the result by
          a fixed factor (0.8795 on-peak, 0.9036 off-peak).
        * ``'DayzerDaily'`` / ``'DayzerDaily_adjusted'`` --
          ``dalmp_dayzer.dalmp_pred_vintage`` for ``flow_date``, with the
          same optional adjustment.
        * ``'allshort'`` / ``'alllong'`` -- a constant ``-inf`` / ``+inf``.
        * ``'truevalue'`` -- the realised value from
          ``dalmp_dayzer.dalmp_real``.
        * ``'LH'`` -- ``dalmp_likehour.dalmp_LH``.
        * ``'linearensemble'`` / ``'MLemsemble'`` -- ``0.4 * like-hour +
          0.6 * Dayzer daily``. The spelling ``'MLemsemble'`` is part of the
          existing interface.
    timestamp
        Forwarded as ``deal_datetime``; only the ``'DayzerHourly*'`` methods
        read it.
    product
        Object exposing ``iso()``, ``nodeid()`` and ``peaktype()``.
    flow_date
        Flow date to predict; passed as both start and end date to the
        range-based sources.

    Returns
    -------
    The on-peak column of the source result when ``product.peaktype()`` is
    ``"onpeak"``, otherwise the off-peak column (presumably a pandas Series
    -- the sources are project modules not visible here). ``'allshort'`` and
    ``'alllong'`` always return a one-element Series labelled
    ``'onpeak_pred'``, whatever the peak type. ``None`` is returned, after
    printing a message, when ``method`` is not recognised.
    """
    iso = product.iso()
    node_id = product.nodeid()
    data_type = "dalmp"
    on_peak = product.peaktype() == "onpeak"

    # Multiplicative corrections applied by the "*_adjusted" methods.
    peak_adjust_value = 0.8795
    offpeak_adjust_value = 0.9036

    def pick(frame, onpeak_col, offpeak_col):
        # Only the column matching the product's peak type is ever accessed.
        return frame[onpeak_col] if on_peak else frame[offpeak_col]

    def adjusted(pred):
        return pred * (peak_adjust_value if on_peak else offpeak_adjust_value)

    def dayzer_closest():
        frame = dalmp_dayzer.dalmp_pred_closest(iso=iso,
                                                node_id=node_id,
                                                data_type=data_type,
                                                flow_date=flow_date,
                                                deal_datetime=timestamp)
        return pick(frame, "onpeak_pred", "offpeak_pred")

    def dayzer_vintage():
        frame = dalmp_dayzer.dalmp_pred_vintage(iso=iso,
                                                node_id=node_id,
                                                data_type=data_type,
                                                start_date=flow_date,
                                                end_date=flow_date)
        return pick(frame, "onpeak_pred", "offpeak_pred")

    def like_hour(onpeak_col, offpeak_col):
        frame = dalmp_likehour.dalmp_LH(iso, node_id, "WESTERN HUB",
                                        flow_date, flow_date)
        return pick(frame, onpeak_col, offpeak_col)

    if method == 'DayzerHourly':
        return dayzer_closest()

    elif method == 'DayzerHourly_adjusted':
        return adjusted(dayzer_closest())

    elif method == 'DayzerDaily':
        return dayzer_vintage()

    elif method == 'DayzerDaily_adjusted':
        return adjusted(dayzer_vintage())

    elif method == 'allshort':
        # np.inf rather than np.Inf: the capitalised alias was removed in
        # NumPy 2.0.
        return pd.DataFrame({'onpeak_pred': [-np.inf]}).iloc[0]

    elif method == 'alllong':
        return pd.DataFrame({'onpeak_pred': [np.inf]}).iloc[0]

    elif method == 'truevalue':
        realised = dalmp_dayzer.dalmp_real(iso=iso,
                                           node_id=node_id,
                                           data_type=data_type,
                                           start_date=flow_date,
                                           end_date=flow_date,
                                           hourly=False)
        return pick(realised, "onpeak_avg", "offpeak_avg")

    elif method == 'LH':
        return like_hour("onpeak_avg", "offpeak_avg")

    elif method == 'linearensemble':
        LHdalmpPred = like_hour("onpeak_avg", "offpeak_avg")
        dayzerdalmpPred = dayzer_vintage()

        # If either source has no rows, hand back that empty result instead
        # of blending.
        if LHdalmpPred.shape[0] == 0:
            return LHdalmpPred
        elif dayzerdalmpPred.shape[0] == 0:
            return dayzerdalmpPred
        else:
            return (LHdalmpPred * 0.4 + dayzerdalmpPred * 0.6)

    elif method == 'MLemsemble':
        # NOTE(review): this branch reads "peak"/"offpeak" from the like-hour
        # result while 'LH' and 'linearensemble' read "onpeak_avg"/
        # "offpeak_avg" -- confirm which column names dalmp_LH provides.
        LHdalmpPred = like_hour("peak", "offpeak")
        dayzerdalmpPred = dayzer_vintage()
        return LHdalmpPred * 0.4 + dayzerdalmpPred * 0.6

    else:
        print("Prediction method not found!")
        return None
                     }) \
    .query('is_block != "Y"') \
    .assign(flow_date=lambda df: pd.to_datetime(df.strip_begin).dt.to_pydatetime(),
            deal_time=lambda df: pd.to_datetime(df.deal_time).dt.to_pydatetime()) \
    .pipe(util.select,['flow_date','deal_time','order_price','volume','type']) \
    .query('flow_date >= @datePivotStart') \
    .query('flow_date <= @datePivotEnd') \
    .sort_values('deal_time')\
    .assign(flow_date=lambda df: df.flow_date.dt.strftime('%Y-%m-%d'))

# Settlement column that matches the product's peak type.
if product.peaktype() == "onpeak":
    peakType = "onpeak_avg"
else:
    peakType = "offpeak_avg"

# Realised DALMP over the window, reduced to (flowDate, settleDalmp).
histDalmp = (
    dalmp_dayzer.dalmp_real(
        product.iso(), product.nodeid(), "dalmp", dateStart, dateEnd
    )[["flow_date", peakType]]
    .rename(columns={peakType: 'settleDalmp', 'flow_date': 'flowDate'})
)

# Flow dates are kept as strings.
histDalmp["flowDate"] = histDalmp["flowDate"].astype(str)


# Adjusted Dayzer daily prediction for the first day of the window; the
# timestamp argument is passed as an empty string.
predict_dalmp = btu.getDalmpPred("DayzerDaily_adjusted", "", product, dateStart)

# Scratch copies of the product attributes and constants.
iso = product.iso()
node_id = product.nodeid()
data_type = "dalmp"
peak_type = product.peaktype()

peak_adjust_value = 0.8795
def performance(product, tickDataFrame):
    """Summarise executed ticks per flow date and compute long/short win rates.

    Parameters
    ----------
    product
        Object exposing ``iso()``, ``nodeid()`` and ``peaktype()``.
    tickDataFrame : pandas.DataFrame
        One row per executed tick. The columns ``flowDate``, ``volume`` and
        ``notionalValue`` are read; a positive ``volume`` is classified as
        "Long", anything else as "Short".

    Returns
    -------
    ``None`` when ``tickDataFrame`` has no rows; otherwise a tuple
    ``(dailyResult, winRate)``:

    * ``dailyResult`` -- DataFrame with columns ``flowDate``,
      ``volume_long``, ``volume_short``, ``count_long``, ``count_short``,
      ``PnL_long``, ``PnL_short``, one row per flow date.
    * ``winRate`` -- ``{'long': ..., 'short': ...}``: the fraction of ticks
      with positive PnL on each side, or ``0`` for a side with no ticks.

    PnL per tick is ``notionalValue + volume * settleDalmp``, where
    ``settleDalmp`` is the realised DALMP for the tick's flow date.
    """
    if tickDataFrame.shape[0] == 0:
        # Existing contract: a bare return (None) when there is nothing to
        # summarise.
        return

    isOnpeak = product.peaktype() == "onpeak"
    peakType = "onpeak_avg" if isOnpeak else "offpeak_avg"
    # Column carrying the Dayzer prediction for this product's peak type.
    predColumn = "onpeak_pred" if isOnpeak else "offpeak_pred"

    dailyResult = tickDataFrame \
        .assign(type=np.where(tickDataFrame.volume > 0, "Long", "Short")) \
        .pipe(util.select, ["flowDate", "type", "volume", "notionalValue"])

    startFlowDate = min(dailyResult.flowDate)
    endFlowDate = max(dailyResult.flowDate)

    # Realised settlement price per flow date.
    histDalmp = dalmp_dayzer.dalmp_real(product.iso(),
                                        product.nodeid(),
                                        "dalmp",
                                        startFlowDate,
                                        endFlowDate)[["flow_date", peakType]] \
        .rename(columns={peakType: 'settleDalmp',
                         'flow_date': 'flowDate'})
    histDalmp.flowDate = histDalmp.flowDate.astype(str)

    # Dayzer vintage prediction per flow date.
    dayzerDalmp = dalmp_dayzer \
        .dalmp_pred_vintage(product.iso(), product.nodeid(), "dalmp",
                            startFlowDate, endFlowDate) \
        .pipe(util.select, ['flow_date', predColumn]) \
        .rename(columns={'flow_date': 'flowDate'})
    dayzerDalmp.flowDate = dayzerDalmp.flowDate.astype(str)

    # Like-hour prediction per flow date.
    likehourDalmp = dalmp_likehour \
        .dalmp_LH(product.iso(), product.nodeid(), "WESTERN HUB",
                  startFlowDate, endFlowDate) \
        .pipe(util.select, ['flow_date', peakType]) \
        .rename(columns={peakType: 'likehour_pred',
                         'flow_date': 'flowDate'})
    likehourDalmp.flowDate = likehourDalmp.flowDate.astype(str)

    # Left joins keep every tick even when a price source has no row for its
    # flow date. pred_delta must read the column that was actually selected:
    # a hard-coded onpeak_pred does not exist for off-peak products.
    dailyResult = dailyResult \
        .merge(histDalmp, how='left', on='flowDate') \
        .merge(likehourDalmp, how='left', on='flowDate') \
        .merge(dayzerDalmp, how='left', on='flowDate') \
        .assign(pred_delta=lambda df: df.likehour_pred - df[predColumn])

    # Side and PnL are computed once and shared by both summaries below.
    scored = dailyResult.assign(
        type=np.where(dailyResult.volume > 0, "Long", "Short"),
        PnL=lambda df: df.notionalValue + df.volume * df.settleDalmp)

    winRate = scored \
        .assign(win=np.where(scored.PnL > 0, 1, 0)) \
        .pipe(util.select, ["type", "win"]) \
        .groupby('type') \
        .agg(['sum', 'count']) \
        .reset_index()
    winRate.columns = ['type', 'win', 'total']
    winRate = winRate.assign(winrate=lambda df: df.win / df.total)

    def _sideWinRate(side):
        # One row per side after the groupby; .iloc[0] avoids the deprecated
        # float(<one-element Series>) conversion.
        rates = winRate.loc[winRate['type'] == side, 'winrate']
        return 0 if rates.shape[0] == 0 else float(rates.iloc[0])

    winRate = {'long': _sideWinRate('Long'), 'short': _sideWinRate('Short')}

    dailyResult = scored \
        .groupby(['flowDate', 'type']) \
        .agg({'volume': ['sum', 'count'],
              'PnL': ['sum']}) \
        .reset_index()
    dailyResult.columns = ["flowDate", "type", "volume", "count", "PnL"]

    summaryColumns = [
        "flowDate", "volume_long", "volume_short", "count_long",
        "count_short", "PnL_long", "PnL_short"
    ]
    metrics = ("volume", "count", "PnL")
    sides = set(dailyResult.type)

    if sides == {'Short'} or sides == {'Long'}:
        # Only one side traded: suffix its metrics and zero-fill the other.
        present, absent = ("short", "long") if sides == {'Short'} \
            else ("long", "short")
        dailyResult = dailyResult \
            .rename(columns={name: name + "_" + present for name in metrics}) \
            .assign(**{name + "_" + absent: 0 for name in metrics}) \
            .pipe(util.select, summaryColumns)
    else:
        # Both sides traded: spread Long/Short into columns. The flat names
        # rely on pivot ordering the columns metric-major with "Long" before
        # "Short".
        dailyResult = dailyResult.pivot(index='flowDate',
                                        columns='type').reset_index()
        dailyResult.columns = summaryColumns

    return dailyResult, winRate