Example #1
def forecastWork(modelDir, ind):
    ''' Run the model in its directory.'''

    (cellCapacity, dischargeRate, chargeRate, cellQuantity, cellCost) = \
     [float(ind[x]) for x in ('cellCapacity', 'dischargeRate', 'chargeRate', 'cellQuantity', 'cellCost')]
    demandCharge = float(ind['demandCharge'])
    retailCost = float(ind['retailCost'])

    battEff = float(ind.get("batteryEfficiency")) / 100.0
    dodFactor = float(ind.get('dodFactor')) / 100.0
    projYears = int(ind.get('projYears'))
    batteryCycleLife = int(ind.get('batteryCycleLife'))
    battCapacity = cellQuantity * float(ind['cellCapacity']) * dodFactor

    o = {}

    try:
        with open(pJoin(modelDir, 'hist.csv'), 'w') as f:
            f.write(ind['histCurve'].replace('\r', ''))
        df = pd.read_csv(pJoin(modelDir, 'hist.csv'), parse_dates=['dates'])
        df['month'] = df['dates'].dt.month
        df['dayOfYear'] = df['dates'].dt.dayofyear
        assert df.shape[0] >= 26280  # must be longer than 3 years
        assert df.shape[1] == 5
    except:
        raise Exception("CSV file is incorrect format.")

    confidence = float(ind['confidence']) / 100

    # ---------------------- MAKE PREDICTIONS ------------------------------- #
    # train model on previous data
    all_X = fc.makeUsefulDf(df)
    all_y = df['load']
    predictions = fc.neural_net_predictions(all_X, all_y)

    dailyLoadPredictions = [
        predictions[i:i + 24] for i in range(0, len(predictions), 24)
    ]
    weather = df['tempc'][-8760:]
    dailyWeatherPredictions = [
        weather[i:i + 24] for i in range(0, len(weather), 24)
    ]
    month = df['month'][-8760:]

    dispatched = [False] * 365
    # decide to implement VBAT every day for a year
    VB_power, VB_energy = [], []
    for i, (load24, temp24, m) in enumerate(
            zip(dailyLoadPredictions, dailyWeatherPredictions, month)):
        peak = max(load24)
        if fc.shouldDispatchPS(peak, m, df, confidence):
            dispatched[i] = True
            vbp, vbe = fc.pulp24hrBattery(load24, dischargeRate * cellQuantity,
                                          cellCapacity * cellQuantity, battEff)
            VB_power.extend(vbp)
            VB_energy.extend(vbe)
        else:
            VB_power.extend([0] * 24)
            VB_energy.extend([0] * 24)

    # -------------------- MODEL ACCURACY ANALYSIS -------------------------- #
    o['predictedLoad'] = predictions
    o['trainAccuracy'] = 0  #round(model.score(X_train, y_train) * 100, 2)
    o['testAccuracy'] = 0  #round(model.score(X_test, y_test) * 100, 2)

    # PRECISION AND RECALL
    maxDays = []
    for month in range(1, 13):
        test = df[df['month'] == month]
        maxDays.append(test.loc[test['load'].idxmax()]['dayOfYear'])

    shouldHaveDispatched = [False] * 365
    for day in maxDays:
        shouldHaveDispatched[day] = True

    truePositive = len([
        b
        for b in [i and j for (i, j) in zip(dispatched, shouldHaveDispatched)]
        if b
    ])
    falsePositive = len([
        b for b in
        [i and (not j) for (i, j) in zip(dispatched, shouldHaveDispatched)]
        if b
    ])
    falseNegative = len([
        b for b in [(not i) and j
                    for (i, j) in zip(dispatched, shouldHaveDispatched)] if b
    ])
    o['precision'] = round(
        truePositive / float(truePositive + falsePositive) * 100, 2)
    o['recall'] = round(
        truePositive / float(truePositive + falseNegative) * 100, 2)
    o['number_of_dispatches'] = len([i for i in dispatched if i])
    o['MAE'] = round(
        sum([
            abs(l - m) / m * 100
            for l, m in zip(predictions, list(all_y[-8760:]))
        ]) / 8760., 2)

    # ---------------------- FINANCIAL ANALYSIS ----------------------------- #

    # Calculate monthHours
    year = df[-8760:].copy()
    year.reset_index(inplace=True)
    year['hour'] = list(year.index)
    start = list(year.groupby('month').first()['hour'])
    finish = list(year.groupby('month').last()['hour'])
    monthHours = [(s, f + 1) for (s, f) in zip(start, finish)]

    demand = list(all_y[-8760:])
    peakDemand = [max(demand[s:f]) for s, f in monthHours]
    demandAdj = [d + p for d, p in zip(demand, VB_power)]
    peakDemandAdj = [max(demandAdj[s:f]) for s, f in monthHours]
    discharges = [f if f < 0 else 0 for f in VB_power]

    # Monthly Cost Comparison Table
    o['monthlyDemand'] = peakDemand
    o['monthlyDemandRed'] = peakDemandAdj
    o['ps'] = [p - s for p, s in zip(peakDemand, peakDemandAdj)]
    o['benefitMonthly'] = [x * demandCharge for x in o['ps']]

    # Demand Before and After Storage Graph
    o['demand'] = demand
    o['demandAfterBattery'] = demandAdj
    o['batteryDischargekW'] = VB_power
    o['batteryDischargekWMax'] = max(VB_power)

    batteryCycleLife = float(ind['batteryCycleLife'])
    # Battery State of Charge Graph
    # Turn dc's SoC into a percentage, with dodFactor considered.

    o['batterySoc'] = SoC = [100 - (e / battCapacity * 100) for e in VB_energy]

    # Estimate the number of cycles the battery went through by summing the percent drops in SoC.
    cycleEquivalents = sum([
        SoC[i] - SoC[i + 1]
        for i, x in enumerate(SoC[:-1]) if SoC[i + 1] < SoC[i]
    ]) / 100.0
    o['cycleEquivalents'] = cycleEquivalents
    o['batteryLife'] = batteryCycleLife / cycleEquivalents

    # Cash Flow Graph
    # battery efficiency is applied only in the dispatch optimization above, not here
    # cashFlowCurve is the annual $ benefit from peak shaving, repeated for each projection year
    cashFlowCurve = [sum(o['ps']) * demandCharge for year in range(projYears)]
    cashFlowCurve.insert(0, -1 * cellCost *
                         cellQuantity)  # insert initial investment
    # simplePayback: years for the annual peak-shaving benefit to repay the initial investment
    o['SPP'] = (cellCost * cellQuantity) / (sum(o['ps']) * demandCharge)
    o['netCashflow'] = cashFlowCurve
    o['cumulativeCashflow'] = [
        sum(cashFlowCurve[:i + 1]) for i, d in enumerate(cashFlowCurve)
    ]
    o['NPV'] = npv(float(ind['discountRate']), cashFlowCurve)

    battCostPerCycle = cellQuantity * cellCost / batteryCycleLife
    lcoeTotCost = cycleEquivalents * retailCost + battCostPerCycle * cycleEquivalents
    o['LCOE'] = lcoeTotCost / (cycleEquivalents * battCapacity)

    # Other
    o['startDate'] = '2011-01-01'  # dc[0]['datetime'].isoformat()
    o['stderr'] = ''
    # Seemingly unimportant. Ask permission to delete.
    o['stdout'] = 'Success'
    o['months'] = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct",
        "Nov", "Dec"
    ]

    return o
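
The precision/recall block above is ordinary confusion-matrix arithmetic over the 365 daily dispatch flags. A minimal, self-contained sketch of that calculation, with invented flags rather than model output:

# Hypothetical illustration of the precision/recall arithmetic used above.
dispatched = [True, True, False, False, True]
shouldHaveDispatched = [True, False, False, True, True]

truePositive = sum(1 for d, s in zip(dispatched, shouldHaveDispatched) if d and s)       # 2
falsePositive = sum(1 for d, s in zip(dispatched, shouldHaveDispatched) if d and not s)  # 1
falseNegative = sum(1 for d, s in zip(dispatched, shouldHaveDispatched) if not d and s)  # 1

precision = round(truePositive / float(truePositive + falsePositive) * 100, 2)  # 66.67
recall = round(truePositive / float(truePositive + falseNegative) * 100, 2)     # 66.67
print(precision, recall)
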
Example #2
def workForecast(modelDir, ind):
    ''' Run the model in its directory.'''
    o = {}

    # Grab data from CSV,
    try:
        with open(pJoin(modelDir, 'hist.csv'), 'w') as f:
            f.write(ind['histCurve'].replace('\r', ''))
        df = pd.read_csv(pJoin(modelDir, 'hist.csv'), parse_dates=['dates'])
        df['month'] = df['dates'].dt.month
        df['dayOfYear'] = df['dates'].dt.dayofyear
        assert df.shape[0] >= 26280  # must be longer than 3 years
        assert df.shape[1] == 5
    except:
        raise Exception("CSV file is incorrect format.")

    # train model on previous data
    all_X = fc.makeUsefulDf(df)
    all_y = df['load']
    X_train, y_train = all_X[:-8760], all_y[:-8760]
    clf = linear_model.SGDRegressor(max_iter=10000, tol=1e-4)
    clf.fit(X_train, y_train)

    # ---------------------- MAKE PREDICTIONS ------------------------------- #
    X_test, y_test = all_X[-8760:], all_y[-8760:]
    predictions = clf.predict(X_test)
    dailyLoadPredictions = [
        predictions[i:i + 24] for i in range(0, len(predictions), 24)
    ]

    P_lower, P_upper, E_UL = vbat24hr(ind, df['tempc'][-8760:])
    dailyPl = [P_lower[i:i + 24] for i in range(0, len(P_lower), 24)]
    dailyPu = [P_upper[i:i + 24] for i in range(0, len(P_upper), 24)]
    dailyEu = [E_UL[i:i + 24] for i in range(0, len(E_UL), 24)]

    vbp, vbe = [], []
    dispatched_d = [False] * 365
    # Decide what days to dispatch
    zipped = zip(dailyLoadPredictions, df['month'][-8760:], dailyPl, dailyPu,
                 dailyEu)
    for i, (load, m, pl, pu, eu) in enumerate(zipped):
        peak = max(load)
        if fc.shouldDispatchPS(peak, m, df, float(ind['confidence']) / 100):
            dispatched_d[i] = True
            p, e = fc.pulp24hrVbat(ind, load, pl, pu, eu)
            vbp.extend(p)
            vbe.extend(e)
        else:
            vbp.extend([0] * 24)
            vbe.extend([0] * 24)

    ### TESTING FOR ACCURACY ###
    assert len(dailyPl) == 365
    assert all([len(i) == 24 for i in dailyPl])

    VB_power, VB_energy = vbp, vbe

    # -------------------- MODEL ACCURACY ANALYSIS -------------------------- #

    o['predictedLoad'] = list(clf.predict(X_test))
    o['trainAccuracy'] = round(clf.score(X_train, y_train) * 100, 2)
    o['testAccuracy'] = round(clf.score(X_test, y_test) * 100, 2)

    # PRECISION AND RECALL
    maxDays = []
    for month in range(1, 13):
        test = df[df['month'] == month]
        maxDays.append(test.loc[test['load'].idxmax()]['dayOfYear'])

    shouldHaveDispatched = [False] * 365
    for day in maxDays:
        shouldHaveDispatched[day] = True

    truePositive = len([
        b for b in
        [i and j for (i, j) in zip(dispatched_d, shouldHaveDispatched)] if b
    ])
    falsePositive = len([
        b for b in
        [i and (not j) for (i, j) in zip(dispatched_d, shouldHaveDispatched)]
        if b
    ])
    falseNegative = len([
        b for b in [(not i) and j
                    for (i, j) in zip(dispatched_d, shouldHaveDispatched)] if b
    ])
    o['confidence'] = ind['confidence']
    o['precision'] = round(
        truePositive / float(truePositive + falsePositive) * 100, 2)
    o['recall'] = round(
        truePositive / float(truePositive + falseNegative) * 100, 2)
    o['number_of_dispatches'] = len([i for i in dispatched_d if i])
    o['MAE'] = round(
        sum([abs(l - m) / m * 100
             for l, m in zip(predictions, list(y_test))]) / 8760., 2)

    # ---------------------- FINANCIAL ANALYSIS ----------------------------- #

    o['VBpower'], o['VBenergy'] = list(VB_power), list(VB_energy)

    # Calculate monthHours
    year = df[-8760:].copy()
    year.reset_index(inplace=True)
    year['hour'] = list(year.index)
    start = list(year.groupby('month').first()['hour'])
    finish = list(year.groupby('month').last()['hour'])
    monthHours = [(s, f + 1) for (s, f) in zip(start, finish)]

    demand = list(y_test)
    peakDemand = [max(demand[s:f]) for s, f in monthHours]
    energyMonthly = [sum(demand[s:f]) for s, f in monthHours]
    demandAdj = [d + p for d, p in zip(demand, o['VBpower'])]
    peakAdjustedDemand = [max(demandAdj[s:f]) for s, f in monthHours]
    energyAdjustedMonthly = [sum(demandAdj[s:f]) for s, f in monthHours]

    o['demand'] = demand
    o['peakDemand'] = peakDemand
    o['energyMonthly'] = energyMonthly
    o['demandAdjusted'] = demandAdj
    o['peakAdjustedDemand'] = peakAdjustedDemand
    o['energyAdjustedMonthly'] = energyAdjustedMonthly

    cellCost = float(ind['unitDeviceCost']) * float(ind['number_devices'])
    eCost = float(ind['electricityCost'])
    dCharge = float(ind['demandChargeCost'])

    o['VBdispatch'] = [dal - d for dal, d in zip(demandAdj, demand)]
    o['energyCost'] = [em * eCost for em in energyMonthly]
    o['energyCostAdjusted'] = [eam * eCost for eam in energyAdjustedMonthly]
    o['demandCharge'] = [peak * dCharge for peak in peakDemand]
    o['demandChargeAdjusted'] = [
        pad * dCharge for pad in o['peakAdjustedDemand']
    ]
    o['totalCost'] = [
        ec + dcm for ec, dcm in zip(o['energyCost'], o['demandCharge'])
    ]
    o['totalCostAdjusted'] = [
        eca + dca
        for eca, dca in zip(o['energyCostAdjusted'], o['demandChargeAdjusted'])
    ]
    o['savings'] = [
        tot - tota for tot, tota in zip(o['totalCost'], o['totalCostAdjusted'])
    ]

    annualEarnings = sum(o['savings']) - float(ind['unitUpkeepCost']) * float(
        ind['number_devices'])
    cashFlowList = [annualEarnings] * int(ind['projectionLength'])
    cashFlowList.insert(0, -1 * cellCost)

    o['NPV'] = np.npv(float(ind['discountRate']) / 100, cashFlowList)
    o['SPP'] = cellCost / annualEarnings
    o['netCashflow'] = cashFlowList
    o['cumulativeCashflow'] = [
        sum(cashFlowList[:i + 1]) for i, d in enumerate(cashFlowList)
    ]

    o['stdout'] = 'Success'
    return o
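
The monthHours bookkeeping in the financial analysis just converts the last 8760 hours into per-month (start, end) index pairs so monthly peaks and energy can be sliced out of flat hourly lists. A self-contained sketch of the same idea on a synthetic year, assuming only pandas:

import pandas as pd

# Synthetic hourly year, purely for illustration.
dates = pd.date_range('2019-01-01', periods=8760, freq='H')
year = pd.DataFrame({'dates': dates, 'load': range(8760)})
year['month'] = year['dates'].dt.month
year['hour'] = list(year.index)

start = list(year.groupby('month').first()['hour'])
finish = list(year.groupby('month').last()['hour'])
monthHours = [(s, f + 1) for s, f in zip(start, finish)]

demand = list(year['load'])
peakDemand = [max(demand[s:f]) for s, f in monthHours]
energyMonthly = [sum(demand[s:f]) for s, f in monthHours]
print(monthHours[0], peakDemand[0], energyMonthly[0])  # (0, 744) 743 276396
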
Example #3
def work(modelDir, inputDict):
    """ Run the model in its directory."""
    outData = {}
    rawData = []
    actual = []

    # write input file to modelDir sans carriage returns
    with open(pJoin(modelDir, "demandTemp.csv"), "w") as demandTempFile:
        demandTempFile.write(inputDict["demandTemp"].replace("\r", ""))

    try:
        with open(pJoin(modelDir, 'hist.csv'), 'w') as f:
            f.write(inputDict['nn'].replace('\r', ''))
        df = pd.read_csv(pJoin(modelDir, 'hist.csv'))
        assert df.shape[0] >= 26280  # must be longer than 3 years
        if 'dates' not in df.columns:
            df['dates'] = df.apply(lambda x: dt(int(x['year']), int(x[
                'month']), int(x['day']), int(x['hour'])),
                                   axis=1)
    except:
        raise Exception("Neural Net CSV file is incorrect format.")

    # neural net time
    all_X = loadForecast.makeUsefulDf(df)
    all_y = df["load"]
    nn_pred, nn_accuracy = loadForecast.neural_net_predictions(all_X, all_y)
    outData["actual_nn"] = df['load'][-8760:].tolist()

    # read it in as a list of lists
    try:
        with open(pJoin(modelDir, "demandTemp.csv")) as inFile:
            df = pd.read_csv(inFile, header=None)
            df.columns = ["load", "tempc"]
            df["dates"] = pd.date_range(start=inputDict["simStartDate"],
                                        freq="H",
                                        periods=df.shape[0])
            print(df.shape[0])
    except Exception:
        errorMessage = "CSV file is incorrect format. Please see valid format definition at <a target='_blank' href = 'https://github.com/dpinney/omf/wiki/Models-~-storagePeakShave#demand-file-csv-format'>\nOMF Wiki storagePeakShave - Demand File CSV Format</a>"
        raise Exception(errorMessage)

    rawData = df[["load", "tempc"]].fillna(0).values.tolist()
    del df
    """
	# None -> 0, float-> string
	for i in range(len(rawData)):
		rawData[i] = [a if a else 0 for a in rawData[i]]
	rawData = list(np.float_(rawData))
	"""

    # populate actual list
    for x in range(len(rawData)):
        actual.append(float(rawData[x][0]))

    (forecasted,
     MAPE) = loadForecast.rollingDylanForecast(rawData,
                                               float(inputDict["upBound"]),
                                               float(inputDict["lowBound"]))

    (exp, exp_MAPE) = loadForecast.exponentiallySmoothedForecast(
        rawData, float(inputDict["alpha"]), float(inputDict["beta"]))

    # parse json params for nextDayPeakKatrina
    try:
        params = json.loads(inputDict.get("katSpec", "{}"))
    except ValueError:
        params = {}

    pred_demand = loadForecast.nextDayPeakKatrinaForecast(
        rawData, inputDict["simStartDate"], modelDir, params)
    pred_demand = np.transpose(np.array(pred_demand)).tolist()

    # optional Prophet (Facebook) forecast
    prophet_partitions = int(inputDict.get("prophet", 0))
    if prophet_partitions > 1:
        prophet, prophet_low, prophet_high = loadForecast.prophetForecast(
            rawData, inputDict["simStartDate"], modelDir, inputDict["prophet"])

    # write our outData
    outData["startDate"] = inputDict["simStartDate"]
    outData["actual"] = actual
    outData["forecasted"] = forecasted
    outData["doubleExp"] = exp
    outData["neuralNet"] = nn_pred
    outData["MAPE"] = "%0.2f%%" % (MAPE * 100)
    outData["MAPE_exp"] = "%0.2f%%" % (exp_MAPE * 100)
    outData["MAPE_nn"] = "%0.2f%%" % nn_accuracy["test"]
    outData["peakDemand"] = pred_demand
    if prophet_partitions > 1:
        outData["prophet"] = prophet
        outData["prophetLow"] = prophet_low
        outData["prophetHigh"] = prophet_high
    return outData
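
The demandTemp.csv handling above follows the usual pattern for a header-less load/temperature file: name the two columns yourself and synthesize hourly timestamps with pd.date_range. A minimal sketch using an in-memory string in place of the file; the sample rows and start date are invented:

import io
import pandas as pd

csvText = "1200.5,21.0\n1180.0,20.5\n1175.3,20.1\n"  # load,tempc rows, no header
df = pd.read_csv(io.StringIO(csvText), header=None)
df.columns = ["load", "tempc"]
df["dates"] = pd.date_range(start="2019-01-01", freq="H", periods=df.shape[0])

rawData = df[["load", "tempc"]].fillna(0).values.tolist()
actual = [float(row[0]) for row in rawData]
print(df.shape[0], actual)  # 3 [1200.5, 1180.0, 1175.3]
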
Example #4
def forecastWork(modelDir, ind):
    ''' Run the model in its directory.'''
    import tensorflow as tf
    (cellCapacity, dischargeRate, chargeRate, cellQuantity, cellCost) = \
     [float(ind[x]) for x in ('cellCapacity', 'dischargeRate', 'chargeRate', 'cellQuantity', 'cellCost')]
    demandCharge = float(ind['demandCharge'])
    retailCost = float(ind['retailCost'])

    battEff = float(ind.get("batteryEfficiency")) / 100.0
    dodFactor = float(ind.get('dodFactor')) / 100.0
    projYears = int(ind.get('projYears'))
    batteryCycleLife = int(ind.get('batteryCycleLife'))
    battCapacity = cellQuantity * float(ind['cellCapacity']) * dodFactor

    o = {}

    try:
        with open(pJoin(modelDir, 'hist.csv'), 'w') as f:
            f.write(ind['histCurve'].replace('\r', ''))
        df = pd.read_csv(pJoin(modelDir, 'hist.csv'))
        assert df.shape[0] >= 26280  # must be longer than 3 years
        if df.shape[1] == 6:
            df['dates'] = df.apply(lambda x: dt(int(x['year']), int(x[
                'month']), int(x['day']), int(x['hour'])),
                                   axis=1)
        else:
            df = pd.read_csv(pJoin(modelDir, 'hist.csv'),
                             parse_dates=['dates'])
            df['month'] = df.dates.dt.month
        df['dayOfYear'] = df['dates'].dt.dayofyear
    except:
        raise Exception("CSV file is incorrect format.")

    # ---------------------- MAKE PREDICTIONS ------------------------------- #
    # train model on previous data
    all_X = fc.makeUsefulDf(df)
    all_y = df['load']
    if ind['newModel'] == 'True':
        model = None
    else:
        with open(pJoin(modelDir, 'neural_net.h5'), 'wb') as f:
            # decode the base64-encoded model payload back into an .h5 file
            f.write(base64.standard_b64decode(ind['model']))
        model = tf.keras.models.load_model(pJoin(modelDir, 'neural_net.h5'))
        # model = tf.keras.models.load_model(ind['model'])
    predictions, accuracy = fc.neural_net_predictions(
        all_X,
        all_y,
        epochs=int(ind['epochs']),
        model=model,
        save_file=pJoin(modelDir, 'neural_net_model.h5'))

    dailyLoadPredictions = [
        predictions[i:i + 24] for i in range(0, len(predictions), 24)
    ]
    weather = df['tempc'][-8760:]
    dailyWeatherPredictions = [
        weather[i:i + 24] for i in range(0, len(weather), 24)
    ]

    # decide to implement VBAT every day for a year
    VB_power, VB_energy = [], []
    for i, (load24, temp24) in enumerate(
            zip(dailyLoadPredictions, dailyWeatherPredictions)):
        vbp, vbe = pulp24hrBattery(load24, dischargeRate * cellQuantity,
                                   cellCapacity * cellQuantity, battEff)
        VB_power.extend(vbp)
        VB_energy.extend(vbe)

    # -------------------- MODEL ACCURACY ANALYSIS -------------------------- #
    o['predictedLoad'] = predictions
    o['trainAccuracy'] = 100 - round(accuracy['train'], 1)
    o['testAccuracy'] = 100 - round(accuracy['test'], 1)
    # ---------------------- FINANCIAL ANALYSIS ----------------------------- #

    # Calculate monthHours
    year = df[-8760:].copy()
    year.reset_index(inplace=True)
    year['hour'] = list(year.index)
    start = list(year.groupby('month').first()['hour'])
    finish = list(year.groupby('month').last()['hour'])
    monthHours = [(s, f + 1) for (s, f) in zip(start, finish)]

    demand = list(df['load'][-8760:])
    peakDemand = [max(demand[s:f]) for s, f in monthHours]
    demandAdj = [d + p for d, p in zip(demand, VB_power)]
    peakDemandAdj = [max(demandAdj[s:f]) for s, f in monthHours]

    # Monthly Cost Comparison Table
    o['monthlyDemand'] = peakDemand
    o['monthlyDemandRed'] = peakDemandAdj
    o['ps'] = [p - s for p, s in zip(peakDemand, peakDemandAdj)]
    o['benefitMonthly'] = [x * demandCharge for x in o['ps']]

    # Demand Before and After Storage Graph
    o['demand'] = demand
    o['demandAfterBattery'] = demandAdj
    o['batteryDischargekW'] = VB_power
    o['batteryDischargekWMax'] = max(VB_power)

    batteryCycleLife = float(ind['batteryCycleLife'])
    o['batterySoc'] = SoC = [100 - (e / battCapacity * 100) for e in VB_energy]
    cycleEquivalents = sum([
        SoC[i] - SoC[i + 1]
        for i, x in enumerate(SoC[:-1]) if SoC[i + 1] < SoC[i]
    ]) / 100.0
    o['cycleEquivalents'] = cycleEquivalents
    o['batteryLife'] = batteryCycleLife / (cycleEquivalents + 10)

    # Cash Flow Graph
    cashFlowCurve = [sum(o['ps']) * demandCharge for year in range(projYears)]
    cashFlowCurve.insert(0, -1 * cellCost *
                         cellQuantity)  # insert initial investment
    o['SPP'] = (cellCost * cellQuantity) / (sum(o['ps']) * demandCharge)
    o['netCashflow'] = cashFlowCurve
    o['cumulativeCashflow'] = [
        sum(cashFlowCurve[:i + 1]) for i, d in enumerate(cashFlowCurve)
    ]
    o['NPV'] = npv(float(ind['discountRate']), cashFlowCurve)

    battCostPerCycle = cellQuantity * cellCost / batteryCycleLife
    lcoeTotCost = cycleEquivalents * retailCost + battCostPerCycle * cycleEquivalents
    o['LCOE'] = lcoeTotCost / (cycleEquivalents * battCapacity + 10)


    # Other
    o['startDate'] = '2011-01-01'
    o['stderr'] = ''
    o['stdout'] = 'Success'

    return o
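
The cash-flow section reduces to one negative up-front investment followed by a constant annual peak-shaving benefit. A small sketch of the payback and NPV arithmetic with invented numbers; the discounting is written out by hand to match the convention of the npv helper assumed above:

# Illustrative numbers only.
cellCost, cellQuantity, demandCharge, projYears = 500.0, 10, 20.0, 5
monthlyPeakShave = [3.0] * 12                          # kW shaved each month (made up)
annualBenefit = sum(monthlyPeakShave) * demandCharge   # 720.0 dollars per year

cashFlowCurve = [annualBenefit for _ in range(projYears)]
cashFlowCurve.insert(0, -1 * cellCost * cellQuantity)  # initial investment at year zero

simplePayback = (cellCost * cellQuantity) / annualBenefit                     # ~6.94 years
cumulative = [sum(cashFlowCurve[:i + 1]) for i in range(len(cashFlowCurve))]  # ends at -1400.0

# Net present value by hand: discount each year's cash flow back to year zero.
rate = 0.07
npvValue = sum(cf / (1 + rate) ** t for t, cf in enumerate(cashFlowCurve))
print(simplePayback, cumulative[-1], round(npvValue, 2))
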
Example #5
def work(modelDir, ind):
	''' Model processing done here. '''
	epochs = int(ind['epochs'])
	o = {}  # See bottom of file for out's structure

	try:
		with open(pJoin(modelDir, 'hist.csv'), 'w') as f:
			f.write(ind['histCurve'].replace('\r', ''))
		df = pd.read_csv(pJoin(modelDir, 'hist.csv'))
		assert df.shape[0] >= 26280, 'At least 3 years of data is required'

		if 'dates' not in df.columns:
			df['dates'] = df.apply(
				lambda x: dt(
					int(x['year']), 
					int(x['month']), 
					int(x['day']), 
					int(x['hour'])), 
				axis=1
			)
	except:
		raise Exception("Load CSV file is incorrect format.")

	try:
		weather = [float(i) for i in ind['tempCurve'].split('\n') if i != '']
		assert len(weather) == 72, "weather csv in wrong format"
	except:
		raise Exception(ind['tempCurve'])

	# ---------------------- MAKE PREDICTIONS ------------------------------- #

	df = df.sort_values('dates')
	# df = autofill(df)
	d = dict(df.groupby(df.dates.dt.date)['dates'].count())
	df = df[df['dates'].dt.date.apply(lambda x: d[x] == 24)]  # keep only days with a full 24 hours

	df, tomorrow = lf.add_day(df, weather[:24])
	all_X, all_y = lf.makeUsefulDf(df, structure="3D")

	if ind['newModel'] == 'False':
		for day in ['one_day_model', 'two_day_model', 'three_day_model']:
			with open(pJoin(modelDir, ind[day+'_filename']), 'wb') as f:
					f.write(base64.standard_b64decode(ind[day]))
	
	tomorrow_load, model, tomorrow_accuracy = lf.neural_net_next_day(
		all_X, all_y, 
		epochs=epochs, save_file=pJoin(modelDir, 'one_day_model.h5'),
		model=(None if ind['newModel'] == 'True' else tf.keras.models.load_model(pJoin(modelDir, ind['one_day_model_filename']))),
		structure="3D"
	)

	o['tomorrow_load'] = tomorrow_load
	o['month_start'] = dt(tomorrow.year, tomorrow.month, 1).strftime("%A, %B %-d, %Y")
	o['forecast_start'] = tomorrow.strftime("%A, %B %-d, %Y")
	
	# second day
	df, second_day = lf.add_day(df, weather[24:48])
	if second_day.month == tomorrow.month:
		all_X, all_y = lf.makeUsefulDf(df, hours_prior=48, noise=5, structure="3D")
		two_day_predicted_load, two_day_model, two_day_load_accuracy = lf.neural_net_next_day(
			all_X, all_y, 
			epochs=epochs, hours_prior=48, 
			save_file=pJoin(modelDir, 'two_day_model.h5'),
			model=(None if ind['newModel'] == 'True' else tf.keras.models.load_model(pJoin(modelDir, ind['two_day_model_filename']))),
			structure="3D"
		)
		two_day_peak = max(two_day_predicted_load)

		# third day
		df, third_day = lf.add_day(df, weather[48:72])
		if third_day.month == tomorrow.month:
			all_X, all_y = lf.makeUsefulDf(df, hours_prior=72, noise=15, structure="3D")
			three_day_predicted_load, three_day_model, three_day_load_accuracy = lf.neural_net_next_day(
				all_X, all_y, 
				epochs=epochs, hours_prior=72, 
				save_file=pJoin(modelDir, 'three_day_model.h5'),
				model=(None if ind['newModel'] == 'True' else tf.keras.models.load_model(pJoin(modelDir, ind['three_day_model_filename']))),
				structure="3D"
			)
			three_day_peak = max(three_day_predicted_load)
		else:
			three_day_peak = 0
			three_day_load_accuracy = {'test': np.nan, 'train': np.nan}
			three_day_predicted_load = []
			
	else:
		two_day_peak = 0
		two_day_load_accuracy = {'test': np.nan, 'train': np.nan}
		two_day_predicted_load = []
		three_day_peak = 0
		three_day_load_accuracy = {'test': np.nan, 'train': np.nan}
		three_day_predicted_load = []

	tomorrow_peak = max(tomorrow_load)
	m = df[(df['month'] == tomorrow.month) & (df['year'] != tomorrow.year) ]
	hourly = m
	m = m.groupby(m.dates.dt.date)['load'].max()
	o['quantile'] = round(m[m < tomorrow_peak].shape[0]/float(m.shape[0])*100, 2)
	o['predicted_peak'] = [m.median(), highest_peak_this_month(df, tomorrow), tomorrow_peak, two_day_peak, three_day_peak]
	o['predicted_peak_limits'] = [
		[m.min(), m.max()],
		[0, 0],
		[tomorrow_peak*(1 + tomorrow_accuracy['test']*.01), tomorrow_peak*(1 - tomorrow_accuracy['test']*.01)],
		[two_day_peak*(1 + two_day_load_accuracy['test']*.01), two_day_peak*(1 - two_day_load_accuracy['test']*.01)],
		[three_day_peak*(1 + three_day_load_accuracy['test']*.01), three_day_peak*(1 - three_day_load_accuracy['test']*.01)]
	]
	m = hourly
	previous_months = [{
		'year': y,
		'load': m[m['year'] == y]['load'].tolist()
	} for y in m.year.unique()]

	# ---------------------- FORMAT FOR DISPLAY ------------------------------- #
	l = []
	for d in previous_months:
		l.append({
			'name': d['year'].item(),
			'color': 'lightgrey',
			'data': d['load'],
			'type': 'line',
			'opacity': .05,
			'enableMouseTracking': False
		})

	all_load = tomorrow_load + two_day_predicted_load + three_day_predicted_load
	load_leading_up = df[(df['month'] == tomorrow.month) & (df['year'] == tomorrow.year)]['load'].tolist()
	l.append({'name': tomorrow.year, 'color': 'black', 'data': load_leading_up[:-72], 'type': 'line'})
	l.append({'name':'forecast','color':'blue', 'data': [None]*(len(load_leading_up) - 72) + all_load, 'type': 'line', 'zIndex': 5 })

	# add uncertainty
	uncertainty = [2.02, 2.41, 2.78, 2.91, 3.48, 4.02, 4.2, 3.96, 3.63, 3.68, 4.19, 4.45, 4.77, 4.94, 4.79, 5.22, 5.58, 5.32, 5.44, 4.85, 5.05, 5.51, 5.71, 5.96, 7.84, 8.44, 8.96, 9.06, 8.81, 8.53, 8.4, 8.06, 7.33, 6.5, 6.15, 6.23, 6.43, 6.34, 6.84, 6.76, 7.17, 7.2, 6.93, 6.83, 6.71, 7.39, 8.49, 9.24, 9.36, 10.64, 9.95, 9.4, 9.6, 9.28, 8.52, 8.78, 8.71, 8.59, 8.34, 8.81, 9.12, 9.53, 10.3, 10.67, 10.89, 10.47, 9.67, 8.95, 8.79, 9.18, 9.92, 10.25]
	print(tomorrow_accuracy['test'])
	l.append({
		'name': 'uncertainty',
		'color': '#b3b3ff',
		'data': [None]*(len(load_leading_up) - 72) + [x*u*.01*2 for u, x in zip(uncertainty, all_load)],
	})

	l.append({
		'id': 'transparent',
		'color': 'rgba(255,255,255,0)',
		'data': [None]*(len(load_leading_up) - 72) + [x*(1-u*.01) for u, x in zip(uncertainty, all_load)]
	})
	

	o['previous_months'] = l

	o['load_test_accuracy'] = round(tomorrow_accuracy['test'], 2)
	o['load_train_accuracy'] = round(tomorrow_accuracy['train'], 2)
	o['tomorrow_test_accuracy'] = round(tomorrow_accuracy['test'], 2)
	o['tomorrow_train_accuracy'] = round(tomorrow_accuracy['train'], 2)
	o['two_day_peak_train_accuracy'] = round(two_day_load_accuracy['train'], 2)
	o['two_day_peak_test_accuracy'] = round(two_day_load_accuracy['test'], 2)
	o['three_day_peak_train_accuracy'] = round(three_day_load_accuracy['train'], 2)
	o['three_day_peak_test_accuracy'] = round(three_day_load_accuracy['test'], 2)


	o['peak_percent_chance'] = peak_likelihood(
		hist=highest_peak_this_month(df[:-48], tomorrow), 
		tomorrow=tomorrow_peak,
		tomorrow_std=tomorrow_peak*tomorrow_accuracy['test']*.01,
		two_day=two_day_peak,
		two_day_std=two_day_peak*two_day_load_accuracy['test']*.01,
		three_day=three_day_peak,
		three_day_std=three_day_peak*three_day_load_accuracy['test']*.01
	)

	o['stderr'] = ''

	with open(pJoin(modelDir,'one_day_model.h5'), 'rb') as f:
		one_day_model = base64.standard_b64encode(f.read()).decode()
	with open(pJoin(modelDir,'two_day_model.h5'), 'rb') as f:
		two_day_model = base64.standard_b64encode(f.read()).decode()
	with open(pJoin(modelDir,'three_day_model.h5'), 'rb') as f:
		three_day_model = base64.standard_b64encode(f.read()).decode()

	# re-input values (i.e. modify the mutable ind dictionary that is reused in heavyprocessing)
	ind['newModel'] = 'False'
	ind['one_day_model'] = one_day_model
	ind['one_day_model_filename'] = 'one_day_model.h5'
	ind['two_day_model'] = two_day_model
	ind['two_day_model_filename'] = 'two_day_model.h5'
	ind['three_day_model'] = three_day_model
	ind['three_day_model_filename'] = 'three_day_model.h5'

	return o
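
The hand-off at the end of this example works by base64-encoding the saved .h5 model files so they can travel inside the ind dictionary and be decoded back to disk on a later run. A minimal round-trip sketch of that pattern; the payload and file names are placeholders, not a real Keras model:

import base64
import os
import tempfile

payload = b"pretend this is a Keras .h5 model file"
modelDir = tempfile.mkdtemp()

# Encode a saved model file into a text-safe string (what gets stored in ind).
srcPath = os.path.join(modelDir, 'one_day_model.h5')
with open(srcPath, 'wb') as f:
	f.write(payload)
with open(srcPath, 'rb') as f:
	encoded = base64.standard_b64encode(f.read()).decode()

# Decode it back to disk before reloading with tf.keras.models.load_model.
dstPath = os.path.join(modelDir, 'one_day_model_restored.h5')
with open(dstPath, 'wb') as f:
	f.write(base64.standard_b64decode(encoded))
with open(dstPath, 'rb') as f:
	assert f.read() == payload
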
Example #6
def work(modelDir, ind):
    #print(ind)
    ''' Run the model in its directory.'''
    # drop inverter efficiency
    # drop DoD
    (cellCapacity, dischargeRate, chargeRate, cellQuantity, cellCost) = \
     [float(ind[x]) for x in ('cellCapacity', 'dischargeRate', 'chargeRate', 'cellQuantity', 'cellCost')]
    battEff = float(ind.get("batteryEfficiency")) / 100.0
    dodFactor = float(ind.get('dodFactor')) / 100.0
    projYears = int(ind.get('projectionLength'))
    batteryCycleLife = int(ind.get('batteryCycleLife'))

    o = {}

    try:
        with open(pJoin(modelDir, 'hist.csv'), 'w') as f:
            f.write(ind['historicalData'])  #.replace('\r', ''))
        df = pd.read_csv(pJoin(modelDir, 'hist.csv'), parse_dates=['dates'])
        df['month'] = df['dates'].dt.month
        df['dayOfYear'] = df['dates'].dt.dayofyear
        assert df.shape[0] >= 26280  # must be longer than 3 years
        assert df.shape[1] == 5
    except Exception:
        raise Exception("CSV file is incorrect format.")

    # retrieve goal
    goal = ind['goal']
    threshold = float(ind['transformerThreshold']) * 1000
    confidence = float(ind['confidence']) / 100

    # train model on previous data
    all_X = fc.makeUsefulDf(df)
    all_y = df['load']
    X_train, y_train = all_X[:-8760], all_y[:-8760]
    clf = linear_model.SGDRegressor(max_iter=10000, tol=1e-4)
    clf.fit(X_train, y_train)

    # ---------------------- MAKE PREDICTIONS ------------------------------- #
    X_test, y_test = all_X[-8760:], all_y[-8760:]

    # Collect data necessary for dispatch calculations
    predictions = clf.predict(X_test)
    dailyLoadPredictions = [
        predictions[i:i + 24] for i in range(0, len(predictions), 24)
    ]
    weather = df['tempc'][-8760:]
    dailyWeatherPredictions = [
        weather[i:i + 24] for i in range(0, len(weather), 24)
    ]
    month = df['month'][-8760:]

    dispatched = [False] * 365
    # decide to implement VBAT every day for a year
    VB_power, VB_energy = [], []
    for i, (load24, temp24, m) in enumerate(
            zip(dailyLoadPredictions, dailyWeatherPredictions, month)):
        peak = max(load24)
        if fc.shouldDispatchDeferral(peak, df, confidence, threshold):
            dispatched[i] = True
            vbp, vbe = fc.pulp24hrBattery(load24, dischargeRate * cellQuantity,
                                          cellCapacity * cellQuantity, battEff)
            VB_power.extend(vbp)
            VB_energy.extend(vbe)
        else:
            VB_power.extend([0] * 24)
            VB_energy.extend([0] * 24)

    # -------------------- MODEL ACCURACY ANALYSIS -------------------------- #

    o['predictedLoad'] = list(clf.predict(X_test))
    o['trainAccuracy'] = round(clf.score(X_train, y_train) * 100, 2)
    o['testAccuracy'] = round(clf.score(X_test, y_test) * 100, 2)

    # PRECISION AND RECALL
    maxDays = []
    for month in range(1, 13):
        test = df[df['month'] == month]
        maxDays.append(test.loc[test['load'].idxmax()]['dayOfYear'])

    shouldHaveDispatched = [False] * 365
    for day in maxDays:
        shouldHaveDispatched[day] = True

    truePositive = len([
        b
        for b in [i and j for (i, j) in zip(dispatched, shouldHaveDispatched)]
        if b
    ])
    falsePositive = len([
        b for b in
        [i and (not j) for (i, j) in zip(dispatched, shouldHaveDispatched)]
        if b
    ])
    falseNegative = len([
        b for b in [(not i) and j
                    for (i, j) in zip(dispatched, shouldHaveDispatched)] if b
    ])
    o['precision'] = round(
        truePositive / float(truePositive + falsePositive) * 100, 2)
    o['recall'] = round(
        truePositive / float(truePositive + falseNegative) * 100, 2)
    o['number_of_dispatches'] = len([i for i in dispatched if i])
    o['MAE'] = round(
        sum([abs(l - m) / m * 100
             for l, m in zip(predictions, list(y_test))]) / 8760., 2)

    # ---------------------- FINANCIAL ANALYSIS ----------------------------- #

    o['VBpower'], o['VBenergy'] = list(VB_power), list(VB_energy)

    # Calculate monthHours
    year = df[-8760:].copy()
    year.reset_index(inplace=True)
    year['hour'] = list(year.index)
    start = list(year.groupby('month').first()['hour'])
    finish = list(year.groupby('month').last()['hour'])
    monthHours = [(s, f + 1) for (s, f) in zip(start, finish)]

    demand = list(y_test)
    peakDemand = [max(demand[s:f]) for s, f in monthHours]
    energyMonthly = [sum(demand[s:f]) for s, f in monthHours]
    demandAdj = [d + p for d, p in zip(demand, o['VBpower'])]
    peakAdjustedDemand = [max(demandAdj[s:f]) for s, f in monthHours]
    energyAdjustedMonthly = [sum(demandAdj[s:f]) for s, f in monthHours]

    o['demand'] = demand
    o['peakDemand'] = peakDemand
    o['energyMonthly'] = energyMonthly
    o['demandAdjusted'] = demandAdj
    o['peakAdjustedDemand'] = peakAdjustedDemand
    o['energyAdjustedMonthly'] = energyAdjustedMonthly

    initInvestment = cellCost * cellQuantity
    eCost = float(ind['electricityCost'])
    dCharge = float(ind['demandChargeCost'])

    o['VBdispatch'] = [dal - d for dal, d in zip(demandAdj, demand)]
    o['energyCost'] = [em * eCost for em in energyMonthly]
    o['energyCostAdjusted'] = [eam * eCost for eam in energyAdjustedMonthly]
    o['demandCharge'] = [peak * dCharge for peak in peakDemand]
    o['demandChargeAdjusted'] = [
        pad * dCharge for pad in o['peakAdjustedDemand']
    ]
    o['totalCost'] = [
        ec + dcm for ec, dcm in zip(o['energyCost'], o['demandCharge'])
    ]
    o['totalCostAdjusted'] = [
        eca + dca
        for eca, dca in zip(o['energyCostAdjusted'], o['demandChargeAdjusted'])
    ]
    o['savings'] = [
        tot - tota for tot, tota in zip(o['totalCost'], o['totalCostAdjusted'])
    ]

    annualEarnings = sum(o['savings'])  # - something!
    cashFlowList = [annualEarnings] * int(ind['projectionLength'])
    cashFlowList.insert(0, -1 * initInvestment)

    o['NPV'] = np.npv(float(ind['discountRate']) / 100, cashFlowList)
    o['SPP'] = initInvestment / annualEarnings
    o['netCashflow'] = cashFlowList
    o['cumulativeCashflow'] = [
        sum(cashFlowList[:i + 1]) for i, d in enumerate(cashFlowList)
    ]

    o['dataCheck'] = 'Threshold exceeded' if any(
        [i > threshold for i in demandAdj]) and goal == 'deferral' else ''
    o['transformerThreshold'] = threshold if goal == 'deferral' else None

    o['stdout'] = 'Success'
    return o
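
The savings table is per-month bill arithmetic: energy charge plus demand charge, before and after the virtual battery dispatch. A toy sketch of that bookkeeping with invented one-month numbers:

# Invented single-month inputs, just to show the bill arithmetic.
eCost, dCharge = 0.11, 20.0                                  # $/kWh and $/kW
energyMonthly, energyAdjustedMonthly = [10000.0], [9900.0]   # kWh
peakDemand, peakAdjustedDemand = [50.0], [46.0]              # kW

energyCost = [em * eCost for em in energyMonthly]                    # [1100.0]
energyCostAdjusted = [eam * eCost for eam in energyAdjustedMonthly]  # [1089.0]
demandCharge = [p * dCharge for p in peakDemand]                     # [1000.0]
demandChargeAdjusted = [p * dCharge for p in peakAdjustedDemand]     # [920.0]

totalCost = [ec + dc for ec, dc in zip(energyCost, demandCharge)]
totalCostAdjusted = [ec + dc for ec, dc in zip(energyCostAdjusted, demandChargeAdjusted)]
savings = [t - ta for t, ta in zip(totalCost, totalCostAdjusted)]
print(savings)  # [91.0]
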
Example #7
def work(modelDir, ind):
	''' Model processing done here. '''
	epochs = int(ind['epochs'])
	o = {}  # See bottom of file for out's structure

	try:
		with open(pJoin(modelDir, 'hist.csv'), 'w') as f:
			f.write(ind['histCurve'].replace('\r', ''))
		df = pd.read_csv(pJoin(modelDir, 'hist.csv'))
		assert df.shape[0] >= 26280  # must be longer than 3 years
		if 'dates' not in df.columns:
			df['dates'] = df.apply(
				lambda x: dt(
					int(x['year']),
					int(x['month']),
					int(x['day']),
					int(x['hour'])),
				axis=1
			)
	except:
		raise Exception("Load CSV file is incorrect format.")

	try:
		weather = [float(i) for i in ind['tempCurve'].split('\n')]
		assert len(weather) == 72, "weather csv in wrong format"
	except:
		raise Exception(ind['tempCurve'])

	# ---------------------- MAKE PREDICTIONS ------------------------------- #
	df, tomorrow = lf.add_day(df, weather[:24])
	all_X = lf.makeUsefulDf(df)
	all_y = df['load']

	#load prediction
	tomorrow_load, model, tomorrow_accuracy = lf.neural_net_next_day(all_X, all_y, epochs=epochs, save_file=pJoin(modelDir, 'neural_net_1day.h5'))
	# tomorrow_load = [13044.3369140625, 12692.4453125, 11894.0712890625, 13391.0185546875, 13378.373046875, 14098.5048828125, 14984.5, 15746.6845703125, 14677.6064453125, 14869.6953125, 14324.302734375, 13727.908203125, 13537.51171875, 12671.90234375, 13390.9970703125, 12111.166015625, 13539.05078125, 15298.7939453125, 14620.8369140625, 15381.9404296875, 15116.42578125, 13652.3974609375, 13599.5986328125, 12882.5185546875]
	# tomorrow_accuracy = {'test': 4, 'train': 3}
	o['tomorrow_load'] = tomorrow_load
	o['month_start'] = dt(tomorrow.year, tomorrow.month, 1).strftime("%A, %B %-d, %Y")
	o['forecast_start'] = tomorrow.strftime("%A, %B %-d, %Y")
	
	# second day
	df, second_day = lf.add_day(df, weather[24:48])
	if second_day.month == tomorrow.month:
		all_X = lf.makeUsefulDf(df, hours_prior=48, noise=5)
		all_y = df['load']
		two_day_predicted_load, two_day_model, two_day_load_accuracy = lf.neural_net_next_day(all_X, all_y, epochs=epochs, hours_prior=48, save_file=pJoin(modelDir, 'neural_net_2day.h5'))
		two_day_peak = max(two_day_predicted_load)

		# third day
		df, third_day = lf.add_day(df, weather[48:72])
		if third_day.month == tomorrow.month:
			all_X = lf.makeUsefulDf(df, hours_prior=72, noise=15)
			all_y = df['load']
			three_day_predicted_load, three_day_model, three_day_load_accuracy = lf.neural_net_next_day(all_X, all_y, epochs=epochs, hours_prior=72, save_file=pJoin(modelDir, 'neural_net_3day.h5'))
			three_day_peak = max(three_day_predicted_load)
		else:
			three_day_peak = 0
			three_day_load_accuracy = {'test': np.nan, 'train': np.nan}
			
	else:
		two_day_peak = 0
		two_day_load_accuracy = {'test': np.nan, 'train': np.nan}
		three_day_peak = 0
		three_day_load_accuracy = {'test': np.nan, 'train': np.nan}

	tomorrow_peak = max(tomorrow_load)
	m = df[(df['month'] == tomorrow.month) & (df['year'] != tomorrow.year) ]
	o['quantile'] = round(m[m['load'] < tomorrow_peak].shape[0]/float(m.shape[0])*100, 2)
	o['predicted_peak'] = [m['load'].median(), highest_peak_this_month(df, tomorrow), tomorrow_peak, two_day_peak, three_day_peak]
	o['predicted_peak_limits'] = [
		[m['load'].min(), m['load'].max()],
		[0, 0],
		[tomorrow_peak*(1 + tomorrow_accuracy['test']*.01), tomorrow_peak*(1 - tomorrow_accuracy['test']*.01)],
		[two_day_peak*(1 + two_day_load_accuracy['test']*.01), two_day_peak*(1 - two_day_load_accuracy['test']*.01)],
		[three_day_peak*(1 + three_day_load_accuracy['test']*.01), three_day_peak*(1 - three_day_load_accuracy['test']*.01)]
	]

	previous_months = [{
		'year': y,
		'load': m[m['year'] == y]['load'].tolist()
	} for y in m.year.unique()]

	# hard-code the input for highcharts
	o['cats_pred'] = list(range(744)) ### FIX THIS

	l = []
	for d in previous_months:
		l.append({
			'name': d['year'],
			'color': 'lightgrey',
			'data': d['load'],
			'type': 'line',
			'opacity': .05,
			'enableMouseTracking': False
		})

	load_leading_up = df[(df['month'] == tomorrow.month) & (df['year'] == tomorrow.year)]['load'].tolist()
	l.append({'name': tomorrow.year, 'color': 'black', 'data': load_leading_up[:-72], 'type': 'line'})
	l.append({'name':'forecast','color':'blue','data': [None]*(len(load_leading_up) - 72) + o['tomorrow_load'],'type': 'line'})

	o['previous_months'] = l

	o['load_test_accuracy'] = round(tomorrow_accuracy['test'], 2)
	o['load_train_accuracy'] = round(tomorrow_accuracy['train'], 2)
	o['tomorrow_test_accuracy'] = round(tomorrow_accuracy['test'], 2)
	o['tomorrow_train_accuracy'] = round(tomorrow_accuracy['train'], 2)
	o['two_day_peak_train_accuracy'] = round(two_day_load_accuracy['train'], 2)
	o['two_day_peak_test_accuracy'] = round(two_day_load_accuracy['test'], 2)
	o['three_day_peak_train_accuracy'] = round(three_day_load_accuracy['train'], 2)
	o['three_day_peak_test_accuracy'] = round(three_day_load_accuracy['test'], 2)


	o['peak_percent_chance'] = peak_likelihood(
		hist=highest_peak_this_month(df[:-48], tomorrow), 
		tomorrow=tomorrow_peak,
		tomorrow_std=tomorrow_peak*tomorrow_accuracy['test']*.01,
		two_day=two_day_peak,
		two_day_std=two_day_peak*two_day_load_accuracy['test']*.01,
		three_day=three_day_peak,
		three_day_std=three_day_peak*three_day_load_accuracy['test']*.01
	)

	o['stderr'] = ''

	return o
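
The quantile reported above asks what share of same-month historical load values (hourly rows here; Example #5 uses daily peaks) fall below tomorrow's predicted peak. A standalone sketch of that percentile calculation on invented values:

# Invented load values from the same calendar month in previous years.
historical_loads = [13200.0, 14100.0, 12800.0, 15050.0, 13900.0]
tomorrow_peak = 14000.0

below = [x for x in historical_loads if x < tomorrow_peak]
quantile = round(len(below) / float(len(historical_loads)) * 100, 2)
print(quantile)  # 60.0
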