import sys
import pandas as pd
import matplotlib.pyplot as plt

sys.path.append('..')
from threshold import get_threshold

# %% load datasets
# Both CSVs share the same file name; only the directory differs.
STOCK_CSV = 'Safaricom-Ltd(SCOM).csv'
test = pd.read_csv('../../data/predicted/linear_regression/' + STOCK_CSV)
data = pd.read_csv('../../data/processed/' + STOCK_CSV)

# Give the predictions the index labels of the rows of `data` from position
# `threshold` onwards, so both curves are drawn against the same x-axis.
threshold = get_threshold(data)
test.index = data[threshold:].index

# %% plot and save price prediction
# Draw the actual series first, then the predicted one, on the same axes.
for series, caption in (
        (data['Price'], 'Actual Price'),
        (test['Prediction'], 'Predicted Price'),
):
    plt.plot(series, label=caption)

plt.xlabel('Time (Days)')
plt.ylabel('Share Price (KSh.)')
plt.title('Safaricom Price Prediction (Linear Regression)')
plt.legend()

# Write the figure to disk before showing it interactively.
plt.savefig('../../reports/figures/Safaricom_linear_regression.png')
plt.show()
# Example no. 2
# 0
def start_bidding(fo, reqs, cpms, pred_ctr):
    """Simulate one day of bidding, allocating a budget to each time slot.

    The day is split into 12 two-hour slots. For every slot a budget and a
    bidding threshold are derived, the slot's log data is loaded, and
    ``simulateBidding`` is run. Each slot's result line is written to ``fo``
    and progress is printed to stdout. The loop stops early once the
    simulated spend exceeds the daily budget.

    Args:
        fo: Writable text file object. One result line is written per
            simulated slot. The function closes it before returning, and
            also when an error interrupts the simulation.
        reqs: Sequence indexed by slot number (0-11); presumably the
            expected number of ad requests per slot -- confirm with caller.
        cpms: Sequence indexed by slot number (0-11); presumably the
            expected CPM per slot -- confirm with caller.
        pred_ctr: Not used by the active allocation strategy (only a
            previously tried CTR-weighted variant needed per-slot CTRs).
            Kept for interface compatibility.

    Returns:
        None.
    """
    day = 12  # the day 2013-06-12
    BGT = 10000000  # total budget for the day (original note: 30297100)
    print('今天的总预算:', BGT)

    total_cost = 0  # actual total spend during the simulation
    total_cost_true = 0  # actual total spend recorded in the dataset
    n = 12  # the day is divided into 12 time slots
    slot_hours = int(24 / n)  # length of one slot in hours
    last_slot = n - 1

    # The win log is used to simulate ad requests, so the predicted win rate
    # is fixed at 1 for every slot.
    win_rate = 1

    # Column header printed above every slot's result line.
    header = 'budget\t\tspend\t\tbid\t\tipm\t\tclk\t\tclk_true\t\tcnv\t\tecpc\t\twr'

    length = 0  # running count of log rows consumed by earlier slots
    try:
        p = get_p(BGT)  # dispatch probabilities (see budgetpacing.py)

        for i in range(0, 24, slot_hours):  # i is the slot's starting hour
            slot = i // slot_hours

            # Alternative allocation strategies tried previously (pt_next is
            # computed identically in all of them):
            #   1. no allocation:        budget = BGT - total_cost
            #   2. even split:           budget = BGT / n
            #   3. CTR-weighted:         budget = (BGT - total_cost)
            #                                * pred_ctrs[slot] / sum(pred_ctrs[slot:12])
            #   4. traffic-weighted:     budget = (BGT - total_cost)
            #                                * reqs[slot] / sum(reqs[slot:12])
            #   5. remaining-slot share: budget = (BGT - total_cost) / (12 - slot)
            #   6. (active) pacing-based allocation, below.
            pt_next = p_next(slot, BGT) * (BGT - total_cost)
            if slot == 0:
                # The first slot uses the precomputed dispatch probability.
                budget = reqs[slot] * win_rate * cpms[slot] * p[slot]
            elif slot == last_slot:
                # The last slot may never be given more than what is left.
                budget = min(reqs[slot] * win_rate * cpms[slot] * pt_next,
                             BGT - total_cost)
            else:
                budget = reqs[slot] * win_rate * cpms[slot] * pt_next

            num = reqs[slot] * pt_next
            # (A fixed threshold of 0 was used as an alternative before.)
            threshold = float(get_threshold(slot, int(num)))

            ccfm, cost_t, pctr = get_timeSlot_data(day, i, length)
            length += len(ccfm)
            total_cost_true += cost_t

            if total_cost > BGT:
                print('今天的预算已用完!!!')
                break

            print('\n{0}-{1}时段log中的实际花费:'.format(i, i + slot_hours),
                  cost_t)
            print('本时段预算:', budget)
            result, cost, _winrate, _cpm = simulateBidding(
                ccfm, budget, pctr, threshold, cpms[slot], slot)
            fo.write(result + '\n')

            total_cost += cost
            print('预算剩余:', BGT - total_cost)

            print(header + '\n')
            print(result)
    finally:
        # The function owns the handle: release it even if a helper raised.
        fo.close()

    print('\nlog中统计得到的一天的实际总花费:', total_cost_true)