import sys

import pandas as pd
import matplotlib.pyplot as plt

# The local `threshold` module lives one directory up, so extend the import
# path before importing from it.
sys.path.append('..')
from threshold import get_threshold

# Input and output locations used by this script.
PREDICTED_CSV = (
    '../../data/predicted/linear_regression/Safaricom-Ltd(SCOM).csv')
PROCESSED_CSV = '../../data/processed/Safaricom-Ltd(SCOM).csv'
FIGURE_PATH = '../../reports/figures/Safaricom_linear_regression.png'

# %% load datasets
test = pd.read_csv(PREDICTED_CSV)
data = pd.read_csv(PROCESSED_CSV)

# Re-index the predictions with the index of the processed rows starting at
# the threshold, so both series share one x axis when plotted.
threshold = get_threshold(data)
test.index = data[threshold:].index

# %% plot and save price prediction
curves = (
    (data['Price'], 'Actual Price'),
    (test['Prediction'], 'Predicted Price'),
)
for series, legend_label in curves:
    plt.plot(series, label=legend_label)

plt.xlabel('Time (Days)')
plt.ylabel('Share Price (KSh.)')
plt.title('Safaricom Price Prediction (Linear Regression)')
plt.legend()

# Save before show(): the figure is written to disk first, then displayed.
plt.savefig(FIGURE_PATH)
plt.show()
def start_bidding(fo, reqs, cpms, pred_ctr):
    """Simulate one day of budget-paced bidding, one time slot at a time.

    The day is split into ``n`` equal time slots. For every slot a budget and
    an expected bid count are derived, the slot's log data is loaded with
    ``get_timeSlot_data`` and ``simulateBidding`` is run on it. Each slot's
    result string is written to ``fo`` and printed. The loop stops early as
    soon as the simulated spend exceeds the daily budget.

    Args:
        fo: Writable text file object; one result line is written per
            simulated slot. It is closed before the function returns, also
            when an error interrupts the simulation.
        reqs: Sequence indexed by slot number; a factor of both the slot
            budget and the expected bid count (presumably per-slot request
            counts -- confirm against the caller).
        cpms: Sequence indexed by slot number; a factor of the slot budget
            and also passed to ``simulateBidding`` (presumably per-slot CPM
            -- confirm against the caller).
        pred_ctr: Not used by the active strategy; kept so existing callers
            keep working.

    Returns:
        None. Results are written to ``fo`` and printed to stdout.
    """
    day = 12  # the day 2013-06-12
    BGT = 10000000  # total budget for the day (original note: 30297100)
    print('今天的总预算:', BGT)
    total_cost = 0  # spend accumulated by the simulation
    total_cost_true = 0  # spend recorded in the dataset log
    n = 12  # number of time slots the day is split into
    hours_per_slot = int(24 / n)
    last_slot = n - 1
    # Won-bid logs are used to simulate ad requests, so the predicted win
    # rate is fixed to 1 for every slot.
    win_rate = 1
    p = get_p(BGT)  # dispatch probabilities (original note: budgetpacing.py)
    length = 0  # number of log rows consumed by earlier slots

    # Budget strategies that were tried before the active one (strategy 6);
    # each also used pt_next = p_next(slot, BGT) * (BGT - total_cost):
    #   1. no allocation:          budget = BGT - total_cost
    #   2. uniform:                budget = BGT / n
    #   3. CTR-proportional:       budget = (BGT - total_cost)
    #                                  * pred_ctrs[slot] / sum(pred_ctrs[slot:])
    #   4. traffic-proportional:   budget = (BGT - total_cost)
    #                                  * reqs[slot] / sum(reqs[slot:])
    #   5. remaining-slots share:  budget = (BGT - total_cost) / (n - slot)
    try:
        for i in range(0, 24, hours_per_slot):  # i is the slot's start hour
            slot = i // hours_per_slot
            remaining = BGT - total_cost

            # Strategy 6 (ours).
            if slot == 0:
                # The first slot is budgeted from the initial probabilities.
                budget = reqs[slot] * win_rate * cpms[slot] * p[slot]
                pt_next = p_next(slot, BGT) * remaining
            else:
                pt_next = p_next(slot, BGT) * remaining
                budget = reqs[slot] * win_rate * cpms[slot] * pt_next
                if slot == last_slot:
                    # The final slot may never exceed what is left.
                    budget = min(budget, remaining)

            num = reqs[slot] * pt_next
            threshold = float(get_threshold(slot, int(num)))

            ccfm, cost_t, pctr = get_timeSlot_data(day, i, length)
            length += len(ccfm)
            total_cost_true += cost_t

            if total_cost > BGT:
                print('今天的预算已用完!!!')
                break

            print('\n{0}-{1}时段log中的实际花费:'.format(
                i, i + hours_per_slot), cost_t)
            print('本时段预算:', budget)
            result, cost, _winrate, _cpm = simulateBidding(
                ccfm, budget, pctr, threshold, cpms[slot], slot)
            fo.write(result + '\n')
            total_cost += cost
            print('预算剩余:', BGT - total_cost)
            header = 'budget\t\tspend\t\tbid\t\tipm\t\tclk\t\tclk_true\t\tcnv\t\tecpc\t\twr'
            print(header + '\n')
            print(result)
    finally:
        # The function owns ``fo`` once called: close it even if a helper
        # raised, so the handle is not leaked.
        fo.close()
    print('\nlog中统计得到的一天的实际总花费:', total_cost_true)