def data_process(ecs_lines, input_lines):
    """Parse the ECS request history and the prediction input description.

    Line terminators are handled independently of the platform: fields are
    split on whitespace and stripped, so both ``\\n`` and ``\\r\\n`` files are
    read correctly. Blank ECS lines and trailing blank input lines are ignored.

    Args:
        ecs_lines: iterable of tab-separated history records. Field 1 is read
            as ``flavorN`` (everything after the first six characters is the
            flavor number) and field 2 must start with a ``YYYY-MM-DD`` date.
            Records whose flavor number is greater than 15 are skipped.
        input_lines: iterable of lines, read by position:
            line 0 holds whitespace-separated integers describing the physical
            server; line 2 holds the number of flavor rows; the rows that
            follow are ``flavorN <int> <int>``; line -4 is the resource
            switch (``CPU`` or anything else); lines -2 and -1 start with the
            start and end dates.

    Returns:
        A 7-tuple ``(ecs_data_lr, phy_server, virtual_num, virtual_info,
        cpu_if, start_date_temp, end_date_temp)``:

        * ``ecs_data_lr`` maps ``'YYYYMMDD'`` to a ``Tool.zeros(NUM_OF_FLAVOR)``
          sequence in which index ``flavor - 1`` counts that day's records;
        * ``phy_server`` is the list of integers from line 0;
        * ``virtual_num`` is the number of flavor rows;
        * ``virtual_info`` maps flavor number to ``[first_int,
          second_int // 1024]`` (presumably CPU cores and memory converted
          from MB to GB -- confirm against the input specification);
        * ``cpu_if`` is 1 when line -4 reads ``CPU``, otherwise 0;
        * ``start_date_temp`` / ``end_date_temp`` are ``'YYYY-MM-DD'`` strings.

    Raises:
        IndexError, ValueError: if a non-blank line does not have the layout
            described above.
    """
    print('-----start data processing-----')
    time11 = time.time()

    # ---- ECS history: per-day request counts for each flavor ----
    ecs_data_lr = {}
    for line in ecs_lines:
        if not line.strip():
            # Tolerate blank lines such as a trailing newline at end of file.
            continue
        ecs_contents = line.split('\t')
        flavor = int(ecs_contents[1][6:])
        if flavor > 15:
            continue
        date = ecs_contents[2].strip()[:10].replace('-', '')
        if date not in ecs_data_lr:
            ecs_data_lr[date] = Tool.zeros(NUM_OF_FLAVOR)
        # The first record of a new date is counted as well. The original
        # author's note on this line read "big bug!!! score is lower";
        # presumably it refers to this increment -- unverified.
        ecs_data_lr[date][flavor - 1] += 1

    # ---- input description ----
    input_contents = list(input_lines)
    # The footer is addressed from the end, so drop trailing blank lines.
    while input_contents and not input_contents[-1].strip():
        input_contents.pop()

    phy_server = [int(token) for token in input_contents[0].split()]

    virtual_num = int(input_contents[2].strip())
    virtual_info = {}
    for row in range(virtual_num):
        fields = input_contents[3 + row].split()
        virtual_info[int(fields[0][6:])] = [int(fields[1]),
                                            int(fields[2]) // 1024]

    # strip() removes both '\n' and '\r\n', so the switch is detected on
    # either platform.
    cpu_if = 1 if input_contents[-4].strip() == 'CPU' else 0

    start_date_temp = input_contents[-2].strip()[:10]
    end_date_temp = input_contents[-1].strip()[:10]
    # The [year, month, day] lists are only used for the log output below.
    start_date = [int(start_date_temp[:4]), int(start_date_temp[5:7]),
                  int(start_date_temp[8:])]
    end_date = [int(end_date_temp[:4]), int(end_date_temp[5:7]),
                int(end_date_temp[8:])]

    print('physical server info : %s' % phy_server)
    print('number of virtual machine : %s' % virtual_num)
    print('info of virtual machine: %s' % virtual_info)
    print('cpu & mem switch : %s' % cpu_if)
    print("start date is: {}\nend date is: {}".format(start_date, end_date))
    time12 = time.time()
    print('data prcess time using: {}'.format(time12 - time11))
    print('-----end data processing-----')
    return (ecs_data_lr, phy_server, virtual_num, virtual_info,
            cpu_if, start_date_temp, end_date_temp)
def magic(ecs_data, start, early_round, interval):
    """Build sliding-window per-flavor totals looking back from ``start``.

    Round ``i`` (0-based) adds up every daily record whose date ``d``
    satisfies ``start - (interval + i) days <= d <= start - (i + 1) days``,
    so each successive round moves the window one day further into the past.

    Args:
        ecs_data: dict mapping ``'YYYYMMDD'`` strings to a sequence of
            per-flavor counts for that day.
        start: ``datetime.datetime`` the windows are anchored to.
        early_round: number of windows (rows) to produce.
        interval: window length in days, used as the initial offset of the
            window's lower bound from ``start``.

    Returns:
        list of ``early_round`` rows; each row is a ``Tool.zeros(NUM_OF_FLAVOR)``
        sequence holding the summed counts for that window. Row 0 is the most
        recent window.

    Raises:
        ValueError: if a key of ``ecs_data`` is not a ``%Y%m%d`` date (only
            when at least one round is requested).
    """
    print('-----start magic-----')
    one_day = datetime.timedelta(days=1)

    # Parse every date once and reuse it in every round; strptime is slow.
    # Skipped when no round is requested, so nothing is parsed in that case.
    daily_counts = []
    if early_round > 0:
        daily_counts = [
            (datetime.datetime.strptime(day, '%Y%m%d'), counts)
            for day, counts in ecs_data.items()
        ]

    window_end = start
    window_start = start - datetime.timedelta(days=interval)
    count_list = []
    for _ in range(early_round):
        window_end -= one_day
        totals = Tool.zeros(NUM_OF_FLAVOR)
        for day, counts in daily_counts:
            if window_end >= day >= window_start:
                for index, amount in enumerate(counts):
                    totals[index] += amount
        count_list.append(totals)
        window_start -= one_day

    print('-----end magic-----')
    return count_list
def _lg_forecast(lg_count_list, flavor, lg_round):
    """Return the non-negative Lagrange forecast for one flavor.

    Each of the first ``lg_round`` rows is replaced by ``Tool.mid`` of its
    neighbours ``i - 3 .. i + 2`` (clipped to the valid range); three
    ``Tool.mean`` values over slices of that series are then fed to
    ``Tool.LG``. (Tool.mid / Tool.LG are presumably a median and a Lagrange
    interpolation -- confirm in Tool.)
    """
    smoothed = [
        Tool.mid([
            lg_count_list[j][flavor - 1]
            for j in range(i - 3, i + 3) if 0 <= j < lg_round
        ])
        for i in range(lg_round)
    ]
    window_list = [
        Tool.mean(smoothed[4:7]),
        Tool.mean(smoothed[0:7]),
        Tool.mean(smoothed[0:3]),
    ]
    return max(int(Tool.LG(3, [0, 1, 2], window_list)), 0)


def _rf_forecast(rf_count_list, flavor, rf_day_gap, seed, rf_diff):
    """Return the random-forest forecast for one flavor.

    Training samples are reversed slices of length ``rf_day_gap`` taken from
    the flavor's series (``rf_diff == 0``) or from ``Tool.line_diff`` of it
    (otherwise, in which case the last raw value is added to the result).
    """
    series = [row[flavor - 1] for row in rf_count_list]
    test = series[:rf_day_gap - 1][::-1]
    # Called unconditionally and before the samples are sliced, exactly as
    # the original did, in case Tool.line_diff touches its argument.
    diff_list = Tool.line_diff(series)

    # NOTE(review): with rf_diff != 0 the samples are differences but `test`
    # is still built from the raw series -- confirm this is intended.
    source = series if rf_diff == 0 else diff_list
    samples = [
        source[j:j + rf_day_gap][::-1]
        for j in range(len(rf_count_list) - rf_day_gap)
    ]
    labels = list(range(rf_day_gap))
    rf_result = rf_predict(samples, labels, test, seed)
    if rf_diff == 0:
        return rf_result
    return rf_result + series[-1]


def _lr_forecast(lr_count_list, flavor, lr_day_gap, history=20):
    """Return the non-negative linear-regression forecast for one flavor.

    Builds ``history - lr_day_gap + 1`` training rows. Row ``i`` holds
    ``lr_day_gap - 1`` smoothed values taken from rows ``i .. i + lr_day_gap - 1``
    followed by the raw value of row ``i`` as its last element. The weights
    from ``linear_regression`` are then applied to a feature vector derived
    from row 0.
    """
    column = int(flavor) - 1
    lr_data = []
    for i in range(history - lr_day_gap + 1):
        lr_window = [
            Tool.mid([
                lr_count_list[i + jj][column]
                for jj in range(j - 3, j + 3) if 0 <= jj < lr_day_gap
            ])
            for j in range(1, lr_day_gap)
        ]
        lr_window.append(lr_count_list[i][column])
        lr_data.append(lr_window)

    w = Tool.zeros(lr_day_gap)
    w = linear_regression(w, lr_data, 0.02, 700)  # previously tried: 0.03, 500

    # Features: the last element of row 0 followed by its first
    # lr_day_gap - 2 smoothed values, then the squares of all of those.
    x = [lr_data[0][lr_day_gap - 1]] + lr_data[0][:lr_day_gap - 2]
    x = x + [u * u for u in x]
    max_x = max(x)
    min_x = min(x)
    if max_x - min_x > 0:
        # NOTE(review): under Python 2 with integer counts both divisions
        # below are floor divisions -- confirm that is what
        # linear_regression expects.
        mean_x = sum(x) / len(x)
        spread = max_x - min_x
        x = [1] + [(value - mean_x) / spread for value in x]
    else:
        x = [1] + x
    return max(w_mul_x(w, x), 0)


def predict(lg_count_list, lr_count_list, rf_count_list, es_count_list,
            virtual_info, lg_round, mix_rf, mix_lr, mix_es, rf_day_gap,
            lr_day_gap, es=3, alpha=0.5, seed=1000, floor=0.0, rf_diff=0):
    """Blend four per-flavor forecasts into one integer prediction each.

    For every flavor in ``virtual_info`` a Lagrange (LG), random-forest (RF)
    and linear-regression (LR) forecast is computed; exponential-smoothing
    (ES) forecasts for all flavors come from one ``es_predict`` call. The
    final value is::

        int(mix_lr * LR + mix_rf * RF + mix_es * ES
            + (1 - mix_lr - mix_rf - mix_es) * LG + floor)

    Args:
        lg_count_list, lr_count_list, rf_count_list, es_count_list: lists of
            per-window rows, each row indexed by ``flavor - 1``, feeding the
            respective model.
        virtual_info: dict keyed by flavor number; only the keys are used
            here (the dict itself is passed on to ``es_predict``).
        lg_round: number of leading rows of ``lg_count_list`` to use.
        mix_rf, mix_lr, mix_es: blend weights; LG receives the remainder.
        rf_day_gap: sample length for the random-forest model.
        lr_day_gap: row length for the linear-regression model.
        es, alpha: forwarded to ``es_predict``.
        seed: forwarded to ``rf_predict``.
        floor: offset added before ``int()`` truncation (e.g. 0.5 gives
            round-half-up for non-negative sums; 0.0 truncates).
        rf_diff: 0 trains the random forest on the raw series; any other
            value trains on ``Tool.line_diff`` of it.

    Returns:
        dict mapping flavor number to the blended integer prediction.
    """
    print('-----start predict-----')
    lg_predict_result = {}
    rf_predict_result = {}
    lr_predict_result = {}
    for flavor in virtual_info:
        # Model order per flavor (LG, RF, LR) matches the original code.
        lg_predict_result[flavor] = _lg_forecast(lg_count_list, flavor,
                                                 lg_round)
        rf_predict_result[flavor] = _rf_forecast(rf_count_list, flavor,
                                                 rf_day_gap, seed, rf_diff)
        lr_predict_result[flavor] = _lr_forecast(lr_count_list, flavor,
                                                 lr_day_gap)

    # Exponential smoothing handles all flavors in one call.
    es_predict_result = es_predict(es_count_list, virtual_info, es, alpha)

    print('LG预测结果:{}'.format(lg_predict_result))
    print('LR预测结果:{}'.format(lr_predict_result))
    print('RF预测结果:{}'.format(rf_predict_result))
    print('ES预测结果:{}'.format(es_predict_result))

    lg_weight = 1 - mix_lr - mix_rf - mix_es
    predict_result = lg_predict_result.copy()
    for key in predict_result:
        # `floor` is added before truncation so callers can choose between
        # rounding down (0.0) and rounding half up (0.5).
        predict_result[key] = int(
            mix_lr * lr_predict_result[key] +
            mix_rf * rf_predict_result[key] +
            mix_es * es_predict_result[key] +
            lg_weight * lg_predict_result[key] + floor)

    print('最终预测结果:{}'.format(predict_result))
    print('-----end predict-----')
    return predict_result