def history_repeat_filter(bid_history_files, the_ctr_model, bid_model, candidate_paras, outfilename):
    '''
    Replay bid history with exactly one candidate parameter per ad group.

    Every eligible history row whose simulated bid is at least the recorded
    win price is copied verbatim to ``outfilename``. Summary statistics are
    printed to stdout twice: once for all eligible rows ("old") and once for
    the rows that were kept.

    A row is eligible when it parses, its (campaign_id, adgroup_id) key is
    present in both ``candidate_paras`` and ``bid_model``, and its win price
    is positive.

    paras:
        bid_history_files: list of join-file paths to replay
        the_ctr_model: CTR model; must provide predict_ctr(feature_values)
        bid_model: mapping (campaign_id, adgroup_id) -> bid model providing
            get_bids_auc(pctr, paras)
        candidate_paras: mapping (campaign_id, adgroup_id) -> list of
            candidate parameters; only entries holding exactly one
            parameter are used
        outfilename: path of the file receiving the kept rows (overwritten)
    return:
        None. Results are reported through stdout and ``outfilename`` only.
    '''
    # This variant handles a single parameter per ad group; drop the rest.
    candidate_paras = {
        camp_grp: paras
        for camp_grp, paras in candidate_paras.items()
        if len(paras) == 1
    }
    if not candidate_paras:
        print('there is no candidate parameter')
        return

    def _ratio(numerator, denominator):
        # Report 0 instead of raising when there were no impressions/clicks.
        return float(numerator) / denominator if denominator else 0.0

    click_num = 0
    imp_num = 0
    cost = 0
    click_num_old = 0
    imp_num_old = 0
    cost_old = 0

    with open(outfilename, 'w') as outfile:
        for bid_history_file in bid_history_files:
            with open(bid_history_file) as history:
                for line in history:
                    field_dict = join.get_field_dict(line)
                    if field_dict is None:
                        continue
                    # Check for a missing price before converting it:
                    # float(None) would raise instead of skipping the row.
                    raw_price = field_dict['win_price']
                    if raw_price is None:
                        continue
                    win_price = float(raw_price)
                    camp_grp = (field_dict['campaign_id'], field_dict['adgroup_id'])
                    if (camp_grp not in candidate_paras
                            or camp_grp not in bid_model
                            or win_price <= 0):
                        continue
                    pctr = the_ctr_model.predict_ctr(field_dict['feature_values'])
                    candidate_bid_prices = bid_model[camp_grp].get_bids_auc(
                        pctr, candidate_paras[camp_grp])
                    clicked = 1 if field_dict['click_flag'] == True else 0
                    # Only one parameter, hence only one simulated bid.
                    if candidate_bid_prices[0] >= win_price:
                        outfile.write(line)
                        imp_num += 1
                        click_num += clicked
                        cost += win_price
                    imp_num_old += 1
                    click_num_old += clicked
                    cost_old += win_price

    # Summed win prices are scaled down by 1000 before reporting
    # (presumably because win prices are quoted per thousand impressions
    # -- TODO confirm).
    cost = float(cost) / 1000
    cost_old = float(cost_old) / 1000

    print('file%s' % bid_history_files)
    print('old imp_num %d,click_num %d,cost %.4f' % (imp_num_old, click_num_old, cost_old))
    print('old cpm %.4f,ctr %.8f,cpc %.4f' % (
        _ratio(1000 * cost_old, imp_num_old),
        _ratio(click_num_old, imp_num_old),
        _ratio(cost_old, click_num_old)))
    print('imp_num %d,click_num %d,cost %.4f' % (imp_num, click_num, cost))
    print('cpm %.4f,ctr %.8f,cpc %.4f' % (
        _ratio(1000 * cost, imp_num),
        _ratio(click_num, imp_num),
        _ratio(cost, click_num)))
# Example no. 2
# 0
def history_repeat_conditional(bid_history_files, the_ctr_model, bid_model,
                               candidate_paras, condition_type, stat_result):
    '''
    Replay bid history for every candidate parameter under a resource cap.

    For each eligible history row the bid of every candidate parameter is
    computed; the row is credited to every candidate from the index returned
    by binary_search up to (excluding) the ad group's current stop index.
    After each row the stop index is recomputed so that candidates that
    reached half of the reference cost / click total stop accumulating.

    paras:
        bid_history_files: list of join-file names (list type)
        the_ctr_model: CTR model; must provide predict_ctr(feature_values)
        bid_model: mapping (campaign_id, adgroup_id) -> bid model providing
            get_bids_auc(ctr=..., variable_paras=...), bid_strategy_type
            and fixed_parameter
        candidate_paras: mapping (campaign_id, adgroup_id) -> list of
            candidate parameters; empty lists are ignored
        condition_type: mapping (campaign_id, adgroup_id) -> 'cost' or
            'click'; any other value disables the cap for that ad group
        stat_result: mapping (campaign_id, adgroup_id) -> dict with 'cost'
            and 'click' reference totals (half of the total is the cap)
    return:
        paras_perf[camp_grp][para] =
            {'impression':,'avg_pctr':,'click':,'cost':,'ctr':,'ecpc':,'cpm':}
    '''
    candidate_paras = {
        camp_grp: paras
        for camp_grp, paras in candidate_paras.items()
        if len(paras) != 0
    }
    bpo_logger.info(
        '**************history_repeat function start**************')
    bpo_logger.info('files:%s' % ','.join(bid_history_files))
    bpo_logger.info('************** bid_model: %s**************' %
                    (bid_model.keys()))
    bpo_logger.info('************** candidate_paras: %s**************' %
                    (candidate_paras))
    bpo_logger.info('************** condition_type: %s**************' %
                    (condition_type))

    # Pre-create one accumulator per candidate parameter. Only ad groups that
    # have a bid model, candidate parameters and a condition type take part.
    stop_flag_idx = {}
    result = {}
    for camp_grp in bid_model.keys():
        if camp_grp not in candidate_paras or camp_grp not in condition_type:
            continue
        tmp_num = len(candidate_paras[camp_grp])
        stop_flag_idx[camp_grp] = tmp_num
        result[camp_grp] = [
            {'impression': 0, 'avg_pctr': 0, 'click': 0, 'cost': 0}
            for _ in range(tmp_num)
        ]

    req_parser = join.Parser()
    for bid_history_file in bid_history_files:
        with open(bid_history_file) as history:
            for line in history:
                req_parser.feed(line)
                field_dict = req_parser.get_all()
                if field_dict is None:
                    continue
                # Check for a missing price before converting it:
                # float(None) would raise instead of skipping the row.
                raw_price = field_dict['win_price']
                if raw_price is None:
                    continue
                win_price = float(raw_price)
                camp_grp = (field_dict['campaign_id'],
                            field_dict['adgroup_id'])
                if win_price <= 0 or camp_grp not in result:
                    continue
                pctr = the_ctr_model.predict_ctr(field_dict['feature_values'])
                candidate_bid_prices = bid_model[camp_grp].get_bids_auc(
                    ctr=pctr, variable_paras=candidate_paras[camp_grp])
                # binary_search is defined outside this block; judging by the
                # variable name it yields the index of the first bid that is
                # >= win_price -- TODO confirm.
                first_ge_idx = binary_search(candidate_bid_prices, win_price)
                stats = result[camp_grp]
                clicked = 1 if field_dict['click_flag'] == True else 0
                # Non-cumulative mode: credit every candidate in the range.
                # The range is empty once first_ge_idx reaches the stop index.
                for tmp_idx in range(first_ge_idx, stop_flag_idx[camp_grp]):
                    stats[tmp_idx]['impression'] += 1
                    stats[tmp_idx]['avg_pctr'] += pctr
                    stats[tmp_idx]['click'] += clicked
                    stats[tmp_idx]['cost'] += win_price
                # Resource cap: 50% of the total cost or 50% of the total
                # clicks, depending on the ad group's condition type.
                # NOTE(review): on Python 2 '/ 2' floors when the reference
                # total is an int -- confirm that is intended.
                limit_kind = condition_type[camp_grp]
                if limit_kind in ('cost', 'click'):
                    stop_flag_idx[camp_grp] = binary_search(
                        [stats[idx][limit_kind]
                         for idx in range(0, stop_flag_idx[camp_grp])],
                        stat_result[camp_grp][limit_kind] / 2)

    # Derived metrics: ctr / avg_pctr / ecpc / cpm.
    for camp_grp in result:
        for perf in result[camp_grp]:
            perf['cost'] /= 1000  # unit change; cpm is the price per 1000 impressions
            impressions = perf['impression']
            clicks = perf['click']
            perf['ctr'] = (
                (clicks + 0.0) / impressions) if impressions != 0 else 0
            perf['avg_pctr'] = (
                (perf['avg_pctr'] + 0.0) / impressions
            ) if impressions != 0 else 0
            perf['ecpc'] = perf['cost'] / clicks if clicks != 0 else 0
            perf['cpm'] = (
                1000 * perf['cost'] / impressions) if impressions != 0 else 0

    # Key the accumulators by the parameter value instead of its index.
    paras_perf = {
        camp_grp: dict(zip(candidate_paras[camp_grp], result[camp_grp]))
        for camp_grp in result
    }

    # Log the results.
    bpo_logger.info('**************history_repeat function end**************')
    formatter = 'paras_performance\ncampaign id:{camp}\nadgroup id:{grp}\nbid strategy:{strategy}\nbid strategy fixed parameter:{para}\n'
    for camp_grp in paras_perf:
        log_parts = [
            'bid_history_files:%s\n' % ','.join(bid_history_files),
            formatter.format(
                camp=camp_grp[0],
                grp=camp_grp[1],
                strategy=bid_model[camp_grp].bid_strategy_type,
                para=bid_model[camp_grp].fixed_parameter),
            'condition type:%s\n' % condition_type[camp_grp],
            'para click imp cost cpc ctr avg_pctr cpm\n',
            'unit: US dollar\n',
        ]
        for para in sorted(paras_perf[camp_grp]):
            tmp_dict = paras_perf[camp_grp][para]
            log_parts.append(
                str(para) +
                ' %(click)s %(impression)s %(cost)s %(ecpc)s %(ctr)s %(avg_pctr)s %(cpm)s\n' % tmp_dict)
        bpo_logger.info('************parameter performance start************')
        bpo_logger.info(''.join(log_parts))
        bpo_logger.info('************parameter performance end************')
    return paras_perf
# Example no. 3
# 0
def history_repeat(bid_history_files, the_ctr_model, bid_model,
                   candidate_paras):
    '''
    Replay bid history for every candidate parameter, without resource caps.

    For each eligible history row the bid of every candidate parameter is
    computed and the row is credited to the single candidate at the index
    returned by binary_search. After all rows are read, a prefix sum over the
    candidate indices turns those per-index counts into cumulative totals,
    and the derived metrics are computed from the totals.

    A row is eligible when it parses, its (campaign_id, adgroup_id) key has
    both a bid model and a non-empty candidate list, and its win price is
    positive.

    paras:
        bid_history_files: list of join-file paths
        the_ctr_model: CTR model; must provide predict_ctr(feature_values)
        bid_model: mapping (campaign_id, adgroup_id) -> bid model providing
            get_bids_auc(ctr=..., variable_paras=...), bid_strategy_type
            and fixed_parameter
        candidate_paras: mapping (campaign_id, adgroup_id) -> list of
            candidate parameters; empty lists are ignored
    return:
        paras_perf[camp_grp][para] =
            {'impression':,'avg_pctr':,'click':,'cost':,'ctr':,'ecpc':,'cpm':}
    '''
    candidate_paras = {
        camp_grp: paras
        for camp_grp, paras in candidate_paras.items()
        if len(paras) != 0
    }
    bpo_logger.info(
        '**************history_repeat function start**************')
    bpo_logger.info('files:%s' % ','.join(bid_history_files))
    bpo_logger.info('************** bid_model: %s**************' %
                    (bid_model.keys()))
    bpo_logger.info('************** candidate_paras: %s**************' %
                    (candidate_paras))

    metric_names = ('impression', 'avg_pctr', 'click', 'cost')
    result = {}
    for camp_grp in bid_model.keys():
        # An ad group with a bid model but no (non-empty) candidate list has
        # nothing to simulate; indexing candidate_paras with it would raise
        # KeyError, so skip it.
        if camp_grp not in candidate_paras:
            continue
        result[camp_grp] = [
            dict.fromkeys(metric_names, 0)
            for _ in candidate_paras[camp_grp]
        ]

    req_parser = join.Parser()
    for bid_history_file in bid_history_files:
        with open(bid_history_file) as history:
            for line in history:
                req_parser.feed(line)
                field_dict = req_parser.get_all()
                if field_dict is None:
                    continue
                # Check for a missing price before converting it:
                # float(None) would raise instead of skipping the row.
                raw_price = field_dict['win_price']
                if raw_price is None:
                    continue
                win_price = float(raw_price)
                camp_grp = (field_dict['campaign_id'],
                            field_dict['adgroup_id'])
                # result only holds ad groups present in both bid_model and
                # candidate_paras, i.e. the valid ad groups.
                if camp_grp not in result or win_price <= 0:
                    continue
                pctr = the_ctr_model.predict_ctr(field_dict['feature_values'])
                candidate_bid_prices = bid_model[camp_grp].get_bids_auc(
                    ctr=pctr, variable_paras=candidate_paras[camp_grp])
                # binary_search is defined outside this block; judging by the
                # variable name it yields the index of the first bid that is
                # >= win_price -- TODO confirm.
                first_ge_idx = binary_search(candidate_bid_prices, win_price)

                # Cumulative mode: credit only the first matching candidate
                # and propagate with a prefix sum afterwards. This is faster
                # than crediting every candidate per row, but resource caps
                # (total budget, booked clicks, ...) cannot be applied.
                buckets = result[camp_grp]
                if first_ge_idx < len(buckets):
                    bucket = buckets[first_ge_idx]
                    bucket['impression'] += 1
                    bucket['avg_pctr'] += pctr
                    bucket['click'] += (
                        1 if field_dict['click_flag'] == True else 0)
                    bucket['cost'] += win_price

    # Prefix sum over candidate indices.
    for camp_grp in result:
        buckets = result[camp_grp]
        for idx in range(1, len(buckets)):
            for tmp_metric in metric_names:
                buckets[idx][tmp_metric] += buckets[idx - 1][tmp_metric]

    # Derived metrics: ctr / avg_pctr / ecpc / cpm.
    for camp_grp in result:
        for perf in result[camp_grp]:
            perf['cost'] /= 1000  # unit change
            impressions = perf['impression']
            clicks = perf['click']
            perf['ctr'] = (
                (clicks + 0.0) / impressions) if impressions != 0 else 0
            perf['avg_pctr'] = (
                (perf['avg_pctr'] + 0.0) / impressions
            ) if impressions != 0 else 0
            perf['ecpc'] = perf['cost'] / clicks if clicks != 0 else 0
            perf['cpm'] = (
                1000 * perf['cost'] / impressions) if impressions != 0 else 0

    # Key the accumulators by the parameter value instead of its index.
    paras_perf = {
        camp_grp: dict(zip(candidate_paras[camp_grp], result[camp_grp]))
        for camp_grp in result
    }

    # Log the results.
    bpo_logger.info('**************history_repeat function end**************')
    formatter = 'paras_performance\ncampaign id:{camp}\nadgroup id:{grp}\nbid strategy:{strategy}\nbid strategy fixed parameter:{para}\n'
    for camp_grp in paras_perf:
        log_parts = [
            'bid_history_files:%s\n' % ','.join(bid_history_files),
            formatter.format(
                camp=camp_grp[0],
                grp=camp_grp[1],
                strategy=bid_model[camp_grp].bid_strategy_type,
                para=bid_model[camp_grp].fixed_parameter),
            'para click imp cost cpc ctr avg_pctr cpm\n',
            'unit: US dollar\n',
        ]
        for para in sorted(paras_perf[camp_grp]):
            tmp_dict = paras_perf[camp_grp][para]
            log_parts.append(
                str(para) +
                ' %(click)s %(impression)s %(cost)s %(ecpc)s %(ctr)s %(avg_pctr)s %(cpm)s\n' % tmp_dict)

        bpo_logger.info('************parameter performance start************')
        bpo_logger.info(''.join(log_parts))
        bpo_logger.info('************parameter performance end************')

    return paras_perf
# Example no. 4
# 0
def history_repeat_filter(bid_history_files, the_ctr_model, bid_model,
                          candidate_paras, outfilename):
    '''
    Replay bid history with exactly one candidate parameter per ad group.

    Every eligible history row whose simulated bid is at least the recorded
    win price is copied verbatim to ``outfilename``. Summary statistics are
    printed to stdout twice: once for all eligible rows ("old") and once for
    the rows that were kept.

    A row is eligible when it parses, its (campaign_id, adgroup_id) key is
    present in both ``candidate_paras`` and ``bid_model``, and its win price
    is positive.

    paras:
        bid_history_files: list of join-file paths to replay
        the_ctr_model: CTR model; must provide predict_ctr(feature_values)
        bid_model: mapping (campaign_id, adgroup_id) -> bid model providing
            get_bids_auc(pctr, paras)
        candidate_paras: mapping (campaign_id, adgroup_id) -> list of
            candidate parameters; only entries holding exactly one
            parameter are used
        outfilename: path of the file receiving the kept rows (overwritten)
    return:
        None. Results are reported through stdout and ``outfilename`` only.
    '''
    # This variant handles a single parameter per ad group; drop the rest.
    candidate_paras = {
        camp_grp: paras
        for camp_grp, paras in candidate_paras.items()
        if len(paras) == 1
    }
    if not candidate_paras:
        print('there is no candidate parameter')
        return

    def _ratio(numerator, denominator):
        # Report 0 instead of raising when there were no impressions/clicks.
        return float(numerator) / denominator if denominator else 0.0

    click_num = 0
    imp_num = 0
    cost = 0
    click_num_old = 0
    imp_num_old = 0
    cost_old = 0

    with open(outfilename, 'w') as outfile:
        for bid_history_file in bid_history_files:
            with open(bid_history_file) as history:
                for line in history:
                    field_dict = join.get_field_dict(line)
                    if field_dict is None:
                        continue
                    # Check for a missing price before converting it:
                    # float(None) would raise instead of skipping the row.
                    raw_price = field_dict['win_price']
                    if raw_price is None:
                        continue
                    win_price = float(raw_price)
                    camp_grp = (field_dict['campaign_id'],
                                field_dict['adgroup_id'])
                    if (camp_grp not in candidate_paras
                            or camp_grp not in bid_model
                            or win_price <= 0):
                        continue
                    pctr = the_ctr_model.predict_ctr(
                        field_dict['feature_values'])
                    candidate_bid_prices = bid_model[camp_grp].get_bids_auc(
                        pctr, candidate_paras[camp_grp])
                    clicked = 1 if field_dict['click_flag'] == True else 0
                    # Only one parameter, hence only one simulated bid.
                    if candidate_bid_prices[0] >= win_price:
                        outfile.write(line)
                        imp_num += 1
                        click_num += clicked
                        cost += win_price
                    imp_num_old += 1
                    click_num_old += clicked
                    cost_old += win_price

    # Summed win prices are scaled down by 1000 before reporting
    # (presumably because win prices are quoted per thousand impressions
    # -- TODO confirm).
    cost = float(cost) / 1000
    cost_old = float(cost_old) / 1000

    print('file%s' % bid_history_files)
    print('old imp_num %d,click_num %d,cost %.4f' % (imp_num_old,
                                                     click_num_old, cost_old))
    print('old cpm %.4f,ctr %.8f,cpc %.4f' % (
        _ratio(1000 * cost_old, imp_num_old),
        _ratio(click_num_old, imp_num_old),
        _ratio(cost_old, click_num_old)))
    print('imp_num %d,click_num %d,cost %.4f' % (imp_num, click_num, cost))
    print('cpm %.4f,ctr %.8f,cpc %.4f' % (
        _ratio(1000 * cost, imp_num),
        _ratio(click_num, imp_num),
        _ratio(cost, click_num)))
def history_repeat(bid_history_files, the_ctr_model, bid_model, candidate_paras):
    '''
    Replay bid history for every candidate parameter, without resource caps.

    For each eligible history row the bid of every candidate parameter is
    computed and the row is credited to the single candidate at the index
    returned by binary_search. After all rows are read, a prefix sum over the
    candidate indices turns those per-index counts into cumulative totals,
    and the derived metrics are computed from the totals.

    A row is eligible when it parses, its (campaign_id, adgroup_id) key has
    both a bid model and a non-empty candidate list, and its win price is
    positive.

    paras:
        bid_history_files: list of join-file paths
        the_ctr_model: CTR model; must provide predict_ctr(feature_values)
        bid_model: mapping (campaign_id, adgroup_id) -> bid model providing
            get_bids_auc(ctr=..., variable_paras=...), bid_strategy_type
            and fixed_parameter
        candidate_paras: mapping (campaign_id, adgroup_id) -> list of
            candidate parameters; empty lists are ignored
    return:
        paras_perf[camp_grp][para] =
            {'impression':,'avg_pctr':,'click':,'cost':,'ctr':,'ecpc':,'cpm':}
    '''
    candidate_paras = {camp_grp: paras for camp_grp, paras in candidate_paras.items() if len(paras) != 0}
    bpo_logger.info('**************history_repeat function start**************')
    bpo_logger.info('files:%s' % ','.join(bid_history_files))
    bpo_logger.info('************** bid_model: %s**************' % (bid_model.keys()))
    bpo_logger.info('************** candidate_paras: %s**************' % (candidate_paras))

    metric_names = ('impression', 'avg_pctr', 'click', 'cost')
    result = {}
    for camp_grp in bid_model.keys():
        # An ad group with a bid model but no (non-empty) candidate list has
        # nothing to simulate; indexing candidate_paras with it would raise
        # KeyError, so skip it.
        if camp_grp not in candidate_paras:
            continue
        result[camp_grp] = [dict.fromkeys(metric_names, 0) for _ in candidate_paras[camp_grp]]

    req_parser = join.Parser()
    for bid_history_file in bid_history_files:
        with open(bid_history_file) as history:
            for line in history:
                req_parser.feed(line)
                field_dict = req_parser.get_all()
                if field_dict is None:
                    continue
                # Check for a missing price before converting it:
                # float(None) would raise instead of skipping the row.
                raw_price = field_dict['win_price']
                if raw_price is None:
                    continue
                win_price = float(raw_price)
                camp_grp = (field_dict['campaign_id'], field_dict['adgroup_id'])
                # result only holds ad groups present in both bid_model and
                # candidate_paras, i.e. the valid ad groups.
                if camp_grp not in result or win_price <= 0:
                    continue
                pctr = the_ctr_model.predict_ctr(field_dict['feature_values'])
                candidate_bid_prices = bid_model[camp_grp].get_bids_auc(ctr=pctr, variable_paras=candidate_paras[camp_grp])
                # binary_search is defined outside this block; judging by the
                # variable name it yields the index of the first bid that is
                # >= win_price -- TODO confirm.
                first_ge_idx = binary_search(candidate_bid_prices, win_price)

                # Cumulative mode: credit only the first matching candidate
                # and propagate with a prefix sum afterwards. This is faster
                # than crediting every candidate per row, but resource caps
                # (total budget, booked clicks, ...) cannot be applied.
                buckets = result[camp_grp]
                if first_ge_idx < len(buckets):
                    bucket = buckets[first_ge_idx]
                    bucket['impression'] += 1
                    bucket['avg_pctr'] += pctr
                    bucket['click'] += (1 if field_dict['click_flag'] == True else 0)
                    bucket['cost'] += win_price

    # Prefix sum over candidate indices.
    for camp_grp in result:
        buckets = result[camp_grp]
        for idx in range(1, len(buckets)):
            for tmp_metric in metric_names:
                buckets[idx][tmp_metric] += buckets[idx - 1][tmp_metric]

    # Derived metrics: ctr / avg_pctr / ecpc / cpm.
    for camp_grp in result:
        for perf in result[camp_grp]:
            perf['cost'] /= 1000  # unit change
            impressions = perf['impression']
            clicks = perf['click']
            perf['ctr'] = ((clicks + 0.0) / impressions) if impressions != 0 else 0
            perf['avg_pctr'] = ((perf['avg_pctr'] + 0.0) / impressions) if impressions != 0 else 0
            perf['ecpc'] = perf['cost'] / clicks if clicks != 0 else 0
            perf['cpm'] = 1000 * perf['cost'] / impressions if impressions != 0 else 0

    # Key the accumulators by the parameter value instead of its index.
    paras_perf = {camp_grp: dict(zip(candidate_paras[camp_grp], result[camp_grp])) for camp_grp in result}

    # Log the results.
    bpo_logger.info('**************history_repeat function end**************')
    formatter = 'paras_performance\ncampaign id:{camp}\nadgroup id:{grp}\nbid strategy:{strategy}\nbid strategy fixed parameter:{para}\n'
    for camp_grp in paras_perf:
        log_parts = [
            'bid_history_files:%s\n' % ','.join(bid_history_files),
            formatter.format(
                camp=camp_grp[0],
                grp=camp_grp[1],
                strategy=bid_model[camp_grp].bid_strategy_type,
                para=bid_model[camp_grp].fixed_parameter),
            'para click imp cost cpc ctr avg_pctr cpm\n',
            'unit: US dollar\n',
        ]
        for para in sorted(paras_perf[camp_grp]):
            tmp_dict = paras_perf[camp_grp][para]
            log_parts.append(str(para) + ' %(click)s %(impression)s %(cost)s %(ecpc)s %(ctr)s %(avg_pctr)s %(cpm)s\n' % tmp_dict)

        bpo_logger.info('************parameter performance start************')
        bpo_logger.info(''.join(log_parts))
        bpo_logger.info('************parameter performance end************')

    return paras_perf
def history_repeat_conditional(
    bid_history_files, the_ctr_model, bid_model, candidate_paras, condition_type, stat_result
):
    """
    Replay bid history for every candidate parameter under a resource cap.

    For each eligible history row the bid of every candidate parameter is
    computed; the row is credited to every candidate from the index returned
    by binary_search up to (excluding) the ad group's current stop index.
    After each row the stop index is recomputed so that candidates that
    reached half of the reference cost / click total stop accumulating.

    paras:
        bid_history_files: list of join-file names (list type)
        the_ctr_model: CTR model; must provide predict_ctr(feature_values)
        bid_model: mapping (campaign_id, adgroup_id) -> bid model providing
            get_bids_auc(ctr=..., variable_paras=...), bid_strategy_type
            and fixed_parameter
        candidate_paras: mapping (campaign_id, adgroup_id) -> list of
            candidate parameters; empty lists are ignored
        condition_type: mapping (campaign_id, adgroup_id) -> "cost" or
            "click"; any other value disables the cap for that ad group
        stat_result: mapping (campaign_id, adgroup_id) -> dict with "cost"
            and "click" reference totals (half of the total is the cap)
    return:
        paras_perf[camp_grp][para] =
            {'impression':,'avg_pctr':,'click':,'cost':,'ctr':,'ecpc':,'cpm':}
    """
    candidate_paras = {
        camp_grp: paras
        for camp_grp, paras in candidate_paras.items()
        if len(paras) != 0
    }
    bpo_logger.info("**************history_repeat function start**************")
    bpo_logger.info("files:%s" % ",".join(bid_history_files))
    bpo_logger.info("************** bid_model: %s**************" % (bid_model.keys()))
    bpo_logger.info("************** candidate_paras: %s**************" % (candidate_paras))
    bpo_logger.info("************** condition_type: %s**************" % (condition_type))

    # Pre-create one accumulator per candidate parameter. Only ad groups that
    # have a bid model, candidate parameters and a condition type take part.
    stop_flag_idx = {}
    result = {}
    for camp_grp in bid_model.keys():
        if camp_grp not in candidate_paras or camp_grp not in condition_type:
            continue
        tmp_num = len(candidate_paras[camp_grp])
        stop_flag_idx[camp_grp] = tmp_num
        result[camp_grp] = [{"impression": 0, "avg_pctr": 0, "click": 0, "cost": 0} for _ in range(tmp_num)]

    req_parser = join.Parser()
    for bid_history_file in bid_history_files:
        with open(bid_history_file) as history:
            for line in history:
                req_parser.feed(line)
                field_dict = req_parser.get_all()
                if field_dict is None:
                    continue
                # Check for a missing price before converting it:
                # float(None) would raise instead of skipping the row.
                raw_price = field_dict["win_price"]
                if raw_price is None:
                    continue
                win_price = float(raw_price)
                camp_grp = (field_dict["campaign_id"], field_dict["adgroup_id"])
                if win_price <= 0 or camp_grp not in result:
                    continue
                pctr = the_ctr_model.predict_ctr(field_dict["feature_values"])
                candidate_bid_prices = bid_model[camp_grp].get_bids_auc(
                    ctr=pctr, variable_paras=candidate_paras[camp_grp]
                )
                # binary_search is defined outside this block; judging by the
                # variable name it yields the index of the first bid that is
                # >= win_price -- TODO confirm.
                first_ge_idx = binary_search(candidate_bid_prices, win_price)
                stats = result[camp_grp]
                clicked = 1 if field_dict["click_flag"] == True else 0
                # Non-cumulative mode: credit every candidate in the range.
                # The range is empty once first_ge_idx reaches the stop index.
                for tmp_idx in range(first_ge_idx, stop_flag_idx[camp_grp]):
                    stats[tmp_idx]["impression"] += 1
                    stats[tmp_idx]["avg_pctr"] += pctr
                    stats[tmp_idx]["click"] += clicked
                    stats[tmp_idx]["cost"] += win_price
                # Resource cap: 50% of the total cost or 50% of the total
                # clicks, depending on the ad group's condition type.
                # NOTE(review): on Python 2 "/ 2" floors when the reference
                # total is an int -- confirm that is intended.
                limit_kind = condition_type[camp_grp]
                if limit_kind in ("cost", "click"):
                    stop_flag_idx[camp_grp] = binary_search(
                        [stats[idx][limit_kind] for idx in range(0, stop_flag_idx[camp_grp])],
                        stat_result[camp_grp][limit_kind] / 2,
                    )

    # Derived metrics: ctr / avg_pctr / ecpc / cpm.
    for camp_grp in result:
        for perf in result[camp_grp]:
            perf["cost"] /= 1000  # unit change; cpm is the price per 1000 impressions
            impressions = perf["impression"]
            clicks = perf["click"]
            perf["ctr"] = ((clicks + 0.0) / impressions) if impressions != 0 else 0
            perf["avg_pctr"] = ((perf["avg_pctr"] + 0.0) / impressions) if impressions != 0 else 0
            perf["ecpc"] = perf["cost"] / clicks if clicks != 0 else 0
            perf["cpm"] = 1000 * perf["cost"] / impressions if impressions != 0 else 0

    # Key the accumulators by the parameter value instead of its index.
    paras_perf = {
        camp_grp: dict(zip(candidate_paras[camp_grp], result[camp_grp]))
        for camp_grp in result
    }

    # Log the results.
    bpo_logger.info("**************history_repeat function end**************")
    formatter = "paras_performance\ncampaign id:{camp}\nadgroup id:{grp}\nbid strategy:{strategy}\nbid strategy fixed parameter:{para}\n"
    for camp_grp in paras_perf:
        log_parts = [
            "bid_history_files:%s\n" % ",".join(bid_history_files),
            formatter.format(
                camp=camp_grp[0],
                grp=camp_grp[1],
                strategy=bid_model[camp_grp].bid_strategy_type,
                para=bid_model[camp_grp].fixed_parameter,
            ),
            "condition type:%s\n" % condition_type[camp_grp],
            "para click imp cost cpc ctr avg_pctr cpm\n",
            "unit: US dollar\n",
        ]
        for para in sorted(paras_perf[camp_grp]):
            tmp_dict = paras_perf[camp_grp][para]
            log_parts.append(
                str(para) + " %(click)s %(impression)s %(cost)s %(ecpc)s %(ctr)s %(avg_pctr)s %(cpm)s\n" % tmp_dict
            )
        bpo_logger.info("************parameter performance start************")
        bpo_logger.info("".join(log_parts))
        bpo_logger.info("************parameter performance end************")
    return paras_perf