Ejemplo n.º 1
0
def init():
    """Load configuration, configure logging and build the server globals.

    Reads ``../conf/ad_svr.conf`` into the module-level ``g_conf``, sends
    log output to ``../log/ad_svr.<timestamp>.log`` (the timestamp is the
    date and hour at the moment of the call) and then populates the module
    globals ``g_adx_interface``, ``g_invert_idx_mgr``, ``g_idx_mgr``,
    ``g_ip_obj``, ``g_id_obj``, ``g_rank_bid`` and ``g_filter_obj``.

    Returns:
        A list ``[service_para, service_name, process_num]``:
        ``service_para`` is the ``gearman.para`` option split on commas,
        ``service_name`` is the ``gearman.name`` option and ``process_num``
        is the ``gearman.process_num`` option converted to ``int``.
    """
    global g_adx_interface
    global g_invert_idx_mgr
    global g_idx_mgr
    global g_ip_obj
    global g_id_obj
    global g_rank_bid
    global g_filter_obj

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H")
    g_conf.read("../conf/ad_svr.conf")
    # NOTE(review): "%m_%d" in datefmt looks like a typo for "%m-%d"; it is
    # left untouched because changing it alters the emitted log format.
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                        datefmt='[%Y-%m_%d %H:%M:%S]',
                        filename='../log/ad_svr.' + timestamp + '.log',
                        filemode='a')
    service_para = g_conf.get("gearman", "para")
    service_name = g_conf.get("gearman", "name")
    process_num = int(g_conf.get("gearman", "process_num"))
    service_para = service_para.split(',')

    # Load the adx id map and build the adx interface on top of it.
    adx_id_obj = adx_id_map(g_conf)
    g_adx_interface = adx_interface_t(adx_id_obj, g_conf)

    # One inverted index per name listed in index.ridx_list.
    g_invert_idx_mgr = {}
    for name in g_conf.get("index", "ridx_list").split(","):
        ridx = inverted_index_t()
        ridx.load_file("../index/" + name + ".ridx")
        g_invert_idx_mgr[name] = ridx

    # One forward index per name listed in index.idx_list.
    g_idx_mgr = {}
    for name in g_conf.get("index", "idx_list").split(","):
        idx = index_t()
        idx.load_file("../index/" + name + ".idx")
        g_idx_mgr[name] = idx

    # IP-to-region lookup and feature-id translation objects.
    g_ip_obj = ip_parse_t(g_conf.get("file", "ip_table"),
                          g_conf.get("file", "ip_region"))
    g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))

    # Feature extractor, prediction model and idea operator feed the
    # rank/bid module.
    extract_fea_obj = extract_feature_t(g_ip_obj, g_id_obj)
    model_obj = predict_model_t(g_conf.get("file", "model"))
    operator_obj = idea_operator_t(g_conf.get("file", "idea_operate"))
    g_rank_bid = rank_bid_t(extract_fea_obj, model_obj, operator_obj)

    # Filter built from the inverted indexes.
    g_filter_obj = filter_t(g_invert_idx_mgr, g_conf)

    # Logged only once every global has been built, so the message is not
    # emitted when the filter construction above fails.
    logging.info("init complete")

    return [service_para, service_name, process_num]
Ejemplo n.º 2
0
def process(input_file, fea_output_file, ml_file):
    """Turn a tab-separated request log into a feature file and an ML file.

    Each input line is split on tabs; column 1 is passed to
    ``extract_feature`` as the time string, columns 2 and 3 are parsed as
    the integer click and consume values, and column 4 is a JSON document
    whose ``"request"`` entry is itself a JSON string describing the
    request. Lines whose JSON cannot be decoded are logged and skipped.

    Side effects: rebinds the module globals ``g_ip_obj`` and ``g_id_obj``.

    Args:
        input_file: Path of the log file to read.
        fea_output_file: Path that receives a header line followed by
            ``click<TAB>consume<TAB>feature values`` rows.
        ml_file: Path that receives ``click id:1 id:1 ...`` rows, with the
            ids of a row in ascending order.
    """
    global g_ip_obj
    global g_id_obj
    single_feature_list = ["region", "app", "time", "app_type", "manufacture"]
    combine_feature_list = []
    feature_names = single_feature_list + combine_feature_list

    # Context managers guarantee all three files are flushed and closed,
    # including when a malformed line raises part-way through.
    with open(input_file, "r") as input_fp, \
            open(fea_output_file, "w") as fea_fp, \
            open(ml_file, "w") as ml_fp:
        g_ip_obj = ip_parse_t("../data/ip.all", "../data/region.txt")
        g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))

        # With an empty combine list the header ends in a tab; this is kept
        # as-is so the output stays byte-compatible.
        header = ("click\tconsume\t" + "\t".join(single_feature_list) + "\t"
                  + "\t".join(combine_feature_list) + "\n")
        fea_fp.write(header)

        for line_cnt, raw_line in enumerate(input_fp, 1):
            fields = raw_line.rstrip("\r\n").split("\t")
            click = int(fields[2])
            consume = int(fields[3])
            time_str = fields[1]
            req_json = fields[4]
            try:
                req_dict = json.loads(req_json)
                fea_json_dict = json.loads(req_dict["request"])
            except (ValueError, KeyError, TypeError):
                logging.warning("load json failed[%d]" % (line_cnt))
                continue

            feature_value_dict = extract_feature(
                time_str, fea_json_dict,
                single_feature_list, combine_feature_list)
            # Trailing tabs (from empty trailing values) are stripped.
            fea_str = "\t".join(
                [feature_value_dict[name] for name in feature_names]
            ).rstrip("\t")
            fea_fp.write("%d\t%d\t%s\n" % (click, consume, fea_str))

            # Translate every feature value to its id and emit them sorted.
            fea_id_list = sorted(
                [g_id_obj.get_id(fea_name) for fea_name in fea_str.split("\t")])
            fea_id_str = " ".join(
                [str(fea_id) + ":1" for fea_id in fea_id_list])
            ml_fp.write("%d %s\n" % (click, fea_id_str))
Ejemplo n.º 3
0
def process(input_file, output_file):
    """Write click, consume and region feature for every request log line.

    Each input line is split on tabs. Column 4 must hold a JSON document
    whose ``"request"`` entry is itself a JSON string; that inner document
    is passed to ``extract_feature``. Lines whose JSON cannot be decoded
    are logged and skipped. Each output row is
    ``column2<TAB>column3<TAB>region_feature``.

    Side effects: rebinds the module global ``g_ip_obj`` (presumably used
    by ``extract_feature`` — confirm against its definition).

    Args:
        input_file: Path of the tab-separated log file to read.
        output_file: Path of the file to write.
    """
    global g_ip_obj
    # Context managers close both files even if a malformed line raises.
    with open(input_file, "r") as input_fp, \
            open(output_file, "w") as output_fp:
        g_ip_obj = ip_parse_t("../data/ip.all", "../data/region.txt")
        for line_cnt, raw_line in enumerate(input_fp, 1):
            fields = raw_line.rstrip("\r\n").split("\t")
            req_json = fields[4]
            # The parsed value is not used; the call is kept so that a
            # non-integer click column still raises as it did before.
            int(fields[2])
            try:
                req_dict = json.loads(req_json)
                fea_json_dict = json.loads(req_dict["request"])
            except (ValueError, KeyError, TypeError):
                logging.warning("load json failed[%d]" % (line_cnt))
                continue
            region_feature = extract_feature(fea_json_dict)
            output_fp.write(
                "%s\t%s\t%s\n" % (fields[2], fields[3], region_feature))
Ejemplo n.º 4
0
def tongji(req_file):
    """Look up the region of every request IP and write it to a result file.

    Each line of ``req_file`` is split on the ``\\001`` separator. Field 1
    must be a JSON document whose ``"request"`` entry is itself a JSON
    string containing ``device.ip``. The IP is looked up with
    ``ip_parse_t.search``; when a result is returned, a row
    ``ip<TAB>result[2]<TAB>result[3]`` is written to
    ``../data/region_result``. Unknown IPs and lines that fail at any step
    are logged and skipped.

    Args:
        req_file: Path of the request log to read.
    """
    ip_obj = ip_parse_t("../data/ip.all", "../data/region.txt")
    # Context managers close both files on every exit path; previously the
    # output file was never closed explicitly.
    with open("../data/region_result", "w") as output_fp, \
            open(req_file, "r") as fp:
        for line_cnt, raw_line in enumerate(fp, 1):
            fields = raw_line.rstrip("\r\n").split("\001")
            try:
                json_dict = json.loads(fields[1])
                request_dict = json.loads(json_dict["request"])
                ip_str = request_dict["device"]["ip"]
                region_result_list = ip_obj.search(ip_str)
                if region_result_list is not None:
                    output_fp.write("%s\t%d\t%d\n" % (
                        ip_str, region_result_list[2], region_result_list[3]))
                else:
                    logging.warning("ip not found[%s]" % (ip_str))
            except Exception:
                # Best-effort per line: any failure is logged and the line
                # skipped, but KeyboardInterrupt/SystemExit now propagate.
                logging.warning("extract content failed[%d]" % (line_cnt))
Ejemplo n.º 5
0
def process(input_file, fea_output_file, ml_file, type):
    """Turn an action log into a feature file and an ML training file.

    Each input line is split on tabs: column 0 is passed to
    ``extract_feature`` as the time string, column 1 is a JSON object with
    the keys ``click``, ``download``, ``install``, ``open`` and ``cost``,
    and column 2 is the JSON feature document. Lines whose feature JSON
    cannot be decoded are logged and skipped; a malformed action JSON in
    column 1 still raises.

    Side effects: rebinds the module globals ``g_ip_obj`` and ``g_id_obj``.

    Args:
        input_file: Path of the tab-separated log file to read.
        fea_output_file: Path that receives a header line followed by
            ``click, download, install, open, consume, features`` rows.
        ml_file: Path that receives ``click fea_id_str`` rows.
        type: When equal to ``"train"``, rows whose ``download`` value is 1
            are written to ``ml_file`` five times instead of once.
    """
    global g_ip_obj
    global g_id_obj
    single_feature_list = ["region", "app", "time", "app_type", "manufacture"]
    combine_feature_list = []

    # Context managers guarantee all three files are flushed and closed,
    # including when a malformed line raises part-way through.
    with open(input_file, "r") as input_fp, \
            open(fea_output_file, "w") as fea_fp, \
            open(ml_file, "w") as ml_fp:
        g_ip_obj = ip_parse_t("../data/ip.all", "../data/region.txt")
        g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))

        # With an empty combine list the header ends in a tab; this is kept
        # as-is so the output stays byte-compatible.
        header = ("click\tdownload\tinstall\topen\tconsume\t"
                  + "\t".join(single_feature_list) + "\t"
                  + "\t".join(combine_feature_list) + "\n")
        fea_fp.write(header)

        for line_cnt, raw_line in enumerate(input_fp, 1):
            fields = raw_line.rstrip("\r\n").split("\t")
            time_str = fields[0]
            try:
                fea_json_dict = json.loads(fields[2])
            except (ValueError, TypeError):
                logging.warning("load json failed[%d]" % (line_cnt))
                continue
            (fea_str, fea_id_str) = extract_feature(
                time_str, fea_json_dict,
                single_feature_list, combine_feature_list)

            ad_action_dict = json.loads(fields[1])
            click = ad_action_dict["click"]
            download = ad_action_dict["download"]
            install = ad_action_dict["install"]
            app_open = ad_action_dict["open"]
            consume = ad_action_dict["cost"]
            fea_fp.write("%d\t%d\t%d\t%d\t%d\t%s\n" % (
                click, download, install, app_open, consume, fea_str))

            # Training rows with a download are repeated five times.
            if type == "train" and download == 1:
                repeat = 5
            else:
                repeat = 1
            for _ in range(repeat):
                ml_fp.write("%d %s\n" % (click, fea_id_str))
Ejemplo n.º 6
0
def tongji(req_file):
    """Resolve each request's device IP to a region and record the result.

    Lines of ``req_file`` are split on ``\\001``; the second field is
    decoded as JSON, its ``"request"`` entry is decoded as JSON again, and
    ``device.ip`` is read from the inner document. The IP is passed to
    ``ip_parse_t.search``. A non-``None`` result produces the row
    ``ip<TAB>result[2]<TAB>result[3]`` in ``../data/region_result``;
    a ``None`` result or any failure while handling the line is logged and
    the line is skipped.

    Args:
        req_file: Path of the request log to read.
    """
    ip_obj = ip_parse_t("../data/ip.all", "../data/region.txt")
    # The output file was previously never closed and the input only on the
    # success path; ``with`` closes both on every exit path.
    with open("../data/region_result", "w") as output_fp, \
            open(req_file, "r") as fp:
        line_cnt = 0
        for line in fp:
            line_cnt += 1
            parts = line.rstrip("\r\n").split("\001")
            try:
                json_dict = json.loads(parts[1])
                request_dict = json.loads(json_dict["request"])
                ip_str = request_dict["device"]["ip"]
                region_result_list = ip_obj.search(ip_str)
                if region_result_list is None:
                    logging.warning("ip not found[%s]" % (ip_str))
                    continue
                output_fp.write(
                    "%s\t%d\t%d\n" %
                    (ip_str, region_result_list[2], region_result_list[3]))
            except Exception:
                # Per-line best effort is preserved, but interpreter-exit
                # signals (KeyboardInterrupt, SystemExit) are not swallowed.
                logging.warning("extract content failed[%d]" % (line_cnt))