예제 #1
0
def init():
    """Load the service configuration, set up logging and build the globals.

    Reads ``../conf/ad_svr.conf`` through ``g_conf``, sends log records to
    ``../log/ad_svr.<timestamp>.log`` (the timestamp is the date and hour at
    which this function runs) and rebinds these module globals:
    ``g_adx_interface``, ``g_invert_idx_mgr``, ``g_idx_mgr``, ``g_ip_obj``,
    ``g_id_obj``, ``g_rank_bid`` and ``g_filter_obj``.

    Returns:
        A list ``[service_para, service_name, process_num]``:
        ``service_para`` is the ``gearman.para`` option split on commas,
        ``service_name`` is the ``gearman.name`` option and ``process_num``
        is the ``gearman.process_num`` option converted to ``int``.
    """
    global g_adx_interface, g_invert_idx_mgr, g_idx_mgr
    global g_ip_obj, g_id_obj, g_rank_bid, g_filter_obj

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H")
    g_conf.read("../conf/ad_svr.conf")
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
        # The date part used to read '%Y-%m_%d'; the stray underscore is
        # replaced so the record timestamp matches the usual Y-m-d layout.
        datefmt='[%Y-%m-%d %H:%M:%S]',
        filename='../log/ad_svr.' + timestamp + '.log',
        filemode='a')

    service_para = g_conf.get("gearman", "para").split(',')
    service_name = g_conf.get("gearman", "name")
    process_num = int(g_conf.get("gearman", "process_num"))

    # Load the adx map and the interface built on top of it.
    adx_id_obj = adx_id_map(g_conf)
    g_adx_interface = adx_interface_t(adx_id_obj, g_conf)

    # One inverted index per name listed in index.ridx_list.
    g_invert_idx_mgr = {}
    for name in g_conf.get("index", "ridx_list").split(","):
        ridx = inverted_index_t()
        ridx.load_file("../index/" + name + ".ridx")
        g_invert_idx_mgr[name] = ridx

    # One forward index per name listed in index.idx_list.
    g_idx_mgr = {}
    for name in g_conf.get("index", "idx_list").split(","):
        idx = index_t()
        idx.load_file("../index/" + name + ".idx")
        g_idx_mgr[name] = idx

    # IP/region lookup and feature-id translation objects.
    g_ip_obj = ip_parse_t(g_conf.get("file", "ip_table"),
                          g_conf.get("file", "ip_region"))
    g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))

    # Feature extractor, prediction model and idea operator feed the ranker.
    extract_fea_obj = extract_feature_t(g_ip_obj, g_id_obj)
    model_obj = predict_model_t(g_conf.get("file", "model"))
    operator_obj = idea_operator_t(g_conf.get("file", "idea_operate"))
    g_rank_bid = rank_bid_t(extract_fea_obj, model_obj, operator_obj)

    # Filter built from the inverted indexes loaded above.
    g_filter_obj = filter_t(g_invert_idx_mgr, g_conf)

    # Logged only after the last component is built, so the message cannot
    # appear when the filter construction fails.
    logging.info("init complete")

    return [service_para, service_name, process_num]
예제 #2
0
def process(input_file, fea_output_file, ml_file):
    """Turn a tab-separated request log into a feature table and an id file.

    Every input line is split on tabs: column 1 is the time string, column 2
    the click value, column 3 the consume value (both parsed with ``int``)
    and column 4 a JSON document whose ``"request"`` entry is itself a JSON
    string. Lines whose JSON cannot be decoded are logged with their line
    number and skipped.

    Side effects: rebinds the module globals ``g_ip_obj`` and ``g_id_obj``.

    Args:
        input_file: path of the log file to read.
        fea_output_file: path that receives a header line followed by one
            ``click<TAB>consume<TAB>feature values`` row per accepted line.
        ml_file: path that receives one ``click id:1 id:1 ...`` row per
            accepted line, with the ids in sorted order.
    """
    global g_ip_obj
    global g_id_obj
    single_feature_list = ["region", "app", "time", "app_type", "manufacture"]
    combine_feature_list = []
    feature_names = single_feature_list + combine_feature_list

    # ``with`` closes all three files even when a line raises part-way through.
    with open(input_file, "r") as input_fp, \
            open(fea_output_file, "w") as fea_fp, \
            open(ml_file, "w") as ml_fp:
        g_ip_obj = ip_parse_t("../data/ip.all", "../data/region.txt")
        g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))

        # Joining all column names avoids the trailing tab the header used to
        # carry when combine_feature_list is empty; data rows never had one.
        fea_fp.write("\t".join(["click", "consume"] + feature_names) + "\n")

        for line_cnt, raw_line in enumerate(input_fp, 1):
            fields = raw_line.rstrip("\r\n").split("\t")
            click = int(fields[2])
            consume = int(fields[3])
            time_str = fields[1]
            req_json = fields[4]
            try:
                req_dict = json.loads(req_json)
                fea_json_dict = json.loads(req_dict["request"])
            except (ValueError, KeyError, TypeError):
                # ValueError: malformed JSON; KeyError: no "request" entry;
                # TypeError: decoded value has an unexpected type.
                logging.warning("load json failed[%d]", line_cnt)
                continue

            feature_value_dict = extract_feature(time_str, fea_json_dict,
                                                 single_feature_list,
                                                 combine_feature_list)
            # Trailing tabs are stripped exactly as before, so empty trailing
            # feature values still collapse.
            fea_str = "\t".join(
                feature_value_dict[name] for name in feature_names).rstrip("\t")
            fea_fp.write("%d\t%d\t%s\n" % (click, consume, fea_str))

            # Translate each feature value to its id and emit them sorted.
            fea_id_list = sorted(
                g_id_obj.get_id(fea_name) for fea_name in fea_str.split("\t"))
            fea_id_str = " ".join(str(fea_id) + ":1" for fea_id in fea_id_list)
            ml_fp.write("%d %s\n" % (click, fea_id_str))
예제 #3
0
def process(input_file, fea_output_file, ml_file):
    """Extract features from a request log and write two output files.

    Input lines are tab-separated. Column 1 holds the time string, columns 2
    and 3 the integer click and consume values, and column 4 a JSON object
    whose ``"request"`` value is a JSON-encoded string. A line whose JSON
    fails to decode is reported through ``logging.warning`` and skipped.

    The module globals ``g_ip_obj`` and ``g_id_obj`` are rebound on each call.

    Args:
        input_file: path of the log file to read.
        fea_output_file: destination of the tab-separated feature table
            (header line, then ``click``, ``consume`` and the feature values).
        ml_file: destination of the id-encoded rows, written as the click
            value followed by space-separated ``id:1`` pairs in sorted order.
    """
    global g_ip_obj
    global g_id_obj
    single_feature_list = ["region", "app", "time", "app_type", "manufacture"]
    combine_feature_list = []
    columns = ["click", "consume"] + single_feature_list + combine_feature_list

    # Context managers guarantee the handles are released on every exit path.
    with open(input_file, "r") as input_fp, \
            open(fea_output_file, "w") as fea_fp, \
            open(ml_file, "w") as ml_fp:
        g_ip_obj = ip_parse_t("../data/ip.all", "../data/region.txt")
        g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))

        # The header previously ended in a stray tab whenever
        # combine_feature_list was empty, which the data rows never had.
        fea_fp.write("\t".join(columns) + "\n")

        for line_cnt, raw_line in enumerate(input_fp, 1):
            parts = raw_line.rstrip("\r\n").split("\t")
            click = int(parts[2])
            consume = int(parts[3])
            time_str = parts[1]
            req_json = parts[4]
            try:
                req_dict = json.loads(req_json)
                fea_json_dict = json.loads(req_dict["request"])
            except (ValueError, KeyError, TypeError):
                # Only decode/shape problems are skipped; interrupts and
                # unrelated errors now propagate.
                logging.warning("load json failed[%d]", line_cnt)
                continue

            feature_value_dict = extract_feature(
                time_str, fea_json_dict, single_feature_list, combine_feature_list)
            values = [feature_value_dict[name] for name in single_feature_list]
            values.extend(feature_value_dict[name] for name in combine_feature_list)
            # rstrip keeps the original handling of empty trailing values.
            fea_str = "\t".join(values).rstrip("\t")
            fea_fp.write("%d\t%d\t%s\n" % (click, consume, fea_str))

            # Map every feature value to its id; ids are written sorted.
            fea_id_list = [g_id_obj.get_id(value) for value in fea_str.split("\t")]
            fea_id_list.sort()
            fea_id_str = " ".join(str(fea_id) + ":1" for fea_id in fea_id_list)
            ml_fp.write("%d %s\n" % (click, fea_id_str))
예제 #4
0
def process(input_file, fea_output_file, ml_file, type):
    """Convert an ad-action log into a feature table and an id-encoded file.

    Every input line is split on tabs: column 0 is the time string, column 1
    a JSON object with the keys ``click``, ``download``, ``install``,
    ``open`` and ``cost``, and column 2 a JSON document handed to
    ``extract_feature``. Lines whose column 2 is missing or is not valid JSON
    are logged with their line number and skipped.

    Side effects: rebinds the module globals ``g_ip_obj`` and ``g_id_obj``.

    Args:
        input_file: path of the log file to read.
        fea_output_file: path that receives a header line followed by one
            row of the five action values and the feature string per line.
        ml_file: path that receives ``click`` followed by the id string
            returned by ``extract_feature``.
        type: when equal to ``"train"``, rows whose ``download`` value is 1
            are written five times to ``ml_file``; otherwise every row is
            written once. (The name shadows the builtin but is kept so
            existing keyword callers keep working.)
    """
    global g_ip_obj
    global g_id_obj
    single_feature_list = ["region", "app", "time", "app_type", "manufacture"]
    combine_feature_list = []
    # NOTE(review): this header ends with a tab while combine_feature_list is
    # empty. It is left unchanged because the row layout comes from
    # extract_feature, which is not visible here -- confirm before trimming.
    header = "click\tdownload\tinstall\topen\tconsume\t" + "\t".join(
        single_feature_list) + "\t" + "\t".join(combine_feature_list) + "\n"

    # ``with`` closes all three files even when a line raises part-way through.
    with open(input_file, "r") as input_fp, \
            open(fea_output_file, "w") as fea_fp, \
            open(ml_file, "w") as ml_fp:
        g_ip_obj = ip_parse_t("../data/ip.all", "../data/region.txt")
        g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))
        fea_fp.write(header)

        for line_cnt, raw_line in enumerate(input_fp, 1):
            fields = raw_line.rstrip("\r\n").split("\t")
            time_str = fields[0]
            try:
                fea_json_dict = json.loads(fields[2])
            except (ValueError, IndexError):
                # ValueError: malformed JSON; IndexError: line has fewer than
                # three columns. Anything else now propagates.
                logging.warning("load json failed[%d]", line_cnt)
                continue

            (fea_str, fea_id_str) = extract_feature(time_str, fea_json_dict,
                                                    single_feature_list,
                                                    combine_feature_list)
            ad_action_dict = json.loads(fields[1])
            click = ad_action_dict["click"]
            download = ad_action_dict["download"]
            install = ad_action_dict["install"]
            app_open = ad_action_dict["open"]
            consume = ad_action_dict["cost"]
            fea_fp.write("%d\t%d\t%d\t%d\t%d\t%s\n" %
                         (click, download, install, app_open, consume, fea_str))

            # Downloaded rows are repeated five times in train mode
            # (presumably to up-weight them -- confirm with the model owner).
            repeat = 5 if type == "train" and download == 1 else 1
            for _ in range(repeat):
                ml_fp.write("%d %s\n" % (click, fea_id_str))
예제 #5
0
def process(input_file, fea_output_file, ml_file, type):
    """Extract features from an ad-action log and write two output files.

    Input lines are tab-separated. Column 0 holds the time string, column 1
    a JSON object providing ``click``, ``download``, ``install``, ``open``
    and ``cost``, and column 2 the JSON document passed to
    ``extract_feature``. A line whose column 2 is absent or fails to decode
    is reported through ``logging.warning`` and skipped.

    The module globals ``g_ip_obj`` and ``g_id_obj`` are rebound on each call.

    Args:
        input_file: path of the log file to read.
        fea_output_file: destination of the tab-separated feature table
            (header line, then the five action values and the feature string).
        ml_file: destination of the rows made of the click value and the id
            string returned by ``extract_feature``.
        type: ``"train"`` makes rows with ``download == 1`` appear five times
            in ``ml_file``; any other value writes each row once. The
            builtin-shadowing name is kept for caller compatibility.
    """
    global g_ip_obj
    global g_id_obj
    single_feature_list = ["region", "app", "time", "app_type", "manufacture"]
    combine_feature_list = []
    # NOTE(review): with an empty combine_feature_list the header ends in a
    # tab. Kept as-is; whether rows match depends on extract_feature, which
    # is defined elsewhere -- confirm before changing.
    header = ("click\tdownload\tinstall\topen\tconsume\t"
              + "\t".join(single_feature_list)
              + "\t" + "\t".join(combine_feature_list) + "\n")

    # Context managers guarantee the handles are released on every exit path.
    with open(input_file, "r") as input_fp, \
            open(fea_output_file, "w") as fea_fp, \
            open(ml_file, "w") as ml_fp:
        g_ip_obj = ip_parse_t("../data/ip.all", "../data/region.txt")
        g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))
        fea_fp.write(header)

        for line_cnt, raw_line in enumerate(input_fp, 1):
            parts = raw_line.rstrip("\r\n").split("\t")
            time_str = parts[0]
            try:
                fea_json_dict = json.loads(parts[2])
            except (ValueError, IndexError):
                # Covers malformed JSON and lines with too few columns;
                # interrupts and unrelated errors are no longer swallowed.
                logging.warning("load json failed[%d]", line_cnt)
                continue

            (fea_str, fea_id_str) = extract_feature(
                time_str, fea_json_dict, single_feature_list, combine_feature_list)
            ad_action_dict = json.loads(parts[1])
            click = ad_action_dict["click"]
            download = ad_action_dict["download"]
            install = ad_action_dict["install"]
            app_open = ad_action_dict["open"]
            consume = ad_action_dict["cost"]
            fea_fp.write("%d\t%d\t%d\t%d\t%d\t%s\n"
                         % (click, download, install, app_open, consume, fea_str))

            # Train mode repeats downloaded rows five times (looks like
            # up-weighting of those samples -- TODO confirm).
            if type == "train" and download == 1:
                copies = 5
            else:
                copies = 1
            for _ in range(copies):
                ml_fp.write("%d %s\n" % (click, fea_id_str))
예제 #6
0
def init():
    """Initialise configuration, logging and the module-level service objects.

    The configuration is read from ``../conf/ad_svr.conf`` via ``g_conf``.
    Log records go to ``../log/ad_svr.<timestamp>.log``, where the timestamp
    is the date and hour at the moment of the call. The following globals
    are rebound: ``g_adx_interface``, ``g_invert_idx_mgr``, ``g_idx_mgr``,
    ``g_ip_obj``, ``g_id_obj``, ``g_rank_bid`` and ``g_filter_obj``.

    Returns:
        ``[service_para, service_name, process_num]``: the comma-split
        ``gearman.para`` option, the ``gearman.name`` option and the
        ``gearman.process_num`` option as an ``int``.
    """
    global g_adx_interface
    global g_invert_idx_mgr
    global g_idx_mgr
    global g_ip_obj
    global g_id_obj
    global g_rank_bid
    global g_filter_obj

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H")
    g_conf.read("../conf/ad_svr.conf")
    logging.basicConfig(
        level=logging.INFO,
        format=
        '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
        # Was '[%Y-%m_%d %H:%M:%S]'; the underscore between month and day is
        # replaced with a hyphen so the date reads Y-m-d.
        datefmt='[%Y-%m-%d %H:%M:%S]',
        filename='../log/ad_svr.' + timestamp + '.log',
        filemode='a')
    service_para = g_conf.get("gearman", "para")
    service_name = g_conf.get("gearman", "name")
    process_num = int(g_conf.get("gearman", "process_num"))
    service_para = service_para.split(',')

    # Load the adx map and wrap it in the adx interface.
    adx_id_obj = adx_id_map(g_conf)
    g_adx_interface = adx_interface_t(adx_id_obj, g_conf)

    # Inverted indexes, keyed by the names in index.ridx_list.
    g_invert_idx_mgr = {}
    ridx_name_list = g_conf.get("index", "ridx_list").split(",")
    for name in ridx_name_list:
        obj = inverted_index_t()
        obj.load_file("../index/" + name + ".ridx")
        g_invert_idx_mgr[name] = obj

    # Forward indexes, keyed by the names in index.idx_list.
    g_idx_mgr = {}
    idx_name_list = g_conf.get("index", "idx_list").split(",")
    for name in idx_name_list:
        obj = index_t()
        obj.load_file("../index/" + name + ".idx")
        g_idx_mgr[name] = obj

    # IP/region lookup object.
    g_ip_obj = ip_parse_t(g_conf.get("file", "ip_table"),
                          g_conf.get("file", "ip_region"))
    # Feature-id translation object.
    g_id_obj = transform_id_t(g_conf.get("file", "fea_id_file"))

    # The ranker combines the feature extractor, the model and the operator.
    extract_fea_obj = extract_feature_t(g_ip_obj, g_id_obj)
    model_obj = predict_model_t(g_conf.get("file", "model"))
    operator_obj = idea_operator_t(g_conf.get("file", "idea_operate"))
    g_rank_bid = rank_bid_t(extract_fea_obj, model_obj, operator_obj)

    # Filter built on the inverted indexes loaded above.
    g_filter_obj = filter_t(g_invert_idx_mgr, g_conf)

    # Emitted last: previously this was logged before the filter was built,
    # so a failure there still left "init complete" in the log.
    logging.info("init complete")

    return [service_para, service_name, process_num]