Ejemplo n.º 1
0
def run(Base_Dir_40p208, date, cid, bid, conf_dict, train_list, test_list):
    """Preprocess the feature sets, fit the LR model and record the results.

    Steps, in order:
      1. Build the input/output paths below ``Base_Dir_40p208 + "/../data/lr"``
         and call ``do_mkdir`` for every ``{cid}``, ``{cid}/{bid}`` and
         ``{cid}/{bid}/{date}`` level of the feature_set and result trees.
      2. Run ``PreProcess`` on the raw sample files, writing the
         ``*_processed`` files.
      3. Run ``lr.run`` on the processed files to obtain ``coef`` and ``auc``.
      4. Write the coefficients (grouped per preprocessing entry) and the
         AUC to the ``result`` file.
      5. Hand everything to ``save_model_lr.run``.

    Args:
        Base_Dir_40p208: Base directory string that every path is built
            from by plain string concatenation.
        date: Value used for the last level of the directory layout.
        cid: Value used for the first level of the directory layout.
        bid: Value used for the second level of the directory layout.
        conf_dict: Passed unchanged to ``PreProcess``.
        train_list: Passed unchanged to ``save_model_lr.run``.
        test_list: Passed unchanged to ``save_model_lr.run``.

    Returns:
        None.
    """
    feature_root = Base_Dir_40p208 + "/../data/lr/feature_set"
    result_root = Base_Dir_40p208 + "/../data/lr/result"

    # One suffix per directory level: {cid}, {cid}/{bid}, {cid}/{bid}/{date}.
    level_suffixes = [
        "/{0}".format(cid),
        "/{0}/{1}".format(cid, bid),
        "/{0}/{1}/{2}".format(cid, bid, date),
    ]
    feature_dir = feature_root + level_suffixes[-1]
    run_dir = result_root + level_suffixes[-1]

    train_pos = feature_dir + "/train_pos"
    train_neg = feature_dir + "/train_neg"
    # NOTE(review): the test inputs point at the *train* files, exactly as
    # the original code did. This looks like a copy-paste slip (test_pos /
    # test_neg were probably intended) -- confirm which files exist in the
    # feature_set directory before changing it.
    test_pos = feature_dir + "/train_pos"
    test_neg = feature_dir + "/train_neg"

    train_pos_processed = run_dir + "/train_pos_processed"
    train_neg_processed = run_dir + "/train_neg_processed"
    test_pos_processed = run_dir + "/test_pos_processed"
    test_neg_processed = run_dir + "/test_neg_processed"

    result_dir = run_dir + "/"
    result_file = run_dir + "/result"
    predict_file = run_dir + "/predict"

    # do_mkdir is called once per level, feature_set tree first, then the
    # result tree (presumably it does not create parent directories).
    for root in (feature_root, result_root):
        for suffix in level_suffixes:
            do_mkdir(root + suffix)

    # For local development, the paths above can be pointed at files under
    # ./data, ./data2 and ./result instead.

    process = PreProcess(train_pos, train_neg, test_pos, test_neg,
                         result_dir, conf_dict)
    process_info_list, processed_length = process.run(
        train_pos_processed, train_neg_processed,
        test_pos_processed, test_neg_processed)

    coef, auc = lr.run(train_pos_processed, train_neg_processed,
                       test_pos_processed, test_neg_processed, predict_file)

    # The context manager closes the file even if a write or lookup fails.
    with open(result_file, "w") as writer:
        writer.write("coef:\n")
        # Entry i owns the next processed_length[i] values of coef[0]; a
        # running offset avoids re-summing the prefix on every iteration.
        start = 0
        for i, length in enumerate(processed_length):
            info = process_info_list[i]
            end = start + length
            writer.write("-------------------- " + info[1] +
                         " --------------------\n")
            writer.write("--- " + info[2] + " ---\n")
            for weight in coef[0][start:end]:
                writer.write("{0}\n".format(weight))
            start = end
        writer.write("\nauc:\n")
        writer.write("{0}\n".format(auc))

    # Store the experiment results in MySQL.
    save_model_lr.run(cid, bid, train_list, test_list, process_info_list,
                      result_dir, result_file)
Ejemplo n.º 2
0
def run(conf_dict):
    """Preprocess local sample files, fit the LR model and write the results.

    All paths are resolved relative to the parent of the current working
    directory: raw samples under ``data/``, processed samples under
    ``data2/`` and outputs under ``result/``. No directories are created
    here, so they must already exist.

    Steps, in order:
      1. Run ``PreProcess`` on the raw sample files, writing the
         ``*_processed`` files.
      2. Run ``lr.run`` on the processed files to obtain ``coef`` and ``auc``.
      3. Write the coefficients (grouped per preprocessing entry) and the
         AUC to ``result/result_file``; the AUC is also printed to stdout.

    Args:
        conf_dict: Passed unchanged to ``PreProcess``.

    Returns:
        None.
    """
    # Local development layout: everything lives next to the working dir.
    base_dir = os.path.abspath(os.curdir + "/../")

    train_pos = base_dir + "/data/train_pos"
    train_neg = base_dir + "/data/train_neg"
    test_pos = base_dir + "/data/test_pos"
    test_neg = base_dir + "/data/test_neg"

    train_pos_processed = base_dir + "/data2/train_pos_processed"
    train_neg_processed = base_dir + "/data2/train_neg_processed"
    test_pos_processed = base_dir + "/data2/test_pos_processed"
    test_neg_processed = base_dir + "/data2/test_neg_processed"

    result_dir = base_dir + "/result/"
    result_file = base_dir + "/result/result_file"
    predict_file = base_dir + "/result/predict_file"

    process = PreProcess(train_pos, train_neg, test_pos, test_neg,
                         result_dir, conf_dict)
    process_info_list, processed_length = process.run(
        train_pos_processed, train_neg_processed,
        test_pos_processed, test_neg_processed)

    coef, auc = lr.run(train_pos_processed, train_neg_processed,
                       test_pos_processed, test_neg_processed, predict_file)

    # The context manager closes the file even if a write or lookup fails.
    with open(result_file, "w") as writer:
        writer.write("coef:\n")
        # Entry i owns the next processed_length[i] values of coef[0]; a
        # running offset avoids re-summing the prefix on every iteration.
        start = 0
        for i, length in enumerate(processed_length):
            info = process_info_list[i]
            end = start + length
            writer.write("-------------------- " + info[1] +
                         " --------------------\n")
            writer.write("--- " + info[2] + " ---\n")
            for weight in coef[0][start:end]:
                writer.write("{0}\n".format(weight))
            start = end
        writer.write("\nauc:\n")
        # Single-argument print produces the same "auc: <value>" line on
        # both Python 2 and Python 3.
        print('auc: {0}'.format(auc))
        writer.write("{0}\n".format(auc))
Ejemplo n.º 3
0
def run(Base_Dir_40p208, date, cid, bid, conf_dict, train_list, test_list):
    """Preprocess the feature sets, fit the LR model and record the results.

    Steps, in order:
      1. Build the input/output paths below ``Base_Dir_40p208 + "/../data/lr"``
         and call ``do_mkdir`` for every ``{cid}``, ``{cid}/{bid}`` and
         ``{cid}/{bid}/{date}`` level of the feature_set and result trees.
      2. Run ``PreProcess`` on the raw sample files, writing the
         ``*_processed`` files.
      3. Run ``lr.run`` on the processed files to obtain ``coef`` and ``auc``.
      4. Write the coefficients (grouped per preprocessing entry) and the
         AUC to the ``result`` file.
      5. Hand everything to ``save_model_lr.run``.

    Args:
        Base_Dir_40p208: Base directory string that every path is built
            from by plain string concatenation.
        date: Value used for the last level of the directory layout.
        cid: Value used for the first level of the directory layout.
        bid: Value used for the second level of the directory layout.
        conf_dict: Passed unchanged to ``PreProcess``.
        train_list: Passed unchanged to ``save_model_lr.run``.
        test_list: Passed unchanged to ``save_model_lr.run``.

    Returns:
        None.
    """
    feature_root = Base_Dir_40p208 + "/../data/lr/feature_set"
    result_root = Base_Dir_40p208 + "/../data/lr/result"

    # One suffix per directory level: {cid}, {cid}/{bid}, {cid}/{bid}/{date}.
    level_suffixes = [
        "/{0}".format(cid),
        "/{0}/{1}".format(cid, bid),
        "/{0}/{1}/{2}".format(cid, bid, date),
    ]
    feature_dir = feature_root + level_suffixes[-1]
    run_dir = result_root + level_suffixes[-1]

    train_pos = feature_dir + "/train_pos"
    train_neg = feature_dir + "/train_neg"
    # NOTE(review): the test inputs point at the *train* files, exactly as
    # the original code did. This looks like a copy-paste slip (test_pos /
    # test_neg were probably intended) -- confirm which files exist in the
    # feature_set directory before changing it.
    test_pos = feature_dir + "/train_pos"
    test_neg = feature_dir + "/train_neg"

    train_pos_processed = run_dir + "/train_pos_processed"
    train_neg_processed = run_dir + "/train_neg_processed"
    test_pos_processed = run_dir + "/test_pos_processed"
    test_neg_processed = run_dir + "/test_neg_processed"

    result_dir = run_dir + "/"
    result_file = run_dir + "/result"
    predict_file = run_dir + "/predict"

    # do_mkdir is called once per level, feature_set tree first, then the
    # result tree (presumably it does not create parent directories).
    for root in (feature_root, result_root):
        for suffix in level_suffixes:
            do_mkdir(root + suffix)

    # For local development, the paths above can be pointed at files under
    # ./data, ./data2 and ./result instead.

    process = PreProcess(train_pos, train_neg, test_pos, test_neg,
                         result_dir, conf_dict)
    process_info_list, processed_length = process.run(
        train_pos_processed, train_neg_processed,
        test_pos_processed, test_neg_processed)

    coef, auc = lr.run(train_pos_processed, train_neg_processed,
                       test_pos_processed, test_neg_processed, predict_file)

    # The context manager closes the file even if a write or lookup fails.
    with open(result_file, "w") as writer:
        writer.write("coef:\n")
        # Entry i owns the next processed_length[i] values of coef[0]; a
        # running offset avoids re-summing the prefix on every iteration.
        start = 0
        for i, length in enumerate(processed_length):
            info = process_info_list[i]
            end = start + length
            writer.write("-------------------- " + info[1] +
                         " --------------------\n")
            writer.write("--- " + info[2] + " ---\n")
            for weight in coef[0][start:end]:
                writer.write("{0}\n".format(weight))
            start = end
        writer.write("\nauc:\n")
        writer.write("{0}\n".format(auc))

    # Store the experiment results in MySQL.
    save_model_lr.run(cid, bid, train_list, test_list, process_info_list,
                      result_dir, result_file)