def record_offline_time(count):
    """
    Record when the merchant pool goes fully offline or comes back online.

    Shifts the previously stored count into ``last_valid_count``, stores
    ``count`` as ``current_valid_count``, and stamps ``offline_time`` on a
    nonzero -> zero transition (``online_time`` on the reverse).

    :param count: number of merchants currently online
    :return: None
    """
    global SUMMARY_INFO
    previous = SUMMARY_INFO.get("current_valid_count", None)
    SUMMARY_INFO["last_valid_count"] = previous
    SUMMARY_INFO["current_valid_count"] = count

    # Nonzero before, zero now: the merchants just dropped offline.
    if previous and count <= 0:
        SUMMARY_INFO['offline_time'] = get_now_str()

    # Zero (or unknown) before, nonzero now: they just came back online.
    if not previous and count:
        SUMMARY_INFO['online_time'] = get_now_str()
Exemple #2
0
def run_lgbm_predict_submit():
    """Score the test core feature set with a saved LightGBM booster and
    convert the predictions into a max-F1 submission file."""
    model_name = 'Booster_bagging_fraction:0.85_bagging_freq:3_categorical_column:[0, 1, 2]_feature_fraction:0.7_learning_rate:0.03_max_bin:255_max_depth:8_num_leaves:170_08-15-00-36-41.pkl'
    feature_csv = INPUT_DIR + 'test_diff_order_streaks_fix2_add_new.csv_core.csv'
    prediction_path = PREDICT_DIR + 'test_core_lgbm_submit.pkl'

    lgbm_predict(model_name, feature_csv, prediction_path, True)

    submit_name = 'submit_max_f1_' + utils.get_now_str() + '.csv'
    convert_to_submit_max_f1(prediction_path, submit_name)
Exemple #3
0
def run_convert():
    """Turn the stacked test predictions into a submission file using a
    fixed probability threshold."""
    # Thresholds tried in earlier runs: 0.53480, 0.56982, 0.70710,
    # 0.53781, 0.70056, 0.68611, 0.55909.
    proba_threshold = 0.5

    stacked_predictions = PREDICT_DIR + 'test_diff_stack.csv'
    submit_name = 'submit_' + utils.get_now_str() + '.csv'
    convert_to_submit(stacked_predictions, proba_threshold, submit_name)
Exemple #4
0
def validate_model_mean_f1(predict_file, save_analysis=False):
    import max_f1_predict
    begin_time = time.time()

    # df = pd.read_csv(PREDICT_DIR + predict_file)
    df = pd.read_pickle(PREDICT_DIR + predict_file)

    # df_validate = pd.read_csv(INPUT_DIR+ground_truth_file)
    # print df_validate[UID].nunique()
    df_positive = df[df.label == 1]
    # print df_positive[UID].nunique()
    df_positive = df_positive.groupby(UID)[PID].apply(lambda pids: list(
        pids)).reset_index().rename(columns={PID: 'true_products'})

    # df = pd.read_csv(PREDICT_DIR + predict_file)
    df_users = pd.DataFrame({UID: df[UID].unique()})
    df_users = df_users.merge(df_positive, how='left', on=[UID])
    for index in df_users[df_users['true_products'].isnull()].index:
        df_users.ix[index, 'true_products'] = [None]

    print 'step 1 elapsed: {}'.format(time.time() - begin_time)

    # df_pred = df.groupby(UID).apply(max_f1_predict.get_best_prediction_group).reset_index()
    df_pred = df.groupby(UID).apply(
        max_f1_predict.get_best_prediction_group_submit).reset_index()
    # print df_pred.head()

    df_users = df_users.merge(df_pred, how='left', on=[UID])

    df_users['scores'] = df_users.apply(apply_f_score, axis=1)

    if save_analysis:
        df_users.to_csv(PREDICT_DIR + '_'.join(
            ['analysis', utils.get_now_str(), predict_file]),
                        index=False)

    print 'mean_f_score: {}'.format(df_users['scores'].mean())
 def _save_stacker_train_set(S_train):
     S_train_name = 'stacker_train' + '_' + utils.get_now_str() + '.csv'
     S_train.to_csv(STACKING_DIR + S_train_name, index=False)
     print 'save stacker train set:', S_train_name
Exemple #6
0
def run_convert_to_submit_max_f1():
    """Build a max-F1 submission file from a saved validation prediction
    pickle."""
    prediction_pkl = PREDICT_DIR + 'validate_core_lgbm_08-13-11-00-07.pkl'
    submit_name = 'submit_max_f1_' + utils.get_now_str() + '.csv'
    convert_to_submit_max_f1(prediction_pkl, submit_name)