import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy import asmatrix
from sklearn.linear_model import Ridge

# Project-specific pieces (MixLinearModel, synthetic_groups, train,
# test_evaluate_group, and the train_data/test_data frames used by
# group_detection) are assumed to be defined elsewhere in this repository.


def combine_models(trained_models,
                   df,
                   y_target,
                   y_inj,
                   label,
                   log_vote=True,
                   plot=True):
    y_test = asmatrix(y_inj)
    log_pred = {}
    linear_pred = {}
    for col in trained_models:
        model = trained_models[col]
        x_test = df[col].values.reshape(-1, 1)
        log_pred[col] = model.log_reg.predict_proba(x_test)[:,
                                                            1].reshape(-1, 1)
        linear_pred[col] = model.linear_reg.predict(
            np.log(x_test + model.eps)).reshape(-1, 1)

    # Average the per-station predictions (column-stack, then row-wise mean).
    a_log_pred = np.mean(np.hstack(list(log_pred.values())),
                         axis=1).reshape(-1, 1)
    a_linear_pred = np.mean(np.hstack(list(linear_pred.values())),
                            axis=1).reshape(-1, 1)
    ll = []
    for col, model in trained_models.items():
        if log_vote:
            result = model.mixl(y_test, a_log_pred, a_linear_pred)
        else:
            result = model.mixl(y_test, log_pred[col], linear_pred[col])
        ll.append(result)
    combined_result = np.mean(np.hstack(ll), axis=1)

    if plot:
        test_plot(combined_result, label, y_test)
    pr, recall = precision_recall(combined_result, label)
    return roc_metric(combined_result, label), pr[0]
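

# `precision_recall` is an external helper not shown in this snippet.
# A minimal sketch of a compatible implementation, assuming scikit-learn;
# the name, signature, and return order are inferred from the call above,
# not the project's confirmed API:
from sklearn.metrics import precision_recall_curve


def precision_recall(scores, label):
    # Precision and recall curves for the given anomaly scores.
    precision, recall, _ = precision_recall_curve(np.asarray(label).ravel(),
                                                  np.asarray(scores).ravel())
    return precision, recall

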
def test(trained_model,
         target_station,
         k_station,
         test_data,
         y_inj,
         t_lbl,
         plot=True):
    y, x = asmatrix(test_data[target_station]), test_data[k_station].values
    ll_test = trained_model.predict(x=x, y=y_inj)
    if plot:
        test_plot(ll_test, t_lbl, asmatrix(y_inj))
    pr = ap(pred=ll_test, obs=t_lbl)

    return roc_metric(ll_test, t_lbl, False), pr
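

# `roc_metric` and `ap` are also external helpers. Plausible sketches
# assuming scikit-learn metrics; the signatures are inferred from the
# call sites in this file and are assumptions, not the confirmed API:
from sklearn.metrics import average_precision_score, roc_auc_score, roc_curve


def roc_metric(pred, obs, plot=False):
    # Area under the ROC curve; optionally draw the curve itself.
    pred, obs = np.asarray(pred).ravel(), np.asarray(obs).ravel()
    if plot:
        fpr, tpr, _ = roc_curve(obs, pred)
        plt.plot(fpr, tpr)
        plt.xlabel('False positive rate')
        plt.ylabel('True positive rate')
        plt.show()
    return roc_auc_score(obs, pred)


def ap(pred, obs):
    # Average precision of the anomaly scores against the 0/1 labels.
    return average_precision_score(np.asarray(obs).ravel(),
                                   np.asarray(pred).ravel())

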
def main():
    df = pd.read_csv('sampledata.csv')

    x, y = df.iloc[:, 2:].values, df.iloc[:, 1:2].values

    # zero-one label
    print("Training sets.")
    y_binary = (y > 0.0).astype(int)
    model = MixLinearModel(linear_reg=Ridge(alpha=0.5))
    model.fit(x=x, y=y)

    dt, lbl = synthetic_fault(y, True)
    ll_ob = model.predict(x, y=dt)
    print(roc_metric(ll_ob, lbl, False))

    ## Join stations
    models = {}
    columns = df.columns[2:]
    roc = {}
    predictions = []
    for col in columns:
        train_col = df[col].values.reshape(-1, 1)
        models[col] = MixLinearModel(linear_reg=Ridge(alpha=0.5)).fit(
            x=train_col, y=y)
        # plt.subplot(3,2,2)

        predictions.append(models[col].predict(train_col, y=dt))
        roc[col] = evaluate_model(models[col], train_col, dt, lbl)
    pred = np.hstack(predictions)
    print("AUC of average likelihood")
    print(roc_metric(np.sum(pred, axis=1), lbl))
    print("AUC of individual stations")
    print(roc)

    auc_combined, _ = combine_models(models, df, y, dt, lbl)
    print(auc_combined)
    print("Experiment on testing data.")
    # Testing.
    test_data = pd.read_csv('sampletahmo_test.csv')
    x_t, y_t = test_data.iloc[:, 2:].values, test_data.iloc[:, 1:2].values
    y_insert, t_lbl = synthetic_fault(y_t)
    ll_test = model.predict(x=x_t, y=y_insert)
    print(roc_metric(ll_test, t_lbl, False))

    print("AUC of test dataset for 2017")
    # #test_roc = roc_metric(ll_test, t_lbl, plot=True)
    roc_test = {}
    t_predictions = []
    for col in columns:
        roc_test[col] = evaluate_model(
            models[col], test_data[col].values.reshape(-1, 1), y_t, t_lbl)
        t_predictions.append(models[col].predict(
            test_data[col].values.reshape(-1, 1), y=y_t))
    # ll_aggregate =
    print(roc_metric(np.mean(np.hstack(t_predictions), axis=1), t_lbl))
    print("AUC of individual stations")
    print(roc_test)

    ## Combined model
    auc_test_comb, _ = combine_models(models, test_data, y_t,
                                      y_insert, t_lbl)
    print(auc_test_comb)
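

# Conventional entry-point guard (an addition; the scraped original relies
# on module-level execution further below):
if __name__ == '__main__':
    main()
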
def evaluate_model(trained_model, x_test, y_test, lbl):
    ll_ob = trained_model.predict(x_test, y=y_test)
    return roc_metric(ll_ob, lbl)
def group_detection(target_station, k=3):
    alpha = 0.05
    # k = 3
    train_result = {}
    train_result['station'] = target_station
    train_result['num_k'] = k
    train_result['anom'] = alpha

    # plt.subplot(211)
    # plt.title(target_station)
    # plt.xlabel('2016')
    y_train, groups, lbl = synthetic_groups(train_data[target_station],
                                            plot=False,
                                            alpha=alpha,
                                            threshold=2.0)

    model, k_station = train(target_station=target_station,
                             num_k=k,
                             train_data=train_data,
                             pairwise=False)
    # print "Training accuracy"

    ll_score = test_evaluate_group(trained_model=model,
                                   k_station=k_station,
                                   test_data=train_data,
                                   y_inj=y_train)

    # evaluate performance on event detection.
    # 1. Give max score to each element in the group
    injected_group = groups["injected_group"].keys()
    mx_ll_score = ll_score.copy()

    for ig in injected_group:
        ix_g = groups["group_events"][ig]
        ix_g = [ix for ix in ix_g if lbl[ix] == 1]
        max_score = np.max(mx_ll_score[ix_g])
        mx_ll_score[ix_g] = max_score

    # 2. Detect collective groups with abnormal events.
    # plt.show()

    # print "with out group"
    train_result["auc_train"] = roc_metric(ll_score, lbl)
    train_result["pr_train"] = ap(ll_score, lbl)
    # print "With group"
    train_result["auc_train_grp"] = roc_metric(mx_ll_score, lbl)
    train_result["pr_train_grp"] = ap(mx_ll_score, lbl)

    # print "\n---------- Testing data ---------\n"
    try:
        y_train, tgroups, lblt = synthetic_groups(test_data[target_station],
                                                  plot=False,
                                                  alpha=alpha,
                                                  threshold=2.0)

        ll_score_test = test_evaluate_group(trained_model=model,
                                            k_station=k_station,
                                            test_data=test_data,
                                            y_inj=y_train)
        # test_plot(ll_score_test, lblt, y_train)
    except Exception as ex:
        print(ex)
        return train_result
    tmx_ll_score = ll_score_test.copy()
    for ig in tgroups["injected_group"].keys():
        ix_g = tgroups["group_events"][ig]
        max_score = np.max(tmx_ll_score[ix_g])
        tmx_ll_score[ix_g] = max_score

    # print "with out group"
    train_result["auc_test"] = roc_metric(ll_score_test,
                                          lblt)  # , ap(ll_score_test, lblt)
    train_result["pr_test"] = ap(ll_score_test, lblt)
    # print "With group"
    train_result["auc_test_grp"] = roc_metric(tmx_ll_score,
                                              lblt)  # , ap(tmx_ll_score, lblt)
    train_result["pr_test_grp"] = ap(tmx_ll_score, lblt)
    # print "With group"
    # plt.show()
    test_plot(tmx_ll_score, lblt, asmatrix(y_train))
    # plt.show()
    return train_result
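

# A hypothetical driver for the group-detection experiment, assuming
# `train_data` holds one station per column; the iteration and result
# handling here are illustrative, not part of the original script:
def run_group_experiments(k=3):
    results = [group_detection(stn, k=k) for stn in train_data.columns]
    return pd.DataFrame(results)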


def synthetic_fault(observations, plot=False):
    # Inject synthetic faults into the rainfall series: a stretch of
    # spurious heavy-rain reports and a stretch of suppressed readings
    # on rainy days. (Signature reconstructed from the call sites below.)
    dt = observations.copy()
    abnormal_report = list(range(200, 210))
    rainy_days = list(range(107, 117))
    dt[abnormal_report] = 20.0
    dt[rainy_days] = 0.0
    faulty_day = abnormal_report + rainy_days
    lbl = np.zeros([dt.shape[0]])
    lbl[faulty_day] = 1.0
    return dt, lbl


# plt.subplot(321)
print("injected faults")
dt, lbl = synthetic_fault(y, True)
ll_ob = model.predict(x, y=dt)
print(roc_metric(ll_ob, lbl, False))
# model.residual_plot(np.log(observed_value + model.eps), np.log(y + model.eps), fitted_value)
# print(roc_metric())
# yhat = -np.log(model.predict(x, y))


def plot_synthetic(dt, y):
    plt.plot(dt, '.r', label='inserted faults')
    plt.plot(y, '.b', label='ground truth')
    plt.xlabel('Days')
    plt.ylabel('Rainfall (mm)')
    plt.legend(loc='best')
    plt.show()
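

# `test_plot` is another external helper. A minimal sketch of a compatible
# implementation (layout and styling are assumptions): overlay the anomaly
# scores with the injected-fault labels.
def test_plot(scores, lbl, y_inj):
    scores = np.asarray(scores).ravel()
    lbl = np.asarray(lbl).ravel()
    fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
    ax1.plot(np.asarray(y_inj).ravel(), '.b', label='observations')
    ax1.legend(loc='best')
    ax2.plot(scores, '.g', label='anomaly score')
    ax2.plot(np.where(lbl == 1)[0], scores[lbl == 1], 'xr',
             label='injected faults')
    ax2.set_xlabel('Days')
    ax2.legend(loc='best')
    plt.show()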

