def single_prediction(city,
                      state,
                      predictors,
                      predict_n,
                      look_back,
                      hidden,
                      epochs,
                      predict=True,
                      doenca='chick',
                      cluster=True):
    """
    Load a previously saved LSTM model for a city and plot its predictions,
    using the city's cluster as regressors.

    No training happens here: the model is read from
    ``../saved_models/LSTM/<state>/lstm_<city>_epochs_<epochs>.h5``.

    :param city: geocode of the target city
    :param state: State containing the city; selects the clusters pickle and the saved-model directory
    :param predictors: columns requested from ``get_cluster_data`` (forwarded as ``cols``)
    :param predict_n: How many weeks ahead to predict
    :param look_back: Look-back time window length used by the model
    :param hidden: Not used by this function; kept so existing positional calls keep working
    :param epochs: Number of training epochs; only used here to build the saved-model file name
    :param predict: If True the split ratio passed to ``split_data`` is 1, otherwise 0.7
    :param doenca: disease identifier forwarded to ``get_cluster_data`` and ``plot_prediction``
    :param cluster: If True the target column is ``casos_est_<city>``, otherwise ``casos_est``.
        This used to be read from an undefined name inside the function; it is now an
        explicit argument with the same default as ``train_evaluate_model``.
    :return: tuple ``(predicted, indice, X_test, Y_test, Y_train, factor)`` where ``predicted``
        stacks 100 ``model.predict`` outputs along axis 2 and ``indice`` is the list of dates
        in the data index.
    """

    # NOTE(review): pickle.load can execute arbitrary code; the clusters file must be trusted.
    with open("../../analysis/clusters_{}.pkl".format(state), "rb") as fp:
        clusters = pickle.load(fp)
    data, _group = get_cluster_data(geocode=city,
                                    clusters=clusters,
                                    data_types=DATA_TYPES,
                                    cols=predictors,
                                    doenca=doenca)
    data = data['2016-01-01':]
    # Dates on the x axis are the data index shifted forward by the prediction horizon.
    x = [i.date() for i in data.index.shift(predict_n, freq='W')]
    indice = [i.date() for i in data.index]

    city_name = get_city_names([city, 0])[0][1]
    ratio = 1 if predict else 0.7

    target_name = "casos_est_{}".format(city) if cluster else "casos_est"
    target_col = list(data.columns).index(target_name)
    norm_data, max_features = normalize_data(data)
    # Normalisation factor of the target column, handed to the plot and to the caller.
    factor = max_features[target_col]
    ## split test and train
    X_train, Y_train, X_test, Y_test = split_data(
        norm_data,
        look_back=look_back,
        ratio=ratio,
        predict_n=predict_n,
        Y_column=target_col,
    )

    model = load_model("../saved_models/LSTM/{}/lstm_{}_epochs_{}.h5".format(
        state, city, epochs))
    # Repeat the prediction 100 times and stack the runs on a third axis so that
    # percentiles can be taken across runs.
    # NOTE(review): presumably the model is stochastic at predict time (e.g. dropout); confirm.
    predicted = np.stack(
        [model.predict(X_train, batch_size=1, verbose=1) for _ in range(100)],
        axis=2)

    df_predicted = pd.DataFrame(np.percentile(predicted, 50, axis=2))
    df_predicted25 = pd.DataFrame(np.percentile(predicted, 2.5, axis=2))
    df_predicted975 = pd.DataFrame(np.percentile(predicted, 97.5, axis=2))

    plot_prediction(pred=df_predicted,
                    pred25=df_predicted25,
                    pred975=df_predicted975,
                    x=x,
                    ydata=Y_train,
                    factor=factor,
                    horizon=predict_n,
                    title="{}".format(city_name),
                    doenca=doenca)

    return predicted, indice, X_test, Y_test, Y_train, factor
Esempio n. 2
0
                     verbose=1)
    model.save_weights('trained_lstm_model.h5', overwrite=overwrite)
    return hist


if __name__ == "__main__":
    # Settings for this example run.
    HIDDEN = 256
    TIME_WINDOW = 12
    BATCH_SIZE = 1
    prediction_window = 2  # weeks

    # Multi-city table for the state 'RJ'. Earlier experiments used
    # get_example_table(3304557) / get_complete_table(3304557) instead;
    # the original notes label geocode 3303500 as Nova Iguaçu.
    data = build_multicity_dataset('RJ')
    print(data.shape)

    # Positional index of the target series among the columns.
    column_names = list(data.columns)
    target_col = column_names.index('casos_est_3303500')
    time_index = data.index

    norm_data = normalize_data(data)
    print(norm_data.columns, norm_data.shape)

    # Split into training and test sets (ratio .7 passed to split_data).
    splits = split_data(norm_data,
                        look_back=TIME_WINDOW,
                        ratio=.7,
                        predict_n=prediction_window,
                        Y_column=target_col)
    X_train, Y_train, X_test, Y_test = splits
    print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

    # Third dimension of X_train is handed to build_model as its second argument.
    n_features = X_train.shape[2]
    model = build_model(HIDDEN, n_features, TIME_WINDOW, BATCH_SIZE)
    history = train(model, X_train, Y_train, batch_size=1, epochs=30)
    model.save('mlp_model')
Esempio n. 3
0
def train_evaluate_model(city, data, predict_n, look_back, hidden, epochs,
                         ratio=0.7, cluster=True, load=False,
                         uncertainty=True):
    """
    Train an LSTM model for one city, evaluate it and persist the results.

    The trained model, the out-of-sample metrics and the concatenated
    in-sample/out-of-sample predictions are written under
    ``../saved_models/LSTM/<STATE>/``.

    :param city: identifier of the city; used in column names, file names and passed to ``train`` as ``geocode``
    :param data: Dataset
    :param predict_n: Number of steps ahead to be predicted
    :param look_back: number of history steps to include in training window
    :param hidden: forwarded to ``build_model`` as its first argument
    :param epochs: number of training epochs
    :param ratio: ratio of the full dataset to use in training
    :param cluster: if True the target column is ``casos_est_<city>``, otherwise ``casos_est``
    :param load: Whether to load previously saved weights before training
    :param uncertainty: forwarded to ``evaluate``; when True the metrics are computed on the
        50th percentile (along axis 2) of the out-of-sample predictions
    :return: tuple ``(predicted, X_test, Y_test, Y_train, factor)``
    """
    # Locate the target series and its normalisation factor.
    target_name = "casos_est_{}".format(city) if cluster else "casos_est"
    target_col = list(data.columns).index(target_name)
    norm_data, max_features = normalize_data(data)
    factor = max_features[target_col]

    # Train/test split.
    X_train, Y_train, X_test, Y_test = split_data(norm_data,
                                                  look_back=look_back,
                                                  ratio=ratio,
                                                  predict_n=predict_n,
                                                  Y_column=target_col)
    print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

    # Build the network, optionally warm-start it, then train and save it.
    n_features = X_train.shape[2]
    model = build_model(hidden,
                        n_features,
                        predict_n=predict_n,
                        look_back=look_back)
    if load:
        model.load_weights("trained_{}_model.h5".format(city))
    train(model, X_train, Y_train, batch_size=1, epochs=epochs, geocode=city)
    model.save('../saved_models/LSTM/{}/lstm_{}_epochs_{}.h5'.format(
        STATE, city, epochs))

    # Evaluate out of sample first, then in sample; the metrics returned by
    # evaluate are not used here.
    predicted_out, _ = evaluate(city,
                                model,
                                X_test,
                                Y_test,
                                label="out_of_sample_{}".format(city),
                                uncertainty=uncertainty)
    predicted_in, _ = evaluate(city,
                               model,
                               X_train,
                               Y_train,
                               label="in_sample_{}".format(city),
                               uncertainty=uncertainty)

    # With uncertainty the point prediction is the median across axis 2.
    pout = np.percentile(predicted_out, 50, axis=2) if uncertainty else predicted_out
    metrics = calculate_metrics(pout, Y_test, factor)
    metrics.to_pickle("../saved_models/LSTM/{}/metrics_lstm_{}_8pw.pkl".format(
        STATE, city))

    # Store in-sample followed by out-of-sample predictions in one array.
    predicted = np.concatenate([predicted_in, predicted_out], axis=0)
    predictions_path = "../saved_models/LSTM/{}/predicted_lstm_{}_8pw.pkl".format(
        STATE, city)
    with open(predictions_path, "wb") as f:
        pickle.dump(predicted, f)

    return predicted, X_test, Y_test, Y_train, factor
Esempio n. 4
0
def train_evaluate_model(city, data, predict_n, look_back, hidden, epochs,
                         ratio=0.7, cluster=True, load=False):
    """
    Train an LSTM model for one city, evaluate it and persist the results.

    The out-of-sample metrics and the concatenated in-sample/out-of-sample
    predictions are written under ``../saved_models/LSTM/<STATE>/``. The
    trained model itself is not saved by this version.

    :param city: identifier of the city; used in column names, file names and passed to ``train`` as ``geocode``
    :param data: dataset whose columns include the target series
    :param predict_n: forwarded to ``split_data`` and ``build_model``
    :param look_back: forwarded to ``split_data`` and ``build_model``
    :param hidden: forwarded to ``build_model`` as its first argument
    :param epochs: number of epochs passed to ``train``
    :param ratio: split ratio passed to ``split_data``
    :param cluster: if True the target column is ``casos_est_<city>``, otherwise ``casos_est``
    :param load: Whether to load previously saved weights before training
    :return: tuple ``(predicted, X_test, Y_test, Y_train, factor)``
    """
    # Locate the target series and its normalisation factor.
    target_name = "casos_est_{}".format(city) if cluster else "casos_est"
    target_col = list(data.columns).index(target_name)
    norm_data, max_features = normalize_data(data)
    factor = max_features[target_col]

    # Train/test split.
    X_train, Y_train, X_test, Y_test = split_data(norm_data,
                                                  look_back=look_back,
                                                  ratio=ratio,
                                                  predict_n=predict_n,
                                                  Y_column=target_col)
    print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

    # Build the network, optionally warm-start it, then train.
    n_features = X_train.shape[2]
    model = build_model(hidden,
                        n_features,
                        predict_n=predict_n,
                        look_back=look_back)
    if load:
        model.load_weights("trained_{}_model.h5".format(city))
    train(model, X_train, Y_train, batch_size=1, epochs=epochs, geocode=city)

    # Evaluate out of sample first, then in sample; the metrics returned by
    # evaluate are not used here.
    predicted_out, _ = evaluate(city,
                                model,
                                X_test,
                                Y_test,
                                label="out_of_sample_{}".format(city))
    predicted_in, _ = evaluate(city,
                               model,
                               X_train,
                               Y_train,
                               label="in_sample_{}".format(city))

    metrics = calculate_metrics(predicted_out, Y_test, factor)
    metrics.to_pickle("../saved_models/LSTM/{}/metrics_lstm_{}_8pw.pkl".format(
        STATE, city))

    # Store in-sample followed by out-of-sample predictions in one array.
    predicted = np.concatenate([predicted_in, predicted_out], axis=0)
    predictions_path = "../saved_models/LSTM/{}/predicted_lstm_{}_8pw.pkl".format(
        STATE, city)
    with open(predictions_path, "wb") as f:
        pickle.dump(predicted, f)

    return predicted, X_test, Y_test, Y_train, factor