Example #1
virus_name = "measles"
species = ["Avian", "Ebola", "Lassa", "Measles", "Mumps", "Zika"]
tree_fig = {}
mapbox_access_token = "pk.eyJ1IjoicGxvdGx5bWFwYm94IiwiYSI6ImNrOWJqb2F4djBnMjEzbG50amg0dnJieG4ifQ.Zme1-Uzoi75IaFbieBDl3A"

tree_file, metadata_file, metadata_file_stat = create_paths_file(virus_name,
                                                                 level1="",
                                                                 level2="",
                                                                 level3="")

# Determine the minimum and maximum dates for the slicer
df_stat_metadata = pd.read_csv(metadata_file_stat)
min_date, max_date = min_max_date(df_stat_metadata)

# Create the dictionary of slider marks
marks_data = slicer(min_date, max_date)
min_max_date_value = [min_date, max_date]

fig = create_tree(virus_name, tree_file, metadata_file, "Country")
tree_fig[tree_file] = fig

fig_map_bubble = create_map_bubble_year(virus_name, metadata_file_stat, 2,
                                        min_date, max_date)

fig_curve_line = create_curve_line(df_stat_metadata, virus_name, min_date,
                                   max_date)

######################################### MAIN APP #########################################
app.layout = html.Div([
    # Banner display
    html.Div(
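The layout definition is truncated above. As a minimal, hypothetical sketch of how the pre-built figures could be embedded in a Dash layout (the component ids and nesting below are assumptions for illustration, not taken from the original app):

# Hypothetical layout sketch; component ids are illustrative only.
app.layout = html.Div([
    html.Div([
        dcc.Graph(id="graph-tree", figure=fig),
        dcc.Graph(id="graph-map-bubble", figure=fig_map_bubble),
        dcc.Graph(id="graph-curve-line", figure=fig_curve_line),
    ])
])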
Example #2
def _update_slicer(
    virus_name,
    mumps,
    dengue,
    lassa,
    avian_opt1,
    avian_opt2,
    flu_opt1,
    flu_opt2,
    flu_opt3,
):
    virus_name = virus_name.lower()
    if virus_name in ("ebola", "zika", "measles"):
        (
            tree_file_filtred,
            metadata_file_filtred,
            metadata_file_stat_filtred,
        ) = create_paths_file(virus_name, level1="", level2="", level3="")
    elif virus_name == "mumps":
        (
            tree_file_filtred,
            metadata_file_filtred,
            metadata_file_stat_filtred,
        ) = create_paths_file(virus_name, level1=mumps, level2="", level3="")
    elif virus_name == "dengue":
        (
            tree_file_filtred,
            metadata_file_filtred,
            metadata_file_stat_filtred,
        ) = create_paths_file(virus_name, level1=dengue, level2="", level3="")
    elif virus_name == "lassa":
        (
            tree_file_filtred,
            metadata_file_filtred,
            metadata_file_stat_filtred,
        ) = create_paths_file(virus_name, level1=lassa, level2="", level3="")
    elif virus_name == "avian":
        (
            tree_file_filtred,
            metadata_file_filtred,
            metadata_file_stat_filtred,
        ) = create_paths_file(virus_name,
                              level1=avian_opt1,
                              level2=avian_opt2,
                              level3="")
    elif virus_name == "flu":
        (
            tree_file_filtred,
            metadata_file_filtred,
            metadata_file_stat_filtred,
        ) = create_paths_file(virus_name,
                              level1=flu_opt1,
                              level2=flu_opt2,
                              level3=flu_opt3)
    else:
        # Any other virus name would leave the filtered paths undefined below.
        raise ValueError("Unknown virus name: {}".format(virus_name))
    df = pd.read_csv(metadata_file_stat_filtred)
    min_date, max_date = min_max_date(df)
    # Create the dictionary of slider marks
    marks_data = slicer(min_date, max_date)
    min_max_date_value = [min_date, max_date]

    # Keep only the data between min_date and max_date
    df = df[df["Year"] >= min_date]
    df = df[df["Year"] <= max_date]
    return dcc.RangeSlider(
        id="id-year",
        min=min_date,
        max=max_date,
        step=1,
        marks=marks_data,
        value=min_max_date_value,
    )
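For context, a function like `_update_slicer` above is normally registered as a Dash callback. A minimal sketch of that wiring, assuming hypothetical component ids (these are not from the original app):

from dash.dependencies import Input, Output

# Hypothetical registration of the callback above; component ids are illustrative only.
app.callback(
    Output("slider-container", "children"),
    [
        Input("dropdown-virus-name", "value"),
        Input("dropdown-mumps", "value"),
        Input("dropdown-dengue", "value"),
        Input("dropdown-lassa", "value"),
        Input("dropdown-avian-opt1", "value"),
        Input("dropdown-avian-opt2", "value"),
        Input("dropdown-flu-opt1", "value"),
        Input("dropdown-flu-opt2", "value"),
        Input("dropdown-flu-opt3", "value"),
    ],
)(_update_slicer)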
Example #3
import h5py
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import constraints as cnt  # assumed: provides MinMaxNorm used below

from utils import slicer, split
# Note: fconv (used below) is a project-specific helper not shown in this excerpt.

# cd simulations

dt_fl = "nn_data.h5"
dt_dst = "scaled_data"

n_train = 0.8
n_valid = 0.1

# Open data file
f = h5py.File(dt_fl, "r")
dt = f[dt_dst]

idxs = split(dt.shape[0], n_train, n_valid)
slc_trn, slc_vld, slc_tst = slicer(dt.shape, idxs)

trn = dt[slc_trn][:, :, :, np.newaxis]
vld = dt[slc_vld][:, :, :, np.newaxis]

act = 'tanh'
cnt_mm = cnt.MinMaxNorm(min_value=-1, max_value=2)
# Encoder
tf.keras.backend.clear_session()
inputs = layers.Input(shape=(200, 100, 1))
ed = fconv(inputs, 4, 2, 3)
e = fconv(ed, 3, 2, 9)
e = fconv(e, 3, 5, 27)

# Latent space
# l = layers.Flatten()(e)
def objective(trial):

    # Open data file
    f_in = h5py.File(DT_FL_IN, "r")
    dt_in = f_in[DT_DST_IN]

    f_out = h5py.File(DT_FL_OUT, "r")
    dt_out = f_out[DT_DST_OUT]

    WD = 2
    # Dummy y_data
    x_data, _ = format_data(dt_in, wd=WD, get_y=True)
    _, y_data = format_data(dt_out, wd=WD, get_y=True)
    x_data = np.squeeze(x_data)

    # Split data and get slices
    idxs = split(x_data.shape[0],
                 N_TRAIN,
                 N_VALID,
                 test_last=dt_in.attrs["idx"])
    slc_trn, slc_vld, slc_tst = slicer(x_data.shape, idxs)

    # Get data
    x_train = x_data[slc_trn[0]]
    y_train = y_data[slc_trn[0]]
    x_val = x_data[slc_vld[0]]
    y_val = y_data[slc_vld[0]]

    conv_shape = y_train.shape[1:3]
    # Strides cfg
    strd = [2, 2, 5, 5]

    # Limits and options
    epochs = 60
    # Filters
    flt_lm = [[4, 128], [4, 128], [4, 128]]
    d_lm = [1, 50]
    # Kernel
    k_lm = [3, 5]
    # Regularizer
    l2_lm = [1e-7, 1e-3]
    # Activation functions
    act_opts = ["relu", "elu", "tanh", "linear"]
    # Latent space cfg
    lt_sz = [5, 150]
    lt_dv = [0.3, 0.7]
    # Learning rate
    lm_lr = [1e-5, 1e-1]

    # Clear tensorflow session
    tf.keras.backend.clear_session()
    # Input
    inputs = layers.Input(shape=x_train.shape[1:])
    d = inputs
    # Decoder
    n_layers = trial.suggest_int("n_layers", 1, 3)
    flt = trial.suggest_int("nl_flt", d_lm[0], d_lm[1])
    # Reduction from output
    red = np.prod(strd[:n_layers])
    # Decoder first shape
    lt_shp = (np.array(conv_shape) / red).astype(int)
    # Decoder dense size
    n_flat = np.prod(lt_shp) * flt
    # Format stride list
    strd = strd[::-1][-n_layers:]
    # Latent -> Decoder layer
    # Activation
    act_lt = trial.suggest_categorical("lt_activation", act_opts)
    # Regularization
    l2_lt = trial.suggest_loguniform("lt_l2", l2_lm[0], l2_lm[1])
    l2_reg = regularizers.l2(l=l2_lt)
    # Flat input to the decoder
    d = layers.Dense(n_flat,
                     activation=act_lt,
                     kernel_regularizer=l2_reg,
                     name="l1_dense_decoder")(inputs)
    # Reshape to the output of the encoder
    d = layers.Reshape(list(lt_shp) + [flt])(d)
    # Generate the convolutional layers
    for i in range(n_layers):
        # Get number of filters
        flt = trial.suggest_int("n{}_flt".format(i), flt_lm[i][0],
                                flt_lm[i][1])
        # Get the kernel size
        k_sz = trial.suggest_categorical("d{}_kernel_size".format(i), k_lm)
        # Get the activation function
        act = trial.suggest_categorical("d{}_activation".format(i), act_opts)
        # Regularization value
        l2 = trial.suggest_loguniform("d{}_l2".format(i), l2_lm[0], l2_lm[1])
        l2_reg = regularizers.l2(l=l2)
        # Convolutional layer
        d = layers.Conv2DTranspose(
            flt,
            (k_sz, k_sz),
            strides=strd[i],
            activation=act,
            padding="same",
            kernel_regularizer=l2_reg,
            name="{}_decoder".format(i + 1),
        )(d)
        dp = 0
        # Dropout layers
        if dp > 0:
            d = layers.Dropout(dp, name="{}_dropout_decoder".format(i + 1))(d)

    decoded = layers.Conv2DTranspose(
        y_train.shape[3],
        (5, 5),
        activation="linear",
        padding="same",
        name="output_decoder",
    )(d)

    ae = Model(inputs, decoded, name="Decoder_nxt")

    # Early stopping, monitoring the loss on the validation dataset
    monitor = "val_loss_norm_error"
    patience = int(epochs * 0.3)
    es = EarlyStopping(monitor=monitor,
                       mode="min",
                       patience=patience,
                       restore_best_weights=True)

    opt = "adam"
    if opt == "adam":
        k_optf = optimizers.Adam
    elif opt == "nadam":
        k_optf = optimizers.Nadam
    elif opt == "adamax":
        k_optf = optimizers.Adamax

    lr = trial.suggest_loguniform("lr", lm_lr[0], lm_lr[1])
    if lr > 0:
        k_opt = k_optf(learning_rate=lr)
    else:
        k_opt = k_optf()

    ae.compile(optimizer=k_opt,
               loss=loss_norm_error,
               metrics=["mse", loss_norm_error])

    batch_size = int(trial.suggest_uniform("batch_sz", 2, 32))
    ae.summary()
    hist = ae.fit(
        x_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(x_val, y_val),
        callbacks=[KerasPruningCallback(trial, "val_loss_norm_error"), es],
        verbose=1,
    )

    txt = PREFIX + SUFFIX
    ae.save(txt.format(RUN_VERSION, trial.number))
    return min(hist.history["val_loss_norm_error"])
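An objective function like this is driven by an Optuna study; a minimal sketch of that setup, with an illustrative (assumed) pruner and trial count:

import optuna

# Illustrative study setup; the pruner is what makes KerasPruningCallback effective.
study = optuna.create_study(
    direction="minimize",
    pruner=optuna.pruners.MedianPruner(),
)
study.optimize(objective, n_trials=100)
print("Best value:", study.best_value)
print("Best params:", study.best_params)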
Example #5
n_valid = 0.1

# Select the variable to train
# 0: Temperature - 1: Pressure - 2: Velocity - None: all
var = 2

# %%
# Open data file
f = h5py.File(dt_fl, "r")
dt = f[dt_dst]

x_data, y_data = format_data(dt, wd=3, var=var, get_y=True, cont=True)

# Split data file
idxs = split(x_data.shape[0], n_train, n_valid)
slc_trn, slc_vld, slc_tst = slicer(x_data.shape, idxs)
# Slice data
x_train = x_data[slc_trn]
x_val = x_data[slc_vld]

slc_trn, slc_vld, slc_tst = slicer(y_data.shape, idxs)
y_train = y_data[slc_trn]
y_val = y_data[slc_vld]

# %%
# LSTM neural network settings

# Activation function
act = "tanh"  # Convolutional layers activation function
# Number of filters of each layer
flt = [20, 20, 20, 30]
Example #6
def objective(trial):

    # Open data file
    f = h5py.File(DT_FL, "r")
    dt = f[DT_DST]

    # Split data and get slices
    idxs = split(dt.shape[0], N_TRAIN, N_VALID)
    slc_trn, slc_vld, slc_tst = slicer(dt.shape, idxs)

    # Get data
    x_train = dt[slc_trn]
    x_val = dt[slc_vld]

    # Limits and options
    # Filters
    # flt_lm = [4, 128]
    flt_lm = [[4, 128], [4, 128], [4, 128]]
    # Kernel
    k_lm = [3, 5]
    # Regularizer
    l2_lm = [1e-7, 1e-3]
    # Activation functions
    act_opts = ["relu", "elu", "tanh", "linear"]
    # Latent space cfg
    lt_sz = [5, 150]
    lt_dv = [0.3, 0.7]
    # Learning rate
    lm_lr = [1e-5, 1e-2]

    # Clear tensorflow session
    tf.keras.backend.clear_session()
    # Input
    inputs = layers.Input(shape=x_train.shape[1:])
    e = inputs
    # Encoder
    flt, k_sz, act, l2 = [], [], [], []
    strd = [2, 2, 5]
    # n_layers = trial.suggest_int("n_layers", 2, 3)
    n_layers = 3
    for i in range(n_layers):
        # Get values
        flt += [
            trial.suggest_int("n{}_flts".format(i), flt_lm[i][0], flt_lm[i][1])
        ]
        k_sz += [trial.suggest_categorical("e{}_kernel_size".format(i), k_lm)]
        act += [
            trial.suggest_categorical("e{}_activation".format(i), act_opts)
        ]
        l2 += [
            trial.suggest_loguniform("e{}_l2".format(i), l2_lm[0], l2_lm[1])
        ]
        l2_reg = regularizers.l2(l=l2[-1])
        # l2_reg = regularizers.l2(l=0)
        # Set layer
        e = layers.Conv2D(
            flt[-1],
            (k_sz[-1], k_sz[-1]),
            strides=strd[i],
            activation=act[-1],
            padding="same",
            kernel_regularizer=l2_reg,
            name="{}_encoder".format(i + 1),
        )(e)
        # Add layers
        if i == 0:
            ed = layers.Conv2D(
                1,
                (1, 1),
                padding="same",
                kernel_regularizer=l2_reg,
                name="l2_input".format(i),
            )(e)
        # Dropout
        dp = 0
        if dp > 0:
            e = layers.Dropout(dp, name="{}_dropout_encoder".format(i + 1))(e)

    # Latent space
    act_lt = trial.suggest_categorical("lt_activation", act_opts)
    l2_lt = trial.suggest_loguniform("lt_l2", l2_lm[0], l2_lm[1])
    l2_reg = regularizers.l2(l=l2_lt)
    sz_lt = trial.suggest_int("lt_sz", lt_sz[0], lt_sz[1])
    dv_lt = trial.suggest_uniform("lt_div", lt_dv[0], lt_dv[1])
    # Dense latent sizes
    latent_1 = int(sz_lt * dv_lt)
    latent_2 = sz_lt - latent_1

    lt1 = layers.Flatten()(e)
    lt1 = layers.Dense(latent_1,
                       activation=act_lt,
                       kernel_regularizer=l2_reg,
                       name="l1_latent")(lt1)

    lt2 = layers.Flatten()(ed)
    lt2 = layers.Dense(latent_2,
                       activation=act_lt,
                       kernel_regularizer=l2_reg,
                       name="l2_latent")(lt2)

    # Decoder
    # Flat input to the decoder
    n_flat = np.prod(backend.int_shape(e)[1:])
    d = layers.Dense(n_flat,
                     activation=act_lt,
                     kernel_regularizer=l2_reg,
                     name="l1_dense_decoder")(lt1)
    # Consider using only one filter with the convolution
    # Reshape to the output of the encoder
    d = layers.Reshape(backend.int_shape(e)[1:])(d)
    # Generate the convolutional layers
    for i in range(n_layers):
        # Settings index
        j = -i - 1
        # Set the regularizer
        l2_reg = regularizers.l2(l=l2[j])
        # Add the latent space
        if i == n_layers - 1:
            d1 = layers.Dense(
                5000,
                activation="linear",
                kernel_regularizer=l2_reg,
                name="l2_dense_decoder",
            )(lt2)
            d1 = layers.Reshape(backend.int_shape(ed)[1:],
                                name="l2_reshape_decoder")(d1)
            d1 = layers.Conv2D(
                flt[j + 1],
                (1, 1),
                padding="same",
                name="l2_compat_decoder",
                kernel_regularizer=l2_reg,
            )(d1)
            d = layers.Add()([d1, d])
        # Convolutional layer
        d = layers.Conv2DTranspose(
            flt[j],
            (k_sz[j], k_sz[j]),
            strides=strd[j],
            activation=act[j],
            padding="same",
            kernel_regularizer=l2_reg,
            name="{}_decoder".format(i + 1),
        )(d)
        # Dropout layers
        if dp > 0:
            d = layers.Dropout(dp, name="{}_dropout_decoder".format(i + 1))(d)

    decoded = layers.Conv2DTranspose(
        x_train.shape[-1],
        (5, 5),
        activation="linear",
        padding="same",
        kernel_regularizer=l2_reg,
        name="output_decoder",
    )(d)

    ae = Model(inputs, decoded, name="auto_encoder_add")

    opt = "adam"
    if opt == "adam":
        k_optf = optimizers.Adam
    elif opt == "nadam":
        k_optf = optimizers.Nadam
    elif opt == "adamax":
        k_optf = optimizers.Adamax

    lr = trial.suggest_loguniform("lr", lm_lr[0], lm_lr[1])
    if lr > 0:
        k_opt = k_optf(learning_rate=lr)
    else:
        k_opt = k_optf()

    ae.compile(optimizer=k_opt,
               loss=loss_norm_error,
               metrics=["mse", loss_norm_error])

    batch_size = int(trial.suggest_uniform("batch_sz", 2, 32))
    ae.summary()
    hist = ae.fit(
        x_train,
        x_train,
        epochs=30,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(x_val, x_val),
        callbacks=[KerasPruningCallback(trial, "val_loss_norm_error")],
        verbose=1,
    )

    txt = PREFIX + SUFFIX
    ae.save(txt.format(RUN_VERSION, trial.number))
    return hist.history["val_loss_norm_error"][-1]
Example #7
def objective(trial):

    # Open data file
    f = h5py.File(DT_FL, "r")
    dt = f[DT_DST]

    # Format data for LSTM training
    x_data, y_data = format_data(dt, wd=WD, get_y=True)

    x_data = np.squeeze(x_data)
    # Split data and get slices
    idxs = split(x_data.shape[0], N_TRAIN, N_VALID)
    slc_trn, slc_vld, slc_tst = slicer(x_data.shape, idxs)

    # Get data
    x_train = x_data[slc_trn[0]]
    y_train = y_data[slc_trn[0]] - x_train
    x_val = x_data[slc_vld[0]]
    y_val = y_data[slc_vld[0]] - x_val

    # Limits and options
    # Units per layer
    # n_lstm = [[4, 128], [4, 128], [4, 128]]
    n_lstm = [[4, 196], [4, 196], [4, 196]]
    # Regularizer
    l2_lm = [1e-7, 1e-3]
    # Activation functions
    act_opts = ["relu", "elu", "tanh", "linear"]
    # Latent space cfg
    lt_sz = [5, 150]
    lt_dv = [0.3, 0.7]
    # Learning rate
    lm_lr = [1e-5, 1]

    # Clear tensorflow session
    tf.keras.backend.clear_session()
    # Input
    inputs = layers.Input(shape=x_train.shape[1:])
    p = inputs
    # Dense layers
    # n_lyr_dense = trial.suggest_int("n_lyr_dense", 0, 2)
    n_lyr_dense = trial.suggest_int("n_lyr_dense", 1, 3)
    for i in range(n_lyr_dense):
        # For the current layer
        # Get the number of units
        l = trial.suggest_int("n{}_dense".format(i), n_lstm[i][0],
                              n_lstm[i][1])
        # Get the activation function
        act = trial.suggest_categorical("d{}_activation".format(i), act_opts)
        # Regularization value
        l2 = trial.suggest_loguniform("d{}_l2".format(i), l2_lm[0], l2_lm[1])
        l2_reg = regularizers.l2(l=l2)
        # Set layer
        p = layers.Dense(
            l,
            activation=act,
            # kernel_regularizer=l2_reg,
            name="{}_dense".format(i + 1),
        )(p)
        # Dropout
        dp = trial.suggest_uniform("d{}_dropout".format(i), 0, 1)
        p = layers.Dropout(dp, name="{}_dropout_dense".format(i + 1))(p)
        bn = trial.suggest_categorical("d{}_batchnorm".format(i), [0, 1])
        if bn == 1:
            p = layers.BatchNormalization(name="{}_bnorm_dense".format(i +
                                                                       1))(p)

    out = layers.Dense(y_data.shape[1], activation="linear")(p)

    pred = Model(inputs, out, name="auto_encoder_add")

    # opt_opts = ["adam", "nadam", "adamax", "RMSprop"]
    # opt = trial.suggest_categorical("optimizer", opt_opts)
    opt = "adam"
    if opt == "adam":
        k_optf = optimizers.Adam
    elif opt == "nadam":
        k_optf = optimizers.Nadam
    elif opt == "adamax":
        k_optf = optimizers.Adamax
    elif opt == "RMSprop":
        k_optf = optimizers.RMSprop

    lr = trial.suggest_loguniform("lr", lm_lr[0], lm_lr[1])
    if lr > 0:
        k_opt = k_optf(learning_rate=lr)
    else:
        k_opt = k_optf()

    pred.compile(optimizer=k_opt, loss="mse", metrics=["mse", loss_norm_error])

    batch_size = int(trial.suggest_uniform("batch_sz", 2, 32))
    pred.summary()
    hist = pred.fit(
        x_train,
        y_train,
        epochs=100,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(x_val, y_val),
        callbacks=[KerasPruningCallback(trial, "val_mse")],
        verbose=1,
    )

    txt = PREFIX + SUFFIX
    pred.save(txt.format(RUN_VERSION, trial.number))
    return hist.history["val_mse"][-1]
Example #8
def objective(trial):

    # Open data file
    f = h5py.File(DT_FL, "r")
    dt = f[DT_DST]

    y_data = np.empty_like(dt)
    for idx in dt.attrs['idx']:
        y_data[idx[0]:idx[1]] = np.gradient(dt[idx[0]:idx[1]], 10, axis=0)

    # Split data file
    idxs = split(dt.shape[0], N_TRAIN, N_VALID, test_last=dt.attrs['idx'])
    slc_trn, slc_vld, slc_tst = slicer(dt.shape, idxs)

    # Slice data
    x_train = dt[slc_trn]
    y_train = y_data[slc_trn]
    x_val = dt[slc_vld]
    y_val = y_data[slc_vld]

    # Limits and options
    epochs = 500
    # Neurons per layer
    n_n = [[30, 150], [30, 150]]
    # Regularizer
    l2_lm = [1e-7, 1e-2]
    # Activation functions
    act_opts = ["relu", "elu", "tanh", "linear"]
    # Learning rate
    lm_lr = [1e-5, 1e-1]

    # Clear tensorflow session
    tf.keras.backend.clear_session()
    # Input
    inputs = layers.Input(shape=x_train.shape[1:])
    d = inputs
    # FCNN
    n_layers = trial.suggest_int("n_layers", 1, 3)
    for i in range(n_layers):
        # For the current layer
        # Get the number of neurons
        n = trial.suggest_int("l{}_n_neurons".format(i), n_n[i][0], n_n[i][1])
        # Get the activation function
        act = trial.suggest_categorical("l{}_activation".format(i), act_opts)
        # Regularization value
        l2 = trial.suggest_loguniform("l{}_l2".format(i), l2_lm[0], l2_lm[1])
        l2_reg = regularizers.l2(l=l2)
        # Set layer
        d = layers.Dense(
            n,
            activation=act,
            kernel_regularizer=l2_reg,
            name="l{}_fc".format(i),
        )(d)
    dd = layers.Dense(x_train.shape[1], activation='linear')(d)

    fcnn = Model(inputs, dd, name="FCNN")

    monitor = "val_loss_norm_error"
    patience = int(epochs * 0.1)
    es = EarlyStopping(monitor=monitor,
                       mode="min",
                       patience=patience,
                       restore_best_weights=True)

    opt = "adam"
    if opt == "adam":
        k_optf = optimizers.Adam
    elif opt == "nadam":
        k_optf = optimizers.Nadam
    elif opt == "adamax":
        k_optf = optimizers.Adamax

    lr = trial.suggest_loguniform("lr", lm_lr[0], lm_lr[1])
    if lr > 0:
        k_opt = k_optf(learning_rate=lr)
    else:
        k_opt = k_optf()

    fcnn.compile(optimizer=k_opt,
                 loss=loss_norm_error,
                 metrics=["mse", loss_norm_error])

    batch_size = int(trial.suggest_uniform("batch_sz", 2, 32))
    fcnn.summary()
    hist = fcnn.fit(
        x_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(x_val, y_val),
        callbacks=[KerasPruningCallback(trial, "val_loss_norm_error"), es],
        verbose=1,
    )

    txt = PREFIX + SUFFIX
    fcnn.save(txt.format(RUN_VERSION, trial.number))
    return hist.history["val_loss_norm_error"][-1]
Example #9
from utils import devide_to_train_test, slicer

if __name__ == "__main__":
    devide_to_train_test()
    slicer()