Example #1
def train_model(
    sample_size: int,
    workers: int,
    random_optimizer: bool,
    experiment: mlflow.entities.experiment.Experiment,
) -> None:
    logger.info("Load IMDB reviews")
    df_train, _ = load_data(folder=base_folder, sample_size=sample_size)

    # Anonymize the data before the pipeline, since this step is slow and its result does not change between trials
    logger.info("Preprocess reviews with spaCy. This may take a while...")
    anonymized_reviews = Anonymizer().transform(df_train.review)

    # Perform hyperparameter optimization
    optimizer = optimize(
        X=anonymized_reviews,
        y=df_train.sentiment,
        workers=workers,
        random_optimizer=random_optimizer,
    )

    # MLflow logging of results
    logger.info("Write results to MLflow experiment: %s", experiment.name)
    mlflow_sklearn_logging(
        optimizer=optimizer,
        experiment_id=experiment.experiment_id,
        sample_size=sample_size,
        data=base_folder / "train.csv",
    )
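
For context, a minimal sketch of how this entry point could be invoked. The MLflow lookup calls are standard, but the experiment name and argument values are placeholders, not taken from the repository:

import mlflow

# Look up (or create) the MLflow experiment that train_model logs into.
# The experiment name "imdb-sentiment" is a placeholder for illustration.
experiment = mlflow.get_experiment_by_name("imdb-sentiment")
if experiment is None:
    experiment_id = mlflow.create_experiment("imdb-sentiment")
    experiment = mlflow.get_experiment(experiment_id)

train_model(
    sample_size=5000,       # number of reviews to load
    workers=4,              # parallel workers for the search
    random_optimizer=True,  # presumably switches to a randomized search
    experiment=experiment,
)
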
Example #2
def train_model(
    sample_size: int,
    workers: int,
    trials: int,
    experiment: mlflow.entities.experiment.Experiment,
) -> None:
    logger.info("Load IMDB reviews")
    df_train, _ = load_data(folder=base_folder, sample_size=sample_size)

    # Anonymize the data before the pipeline, since this step is slow and its result does not change between trials
    logger.info("Preprocess reviews with spaCy. This may take a while...")
    anonymized_reviews = Anonymizer().transform(df_train.review)

    logger.info("Explore search space")
    study = optuna.create_study(direction="maximize")
    study.set_user_attr(key="sample_size", value=sample_size)
    study.set_user_attr(key="experiment", value=experiment)
    study.set_user_attr(key="data", value=base_folder / "train.csv")

    # Perform hyperparameter optimization and log results
    study.optimize(
        lambda trial: objective(
            trial,
            X=anonymized_reviews,
            y=df_train.sentiment,
            workers=workers,
        ),
        n_trials=trials,
        callbacks=[terminal_logging, mlflow_optuna_logging],
    )
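
The objective function and the callbacks terminal_logging and mlflow_optuna_logging are defined elsewhere in the repository. A rough sketch of what they could look like, assuming a scikit-learn TF-IDF plus logistic-regression pipeline (the pipeline choice and parameter ranges are illustrative, not taken from the source):

import optuna
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline


def objective(trial, X, y, workers):
    # Sample hyperparameters for a TF-IDF + logistic regression pipeline.
    ngram_max = trial.suggest_int("ngram_max", 1, 2)
    C = trial.suggest_float("C", 1e-3, 1e2, log=True)
    pipeline = make_pipeline(
        TfidfVectorizer(ngram_range=(1, ngram_max)),
        LogisticRegression(C=C, max_iter=1000),
    )
    # Mean cross-validated accuracy is the value the study maximizes.
    return cross_val_score(pipeline, X, y, cv=3, n_jobs=workers).mean()


def terminal_logging(study, trial):
    # Optuna callbacks receive the study and the just-finished trial.
    print(f"Trial {trial.number}: value={trial.value:.4f}, best={study.best_value:.4f}")
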
Example #3
import numpy as np
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, Convolution3D
from keras.layers.recurrent_convolutional import LSTMConv2D
from keras.layers.core import TimeDistributedDense
from keras.callbacks import ModelCheckpoint
import sys
from os import path
sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
from helpers.preprocessing import load_data
from helpers.util import shuffle_in_unison_inplace

# loading processed data
all_data = np.loadtxt('../final_weekly_data_250.txt')

# loading data preprocessed with avg and proba
input_data = load_data('weekly', 250)

height = 107
width = 72
input_length = 3

# integer division, since these values are used as slice indices below
height_red = height // 2
width_red = width // 2

# bump the reduced height to the next even value if it is odd
height_red = height_red + 1 if (height_red % 2) == 1 else height_red
# reshaping to samples x height x width x channel
all_data = all_data.reshape((all_data.shape[0], height, width, 1))
input_data = input_data.reshape(all_data.shape)

# get SE part only
SE_input = input_data[:, height_red:, 0:width_red, :]
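
A quick self-contained check of the reshape and quadrant slice above, using random data in place of the loaded arrays (shapes only, purely illustrative):

import numpy as np

# Stand-in for the loaded data: 10 samples of a flattened 107 x 72 grid.
dummy = np.random.rand(10, 107 * 72)
dummy = dummy.reshape((dummy.shape[0], 107, 72, 1))

h_red = 107 // 2 + 1  # 54 after the odd-value adjustment above
w_red = 72 // 2       # 36

quadrant = dummy[:, h_red:, 0:w_red, :]
print(quadrant.shape)  # (10, 53, 36, 1)
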
Example #4
import numpy as np
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, Convolution3D
from keras.layers.recurrent_convolutional import LSTMConv2D
from keras.layers.core import TimeDistributedDense
from keras.callbacks import ModelCheckpoint
import sys
from os import path
sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
from helpers.preprocessing import load_data
from helpers.util import shuffle_in_unison_inplace

# loading processed data
all_data = np.loadtxt('../final_weekly_data_750.txt')

# loading data preprocessed with avg and proba
input_data = load_data('weekly', 750)

height = 36
width = 24
input_length = 3

# integer division, since these values are used as slice indices below
height_red = height // 2
width_red = width // 2

# height_red = height_red + 1 if (height_red % 2) == 1 else height_red
# reshaping to samples x height x width x channel
all_data = all_data.reshape((all_data.shape[0], height, width, 1))
input_data = input_data.reshape(all_data.shape)

# get SE part only
SE_input = input_data[:, height_red:, 0:width_red, :]
Example #5
import numpy as np
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, Convolution3D
from keras.layers.recurrent_convolutional import LSTMConv2D
from keras.layers.core import TimeDistributedDense
from keras.callbacks import ModelCheckpoint
import sys
from os import path
sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
from helpers.preprocessing import load_data
from helpers.util import shuffle_in_unison_inplace

# loading processed data
all_data = np.loadtxt('../final_weekly_data_500.txt')

# loading data preprocessed with avg and proba
input_data = load_data('weekly', 500)

height = 54
width = 36
input_length = 3

# integer division, since these values are used as slice indices below
height_red = height // 2
width_red = width // 2

# height_red = height_red + 1 if (height_red % 2) == 1 else height_red
# reshaping to samples x height x width x channel
all_data = all_data.reshape((all_data.shape[0], height, width, 1))
input_data = input_data.reshape(all_data.shape)

# get SE part only
SE_input = input_data[:, height_red:, 0:width_red, :]
Example #6
import numpy as np
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, Convolution3D
from keras.layers.recurrent_convolutional import LSTMConv2D
from keras.layers.core import TimeDistributedDense
from keras.callbacks import ModelCheckpoint
import sys
from os import path
sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
from helpers.preprocessing import load_data
from helpers.util import shuffle_in_unison_inplace

# loading processed data
all_data = np.loadtxt('../final_daily_data_500.txt')

# loading data preprocessed with avg and proba
input_data = load_data('daily', 500)

height = 54
width = 36
input_length = 3

# integer division, since these values are used as slice indices below
height_red = height // 2
width_red = width // 2

# height_red = height_red + 1 if (height_red % 2) == 1 else height_red
# reshaping to samples x height x width x channel
all_data = all_data.reshape((all_data.shape[0], height, width, 1))
input_data = input_data.reshape(all_data.shape)

# get SE part only
SE_input = input_data[:, height_red:, 0:width_red, :]
Example #7
import numpy as np
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, Convolution3D
from keras.layers.recurrent_convolutional import LSTMConv2D
from keras.layers.core import TimeDistributedDense
from keras.callbacks import ModelCheckpoint
import sys
from os import path
sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
from helpers.preprocessing import load_data
from helpers.util import shuffle_in_unison_inplace

# loading processed data
all_data = np.loadtxt('../final_weekly_data_500.txt')

# loading data preprocessed with avg and proba
input_data = load_data('weekly', 500)

height = 54
width = 36
input_length = 3

# integer division, since these values are used as slice indices below
height_red = height // 2
width_red = width // 2

# height_red = height_red + 1 if (height_red % 2) == 1 else height_red
# reshaping to samples x height x width x channel
all_data = all_data.reshape((all_data.shape[0], height, width, 1))
input_data = input_data.reshape(all_data.shape)

# get SE part only
SE_input = input_data[:, height_red:, 0:width_red, :]