Example #1
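    # Runs a hyperparameter search over autoencoder configurations on a bearing
    # dataset, then refits and logs models at selected percentile ranks.
    # `experiment`, `config` and `config_space` come from the enclosing scope.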
    def run_callable(run_config: dict):
        experiment.print('Loading data')
        bearing_dataset = load_data(run_config['data_set'])
        x_train = bearing_dataset.train(column=config['data_column'], as_numpy=True)

        experiment.print('Initializing optimizer')
        experiment.print(f'max_iterations = {config["max_iterations"]}')
        optimizer = create_optimizer(
            run_config['optimizer'],
            config_space=config_space,
            model=KerasModel(
                create_model_function=create_deep_easing_feed_forward_autoencoder,
                evaluation_function=load_from_module(run_config['evaluation_function']),
            ),
            x=x_train,
            max_iterations=config['max_iterations'],
            min_budget=config['min_budget'],
            max_budget=config['max_budget'],
            run_id=experiment.run_id,
            validation_split=config['validation_split'],
        )

        try:
            experiment.print('Optimizing')
            optimization_result = optimizer.optimize()
        except Exception:
            # If the search fails, fall back to the results aggregated so far
            # and log the traceback for inspection.
            optimization_result = OptimizationResult(optimizer.evaluation_result_aggregator.get_evaluation_results())
            experiment.log('search/error', traceback.format_exc())

        experiment.print('Logging optimization results')
        experiment.log_optimization_result(optimization_result)

        # Refit and log models at representative percentile ranks of the
        # search results (1.0 = best configuration, 0.0 = worst).
        log_model_configs = [
            {'key': 'best', 'percentile_rank': 1.0},
            {'key': 'average', 'percentile_rank': 0.5},
            {'key': 'worst_10th', 'percentile_rank': 0.1},
            {'key': 'worst', 'percentile_rank': 0.0},
        ]

        for log_model_config in log_model_configs:
            keras_model = cast(KerasModel, optimizer.refit_by_percentile_rank(log_model_config['percentile_rank']))

            experiment.log_keras_model(keras_model.model, key=log_model_config['key'])

            for threshold_percentile in config['threshold_percentiles']:
                experiment.log_keras_predictions(
                    keras_model,
                    bearing_dataset.as_dict(config['data_column']),
                    key=f'{log_model_config["key"]}_{threshold_percentile}',
                    labels=bearing_dataset.as_dict(config['data_column'], labels_only=True),
                    has_multiple_features=True,
                    threshold_percentile=threshold_percentile,
                )
Example #2
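# Trains a single autoencoder configuration on a bearing dataset and logs the
# model, the training history and reconstruction-based predictions.
# `experiment` and `config` come from the enclosing scope.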
def run_callable(run_config: dict):
    # Keep only rows recorded at non-zero rotational speed.
    def modifier(x):
        return x[x.rpm > 0]

    # Optionally min-max scale the raw values, then window them into samples
    # of the configured input size.
    def pre_processing(data_frame):
        if run_config['scaling'] == 'min_max':
            samples = Scaler(MinMaxScaler, fit_mode=run_config['fit_mode']).fit_transform(data_frame.to_numpy())
        else:
            samples = data_frame.to_numpy()

        return build_samples(samples, target_sample_length=config['input_size'], target_dimensions=3)

    experiment.print('Building model')
    model_function = load_from_module(run_config['model_function'])
    model = model_function(config['input_size'])
    experiment.log_keras_model(model)

    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    data_frames = bearing_dataset.as_dict(column=config['data_column'], modifier=modifier)
    train_samples = pre_processing(data_frames['train'])

    experiment.print('Fitting model')
    # Autoencoder training: the inputs double as the reconstruction targets.
    history = model.fit(
        train_samples,
        train_samples,
        verbose=0,
        epochs=config['epochs'],
        callbacks=experiment.keras_callbacks(),
        validation_split=config['validation_split'],
        shuffle=config['shuffle'],
    )

    experiment.log_history(history)
    experiment.log_keras_model(model)
    experiment.log_keras_predictions(
        model=model,
        data_frames=data_frames,
        pre_processing=pre_processing,
        has_multiple_features=True,
    )
Example #3
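    # Random-search baseline: samples configurations from `config_space`,
    # evaluates each one and records anomaly-detection metrics per evaluation.
    # `experiment`, `config` and `config_space` come from the enclosing scope.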
    def run_callable(run_config: dict):
        experiment.print('Loading data')
        bearing_dataset = load_data(run_config['data_set'])
        train = bearing_dataset.train(column=config['data_column'],
                                      as_numpy=True)
        test = bearing_dataset.test(column=config['data_column'],
                                    as_numpy=True)
        test_labels = bearing_dataset.test(column=config['data_column'],
                                           add_label=True)['label']
        threshold_percentile = config['threshold_percentile']
        # For an autoencoder the targets equal the inputs, hence `train` is
        # passed twice.
        x_train, x_valid, y_train, y_valid = train_test_split(
            train,
            train,
            test_size=config['validation_split'],
            shuffle=True,
        )
        model = KerasModel(
            create_model_function=create_deep_easing_feed_forward_autoencoder,
            evaluation_function=load_from_module(
                run_config['evaluation_function']),
        )
        history = pd.DataFrame(columns=[
            'cost', 'auc', 'accuracy', 'precision', 'recall', 'f_score',
            'matthews_cc'
        ])

        for i in range(1, config['num_evaluations'] + 1):
            experiment.print(
                f'Evaluating configuration {i} of {config["num_evaluations"]}')
            current_config = dict(config_space.sample_configuration())

            model.load_config(current_config)
            evaluation_result = model.evaluate(x_train,
                                               y_train,
                                               x_valid,
                                               y_valid,
                                               budget=config['budget'])

            # Derive the anomaly threshold from the z-scores of the training
            # reconstruction error.
            train_reconstruction_error = compute_reconstruction_error(
                y_train, model.predict(x_train))
            train_z_scores = z_score(train_reconstruction_error)
            anomaly_threshold = percentile(train_z_scores,
                                           threshold_percentile)

            # Score the test set against the training error distribution
            # (median and MAD taken from the training reconstruction error).
            test_prediction = model.predict(test)
            test_reconstruction_error = compute_reconstruction_error(
                test, test_prediction)
            test_z_scores = z_score(
                test_reconstruction_error,
                given_median=median(train_reconstruction_error),
                given_mad=mad(train_reconstruction_error))

            if np.isnan(np.sum(test_reconstruction_error)):
                experiment.print(
                    'Got a NaN value in test reconstruction error, skip this evaluation.'
                )
                continue

            # Flag a test sample as anomalous when its z-score exceeds the
            # threshold derived from the training data.
            anomaly_prediction = (np.array(test_z_scores) >
                                  anomaly_threshold).astype(int)
            metrics = compute_classification_metrics(test_labels.values,
                                                     anomaly_prediction)
            roc = compute_roc(test_labels.values, test_reconstruction_error)

            history_record = {
                'cost': evaluation_result.cost,
                'auc': roc['auc'],
                'accuracy': metrics['accuracy'],
                'precision': metrics['precision'],
                'recall': metrics['recall'],
                'f_score': metrics['f_score'],
                'matthews_cc': metrics['matthews_cc'],
                **{f'info_{k}': v
                   for k, v in evaluation_result.info.items()},
                **{f'config_{k}': v
                   for k, v in current_config.items()}
            }

            history = pd.concat([history, pd.DataFrame([history_record])],
                                ignore_index=True)

            experiment.log('history', history)
Example #4
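# Builds a symmetric feed-forward autoencoder whose hidden layer sizes are
# interpolated between the input and latent dimension by an easing function.
# Assumed imports from the surrounding module (tensorflow.keras assumed):
#
#   from tensorflow.keras import regularizers
#   from tensorflow.keras.layers import Dense, Dropout, Input
#   from tensorflow.keras.models import Model
#   from tensorflow.keras.optimizers import Adam, SGD
#   from a2e.utility import load_from_module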
def create_deep_easing_feed_forward_autoencoder(
    input_dimension,
    latent_dimension,
    easing='ease_linear',
    number_of_hidden_layers=1,
    hidden_layer_activations='relu',
    output_layer_activation='relu',
    optimizer='adam',
    loss='mse',
    activity_regularizer=None,
    l1_activity_regularizer_factor=0.01,
    l2_activity_regularizer_factor=0.01,
    learning_rate=None,
    learning_rate_decay=0,
    sgd_momentum=None,
    dropout_rate_input=0,
    dropout_rate_hidden_layers=0,
    dropout_rate_output=0,
    dropout_rate_threshold=0.01,
    **kwargs,
) -> Model:
    if number_of_hidden_layers % 2 == 0:
        raise ValueError(
            f'Number of hidden layers must be odd, "{number_of_hidden_layers}" provided.'
        )

    # Resolve the activity regularizer from its string identifier.
    if activity_regularizer == 'none':
        activity_regularizer = None
    elif activity_regularizer == 'l1':
        activity_regularizer = regularizers.l1(l1_activity_regularizer_factor)
    elif activity_regularizer == 'l2':
        activity_regularizer = regularizers.l2(l2_activity_regularizer_factor)

    number_of_encoding_layers = int((number_of_hidden_layers - 1) / 2)
    encoding_layer_dimensions = []

    input_layer = Input(shape=(input_dimension, ), name='input')
    layer = input_layer

    if dropout_rate_input > dropout_rate_threshold:
        layer = Dropout(dropout_rate_input)(layer)

    # Resolve the easing function; a callable may also be passed in directly.
    if isinstance(easing, str):
        easing_function = load_from_module(f'a2e.utility.easing.{easing}')
    else:
        easing_function = easing

    # Interpolate the hidden layer sizes between the input and latent
    # dimension using the easing function.
    for i in range(1, number_of_encoding_layers + 1):
        encoding_layer_dimensions.append(
            easing_function(input_dimension, latent_dimension, i,
                            number_of_encoding_layers))

    # encoding
    for i, layer_dimension in enumerate(encoding_layer_dimensions):
        layer = Dense(layer_dimension,
                      activation=hidden_layer_activations,
                      name=f'hidden_encoding_layer_{i}')(layer)

        if dropout_rate_hidden_layers > dropout_rate_threshold:
            layer = Dropout(dropout_rate_hidden_layers)(layer)

    encoded = Dense(latent_dimension,
                    activation=hidden_layer_activations,
                    activity_regularizer=activity_regularizer,
                    name='encoded')(layer)
    layer = encoded

    # Prepare the decoder dimensions: drop the innermost encoding dimension
    # and prepend the input dimension; the list is consumed in reverse below.
    if len(encoding_layer_dimensions) > 0:
        encoding_layer_dimensions.pop()
        encoding_layer_dimensions.insert(0, input_dimension)

    # decoding
    for i, layer_dimension in enumerate(reversed(encoding_layer_dimensions)):
        layer = Dense(layer_dimension,
                      activation=hidden_layer_activations,
                      name=f'hidden_decoding_layer_{i}')(layer)

        if dropout_rate_hidden_layers > dropout_rate_threshold \
                and (i + 1 < len(encoding_layer_dimensions) or dropout_rate_output < dropout_rate_threshold):
            layer = Dropout(dropout_rate_hidden_layers)(layer)

    if dropout_rate_output > dropout_rate_threshold:
        layer = Dropout(dropout_rate_output)(layer)

    output_layer = Dense(input_dimension,
                         activation=output_layer_activation,
                         name='output')(layer)

    model = Model(input_layer, output_layer, name='a2e_deep_feed_forward')

    if optimizer == 'adam':
        optimizer = Adam(
            learning_rate=0.001 if learning_rate is None else learning_rate,
            decay=learning_rate_decay)
    elif optimizer == 'sgd':
        optimizer = SGD(
            learning_rate=0.01 if learning_rate is None else learning_rate,
            momentum=0.0 if sgd_momentum is None else sgd_momentum,
            decay=learning_rate_decay)

    model.compile(optimizer=optimizer, loss=loss)

    return model
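
# A minimal usage sketch (hypothetical argument values, not taken from the
# original configuration):
#
#   model = create_deep_easing_feed_forward_autoencoder(
#       input_dimension=256,
#       latent_dimension=16,
#       number_of_hidden_layers=3,
#       easing='ease_linear',
#       activity_regularizer='l1',
#       optimizer='adam',
#   )
#   model.summary()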
Example #5
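    # Verifies that load_from_module resolves a dotted path to a callable.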
    def test_load_from_module(self):
        loaded_function = load_from_module(
            'a2e.utility.timestamp_to_date_time')

        self.assertIsInstance(loaded_function, Callable)