def run_callable(run_config: dict):
    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    x_train = bearing_dataset.train(column=config['data_column'], as_numpy=True)

    experiment.print('Initializing optimizer')
    experiment.print(f'max_iterations = {config["max_iterations"]}')
    optimizer = create_optimizer(
        run_config['optimizer'],
        config_space=config_space,
        model=KerasModel(
            create_model_function=create_deep_easing_feed_forward_autoencoder,
            evaluation_function=load_from_module(run_config['evaluation_function']),
        ),
        x=x_train,
        max_iterations=config['max_iterations'],
        min_budget=config['min_budget'],
        max_budget=config['max_budget'],
        run_id=experiment.run_id,
        validation_split=config['validation_split'],
    )

    try:
        experiment.print('Optimizing')
        optimization_result = optimizer.optimize()
    except Exception:
        # Salvage the evaluations collected so far and log the traceback instead of crashing the run.
        optimization_result = OptimizationResult(optimizer.evaluation_result_aggregator.get_evaluation_results())
        experiment.log('search/error', traceback.format_exc())

    experiment.print('Logging optimization results')
    experiment.log_optimization_result(optimization_result)

    # Refit and log models at several percentile ranks of the search results.
    log_model_configs = [
        {'key': 'best', 'percentile_rank': 1.0},
        {'key': 'average', 'percentile_rank': 0.5},
        {'key': 'worst_10th', 'percentile_rank': 0.1},
        {'key': 'worst', 'percentile_rank': 0.0},
    ]

    for log_model_config in log_model_configs:
        keras_model = cast(KerasModel, optimizer.refit_by_percentile_rank(log_model_config['percentile_rank']))
        experiment.log_keras_model(keras_model.model, key=log_model_config['key'])

        for threshold_percentile in config['threshold_percentiles']:
            experiment.log_keras_predictions(
                keras_model,
                bearing_dataset.as_dict(config['data_column']),
                key=f'{log_model_config["key"]}_{threshold_percentile}',
                labels=bearing_dataset.as_dict(config['data_column'], labels_only=True),
                has_multiple_features=True,
                threshold_percentile=threshold_percentile,
            )

def run_callable(run_config: dict):
    def modifier(x):
        # Keep only rows recorded while the bearing was actually rotating.
        return x[x.rpm > 0]

    def pre_processing(data_frame):
        if run_config['scaling'] == 'min_max':
            samples = Scaler(MinMaxScaler, fit_mode=run_config['fit_mode']).fit_transform(data_frame.to_numpy())
        else:
            samples = data_frame.to_numpy()

        return build_samples(samples, target_sample_length=config['input_size'], target_dimensions=3)

    experiment.print('Building model')
    model_function = load_from_module(run_config['model_function'])
    model = model_function(config['input_size'])
    experiment.log_keras_model(model)

    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    data_frames = bearing_dataset.as_dict(column=config['data_column'], modifier=modifier)
    train_samples = pre_processing(data_frames['train'])

    experiment.print('Fitting model')
    history = model.fit(
        train_samples,
        train_samples,
        verbose=0,
        epochs=config['epochs'],
        callbacks=experiment.keras_callbacks(),
        validation_split=config['validation_split'],
        shuffle=config['shuffle'],
    )

    experiment.log_history(history)
    experiment.log_keras_model(model)
    experiment.log_keras_predictions(
        model=model,
        data_frames=data_frames,
        pre_processing=pre_processing,
        has_multiple_features=True,
    )

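# Illustrative sketch (an assumption, not the actual a2e.utility implementation):
# a build_samples-style windowing helper consistent with how it is called above.
# It chops an array of shape (rows, features) into fixed-length windows and, for
# target_dimensions=3, keeps a trailing feature axis so the result fits Keras
# models that expect 3-D input. Signature and behavior are illustrative only.
import numpy as np


def build_samples_sketch(samples: np.ndarray, target_sample_length: int, target_dimensions: int = 3) -> np.ndarray:
    number_of_windows = samples.shape[0] // target_sample_length
    windows = samples[:number_of_windows * target_sample_length]

    if target_dimensions == 3:
        # (number_of_windows, target_sample_length, features)
        return windows.reshape(number_of_windows, target_sample_length, -1)

    # (number_of_windows, target_sample_length * features)
    return windows.reshape(number_of_windows, -1)
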
def run_callable(run_config: dict):
    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    train = bearing_dataset.train(column=config['data_column'], as_numpy=True)
    test = bearing_dataset.test(column=config['data_column'], as_numpy=True)
    test_labels = bearing_dataset.test(column=config['data_column'], add_label=True)['label']
    threshold_percentile = config['threshold_percentile']

    x_train, x_valid, y_train, y_valid = train_test_split(
        train,
        train,
        test_size=config['validation_split'],
        shuffle=True,
    )

    model = KerasModel(
        create_model_function=create_deep_easing_feed_forward_autoencoder,
        evaluation_function=load_from_module(run_config['evaluation_function']),
    )

    history = pd.DataFrame(columns=['cost', 'auc', 'accuracy', 'precision', 'recall', 'f_score', 'matthews_cc'])

    for i in range(1, config['num_evaluations'] + 1):
        experiment.print(f'Evaluating configuration {i} of {config["num_evaluations"]}')

        current_config = dict(config_space.sample_configuration())
        model.load_config(current_config)
        evaluation_result = model.evaluate(x_train, y_train, x_valid, y_valid, budget=config['budget'])

        # Derive the anomaly threshold from the z-scores of the train reconstruction error.
        train_reconstruction_error = compute_reconstruction_error(y_train, model.predict(x_train))
        train_z_scores = z_score(train_reconstruction_error)
        anomaly_threshold = percentile(train_z_scores, threshold_percentile)

        # Score the test errors against the train median/MAD so the threshold transfers.
        test_prediction = model.predict(test)
        test_reconstruction_error = compute_reconstruction_error(test, test_prediction)
        test_z_scores = z_score(
            test_reconstruction_error,
            given_median=median(train_reconstruction_error),
            given_mad=mad(train_reconstruction_error),
        )

        if np.isnan(np.sum(test_reconstruction_error)):
            experiment.print('Got a NaN value in the test reconstruction error, skipping this evaluation.')
            continue

        anomaly_prediction = (np.array(test_z_scores) > anomaly_threshold).astype(int)
        metrics = compute_classification_metrics(test_labels.values, anomaly_prediction)
        roc = compute_roc(test_labels.values, test_reconstruction_error)
        history_record = {
            'cost': evaluation_result.cost,
            'auc': roc['auc'],
            'accuracy': metrics['accuracy'],
            'precision': metrics['precision'],
            'recall': metrics['recall'],
            'f_score': metrics['f_score'],
            'matthews_cc': metrics['matthews_cc'],
            **{f'info_{key}': value for key, value in evaluation_result.info.items()},
            **{f'config_{key}': value for key, value in current_config.items()},
        }

        history = history.append(history_record, ignore_index=True)
        experiment.log('history', history)

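# Illustrative sketch (an assumption, not the actual a2e.utility code) of a
# median/MAD-based modified z-score consistent with how z_score is called above:
# given_median and given_mad let the test errors be scored against statistics of
# the train reconstruction error. The 0.6745 constant is the standard factor that
# makes the MAD consistent with the standard deviation for normally distributed
# data; whether the original helper uses it is an assumption.
import numpy as np


def z_score_sketch(values, given_median=None, given_mad=None):
    values = np.asarray(values, dtype=float)
    reference_median = np.median(values) if given_median is None else given_median
    reference_mad = np.median(np.abs(values - reference_median)) if given_mad is None else given_mad

    return 0.6745 * (values - reference_median) / reference_mad
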
def create_deep_easing_feed_forward_autoencoder(
    input_dimension,
    latent_dimension,
    easing='ease_linear',
    number_of_hidden_layers=1,
    hidden_layer_activations='relu',
    output_layer_activation='relu',
    optimizer='adam',
    loss='mse',
    activity_regularizer=None,
    l1_activity_regularizer_factor=0.01,
    l2_activity_regularizer_factor=0.01,
    learning_rate=None,
    learning_rate_decay=0,
    sgd_momentum=None,
    dropout_rate_input=0,
    dropout_rate_hidden_layers=0,
    dropout_rate_output=0,
    dropout_rate_threshold=0.01,
    **kwargs,
) -> Model:
    if number_of_hidden_layers % 2 == 0:
        raise ValueError(f'Number of hidden layers must be odd, "{number_of_hidden_layers}" provided.')

    if activity_regularizer == 'none':
        activity_regularizer = None
    elif activity_regularizer == 'l1':
        activity_regularizer = regularizers.l1(l1_activity_regularizer_factor)
    elif activity_regularizer == 'l2':
        activity_regularizer = regularizers.l2(l2_activity_regularizer_factor)

    number_of_encoding_layers = int((number_of_hidden_layers - 1) / 2)
    encoding_layer_dimensions = []
    input_layer = Input(shape=(input_dimension,), name='input')
    layer = input_layer

    if dropout_rate_input > dropout_rate_threshold:
        layer = Dropout(dropout_rate_input)(layer)

    if isinstance(easing, str):
        easing_function = load_from_module(f'a2e.utility.easing.{easing}')
    else:
        # Assume a callable was passed directly; without this branch, easing_function would be unbound.
        easing_function = easing

    # The easing function interpolates the layer dimensions between input and latent size.
    for i in range(1, number_of_encoding_layers + 1):
        encoding_layer_dimensions.append(easing_function(input_dimension, latent_dimension, i, number_of_encoding_layers))

    # encoding
    for i, layer_dimension in enumerate(encoding_layer_dimensions):
        layer = Dense(layer_dimension, activation=hidden_layer_activations, name=f'hidden_encoding_layer_{i}')(layer)

        if dropout_rate_hidden_layers > dropout_rate_threshold:
            layer = Dropout(dropout_rate_hidden_layers)(layer)

    encoded = Dense(latent_dimension, activation=hidden_layer_activations, activity_regularizer=activity_regularizer, name='encoded')(layer)
    layer = encoded

    # Mirror the encoding dimensions for the decoder, ending at the input dimension.
    if len(encoding_layer_dimensions) > 0:
        encoding_layer_dimensions.pop()
        encoding_layer_dimensions.insert(0, input_dimension)

    # decoding
    for i, layer_dimension in enumerate(reversed(encoding_layer_dimensions)):
        layer = Dense(layer_dimension, activation=hidden_layer_activations, name=f'hidden_decoding_layer_{i}')(layer)

        if dropout_rate_hidden_layers > dropout_rate_threshold \
                and (i + 1 < len(encoding_layer_dimensions) or dropout_rate_output < dropout_rate_threshold):
            layer = Dropout(dropout_rate_hidden_layers)(layer)

    if dropout_rate_output > dropout_rate_threshold:
        layer = Dropout(dropout_rate_output)(layer)

    output_layer = Dense(input_dimension, activation=output_layer_activation, name='output')(layer)
    model = Model(input_layer, output_layer, name='a2e_deep_feed_forward')

    if optimizer == 'adam':
        optimizer = Adam(learning_rate=0.001 if learning_rate is None else learning_rate, decay=learning_rate_decay)
    elif optimizer == 'sgd':
        optimizer = SGD(learning_rate=0.01 if learning_rate is None else learning_rate, momentum=sgd_momentum, decay=learning_rate_decay)

    model.compile(optimizer=optimizer, loss=loss)

    return model

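# Illustrative sketch of what an easing function such as a2e.utility.easing.ease_linear
# plausibly computes (an assumption for illustration, not the actual implementation):
# the dimension of the i-th of n encoding layers, interpolated linearly between
# input_dimension and latent_dimension.
def ease_linear_sketch(input_dimension: int, latent_dimension: int, i: int, n: int) -> int:
    return int(input_dimension + (latent_dimension - input_dimension) * i / (n + 1))


# Hypothetical usage of the factory above: a 257-dimensional input compressed to a
# 16-dimensional code with five hidden layers (two encoding, the bottleneck, two
# decoding). The dimensions here are invented for illustration.
example_model = create_deep_easing_feed_forward_autoencoder(
    input_dimension=257,
    latent_dimension=16,
    number_of_hidden_layers=5,
    dropout_rate_hidden_layers=0.1,
)
example_model.summary()
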
def test_load_from_module(self):
    loaded_function = load_from_module('a2e.utility.timestamp_to_date_time')

    self.assertIsInstance(loaded_function, Callable)
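
# Illustrative sketch (an assumption, not the actual a2e.utility code) of a
# load_from_module-style helper as exercised by the test above: split a dotted
# path into module and attribute, import the module, and return the attribute.
import importlib


def load_from_module_sketch(dotted_path: str):
    module_path, _, attribute_name = dotted_path.rpartition('.')
    module = importlib.import_module(module_path)

    return getattr(module, attribute_name)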