def run_callable(run_config: dict):
    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    data_frames = bearing_dataset.as_dict(column=config['data_column'])
    labels = bearing_dataset.as_dict(column=config['data_column'], labels_only=True)
    train_scaler = Scaler(MinMaxScaler)
    train_samples = train_scaler.fit_transform(data_frames['train'].to_numpy())

    experiment.print('Fitting model')
    history = model.fit(
        train_samples,
        train_samples,
        verbose=0,
        epochs=config['epochs'],
        callbacks=experiment.keras_callbacks(),
        validation_split=config['validation_split'],
        shuffle=config['shuffle'],
    )
    best_model = experiment.load_best_model()

    experiment.log_keras_model(best_model, key='best')
    experiment.log_history(history)

    for percentile in config['threshold_percentiles']:
        experiment.log_keras_predictions(
            model=best_model,
            data_frames=data_frames,
            labels=labels,
            pre_processing=lambda data_frame: train_scaler.transform(data_frame.to_numpy()),
            has_multiple_features=True,
            threshold_percentile=percentile,
            key=f'{percentile}_percentile',
        )

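# Note: `Scaler` is a project-level helper that is not defined in this file. The class below is a
# minimal sketch of how such a MinMaxScaler wrapper with a `fit_mode` option could look; it is an
# assumption, not the project's implementation (names and modes may differ).
from sklearn.preprocessing import MinMaxScaler


class ScalerSketch:
    """Hypothetical wrapper around an sklearn scaler class such as MinMaxScaler."""

    def __init__(self, scaler_class, fit_mode: str = 'per_feature', **scaler_kwargs):
        self.scaler = scaler_class(**scaler_kwargs)
        self.fit_mode = fit_mode

    def fit_transform(self, samples):
        # 'per_feature': one min/max per column (sklearn's default behaviour).
        # 'per_sample': scale each row independently by working on the transposed array.
        if self.fit_mode == 'per_sample':
            return self.scaler.fit_transform(samples.T).T
        return self.scaler.fit_transform(samples)

    def transform(self, samples):
        if self.fit_mode == 'per_sample':
            # Per-sample scaling has no shared fit, so each call rescales independently.
            return self.scaler.fit_transform(samples.T).T
        return self.scaler.transform(samples)
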
def run_callable(run_config: dict):
    experiment.print('Loading data')
    data_set_key = run_config['data_set']
    bearing_dataset = load_data(data_set_key)
    train = bearing_dataset.train('fft')
    test = bearing_dataset.test('fft', add_label=True)

    # Baseline model: the element-wise median of the training FFT frames.
    model = train.median(axis=0)

    # Robust z-scores of the training errors (L1 distance of each frame to the median frame).
    train_error = []
    for index, row in train.iterrows():
        row_error = (row - model).abs().sum()
        train_error.append(row_error)

    train_error_median = median(train_error)
    train_error_mad = mad(train_error)
    train_z_scores = list(map(lambda x: compute_z_score(x, train_error_median, train_error_mad), train_error))
    zscore_threshold = percentile(train_z_scores, 99)

    experiment.print('Predicting test dataset')
    prediction = []
    prediction_zscores = []

    for index, row in test.iterrows():
        z_score = compute_z_score((row.iloc[:-1] - model).abs().sum(), train_error_median, train_error_mad)
        is_anomaly = z_score > zscore_threshold
        prediction.append(1 if is_anomaly else 0)
        prediction_zscores.append(z_score)

    experiment.plot(y=prediction_zscores, xlabel='time', ylabel='z-score', key='prediction_zscores_' + data_set_key)
    experiment.plot(y=prediction, xlabel='time', ylabel='is anomaly', label='prediction',
                    key='prediction_' + data_set_key, close=False)
    experiment.plot(y=test['label'], label='truth', key='prediction_' + data_set_key, create_figure=False)

    roc = compute_roc(test['label'], prediction_zscores)
    metrics = compute_classification_metrics(test['label'], prediction)
    metrics['auc'] = roc['auc']

    experiment.log('roc', roc, to_pickle=True)
    experiment.log('metrics', metrics)
    experiment.plot_roc('roc', roc['fpr'], roc['tpr'])
    experiment.print(
        f'metrics: accuracy={metrics["accuracy"]}, precision={metrics["precision"]}, '
        f'recall={metrics["recall"]}, f_score={metrics["f_score"]}, auc={roc["auc"]}'
    )

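# `median`, `mad`, `percentile` and `compute_z_score` are imported from the project's utility
# modules. As a reference only, a minimal sketch of the robust z-score used above could look like
# the following; it assumes the plain (value - median) / MAD form, whereas the project may use the
# 0.6745-scaled "modified z-score" instead.
import numpy as np


def mad_sketch(values) -> float:
    """Median absolute deviation of a 1-D array-like."""
    values = np.asarray(values, dtype=float)
    return float(np.median(np.abs(values - np.median(values))))


def compute_z_score_sketch(value: float, given_median: float, given_mad: float) -> float:
    """Robust z-score of `value` relative to a reference median and MAD."""
    if given_mad == 0:
        return 0.0
    return (value - given_median) / given_mad
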
def run_callable(run_config: dict):
    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    x_train = bearing_dataset.train(column=config['data_column'], as_numpy=True)

    experiment.print('Initializing optimizer')
    experiment.print(f'max_iterations = {config["max_iterations"]}')

    optimizer = create_optimizer(
        run_config['optimizer'],
        config_space=config_space,
        model=KerasModel(
            create_model_function=create_deep_easing_feed_forward_autoencoder,
            evaluation_function=load_from_module(run_config['evaluation_function']),
        ),
        x=x_train,
        max_iterations=config['max_iterations'],
        min_budget=config['min_budget'],
        max_budget=config['max_budget'],
        run_id=experiment.run_id,
        validation_split=config['validation_split'],
    )

    try:
        experiment.print('Optimizing')
        optimization_result = optimizer.optimize()
    except Exception:
        # Keep the evaluations gathered so far and log the traceback instead of aborting the run.
        optimization_result = OptimizationResult(optimizer.evaluation_result_aggregator.get_evaluation_results())
        experiment.log('search/error', traceback.format_exc())

    experiment.print('Logging optimization results')
    experiment.log_optimization_result(optimization_result)

    log_model_configs = [
        {'key': 'best', 'percentile_rank': 1.0},
        {'key': 'average', 'percentile_rank': 0.5},
        {'key': 'worst_10th', 'percentile_rank': 0.1},
        {'key': 'worst', 'percentile_rank': 0.0},
    ]

    for log_model_config in log_model_configs:
        keras_model = cast(KerasModel, optimizer.refit_by_percentile_rank(log_model_config['percentile_rank']))
        experiment.log_keras_model(keras_model.model, key=log_model_config['key'])

        for threshold_percentile in config['threshold_percentiles']:
            experiment.log_keras_predictions(
                keras_model,
                bearing_dataset.as_dict(config['data_column']),
                key=f'{log_model_config["key"]}_{threshold_percentile}',
                labels=bearing_dataset.as_dict(config['data_column'], labels_only=True),
                has_multiple_features=True,
                threshold_percentile=threshold_percentile,
            )

def run_callable(run_config: dict):
    def pre_processing_x(data_frame):
        # Inputs: drop the last `prediction_shift` rows so x and y stay aligned.
        numpy_data = data_frame.to_numpy()
        numpy_data = numpy_data[:-config['prediction_shift'], :]
        samples = build_samples(numpy_data.flatten(), config['input_size'], target_dimensions=3)

        if run_config['scaling'] == 'min_max':
            samples = Scaler(MinMaxScaler, fit_mode=run_config['fit_mode']).fit_transform(numpy_data)

        return samples

    def pre_processing_y(data_frame):
        # Targets: shift forward by `prediction_shift` rows.
        numpy_data = data_frame.to_numpy()
        numpy_data = numpy_data[config['prediction_shift']:, :]
        samples = build_samples(numpy_data.flatten(), config['output_size'], target_dimensions=3)

        if run_config['scaling'] == 'min_max':
            samples = Scaler(MinMaxScaler, fit_mode=run_config['fit_mode']).fit_transform(numpy_data)

        return samples

    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    data_frames = bearing_dataset.as_dict(column=run_config['data_column'], split_test=True)
    train_samples_x = pre_processing_x(data_frames['train'])
    train_samples_y = pre_processing_y(data_frames['train'])

    experiment.print('Fitting model')
    history = model.fit(
        train_samples_x,
        train_samples_y,
        verbose=0,
        epochs=config['epochs'],
        callbacks=experiment.keras_callbacks(),
        validation_split=config['validation_split'],
        shuffle=config['shuffle'],
    )

    experiment.log_history(history)
    experiment.log_keras_model(model)
    experiment.log_keras_predictions(
        model=model,
        data_frames=data_frames,
        pre_processing=pre_processing_x,
        pre_processing_y=pre_processing_y,
    )

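# `build_samples` is a project helper that is not defined here. A minimal sketch of the assumed
# behaviour, inferred from how it is called above: chop a flat signal into fixed-length windows and
# optionally add a trailing feature axis so the result fits 3-D Keras inputs. The real helper may
# handle 2-D input, padding, or overlapping windows differently.
import numpy as np


def build_samples_sketch(signal, target_sample_length: int, target_dimensions: int = 2) -> np.ndarray:
    signal = np.asarray(signal).flatten()
    num_samples = len(signal) // target_sample_length

    # Drop the trailing remainder that does not fill a whole window.
    samples = signal[:num_samples * target_sample_length].reshape(num_samples, target_sample_length)

    if target_dimensions == 3:
        samples = samples[:, :, np.newaxis]

    return samples
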
def run_callable(run_config: dict):
    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    # Keep only rows recorded while the shaft was turning (rpm > 0).
    data_frames = bearing_dataset.as_dict(column=config['data_column'], modifier=lambda x: x[x.rpm > 0])
    labels = bearing_dataset.as_dict(column=config['data_column'], modifier=lambda x: x[x.rpm > 0], labels_only=True)

    if run_config['scaling'] == 'min_max':
        train_samples = Scaler(MinMaxScaler, fit_mode=run_config['fit_mode']).fit_transform(data_frames['train'].to_numpy())
    else:
        train_samples = data_frames['train'].to_numpy()

    experiment.print('Fitting model')
    history = model.fit(
        train_samples,
        train_samples,
        verbose=0,
        epochs=config['epochs'],
        callbacks=experiment.keras_callbacks(),
        validation_split=config['validation_split'],
        shuffle=config['shuffle'],
    )
    best_model = experiment.load_best_model()

    experiment.log_keras_model(model, key='current')
    experiment.log_keras_model(best_model, key='best')
    experiment.log_history(history)
    experiment.log_keras_predictions(
        model=best_model,
        data_frames=data_frames,
        labels=labels,
        pre_processing=lambda data_frame: Scaler(MinMaxScaler, fit_mode=run_config['fit_mode']).fit_transform(data_frame.to_numpy())
        if run_config['scaling'] == 'min_max' else data_frame.to_numpy(),
        has_multiple_features=True,
    )

def run_callable(run_config: dict):
    def modifier(x):
        return x[x.rpm > 0]

    def pre_processing(data_frame):
        if run_config['scaling'] == 'min_max':
            samples = Scaler(MinMaxScaler, fit_mode=run_config['fit_mode']).fit_transform(data_frame.to_numpy())
        else:
            samples = data_frame.to_numpy()

        return build_samples(samples, target_sample_length=config['input_size'], target_dimensions=3)

    experiment.print('Building model')
    model_function = load_from_module(run_config['model_function'])
    model = model_function(config['input_size'])
    experiment.log_keras_model(model)

    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    data_frames = bearing_dataset.as_dict(column=config['data_column'], modifier=modifier)
    train_samples = pre_processing(data_frames['train'])

    experiment.print('Fitting model')
    history = model.fit(
        train_samples,
        train_samples,
        verbose=0,
        epochs=config['epochs'],
        callbacks=experiment.keras_callbacks(),
        validation_split=config['validation_split'],
        shuffle=config['shuffle'],
    )

    experiment.log_history(history)
    experiment.log_keras_model(model)
    experiment.log_keras_predictions(
        model=model,
        data_frames=data_frames,
        pre_processing=pre_processing,
        has_multiple_features=True,
    )

def run_callable(run_config: dict):
    def pre_processing(data_frame: DataFrame):
        samples = build_samples(data_frame.to_numpy().flatten(), config['input_size'])

        if run_config['fit_mode'] == 'train':
            # Reuse the scaler that was fitted on the training data.
            return train_scaler.transform(samples)
        else:
            return Scaler(MinMaxScaler, fit_mode=run_config['fit_mode']).fit_transform(samples)

    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    data_frames = bearing_dataset.as_dict(column=run_config['data_column'])
    train_samples = build_samples(data_frames['train'].to_numpy().flatten(), config['input_size'])

    fit_mode = 'per_feature' if run_config['fit_mode'] == 'train' else run_config['fit_mode']
    train_scaler = Scaler(MinMaxScaler, fit_mode=fit_mode)
    train_samples = train_scaler.fit_transform(train_samples)

    experiment.print('Fitting model')
    history = model.fit(
        train_samples,
        train_samples,
        verbose=0,
        epochs=config['epochs'],
        callbacks=experiment.keras_callbacks(),
        validation_split=config['validation_split'],
        shuffle=config['shuffle'],
    )

    experiment.log_history(history)
    experiment.log_keras_model(model)
    experiment.log_keras_predictions(
        model=model,
        data_frames=data_frames,
        pre_processing=pre_processing,
    )

def run_callable(run_config: dict):
    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    train = bearing_dataset.train(column=config['data_column'], as_numpy=True)
    test = bearing_dataset.test(column=config['data_column'], as_numpy=True)
    test_labels = bearing_dataset.test(column=config['data_column'], add_label=True)['label']
    threshold_percentile = config['threshold_percentile']

    x_train, x_valid, y_train, y_valid = train_test_split(
        train,
        train,
        test_size=config['validation_split'],
        shuffle=True,
    )

    model = KerasModel(
        create_model_function=create_deep_easing_feed_forward_autoencoder,
        evaluation_function=load_from_module(run_config['evaluation_function']),
    )

    history = pd.DataFrame(columns=[
        'cost', 'auc', 'accuracy', 'precision', 'recall', 'f_score', 'matthews_cc'
    ])

    for i in range(1, config['num_evaluations'] + 1):
        experiment.print(f'Evaluating configuration {i} of {config["num_evaluations"]}')

        # Sample a random hyperparameter configuration and train it on the given budget.
        current_config = dict(config_space.sample_configuration())
        model.load_config(current_config)
        evaluation_result = model.evaluate(x_train, y_train, x_valid, y_valid, budget=config['budget'])

        # Derive the anomaly threshold from the z-scores of the training reconstruction error.
        train_reconstruction_error = compute_reconstruction_error(y_train, model.predict(x_train))
        train_z_scores = z_score(train_reconstruction_error)
        anomaly_threshold = percentile(train_z_scores, threshold_percentile)

        test_prediction = model.predict(test)
        test_reconstruction_error = compute_reconstruction_error(test, test_prediction)
        test_z_scores = z_score(
            test_reconstruction_error,
            given_median=median(train_reconstruction_error),
            given_mad=mad(train_reconstruction_error),
        )

        if np.isnan(np.sum(test_reconstruction_error)):
            experiment.print('Got a NaN value in the test reconstruction error, skipping this evaluation.')
            continue

        anomaly_prediction = (np.array(test_z_scores) > anomaly_threshold).astype(int)
        metrics = compute_classification_metrics(test_labels.values, anomaly_prediction)
        roc = compute_roc(test_labels.values, test_reconstruction_error)

        history_record = {
            'cost': evaluation_result.cost,
            'auc': roc['auc'],
            'accuracy': metrics['accuracy'],
            'precision': metrics['precision'],
            'recall': metrics['recall'],
            'f_score': metrics['f_score'],
            'matthews_cc': metrics['matthews_cc'],
            **{f'info_{k}': v for k, v in evaluation_result.info.items()},
            **{f'config_{k}': v for k, v in current_config.items()},
        }
        history = history.append(history_record, ignore_index=True)

    experiment.log('history', history)

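# `compute_reconstruction_error` is another project utility. As a rough reference (an assumption,
# not the project's implementation), the per-sample reconstruction error could be the mean absolute
# difference between the autoencoder input and its reconstruction:
import numpy as np


def compute_reconstruction_error_sketch(y_true, y_pred) -> np.ndarray:
    """Mean absolute reconstruction error per sample (samples along axis 0)."""
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs(y_true - y_pred), axis=tuple(range(1, y_true.ndim)))
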
def run_callable(run_config: dict):
    pulley_ratio = config['motor_diameter'] / config['bearing_diameter']
    shaft_frequency = (run_config['rpm'] / 60) * pulley_ratio
    bandwidth = config['frequency_bandwidth']
    data_set_key = run_config['data_set']
    defect_frequencies = []
    vlines = []

    experiment.print('Loading data')
    bearing_dataset = load_data(data_set_key)
    train = bearing_dataset.train('fft')
    test = bearing_dataset.test('fft', add_label=True)
    test_healthy, test_anomalous = bearing_dataset.test('fft', split='2fold')

    for i, (defect_type, defect_frequency_order) in enumerate(config['defect_frequency_orders'].items()):
        # Characteristic defect frequency of this defect type at the current shaft speed.
        defect_frequency = defect_frequency_order * shaft_frequency
        train_means = train.iloc[:, max(int(defect_frequency) - bandwidth, 0):int(defect_frequency) + bandwidth].mean(axis=1)
        train_means_median = train_means.median()
        train_means_mad = mad(train_means)
        train_z_scores = list(map(lambda x: compute_z_score(x, train_means_median, train_means_mad), train_means))

        defect_frequencies.append({
            'frequency': defect_frequency,
            'train_median': train_means_median,
            'train_mad': train_means_mad,
            'train_z_scores': train_z_scores,
        })
        vlines.append({
            'x': defect_frequency,
            'color': config['colors'][i],
            'label': f'{defect_type} = {defect_frequency:.2f}Hz',
            'linestyle': 'dashed',
        })

        if config['show_bandwidth']:
            vlines.append({
                'x': int(defect_frequency) + bandwidth,
                'color': config['colors'][i],
                'label': '',
                'linestyle': 'dashed',
            })
            vlines.append({
                'x': max(int(defect_frequency) - bandwidth, 0),
                'color': config['colors'][i],
                'label': '',
                'linestyle': 'dashed',
            })

    experiment.print('Plotting data')
    train_median = train.median()
    test_healthy_median = test_healthy.median()
    test_anomalous_median = test_anomalous.median()
    ylim = [0, max(max(test_healthy_median), max(test_anomalous_median), max(train_median)) + 0.1]

    experiment.plot(y=train_median.values[:250], vlines=vlines, ylim=ylim, xlabel='frequency [Hz]', ylabel='amplitude',
                    title=f'{run_config["data_set"]} (train)', key='train_' + data_set_key)
    experiment.plot(y=test_healthy_median.values[:250], vlines=vlines, ylim=ylim, xlabel='frequency [Hz]', ylabel='amplitude',
                    title=f'{run_config["data_set"]} (healthy)', key='healthy_' + data_set_key)
    experiment.plot(y=test_anomalous_median.values[:250], vlines=vlines, ylim=ylim, xlabel='frequency [Hz]', ylabel='amplitude',
                    title=f'{run_config["data_set"]} (anomalous)', key='anomalous_' + data_set_key)

    experiment.print('Predicting test dataset')
    prediction = []
    prediction_zscores = []

    for index, row in test.iterrows():
        is_anomaly = False
        largest_zscore = 0

        for defect_frequency in defect_frequencies:
            # Threshold: either a fixed z-score from the config or a percentile of the training z-scores.
            zscore_threshold = config['zscore_threshold'] if 'zscore_threshold' in config \
                else percentile(defect_frequency['train_z_scores'], run_config['threshold_percentile'])
            row_mean = row.iloc[max(int(defect_frequency['frequency']) - bandwidth, 0):int(defect_frequency['frequency']) + bandwidth].mean()
            z_score = compute_z_score(row_mean, defect_frequency['train_median'], defect_frequency['train_mad'])
            is_anomaly = is_anomaly or (z_score > zscore_threshold)
            largest_zscore = max(largest_zscore, z_score)

        prediction.append(1 if is_anomaly else 0)
        prediction_zscores.append(largest_zscore)

    experiment.plot(y=prediction_zscores, xlabel='time', ylabel='z-score', key='prediction_zscores_' + data_set_key)
    experiment.plot(y=prediction, xlabel='time', ylabel='is anomaly', label='prediction',
                    key='prediction_' + data_set_key, close=False)
    experiment.plot(y=test['label'], label='truth', key='prediction_' + data_set_key, create_figure=False)

    roc = compute_roc(test['label'], prediction_zscores)
    metrics = compute_classification_metrics(test['label'], prediction)
    metrics['auc'] = roc['auc']

    experiment.log('roc', roc, to_pickle=True)
    experiment.log('metrics', metrics)
    experiment.plot_roc('roc', roc['fpr'], roc['tpr'])
    experiment.print(
        f'metrics: accuracy={metrics["accuracy"]}, precision={metrics["precision"]}, '
        f'recall={metrics["recall"]}, f_score={metrics["f_score"]}, auc={roc["auc"]}'
    )

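# `compute_roc` and `compute_classification_metrics` are project helpers. Below is a minimal sketch
# of what they are assumed to return, built on scikit-learn; the actual implementations may differ
# (for example in averaging mode or zero-division handling).
from sklearn.metrics import (accuracy_score, auc, matthews_corrcoef,
                             precision_recall_fscore_support, roc_curve)


def compute_roc_sketch(labels, scores) -> dict:
    """ROC curve and area under it for binary labels and continuous anomaly scores."""
    fpr, tpr, thresholds = roc_curve(labels, scores)
    return {'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds, 'auc': auc(fpr, tpr)}


def compute_classification_metrics_sketch(labels, prediction) -> dict:
    """Standard binary classification metrics for hard 0/1 predictions."""
    precision, recall, f_score, _ = precision_recall_fscore_support(
        labels, prediction, average='binary', zero_division=0)
    return {
        'accuracy': accuracy_score(labels, prediction),
        'precision': precision,
        'recall': recall,
        'f_score': f_score,
        'matthews_cc': matthews_corrcoef(labels, prediction),
    }
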