Example No. 1
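An autoencoder training run: the training split is min-max scaled, the model is fitted against itself, and predictions from the best checkpoint are logged for each configured threshold percentile. (`experiment`, `config`, `model`, `load_data`, and `Scaler` are provided by the surrounding framework in all of these examples.)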
def run_callable(run_config: dict):
    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    data_frames = bearing_dataset.as_dict(column=config['data_column'])
    labels = bearing_dataset.as_dict(column=config['data_column'], labels_only=True)
    train_scaler = Scaler(MinMaxScaler)
    train_samples = train_scaler.fit_transform(data_frames['train'].to_numpy())

    experiment.print('Fitting model')
    history = model.fit(
        train_samples,
        train_samples,
        verbose=0,
        epochs=config['epochs'],
        callbacks=experiment.keras_callbacks(),
        validation_split=config['validation_split'],
        shuffle=config['shuffle'],
    )

    best_model = experiment.load_best_model()

    experiment.log_keras_model(best_model, key='best')
    experiment.log_history(history)

    for percentile in config['threshold_percentiles']:
        experiment.log_keras_predictions(
            model=best_model,
            data_frames=data_frames,
            labels=labels,
            pre_processing=lambda data_frame: train_scaler.transform(data_frame.to_numpy()),
            has_multiple_features=True,
            threshold_percentile=percentile,
            key=f'{percentile}_percentile',
        )
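The `Scaler` wrapper used above and throughout these examples is a project helper, not part of sklearn. As rough orientation only, a minimal sketch of what it might look like, assuming it wraps an sklearn scaler class and that `fit_mode` distinguishes per-feature from per-sample scaling (the mode handling here is guessed from how 'per_feature' appears in Example No. 7):

# Hypothetical sketch, not the project's actual implementation.
import numpy as np
from sklearn.preprocessing import MinMaxScaler


class Scaler:
    def __init__(self, scaler_class, fit_mode: str = 'per_feature'):
        self.scaler_class = scaler_class
        self.fit_mode = fit_mode
        self.scaler = None

    def fit_transform(self, data: np.ndarray) -> np.ndarray:
        if self.fit_mode == 'per_sample':
            # Scale each row independently; sklearn scalers work per column,
            # so transpose in and out.
            return self.scaler_class().fit_transform(data.T).T

        # Default: one scaler fitted across all rows, per feature (sklearn default).
        self.scaler = self.scaler_class()
        return self.scaler.fit_transform(data)

    def transform(self, data: np.ndarray) -> np.ndarray:
        # Reuse the fitted scaler, as in Example No. 7's train_scaler.transform(...).
        return self.scaler.transform(data)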
Example No. 2
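A model-free baseline: the column-wise median of the training FFT spectra serves as the reference signal, a robust z-score threshold is derived from the training error distribution, and each test row is classified against it.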
def run_callable(run_config: dict):
    experiment.print('Loading data')
    data_set_key = run_config['data_set']
    bearing_dataset = load_data(data_set_key)
    train = bearing_dataset.train('fft')
    test = bearing_dataset.test('fft', add_label=True)
    model = train.median(axis=0)
    train_error = []

    for index, row in train.iterrows():
        row_error = (row - model).abs().sum()
        train_error.append(row_error)

    train_error_median = median(train_error)
    train_error_mad = mad(train_error)
    train_z_scores = [
        compute_z_score(x, train_error_median, train_error_mad)
        for x in train_error
    ]
    zscore_threshold = percentile(train_z_scores, 99)

    experiment.print('Predicting test dataset')
    prediction = []
    prediction_zscores = []

    for index, row in test.iterrows():
        z_score = compute_z_score((row.iloc[:-1] - model).abs().sum(),
                                  train_error_median, train_error_mad)
        is_anomaly = z_score > zscore_threshold

        prediction.append(1 if is_anomaly else 0)
        prediction_zscores.append(z_score)

    experiment.plot(y=prediction_zscores,
                    xlabel='time',
                    ylabel='z-score',
                    key='prediction_zscores_' + data_set_key)
    experiment.plot(y=prediction,
                    xlabel='time',
                    ylabel='is anomaly',
                    label='prediction',
                    key='prediction_' + data_set_key,
                    close=False)
    experiment.plot(y=test['label'],
                    label='truth',
                    key='prediction_' + data_set_key,
                    create_figure=False)

    roc = compute_roc(test['label'], prediction_zscores)
    metrics = compute_classification_metrics(test['label'], prediction)
    metrics['auc'] = roc['auc']

    experiment.log('roc', roc, to_pickle=True)
    experiment.log('metrics', metrics)
    experiment.plot_roc('roc', roc['fpr'], roc['tpr'])
    experiment.print(
        f'metrics: accuracy={metrics["accuracy"]}, precision={metrics["precision"]}, recall={metrics["recall"]}, f_score={metrics["f_score"]}, auc={roc["auc"]}'
    )
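The helpers `compute_z_score`, `mad`, `median`, and `percentile` come from elsewhere in the project. Assuming the standard median/MAD robust z-score, which the usage above is consistent with, plausible definitions look like this (the project may additionally apply a consistency constant such as 1.4826 to the MAD):

# Assumed definitions; sketched from how the examples call these helpers.
import numpy as np
from numpy import median, percentile  # used directly in the examples


def mad(values) -> float:
    # Median absolute deviation from the median.
    values = np.asarray(values)
    return float(np.median(np.abs(values - np.median(values))))


def compute_z_score(value: float, given_median: float, given_mad: float) -> float:
    # Robust z-score: distance from the median in units of MAD.
    if given_mad == 0:
        return 0.0
    return (value - given_median) / given_mad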
Example No. 3
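Hyperparameter optimization of the autoencoder: after the search (with a fallback that salvages partial results on failure), models are refitted at several percentile ranks of the result distribution and their predictions logged.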
def run_callable(run_config: dict):
    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    x_train = bearing_dataset.train(column=config['data_column'], as_numpy=True)

    experiment.print('Initializing optimizer')
    experiment.print(f'max_iterations = {config["max_iterations"]}')
    optimizer = create_optimizer(
        run_config['optimizer'],
        config_space=config_space,
        model=KerasModel(
            create_model_function=create_deep_easing_feed_forward_autoencoder,
            evaluation_function=load_from_module(run_config['evaluation_function']),
        ),
        x=x_train,
        max_iterations=config['max_iterations'],
        min_budget=config['min_budget'],
        max_budget=config['max_budget'],
        run_id=experiment.run_id,
        validation_split=config['validation_split'],
    )

    try:
        experiment.print('Optimizing')
        optimization_result = optimizer.optimize()
    except Exception:
        # Salvage the evaluations that completed before the failure.
        optimization_result = OptimizationResult(optimizer.evaluation_result_aggregator.get_evaluation_results())
        experiment.log('search/error', traceback.format_exc())

    experiment.print('Logging optimization results')
    experiment.log_optimization_result(optimization_result)

    log_model_configs = [
        {'key': 'best', 'percentile_rank': 1.0},
        {'key': 'average', 'percentile_rank': 0.5},
        {'key': 'worst_10th', 'percentile_rank': 0.1},
        {'key': 'worst', 'percentile_rank': 0.0},
    ]

    for log_model_config in log_model_configs:
        keras_model = cast(KerasModel, optimizer.refit_by_percentile_rank(log_model_config['percentile_rank']))

        experiment.log_keras_model(keras_model.model, key=log_model_config['key'])

        for threshold_percentile in config['threshold_percentiles']:
            experiment.log_keras_predictions(
                keras_model,
                bearing_dataset.as_dict(config['data_column']),
                key=f'{log_model_config["key"]}_{threshold_percentile}',
                labels=bearing_dataset.as_dict(config['data_column'], labels_only=True),
                has_multiple_features=True,
                threshold_percentile=threshold_percentile,
            )
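`load_from_module` resolves a callable from a string such as run_config['evaluation_function']. A typical importlib-based implementation, sketched under the assumption that the string is a dotted 'package.module.function' path:

# Sketch; the project's actual resolution logic may differ.
import importlib


def load_from_module(dotted_path: str):
    module_path, _, attribute = dotted_path.rpartition('.')
    module = importlib.import_module(module_path)
    return getattr(module, attribute)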
Example No. 4
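A forecasting variant: inputs and targets are the same signal shifted against each other by config['prediction_shift'], each windowed by build_samples, with optional min-max scaling.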
def run_callable(run_config: dict):
    def pre_processing_x(data_frame):
        numpy_data = data_frame.to_numpy()
        numpy_data = numpy_data[:-config['prediction_shift'], :]

        # Scale before windowing, so the scaled values actually reach the model.
        if run_config['scaling'] == 'min_max':
            numpy_data = Scaler(MinMaxScaler, fit_mode=run_config['fit_mode']).fit_transform(numpy_data)

        return build_samples(numpy_data.flatten(), config['input_size'], target_dimensions=3)

    def pre_processing_y(data_frame):
        numpy_data = data_frame.to_numpy()
        numpy_data = numpy_data[config['prediction_shift']:, :]

        if run_config['scaling'] == 'min_max':
            numpy_data = Scaler(MinMaxScaler, fit_mode=run_config['fit_mode']).fit_transform(numpy_data)

        return build_samples(numpy_data.flatten(), config['output_size'], target_dimensions=3)

    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    data_frames = bearing_dataset.as_dict(column=run_config['data_column'], split_test=True)
    train_samples_x = pre_processing_x(data_frames['train'])
    train_samples_y = pre_processing_y(data_frames['train'])

    experiment.print('Fitting model')
    history = model.fit(
        train_samples_x,
        train_samples_y,
        verbose=0,
        epochs=config['epochs'],
        callbacks=experiment.keras_callbacks(),
        validation_split=config['validation_split'],
        shuffle=config['shuffle'],
    )

    experiment.log_history(history)
    experiment.log_keras_model(model)
    experiment.log_keras_predictions(
        model=model,
        data_frames=data_frames,
        pre_processing=pre_processing_x,
        pre_processing_y=pre_processing_y
    )
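`build_samples` turns a signal into fixed-length windows, and `target_dimensions=3` appears to add the trailing feature axis Keras sequence models expect. A minimal sketch under that reading (non-overlapping windows are an assumption; the real helper may slide or pad):

# Assumed behavior, inferred from the call sites in these examples.
import numpy as np


def build_samples(data: np.ndarray, target_sample_length: int, target_dimensions: int = 2) -> np.ndarray:
    # Drop the remainder so the data divides evenly into windows.
    flat = np.asarray(data).flatten()
    usable = (flat.size // target_sample_length) * target_sample_length
    samples = flat[:usable].reshape(-1, target_sample_length)

    if target_dimensions == 3:
        # (num_samples, timesteps, 1) for sequence models.
        samples = samples[:, :, np.newaxis]

    return samples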
Example No. 5
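The autoencoder run from Example No. 1, with rows filtered to rpm > 0 via a modifier and scaling toggled by run_config['scaling'].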
def run_callable(run_config: dict):
    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    data_frames = bearing_dataset.as_dict(column=config['data_column'],
                                          modifier=lambda x: x[x.rpm > 0])
    labels = bearing_dataset.as_dict(column=config['data_column'],
                                     modifier=lambda x: x[x.rpm > 0],
                                     labels_only=True)

    if run_config['scaling'] == 'min_max':
        train_samples = Scaler(MinMaxScaler,
                               fit_mode=run_config['fit_mode']).fit_transform(
                                   data_frames['train'].to_numpy())
    else:
        train_samples = data_frames['train'].to_numpy()

    experiment.print('Fitting model')
    history = model.fit(
        train_samples,
        train_samples,
        verbose=0,
        epochs=config['epochs'],
        callbacks=experiment.keras_callbacks(),
        validation_split=config['validation_split'],
        shuffle=config['shuffle'],
    )

    best_model = experiment.load_best_model()

    experiment.log_keras_model(model, key='current')
    experiment.log_keras_model(best_model, key='best')
    experiment.log_history(history)
    experiment.log_keras_predictions(
        model=best_model,
        data_frames=data_frames,
        labels=labels,
        pre_processing=lambda data_frame: (
            Scaler(MinMaxScaler, fit_mode=run_config['fit_mode']).fit_transform(data_frame.to_numpy())
            if run_config['scaling'] == 'min_max'
            else data_frame.to_numpy()
        ),
        has_multiple_features=True,
    )
Example No. 6
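Builds the model dynamically from run_config['model_function'] and windows the (optionally scaled) training data before fitting.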
def run_callable(run_config: dict):
    def modifier(x):
        return x[x.rpm > 0]

    def pre_processing(data_frame):
        if run_config['scaling'] == 'min_max':
            samples = Scaler(MinMaxScaler, fit_mode=run_config['fit_mode']).fit_transform(data_frame.to_numpy())
        else:
            samples = data_frame.to_numpy()

        return build_samples(samples, target_sample_length=config['input_size'], target_dimensions=3)

    experiment.print('Building model')
    model_function = load_from_module(run_config['model_function'])
    model = model_function(config['input_size'])
    experiment.log_keras_model(model)

    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    data_frames = bearing_dataset.as_dict(column=config['data_column'], modifier=modifier)
    train_samples = pre_processing(data_frames['train'])

    experiment.print('Fitting model')
    history = model.fit(
        train_samples,
        train_samples,
        verbose=0,
        epochs=config['epochs'],
        callbacks=experiment.keras_callbacks(),
        validation_split=config['validation_split'],
        shuffle=config['shuffle'],
    )

    experiment.log_history(history)
    experiment.log_keras_model(model)
    experiment.log_keras_predictions(
        model=model,
        data_frames=data_frames,
        pre_processing=pre_processing,
        has_multiple_features=True,
    )
Example No. 7
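Windows the flattened training column into fixed-length samples; when fit_mode is 'train', the scaler fitted on the training data is reused at prediction time.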
def run_callable(run_config: dict):
    def pre_processing(data_frame: DataFrame):
        samples = build_samples(data_frame.to_numpy().flatten(),
                                config['input_size'])

        if run_config['fit_mode'] == 'train':
            return train_scaler.transform(samples)
        else:
            return Scaler(
                MinMaxScaler,
                fit_mode=run_config['fit_mode']).fit_transform(samples)

    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    data_frames = bearing_dataset.as_dict(column=run_config['data_column'])
    train_samples = build_samples(data_frames['train'].to_numpy().flatten(),
                                  config['input_size'])
    fit_mode = 'per_feature' if run_config['fit_mode'] == 'train' else run_config['fit_mode']
    train_scaler = Scaler(MinMaxScaler, fit_mode=fit_mode)
    train_samples = train_scaler.fit_transform(train_samples)

    experiment.print('Fitting model')
    history = model.fit(
        train_samples,
        train_samples,
        verbose=0,
        epochs=config['epochs'],
        callbacks=experiment.keras_callbacks(),
        validation_split=config['validation_split'],
        shuffle=config['shuffle'],
    )

    experiment.log_history(history)
    experiment.log_keras_model(model)
    experiment.log_keras_predictions(
        model=model,
        data_frames=data_frames,
        pre_processing=pre_processing,
    )
Example No. 8
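A random-search loop: configurations are sampled from config_space, each model is trained and thresholded on its training z-scores, and test-set classification metrics are appended to a history DataFrame.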
def run_callable(run_config: dict):
    experiment.print('Loading data')
    bearing_dataset = load_data(run_config['data_set'])
    train = bearing_dataset.train(column=config['data_column'], as_numpy=True)
    test = bearing_dataset.test(column=config['data_column'], as_numpy=True)
    test_labels = bearing_dataset.test(column=config['data_column'], add_label=True)['label']
    threshold_percentile = config['threshold_percentile']
    x_train, x_valid, y_train, y_valid = train_test_split(
        train,
        train,
        test_size=config['validation_split'],
        shuffle=True,
    )
    model = KerasModel(
        create_model_function=create_deep_easing_feed_forward_autoencoder,
        evaluation_function=load_from_module(run_config['evaluation_function']),
    )
    history = pd.DataFrame(columns=[
        'cost', 'auc', 'accuracy', 'precision', 'recall', 'f_score', 'matthews_cc'
    ])

    # 1-based so the progress message matches num_evaluations.
    for i in range(1, config['num_evaluations'] + 1):
        experiment.print(f'Evaluating configuration {i} of {config["num_evaluations"]}')
        current_config = dict(config_space.sample_configuration())

        model.load_config(current_config)
        evaluation_result = model.evaluate(x_train, y_train, x_valid, y_valid,
                                           budget=config['budget'])

        train_reconstruction_error = compute_reconstruction_error(y_train, model.predict(x_train))
        train_z_scores = z_score(train_reconstruction_error)
        anomaly_threshold = percentile(train_z_scores, threshold_percentile)

        test_prediction = model.predict(test)
        test_reconstruction_error = compute_reconstruction_error(test, test_prediction)

        if np.isnan(np.sum(test_reconstruction_error)):
            experiment.print('Got a NaN value in test reconstruction error, skipping this evaluation.')
            continue

        test_z_scores = z_score(
            test_reconstruction_error,
            given_median=median(train_reconstruction_error),
            given_mad=mad(train_reconstruction_error))

        anomaly_prediction = (np.array(test_z_scores) > anomaly_threshold).astype(int)
        metrics = compute_classification_metrics(test_labels.values, anomaly_prediction)
        roc = compute_roc(test_labels.values, test_reconstruction_error)

        history_record = {
            'cost': evaluation_result.cost,
            'auc': roc['auc'],
            'accuracy': metrics['accuracy'],
            'precision': metrics['precision'],
            'recall': metrics['recall'],
            'f_score': metrics['f_score'],
            'matthews_cc': metrics['matthews_cc'],
            **{f'info_{k}': v for k, v in evaluation_result.info.items()},
            **{f'config_{k}': v for k, v in current_config.items()},
        }

        # DataFrame.append was removed in pandas 2.0; concat keeps this forward-compatible.
        history = pd.concat([history, pd.DataFrame([history_record])], ignore_index=True)

        experiment.log('history', history)
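`compute_reconstruction_error` and the array-valued `z_score` are again project helpers. Plausible shapes, assuming a per-sample mean absolute error and the same median/MAD normalization as the `mad` sketch after Example No. 2:

# Assumed definitions consistent with how Example No. 8 calls these helpers.
import numpy as np


def compute_reconstruction_error(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    # One error value per sample: mean absolute difference across the remaining axes.
    return np.mean(np.abs(y_true - y_pred), axis=tuple(range(1, y_true.ndim)))


def z_score(errors, given_median=None, given_mad=None):
    # Robust z-scores for an array; optionally normalized by training statistics.
    errors = np.asarray(errors, dtype=float)
    center = np.median(errors) if given_median is None else given_median
    spread = mad(errors) if given_mad is None else given_mad
    return (errors - center) / spread if spread else np.zeros_like(errors)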
Example No. 9
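A physics-informed baseline: bearing defect frequencies are derived from shaft speed and pulley ratio, band-averaged FFT amplitudes around each defect frequency are compared against robust z-score thresholds, and spectra, predictions, and metrics are plotted and logged.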
def run_callable(run_config: dict):
    pulley_ratio = config['motor_diameter'] / config['bearing_diameter']
    shaft_frequency = (run_config['rpm'] / 60) * pulley_ratio
    bandwidth = config['frequency_bandwidth']
    data_set_key = run_config['data_set']
    defect_frequencies = []
    vlines = []

    experiment.print('Loading data')
    bearing_dataset = load_data(data_set_key)
    train = bearing_dataset.train('fft')
    test = bearing_dataset.test('fft', add_label=True)
    test_healthy, test_anomalous = bearing_dataset.test('fft', split='2fold')

    for i, (defect_type, defect_frequency_order) in enumerate(config['defect_frequency_orders'].items()):
        defect_frequency = defect_frequency_order * shaft_frequency
        band_start = max(int(defect_frequency) - bandwidth, 0)
        band_end = int(defect_frequency) + bandwidth
        train_means = train.iloc[:, band_start:band_end].mean(axis=1)
        train_means_median = train_means.median()
        train_means_mad = mad(train_means)
        train_z_scores = [
            compute_z_score(x, train_means_median, train_means_mad)
            for x in train_means
        ]

        defect_frequencies.append({
            'frequency': defect_frequency,
            'train_median': train_means_median,
            'train_mad': train_means_mad,
            'train_z_scores': train_z_scores,
        })

        vlines.append({
            'x': defect_frequency,
            'color': config['colors'][i],
            'label': f'{defect_type} = {defect_frequency:.2f}Hz',
            'linestyle': 'dashed'
        })

        if config['show_bandwidth']:
            for band_edge in (band_end, band_start):
                vlines.append({
                    'x': band_edge,
                    'color': config['colors'][i],
                    'label': '',
                    'linestyle': 'dashed'
                })

    experiment.print('Plotting data')
    train_median = train.median()
    test_healthy_median = test_healthy.median()
    test_anomalous_median = test_anomalous.median()
    ylim = [
        0,
        max(train_median.max(), test_healthy_median.max(), test_anomalous_median.max()) + 0.1
    ]

    experiment.plot(y=train_median.values[:250],
                    vlines=vlines,
                    ylim=ylim,
                    xlabel='frequency [Hz]',
                    ylabel='amplitude',
                    title=f'{run_config["data_set"]} (train)',
                    key='train_' + data_set_key)
    experiment.plot(y=test_healthy_median.values[:250],
                    vlines=vlines,
                    ylim=ylim,
                    xlabel='frequency [Hz]',
                    ylabel='amplitude',
                    title=f'{run_config["data_set"]} (healthy)',
                    key='healthy_' + data_set_key)
    experiment.plot(y=test_anomalous_median.values[:250],
                    vlines=vlines,
                    ylim=ylim,
                    xlabel='frequency [Hz]',
                    ylabel='amplitude',
                    title=f'{run_config["data_set"]} (anomalous)',
                    key='anomalous_' + data_set_key)

    experiment.print('Predicting test dataset')
    prediction = []
    prediction_zscores = []

    for index, row in test.iterrows():
        is_anomaly = False
        largest_zscore = 0

        for defect_frequency in defect_frequencies:
            if 'zscore_threshold' in config:
                zscore_threshold = config['zscore_threshold']
            else:
                zscore_threshold = percentile(defect_frequency['train_z_scores'],
                                              run_config['threshold_percentile'])

            band_start = max(int(defect_frequency['frequency']) - bandwidth, 0)
            band_end = int(defect_frequency['frequency']) + bandwidth
            row_mean = row.iloc[band_start:band_end].mean()
            z_score = compute_z_score(row_mean,
                                      defect_frequency['train_median'],
                                      defect_frequency['train_mad'])
            is_anomaly = is_anomaly or (z_score > zscore_threshold)
            largest_zscore = max(largest_zscore, z_score)

        prediction.append(1 if is_anomaly else 0)
        prediction_zscores.append(largest_zscore)

    experiment.plot(y=prediction_zscores,
                    xlabel='time',
                    ylabel='z-score',
                    key='prediction_zscores_' + data_set_key)
    experiment.plot(y=prediction,
                    xlabel='time',
                    ylabel='is anomaly',
                    label='prediction',
                    key='prediction_' + data_set_key,
                    close=False)
    experiment.plot(y=test['label'],
                    label='truth',
                    key='prediction_' + data_set_key,
                    create_figure=False)

    roc = compute_roc(test['label'], prediction_zscores)
    metrics = compute_classification_metrics(test['label'], prediction)
    metrics['auc'] = roc['auc']

    experiment.log('roc', roc, to_pickle=True)
    experiment.log('metrics', metrics)
    experiment.plot_roc('roc', roc['fpr'], roc['tpr'])
    experiment.print(
        f'metrics: accuracy={metrics["accuracy"]}, precision={metrics["precision"]}, recall={metrics["recall"]}, f_score={metrics["f_score"]}, auc={roc["auc"]}'
    )
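For orientation, a worked example of the frequency arithmetic at the top of this run callable, with illustrative numbers (the diameters, rpm, order, and bandwidth below are hypothetical, not taken from the project config):

# Hypothetical numbers purely to illustrate the arithmetic.
motor_diameter = 60.0     # config['motor_diameter']
bearing_diameter = 30.0   # config['bearing_diameter']
rpm = 1800                # run_config['rpm']
bpfo_order = 3.585        # one entry of config['defect_frequency_orders']
bandwidth = 10            # config['frequency_bandwidth']

pulley_ratio = motor_diameter / bearing_diameter   # 2.0
shaft_frequency = (rpm / 60) * pulley_ratio        # 30 Hz * 2.0 = 60 Hz
defect_frequency = bpfo_order * shaft_frequency    # 215.1 Hz

# The band averaged per row is then train.iloc[:, max(215 - 10, 0):215 + 10],
# i.e. FFT bins 205 through 224 around the defect frequency.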