Ejemplo n.º 1
0
def run_autoencoder(
    reader: TelemetryReader,
    models_dir: str,
    signal_groups: List[SignalsGroup],
    thresholds: Dict[str, float],
    results_path: str,
    anomalies_points_storage: dict,
    full_report: bool = False,
) -> None:
    """Analyse every signal group with the LSTM autoencoder and plot it.

    For each group a new ``LSTMAutoencoder`` is created, the group's signals
    are read through ``reader`` and analysed, and a figure containing the
    EWMA MSE together with the group's threshold line is written to
    ``{results_path}/group__{group.name}.png``.

    Args:
        reader: Source of the telemetry signals.
        models_dir: Passed as the second argument to ``LSTMAutoencoder``.
        signal_groups: Groups to analyse; ``signals_data`` of each group is
            overwritten with the freshly read data.
        thresholds: Anomaly threshold per group, keyed by ``group.name``.
        results_path: Directory prefix for the produced images.
        anomalies_points_storage: Filled (only when ``full_report`` is set)
            with the anomaly points under the key ``group__{group.name}``.
        full_report: Also store anomaly points and add one subplot per signal
            comparing the input with its decoded counterpart.
    """
    for group in signal_groups:
        print(
            f'LSTM-автокодировщик: анализ группы сигналов "{group.name}" {group.signals}...'
        )

        autoencoder = LSTMAutoencoder(len(group.signals), models_dir)

        group.signals_data = reader.get_signals(*group.signals)
        analysis = autoencoder.analyze(group)

        limit = thresholds[group.name]

        # The first subplot is always the reconstruction error vs. threshold.
        error_curve = Signal('EWMA MSE', analysis.ewma_mse, color=Colours.red)
        limit_line = Signal('Граница аномалии',
                            np.array([limit] * len(analysis.ewma_mse)),
                            color=Colours.green)
        subplots = [
            Subplot(
                signals=[error_curve, limit_line],
                xlabel=Label('Индекс точки измерения'),
                ylabel=Label(''),
            ),
        ]

        if full_report:
            anomalies_points_storage[f'group__{group.name}'] = (
                find_anomaly_points(analysis.ewma_mse,
                                    offset=1,
                                    threshold=limit))

            # One extra subplot per signal: original against decoded.
            pairs = zip(analysis.signals, analysis.decoded_signals)
            for index, (original, restored) in enumerate(pairs):
                signal_name = group.signals[index]
                subplots.append(
                    Subplot(
                        signals=[
                            Signal(signal_name, original,
                                   color=Colours.black),
                            Signal(f'{signal_name}__decoded',
                                   restored,
                                   color=Colours.green),
                        ],
                        xlabel=Label('Индекс точки измерения'),
                    ))

        img_path = f'{results_path}/group__{group.name}.png'
        print(f'LSTM-автокодировщик: печать {img_path}...')

        plot_telemetry(*subplots, img_path=img_path)
Ejemplo n.º 2
0
def run_expert_analyzer(reader: TelemetryReader, threshold: float,
                        results_path: str) -> None:
    """Run the rule-based expert analysis and plot its error rates.

    Every result returned by ``ExpertAnalyzer.analyze`` becomes one subplot
    showing the result's error rate next to a constant ``threshold`` line.
    The figure is written to ``{results_path}/rules.png``.

    Args:
        reader: Telemetry source handed to ``ExpertAnalyzer``.
        threshold: Value drawn as the anomaly boundary on every subplot.
        results_path: Directory prefix for the produced image.
    """
    print('Запуск экспертного анализа...')

    analyzer = ExpertAnalyzer(reader)
    outcomes = analyzer.analyze()

    subplots = []
    for outcome in outcomes:
        error_curve = Signal(outcome.tester, np.array(outcome.error_rate))
        boundary = Signal('Граница аномалии',
                          np.array([threshold] * len(outcome.error_rate)),
                          color=Colours.red)
        subplots.append(
            Subplot(
                signals=[error_curve, boundary],
                xlabel=Label('Точка телеметрии'),
                ylabel=Label('Показатель ошибки'),
            ))

    plot_telemetry(*subplots, img_path=f'{results_path}/rules.png')
def calculate_scores_for_decoders(plot_signals: bool = False) -> None:
    """Score the LSTM autoencoder on signals with inserted anomalies.

    For every group in ``signal_groups.SIGNALS_GROUPS`` the signals are read
    from ``GOOD_FILE``, the first 10 000 samples are dropped, zeros are
    filled with the previous value and ``_insert_anomalies`` produces the
    modified signal together with its per-point labels. The group is then
    analysed by a new ``LSTMAutoencoder`` and the EWMA MSE is compared with
    ``THRESHOLD[group_name]``.

    A classification report is printed per group, and ROC and
    precision/recall curves for all groups are shown at the end.

    Args:
        plot_signals: Also plot every modified signal together with its
            anomaly labelling.
    """
    roc_curves = {}
    pr_curves = {}

    for group_name, group in signal_groups.SIGNALS_GROUPS.items():
        print(f'Читаем сигналы для группы "{group_name}"...')
        with TelemetryReader(GOOD_FILE) as reader:
            signals = reader.get_signals(*group.signals)

            labels = {}

            for k, v in signals.items():
                lab, signals[k] = _insert_anomalies(
                    fill_zeros_with_previous(v[10_000:]))
                labels[k] = lab

            encoder = LSTMAutoencoder(len(group.signals))
            group.signals_data = signals
            result = encoder.analyze(group)

            threshold = THRESHOLD[group_name]
            predicted_labels = [
                0 if mse < threshold else 1 for mse in result.ewma_mse
            ]

            subplots = []

            if plot_signals:
                for name, data in signals.items():
                    flags = np.array(labels[name], dtype=float)
                    # Draw labelled points at the signal maximum and the rest
                    # at its minimum. np.where (instead of scaling in place)
                    # stays correct when data.max() happens to be 0.
                    labels_for_plot = np.where(flags == 1.,
                                               float(data.max()),
                                               float(data.min()))

                    subplots.append(
                        Subplot(
                            signals=[
                                Signal(name, data, color=Colours.black),
                                Signal('Разметка аномалий',
                                       labels_for_plot,
                                       color=Colours.green),
                            ],
                            xlabel=Label('Индекс точки измерения'),
                        ))

            subplots.append(
                Subplot(
                    signals=[
                        Signal('EWMA MSE', result.ewma_mse, color=Colours.red),
                        Signal('Граница аномалии',
                               np.array([threshold] * len(result.ewma_mse)),
                               color=Colours.green),
                    ],
                    xlabel=Label('Индекс точки измерения'),
                ), )

            plot_telemetry(*subplots, )

            # A point is anomalous for the group if any of its signals is
            # labelled anomalous there.
            result_labels = np.array([0] * len(result.ewma_mse))
            for lbls in labels.values():
                result_labels |= lbls

            roc_curves[group_name] = metrics.roc_curve(result_labels,
                                                       result.ewma_mse)

            print(
                f'\nClassification report for {group_name}: \n',
                metrics.classification_report(result_labels, predicted_labels))

            # The PR curve needs the continuous score, exactly like the ROC
            # curve above; thresholded 0/1 predictions collapse it to a
            # single operating point.
            pr_curves[group_name] = metrics.precision_recall_curve(
                result_labels, result.ewma_mse)

    plt.figure(figsize=(23, 20))
    plt.style.use('ggplot')

    for signal, roc in roc_curves.items():
        fpr, tpr, _ = roc
        # NOTE(review): the constant 0.02 is subtracted from the real AUC
        # before it is displayed; the reason is not visible here - confirm
        # whether this offset is intended.
        auc = round(metrics.auc(fpr, tpr) - 0.02, 2)
        # NOTE(review): group_names is not defined in this function;
        # presumably a module-level mapping from group key to display name.
        plt.plot(
            fpr,
            tpr,
            label=
            f'LSTM-автокодировщик для группы "{group_names[signal]}". AUC: {auc}',
            linewidth=5)

    # Chance-level diagonal; two end points are enough for a straight line.
    plt.plot([0, 1], [0, 1], '--', linewidth=5, color='black')

    plt.xticks(fontsize=36)
    plt.yticks(fontsize=36)

    plt.legend(loc=4, fontsize=36)
    plt.show()

    for signal, pr in pr_curves.items():
        precision, recall, _ = pr
        plt.step(recall,
                 precision,
                 label=f'LSTM-автокодировщик для группы "{signal}"',
                 where='post')

    plt.legend(loc=4)
    plt.show()
def calculate_scores_for_predictions(plot_signals: bool = False) -> None:
    """Score the LSTM predictor on signals with inserted anomalies.

    The signals listed in ``SIGNALS_FOR_TRAINING`` are read from
    ``GOOD_FILE``. For each one, zeros are filled with the previous value and
    the matching function from ``CHANGES_FUNCS`` returns the per-point labels
    and the modified signal. A new ``LSTMPredictor`` analyses the signal and
    its Mahalanobis distance is compared with ``THRESHOLD[signal_name]``.

    A classification report is printed per signal, and ROC and
    precision/recall curves for all signals are shown at the end.

    Args:
        plot_signals: Also plot every modified signal with its labelling and
            the Mahalanobis distance with the threshold.
    """
    roc_curves = {}
    pr_curves = {}

    print('Читаем сигналы...')
    with TelemetryReader(GOOD_FILE) as reader:
        signals = reader.get_signals(*SIGNALS_FOR_TRAINING)

    for signal_name, signal_data in signals.items():
        print(f'Сигнал "{signal_name}"')
        labels, signal_data = CHANGES_FUNCS[signal_name](
            fill_zeros_with_previous(signal_data))

        print('Анализируем сигнал...')
        predictor = LSTMPredictor()
        result = predictor.analyze({signal_name: signal_data})

        threshold = THRESHOLD[signal_name]

        # The distance is left-padded with 20 zeros before being compared
        # with the labels (presumably the predictor's warm-up window, so that
        # both have the same length - TODO confirm).
        m_dist = np.concatenate(
            (np.array([0.] * 20), result.mahalanobis_distance))
        predicted_labels = [0. if dst < threshold else 1. for dst in m_dist]

        if plot_signals:
            # Only needed for the plot. The float cast keeps this working for
            # integer labels, and np.where stays correct when the signal
            # maximum is 0.
            flags = np.array(labels, dtype=float)
            labels_for_plot = np.where(flags == 1.,
                                       float(signal_data.max()),
                                       float(signal_data.min()))

            plot_telemetry(
                Subplot(signals=[
                    Signal(signal_name, signal_data, color=Colours.black),
                    Signal('Разметка аномалий',
                           labels_for_plot,
                           color=Colours.green),
                ],
                        xlabel=Label('Индекс точки измерения'),
                        ylabel=Label('С')),
                Subplot(signals=[
                    Signal('Расстояние Махаланобиса',
                           result.mahalanobis_distance,
                           color=Colours.red),
                    Signal('Граница аномалии',
                           np.array([threshold] * len(signal_data)),
                           color=Colours.green),
                ],
                        ylim=(0, 1000),
                        xlabel=Label('Индекс точки измерения')),
            )

        roc = metrics.roc_curve(labels, m_dist)
        roc_curves[signal_name] = roc

        print(f'\nClassification report for {signal_name}: \n',
              metrics.classification_report(labels, predicted_labels))

        # The PR curve needs the continuous score, exactly like the ROC curve
        # above; thresholded 0/1 predictions collapse it to a single
        # operating point.
        pr_curves[signal_name] = metrics.precision_recall_curve(
            labels, m_dist)

    plt.figure(figsize=(20, 20))
    plt.style.use('ggplot')

    for signal, roc in roc_curves.items():
        fpr, tpr, _ = roc
        # NOTE(review): the constant 0.02 is subtracted from the real AUC
        # before it is displayed; the reason is not visible here - confirm
        # whether this offset is intended.
        auc = round(metrics.auc(fpr, tpr) - 0.02, 2)
        plt.plot(fpr,
                 tpr,
                 label=f'LSTM-предиктор для "{signal}". AUC: {auc}',
                 linewidth=5)

    # Chance-level diagonal; two end points are enough for a straight line.
    plt.plot([0, 1], [0, 1], '--', linewidth=5, color='black')

    plt.xticks(fontsize=36)
    plt.yticks(fontsize=36)

    plt.legend(loc=4, fontsize=36)
    plt.show()

    for signal, pr in pr_curves.items():
        precision, recall, _ = pr
        plt.step(recall,
                 precision,
                 label=f'LSTM-предиктор для "{signal}"',
                 where='post')

    plt.legend(loc=4)
    plt.show()
Ejemplo n.º 5
0
def run_predictor(
    reader: TelemetryReader,
    models_dir: str,
    signals: List[str],
    thresholds: Dict[str, float],
    results_path: str,
    anomalies_points_storage: dict,
    full_report: bool = False,
) -> None:
    """Analyse each signal with the LSTM predictor and plot the result.

    For every signal the Mahalanobis distance is plotted against the signal's
    threshold and written to ``{results_path}/predicted__{signal}.png``.

    With ``full_report`` the figure additionally shows the signal against its
    prediction, and the anomaly points are stored in
    ``anomalies_points_storage`` under ``predicted__{signal}`` and highlighted
    on the figure. For ``TelemetryAttrs.scanner_angle`` a zoomed image is
    also written for every rolled-up anomaly.

    Args:
        reader: Source of the telemetry signals.
        models_dir: Directory handed to ``LSTMPredictor``.
        signals: Names of the signals to analyse.
        thresholds: Anomaly threshold per signal name.
        results_path: Directory prefix for the produced images.
        anomalies_points_storage: Receives the anomaly points (full report
            only).
        full_report: Enables the extended output described above.
    """
    zoom_margin = 250  # points shown on each side of a zoomed anomaly

    predictor = LSTMPredictor(models_dir=models_dir)

    for signal in signals:
        print(f'LSTM-предиктор: анализ сигнала "{signal}"...')

        result = predictor.analyze(reader.get_signals(signal))

        threshold = thresholds[signal]
        subplots = []

        if full_report:
            subplots.append(
                Subplot(
                    signals=[
                        Signal(signal,
                               result.data,
                               color=Colours.blue,
                               alpha=.5),
                        Signal(f'{signal}__predicted',
                               result.predicted_data,
                               color=Colours.green,
                               alpha=.5)
                    ],
                    xlabel=Label('Индекс точки измерения'),
                ))
            anomaly_points = find_anomaly_points(result.mahalanobis_distance,
                                                 offset=1,
                                                 threshold=threshold)
            anomalies_points_storage[f'predicted__{signal}'] = anomaly_points
        else:
            anomaly_points = []

        subplots.append(
            Subplot(
                signals=[
                    Signal('Расстояние Махаланобиса',
                           result.mahalanobis_distance,
                           color=Colours.red),
                    Signal('Граница аномалии',
                           np.array([threshold] * len(result.data)),
                           color=Colours.green),
                ],
                xlabel=Label('Индекс точки измерения'),
                ylim=(0, 1000),
            ), )

        img_path = f'{results_path}/predicted__{signal}.png'
        print(f'LSTM-предиктор: печать {img_path}...')

        plot_telemetry(
            *subplots,
            img_path=img_path,
            anomaly_points=anomaly_points,
        )

        if full_report and anomaly_points and (
                signal == TelemetryAttrs.scanner_angle):
            # The raw signal is the same for every anomaly: read it once.
            scanner_angle = reader.get_signal(TelemetryAttrs.scanner_angle)

            for anomaly in roll_up_points(anomaly_points):
                # roll_up_points yields either a (first, last) tuple or a
                # single point index.
                if isinstance(anomaly, tuple):
                    first, last = anomaly[0], anomaly[1]
                    path = f'{results_path}/predicted__{signal}__{anomaly[0]}_{anomaly[1]}.png'
                else:
                    first = last = anomaly
                    path = f'{results_path}/predicted__{signal}__{anomaly}.png'

                # Clamp the window start: a negative start would make the
                # slice wrap around from the end of the signal.
                start = max(first - zoom_margin, 0)
                # Position of the anomaly inside the window (equals
                # zoom_margin unless the window was clamped).
                offset = first - start

                data = scanner_angle[start:last + zoom_margin]
                ticks = Ticks(start=start, period=50)
                if isinstance(anomaly, tuple):
                    selections = range(offset, offset + last - first)
                else:
                    selections = [offset]

                print(
                    f'LSTM-предиктор: печать увеличенных фрагментов сигнала "{TelemetryAttrs.scanner_angle}"...'
                )
                plot_telemetry(
                    Subplot(
                        signals=[Signal(TelemetryAttrs.scanner_angle, data)],
                        xlabel=Label('Индекс точки измерения'),
                        ticks=ticks,
                    ),
                    img_path=path,
                    anomaly_points=selections,
                    anomaly_selection_width=10,
                )
def test_predictor() -> None:
    """Visually check the LSTM predictor on several synthetic anomalies.

    Three signals are read from ``GOOD_FILE`` (zeros filled with the previous
    value). The predictor is then run on: the untouched temperature signal,
    the same signal with noise, a stretched power signal, a ripple signal
    with two spikes, a power signal with a random fragment, and a
    phase-shifted power signal. Each case is plotted together with the
    Mahalanobis distance and a constant threshold of 400.
    """
    threshold = 400
    predictor = LSTMPredictor()

    def point_axis():
        # A new label per subplot, as in every plot below.
        return Label('Индекс точки измерения')

    def distance_subplot(analysis, reference):
        # Mahalanobis distance with a threshold line as long as `reference`.
        return Subplot(
            signals=[
                Signal('Расстояние Махаланобиса',
                       analysis.mahalanobis_distance,
                       color=Colours.red),
                Signal('Граница аномалии',
                       np.array([threshold] * len(reference)),
                       color=Colours.green),
            ],
            ylim=(0, 1000),
            xlabel=point_axis(),
        )

    with TelemetryReader(GOOD_FILE) as reader:
        tu_temperature = fill_zeros_with_previous(
            reader.get_signal(TelemetryAttrs.tu1_temperature))
        ppt_ripples = fill_zeros_with_previous(
            reader.get_signal(TelemetryAttrs.ppt_ripple))
        str27v = fill_zeros_with_previous(
            reader.get_signal(TelemetryAttrs.str_power))

    # Signal without anomalies
    clean_analysis = predictor.analyze(
        {TelemetryAttrs.tu1_temperature: tu_temperature})
    plot_telemetry(
        Subplot(
            signals=[
                Signal(TelemetryAttrs.tu1_temperature,
                       tu_temperature,
                       color=Colours.black),
            ],
            xlabel=point_axis(),
            ylabel=Label('С'),
        ),
        distance_subplot(clean_analysis, tu_temperature),
    )

    # Signal with noise
    noised_signal = insert_noise(tu_temperature, low=-0.05, high=0.05)
    noised_analysis = predictor.analyze(
        {TelemetryAttrs.tu1_temperature: noised_signal})
    plot_telemetry(
        Subplot(
            signals=[
                Signal(TelemetryAttrs.tu1_temperature,
                       noised_signal,
                       color=Colours.black),
            ],
            xlabel=point_axis(),
            ylabel=Label('С'),
        ),
        distance_subplot(noised_analysis, noised_signal),
    )

    # Increased signal period
    stretched_signal = stretch(str27v, 17500, 27500)
    stretched_analysis = predictor.analyze(
        {TelemetryAttrs.str_power: stretched_signal})
    plot_telemetry(
        Subplot(
            signals=[
                Signal(TelemetryAttrs.str_power, str27v, color=Colours.black),
            ],
            xlabel=point_axis(),
            ylabel=Label('В'),
        ),
        Subplot(
            signals=[
                Signal(TelemetryAttrs.str_power + ' (с аномалией)',
                       stretched_signal,
                       color=Colours.black),
            ],
            xlabel=point_axis(),
            ylabel=Label('В'),
            ticks=Ticks(font_size=20),
        ),
        distance_subplot(stretched_analysis, stretched_signal),
    )

    # Sharp jump of the signal value
    ppt_ripples[23500] = 3.5
    ppt_ripples[42000] = 2.

    spike_analysis = predictor.analyze(
        {TelemetryAttrs.ppt_ripple: ppt_ripples})
    plot_telemetry(
        Subplot(
            signals=[
                Signal(TelemetryAttrs.ppt_ripple + ' (с аномалией)',
                       ppt_ripples,
                       color=Colours.black),
            ],
            xlabel=point_axis(),
            ticks=Ticks(font_size=20),
        ),
        distance_subplot(spike_analysis, ppt_ripples),
        anomaly_points=find_anomaly_points(
            spike_analysis.mahalanobis_distance, threshold=threshold),
    )

    # Random fragment inside the signal
    str27v_changed = str27v.copy()
    for position in range(42500, 47500):
        str27v_changed[position] = 28.5 + np.random.normal(-0.05, 0.05)

    fragment_analysis = predictor.analyze(
        {TelemetryAttrs.str_power: str27v_changed})
    plot_telemetry(
        Subplot(
            signals=[
                Signal(TelemetryAttrs.str_power + ' (с аномалией)',
                       str27v_changed,
                       color=Colours.black),
            ],
            xlabel=point_axis(),
            ylabel=Label('В'),
            ticks=Ticks(font_size=20),
        ),
        distance_subplot(fragment_analysis, str27v_changed),
        anomaly_points=find_anomaly_points(
            fragment_analysis.mahalanobis_distance, threshold=threshold),
    )

    # Signal shifted in phase
    str27v_shifted = np.roll(str27v, 10000)[10000:]

    shifted_analysis = predictor.analyze(
        {TelemetryAttrs.str_power: str27v_shifted})
    plot_telemetry(
        Subplot(
            signals=[
                Signal(TelemetryAttrs.str_power + ' (ориг.)',
                       str27v[10000:],
                       color=Colours.black),
                Signal(TelemetryAttrs.str_power,
                       str27v_shifted,
                       color=Colours.green,
                       alpha=0.5),
            ],
            xlabel=point_axis(),
            ylabel=Label('В'),
            ticks=Ticks(font_size=20),
        ),
        distance_subplot(shifted_analysis, str27v_shifted),
        anomaly_points=find_anomaly_points(
            shifted_analysis.mahalanobis_distance, threshold=threshold),
    )
def test_autoencoder() -> None:
    """Visually check the LSTM autoencoder on several synthetic anomalies.

    The signals of ``SIGNALS_GROUPS[STR_GROUP]`` are read from ``GOOD_FILE``
    and analysed in six scenarios: untouched, with noise, stretched, with
    sharp jumps, with a random fragment and with a phase shift. Every
    scenario starts from an independent copy of the data that was read, so
    the anomalies of one scenario do not leak into the next.

    Only the last scenario (phase shift) is plotted; the earlier ones are
    just executed.
    """
    group = SIGNALS_GROUPS[STR_GROUP]
    encoder = LSTMAutoencoder(len(group.signals))

    with TelemetryReader(GOOD_FILE) as reader:
        mk_signals = reader.get_signals(*group.signals)

    def fresh_signals():
        # Copy both the dict and its arrays: the scenarios below replace
        # entries and write into the arrays in place.
        return {name: np.copy(values) for name, values in mk_signals.items()}

    # Signal group without anomalies
    group.signals_data = fresh_signals()
    result = encoder.analyze(group)
    threshold = 0.1

    # Signal group with noise
    group.signals_data = fresh_signals()
    group.signals_data[TelemetryAttrs.str_power] = insert_noise(
        fill_zeros_with_previous(group.signals_data[TelemetryAttrs.str_power]))
    result = encoder.analyze(group)
    threshold = 0.1

    # Increased signal period
    group.signals_data = fresh_signals()
    group.signals_data[TelemetryAttrs.tu1_temperature] = (stretch(
        fill_zeros_with_previous(
            group.signals_data[TelemetryAttrs.tu1_temperature]),
        30_000,
        35_000,
        factor=3,
    )[:len(group.signals_data[TelemetryAttrs.str_power])])
    result = encoder.analyze(group)
    threshold = 0.1

    # Sharp jump of the signal value
    group.signals_data = fresh_signals()
    group.signals_data[TelemetryAttrs.str_power][27500:27520] = 20.
    group.signals_data[TelemetryAttrs.tu1_temperature][45000:45100] = -126.
    result = encoder.analyze(group)
    threshold = 1.

    # Random fragment inside the signals
    group.signals_data = fresh_signals()
    for i in range(12000, 13500, 10):
        group.signals_data[
            TelemetryAttrs.tu1_temperature][i] = -123. + np.random.normal(
                -1, 1)
    result = encoder.analyze(group)
    threshold = 0.1

    # Phase shift
    group.signals_data = fresh_signals()
    original = group.signals_data[TelemetryAttrs.str_power].copy()
    group.signals_data[TelemetryAttrs.str_power] = np.roll(
        group.signals_data[TelemetryAttrs.str_power], 5000)
    result = encoder.analyze(group)
    threshold = 0.1

    str27v = group.signals_data[TelemetryAttrs.str_power]
    tu1_temperature = group.signals_data[TelemetryAttrs.tu1_temperature]

    plot_telemetry(
        Subplot(
            signals=[
                Signal(TelemetryAttrs.str_power,
                       fill_zeros_with_previous(str27v),
                       color=Colours.black),
                Signal(TelemetryAttrs.str_power + ' (ориг.)',
                       fill_zeros_with_previous(original),
                       color=Colours.yellow,
                       alpha=0.5)
            ],
            xlabel=Label('Индекс точки измерения'),
            ylabel=Label('В'),
        ),
        Subplot(
            signals=[
                Signal(TelemetryAttrs.tu1_temperature,
                       fill_zeros_with_previous(tu1_temperature),
                       color=Colours.black)
            ],
            xlabel=Label('Индекс точки измерения'),
            ylabel=Label('С'),
        ),
        Subplot(
            signals=[
                Signal('EWMA MSE', result.ewma_mse, color=Colours.red),
                Signal('Граница аномалии',
                       np.array([threshold] * len(result.ewma_mse)),
                       color=Colours.green),
            ],
            xlabel=Label('Индекс точки измерения'),
        ),
        anomaly_points=find_anomaly_points(result.ewma_mse,
                                           threshold=threshold))
def test_simple_autoencoder() -> None:
    """Train and run the LSTM autoencoder on three synthetic signals.

    A sinusoid, a square wave and a step ("binary") signal of 10 000 points
    are generated, a few fragments are overwritten with constant or random
    values, and an ``LSTMAutoencoder`` is trained on and then applied to the
    very same group. The three signals and the resulting EWMA MSE (with a
    0.99 threshold line) are plotted.
    """
    timestamps = np.arange(10_000)
    phase = timestamps / 10_000

    sinusoid = np.sin(20 * np.pi * phase)
    square = signal.square(100 * np.pi * phase)
    binary = np.array([0.] * 5_000 + [1.] * 5_000, dtype=float)

    # Constant fragments.
    sinusoid[7000:7200] = 2
    square[5050:5150] = -3
    binary[8000:8100] = 0

    # Random fragment shared by the sinusoid and the square wave; the draws
    # alternate between the two signals point by point.
    for position in range(1000, 1100):
        sinusoid[position] = np.random.random()
        square[position] = np.random.random()

    signals = {
        'Sinusoid': sinusoid,
        'Square': square,
        'Binary': binary,
    }

    coder = LSTMAutoencoder(len(signals))
    group = SignalsGroup(
        'test',
        signals=list(signals.keys()),
        signals_data=signals,
    )

    # Train
    coder.train(group)

    # Analyze
    result = coder.analyze(group)

    # One subplot per input signal, in the order of the dict above.
    subplots = []
    for title, values in signals.items():
        subplots.append(
            Subplot(
                signals=[Signal(title, values, line_width=5)],
                xlabel=Label('Time'),
                ylabel=Label('Value'),
                legend=Legend(font_size=70),
            ))

    subplots.append(
        Subplot(
            signals=[
                Signal('EWMA MSE',
                       result.ewma_mse,
                       line_width=5,
                       color=Colours.red),
                Signal('Anomaly threshold',
                       np.array([0.99] * len(result.ewma_mse)),
                       line_width=5,
                       color=Colours.yellow),
            ],
            xlabel=Label('Time'),
            ylabel=Label('Value'),
            legend=Legend(font_size=70),
        ))

    plot_telemetry(*subplots)