def run_autoencoder(
    reader: TelemetryReader,
    models_dir: str,
    signal_groups: List[SignalsGroup],
    thresholds: Dict[str, float],
    results_path: str,
    anomalies_points_storage: dict,
    full_report: bool = False,
) -> None:
    """Analyze each signal group with an LSTM autoencoder and save a plot.

    For every group the signals are read through ``reader.get_signals``,
    stored on ``group.signals_data`` and handed to
    ``LSTMAutoencoder.analyze``. The resulting EWMA MSE is drawn together
    with the group's threshold and written to
    ``{results_path}/group__{group.name}.png``.

    Args:
        reader: Source of the telemetry signals.
        models_dir: Passed to ``LSTMAutoencoder`` as its second argument.
        signal_groups: Groups to analyze; ``signals_data`` of each group is
            overwritten.
        thresholds: Anomaly threshold per group name.
        results_path: Directory prefix for the produced images.
        anomalies_points_storage: Receives the anomaly points under the key
            ``group__{group.name}`` when ``full_report`` is set.
        full_report: When true, anomaly points are stored and one extra
            subplot (original vs. decoded) is added for every signal.

    Raises:
        KeyError: If ``thresholds`` has no entry for a group name.
    """
    point_index_text = 'Индекс точки измерения'

    for group in signal_groups:
        print(
            f'LSTM-автокодировщик: анализ группы сигналов "{group.name}" {group.signals}...'
        )

        encoder = LSTMAutoencoder(len(group.signals), models_dir)
        group.signals_data = reader.get_signals(*group.signals)
        result = encoder.analyze(group)
        threshold = thresholds[group.name]

        # The first subplot is always the reconstruction error against the
        # constant threshold line.
        error_signal = Signal('EWMA MSE', result.ewma_mse, color=Colours.red)
        boundary_signal = Signal(
            'Граница аномалии',
            np.array([threshold] * len(result.ewma_mse)),
            color=Colours.green,
        )
        subplots = [
            Subplot(
                signals=[error_signal, boundary_signal],
                xlabel=Label(point_index_text),
                ylabel=Label(''),
            ),
        ]

        if full_report:
            anomaly_points = find_anomaly_points(
                result.ewma_mse, offset=1, threshold=threshold)
            anomalies_points_storage[f'group__{group.name}'] = anomaly_points

            decoded_subplots = []
            pairs = zip(result.signals, result.decoded_signals)
            for position, (data, decoded) in enumerate(pairs):
                signal_name = group.signals[position]
                decoded_subplots.append(
                    Subplot(
                        signals=[
                            Signal(signal_name, data, color=Colours.black),
                            Signal(f'{signal_name}__decoded',
                                   decoded,
                                   color=Colours.green),
                        ],
                        xlabel=Label(point_index_text),
                    ))
            subplots.extend(decoded_subplots)

        img_path = f'{results_path}/group__{group.name}.png'
        print(f'LSTM-автокодировщик: печать {img_path}...')
        plot_telemetry(*subplots, img_path=img_path)
def run_expert_analyzer(reader: TelemetryReader, threshold: float,
                        results_path: str) -> None:
    """Run the rule-based expert analysis and save one plot of its results.

    Every result returned by ``ExpertAnalyzer.analyze`` becomes a subplot
    showing its error rate next to a constant threshold line. The figure is
    written to ``{results_path}/rules.png``.

    Args:
        reader: Telemetry source handed to ``ExpertAnalyzer``.
        threshold: Value drawn as the anomaly boundary on every subplot.
        results_path: Directory prefix for the produced image.
    """
    print('Запуск экспертного анализа...')
    results = ExpertAnalyzer(reader).analyze()

    subplots = []
    for result in results:
        error_rate_signal = Signal(result.tester, np.array(result.error_rate))
        boundary_signal = Signal(
            'Граница аномалии',
            np.array([threshold] * len(result.error_rate)),
            color=Colours.red,
        )
        subplots.append(
            Subplot(
                signals=[error_rate_signal, boundary_signal],
                xlabel=Label('Точка телеметрии'),
                ylabel=Label('Показатель ошибки'),
            ))

    plot_telemetry(*subplots, img_path=f'{results_path}/rules.png')
def calculate_scores_for_decoders(plot_signals: bool = False) -> None:
    """Score the LSTM autoencoders on telemetry with injected anomalies.

    For every group in ``signal_groups.SIGNALS_GROUPS`` the signals are read
    from ``GOOD_FILE``, the first 10 000 points are dropped, anomalies are
    injected with ``_insert_anomalies`` and the group is analyzed by a fresh
    ``LSTMAutoencoder``. A classification report is printed per group, and
    the ROC and precision-recall curves of all groups are shown at the end.

    Both curves are built from the continuous EWMA MSE score; the
    thresholded 0/1 predictions are used only for the classification report.

    Args:
        plot_signals: When true, every signal of the group is plotted with
            its anomaly markup above the EWMA MSE subplot. The EWMA MSE
            subplot itself is plotted regardless.

    Raises:
        KeyError: If ``THRESHOLD`` has no entry for a group name.
    """
    roc_curves = {}
    pr_curves = {}

    for group_name, group in signal_groups.SIGNALS_GROUPS.items():
        print(f'Читаем сигналы для группы "{group_name}"...')
        with TelemetryReader(GOOD_FILE) as reader:
            signals = reader.get_signals(*group.signals)

        # Replace every signal with its anomalous version and remember the
        # per-point labels returned alongside it.
        labels = {}
        for name in list(signals):
            labels[name], signals[name] = _insert_anomalies(
                fill_zeros_with_previous(signals[name][10_000:]))

        encoder = LSTMAutoencoder(len(group.signals))
        group.signals_data = signals
        result = encoder.analyze(group)

        threshold = THRESHOLD[group_name]
        scores = np.asarray(result.ewma_mse)
        # Anything that is not strictly below the threshold is an anomaly.
        predicted_labels = np.where(scores < threshold, 0, 1)

        subplots = []
        if plot_signals:
            for name, data in signals.items():
                # Draw the markup as a step between the signal's min and max
                # so that it is visible at the signal's own scale.
                labels_for_plot = np.where(
                    np.asarray(labels[name]) == 1,
                    data.max(),
                    data.min(),
                ).astype(float)
                subplots.append(
                    Subplot(
                        signals=[
                            Signal(name, data, color=Colours.black),
                            Signal('Разметка аномалий',
                                   labels_for_plot,
                                   color=Colours.green),
                        ],
                        xlabel=Label('Индекс точки измерения'),
                    ))
        subplots.append(
            Subplot(
                signals=[
                    Signal('EWMA MSE', result.ewma_mse, color=Colours.red),
                    Signal('Граница аномалии',
                           np.array([threshold] * len(result.ewma_mse)),
                           color=Colours.green),
                ],
                xlabel=Label('Индекс точки измерения'),
            ))
        plot_telemetry(*subplots)

        # A point is anomalous for the group if it is anomalous in any of
        # the group's signals.
        result_labels = np.zeros(len(result.ewma_mse), dtype=int)
        for signal_labels in labels.values():
            result_labels |= signal_labels

        roc_curves[group_name] = metrics.roc_curve(result_labels, scores)
        print(
            f'\nClassification report for {group_name}: \n',
            metrics.classification_report(result_labels, predicted_labels))
        # The PR curve needs the raw score: thresholded labels would reduce
        # it to a single operating point.
        pr_curves[group_name] = metrics.precision_recall_curve(
            result_labels, scores)

    plt.figure(figsize=(23, 20))
    plt.style.use('ggplot')
    for signal, (fpr, tpr, _) in roc_curves.items():
        auc = round(metrics.auc(fpr, tpr), 2)
        # NOTE(review): group_names is defined outside this function;
        # presumably it maps group keys to display names.
        plt.plot(
            fpr,
            tpr,
            label=
            f'LSTM-автокодировщик для группы "{group_names[signal]}". \nAUC: {auc}',
            linewidth=5)
    # Chance-level diagonal.
    plt.plot([0, 1], [0, 1], linestyle='--', linewidth=5, color='black')
    plt.xticks(fontsize=36)
    plt.yticks(fontsize=36)
    plt.legend(loc=4, fontsize=36)
    plt.show()

    for signal, (precision, recall, _) in pr_curves.items():
        plt.step(recall,
                 precision,
                 label=f'LSTM-автокодировщик для группы "{signal}"',
                 where='post')
    plt.legend(loc=4)
    plt.show()
def calculate_scores_for_predictions(plot_signals: bool = False) -> None:
    """Score the LSTM predictor on signals with injected anomalies.

    Every signal listed in ``SIGNALS_FOR_TRAINING`` is read from
    ``GOOD_FILE``, distorted by its entry in ``CHANGES_FUNCS`` (which also
    returns the per-point labels) and analyzed by a fresh ``LSTMPredictor``.
    A classification report is printed per signal, and the ROC and
    precision-recall curves of all signals are shown at the end.

    Both curves are built from the continuous Mahalanobis distance; the
    thresholded 0/1 predictions are used only for the classification report.

    Args:
        plot_signals: When true, each distorted signal is plotted with its
            anomaly markup and the Mahalanobis distance.

    Raises:
        KeyError: If ``CHANGES_FUNCS`` or ``THRESHOLD`` has no entry for a
            signal name.
    """
    roc_curves = {}
    pr_curves = {}

    print('Читаем сигналы...')
    with TelemetryReader(GOOD_FILE) as reader:
        signals = reader.get_signals(*SIGNALS_FOR_TRAINING)

    for signal_name, signal_data in signals.items():
        print(f'Сигнал "{signal_name}"')
        labels, signal_data = CHANGES_FUNCS[signal_name](
            fill_zeros_with_previous(signal_data))

        print('Анализируем сигнал...')
        predictor = LSTMPredictor()
        result = predictor.analyze({signal_name: signal_data})

        threshold = THRESHOLD[signal_name]
        # Left-pad the distance with 20 zeros before comparing it with the
        # labels (presumably the predictor's warm-up window - TODO confirm).
        m_dist = np.concatenate(
            (np.zeros(20), result.mahalanobis_distance))
        # Anything that is not strictly below the threshold is an anomaly.
        predicted_labels = np.where(m_dist < threshold, 0., 1.)

        if plot_signals:
            # Draw the markup as a step between the signal's min and max so
            # that it is visible at the signal's own scale.
            labels_for_plot = np.where(
                np.asarray(labels) == 1.,
                signal_data.max(),
                signal_data.min(),
            ).astype(float)
            plot_telemetry(
                Subplot(signals=[
                    Signal(signal_name, signal_data, color=Colours.black),
                    Signal('Разметка аномалий',
                           labels_for_plot,
                           color=Colours.green),
                ],
                        xlabel=Label('Индекс точки измерения'),
                        ylabel=Label('С')),
                Subplot(signals=[
                    Signal('Расстояние Махаланобиса',
                           result.mahalanobis_distance,
                           color=Colours.red),
                    Signal('Граница аномалии',
                           np.array([threshold] * len(signal_data)),
                           color=Colours.green),
                ],
                        ylim=(0, 1000),
                        xlabel=Label('Индекс точки измерения')),
            )

        roc_curves[signal_name] = metrics.roc_curve(labels, m_dist)
        print(f'\nClassification report for {signal_name}: \n',
              metrics.classification_report(labels, predicted_labels))
        # The PR curve needs the raw score: thresholded labels would reduce
        # it to a single operating point.
        pr_curves[signal_name] = metrics.precision_recall_curve(
            labels, m_dist)

    plt.figure(figsize=(20, 20))
    plt.style.use('ggplot')
    for signal, (fpr, tpr, _) in roc_curves.items():
        auc = round(metrics.auc(fpr, tpr), 2)
        plt.plot(fpr,
                 tpr,
                 label=f'LSTM-предиктор для "{signal}". \nAUC: {auc}',
                 linewidth=5)
    # Chance-level diagonal.
    plt.plot([0, 1], [0, 1], linestyle='--', linewidth=5, color='black')
    plt.xticks(fontsize=36)
    plt.yticks(fontsize=36)
    plt.legend(loc=4, fontsize=36)
    plt.show()

    for signal, (precision, recall, _) in pr_curves.items():
        plt.step(recall,
                 precision,
                 label=f'LSTM-предиктор для "{signal}"',
                 where='post')
    plt.legend(loc=4)
    plt.show()
def run_predictor(
    reader: TelemetryReader,
    models_dir: str,
    signals: List[str],
    thresholds: Dict[str, float],
    results_path: str,
    anomalies_points_storage: dict,
    full_report: bool = False,
) -> None:
    """Analyze each signal with the LSTM predictor and save the plots.

    For every signal the Mahalanobis distance returned by
    ``LSTMPredictor.analyze`` is plotted against the signal's threshold and
    written to ``{results_path}/predicted__{signal}.png``. With
    ``full_report`` the original and predicted data are plotted as well, the
    anomaly points are stored, and for ``TelemetryAttrs.scanner_angle`` a
    zoomed image is written for every rolled-up anomaly.

    Args:
        reader: Source of the telemetry signals.
        models_dir: Passed to ``LSTMPredictor`` as ``models_dir``.
        signals: Names of the signals to analyze.
        thresholds: Anomaly threshold per signal name.
        results_path: Directory prefix for the produced images.
        anomalies_points_storage: Receives the anomaly points under the key
            ``predicted__{signal}`` when ``full_report`` is set.
        full_report: Enables the extra subplot, anomaly detection and the
            zoomed scanner-angle fragments.

    Raises:
        KeyError: If ``thresholds`` has no entry for a signal name.
    """
    # Number of points shown on each side of a zoomed anomaly.
    zoom_margin = 250

    predictor = LSTMPredictor(models_dir=models_dir)

    for signal in signals:
        print(f'LSTM-предиктор: анализ сигнала "{signal}"...')
        result = predictor.analyze(reader.get_signals(signal))
        threshold = thresholds[signal]

        subplots = []
        if full_report:
            subplots.append(
                Subplot(
                    signals=[
                        Signal(signal,
                               result.data,
                               color=Colours.blue,
                               alpha=.5),
                        Signal(f'{signal}__predicted',
                               result.predicted_data,
                               color=Colours.green,
                               alpha=.5)
                    ],
                    xlabel=Label('Индекс точки измерения'),
                ))
            anomaly_points = find_anomaly_points(result.mahalanobis_distance,
                                                 offset=1,
                                                 threshold=threshold)
            anomalies_points_storage[f'predicted__{signal}'] = anomaly_points
        else:
            anomaly_points = []

        subplots.append(
            Subplot(
                signals=[
                    Signal('Расстояние Махаланобиса',
                           result.mahalanobis_distance,
                           color=Colours.red),
                    Signal('Граница аномалии',
                           np.array([threshold] * len(result.data)),
                           color=Colours.green),
                ],
                xlabel=Label('Индекс точки измерения'),
                ylim=(0, 1000),
            ))

        img_path = f'{results_path}/predicted__{signal}.png'
        print(f'LSTM-предиктор: печать {img_path}"...')
        plot_telemetry(
            *subplots,
            img_path=img_path,
            anomaly_points=anomaly_points,
        )

        if not (full_report and anomaly_points
                and signal == TelemetryAttrs.scanner_angle):
            continue

        # The raw signal is the same for every fragment, so read it once.
        scanner_data = reader.get_signal(TelemetryAttrs.scanner_angle)
        for anomaly in roll_up_points(anomaly_points):
            # roll_up_points yields either a (first, last) tuple or a single
            # point index.
            is_range = isinstance(anomaly, tuple)
            if is_range:
                first, last = anomaly[0], anomaly[1]
                path = f'{results_path}/predicted__{signal}__{anomaly[0]}_{anomaly[1]}.png'
            else:
                first = last = anomaly
                path = f'{results_path}/predicted__{signal}__{anomaly}.png'

            # Clamp the window start at 0: a negative slice start would wrap
            # around to the end of the array and yield a wrong fragment.
            start = max(first - zoom_margin, 0)
            # Position of the anomaly inside the fragment; equals
            # zoom_margin unless the window was clamped.
            offset = first - start
            fragment = scanner_data[start:last + zoom_margin]
            ticks = Ticks(start=start, period=50)
            if is_range:
                selections = range(offset, offset + last - first)
            else:
                selections = [offset]

            print(
                f'LSTM-предиктор: печать увеличенных фрагментов сигнала \n{TelemetryAttrs.scanner_angle}"...'
            )
            plot_telemetry(
                Subplot(
                    signals=[Signal(TelemetryAttrs.scanner_angle, fragment)],
                    xlabel=Label('Индекс точки измерения'),
                    ticks=ticks,
                ),
                img_path=path,
                anomaly_points=selections,
                anomaly_selection_width=10,
            )
def test_predictor() -> None:
    """Show how the LSTM predictor reacts to several kinds of distortion.

    Three signals are read from ``GOOD_FILE`` and, one scenario after
    another, analyzed and plotted: the untouched signal, added noise, a
    stretched period, isolated spikes, a randomly replaced fragment and a
    phase shift. Every figure ends with the Mahalanobis distance drawn
    against a fixed threshold of 400.
    """
    threshold = 400
    point_label = 'Индекс точки измерения'

    def distance_subplot(result, length):
        """Build the Mahalanobis-distance subplot with the threshold line."""
        return Subplot(
            signals=[
                Signal('Расстояние Махаланобиса',
                       result.mahalanobis_distance,
                       color=Colours.red),
                Signal('Граница аномалии',
                       np.array([threshold] * length),
                       color=Colours.green),
            ],
            ylim=(0, 1000),
            xlabel=Label(point_label),
        )

    predictor = LSTMPredictor()

    with TelemetryReader(GOOD_FILE) as reader:
        tu_temperature = fill_zeros_with_previous(
            reader.get_signal(TelemetryAttrs.tu1_temperature))
        ppt_ripples = fill_zeros_with_previous(
            reader.get_signal(TelemetryAttrs.ppt_ripple))
        str27v = fill_zeros_with_previous(
            reader.get_signal(TelemetryAttrs.str_power))

    # Signal without anomalies.
    result_for_orig = predictor.analyze(
        {TelemetryAttrs.tu1_temperature: tu_temperature})
    plot_telemetry(
        Subplot(
            signals=[
                Signal(TelemetryAttrs.tu1_temperature,
                       tu_temperature,
                       color=Colours.black),
            ],
            xlabel=Label(point_label),
            ylabel=Label('С'),
        ),
        distance_subplot(result_for_orig, len(tu_temperature)),
    )

    # Signal with noise.
    noised_signal = insert_noise(tu_temperature, low=-0.05, high=0.05)
    result_for_changed = predictor.analyze(
        {TelemetryAttrs.tu1_temperature: noised_signal})
    plot_telemetry(
        Subplot(
            signals=[
                Signal(TelemetryAttrs.tu1_temperature,
                       noised_signal,
                       color=Colours.black),
            ],
            xlabel=Label(point_label),
            ylabel=Label('С'),
        ),
        distance_subplot(result_for_changed, len(noised_signal)),
    )

    # Stretched signal period.
    stretched_signal = stretch(str27v, 17500, 27500)
    result_for_stretched = predictor.analyze(
        {TelemetryAttrs.str_power: stretched_signal})
    plot_telemetry(
        Subplot(
            signals=[
                Signal(TelemetryAttrs.str_power, str27v, color=Colours.black),
            ],
            xlabel=Label(point_label),
            ylabel=Label('В'),
        ),
        Subplot(
            signals=[
                Signal(TelemetryAttrs.str_power + ' (с аномалией)',
                       stretched_signal,
                       color=Colours.black),
            ],
            xlabel=Label('Индекс точки \nизмерения'),
            ylabel=Label('В'),
            ticks=Ticks(font_size=20),
        ),
        distance_subplot(result_for_stretched, len(stretched_signal)),
    )

    # Abrupt jumps of the signal value.
    ppt_ripples[23500] = 3.5
    ppt_ripples[42000] = 2.
    result_for_ppt_ripples = predictor.analyze(
        {TelemetryAttrs.ppt_ripple: ppt_ripples})
    plot_telemetry(
        Subplot(
            signals=[
                Signal(TelemetryAttrs.ppt_ripple + ' (с аномалией)',
                       ppt_ripples,
                       color=Colours.black),
            ],
            xlabel=Label(point_label),
            ticks=Ticks(font_size=20),
        ),
        distance_subplot(result_for_ppt_ripples, len(ppt_ripples)),
        anomaly_points=find_anomaly_points(
            result_for_ppt_ripples.mahalanobis_distance, threshold=400),
    )

    # Random fragment inside the signal.
    str27v_changed = str27v.copy()
    for idx in range(42500, 47500):
        str27v_changed[idx] = 28.5 + np.random.normal(-0.05, 0.05)
    result_for_randomly_changed = predictor.analyze(
        {TelemetryAttrs.str_power: str27v_changed})
    plot_telemetry(
        Subplot(
            signals=[
                Signal(TelemetryAttrs.str_power + ' (с аномалией)',
                       str27v_changed,
                       color=Colours.black),
            ],
            xlabel=Label(point_label),
            ylabel=Label('В'),
            ticks=Ticks(font_size=20),
        ),
        distance_subplot(result_for_randomly_changed, len(str27v_changed)),
        anomaly_points=find_anomaly_points(
            result_for_randomly_changed.mahalanobis_distance, threshold=400),
    )

    # Phase-shifted signal.
    str27v_shifted = np.roll(str27v, 10000)[10000:]
    result_for_shifted = predictor.analyze(
        {TelemetryAttrs.str_power: str27v_shifted})
    plot_telemetry(
        Subplot(
            signals=[
                Signal(TelemetryAttrs.str_power + ' (ориг.)',
                       str27v[10000:],
                       color=Colours.black),
                Signal(TelemetryAttrs.str_power,
                       str27v_shifted,
                       color=Colours.green,
                       alpha=0.5),
            ],
            xlabel=Label(point_label),
            ylabel=Label('В'),
            ticks=Ticks(font_size=20),
        ),
        distance_subplot(result_for_shifted, len(str27v_shifted)),
        anomaly_points=find_anomaly_points(
            result_for_shifted.mahalanobis_distance, threshold=400),
    )
def test_autoencoder() -> None:
    """Show how the LSTM autoencoder reacts to several kinds of distortion.

    The signals of ``SIGNALS_GROUPS[STR_GROUP]`` are read from ``GOOD_FILE``
    once. Each scenario then starts from an independent copy of that data,
    applies one distortion (none, noise, stretched period, spikes, random
    fragment, phase shift) and runs ``LSTMAutoencoder.analyze``. Only the
    last scenario, the phase shift, is plotted.

    Working on copies keeps the scenarios isolated: re-assigning the same
    dictionary before each scenario would carry every earlier in-place
    distortion over into the later ones.
    """
    group = SIGNALS_GROUPS[STR_GROUP]
    encoder = LSTMAutoencoder(len(group.signals))

    with TelemetryReader(GOOD_FILE) as reader:
        mk_signals = reader.get_signals(*group.signals)

    def pristine_signals() -> dict:
        """Return an independent copy of the signals as read from the file."""
        return {name: np.array(data) for name, data in mk_signals.items()}

    # NOTE(review): the results of all scenarios but the last are
    # overwritten without being plotted; they are kept so that any scenario
    # can be inspected by moving the plot call.

    # Signal group without anomalies.
    group.signals_data = pristine_signals()
    result = encoder.analyze(group)
    threshold = 0.1

    # Signal group with noise.
    group.signals_data = pristine_signals()
    group.signals_data[TelemetryAttrs.str_power] = insert_noise(
        fill_zeros_with_previous(group.signals_data[TelemetryAttrs.str_power]))
    result = encoder.analyze(group)
    threshold = 0.1

    # Stretched signal period; the result is cut back to the length of the
    # other signal of the group.
    group.signals_data = pristine_signals()
    group.signals_data[TelemetryAttrs.tu1_temperature] = (stretch(
        fill_zeros_with_previous(
            group.signals_data[TelemetryAttrs.tu1_temperature]),
        30_000,
        35_000,
        factor=3,
    )[:len(group.signals_data[TelemetryAttrs.str_power])])
    result = encoder.analyze(group)
    threshold = 0.1

    # Abrupt jumps of the signal values.
    group.signals_data = pristine_signals()
    group.signals_data[TelemetryAttrs.str_power][27500:27520] = 20.
    group.signals_data[TelemetryAttrs.tu1_temperature][45000:45100] = -126.
    result = encoder.analyze(group)
    threshold = 1.

    # Random fragment in the signals (every 10th point is replaced).
    group.signals_data = pristine_signals()
    for i in range(12000, 13500, 10):
        group.signals_data[TelemetryAttrs.tu1_temperature][i] = (
            -123. + np.random.normal(-1, 1))
    result = encoder.analyze(group)
    threshold = 0.1

    # Phase shift.
    group.signals_data = pristine_signals()
    original = group.signals_data[TelemetryAttrs.str_power].copy()
    group.signals_data[TelemetryAttrs.str_power] = np.roll(
        group.signals_data[TelemetryAttrs.str_power], 5000)
    result = encoder.analyze(group)
    threshold = 0.1

    str27v = group.signals_data[TelemetryAttrs.str_power]
    tu1_temperature = group.signals_data[TelemetryAttrs.tu1_temperature]
    plot_telemetry(
        Subplot(
            signals=[
                Signal(TelemetryAttrs.str_power,
                       fill_zeros_with_previous(str27v),
                       color=Colours.black),
                Signal(TelemetryAttrs.str_power + ' (ориг.)',
                       fill_zeros_with_previous(original),
                       color=Colours.yellow,
                       alpha=0.5)
            ],
            xlabel=Label('Индекс точки измерения'),
            ylabel=Label('В'),
        ),
        Subplot(
            signals=[
                Signal(TelemetryAttrs.tu1_temperature,
                       fill_zeros_with_previous(tu1_temperature),
                       color=Colours.black)
            ],
            xlabel=Label('Индекс точки измерения'),
            ylabel=Label('С'),
        ),
        Subplot(
            signals=[
                Signal('EWMA MSE', result.ewma_mse, color=Colours.red),
                Signal('Граница аномалии',
                       np.array([threshold] * len(result.ewma_mse)),
                       color=Colours.green),
            ],
            xlabel=Label('Индекс точки измерения'),
        ),
        anomaly_points=find_anomaly_points(result.ewma_mse,
                                           threshold=threshold),
    )
def test_simple_autoencoder() -> None:
    """Train and run the LSTM autoencoder on three synthetic signals.

    A sinusoid, a square wave and a step signal of 10 000 points each are
    generated, distorted on a few index ranges, used to train a fresh
    ``LSTMAutoencoder`` and then analyzed by it. The three signals and the
    resulting EWMA MSE (with a 0.99 threshold line) are plotted.
    """
    timestamps = np.arange(10_000)
    sinusoid = np.sin(20 * np.pi * (timestamps / 10_000))
    square = signal.square(100 * np.pi * (timestamps / 10_000))
    binary = np.concatenate((np.zeros(5_000), np.ones(5_000)))

    # Constant-valued distortions, one per signal.
    sinusoid[7000:7200] = 2
    square[5050:5150] = -3
    binary[8000:8100] = 0

    # Random fragment shared by the sinusoid and the square wave; the draws
    # alternate between the two signals.
    for idx in range(1000, 1100):
        sinusoid[idx] = np.random.random()
        square[idx] = np.random.random()

    signals = {
        'Sinusoid': sinusoid,
        'Square': square,
        'Binary': binary,
    }

    coder = LSTMAutoencoder(len(signals))
    group = SignalsGroup(
        'test',
        signals=list(signals.keys()),
        signals_data=signals,
    )

    # Train on the group, then analyze the very same data.
    coder.train(group)
    result = coder.analyze(group)

    subplots = []
    for title, values in (('Sinusoid', sinusoid), ('Square', square),
                          ('Binary', binary)):
        subplots.append(
            Subplot(
                signals=[Signal(title, values, line_width=5)],
                xlabel=Label('Time'),
                ylabel=Label('Value'),
                legend=Legend(font_size=70),
            ))

    error_signal = Signal('EWMA MSE',
                          result.ewma_mse,
                          line_width=5,
                          color=Colours.red)
    boundary_signal = Signal('Anomaly threshold',
                             np.array([0.99] * len(result.ewma_mse)),
                             line_width=5,
                             color=Colours.yellow)
    subplots.append(
        Subplot(
            signals=[error_signal, boundary_signal],
            xlabel=Label('Time'),
            ylabel=Label('Value'),
            legend=Legend(font_size=70),
        ))

    plot_telemetry(*subplots)