def _evaluate(params):
    """Evaluate one (evaluator, base, heuristic, strategy) combination.

    Args:
        params: Sequence ``(e, base, heuristic, strategy)`` where ``e`` is an
            evaluator-like object exposing ``model_file``, ``binarizer``,
            ``dataset`` and ``ad_``.  # assumes Evaluator from this project — TODO confirm

    Returns:
        list[Evaluation]: one 'Normal' and one 'Anomaly' row per
        (axis, attribute) pair.
    """
    e, base, heuristic, strategy = params

    # Resolve the persisted DB row for this model; guarantee the session is
    # closed even if the query raises (the original leaked it on error).
    session = Session(get_engine())
    try:
        model = session.query(Model).filter_by(file_name=e.model_file.name).first()
    finally:
        session.close()

    # Predicted and ground-truth label frames for the requested strategy.
    y_pred = e.binarizer.binarize(base=base, heuristic=heuristic,
                                  strategy=strategy, go_backwards=False)
    y_true = e.binarizer.get_targets()

    def make_evaluation(label, perspective, attribute_name, axis,
                        precision, recall, f1):
        # All varying fields are passed explicitly; the original relied on a
        # closure redefined every iteration with late-bound 'perspective'.
        return Evaluation(model_id=model.id, file_name=model.file_name,
                          label=label, perspective=perspective,
                          attribute_name=attribute_name, axis=axis,
                          base=base, heuristic=heuristic, strategy=strategy,
                          precision=precision, recall=recall, f1=f1)

    evaluations = []
    for axis in [0, 1, 2]:
        for i, attribute_name in enumerate(e.dataset.attribute_keys):
            # Attribute 0 is the activity (control-flow perspective); all
            # remaining attributes belong to the data perspective.
            perspective = 'Control Flow' if i == 0 else 'Data'
            if i > 0 and not e.ad_.supports_attributes:
                # Detector cannot score data attributes: record zero metrics.
                scores = [('Normal', 0.0, 0.0, 0.0),
                          ('Anomaly', 0.0, 0.0, 0.0)]
            else:
                yp = label_collapse(y_pred[:, :, i:i + 1], axis=axis).compressed()
                yt = label_collapse(y_true[:, :, i:i + 1], axis=axis).compressed()
                # labels=[0, 1] fixes the order: index 0 = Normal, 1 = Anomaly.
                p, r, f, _ = metrics.precision_recall_fscore_support(
                    yt, yp, labels=[0, 1])
                scores = [('Normal', p[0], r[0], f[0]),
                          ('Anomaly', p[1], r[1], f[1])]
            for label, precision, recall, f1 in scores:
                evaluations.append(make_evaluation(
                    label, perspective, attribute_name, axis,
                    precision, recall, f1))
    return evaluations
def fit_and_save(dataset_name, ad, ad_kwargs=None, fit_kwargs=None):
    """Train an anomaly detector on a dataset, save the model file, cache its
    evaluation result, and record the training run in the database.

    Args:
        dataset_name: Name of the event-log dataset to train on.
        ad: Anomaly-detector *class* (it is instantiated with ``ad_kwargs``).
        ad_kwargs: Constructor keyword arguments, or None for defaults.
        fit_kwargs: Keyword arguments forwarded to ``fit``, or None.
    """
    if ad_kwargs is None:
        ad_kwargs = {}
    if fit_kwargs is None:
        fit_kwargs = {}

    # Save start time
    start_time = arrow.now()

    # Dataset
    dataset = Dataset(dataset_name)

    # Instantiate the detector (keep a distinct name instead of shadowing
    # the class parameter 'ad').
    detector = ad(**ad_kwargs)

    # Train and save
    detector.fit(dataset, **fit_kwargs)
    file_name = f'{dataset_name}_{detector.abbreviation}_{start_time.format(DATE_FORMAT)}'
    model_file = detector.save(file_name)

    # Save end time
    end_time = arrow.now()

    # Pre-compute and cache the evaluation result for the freshly saved model.
    Evaluator(model_file.str_path).cache_result()

    # Calculate training time in seconds
    training_time = (end_time - start_time).total_seconds()

    # Write the run metadata to the database; close the session even if the
    # insert or commit raises (the original leaked it on error).
    session = Session(get_engine())
    try:
        session.add(
            Model(creation_date=end_time.datetime,
                  algorithm=detector.name,
                  training_duration=training_time,
                  file_name=model_file.file,
                  training_event_log_id=EventLog.get_id_by_name(dataset_name),
                  training_host=socket.gethostname(),
                  hyperparameters=str(dict(**ad_kwargs, **fit_kwargs))))
        session.commit()
    finally:
        session.close()

    # Keras keeps global graph state between fits; clear it so repeated
    # calls in one process do not accumulate memory.
    if isinstance(detector, NNAnomalyDetector):
        from keras.backend import clear_session
        clear_session()
if base is not None and base not in e.ad_.supported_bases: continue _params.append([e, base, heuristic, strategy]) return [_e for p in _params for _e in _evaluate(p)] models = sorted([m.name for m in get_model_files()]) evaluations = [] for i in range(len(models)): e = evaluate(models[i]) evaluations.append(e) # Write to database session = Session(get_engine()) for e in evaluations: session.bulk_save_objects(e) session.commit() session.close() out_dir = PLOT_DIR / 'isj-2019' eval_file = out_dir / 'eval.pkl' session = Session(get_engine()) evaluations = session.query(Evaluation).all() rows = [] for ev in tqdm(evaluations): m = ev.model el = ev.model.training_event_log rows.append([