def preprocess():
    # Export every event log as a CSV plus a matching binary label file.
    for log in get_event_log_files():
        print(log.name)
        data = Dataset(log.name)
        data.event_log.save_csv("../Data/Nolle_Data/" + log.name + "_data.csv")
        with open("../Data/Nolle_Data/" + log.name + "_labels.csv", "w") as fout:
            # Encode labels as 0 (normal) / 1 (anomalous), one per line
            for label in data.text_labels:
                if label == "Normal":
                    fout.write("0\n")
                else:
                    fout.write("1\n")
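# A minimal sketch of reading back one of the file pairs written by
# preprocess(); "some_log" is a hypothetical placeholder for an actual log
# name, and pandas is assumed to be available.
import pandas as pd

log_name = "some_log"  # placeholder, not a real log name
events = pd.read_csv(f"../Data/Nolle_Data/{log_name}_data.csv")
labels = pd.read_csv(f"../Data/Nolle_Data/{log_name}_labels.csv",
                     header=None, names=["anomaly"])  # 0 = normal, 1 = anomalous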
def fit_and_save(dataset_name, ad, ad_kwargs=None, fit_kwargs=None):
    if ad_kwargs is None:
        ad_kwargs = {}
    if fit_kwargs is None:
        fit_kwargs = {}

    # Save start time
    start_time = arrow.now()

    # Dataset
    dataset = Dataset(dataset_name)

    # AD
    ad = ad(**ad_kwargs)

    # Train and save
    ad.fit(dataset, **fit_kwargs)
    file_name = f'{dataset_name}_{ad.abbreviation}_{start_time.format(DATE_FORMAT)}'
    model_file = ad.save(file_name)

    # Save end time
    end_time = arrow.now()

    # Cache result
    Evaluator(model_file.str_path).cache_result()

    # Calculate training time in seconds
    training_time = (end_time - start_time).total_seconds()

    # Write to database
    engine = get_engine()
    session = Session(engine)
    session.add(
        Model(creation_date=end_time.datetime,
              algorithm=ad.name,
              training_duration=training_time,
              file_name=model_file.file,
              training_event_log_id=EventLog.get_id_by_name(dataset_name),
              training_host=socket.gethostname(),
              hyperparameters=str(dict(**ad_kwargs, **fit_kwargs))))
    session.commit()
    session.close()

    if isinstance(ad, NNAnomalyDetector):
        from keras.backend import clear_session
        clear_session()
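# Hypothetical usage of fit_and_save(); the detector class and keyword
# arguments below are placeholders for illustration, not confirmed parts of
# this codebase:
#
#   fit_and_save('some_dataset',
#                SomeAnomalyDetector,             # placeholder detector class
#                ad_kwargs={'hidden_size': 64},   # placeholder constructor args
#                fit_kwargs={'epochs': 10})       # placeholder training args
#
# Note that `ad` is passed in as a class and rebound to an instance inside the
# function; the final clear_session() call releases Keras/TensorFlow state
# between consecutive neural-network training runs.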
    event_log.save_json(
        str(EVENTLOG_DIR / f'{event_log_file.model}-{p}-{event_log_file.id}.json.gz'))

#####
# Dataset Information
#####

logs = sorted([e.name for e in get_event_log_files() if e.p == 0.3])

columns = ['name', 'base_name', 'num_cases', 'num_events', 'num_activities',
           'num_attributes', 'attribute_keys', 'attribute_dims',
           'min_attribute_dim', 'max_attribute_dim',
           'min_case_len', 'max_case_len', 'mean_case_len']

df = []
for log in tqdm(logs):
    d = Dataset(log)
    # Attribute dims beyond the first (the activity) may be empty
    dim_min = d.attribute_dims[1:].astype(int).min() if d.attribute_dims[1:].size else None
    dim_max = d.attribute_dims[1:].astype(int).max() if d.attribute_dims[1:].size else None
    df.append([log, log.split('-')[0], d.num_cases, d.num_events,
               d.attribute_dims[0].astype(int),  # num_activities
               d.num_attributes - 1,
               d.attribute_keys[1:], d.attribute_dims[1:].astype(int),
               dim_min, dim_max,
               d.case_lens.min(), d.case_lens.max(), d.case_lens.mean().round(2)])

event_logs = pd.DataFrame(df, columns=columns)
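# For example, the resulting summary table can be inspected or persisted like
# this (the output path is illustrative only):
print(event_logs[['name', 'num_cases', 'num_events', 'num_activities']].head())
event_logs.to_csv('dataset_information.csv', index=False)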
def dataset(self):
    # Lazily construct the Dataset on first access and cache it.
    if self._dataset is None:
        self._dataset = Dataset(self.eventlog_name)
    return self._dataset
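# On Python 3.8+, the same lazy-caching idiom can be written with
# functools.cached_property; a sketch, assuming Dataset construction is the
# expensive step and "DatasetHolder" is a hypothetical stand-in for the
# surrounding class:
from functools import cached_property

class DatasetHolder:
    def __init__(self, eventlog_name):
        self.eventlog_name = eventlog_name

    @cached_property
    def dataset(self):
        # Built on first access, then cached on the instance.
        return Dataset(self.eventlog_name)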