Example #1
def preprocess():
    # Convert each raw event log into a CSV data file and a matching label file.
    for log in get_event_log_files():
        print(log.name)
        data = Dataset(log.name)

        # Save the event log itself as CSV.
        data.event_log.save_csv("../Data/Nolle_Data/" + log.name + "_data.csv")
        # Write one binary label per case: 0 for "Normal", 1 for anything else.
        with open("../Data/Nolle_Data/" + log.name + "_labels.csv",
                  "w") as fout:
            for label in data.text_labels:
                if label == "Normal":
                    fout.write("0\n")
                else:
                    fout.write("1\n")
Example #2
def fit_and_save(dataset_name, ad, ad_kwargs=None, fit_kwargs=None):
    if ad_kwargs is None:
        ad_kwargs = {}
    if fit_kwargs is None:
        fit_kwargs = {}

    # Save start time
    start_time = arrow.now()

    # Load the dataset
    dataset = Dataset(dataset_name)

    # Instantiate the anomaly detector from the given class and hyperparameters
    ad = ad(**ad_kwargs)

    # Train the detector and save the resulting model to disk
    ad.fit(dataset, **fit_kwargs)
    file_name = f'{dataset_name}_{ad.abbreviation}_{start_time.format(DATE_FORMAT)}'
    model_file = ad.save(file_name)

    # Save end time
    end_time = arrow.now()

    # Cache the evaluation result for the newly trained model
    Evaluator(model_file.str_path).cache_result()

    # Calculate training time in seconds
    training_time = (end_time - start_time).total_seconds()

    # Write to database
    engine = get_engine()
    session = Session(engine)

    session.add(
        Model(creation_date=end_time.datetime,
              algorithm=ad.name,
              training_duration=training_time,
              file_name=model_file.file,
              training_event_log_id=EventLog.get_id_by_name(dataset_name),
              training_host=socket.gethostname(),
              hyperparameters=str(dict(**ad_kwargs, **fit_kwargs))))
    session.commit()
    session.close()

    # Release Keras graph state after training a neural network detector
    if isinstance(ad, NNAnomalyDetector):
        from keras.backend import clear_session
        clear_session()
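A usage sketch for fit_and_save(); the detector class DAE and the keyword arguments below are illustrative placeholders, not taken from the example:

# Hypothetical driver: train one detector per available event log.
for log_name in sorted(e.name for e in get_event_log_files()):
    fit_and_save(log_name,
                 DAE,                                  # placeholder detector class
                 ad_kwargs=dict(hidden_layers=2),      # placeholder hyperparameters
                 fit_kwargs=dict(epochs=50))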
Example #3
    # Fragment: saves a generated event log; event_log, event_log_file and p
    # come from an enclosing generation loop that is not shown here.
    event_log.save_json(
        str(EVENTLOG_DIR /
            f'{event_log_file.model}-{p}-{event_log_file.id}.json.gz'))

#####
# Dataset Information
#####
logs = sorted([e.name for e in get_event_log_files() if e.p == 0.3])
columns = [
    'name', 'base_name', 'num_cases', 'num_events', 'num_activities',
    'num_attributes', 'attribute_keys', 'attribute_dims', 'min_attribute_dim',
    'max_attribute_dim', 'min_case_len', 'max_case_len', 'mean_case_len'
]
df = []
for log in tqdm(logs):
    d = Dataset(log)
    # Event attribute dimensions, excluding the control-flow (activity) attribute at index 0.
    event_attr_dims = d.attribute_dims[1:]
    dim_min = event_attr_dims.astype(int).min() if event_attr_dims.size else None
    dim_max = event_attr_dims.astype(int).max() if event_attr_dims.size else None
    df.append([
        log,
        log.split('-')[0], d.num_cases, d.num_events,
        d.attribute_dims[0].astype(int), d.num_attributes - 1,
        d.attribute_keys[1:], d.attribute_dims[1:].astype(int), dim_min,
        dim_max,
        d.case_lens.min(),
        d.case_lens.max(),
        d.case_lens.mean().round(2)
    ])
event_logs = pd.DataFrame(df, columns=columns)
    def dataset(self):
        # Lazily load the Dataset on first access and cache it for reuse.
        if self._dataset is None:
            self._dataset = Dataset(self.eventlog_name)
        return self._dataset
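The trailing dataset() accessor illustrates a lazy-loading cache. A self-contained sketch of the same pattern, with a generic owner class invented for illustration:

class EventLogHandle:
    # Generic owner class (invented for illustration); only the caching pattern matters.
    def __init__(self, eventlog_name):
        self.eventlog_name = eventlog_name
        self._dataset = None

    @property
    def dataset(self):
        # Build the Dataset once, then reuse the cached instance.
        if self._dataset is None:
            self._dataset = Dataset(self.eventlog_name)
        return self._dataset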