# Path where per-run evaluation results will be written (CSV keyed by
# classifier method, dataset, and parameter string).
results_file = os.path.join(
    output_dir,
    "evaluation_results/results_%s_%s_%s.csv" % (cls_method, dataset_name, params),
)

##### MAIN PART ######
print('Preparing data...')
start = time.time()

dataset_manager = DatasetManager(dataset_name)
data = dataset_manager.read_dataset()
train, test = dataset_manager.split_data(
    data, train_ratio, split=data_split_type
)  # to reproduce results of Tax et al., use 'ordered' instead of 'temporal'

dt_train = dataset_manager.encode_data_with_label_all_data(train)
dt_test = dataset_manager.encode_data_with_label_all_data(test)

# Normalization divisors are computed either from the training portion only
# or from the full dataset's timestamp/duration features.
if normalize_over == "train":
    dataset_manager.calculate_divisors(dt_train)
elif normalize_over == "all":
    dt_all = dataset_manager.extract_timestamp_features(data)
    dt_all = dataset_manager.extract_duration_features(dt_all)
    dataset_manager.calculate_divisors(dt_all)
else:
    # Fail fast: the original code only printed a warning and continued,
    # which would normalize below with unset/stale divisors and silently
    # corrupt the evaluation.
    raise ValueError("unknown normalization mode: %s" % normalize_over)

# NOTE(review): only the test set is normalized here; dt_train is encoded
# but left unnormalized — confirm downstream code either normalizes it or
# does not need it normalized.
dt_test = dataset_manager.normalize_data(dt_test)
print("Done: %s" % (time.time() - start))
# Training hyper-parameters.
train_ratio = 0.8
val_ratio = 0.2
activation = "sigmoid"
optimizer = "adam"
nb_epoch = 50

dataset_manager = DatasetManager(dataset_name)
data = dataset_manager.read_dataset()

# Chronological (temporal) split for train vs. held-out data; the
# validation portion is then drawn randomly from within the train set.
train, _ = dataset_manager.split_data_strict(data, train_ratio, split="temporal")
train, val = dataset_manager.split_val(train, val_ratio, split="random")

# Encode train/val with the plain encoder, or the activity/resource
# embedding encoder when an embedding type is configured.
if embedding_type == "none":
    encode = dataset_manager.encode_data_with_label_all_data
    dt_train = encode(train)
    dt_val = encode(val)
else:
    embed_kwargs = dict(
        embedding_type=embedding_type,
        embedding_dim=embedding_dim,
        scale_model=scale_model,
    )
    encode = dataset_manager.encode_data_with_label_all_data_act_res_embedding
    dt_train = encode(train, **embed_kwargs)
    dt_val = encode(val, **embed_kwargs)

# bpic2017 cases get long; cap the sequence length at 20 (or the 95th
# percentile of positive-case lengths, whichever is smaller).
if "bpic2017" in dataset_name:
    max_len = min(20, dataset_manager.get_pos_case_length_quantile(data, 0.95))