def make_test_set_idx(datasets, trained_models_dict):
    """
    Create random indices for the test set, so both MASHAP and LIME can use
    the same instances in the experiments.

    For classification, try to build a balanced set of 50 random
    model-predicted positives and 50 random negatives; if that fails (e.g. a
    class has too few predicted instances, raising ValueError), fall back to
    the first 100 test instances. For regression, always use the first 100.

    The resulting nested dict {dataset: {model_key: index}} is persisted to
    'cache/idx_dict.dict' via joblib and also returned.
    """
    idx_dict = dict()
    for dataset, version, mode in datasets:
        print(f"------------------- {dataset, mode} -------------------")
        x, y = fetch_data(dataset, version)
        _, x_test, _, _ = train_test_split(x, y, test_size=0.3, random_state=42)
        idx_dict_i = dict()
        for model_key, model in trained_models_dict.get(dataset).items():
            py_test = model.predict(x_test)
            if mode == "classification":
                try:
                    x_test_positive = x_test[py_test == 1]
                    x_test_negative = x_test[py_test == 0]
                    # pd.concat replaces DataFrame.append, which was
                    # deprecated in pandas 1.4 and removed in pandas 2.0.
                    x_test_mix = pd.concat(
                        [
                            x_test_positive.loc[get_random_idx(x_test_positive, 50)],
                            x_test_negative.loc[get_random_idx(x_test_negative, 50)],
                        ]
                    )
                except ValueError:
                    x_test_mix = x_test[:100]
            elif mode == "regression":
                x_test_mix = x_test[:100]
            else:
                raise ValueError()
            idx_dict_i.setdefault(model_key, x_test_mix.index)
        idx_dict.setdefault(dataset, idx_dict_i)
    joblib.dump(idx_dict, "cache/idx_dict.dict")
    return idx_dict
def step_0(_mode='test', _verbose=False, set_id=None, form_id=None, test_folder=None):
    '''
    + Fetches random images for either training or testing.

    In 'test' mode, paths come from prepare_data.fetch_data: every image of
    every author is read into train_images, and the first image of each
    author that has test paths is read into test_images (test_label ends up
    as the 1-based index of the LAST such author). In 'deliver' mode, paths
    come from prepare_data.fetch_deliver(test_folder): the first test path is
    read as the single test image and all train paths are read as training
    images (test_label stays None).

    Returns (train_images, test_images, test_label, train_paths, test_paths).
    '''
    test_label = None
    train_images, test_images, train_paths, test_paths = [], [], [], []
    if _mode == 'test':
        train_paths, test_paths = prepare_data.fetch_data(
            _mode=_mode, set_id=set_id, form_id=form_id)
        # READ TRAIN: all images for all authors.
        # NOTE(review): cv2.imread returns None on unreadable paths; that is
        # not checked here or in the original — TODO confirm inputs are valid.
        for author_paths in train_paths:
            for image_path in author_paths:
                train_images.append(cv2.imread(image_path))
        # READ TEST: first image of each author that has any test paths.
        for author_i, author_paths in enumerate(test_paths):
            if len(author_paths) > 0:
                test_images.append(cv2.imread(author_paths[0]))
                test_label = author_i + 1
    elif _mode == 'deliver':
        train_paths, test_paths = prepare_data.fetch_deliver(test_folder)
        test_images.append(cv2.imread(test_paths[0]))
        # enumerate() dropped: the index was never used in the original.
        for image_path in train_paths:
            train_images.append(cv2.imread(image_path))
    return train_images, test_images, test_label, train_paths, test_paths
def latex_table_1(datasets):
    """
    Print a LaTeX table body listing each dataset with its record and feature
    counts, one `name & records & features \\\\` row per dataset, sorted by
    dataset name.
    """
    print('\n')
    df = pd.DataFrame([], columns=['dataset', 'records', 'features'])
    for dataset, version, _mode in datasets:
        x, _ = fetch_data(dataset, version)
        # len(df) is the next free positional label; the original used
        # df.size (rows * columns), which only produced unique labels by
        # accident.
        df.loc[len(df)] = [dataset, x.shape[0], x.shape[1]]
    df.sort_values('dataset', inplace=True)
    for _, row in df.iterrows():
        print(f"{row['dataset']} & {row['records']} & {row['features']}\\\\")
def calculate_cache_scores(datasets, trained_models_dict, idx_dict, algorithm):
    """
    Compute MASHAP or LIME attribution scores for every dataset/model pair.

    For each dataset, the data is split 70/30 (random_state=42), the cached
    test indices from idx_dict select the evaluation instances, and the
    chosen explainer is run for every trained model while timeit_context
    records its runtime.

    Returns a tuple (scores_dict, time_dict): nested dicts keyed first by
    dataset and then by model key, holding the attribution scores and the
    measured runtimes respectively.
    """
    scores_dict = dict()
    time_dict = dict()
    for dataset, version, mode in datasets:
        print(f"------------------- {dataset, algorithm} -------------------")
        x, y = fetch_data(dataset, version)
        x_train, x_test, _, _ = train_test_split(x, y, test_size=0.3, random_state=42)
        dataset_scores = dict()
        dataset_times = dict()
        for model_key, model in trained_models_dict.get(dataset).items():
            idx = idx_dict.get(dataset).get(model_key)
            x_test_100 = x_test.loc[idx]
            # Classification explains class probabilities; regression
            # explains raw predictions.
            if mode == "classification":
                predict_fn = model.predict_proba
            elif mode == "regression":
                predict_fn = model.predict
            else:
                raise ValueError()
            # timeit_context writes the elapsed time into elapsed[0].
            elapsed = [0]
            if algorithm == "lime":
                with timeit_context(f"[{model}] {algorithm} runtime:", elapsed):
                    scores = lime_explainer(x_train, predict_fn, x_test_100, mode=mode)
            elif algorithm == "mashap":
                with timeit_context(f"[{model}] {algorithm} runtime:", elapsed):
                    py_train = model.predict(x_train)
                    py_test_100 = model.predict(x_test_100)
                    scores = mashap_explainer(x_train, py_train, x_test_100, py_test_100)
            else:
                raise ValueError()
            dataset_scores.setdefault(model_key, scores)
            dataset_times.setdefault(model_key, elapsed[0])
            print(dataset_times)
        scores_dict.setdefault(dataset, dataset_scores)
        time_dict.setdefault(dataset, dataset_times)
    return (
        scores_dict,
        time_dict,
    )
def train_cache_models(datasets):
    """
    Train the model suite on every dataset and persist the results.

    For each (dataset, version, task) triple, fetches the data, trains the
    models via train_models, and collects them in a dict keyed by dataset
    name. The dict is dumped to 'cache/trained_models.dict' with joblib and
    also returned.
    """
    trained_models_dict = dict()
    for name, version, task in datasets:
        print(name)
        features, target = fetch_data(name, version)
        trained_models_dict.setdefault(name, train_models(features, target, task))
    joblib.dump(trained_models_dict, "cache/trained_models.dict")
    return trained_models_dict
def get_consistency_metrics(datasets, algorithm, scores_dict, idx_dict, trained_models_dict):
    """
    Compute consistency metrics for every dataset/model combination.

    For each dataset, looks up each trained model, its cached test indices
    and attribution scores, and delegates the metric computation to
    _calculate_consistency_per_model.

    Returns a nested dict keyed by dataset name and then by model key.
    """
    model_keys = ("knn", "dt", "rf", "gbc", "mlp")
    consistency_scores_dict = dict()
    for dataset, version, mode in datasets:
        print(f"-------------- {dataset}, {algorithm} --------------")
        x, y = fetch_data(dataset, version)
        per_model = dict()
        for key in tqdm(model_keys):
            metrics = _calculate_consistency_per_model(
                x,
                y,
                scores_dict.get(dataset).get(key),
                idx_dict.get(dataset).get(key),
                trained_models_dict.get(dataset).get(key),
            )
            per_model.setdefault(key, metrics)
        consistency_scores_dict.setdefault(dataset, per_model)
    return consistency_scores_dict