def explain_single_client(lime_dict, client_id, date=None):
    '''
    Make a prediction for a single client and explain the rationale
    :param lime_dict: dict containing important information and objects for explanation experiments
    :param client_id: a Client ID (integer) from the test set to predict and explain
    :param date: Time series only: date string (yyyy-mm-dd) to index time series data for a client
    :return: None. The explanation is passed to visualize_explanation along with lime_dict['IMG_PATH'].
    '''
    # Imported locally so the timing below does not depend on whether the module-level
    # name `datetime` is bound to the module or to the class.
    import datetime as _dt

    # Y_TEST is looked up by client ID alone, or by (client ID, date) when a date is supplied
    idx = client_id if date is None else (client_id, date)

    # Translate the index label into the row position used to index X_TEST
    i = lime_dict['Y_TEST'].index.get_loc(idx)

    # Predict and explain this example, reporting how long it took
    start_time = _dt.datetime.now()
    explanation = predict_and_explain(
        lime_dict['X_TEST'][i],
        lime_dict['MODEL'],
        lime_dict['EXPLAINER'],
        lime_dict['OHE_CT_SV'],
        lime_dict['SCALER_CT'],
        lime_dict['NUM_FEATURES'],
        lime_dict['NUM_SAMPLES'])
    elapsed_seconds = (_dt.datetime.now() - start_time).total_seconds()
    print("Explanation time = " + str(elapsed_seconds) + " seconds")

    # Visualize the explanation next to the recorded ground truth for this example.
    # The return value of visualize_explanation is not needed here.
    ground_truth = lime_dict['Y_TEST'].loc[idx, 'GroundTruth']
    visualize_explanation(explanation, client_id, ground_truth, date=date,
                          file_path=lime_dict['IMG_PATH'])
    return
def explain_xray(lime_dict, idx, save_exp=True):
    '''
    Make a prediction and provide a LIME explanation
    :param lime_dict: dict containing important information and objects for explanation experiments
    :param idx: index of image in test set to explain (must be non-negative)
    :param save_exp: Boolean indicating whether to save the explanation visualization
    :return: None. The explanation is passed to visualize_explanation.
    :raises ValueError: if idx is negative
    :raises FileNotFoundError: if the original image cannot be read from disk
    '''
    # Imported locally so the timing below does not depend on whether the module-level
    # name `datetime` is bound to the module or to the class.
    import datetime as _dt

    # A negative index would skip the loop below and leave no image to explain
    if idx < 0:
        raise ValueError('idx must be non-negative, got ' + str(idx))

    # Get i'th preprocessed image in test set by replaying the generator from its start
    test_generator = lime_dict['TEST_GENERATOR']
    test_generator.reset()
    for _ in range(idx + 1):
        x, _y = test_generator.next()
    # Drops the leading axis; np.squeeze raises unless that axis has length 1
    x = np.squeeze(x, axis=0)

    # Get image filename (used both to locate the original image and to title the visualization)
    img_filename = lime_dict['TEST_SET']['filename'][idx]

    # Get the corresponding original image (no preprocessing)
    orig_img_path = lime_dict['TEST_IMG_PATH'] + img_filename
    orig_img = cv2.imread(orig_img_path)
    if orig_img is None:
        # cv2.imread signals an unreadable/missing file by returning None instead of raising
        raise FileNotFoundError('Could not read image: ' + orig_img_path)
    new_dim = tuple(lime_dict['IMG_DIM'])
    orig_img = cv2.resize(orig_img, new_dim, interpolation=cv2.INTER_NEAREST)  # Resize image

    # Make a prediction for this image and retrieve a LIME explanation for the prediction
    start_time = _dt.datetime.now()
    explanation, probs = predict_and_explain(x, lime_dict['MODEL'], lime_dict['EXPLAINER'],
                                             lime_dict['NUM_FEATURES'], lime_dict['NUM_SAMPLES'])
    elapsed_seconds = (_dt.datetime.now() - start_time).total_seconds()
    print("Explanation time = " + str(elapsed_seconds) + " seconds")

    # Get image label
    label = lime_dict['TEST_SET']['label'][idx]

    # Rearrange prediction probability vector to reflect original ordering of classes in project config
    # NOTE(review): this walks class_indices in its own order and indexes probs[0] by each class's
    # position in CLASSES. Presumably probs[0] is ordered like class_indices; if so, the result is in
    # CLASSES order only when the two orderings are identical or differ by pairwise swaps -- confirm
    # for configurations with 3 or more classes.
    class_names = lime_dict['CLASSES']
    probs = [probs[0][class_names.index(c)] for c in test_generator.class_indices]

    # Visualize the LIME explanation and optionally save it to disk
    file_path = lime_dict['IMG_PATH'] if save_exp else None

    # The explicit comparison with True is kept on purpose: only a value equal to True selects the
    # COVID-19 class; any other value falls back to the top predicted class.
    if lime_dict['COVID_ONLY'] == True:
        label_to_see = test_generator.class_indices['COVID-19']
    else:
        label_to_see = 'top'

    # NOTE(review): predict_and_explain_set passes its output location as dir_path=; confirm which
    # keyword visualize_explanation actually accepts.
    visualize_explanation(orig_img, explanation, img_filename, label, probs, lime_dict['CLASSES'],
                          label_to_see=label_to_see, file_path=file_path)
    return
def predict_and_explain_set(raw_img_dir=None, preds_dir=None, save_results=True, give_explanations=True):
    '''
    Preprocess a raw dataset. Then get model predictions and corresponding explanations.
    :param raw_img_dir: Directory in which to look for raw images. Defaults to cfg['PATHS']['BATCH_PRED_IMGS'].
    :param preds_dir: Directory in which to save results of this prediction. Defaults to a new subdirectory of
                      cfg['PATHS']['BATCH_PREDS'] named after the current date and time.
    :param save_results: Flag specifying whether to save the prediction results to disk
    :param give_explanations: Flag specifying whether to provide LIME explanations with predictions spreadsheet
    :return: Dataframe of prediction results, optionally including explanations.
    :raises FileNotFoundError: if an original image cannot be read when generating an explanation
    '''
    # Imported locally so the timestamp below does not depend on whether the module-level
    # name `datetime` is bound to the module or to the class.
    from datetime import datetime as _datetime

    # Load project config data
    # NOTE(review): yaml.full_load and dill.load can build arbitrary Python objects; only point them
    # at trusted files.
    with open(os.path.join(os.getcwd(), "config.yml"), 'r') as cfg_file:
        cfg = yaml.full_load(cfg_file)
    cur_date = _datetime.now().strftime('%Y%m%d-%H%M%S')

    # Restore the model, LIME explainer, and model class indices from their respective serializations
    model = load_model(cfg['PATHS']['MODEL_TO_LOAD'], compile=False)
    with open(cfg['PATHS']['LIME_EXPLAINER'], 'rb') as explainer_file:
        explainer = dill.load(explainer_file)
    with open(cfg['PATHS']['OUTPUT_CLASS_INDICES'], 'rb') as class_indices_file:
        class_indices = dill.load(class_indices_file)

    # Load LIME and prediction constants from config
    NUM_SAMPLES = cfg['LIME']['NUM_SAMPLES']
    NUM_FEATURES = cfg['LIME']['NUM_FEATURES']
    CLASS_NAMES = cfg['DATA']['CLASSES']

    # Define column names of the DataFrame representing the prediction results
    col_names = ['Image Filename', 'Predicted Class']
    col_names.extend('p(' + c + ')' for c in CLASS_NAMES)

    # Add column for the explanation image
    if give_explanations:
        col_names.append('Explanation Filename')

    # Set raw image directory based on project config, if not specified
    if raw_img_dir is None:
        raw_img_dir = cfg['PATHS']['BATCH_PRED_IMGS']

    # If no path is specified, use a new timestamped directory for predictions
    if preds_dir is None:
        preds_dir = os.path.join(cfg['PATHS']['BATCH_PREDS'], cur_date)
    # Joining with '' guarantees a trailing separator, so the directory works both here and in
    # visualize_explanation regardless of whether that helper concatenates or joins paths.
    preds_dir = os.path.join(preds_dir, '')
    if save_results:
        # makedirs also creates missing parents and tolerates an already existing directory
        os.makedirs(preds_dir, exist_ok=True)

    # Create DataFrame for raw image file names
    raw_img_df = pd.DataFrame({'filename': os.listdir(raw_img_dir)})
    # Enforce image files. The pattern is a case-sensitive regex matched anywhere in the name.
    raw_img_df = raw_img_df[raw_img_df['filename'].str.contains('jpg|png|jpeg', na=False)]

    # Create generator for the image files
    img_gen = ImageDataGenerator(preprocessing_function=remove_text, samplewise_std_normalization=True,
                                 samplewise_center=True)
    img_iter = img_gen.flow_from_dataframe(dataframe=raw_img_df, directory=raw_img_dir, x_col="filename",
                                           target_size=cfg['DATA']['IMG_DIM'], batch_size=1, class_mode=None,
                                           shuffle=False)

    # Predict (and optionally explain) all images in the specified directory
    # NOTE(review): each row pairs the n-th listed filename with the n-th image yielded by img_iter.
    # If flow_from_dataframe drops any entry, the pairing shifts -- confirm.
    rows = []
    print('Predicting and explaining examples.')
    for filename in raw_img_df['filename'].tolist():

        # Get preprocessed image and make a prediction.
        try:
            x = img_iter.next()
        except StopIteration:
            break
        y = np.squeeze(predict_instance(x, model))

        # Rearrange prediction probability vector to reflect original ordering of classes in project config
        # NOTE(review): this walks class_indices in its own order and indexes y by each class's position
        # in CLASS_NAMES. Presumably y is ordered like class_indices; if so, the result is in CLASS_NAMES
        # order only when the two orderings are identical or differ by pairwise swaps -- confirm for
        # configurations with 3 or more classes.
        p = [y[CLASS_NAMES.index(c)] for c in class_indices]
        predicted_class = CLASS_NAMES[np.argmax(p)]
        row = [filename, predicted_class]
        row.extend(p)

        # Explain this prediction
        if give_explanations:
            explanation, _ = predict_and_explain(np.squeeze(x, axis=0), model, explainer,
                                                 NUM_FEATURES, NUM_SAMPLES)

            # The explicit comparison with True is kept on purpose: only a value equal to True selects
            # the COVID-19 class; any other value falls back to the top predicted class.
            if cfg['LIME']['COVID_ONLY'] == True:
                label_to_see = class_indices['COVID-19']
            else:
                label_to_see = 'top'

            # Load and resize the corresponding original image (no preprocessing).
            # The path is joined the same way the generator resolves `directory` + filename, so a
            # raw_img_dir without a trailing separator still works.
            orig_img_path = os.path.join(raw_img_dir, filename)
            orig_img = cv2.imread(orig_img_path)
            if orig_img is None:
                # cv2.imread signals an unreadable/missing file by returning None instead of raising
                raise FileNotFoundError('Could not read image: ' + orig_img_path)
            orig_img = cv2.resize(orig_img, tuple(cfg['DATA']['IMG_DIM']), interpolation=cv2.INTER_NEAREST)

            # Generate visual for explanation (the output directory is passed as dir_path)
            exp_filename = visualize_explanation(orig_img, explanation, filename, None, p, CLASS_NAMES,
                                                 label_to_see=label_to_see, dir_path=preds_dir)
            # Record only the file name, whichever separator the returned path uses
            row.append(os.path.basename(exp_filename.replace('\\', '/')))
        rows.append(row)

    # Convert results to a Pandas DataFrame and save
    results_df = pd.DataFrame(rows, columns=col_names)
    if save_results:
        results_path = os.path.join(preds_dir, 'predictions.csv')
        results_df.to_csv(results_path, columns=col_names, index_label=False, index=False)
    return results_df