Exemple #1
0
def explain_single_client(lime_dict, client_id, date=None):
    '''
    Predict for one client of the test set, explain the prediction and plot the explanation.
    :param lime_dict: dict containing important information and objects for explanation experiments.
                      Keys read here: 'Y_TEST', 'X_TEST', 'MODEL', 'EXPLAINER', 'OHE_CT_SV',
                      'SCALER_CT', 'NUM_FEATURES', 'NUM_SAMPLES' and 'IMG_PATH'
    :param client_id: a Client ID (integer) from the test set to predict and explain
    :param date: Time series only: date string (yyyy-mm-dd) to index time series data for a client
    :return: None; the elapsed explanation time is printed to stdout
    '''
    # Without a date the client ID alone is the index key; with one, the key is the
    # (client ID, date) pair.
    key = client_id if date is None else (client_id, date)

    # Translate the index key into a position so the matching X_TEST entry can be fetched
    position = lime_dict['Y_TEST'].index.get_loc(key)

    began_at = datetime.datetime.now()
    client_explanation = predict_and_explain(
        lime_dict['X_TEST'][position],
        lime_dict['MODEL'],
        lime_dict['EXPLAINER'],
        lime_dict['OHE_CT_SV'],
        lime_dict['SCALER_CT'],
        lime_dict['NUM_FEATURES'],
        lime_dict['NUM_SAMPLES'],
    )
    elapsed_seconds = (datetime.datetime.now() - began_at).total_seconds()
    print("Explanation time = " + str(elapsed_seconds) + " seconds")

    # Plot the explanation next to the recorded ground truth for this client
    ground_truth = lime_dict['Y_TEST'].loc[key, 'GroundTruth']
    visualize_explanation(client_explanation,
                          client_id,
                          ground_truth,
                          date=date,
                          file_path=lime_dict['IMG_PATH'])
Exemple #2
0
def explain_xray(lime_dict, idx, save_exp=True):
    '''
    Make a prediction for one test set image and visualize a LIME explanation of it.
    :param lime_dict: dict containing important information and objects for explanation experiments.
                      Keys read here: 'TEST_GENERATOR', 'TEST_SET', 'TEST_IMG_PATH', 'IMG_DIM',
                      'MODEL', 'EXPLAINER', 'NUM_FEATURES', 'NUM_SAMPLES', 'CLASSES',
                      'COVID_ONLY' and 'IMG_PATH'
    :param idx: index of image in test set to explain
    :param save_exp: Boolean indicating whether to save the explanation visualization
    :return: None; the elapsed explanation time is printed to stdout
    :raises OSError: if the original image file cannot be read
    '''
    test_gen = lime_dict['TEST_GENERATOR']
    class_names = lime_dict['CLASSES']
    class_indices = test_gen.class_indices

    # Get image filename and label
    img_filename = lime_dict['TEST_SET']['filename'][idx]
    label = lime_dict['TEST_SET']['label'][idx]

    # Get idx'th preprocessed image in test set by replaying the generator from its start.
    # The builtin next() works on any iterator, so this does not depend on the generator
    # exposing a .next() method.
    test_gen.reset()
    for _ in range(idx + 1):
        x = next(test_gen)[0]
    # Drop the leading batch axis (np.squeeze raises unless that axis has length 1)
    x = np.squeeze(x, axis=0)

    # Get the corresponding original image (no preprocessing).
    # cv2.imread signals failure by returning None instead of raising, so check explicitly
    # rather than letting cv2.resize fail with an unrelated-looking error.
    img_path = lime_dict['TEST_IMG_PATH'] + img_filename
    orig_img = cv2.imread(img_path)
    if orig_img is None:
        raise OSError('Could not read image: ' + str(img_path))
    orig_img = cv2.resize(orig_img,
                          tuple(lime_dict['IMG_DIM']),
                          interpolation=cv2.INTER_NEAREST)  # Resize image

    # Make a prediction for this image and retrieve a LIME explanation for the prediction
    start_time = datetime.datetime.now()
    explanation, probs = predict_and_explain(x, lime_dict['MODEL'],
                                             lime_dict['EXPLAINER'],
                                             lime_dict['NUM_FEATURES'],
                                             lime_dict['NUM_SAMPLES'])
    print("Explanation time = " +
          str((datetime.datetime.now() - start_time).total_seconds()) +
          " seconds")

    # Rearrange prediction probability vector to reflect original ordering of classes in project
    # config: entry k must be the probability of class_names[k]. class_indices maps a class name
    # to its position in the model output, so look each configured class up there. (Walking
    # class_indices and indexing by config position applies the inverse permutation, which only
    # gives the same result when the two orderings differ by swaps.)
    probs = [probs[0][class_indices[c]] for c in class_names]

    # Visualize the LIME explanation and optionally save it to disk
    file_path = lime_dict['IMG_PATH'] if save_exp else None
    # Comparison with True is kept as-is so that only True (or 1) selects the COVID-19 label
    if lime_dict['COVID_ONLY'] == True:
        label_to_see = class_indices['COVID-19']
    else:
        label_to_see = 'top'
    visualize_explanation(orig_img,
                          explanation,
                          img_filename,
                          label,
                          probs,
                          class_names,
                          label_to_see=label_to_see,
                          file_path=file_path)
    return
Exemple #3
0
def predict_and_explain_set(raw_img_dir=None,
                            preds_dir=None,
                            save_results=True,
                            give_explanations=True):
    '''
    Preprocess a raw dataset. Then get model predictions and corresponding explanations.
    :param raw_img_dir: Directory in which to look for raw images; defaults to
                        cfg['PATHS']['BATCH_PRED_IMGS'] from config.yml
    :param preds_dir: Path at which to save results of this prediction; defaults to a new
                      timestamped subdirectory of cfg['PATHS']['BATCH_PREDS']
    :param save_results: Flag specifying whether to save the prediction results to disk
    :param give_explanations: Flag specifying whether to provide LIME explanations with predictions spreadsheet
    :return: Dataframe of prediction results, optionally including explanations.
    :raises OSError: if an original image cannot be read when generating an explanation
    '''

    # Load project config data (read from the current working directory)
    with open(os.path.join(os.getcwd(), "config.yml"), 'r') as cfg_file:
        cfg = yaml.full_load(cfg_file)
    cur_date = datetime.now().strftime('%Y%m%d-%H%M%S')

    # Restore the model, LIME explainer, and model class indices from their respective serializations
    model = load_model(cfg['PATHS']['MODEL_TO_LOAD'], compile=False)
    # NOTE(review): dill, like pickle, can run arbitrary code while deserializing. The paths come
    # from the project config; make sure they only ever point at trusted files.
    with open(cfg['PATHS']['LIME_EXPLAINER'], 'rb') as explainer_file:
        explainer = dill.load(explainer_file)
    with open(cfg['PATHS']['OUTPUT_CLASS_INDICES'], 'rb') as indices_file:
        class_indices = dill.load(indices_file)

    # Load LIME and prediction constants from config
    NUM_SAMPLES = cfg['LIME']['NUM_SAMPLES']
    NUM_FEATURES = cfg['LIME']['NUM_FEATURES']
    CLASS_NAMES = cfg['DATA']['CLASSES']

    # Define column names of the DataFrame representing the prediction results
    col_names = ['Image Filename', 'Predicted Class']
    col_names.extend('p(' + c + ')' for c in CLASS_NAMES)

    # Add columns for client explanation
    if give_explanations:
        col_names.append('Explanation Filename')

    # Set raw image directory based on project config, if not specified
    if raw_img_dir is None:
        raw_img_dir = cfg['PATHS']['BATCH_PRED_IMGS']

    # If no path is specified, create new directory for predictions.
    # The trailing '' component makes the path end with the platform's separator.
    if preds_dir is None:
        preds_dir = os.path.join(cfg['PATHS']['BATCH_PREDS'], cur_date, '')
        # The directory is handed to visualize_explanation as well as used for the CSV, so it
        # is created whenever either output is requested.
        # NOTE(review): presumably visualize_explanation writes into dir_path - confirm.
        if save_results or give_explanations:
            os.makedirs(preds_dir, exist_ok=True)

    # Create DataFrame for raw image file names
    raw_img_df = pd.DataFrame({'filename': os.listdir(raw_img_dir)})
    raw_img_df = raw_img_df[raw_img_df['filename'].str.contains(
        'jpg|png|jpeg', na=False)]  # Enforce image files

    # Create generator for the image files
    img_gen = ImageDataGenerator(preprocessing_function=remove_text,
                                 samplewise_std_normalization=True,
                                 samplewise_center=True)
    img_iter = img_gen.flow_from_dataframe(dataframe=raw_img_df,
                                           directory=raw_img_dir,
                                           x_col="filename",
                                           target_size=cfg['DATA']['IMG_DIM'],
                                           batch_size=1,
                                           class_mode=None,
                                           shuffle=False)

    # Which label the explanation visual should focus on is the same for every image
    label_to_see = 'top'
    if give_explanations and cfg['LIME']['COVID_ONLY'] == True:
        label_to_see = class_indices['COVID-19']

    # Predict (and optionally explain) all images in the specified directory
    rows = []
    print('Predicting and explaining examples.')

    # Iterate over the iterator's own file list, not the directory listing: per the Keras docs,
    # flow_from_dataframe ignores entries it considers invalid, and pairing its output with the
    # unfiltered listing would attach predictions to the wrong filenames.
    for filename in img_iter.filenames:

        # Get preprocessed image and make a prediction.
        try:
            x = next(img_iter)
        except StopIteration:
            break
        y = np.squeeze(predict_instance(x, model))

        # Rearrange prediction probability vector to reflect original ordering of classes in
        # project config: p[k] must be the probability of CLASS_NAMES[k]. class_indices maps a
        # class name to its position in the model output (it is also used that way for
        # label_to_see), so look each configured class up there.
        p = [y[class_indices[c]] for c in CLASS_NAMES]
        predicted_class = CLASS_NAMES[np.argmax(p)]
        row = [filename, predicted_class]
        row.extend(p)

        # Explain this prediction
        if give_explanations:
            explanation, _ = predict_and_explain(np.squeeze(x, axis=0), model,
                                                 explainer, NUM_FEATURES,
                                                 NUM_SAMPLES)

            # Load and resize the corresponding original image (no preprocessing).
            # cv2.imread returns None on failure instead of raising, so check explicitly.
            img_path = os.path.join(raw_img_dir, filename)
            orig_img = cv2.imread(img_path)
            if orig_img is None:
                raise OSError('Could not read image: ' + str(img_path))
            orig_img = cv2.resize(orig_img,
                                  tuple(cfg['DATA']['IMG_DIM']),
                                  interpolation=cv2.INTER_NEAREST)

            # Generate visual for explanation
            exp_filename = visualize_explanation(
                orig_img,
                explanation,
                filename,
                None,
                p,
                CLASS_NAMES,
                label_to_see=label_to_see,
                dir_path=preds_dir)  #GE change file_path to dir_path
            # Keep only the file name, whichever separator style the returned path uses
            row.append(os.path.basename(exp_filename.replace('\\', '/')))
        rows.append(row)

    # Convert results to a Pandas DataFrame and save
    results_df = pd.DataFrame(rows, columns=col_names)
    if save_results:
        results_path = os.path.join(preds_dir, 'predictions.csv')
        results_df.to_csv(results_path,
                          columns=col_names,
                          index_label=False,
                          index=False)
    return results_df