def isoforest_def():
    # ## Defense parameters
    # Set these parameters according to the specific attack for which you
    # would like to test the isolation forest.
    # dataset = 'drebin'
    # model_id = 'linearsvm'

    # This path should be the one where the attack script created the attack artifacts
    atk_dir = '/net/data/malware-backdoor/mwbdr/defense_files/drebin__linearsvm__combined_additive_shap__combined_additive_shap__feasible'
    config = 'configs/drebin_fig5.json'

    cfg = common_utils.read_config(config, atk_def=True)
    print(cfg)

    # Load attack data
    watermarked_X = np.load(os.path.join(atk_dir, 'watermarked_X.npy'),
                            allow_pickle=True).item()
    # watermarked_X_test = np.load(os.path.join(atk_dir, 'watermarked_X_test.npy'), allow_pickle=True)
    watermarked_y = np.load(os.path.join(atk_dir, 'watermarked_y.npy'),
                            allow_pickle=True)
    wm_config = np.load(os.path.join(atk_dir, 'wm_config.npy'),
                        allow_pickle=True).item()

    watermarked_X_wmgw = watermarked_X[-cfg['poison_size'][0]:]
    print(watermarked_X_wmgw.shape)

    watermarked_y_wmgw = watermarked_y[-cfg['poison_size'][0]:]
    print(watermarked_y_wmgw.shape)
    print(watermarked_y_wmgw.sum())

    print(
        'Variance of the watermarked features, should be all 0s:',
        np.var(watermarked_X_wmgw[:, wm_config['wm_feat_ids']].toarray(),
               axis=0, dtype=np.float64))

    # ## Analysis
    is_clean = np.ones(watermarked_X.shape[0])
    is_clean[-cfg['poison_size'][0]:] = 0
    print(is_clean.shape)
    print(is_clean.sum())

    # noinspection PyUnusedLocal
    isof_pred, suspect, poison_found, false_positives_poison, isof = isolation_forest_analysis(
        xtrain=watermarked_X, is_clean=is_clean)
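# `isolation_forest_analysis` is defined elsewhere in this repo; callers above
# unpack either five values (including the fitted model) or four. A minimal
# sketch of the expected behaviour, assuming sklearn's IsolationForest, might
# look like this (hypothetical helper, not the repo's exact implementation):
def isolation_forest_analysis_sketch(xtrain, is_clean):
    from sklearn.ensemble import IsolationForest

    # Fit an unsupervised anomaly detector on the (partially poisoned) data.
    isof = IsolationForest(n_estimators=100, random_state=42)
    isof_pred = isof.fit_predict(xtrain)  # +1 = inlier, -1 = outlier

    # Points flagged as outliers are suspected poison.
    suspect = int((isof_pred == -1).sum())
    # Outliers that are actually poisoned (is_clean == 0) were correctly found.
    poison_found = int(((isof_pred == -1) & (is_clean == 0)).sum())
    # Clean points flagged as outliers are false positives.
    false_positives_poison = int(((isof_pred == -1) & (is_clean == 1)).sum())

    return isof_pred, suspect, poison_found, false_positives_poison, isof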
def isoforest_ember():
    data_id = 'ember'

    features, feature_names, name_feat, feat_name = data_utils.load_features(
        constants.infeasible_features, data_id)

    models = ['lightgbm', 'embernn']
    base_def_dir = 'results/defense/'

    def_cfg = common_utils.read_config('configs/defense_cfg.json', False)
    print(def_cfg)

    target = def_cfg['target_features']

    is_clean = defense_utils.get_is_clean(def_cfg['poison_size'][0])
    print(is_clean.shape, sum(is_clean))
    bdr_indices = set(np.argwhere(is_clean == 0).flatten().tolist())
    print(len(bdr_indices))

    # ## Load results
    def_res = {}
    for mod in models:
        res = np.load(os.path.join(base_def_dir, mod + '__def_dict.npy'),
                      allow_pickle=True)
        res = res[()]
        res = {(mod, *key): val for key, val in res.items()}
        def_res.update(res)

    # ## Analysis
    table_cols = [
        'Target', 'Attack', 'Found', 'Removed',
        'New accuracy', 'New accuracy clean'
    ]
    latexdf = pd.DataFrame(columns=table_cols)

    for key, val in sorted(def_res.items(), reverse=True):
        mod = key[0]
        f_s = key[3]
        v_s = key[4]
        w_s = int(key[1])
        p_s = int(key[2])

        def_dir = os.path.join(base_def_dir, str(w_s), str(p_s))
        current_exp_name = common_utils.get_exp_name(data_id, mod, f_s, v_s,
                                                     target)
        current_exp_dir = os.path.join(def_dir, current_exp_name)

        human_exp_name = common_utils.get_human_exp_name(mod, f_s, v_s, target)
        human_target = human_exp_name.split('-')[0]
        human_exp_name = human_exp_name.split('-')[1]

        print('-' * 80)
        print('Experiment name: {}'.format(current_exp_name))
        print('Human name: {}\n'.format(human_exp_name))

        # Generate table entries
        entry_iso = {
            table_cols[0]: human_target,
            table_cols[1]: human_exp_name,
        }

        # Load attack data
        wm_config = np.load(os.path.join(current_exp_dir, 'wm_config.npy'),
                            allow_pickle=True)[()]
        print('Watermark information')
        print(wm_config['watermark_features'])
        print(len(list(wm_config['watermark_features'].keys())))
        print(sorted(list(wm_config['watermark_features'].keys())))
        print()

        x_train_w, y_train_w, x_test_mw = defense_utils.load_attack_data(
            current_exp_dir)
        backdoor_model = defense_filtering.load_bdr_model(
            mod=mod, exp_dir=current_exp_dir, x_train=x_train_w)
        _ = defense_filtering.print_bdr_baseline(x_test_mw, backdoor_model)

        # Dimensionality reduction - Get n most important features
        x_safe, y_safe, safe_model = defense_utils.get_safe_dataset_model(
            mod, safe_pct=0.2, rand=42)
        shap_values_df = defense_utils.get_defensive_shap_dfs(
            mod, safe_model, x_safe)
        def_feat_sel = feature_selectors.ShapleyFeatureSelector(
            shap_values_df,
            criteria=constants.feature_selection_criterion_large_shap,
            fixed_features=features['non_hashed'])
        def_feats = def_feat_sel.get_features(32)

        x_sel, x_gw_sel, x_mw_sel = defense_utils.reduce_to_feats(
            x_train_w, def_feats, y_train_w)

        # Isolation Forest analysis
        isof_pred, suspect, poison_found, false_positives_poison = isolation_forest_analysis(
            xtrain=x_gw_sel, is_clean=is_clean)
        print()
        print('Isolation Forest - sel removed points: {}'.format(suspect))
        print('Isolation Forest - sel found: {}'.format(poison_found))

        entry_iso[table_cols[2]] = poison_found
        entry_iso[table_cols[3]] = suspect

        # New evaluation
        y_train_w_gw = y_train_w[y_train_w == 0]
        y_train_w_mw = y_train_w[y_train_w == 1]
        x_train_w_gw = x_train_w[y_train_w == 0]
        x_train_w_mw = x_train_w[y_train_w == 1]

        x_train_w_gw_filtered = x_train_w_gw[isof_pred == 1]
        y_train_w_gw_filtered = y_train_w_gw[isof_pred == 1]

        x_filtered = np.concatenate((x_train_w_mw, x_train_w_gw_filtered),
                                    axis=0)
        y_filtered = np.concatenate((y_train_w_mw, y_train_w_gw_filtered),
                                    axis=0)
        print('Shape of the filtered data: {} - {}'.format(
            x_filtered.shape, y_filtered.shape))

        cr_clean, cm_clean, cr_backdoor, cm_backdoor = defense_filtering.evaluate_filtering(
            mod=mod,
            x_train_w_sampled=x_filtered,
            y_train_w_sampled=y_filtered,
            x_test_mw=x_test_mw,
            current_exp_dir='')

        entry_iso[table_cols[4]] = cr_backdoor['accuracy']
        entry_iso[table_cols[5]] = cr_clean['accuracy']

        # Append entries to table (DataFrame.append was removed in pandas 2.x)
        latexdf = pd.concat([latexdf, pd.DataFrame([entry_iso])],
                            ignore_index=True)

        print('-' * 80)
        print()

    print(latexdf)
    latexdf.to_csv('table_isof.csv', index=False)
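# `defense_utils.get_is_clean` is defined elsewhere in the repo. Judging from
# the manual construction of `is_clean` in isoforest_def() above, a plausible
# sketch is the following (hypothetical; `n_total` stands in for however the
# repo derives the total number of training points):
def get_is_clean_sketch(poison_size, n_total):
    import numpy as np

    # 1 = clean point, 0 = poisoned; the poisoned points sit at the end.
    is_clean = np.ones(n_total)
    is_clean[-poison_size:] = 0
    return is_clean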
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-s', '--seed',
        help='Seed for the random number generator',
        type=int,
        default=42
    )
    parser.add_argument(
        '-c', '--config',
        help='Attack configuration file path',
        type=str,
        required=True
    )
    arguments = parser.parse_args()

    # Unwrap arguments
    args = vars(arguments)

    config = common_utils.read_config(args['config'], atk_def=True)
    config['seed'] = args['seed']

    run_attacks(config)
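# Example invocation (the script name is illustrative; the config path must
# point at an attack configuration such as the ones used above):
#
#   python backdoor_attack.py --config configs/drebin_fig5.json --seed 42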
def generate_watermark():
    seed = 24
    safe_percentage = 0.2
    data_id = 'ember'

    cfg = common_utils.read_config('configs/attack_cfg_kernelshap.json',
                                   atk_def=True)
    cfg['to_json'] = True
    print(cfg)

    mod = cfg['model']
    target = cfg['target_features']
    wm_size = cfg['watermark_size'][0]

    features, feature_names, name_feat, feat_name = data_utils.load_features(
        constants.infeasible_features, data_id)

    # Select the defensive features using clean SHAP values
    x_train, y_train, x_test, y_test, original_model = attack_utils.get_ember_train_test_model()

    _, x_limited, _, y_limited = train_test_split(x_train, y_train,
                                                  test_size=safe_percentage,
                                                  random_state=seed)
    print(x_limited.shape, y_limited.shape)

    limited_model = notebook_utils.train_model(x_limited, y_limited)

    data_summ = shap.kmeans(x_limited, 30)
    inside_data = data_summ.data
    np.save('kmeans_30_xtrain_limited', inside_data)

    x_train_sel = x_limited[:, features['feasible']]
    print(x_train_sel.shape)
    clusters_sel = inside_data[:, features['feasible']]
    print(clusters_sel.shape)

    import warnings
    warnings.filterwarnings('ignore')

    wrapperino = ModWrap(original_model=limited_model,
                         clusters=inside_data,
                         nsamples=1000,
                         feas_feat=features['feasible'])
    explainer = shap.KernelExplainer(wrapperino.predict, clusters_sel,
                                     link='logit')
    exp = explainer.shap_values(x_train_sel, nsamples=200)
    np.save('explanations_limited', exp)

    reconstruced_shap = np.copy(x_limited)
    print(reconstruced_shap.shape)
    reconstruced_shap[:, features['feasible']] = exp
    assert np.allclose(reconstruced_shap[0][features['feasible'][16]],
                       exp[0][16])
    np.save('reconstucted_shaps_limited', reconstruced_shap)

    shap_values_df = pd.DataFrame(reconstruced_shap)

    # ## Setup
    wm_dir = 'configs/watermark'
    if not os.path.exists(wm_dir):
        os.makedirs(wm_dir)

    f_selectors = attack_utils.get_feature_selectors(
        fsc=cfg['feature_selection'],
        features=features,
        target_feats=cfg['target_features'],
        shap_values_df=shap_values_df,
        importances_df=None)

    v_selectors = attack_utils.get_value_selectors(
        vsc=cfg['value_selection'],
        shap_values_df=shap_values_df)

    feat_value_selector_pairs = common_utils.get_feat_value_pairs(
        feat_sel=list(f_selectors.keys()),
        val_sel=list(v_selectors.keys()))
    print(feat_value_selector_pairs)

    for (f_s, v_s) in feat_value_selector_pairs:
        current_exp_name = common_utils.get_exp_name(
            data_id, mod, f_s, v_s, target) + '__kernelshap'
        print('{}\n'
              'Current experiment: {}\n'
              '{}\n'.format('-' * 80, current_exp_name, '-' * 80))

        # Create experiment directories
        current_exp_dir = os.path.join('../results', current_exp_name)
        current_exp_img_dir = os.path.join(current_exp_dir, 'images')
        if not os.path.exists(current_exp_img_dir):
            os.makedirs(current_exp_img_dir)

        # Strategy
        feat_selector = f_selectors[f_s]
        value_selector = v_selectors[v_s]

        if f_s == constants.feature_selection_criterion_combined:
            value_selector = feat_selector

        # Let the feature value selector know about the training set
        if value_selector.X is None:
            value_selector.X = x_limited

        # Get the feature IDs that we'll use
        start_time = time.time()
        if f_s == constants.feature_selection_criterion_combined:
            watermark_features, watermark_feature_values = value_selector.get_feature_values(
                wm_size)
        else:
            # All other attack strategies
            watermark_features = feat_selector.get_features(wm_size)
            print('Selecting watermark features took {:.2f} seconds'.format(
                time.time() - start_time))

            # Now select some values for those features
            start_time = time.time()
            watermark_feature_values = value_selector.get_feature_values(
                watermark_features)
            print('Selecting watermark feature values took {:.2f} seconds'.format(
                time.time() - start_time))

        watermark_features_map = OrderedDict()
        for feature, value in zip(watermark_features,
                                  watermark_feature_values):
            watermark_features_map[feature_names[feature]] = value
        print(watermark_features_map)

        # Output the watermark on file for reuse
        if cfg['to_json']:
            wm_file_name = '{}__{}'.format(current_exp_name, str(wm_size))
            wm_file = os.path.join(wm_dir, wm_file_name)

            wm_json = {'order': {}, 'map': {}}
            for i, key in enumerate(reversed(watermark_features_map)):
                wm_json['order'][i] = key
                wm_json['map'][key] = str(watermark_features_map[key])

            json.dump(wm_json,
                      open(wm_file, 'w', encoding='utf-8'),
                      indent=2)
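# The JSON emitted above has the shape below ('order' records the selection
# order, 'map' holds the stringified feature values); the feature names and
# values here are made up for illustration:
#
#   {
#     "order": {"0": "major_subsystem_version", "1": "num_sections"},
#     "map":   {"major_subsystem_version": "2.0", "num_sections": "5"}
#   }
#
# `attack_utils.load_watermark`, used by evaluate_backdoor() below, presumably
# inverts this mapping. A hypothetical sketch (`name_feat` is the name-to-id
# mapping returned by data_utils.load_features):
def load_watermark_sketch(wm_file, wm_size, name_feat=None):
    import json

    with open(wm_file, 'r', encoding='utf-8') as f:
        wm_json = json.load(f)

    # Yield (feature, value) pairs following the recorded selection order,
    # truncated to the requested watermark size. JSON object keys are strings,
    # so the integer positions are looked up as str(i).
    for i in range(wm_size):
        feat_name = wm_json['order'][str(i)]
        value = float(wm_json['map'][feat_name])
        feat = name_feat[feat_name] if name_feat else feat_name
        yield feat, value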
def evaluate_backdoor():
    # ## Config
    cfg = common_utils.read_config('configs/ogcontagio_fig5.json',
                                   atk_def=True)
    cfg['seed'] = 42
    print(cfg)

    model_id = cfg['model']
    seed = cfg['seed']
    to_save = cfg.get('save', '')
    target = cfg['target_features']
    dataset = cfg['dataset']
    k_perc = cfg['k_perc']
    k_data = cfg['k_data']
    poison_sizes = cfg['poison_size']
    iterations = cfg['iterations']
    watermark_size = cfg['watermark_size'][0]

    # Data
    x_train_orig, y_train_orig, x_test_orig, y_test_orig = data_utils.load_dataset(
        dataset=dataset)
    train_files, test_files = data_utils.load_pdf_train_test_file_names()
    print(x_train_orig.shape, x_test_orig.shape)

    wm_name = 'ogcontagio__pdfrf__combined_shap__combined_shap__feasible__30'
    watermark = dict(
        attack_utils.load_watermark(wm_file='configs/watermark/' + wm_name,
                                    wm_size=16))

    bdr_gw_df = pd.read_csv(
        os.path.join(constants.SAVE_FILES_DIR,
                     'bdr_{}_{}'.format('gw', wm_name)))
    bdr_mw_df = pd.read_csv(
        os.path.join(constants.SAVE_FILES_DIR,
                     'bdr_{}_{}'.format('mw', wm_name)))

    # Model
    original_model = model_utils.load_model(
        model_id=model_id,
        data_id=dataset,
        save_path=constants.SAVE_MODEL_DIR,
        file_name=dataset + '_' + model_id,
    )

    # Poisoning candidates
    mw_poisoning_candidates, mw_poisoning_candidates_idx = attack_utils.get_poisoning_candidate_samples(
        original_model, x_test_orig, y_test_orig)

    train_filename_gw = train_files[y_train_orig == 0]
    train_filename_gw_set = set(train_filename_gw)
    test_filename_mw = test_files[y_test_orig == 1]
    test_filename_mw_set = set(test_filename_mw)
    candidate_filename_mw = test_filename_mw[mw_poisoning_candidates_idx]
    candidate_filename_mw_set = set(candidate_filename_mw)

    ind_train_filenames = dict(
        zip(train_filename_gw.tolist(), range(train_filename_gw.shape[0])))
    ind_test_filenames = dict(
        zip(test_filename_mw.tolist(), range(test_filename_mw.shape[0])))

    # From the set of PDF files that were correctly poisoned we need to keep
    # only the benign points that are present in the training set and only the
    # malicious points that are present in the test set.

    # Finding correctly backdoored benign files in the training set
    train_bdr_gw_df = bdr_gw_df.copy()
    to_drop = []
    for index, row in bdr_gw_df.iterrows():
        if row['filename'] not in train_filename_gw_set:
            to_drop.append(index)
    train_bdr_gw_df.drop(index=to_drop, inplace=True)
    print(train_bdr_gw_df.shape)

    # Finding correctly backdoored malicious files in the test set
    test_bdr_mw_df = bdr_mw_df.copy()
    to_drop = []
    for index, row in bdr_mw_df.iterrows():
        if (row['filename'] not in test_filename_mw_set
                or row['filename'] not in candidate_filename_mw_set):
            to_drop.append(index)
    test_bdr_mw_df.drop(index=to_drop, inplace=True)
    print(test_bdr_mw_df.shape)

    # We also need to filter out the malware candidates which were not
    # correctly poisoned
    to_keep = [True] * candidate_filename_mw.shape[0]
    for i in range(candidate_filename_mw.shape[0]):
        if candidate_filename_mw[i] not in test_bdr_mw_df['filename'].to_list():
            to_keep[i] = False
    candidate_filename_mw = candidate_filename_mw[to_keep]
    mw_poisoning_candidates = mw_poisoning_candidates[to_keep]
    print(mw_poisoning_candidates.shape)

    # Finally, we will need a mapping between the names of the poisoned files
    # and their indices in the training and test arrays, respectively.
    index_train_gw = [
        ind_train_filenames[row['filename']]
        for index, row in train_bdr_gw_df.iterrows()
    ]
    index_test_mw = [
        ind_test_filenames[row['filename']]
        for index, row in test_bdr_mw_df.iterrows()
    ]

    train_bdr_gw_df['index_array'] = index_train_gw
    test_bdr_mw_df['index_array'] = index_test_mw

    # Attack
    # We need to substitute the feature vectors of the benign files used
    # during the attack with the ones obtained by directly poisoning the PDF
    # files. The new data can then be used to train a classifier, which will
    # be poisoned as a result. Finally, the same backdoor trigger (watermark)
    # is applied to previously correctly classified malicious files to test
    # whether the attack was successful.
    f_s = 'combined_shap'
    v_s = 'combined_shap'

    current_exp_name = common_utils.get_exp_name(dataset, model_id, f_s, v_s,
                                                 target)
    print('{}\nCurrent experiment: {}\n{}\n'.format('-' * 80,
                                                    current_exp_name,
                                                    '-' * 80))

    # Create experiment directories
    current_exp_dir = os.path.join('results', current_exp_name)
    current_exp_img_dir = os.path.join(current_exp_dir, 'images')
    if not os.path.exists(current_exp_img_dir):
        os.makedirs(current_exp_img_dir)

    summaries = []

    for poison_size in poison_sizes:
        for iteration in range(iterations):
            # Create copies of the original data
            x_train = np.copy(x_train_orig)
            y_train = np.copy(y_train_orig)
            x_test = np.copy(x_test_orig)
            y_test = np.copy(y_test_orig)
            x_orig_mw_only_test = np.copy(mw_poisoning_candidates)

            x_train_gw = x_train[y_train == 0]
            y_train_gw = y_train[y_train == 0]
            x_train_mw = x_train[y_train == 1]
            y_train_mw = y_train[y_train == 1]

            # Select points to watermark
            train_gw_to_be_watermarked_df = train_bdr_gw_df.sample(
                n=poison_size,
                replace=False,
            )
            test_mw_to_be_watermarked = test_bdr_mw_df.sample(
                n=len(index_test_mw), replace=False)

            # Get the watermarked vectors
            train_gw_to_be_watermarked = train_gw_to_be_watermarked_df[
                'index_array'].to_numpy()
            x_train_gw_to_be_watermarked = train_gw_to_be_watermarked_df.drop(
                labels=['index_array', 'filename'], axis=1).to_numpy()
            y_train_gw_to_be_watermarked = np.zeros_like(
                train_gw_to_be_watermarked)

            x_test_mw = test_mw_to_be_watermarked.drop(
                labels=['index_array', 'filename'], axis=1).to_numpy()

            # Remove old goodware vectors from data matrix
            x_train_gw_no_watermarks = np.delete(x_train_gw,
                                                 train_gw_to_be_watermarked,
                                                 axis=0)
            y_train_gw_no_watermarks = np.delete(y_train_gw,
                                                 train_gw_to_be_watermarked,
                                                 axis=0)

            # Generate final training set
            x_train_watermarked = np.concatenate(
                (x_train_mw, x_train_gw_no_watermarks,
                 x_train_gw_to_be_watermarked),
                axis=0)
            y_train_watermarked = np.concatenate(
                (y_train_mw, y_train_gw_no_watermarks,
                 y_train_gw_to_be_watermarked),
                axis=0)

            # Train the model and evaluate it -- this section mirrors the
            # code in attack_utils.py
            start_time = time.time()
            backdoor_model = model_utils.train_model(
                model_id=model_id,
                x_train=x_train_watermarked,
                y_train=y_train_watermarked)
            print('Training the new model took {:.2f} seconds'.format(
                time.time() - start_time))

            orig_origts_predictions = original_model.predict(
                x_orig_mw_only_test)
            orig_mwts_predictions = original_model.predict(x_test_mw)
            orig_gw_predictions = original_model.predict(
                x_train_gw_no_watermarks)
            orig_wmgw_predictions = original_model.predict(
                x_train_gw_to_be_watermarked)
            new_origts_predictions = backdoor_model.predict(
                x_orig_mw_only_test)
            new_mwts_predictions = backdoor_model.predict(x_test_mw)

            orig_origts_predictions = np.array(
                [1 if pred > 0.5 else 0 for pred in orig_origts_predictions])
            orig_mwts_predictions = np.array(
                [1 if pred > 0.5 else 0 for pred in orig_mwts_predictions])
            orig_gw_predictions = np.array(
                [1 if pred > 0.5 else 0 for pred in orig_gw_predictions])
            orig_wmgw_predictions = np.array(
                [1 if pred > 0.5 else 0 for pred in orig_wmgw_predictions])
            new_origts_predictions = np.array(
                [1 if pred > 0.5 else 0 for pred in new_origts_predictions])
            new_mwts_predictions = np.array(
                [1 if pred > 0.5 else 0 for pred in new_mwts_predictions])

            assert len(x_test_mw) == x_orig_mw_only_test.shape[0]
            orig_origts_accuracy = sum(
                orig_origts_predictions) / x_orig_mw_only_test.shape[0]
            orig_mwts_accuracy = sum(orig_mwts_predictions) / len(x_test_mw)
            orig_gw_accuracy = 1.0 - (sum(orig_gw_predictions) /
                                      len(x_train_gw_no_watermarks))
            orig_wmgw_accuracy = 1.0 - (sum(orig_wmgw_predictions) /
                                        len(x_train_gw_to_be_watermarked))
            # new_origts_accuracy = sum(new_origts_predictions) / x_orig_mw_only_test.shape[0]
            new_mwts_accuracy = sum(new_mwts_predictions) / len(x_test_mw)

            num_watermarked_still_mw = sum(orig_mwts_predictions)

            successes = failures = benign_in_both_models = 0
            for orig, new in zip(orig_mwts_predictions, new_mwts_predictions):
                if orig == 0 and new == 1:
                    # We're predicting only on malware samples. So if the
                    # original model missed this sample and the new model
                    # now causes it to be detected, we've failed in our
                    # mission.
                    failures += 1
                elif orig == 1 and new == 0:
                    # It was considered malware by the original model but no
                    # longer is with the new poisoned model, so we've
                    # succeeded in our mission.
                    successes += 1
                elif new == 0:
                    benign_in_both_models += 1

            # Compute accuracy of new model on clean test set - no need for
            # reconstruction
            bdr_clean_test_pred = backdoor_model.predict(x_test_orig)
            bdr_clean_test_pred = np.array(
                [1 if pred > 0.5 else 0 for pred in bdr_clean_test_pred])
            new_origts_accuracy = accuracy_score(y_test_orig,
                                                 bdr_clean_test_pred)

            # Compute false positives and negatives for both models
            start_time = time.time()
            orig_origts_fpr_fnr = attack_utils.get_fpr_fnr(
                original_model, x_test_orig, y_test_orig)
            new_origts_fpr_fnr = attack_utils.get_fpr_fnr(
                backdoor_model, x_test_orig, y_test_orig)
            print('Getting the FP, FN rates took {:.2f} seconds'.format(
                time.time() - start_time))

            # Save the results
            wm_config = {
                'num_gw_to_watermark': poison_size,
                'num_mw_to_watermark': x_test_mw.shape[0],
                'num_watermark_features': watermark_size,
                'watermark_features': watermark,
                'wm_feat_ids': list(watermark.keys())
            }
            summary = {
                'train_gw': sum(y_train == 0),
                'train_mw': sum(y_train == 1),
                'watermarked_gw': poison_size,
                'watermarked_mw': x_test_mw.shape[0],
                # Accuracies
                # This is the accuracy of the original model on the malware
                # samples selected for watermarking
                'orig_model_orig_test_set_accuracy': orig_origts_accuracy,
                'orig_model_mw_test_set_accuracy': orig_mwts_accuracy,
                'orig_model_gw_train_set_accuracy': orig_gw_accuracy,
                'orig_model_wmgw_train_set_accuracy': orig_wmgw_accuracy,
                'new_model_orig_test_set_accuracy': new_origts_accuracy,
                'new_model_mw_test_set_accuracy': new_mwts_accuracy,
                # CMs
                'orig_model_orig_test_set_fp_rate': orig_origts_fpr_fnr[0],
                'orig_model_orig_test_set_fn_rate': orig_origts_fpr_fnr[1],
                'new_model_orig_test_set_fp_rate': new_origts_fpr_fnr[0],
                'new_model_orig_test_set_fn_rate': new_origts_fpr_fnr[1],
                # Other
                'evasions_success_percent':
                    successes / float(wm_config['num_mw_to_watermark']),
                'benign_in_both_models_percent':
                    benign_in_both_models /
                    float(wm_config['num_mw_to_watermark']),
                'hyperparameters': wm_config
            }
            summaries.append(summary)

            notebook_utils.print_experiment_summary(summary, 'combined_shap',
                                                    None)
            del x_train, y_train, x_test, y_test, x_orig_mw_only_test, \
                train_gw_to_be_watermarked_df, test_mw_to_be_watermarked, \
                backdoor_model

    summaries_df = pd.DataFrame()
    for s in summaries:
        s_c = copy.deepcopy(s)
        s_h = s_c.pop('hyperparameters')
        s_c['num_watermark_features'] = s_h['num_watermark_features']
        # DataFrame.append was removed in pandas 2.x; use pd.concat instead
        summaries_df = pd.concat([summaries_df, pd.DataFrame([s_c])],
                                 ignore_index=True)

    summaries_df.to_csv(
        os.path.join(current_exp_dir,
                     current_exp_name + '__summary_df.csv'))

    # Plotting
    palette1 = sns.color_palette(
        ['#3B82CE', '#FFCC01', '#F2811D', '#DA4228', '#3BB3A9'])

    to_plot_df = pd.DataFrame()
    for s in summaries:
        wm_gw_pct = '{:.1f}%'.format(s['watermarked_gw'] * 100 /
                                     constants.OGCONTAGIO_TRAIN_SIZE)
        to_plot_df = pd.concat([
            to_plot_df,
            pd.DataFrame([{
                constants.human_mapping['watermarked_gw']:
                    wm_gw_pct,
                constants.human_mapping['watermarked_mw']:
                    s['watermarked_mw'],
                constants.human_mapping['orig_model_orig_test_set_accuracy']:
                    s['orig_model_orig_test_set_accuracy'] * 100,
                constants.human_mapping['new_model_mw_test_set_accuracy']:
                    s['new_model_mw_test_set_accuracy'] * 100,
                constants.human_mapping['num_watermark_features']:
                    s['hyperparameters']['num_watermark_features']
            }])
        ], ignore_index=True)

    fig = plt.figure(figsize=(12, 8))
    sns.set(style='whitegrid', font_scale=1.4)

    x_col = constants.human_mapping['watermarked_gw']
    y_col = constants.human_mapping['new_model_mw_test_set_accuracy']
    hue_col = constants.human_mapping['num_watermark_features']

    bplt = sns.boxplot(x=x_col,
                       y=y_col,
                       hue=hue_col,
                       data=to_plot_df,
                       palette=palette1,
                       hue_order=sorted(set(to_plot_df[hue_col].to_list())),
                       dodge=True,
                       linewidth=2.5)
    axes = bplt.axes
    axes.set_ylim(-5, 105)

    hline = constants.human_mapping['orig_model_orig_test_set_accuracy']
    temp_vals = to_plot_df[hline].to_numpy()
    assert np.all(temp_vals == temp_vals[0])
    hline = temp_vals[0]
    axes.axhline(hline,
                 ls='--',
                 color='red',
                 linewidth=2,
                 label='Clean model baseline')

    fixed_col = 'fixed_num_watermark_features'
    fig.savefig(os.path.join(current_exp_img_dir, fixed_col + '.png'),
                bbox_inches='tight')
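# The 0.5-thresholding in evaluate_backdoor() is repeated for each prediction
# vector. A small vectorized helper (a suggested refactor, not an existing
# repo function) would tighten those call sites:
def binarize_predictions(predictions, threshold=0.5):
    import numpy as np

    # Equivalent to [1 if pred > threshold else 0 for pred in predictions],
    # but vectorized.
    return (np.asarray(predictions) > threshold).astype(int)

# Usage: orig_mwts_predictions = binarize_predictions(orig_mwts_predictions)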