def main(args):
  ###### related DIRs on CNS to store results #######
  discovered_concepts_dir = os.path.join(args.working_dir, 'concepts/')
  results_dir = os.path.join(args.working_dir, 'results/')
  cavs_dir = os.path.join(args.working_dir, 'cavs/')
  activations_dir = os.path.join(args.working_dir, 'acts/')
  results_summaries_dir = os.path.join(args.working_dir, 'results_summaries/')
  if tf.gfile.Exists(args.working_dir):
    tf.gfile.DeleteRecursively(args.working_dir)
  tf.gfile.MakeDirs(args.working_dir)
  tf.gfile.MakeDirs(discovered_concepts_dir)
  tf.gfile.MakeDirs(results_dir)
  tf.gfile.MakeDirs(cavs_dir)
  tf.gfile.MakeDirs(activations_dir)
  tf.gfile.MakeDirs(results_summaries_dir)
  random_concept = 'random_discovery'  # Random concept for statistical testing
  sess = utils.create_session()
  mymodel = ace_helpers.make_model(
      sess, args.model_to_run, args.model_path, args.labels_path)
  # Creating the ConceptDiscovery class instance
  cd = ConceptDiscovery(
      mymodel,
      args.target_class,
      random_concept,
      args.bottlenecks.split(','),
      sess,
      args.source_dir,
      activations_dir,
      cavs_dir,
      num_random_exp=args.num_random_exp,
      channel_mean=True,
      max_imgs=args.max_imgs,
      min_imgs=args.min_imgs,
      num_discovery_imgs=args.max_imgs,
      num_workers=args.num_parallel_workers)
  # Creating the dataset of image patches
  cd.create_patches(param_dict={'n_segments': [15, 50, 80]})
  # Saving the concept discovery target class images
  image_dir = os.path.join(discovered_concepts_dir, 'images')
  tf.gfile.MakeDirs(image_dir)
  ace_helpers.save_images(image_dir,
                          (cd.discovery_images * 256).astype(np.uint8))
  # Discovering Concepts
  cd.discover_concepts(method='KM', param_dicts={'n_clusters': 25})
  del cd.dataset  # Free memory
  del cd.image_numbers
  del cd.patches
  # Save discovered concept images (resized and original sized)
  ace_helpers.save_concepts(cd, discovered_concepts_dir)
  # Calculating CAVs and TCAV scores
  cav_accuraciess = cd.cavs(min_acc=0.0)
  scores = cd.tcavs(test=False)
  ace_helpers.save_ace_report(cd, cav_accuraciess, scores,
                              results_summaries_dir + 'ace_results.txt')
  # Plot examples of discovered concepts
  for bn in cd.bottlenecks:
    ace_helpers.plot_concepts(cd, bn, 10, address=results_dir)
  # Delete concepts that don't pass statistical testing
  cd.test_and_remove_concepts(scores)
def run(self):
  self.sess = utils.create_session()
  if self.model_to_run == 'InceptionV3':
    self.mymodel = model.InceptionV3Wrapper_public(
        self.sess, self.graph_path, self.labels_path)
  if self.model_to_run == 'GoogleNet':
    # Note: the class name spelling below matches the tcav library's wrapper.
    self.mymodel = model.GoolgeNetWrapper_public(
        self.sess, self.graph_path, self.labels_path)
  if self.model_to_run == 'XceptionHPV':
    self.mymodel = model.XceptionHPVWrapper_public(
        self.sess, self.graph_path, self.labels_path)
  act_generator = act_gen.ImageActivationGenerator(
      self.mymodel, self.source_dir, self.activation_dir, max_examples=100)
  tf.logging.set_verbosity(0)
  mytcav = tcav.TCAV(self.sess,
                     self.target,
                     self.concepts,
                     self.bottlenecks,
                     act_generator,
                     self.alphas,
                     cav_dir=self.cav_dir,
                     num_random_exp=10)
  print('This may take a while... Go get coffee!')
  results = mytcav.run(run_parallel=False)
  print('done!')
  # returns dictionary of plot data
  plot_data = utils_plot.plot_results(
      results,
      os.path.join(self.project_dir, 'results/inceptionv3_tcav.png'),
      num_random_exp=10)
def make_logfile(graph_path, logdir):
  sess = utils.create_session()
  with sess.graph.as_default():
    input_graph_def = tf.GraphDef()
    with tf.gfile.FastGFile(graph_path, 'rb') as f:
      input_graph_def.ParseFromString(f.read())
    tf.import_graph_def(input_graph_def)
  train_writer = tf.summary.FileWriter(logdir)
  train_writer.add_graph(sess.graph)
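# Hedged usage sketch for make_logfile (not part of the original file): it imports
# a frozen graph and writes it to a TensorBoard event file so the graph can be
# inspected visually. The paths below are placeholders, not files from this repo.
if __name__ == '__main__':
  make_logfile(graph_path='frozen_models/example_model.pb',  # placeholder path
               logdir='./tb_logs')
  # Then inspect the graph with: tensorboard --logdir ./tb_logs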
def main(args):
  sess = utils.create_session()
  mymodel = make_model(sess, args.model_to_run, args.model_path,
                       args.labels_path)
  source_dirs = f'{args.source_dir}{args.mapping_labels_to_dirs[args.target_class]}'
  filenames = []
  predictions = []
  for source_dir in glob(f'{source_dirs}/*'):
    cd = ConceptDiscovery(
        mymodel,
        args.mapping_labels_to_dirs[args.target_class],
        sess,
        f'{source_dir}/')
    try:
      prediction, filename = cd.predict()
      predictions.append(prediction)
      filenames.append(filename[0])
    except ValueError:
      predictions.append(np.nan)
      filenames.append(source_dir)
  sess.close()
  num_predictions = len(predictions)
  directory = [source_dirs.split('/')[-2]] * num_predictions
  true_labels = [args.target_class] * num_predictions
  predicted_labels = []
  for prediction in predictions:
    try:
      predicted_labels.append(
          mymodel.id_to_label(prediction.tolist()[0].index(np.max(prediction))))
    except AttributeError:
      predicted_labels.append(np.nan)
  prediction_probability = [np.max(prediction) for prediction in predictions]
  df = pd.DataFrame({
      'directory': directory,
      'filename': filenames,
      'true_label': true_labels,
      'predicted_label': predicted_labels,
      'prediction_probability': prediction_probability
  })
  save_filename = (f"./baseline_predictions/"
                   f"{'_'.join(args.target_class.split(' '))}_baseline_predictions.csv")
  save_filepath = Path(save_filename)
  save_filepath.touch(exist_ok=True)
  df.to_csv(save_filename, index=False)
def main(args, activations_dir, cavs_dir, random_concept='random_discovery'):
  sess = utils.create_session()
  mymodel = make_model(sess, args.model_to_run, args.model_path,
                       args.labels_path)
  cd = ConceptDiscovery(
      mymodel,
      args.target_class,
      random_concept,
      args.bottlenecks,
      sess,
      args.source_dir,
      activations_dir,
      cavs_dir)
  bn_activations = cd.get_img_activations(args.img_num, args.concept_num)
  sess.close()
def compute_tcav_scores(target='dog_bedroom',
                        random_counterpart='random_counter_part',
                        concepts=['dog_scene']):
  """Compute TCAV scores of a given list of concepts for a ResNet model.

  Computation is done for each block layer and the logit layer.
  """
  base_dir = os.getcwd()
  model_dir = os.path.join(base_dir, 'models', FLAGS.model)
  img_dir = os.path.join(base_dir, 'data/tcav')
  if FLAGS.model == 'obj':
    cat_fpath = os.path.join(base_dir, 'data/tcav', 'obj_categories.txt')
  else:
    cat_fpath = os.path.join(base_dir, 'data/tcav', 'scene_categories.txt')
  working_dir = os.path.join(base_dir, 'tcav_working_dir', FLAGS.model)
  if not tf.gfile.Exists(working_dir):
    tf.gfile.MakeDirs(working_dir)
    tf.gfile.MakeDirs(os.path.join(working_dir, 'activations'))
    tf.gfile.MakeDirs(os.path.join(working_dir, 'cavs'))
  sess = utils.create_session()
  tcav_model_wrapper = resnet_model_wrapper.ResNetModelWrapper(
      sess, model_dir, cat_fpath)
  act_gen = activation_generator.ImageActivationGenerator(
      tcav_model_wrapper,
      img_dir,
      os.path.join(working_dir, 'activations'),
      max_examples=100,
      normalize_image=False)
  bottlenecks = [
      'block_layer1', 'block_layer2', 'block_layer3', 'block_layer4', 'logit'
  ]
  for bottleneck in bottlenecks:
    mytcav = tcav.TCAV(sess,
                       target,
                       concepts,
                       [bottleneck],
                       act_gen,
                       [0.1],
                       random_counterpart,
                       cav_dir=os.path.join(working_dir, 'cavs'),
                       num_random_exp=2)
    results = mytcav.run()
    utils.print_results(results, random_counterpart='random_counter_part')
def make_model(config_model,
               sess=None,
               randomize=False,
               model_path=None,
               labels_path=None):
  """Make an instance of a model.

  Args:
    config_model: Config.Model instance.
    sess: tf session instance.
    randomize: Start with random weights.
    model_path: Path to the model's saved graph. If None, uses default paths.
    labels_path: Path to the model's line-separated labels text file. If None,
      uses default labels.

  Returns:
    a model instance.

  Raises:
    ValueError: If the model name is not valid.
  """
  try:
    model_wrapper_class = getattr(model, config_model.model_wrapper_class)
  except AttributeError:
    try:
      model_wrapper_class = getattr(
          model, 'load' + config_model.model_wrapper_class)()
    except AttributeError:
      raise ValueError('Invalid model name')
  if sess is None and not getattr(model_wrapper_class,
                                  'do_not_use_tf_session', False):
    sess = utils.create_session()
  if model_path is None:
    model_path = config_model.model_path
  if labels_path is None:
    labels_path = config_model.labels_path
  mymodel = model_wrapper_class(sess,
                                model_saved_path=model_path,
                                labels_path=labels_path,
                                fix_ratio=config_model.fix_ratio,
                                config_path=config_model.config_path)
  if randomize:  # randomize the network!
    sess.run(tf.global_variables_initializer())
  return mymodel
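# Hedged usage sketch for make_model (not part of the original file). The real
# Config.Model class lives elsewhere in this repo, so a SimpleNamespace carrying
# the attributes accessed above stands in for it; the wrapper class name and the
# paths below are placeholders/assumptions.
from types import SimpleNamespace

example_config = SimpleNamespace(
    model_wrapper_class='InceptionV3Wrapper_public',  # must name a class in model.py
    model_path='frozen_models/inception_v3.pb',       # placeholder path
    labels_path='labels/imagenet_labels.txt',         # placeholder path
    fix_ratio=False,
    config_path=None)
# A session is created internally when sess is None.
example_model = make_model(example_config)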
def main(args):
  ###### related DIRs on CNS to store results #######
  discovered_concepts_dir = os.path.join(args.working_dir, 'concepts/')
  results_dir = os.path.join(args.working_dir, 'results/')
  cavs_dir = os.path.join(args.working_dir, 'cavs/')
  activations_dir = os.path.join(args.working_dir, 'acts/')
  results_summaries_dir = os.path.join(args.working_dir, 'results_summaries/')
  if tf.gfile.Exists(args.working_dir):
    tf.gfile.DeleteRecursively(args.working_dir)
  tf.gfile.MakeDirs(args.working_dir)
  tf.gfile.MakeDirs(discovered_concepts_dir)
  tf.gfile.MakeDirs(results_dir)
  tf.gfile.MakeDirs(cavs_dir)
  tf.gfile.MakeDirs(activations_dir)
  tf.gfile.MakeDirs(results_summaries_dir)
  random_concept = 'random_discovery'  # Random concept for statistical testing
  sess = utils.create_session()
  mymodel = ace_helpers.make_model(sess, args.model_to_run, args.model_path)
  # Creating the ConceptDiscovery class instance
  cd = ConceptDiscovery(
      mymodel,
      args.target_class,
      random_concept,
      args.bottlenecks.split(','),
      sess,
      args.source_dir,
      activations_dir,
      cavs_dir,
      num_random_exp=args.num_random_exp,
      channel_mean=True,
      max_imgs=args.max_imgs,
      min_imgs=args.min_imgs,
      num_discovery_imgs=args.max_imgs,
      num_workers=args.num_parallel_workers)
  # Creating the dataset of image patches
  cd.create_patches(param_dict={'n_segments': [15, 50, 80]})
  # Saving the concept discovery target class images
  image_dir = os.path.join(discovered_concepts_dir, 'images')
  tf.gfile.MakeDirs(image_dir)
  ace_helpers.save_images(image_dir,
                          (cd.discovery_images * 256).astype(np.uint8))
  # Discovering Concepts
  cd.discover_concepts(method='KM', param_dicts={'n_clusters': 25})
  del cd.dataset  # Free memory
  del cd.image_numbers
  del cd.patches
  # Save discovered concept images (resized and original sized)
  ace_helpers.save_concepts(cd, discovered_concepts_dir)
  # Calculating CAVs and TCAV scores
  cav_accuraciess = cd.cavs(min_acc=0.0)
  scores = cd.tcavs(test=False)
  ace_helpers.save_ace_report(cd, cav_accuraciess, scores,
                              results_summaries_dir + 'ace_results.txt')
  # Plot examples of discovered concepts
  for bn in cd.bottlenecks:
    ace_helpers.plot_concepts(cd, bn, 10, address=results_dir)
  # Delete concepts that don't pass statistical testing
  cd.test_and_remove_concepts(scores)
  # Train a binary classifier on concept profiles
  report = '\n\n\t\t\t ---Concept space---'
  report += '\n\t ---Classifier Weights---\n\n'
  pos_imgs = cd.load_concept_imgs(
      cd.target_class, 2 * cd.max_imgs + args.num_test)[-args.num_test:]
  neg_imgs = cd.load_concept_imgs('random_test', args.num_test)
  a = ace_helpers.flat_profile(cd, pos_imgs)
  b = ace_helpers.flat_profile(cd, neg_imgs)
  lm, _ = ace_helpers.cross_val(a, b, methods=['logistic'])
  for bn in cd.bottlenecks:
    report += bn + ':\n'
    for i, concept in enumerate(cd.dic[bn]['concepts']):
      report += concept + ':' + str(lm.coef_[-1][i]) + '\n'
  # Test profile classifier on test images
  if args.test_dir is None:
    return
  cd.source_dir = args.test_dir
  pos_imgs = cd.load_concept_imgs(cd.target_class, args.num_test)
  neg_imgs = cd.load_concept_imgs('random500_180', args.num_test)
  a = ace_helpers.flat_profile(cd, pos_imgs)
  b = ace_helpers.flat_profile(cd, neg_imgs)
  x, y = ace_helpers.binary_dataset(a, b, balanced=True)
  probs = lm.predict_proba(x)[:, 1]
  report += '\nProfile Classifier accuracy= {}'.format(
      np.mean((probs > 0.5) == y))
  report += '\nProfile Classifier AUC= {}'.format(
      metrics.roc_auc_score(y, probs))
  report += '\nProfile Classifier PR Area= {}'.format(
      metrics.average_precision_score(y, probs))
  # Compare original network to profile classifier
  target_id = cd.model.label_to_id(cd.target_class.replace('_', ' '))
  predictions = []
  for img in pos_imgs:
    predictions.append(mymodel.get_predictions([img]))
  predictions = np.concatenate(predictions, 0)
  true_predictions = (np.argmax(predictions, -1) == target_id).astype(int)
  truly_predicted = np.where(true_predictions)[0]
  report += '\nNetwork Recall = ' + str(np.mean(true_predictions))
  report += ', ' + str(np.mean(np.max(predictions, -1)[truly_predicted]))
  agreeableness = np.sum(lm.predict(a) * true_predictions) * 1. / (
      np.sum(true_predictions) + 1e-10)
  report += '\nProfile classifier agrees with network in {}%'.format(
      100 * agreeableness)
  with tf.gfile.Open(results_summaries_dir + 'profile_classifier.txt',
                     'w') as f:
    f.write(report)
def main(args, img_path, true_label):
  sess = utils.create_session()
  mymodel = make_model(sess, args.model_to_run, args.model_path,
                       args.labels_path)
  filenames = []
  predictions = []
  baseline_prediction_probs = []
  baseline_predictions = pd.read_csv(
      f"./baseline_prediction_samples/{true_label}baseline_prediction_samples.csv")
  for img in glob(f'{img_path}/*'):
    img_name = img.split('/')[-1]
    cd = ConceptDiscovery(
        mymodel,
        args.target_class,
        sess,
        f"./net_occlusion_heatmaps_delta_prob/{img_name.split('_')[0]}/"
        f"{img_name}/mask_dim_100/{img_name}_image_cropped_to_mask/")
    # No instances where true label != predicted label in sample
    baseline_prediction_probs.append(
        baseline_predictions[baseline_predictions['filename'].str.contains(
            img_name)]['prediction_probability'].values[0])
    try:
      prediction, filename = cd.predict()
      predictions.append(prediction)
      filenames.append(filename[0])
    except ValueError:
      predictions.append(np.nan)
      filenames.append(args.source_dir)
  sess.close()
  true_labels = [true_label] * len(predictions)
  predicted_labels = []
  for prediction in predictions:
    try:
      predicted_labels.append(
          mymodel.id_to_label(prediction.tolist()[0].index(np.max(prediction))))
    except AttributeError:
      predicted_labels.append(np.nan)
  # Map display names back to the labels the model uses.
  if args.target_class == 'crane bird':
    args.target_class = 'crane'
  elif args.target_class == 'african grey':
    args.target_class = 'African grey'
  elif args.target_class == 'tank suit':
    args.target_class = 'maillot'
  true_label_predictions = []
  true_label_prediction_delta = []
  for prediction, baseline_prediction_prob in zip(predictions,
                                                  baseline_prediction_probs):
    try:
      true_label_prediction_prob = prediction.tolist()[0][
          mymodel.label_to_id(args.target_class)]
      true_label_predictions.append(true_label_prediction_prob)
      true_label_prediction_delta.append(
          true_label_prediction_prob - baseline_prediction_prob)
    except AttributeError:
      true_label_predictions.append(np.nan)
      true_label_prediction_delta.append(np.nan)
  prediction_probability = [np.max(prediction) for prediction in predictions]
  df = pd.DataFrame({
      'filename': filenames,
      'true_label': true_labels,
      'true_label_predictions': true_label_predictions,
      'true_label_predictions_delta': true_label_prediction_delta,
      'predicted_label': predicted_labels,
      'prediction_probability': prediction_probability
  })
  save_filename = (f"./net_heatmap_predictions/mask_dim_100/"
                   f"{'_'.join(true_label.split(' '))}_heatmap_predictions.csv")
  save_filepath = Path(save_filename)
  save_filepath.touch(exist_ok=True)
  df.to_csv(save_filename, index=False)
def main(args):
  sess = utils.create_session()
  mymodel = make_model(sess, args.model_to_run, args.model_path,
                       args.labels_path)
  filenames = []
  predictions = []
  for source_dir in glob(f'{args.source_dir}/*'):
    cd = ConceptDiscovery(mymodel, None, None, None, sess, f'{source_dir}/',
                          None, None)
    try:
      prediction, filename = cd.predict()
      predictions.append(prediction)
      filenames.append(filename[0])
    except ValueError:
      predictions.append(np.nan)
      filenames.append(source_dir)
  sess.close()
  num_predictions = len(predictions)
  directory = ['/'.join(args.source_dir.split('/')[:4])] * num_predictions
  mask_dim = [args.source_dir.split('/')[-2].split('_')[-1]] * num_predictions
  true_labels = [args.target_class] * num_predictions
  # For occluded image prediction need to find prediction accuracy of true label
  predicted_labels = []
  for prediction in predictions:
    try:
      predicted_labels.append(
          mymodel.id_to_label(prediction.tolist()[0].index(np.max(prediction))))
    except AttributeError:
      predicted_labels.append(np.nan)
  # Map display names back to the labels the model uses.
  if args.target_class == 'crane bird':
    args.target_class = 'crane'
  elif args.target_class == 'african grey':
    args.target_class = 'African grey'
  elif args.target_class == 'tank suit':
    args.target_class = 'maillot'
  true_label_predictions = []
  for prediction in predictions:
    try:
      true_label_predictions.append(
          prediction.tolist()[0][mymodel.label_to_id(args.target_class)])
    except AttributeError:
      true_label_predictions.append(np.nan)
  prediction_probability = [np.max(prediction) for prediction in predictions]
  df = pd.DataFrame({
      'directory': directory,
      'mask_dim': mask_dim,
      'filename': filenames,
      'true_label': true_labels,
      'true_label_predictions': true_label_predictions,
      'predicted_label': predicted_labels,
      'prediction_probability': prediction_probability
  })
  save_filename = (f"./occluded_image_predictions/mask_dim_{mask_dim[0]}/"
                   f"{'_'.join(args.target_class.split(' '))}_image_"
                   f"{args.source_dir.split('/')[3]}_occluded_image_predictions.csv")
  save_filepath = Path(save_filename)
  save_filepath.touch(exist_ok=True)
  df.to_csv(save_filename, index=False)
# where TCAVs are stored.
tcav_dir = working_dir + '/tcavs/'
# where the images live.
source_dir = root_dir + 'tcav/dataset/for_tcav/'

utils.make_dir_if_not_exists(activation_dir)
utils.make_dir_if_not_exists(working_dir)
utils.make_dir_if_not_exists(tcav_dir)

# this is a regularizer penalty parameter for the linear classifier used to get CAVs.
alphas = [0.1]

print('TCAV dataset path is {}'.format(source_dir))
print('Results are saved at {}'.format(working_dir))

sess = utils.create_session()

#===============================================================================
GRAPH_PATH = root_dir + 'tcav/frozen_models/colored_mnist_number_2layers_cnn.pb'
# GRAPH_PATH = root_dir + 'tcav/frozen_models/normal_mnist_2layers_cnn.pb'

# LABEL_PATH = root_dir + 'tcav/dataset/colored_mnist-color-number'
LABEL_PATH = root_dir + 'tcav/dataset/colored_mnist_number'
# LABEL_PATH = root_dir + 'tcav/dataset/normal_mnist'

mymodel = model.KerasMnistCnnWrapper_public(sess, GRAPH_PATH, LABEL_PATH)
#===============================================================================

act_generator = act_gen.ImageActivationGenerator(
    mymodel, source_dir, activation_dir, max_examples=max_examples)

tf.logging.set_verbosity(tf.logging.INFO)

# only running num_random_exp = 10 to save some time. The paper numbers are
# reported for 500 random runs.
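# Hedged sketch (not part of the original script) of wiring the pieces above into
# an actual TCAV run, mirroring the run() method and compute_tcav_scores() shown
# earlier in this repo. The target, concept, and bottleneck values below are
# placeholder assumptions for the colored-MNIST setup, not values from this script.
target = '2'                   # example target class label (assumption)
concepts = ['red', 'green']    # example concept image folders (assumption)
bottlenecks = ['conv2d_1']     # example bottleneck layer name (assumption)
mytcav = tcav.TCAV(sess, target, concepts, bottlenecks, act_generator, alphas,
                   cav_dir=tcav_dir, num_random_exp=10)
results = mytcav.run(run_parallel=False)
utils.print_results(results)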