Example #1
def main(args):

    ###### Directories to store results ######
    discovered_concepts_dir = os.path.join(args.working_dir, 'concepts/')
    results_dir = os.path.join(args.working_dir, 'results/')
    cavs_dir = os.path.join(args.working_dir, 'cavs/')
    activations_dir = os.path.join(args.working_dir, 'acts/')
    results_summaries_dir = os.path.join(args.working_dir,
                                         'results_summaries/')
    if tf.gfile.Exists(args.working_dir):
        tf.gfile.DeleteRecursively(args.working_dir)
    tf.gfile.MakeDirs(args.working_dir)
    tf.gfile.MakeDirs(discovered_concepts_dir)
    tf.gfile.MakeDirs(results_dir)
    tf.gfile.MakeDirs(cavs_dir)
    tf.gfile.MakeDirs(activations_dir)
    tf.gfile.MakeDirs(results_summaries_dir)
    random_concept = 'random_discovery'  # Random concept for statistical testing
    sess = utils.create_session()
    mymodel = ace_helpers.make_model(sess, args.model_to_run, args.model_path,
                                     args.labels_path)
    # Creating the ConceptDiscovery class instance
    cd = ConceptDiscovery(mymodel,
                          args.target_class,
                          random_concept,
                          args.bottlenecks.split(','),
                          sess,
                          args.source_dir,
                          activations_dir,
                          cavs_dir,
                          num_random_exp=args.num_random_exp,
                          channel_mean=True,
                          max_imgs=args.max_imgs,
                          min_imgs=args.min_imgs,
                          num_discovery_imgs=args.max_imgs,
                          num_workers=args.num_parallel_workers)
    # Creating the dataset of image patches
    cd.create_patches(param_dict={'n_segments': [15, 50, 80]})
    # Saving the concept discovery target class images
    image_dir = os.path.join(discovered_concepts_dir, 'images')
    tf.gfile.MakeDirs(image_dir)
    ace_helpers.save_images(image_dir,
                            (cd.discovery_images * 256).astype(np.uint8))
    # Discovering Concepts
    cd.discover_concepts(method='KM', param_dicts={'n_clusters': 25})
    del cd.dataset  # Free memory
    del cd.image_numbers
    del cd.patches
    # Save discovered concept images (resized and original sized)
    ace_helpers.save_concepts(cd, discovered_concepts_dir)
    # Calculating CAVs and TCAV scores
    cav_accuracies = cd.cavs(min_acc=0.0)
    scores = cd.tcavs(test=False)
    ace_helpers.save_ace_report(cd, cav_accuracies, scores,
                                os.path.join(results_summaries_dir, 'ace_results.txt'))
    # Plot examples of discovered concepts
    for bn in cd.bottlenecks:
        ace_helpers.plot_concepts(cd, bn, 10, address=results_dir)
    # Delete concepts that don't pass statistical testing
    cd.test_and_remove_concepts(scores)
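
Note: the `args` namespace read throughout this example is typically built with argparse; the minimal sketch below covers only the attributes used above (flag names match the attributes, default values are illustrative and not taken from the original script):

import argparse

def parse_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument('--working_dir', default='./ACE')
    parser.add_argument('--source_dir', default='./ImageNet')
    parser.add_argument('--model_to_run', default='GoogleNet')
    parser.add_argument('--model_path', default='./model.pb')
    parser.add_argument('--labels_path', default='./labels.txt')
    parser.add_argument('--target_class', default='zebra')
    parser.add_argument('--bottlenecks', default='mixed4c',
                        help='Comma-separated bottleneck layer names.')
    parser.add_argument('--num_random_exp', type=int, default=20)
    parser.add_argument('--max_imgs', type=int, default=40)
    parser.add_argument('--min_imgs', type=int, default=40)
    parser.add_argument('--num_parallel_workers', type=int, default=0)
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_arguments())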
Example #2
    def run(self):
        self.sess = utils.create_session()
        if self.model_to_run == 'InceptionV3':
            self.mymodel = model.InceptionV3Wrapper_public(
                self.sess, self.graph_path, self.labels_path)
        elif self.model_to_run == 'GoogleNet':
            # Note: 'Goolge' is the spelling used by the model wrapper module.
            self.mymodel = model.GoolgeNetWrapper_public(
                self.sess, self.graph_path, self.labels_path)
        elif self.model_to_run == 'XceptionHPV':
            self.mymodel = model.XceptionHPVWrapper_public(
                self.sess, self.graph_path, self.labels_path)
        act_generator = act_gen.ImageActivationGenerator(self.mymodel,
                                                         self.source_dir,
                                                         self.activation_dir,
                                                         max_examples=100)
        tf.logging.set_verbosity(0)
        mytcav = tcav.TCAV(self.sess,
                           self.target,
                           self.concepts,
                           self.bottlenecks,
                           act_generator,
                           self.alphas,
                           cav_dir=self.cav_dir,
                           num_random_exp=10)
        print('This may take a while... Go get coffee!')
        results = mytcav.run(run_parallel=False)
        print('done!')

        # returns dictionary of plot data
        plot_data = utils_plot.plot_results(
            results,
            os.path.join(self.project_dir, 'results/inceptionv3_tcav.png'),
            num_random_exp=10)
Example #3
def make_logfile(graph_path, logdir):
    """Write a frozen graph to a TensorBoard event file."""
    sess = utils.create_session()
    with sess.graph.as_default():
        input_graph_def = tf.GraphDef()
        with tf.gfile.FastGFile(graph_path, 'rb') as f:
            input_graph_def.ParseFromString(f.read())
        tf.import_graph_def(input_graph_def)
        writer = tf.summary.FileWriter(logdir)
        writer.add_graph(sess.graph)
        writer.close()  # Flush events to disk
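
A hedged usage sketch (the paths are illustrative): once the event file has been written, the imported graph can be browsed in TensorBoard.

make_logfile('./frozen_models/inception_v3.pb', './logs/inception_v3')
# Then, from a shell:
#   tensorboard --logdir ./logs/inception_v3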
Example #4
def main(args):
    
    sess = utils.create_session()
    mymodel = make_model(sess, args.model_to_run, args.model_path, args.labels_path)
    source_dirs = f'{args.source_dir}{args.mapping_labels_to_dirs[args.target_class]}'

    filenames = []
    predictions = []

    for source_dir in glob(f'{source_dirs}/*'):

        cd = ConceptDiscovery(
            mymodel,
            args.mapping_labels_to_dirs[args.target_class],
            sess,
            f'{source_dir}/')

        try:
            prediction, filename = cd.predict()
            predictions.append(prediction)
            filenames.append(filename[0])
        except ValueError:
            # Keep the rows aligned when prediction fails
            predictions.append(np.nan)
            filenames.append(source_dir)
    
    sess.close()

    num_predictions = len(predictions)
    directory = [source_dirs.split('/')[-2]] * num_predictions
    true_labels = [args.target_class] * num_predictions

    predicted_labels = []
    for prediction in predictions:
        try:
            predicted_labels.append(mymodel.id_to_label(
                prediction.tolist()[0].index(np.max(prediction))))
        except AttributeError:
            predicted_labels.append(np.nan)

    prediction_probability = [np.max(prediction) for prediction in predictions]

    df = pd.DataFrame({
        'directory': directory,
        'filename': filenames,
        'true_label': true_labels,
        'predicted_label': predicted_labels,
        'prediction_probability': prediction_probability
    })
    
    save_filename = f"./baseline_predictions/{'_'.join(args.target_class.split(' '))}_baseline_predictions.csv"
    save_filepath = Path(save_filename)
    save_filepath.touch(exist_ok=True)

    df.to_csv(save_filename, index=False)
Example #5
def main(args, activations_dir, cavs_dir, random_concept='random_discovery'):

    sess = utils.create_session()
    mymodel = make_model(sess, args.model_to_run, args.model_path,
                         args.labels_path)

    cd = ConceptDiscovery(mymodel, args.target_class, random_concept,
                          args.bottlenecks, sess, args.source_dir,
                          activations_dir, cavs_dir)

    bn_activations = cd.get_img_activations(args.img_num, args.concept_num)

    sess.close()
Example #6
def compute_tcav_scores(target='dog_bedroom',
                        random_counterpart='random_counter_part',
                        concepts=['dog_scene']):
    """Compute TCAV scores of a given list of concepts for a ResNet model.

  Computation is done for each block layer and the logit layer.
  """

    base_dir = os.getcwd()
    model_dir = os.path.join(base_dir, 'models', FLAGS.model)
    img_dir = os.path.join(base_dir, 'data/tcav')
    if FLAGS.model == 'obj':
        cat_fpath = os.path.join(base_dir, 'data/tcav', 'obj_categories.txt')
    else:
        cat_fpath = os.path.join(base_dir, 'data/tcav', 'scene_categories.txt')
    working_dir = os.path.join(base_dir, 'tcav_working_dir', FLAGS.model)
    if not tf.gfile.Exists(working_dir):
        tf.gfile.MakeDirs(working_dir)
        tf.gfile.MakeDirs(os.path.join(working_dir, 'activations'))
        tf.gfile.MakeDirs(os.path.join(working_dir, 'cavs'))

    sess = utils.create_session()
    tcav_model_wrapper = resnet_model_wrapper.ResNetModelWrapper(
        sess, model_dir, cat_fpath)
    act_gen = activation_generator.ImageActivationGenerator(
        tcav_model_wrapper,
        img_dir,
        os.path.join(working_dir, 'activations'),
        max_examples=100,
        normalize_image=False)

    bottlenecks = [
        'block_layer1', 'block_layer2', 'block_layer3', 'block_layer4', 'logit'
    ]
    for bottleneck in bottlenecks:
        mytcav = tcav.TCAV(sess,
                           target,
                           concepts,
                           [bottleneck],
                           act_gen,
                           [0.1],
                           random_counterpart,
                           cav_dir=os.path.join(working_dir, 'cavs'),
                           num_random_exp=2)
        results = mytcav.run()
        utils.print_results(results, random_counterpart=random_counterpart)
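
FLAGS is assumed here to be an absl-style flags object; a minimal sketch of the single flag this example reads (the default value is illustrative):

from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_string('model', 'obj',
                    "Which ResNet to explain; 'obj' selects the object-category model.")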
Example #7
def make_model(config_model,
               sess=None,
               randomize=False,
               model_path=None,
               labels_path=None):
    """Make an instance of a model.

  Args:
    config_model: Config.Model instance.
    sess: tf session instance.
    randomize: Start with random weights
    model_path: Path to models saved graph. If None uses default paths
    labels_path: Path to models line separated labels text file. If None uses
                 default labels.

  Returns:
    a model instance.

  Raises:
    ValueError: If model name is not valid.
  """
    try:
        model_wrapper_class = getattr(model, config_model.model_wrapper_class)
    except AttributeError:
        try:
            model_wrapper_class = getattr(
                model, 'load' + config_model.model_wrapper_class)()
        except AttributeError:
            raise ValueError('Invalid model name')
    if sess is None and not getattr(model_wrapper_class,
                                    'do_not_use_tf_session', False):
        sess = utils.create_session()
    if model_path is None:
        model_path = config_model.model_path
    if labels_path is None:
        labels_path = config_model.labels_path
    mymodel = model_wrapper_class(sess,
                                  model_saved_path=model_path,
                                  labels_path=labels_path,
                                  fix_ratio=config_model.fix_ratio,
                                  config_path=config_model.config_path)
    if randomize:  # randomize the network!
        sess.run(tf.global_variables_initializer())
    return mymodel
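
A hedged usage sketch: Config.Model is not shown in this example, so a SimpleNamespace stands in for it, carrying exactly the attributes make_model reads (all values are illustrative):

from types import SimpleNamespace

config_model = SimpleNamespace(
    model_wrapper_class='InceptionV3Wrapper_public',
    model_path='./models/inception_v3_frozen.pb',
    labels_path='./models/imagenet_labels.txt',
    fix_ratio=False,
    config_path=None)
mymodel = make_model(config_model)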
Example #8
def main(args):

  ###### Directories to store results ######
  discovered_concepts_dir = os.path.join(args.working_dir, 'concepts/')
  results_dir = os.path.join(args.working_dir, 'results/')
  cavs_dir = os.path.join(args.working_dir, 'cavs/')
  activations_dir = os.path.join(args.working_dir, 'acts/')
  results_summaries_dir = os.path.join(args.working_dir, 'results_summaries/')
  if tf.gfile.Exists(args.working_dir):
    tf.gfile.DeleteRecursively(args.working_dir)
  tf.gfile.MakeDirs(args.working_dir)
  tf.gfile.MakeDirs(discovered_concepts_dir)
  tf.gfile.MakeDirs(results_dir)
  tf.gfile.MakeDirs(cavs_dir)
  tf.gfile.MakeDirs(activations_dir)
  tf.gfile.MakeDirs(results_summaries_dir)
  random_concept = 'random_discovery'  # Random concept for statistical testing
  sess = utils.create_session()
  mymodel = ace_helpers.make_model(sess, args.model_to_run, args.model_path)
  # Creating the ConceptDiscovery class instance
  cd = ConceptDiscovery(
      mymodel,
      args.target_class,
      random_concept,
      args.bottlenecks.split(','),
      sess,
      args.source_dir,
      activations_dir,
      cavs_dir,
      num_random_exp=args.num_random_exp,
      channel_mean=True,
      max_imgs=args.max_imgs,
      min_imgs=args.min_imgs,
      num_discovery_imgs=args.max_imgs,
      num_workers=args.num_parallel_workers)
  # Creating the dataset of image patches
  cd.create_patches(param_dict={'n_segments': [15, 50, 80]})
  # Saving the concept discovery target class images
  image_dir = os.path.join(discovered_concepts_dir, 'images')
  tf.gfile.MakeDirs(image_dir)
  ace_helpers.save_images(image_dir,
                          (cd.discovery_images * 256).astype(np.uint8))
  # Discovering Concepts
  cd.discover_concepts(method='KM', param_dicts={'n_clusters': 25})
  del cd.dataset  # Free memory
  del cd.image_numbers
  del cd.patches
  # Save discovered concept images (resized and original sized)
  ace_helpers.save_concepts(cd, discovered_concepts_dir)
  # Calculating CAVs and TCAV scores
  cav_accuracies = cd.cavs(min_acc=0.0)
  scores = cd.tcavs(test=False)
  ace_helpers.save_ace_report(cd, cav_accuracies, scores,
                              os.path.join(results_summaries_dir, 'ace_results.txt'))
  # Plot examples of discovered concepts
  for bn in cd.bottlenecks:
    ace_helpers.plot_concepts(cd, bn, 10, address=results_dir)
  # Delete concepts that don't pass statistical testing
  cd.test_and_remove_concepts(scores)
  # Train a binary classifier on concept profiles
  report = '\n\n\t\t\t ---Concept space---'
  report += '\n\t ---Classifier Weights---\n\n'
  pos_imgs = cd.load_concept_imgs(
    cd.target_class,
    2 * cd.max_imgs + args.num_test)[-args.num_test:]
  neg_imgs = cd.load_concept_imgs('random_test', args.num_test)
  a = ace_helpers.flat_profile(cd, pos_imgs)
  b = ace_helpers.flat_profile(cd, neg_imgs)
  lm, _ = ace_helpers.cross_val(a, b, methods=['logistic'])
  for bn in cd.bottlenecks:
    report += bn + ':\n'
    for i, concept in enumerate(cd.dic[bn]['concepts']):
      report += concept + ':' + str(lm.coef_[-1][i]) + '\n'
  # Test profile classifier on test images
  if args.test_dir is None:
    return
  cd.source_dir = args.test_dir
  pos_imgs = cd.load_concept_imgs(cd.target_class, args.num_test)
  neg_imgs = cd.load_concept_imgs('random500_180', args.num_test)
  a = ace_helpers.flat_profile(cd, pos_imgs)
  b = ace_helpers.flat_profile(cd, neg_imgs)
  x, y = ace_helpers.binary_dataset(a, b, balanced=True)
  probs = lm.predict_proba(x)[:, 1]
  report += '\nProfile Classifier accuracy= {}'.format(
      np.mean((probs > 0.5) == y))
  report += '\nProfile Classifier AUC= {}'.format(
      metrics.roc_auc_score(y, probs))
  report += '\nProfile Classifier PR Area= {}'.format(
      metrics.average_precision_score(y, probs))
  # Compare original network to profile classifier
  target_id = cd.model.label_to_id(cd.target_class.replace('_', ' '))
  predictions = []
  for img in pos_imgs:
    predictions.append(mymodel.get_predictions([img]))
  predictions = np.concatenate(predictions, 0)
  true_predictions = (np.argmax(predictions, -1) == target_id).astype(int)
  truly_predicted = np.where(true_predictions)[0]
  report += '\nNetwork Recall = ' + str(np.mean(true_predictions))
  report += ', ' + str(np.mean(np.max(predictions, -1)[truly_predicted]))
  agreeableness = (np.sum(lm.predict(a) * true_predictions) /
                   (np.sum(true_predictions) + 1e-10))
  report += '\nProfile classifier agrees with network in {}%'.format(
      100 * agreeableness)
  with tf.gfile.Open(
      os.path.join(results_summaries_dir, 'profile_classifier.txt'), 'w') as f:
    f.write(report)
Example #9
def main(args, img_path, true_label):
    
    sess = utils.create_session()
    mymodel = make_model(sess, args.model_to_run, args.model_path, args.labels_path)

    filenames = []
    predictions = []
    baseline_prediction_probs = []

    baseline_predictions = pd.read_csv(f"./baseline_prediction_samples/{true_label}baseline_prediction_samples.csv")

    for img in glob(f'{img_path}/*'):

        cd = ConceptDiscovery(
            mymodel,
            args.target_class,
            sess,
            f"./net_occlusion_heatmaps_delta_prob/{img.split('/')[-1].split('_')[0]}/{img.split('/')[-1]}/mask_dim_100/{img.split('/')[-1]}_image_cropped_to_mask/")

        # No instances where true label != predicted label in this sample
        baseline_prediction_probs.append(
            baseline_predictions[baseline_predictions['filename'].str.contains(
                img.split('/')[-1])]['prediction_probability'].values[0])
    
        try:
            prediction, filename = cd.predict()
            predictions.append(prediction)
            filenames.append(filename[0])
        except ValueError:
            predictions.append(np.nan)
            filenames.append(args.source_dir)

    sess.close()

    true_labels = [true_label] * len(predictions)
    
    predicted_labels = []
    for prediction in predictions:
        try:
            predicted_labels.append(mymodel.id_to_label(
                prediction.tolist()[0].index(np.max(prediction))))
        except AttributeError:
            predicted_labels.append(np.nan)

    if args.target_class == 'crane bird':
        args.target_class = 'crane'
    elif args.target_class == 'african grey':
        args.target_class = 'African grey'
    elif args.target_class == 'tank suit':
        args.target_class = 'maillot'

    true_label_predictions = []
    true_label_prediction_delta = []
    for prediction, baseline_prob in zip(predictions, baseline_prediction_probs):
        try:
            true_label_prediction_prob = prediction.tolist()[0][
                mymodel.label_to_id(args.target_class)]
            true_label_predictions.append(true_label_prediction_prob)
            true_label_prediction_delta.append(
                true_label_prediction_prob - baseline_prob)
        except AttributeError:
            # Keep both lists aligned with `predictions`
            true_label_predictions.append(np.nan)
            true_label_prediction_delta.append(np.nan)

    prediction_probability = [np.max(prediction) for prediction in predictions]

    df = pd.DataFrame({
        'filename': filenames,
        'true_label': true_labels,
        'true_label_predictions': true_label_predictions,
        'true_label_predictions_delta': true_label_prediction_delta,
        'predicted_label': predicted_labels,
        'prediction_probability': prediction_probability
    })
    
    save_filename = f"./net_heatmap_predictions/mask_dim_100/{'_'.join(true_label.split(' '))}_heatmap_predictions.csv"
    save_filepath = Path(save_filename)
    save_filepath.touch(exist_ok=True)

    df.to_csv(save_filename, index=False)
Example #10
def main(args):

    sess = utils.create_session()
    mymodel = make_model(sess, args.model_to_run, args.model_path,
                         args.labels_path)

    filenames = []
    predictions = []

    for source_dir in glob(f'{args.source_dir}/*'):

        cd = ConceptDiscovery(mymodel, None, None, None, sess,
                              f'{source_dir}/', None, None)

        try:
            prediction, filename = cd.predict()
            predictions.append(prediction)
            filenames.append(filename[0])
        except ValueError:
            predictions.append(np.nan)
            filenames.append(source_dir)

    sess.close()

    num_predictions = len(predictions)
    directory = ['/'.join(args.source_dir.split('/')[:4])] * num_predictions
    mask_dim = [args.source_dir.split('/')[-2].split('_')[-1]] * num_predictions
    true_labels = [args.target_class] * num_predictions

    # For occluded images, record the predicted probability of the true label
    predicted_labels = []
    for prediction in predictions:
        try:
            predicted_labels.append(
                mymodel.id_to_label(prediction.tolist()[0].index(
                    np.max(prediction))))
        except AttributeError:
            predicted_labels.append(np.nan)

    if args.target_class == 'crane bird':
        args.target_class = 'crane'
    elif args.target_class == 'african grey':
        args.target_class = 'African grey'
    elif args.target_class == 'tank suit':
        args.target_class = 'maillot'

    true_label_predictions = []
    for prediction in predictions:
        try:
            true_label_predictions.append(
                prediction.tolist()[0][mymodel.label_to_id(args.target_class)])
        except AttributeError:
            true_label_predictions.append(np.nan)

    prediction_probability = [np.max(prediction) for prediction in predictions]

    df = pd.DataFrame({
        'directory': directory,
        'mask_dim': mask_dim,
        'filename': filenames,
        'true_label': true_labels,
        'true_label_predictions': true_label_predictions,
        'predicted_label': predicted_labels,
        'prediction_probability': prediction_probability
    })

    # save_filename = f"./occluded_image_predictions/mask_dim_{mask_dim[0]}/{'_'.join(args.target_class.split(' '))}_image_{args.source_dir.split('/')[3]}_occluded_image_predictions.csv"
    save_filename = 'occluded_image_predictions/mask_dim_100/cab_image_n02930766_23814_occluded_image_predictions.csv'
    Path(save_filename).parent.mkdir(parents=True, exist_ok=True)

    df.to_csv(save_filename, index=False)
Example #11
# where TCAVs are stored.
tcav_dir = working_dir + '/tcavs/'
# where the images live.
source_dir = root_dir + 'tcav/dataset/for_tcav/'

utils.make_dir_if_not_exists(activation_dir)
utils.make_dir_if_not_exists(working_dir)
utils.make_dir_if_not_exists(tcav_dir)

# this is a regularizer penalty parameter for linear classifier to get CAVs.
alphas = [0.1]

print('TCAV dataset path is {}'.format(source_dir))
print('Results are saved at {}'.format(working_dir))

sess = utils.create_session()

#===============================================================================
GRAPH_PATH = root_dir + 'tcav/frozen_models/colored_mnist_number_2layers_cnn.pb'
#GRAPH_PATH = root_dir + 'tcav/frozen_models/normal_mnist_2layers_cnn.pb'
#LABEL_PATH = root_dir + 'tcav/dataset/colored_mnist-color-number'
LABEL_PATH = root_dir + 'tcav/dataset/colored_mnist_number'
#LABEL_PATH = root_dir + 'tcav/dataset/normal_mnist'

mymodel = model.KerasMnistCnnWrapper_public(sess, GRAPH_PATH, LABEL_PATH)
#=================================================================================

act_generator = act_gen.ImageActivationGenerator(mymodel, source_dir, activation_dir,
                                                 max_examples=max_examples)

tf.logging.set_verbosity(tf.logging.INFO)
## Only running num_random_exp = 10 to save some time. The paper's numbers are reported for 500 random runs.
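
The snippet ends before the TCAV run itself; a hedged sketch of the step that typically follows, mirroring Example #2 (the target, concept, and bottleneck names below are illustrative):

mytcav = tcav.TCAV(sess,
                   'number_1',         # target class (illustrative)
                   ['red', 'green'],   # concept names (illustrative)
                   ['conv2d_1'],       # bottleneck layer (illustrative)
                   act_generator,
                   alphas,
                   cav_dir=None,
                   num_random_exp=10)
results = mytcav.run()
utils.print_results(results)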