Example #1
# Imports assumed from the ACE repository layout (ace_run.py); adjust to
# your project:
import os
import numpy as np
import tensorflow as tf
from tcav import utils
import ace_helpers
from ace import ConceptDiscovery
def main(args):

    ###### Directories used to store results ######
    discovered_concepts_dir = os.path.join(args.working_dir, 'concepts/')
    results_dir = os.path.join(args.working_dir, 'results/')
    cavs_dir = os.path.join(args.working_dir, 'cavs/')
    activations_dir = os.path.join(args.working_dir, 'acts/')
    results_summaries_dir = os.path.join(args.working_dir,
                                         'results_summaries/')
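    # Start from a clean working directory (any previous results are deleted).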
    if tf.gfile.Exists(args.working_dir):
        tf.gfile.DeleteRecursively(args.working_dir)
    tf.gfile.MakeDirs(args.working_dir)
    tf.gfile.MakeDirs(discovered_concepts_dir)
    tf.gfile.MakeDirs(results_dir)
    tf.gfile.MakeDirs(cavs_dir)
    tf.gfile.MakeDirs(activations_dir)
    tf.gfile.MakeDirs(results_summaries_dir)
    random_concept = 'random_discovery'  # Random concept for statistical testing
    sess = utils.create_session()
    mymodel = ace_helpers.make_model(sess, args.model_to_run, args.model_path,
                                     args.labels_path)
    # Creating the ConceptDiscovery class instance
    cd = ConceptDiscovery(mymodel,
                          args.target_class,
                          random_concept,
                          args.bottlenecks.split(','),
                          sess,
                          args.source_dir,
                          activations_dir,
                          cavs_dir,
                          num_random_exp=args.num_random_exp,
                          channel_mean=True,
                          max_imgs=args.max_imgs,
                          min_imgs=args.min_imgs,
                          num_discovery_imgs=args.max_imgs,
                          num_workers=args.num_parallel_workers)
    # Creating the dataset of image patches
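    # Each image is segmented at three granularities (about 15, 50, and 80
    # superpixels) so that concepts can surface at multiple spatial scales.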
    cd.create_patches(param_dict={'n_segments': [15, 50, 80]})
    # Saving the concept discovery target class images
    image_dir = os.path.join(discovered_concepts_dir, 'images')
    tf.gfile.MakeDirs(image_dir)
    ace_helpers.save_images(image_dir,
                            (cd.discovery_images * 255).astype(np.uint8))
    # Discovering Concepts
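    # 'KM' clusters the patch activations with k-means, 25 clusters per
    # bottleneck; each surviving cluster becomes a candidate concept.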
    cd.discover_concepts(method='KM', param_dicts={'n_clusters': 25})
    del cd.dataset  # Free memory
    del cd.image_numbers
    del cd.patches
    # Save discovered concept images (resized and original sized)
    ace_helpers.save_concepts(cd, discovered_concepts_dir)
    # Calculating CAVs and TCAV scores
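    # A CAV is the normal vector of a linear classifier separating a concept's
    # activations from random activations; min_acc=0.0 keeps every concept
    # regardless of how separable it is.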
    cav_accuracies = cd.cavs(min_acc=0.0)
    scores = cd.tcavs(test=False)
    ace_helpers.save_ace_report(cd, cav_accuracies, scores,
                                results_summaries_dir + 'ace_results.txt')
    # Plot examples of discovered concepts
    for bn in cd.bottlenecks:
        ace_helpers.plot_concepts(cd, bn, 10, address=results_dir)
    # Delete concepts that don't pass statistical testing
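    # Concepts whose TCAV scores are statistically indistinguishable from the
    # random concept's are dropped.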
    cd.test_and_remove_concepts(scores)
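The configuration above comes from an argparse namespace. Below is a minimal sketch of a matching parser: the flag names mirror the attributes the example reads, while parse_ace_arguments and all default values are illustrative assumptions.

import argparse

def parse_ace_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument('--working_dir', default='./ACE_results')
    parser.add_argument('--model_to_run', default='GoogleNet')
    parser.add_argument('--model_path', default='./inception_v1.pb')
    parser.add_argument('--labels_path', default='./imagenet_labels.txt')
    parser.add_argument('--target_class', default='zebra')
    parser.add_argument('--bottlenecks', default='mixed4c')  # comma-separated
    parser.add_argument('--source_dir', default='./concept_images')
    parser.add_argument('--num_random_exp', type=int, default=20)
    parser.add_argument('--max_imgs', type=int, default=40)
    parser.add_argument('--min_imgs', type=int, default=40)
    parser.add_argument('--num_parallel_workers', type=int, default=0)
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_ace_arguments())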
Example #2
# Imports assumed from the ACE repository layout (ace_run.py); adjust to
# your project:
import os
import numpy as np
import sklearn.metrics as metrics
import tensorflow as tf
from tcav import utils
import ace_helpers
from ace import ConceptDiscovery
def main(args):

  ###### Directories used to store results ######
  discovered_concepts_dir = os.path.join(args.working_dir, 'concepts/')
  results_dir = os.path.join(args.working_dir, 'results/')
  cavs_dir = os.path.join(args.working_dir, 'cavs/')
  activations_dir = os.path.join(args.working_dir, 'acts/')
  results_summaries_dir = os.path.join(args.working_dir, 'results_summaries/')
  if tf.gfile.Exists(args.working_dir):
    tf.gfile.DeleteRecursively(args.working_dir)
  tf.gfile.MakeDirs(args.working_dir)
  tf.gfile.MakeDirs(discovered_concepts_dir)
  tf.gfile.MakeDirs(results_dir)
  tf.gfile.MakeDirs(cavs_dir)
  tf.gfile.MakeDirs(activations_dir)
  tf.gfile.MakeDirs(results_summaries_dir)
  random_concept = 'random_discovery'  # Random concept for statistical testing
  sess = utils.create_session()
  mymodel = ace_helpers.make_model(sess, args.model_to_run, args.model_path,
                                   args.labels_path)
  # Creating the ConceptDiscovery class instance
  cd = ConceptDiscovery(
      mymodel,
      args.target_class,
      random_concept,
      args.bottlenecks.split(','),
      sess,
      args.source_dir,
      activations_dir,
      cavs_dir,
      num_random_exp=args.num_random_exp,
      channel_mean=True,
      max_imgs=args.max_imgs,
      min_imgs=args.min_imgs,
      num_discovery_imgs=args.max_imgs,
      num_workers=args.num_parallel_workers)
  # Creating the dataset of image patches
  cd.create_patches(param_dict={'n_segments': [15, 50, 80]})
  # Saving the concept discovery target class images
  image_dir = os.path.join(discovered_concepts_dir, 'images')
  tf.gfile.MakeDirs(image_dir)
  ace_helpers.save_images(image_dir,
                          (cd.discovery_images * 255).astype(np.uint8))
  # Discovering Concepts
  cd.discover_concepts(method='KM', param_dicts={'n_clusters': 25})
  del cd.dataset  # Free memory
  del cd.image_numbers
  del cd.patches
  # Save discovered concept images (resized and original sized)
  ace_helpers.save_concepts(cd, discovered_concepts_dir)
  # Calculating CAVs and TCAV scores
  cav_accuracies = cd.cavs(min_acc=0.0)
  scores = cd.tcavs(test=False)
  ace_helpers.save_ace_report(cd, cav_accuracies, scores,
                              results_summaries_dir + 'ace_results.txt')
  # Plot examples of discovered concepts
  for bn in cd.bottlenecks:
    ace_helpers.plot_concepts(cd, bn, 10, address=results_dir)
  # Delete concepts that don't pass statistical testing
  cd.test_and_remove_concepts(scores)
  # Train a binary classifier on concept profiles
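  # A concept profile represents an image as a vector of how strongly each
  # discovered concept is expressed in its activations (one entry per concept
  # per bottleneck, flattened by ace_helpers.flat_profile below).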
  report = '\n\n\t\t\t ---Concept space---'
  report += '\n\t ---Classifier Weights---\n\n'
  pos_imgs = cd.load_concept_imgs(
    cd.target_class,
    2 * cd.max_imgs + args.num_test)[-args.num_test:]
  neg_imgs = cd.load_concept_imgs('random_test', args.num_test)
  a = ace_helpers.flat_profile(cd, pos_imgs)
  b = ace_helpers.flat_profile(cd, neg_imgs)
  lm, _ = ace_helpers.cross_val(a, b, methods=['logistic'])
  concept_idx = 0  # lm.coef_ is flat across bottlenecks (assuming profiles
  # are concatenated in bottleneck order), so keep one running index.
  for bn in cd.bottlenecks:
    report += bn + ':\n'
    for concept in cd.dic[bn]['concepts']:
      report += concept + ':' + str(lm.coef_[-1][concept_idx]) + '\n'
      concept_idx += 1
  # Test profile classifier on test images
  # If there is no test set, persist the report built so far and stop here.
  if args.test_dir is None:
    with tf.gfile.Open(results_summaries_dir + 'profile_classifier.txt',
                       'w') as f:
      f.write(report)
    return
  cd.source_dir = args.test_dir
  pos_imgs = cd.load_concept_imgs(cd.target_class, args.num_test)
  neg_imgs = cd.load_concept_imgs('random500_180', args.num_test)
  a = ace_helpers.flat_profile(cd, pos_imgs)
  b = ace_helpers.flat_profile(cd, neg_imgs)
  x, y = ace_helpers.binary_dataset(a, b, balanced=True)
  probs = lm.predict_proba(x)[:, 1]
  report += '\nProfile Classifier accuracy= {}'.format(
      np.mean((probs > 0.5) == y))
  report += '\nProfile Classifier AUC= {}'.format(
      metrics.roc_auc_score(y, probs))
  report += '\nProfile Classifier PR Area= {}'.format(
      metrics.average_precision_score(y, probs))
  # Compare original network to profile classifier
  target_id = cd.model.label_to_id(cd.target_class.replace('_', ' '))
  predictions = []
  for img in pos_imgs:
    predictions.append(mymodel.get_predictions([img]))
  predictions = np.concatenate(predictions, 0)
  true_predictions = (np.argmax(predictions, -1) == target_id).astype(int)
  truly_predicted = np.where(true_predictions)[0]
  report += '\nNetwork Recall = ' + str(np.mean(true_predictions))
  # Mean confidence of the network on the images it classified correctly.
  report += ', ' + str(np.mean(np.max(predictions, -1)[truly_predicted]))
  agreeableness = np.sum(lm.predict(a) * true_predictions) * 1. / (
      np.sum(true_predictions) + 1e-10)
  report += '\nProfile classifier agrees with the network on {}%'.format(
      100 * agreeableness)
  with tf.gfile.Open(results_summaries_dir + 'profile_classifier.txt', 'w') as f:
    f.write(report)
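The evaluation at the end of Example #2 is plain scikit-learn. The sketch below reproduces the accuracy/AUC/PR computation in isolation, with random vectors standing in for the concept profiles that ace_helpers.flat_profile would produce; all shapes and values are assumptions.

import numpy as np
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

rng = np.random.RandomState(0)
pos = rng.normal(1.0, 1.0, size=(50, 25))  # stand-in profiles, target class
neg = rng.normal(0.0, 1.0, size=(50, 25))  # stand-in profiles, random images
x = np.concatenate([pos, neg], 0)
y = np.concatenate([np.ones(50), np.zeros(50)])

lm = LogisticRegression().fit(x, y)
probs = lm.predict_proba(x)[:, 1]
print('accuracy =', np.mean((probs > 0.5) == y))
print('AUC =', metrics.roc_auc_score(y, probs))
print('PR area =', metrics.average_precision_score(y, probs))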