Example #1
0
 def test_get_key(self):
     """CAV.cav_key must equal concepts, bottleneck, model type and alpha joined by '-'."""
     model_type = self.hparams['model_type']
     alpha = self.hparams['alpha']
     actual_key = CAV.cav_key(self.concepts, self.bottleneck, model_type,
                              alpha)
     # The concepts are stringified and dash-joined first, then the remaining
     # three fields are appended with the same separator.
     concept_part = '-'.join(str(c) for c in self.concepts)
     expected_key = '-'.join(
         [concept_part, self.bottleneck, model_type,
          str(alpha)])
     self.assertEqual(actual_key, expected_key)
Example #2
0
    def setUp(self):
        """Build a CAV fixture and write a matching pickle file to the tmp directory.

        Every value is preset. The test directory is deleted if it already
        exists and is recreated on each call.
        """
        self.hparams = {
            'model_type': 'linear',
            'alpha': .01,
            'max_iter': 1000,
            'tol': 1e-3,
        }
        self.concepts = ['concept1', 'concept2']
        self.bottleneck = 'bottleneck'
        self.accuracies = {
            'concept1': 0.8,
            'concept2': 0.5,
            'overall': 0.65,
        }
        self.cav_vecs = [[1, 2, 3], [4, 5, 6]]

        # The pickle is stored at <tmpdir>/test/<cav_key>.pkl.
        self.test_subdirectory = os.path.join(FLAGS.tcav_test_tmpdir, 'test')
        self.cav_dir = self.test_subdirectory
        key = CAV.cav_key(self.concepts, self.bottleneck,
                          self.hparams['model_type'], self.hparams['alpha'])
        self.cav_file_name = key + '.pkl'
        self.save_path = os.path.join(self.cav_dir, self.cav_file_name)

        # Pretend the CAV was already trained by assigning its vectors directly.
        self.cav = CAV(self.concepts, self.bottleneck, self.hparams)
        self.cav.cavs = np.array(self.cav_vecs)

        # Concept number i gets four identical rows of three values, all
        # equal to i.
        self.acts = {}
        for index, concept in enumerate(self.concepts):
            rows = np.tile(index * np.ones((1, 3)), (4, 1))
            self.acts[concept] = {self.bottleneck: rows}

        # Start from an empty directory every time.
        if os.path.exists(self.cav_dir):
            shutil.rmtree(self.cav_dir)
        os.mkdir(self.cav_dir)

        saved_cav = {
            'concepts': self.concepts,
            'bottleneck': self.bottleneck,
            'hparams': self.hparams,
            'accuracies': self.accuracies,
            'cavs': self.cav_vecs,
            'saved_path': self.save_path,
        }
        with tf.io.gfile.GFile(self.save_path, 'w') as pkl_file:
            pickle.dump(saved_cav, pkl_file)
Example #3
0
    def _run_single_set(self, param, overwrite=False, run_parallel=False):
        """Run TCAV for one set of (target, concepts).

        Args:
          param: run parameters; the attributes read here are bottleneck,
            concepts, target_class, activation_generator, alpha, model and
            cav_dir.
          overwrite: forwarded to get_or_train_cav (if True, overwrite any
            saved CAV files).
          run_parallel: forwarded to compute_tcav_score.

        Returns:
          a dictionary of results for this run.
        """
        layer = param.bottleneck
        concept_pair = param.concepts
        target = param.target_class
        act_gen = param.activation_generator
        reg_alpha = param.alpha
        model = param.model
        cav_store = param.cav_dir

        tf.logging.info('running %s %s' % (target, concept_pair))

        # Activations for every concept plus the target class.
        activations = act_gen.process_and_load_activations(
            [layer], concept_pair + [target])

        # Load a saved CAV or train a new one with the requested alpha.
        hparams = CAV.default_hparams()
        hparams.alpha = reg_alpha
        cav = get_or_train_cav(concept_pair,
                               layer,
                               activations,
                               cav_dir=cav_store,
                               cav_hparams=hparams,
                               overwrite=overwrite)

        # The concept activations are not needed past this point; only the
        # target class entry is read below.
        for name in concept_pair:
            del activations[name]

        key = CAV.cav_key(concept_pair, layer, hparams.model_type,
                          hparams.alpha)

        # The first concept is the one being scored.
        scored_concept = concept_pair[0]
        score = self.compute_tcav_score(
            model,
            target,
            scored_concept,
            cav,
            activations[target][cav.bottleneck],
            act_gen.get_examples_for_concept(target),
            run_parallel=run_parallel)
        dir_derivs = self.get_directional_dir(
            model, target, scored_concept, cav,
            activations[target][cav.bottleneck],
            act_gen.get_examples_for_concept(target))

        negative_concept = concept_pair[1]
        accuracies = cav.accuracies
        abs_mean = np.mean(np.abs(dir_derivs))
        mean = np.mean(dir_derivs)
        std = np.std(dir_derivs)
        return {
            'cav_key': key,
            'cav_concept': scored_concept,
            'negative_concept': negative_concept,
            'target_class': target,
            'cav_accuracies': accuracies,
            'i_up': score,
            'val_directional_dirs_abs_mean': abs_mean,
            'val_directional_dirs_mean': mean,
            'val_directional_dirs_std': std,
            'val_directional_dirs': dir_derivs,
            'note': 'alpha_%s ' % (reg_alpha),
            'alpha': reg_alpha,
            'bottleneck': layer,
        }
Example #4
0
class CavTest(googletest.TestCase):
    """Unit tests for CAV and get_or_train_cav built on a small preset fixture."""

    def setUp(self):
        """Build a CAV fixture and write a matching pickle file to the tmp directory.

        Every value is preset. The test directory is deleted if it already
        exists and is recreated on each call.
        """
        self.hparams = {
            'model_type': 'linear',
            'alpha': .01,
            'max_iter': 1000,
            'tol': 1e-3,
        }
        self.concepts = ['concept1', 'concept2']
        self.bottleneck = 'bottleneck'
        self.accuracies = {
            'concept1': 0.8,
            'concept2': 0.5,
            'overall': 0.65,
        }
        self.cav_vecs = [[1, 2, 3], [4, 5, 6]]

        # The pickle is stored at <tmpdir>/test/<cav_key>.pkl.
        self.test_subdirectory = os.path.join(FLAGS.tcav_test_tmpdir, 'test')
        self.cav_dir = self.test_subdirectory
        key = CAV.cav_key(self.concepts, self.bottleneck,
                          self.hparams['model_type'], self.hparams['alpha'])
        self.cav_file_name = key + '.pkl'
        self.save_path = os.path.join(self.cav_dir, self.cav_file_name)

        # Pretend the CAV was already trained by assigning its vectors directly.
        self.cav = CAV(self.concepts, self.bottleneck, self.hparams)
        self.cav.cavs = np.array(self.cav_vecs)

        # Concept number i gets four identical rows of three values, all
        # equal to i.
        self.acts = {}
        for index, concept in enumerate(self.concepts):
            rows = np.tile(index * np.ones((1, 3)), (4, 1))
            self.acts[concept] = {self.bottleneck: rows}

        # Start from an empty directory every time.
        if os.path.exists(self.cav_dir):
            shutil.rmtree(self.cav_dir)
        os.mkdir(self.cav_dir)

        saved_cav = {
            'concepts': self.concepts,
            'bottleneck': self.bottleneck,
            'hparams': self.hparams,
            'accuracies': self.accuracies,
            'cavs': self.cav_vecs,
            'saved_path': self.save_path,
        }
        with tf.io.gfile.GFile(self.save_path, 'w') as pkl_file:
            pickle.dump(saved_cav, pkl_file)

    def test_default_hparams(self):
        """Default hparams must have alpha 0.01 and model type 'linear'."""
        defaults = CAV.default_hparams()
        for name, expected in (('alpha', 0.01), ('model_type', 'linear')):
            self.assertEqual(defaults[name], expected)

    def test_load_cav(self):
        """Load the cav file written in setUp and check its values."""
        loaded = CAV.load_cav(self.save_path)
        self.assertEqual(loaded.concepts, self.concepts)
        self.assertEqual(loaded.cavs, self.cav_vecs)

    def test_cav_key(self):
        """cav_key called on an instance must give the dash-joined key."""
        model_type = self.hparams['model_type']
        alpha = self.hparams['alpha']
        actual_key = self.cav.cav_key(self.concepts, self.bottleneck,
                                      model_type, alpha)
        expected_key = '-'.join([
            '-'.join(self.concepts),
            self.bottleneck,
            model_type,
            str(alpha),
        ])
        self.assertEqual(actual_key, expected_key)

    def test_check_cav_exists(self):
        """The pickle written in setUp must be reported as existing."""
        self.assertTrue(
            self.cav.check_cav_exists(self.cav_dir, self.concepts,
                                      self.bottleneck, self.hparams))

    def test__create_cav_training_set(self):
        """Spot-check rows and labels of the generated training set."""
        x, labels, labels2text = self.cav._create_cav_training_set(
            self.concepts, self.bottleneck, self.acts)
        # Each concept contributes four rows, so row 0 belongs to the first
        # concept and row 5 to the second.
        for row, expected in ((0, 0), (5, 1)):
            self.assertEqual(x[row][0], expected)
        for row, expected in ((0, 0), (5, 1)):
            self.assertEqual(labels[row], expected)
        self.assertEqual(labels2text[0], 'concept1')

    def test_perturb_act(self):
        """Adding the concept1 CAV [1, 2, 3] with alpha 1.0 to [1, 0, 1] gives [2, 2, 4]."""
        activation = np.array([1., 0, 1.])
        perturbed = self.cav.perturb_act(activation,
                                         'concept1',
                                         operation=np.add,
                                         alpha=1.0)
        for position, expected in enumerate((2., 2., 4.)):
            self.assertEqual(expected, perturbed[position])

    def test_get_key(self):
        """CAV.cav_key must equal concepts, bottleneck, model type and alpha joined by '-'."""
        model_type = self.hparams['model_type']
        alpha = self.hparams['alpha']
        actual_key = CAV.cav_key(self.concepts, self.bottleneck, model_type,
                                 alpha)
        concept_part = '-'.join(str(c) for c in self.concepts)
        expected_key = '-'.join(
            [concept_part, self.bottleneck, model_type,
             str(alpha)])
        self.assertEqual(actual_key, expected_key)

    def test_get_direction(self):
        """get_direction must return the stored vector for the named concept."""
        position = self.cav.concepts.index('concept1')
        stored = self.cav.cavs[position]
        returned = self.cav.get_direction('concept1')
        for i, component in enumerate(stored):
            self.assertEqual(component, returned[i])

    def test_train(self):
        """After training, the first components of the two CAVs have opposite signs."""
        per_concept_acts = {}
        for concept in self.concepts:
            per_concept_acts[concept] = self.acts[concept]
        self.cav.train(per_concept_acts)
        # A negative product means the two first components differ in sign.
        first_components = self.cav.cavs[0][0] * self.cav.cavs[1][0]
        self.assertLess(first_components, 0)

    def test__train_lm(self):
        """A trivially separable data set must be classified almost perfectly."""
        classifier = linear_model.SGDClassifier(alpha=self.hparams['alpha'])
        features = np.array([[0], [0], [0], [1], [1], [1]])
        targets = np.array([0, 0, 0, 1, 1, 1])
        acc = self.cav._train_lm(classifier, features, targets, {0: 0, 1: 1})
        # The data is so easy that both entries should be close to 1.
        for label in (0, 1):
            self.assertGreater(acc[label], 0.99)

    def test_get_or_train_cav_save_test(self):
        """get_or_train_cav must return the vectors saved by setUp."""
        fetched = get_or_train_cav(self.concepts,
                                   self.bottleneck,
                                   self.acts,
                                   cav_dir=self.cav_dir,
                                   cav_hparams=self.hparams)
        # Spot-check two elements against the saved vectors.
        for row, col in ((0, 0), (1, 2)):
            self.assertEqual(fetched.cavs[row][col], self.cav_vecs[row][col])
Example #5
0
    def _run_single_set(self, param):
        """Run TCAV for one set of (target, concepts).

        Args:
          param: run parameters; the attributes read here are bottleneck,
            concepts, target_class, activation_generator, alpha, model and
            cav_dir.

        Returns:
          a dictionary of results for the last concept in param.concepts that
          either equals self.random_counterpart or does not contain
          'random500'.

        Raises:
          ValueError: if no concept qualifies, so there is no result to return.
        """
        bottleneck = param.bottleneck
        concepts = param.concepts
        target_class = param.target_class
        activation_generator = param.activation_generator
        alpha = param.alpha
        mymodel = param.model
        cav_dir = param.cav_dir

        tf.logging.info('running %s %s' % (target_class, concepts))
        print("running: target->", target_class, " concepts->", concepts)

        # Get acts. The activations are indexed with str(target_class) below,
        # so a numeric target class is passed as a string in the concept list.
        # This name must be bound for BOTH branches: it used to be assigned
        # only in the numeric branch, which made the other branch fail.
        in_target_class = (str(target_class)
                           if self.use_numeric_class_label else target_class)
        if self.use_numeric_class_label:
            acts = activation_generator.process_and_load_activations(
                [bottleneck], concepts + [in_target_class], target_class)
        else:
            acts = activation_generator.process_and_load_activations(
                [bottleneck], concepts + [in_target_class])

        # Get CAVs
        cav_hparams = CAV.default_hparams()
        cav_hparams.alpha = alpha
        cav_instance = get_or_train_cav(concepts,
                                        bottleneck,
                                        acts,
                                        cav_dir=cav_dir,
                                        cav_hparams=cav_hparams)

        # Clean up: only the target class activations are read from here on.
        for c in concepts:
            del acts[c]

        # Hypo testing
        a_cav_key = CAV.cav_key(concepts, bottleneck, cav_hparams.model_type,
                                cav_hparams.alpha)
        if self.use_numeric_class_label:
            target_class_for_compute_tcav_score = target_class + 1
        else:
            target_class_for_compute_tcav_score = mymodel.label_to_id(
                target_class) + 1

        # Loop-invariant: the same target activations are used for every
        # concept.
        target_acts = acts[str(target_class)][cav_instance.bottleneck]

        result = None
        for cav_concept in concepts:
            # Compare by value: identity ('is') on strings only matches when
            # both names happen to refer to the very same object.
            if (cav_concept == self.random_counterpart
                    or 'random500' not in cav_concept):
                i_up = self.compute_tcav_score(
                    mymodel,
                    target_class_for_compute_tcav_score,
                    cav_concept,
                    cav_instance,
                    target_acts,
                )
                val_directional_dirs = self.get_directional_dir(
                    mymodel, target_class_for_compute_tcav_score, cav_concept,
                    cav_instance, target_acts)
                # A later qualifying concept replaces an earlier result.
                result = {
                    'cav_key': a_cav_key,
                    'cav_concept': cav_concept,
                    'target_class': target_class,
                    'i_up': i_up,
                    'val_directional_dirs_abs_mean':
                    np.mean(np.abs(val_directional_dirs)),
                    'val_directional_dirs_mean':
                    np.mean(val_directional_dirs),
                    'val_directional_dirs_std':
                    np.std(val_directional_dirs),
                    'note': 'alpha_%s ' % (alpha),
                    'alpha': alpha,
                    'bottleneck': bottleneck
                }
        del target_acts
        del acts

        if result is None:
            # Previously this surfaced as an UnboundLocalError on `result`.
            raise ValueError(
                'no concept in %s qualifies for TCAV scoring' % (concepts, ))
        return result
Example #6
0
File: tcav.py Project: munema/tcav
    def _run_single_set(self, param, overwrite=False, run_parallel=False):
        """Run TCAV for one set of (target, concepts).

        When self.make_random is set, the result is stored with pickle_dump
        under self.tcav_dir, and a run whose result file already exists is
        skipped.

        Args:
          param: run parameters; the attributes read here are bottleneck,
            concepts, target_class, activation_generator, alpha, model and
            cav_dir.
          overwrite: forwarded to get_or_train_cav (if True, overwrite any
            saved CAV files).
          run_parallel: forwarded to compute_tcav_score.

        Returns:
          a dictionary of results, or None when self.make_random is set and
          the result file for this run already exists.
        """
        layer = param.bottleneck
        concept_pair = param.concepts
        target = param.target_class
        act_gen = param.activation_generator
        reg_alpha = param.alpha
        model = param.model
        cav_store = param.cav_dir

        tf.logging.info('running %s %s' % (target, concept_pair))

        # Suffix of the result file name: one ':name' part per enabled option,
        # closed by a trailing ':' when at least one option is enabled.
        suffix_parts = []
        if self.logit_grad:
            suffix_parts.append(':logit_grad')
        if self.grad_nomalize:
            suffix_parts.append(':grad_nomalize')
        if suffix_parts:
            suffix_parts.append(':')
        keyword = ''.join(suffix_parts)

        def _result_path():
            # Built lazily so the path is only formed when make_random is set.
            return self.tcav_dir + '{}:{}:{}:{}_{}{}'.format(
                layer, target, reg_alpha, concept_pair[0], concept_pair[1],
                keyword)

        if self.make_random:
            if os.path.exists(_result_path()):
                return None

        # Activations for every concept plus the target class.
        acts = act_gen.process_and_load_activations([layer],
                                                    concept_pair + [target])

        # Load a saved CAV or train a new one with the requested alpha.
        hparams = CAV.default_hparams()
        hparams.alpha = reg_alpha
        cav = get_or_train_cav(concept_pair,
                               layer,
                               acts,
                               cav_dir=cav_store,
                               cav_hparams=hparams,
                               overwrite=overwrite)

        # The concept activations are dropped only when true_cav compares
        # equal to False; the comparison is kept exactly as written.
        if self.true_cav == False:
            for name in concept_pair:
                del acts[name]

        key = CAV.cav_key(concept_pair, layer, hparams.model_type,
                          hparams.alpha)

        # The first concept is the one being scored.
        scored_concept = concept_pair[0]
        score = self.compute_tcav_score(
            model,
            target,
            scored_concept,
            cav,
            acts[target][cav.bottleneck],
            act_gen.get_examples_for_concept(target),
            acts,
            cav.bottleneck,
            act_gen,
            cav_store,
            self.true_cav,
            self.logit_grad,
            run_parallel=run_parallel)
        # acts was handed to the call above, so the target activations are
        # looked up again rather than reused.
        dir_derivs = self.get_directional_dir_plus(
            model, target, scored_concept, cav, acts[target][cav.bottleneck],
            act_gen.get_examples_for_concept(target), self.cav_dir,
            self.project_name, layer, concept_pair[1], acts, act_gen,
            self.true_cav, self.logit_grad, self.grad_nomalize,
            self.make_random)

        result = {
            'cav_key': key,
            'cav_concept': scored_concept,
            'negative_concept': concept_pair[1],
            'target_class': target,
            'cav_accuracies': cav.accuracies,
            'i_up': score,
            'val_directional_dirs': dir_derivs,
            'alpha': reg_alpha,
            'bottleneck': layer,
        }
        # Drop the local reference to the activations before saving.
        del acts

        if self.make_random:
            out_path = _result_path()
            if not os.path.exists(out_path):
                pickle_dump(result, out_path)
        return result