def get_direction_dir_sign_true_cav(mymodel, act, cav, concept, class_id,
                                    example, acts, bottleneck,
                                    activation_generator, cav_dir, i):
    """Return True when the gradient points along the "true" CAV direction.

    The "true" CAV here is the vector from this example's activation to the
    mean activation of the concept's examples. Per the TCAV convention the
    gradient points in the direction that DECREASES the probability of the
    target class, so a negative dot product means moving toward the concept
    increases the class probability.

    Args:
      mymodel: model wrapper exposing get_gradient().
      act: activation of this example at cav.bottleneck.
      cav: CAV instance; only its .bottleneck attribute is read here.
      concept: concept name, key into acts.
      class_id: integer id of the target class.
      example: the raw input example.
      acts: dict of concept -> bottleneck -> activation array.
      bottleneck: bottleneck name used to look up the concept activations.
      activation_generator: provides get_activations_for_examples().
      cav_dir: directory where the true-CAV vector is cached.
      i: index of the example, used in the cache filename.

    Returns:
      bool: True iff np.dot(grad, true_cav) < 0.
    """
    # Grad points in the direction which DECREASES probability of class.
    grad = np.reshape(
        mymodel.get_gradient(act, [class_id], cav.bottleneck, example), -1)
    # Obtain the true direction vector: mean concept activation minus this
    # example's own activation at the same bottleneck.
    mean_concept = np.mean(acts[concept][bottleneck], 0)
    act_example = activation_generator.get_activations_for_examples(
        np.expand_dims(example, 0), bottleneck)
    true_cav = np.reshape(mean_concept - act_example, -1)
    # Build the cache path once (it was previously duplicated in the exists
    # check and the dump call) and cache the vector on first use.
    cache_path = (cav_dir + '/' + 'cav-true:' + 'example-' + str(i) + ':' +
                  concept + ':' + bottleneck)
    if not os.path.exists(cache_path):
        pickle_dump(true_cav, cache_path)
    dot_prod = np.dot(grad, true_cav)
    return dot_prod < 0
def process_and_load_activations(self, bottleneck_names, concepts):
    """Load activations for each (concept, bottleneck), computing on cache miss.

    For every concept/bottleneck combination, try to load a pickled
    activation file from self.acts_dir. If the file is absent or fails to
    load, the activations are recomputed via get_activations_for_concept,
    and (when an acts_dir is configured) written back to disk.

    Args:
      bottleneck_names: list of bottleneck layer names.
      concepts: list of concept names.

    Returns:
      dict mapping concept -> bottleneck name -> activation array.
    """
    acts = {}
    if self.acts_dir and not tf.io.gfile.exists(self.acts_dir):
        tf.io.gfile.makedirs(self.acts_dir)
    for concept in concepts:
        if concept not in acts:
            acts[concept] = {}
        for bottleneck_name in bottleneck_names:
            acts_path = os.path.join(
                self.acts_dir,
                'acts_{}_{}'.format(concept,
                                    bottleneck_name)) if self.acts_dir else None
            if acts_path and os.path.exists(acts_path):
                try:
                    acts[concept][bottleneck_name] = pickle_load(
                        acts_path).squeeze()
                    tf.logging.debug('Loaded ' + acts_path)
                # Narrowed from a bare `except:` so that SystemExit and
                # KeyboardInterrupt are no longer swallowed; any ordinary
                # load failure still falls back to recomputation.
                except Exception:
                    tf.logging.info(
                        'Fail loading Activation. Now calculating...')
                    acts[concept][
                        bottleneck_name] = self.get_activations_for_concept(
                            concept, bottleneck_name)
            else:
                acts[concept][
                    bottleneck_name] = self.get_activations_for_concept(
                        concept, bottleneck_name)
                if acts_path:
                    tf.logging.info(
                        '{} does not exist, Making one...'.format(acts_path))
                    pickle_dump(acts[concept][bottleneck_name], acts_path)
    return acts
# Collect pickled TCAV results for one target class from a run directory,
# splitting them into "all" results and de-duplicated results (keeping only
# one orientation of each symmetric random-pair combination).
path = root_dir + 'log/2layers-colored-mnist-number_10/random100'
target = 'mnist-6'
tcav_path = path + '/tcavs/'
# Skip result files produced with gradient normalization (sic: the original
# run directories spell it "grad_nomalize").
keyword = 'grad_nomalize'
results_path = os.listdir(tcav_path)
# Filenames look like "<bottleneck>:<target>:<alpha>:random500_P_random500_N...".
# Compile once and match once per file (the original matched twice).
pair_re = re.compile(r'.+:.+:.+:random500_(\d+)_random500_(\d+).*')
all_results = []
non_dup_results = []
for result in results_path:
    if keyword in result:
        continue
    match = pair_re.match(result)
    positive_num = int(match.group(1))
    negative_num = int(match.group(2))
    if result.split(':')[1] == target:
        all_results.append(pickle_load(tcav_path + result))
        # random500_P_random500_N and random500_N_random500_P describe the
        # same pair; keep only the P < N orientation.
        if positive_num < negative_num:
            non_dup_results.append(pickle_load(tcav_path + result))
print(len(all_results))
print(len(non_dup_results))
pickle_dump(all_results, path + '/' + target + '_results_all')
pickle_dump(non_dup_results, path + '/' + target + '_results_non_dup')
def _run_single_set(self, param, overwrite=False, run_parallel=False):
    """Run TCAV with provided for one set of (target, concepts).

    Loads activations, trains (or loads) the CAV for the concept pair,
    computes the TCAV score and the per-example directional derivatives,
    and packages everything into one result dict. In make_random mode the
    result is also pickled under tcav_dir, and the whole run is skipped if
    that file already exists.

    Args:
      param: parameters to run
      overwrite: if True, overwrite any saved CAV files.
      run_parallel: run this parallel.

    Returns:
      a dictionary of results (panda frame), or None when the make_random
      output file already exists.
    """
    bottleneck = param.bottleneck
    concepts = param.concepts
    target_class = param.target_class
    activation_generator = param.activation_generator
    alpha = param.alpha
    mymodel = param.model
    cav_dir = param.cav_dir
    # first check if target class is in model.
    tf.logging.info('running %s %s' % (target_class, concepts))
    # Build a filename suffix that tags which gradient variants were used.
    # Note the trailing ':' appended when any flag is set.
    keyword = ''
    is_keyword = False
    if self.logit_grad:
        keyword += ':logit_grad'
        is_keyword = True
    if self.grad_nomalize:
        keyword += ':grad_nomalize'
        is_keyword = True
    if is_keyword:
        keyword += ':'
    # In make_random mode, skip work that has already been saved to disk.
    if self.make_random and os.path.exists(
            self.tcav_dir +
            '{}:{}:{}:{}_{}{}'.format(bottleneck, target_class, alpha,
                                      concepts[0], concepts[1], keyword)):
        return None
    # Get acts
    acts = activation_generator.process_and_load_activations(
        [bottleneck], concepts + [target_class])
    # Get CAVs
    cav_hparams = CAV.default_hparams()
    cav_hparams.alpha = alpha
    cav_instance = get_or_train_cav(concepts,
                                    bottleneck,
                                    acts,
                                    cav_dir=cav_dir,
                                    cav_hparams=cav_hparams,
                                    overwrite=overwrite)
    # The concept activations are only needed downstream in true-CAV mode
    # (where the direction is recomputed from acts); otherwise free them now.
    if self.true_cav == False:
        # clean up
        for c in concepts:
            del acts[c]
    # Hypo testing
    a_cav_key = CAV.cav_key(concepts, bottleneck, cav_hparams.model_type,
                            cav_hparams.alpha)
    target_class_for_compute_tcav_score = target_class
    cav_concept = concepts[0]
    #tmp = activation_generator.get_examples_for_concept(target_class)
    # Fraction of target-class examples whose gradient has positive
    # directional derivative along the CAV.
    i_up = self.compute_tcav_score(
        mymodel, target_class_for_compute_tcav_score, cav_concept,
        cav_instance, acts[target_class][cav_instance.bottleneck],
        activation_generator.get_examples_for_concept(target_class), acts,
        cav_instance.bottleneck, activation_generator, cav_dir, self.true_cav,
        self.logit_grad, run_parallel=run_parallel)
    # Per-example directional derivative values (see get_directional_dir_plus).
    val_directional_dirs = self.get_directional_dir_plus(
        mymodel, target_class_for_compute_tcav_score, cav_concept,
        cav_instance, acts[target_class][cav_instance.bottleneck],
        activation_generator.get_examples_for_concept(target_class),
        self.cav_dir, self.project_name, bottleneck, concepts[1], acts,
        activation_generator, self.true_cav, self.logit_grad,
        self.grad_nomalize, self.make_random)
    result = {
        'cav_key': a_cav_key,
        'cav_concept': cav_concept,
        'negative_concept': concepts[1],
        'target_class': target_class,
        'cav_accuracies': cav_instance.accuracies,
        'i_up': i_up,
        'val_directional_dirs': val_directional_dirs,
        'alpha': alpha,
        'bottleneck': bottleneck
    }
    del acts
    # Persist per-pair results in make_random mode (first writer wins).
    if self.make_random and not os.path.exists(
            self.tcav_dir +
            '{}:{}:{}:{}_{}{}'.format(bottleneck, target_class, alpha,
                                      concepts[0], concepts[1], keyword)):
        pickle_dump(
            result, self.tcav_dir +
            '{}:{}:{}:{}_{}{}'.format(bottleneck, target_class, alpha,
                                      concepts[0], concepts[1], keyword))
    return result
def run(self,
        num_workers=10,
        run_parallel=False,
        overwrite=False,
        return_proto=False):
    """Run TCAV for all parameters (concept and random), write results to html.

    Args:
      num_workers: number of workers to parallelize
      run_parallel: run this parallel.
      overwrite: if True, overwrite any saved CAV files.
      return_proto: if True, returns results as a tcav.Results object; else,
        return as a list of dicts.

    Returns:
      results: an object (either a Results proto object or a list of
        dictionaries) containing metrics for TCAV results.
    """
    # for random exp, a machine with cpu = 30, ram = 300G, disk = 10G and
    # pool worker 50 seems to work.
    tf.logging.info('running %s params' % len(self.params))
    tf.logging.info('training with alpha={}'.format(self.alphas))
    results = []
    if self.true_cav:
        # Track which (concept, bottleneck) pairs have been processed so each
        # pair is run at most once in true-CAV mode.
        concept_lst = self.concepts
        bottleneck_lst = self.bottlenecks
        concept_dct = {}
        for c in self.concepts:
            concept_dct[c] = {}
            for b in self.bottlenecks:
                concept_dct[c][b] = 0
    now = time.time()
    if run_parallel:
        pool = multiprocessing.Pool(num_workers)
        for i, res in enumerate(
                pool.imap(
                    lambda p: self._run_single_set(
                        p, overwrite=overwrite, run_parallel=run_parallel),
                    self.params), 1):
            tf.logging.info('Finished running param %s of %s' %
                            (i, len(self.params)))
            results.append(res)
    else:
        # NOTE(review): unlike _run_single_set, this keyword gets no trailing
        # ':' when flags are set, so the os.path.exists skip-check below may
        # never match the filenames _run_single_set actually writes — confirm.
        keyword = ''
        if self.logit_grad:
            keyword += ':logit_grad'
        if self.grad_nomalize:
            keyword += ':grad_nomalize'
        for i, param in enumerate(self.params):
            tf.logging.info('Running param %s of %s' % (i, len(self.params)))
            # Skip random concepts (unless we are in make_random mode).
            if 'random' in param.concepts[0] and self.make_random == False:
                continue
            # Compute only random concepts, skipping pairs already on disk.
            elif self.make_random == True and (
                    'random' not in param.concepts[0] or os.path.exists(
                        self.tcav_dir + '{}:{}:{}:{}_{}{}'.format(
                            param.bottleneck, param.target_class, param.alpha,
                            param.concepts[0], param.concepts[1], keyword))):
                continue
            # Compute with the true CAV: run each (concept, bottleneck)
            # combination at most once.
            elif self.true_cav:
                if param.concepts[
                        0] not in concept_lst and param.bottleneck not in bottleneck_lst:
                    continue
                elif concept_dct[param.concepts[0]][param.bottleneck] == 1:
                    continue
                concept_dct[param.concepts[0]][param.bottleneck] = 1
            results.append(
                self._run_single_set(param,
                                     overwrite=overwrite,
                                     run_parallel=run_parallel))
    tf.logging.info('Done running %s params. Took %s seconds...' %
                    (len(self.params), time.time() - now))
    # Rebuild the keyword suffix for the aggregate results filename.
    # NOTE(review): is_keyword is set but never used here (no trailing ':'
    # is appended, unlike in _run_single_set) — confirm this is intentional.
    keyword = ''
    is_keyword = False
    if self.logit_grad:
        keyword += ':logit_grad'
        is_keyword = True
    if self.grad_nomalize:
        keyword += ':grad_nomalize'
        is_keyword = True
    if return_proto:
        return utils.results_to_proto(results)
    elif self.make_random == False and self.true_cav == False:
        pickle_dump(results, self.tcav_dir + self.project_name + keyword)
    elif self.make_random == False and self.true_cav:
        pickle_dump(
            results,
            self.tcav_dir + 'trueCAV-' + self.project_name + keyword)
    return results
def get_directional_dir_plus(mymodel, target_class, concept, cav, class_acts,
                             examples, cav_dir, project_name, bottleneck,
                             negative_concept, acts, activation_generator,
                             true_cav, logit_grad, grad_nomalize, make_random):
    """Compute per-example directional derivatives along a CAV direction.

    For each target-class example, takes the gradient at the bottleneck
    (loaded from a pickle cache under cav_dir when present, computed
    otherwise) and dots it with either the trained CAV direction or the
    "true" CAV (mean concept activation minus the example's activation).
    Gradients and per-example predictions are cached to disk on first run.

    Args:
      mymodel: model wrapper (label_to_id, get_gradient, get_predictions).
      target_class: name of the target class.
      concept: positive concept name.
      cav: CAV instance (get_direction, .bottleneck).
      class_acts: activations of the target-class examples.
      examples: the raw target-class examples, aligned with class_acts.
      cav_dir: directory used for the grad/predict pickle caches.
      project_name: unused here — kept for call-site compatibility.
      bottleneck: bottleneck name (cache keys and acts lookup).
      negative_concept: unused here — kept for call-site compatibility.
      acts: dict of concept -> bottleneck -> activations (true-CAV mode).
      activation_generator: provides get_activations_for_examples().
      true_cav: if True, use the mean-difference "true" CAV.
      logit_grad: if True, use logit gradients (separate cache files).
      grad_nomalize: if True, use cosine similarity instead of dot product.
      make_random: unused here — kept for call-site compatibility.

    Returns:
      list of per-example directional derivative values.
    """
    class_id = mymodel.label_to_id(target_class)
    directional_dir_vals = []
    cav_vector_vals = []
    # Load the cached gradients for the relevant gradient flavor, or start
    # an empty list that will be filled and dumped below.
    if logit_grad:
        if os.path.exists(cav_dir + '/logitgrad:' + bottleneck + ':' +
                          target_class):
            grad_vals = pickle_load(cav_dir + '/logitgrad:' + bottleneck +
                                    ':' + target_class)
        else:
            grad_vals = []
    else:
        if os.path.exists(cav_dir + '/grad:' + bottleneck + ':' +
                          target_class):
            grad_vals = pickle_load(cav_dir + '/grad:' + bottleneck + ':' +
                                    target_class)
        else:
            grad_vals = []
    # Cached per-example predicted probabilities for the target class.
    if os.path.exists(cav_dir + '/predict:' + target_class):
        class_pred = pickle_load(cav_dir + '/predict:' + target_class)
    else:
        class_pred = []
    for i in range(len(class_acts)):
        act = np.expand_dims(class_acts[i], 0)
        # Add a channel axis for 3-D (unbatched conv) activations.
        # NOTE(review): assumes get_gradient expects NHWC here — confirm.
        if len(act.shape) == 3:
            act = np.expand_dims(act, 3)
        example = examples[i]
        # Compute the gradient unless the cache file for this flavor exists,
        # in which case reuse the i-th cached value.
        if logit_grad:
            if not os.path.exists(cav_dir + '/logitgrad:' + bottleneck + ':' +
                                  target_class):
                grad = np.reshape(
                    mymodel.get_gradient(act, [class_id], cav.bottleneck,
                                         example), -1)
            else:
                grad = grad_vals[i]
        else:
            if not os.path.exists(cav_dir + '/grad:' + bottleneck + ':' +
                                  target_class):
                grad = np.reshape(
                    mymodel.get_gradient(act, [class_id], cav.bottleneck,
                                         example), -1)
            else:
                grad = grad_vals[i]
        if not os.path.exists(cav_dir + '/predict:' + target_class):
            pred = mymodel.get_predictions(np.expand_dims(example,
                                                          0))[:, class_id]
        else:
            pred = class_pred[i]
        if true_cav:
            # Obtain the true direction vector: mean concept activation
            # minus this example's activation.
            mean_concept = np.mean(acts[concept][bottleneck], 0)
            act_example = activation_generator.get_activations_for_examples(
                np.expand_dims(example, 0), bottleneck)
            cav_vector = np.reshape(mean_concept - act_example, -1)
            directional_dir = np.dot(grad, cav_vector)
        else:
            cav_vector = cav.get_direction(concept)
            if grad_nomalize == False:
                if logit_grad:
                    directional_dir = np.dot(grad, cav_vector)
                else:
                    # Scale by -pred: get_gradient returns the direction that
                    # decreases the class probability.
                    directional_dir = np.dot(-pred * grad, cav_vector)
            else:
                if logit_grad:
                    directional_dir = cos_sim(grad, cav_vector)
                else:
                    directional_dir = cos_sim(-grad, cav_vector)
        directional_dir_vals.append(directional_dir)
        cav_vector_vals.append(cav_vector)
        # NOTE(review): this `or` appends to grad_vals whenever EITHER cache
        # file is missing, even when the relevant flavor was loaded from
        # cache — a per-flavor check (matching the dump logic below) looks
        # intended; confirm.
        if not os.path.exists(cav_dir + '/grad:' + bottleneck + ':' +
                              target_class) or not os.path.exists(
                                  cav_dir + '/logitgrad:' + bottleneck + ':' +
                                  target_class):
            grad_vals.append(grad)
        if not os.path.exists(cav_dir + '/predict:' + target_class):
            class_pred.append(pred)
    #logit_grad = np.reshape(mymodel.get_logit_gradient(act,class_id,cav.bottleneck).squeeze(),-1)
    # Persist the gradient cache for the flavor that was just computed.
    if logit_grad:
        if not os.path.exists(cav_dir + '/logitgrad:' + bottleneck + ':' +
                              target_class):
            pickle_dump(
                grad_vals,
                cav_dir + '/logitgrad:' + bottleneck + ':' + target_class)
    else:
        if not os.path.exists(cav_dir + '/grad:' + bottleneck + ':' +
                              target_class):
            pickle_dump(grad_vals,
                        cav_dir + '/grad:' + bottleneck + ':' + target_class)
    # NOTE(review): this discards the per-example preds accumulated above and
    # recomputes predictions for all examples in one batch before dumping —
    # the loop-time accumulation appears redundant; confirm.
    if not os.path.exists(cav_dir + '/predict:' + target_class):
        class_pred = mymodel.get_predictions(examples)[:, class_id]
        pickle_dump(class_pred, cav_dir + '/predict:' + target_class)
    return directional_dir_vals