def get_true_cav_mean(path, bottleneck, concept):
    """Average every 'cav-true' vector under *path* matching a bottleneck/concept.

    Directory entries are expected to be named
    'cav-true:<t>:<concept>:<bottleneck>' (colon-separated, four fields).

    Args:
        path: directory containing pickled CAV value files.
        bottleneck: bottleneck layer name to match (4th field).
        concept: concept name to match (3rd field).

    Returns:
        np.ndarray: element-wise mean of all matching CAV value arrays.

    Raises:
        ValueError: if no matching entry exists.  (The original code would
            have crashed with UnboundLocalError on `cav_values` instead.)
    """
    cnt = 0
    cav_values = None
    for cav_dir in os.listdir(path):
        if cav_dir.split(':')[0] != 'cav-true':
            continue
        _, _, c, b = cav_dir.split(':')
        if b == bottleneck and c == concept:
            values = np.array(pickle_load(path + '/' + cav_dir))
            # First match initializes the accumulator; later matches add in.
            cav_values = values if cnt == 0 else cav_values + values
            cnt += 1
    if cnt == 0:
        raise ValueError(
            'no cav-true entry for bottleneck=%r concept=%r under %r'
            % (bottleneck, concept, path))
    cav_values /= cnt
    return cav_values
def load_cav(cav_path):
    """Deserialize a CAV object previously pickled as a plain dict."""
    saved = pickle_load(cav_path)
    restored = CAV(saved['concepts'], saved['bottleneck'],
                   saved['hparams'], saved['saved_path'])
    # The trained state is stored alongside the constructor arguments.
    restored.accuracies = saved['accuracies']
    restored.cavs = saved['cavs']
    return restored
def get_grad(path, bottleneck, target):
    """Load the cached gradient array for (bottleneck, target) from *path*.

    Entries are named 'grad:<bottleneck>:<target>'.

    Args:
        path: directory containing pickled gradient files.
        bottleneck: bottleneck layer name to match.
        target: target class name to match.

    Returns:
        np.ndarray of the first matching entry, or None when no entry matches.
    """
    for entry in os.listdir(path):  # renamed: `dir` shadowed the builtin
        if entry.split(':')[0] != 'grad':
            continue
        _, b, t = entry.split(':')
        if b == bottleneck and t == target:
            return np.array(pickle_load(path + '/' + entry))
    return None  # explicit: no cached gradient found (was an implicit None)
def get_predict(path, target):
    """Load the cached prediction array for *target* from *path*.

    Entries are named 'predict:<target>'.

    Args:
        path: directory containing pickled prediction files.
        target: target class name to match.

    Returns:
        np.ndarray of the first matching entry, or None when no entry matches.
    """
    for entry in os.listdir(path):  # renamed: `dir` shadowed the builtin
        if entry.split(':')[0] != 'predict':
            continue
        _, t = entry.split(':')
        if t == target:
            return np.array(pickle_load(path + '/' + entry))
    return None  # explicit: no cached prediction found (was an implicit None)
def get_cav_mean(path, bottleneck, concept):
    """Average every learned CAV under *path* matching a bottleneck/concept.

    Directory entries are named 'cav:<t>:<concept>:<neg_concept>:<bottleneck>'
    -- wait, four colon fields: '<cav>:<concept>:<neg_concept>:<bottleneck>';
    the bottleneck field may carry a file extension which is stripped.
    Each pickle is a dict whose 'cavs' entry holds the vector at index 0.

    Args:
        path: directory containing pickled CAV files.
        bottleneck: bottleneck layer name to match (extension-stripped field).
        concept: concept name to match (2nd field).

    Returns:
        np.ndarray: element-wise mean of all matching CAV vectors.

    Raises:
        ValueError: if no matching entry exists.  (The original code would
            have crashed with UnboundLocalError on `cav_values` instead.)
    """
    cnt = 0
    cav_values = None
    for cav_dir in os.listdir(path):
        if cav_dir.split(':')[0] != 'cav':
            continue
        t, c, nc, b = cav_dir.split(':')
        b = b.split('.')[0]  # drop a trailing file extension, if any
        if b == bottleneck and c == concept:
            values = np.array(pickle_load(path + '/' + cav_dir)['cavs'][0])
            # First match initializes the accumulator; later matches add in.
            cav_values = values if cnt == 0 else cav_values + values
            cnt += 1
    if cnt == 0:
        raise ValueError(
            'no cav entry for bottleneck=%r concept=%r under %r'
            % (bottleneck, concept, path))
    cav_values /= cnt
    return cav_values
def get_random_cav(path, bottleneck):
    """Collect the CAV vector of every 'random500_*' concept at *bottleneck*.

    Scans *path* for entries named 'cav:<t>:<concept>:<neg>:<bottleneck>[.ext]'
    and returns the list of their first stored vector.
    """
    collected = []
    for fname in os.listdir(path):
        parts = fname.split(':')
        if parts[0] != 'cav':
            continue
        _, c, _, b = parts
        if b.split('.')[0] == bottleneck and 'random500_' in c:
            vec = np.array(pickle_load(path + '/' + fname)['cavs'][0])
            collected.append(vec)
    return collected
def process_and_load_activations(self, bottleneck_names, concepts):
    """Return activations for every (concept, bottleneck) pair, cached on disk.

    When self.acts_dir is set, activations are pickled under
    'acts_<concept>_<bottleneck>' and reused on later calls; a corrupt or
    unreadable cache file falls back to recomputation.  When self.acts_dir
    is falsy, everything is computed fresh and nothing is written.

    Args:
        bottleneck_names: iterable of bottleneck layer names.
        concepts: iterable of concept names.

    Returns:
        dict: acts[concept][bottleneck] -> activation array
        (squeezed when loaded from cache).
    """
    acts = {}
    if self.acts_dir and not tf.io.gfile.exists(self.acts_dir):
        tf.io.gfile.makedirs(self.acts_dir)
    for concept in concepts:
        if concept not in acts:
            acts[concept] = {}
        for bottleneck_name in bottleneck_names:
            acts_path = os.path.join(
                self.acts_dir, 'acts_{}_{}'.format(
                    concept, bottleneck_name)) if self.acts_dir else None
            if acts_path and os.path.exists(acts_path):
                try:
                    acts[concept][bottleneck_name] = pickle_load(
                        acts_path).squeeze()
                    tf.logging.debug('Loaded ' + acts_path)
                except Exception:
                    # Was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit; best-effort fallback to
                    # recomputation is kept, just scoped to real errors.
                    tf.logging.info(
                        'Fail loading Activation. Now calculating...')
                    acts[concept][
                        bottleneck_name] = self.get_activations_for_concept(
                            concept, bottleneck_name)
            else:
                acts[concept][
                    bottleneck_name] = self.get_activations_for_concept(
                        concept, bottleneck_name)
                if acts_path:
                    tf.logging.info(
                        '{} does not exist, Making one...'.format(acts_path))
                    pickle_dump(acts[concept][bottleneck_name], acts_path)
    return acts
# Collect TCAV result pickles for one target class and save both the full
# list and a de-duplicated list (one orientation per random500 pair).
path = root_dir + 'log/2layers-colored-mnist-number_10/random100'
target = 'mnist-6'
#bottleneck = 'conv1'
tcav_path = path + '/tcavs/'
keyword = 'grad_nomalize'
results_path = os.listdir(tcav_path)

# Filenames look like '<t>:<target>:<bneck>:random500_<pos>_random500_<neg>...'
pair_re = re.compile(r'.+:.+:.+:random500_(\d+)_random500_(\d+).*')

all_results = []
non_dup_results = []
for result in results_path:
    if keyword in result:
        continue
    # Match once per filename (the original ran the same regex twice).
    m = pair_re.match(result)
    positive_num = int(m.group(1))
    negative_num = int(m.group(2))
    if result.split(':')[1] == target:
        # Load once and reuse (the original unpickled the same file twice).
        loaded = pickle_load(tcav_path + result)
        all_results.append(loaded)
        if positive_num < negative_num:
            # Keep only one orientation of each (pos, neg) pair.
            non_dup_results.append(loaded)
print(len(all_results))
print(len(non_dup_results))
pickle_dump(all_results, path + '/' + target + '_results_all')
pickle_dump(non_dup_results, path + '/' + target + '_results_non_dup')
def compute_tcav_score(mymodel, target_class, concept, cav, class_acts, examples, acts, bottleneck, activation_generator, cav_dir, true_cav=False, logit_grad=False, run_parallel=True, num_workers=20): """Compute TCAV score. Args: mymodel: a model class instance target_class: one target class concept: one concept cav: an instance of cav class_acts: activations of the examples in the target class where examples[i] corresponds to class_acts[i] examples: an array of examples of the target class where examples[i] corresponds to class_acts[i] run_parallel: run this parallel fashion num_workers: number of workers if we run in parallel. Returns: TCAV score (i.e., ratio of pictures that returns negative dot product wrt loss). """ # load grad if logit_grad: if os.path.exists(cav_dir + '/logitgrad:' + bottleneck + ':' + target_class): grad_vals = pickle_load(cav_dir + '/logitgrad:' + bottleneck + ':' + target_class) else: grad_vals = [] else: if os.path.exists(cav_dir + '/grad:' + bottleneck + ':' + target_class): grad_vals = pickle_load(cav_dir + '/grad:' + bottleneck + ':' + target_class) else: grad_vals = [] count = 0 class_id = mymodel.label_to_id(target_class) if run_parallel: pool = multiprocessing.Pool(num_workers) directions = pool.map( lambda i: TCAV.get_direction_dir_sign( mymodel, np.expand_dims(class_acts[i], 0), cav, concept, class_id, examples[i]), range(len(class_acts))) return sum(directions) / float(len(class_acts)) else: for i in range(len(class_acts)): act = np.expand_dims(class_acts[i], 0) if len(act.shape) == 3: act = np.expand_dims(act, 3) example = examples[i] if true_cav: if TCAV.get_direction_dir_sign_true_cav( mymodel, act, cav, concept, class_id, example, acts, bottleneck, activation_generator, cav_dir, i): count += 1 else: if TCAV.get_direction_dir_sign(mymodel, act, cav, concept, class_id, example, grad_vals, i, logit_grad): count += 1 return float(count) / float(len(class_acts))
def get_directional_dir_plus(mymodel, target_class, concept, cav, class_acts,
                             examples, cav_dir, project_name, bottleneck,
                             negative_concept, acts, activation_generator,
                             true_cav, logit_grad, grad_nomalize, make_random):
    """Compute per-example directional derivatives w.r.t. a concept direction.

    For each example i (with activation class_acts[i]) this computes a
    gradient at `bottleneck`, picks a concept direction (either the learned
    CAV via cav.get_direction, or a "true" direction built from the mean
    concept activation when true_cav is set), and records the dot product /
    cosine similarity between the two.  Gradients and predictions are cached
    as pickles under cav_dir ('grad:...' / 'logitgrad:...' / 'predict:...')
    and reused on later calls.

    Args (partial; only what the body shows):
      logit_grad: select the 'logitgrad:' cache and skip the -pred scaling.
      grad_nomalize: when truthy, use cosine similarity instead of dot
        product.  [sic: spelling kept to match the rest of the project]
      true_cav: build the direction from mean concept activations instead of
        the learned CAV.
      project_name, negative_concept, make_random: unused in this body --
        presumably kept for signature compatibility with callers.

    Returns:
      list of per-example directional derivative values.
    """
    class_id = mymodel.label_to_id(target_class)
    directional_dir_vals = []
    cav_vector_vals = []  # collected but never returned -- possibly vestigial
    # Load cached gradients if present; otherwise start an empty list that
    # gets filled inside the loop and dumped at the end.
    if logit_grad:
        if os.path.exists(cav_dir + '/logitgrad:' + bottleneck + ':' +
                          target_class):
            grad_vals = pickle_load(cav_dir + '/logitgrad:' + bottleneck +
                                    ':' + target_class)
        else:
            grad_vals = []
    else:
        if os.path.exists(cav_dir + '/grad:' + bottleneck + ':' +
                          target_class):
            grad_vals = pickle_load(cav_dir + '/grad:' + bottleneck + ':' +
                                    target_class)
        else:
            grad_vals = []
    # Same caching scheme for per-example predictions of the target class.
    if os.path.exists(cav_dir + '/predict:' + target_class):
        class_pred = pickle_load(cav_dir + '/predict:' + target_class)
    else:
        class_pred = []
    for i in range(len(class_acts)):
        act = np.expand_dims(class_acts[i], 0)
        # Rank-3 activations get an extra trailing axis -- presumably to
        # restore a channel dimension the model expects; TODO confirm.
        if len(act.shape) == 3:
            act = np.expand_dims(act, 3)
        example = examples[i]
        # Cache miss -> compute the flattened gradient; cache hit -> reuse.
        # NOTE(review): the exists() check repeats inside the loop even
        # though its result cannot change until the dump below.
        if logit_grad:
            if not os.path.exists(cav_dir + '/logitgrad:' + bottleneck + ':' +
                                  target_class):
                grad = np.reshape(
                    mymodel.get_gradient(act, [class_id], cav.bottleneck,
                                         example), -1)
            else:
                grad = grad_vals[i]
        else:
            if not os.path.exists(cav_dir + '/grad:' + bottleneck + ':' +
                                  target_class):
                grad = np.reshape(
                    mymodel.get_gradient(act, [class_id], cav.bottleneck,
                                         example), -1)
            else:
                grad = grad_vals[i]
        if not os.path.exists(cav_dir + '/predict:' + target_class):
            pred = mymodel.get_predictions(np.expand_dims(example,
                                                          0))[:, class_id]
        else:
            pred = class_pred[i]
        if true_cav:
            # Obtain the true direction vector: mean concept activation
            # minus this example's activation.  (Comment translated from
            # Japanese: 真の方向ベクトルを取得.)
            mean_concept = np.mean(acts[concept][bottleneck], 0)
            act_example = activation_generator.get_activations_for_examples(
                np.expand_dims(example, 0), bottleneck)
            cav_vector = np.reshape(mean_concept - act_example, -1)
            directional_dir = np.dot(grad, cav_vector)
        else:
            cav_vector = cav.get_direction(concept)
            if grad_nomalize == False:
                if logit_grad:
                    directional_dir = np.dot(grad, cav_vector)
                else:
                    # -pred scales the loss gradient back toward the logit
                    # gradient -- presumably; verify against the model's
                    # get_gradient convention.
                    directional_dir = np.dot(-pred * grad, cav_vector)
            else:
                if logit_grad:
                    directional_dir = cos_sim(grad, cav_vector)
                else:
                    # NOTE(review): here only -grad is used (no pred factor),
                    # unlike the dot-product branch above -- confirm intended.
                    directional_dir = cos_sim(-grad, cav_vector)
        directional_dir_vals.append(directional_dir)
        cav_vector_vals.append(cav_vector)
        # NOTE(review): `or` means this appends whenever EITHER cache file is
        # missing, so grad_vals can grow even when grads were loaded from the
        # one cache that exists -- likely intended to be the cache selected by
        # logit_grad; confirm.
        if not os.path.exists(cav_dir + '/grad:' + bottleneck + ':' +
                              target_class) or not os.path.exists(
                                  cav_dir + '/logitgrad:' + bottleneck + ':' +
                                  target_class):
            grad_vals.append(grad)
        if not os.path.exists(cav_dir + '/predict:' + target_class):
            class_pred.append(pred)
    #logit_grad = np.reshape(mymodel.get_logit_gradient(act,class_id,cav.bottleneck).squeeze(),-1)
    # Persist freshly computed gradients once, after the loop.
    if logit_grad:
        if not os.path.exists(cav_dir + '/logitgrad:' + bottleneck + ':' +
                              target_class):
            pickle_dump(
                grad_vals,
                cav_dir + '/logitgrad:' + bottleneck + ':' + target_class)
    else:
        if not os.path.exists(cav_dir + '/grad:' + bottleneck + ':' +
                              target_class):
            pickle_dump(grad_vals,
                        cav_dir + '/grad:' + bottleneck + ':' + target_class)
    if not os.path.exists(cav_dir + '/predict:' + target_class):
        # NOTE(review): this overwrites the per-example list accumulated in
        # the loop with a fresh batch prediction before dumping -- confirm
        # the two are equivalent.
        class_pred = mymodel.get_predictions(examples)[:, class_id]
        pickle_dump(class_pred, cav_dir + '/predict:' + target_class)
    return directional_dir_vals