# --- NetDissect-style FeatureOperator: batch feature extraction, quantile
# --- thresholding, and concept tallying over the Broden segmentation data.
class FeatureOperator:

    def __init__(self):
        if not os.path.exists(settings.OUTPUT_FOLDER):
            os.makedirs(os.path.join(settings.OUTPUT_FOLDER, 'image'))
        self.data = SegmentationData(settings.DATA_DIRECTORY,
                                     categories=settings.CATAGORIES)
        self.loader = SegmentationPrefetcher(self.data, categories=['image'],
                                             once=True,
                                             batch_size=settings.BATCH_SIZE)
        # BGR channel means of the training data, subtracted before inference
        self.mean = [109.5388, 118.6897, 124.6901]

    def feature_extraction(self, model=None, memmap=True):
        loader = self.loader
        # extract the max value activation for each image
        maxfeatures = [None] * len(settings.FEATURE_NAMES)
        wholefeatures = [None] * len(settings.FEATURE_NAMES)
        features_size = [None] * len(settings.FEATURE_NAMES)
        features_size_file = os.path.join(settings.OUTPUT_FOLDER, "feature_size.npy")

        if memmap:
            skip = True
            mmap_files = [os.path.join(settings.OUTPUT_FOLDER, "%s.mmap" % feature_name)
                          for feature_name in settings.FEATURE_NAMES]
            mmap_max_files = [os.path.join(settings.OUTPUT_FOLDER, "%s_max.mmap" % feature_name)
                              for feature_name in settings.FEATURE_NAMES]
            if os.path.exists(features_size_file):
                features_size = np.load(features_size_file)
            else:
                skip = False
            for i, (mmap_file, mmap_max_file) in enumerate(zip(mmap_files, mmap_max_files)):
                if os.path.exists(mmap_file) and os.path.exists(mmap_max_file) and features_size[i] is not None:
                    print('loading features %s' % settings.FEATURE_NAMES[i])
                    wholefeatures[i] = np.memmap(mmap_file, dtype=float, mode='r',
                                                 shape=tuple(features_size[i]))
                    maxfeatures[i] = np.memmap(mmap_max_file, dtype=float, mode='r',
                                               shape=tuple(features_size[i][:2]))
                else:
                    print('file missing, loading from scratch')
                    skip = False
            if skip:
                return wholefeatures, maxfeatures

        # integer ceiling division (the original float division breaks the %d format below)
        num_batches = (len(loader.indexes) + loader.batch_size - 1) // loader.batch_size
        for batch_idx, batch in enumerate(loader.tensor_batches(bgr_mean=self.mean)):
            del features_blobs[:]
            input = batch[0]
            batch_size = len(input)
            print('extracting feature from batch %d / %d' % (batch_idx + 1, num_batches))
            input = torch.from_numpy(input[:, ::-1, :, :].copy())
            input.div_(255.0 * 0.224)
            if settings.GPU:
                input = input.cuda()
            input_var = V(input, volatile=True)
            logit = model.forward(input_var, 0)
            while np.isnan(logit.data.max()):
                # occasionally the forward pass returns NaNs; retry until it does not
                print("nan")
                del features_blobs[:]
                logit = model.forward(input_var, 0)
            if maxfeatures[0] is None:
                # initialize the feature variable
                for i, feat_batch in enumerate(features_blobs):
                    size_features = (len(loader.indexes), feat_batch.shape[1])
                    if memmap:
                        maxfeatures[i] = np.memmap(mmap_max_files[i], dtype=float,
                                                   mode='w+', shape=size_features)
                    else:
                        maxfeatures[i] = np.zeros(size_features)
            if len(feat_batch.shape) == 4 and wholefeatures[0] is None:
                # initialize the feature variable
                for i, feat_batch in enumerate(features_blobs):
                    size_features = (len(loader.indexes), feat_batch.shape[1],
                                     feat_batch.shape[2], feat_batch.shape[3])
                    features_size[i] = size_features
                    if memmap:
                        wholefeatures[i] = np.memmap(mmap_files[i], dtype=float,
                                                     mode='w+', shape=size_features)
                    else:
                        wholefeatures[i] = np.zeros(size_features)
                np.save(features_size_file, features_size)
            start_idx = batch_idx * settings.BATCH_SIZE
            end_idx = min((batch_idx + 1) * settings.BATCH_SIZE, len(loader.indexes))
            for i, feat_batch in enumerate(features_blobs):
                if len(feat_batch.shape) == 4:
                    wholefeatures[i][start_idx:end_idx] = feat_batch
                    maxfeatures[i][start_idx:end_idx] = np.max(np.max(feat_batch, 3), 2)
                elif len(feat_batch.shape) == 3:
                    maxfeatures[i][start_idx:end_idx] = np.max(feat_batch, 2)
                elif len(feat_batch.shape) == 2:
                    maxfeatures[i][start_idx:end_idx] = feat_batch
        if len(feat_batch.shape) == 2:
            wholefeatures = maxfeatures
        return wholefeatures, maxfeatures

    def quantile_threshold(self, features, savepath=''):
        qtpath = os.path.join(settings.OUTPUT_FOLDER, savepath)
        if savepath and os.path.exists(qtpath):
            return np.load(qtpath)
        print("calculating quantile threshold")
        quant = vecquantile.QuantileVector(depth=features.shape[1], seed=1)
        start_time = time.time()
        last_batch_time = start_time
        batch_size = 64
        for i in range(0, features.shape[0], batch_size):
            batch_time = time.time()
            rate = i / (batch_time - start_time + 1e-15)
            batch_rate = batch_size / (batch_time - last_batch_time + 1e-15)
            last_batch_time = batch_time
            print('Processing quantile index %d: %f %f' % (i, rate, batch_rate))
            batch = features[i:i + batch_size]
            batch = np.transpose(batch, axes=(0, 2, 3, 1)).reshape(-1, features.shape[1])
            quant.add(batch)
        ret = quant.readout(1000)[:, int(1000 * (1 - settings.QUANTILE) - 1)]
        if savepath:
            np.save(qtpath, ret)
        return ret
        # return np.percentile(features, 100 * (1 - settings.QUANTILE), axis=axis)

    @staticmethod
    def tally_job(args):
        features, data, threshold, tally_labels, tally_units, tally_units_cat, tally_both, start, end = args
        units = features.shape[1]
        size_RF = (settings.IMG_SIZE / features.shape[2],
                   settings.IMG_SIZE / features.shape[3])
        fieldmap = ((0, 0), size_RF, size_RF)
        pd = SegmentationPrefetcher(data, categories=data.category_names(),
                                    once=True,
                                    batch_size=settings.TALLY_BATCH_SIZE,
                                    ahead=settings.TALLY_AHEAD,
                                    start=start, end=end)
        count = start
        start_time = time.time()
        last_batch_time = start_time
        for batch in pd.batches():
            batch_time = time.time()
            rate = (count - start) / (batch_time - start_time + 1e-15)
            batch_rate = len(batch) / (batch_time - last_batch_time + 1e-15)
            last_batch_time = batch_time
            print('labelprobe image index %d, items per sec %.4f, %.4f' %
                  (count, rate, batch_rate))
            for concept_map in batch:
                count += 1
                img_index = concept_map['i']
                scalars, pixels = [], []
                for cat in data.category_names():
                    label_group = concept_map[cat]
                    shape = np.shape(label_group)
                    if len(shape) % 2 == 0:
                        label_group = [label_group]
                    if len(shape) < 2:
                        scalars += label_group
                    else:
                        pixels.append(label_group)
                for scalar in scalars:
                    tally_labels[scalar] += concept_map['sh'] * concept_map['sw']
                if pixels:
                    pixels = np.concatenate(pixels)
                    tally_label = np.bincount(pixels.ravel())
                    if len(tally_label) > 0:
                        tally_label[0] = 0
                    tally_labels[:len(tally_label)] += tally_label
                for unit_id in range(units):
                    feature_map = features[img_index][unit_id]
                    if feature_map.max() > threshold[unit_id]:
                        mask = imresize(feature_map,
                                        (concept_map['sh'], concept_map['sw']),
                                        mode='F')
                        # reduction = int(round(settings.IMG_SIZE / float(concept_map['sh'])))
                        # mask = upsample.upsampleL(fieldmap, feature_map, shape=(concept_map['sh'], concept_map['sw']), reduction=reduction)
                        indexes = np.argwhere(mask > threshold[unit_id])
                        tally_units[unit_id] += len(indexes)
                        # start from a zero per-category tally; the original left
                        # tally_cat undefined when an image had no pixel labels
                        tally_cat = np.zeros(data.labelcat.shape[1], dtype=np.float64)
                        if len(pixels) > 0:
                            tally_bt = np.bincount(
                                pixels[:, indexes[:, 0], indexes[:, 1]].ravel())
                            if len(tally_bt) > 0:
                                tally_bt[0] = 0
                            tally_cat = np.dot(tally_bt[None, :],
                                               data.labelcat[:len(tally_bt), :])[0]
                            tally_both[unit_id, :len(tally_bt)] += tally_bt
                        for scalar in scalars:
                            tally_cat += data.labelcat[scalar]
                            tally_both[unit_id, scalar] += len(indexes)
                        tally_units_cat[unit_id] += len(indexes) * (tally_cat > 0)
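    # How the tallies above turn into interpretability scores (see tally()
    # below): for unit u and label c,
    #   IoU(u, c) = tally_both[u, c]
    #               / (tally_units_cat[u, c] + tally_labels[c] - tally_both[u, c])
    # i.e. intersection over union of the unit's thresholded activation mask
    # and the label's segmentation mask, with the union expanded by
    # inclusion-exclusion. A minimal numpy sketch of that reduction
    # (illustration only; this helper is not part of the original pipeline):
    @staticmethod
    def _iou_from_tallies(tally_both, tally_units_cat, tally_labels):
        # tally_both: (units, labels); tally_units_cat: (units, labels) after
        # projecting categories back through data.labelcat; tally_labels: (labels,)
        return tally_both / (tally_units_cat
                             + tally_labels[np.newaxis, :]
                             - tally_both + 1e-10)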
    def tally(self, features, threshold, savepath=''):
        csvpath = os.path.join(settings.OUTPUT_FOLDER, savepath)
        if savepath and os.path.exists(csvpath):
            return load_csv(csvpath)

        units = features.shape[1]
        labels = len(self.data.label)
        categories = self.data.category_names()
        tally_both = np.zeros((units, labels), dtype=np.float64)
        tally_units = np.zeros(units, dtype=np.float64)
        tally_units_cat = np.zeros((units, len(categories)), dtype=np.float64)
        tally_labels = np.zeros(labels, dtype=np.float64)

        if settings.PARALLEL > 1:
            psize = int(np.ceil(float(self.data.size()) / settings.PARALLEL))
            ranges = [(s, min(self.data.size(), s + psize))
                      for s in range(0, self.data.size(), psize)
                      if s < self.data.size()]
            params = [(features, self.data, threshold, tally_labels, tally_units,
                       tally_units_cat, tally_both) + r for r in ranges]
            threadpool = pool.ThreadPool(processes=settings.PARALLEL)
            threadpool.map(FeatureOperator.tally_job, params)
        else:
            FeatureOperator.tally_job((features, self.data, threshold,
                                       tally_labels, tally_units,
                                       tally_units_cat, tally_both,
                                       0, self.data.size()))

        primary_categories = self.data.primary_categories_per_index()
        tally_units_cat = np.dot(tally_units_cat, self.data.labelcat.T)
        iou = tally_both / (tally_units_cat + tally_labels[np.newaxis, :]
                            - tally_both + 1e-10)
        pciou = np.array([iou * (primary_categories[np.arange(iou.shape[1])] == ci)[np.newaxis, :]
                          for ci in range(len(self.data.category_names()))])
        label_pciou = pciou.argmax(axis=2)
        name_pciou = [[self.data.name(None, j) for j in label_pciou[ci]]
                      for ci in range(len(label_pciou))]
        score_pciou = pciou[np.arange(pciou.shape[0])[:, np.newaxis],
                            np.arange(pciou.shape[1])[np.newaxis, :],
                            label_pciou]
        bestcat_pciou = score_pciou.argsort(axis=0)[::-1]
        ordering = score_pciou.max(axis=0).argsort()[::-1]
        rets = [None] * len(ordering)

        for i, unit in enumerate(ordering):
            # Top images are top[unit]
            bestcat = bestcat_pciou[0, unit]
            data = {
                'unit': (unit + 1),
                'category': categories[bestcat],
                'label': name_pciou[bestcat][unit],
                'score': score_pciou[bestcat][unit]
            }
            for ci, cat in enumerate(categories):
                label = label_pciou[ci][unit]
                data.update({
                    '%s-label' % cat: name_pciou[ci][unit],
                    '%s-truth' % cat: tally_labels[label],
                    '%s-activation' % cat: tally_units_cat[unit, label],
                    '%s-intersect' % cat: tally_both[unit, label],
                    '%s-iou' % cat: score_pciou[ci][unit]
                })
            rets[i] = data

        if savepath:
            import csv
            csv_fields = sum([['%s-label' % cat,
                               '%s-truth' % cat,
                               '%s-activation' % cat,
                               '%s-intersect' % cat,
                               '%s-iou' % cat] for cat in categories],
                             ['unit', 'category', 'label', 'score'])
            with open(csvpath, 'w') as f:
                writer = csv.DictWriter(f, csv_fields)
                writer.writeheader()
                for i in range(len(ordering)):
                    writer.writerow(rets[i])
        return rets
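
# Minimal usage sketch for the dissection pipeline above (hedged: assumes the
# surrounding project's conventions -- `model` has forward hooks that append
# layer activations to the global `features_blobs`, and settings.FEATURE_NAMES
# lists the hooked layers):
#
#     fo = FeatureOperator()
#     features, maxfeature = fo.feature_extraction(model=model)
#     for layer_id, layer in enumerate(settings.FEATURE_NAMES):
#         thresholds = fo.quantile_threshold(features[layer_id],
#                                            savepath="quantile.npy")
#         tally_result = fo.tally(features[layer_id], thresholds,
#                                 savepath="tally.csv")
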
# --- Extended FeatureOperator variant: adds classification validation and the
# --- single-image / VQA / captioning extraction and weight-decomposition
# --- utilities. Note that it redefines the FeatureOperator name above.
class FeatureOperator:

    def __init__(self):
        if not os.path.exists(settings.OUTPUT_FOLDER):
            os.makedirs(os.path.join(settings.OUTPUT_FOLDER, 'html', 'image'))
            os.makedirs(os.path.join(settings.OUTPUT_FOLDER, 'snapshot'))
            os.makedirs(os.path.join(settings.OUTPUT_FOLDER, 'sample_cache'))
        self.data = SegmentationData(settings.DATA_DIRECTORY,
                                     categories=settings.CATAGORIES)
        self.loader = SegmentationPrefetcher(self.data, categories=['image'],
                                             once=True,
                                             batch_size=settings.BATCH_SIZE)
        self.mean = [109.5388, 118.6897, 124.6901]

    def val(self, model):
        # NOTE: expects `val_loader` to be in scope (see the commented-out
        # loader line below)
        # val_loader = places365_imagenet_loader('val')
        import torch.nn as nn
        criterion = nn.CrossEntropyLoss().cuda()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        for i, (input, target) in enumerate(val_loader):
            target = target.cuda()
            input = input.cuda()
            input_var = torch.autograd.Variable(input, volatile=True)
            target_var = torch.autograd.Variable(target, volatile=True)
            # compute output
            fc_output = model(input_var)
            loss = criterion(fc_output, target_var)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(fc_output.data, target, topk=(1, 5))
            if torch.__version__.startswith('0.4'):
                losses.update(loss.item(), input.size(0))
            else:
                losses.update(loss.data[0], input.size(0))
            top1.update(prec1[0], input.size(0))
            top5.update(prec5[0], input.size(0))
            if i % 10 == 0:
                print('Test: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(val_loader), loss=losses, top1=top1, top5=top5))
        val_acc = top1.avg
        print('VAL Prec@1 %.3f' % val_acc)
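    # `accuracy(output, target, topk=(1, 5))` above is assumed to follow the
    # usual PyTorch reference recipe (an assumption -- the helper is defined
    # elsewhere in this project):
    #     _, pred = output.topk(max(topk), 1, True, True)   # (batch, maxk)
    #     correct = pred.t().eq(target.view(1, -1).expand_as(pred.t()))
    #     prec_k = correct[:k].contiguous().view(-1).float().sum(0) / batch * 100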
    def feature_extraction(self, model=None, memmap=True):
        loader = self.loader
        # extract the max value activation for each image
        maxfeatures = [None] * len(settings.FEATURE_NAMES)
        wholefeatures = [None] * len(settings.FEATURE_NAMES)
        features_size = [None] * len(settings.FEATURE_NAMES)
        features_size_file = os.path.join(settings.OUTPUT_FOLDER, "feature_size.npy")

        if memmap:
            skip = True
            mmap_files = [os.path.join(settings.OUTPUT_FOLDER, "%s.mmap" % feature_name)
                          for feature_name in settings.FEATURE_NAMES]
            mmap_max_files = [os.path.join(settings.OUTPUT_FOLDER, "%s_max.mmap" % feature_name)
                              for feature_name in settings.FEATURE_NAMES]
            if os.path.exists(features_size_file):
                features_size = np.load(features_size_file)
            else:
                skip = False
            for i, (mmap_file, mmap_max_file) in enumerate(zip(mmap_files, mmap_max_files)):
                if os.path.exists(mmap_file) and os.path.exists(mmap_max_file) and features_size[i] is not None:
                    print('loading features %s' % settings.FEATURE_NAMES[i])
                    wholefeatures[i] = np.memmap(mmap_file, dtype=np.float32, mode='r',
                                                 shape=tuple(features_size[i]))
                    maxfeatures[i] = np.memmap(mmap_max_file, dtype=np.float32, mode='r',
                                               shape=tuple(features_size[i][:2]))
                else:
                    print('file missing, loading from scratch')
                    skip = False
            if skip:
                return wholefeatures, maxfeatures

        # integer ceiling division (the original float division breaks the %d format below)
        num_batches = (len(loader.indexes) + loader.batch_size - 1) // loader.batch_size
        for batch_idx, batch in enumerate(loader.tensor_batches(bgr_mean=self.mean)):
            del features_blobs[:]
            print('extracting feature from batch %d / %d' % (batch_idx + 1, num_batches))
            input = batch[0]
            if settings.CAFFE_MODEL:
                input = torch.from_numpy(input.copy())
            else:
                input = torch.from_numpy(input[:, ::-1, :, :].copy())
                input.div_(255.0 * 0.224)
            batch_size = len(input)
            if settings.GPU:
                input = input.cuda()
            input_var = V(input, volatile=True)
            if settings.APP == "classification":
                output = model.forward(input_var)
            else:
                output = model.cnn.forward(input_var)
            while np.isnan(output.data.max()):
                # occasionally the forward pass returns NaNs; retry until it
                # does not, re-dispatching the same way as above (the original
                # always called model.forward here, which is wrong for
                # non-classification apps)
                print("nan")
                del features_blobs[:]
                if settings.APP == "classification":
                    output = model.forward(input_var)
                else:
                    output = model.cnn.forward(input_var)
            if maxfeatures[0] is None:
                # initialize the feature variable
                for i, feat_batch in enumerate(features_blobs):
                    size_features = (len(loader.indexes), feat_batch.shape[1])
                    if memmap:
                        maxfeatures[i] = np.memmap(mmap_max_files[i], dtype=np.float32,
                                                   mode='w+', shape=size_features)
                    else:
                        maxfeatures[i] = np.zeros(size_features)
            if len(feat_batch.shape) == 4 and wholefeatures[0] is None:
                # initialize the feature variable
                for i, feat_batch in enumerate(features_blobs):
                    size_features = (len(loader.indexes), feat_batch.shape[1],
                                     feat_batch.shape[2], feat_batch.shape[3])
                    features_size[i] = size_features
                    if memmap:
                        wholefeatures[i] = np.memmap(mmap_files[i], dtype=np.float32,
                                                     mode='w+', shape=size_features)
                    else:
                        wholefeatures[i] = np.zeros(size_features)
                np.save(features_size_file, features_size)
            start_idx = batch_idx * settings.BATCH_SIZE
            end_idx = min((batch_idx + 1) * settings.BATCH_SIZE, len(loader.indexes))
            for i, feat_batch in enumerate(features_blobs):
                if len(feat_batch.shape) == 4:
                    wholefeatures[i][start_idx:end_idx] = feat_batch
                    maxfeatures[i][start_idx:end_idx] = np.max(np.max(feat_batch, 3), 2)
                elif len(feat_batch.shape) == 3:
                    maxfeatures[i][start_idx:end_idx] = np.max(feat_batch, 2)
                elif len(feat_batch.shape) == 2:
                    maxfeatures[i][start_idx:end_idx] = feat_batch
        if len(feat_batch.shape) == 2:
            wholefeatures = maxfeatures
        return wholefeatures, maxfeatures

    def vqa_feature_extraction(self, model, org_img, q, q_len, a):
        del features_blobs[:]
        img = np.array(org_img, dtype=np.float32)
        if img.ndim == 2:
            img = np.repeat(img[:, :, None], 3, axis=2)
        img -= np.array(self.mean)[::-1]
        img = img.transpose((2, 0, 1))
        if settings.CAFFE_MODEL:
            input = torch.from_numpy(img[None, ::-1, :, :].copy())
        else:
            input = torch.from_numpy(img[None, :, :, :].copy())
            input.div_(255.0 * 0.224)
        input_var = V(input, requires_grad=True)
        if settings.GPU:
            input_var = input_var.cuda()
        model.cnn.forward(input_var)
        img_feat = features_blobs[0]
        # `async=True` in the original is a syntax error on Python 3.7+;
        # non_blocking is the PyTorch >= 0.4 spelling of the same flag
        v = V(torch.from_numpy(img_feat).cuda(non_blocking=True), requires_grad=True)
        q = V(q.cuda(non_blocking=True), requires_grad=False)
        # a = V(a.cuda(non_blocking=True), requires_grad=False)
        q_len = V(q_len.cuda(non_blocking=True), requires_grad=False)
        out = model(v, q, q_len)
        val, ind = out.max(1)
        val.backward(torch.FloatTensor([1]).cuda())
        img_grad = v.grad.data.cpu().numpy()
        return img_feat[0].transpose(1, 2, 0), img_grad[0].transpose(1, 2, 0), ind, None
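    # In vqa_feature_extraction above, backpropagating from the winning answer
    # logit fills v.grad with d(answer score)/d(feature map); callers use that
    # gradient as Grad-CAM-style channel weights over img_feat, roughly:
    #     cam = self.cam_mat(img_feat * img_grad.mean((0, 1))[None, None, :])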
    def imagecap_feature_extraction(self, model, org_img):
        del features_blobs[:]
        img = np.array(org_img, dtype=np.float32)
        if img.ndim == 2:
            img = np.repeat(img[:, :, None], 3, axis=2)
        img -= np.array(self.mean)[::-1]
        img = img.transpose((2, 0, 1))
        if settings.CAFFE_MODEL:
            input = torch.from_numpy(img[None, ::-1, :, :].copy())
        else:
            input = torch.from_numpy(img[None, :, :, :].copy())
            input.div_(255.0 * 0.224)
        input_var = V(input, requires_grad=True)
        if settings.GPU:
            input_var = input_var.cuda()
        model.cnn.forward(input_var)
        img_feat = features_blobs[0]
        v = V(torch.from_numpy(img_feat).cuda(non_blocking=True), requires_grad=True)
        sents, caps = model.generate(input_var)
        out, _ = model(input_var, sents[0])
        sents = np.array(sents).ravel()
        # truncate at token id 10002 (presumably the end-of-sentence marker)
        if len(np.where(sents == 10002)[0]) != 0:
            sents = sents[:np.where(sents == 10002)[0][0]]
        # words_ind = sents.argsort()[:-3:-1]
        w = sents.argmax()
        onehot = torch.zeros(out[0].size())
        onehot[w] = 1
        del grad_blobs[:]
        model.cnn.zero_grad()
        if settings.GPU:
            onehot = onehot.cuda()
        out[0].backward(onehot)
        img_feat = features_blobs[0][0].transpose(1, 2, 0)
        img_grad = grad_blobs[0][0].transpose(1, 2, 0)
        return img_feat, img_grad, w, sents

    def single_feature_extraction(self, model, org_img):
        del features_blobs[:]
        img = np.array(org_img, dtype=np.float32)
        if img.ndim == 2:
            img = np.repeat(img[:, :, None], 3, axis=2)
        img -= np.array(self.mean)[::-1]
        img = img.transpose((2, 0, 1))
        if settings.CAFFE_MODEL:
            input = torch.from_numpy(img[None, ::-1, :, :].copy())
        else:
            input = torch.from_numpy(img[None, :, :, :].copy())
            input.div_(255.0 * 0.224)
        input_var = V(input, requires_grad=True)
        if settings.GPU:
            input_var = input_var.cuda()
        out = model.forward(input_var)
        onehot = torch.zeros(out.size())
        if settings.GPU:
            onehot = onehot.cuda()
        # resolve the predicted class index once instead of repeating the
        # version check for every use, as the original did
        if torch.__version__.startswith('0.4'):
            pred_ind = out.max(1)[1].item()
        else:
            pred_ind = out.max(1)[1].data[0]
        onehot[0][pred_ind] = 1.0
        if settings.DATASET == 'imagenet':
            prediction = imagenet_categories[pred_ind]
        elif settings.DATASET == 'places365':
            prediction = places365_categories[pred_ind]
        else:
            prediction = None  # the original left this undefined for other datasets
        del grad_blobs[:]
        model.zero_grad()
        out.backward(onehot)
        img_feat = features_blobs[0][0].transpose(1, 2, 0)
        img_grad = grad_blobs[0][0].transpose(1, 2, 0)
        return img_feat, img_grad, pred_ind, prediction

    def weight_extraction(self, model, feat_clf):
        params = list(model.parameters())
        if settings.GPU:
            weight_softmax = params[-2].data.cpu().numpy()
            weight_clf = feat_clf.fc.weight.data.cpu().numpy()
        else:
            weight_softmax = params[-2].data.numpy()
            weight_clf = feat_clf.fc.weight.data.numpy()
        # weight_label = np.maximum(weight_softmax, 0)
        # weight_concept = np.maximum(weight_clf, 0)
        weight_label = weight_softmax
        weight_concept = weight_clf
        weight_label = weight_label / np.linalg.norm(weight_label, axis=1)[:, None]
        weight_concept = weight_concept / np.linalg.norm(weight_concept, axis=1)[:, None]
        return weight_label, weight_concept

    def weight_decompose(self, model, feat_clf, feat_labels=None):
        weight_label, weight_concept = self.weight_extraction(model, feat_clf)
        filename = os.path.join(settings.OUTPUT_FOLDER, "decompose.npy")
        if os.path.exists(filename):
            rankings, errvar, coefficients, residuals_T = np.load(filename)
        else:
            rankings, errvar, coefficients, residuals = self.decompose_Gram_Schmidt(
                weight_concept, weight_label, prediction_ind=None,
                MAX=settings.BASIS_NUM)
            np.save(filename, (rankings, errvar, coefficients, residuals.T))
        # for i in range(len(weight_label)):
        #     residuals[i] = weight_label[i] - np.matmul(ws[i][None, :], weight_concept[rankings[i, :].astype(int)])
        if settings.COMPRESSED_INDEX:
            try:
                feat_labels = [feat_labels[concept]
                               for concept in feat_clf.valid_concepts]
            except Exception:
                feat_labels = [feat_labels[concept]
                               for concept in np.load('cache/valid_concept.npy')]
        if settings.DATASET == "places365":
            model_labels = places365_categories
        elif settings.DATASET == "imagenet":
            model_labels = imagenet_categories
        for pi in range(len(rankings)):
            prediction = model_labels[pi]
            print(prediction, end=":\t")
            concept_inds = rankings[pi, :]
            for ci, concept_ind in enumerate(concept_inds):
                print("%s(%.2f) -> (%.2f)" % (feat_labels[concept_ind],
                                              coefficients[pi, ci],
                                              errvar[pi, ci]), end=",\t")
            print()
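    # decompose_Gram_Schmidt (below) is greedy orthogonal matching: at each
    # step it picks the concept whose orthogonalized direction has the largest
    # projection onto the label weight, records it, deflates the residual, and
    # re-orthogonalizes the remaining concepts against the pick; errvar tracks
    # the squared residual norm after each pick. Toy trace (hypothetical 2-D
    # example): with concepts e1, e2 and label q = [0.8, 0.6], step 1 picks e1
    # (projection 0.8), leaving residual [0, 0.6]; step 2 picks e2, leaving a
    # residual of ~0.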
    def decompose_Gram_Schmidt(self, weight_concept, weight_label,
                               prediction_ind=None, MAX=20):
        if prediction_ind is not None:
            if type(prediction_ind) == int:
                weight_label = weight_label[prediction_ind:prediction_ind + 1, :]
            else:
                weight_label = weight_label[prediction_ind, :]
        rankings = np.zeros((len(weight_label), MAX), dtype=np.int32)
        errvar = np.zeros((len(weight_label), MAX))
        coefficients = np.zeros((len(weight_label), MAX + 2))
        residuals = np.zeros((len(weight_label), weight_concept.shape[1]))
        for label_id in range(len(weight_label)):
            if len(weight_label) > 10:
                print("decomposing label %d" % label_id)
            qo = weight_label[label_id].copy()
            residual = weight_label[label_id].copy()
            ortho_concepts = [(i, qc) for i, qc in enumerate(weight_concept)]
            basis = np.zeros((weight_label.shape[1], MAX + 2))
            for epoch in range(MAX):
                if MAX > 50:
                    print("epoch (%d/%d)" % (epoch, MAX))
                # pick the orthogonalized concept most aligned with the label
                # weight (key= avoids the original bare max() over tuples,
                # which could compare numpy arrays on score ties)
                best_index, best_uc = max(ortho_concepts,
                                          key=lambda iuc: np.dot(iuc[1], qo))
                residual -= best_uc * np.dot(best_uc, residual)
                basis[:, epoch] = weight_concept[best_index]
                rankings[label_id][epoch] = best_index
                # squared norm of what is still unexplained after this pick
                # alternative: cosine_similarity(weight_label[label_id][None, :], (weight_label[label_id] - residual)[None, :])[:, 0]
                errvar[label_id][epoch] = np.linalg.norm(residual) ** 2
                # re-orthogonalize the remaining concepts against the pick
                ortho_concepts = [
                    (i, (uc - best_uc * np.dot(best_uc, uc))
                        / np.linalg.norm(uc - best_uc * np.dot(best_uc, uc)))
                    for i, uc in ortho_concepts if i != best_index
                ]
            positive_residual = np.maximum(residual, 0)
            negative_residual = -np.minimum(residual, 0)
            basis[:, MAX] = positive_residual / np.linalg.norm(positive_residual)
            basis[:, MAX + 1] = negative_residual / np.linalg.norm(negative_residual)
            residuals[label_id] = residual
            coefficients[label_id] = np.dot(np.linalg.pinv(basis), qo)
        return rankings, errvar, coefficients, residuals

    def decompose_cosine_similarity(self, weight_concept, weight_label,
                                    prediction_ind=None, MAX=7):
        if prediction_ind is not None:
            if type(prediction_ind) == int:
                weight_label = weight_label[prediction_ind:prediction_ind + 1, :]
            else:
                weight_label = weight_label[prediction_ind, :]
        X = cosine_similarity(weight_label, weight_concept)
        rankings = X.argsort(1)[:, :-MAX - 1:-1]
        scores = np.zeros((len(weight_label), MAX))
        ws = np.zeros((len(weight_label), MAX))
        for label_id in range(len(weight_label)):
            print("decomposing label %d" % label_id)
            for epoch in range(MAX):
                B = weight_label[label_id][None, :]
                A = weight_concept[rankings[label_id, :epoch + 1]]
                scores[label_id][epoch] = cosine_similarity(
                    B, np.matmul(np.matmul(B, np.linalg.pinv(A)), A))
            ws[label_id] = np.matmul(B, np.linalg.pinv(A)).ravel()
        return rankings, scores, ws

    def ranking_gradient(self, weight_concept, img_grad):
        # img_feat_resized_v = V(torch.FloatTensor(img_feat))
        # concept_predicted = feat_clf.fc(img_feat_resized_v)
        # concept_grad = feat_clf.fc.weight[None, :, :] * ((F.sigmoid(concept_predicted)) * (1 - F.sigmoid(concept_predicted)))[:, :, None]
        X = cosine_similarity(img_grad.mean(0)[None, :], weight_concept)[0]
        return X

    def ranking_weight(self, weight_concept, weight_label, activation=None,
                       prediction_ind=None):
        if activation is None:
            A = weight_concept
            B = weight_label[prediction_ind, :]
            X = np.matmul(B[None, :], A.T)[0]
        else:
            X = np.zeros((len(activation), len(weight_concept)))
            B = weight_label[prediction_ind, :]  # * activation[i]
            for i in range(len(activation)):
                A = weight_concept * activation[i]
                # X[i] = cosine_similarity(B[None, :], A)
                X[i] = np.matmul(B[None, :], A.T)
            X = X.mean(0)
        return X
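    # Both decomposition methods above score concepts against a target
    # direction, but differ in strategy: decompose_Gram_Schmidt deflates the
    # target after each greedy pick, while decompose_cosine_similarity
    # pre-ranks concepts by cosine similarity and then, for each k, measures
    # how well the top-k concepts reconstruct the label weight by least
    # squares. The projection step they share (illustration only):
    #     B_hat = np.matmul(np.matmul(B, np.linalg.pinv(A)), A)
    # i.e. the orthogonal projection of B onto the row space of A.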
    def single_weight_synthesis(self, component_weights, target_weight):
        w = np.matmul(target_weight, np.linalg.pinv(component_weights))
        # combination_score = cosine_similarity(target_weight[None, :], np.matmul(w, component_weights)[None, :])
        combination_score = 1 - np.linalg.norm(
            target_weight - np.matmul(w, component_weights)) ** 2
        return w, combination_score

    # An alternative, gradient-based synthesis that was tried and disabled:
    # X = nn.Parameter(torch.randn((weight_softmax.shape[0], weight_clf.shape[0])))
    # loss = nn.MSELoss()
    # opt = optim.Adam([X], lr=0.02)
    # W = V(torch.from_numpy(weight_concept))
    # Y = V(torch.from_numpy(weight_label))
    # if settings.GPU:
    #     loss, X, W, Y = loss.cuda(), X.cuda(), W.cuda(), Y.cuda()
    # for i in range(5000):
    #     err = loss(torch.matmul(F.leaky_relu(X, 0.005), W), Y)
    #     print("epoch %02d: err %.8f" % (i, err.item()))
    #     opt.zero_grad()
    #     err.backward()
    #     opt.step()
    # X = X.data.numpy()

    def weight_retrieval(self, model, feat_clf, feat_labels=None):
        params = list(model.parameters())
        if settings.GPU:
            weight_softmax = params[-2].data.cpu().numpy()
            weight_clf = feat_clf.fc.weight.data.cpu().numpy()
        else:
            weight_softmax = params[-2].data.numpy()
            weight_clf = feat_clf.fc.weight.data.numpy()
        weight_label = weight_softmax / np.linalg.norm(weight_softmax, axis=1)[:, None]
        weight_concept = weight_clf / np.linalg.norm(weight_clf, axis=1)[:, None]
        # weight_label = weight_softmax
        # weight_concept = weight_clf
        # A = np.maximum(weight_concept, 0)
        # B = np.maximum(weight_label, 0)[prediction_ind, :]
        A = weight_concept
        B = weight_label
        X = np.matmul(B, A.T)
        if settings.COMPRESSED_INDEX:
            feat_labels = [feat_labels[concept]
                           for concept in feat_clf.valid_concepts]
        if settings.DATASET == "places365":
            model_labels = places365_categories
        elif settings.DATASET == "imagenet":
            model_labels = imagenet_categories
        mat_sort = X.argsort(1)[:, :-6:-1]  # [:, :5]
        for pi in range(len(X)):
            prediction = model_labels[pi]
            print(prediction, end=":\t")
            concept_inds = mat_sort[pi, :]
            for concept_ind in concept_inds:
                print("%s(%.2f)" % (feat_labels[concept_ind], X[pi, concept_ind]),
                      end=",\t")
            print()

    def concept_indexmap(self, feat, feature_name, save=True):
        b, u, h, w = feat.shape
        print("generating concept index map ...")
        filename = os.path.join(settings.OUTPUT_FOLDER,
                                "%s-concept-map.pickle" % feature_name)
        if os.path.exists(filename):
            with open(filename, 'rb') as f:
                return pickle.load(f)
        concept_indexes = [set() for i in range(len(self.data.label))]
        pd = SegmentationPrefetcher(self.data,
                                    categories=self.data.category_names(),
                                    once=True,
                                    batch_size=settings.TALLY_BATCH_SIZE,
                                    ahead=settings.TALLY_AHEAD)
        for batch in pd.batches():
            for concept_map in batch:
                scalars, pixels = [], []
                for cat in self.data.category_names():
                    label_group = concept_map[cat]
                    shape = np.shape(label_group)
                    if len(shape) % 2 == 0:
                        label_group = [label_group]
                    if len(shape) < 2:
                        scalars += label_group
                    else:
                        pixels.append(label_group)
                for pixel in pixels:
                    pixel = imresize(pixel[0], (h, w), interp='nearest',
                                     mode='F').astype(int)
                    for hi in range(h):
                        for wi in range(w):
                            if pixel[hi, wi]:
                                concept_indexes[pixel[hi, wi]].add(
                                    (concept_map['i'], hi, wi))
        if save:
            with open(filename, 'wb') as f:
                pickle.dump(concept_indexes, f)
        return concept_indexes
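    # The nested hi/wi loop above costs O(h*w) Python per image; a vectorized
    # alternative under the same assumptions (labels are small non-negative
    # ints) would be, roughly:
    #     ys, xs = np.nonzero(pixel)
    #     for label, hi, wi in zip(pixel[ys, xs], ys, xs):
    #         concept_indexes[label].add((concept_map['i'], hi, wi))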
    def embedding2d_feat(self, feat, feature_name, alg="se", save=True):
        # use an .npy cache name so the os.path.exists check matches what
        # np.save actually writes (the original saved "<name>.pickle.npy" but
        # checked for "<name>.pickle", so the cache never hit)
        filename = os.path.join(settings.OUTPUT_FOLDER,
                                "%s-%s.npy" % (feature_name, alg))
        if os.path.exists(filename):
            return np.load(filename)
        b, u, h, w = feat.shape
        # move channels last before flattening (the original discarded the
        # result of transpose, scrambling the spatial/channel layout)
        feat = feat.transpose(0, 2, 3, 1).reshape(b * h * w, u)
        if alg == 'se':
            feat_nse = SpectralEmbedding(n_components=2).fit_transform(feat)
        else:
            feat_nse = TSNE(n_components=2, verbose=2).fit_transform(feat)
        feat_nse = feat_nse.reshape(b, h, w, 2)
        if save:
            np.save(filename, feat_nse)
        return feat_nse

    def cluster(self, feat, feature_name, linkage='ward', save=True):
        filename = os.path.join(settings.OUTPUT_FOLDER,
                                "%s-cluster.npy" % feature_name)
        if os.path.exists(filename):
            return np.load(filename)
        b, u, h, w = feat.shape
        feat = feat.transpose(0, 2, 3, 1).reshape(b * h * w, u)
        clustering = AgglomerativeClustering(linkage=linkage, n_clusters=10)
        clustering.fit(feat)
        if save:
            np.save(filename, (clustering.labels_, clustering.children_))
        return clustering.labels_, clustering.children_

    def instance_segment_by_id(self, feat, img_index, feat_clf):
        _, u, h, w = feat.shape
        # reshape instead of assigning .shape, which fails on the
        # non-contiguous transposed view
        img_feat = feat[img_index].transpose(1, 2, 0).reshape(h * w, u)
        concept_predicted = feat_clf(V(torch.FloatTensor(img_feat), volatile=True))
        concept_predicted = concept_predicted.data.numpy().reshape(h, w, -1)
        img = imread(os.path.join(settings.DATA_DIRECTORY, 'images',
                                  self.data.image[img_index]['image']))
        imsave(os.path.join(settings.OUTPUT_FOLDER, 'original.jpg'), img)
        return concept_predicted.argmax(2)

    def cam_mat(self, mat, above_zero=False):
        if above_zero:
            mat = np.maximum(mat, 0)
        if len(mat.shape) == 3:
            mat = mat.sum(2)
        # rescale to [0, 1]
        mat = mat - np.min(mat)
        mat = mat / np.max(mat)
        return mat

    def instance_segment_by_file(self, model, image_file, feat_clf):
        # feature extraction
        org_img = imread(image_file)
        org_img = imresize(org_img, (settings.IMG_SIZE, settings.IMG_SIZE))
        if len(org_img.shape) == 2:
            org_img = org_img[:, :, None].repeat(3, axis=2)
        img_feat, img_grad, prediction_ind, prediction = \
            self.single_feature_extraction(model, org_img)

        # feature classification
        h, w, u = img_feat.shape
        seg_resolution = h
        img_feat_resized = np.zeros((seg_resolution, seg_resolution, u))
        for i in range(u):
            img_feat_resized[:, :, i] = imresize(img_feat[:, :, i],
                                                 (seg_resolution, seg_resolution),
                                                 mode="F")
        img_feat_resized.shape = (seg_resolution * seg_resolution, u)
        concept_predicted = feat_clf(V(torch.FloatTensor(img_feat_resized),
                                       volatile=True))
        concept_predicted = concept_predicted.data.numpy().reshape(
            seg_resolution, seg_resolution, -1)
        concept_inds = concept_predicted.argmax(2)
        concept_colors = np.array(random_color(concept_predicted.shape[2])) * 256

        # feature visualization
        vis_size = settings.IMG_SIZE * 2
        cam_mat = self.cam_mat(img_feat * img_grad, above_zero=True)
        cam_mask = 255 * imresize(cam_mat, (settings.IMG_SIZE, settings.IMG_SIZE),
                                  mode="F")
        cam_mask = cv2.applyColorMap(np.uint8(cam_mask), cv2.COLORMAP_JET)[:, :, ::-1]
        vis_cam = cam_mask * 0.5 + org_img * 0.5
        vis_cam = Image.fromarray(vis_cam.astype(np.uint8))
        vis_cam = vis_cam.resize((vis_size, vis_size), resample=Image.BILINEAR)
        seg_mask = imresize(concept_colors[concept_inds],
                            (settings.IMG_SIZE, settings.IMG_SIZE),
                            interp='nearest', mode="RGB")
        vis_seg = seg_mask * 0.7 + org_img * 0.3
        vis_seg = Image.fromarray(vis_seg.astype(np.uint8))
        vis_seg = vis_seg.resize((vis_size, vis_size), resample=Image.NEAREST)
        label_seg(vis_seg, vis_size / h, self.data.label, concept_inds, cam=cam_mat)
        vis_img = Image.fromarray(org_img).resize((vis_size, vis_size),
                                                  resample=Image.BILINEAR)
        vis = imconcat([vis_img, vis_cam, vis_seg], vis_size, vis_size)
        return vis, prediction
        # Y, X = np.meshgrid(np.arange(h), np.arange(w))
        # concept_value = concept_predicted[X, Y, concept_inds]
        # concept_scores = np.exp(concept_value) / np.sum(np.exp(concept_predicted), 2)
        # mask = np.concatenate([concept_colors[concept_inds], concept_scores[:, :, None] * 256], 2)
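    # Two CAM variants are used below in instance_cam_by_file:
    #   img_cam  = cam_mat(img_feat * img_grad.mean((0, 1)))       # Grad-CAM
    #              style: channels weighted by spatially averaged gradients
    #   img_camp = cam_mat(img_feat * img_grad, above_zero=True)   # elementwise
    #              gradient*activation, positive part only ("CAM+" in captions)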
    def instance_cam_by_file(self, model, image_file, feat_clf,
                             other_params=None, fig_style=0):
        # feature extraction
        org_img = imread(image_file)
        org_img = imresize(org_img, (settings.IMG_SIZE, settings.IMG_SIZE))
        if len(org_img.shape) == 2:
            org_img = org_img[:, :, None].repeat(3, axis=2)
        if settings.APP == "vqa":
            img_feat, img_grad, prediction_ind, _ = self.vqa_feature_extraction(
                model, org_img, *other_params)
            prediction = prediction_ind
        elif settings.APP == "imagecap":
            img_feat, img_grad, prediction_ind, prediction = \
                self.imagecap_feature_extraction(model, org_img)
            prediction = (np.array(model.vocab)[prediction], prediction_ind)
        else:
            img_feat, img_grad, prediction_ind, prediction = \
                self.single_feature_extraction(model, org_img)

        if settings.COMPRESSED_INDEX:
            try:
                labels = [self.data.label[concept]
                          for concept in feat_clf.valid_concepts]
            except Exception:
                labels = [self.data.label[concept]
                          for concept in np.load('cache/valid_concept.npy')]
        else:
            labels = self.data.label
        h, w, u = img_feat.shape

        # feature classification
        seg_resolution = settings.SEG_RESOLUTION
        img_feat_resized = np.zeros((seg_resolution, seg_resolution, u))
        for i in range(u):
            img_feat_resized[:, :, i] = imresize(img_feat[:, :, i],
                                                 (seg_resolution, seg_resolution),
                                                 mode="F")
        img_feat_resized.shape = (seg_resolution * seg_resolution, u)
        concept_predicted = feat_clf.fc(V(torch.FloatTensor(img_feat_resized)))
        concept_predicted = concept_predicted.data.numpy().reshape(
            seg_resolution, seg_resolution, -1)
        # concept_predicted_reg = (concept_predicted - np.min(concept_predicted, 2, keepdims=True)) / np.max(
        #     concept_predicted, 2, keepdims=True)
        concept_inds = concept_predicted.argmax(2)
        concept_colors = np.array(random_color(concept_predicted.shape[2])) * 256

        # feature visualization
        vis_size = settings.IMG_SIZE
        margin = int(vis_size / 30)
        img_cam = self.cam_mat(img_feat * img_grad.mean((0, 1))[None, None, :],
                               above_zero=False)
        img_camp = self.cam_mat(img_feat * img_grad, above_zero=True)
        vis_cam = vis_cam_mask(img_cam, org_img, vis_size)
        vis_camp = vis_cam_mask(img_camp, org_img, vis_size)
        CONCEPT_CAM_TOPN = settings.BASIS_NUM
        CONCEPT_CAM_BOTTOMN = 0
        if settings.GRAD_CAM:
            weight_clf = feat_clf.fc.weight.data.numpy()
            weight_concept = weight_clf  # np.maximum(weight_clf, 0)
            weight_concept = weight_concept / np.linalg.norm(weight_concept,
                                                             axis=1)[:, None]
            # ranking = self.ranking_gradient(weight_concept, img_grad.reshape(-1, u))
            # component_weights = weight_concept[ranking.argsort()[:-5 - 1:-1], :]
            target_weight = img_grad.mean((0, 1))
            target_weight = target_weight / np.linalg.norm(target_weight)
            # w, combination_score = self.single_weight_synthesis(component_weights, target_weight)
            rankings, scores, coefficients, residuals = self.decompose_Gram_Schmidt(
                weight_concept, target_weight[None, :], MAX=settings.BASIS_NUM)
            ranking = rankings[0]
            residual = residuals[0]
            d_e = np.linalg.norm(residuals[0]) ** 2
            component_weights = np.vstack([
                coefficients[0][:settings.BASIS_NUM, None] * weight_concept[ranking],
                residual[None, :]
            ])
            a = img_feat.mean((0, 1))
            a /= np.linalg.norm(a)
            qcas = np.dot(component_weights, a)
            combination_score = sum(abs(qcas))
            inds = qcas[:-1].argsort()[:-CONCEPT_CAM_TOPN - 1:-1]
            concept_masks_ind = ranking[inds]
            scores_topn = coefficients[0][inds]
            contribution = qcas[inds]
        else:
            # activation = img_feat[(img_cam > 0.6).nonzero()]
            weight_label, weight_concept = self.weight_extraction(model, feat_clf)
            # ranking = 1 / (1 + np.exp(-np.matmul(weight_concept, img_feat.reshape(-1, u).transpose()).max(1)))
            # ranking = self.ranking_weight(weight_concept, weight_label, prediction_ind=prediction_ind)
            # component_weights = weight_concept[ranking.argsort()[:-5 - 1:-1], :]
            # concept_masks_ind = ranking.argsort()[:-5 - 1:-1]
            # scores_topn = ranking[concept_masks_ind]
            rankings, errvar, coefficients, residuals_T = np.load(
                os.path.join(settings.OUTPUT_FOLDER, "decompose.npy"))
            ranking = rankings[prediction_ind].astype(int)
            residual = residuals_T.T[prediction_ind]
            d_e = np.linalg.norm(residual) ** 2
            component_weights = np.vstack([
                coefficients[prediction_ind][:settings.BASIS_NUM, None] * weight_concept[ranking],
                residual[None, :]
            ])
            a = img_feat.mean((0, 1))
            a /= np.linalg.norm(a)
            qcas = np.dot(component_weights, a)
            combination_score = sum(qcas)
            inds = qcas[:-1].argsort()[:-CONCEPT_CAM_TOPN - 1:-1]
            concept_masks_ind = ranking[inds]
            scores_topn = coefficients[prediction_ind][inds]
            contribution = qcas[inds]
            # target_weight = img_feat.mean((0, 1))  # * weight_label[prediction_ind, :]
            # target_weight /= np.linalg.norm(target_weight)
            # component_weights = img_feat.mean((0, 1))[None, :] * weight_concept
            # component_weights /= np.linalg.norm(component_weights, axis=1)[:, None]
            # rankings, scores, ws, residuals = self.decompose_Gram_Schmidt(weight_concept, target_weight[None, :])
            # concept_masks_ind = rankings.ravel()
            # scores_topn = scores.ravel()
            # w = ws.ravel()
            # combination_score = scores_topn[-1]

        concept_masks = concept_predicted[:, :, concept_masks_ind]
        concept_masks = concept_masks * ((scores_topn > 0) * 1)[None, None, :]
        concept_masks = np.maximum(concept_masks, 0) / np.max(concept_masks)
        vis_concept_cam = []
        # acc = np.memmap(os.path.join(settings.OUTPUT_FOLDER, "mAP_table.mmap"), dtype=np.float16, mode='r', shape=(660, 15))[:, 6][concept_masks_ind]
        # captions = [labels[concept_masks_ind[i]]['name'] + "(%.3f)" % (scores_topn[i]) for i in range(CONCEPT_CAM_TOPN + CONCEPT_CAM_BOTTOMN)]
        for i in range(CONCEPT_CAM_TOPN + CONCEPT_CAM_BOTTOMN):
            vis_concept_cam.append(
                vis_cam_mask(concept_masks[:, :, i], org_img, vis_size,
                             font_text=None))
        # vis_concept_cam.append(vis_cam_mask(self.cam_mat(np.dot(img_feat, residual)) * 0.8, org_img, vis_size, font_text=None))
        # test = np.matmul(score_mat[prediction_ind], concept_predicted.reshape(w * h, -1).T).reshape(h, w)
        # vis_concept_cam.append(vis_cam_mask(self.cam_mat(test, above_zero=True), org_img, vis_size))
        # seg_mask = imresize(concept_colors[concept_inds], (settings.IMG_SIZE, settings.IMG_SIZE), interp='nearest', mode="RGB")
        # vis_seg = seg_mask * 0.7 + org_img * 0.3
        # vis_seg = Image.fromarray(vis_seg.astype(np.uint8))
        # vis_seg = vis_seg.resize((vis_size, vis_size), resample=Image.NEAREST)
        # label_seg(vis_seg, vis_size, labels, concept_inds, cam=img_cam)

        if fig_style == 0:
            vis_img = Image.fromarray(org_img).resize((vis_size, vis_size),
                                                      resample=Image.BILINEAR)
            vis = imconcat([vis_img, vis_cam, vis_camp] + vis_concept_cam,
                           vis_size, vis_size, margin=margin)
            captions = [
                "{%s}: s(%.2f)->%4.2f%%" % (labels[concept_masks_ind[i]]['name'],
                                            scores_topn[i],
                                            contribution[i] * 100 / combination_score)
                for i in range(CONCEPT_CAM_TOPN + CONCEPT_CAM_BOTTOMN)
            ]
            captions = ["score {%.2f} residual {de %.2f/(%4.2f%%)}" %
                        (combination_score, d_e,
                         qcas[-1] * 100 / combination_score),
                        "CAM", "CAM+"] + captions
            # captions = ["%.2f * {%s}" % (w[i], captions[i]) for i in range(len(captions))]
            # captions = ["original image", "CAM or grad CAM", "VIS+", "score {%.2f}" % (combination_score)] + captions
            vis_headline = headline(captions, vis_size, vis.height // 4,
                                    vis.width, margin=margin)
            vis = imstack([vis_headline, vis])
        elif fig_style == 1:
            vis_img = Image.fromarray(org_img).resize((vis_size, vis_size),
                                                      resample=Image.BILINEAR)
            vis_bm = big_margin(vis_size)
            vis = imconcat([vis_img, vis_cam, vis_bm] + vis_concept_cam[:3],
                           vis_size, vis_size, margin=margin)
            captions = [
                "%s(%4.2f%%)" % (labels[concept_masks_ind[i]]['name'],
                                 contribution[i] * 100 / combination_score)
                for i in range(3)
            ]
            captions = ["%s(%.2f) " % (prediction, combination_score)] + captions
            vis_headline = headline2(captions, vis_size, vis.height // 5,
                                     vis.width, margin=margin)
            vis = imstack([vis_headline, vis])
        return vis, prediction
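
# Minimal single-image explanation sketch (illustration only: "sample.jpg" is
# a hypothetical path, and `model`/`feat_clf` must come from the project's own
# loading code, with forward/backward hooks feeding `features_blobs` and
# `grad_blobs`):
#
#     fo = FeatureOperator()
#     vis, prediction = fo.instance_cam_by_file(model, "sample.jpg", feat_clf)
#     vis.save(os.path.join(settings.OUTPUT_FOLDER, "explanation.jpg"))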