def create_image_dataset_label_index(directory, batch_size=64, ahead=16):
    ed = expdir.ExperimentDirectory(directory)
    info = ed.load_info()
    ds = loadseg.SegmentationData(info.dataset)
    categories = ds.category_names()
    shape = (ds.size(), len(DATASETS), len(ds.label))
    index = np.zeros(shape, dtype=bool)
    pf = loadseg.SegmentationPrefetcher(ds, categories=categories, once=True,
                                        batch_size=batch_size, ahead=ahead,
                                        thread=True)
    batch_count = 0
    for batch in pf.batches():
        if batch_count % 100 == 0:
            print('Processing batch %d ...' % batch_count)
        for rec in batch:
            dataset_index = get_dataset_index(rec['fn'])
            image_index = rec['i']
            for cat in categories:
                if ((type(rec[cat]) is np.ndarray and rec[cat].size > 0)
                        or (type(rec[cat]) is list and len(rec[cat]) > 0)):
                    index[image_index][dataset_index][np.unique(rec[cat])] = True
        batch_count += 1
    mmap = ed.open_mmap(part='image_dataset_label', mode='w+', dtype=bool,
                        shape=shape)
    mmap[:] = index[:]
    ed.finish_mmap(mmap)
    print('Finished and saved at %s'
          % ed.mmap_filename(part='image_dataset_label'))
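# Hedged usage sketch: reading back the (num_images, num_datasets,
# num_labels) boolean index written above. The probe directory path is
# hypothetical; DATASETS is assumed to be the same module-level list used
# by create_image_dataset_label_index.
def _example_read_dataset_label_index(directory='probes/alexnet_broden'):
    ed = expdir.ExperimentDirectory(directory)
    info = ed.load_info()
    ds = loadseg.SegmentationData(info.dataset)
    shape = (ds.size(), len(DATASETS), len(ds.label))
    index = ed.open_mmap(part='image_dataset_label', mode='r', dtype=bool,
                         shape=shape)
    # Labels present for image 0 in each source dataset:
    for d in range(len(DATASETS)):
        print('dataset %d: %s' % (d, np.where(index[0, d])[0]))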
def compute_alpha(directory):
    ed = expdir.ExperimentDirectory(directory)
    info = ed.load_info()
    ds = loadseg.SegmentationData(info.dataset)
    L = ds.label_size()
    if not has_image_to_label(directory):
        create_image_to_label(directory)
    image_to_label = load_image_to_label(directory)
    label_names = np.array([ds.label[i]['name'] for i in range(L)])
    alphas = np.zeros((L,))
    for label_i in range(1, L):  # skip label 0 (background)
        label_categories = list(ds.label[label_i]['category'].keys())
        label_idx = np.where(image_to_label[:, label_i])[0]
        train_loader = loadseg.SegmentationPrefetcher(
            ds, categories=label_categories, split='train',
            indexes=label_idx, once=True, batch_size=64, ahead=4, thread=True)
        train_idx = np.array(train_loader.indexes)
        perc_label = []
        for batch in train_loader.batches():
            for rec in batch:
                sw, sh = [rec[k] for k in ['sw', 'sh']]
                # Fraction of segmentation pixels carrying this label;
                # image-level annotations count as full coverage.
                for cat in label_categories:
                    if rec[cat] != []:
                        if type(rec[cat]) is np.ndarray:
                            perc_label.append(
                                np.sum(rec[cat] == label_i) / float(sw * sh))
                        else:
                            perc_label.append(1.)
                        break
        assert len(perc_label) == len(train_idx)
        alphas[label_i] = float(1. - np.mean(perc_label))
        print('%d %s %f' % (label_i, label_names[label_i], alphas[label_i]))
        train_loader.close()
    alphas_mmap = ed.open_mmap(part='train_alphas', mode='w+',
                               dtype='float32', shape=alphas.shape)
    alphas_mmap[:] = alphas[:]
    ed.finish_mmap(alphas_mmap)
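# Hedged sketch of how the alpha computed above can weight a per-pixel
# binary cross-entropy, mirroring the BCELoss2d(x, y, alpha) usage in
# linear_probe. The exact weighting inside BCELoss2d is an assumption
# here: positives weighted by alpha, negatives by (1 - alpha).
def _example_weighted_bce(pred, target, alpha, eps=1e-12):
    # pred, target: numpy arrays of matching shape with values in [0, 1]
    pos = -alpha * target * np.log(pred + eps)
    neg = -(1. - alpha) * (1. - target) * np.log(1. - pred + eps)
    return np.mean(pos + neg)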
def create_image_to_label(directory, batch_size=16, ahead=4):
    ed = expdir.ExperimentDirectory(directory)
    info = ed.load_info()
    print(info.dataset)
    if 'broden' in info.dataset:
        ds = loadseg.SegmentationData(info.dataset)
        categories = ds.category_names()
        shape = (ds.size(), len(ds.label))
        pf = loadseg.SegmentationPrefetcher(ds, categories=categories,
                                            once=True, batch_size=batch_size,
                                            ahead=ahead, thread=False)
        image_to_label = np.zeros(shape, dtype=bool)
        batch_count = 0
        for batch in pf.batches():
            if batch_count % 100 == 0:
                print('Processing batch %d ...' % batch_count)
            for rec in batch:
                image_index = rec['i']
                for cat in categories:
                    if ((type(rec[cat]) is np.ndarray and rec[cat].size > 0)
                            or (type(rec[cat]) is list and len(rec[cat]) > 0)):
                        image_to_label[image_index][np.unique(rec[cat])] = True
            batch_count += 1
    elif 'imagenet' in info.dataset or 'ILSVRC' in info.dataset:
        classes, class_to_idx = find_classes(info.dataset)
        imgs = make_dataset(info.dataset, class_to_idx)
        _, labels = zip(*imgs)
        labels = np.array(labels)
        L = 1000  # number of ImageNet classes
        shape = (len(labels), L)
        image_to_label = np.zeros(shape, dtype=bool)
        for i in range(L):
            image_to_label[labels == i, i] = 1
    else:
        assert False, 'Unknown dataset type: %s' % info.dataset
    mmap = ed.open_mmap(part='image_to_label', mode='w+', dtype=bool,
                        shape=shape)
    mmap[:] = image_to_label[:]
    ed.finish_mmap(mmap)
    f = ed.mmap_filename(part='image_to_label')
    print('Finished and saved image_to_label at %s' % f)
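# Hedged usage sketch: once the index is written, load_image_to_label
# (used elsewhere in this module) returns the (num_images, num_labels)
# boolean matrix; the directory path is hypothetical.
def _example_images_for_label(directory='probes/alexnet_broden', label_i=5):
    image_to_label = load_image_to_label(directory)
    return np.where(image_to_label[:, label_i])[0]  # image indices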
def extract_concept_data(directory, batch_size=64, ahead=16, verbose=True):
    ed = expdir.ExperimentDirectory(directory)
    if ed.has_mmap(part='concept_data'):
        print('%s already has %s, so skipping'
              % (directory, ed.mmap_filename(part='concept_data')))
        return
    info = ed.load_info()
    (sh, sw) = get_seg_size(info.input_dim)
    ds = loadseg.SegmentationData(info.dataset)
    categories = np.array(ds.category_names())
    L = ds.label_size()
    N = ds.size()
    pf = loadseg.SegmentationPrefetcher(ds, categories=categories, once=True,
                                        batch_size=batch_size, ahead=ahead,
                                        thread=True)
    if verbose:
        print('Creating new mmap at %s'
              % ed.mmap_filename(part='concept_data'))
    data = ed.open_mmap(part='concept_data', mode='w+', shape=(N, L, sh, sw))

    start_time = time.time()
    last_batch_time = start_time
    index = 0
    for batch in pf.batches():
        batch_time = time.time()
        rate = index / (batch_time - start_time + 1e-15)
        batch_rate = batch_size / (batch_time - last_batch_time + 1e-15)
        last_batch_time = batch_time
        if verbose:
            print('extract_concept_data index %d/%d (%.2f)\t'
                  'items per sec %.2f\t%.2f'
                  % (index, N, index / float(N), batch_rate, rate))
        for rec in batch:
            for cat in categories:
                if len(rec[cat]) == 0:
                    continue
                if cat in ('texture', 'scene'):
                    # Image-level annotations cover the whole map.
                    for i in range(len(rec[cat])):
                        data[index][rec[cat][i] - 1, :, :] = 1
                else:
                    # Pixel-level annotations: copy each segment's labels
                    # into the corresponding concept planes.
                    for i in range(len(rec[cat])):
                        seg = rec[cat][i]
                        ys, xs = np.where(seg)
                        data[index][seg[ys, xs] - 1, ys, xs] = 1
            index += 1
    assert index == N, ('Data source should return every item once %d %d.'
                        % (index, N))
    if verbose:
        print('Renaming mmap.')
    ed.finish_mmap(data)
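# Hedged sketch: reading one image's (L, sh, sw) concept volume back
# from the mmap written above; the directory path is hypothetical.
def _example_read_concept_data(directory='probes/alexnet_broden', image_i=0):
    ed = expdir.ExperimentDirectory(directory)
    info = ed.load_info()
    sh, sw = get_seg_size(info.input_dim)
    ds = loadseg.SegmentationData(info.dataset)
    data = ed.open_mmap(part='concept_data', mode='r',
                        shape=(ds.size(), ds.label_size(), sh, sw))
    return data[image_i]  # one binary (sh, sw) mask per concept label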
def BrodenDataGenerator():
    # Relies on module-level TEST_DIR and categories. The datasource and
    # prefetcher are built once, outside the loop, rather than being
    # reconstructed for every yielded batch.
    split = "train"
    batch_size = 8
    randomize = True
    datasource = loadseg.SegmentationData(TEST_DIR, categories=categories)
    prefetcher = loadseg.SegmentationPrefetcher(
        datasource, split=split, categories=['image'] + categories,
        segmentation_shape=None, batch_size=batch_size,
        randomize=randomize, ahead=12)
    while True:
        batch = prefetcher.fetch_batch()
        yield batch, None
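# Hedged usage sketch: draining a few batches from the generator above,
# e.g. to smoke-test it before handing it to a fit_generator-style loop.
def _example_drain_generator(n=3):
    gen = BrodenDataGenerator()
    for _ in range(n):
        batch, _ = next(gen)
        print('got a batch of %d records' % len(batch))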
def process_data(fn_t, fn_read, shape, tally_depth, ds, iw, ih, categories,
                 fieldmap, thresh, labelcat, batch_size, ahead, verbose,
                 thread, start, end):
    unit_size = len(thresh)
    blobdata = cached_memmap(fn_read, mode='r', dtype='float32', shape=shape)
    count_t = cached_memmap(fn_t, mode='r+', dtype='int32',
                            shape=(ds.size(), tally_depth, 3))
    count_t[...] = 0
    # The main loop
    if verbose:
        print('Beginning work for evaluating %s' % fn_read)
    pf = loadseg.SegmentationPrefetcher(ds, categories=categories,
                                        start=start, end=end, once=True,
                                        batch_size=batch_size, ahead=ahead,
                                        thread=False)
    index = start
    start_time = time.time()
    last_batch_time = start_time
    last_batch_size = 0
    for batch in pf.batches():
        batch_time = time.time()
        rate = (index - start) / (batch_time - start_time + 1e-15)
        batch_rate = last_batch_size / (batch_time - last_batch_time + 1e-15)
        last_batch_time = batch_time
        if verbose:
            print('labelprobe index %d items per sec %.2f %.2f'
                  % (index, batch_rate, rate))
            sys.stdout.flush()
        for rec in batch:
            sw, sh = [rec[k] for k in ['sw', 'sh']]
            reduction = int(round(iw / float(sw)))
            # Upsample unit activations to segmentation resolution, then
            # binarize against the per-unit thresholds.
            up = upsample.upsampleL(fieldmap, blobdata[index],
                                    shape=(sh, sw), reduction=reduction)
            mask = up > thresh
            accumulate_counts(mask, [rec[cat] for cat in categories],
                              count_t[index], unit_size, labelcat)
            index += 1
        last_batch_size = len(batch)
    count_t.flush()
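# Hedged sketch of the binarization step inside process_data: each
# unit's upsampled activation map is compared against that unit's
# quantile threshold, yielding the boolean mask that is tallied against
# the ground-truth segmentations.
def _example_binarize_activations(up, thresh):
    # up: (units, sh, sw) upsampled activations
    # thresh: (units, 1, 1) per-unit quantile thresholds
    return up > thresh  # broadcasts to a (units, sh, sw) boolean mask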
def __init__(self):
    """
    Setup data layer according to parameters:
    - mean: tuple of mean values to subtract
    - randomize: load in random order (default: True)
    - seed: seed for randomization (default: None / current time)
    """
    # config
    params = eval(self.param_str)
    self.directory = TEST_DIR  # Really should be from param_str
    self.split = params['split']  # I have not implemented splits yet
    self.mean = numpy.array(params['mean'])
    self.random = params.get('randomize', True)
    self.random_flip = True
    self.seed = params.get('seed', None)
    self.batch_size = params.get('batch_size', 1)
    self.disp = 0
    self.categories = ['object', 'part', 'texture', 'material', 'color']
    self.categories_num_class = [584, 234, 47, 32, 11]
    self.segmentation_shape = params.get('segmentation_shape', None)
    self.splitmap = {'train': 1, 'val': 2}
    # Convert to 2-dimensional shape.
    if self.segmentation_shape and len(numpy.shape(
            self.segmentation_shape)) == 0:
        self.segmentation_shape = (self.segmentation_shape,) * 2
    # Specific object classes to ignore.
    self.blacklist = {
        # 'object': [1, 2]  # wall, floor, ceiling, sky;
        # in uniseg: 4 becomes tree!!
    }
    # Thresholds to ignore: these classes and any ones rarer (higher).
    self.outliers = {
        'object': 537,  # brick occurs only 9 times in test_384.
        # 'part': 155,  # porch occurs only 9 times in test_384
        # If switching to uniseg, switch 561->544, because there are
        # fewer object classes. Part classes remain the same.
    }
    # Make eval deterministic
    if 'train' not in self.split:
        self.random = False
        self.random_flip = False
    # Load up metadata for images and labels
    self.datasource = loadseg.SegmentationData(self.directory,
                                               categories=self.categories)
    self.prefetcher = loadseg.SegmentationPrefetcher(
        self.datasource, split=self.split,
        categories=['image'] + self.categories,
        segmentation_shape=self.segmentation_shape,
        batch_size=self.batch_size, randomize=self.random)  # ahead=12
    # Now make a blacklist map for blacklisted types
    self.zeromap = {}
    for k, z in self.blacklist.items():
        self.zeromap[k] = numpy.arange(self.datasource.label_size(k))
        self.zeromap[k][z] = 0
    for k, z in self.outliers.items():
        if k not in self.zeromap:
            self.zeromap[k] = numpy.arange(self.datasource.label_size(k))
        self.zeromap[k][numpy.arange(z, self.datasource.label_size(k))] = 0
    # Build our category map which merges the category map and the zeromap
    self.categorymap = {}
    for cat in self.categories:
        catmap = self.datasource.category_index_map(cat)
        if cat in self.zeromap:
            catmap = self.zeromap[cat][catmap]
        self.categorymap[cat] = catmap
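# Hedged sketch of the zeromap/categorymap remapping built above:
# blacklisted or outlier class ids are redirected to 0 (unlabeled)
# before the per-category index map is applied. Toy sizes only.
def _example_zeromap(label_size=10, blacklist=(1, 2), outlier_start=8):
    zeromap = numpy.arange(label_size)
    zeromap[list(blacklist)] = 0                          # drop listed classes
    zeromap[numpy.arange(outlier_start, label_size)] = 0  # drop rare tail
    catmap = numpy.array([0, 1, 2, 5, 8, 9])              # toy category map
    return zeromap[catmap]  # -> array([0, 0, 0, 5, 0, 0])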
def linear_probe(directory, blob, label_i, suffix='', init_suffix='',
                 num_filters=None, batch_size=16, ahead=4, quantile=0.005,
                 bias=False, positive=False, num_epochs=30, lr=1e-4,
                 momentum=0.9, l1_weight_decay=0, l2_weight_decay=0,
                 validation=False, nesterov=False, lower_bound=None,
                 min_train=None, max_train=None, max_val=None, cuda=False):
    # Make sure we have a directory to work in
    ed = expdir.ExperimentDirectory(directory)
    # Check if linear weights have already been learned
    if ed.has_mmap(blob=blob, part='label_i_%d_weights%s' % (label_i, suffix)):
        print('%s already has %s, so skipping.' % (directory,
              ed.mmap_filename(blob=blob,
                               part='label_i_%d_weights%s'
                               % (label_i, suffix))))
        return
    # Load probe metadata
    info = ed.load_info()
    ih, iw = info.input_dim
    # Load blob metadata
    blob_info = ed.load_info(blob=blob)
    shape = blob_info.shape
    unit_size = shape[1]
    fieldmap = blob_info.fieldmap
    # Load the blob quantile data and grab thresholds
    if quantile == 1:
        thresh = np.zeros((unit_size, 1, 1))
    else:
        quantdata = ed.open_mmap(blob=blob, part='quant-*',
                                 shape=(unit_size, -1))
        threshold = quantdata[:, int(round(quantdata.shape[1] * quantile))]
        thresh = threshold[:, np.newaxis, np.newaxis]
    # Map the blob activation data for reading
    fn_read = ed.mmap_filename(blob=blob)
    # Load the dataset
    ds = loadseg.SegmentationData(info.dataset)
    # Get all the categories the label is a part of
    label_categories = list(ds.label[label_i]['category'].keys())
    # Get label name
    label_name = ds.name(category=None, j=label_i)

    blobdata = cached_memmap(fn_read, mode='r', dtype='float32', shape=shape)
    # Get indices of images containing the given label
    if not has_image_to_label(directory):
        print('image_to_label does not exist in %s; creating it now...'
              % directory)
        create_image_to_label(directory, batch_size=batch_size, ahead=ahead)
    image_to_label = load_image_to_label(directory)
    label_idx = np.where(image_to_label[:, label_i])[0]
    train_idx = np.array([i for i in label_idx if ds.split(i) == 'train'])
    val_idx = np.array([i for i in label_idx if ds.split(i) == 'val'])
    if min_train is not None and len(train_idx) < min_train:
        print('Number of training examples for label %d (%s) is %d, '
              'which is less than the minimum of %d so skipping.'
              % (label_i, label_name, len(train_idx), min_train))
        return
    if max_train is not None and len(train_idx) > max_train:
        train_idx = train_idx[:max_train]
    if max_val is not None and len(val_idx) > max_val:
        val_idx = val_idx[:max_val]
    print('Total number of images containing label %d (%s): %d'
          % (label_i, label_name, len(label_idx)))

    try:
        train_loader = loadseg.SegmentationPrefetcher(
            ds, categories=label_categories, indexes=train_idx, once=False,
            batch_size=batch_size, ahead=ahead, thread=True)
    except IndexError as err:
        print(err.args)
        return

    sw = 0
    sh = 0
    perc_label = []
    train_label_categories = []
    for batch in train_loader.batches():
        for rec in batch:
            # Check that the same segmentation dimensions are used for
            # all examples
            sw_r, sh_r = [rec[k] for k in ['sw', 'sh']]
            if sw == 0 and sh == 0:
                sw = sw_r
                sh = sh_r
            else:
                assert sw == sw_r and sh == sh_r
            for cat in label_categories:
                if rec[cat] != []:
                    train_label_categories.append(cat)
                    if type(rec[cat]) is np.ndarray:
                        perc_label.append(
                            np.sum(rec[cat] == label_i) / float(sw * sh))
                    else:
                        perc_label.append(1.)
                    break
    assert len(perc_label) == len(train_idx)

    # Compute reduction from segmentation dimensions to image dimensions
    reduction = int(round(iw / float(sw)))
    # Calculate class-weighting alpha parameter for segmentation loss
    # (Note: float typecast is necessary)
    alpha = float(1. - np.mean(perc_label))
    if alpha == 0:
        alpha = None
        print('Not using class-weighting because no pixel-level annotations')
    else:
        print('Alpha for label %d (%s): %f' % (label_i, label_name, alpha))

    # Prepare segmentation loss function using class-weight alpha
    criterion = lambda x, y: BCELoss2d(x, y, alpha)
    # Prepare to learn linear weights with a sigmoid activation after
    # the linear layer
    if num_filters is not None:
        if ed.has_mmap(blob=blob, part='label_i_%d_weights%s'
                       % (label_i, init_suffix)):
            init_weights_mmap = ed.open_mmap(
                blob=blob,
                part='label_i_%d_weights%s' % (label_i, init_suffix),
                mode='r', dtype='float32', shape=unit_size)
        elif ed.has_mmap(blob=blob, part='linear_weights%s' % init_suffix):
            all_weights_mmap = ed.open_mmap(
                blob=blob, part='linear_weights%s' % init_suffix,
                mode='r', dtype='float32', shape=(ds.label_size(), unit_size))
            init_weights_mmap = all_weights_mmap[label_i]
        else:
            assert False, 'No initial weights found for num_filters masking.'
        # Keep only the num_filters units with the largest initial weights.
        sorted_idx = np.argsort(np.abs(init_weights_mmap))[::-1]
        mask_idx = np.zeros(unit_size, dtype=int)
        mask_idx[sorted_idx[:num_filters]] = 1
        layer = CustomLayer(unit_size, upsample=True, up_size=(sh, sw),
                            act=True, bias=bias, positive=positive,
                            mask_idx=torch.ByteTensor(mask_idx), cuda=cuda)
    else:
        layer = CustomLayer(unit_size, upsample=True, up_size=(sh, sw),
                            act=True, bias=bias, positive=positive, cuda=cuda)
    if cuda:
        layer.cuda()

    optimizer = Custom_SGD(layer.parameters(), lr, momentum,
                           l1_weight_decay=l1_weight_decay,
                           l2_weight_decay=l2_weight_decay,
                           nesterov=nesterov, lower_bound=lower_bound)

    if not validation:
        try:
            val_loader = loadseg.SegmentationPrefetcher(
                ds, categories=label_categories, indexes=val_idx, once=False,
                batch_size=batch_size, ahead=ahead, thread=True)
        except IndexError as err:
            print(err.args)
            train_loader.close()
            return
        val_label_categories = []
        for batch in val_loader.batches():
            for rec in batch:
                for cat in label_categories:
                    if rec[cat] != []:
                        val_label_categories.append(cat)
                        break
        assert len(val_label_categories) == len(val_idx)

    for t in range(num_epochs):
        (_, iou) = run_epoch(blobdata, train_idx, train_label_categories,
                             label_i, fieldmap, thresh, sh, sw, reduction,
                             train_loader, layer, criterion, optimizer, t + 1,
                             train=True, cuda=cuda, iou_threshold=0.5)
        if not validation:
            (_, iou) = run_epoch(blobdata, val_idx, val_label_categories,
                                 label_i, fieldmap, thresh, sh, sw, reduction,
                                 val_loader, layer, criterion, optimizer,
                                 t + 1, train=False, cuda=cuda,
                                 iou_threshold=0.5)

    # Close segmentation prefetcher (i.e. close pools)
    train_loader.close()
    if not validation:
        val_loader.close()

    # Save weights
    weights = (layer.mask * layer.weight).data.cpu().numpy()
    weights_mmap = ed.open_mmap(blob=blob,
                                part='label_i_%d_weights%s'
                                % (label_i, suffix),
                                mode='w+', dtype='float32',
                                shape=weights.shape)
    weights_mmap[:] = weights[:]
    ed.finish_mmap(weights_mmap)
    if bias:
        bias_v = layer.bias.data.cpu().numpy()
        bias_mmap = ed.open_mmap(blob=blob,
                                 part='label_i_%d_bias%s' % (label_i, suffix),
                                 mode='w+', dtype='float32', shape=(1,))
        bias_mmap[:] = bias_v[:]
        ed.finish_mmap(bias_mmap)
    print('%s finished' % ed.mmap_filename(
        blob=blob, part='label_i_%d_weights%s' % (label_i, suffix)))
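# Hedged usage sketch: fitting one label's linear probe on a
# hypothetical probe directory and blob name.
def _example_linear_probe():
    linear_probe('probes/alexnet_broden', 'conv5', label_i=5,
                 batch_size=16, ahead=4, quantile=0.005,
                 num_epochs=30, lr=1e-4, momentum=0.9, cuda=False)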
def probe_linear(directory, blob, suffix='', start=None, end=None,
                 batch_size=16, ahead=4, quantile=0.005, bias=False,
                 positive=False, cuda=False, force=False):
    ed = expdir.ExperimentDirectory(directory)
    if (ed.has_mmap(blob=blob, part='linear_ind_ious%s' % suffix)
            and ed.has_mmap(blob=blob, part='linear_set_ious%s' % suffix)):
        print('Linear weights have already been probed.')
        print(ed.mmap_filename(blob=blob,
                               part='linear_set_val_ious%s' % suffix))
        if not force:
            return
        else:
            print('Forcefully continuing...')
    info = ed.load_info()
    seg_size = get_seg_size(info.input_dim)
    blob_info = ed.load_info(blob=blob)
    ds = loadseg.SegmentationData(info.dataset)
    shape = blob_info.shape
    N = shape[0]  # number of total images
    K = shape[1]  # number of units in given blob
    L = ds.label_size()  # number of labels

    if quantile == 1:
        thresh = np.zeros((K, 1, 1))
    else:
        quantdata = ed.open_mmap(blob=blob, part='quant-*', shape=(K, -1))
        threshold = quantdata[:, int(round(quantdata.shape[1] * quantile))]
        thresh = threshold[:, np.newaxis, np.newaxis]

    fn_read = ed.mmap_filename(blob=blob)
    blobdata = cached_memmap(fn_read, mode='r', dtype='float32', shape=shape)
    image_to_label = load_image_to_label(directory)

    if (ed.has_mmap(blob=blob, part='linear_ind_ious%s' % suffix, inc=True)
            and not force):
        assert ed.has_mmap(blob=blob, part='linear_set_ious%s' % suffix,
                           inc=True)
        ind_ious = ed.open_mmap(blob=blob, part='linear_ind_ious%s' % suffix,
                                mode='r+', inc=True, dtype='float32',
                                shape=(L, N))
        set_ious = ed.open_mmap(blob=blob, part='linear_set_ious%s' % suffix,
                                mode='r+', inc=True, dtype='float32',
                                shape=(L,))
        set_ious_train = ed.open_mmap(blob=blob,
                                      part='linear_set_train_ious%s' % suffix,
                                      mode='r+', inc=True, dtype='float32',
                                      shape=(L,))
        try:
            set_ious_val = ed.open_mmap(blob=blob,
                                        part='linear_set_val_ious%s' % suffix,
                                        mode='r+', inc=True, dtype='float32',
                                        shape=(L,))
        except Exception:
            set_ious_val = ed.open_mmap(blob=blob,
                                        part='linear_set_val_ious%s' % suffix,
                                        mode='r+', dtype='float32',
                                        shape=(L,))
    else:
        ind_ious = ed.open_mmap(blob=blob, part='linear_ind_ious%s' % suffix,
                                mode='w+', dtype='float32', shape=(L, N))
        set_ious = ed.open_mmap(blob=blob, part='linear_set_ious%s' % suffix,
                                mode='w+', dtype='float32', shape=(L,))
        set_ious_train = ed.open_mmap(blob=blob,
                                      part='linear_set_train_ious%s' % suffix,
                                      mode='w+', dtype='float32', shape=(L,))
        set_ious_val = ed.open_mmap(blob=blob,
                                    part='linear_set_val_ious%s' % suffix,
                                    mode='w+', dtype='float32', shape=(L,))

    if start is None:
        start = 1
    if end is None:
        end = L
    for label_i in range(start, end):
        if ed.has_mmap(blob=blob,
                       part='label_i_%d_weights%s' % (label_i, suffix)):
            try:
                weights = ed.open_mmap(
                    blob=blob,
                    part='label_i_%d_weights%s' % (label_i, suffix),
                    mode='r', dtype='float32', shape=(K,))
            except ValueError:
                # SUPPORTING LEGACY CODE (TODO: Remove)
                weights = ed.open_mmap(
                    blob=blob,
                    part='label_i_%d_weights%s' % (label_i, suffix),
                    mode='r', dtype=float, shape=(K,))
        elif ed.has_mmap(blob=blob, part='linear_weights%s' % suffix):
            all_weights = ed.open_mmap(blob=blob,
                                       part='linear_weights%s' % suffix,
                                       mode='r', dtype='float32',
                                       shape=(L, K))
            weights = all_weights[label_i]
            if not np.any(weights):
                print('Label %d does not have associated weights to it, '
                      'so skipping.' % label_i)
                continue
        else:
            print('Label %d does not have associated weights to it, '
                  'so skipping.' % label_i)
            continue

        if bias:
            if ed.has_mmap(blob=blob,
                           part='label_i_%d_bias%s' % (label_i, suffix)):
                bias_v = ed.open_mmap(
                    blob=blob, part='label_i_%d_bias%s' % (label_i, suffix),
                    mode='r', dtype='float32', shape=(1,))
            else:
                assert ed.has_mmap(blob=blob, part='linear_bias%s' % suffix)
                all_bias_v = ed.open_mmap(blob=blob,
                                          part='linear_bias%s' % suffix,
                                          mode='r', dtype='float32',
                                          shape=(L,))
                bias_v = np.array([all_bias_v[label_i]])

        label_categories = list(ds.label[label_i]['category'].keys())
        label_name = ds.name(category=None, j=label_i)
        label_idx = np.where(image_to_label[:, label_i])[0]

        loader = loadseg.SegmentationPrefetcher(ds,
                                                categories=label_categories,
                                                indexes=label_idx, once=True,
                                                batch_size=batch_size,
                                                ahead=ahead, thread=True)
        loader_idx = np.array(loader.indexes)
        num_imgs = len(loader.indexes)

        print('Probing with learned weights for label %d (%s) '
              'with %d images...' % (label_i, label_name, num_imgs))

        model = CustomLayer(K, upsample=True, up_size=seg_size, act=True,
                            bias=bias, positive=positive, cuda=cuda)
        model.weight.data[...] = torch.Tensor(weights)
        if bias:
            model.bias.data[...] = torch.Tensor(bias_v)
        if cuda:
            model.cuda()
        model.eval()

        iou_intersects = np.zeros(num_imgs)
        iou_unions = np.zeros(num_imgs)

        i = 0
        for batch in loader.batches():
            start_t = time.time()
            if (i + 1) * batch_size < num_imgs:
                idx = range(i * batch_size, (i + 1) * batch_size)
            else:
                idx = range(i * batch_size, num_imgs)
            i += 1
            input = torch.Tensor(
                (blobdata[loader_idx[idx]] > thresh).astype(float))
            input_var = (Variable(input.cuda(), volatile=True) if cuda
                         else Variable(input, volatile=True))
            target_ = []
            for rec in batch:
                for cat in label_categories:
                    if rec[cat] != []:
                        if type(rec[cat]) is np.ndarray:
                            target_.append(np.max(
                                (rec[cat] == label_i).astype(float), axis=0))
                        else:
                            target_.append(np.ones(seg_size))
                        break
            target = torch.Tensor(target_)
            target_var = (Variable(target.cuda(), volatile=True) if cuda
                          else Variable(target, volatile=True))
            output_var = model(input_var)
            iou_intersects[idx] = np.squeeze(
                iou_intersect_d(output_var, target_var).data.cpu().numpy())
            iou_unions[idx] = np.squeeze(
                iou_union_d(output_var, target_var).data.cpu().numpy())
            print('Batch: %d/%d\tTime: %f secs\tAvg Ind IOU: %f'
                  % (i, num_imgs / batch_size, time.time() - start_t,
                     np.mean(np.true_divide(iou_intersects[idx],
                                            iou_unions[idx] + 1e-20))))

        loader.close()
        label_ind_ious = np.true_divide(iou_intersects, iou_unions + 1e-20)
        label_set_iou = np.true_divide(np.sum(iou_intersects),
                                       np.sum(iou_unions) + 1e-20)

        ind_ious[label_i, loader_idx] = label_ind_ious
        set_ious[label_i] = label_set_iou
        train_idx = [i for i in range(len(loader_idx))
                     if ds.split(loader_idx[i]) == 'train']
        val_idx = [i for i in range(len(loader_idx))
                   if ds.split(loader_idx[i]) == 'val']
        set_ious_train[label_i] = np.true_divide(
            np.sum(iou_intersects[train_idx]),
            np.sum(iou_unions[train_idx]) + 1e-20)
        set_ious_val[label_i] = np.true_divide(
            np.sum(iou_intersects[val_idx]),
            np.sum(iou_unions[val_idx]) + 1e-20)

        print('Label %d (%s) Set IOU: %f, Train Set IOU: %f, '
              'Val Set IOU: %f, Max Ind IOU: %f'
              % (label_i, label_name, label_set_iou,
                 set_ious_train[label_i], set_ious_val[label_i],
                 np.max(label_ind_ious)))

    ed.finish_mmap(ind_ious)
    ed.finish_mmap(set_ious)
    ed.finish_mmap(set_ious_train)
    ed.finish_mmap(set_ious_val)
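# Hedged sketch of the two IOU reductions used above: the individual IOU
# is intersection/union per image, while the set IOU pools intersections
# and unions across all images before dividing.
def _example_iou_reductions(iou_intersects, iou_unions):
    ind_ious = np.true_divide(iou_intersects, iou_unions + 1e-20)
    set_iou = np.true_divide(np.sum(iou_intersects),
                             np.sum(iou_unions) + 1e-20)
    return ind_ious, set_iou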
def label_probe(directory, blob, quantile=0.005, batch_size=16, ahead=4,
                start=None, end=None, suffix='', cuda=False):
    # Make sure we have a directory to work in
    ed = expdir.ExperimentDirectory(directory)
    # Check if label probe has already been created
    if (ed.has_mmap(blob=blob, part='single_set_ious%s' % suffix)
            and ed.has_mmap(blob=blob, part='single_ind_ious%s' % suffix)):
        print('label_probe_pytorch.py has already been run.')
        return
    # Load probe metadata
    info = ed.load_info()
    seg_size = get_seg_size(info.input_dim)
    # Load blob metadata
    blob_info = ed.load_info(blob=blob)
    shape = blob_info.shape
    tot_imgs = shape[0]
    unit_size = shape[1]
    # Load the blob quantile data and grab thresholds
    quantdata = ed.open_mmap(blob=blob, part='quant-*',
                             shape=(unit_size, -1))
    threshold = quantdata[:, int(round(quantdata.shape[1] * quantile))]
    thresh = threshold[:, np.newaxis, np.newaxis]
    # Load the dataset
    ds = loadseg.SegmentationData(info.dataset)
    # Map the blob activation data for reading
    blobdata = ed.open_mmap(blob=blob, mode='r', shape=shape)
    # Get image-to-labels mapping
    if not has_image_to_label(directory):
        print('image_to_label does not exist in %s; creating it now...'
              % directory)
        create_image_to_label(directory, batch_size=batch_size, ahead=ahead)
    image_to_label = load_image_to_label(directory)

    num_labels = ds.label_size()
    upsample = nn.Upsample(size=seg_size, mode='bilinear')

    set_ious_train_mmap = ed.open_mmap(blob=blob,
                                       part='single_set_train_ious%s'
                                       % suffix, mode='w+', dtype='float32',
                                       shape=(num_labels, unit_size))
    set_ious_val_mmap = ed.open_mmap(blob=blob,
                                     part='single_set_val_ious%s' % suffix,
                                     mode='w+', dtype='float32',
                                     shape=(num_labels, unit_size))
    set_ious_mmap = ed.open_mmap(blob=blob,
                                 part='single_set_ious%s' % suffix,
                                 mode='w+', dtype='float32',
                                 shape=(num_labels, unit_size))
    ind_ious_mmap = ed.open_mmap(blob=blob,
                                 part='single_ind_ious%s' % suffix,
                                 mode='w+', dtype='float32',
                                 shape=(num_labels, tot_imgs, unit_size))

    if start is None:
        start = 1
    if end is None:
        end = num_labels
    for label_i in range(start, end):
        print('Starting for label %d (%s)'
              % (label_i, ds.name(category=None, j=label_i)))
        label_categories = list(ds.label[label_i]['category'].keys())
        num_cats = len(label_categories)
        label_idx = np.where(image_to_label[:, label_i])[0]
        loader = loadseg.SegmentationPrefetcher(ds,
                                                categories=label_categories,
                                                indexes=label_idx,
                                                once=False,
                                                batch_size=batch_size,
                                                ahead=ahead, thread=True)
        loader_idx = np.array(loader.indexes)
        N = len(loader_idx)
        iou_intersects = np.zeros((N, unit_size))
        iou_unions = np.zeros((N, unit_size))

        if num_cats > 1:
            # Determine which category carries the label for each record
            rec_labcat = []
            for batch in loader.batches():
                for rec in batch:
                    for cat in label_categories:
                        if rec[cat] != []:
                            rec_labcat.append(cat)
                            break
        else:
            rec_labcat = [label_categories[0] for i in range(N)]

        i = 0
        for batch in loader.batches():
            start_t = time.time()
            if (i + 1) * batch_size < N:
                idx = range(i * batch_size, (i + 1) * batch_size)
            else:
                idx = range(i * batch_size, N)
            i += 1
            input = torch.Tensor(
                (blobdata[loader_idx[idx]] > thresh).astype(float))
            input_var = upsample(Variable(input.cuda()) if cuda
                                 else Variable(input))
            target = torch.Tensor([
                np.max((rec[rec_labcat[j]] == label_i).astype(float), axis=0)
                if type(rec[rec_labcat[j]]) is np.ndarray
                else np.ones(seg_size)
                for j, rec in enumerate(batch)])
            target_var = Variable(target.unsqueeze(1).expand_as(
                input_var).cuda() if cuda
                else target.unsqueeze(1).expand_as(input_var))
            iou_intersects[idx] = np.squeeze(
                iou_intersect_d(input_var, target_var).data.cpu().numpy())
            iou_unions[idx] = np.squeeze(
                iou_union_d(input_var, target_var).data.cpu().numpy())
            print('Batch %d/%d\tTime %f secs\tAvg Ind IOU %f\t'
                  % (i, N / batch_size, time.time() - start_t,
                     np.mean(np.true_divide(iou_intersects[idx],
                                            iou_unions[idx] + 1e-20))))

        set_ious = np.true_divide(np.sum(iou_intersects, axis=0),
                                  np.sum(iou_unions, axis=0) + 1e-20)
        loader.close()
        best_filter = np.argmax(set_ious)
        print('Label %d (%s): best set IOU = %f (filter %d)'
              % (label_i, ds.name(category=None, j=label_i),
                 set_ious[best_filter], best_filter))
        ind_ious = np.true_divide(iou_intersects, iou_unions + 1e-20)

        set_ious_mmap[label_i] = set_ious
        ind_ious_mmap[label_i, loader_idx] = ind_ious
        train_idx = [i for i in range(len(loader_idx))
                     if ds.split(loader_idx[i]) == 'train']
        val_idx = [i for i in range(len(loader_idx))
                   if ds.split(loader_idx[i]) == 'val']
        set_ious_train_mmap[label_i] = np.true_divide(
            np.sum(iou_intersects[train_idx], axis=0),
            np.sum(iou_unions[train_idx], axis=0) + 1e-20)
        set_ious_val_mmap[label_i] = np.true_divide(
            np.sum(iou_intersects[val_idx], axis=0),
            np.sum(iou_unions[val_idx], axis=0) + 1e-20)

    ed.finish_mmap(set_ious_train_mmap)
    ed.finish_mmap(set_ious_val_mmap)
    ed.finish_mmap(set_ious_mmap)
    ed.finish_mmap(ind_ious_mmap)
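# Hedged usage sketch: probing every unit in a blob against every label
# on a hypothetical probe directory and blob name.
def _example_label_probe():
    label_probe('probes/alexnet_broden', 'conv5', quantile=0.005,
                batch_size=16, ahead=4, cuda=False)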