def __read_file(self, data_dir, dataset):
    """Collect image paths and integer label lists for one dataset split.

    The label file is looked up in the configer under
    ``data.<dataset>_label_path``. When ``dataset`` is ``'train'`` and
    ``data.include_val`` is truthy, the lines of ``data.val_label_path`` are
    appended as well. Every non-empty line is split on whitespace: the first
    field is an image path relative to ``data_dir`` and the remaining fields
    are parsed as integers.

    Args:
        data_dir: Directory the relative image paths are joined onto.
        dataset: Split name used to build the configer key and the log line.

    Returns:
        tuple: ``(img_list, mlabel_list)``; full image paths and, per image,
        its list of integer labels.

    Raises:
        AssertionError: If no valid image was found.
    """
    raw_lines = []
    with open(self.configer.get('data.{}_label_path'.format(dataset)), 'r') as stream:
        raw_lines.extend(stream.readlines())

    if dataset == 'train' and self.configer.get('data.include_val', default=False):
        with open(self.configer.get('data.val_label_path'), 'r') as stream:
            raw_lines.extend(stream.readlines())

    img_list, mlabel_list = [], []
    for raw_line in raw_lines:
        fields = raw_line.strip().split()
        if not fields:
            # Blank line: nothing to parse.
            continue

        rel_path = fields[0]
        full_path = os.path.join(data_dir, rel_path)
        # Entries that are missing on disk or are not images are skipped
        # with a warning rather than aborting the whole read.
        if not (os.path.exists(full_path) and ImageHelper.is_img(rel_path)):
            Log.warn('Invalid Image Path: {}'.format(full_path))
            continue

        img_list.append(full_path)
        mlabel_list.append([int(item) for item in fields[1:]])

    assert len(img_list) > 0
    Log.info('Length of {} imgs is {}...'.format(dataset, len(img_list)))
    return img_list, mlabel_list
def sscrop_test(self, inputs, crop_size, scale=1):
    """Run single-scale inference over sliding crops and average overlaps.

    The batch is bilinearly resized by ``scale``; crop origins come from
    ``self._decide_intersection``; every crop is passed to ``self.ss_test``
    and its output is accumulated into a full-size map that is divided by
    the per-pixel crop count. The result is resized back to the input's
    original height and width.

    Currently, sscrop_test does not support diverse_size testing.

    Args:
        inputs: Batch tensor indexed as (n, c, h, w).
        crop_size: Pair ``(crop_height, crop_width)``.
        scale: Resize factor applied before cropping.

    Returns:
        Tensor of averaged predictions at the original spatial size.
    """
    ori_h, ori_w = inputs.size(2), inputs.size(3)
    scaled_inputs = F.interpolate(inputs,
                                  size=(int(ori_h * scale), int(ori_w * scale)),
                                  mode="bilinear", align_corners=True)
    n, h, w = scaled_inputs.size(0), scaled_inputs.size(2), scaled_inputs.size(3)
    crop_h, crop_w = crop_size[0], crop_size[1]
    num_classes = self.configer.get('data', 'num_classes')

    # Accumulators live on the current CUDA device, like the original code.
    full_probs = torch.cuda.FloatTensor(n, num_classes, h, w).fill_(0)
    count_predictions = torch.cuda.FloatTensor(n, num_classes, h, w).fill_(0)

    row_starts = self._decide_intersection(h, crop_h)
    col_starts = self._decide_intersection(w, crop_w)

    crop_counter = 0
    for top in row_starts:
        rows = slice(top, top + crop_h)
        for left in col_starts:
            cols = slice(left, left + crop_w)
            prediction = self.ss_test(scaled_inputs[:, :, rows, cols])
            count_predictions[:, :, rows, cols] += 1
            full_probs[:, :, rows, cols] += prediction
            crop_counter += 1
            Log.info('predicting {:d}-th crop'.format(crop_counter))

    # Average the pixels that were covered by more than one crop.
    full_probs /= count_predictions
    return F.interpolate(full_probs, size=(ori_h, ori_w), mode='bilinear', align_corners=True)
def get_testloader(self, dataset=None):
    """Build the DataLoader used for testing.

    The dataset class is chosen from the configuration:

    * ``SWOffsetTestLoader`` when ``data.use_sw_offset`` or
      ``data.pred_sw_offset`` exists in the configer;
    * ``CSDataTestLoader`` when ``method`` is ``'fcn_segmentor'``.

    Args:
        dataset: Split name; defaults to ``'test'`` when None.

    Returns:
        The configured DataLoader, or None (after logging an error) when no
        loader matches the configuration.
    """
    dataset = 'test' if dataset is None else dataset

    if self.configer.exists('data', 'use_sw_offset') or self.configer.exists('data', 'pred_sw_offset'):
        Log.info('use sliding window based offset loader for test ...')
        klass = SWOffsetTestLoader
    elif self.configer.get('method') == 'fcn_segmentor':
        Log.info('use CSDataTestLoader for test ...')
        klass = CSDataTestLoader
    else:
        # Previously this case fell through and returned None silently;
        # report it the same way get_valloader does.
        Log.error('Method: {} loader is invalid.'.format(self.configer.get('method')))
        return None

    # Both loader classes take identical arguments, so build the DataLoader once.
    test_loader = data.DataLoader(
        klass(root_dir=self.configer.get('data', 'data_dir'), dataset=dataset,
              img_transform=self.img_transform, configer=self.configer),
        batch_size=self.configer.get('test', 'batch_size'), pin_memory=True,
        num_workers=self.configer.get('data', 'workers'), shuffle=False,
        collate_fn=lambda *args: collate(
            *args, trans_dict=self.configer.get('test', 'data_transformer')
        )
    )
    return test_loader
def load_net(self, net):
    """Move ``net`` to its device and optionally restore a checkpoint.

    When ``gpu`` is configured the net is wrapped by ``self._make_parallel``
    and moved to CUDA; otherwise it stays on CPU. If ``network.resume`` is
    set, the checkpoint at that path is loaded through
    ``self.load_state_dict`` using the ``network.resume_strict`` flag.

    Accepted checkpoint layouts: a dict with a ``'state_dict'`` entry, a
    dict with a ``'model'`` entry, or a bare ``OrderedDict`` of weights.

    Args:
        net: The network module to prepare.

    Returns:
        The (possibly wrapped) network.

    Raises:
        RuntimeError: If the checkpoint contains no recognisable state dict.
    """
    gpu = self.configer.get('gpu')
    if gpu is not None:
        net = self._make_parallel(net)

    net = net.to(torch.device('cpu' if gpu is None else 'cuda'))
    net.float()

    resume_path = self.configer.get('network', 'resume')
    if resume_path is None:
        return net

    Log.info('Loading checkpoint from {}...'.format(resume_path))
    # Load onto CPU so a checkpoint saved on a GPU that is not available
    # here can still be read; weights are copied into the net afterwards.
    resume_dict = torch.load(resume_path, map_location='cpu')
    if 'state_dict' in resume_dict:
        checkpoint_dict = resume_dict['state_dict']
    elif 'model' in resume_dict:
        checkpoint_dict = resume_dict['model']
    elif isinstance(resume_dict, OrderedDict):
        checkpoint_dict = resume_dict
    else:
        raise RuntimeError(
            'No state_dict found in checkpoint file {}'.format(resume_path))

    # Strip the 'module.' prefix only when every key carries it. The old
    # code inspected just the first key (IndexError on an empty dict) and
    # then cut 7 characters off every key, prefixed or not.
    prefix = 'module.'
    if checkpoint_dict and all(k.startswith(prefix) for k in checkpoint_dict):
        checkpoint_dict = {k[len(prefix):]: v for k, v in checkpoint_dict.items()}

    # load state_dict
    target = net.module if hasattr(net, 'module') else net
    self.load_state_dict(target, checkpoint_dict, self.configer.get('network', 'resume_strict'))

    if self.configer.get('network', 'resume_continue'):
        self.configer.resume(resume_dict['config_dict'])

    return net
def _relabel(self):
    """Rewrite the label file with consecutive integer class ids.

    Reads ``data.label_path`` (lines of ``<relative image path> <label>``),
    assigns ids in order of first appearance, and writes
    ``<path> <id>`` lines to ``<label_path>_new``. Entries whose image does
    not exist under ``data.data_dir`` are dropped, blank lines are skipped,
    and a repeated image path logs an error and exits.

    Side effects: ``data.label_path`` is updated to the new file, a copy is
    written to ``<data.merge_dir>/ori_label.txt`` and ``data.num_classes``
    is updated.
    """
    old_label_path = self.configer.get('data', 'label_path')
    new_label_path = '{}_new'.format(old_label_path)
    self.configer.update('data.label_path', new_label_path)
    data_dir = self.configer.get('data', 'data_dir')

    label_id = 0
    label_dict = dict()
    seen_paths = set()
    # Context managers guarantee both files are closed, including on the
    # exit() below (the output handle used to leak on that path).
    with open(new_label_path, 'w') as fw, open(old_label_path, 'r') as fr:
        for line in fr:
            line_items = line.strip().split()
            if not line_items:
                # A blank line used to raise IndexError.
                continue

            if not os.path.exists(os.path.join(data_dir, line_items[0])):
                continue

            if line_items[1] not in label_dict:
                label_dict[line_items[1]] = label_id
                label_id += 1

            if line_items[0] in seen_paths:
                Log.error('Duplicate Error: {}'.format(line_items[0]))
                exit()

            seen_paths.add(line_items[0])
            fw.write('{} {}\n'.format(line_items[0], label_dict[line_items[1]]))

    # NOTE(review): 'data.label_path' was updated above, so this presumably
    # copies the relabelled file even though the target is named
    # 'ori_label.txt' -- confirm whether old_label_path was intended.
    shutil.copy(self.configer.get('data', 'label_path'),
                os.path.join(self.configer.get('data', 'merge_dir'), 'ori_label.txt'))
    self.configer.update(('data.num_classes'), [label_id])
    Log.info('Num Classes is {}...'.format(self.configer.get('data', 'num_classes')))
def get_valloader(self, dataset=None):
    """Build the validation DataLoader, with a sampler from get_dataloader_sampler.

    Args:
        dataset: Split name; ``'val'`` is used when None.

    Returns:
        The DataLoader, or None (after logging an error) when the configured
        method has no matching loader. The batch size is the configured
        ``val.batch_size`` integer-divided by ``get_world_size()``.
    """
    if dataset is None:
        dataset = 'val'

    wants_dt_offset = (self.configer.exists('data', 'use_dt_offset')
                       or self.configer.exists('data', 'pred_dt_offset'))
    if wants_dt_offset:
        # dt-offset manner: load both the ground-truth label and offset
        # (based on distance transform).
        Log.info('use distance transform based offset loader for val ...')
        loader_cls = DTOffsetLoader
    elif self.configer.get('method') == 'fcn_segmentor':
        # default manner: load the ground-truth label.
        Log.info('use DefaultLoader for val ...')
        loader_cls = DefaultLoader
    else:
        Log.error('Method: {} loader is invalid.'.format(self.configer.get('method')))
        return None

    val_dataset, val_sampler = self.get_dataloader_sampler(loader_cls, 'val', dataset)

    def _collate_batch(*args):
        # The transformer settings are read from the configer at collate time.
        return collate(*args, trans_dict=self.configer.get('val', 'data_transformer'))

    return data.DataLoader(
        val_dataset,
        sampler=val_sampler,
        batch_size=self.configer.get('val', 'batch_size') // get_world_size(),
        pin_memory=True,
        num_workers=self.configer.get('data', 'workers'),
        shuffle=False,
        collate_fn=_collate_batch
    )
def save_net(runner, net, performance=None, val_loss=None, iters=None, epoch=None, postfix='latest'):
    """Save a checkpoint of ``net`` and the runner state.

    The checkpoint holds the configer dict, ``net.state_dict()`` and
    ``runner.runner_state``. It is always written as
    ``<checkpoints_name>_<postfix>.pth`` and, depending on the arguments,
    additionally under these names:

    * ``_max_performance`` when ``performance`` beats
      ``runner_state['max_performance']`` (which is then updated);
    * ``_min_loss`` when ``val_loss`` is below
      ``runner_state['min_val_loss']`` (which is then updated);
    * ``_iters<iters>`` / ``_epoch<epoch>`` when those are given.

    Args:
        runner: Object exposing ``configer`` and ``runner_state``.
        net: Module whose ``state_dict()`` is stored.
        performance: Optional validation score.
        val_loss: Optional validation loss.
        iters: Optional iteration number for a tagged copy.
        epoch: Optional epoch number for a tagged copy.
        postfix: Suffix of the always-written checkpoint.
    """
    state = {
        'config_dict': runner.configer.to_dict(),
        'state_dict': net.state_dict(),
        'runner_state': runner.runner_state
    }
    checkpoints_dir = os.path.join(runner.configer.get('project_dir'),
                                   runner.configer.get('network', 'checkpoints_dir'))
    # exist_ok avoids the check-then-create race when several processes
    # save into the same directory at once.
    os.makedirs(checkpoints_dir, exist_ok=True)

    checkpoints_name = runner.configer.get('network', 'checkpoints_name')

    def _dump(suffix):
        """Write ``state`` as ``<checkpoints_name>_<suffix>.pth`` and return its path."""
        path = os.path.join(checkpoints_dir, '{}_{}.pth'.format(checkpoints_name, suffix))
        torch.save(state, path)
        return path

    Log.info('save model {}'.format(_dump(postfix)))

    if performance is not None and performance > runner.runner_state['max_performance']:
        _dump('max_performance')
        runner.runner_state['max_performance'] = performance

    if val_loss is not None and val_loss < runner.runner_state['min_val_loss']:
        _dump('min_loss')
        runner.runner_state['min_val_loss'] = val_loss

    if iters is not None:
        _dump('iters{}'.format(iters))

    if epoch is not None:
        _dump('epoch{}'.format(epoch))
def get_valloader(self, dataset=None):
    """Build the validation DataLoader for the ``fcn_segmentor`` method.

    Args:
        dataset: Split name; ``'val'`` is used when None.

    Returns:
        A DataLoader over ``DefaultLoader``, or None (after logging an
        error) when the configured method is not ``'fcn_segmentor'``.
    """
    if dataset is None:
        dataset = 'val'

    if self.configer.get('method') != 'fcn_segmentor':
        Log.error('Method: {} loader is invalid.'.format(
            self.configer.get('method')))
        return None

    # default manner: load the ground-truth label.
    Log.info('use DefaultLoader for val ...')
    val_dataset = DefaultLoader(root_dir=self.configer.get('data', 'data_dir'),
                                dataset=dataset,
                                aug_transform=self.aug_val_transform,
                                img_transform=self.img_transform,
                                label_transform=self.label_transform,
                                configer=self.configer)

    def _collate_batch(*args):
        # The transformer settings are read from the configer at collate time.
        return collate(*args, trans_dict=self.configer.get('val', 'data_transformer'))

    return data.DataLoader(
        val_dataset,
        batch_size=self.configer.get('val', 'batch_size'),
        pin_memory=True,
        num_workers=self.configer.get('data', 'workers'),
        shuffle=False,
        collate_fn=_collate_batch)
def _hard_anchor_sampling(self, X, y_hat, y):
    """Sample a fixed number of feature vectors per (image, class) pair.

    For every batch element, classes are enumerated from ``y_hat``. A class
    is kept when its id is > 0, differs from ``self.ignore_label`` and
    occurs at more than ``self.max_views`` positions. For each kept class,
    ``n_view`` positions are drawn at random, split between "hard"
    positions (``y_hat == cls`` but ``y != cls``) and "easy" positions
    (``y_hat == cls`` and ``y == cls``).

    NOTE(review): presumably ``y_hat`` holds the reference labels and ``y``
    the predictions, since classes are taken from ``y_hat`` -- confirm at
    the call site. The ``squeeze(1)`` below assumes each ``y_hat[ii]`` /
    ``y[ii]`` is 1-D and ``X`` is (batch, positions, feat_dim) -- TODO confirm.

    Returns:
        tuple: ``(X_, y_)`` where ``X_`` has shape
        (total_classes, n_view, feat_dim) and ``y_`` holds the class id of
        each row; ``(None, None)`` when no class qualifies.
    """
    batch_size, feat_dim = X.shape[0], X.shape[-1]

    # Pass 1: collect the qualifying classes of every batch element.
    classes = []
    total_classes = 0
    for ii in range(batch_size):
        this_y = y_hat[ii]
        this_classes = torch.unique(this_y)
        # Drop ids <= 0 and the ignore label.
        this_classes = [x for x in this_classes if x > 0 and x != self.ignore_label]
        # Keep only classes with more than max_views occurrences.
        this_classes = [x for x in this_classes if (this_y == x).nonzero().shape[0] > self.max_views]

        classes.append(this_classes)
        total_classes += len(this_classes)

    if total_classes == 0:
        return None, None

    # Share the sample budget evenly across classes, capped at max_views.
    n_view = self.max_samples // total_classes
    n_view = min(n_view, self.max_views)

    # Output buffers are allocated on the current CUDA device.
    X_ = torch.zeros((total_classes, n_view, feat_dim), dtype=torch.float).cuda()
    y_ = torch.zeros(total_classes, dtype=torch.float).cuda()

    # Pass 2: fill one output row per (batch element, class).
    X_ptr = 0
    for ii in range(batch_size):
        this_y_hat = y_hat[ii]
        this_y = y[ii]
        this_classes = classes[ii]

        for cls_id in this_classes:
            hard_indices = ((this_y_hat == cls_id) & (this_y != cls_id)).nonzero()
            easy_indices = ((this_y_hat == cls_id) & (this_y == cls_id)).nonzero()

            num_hard = hard_indices.shape[0]
            num_easy = easy_indices.shape[0]

            # Aim for a half/half split; when one side is short, take all
            # of it and fill the remainder from the other side.
            if num_hard >= n_view / 2 and num_easy >= n_view / 2:
                num_hard_keep = n_view // 2
                num_easy_keep = n_view - num_hard_keep
            elif num_hard >= n_view / 2:
                num_easy_keep = num_easy
                num_hard_keep = n_view - num_easy_keep
            elif num_easy >= n_view / 2:
                num_hard_keep = num_hard
                num_easy_keep = n_view - num_hard_keep
            else:
                # Not expected: a kept class has more than max_views
                # (>= n_view) positions, so num_hard + num_easy > n_view.
                Log.info('this shoud be never touched! {} {} {}'.format(num_hard, num_easy, n_view))
                raise Exception

            # Random subsets of the hard and easy positions.
            perm = torch.randperm(num_hard)
            hard_indices = hard_indices[perm[:num_hard_keep]]
            perm = torch.randperm(num_easy)
            easy_indices = easy_indices[perm[:num_easy_keep]]
            indices = torch.cat((hard_indices, easy_indices), dim=0)

            X_[X_ptr, :, :] = X[ii, indices, :].squeeze(1)
            y_[X_ptr] = cls_id
            X_ptr += 1

    return X_, y_
def load_net(runner, net, model_path=None):
    """Restore ``net`` from a checkpoint, if one is configured or given.

    The checkpoint path is ``model_path`` when provided, otherwise the
    configured ``network.resume``. Nothing is loaded when neither is set or
    when the path does not exist (a warning is logged in the latter case).

    Accepted checkpoint layouts: a dict with a ``'state_dict'`` entry, a
    dict with a ``'model'`` entry, or a bare ``OrderedDict`` of weights.
    When ``network.resume_continue`` is truthy, ``runner.runner_state`` is
    replaced by the checkpoint's ``'runner_state'``.

    Args:
        runner: Object exposing ``configer`` and ``runner_state``.
        net: Module to load weights into (its ``.module`` is used if present).
        model_path: Optional explicit checkpoint path.

    Returns:
        The same ``net``.

    Raises:
        RuntimeError: If the checkpoint contains no recognisable state dict.
    """
    resume_path = model_path if model_path is not None else runner.configer.get('network', 'resume')
    if resume_path is None:
        return net

    if not os.path.exists(resume_path):
        Log.warn('Resume path: {} not exists...'.format(resume_path))
        return net

    Log.info('Resuming from {}'.format(resume_path))
    resume_dict = torch.load(resume_path, map_location="cpu")
    if 'state_dict' in resume_dict:
        checkpoint_dict = resume_dict['state_dict']
    elif 'model' in resume_dict:
        checkpoint_dict = resume_dict['model']
    elif isinstance(resume_dict, OrderedDict):
        checkpoint_dict = resume_dict
    else:
        # Report the file that was actually loaded; the message used to name
        # the configured resume path even when model_path was given.
        raise RuntimeError(
            'No state_dict found in checkpoint file {}'.format(resume_path))

    # load state_dict
    target = net.module if hasattr(net, 'module') else net
    RunnerHelper.load_state_dict(target, checkpoint_dict, runner.configer.get('network', 'resume_strict'))

    if runner.configer.get('network', 'resume_continue'):
        runner.runner_state = resume_dict['runner_state']

    return net
def get_seg_loss(self, loss_type=None):
    """Instantiate the segmentation loss and wrap it via ``self._parallel``.

    Args:
        loss_type: Key into ``SEG_LOSS_DICT``; the configured
            ``loss.loss_type`` is used when None.

    Returns:
        The loss object returned by ``self._parallel``. An unknown key logs
        an error and exits the process with status 1.
    """
    key = loss_type
    if key is None:
        key = self.configer.get('loss', 'loss_type')

    if key in SEG_LOSS_DICT:
        Log.info('use loss: {}.'.format(key))
        return self._parallel(SEG_LOSS_DICT[key](self.configer))

    Log.error('Loss: {} not valid!'.format(key))
    exit(1)
def save_file(json_dict, json_file):
    """Serialise ``json_dict`` as JSON into ``json_file``.

    The parent directory is created when it does not exist yet.

    Args:
        json_dict: JSON-serialisable object.
        json_file: Destination path; may be a bare filename.
    """
    dir_name = os.path.dirname(json_file)
    # A bare filename has an empty dirname; os.makedirs('') raises
    # FileNotFoundError, so only create a directory when there is one.
    if dir_name and not os.path.exists(dir_name):
        Log.info('Json Dir: {} not exists.'.format(dir_name))
        os.makedirs(dir_name, exist_ok=True)

    with open(json_file, 'w') as write_stream:
        write_stream.write(json.dumps(json_dict))
def test(self, data_loader=None):
    """Run offset extraction over ``self.test_loader`` and save ``.mat`` files.

    For every image the extracted offset is cropped to the recorded border
    size, resized to the original image size with nearest-neighbour
    interpolation and stored under ``self.save_dir`` as ``{'mat': offset}``.
    Each file is read back after writing; if that fails it is rewritten
    without compression.

    Args:
        data_loader: Not used by this method; batches always come from
            ``self.test_loader``.
    """
    self.seg_net.eval()
    tick = time.time()
    image_id = 0

    Log.info('save dir {}'.format(self.save_dir))
    FileHelper.make_dirs(self.save_dir, is_file=False)

    print('Total batches', len(self.test_loader))
    for batch in self.test_loader:
        inputs = [batch['img']]
        names = batch['name']
        metas = batch['meta']
        dest_dir = self.save_dir

        with torch.no_grad():
            offsets, logits = self.extract_offset(inputs)
            print([x.shape for x in logits])

            for idx in range(len(inputs[0])):
                image_id += 1
                meta = metas[idx]
                ori_img_size = meta['ori_img_size']
                border_size = meta['border_size']

                offset = offsets[idx].squeeze().cpu().numpy()
                # Crop to border_size, then resize back to the original size.
                cropped = offset[:border_size[1], :border_size[0]]
                offset = cv2.resize(cropped, tuple(ori_img_size),
                                    interpolation=cv2.INTER_NEAREST)
                print(image_id)

                os.makedirs(dest_dir, exist_ok=True)

                # Swap the extension for '.mat'; names without a dot just
                # get '.mat' appended.
                stem = names[idx].rpartition('.')[0]
                base = stem if stem else names[idx]
                dest_name = os.path.join(dest_dir, base + '.mat')
                print('Shape:', offset.shape, 'Saving to', dest_name)

                mat_payload = {'mat': offset}
                scipy.io.savemat(dest_name, mat_payload, do_compression=True)
                try:
                    # Verify the compressed file can be read back.
                    scipy.io.loadmat(dest_name)
                except Exception as e:
                    print(e)
                    scipy.io.savemat(dest_name, mat_payload, do_compression=False)

        self.batch_time.update(time.time() - tick)
        tick = time.time()

    Log.info('Test Time {batch_time.sum:.3f}s'.format(
        batch_time=self.batch_time))
def xml2json(xml_file, json_file):
    """Check the XML input and prepare the JSON output directory.

    Only these preparation steps are present in this body: the process
    exits with status 1 when ``xml_file`` is missing, and the parent
    directory of ``json_file`` is created when it does not exist.

    Args:
        xml_file: Path of the XML file that must exist.
        json_file: Destination path; may be a bare filename.
    """
    if not os.path.exists(xml_file):
        Log.error('Xml file: {} not exists.'.format(xml_file))
        exit(1)

    json_dir_name = os.path.dirname(json_file)
    # A bare filename has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError; there is nothing to create in that case.
    if json_dir_name and not os.path.exists(json_dir_name):
        Log.info('Json Dir: {} not exists.'.format(json_dir_name))
        os.makedirs(json_dir_name, exist_ok=True)
def json2xml(json_file, xml_file):
    """Check the JSON input and prepare the XML output directory.

    Only these preparation steps are present in this body: the process
    exits with status 1 when ``json_file`` is missing, and the parent
    directory of ``xml_file`` is created when it does not exist.

    Args:
        json_file: Path of the JSON file that must exist.
        xml_file: Destination path; may be a bare filename.
    """
    if not os.path.exists(json_file):
        Log.error('Json file: {} not exists.'.format(json_file))
        exit(1)

    xml_dir_name = os.path.dirname(xml_file)
    # A bare filename has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError; there is nothing to create in that case.
    if xml_dir_name and not os.path.exists(xml_dir_name):
        Log.info('Xml Dir: {} not exists.'.format(xml_dir_name))
        os.makedirs(xml_dir_name, exist_ok=True)
def get_evaluator(configer, trainer, name=None):
    """Instantiate the evaluator registered under ``name``.

    Args:
        configer: Passed through to the evaluator constructor.
        trainer: Passed through to the evaluator constructor.
        name: Key into ``evaluators``. When None, the ``evaluator``
            environment variable is used, defaulting to ``'standard'``.

    Returns:
        The constructed evaluator.

    Raises:
        RuntimeError: If ``name`` is not a registered evaluator.
    """
    # An explicit argument wins; it used to be overwritten unconditionally
    # by the environment lookup, which made the parameter a no-op.
    if name is None:
        name = os.environ.get('evaluator', 'standard')

    if name not in evaluators:
        raise RuntimeError('Unknown evaluator name: {}'.format(name))

    klass = evaluators[name]
    Log.info('Using evaluator: {}'.format(klass.__name__))
    return klass(configer, trainer)
def forward(self, inputs, targets, **kwargs):
    """Compute the weighted sum of the mask loss and the direction loss.

    Args:
        inputs: Pair ``(pred_mask, pred_direction)`` of prediction tensors.
        targets: Sequence whose first three items are the segmentation
            label map, the distance map and the angle map.

    Returns:
        ``mask_weight * mask_loss + direction_weight * direction_loss``.
        The weights and the mask threshold are read from the environment
        variables ``mask_weight``, ``direction_weight`` and
        ``mask_threshold`` (defaults 1, 1 and 0.5).
    """
    from lib.utils.helpers.offset_helper import DTOffsetHelper

    def _env_float(var_name, default):
        return float(os.environ.get(var_name, default))

    def _resize_to(tensor, size):
        return F.interpolate(tensor, size=size, mode="bilinear", align_corners=True)

    pred_mask, pred_direction = inputs
    seg_label_map = targets[0]
    distance_map = targets[1]
    angle_map = targets[2]

    # Mask branch: ground truth comes from the distance map.
    gt_mask = DTOffsetHelper.distance_to_mask_label(distance_map,
                                                    seg_label_map,
                                                    return_tensor=True)
    gt_size = gt_mask.shape[1:]
    mask_weights = self.calc_weights(gt_mask, 2)

    pred_direction = _resize_to(pred_direction, gt_size)
    pred_mask = _resize_to(pred_mask, gt_size)
    mask_loss = F.cross_entropy(pred_mask, gt_mask,
                                weight=mask_weights, ignore_index=-1)

    # Direction branch: pixels whose predicted mask probability does not
    # exceed the threshold are passed as an extra ignore mask.
    mask_threshold = _env_float('mask_threshold', 0.5)
    binary_pred_mask = torch.softmax(pred_mask, dim=1)[:, 1, :, :] > mask_threshold

    gt_direction = DTOffsetHelper.angle_to_direction_label(
        angle_map,
        seg_label_map=seg_label_map,
        extra_ignore_mask=(binary_pred_mask == 0),
        return_tensor=True)

    valid_direction = gt_direction != -1
    direction_weights = self.calc_weights(gt_direction[valid_direction],
                                          pred_direction.size(1))
    direction_loss = F.cross_entropy(pred_direction, gt_direction,
                                     weight=direction_weights, ignore_index=-1)

    # Log only while training, every display_iter iterations, on device 0.
    if self.training \
            and self.configer.get('iters') % self.configer.get('solver', 'display_iter') == 0 \
            and torch.cuda.current_device() == 0:
        Log.info('mask loss: {} direction loss: {}.'.format(
            mask_loss, direction_loss))

    mask_weight = _env_float('mask_weight', 1)
    direction_weight = _env_float('direction_weight', 1)
    return mask_weight * mask_loss + direction_weight * direction_loss
def _parallel(self, loss):
    """Wrap ``loss`` for multi-GPU use when the configuration asks for it.

    Args:
        loss: The loss module to wrap.

    Returns:
        ``loss`` unchanged in distributed mode or when wrapping does not
        apply; otherwise ``DataParallelCriterion(loss)`` (used when
        ``network.loss_balance`` is truthy and more than one GPU is
        configured).
    """
    if is_distributed():
        Log.info('use distributed loss')
        return loss

    if self.configer.get('network', 'loss_balance'):
        gpu_ids = self.configer.get('gpu')
        # 'gpu' may be None when no GPU is configured; len(None) would
        # raise TypeError instead of simply skipping the wrapper.
        if gpu_ids is not None and len(gpu_ids) > 1:
            Log.info('use DataParallelCriterion loss')
            from lib.extensions.parallel.data_parallel import DataParallelCriterion
            loss = DataParallelCriterion(loss)

    return loss
def load_url(url, map_location=None):
    """Download a checkpoint once into ``~/.models`` and load it.

    Args:
        url: Source URL; its last path component is the cached filename.
        map_location: Forwarded to ``torch.load``.

    Returns:
        The object returned by ``torch.load`` for the cached file.
    """
    # os.path.join('~', ...) alone is a literal directory named '~' under
    # the current working directory; expand it to the user's home.
    model_dir = os.path.expanduser(os.path.join('~', '.models'))
    os.makedirs(model_dir, exist_ok=True)

    filename = url.split('/')[-1]
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file):
        Log.info('Downloading: "{}" to {}\n'.format(url, cached_file))
        urlretrieve(url, cached_file)

    Log.info('Loading pretrained model:{}'.format(cached_file))
    return torch.load(cached_file, map_location=map_location)
def __init__(self, dim_in, proj_dim=256, proj='convmlp', bn_type='inplace_abn'):
    """Build the projection layers.

    Args:
        dim_in: Number of input channels.
        proj_dim: Number of output channels of the projection.
        proj: ``'linear'`` for a single 1x1 convolution, ``'convmlp'`` for
            1x1 conv -> BN+ReLU -> 1x1 conv. For any other value
            ``self.proj`` is not created here.
        bn_type: Normalisation type passed to ``ModuleHelper.BNReLU``.
    """
    super(ProjectionHead, self).__init__()

    Log.info('proj_dim: {}'.format(proj_dim))

    if proj == 'convmlp':
        hidden_conv = nn.Conv2d(dim_in, dim_in, kernel_size=1)
        hidden_norm = ModuleHelper.BNReLU(dim_in, bn_type=bn_type)
        output_conv = nn.Conv2d(dim_in, proj_dim, kernel_size=1)
        self.proj = nn.Sequential(hidden_conv, hidden_norm, output_conv)
    elif proj == 'linear':
        self.proj = nn.Conv2d(dim_in, proj_dim, kernel_size=1)
def train(self):
    """Train ``self.cls_net`` until ``solver_dict['max_iters']`` iterations.

    Each pass over ``self.train_loader`` counts as one epoch. Statistics
    are logged and reset every ``display_iter`` iterations, and
    ``self.val()`` is called every ``test_interval`` iterations. When the
    learning-rate metric is ``'iters'``, reaching ``max_iters`` triggers a
    final ``self.val()`` and leaves the epoch loop.
    """
    self.cls_net.train()
    start_time = time.time()
    while self.runner_state['iters'] < self.solver_dict['max_iters']:
        # One pass over the train loader is one epoch.
        self.runner_state['epoch'] += 1
        for i, data_dict in enumerate(self.train_loader):
            Trainer.update(self, solver_dict=self.solver_dict)
            self.data_time.update(time.time() - start_time)

            # Forward pass.
            out = self.cls_net(data_dict)
            # Compute the loss of the train batch & backward.
            loss_dict = self.loss(out)
            loss = loss_dict['loss']
            self.train_losses.update(loss.item(), data_dict['img'].size(0))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the vars of the train phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()
            self.runner_state['iters'] += 1

            # Print the log info & reset the states.
            if self.runner_state['iters'] % self.solver_dict['display_iter'] == 0:
                # '{data_time.avg:.3f}' was '{data_time.avg:3f}', i.e. a
                # width of 3 instead of three decimals like the other timings.
                Log.info('Train Epoch: {0}\tTrain Iteration: {1}\t'
                         'Time {batch_time.sum:.3f}s / {2}iters, ({batch_time.avg:.3f})\t'
                         'Data load {data_time.sum:.3f}s / {2}iters, ({data_time.avg:.3f})\n'
                         'Learning rate = {3}\tLoss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                             self.runner_state['epoch'], self.runner_state['iters'],
                             self.solver_dict['display_iter'],
                             RunnerHelper.get_lr(self.optimizer), batch_time=self.batch_time,
                             data_time=self.data_time, loss=self.train_losses))
                self.batch_time.reset()
                self.data_time.reset()
                self.train_losses.reset()

            if self.solver_dict['lr']['metric'] == 'iters' \
                    and self.runner_state['iters'] == self.solver_dict['max_iters']:
                self.val()
                break

            # Check to val the current model.
            if self.runner_state['iters'] % self.solver_dict['test_interval'] == 0:
                self.val()
def load_model(model, pretrained=None, all_match=True):
    """Load pretrained weights into ``model``.

    Args:
        model: Module to load weights into.
        pretrained: Path of the weight file. ``model`` is returned untouched
            when this is None or the file does not exist.
        all_match: When True, every pretrained entry is loaded; a key ``k``
            is renamed to ``'prefix.<k>'`` if the model has that key. When
            False, only entries whose key exists in the model are loaded and
            the model's remaining weights are kept.

    Returns:
        The same ``model``.
    """
    if pretrained is None:
        return model

    if not os.path.exists(pretrained):
        Log.info('{} not exists.'.format(pretrained))
        return model

    Log.info('Loading pretrained model:{}'.format(pretrained))
    # Load onto CPU in both modes so the file can be read regardless of the
    # device it was saved from (the partial-match path used to omit this).
    pretrained_dict = torch.load(pretrained, map_location="cpu")
    model_dict = model.state_dict()

    if all_match:
        load_dict = dict()
        for k, v in pretrained_dict.items():
            prefixed = 'prefix.{}'.format(k)
            load_dict[prefixed if prefixed in model_dict else k] = v

        model.load_state_dict(load_dict)
    else:
        load_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        Log.info('Matched Keys: {}'.format(load_dict.keys()))
        model_dict.update(load_dict)
        model.load_state_dict(model_dict)

    return model
def update_performance(self):
    """Store the current score in the configer and log an improvement.

    The score is read from ``self.running_scores[self.save_net_main_key]``:
    mean IoU when ``self.save_net_metric`` is ``'miou'``, pixel accuracy
    when it is ``'acc'``. It is written to the configer key
    ``performance`` and a message is logged when it exceeds
    ``max_performance``. Any exception raised on the way is logged as a
    warning instead of propagating.
    """
    try:
        scores = self.running_scores[self.save_net_main_key]
        metric = self.save_net_metric
        if metric == 'miou':
            perf = scores.get_mean_iou()
        elif metric == 'acc':
            perf = scores.get_pixel_acc()

        best_so_far = self.configer.get('max_performance')
        self.configer.update(['performance'], perf)
        if perf > best_so_far:
            Log.info('Performance {} -> {}'.format(best_so_far, perf))
    except Exception as e:
        Log.warn(e)
def __read_list(self, data_dir, list_path):
    """Read ``(image path, filename, label)`` items from a list file.

    Every non-blank line holds a filename relative to ``data_dir``,
    optionally followed by a label. A line with exactly one field gets the
    label None.

    Args:
        data_dir: Directory the filenames are joined onto.
        list_path: Path of the list file.

    Returns:
        list: Tuples ``(img_path, filename, label)``. An entry that is
        missing on disk or is not an image logs an error and exits the
        process with status 1.
    """
    item_list = []
    with open(list_path, 'r') as fr:
        for line in fr:
            fields = line.strip().split()
            if not fields:
                # A blank line (e.g. trailing newline) used to raise IndexError.
                continue

            filename = fields[0]
            label = None if len(fields) == 1 else fields[1]
            img_path = os.path.join(data_dir, filename)
            if not os.path.exists(img_path) or not ImageHelper.is_img(img_path):
                Log.error('Image Path: {} is Invalid.'.format(img_path))
                exit(1)

            item_list.append((img_path, filename, label))

    Log.info('There are {} images..'.format(len(item_list)))
    return item_list
def _make_head(self, pre_stage_channels, bn_type, bn_momentum):
    """Build the head: channel-increase, downsampling and final layers.

    Args:
        pre_stage_channels: Channel count of each incoming resolution branch.
        bn_type: Batch-norm type passed to the helpers.
        bn_momentum: Batch-norm momentum.

    Returns:
        tuple: ``(incre_modules, downsamp_modules, final_layer)``.
    """
    head_block = Bottleneck
    head_channels = [32, 64, 128, 256]

    Log.info("pre_stage_channels: {}".format(pre_stage_channels))
    Log.info("head_channels: {}".format(head_channels))

    # One Bottleneck layer per branch raises its channel count to
    # head_channels[i] * head_block.expansion.
    incre_modules = nn.ModuleList([
        self._make_layer(head_block, channels, head_channels[i], 1,
                         bn_type=bn_type, bn_momentum=bn_momentum)
        for i, channels in enumerate(pre_stage_channels)
    ])

    def _conv_bn_relu(in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels,
                      out_channels=out_channels,
                      kernel_size=kernel_size,
                      stride=stride,
                      padding=padding),
            ModuleHelper.BatchNorm2d(bn_type=bn_type)(out_channels, momentum=bn_momentum),
            nn.ReLU(inplace=False))

    # Stride-2 3x3 convolutions between consecutive branches.
    downsamp_modules = nn.ModuleList([
        _conv_bn_relu(head_channels[i] * head_block.expansion,
                      head_channels[i + 1] * head_block.expansion,
                      kernel_size=3, stride=2, padding=1)
        for i in range(len(pre_stage_channels) - 1)
    ])

    # 1x1 convolution from the last head width to 2048 channels.
    final_layer = _conv_bn_relu(head_channels[3] * head_block.expansion, 2048,
                                kernel_size=1, stride=1, padding=0)

    return incre_modules, downsamp_modules, final_layer
def _init(self):
    """Register runtime counters in the configer and settle the BN type.

    ``network.bn_type`` defaults to ``'torchbn'`` when it is not
    configured. In the train phase, a BN type other than ``'torchbn'``
    requires more than one configured GPU (checked by assertion).
    """
    runtime_defaults = (
        ('iters', 0),
        ('last_iters', 0),
        ('epoch', 0),
        ('last_epoch', 0),
        ('max_performance', 0.0),
        ('performance', 0.0),
        ('min_val_loss', 9999.0),
        ('val_loss', 9999.0),
    )
    for key, value in runtime_defaults:
        self.configer.add([key], value)

    if not self.configer.exists('network', 'bn_type'):
        self.configer.add(['network', 'bn_type'], 'torchbn')

    if self.configer.get('phase') == 'train':
        assert len(self.configer.get('gpu')) > 1 or self.configer.get('network', 'bn_type') == 'torchbn'

    Log.info('BN Type is {}.'.format(self.configer.get('network', 'bn_type')))
def __init__(self, label_list, batch_size=64, min_cnt=0, max_cnt=-1, is_distributed=False):
    """Group sample indices by label and derive reverse class weights.

    Args:
        label_list: Per-sample label lists; the first label of each is the
            grouping key.
        batch_size: Stored on the sampler.
        min_cnt: Forwarded to ``random_sample``.
        max_cnt: Forwarded to ``random_sample``.
        is_distributed: When True, per-class counts are divided (rounding
            up) by the world size.

    ``self.class_rweight`` ends up as ``max_count / count`` per class,
    normalised so the weights sum to 1.
    """
    self.label_list = label_list
    self.label_indices_dict = dict()
    for index, mlabel in enumerate(self.label_list):
        self.label_indices_dict.setdefault(mlabel[0], []).append(index)

    self.is_distributed = is_distributed
    self.batch_size = batch_size
    self.min_cnt = min_cnt
    self.max_cnt = max_cnt
    sample_dict = random_sample(self.label_indices_dict, self.min_cnt, self.max_cnt)
    self.num_samples = 0
    self.epoch = 0
    self.class_rweight = []

    if self.is_distributed:
        self.num_replicas = dist.get_world_size()
        self.rank = dist.get_rank()

    for key in sample_dict:
        class_count = len(sample_dict[key])
        if self.is_distributed:
            # Each replica handles its share of the class, rounded up.
            class_count = int(math.ceil(class_count * 1.0 / self.num_replicas))

        self.num_samples += class_count
        self.class_rweight.append(class_count + 0.0)

    # Invert the counts relative to the largest class, then normalise.
    largest = max(self.class_rweight)
    inverted = []
    weight_total = 0.0
    for count in self.class_rweight:
        ratio = largest / count
        inverted.append(ratio)
        weight_total += ratio

    self.class_rweight = [ratio / weight_total for ratio in inverted]

    Log.info(
        'ReverseSampler: The number of resampled images is {}...'.format(
            self.num_samples))
def get_dataloader_sampler(self, klass, split, dataset):
    """Create the dataset object and its sampler for a split.

    Args:
        klass: Dataset class to instantiate.
        split: ``'train'`` or ``'val'``; selects the augmentation transform
            and the sampler policy.
        dataset: Dataset/split name forwarded to the dataset class.

    Returns:
        tuple: ``(loader, sampler)``. ``sampler`` is a ``DistributedSampler``
        in distributed mode, a ``MultiDatasetTrainingSampler`` for
        multi-dataset training, and None otherwise.

    Raises:
        RuntimeError: For an unknown ``split``, an unsupported
            ``data.data_dir`` type, or distributed multi-dataset training.
    """
    from lib.datasets.loader.multi_dataset_loader import MultiDatasetLoader, MultiDatasetTrainingSampler

    # An unknown split used to surface as UnboundLocalError on 'sampler' at
    # the return statement; reject it up front with a clear message.
    if split not in ('train', 'val'):
        raise RuntimeError('Unknown split {}'.format(split))

    root_dir = self.configer.get('data', 'data_dir')
    if isinstance(root_dir, list) and len(root_dir) == 1:
        root_dir = root_dir[0]

    kwargs = dict(
        dataset=dataset,
        aug_transform=(self.aug_train_transform if split == 'train' else self.aug_val_transform),
        img_transform=self.img_transform,
        torch_img_transform=(self.torch_img_transform if split == 'train' else None),
        label_transform=self.label_transform,
        configer=self.configer
    )

    if isinstance(root_dir, str):
        loader = klass(root_dir, **kwargs)
        multi_dataset = False
    elif isinstance(root_dir, list):
        loader = MultiDatasetLoader(root_dir, klass, **kwargs)
        multi_dataset = True
        Log.info('use multi-dataset for {}...'.format(dataset))
    else:
        raise RuntimeError('Unknown root dir {}'.format(root_dir))

    # The multi-dataset sampler is only used for training.
    multi_train = split == 'train' and multi_dataset
    if is_distributed():
        if multi_train:
            raise RuntimeError('Currently multi dataset doesn\'t support distributed.')
        sampler = torch.utils.data.distributed.DistributedSampler(loader)
    elif multi_train:
        sampler = MultiDatasetTrainingSampler(loader)
    else:
        sampler = None

    return loader, sampler
def __init__(self, label_list, batch_size=64, min_cnt=0, max_cnt=-1):
    """Group sample indices by label and count the resampled images.

    Args:
        label_list: Per-sample label lists; the first label of each is the
            grouping key.
        batch_size: Stored on the sampler.
        min_cnt: Forwarded to ``random_sample``.
        max_cnt: Forwarded to ``random_sample``.
    """
    self.label_list = label_list
    self.label_indices_dict = dict()
    for index, mlabel in enumerate(self.label_list):
        self.label_indices_dict.setdefault(mlabel[0], []).append(index)

    self.batch_size = batch_size
    self.min_cnt = min_cnt
    self.max_cnt = max_cnt
    sample_dict = random_sample(self.label_indices_dict, self.min_cnt, self.max_cnt)

    total = 0
    for key in sample_dict:
        total += len(sample_dict[key])
    self.num_samples = total

    Log.info('The number of resampled images is {}...'.format(self.num_samples))
def __read_file(self, root_dir, dataset, label_path):
    """Collect image paths and integer label lists from a label file.

    Every non-blank line is split on whitespace: the first field is an
    image path relative to ``root_dir`` and the remaining fields are parsed
    as integers. Entries that are missing on disk or are not images are
    skipped with a warning.

    Args:
        root_dir: Directory the relative image paths are joined onto.
        dataset: Split name, used only in the log message.
        label_path: Path of the label file.

    Returns:
        tuple: ``(img_list, mlabel_list)``; full image paths and, per image,
        its list of integer labels.

    Raises:
        AssertionError: If no valid image was found.
    """
    img_list = list()
    mlabel_list = list()

    with open(label_path, 'r') as file_stream:
        for line in file_stream:
            line_items = line.rstrip().split()
            if not line_items:
                # A blank line (e.g. at the end of the file) used to raise
                # IndexError on line_items[0].
                continue

            path = line_items[0]
            full_path = os.path.join(root_dir, path)
            if not os.path.exists(full_path) or not ImageHelper.is_img(path):
                Log.warn('Invalid Image Path: {}'.format(full_path))
                continue

            img_list.append(full_path)
            mlabel_list.append([int(item) for item in line_items[1:]])

    assert len(img_list) > 0
    Log.info('Length of {} imgs is {}...'.format(dataset, len(img_list)))
    return img_list, mlabel_list