def train_model(model, dataset, cfg, distributed=False, timestamp=None, meta=None):
    """Kick off training, optionally warm-starting from ``cfg.load_from``.

    Dispatches to the distributed or single-process training loop depending
    on the ``distributed`` flag.
    """
    logger = get_root_logger(cfg.log_level)
    if cfg.load_from:
        # Warm-start the model weights from the configured checkpoint.
        load_checkpoint(model, cfg.load_from, map_location='cpu',
                        strict=False, logger=logger)
        print_log(
            "*****loading from {} to init the model****".format(cfg.load_from),
            logger)
    # start training
    trainer = _dist_train if distributed else _non_dist_train
    trainer(model, dataset, cfg, logger=logger, timestamp=timestamp, meta=meta)
def init_weights(self, pretrained=None):
    """Initialize module weights.

    If ``pretrained`` is a path string, load weights from that checkpoint;
    if ``None``, apply BERT-style default initialization (normal init for
    Linear/Embedding, unit-scale/zero-bias for LayerNorm, zero Linear bias).

    Raises:
        TypeError: if ``pretrained`` is neither a str nor ``None``.
    """
    if isinstance(pretrained, str):
        load_checkpoint(self, pretrained, strict=False,
                        logger=logging.getLogger(__name__))
    elif pretrained is None:
        for mod in self.modules():
            if isinstance(mod, (nn.Linear, nn.Embedding)):
                # std comes from the model config — TODO confirm key exists
                mod.weight.data.normal_(
                    mean=0.0, std=self.config.get('initializer_range'))
            elif isinstance(mod, BertLayerNorm):
                mod.bias.data.zero_()
                mod.weight.data.fill_(1.0)
            # Linear bias is zeroed regardless of the branch above.
            if isinstance(mod, nn.Linear) and mod.bias is not None:
                mod.bias.data.zero_()
    else:
        raise TypeError('pretrained must be a str or None')
def init_weights(self, pretrained=None):
    """Init backbone weights

    Args:
        pretrained (str | None): If pretrained is a string, then it
            initializes backbone weights by loading the pretrained
            checkpoint. If pretrained is None, then it follows default
            initializer or customized initializer in subclasses.
    """
    if pretrained is None:
        # use default initializer or customized initializer in subclasses
        return
    if not isinstance(pretrained, str):
        raise TypeError('pretrained must be a str or None.'
                        f' But received {type(pretrained)}.')
    logger = get_root_logger()
    load_checkpoint(self, pretrained, strict=False, logger=logger)
def init_weights(self, pretrained=None):
    """Initialize ResNet-style backbone weights.

    If ``pretrained`` is a path string, load weights from that checkpoint.
    If ``None``, Kaiming-init convolutions, unit-init norm layers, and —
    when ``self.zero_init_residual`` is set — zero the last norm of each
    residual block so blocks start as identity mappings.

    Raises:
        TypeError: if ``pretrained`` is neither a str nor ``None``.
    """
    if isinstance(pretrained, str):
        load_checkpoint(self, pretrained, strict=False,
                        logger=get_root_logger())
    elif pretrained is None:
        for mod in self.modules():
            if isinstance(mod, nn.Conv2d):
                kaiming_init(mod)
            elif isinstance(mod, (_BatchNorm, nn.GroupNorm)):
                constant_init(mod, 1)
        if self.zero_init_residual:
            # Zero the residual branch's final norm layer.
            for mod in self.modules():
                if isinstance(mod, Bottleneck):
                    constant_init(mod.norm3, 0)
                elif isinstance(mod, BasicBlock):
                    constant_init(mod.norm2, 0)
    else:
        raise TypeError('pretrained must be a str or None')
def main():
    """Distributed evaluation entry point.

    Loads a trained epoch checkpoint, runs multi-GPU inference over the
    test split and, on rank 0, writes the predicted answers to a
    ``test_submit_<epoch>.json`` submission file under ``cfg.work_dir``.
    """
    args = parse_args()
    cfg = commons.Config.fromfile(args.config)

    # set cudnn_benchmark (speeds up inference with fixed input shapes)
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.data.test.test_mode = True

    # init distributed env first, since the dataloader depends on the dist info
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        imgs_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=distributed,
        shuffle=False)

    # build the model and load the requested epoch checkpoint
    model = build_model(cfg.model)
    check_item = args.checkpoint[0]
    load_checkpoint(
        model,
        os.path.join(cfg.work_dir, 'epoch_' + str(check_item) + '.pth'),
        map_location='cpu')

    # pin this process to its GPU before moving the model
    gpu_id = dist.get_rank() % torch.cuda.device_count()
    torch.cuda.set_device(gpu_id)
    model = model.cuda()

    if cfg.fp_16.enable:
        model = amp.initialize(model,
                               opt_level=cfg.fp_16.opt_level,
                               loss_scale=cfg.fp_16.loss_scale,
                               max_loss_scale=cfg.fp_16.max_loss_scale)
        print('**** Initializing mixed precision done. \n****')
    model = MMDistributedDataParallel(
        model,
        device_ids=[torch.cuda.current_device()],
        broadcast_buffers=False,
    )

    outputs = multi_gpu_test(model, data_loader, args.tmpdir)

    rank, _ = get_dist_info()
    if rank == 0:
        output_path = os.path.join(cfg.work_dir, "test_results")
        commons.mkdir_or_exist(output_path)
        # Keep a temporary raw dump of the outputs; removed after the
        # submission file is written. Fixed: use a context manager so the
        # file handle is actually closed (the original leaked it).
        with open("outputs.pkl", 'wb') as f:
            pickle.dump(outputs, f)
        out_list = []
        ids = outputs["ids"]
        preds = outputs["pred"]
        # Renamed loop variable: `id` shadowed the builtin.
        for sample_id, pred in zip(ids, preds):
            q_id = dataset.q_id_list[int(sample_id)]
            pred_index = np.argmax(pred, axis=0)
            answer = dataset.label2ans[pred_index]
            out_list.append({'question_id': q_id, 'answer': answer})
        print('\nwriting results to {}'.format(output_path))
        commons.dump(
            out_list,
            os.path.join(output_path,
                         "test_submit_{0}.json".format(str(check_item))))
        # Fixed: delete the temp file directly instead of shelling out
        # `rm -rf` via os.system.
        os.remove("outputs.pkl")