Example #1
def train_model(model,
                dataset,
                cfg,
                distributed=False,
                timestamp=None,
                meta=None):
    logger = get_root_logger(cfg.log_level)

    # optionally initialize the model from the checkpoint given in the config
    if cfg.load_from:
        load_checkpoint(model,
                        cfg.load_from,
                        map_location='cpu',
                        strict=False,
                        logger=logger)
        print_log(
            'loading from {} to init the model'.format(cfg.load_from),
            logger)

    # start training
    if distributed:
        _dist_train(model,
                    dataset,
                    cfg,
                    logger=logger,
                    timestamp=timestamp,
                    meta=meta)
    else:
        _non_dist_train(model,
                        dataset,
                        cfg,
                        logger=logger,
                        timestamp=timestamp,
                        meta=meta)
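
A hedged sketch of how a dispatcher like this is typically driven: load a config, build the model and dataset with the project's builders, then hand everything to train_model. The build_model / build_dataset names are taken from Example #5 and are assumptions about this project rather than a public API; mmcv-style Config is used here only for illustration (Example #5 uses its own commons.Config wrapper), and the config path is a placeholder.

# Illustrative caller only; build_model / build_dataset are the project
# builders shown in Example #5, and the config path is a placeholder.
from mmcv import Config

cfg = Config.fromfile('configs/my_experiment.py')
model = build_model(cfg.model)
dataset = build_dataset(cfg.data.train)
train_model(model, dataset, cfg, distributed=False, timestamp=None, meta=None)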
Example #2
def init_weights(self, pretrained=None):
    if isinstance(pretrained, str):
        logger = logging.getLogger(__name__)
        load_checkpoint(self, pretrained, strict=False, logger=logger)
    elif pretrained is None:
        # BERT-style default init: normal-distributed weights for Linear and
        # Embedding layers, unit weight and zero bias for LayerNorm.
        for m in self.modules():
            if isinstance(m, (nn.Linear, nn.Embedding)):
                m.weight.data.normal_(
                    mean=0.0, std=self.config.get('initializer_range'))
            elif isinstance(m, BertLayerNorm):
                m.bias.data.zero_()
                m.weight.data.fill_(1.0)
            # Linear biases are zeroed regardless of the branch above.
            if isinstance(m, nn.Linear) and m.bias is not None:
                m.bias.data.zero_()
    else:
        raise TypeError('pretrained must be a str or None')
Example #3
def init_weights(self, pretrained=None):
    """Init backbone weights.

    Args:
        pretrained (str | None): If pretrained is a string, then it
            initializes backbone weights by loading the pretrained
            checkpoint. If pretrained is None, then it follows default
            initializer or customized initializer in subclasses.
    """
    if isinstance(pretrained, str):
        logger = get_root_logger()
        load_checkpoint(self, pretrained, strict=False, logger=logger)
    elif pretrained is None:
        # use default initializer or customized initializer in subclasses
        pass
    else:
        raise TypeError('pretrained must be a str or None.'
                        f' But received {type(pretrained)}.')
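
A minimal, self-contained sketch of the calling convention the docstring describes. TinyBackbone and the checkpoint path are hypothetical and exist only for illustration; in the real code the string branch calls load_checkpoint rather than printing.

import torch.nn as nn

class TinyBackbone(nn.Module):
    # Toy module that follows the same init_weights() contract as above.
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, 3)

    def init_weights(self, pretrained=None):
        if isinstance(pretrained, str):
            print(f'would load weights from {pretrained}')
        elif pretrained is None:
            pass  # keep the default initializer
        else:
            raise TypeError('pretrained must be a str or None.'
                            f' But received {type(pretrained)}.')

backbone = TinyBackbone()
backbone.init_weights('checkpoints/backbone.pth')  # placeholder path
backbone.init_weights(None)                        # default initialization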
Example #4
def init_weights(self, pretrained=None):
    if isinstance(pretrained, str):
        logger = get_root_logger()
        load_checkpoint(self, pretrained, strict=False, logger=logger)
    elif pretrained is None:
        # Kaiming init for conv layers, constant init for norm layers.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                kaiming_init(m)
            elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
                constant_init(m, 1)

        # Zero-init the last norm layer of each residual block so the block
        # starts out close to an identity mapping.
        if self.zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    constant_init(m.norm3, 0)
                elif isinstance(m, BasicBlock):
                    constant_init(m.norm2, 0)
    else:
        raise TypeError('pretrained must be a str or None')
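
For readers without mmcv at hand, kaiming_init and constant_init are thin wrappers over torch.nn.init. Below is a rough plain-PyTorch approximation of the default branch; it is a sketch, not the exact mmcv implementation, and mmcv's defaults may differ in detail.

import torch.nn as nn

def default_init(module: nn.Module) -> None:
    # Approximate stand-in for the kaiming_init / constant_init calls above.
    for m in module.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.zeros_(m.bias)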
Example #5
def main():
    args = parse_args()
    cfg = commons.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # init the distributed env first, since the rest depends on the dist info
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(dataset,
                                   imgs_per_gpu=1,
                                   workers_per_gpu=cfg.data.workers_per_gpu,
                                   dist=distributed,
                                   shuffle=False)
    # build the model and load checkpoint
    model = build_model(cfg.model)
    check_item = args.checkpoint[0]
    checkpoint = load_checkpoint(model,
                                 os.path.join(
                                     cfg.work_dir,
                                     'epoch_' + str(check_item) + '.pth'),
                                 map_location='cpu')
    label2ans = dataset.label2ans

    # NOTE: this test script assumes a distributed launch; multi_gpu_test and
    # MMDistributedDataParallel are used unconditionally below.
    gpu_id = dist.get_rank() % torch.cuda.device_count()
    torch.cuda.set_device(gpu_id)
    model = model.cuda()
    if cfg.fp_16.enable:
        model = amp.initialize(model,
                               opt_level=cfg.fp_16.opt_level,
                               loss_scale=cfg.fp_16.loss_scale,
                               max_loss_scale=cfg.fp_16.max_loss_scale)
        print('**** Mixed-precision initialization done. ****')
    model = MMDistributedDataParallel(
        model,
        device_ids=[torch.cuda.current_device()],
        broadcast_buffers=False,
    )
    outputs = multi_gpu_test(model, data_loader, args.tmpdir)

    rank, _ = get_dist_info()
    if rank == 0:
        output_path = os.path.join(cfg.work_dir, "test_results")
        commons.mkdir_or_exist(output_path)
        out_list = []
        # Persist the raw outputs temporarily; the file is removed at the end.
        with open('outputs.pkl', 'wb') as f:
            pickle.dump(outputs, f)

        ids = outputs["ids"]
        preds = outputs["pred"]

        for q_idx, pred in zip(ids, preds):
            q_id = dataset.q_id_list[int(q_idx)]
            pred_index = np.argmax(pred, axis=0)
            answer = label2ans[pred_index]
            out_list.append({'question_id': q_id, 'answer': answer})

        print('\nwriting results to {}'.format(output_path))
        commons.dump(
            out_list,
            os.path.join(output_path,
                         "test_submit_{0}.json".format(str(check_item))))
        os.remove("outputs.pkl")