Example #1
    distributed = False
    if 'WORLD_SIZE' in os.environ:
        distributed = int(os.environ['WORLD_SIZE']) > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
    dist_print(datetime.datetime.now().strftime('[%Y/%m/%d %H:%M:%S]') +
               ' start training...')
    dist_print(cfg)
    assert cfg.backbone in [
        '18', '34', '50', '101', '152', '50next', '101next', '50wide',
        '101wide'
    ]

    train_loader, cls_num_per_lane = get_train_loader(
        cfg.batch_size, cfg.data_root, cfg.griding_num, cfg.dataset,
        cfg.use_aux, distributed, cfg.num_lanes)

    net = parsingNet(pretrained=True,
                     backbone=cfg.backbone,
                     cls_dim=(cfg.griding_num + 1, cls_num_per_lane,
                              cfg.num_lanes),
                     use_aux=cfg.use_aux).cuda()

    if distributed:
        net = torch.nn.parallel.DistributedDataParallel(
            net, device_ids=[args.local_rank])
    optimizer = get_optimizer(net, cfg)

    if cfg.finetune is not None:
        dist_print('finetune from ', cfg.finetune)
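
All of these examples log through a dist_print helper that is defined outside the excerpts. A minimal sketch of how such a rank-aware printer is commonly written, assuming it should emit output only on the main process (the function name comes from the snippets; the repository's actual implementation may differ):

import torch.distributed as dist

def dist_print(*args, **kwargs):
    # Print only on rank 0 so multi-GPU runs do not repeat every
    # log line once per worker process.
    if not dist.is_available() or not dist.is_initialized() or dist.get_rank() == 0:
        print(*args, **kwargs)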
Example #2
    work_dir = get_work_dir(cfg)

    distributed = False
    if 'WORLD_SIZE' in os.environ:
        distributed = int(os.environ['WORLD_SIZE']) > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
    dist_print(datetime.datetime.now().strftime('[%Y/%m/%d %H:%M:%S]') + ' start training...')
    dist_print(cfg)
    assert cfg.backbone in ['18','34','50','101','152','50next','101next','50wide','101wide']


    train_loader, num_anchors = get_train_loader(cfg.batch_size, cfg.data_root, cfg.griding_num, cfg.dataset, cfg.use_aux, distributed, cfg.num_lanes, cfg.num_classes)

    net = parsingNet(
        pretrained=True,
        backbone=cfg.backbone,
        det_dim=(cfg.griding_num+1, num_anchors, cfg.num_lanes),
        cls_dim=(cfg.num_classes, cfg.num_lanes),
        use_aux=cfg.use_aux
    ).cuda()

    if distributed:
        net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.local_rank])
    optimizer = get_optimizer(net, cfg)

    if cfg.finetune is not None:
        dist_print('finetune from ', cfg.finetune)
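
Both examples above read args.local_rank and check the WORLD_SIZE environment variable, which matches the older torch.distributed.launch launcher: it sets WORLD_SIZE for every worker and passes each one a --local_rank argument. A minimal sketch of the corresponding argument parsing, assuming a standard argparse setup not shown in these excerpts:

import argparse

parser = argparse.ArgumentParser()
# torch.distributed.launch passes --local_rank to each worker process;
# the default of 0 covers plain single-GPU runs.
parser.add_argument('--local_rank', type=int, default=0)
args = parser.parse_args()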
Example #3
    distributed = False
    if 'WORLD_SIZE' in os.environ:
        distributed = int(os.environ['WORLD_SIZE']) > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
    w, h = cfg['dataset']['w'], cfg['dataset']['h']
    net = parsingNet(network=cfg['network'], datasets=cfg['dataset']).cuda()
    if distributed:
        net = torch.nn.parallel.DistributedDataParallel(
            net, device_ids=[args.local_rank])
    # try:
    #     from thop import profile
    #     macs, params = profile(net, inputs=(torch.zeros(1, 3, h, w).to(device),))
    #     ms = 'FLOPs:  %.2f GFLOPS, Params: %.2f M' % (macs / 1E9, params / 1E6)
    # except Exception:
    #     ms = 'Model profile error'
    # logger.log(ms)
    train_loader = get_train_loader(cfg['dataset'], args.local_rank)
    test_loader = get_test_loader(cfg['dataset'], args.local_rank)
    optimizer = get_optimizer(net, cfg['train'])

    if cfg['finetune'] is not None:
        state_all = torch.load(cfg['finetune'])['model']
        state_clip = {}  # keep only backbone weights (checkpoint keys containing 'model')
        for k, v in state_all.items():
            if 'model' in k:
                state_clip[k] = v
        net.load_state_dict(state_clip, strict=False)
    if cfg['resume'] is not None:
        logger.log('==> Resume model from ' + cfg['resume'])
        resume_dict = torch.load(cfg['resume'], map_location='cpu')
        net.load_state_dict(resume_dict['model'])
        if 'optimizer' in resume_dict.keys():
            optimizer.load_state_dict(resume_dict['optimizer'])
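
The resume branch above expects a checkpoint dictionary holding a 'model' entry and, optionally, an 'optimizer' entry. A minimal sketch of a save routine that would produce such a file, assuming only the key names visible in this excerpt (the repository's actual saving code is not shown):

import torch

def save_checkpoint(net, optimizer, path):
    # Store state dicts under the keys the resume logic looks up:
    # 'model' for the network and 'optimizer' for the optimizer.
    torch.save({'model': net.state_dict(),
                'optimizer': optimizer.state_dict()}, path)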
Example #4
    work_dir = get_work_dir(cfg)

    distributed = False
    if 'WORLD_SIZE' in os.environ:
        distributed = int(os.environ['WORLD_SIZE']) > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
    dist_print(datetime.datetime.now().strftime('[%Y/%m/%d %H:%M:%S]') +
               ' start training...')
    dist_print(cfg)

    train_loader = get_train_loader(cfg.batch_size, cfg.data_root, cfg.dataset,
                                    distributed)

    net = E2ENet(Channels=96,
                 nums_lane=4,
                 culomn_channels=cfg.griding_num,
                 row_channels=cfg.row_num,
                 initialed=True)

    net.to(device)

    if distributed:
        net = torch.nn.parallel.DistributedDataParallel(
            net, device_ids=[args.local_rank])

    if cfg.finetune is not None:
        dist_print('finetune from ', cfg.finetune)
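
This last example moves the network with net.to(device), but device itself is defined outside the excerpt. A minimal sketch of a device selection consistent with the one-process-per-GPU setup used above (an assumption; the snippet does not show how device is actually built):

import os
import torch

# One process per GPU: bind this worker to its local device and fall
# back to CPU when CUDA is unavailable. torchrun exports LOCAL_RANK;
# torch.distributed.launch passes --local_rank instead.
local_rank = int(os.environ.get('LOCAL_RANK', 0))
device = torch.device('cuda', local_rank) if torch.cuda.is_available() else torch.device('cpu')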