def init_model(self):
    torch.backends.cudnn.benchmark = True
    self.args, self.cfg = merge_config()
    assert self.cfg.backbone in ['18', '34', '50', '101', '152', '50next', '101next', '50wide', '101wide']

    if self.cfg.dataset == 'CULane':
        cls_num_per_lane = 18
    elif self.cfg.dataset == 'Tusimple':
        cls_num_per_lane = 56
    else:
        raise NotImplementedError

    # we don't need auxiliary segmentation when testing
    self.net = parsingNet(pretrained=False, backbone=self.cfg.backbone,
                          cls_dim=(self.cfg.griding_num + 1, cls_num_per_lane, 4),
                          use_aux=False).cuda()

    state_dict = torch.load(self.cfg.test_model, map_location='cpu')['model']
    compatible_state_dict = {}
    for k, v in state_dict.items():
        # strip the 'module.' prefix left by DataParallel / DistributedDataParallel
        if 'module.' in k:
            compatible_state_dict[k[7:]] = v
        else:
            compatible_state_dict[k] = v
    self.net.load_state_dict(compatible_state_dict, strict=False)
    self.net.eval()

    self.img_transforms = transforms.Compose([
        transforms.Resize((288, 800)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
def __init__(self):
    torch.backends.cudnn.benchmark = True

    if CFG.dataset == 'CULane':
        self.cls_num_per_lane = 18
    elif CFG.dataset == 'Tusimple':
        self.cls_num_per_lane = 56
    else:
        raise NotImplementedError

    self.net = parsingNet(pretrained=False, backbone=CFG.backbone,
                          cls_dim=(CFG.griding_num + 1, self.cls_num_per_lane, 4),
                          use_aux=False).cuda()

    state_dict = torch.load(CFG.test_model, map_location='cpu')['model']
    compatible_state_dict = {}
    for k, v in state_dict.items():
        if 'module.' in k:
            compatible_state_dict[k[7:]] = v
        else:
            compatible_state_dict[k] = v
    self.net.load_state_dict(compatible_state_dict, strict=False)
    self.net.eval()

    self.imgTransform = transforms.Compose([
        transforms.Resize((288, 800)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # note: always uses CULane row anchors; switch to tusimple_row_anchor for the Tusimple dataset
    self.row_anchor = culane_row_anchor
def __init__(self):
    torch.backends.cudnn.benchmark = True
    self.args, self.cfg = merge_config()

    # Tusimple settings: 56 row anchors
    self.cls_num_per_lane = 56
    self.row_anchor = tusimple_row_anchor

    self.net = parsingNet(pretrained=False, backbone=self.cfg.backbone,
                          cls_dim=(self.cfg.griding_num + 1, self.cls_num_per_lane, self.cfg.num_lanes),
                          use_aux=False).cuda()

    state_dict = torch.load(self.cfg.test_model, map_location='cpu')['model']
    compatible_state_dict = {}
    for k, v in state_dict.items():
        if 'module.' in k:
            compatible_state_dict[k[7:]] = v
        else:
            compatible_state_dict[k] = v
    self.net.load_state_dict(compatible_state_dict, strict=False)  # do not comment out this line
    self.net.eval()

    self.img_transforms = transforms.Compose([
        transforms.Resize((288, 800)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    self.img_w = 960
    self.img_h = 480
    self.scale_factor = 1
    self.color = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]

    self.idx = np.arange(self.cfg.griding_num) + 1
    self.idx = self.idx.reshape(-1, 1, 1)

    self.cpu_img = None
    self.gpu_img = None
    self.type = None
    self.gpu_output = None
    self.cpu_output = None

    col_sample = np.linspace(0, 800 - 1, self.cfg.griding_num)
    self.col_sample_w = col_sample[1] - col_sample[0]
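# A hedged usage sketch (assumption): 'detect' below is a hypothetical method, not part of
# the original __init__. It feeds one PIL image through the transforms defined above; the
# output layout follows the cls_dim passed to parsingNet
# (griding_num + 1 grid cells x cls_num_per_lane row anchors x num_lanes lanes).
def detect(self, pil_img):
    # preprocess to the network's 288x800 input and add a batch dimension
    x = self.img_transforms(pil_img).unsqueeze(0).cuda()
    with torch.no_grad():
        out = self.net(x)
    return out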
if cfg.dataset == 'CULane':
    cls_num_per_lane = 18
    lane_num = 4
elif cfg.dataset == 'Bdd100k':
    cls_num_per_lane = 56  # 18
    lane_num = 4  # 14
elif cfg.dataset == 'Tusimple':
    cls_num_per_lane = 56
    lane_num = 4
else:
    raise NotImplementedError

# we don't need auxiliary segmentation when testing
net = parsingNet(pretrained=False,
                 backbone=cfg.backbone,
                 cls_dim=(cfg.griding_num + 1, cls_num_per_lane, lane_num),
                 use_aux=False).cuda()

state_dict = torch.load(cfg.test_model, map_location='cpu')['model']
compatible_state_dict = {}
for k, v in state_dict.items():
    if 'module.' in k:
        compatible_state_dict[k[7:]] = v
    else:
        compatible_state_dict[k] = v
net.load_state_dict(compatible_state_dict, strict=False)
net.eval()

img_transforms = transforms.Compose([
    transforms.Resize((288, 800)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
torch.distributed.init_process_group(backend='nccl', init_method='env://')

dist_print(datetime.datetime.now().strftime('[%Y/%m/%d %H:%M:%S]') + ' start training...')
dist_print(cfg)
assert cfg.backbone in ['18', '34', '50', '101', '152', '50next', '101next', '50wide', '101wide']

train_loader, cls_num_per_lane = get_train_loader(cfg.batch_size, cfg.data_root,
                                                  cfg.griding_num, cfg.dataset,
                                                  cfg.use_aux, distributed, cfg.num_lanes)

net = parsingNet(pretrained=True,
                 backbone=cfg.backbone,
                 cls_dim=(cfg.griding_num + 1, cls_num_per_lane, cfg.num_lanes),
                 use_aux=cfg.use_aux).cuda()

if distributed:
    net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.local_rank])

optimizer = get_optimizer(net, cfg)

if cfg.finetune is not None:
    dist_print('finetune from ', cfg.finetune)
    state_all = torch.load(cfg.finetune)['model']
    state_clip = {}  # only use backbone parameters
    for k, v in state_all.items():
        if 'model' in k:
            state_clip[k] = v
args = parse_args()
with open(args.params) as f:
    cfg = yaml.load(f, Loader=yaml.FullLoader)  # data dict

logger = Logger(args.local_rank, cfg['log_path'])
logger.log('start training')
assert cfg['network']['backbone'] in ['resnet_18', '34', 'mobilenetv2']

distributed = False
if 'WORLD_SIZE' in os.environ:
    distributed = int(os.environ['WORLD_SIZE']) > 1
if distributed:
    torch.cuda.set_device(args.local_rank)
    torch.distributed.init_process_group(backend='nccl', init_method='env://')

w, h = cfg['dataset']['w'], cfg['dataset']['h']
net = parsingNet(network=cfg['network'], datasets=cfg['dataset']).cuda()
if distributed:
    net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.local_rank])

# try:
#     from thop import profile
#     macs, params = profile(net, inputs=(torch.zeros(1, 3, h, w).to(device)))
#     ms = 'FLOPs: %.2f GFLOPS, Params: %.2f M' % (macs / 1E9, params / 1E6)
# except:
#     ms = 'Model profile error'
# logger.log(ms)

train_loader = get_train_loader(cfg['dataset'], args.local_rank)
test_loader = get_test_loader(cfg['dataset'], args.local_rank)

optimizer = get_optimizer(net, cfg['train'])

if cfg['finetune'] is not None:
import torch
import time
from model.model import parsingNet

# torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

net = parsingNet(pretrained=False, backbone='18', cls_dim=(100 + 1, 56, 4), use_aux=False).cuda()
# net = parsingNet(pretrained=False, backbone='18', cls_dim=(200 + 1, 18, 4), use_aux=False).cuda()
net.eval()

x = torch.zeros((1, 3, 288, 800)).cuda() + 1

# warm-up runs
for i in range(10):
    y = net(x)

t_all = 0
for i in range(100):
    t1 = time.time()
    y = net(x)
    t2 = time.time()
    t_all += t2 - t1

print('avg_time:', t_all / 100)
print('avg_fps:', 100 / t_all)
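# A minimal timing sketch (assumption, not part of the original benchmark): CUDA kernels
# launch asynchronously, so wall-clock timings are more reliable when the GPU is
# synchronized around the measured region. 'benchmark' is a hypothetical helper name.
import time
import torch

def benchmark(net, x, iters=100):
    torch.cuda.synchronize()          # wait for pending work before starting the clock
    t1 = time.time()
    with torch.no_grad():
        for _ in range(iters):
            y = net(x)
    torch.cuda.synchronize()          # wait for the last forward pass to finish
    t2 = time.time()
    return (t2 - t1) / iters          # average seconds per forward pass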
import torch
import torchvision.transforms as transforms
from model.model import parsingNet
from data.dataset import LaneTestDataset
from data.constant import culane_row_anchor, tusimple_row_anchor
from PIL import Image

# Export to TorchScript so the model can be used from LibTorch
torch.backends.cudnn.benchmark = True

# CULane settings; change these lines if you are using TuSimple
num_anchors = 18
griding_num = 200
backbone = '18'

net = parsingNet(pretrained=False, backbone=backbone,
                 det_dim=(griding_num + 1, num_anchors, 4),
                 use_aux=False)

# Change test_model to where your model is stored.
test_model = '/data/Models/UltraFastLaneDetection/culane_18.pth'
# state_dict = torch.load(test_model, map_location='cpu')['model']  # CPU
state_dict = torch.load(test_model, map_location='cuda')['model']  # CUDA

compatible_state_dict = {}
for k, v in state_dict.items():
    if 'module.' in k:
        compatible_state_dict[k[7:]] = v
    else:
        compatible_state_dict[k] = v
net.load_state_dict(compatible_state_dict, strict=False)
import torch
from torch2trt import torch2trt
from torch2trt import TRTModule
from model.model import parsingNet

# create a regular PyTorch model
model = parsingNet(pretrained=False, cls_dim=(100 + 1, 56, 4), use_aux=False).cuda()

state_dict = torch.load("/home/dji/Lane_fast/Ultra-Fast-Lane-Detection-ori/tusimple_18.pth",
                        map_location='cpu')['model']
compatible_state_dict = {}
for k, v in state_dict.items():
    if 'module.' in k:
        compatible_state_dict[k[7:]] = v
    else:
        compatible_state_dict[k] = v
model.load_state_dict(compatible_state_dict, strict=False)
model.eval()

# create example data
x = torch.ones((1, 3, 288, 800)).cuda()

# convert to TensorRT, feeding sample data as input
model_trt = torch2trt(model, [x])

# check the output against PyTorch
y = model(x)
y_trt = model_trt(x)
print(y.shape)
print(y_trt.shape)
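# A possible follow-up (assumption; the snippet above imports TRTModule but never uses it):
# torch2trt's documented workflow lets you save the converted engine as a state_dict and
# reload it later without repeating the conversion. 'lane_trt.pth' is a hypothetical filename.
torch.save(model_trt.state_dict(), 'lane_trt.pth')

model_trt_loaded = TRTModule()
model_trt_loaded.load_state_dict(torch.load('lane_trt.pth'))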
distributed = int(os.environ['WORLD_SIZE']) > 1
if distributed:
    torch.cuda.set_device(args.local_rank)
    torch.distributed.init_process_group(backend='nccl', init_method='env://')

dist_print(datetime.datetime.now().strftime('[%Y/%m/%d %H:%M:%S]') + ' start training...')
dist_print(cfg)
assert cfg.backbone in ['18', '34', '50', '101', '152', '50next', '101next', '50wide', '101wide']

train_loader, num_anchors = get_train_loader(cfg.batch_size, cfg.data_root, cfg.griding_num,
                                             cfg.dataset, cfg.use_aux, distributed,
                                             cfg.num_lanes, cfg.num_classes)

net = parsingNet(pretrained=True,
                 backbone=cfg.backbone,
                 det_dim=(cfg.griding_num + 1, num_anchors, cfg.num_lanes),
                 cls_dim=(cfg.num_classes, cfg.num_lanes),
                 use_aux=cfg.use_aux).cuda()

if distributed:
    net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.local_rank])

optimizer = get_optimizer(net, cfg)

if cfg.finetune is not None:
    dist_print('finetune from ', cfg.finetune)
    state_all = torch.load(cfg.finetune)['model']
    state_clip = {}  # only use backbone parameters
    for k, v in state_all.items():
        if 'model' in k:
            state_clip[k] = v
python single_img_forward.py
'''

cls_num_per_lane = 18
griding_num = 200
model_weight = 'download/culane_18.pth'

img_transforms = transforms.Compose([
    transforms.Resize((288, 800)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

# we don't need auxiliary segmentation when testing
net = parsingNet(pretrained=False,
                 backbone='18',
                 cls_dim=(griding_num + 1, cls_num_per_lane, 4),
                 use_aux=False)

state_dict = torch.load(model_weight, map_location='cpu')['model']
compatible_state_dict = {}
for k, v in state_dict.items():
    if 'module.' in k:
        compatible_state_dict[k[7:]] = v
    else:
        compatible_state_dict[k] = v
net.load_state_dict(compatible_state_dict, strict=False)
net.eval()


def SingleImgFoward(img_path, out_path='./tmp/'):
if __name__ == "__main__":
    torch.backends.cudnn.benchmark = True

    args, cfg = merge_config()
    dist_print('start testing...')
    assert cfg.backbone in ['18', '34', '50', '101', '152', '50next', '101next', '50wide', '101wide']

    if cfg.dataset == 'CULane':
        num_anchors = 18
    elif cfg.dataset == 'Tusimple':
        num_anchors = 56
    else:
        raise NotImplementedError

    # we don't need auxiliary segmentation when testing
    net = parsingNet(pretrained=False, backbone=cfg.backbone,
                     det_dim=(cfg.griding_num + 1, num_anchors, 4),
                     use_aux=False).cuda()

    state_dict = torch.load(cfg.test_model, map_location='cpu')['model']
    compatible_state_dict = {}
    for k, v in state_dict.items():
        if 'module.' in k:
            compatible_state_dict[k[7:]] = v
        else:
            compatible_state_dict[k] = v
    net.load_state_dict(compatible_state_dict, strict=False)
    net.eval()

    img_transforms = transforms.Compose([
        transforms.Resize((288, 800)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
import torch
import numpy as np
import torchvision.transforms as transforms
from model.model import parsingNet
from data.dataset import LaneTestDataset
from data.constant import culane_row_anchor, tusimple_row_anchor
from PIL import Image

# Export to TorchScript so the model can be used from LibTorch
torch.backends.cudnn.benchmark = True

# CULane settings; change these lines if you are using TuSimple
cls_num_per_lane = 18
griding_num = 200
backbone = '18'

net = parsingNet(pretrained=False, backbone=backbone,
                 cls_dim=(griding_num + 1, cls_num_per_lane, 4),
                 use_aux=False)

# Change test_model to where your model is stored.
test_model = '/data/Models/UltraFastLaneDetection/culane_18.pth'
# state_dict = torch.load(test_model, map_location='cpu')['model']  # CPU
state_dict = torch.load(test_model, map_location='cuda')['model']  # CUDA

compatible_state_dict = {}
for k, v in state_dict.items():
    if 'module.' in k:
        compatible_state_dict[k[7:]] = v
    else:
        compatible_state_dict[k] = v
net.load_state_dict(compatible_state_dict, strict=False)
net.eval()
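# A minimal sketch of the export step itself (assumption; the snippet above announces a
# TorchScript export but stops after net.eval()): trace the network with a dummy input at
# its 288x800 resolution and save the result for LibTorch.
# 'culane_18.torchscript.pt' is a hypothetical output filename.
net = net.cuda()
example = torch.zeros((1, 3, 288, 800)).cuda()
traced = torch.jit.trace(net, example)
traced.save('culane_18.torchscript.pt')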