def evaluate(image_path='./imgs/116.jpg', cp='cp/79999_iter.pth'):
    """Parse one image file with BiSeNet on the CPU and return the label map.

    A 19-class ``BiSeNet`` is built, its weights are restored from ``cp``
    and the image is pushed through it at its native resolution.

    Args:
        image_path: Path of the image to parse.
        cp: Path of the checkpoint containing the network ``state_dict``.

    Returns:
        A numpy array of class indices: the arg-max over the first axis of
        the network's first output once the batch axis has been removed
        (presumably shaped ``(H, W)`` -- confirm against ``BiSeNet``).
    """
    n_classes = 19
    device = torch.device("cpu")

    net = BiSeNet(n_classes=n_classes)
    net.to(device)
    net.load_state_dict(torch.load(cp, map_location='cpu'))
    net.eval()

    to_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # The context manager releases the file handle; converting to RGB keeps
    # the 3-channel normalisation valid for grayscale / RGBA / palette files.
    with Image.open(image_path) as src:
        rgb = src.convert('RGB')

    with torch.no_grad():
        batch = torch.unsqueeze(to_tensor(rgb), 0)
        # The network returns several outputs; only the first one is used.
        out = net(batch)[0]
        parsing = out.squeeze(0).numpy().argmax(0)

    return parsing
def parser(image_path='./imgs/116.jpg', cp='cp/79999_iter.pth'):
    """Parse one image file with BiSeNet on the CPU and return the label map.

    Args:
        image_path: Path of the image to parse.
        cp: Path of the checkpoint containing the network ``state_dict``.

    Returns:
        A numpy array of class indices: the arg-max over the first axis of
        the network's first output once the batch axis has been removed.
    """
    n_classes = 19
    device = torch.device("cpu")

    net = BiSeNet(n_classes=n_classes)
    net.to(device)
    net.load_state_dict(torch.load(cp, map_location='cpu'))
    net.eval()

    to_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # The image must be read from ``image_path``; the previous code fed an
    # undefined name ``im`` to the transform and raised NameError.
    with Image.open(image_path) as src:
        # RGB conversion keeps the 3-channel normalisation valid for
        # grayscale / RGBA / palette inputs.
        rgb = src.convert('RGB')

    with torch.no_grad():
        batch = torch.unsqueeze(to_tensor(rgb), 0)
        # The network returns several outputs; only the first one is used.
        out = net(batch)[0]
        parsing = out.squeeze(0).numpy().argmax(0)

    return parsing
def train(fintune_model, data_root, respth):
    """Train (or fine-tune) a 19-class BiSeNet on the ``FaceMask`` dataset.

    The three network outputs are each supervised with an ``OhemCELoss`` and
    the losses are summed. Optimisation uses SGD; the learning rate is
    multiplied by 0.1 once the mean epoch loss has failed to improve for
    more than 30 consecutive epochs.

    Checkpoints are written below ``respth + '/model'``:
    ``face_parse_latest.pth`` every 500 iterations and
    ``face_parse_epoch_<epoch>.pth`` at the end of every epoch.

    Args:
        fintune_model: Path of a ``state_dict`` checkpoint to start from, or
            ``None`` / a non-existent path to start from scratch.
        data_root: Dataset root handed to ``FaceMask``.
        respth: Output directory; checkpoints go into its ``model`` folder.
    """
    # dataset
    n_classes = 19
    n_img_per_gpu = 16
    n_workers = 8
    cropsize = [448, 448]
    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    dl = DataLoader(ds,
                    batch_size=n_img_per_gpu,
                    shuffle=True,
                    num_workers=n_workers,
                    pin_memory=True,
                    drop_last=True)

    # model
    ignore_idx = -100
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    net = BiSeNet(n_classes=n_classes)
    net = net.to(device)

    # Test for None first: os.access(None, ...) raises TypeError.
    if fintune_model is not None and os.access(fintune_model, os.F_OK):
        chkpt = torch.load(fintune_model, map_location=device)
        net.load_state_dict(chkpt)
        print('load fintune model : {}'.format(fintune_model))
    else:
        print('no fintune model')

    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    # optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_epoch = 1000
    optim = Optimizer.SGD(net.parameters(),
                          lr=lr_start,
                          momentum=momentum,
                          weight_decay=weight_decay)

    def current_state():
        # Unwrap a DataParallel-style container so the saved keys carry no
        # 'module.' prefix.
        return net.module.state_dict() if hasattr(net, 'module') else net.state_dict()

    # torch.save does not create missing parent directories.
    os.makedirs(respth + '/model', exist_ok=True)

    # train loop
    msg_iter = 50
    flag_change_lr_cnt = 0  # epochs since the mean loss last improved
    init_lr = lr_start      # current learning rate
    best_loss = np.inf
    loss_mean = 0.          # running sum of the batch losses in this epoch
    loss_idx = 0.           # number of batches summed into loss_mean

    print('start training ~')
    it = 0
    for epoch in range(max_epoch):
        net.train()

        # Learning-rate update policy, driven by the previous epoch's mean
        # loss (skipped on the first epoch, when nothing was accumulated).
        if loss_mean != 0.:
            epoch_loss = loss_mean / loss_idx
            if best_loss > epoch_loss:
                flag_change_lr_cnt = 0
                best_loss = epoch_loss
            else:
                flag_change_lr_cnt += 1

            if flag_change_lr_cnt > 30:
                init_lr = init_lr * 0.1
                # Pass the optimizer actually created above (``optim``).
                set_learning_rate(optim, init_lr)
                flag_change_lr_cnt = 0

        loss_mean = 0.
        loss_idx = 0.

        for i, (im, lb) in enumerate(dl):
            # Follow the selected device instead of forcing CUDA so the loop
            # also runs on CPU-only hosts.
            im = im.to(device)
            lb = lb.to(device)
            lb = torch.squeeze(lb, 1)

            optim.zero_grad()
            out, out16, out32 = net(im)
            lossp = LossP(out, lb)
            loss2 = Loss2(out16, lb)
            loss3 = Loss3(out32, lb)
            loss = lossp + loss2 + loss3

            loss_mean += loss.item()
            loss_idx += 1.

            loss.backward()
            optim.step()

            if it % msg_iter == 0:
                print('epoch <{}/{}> -->> <{}/{}> -> iter {} : loss {:.5f}, loss_mean :{:.5f}, best_loss :{:.5f},lr :{:.6f},batch_size : {}'.format(
                    epoch, max_epoch, i, len(ds) // n_img_per_gpu, it,
                    loss.item(), loss_mean / loss_idx, best_loss, init_lr,
                    n_img_per_gpu))

            if it % 500 == 0:
                torch.save(current_state(), respth + '/model/face_parse_latest.pth')
            it += 1

        # Snapshot the weights as they are at the end of the epoch, not the
        # stale copy left over from the last 500-iteration checkpoint.
        torch.save(current_state(), respth + '/model/face_parse_epoch_{}.pth'.format(epoch))
class BISENET:
    """BiSeNet inference wrapper that colour-codes a frame and boxes people.

    The model is a 12-class ``BiSeNet`` with a ``'resnet18'`` backbone
    argument; class colours come from the label CSV and the colour of the
    ``'Pedestrian'`` entry is used to locate people.
    """

    # Connected regions whose area does not exceed this value are ignored.
    MIN_PERSON_AREA = 100

    def __init__(self, model_path, csv_path):
        """Load label colours and model weights.

        Args:
            model_path: Path of the checkpoint holding the model
                ``state_dict``.
            csv_path: Path of the label CSV passed to ``get_label_info``.
        """
        # retrieve label info
        self.label_info = get_label_info(csv_path)

        # build model and load weight
        self.model = BiSeNet(12, 'resnet18')
        self.device = torch.device(
            'cuda:0' if torch.cuda.is_available() else 'cpu')
        # map_location lets a checkpoint saved on a GPU load on a CPU-only
        # host (and vice versa).
        self.model.load_state_dict(
            torch.load(model_path, map_location=self.device))
        self.model.to(self.device).eval()

        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])

        # retrieve person color (every component but the last one)
        self.person_color = self.label_info['Pedestrian'][:-1]

    def predict_on_image(self, image):
        """Segment ``image`` and annotate every detected person.

        Args:
            image: Array indexable as ``image[:, :, ::-1]``; the last axis is
                reversed before the transform (presumably BGR -> RGB for an
                OpenCV frame -- confirm with the caller).

        Returns:
            Tuple ``(predict, bboxes, num_people)`` as produced by
            ``bbox_output``.
        """
        # transform image to tensor
        tensor = self.transform(image[:, :, ::-1]).to(self.device)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            # prediction map
            predict = self.model(tensor.unsqueeze(0)).squeeze()
            # encode to class index
            predict = reverse_one_hot(predict).cpu().numpy()

        # encode to color code
        predict = colour_code_segmentation(
            predict, self.label_info).astype(np.uint8)

        # get bbox output
        return self.bbox_output(predict)

    def bbox_output(self, predict):
        """Draw a labelled box around each sufficiently large person region.

        Args:
            predict: Colour-coded ``uint8`` segmentation image whose last
                axis holds the colour components.

        Returns:
            Tuple ``(predict, bboxes, num_people)``: the annotated image, the
            ``props.bbox`` tuple of every kept region, and how many regions
            were kept.
        """
        # binary mask: 255 where the pixel has the person colour, else 0
        is_person = np.all(predict == self.person_color, axis=-1)
        person_mask = is_person.astype(np.uint8) * 255

        # label the mask image with connected-components algorithm
        label_image = label(person_mask)

        bboxes = []
        for props in regionprops(label_image):
            if props.area <= self.MIN_PERSON_AREA:
                continue

            minr, minc, maxr, maxc = props.bbox
            bboxes.append(props.bbox)
            person_id = len(bboxes)  # 1-based index among the kept regions

            predict = cv2.rectangle(predict, (minc, minr), (maxc, maxr),
                                    (0, 255, 0), 2, cv2.LINE_AA)
            predict = cv2.putText(predict, f'person{person_id}',
                                  (minc, minr - 10),
                                  cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                  (0, 255, 0), 2, cv2.LINE_AA)

        return predict, bboxes, len(bboxes)