Ejemplo n.º 1
0
def evaluate(image_path='./imgs/116.jpg', cp='cp/79999_iter.pth'):
	"""Run a BiSeNet checkpoint on one image and return its argmax map.

	The network is built with 19 output classes, loaded from ``cp`` and
	evaluated on the CPU.

	Args:
		image_path: Path of the image file to parse.
		cp: Path of the checkpoint; it is loaded with ``torch.load`` and
			passed to ``load_state_dict``.

	Returns:
		A NumPy array holding, for every position, the index of the largest
		value along axis 0 of the network's first output (batch axis
		removed) -- presumably the per-pixel class label.
	"""
	n_classes = 19
	device = torch.device("cpu")

	net = BiSeNet(n_classes=n_classes)
	net.to(device)
	net.load_state_dict(torch.load(cp, map_location=device))
	net.eval()

	to_tensor = transforms.Compose([
		transforms.ToTensor(),
		transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
	])

	# The context manager releases the file handle; convert() forces three
	# channels so grayscale / RGBA inputs match the 3-channel Normalize stats.
	with Image.open(image_path) as src:
		rgb = src.convert('RGB')

	with torch.no_grad():
		batch = torch.unsqueeze(to_tensor(rgb), 0).to(device)
		# The network returns several outputs; only the first one is used.
		out = net(batch)[0]
		parsing = out.squeeze(0).cpu().numpy().argmax(0)

	return parsing
Ejemplo n.º 2
0
	def parser(image_path='./imgs/116.jpg', cp='cp/79999_iter.pth'):
		"""Run a 19-class BiSeNet on ``im`` (CPU) and return its argmax map.

		Args:
			image_path: Not used by the body; the input comes from ``im``.
			cp: Path of the checkpoint passed to ``torch.load`` and then to
				``load_state_dict``.

		Returns:
			NumPy array with the argmax over axis 0 of the network's first
			output after the batch axis is squeezed away.
		"""
		model = BiSeNet(n_classes=19)
		cpu = torch.device("cpu")
		model.to(cpu)
		weights = torch.load(cp, map_location='cpu')
		model.load_state_dict(weights)
		model.eval()

		preprocess = transforms.Compose([
			transforms.ToTensor(),
			transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
		])

		with torch.no_grad():
			# NOTE(review): `im` is not defined in this function -- presumably
			# it is captured from the enclosing scope; confirm at the call site.
			batch = torch.unsqueeze(preprocess(im), 0)
			# Only the first of the network's outputs is used.
			first_output = model(batch)[0]
			return first_output.squeeze(0).numpy().argmax(0)
Ejemplo n.º 3
0
def train(fintune_model, data_root, respth):
    """Train BiSeNet on the FaceMask dataset with three OHEM CE losses.

    The learning rate starts at 1e-2 and is multiplied by 0.1 whenever the
    mean epoch loss has failed to improve for more than 30 consecutive
    epochs. The latest weights are written every 500 iterations and once
    more at the end of every epoch.

    Args:
        fintune_model: Path of a checkpoint (state dict) to start from. When
            it is ``None`` or the path does not exist, training starts from
            the freshly constructed network.
        data_root: Dataset root handed to ``FaceMask``.
        respth: Output directory; checkpoints go to ``<respth>/model/``.
    """
    # dataset
    n_classes = 19
    n_img_per_gpu = 16
    n_workers = 8
    cropsize = [448, 448]

    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    dl = DataLoader(ds,
                    batch_size=n_img_per_gpu,
                    shuffle=True,
                    num_workers=n_workers,
                    pin_memory=True,
                    drop_last=True)

    # model
    ignore_idx = -100

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    net = BiSeNet(n_classes=n_classes)
    net = net.to(device)

    # Test for None first: os.access(None, ...) raises TypeError.
    if fintune_model is not None and os.access(fintune_model, os.F_OK):
        chkpt = torch.load(fintune_model, map_location=device)
        net.load_state_dict(chkpt)
        print('load fintune model : {}'.format(fintune_model))
    else:
        print('no fintune model')

    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_epoch = 1000

    optim = Optimizer.SGD(net.parameters(),
                          lr=lr_start,
                          momentum=momentum,
                          weight_decay=weight_decay)

    # Make sure the checkpoint directory exists before the first save.
    model_dir = respth + '/model'
    os.makedirs(model_dir, exist_ok=True)

    def current_state():
        """Return the state dict, unwrapping a ``.module`` wrapper if any."""
        if hasattr(net, 'module'):
            return net.module.state_dict()
        return net.state_dict()

    ## train loop
    msg_iter = 50
    flag_change_lr_cnt = 0  # epochs in a row without a new best mean loss
    init_lr = lr_start  # current learning rate

    best_loss = np.inf
    loss_mean = 0.  # running sum of batch losses within the epoch
    loss_idx = 0.  # number of batches accumulated into loss_mean

    print('start training ~')
    it = 0
    for epoch in range(max_epoch):
        net.train()
        # Learning-rate schedule, driven by the previous epoch's mean loss.
        if loss_idx > 0:
            epoch_loss = loss_mean / loss_idx
            if best_loss > epoch_loss:
                flag_change_lr_cnt = 0
                best_loss = epoch_loss
            else:
                flag_change_lr_cnt += 1

                if flag_change_lr_cnt > 30:
                    init_lr = init_lr * 0.1
                    # The optimizer local is `optim`; the original passed an
                    # undefined name here.
                    set_learning_rate(optim, init_lr)
                    flag_change_lr_cnt = 0

        loss_mean = 0.
        loss_idx = 0.

        for i, (im, lb) in enumerate(dl):
            # Follow the selected device instead of hard-coding CUDA so the
            # CPU fallback above actually works.
            im = im.to(device)
            lb = lb.to(device)
            lb = torch.squeeze(lb, 1)

            optim.zero_grad()
            out, out16, out32 = net(im)
            lossp = LossP(out, lb)
            loss2 = Loss2(out16, lb)
            loss3 = Loss3(out32, lb)
            loss = lossp + loss2 + loss3

            loss_mean += loss.item()
            loss_idx += 1.

            loss.backward()
            optim.step()

            if it % msg_iter == 0:
                print('epoch <{}/{}> -->> <{}/{}> -> iter {} : loss {:.5f}, loss_mean :{:.5f}, best_loss :{:.5f},lr :{:.6f},batch_size : {}'.format(
                    epoch, max_epoch, i, len(ds) // n_img_per_gpu, it,
                    loss.item(), loss_mean / loss_idx, best_loss, init_lr,
                    n_img_per_gpu))

                if it % 500 == 0:
                    torch.save(current_state(),
                               model_dir + '/face_parse_latest.pth')
            it += 1

        # Fetch the state dict here rather than relying on a variable that
        # was only bound inside the periodic-checkpoint branch.
        torch.save(current_state(),
                   model_dir + '/face_parse_epoch_{}.pth'.format(epoch))
Ejemplo n.º 4
0
class BISENET:
    """Find people in an image from a BiSeNet segmentation map.

    The model output is colour-coded with the label table read from
    ``csv_path``; connected regions carrying the 'Pedestrian' colour are
    boxed and numbered.
    """

    # Connected regions must cover more than this many pixels to be kept.
    MIN_PERSON_AREA = 100

    def __init__(self, model_path, csv_path):
        """Load the label table and the model weights.

        Args:
            model_path: Checkpoint path given to ``torch.load``.
            csv_path: Label description file given to ``get_label_info``.
        """
        # retrieve label info
        self.label_info = get_label_info(csv_path)

        # build model and load weight
        self.device = torch.device(
            'cuda:0' if torch.cuda.is_available() else 'cpu')
        self.model = BiSeNet(12, 'resnet18')
        # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
        self.model.load_state_dict(
            torch.load(model_path, map_location=self.device))
        self.model.to(self.device).eval()

        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])

        # retrieve person color; the last entry of the label row is dropped
        # (presumably a non-colour field -- confirm against get_label_info)
        self.person_color = self.label_info['Pedestrian'][:-1]

    def predict_on_image(self, image):
        """Segment ``image`` and return the annotated colour map.

        Args:
            image: Array indexed as ``[row, col, channel]``; its channel axis
                is reversed before the transform (presumably BGR -> RGB for
                an OpenCV frame -- confirm with the caller).

        Returns:
            ``(predict, bboxes, num_people)`` exactly as produced by
            ``bbox_output``.
        """
        # transform image to tensor
        tensor = self.transform(image[:, :, ::-1]).to(self.device)

        # prediction map; gradients are never needed for inference
        with torch.no_grad():
            predict = self.model(tensor.unsqueeze(0)).squeeze()

        # encode to class index
        predict = reverse_one_hot(predict).cpu().numpy()

        # encode to color code
        predict = colour_code_segmentation(predict,
                                           self.label_info).astype(np.uint8)

        # get bbox output
        return self.bbox_output(predict)

    def bbox_output(self, predict):
        """Box and label every sufficiently large person-coloured region.

        Args:
            predict: Colour-coded segmentation image; its last axis is
                compared against ``self.person_color``.

        Returns:
            A tuple ``(predict, bboxes, num_people)``: the image with the
            rectangles and 'personN' captions drawn on it, the list of kept
            ``regionprops`` bounding boxes, and how many were kept.
        """
        # binary mask: 255 where the pixel colour equals the person colour
        is_person = np.all(predict == self.person_color, axis=-1)
        person_mask = is_person.astype(np.uint8) * 255

        # label the mask image with connected-components algorithm
        label_image = label(person_mask)

        # keep only the regions larger than the area threshold
        bboxes = [
            props.bbox for props in regionprops(label_image)
            if props.area > self.MIN_PERSON_AREA
        ]

        for idx, (minr, minc, maxr, maxc) in enumerate(bboxes, start=1):
            # OpenCV points are (x, y), i.e. (column, row).
            predict = cv2.rectangle(predict, (minc, minr), (maxc, maxr),
                                    (0, 255, 0), 2, cv2.LINE_AA)
            predict = cv2.putText(predict, f'person{idx}', (minc, minr - 10),
                                  cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                  (0, 255, 0), 2, cv2.LINE_AA)

        return predict, bboxes, len(bboxes)