def test_train(self):
    """Smoke-test one full optimization step of FaceBox.

    Encodes four ground-truth boxes into regression/classification
    targets, then runs a single forward/backward/step on random input.
    """
    num_classes = 2
    net = facebox.FaceBox(num_classes=num_classes)
    facebox_box_coder = facebox.FaceBoxCoder(net)
    C, H, W = (3, 1024, 1024)
    x = Variable(torch.randn(1, C, H, W))
    boxes = torch.from_numpy(
        np.array([(0, 0, 100, 100), (25, 25, 125, 125),
                  (200, 200, 250, 250), (0, 0, 300, 300)],
                 dtype=np.float32))
    boxes /= torch.Tensor([W, H, W, H]).expand_as(boxes)  # norm to [0-1]
    # np.long was deprecated and removed in NumPy 1.24; np.int64 is the
    # portable spelling and produces the LongTensor encode() expects.
    labels = torch.from_numpy(np.array([1, 1, 1, 1], dtype=np.int64))
    loc_targets, cls_targets = facebox_box_coder.encode(boxes, labels)
    # Add the batch dimension the loss expects.
    loc_targets = loc_targets[None, :]
    cls_targets = cls_targets[None, :]
    optimizer = optim.Adam(net.parameters(), lr=1e-5, weight_decay=5e-4)
    criterion = facebox.FaceBoxLoss(num_classes=num_classes)
    for epoch in range(1):
        loc_preds, cls_preds = net(x)
        optimizer.zero_grad()
        loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)
        loss.backward()
        optimizer.step()
def test_encode(self):
    """Smoke-test FaceBoxCoder.encode on a single normalized box."""
    net = facebox.FaceBox(num_classes=2)
    facebox_data_coder = facebox.FaceBoxCoder(net)
    boxes = torch.from_numpy(
        np.array([(0.4531, 0.1200, 0.6465, 0.4567)], dtype=np.float32))
    # int64 for consistency with test_train: PyTorch index/scatter ops
    # expect LongTensor labels, which int32 would not produce.
    labels = torch.from_numpy(np.array([1], dtype=np.int64))
    loc_targets, conf_targets = facebox_data_coder.encode(boxes, labels)
def demo():
    """Run live face detection on frames from the default webcam.

    Loads trained weights from 'weight/facebox.pt', runs each captured
    frame through the network and draws decoded boxes with their
    probabilities. Press Esc to quit.
    """
    num_classes = 2
    net = facebox.FaceBox(num_classes=num_classes)
    facebox_box_coder = facebox.FaceBoxCoder(net)
    net.load_state_dict(
        torch.load('weight/facebox.pt',
                   map_location=lambda storage, loc: storage))
    net.eval()
    cap = cv2.VideoCapture(0)
    while True:
        retval, images_np = cap.read()
        if not retval:  # camera unplugged / stream ended
            break
        images = cv2.resize(images_np, (1024, 1024))
        images = torch.from_numpy(images.transpose((2, 0, 1)))  # HWC -> CHW
        images = images.float().div(255)
        images = Variable(torch.unsqueeze(images, 0), volatile=True)
        loc_preds, conf_preds = net(images)
        loc = loc_preds[0, :, :]
        conf = conf_preds[0, :, :]
        # Softmax over the class dimension; explicit dim avoids the
        # implicit-dim deprecation warning.
        boxes, labels, probs = facebox_box_coder.decode(
            loc, F.softmax(conf, dim=1).data)
        print('probs:{}'.format(probs))
        img_h, img_w, img_c = images_np.shape
        print('images_np.shape:{}'.format(images_np.shape))
        for box_id, box in enumerate(boxes):
            prob = probs[box_id]
            # OpenCV drawing functions require integer pixel coordinates;
            # the scaled values are floats.
            box_x1 = int(box[0] * img_w)
            box_y1 = int(box[1] * img_h)
            box_x2 = int(box[2] * img_w)
            box_y2 = int(box[3] * img_h)
            print('({},{})->({},{})'.format(box_x1, box_y1, box_x2, box_y2))
            cv2.rectangle(images_np, (box_x1, box_y1), (box_x2, box_y2),
                          (255, 0, 0))
            cv2.putText(images_np, str(prob), (box_x1, box_y1),
                        cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, 0.4, (0, 255, 0))
        cv2.imshow('images_np', images_np)
        key = cv2.waitKey(1)
        if key == 27:  # Esc
            break
    cap.release()
    cv2.destroyAllWindows()
def test_decode(self):
    """Decode saved raw network outputs and visualize the boxes.

    Reads one batch of predictions from '../data/loc.pt' / '../data/conf.pt'
    and draws the decoded boxes on '../obama.jpg'.
    """
    net = facebox.FaceBox(num_classes=2)
    facebox_data_coder = facebox.FaceBoxCoder(net)
    facebox_default_boxes = facebox_data_coder.default_boxes
    print('facebox_default_boxes.size():{}'.format(
        facebox_default_boxes.size()))
    # loc.pt / conf.pt each hold exactly one batch of predictions.
    locs = torch.load('../data/loc.pt')
    confs = torch.load('../data/conf.pt')
    loc = locs[0, :, :]
    conf = confs[0, :, :]
    print('loc.size():{}'.format(loc.size()))
    print('conf.size():{}'.format(conf.size()))
    # Softmax over the class dimension; explicit dim avoids the
    # implicit-dim deprecation warning.
    boxes, labels, probs = facebox_data_coder.decode(
        loc, F.softmax(conf, dim=1).data)
    print('boxes:{}'.format(boxes))
    print('labels:{}'.format(labels))
    print('probs:{}'.format(probs))
    import cv2
    img = cv2.imread('../obama.jpg')
    img_h, img_w, img_c = img.shape
    for box in boxes:
        # OpenCV drawing functions require integer pixel coordinates.
        box_x1 = int(box[0] * img_w)
        box_y1 = int(box[1] * img_h)
        box_x2 = int(box[2] * img_w)
        box_y2 = int(box[3] * img_h)
        cv2.rectangle(img, (box_x1, box_y1), (box_x2, box_y2), (255, 0, 0))
    cv2.imshow('img', img)
    cv2.waitKey()
def test():
    """Qualitative check: run the trained net on a fixed image.

    Iterates the WIDER loader only to exercise the data pipeline; the
    actual forward pass uses 'obama.jpg' so the result is reproducible.
    Shows the decoded boxes and exits after the first batch.
    """
    num_classes = 2
    net = facebox.FaceBox(num_classes=num_classes)
    facebox_box_coder = facebox.FaceBoxCoder(net)
    root = os.path.expanduser('~/Data/WIDER')
    train_dataset = wider_face_loader.WiderFaceLoader(
        root=root, split='train', boxcoder=facebox_box_coder)
    train_dataloader = data.DataLoader(train_dataset, batch_size=1,
                                       shuffle=True)
    net.load_state_dict(
        torch.load('weight/facebox.pt',
                   map_location=lambda storage, loc: storage))
    net.eval()
    for epoch in range(1):
        for train_id, (images, loc_targets,
                       conf_targets) in enumerate(train_dataloader):
            # The loader's images are ignored; a fixed image is used
            # instead so the detections are deterministic.
            images_np = cv2.imread('obama.jpg')
            images = cv2.resize(images_np, (1024, 1024))
            images = torch.from_numpy(images.transpose((2, 0, 1)))
            images = images.float().div(255)
            images = Variable(torch.unsqueeze(images, 0), volatile=True)
            loc_preds, conf_preds = net(images)
            loc = loc_preds[0, :, :]
            conf = conf_preds[0, :, :]
            print(images_np.shape)
            # Softmax over the class dimension; explicit dim avoids the
            # implicit-dim deprecation warning.
            boxes, labels, probs = facebox_box_coder.decode(
                loc, F.softmax(conf, dim=1).data)
            print('boxes:{}'.format(boxes))
            print('labels:{}'.format(labels))
            print('probs:{}'.format(probs))
            img_h, img_w, img_c = images_np.shape
            for box in boxes:
                # OpenCV drawing functions require integer pixel coordinates.
                box_x1 = int(box[0] * img_w)
                box_y1 = int(box[1] * img_h)
                box_x2 = int(box[2] * img_w)
                box_y2 = int(box[3] * img_h)
                print('({},{})->({},{})'.format(box_x1, box_y1,
                                                box_x2, box_y2))
                cv2.rectangle(images_np, (box_x1, box_y1),
                              (box_x2, box_y2), (255, 0, 0))
            cv2.imshow('images_np', images_np)
            cv2.waitKey()
            print('loc_preds.size():{}'.format(loc_preds.size()))
            print('conf_preds.size():{}'.format(conf_preds.size()))
            break
def train():
    """Train FaceBox on WIDER FACE, plotting losses to visdom.

    Resumes from 'weight/facebox.pt' when present and saves the weights
    back there after every epoch. Per-batch losses go to the 'loss'
    visdom window; a 30-batch running average goes to 'loss_epoch'.
    """
    vis = visdom.Visdom()
    num_classes = 2
    net = facebox.FaceBox(num_classes=num_classes)
    if os.path.exists('weight/facebox.pt'):
        net.load_state_dict(
            torch.load('weight/facebox.pt',
                       map_location=lambda storage, loc: storage))
    facebox_box_coder = facebox.FaceBoxCoder(net)
    root = os.path.expanduser('~/Data/WIDER')
    train_dataset = wider_face_loader.WiderFaceLoader(
        root=root, boxcoder=facebox_box_coder)
    train_dataloader = data.DataLoader(train_dataset, batch_size=1,
                                       shuffle=True)
    optimizer = optim.Adam(net.parameters(), lr=1e-5, weight_decay=1e-4)
    criterion = facebox.FaceBoxLoss(num_classes=num_classes)
    for epoch in range(100):
        loss_epoch = 0
        loss_avg_epoch = 0
        data_count = 0
        for train_id, (images, loc_targets,
                       conf_targets) in enumerate(train_dataloader):
            images = Variable(images)
            loc_preds, conf_preds = net(images)
            optimizer.zero_grad()
            loss = criterion(loc_preds, loc_targets, conf_preds,
                             conf_targets)
            loss_numpy = loss.data.numpy()
            loss_numpy = np.expand_dims(loss_numpy, axis=0)
            if np.isinf(loss_numpy.sum()):
                # Divergent sample: reset the running average and skip
                # the update entirely so inf gradients never reach the
                # weights.
                data_count = 0
                loss_epoch = 0
                continue
            loss_epoch += loss_numpy
            data_count += 1
            loss.backward()
            optimizer.step()
            # Per-batch loss curve.
            win = 'loss'
            win_res = vis.line(X=np.ones(1) * train_id, Y=loss_numpy,
                               win=win, update='append')
            if win_res != win:  # window did not exist yet; create it
                vis.line(X=np.ones(1) * train_id, Y=loss_numpy, win=win)
            # Plot the running average once every 30 finite batches.
            if data_count == 30:
                loss_avg_epoch = loss_epoch / (30 * 1.0)
                loss_avg_epoch = np.expand_dims(loss_avg_epoch, axis=0)
                print('loss_avg_epoch:', loss_avg_epoch)
                win = 'loss_epoch'
                win_res = vis.line(
                    X=np.ones(1) * (epoch * 30 + train_id / 30),
                    Y=loss_avg_epoch, win=win, update='append')
                if win_res != win:
                    vis.line(X=np.ones(1) * (epoch * 30 + train_id / 30),
                             Y=loss_avg_epoch, win=win)
                data_count = 0
                loss_epoch = 0
        # Close the per-batch window so each epoch starts a fresh curve.
        win = 'loss'
        vis.close(win)
        if not os.path.exists('weight/'):
            os.mkdir('weight')
        print('saving model ...')
        torch.save(net.state_dict(), 'weight/facebox.pt')