def __init__(self, arg=None, vis=None, id=None):
    """Build the training state: data pipeline, network, loss, optimizer, helpers.

    Args:
        arg: Configuration object. A fresh ``ARG()`` is created when ``None``.
        vis: Visualizer. When ``None`` a ``VIS`` is created whose ``save_dir``
            is ``self.arg.save.tensorboard``.
        id: Value stored as ``self.id``; ``0`` is used when ``None``.
    """
    super().__init__()
    self.arg = ARG() if arg is None else arg
    # Use self.arg here, not the raw ``arg`` parameter: ``arg`` may be None,
    # in which case ``arg.save`` would raise AttributeError.
    self.vis = VIS(save_dir=self.arg.save.tensorboard) if vis is None else vis
    self.id = 0 if id is None else id
    self.global_step = 0
    self.hyper_parameter = {'lr': 10 ** -1.6}

    model_cfg = self.arg.model
    train_data_cfg = self.arg.data.dataset.train
    train_loader_cfg = self.arg.data.dataloader.train

    self.dataset = Hand_Dataset(
        self.arg,
        train_data_cfg.root,
        im_size=model_cfg.net.im_size,
        relative_path_txt=train_data_cfg.txt_path)
    self.dataloader = DataLoader(
        self.dataset,
        batch_size=train_loader_cfg.batch_size,
        shuffle=train_loader_cfg.shuffle,
        drop_last=train_loader_cfg.drop_last)

    self.net = YoloV3_Micro(class_num=model_cfg.net.num_class,
                            anchors_num=model_cfg.net.num_anchor)
    # A random single-image batch of the configured size is handed to the
    # visualizer together with the network.
    input_to_net = torch.rand(1, 3, model_cfg.net.im_size[0],
                              model_cfg.net.im_size[1])
    self.vis.model(self.net, input_to_net)
    if model_cfg.weight_path is not None:
        self.net.load_state_dict(
            one_card_model(torch.load(model_cfg.weight_path)))

    self.loss_func = YOLOLoss()
    self.optimizer = torch.optim.Adam(self.net.parameters(),
                                      lr=self.hyper_parameter['lr'])

    # One box formatter per network output (feature_size[0] and [1]).
    self.bbox_format = [
        Bbox_Format(model_cfg.net.im_size,
                    model_cfg.net.feature_size[layer_idx],
                    model_cfg.flt_anchor,
                    model_cfg.net.num_class,
                    model_cfg.mask_iou_threshold)
        for layer_idx in (0, 1)
    ]
    self.valider = Valider(self.arg)
    self.tester = Tester(self.arg)
def __init__(self, arg):
    """Store ``arg`` and create the test dataset, its loader and box formatters.

    The loader yields one un-shuffled sample per batch. One ``Bbox_Format``
    is created for each of ``arg.model.net.feature_size[0]`` and ``[1]``.
    """
    super().__init__()
    self.arg = arg

    model_cfg = arg.model
    self.dataset = Hand_Dataset(
        arg,
        arg.data.dataset.test.root,
        im_size=model_cfg.net.im_size,
        mode='test',
    )
    self.dataloader = DataLoader(self.dataset, batch_size=1, shuffle=False)

    self.bbox_format = [
        Bbox_Format(
            model_cfg.net.im_size,
            model_cfg.net.feature_size[scale_idx],
            model_cfg.flt_anchor,
            model_cfg.net.num_class,
            model_cfg.mask_iou_threshold,
        )
        for scale_idx in (0, 1)
    ]
def valid(arg, valid_loader, net, loss_func, epoch, vis):
    """Run one pass over ``valid_loader`` and log the mean loss to ``vis``.

    The network is put in eval mode and left there; callers that keep
    training must switch it back with ``net.train()``.

    Args:
        arg: Configuration; reads ``arg.model.*`` and ``arg.train.device``.
        valid_loader: Iterable of dicts with ``'image'`` and ``'label'`` tensors.
        net: Network returning one feature map per detection layer.
        loss_func: Loss object; element ``0`` of its ``forward`` result is
            averaged as the layer loss.
        epoch: Unused; kept for signature compatibility.
        vis: Visualizer receiving the ``'valid/whole_loss:'`` line.
    """
    model_cfg = arg.model
    bbox_format = [
        Bbox_Format(model_cfg.net.im_size,
                    model_cfg.net.feature_size[layer_idx],
                    model_cfg.flt_anchor,
                    model_cfg.net.num_class,
                    model_cfg.mask_iou_threshold)
        for layer_idx in (0, 1)
    ]
    net.eval()
    valid_loss_list = []
    # Validation never back-propagates: disable autograd so no graph is built,
    # and store plain floats so nothing keeps per-batch tensors alive.
    with torch.no_grad():
        for batch_data_dict in valid_loader:
            batch_image = batch_data_dict['image'].to(arg.train.device)
            batch_label = batch_data_dict['label'].to(arg.train.device)
            net_out = net.forward(batch_image)
            whole_loss = 0
            for model_out_idx, feature in enumerate(net_out):
                batch_predict = model_out_to_model_predict(
                    feature, num_anchors=model_cfg.net.num_anchor)
                batch_target = bbox_format[model_out_idx].to_model(batch_label)
                # Drop empty placeholder tensors before computing the loss.
                batch_predict = [
                    t for t in batch_predict if t.shape != torch.Size([0])]
                batch_target = [
                    t for t in batch_target if t.shape != torch.Size([0])]
                layer_loss = loss_func.forward(batch_predict, batch_target)
                whole_loss += torch.mean(layer_loss[0]) / len(net_out)
            valid_loss_list.append(float(whole_loss))

    if not valid_loss_list:
        # Empty loader: nothing to average (the mean would divide by zero).
        return
    vis.line('valid/whole_loss:', sum(valid_loss_list) / len(valid_loss_list))
def train(arg: ARG, train_loader, valid_loader, net, loss_func, optimizer, lr_decay, epoch, vis: VIS=None):
    """Make one pass over ``train_loader``, stepping the optimizer on every batch.

    Before each step the learning rate returned by ``lr_decay.get_lr`` for
    ``vis.step`` is written into the optimizer's first parameter group. The
    log10 of that rate and the batch loss are sent to ``vis``, and the running
    list of batch losses is printed. ``valid_loader`` is accepted but not used.
    """
    model_cfg = arg.model
    bbox_format = [
        Bbox_Format(model_cfg.net.im_size,
                    model_cfg.net.feature_size[scale_idx],
                    model_cfg.flt_anchor,
                    model_cfg.net.num_class,
                    model_cfg.mask_iou_threshold)
        for scale_idx in (0, 1)
    ]
    net.train()
    whole_loss_list = []
    for batch_idx, batch_data_dict in enumerate(train_loader):
        vis.iteration_counter = batch_idx
        print('Epoch:{}, step/all: {}/{}'.format(epoch, batch_idx, len(train_loader)))

        images = batch_data_dict['image']
        labels = batch_data_dict['label']
        image_paths = batch_data_dict['image_path']  # read but not used below
        images = images.to(arg.train.device)
        labels = labels.to(arg.train.device)

        lr = lr_decay.get_lr(global_step=vis.step)
        optimizer.param_groups[0]['lr'] = lr
        optimizer.zero_grad()

        net_out = net.forward(images)
        predict_list, target_list, losses_list = [], [], []
        whole_loss = 0
        for model_out_idx, feature in enumerate(net_out):
            raw_predict = model_out_to_model_predict(
                feature, num_anchors=arg.model.net.num_anchor)
            predict_list.append(raw_predict)
            raw_target = bbox_format[model_out_idx].to_model(labels)
            target_list.append(raw_target)

            # Only non-empty tensors are passed on to the loss.
            kept_predict = [t for t in raw_predict if t.shape != torch.Size([0])]
            kept_target = [t for t in raw_target if t.shape != torch.Size([0])]
            layer_loss = loss_func.forward(kept_predict, kept_target)
            losses_list.append(layer_loss)
            whole_loss += torch.mean(layer_loss[0])/len(net_out)

        whole_loss.backward()
        optimizer.step()

        vis.line('log_lr', np.log10(lr))
        step_loss = whole_loss.item()
        vis.line('whole_loss', y=step_loss)
        whole_loss_list.append(whole_loss.item())
        print('to step '+str(batch_idx)+': ', whole_loss_list)
def tst(arg, test_loader, net, write_image=False):
    """Evaluate ``net`` on ``test_loader`` and return the ``ap`` from ``det_eval``.

    Only the first sample of each batch is used, so the loader is expected to
    yield batches of size 1. Targets and predictions are keyed by
    ``'<batch_idx>.jpg'``.

    Args:
        arg: Configuration; reads ``arg.model.*`` and ``arg.train.device``.
        test_loader: Iterable of dicts with ``'image'`` and ``'label'`` tensors.
        net: Network returning one feature map per detection layer.
        write_image: When true, each annotated image is shown in a window
            (blocking until a key is pressed) and written to
            ``image_out/<n>.jpg``.

    Returns:
        The third value returned by ``det_eval`` (named ``ap`` in this code).
    """
    model_cfg = arg.model
    bbox_format = [
        Bbox_Format(model_cfg.net.im_size,
                    model_cfg.net.feature_size[layer_idx],
                    model_cfg.flt_anchor,
                    model_cfg.net.num_class,
                    model_cfg.mask_iou_threshold)
        for layer_idx in (0, 1)
    ]
    net.eval()
    target_dict = {}
    predict_list = []
    im_w, im_h = model_cfg.net.im_size
    image_counter = 0
    if write_image:
        # Loop-invariant: create the output directory once, not per image.
        os.makedirs('image_out', exist_ok=True)

    print('testing......')
    # Wrap the loader (not the enumerate object) so tqdm can read its length.
    for batch_idx, batch_data_dict in enumerate(tqdm(test_loader)):
        image_name = str(batch_idx) + '.jpg'
        batch_image = batch_data_dict['image']
        batch_label = batch_data_dict['label']
        np_im = torch_im_to_cv2_im(batch_image[0]).copy()

        # Collect ground-truth boxes in pixels and draw them in green.
        label_list = []
        for label_box in batch_label[0]:
            cls, xmid, ymid, w, h = label_box.tolist()
            # Rows whose coordinates sum to ~0 are skipped
            # (presumably padding rows -- confirm against the dataset).
            if xmid + ymid + w + h > 0.000001:
                label_list.append(
                    [cls, xmid * im_w, ymid * im_h, w * im_w, h * im_h])
                xmin, ymin, xmax, ymax = cxcywh_to_x1y1x2y2(xmid, ymid, w, h)
                np_im = cv2.rectangle(
                    np_im,
                    (int(xmin * im_w), int(ymin * im_h)),
                    (int(xmax * im_w), int(ymax * im_h)),
                    (0, 255, 0))
        target_dict[image_name] = label_list

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            net_out = net.forward(batch_image.to(arg.train.device))
            predict_bbox_list = []
            confidence_list = []
            for model_out_idx, feature in enumerate(net_out):
                batch_predict = model_out_to_model_predict(
                    feature, num_anchors=model_cfg.net.num_anchor)
                batch_predict_bbox, batch_confidence = bbox_format[
                    model_out_idx].to_bbox(*batch_predict)
                # notice: batch size == 1
                predict_bbox_list.append(batch_predict_bbox[0])
                confidence_list.append(batch_confidence[0])
            predict_bboxes = cxcywh_to_x1y1x2y2(torch.cat(predict_bbox_list, 0))
            confidences = torch.cat(confidence_list, 0)
            bboxes, confidences = confidence_filter(
                predict_bboxes, confidences,
                model_cfg.out_confidence_filter_threshold)
            bboxes, confidences = none_max_suppression(
                bboxes, confidences, model_cfg.nms_iou_threshold)

        for box, conf in zip(bboxes, confidences):
            xmin, ymin, xmax, ymax = box.tolist()
            # NOTE(review): centres are floor-divided while width/height keep
            # their fractional part -- confirm this is what det_eval expects.
            predict_list.append([
                image_name, 0, conf.item(),
                (xmin + xmax) // 2, (ymin + ymax) // 2,
                xmax - xmin, ymax - ymin,
            ])

        if write_image:
            for box, conf in zip(bboxes, confidences):
                xmin, ymin, xmax, ymax = (round(v) for v in box.tolist())
                np_im = cv2.rectangle(np_im, (xmin, ymin), (xmax, ymax),
                                      (255, 255, 0))
                np_im = cv2.putText(np_im, str(round(conf.item(), 3)),
                                    ((xmin + xmax) // 2, (ymin + ymax) // 2),
                                    cv2.FONT_HERSHEY_COMPLEX, 0.5,
                                    (0, 0, 255), 1)
            cv2.imshow('', np_im)
            cv2.waitKey()
            cv2.imwrite('image_out/' + str(image_counter) + '.jpg', np_im)
            image_counter += 1

    _, _, ap = det_eval(predict_list, target_dict, 0, 0.5)
    return ap
def tst():
    """Run the detector on every file in ``arg.test.image_dir`` and display it.

    A ``YoloV3_Tiny`` is built from a fresh ``ARG()``, optionally loaded from
    ``arg.model.weight_path``, and wrapped in ``DataParallel``. Each image is
    shown in its own window with the boxes that survive NMS, and the function
    blocks on a key press per image.
    """
    arg = ARG()
    model_cfg = arg.model
    net = YoloV3_Tiny(class_num=model_cfg.net.num_class,
                      anchors_num=model_cfg.net.num_anchor)
    net = net.to(arg.train.device)
    if model_cfg.weight_path is not None:
        net.load_state_dict(one_card_model(torch.load(model_cfg.weight_path)))
    net = torch.nn.DataParallel(net)
    net.eval()

    bbox_format = [
        Bbox_Format(model_cfg.net.im_size,
                    model_cfg.net.feature_size[layer_idx],
                    model_cfg.anchor,
                    model_cfg.net.num_class,
                    model_cfg.mask_iou_threshold)
        for layer_idx in (0, 1)
    ]

    for image_path in os.listdir(arg.test.image_dir):
        abs_image_path = os.path.join(arg.test.image_dir, image_path)
        image = cv2.imread(abs_image_path)
        assert image is not None, 'cannot read image: ' + abs_image_path
        torch_im, pad_hwc = yolo_mode_cv2_im_to_torch_im(
            image, model_cfg.net.im_size)
        batch_image = torch_im.unsqueeze(0)  # make up as a batch

        with torch.no_grad():
            net_out = net.forward(batch_image)
            predict_bbox_list = []
            confidence_list = []
            for model_out_idx, feature in enumerate(net_out):
                batch_predict = model_out_to_model_predict(
                    feature, num_anchors=len(model_cfg.anchor))
                batch_predict_bbox, batch_confidence = bbox_format[
                    model_out_idx].to_bbox(*batch_predict)
                # notice: batch size == 1
                predict_bbox_list.append(batch_predict_bbox[0])
                confidence_list.append(batch_confidence[0])
            predict_bboxes = cxcywh_to_x1y1x2y2(torch.cat(predict_bbox_list, 0))
            confidences = torch.cat(confidence_list, 0)
            bboxes, confidences = confidence_filter(
                predict_bboxes, confidences,
                model_cfg.out_confidence_filter_threshold)
            bboxes, confidences = none_max_suppression(
                bboxes, confidences, model_cfg.nms_iou_threshold)

        # Draw onto np_im and show np_im itself. The previous code showed a
        # name bound only inside the loop, so an image with no boxes raised
        # NameError (first image) or displayed the previous image (later ones).
        np_im = torch_im_to_cv2_im(torch_im).copy()
        for box in bboxes:
            x1, y1, x2, y2 = [int(v) for v in box]
            np_im = cv2.rectangle(np_im, (x1, y1), (x2, y2), (255, 255, 0))
        cv2.imshow(image_path, np_im)
        cv2.waitKey()
def train(arg: ARG, train_loader, valid_loader, net, loss_func, optimizer, lr_decay, epoch, vis: VIS = None):
    """Train ``net`` for one pass over ``train_loader`` with logging and validation.

    Each step sets the first parameter group's learning rate from
    ``lr_decay.get_lr(global_step=vis.step)``, back-propagates the mean of the
    per-layer losses and steps the optimizer.

    When ``batch_idx % arg.train.log_iteration_interval == 0`` the following
    are sent to ``vis`` for the first sample of the batch: log10 of the
    learning rate, predicted boxes before and after NMS, target boxes, the
    total loss, per-anchor confidence maps and per-layer loss terms.

    When ``batch_idx % arg.train.valid_iteration_interval == 1`` the ``valid``
    function is run and the network is switched back to training mode.

    Args:
        arg: Configuration; reads ``arg.model.*`` and ``arg.train.*``.
        train_loader: Iterable of dicts with ``'image'`` and ``'label'`` tensors.
        valid_loader: Loader forwarded to ``valid``.
        net: Network returning one feature map per detection layer.
        loss_func: Loss object whose ``forward`` returns a 7-tuple
            ``(loss, dx, dy, w, h, confidence, class)``.
        optimizer: Optimizer stepped once per batch.
        lr_decay: Object providing ``get_lr(global_step=...)``.
        epoch: Epoch number, used for console output and passed to ``valid``.
        vis: Visualizer. Although the default is ``None``, it is dereferenced
            on every step, so a real instance must be supplied.
    """
    model_cfg = arg.model
    bbox_format = [
        Bbox_Format(model_cfg.net.im_size,
                    model_cfg.net.feature_size[layer_idx],
                    model_cfg.flt_anchor,
                    model_cfg.net.num_class,
                    model_cfg.mask_iou_threshold)
        for layer_idx in (0, 1)
    ]
    net.train()
    for batch_idx, batch_data_dict in enumerate(train_loader):
        vis.iteration_counter = batch_idx
        print('Epoch:{}, step/all: {}/{}'.format(epoch, batch_idx,
                                                 len(train_loader)))
        batch_image = batch_data_dict['image'].to(arg.train.device)
        batch_label = batch_data_dict['label'].to(arg.train.device)

        lr = lr_decay.get_lr(global_step=vis.step)
        optimizer.param_groups[0]['lr'] = lr
        optimizer.zero_grad()

        net_out = net.forward(batch_image)
        predict_list = []
        target_list = []
        losses_list = []
        whole_loss = 0
        for model_out_idx, feature in enumerate(net_out):
            batch_predict = model_out_to_model_predict(
                feature, num_anchors=model_cfg.net.num_anchor)
            predict_list.append(batch_predict)
            batch_target = bbox_format[model_out_idx].to_model(batch_label)
            target_list.append(batch_target)
            # The loss only receives the non-empty tensors; the unfiltered
            # tuples are kept in predict_list/target_list for logging.
            batch_predict = [
                t for t in batch_predict if t.shape != torch.Size([0])]
            batch_target = [
                t for t in batch_target if t.shape != torch.Size([0])]
            layer_loss = loss_func.forward(batch_predict, batch_target)
            losses_list.append(layer_loss)
            whole_loss += torch.mean(layer_loss[0]) / len(net_out)
        whole_loss.backward()
        optimizer.step()

        if batch_idx % arg.train.log_iteration_interval == 0:
            # Logging is read-only: avoid building autograd graphs for it.
            with torch.no_grad():
                vis.line('log10_lr', np.log10(lr))

                # Decode the first sample's boxes from every output layer.
                predict_bbox_list = []
                confidence_list = []
                for model_out_idx, feature in enumerate(net_out):
                    batch_predict = model_out_to_model_predict(
                        feature, num_anchors=model_cfg.net.num_anchor)
                    batch_predict_bbox, batch_confidence = bbox_format[
                        model_out_idx].to_bbox(*batch_predict)
                    predict_bbox_list.append(batch_predict_bbox[0])
                    confidence_list.append(batch_confidence[0])
                predict_bboxes = cxcywh_to_x1y1x2y2(
                    torch.cat(predict_bbox_list, 0))
                confidences = torch.cat(confidence_list, 0)
                bboxes, confidences = confidence_filter(
                    predict_bboxes, confidences,
                    model_cfg.out_confidence_filter_threshold)

                np_im = torch_im_to_cv2_im(batch_image[0]).copy()
                for box in bboxes:
                    x1, y1, x2, y2 = [int(v) for v in box]
                    np_im = cv2.rectangle(np_im, (x1, y1), (x2, y2),
                                          (255, 255, 0))
                vis.image('_0_predict_before_nms', np_im)

                bboxes, confidences = none_max_suppression(
                    bboxes, confidences, model_cfg.nms_iou_threshold)
                np_im = torch_im_to_cv2_im(batch_image[0]).copy()
                for box, conf in zip(bboxes, confidences):
                    x1, y1, x2, y2 = [int(v) for v in box.tolist()]
                    np_im = cv2.rectangle(np_im, (x1, y1), (x2, y2),
                                          (0, 255, 255))
                    np_im = cv2.putText(np_im, str(round(conf.item(), 3)),
                                        ((x1 + x2) // 2, (y1 + y2) // 2),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                        (255, 0, 0), 1)
                vis.image('_0_predict_after_nms', np_im)

                # Scale x by im_size[0] and y by im_size[1], matching the
                # ``im_w, im_h = im_size`` convention used when test labels
                # are drawn. Previously both axes used im_size[0], which is
                # only correct for square inputs.
                im_w = model_cfg.net.im_size[0]
                im_h = model_cfg.net.im_size[1]
                image_show = torch_im_to_cv2_im(batch_image[0]).copy()
                for label_row in batch_label[0]:
                    cx, cy, w, h = label_row.data.cpu().numpy()[1:]
                    x1, y1, x2, y2 = cxcywh_to_x1y1x2y2(cx, cy, w, h)
                    image_show = cv2.rectangle(
                        image_show,
                        (int(x1 * im_w), int(y1 * im_h)),
                        (int(x2 * im_w), int(y2 * im_h)),
                        (0, 255, 0))
                vis.image('_0_target', image_show)
                vis.line('_0_whole_loss', y=whole_loss.item())

                batch_show_idx = 0
                for layer_idx, (losses, batch_predict, batch_target) in enumerate(
                        zip(losses_list, predict_list, target_list)):
                    # Full unpacking keeps the arity check on both tuples.
                    _, _, _, _, _, _, tar_confidence, _ = batch_target
                    _, _, _, _, pre_confidence, _ = batch_predict
                    for anchor_idx in range(model_cfg.net.num_anchor):
                        tag = ('layer' + str(layer_idx) + '_anchor'
                               + str(anchor_idx) + '_confidence/')
                        vis.image(tag + 'target',
                                  tar_confidence[batch_show_idx, anchor_idx])
                        vis.image(tag + 'predict',
                                  pre_confidence[batch_show_idx, anchor_idx])

                    (loss, loss_dx, loss_dy, loss_w, loss_h,
                     loss_confidence, loss_class) = losses
                    # loss_class is intentionally not logged here.
                    for name, term in (('loss', loss),
                                       ('loss_dx', loss_dx),
                                       ('loss_dy', loss_dy),
                                       ('loss_w', loss_w),
                                       ('loss_h', loss_h),
                                       ('loss_confidence', loss_confidence)):
                        vis.line(str(layer_idx) + '/' + name,
                                 y=sum(term.tolist()))

        # NOTE(review): with an interval of 1 this condition is never true,
        # so validation never runs -- confirm the `== 1` offset is intended.
        if batch_idx % arg.train.valid_iteration_interval == 1:
            valid(arg, valid_loader, net, loss_func, epoch, vis)
            net.train()  # valid() leaves the network in eval mode
def train(arg: ARG, train_loader, net, loss_func, optimizer, epoch, vis: VIS = None):
    """Train ``net`` for one pass over ``train_loader`` with periodic logging.

    Each step back-propagates the mean of the per-layer losses and steps the
    optimizer. When ``batch_idx % arg.train.log_iteration_interval == 0`` the
    total loss, the first sample's target boxes, per-anchor confidence maps
    and all per-layer loss terms are sent to ``vis``.

    Args:
        arg: Configuration; reads ``arg.model.*`` and ``arg.train.*``.
        train_loader: Iterable of dicts with ``'image'`` and ``'label'`` tensors.
        net: Network returning one feature map per detection layer.
        loss_func: Loss object whose ``forward`` returns a 7-tuple
            ``(loss, dx, dy, w, h, confidence, class)``.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used for console output only.
        vis: Visualizer. Although the default is ``None``, it is dereferenced
            on every step, so a real instance must be supplied.
    """
    model_cfg = arg.model
    # NOTE(review): only the second formatter receives filter_iou_threshold;
    # the asymmetry is preserved as found -- confirm whether it is intended.
    bbox_format = [
        Bbox_Format(model_cfg.net.im_size,
                    model_cfg.net.feature_size[0],
                    model_cfg.anchor,
                    model_cfg.net.num_class,
                    model_cfg.mask_iou_threshold),
        Bbox_Format(model_cfg.net.im_size,
                    model_cfg.net.feature_size[1],
                    model_cfg.anchor,
                    model_cfg.net.num_class,
                    model_cfg.mask_iou_threshold,
                    model_cfg.filter_iou_threshold),
    ]
    net.train()
    for batch_idx, batch_data_dict in enumerate(train_loader):
        vis.iteration_counter = batch_idx
        print('Epoch:{}, step/all: {}/{}'.format(epoch, batch_idx,
                                                 len(train_loader)))
        batch_image = batch_data_dict['image'].to(arg.train.device)
        batch_label = batch_data_dict['label'].to(arg.train.device)

        optimizer.zero_grad()
        net_out = net.forward(batch_image)
        predict_list = []
        target_list = []
        losses_list = []
        whole_loss = 0
        for model_out_idx, feature in enumerate(net_out):
            batch_predict = model_out_to_model_predict(
                feature, num_anchors=len(model_cfg.anchor))
            predict_list.append(batch_predict)
            batch_target = bbox_format[model_out_idx].to_model(batch_label)
            target_list.append(batch_target)
            layer_loss = loss_func.forward(batch_predict, batch_target)
            losses_list.append(layer_loss)
            whole_loss += torch.mean(layer_loss[0]) / len(net_out)
        whole_loss.backward()
        optimizer.step()

        if batch_idx % arg.train.log_iteration_interval == 0:
            vis.line('_0_whole_loss', y=whole_loss.item())

            # Scale normalised label boxes by the configured image size
            # instead of a hard-coded 160, so the overlay stays correct when
            # im_size changes. x uses im_size[0], y uses im_size[1].
            im_w = model_cfg.net.im_size[0]
            im_h = model_cfg.net.im_size[1]
            image_show = torch_im_to_cv2_im(batch_image[0]).copy()
            for label_row in batch_label[0]:
                cx, cy, w, h = label_row.data.cpu().numpy()[1:]
                x1, y1, x2, y2 = cxcywh_to_x1y1x2y2(cx, cy, w, h)
                image_show = cv2.rectangle(
                    image_show,
                    (int(x1 * im_w), int(y1 * im_h)),
                    (int(x2 * im_w), int(y2 * im_h)),
                    (0, 255, 0))
            vis.image('_0_target', image_show)

            batch_show_idx = 0
            for layer_idx, (losses, batch_predict, batch_target) in enumerate(
                    zip(losses_list, predict_list, target_list)):
                # Full unpacking keeps the arity check on both tuples.
                _, _, _, _, _, _, tar_confidence, _ = batch_target
                _, _, _, _, pre_confidence, _ = batch_predict
                for anchor_idx in range(model_cfg.net.num_anchor):
                    tag = ('layer' + str(layer_idx) + '_anchor'
                           + str(anchor_idx) + '_confidence/')
                    vis.image(tag + 'target',
                              tar_confidence[batch_show_idx, anchor_idx])
                    vis.image(tag + 'predict',
                              pre_confidence[batch_show_idx, anchor_idx])

                (loss, loss_dx, loss_dy, loss_w, loss_h,
                 loss_confidence, loss_class) = losses
                for name, term in (('loss', loss),
                                   ('loss_dx', loss_dx),
                                   ('loss_dy', loss_dy),
                                   ('loss_w', loss_w),
                                   ('loss_h', loss_h),
                                   ('loss_confidence', loss_confidence),
                                   ('loss_class', loss_class)):
                    vis.line(str(layer_idx) + '/' + name,
                             y=sum(term.tolist()))