def train(model, train_loader, criterion, epoch, vis):
    model.train()
    batch_loss = 0
    for batch_idx, sample_batched in enumerate(train_loader):
        data = sample_batched['image']
        target = sample_batched['mask']
        data, target = Variable(data.type(opt.dtype)), Variable(target.type(opt.dtype))
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        batch_loss += loss.data[0]
        if (batch_idx + 1) % opt.plot_every == 0:
            ori_img_ = inverse_normalize(at.tonumpy(data[0]))
            target_ = at.tonumpy(target[0])
            pred_ = at.tonumpy(output[0])
            vis.img('gt_img', ori_img_)
            vis.img('gt_mask', target_)
            vis.img('pred_mask', (pred_ >= 0.5).astype(np.float32))
    batch_loss /= (batch_idx + 1)
    print('epoch: ' + str(epoch) + ', train loss: ' + str(batch_loss))
    with open('logs.txt', 'a') as file:
        file.write('epoch: ' + str(epoch) + ', train loss: ' + str(batch_loss) + '\n')
    vis.plot('train loss', batch_loss)
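# A minimal sketch of the `inverse_normalize` helper assumed throughout these
# snippets, modeled on simple-faster-rcnn-pytorch's data utils (an assumption;
# the mean/std constants belong to that project and should be adapted to match
# whatever normalization your dataset applies).
import numpy as np

def inverse_normalize(img):
    # img: CHW float array produced by the dataset's normalization
    if opt.caffe_pretrain:
        # caffe-pretrained models: add the per-channel mean back and restore
        # the channel order (the caffe pipeline works in BGR)
        img = img + np.array([122.7717, 115.9465, 102.9801]).reshape(3, 1, 1)
        return img[::-1, :, :]
    # torchvision-pretrained models: undo (x - 0.45) / 0.225, clip to [0, 1],
    # and rescale to [0, 255]
    return (img * 0.225 + 0.45).clip(min=0, max=1) * 255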
def eval(dataloader, faster_rcnn, vis, test_num=10000):
    pred_bboxes, pred_labels, pred_scores = list(), list(), list()
    gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
    for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_) in tqdm(enumerate(dataloader)):
        # plot ground truth bboxes
        sizes = [sizes[0][0].item(), sizes[1][0].item()]
        pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(imgs, [sizes])
        img = imgs.cuda().float()
        ori_img_ = inverse_normalize(at.tonumpy(img[0]))
        pred_img = visdom_bbox(ori_img_,
                               at.tonumpy(pred_bboxes_[0]),
                               at.tonumpy(pred_labels_[0]).reshape(-1),
                               at.tonumpy(pred_scores_[0]))
        vis.img('test_pred_img', pred_img)
        gt_bboxes += list(gt_bboxes_.numpy())
        gt_labels += list(gt_labels_.numpy())
        gt_difficults += list(gt_difficults_.numpy())
        pred_bboxes += pred_bboxes_
        pred_labels += pred_labels_
        pred_scores += pred_scores_
        if ii == test_num:
            break
    result = eval_detection_voc(pred_bboxes, pred_labels, pred_scores,
                                gt_bboxes, gt_labels, gt_difficults,
                                use_07_metric=True)
    return result
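# The at.* helpers (at.tonumpy, at.totensor, at.scalar) used above come from an
# array_tool module. A rough sketch of what they are assumed to do, based on
# simple-faster-rcnn-pytorch's array_tool (not necessarily identical to the
# version each snippet imports):
import numpy as np
import torch as t

def tonumpy(data):
    # accept numpy arrays or torch tensors, always return a numpy array
    if isinstance(data, np.ndarray):
        return data
    if isinstance(data, t.Tensor):
        return data.detach().cpu().numpy()

def totensor(data, cuda=True):
    # convert numpy arrays (or tensors) to a detached torch tensor
    if isinstance(data, np.ndarray):
        tensor = t.from_numpy(data)
    if isinstance(data, t.Tensor):
        tensor = data.detach()
    if cuda:
        tensor = tensor.cuda()
    return tensor

def scalar(data):
    # extract a single Python scalar from a one-element array or tensor
    if isinstance(data, np.ndarray):
        return data.reshape(1)[0]
    if isinstance(data, t.Tensor):
        return data.item()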
def train(**kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    # img, bbox, label, scale = dataset[0]
    # The returned img has been rescaled and may have been randomly flipped.
    # The returned bbox is ordered (ymin, xmin, ymax, xmax).
    # H, W = size(im)
    # For an image shown on screen, a, b, c, d denote the four corners:
    #   a ... b   ymin
    #   .     .
    #   c ... d   ymax   (height H: y ranges over [0, H-1])
    #   xmin  xmax       (width  W: x ranges over [0, W-1])
    print('load data')
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=True,
                                             # pin_memory=True,
                                             num_workers=opt.num_workers)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn)
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    for epoch in range(opt.epoch):
        for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
            print('step: ', ii)
            scale = at.scalar(scale)
            img, bbox, label = img.float(), bbox_, label_
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)
            if ((ii + 1) % opt.plot_every == 0) and (epoch > 50):
                # Only start predicting after enough training; with too few
                # epochs the predictions are not meaningful yet.
                # if os.path.exists(opt.debug_file):
                #     ipdb.set_trace()

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                # gt_img is a numpy array in [0, 1], shaped 3 x H x W;
                # save or display this annotated image here.

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
def imgflip(img, bbox, x_flip=True, y_flip=True):
    imgs = at.tonumpy(img[0])
    if y_flip:
        imgs = imgs[:, ::-1, :]
    if x_flip:
        imgs = imgs[:, :, ::-1]
    # print(imgs)
    imgs = np.expand_dims(imgs, axis=0)
    return inverse_normalize(imgs)
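# imgflip above flips only the pixels; the ground-truth boxes must be flipped
# with the same flags. A sketch of the matching box transform, following
# chainercv's flip_bbox convention of (ymin, xmin, ymax, xmax) rows (an
# assumption; check it against your own bbox layout):
def flip_bbox(bbox, size, y_flip=False, x_flip=False):
    H, W = size
    bbox = bbox.copy()
    if y_flip:
        y_max = H - bbox[:, 0]
        y_min = H - bbox[:, 2]
        bbox[:, 0] = y_min
        bbox[:, 2] = y_max
    if x_flip:
        x_max = W - bbox[:, 1]
        x_min = W - bbox[:, 3]
        bbox[:, 1] = x_min
        bbox[:, 3] = x_max
    return bbox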
def train(**kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                # ori_img_ = (at.tonumpy(img[0]))
                losses = trainer.get_meter_data()
                print(losses)
                write_image(ori_img_, at.tonumpy(bbox[0]), 'gt.png')

                # plot predicted bboxes (predict returns bboxes, labels, scores)
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                _bboxes = at.tonumpy(_bboxes[0])
                write_image(ori_img_, _bboxes, 'pred.png')
                print('saved an image')
        if epoch == 13:
            break
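# write_image above is not defined in this snippet. A plausible matplotlib-based
# sketch (hypothetical helper: it draws (ymin, xmin, ymax, xmax) boxes on a CHW
# image in [0, 255] and saves the figure to disk):
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

def write_image(img, bboxes, path):
    plt.figure()
    plt.imshow(img.transpose(1, 2, 0).astype('uint8'))
    for ymin, xmin, ymax, xmax in bboxes.reshape(-1, 4):
        plt.gca().add_patch(
            Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                      fill=False, edgecolor='r'))
    plt.axis('off')
    plt.savefig(path, bbox_inches='tight')
    plt.close()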
def train(individual, **kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNN_mine(individual)
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    best_map = 0
    lr_ = opt.lr
    # keep the best checkpoint path across epochs so the reload at epoch 9
    # still works when the current epoch did not improve the mAP
    best_path = None
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

    individual.accuracy = best_map
def eval_mAP(trainer, val_loader):
    tqdm.monitor_interval = 0
    mAP = []
    for ii, sample in tqdm(enumerate(val_loader)):
        if len(sample.keys()) == 5:
            img_id, img, bbox, scale, label = (sample['img_id'], sample['image'],
                                               sample['bbox'], sample['scale'],
                                               sample['label'])
            img, bbox, label = img.cuda().float(), bbox.cuda(), label.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
        else:
            img_id, img, scale = sample['img_id'], sample['image'], sample['scale']
            bbox = np.zeros((1, 0, 4))
            label = np.zeros((1, 0, 1))
            img = img.cuda().float()
            img = Variable(img)
        # if bbox is None:
        #     continue
        scale = at.scalar(scale)
        ori_img_ = inverse_normalize(at.tonumpy(img[0]))
        pred_boxes, pred_labels, pred_scores = trainer.faster_rcnn.predict(
            [ori_img_], visualize=True)
        pred_boxes = pred_boxes[0]
        pred_labels = pred_labels[0]
        pred_scores = pred_scores[0]
        bbox = at.tonumpy(bbox[0])

        # Rescale back to the original image size.
        C, H, W = ori_img_.shape
        ori_img_ = transform.resize(ori_img_,
                                    (C, H * (1 / scale), W * (1 / scale)),
                                    mode='reflect')
        o_H, o_W = H * (1 / scale), W * (1 / scale)
        pred_boxes = resize_bbox(pred_boxes, (H, W), (o_H, o_W))
        bbox = resize_bbox(bbox, (H, W), (o_H, o_W))

        mAP.append(map_iou(bbox, pred_boxes, pred_scores))
        # if ii >= 100:
        #     break
    mAP = np.array(mAP)
    mAP = mAP[mAP != np.array(None)].astype(np.float32)
    return np.mean(mAP)
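# resize_bbox above rescales (ymin, xmin, ymax, xmax) boxes from the network
# input size back to the original image size. A sketch following chainercv's
# resize_bbox signature (an assumption about which helper is actually imported):
def resize_bbox(bbox, in_size, out_size):
    bbox = bbox.copy()
    y_scale = float(out_size[0]) / in_size[0]
    x_scale = float(out_size[1]) / in_size[1]
    bbox[:, 0] = y_scale * bbox[:, 0]
    bbox[:, 2] = y_scale * bbox[:, 2]
    bbox[:, 1] = x_scale * bbox[:, 1]
    bbox[:, 3] = x_scale * bbox[:, 3]
    return bbox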
def run_test(model, test_loader):
    pred_masks = []
    img_ids = []
    images = []
    for batch_idx, sample_batched in tqdm(enumerate(test_loader)):
        data, img_id = sample_batched['image'], sample_batched['img_id']
        data = Variable(data.type(opt.dtype), volatile=True)
        output = model.forward(data)
        # output = (output > 0.5)
        output = at.tonumpy(output)
        for i in range(0, output.shape[0]):
            pred_mask = np.squeeze(output[i])
            id = img_id[i]
            pred_mask = (pred_mask >= 0.5).astype(np.float32)
            pred_masks.append(pred_mask)
            img_ids.append(id)
            ori_img_ = inverse_normalize(at.tonumpy(data[i]))
            images.append(ori_img_)
    return img_ids, images, pred_masks
def input_visual(imgs, boxes, labels):
    imgs = inverse_normalize(at.tonumpy(imgs.squeeze())) / 255
    plt.figure(figsize=(8, 8))
    plt.imshow(imgs.transpose(1, 2, 0))
    input_boxes = boxes.reshape(-1, 4)
    w = input_boxes[:, 3] - input_boxes[:, 1]
    h = input_boxes[:, 2] - input_boxes[:, 0]
    for i in range(input_boxes.shape[0]):
        plt.gca().add_patch(
            Rectangle(input_boxes[i][[1, 0]], w[i], h[i],
                      fill=False, edgecolor='r'))
        plt.text(
            input_boxes[i][1], input_boxes[i][0],
            dv.VOC_BBOX_LABEL_NAMES[labels.reshape(-1, len(input_boxes))[0][i]])
    plt.axis("off")
    plt.show()
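# Hypothetical usage of input_visual with synthetic data, to make the expected
# shapes explicit: imgs is a normalized (1, 3, H, W) tensor, boxes holds
# (ymin, xmin, ymax, xmax) rows, and labels index into VOC_BBOX_LABEL_NAMES.
imgs = torch.randn(1, 3, 600, 800)
boxes = np.array([[100., 150., 300., 400.]])
labels = np.array([0])
input_visual(imgs, boxes, labels)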
def train(**kwargs):
    opt._parse(kwargs)  # parse the config settings

    dataset = Dataset(opt)  # build the training dataset from the configured parameters
    print('load data')
    # Training DataLoader; the code only supports batch_size=1.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)  # build the test dataset from the configured parameters
    # Test DataLoader; again, only batch_size=1 is supported.
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()  # build Faster R-CNN with a VGG backbone
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()  # wrap the network in the trainer
    if opt.load_path:  # if pretrained weights are configured, load them
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0  # best mAP so far; used to decide when to save the model
    lr_ = opt.lr  # the configured initial learning rate
    for epoch in range(opt.epoch):  # train for opt.epoch epochs
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)  # convert scale to a Python scalar
            # bbox holds the ground-truth coordinates (ymin, xmin, ymax, xmax);
            # label holds indices into VOC_BBOX_LABEL_NAMES;
            # img is the image; only batch_size=1 training is supported.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()  # train on GPU
            trainer.train_step(img, bbox, label, scale)  # preprocessing done, run the model

            if (ii + 1) % opt.plot_every == 0:  # visualization
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)  # evaluate once per epoch
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']  # current learning rate
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),  # log learning rate, mAP, and losses
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:  # save the model whenever the mAP improves
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:  # at epoch 9, reload the best model and decay the learning rate
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:  # stop training after 14 epochs
            break
im_path_clone = str(img_id)
# save_path = _data.save_dir + 'f0rcnn' + im_path.split('/')[-1]
save_path = _data.save_dir + im_path_clone.split('/')[-1] + '.jpg'
save_path_adv = _data.save_dir_adv + im_path_clone.split('/')[-1] + '.jpg'
save_path_perturb = (_data.save_dir_perturb + 'frcnn_perturb_' +
                     im_path_clone.split('/')[-1] + '.jpg')

if not os.path.exists(_data.save_dir):
    os.makedirs(_data.save_dir)
if not os.path.exists(_data.save_dir_adv):
    os.makedirs(_data.save_dir_adv)
if not os.path.exists(_data.save_dir_perturb):
    os.makedirs(_data.save_dir_perturb)

ori_img_ = inverse_normalize(at.tonumpy(img[0]))
# _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_],
#                                                         new_score=quality, visualize=True)
before = time.time()
adv_img, perturb, distance = trainer.attacker.perturb_mD_img(
    img, save_perturb=save_path_perturb, rois=rois, roi_scores=roi_scores)
after = time.time()
generate_time = after - before
total_time = total_time + generate_time
total_distance = total_distance + distance
adv_img_ = inverse_normalize(at.tonumpy(adv_img[0]))
perturb_ = inverse_normalize(at.tonumpy(perturb[0]))
del adv_img, perturb, img
def train_val():
    print('load data')
    train_loader, val_loader = get_train_val_loader(
        opt.root_dir, batch_size=opt.batch_size, val_ratio=0.1,
        shuffle=opt.shuffle, num_workers=opt.num_workers,
        pin_memory=opt.pin_memory)
    faster_rcnn = FasterRCNNVGG16()
    # faster_rcnn = FasterRCNNResNet50()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    # if opt.load_path:
    #     trainer.load(opt.load_path)
    #     print('load pretrained model from %s' % opt.load_path)
    # trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        tqdm.monitor_interval = 0
        for ii, sample in tqdm(enumerate(train_loader)):
            if len(sample.keys()) == 5:
                img_id, img, bbox, scale, label = (sample['img_id'], sample['image'],
                                                   sample['bbox'], sample['scale'],
                                                   sample['label'])
                img, bbox, label = img.cuda().float(), bbox.cuda(), label.cuda()
                img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            else:
                img_id, img, bbox, scale, label = (sample['img_id'], sample['image'],
                                                   np.zeros((1, 0, 4)), sample['scale'],
                                                   np.zeros((1, 0, 1)))
                img = img.cuda().float()
                img = Variable(img)
            if bbox.size == 0:
                continue
            scale = at.scalar(scale)
            trainer.train_step(img_id, img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, img_id[0],
                                     at.tonumpy(bbox[0]),
                                     at.tonumpy(label[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, img_id[0],
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        mAP = eval_mAP(trainer, val_loader)
        trainer.vis.plot('val_mAP', mAP)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(mAP), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if mAP > best_map:
            best_map = mAP
            best_path = trainer.save(best_map=best_map)
        if epoch == opt.epoch - 1:
            best_path = trainer.save()
        if (epoch + 1) % 10 == 0:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
def train(**kwargs):
    # opt._parse(kwargs)
    print('load data')
    dataloader = get_train_loader(opt.root_dir,
                                  batch_size=opt.batch_size,
                                  shuffle=opt.shuffle,
                                  num_workers=opt.num_workers,
                                  pin_memory=opt.pin_memory)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    # if opt.load_path:
    #     trainer.load(opt.load_path)
    #     print('load pretrained model from %s' % opt.load_path)
    # trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, sample in tqdm(enumerate(dataloader)):
            if len(sample.keys()) == 5:
                img_id, img, bbox_, scale, label_ = (sample['img_id'], sample['image'],
                                                     sample['bbox'], sample['scale'],
                                                     sample['label'])
                img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
                img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            else:
                img_id, img, bbox, scale, label = (sample['img_id'], sample['image'],
                                                   np.zeros((1, 0, 4)), sample['scale'],
                                                   np.zeros((1, 0, 1)))
                img = img.cuda().float()
                img = Variable(img)
            # if label.size == 0:
            #     continue
            scale = at.scalar(scale)
            trainer.train_step(img_id, img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        if epoch % 10 == 0:
            best_path = trainer.save(best_map=best_map)
def cutmix_generate(img, scale, paste_scale, paste_img, attention_map, bboxes,
                    labels, paste_bboxes, paste_labels, info, bbox_drop=True,
                    rescale=True, device='cuda', threshold=0.7,
                    overlap_threshold=0.7):
    src_img = inverse_normalize(at.tonumpy(img.squeeze()))
    src_img = torch.from_numpy(src_img).to(device)
    target_img = inverse_normalize(at.tonumpy(paste_img.squeeze()))
    target_img = torch.from_numpy(target_img.squeeze().copy()).to(device)
    target_img_copy = target_img.clone()
    src_size = src_img.shape
    target_size = target_img.shape
    img_size = [
        target_size[1] if target_size[1] > src_size[1] else src_size[1],
        target_size[2] if target_size[2] > src_size[2] else src_size[2]
    ]
    if target_size[1] >= img_size[1] and target_size[2] >= src_size[2]:
        new_scale = paste_scale
    else:
        new_scale = scale
    attention_map = torch.nn.functional.interpolate(
        attention_map.unsqueeze(0).unsqueeze(0), img_size).squeeze()
    mask = torch.zeros_like(attention_map).to(device)
    # print(attention_map.max(), attention_map.min(), attention_map.mean())
    mean_threshold = attention_map.mean()
    mask[attention_map > mean_threshold] = 1
    mask[attention_map <= mean_threshold] = 0  # mask marks the salient regions
    # print(torch.sum(mask == 1) / mask.shape[0] / mask.shape[1])
    # if torch.sum(mask) / mask.shape[0] / mask.shape[1] > 0.9:
    #     info["use_cutmix"] = 0
    #     res_img = pytorch_normalze(target_img_copy / 255).unsqueeze(dim=0)
    #     return res_img, torch.from_numpy(paste_bboxes), torch.from_numpy(paste_labels), paste_scale, info
    mask = mask.int()
    x_scale = img_size[0] / src_size[1]
    y_scale = img_size[1] / src_size[2]
    x_scale_p = img_size[0] / target_size[1]
    y_scale_p = img_size[1] / target_size[2]
    h, w = img_size
    src_img = torch.nn.functional.interpolate(src_img.unsqueeze(0), img_size).squeeze()
    target_img = torch.nn.functional.interpolate(target_img.unsqueeze(0), img_size).squeeze()
    if not (len(bboxes.shape) == 2 and bboxes.shape[1] == 4):
        bboxes = bboxes.reshape([-1, 4])
    bboxes = bboxes.int()
    labels = labels.reshape(-1, )
    ori_mask = torch.zeros(src_img.shape[1:]).to(device)
    new_bboxes = []
    new_labels = []
    paste_bboxes_cp = paste_bboxes.copy()
    bboxes_cp = bboxes.clone().detach()
    res_img = target_img.clone()
    b_h = (bboxes[:, 3] - bboxes[:, 1]).unsqueeze(dim=1)
    b_w = (bboxes[:, 2] - bboxes[:, 0]).unsqueeze(dim=1)
    b = torch.cat([b_h, b_w], axis=1)
    areas = torch.prod(b, 1)
    _, index = torch.sort(areas)
    for i in index:
        bboxes_cp[i, [0, 2]] = (bboxes[i, [0, 2]] * x_scale).int()
        bboxes_cp[i, [1, 3]] = (bboxes[i, [1, 3]] * y_scale).int()
        ymin, xmin, ymax, xmax = bboxes_cp[i].int()
        x0 = ymin.item()
        x1 = ymax.item()
        y0 = xmin.item()
        y1 = xmax.item()
        ori_mask[x0:x1, y0:y1] = 1
        ori_mask = ori_mask.int()
        area = (x1 - x0) * (y1 - y0)
        # print(area / w / h)
        portion = torch.sum(mask[x0:x1, y0:y1] & ori_mask[x0:x1, y0:y1]) / area
        if bbox_drop:
            remain_flag = True if np.random.rand(1) > 0.2 else False
        else:
            remain_flag = True
        if rescale:
            rescale_flag = True if np.random.rand(1) > 0.0 else False
        else:
            rescale_flag = False
        if rescale_flag:
            if np.random.randn(1) >= 0.5:
                rescale_conf = np.random.randint(low=10, high=1000, size=1)[0] * 0.0001 + 1
            else:
                rescale_conf = np.random.rand(1)[0]
            # print(rescale_conf)
        if portion < threshold or not remain_flag or area / h / w > 0.8:
            # Skip bboxes that cover too large a fraction of the image:
            # clear this region, then re-mark the parts of the other boxes
            # that intersect the cleared region.
            mask[x0:x1, y0:y1] = 0
            for j in range(len(bboxes_cp)):
                x_lo = int(np.maximum(x0, bboxes_cp[j][0].detach().cpu().numpy()))
                y_lo = int(np.maximum(y0, bboxes_cp[j][1].detach().cpu().numpy()))
                x_hi = int(np.minimum(x1, bboxes_cp[j][2].detach().cpu().numpy()))
                y_hi = int(np.minimum(y1, bboxes_cp[j][3].detach().cpu().numpy()))
                mask[x_lo:x_hi, y_lo:y_hi] = 1
        else:
            index_y = torch.where(mask[ymin:ymax, xmin:xmax] == 1)[0].unique().sort()[0] + ymin
            index_x = torch.where(mask[ymin:ymax, xmin:xmax] == 1)[1].unique().sort()[0] + xmin
            # keep the bbox as close to the image border as possible
            mask_m = (mask[ymin:ymax, xmin:xmax] & ori_mask[ymin:ymax, xmin:xmax]).bool()
            mask_m = torch.stack([mask_m, mask_m, mask_m], dim=0)
            sub_obj = src_img[:, ymin:ymax, xmin:xmax]  # the extracted object
            if len(index_y) < 2 or len(index_x) < 2:
                print('!', index_y, type(index_y))
            if (rescale_flag
                    and 0 < ymin * rescale_conf < h and 0 < ymax * rescale_conf < h
                    and 0 < xmin * rescale_conf < w and 0 < xmax * rescale_conf < w
                    and (ymax - ymin) * rescale_conf * (xmax - xmin) * rescale_conf > 400):
                index_y = (index_y * rescale_conf).int()
                index_x = (index_x * rescale_conf).int()
                bbox_w = xmax.item() - xmin.item()
                bbox_h = ymax.item() - ymin.item()
                # print("before", ymin.item(), ymax.item(), xmin.item(), xmax.item())
                ymin = int(ymin * rescale_conf)
                ymax = int(ymax * rescale_conf)
                xmin = int(xmin * rescale_conf)
                xmax = int(xmax * rescale_conf)
                # print("enlarge", ymin, ymax, xmin, xmax)
                # print(bbox_h * rescale_conf, bbox_w * rescale_conf)
                resize_f = tvtsf.Resize([int(bbox_h * rescale_conf), int(bbox_w * rescale_conf)])
                r_mask_m = resize_f(mask_m).bool()
                res_img[:, ymin:ymin + int(bbox_h * rescale_conf),
                        xmin:xmin + int(bbox_w * rescale_conf)][r_mask_m] = resize_f(sub_obj)[r_mask_m]
            else:
                # print(ymax * rescale_conf, ymin * rescale_conf, h)
                # print(xmax * rescale_conf, xmin * rescale_conf, w)
                res_img[:, ymin:ymax, xmin:xmax][mask_m] = sub_obj[mask_m]
            new_bboxes.append([index_y[0].item(), index_x[0].item(),
                               index_y[-1].item(), index_x[-1].item()])
            new_labels.append(labels[i])

    new_bboxes_cp = np.concatenate([new_bboxes])
    mask = mask & ori_mask
    for i in range(paste_bboxes.shape[0]):
        paste_bboxes_cp[i, [0, 2]] = (paste_bboxes[i, [0, 2]] * x_scale_p).astype(int)
        paste_bboxes_cp[i, [1, 3]] = (paste_bboxes[i, [1, 3]] * y_scale_p).astype(int)
        if len(new_bboxes_cp) > 0:
            ymin, xmin, ymax, xmax = paste_bboxes_cp[i]
            area = (ymax - ymin) * (xmax - xmin)
            overlap = torch.sum(mask[int(ymin):int(ymax), int(xmin):int(xmax)] == 1) / area
            # print(overlap)
            # lt = np.maximum(paste_bboxes_cp[i:i+1, :2], new_bboxes_cp[:, :2])
            # rb = np.minimum(paste_bboxes_cp[i:i+1, 2:], new_bboxes_cp[:, 2:])
            # overlap = np.prod(rb - lt, axis=1) / area
            if (overlap > overlap_threshold).any():
                continue
            # if np.sum(mask[ymin:ymax, xmin:xmax] == 1) / area > 0.6:
            #     continue
        new_bboxes.append(paste_bboxes_cp[i])
        new_labels.append(paste_labels[i])

    if len(new_bboxes) == 0:
        new_bboxes = None
    else:
        new_bboxes = torch.from_numpy(np.concatenate([new_bboxes])).float().to(device).unsqueeze(dim=0)
        new_labels = torch.Tensor(new_labels).int().to(device).unsqueeze(dim=0)
    mask = mask.int()
    new_mask = mask
    # res_img = src_img.mul(new_mask) + target_img.mul(1 - new_mask)
    area = torch.sum(new_mask == 1).float().item() / (torch.sum(ori_mask == 1) + 1e-8).float().item()
    # compute iou
    res_img = res_img / 255
    res_img = res_img.float()
    # if area < 0.6 or new_bboxes is None:
    #     print(area)
    if new_bboxes is None:
        info["use_cutmix"] = 0
        res_img = pytorch_normalze(target_img_copy / 255).unsqueeze(dim=0)
        return (res_img, torch.from_numpy(paste_bboxes),
                torch.from_numpy(paste_labels), paste_scale, info)
    else:
        info["use_cutmix"] = 1
        res_img = pytorch_normalze(res_img).unsqueeze(dim=0)
        return res_img, new_bboxes, new_labels, new_scale, info
def train(opt, faster_rcnn, dataloader, val_dataloader, test_dataloader,
          trainer, lr_, best_map, start_epoch):
    trainer.train()
    for epoch in range(start_epoch, start_epoch + opt.epoch):
        trainer.reset_meters()
        pbar = tqdm(enumerate(dataloader), total=len(dataloader))
        for ii, (img, bbox_, label_, scale) in pbar:
            # Currently configured to predict (y_min, x_min, y_max, x_max)
            # bbox_tmp = bbox_.clone()
            # bbox_ = transform_bbox(bbox_)
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            losses = trainer.train_step(img, bbox, label, scale)

            if ii % 100 == 0:
                rpnloc = losses[0].cpu().data.numpy()
                rpncls = losses[1].cpu().data.numpy()
                roiloc = losses[2].cpu().data.numpy()
                roicls = losses[3].cpu().data.numpy()
                tot = losses[4].cpu().data.numpy()
                pbar.set_description(
                    f"Epoch: {epoch} | Batch: {ii} | RPNLoc Loss: {rpnloc:.4f} | "
                    f"RPNclc Loss: {rpncls:.4f} | ROIloc Loss: {roiloc:.4f} | "
                    f"ROIclc Loss: {roicls:.4f} | Total Loss: {tot:.4f}")

            if (ii + 1) % 1000 == 0:
                eval_result = eval(val_dataloader, faster_rcnn, test_num=1000)
                trainer.vis.plot('val_map', eval_result['map'])
                lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
                val_log_info = 'lr:{}, map:{},loss:{}'.format(
                    str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
                trainer.vis.log(val_log_info)
                print("Evaluation Results on Val Set ")
                print(val_log_info)
                print("\n\n")

            if (ii + 1) % 100 == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())
                print(trainer.get_meter_data())
                try:
                    # plot ground truth bboxes
                    ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                    gt_img = visdom_bbox(ori_img_,
                                         at.tonumpy(bbox_[0]),
                                         at.tonumpy(label_[0]))
                    trainer.vis.img('gt_img', gt_img)
                    plt.show()

                    # plot predicted bboxes
                    _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                    pred_img = visdom_bbox(ori_img_,
                                           at.tonumpy(_bboxes[0]),
                                           at.tonumpy(_labels[0]).reshape(-1),
                                           at.tonumpy(_scores[0]))
                    plt.show()
                    trainer.vis.img('pred_img', pred_img)

                    # rpn confusion matrix (meter)
                    trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                    # roi confusion matrix
                    trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
                except Exception:
                    print("Cannot display images")

            if (ii + 1) % 100 == 0:
                eval_result = eval(val_dataloader, faster_rcnn, test_num=25)
                trainer.vis.plot('val_map', eval_result['map'])
                log_info = 'lr:{}, map:{},loss:{}'.format(
                    str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
                trainer.vis.log(log_info)

        # Save after every epoch
        epoch_path = trainer.save(epoch, best_map=0)

        eval_result = eval(test_dataloader, faster_rcnn, test_num=1000)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        test_log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(test_log_info)
        print("Evaluation Results on Test Set ")
        print(test_log_info)
        print("\n\n")
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = epoch_path
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:
            break
def train():
    # Get the dataset
    for phase in phases:
        if phase == 'train':
            if dataset_name == 'hollywood':
                train_data_list_path = os.path.join(
                    opt.hollywood_dataset_root_path, 'hollywood_train.idl')
                train_data_list = utils.get_phase_data_list(
                    train_data_list_path, dataset_name)
            if dataset_name == 'brainwash':
                train_data_list_path = os.path.join(
                    opt.brainwash_dataset_root_path, 'brainwash_train.idl')
                train_data_list = utils.get_phase_data_list(
                    train_data_list_path, dataset_name)
        elif phase == 'val':
            if dataset_name == 'hollywood':
                val_data_list_path = os.path.join(
                    opt.hollywood_dataset_root_path, 'hollywood_val.idl')
                val_data_list = utils.get_phase_data_list(
                    val_data_list_path, dataset_name)
            if dataset_name == 'brainwash':
                val_data_list_path = os.path.join(
                    opt.brainwash_dataset_root_path, 'brainwash_val.idl')
                val_data_list = utils.get_phase_data_list(
                    val_data_list_path, dataset_name)
        elif phase == 'test':
            if dataset_name == 'hollywood':
                test_data_list_path = os.path.join(
                    opt.hollywood_dataset_root_path, 'hollywood_test.idl')
                test_data_list = utils.get_phase_data_list(
                    test_data_list_path, dataset_name)
            if dataset_name == 'brainwash':
                test_data_list_path = os.path.join(
                    opt.brainwash_dataset_root_path, 'brainwash_test.idl')
                test_data_list = utils.get_phase_data_list(
                    test_data_list_path, dataset_name)

    print("Number of images for training: %s" % (len(train_data_list)))
    print("Number of images for val: %s" % (len(val_data_list)))
    print("Number of images for test: %s" % (len(test_data_list)))

    if data_check_flag:
        # random.randint is inclusive on both ends, so index up to len - 1
        utils.check_loaded_data(train_data_list[random.randint(0, len(train_data_list) - 1)])
        utils.check_loaded_data(val_data_list[random.randint(0, len(val_data_list) - 1)])
        utils.check_loaded_data(test_data_list[random.randint(0, len(test_data_list) - 1)])

    # Load the train dataset
    train_dataset = Dataset(train_data_list)
    test_dataset = Dataset(val_data_list)
    print("Load data.")
    train_dataloader = data_.DataLoader(train_dataset, batch_size=1,
                                        shuffle=True, num_workers=1)
    test_dataloader = data_.DataLoader(test_dataset, batch_size=1,
                                       shuffle=True, num_workers=1)

    # Initialize the head detector.
    head_detector_vgg16 = Head_Detector_VGG16(ratios=[1], anchor_scales=[8, 16])
    print("model construct completed")
    trainer = Head_Detector_Trainer(head_detector_vgg16).cuda()
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, scale) in enumerate(train_dataloader):
            scale = at.scalar(scale)
            img, bbox = img.cuda().float(), bbox_.cuda()
            img, bbox = Variable(img), Variable(bbox)
            _, _, _ = trainer.train_step(img, bbox, scale)
            print("Forward and backward pass done.")
            if (ii + 1) % opt.plot_every == 0:
                trainer.vis.plot_many(trainer.get_meter_data())
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]))
                trainer.vis.img('gt_img', gt_img)
                rois, _ = trainer.head_detector.predict(img, scale=scale, mode='visualize')
                pred_img = visdom_bbox(ori_img_, at.tonumpy(rois))
                trainer.vis.img('pred_img', pred_img)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')

        avg_test_CorrLoc = eval(test_dataloader, head_detector_vgg16)
        print("Epoch {} of {}.".format(epoch + 1, opt.epoch))
        print("  test average corrLoc accuracy:\t\t{:.3f}".format(avg_test_CorrLoc))
        model_save_path = trainer.save(best_map=avg_test_CorrLoc)
        if epoch == 8:
            trainer.load(model_save_path)
            trainer.head_detector.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
def train(**kwargs):
    # Parse the extra kwargs with opt._parse() from config.py, which resolves
    # the data paths before they are handed to Dataset.
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    # The test loader is needed below for eval(); pin_memory page-locks host
    # memory for faster transfers to the GPU.
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       # pin_memory=True
                                       )
    faster_rcnn = FasterRCNN(7)
    print('model construct completed')
    pdb.set_trace()
    trainer = Trainer(faster_rcnn).cuda()
    # If opt.load_path is set, read the pretrained model from it, then the
    # training labels can be visualized.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    best_map = 0
    lr_ = opt.lr
    # Training loop; the number of epochs (opt.epoch = 14) is a hyperparameter
    # predefined in config.py.
    for epoch in range(opt.epoch):
        print('epoch {}/{}'.format(epoch, opt.epoch))
        trainer.reset_meters()  # reset all meters in the visualization first
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # plot loss
                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(array_tool.tonumpy(img[0]))
                # plot predicted bboxes: call faster_rcnn's predict function and
                # keep the results in the underscore-prefixed variables
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=False)

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{}, loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:  # at epoch 9, scale the learning rate down to a tenth
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:
            break
def train(**kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:
            break
def train(**kwargs):
    """Train."""
    # Parse command-line arguments and set the config parameters.
    opt._parse(kwargs)

    # Initialize the dataset.
    dataset = Dataset(opt)
    print('load data')
    # data_ is the (renamed) PyTorch data-loading module.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    # Initialize the test dataset.
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    # Build a FasterRCNNVGG16.
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    # Build a trainer around the FasterRCNNVGG16 model and move it to the GPU.
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    # If available, load a previously trained model.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    # Visualize the class labels; vis is the visdom wrapper.
    trainer.vis.text(dataset.db.label_names, win='labels')
    # best_map tracks the best mAP seen so far, to decide when to save.
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        # Reset the meters (average-precision values and confusion matrices).
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            # scale is the resize factor (input image size over output size),
            # around 1.6; it is applied before the data reaches the model.
            scale = at.scalar(scale)
            # Move the batch to the GPU:
            #   img   1 x 3 x H x W, a single three-channel image (size may vary)
            #   bbox  1 x R x 4
            #   label 1 x R
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # Wrap in Variables so autograd can backpropagate.
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            # Train and update the learnable parameters (the key step):
            # forward + backward, returns the losses.
            trainer.train_step(img, bbox, label, scale)

            # Visualize several things.
            if (ii + 1) % opt.plot_every == 0:
                # Enter debug mode.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Plot the five losses.
                trainer.vis.plot_many(trainer.get_meter_data())

                # Plot ground-truth bboxes: img[0] drops the batch dimension,
                # giving [3 x H x W]; inverse_normalize undoes the normalization
                # so the original image can be displayed.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                # Display the original image with its ground-truth boxes and classes.
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # Plot predicted bboxes: run prediction on the original image to
                # get bboxes, labels, and scores.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                # Display the image with the predicted boxes, classes, and scores.
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # RPN confusion matrix (meter).
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # RoI confusion matrix.
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        # Validate the current network; returns a dict with keys such as 'ap' and 'map'.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        # If the current mAP beats best_map, record it and save the model.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        # At epoch 9, load the best model so far and decay the learning rate by lr_decay.
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        # Visualize the validation test_map and the log info.
        trainer.vis.plot('test_map', eval_result['map'])
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13:
            break
# This can be removed once PyTorch 0.4.x is out.
# See https://discuss.pytorch.org/t/question-about-rebuild-tensor-v2/14560
import torch._utils
try:
    torch._utils._rebuild_tensor_v2
except AttributeError:
    def _rebuild_tensor_v2(storage, storage_offset, size, stride,
                           requires_grad, backward_hooks):
        tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
        tensor.requires_grad = requires_grad
        tensor._backward_hooks = backward_hooks
        return tensor

    torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2

#%%
faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn).cuda()
trainer.load('./checkpoints/fasterrcnn_09031352_0')
opt.caffe_pretrain = True  # this model was trained from a caffe-pretrained model

# Plot examples on the training set
dataset = RSNADataset(opt.root_dir)
for i in range(0, len(dataset)):
    sample = dataset[i]
    img = sample['image']
    ori_img_ = inverse_normalize(at.tonumpy(img))

    # plot predicted bboxes
    _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
    pred_img = vis_bbox(ori_img_,
                        at.tonumpy(_bboxes[0]),
                        at.tonumpy(_labels[0]).reshape(-1),
                        at.tonumpy(_scores[0]))
def train(**kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    best_ap = np.array([0.] * opt.label_number)
    lr_ = opt.lr
    vis = trainer.vis
    starttime = datetime.datetime.now()
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                roi_cm = at.totensor(trainer.roi_cm.conf, False).float()
                trainer.vis.img('roi_cm', roi_cm)

        eval_result = eval(test_dataloader, faster_rcnn, vis=vis, test_num=opt.test_num)
        best_ap = dict(zip(opt.VOC_BBOX_LABEL_NAMES, eval_result['ap']))
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            print('roi_cm=\n', trainer.roi_cm.value())
            plot_confusion_matrix(trainer.roi_cm.value(),
                                  classes=('animal', 'plant', 'rock', 'background'),
                                  normalize=False,
                                  title='Normalized Confusion Matrix')
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map, best_ap=best_ap)
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        # if epoch == 13:
        #     break

    endtime = datetime.datetime.now()
    train_consum = (endtime - starttime).seconds
    print("train_consum=", train_consum)
def train(**kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    # 300w_dataset = FaceLandmarksDataset()
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    attacker = attacks.DCGAN(train_adv=False)
    if opt.load_attacker:
        attacker.load(opt.load_attacker)
        print('load attacker model from %s' % opt.load_attacker)
    trainer = VictimFasterRCNNTrainer(faster_rcnn, attacker, attack_mode=True).cuda()
    # trainer = VictimFasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    # eval_result = eval(test_dataloader, faster_rcnn, test_num=2000)
    best_map = 0
    for epoch in range(opt.epoch):
        trainer.reset_meters(adv=True)
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            # ipdb.set_trace()  # debugging breakpoint; enable only when needed
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if ii % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())
                trainer.vis.plot_many(trainer.get_meter_data(adv=True))

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                if trainer.attacker is not None:
                    adv_img = trainer.attacker.perturb(img)
                    adv_img_ = inverse_normalize(at.tonumpy(adv_img[0]))
                    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                        [adv_img_], visualize=True)
                    adv_pred_img = visdom_bbox(adv_img_,
                                               at.tonumpy(_bboxes[0]),
                                               at.tonumpy(_labels[0]).reshape(-1),
                                               at.tonumpy(_scores[0]))
                    trainer.vis.img('adv_img', adv_pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

            if ii % 500 == 0:
                best_path = trainer.save(epochs=epoch, save_rcnn=True)
        if epoch % 2 == 0:
            best_path = trainer.save(epochs=epoch)
def train(**kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    testset_all = TestDataset_all(opt, 'test2')
    test_all_dataloader = data_.DataLoader(testset_all,
                                           batch_size=1,
                                           num_workers=opt.test_num_workers,
                                           shuffle=False,
                                           pin_memory=True)

    tsf = Transform(opt.min_size, opt.max_size)
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    print('model construct completed')

    # Load a previously trained model; just set the path in config.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    # Extract the soft labels needed for knowledge distillation.
    if opt.is_distillation == True:
        opt.predict_socre = 0.3
        for ii, (imgs, sizes, gt_bboxes_, gt_labels_, scale, id_) in tqdm(enumerate(dataloader)):
            if len(gt_bboxes_) == 0:
                continue
            sizes = [sizes[0][0].item(), sizes[1][0].item()]
            pred_bboxes_, pred_labels_, pred_scores_, features_ = trainer.faster_rcnn.predict(imgs, [sizes])
            img_file = os.path.join(opt.voc_data_dir, 'JPEGImages', id_[0] + '.jpg')
            ori_img = read_image(img_file, color=True)
            img, pred_bboxes_, pred_labels_, scale_ = tsf((ori_img, pred_bboxes_[0], pred_labels_[0]))

            # Drop soft labels that overlap the ground truth too much, and
            # remove wrong soft labels.
            pred_bboxes_, pred_labels_, pred_scores_ = py_cpu_nms(
                gt_bboxes_[0], gt_labels_[0], pred_bboxes_, pred_labels_, pred_scores_[0])

            # Store the soft labels on disk so they do not occupy GPU memory.
            np.save('label/' + str(id_[0]) + '.npy', pred_labels_)
            np.save('bbox/' + str(id_[0]) + '.npy', pred_bboxes_)
            np.save('feature/' + str(id_[0]) + '.npy', features_)
            np.save('score/' + str(id_[0]) + '.npy', pred_scores_)
        opt.predict_socre = 0.05
        t.cuda.empty_cache()

    # Show all class label names in visdom.
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr

    for epoch in range(opt.epoch):
        print('epoch=%d' % epoch)
        # Reset the confusion matrices.
        trainer.reset_meters()
        # tqdm wraps any iterator to show a progress bar in long loops;
        # it is fast and easy to extend.
        for ii, (img, sizes, bbox_, label_, scale, id_) in tqdm(enumerate(dataloader)):
            if len(bbox_) == 0:
                continue
            scale = at.scalar(scale)
            # Move the batch to the GPU; the actual training happens in
            # train_step below, the rest is logging.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()

            if opt.is_distillation == True:
                # Read the stored soft labels.
                teacher_pred_labels = np.load('label/' + str(id_[0]) + '.npy')
                teacher_pred_bboxes = np.load('bbox/' + str(id_[0]) + '.npy')
                teacher_pred_features_ = np.load('feature/' + str(id_[0]) + '.npy')
                teacher_pred_scores = np.load('score/' + str(id_[0]) + '.npy')
                # Convert dtypes.
                teacher_pred_bboxes = teacher_pred_bboxes.astype(np.float32)
                teacher_pred_labels = teacher_pred_labels.astype(np.int32)
                teacher_pred_scores = teacher_pred_scores.astype(np.float32)
                # Convert to PyTorch tensors.
                teacher_pred_bboxes_ = at.totensor(teacher_pred_bboxes)
                teacher_pred_labels_ = at.totensor(teacher_pred_labels)
                teacher_pred_scores_ = at.totensor(teacher_pred_scores)
                teacher_pred_features_ = at.totensor(teacher_pred_features_)
                # Move to the GPU.
                teacher_pred_bboxes_ = teacher_pred_bboxes_.cuda()
                teacher_pred_labels_ = teacher_pred_labels_.cuda()
                teacher_pred_scores_ = teacher_pred_scores_.cuda()
                teacher_pred_features_ = teacher_pred_features_.cuda()

                # If the Transform in dataset.py applies random flipping, use
                # this check to flip the soft labels along with the image.
                if teacher_pred_bboxes_[0][1] != bbox[0][0][1]:
                    _, o_C, o_H, o_W = img.shape
                    teacher_pred_bboxes_ = flip_bbox(teacher_pred_bboxes_, (o_H, o_W), x_flip=True)

                losses = trainer.train_step(img, bbox, label, scale, epoch,
                                            teacher_pred_bboxes_, teacher_pred_labels_,
                                            teacher_pred_features_, teacher_pred_scores)
            else:
                trainer.train_step(img, bbox, label, scale, epoch)

            # Information shown in visdom.
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(teacher_pred_bboxes_),
                                     at.tonumpy(teacher_pred_labels_),
                                     at.tonumpy(teacher_pred_scores_))
                trainer.vis.img('gt_img_all', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores, _ = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # Confusion matrices.
                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.text(str(trainer.roi_cm.value().tolist()), win='roi_cm')
                # trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.value(), False).float())

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{},ap:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['ap']), str(eval_result['map']),
            str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        # Save the best result and remember its path.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 20:
            trainer.save(best_map='20')
            result = eval(test_all_dataloader, trainer.faster_rcnn, test_num=5000)
            print('20result={}'.format(str(result)))
            # trainer.load(best_path)
            # result = eval(test_all_dataloader, trainer.faster_rcnn, test_num=5000)
            # print('bestmapresult={}'.format(str(result)))
            break
        # Every 20 epochs (when epoch % 20 == 15), reload the best weights so
        # far and decay the learning rate.
        if epoch % 20 == 15:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
def train(**kwargs):
    opt.parse(kwargs)
    print('loading data...')
    trainset = TrainDataset(opt)
    train_dataloader = torch.utils.data.DataLoader(trainset,
                                                   batch_size=1,
                                                   shuffle=True,
                                                   num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = torch.utils.data.DataLoader(testset,
                                                  batch_size=1,
                                                  num_workers=opt.test_num_workers,
                                                  shuffle=False,
                                                  pin_memory=True)
    print('constructing model...')
    if opt.model == 'vgg16':
        faster_rcnn = FasterRCNNVGG16()
    elif opt.model == 'resnet101':
        faster_rcnn = FasterRCNNResNet101()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    print('loading model...')
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    else:
        print('no pretrained model found')
    trainer.vis.text('<br/>'.join(trainset.db.label_names), win='labels')

    print('start training...')
    best_map = 0.0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        print("epoch : %d training ..." % epoch)
        trainer.reset_meters()
        for ii, (imgs_, bboxes_, labels_, scales_) in tqdm(enumerate(train_dataloader)):
            scales = at.scalar(scales_)
            imgs, bboxes, labels = imgs_.cuda().float(), bboxes_.cuda(), labels_.cuda()
            trainer.train_step(imgs, bboxes, labels, scales)

            if (ii + 1) % opt.plot_every == 0:
                # plot loss
                trainer.vis.plot_many(trainer.losses_data())

                # generate plotted image
                img = inverse_normalize(at.tonumpy(imgs_[0]))

                # plot ground truth bboxes
                bbox = at.tonumpy(bboxes_[0])
                label = at.tonumpy(labels_[0])
                img_gt = visdom_bbox(img, bbox, label)
                trainer.vis.img('ground truth', img_gt)

                # plot prediction bboxes
                bboxes__, labels__, scores__ = trainer.faster_rcnn.predict([img], visualize=True)
                bbox = at.tonumpy(bboxes__[0])
                label = at.tonumpy(labels__[0]).reshape(-1)
                score = at.tonumpy(scores__[0])
                img_pred = visdom_bbox(img, bbox, label, score)
                trainer.vis.img('prediction', img_pred)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
            if ii + 1 == opt.train_num:
                break

        print("epoch : %d evaluating ..." % epoch)
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = vis_dict(
            {
                'epoch': '%s/%s' % (str(epoch), str(opt.epoch)),
                'lr': lr_,
                'map': float(eval_result['map']),
            }, trainer.losses_data())
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map="%.4f" % best_map)
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
def train(**kwargs):
    # *args collects any number of positional arguments into a tuple;
    # **kwargs collects keyword arguments into a dict.
    opt._parse(kwargs)  # parse the passed-in options (a dict)

    dataset = Dataset(opt)  # the project's own Dataset class
    print('loading data...')
    # The DataLoader defines how a batch is fetched: PyTorch's DataLoader wraps
    # the dataset in a multi-worker iterator that yields one batch at a time.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt, split='trainval')  # test-set loader
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()  # network definition
    print('model construction completed!')
    # The trainer returns the losses; .cuda() keeps the returned tensors on the GPU.
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:  # load a pretrained model if requested
        trainer.load(opt.load_path)
        print('loaded pretrained parameters from %s' % opt.load_path)
    else:
        print('no pretrained parameters given; network parameters are randomly initialized')
    trainer.vis.text(dataset.db.label_names, win='labels')  # show the label names
    best_map = 0  # best mAP so far
    for epoch in range(opt.epoch):  # for each epoch
        trainer.reset_meters()  # reset all the meters
        # for each sample
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)  # convert to a scalar
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()  # move to GPU
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)  # wrap for autograd
            # TODO
            trainer.train_step(img, bbox, label, scale)  # one training step

            if (ii + 1) % opt.plot_every == 0:  # every plot_every steps
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        # Evaluate the model on the test set (prediction happens inside eval).
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            # Save once the model is good enough; checkpoints go to the
            # checkpoints folder.
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:  # at epoch 9, reload the best model and adjust the learning rate
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        # if epoch == 13:  # stop training at epoch 14
        #     break
    trainer.save(best_map=best_map)
def train(**kwargs):
    # Parse the extra kwargs with opt._parse() from config.py, which resolves
    # the data paths before they are handed to Dataset.
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        # pin_memory=True
    )  # pin_memory page-locks host memory for faster transfers to the GPU
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    # If opt.load_path exists, read the pretrained model from it, then
    # visualize the training labels.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.dataset.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    # Training loop; the number of epochs (opt.epoch = 14) is a hyperparameter
    # predefined in config.py.
    for epoch in range(opt.epoch):
        print('epoch {}/{}'.format(epoch, opt.epoch))
        trainer.reset_meters()  # reset all meters in the visualization first
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = array_tool.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot the losses
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(array_tool.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     array_tool.tonumpy(bbox_[0]),
                                     array_tool.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes: call faster_rcnn's predict function and
                # keep the results in the underscore-prefixed variables
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(
                    ori_img_,
                    array_tool.tonumpy(_bboxes[0]),
                    array_tool.tonumpy(_labels[0]).reshape(-1),
                    array_tool.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # show rpn_cm, the RPN's confusion matrix, in the visualizer
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # show roi_cm, the RoI head's confusion matrix, in the visualizer
                trainer.vis.img('roi_cm',
                                array_tool.totensor(trainer.roi_cm.conf, False).float())

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{}, loss{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)  # keep the learning rate, mAP, etc. up to date

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:  # at epoch 9, scale the learning rate down to a tenth
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:
            break
def train(**kwargs):
    opt._parse(kwargs)
    print('load data')
    dataset = Dataset(opt)
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16(n_fg_class=dataset.get_class_count(), anchor_scales=[1])
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn, n_fg_class=dataset.get_class_count())
    if opt.use_cuda:
        trainer = trainer.cuda()
    if opt.load_path:
        old_state = trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    if opt.validate_only:
        num_eval_images = len(testset)
        eval_result = eval(test_dataloader, faster_rcnn, test_num=num_eval_images)
        print('Evaluation finished, obtained {} using {} out of {} images'.format(
            eval_result, num_eval_images, len(testset)))
        return

    if opt.load_path and 'epoch' in old_state.keys():
        starting_epoch = old_state['epoch'] + 1
        print('Model was trained until epoch {}, continuing with epoch {}'.format(
            old_state['epoch'], starting_epoch))
    else:
        starting_epoch = 0

    # trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    global_step = 0
    for epoch in range(starting_epoch, opt.num_epochs):
        # step-decay schedule: decay the base lr every opt.epoch_decay epochs
        lr_ = opt.lr * (opt.lr_decay ** (epoch // opt.epoch_decay))
        trainer.faster_rcnn.set_lr(lr_)
        print('Starting epoch {} with learning rate {}'.format(epoch, lr_))
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader), total=len(dataset)):
            global_step = global_step + 1
            scale = at.scalar(scale)
            if opt.use_cuda:
                img, bbox, label = img.cuda().float(), bbox_.float().cuda(), label_.float().cuda()
            else:
                img, bbox, label = img.float(), bbox_.float(), label_.float()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            losses = trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # plot loss
                # trainer.vis.plot_many(trainer.get_meter_data())
                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]),
                                     label_names=dataset.get_class_names() + ['BG'])
                trainer.vis.img('gt_img', gt_img)
                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]),
                                       label_names=dataset.get_class_names() + ['BG'])
                trainer.vis.img('pred_img', pred_img)
                # RPN confusion matrix (meter)
                # trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # RoI confusion matrix
                # trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
                # print('Current total loss {}'.format(losses[-1].tolist()))
                trainer.vis.plot('train_total_loss', losses[-1].tolist())

            if global_step % opt.snapshot_every == 0:
                snapshot_path = trainer.save(epoch=epoch)
                print('Snapshotted to {}'.format(snapshot_path))

        # snapshot_path = trainer.save(epoch=epoch)
        # print('After epoch {}: snapshotted to {}'.format(epoch, snapshot_path))

        eval_result = eval(test_dataloader, faster_rcnn, test_num=min(opt.test_num, len(testset)))
        print(eval_result)
        # TODO: this definitely is not good and will bias evaluation
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=eval_result['map'], epoch=epoch)
            print('After epoch {}: snapshotted to {}'.format(epoch, best_path))
        trainer.vis.plot('test_map', eval_result['map'])
def train(**kwargs):
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from {}'.format(opt.load_path))
    # trainer.vis.text(dataset.db.label_names, win='labels')

    adversary = None
    if opt.flagadvtrain:
        print('flagadvtrain turned on: adversarial training!')
        atk = PGD.PGD(trainer, eps=16 / 255, alpha=3 / 255, steps=4)
        # atk = torchattacks.PGD(trainer.faster_rcnn, eps=16, alpha=3, steps=4)
        # adversary = PGDAttack(trainer.faster_rcnn, loss_fn=nn.CrossEntropyLoss(), eps=16,
        #                       nb_iter=4, eps_iter=3, rand_init=True, clip_min=0.0,
        #                       clip_max=1.0, targeted=False)

    best_map = 0
    lr_ = opt.lr
    normal_total_loss = []
    adv_total_loss = []
    total_time = 0.0
    total_imgs = 0
    true_imgs = 0
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        once = True
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            temp_img = copy.deepcopy(img).cuda()  # keep a clean copy of the image
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            if opt.flagadvtrain:  # perturb the input and time the attack
                before_time = time.time()
                img = atk(img, bbox, label, scale)
                after_time = time.time()
                # with ctx_noparamgrad_and_eval(trainer.faster_rcnn):
                #     img = adversary.perturb(img, label)
                total_time += after_time - before_time

            # trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                # adv_total_loss.append(trainer.get_meter_data()['total_loss'])
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                # trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth bboxes on the clean image
                temp_ori_img_ = inverse_normalize(at.tonumpy(temp_img[0]))
                # img2jpg(temp_ori_img_, "imgs/orig_images/", "gt_img{}".format(ii))
                # temp_gt_img = visdom_bbox(temp_ori_img_,
                #                           at.tonumpy(bbox_[0]),
                #                           at.tonumpy(label_[0]))
                # plt.figure()
                # c, h, w = temp_gt_img.shape
                # plt.imshow(np.reshape(temp_gt_img, (h, w, c)))
                # plt.savefig("imgs/temp_orig_images/temp_gt_img{}".format(ii))
                # plt.close()

                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                # img2jpg(ori_img_, "imgs/adv_images/", "adv_img{}".format(ii))
                # gt_img = visdom_bbox(ori_img_,
                #                      at.tonumpy(bbox_[0]),
                #                      at.tonumpy(label_[0]))
                # plt.figure()
                # c, h, w = gt_img.shape
                # plt.imshow(np.reshape(gt_img, (h, w, c)))
                # plt.savefig("imgs/orig_images/gt_img{}".format(ii))
                # plt.close()
                # trainer.vis.img('gt_img', gt_img)

                # predict and plot bboxes on the adversarial image
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                fig1 = plt.figure()
                ax1 = fig1.add_subplot(1, 1, 1)
                # final1 = (at.tonumpy(img[0].cpu()).transpose(1, 2, 0).astype(np.uint8))
                final1 = ori_img_.transpose(1, 2, 0).astype(np.uint8)
                ax1.imshow(final1)
                gt_img = visdom_bbox(ax1, at.tonumpy(_bboxes[0]), at.tonumpy(_labels[0]))
                fig1.savefig("imgs/adv_images/adv_img{}".format(ii))
                plt.close()

                # predict and plot bboxes on the clean image
                _temp_bboxes, _temp_labels, _temp_scores = trainer.faster_rcnn.predict(
                    [temp_ori_img_], visualize=True)
                fig2 = plt.figure()
                ax2 = fig2.add_subplot(1, 1, 1)
                final2 = temp_ori_img_.transpose(1, 2, 0).astype(np.uint8)
                # final2 = (at.tonumpy(temp_img[0].cpu()).transpose(1, 2, 0).astype(np.uint8))
                ax2.imshow(final2)
                gt_img = visdom_bbox(ax2, at.tonumpy(_temp_bboxes[0]), at.tonumpy(_temp_labels[0]))
                fig2.savefig("imgs/orig_images/gt_img{}".format(ii))
                plt.close()
                # print("gt labels is {}, pred_orig_labels is {} and pred_adv_labels is {}".format(
                #     label_, _labels, _temp_labels))

                total_imgs += 1
                if len(_temp_labels) == 0:
                    continue
                # Count images whose predicted labels survive the attack unchanged.
                # Note: the original compared `(...).all() is True`, which is always False
                # for a numpy bool; the truth value must be used directly.
                if _labels[0].shape[0] == _temp_labels[0].shape[0] and (_labels[0] == _temp_labels[0]).all():
                    true_imgs += 1

                # pred_img = visdom_bbox(ori_img_,
                #                        at.tonumpy(_bboxes[0]),
                #                        at.tonumpy(_labels[0]).reshape(-1),
                #                        at.tonumpy(_scores[0]))
                # temp_pred_img = visdom_bbox(temp_ori_img_,
                #                             at.tonumpy(_temp_bboxes[0]),
                #                             at.tonumpy(_temp_labels[0]).reshape(-1),
                #                             at.tonumpy(_temp_scores[0]))
                # trainer.vis.img('pred_img', pred_img)
                # RPN confusion matrix (meter)
                # trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # RoI confusion matrix
                # trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        # fig = plt.figure()
        # ax1 = fig.add_subplot(2, 1, 1)
        # ax1.plot(normal_total_loss)
        # ax2 = fig.add_subplot(2, 1, 2)
        # ax2.plot(adv_total_loss)
        # fig.savefig("losses/both_loss{}".format(epoch))

        # eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num,
        #                    flagadvtrain=opt.flagadvtrain, adversary=atk)
        # trainer.vis.plot('test_map', eval_result['map'])
        # lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        # log_info = 'lr:{}, map:{}, loss:{}'.format(str(lr_), str(eval_result['map']),
        #                                            str(trainer.get_meter_data()))
        # print(log_info)
        # trainer.vis.log(log_info)
        # if eval_result['map'] > best_map:
        #     best_map = eval_result['map']
        #     best_path = trainer.save(best_map=best_map)
        # if epoch == 9:
        #     trainer.load(best_path)
        #     trainer.faster_rcnn.scale_lr(opt.lr_decay)
        #     lr_ = lr_ * opt.lr_decay

        if epoch == 0:  # only the first epoch is evaluated here
            break
        if epoch == 13:
            break

    print("Total number of images is {}".format(total_imgs))
    print("True images is {}".format(true_imgs))
    print("Total time is {}".format(total_time))
    print("Avg time is {}".format(total_time / total_imgs))
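The `PGD.PGD` attack object above is project-specific. For orientation, here is a generic, self-contained L-infinity PGD sketch with the same eps/alpha/steps values as the call above; the implementation is an assumption (a standard formulation over a scalar loss closure), not the repo's attack class:

import torch

# Generic L-infinity PGD sketch (an assumption, not the repo's PGD class):
# repeatedly ascend the loss gradient, then project back into the eps-ball.
def pgd_attack(loss_fn, img, eps=16 / 255, alpha=3 / 255, steps=4):
    orig = img.detach()
    adv = orig.clone()
    for _ in range(steps):
        adv.requires_grad_(True)
        loss = loss_fn(adv)  # scalar loss; higher means worse for the model
        grad, = torch.autograd.grad(loss, adv)
        with torch.no_grad():
            adv = adv + alpha * grad.sign()             # gradient ascent step
            adv = orig + (adv - orig).clamp(-eps, eps)  # project into the eps-ball
            adv = adv.clamp(0, 1)                       # assumes images live in [0, 1]
    return adv.detach()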
def train(**kwargs):
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())
                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)
                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)
                # RPN confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # RoI confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{}, loss:{}'.format(str(lr_),
                                                   str(eval_result['map']),
                                                   str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13:
            break
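Every variant shuttles data between torch and numpy through the `at`/`array_tool` helpers (`tonumpy`, `totensor`, `scalar`). A rough sketch of what those helpers are assumed to amount to, written against current PyTorch APIs; the real module handles more edge cases:

import numpy as np
import torch

# Assumed behavior of the array_tool helpers used throughout this file.
def tonumpy(data):
    if isinstance(data, torch.Tensor):
        return data.detach().cpu().numpy()
    return np.asarray(data)

def totensor(data, cuda=True):
    tensor = torch.as_tensor(np.asarray(data))
    return tensor.cuda() if cuda else tensor

def scalar(data):
    return tonumpy(data).reshape(-1)[0].item()  # one-element tensor -> Python number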
def train(**kwargs):
    opt._parse(kwargs)
    log = SummaryWriter(log_dir=opt.log_dir)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    # configuration
    # cfg = VGConf()
    # training and validation datasets
    # trainset = Dataset(cfg)
    # valset = Dataset(cfg, valid=True)
    # load data
    # print("load data2..")
    # dataloader = DataLoader(dataloader, batch_size=1, shuffle=True,
    #                         pin_memory=True, num_workers=opt.num_workers)
    # valloader = DataLoader(test_dataloader, batch_size=1, shuffle=False,
    #                        pin_memory=True, num_workers=opt.num_workers)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    # trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    idx = 0
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)
            # fetch the current loss values and log them to TensorBoard
            losses = trainer.get_meter_data()
            log.add_scalars(main_tag='Training(batch)',
                            tag_scalar_dict=losses,
                            global_step=idx)
            idx = idx + 1
            if (ii + 1) % opt.plot_every == 0:
                # if os.path.exists(opt.debug_file):
                #     ipdb.set_trace()
                # plot loss
                # trainer.vis.plot_many(trainer.get_meter_data())
                print(trainer.get_meter_data())
                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                # gt_img = visdom_bbox(ori_img_,
                #                      at.tonumpy(bbox_[0]),
                #                      at.tonumpy(label_[0]))
                # trainer.vis.img('gt_img', gt_img)
                # plot predicted bboxes
                # _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                # pred_img = visdom_bbox(ori_img_,
                #                        at.tonumpy(_bboxes[0]),
                #                        at.tonumpy(_labels[0]).reshape(-1),
                #                        at.tonumpy(_scores[0]))
                # trainer.vis.img('pred_img', pred_img)
                # RPN confusion matrix (meter)
                # trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # RoI confusion matrix
                # trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        log.add_scalar(tag='mAP', scalar_value=eval_result['map'], global_step=epoch)
        # trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{}, loss:{}'.format(str(lr_),
                                                   str(eval_result['map']),
                                                   str(trainer.get_meter_data()))
        print(log_info)
        # trainer.vis.log(log_info)
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:
            break
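This variant swaps visdom for TensorBoard. The two calls it relies on, `add_scalars` for a dict of curves and `add_scalar` for a single value, are the standard `torch.utils.tensorboard` API. A minimal standalone usage example; the log_dir and the scalar values are illustrative only:

from torch.utils.tensorboard import SummaryWriter

log = SummaryWriter(log_dir='runs/faster_rcnn_demo')  # illustrative path
# one curve per key in the dict, grouped under the common main tag
log.add_scalars(main_tag='Training(batch)',
                tag_scalar_dict={'total_loss': 1.23, 'rpn_loc_loss': 0.45},  # illustrative values
                global_step=0)
log.add_scalar(tag='mAP', scalar_value=0.42, global_step=0)  # illustrative value
log.close()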
def eval(dataloader, faster_rcnn, trainer, dataset, test_num=10000):
    with torch.no_grad():
        print('Running validation')
        # Each predicted box is organized as (y_min, x_min, y_max, x_max),
        # where y corresponds to the height and x to the width
        pred_bboxes, pred_labels, pred_scores = list(), list(), list()
        gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
        image_ids = list()
        for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_,
                 image_ids_) in tqdm(enumerate(dataloader), total=test_num):
            sizes = [sizes[0].detach().numpy().tolist()[0],
                     sizes[1].detach().numpy().tolist()[0]]
            pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(imgs, [sizes])
            # .copy() lets the loaded image be released after each iteration
            gt_bboxes += list(gt_bboxes_.numpy().copy())
            gt_labels += list(gt_labels_.numpy().copy())
            gt_difficults += list(gt_difficults_.numpy().copy())
            image_ids += list(image_ids_.numpy().copy())
            pred_bboxes += [pp.copy() for pp in pred_bboxes_]
            pred_labels += [pp.copy() for pp in pred_labels_]
            pred_scores += [pp.copy() for pp in pred_scores_]
            if ii == test_num:
                break

        result = eval_detection_voc(pred_bboxes, pred_labels, pred_scores,
                                    gt_bboxes, gt_labels, gt_difficults,
                                    use_07_metric=True)

        if opt.validate_only:
            save_path = '{}_detections.npz'.format(opt.load_path)
            np.savez(save_path,
                     pred_bboxes=pred_bboxes,
                     pred_labels=pred_labels,
                     pred_scores=pred_scores,
                     gt_bboxes=gt_bboxes,
                     gt_labels=gt_labels,
                     gt_difficults=gt_difficults,
                     image_ids=image_ids,
                     result=result)
        else:
            # plot ground-truth bboxes for the last image
            ori_img_ = inverse_normalize(at.tonumpy(imgs[0]))
            gt_img = visdom_bbox(ori_img_,
                                 at.tonumpy(gt_bboxes[-1]),
                                 at.tonumpy(gt_labels[-1]),
                                 label_names=dataset.get_class_names() + ['BG'])
            trainer.vis.img('test_gt_img', gt_img)
            # plot predicted bboxes
            pred_img = visdom_bbox(ori_img_,
                                   at.tonumpy(pred_bboxes[-1]),
                                   at.tonumpy(pred_labels[-1]).reshape(-1),
                                   at.tonumpy(pred_scores[-1]),
                                   label_names=dataset.get_class_names() + ['BG'])
            trainer.vis.img('test_pred_img', pred_img)

        del imgs, gt_bboxes_, gt_labels_, gt_difficults_, image_ids_, \
            pred_bboxes_, pred_labels_, pred_scores_
        torch.cuda.empty_cache()
        return result
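When validate_only is set, the detections are dumped with np.savez. Because the per-image box arrays are ragged, numpy stores them as object arrays, so reloading them requires allow_pickle=True. A small usage sketch for offline analysis; the file path is illustrative:

import numpy as np

dets = np.load('model_checkpoint_detections.npz', allow_pickle=True)  # illustrative path
print(dets['result'].item())         # the eval_detection_voc result dict (e.g. 'map')
first_pred = dets['pred_bboxes'][0]  # (R, 4) boxes as (ymin, xmin, ymax, xmax)
first_id = dets['image_ids'][0]      # id of the image those boxes belong to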