def train(**kwargs):
    """Train Faster R-CNN on the dataset configured in ``opt``.

    ``kwargs`` are forwarded to ``opt._parse`` to override config values.
    Runs a fixed 7-epoch schedule, decays the learning rate once after
    epoch 4, evaluates after every epoch and saves a checkpoint tagged
    with the epoch's mAP.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    # batch_size=1 is assumed by the rest of the training code.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=2,
                                       shuffle=False,
                                       # pin_memory=True
                                       )

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')

    for epoch in range(7):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale, ori_img) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # FIX: dropped the unused `losses =` binding; train_step's return
            # value was never read.
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth bboxes; undo the (x * 0.225 + 0.45)
                # normalization back to [0, 255]
                ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
                gt_img = visdom_bbox(at.tonumpy(ori_img_)[0],
                                     at.tonumpy(bbox_)[0],
                                     label_[0].numpy())
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes on the original (un-normalized) image
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img, visualize=True)
                pred_img = visdom_bbox(at.tonumpy(ori_img[0]),
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        if epoch == 4:
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        # test_num=1e100 effectively means "evaluate on the whole test set"
        eval_result = eval(test_dataloader, faster_rcnn, test_num=1e100)
        # BUG FIX: the original printed the literal string 'eval_result'
        # instead of the actual evaluation result.
        print(eval_result)
        trainer.save(mAP=eval_result['map'])
from data.dataset import TrainDataset
from torch.utils import data as data_
from tqdm import tqdm
from model.faster_rcnn_vgg16 import decom_vgg16, VGG16RoIHead
import torch
from model.region_proposal_network import RegionProposalNetwork
import numpy as np
from model.faster_rcnn_vgg16 import FasterRCNNVGG16
from trainer import FasterRCNNTrainer

# Smoke-test script: pull one sample from the training set, run a single
# forward pass through the trainer, and print the resulting losses.
data_set = TrainDataset()
#data_loader = data_.DataLoader(data_set, batch_size=1, shuffle=False)
img, bbox, label, scale = data_set.__getitem__(0)
model = FasterRCNNVGG16().cuda()
trainer = FasterRCNNTrainer(model)
# img[None, :] adds the batch dimension expected by the network
loss = trainer.forward(
    torch.from_numpy(img[None, :]).cuda(), bbox, label, scale)
print(loss)
"""
roi_locs, roi_scores, rpn_locs, rpn_scores = model.forward(torch.from_numpy(img[None, :]).cuda())
print(roi_locs.shape)
print(roi_scores.shape)
print(rpn_locs.shape)
print(rpn_scores.shape)
"""
"""
extractor, classifier = decom_vgg16()
feature_map = extractor.cuda()(torch.from_numpy(img[None, :]).cuda())
print(img.shape)#3, 600, 800
print(feature_map.shape)#1, 512, 37, 50
def train(**kwargs):
    """Train Faster R-CNN on either a CSV-described dataset or VOC.

    The branch is selected by the module-level ``VOC`` flag. ``kwargs``
    override config values via ``opt._parse``. Evaluates each epoch,
    checkpoints on new best mAP, decays the LR once at epoch 9, and stops
    after epoch 30.
    """
    opt._parse(kwargs)
    if not VOC:
        dataset = CsvDataset('/home/artemlyan/data/avito_intro/images/',
                             'labeled_with_classes.csv')
        print('load data')
        dataloader = data_.DataLoader(dataset,
                                      batch_size=1,
                                      shuffle=True,
                                      # pin_memory=True,
                                      num_workers=opt.num_workers)
        # NOTE: train and test loaders share the same dataset object; the
        # dataset is flipped between 'train'/'val' via set_mode() below.
        test_dataloader = data_.DataLoader(dataset,
                                           batch_size=1,
                                           num_workers=opt.test_num_workers,
                                           shuffle=False,
                                           pin_memory=True)
    else:
        dataset = Dataset(opt)
        print('load data for VOC')
        dataloader = data_.DataLoader(dataset,
                                      batch_size=1,
                                      shuffle=True,
                                      # pin_memory=True,
                                      num_workers=opt.num_workers)
        testset = TestDataset(opt)
        test_dataloader = data_.DataLoader(testset,
                                           batch_size=1,
                                           num_workers=opt.test_num_workers,
                                           shuffle=False,
                                           pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    best_path = None  # FIX: avoid NameError at epoch 9 if mAP never improved
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        if not VOC:
            dataset.set_mode('train')
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            print(img.size(), bbox_.size(), label_.size(), scale.size())
            scale = at.scalar(scale)
            # FIX: dropped torch.autograd.Variable wrappers -- they are
            # deprecated no-ops since PyTorch 0.4; tensors are passed directly.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())
                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)
                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                print('pred', _bboxes, 'gt', bbox_[0])
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)
                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm',
                                at.totensor(trainer.roi_cm.conf, False).float())
        if not VOC:
            dataset.set_mode('val')
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        # FIX: typo "eval reuslt" -> "eval result"
        print("eval result:", eval_result)
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # reload the best checkpoint (if any) before decaying the LR
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        trainer.vis.plot('test_map', eval_result['map'])
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 30:
            break
import os
import torch as t
from utils.config import opt
from model.faster_rcnn_vgg16 import FasterRCNNVGG16
from trainer import FasterRCNNTrainer
from data.util import read_image
from utils.vis_tool import vis_bbox
from utils import array_tool as at
from matplotlib import pyplot as plt

# Demo script: run a trained checkpoint on a single image and show the
# detections with matplotlib.
img = read_image('/home/fengkai/dog.jpg')
img = t.from_numpy(img)[None]  # add batch dimension: (C,H,W) -> (1,C,H,W)
faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn).cuda()
trainer.load('/home/fengkai/PycharmProjects/my-faster-rcnn/checkpoints/fasterrcnn_04231732_0.6941460588341642')
opt.caffe_pretrain = False  # this model was trained from torchvision-pretrained model
_bboxes, _labels, _scores = trainer.faster_rcnn.predict(img, visualize=True)
# draw boxes/labels/scores for the first (only) image in the batch
vis_bbox(at.tonumpy(img[0]),
         at.tonumpy(_bboxes[0]),
         at.tonumpy(_labels[0]).reshape(-1),
         at.tonumpy(_scores[0]).reshape(-1))
plt.show()
def train(**kwargs):
    """Train Faster R-CNN; this variant's dataset also yields human/object
    boxes and an action label per sample (HOI-style annotations).

    kwargs override config values via ``opt._parse``. Evaluates each epoch,
    checkpoints on new best mAP, decays the LR at epoch 9, stops after
    epoch 13.
    """
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    # NOTE(review): shuffle=False on the *training* loader is unusual --
    # confirm this is intentional and not a leftover from debugging.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=False,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale, human_box, object_box, action) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # NOTE(review): human_box/object_box/action are moved to GPU but
            # never passed to train_step -- dead code or未completed feature?
            # Confirm against the trainer's signature.
            human_box, object_box, action = human_box.cuda(), object_box.cuda(), action.cuda()
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())
                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     # at.tonumpy(action[0]),
                                     at.tonumpy(label_[0])
                                     )
                trainer.vis.img('gt_img', gt_img)
                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                print(_labels[0])
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)
                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # NOTE(review): best_path is undefined here if mAP never improved
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:
            break
def fit_ont_epoch(net, epoch, epoch_size, epoch_size_val, gen, genval, Epoch):
    """Run one training epoch plus one validation pass, then checkpoint.

    Args:
        net: the Faster R-CNN network to train.
        epoch: current epoch index (0-based; printed 1-based).
        epoch_size / epoch_size_val: number of train/val batches to consume.
        gen / genval: train/val batch generators yielding (imgs, boxes, labels)
            as numpy arrays.
        Epoch: total number of epochs (for logging only).

    NOTE(review): relies on module-level globals ``optimizer`` (passed to the
    trainer) and ``model`` (saved at the end) -- confirm they refer to the
    same network as ``net``. The function name typo ("ont") is kept because
    callers use it.
    """
    train_util = FasterRCNNTrainer(net, optimizer)
    # running sums of each loss component over the epoch
    total_loss = 0
    rpn_loc_loss = 0
    rpn_cls_loss = 0
    roi_loc_loss = 0
    roi_cls_loss = 0
    val_toal_loss = 0
    for iteration, batch in enumerate(gen):
        if iteration >= epoch_size:
            break
        start_time = time.time()
        imgs, boxes, labels = batch[0], batch[1], batch[2]
        # no_grad only covers the numpy->cuda conversion, not the train step
        with torch.no_grad():
            imgs = Variable(torch.from_numpy(imgs).type(
                torch.FloatTensor)).cuda()
            boxes = [
                Variable(torch.from_numpy(box).type(torch.FloatTensor)).cuda()
                for box in boxes
            ]
            labels = [
                Variable(torch.from_numpy(label).type(
                    torch.FloatTensor)).cuda() for label in labels
            ]
        losses = train_util.train_step(imgs, boxes, labels, 1)
        rpn_loc, rpn_cls, roi_loc, roi_cls, total = losses
        total_loss += total
        rpn_loc_loss += rpn_loc
        rpn_cls_loss += rpn_cls
        roi_loc_loss += roi_loc
        roi_cls_loss += roi_cls
        waste_time = time.time() - start_time
        # per-iteration progress log with running-average losses
        print('\nEpoch:' + str(epoch + 1) + '/' + str(Epoch))
        print('iter:' + str(iteration) + '/' + str(epoch_size) + ' || total_loss: %.4f|| rpn_loc_loss: %.4f || rpn_cls_loss: %.4f || roi_loc_loss: %.4f || roi_cls_loss: %.4f || %.4fs/step' \
              % (total_loss/(iteration+1), rpn_loc_loss/(iteration+1),rpn_cls_loss/(iteration+1),roi_loc_loss/(iteration+1),roi_cls_loss/(iteration+1),waste_time))
    print('Start Validation')
    for iteration, batch in enumerate(genval):
        if iteration >= epoch_size_val:
            break
        imgs, boxes, labels = batch[0], batch[1], batch[2]
        with torch.no_grad():
            imgs = Variable(torch.from_numpy(imgs).type(
                torch.FloatTensor)).cuda()
            boxes = Variable(torch.from_numpy(boxes).type(
                torch.FloatTensor)).cuda()
            labels = Variable(
                torch.from_numpy(labels).type(torch.FloatTensor)).cuda()
            # forward-only pass to accumulate validation loss
            train_util.optimizer.zero_grad()
            losses = train_util.forward(imgs, boxes, labels, 1)
            _, _, _, _, val_total = losses
            val_toal_loss += val_total
    print('Finish Validation')
    print('\nEpoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss /
          (epoch_size + 1), val_toal_loss / (epoch_size_val + 1)))
    print('Saving state, iter:', str(epoch + 1))
    # NOTE(review): saves the global ``model``, not ``net`` -- verify intent
    torch.save(
        model.state_dict(),
        'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth' %
        ((epoch + 1), total_loss / (epoch_size + 1),
         val_toal_loss / (epoch_size_val + 1)))
    # Tail of an image-loading helper (its def is outside this chunk):
    # close the file handle, then return the image in CHW layout.
    f.close()
    if img.ndim == 2:
        # reshape (H, W) -> (1, H, W)
        return img[np.newaxis]
    else:
        # transpose (H, W, C) -> (C, H, W)
        return img.transpose((2, 0, 1))


if __name__ == '__main__':
    # 1. load network
    opt.env = 'test'
    opt.caffe_pretrain = True
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    trainer.load(
        'C:/Users/86188/Desktop/simple-faster-rcnn-pytorch-master/checkpoints/fasterrcnn_04250634_0.6951113529274409')
    print('network loads successs!')
    # 2. Init the camera (or read from a video file)
    # cap = cv2.VideoCapture(0)
    cap = cv2.VideoCapture('C:/Users/86188/Desktop/simple-faster-rcnn-pytorch-master/test.mp4')
    cap.set(3, 900)  # frame width
    cap.set(4, 900)  # frame height
    while 1:
        ret, frame = cap.read()  # read one frame
        cv2.imshow('摄像头', frame)  # display the frame (window title: "camera")
        k = cv2.waitKey(1)
        #img = read_image('misc/catdog.jpg')
    # Tail of an eval() helper (its def is outside this chunk): accumulate the
    # per-batch predictions and score them with the VOC07 metric.
    pred_labels += pred_labels_
    pred_scores += pred_scores_
    result = eval_detection_voc(pred_bboxes, pred_labels, pred_scores,
                                gt_bboxes, gt_labels, gt_difficults,
                                use_07_metric=True)
    return result


if __name__ == "__main__":
    # Evaluate a saved checkpoint on the test set and print its mAP.
    testset = TestDataset()
    opt = Option()
    args = opt.parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    dataloader = data.DataLoader(testset,
                                 batch_size=1,
                                 num_workers=4,
                                 shuffle=False,
                                 pin_memory=True)
    faster_rcnn = FasterRCNN_vgg16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    trainer.load('checkpoints/threat_model.pth')
    eval_result = eval(dataloader, faster_rcnn, test_num=10000)
    #eval_result = attack_eval(dataloader, faster_rcnn)
    print(eval_result['map'])
# This can be removed once PyTorch 0.4.x is out. # See https://discuss.pytorch.org/t/question-about-rebuild-tensor-v2/14560 import torch._utils try: torch._utils._rebuild_tensor_v2 except AttributeError: def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks): tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride) tensor.requires_grad = requires_grad tensor._backward_hooks = backward_hooks return tensor torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2 #%% faster_rcnn = FasterRCNNVGG16() trainer = FasterRCNNTrainer(faster_rcnn).cuda() trainer.load('./checkpoints/fasterrcnn_09031352_0') opt.caffe_pretrain=True # this model was trained from caffe-pretrained model # Plot examples on training set dataset = RSNADataset(opt.root_dir) for i in range(0, len(dataset)): sample = dataset[i] img = sample['image'] ori_img_ = inverse_normalize(at.tonumpy(img)) # plot predicti bboxes _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True) pred_img = vis_bbox(ori_img_, at.tonumpy(_bboxes[0]), at.tonumpy(_labels[0]).reshape(-1), at.tonumpy(_scores[0]))
import os
import torch as t
import torch.nn as nn
from utils.config import opt
from model import FasterRCNNVGG16
from trainer import FasterRCNNTrainer
from data.util import read_image
from utils.vis_tool import vis_bbox
from utils import array_tool as at
from utils.average import AverageVal
import matplotlib.pyplot as plt

# Demo script: load one image, restore a converted Chainer checkpoint, and
# print the backbone feature map for inspection.
img = read_image("misc/demo.jpg")
img = t.from_numpy(img)[None]  # add batch dimension: (C,H,W) -> (1,C,H,W)

logger = AverageVal()
faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn, logger).cuda()
trainer.load("model/chainer_best_model_converted_to_pytorch_0.7053.pth")
opt.caffe_pretrain = True  # this model was trained from caffe-pretrained model
print(img.shape)
# getFeatureMap presumably returns the backbone conv features -- confirm in
# the model's implementation
h = trainer.faster_rcnn.getFeatureMap(img)
print(h)
def train(**kwargs):
    """Train Faster R-CNN on the Carrada radar dataset.

    Iterates sequences, then frames within each sequence; logs losses and
    images to TensorBoard, evaluates on the validation split each epoch,
    and runs the test split whenever validation mAP improves. LR decays via
    an ExponentialLR scheduler every ``opt.lr_step`` epochs.
    """
    opt._parse(kwargs)
    carrada = download('Carrada')  # root path of the downloaded dataset
    train_set = Carrada().get('Train')
    val_set = Carrada().get('Validation')
    test_set = Carrada().get('Test')
    train_seqs = SequenceCarradaDataset(train_set)
    val_seqs = SequenceCarradaDataset(val_set)
    test_seqs = SequenceCarradaDataset(test_set)
    # sequence-level loaders; per-frame loaders are built inside the loop
    train_seqs_loader = data_.DataLoader(train_seqs,
                                         batch_size=1,
                                         shuffle=True,
                                         # pin_memory=True,
                                         num_workers=opt.num_workers)
    val_seqs_loader = data_.DataLoader(val_seqs,
                                       batch_size=1,
                                       shuffle=False,
                                       # pin_memory=True,
                                       num_workers=opt.num_workers)
    test_seqs_loader = data_.DataLoader(test_seqs,
                                        batch_size=1,
                                        shuffle=False,
                                        # pin_memory=True,
                                        num_workers=opt.num_workers)
    # faster_rcnn = FasterRCNNVGG16(n_fg_class=3)
    # faster_rcnn = FasterRCNNRESNET101(n_fg_class=3)
    faster_rcnn = FasterRCNNRESNET18(n_fg_class=3)
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    scheduler = ExponentialLR(trainer.faster_rcnn.optimizer, gamma=0.9)
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    writer_path = os.path.join(opt.logs_path, opt.model_name)
    os.makedirs(writer_path, exist_ok=True)
    writer = SummaryWriter(writer_path)
    iteration = 0  # global step counter across epochs/sequences
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        print('Processing epoch: {}/{}'.format(epoch, opt.epoch))
        trainer.reset_meters()
        for n_seq, sequence_data in tqdm(enumerate(train_seqs_loader)):
            seq_name, seq = sequence_data
            path_to_frames = os.path.join(carrada, seq_name[0])
            train_frame_set = CarradaDataset(opt, seq, 'box', opt.signal_type,
                                             path_to_frames)
            train_frame_loader = data_.DataLoader(train_frame_set,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  num_workers=opt.num_workers)
            for ii, (img, bbox_, label_, scale) in tqdm(enumerate(train_frame_loader)):
                iteration += 1
                scale = at.scalar(scale)
                img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
                img = normalize(img)
                # periodically run a debug step (stop=True) if configured
                if opt.debug_step and (iteration+1) % opt.debug_step == 0:
                    trainer.train_step(img, bbox, label, scale, stop=True)
                else:
                    trainer.train_step(img, bbox, label, scale)
                if (iteration + 1) % opt.plot_every == 0:
                    if os.path.exists(opt.debug_file):
                        ipdb.set_trace()
                    # log running loss meters to TensorBoard
                    train_results = trainer.get_meter_data()
                    writer.add_scalar('Losses/rpn_loc', train_results['rpn_loc_loss'], iteration)
                    writer.add_scalar('Losses/rpn_cls', train_results['rpn_cls_loss'], iteration)
                    writer.add_scalar('Losses/roi_loc', train_results['roi_loc_loss'], iteration)
                    writer.add_scalar('Losses/roi_cls', train_results['roi_cls_loss'], iteration)
                    writer.add_scalar('Losses/total', train_results['total_loss'], iteration)
                if (iteration + 1) % opt.img_every == 0:
                    # log ground-truth and predicted boxes as images
                    ori_img_ = at.tonumpy(img[0])
                    gt_img = visdom_bbox(ori_img_,
                                         at.tonumpy(bbox_[0]),
                                         at.tonumpy(label_[0]))
                    gt_img_grid = make_grid(torch.from_numpy(gt_img))
                    writer.add_image('Ground_truth_img', gt_img_grid, iteration)
                    # plot predicted bboxes
                    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                        [ori_img_], opt.signal_type, visualize=True)  # FLAG: vis
                    pred_img = visdom_bbox(ori_img_,
                                           at.tonumpy(_bboxes[0]),
                                           at.tonumpy(_labels[0]).reshape(-1),
                                           at.tonumpy(_scores[0]))
                    pred_img_grid = make_grid(torch.from_numpy(pred_img))
                    writer.add_image('Predicted_img', pred_img_grid, iteration)
                if opt.train_eval and (iteration + 1) % opt.train_eval == 0:
                    # optional periodic evaluation on the training split
                    train_eval_result, train_best_iou = eval(train_seqs_loader,
                                                             faster_rcnn,
                                                             opt.signal_type)
                    writer.add_scalar('Train/mAP', train_eval_result['map'], iteration)
                    writer.add_scalar('Train/Best_IoU', train_best_iou, iteration)
        # end-of-epoch validation
        eval_result, best_val_iou = eval(val_seqs_loader, faster_rcnn,
                                         opt.signal_type, test_num=opt.test_num)
        writer.add_scalar('Validation/mAP', eval_result['map'], iteration)
        writer.add_scalar('Validation/Best_IoU', best_val_iou, iteration)
        lr_ = scheduler.get_lr()[0]
        writer.add_scalar('learning_rate', lr_, iteration)
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        print(log_info)
        if eval_result['map'] > best_map:
            # validation improved: also measure test mAP and checkpoint
            test_result, test_best_iou = eval(test_seqs_loader, faster_rcnn,
                                              opt.signal_type,
                                              test_num=opt.test_num)
            writer.add_scalar('Test/mAP', test_result['map'], iteration)
            writer.add_scalar('Test/Best_IoU', test_best_iou, iteration)
            best_map = eval_result['map']
            best_test_map = test_result['map']
            best_path = trainer.save(best_val_map=best_map,
                                     best_test_map=best_test_map)
            # best_path = trainer.save(best_map=best_map)
        if (epoch + 1) % opt.lr_step == 0:
            scheduler.step()
def predict(**kwargs):
    """Run detection over the DSB prediction set and write an RLE submission CSV.

    For each image: predict boxes, stamp a per-instance id (idx + 1) into the
    externally predicted mask wherever it is non-zero inside each box, then
    run-length-encode the labeled mask. Results are accumulated into a
    pandas DataFrame and saved.
    """
    dsbpredopt._parse(kwargs)
    dataset = DSBPredictDataset(dsbpredopt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=False,
                                  pin_memory=True,
                                  num_workers=dsbpredopt.num_workers)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if dsbpredopt.load_path:
        trainer.load(dsbpredopt.load_path)
        print('load checkpoint from %s' % dsbpredopt.load_path)
    new_test_ids = []
    rles = []
    for ii, (imgs, sizes, predicted_mask, id_) in tqdm(enumerate(dataloader)):
        sizes = [sizes[0][0], sizes[1][0]]  # (H, W) of the original image
        pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(
            imgs, [sizes])
        # (commented-out visdom visualization of predictions removed for brevity;
        # it drew pred boxes over both the image and the predicted mask)
        predicted_mask_labeled = np.squeeze(
            at.tonumpy(predicted_mask[0]).copy())
        # boxes as integer (ymin, xmin, ymax, xmax)
        pred_bboxes_ = at.tonumpy(pred_bboxes_[0]).astype(np.uint16)
        if pred_bboxes_.shape[0] == 0:
            # no detections for this image -- log its id
            print(id_[0])
        if predicted_mask_labeled.shape[0] != sizes[
                0] or predicted_mask_labeled.shape[1] != sizes[1]:
            # sanity check: mask size must match the declared image size
            print('wtf')
        for idx, pred_bbox in enumerate(pred_bboxes_):
            # crop the mask region under this box and stamp instance id idx+1
            mask = predicted_mask_labeled[pred_bbox[0]:pred_bbox[2],
                                          pred_bbox[1]:pred_bbox[3]]
            if (pred_bbox[2] > sizes[0] or pred_bbox[3] > sizes[1]):
                # box extends past the image -- should not happen
                print('wtf')
            mask[mask > 0] = idx + 1
            predicted_mask_labeled[pred_bbox[0]:pred_bbox[2],
                                   pred_bbox[1]:pred_bbox[3]] = mask
        # 255 marks unassigned foreground; clear it before encoding
        predicted_mask_labeled[predicted_mask_labeled == 255] = 0
        rle = list(prob_to_rles(predicted_mask_labeled))
        # (large commented-out RLE bounds-checking debug block removed)
        rles.extend(rle)
        new_test_ids.extend([id_[0]] * len(rle))
    # assemble the Kaggle-style submission file
    sub = pd.DataFrame()
    sub['ImageId'] = new_test_ids
    sub['EncodedPixels'] = pd.Series(rles).apply(
        lambda x: ' '.join(str(y) for y in x))
    sub.to_csv("predicts/unet__data_0_d6_t_c_lr9_bs4_size256_epoch_74.csv",
               index=False)
def train(epochs, img_box_dict, pretrained_model=None, save_path=None, rpn_rois=None, train_rpn=True, train_rcnn=True, validate=False, lock_grad_for_rpn=False, lock_grad_for_rcnn=False):
    """Train an FPN-ResNet Faster R-CNN over a dict of image -> boxes.

    Args:
        epochs: number of epochs to run.
        img_box_dict: mapping of image path -> annotation info.
        pretrained_model: optional checkpoint to load (weights only).
        save_path: checkpoint path written at the end of every epoch.
        rpn_rois: optional dict of precomputed RoIs per image path.
        train_rpn / train_rcnn: which sub-networks to optimize.
        validate: split off 5% of the data and report mAP each epoch.
        lock_grad_for_rpn / lock_grad_for_rcnn: freeze everything except the
            RPN / detection head respectively.
    """
    fpn_resnet = FPNResNet().cuda()
    # lock gradient: freeze all parameters, then re-enable only the sub-net
    # that should keep training
    if lock_grad_for_rcnn:
        for param in fpn_resnet.parameters():
            param.requires_grad = False
        for param in fpn_resnet.head.parameters():
            param.requires_grad = True
    if lock_grad_for_rpn:
        for param in fpn_resnet.parameters():
            param.requires_grad = False
        for param in fpn_resnet.rpn.parameters():
            param.requires_grad = True
    fpn_resnet.get_optimizer(Config.lr)
    trainer = FasterRCNNTrainer(fpn_resnet).cuda()
    print('model constructed')
    if pretrained_model is not None:
        trainer.load(pretrained_model, load_optimizer=False)
    if validate:
        dict_train, dict_val = generate_train_val_data(img_box_dict, p_train=0.95)
    else:
        dict_train = img_box_dict
        dict_val = None
    for epoch in range(epochs):
        print('epoch: ', epoch)
        for i, [img_dir, img_info] in tqdm(enumerate(dict_train.items())):
            img, img_info, flipped = rescale_image(img_dir, img_info, flip=True)
            img_size = list(img_info['img_size'])
            img_tensor = create_img_tensor(img)
            if rpn_rois:
                img_rois = rpn_rois[img_dir]
                if flipped:
                    # mirror the cached RoIs horizontally to match the flip.
                    # FIX: renamed locals that shadowed the builtins min()/max()
                    x_max = img_size[1] - img_rois[:, 1]
                    x_min = img_size[1] - img_rois[:, 3]
                    img_rois[:, 1] = x_min
                    img_rois[:, 3] = x_max
                img_rois = torch.from_numpy(img_rois).cuda()
                trainer.train_step(img_tensor, img_info, img_rois, train_rpn, train_rcnn)
            else:
                trainer.train_step(img_tensor, img_info, None, train_rpn, train_rcnn)
        trainer.save(save_path, save_optimizer=False)
        if validate:
            # FIX: renamed 'map' -> 'mean_ap' (shadowed the builtin map())
            mean_ap = evaluation(dict_val, trainer.fpn_resnet)
            print('mAP: ', mean_ap)
        # lr decay once at 70% of the schedule
        if epoch == int(epochs * 0.7):
            trainer.scale_lr(Config.lr_decay)
def train(**kwargs):
    """Train Faster R-CNN (marine 3-class variant: animal/plant/rock).

    Evaluates per epoch, checkpoints on new best mAP (also plotting the ROI
    confusion matrix at that point), decays the LR at epoch 9, and prints the
    total wall-clock training time at the end.
    """
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    best_ap = np.array([0.] * opt.label_number)
    lr_ = opt.lr
    vis = trainer.vis
    starttime = datetime.datetime.now()  # for total training-time report
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())
                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)
                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)
                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                roi_cm = at.totensor(trainer.roi_cm.conf, False).float()
                trainer.vis.img('roi_cm', roi_cm)
        eval_result = eval(test_dataloader, faster_rcnn, vis=vis, test_num=opt.test_num)
        # NOTE(review): best_ap is overwritten every epoch regardless of
        # whether mAP improved -- it tracks the *latest* per-class AP, not
        # the best. Confirm intent.
        best_ap = dict(zip(opt.VOC_BBOX_LABEL_NAMES, eval_result['ap']))
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if eval_result['map'] > best_map:
            print('roi_cm=\n', trainer.roi_cm.value())
            # NOTE(review): title says "Normalized" but normalize=False
            plot_confusion_matrix(trainer.roi_cm.value(),
                                  classes=('animal', 'plant', 'rock', 'background'),
                                  normalize=False,
                                  title='Normalized Confusion Matrix')
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map, best_ap=best_ap)
        if epoch == 9:
            # NOTE(review): best_path is undefined here if mAP never improved
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        # if epoch == 13:
        #     break
    endtime = datetime.datetime.now()
    train_consum = (endtime - starttime).seconds
    print("train_consum=", train_consum)
def train(**kwargs):
    """Train Faster R-CNN with a torchvision-transform pipeline on the dataset.

    ``kwargs`` override config values via ``opt._parse``. Evaluates each
    epoch, checkpoints on new best mAP, decays the LR at epoch 9, and stops
    after epoch 13.
    """
    opt._parse(kwargs)
    transform = transforms.Compose([
        # you can add other transformations in this list
        transforms.ToTensor()
    ])
    dataset = Dataset(opt, transform=transform)
    # batch_size=1 is assumed by the rest of the training code
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers,
                                  )
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s at trian.py line 70' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    best_path = None  # FIX: avoid NameError at epoch 9 if mAP never improved
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # FIX: removed deprecated Variable() wrappers (no-ops since
            # PyTorch 0.4) and the per-iteration debug prints that spammed
            # stdout on every training step.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())
                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)
                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)
                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm',
                                at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # reload the best checkpoint (if any) before decaying the LR
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        trainer.vis.plot('test_map', eval_result['map'])
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13:
            break
def fit_ont_epoch(net, epoch, epoch_size, epoch_size_val, gen, genval, Epoch):
    """Run one training epoch + one validation pass with tqdm progress bars,
    then checkpoint.

    Args:
        net: the Faster R-CNN network to train.
        epoch: current epoch index (0-based; displayed 1-based).
        epoch_size / epoch_size_val: number of train/val batches to consume.
        gen / genval: train/val batch generators yielding numpy
            (imgs, boxes, labels).
        Epoch: total number of epochs (for progress display only).

    NOTE(review): relies on module-level globals ``optimizer``, ``model`` and
    ``get_lr`` -- confirm ``model`` is the same network as ``net``. The name
    typo ("ont") is kept because callers use it.
    """
    train_util = FasterRCNNTrainer(net, optimizer)
    # running sums of each loss component over the epoch
    total_loss = 0
    rpn_loc_loss = 0
    rpn_cls_loss = 0
    roi_loc_loss = 0
    roi_cls_loss = 0
    val_toal_loss = 0
    with tqdm(total=epoch_size, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_size:
                break
            imgs, boxes, labels = batch[0], batch[1], batch[2]
            # no_grad only covers the numpy->cuda conversion, not the step
            with torch.no_grad():
                imgs = Variable(
                    torch.from_numpy(imgs).type(torch.FloatTensor)).cuda()
                boxes = [
                    Variable(torch.from_numpy(box).type(
                        torch.FloatTensor)).cuda() for box in boxes
                ]
                labels = [
                    Variable(torch.from_numpy(label).type(
                        torch.FloatTensor)).cuda() for label in labels
                ]
            losses = train_util.train_step(imgs, boxes, labels, 1)
            rpn_loc, rpn_cls, roi_loc, roi_cls, total = losses
            total_loss += total
            rpn_loc_loss += rpn_loc
            rpn_cls_loss += rpn_cls
            roi_loc_loss += roi_loc
            roi_cls_loss += roi_cls
            # show running-average losses and the current LR on the bar
            pbar.set_postfix(
                **{
                    'total': total_loss.item() / (iteration + 1),
                    'rpn_loc': rpn_loc_loss.item() / (iteration + 1),
                    'rpn_cls': rpn_cls_loss.item() / (iteration + 1),
                    'roi_loc': roi_loc_loss.item() / (iteration + 1),
                    'roi_cls': roi_cls_loss.item() / (iteration + 1),
                    'lr': get_lr(optimizer)
                })
            pbar.update(1)
    print('Start Validation')
    with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(genval):
            if iteration >= epoch_size_val:
                break
            imgs, boxes, labels = batch[0], batch[1], batch[2]
            with torch.no_grad():
                imgs = Variable(
                    torch.from_numpy(imgs).type(torch.FloatTensor)).cuda()
                boxes = Variable(
                    torch.from_numpy(boxes).type(torch.FloatTensor)).cuda()
                labels = Variable(
                    torch.from_numpy(labels).type(torch.FloatTensor)).cuda()
                # forward-only pass to accumulate validation loss
                train_util.optimizer.zero_grad()
                losses = train_util.forward(imgs, boxes, labels, 1)
                _, _, _, _, val_total = losses
                val_toal_loss += val_total
            pbar.set_postfix(
                **{'total_loss': val_toal_loss.item() / (iteration + 1)})
            pbar.update(1)
    print('Finish Validation')
    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.4f || Val Loss: %.4f ' %
          (total_loss / (epoch_size + 1), val_toal_loss / (epoch_size_val + 1)))
    print('Saving state, iter:', str(epoch + 1))
    # NOTE(review): saves the global ``model``, not ``net`` -- verify intent
    torch.save(
        model.state_dict(),
        'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth' %
        ((epoch + 1), total_loss / (epoch_size + 1),
         val_toal_loss / (epoch_size_val + 1)))
def main():
    """Load a checkpoint, quantize the model, and save both a masked and a
    SparseDense variant of it.

    Relies on module-level ``opt`` and ``args`` (parsed elsewhere in the
    file) and on the ``get_size`` helper for size reporting.
    """
    faster_rcnn = FasterRCNNVGG16(mask=opt.mask)
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    assert os.path.isfile(
        args.load_path), f"Need valid checkpoint, {args.load_path} not found"
    trainer.load(args.load_path)
    '''
    Check to make sure weights are dense
    '''
    # disable sparse mode on any module that supports it before checking
    for n, m in trainer.named_modules():
        if hasattr(m, 'sparse'):
            m.sparse = False
    for n, m in trainer.named_modules():
        if hasattr(m, 'weight'):
            if m.weight.is_sparse:
                # already sparse: nothing to do
                print("Weights are already sparse")
                return
    print("\n\n=========SIZE BEFORE=============")
    try:
        trainer.faster_rcnn.set_pruned()
    except:
        # NOTE(review): bare except swallows *all* errors, not just the
        # "no masks" case -- consider narrowing the exception type
        print("No masks.")
    get_size(trainer)
    trainer.quantize(bits=args.bits, verbose=args.verbose)
    print("\n\n=========SIZE AFTER==============")
    get_size(trainer)
    print("Saving a maskedmodel")
    trainer.save(save_path=args.save_path)
    print("Saving a SparseDense Model")
    trainer.replace_with_sparsedense()
    # derive the SparseDense filename by prefixing the basename
    sd_file = args.save_path.split("/")
    sd_file[-1] = "SparseDense_" + sd_file[-1]
    sd_file = "/".join(sd_file)
    trainer.save(save_path=sd_file)
def train(**kwargs):
    """Train Faster R-CNN (VGG16 backbone), evaluating after every epoch.

    `kwargs` are config overrides applied to the global `opt`. This code
    base only supports batch_size=1.
    """
    opt._parse(kwargs)  # apply config overrides

    dataset = Dataset(opt)  # training dataset built from config
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)  # test split used for per-epoch evaluation
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()  # detector with a VGG16 backbone
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()  # model + loss wrapper
    if opt.load_path:  # optionally resume from pretrained weights
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0  # best mAP so far; decides when to checkpoint
    lr_ = opt.lr  # current learning rate, tracked for logging
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)  # per-image rescale factor
            # bbox_: ground-truth boxes (ymin, xmin, ymax, xmax);
            # label_: class indices into VOC_BBOX_LABEL_NAMES.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)  # one optimizer step
            if (ii + 1) % opt.plot_every == 0:  # periodic visdom diagnostics
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm',
                                at.totensor(trainer.roi_cm.conf, False).float())
        # Evaluate once per epoch. NOTE: `eval` here is a project helper
        # (imported elsewhere in this file), not the Python builtin.
        eval_result = eval(test_dataloader, faster_rcnn,
                           test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_),
            str(eval_result['map']),
            str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        # Keep the checkpoint with the best mAP seen so far.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # At epoch 9: reload the best checkpoint and decay the LR.
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == 13:  # stop after 14 epochs
            break
def train(**kwargs):
    """Train Faster R-CNN (VGG16) with per-epoch evaluation and LR decay.

    Variant that wraps tensors in the legacy `Variable` API. `kwargs`
    override the global config `opt`; batch_size is fixed at 1.
    """
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:  # optionally resume from a checkpoint
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0  # best mAP so far (checkpoint gate)
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # Legacy autograd wrapper; Variable is a no-op on modern PyTorch.
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:  # periodic visdom diagnostics
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm',
                                at.totensor(trainer.roi_cm.conf, False).float())
        # `eval` is a project evaluation helper, not the builtin.
        eval_result = eval(test_dataloader, faster_rcnn,
                           test_num=opt.test_num)
        # Checkpoint whenever evaluation improves.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # Reload the best checkpoint and decay the learning rate.
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_),
            str(eval_result['map']),
            str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13:  # hard stop after 14 epochs
            break
def train(**kwargs):
    """Train Faster R-CNN on grayscale data, logging to file + console.

    Visdom plotting is disabled (kept commented out); progress goes through
    the `logging` module instead. `kwargs` override the global config `opt`.
    """
    opt._parse(kwargs)

    # Set up a timestamped log file under logs/faster_rcnn_train_onGray.
    log_dir = os.path.join("logs", "faster_rcnn_train_onGray")
    os.makedirs(log_dir, exist_ok=True)
    log_path = os.path.join(
        log_dir, time.strftime("%Y-%m-%d-%H%M.log", time.localtime(time.time()))
    )
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[logging.FileHandler(log_path), logging.StreamHandler()],
    )
    logger = logging.getLogger()

    dataset = Dataset(opt)
    print("load data")
    dataloader = data_.DataLoader(
        dataset,
        batch_size=1,
        shuffle=True,
        # pin_memory=True,
        num_workers=opt.num_workers,
    )
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        pin_memory=True,
    )
    faster_rcnn = FasterRCNNVGG16()
    print("model construct completed")
    logger.info(faster_rcnn)  # record the full model architecture
    logger.info("-" * 50)
    # This trainer variant also takes the logger for internal reporting.
    trainer = FasterRCNNTrainer(faster_rcnn, logger).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print("load pretrained model from %s" % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win="labels")
    best_map = 0  # best mAP so far (checkpoint gate)
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        trainer.reset_ave()  # reset running averages (trainer-specific)
        for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # Every `print_freq` steps, log progress and ask train_step to
            # print its loss breakdown; otherwise train silently.
            if (ii + 1) % opt.print_freq == 0:
                logger.info(
                    "[Train] Epoch:{} [{:03d}/{:03d}]({:.0f}%)\t".format(
                        epoch, ii + 1, len(dataloader),
                        (ii + 1) / len(dataloader) * 100
                    )
                )
                trainer.train_step(
                    img, bbox, label, scale, print_epoch=epoch, print_info=True
                )
            else:
                trainer.train_step(
                    img, bbox, label, scale, print_epoch=epoch, print_info=False
                )
            # Visdom plotting disabled for this variant. (The original file
            # carried this commented block twice, verbatim; one copy kept.)
            # if (ii + 1) % opt.plot_every == 0:
            #     if os.path.exists(opt.debug_file):
            #         ipdb.set_trace()
            #
            #     # plot loss
            #     trainer.vis.plot_many(trainer.get_meter_data())
            #
            #     # plot groud truth bboxes
            #     ori_img_ = inverse_normalize(at.tonumpy(img[0]))
            #     gt_img = visdom_bbox(
            #         ori_img_, at.tonumpy(bbox_[0]), at.tonumpy(label_[0])
            #     )
            #     trainer.vis.img("gt_img", gt_img)
            #
            #     # plot predicti bboxes
            #     _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
            #         [ori_img_], visualize=True
            #     )
            #     pred_img = visdom_bbox(
            #         ori_img_,
            #         at.tonumpy(_bboxes[0]),
            #         at.tonumpy(_labels[0]).reshape(-1),
            #         at.tonumpy(_scores[0]),
            #     )
            #     trainer.vis.img("pred_img", pred_img)
            #
            #     # rpn confusion matrix(meter)
            #     trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win="rpn_cm")
            #     # roi confusion matrix
            #     trainer.vis.img(
            #         "roi_cm", at.totensor(trainer.roi_cm.conf, False).float()
            #     )
        # Per-epoch evaluation; `eval` is the project helper, not the builtin.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        # trainer.vis.plot("test_map", eval_result["map"])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]["lr"]
        log_info = "lr:{}, map:{},loss:{}".format(
            str(lr_), str(eval_result["map"]), str(trainer.get_meter_data())
        )
        logger.info(log_info)
        # trainer.vis.log(log_info)
        # Checkpoint on improvement, into a timestamped path.
        if eval_result["map"] > best_map:
            best_map = eval_result["map"]
            best_path = trainer.save(
                best_map=best_map,
                save_path="checkpoints/trainedOnGray/fasterrcnn_%s"
                % time.strftime("%m%d%H%M"),
            )
        if epoch == 9:
            # Reload the best checkpoint and decay the learning rate.
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
def train(**kwargs):
    """Train/evaluate Faster R-CNN with resume support and periodic snapshots.

    Variant with a dataset-derived class count, single anchor ratio/scale,
    optional CPU mode (`opt.use_cuda`), an evaluation-only mode
    (`opt.validate_only`), and resume-from-checkpoint via a stored 'epoch'.
    """
    opt._parse(kwargs)
    print('load data')
    dataset = Dataset(opt)
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    # Class count comes from the dataset; single aspect ratio and scale.
    faster_rcnn = FasterRCNNVGG16(n_fg_class=dataset.get_class_count(),
                                  ratios=[1],
                                  anchor_scales=[1])
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn,
                                n_fg_class=dataset.get_class_count())
    if opt.use_cuda:
        trainer = trainer.cuda()
    if opt.load_path:
        # load() returns the stored state dict (may carry an 'epoch' key).
        old_state = trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    if opt.validate_only:
        # Evaluation-only mode: run the full test set once and exit.
        num_eval_images = len(testset)
        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           trainer,
                           testset,
                           test_num=num_eval_images)
        print('Evaluation finished, obtained {} using {} out of {} images'.
              format(eval_result, num_eval_images, len(testset)))
        return
    # Resume epoch counting if the checkpoint recorded it.
    if opt.load_path and 'epoch' in old_state.keys():
        starting_epoch = old_state['epoch'] + 1
        print('Model was trained until epoch {}, continuing with epoch {}'.
              format(old_state['epoch'], starting_epoch))
    else:
        starting_epoch = 0
    #trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    global_step = 0
    for epoch in range(starting_epoch, opt.num_epochs):
        # Step-decay schedule: decay by lr_decay every epoch_decay epochs.
        lr_ = opt.lr * (opt.lr_decay**(epoch // opt.epoch_decay))
        trainer.faster_rcnn.set_lr(lr_)
        print('Starting epoch {} with learning rate {}'.format(epoch, lr_))
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader),
                                                    total=len(dataset)):
            global_step = global_step + 1
            scale = at.scalar(scale)
            if opt.use_cuda:
                img, bbox, label = img.cuda().float(), bbox_.float().cuda(
                ), label_.float().cuda()
            else:
                img, bbox, label = img.float(), bbox_.float(), label_.float()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            losses = trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                #trainer.vis.plot_many(trainer.get_meter_data())
                """
                # plot groud truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]),
                                     label_names=dataset.get_class_names()+['BG'])
                trainer.vis.img('gt_img', gt_img)

                # plot predicti bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]),
                                       label_names=dataset.get_class_names()+['BG'])
                trainer.vis.img('pred_img', pred_img)
                """
                # rpn confusion matrix(meter)
                #trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                #trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
                #print('Current total loss {}'.format(losses[-1].tolist()))
                trainer.vis.plot('train_total_loss', losses[-1].tolist())
            # Periodic mid-epoch snapshot, counted in global steps.
            if (global_step) % opt.snapshot_every == 0:
                snapshot_path = trainer.save(epoch=epoch)
                print("Snapshotted to {}".format(snapshot_path))
            #snapshot_path = trainer.save(epoch=epoch)
            #print("After epoch {}: snapshotted to {}".format(epoch,snapshot_path))
            # Free per-iteration tensors aggressively to limit GPU memory.
            for lo in losses:
                del lo
            del img, bbox_, label_, scale
            torch.cuda.empty_cache()
        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           trainer,
                           testset,
                           test_num=min(opt.test_num, len(testset)))
        print(eval_result)
        # TODO: this definitely is not good and will bias evaluation
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=eval_result['map'], epoch=epoch)
            print("After epoch {}: snapshotted to {}".format(epoch, best_path))
        trainer.vis.plot('test_map', eval_result['map'])
        del eval_result
        torch.cuda.empty_cache()
def main(**kwargs):
    """Run the detector + SE-ResNet50 classifier pipeline over a test set.

    For each image: predict boxes with Faster R-CNN, crop each candidate,
    classify the crop with the SENet classifier, draw accepted boxes (red)
    and the ground-truth box (green), save the figure, and append one line
    per image to a results text file.

    NOTE(review): paths and filenames below are hard-coded to one machine;
    parameterizing them via `opt` would make this reusable.
    """
    opt._parse(kwargs)
    # Classifier checkpoint trained elsewhere; 8 tool classes.
    checkpoint = t.load('se_0314_all')
    num_classes = 8
    # step = [112, 112]
    classifier = t.hub.load(
        'moskomule/senet.pytorch',
        'se_resnet50',
        pretrained=True,
    )
    # Replace the final FC layer to match the 8-class head, then load weights.
    num_ftrs = classifier.fc.in_features
    classifier.fc = nn.Linear(num_ftrs, num_classes)
    classifier.load_state_dict(checkpoint['state_dict'])
    classifier.eval()
    classifier = classifier.cuda()
    result_file = open('result0522.txt', 'w')
    save_root = './result/bbox/'
    makeDir()  # project helper: create output directories
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    trainer.load('checkpoints/fasterrcnn_04081709_0.6626689194895079')
    data_root = '/home/lsm/testSamples700_new/'
    test_file = 'GT707.txt'
    test700 = Test700Dataset(data_root, test_file, opt)
    test_dataloader = data_.DataLoader(test700,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    print('data loaded!')
    for ii, (fn, imgs, sizes, gt_bboxes_) in tqdm(enumerate(test_dataloader)):
        # print(gt_bboxes_)
        # Ground-truth box stored (ymin, xmin, ymax, xmax) -> x/y corners.
        gt_x1 = int(gt_bboxes_[0][0][1])
        gt_y1 = int(gt_bboxes_[0][0][0])
        gt_x2 = int(gt_bboxes_[0][0][3])
        gt_y2 = int(gt_bboxes_[0][0][2])
        # print([gt_x1,gt_y1,gt_x2,gt_y2])
        sizes = [sizes[0][0].item(), sizes[1][0].item()]
        pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(
            imgs, [sizes])
        result_file.write(fn[0])
        # drawBbox(data_root,fn[0],pred_bboxes_,save_root)
        img = Image.open(data_root + fn[0]).convert("RGB")
        plt.imshow(img)
        currentAxis = plt.gca()
        for i in range(len(pred_bboxes_[0])):
            bbox = pred_bboxes_[0][i]
            score = pred_scores_[0][i]
            # Detector boxes are (ymin, xmin, ymax, xmax).
            x1, y1, x2, y2 = bbox[1], bbox[0], bbox[3], bbox[2]
            canditate = img.crop((x1, y1, x2, y2))
            # decision = decide(classifier, canditate, step)
            # Second-stage classifier; 0 appears to mean "reject".
            decision = decide2(classifier, canditate)
            if decision != 0:
                # plt.text(x1, y1, toolNameList[decision]+" "+str(score), size=15, color='r')
                rect = patches.Rectangle((x1, y1),
                                         x2 - x1,
                                         y2 - y1,
                                         fill=False,
                                         edgecolor='r',
                                         linewidth=2)
                currentAxis.add_patch(rect)
                result_file.write(' ' + toolNameList[decision] + ' ' +
                                  str(x1) + ' ' + str(y1) + ' ' + str(x2) +
                                  ' ' + str(y2))
        # Draw the ground-truth box in green for comparison.
        rect = patches.Rectangle((gt_x1, gt_y1),
                                 gt_x2 - gt_x1,
                                 gt_y2 - gt_y1,
                                 fill=False,
                                 edgecolor='g',
                                 linewidth=2)
        currentAxis.add_patch(rect)
        plt.savefig(save_root + fn[0])
        plt.close()
        result_file.write('\n')
    result_file.close()
def train(**kwargs):
    """Train Faster R-CNN with optional knowledge distillation.

    When `opt.is_distillation` is set, a first pass over the training set
    generates teacher "soft labels" (boxes/labels/scores/features) that are
    cached on disk as .npy files and re-fed into `train_step` during
    training. Evaluation runs every epoch; training stops at epoch 20.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    testset_all = TestDataset_all(opt, 'test2')
    test_all_dataloader = data_.DataLoader(testset_all,
                                           batch_size=1,
                                           num_workers=opt.test_num_workers,
                                           shuffle=False,
                                           pin_memory=True
                                           )

    tsf = Transform(opt.min_size, opt.max_size)
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    print('model construct completed')

    # Load a previously trained model if a path is configured in config.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    # Extract the soft labels needed for knowledge distillation.
    if opt.is_distillation == True:
        # NOTE: 'predict_socre' (sic) is the config attribute name used
        # throughout this project — do not "fix" the spelling here.
        opt.predict_socre = 0.3
        for ii, (imgs, sizes, gt_bboxes_, gt_labels_, scale,
                 id_) in tqdm(enumerate(dataloader)):
            if len(gt_bboxes_) == 0:
                continue
            sizes = [sizes[0][0].item(), sizes[1][0].item()]
            pred_bboxes_, pred_labels_, pred_scores_, features_ = trainer.faster_rcnn.predict(imgs, [
                sizes])
            img_file = os.path.join(
                opt.voc_data_dir, 'JPEGImages', id_[0] + '.jpg')
            ori_img = read_image(img_file, color=True)
            img, pred_bboxes_, pred_labels_, scale_ = tsf(
                (ori_img, pred_bboxes_[0], pred_labels_[0]))
            # Drop soft labels that overlap ground truth too much, and
            # remove wrong soft labels.
            pred_bboxes_, pred_labels_, pred_scores_ = py_cpu_nms(
                gt_bboxes_[0], gt_labels_[0], pred_bboxes_, pred_labels_,
                pred_scores_[0])
            # Cache soft labels on disk so they don't occupy GPU memory.
            np.save('label/' + str(id_[0]) + '.npy', pred_labels_)
            np.save('bbox/' + str(id_[0]) + '.npy', pred_bboxes_)
            np.save('feature/' + str(id_[0]) + '.npy', features_)
            np.save('score/' + str(id_[0]) + '.npy', pred_scores_)
        # Lower score threshold for the actual training phase.
        opt.predict_socre = 0.05
    t.cuda.empty_cache()

    # visdom: show all class label names.
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0  # best mAP so far (checkpoint gate)
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        print('epoch=%d' % epoch)
        # Reset confusion matrices / loss meters at each epoch.
        trainer.reset_meters()
        for ii, (img, sizes, bbox_, label_, scale,
                 id_) in tqdm(enumerate(dataloader)):
            if len(bbox_) == 0:
                continue
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            if opt.is_distillation == True:
                # Load the cached teacher soft labels for this image id.
                teacher_pred_labels = np.load(
                    'label/' + str(id_[0]) + '.npy')
                teacher_pred_bboxes = np.load(
                    'bbox/' + str(id_[0]) + '.npy')
                teacher_pred_features_ = np.load(
                    'feature/' + str(id_[0]) + '.npy')
                teacher_pred_scores = np.load(
                    'score/' + str(id_[0]) + '.npy')
                # dtype conversion
                teacher_pred_bboxes = teacher_pred_bboxes.astype(np.float32)
                teacher_pred_labels = teacher_pred_labels.astype(np.int32)
                teacher_pred_scores = teacher_pred_scores.astype(np.float32)
                # convert to torch tensors
                teacher_pred_bboxes_ = at.totensor(teacher_pred_bboxes)
                teacher_pred_labels_ = at.totensor(teacher_pred_labels)
                teacher_pred_scores_ = at.totensor(teacher_pred_scores)
                teacher_pred_features_ = at.totensor(teacher_pred_features_)
                # move to GPU
                teacher_pred_bboxes_ = teacher_pred_bboxes_.cuda()
                teacher_pred_labels_ = teacher_pred_labels_.cuda()
                teacher_pred_scores_ = teacher_pred_scores_.cuda()
                teacher_pred_features_ = teacher_pred_features_.cuda()
                # If the dataset Transform randomly flipped this image, the
                # cached soft boxes must be flipped to match; the coordinate
                # mismatch test below detects that case.
                if(teacher_pred_bboxes_[0][1] != bbox[0][0][1]):
                    _, o_C, o_H, o_W = img.shape
                    teacher_pred_bboxes_ = flip_bbox(
                        teacher_pred_bboxes_, (o_H, o_W), x_flip=True)
                losses = trainer.train_step(img, bbox, label, scale, epoch,
                                            teacher_pred_bboxes_,
                                            teacher_pred_labels_,
                                            teacher_pred_features_,
                                            teacher_pred_scores)
            else:
                trainer.train_step(img, bbox, label, scale, epoch)
            # visdom diagnostics
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot groud truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot ground truth + teacher soft labels
                # NOTE(review): `teacher_pred_*` are only bound when
                # opt.is_distillation is True — this plot would raise
                # NameError otherwise; verify.
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(teacher_pred_bboxes_),
                                     at.tonumpy(teacher_pred_labels_),
                                     at.tonumpy(teacher_pred_scores_))
                trainer.vis.img('gt_img_all', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores, _ = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(
                    str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.text(
                    str(trainer.roi_cm.value().tolist()), win='roi_cm')
                # trainer.vis.img('roi_cm', at.totensor(
                #     trainer.roi_cm.value(), False).float())
        eval_result = eval(test_dataloader, faster_rcnn,
                           test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{},ap:{}, map:{},loss:{}'.format(str(lr_),
                                                        str(eval_result['ap']),
                                                        str(eval_result['map']),
                                                        str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        # Save the best result so far and remember its path.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 20:
            # Final save + full evaluation on the 'test2' split, then stop.
            trainer.save(best_map='20')
            result = eval(test_all_dataloader,
                          trainer.faster_rcnn, test_num=5000)
            print('20result={}'.format(str(result)))
            # trainer.load(best_path)
            # result=eval(test_all_dataloader,trainer.faster_rcnn,test_num=5000)
            # print('bestmapresult={}'.format(str(result)))
            break
        # Every 20 epochs (at epoch 15, 35, ...): reload the best weights
        # and decay the learning rate.
        if epoch % 20 == 15:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
import os import torch as t from utils.config import opt from model import FasterRCNNVGG16 from trainer import FasterRCNNTrainer from data.util import read_image from utils.vis_tool import vis_bbox from utils import array_tool as at import matplotlib.pyplot as plt if __name__ == "__main__": img = read_image('misc/demo.jpg') img = t.from_numpy(img)[None] faster_rcnn = FasterRCNNVGG16() trainer = FasterRCNNTrainer(faster_rcnn).cuda() trainer.load('./chainer_best_model_converted_to_pytorch_0.7053.pth') opt.caffe_pretrain = True # this model was trained from caffe-pretrained model _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img, visualize=True) ax = vis_bbox(at.tonumpy(img[0]), at.tonumpy(_bboxes[0]), at.tonumpy(_labels[0]).reshape(-1), at.tonumpy(_scores[0]).reshape(-1)) # plt.savefig("detection.png") print("hoge")
def train(**kwargs):
    """Train Faster R-CNN (VGG16) with visdom monitoring; save at the end.

    `**kwargs` collects keyword config overrides as a dict, which
    `opt._parse` applies to the global config. (Comments translated from
    the original Chinese; runtime strings left untouched.)
    """
    opt._parse(kwargs)  # parse the override dict into the config

    dataset = Dataset(opt)  # author-defined Dataset class
    print('读取数据中...')
    # DataLoader yields one batch at a time via a multi-worker iterator;
    # this code base only supports batch_size=1.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt, split='trainval')  # test-split loader
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()  # network definition
    print('模型构建完毕!')
    # Trainer wraps the model and produces losses; .cuda() keeps its
    # tensors on the GPU.
    trainer = FasterRCNNTrainer(
        faster_rcnn).cuda()
    if opt.load_path:  # optionally load pretrained weights
        trainer.load(opt.load_path)
        print('已加载预训练参数 %s' % opt.load_path)
    else:
        print("未引入预训练参数, 随机初始化网络参数")

    trainer.vis.text(dataset.db.label_names, win='labels')  # show label names
    best_map = 0  # best mAP so far
    for epoch in range(opt.epoch):  # for each epoch
        trainer.reset_meters()  # reset all the meters
        # for each sample (batch_size is 1)
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)  # convert to a plain scalar
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda(
            )  # move to GPU
            # Wrap in Variables for autograd (legacy API, no-op on modern
            # PyTorch).
            img, bbox, label = Variable(img), Variable(bbox), Variable(
                label)
            # TODO
            trainer.train_step(img, bbox, label, scale)  # one training step

            if (ii + 1) % opt.plot_every == 0:  # every `plot_every` steps
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot groud truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicti bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())
        # Evaluate on the test set (includes prediction internally).
        eval_result = eval(test_dataloader, faster_rcnn,
                           test_num=opt.test_num)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(
                best_map=best_map)  # checkpoint on improvement
        if epoch == 9:  # at epoch 9 reload the best model, decay the LR
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        # if epoch == 13:  # (disabled) stop at epoch 14
        #     break
    # Final save after the full run.
    trainer.save(best_map=best_map)
gen_val = Generator(lines[num_train:], (IMAGE_SHAPE[0], IMAGE_SHAPE[1])).generate() epoch_size = num_train epoch_size_val = num_val # 冻结一定部分训练 for param in model.extractor.parameters(): param.requires_grad = False # 由于b==0.8所以冻结bn层 model.freeze_bn() train_util = FasterRCNNTrainer(model, optimizer) for epoch in range(Init_Epoch, Freeze_Epoch): fit_ont_epoch(net, epoch, epoch_size, epoch_size_val, gen, gen_val, Freeze_Epoch, Cuda) lr_scheduler.step() if True: lr = 1e-5 Freeze_Epoch = 50 Unfreeze_Epoch = 100 optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4) lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.95)
def train(**kwargs):
    """Train Faster R-CNN on a dict-style dataloader that may yield
    negative samples (no boxes).

    Samples with 5 keys carry ground-truth boxes/labels; otherwise empty
    numpy placeholders are substituted. Saves a checkpoint every 10 epochs.
    """
    # opt._parse(kwargs)

    print('load data')
    dataloader = get_train_loader(opt.root_dir,
                                  batch_size=opt.batch_size,
                                  shuffle=opt.shuffle,
                                  num_workers=opt.num_workers,
                                  pin_memory=opt.pin_memory)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    # if opt.load_path:
    #     trainer.load(opt.load_path)
    #     print('load pretrained model from %s' % opt.load_path)
    # trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, sample in tqdm(enumerate(dataloader)):
            if len(sample.keys()) == 5:
                # Positive sample: has 'bbox' and 'label' entries.
                img_id, img, bbox_, scale, label_ = sample['img_id'], sample['image'], sample['bbox'], sample['scale'], \
                                                    sample['label']
                img, bbox, label = img.cuda().float(), bbox_.cuda(
                ), label_.cuda()
                img, bbox, label = Variable(img), Variable(bbox), Variable(
                    label)
            else:
                # Negative sample: substitute empty box/label arrays.
                img_id, img, bbox, scale, label = sample['img_id'], sample['image'], np.zeros((1, 0, 4)), \
                                                  sample['scale'], np.zeros((1, 0, 1))
                img = img.cuda().float()
                img = Variable(img)
            # if label.size == 0:
            #     continue
            scale = at.scalar(scale)
            trainer.train_step(img_id, img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                # NOTE(review): `bbox_`/`label_` are only bound on the 5-key
                # branch above — if this plot step fires right after a
                # negative sample they are unbound or stale; verify.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())
        # Periodic checkpoint every 10 epochs.
        if epoch % 10 == 0:
            best_path = trainer.save(best_map=best_map)
def train(**kwargs):
    """Train Faster R-CNN with visdom monitoring and per-epoch evaluation.

    (Comments translated from the original Chinese.) `opt._parse(kwargs)`
    applies the caller's keyword overrides to the global config, which also
    supplies dataset paths. opt.epoch defaults to 14 in config.py.
    """
    opt._parse(
        kwargs
    )  # interpret the override dict and pick up configured data paths

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        #pin_memory=True
    )  # pin_memory would use page-locked host memory for faster transfer

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    # If opt.load_path is set, resume from the pretrained checkpoint and
    # visualize the training labels.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.dataset.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    # Training loop; iteration count opt.epoch (=14) is a config hyperparam.
    for epoch in range(opt.epoch):
        print('epoch {}/{}'.format(epoch, opt.epoch))
        trainer.reset_meters()  # reset all meters in the visdom dashboard
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = array_tool.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot the losses
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot the ground-truth bboxes
                ori_img_ = inverse_normalize(array_tool.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     array_tool.tonumpy(bbox_[0]),
                                     array_tool.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                # predict() results are kept in underscore-prefixed names
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(
                    ori_img_,
                    array_tool.tonumpy(_bboxes[0]),
                    array_tool.tonumpy(_labels[0]).reshape(-1),
                    array_tool.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # show rpn_cm (RPN confusion matrix) in the visdom UI
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # show roi_cm (RoI head confusion matrix) in the visdom UI
                trainer.vis.img(
                    'roi_cm',
                    array_tool.totensor(trainer.roi_cm.conf, False).float())
        # Per-epoch evaluation; `eval` is the project helper, not the builtin.
        eval_result = eval(test_dataloader, faster_rcnn,
                           test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{}, loss{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)  # push LR / mAP / loss to the dashboard
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # At epoch 9: reload the best checkpoint and multiply the LR
            # by lr_decay (0.1 of the original).
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:  # stop after 14 epochs
            break
def train_val():
    """Train Faster R-CNN with a held-out validation split (10%).

    Uses a dict-style loader that may yield negative samples, evaluates
    mAP on the validation split each epoch, checkpoints on improvement,
    and decays the learning rate every 10 epochs.
    """
    print('load data')
    train_loader, val_loader = get_train_val_loader(
        opt.root_dir,
        batch_size=opt.batch_size,
        val_ratio=0.1,
        shuffle=opt.shuffle,
        num_workers=opt.num_workers,
        pin_memory=opt.pin_memory)
    faster_rcnn = FasterRCNNVGG16()
    # faster_rcnn = FasterRCNNResNet50()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    # if opt.load_path:
    #     trainer.load(opt.load_path)
    #     print('load pretrained model from %s' % opt.load_path)
    # trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        tqdm.monitor_interval = 0  # silence tqdm's monitor-thread warnings
        for ii, sample in tqdm(enumerate(train_loader)):
            if len(sample.keys()) == 5:
                # Positive sample: ground-truth boxes and labels present.
                img_id, img, bbox, scale, label = sample['img_id'], sample['image'], sample['bbox'], sample['scale'], \
                                                  sample['label']
                img, bbox, label = img.cuda().float(), bbox.cuda(), label.cuda(
                )
                img, bbox, label = Variable(img), Variable(bbox), Variable(
                    label)
            else:
                # Negative sample: substitute empty numpy placeholders.
                img_id, img, bbox, scale, label = sample['img_id'], sample['image'], np.zeros((1, 0, 4)), \
                                                  sample['scale'], np.zeros((1, 0, 1))
                img = img.cuda().float()
                img = Variable(img)
            # NOTE(review): for tensors `.size` is a bound method, so this
            # guard only ever triggers on the numpy placeholder branch.
            if bbox.size == 0:
                continue
            scale = at.scalar(scale)
            trainer.train_step(img_id, img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, img_id[0],
                                     at.tonumpy(bbox[0]),
                                     at.tonumpy(label[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, img_id[0],
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())
        # Validation mAP once per epoch.
        mAP = eval_mAP(trainer, val_loader)
        trainer.vis.plot('val_mAP', mAP)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(mAP), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        # Checkpoint on improvement; always save at the last epoch.
        if mAP > best_map:
            best_map = mAP
            best_path = trainer.save(best_map=best_map)
        if epoch == opt.epoch - 1:
            best_path = trainer.save()
        # Every 10 epochs: reload the best weights and decay the LR.
        if (epoch + 1) % 10 == 0:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
def train(**kwargs):
    """Run one adversarial-robustness evaluation pass over the dataset.

    Despite the name, the optimisation step (``trainer.train_step``) is
    commented out below, so this function currently only (optionally)
    attacks each image with PGD, saves side-by-side prediction plots for
    the clean and attacked copies, and reports how often the predicted
    labels on both copies agree.

    Args:
        **kwargs: configuration overrides forwarded to ``opt._parse``.

    Side effects:
        Writes figures under ``imgs/adv_images/`` and ``imgs/orig_images/``
        and prints summary statistics at the end.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from {}'.format(opt.load_path))
    # trainer.vis.text(dataset.db.label_names, win='labels')

    adversary = None
    if opt.flagadvtrain:
        print("flagadvtrain turned: Adversarial training!")
        # PGD attack driven by the detector's training loss.
        atk = PGD.PGD(trainer, eps=16/255, alpha=3/255, steps=4)
        # Alternative attack setups kept for reference:
        # atk = torchattacks.PGD(trainer.faster_rcnn, eps=16, alpha=3, steps=4)
        # adversary = PGDAttack(trainer.faster_rcnn, loss_fn=nn.CrossEntropyLoss(), eps=16, nb_iter=4, eps_iter=3,
        #                       rand_init=True, clip_min=0.0, clip_max=1.0, targeted=False)

    best_map = 0
    lr_ = opt.lr
    normal_total_loss = []  # only filled when the commented plotting code is re-enabled
    adv_total_loss = []
    total_time = 0.0   # cumulative wall-clock time spent inside the attack
    total_imgs = 0     # images that reached the comparison step
    true_imgs = 0      # images whose clean/adversarial predicted labels match exactly

    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # Keep an untouched copy of the input for the clean-vs-adversarial
            # comparison below.
            temp_img = copy.deepcopy(img).cuda()
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            if opt.flagadvtrain:
                before_time = time.time()
                img = atk(img, bbox, label, scale)
                after_time = time.time()
                total_time += after_time - before_time
                # with ctx_noparamgrad_and_eval(trainer.faster_rcnn):
                #     img = adversary.perturb(img, label)

            # NOTE: actual training is disabled; re-enable to fine-tune.
            # trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # adv_total_loss.append(trainer.get_meter_data()["total_loss"])
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # trainer.vis.plot_many(trainer.get_meter_data())

                temp_ori_img_ = inverse_normalize(at.tonumpy(temp_img[0]))
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))

                # Predict on the (possibly adversarial) image and save the plot.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                fig1 = plt.figure()
                ax1 = fig1.add_subplot(1, 1, 1)
                final1 = (ori_img_.transpose(1, 2, 0).astype(np.uint8))
                ax1.imshow(final1)
                gt_img = visdom_bbox(ax1, at.tonumpy(_bboxes[0]), at.tonumpy(_labels[0]))
                fig1.savefig("imgs/adv_images/adv_img{}".format(ii))
                plt.close()

                # Predict on the untouched copy and save the plot.
                _temp_bboxes, _temp_labels, _temp_scores = trainer.faster_rcnn.predict([temp_ori_img_], visualize=True)
                fig2 = plt.figure()
                ax2 = fig2.add_subplot(1, 1, 1)
                final2 = (temp_ori_img_.transpose(1, 2, 0).astype(np.uint8))
                ax2.imshow(final2)
                gt_img = visdom_bbox(ax2, at.tonumpy(_temp_bboxes[0]), at.tonumpy(_temp_labels[0]))
                fig2.savefig("imgs/orig_images/gt_img{}".format(ii))
                plt.close()

                total_imgs += 1
                if len(_temp_labels) == 0:
                    continue
                # BUGFIX: the original tested ``(...).all() is True``; NumPy's
                # ``all()`` returns an ``np.bool_``, which is never the built-in
                # ``True`` singleton, so the counter could never increment.
                # Rely on truthiness instead.
                if _labels[0].shape[0] == _temp_labels[0].shape[0] and (_labels[0] == _temp_labels[0]).all():
                    true_imgs += 1

        # Per-epoch evaluation / checkpoint / LR-decay logic kept for reference:
        # eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num,
        #                    flagadvtrain=opt.flagadvtrain, adversary=atk)
        # ... (visdom logging, best-mAP checkpointing, scale_lr at epoch 9)
        if epoch == 0:
            # A single pass is enough for this evaluation run.
            break
        if epoch == 13:
            break

    print("Total number of images is {}".format(total_imgs))
    print("True images is {}".format(true_imgs))
    print("Total time is {}".format(total_time))
    # BUGFIX: guard against ZeroDivisionError when no image reached the
    # comparison step (e.g. opt.plot_every larger than the dataset).
    if total_imgs:
        print("Avg time is {}".format(total_time / total_imgs))
import os import torch as t from utils.config import opt from model.faster_rcnn_vgg16 import FasterRCNNVGG16 from trainer import FasterRCNNTrainer from data.util import read_image from utils.vis_tool import vis_bbox from utils import array_tool as at from matplotlib import pyplot as plt img = read_image('/home/fengkai/dog.jpg') img = t.from_numpy(img)[None] faster_rcnn = FasterRCNNVGG16() trainer = FasterRCNNTrainer(faster_rcnn).cuda() trainer.load('/home/fengkai/PycharmProjects/simple-faster-rcnn-pytorch-master/simple-faster-rcnn/fasterrcnn_12211511_0.701052458187_torchvision_pretrain.pth') opt.caffe_pretrain=False # this model was trained from torchvision-pretrained model _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img,visualize=True) vis_bbox(at.tonumpy(img[0]), at.tonumpy(_bboxes[0]), at.tonumpy(_labels[0]).reshape(-1), at.tonumpy(_scores[0]).reshape(-1)) plt.show()