def load_detection_model(dataset, model_name, num_classes, ckpt_epoch='latest'):
    """Build a detection model and load its checkpoint onto the available device.

    :param dataset: dataset name, e.g. Cigar, VOC (used to locate the checkpoint dir)
    :param model_name: faster_rcnn_res50, faster_rcnn_mobile, yolov3-tiny
    :param num_classes: number of classes (used by the faster_rcnn variants)
    :param ckpt_epoch: checkpoint suffix; default loads the latest model
    :return: the model with weights loaded, moved to the selected device
    :raises ValueError: if model_name matches no supported architecture
    """
    global device  # module-level device is updated here as a side effect
    print('load detection model...')
    ckpt_path = os.path.join(model_dir, dataset, model_name,
                             '{}_{}.pth'.format(model_name, ckpt_epoch))
    if 'faster_rcnn' in model_name:
        model = faster_rcnn.get_model(model_name, num_classes, self_pretrained=True)
    elif model_name == 'yolov3-tiny':
        config = os.path.join(os.path.dirname(__file__),
                              'config/{}-cigar2.cfg'.format(model_name))
        # todo: change config yolo layer class
        model = yolov3.Darknet(config, 416)
    else:
        raise ValueError('not implement!')
    # ckpt
    print('load {}'.format(ckpt_path))
    # Pick the device once; map_location covers both the GPU and CPU cases,
    # which removes the previously duplicated load_state_dict branches.
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model.to(device)
    print('load done!')
    return model
def test_faster_rcnn(device, img, backbone, epoch, num_class):
    """Run single-image inference with a trained Faster R-CNN checkpoint and
    save the visualized detections under ``vis/<backbone>/epoch_<epoch>``.

    :param device: torch.device to run inference on
    :param img: PIL image (or anything F.to_tensor accepts)
    :param backbone: backbone name; also selects the checkpoint directory
    :param epoch: checkpoint epoch number to load
    :param num_class: number of classes for the model head
    """
    ckpt = 'output/{}/{}_epoch_{}.pth'.format(backbone, backbone, epoch)
    print('load', ckpt)
    model = faster_rcnn.get_model(backbone, num_class, self_pretrained=True)
    # map_location so the checkpoint also loads on a CPU-only machine
    model.load_state_dict(torch.load(ckpt, map_location=device))
    model.to(device)
    print('done!')
    # save results
    vis_dir = 'vis/{}/epoch_{}'.format(backbone, epoch)
    # exist_ok avoids the racy exists()-then-makedirs pattern
    os.makedirs(vis_dir, exist_ok=True)
    with torch.no_grad():
        model.eval()
        img_tensor = F.to_tensor(img)
        detection = model([img_tensor.to(device)])[0]  # [ ] equals unsqueeze(0)
        parse_rcnn_detection(detection, save_dir=vis_dir)
split='test', transforms=get_transform(False)) # to tensor data_loader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True, num_workers=4, collate_fn=collate_fn) data_loader_eval = torch.utils.data.DataLoader(dataset_eval, batch_size=1, shuffle=False, num_workers=4, collate_fn=collate_fn) # 2.model backbone = 'mobile' num_classes = 21 # bg + 20 model = get_model(backbone, num_classes, self_pretrained=False) # whether use data parallel if len(gpus) > 1: # !!! model = torch.nn.DataParallel(model).cuda() else: model = model.cuda() # maybe this code not support data parallel? # optimizer params = [p for p in model.parameters() if p.requires_grad] # conv1,conv2_x not update optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005) # lr scheduler which decreases the learning rate by 10x every 3 epochs lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
def start_train():
    """Flask handler: assemble an ASM (active + self-supervised labeling) train/eval
    split from the annotation database, then train the detection model.

    Reads ``model`` / ``dataset`` / ``tricks`` / ``evals`` from the request values
    and returns the literal string 'OK'.
    """
    model_name, dataset = request.values['model'], request.values['dataset']
    # NOTE(review): 'ticks' is never used below — looks like a typo for 'tricks'; confirm
    ticks, evals = request.values['tricks'].split(','), request.values['evals'].split(',')
    K = 100  # sample budget per round; todo: judge by user
    if dataset is not None:
        # get class2names of current project
        if choose_dataset_idx is not None:
            project = project_list[choose_dataset_idx]
        else:
            project = find_project_by_name(session.get('choose_dataset'),
                                           session.get('project_list'))
        num_class, class2names = project['classes'], project['cats']
        # category name -> class index
        names2class = {cat: idx for idx, cat in enumerate(class2names)}

        ## 1. prepare asm train/test anns
        # query annotations with 3 different statuses from the database
        gt_sl_anns = get_gt_anns_from_sql(dataset, names2class, status='sl')
        gt_al_anns = get_gt_anns_from_sql(dataset, names2class, status='al')
        label_anns = get_gt_anns_from_sql(dataset, names2class, status='done')

        # compute al ratio with sl_anns and gt_sl_anns
        # sort both lists by image id so they stay aligned pairwise
        global sl_anns  # filled by auto_label(): model-generated SL annotations
        sl_anns = list(sorted(sl_anns, key=lambda t: t['id']))
        gt_sl_anns = list(sorted(gt_sl_anns, key=lambda t: t['id']))
        # if sl samples were not re-labeled by a human, gt_sl_anns == sl_anns
        # and every sl_ratio == 1
        sl_ratios = [
            compute_sl_ratio(sl_ann, gt_ann)
            for sl_ann, gt_ann in zip(sl_anns, gt_sl_anns)
        ]
        # print(sl_ratios)
        # how many low-sl_ratio samples to pick out of sl_anns
        topK = min(K - len(gt_al_anns), len(gt_sl_anns))
        top_idxs = np.argsort(sl_ratios)[:topK]  # samples with the lowest sl ratio
        weak_sl_anns = [sl_anns[i] for i in top_idxs]
        # random subset of fully human-labeled samples to mix into training
        rndK_label_anns = random.sample(label_anns, min(K, len(label_anns)))

        # build train/test anns from hard anns
        hard_anns = gt_al_anns + weak_sl_anns
        random.shuffle(hard_anns)
        split = len(hard_anns) // 3  # 1/3 for eval, 2/3 for train
        hard_anns_eval = hard_anns[:split]
        hard_anns_train = hard_anns[split:]
        asm_train_anns = rndK_label_anns + hard_anns_train
        asm_eval_anns = hard_anns_eval

        ## 2. build model and train
        model = get_model(backbone=model_name.split('_')[-1],
                          input_size=(480, 640),  # could be stored in the ckpt
                          num_classes=num_class + 1,  # +bg
                          self_pretrained=True)
        model_save_dir = os.path.join(model_dir, dataset, model_name)
        train_model(asm_train_anns, asm_eval_anns, model, device, model_save_dir,
                    batch_size=1)
    return 'OK'
def auto_label():
    """Detect on notDone imgs, insert high-conf anns into d_hits_result and
    update status in d_hits.

    - global ``sl_anns`` stores the SL (self-labeled) annotation results
    - sl/al status is written directly to the database, which yields the
      split between self-labeled and active-learning samples
    todo: parse into anns, then compare with gt and return al_ratio to pick
    hard samples
    """
    model_name, dataset = request.values['model'], request.values['dataset']
    if dataset is not None:
        # get class2names of current project
        if choose_dataset_idx is not None:
            project = project_list[choose_dataset_idx]
        else:
            project = find_project_by_name(session.get('choose_dataset'),
                                           session.get('project_list'))
        num_class, class2names = project['classes'], project['cats']
        # load model and corresponding detector
        model = get_model(backbone=model_name.split('_')[-1],
                          input_size=(480, 640),  # could be stored in the ckpt
                          num_classes=num_class + 1,  # +bg
                          self_pretrained=True)
        ckpt = os.path.join(model_dir, dataset, model_name, 'model_latest.pth')
        model = load_model(model, ckpt_path=ckpt)
        model = model.to(device)
        model.eval()
        print('load model done!')
        # progress / image results needed by the frontend display
        # NOTE(review): 'data' appears to be a module-level dict shared with the
        # frontend polling endpoint — confirm where it is declared
        data['cur_idx'] = data['sl_num'] = data['al_num'] = data['progress'] = 0
        data['sl_img_src'] = data['al_img_src'] = ''
        # todo: can set a batch_num in query_d_hits() if the unlabeled set is huge
        unlabeled_rows, data['total_num'] = query_d_hits(project_name=dataset,
                                                         status='notDone')  # has no len
        print('total:', data['total_num'])
        # query one user; needed for the insert
        userId = query_one_userId()
        global sl_anns
        sl_anns = []
        # ASM: detect here!
        for img_idx, row in enumerate(unlabeled_rows):
            # parse the row into a dict annotation
            hit_dict = parse_row_to_dict(row, dict_template=hit_dict_tmp)
            file_path = map_docker2host(hit_dict['data'])  # change to the real img_path on nfs
            img = Image.open(file_path)
            img_w, img_h = img.size  # cvt DT ann need
            # hit_result_dict id auto increment
            # NOTE(review): 'hit_result_dict' looks like a reused module-level
            # template dict — confirm it is reset/overwritten safely per row
            hit_result_dict['hitId'] = hit_dict['id']
            hit_result_dict['projectId'] = hit_dict['projectId']
            # ASM: generate auto label
            boxes, labels = detect_unlabel_img(model, img, device)  # x1y1x2y2, label_id
            boxes = [list(map(int, box)) for box in boxes]
            labels = [int(label) for label in labels]
            # insert row to d_hits_result
            if len(boxes) > 0:
                ## save SL results to Dataturks
                result = []
                for idx, (box, label) in enumerate(zip(boxes, labels)):
                    labels[idx] = label  # already -1 in infer()
                    box_info = {
                        "label": [class2names[labels[idx]]],
                        "shape": "rectangle",
                        "points": cvt_box_to_4floatpts(box, img_w, img_h),
                        "notes": "",
                        "imageWidth": img_w,
                        "imageHeight": img_h
                    }
                    result.append(box_info)
                # other columns
                # escape quotes so the string can be inserted into mysql
                hit_result_dict['result'] = str(result).replace("'", '\\"')
                hit_result_dict['userId'] = userId
                hit_result_dict['notes'] = 'auto'  # SL remark
                hit_result_dict['created_timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                hit_result_dict['updated_timestamp'] = hit_result_dict['created_timestamp']
                # insert to DT database
                insert_sl_ann_to_db(hit_result_dict)
                # ASM: save the model's annotation result into sl_anns
                sl_anns.append({
                    # parsed unlabel gt_anns must share this same format
                    'id': hit_dict['id'],
                    'filepath': file_path,
                    'boxes': boxes,  # x1y1x2y2
                    'labels': labels
                })
                update_status_in_db(hit_dict['id'], status='sl')
            else:
                # no confident detections: route image to active learning
                update_status_in_db(hit_dict['id'], status='al')
            # plt bbox on img, visualize on the frontend
            web_img_src = plt_bbox(img, boxes, labels, class2names, send_web=True)
            if len(boxes) > 0:
                data['sl_img_src'] = web_img_src
                data['sl_num'] += 1
            else:
                data['al_img_src'] = web_img_src
                data['al_num'] += 1
            data['cur_idx'] = img_idx + 1
            data['progress'] = int(data['cur_idx'] / data['total_num'] * 100)
        return jsonify(data)
    else:
        return redirect('/datasets')
'mobile', '--num_classes', '21', # 20+bg '--pretrain_epoch', '4', '--conf_thre', '0.7', '--check_step', '2', ] args = parse_args(params) os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus # load pretrain model, to do active learning on VOC2012 model = get_model(args.backbone, args.num_classes, self_pretrained=True) model = torch.nn.DataParallel(model).cuda() ckpt = 'output/{}/{}_epoch_{}.pth'.format(args.backbone, args.backbone, args.pretrain_epoch) model.load_state_dict(torch.load(ckpt)) model.cuda() print('load pretrain model on voc2007 done!') # prepare dataset voc2007_anns = load_data('VOC2007', split='trainval') # 5011 voc2012_anns = load_data('VOC2012', split='trainval') # 11540 voc2012_anns = voc2012_anns[:1000] # only less samples unlabel_idxs = list(range(len(voc2012_anns))) asm_eval_anns = load_data('VOC2007', split='test') # 4592 asm_eval_anns = asm_eval_anns[:1000]
def train_faster_rcnn(root, train_data, valid_data, test_data, epochs,
                      tricks=None, backbone='faster_rcnn_res50', num_classes=3,
                      check_step=5, debug=False):
    """Train a Faster R-CNN model, evaluating on the valid split every epoch and
    checkpointing every ``check_step`` epochs; a final evaluation runs on the
    test split.

    Progress is published through the shared ``training_status`` dict (polled by
    the web frontend); setting its 'status' to 'stop' aborts training early.

    :param root: dataset root directory passed to the loaders
    :param train_data: training annotation set
    :param valid_data: per-epoch validation annotation set
    :param test_data: final test annotation set
    :param epochs: total number of training epochs
    :param tricks: list of trick names ('augment', 'aug_double', 'cosine', ...);
        falls back to model_tricks['faster_rcnn'] when None or empty
    :param backbone: backbone / model name, also used for output paths
    :param num_classes: number of classes including background
    :param check_step: checkpoint save interval in epochs
    :param debug: unused here — presumably consumed elsewhere; verify
    :return: the ``training_status`` dict
    """
    # NOTE(review): 'statue' looks like a typo for 'status' in the helper's
    # name — defined elsewhere, cannot be renamed from this block
    clear_training_statue()
    if tricks is None or tricks[0] == '':
        tricks = model_tricks['faster_rcnn']
    training_status['status'] = 'training'
    training_status['epoch_total'] = epochs
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    # load data
    data_train_loader = load_faster_rcnn_dataset(root, train_data, batch_size=2,
                                                 augment='augment' in tricks,
                                                 aug_double='aug_double' in tricks)
    data_valid_loader = load_faster_rcnn_dataset(root, valid_data, batch_size=1)
    data_test_loader = load_faster_rcnn_dataset(root, test_data, batch_size=1)
    model = faster_rcnn.get_model(backbone, num_classes, self_pretrained=False)
    model.to(device)
    model_save_dir = os.path.join('output', backbone)
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    params = [p for p in model.parameters() if p.requires_grad]  # if not update backbone
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9,
                                weight_decay=0.0005)
    if 'cosine' in tricks:
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                                  T_max=epochs)
    else:
        # and a learning rate scheduler which decreases the learning rate by
        # 10x every 3 epochs
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                       step_size=3,
                                                       gamma=0.1)  # 1/10
    writer = SummaryWriter('logs/faster_rcnn')
    training_status['tb_path'] = os.getcwd() + '/logs/faster_rcnn'
    for epoch in range(epochs):
        training_status['epoch_current'] = epoch + 1
        # cooperative cancellation: frontend flips status to 'stop'
        if training_status['status'] == 'stop':
            return training_status
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, data_train_loader, device, epoch,
                        print_freq=10, writer=writer,
                        begin_step=epoch * len(data_train_loader))
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_valid_loader, device=device)
        if (epoch + 1) % check_step == 0:
            torch.save(
                model.state_dict(),
                os.path.join(model_save_dir,
                             '{}_epoch_{}.pth'.format(backbone, epoch + 1)))
    # final evaluation on the held-out test split after all epochs
    evaluate(model, data_test_loader, device=device)
    training_status['status'] = 'stop'
    return training_status