def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', type=str, help='checkpoint file path')
    args = parser.parse_args()

    strategy = tf.distribute.MirroredStrategy()
    global_batch_size = strategy.num_replicas_in_sync * BATCH_SIZE
    train_dataset = create_dataset('{}/train*'.format(TF_RECORDS),
                                   global_batch_size, is_train=True)
    val_dataset = create_dataset('{}/val*'.format(TF_RECORDS),
                                 global_batch_size, is_train=False)

    if not os.path.exists('./models'):
        os.makedirs('./models/')

    with strategy.scope():
        train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)
        val_dist_dataset = strategy.experimental_distribute_dataset(val_dataset)

        model = YOLOv3(shape=(416, 416, 3), num_classes=TOTAL_CLASSES)
        model.summary()

        initial_epoch = 1
        if args.checkpoint:
            model.load_weights(args.checkpoint)
            initial_epoch = int(args.checkpoint.split('-')[-3]) + 1
            val_loss = float(args.checkpoint.split('-')[-1][:-3])
            print('Resume Training from checkpoint {} and epoch {}'.format(
                args.checkpoint, initial_epoch))
        else:
            print("Loading Imagenet pretrained darknet53.conv.74 weights for Darknet...")
            load_darknet_weights(model, "./darknet53.conv.74")
            print("Pretrained weights loaded for Darknet53")
            val_loss = math.inf

        trainer = Trainer(
            model=model,
            initial_epoch=initial_epoch,
            epochs=TOTAL_EPOCHS,
            global_batch_size=global_batch_size,
            strategy=strategy,
            last_val_loss=val_loss,
        )
        trainer.run(train_dist_dataset, val_dist_dataset)
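
# The resume branch above derives the starting epoch and the last validation loss
# from the checkpoint file name. A minimal sketch of a name that this parsing would
# accept, assuming the Trainer saves checkpoints as "<prefix>-<epoch>-val_loss-<loss>.h5"
# (the actual naming pattern lives inside Trainer, which is not shown here):
ckpt = './models/yolov3-12-val_loss-35.87.h5'   # hypothetical file name
parts = ckpt.split('-')
initial_epoch = int(parts[-3]) + 1   # '12' -> resume at epoch 13
val_loss = float(parts[-1][:-3])     # strips the 3-character '.h5' suffix -> 35.87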
def train(to_static):
    program_translator = ProgramTranslator()
    program_translator.enable(to_static)

    random.seed(0)
    np.random.seed(0)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        model = YOLOv3(3, is_train=True)

        boundaries = cfg.lr_steps
        gamma = cfg.lr_gamma
        step_num = len(cfg.lr_steps)
        learning_rate = cfg.learning_rate
        values = [learning_rate * (gamma ** i) for i in range(step_num + 1)]
        lr = fluid.dygraph.PiecewiseDecay(boundaries=boundaries,
                                          values=values,
                                          begin=0)
        lr = fluid.layers.linear_lr_warmup(
            learning_rate=lr,
            warmup_steps=cfg.warm_up_iter,
            start_lr=0.0,
            end_lr=cfg.learning_rate,
        )

        optimizer = fluid.optimizer.Momentum(
            learning_rate=lr,
            regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
            momentum=cfg.momentum,
            parameter_list=model.parameters())

        start_time = time.time()
        snapshot_loss = 0
        snapshot_time = 0
        total_sample = 0

        input_size = cfg.input_size
        shuffle = True
        shuffle_seed = None
        total_iter = cfg.max_iter
        mixup_iter = total_iter - cfg.no_mixup_iter

        train_reader = FakeDataReader().reader()
        smoothed_loss = SmoothedValue()
        ret = []

        for iter_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()

            img = np.array([x[0] for x in data]).astype('float32')
            img = to_variable(img)
            gt_box = np.array([x[1] for x in data]).astype('float32')
            gt_box = to_variable(gt_box)
            gt_label = np.array([x[2] for x in data]).astype('int32')
            gt_label = to_variable(gt_label)
            gt_score = np.array([x[3] for x in data]).astype('float32')
            gt_score = to_variable(gt_score)

            loss = model(img, gt_box, gt_label, gt_score, None, None)
            smoothed_loss.add_value(np.mean(loss.numpy()))
            snapshot_loss += loss.numpy()
            snapshot_time += start_time - prev_start_time
            total_sample += 1

            print("Iter {:d}, loss {:.6f}, time {:.5f}".format(
                iter_id, smoothed_loss.get_mean_value(),
                start_time - prev_start_time))
            ret.append(smoothed_loss.get_mean_value())

            loss.backward()
            optimizer.minimize(loss)
            model.clear_gradients()

        return np.array(ret)
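
# Hedged usage sketch (not part of the function above): train() returns the smoothed
# per-iteration losses, so a dygraph-to-static consistency check can simply compare
# the two modes. The tolerances below are assumptions, not values from this code.
dygraph_losses = train(to_static=False)
static_losses = train(to_static=True)
np.testing.assert_allclose(dygraph_losses, static_losses, rtol=1e-05, atol=1e-04)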
CONFIG_PATH = './YOLOv3/yolov3.cfg'
WEIGHTS_PATH = './YOLOv3/yolov3.weights'


def show_detections(frame, detections):
    # draw each tracked box with its track id
    for detection in detections:
        x1, y1, x2, y2, id = [int(i) for i in detection]
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, 'id: {}'.format(id), (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.imshow('deepsort', frame)
    cv2.waitKey(1)


deepsort = DeepSort(max_age=30)
yolo = YOLOv3(labels_path=LABELS_PATH,
              config_path=CONFIG_PATH,
              weights_path=WEIGHTS_PATH,
              confidence=0.3)
cap = cv2.VideoCapture(0)

while 1:
    grabbed, frame = cap.read()
    detections = yolo.detectObjects(frame)
    # deep appearance was designed to work on humans but
    # this allows all yolov3 detections to be tracked for fun
    detections = [obj[1] for obj in detections]
    # detections = [obj[1] for obj in detections if obj[0] == 'person']
    detections = np.array(detections)
    track_bbs_ids = deepsort.update(frame, detections[:, :4])
    show_detections(frame, track_bbs_ids)
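
# Hedged variant of the capture loop above (not part of the original script): it adds
# an empty-detection guard, a 'q' hotkey, and explicit camera/window cleanup so the
# loop has a clean shutdown path.
try:
    while True:
        grabbed, frame = cap.read()
        if not grabbed:
            break
        detections = np.array([obj[1] for obj in yolo.detectObjects(frame)])
        if len(detections) == 0:
            continue
        track_bbs_ids = deepsort.update(frame, detections[:, :4])
        show_detections(frame, track_bbs_ids)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()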
ANCHORS = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]  # standard YOLOv3 anchors; replace with the anchors used during training
ANCHOR_MASKS = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
VALID_THRESH = 0.01
NMS_TOPK = 400
NMS_POSK = 100
NMS_THRESH = 0.45
NUM_CLASSES = 7

TESTDIR = './insects/test/images'  # change this directory to the path where your own test images are saved
WEIGHT_FILE = './yolo_epoch50'     # change this filename to the path where your own trained weights are stored

if __name__ == '__main__':
    with fluid.dygraph.guard():
        model = YOLOv3('yolov3', num_classes=NUM_CLASSES, is_train=False)
        params_file_path = WEIGHT_FILE
        model_state_dict, _ = fluid.load_dygraph(params_file_path)
        model.load_dict(model_state_dict)
        model.eval()

        total_results = []
        test_loader = test_data_loader(TESTDIR, batch_size=16, mode='test')
        for i, data in enumerate(test_loader()):
            img_name, img_data, img_scale_data = data
            img = to_variable(img_data)
            img_scale = to_variable(img_scale_data)
            outputs = model.forward(img)
            bboxes, scores = model.get_pred(outputs,
                                            im_shape=img_scale,
                                            anchors=ANCHORS,
                                            anchor_masks=ANCHOR_MASKS,
                                            valid_thresh=VALID_THRESH)
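
# The NMS constants defined above are not consumed in the truncated loop, so here is a
# hedged, NumPy-only sketch of the single-class NMS step they would typically
# parameterize after get_pred(); the helper name and the per-class looping done by the
# real pipeline are assumptions, not code from this snippet.
import numpy as np

def nms_single_class(boxes, scores,
                     score_thresh=VALID_THRESH,
                     nms_thresh=NMS_THRESH,
                     pre_nms_topk=NMS_TOPK,
                     pos_nms_topk=NMS_POSK):
    """boxes: [N, 4] as (x1, y1, x2, y2); scores: [N]. Returns indices of kept boxes."""
    order = np.argsort(scores)[::-1][:pre_nms_topk]   # top-k candidates by score
    order = order[scores[order] > score_thresh]       # drop low-confidence boxes
    keep = []
    while order.size > 0 and len(keep) < pos_nms_topk:
        i = order[0]
        keep.append(i)
        # IoU between the current best box and the remaining candidates
        xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
        yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
        xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
        yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
        inter = np.clip(xx2 - xx1, 0, None) * np.clip(yy2 - yy1, 0, None)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        areas = (boxes[order[1:], 2] - boxes[order[1:], 0]) * \
                (boxes[order[1:], 3] - boxes[order[1:], 1])
        iou = inter / (area_i + areas - inter + 1e-10)
        order = order[1:][iou <= nms_thresh]          # suppress heavily overlapping boxes
    return np.array(keep, dtype=np.int64)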
# (1) Objectness-confidence thresholding
# For every bounding box whose objectness score is below a threshold, we set all of
# its attribute values (the entire row representing that box) to zero.
# conf_mask = (prediction[:,:,4] > confidence).float().unsqueeze(2)
# prediction = prediction*conf_mask

# (2) NMS (performed per class)
# The box attributes we have at this point are the center coordinates plus the box
# height and width. However, it is easier to compute the IoU of two boxes from their
# two diagonal corner coordinates, so we convert each box from
# (center x, center y, height, width) to (top-left x, top-left y, bottom-right x, bottom-right y).
# box_corner = prediction.new(prediction.shape)
# box_corner[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
# box_corner[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
# box_corner[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
# box_corner[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
# prediction[:, :, :4] = box_corner[:, :, :4]

# The number of "true" detections can differ from image to image. For example, a batch
# of size 3 may contain images 1, 2 and 3 with 5, 2 and 4 "true" detections respectively.
# Confidence thresholding and NMS therefore have to be done one image at a time: the
# operations involved cannot be vectorized, and we must loop over the first dimension
# of the prediction tensor (the index of the images within the batch).
# batch_size = prediction.size(0)
# write = False
# for ind in range(batch_size):
#     image_pred = prediction[ind]   # image Tensor
#     # confidence thresholding
#     # NMS

### Test loading the yolov3 model
weight = 'C:/Users/62349/Desktop/best.pt'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
chkpt = torch.load(weight, map_location=device)
model = YOLOv3()
model.load_state_dict(chkpt)
print('end!')
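
# Hedged, runnable mini-demo of the two commented-out post-processing steps above on a
# random "prediction" tensor of shape [batch, num_boxes, 5 + num_classes]; the tensor
# sizes and the 0.5 confidence threshold are arbitrary assumptions for illustration.
import torch

prediction = torch.rand(2, 10647, 85)   # 416x416 YOLOv3 -> 10647 boxes, 80 COCO classes
confidence = 0.5

# (1) zero out every box whose objectness score is below the threshold
conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
prediction = prediction * conf_mask

# (2) convert (cx, cy, w, h) to (x1, y1, x2, y2) so IoU is easy to compute
box_corner = prediction.new(prediction.shape)
box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
prediction[:, :, :4] = box_corner[:, :, :4]

# thresholding/NMS cannot be vectorized across images, so loop over the batch dimension
for ind in range(prediction.size(0)):
    image_pred = prediction[ind]
    print(image_pred.shape)   # torch.Size([10647, 85])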
def main():
    """
    YOLOv3 trainer. See README for details.
    """
    args = parse_args()
    print("Setting Arguments.. : ", args)

    cuda = torch.cuda.is_available() and args.use_cuda
    os.makedirs(args.checkpoint_dir, exist_ok=True)

    # Parse config settings
    with open(args.cfg, 'r') as f:
        cfg = yaml.safe_load(f)

    print("successfully loaded config file: ", cfg)

    momentum = cfg['TRAIN']['MOMENTUM']
    decay = cfg['TRAIN']['DECAY']
    burn_in = cfg['TRAIN']['BURN_IN']
    iter_size = cfg['TRAIN']['MAXITER']
    steps = eval(cfg['TRAIN']['STEPS'])
    batch_size = cfg['TRAIN']['BATCHSIZE']
    subdivision = cfg['TRAIN']['SUBDIVISION']
    ignore_thre = cfg['TRAIN']['IGNORETHRE']
    random_resize = cfg['AUGMENTATION']['RANDRESIZE']
    base_lr = cfg['TRAIN']['LR'] / batch_size / subdivision

    print('effective_batch_size = batch_size * subdivision = %d * %d' %
          (batch_size, subdivision))

    # Learning rate setup
    def burnin_schedule(i):
        if i < burn_in:
            factor = pow(i / burn_in, 4)
        elif i < steps[0]:
            factor = 1.0
        elif i < steps[1]:
            factor = 0.1
        else:
            factor = 0.01
        return factor

    # Initiate model
    model = YOLOv3(cfg['MODEL'], ignore_thre=ignore_thre)

    if args.weights_path:
        print("loading darknet weights....", args.weights_path)
        parse_yolo_weights(model, args.weights_path)
    elif args.checkpoint:
        print("loading pytorch ckpt...", args.checkpoint)
        state = torch.load(args.checkpoint)
        if 'model_state_dict' in state.keys():
            model.load_state_dict(state['model_state_dict'])
        else:
            model.load_state_dict(state)

    if cuda:
        print("using cuda")
        model = model.cuda()

    # if args.tfboard:
    #     print("using tfboard")
    #     from tensorboardX import SummaryWriter
    #     tblogger = SummaryWriter(args.tfboard)

    model.train()

    imgsize = cfg['TRAIN']['IMGSIZE']
    # dataset = COCODataset(model_type=cfg['MODEL']['TYPE'],
    #                       data_dir='COCO/',
    #                       img_size=imgsize,
    #                       augmentation=cfg['AUGMENTATION'],
    #                       debug=args.debug)
    input_files = json.load(open(args.tar_files))
    dataset = ArxivDataSet(list_of_files=input_files,
                           shuffle_input=True,
                           get_raw_image=False,
                           ignore_pages_with_no_figures=True)

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=args.n_cpu)
    dataiterator = iter(dataloader)

    # evaluator = COCOAPIEvaluator(model_type=cfg['MODEL']['TYPE'],
    #                              data_dir='COCO/',
    #                              img_size=cfg['TEST']['IMGSIZE'],
    #                              confthre=cfg['TEST']['CONFTHRE'],
    #                              nmsthre=cfg['TEST']['NMSTHRE'])

    dtype = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    # optimizer setup
    # set weight decay only on conv.weight
    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        if 'conv.weight' in key:
            params += [{'params': value,
                        'weight_decay': decay * batch_size * subdivision}]
        else:
            params += [{'params': value, 'weight_decay': 0.0}]
    optimizer = optim.SGD(params,
                          lr=base_lr,
                          momentum=momentum,
                          dampening=0,
                          weight_decay=decay * batch_size * subdivision)

    iter_state = 0

    if args.checkpoint:
        if 'optimizer_state_dict' in state.keys():
            optimizer.load_state_dict(state['optimizer_state_dict'])
            iter_state = state['iter'] + 1

    scheduler = optim.lr_scheduler.LambdaLR(optimizer, burnin_schedule)

    # start training loop
    for iter_i in range(iter_state, iter_size + 1):

        # COCO evaluation
        if iter_i % args.eval_interval == 0 and iter_i > 0:
            # ap50_95, ap50 = evaluator.evaluate(model)
            model.train()
            # if args.tfboard:
            #     tblogger.add_scalar('val/COCOAP50', ap50, iter_i)
            #     tblogger.add_scalar('val/COCOAP50_95', ap50_95, iter_i)

        # subdivision loop
        optimizer.zero_grad()
        for inner_iter_i in range(subdivision):
            try:
                imgs, targets, _, _ = next(dataiterator)  # load a batch
            except StopIteration:
                dataiterator = iter(dataloader)
                imgs, targets, _, _ = next(dataiterator)  # load a batch
            imgs = Variable(imgs.type(dtype))
            targets = Variable(targets.type(dtype), requires_grad=False)
            loss = model(imgs, targets)
            loss.backward()

        optimizer.step()
        scheduler.step()

        if iter_i % 10 == 0:
            # logging
            current_lr = scheduler.get_lr()[0] * batch_size * subdivision
            print('[Iter %d/%d] [lr %f] '
                  '[Losses: xy %f, wh %f, conf %f, cls %f, total %f, imgsize %d]'
                  % (iter_i, iter_size, current_lr,
                     model.loss_dict['xy'], model.loss_dict['wh'],
                     model.loss_dict['conf'], model.loss_dict['cls'],
                     model.loss_dict['l2'], imgsize),
                  flush=True)

            # if args.tfboard:
            #     tblogger.add_scalar('train/total_loss', model.loss_dict['l2'], iter_i)

            # random resizing
            if random_resize:
                imgsize = (random.randint(0, 9) % 10 + 10) * 32
                dataset.img_shape = (imgsize, imgsize)
                dataset.img_size = imgsize
                dataloader = DataLoader(dataset,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        num_workers=args.n_cpu)
                dataiterator = iter(dataloader)

        # save checkpoint
        if iter_i > 0 and (iter_i % args.checkpoint_interval == 0):
            torch.save({'iter': iter_i,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        },
                       os.path.join(args.checkpoint_dir,
                                    "snapshot" + str(iter_i) + ".ckpt"))