def main():
    """Run the Darknet graph once on a stored tensor and save a checkpoint.

    Steps, in order:
      * read ``./005767.jpg`` and pass it through ``preprocess_image``
        (that result is then replaced, see below);
      * load ``005767.npy``, transpose it with axes ``[0, 2, 3, 1]`` and
        store the transposed array in ``atoms.npy``;
      * build ``Darknet`` on a ``[1, 416, 416, 3]`` float32 placeholder;
      * initialise all variables, run ``model_output.rtn()`` on the tensor,
        and print the first (up to) 90 flattened values with their index;
      * save the session to ``./yolov2_model/yolov2_coco.ckpt``.

    Raises:
        FileNotFoundError: if the input image cannot be read.
    """
    input_size = (416, 416)
    image_file = './005767.jpg'
    image = cv2.imread(image_file)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Cannot read image: {}'.format(image_file))

    tf.reset_default_graph()

    # Copy / resize to 416*416 (per the original note).  The result is
    # overwritten right below by the tensor loaded from disk; the call is
    # kept so the function still exercises preprocess_image.
    image_cp = preprocess_image(image, input_size)
    image_cp = np.load("005767.npy")
    # presumably NCHW -> NHWC to match the placeholder layout -- TODO confirm
    image_cp = np.transpose(image_cp, [0, 2, 3, 1])
    np.save("atoms.npy", image_cp)

    tf_image = tf.placeholder(
        tf.float32, [1, input_size[0], input_size[1], 3], name='input_data')
    model_output = Darknet(tf_image)

    model_path = "./yolov2_model/yolov2_coco.ckpt"
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        a = sess.run(model_output.rtn(), feed_dict={tf_image: image_cp})
        a = np.transpose(a, [0, 3, 1, 2])
        a = np.reshape(a, [-1])[:90]
        # Iterate over what is actually there: the slice can be shorter
        # than 90 when the network output is small.
        for i, value in enumerate(a):
            print(value, i)
        saver.save(sess, model_path)
def test():
    """Restore the saved checkpoint into a fresh Darknet and validate it.

    Uses the module-level ``EXPT_DIR``, ``CKPT_FILE``, ``trainset``,
    ``testset`` and ``hyp``.
    """
    net = Darknet()
    print(f'Size of the test set:{len(testset)}')

    # load ckpt: the weights live under the checkpoint's 'model' key
    checkpoint_path = os.path.join(EXPT_DIR, CKPT_FILE)
    checkpoint = torch.load(checkpoint_path)
    net.load_state_dict(checkpoint['model'])

    flow = Flow(net, trainset, testset, hyp)
    # write prediction
    flow.validate(batch_size=16)
def infer():
    """Run inference on the samples listed in ``EXPT_DIR/unlabeled.pkl``.

    A fresh Darknet is restored from ``EXPT_DIR/CKPT_FILE`` (weights under
    the ``'model'`` key) and wrapped in a ``Flow`` together with the
    module-level ``trainset``, ``testset`` and ``hyp``.
    """
    net = Darknet()
    checkpoint_path = os.path.join(EXPT_DIR, CKPT_FILE)
    checkpoint = torch.load(checkpoint_path)
    net.load_state_dict(checkpoint['model'])
    flow = Flow(net, trainset, testset, hyp)

    # get the indices of unlabeled data: iterating the unpickled object
    # yields them one by one
    unlabeled_path = os.path.join(EXPT_DIR, 'unlabeled.pkl')
    with open(unlabeled_path, 'rb') as handle:
        unlabeled = list(pickle.load(handle))

    flow.infer(unlabeled)
def train():
    """Evaluate a saved Darknet classifier on the ImageNet validation set.

    Despite its name, this function performs no training: it builds the
    evaluation pipeline over the ``ILSVRC2012_img_val`` JPEG files, loads
    the weights from ``logs/PretrainImageNet_20210316173822_1/13_70056.pth``
    and prints top-1 accuracy, top-5 accuracy and the mean cross-entropy
    loss.

    Raises:
        RuntimeError: if the evaluation iterator yields no samples.
    """
    dataset_root = '/home/iotsc_group1/ChangxingDENG/det/datasets/'

    # Evaluation pipeline
    files = glob.glob(
        os.path.join(dataset_root, 'PretrainImageNet', 'ILSVRC2012_img_val',
                     '*.JPEG'))
    # Order the files by the trailing id of the file name
    # ("..._<id>.JPEG"); the comparison is on the string form of the id.
    files = sorted(
        files, key=lambda f: f.split('/')[-1].split('_')[-1].split('.')[0])
    labels = loadlabel(
        os.path.join(
            dataset_root, 'PretrainImageNet',
            'ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt'
        ))
    eval_pipeline = EvalImageDecoderPipeline(files=files, labels=labels)
    eval_pipeline.build()
    eval_pii = pytorchIterator(eval_pipeline,
                               last_batch_policy=LastBatchPolicy.PARTIAL,
                               reader_name='Reader',
                               auto_reset=True)

    model = Darknet()
    state_dict = torch.load(
        'logs/PretrainImageNet_20210316173822_1/13_70056.pth')
    model.load_state_dict(state_dict=state_dict)
    model = model.cuda()
    criterion = nn.CrossEntropyLoss()
    model.eval()

    epoch_loss = 0
    num_samples = 0
    predictions = []
    targets = []
    with torch.no_grad():
        for data in eval_pii:
            x = data[0]['data']
            label = data[0]['label'].squeeze(-1).long().cuda()
            output = model(x)
            batch_loss = criterion(output, label).item()
            # The criterion returns a per-batch mean, so weight it by the
            # batch size to get a correct mean over the whole set.
            epoch_loss += batch_loss * x.shape[0]
            num_samples += x.shape[0]
            predictions.append(output)
            targets.append(label)

    if num_samples == 0:
        raise RuntimeError('The evaluation pipeline produced no samples.')

    # Divide by the number of samples actually seen rather than a
    # hard-coded 50000, so the mean stays right for any dataset size.
    loss = epoch_loss / num_samples
    predictions = torch.cat(predictions, dim=0)
    targets = torch.cat(targets, dim=0)
    acc = top1accuracy(predictions, targets)
    acctop5 = top5accuracy(predictions, targets)
    print(f'Top1 ACC: {acc} Top5 ACC {acctop5} loss: {loss}')
def main():
    """Evaluate a saved checkpoint on the 2012 'val' split and print the means.

    Loads ``./logs/model/model_params_99.ckpt`` into a Darknet built from
    ``config/net/resnet_dropout.cfg`` on CUDA, calls ``evaluate`` and prints
    the mean precision, recall, AP and F1.
    """
    # Initiate model
    net = Darknet("config/net/resnet_dropout.cfg").to('cuda')
    weights = torch.load("./logs/model/model_params_99.ckpt")
    net.load_state_dict(weights)

    precision, recall, AP, f1, ap_class = evaluate(
        net, ['2012', 'val'], [0.5, 0.5, 0.5], 4, True, diagnosis_code=1)

    # Reduce every metric first, then report them in a fixed order.
    tags = ("val_precision", "val_recall", "val_mAP", "val_f1")
    means = [metric.mean() for metric in (precision, recall, AP, f1)]
    for tag, value in zip(tags, means):
        print("{}: {}".format(tag, value.item()))
def train():
    """Train Darknet for one epoch on the samples listed in ``labeled.pkl``.

    When the module-level ``RESUME_FROM`` is truthy, the weights stored
    under the ``'model'`` key of that checkpoint are restored first.
    Augmentation is switched on for the module-level ``trainset``.
    """
    num_epochs = 1
    net = Darknet()

    if RESUME_FROM:
        resume_path = os.path.join(EXPT_DIR, RESUME_FROM)
        checkpoint = torch.load(resume_path)
        net.load_state_dict(checkpoint['model'])

    trainset.augment = True
    flow = Flow(net, trainset, testset, hyp)

    labeled_path = os.path.join(EXPT_DIR, 'labeled.pkl')
    with open(labeled_path, 'rb') as handle:
        selected = pickle.load(handle)

    # if want to try pollution study
    # load labels from EXPT_DIR/labeled.pkl
    epoch = 0
    while epoch < num_epochs:
        # Only the keys of the unpickled mapping are used as sample ids.
        sample_ids = list(selected.keys())
        flow.train(epoch, samples=sample_ids, prebias=False)
        epoch += 1
def train(params):
    """Debug harness: dump one DALI batch and one DataLoader batch as images.

    The function sets up logging, a torchvision-style ``DataLoader`` and an
    ``ExternalSourcePipeline`` wrapped in ``pytorchIterator``, builds the
    model/optimizer, then takes the first batch from both sources, writes
    them to ``x_pii.jpg`` and ``x_torch.jpg`` and exits the process.  The
    actual optimisation step is still disabled (kept as comments below).

    Args:
        params: raw configuration, wrapped in ``Params``.  Attributes read
            here: seed, save_root, project_name, version, num_gpus, mean,
            std, data_root, train_set, batch_size, load_weights, optim,
            learning_rate, epoch.

    Raises:
        SystemExit: always -- with status 1 when resuming from
            ``params.load_weights`` fails, otherwise after the two images
            have been written.
    """
    params = Params(params)
    set_random_seeds(params.seed)

    time_now = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    params.save_root = params.save_root + f'/{params.project_name}_{time_now}_{params.version}'
    os.makedirs(params.save_root, exist_ok=True)
    logging.basicConfig(
        filename=f'{params.save_root}/{params.project_name}_{time_now}_{params.version}.log',
        filemode='a',
        # Without an explicit level the root logger stays at WARNING and
        # every logging.info() call in this function is dropped.
        level=logging.INFO,
        # The previous format used '%{asctime}s' and '%(levalname)s', which
        # cannot be rendered by the %-style formatter.
        format='%(asctime)s - %(levelname)s: %(message)s')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    logging.info(f'Available GPUs: {torch.cuda.device_count()}')

    # Reference loader (must be created while params.mean / params.std are
    # still the raw values; they are turned into tensors further down).
    data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=params.mean, std=params.std)
    ])
    train_set = TrainDataset(root=os.path.join(params.data_root,
                                               params.project_name,
                                               params.train_set),
                             transform=data_transform)
    train_params = {
        'batch_size': params.batch_size,
        'shuffle': False,
        'num_workers': 4,
        'drop_last': True
    }
    train_loader = DataLoader(train_set, **train_params)

    # DALI pipeline fed by an external iterator.
    eli = ExternalInputIterator(params.batch_size)
    params.mean = torch.Tensor(params.mean).unsqueeze(0).unsqueeze(0)
    params.std = torch.Tensor(params.std).unsqueeze(0).unsqueeze(0)
    pipe = ExternalSourcePipeline(params=params,
                                  num_threads=4,
                                  device_id=0,
                                  external_date=eli,
                                  seed=params.seed)
    pii = pytorchIterator(pipe,
                          last_batch_padded=True,
                          last_batch_policy=LastBatchPolicy.DROP)

    model = Darknet()
    criterion = nn.CrossEntropyLoss()

    last_step = 0
    last_epoch = 0
    if params.load_weights != 'None':
        try:
            state_dict = torch.load(params.load_weights)
            model.load_state_dict(state_dict)
            # Checkpoints are named "<...>_<epoch>_<step>.pth".  Parse the
            # file name only: the run directory itself contains
            # underscores, which used to corrupt the epoch field.
            stem = os.path.splitext(os.path.basename(params.load_weights))[0]
            epoch_field, step_field = stem.split('_')[-2:]
            last_step = int(step_field)
            last_epoch = int(epoch_field)
        except Exception:
            logging.exception('Fail to resuming from weight!')
            raise SystemExit(1)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = nn.DataParallel(model)

    if params.optim == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=params.learning_rate)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=params.learning_rate,
                                    momentum=0.9,
                                    nesterov=True)

    epoch = 0
    begin_epoch = max(0, last_epoch)
    step = max(0, last_step)
    logging.info('Begin to train...')
    model.train()

    import matplotlib.pyplot as plt

    try:
        for epoch in range(begin_epoch, params.epoch):
            for data_pii, data_torch in zip(pii, train_loader):
                # type(x) reports torch.Tensor, but x already lives in GPU
                # memory (original author's note about the DALI output).
                x_pii = data_pii[0]['data']
                x_torch = data_torch[0].cuda()

                # squeeze(0) + transpose((1, 2, 0)) assumes a batch of one
                # CHW image -- TODO confirm batch_size == 1 for this harness.
                x_pii = x_pii.cpu().squeeze(0).numpy().transpose((1, 2, 0))
                x_torch = x_torch.cpu().squeeze(0).numpy().transpose((1, 2, 0))
                plt.imsave('x_pii.jpg', x_pii)
                plt.imsave('x_torch.jpg', x_torch)
                # Deliberate stop: only the first batch is inspected.
                raise SystemExit

                # Disabled training step:
                # output = model(x)
                # loss = criterion(output, label)
                # optimizer.zero_grad()
                # loss.backward()
                # optimizer.step()
    except KeyboardInterrupt:
        save_checkpoint(model,
                        f'{params.save_root}/Interrupt_{epoch}_{step}.pth')
label_noobj_mask * FOCAL(input=p_conf, target=label_obj_mask)) * label_mix # loss classes loss_cls = label_obj_mask * BCE(input=p_cls, target=label_cls) * label_mix loss_giou = (torch.sum(loss_giou)) / batch_size loss_conf = (torch.sum(loss_conf)) / batch_size loss_cls = (torch.sum(loss_cls)) / batch_size loss = loss_giou + loss_conf + loss_cls return loss, loss_giou, loss_conf, loss_cls if __name__ == "__main__": from model import Darknet net = Darknet("cfg/yolov3-voc.cfg") p, p_d = net(torch.rand(3, 3, 416, 416)) label_sbbox = torch.rand(3, 52, 52, 3,26) label_mbbox = torch.rand(3, 26, 26, 3, 26) label_lbbox = torch.rand(3, 13, 13, 3,26) sbboxes = torch.rand(3, 150, 4) mbboxes = torch.rand(3, 150, 4) lbboxes = torch.rand(3, 150, 4) loss, loss_xywh, loss_conf, loss_cls = YoloV3Loss(pms.MODEL["ANCHORS"], pms.MODEL["STRIDES"])(p, p_d, label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes) print(loss)
def __init__(self, path, args, with_tracking, widget, queue_size=3000):
    """Open the video source and load the network(s) used on its frames.

    Args:
        path: source handed to ``cv2.VideoCapture``.  An ``int`` is
            reported as a web cam in error messages, anything else as a
            video file.
        args: parsed options.  Reads ``reso``, ``bs``, ``names``,
            ``confidence``, ``nms_thresh``, ``is_classifier``, ``cfg`` and
            ``weights``; when ``is_classifier`` is set also
            ``classifier_cfg``, ``classifier_weights``,
            ``classifier_inp_dim``, ``classifier_confidence``,
            ``classifier_nms_thresh`` and ``classifier_names``.
        with_tracking: ``"sort"`` or ``"deep_sort"`` to create a tracker;
            any other value leaves ``self.mot_tracking`` as ``None``.
        widget: forwarded to every ``print_info`` call.
        queue_size: ``maxsize`` of each of the three queues created here.
    """
    # initialize the file video stream along with the boolean
    # used to indicate if the thread should be stopped or not
    self.stream = cv2.VideoCapture(path)
    self.widget = widget
    self.tracking = with_tracking
    # A VideoCapture object is always truthy, so the failure has to be
    # detected with isOpened().
    if not self.stream.isOpened():
        if isinstance(path, int):
            print_info(widget, True, "error", f"Error opening web cam on {path}")
        else:
            print_info(widget, True, "error", f"Error opening video file {path}")

    self.stopped = False
    self.canceled = False
    self.paused = False
    self.ready = False

    # initialize the queue used to store frames read from
    # the video file
    self.Q = Queue(maxsize=queue_size)
    self.imread = Queue(maxsize=queue_size)
    self.Q_processed = Queue(maxsize=queue_size)

    self.inp_dim = int(args.reso)
    self.batch_size = int(args.bs)
    self.names_file = args.names
    self.confidence = float(args.confidence)
    self.nms_thresh = float(args.nms_thresh)
    self.is_classifier = args.is_classifier
    self.classes = load_classes(self.names_file)
    self.num_classes = len(self.classes)

    self.model = None
    self.model_classifier = None
    if self.is_classifier:
        print_info(widget, False, "info", "Loading network for detection.....", -1)
        self.model = Darknet(args.classifier_cfg)
        self.model.load_weights(args.classifier_weights)
        print_info(widget, False, "info", "Network for detection successfully loaded", 0)

        print_info(widget, False, "info", "Loading network for classification.....", -1)
        self.model_classifier = Darknet(args.cfg)
        self.model_classifier.load_weights(args.weights)
        print_info(widget, False, "info", "Network for classification successfully loaded", 0)

        self.model_classifier.net_info["height"] = args.reso
        self.inp_dim = int(self.model_classifier.net_info["height"])

        # If there's a GPU available, put the model on GPU
        self.cuda = torch.cuda.is_available()
        if self.cuda:
            self.model_classifier.cuda()

        # Set the model in evaluation mode
        self.model_classifier.eval()

        # Move the settings read so far to the classifier_* attributes,
        # then reload the plain attributes from the classifier_* options
        # for self.model.
        self.classifier_confidence = self.confidence
        self.classifier_nms_thesh = self.nms_thresh
        self.classifier_classes = self.classes
        self.classifier_num_classes = self.num_classes
        self.classifier_names_file = self.names_file
        self.classifier_inp_dim = self.inp_dim

        self.inp_dim = args.classifier_inp_dim
        self.confidence = args.classifier_confidence
        self.nms_thresh = args.classifier_nms_thresh
        self.names_file = args.classifier_names
        self.classes = load_classes(self.names_file)
        self.num_classes = len(self.classes)
    else:
        print_info(widget, False, "info", "Loading network.....", -1)
        self.model = Darknet(args.cfg)
        self.model.load_weights(args.weights)
        print_info(widget, False, "info", "Network successfully loaded", 0)

    # Common setup of self.model for both modes.
    self.model.net_info["height"] = self.inp_dim
    assert self.inp_dim % 32 == 0
    assert self.inp_dim > 32

    # If there's a GPU available, put the model on GPU
    self.cuda = torch.cuda.is_available()
    if self.cuda:
        self.model.cuda()

    # Set the model in evaluation mode
    self.model.eval()

    # if tracking selected, initialize sort class
    self.mot_tracking = None
    if self.tracking == "sort":
        self.mot_tracking = Sort(max_age=30, min_hits=3)
    elif self.tracking == "deep_sort":
        print_info(widget, False, "info", "Loading Deep Sort model ...", -1)
        self.mot_tracking = DeepSort()
        print_info(widget, False, "info", "Deep Sort model loaded", -1)
def test(payload):
    """Run the checkpointed model over the COCO test split.

    Args:
        payload: mapping with key ``"ckpt_file"``, the checkpoint file name
            inside ``./log``.

    Returns:
        ``{"predictions": prd, "labels": lbs}``; both are dicts keyed by
        dataset index.  Each prediction holds ``"boxes"``, ``"objects"`` and
        ``"scores"`` lists kept by NMS; each label holds the ground-truth
        ``"boxes"`` list and the ``"objects"`` as yielded by the loader.
    """
    def as_list(tensor):
        # tensor -> nested Python list via CPU / numpy
        return tensor.cpu().numpy().tolist()

    checkpoint_name = payload["ckpt_file"]
    batch_size = 16

    coco = COCO("./data", Transforms(), train=False)
    loader = DataLoader(coco,
                        shuffle=False,
                        batch_size=batch_size,
                        collate_fn=collate_fn)

    config_file = "yolov3.cfg"
    model = Darknet(config_file).to(device)
    checkpoint = torch.load(os.path.join("./log", checkpoint_name))
    model.load_state_dict(checkpoint["model"])
    model.eval()

    # per-batch predictions over the entire test set
    per_batch = []
    # ground-truth (boxes, class labels) pairs, one per image
    labels = []
    with torch.no_grad():
        for img, boxes, class_labels in loader:
            # get inference output
            output = model(img.to(device))
            labels.extend(zip(boxes, class_labels))

            # Flatten the output of every yolo layer to (batch, -1, 85)
            # (original note: each is (batch_size, 3, gx, gy, 85)) and
            # join them along the box axis.
            flattened = [p.view(p.shape[0], -1, 85) for p in output]
            per_batch.append(torch.cat(flattened, dim=1))

    predictions = torch.cat(per_batch, dim=0)

    # split the raw output into boxes / objectness / class distribution
    predicted_boxes, predicted_objectness, predicted_class_dist = bbox_transform(
        predictions)

    # the predicted boxes are in log space relative to the anchor priors
    # bring them back to normalized xyxy format
    cxcy_priors = anchors.normalize("cxcy")
    # expand the priors to match the dimension of predicted_boxes
    num_images_predicted = predicted_boxes.shape[0]
    batched_cxcy_priors = cxcy_priors.unsqueeze(0).repeat(
        num_images_predicted, 1, 1)
    predicted_boxes = batched_gcxgcy_to_cxcy(predicted_boxes,
                                             batched_cxcy_priors)
    del batched_cxcy_priors

    # convert predicted_boxes to xyxy format and perform nms
    xyxy = batched_cxcy_to_xy(predicted_boxes)
    del predicted_boxes  # (no longer need cxcy format)

    # get predicted object
    # apply softmax to the predicted class distribution
    # note that bbox_transform does not apply softmax
    # because the loss we are using requires us to use raw output
    class_probabilities = F.softmax(predicted_class_dist, dim=-1)
    predicted_objects = torch.argmax(class_probabilities, dim=-1)

    num_images = len(coco)

    # predictions on the test set (value of "predictions" of the return)
    prd = {}
    for i in range(num_images):
        # boxes, scores and objects of image i, filtered by NMS
        image_boxes = xyxy[i]
        image_scores = predicted_objectness[i]
        image_objects = predicted_objects[i]
        keep = tv.ops.nms(image_boxes, image_scores, 0.5)
        prd[i] = {
            "boxes": as_list(image_boxes[keep]),
            "objects": as_list(image_objects[keep]),
            "scores": as_list(image_scores[keep]),
        }

    # ground-truth of the test set
    # skip "difficulties" field, because every object in COCO
    # should be considered reasonable
    lbs = {}
    for i in range(num_images):
        gt_boxes, gt_classes = labels[i]
        lbs[i] = {"boxes": as_list(gt_boxes), "objects": gt_classes}

    return {"predictions": prd, "labels": lbs}
def infer(payload):
    """Run the checkpointed model over the unlabeled COCO samples.

    Args:
        payload: mapping with keys ``"unlabeled"`` (passed to ``COCO`` as
            ``samples``) and ``"ckpt_file"`` (checkpoint file name inside
            ``./log``).

    Returns:
        ``{"outputs": outputs}`` where ``outputs`` is keyed by dataset index
        and each value holds the NMS-filtered ``"boxes"``, ``"pre_softmax"``
        (raw class distribution) and ``"scores"`` as lists.
    """
    def as_list(tensor):
        # tensor -> nested Python list via CPU / numpy
        return tensor.cpu().numpy().tolist()

    unlabeled = payload["unlabeled"]
    checkpoint_name = payload["ckpt_file"]
    batch_size = 16

    coco = COCO("./data", Transforms(), samples=unlabeled, train=True)
    loader = DataLoader(coco,
                        shuffle=False,
                        batch_size=batch_size,
                        collate_fn=collate_fn)

    config_file = "yolov3.cfg"
    model = Darknet(config_file).to(device)
    checkpoint = torch.load(os.path.join("./log", checkpoint_name))
    model.load_state_dict(checkpoint["model"])
    model.eval()

    # per-batch predictions over every requested sample
    per_batch = []
    with torch.no_grad():
        for img, _, _ in loader:
            # get inference output
            output = model(img.to(device))

            # Flatten the output of every yolo layer to (batch, -1, 85)
            # (original note: each is (batch_size, 3, gx, gy, 85)) and
            # join them along the box axis.
            flattened = [p.view(p.shape[0], -1, 85) for p in output]
            per_batch.append(torch.cat(flattened, dim=1))

    predictions = torch.cat(per_batch, dim=0)

    # split the raw output into boxes / objectness / class distribution
    predicted_boxes, predicted_objectness, predicted_class_dist = bbox_transform(
        predictions)

    # the predicted boxes are in log space relative to the anchor priors
    # bring them back to normalized xyxy format
    cxcy_priors = anchors.normalize("cxcy")
    # expand the priors to match the dimension of predicted_boxes
    num_images_predicted = predicted_boxes.shape[0]
    batched_cxcy_priors = cxcy_priors.unsqueeze(0).repeat(
        num_images_predicted, 1, 1)
    predicted_boxes = batched_gcxgcy_to_cxcy(predicted_boxes,
                                             batched_cxcy_priors)
    del batched_cxcy_priors

    # convert predicted_boxes to xyxy format and perform nms
    xyxy = batched_cxcy_to_xy(predicted_boxes)
    del predicted_boxes  # (no longer need cxcy format)

    # class distribution is part of the return
    # do not apply softmax to the predicted class distribution
    # as we will do it internally for efficiency
    outputs = {}
    for i in range(len(coco)):
        # boxes, scores and raw class scores of image i, filtered by NMS
        image_boxes = xyxy[i]
        image_scores = predicted_objectness[i]
        image_logits = predicted_class_dist[i]
        keep = tv.ops.nms(image_boxes, image_scores, 0.5)
        outputs[i] = {
            "boxes": as_list(image_boxes[keep]),
            "pre_softmax": as_list(image_logits[keep]),
            "scores": as_list(image_scores[keep]),
        }

    return {"outputs": outputs}
def train(params):
    """Train Darknet on ImageNet with DALI pipelines and periodic evaluation.

    Args:
        params: raw configuration, wrapped in ``Params``.  Attributes read
            here: seed, save_root, project_name, version, num_gpus,
            data_root, train_set, val_set, load_weights, optim,
            learning_rate, epoch, save_interval, eval_interval.

    Behaviour:
        * Logs to ``<save_root>/<project>_<time>_<version>/<same>.log``.
        * Training labels are the (1-based) integer parent-directory names
          of the JPEG files, shifted to 0-based.
        * Every ``eval_interval`` epochs the model is evaluated; whenever
          the mean evaluation loss improves, a checkpoint
          ``<epoch>_<step>.pth`` is written.
        * On Ctrl-C an ``Interrupt_<epoch>_<step>.pth`` checkpoint is saved.

    Raises:
        SystemExit: with status 1 when resuming from
            ``params.load_weights`` fails.
        RuntimeError: if an evaluation pass yields no samples.
    """
    params = Params(params)
    set_random_seeds(params.seed)

    time_now = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    params.save_root = params.save_root + f'/{params.project_name}_{time_now}_{params.version}'
    os.makedirs(params.save_root, exist_ok=True)
    logging.basicConfig(
        filename=f'{params.save_root}/{params.project_name}_{time_now}_{params.version}.log',
        filemode='a',
        # Without an explicit level the root logger stays at WARNING and
        # every logging.info() call below is silently dropped.
        level=logging.INFO,
        # The previous format used '%{asctime}s' and '%(levalname)s', which
        # cannot be rendered by the %-style formatter.
        format='%(asctime)s - %(levelname)s: %(message)s')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    logging.info(f'Available GPUs: {torch.cuda.device_count()}')

    # Train pipeline
    files = glob.glob(
        os.path.join(params.data_root, params.project_name, params.train_set,
                     '*/*.JPEG'))
    # The parent directory name is the 1-based class id.
    labels = [int(fp.split('/')[-2]) - 1 for fp in files]
    train_pipeline = TrainImageDecoderPipeline(params=params,
                                               device_id=0,
                                               files=files,
                                               labels=labels)
    train_pipeline.build()
    train_pii = pytorchIterator(train_pipeline,
                                last_batch_policy=LastBatchPolicy.DROP,
                                reader_name='Reader',
                                auto_reset=True)

    # Evaluation pipeline
    files = glob.glob(
        os.path.join(params.data_root, params.project_name, params.val_set,
                     '*.JPEG'))
    # Order the files by the trailing id of the file name
    # ("..._<id>.JPEG"); the comparison is on the string form of the id.
    files = sorted(
        files, key=lambda f: f.split('/')[-1].split('_')[-1].split('.')[0])
    labels = loadlabel(
        os.path.join(
            params.data_root, params.project_name,
            'ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt'
        ))
    eval_pipeline = EvalImageDecoderPipeline(params=params,
                                             device_id=0,
                                             files=files,
                                             labels=labels)
    eval_pipeline.build()
    eval_pii = pytorchIterator(eval_pipeline,
                               last_batch_policy=LastBatchPolicy.PARTIAL,
                               reader_name='Reader',
                               auto_reset=True)

    model = Darknet()

    last_step = 0
    last_epoch = 0
    if params.load_weights != 'None':
        try:
            state_dict = torch.load(params.load_weights)
            model.load_state_dict(state_dict)
            # Checkpoints are named "<...>_<epoch>_<step>.pth".  Parse the
            # file name only: the run directory itself contains
            # underscores, which used to corrupt the epoch field.
            stem = os.path.splitext(os.path.basename(params.load_weights))[0]
            epoch_field, step_field = stem.split('_')[-2:]
            last_step = int(step_field)
            last_epoch = int(epoch_field)
        except Exception:
            logging.exception('Fail to resuming from weight!')
            raise SystemExit(1)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = nn.DataParallel(model)

    if params.optim == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=params.learning_rate)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=params.learning_rate,
                                    momentum=0.9,
                                    nesterov=True)
    criterion = nn.CrossEntropyLoss()

    epoch = 0
    begin_epoch = max(0, last_epoch)
    step = max(0, last_step)
    # Start from +inf so the first evaluation always produces a checkpoint,
    # whatever the scale of the loss.
    best_loss = float('inf')
    logging.info('Begin to train...')
    model.train()

    try:
        for epoch in range(begin_epoch, params.epoch):
            for batch_idx, data in enumerate(train_pii):
                x = data[0]['data']
                label = data[0]['label'].squeeze(-1).long().cuda()
                output = model(x)
                loss = criterion(output, label)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                if batch_idx % params.save_interval == 0:
                    logging.info(
                        f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} '
                        f'Train Epoch: {epoch} iter: {batch_idx} loss: {loss.item()}'
                    )
                step += 1

            if epoch % params.eval_interval == 0:
                model.eval()
                epoch_loss = 0
                num_samples = 0
                predictions = []
                targets = []
                with torch.no_grad():
                    for data in eval_pii:
                        x = data[0]['data']
                        label = data[0]['label'].squeeze(-1).long().cuda()
                        output = model(x)
                        batch_loss = criterion(output, label).item()
                        # Per-batch mean -> weight by the batch size.
                        epoch_loss += batch_loss * x.shape[0]
                        num_samples += x.shape[0]
                        predictions.append(output)
                        targets.append(label)
                if num_samples == 0:
                    raise RuntimeError(
                        'The evaluation pipeline produced no samples.')
                # Mean over the samples actually seen instead of a
                # hard-coded 50000.
                eval_loss = epoch_loss / num_samples
                predictions = torch.cat(predictions, dim=0)
                targets = torch.cat(targets, dim=0)
                acc = top1accuracy(predictions, targets)
                acctop5 = top5accuracy(predictions, targets)
                logging.info(
                    f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} '
                    f'Eval Epoch: {epoch} loss: {eval_loss} accuracy: {acc} Top5 acc: {acctop5}'
                )
                if eval_loss < best_loss:
                    best_loss = eval_loss
                    save_checkpoint(
                        model, f'{params.save_root}/{epoch}_{step}.pth')
                model.train()
    except KeyboardInterrupt:
        save_checkpoint(model,
                        f'{params.save_root}/Interrupt_{epoch}_{step}.pth')
"D:\py_pro\YOLOv3-PyTorch\weights\kalete\ep893-map80.55-loss0.00.weights", type=str) parser.add_argument("--evaluation_interval", type=int, default=2, help="每隔几次使用验证集") args = parser.parse_args() print(args) class_names = load_classes( r"D:\py_pro\YOLOv3-PyTorch\data\kalete\dnf_classes.txt") # 加载所有种类名称 train_path = r'D:\py_pro\YOLOv3-PyTorch\data\kalete\train.txt' val_path = r'D:\py_pro\YOLOv3-PyTorch\data\kalete\val.txt' print("载入网络...") model = Darknet(args.cfg) pretrained = True if pretrained: model.load_state_dict(torch.load(args.weights)) else: # 随机初始化权重,会对模型进行高斯随机初始化 model.apply(weights_init_normal) print("网络权重加载成功.") # 设置网络输入图片尺寸大小与学习率 reso = int(model.net_info["height"]) lr = float(model.net_info["learning_rate"]) assert reso % 32 == 0 # 判断如果不是32的整数倍就抛出异常 assert reso > 32 # 判断如果网络输入图片尺寸小于32也抛出异常
print(opt) #logger = Logger("logs") device = torch.device("cuda" if torch.cuda.is_available() else "cpu") os.makedirs("output", exist_ok=True) os.makedirs("checkpoints", exist_ok=True) # Get data configuration data_config = parse_data_config(opt.data_config) train_path = data_config["train"] valid_path = data_config["valid"] class_names = load_classes(data_config["names"]) # Initiate model model = Darknet(opt.model_def).to(device) model.apply(weights_init_normal) # If specified we start from checkpoint if opt.pretrained_weights: if opt.pretrained_weights.endswith(".pth"): model.load_state_dict(torch.load(opt.pretrained_weights)) else: model.load_darknet_weights(opt.pretrained_weights) # Get dataloader dataset = ListDataset(train_path, augment=True, multiscale=opt.multiscale_training) dataloader = torch.utils.data.DataLoader( dataset,
import os
import sys

# Detection settings.
weights = './pretrained.pt'
imgsz = 448
conf_thres = 0.35
iou_thres = 0.5
cfg = './cfg/yolov4-pacsp.cfg'
names = ['fire', 'smoke']
colors = [(255, 30, 0), (50, 0, 255)]
device = torch.device('cpu')

# Fail early with a usage message instead of an IndexError when the source
# argument is missing.
if len(sys.argv) < 2:
    sys.exit('usage: {} <source>'.format(sys.argv[0]))

if not os.path.isfile(weights):
    # Download weight 200Mb
    # torch.hub.download_url_to_file('https://www.dropbox.com/s/a1puv47v6tmrk6j/weights.pt?dl=1', weights)
    pass

# Load model
# NOTE(review): the weights file is never loaded into the model here (the
# load_state_dict call is disabled), so the network runs with whatever
# Darknet(cfg, imgsz) initialises -- confirm this is intended.
model = Darknet(cfg, imgsz)
# model.load_state_dict(weights)
model.to(device).eval()

source = sys.argv[1]
out = detect(model, source, './out.png', imgsz, conf_thres, iou_thres, names,
             colors, device)

# Show the result until a key is pressed.
cv2.imshow('output', out)
cv2.waitKey(0)
cv2.destroyAllWindows()
def _str2bool(value):
    """Parse a command-line boolean such as 'true', '0' or 'no'."""
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y'):
        return True
    if text in ('0', 'false', 'f', 'no', 'n'):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(value))


def _evaluate_and_log(model, logger, year, image_set, batch_size, epoch,
                      description):
    """Evaluate on one VOC image set, log the mean metrics and print the APs."""
    precision, recall, AP, f1, ap_class = evaluate(
        model, [year, image_set], [0.5, 0.5, 0.5], batch_size, True,
        diagnosis_code=1)
    evaluation_metrics = [
        ("{}_precision".format(image_set), precision.mean()),
        ("{}_recall".format(image_set), recall.mean()),
        ("{}_mAP".format(image_set), AP.mean()),
        ("{}_f1".format(image_set), f1.mean()),
    ]
    for tag, value in evaluation_metrics:
        logger.add_scalar("{}/{}".format(image_set, tag), value.item(), epoch)

    # Print class APs and mAP
    ap_table = [["Index", "Class name", "AP"]]
    for i, c in enumerate(ap_class):
        ap_table.append([c, val2labels(c), "%.5f" % AP[i]])
    print(AsciiTable(ap_table).table)
    print(f"---- {description} mAP {AP.mean()}")


def main():
    """Train a Darknet model on PASCAL VOC with periodic evaluation.

    Command-line options select the VOC year / image set, the number of
    epochs, batch size, model definition, learning rate, device, shuffling,
    checkpoint / evaluation intervals and optional pretrained weights.
    Scalars are written to a ``SummaryWriter`` under ``./logs/tb/`` and
    checkpoints to ``./logs/model/model_params_<epoch>.ckpt``.

    Raises:
        RuntimeError: if ``--use_gpu`` is enabled but CUDA is unavailable.
    """
    # Hyperparameters parser
    parser = argparse.ArgumentParser()
    parser.add_argument("--year", type=str, default='2012', help="used to select training set")
    parser.add_argument("--set", type=str, default='train', help="used to select training set")
    parser.add_argument("--epochs", type=int, default=201, help="number of epochs")
    parser.add_argument("--batch_size", type=int, default=8, help="size of each image batch")
    parser.add_argument("--model_def", type=str, default="config/net/resnet_dropout.cfg", help="path to model definition file")
    parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension")
    parser.add_argument("--opt_lr", type=float, default=1e-5, help="learning rate for optimizer")
    # Boolean flags need an explicit parser: without one, any value given on
    # the command line arrives as a non-empty (truthy) string.
    parser.add_argument("--use_gpu", type=_str2bool, default=True, help="use GPU to accelerate training")
    parser.add_argument("--shuffle_train", type=_str2bool, default=True, help="shuffle the training dataset")
    parser.add_argument("--checkpoint_interval", type=int, default=20, help="interval between saving model weights")
    parser.add_argument("--evaluation_interval", type=int, default=10, help="interval evaluations on validation set")
    parser.add_argument("--pretrained_weights", default=False, help="if specified starts from checkpoint model")
    opt = parser.parse_args()
    print(opt)

    if opt.use_gpu:
        if torch.cuda.is_available():
            device = torch.device('cuda')
        else:
            raise RuntimeError("Current Torch doesn't have GPU support.")
    else:
        device = torch.device('cpu')

    logger = SummaryWriter(exist_or_create_folder("./logs/tb/"))
    try:
        # Initiate model
        eval_model = Darknet(opt.model_def).to(device)
        if opt.pretrained_weights:
            print("Initialize model with pretrained_model")
            if opt.pretrained_weights.endswith(".ckpt"):
                eval_model.load_state_dict(torch.load(opt.pretrained_weights))
            else:
                eval_model.load_darknet_weights(opt.pretrained_weights)
        else:
            print("Initialize model randomly")
            eval_model.apply(weights_init_normal)
        print(eval_model)
        summary(eval_model, (3, 416, 416))

        learn_batch_counter = 0  # for logger update (total numbers)
        batch_size = opt.batch_size

        # Get dataloader
        print("Begin loading train dataset ......")
        t_load_data = time.time()
        dataset = torchvision.datasets.VOCDetection(root='data/VOC/',
                                                    year=opt.year,
                                                    image_set=opt.set,
                                                    transforms=None,
                                                    download=True)
        dataset_dict = trans_voc(dataset)
        dataset = ListDataset(dataset_dict)
        loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=opt.batch_size,
            shuffle=opt.shuffle_train,
            pin_memory=True,
            collate_fn=dataset.collate_fn,
        )
        print("Complete loading train dataset in {} s".format(time.time() - t_load_data))

        optimizer = torch.optim.Adam(eval_model.parameters(), lr=opt.opt_lr)

        # Warmup and learning rate decay: warm up over the first 10 epochs
        # with multiplier 10 (1e-5 -> 1e-4 with the default opt_lr), then
        # hand over to the cosine schedule.
        scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, opt.epochs)
        scheduler_warmup = GradualWarmupScheduler(optimizer, multiplier=10, total_epoch=10, after_scheduler=scheduler_cosine)

        start_time = time.time()

        for i_epoch in range(opt.epochs):
            eval_model.train()
            for i_batch, (_, imgs, raw_targets, transform_params, tar_boxes) in enumerate(loader):
                print("\n++++++++++ i_epoch-i_batch {}-{} ++++++++++".format(i_epoch, i_batch))
                # Skip the incomplete batch at the end of the dataset.
                if len(imgs) != batch_size:
                    print("Current batch size is smaller than opt.batch_size!")
                    continue
                imgs = imgs.to(device)
                raw_targets = raw_targets.to(device)
                tar_boxes = tar_boxes.to(device)

                input_img = imgs
                if i_epoch == 0 and i_batch == 0:
                    logger.add_graph(eval_model, input_img)

                # Keep channel 0 and channels 6.. of the raw targets as the
                # targets handed to the model.
                cls_targets = torch.cat((raw_targets[:, :, :, 0].unsqueeze(3), raw_targets[:, :, :, 6:]), 3)
                loss, pred = eval_model(input_img, cls_targets)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                learn_batch_counter += 1

                print("Ep-bt: {}-{} | Loss: {}".format(i_epoch, i_batch, loss.item()))
                logger.add_scalar('loss/loss', loss.item(), learn_batch_counter)

            if (i_epoch + 1) % opt.checkpoint_interval == 0:
                print("Saving model in epoch {}".format(i_epoch))
                torch.save(eval_model.state_dict(),
                           exist_or_create_folder("./logs/model/model_params_{}.ckpt".format(i_epoch)))

            if (i_epoch + 1) % opt.evaluation_interval == 0:
                # Evaluate the model on the validation set
                _evaluate_and_log(eval_model, logger, opt.year, 'val',
                                  batch_size, i_epoch, 'validation')
                # Evaluate the model on the training set
                _evaluate_and_log(eval_model, logger, opt.year, 'train',
                                  batch_size, i_epoch, 'training')

            # Warmup and lr decay
            scheduler_warmup.step()

            # Free GPU memory
            torch.cuda.empty_cache()

        total_train_time = time.time() - start_time
        print("Training complete in {} hours".format(total_train_time / 3600))
    finally:
        # Flush pending events and release the writer's file handle.
        logger.close()
continue #org_weights_mess.append([var_name, var_shape]) #print(var_name,var_shape) #print("=> " + str(var_name).ljust(50), var_shape) print() tf.reset_default_graph() cur_weights_mess = [] tf.Graph().as_default() with tf.name_scope('input'): input_data = tf.placeholder(dtype=tf.float32, shape=(1, 416, 416, 3), name='input_data') #meta_weight=tf.placeholder(dtype=tf.float32,shape=(1,1,20*1024,1),name='meta_weight') #training = tf.placeholder(dtype=tf.bool, name='trainable') model = Darknet(input_data, False) for var in tf.global_variables(): var_name = var.op.name var_name_mess = str(var_name).split('/') var_shape = var.shape #print(var_name_mess[0]) #if flag.train_from_coco: #if 'dyconv' in var_name_mess and 'weights' in var_name_mess: # cur_weights_mess.append([var_name,]) if (var_name_mess[-1] not in [ 'weights', 'gamma', 'beta', 'moving_mean', 'moving_variance' ]) or 'pred' in var_name_mess or 'decode' in var_name_mess: print("===========", var_shape) continue #cur_weights_mess.append([var_name, var_shape]) #print("=> " + str(var_name).ljust(50), var_shape)
import os
import cv2
import torch
import numpy as np
from torch.autograd import Variable
from model import Darknet


def get_test_img():
    """Load ``dog-cycle-car.png`` as a float tensor for the network.

    The image is resized to 416x416, its channel order is reversed, the
    axes are moved to channel-first, a leading batch axis is added and the
    values are divided by 255.

    Returns:
        A float tensor wrapped in ``Variable``.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    img = cv2.imread('dog-cycle-car.png')
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Cannot read image: dog-cycle-car.png')
    img = cv2.resize(img, (416, 416))
    # Reverse the channel axis, then HWC -> CHW.
    img = img[:, :, ::-1].transpose((2, 0, 1))
    img = img[np.newaxis, :, :, :] / 255.
    img = torch.from_numpy(img).float()
    img = Variable(img)
    return img


model = Darknet('config/yolov3.cfg')
img = get_test_img()
out = model(img)
print(out)
# Report the shapes of the first two outputs (the second one used to print
# out[0] again).
print(f'Shape:=> First YOLO:{out[0].shape} Second YOLO:{out[1].shape}')
def detect(kitti_weights='../checkpoints/best_weights_kitti.pth',
           config_path='../config/yolov3-kitti.cfg',
           class_path='../data/names.txt'):
    """
    Run inference on the sample images and save annotated plots.

    Every image loaded from ../data/samples/ is passed through the network;
    one PNG per image (named by its running index) is written to ../output
    with the detected boxes and class labels drawn on top.

    Args
        kitti_weights: Path of weights
        config_path: Yolo configuration file path
        class_path: Path of class names txt file
    """
    # Single source of truth for the network input resolution; it is also
    # needed below to undo the padding when rescaling boxes.
    img_size = 416

    cuda = torch.cuda.is_available()
    os.makedirs('../output', exist_ok=True)

    # Set up model
    model = Darknet(config_path, img_size=img_size)
    model.load_weights(kitti_weights)

    if cuda:
        model.cuda()
        print("Cuda available for inference")

    model.eval()  # Set in evaluation mode

    dataloader = DataLoader(ImageFolder("../data/samples/", img_size=img_size),
                            batch_size=2, shuffle=False, num_workers=0)

    classes = load_classes(class_path)  # Extracts class labels from file

    Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    imgs = []  # Stores image paths
    img_detections = []  # Stores detections for each image index

    print('data size : %d' % len(dataloader))
    print('\nPerforming object detection:')
    prev_time = time.time()
    for batch_i, (img_paths, input_imgs) in enumerate(dataloader):
        # Configure input
        input_imgs = Variable(input_imgs.type(Tensor))

        # Get detections
        with torch.no_grad():
            detections = model(input_imgs)
            detections = non_max_suppression(detections, 80, 0.8, 0.4)

        # Log progress
        current_time = time.time()
        inference_time = datetime.timedelta(seconds=current_time - prev_time)
        prev_time = current_time
        print('\t+ Batch %d, Inference Time: %s' % (batch_i, inference_time))

        # Save image and detections
        imgs.extend(img_paths)
        img_detections.extend(detections)

    # Bounding-box colors
    cmap = plt.get_cmap('tab10')
    colors = [cmap(i) for i in np.linspace(0, 1, 20)]

    print('\nSaving images:')
    # Iterate through images and save plot of detections
    for img_i, (path, detections) in enumerate(zip(imgs, img_detections)):
        print("(%d) Image: '%s'" % (img_i, path))

        # Create plot. plt.subplots already opens a figure; an additional
        # plt.figure() call would leave one unclosed figure behind per image.
        img = np.array(Image.open(path))
        fig, ax = plt.subplots(1)
        ax.imshow(img)

        # The amount of padding that was added
        pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))
        pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))
        # Image height and width after padding is removed
        unpad_h = img_size - pad_y
        unpad_w = img_size - pad_x

        # Draw bounding boxes and labels of detections
        if detections is not None:
            print(type(detections))
            print(detections.size())
            unique_labels = detections[:, -1].cpu().unique()
            n_cls_preds = len(unique_labels)
            if n_cls_preds <= len(colors):
                bbox_colors = random.sample(colors, n_cls_preds)
            else:
                # random.sample cannot draw more items than the palette
                # holds; fall back to colors that may repeat.
                bbox_colors = [random.choice(colors) for _ in range(n_cls_preds)]

            for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                print('\t+ Label: %s, Conf: %.5f' % (classes[int(cls_pred)], cls_conf.item()))

                # Rescale coordinates to original dimensions
                box_h = int(((y2 - y1) / unpad_h) * (img.shape[0]))
                box_w = int(((x2 - x1) / unpad_w) * (img.shape[1]))
                y1 = int(((y1 - pad_y // 2) / unpad_h) * (img.shape[0]))
                x1 = int(((x1 - pad_x // 2) / unpad_w) * (img.shape[1]))

                # Index the match explicitly: int() on a 1-element array is
                # deprecated in recent NumPy releases.
                color_idx = int(np.where(unique_labels == int(cls_pred))[0][0])
                color = bbox_colors[color_idx]

                # Create a Rectangle patch
                bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2,
                                         edgecolor=color, facecolor='none')
                # Add the bbox to the plot
                ax.add_patch(bbox)
                # Add label
                plt.text(x1, y1 - 30,
                         s=classes[int(cls_pred)] + ' ' + str('%.4f' % cls_conf.item()),
                         color='white', verticalalignment='top',
                         bbox={'color': color, 'pad': 0})

        # Save generated image with detections
        plt.axis('off')
        plt.gca().xaxis.set_major_locator(NullLocator())
        plt.gca().yaxis.set_major_locator(NullLocator())
        plt.savefig('../output/%d.png' % (img_i), bbox_inches='tight', pad_inches=0.0)
        plt.close(fig)
def main(train_path="../data/train/images/",
         val_path="../data/train/images/",
         labels_path="../data/train/yolo_labels/",
         weights_path="../checkpoints/",
         preload_weights_file="darknet53.conv.74",
         output_path="../output",
         yolo_config_file="../config/yolov3-kitti.cfg",
         fraction=1,
         learning_rate=1e-3,
         weight_decay=1e-4,
         batch_size=2,
         epochs=30,
         freeze_struct=(True, 5)):
    """
    This is the point of entry to the neural network program.
    All the training history will be saved as a csv in the output path

    Args
        train_path (string): Directory containing the training images
        val_path (string): Directory containing the val images
        labels_path (string): Directory containing the yolo format labels for data
        weights_path (string): Directory containing the weights (new weights for this program will also be added here)
        preload_weights_file (string): Name of preload weights file
        output_path (string): Directory to store the training history outputs as csv
        yolo_config_file (string): file path of yolo configuration file
        fraction (float): fraction of data to use for training
        learning_rate (float): initial learning rate
        weight_decay (float): weight decay value
        batch_size (int): batch_size for both training and validation
        epochs (int): maximum number of epochs to train the model
        freeze_struct (sequence): [bool, int] indicating whether to freeze the Darknet backbone and until which epoch should it be frozen

    Returns
        None
    """
    # Set up checkpoints path
    checkpoints_path = weights_path

    # Set up env variables and create required directories
    os.makedirs(output_path, exist_ok=True)
    os.makedirs(checkpoints_path, exist_ok=True)

    # Set up cuda
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print("Available device = ", device)

    # Create model and load pretrained darknet weights
    model = Darknet(yolo_config_file)
    print("Loading imagenet weights to darknet")
    model.load_weights(os.path.join(weights_path, preload_weights_file))
    model.to(device)

    # Create datasets
    train_dataset = KITTI2D(train_path, labels_path, fraction=fraction, train=True)
    valid_dataset = KITTI2D(val_path, labels_path, fraction=fraction, train=False)

    # Create dataloaders
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    # Create optimizers (only parameters that still require gradients)
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                 lr=learning_rate, weight_decay=weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 10)

    # Keep the tensor type in step with the device selected above instead of
    # always asking for CUDA tensors.
    tensor_type = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

    train_log_path = os.path.join(output_path, "train_results.csv")
    valid_log_path = os.path.join(output_path, "valid_results.csv")

    # Create log csv files; the context manager closes them even if
    # training raises.
    with open(train_log_path, "w", newline="") as train_log_file, \
            open(valid_log_path, "w", newline="") as valid_log_file:
        train_csv = csv.writer(train_log_file)
        valid_csv = csv.writer(valid_log_file)

        print("Starting to train yolov3 model...")

        # Train model here
        train_model(model,
                    device,
                    optimizer,
                    lr_scheduler,
                    train_dataloader,
                    valid_dataloader,
                    train_csv,
                    valid_csv,
                    weights_path,
                    max_epochs=epochs,
                    tensor_type=tensor_type,
                    update_gradient_samples=1,
                    freeze_darknet=freeze_struct[0],
                    freeze_epoch=freeze_struct[1])

    print("Training completed")
help="the image to predict (default: %(default)s)") parser.add_argument("--weight", required=True, metavar="/path/to/yolov4.weights", help="the path of weight file") parser.add_argument("--save-img", metavar="predicted-img", help="the path to save predicted image") args = parser.parse_args() return args if __name__ == "__main__": args = parse_args() img: Image.Image = Image.open(args.img_file) img = img.resize((608, 608)) # C*H*W img_data = to_image(img) net = Darknet(img_data.size(0)) net.load_weights(args.weight) net.eval() with torch.no_grad(): boxes, confs = net(img_data.unsqueeze(0)) idxes_pred, boxes_pred, probs_pred = utils.post_processing(boxes, confs, 0.4, 0.6) utils.plot_box(boxes_pred, args.img_file, args.save_img)
map(pre_img, loaded_imgs, [height for i in range(len(loaded_imgs))])) left_over = 0 if (len(loaded_imgs) % batch_size): left_over = 1 num_batches = (len(loaded_imgs) // batch_size) + left_over batches = [ det_imgs[i * batch_size:min((i + 1) * batch_size, len(loaded_imgs))] for i in range(num_batches) ] batches = [torch.cat(batch, 0) for batch in batches] net = Darknet() net = net.cuda() net.load_weights() net.eval() write = 0 def put_rectangle(x, results): c1 = tuple(x[1:3].int()) c2 = tuple(x[3:5].int()) img = results[int(x[0])] cls = int(x[-1]) label = "{0}".format(classes[cls]) cv2.rectangle(img, c1, c2, [255, 0, 0], 1) t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
def detect_images(widget=None):
    """
    Run detection on the requested images and write the annotated results.

    Arguments are taken from ``arg_parse()`` when no widget is given,
    otherwise from ``widget.args``. Progress messages, per-batch results,
    errors and the final timing summary are all reported through
    ``print_info``.

    Args
        widget: optional object exposing an ``args`` attribute; it is also
            forwarded to every ``print_info`` call.

    Returns
        None. Returns early when an image cannot be read or when no
        detections were made.
    """
    if not widget:
        args = arg_parse()
    else:
        args = widget.args

    read_dir = time.time()
    print_info(widget, False, "info", "Reading addresses.....")
    images = args.images
    im_list = []
    img = None
    try:
        # Relative paths are resolved against the current working directory
        for img in images:
            if os.path.isabs(img):
                im_list.append(img)
            else:
                im_list.append(osp.join(osp.realpath('.'), img))
    except FileNotFoundError:
        print_info(widget, True, "error", "No file or directory with the name {}".format(img))

    # exist_ok avoids the race between checking for and creating the directory
    os.makedirs(args.det, exist_ok=True)
    print_info(widget, False, "info", "Finished reading addresses")
    finish_read_dir = time.time()

    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    namesfile = args.names
    cuda_present = torch.cuda.is_available()
    classes = load_classes(namesfile)
    num_classes = len(classes)

    # Set up the neural network
    load_net = time.time()
    print_info(widget, False, "info", "Loading network.....")
    model = Darknet(args.cfg)
    model.load_weights(args.weights)
    print_info(widget, False, "info", "Network successfully loaded")
    finish_load_net = time.time()

    model.net_info["height"] = args.reso
    model.net_info["width"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # If there's a GPU available, put the model on GPU
    if cuda_present:
        model.cuda()

    # Set the model in evaluation mode (for Batchnorm layers)
    model.eval()

    # Detection phase
    load_batch = time.time()
    print_info(widget, False, "info", "Loading batches.....")
    loaded_ims = [cv2.imread(x) for x in im_list]

    # cv2.imread returns None for a missing or undecodable file; report it
    # here rather than failing later with an opaque AttributeError.
    unreadable = [path for path, im in zip(im_list, loaded_ims) if im is None]
    if unreadable:
        print_info(widget, True, "error",
                   "Could not read image(s): {}".format(", ".join(unreadable)))
        return

    im_batches = list(map(prep_image, loaded_ims, [inp_dim for _ in range(len(im_list))]))
    # Each row is (w, h, w, h) so it lines up with the (x1, y1, x2, y2) columns
    im_dim_list = [(x.shape[1], x.shape[0]) for x in loaded_ims]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

    if batch_size != 1:
        # Count batches from the image list itself so that an empty list
        # yields zero batches instead of one empty torch.cat call.
        leftover = 1 if len(im_list) % batch_size else 0
        num_batches = len(im_list) // batch_size + leftover
        im_batches = [torch.cat((im_batches[i * batch_size: min((i + 1) * batch_size, len(im_batches))]))
                      for i in range(num_batches)]

    if cuda_present:
        im_dim_list = im_dim_list.cuda()

    # Empty placeholder; a first dimension of 0 means "no detections yet"
    output = torch.empty((0, 0))
    print_info(widget, False, "info", "Finished loading batches....")

    start_det_loop = time.time()
    for i, batch in enumerate(im_batches):
        # load the image
        start = time.time()
        print_info(widget, False, "info", f"Detecting batch no {i}....")
        if cuda_present:
            batch = batch.cuda()
        with torch.no_grad():
            prediction = model(batch, cuda_present)

        prediction = write_results(prediction, confidence, num_classes, nms_conf=nms_thesh)
        end = time.time()

        batch_paths = im_list[i * batch_size: min((i + 1) * batch_size, len(im_list))]

        # An int result is handled as "nothing detected in this batch"
        if isinstance(prediction, int):
            for im_num, image in enumerate(batch_paths):
                im_id = i * batch_size + im_num
                msg = "{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start) / batch_size)
                msg += "\n{0:20s} {1:s}".format("Objects Detected:", "")
                msg += "\n----------------------------------------------------------"
                print_info(widget, False, 'batch_info', msg, im_id)
            continue

        # transform the attribute from index in batch to index in im_list
        prediction[:, 0] += i * batch_size

        if np.size(output, 0) == 0:  # If we haven't initialised output
            output = prediction
        else:
            output = torch.cat((output, prediction))

        for im_num, image in enumerate(batch_paths):
            im_id = i * batch_size + im_num
            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
            msg = "{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start) / batch_size)
            msg += "\n{0:20s} {1:s}".format("Objects Detected:", " ".join(objs))
            msg += "\n----------------------------------------------------------"
            print_info(widget, False, 'batch_info', msg, im_id)

        if cuda_present:
            torch.cuda.synchronize()
        print_info(widget, False, "info", f"Finished detecting batch no {i}")

    if np.size(output, 0) == 0:
        print_info(widget, False, 'no_detections', "No detections were made")
        print_info(widget, False, 'finished')
        return

    # Start rescaling
    print_info(widget, False, "info", "Output processing....")
    output_rescale = time.time()
    # One row of image dimensions per detection, selected by image index
    im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())
    scaling_factor = torch.min(inp_dim / im_dim_list, 1)[0].view(-1, 1)
    # Remove the padding offset, then scale back to original image size
    output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor

    # Clamp box corners to the image bounds
    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim_list[i, 0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim_list[i, 1])

    class_load = time.time()
    print_info(widget, False, "info", "Finished output processing.")

    # Start draw
    print_info(widget, False, "info", "Drawing boxes....")
    draw = time.time()
    images_handler = ImagesHandler(classes, output, loaded_ims, args.det, im_list, batch_size)
    images_handler.write()
    print_info(widget, False, "images_ready", images_handler.imageList)
    end = time.time()
    print_info(widget, False, "info", "Finished drawing boxes")

    msg = "\n\nSUMMARY"
    msg += "\n----------------------------------------------------------"
    msg += "\n{:25s}: {}".format("Task", "Time Taken (in seconds)")
    msg += "\n"
    msg += "\n{:25s}: {:2.3f}".format("Reading addresses", finish_read_dir - read_dir)
    msg += "\n{:25s}: {:2.3f}".format("Loading network", finish_load_net - load_net)
    msg += "\n{:25s}: {:2.3f}".format("Loading batch", start_det_loop - load_batch)
    msg += "\n{:25s}: {:2.3f}".format("Detection (" + str(len(im_list)) + " images)", output_rescale - start_det_loop)
    msg += "\n{:25s}: {:2.3f}".format("Output Processing", class_load - output_rescale)
    msg += "\n{:25s}: {:2.3f}".format("Drawing Boxes", end - draw)
    msg += "\n{:25s}: {:2.3f}".format("Average time_per_img", (end - load_batch) / len(im_list))
    msg += "\n----------------------------------------------------------"
    print_info(widget, False, 'info', msg)

    torch.cuda.empty_cache()
    print_info(widget, False, 'finished')
def train(payload):
    """
    Train the detector on the labeled samples and save a checkpoint.

    Args
        payload (dict): must provide
            "labeled": sample selection forwarded to the COCO dataset,
            "resume_from": checkpoint file name under ./log to resume from,
                or None to start fresh,
            "ckpt_file": file name under ./log for the new checkpoint.

    Returns
        None. The model and optimizer state dicts are written to
        ./log/<ckpt_file>.
    """
    labeled = payload["labeled"]
    resume_from = payload["resume_from"]
    ckpt_file = payload["ckpt_file"]

    log_dir = "./log"

    # hyperparameters
    batch_size = 16
    # NOTE(review): epochs is declared but never used; the loop below makes
    # a single pass over the loader. Confirm whether an epoch loop was intended.
    epochs = 2  # just for demo
    lr = 1e-2
    weight_decay = 1e-2

    coco = COCO("./data", Transforms(), samples=labeled, train=True)
    loader = DataLoader(coco, shuffle=True, batch_size=batch_size,
                        collate_fn=collate_fn)

    config_file = "yolov3.cfg"
    model = Darknet(config_file).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # resume model and optimizer from previous loop
    if resume_from is not None:
        # map_location lets a checkpoint saved on another device (e.g. a GPU)
        # be restored onto the device used here
        ckpt = torch.load(os.path.join(log_dir, resume_from), map_location=device)
        model.load_state_dict(ckpt["model"])
        optimizer.load_state_dict(ckpt["optimizer"])

    # loss function
    priors = anchors.normalize("xyxy")
    loss_fn = HardNegativeMultiBoxesLoss(priors, device=device)

    model.train()
    for img, boxes, labels in loader:
        img = img.to(device)

        # 3 predictions from 3 yolo layers
        output = model(img)

        # batch predictions on each image
        batched_prediction = []
        for p in output:
            # (batch_size, 3, gx, gy, 85)
            # Use a separate name so the batch_size hyperparameter above is
            # not overwritten (the last batch may be smaller).
            n_images = p.shape[0]
            p = p.view(n_images, -1, 85)
            batched_prediction.append(p)

        # (batch_size, n_priors, 85)
        batched_prediction = torch.cat(batched_prediction, dim=1)

        # the last dim of batched_prediction represents the predicted box
        # batched_prediction[...,:4] is the coordinate of the predicted bbox
        # batched_prediction[...,4] is the objectness score
        # batched_prediction[...,5:] is the pre-softmax class distribution

        # we need to apply some transforms to those predictions
        # before we can use HardNegativeMultiBoxesLoss.
        # In particular, the predicted bbox needs to be relative to
        # normalized anchor priors.
        # bbox_transform performs those transforms, since it is used by
        # other processes as well.
        # see documentation on HardNegativeMultiBoxesLoss
        # for its input parameters
        predicted_boxes, predicted_objectness, predicted_class_dist = bbox_transform(
            batched_prediction)

        loss = loss_fn(predicted_boxes, predicted_objectness,
                       predicted_class_dist, boxes, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # save ckpt for this loop; make sure the target directory exists first
    os.makedirs(log_dir, exist_ok=True)
    ckpt = {"model": model.state_dict(), "optimizer": optimizer.state_dict()}
    torch.save(ckpt, os.path.join(log_dir, ckpt_file))
    return