Esempio n. 1
0
def client_handler(c, addr, config, pid):
    """Serve inference requests from one client connection until it closes.

    An initial ``{"code": 1}`` message is sent, then JSON requests are read
    in a loop with ``recv_msg``:

    * ``code == 1`` with a non-``None`` ``name``: the base64 payload in
      ``data`` is written to ``config["images_path"] + name``, the model
      selected by ``app`` ("yolo3", "yolo2", anything else -> pose model) is
      run on it, and a ``code == 2`` reply with timing figures is sent back.
    * ``code == -1``: the connection is closed and the loop ends.

    Args:
        c: Connection object handed to ``send_msg`` / ``recv_msg``; closed
            when the client sends ``code == -1``.
        addr: Client address, only used in the "close" log line.
        config: Dict providing ``classes_names_path``, ``images_path``,
            ``img_w``, ``img_h`` and ``yolo.anchors`` / ``yolo.classes``.
        pid: Forwarded unchanged to ``detect_image`` / ``detect_pose``.

    Returns:
        None. Any exception raised while handling a request is printed and
        ends the handler.
    """
    message = {"code": 1}
    send_msg(c, json.dumps(message).encode("utf-8"))
    yolo_losses = []
    complex_yolo_416 = []
    response = []
    net = None
    try:
        # One class name per line; the last split element (after the trailing
        # newline) is dropped.
        with open(config["classes_names_path"], "r") as names_file:
            classes = names_file.read().split("\n")[:-1]
        for i in range(3):
            yolo_losses.append(YOLOLoss(config["yolo"]["anchors"][i], config["yolo"]["classes"], (config["img_w"], config["img_h"])))
    except Exception as e:
        print(e.__str__())
        return
    while True:
        try:
            res_start = time.time()
            data = recv_msg(c)
            start = time.time()
            info = json.loads(str(data.decode('utf-8')))
            if info["code"] == 1 and info["name"] is not None:
                # NOTE(review): info["name"] comes from the client and is
                # concatenated into a filesystem path without sanitising.
                image_path = config["images_path"] + info["name"]
                with open(image_path, 'wb') as file:
                    file.write(base64.b64decode(info["data"]))
                # NOTE(review): `net` is created once for the first requested
                # app and reused even if a later request names another app.
                if info["app"] == "yolo3":
                    if net is None:
                        net = initial_yolo_model(config, info["size"])
                    compute_time, cpu, complex_yolo_416 = detect_image(info["name"], response, config, net, yolo_losses, classes, complex_yolo_416, pid)
                elif info["app"] == "yolo2":
                    if net is None:
                        net = initial_yolo_model_2()
                    start_yolo_2 = time.time()
                    # Read back exactly the file that was written above.
                    img = cv2.imread(image_path)
                    net.return_predict(img)
                    compute_time = time.time() - start_yolo_2
                    cpu = psutil.cpu_percent()/100
                    complex_yolo_416.append(compute_time * cpu * 2.8)
                    # The previous format string had a stray "}" and only
                    # three placeholders for four values, so .format() raised
                    # ValueError on every yolo2 request.
                    print("\tyolo2 cpu {} finished in {}s, system response in {} s, cpu in {} cycles(10^9)"
                          .format(round(cpu, 3), round(compute_time, 4)
                                  , round(np.average(response), 4)
                                  , round(np.average(complex_yolo_416), 3)))
                else:
                    if net is None:
                        net = initial_pose_model(config)
                    compute_time, cpu, complex_yolo_416 = detect_pose(info["name"], net, response, config, complex_yolo_416, pid)
                message = {"code": 2, "time": time.time() - start, "inx": info["inx"], "cpu": cpu,
                           "compute_time": compute_time, "path": info["name"], "next": True, "timestamp": info["timestamp"]}
                send_msg(c, json.dumps(message).encode("utf-8"))
                response.append(round(time.time() - res_start, 3))
            if info["code"] == -1:
                c.close()
                print(addr, "close")
                # Leave the loop; reading from the closed connection would
                # only raise, and the summary below would never be printed.
                break
        except Exception as e:
            print("1." + traceback.format_exc())
            return
    print(list(response))
    def __init__(self, config, is_training):
        """Create the model, optional optimizer and YOLO losses, then load weights.

        Args:
            config: Object exposing ``use_gpu``, ``parallels``, ``anchors``,
                ``image_size``, ``num_classes``, ``pretrained_weights`` and
                ``official_weights`` attributes.
            is_training: When true the network is put in training mode and an
                optimizer is created; otherwise the network is put in eval mode.
        """
        self.config = config
        self.is_training = is_training
        gpu_enabled = config.use_gpu

        self.net = Model(self.config, is_training=self.is_training)
        if not self.is_training:
            self.net.eval()
        else:
            self.net.train(is_training)
        self.net.init_weights(gpu=gpu_enabled)

        # The optimizer is built before the network is wrapped below.
        if self.is_training:
            self.optimizer = self._get_optimizer()

        if len(self.config.parallels) > 0:
            self.net = nn.DataParallel(self.net)
            if gpu_enabled:
                self.net = self.net.cuda()

        # One loss object per anchor group.
        self.yolo_loss = [
            YOLOLoss(config.anchors[scale], config.image_size,
                     config.num_classes)
            for scale in range(3)
        ]
        # (A RefineLoss used to be created here; it is currently disabled.)

        if not config.pretrained_weights:
            self.epoch = 0
            self.global_step = 0
        else:
            logging.info("Load pretrained weights from {}".format(
                config.pretrained_weights))
            if not gpu_enabled:
                # Remap stored tensors onto the CPU when no GPU is used.
                checkpoint = torch.load(config.pretrained_weights,
                                        map_location=torch.device('cpu'))
            else:
                checkpoint = torch.load(config.pretrained_weights)
            self.net.load_state_dict(checkpoint['state_dict'])
            # Resume one past the stored counters.
            self.epoch = checkpoint["epoch"] + 1
            self.global_step = checkpoint['global step'] + 1

        if config.official_weights:
            logging.info("Loading official weights from {}".format(
                config.official_weights))
            official_state = torch.load(config.official_weights)
            self.net.load_state_dict(official_state)
            self.global_step = 20000

        # Only threshold-based pruning is active; the pre-prune and
        # percentage-based variants are disabled.
        self.prune_weights_in_training_thresh()
Esempio n. 3
0
def detect(config):
    """Run the detector on one image and plot the predicted boxes.

    The image at ``config["img_path"]`` is resized to
    ``(config["img_w"], config["img_h"])``, passed through the network on
    CUDA, decoded by the three ``YOLOLoss`` heads, filtered with
    ``non_max_suppression`` and handed to ``plot_boxes`` together with the
    class names loaded from ``config["classname_path"]``.

    Args:
        config: Dict with ``pretrain_snapshot``, ``yolo.anchors``,
            ``yolo.classes``, ``img_w``, ``img_h``, ``img_path`` and
            ``classname_path``.
    """
    is_training = False
    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        logging.warning("missing pretrain_snapshot!!!")

    # YOLO loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(
            YOLOLoss(config["yolo"]["anchors"][i], config["yolo"]["classes"],
                     (config["img_w"], config["img_h"])))

    # Load tested img
    imgfile = config["img_path"]
    img = Image.open(imgfile).convert('RGB')
    resized = img.resize((config["img_w"], config["img_h"]))
    # Named image_tensor rather than `input` to avoid shadowing the builtin.
    image_tensor = image2torch(resized)
    image_tensor = image_tensor.to(torch.device("cuda"))

    start = time.time()
    # Pure inference: no autograd graph is needed, matching the other
    # evaluation entry points in this file.
    with torch.no_grad():
        outputs = net(image_tensor)
        output_list = []
        for i in range(3):
            output_list.append(yolo_losses[i](outputs[i]))
        output = torch.cat(output_list, 1)
        output = non_max_suppression(output,
                                     config["yolo"]["classes"],
                                     conf_thres=0.5,
                                     nms_thres=0.4)
    finish = time.time()

    print('%s: Predicted in %f seconds.' % (imgfile, (finish - start)))

    namefile = config["classname_path"]
    class_names = load_class_names(namefile)
    plot_boxes(img, output, 'predictions.jpg', class_names)
Esempio n. 4
0
def main(video_fn):
    """Run the detector on a video stream and display annotated frames.

    The params module named by ``sys.argv[1]`` is imported and its
    ``TRAINING_PARAMS`` dict is used as configuration. Each frame read from
    ``video_fn`` is halved in size, passed through the network, and the
    surviving detections are drawn on the frame, which is shown in a window.
    Pressing ``q`` or reaching the end of the stream stops the loop.

    Args:
        video_fn: Source passed to ``cv2.VideoCapture``.

    Raises:
        Exception: If ``config["pretrain_snapshot"]`` is empty.
        SystemExit: If the command line is malformed or the params file is
            missing (exit status 1).
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="[%(asctime)s %(filename)s] %(message)s")

    if len(sys.argv) != 2:
        logging.error("Usage: python video.py params.py")
        sys.exit(1)  # non-zero: this is an error exit

    params_path = sys.argv[1]

    if not os.path.isfile(params_path):
        logging.error("no params file found! path: {}".format(params_path))
        sys.exit(1)

    # Strip the trailing ".py" to obtain the module name.
    config = importlib.import_module(params_path[:-3]).TRAINING_PARAMS
    config["batch_size"] *= len(config["parallels"])

    is_training = False

    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # load pretrained model
    if config["pretrain_snapshot"]:
        logging.info("load checkpoint from {}".format(
            config["pretrain_snapshot"]))
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        raise Exception("missing pretrain_snapshot!!!")

    # YOLO loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(
            YOLOLoss(config["yolo"]["anchors"][i], config["yolo"]["classes"],
                     (config["img_w"], config["img_h"])))

    # load class names (one per line; the empty element after the trailing
    # newline is dropped)
    with open(config["classes_names_path"], "r") as names_file:
        classes = names_file.read().split("\n")[:-1]

    cap = cv2.VideoCapture(video_fn)
    # Check if camera opened successfully
    if not cap.isOpened():
        print("Error opening video stream or file")
    try:
        # Read until video is completed
        while cap.isOpened():
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: `frame` is not usable, so
                # stop before touching it (resizing it first used to crash).
                break
            frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)

            # 1. pre-process image
            logging.info("processing frame")
            image_tensor = prep_image(frame, config)

            with torch.no_grad():
                outputs = net(image_tensor)
                output_list = []

                for i in range(3):
                    output_list.append(yolo_losses[i](outputs[i]))

                output = torch.cat(output_list, 1)

                batch_detections = non_max_suppression(
                    output,
                    config["yolo"]["classes"],
                    conf_thres=config["confidence_threshold"],
                    nms_thres=0.45)

            for idx, detections in enumerate(batch_detections):
                if detections is not None:
                    unique_labels = detections[:, -1].cpu().unique()
                    n_cls_preds = len(unique_labels)
                    # One colour per distinct predicted class in this frame.
                    bbox_colors = random.sample(colors, n_cls_preds)

                    for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                        color = bbox_colors[int(
                            np.where(unique_labels == int(cls_pred))[0])]
                        # Rescale coordinates to original dimensions
                        x1, y1, box_w, box_h = get_rescaled_coords(
                            frame.shape[0], frame.shape[1], config["img_h"],
                            config["img_w"], x1, y1, x2, y2)

                        cv2.rectangle(frame, (x1, y1),
                                      (x1 + box_w, y1 + box_h), color, 2)

                        cv2.putText(frame, classes[int(cls_pred)], (x1, y1),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, color, 1,
                                    cv2.LINE_AA)

            cv2.imshow('Frame', frame)
            # Press Q on keyboard to  exit
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
    finally:
        # Always release the capture and close the windows, even when a
        # frame fails mid-processing.
        cap.release()
        cv2.destroyAllWindows()
Esempio n. 5
0
def evaluate(config):
    """Evaluate the detector on the validation set and log the hit ratio.

    For every ground-truth box, the predictions of the same class are
    scanned and the box counts as correct as soon as one prediction reaches
    ``config["iou_thres"]`` IoU. The running and final ratio
    ``correct / n_gt`` is logged (under the label "mAP" / "Mean Average
    Precision" used by the log messages).

    Args:
        config: Dict with ``pretrain_snapshot``, ``yolo.anchors``,
            ``yolo.classes``, ``img_w``, ``img_h``, ``val_path``,
            ``batch_size`` and ``iou_thres``.
    """
    is_training = False
    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        logging.warning("missing pretrain_snapshot!!!")

    # YOLO loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(
            YOLOLoss(config["yolo"]["anchors"][i], config["yolo"]["classes"],
                     (config["img_w"], config["img_h"])))

    # DataLoader
    dataloader = torch.utils.data.DataLoader(COCODataset(
        config["val_path"], (config["img_w"], config["img_h"]),
        is_training=False),
                                             batch_size=config["batch_size"],
                                             shuffle=False,
                                             num_workers=16,
                                             pin_memory=False)

    # Start the eval loop
    logging.info("Start eval.")
    n_gt = 0
    correct = 0
    for step, samples in enumerate(dataloader):
        images, labels = samples["image"], samples["label"]
        labels = labels.cuda()
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))
            output = torch.cat(output_list, 1)
            # Use the configured class count (the same one the loss heads
            # were built with) instead of a hard-coded 80.
            output = non_max_suppression(output,
                                         config["yolo"]["classes"],
                                         conf_thres=0.2)
            #  calculate
            for sample_i in range(labels.size(0)):
                # Get labels for sample where width is not zero (dummies)
                target_sample = labels[sample_i, labels[sample_i, :, 3] != 0]
                for obj_cls, tx, ty, tw, th in target_sample:
                    # Get rescaled gt coordinates
                    tx1, tx2 = config["img_w"] * (
                        tx - tw / 2), config["img_w"] * (tx + tw / 2)
                    ty1, ty2 = config["img_h"] * (
                        ty - th / 2), config["img_h"] * (ty + th / 2)
                    n_gt += 1
                    box_gt = torch.cat([
                        coord.unsqueeze(0) for coord in [tx1, ty1, tx2, ty2]
                    ]).view(1, -1)
                    sample_pred = output[sample_i]
                    if sample_pred is not None:
                        # Iterate through predictions where the class predicted is same as gt
                        for x1, y1, x2, y2, conf, obj_conf, obj_pred in sample_pred[
                                sample_pred[:, 6] == obj_cls]:
                            box_pred = torch.cat([
                                coord.unsqueeze(0)
                                for coord in [x1, y1, x2, y2]
                            ]).view(1, -1)
                            iou = bbox_iou(box_pred, box_gt)
                            if iou >= config["iou_thres"]:
                                correct += 1
                                # One match is enough for this ground truth.
                                break
        if n_gt:
            logging.info('Batch [%d/%d] mAP: %.5f' %
                         (step, len(dataloader), float(correct / n_gt)))

    if not n_gt:
        # An empty loader or a label-free set would otherwise end in a
        # ZeroDivisionError.
        logging.warning("No ground-truth boxes found; nothing to evaluate.")
        return
    logging.info('Mean Average Precision: %.5f' % float(correct / n_gt))
Esempio n. 6
0
def Evaluate(net, config):
    """Evaluate ``net`` on the validation set, one predicted box per image.

    For each image the row with the highest value in column 4 is taken as the
    prediction, its box is rescaled from network-input size to the original
    image size, and IoU / class agreement against the ground truth are
    recorded.

    Args:
        net: Model to evaluate; switched to eval mode here.
        config: Dict with ``device``, ``class_names``, ``valid_path``,
            ``img_w``, ``img_h``, ``valid_batch_size`` and
            ``yolo.anchors`` / ``yolo.classes``.

    Returns:
        Dict with ``mean_iou``, ``accuracy``, the per-image ``results`` list
        and the ``config`` that was used.
    """
    print('Start evaluating.')
    net.eval()

    device = config['device']

    # One class name per line; drop the empty element after the last newline.
    with open(config['class_names'], 'r') as names_file:
        class_names = names_file.read().split('\n')[:-1]

    dataset = DAC_SDC_2020_Dataset(dataset_path=config['valid_path'],
                                   class_names=class_names,
                                   img_w=config['img_w'],
                                   img_h=config['img_h'],
                                   is_training=False)

    yolo_losses = [
        YOLOLoss(config["yolo"]["anchors"][scale], config["yolo"]["classes"],
                 (config["img_w"], config["img_h"]), config['device'])
        for scale in range(3)
    ]

    batch_size = config['valid_batch_size']
    results = []
    right = 0
    for step in range(0, len(dataset), batch_size):
        batch_end = min(len(dataset), step + batch_size)
        samples = [dataset[ii] for ii in range(step, batch_end)]
        stacked = [sample['image'].unsqueeze(0) for sample in samples]
        images = torch.cat(stacked, 0)
        images = images.to(device)

        with torch.no_grad():
            outputs = net(images)
            decoded = torch.cat(
                [yolo_losses[i](outputs[i]) for i in range(3)], 1)

        for kk, sample in enumerate(samples):
            candidates = decoded[kk]
            # Keep only the row with the largest value in column 4.
            top = candidates[candidates[:, 4].argmax()]
            cc = top[5:].argmax()

            net_image = sample['image']
            original = sample['original_image']
            # Rescale centre/size from network-input to original-image size.
            x = top[0] / net_image.size(2) * original.shape[1]
            y = top[1] / net_image.size(1) * original.shape[0]
            w = top[2] / net_image.size(2) * original.shape[1]
            h = top[3] / net_image.size(1) * original.shape[0]

            # Centre/size -> integer corner coordinates.
            bbox = [
                int(x - w / 2),
                int(y - h / 2),
                int(x + w / 2),
                int(y + h / 2),
            ]
            iou = GetIOU(bbox, sample['original_bbox'])
            predicted_class = class_names[cc]
            if sample['original_label'] == predicted_class:
                right += 1

            results.append({
                'jpg_path': sample['jpg_path'],
                'pred': {
                    'bbox': bbox,
                    'class': predicted_class,
                },
                'gt': {
                    'bbox': sample['original_bbox'],
                    'class': sample['original_label'],
                },
                'iou': iou,
            })

    mean_iou = np.array([res['iou'] for res in results]).mean()

    return {
        'mean_iou': mean_iou,
        'accuracy': right / len(results),
        'results': results,
        'config': config,
    }
Esempio n. 7
0
def test(config):
    """Measure inference speed (s/image and FPS) for batch sizes 1 through 9.

    For each batch size the first ``batch_size`` files of
    ``config["images_path"]`` are loaded and preprocessed, the batch is
    padded with copies of the first image if some files could not be read,
    and the forward pass plus decoding and NMS is timed over 30 runs.

    Args:
        config: Dict with ``pretrain_snapshot``, ``yolo.anchors``,
            ``yolo.classes``, ``img_w``, ``img_h``, ``images_path`` and
            ``confidence_threshold``.

    Raises:
        Exception: If no snapshot is configured, the image directory is
            empty, or none of the selected files can be read as an image.
    """
    is_training = False
    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        logging.info("load checkpoint from {}".format(
            config["pretrain_snapshot"]))
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        raise Exception("missing pretrain_snapshot!!!")

    # YOLO loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(
            YOLOLoss(config["yolo"]["anchors"][i], config["yolo"]["classes"],
                     (config["img_w"], config["img_h"])))

    # prepare images path
    images_name = os.listdir(config["images_path"])
    images_path = [
        os.path.join(config["images_path"], name) for name in images_name
    ]
    if len(images_path) == 0:
        raise Exception("no image found in {}".format(config["images_path"]))

    # Start testing FPS of different batch size
    for batch_size in range(1, 10):
        # preprocess
        images = []
        for path in images_path[:batch_size]:
            image = cv2.imread(path, cv2.IMREAD_COLOR)
            if image is None:
                logging.error("read path error: {}. skip it.".format(path))
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, (config["img_w"], config["img_h"]),
                               interpolation=cv2.INTER_LINEAR)
            image = image.astype(np.float32)
            image /= 255.0
            # HWC -> CHW
            image = np.transpose(image, (2, 0, 1))
            image = image.astype(np.float32)
            images.append(image)
        if not images:
            # Nothing readable: padding with images[0] below would fail with
            # an opaque IndexError.
            raise Exception("no readable image found in {}".format(
                config["images_path"]))
        # Fill up to batch_size with copies of the first image.
        images.extend([images[0]] * (batch_size - len(images)))
        images = np.asarray(images)
        images = torch.from_numpy(images).cuda()
        # inference in 30 times and calculate average
        inference_times = []
        for _ in range(30):
            start_time = time.time()
            with torch.no_grad():
                outputs = net(images)
                output_list = []
                # Distinct index name: the old inner loop reused the outer
                # loop's `i`.
                for scale in range(3):
                    output_list.append(yolo_losses[scale](outputs[scale]))
                output = torch.cat(output_list, 1)
                batch_detections = non_max_suppression(
                    output,
                    config["yolo"]["classes"],
                    conf_thres=config["confidence_threshold"])
                torch.cuda.synchronize()  #  wait all done.
            end_time = time.time()
            inference_times.append(end_time - start_time)
        # Mean wall time per run, divided by batch size -> seconds per image.
        inference_time = sum(inference_times) / len(
            inference_times) / batch_size
        fps = 1.0 / inference_time
        logging.info(
            "Batch_Size: {}, Inference_Time: {:.5f} s/image, FPS: {}".format(
                batch_size, inference_time, fps))
Esempio n. 8
0
def train(config):
    """Train the detector and save a checkpoint after every epoch.

    Args:
        config: Dict with ``yolo.anchors``, ``yolo.classes``, ``img_w``,
            ``img_h``, ``lr.decay_step``, ``lr.decay_gamma``,
            ``pretrain_snapshot``, ``train_path``, ``batch_size``,
            ``start_epoch`` and ``epochs``; ``start_step`` is optional.
            ``config["global_step"]`` is written and incremented in place.
    """
    # Hyper-parameters
    config["global_step"] = config.get("start_step", 0)
    is_training = True

    # Net & Loss & Optimizer
    ## Net Main
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    ## YOLO Loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_loss = YOLOLoss(config["yolo"]["anchors"][i],
                             config["yolo"]["classes"], (config["img_w"], config["img_h"]))
        yolo_losses.append(yolo_loss)

    ## Optimizer and LR scheduler
    optimizer = _get_optimizer(config, net)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=config["lr"]["decay_step"], gamma=config["lr"]["decay_gamma"])

    net = nn.DataParallel(net)
    net = net.cuda()

    # Load checkpoint
    if config["pretrain_snapshot"]:
        logging.info("Load pretrained weights from {}".format(config["pretrain_snapshot"]))
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)

    # DataLoader
    dataloader = torch.utils.data.DataLoader(AIPrimeDataset(config["train_path"]),
                                             batch_size=config["batch_size"],
                                             shuffle=True, num_workers=16, pin_memory=False)

    losses_name = ["total_loss", "x", "y", "w", "h", "conf", "cls"]

    # Start the training
    logging.info("Start training.")
    for epoch in range(config["start_epoch"], config["epochs"]):
        for step, (images, labels) in enumerate(dataloader):
            start_time = time.time()
            config["global_step"] += 1

            # Forward
            outputs = net(images)

            # Loss
            # One independent list per loss component. `[[]] * n` would make
            # all n entries the same list, so every "component" (including
            # the back-propagated total) would be the sum of all components
            # over all scales.
            losses = [[] for _ in losses_name]
            for i in range(3):
                _loss_item = yolo_losses[i](outputs[i], labels)
                for j, l in enumerate(_loss_item):
                    losses[j].append(l)
            # Sum each component over the three scales.
            losses = [sum(l) for l in losses]
            loss = losses[0]

            # Zero & Backward & Step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging
            if step > 0 and step % 10 == 0:
                _loss = loss.item()
                duration = float(time.time() - start_time)
                example_per_second = config["batch_size"] / duration
                lr = optimizer.param_groups[0]['lr']
                logging.info(
                    "epoch [%.3d] iter = %d loss = %.2f example/sec = %.3f lr = %.5f " %
                    (epoch, step, _loss, example_per_second, lr)
                )

        # Things to be done for every epoch
        ## LR schedule
        lr_scheduler.step()
        ## Save checkpoint
        _save_checkpoint(net.state_dict(), config, epoch)

    # Finish training
    logging.info("QiaJiaBa~ BeiBei")
def evaluate(config):
    """Run detection on the validation set and score it with the COCO API.

    Detections are rescaled from network-input size to each image's original
    size, written to ``coco_results.json`` in COCO result format, and then
    evaluated with ``COCOeval`` against ``config["annotation_path"]``.

    Args:
        config: Dict with ``pretrain_snapshot``, ``yolo.anchors``,
            ``yolo.classes``, ``img_w``, ``img_h``, ``val_path``,
            ``batch_size`` and ``annotation_path``.
    """
    # Local import so this block does not depend on the file's import header.
    import ast

    is_training = False
    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        logging.info("Load checkpoint: {}".format(config["pretrain_snapshot"]))
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        logging.warning("missing pretrain_snapshot!!!")

    # YOLO loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(
            YOLOLoss(config["yolo"]["anchors"][i], config["yolo"]["classes"],
                     (config["img_w"], config["img_h"])))

    # DataLoader.
    dataloader = torch.utils.data.DataLoader(COCODataset(
        config["val_path"], (config["img_w"], config["img_h"]),
        is_training=False),
                                             batch_size=config["batch_size"],
                                             shuffle=False,
                                             num_workers=8,
                                             pin_memory=False)

    # Coco Prepare: maps the network's class index (as a string key) to the
    # COCO category id.
    with open("coco_index2category.json") as mapping_file:
        index2category = json.load(mapping_file)

    # Start the eval loop
    logging.info("Start eval.")
    coco_results = []
    coco_img_ids = set([])
    for step, samples in enumerate(dataloader):
        images, labels = samples["image"], samples["label"]
        image_paths, origin_sizes = samples["image_path"], samples[
            "origin_size"]
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output,
                                                   config["yolo"]["classes"],
                                                   conf_thres=0.01,
                                                   nms_thres=0.45)
        for idx, detections in enumerate(batch_detections):
            # The 12 characters before the 4-character extension are parsed
            # as the numeric image id.
            image_id = int(os.path.basename(image_paths[idx])[-16:-4])
            coco_img_ids.add(image_id)
            if detections is not None:
                # origin_size arrives as a string; parse it as a Python
                # literal instead of running it through eval().
                # NOTE(review): assumes the dataset stores a plain
                # list/tuple repr here - confirm against COCODataset.
                origin_size = ast.literal_eval(origin_sizes[idx])
                detections = detections.cpu().numpy()
                for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                    # Rescale from network-input size to original size.
                    x1 = x1 / config["img_w"] * origin_size[0]
                    x2 = x2 / config["img_w"] * origin_size[0]
                    y1 = y1 / config["img_h"] * origin_size[1]
                    y2 = y2 / config["img_h"] * origin_size[1]
                    # Result boxes are stored as (x, y, width, height).
                    w = x2 - x1
                    h = y2 - y1
                    coco_results.append({
                        "image_id":
                        image_id,
                        "category_id":
                        index2category[str(int(cls_pred.item()))],
                        "bbox": (float(x1), float(y1), float(w), float(h)),
                        "score":
                        float(conf),
                    })
        logging.info("Now {}/{}".format(step, len(dataloader)))
    save_results_path = "coco_results.json"
    with open(save_results_path, "w") as f:
        json.dump(coco_results,
                  f,
                  sort_keys=True,
                  indent=4,
                  separators=(',', ':'))
    logging.info("Save coco format results to {}".format(save_results_path))

    #  COCO api
    logging.info("Using coco-evaluate tools to evaluate.")
    cocoGt = COCO(config["annotation_path"])
    cocoDt = cocoGt.loadRes(save_results_path)
    cocoEval = COCOeval(cocoGt, cocoDt, "bbox")
    cocoEval.params.imgIds = list(coco_img_ids)  # real imgIds
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
Esempio n. 10
0
            }
        }

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = YoloHead(config)
        model.load_state_dict(torch.load('weights/yolov3_orig.pth'))
        model = model.to(device)
        model.train()
        losses = []
        cc_res = []
        confidence = 0.1
        iou_threshold = 0.6

        for i in range(3):
            losses.append(
                YOLOLoss(config['yolo']['anchors'][i], 80, [416, 416]))

        forw_sum = 0
        with torch.no_grad():
            for batch_idx, (images, targets) in enumerate(test_dataloader):
                # measure data loading time

                images = images.to(device)

                targets2 = []
                for t in targets:
                    dd = {}
                    for k, v in t.items():
                        if (k != 'img_size'):
                            dd[k] = v.to(device)
                        else:
Esempio n. 11
0
def evaluate(config):
    """Evaluate the detector on the validation set with per-class counters.

    For every ground-truth box, predictions of the same class are scanned
    and the box counts as correct once a prediction reaches
    ``config["iou_thresh"]`` IoU. Overall and per-class ratios are logged
    after every batch, and the overall ratio once more at the end.

    Args:
        config: Dict with ``pretrain_snapshot``, ``yolo.anchors``,
            ``yolo.classes``, ``img_w``, ``img_h``, ``val_path``,
            ``batch_size``, ``conf_thresh``, ``nms_thresh``, ``iou_thresh``
            and ``types`` (a name -> class index mapping).
    """
    is_training = False
    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        logging.warning("missing pretrain_snapshot!!!")

    # YOLO loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(YOLOLoss(config["yolo"]["anchors"][i],
                                    config["yolo"]["classes"], (config["img_w"], config["img_h"])))

    # DataLoader
    dataloader = torch.utils.data.DataLoader(dataset=COCODataset(config["val_path"], config["img_w"]),
                                             batch_size=config["batch_size"],
                                             shuffle=True, num_workers=1, pin_memory=False)

    # Start the eval loop
    logging.info("Start eval.")
    n_gt = 0
    correct = 0
    logging.info('%s' % str(dataloader))

    gt_histro = {}
    pred_histro = {}
    correct_histro = {}

    # gt/pred counters start at 1 so the per-class ratios below never divide
    # by zero.
    for i in range(config["yolo"]["classes"]):
        gt_histro[i] = 1
        pred_histro[i] = 1
        correct_histro[i] = 0

    # Reverse mapping (class index -> type name) for the per-class log
    # lines; it does not change between batches, so build it once.
    reverse_types = {index: name for name, index in config["types"].items()}

    # images holds every picture of the batch, labels every label of the batch
    for step, (images, labels) in enumerate(dataloader):
        labels = labels.cuda()
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))

            # Concatenate the predictions of the three scales along dim 1
            # (dim 0: images in the batch, dim 1: predicted boxes of one
            # image, dim 2: the values of one prediction).
            batch_output = torch.cat(output_list, dim=1)

            logging.info('%s' % str(batch_output.shape))

            # Non-maximum suppression
            batch_output = non_max_suppression(prediction=batch_output, num_classes=config["yolo"]["classes"], conf_thres=config["conf_thresh"], nms_thres=config["nms_thresh"])
            #  calculate
            for sample_index_in_batch in range(labels.size(0)):
                # fetched img sample in tensor( C(RxGxB) x H x W ), transform to cv2 format in  H x W x C(BxGxR)
                sample_image = images[sample_index_in_batch].numpy()
                sample_image = np.transpose(sample_image, (1, 2, 0))
                sample_image = cv2.cvtColor(sample_image, cv2.COLOR_RGB2BGR)

                logging.debug("fetched img %d size %s" % (sample_index_in_batch, sample_image.shape))
                # Get labels for sample where width is not zero (dummies)(init all labels to zeros in array)
                target_sample = labels[sample_index_in_batch, labels[sample_index_in_batch, :, 3] != 0]
                # get prediction for this sample
                sample_pred = batch_output[sample_index_in_batch]
                if sample_pred is not None:
                    for x1, y1, x2, y2, conf, obj_conf, obj_pred in sample_pred:  # for each prediction box
                        # NOTE(review): the annotated image is not used
                        # after this loop in the visible code.
                        sample_image = draw_prediction(sample_image, conf, obj_conf, int(obj_pred), (x1, y1, x2, y2), config)

                # Each ground truth: class index obj_cls, relative centre
                # x / y, relative width and height.
                for obj_cls, tx, ty, tw, th in target_sample:
                    # Get rescaled gt coordinates: top-left (tx1, ty1) and
                    # bottom-right (tx2, ty2) in network-input pixels.
                    tx1, tx2 = config["img_w"] * (tx - tw / 2), config["img_w"] * (tx + tw / 2)
                    ty1, ty2 = config["img_h"] * (ty - th / 2), config["img_h"] * (ty + th / 2)
                    # Count ground truths for the statistics.
                    n_gt += 1
                    gt_histro[int(obj_cls)] += 1
                    # Shape (1, 4) tensor used for the IoU computation.
                    box_gt = torch.cat([coord.unsqueeze(0) for coord in [tx1, ty1, tx2, ty2]]).view(1, -1)

                    sample_pred = batch_output[sample_index_in_batch]
                    if sample_pred is not None:
                        # Iterate through predictions where the class predicted is same as gt
                        for x1, y1, x2, y2, conf, obj_conf, obj_pred in sample_pred[sample_pred[:, 6] == obj_cls]:
                            box_pred = torch.cat([coord.unsqueeze(0) for coord in [x1, y1, x2, y2]]).view(1, -1)
                            pred_histro[int(obj_pred)] += 1
                            iou = bbox_iou(box_pred, box_gt)
                            if iou >= config["iou_thresh"]:
                                correct += 1
                                correct_histro[int(obj_pred)] += 1
                                # One match is enough for this ground truth.
                                break
        if n_gt:
            logging.info('Batch [%d/%d] mAP: %.5f' % (step, len(dataloader), float(correct / n_gt)))
            logging.info('mAP Histro:%s' % str([  reverse_types[i] +':'+ str(int(100 * correct_histro[i] / gt_histro[i])) for i in range(config["yolo"]["classes"] )  ]))
            logging.info('Recall His:%s' % str([  reverse_types[i] +':'+ str(int(100 * correct_histro[i] / pred_histro[i])) for i in range(config["yolo"]["classes"]) ]))

    if not n_gt:
        # An empty loader or a label-free set would otherwise end in a
        # ZeroDivisionError.
        logging.warning("No ground-truth boxes found; nothing to evaluate.")
        return
    logging.info('Mean Average Precision: %.5f' % float(correct / n_gt))
Esempio n. 12
0
def evaluate(config):
    """Evaluate a pretrained detector on a COCO-style validation set.

    Runs the network over ``config["val_path"]``, prints a running per-image
    average precision, dumps every detection to ``coco_results.json`` in the
    COCO result format and finally scores that file with the COCO API.

    Args:
        config: Evaluation settings. Keys read directly by this function:
            ``pretrain_snapshot``, ``yolo`` (``anchors`` and ``classes``),
            ``img_w``, ``img_h``, ``val_path``, ``batch_size``, ``iou_thres``
            and ``annotation_path``. The whole dict is also handed to
            ``ModelMain``.
    """
    is_training = False
    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        logging.info("Load checkpoint: {}".format(config["pretrain_snapshot"]))
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        logging.warning("missing pretrain_snapshot!!!")

    # YOLO loss with 3 scales (called without targets below to decode boxes)
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(
            YOLOLoss(config["yolo"]["anchors"][i], config["yolo"]["classes"],
                     (config["img_w"], config["img_h"])))

    # DataLoader.
    dataloader = torch.utils.data.DataLoader(COCODataset(
        config["val_path"], (config["img_w"], config["img_h"]),
        is_training=False),
                                             batch_size=config["batch_size"],
                                             shuffle=False,
                                             num_workers=8,
                                             pin_memory=False)
    # Used only for the progress message (was a hard-coded 5000).
    num_samples = len(dataloader.dataset)

    # Coco Prepare: maps the predicted class index (as a string key) to the
    # COCO category id. Use a context manager so the file handle is closed.
    with open("coco_index2category.json") as category_file:
        index2category = json.load(category_file)

    # Start the eval loop
    logging.info("Start eval.")
    coco_results = []
    coco_img_ids = set()
    APs = []

    for step, samples in enumerate(dataloader):
        images, labels = samples["image"], samples["label"]
        image_paths, origin_sizes = samples["image_path"], samples[
            "origin_size"]
        with torch.no_grad():
            outputs = net(images)
            output_list = []

            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output,
                                                   config["yolo"]["classes"],
                                                   conf_thres=0.0001,
                                                   nms_thres=0.45)

        for idx, detections in enumerate(batch_detections):

            correct = []
            # Keep only label rows whose column 3 is non-zero; all-zero rows
            # are presumably padding added by the dataset.
            annotations = labels[idx, labels[idx, :, 3] != 0]

            # The 12 characters before the 4-character extension are taken
            # as the numeric image id.
            image_id = int(os.path.basename(image_paths[idx])[-16:-4])
            coco_img_ids.add(image_id)
            if detections is None:
                # No detections: count an AP of 0 only if there was
                # something to find.
                if annotations.size(0) != 0:
                    APs.append(0)
                continue

            # Move to host memory first so the NumPy sort below also works
            # when the detections live on a CUDA device, then order by
            # descending confidence (column 4).
            detections = detections.cpu().numpy()
            detections = detections[np.argsort(-detections[:, 4])]

            # NOTE(review): eval() on a string coming from the dataset. It is
            # trusted here; consider ast.literal_eval if the data source can
            # ever be untrusted.
            origin_size = eval(origin_sizes[idx])
            # ===========================================================================================================================
            # The amount of padding that was added
            pad_x = max(origin_size[1] - origin_size[0],
                        0) * (config["img_w"] / max(origin_size))
            pad_y = max(origin_size[0] - origin_size[1],
                        0) * (config["img_w"] / max(origin_size))
            # Image height and width after padding is removed
            unpad_h = config["img_w"] - pad_y
            unpad_w = config["img_w"] - pad_x
            # ===========================================================================================================================

            if annotations.size(0) == 0:
                # Nothing to match against: every detection is a miss.
                correct.extend([0 for _ in range(len(detections))])
            else:
                # Convert labels from (cx, cy, w, h) in columns 1..4 to
                # corner form (x1, y1, x2, y2), scaled by the input width.
                target_boxes = torch.FloatTensor(annotations[:, 1:].shape)
                target_boxes[:,
                             0] = (annotations[:, 1] - annotations[:, 3] / 2)
                target_boxes[:,
                             1] = (annotations[:, 2] - annotations[:, 4] / 2)
                target_boxes[:,
                             2] = (annotations[:, 1] + annotations[:, 3] / 2)
                target_boxes[:,
                             3] = (annotations[:, 2] + annotations[:, 4] / 2)
                target_boxes *= config["img_w"]

                detected = []

                for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                    # Keep the network-space box for the IoU test before
                    # x1/y1 are rescaled to the original image below.
                    pred_bbox = (x1, y1, x2, y2)

                    h = ((y2 - y1) / unpad_h) * origin_size[1]
                    w = ((x2 - x1) / unpad_w) * origin_size[0]
                    y1 = ((y1 - pad_y // 2) / unpad_h) * origin_size[1]
                    x1 = ((x1 - pad_x // 2) / unpad_w) * origin_size[0]

                    coco_results.append({
                        "image_id":
                        image_id,
                        "category_id":
                        index2category[str(int(cls_pred.item()))],
                        "bbox": (float(x1), float(y1), float(w), float(h)),
                        "score":
                        float(conf),
                    })

                    pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
                    # Compute iou with target boxes
                    iou = bbox_iou(pred_bbox, target_boxes)
                    # Extract index of largest overlap
                    best_i = np.argmax(iou)
                    # If overlap exceeds threshold and classification is correct mark as correct
                    if iou[best_i] > config[
                            'iou_thres'] and cls_pred == annotations[
                                best_i, 0] and best_i not in detected:
                        correct.append(1)
                        detected.append(best_i)
                    else:
                        correct.append(0)

            true_positives = np.array(correct)
            false_positives = 1 - true_positives

            # Compute cumulative false positives and true positives
            false_positives = np.cumsum(false_positives)
            true_positives = np.cumsum(true_positives)

            # Compute recall and precision at all ranks
            recall = true_positives / annotations.size(0) if annotations.size(
                0) else true_positives
            precision = true_positives / np.maximum(
                true_positives + false_positives,
                np.finfo(np.float64).eps)

            # Compute average precision
            AP = compute_ap(recall, precision)
            APs.append(AP)

            print("+ Sample [%d/%d] AP: %.4f (%.4f)" %
                  (len(APs), num_samples, AP, np.mean(APs)))
        logging.info("Now {}/{}".format(step, len(dataloader)))
    print("Mean Average Precision: %.4f" % np.mean(APs))

    save_results_path = "coco_results.json"
    with open(save_results_path, "w") as f:
        json.dump(coco_results,
                  f,
                  sort_keys=True,
                  indent=4,
                  separators=(',', ':'))
    logging.info("Save coco format results to {}".format(save_results_path))

    #  COCO api
    logging.info("Using coco-evaluate tools to evaluate.")
    cocoGt = COCO(config["annotation_path"])
    cocoDt = cocoGt.loadRes(save_results_path)
    cocoEval = COCOeval(cocoGt, cocoDt, "bbox")
    cocoEval.params.imgIds = list(coco_img_ids)  # real imgIds
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
Esempio n. 13
0
def train(imgs, labels, checkpoint_path, config):
    """Train the detector on the given data and save the final weights.

    Args:
        imgs: Forwarded to ``SatDataset`` as its first argument.
        labels: Forwarded to ``SatDataset`` as its second argument.
        checkpoint_path: Weights to load before training; skipped when falsy.
        config: Training settings. Keys read directly by this function:
            ``start_step`` (optional), ``export_onnx`` (optional), ``lr``
            (``decay_step``, ``decay_gamma``), ``yolo`` (``anchors``,
            ``classes``), ``img_w``, ``img_h``, ``batch_size``, ``epochs``
            and ``tensorboard_writer``. ``global_step`` is written back into
            the dict and incremented once per batch.

    Returns:
        The value returned by ``_save_checkpoint`` for the final weights
        (presumably the checkpoint path -- confirm against that helper).
    """
    config["global_step"] = config.get("start_step", 0)
    is_training = not config.get("export_onnx")

    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Optimizer and learning rate
    optimizer = _get_optimizer(config, net)
    lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=config["lr"]["decay_step"],
        gamma=config["lr"]["decay_gamma"])

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if checkpoint_path:
        logging.info("Load pretrained weights from {}".format(checkpoint_path))
        state_dict = torch.load(checkpoint_path)
        net.load_state_dict(state_dict)

    # YOLO loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(
            YOLOLoss(config["yolo"]["anchors"][i], config["yolo"]["classes"],
                     (config["img_w"], config["img_h"])))

    # DataLoader
    dataloader = torch.utils.data.DataLoader(SatDataset(
        imgs, labels, (config["img_w"], config["img_h"]), is_training=True),
                                             batch_size=config["batch_size"],
                                             shuffle=True,
                                             num_workers=1,
                                             pin_memory=True)

    losses_name = ["total_loss", "x", "y", "w", "h", "conf", "cls"]

    # Start the training loop
    logging.info("Start training.")
    for epoch in range(config["epochs"]):
        for step, samples in enumerate(dataloader):
            # Use a distinct name so the ``labels`` argument is not shadowed.
            images, batch_labels = samples["image"], samples["label"]
            start_time = time.time()
            config["global_step"] += 1

            # Forward and backward
            optimizer.zero_grad()
            outputs = net(images)
            # One independent accumulator per loss term. ``[[]] * n`` would
            # repeat a single shared list, making every summed term equal to
            # the sum of all terms from all scales.
            losses = [[] for _ in losses_name]
            for i in range(3):
                _loss_item = yolo_losses[i](outputs[i], batch_labels)
                for j, l in enumerate(_loss_item):
                    losses[j].append(l)
            # Sum each term over the three scales; index 0 is the total loss.
            losses = [sum(l) for l in losses]
            loss = losses[0]
            loss.backward()
            optimizer.step()

            if step > 0 and step % 10 == 0:
                _loss = loss.item()
                duration = float(time.time() - start_time)
                example_per_second = config["batch_size"] / duration
                lr = optimizer.param_groups[0]['lr']
                logging.info(
                    "epoch [%.3d] iter = %d loss = %.2f example/sec = %.3f lr = %.5f "
                    % (epoch, step, _loss, example_per_second, lr))
                config["tensorboard_writer"].add_scalar(
                    "lr", lr, config["global_step"])
                config["tensorboard_writer"].add_scalar(
                    "example/sec", example_per_second, config["global_step"])
                for i, name in enumerate(losses_name):
                    value = _loss if i == 0 else losses[i]
                    config["tensorboard_writer"].add_scalar(
                        name, value, config["global_step"])
        # The scheduler is stepped once per epoch.
        lr_scheduler.step()

    checkpoint_path = _save_checkpoint(net.state_dict(), config)
    logging.info("Bye~")
    return checkpoint_path
Esempio n. 14
0
def test(config):
    """Run inference on every image in a folder and save annotated copies.

    Each readable image under ``config["images_path"]`` is resized to the
    network input size, passed through the model in batches, and written to
    ``./output/`` with its detections drawn as labelled rectangles.

    Args:
        config: Inference settings. Keys read directly by this function:
            ``pretrain_snapshot``, ``yolo`` (``anchors``, ``classes``),
            ``img_w``, ``img_h``, ``images_path``, ``batch_size``,
            ``confidence_threshold`` and ``classes_names_path``.

    Raises:
        Exception: If ``pretrain_snapshot`` is empty or the image folder
            contains no entries.
    """
    is_training = False
    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        logging.info("load checkpoint from {}".format(
            config["pretrain_snapshot"]))
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        raise Exception("missing pretrain_snapshot!!!")

    # YOLO loss with 3 scales (called without targets below to decode boxes)
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(
            YOLOLoss(config["yolo"]["anchors"][i], config["yolo"]["classes"],
                     (config["img_w"], config["img_h"])))

    # prepare images path
    images_name = os.listdir(config["images_path"])
    images_path = [
        os.path.join(config["images_path"], name) for name in images_name
    ]
    if len(images_path) == 0:
        raise Exception("no image found in {}".format(config["images_path"]))

    # Class names and the output folder do not change between batches, so
    # set them up once; the context manager closes the file handle.
    with open(config["classes_names_path"], "r") as names_file:
        classes = names_file.read().split("\n")[:-1]
    os.makedirs("./output/", exist_ok=True)

    # Start inference
    batch_size = config["batch_size"]
    for step in range(0, len(images_path), batch_size):
        # preprocess
        images = []
        images_origin = []
        # ``step`` already advances by ``batch_size``, so it is the offset of
        # the batch's first image and must not be multiplied again.
        for path in images_path[step:step + batch_size]:
            logging.info("processing: {}".format(path))
            image = cv2.imread(path, cv2.IMREAD_COLOR)
            if image is None:
                logging.error("read path error: {}. skip it.".format(path))
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            images_origin.append(image)  # keep for save result
            image = cv2.resize(image, (config["img_w"], config["img_h"]),
                               interpolation=cv2.INTER_LINEAR)
            image = image.astype(np.float32)
            image /= 255.0
            # HWC -> CHW
            image = np.transpose(image, (2, 0, 1))
            image = image.astype(np.float32)
            images.append(image)
        if not images:
            # Every file in this batch failed to load; nothing to infer.
            continue
        images = np.asarray(images)
        images = torch.from_numpy(images).cuda()
        # inference
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(
                output,
                config["yolo"]["classes"],
                conf_thres=config["confidence_threshold"])

        # write result images. Draw bounding boxes and labels of detections
        for idx, detections in enumerate(batch_detections):
            # plt.subplots creates the figure itself; a preceding
            # plt.figure() would only leave an extra, never-closed figure.
            fig, ax = plt.subplots(1)
            ax.imshow(images_origin[idx])
            if detections is not None:
                unique_labels = detections[:, -1].cpu().unique()
                n_cls_preds = len(unique_labels)
                # One randomly chosen colour per distinct predicted class.
                bbox_colors = random.sample(colors, n_cls_preds)
                for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                    color = bbox_colors[int(
                        np.where(unique_labels == int(cls_pred))[0])]
                    # Rescale coordinates to original dimensions
                    ori_h, ori_w = images_origin[idx].shape[:2]
                    pre_h, pre_w = config["img_h"], config["img_w"]
                    box_h = ((y2 - y1) / pre_h) * ori_h
                    box_w = ((x2 - x1) / pre_w) * ori_w
                    y1 = (y1 / pre_h) * ori_h
                    x1 = (x1 / pre_w) * ori_w
                    # Create a Rectangle patch
                    bbox = patches.Rectangle((x1, y1),
                                             box_w,
                                             box_h,
                                             linewidth=2,
                                             edgecolor=color,
                                             facecolor='none')
                    # Add the bbox to the plot
                    ax.add_patch(bbox)
                    # Add label
                    plt.text(x1,
                             y1,
                             s=classes[int(cls_pred)],
                             color='white',
                             verticalalignment='top',
                             bbox={
                                 'color': color,
                                 'pad': 0
                             })
            # Save generated image with detections
            plt.axis('off')
            plt.gca().xaxis.set_major_locator(NullLocator())
            plt.gca().yaxis.set_major_locator(NullLocator())
            plt.savefig('output/{}_{}.jpg'.format(step, idx),
                        bbox_inches='tight',
                        pad_inches=0.0)
            plt.close(fig)
    logging.info("Save all results to ./output/")
Esempio n. 15
0
def evaluate(config):
    """Log per-class precision and recall of a pretrained detector.

    Runs the network over ``config["test_path"]``, applies class-wise NMS and,
    for every ground-truth box, looks for a same-class prediction whose IoU
    reaches ``config["iou_thresh"]``. Results are logged as integer
    percentages per class; nothing is returned.

    Args:
        config: Evaluation settings. Keys read directly by this function:
            ``pretrain_snapshot``, ``yolo`` (``anchors``, ``classes``),
            ``img_w``, ``img_h``, ``test_path``, ``batch_size``,
            ``conf_thresh``, ``nms_thresh``, ``iou_thresh`` and ``types``
            (a mapping whose values are the class indices).
    """
    is_training = False
    # Load and initialize network
    net = ModelMain(config, is_training=is_training)
    net.train(is_training)

    # Set data parallel
    net = nn.DataParallel(net)
    net = net.cuda()

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        state_dict = torch.load(config["pretrain_snapshot"])
        net.load_state_dict(state_dict)
    else:
        logging.warning("missing pretrain_snapshot!!!")

    # YOLO loss with 3 scales (called without targets below to decode boxes)
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(YOLOLoss(config["yolo"]["anchors"][i],
                                    config["yolo"]["classes"], (config["img_w"], config["img_h"])))

    # DataLoader
    dataloader = torch.utils.data.DataLoader(dataset=COCODataset(config["test_path"], config["img_w"]),
                                             batch_size=config["batch_size"],
                                             shuffle=False, num_workers=8, pin_memory=False)

    # Totals over the whole dataset. They must not be reset per sample,
    # otherwise the final ``if n_gt`` gate would depend on the last sample
    # only and could suppress the report.
    n_gt = 0
    correct = 0

    gt_histro = {}
    pred_histro = {}
    correct_histro = {}

    # Ground-truth and prediction counts start at 1, presumably so the
    # divisions in the final report cannot hit zero.
    for i in range(config["yolo"]["classes"]):
        gt_histro[i] = 1
        pred_histro[i] = 1
        correct_histro[i] = 0

    # ``images`` holds all pictures of one batch, ``labels`` all of its labels.
    for images, labels in dataloader:
        labels = labels.cuda()
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))

            # Concatenate the predictions of the three scales along dim 1
            # (dim 0: image in the batch, dim 1: predicted boxes of one
            # image, dim 2: the values of each prediction).
            output = torch.cat(output_list, dim=1)

            # Non-maximum suppression, applied per class.
            output = class_nms(prediction=output, num_classes=config["yolo"]["classes"],conf_thres=config["conf_thresh"], nms_thres=config["nms_thresh"])

            for sample_i in range(labels.size(0)):

                # Count every prediction of this sample by its class
                # (column 6).
                sample_pred = output[sample_i]
                if sample_pred is not None:
                    for pred_i in range(sample_pred.shape[0]):
                        pred_histro[int(sample_pred[pred_i, 6])] += 1

                # Get labels for sample where width is not zero (dummies)
                target_sample = labels[sample_i, labels[sample_i, :, 3] != 0]
                # Ground truth: class index, relative centre x, relative
                # centre y, relative width, relative height.
                for obj_cls, tx, ty, tw, th in target_sample:
                    # Rescale to input-pixel corners: top-left (tx1, ty1)
                    # and bottom-right (tx2, ty2).
                    tx1, tx2 = config["img_w"] * (tx - tw / 2), config["img_w"] * (tx + tw / 2)
                    ty1, ty2 = config["img_h"] * (ty - th / 2), config["img_h"] * (ty + th / 2)
                    # Count ground-truth boxes for the statistics.
                    n_gt += 1
                    gt_histro[int(obj_cls)] += 1
                    # Shape (1, 4) tensor, as used for the IoU computation.
                    box_gt = torch.cat([coord.unsqueeze(0) for coord in [tx1, ty1, tx2, ty2]]).view(1, -1)

                    if sample_pred is None:
                        continue
                    # For each ground truth, scan only the predictions whose
                    # class equals the ground-truth class.
                    for x1, y1, x2, y2, conf, obj_conf, obj_pred in sample_pred[sample_pred[:, 6] == obj_cls]:
                        box_pred = torch.cat([coord.unsqueeze(0) for coord in [x1, y1, x2, y2]]).view(1, -1)
                        iou = bbox_iou(box_pred, box_gt)
                        if iou >= config["iou_thresh"]:
                            correct += 1
                            correct_histro[int(obj_pred)] += 1
                            # One match per ground-truth box is enough.
                            break
    if n_gt:
        types = config["types"]

        # Inverted ``types`` mapping: class index -> name.
        reverse_types = {}
        for key in types.keys():
            reverse_types[types[key]] = key

        logging.info('Precision:%s' % str([reverse_types[i] +':'+ str(int(100 * correct_histro[i] / pred_histro[i])) for i in range(config["yolo"]["classes"]) ]))
        logging.info('Recall   :%s' % str([reverse_types[i] +':'+ str(int(100 * correct_histro[i] / gt_histro[i])) for i in range(config["yolo"]["classes"])]))
def train(config):
    """Train the detector on the COCO training set described by ``config``.

    A checkpoint is written through ``_save_checkpoint`` after every epoch
    and once more when training finishes. ``config["global_step"]`` is
    initialised from the optional ``start_step`` entry and incremented once
    per batch; it is the step value attached to every tensorboard scalar.

    Args:
        config: Training settings. Keys read directly by this function:
            ``start_step`` (optional), ``export_onnx`` (optional), ``lr``
            (``decay_step``, ``decay_gamma``), ``pretrain_snapshot``,
            ``yolo`` (``anchors``, ``classes``), ``img_w``, ``img_h``,
            ``train_path``, ``batch_size``, ``epochs`` and
            ``tensorboard_writer``.
    """
    config["global_step"] = config.get("start_step", 0)
    training_mode = not config.get("export_onnx")

    # Build the model and put it into the requested mode.
    net = ModelMain(config, is_training=training_mode)
    net.train(training_mode)

    # The optimizer is created on the bare model, before it gets wrapped.
    optimizer = _get_optimizer(config, net)
    lr_settings = config["lr"]
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                             step_size=lr_settings["decay_step"],
                                             gamma=lr_settings["decay_gamma"])

    # Multi-GPU wrapper, then move to CUDA.
    net = nn.DataParallel(net).cuda()

    # Optionally start from saved weights.
    snapshot = config["pretrain_snapshot"]
    if snapshot:
        logging.info("Load pretrained weights from {}".format(snapshot))
        net.load_state_dict(torch.load(snapshot))

    # NOTE: the ONNX-export and online-evaluation hooks that used to sit here
    # are disabled.

    # One loss module per detection scale.
    input_size = (config["img_w"], config["img_h"])
    yolo_losses = [
        YOLOLoss(config["yolo"]["anchors"][scale], config["yolo"]["classes"],
                 input_size) for scale in range(3)
    ]

    train_set = COCODataset(config["train_path"], input_size, is_training=True)
    dataloader = torch.utils.data.DataLoader(train_set,
                                             batch_size=config["batch_size"],
                                             shuffle=True,
                                             num_workers=32,
                                             pin_memory=True)

    losses_name = ["total_loss", "x", "y", "w", "h", "conf", "cls"]

    logging.info("Start training.")
    for epoch in range(config["epochs"]):
        for step, samples in enumerate(dataloader):
            images = samples["image"]
            labels = samples["label"]
            start_time = time.time()
            config["global_step"] += 1

            # Forward pass.
            optimizer.zero_grad()
            outputs = net(images)

            # Collect each loss term from all three scales, then sum per term.
            per_term = [[] for _ in losses_name]
            for scale in range(3):
                scale_terms = yolo_losses[scale](outputs[scale], labels)
                for term_index, term in enumerate(scale_terms):
                    per_term[term_index].append(term)
            losses = [sum(values) for values in per_term]

            # Backward pass on the total loss (first term).
            loss = losses[0]
            loss.backward()
            optimizer.step()

            # Report every 10th step, except the very first one.
            if step <= 0 or step % 10 != 0:
                continue

            _loss = loss.item()
            duration = float(time.time() - start_time)
            example_per_second = config["batch_size"] / duration
            lr = optimizer.param_groups[0]['lr']
            logging.info(
                "epoch [%.3d] iter = %d loss = %.2f example/sec = %.3f lr = %.5f "
                % (epoch, step, _loss, example_per_second, lr))
            config["tensorboard_writer"].add_scalar("lr", lr,
                                                    config["global_step"])
            config["tensorboard_writer"].add_scalar("example/sec",
                                                    example_per_second,
                                                    config["global_step"])
            for term_index, name in enumerate(losses_name):
                if term_index == 0:
                    value = _loss
                else:
                    value = losses[term_index]
                config["tensorboard_writer"].add_scalar(
                    name, value, config["global_step"])

        # End of epoch: checkpoint first, then advance the LR schedule.
        _save_checkpoint(net.state_dict(), config)
        lr_scheduler.step()

    # Final checkpoint after the last epoch.
    _save_checkpoint(net.state_dict(), config)
    logging.info("Bye~")
def train(config):
    """Train a ``Yolo_v3`` model and save one checkpoint per epoch.

    After every epoch the model is scored with ``Evaluate``; the returned
    dict is extended with the model ``state_dict`` and the epoch number, then
    saved as ``epoch_<n>_net.pth`` under ``config["sub_working_dir"]``.

    Args:
        config: Training settings. Keys read directly by this function:
            ``device``, ``pretrain_snapshot``, ``lr`` (``decay_step``,
            ``decay_gamma``), ``yolo`` (``anchors``, ``classes``), ``img_w``,
            ``img_h``, ``class_names`` (path to a newline-separated file),
            ``train_path``, ``train_batch_size``, ``epochs``,
            ``display_interval`` and ``sub_working_dir``.
    """
    device = config['device']

    net = Yolo_v3(config)

    # Restore pretrain model
    if config["pretrain_snapshot"]:
        net.LoadPretrainedModel(config['pretrain_snapshot'])
        logging.info("Loaded checkpoint from {}".format(
            config["pretrain_snapshot"]))
    else:
        logging.info('No pretrained model to use.')

    # Optimizer and learning rate
    optimizer = _get_optimizer(config, net)
    lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=config["lr"]["decay_step"],
        gamma=config["lr"]["decay_gamma"])

    # Set data parallel (only when running on CUDA)
    if device == 'cuda':
        net = nn.DataParallel(net)
    net = net.to(device)

    # YOLO loss with 3 scales
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(
            YOLOLoss(config["yolo"]["anchors"][i], config["yolo"]["classes"],
                     (config["img_w"], config["img_h"]), device))

    # The text after the final newline is dropped, so the file is expected
    # to end with a newline.
    with open(config['class_names'], 'r') as f:
        class_names = f.read().split('\n')[:-1]
    print(class_names)
    dataset = DAC_SDC_2020_Dataset(dataset_path=config['train_path'],
                                   class_names=class_names,
                                   img_w=config['img_w'],
                                   img_h=config['img_h'],
                                   is_training=True)
    dataloader = torch.utils.data.DataLoader(
        dataset=dataset, batch_size=config['train_batch_size'], shuffle=True)

    losses_name = ["total_loss", "x", "y", "w", "h", "conf", "cls"]

    # Start the training loop
    logging.info("Start training.")
    for epoch in range(config["epochs"]):
        logging.info(
            f'~~~~~~~~~~~~~~~~~~~~~~~~~~~~ epoch = {epoch} ~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
        )

        # Re-enter training mode each epoch (presumably because Evaluate
        # below switches the model to eval mode -- confirm).
        net.train()
        for step, samples in enumerate(dataloader):
            start_time = time.time()

            images, labels = samples["image"], samples["label"]
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = net(images)

            # One independent accumulator per loss term, summed over the
            # three scales; index 0 is the total loss.
            losses = [[] for _ in losses_name]
            for i in range(3):
                _loss_item = yolo_losses[i](outputs[i], labels)
                for j, l in enumerate(_loss_item):
                    losses[j].append(l)
            losses = [sum(l) for l in losses]
            loss = losses[0]

            loss.backward()
            optimizer.step()

            if step > 0 and step % config['display_interval'] == 0:
                _loss = loss.item()
                duration = float(time.time() - start_time)
                example_per_second = config["train_batch_size"] / duration
                lr = optimizer.param_groups[0]['lr']
                logging.info(
                    "epoch [%.3d] iter = %d loss = %.6f example/sec = %.3f lr = %.5f "
                    % (epoch, step, _loss, example_per_second, lr))

        # Score the model, then store the metrics together with the weights.
        info = Evaluate(net, config)
        logging.info('net   iou: ' + str(info['mean_iou']) + '  acc: ' +
                     str(info['accuracy']))
        info['state_dict'] = net.state_dict()
        info['epoch'] = epoch
        checkpoint_path = os.path.join(config["sub_working_dir"],
                                       'epoch_' + str(epoch) + '_net.pth')
        torch.save(info, checkpoint_path)
        logging.info("Model checkpoint saved to %s" % checkpoint_path)

        lr_scheduler.step()

    logging.info("Bye~")