コード例 #1
0
def preprocess_image_train(image,
                           annotations,
                           netblock,
                           target_dim,
                           augment=False,
                           letterbox=False,
                           force_cpu=False):
    """
    ----------
    Author: Damon Gwinn (gwinndr)
    ----------
    - Turns a cv2 image into a Darknet input tensor of size target_dim x target_dim
    - The image is either augmented, letterboxed, or plainly resized (in that priority order)
    - Annotations are expected to be normalized (0-1) relative to the input image
    - Works on a clone of annotations, so the caller's tensor is never modified
    - Annotations whose boxes fail is_valid_box after the transform are dropped
    - force_cpu places the returned image tensor on the cpu, otherwise the default device is used
    - Returns (input tensor, filtered annotations)
    ----------
    """

    device = cpu_device() if force_cpu else get_device()

    # Augmentation wins when both options are requested
    if augment and letterbox:
        print(
            "preprocess_image_train: WARNING: Cannot letterbox and augment at the same time. Not letterboxing..."
        )
        letterbox = False

    # The transforms below edit annotations in place, so operate on a private copy
    annotations = annotations.clone()

    if augment:
        net_image = augment_image(image,
                                  netblock,
                                  target_dim,
                                  annotations=annotations)
    elif letterbox:
        net_image = letterbox_image(image,
                                    target_dim,
                                    annotations=annotations)
    else:
        square_dims = (target_dim, target_dim)
        net_image = image_resize(image, square_dims)

    net_input = image_to_tensor(net_image, device=device)

    # Keep only the annotations whose bounding box is still valid
    bboxes = annotations[..., ANN_BBOX_X1:ANN_BBOX_Y2 + 1]
    keep = is_valid_box(bboxes,
                        target_dim,
                        target_dim,
                        boxes_normalized=True)

    return net_input, annotations[keep]
コード例 #2
0
def preprocess_images_mosaic(images,
                             annotations,
                             netblock,
                             target_dim,
                             force_cpu=False):
    """
    ----------
    Author: Damon Gwinn (gwinndr)
    ----------
    - Converts a list of 4 images and a list of 4 annotations tensors to a mosaic image
    - Annotations are assumed to be normalized (0-1) relative to the input image
    - Will map annotations to the mosaic and filter out invalid annotations
    - force_cpu will force the return type to be on the cpu, otherwise uses the default device
    - Returns (input tensor, filtered annotations)
    - Returns (None, None) after printing an error if images or annotations do not have exactly 4 entries
    ----------
    """

    # Validate the inputs first so bad calls fail fast, before any device lookup happens
    if (len(images) != 4):
        print(
            "preprocess_images_mosaic: ERROR: Images must be a list of 4 images"
        )
        return None, None
    if (len(annotations) != 4):
        print(
            "preprocess_images_mosaic: ERROR: Annotation must be a list of 4 normalized annotation tensors"
        )
        return None, None

    if (force_cpu):
        device = cpu_device()
    else:
        device = get_device()

    # Returned annotations is a single tensor (the input was a list of tensors)
    input_image, annotations = create_mosaic(images,
                                             netblock,
                                             target_dim,
                                             images_annotations=annotations)

    # Converting to tensor
    input_tensor = image_to_tensor(input_image, device=device)

    # Filtering out bad annotations
    boxes = annotations[..., ANN_BBOX_X1:ANN_BBOX_Y2 + 1]
    is_valid = is_valid_box(boxes,
                            target_dim,
                            target_dim,
                            boxes_normalized=True)
    annotations = annotations[is_valid]

    return input_tensor, annotations
コード例 #3
0
def preprocess_image_eval(image,
                          target_dim,
                          letterbox=False,
                          show_img=False,
                          force_cpu=False):
    """
    ----------
    Author: Damon Gwinn (gwinndr)
    ----------
    - Turns a cv2 image into a Darknet input tensor of size target_dim x target_dim
    - letterbox selects letterbox_image, otherwise the image is resized with image_resize
    - show_img displays image_info.aug_image in a cv2 window and waits for a key press
    - force_cpu places the returned tensor on the cpu, otherwise the default device is used
    - Returns (input tensor, ImageInfo object used to map detections back to the original image)
    ----------
    """

    # Created first: both resize helpers receive it via image_info
    image_info = ImageInfo(image)

    device = cpu_device() if force_cpu else get_device()

    # Preserve the aspect ratio (letterbox) or resize straight to a square
    if letterbox:
        net_image = letterbox_image(image, target_dim, image_info=image_info)
    else:
        square_dims = (target_dim, target_dim)
        net_image = image_resize(image, square_dims, image_info=image_info)

    net_input = image_to_tensor(net_image, device=device)

    if show_img:
        cv2.imshow("Augmented Input Image", image_info.aug_image)
        cv2.waitKey(0)

    return net_input, image_info
コード例 #4
0
ファイル: training.py プロジェクト: gwinndr/YOLOv4-Pytorch
def train_batch_batchloader(model,
                            minibatch_loader,
                            optim,
                            scheduler,
                            print_shape=False):
    """
    ----------
    Author: Damon Gwinn (gwinndr)
    ----------
    - Trains a batch using the BatchLoader class (utilities.loaders)
    - minibatch_loader should be a BatchLoader class with its batch size set to the mini-batch (batch / subdivisions)
    - Runs net.subdivisions forward/backward passes, then a single optim.step() and scheduler.step()
    - print_shape prints the input tensor shape for the first subdivision
    - Returns the loss averaged over the subdivisions of the batch
    - If a mini-batch raises an Exception, prints the offending image ids and the traceback, then exits with status 1
    - KeyboardInterrupt and SystemExit are not intercepted and propagate normally
    - NOTE: Does not track if this batch finishes an epoch
    ----------
    """

    net = model.net_block

    model.train()
    optim.zero_grad()

    total_loss = 0.0
    for subdiv in range(net.subdivisions):
        x, anns, img_ids = minibatch_loader.next_batch()
        try:
            x = x.to(get_device())
            anns = anns.to(get_device())

            # Sanity check
            if ((subdiv == 0) and print_shape):
                print("Input shape:", list(x.shape))

            # Gradients accumulate across subdivisions until optim.step() below
            loss = model(x, anns=anns)
            loss.backward()

            total_loss += loss.item()

            print("Subdivisions: %d / %d" % (subdiv + 1, net.subdivisions),
                  end=CARRIAGE_RETURN)

            # Just making sure memory is freed up
            del loss

        # Exception (not a bare except) so that Ctrl-C / sys.exit are not
        # misreported as a failure on these image ids
        except Exception:
            img_ids = ", ".join([str(img_id.item()) for img_id in img_ids])

            print("")
            print("----- Exception occured on image ids:", img_ids, "-----")
            traceback.print_exc()
            sys.exit(1)
    # end for

    print("")

    optim.step()
    scheduler.step()

    # Average loss with respect to the batch size (yolo loss is already averaged by the minibatch size)
    avg_loss = total_loss / net.subdivisions

    return avg_loss
コード例 #5
0
def main():
    """
    ----------
    Author: Damon Gwinn (gwinndr)
    ----------
    - Entry point for training a darknet model
    - Parses the training args, builds the model from args.cfg, and loads args.weights
    - Sets up the results folder: weights directory, info.txt with the run settings, and a copy of the cfg
    - Optionally logs to batch/epoch csv files and to tensorboard
    - Trains batch by batch until net.max_batches is reached, randomly resizing the training input if net.random is set
    - After each finished epoch: optionally writes weights, then evaluates on the validation set
    - Evaluates once more if training stops part way through an epoch, then writes final_weights.weights
    - Returns None; returns early if the cfg fails to parse, [net] width and height differ,
      or the user declines to reuse an existing results folder
    ----------
    """

    args = parse_train_args()

    train_imgs = os.path.realpath(args.train_imgs)
    train_anns = os.path.realpath(args.train_anns)

    val_imgs = os.path.realpath(args.val_imgs)
    val_anns = os.path.realpath(args.val_anns)

    print("Parsing config into model...")
    model = parse_config(args.cfg)
    if(model is None):
        return

    model = model.to(get_device())
    model.train()

    net = model.net_block

    # Network input dim
    if(net.width != net.height):
        print("Error: Width and height must match in [net]")
        return

    # Number of images per forward/backward pass
    mini_batch = net.batch // net.subdivisions

    print("Loading weights...")
    load_weights(model, args.weights)

    # For testing purposes
    # model.imgs_seen = 0
    # net.max_batches = 5000

    print("Building optimizer...")
    optim, scheduler = build_optimizer(model)

    ##### Setting up results #####
    print("Setting up results folder:", args.results)
    weights_dir = os.path.join(args.results, "weights")
    info_f = os.path.join(args.results, "info.txt")

    # Sanity check so you don't lose all your hard work
    if(os.path.isdir(args.results)):
        print("")
        print("---- WARNING: Results folder already exists: %s ----" % args.results)
        print("This could really mess things up if you're not meaning to continue from a checkpoint")
        if(not args.no_ask):
            print("")
            # Keep asking until the answer is exactly "y" or "n"
            while(True):
                user_in = input("Are you absolutely positively sure you want to continue?(y/n): ")
                if(user_in == "y"):
                    break
                elif(user_in == "n"):
                    print("Crisis averted, don't forget to leave a tip :-)")
                    return
        print("")

    # Also creates args.results itself since weights_dir lives inside it
    os.makedirs(weights_dir, exist_ok=True)

    # Writing the info.txt with hyperparam information
    # An existing info.txt is appended to, so earlier runs stay on record
    print("Writing info.txt...")
    if(os.path.isfile(info_f)):
        info_mode = "a"
    else:
        info_mode = "w"
    with open(info_f, info_mode, newline="") as info_stream:
        print("Detector trained and evaluated on MS-COCO", file=info_stream)
        print("cfg:", args.cfg, file=info_stream)
        print("weights:", args.weights, file=info_stream)
        print("batches_seen:", round(model.imgs_seen / net.batch), file=info_stream)
        print("augment:", not args.no_augment, file=info_stream)
        print("letterbox:", args.letterbox, file=info_stream)
        print("obj_thresh:", args.obj_thresh, file=info_stream)
        print("max_imgs:", args.max_imgs, file=info_stream)
        print(net.to_string(), file=info_stream)
        print(SEPARATOR, file=info_stream)

    # Copying config file to results which acts as a nice little sanity check :-)
    print("Copying config...")
    shutil.copy(args.cfg, args.results)

    # Log csv setup (None disables the corresponding log)
    if(args.batch_csv):
        batch_log = os.path.join(args.results, "batch_log.csv")
    else:
        batch_log = None
    if(args.epoch_csv):
        epoch_log = os.path.join(args.results, "epoch_log.csv")
    else:
        epoch_log = None

    # Tensorboard setup
    # The import is local so tensorboard is only needed when it is requested
    if(args.tensorboard):
        print("Setting up tensorboard...")
        from torch.utils.tensorboard import SummaryWriter
        tensorboard_dir = os.path.join(args.results, "tensorboard")
        tensorboard_summary = SummaryWriter(log_dir=tensorboard_dir)
    else:
        tensorboard_dir = None
        tensorboard_summary = None

    print("")

    print("Building datasets:")
    init_dim = net.width
    augment = not args.no_augment
    letterbox = args.letterbox

    # augment is only passed to the training set, letterbox only to the validation set
    train_set = CocoDataset(train_imgs, net, init_dim, augment=augment, annotation_file=train_anns)
    val_set = CocoDataset(val_imgs, net, init_dim, letterbox=letterbox, annotation_file=val_anns)
    print("")

    print("Building BatchLoader...")
    train_loader = BatchLoader(train_set, mini_batch, shuffle=True, drop_last=True)
    print("")

    ##### Train setup #####
    # Building random resizing list
    if(net.random != 0.0):
        # A random value of exactly 1.0 is swapped for NET_RAND_COEF_IF_1
        rand_coef = net.random if (net.random != 1.0) else NET_RAND_COEF_IF_1
        resizings = possible_image_sizings(init_dim, rand_coef, net.resize_step)

        # First resize will be the max so we test if there's enough memory
        # NOTE(review): assumes resizings is sorted ascending so [-1] is the largest - confirm
        max_dim = resizings[-1]
        train_set.resize(max_dim)

        init_dim = max_dim

        # Uncomment to test out different random coefficients
        # print(rand_coef)
        # print(resizings)
        # import sys
        # sys.exit()
    else:
        resizings = None

    # Getting position in training
    batches_trained = model.imgs_seen // net.batch
    epochs_trained = model.imgs_seen // len(train_set)

    ### Too unstable and slow ###
    # # Evaluate on epoch 0 for a baseline error
    # if(epochs_trained == 0):
    #     print("Evaluating epoch 0 as a baseline mAP and mAR...")
    #     evaluate_and_log_epoch_coco(
    #         model, epochs_trained, val_set, args.obj_thresh,
    #         epoch_log=epoch_log, tb_summary=tensorboard_summary, max_imgs=args.max_imgs
    #     )
    #     print("")

    # For tracking when we finished a epoch
    # Counts images trained during this run only; it always starts at 0
    progress_epoch = 0

    # Filenames for printing
    cfg_fname = os.path.basename(args.cfg)
    weights_fname = os.path.basename(args.weights)

    print("Resizings:", resizings)
    print("----- Initial input dim: %d x %d -----" % (init_dim, init_dim))

    ##### Training Loop #####
    while(batches_trained < net.max_batches):
        # Read before training so the logged value is the rate this batch was trained with
        cur_lr = get_optimizer_lr(optim)

        print(SEPARATOR)
        print("Batches: %d / %d  Epochs: %d" % (batches_trained, net.max_batches, epochs_trained))
        print("Model: %s  Weights: %s" % (cfg_fname, weights_fname))
        print("Learn Rate:", cur_lr)
        print("")

        # Training ze batch
        before = time.time()
        loss = train_batch_batchloader(model, train_loader, optim, scheduler, print_shape=True)
        after = time.time()

        print("")
        print("Loss: %.4f" % loss)
        print("")
        print("Time taken: %.2f seconds" % (after - before))

        # Logging
        if(batch_log is not None):
            print("Logging to csv...")
            log_detector_batch(batch_log, batches_trained, cur_lr, loss)
        if(tensorboard_summary is not None):
            print("Logging to Tensorboard...")
            log_detector_batch_tb(tensorboard_summary, batches_trained, cur_lr, loss)

        print(SEPARATOR)
        print("")

        batches_trained += 1
        model.imgs_seen += net.batch
        progress_epoch += net.batch

        # After n batches, random resize (if applicable)
        if((resizings is not None) and (batches_trained % N_BATCH_TO_RANDOM_RESIZE == 0)):
            new_dim = random.choice(resizings)
            print("----- Resizing input to %d x %d -----" % (new_dim, new_dim))
            train_set.resize(new_dim)

        # Finished an epoch, evaluate mAP and mAR
        if(progress_epoch >= len(train_set)):
            epochs_trained += 1
            # Carry the overshoot into the next epoch instead of resetting to 0
            progress_epoch -= len(train_set)

            print("----- Finished epoch -----")
            print("Num epochs trained:", epochs_trained)
            print("")

            # Write weights
            if((epochs_trained % args.epoch_mod == 0) and (not args.only_save_last)):
                print("Writing weights...")
                cur_weights = os.path.join(weights_dir, "weights_epoch_%d.weights" % epochs_trained)
                write_weights(model, cur_weights)
                print("")

            # Log that epoch
            print("Evaluating epoch...")
            evaluate_and_log_epoch_coco(
                model, epochs_trained, val_set, args.obj_thresh,
                epoch_log=epoch_log, tb_summary=tensorboard_summary, max_imgs=args.max_imgs
            )
            print("")

        # end if
    # end while

    print("")
    print("---- Finished Training! ----")
    print("")

    # Will log if it's a partial epoch
    # final_epoch is the fractional epoch count, e.g. 2.5 when half way through the third epoch
    final_epoch = round((progress_epoch / len(train_set)) + epochs_trained, 4)
    if(final_epoch != epochs_trained):
        print("Part way through an epoch, evaluating and logging")
        evaluate_and_log_epoch_coco(
            model, final_epoch, val_set, args.obj_thresh,
            epoch_log=epoch_log, tb_summary=tensorboard_summary, max_imgs=args.max_imgs
        )
        print("")

    print("Saving final weights...")
    final_weights = os.path.join(weights_dir, "final_weights.weights")
    write_weights(model, final_weights)
    print("")

    # Sanity check just to make sure everything is gone
    if(tensorboard_summary is not None):
        tensorboard_summary.flush()

    print("Goodbye!")
    print("")

    return
コード例 #6
0
ファイル: detect.py プロジェクト: gwinndr/YOLOv4-Pytorch
def main():
    """
    ----------
    Author: Damon Gwinn (gwinndr)
    ----------
    - Entry point for generating labels on a given image or video
    - Parses the detect args, builds the model from args.cfg, loads args.weights, and prints a run summary
    - Image input: runs inference, draws the detections, writes the result to args.output,
      and shows it in a window unless args.no_show is set
    - Video input (args.video): writes an annotated copy of the video to args.output using the
      input's dimensions, fourcc, and frame rate
    - Benchmarking is only available for video input; the measured fps is printed at the end
    - Returns None; returns early (after printing a message where applicable) on an invalid
      benchmark method, a cfg or class names file that fails to parse, a non-square [net],
      an image that fails to load, or a video that cannot be opened
    ----------
    """

    args = parse_detect_args()

    # Benchmarking information
    if (not args.benchmark):
        benchmark = NO_BENCHMARK
    elif (not args.video):
        print("Warning: Benchmarking is only available with a video input")
        benchmark = NO_BENCHMARK
    else:
        benchmark = args.benchmark_method
        if ((benchmark < MODEL_ONLY) or (benchmark > MODEL_WITH_IO)):
            print(
                "Unrecognized -benchmark_method. Please use 1 (MODEL_ONLY), 2 (MODEL_WITH_PP), or 3 (MODEL_WITH_IO)"
            )
            return

    # no_grad disables autograd so our model runs faster
    with torch.no_grad():
        if (args.force_cpu):
            print(
                "----- WARNING: Model is using the CPU (--force_cpu), expect model to run slower! -----"
            )
            print("")
            use_cuda(False)

        print("Parsing config into model...")
        model = parse_config(args.cfg)
        if (model is None):
            return

        model = model.to(get_device())
        model.eval()

        # Network input dim
        if (model.net_block.width != model.net_block.height):
            print("Error: Width and height must match in [net]")
            return

        network_dim = model.net_block.width

        # Letterboxing
        letterbox = args.letterbox

        print("Parsing class names...")
        class_names = parse_names(args.class_names)
        if (class_names is None):
            return

        print("Loading weights...")
        load_weights(model, args.weights)

        print("")
        print(SEPARATOR)
        print("DARKNET")
        print("GPU:", gpu_device_name())
        print("Config:", args.cfg)
        print("Weights:", args.weights)
        print("Version:", model.version)
        print("Images seen:", model.imgs_seen)
        print("")
        print("Network Dim:", network_dim)
        print("Letterbox:", letterbox)
        print(SEPARATOR)
        print("")

        # Print network
        if (args.print_network):
            model.print_network()

        obj_thresh = args.obj_thresh

        ##### IMAGE DETECTION #####
        if (not args.video):
            image = load_image(args.input)
            if (image is None):
                return

            detections = inference_on_image(model,
                                            image,
                                            network_dim,
                                            obj_thresh,
                                            letterbox=letterbox)
            output_image = draw_detections(detections,
                                           image,
                                           class_names,
                                           verbose_output=True)

            cv2.imwrite(args.output, output_image)

            if (not args.no_show):
                cv2.imshow("Detections", output_image)
                cv2.waitKey(0)

        ##### VIDEO DETECTION #####
        else:
            # Warm up the model for more accurate benchmarks
            if (benchmark != NO_BENCHMARK):
                print("Warming up model for benchmarks...")
                for _ in range(BENCHMARK_N_WARMUPS):
                    warmup = torch.rand(
                        (IMG_CHANNEL_COUNT, network_dim, network_dim),
                        dtype=torch.float32,
                        device=get_device())
                    model(warmup.unsqueeze(0))
                print("Done!")
                print("")

            # Load video capture object
            video_in = cv2.VideoCapture(args.input)
            if (not video_in.isOpened()):
                # Report the failure instead of silently producing no output
                print("Error: Could not open video:", args.input)
                video_in.release()
                return

            video_out = None
            try:
                # Getting input video hyperparameters for the output video
                vid_w = int(video_in.get(cv2.CAP_PROP_FRAME_WIDTH))
                vid_h = int(video_in.get(cv2.CAP_PROP_FRAME_HEIGHT))
                vid_dims = (vid_w, vid_h)
                fourcc = int(video_in.get(cv2.CAP_PROP_FOURCC))

                # Kept as a float: truncating e.g. 29.97 to 29 would change
                # the playback speed of the output video
                video_fps = video_in.get(cv2.CAP_PROP_FPS)

                video_out = cv2.VideoWriter(args.output,
                                            fourcc,
                                            video_fps,
                                            vid_dims,
                                            isColor=True)

                benchmark_fps = inference_video_to_video(model,
                                                         video_in,
                                                         video_out,
                                                         class_names,
                                                         network_dim,
                                                         obj_thresh,
                                                         letterbox,
                                                         benchmark,
                                                         verbose=True)
                print("")
            finally:
                # Release the capture and writer even if inference raised
                video_in.release()
                if (video_out is not None):
                    video_out.release()

            if (benchmark == MODEL_ONLY):
                print("Model fps: %.2f" % benchmark_fps)
            elif (benchmark == MODEL_WITH_PP):
                print("Model fps with pre/post-processing: %.2f" % benchmark_fps)
            elif (benchmark == MODEL_WITH_IO):
                print(
                    "Model fps with file io and pre/post-processing: %.2f"
                    % benchmark_fps)

    return
コード例 #7
0
def main():
    """
    ----------
    Author: Damon Gwinn (gwinndr)
    ----------
    - Entry point for evaluating a darknet model
    - Builds the model from args.cfg, loads args.weights, and prints a run summary
    - Builds a CocoDataset from args.images / args.anns and runs coco_evaluate_bbox on it
    - Returns None; returns early if the cfg fails to parse or [net] width and height differ
    ----------
    """

    args = parse_evaluate_args()

    # Evaluation never needs gradients, so autograd stays off for the whole run
    with torch.no_grad():
        if args.force_cpu:
            print("----- WARNING: Model is using the CPU (--force_cpu), expect model to run slower! -----")
            print("")
            use_cuda(False)

        print("Parsing config into model...")
        model = parse_config(args.cfg)
        if model is None:
            return

        model = model.to(get_device())
        model.eval()

        # Only square network inputs are supported
        if model.net_block.width != model.net_block.height:
            print("Error: Width and height must match in [net]")
            return

        network_dim = model.net_block.width
        use_letterbox = args.letterbox

        print("Loading weights...")
        load_weights(model, args.weights)

        # Run summary
        print("")
        print(SEPARATOR)
        print("DARKNET")
        print("GPU:", gpu_device_name())
        print("Config:", args.cfg)
        print("Weights:", args.weights)
        print("Version:", model.version)
        print("Images seen:", model.imgs_seen)
        print("")
        print("Network Dim:", network_dim)
        print("Letterbox:", use_letterbox)
        print(SEPARATOR)
        print("")

        if args.print_network:
            model.print_network()

        # Evaluation settings
        images_path = args.images
        anns_path = args.anns
        img_limit = args.max_imgs
        threshold = args.obj_thresh

        dataset = CocoDataset(
            images_path,
            model.net_block,
            network_dim,
            letterbox=use_letterbox,
            annotation_file=anns_path,
        )
        print("")

        coco_evaluate_bbox(dataset, model, threshold, max_imgs=img_limit)

    return