Example #1
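These snippets are excerpts from a PyTorch RetinaNet detection project, not self-contained scripts, and they all assume the same surrounding imports and project-local helpers. A sketch of an import block that would satisfy all four follows; the standard-library and third-party imports are implied by the calls used, while the project-local module paths (model, ret50, PredDataset, collater, Normalizer, Resizer, UnNormalizer, combine_values) are assumptions, since the excerpts do not show where they live:

import glob
import logging
import os
import shutil
import time
from copy import deepcopy
from shutil import copyfile

import cv2
import numpy as np
import skimage.color
import skimage.io
import torch
from torch.utils.data import DataLoader
from torchvision import transforms

# Project-local helpers; the module paths below are placeholders:
# from retinanet import model
# from retinanet.model import resnet50 as ret50
# from retinanet.dataloader import (PredDataset, collater, Normalizer,
#                                   Resizer, UnNormalizer)
# from retinanet.utils import combine_values

logger = logging.getLogger(__name__)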
def detect_single_image(checkpoint, image_path, visualize=False):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    configs = combine_values(checkpoint['model_specs']['training_configs'], checkpoint['hp_values'])
    labels = checkpoint['labels']
    num_classes = len(labels)
    retinanet = model.resnet152(num_classes=num_classes, scales=configs['anchor_scales'], ratios=configs['anchor_ratios']) #TODO: make depth an input parameter
    retinanet.load_state_dict(checkpoint['model'])
    retinanet = retinanet.to(device=device)
    retinanet.eval()

    img = skimage.io.imread(image_path)

    if len(img.shape) == 2:
        img = skimage.color.gray2rgb(img)

    img = img.astype(np.float32) / 255.0
    transform = transforms.Compose([Normalizer(), Resizer(min_side=608)]) #TODO: make this dynamic
    data = transform({'img': img, 'annot': np.zeros((0, 5))})
    img = data['img']
    img = img.unsqueeze(0)         # add a batch dimension
    img = img.permute(0, 3, 1, 2)  # HWC -> CHW (PyTorch channel-first layout)
    with torch.no_grad():
        scores, classification, transformed_anchors = retinanet(img.to(device=device).float())

        # keep detections above a fixed 0.5 confidence threshold
        idxs = np.where(scores.cpu() > 0.5)[0]
        scale = data['scale']
        detections_list = []
        for j in range(idxs.shape[0]):
            bbox = transformed_anchors[idxs[j], :]
            label_idx = int(classification[idxs[j]])
            label_name = labels[label_idx]
            score = scores[idxs[j]].item()

            # undo the resize so boxes are in original-image coordinates (for eval against ground truth)
            bbox /= scale
            bbox = bbox.round()  # torch.Tensor.round() is not in-place; reassign the result
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            detections_list.append([label_name, str(score), str(x1), str(y1), str(x2), str(y2)])
        img_name = os.path.splitext(os.path.basename(image_path))[0]
        filename = img_name + '.txt'
        path = os.path.dirname(image_path)
        filepathname = os.path.join(path, filename)
        with open(filepathname, 'w', encoding='utf8') as f:
            for single_det_list in detections_list:
                f.write(' '.join(single_det_list))
                f.write('\n')

        if visualize:
            # NOTE: visualization is only stubbed out in this example; see the
            # later examples for the full drawing and saving logic
            unnormalize = UnNormalizer()

    return filepathname
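A minimal usage sketch for this function, with placeholder paths; the checkpoint is assumed to be a dict saved by the project's training loop with the keys the function reads ('model', 'labels', 'model_specs', 'hp_values'):

checkpoint = torch.load('checkpoint.pt', map_location='cpu')
detections_file = detect_single_image(checkpoint, 'images/example.jpg')
print('detections written to:', detections_file)

Each line of the resulting .txt file holds one detection in the form 'label score x1 y1 x2 y2', with the box already scaled back to original-image coordinates.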
Example #2
def detect(checkpoint,
           pred_on_path,
           output_path,
           threshold=0.5,
           visualize=False,
           red_label='sick'):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # start from a clean output directory; makedirs must also run when the
    # directory did not exist in the first place
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)
    logger.info('inside ' + str(pred_on_path) + ': ' +
                str(os.listdir(pred_on_path)))
    dataset_val = PredDataset(pred_on_path=pred_on_path,
                              transform=transforms.Compose([
                                  Normalizer(),
                                  Resizer(min_side=608)
                              ]))  #TODO make resize an input param
    logger.info('dataset prepared')
    dataloader_val = DataLoader(dataset_val,
                                num_workers=0,
                                collate_fn=collater,
                                batch_sampler=None)
    logger.info('data loader initialized')
    labels = checkpoint['labels']
    logger.info('labels are: ' + str(labels))
    num_classes = len(labels)
    configs = deepcopy(checkpoint['model_specs']['training_configs'])
    configs.update(checkpoint['hp_values'])
    logger.info('initializing object_detection model')
    retinanet = ret50(
        num_classes=num_classes,
        scales=configs['anchor_scales'],
        ratios=configs['anchor_ratios'])  #TODO: make depth an input parameter
    logger.info('loading weights')
    retinanet.load_state_dict(checkpoint['model'])
    retinanet = retinanet.to(device=device)
    logger.info('model to device: ' + str(device))
    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        # draw the text twice (thick black, then thin white) so the caption
        # stays readable on any background
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (255, 255, 255), 1)

    inference_times = []
    for idx, data in enumerate(dataloader_val):
        scale = data['scale'][0]
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(
                data['img'].to(device=device).float())
            elapsed_time = time.time() - st
            print('Elapsed time: {}'.format(elapsed_time))
            inference_times.append(elapsed_time)
            idxs = np.where(scores.cpu() > threshold)[0]
            if visualize:
                # undo normalization, clip to [0, 255], and convert CHW -> HWC for OpenCV
                img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
                img = np.clip(img, 0, 255)
                img = np.transpose(img, (1, 2, 0))
                img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            detections_list = []
            for j in range(idxs.shape[0]):
                bbox = transformed_anchors[idxs[j], :]
                if visualize:
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])

                label_idx = int(classification[idxs[j]])
                label_name = labels[label_idx]
                score = scores[idxs[j]].item()
                if visualize:
                    draw_caption(img, (x1, y1, x2, y2), label_name)
                    if red_label in label_name:
                        cv2.rectangle(img, (x1, y1), (x2, y2),
                                      color=(0, 0, 255),
                                      thickness=2)
                    else:
                        cv2.rectangle(img, (x1, y1), (x2, y2),
                                      color=(0, 255, 0),
                                      thickness=2)
                    print(label_name)

                # undo the resize so boxes are in original-image coordinates (for eval against ground truth)
                bbox /= scale
                bbox = bbox.round()  # torch.Tensor.round() is not in-place; reassign the result
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                detections_list.append([
                    label_name,
                    str(score),
                    str(x1),
                    str(y1),
                    str(x2),
                    str(y2)
                ])
            img_name = dataset_val.image_names[idx].split('/')[-1]
            i_name = img_name.split('.')[0]
            filename = i_name + '.txt'
            filepathname = os.path.join(output_path, filename)
            with open(filepathname, 'w', encoding='utf8') as f:
                for single_det_list in detections_list:
                    f.write(' '.join(single_det_list))
                    f.write('\n')
            if visualize:
                save_to_path = os.path.join(output_path, img_name)
                cv2.imwrite(save_to_path, img)
    print('average inference time per image: ', np.mean(inference_times))
    return output_path
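A usage sketch with placeholder paths; pred_on_path should contain the images to run on:

checkpoint = torch.load('checkpoint.pt', map_location='cpu')
out_dir = detect(checkpoint,
                 pred_on_path='data/predict_on',
                 output_path='data/predictions/run0',
                 threshold=0.5,
                 visualize=True)

With visualize=True the function also writes each image with its boxes drawn (red when the label contains red_label, green otherwise) next to the per-image .txt files.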
Example #3
def detect(checkpoint, output_dir, home_path=None, visualize=False):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if home_path is None:
        home_path = checkpoint['model_specs']['data']['home_path']
    if os.path.basename(os.getcwd()) == 'ObjectDetNet':
        home_path = os.path.join('..', home_path)
    # must have a file to predict on called "predict_on"
    pred_on_path = os.path.join(home_path, 'predict_on')

    #create output path
    output_path = os.path.join(home_path, 'predictions', output_dir)

    try:
        os.makedirs(output_path)
    except FileExistsError:
        if output_dir != 'check0':
            raise Exception('there are already predictions for model: ' + output_dir)
        else:
            logger.info('there was already a check0 in place, erasing and predicting again from scratch')
            shutil.rmtree(output_path)
            os.makedirs(output_path)
    logger.info('inside ' + str(pred_on_path) + ': ' + str(os.listdir(pred_on_path)))
    dataset_val = PredDataset(pred_on_path=pred_on_path,
                              transform=transforms.Compose([Normalizer(), Resizer(min_side=608)])) #TODO make resize an input param
    logger.info('dataset prepared')
    dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=None)
    logger.info('data loader initialized')
    labels = checkpoint['labels']
    logger.info('labels are: ' + str(labels))
    num_classes = len(labels)

    configs = combine_values(checkpoint['model_specs']['training_configs'], checkpoint['hp_values'])
    logger.info('initializing retinanet model')
    depth = checkpoint['model_specs']['training_configs']['depth']  #TODO: make depth an input parameter
    if depth == 50:
        retinanet = model.resnet50(num_classes=num_classes, scales=configs['anchor_scales'], ratios=configs['anchor_ratios'])
    elif depth == 152:
        retinanet = model.resnet152(num_classes=num_classes, scales=configs['anchor_scales'], ratios=configs['anchor_ratios'])
    else:
        raise ValueError('unsupported backbone depth: ' + str(depth) + ' (expected 50 or 152)')
    logger.info('loading weights')
    retinanet.load_state_dict(checkpoint['model'])
    retinanet = retinanet.to(device=device)
    logger.info('model to device: ' + str(device))
    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        scale = data['scale'][0]
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].to(device=device).float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)[0]
            if visualize:
                img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
                img = np.clip(img, 0, 255)
                img = np.transpose(img, (1, 2, 0))
                img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            detections_list = []
            for j in range(idxs.shape[0]):
                bbox = transformed_anchors[idxs[j], :]
                if visualize:
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])

                label_idx = int(classification[idxs[j]])
                label_name = labels[label_idx]
                score = scores[idxs[j]].item()
                if visualize:
                    draw_caption(img, (x1, y1, x2, y2), label_name)
                    cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                    print(label_name)

                # undo the resize so boxes are in original-image coordinates (for eval against ground truth)
                bbox /= scale
                bbox = bbox.round()  # torch.Tensor.round() is not in-place; reassign the result
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                detections_list.append([label_name, str(score), str(x1), str(y1), str(x2), str(y2)])
            img_name = dataset_val.image_names[idx].split('/')[-1]
            i_name = img_name.split('.')[0]
            filename = i_name + '.txt'
            filepathname = os.path.join(output_path, filename)
            with open(filepathname, 'w', encoding='utf8') as f:
                for single_det_list in detections_list:
                    f.write(' '.join(single_det_list))
                    f.write('\n')
            if visualize:
                save_to_path = os.path.join(output_path, img_name)
                cv2.imwrite(save_to_path, img)

    return output_path
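All three checkpoint-based examples above read the same fields from the checkpoint dict. A sketch of the structure they assume follows; every value shown is an illustrative placeholder, only the key names are taken from the code:

checkpoint = {
    'model': retinanet.state_dict(),          # trained weights
    'labels': {0: 'healthy', 1: 'sick'},      # class index -> class name
    'model_specs': {
        'training_configs': {'depth': 50},    # backbone depth, plus other configs
        'data': {'home_path': 'data'},        # used when home_path is not passed
    },
    'hp_values': {                            # hyperparameter values merged into configs
        'anchor_scales': [1.0, 1.26, 1.587],
        'anchor_ratios': [0.5, 1.0, 2.0],
    },
}
torch.save(checkpoint, 'checkpoint.pt')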
Example #4
def detect(home_path, checkpoint_path):

    class_names_path = os.path.join(home_path, "d.names")
    # count the classes (one name per line); use a context manager so the file is closed
    with open(class_names_path) as f:
        num_classes = sum(1 for _ in f)
    # must have a file to predict on called "predict_on"
    pred_on_path = os.path.join(home_path, 'predict_on')

    # create the output path, named after the checkpoint file
    checkpoint_name = os.path.splitext(os.path.basename(checkpoint_path))[0]
    output_path = os.path.join(home_path, 'predictions', checkpoint_name)
    if not os.path.exists(os.path.join(home_path, 'predictions')):
        os.mkdir(os.path.join(home_path, 'predictions'))
    if os.path.exists(output_path):
        raise Exception('there are already predictions for model: ' + checkpoint_name)
    os.mkdir(output_path)

    # copy ground-truth annotations to predictions; assumes exactly one
    # '*_<set_name>.json' annotation file inside predict_on
    gt_file = glob.glob(os.path.join(pred_on_path, '*.json'))[0]
    set_name = os.path.basename(gt_file).split('.')[0].split('_')[1]
    annotations_path = os.path.join(home_path, 'predictions', 'annotations')
    if not os.path.exists(annotations_path):
        os.mkdir(annotations_path)
    copyfile(gt_file, os.path.join(annotations_path, os.path.basename(gt_file)))
    dataset_val = PredDataset(pred_on_path, set_name=set_name,
                        transform=transforms.Compose([Normalizer(), Resizer(min_side=608)]))
    dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=None)

    # load the checkpoint onto whichever device is available; calling .cuda()
    # unconditionally would crash on CPU-only machines
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    checkpoint = torch.load(checkpoint_path, map_location=device)
    scales = checkpoint['scales']
    ratios = checkpoint['ratios']

    retinanet = model.resnet152(num_classes=num_classes, scales=scales, ratios=ratios) #TODO: make depth an input parameter
    retinanet.load_state_dict(checkpoint['model'])
    retinanet = retinanet.to(device=device)
    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        scale = data['scale'][0]
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].to(device=device).float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)[0]
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img = np.clip(img, 0, 255)

            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            detections_list = []
            for j in range(idxs.shape[0]):
                bbox = transformed_anchors[idxs[j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_idx = int(classification[idxs[j]])
                label_name = dataset_val.labels[label_idx]
                score = scores[idxs[j]].item()

                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                print(label_name)
                # undo the resize so boxes are in original-image coordinates (for eval against ground truth)
                bbox /= scale
                bbox = bbox.round()  # torch.Tensor.round() is not in-place; reassign the result
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                detections_list.append([label_name, str(score), str(x1), str(y1), str(x2), str(y2)])
            img_name = dataset_val.coco.dataset['images'][idx]['file_name'].split('.')[0]
            filename = img_name + '.txt'
            filepathname = os.path.join(output_path, filename)
            with open(filepathname, 'w', encoding='utf8') as f:
                for single_det_list in detections_list:
                    f.write(' '.join(single_det_list))
                    f.write('\n')

            img_save_name = dataset_val.coco.dataset['images'][idx]['file_name']
            save_to_path = os.path.join(output_path, img_save_name)
            cv2.imwrite(save_to_path, img)
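A usage sketch for this last variant, which loads the checkpoint itself; home_path is expected to contain a 'd.names' class list and a 'predict_on' directory holding the images plus one COCO-style annotation JSON (all paths here are placeholders):

detect(home_path='data', checkpoint_path='checkpoint.pt')

Predictions end up under data/predictions/checkpoint/, one .txt file per image alongside the visualized images; the ground-truth JSON is copied to data/predictions/annotations/ for later evaluation.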