def __init__(self, prior_boxes, dataset_name='VOC2007',
             box_scale_factors=None, background_index=0,
             lower_probability_threshold=.1, iou_threshold=.2,
             class_names=None):
    """Store the detection settings and build the per-class lookups.

    Args:
        prior_boxes: Prior boxes, stored unchanged on the instance.
        dataset_name: Passed to ``get_class_names`` when ``class_names``
            is not given.
        box_scale_factors: Scale factors stored on the instance. When
            ``None`` a fresh ``[.1, .1, .2, .2]`` list is created for this
            instance.
        background_index: Stored unchanged on the instance.
        lower_probability_threshold: Stored unchanged on the instance.
        iou_threshold: Stored unchanged on the instance.
        class_names: Sequence of class names; looked up from
            ``dataset_name`` when ``None``.
    """
    # A list literal as the default would be created once and shared by
    # every instance that relies on it; build a new one per instance.
    if box_scale_factors is None:
        box_scale_factors = [.1, .1, .2, .2]

    self.prior_boxes = prior_boxes
    self.box_scale_factors = box_scale_factors
    self.background_index = background_index
    self.iou_threshold = iou_threshold
    self.lower_probability_threshold = lower_probability_threshold

    if class_names is None:
        class_names = get_class_names(dataset_name)
    self.class_names = class_names
    self.num_classes = len(self.class_names)

    # One colour per class sampled from the HSV colormap, scaled by 255.
    hsv_samples = plt.cm.hsv(np.linspace(0, 1, self.num_classes)).tolist()
    self.colors = np.asarray(hsv_samples) * 255

    # Maps class index -> class name.
    self.arg_to_class = dict(enumerate(self.class_names))
    self.font = cv2.FONT_HERSHEY_SIMPLEX
# Paths and model used by the evaluation below.
image_prefix = '../datasets/VOCtest/VOCdevkit/VOC2007/JPEGImages/'
weights_path = '../trained_models/SSD300_weights.hdf5'
model = SSD300(weights_path=weights_path)
# from utils.wtf import flip_l2_weights
# model = flip_l2_weights(model)
prior_boxes = create_prior_boxes()
# Elements 1 and 2 of the model's input shape.
# NOTE(review): presumably (height, width) -- confirm against SSD300.
input_shape = model.input_shape[1:3]
class_threshold = .1
iou_threshold = .45

average_precisions = []
# range(1, 21)
# One pass per class index 1..20; index 0 is never the evaluated class.
for ground_truth_class_arg in range(1, 21):
    labels = []
    scores = []
    # NOTE(review): dataset_name and data_prefix are not defined in this
    # chunk; they must be set before this point.
    class_names = get_class_names(dataset_name)
    # ground_truth_class_arg = class_arg
    # Restrict the data manager to the first class name plus the class
    # being evaluated in this pass.
    selected_classes = [class_names[0]] + [class_names[ground_truth_class_arg]]
    num_ground_truth_boxes = 0
    class_decoder = get_arg_to_class(class_names)
    num_classes = len(class_names)
    data_manager = DataManager(dataset_name, selected_classes, data_prefix, image_prefix)
    ground_truth_data = data_manager.load_data()
    difficult_data_flags = data_manager.parser.difficult_objects
    # Iterate the images in sorted key order.
    image_names = sorted(list(ground_truth_data.keys()))
    # print('Number of images found:', len(image_names))
    for image_name in image_names:
        ground_truth_sample = ground_truth_data[image_name]
        # Overwrites the module-level image_prefix with the manager's value.
        image_prefix = data_manager.image_prefix
from utils.evaluation import record_detections from utils.datasets import get_class_names from models.ssd import SSD300 # from tf_eval import evaluate_detection_results_pascal_voc import pickle dataset_name = 'VOC2007' data_prefix = '../datasets/VOCdevkit/VOC2007/Annotations/' image_prefix = '../datasets/VOCdevkit/VOC2007/JPEGImages/' weights_path = '../trained_models/SSD300_weights.hdf5' iou_threshold = .5 iou_nms_threshold = .45 # class_names = ['background', 'aeroplane', 'bird'] class_names = get_class_names() model = SSD300(weights_path=weights_path) data_records = record_detections(model, dataset_name, data_prefix, image_prefix, class_names, iou_threshold, iou_nms_threshold) pickle.dump(data_records, open('data_records.pkl', 'wb')) """ data_records = pickle.load(open('data_records.pkl', 'rb')) class_ids = list(range(len(class_names))) class_ids = [int(class_id) for class_id in class_ids] categories = [] for class_id, class_name in zip(class_ids, class_names): category = dict = {} category['id'] = class_id category['name'] = class_name categories.append(category)
import time
import argparse
import numpy as np
import pickle

# Pick the ElementTree implementation by Python major version.
# NOTE(review): `sys` is not imported in this chunk; it must be imported
# earlier in the file.
if sys.version_info[0] == 2:
    import xml.etree.cElementTree as ET
else:
    import xml.etree.ElementTree as ET


def str2bool(v):
    """Return True when `v`, lower-cased, is "yes", "true", "t" or "1"."""
    return v.lower() in ("yes", "true", "t", "1")


labelmap = get_class_names('VOC2007')
# Drop the first class name.
# NOTE(review): presumably the background entry -- confirm.
labelmap = labelmap[1:]
VOC_CLASSES = labelmap.copy()
weights_path = '../trained_models/ssd_300_VOC0712.pth'
parser = argparse.ArgumentParser(description='Single Shot MultiBox Detection')
voc_root = '../datasets/VOCdevkit'
# The string literal below is a disabled earlier definition of
# --trained_model; it has no effect at runtime.
""" parser.add_argument('--trained_model', default='weights/ssd300_mAP_77.43_v2.pth', type=str, help='Trained state_dict file path to open') """
# --trained_model defaults to weights_path defined above.
parser.add_argument('--trained_model', default=weights_path, type=str,
                    help='Trained state_dict file path to open')
# NOTE(review): the --save_folder definition continues past this chunk.
parser.add_argument('--save_folder', default='eval/', type=str,
def _main(args):
    """Run SSD300 detection over a directory of frames and track objects.

    Reads ``args.input_path``, ``args.output_path``, ``args.oxts``,
    ``args.time_stamps`` and ``args.model_path``. For every readable image
    in the input directory it predicts boxes, records the tracked objects
    of that frame in the module-level ``data_frames`` list, optionally
    (module-level ``saveImages``) writes an annotated copy to the output
    directory, and finally calls ``calculate_trajectories()``.

    Args:
        args: Parsed command-line namespace with the attributes listed
            above.
    """
    global data_frames

    start_time = timer()
    input_path = os.path.expanduser(args.input_path)
    output_path = os.path.expanduser(args.output_path)

    if not os.path.exists(output_path):
        print('Creating output path {}'.format(output_path))
        os.mkdir(output_path)

    logging.basicConfig(filename=output_path + "/tracking.log",
                        level=logging.DEBUG)

    # Parse car positions and angles.
    print("Parsing timestamps and oxts files...")
    if args.oxts.startswith('..'):
        # Paths starting with '..' are resolved against the input directory.
        parse_oxts(input_path + "/" + args.oxts,
                   input_path + "/" + args.time_stamps)
    else:
        parse_oxts(args.oxts, args.time_stamps)
    print("Done. Data acquired.")

    dataset_name = 'VOC2007'
    NUM_CLASSES = 21

    weights_filename = args.model_path
    model = SSD300(num_classes=NUM_CLASSES)
    prior_boxes = create_prior_boxes(model)
    model.load_weights(weights_filename)

    # Drawing setup: one RGB colour per class, evenly spaced in hue.
    hsv_tuples = [(x / NUM_CLASSES, 1., 1.) for x in range(NUM_CLASSES)]
    colors = [
        tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
        for hsv in hsv_tuples
    ]
    random.seed(10101)  # Fixed seed for consistent colors across runs.
    random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
    random.seed(None)  # Reset seed to default.

    class_names = get_class_names(dataset_name)
    box_scale_factors = [.1, .1, .2, .2]  # maybe adjust to our image size
    background_index = 0
    lower_probability_threshold = .1
    iou_threshold = .2
    num_classes = len(class_names)  # should be equal to NUM_CLASSES
    arg_to_class = dict(enumerate(class_names))

    # Classes that are tracked, and the vehicle classes folded into "car".
    tracked_classes = ("person", "bicycle", "car", "motorbike", "bus",
                       "train", "truck")
    car_like_classes = ("bus", "truck")

    frame_idx = 0
    # os.listdir returns entries in arbitrary order; frame_idx is assigned
    # in iteration order, so iterate the file names sorted to keep frames
    # in a deterministic sequence.
    for image_file in sorted(os.listdir(input_path)):
        image_path = os.path.join(input_path, image_file)
        try:
            image_type = imghdr.what(image_path)
            if not image_type:
                print("frame dropped")
                continue
        except IsADirectoryError:
            print("frame dropped")
            continue

        image = Image.open(image_path)
        image_data = np.array(image)

        selected_boxes = predict(model, image_data, prior_boxes,
                                 image_data.shape[0:2], num_classes,
                                 lower_probability_threshold, iou_threshold,
                                 background_index, box_scale_factors)
        if selected_boxes is not None:
            # Columns 0-3 hold the box corners, the rest the class scores.
            x_mins = selected_boxes[:, 0]
            y_mins = selected_boxes[:, 1]
            x_maxs = selected_boxes[:, 2]
            y_maxs = selected_boxes[:, 3]
            classes = selected_boxes[:, 4:]
            num_boxes = len(selected_boxes)
        else:
            num_boxes = 0
            print("frame dropped, no boxes")

        print('Found {} boxes for {}'.format(num_boxes, image_file))
        # Disabled size formula:
        # np.floor(3e-2 * image.size[1] + 0.5).astype('int32')
        font = ImageFont.truetype(font='font/FiraMono-Medium.otf', size=11)
        thickness = (image_data.shape[0] + image_data.shape[1]) // 300

        logging.info("Img: " + str(image_file))

        boxes_data = []
        for i in range(num_boxes):
            xmin = int(x_mins[i])
            ymin = int(y_mins[i])
            xmax = int(x_maxs[i])
            ymax = int(y_maxs[i])
            box_class_scores = classes[i]
            label_class = np.argmax(box_class_scores)
            score = box_class_scores[label_class]
            predicted_class = arg_to_class[label_class]

            box = [ymin, xmin, ymax, xmax]
            box = [max(0, v) for v in box]  # sometimes it's negative.

            # log positions
            # NOTE(review): for untracked classes obj_coord stays an empty
            # array (not None), so the label below still includes it.
            obj_coord = np.array([])
            if predicted_class in tracked_classes and score > 0.2:
                # Treat the larger vehicle classes as "car". The second
                # entry used to be misspelled "bruck" and never matched.
                if predicted_class in car_like_classes:
                    predicted_class = "car"
                obj_coord = computeCoordinates(box, frame_idx)
                if obj_coord is not None:
                    hist = histogram(image_data, box)
                    # create data and store it
                    boxes_data.append({
                        'predicted_class': predicted_class,
                        'score': float(score),
                        'coord': obj_coord,
                        'hist': hist
                    })
                    logging.info(predicted_class + " :" + str(obj_coord)
                                 + " | " + str(np.linalg.norm(obj_coord)))
            # end log positions

            if saveImages:
                if obj_coord is not None:
                    label = '{} {:.2f} {} {:.2f}'.format(
                        predicted_class, score, str(obj_coord),
                        np.linalg.norm(obj_coord))
                else:
                    label = '{} {:.2f}'.format(predicted_class, score)
                draw = ImageDraw.Draw(image)
                label_size = draw.textsize(label, font)

                # Round to the nearest pixel and clamp to the image bounds.
                top, left, bottom, right = box
                top = max(0, np.floor(top + 0.5).astype('int32'))
                left = max(0, np.floor(left + 0.5).astype('int32'))
                bottom = min(image.size[1],
                             np.floor(bottom + 0.5).astype('int32'))
                right = min(image.size[0],
                            np.floor(right + 0.5).astype('int32'))
                print(label, (left, top), (right, bottom))

                # Put the label above the box when it fits, else inside.
                if top - label_size[1] >= 0:
                    text_origin = np.array([left, top - label_size[1]])
                else:
                    text_origin = np.array([left, top + 1])

                # Nested 1-px rectangles give the outline its thickness.
                for inset in range(thickness):
                    draw.rectangle(
                        [left + inset, top + inset,
                         right - inset, bottom - inset],
                        outline=colors[label_class])
                draw.rectangle(
                    [tuple(text_origin), tuple(text_origin + label_size)],
                    fill=colors[label_class])
                draw.text(text_origin, label, fill=(0, 0, 0), font=font)
                del draw

        frame_idx += 1
        data_frames.append(boxes_data)
        if saveImages:
            image.save(os.path.join(output_path, image_file), quality=80)

    now = timer()
    start_trj_time = timer()
    print("Time elapsed CNN: " + str(now - start_time) + " seconds")
    print("Calculating trajectories...")
    calculate_trajectories()
    now = timer()
    print("Done. Time elapsed: " + str(now - start_trj_time) + " seconds\n\n")
    print("Total time elapsed: " + str(now - start_time) + " seconds")
# NOTE(review): the next three statements are the tail of a function whose
# header lies outside this chunk. It resizes image_array when a target_size
# is given and returns it together with (height, width).
    if target_size is not None:
        image_array = imresize(image_array, target_size)
    return image_array, (height, width)


# parameters
trained_weights_path = '../trained_models/ssd_300_VOC0712.pth'
input_size = 300
num_classes = 21
iou_threshold = .5
lower_probability_threshold = .01
background_index = 0
dataset_name = 'VOC2007'
data_prefix = '../datasets/VOCdevkit/VOC2007/Annotations/'
image_prefix = '../datasets/VOCdevkit/VOC2007/JPEGImages/'
# All class names of the dataset are selected.
selected_classes = get_class_names(dataset_name)
prior_boxes = create_prior_boxes()

# loading pytorch model
pytorch_ssd = build_ssd('train', input_size, num_classes)
pytorch_ssd.load_weights(trained_weights_path)

# loading keras model
weights_path = '../trained_models/SSD300_weights.hdf5'
# The Keras model is constructed under the CPU device scope.
with tf.device('/cpu:0'):
    model = SSD300(weights_path=weights_path)
image_path = '../images/boys.jpg'
data_manager = DataManager(dataset_name, selected_classes, data_prefix, image_prefix)
# import torch.utils.data as data from data import VOCDetection, BaseTransform from ssd import build_ssd import sys import os import time import argparse import numpy as np import pickle # import cv2 trained_model_path = '../trained_models/SSD300_weights.hdf5' voc_root = '../datasets/VOCdevkit/' labelmap = get_class_names() VOC_CLASSES = labelmap[1:] if sys.version_info[0] == 2: import xml.etree.cElementTree as ET else: import xml.etree.ElementTree as ET def str2bool(v): return v.lower() in ("yes", "true", "t", "1") class AnnotationTransform(object): def __init__(self, class_to_ind=None, keep_difficult=False): self.class_to_ind = class_to_ind or dict(