import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
import os
import sys
# os.chdir('/home/hector/project/proj-pytorch/pytorch-dt/')
# sys.path.insert(0, '/home/hector/project/proj-pytorch/pytorch-dt/')
from PIL import Image, ImageDraw
from torch.autograd import Variable
from models.ssd import SSD300, SSDBoxCoder

# Load a 2-class SSD300 checkpoint and run it on a single Caltech image.
print('Loading model..')
net = SSD300(num_classes=2)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# map_location lets the (possibly GPU-saved) checkpoint load on CPU-only
# machines instead of raising a CUDA deserialization error.
checkpoint = torch.load('./example/ssd+/checkpoint/ckpt.pth',
                        map_location=device)
net.load_state_dict(checkpoint['net'])
# cudnn.benchmark = True
net.to(device)
net.eval()

print('Loading image..')
# BUG FIX: PIL does not expand '~' in paths -- without expanduser the
# original Image.open('~/...') raises FileNotFoundError.
img_path = os.path.expanduser(
    '~/project/data/caltech/JPEGImages/set04_V002_I00059_usatrain.jpg')
img_org = Image.open(img_path)
# SSD300 expects a fixed 300x300 input.
ow = oh = 300
img = img_org.resize((ow, oh))
print('Predicting..')
# Evaluation script (fragment): computes per-class average precision for a
# Keras SSD300 model on VOC2007.
# NOTE(review): the per-class loop body is truncated at this chunk boundary --
# the code that fills `labels`/`scores` and computes the AP is not visible
# here. `get_class_names` is also not imported in this fragment; it is
# presumably defined/imported elsewhere in the file -- confirm.
from utils.boxes import create_prior_boxes
from utils.boxes import calculate_intersection_over_union
from utils.boxes import denormalize_box
from models.ssd import SSD300
from metrics import compute_average_precision
# from metrics import compute_average_precision_2
from metrics import compute_precision_and_recall
# from utils.visualizer import draw_image_boxes
from utils.datasets import get_arg_to_class
import cv2

dataset_name = 'VOC2007'
image_prefix = '../datasets/VOCdevkit/VOC2007/JPEGImages/'
weights_path = '../trained_models/SSD300_weights.hdf5'
model = SSD300(weights_path=weights_path)
prior_boxes = create_prior_boxes()
input_shape = model.input_shape[1:3]  # spatial (rows, cols) expected by the net
class_threshold = .1      # minimum class confidence for keeping a detection
iou_nms_threshold = .45   # IoU used during non-maximum suppression
iou_threshold = .5        # IoU for counting a detection as a true positive
num_classes = 21          # 20 VOC classes + background
average_precisions = []
# Evaluate one class at a time; arg 0 is background and is skipped.
for ground_truth_class_arg in range(1, num_classes):
    labels = []
    scores = []
    class_names = get_class_names(dataset_name)
    # keep only background plus the class currently being evaluated
    selected_classes = [class_names[0]] + [class_names[ground_truth_class_arg]]
    num_ground_truth_boxes = 0
    class_decoder = get_arg_to_class(class_names)
def _main(args):
    """Run SSD detection over every image in ``args.input_path``, log the
    world coordinates of tracked object classes, optionally draw and save
    annotated frames, then compute trajectories.

    Args:
        args: parsed CLI namespace with ``input_path``, ``output_path``,
            ``oxts``, ``time_stamps`` and ``model_path`` attributes.

    Relies on names defined elsewhere in this file/module: ``timer``,
    ``parse_oxts``, ``SSD300``, ``create_prior_boxes``, ``get_class_names``,
    ``predict``, ``computeCoordinates``, ``histogram``,
    ``calculate_trajectories``, and the globals ``saveImages`` and
    ``data_frames``.
    """
    global data_frames  # per-frame detection records appended below
    start_time = timer()
    input_path = os.path.expanduser(args.input_path)
    output_path = os.path.expanduser(args.output_path)
    if not os.path.exists(output_path):
        print('Creating output path {}'.format(output_path))
        os.mkdir(output_path)
    logging.basicConfig(filename=output_path + "/tracking.log",
                        level=logging.DEBUG)

    # parse car positions and angles
    print("Parsing timestamps and oxts files...")
    if args.oxts.startswith('..'):
        # relative paths are resolved against the input directory
        parse_oxts(input_path + "/" + args.oxts,
                   input_path + "/" + args.time_stamps)
    else:
        parse_oxts(args.oxts, args.time_stamps)
    print("Done. Data acquired.")

    dataset_name = 'VOC2007'
    NUM_CLASSES = 21
    weights_filename = args.model_path
    model = SSD300(num_classes=NUM_CLASSES)
    prior_boxes = create_prior_boxes(model)
    model.load_weights(weights_filename)

    # drawing stuff
    # Generate colors for drawing bounding boxes.
    hsv_tuples = [(x / NUM_CLASSES, 1., 1.) for x in range(NUM_CLASSES)]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(
        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
            colors))
    random.seed(10101)  # Fixed seed for consistent colors across runs.
    random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
    random.seed(None)  # Reset seed to default.

    class_names = get_class_names(dataset_name)
    box_scale_factors = [.1, .1, .2, .2]  # maybe adjust to our image size
    background_index = 0
    lower_probability_threshold = .1
    iou_threshold = .2
    num_classes = len(class_names)  # should be equal to NUM_CLASSES
    arg_to_class = dict(zip(list(range(num_classes)), class_names))
    frame_idx = 0
    for image_file in os.listdir(input_path):
        # Skip anything that is not a recognizable image file.
        try:
            image_type = imghdr.what(os.path.join(input_path, image_file))
            if not image_type:
                print("frame dropped")
                continue
        except IsADirectoryError:
            print("frame dropped")
            continue
        image = Image.open(os.path.join(input_path, image_file))
        image_data = np.array(image)
        selected_boxes = predict(model, image_data, prior_boxes,
                                 image_data.shape[0:2], num_classes,
                                 lower_probability_threshold, iou_threshold,
                                 background_index, box_scale_factors)
        if selected_boxes is not None:
            # columns: x_min, y_min, x_max, y_max, then per-class scores
            x_mins = selected_boxes[:, 0]
            y_mins = selected_boxes[:, 1]
            x_maxs = selected_boxes[:, 2]
            y_maxs = selected_boxes[:, 3]
            classes = selected_boxes[:, 4:]
            num_boxes = len(selected_boxes)
        else:
            num_boxes = 0
            print("frame dropped, no boxes")
        print('Found {} boxes for {}'.format(num_boxes, image_file))
        font = ImageFont.truetype(
            font='font/FiraMono-Medium.otf',
            size=11)  # np.floor(3e-2 * image.size[1] + 0.5).astype('int32')
        thickness = (image_data.shape[0] + image_data.shape[1]) // 300
        logging.info("Img: " + str(image_file))
        boxes_data = []
        for i in range(num_boxes):
            xmin = int(x_mins[i])
            ymin = int(y_mins[i])
            xmax = int(x_maxs[i])
            ymax = int(y_maxs[i])
            box_class_scores = classes[i]
            label_class = np.argmax(box_class_scores)
            score = box_class_scores[label_class]
            predicted_class = arg_to_class[label_class]
            box = [ymin, xmin, ymax, xmax]
            box = [max(0, v) for v in box]  # sometimes it's negative.

            # log positions
            obj_coord = np.array([])
            # object and classes to track
            if predicted_class in ["person", "bicycle", "car", "motorbike",
                                   "bus", "train", "truck"] and score > 0.2:
                # BUG FIX: was ["bus", "bruck"] -- the "bruck" typo meant
                # trucks were never remapped to the generic vehicle class.
                if predicted_class in ["bus", "truck"]:  # vehicle
                    predicted_class = "car"
                obj_coord = computeCoordinates(box, frame_idx)
                if obj_coord is not None:
                    hist = histogram(image_data, box)
                    # create data and store it
                    boxes_data.append({
                        'predicted_class': predicted_class,
                        'score': float(score),
                        'coord': obj_coord,
                        'hist': hist
                    })
                    logging.info(predicted_class + " :" + str(obj_coord) +
                                 " | " + str(np.linalg.norm(obj_coord)))
            # end log positions

            if saveImages:
                if obj_coord is not None:
                    label = '{} {:.2f} {} {:.2f}'.format(
                        predicted_class, score, str(obj_coord),
                        np.linalg.norm(obj_coord))
                else:
                    label = '{} {:.2f}'.format(predicted_class, score)
                draw = ImageDraw.Draw(image)
                label_size = draw.textsize(label, font)
                # clamp the box to the image bounds before drawing
                top, left, bottom, right = box
                top = max(0, np.floor(top + 0.5).astype('int32'))
                left = max(0, np.floor(left + 0.5).astype('int32'))
                bottom = min(image.size[1],
                             np.floor(bottom + 0.5).astype('int32'))
                right = min(image.size[0],
                            np.floor(right + 0.5).astype('int32'))
                print(label, (left, top), (right, bottom))
                # place the label above the box when it fits, else inside
                if top - label_size[1] >= 0:
                    text_origin = np.array([left, top - label_size[1]])
                else:
                    text_origin = np.array([left, top + 1])
                # FIX: inner loop used `i`, shadowing the box index above;
                # renamed to `t` for clarity (behavior unchanged).
                for t in range(thickness):
                    draw.rectangle(
                        [left + t, top + t, right - t, bottom - t],
                        outline=colors[label_class])
                draw.rectangle(
                    [tuple(text_origin), tuple(text_origin + label_size)],
                    fill=colors[label_class])
                draw.text(text_origin, label, fill=(0, 0, 0), font=font)
                del draw
        frame_idx += 1
        data_frames.append(boxes_data)
        if saveImages:
            image.save(os.path.join(output_path, image_file), quality=80)

    now = timer()
    start_trj_time = timer()
    print("Time elapsed CNN: " + str(now - start_time) + " seconds")
    print("Calculating trajectories...")
    calculate_trajectories()
    now = timer()
    print("Done. Time elapsed: " + str(now - start_trj_time) + " seconds\n\n")
    print("Total time elapsed: " + str(now - start_time) + " seconds")
# NOTE(review): fragment -- the enclosing capture loop and method header
# (presumably VideoTest.start_video) are in an earlier chunk not visible
# here; `frame`, `camera`, `self`, `predict` and `draw_video_boxes` come
# from that context. The `continue`/`break` below belong to that loop.
image_array = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # OpenCV frames are BGR
selected_boxes = predict(model, image_array, prior_boxes, frame.shape[0:2],
                         self.num_classes, self.lower_probability_threshold,
                         self.iou_threshold, self.background_index,
                         self.box_scale_factors)
if selected_boxes is None:
    continue  # nothing detected in this frame; grab the next one
draw_video_boxes(selected_boxes, frame, self.arg_to_class, self.colors,
                 self.font)
cv2.imshow('webcam', frame)
# pressing 'q' ends the preview loop
if cv2.waitKey(1)&0xFF == ord('q'):
    break
camera.release()
cv2.destroyAllWindows()


if __name__ == "__main__":
    from models.ssd import SSD300
    from utils.boxes import create_prior_boxes
    num_classes = 21          # 20 VOC classes + background
    dataset_name = 'VOC2007'
    weights_filename = '../trained_models/weights_SSD300.hdf5'
    model = SSD300(num_classes=num_classes)
    prior_boxes = create_prior_boxes(model)
    model.load_weights(weights_filename)
    video = VideoTest(prior_boxes, dataset_name)
    video.start_video(model)
import pickle

from models.ssd import SSD300

# Port pre-extracted TensorFlow weights (pickled as a list indexed by layer
# position, one list of weight arrays per layer) into the Keras SSD300 model.
# SECURITY NOTE: pickle.load can execute arbitrary code -- only unpickle
# 'tf_w.pkl' if it comes from a trusted source.
# FIX: use a context manager so the file handle is closed (the original
# bare open() leaked it).
with open('tf_w.pkl', 'rb') as weights_file:
    tf_weights = pickle.load(weights_file)

model = SSD300()
for layer_arg, layer in enumerate(model.layers):
    print(layer_arg)  # progress indicator: which layer is being set
    layer.set_weights(tf_weights[layer_arg])
def make_one_net_model(self, cf, in_shape, loss, metrics, optimizer):
    """Build, optionally load weights into, and compile the Keras model
    selected by ``cf.model_name``, returning it wrapped in One_Net_Model.

    Args:
        cf: experiment configuration (model name, dataset, weight decay,
            freeze/pretrain flags, weights file, save path, show_model).
        in_shape: input shape forwarded to every model builder.
        loss: loss function for compile; for 'ssd512' an object exposing
            ``compute_loss`` is expected instead.
        metrics: metrics list for compile (not used for 'ssd512').
        optimizer: Keras optimizer instance.

    Returns:
        One_Net_Model wrapping the compiled Keras model.

    Raises:
        ValueError: if ``cf.model_name`` is not one of the known names.
    """
    # Create the *Keras* model
    if cf.model_name == 'fcn8':
        model = build_fcn8(in_shape, cf.dataset.n_classes, cf.weight_decay,
                           freeze_layers_from=cf.freeze_layers_from,
                           load_pretrained=cf.load_imageNet)
    elif cf.model_name == 'unet':
        model = build_unet(in_shape, cf.dataset.n_classes, cf.weight_decay,
                           freeze_layers_from=cf.freeze_layers_from,
                           path_weights=None)
    elif cf.model_name == 'segnet_basic':
        model = build_segnet(in_shape, cf.dataset.n_classes, cf.weight_decay,
                             freeze_layers_from=cf.freeze_layers_from,
                             path_weights=None, basic=True)
    elif cf.model_name == 'segnet_vgg':
        model = build_segnet(in_shape, cf.dataset.n_classes, cf.weight_decay,
                             freeze_layers_from=cf.freeze_layers_from,
                             path_weights=None, basic=False)
    elif cf.model_name == 'resnetFCN':
        model = build_resnetFCN(in_shape, cf.dataset.n_classes,
                                cf.weight_decay,
                                freeze_layers_from=cf.freeze_layers_from,
                                path_weights=None)
    elif cf.model_name == 'densenetFCN':
        model = build_densenetFCN(in_shape, cf.dataset.n_classes,
                                  cf.weight_decay,
                                  freeze_layers_from=cf.freeze_layers_from,
                                  path_weights=None)
    elif cf.model_name == 'lenet':
        model = build_lenet(in_shape, cf.dataset.n_classes, cf.weight_decay)
    elif cf.model_name == 'alexNet':
        model = build_alexNet(in_shape, cf.dataset.n_classes, cf.weight_decay)
    elif cf.model_name == 'vgg16':
        model = build_vgg(in_shape, cf.dataset.n_classes, 16, cf.weight_decay,
                          load_pretrained=cf.load_imageNet,
                          freeze_layers_from=cf.freeze_layers_from)
    elif cf.model_name == 'vgg19':
        model = build_vgg(in_shape, cf.dataset.n_classes, 19, cf.weight_decay,
                          load_pretrained=cf.load_imageNet,
                          freeze_layers_from=cf.freeze_layers_from)
    elif cf.model_name == 'resnet50':
        model = build_resnet50(in_shape, cf.dataset.n_classes,
                               cf.weight_decay,
                               load_pretrained=cf.load_imageNet,
                               freeze_layers_from=cf.freeze_layers_from)
    elif cf.model_name == 'InceptionV3':
        model = build_inceptionV3(in_shape, cf.dataset.n_classes,
                                  cf.weight_decay,
                                  load_pretrained=cf.load_imageNet,
                                  freeze_layers_from=cf.freeze_layers_from)
    elif cf.model_name == 'yolo':
        model = build_yolo(in_shape, cf.dataset.n_classes,
                           cf.dataset.n_priors,
                           load_pretrained=cf.load_imageNet,
                           freeze_layers_from=cf.freeze_layers_from,
                           tiny=False)
    elif cf.model_name == 'tiny-yolo':
        model = build_yolo(in_shape, cf.dataset.n_classes,
                           cf.dataset.n_priors,
                           load_pretrained=cf.load_imageNet,
                           freeze_layers_from=cf.freeze_layers_from,
                           tiny=True)
    elif cf.model_name == 'own':
        model = build_own(in_shape, cf.dataset.n_classes)
    elif cf.model_name == 'ssd512':
        # NOTE(review): the 'ssd512' name instantiates an SSD300 --
        # confirm whether this is intentional or a leftover.
        model = SSD300(in_shape, cf.dataset.n_classes)
    elif cf.model_name == 'segnet':
        model = build_segnet(in_shape, cf.dataset.n_classes)
    elif cf.model_name == 'VGGSegnet':
        model = VGGSegnet(in_shape, cf.dataset.n_classes)
    else:
        raise ValueError('Unknown model')

    # Load pretrained weights
    if cf.load_pretrained:
        print('   loading model weights from: ' + cf.weights_file + '...')
        model.load_weights(cf.weights_file, by_name=True)

    # Compile model
    # SSD uses a custom multibox loss object and no extra metrics.
    if cf.model_name == 'ssd512':
        model.compile(loss=loss.compute_loss, optimizer=optimizer)
    else:
        model.compile(loss=loss, metrics=metrics, optimizer=optimizer)

    # Show model structure
    if cf.show_model:
        model.summary()
        plot_model(model, to_file=os.path.join(cf.savepath, 'model.png'))

    # Output the model
    print('   Model: ' + cf.model_name)
    # model is a keras model, Model is a class wrapper so that we can have
    # other models (like GANs) made of a pair of keras models, with their
    # own ways to train, test and predict
    return One_Net_Model(model, cf, optimizer)
# NOTE(review): fragment -- the enclosing frame loop of run_camera (and the
# definitions of `origin_stack`, `curl`, `orig_image`, `xmin`, `ymin`,
# `conf_thresh`, `preprocessing`) are in an earlier chunk not visible here.
# The `break` below belongs to that loop.
# Maintain a sliding window of cropped frames as the C3D clip input.
origin_stack.pop(0)
# crop: presumably the 112x112 region the C3D net expects -- TODO confirm
origin_stack.append(curl[8:120, 30:142, :])
clip = np.array(origin_stack)
clip = np.expand_dims(clip, axis=0)  # add batch dimension
clip = preprocessing(clip)
c3d_result = c3d.predict(clip)
# Only annotate when the best action score clears the confidence threshold.
if max(c3d_result[0]) >= conf_thresh:
    label = np.argmax(c3d_result[0])
    action_name = action_class[label]
    cv2.putText(
        orig_image, action_name + '%.2f' % max(c3d_result[0]),
        (xmin + 10, ymin + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
        (0, 0, 255), 1)
cv2.imshow("SSD result", orig_image)
# pressing 'q' ends the capture loop
if cv2.waitKey(5) & 0xFF == ord('q'):
    break


if __name__ == '__main__':
    action_class = ['standing', 'sitting', 'walking']
    clip_length = 16                 # frames per C3D clip
    input_shape = (300, 300, 3)      # SSD300 input
    ssd_model = SSD(input_shape, num_classes=21)
    ssd_model.load_weights('weights_SSD300.hdf5')
    c3d_input_shape = (112, 112, 16, 3)
    c3d = c3d_model(c3d_input_shape, nb_classes=3)
    # NOTE(review): empty weights path -- load_weights('') looks unfinished;
    # fill in the real C3D weights file before running.
    c3d.load_weights('')
    run_camera(input_shape, ssd_model, action_class, clip_length, c3d)