def _main_(args):
    """Annotate a single image or an ``.mp4`` video with YOLO detections.

    Loads the JSON configuration from ``args.conf``, builds the detector,
    loads the weights from ``args.weights`` and runs it on ``args.input``.
    The annotated result is written next to the input, with ``_detected``
    inserted before the file extension.

    Args:
        args: Parsed command-line namespace exposing ``conf``, ``weights``
            and ``input``.

    Raises:
        IOError: If ``args.input`` is treated as an image and OpenCV cannot
            decode it.
    """
    import os  # function-scope import: os.path.splitext is newly used here

    config_path = args.conf
    weights_path = args.weights
    image_path = args.input

    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    labels = config['model']['labels']

    ###############################
    #   Make the model
    ###############################

    yolo = YOLO(backend=config['model']['backend'],
                input_size=config['model']['input_size'],
                labels=labels,
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'])

    ###############################
    #   Load trained weights
    ###############################

    yolo.load_weights(weights_path)

    ###############################
    #   Predict bounding boxes
    ###############################

    if image_path[-4:] == '.mp4':
        # The suffix is known to be exactly four characters in this branch.
        video_out = image_path[:-4] + '_detected' + image_path[-4:]
        video_reader = cv2.VideoCapture(image_path)
        video_writer = None

        try:
            nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
            frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

            video_writer = cv2.VideoWriter(video_out,
                                           cv2.VideoWriter_fourcc(*'MPEG'),
                                           50.0,
                                           (frame_w, frame_h))

            for _ in tqdm(range(nb_frames)):
                grabbed, image = video_reader.read()
                # The reported frame count is not guaranteed to be exact, so
                # stop at the first frame that cannot be decoded instead of
                # feeding None to the detector.
                if not grabbed:
                    break

                boxes = yolo.predict(image)
                image = draw_boxes(image, boxes, labels)

                video_writer.write(np.uint8(image))
        finally:
            # Release the capture/writer even when prediction raises, so the
            # output container is finalized and file handles are not leaked.
            video_reader.release()
            if video_writer is not None:
                video_writer.release()
    else:
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError('Could not read image: ' + str(image_path))

        boxes = yolo.predict(image)
        image = draw_boxes(image, boxes, labels)

        print(len(boxes), 'boxes are found')

        # Split on the real extension so that e.g. "photo.jpeg" becomes
        # "photo_detected.jpeg" rather than "photo._detectedjpeg".
        stem, ext = os.path.splitext(image_path)
        if not ext:
            # No recognizable extension: keep the historical 4-character split.
            stem, ext = image_path[:-4], image_path[-4:]

        cv2.imwrite(stem + '_detected' + ext, image)
def _main_(args):
    """Train a YOLO model as described by the JSON config at ``args.conf``.

    Steps: parse the training annotations, obtain a validation set (parsed
    from its own folder when that folder exists, otherwise a shuffled 80/20
    split of the training images), validate the configured labels against
    the labels seen in the annotations, build the model, optionally load
    pre-trained weights, and start training.

    The function returns early, without training, when at least one
    configured label never occurs in the training annotations.

    Args:
        args: Parsed command-line namespace exposing ``conf``.
    """
    config_path = args.conf

    with open(config_path) as config_buffer:
        config = json.loads(config_buffer.read())

    ###############################
    #   Parse the annotations
    ###############################

    # parse annotations of the training set
    train_imgs, train_labels = parse_annotation(config['train']['train_annot_folder'],
                                                config['train']['train_image_folder'],
                                                config['model']['labels'])

    # parse annotations of the validation set, if any, otherwise split the training set
    if os.path.exists(config['valid']['valid_annot_folder']):
        valid_imgs, valid_labels = parse_annotation(config['valid']['valid_annot_folder'],
                                                    config['valid']['valid_image_folder'],
                                                    config['model']['labels'])
    else:
        train_valid_split = int(0.8 * len(train_imgs))
        np.random.shuffle(train_imgs)

        valid_imgs = train_imgs[train_valid_split:]
        train_imgs = train_imgs[:train_valid_split]

    # NOTE: every print below passes a single pre-formatted string inside
    # parentheses. That form is valid on both Python 2 and Python 3 and emits
    # the same text the former Python 2 print statements produced.
    if len(config['model']['labels']) > 0:
        overlap_labels = set(config['model']['labels']).intersection(set(train_labels.keys()))

        print('Seen labels:\t%s' % (train_labels,))
        print('Given labels:\t%s' % (config['model']['labels'],))
        print('Overlap labels:\t%s' % (overlap_labels,))

        if len(overlap_labels) < len(config['model']['labels']):
            print('Some labels have no annotations! Please revise the list of labels in the config.json file!')
            return
    else:
        print('No labels are provided. Train on all seen labels.')
        # Materialize the keys: on Python 3 ``dict.keys()`` is a live view,
        # not a list, and the labels are handed on to the model as a sequence.
        config['model']['labels'] = list(train_labels.keys())

    ###############################
    #   Construct the model
    ###############################

    yolo = YOLO(architecture=config['model']['architecture'],
                input_size=config['model']['input_size'],
                labels=config['model']['labels'],
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'])

    ###############################
    #   Load the pretrained weights (if any)
    ###############################

    if os.path.exists(config['train']['pretrained_weights']):
        print('Loading pre-trained weights in %s' % (config['train']['pretrained_weights'],))
        yolo.load_weights(config['train']['pretrained_weights'])

    ###############################
    #   Start the training process
    ###############################

    yolo.train(train_imgs=train_imgs,
               valid_imgs=valid_imgs,
               train_times=config['train']['train_times'],
               valid_times=config['valid']['valid_times'],
               nb_epoch=config['train']['nb_epoch'],
               learning_rate=config['train']['learning_rate'],
               batch_size=config['train']['batch_size'],
               warmup_epochs=config['train']['warmup_epochs'],
               object_scale=config['train']['object_scale'],
               no_object_scale=config['train']['no_object_scale'],
               coord_scale=config['train']['coord_scale'],
               class_scale=config['train']['class_scale'],
               saved_weights_name=config['train']['saved_weights_name'],
               debug=config['train']['debug'])
# Restrict the process to the first GPU, enumerated in PCI bus order.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

weights_path = "trained_wts.h5"
image_path = "1.jpg"
# Single source of truth for the class names: used both to build the model
# and to caption the drawn boxes.
class_labels = ["Potholes"]

# The detector is built and its weights loaded once at import time, so every
# request reuses the same model instance.
yolo = YOLO(backend="Full Yolo",
            input_size=416,
            labels=class_labels,
            max_box_per_image=15,
            anchors=[
                0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434,
                7.88282, 3.52778, 9.77052, 9.16828
            ])
yolo.load_weights(weights_path)


@app.route('/', methods=['GET', 'POST'])
def predict():
    """Download the image named by the ``url`` form field and run detection.

    Returns:
        A short text response with the number of boxes found, or a
        ``(message, 400)`` tuple when the ``url`` field is missing, uses a
        scheme other than http/https, or the download is not a decodable
        image.
    """
    import keras.backend.tensorflow_backend as tb
    from urllib.parse import urlparse

    # NOTE(review): presumably a workaround for Keras' thread-local symbolic
    # scope not being initialized on request-handling threads — confirm.
    tb._SYMBOLIC_SCOPE.value = True

    url = request.form.get('url')
    # The URL comes straight from the client: reject a missing field (e.g. a
    # plain GET) and anything that is not http(s), so that schemes such as
    # file:// cannot be used to read local files.
    if not url or urlparse(url).scheme not in ('http', 'https'):
        return "Missing or unsupported 'url' form field", 400

    urllib.request.urlretrieve(url, image_path)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an undecodable file by returning None.
        return 'Downloaded file is not a readable image', 400

    boxes = yolo.predict(image)
    # Pass the list of class names, as every other draw_boxes call does; the
    # former bare string "Pothole" was not a label list.
    image = draw_boxes(image, boxes, class_labels)

    # NOTE(review): the view previously fell off the end and returned None,
    # which Flask rejects. The message mirrors the old commented-out print;
    # adjust the payload if clients expect something else (e.g. the image).
    return '%d boxes are found' % len(boxes)
def _main_(args):
    """Optionally train a YOLO model, then preview detections on one image.

    Reads the JSON config at ``args.conf``, parses the annotations, obtains a
    validation set (its own folder when present, otherwise a shuffled 80/20
    split of the training images), validates the configured labels, builds
    the model and optionally loads pre-trained weights. Training runs only
    when ``args.training`` is truthy. Finally the detections for ``10.png``
    in the validation image folder are drawn and shown with matplotlib.

    The function returns early when at least one configured label never
    occurs in the training annotations.

    Args:
        args: Parsed command-line namespace exposing ``conf`` and
            ``training``.

    Raises:
        IOError: If the preview image cannot be read.
    """
    config_path = args.conf

    with open(config_path) as config_buffer:
        config = json.loads(config_buffer.read())

    ###############################
    #   Parse the annotations
    ###############################

    # parse annotations of the training set
    train_imgs, train_labels = parse_annotation(config['train']['train_annot_folder'],
                                                config['train']['train_image_folder'],
                                                config['model']['labels'])

    # parse annotations of the validation set, if any, otherwise split the training set
    if os.path.exists(config['valid']['valid_annot_folder']):
        valid_imgs, valid_labels = parse_annotation(config['valid']['valid_annot_folder'],
                                                    config['valid']['valid_image_folder'],
                                                    config['model']['labels'])
    else:
        train_valid_split = int(0.8 * len(train_imgs))
        np.random.shuffle(train_imgs)

        valid_imgs = train_imgs[train_valid_split:]
        train_imgs = train_imgs[:train_valid_split]

    # NOTE: every print below passes a single pre-formatted string inside
    # parentheses. That form is valid on both Python 2 and Python 3 and emits
    # the same text the former Python 2 print statements produced.
    if len(config['model']['labels']) > 0:
        overlap_labels = set(config['model']['labels']).intersection(set(train_labels.keys()))

        print('Seen labels:\t%s' % (train_labels,))
        print('Given labels:\t%s' % (config['model']['labels'],))
        print('Overlap labels:\t%s' % (overlap_labels,))

        if len(overlap_labels) < len(config['model']['labels']):
            print('Some labels have no annotations! Please revise the list of labels in the config.json file!')
            return
    else:
        print('No labels are provided. Train on all seen labels.')
        # Materialize the keys: on Python 3 ``dict.keys()`` is a live view,
        # not a list.
        config['model']['labels'] = list(train_labels.keys())

    ###############################
    #   Construct the model
    ###############################

    yolo = YOLO(architecture=config['model']['architecture'],
                input_size=config['model']['input_size'],
                labels=config['model']['labels'],
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'])

    ###############################
    #   Load the pretrained weights (if any)
    ###############################

    if os.path.exists(config['train']['pretrained_weights']):
        print('Loading pre-trained weights in %s' % (config['train']['pretrained_weights'],))
        yolo.load_weights(config['train']['pretrained_weights'])

    # NOTE(review): ``galaxyModel`` is not defined in this function; it must
    # be a module-level name or this loop raises NameError. It is also unclear
    # how it relates to ``yolo`` — confirm which model should be unfrozen.
    # Assumed to run whether or not pre-trained weights were loaded.
    for layer in galaxyModel.layers:
        print(layer)
        layer.trainable = True

    ###############################
    #   Start the training process
    ###############################

    if args.training:
        yolo.train(train_imgs=train_imgs,
                   valid_imgs=valid_imgs,
                   train_times=config['train']['train_times'],
                   valid_times=config['valid']['valid_times'],
                   nb_epoch=config['train']['nb_epoch'],
                   learning_rate=config['train']['learning_rate'],
                   batch_size=config['train']['batch_size'],
                   warmup_epochs=config['train']['warmup_epochs'],
                   object_scale=config['train']['object_scale'],
                   no_object_scale=config['train']['no_object_scale'],
                   coord_scale=config['train']['coord_scale'],
                   class_scale=config['train']['class_scale'],
                   saved_weights_name=config['train']['saved_weights_name'],
                   debug=config['train']['debug'])

    ###############################
    #   Preview the detections on one validation image
    ###############################

    # Assumed to run regardless of ``args.training`` (the flag only gates the
    # training call above).
    preview_path = os.path.join(config['valid']['valid_image_folder'], '10.png')
    image = cv2.imread(preview_path)
    if image is None:
        # cv2.imread signals failure by returning None, not by raising.
        raise IOError('Could not read preview image: ' + preview_path)

    plt.figure(figsize=(10, 10))

    boxes = yolo.predict(image)
    image = draw_boxes(image, boxes, labels=config['model']['labels'])

    # Reverse the channel axis: OpenCV loads images as BGR, matplotlib expects RGB.
    plt.imshow(image[:, :, ::-1])
    plt.show()
def _main_(args):
    """Training entry point: read the config, prepare data, fit the detector.

    The JSON file named by ``args.conf`` drives everything. Validation images
    come from their own annotation folder when it exists; otherwise the
    training images are shuffled in place and the last 20% are held out.
    When labels are configured but some never appear in the training
    annotations, a message is printed and the function returns without
    training. When no labels are configured, all seen labels are used.

    Args:
        args: Parsed command-line namespace exposing ``conf``.
    """
    with open(args.conf) as cfg_file:
        config = json.loads(cfg_file.read())

    train_cfg = config['train']
    model_cfg = config['model']

    # ---- training annotations ----
    train_imgs, train_labels = parse_annotation(
        train_cfg['train_annot_folder'],
        train_cfg['train_image_folder'],
        model_cfg['labels'])

    # ---- validation annotations (or an 80/20 split of the training set) ----
    valid_cfg = config['valid']
    if not os.path.exists(valid_cfg['valid_annot_folder']):
        split_at = int(0.8 * len(train_imgs))
        np.random.shuffle(train_imgs)

        valid_imgs = train_imgs[split_at:]
        train_imgs = train_imgs[:split_at]
    else:
        valid_imgs, _valid_labels = parse_annotation(
            valid_cfg['valid_annot_folder'],
            valid_cfg['valid_image_folder'],
            model_cfg['labels'])

    # ---- reconcile configured labels with the labels actually seen ----
    wanted_labels = model_cfg['labels']
    if len(wanted_labels) == 0:
        print('No labels are provided. Train on all seen labels.')
        model_cfg['labels'] = train_labels.keys()
    else:
        seen_names = set(train_labels.keys())
        overlap_labels = set(wanted_labels) & seen_names

        print('Seen labels:\t', train_labels)
        print('Given labels:\t', wanted_labels)
        print('Overlap labels:\t', overlap_labels)

        some_label_missing = len(overlap_labels) < len(wanted_labels)
        if some_label_missing:
            print(
                'Some labels have no annotations! Please revise the list of labels in the config.json file!'
            )
            return

    # ---- build the detector ----
    yolo = YOLO(
        backend=model_cfg['backend'],
        input_size=model_cfg['input_size'],
        labels=model_cfg['labels'],
        max_box_per_image=model_cfg['max_box_per_image'],
        anchors=model_cfg['anchors'],
    )

    # ---- warm start from existing weights when the file is present ----
    pretrained = train_cfg['pretrained_weights']
    if os.path.exists(pretrained):
        print("Loading pre-trained weights in", pretrained)
        yolo.load_weights(pretrained)

    # ---- train ----
    yolo.train(
        train_imgs=train_imgs,
        valid_imgs=valid_imgs,
        train_times=train_cfg['train_times'],
        valid_times=valid_cfg['valid_times'],
        nb_epochs=train_cfg['nb_epochs'],
        learning_rate=train_cfg['learning_rate'],
        batch_size=train_cfg['batch_size'],
        warmup_epochs=train_cfg['warmup_epochs'],
        object_scale=train_cfg['object_scale'],
        no_object_scale=train_cfg['no_object_scale'],
        coord_scale=train_cfg['coord_scale'],
        class_scale=train_cfg['class_scale'],
        saved_weights_name=train_cfg['saved_weights_name'],
        debug=train_cfg['debug'],
    )
def main(args):
    """Train a YOLO detector from the JSON config named by ``args.conf``.

    Validation images come from their own annotation folder when it exists;
    otherwise the training images are shuffled in place and the last 20% are
    held out. If any configured label is absent from the training
    annotations, a message is printed and the function returns without
    training. The saved weights file name is prefixed with the configured
    architecture name and an underscore.

    Args:
        args: Parsed command-line namespace exposing ``conf``.
    """
    with open(args.conf) as cfg_file:
        config = json.loads(cfg_file.read())

    train_cfg = config['train']
    model_cfg = config['model']

    # Training-set annotations.
    train_imgs, train_labels = parse_annotation(
        train_cfg['train_annot_folder'],
        train_cfg['train_image_folder'],
        model_cfg['labels'])

    # Validation-set annotations, or an 80/20 split when none are provided.
    valid_cfg = config['valid']
    if not os.path.exists(valid_cfg['valid_annot_folder']):
        split_at = int(0.8 * len(train_imgs))
        np.random.shuffle(train_imgs)

        valid_imgs = train_imgs[split_at:]
        train_imgs = train_imgs[:split_at]
    else:
        valid_imgs, _valid_labels = parse_annotation(
            valid_cfg['valid_annot_folder'],
            valid_cfg['valid_image_folder'],
            model_cfg['labels'])

    # Compare the configured labels with those found in the annotations.
    wanted_labels = model_cfg['labels']
    seen_names = set(train_labels.keys())
    overlap_labels = set(wanted_labels) & seen_names

    print('Seen labels:\t', train_labels)
    print('Given labels:\t', wanted_labels)
    print('Overlap labels:\t', overlap_labels)

    some_label_missing = len(overlap_labels) < len(wanted_labels)
    if some_label_missing:
        print(
            'Some labels have no images! Please revise the list of labels in the config.json file!'
        )
        return

    # Build the detector.
    yolo = YOLO(
        architecture=model_cfg['architecture'],
        input_size=model_cfg['input_size'],
        labels=model_cfg['labels'],
        max_box_per_image=model_cfg['max_box_per_image'],
        anchors=model_cfg['anchors'],
    )

    # Warm start from existing weights when the file is present.
    pretrained = train_cfg['pretrained_weights']
    if os.path.exists(pretrained):
        print("Loading pre-trained weights in", pretrained)
        yolo.load_weights(pretrained)

    # Start training.
    weights_file = model_cfg['architecture'] + "_" + train_cfg['saved_weights_name']
    yolo.train(
        train_imgs=train_imgs,
        valid_imgs=valid_imgs,
        train_times=train_cfg['train_times'],
        valid_times=valid_cfg['valid_times'],
        nb_epoch=train_cfg['nb_epoch'],
        learning_rate=train_cfg['learning_rate'],
        batch_size=train_cfg['batch_size'],
        warmup_bs=train_cfg['warmup_batches'],
        object_scale=train_cfg['object_scale'],
        no_object_scale=train_cfg['no_object_scale'],
        coord_scale=train_cfg['coord_scale'],
        class_scale=train_cfg['class_scale'],
        saved_weights_name=weights_file,
        debug=train_cfg['debug'],
    )
def _main_(args):
    """Run a trained YOLO detector on a video, a single image or a folder.

    Loads the JSON configuration from ``args.conf`` and builds the detector.
    Weights come from ``args.weights``, or from the configured
    ``saved_weights_name`` when that argument is the empty string. The input
    named by ``args.input`` is handled as follows:

    * path ending in ``.mp4``: every frame is annotated and written to
      ``<stem>_detected.mp4``;
    * existing file: treated as an image, written to ``<stem>_detected<ext>``;
    * directory: every image returned by ``list_images`` is annotated and
      written, under its base name, to a ``detected`` sub-directory.

    Args:
        args: Parsed command-line namespace exposing ``conf``, ``weights``
            and ``input``.

    Raises:
        IOError: If a single input image cannot be read, or if the input path
            is neither a file nor a directory.
    """
    config_path = args.conf
    weights_path = args.weights
    image_path = args.input

    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    if weights_path == '':
        weights_path = config['train']['saved_weights_name']

    labels = config['model']['labels']

    ###############################
    #   Make the model
    ###############################

    yolo = YOLO(backend=config['model']['backend'],
                input_size=(config['model']['input_size_h'],
                            config['model']['input_size_w']),
                labels=labels,
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'],
                gray_mode=config['model']['gray_mode'])

    ###############################
    #   Load trained weights
    ###############################

    yolo.load_weights(weights_path)

    ###############################
    #   Predict bounding boxes
    ###############################

    if image_path[-4:] == '.mp4':
        # The suffix is known to be exactly four characters in this branch.
        video_out = image_path[:-4] + '_detected' + image_path[-4:]
        video_reader = cv2.VideoCapture(image_path)
        video_writer = None

        try:
            nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
            frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

            video_writer = cv2.VideoWriter(video_out,
                                           cv2.VideoWriter_fourcc(*'MPEG'),
                                           50.0,
                                           (frame_w, frame_h))

            for _ in tqdm(range(nb_frames)):
                grabbed, image = video_reader.read()
                # The reported frame count is not guaranteed to be exact, so
                # stop at the first frame that cannot be decoded instead of
                # feeding None to the detector.
                if not grabbed:
                    break

                boxes = yolo.predict(image)
                image = draw_boxes(image, boxes, labels)

                video_writer.write(np.uint8(image))
        finally:
            # Release the capture/writer even when prediction raises, so the
            # output container is finalized and file handles are not leaked.
            video_reader.release()
            if video_writer is not None:
                video_writer.release()
    elif os.path.isfile(image_path):
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError('Could not read image: ' + str(image_path))

        boxes = yolo.predict(image)
        image = draw_boxes(image, boxes, labels)

        print(len(boxes), 'boxes are found')

        # Split on the real extension so that e.g. "photo.jpeg" becomes
        # "photo_detected.jpeg" rather than "photo._detectedjpeg".
        stem, ext = os.path.splitext(image_path)
        if not ext:
            # No recognizable extension: keep the historical 4-character split.
            stem, ext = image_path[:-4], image_path[-4:]

        cv2.imwrite(stem + '_detected' + ext, image)
    else:
        if not os.path.isdir(image_path):
            # Fail with a clear message rather than an obscure mkdir error.
            raise IOError('Input path is neither a file nor a directory: ' +
                          str(image_path))

        detected_images_path = os.path.join(image_path, "detected")
        if not os.path.exists(detected_images_path):
            os.mkdir(detected_images_path)

        # Materialize the listing before anything is written, so tqdm knows
        # the total and the set of inputs is fixed up front.
        images = list(list_images(image_path))
        for fname in tqdm(images):
            image = cv2.imread(fname)
            if image is None:
                # One unreadable file should not abort the whole batch.
                print('Skipping unreadable image:', fname)
                continue

            boxes = yolo.predict(image)
            image = draw_boxes(image, boxes, labels)

            cv2.imwrite(
                os.path.join(detected_images_path, os.path.basename(fname)),
                image)
class TrackerModel(object): def __init__(self, config, is_inference=False, is_inference_ensemble=False): self.image_h = config['model']['input_size'] self.image_w = config['model']['input_size'] self.batch_size = config['train']['batch_size'] self.num_samples_in_h5 = config['train']['num_samples_in_h5'] self.stride = config['model']['stride'] self.h5_sequence_length = config['model']['h5_sequence_length'] self.last_sequence_length = config['model']['last_sequence_length'] self.sequence_length = self.last_sequence_length / self.stride self.true_box_buffer = config['model']['max_box_per_image'] self.yolo_weights_path = config['model']['detector_weights_path'] self.nb_box = len(config['model']['anchors']) // 2 self.anchors = config['model']['anchors'] self.epochs = config['train']['nb_epochs'] self.warmup_epochs = config['train']['warmup_epochs'] self.object_scale = config['train']['object_scale'] self.no_object_scale = config['train']['no_object_scale'] self.coord_scale = config['train']['coord_scale'] self.class_scale = config['train']['class_scale'] self.labels = config['model']['labels'] self.nb_class = len(config['model']['labels']) self.class_wt = np.ones(self.nb_class, dtype='float32') #added by HS HSS self.full_log_dir = config['train']['full_log_dir'] self.early_stop_patience = config['train']['early_stop_patience'] self.early_stop_min_delta = config['train']['early_stop_min_delta'] self.learning_rate_decay_factor = config['train'][ 'learning_rate_decay_factor'] self.learning_rate_decay_patience = config['train'][ 'learning_rate_decay_patience'] self.learning_rate_decay_min_lr = config['train'][ 'learning_rate_decay_min_lr'] self.lstm_h5_data_path = config['train']['lstm_h5_data_path'] self.saved_weights_name = config['train']['saved_weights_name'] #################### ############################################ # Compile the model ############################################ self.detector = YOLO( backend=config['model']['backend'], 
input_size=config['model']['input_size'], labels=config['model']['labels'], max_box_per_image=config['model']['max_box_per_image'], anchors=config['model']['anchors']) self.detector.load_weights(self.yolo_weights_path) self.grid_h, self.grid_w = self.detector.grid_h, self.detector.grid_w print('\nhs loaded backbone:') pp.pprint(vars(self.detector)) # self.full_model = self.create_model() self.full_model = self.create_model_cnn_rnn_extracted() if is_inference: if is_inference_ensemble: self.full_model_inference = self.create_model_cnn_rnn_extracted_inference_ensemble( ) else: self.full_model_inference = self.create_model_cnn_rnn_extracted_inference( ) self.debug = config['train']['debug'] self.initial_epoch = 0 def load_weights(self, weights_path): self.full_model.load_weights(weights_path) print("Successfully loaded weights from %s!" % weights_path) def freeze_layers(self, model): for layer in model.layers: layer.trainable = False return model def create_model_cnn_rnn_extracted(self): input_images = Input(batch_shape=(self.batch_size, self.sequence_length, self.image_h, self.image_w, 3), name='images_input') self.true_boxes = Input(batch_shape=(self.batch_size, 1, 1, 1, 1, self.true_box_buffer, 4), name='bbox_input') #load the detector feature_detector_model = self.detector.get_feature_model( is_before_activation=False) feature_detector_model = self.freeze_layers(feature_detector_model) print("\nSummary of feature detector:") feature_detector_model.summary() #run the yolo bb on each image, and stack up the results to be fed to RNN yolo_feats_seq = TimeDistributed(feature_detector_model, name='each_frame_feats')\ (input_images) recurrent_state = ConvLSTM2D(256, (1, 1), strides=(1, 1), padding='same', return_sequences=True, name='conv_lstm_1')(yolo_feats_seq) recurrent_state = ConvLSTM2D(256, (1, 1), strides=(1, 1), padding='same', return_sequences=False, name='conv_lstm_2')(recurrent_state) output_conv = Conv2D(self.nb_box * (4 + 1 + self.nb_class), (1, 1), 
strides=(1, 1), padding='same', kernel_initializer='lecun_normal', name='track_conv')(recurrent_state) output_reshaped = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output_conv) output_trk = Lambda(lambda args: args[0], name='tracking')( [output_reshaped, self.true_boxes]) model = Model([input_images, self.true_boxes], output_trk, name='cnn_rnn_model') # We initialize the last layer (prediction by lstm) here layer = model.layers[-4] # track_conv layer weights = layer.get_weights() new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h * self.grid_w) new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h * self.grid_w) layer.set_weights([new_kernel, new_bias]) #model.load_weights(self.rnn_weights_path, by_name=True) print("\nFull MODEL:") model.summary() return model #, feature_detector_model def set_inference_weights(self): for layer in self.full_model.layers: for layer_inference in self.full_model_inference.layers: if (layer_inference.name == layer.name): layer_inference.set_weights(layer.get_weights()) break return def create_model_cnn_rnn_extracted_inference(self): K.set_learning_phase(0) input_images = Input(batch_shape=(self.batch_size, 1, self.image_h, self.image_w, 3), name='images_input') feature_detector_model = self.detector.get_feature_model( is_before_activation=False) print("\nSummary of feature detector:") feature_detector_model.summary() yolo_feats_seq = TimeDistributed(feature_detector_model, name='each_frame_feats')\ (input_images) recurrent_state = ConvLSTM2D(256, (1, 1), strides=(1, 1), padding='same', return_sequences=True, stateful=True, name='conv_lstm_1')\ (yolo_feats_seq) recurrent_state = ConvLSTM2D(256, (1, 1), strides=(1, 1), padding='same', return_sequences=False, stateful=True, name='conv_lstm_2')\ (recurrent_state) output_conv = Conv2D(self.nb_box * (4 + 1 + self.nb_class), (1, 1), strides=(1, 1), padding='same', kernel_initializer='lecun_normal', name='track_conv')(recurrent_state) 
output_reshaped = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output_conv) model = Model(input_images, output_reshaped, name='cnn+rnn model') print("\nFull MODEL:") model.summary() return model def load_data_generators(self, generator_config): pickle_train = 'data/MultiObjDetTracker_TrainAnn.pickle' pickle_val = 'data/MultiObjDetTracker_ValAnn.pickle' if os.path.isfile(pickle_train): with open(pickle_train, 'rb') as fp: train_imgs = pickle.load(fp) else: train_imgs, seen_train_labels = parse_annotation( self.train_annot_folder, self.train_image_folder, labels=self.LABELS) with open(pickle_train, 'wb') as fp: pickle.dump(train_imgs, fp) if os.path.isfile(pickle_val): with open(pickle_val, 'rb') as fp: valid_imgs = pickle.load(fp) else: valid_imgs, seen_valid_labels = parse_annotation( self.valid_annot_folder, self.valid_image_folder, labels=self.LABELS) with open(pickle_val, 'wb') as fp: pickle.dump(valid_imgs, fp) train_batch = BatchSequenceGenerator(train_imgs, generator_config, norm=normalize, shuffle=True, augment=False) valid_batch = BatchSequenceGenerator(valid_imgs, generator_config, norm=normalize, augment=False) return train_batch, valid_batch def load_data_generators_seq(self, batch_size): #path to folder of H5s is in self.lstm_h5_data_path train_batch = SequenceH5Generator(self.lstm_h5_data_path, batch_size, self.num_samples_in_h5, self.last_sequence_length, self.labels, stride=self.stride, is_yolo_feats=False, is_augment=True) valid_batch = SequenceH5Generator(self.lstm_h5_data_path, batch_size, self.num_samples_in_h5, self.last_sequence_length, self.labels, stride=self.stride, is_validation=True, is_yolo_feats=False) return train_batch, valid_batch def custom_loss(self, y_true, y_pred): new_shape = self.batch_size y_pred = tf.reshape(y_pred, (new_shape, self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class)) y_true = tf.reshape(y_true, (new_shape, self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class)) 
self.true_boxes = tf.reshape( self.true_boxes, (new_shape, 1, 1, 1, self.true_box_buffer, 4)) mask_shape = tf.shape(y_true)[:4] cell_x = tf.to_float( tf.reshape(tf.tile(tf.range(self.grid_w), [self.grid_h]), (1, self.grid_h, self.grid_w, 1, 1))) cell_y = tf.transpose(cell_x, (0, 2, 1, 3, 4)) cell_grid = tf.tile(tf.concat([cell_x, cell_y], -1), [self.batch_size, 1, 1, self.nb_box, 1]) coord_mask = tf.zeros(mask_shape) conf_mask = tf.zeros(mask_shape) class_mask = tf.zeros(mask_shape) seen = tf.Variable(0.) total_recall = tf.Variable(0.) """ Adjust prediction """ ### adjust x and y pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid ### adjust w and h pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape( self.anchors, [1, 1, 1, self.nb_box, 2]) ### adjust confidence pred_box_conf = tf.sigmoid(y_pred[..., 4]) ### adjust class probabilities pred_box_class = y_pred[..., 5:] """ Adjust ground truth """ ### adjust x and y true_box_xy = y_true[..., 0:2] # relative position to the containing cell ### adjust w and h true_box_wh = y_true[ ..., 2:4] # number of cells accross, horizontally and vertically ### adjust confidence true_wh_half = true_box_wh / 2. true_mins = true_box_xy - true_wh_half true_maxes = true_box_xy + true_wh_half pred_wh_half = pred_box_wh / 2. pred_mins = pred_box_xy - pred_wh_half pred_maxes = pred_box_xy + pred_wh_half intersect_mins = tf.maximum(pred_mins, true_mins) intersect_maxes = tf.minimum(pred_maxes, true_maxes) intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) 
intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] true_areas = true_box_wh[..., 0] * true_box_wh[..., 1] pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1] union_areas = pred_areas + true_areas - intersect_areas iou_scores = tf.truediv(intersect_areas, union_areas) true_box_conf = iou_scores * y_true[..., 4] ### adjust class probabilities true_box_class = tf.argmax(y_true[..., 5:], -1) """ Determine the masks """ ### coordinate mask: simply the position of the ground truth boxes (the predictors) coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * self.coord_scale ### confidence mask: penelize predictors + penalize boxes with low IOU # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6 true_xy = self.true_boxes[..., 0:2] true_wh = self.true_boxes[..., 2:4] true_wh_half = true_wh / 2. true_mins = true_xy - true_wh_half true_maxes = true_xy + true_wh_half pred_xy = tf.expand_dims(pred_box_xy, 4) pred_wh = tf.expand_dims(pred_box_wh, 4) pred_wh_half = pred_wh / 2. pred_mins = pred_xy - pred_wh_half pred_maxes = pred_xy + pred_wh_half intersect_mins = tf.maximum(pred_mins, true_mins) intersect_maxes = tf.minimum(pred_maxes, true_maxes) intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) 
intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] true_areas = true_wh[..., 0] * true_wh[..., 1] pred_areas = pred_wh[..., 0] * pred_wh[..., 1] union_areas = pred_areas + true_areas - intersect_areas iou_scores = tf.truediv(intersect_areas, union_areas) best_ious = tf.reduce_max(iou_scores, axis=4) conf_mask = conf_mask + tf.to_float( best_ious < 0.6) * (1 - y_true[..., 4]) * self.no_object_scale # penalize the confidence of the boxes, which are reponsible for corresponding ground truth box conf_mask = conf_mask + y_true[..., 4] * self.object_scale ### class mask: simply the position of the ground truth boxes (the predictors) class_mask = y_true[..., 4] * tf.gather( self.class_wt, true_box_class) * self.class_scale """ Warm-up training """ no_boxes_mask = tf.to_float(coord_mask < self.coord_scale / 2.) seen = tf.assign_add(seen, 1.) true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, self.warmup_batches+1), lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask, true_box_wh + tf.ones_like(true_box_wh) * \ np.reshape(self.anchors, [1,1,1,self.nb_box,2]) * \ no_boxes_mask, tf.ones_like(coord_mask)], lambda: [true_box_xy, true_box_wh, coord_mask]) """ Finalize the loss """ nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0)) nb_conf_box = tf.reduce_sum(tf.to_float(conf_mask > 0.0)) nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0)) loss_xy = tf.reduce_sum( tf.square(true_box_xy - pred_box_xy) * coord_mask) / (nb_coord_box + 1e-6) / 2. loss_wh = tf.reduce_sum( tf.square(true_box_wh - pred_box_wh) * coord_mask) / (nb_coord_box + 1e-6) / 2. loss_conf = tf.reduce_sum( tf.square(true_box_conf - pred_box_conf) * conf_mask) / (nb_conf_box + 1e-6) / 2. 
loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=true_box_class, logits=pred_box_class) loss_class = tf.reduce_sum( loss_class * class_mask) / (nb_class_box + 1e-6) loss = tf.cond(tf.less(seen, self.warmup_batches + 1), lambda: loss_xy + loss_wh + loss_conf + loss_class + 10, lambda: loss_xy + loss_wh + loss_conf + loss_class) if self.debug: nb_true_box = tf.reduce_sum(y_true[..., 4]) nb_pred_box = tf.reduce_sum( tf.to_float(true_box_conf > 0.5) * tf.to_float(pred_box_conf > 0.3)) current_recall = nb_pred_box / (nb_true_box + 1e-6) total_recall = tf.assign_add(total_recall, current_recall) loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000) loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000) loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000) loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000) loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000) loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000) # loss = tf.Print(loss, [total_recall/seen], message='Average Recall \t', summarize=1000) return loss def train(self): train_batch, valid_batch = self.load_data_generators_seq( self.batch_size) #print('Length of generators:') #print(int(len(train_batch))) #print(int(len(valid_batch))) print("Length of generators: %d, %d" % (len(train_batch), len(valid_batch))) [x, b], y = train_batch[0] print("Input shapes train: ", x.shape, y.shape, b.shape) [x, b], y = valid_batch[0] print("Input shapes val: ", x.shape, y.shape, b.shape) self.warmup_batches = self.warmup_epochs *\ (len(train_batch) + len(valid_batch)) / 4 print("Using %d warmup batches" % self.warmup_batches) ############################################ # Define your callbacks ############################################ #defined by hs for best val checkpoint_multi_hs = MultiGPUCheckpoint( '{name}_{{epoch:02d}}_hsBbAndLstm_valLoss-{{val_loss:.2f}}.h5'. 
format(name=self.saved_weights_name), verbose=1, save_best_only=True, ) #defined by HS # HS HSS originally i used monitor='val_loss', factor=0.5, patience=20, min_lr=1e-6 reduce_lr_hs = ReduceLROnPlateau( monitor='val_loss', factor=self.learning_rate_decay_factor, patience=self.learning_rate_decay_patience, min_lr=self.learning_rate_decay_min_lr) #HS HSS With a patience of 100 you finish in 200 epochs so I changed it to 400 early_stop = EarlyStopping(monitor='val_loss', min_delta=self.early_stop_min_delta, patience=self.early_stop_patience, verbose=1) evaluate_callback_val = EvaluateCallback(valid_batch, self.evaluate) decay_lr = DecayLR(32, 40, 0.2) optimizer = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) # optimizer = SGD(lr=1e-4, momentum=0.9, decay=0.0, nesterov=True) print("Compiling a model...") with tf.device("/cpu:0"): self.full_model.compile(loss=self.custom_loss, optimizer=optimizer) print("Successfuly compiled the full model!") ############################################ # Start the training process ############################################ steps_per_epoch = len(train_batch) print("Using %d/%d for train/val steps_per_epoch of %d batch_size!" 
%\
              (steps_per_epoch, len(valid_batch), self.batch_size))
        parallel_model = multi_gpu_model(self.full_model, gpus=2)
        parallel_model.compile(loss=self.custom_loss, optimizer=optimizer)
        parallel_model.fit_generator(
            generator=train_batch,
            steps_per_epoch=steps_per_epoch,
            epochs=self.epochs,
            verbose=2 if self.debug else 1,
            validation_data=valid_batch,
            validation_steps=len(valid_batch),
            callbacks=[
                early_stop, checkpoint_multi_hs,
                TrainValTensorBoard_HS(self.full_log_dir,
                                       write_graph=False,
                                       write_images=True),
                ValOnlyProgbarLogger(verbose=1, count_mode='steps'),
                reduce_lr_hs
            ],  # reduce_lr
            workers=4,
            max_queue_size=10,
            use_multiprocessing=True,
            shuffle=False,
            initial_epoch=self.initial_epoch)
        self.full_model.save(saved_weights_name + "_fullModel_final.h5")

    def evaluate(self,
                 generator,
                 iou_threshold=0.3,
                 score_threshold=0.3,
                 max_detections=100,
                 save_path=None):
        """Compute the per-class average precision over a dataset.

        Code originally from https://github.com/fizyr/keras-retinanet

        # Arguments
            generator       : Dataset generator; this method calls its
                              ``num_classes()``, ``size()``, ``load_image(i)``
                              and ``load_annotation(i)``.
            iou_threshold   : Minimum overlap for a detection to count as a
                              true positive.
            score_threshold : Passed to ``self.predict`` as ``obj_threshold``.
            max_detections  : Not referenced in the body below.
            save_path       : Not referenced in the body below.

        # Returns
            A dict mapping each class index (``0 .. num_classes - 1``) to its
            average precision.
        """
        print("\nUsing %.2f IOU and %.2f Score thresholds!" %\
              (iou_threshold, score_threshold))

        # gather all detections and annotations,
        # indexed as [image index][class index]
        all_detections = [[None for i in range(generator.num_classes())]
                          for j in range(generator.size())]
        all_annotations = [[None for i in range(generator.num_classes())]
                           for j in range(generator.size())]

        for i in range(generator.size()):
            # progress report every 100 images
            if i % 100 == 0:
                print("%d/%d" % (i, generator.size()))

            raw_image = generator.load_image(i)
            raw_height, raw_width, raw_channels = raw_image.shape

            # NOTE(review): the `predict` method visible further down in this
            # chunk accepts only `image` and returns a single value, while
            # this call passes two keyword arguments and unpacks two results.
            # Confirm which prediction method is intended here.
            pred_boxes, filtered_boxes = self.predict(
                raw_image,
                obj_threshold=score_threshold,
                is_filter_bboxes=False)

            score = np.array([box.score for box in pred_boxes])
            pred_labels = np.array([box.label for box in pred_boxes])

            if len(pred_boxes) > 0:
                # scale box corners by the raw image size and append the
                # score: one row [xmin, ymin, xmax, ymax, score] per box
                pred_boxes = np.array([[
                    box.xmin * raw_width, box.ymin * raw_height,
                    box.xmax * raw_width, box.ymax * raw_height, box.score
                ] for box in pred_boxes])
            else:
                # NOTE(review): this placeholder has shape (1, 0); the
                # boolean-mask indexing a few lines below may fail for images
                # without detections - confirm.
                pred_boxes = np.array([[]])

            # sort the boxes and the labels according to scores (descending)
            score_sort = np.argsort(-score)
            pred_labels = pred_labels[score_sort]
            pred_boxes = pred_boxes[score_sort]

            # copy detections to all_detections
            for label in range(generator.num_classes()):
                all_detections[i][label] = pred_boxes[pred_labels == label, :]

            annotations = generator.load_annotation(i)

            # copy annotations to all_annotations; column 4 is compared with
            # the class index and the first four columns are kept
            for label in range(generator.num_classes()):
                all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()

        # compute mAP by comparing all detections and all annotations
        average_precisions = {}

        for label in range(generator.num_classes()):
            false_positives = np.zeros((0, ))
            true_positives = np.zeros((0, ))
            scores = np.zeros((0, ))
            num_annotations = 0.0

            for i in range(generator.size()):
                detections = all_detections[i][label]
                annotations = all_annotations[i][label]
                num_annotations += annotations.shape[0]
                # annotations of this image already matched by a detection
                detected_annotations = []

                for d in detections:
                    scores = np.append(scores, d[4])

                    # nothing to match against: every detection is a false
                    # positive
                    if annotations.shape[0] == 0:
                        false_positives = np.append(false_positives, 1)
                        true_positives = np.append(true_positives, 0)
                        continue

                    overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
                    assigned_annotation = np.argmax(overlaps, axis=1)
                    max_overlap = overlaps[0, assigned_annotation]

                    # a true positive needs enough overlap with an annotation
                    # that no earlier detection has claimed
                    if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                        false_positives = np.append(false_positives, 0)
                        true_positives = np.append(true_positives, 1)
                        detected_annotations.append(assigned_annotation)
                    else:
                        false_positives = np.append(false_positives, 1)
                        true_positives = np.append(true_positives, 0)

            # no annotations -> AP for this class is 0 (is this correct?)
            if num_annotations == 0:
                average_precisions[label] = 0
                continue

            # sort by score
            indices = np.argsort(-scores)
            false_positives = false_positives[indices]
            true_positives = true_positives[indices]

            # compute false positives and true positives
            false_positives = np.cumsum(false_positives)
            true_positives = np.cumsum(true_positives)

            # compute recall and precision; eps guards the division
            recall = true_positives / num_annotations
            precision = true_positives / np.maximum(
                true_positives + false_positives,
                np.finfo(np.float64).eps)

            # compute average precision
            average_precision = compute_ap(recall, precision)
            average_precisions[label] = average_precision

        return average_precisions

    def predict(self, image):
        """Run the full model on one image and return the decoded boxes.

        The image is resized to ``self.input_size`` squared, scaled by 1/255
        as float32, has its last axis reversed and receives a leading batch
        axis before being fed to ``self.full_model`` together with an
        all-zero second input.
        """
        # NOTE: image_h / image_w are not used below
        image_h, image_w, _ = image.shape
        image = cv2.resize(image, (self.input_size, self.input_size))
        image = np.divide(image, 255., dtype=np.float32)

        # reverse the last axis - presumably BGR -> RGB; confirm
        input_image = image[:, :, ::-1]
        input_image = np.expand_dims(input_image, 0)
        # all-zero array supplied as the model's second input
        dummy_array = np.zeros((1, 1, 1, 1, self.max_box_per_image, 4))

        netout = self.full_model.predict([input_image, dummy_array])[0]
        boxes = decode_netout(netout, self.detector.anchors, self.detector.nb_class)

        return boxes

    def predict_on_image(self,
                         image,
                         obj_threshold=0.3,
                         nms_threshold=0.01,
                         is_inference_ensemble=False,
                         is_filter_bboxes=False,
                         shovel_type="Hydraulic"):
        """Run ``self.full_model_inference`` on one image and decode the boxes.

        ``obj_threshold`` and ``nms_threshold`` are forwarded to
        ``decode_netout``. When ``is_filter_bboxes`` is true the boxes are
        additionally passed through ``filter_teeth_teethline`` together with
        ``obj_threshold`` and ``shovel_type``. ``is_inference_ensemble`` is
        not referenced in the body.
        """
        # NOTE: the local image_h / image_w are not used below
        image_h, image_w, _ = image.shape
        # NOTE(review): cv2.resize expects dsize as (width, height); passing
        # (self.image_h, self.image_w) looks swapped unless both are equal -
        # confirm.
        image = cv2.resize(image, (self.image_h, self.image_w))
        image = np.divide(image, 255., dtype=np.float32)

        # reverse the last axis - presumably BGR -> RGB; confirm
        input_image = image[:, :, ::-1]
        # two leading axes are added - presumably batch and sequence; confirm
        input_image = np.expand_dims(input_image, 0)
        input_image = np.expand_dims(input_image, 0)

        netout = self.full_model_inference.predict(input_image)

        boxes = decode_netout(netout[0, ...],
                              self.anchors,
                              self.nb_class,
                              obj_threshold=obj_threshold,
                              nms_threshold=nms_threshold)

        if is_filter_bboxes:
            boxes = filter_teeth_teethline(boxes, obj_threshold, shovel_type)

        return boxes

    def predict_on_h5(self,
                      h5_path,
                      idx,
                      path_to_save,
                      sequence_length=30,
                      stride=1,
                      obj_threshold=0.3,
                      nms_threshold=0.1,
                      is_yolo_pred=False):
        """Predict on one sequence stored in an HDF5 file and save a JPEG.

        Reads entry ``idx % len(x_batches)`` from the datasets ``x_batches``,
        ``b_batches`` and ``y_batches`` (plus ``yolo_out`` when
        ``is_yolo_pred`` is true), runs ``self.full_model`` on it, draws the
        decoded boxes on the last frame of the sequence and writes the result
        to ``<path_to_save>/pred_<h5 file name><idx>.jpg``. Returns ``None``.

        NOTE(review): the nesting of the ``if is_yolo_pred:`` branches and of
        the file-writing lines was reconstructed from collapsed source text;
        confirm against the original layout.
        """
        # NOTE(review): the file is closed only on the normal path at the end
        f = h5py.File(h5_path, 'r')
        x_batches = f["x_batches"]
        if is_yolo_pred:
            yolo_outs = f["yolo_out"]
        b_batches = f["b_batches"]
        y_batches = f["y_batches"]

        # wrap idx into the number of stored sequences
        id_in_h5 = idx % x_batches.shape[0]
        x_batch = x_batches[id_in_h5, ...]  # read from disk

        start_time = time()
        x_batch = np.divide(x_batch, 255., dtype=np.float32)
        print("time: %.3f" % (time() - start_time))
        # reverse the last axis - presumably BGR -> RGB; confirm
        x_batch = x_batch[..., ::-1]

        if is_yolo_pred:
            yolo_out = yolo_outs[id_in_h5, ...]
        b_batch = b_batches[id_in_h5, ...]
        y_batch = y_batches[id_in_h5, ...]

        # [reverse][:seq_length:skip][reverse]
        # i.e. keep the last `sequence_length` frames, sub-sampled with
        # `stride` counted from the end, in the original order
        x_batch = x_batch[::-1, ...][:sequence_length:stride][::-1, ...]
        if is_yolo_pred:
            yolo_out = yolo_out[::-1, ...][:sequence_length:stride][::-1, ...]

        # frames used for drawing: last frame (two copies) and first frame
        image = x_batch[-1, ...].copy()
        image_yolo = image.copy()
        image_first = x_batch[0, ...].copy()

        b_batch = b_batch[::-1, ...][:sequence_length:stride][::-1, ...]

        # add a leading batch axis of size 1 to every model input
        x_batch = np.expand_dims(x_batch, axis=0)
        if is_yolo_pred:
            yolo_out = np.expand_dims(yolo_out, axis=0)
        b_batch = np.expand_dims(b_batch, axis=0)
        y_batch = np.expand_dims(y_batch, axis=0)

        start_time = time()
        netouts = self.full_model.predict([x_batch, b_batch])
        print("Time taken ConvLSTM: %.3f" % (time() - start_time))
        print("image.shape: ", image.shape)

        if is_yolo_pred:
            # stored YOLO outputs for the last and the first frame
            netouts_yolo_last = yolo_out[:, -1, ...]
            netouts_yolo_first = yolo_out[:, 0, ...]

        labels_tensor = y_batch[0, ...].copy()

        for i, netout in enumerate(netouts):
            start_time = time()
            boxes = decode_netout(netout, self.anchors, self.nb_class,
                                  obj_threshold, nms_threshold)
            print("Decoding time: %.3f" % (time() - start_time))

            if is_yolo_pred:
                boxes_yolo_first = decode_netout(netouts_yolo_first[i],
                                                 self.anchors, self.nb_class,
                                                 obj_threshold, nms_threshold)
                boxes_yolo_last = decode_netout(netouts_yolo_last[i],
                                                self.anchors, self.nb_class,
                                                obj_threshold, nms_threshold)
            # the label tensor is decoded the same way as a network output
            labels_boxes = decode_netout(labels_tensor, self.anchors,
                                         self.nb_class, obj_threshold,
                                         nms_threshold)

            image_labels = image.copy()
            image_conv_lstm = draw_boxes(image, boxes, self.labels,
                                         obj_threshold)
            if is_yolo_pred:
                image_yolo_first = draw_boxes(image_first, boxes_yolo_first,
                                              self.labels, obj_threshold)
                image_yolo_last = draw_boxes(image_yolo, boxes_yolo_last,
                                             self.labels, obj_threshold)
            image_labels = draw_boxes(image_labels, labels_boxes, self.labels,
                                      obj_threshold)

            print("Bounding boxes found %d/%d" %\
                  (len(boxes), len(labels_boxes)))

            # (commented-out matplotlib debugging code used to live here)

            h5_name = h5_path.split('/')[-1]
            filepath = os.path.join(path_to_save,
                                    "pred_" + h5_name + str(idx) + ".jpg")
            # undo the 1/255 scaling applied to the frames before writing
            temps = image_conv_lstm * 255.
            cv2.imwrite(filepath, temps)

        f.close()
        return
from keras.preprocessing import image

# Live detection on a Raspberry Pi camera: every captured frame is passed
# through the YOLO model configured in config.json and shown in a window
# with the predicted boxes drawn on it.

with open('config.json') as config_buffer:
    config = json.load(config_buffer)

###############################
#   Make the model
###############################
yolo = YOLO(backend=config['model']['backend'],
            input_size=config['model']['input_size'],
            labels=config['model']['labels'],
            max_box_per_image=config['model']['max_box_per_image'],
            anchors=config['model']['anchors'])
yolo.load_weights('fire_weights.h5')

# Capture resolution; the camera setting and the buffer shape both derive
# from these two values so they cannot drift apart.
img_width, img_height = 640, 480
labels = config['model']['labels']

with picamera.PiCamera() as camera:
    camera.resolution = (img_width, img_height)
    camera.framerate = 24

    # Buffer the camera captures into; it is kept separate from the displayed
    # frame so each capture writes into the same preallocated array.
    capture_buffer = np.empty((img_height, img_width, 3), dtype=np.uint8)

    try:
        while True:
            camera.capture(capture_buffer, 'rgb', use_video_port=True)
            # the capture format is 'rgb'; OpenCV display expects BGR
            frame = cv.cvtColor(capture_buffer, cv.COLOR_RGB2BGR)
            boxes = yolo.predict(frame)
            frame = draw_boxes(frame, boxes, labels)
            cv.imshow('frame', frame)
            cv.waitKey(2)
    finally:
        # The original `try:` had no handler at all (a syntax error); close
        # the preview window however the loop ends (e.g. Ctrl-C).
        cv.destroyAllWindows()
def trainer(config_path):
    """Train a YOLO model as described by a JSON configuration file.

    The log directory named by ``config['train']['logdir']`` is resolved
    relative to the directory of ``config_path``, created if needed, and a
    copy of the configuration is stored in it as ``config.json``.

    Args:
        config_path: Path to the JSON configuration file.

    Returns:
        None. Training is skipped (after printing a message) when some of the
        configured labels have no annotations in the training set.

    Raises:
        ValueError: If ``config['train']`` has no ``logdir`` entry.
    """
    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    train_cfg = config['train']

    # The original built the exception without raising it and then crashed
    # on the undefined `logdir`; fail with the intended message instead.
    if 'logdir' not in train_cfg:
        raise ValueError("В конфиге должна быть указана папка для логов!")

    # os.path.join handles a config in the current directory (empty dirname)
    # and a logdir without a trailing slash, both of which produced wrong
    # paths with plain string concatenation.
    logdir = os.path.join(os.path.dirname(config_path), train_cfg['logdir'])
    os.makedirs(logdir, exist_ok=True)
    shutil.copy(config_path, os.path.join(logdir, 'config.json'))

    ###############################
    #   Parse the annotations
    ###############################

    # parse annotations of the training set
    train_imgs, train_labels = parse_annotation(train_cfg['train_annot_folder'],
                                                train_cfg['train_image_folder'],
                                                config['model']['labels'])

    # parse annotations of the validation set, if any, otherwise split the
    # training set 80/20
    valid_cfg = config['valid']
    if os.path.exists(valid_cfg['valid_annot_folder']):
        valid_imgs, valid_labels = parse_annotation(valid_cfg['valid_annot_folder'],
                                                    valid_cfg['valid_image_folder'],
                                                    config['model']['labels'])
    else:
        train_valid_split = int(0.8 * len(train_imgs))
        np.random.shuffle(train_imgs)

        valid_imgs = train_imgs[train_valid_split:]
        train_imgs = train_imgs[:train_valid_split]

    if len(config['model']['labels']) > 0:
        overlap_labels = set(config['model']['labels']).intersection(train_labels)

        print('Seen labels:\t', train_labels)
        print('Given labels:\t', config['model']['labels'])
        print('Overlap labels:\t', overlap_labels)

        if len(overlap_labels) < len(config['model']['labels']):
            print('Some labels have no annotations! Please revise the list of labels in the config.json file!')
            return
    else:
        print('No labels are provided. Train on all seen labels.')
        # store a real list: a dict view cannot be indexed
        config['model']['labels'] = list(train_labels.keys())

    ###############################
    #   Construct the model
    ###############################

    yolo = YOLO(architecture=config['model']['architecture'],
                input_size=config['model']['input_size'],
                labels=config['model']['labels'],
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'])

    ###############################
    #   Load the pretrained weights (if any)
    ###############################

    if os.path.exists(train_cfg['pretrained_weights']):
        print("Loading pre-trained weights in {}".format(train_cfg['pretrained_weights']))
        yolo.load_weights(train_cfg['pretrained_weights'])

    ###############################
    #   Start the training process
    ###############################

    # a missing saving frequency is passed on as 0
    freq = train_cfg.get('weights_saving_freq', 0)

    yolo.train(train_imgs=train_imgs,
               valid_imgs=valid_imgs,
               train_times=train_cfg['train_times'],
               valid_times=valid_cfg['valid_times'],
               nb_epoch=train_cfg['nb_epoch'],
               learning_rate=train_cfg['learning_rate'],
               batch_size=train_cfg['batch_size'],
               warmup_epochs=train_cfg['warmup_epochs'],
               object_scale=train_cfg['object_scale'],
               no_object_scale=train_cfg['no_object_scale'],
               coord_scale=train_cfg['coord_scale'],
               class_scale=train_cfg['class_scale'],
               saved_weights_name=train_cfg['saved_weights_name'],
               saving_freq=freq,
               debug=train_cfg['debug'],
               logdir=logdir)
def _main_(args):
    """Show YOLO detections for every 50th frame of a video source.

    Args:
        args: Parsed command-line arguments providing ``conf`` (path to the
            JSON configuration), ``weights`` (path to the trained weights)
            and ``input`` (video file or stream URL to open).

    Raises:
        IOError: If the video source cannot be opened.
    """
    config_path = args.conf
    weights_path = args.weights
    image_path = args.input

    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    ###############################
    #   Make the model
    ###############################

    yolo = YOLO(backend=config['model']['backend'],
                input_size=config['model']['input_size'],
                labels=config['model']['labels'],
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'])

    ###############################
    #   Load trained weights
    ###############################

    yolo.load_weights(weights_path)

    ###############################
    #   Predict bounding boxes
    ###############################

    # Every `video_reader = ...` assignment used to be commented out (a long
    # list of hard-coded local files and streams), which left `video_reader`
    # undefined. The source now comes from the --input argument.
    video_reader = cv2.VideoCapture(image_path)
    if not video_reader.isOpened():
        raise IOError("Couldn't open video source {}".format(image_path))

    nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
    every_nth = 50  # only every 50th frame is run through the detector
    count = 0

    # live streams do not report a usable frame count
    pbar = tqdm(total=nb_frames if nb_frames > 0 else None)

    try:
        while video_reader.isOpened():
            _, image = video_reader.read()
            if image is None:
                break

            count += 1
            pbar.update(1)

            if count % every_nth:
                continue

            boxes = yolo.predict(image)
            image = draw_boxes(image, boxes, config['model']['labels'])

            cv2.imshow('Predicted2', cv2.resize(image, (1280, 720)))
            cv2.waitKey(1)
    finally:
        # release the capture and the window even when prediction fails
        video_reader.release()
        pbar.close()
        cv2.destroyAllWindows()
def _main_(args):
    """Run YOLO detection on a camera, an ``.mp4`` video or a single image.

    Args:
        args: Parsed command-line arguments providing ``conf`` (JSON
            configuration path), ``weights`` (trained weights path),
            ``input`` (camera index, video path or image path) and
            ``real_time`` (truthy to treat ``input`` as a camera index).

    Raises:
        IOError: If the input image cannot be read.
    """
    config_path = args.conf
    weights_path = args.weights
    image_path = args.input
    use_camera = args.real_time

    keras.backend.tensorflow_backend.set_session(
        get_session())  # added by kenny.

    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    labels = config['model']['labels']

    ###############################
    #   Make the model
    ###############################

    yolo = YOLO(backend=config['model']['backend'],
                input_size=config['model']['input_size'],
                labels=labels,
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'])

    ###############################
    #   Load trained weights
    ###############################

    yolo.load_weights(weights_path)

    ###############################
    #   Predict bounding boxes
    ###############################

    if use_camera:
        video_reader = cv2.VideoCapture(int(image_path))
        pbar = tqdm()
        try:
            while True:
                pbar.update(1)
                ret, frame = video_reader.read()
                if not ret:
                    break
                boxes = yolo.predict(frame)
                frame = draw_boxes(frame, boxes, labels)
                cv2.imshow("frame", frame)
                key = cv2.waitKey(1)
                # 'q' or ESC stops the preview
                if key == ord("q") or key == 27:
                    break
        finally:
            # the camera handle was never released before
            pbar.close()
            video_reader.release()
            cv2.destroyAllWindows()
    elif image_path[-4:] == '.mp4':
        video_out = image_path[:-4] + '_detected' + image_path[-4:]
        video_reader = cv2.VideoCapture(image_path)

        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

        video_writer = cv2.VideoWriter(video_out,
                                       cv2.VideoWriter_fourcc(*'MPEG'), 50.0,
                                       (frame_w, frame_h))
        try:
            for _ in tqdm(range(nb_frames)):
                ret, image = video_reader.read()
                # the reported frame count can exceed the readable frames
                if not ret:
                    break

                boxes = yolo.predict(image)
                image = draw_boxes(image, boxes, labels)

                video_writer.write(np.uint8(image))
        finally:
            video_reader.release()
            video_writer.release()
    else:
        image = cv2.imread(image_path)
        if image is None:
            raise IOError("Couldn't read image {}".format(image_path))

        boxes = yolo.predict(image)
        image = draw_boxes(image, boxes, labels)

        print(len(boxes), 'boxes are found')

        # NOTE(review): the output location is hard-coded to one machine.
        path = '/home/creaton/keras-yolo2/detected object'
        out_path = path + '/img.jpg'
        # cv2.imwrite reports failure through its return value only
        if not cv2.imwrite(out_path, image):
            print('Could not write', out_path)
class predictor:
    """Convenience wrapper running a trained YOLO model on images and video.

    Attributes are set in ``__init__``:
        labels: the label list from the configuration.
        yolo:   the ``YOLO`` instance with the trained weights loaded.
        timing: ``[number of predictions, accumulated seconds]``.
    """

    def __init__(self, config_path, weights_path):
        """Build the model from the JSON config and load its weights."""
        with open(config_path) as config_buffer:
            config = json.load(config_buffer)

        model_cfg = config['model']
        self.labels = model_cfg['labels']

        self.yolo = YOLO(architecture=model_cfg['architecture'],
                         input_size=model_cfg['input_size'],
                         labels=self.labels,
                         max_box_per_image=model_cfg['max_box_per_image'],
                         anchors=model_cfg['anchors'])
        self.yolo.load_weights(weights_path)

        self.timing = [0, 0.]

    def _predict_one(self, image, threshold, decimals, draw_bboxes=True):
        """Predict on one image, update the timing and return (image, boxes).

        The boxes are drawn on the image unless ``draw_bboxes`` is false
        (the flag used to be ignored).
        """
        t = timer()
        boxes = self.yolo.predict(image, threshold=threshold)
        if draw_bboxes:
            image = draw_boxes(image, boxes, self.labels, decimals=decimals)
        t = timer() - t

        self.timing[0] += 1
        self.timing[1] += t
        print('{} boxes are found for {} s'.format(len(boxes), t))
        return image, boxes

    @staticmethod
    def _draw_fps(image, fps_text):
        """Draw the FPS label on a white patch in the top left corner."""
        cv2.rectangle(image, (0, 0), (50, 17), (255, 255, 255), -1)
        cv2.putText(image, fps_text, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                    (0, 0, 0), 1)

    def predict_from_dir(self,
                         path_to_dir,
                         image_format,
                         path_to_outputs=None,
                         threshold=0.5,
                         decimals=8,
                         save_anno=False,
                         draw_truth=False):
        """Predict on every ``image_format`` file found in ``path_to_dir``.

        When ``path_to_outputs`` is given, annotated images are written to
        its ``images`` sub-folder and, if ``save_anno`` is true, annotations
        are saved through ``save_anno_xml`` into its ``annotations``
        sub-folder. ``draw_truth`` is currently unused.
        """
        images_dir = None
        annotations_dir = None
        if path_to_outputs:
            if not os.path.exists(path_to_outputs):
                print('Creating output path {}'.format(path_to_outputs))
            # os.path.join works with or without a trailing slash on the
            # output path; the sub-folders are created so that cv2.imwrite
            # does not silently fail.
            images_dir = os.path.join(path_to_outputs, 'images')
            os.makedirs(images_dir, exist_ok=True)
            if save_anno:
                # keep the trailing separator in case it is concatenated
                annotations_dir = os.path.join(path_to_outputs,
                                               'annotations', '')
                os.makedirs(annotations_dir, exist_ok=True)

        for image_filename in os.listdir(path_to_dir):
            if not image_filename.endswith(image_format):
                print('В папке не только изображения - {}'.format(
                    image_filename))
                continue

            image = cv2.imread(os.path.join(path_to_dir, image_filename),
                               cv2.IMREAD_COLOR)
            if image is None:
                # right extension, but not a decodable image
                print('В папке не только изображения - {}'.format(
                    image_filename))
                continue

            image_h = image.shape[0]
            image_w = image.shape[1]

            curr_time = timer()
            image, boxes = self._predict_one(image,
                                             threshold=threshold,
                                             decimals=decimals)
            curr_time = timer() - curr_time
            print(curr_time)

            boxes = get_annoboxes(image_w=image_w,
                                  image_h=image_h,
                                  boxes=boxes,
                                  labels=self.labels)

            if not path_to_outputs:
                continue

            if save_anno:
                save_anno_xml(
                    dir=annotations_dir,
                    img_name=image_filename[:-len(image_format) - 1],
                    img_format=image_format,
                    img_w=image.shape[1],
                    img_h=image.shape[0],
                    img_d=image.shape[2],
                    boxes=boxes,
                    quiet=False,
                    minConf=threshold)

            retval = cv2.imwrite(os.path.join(images_dir, image_filename),
                                 image)
            if retval:
                print('Изображение {} успешно сохранено в папку {}'.format(
                    image_filename, path_to_outputs))

        print('Все изображения обработаны')
        # guard the average against a run without any processed image
        count, total = self.timing[0], self.timing[1]
        average = total / count if count else 0.0
        print(
            'Число изображений {}, общее время {}, среднее время на изображение {}'
            .format(count, total, average))

    def predict_from_webcam(self, threshold=0.5, fps=False, decimals=8,
                            camera_index=1):
        """Show live predictions from a webcam until ESC is pressed.

        Args:
            threshold: Detection threshold passed to the model.
            fps: When true, an FPS counter is drawn on the frames.
            decimals: Kept for interface compatibility.
                NOTE(review): frames are drawn with ``decimals=2`` as in the
                original code - confirm whether this parameter should be
                honoured instead.
            camera_index: Index of the capture device (previously fixed to 1).

        Raises:
            IOError: If the capture device cannot be opened.
        """
        vid = cv2.VideoCapture(camera_index)
        if not vid.isOpened():
            raise IOError(
                ("Couldn't open webcam. If you're trying to open a webcam, "
                 "make sure you video_path is an integer!"))

        accum_time = 0
        curr_fps = 0
        # separate name: the original overwrote the boolean `fps` flag here
        fps_text = "FPS: ??"
        prev_time = timer()

        try:
            while True:
                retval, orig_image = vid.read()
                if not retval:
                    print("Done!")
                    return

                # _predict_one returns (image, boxes); the tuple itself used
                # to be passed to the OpenCV drawing calls
                res_image, _ = self._predict_one(orig_image,
                                                 threshold=threshold,
                                                 decimals=2)

                # Calculate FPS
                # This computes FPS for everything, not just the model's
                # execution, which may or may not be what you want
                if fps:
                    curr_time = timer()
                    accum_time += curr_time - prev_time
                    prev_time = curr_time
                    curr_fps += 1
                    if accum_time > 1:
                        accum_time -= 1
                        fps_text = "FPS: " + str(curr_fps)
                        curr_fps = 0
                    self._draw_fps(res_image, fps_text)

                cv2.imshow("YOLOv2 result", res_image)

                pressedKey = cv2.waitKey(10)
                if pressedKey == 27:  # ESC key
                    break
        finally:
            vid.release()

    def predict_from_video(self,
                           path_to_video,
                           threshold=0.5,
                           decimals=8,
                           output_file='',
                           crop=True,
                           writeFPS=False,
                           show=False):
        """Predict on every frame of a video file.

        Args:
            path_to_video: Video to read.
            threshold: Detection threshold passed to the model.
            decimals: Passed through to ``draw_boxes``.
            output_file: When non-empty, annotated frames are written here
                (XVID, 20 fps).
            crop: Crop each frame to a centred square of side equal to the
                frame height. NOTE(review): this assumes width >= height.
            writeFPS: Draw an FPS counter on the frames.
            show: Display the frames in a window; ESC stops processing.

        Raises:
            IOError: If the video cannot be opened.
        """
        vid = cv2.VideoCapture(path_to_video)
        if not vid.isOpened():
            # the previous message talked about a webcam
            raise IOError(
                "Couldn't open video. Make sure path_to_video points to a "
                "readable video file!")

        vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)

        n = 0
        if crop:
            n = int((vidw - vidh) * 0.5)
            vidw = vidh

        # the writer is only created when an output file was requested
        out = None
        if output_file:
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(output_file, fourcc, 20.0,
                                  (int(vidw), int(vidh)))

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()

        try:
            while True:
                retval, orig_image = vid.read()
                if not retval:
                    print("Done!")
                    break

                if crop:
                    orig_image = orig_image[:, n:int(n + vidh), :]

                res_image, boxes = self._predict_one(orig_image,
                                                     threshold=threshold,
                                                     decimals=decimals)

                # Calculate FPS
                # This computes FPS for everything, not just the model's
                # execution, which may or may not be what you want
                if writeFPS:
                    curr_time = timer()
                    accum_time += curr_time - prev_time
                    prev_time = curr_time
                    curr_fps += 1
                    if accum_time > 1:
                        accum_time -= 1
                        fps = "FPS: " + str(curr_fps)
                        curr_fps = 0
                    self._draw_fps(res_image, fps)

                if show:
                    cv2.imshow("YOLOv2 result", res_image)
                if out is not None:
                    out.write(res_image)

                pressedKey = cv2.waitKey(10)
                if pressedKey == 27:  # ESC key
                    break
        finally:
            # the end-of-video path used to return before the writer was
            # released, which can leave the output file unfinished
            vid.release()
            if out is not None:
                out.release()
def _main_(config_path='config.json',
           weights_path='model.h5',
           image_path='image.mp4',
           max_frames=62):
    """Count detections per frame and store the counts in ``head_count.csv``.

    For an ``.mp4`` input an annotated copy ``<name>_detected.mp4`` is
    written and one CSV row is recorded per processed frame; for any other
    input the file is treated as a single image.

    Args:
        config_path: JSON configuration of the model.
        weights_path: Trained weights to load.
        image_path: Video (``.mp4``) or image to process.
        max_frames: Stop after this many video frames; ``None`` processes
            the whole video.

    Raises:
        IOError: If the input image cannot be read.
    """
    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    labels = config['model']['labels']

    ###############################
    #   Make the model
    ###############################

    yolo = YOLO(backend=config['model']['backend'],
                input_size=config['model']['input_size'],
                labels=labels,
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'])

    time_now = 0
    # Rows are collected in a list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call. The first row is the original zero placeholder.
    rows = [{'Time': 0, 'Head_count': 0}]

    ###############################
    #   Load trained weights
    ###############################

    yolo.load_weights(weights_path)

    ###############################
    #   Predict bounding boxes
    ###############################

    if image_path[-4:] == '.mp4':
        video_out = image_path[:-4] + '_detected' + image_path[-4:]
        video_reader = cv2.VideoCapture(image_path)

        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

        video_writer = cv2.VideoWriter(video_out,
                                       cv2.VideoWriter_fourcc(*'MPEG'), 50.0,
                                       (frame_w, frame_h))
        try:
            for _ in tqdm(range(nb_frames)):
                ret, image = video_reader.read()
                # the reported frame count can exceed the readable frames
                if not ret:
                    break
                time_now += 1

                boxes = yolo.predict(image)
                rows.append({
                    'Time': str(time_now // 60) + '/' + str(time_now % 60),
                    'Head_count': len(boxes)
                })
                image = draw_boxes(image, boxes, labels)

                video_writer.write(np.uint8(image))
                if max_frames is not None and time_now >= max_frames:
                    break
        finally:
            video_reader.release()
            video_writer.release()
    else:
        image = cv2.imread(image_path)
        if image is None:
            raise IOError("Couldn't read image {}".format(image_path))

        boxes = yolo.predict(image)
        image = draw_boxes(image, boxes, labels)
        rows.append({'Time': '0/1', 'Head_count': len(boxes)})

        print(len(boxes), 'boxes are found')

        cv2.imwrite(image_path[:-4] + '_detected' + image_path[-4:], image)

    data_head = pd.DataFrame(rows, columns=['Time', 'Head_count'])
    data_head.to_csv('head_count.csv', index=False)
def _read_annotation_boxes(annot_path):
    """Return the named, fully specified boxes of a VOC-style XML file."""
    required = ('xmin', 'ymin', 'xmax', 'ymax')
    boxes_ann = []

    for elem in ET.parse(annot_path).iter():
        if 'object' not in elem.tag and 'part' not in elem.tag:
            continue

        obj = {}
        for attr in list(elem):
            if 'name' in attr.tag:
                obj['name'] = attr.text
            if 'bndbox' in attr.tag:
                for dim in list(attr):
                    for key in required:
                        if key in dim.tag:
                            obj[key] = int(round(float(dim.text)))

        # Only named objects were drawn before; objects without a complete
        # bounding box are skipped as well instead of raising KeyError later.
        if 'name' in obj and all(key in obj for key in required):
            boxes_ann.append(obj)

    return boxes_ann


def _main_(args):
    """Run YOLO detection on an ``.mp4`` video or a single image.

    Video frames are rotated with ``np.rot90(image, 3)`` before detection and
    written to ``<name>_detected.mp4``. For an image, the boxes of the
    optional annotation file are drawn first, then the detections, and the
    result is written to ``<name>_detected<ext>``.

    Args:
        args: Parsed command-line arguments providing ``conf``, ``weights``,
            ``input`` and ``annotFile`` (XML annotation path or ``None``).

    Raises:
        IOError: If the input image cannot be read.
    """
    config_path = args.conf
    weights_path = args.weights
    image_path = args.input

    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    labels = config['model']['labels']

    ###############################
    #   Make the model
    ###############################

    yolo = YOLO(backend=config['model']['backend'],
                input_size=config['model']['input_size'],
                labels=labels,
                max_box_per_image=config['model']['max_box_per_image'],
                anchors=config['model']['anchors'])

    ###############################
    #   Load trained weights
    ###############################

    yolo.load_weights(weights_path)

    ###############################
    #   Predict bounding boxes
    ###############################

    if image_path[-4:] == '.mp4':
        video_out = image_path[:-4] + '_detected' + image_path[-4:]
        video_reader = cv2.VideoCapture(image_path)

        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

        # the frames are rotated below, so width and height are swapped
        video_writer = cv2.VideoWriter(video_out,
                                       cv2.VideoWriter_fourcc(*'mp4v'), 30.0,
                                       (frame_h, frame_w), True)
        try:
            for _ in tqdm(range(nb_frames)):
                ret, image = video_reader.read()
                # the reported frame count can exceed the readable frames
                if not ret:
                    break

                # np.rot90 returns a view; copy it so OpenCV gets a
                # contiguous array to draw on
                image = np.rot90(image, 3).copy()

                boxes = yolo.predict(image)
                image = draw_boxes(image, boxes, labels, 2, 1.1, -30)

                video_writer.write(np.uint8(image))
        finally:
            video_reader.release()
            video_writer.release()
    else:
        image = cv2.imread(image_path)
        if image is None:
            raise IOError("Couldn't read image {}".format(image_path))

        if args.annotFile is not None:
            for box in _read_annotation_boxes(args.annotFile):
                cv2.rectangle(image, (box['xmin'], box['ymin']),
                              (box['xmax'], box['ymax']), (255, 0, 0), 30)

        boxes = yolo.predict(image)
        image = draw_boxes(image, boxes, labels, 30, 4.5, 35)

        print(len(boxes), 'boxes are found')

        cv2.imwrite(image_path[:-4] + '_detected' + image_path[-4:], image)
def detect_videos(annotations_list, video_folders_list, detected_folder):
    """Detect the target in every video with YOLO and store the boxes.

    For each video folder the frames are processed in natural sort order.
    The first stored box is the normalized first annotation; for every later
    frame the detection picked by ``choose_best_box`` relative to the
    previously chosen box is stored. The boxes are saved as rows
    ``[x, y, w, h]`` in ``<detected_folder>/<video name>.npy``; videos whose
    result file already exists are skipped.

    Args:
        annotations_list: Annotation files, one per video, named
            ``<video folder name>.txt`` and aligned with
            ``video_folders_list``.
        video_folders_list: Folders containing the frames of each video.
        detected_folder: Folder receiving the ``.npy`` result files.

    Raises:
        IOError: If the two lists differ in length, an annotation file does
            not belong to its video, the frame count does not match the
            annotation count, a video has no frames, or a frame cannot be
            read.
    """
    # validate the inputs before the (slow) model construction
    if len(annotations_list) != len(video_folders_list):
        raise IOError("Mismatch # videos {} {}.".format(len(annotations_list), len(video_folders_list)))

    yolo_config_path = "../config_aerial.json"
    with open(yolo_config_path) as config_buffer:
        yolo_config = json.load(config_buffer)

    # ##############################
    #   Make the model
    # ##############################

    yolo = YOLO(architecture=yolo_config['model']['architecture'],
                input_size=yolo_config['model']['input_size'],
                labels=yolo_config['model']['labels'],
                max_box_per_image=yolo_config['model']['max_box_per_image'],
                anchors=yolo_config['model']['anchors'])

    # ###############################
    # #   Load trained weights
    # ###############################

    yolo_weights_path = "../yolo_coco_aerial_person.h5"
    print("YOLO weights path:", yolo_weights_path)
    yolo.load_weights(yolo_weights_path)

    for annot_path, video_folder in zip(annotations_list, video_folders_list):
        video_name = basename(video_folder)
        print(video_name)

        detected_label_path = os.path.join(detected_folder, video_name)
        if os.path.exists(detected_label_path + '.npy'):
            continue

        if basename(annot_path) != (video_name + ".txt"):
            print("Annot: {}".format(basename(annot_path)))
            print("image: {}".format(video_name))
            raise IOError("Mismatch video {}.".format(video_name))

        # read the annotation file once, inside a context manager; counting
        # lines over a bare open() leaked the file handle
        with open(annot_path, 'r') as annot_file:
            annot_lines = annot_file.readlines()
        num_frames = len(annot_lines)

        image_path_list = sorted(glob.glob(video_folder + "/*"))
        sort_nicely(image_path_list)
        if num_frames != len(image_path_list):
            raise IOError("Number of frames in {} does not match annotations.".format(video_name))
        if num_frames == 0:
            raise IOError("Video {} has no frames.".format(video_name))

        first_image = cv2.imread(image_path_list[0])
        if first_image is None:
            raise IOError("Cannot find {}".format(image_path_list[0]))
        first_box_unnormalized = parse_label(annot_lines[0])
        first_box = normalize_box(first_image.shape, first_box_unnormalized)
        last_box = first_box

        # the first stored box is the ground-truth box of the first frame
        detected_boxes = [[first_box.x, first_box.y, first_box.w, first_box.h]]

        for i, image_path in enumerate(image_path_list):
            print("============ Detecting {} video, {} frame ===============".format(video_name, basename(image_path)))
            image = cv2.imread(image_path)
            if image is None:
                # this used to be printed only, and the missing frame was
                # still handed to the detector
                raise IOError("Cannot find {}".format(image_path))

            boxes, dummy_feature = yolo.predict_for_rolo(image)
            chosen_box = choose_best_box(boxes, last_box)
            last_box = chosen_box

            if i > 0:
                # frame 0 is already covered by the annotation box
                detected_boxes.append([chosen_box.x, chosen_box.y, chosen_box.w, chosen_box.h])

        print("======================= Save detected label result ==========================")
        detected_boxes = np.array(detected_boxes)
        print("Video:{} {} boxes are detected".format(video_name, detected_boxes.shape[0]))
        np.save(detected_label_path + '.npy', detected_boxes)