def main():
    args = parser.parse_args()

    time_stamp = "{0:%Y%m%d-%H%M%S}".format(datetime.now())
    save_name = os.path.join(args.save_dir, "train_{}".format(time_stamp))
    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    if args.path is None:
        raise OSError("path to the annotation file is required.")

    C = config.Config()
    C.config_filename = save_name + "_config.pickle"
    C.model_path = save_name + "_model.hdf5"
    C.use_horizontal_flips = bool(args.horizontal_flips)
    C.use_vertical_flips = bool(args.vertical_flips)
    C.rot_90 = bool(args.rot_90)

    all_imgs, classes_count, class_mapping = get_data(args.path)
    C.class_mapping = class_mapping

    with open(C.config_filename, 'wb') as config_f:
        pickle.dump(C, config_f)

    print("-------------------------------")
    print('path to config file : {}'.format(C.config_filename))
    print("-------------------------------")

    train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'test']

    data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C,
                                                   K.image_dim_ordering(), mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C,
                                                 K.image_dim_ordering(), mode='val')

    model_rpn, model_classifier, model_all = faster_rcnn.get_model(C, classes_count)

    losses = np.zeros((args.n_iters, 5))
    rpn_accuracy_rpn_monitor, rpn_accuracy_for_epoch = [], []
    best_loss = np.Inf

    with open('out.csv', 'w') as f:
        f.write('Accuracy,RPN classifier,RPN regression,Detector classifier,Detector regression,Total\n')

    iter_num = 0
    t0 = start_time = time.time()

    try:
        for epoch_num in range(args.n_epochs):
            progbar = generic_utils.Progbar(args.n_iters)
            print('Epoch {}/{}'.format(epoch_num + 1, args.n_epochs))

            while True:
                try:
                    if len(rpn_accuracy_rpn_monitor) == args.n_iters and C.verbose:
                        mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                        rpn_accuracy_rpn_monitor = []
                        print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(mean_overlapping_bboxes, args.n_iters))
                        if mean_overlapping_bboxes == 0:
                            print('RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.')

                    X, Y, img_data = next(data_gen_train)

                    loss_rpn = model_rpn.train_on_batch(X, Y)
                    P_rpn = model_rpn.predict_on_batch(X)

                    R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], C, K.image_dim_ordering(),
                                               use_regr=True, overlap_thresh=0.7, max_boxes=300)

                    # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                    X2, Y1, Y2 = roi_helpers.calc_iou(R, img_data, C, class_mapping)
                    if X2 is None:
                        rpn_accuracy_rpn_monitor.append(0)
                        rpn_accuracy_for_epoch.append(0)
                        continue

                    # np.where returns a tuple of index arrays; take the first element
                    # before checking how many samples were actually found
                    neg_samples = np.where(Y1[0, :, -1] == 1)[0]
                    pos_samples = np.where(Y1[0, :, -1] == 0)[0]

                    rpn_accuracy_rpn_monitor.append(len(pos_samples))
                    rpn_accuracy_for_epoch.append(len(pos_samples))

                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(pos_samples, C.num_rois // 2,
                                                                replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(neg_samples, C.num_rois - len(selected_pos_samples),
                                                                replace=False).tolist()
                    except ValueError:
                        # not enough negative samples to fill the minibatch without repeats
                        selected_neg_samples = np.random.choice(neg_samples, C.num_rois - len(selected_pos_samples),
                                                                replace=True).tolist()
                    sel_samples = selected_pos_samples + selected_neg_samples

                    loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]],
                                                                 [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                    if iter_num == args.n_iters:
                        loss_rpn_cls = np.mean(losses[:, 0])
                        loss_rpn_regr = np.mean(losses[:, 1])
                        loss_class_cls = np.mean(losses[:, 2])
                        loss_class_regr = np.mean(losses[:, 3])
                        class_acc = np.mean(losses[:, 4])

                        mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                        rpn_accuracy_for_epoch = []

                        if C.verbose:
                            print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(mean_overlapping_bboxes))
                            print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                            print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                            print('Loss RPN regression: {}'.format(loss_rpn_regr))
                            print('Loss Detector classifier: {}'.format(loss_class_cls))
                            print('Loss Detector regression: {}'.format(loss_class_regr))
                            print('Elapsed time: {}[s]'.format(time.time() - start_time))

                        with open('out.csv', 'a') as target_text_file:
                            target_text_file.write('{},{},{},{},{},{}\n'.format(
                                class_acc, loss_rpn_cls, loss_rpn_regr, loss_class_cls, loss_class_regr,
                                loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr))

                        curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                        iter_num = 0
                        start_time = time.time()

                        if curr_loss < best_loss:
                            if C.verbose:
                                print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
                            best_loss = curr_loss
                            model_all.save_weights(C.model_path)
                        break

                    losses[iter_num, 0] = loss_rpn[1]
                    losses[iter_num, 1] = loss_rpn[2]
                    losses[iter_num, 2] = loss_class[1]
                    losses[iter_num, 3] = loss_class[2]
                    losses[iter_num, 4] = loss_class[3]
                    iter_num += 1

                    progbar.update(iter_num,
                                   [('rpn_cls', np.mean(losses[:iter_num, 0])),
                                    ('rpn_regr', np.mean(losses[:iter_num, 1])),
                                    ('detector_cls', np.mean(losses[:iter_num, 2])),
                                    ('detector_regr', np.mean(losses[:iter_num, 3]))])

                except Exception as e:
                    print('Exception: {}'.format(e))
                    continue
    except KeyboardInterrupt:
        t1 = time.time()
        print('\nIt took {:.2f}s'.format(t1 - t0))
        sys.exit('Keyboard Interrupt')

    print("training is done")
    print("-------------------------------")
    print('path to config file : {}'.format(C.config_filename))
    print("-------------------------------")
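
# --- Hedged sketch: the module-level argument parser that main() reads from. ---
# main() accesses args.path, args.save_dir, args.n_iters, args.n_epochs,
# args.horizontal_flips, args.vertical_flips and args.rot_90. The flag names
# below mirror those attributes, but the defaults and help strings are
# assumptions for illustration, not the repository's actual definitions.
import argparse

parser = argparse.ArgumentParser(description='Train Faster R-CNN from an annotation file.')
parser.add_argument('--path', default=None, help='path to the annotation file (required)')
parser.add_argument('--save_dir', default='models', help='directory for the config pickle and weights')
parser.add_argument('--n_iters', type=int, default=1000, help='iterations per epoch')
parser.add_argument('--n_epochs', type=int, default=50, help='number of training epochs')
parser.add_argument('--horizontal_flips', type=int, default=0, help='1 to augment with horizontal flips')
parser.add_argument('--vertical_flips', type=int, default=0, help='1 to augment with vertical flips')
parser.add_argument('--rot_90', type=int, default=0, help='1 to augment with 90-degree rotations')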
def detect_predict(pic, C, model_rpn, model_classifier, model_classifier_only,
                   class_mapping, class_to_color, print_dets=False, export=False,
                   bbox_threshold=0.8, overlap_thresh=0.5):
    """
    Detect and predict objects in the picture.

    :param pic: picture as a numpy array
    :param C: config object
    :params model_*: models from the get_models function
    :params class_*: mapping and colors, which need to be loaded to keep the same colors/classes
    :param bbox_threshold: minimum class probability for a proposal to be kept
        (default follows the usual keras-frcnn test settings)
    :param overlap_thresh: IoU threshold for non-max suppression
    :return: picture with bounding boxes drawn, or a dict of box coordinates if export=True
    """
    img = pic
    X, ratio = format_img(img, C)
    # reconstruct a displayable image by undoing the ImageNet mean subtraction
    # applied in format_img (kept for debugging; not used below)
    img_scaled = np.transpose(X.copy()[0, (2, 1, 0), :, :], (1, 2, 0)).copy()
    img_scaled[:, :, 0] += 123.68
    img_scaled[:, :, 1] += 116.779
    img_scaled[:, :, 2] += 103.939
    img_scaled = img_scaled.astype(np.uint8)
    if K.image_data_format() == 'channels_last':
        X = np.transpose(X, (0, 2, 3, 1))

    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)

    R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_data_format(), overlap_thresh=0.7)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}

    for jk in range(R.shape[0] // C.num_rois + 1):
        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0)
        if ROIs.shape[1] == 0:
            break
        if jk == R.shape[0] // C.num_rois:
            # pad R so the last batch also has exactly C.num_rois proposals
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):
            # skip proposals below the confidence threshold or classified as background
            if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]
            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]
            cls_num = np.argmax(P_cls[0, ii, :])
            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                pass
            bboxes[cls_name].append([C.rpn_stride * x, C.rpn_stride * y,
                                     C.rpn_stride * (x + w), C.rpn_stride * (y + h)])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))

    all_dets = []
    boxes_export = {}

    for key in bboxes:
        bbox = np.array(bboxes[key])

        # eliminate redundant, overlapping detection windows
        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            bbox, np.array(probs[key]), overlap_thresh=overlap_thresh)

        # keep only the best prediction per character
        jk = np.argmax(new_probs)

        # threshold for the best prediction
        if new_probs[jk] > 0.55:
            (x1, y1, x2, y2) = new_boxes[jk, :]

            # convert predicted picture box coordinates to real-size picture coordinates
            (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

            if export:
                # export box coordinates instead of drawing on the picture
                boxes_export[key] = [(real_x1, real_y1, real_x2, real_y2),
                                     int(100 * new_probs[jk])]
            else:
                cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                              (int(class_to_color[key][0]), int(class_to_color[key][1]),
                               int(class_to_color[key][2])), 2)

                textLabel = '{}: {}%'.format(key, int(100 * new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))
                (retval, baseLine) = cv2.getTextSize(textLabel, cv2.FONT_HERSHEY_COMPLEX, 1, 1)

                # to avoid putting text outside the frame,
                # move the legend below the box if the box touches the top edge
                if real_y1 < 20 and real_y2 < img.shape[0]:
                    textOrg = (real_x1, real_y2 + 5)
                elif real_y1 < 20 and real_y2 > img.shape[0]:
                    textOrg = (real_x1, img.shape[0] - 10)
                else:
                    textOrg = (real_x1, real_y1 + 5)

                cv2.rectangle(img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                              (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                              (0, 0, 0), 2)
                cv2.rectangle(img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                              (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                              (255, 255, 255), -1)
                cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

    if print_dets:
        print(all_dets)
    if export:
        return boxes_export
    else:
        return img
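
# --- Hedged usage sketch for detect_predict(). ---
# build_models() is a hypothetical loader standing in for however the real
# script constructs model_rpn / model_classifier / model_classifier_only and
# loads the weights saved by main(); the config and image paths are placeholders.
def example_detect(config_path, image_path):
    with open(config_path, 'rb') as config_f:
        C = pickle.load(config_f)
    # invert the mapping saved at training time: class name -> index becomes index -> name
    class_mapping = {v: k for k, v in C.class_mapping.items()}
    # one fixed random BGR color per class so repeated calls stay consistent
    class_to_color = {name: np.random.randint(0, 255, 3) for name in class_mapping.values()}
    model_rpn, model_classifier, model_classifier_only = build_models(C)  # hypothetical loader
    img = cv2.imread(image_path)
    return detect_predict(img, C, model_rpn, model_classifier, model_classifier_only,
                          class_mapping, class_to_color, print_dets=True)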