def train(im_size, train_mode, epoch_length=1000, num_epochs=10, val_steps=200, lr_mode='adaptive'):
    """
    Train the model

    Args:
        im_size: user input. Input images are resized to this size.
        train_mode: 1 to train a new model with pre-trained weights on a generic
            dataset. 2 to keep training on the WashingtonOB Race dataset.
        epoch_length: number of steps per training epoch
        num_epochs: maximum number of training epochs
        val_steps: number of validation steps at the end of each training epoch
        lr_mode: if 'adaptive', the learning rate follows an exponential decay.
            If 'constant', it is fixed at 1e-5.

    Returns:
        None (the trained model is saved automatically)
    """
    C = config.Config()
    C.network = 'resnet50'
    C.im_size = im_size
    C.model_path = f'models/model_frcnn_{C.im_size}.hdf5'

    all_imgs, classes_count, class_mapping = get_data(
        'data/training/corners_training.txt')

    # add background class
    classes_count['bg'] = 0
    class_mapping['bg'] = len(class_mapping)
    C.class_mapping = class_mapping
    print(f'Total number of objects per class (across the whole dataset): {classes_count}')

    config_output_filename = f'models/conf/config_frcnn_{C.im_size}.pickle'
    # Where training is recorded (losses, classification accuracy and mean average precision)
    record_path = f'models/logs/frcnn_{C.im_size}.csv'
    if lr_mode == 'adaptive':
        record_path = f'models/logs/frcnn_{C.im_size}_adap.csv'
    pickle.dump(C, open(config_output_filename, 'wb'))

    train_imgs = [s for s in all_imgs if s['imageset'] == 'train']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'val']

    # Shuffle the images with seed 1
    random.seed(1)
    random.shuffle(train_imgs)
    random.shuffle(val_imgs)
    print(f'{len(train_imgs)} training and {len(val_imgs)} validation samples '
          f'(before augmentation).')

    data_gen_train = regions_proposal_network.get_anchor_gt(
        train_imgs, C, nn.get_img_output_length, mode='train')
    data_gen_val = regions_proposal_network.get_anchor_gt(
        val_imgs, C, nn.get_img_output_length, mode='val')

    X, Y, image_data = next(data_gen_train)
    print('Original image: height=%d width=%d' %
          (image_data['height'], image_data['width']))
    print('Resized image: height=%d width=%d' % (X.shape[1], X.shape[2]))
    print('Feature map size: height=%d width=%d C.rpn_stride=%d' %
          (Y[0].shape[1], Y[0].shape[2], C.rpn_stride))

    input_shape_img = (None, None, 3)
    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(shared_layers, roi_input, C.num_rois,
                               nb_classes=len(classes_count))

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)
    # this model holds both the RPN and the classifier; it is used to
    # load/save weights for both models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    model_rpn, model_classifier, record_df = load_weights(
        im_size, model_rpn, model_classifier, train_mode)

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[losses.rpn_loss_cls(num_anchors),
                            losses.rpn_loss_regr(num_anchors)])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[losses.class_loss_cls,
              losses.class_loss_regr(len(classes_count) - 1)],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    # Training settings
    total_epochs = len(record_df)
    if len(record_df) == 0:
        best_loss = np.Inf
    else:
        best_loss = np.min(record_df['curr_loss_val'])
        print(f'Resuming training. Already trained for {len(record_df)} epochs.')

    validation_trend_hold = False
    total_epochs += num_epochs
    iter_num = 0
    loss = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []

    print('Starting training')
    start_time = time.time()

    for epoch_num in range(num_epochs):
        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:
                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    if mean_overlapping_bboxes == 0:
                        print('RPN is not producing bounding boxes that overlap '
                              'the ground truth boxes. Check RPN settings or keep training.')

                # the generator is consumed with next()
                X, Y, img_data = next(data_gen_train)

                # Train the RPN and get the loss values [_, loss_rpn_cls, loss_rpn_regr]
                current_learning_rate = calculate_learning_rate(epoch_num, mode=lr_mode)
                K.set_value(model_rpn.optimizer.lr, current_learning_rate)
                loss_rpn = model_rpn.train_on_batch(X, Y)

                # Get the RPN predictions [rpn_cls, rpn_regr]
                P_rpn = model_rpn.predict_on_batch(X)

                # R: bboxes (shape=(max_boxes, 4))
                # Convert the RPN output to ROI bboxes
                R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], C,
                                           overlap_thresh=0.6,
                                           max_boxes=C.max_boxes)

                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                # X2: bboxes with iou > C.classifier_min_overlap for all gt bboxes
                #     among the non_max_suppression bboxes
                # Y1, Y2: corresponding labels and ground truth bboxes
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C, class_mapping)

                # if no ROI was matched, skip this image
                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []
                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if C.num_rois > 1:
                    # If there are more positive samples than num_rois // 2,
                    # randomly keep only num_rois // 2 of them
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2, replace=False).tolist()
                    # Randomly choose (num_rois - num_pos) negative samples
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples, C.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except ValueError:
                        selected_neg_samples = np.random.choice(
                            neg_samples, C.num_rois - len(selected_pos_samples),
                            replace=True).tolist()
                    # Save all the pos and neg samples in sel_samples
                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                # training_data: [X, X2[:, sel_samples, :]]
                # labels: [Y1[:, sel_samples, :], Y2[:, sel_samples, :]]
                # X => resized image
                # X2[:, sel_samples, :] => num_rois bboxes (the selected neg and pos samples)
                # Y1[:, sel_samples, :] => one-hot encoding for those bboxes
                # Y2[:, sel_samples, :] => labels and gt bboxes for those bboxes
                K.set_value(model_classifier.optimizer.lr, current_learning_rate)
                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                loss[iter_num, 0] = loss_rpn[1]
                loss[iter_num, 1] = loss_rpn[2]
                loss[iter_num, 2] = loss_class[1]
                loss[iter_num, 3] = loss_class[2]
                loss[iter_num, 4] = loss_class[3]

                progbar.update(
                    iter_num + 1,
                    [('RPN Classifier Loss', loss[iter_num, 0]),
                     ('RPN Regression Loss', loss[iter_num, 1]),
                     ('Detector Classifier Loss', loss[iter_num, 2]),
                     ('Detector Regression Loss', loss[iter_num, 3])])
                iter_num += 1

                # end-of-epoch check
                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(loss[:, 0])
                    loss_rpn_regr = np.mean(loss[:, 1])
                    loss_class_cls = np.mean(loss[:, 2])
                    loss_class_regr = np.mean(loss[:, 3])
                    class_acc = np.mean(loss[:, 4])

                    print('Performing validation.')
                    val_loss = validate(val_steps, data_gen_val, model_rpn, C,
                                        class_mapping, model_classifier)

                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(f'Classifier accuracy for bounding boxes: {class_acc}')

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0

                    if val_loss['curr_loss'] <= best_loss:
                        if C.verbose:
                            print(f'Total validation loss decreased from {best_loss} '
                                  f'to {val_loss["curr_loss"]}, saving weights.')
                            print('')
                        best_loss = val_loss['curr_loss']
                        model_all.save_weights(C.model_path)
                        validation_trend_hold = False
                    elif not validation_trend_hold:
                        if C.verbose:
                            print(f'Total validation loss increased for the first time, '
                                  f'from {best_loss} to {val_loss["curr_loss"]}. '
                                  f'Performing one more epoch to verify the trend. '
                                  f'Not saving weights for now.')
                            print('')
                        validation_trend_hold = True
                    else:
                        if C.verbose:
                            print(f'Total validation loss increased for the second time, '
                                  f'from {best_loss} to {val_loss["curr_loss"]}.')
                            print(f'Terminating training now to prevent over-fitting. '
                                  f'Keeping weights from epoch {epoch_num - 1}.')
                        exit()

                    new_row = {
                        'mean_overlapping_bboxes': round(mean_overlapping_bboxes, 3),
                        'class_acc': round(class_acc, 3),
                        'loss_rpn_cls': round(loss_rpn_cls, 3),
                        'loss_rpn_regr': round(loss_rpn_regr, 3),
                        'loss_class_cls': round(loss_class_cls, 3),
                        'loss_class_regr': round(loss_class_regr, 3),
                        'curr_loss': round(curr_loss, 3),
                        'class_acc_val': round(val_loss['class_acc'], 3),
                        'curr_loss_val': round(val_loss['curr_loss'], 3),
                        'elapsed_time': round(time.time() - start_time, 3)
                    }
                    start_time = time.time()

                    record_df = record_df.append(new_row, ignore_index=True)
                    record_df.to_csv(record_path, index=False)
                    break
            except Exception as e:
                print(f'Exception: {e}')
                continue

    print('Training complete.')
    return
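
# calculate_learning_rate() is called inside train() but not defined in this section.
# Below is a minimal sketch, assuming only the behaviour described in the train()
# docstring: a constant 1e-5 in 'constant' mode and an exponential decay in 'adaptive'
# mode. The initial rate and the decay factor are illustrative assumptions, not values
# taken from the original code.
def calculate_learning_rate(epoch_num, mode='adaptive', initial_lr=1e-4, decay=0.5):
    """Return the learning rate for the given epoch (sketch, assumed constants)."""
    if mode == 'constant':
        return 1e-5
    # exponential decay: lr = initial_lr * decay^epoch_num
    return initial_lr * (decay ** epoch_num)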
def detect_img(img_name):
    use_horizontal_flips = False
    use_vertical_flips = False
    rot_90 = False
    im_size = 600
    anchor_box_scales = [64, 128, 256, 512]
    anchor_box_ratios = [[1, 1], [1, 2], [2, 1]]
    img_channel_mean = [103.939, 116.779, 123.68]
    img_scaling_factor = 1.0
    num_rois = 4
    rpn_stride = 16
    balanced_classes = False
    std_scaling = 4.0
    classifier_regr_std = [8.0, 8.0, 4.0, 4.0]
    rpn_min_overlap = 0.3
    rpn_max_overlap = 0.7
    classifier_min_overlap = 0.1
    classifier_max_overlap = 0.5

    class_mapping = {'MALIGNANT': 0, 'BENIGN': 1, 'bg': 2}
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)
    # invert the mapping: class index -> class name
    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }

    num_features = 1024
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(anchor_box_scales) * len(anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(feature_map_input, roi_input, num_rois,
                               nb_classes=len(class_mapping), trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    model_path = "frcnn\\model_final_1.hdf5"
    # print('Loading weights from {}'.format(model_path))
    # model_rpn.load_weights(model_path, by_name=True)
    # model_classifier.load_weights(model_path, by_name=True)
    # model_rpn.compile(optimizer='sgd', loss='mse')
    # model_classifier.compile(optimizer='sgd', loss='mse')
    model_rpn, model_classifier = get_model(model_path, model_rpn, model_classifier)

    all_imgs = []
    classes = {}
    bbox_threshold = 0.8

    print(img_name)
    st = time.time()
    # filepath = os.path.join(img_path, img_name)
    img = cv2.imread(img_name)
    X, ratio = format_img(img, im_size, img_channel_mean, img_scaling_factor)
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    # get the feature maps and the output of the RPN
    [Y1, Y2, F] = model_rpn.predict(X)
    R = roi_helpers.rpn_to_roi(Y1, Y2, anchor_box_scales, anchor_box_ratios,
                               std_scaling, rpn_stride, K.image_dim_ordering(),
                               overlap_thresh=0.5)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}
    for jk in range(R.shape[0] // num_rois + 1):
        ROIs = np.expand_dims(R[num_rois * jk:num_rois * (jk + 1), :], axis=0)
        if ROIs.shape[1] == 0:
            break
        if jk == R.shape[0] // num_rois:
            # pad R so the last batch also has num_rois ROIs
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):
            # skip proposals classified as background
            if np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]
            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]
            cls_num = np.argmax(P_cls[0, ii, :])
            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= classifier_regr_std[0]
                ty /= classifier_regr_std[1]
                tw /= classifier_regr_std[2]
                th /= classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                pass
            bboxes[cls_name].append([
                rpn_stride * x, rpn_stride * y,
                rpn_stride * (x + w), rpn_stride * (y + h)
            ])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))

    all_dets = []
    for key in bboxes:
        bbox = np.array(bboxes[key])
        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            bbox, np.array(probs[key]), overlap_thresh=0.5)
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]
            (real_x1, real_y1, real_x2,
             real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

            cv2.rectangle(
                img, (real_x1, real_y1), (real_x2, real_y2),
                (int(class_to_color[key][0]), int(class_to_color[key][1]),
                 int(class_to_color[key][2])), 2)

            textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
            all_dets.append((key, 100 * new_probs[jk]))

            (retval, baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_COMPLEX, 1, 1)
            textOrg = (real_x1, real_y1)
            cv2.rectangle(
                img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                (0, 0, 0), 2)
            cv2.rectangle(
                img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1,
                        (0, 0, 0), 1)

    # print('Elapsed time = {}'.format(time.time() - st))
    # print(all_dets)
    # cv2.imshow('img', img)
    # cv2.waitKey(0)
    img_name = img_name.split('\\')[-1]
    # cv2.imwrite(f'./static/images/{img_name}.png', img)
    cv2.imwrite('./predict/kq.jpg', img)
    try:
        a = all_dets[0]
    except IndexError:
        # 'khong phat hien' is Vietnamese for 'no detection'
        a = ("khong phat hien", "khong phat hien")
    print(a)
    return img_name, a


# print("tp: {} \nfp: {}".format(tp, fp))
# img_name = r"D:\Desktop\thesis\Images\mass_crop_train\P_01981_RIGHT_MLO_FULL.jpg"
# a, b = detect_img(img_name)
# print(b[0], b[1])
# print(type(b[0]), type(b[1]))
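
# format_img() is called in detect_img() above but defined elsewhere in the repo.
# Below is a minimal sketch of this variant, assuming the usual keras-frcnn
# preprocessing: resize the shorter side to im_size, swap BGR to RGB, subtract the
# per-channel means, apply the scaling factor, and return a channel-first batch
# together with the resize ratio. Details are assumptions, not the repo's code.
def format_img(img, im_size, img_channel_mean, img_scaling_factor):
    """Resize and normalise an image for the network (sketch)."""
    height, width = img.shape[:2]
    # resize so the smaller side equals im_size, preserving the aspect ratio
    if width <= height:
        ratio = im_size / width
        new_width, new_height = im_size, int(ratio * height)
    else:
        ratio = im_size / height
        new_width, new_height = int(ratio * width), im_size
    img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
    img = img[:, :, (2, 1, 0)].astype(np.float32)   # BGR -> RGB
    img -= np.array(img_channel_mean, dtype=np.float32)
    img /= img_scaling_factor
    img = np.transpose(img, (2, 0, 1))              # channels first
    return np.expand_dims(img, axis=0), ratio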
def setupFRCNN(options):
    # suppress tensorflow warnings and allow GPU memory allocation to grow
    import os
    import tensorflow as tf
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    configTF = tf.ConfigProto()
    configTF.gpu_options.allow_growth = True
    sess = tf.Session(config=configTF)

    # set the system recursion limit
    import sys
    sys.setrecursionlimit(40000)

    # load the config file (pickle files must be opened in binary mode)
    with open(options.config_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    # turn off any data augmentation at run time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    # obtain the classes used to train the classifier
    class_mapping = C.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)
    class_mapping = {v: k for k, v in class_mapping.items()}
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }

    # number of ROIs fed to the classifier per batch
    C.num_rois = int(options.num_rois)

    # account for the difference in image dimension ordering between theano and tensorflow
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (1024, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, 1024)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # set up the layers in the RPN and the CNN classifier
    shared_layers = nn.nn_base(img_input, trainable=True)
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(feature_map_input, roi_input, C.num_rois,
                               nb_classes=len(class_mapping), trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    # load the weights
    model_rpn.load_weights(options.model_weight_path, by_name=True)
    model_classifier.load_weights(options.model_weight_path, by_name=True)

    # compile the models
    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    return C, model_rpn, model_classifier, model_classifier_only
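
# A minimal usage sketch for setupFRCNN(). The attribute names mirror the options
# object read inside the function; building it from a SimpleNamespace here is an
# assumption (the repo presumably parses these from the command line), and the
# paths are placeholders.
from types import SimpleNamespace

options = SimpleNamespace(
    config_filename='config.pickle',       # assumed path
    num_rois=4,
    model_weight_path='model_frcnn.hdf5',  # assumed path
)
C, model_rpn, model_classifier, model_classifier_only = setupFRCNN(options)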
if K.image_dim_ordering() == 'th':
    input_shape_img = (3, None, None)
    input_shape_features = (num_features, None, None)
else:
    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, num_features)

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(C.num_rois, 4))
feature_map_input = Input(shape=input_shape_features)

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn_layers = nn.rpn(shared_layers, num_anchors)
classifier = nn.classifier(feature_map_input, roi_input, C.num_rois,
                           nb_classes=len(class_mapping), trainable=True)

model_rpn = Model(img_input, rpn_layers)
model_classifier_only = Model([feature_map_input, roi_input], classifier)
model_classifier = Model([feature_map_input, roi_input], classifier)

print('Loading weights from {}'.format(C.model_path))
model_rpn.load_weights(C.model_path, by_name=True)
model_classifier.load_weights(C.model_path, by_name=True)

model_rpn.compile(optimizer='sgd', loss='mse')
model_classifier.compile(optimizer='sgd', loss='mse')
cv2.rectangle(a_image, (x1 * C.rpn_stride, y1 * C.rpn_stride),
              (x2 * C.rpn_stride, y2 * C.rpn_stride),
              colors[class_id].tolist(), 2)
cv2.putText(a_image, '{} {:.2f}'.format(class_name, pos_ious[idx]),
            (x1, y1 + 10), cv2.FONT_HERSHEY_COMPLEX, 0.7, (0, 0, 255), 1)
cv2.namedWindow('a_image', cv2.WINDOW_NORMAL)
cv2.imshow('a_image', a_image)
cv2.waitKey(0)

image_input_shape = (None, None, 3)
image_input = Input(shape=image_input_shape)
num_anchors = 9

# define the base network (resnet here, can be VGG, Inception, etc)
base_net_output = nn.base_net(image_input)
rpn_output = nn.rpn(base_net_output, num_anchors)

with open('../config.pickle', 'rb') as f_in:
    C = pickle.load(f_in)

class_name_idx_mapping = C.class_name_idx_mapping
if 'bg' not in class_name_idx_mapping:
    class_name_idx_mapping['bg'] = len(class_name_idx_mapping)
class_idx_name_mapping = {v: k for k, v in class_name_idx_mapping.items()}

model_rpn = Model(image_input, rpn_output)
model_rpn.load_weights(
    '../frcnn_resnet50_8_0.7191_0.1186_0.2485_0.1342_0.9118.hdf5',
    by_name=True)

annotations = json.load(open('annotation_data.json'))
train_annotations = [
    annotation for annotation in annotations
    if annotation['imageset'] == 'train'
]
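
# The fragment above loads an RPN-only model and the training annotations, presumably
# to inspect its proposals. Below is a minimal sketch of such an inspection loop,
# assuming the annotation dicts carry a 'filepath' key and that roi_helpers.rpn_to_roi()
# accepts the signature used in train() above; both are assumptions, and image
# preprocessing is omitted for brevity.
for annotation in train_annotations[:5]:         # first few images only
    image = cv2.imread(annotation['filepath'])   # 'filepath' key is an assumption
    x = np.expand_dims(image.astype(np.float32), axis=0)
    P_rpn = model_rpn.predict_on_batch(x)
    # convert the raw RPN output into proposal boxes on the feature-map grid
    R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], C,
                               overlap_thresh=0.6, max_boxes=C.max_boxes)
    print(annotation['filepath'], 'proposals:', R.shape[0])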
def test(im_size=600, mode='normal', detect_threshold=0.9, overlap_threshold=0.5):
    """
    Test the model

    Args:
        im_size: trained models are available for 300, 400 or 600. Input images
            are resized to this size.
        mode: 'normal' means predictions will be saved as images. Any other mode
            means only a CSV corner file will be saved.
        detect_threshold: minimum class probability for a proposal to be accepted
        overlap_threshold: maximum IoU allowed between two proposals

    Returns:
        avg_time: average time per image, measured from loading the image to
            saving the predictions (the first images are excluded as warm-up).
    """
    conf_file = f'models/conf/config_frcnn_{im_size}.pickle'
    C = pickle.load(open(conf_file, 'rb'))
    img_path = 'data/testing/images'

    class_mapping = C.class_mapping
    class_mapping['bg'] = len(class_mapping)
    class_mapping = {v: k for k, v in class_mapping.items()}
    class_to_color = {
        class_mapping[v]: np.array([0, 128, 255])
        for v in class_mapping
    }

    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, 1024)
    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base resnet network
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(feature_map_input, roi_input, C.num_rois,
                               nb_classes=len(class_mapping))

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    model_rpn, model_classifier = load_weights(im_size, model_rpn, model_classifier)
    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    output_folder = (f'output/predictions/frcnn_size{int(im_size)}'
                     f'_p{int(detect_threshold * 100)}')
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    output_file = open(
        f'{output_folder}/predicted_corners_size{int(im_size)}'
        f'_p{int(detect_threshold * 100)}.csv', 'w')
    writer = csv.writer(output_file)

    print(f'Predicting gates for im_size={im_size} and a detection probability '
          f'threshold of {int(detect_threshold * 100)}%.')
    print(f'Output to be saved in directory "/{output_folder}/"')

    progbar = Progbar(len(os.listdir(img_path)) - 1)
    time_list = []  # per-image prediction times (kept across images, warm-up excluded)

    for idx, img_name in enumerate(sorted(os.listdir(img_path))):
        if not img_name.lower().endswith(('.png', '.jpg')):
            continue
        filepath = os.path.join(img_path, img_name)

        start_time = time.time()
        img = cv2.imread(filepath)
        X, ratio = format_img(img, C)
        X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and the output of the RPN
        [Y1, Y2, F] = model_rpn.predict(X)
        R = roi_helpers.rpn_to_roi(Y1, Y2, C,
                                   overlap_thresh=overlap_threshold,
                                   max_boxes=C.max_boxes)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}
        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break
            if jk == R.shape[0] // C.num_rois:
                # pad R so the last batch also has num_rois ROIs
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):
                # skip weak proposals and proposals classified as background
                if np.max(P_cls[0, ii, :]) < detect_threshold or \
                        np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                # only gate objects
                cls_name = 'gate'
                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]
                # only gate objects, which is index 0
                cls_num = 0
                try:
                    (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
                except Exception:
                    pass
                bboxes[cls_name].append([
                    C.rpn_stride * x, C.rpn_stride * y,
                    C.rpn_stride * (x + w), C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        for key in bboxes:
            bbox = np.array(bboxes[key])
            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=overlap_threshold)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                # shrink the box towards its centre by 16% per side
                w = (x2 - x1)
                h = (y2 - y1)
                scale = 0.16
                x1 += w * scale
                x2 -= w * scale
                y1 += h * scale
                y2 -= h * scale

                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                writer.writerow([
                    img_name, real_x1, real_y1, real_x2, real_y1, real_x2,
                    real_y2, real_x1, real_y2
                ])

                if mode == 'normal':
                    cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                                  (int(class_to_color[key][0]),
                                   int(class_to_color[key][1]),
                                   int(class_to_color[key][2])), 2)

                    textLabel = f'{key}: {int(100 * new_probs[jk])}%'
                    (retval, baseLine) = cv2.getTextSize(
                        textLabel, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                    textOrg = (real_x1, real_y1)
                    cv2.rectangle(img, (textOrg[0], textOrg[1] + baseLine - 5),
                                  (textOrg[0] + retval[0] + 5,
                                   textOrg[1] - retval[1] - 5),
                                  (255, 255, 255), 2)
                    cv2.rectangle(img, (textOrg[0], textOrg[1] + baseLine - 5),
                                  (textOrg[0] + retval[0] + 5,
                                   textOrg[1] - retval[1] - 5),
                                  (0, 128, 255), -1)
                    cv2.putText(img, textLabel, (real_x1, real_y1 - 3),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

        time_taken = time.time() - start_time
        progbar.update(idx)
        print(f' - Elapsed time: {time_taken:.3}s for {img_name}')
        # skip the first images so model warm-up does not skew the average
        if idx > 20:
            time_list.append(time_taken)

        if mode == 'normal':
            cv2.imwrite(f'{output_folder}/predict_{img_name}', img)
    plt.close()
    output_file.close()

    # return the average prediction time (warm-up images excluded)
    return sum(time_list) / len(time_list)
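
# get_real_coordinates() is used by both detect_img() and test() but defined
# elsewhere in the repo. A minimal sketch, assuming the usual keras-frcnn
# behaviour: map box coordinates from the resized image back to the original
# image by dividing out the resize ratio.
def get_real_coordinates(ratio, x1, y1, x2, y2):
    """Map (x1, y1, x2, y2) from resized-image to original-image coordinates (sketch)."""
    real_x1 = int(round(x1 // ratio))
    real_y1 = int(round(y1 // ratio))
    real_x2 = int(round(x2 // ratio))
    real_y2 = int(round(y2 // ratio))
    return real_x1, real_y1, real_x2, real_y2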
def trainModel(options):
    import random
    import pprint
    import sys
    import time
    import numpy as np
    import pickle
    import os

    from keras import backend as K
    from keras.optimizers import Adam, SGD, RMSprop
    from keras.layers import Input
    from keras.models import Model
    from frcnn import config, data_generators
    from frcnn import losses
    from frcnn import resnet as nn
    import frcnn.roi_helpers as roi_helpers
    from keras.utils import generic_utils
    from frcnn.simple_parser import get_data
    from frcnn.simple_parser import load_data

    # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    import tensorflow as tf
    configTF = tf.ConfigProto()
    configTF.gpu_options.allow_growth = True
    sess = tf.Session(config=configTF)
    sys.setrecursionlimit(40000)

    # the Config class stores all relevant settings so they can be recalled
    # during testing or further training
    C = config.Config()
    C.im_size = options.im_size
    C.anchor_box_scales = options.anchor_box_scales
    C.anchor_box_ratios = options.anchor_box_ratios
    C.num_rois = int(options.num_rois)
    C.use_horizontal_flips = bool(options.horizontal_flips)
    C.use_vertical_flips = bool(options.vertical_flips)
    C.rot_90 = bool(options.rot_90)
    C.rpn_max_overlap = options.rpn_max_overlap_threshold
    C.model_path = options.output_weight_path
    C.balanced_classes = options.balanced_classes
    C.rpn_stride = options.rpn_stride
    C.cutImage = options.cutImage

    # load old weights to continue training
    if options.load_weights:
        C.base_net_weights = options.input_weight_path

    # get_data() returns the image list (with bbox, height and width info) in
    # all_imgs, and the class data in classes_count and class_mapping
    if options.load_data:
        all_imgs, classes_count, class_mapping = load_data(options.name)
    else:
        all_imgs, classes_count, class_mapping = get_data(
            options.train_path, C, options.name, options.num_frames)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)
    C.class_mapping = class_mapping

    # invert the mapping: class index -> class name
    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    print('\nUsing RPN Stride = %d' % C.rpn_stride)

    config_output_filename = options.config_filename
    # pickle files must be written in binary mode
    with open(config_output_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print('Config has been written to {}, and can be loaded when testing '
              'to ensure correct results'.format(config_output_filename))

    random.shuffle(all_imgs)
    num_imgs = len(all_imgs)
    train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
    print('\nTraining on {} Frames'.format(len(train_imgs)))

    # ground truth boxes are obtained
    data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count,
                                                   C, K.image_dim_ordering(),
                                                   mode='train')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(shared_layers, roi_input, C.num_rois,
                               nb_classes=len(classes_count), trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this model holds both the RPN and the classifier; it is used to
    # load/save weights for both models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)
        print('\nLoaded weights from {}'.format(C.base_net_weights))
    except Exception:
        print('\nNo pretrained weights found in folder...')
        print('Proceeding to train from scratch\n\n')

    # set learning rates and optimizers
    optimizer = Adam(lr=1e-4)
    optimizer_classifier = Adam(lr=1e-4)
    model_rpn.compile(optimizer=optimizer,
                      loss=[losses.rpn_loss_cls(num_anchors),
                            losses.rpn_loss_regr(num_anchors)])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[losses.class_loss_cls,
              losses.class_loss_regr(len(classes_count) - 1)],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    # if epoch_length was left at its default, one epoch goes over the entire train set
    if options.epoch_length == 'default':
        epoch_length = len(train_imgs)
    # else use the number of images given
    else:
        epoch_length = int(options.epoch_length)
    num_epochs = int(options.num_epochs)

    iter_num = 0
    # named loss_hist so it does not shadow the imported frcnn.losses module
    loss_hist = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    best_loss = np.Inf
    class_mapping_inv = {v: k for k, v in class_mapping.items()}

    print('Starting training\n')
    prev_pos_samples = []
    prev_neg_samples = []

    for epoch_num in range(num_epochs):
        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:
                if iter_num == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    if mean_overlapping_bboxes == 0:
                        print('\n\nRPN is not producing bounding boxes that overlap '
                              'the ground truth boxes. Check RPN settings or keep training.\n')

                # data_gen_train is a generator defined in data_generators.py:
                # it reads the image at the filepath in the image list and
                # returns the relevant data for training
                X, Y, img_data = next(data_gen_train)

                # train_on_batch() is the Keras function that performs a single
                # gradient update on the given batch (one training step)
                loss_rpn = model_rpn.train_on_batch(X, Y)

                # use the trained model to make a prediction
                P_rpn = model_rpn.predict_on_batch(X)

                # convert the RPN prediction to ROIs
                R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], C,
                                           K.image_dim_ordering(),
                                           use_regr=True, overlap_thresh=0.7,
                                           max_boxes=300)

                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2 = roi_helpers.calc_iou(R, img_data, C, class_mapping)

                # if no ROI was matched, skip this image
                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []
                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if C.num_rois > 1:
                    # take half positive and half negative samples for classifier training
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2, replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples, C.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except ValueError:
                        selected_neg_samples = np.random.choice(
                            neg_samples, C.num_rois - len(selected_pos_samples),
                            replace=True).tolist()
                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                loss_hist[iter_num, 0] = loss_rpn[1]
                loss_hist[iter_num, 1] = loss_rpn[2]
                loss_hist[iter_num, 2] = loss_class[1]
                loss_hist[iter_num, 3] = loss_class[2]
                loss_hist[iter_num, 4] = loss_class[3]
                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(loss_hist[:iter_num, 0])),
                     ('rpn_regr', np.mean(loss_hist[:iter_num, 1])),
                     ('detector_cls', np.mean(loss_hist[:iter_num, 2])),
                     ('rpn_overlap',
                      float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(loss_hist[:, 0])
                    loss_rpn_regr = np.mean(loss_hist[:, 1])
                    loss_class_cls = np.mean(loss_hist[:, 2])
                    loss_class_regr = np.mean(loss_hist[:, 3])
                    class_acc = np.mean(loss_hist[:, 4])

                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print('\n---------------------------------------------------------------------------------------')
                        print('Mean number of bounding boxes from RPN overlapping '
                              'ground truth boxes: {}'.format(mean_overlapping_bboxes))
                        print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(loss_class_cls))
                        print('Loss Detector regression: {}'.format(loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() - start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if C.verbose:
                            print('\nTotal loss decreased from {} to {}'.format(
                                best_loss, curr_loss))
                            print('---------------------------------------------------------------------------------------\n')
                        best_loss = curr_loss
                        # save the weights with the smallest loss (overwrite)
                        model_all.save_weights(C.model_path)
                    else:
                        if C.verbose:
                            print('\nLoss did not improve')
                            print('---------------------------------------------------------------------------------------\n')

                    # also save the weights for each epoch
                    model_all.save_weights(C.model_path[:-5] +
                                           '_%03d' % (epoch_num + 1) +
                                           '_%2.2f' % mean_overlapping_bboxes +
                                           '.hdf5')
                    break
            except Exception as e:
                print('\nException: {}\n'.format(e))
                continue

    print('\n-----------------\nTraining complete!\n')
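
# A minimal usage sketch for trainModel(). The attribute names mirror the options
# object read inside the function; the values are illustrative assumptions, and
# building the object from a SimpleNamespace is an assumption (the repo presumably
# uses an option parser).
from types import SimpleNamespace

options = SimpleNamespace(
    im_size=600, anchor_box_scales=[128, 256, 512],
    anchor_box_ratios=[[1, 1], [1, 2], [2, 1]], num_rois=4,
    horizontal_flips=False, vertical_flips=False, rot_90=False,
    rpn_max_overlap_threshold=0.7, output_weight_path='model_frcnn.hdf5',
    balanced_classes=False, rpn_stride=16, cutImage=False,
    load_weights=False, input_weight_path='', load_data=False,
    train_path='annotations.txt', name='run1', num_frames=1000,
    config_filename='config.pickle', epoch_length='default', num_epochs=10,
)
trainModel(options)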