def create_faster_rcnn_model(features, scaled_gt_boxes, dims_input, cfg): # Load the pre-trained classification net and clone layers base_model = load_model(cfg['BASE_MODEL_PATH']) conv_layers = clone_conv_layers(base_model, cfg) fc_layers = clone_model(base_model, [cfg["MODEL"].POOL_NODE_NAME], [cfg["MODEL"].LAST_HIDDEN_NODE_NAME], clone_method=CloneMethod.clone) # Normalization and conv layers feat_norm = features - Constant([[[v]] for v in cfg["MODEL"].IMG_PAD_COLOR]) conv_out = conv_layers(feat_norm) # RPN and prediction targets rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_input, cfg) rois, label_targets, bbox_targets, bbox_inside_weights = \ create_proposal_target_layer(rpn_rois, scaled_gt_boxes, cfg) # Fast RCNN and losses cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg) detection_losses = create_detection_losses(cls_score, label_targets, bbox_pred, rois, bbox_targets, bbox_inside_weights, cfg) loss = rpn_losses + detection_losses pred_error = classification_error(cls_score, label_targets, axis=1) return loss, pred_error
def train_faster_rcnn_alternating(cfg): ''' 4-Step Alternating Training scheme from the Faster R-CNN paper: # Create initial network, only rpn, without detection network # --> train only the rpn (and conv3_1 and up for VGG16) # buffer region proposals from rpn # Create full network, initialize conv layers with imagenet, use buffered proposals # --> train only detection network (and conv3_1 and up for VGG16) # Keep conv weights from detection network and fix them # --> train only rpn # buffer region proposals from rpn # Keep conv and rpn weights from step 3 and fix them # --> train only detection network ''' # setting pre- and post-nms top N to training values since buffered proposals are used for further training test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N test_post = cfg["TEST"].RPN_POST_NMS_TOP_N cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N # Learning parameters rpn_lr_factor = cfg["MODEL"].RPN_LR_FACTOR rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE] frcn_lr_factor = cfg["MODEL"].FRCN_LR_FACTOR frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE] l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB) rpn_epochs = cfg["CNTK"].RPN_EPOCHS frcn_epochs = cfg["CNTK"].FRCN_EPOCHS feature_node_name = cfg["MODEL"].FEATURE_NODE_NAME last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL)) print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled)) print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled)) debug_output=cfg["CNTK"].DEBUG_OUTPUT if debug_output: print("Storing graphs and models to %s." % cfg.OUTPUT_PATH) # Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH), dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name) feat_norm = image_input - Constant([[[v]] for v in cfg["MODEL"].IMG_PAD_COLOR]) roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()]) scaled_gt_boxes = alias(roi_input, name='roi_input') dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()]) dims_node = alias(dims_input, name='dims_input') rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4), dynamic_axes=[Axis.default_batch_axis()]) rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois') # base image classification model (e.g. VGG16 or AlexNet) base_model = load_model(cfg['BASE_MODEL_PATH']) print("stage 1a - rpn") if True: # Create initial network, only rpn, without detection network # initial weights train? # conv: base_model only conv3_1 and up # rpn: init new yes # frcn: - - # conv layers conv_layers = clone_conv_layers(base_model, cfg) conv_out = conv_layers(feat_norm) # RPN and losses rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node, cfg) stage1_rpn_network = combine([rpn_rois, rpn_losses]) # train if debug_output: plot(stage1_rpn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE)) train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses, rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg) print("stage 1a - buffering rpn proposals") buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input, roi_input, dims_input, cfg) print("stage 1b - frcn") if True: # Create full network, initialize conv layers with imagenet, fix rpn weights # initial weights train? # conv: base_model only conv3_1 and up # rpn: stage1a rpn model no --> use buffered proposals # frcn: base_model + new yes # conv_layers conv_layers = clone_conv_layers(base_model, cfg) conv_out = conv_layers(feat_norm) # use buffered proposals in target layer rois, label_targets, bbox_targets, bbox_inside_weights = \ create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, cfg) # Fast RCNN and losses fc_layers = clone_model(base_model, [cfg["MODEL"].POOL_NODE_NAME], [cfg["MODEL"].LAST_HIDDEN_NODE_NAME], CloneMethod.clone) cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg) detection_losses = create_detection_losses(cls_score, label_targets, bbox_pred, rois, bbox_targets, bbox_inside_weights, cfg) pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error") stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error]) # train if debug_output: plot(stage1_frcn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE)) train_model(image_input, roi_input, dims_input, detection_losses, pred_error, frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg, rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1) buffered_proposals_s1 = None print("stage 2a - rpn") if True: # Keep conv weights from detection network and fix them # initial weights train? # conv: stage1b frcn model no # rpn: stage1a rpn model yes # frcn: - - # conv_layers conv_layers = clone_model(stage1_frcn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze) conv_out = conv_layers(image_input) # RPN and losses rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"], ["rpn_rois", "rpn_losses"], CloneMethod.clone) rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes) rpn_rois = rpn_net.outputs[0] rpn_losses = rpn_net.outputs[1] stage2_rpn_network = combine([rpn_rois, rpn_losses]) # train if debug_output: plot(stage2_rpn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE)) train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses, rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg) print("stage 2a - buffering rpn proposals") buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input, roi_input, dims_input, cfg) print("stage 2b - frcn") if True: # Keep conv and rpn weights from step 3 and fix them # initial weights train? # conv: stage2a rpn model no # rpn: stage2a rpn model no --> use buffered proposals # frcn: stage1b frcn model yes - # conv_layers conv_layers = clone_model(stage2_rpn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze) conv_out = conv_layers(image_input) # Fast RCNN and losses frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"], ["cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error"], CloneMethod.clone) stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes) detection_losses = stage2_frcn_network.outputs[3] pred_error = stage2_frcn_network.outputs[4] # train if debug_output: plot(stage2_frcn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE)) train_model(image_input, roi_input, dims_input, detection_losses, pred_error, frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg, rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2) buffered_proposals_s2 = None # resetting config values to original test values cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre cfg["TEST"].RPN_POST_NMS_TOP_N = test_post return create_faster_rcnn_eval_model(stage2_frcn_network, image_input, dims_input, cfg, rpn_model=stage2_rpn_network)