def create_faster_rcnn_model(features, scaled_gt_boxes, dims_input, cfg):
    """Assemble the end-to-end Faster R-CNN training graph.

    Args:
        features: image input node; the per-channel pad color from
            ``cfg["MODEL"].IMG_PAD_COLOR`` is subtracted from it.
        scaled_gt_boxes: ground-truth boxes, passed to the RPN and to the
            proposal target layer.
        dims_input: image dimension input, passed to the RPN.
        cfg: configuration object; read via ``cfg['BASE_MODEL_PATH']`` and
            ``cfg["MODEL"]`` here and handed on to the helper functions.

    Returns:
        Tuple ``(loss, pred_error)`` where ``loss`` is the sum of the RPN
        losses and the detection losses and ``pred_error`` is the
        classification error of the class scores against the label targets.
    """
    # Pre-trained classification net: conv part and fully connected part.
    pretrained = load_model(cfg['BASE_MODEL_PATH'])
    conv_layers = clone_conv_layers(pretrained, cfg)
    model_cfg = cfg["MODEL"]
    fc_layers = clone_model(
        pretrained,
        [model_cfg.POOL_NODE_NAME],
        [model_cfg.LAST_HIDDEN_NODE_NAME],
        clone_method=CloneMethod.clone,
    )

    # Normalize the input and run it through the conv layers.
    pad_color = Constant([[[v]] for v in model_cfg.IMG_PAD_COLOR])
    conv_out = conv_layers(features - pad_color)

    # Region proposal network and the targets derived from its proposals.
    rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_input, cfg)
    proposal_targets = create_proposal_target_layer(rpn_rois, scaled_gt_boxes, cfg)
    rois, label_targets, bbox_targets, bbox_inside_weights = proposal_targets

    # Fast R-CNN head and its losses.
    cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg)
    detection_losses = create_detection_losses(
        cls_score, label_targets, bbox_pred, rois,
        bbox_targets, bbox_inside_weights, cfg)

    total_loss = rpn_losses + detection_losses
    pred_error = classification_error(cls_score, label_targets, axis=1)
    return total_loss, pred_error
def frcn_predictor(features, rois, n_classes, model_path):
    """Build a Fast R-CNN class-score predictor on a pretrained network.

    Node names (``feature_node_name``, ``last_conv_node_name``,
    ``pool_node_name``, ``last_hidden_node_name``) and ``roi_dim`` are read
    from module scope.

    Args:
        features: image input node; the constant 114 is subtracted from it.
        rois: region-of-interest input fed to the ROI pooling layer.
        n_classes: number of output classes of the new linear layer.
        model_path: path of the pretrained model to load.

    Returns:
        The output node ``times(fc_out, W) + b`` of the new linear layer.
    """
    # Locate the relevant nodes in the pretrained classification net.
    pretrained = load_model(model_path)
    feature_node = find_by_name(pretrained, feature_node_name)
    conv_node = find_by_name(pretrained, last_conv_node_name)
    pool_node = find_by_name(pretrained, pool_node_name)
    last_node = find_by_name(pretrained, last_hidden_node_name)

    # Conv layers are cloned with CloneMethod.freeze, the fully connected
    # layers with CloneMethod.clone.
    conv_layers = combine([conv_node.owner]).clone(
        CloneMethod.freeze, {feature_node: placeholder()})
    fc_layers = combine([last_node.owner]).clone(
        CloneMethod.clone, {pool_node: placeholder()})

    # Fast R-CNN: normalize -> conv -> ROI pooling -> fully connected.
    conv_out = conv_layers(features - Constant(114))
    pooled_rois = roipooling(conv_out, rois, C.MAX_POOLING, (roi_dim, roi_dim), 0.0625)
    fc_out = fc_layers(pooled_rois)

    # NOTE: the original author intended Dense(rois[0], num_classes, map_rank=1)
    # here but noted that map_rank=1 was not yet supported, hence the explicit
    # weight and bias parameters.
    weights = parameter(shape=(4096, n_classes), init=glorot_uniform())
    bias = parameter(shape=n_classes, init=0)
    return times(fc_out, weights) + bias
def modify_model(features, n_classes):
    """Attach a new classification layer to a frozen pretrained network.

    The pretrained model is loaded from the module-level ``model_file``. All
    layers from the node named ``'features'`` up to the node named ``'h2_d'``
    are cloned with ``CloneMethod.freeze`` and a new ``Dense`` layer is placed
    on top.

    Args:
        features: image input node; the constant 114 is subtracted from it.
        n_classes: number of outputs of the new ``Dense`` layer.

    Returns:
        The output node of the new ``Dense`` layer.
    """
    loaded_model = load_model(model_file)
    feature_node = find_by_name(loaded_model, 'features')
    last_node = find_by_name(loaded_model, 'h2_d')
    all_layers = combine([last_node.owner]).clone(
        CloneMethod.freeze, {feature_node: placeholder()})

    feat_norm = features - Constant(114)
    fc_out = all_layers(feat_norm)

    # Size the output layer from the argument. The previous code referenced
    # `num_classes`, which is not a name in this function's scope, so the
    # `n_classes` parameter was silently ignored.
    z = Dense(n_classes)(fc_out)
    return z
def create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, input_features, freeze=False):
    """Create a classifier by reusing layers of a pretrained network.

    Args:
        base_model_file: path of the pretrained model to load.
        feature_node_name: name of the input node of the pretrained model.
        last_hidden_node_name: name of the last node to reuse.
        num_classes: number of outputs of the new ``Dense`` layer.
        input_features: image input node; the constant 114 is subtracted
            from it.
        freeze: if true the reused layers are cloned with
            ``CloneMethod.freeze``, otherwise with ``CloneMethod.clone``.

    Returns:
        The output node of the new ``Dense`` layer, which is named after the
        module-level ``new_output_node_name``.
    """
    # Locate the sub-graph of the pretrained net that is going to be reused.
    pretrained = load_model(base_model_file)
    feature_node = find_by_name(pretrained, feature_node_name)
    last_node = find_by_name(pretrained, last_hidden_node_name)

    # Clone it, with the requested clone method, behind a fresh placeholder.
    if freeze:
        clone_method = CloneMethod.freeze
    else:
        clone_method = CloneMethod.clone
    cloned_layers = combine([last_node.owner]).clone(
        clone_method, {feature_node: Placeholder(name='features')})

    # New dense layer for class prediction on top of the cloned layers.
    cloned_out = cloned_layers(input_features - Constant(114))
    classifier = Dense(num_classes, activation=None, name=new_output_node_name)
    return classifier(cloned_out)
def train_faster_rcnn_alternating(cfg):
    '''
    4-Step Alternating Training scheme from the Faster R-CNN paper:

    1. Create initial network, only rpn, without detection network
       --> train only the rpn (and conv3_1 and up for VGG16),
       then buffer region proposals from rpn.
    2. Create full network, initialize conv layers with imagenet, use
       buffered proposals
       --> train only detection network (and conv3_1 and up for VGG16).
    3. Keep conv weights from detection network and fix them
       --> train only rpn, then buffer region proposals from rpn.
    4. Keep conv and rpn weights from step 3 and fix them
       --> train only detection network.

    Args:
        cfg: configuration object. Sections ``cfg["TEST"]``, ``cfg["TRAIN"]``,
            ``cfg["MODEL"]`` and ``cfg["CNTK"]``, the key
            ``cfg['BASE_MODEL_PATH']`` and the attributes ``OUTPUT_PATH``,
            ``NUM_CHANNELS``, ``IMAGE_HEIGHT``, ``IMAGE_WIDTH`` and
            ``INPUT_ROIS_PER_IMAGE`` are read. ``cfg["TEST"].RPN_PRE_NMS_TOP_N``
            and ``cfg["TEST"].RPN_POST_NMS_TOP_N`` are temporarily replaced by
            their ``cfg["TRAIN"]`` counterparts and restored before the eval
            model is created, also when training raises.

    Returns:
        The result of ``create_faster_rcnn_eval_model`` for the stage 2
        detection network and the stage 2 rpn network.
    '''
    # setting pre- and post-nms top N to training values since buffered proposals are used for further training
    test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
    test_post = cfg["TEST"].RPN_POST_NMS_TOP_N
    cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
    cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N

    try:
        # Learning parameters
        rpn_lr_factor = cfg["MODEL"].RPN_LR_FACTOR
        rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE]
        frcn_lr_factor = cfg["MODEL"].FRCN_LR_FACTOR
        frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE]

        l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
        mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
        rpn_epochs = cfg["CNTK"].RPN_EPOCHS
        frcn_epochs = cfg["CNTK"].FRCN_EPOCHS

        feature_node_name = cfg["MODEL"].FEATURE_NODE_NAME
        last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME
        print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
        print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled))
        print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))

        debug_output = cfg["CNTK"].DEBUG_OUTPUT
        if debug_output:
            print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)

        # Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions
        image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                     dynamic_axes=[Axis.default_batch_axis()],
                                     name=feature_node_name)
        feat_norm = image_input - Constant([[[v]] for v in cfg["MODEL"].IMG_PAD_COLOR])
        roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
        scaled_gt_boxes = alias(roi_input, name='roi_input')
        dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
        dims_node = alias(dims_input, name='dims_input')
        rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4),
                                        dynamic_axes=[Axis.default_batch_axis()])
        rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')

        # base image classification model (e.g. VGG16 or AlexNet)
        base_model = load_model(cfg['BASE_MODEL_PATH'])

        print("stage 1a - rpn")
        # Create initial network, only rpn, without detection network
        #       initial weights     train?
        # conv: base_model          only conv3_1 and up
        # rpn:  init new            yes
        # frcn: -                   -

        # conv layers
        conv_layers = clone_conv_layers(base_model, cfg)
        conv_out = conv_layers(feat_norm)

        # RPN and losses
        rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node, cfg)
        stage1_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage1_rpn_network,
                 os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg)

        print("stage 1a - buffering rpn proposals")
        buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input, roi_input, dims_input, cfg)

        print("stage 1b - frcn")
        # Create full network, initialize conv layers with imagenet, fix rpn weights
        #       initial weights     train?
        # conv: base_model          only conv3_1 and up
        # rpn:  stage1a rpn model   no --> use buffered proposals
        # frcn: base_model + new    yes

        # conv_layers
        conv_layers = clone_conv_layers(base_model, cfg)
        conv_out = conv_layers(feat_norm)

        # use buffered proposals in target layer
        rois, label_targets, bbox_targets, bbox_inside_weights = \
            create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, cfg)

        # Fast RCNN and losses
        fc_layers = clone_model(base_model, [cfg["MODEL"].POOL_NODE_NAME],
                                [cfg["MODEL"].LAST_HIDDEN_NODE_NAME], CloneMethod.clone)
        cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg)
        detection_losses = create_detection_losses(cls_score, label_targets, bbox_pred, rois,
                                                   bbox_targets, bbox_inside_weights, cfg)
        pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error")
        stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error])

        # train
        if debug_output:
            plot(stage1_frcn_network,
                 os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1)
        # drop the reference to the stage 1 proposals, they are not used again
        buffered_proposals_s1 = None

        print("stage 2a - rpn")
        # Keep conv weights from detection network and fix them
        #       initial weights     train?
        # conv: stage1b frcn model  no
        # rpn:  stage1a rpn model   yes
        # frcn: -                   -

        # conv_layers
        conv_layers = clone_model(stage1_frcn_network, [feature_node_name], [last_conv_node_name],
                                  CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # RPN and losses
        rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"],
                          ["rpn_rois", "rpn_losses"], CloneMethod.clone)
        rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes)
        rpn_rois = rpn_net.outputs[0]
        rpn_losses = rpn_net.outputs[1]
        stage2_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage2_rpn_network,
                 os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg)

        print("stage 2a - buffering rpn proposals")
        buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input, roi_input, dims_input, cfg)

        print("stage 2b - frcn")
        # Keep conv and rpn weights from step 3 and fix them
        #       initial weights     train?
        # conv: stage2a rpn model   no
        # rpn:  stage2a rpn model   no --> use buffered proposals
        # frcn: stage1b frcn model  yes                   -

        # conv_layers
        conv_layers = clone_model(stage2_rpn_network, [feature_node_name], [last_conv_node_name],
                                  CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # Fast RCNN and losses
        frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"],
                           ["cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error"],
                           CloneMethod.clone)
        stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes)
        detection_losses = stage2_frcn_network.outputs[3]
        pred_error = stage2_frcn_network.outputs[4]

        # train
        if debug_output:
            plot(stage2_frcn_network,
                 os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2)
        # drop the reference to the stage 2 proposals, they are not used again
        buffered_proposals_s2 = None
    finally:
        # resetting config values to original test values; done in `finally`
        # so a failed training run does not leave the shared cfg modified
        cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
        cfg["TEST"].RPN_POST_NMS_TOP_N = test_post

    return create_faster_rcnn_eval_model(stage2_frcn_network, image_input, dims_input, cfg,
                                         rpn_model=stage2_rpn_network)
# Module-level defaults derived from the configuration.
# NOTE(review): `image_width`, `abs_path` and `cfg` are used below but are
# defined outside this section -- confirm they are set earlier in the module.
image_height = cfg["CNTK"].IMAGE_HEIGHT
num_channels = cfg["CNTK"].NUM_CHANNELS

# dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
# All three (width, height) pairs are filled with the configured image size.
dims_input_const = MinibatchData(
    Value(batch=np.asarray([image_width, image_height] * 3, dtype=np.float32)),
    1, 1, False)

# Color used for padding and normalization (Caffe model uses [102.98010, 115.94650, 122.77170])
if cfg["CNTK"].BASE_MODEL == "VGG16":
    img_pad_value = [103, 116, 123]
else:
    img_pad_value = [114, 114, 114]
# One value per channel, nested so that each value sits in its own 1x1 plane.
normalization_const = Constant([[[channel]] for channel in img_pad_value])

globalvars = {'output_path': os.path.join(abs_path, "Output")}

# dataset specific parameters
map_file_path = os.path.join(abs_path, cfg["CNTK"].MAP_FILE_PATH)
globalvars.update({
    'class_map_file': cfg["CNTK"].CLASS_MAP_FILE,
    'train_map_file': cfg["CNTK"].TRAIN_MAP_FILE,
    'test_map_file': cfg["CNTK"].TEST_MAP_FILE,
    'train_roi_file': cfg["CNTK"].TRAIN_ROI_FILE,
    'test_roi_file': cfg["CNTK"].TEST_ROI_FILE,
})
epoch_size = cfg["CNTK"].NUM_TRAIN_IMAGES
num_test_images = cfg["CNTK"].NUM_TEST_IMAGES
def set_global_vars(use_arg_parser=True):
    """Populate the module-level settings from the command line and ``cfg``.

    When ``use_arg_parser`` is true the command line is parsed first: the
    prediction-mode options are validated, ``--list_devices`` prints the
    available devices and exits, ``--config`` is loaded via ``cfg_from_file``
    and ``--device`` selects the default device. Afterwards the module
    globals and the ``globalvars`` dictionary are (re)computed from ``cfg``.

    Args:
        use_arg_parser: whether to parse ``sys.argv``. When false, the
            options read after the parser section (``no_headers``,
            ``output_width_height``, ``suppressed_labels``) use the same
            defaults the parser would apply, and the ``prediction*`` globals
            are left untouched.

    Raises:
        RuntimeError: if prediction mode is requested with missing,
            non-existent, mismatched or identical input/output directories,
            or if the data directory does not exist.
        SystemExit: via ``sys.exit(0)`` after listing devices.
    """
    global globalvars
    global image_width, image_height, dims_input_const
    global img_pad_value, normalization_const
    global map_file_path, epoch_size, num_test_images
    global model_folder, base_model_file
    global feature_node_name, last_conv_node_name, start_train_conv_node_name
    global pool_node_name, last_hidden_node_name, roi_dim
    global prediction, prediction_in, prediction_out

    # `args` is read further down regardless of `use_arg_parser`; these are
    # the parser defaults of the options used there, so skipping the parser
    # no longer ends in a NameError.
    args = {
        'no_headers': False,
        'output_width_height': False,
        'suppressed_labels': "",
    }

    if use_arg_parser:
        parser = argparse.ArgumentParser()
        parser.add_argument('-c', '--config',
                            help='Configuration file in YAML format',
                            required=False, default=None)
        parser.add_argument('-t', '--device_type', type=str,
                            help="The type of the device (cpu|gpu)",
                            required=False, default="cpu")
        parser.add_argument('-d', '--device', type=int,
                            help="Force to run the script on a specified device",
                            required=False, default=None)
        parser.add_argument('-l', '--list_devices', action='store_true',
                            help="Lists the available devices and exits",
                            required=False, default=False)
        parser.add_argument('--prediction', action='store_true',
                            help="Switches to prediction mode",
                            required=False, default=False)
        parser.add_argument('--prediction_in', action='append', type=str,
                            help="The input directory for images in prediction mode. "
                                 "Can be supplied mulitple times.",
                            required=False, default=list())
        parser.add_argument('--prediction_out', action='append', type=str,
                            help="The output directory for processed images and predicitons in prediction mode. "
                                 "Can be supplied mulitple times.",
                            required=False, default=list())
        parser.add_argument('--no_headers', action='store_true',
                            help="Whether to suppress the header row in the ROI CSV files",
                            required=False, default=False)
        parser.add_argument('--output_width_height', action='store_true',
                            help="Whether to output width/height instead of second x/y in the ROI CSV files",
                            required=False, default=False)
        parser.add_argument('--suppressed_labels', type=str,
                            help="Comma-separated list of labels to suppress from being output in ROI CSV files.",
                            required=False, default="")
        args = vars(parser.parse_args())

        # prediction mode?
        prediction = args['prediction']
        if prediction:
            prediction_in = args['prediction_in']
            if not prediction_in:
                raise RuntimeError("No prediction input directory provided!")
            for p in prediction_in:
                if not os.path.exists(p):
                    raise RuntimeError(
                        "Prediction input directory '%s' does not exist" % p)

            prediction_out = args['prediction_out']
            if not prediction_out:
                raise RuntimeError("No prediction output directory provided!")
            for p in prediction_out:
                if not os.path.exists(p):
                    raise RuntimeError(
                        "Prediction output directory '%s' does not exist" % p)

            if len(prediction_in) != len(prediction_out):
                raise RuntimeError(
                    "Number of input and output directories don't match: %i != %i"
                    % (len(prediction_in), len(prediction_out)))
            # Pairs are reported 1-based in the error message.
            for pair_no, (in_dir, out_dir) in enumerate(zip(prediction_in, prediction_out), start=1):
                if in_dir == out_dir:
                    raise RuntimeError(
                        "Input and output directories #%i for prediction are the same: %s"
                        % (pair_no, in_dir))

        if args['list_devices']:
            print("Available devices (Type - ID - description)")
            for d in cntk.device.all_devices():
                kind = d.type()
                if kind == 0:
                    device_type = "cpu"
                elif kind == 1:
                    device_type = "gpu"
                else:
                    device_type = "<unknown:" + str(kind) + ">"
                print(device_type + " - " + str(d.id()) + " - " + str(d))
            sys.exit(0)

        if args['config'] is not None:
            cfg_from_file(args['config'])

        # The device type is only honoured when a device id was given.
        if args['device'] is not None:
            if args['device_type'] == 'gpu':
                cntk.device.try_set_default_device(cntk.device.gpu(args['device']))
            else:
                cntk.device.try_set_default_device(cntk.device.cpu())

    # Recompute the cfg-derived globals; cfg may have been replaced by
    # cfg_from_file above.
    image_width = cfg["CNTK"].IMAGE_WIDTH
    image_height = cfg["CNTK"].IMAGE_HEIGHT

    # dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
    dims_input_const = MinibatchData(
        Value(batch=np.asarray(
            [image_width, image_height, image_width, image_height, image_width, image_height],
            dtype=np.float32)),
        1, 1, False)

    # Color used for padding and normalization (Caffe model uses [102.98010, 115.94650, 122.77170])
    img_pad_value = [103, 116, 123] if cfg["CNTK"].BASE_MODEL == "VGG16" else [114, 114, 114]
    normalization_const = Constant([[[103]], [[116]], [[123]]]) \
        if cfg["CNTK"].BASE_MODEL == "VGG16" else Constant([[[114]], [[114]], [[114]]])

    # dataset specific parameters
    map_file_path = os.path.join(abs_path, cfg["CNTK"].MAP_FILE_PATH)
    globalvars['class_map_file'] = cfg["CNTK"].CLASS_MAP_FILE
    globalvars['train_map_file'] = cfg["CNTK"].TRAIN_MAP_FILE
    globalvars['test_map_file'] = cfg["CNTK"].TEST_MAP_FILE
    globalvars['train_roi_file'] = cfg["CNTK"].TRAIN_ROI_FILE
    globalvars['test_roi_file'] = cfg["CNTK"].TEST_ROI_FILE
    globalvars['output_path'] = cfg["CNTK"].OUTPUT_PATH
    epoch_size = cfg["CNTK"].NUM_TRAIN_IMAGES
    num_test_images = cfg["CNTK"].NUM_TEST_IMAGES

    # model specific parameters
    if cfg["CNTK"].PRETRAINED_MODELS.startswith(".."):
        model_folder = os.path.join(abs_path, cfg["CNTK"].PRETRAINED_MODELS)
    else:
        model_folder = cfg["CNTK"].PRETRAINED_MODELS
    base_model_file = os.path.join(model_folder, cfg["CNTK"].BASE_MODEL_FILE)
    feature_node_name = cfg["CNTK"].FEATURE_NODE_NAME
    last_conv_node_name = cfg["CNTK"].LAST_CONV_NODE_NAME
    start_train_conv_node_name = cfg["CNTK"].START_TRAIN_CONV_NODE_NAME
    pool_node_name = cfg["CNTK"].POOL_NODE_NAME
    last_hidden_node_name = cfg["CNTK"].LAST_HIDDEN_NODE_NAME
    roi_dim = cfg["CNTK"].ROI_DIM

    data_path = map_file_path

    # set and overwrite learning parameters
    globalvars['rpn_lr_factor'] = cfg["CNTK"].RPN_LR_FACTOR
    globalvars['frcn_lr_factor'] = cfg["CNTK"].FRCN_LR_FACTOR
    globalvars['e2e_lr_factor'] = cfg["CNTK"].E2E_LR_FACTOR
    globalvars['momentum_per_mb'] = cfg["CNTK"].MOMENTUM_PER_MB
    # FAST_MODE cuts every training schedule down to a single epoch.
    globalvars['e2e_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].E2E_MAX_EPOCHS
    globalvars['rpn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].RPN_EPOCHS
    globalvars['frcn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].FRCN_EPOCHS
    globalvars['rnd_seed'] = cfg.RNG_SEED
    globalvars['train_conv'] = cfg["CNTK"].TRAIN_CONV_LAYERS
    globalvars['train_e2e'] = cfg["CNTK"].TRAIN_E2E

    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    # Turn the dataset file names stored above into paths below data_path.
    for file_key in ('class_map_file', 'train_map_file', 'test_map_file',
                     'train_roi_file', 'test_roi_file'):
        globalvars[file_key] = os.path.join(data_path, globalvars[file_key])

    globalvars['headers'] = not args['no_headers']
    globalvars['output_width_height'] = args['output_width_height']
    raw_suppressed = args['suppressed_labels']
    globalvars['suppressed_labels'] = raw_suppressed.split(",") if raw_suppressed else []

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=globalvars['rnd_seed'])

    globalvars['classes'] = parse_class_map_file(globalvars['class_map_file'])
    globalvars['num_classes'] = len(globalvars['classes'])

    if cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(globalvars['train_conv']))
        print("Random seed : {}".format(globalvars['rnd_seed']))
        print("Momentum per MB : {}".format(globalvars['momentum_per_mb']))
        if globalvars['train_e2e']:
            print("E2E epochs : {}".format(globalvars['e2e_epochs']))
        else:
            print("RPN lr factor : {}".format(globalvars['rpn_lr_factor']))
            print("RPN epochs : {}".format(globalvars['rpn_epochs']))
            print("FRCN lr factor : {}".format(globalvars['frcn_lr_factor']))
            print("FRCN epochs : {}".format(globalvars['frcn_epochs']))