def setup_model_for_training(model, weights_file, output_dir):
    """Load saved weights and create the network in the C2 workspace.

    Args:
        model: a DetectionModelHelper-style training model.
        weights_file: path to a pickled weights file, or a falsy value to
            keep the random initialization.
        output_dir: directory where net protos are dumped.

    Returns:
        output_dir, unchanged, for the caller's convenience.
    """
    logger = logging.getLogger(__name__)
    # Attach the training input ops/blobs to the model's net.
    add_model_training_inputs(model)

    if weights_file:
        # Override random weight initialization with weights from a saved model
        # (i.e. load pre-trained parameters).
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)
    # Instantiate the net in the Caffe2 workspace.
    workspace.CreateNet(model.net)

    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    dump_proto_files(model, output_dir)

    # Start loading mini-batches and enqueuing blobs
    model.roi_data_loader.register_sigint_handler()
    model.roi_data_loader.start(prefill=True)
    return output_dir
def main(args):
    """Merge several weights files into one pickled blob dictionary."""
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    # Resolve any remote weight URLs to locally cached copies, in place.
    for idx, wf in enumerate(args.weights_list):
        args.weights_list[idx] = cache_url(wf, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)

    # An empty prefix list means "no prefix" for every weights file.
    if len(args.preffix_list):
        preffix_list = args.preffix_list
    else:
        preffix_list = [""] * len(args.weights_list)

    model = model_builder.create(cfg.MODEL.TYPE, train=False)
    # Load every weights file into GPU 0, each under its own prefix.
    for idx, wf in enumerate(args.weights_list):
        nu.initialize_gpu_from_weights_file(
            model, wf, gpu_id=0, preffix=preffix_list[idx]
        )
    nu.broadcast_parameters(model)

    # Collect each parameter blob once, keyed by its unscoped name.
    blobs = {}
    for param in model.params:
        scoped = str(param)
        unscoped = c2_utils.UnscopeName(scoped)
        if unscoped not in blobs and workspace.HasBlob(scoped):
            blobs[unscoped] = workspace.FetchBlob(scoped)

    # Write the merged weights file.
    save_object(dict(blobs=blobs), args.output_wts)
def test_restore_checkpoint():
    """Checkpoint round-trip: saved weights must restore exactly."""
    # Build a training model with randomized weights.
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    add_momentum_init_ops(model)
    init_weights(model)

    # Wire up the training inputs and materialize the net.
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES)
    model_builder.add_training_inputs(model, roidb=roidb)
    workspace.CreateNet(model.net)

    # Write a checkpoint for iteration 0.
    iter_num = 0
    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)
    chk_file_path = os.path.join(
        output_dir, 'model_iter{}.pkl'.format(iter_num))
    checkpoints = {iter_num: chk_file_path}
    nu.save_model_to_weights_file(checkpoints[iter_num], model)
    orig_gpu_0_params, orig_all_params = get_params(model)

    # Perturb the weights, then restore them from the checkpoint.
    init_weights(model)
    nu.initialize_gpu_from_weights_file(model, chk_file_path, gpu_id=0)
    nu.broadcast_parameters(model)
    shutil.rmtree(cfg.OUTPUT_DIR)
    _, restored_all_params = get_params(model)

    # Every parameter must round-trip bit-exactly.
    for scoped_name, blob in orig_all_params.items():
        np.testing.assert_array_equal(blob, restored_all_params[scoped_name])
    # broadcast_parameters must copy GPU-0 values to every GPU.
    for scoped_name, blob in restored_all_params.items():
        unscoped_name = c2_utils.UnscopeName(scoped_name)
        np.testing.assert_array_equal(blob, orig_gpu_0_params[unscoped_name])
def test_restore_checkpoint():
    """Verify that a saved checkpoint restores all parameters exactly."""
    # Create Model
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    add_momentum_init_ops(model)
    init_weights(model)
    # Fill input blobs
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    model_builder.add_training_inputs(model, roidb=roidb)
    workspace.CreateNet(model.net)
    # Bookkeeping for checkpoint creation
    iter_num = 0
    checkpoints = {}
    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)
    chk_file_path = os.path.join(output_dir, 'model_iter{}.pkl'.format(iter_num))
    checkpoints[iter_num] = chk_file_path
    # Save model weights
    nu.save_model_to_weights_file(checkpoints[iter_num], model)
    orig_gpu_0_params, orig_all_params = get_params(model)
    # Change the model weights (re-randomize so restore is observable)
    init_weights(model)
    # Reload the weights in the model
    nu.initialize_gpu_from_weights_file(model, chk_file_path, gpu_id=0)
    nu.broadcast_parameters(model)
    shutil.rmtree(cfg.OUTPUT_DIR)
    _, restored_all_params = get_params(model)
    # Check if all params are loaded correctly
    for scoped_name, blob in orig_all_params.items():
        np.testing.assert_array_equal(blob, restored_all_params[scoped_name])
    # Check if broadcast_parameters works (GPU-0 values copied everywhere)
    for scoped_name, blob in restored_all_params.items():
        unscoped_name = c2_utils.UnscopeName(scoped_name)
        np.testing.assert_array_equal(blob, orig_gpu_0_params[unscoped_name])
def initialize_model_from_cfg(weights_file, gpu_id=0):
    """Initialize a model from the global cfg. Loads test-time weights and
    creates the networks in the Caffe2 workspace.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    net_utils.initialize_gpu_from_weights_file(
        model, weights_file, gpu_id=gpu_id,
    )
    model_builder.add_inference_inputs(model)

    # Always create the main net and the standalone conv body net.
    for net in (model.net, model.conv_body_net):
        workspace.CreateNet(net)
    # Task-specific heads exist only when enabled in the cfg.
    if cfg.MODEL.MASK_ON:
        workspace.CreateNet(model.mask_net)
    if cfg.MODEL.KEYPOINTS_ON:
        workspace.CreateNet(model.keypoint_net)
    return model
def initialize_model_from_cfg(weights_file, gpu_id=0):
    """Initialize a model from the global cfg. Loads test-time weights and
    creates the networks in the Caffe2 workspace.

    Args:
        weights_file: path to the pickled test-time weights.
        gpu_id: GPU on which to build and initialize the model.

    Returns:
        The created inference model.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    net_utils.initialize_gpu_from_weights_file(
        model, weights_file, gpu_id=gpu_id,
    )
    model_builder.add_inference_inputs(model)
    workspace.CreateNet(model.net)
    # The conv body net is also created standalone (used for feature reuse).
    workspace.CreateNet(model.conv_body_net)
    # Optional task heads, present only when enabled in cfg.
    if cfg.MODEL.MASK_ON:
        workspace.CreateNet(model.mask_net)
    if cfg.MODEL.KEYPOINTS_ON:
        workspace.CreateNet(model.keypoint_net)
    return model
def generate_rpn_on_range(
    weights_file,
    dataset_name,
    _proposal_file_ignored,
    output_dir,
    ind_range=None,
    gpu_id=0
):
    """Run inference on all images in a dataset or over an index range of
    images in a dataset using a single GPU.
    """
    assert cfg.MODEL.RPN_ONLY or cfg.MODEL.FASTER_RCNN

    roidb, start_ind, end_ind, total_num_images = get_roidb(
        dataset_name, ind_range
    )
    logger.info(
        'Output will be saved to: {:s}'.format(os.path.abspath(output_dir))
    )

    # Build an inference-mode model and load the given weights onto the GPU.
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=gpu_id)
    model_builder.add_inference_inputs(model)
    workspace.CreateNet(model.net)

    boxes, scores, ids = generate_proposals_on_roidb(
        model,
        roidb,
        start_ind=start_ind,
        end_ind=end_ind,
        total_num_images=total_num_images,
        gpu_id=gpu_id,
    )

    # Snapshot the config alongside the proposals for reproducibility.
    cfg_yaml = envu.yaml_dump(cfg)
    if ind_range is None:
        rpn_name = 'rpn_proposals.pkl'
    else:
        rpn_name = 'rpn_proposals_range_%s_%s.pkl' % tuple(ind_range)
    rpn_file = os.path.join(output_dir, rpn_name)
    save_object(
        dict(boxes=boxes, scores=scores, ids=ids, cfg=cfg_yaml), rpn_file
    )
    logger.info('Wrote RPN proposals to {}'.format(os.path.abspath(rpn_file)))
    return boxes, scores, ids, rpn_file
def generate_rpn_on_range(
    weights_file,
    dataset_name,
    _proposal_file_ignored,
    output_dir,
    ind_range=None,
    gpu_id=0
):
    """Run inference on all images in a dataset or over an index range of
    images in a dataset using a single GPU.

    Args:
        weights_file: path to pickled test-time weights.
        dataset_name: name understood by get_roidb().
        _proposal_file_ignored: unused; kept for a uniform caller signature.
        output_dir: directory where the proposals pickle is written.
        ind_range: optional (start, end) image-index range; None = whole set.
        gpu_id: GPU to run on.

    Returns:
        (boxes, scores, ids, rpn_file) where rpn_file is the output path.
    """
    # RPN proposal generation only makes sense for RPN-style models.
    assert cfg.MODEL.RPN_ONLY or cfg.MODEL.FASTER_RCNN
    roidb, start_ind, end_ind, total_num_images = get_roidb(
        dataset_name, ind_range
    )
    logger.info(
        'Output will be saved to: {:s}'.format(os.path.abspath(output_dir))
    )
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    nu.initialize_gpu_from_weights_file(
        model, weights_file, gpu_id=gpu_id,
    )
    model_builder.add_inference_inputs(model)
    workspace.CreateNet(model.net)
    boxes, scores, ids = generate_proposals_on_roidb(
        model,
        roidb,
        start_ind=start_ind,
        end_ind=end_ind,
        total_num_images=total_num_images,
        gpu_id=gpu_id,
    )
    # Save the cfg used to generate the proposals, for reproducibility.
    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        # Range runs get a distinct file name so parallel shards don't clash.
        rpn_name = 'rpn_proposals_range_%s_%s.pkl' % tuple(ind_range)
    else:
        rpn_name = 'rpn_proposals.pkl'
    rpn_file = os.path.join(output_dir, rpn_name)
    save_object(
        dict(boxes=boxes, scores=scores, ids=ids, cfg=cfg_yaml), rpn_file
    )
    logger.info('Wrote RPN proposals to {}'.format(os.path.abspath(rpn_file)))
    return boxes, scores, ids, rpn_file
def create_model(weights_file):
    """Build a training model and its data loader; adapted from
    utils.train.setup_model_for_training.

    Args:
        weights_file: path to pickled weights loaded onto GPU 0.

    Returns:
        The created model with a started RoIDataLoaderSimple attached.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    if cfg.MEMONGER:
        optimize_memory(model)
    # Performs random weight initialization as defined by the model
    workspace.RunNetOnce(model.param_init_net)
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    # To make debugging easier you can set cfg.DATA_LOADER.NUM_THREADS = 1
    model.roi_data_loader = RoIDataLoaderSimple(
        roidb,
        num_loaders=cfg.DATA_LOADER.NUM_THREADS,
        minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE,
        blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY
    )
    # Remember how many ops the net had before the dequeue ops are appended,
    # so the newly added ops can be located for the surgery below.
    orig_num_op = len(model.net._net.op)
    blob_names = roi_data_minibatch.get_minibatch_blob_names(is_training=True)
    with c2_utils.NamedCudaScope(0):
        for blob_name in blob_names:
            workspace.CreateBlob(core.ScopedName(blob_name))
        model.net.DequeueBlobs(
            model.roi_data_loader._blobs_queue_name, blob_names
        )
    # A little op surgery to move input ops to the start of the net:
    # the last `diff` ops (the dequeue/input ops just added) are spliced
    # in front of the original ops so inputs are produced first.
    diff = len(model.net._net.op) - orig_num_op
    new_op = model.net._net.op[-diff:] + model.net._net.op[:-diff]
    del model.net._net.op[:]
    model.net._net.op.extend(new_op)
    nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    nu.broadcast_parameters(model)
    # NOTE(review): this blob is created explicitly, presumably because the
    # tracking head reads it before any op writes it — confirm with the net.
    workspace.CreateBlob("gpu_0/track_n_rois_two")
    workspace.CreateNet(model.net)
    # Start loading mini-batches and enqueuing blobs
    model.roi_data_loader.register_sigint_handler()
    model.roi_data_loader.start(prefill=True)
    return model
def setup_model_for_training(model, weights_file, output_dir):
    """Load saved weights (if given) and create the network in the C2
    workspace, then start the ROI data loader.
    """
    logger = logging.getLogger(__name__)
    add_model_training_inputs(model)

    # A weights file overrides the random initialization done at build time.
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Parameters must be synchronized across GPUs even for random init.
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    dump_proto_files(model, output_dir)

    # Kick off the loader threads so minibatches are queued before training.
    loader = model.roi_data_loader
    loader.register_sigint_handler()
    loader.start(prefill=True)
    return output_dir
def setup_model_for_training(model, weights_file, output_dir):
    """Load saved weights and create the network in the C2 workspace.

    Adds training inputs (domain-adaptation inputs when
    cfg.TRAIN.DOMAIN_ADAPTATION is set), optionally restores weights from
    ``weights_file``, creates the net, dumps its protos to ``output_dir``,
    and starts the ROI data loader.

    Args:
        model: the training model helper.
        weights_file: path to pickled weights, or falsy to keep random init.
        output_dir: directory where net protos are dumped.

    Returns:
        output_dir, unchanged.
    """
    logger = logging.getLogger(__name__)
    if cfg.TRAIN.DOMAIN_ADAPTATION:
        add_model_da_training_inputs(model)
    else:
        add_model_training_inputs(model)

    if weights_file:
        # Override random weight initialization with weights from a saved model
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    dump_proto_files(model, output_dir)

    # Start loading mini-batches and enqueuing blobs
    model.roi_data_loader.register_sigint_handler()
    # TODO: prefill was disabled during debugging; set back to True once the
    # underlying issue is resolved.
    model.roi_data_loader.start(prefill=False)
    return output_dir
def setup_model_for_training(model, weights_file, output_dir):
    """Load saved weights (plus optional SqueezeNet weights) and create the
    network in the C2 workspace.
    """
    logger = logging.getLogger(__name__)
    add_model_training_inputs(model)

    if weights_file:
        # A saved model overrides the random weight initialization.
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)

    logger.info("{}".format(cfg.TRAIN.Load_SqueezeNetWeights))
    logger.info("{}".format(cfg.TRAIN.SqueezeNetWeightsFile))
    if cfg.TRAIN.Load_SqueezeNetWeights:
        prefix = "gpu_0/"
        logger.info(
            '\n\n\n\n========> Loading Weights For SqueezeNet<======================\n\n\n\n'
        )
        pickle_file = cfg.TRAIN.SqueezeNetWeightsFile
        with open(pickle_file, 'rb') as fh:
            weights = pickle.load(fh)
        dev = c2_utils.CudaDevice(0)
        # Feed every pickled blob directly into the GPU-0 name scope.
        for name in weights:
            workspace.FeedBlob(prefix + name, weights[name], device_option=dev)

    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    dump_proto_files(model, output_dir)

    # Start loading mini-batches and enqueuing blobs
    model.roi_data_loader.register_sigint_handler()
    model.roi_data_loader.start(prefill=True)
    return output_dir
def initialize_mixed_model_from_cfg(weights_list, preffix_list, gpu_id=0):
    """Initialize a model from the global cfg. Loads and combinds multiple
    test-time weights and creates the networks in the Caffe2 workspace.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    assert len(weights_list) == len(preffix_list)

    # Load each weights file under its own prefix so blobs can coexist.
    for idx, weights_file in enumerate(weights_list):
        net_utils.initialize_gpu_from_weights_file(
            model, weights_file, gpu_id=gpu_id, preffix=preffix_list[idx]
        )

    model_builder.add_inference_inputs(model)
    workspace.CreateNet(model.net)
    workspace.CreateNet(model.conv_body_net)

    # Optional task heads, created only when enabled in the cfg.
    if cfg.MODEL.MASK_ON:
        workspace.CreateNet(model.mask_net)
    if cfg.MODEL.KEYPOINTS_ON:
        workspace.CreateNet(model.keypoint_net)
    if cfg.MODEL.TRACKING_ON:
        workspace.CreateNet(model.track_net)
    return model
def main():
    """Training driver: builds a stage-2 model fed by a DataLoader and runs
    the SGD loop with periodic checkpointing."""
    # Initialize C2
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1'])
    # Set up logging and load config options
    logger = setup_logging(__name__)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()
    smi_output, cuda_ver, cudnn_ver = c2_utils.get_nvidia_info()
    logger.info("cuda version : {}".format(cuda_ver))
    logger.info("cudnn version: {}".format(cudnn_ver))
    logger.info("nvidia-smi output:\n{}".format(smi_output))
    logger.info('Training with config:')
    logger.info(pprint.pformat(cfg))
    # Note that while we set the numpy random seed network training will not be
    # deterministic in general. There are sources of non-determinism that cannot
    # be removed with a reasonble execution-speed tradeoff (such as certain
    # non-deterministic cudnn functions).
    np.random.seed(cfg.RNG_SEED)
    # Build the test model whose outputs feed the stage-2 data loader.
    logger.info("creat test model ...")
    test_model = test_engine.initialize_model_from_cfg(cfg.TEST.WEIGHTS, gpu_id=0)
    logger.info("created test model ...")
    # NOTE(review): `root` and `output_dir` are free names here — presumably
    # module-level globals defined elsewhere in this file; confirm.
    train_data = DataLoader(root, "train_id.txt", cfg, test_model, is_train=True)
    # Create the training model (True => training mode).
    model, weights_file, start_iter, checkpoints = create_model(
        True, cfg, output_dir)
    # Debug: list all blobs currently in the workspace.
    print(workspace.Blobs())
    # Create the stage-2 input blobs on every GPU.
    blob_names = ['data_stage2', 'gt_label_stage2']
    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            for blob_name in blob_names:
                workspace.CreateBlob(core.ScopedName(blob_name))
    # Override random weight initialization with weights from a saved model
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)
    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    dump_proto_files(model, output_dir)
    writer = SummaryWriter(log_dir=output_dir)
    training_stats = TrainingStats(model, writer)
    CHECKPOINT_PERIOD = int(cfg.TRAIN.SNAPSHOT_ITERS / cfg.NUM_GPUS)
    logger.info("start train ...")
    for cur_iter in range(start_iter, cfg.SOLVER.MAX_ITER):
        # Feed the next minibatch into the (last-used) GPU scope.
        # NOTE(review): `gpu_id` here is the leftover value from the blob
        # creation loop above — only correct for single-GPU runs; confirm.
        data_stage2, gt_label = train_data.next_batch()
        with c2_utils.NamedCudaScope(gpu_id):
            workspace.FeedBlob(core.ScopedName('data_stage2'), data_stage2)
            workspace.FeedBlob(core.ScopedName('gt_label_stage2'), gt_label)
        training_stats.IterTic()
        lr = model.UpdateWorkspaceLr(cur_iter, lr_policy.get_lr_at_iter(cur_iter))
        workspace.RunNet(model.net.Proto().name)
        if cur_iter == start_iter:
            nu.print_net(model)
        training_stats.IterToc()
        training_stats.UpdateIterStats(cur_iter)
        training_stats.LogIterStats(cur_iter, lr)
        writer.add_scalar('learning_rate', lr, cur_iter)
        if (cur_iter + 1) % CHECKPOINT_PERIOD == 0 and cur_iter > start_iter:
            checkpoints[cur_iter] = os.path.join(
                output_dir, 'model_iter{}.pkl'.format(cur_iter))
            nu.save_model_to_weights_file(checkpoints[cur_iter], model)
        if cur_iter == start_iter + training_stats.LOG_PERIOD:
            # Reset the iteration timer to remove outliers from the first few
            # SGD iterations
            training_stats.ResetIterTimer()
        if np.isnan(training_stats.iter_total_loss):
            handle_critical_error(model, 'Loss is NaN')
    # Save the final model
    checkpoints['final'] = os.path.join(output_dir, 'model_final.pkl')
    nu.save_model_to_weights_file(checkpoints['final'], model)
    # save train loss and metric
    state_file = os.path.join(output_dir, 'training_state.json')
    training_stats.SaveTrainingStates(state_file)
    # Execute the training run
    checkpoints = detectron.utils.train.train_model()
    # Test the trained model
    if not args.skip_test:
        test_model(checkpoints['final'], args.multi_gpu_testing, args.opts)
def initialize_model_from_cfg(weights_file, gpu_id=0, int8=True):
    """Initialize a model from the global cfg. Loads test-time weights and
    creates the networks in the Caffe2 workspace.

    Int8/IDEEP variant: behavior is steered by environment variables —
    INT8PATH (directory of int8 nets), INT8PTXT (use pbtxt instead of pb),
    DEBUGMODE, DNOOPT, DPROFILE, COSIM. gpu_id == -2 selects the IDEEP
    (CPU) device.

    Returns:
        (model, ob, ob_mask, ob_keypoint) — the model plus TimeObserver
        handles (None unless DPROFILE=1).
    """
    ob = None
    ob_mask = None
    ob_keypoint = None
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    net_utils.initialize_gpu_from_weights_file(
        model, weights_file, gpu_id=gpu_id,
    )
    model_builder.add_inference_inputs(model)
    int8_path = os.environ.get('INT8PATH')

    def LoadModuleFile(fname):
        # Parse a serialized NetDef (text format when INT8PTXT=1) and, for
        # IDEEP runs (gpu_id == -2), force every op onto the IDEEP device.
        with open(fname) as f:
            from caffe2.proto import caffe2_pb2
            net_def = caffe2_pb2.NetDef()
            if os.environ.get('INT8PTXT') == "1":
                import google.protobuf.text_format as ptxt
                net_def = ptxt.Parse(f.read(), caffe2_pb2.NetDef())
            else:
                net_def.ParseFromString(f.read())
            if gpu_id == -2:
                device_opts = caffe2_pb2.DeviceOption()
                device_opts.device_type = caffe2_pb2.IDEEP
                for op in net_def.op:
                    op.device_option.CopyFrom(device_opts)
            return net_def
        return None  # unreachable: the with-block always returns

    def CreateNet(net):
        # Substitute int8 init/predict definitions when the files exist,
        # optionally name ops for debugging, optimize for IDEEP, then
        # create the net in the workspace.
        int8_file_path = int8_path if int8_path else ''
        if os.environ.get('INT8PTXT') == "1":
            int8_predict_file = int8_file_path + '/' + net.Proto(
            ).name + '_predict_int8.pbtxt'
            int8_init_file = int8_file_path + '/' + net.Proto(
            ).name + '_init_int8.pbtxt'
        else:
            int8_predict_file = int8_file_path + '/' + net.Proto(
            ).name + '_predict_int8.pb'
            int8_init_file = int8_file_path + '/' + net.Proto(
            ).name + '_init_int8.pb'
        if os.path.isfile(int8_init_file):
            logging.warning('Loading Int8 init file for module {}'.format(
                net.Proto().name))
            workspace.RunNetOnce(LoadModuleFile(int8_init_file))
        if os.path.isfile(int8_predict_file):
            logging.warning('Loading Int8 predict file for module {}'.format(
                net.Proto().name))
            net.Proto().CopyFrom(LoadModuleFile(int8_predict_file))
        if os.environ.get('DEBUGMODE') == "1":
            # Give unnamed ops deterministic names so dumps are readable.
            for i, op in enumerate(net.Proto().op):
                if len(op.name) == 0:
                    op.name = op.type.lower() + str(i)
        if gpu_id == -2 and os.environ.get('DNOOPT') != "1":
            logging.warning('Optimize module {}....................'.format(
                net.Proto().name))
            tf.optimizeForIDEEP(net)
        if os.environ.get('DEBUGMODE') == "1":
            with open("{}_opt_predict_net.pb".format(net.Proto().name),
                      "w") as fid:
                fid.write(net.Proto().SerializeToString())
            with open("{}_opt_predict_net.pbtxt".format(net.Proto().name),
                      "w") as fid:
                fid.write(str(net.Proto()))
        workspace.CreateNet(net)

    # In co-simulation fp32 mode, disable int8 substitution entirely.
    if os.environ.get('COSIM') and int8 == False:
        int8_path = None
    CreateNet(model.net)
    if os.environ.get('DPROFILE') == "1":
        logging.warning('need profile, add observer....................')
        ob = model.net.AddObserver("TimeObserver")
    workspace.CreateNet(model.conv_body_net)
    if cfg.MODEL.MASK_ON:
        CreateNet(model.mask_net)
        if os.environ.get('DPROFILE') == "1":
            ob_mask = model.mask_net.AddObserver("TimeObserver")
    if cfg.MODEL.KEYPOINTS_ON:
        CreateNet(model.keypoint_net)
        if os.environ.get('DPROFILE') == "1":
            ob_keypoint = model.keypoint_net.AddObserver("TimeObserver")
    return model, ob, ob_mask, ob_keypoint
def main():
    """Testing driver: builds the stage-2 model and writes fused probability
    maps (argmax label images) for each validation image."""
    # Initialize C2
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1']
    )
    # Set up logging and load config options
    logger = setup_logging(__name__)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()
    smi_output, cuda_ver, cudnn_ver = c2_utils.get_nvidia_info()
    logger.info("cuda version : {}".format(cuda_ver))
    logger.info("cudnn version: {}".format(cudnn_ver))
    logger.info("nvidia-smi output:\n{}".format(smi_output))
    logger.info('Training with config:')
    logger.info(pprint.pformat(cfg))
    # Note that while we set the numpy random seed network training will not be
    # deterministic in general. There are sources of non-determinism that cannot
    # be removed with a reasonble execution-speed tradeoff (such as certain
    # non-deterministic cudnn functions).
    np.random.seed(cfg.RNG_SEED)
    # Build the test model whose outputs feed the stage-2 data loader.
    logger.info("creat test model ...")
    test_model = test_engine.initialize_model_from_cfg(cfg.TEST.WEIGHTS, gpu_id=0)
    logger.info("created test model ...")
    #cfg.TRAIN.IMS_PER_BATCH = 1
    # NOTE(review): `root` and `output_dir` are free names here — presumably
    # module-level globals defined elsewhere in this file; confirm.
    train_data = DataLoader(root, "val_id.txt", cfg, test_model, is_train=False)
    # Create the stage-2 model (False => evaluation mode).
    model, weights_file, start_iter, checkpoints = create_model(False, cfg,
                                                               output_dir)
    # Debug: list all blobs currently in the workspace.
    print(workspace.Blobs())
    # Create the stage-2 input blob on every GPU.
    blob_names = ['data_stage2']
    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            for blob_name in blob_names:
                workspace.CreateBlob(core.ScopedName(blob_name))
    # Override random weight initialization with weights from a saved model
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)
    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    logger.info("start test ...")
    save_root = os.path.join(output_dir, 'fusion')
    if not os.path.exists(save_root):
        os.makedirs(save_root)
    for cur_iter in range(10000):
        # Fetch the next batch; meta carries per-image id/width/height.
        data_stage2, gt_label, meta = train_data.next_batch()
        '''# print('input0-20 sungalsses max score:', np.max(data_stage2[0, 4, :, :]))
        print('input20-40 sungalsses max score:', np.max(data_stage2[0, 24, :, :]))
        print('input0-20 glovess max score:', np.max(data_stage2[0, 3, :, :]))
        print('input20-40 glovess max score:', np.max(data_stage2[0, 23, :, :]))
        #'''
        # NOTE(review): `gpu_id` is the leftover value from the blob-creation
        # loop above — only correct for single-GPU runs; confirm.
        with c2_utils.NamedCudaScope(gpu_id):
            workspace.FeedBlob(core.ScopedName('data_stage2'), data_stage2)
        with c2_utils.NamedCudaScope(gpu_id):
            workspace.RunNet(model.net.Proto().name)
            batch_probs = workspace.FetchBlob(
                core.ScopedName('probs_human_NCHW_stage2'))
        # NCHW -> NHWC for cv2.resize.
        batch_probs = batch_probs.transpose((0, 2, 3, 1))
        assert len(meta) == batch_probs.shape[0]
        for i in range(len(meta)):
            # Resize back to the original image size, then HWC -> CHW.
            probs = cv2.resize(batch_probs[i],
                               (meta[i]['width'], meta[i]['height']),
                               interpolation=cv2.INTER_LINEAR)
            probs = probs.transpose((2,0,1))
            print('sungalsses max score:', np.max(probs[4, :, :]))
            print('glovess max score:', np.max(probs[3, :, :]))
            # Save the per-pixel argmax label map as a PNG.
            cv2.imwrite(os.path.join(save_root, meta[i]['id']+'.png'),
                        probs.argmax(0))
        print("prossed ", cur_iter)