def create_blobs_if_not_existed(blob_names):
    """Create each blob from ``blob_names`` that the workspace does not list.

    The workspace blob names are read once, up front; every entry of
    ``blob_names`` that is not among them is created under ``str(entry)``.
    """
    already_present = set(workspace.Blobs())
    # Lazy filter keeps the membership test and the creation interleaved.
    absent = (entry for entry in blob_names if entry not in already_present)
    for entry in absent:
        workspace.CreateBlob(str(entry))
# NOTE(review): the line below is the tail of a call that starts before this
# excerpt; it is kept verbatim.
init_params=False)
AddModel(deploy_model, "data")

# The parameter initialization network only needs to be run once.
# Now all the parameter blobs are going to be initialized in the workspace.
workspace.RunNetOnce(train_model.param_init_net)
# overwrite=True allows you to run this cell several times and avoid errors
workspace.CreateNet(train_model.net, overwrite=True)

# Set the iterations number and track the accuracy & loss
total_iters = 200
accuracy = np.zeros(total_iters)
loss = np.zeros(total_iters)
print("The blobs in the workspace pre-train: {}".format(workspace.Blobs()))

# Now, we will manually run the network for 200 iterations.
for i in range(total_iters):
    workspace.RunNet(train_model.net)
    # Record the "accuracy" and "loss" blobs after this iteration.
    accuracy[i] = workspace.blobs["accuracy"]
    loss[i] = workspace.blobs["loss"]
print("The blobs in the workspace post-train: {}".format(workspace.Blobs()))

# param_init_net here will only create a data reader
# Other parameters won't be re-created because we selected
# init_params=False before
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)
# Zero-filled buffer with 100 slots for test accuracy values.
test_accuracy = np.zeros(100)
def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):
    """Feed weights from ``weights_file`` into the model's blobs on one GPU.

    Parameter blobs are fed inside ``c2_utils.NamedCudaScope(gpu_id)``.
    Entries of the weights file that the model does not use (excluding
    ``*_momentum`` entries and ``None`` values) are fed under the
    ``__preserve__/`` prefix inside ``c2_utils.CpuScope()``.

    If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will
    automatically map logical GPU ids (starting from 0) to the physical GPUs
    specified in CUDA_VISIBLE_DEVICES.
    """
    logger.info('Loading weights from: {}'.format(weights_file))
    workspace_names = workspace.Blobs()
    weights = load_object(weights_file)
    if 'cfg' in weights:
        configure_bbox_reg_weights(model, load_cfg(weights['cfg']))
    if 'blobs' in weights:
        # Backwards compat--dictionary used to be only blobs, now they are
        # stored under the 'blobs' key
        weights = weights['blobs']

    # Unscoped parameter names, kept in model order for readable logs.
    param_names = OrderedDict(
        (c2_utils.UnscopeName(str(param)), True) for param in model.params)

    # Initialize weights on GPU gpu_id only
    with c2_utils.NamedCudaScope(gpu_id):
        for param_name in param_names.keys():
            # Special case for sharing initialization from a pretrained
            # model: if a blob named '_[xyz]_foo' is in model.params and not
            # in the weights dictionary, load source blob 'foo' into
            # destination blob '_[xyz]_foo'.
            tag_end = param_name.find(']_')
            strip_tag = tag_end >= 0 and param_name not in weights
            src_name = param_name[tag_end + 2:] if strip_tag else param_name
            if src_name not in weights:
                logger.info('{:s} not found'.format(src_name))
                continue

            dst_name = core.ScopedName(param_name)
            momentum_name = src_name + '_momentum'
            has_momentum = momentum_name in weights
            src_array = weights[src_name]
            logger.info(
                '{:s}{:} loaded from weights file into {:s}: {}'.format(
                    src_name,
                    ' [+ momentum]' if has_momentum else '',
                    dst_name,
                    src_array.shape))

            if dst_name in workspace_names:
                # A blob that already exists in the workspace must have the
                # same shape as the one being loaded.
                ws_blob = workspace.FetchBlob(dst_name)
                assert ws_blob.shape == src_array.shape, \
                    ('Workspace blob {} with shape {} does not match '
                     'weights file shape {}').format(
                        src_name, ws_blob.shape, src_array.shape)

            workspace.FeedBlob(
                dst_name, src_array.astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    weights[momentum_name].astype(np.float32, copy=False))

    # We preserve blobs that are in the weights file but not used by the
    # current model. We load these into CPU memory under the '__preserve__/'
    # namescope. These blobs will be stored when saving a model to a weights
    # file. This feature allows for alternating optimization of Faster R-CNN
    # in which blobs unused by one step can still be preserved forward and
    # used to initialize another step.
    for src_name, src_array in weights.items():
        is_unused = src_name not in param_names
        if (not is_unused or src_name.endswith('_momentum')
                or src_array is None):
            continue
        with c2_utils.CpuScope():
            workspace.FeedBlob(
                '__preserve__/{:s}'.format(src_name), src_array)
            logger.info(
                '{:s} preserved in workspace (unused)'.format(src_name))
def initialize_gpu_0_from_weights_file(model, weights_file):
    """Feed weights from a pickled file into the model's blobs on GPU 0.

    Args:
        model: object exposing ``params`` (iterable of blobs).
        weights_file: path to a pickle file holding either a blob dictionary
            or a dictionary with ``'blobs'`` (and optionally ``'cfg'``) keys.
            If the path contains ``'trainedCOCO'`` the momentum blobs stored
            in the file are not fed.

    Behaviour:
        * Parameters are fed under the ``gpu_0`` name scope on CUDA device 0.
          When a workspace blob already exists with a different shape, the
          loaded array is passed through ``inflate_weights`` first.
        * Entries whose name ends in ``_rm`` or ``_riv`` are fed to ``gpu_0``
          as well.
        * Entries not used by the model (other than ``*_momentum`` entries and
          ``None`` values) are fed under ``__preserve__/`` on the CPU.
    """
    logger.info('Loading from: {}'.format(weights_file))
    is_first_init = 'trainedCOCO' in weights_file
    ws_blobs = workspace.Blobs()
    # Pickle streams are binary; text mode ('r') fails on Python 3 and can
    # corrupt data on platforms that translate newlines.
    # NOTE(security): pickle.load executes arbitrary code from the file; only
    # load weights files from trusted sources.
    with open(weights_file, 'rb') as f:
        src_blobs = pickle.load(f)
    if 'cfg' in src_blobs:
        # NOTE(security/review): yaml.load without an explicit Loader can
        # construct arbitrary objects on old PyYAML and is rejected by
        # PyYAML >= 6; confirm which Loader the saved cfg needs.
        saved_cfg = yaml.load(src_blobs['cfg'])
        configure_bbox_reg_weights(model, saved_cfg)
    if 'blobs' in src_blobs:
        # Backwards compat--dictionary used to be only blobs, now they are
        # stored under the 'blobs' key
        src_blobs = src_blobs['blobs']

    # Initialize weights on GPU 0 only
    unscoped_param_names = OrderedDict()  # Print these out in model order
    for blob in model.params:
        unscoped_param_names[utils.blob.unscope_name(str(blob))] = True

    with core.NameScope('gpu_0'):
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
            for unscoped_param_name in unscoped_param_names.keys():
                if (unscoped_param_name.find(']_') >= 0 and
                        unscoped_param_name not in src_blobs):
                    # Special case for sharing initialization from a
                    # pretrained model:
                    # If a blob named '_[xyz]_foo' is in model.params and not
                    # in the initialization blob dictionary, then load source
                    # blob 'foo' into destination blob '_[xyz]_foo'
                    src_name = unscoped_param_name[
                        unscoped_param_name.find(']_') + 2:]
                else:
                    src_name = unscoped_param_name
                if src_name not in src_blobs:
                    logger.info('{:s} not found'.format(src_name))
                    continue

                dst_name = core.ScopedName(unscoped_param_name)
                momentum_name = src_name + '_momentum'
                has_momentum = momentum_name in src_blobs
                has_momentum_str = ' [+ momentum]' if has_momentum else ''
                logger.info('{:s}{:} loaded from weights file into {:s}: {}'.
                            format(
                                src_name, has_momentum_str,
                                dst_name, src_blobs[src_name].shape))

                pretrained_w = src_blobs[src_name]
                if dst_name in ws_blobs:
                    # If the blob is already in the workspace and its shape
                    # differs from the loaded blob, inflate the loaded
                    # weights to fit.
                    ws_blob = workspace.FetchBlob(dst_name)
                    if ws_blob.shape != src_blobs[src_name].shape:
                        pretrained_w = inflate_weights(
                            pretrained_w, ws_blob, src_name, src_blobs)
                workspace.FeedBlob(
                    dst_name,
                    pretrained_w.astype(np.float32, copy=False))
                if has_momentum and not is_first_init:
                    # when feeding momentum, we're probably resuming from
                    # previous checkpoint. So all the inflated stuff won't be
                    # needed in that case
                    workspace.FeedBlob(
                        dst_name + '_momentum',
                        src_blobs[momentum_name].astype(
                            np.float32, copy=False))

    # Add _rm/_riv BN mean/var params, in case the pre-trained model contains
    # it. Needed to test the scratch trained models.
    for src_name in src_blobs.keys():
        if src_name.endswith('_rm') or src_name.endswith('_riv'):
            with core.NameScope('gpu_0'):
                with core.DeviceScope(
                        core.DeviceOption(caffe2_pb2.CUDA, 0)):
                    dst_name = core.ScopedName(src_name)
                    workspace.FeedBlob(dst_name, src_blobs[src_name])
                    logger.info('Loaded BN param {}'.format(src_name))

    # We preserve blobs that are in the weights file but not used by the
    # current model. We load these into CPU memory under the '__preserve__/'
    # namescope. These blobs will be stored when saving a model to a weights
    # file. This feature allows for alternating optimization of Faster R-CNN
    # in which blobs unused by one step can still be preserved forward and
    # used to initialize another step.
    for src_name in src_blobs.keys():
        if (src_name not in unscoped_param_names and
                not src_name.endswith('_momentum') and
                src_blobs[src_name] is not None):
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
                workspace.FeedBlob(
                    '__preserve__/{:s}'.format(src_name), src_blobs[src_name])
                logger.info(
                    '{:s} preserved in workspace (unused)'.format(src_name))
def run(self, index, inputFeatures, accumulate=True, image_path=None):
    """Run the detection head on precomputed features for one dataset entry.

    Args:
        index: index of the dataset entry.
        inputFeatures: mapping from feature name to a 3-dimensional tensor
            (batched input is rejected by an assertion). Each value is
            converted with ``v.data.cpu().numpy()`` and given a leading axis.
        accumulate: whether to store the predictions in the ``self.all_...``
            members (and ``image_path`` in ``self.id_sequences``).
        image_path: path to the annotated image to which the predictions
            correspond.

    Returns:
        ``(cls_boxes_i, cls_segms_i)`` as returned by
        ``im_detect_all_given_features``.
    """
    timers = self.timers
    # Format the inputs to the mask rcnn head.
    # ``items()`` works on both Python 2 and 3; ``iteritems()`` is Python 2
    # only.
    features = {}
    for k, v in inputFeatures.items():
        assert v.dim() == 3, 'Batch mode not allowed'
        features[k] = np.expand_dims(v.data.cpu().numpy(), axis=0)

    gpu_dev = caffe2_core.DeviceOption(caffe2_pb2.CUDA, self.gpu_id)
    name_scope = 'gpu_{}'.format(self.gpu_id)

    # Clean the workspace to make sure that nothing comes from the possible
    # forwarding of target features, depending on the use of this module.
    # Every blob that is neither a model parameter nor a momentum blob is
    # overwritten with an empty array. A set keeps the membership test O(1).
    parameters = set(str(s) for s in self.model.params)
    parameters.update(
        str(s) + '_momentum' for s in self.model.TrainableParams())
    for b in workspace.Blobs():
        if b not in parameters:
            workspace.FeedBlob(b, np.array([]))

    # Produce the top level of the pyramid of features
    with caffe2_core.NameScope(name_scope):
        with caffe2_core.DeviceScope(gpu_dev):
            workspace.FeedBlob(
                caffe2_core.ScopedName("predicted_fpn_res5_2_sum"),
                features['fpn_res5_2_sum'])
            workspace.RunOperatorOnce(self.subsampler)
            features[u'fpn_res5_2_sum_subsampled_2x'] = workspace.FetchBlob(
                caffe2_core.ScopedName(
                    "predicted_fpn_res5_2_sum_subsampled_2x"))

    # Forward the rest of the features in the head of the model.
    # Image geometry is fixed here: 1024 x 2048, scale 1.
    im_info = np.array([[1024., 2048., 1.]], dtype=np.float32)
    im_scales = np.array([1.])
    im_shape = (1024, 2048, 3)
    with caffe2_core.NameScope(name_scope):
        with caffe2_core.DeviceScope(gpu_dev):
            cls_boxes_i, cls_segms_i = im_detect_all_given_features(
                self.model, self.subsampler, features, im_info, im_scales,
                im_shape, timers)

    # If required, store the results in the class's members
    if accumulate:
        extend_results(index, self.all_boxes_ann_frame, cls_boxes_i)
        if cls_segms_i is not None:
            extend_results(index, self.all_segms_ann_frame, cls_segms_i)
        if image_path is not None:
            self.id_sequences.append(image_path)

    # Progress report every 10th entry.
    if index % 10 == 0:
        ave_total_time = np.sum([t.average_time for t in timers.values()])
        det_time = (timers['im_detect_bbox'].average_time +
                    timers['im_detect_mask'].average_time)
        misc_time = (timers['misc_bbox'].average_time +
                     timers['misc_mask'].average_time)
        print(('im_detect: '
               '{:d}/{:d} {:.3f}s + {:.3f}s => avg total time: {:.3f}s'
               ).format(index, self.num_images, det_time, misc_time,
                        ave_total_time))

    return cls_boxes_i, cls_segms_i
def testBlobNameOverrides(self):
    """Check ``Save`` with ``blob_name_overrides`` and ``Load`` renaming.

    Three blobs are saved under overridden names, the workspace is reset,
    and the blobs are loaded back, first all at once and then selectively
    through ``source_blob_names`` into new output names.
    """
    original_names = ['blob_a', 'blob_b', 'blob_c']
    new_names = ['x', 'y', 'z']
    blobs = [np.random.permutation(6) for i in range(3)]
    for i, blob in enumerate(blobs):
        self.assertTrue(workspace.FeedBlob(original_names[i], blob))
        self.assertTrue(workspace.HasBlob(original_names[i]))
    self.assertEqual(len(workspace.Blobs()), 3)

    # Created before the ``try`` so the ``finally`` clause never refers to
    # an unbound name if ``mkdtemp`` itself fails.
    tmp_folder = tempfile.mkdtemp()
    try:
        db_path = os.path.join(tmp_folder, "db")

        # Saves the blobs to a local db.
        # Combining strip_prefix with blob_name_overrides must be rejected.
        with self.assertRaises(RuntimeError):
            workspace.RunOperatorOnce(
                core.CreateOperator(
                    "Save", original_names, [],
                    absolute_path=1,
                    strip_prefix='.temp',
                    blob_name_overrides=new_names,
                    db=db_path,
                    db_type=self._db_type))
        self.assertTrue(
            workspace.RunOperatorOnce(
                core.CreateOperator(
                    "Save", original_names, [],
                    absolute_path=1,
                    blob_name_overrides=new_names,
                    db=db_path,
                    db_type=self._db_type)))
        self.assertTrue(workspace.ResetWorkspace())
        self.assertEqual(len(workspace.Blobs()), 0)

        # Load everything back: the blobs come back under the new names.
        self.assertTrue(
            workspace.RunOperatorOnce(
                core.CreateOperator(
                    "Load", [], [],
                    absolute_path=1,
                    db=db_path,
                    db_type=self._db_type,
                    load_all=1)))
        self.assertEqual(len(workspace.Blobs()), 3)
        for i, name in enumerate(new_names):
            self.assertTrue(workspace.HasBlob(name))
            self.assertTrue((workspace.FetchBlob(name) == blobs[i]).all())

        # moved here per @cxj's suggestion
        load_new_names = ['blob_x', 'blob_y', 'blob_z']
        # load 'x' into 'blob_x'
        self.assertTrue(
            workspace.RunOperatorOnce(
                core.CreateOperator(
                    "Load", [], load_new_names[0:1],
                    absolute_path=1,
                    db=db_path,
                    db_type=self._db_type,
                    source_blob_names=new_names[0:1])))
        # we should have 'x/y/z' and 'blob_x' now
        self.assertEqual(len(workspace.Blobs()), 4)
        for i, name in enumerate(load_new_names[0:1]):
            self.assertTrue(workspace.HasBlob(name))
            self.assertTrue((workspace.FetchBlob(name) == blobs[i]).all())

        self.assertTrue(
            workspace.RunOperatorOnce(
                core.CreateOperator(
                    "Load", [], load_new_names[0:3],
                    absolute_path=1,
                    db=db_path,
                    db_type=self._db_type,
                    source_blob_names=new_names[0:3])))
        # we should have 'x/y/z' and 'blob_x/y/z' now
        self.assertEqual(len(workspace.Blobs()), 6)
        for i, name in enumerate(load_new_names[0:3]):
            self.assertTrue(workspace.HasBlob(name))
            self.assertTrue((workspace.FetchBlob(name) == blobs[i]).all())
    finally:
        # clean up temp folder; a folder that is already gone is fine.
        try:
            shutil.rmtree(tmp_folder)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
print("The deploy model is saved to: " + root_folder + "/mnist_model.minidb") # Now we can load the model back and run the prediction to verify it works. # In[18]: # we retrieve the last input data out and use it in our prediction test before we scratch the workspace blob = workspace.FetchBlob("data") pyplot.figure() _ = visualize.NCHW.ShowMultiple(blob) # reset the workspace, to make sure the model is actually loaded workspace.ResetWorkspace(root_folder) # verify that all blobs are destroyed. print("The blobs in the workspace after reset: {}".format(workspace.Blobs())) # load the predict net predict_net = pe.prepare_prediction_net( os.path.join(root_folder, "mnist_model.minidb"), "minidb") # verify that blobs are loaded back print("The blobs in the workspace after loading the model: {}".format( workspace.Blobs())) # feed the previously saved data to the loaded model workspace.FeedBlob("data", blob) # predict workspace.RunNetOnce(predict_net) softmax = workspace.FetchBlob("softmax")
final_image = transposed_image
print("Shape of final_image: " + str(np.array(final_image).shape))

# Read the serialized init and predict nets as raw bytes.
with open(MODEL_ROOT + "/init_net.pb", "rb") as f:
    init_net = f.read()
with open(MODEL_ROOT + "/predict_net.pb", "rb") as f:
    predict_net = f.read()

# Start from an empty workspace and feed the image under the model's
# input blob name.
workspace.ResetWorkspace()
blob_name = model_props[MODEL].blob_name
workspace.FeedBlob(blob_name, final_image)
print("The blobs in the workspace after FeedBlob: {}".format(workspace.Blobs()))

# Create a predictor using the loaded model.
p = workspace.Predictor(init_net, predict_net)

# Time args.iterations runs of the predictor on the same image.
# NOTE(review): when args.iterations is 0, ``results`` is never assigned and
# the per-iteration division below divides by zero -- confirm that the
# argument parser enforces a positive value.
start = time.time()
for i in range(0, args.iterations):
    results = p.run([final_image])
end = time.time()
if args.time:
    print('Wall time per iteration (s): {:0.4f}'.format(
        (end - start) / args.iterations))

# Index of the largest entry, and the sum, of the first row of the first
# output.
max_idx = np.argmax(results[0][0])
sum_probability = sum(results[0][0])
def _append_rows(blob, target_shape, make_rows):
    """Return ``blob`` with rows appended on axis 0 up to ``target_shape[0]``.

    ``make_rows`` is called with the shape of the missing rows and must
    return an array of that shape.
    """
    missing_shape = (
        (target_shape[0] - blob.shape[0],) + tuple(target_shape[1:]))
    return np.append(blob, make_rows(missing_shape), axis=0)


def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):
    """Initialize a network with ops on a specific GPU.

    If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will
    automatically map logical GPU ids (starting from 0) to the physical GPUs
    specified in CUDA_VISIBLE_DEVICES.

    In addition to plain loading, the class-dependent layers listed in the
    body are grown along axis 0 when the workspace blob has more rows than
    the blob in the weights file: weights get small Gaussian rows, biases get
    a constant, and the momentum blob (when the file has one) gets zero rows.

    Args:
        model: object exposing ``params`` (iterable of blobs).
        weights_file: path to a pickled weights dictionary.
        gpu_id: GPU whose scope the parameters are fed under.

    Raises:
        AssertionError: if a workspace blob and the (possibly grown) loaded
            blob still differ in shape.
    """
    logger.info('Loading weights from: {}'.format(weights_file))
    ws_blobs = workspace.Blobs()
    # Pickle streams are binary; text mode ('r') fails on Python 3.
    # NOTE(security): pickle.load executes arbitrary code from the file; only
    # load weights files from trusted sources.
    with open(weights_file, 'rb') as f:
        src_blobs = pickle.load(f)
    if 'cfg' in src_blobs:
        saved_cfg = load_cfg(src_blobs['cfg'])
        configure_bbox_reg_weights(model, saved_cfg)
    if 'blobs' in src_blobs:
        # Backwards compat--dictionary used to be only blobs, now they are
        # stored under the 'blobs' key
        src_blobs = src_blobs['blobs']

    # Class-dependent layers that may be grown when the number of classes
    # increased, e.g. (10, 1024) -> (11, 1024), (40,) -> (44,),
    # (10, 256, 1, 1) -> (11, 256, 1, 1).
    # NOTE(review): these names are hard-coded to the 'gpu_0/' scope, so the
    # growing logic only triggers when dst_name carries that prefix.
    classes_layers_list_w = (
        'gpu_0/cls_score_w', 'gpu_0/bbox_pred_w', 'gpu_0/mask_fcn_logits_w')
    classes_layers_list_b = (
        'gpu_0/cls_score_b', 'gpu_0/bbox_pred_b', 'gpu_0/mask_fcn_logits_b')
    bias_fill = -np.log((1 - 0.00001) / 0.00001)

    # Initialize weights on GPU gpu_id only
    unscoped_param_names = OrderedDict()  # Print these out in model order
    for blob in model.params:
        unscoped_param_names[c2_utils.UnscopeName(str(blob))] = True

    with c2_utils.NamedCudaScope(gpu_id):
        for unscoped_param_name in unscoped_param_names.keys():
            if (unscoped_param_name.find(']_') >= 0 and
                    unscoped_param_name not in src_blobs):
                # Special case for sharing initialization from a pretrained
                # model:
                # If a blob named '_[xyz]_foo' is in model.params and not in
                # the initialization blob dictionary, then load source blob
                # 'foo' into destination blob '_[xyz]_foo'
                src_name = unscoped_param_name[
                    unscoped_param_name.find(']_') + 2:]
            else:
                src_name = unscoped_param_name
            if src_name not in src_blobs:
                logger.info('{:s} not found'.format(src_name))
                continue

            dst_name = core.ScopedName(unscoped_param_name)
            momentum_name = src_name + '_momentum'
            has_momentum = momentum_name in src_blobs
            has_momentum_str = ' [+ momentum]' if has_momentum else ''
            logger.debug(
                '{:s}{:} loaded from weights file into {:s}: {}'.format(
                    src_name, has_momentum_str, dst_name,
                    src_blobs[src_name].shape))

            if dst_name in ws_blobs:
                print("dst_name:" + dst_name)
                # If the blob is already in the workspace, make sure that it
                # matches the shape of the loaded blob
                ws_blob = workspace.FetchBlob(dst_name)
                print("xhpan:ws_blob.shape:" + str(ws_blob.shape))
                print("xhpan:src_blobs[src_name].shape:" +
                      str(src_blobs[src_name].shape))

                if ws_blob.shape != src_blobs[src_name].shape:
                    print("ws_blob.shape != src_blobs[src_name].shape")
                    print("-----------------" + str(ws_blob.shape) +
                          "---------------" +
                          str(src_blobs[src_name].shape))
                    is_weight = dst_name in classes_layers_list_w
                    is_bias = dst_name in classes_layers_list_b
                    if is_weight:
                        src_blobs[src_name] = _append_rows(
                            src_blobs[src_name], ws_blob.shape,
                            lambda shape: 0.0001 * np.random.randn(*shape))
                    elif is_bias:
                        # np.ones takes the shape as one argument; unpacking
                        # it only worked for 1-D shapes.
                        src_blobs[src_name] = _append_rows(
                            src_blobs[src_name], ws_blob.shape,
                            lambda shape: bias_fill * np.ones(shape))
                    # Only grow the momentum blob when the weights file
                    # actually has one; indexing it unconditionally raised
                    # KeyError for files saved without momentum.
                    if (is_weight or is_bias) and has_momentum:
                        src_blobs[momentum_name] = _append_rows(
                            src_blobs[momentum_name], ws_blob.shape,
                            np.zeros)

                assert ws_blob.shape == src_blobs[src_name].shape, \
                    ('Workspace blob {} with shape {} does not match '
                     'weights file shape {}').format(
                        src_name, ws_blob.shape, src_blobs[src_name].shape)

            workspace.FeedBlob(
                dst_name,
                src_blobs[src_name].astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    src_blobs[momentum_name].astype(np.float32, copy=False))

    # We preserve blobs that are in the weights file but not used by the
    # current model. We load these into CPU memory under the '__preserve__/'
    # namescope. These blobs will be stored when saving a model to a weights
    # file. This feature allows for alternating optimization of Faster R-CNN
    # in which blobs unused by one step can still be preserved forward and
    # used to initialize another step.
    for src_name in src_blobs.keys():
        if (src_name not in unscoped_param_names and
                not src_name.endswith('_momentum') and
                src_blobs[src_name] is not None):
            with c2_utils.CpuScope():
                workspace.FeedBlob(
                    '__preserve__/{:s}'.format(src_name), src_blobs[src_name])
                logger.debug(
                    '{:s} preserved in workspace (unused)'.format(src_name))
def print_all(self):
    """Print the workspace blob list, then each blob name and its value.

    Values are obtained through ``self.FetchBlobWrapper``.
    """
    # approach 1: all
    print(workspace.Blobs())
    for blob_name in workspace.Blobs():
        print(blob_name)
        print(self.FetchBlobWrapper(blob_name))
def initialize_master_gpu_model_params(
        model, weights_file, load_momentum=True):
    """Feed parameters from a pickled weights file into the root GPU.

    Args:
        model: object exposing ``params``, ``TrainableParams()``,
            ``GetAllParams()`` and ``net.Name()``.
        weights_file: path to a pickle file holding either a blob dictionary
            or a dictionary with a ``'blobs'`` key.
        load_momentum: when true and the net name does not contain
            ``'test'``, the ``*_momentum`` blobs of trainable params are
            loaded as well.

    Returns:
        ``(model_iter, prev_lr)``: the ``'model_iter'`` entry of the file
        (0 when absent) and the learning rate that was fed to every GPU.

    Raises:
        Exception: if the file has no ``'lr'`` entry and
            ``cfg.TRAIN.RESET_START_ITER`` is false.
        AssertionError: if a blob shape is incompatible with the workspace.
    """
    ws_blobs = workspace.Blobs()
    logger.info("Initializing model params from file: {}".format(weights_file))
    # Pickle streams are binary; text mode ('r') fails on Python 3.
    # NOTE(security): pickle.load executes arbitrary code from the file; only
    # load weights files from trusted sources.
    with open(weights_file, 'rb') as fopen:
        blobs = pickle.load(fopen)
    if 'blobs' in blobs:
        blobs = blobs['blobs']
    unscoped_blob_names = OrderedDict()

    # Return the model iter from which training should start
    model_iter = 0
    if 'model_iter' in blobs:
        model_iter = blobs['model_iter']

    if 'lr' in blobs:
        prev_lr = float(blobs['lr'])
    elif cfg.TRAIN.RESET_START_ITER:
        prev_lr = 1.
    else:
        raise Exception('No lr blob found.')

    # initialize params, params momentum, computed params
    if 'test' not in model.net.Name() and load_momentum:
        # Fetched once instead of once per parameter.
        trainable_params = model.TrainableParams()
        for param in model.params:
            if param in trainable_params:
                unscoped_blob_names[misc.unscope_name(
                    str(param) + '_momentum')] = True
    for blob in model.GetAllParams():
        unscoped_blob_names[misc.unscope_name(str(blob))] = True

    root_gpu_id = cfg.ROOT_GPU_ID
    with core.NameScope('gpu_{}'.format(root_gpu_id)):
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, root_gpu_id)):
            for unscoped_blob_name in unscoped_blob_names.keys():
                scoped_blob_name = misc.scoped_name(unscoped_blob_name)
                if unscoped_blob_name not in blobs:
                    logger.info('{:s} not found'.format(unscoped_blob_name))
                    continue
                if scoped_blob_name in ws_blobs:
                    ws_blob = workspace.FetchBlob(scoped_blob_name)

                    if 'pred' in unscoped_blob_name:
                        # Blobs with 'pred' in the name are reshaped to the
                        # workspace shape when the element counts agree and
                        # skipped otherwise.
                        if np.prod(ws_blob.shape) \
                                != np.prod(blobs[unscoped_blob_name].shape):
                            logger.info(('{:s} (classifier) found but ' +
                                         'unmatching (not loaded):' +
                                         '{} ---> {}')
                                        .format(
                                            unscoped_blob_name,
                                            blobs[unscoped_blob_name].shape,
                                            ws_blob.shape))
                            continue
                        else:
                            blobs[unscoped_blob_name] = np.reshape(
                                blobs[unscoped_blob_name], ws_blob.shape)

                    if len(ws_blob.shape) != \
                            len(blobs[unscoped_blob_name].shape):
                        # inflate if so
                        assert ws_blob.shape[:2] == \
                            blobs[unscoped_blob_name].shape[:2], \
                            ('Workspace blob {} with shape {} does not match '
                             'weights file shape {}').format(
                                unscoped_blob_name, ws_blob.shape,
                                blobs[unscoped_blob_name].shape)
                        assert ws_blob.shape[-2:] == \
                            blobs[unscoped_blob_name].shape[-2:], \
                            ('Workspace blob {} with shape {} does not match '
                             'weights file shape {}').format(
                                unscoped_blob_name, ws_blob.shape,
                                blobs[unscoped_blob_name].shape)

                        logger.info(
                            ('{:s} loaded from weights file into: {:s}' +
                             ' inflated {} ---> {}').format(
                                unscoped_blob_name, scoped_blob_name,
                                blobs[unscoped_blob_name].shape,
                                ws_blob.shape))
                        # inflate: repeat the blob along a new axis 2 and
                        # divide by the number of copies.
                        num_inflate = ws_blob.shape[2]
                        blobs[unscoped_blob_name] = np.stack(
                            [blobs[unscoped_blob_name]] * num_inflate,
                            axis=2) / float(num_inflate)
                    else:
                        logger.info(
                            ('{:s} loaded from weights file into: {:s}' +
                             ' {}').format(
                                unscoped_blob_name, scoped_blob_name,
                                ws_blob.shape))

                        assert ws_blob.shape == \
                            blobs[unscoped_blob_name].shape, \
                            ('Workspace blob {} with shape {} does not match '
                             'weights file shape {}').format(
                                unscoped_blob_name, ws_blob.shape,
                                blobs[unscoped_blob_name].shape)
                data = blobs[unscoped_blob_name].astype(np.float32, copy=False)
                workspace.FeedBlob(scoped_blob_name, data)

    # hack fix: load and broadcast lr to all gpus
    for i in range(cfg.NUM_GPUS):
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, i)):
            workspace.FeedBlob(
                'gpu_{}/lr'.format(i), np.array(prev_lr, dtype=np.float32))

    return model_iter, prev_lr
def initialize_master_xpu_model_params(model, weights_file, opts, reset_epoch):
    """Read training state from a pickled file and feed model parameters.

    Args:
        model: object exposing ``GetAllParams()``, or ``None`` to only read
            the bookkeeping values.
        weights_file: path to a pickle file holding either a blob dictionary
            or a dictionary with a ``'blobs'`` key.
        opts: options dictionary; reads
            ``opts['model_param']['base_learning_rate']`` and
            ``opts['distributed']['first_xpu_id']`` / ``['device']``.
        reset_epoch: when true, the stored epoch and best metric are ignored.

    Returns:
        ``(start_epoch, lr, best_metric)``.

    A parameter is fed only when a workspace blob of the same scoped name
    exists and has the same shape; a shape mismatch is logged and skipped.
    """
    log.info("Initializing model params from file: {}".format(weights_file))
    # Pickle streams are binary; text mode ('r') fails on Python 3.
    # NOTE(security): pickle.load executes arbitrary code from the file; only
    # load weights files from trusted sources.
    with open(weights_file, 'rb') as fopen:
        blobs = pickle.load(fopen)
    if 'blobs' in blobs:
        blobs = blobs['blobs']

    start_epoch = 0
    best_metric = float('-inf')
    if 'epoch' in blobs:
        log.info('epoch {} is found in model file'.format(blobs['epoch']))
        if not reset_epoch:
            start_epoch = blobs['epoch']
        else:
            log.info('Reset epoch')
    else:
        log.info('no epoch is found in model file')

    # The learning rate stored in the file overrides the configured base.
    lr = opts['model_param']['base_learning_rate']
    if 'lr' in blobs:
        lr = blobs['lr']
    if 'best_metric' in blobs and not reset_epoch:
        best_metric = blobs['best_metric']

    if model is not None:
        log.info('initialize model parameters using weights file: {}'.format(
            weights_file
        ))
        ws_blobs = workspace.Blobs()
        unscoped_blob_names = OrderedDict()
        for blob in model.GetAllParams():
            unscoped_blob_names[unscope_name(str(blob))] = True
        root_xpu_id = opts['distributed']['first_xpu_id']
        device = opts['distributed']['device']
        caffe2_pb2_DEVICE =\
            caffe2_pb2.CUDA if opts['distributed']['device'] == 'gpu'\
            else caffe2_pb2.CPU
        with core.NameScope('{}_{}'.format(device, root_xpu_id)):
            # NOTE(review): the device id is fixed to 0 here while the name
            # scope uses root_xpu_id -- confirm this is intended.
            with core.DeviceScope(core.DeviceOption(caffe2_pb2_DEVICE, 0)):
                for unscoped_blob_name in unscoped_blob_names.keys():
                    scoped_blob_name = scoped_name(unscoped_blob_name)
                    if unscoped_blob_name not in blobs:
                        log.info('{:s} not found'.format(unscoped_blob_name))
                        continue
                    log.info(
                        '{:s} loaded from weights file into: {:s}'.format(
                            unscoped_blob_name, scoped_blob_name
                        )
                    )
                    if scoped_blob_name in ws_blobs:
                        ws_blob = workspace.FetchBlob(scoped_blob_name)
                        if ws_blob.shape != blobs[unscoped_blob_name].shape:
                            log.info(
                                ('Workspace blob {} with shape {} does '
                                 'not match weights file shape {}').format(
                                    unscoped_blob_name, ws_blob.shape,
                                    blobs[unscoped_blob_name].shape)
                            )
                        else:
                            workspace.FeedBlob(
                                scoped_blob_name,
                                blobs[unscoped_blob_name].astype(
                                    np.float32, copy=False))
    else:
        log.info('Skip initializing model parameters from file: {}'.format(
            weights_file
        ))
    log.info('Complete initialize_master_xpu_model_params')
    return start_epoch, lr, best_metric
print("Model saved as " + full_init_net_out + " and " + full_predict_net_out) # [end-20181001-ben-add] # # # load model # blob = workspace.FetchBlob("data") plt.figure("Load data") plt.title("Batch of Testing Data") _ = visualize.NCHW.ShowMultiple(blob) # reset the workspace, to make sure the model is actually loaded print("The blobs in the workspace before reset: {}".format( workspace.Blobs())) workspace.ResetWorkspace(root_folder) print("The blobs in the workspace after reset: {}".format( workspace.Blobs())) # all blobs are destroyed # load the predict net and verify the blobs predict_net = pe.prepare_prediction_net( os.path.join(root_folder, "mnist_model.minidb"), "minidb") print("The blobs in the workspace after loading the model: {}".format( workspace.Blobs())) # feed the previously saved data to the loaded model workspace.FeedBlob("data", blob) workspace.RunNetOnce(predict_net) softmax = workspace.FetchBlob("softmax")
def main():
    """Run the stage-2 net over the validation list and save label maps.

    Sets up Caffe2 and the config, builds the test model and the data
    loader, creates the model net, optionally loads weights, then runs
    10000 iterations. For every sample of every batch the
    'probs_human_NCHW_stage2' output is resized to the sample's width and
    height and its per-pixel argmax is written as '<id>.png' under
    '<output_dir>/fusion'.
    """
    # Initialize C2
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1'])

    # Set up logging and load config options
    logger = setup_logging(__name__)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()

    smi_output, cuda_ver, cudnn_ver = c2_utils.get_nvidia_info()
    environment_report = (
        ("cuda version : {}", cuda_ver),
        ("cudnn version: {}", cudnn_ver),
        ("nvidia-smi output:\n{}", smi_output),
    )
    for template, value in environment_report:
        logger.info(template.format(value))
    logger.info('Training with config:')
    logger.info(pprint.pformat(cfg))

    # Note that while we set the numpy random seed network training will not
    # be deterministic in general. There are sources of non-determinism that
    # cannot be removed with a reasonable execution-speed tradeoff (such as
    # certain non-deterministic cudnn functions).
    np.random.seed(cfg.RNG_SEED)

    # test model
    logger.info("creat test model ...")
    test_model = test_engine.initialize_model_from_cfg(
        cfg.TEST.WEIGHTS, gpu_id=0)
    logger.info("created test model ...")
    #cfg.TRAIN.IMS_PER_BATCH = 1
    train_data = DataLoader(
        root, "val_id.txt", cfg, test_model, is_train=False)

    # create model
    model, weights_file, _start_iter, _checkpoints = create_model(
        False, cfg, output_dir)

    # test blob
    print(workspace.Blobs())

    # create input blob on every GPU scope
    blob_names = ['data_stage2']
    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            for blob_name in blob_names:
                workspace.CreateBlob(core.ScopedName(blob_name))

    # Override random weight initialization with weights from a saved model
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    logger.info("start test ...")
    save_root = os.path.join(output_dir, 'fusion')
    if not os.path.exists(save_root):
        os.makedirs(save_root)

    # NOTE(review): ``gpu_id`` below is the value left over from the blob
    # creation loop above (the last GPU index) -- confirm this is intended.
    for cur_iter in range(10000):
        # feed data
        # print("{} iter starting feed data...".format(cur_iter))
        data_stage2, gt_label, meta = train_data.next_batch()
        '''#
        print('input0-20 sungalsses max score:', np.max(data_stage2[0, 4, :, :]))
        print('input20-40 sungalsses max score:', np.max(data_stage2[0, 24, :, :]))
        print('input0-20 glovess max score:', np.max(data_stage2[0, 3, :, :]))
        print('input20-40 glovess max score:', np.max(data_stage2[0, 23, :, :]))
        #'''
        with c2_utils.NamedCudaScope(gpu_id):
            workspace.FeedBlob(core.ScopedName('data_stage2'), data_stage2)

        # print("workspace.RunNet(model.net.Proto().name)")
        with c2_utils.NamedCudaScope(gpu_id):
            workspace.RunNet(model.net.Proto().name)
            scores_nchw = workspace.FetchBlob(
                core.ScopedName('probs_human_NCHW_stage2'))
        # Move the second axis to the end before the per-sample resize.
        batch_probs = scores_nchw.transpose((0, 2, 3, 1))
        assert len(meta) == batch_probs.shape[0]
        #print('batch_probs shape:', batch_probs.shape)
        for idx, _ in enumerate(meta):
            info = meta[idx]
            out_size = (info['width'], info['height'])
            resized = cv2.resize(
                batch_probs[idx], out_size, interpolation=cv2.INTER_LINEAR)
            probs = resized.transpose((2, 0, 1))
            print('sungalsses max score:', np.max(probs[4, :, :]))
            print('glovess max score:', np.max(probs[3, :, :]))
            #print('probs shape:', probs.shape)
            out_path = os.path.join(save_root, info['id'] + '.png')
            cv2.imwrite(out_path, probs.argmax(0))
        print("prossed ", cur_iter)
def LoadModelFromPickleFile(
    model,
    pkl_file,
    use_gpu=True,
    root_gpu_id=0,
    bgr2rgb=False,
    inflating=True,
    collapsing=True,
    center_init=False,
):
    """Feed parameters stored in a pickle file into existing workspace blobs.

    Only parameters returned by ``model.GetAllParams()`` that are present in
    the pickle AND already exist in the workspace (under the
    ``gpu_<root_gpu_id>`` name scope) are touched. Depending on how the
    stored shape compares with the workspace blob, the stored array is
    copied as-is, reshaped from 2-D to 5-D, passed through ``InflateBlob``
    or passed through ``CollapseBlob``.

    Args:
        model: object exposing ``GetAllParams()``.
        pkl_file: path of the pickle file; either a dict of blobs or a dict
            with the blobs stored under the ``'blobs'`` key.
        use_gpu: feed under a CUDA device option if true, CPU otherwise.
        root_gpu_id: id used for both the name scope and the device option.
        bgr2rgb: if true, ``conv1_w`` is passed through ``FlipBGR2RGB`` when
            it is copied with an unchanged shape.
        inflating: if false, blobs that would need ``InflateBlob`` are skipped.
        collapsing: if false, blobs that would need ``CollapseBlob`` are
            skipped.
        center_init: ``InflateBlob`` receives 0 as its last argument when
            true and 1 otherwise.
    """
    ws_blobs = workspace.Blobs()

    # Pickles are binary data, so the file must be opened in binary mode.
    # NOTE(review): pickle.load can execute arbitrary code; only load weight
    # files from trusted sources.
    with open(pkl_file, 'rb') as fopen:
        blobs = pickle.load(fopen)

    if 'blobs' in blobs:
        blobs = blobs['blobs']

    # Ordered so that blobs are processed in the order the model lists them.
    unscoped_blob_names = OrderedDict()
    for blob in model.GetAllParams():
        unscoped_blob_names[unscope_name(str(blob))] = True

    device_opt = caffe2_pb2.CUDA if use_gpu else caffe2_pb2.CPU

    with core.NameScope('gpu_{}'.format(root_gpu_id)):
        with core.DeviceScope(core.DeviceOption(device_opt, root_gpu_id)):
            for unscoped_blob_name in unscoped_blob_names:
                scoped_blob_name = scoped_name(unscoped_blob_name)
                if unscoped_blob_name not in blobs:
                    log.info('{} not found'.format(unscoped_blob_name))
                    continue
                if scoped_blob_name not in ws_blobs:
                    # Nothing to overwrite in the workspace.
                    continue

                src_blob = blobs[unscoped_blob_name]
                ws_blob = workspace.FetchBlob(scoped_blob_name)
                target_shape = ws_blob.shape

                if target_shape == src_blob.shape:
                    log.info('copying {}'.format(unscoped_blob_name))
                    if bgr2rgb and unscoped_blob_name == 'conv1_w':
                        feeding_blob = FlipBGR2RGB(src_blob)
                    else:
                        feeding_blob = src_blob
                elif ws_blob.ndim == 5:
                    if src_blob.ndim == 2:
                        # inflate from FC to 1x1x1 conv
                        log.info(
                            'convolutionalize {}'.format(unscoped_blob_name))
                        feeding_blob = np.reshape(
                            src_blob, src_blob.shape + (1, 1, 1))
                    else:
                        # may need to inflate
                        if not inflating:
                            log.info(
                                '{} found, but inflating is ignored'.format(
                                    unscoped_blob_name))
                            continue
                        feeding_blob = InflateBlob(
                            src_blob,
                            target_shape,
                            unscoped_blob_name,
                            (0 if center_init else 1),
                        )
                elif ws_blob.ndim == 4:
                    # may need to collapse
                    if not collapsing:
                        log.info(
                            '{} found, but collapsing is ignored'.format(
                                unscoped_blob_name))
                        continue
                    feeding_blob = CollapseBlob(
                        src_blob, target_shape, unscoped_blob_name)
                else:
                    # Shapes differ and the blob is neither 4-D nor 5-D: no
                    # conversion is defined. Skip instead of feeding a stale
                    # (or undefined) array left over from another blob.
                    log.info(
                        '{} found, but shape {} cannot be converted to {}; '
                        'skipped'.format(
                            unscoped_blob_name, src_blob.shape, target_shape))
                    continue

                # either copy, inflate, or collapse blob
                workspace.FeedBlob(
                    scoped_blob_name,
                    feeding_blob.astype(np.float32, copy=False)
                )
nesterov=1, policy='poly', power=1., max_iter=MAX_ITER, ) # initialization workspace.RunNetOnce(model.param_init_net) workspace.CreateNet(model.net) print("hello foo") # ================= DEBUG PRINT ======================= # print(model.net.Proto()) # print(model.param_init_net.Proto()) # i = 0 # for param in model.param_to_grad: # print("{} : {}".format(i , param)) # i += 1 # i = 1 # for param in model.params: # print("{} : {}".format(i , param)) # i += 1 i = 1 for blob in workspace.Blobs(): print("{} : {}".format(i, blob)) i += 1
# Let's show all plots inline. # You might see a warning saying that caffe2 does not have GPU support. That means you are running a CPU-only build. Don't be alarmed - anything CPU is still runnable without problem. # ## Workspaces # # Let's cover workspaces first, where all the data reside. # # If you are familiar with Matlab, workspace consists of blobs you create and store in memory. For now, consider a blob to be a N-dimensional Tensor similar to numpy's ndarray, but is contiguous. Down the road, we will show you that a blob is actually a typed pointer that can store any type of C++ objects, but Tensor is the most common type stored in a blob. Let's show what the interface looks like. # # `Blobs()` prints out all existing blobs in the workspace. # `HasBlob()` queries if a blob exists in the workspace. For now, we don't have anything yet. # In[ ]: print("Current blobs in the workspace: {}".format(workspace.Blobs())) print("Workspace has blob 'X'? {}".format(workspace.HasBlob("X"))) # We can feed blobs into the workspace using `FeedBlob()`. # In[3]: X = np.random.randn(2, 3).astype(np.float32) print("Generated X from numpy:\n{}".format(X)) workspace.FeedBlob("X", X) # Now, let's take a look what blobs there are in the workspace. # In[4]: print("Current blobs in the workspace: {}".format(workspace.Blobs()))
merge_cfg_from_file(cfg_file) cfg.TRAIN.WEIGHTS = '' # NOTE: do not download pretrained model weights cfg.TEST.WEIGHTS = weights_file cfg.NUM_GPUS = 1 assert_and_infer_cfg() #according the cfg to bulid model model = initialize_model_from_cfg(weights_file) return model if __name__ == '__main__': workspace.GlobalInit(['caffe2', '--caffe2_log_level=0']) args = parse_args() model = get_model(args.cfg, args.wts) img = cv2.imread(args.img) #im_scale = im_conv_body_only(model,img,cfg.TEST.SCALE, cfg.TEST.MAX_SIZE) im_blob, im_scale, _im_info = blob_utils.get_image_blob( img, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE) with c2_utils.NamedCudaScope(0): # workspace.FeedBlob(core.ScopedName('data'), im_blob) # workspace.RunNet(model.net.Proto().name) # blob = workplace.FetchBlob('rois') # print 1 cls_b, _, _ = infer_engine.im_detect_all(model, img, None) blobs = workspace.Blobs() print blobs mask_logits = workspace.FetchBlob(core.ScopedName('mask_logits')) #print mask_logits print mask_logits.shape np.save('/data1/shuai/adas/code/mask_logits.npy', mask_logits)
def _compare(self, model, forward_only):
    """Run ``model.net`` with the RNN executor off and on and compare blobs.

    The parameter blobs produced by ``model.param_init_net`` are captured
    once and restored before each of the two runs, and NumPy's RNG is
    re-seeded, so both runs see identical inputs. After every sequence
    length in ``self.Tseq`` all workspace blobs are snapshotted; the test
    fails if any ndarray snapshot differs (``np.allclose``) between runs.
    """
    # Store list of blobs that exist in the beginning
    workspace.RunNetOnce(model.param_init_net)
    init_ws = {name: workspace.FetchBlob(name) for name in workspace.Blobs()}

    # Snapshots per run, keyed by whether the executor was enabled.
    snapshots = {}
    for enable_executor in [0, 1]:
        self.enable_rnn_executor(model.net, enable_executor, forward_only)
        workspace.ResetWorkspace()

        # Reset original state
        for name, value in init_ws.items():
            workspace.FeedBlob(name, value)

        np.random.seed(10022015)
        ws = {}
        for step, seq_len in enumerate(self.Tseq):
            input_data = np.random.rand(
                seq_len, self.batch_size, self.input_dim)
            workspace.FeedBlob("input", input_data.astype(np.float32))
            target_data = np.random.rand(
                seq_len, self.batch_size, self.hidden_dim)
            workspace.FeedBlob("target", target_data.astype(np.float32))

            # The net only has to be created once per run.
            if step == 0:
                workspace.CreateNet(model.net, overwrite=True)
            workspace.RunNet(model.net.Proto().name)

            # Store results for each iteration
            for name in workspace.Blobs():
                ws["{}.{}".format(name, step)] = workspace.FetchBlob(name)

        snapshots[bool(enable_executor)] = ws

    non_exec_ws = snapshots[False]
    rnn_exec_ws = snapshots[True]

    # Test that all blobs are equal after running with executor
    # or without.
    self.assertEqual(list(non_exec_ws.keys()), list(rnn_exec_ws.keys()))

    mismatch = False
    for key in rnn_exec_ws.keys():
        non_exec_v = non_exec_ws[key]
        rnn_exec_v = rnn_exec_ws[key]
        if type(non_exec_v) is not np.ndarray:
            continue
        if np.allclose(non_exec_v, rnn_exec_v):
            continue

        print("Mismatch: {}".format(key))
        nv = non_exec_v.flatten()
        rv = rnn_exec_v.flatten()
        # Report at most the first ten differing elements.
        reported = 0
        for pos, expected in enumerate(nv):
            if rv[pos] != expected:
                print(pos, rv[pos], expected)
                reported += 1
                if reported == 10:
                    break
        mismatch = True

    self.assertFalse(mismatch)
# Initialise the Caffe2 network's parameters and instantiate the net.
workspace.RunNetOnce(c2_net.param_init_net)
workspace.CreateNet(c2_net.net)

# load pretrained weights
# The file is opened in a `with` block so the handle is closed right after
# unpickling instead of being left to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code; only use trusted files.
with open('pretrained/i3d_baseline_32x2_IN_pretrain_400k.pkl', 'rb') as wts_file:
    wts = pickle.load(wts_file, encoding='latin')['blobs']

# Only plain ndarray entries are fed into the workspace.
for key in wts:
    if type(wts[key]) == np.ndarray:
        workspace.FeedBlob(key, wts[key])

# Run the Caffe2 net on `data` and keep every resulting blob for comparison.
workspace.FeedBlob('data', data)
workspace.RunNet(c2_net.net.Proto().name)
c2_blobs = {key: workspace.FetchBlob(key) for key in workspace.Blobs()}

#-----------------------------------------------------------------------------------------------#

torch.backends.cudnn.enabled = False
from models import resnet

data = torch.from_numpy(data).cuda()
pth_net = resnet.i3_res50(num_classes=400).cuda().eval()


def hook(module, input, output):
    """Forward hook: stash the module's output on the module itself."""
    setattr(module, "_value_hook", output)


# Register the hook on every (sub)module so each forward output is recorded.
for name, module in pth_net.named_modules():
    module.register_forward_hook(hook)
def test_lstm_extract_predictor_net(self):
    """Extract a predictor net from an LSTM model and check that it runs.

    The LSTM is built under a CPU device option with id 0, while the
    predictor net is extracted with a CPU device option with id 1. The test
    then feeds zero-filled inputs, creates and runs the extracted net, and
    checks the device options recorded in the step nets.
    """
    model = ModelHelper(name="lstm_extract_test")

    build_device = core.DeviceOption(caffe2_pb2.CPU, 0)
    with core.DeviceScope(build_device):
        output, _, _, _ = rnn_cell.LSTM(
            model=model,
            input_blob="input",
            seq_lengths="seqlengths",
            initial_states=("hidden_init", "cell_init"),
            dim_in=20,
            dim_out=40,
            scope="test",
            drop_states=True,
            return_last_layer_only=True,
        )

    # Run param init net to get the shapes for all inputs
    workspace.RunNetOnce(model.param_init_net)
    shapes = {
        name: workspace.FetchBlob(name).shape for name in workspace.Blobs()
    }

    # But export in CPU
    export_device = core.DeviceOption(caffe2_pb2.CPU, 1)
    (predict_net, export_blobs) = ExtractPredictorNet(
        net_proto=model.net.Proto(),
        input_blobs=["input"],
        output_blobs=[output],
        device=export_device,
    )

    # Create the net and run once to see it is valid
    # Populate external inputs with correctly shaped random input
    # and also ensure that the export_blobs was constructed correctly.
    workspace.ResetWorkspace()
    shapes.update({
        'input': [10, 4, 20],
        'cell_init': [1, 4, 40],
        'hidden_init': [1, 4, 40],
    })

    print(predict_net.Proto().external_input)
    self.assertIn('seqlengths', predict_net.Proto().external_input)

    for einp in predict_net.Proto().external_input:
        if einp == 'seqlengths':
            workspace.FeedBlob(
                "seqlengths", np.array([10] * 4, dtype=np.int32))
            continue
        workspace.FeedBlob(
            einp,
            np.zeros(shapes[einp]).astype(np.float32),
        )
        # Every external input except the data input must have been exported.
        if einp != 'input':
            self.assertIn(einp, export_blobs)

    print(str(predict_net.Proto()))
    self.assertTrue(workspace.CreateNet(predict_net.Proto()))
    self.assertTrue(workspace.RunNet(predict_net.Proto().name))

    # Validate device options set correctly for the RNNs
    import google.protobuf.text_format as protobuftx
    for op in predict_net.Proto().op:
        if op.type != 'RecurrentNetwork':
            continue
        for arg in op.arg:
            if arg.name == "step_net":
                # The step net is stored as text; parse it back to a NetDef.
                step_proto = caffe2_pb2.NetDef()
                protobuftx.Merge(arg.s, step_proto)
                for step_op in step_proto.op:
                    step_device = step_op.device_option
                    self.assertEqual(0, step_device.device_type)
                    self.assertEqual(1, step_device.cuda_gpu_id)
            elif arg.name == 'backward_step_net':
                self.assertEqual("", arg.s)
def initialize_gpu_from_weights_file(model, weights_file, gup_id=0,
                                     gpu_id=None):
    """Initialize a network's parameters on one GPU from a weights file.

    Args:
        model: model whose ``params`` are initialized.
        weights_file: path handed to ``load_object``; the loaded dict may
            carry a ``'cfg'`` entry and may store blobs under ``'blobs'``.
        gup_id: GPU id whose name/device scope is used. The misspelled name
            is kept so existing callers keep working.
        gpu_id: correctly spelled alias for ``gup_id``; takes precedence
            when it is not None.

    Raises:
        AssertionError: if a blob already in the workspace has a different
            shape than the blob of the same name in the weights file.
    """
    if gpu_id is not None:
        gup_id = gpu_id

    logger.info('Loading weights from: {}'.format(weights_file))
    ws_blobs = workspace.Blobs()
    src_blobs = load_object(weights_file)

    if 'cfg' in src_blobs:
        saved_cfg = load_cfg(src_blobs['cfg'])
        configure_bbox_reg_weights(model, saved_cfg)
    if 'blobs' in src_blobs:
        # Backwards compat: the dictionary used to hold only blobs, now they
        # are stored under the 'blobs' key.
        src_blobs = src_blobs['blobs']

    # Ordered so that blobs are logged in model order.
    unscoped_para_names = OrderedDict()
    for blob in model.params:
        unscoped_para_names[c2_utils.UnscopeName(str(blob))] = True

    with c2_utils.NamedCudaScope(gup_id):
        for unscoped_para_name in unscoped_para_names:
            marker_pos = unscoped_para_name.find(']_')
            if marker_pos >= 0 and unscoped_para_name not in src_blobs:
                # Special case for sharing initialization from a pretrained
                # model:
                # If a blob named '_[xyz]_foo' is in model.params and not in
                # the initialization blob dictionary, then load source blob
                # 'foo' into destination blob '_[xyz]_foo'
                # (slice off everything up to and including the ']_' marker).
                src_name = unscoped_para_name[marker_pos + 2:]
            else:
                src_name = unscoped_para_name

            if src_name not in src_blobs:
                logger.info('{:s} not found.'.format(src_name))
                continue

            dst_name = core.ScopedName(unscoped_para_name)
            src_blob = src_blobs[src_name]
            has_momentum = src_name + '_momentum' in src_blobs
            has_momentum_str = ' [+ momentum] ' if has_momentum else ''
            logger.info(
                '{:s}{:} loaded from weights file into {:s}: {}'.format(
                    src_name, has_momentum_str, dst_name, src_blob.shape))

            if dst_name in ws_blobs:
                # If the blob is already in the workspace, make sure that it
                # matches the shape of the loaded blob.
                ws_blob = workspace.FetchBlob(dst_name)
                assert ws_blob.shape == src_blob.shape, \
                    ('Workspace blob {} with shape {} does not match '
                     'weights file shape {}').format(
                        src_name, ws_blob.shape, src_blob.shape)

            workspace.FeedBlob(
                dst_name, src_blob.astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    src_blobs[src_name + '_momentum'].astype(
                        np.float32, copy=False))

    # Blobs that are in the weights file but unused by this model are kept in
    # the workspace on the CPU under a dedicated prefix.
    # NOTE(review): the prefix is spelled '__presever__' here; confirm that
    # whatever reads these blobs back expects this exact spelling.
    for src_name in src_blobs.keys():
        if (src_name not in unscoped_para_names and
                not src_name.endswith('_momentum') and
                src_blobs[src_name] is not None):
            with c2_utils.CpuScope():
                workspace.FeedBlob(
                    '__presever__/{:s}'.format(src_name),
                    src_blobs[src_name])
                logger.info(
                    '{:s} preserved in workspace (unused)'.format(src_name))
def test_meta_net_def_net_runs(self):
    """Round-trip a PredictorExportMeta through a minidb and run its nets.

    Saves the export meta (with an extra init net that fills ``data`` with
    ones) to a temporary DB, resets the workspace, loads the meta net def
    back and checks that the init nets create ``data``/``y`` and that the
    predict net output equals ``data . y_w^T + y_b``.
    """
    import os  # local import: only needed to remove the temporary DB file

    for param, value in self.params.items():
        workspace.FeedBlob(param, value)

    extra_init_net = core.Net('extra_init')
    extra_init_net.ConstantFill('data', 'data', value=1.0)
    pem = pe.PredictorExportMeta(
        predict_net=self.predictor_export_meta.predict_net,
        parameters=self.predictor_export_meta.parameters,
        inputs=self.predictor_export_meta.inputs,
        outputs=self.predictor_export_meta.outputs,
        shapes=self.predictor_export_meta.shapes,
        extra_init_net=extra_init_net,
        net_type='dag',
    )

    db_type = 'minidb'
    db_file = tempfile.NamedTemporaryFile(
        delete=False, suffix=".{}".format(db_type))
    # Only the path is used below; close our own handle right away so it is
    # not leaked for the rest of the test run.
    db_file.close()

    try:
        pe.save_to_db(
            db_type=db_type,
            db_destination=db_file.name,
            predictor_export_meta=pem)

        workspace.ResetWorkspace()

        meta_net_def = pe.load_from_db(
            db_type=db_type,
            filename=db_file.name,
        )

        self.assertTrue("data" not in workspace.Blobs())
        self.assertTrue("y" not in workspace.Blobs())

        init_net = pred_utils.GetNet(meta_net_def, pc.PREDICT_INIT_NET_TYPE)

        # 0-fills externalblobs blobs and runs extra_init_net
        workspace.RunNetOnce(init_net)

        self.assertTrue("data" in workspace.Blobs())
        self.assertTrue("y" in workspace.Blobs())

        print(workspace.FetchBlob("data"))
        np.testing.assert_array_equal(
            workspace.FetchBlob("data"), np.ones(shape=(1, 5)))
        np.testing.assert_array_equal(
            workspace.FetchBlob("y"), np.zeros(shape=(1, 10)))

        # Load parameters from DB
        global_init_net = pred_utils.GetNet(meta_net_def,
                                            pc.GLOBAL_INIT_NET_TYPE)
        workspace.RunNetOnce(global_init_net)

        # Run the net with a reshaped input and verify we are
        # producing good numbers (with our custom implementation)
        workspace.FeedBlob("data", np.random.randn(2, 5).astype(np.float32))
        predict_net = pred_utils.GetNet(meta_net_def, pc.PREDICT_NET_TYPE)
        self.assertEqual(predict_net.type, 'dag')
        workspace.RunNetOnce(predict_net)
        np.testing.assert_array_almost_equal(
            workspace.FetchBlob("y"),
            workspace.FetchBlob("data").dot(self.params["y_w"].T) +
            self.params["y_b"])
    finally:
        # Best-effort cleanup of the temporary DB; a failure to delete it
        # must not mask the real test outcome.
        try:
            os.remove(db_file.name)
        except OSError:
            pass
loss = np.zeros(total_iters) # Now, we will manually run the network for 200 iterations. data_array = [] drop1_array = [] fc2_array = [] for i in range(total_iters): #for i in range(1): workspace.RunNet(train_model.net) accuracy[i] = workspace.FetchBlob('accuracy') loss[i] = workspace.FetchBlob('loss') print('iter {0} loss = {1} '.format(i, loss[i])) print(' accuracy = {0} '.format(accuracy[i])) print("Current blobs in the workspace: {}".format(workspace.Blobs())) print("Workspace has blob 'data'? {}".format(workspace.HasBlob("data"))) #print("Fetched data:\n{}".format(workspace.FetchBlob("data"))) data_array.append(workspace.FetchBlob("data")) print('data_array', np.shape(data_array)) print("Workspace has blob 'drop1'? {}".format(workspace.HasBlob("drop1"))) #print("Fetched drop1:\n{}".format(workspace.FetchBlob("drop1"))) drop1_array.append(workspace.FetchBlob("drop1")) print('drop1_array', np.shape(drop1_array)) print("Workspace has blob 'fc2'? {}".format(workspace.HasBlob("fc2"))) #print("Fetched fc2:\n{}".format(workspace.FetchBlob("fc2"))) fc2_array.append(workspace.FetchBlob("fc2")) print('fc2_array', np.shape(fc2_array))
def from_caffe2(self, init_net, predict_net):
    """Construct Relay expression from caffe2 graph.

    Parameters
    ----------
    init_net : protobuf object
    predict_net : protobuf object

    Returns
    -------
    mod : tvm.relay.Module
        The module that optimizations will be performed on.

    params : dict
        A dict of name: tvm.nd.array pairs, used as pretrained weights
    """
    from caffe2.python import workspace
    # Running the init net materialises the weight blobs in the workspace.
    workspace.RunNetOnce(init_net)

    # Input: the first input of the first operator.
    input_name = predict_net.op[0].input[0]

    # Params: every workspace blob that some operator consumes, except the
    # network input itself.
    consumed = {name for c2_op in predict_net.op for name in c2_op.input}
    self._params = {}
    for name in workspace.Blobs():
        if name == input_name or name not in consumed:
            continue
        self._params[name] = _nd.array(workspace.FetchBlob(name))

    # Variables: one Relay var per external input.
    self._nodes = {}
    for name in predict_net.external_input:
        if name in self._params:
            param = self._params[name]
            self._nodes[name] = new_var(
                name, shape=param.shape, dtype=param.dtype)
            continue

        # Non-parameter input: use the user-supplied shape/dtype if present.
        shape = self._shape[name] if name in self._shape else ()
        if isinstance(self._dtype, dict) and name in self._dtype:
            dtype = str(self._dtype[name])
        elif isinstance(self._dtype, str):
            dtype = self._dtype
        else:
            dtype = "float32"
        self._nodes[name] = new_var(name, shape=shape, dtype=dtype)

    # Ops: first record which operator produces each blob, then convert.
    for c2_op in predict_net.op:
        for produced in c2_op.output:
            self._ops[produced] = c2_op

    for c2_op in predict_net.op:
        self._process_op(c2_op)

    # Outputs: a tuple when there are several, the bare node otherwise.
    out = [self._nodes[name] for name in predict_net.external_output]
    outputs = _expr.Tuple(out) if len(out) > 1 else out[0]

    func = _expr.Function(ir_pass.free_vars(outputs), outputs)
    self._mod[self._mod.entry_func] = func

    return self._mod, self._params
final_image = transposed_image
print("Shape of final_image: {}".format(np.array(final_image).shape))

# Read both serialized nets from the model directory.
serialized_nets = []
for pb_name in ("/init_net.pb", "/predict_net.pb"):
    with open(MODEL_ROOT + pb_name, "rb") as f:
        serialized_nets.append(f.read())
init_net, predict_net = serialized_nets

# Start from an empty workspace and feed the image under the model's
# input blob name.
workspace.ResetWorkspace()
blob_name = model_props[MODEL].blob_name
workspace.FeedBlob(blob_name, final_image)
blobs_after_feed = workspace.Blobs()
print("The blobs in the workspace after FeedBlob: {}".format(blobs_after_feed))

# Create a predictor using the loaded model.
p = workspace.Predictor(init_net, predict_net)

# Time args.iterations consecutive runs; only the last result is kept.
start = time.time()
for i in range(args.iterations):
    results = p.run([final_image])
end = time.time()

if args.time:
    seconds_per_iteration = (end - start) / args.iterations
    print("Wall time per iteration (s): {:0.4f}".format(seconds_per_iteration))

# Index of the highest score and the sum of all scores of the first result.
scores = results[0][0]
max_idx = np.argmax(scores)
sum_probability = sum(scores)
def InferTensorRunAndCompare(self, model):
    '''
    Runs shape inference, and then the model to check
    that the inferred shapes agree with the actual ones
    '''
    (shapes, types) = workspace.InferShapesAndTypes(
        [model.param_init_net, model.net],
    )

    # .. Create net
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, True)
    workspace.RunNet(model.Proto().name)

    # NumPy dtype -> TensorProto data type, checked in this order.
    # (BYTE and STRING have no entry.)
    tensor_proto = caffe2_pb2.TensorProto
    dtype_table = (
        (np.dtype('float32'), tensor_proto.FLOAT),
        (np.dtype('int32'), tensor_proto.INT32),
        (np.dtype('bool'), tensor_proto.BOOL),
        (np.dtype('uint8'), tensor_proto.UINT8),
        (np.dtype('int8'), tensor_proto.INT8),
        (np.dtype('uint16'), tensor_proto.UINT16),
        (np.dtype('int16'), tensor_proto.INT16),
        (np.dtype('int64'), tensor_proto.INT64),
        (np.dtype('float16'), tensor_proto.FLOAT16),
        (np.dtype('float64'), tensor_proto.DOUBLE),
    )

    # ... and then check the shapes mismatch
    correct_shapes = {}
    correct_types = {}
    for b in workspace.Blobs():
        arr = workspace.FetchBlob(b)
        correct_shapes[b] = arr.shape
        if type(arr) is not np.ndarray:
            correct_types[b] = str(type(arr))
            continue
        for np_dtype, proto_type in dtype_table:
            if arr.dtype == np_dtype:
                correct_types[b] = proto_type
                break
        else:
            correct_types[b] = "unknown {}".format(arr.dtype)

    # Every blob that exists after the run must match the inference result.
    for b in correct_shapes:
        inferred = np.array(shapes[b]).astype(np.int32)
        actual = np.array(correct_shapes[b]).astype(np.int32)
        self.assertTrue(
            np.array_equal(inferred, actual),
            "Shape {} mismatch: {} vs. {}".format(
                b, shapes[b], correct_shapes[b]
            )
        )
        self.assertFalse(
            b not in types and b in correct_types,
            "Type for {} not defined".format(b),
        )
        self.assertEqual(
            types[b],
            correct_types[b],
            "Type {} mismatch: {} vs. {}".format(
                b, types[b], correct_types[b],
            )
        )
from caffe2.python.cnn import CNNModelHelper
import unittest
import numpy as np

# Layer dimensions: [m][k] * [k][n] = [m][n]
m, k, n = (1, 28 * 28, 10)
# x is an m-by-k 2D float32 tensor with values in [-0.5, 0.5)
x = np.random.rand(m, k).astype(np.float32) - 0.5

workspace.ResetWorkspace()  # clear the workspace
workspace.FeedBlob("x", x)  # feed x into the workspace as blob "x"

model = ModelHelper(name="test_model")  # create the model
# Model's protocol buffer before any operator is added. The return value is
# discarded, so it is only displayed in an interactive session.
model.Proto()

# Fully connected layer from blob "x" to blob "y" with input dim k and
# output dim n.
brew.fc(
    model, "x", "y", k, n
)
brew.softmax(model, "y", "z")  # softmax over "y", written to blob "z"

model.Validate()
# Model's protocol buffer after the operators were added (value discarded,
# see above).
model.Proto()

# Initialise the parameters; per the blob names below these are y_w (weight)
# and y_b (bias).
workspace.RunNetOnce(
    model.param_init_net)
# List of the workspace's blobs (value discarded, see above).
workspace.Blobs()
# workspace.FetchBlob("y_w")
# workspace.FetchBlob("y_b")

workspace.RunNetOnce(model.net)  # run the forward net once
# y = workspace.FetchBlob("y")
# z = workspace.FetchBlob("z")
def load_save(self, src_device_type, src_gpu_id,
              dst_device_type, dst_gpu_id):
    """Save blobs of many dtypes to a DB and load them back in several ways.

    Blobs are fed with the source device option, saved with the ``Save``
    operator into a temporary folder and then reloaded with the ``Load``
    operator, both keeping the stored device (``keep_device=1``) and
    redirecting to the destination device (``keep_device=0``), with an
    explicit blob list and with ``load_all``.

    Args:
        src_device_type: device type the blobs are fed with.
        src_gpu_id: device id the blobs are fed with.
        dst_device_type: device type passed to the ``Load`` operator.
        dst_gpu_id: device id passed to the ``Load`` operator.
    """
    workspace.ResetWorkspace()
    # np.bool_ is the NumPy boolean scalar type; the bare ``np.bool`` alias
    # was deprecated in NumPy 1.20 and later removed.
    dtypes = [np.float16, np.float32, np.float64, np.bool_, np.int8,
              np.int16, np.int32, np.int64, np.uint8, np.uint16]
    arrays = [np.random.permutation(6).reshape(2, 3).astype(T)
              for T in dtypes]
    src_device_option = core.DeviceOption(
        src_device_type, src_gpu_id)
    dst_device_option = core.DeviceOption(
        dst_device_type, dst_gpu_id)

    # Blobs are simply named "0", "1", ... after their index in `arrays`.
    blob_names = [str(i) for i in range(len(arrays))]
    for name, arr in zip(blob_names, arrays):
        self.assertTrue(workspace.FeedBlob(name, arr, src_device_option))
        self.assertTrue(workspace.HasBlob(name))

    # Created outside the try block so that the cleanup in `finally` never
    # refers to an unbound name if mkdtemp itself fails.
    tmp_folder = tempfile.mkdtemp()
    try:
        # Saves the blobs to a local db.
        op = core.CreateOperator(
            "Save",
            blob_names, [],
            absolute_path=1,
            db=os.path.join(tmp_folder, "db"), db_type=self._db_type)
        self.assertTrue(workspace.RunOperatorOnce(op))

        # Reset the workspace so that anything we load is surely loaded
        # from the serialized proto.
        workspace.ResetWorkspace()
        self.assertEqual(len(workspace.Blobs()), 0)

        def _LoadTest(keep_device, device_type, gpu_id, blobs, loadAll):
            """A helper subfunction to test keep and not keep."""
            op = core.CreateOperator(
                "Load",
                [], blobs,
                absolute_path=1,
                db=os.path.join(tmp_folder, "db"), db_type=self._db_type,
                device_option=dst_device_option,
                keep_device=keep_device,
                load_all=loadAll)
            self.assertTrue(workspace.RunOperatorOnce(op))
            for i, arr in enumerate(arrays):
                self.assertTrue(workspace.HasBlob(str(i)))
                fetched = workspace.FetchBlob(str(i))
                self.assertEqual(fetched.dtype, arr.dtype)
                np.testing.assert_array_equal(
                    workspace.FetchBlob(str(i)), arr)
                # The serialized blob records which device it lives on.
                proto = caffe2_pb2.BlobProto()
                proto.ParseFromString(workspace.SerializeBlob(str(i)))
                self.assertTrue(proto.HasField('tensor'))
                self.assertEqual(proto.tensor.device_detail.device_type,
                                 device_type)
                if device_type == caffe2_pb2.CUDA:
                    self.assertEqual(proto.tensor.device_detail.cuda_gpu_id,
                                     gpu_id)

        blobs = [str(i) for i in range(len(arrays))]
        # Load using device option stored in the proto, i.e.
        # src_device_option
        _LoadTest(1, src_device_type, src_gpu_id, blobs, 0)
        # Load again, but this time load into dst_device_option.
        _LoadTest(0, dst_device_type, dst_gpu_id, blobs, 0)
        # Load back to the src_device_option to see if both paths are able
        # to reallocate memory.
        _LoadTest(1, src_device_type, src_gpu_id, blobs, 0)
        # Reset the workspace, and load directly into the dst_device_option.
        workspace.ResetWorkspace()
        _LoadTest(0, dst_device_type, dst_gpu_id, blobs, 0)

        # Test load all which loads all blobs in the db into the workspace.
        workspace.ResetWorkspace()
        _LoadTest(1, src_device_type, src_gpu_id, [], 1)
        # Load again making sure that overwrite functionality works.
        _LoadTest(1, src_device_type, src_gpu_id, [], 1)
        # Load again with different device.
        _LoadTest(0, dst_device_type, dst_gpu_id, [], 1)
        workspace.ResetWorkspace()
        _LoadTest(0, dst_device_type, dst_gpu_id, [], 1)
    finally:
        # clean up temp folder.
        try:
            shutil.rmtree(tmp_folder)
        except OSError as e:
            # A folder that is already gone is fine; anything else is not.
            if e.errno != errno.ENOENT:
                raise
def main():
    """Convert an init_net/predict_net protobuf pair into a weights file.

    Parses the two serialized NetDefs named on the command line, appends
    them to a fresh ``DetectionModelHelper``, runs the init net so the
    parameter blobs exist in the workspace, and saves the model to
    ``<out_dir>/<net name>.pkl`` via ``net_utils.save_model_to_weights_file``.
    """
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)

    # with open(args.init_net_path) as f:
    #     init_net = f.read()
    # with open(args.predict_net_path) as f:
    #     predict_net = f.read()
    # p = workspace.Predictor(init_net, predict_net)
    # img = np.zeros((1,3,256,256), dtype=np.float32)
    # workspace.FeedBlob('data', img)
    # results = p.run({'data': img})

    # Serialized protobufs are binary data: read them in binary mode so
    # ParseFromString receives the raw bytes unchanged.
    init_def = caffe2_pb2.NetDef()
    with open(args.init_net_path, 'rb') as f:
        init_def.ParseFromString(f.read())
        # init_def.device_option.CopyFrom(device_options)
    net_def = caffe2_pb2.NetDef()
    with open(args.predict_net_path, 'rb') as f:
        net_def.ParseFromString(f.read())
        # net_def.device_option.CopyFrom(device_options)

    # model = model_helper.ModelHelper(arg_scope=arg_scope)
    # model = cnn.CNNModelHelper()
    model = detector.DetectionModelHelper(
        name=net_def.name, train=True, num_classes=1000, init_params=True)
    predict_net = core.Net(net_def)
    init_net = core.Net(init_def)
    model.param_init_net.AppendNet(init_net)
    model.net.AppendNet(predict_net)
    # Every external input except 'data' is registered as a parameter.
    model.params.extend([
        core.BlobReference(x)
        for x in predict_net.Proto().external_input
        if x != 'data'
    ])
    # add_training_operators(model, 'pred', 'label')

    # Create the input blobs under the name scope of GPU 0.
    blob_names = ['data', 'label']
    for gpu_id in range(1):
        with c2_utils.NamedCudaScope(gpu_id):
            for blob_name in blob_names:
                workspace.CreateBlob(core.ScopedName(blob_name))

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, overwrite=True)

    out_file_name = os.path.join(args.out_dir, net_def.name + '.pkl')
    net_utils.save_model_to_weights_file(out_file_name, model)

    # workspace.CreateNet(init_def)
    # workspace.CreateNet(net_def)
    # workspace.RunNet(net_def)
    # workspace.RunNet(init_def)

    # Debug output describing what ended up in the workspace.
    print(type(init_def))
    print(net_def.name)
    print(workspace.blobs)
    print(len(workspace.blobs))
    print(workspace.Blobs())