def __init__(self,
             dataset_type,
             dataset_path,
             real_path,
             mesh_path,
             mesh_info,
             object_id,
             batch_size,
             img_res=(224, 224, 3),
             is_testing=False):
    """Load annotations, set up the renderer and build the augmentation pipeline.

    The constructor reads ``annotations/instances_train.json`` below
    ``dataset_path``, keeps the annotations that pass the visibility and
    image-border filter, resolves the corresponding image paths, shuffles
    both lists together and finally calls ``self.on_epoch_end()``.

    Args:
        dataset_type: Name of the sub-folder of ``<dataset_path>/images``
            that holds the images (stored as ``self.data_type``).
        dataset_path: Root folder of the dataset.
        real_path: Folder whose entries are listed into ``self.real_path``.
        mesh_path: Path of the mesh handed to the renderer.
        mesh_info: Path of a YAML file mapping object ids to a dict that
            contains at least a ``'diameter'`` entry.
        object_id: Object id (converted with ``int``) registered with the
            renderer.
        batch_size: Number of samples per batch.
        img_res: Stored as ``self.img_res``; not used otherwise here.
        is_testing: Stored as ``self.is_testing``; not used otherwise here.

    Raises:
        ValueError: If no annotation passes the filter, so there is nothing
            to train on.
    """
    self.data_type = dataset_type
    self.img_res = img_res
    self.dataset_path = dataset_path
    self.real_path = [
        os.path.join(real_path, x) for x in os.listdir(real_path)
    ]
    self.batch_size = batch_size
    self.is_testing = is_testing
    self.ply_path = mesh_path
    self.obj_id = int(object_id)

    # Annotation files. Only the training annotations are read below;
    # ``self.val_info`` is merely stored.
    self.train_info = os.path.join(self.dataset_path, 'annotations',
                                   'instances_' + 'train' + '.json')
    self.val_info = os.path.join(self.dataset_path, 'annotations',
                                 'instances_' + 'val' + '.json')
    self.mesh_info = mesh_info

    with open(self.train_info, 'r') as js:
        data = json.load(js)
    image_ann = data["images"]
    anno_ann = data["annotations"]

    self.image_ids = []
    self.Anns = []

    # Renderer with a fixed 640x480 viewport.
    self.ren = bop_renderer.Renderer()
    self.ren.init(640, 480)
    self.ren.add_object(self.obj_id, self.ply_path)

    # ``safe_load`` works on every PyYAML release; a bare ``yaml.load(stream)``
    # raises TypeError on PyYAML >= 6 and can construct arbitrary objects on
    # older releases. The ``with`` block also closes the file handle.
    with open(self.mesh_info, 'r') as stream:
        models_info = yaml.safe_load(stream)
    for key, value in models_info.items():
        # NOTE(review): the YAML key is compared against ``obj_id + 1`` --
        # presumably the file is 1-based while the renderer id is 0-based;
        # confirm. ``self.model_dia`` stays unset if no key matches.
        if int(key) == self.obj_id + 1:
            self.model_dia = value['diameter']

    for ann in anno_ann:
        bbox = ann['bbox']
        y_mean = bbox[0] + bbox[2] * 0.5
        x_mean = bbox[1] + bbox[3] * 0.5
        max_side = np.max(bbox[2:])

        # Square window of 1.5x the longer bbox side around the bbox centre.
        x_min = int(x_mean - max_side * 0.75)
        x_max = int(x_mean + max_side * 0.75)
        y_min = int(y_mean - max_side * 0.75)
        y_max = int(y_mean + max_side * 0.75)

        # NOTE(review): category id 2 is hard-coded and independent of
        # ``object_id`` -- confirm this is intended.
        if (ann['category_id'] != 2 or ann['feature_visibility'] < 0.5
                or x_min < 0 or x_max > 639 or y_min < 0 or y_max > 479):
            continue

        self.Anns.append(ann)

        # Image files are named by the image id, zero-padded to 11 digits.
        name = str(ann['image_id']).zfill(11) + '_rgb.png'
        img_path = os.path.join(self.dataset_path, 'images', self.data_type,
                                name)
        self.image_ids.append(img_path)

    # Camera intrinsics are taken from the first image entry.
    self.fx = image_ann[0]["fx"]
    self.fy = image_ann[0]["fy"]
    self.cx = image_ann[0]["cx"]
    self.cy = image_ann[0]["cy"]

    if not self.Anns:
        # Unpacking ``zip(*[])`` below would fail with an opaque ValueError;
        # report the actual cause instead.
        raise ValueError(
            'No annotation in {} passed the category/visibility/border '
            'filter.'.format(self.train_info))

    # Shuffle annotations and image paths jointly so they stay aligned.
    c = list(zip(self.Anns, self.image_ids))
    np.random.shuffle(c)
    self.Anns, self.image_ids = zip(*c)

    self.img_seq = iaa.Sequential(
        [
            # blur
            iaa.SomeOf((0, 2), [
                iaa.GaussianBlur((0.0, 2.0)),
                iaa.AverageBlur(k=(3, 7)),
                iaa.MedianBlur(k=(3, 7)),
                iaa.BilateralBlur(d=(1, 7)),
                iaa.MotionBlur(k=(3, 7))
            ]),
            # color
            iaa.SomeOf((0, 2), [
                iaa.AddToHueAndSaturation((-15, 15)),
                iaa.Grayscale(alpha=(0.0, 0.2))
            ]),
            # brightness
            iaa.OneOf([
                iaa.Sequential([
                    iaa.Add((-10, 10), per_channel=0.5),
                    iaa.Multiply((0.75, 1.25), per_channel=0.5)
                ]),
                iaa.Add((-10, 10), per_channel=0.5),
                iaa.Multiply((0.75, 1.25), per_channel=0.5),
                iaa.FrequencyNoiseAlpha(
                    exponent=(-4, 0),
                    first=iaa.Multiply((0.75, 1.25), per_channel=0.5),
                    second=iaa.LinearContrast((0.7, 1.3), per_channel=0.5))
            ]),
            # contrast
            iaa.SomeOf((0, 2), [
                iaa.GammaContrast((0.75, 1.25), per_channel=0.5),
                iaa.SigmoidContrast(
                    gain=(0, 10), cutoff=(0.25, 0.75), per_channel=0.5),
                iaa.LogContrast(gain=(0.75, 1), per_channel=0.5),
                iaa.LinearContrast(alpha=(0.7, 1.3), per_channel=0.5)
            ]),
        ],
        random_order=True)

    self.n_batches = int(np.floor(len(self.image_ids) / self.batch_size))
    self.on_epoch_end()
    self.dataset_length = len(self.image_ids)
if __name__ == "__main__": data_path = '/home/stefan/data/renderings/CIT_render_250/patches' mesh_path = '/home/stefan/data/Meshes/CIT_color/' target = '/home/stefan/data/train_data/CIT_PBR/' visu = True resX = 640 resY = 480 fx = 623.1298104626079 # blender calc fy = 617.1590544390115 # blender calc cx = 320.0 cy = 240.0 K = [fx, 0.0, cx, 0.0, fy, cy, 0.0, 0.0, 1.0] ren = bop_renderer.Renderer() ren.init(resX, resY) mesh_id = 1 light_pose = [0.0, 0.0, 0.0] light_color = [1.0, 0.0, 0.0] light_ambient_weight = 1.0 light_diffuse_weight = 1.0 light_spec_weight = 0.0 light_spec_shine = 1.0 ren.set_light(light_pose, light_color, light_ambient_weight, light_diffuse_weight, light_spec_weight, light_spec_shine) categories = [] for mesh_now in os.listdir(mesh_path): mesh_path_now = os.path.join(mesh_path, mesh_now) if mesh_now[-4:] != '.ply':
def __init__(self, width, height):
    """Create and initialize the C++ renderer backend.

    Forwards ``width`` and ``height`` to the base-class initializer, then
    instantiates ``bop_renderer.Renderer``, initializes it with the same
    size and finally calls ``self._set_light()``.

    :param width: Value passed on as the renderer width.
    :param height: Value passed on as the renderer height.
    """
    super(RendererCpp, self).__init__(width, height)

    # Keep the attribute assignment ahead of ``init`` so that the instance
    # already references the backend while it is being initialized.
    backend = bop_renderer.Renderer()
    self.renderer = backend
    backend.init(width, height)

    self._set_light()
def _infer_build_renderer(dataset):
    """Create a renderer sized to the dataset crop and load all eval models."""
    tf.logging.info('Initializing renderer for visualization...')

    vis_renderer = bop_renderer.Renderer()
    vis_renderer.init(dataset.crop_size[0], dataset.crop_size[1])

    model_type_vis = 'eval'
    dp_model = dataset_params.get_model_params(
        config.BOP_PATH, dataset.dataset_name, model_type=model_type_vis)
    for obj_id in dp_model['obj_ids']:
        vis_renderer.add_object(
            obj_id, dp_model['model_tpath'].format(obj_id=obj_id))

    tf.logging.info('Renderer initialized.')
    return vis_renderer


def _infer_session_config(cpu_only):
    """Build the ``tf.ConfigProto`` used for the inference session."""
    if cpu_only:
        tf_config = tf.ConfigProto(device_count={'GPU': 0})
    else:
        tf_config = tf.ConfigProto()
        # Memory growth is switched off explicitly (setting it to True would
        # make TF allocate only the necessary GPU memory).
        tf_config.gpu_options.allow_growth = False

    # NOTE(review): the thread-pool settings are applied to both branches
    # here; the original nesting could not be recovered from the collapsed
    # source -- confirm.
    # Pool for nodes that parallelize their own execution internally.
    tf_config.intra_op_parallelism_threads = 10
    # Pool in which all ready nodes are scheduled.
    tf_config.inter_op_parallelism_threads = 10
    return tf_config


def _infer_average_first_image_time(poses_all, first_im_poses_num):
    """Overwrite the time of the first image's poses with the mean time.

    The first image takes longer because of the TensorFlow initialization,
    so its pose estimates get the average time over all estimates instead.
    """
    time_avg = sum((pose['time'] for pose in poses_all), 0.0)
    if poses_all:
        time_avg /= float(len(poses_all))
    for pose in poses_all[:first_im_poses_num]:
        pose['time'] = time_avg


def main(unused_argv):
    """Run pose-estimation inference with a trained model.

    Sets up the output folders below the model folder, builds the dataset
    provider and the prediction graph, restores a checkpoint, processes the
    images one by one with ``process_image`` and, if ``FLAGS.save_estimates``
    is set, writes the collected pose estimates to a CSV file.

    Args:
        unused_argv: Ignored.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # Folder of the model; flags are updated from the parameters stored there.
    model_dir = os.path.join(config.TF_MODELS_PATH, FLAGS.model)
    common.update_flags(os.path.join(model_dir, common.PARAMS_FILENAME))
    common.print_flags()

    # Checkpoints are read from 'train'; results go to 'infer' and 'vis'.
    checkpoint_dir = os.path.join(model_dir, 'train')

    infer_dir = os.path.join(model_dir, 'infer')
    tf.gfile.MakeDirs(infer_dir)

    vis_dir = os.path.join(model_dir, 'vis')
    tf.gfile.MakeDirs(vis_dir)

    # TFRecord files to run inference on, always handled as a list.
    tfrecord_names = FLAGS.infer_tfrecord_names
    if not isinstance(tfrecord_names, list):
        tfrecord_names = [tfrecord_names]

    # Stride of the final output: 1 when the logits are upsampled to the
    # input resolution, otherwise the single decoder output stride.
    if not FLAGS.upsample_logits:
        assert (len(FLAGS.decoder_output_stride) == 1)
        output_stride = FLAGS.decoder_output_stride[0]
    else:
        output_stride = 1

    with tf.Graph().as_default():

        return_gt_orig = np.any([
            FLAGS.task_type == common.LOCALIZATION,
            FLAGS.vis_gt_poses])
        return_gt_maps = np.any([
            FLAGS.vis_pred_obj_labels,
            FLAGS.vis_pred_obj_confs,
            FLAGS.vis_pred_frag_fields])

        # Dataset provider.
        dataset = datagen.Dataset(
            dataset_name=FLAGS.dataset,
            tfrecord_names=tfrecord_names,
            model_dir=model_dir,
            model_variant=FLAGS.model_variant,
            batch_size=1,
            max_height_before_crop=FLAGS.infer_max_height_before_crop,
            crop_size=list(map(int, FLAGS.infer_crop_size)),
            num_frags=FLAGS.num_frags,
            min_visib_fract=None,
            gt_knn_frags=1,
            output_stride=output_stride,
            is_training=False,
            return_gt_orig=return_gt_orig,
            return_gt_maps=return_gt_maps,
            should_shuffle=False,
            should_repeat=False,
            prepare_for_projection=FLAGS.project_to_surface,
            data_augmentations=None)

        # A renderer is only needed when poses are visualized.
        renderer = None
        if FLAGS.vis_gt_poses or FLAGS.vis_pred_poses:
            renderer = _infer_build_renderer(dataset)

        # Inputs.
        samples = dataset.get_one_shot_iterator().get_next()

        # Map from output type to the number of associated channels.
        outputs_to_num_channels = common.get_outputs_to_num_channels(
            dataset.num_objs, dataset.model_store.num_frags)

        # Options of the neural network model.
        model_options = common.ModelOptions(
            outputs_to_num_channels=outputs_to_num_channels,
            crop_size=list(map(int, FLAGS.infer_crop_size)),
            atrous_rates=FLAGS.atrous_rates,
            encoder_output_stride=FLAGS.encoder_output_stride)

        # Inference graph.
        predictions = model.predict(
            images=samples[common.IMAGE],
            model_options=model_options,
            upsample_logits=FLAGS.upsample_logits,
            image_pyramid=FLAGS.image_pyramid,
            num_objs=dataset.num_objs,
            num_frags=dataset.num_frags,
            frag_cls_agnostic=FLAGS.frag_cls_agnostic,
            frag_loc_agnostic=FLAGS.frag_loc_agnostic)

        # Global step.
        tf.train.get_or_create_global_step()

        # Use the latest checkpoint unless a specific one was requested.
        checkpoint_path = (
            tf.train.latest_checkpoint(checkpoint_dir)
            if FLAGS.checkpoint_name is None
            else os.path.join(checkpoint_dir, FLAGS.checkpoint_name))

        start_str = time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime())
        tf.logging.info('Starting inference at: {}'.format(start_str))
        tf.logging.info('Inference with model: {}'.format(checkpoint_path))

        # Scaffold for initialization.
        scaffold = tf.train.Scaffold(
            init_op=tf.global_variables_initializer(),
            saver=tf.train.Saver(var_list=misc.get_variable_dict()))

        # TensorFlow configuration.
        tf_config = _infer_session_config(FLAGS.cpu_only)

        poses_all = []
        first_im_poses_num = 0

        session_creator = tf.train.ChiefSessionCreator(
            config=tf_config,
            scaffold=scaffold,
            master=FLAGS.master,
            checkpoint_filename_with_path=checkpoint_path)
        with tf.train.MonitoredSession(
                session_creator=session_creator, hooks=None) as sess:

            im_ind = 0
            while not sess.should_stop():

                # Estimate object poses for the current image.
                poses, run_times = process_image(
                    sess=sess,
                    samples=samples,
                    predictions=predictions,
                    im_ind=im_ind,
                    crop_size=dataset.crop_size,
                    output_scale=(1.0 / output_stride),
                    model_store=dataset.model_store,
                    renderer=renderer,
                    task_type=FLAGS.task_type,
                    infer_name=FLAGS.infer_name,
                    infer_dir=infer_dir,
                    vis_dir=vis_dir)

                # The first image takes longer (because of TF init).
                tf.logging.info(
                    'Image: {}, prediction: {:.3f}, establish_corr: {:.3f}, '
                    'fitting: {:.3f}, total time: {:.3f}'.format(
                        im_ind, run_times['prediction'],
                        run_times['establish_corr'], run_times['fitting'],
                        run_times['total']))

                poses_all.extend(poses)
                if im_ind == 0:
                    # Remember how many estimates came from the first image.
                    first_im_poses_num = len(poses)
                im_ind += 1

        # Replace the (inflated) time of the first image by the average time.
        _infer_average_first_image_time(poses_all, first_im_poses_num)

        # Save the estimated poses in the BOP format:
        # https://bop.felk.cvut.cz/challenges/bop-challenge-2020/#formatofresults
        if FLAGS.save_estimates:
            suffix = (
                '' if FLAGS.infer_name is None
                else '_{}'.format(FLAGS.infer_name))
            poses_path = os.path.join(
                infer_dir, 'estimated-poses{}.csv'.format(suffix))
            tf.logging.info(
                'Saving estimated poses to: {}'.format(poses_path))
            inout.save_bop_results(poses_path, poses_all, version='bop19')

        end_str = time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime())
        tf.logging.info('Finished inference at: {}'.format(end_str))