class YoloAir_Network(VariationalAutoencoder):
    """Variational autoencoder that detects objects on a spatial grid (YOLO-AIR style).

    A convolutional backbone maps the input image to a grid of feature
    vectors; an object layer turns each grid cell into up to
    `n_objects_per_cell` latent objects; a renderer composites the decoded
    objects onto a background to reconstruct the input.
    """

    # --- hyperparameters supplied via the Param mechanism ---
    n_backbone_features = Param()  # number of feature channels requested from the backbone
    n_objects_per_cell = Param()   # max objects per grid cell (aliased as self.B in __init__)
    anchor_box = Param()           # passed to ObjectRenderer -- presumably a prior box size; TODO confirm
    object_shape = Param()         # passed to ObjectRenderer -- presumably rendered patch shape; TODO confirm
    conv_object_layer = Param()    # if truthy, use ConvGridObjectLayer instead of GridObjectLayer
    build_obj_kl = Param()         # factory; called once to build the KL term for `obj`

    # --- sub-networks, built lazily inside build_representation ---
    backbone = None
    object_layer = None
    object_renderer = None
    obj_kl = None
    _eval_funcs = None             # cache backing the eval_funcs property

    def __init__(self, env, updater, scope=None, **kwargs):
        super(YoloAir_Network, self).__init__(env, updater, scope=scope, **kwargs)
        # Shorthand for the number of object slots per grid cell.
        self.B = self.n_objects_per_cell

    @property
    def eval_funcs(self):
        """Average-precision evaluation functions, built once and cached.

        Returns an empty dict when no ground-truth annotations are present
        in `self._tensors`.
        """
        if "annotations" in self._tensors:
            if self._eval_funcs is None:
                ap_iou_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
                # One AP metric per IoU threshold...
                eval_funcs = {"AP_at_point_{}".format(int(10 * v)): AP(v) for v in ap_iou_values}
                # ...plus a single AP computed over all thresholds at once.
                eval_funcs["AP"] = AP(ap_iou_values)
                self._eval_funcs = eval_funcs
            return self._eval_funcs
        else:
            return {}

    def build_representation(self):
        """Build the inference/generation graph, then register recorded
        tensors, training losses and count-based evaluation metrics."""
        # --- build graph ---
        self.maybe_build_subnet("backbone")
        assert isinstance(self.backbone, GridConvNet)

        inp = self._tensors["inp"]
        backbone_output = self.backbone(inp, self.n_backbone_features, self.is_training)

        # Grid geometry is taken from the backbone's final layer.
        n_grid_cells = self.backbone.layer_info[-1]['n_grid_cells']
        grid_cell_size = self.backbone.layer_info[-1]['grid_cell_size']
        self.H, self.W = [int(i) for i in n_grid_cells]
        self.HWB = self.H * self.W * self.B
        self.pixels_per_cell = tuple(int(i) for i in grid_cell_size)

        # Lazily build the object layer; the Param chooses between the
        # convolutional and the plain grid variant.
        if self.object_layer is None:
            if self.conv_object_layer:
                self.object_layer = ConvGridObjectLayer(
                    pixels_per_cell=self.pixels_per_cell, scope="objects")
            else:
                self.object_layer = GridObjectLayer(
                    pixels_per_cell=self.pixels_per_cell, scope="objects")

        if self.object_renderer is None:
            self.object_renderer = ObjectRenderer(self.anchor_box, self.object_shape, scope="renderer")

        objects = self.object_layer(self.inp, backbone_output, self.is_training)
        self._tensors.update(objects)

        # Per-latent KL terms go into the tensor dict first, because
        # obj_kl below is computed from the updated dict.
        kl_tensors = self.object_layer.compute_kl(objects)
        self._tensors.update(kl_tensors)

        if self.obj_kl is None:
            self.obj_kl = self.build_obj_kl()

        self._tensors['obj_kl'] = self.obj_kl(self._tensors)

        render_tensors = self.object_renderer(objects, self._tensors["background"], self.is_training)
        self._tensors.update(render_tensors)

        # --- specify values to record ---
        obj = self._tensors["obj"]

        self.record_tensors(
            batch_size=self.batch_size,
            float_is_training=self.float_is_training,
            cell_y=self._tensors["cell_y"],
            cell_x=self._tensors["cell_x"],
            height=self._tensors["height"],
            width=self._tensors["width"],
            z=self._tensors["z"],
            cell_y_std=self._tensors["cell_y_logit_std"],
            cell_x_std=self._tensors["cell_x_logit_std"],
            height_std=self._tensors["height_logit_std"],
            width_std=self._tensors["width_logit_std"],
            z_std=self._tensors["z_logit_std"],
            obj=obj,
            attr=self._tensors["attr"],
            pred_n_objects=self._tensors["pred_n_objects"],
        )

        # --- losses ---
        if self.train_reconstruction:
            output = self._tensors['output']
            inp = self._tensors['inp']
            self._tensors['per_pixel_reconstruction_loss'] = xent_loss(pred=output, label=inp)
            self.losses['reconstruction'] = (
                self.reconstruction_weight * tf_mean_sum(self._tensors['per_pixel_reconstruction_loss']))

        if self.train_kl:
            # Latent KLs are gated by the presence tensor `obj`, so switched-off
            # object slots contribute nothing; obj's own KL is not gated.
            self.losses.update(
                obj_kl=self.kl_weight * tf_mean_sum(self._tensors["obj_kl"]),
                cell_y_kl=self.kl_weight * tf_mean_sum(obj * self._tensors["cell_y_kl"]),
                cell_x_kl=self.kl_weight * tf_mean_sum(obj * self._tensors["cell_x_kl"]),
                height_kl=self.kl_weight * tf_mean_sum(obj * self._tensors["height_kl"]),
                width_kl=self.kl_weight * tf_mean_sum(obj * self._tensors["width_kl"]),
                z_kl=self.kl_weight * tf_mean_sum(obj * self._tensors["z_kl"]),
                attr_kl=self.kl_weight * tf_mean_sum(obj * self._tensors["attr_kl"]),
            )

        # --- other evaluation metrics ---
        if "n_annotations" in self._tensors:
            # Absolute and relative error of the predicted object count
            # against the number of valid ground-truth annotations.
            count_1norm = tf.to_float(
                tf.abs(
                    tf.to_int32(self._tensors["pred_n_objects_hard"])
                    - self._tensors["n_valid_annotations"]))
            # Guard against division by zero when there are no annotations.
            count_1norm_relative = (
                count_1norm
                / tf.maximum(tf.cast(self._tensors["n_valid_annotations"], tf.float32), 1e-6))
            self.record_tensors(
                count_1norm_relative=count_1norm_relative,
                count_1norm=count_1norm,
                count_error=count_1norm > 0.5,
            )
def build_representation(self):
    """Build the graph (backbone -> object layer -> renderer), then register
    recorded tensors, the training losses, and count-error metrics."""
    # --- build graph ---
    self.maybe_build_subnet("backbone")
    assert isinstance(self.backbone, GridConvNet)

    images = self._tensors["inp"]
    features = self.backbone(images, self.n_backbone_features, self.is_training)

    # Grid geometry is taken from the backbone's final layer.
    final_layer = self.backbone.layer_info[-1]
    self.H, self.W = (int(d) for d in final_layer['n_grid_cells'])
    self.HWB = self.H * self.W * self.B
    self.pixels_per_cell = tuple(map(int, final_layer['grid_cell_size']))

    # Lazily build sub-networks on first use.
    if self.object_layer is None:
        layer_cls = ConvGridObjectLayer if self.conv_object_layer else GridObjectLayer
        self.object_layer = layer_cls(pixels_per_cell=self.pixels_per_cell, scope="objects")

    if self.object_renderer is None:
        self.object_renderer = ObjectRenderer(self.anchor_box, self.object_shape, scope="renderer")

    object_tensors = self.object_layer(self.inp, features, self.is_training)
    self._tensors.update(object_tensors)
    self._tensors.update(self.object_layer.compute_kl(object_tensors))

    # obj_kl is built once and evaluated on the updated tensor dict.
    if self.obj_kl is None:
        self.obj_kl = self.build_obj_kl()
    self._tensors['obj_kl'] = self.obj_kl(self._tensors)

    self._tensors.update(
        self.object_renderer(object_tensors, self._tensors["background"], self.is_training))

    # --- specify values to record ---
    presence = self._tensors["obj"]

    recorded = dict(
        batch_size=self.batch_size,
        float_is_training=self.float_is_training,
        obj=presence,
        attr=self._tensors["attr"],
        pred_n_objects=self._tensors["pred_n_objects"],
    )
    # Each spatial/depth latent is recorded along with its logit std-dev.
    for name in ("cell_y", "cell_x", "height", "width", "z"):
        recorded[name] = self._tensors[name]
        recorded[name + "_std"] = self._tensors[name + "_logit_std"]
    self.record_tensors(**recorded)

    # --- losses ---
    if self.train_reconstruction:
        reconstruction = self._tensors['output']
        target = self._tensors['inp']
        per_pixel = xent_loss(pred=reconstruction, label=target)
        self._tensors['per_pixel_reconstruction_loss'] = per_pixel
        self.losses['reconstruction'] = (
            self.reconstruction_weight * tf_mean_sum(per_pixel))

    if self.train_kl:
        # Latent KLs are gated by the presence tensor; obj's own KL is not.
        kl_terms = {"obj_kl": self._tensors["obj_kl"]}
        for name in ("cell_y", "cell_x", "height", "width", "z", "attr"):
            kl_terms[name + "_kl"] = presence * self._tensors[name + "_kl"]
        self.losses.update(
            {key: self.kl_weight * tf_mean_sum(term) for key, term in kl_terms.items()})

    # --- other evaluation metrics ---
    if "n_annotations" in self._tensors:
        # Absolute / relative count error vs. valid ground-truth annotations.
        n_true = self._tensors["n_valid_annotations"]
        n_pred_hard = tf.to_int32(self._tensors["pred_n_objects_hard"])
        count_abs_err = tf.to_float(tf.abs(n_pred_hard - n_true))
        # Guard against division by zero when there are no annotations.
        count_rel_err = count_abs_err / tf.maximum(tf.cast(n_true, tf.float32), 1e-6)
        self.record_tensors(
            count_1norm_relative=count_rel_err,
            count_1norm=count_abs_err,
            count_error=count_abs_err > 0.5,
        )
def build_representation(self):
    """Build the graph for the variant whose backbone returns grid geometry
    directly; register recorded tensors, losses and count-error metrics."""
    # --- build graph ---
    self.maybe_build_subnet("backbone")
    assert isinstance(self.backbone, GridConvNet)

    images = self._tensors["inp"]
    features, grid_dims, cell_dims = self.backbone(
        images, self.B * self.n_backbone_features, self.is_training)

    self.H, self.W = (int(d) for d in grid_dims)
    self.HWB = self.H * self.W * self.B
    self.pixels_per_cell = tuple(map(int, cell_dims))

    # Split the channel axis into B object slots of n_backbone_features each.
    features = tf.reshape(
        features, (-1, self.H, self.W, self.B, self.n_backbone_features))

    # Lazily build sub-networks on first use.
    if self.object_layer is None:
        self.object_layer = GridObjectLayer(self.pixels_per_cell, scope="objects")
    if self.object_renderer is None:
        self.object_renderer = ObjectRenderer(scope="renderer")

    object_tensors = self.object_layer(self.inp, features, self.is_training)
    self._tensors.update(object_tensors)
    self._tensors.update(self.object_layer.compute_kl(object_tensors))
    self._tensors.update(
        self.object_renderer(object_tensors, self._tensors["background"], self.is_training))

    # --- specify values to record ---
    presence = self._tensors["obj"]
    n_pred = self._tensors["pred_n_objects"]

    def on_avg(t):
        # Presence-weighted per-example average of `t` over all object slots.
        return tf.reduce_sum(t * presence, axis=(1, 2, 3, 4)) / n_pred

    recorded = dict(
        batch_size=self.batch_size,
        float_is_training=self.float_is_training,
        n_objects=n_pred,
        obj=presence,
        attr=self._tensors["attr"],
    )
    # Record each latent, its posterior scale, and its on-object average.
    for name in ("cell_y", "cell_x", "height", "width", "z"):
        recorded[name] = self._tensors[name]
        recorded[name + "_std"] = self._tensors[name + "_logit_dist"].scale
        recorded["on_" + name + "_avg"] = on_avg(self._tensors[name])
    self.record_tensors(**recorded)

    # --- losses ---
    if self.train_reconstruction:
        reconstruction = self._tensors['output']
        target = self._tensors['inp']
        per_pixel = xent_loss(pred=reconstruction, label=target)
        self._tensors['per_pixel_reconstruction_loss'] = per_pixel
        self.losses['reconstruction'] = (
            self.reconstruction_weight * tf_mean_sum(per_pixel))

    if self.train_kl:
        # Latent KLs are gated by the presence tensor; obj's own KL is not.
        kl_terms = {"obj_kl": self._tensors["obj_kl"]}
        for name in ("cell_y", "cell_x", "height", "width", "z", "attr"):
            kl_terms[name + "_kl"] = presence * self._tensors[name + "_kl"]
        self.losses.update(
            {key: self.kl_weight * tf_mean_sum(term) for key, term in kl_terms.items()})

    # --- other evaluation metrics ---
    if "n_annotations" in self._tensors:
        # Absolute count error vs. the number of valid ground-truth annotations.
        n_true = self._tensors["n_valid_annotations"]
        count_abs_err = tf.to_float(
            tf.abs(tf.to_int32(self._tensors["pred_n_objects_hard"]) - n_true))
        self.record_tensors(
            count_1norm=count_abs_err,
            count_error=count_abs_err > 0.5,
        )