Esempio n. 1
0
class YoloAir_Network(VariationalAutoencoder):
    """Variational autoencoder built from a grid backbone, an object layer and a renderer.

    ``build_representation`` wires together three sub-networks: a
    ``GridConvNet`` backbone, a grid object layer (``ConvGridObjectLayer`` or
    ``GridObjectLayer``) and an ``ObjectRenderer``. It then registers the
    tensors to record, the reconstruction and KL losses and, when annotation
    counts are available, object-count error metrics.
    """

    # Passed to the backbone as its second call argument.
    n_backbone_features = Param()
    # Cached as ``self.B`` in ``__init__`` and multiplied into ``self.HWB``.
    n_objects_per_cell = Param()
    # Both forwarded unchanged to the ``ObjectRenderer`` constructor.
    anchor_box = Param()
    object_shape = Param()
    # Truthy selects ``ConvGridObjectLayer``; falsy selects ``GridObjectLayer``.
    conv_object_layer = Param()
    # Zero-argument factory; its result is called with ``self._tensors`` to
    # produce the "obj_kl" tensor.
    build_obj_kl = Param()

    # Sub-network slots. ``object_layer``, ``object_renderer`` and ``obj_kl``
    # are created in ``build_representation`` only while still ``None``;
    # ``backbone`` is presumably filled in by ``maybe_build_subnet``.
    backbone = None
    object_layer = None
    object_renderer = None
    obj_kl = None

    # Cache backing the ``eval_funcs`` property.
    _eval_funcs = None

    def __init__(self, env, updater, scope=None, **kwargs):
        """Forward all arguments to the base class and cache ``n_objects_per_cell`` as ``self.B``."""
        super(YoloAir_Network, self).__init__(
            env, updater, scope=scope, **kwargs)
        self.B = self.n_objects_per_cell

    @property
    def eval_funcs(self):
        """Return the average-precision evaluation functions, keyed by name.

        Returns an empty dict when ``self._tensors`` has no "annotations"
        entry. Otherwise the dict is built on first access and cached in
        ``self._eval_funcs``: one ``AP`` per IoU value 0.1 .. 0.9 (keys
        "AP_at_point_1" .. "AP_at_point_9") plus an "AP" entry constructed
        from the whole list of IoU values.
        """
        if "annotations" not in self._tensors:
            return {}

        if self._eval_funcs is None:
            ap_iou_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
            eval_funcs = {
                "AP_at_point_{}".format(int(10 * v)): AP(v)
                for v in ap_iou_values
            }
            eval_funcs["AP"] = AP(ap_iou_values)
            self._eval_funcs = eval_funcs

        return self._eval_funcs

    def build_representation(self):
        """Build the graph, then register recorded tensors, losses and count metrics.

        Side effects visible in this method:

        * sets ``self.H``, ``self.W``, ``self.HWB`` and ``self.pixels_per_cell``
          from the last entry of ``self.backbone.layer_info``;
        * creates ``self.object_layer``, ``self.object_renderer`` and
          ``self.obj_kl`` if they are still ``None``;
        * merges the object-layer, KL and renderer outputs into ``self._tensors``;
        * adds "reconstruction" and the ``*_kl`` entries to ``self.losses``
          (gated by ``self.train_reconstruction`` / ``self.train_kl``);
        * records count-error metrics when "n_annotations" is in ``self._tensors``.
        """
        # --- build graph ---

        self.maybe_build_subnet("backbone")
        assert isinstance(self.backbone, GridConvNet)

        inp = self._tensors["inp"]
        backbone_output = self.backbone(
            inp, self.n_backbone_features, self.is_training)

        # Grid geometry is read from the metadata of the backbone's last layer.
        final_layer_info = self.backbone.layer_info[-1]
        n_grid_cells = final_layer_info['n_grid_cells']
        grid_cell_size = final_layer_info['grid_cell_size']

        self.H, self.W = [int(i) for i in n_grid_cells]
        self.HWB = self.H * self.W * self.B
        self.pixels_per_cell = tuple(int(i) for i in grid_cell_size)

        if self.object_layer is None:
            if self.conv_object_layer:
                self.object_layer = ConvGridObjectLayer(
                    pixels_per_cell=self.pixels_per_cell, scope="objects")
            else:
                self.object_layer = GridObjectLayer(
                    pixels_per_cell=self.pixels_per_cell, scope="objects")

        if self.object_renderer is None:
            self.object_renderer = ObjectRenderer(
                self.anchor_box, self.object_shape, scope="renderer")

        # NOTE(review): this reads ``self.inp`` rather than the local ``inp``;
        # presumably both refer to the same tensor -- confirm in the base class.
        objects = self.object_layer(
            self.inp, backbone_output, self.is_training)
        self._tensors.update(objects)

        kl_tensors = self.object_layer.compute_kl(objects)
        self._tensors.update(kl_tensors)

        if self.obj_kl is None:
            self.obj_kl = self.build_obj_kl()

        # The object-presence KL sees every tensor produced so far.
        self._tensors['obj_kl'] = self.obj_kl(self._tensors)

        render_tensors = self.object_renderer(
            objects, self._tensors["background"], self.is_training)
        self._tensors.update(render_tensors)

        # --- specify values to record ---

        obj = self._tensors["obj"]

        self.record_tensors(
            batch_size=self.batch_size,
            float_is_training=self.float_is_training,
            cell_y=self._tensors["cell_y"],
            cell_x=self._tensors["cell_x"],
            height=self._tensors["height"],
            width=self._tensors["width"],
            z=self._tensors["z"],
            cell_y_std=self._tensors["cell_y_logit_std"],
            cell_x_std=self._tensors["cell_x_logit_std"],
            height_std=self._tensors["height_logit_std"],
            width_std=self._tensors["width_logit_std"],
            z_std=self._tensors["z_logit_std"],
            obj=obj,
            attr=self._tensors["attr"],
            pred_n_objects=self._tensors["pred_n_objects"],
        )

        # --- losses ---

        if self.train_reconstruction:
            output = self._tensors['output']
            inp = self._tensors['inp']
            self._tensors['per_pixel_reconstruction_loss'] = xent_loss(
                pred=output, label=inp)
            self.losses['reconstruction'] = (
                self.reconstruction_weight *
                tf_mean_sum(self._tensors['per_pixel_reconstruction_loss']))

        if self.train_kl:
            # "obj_kl" is used as-is; every other KL term is multiplied by
            # ``obj`` before being reduced.
            kl_losses = {
                "obj_kl": self.kl_weight * tf_mean_sum(self._tensors["obj_kl"]),
            }
            for latent in ("cell_y", "cell_x", "height", "width", "z", "attr"):
                key = latent + "_kl"
                kl_losses[key] = (
                    self.kl_weight * tf_mean_sum(obj * self._tensors[key]))
            self.losses.update(**kl_losses)

        # --- other evaluation metrics ---

        # NOTE(review): the guard tests "n_annotations" but the body reads
        # "n_valid_annotations"; presumably both are provided together.
        if "n_annotations" in self._tensors:
            n_valid_annotations = self._tensors["n_valid_annotations"]
            n_predicted = tf.cast(
                self._tensors["pred_n_objects_hard"], tf.int32)

            # Absolute difference between predicted and annotated counts.
            count_1norm = tf.cast(
                tf.abs(n_predicted - n_valid_annotations), tf.float32)

            # Clamp the divisor so a sample without annotations cannot
            # divide by zero.
            count_1norm_relative = count_1norm / tf.maximum(
                tf.cast(n_valid_annotations, tf.float32), 1e-6)

            self.record_tensors(
                count_1norm_relative=count_1norm_relative,
                count_1norm=count_1norm,
                # True wherever the counts differ (threshold sits between
                # a difference of 0 and 1).
                count_error=count_1norm > 0.5,
            )
Esempio n. 2
0
    def build_representation(self):
        """Build the graph, then register recorded tensors, losses and count metrics.

        Side effects visible in this method:

        * sets ``self.H``, ``self.W``, ``self.HWB`` and ``self.pixels_per_cell``
          from the last entry of ``self.backbone.layer_info``;
        * creates ``self.object_layer``, ``self.object_renderer`` and
          ``self.obj_kl`` if they are still ``None``;
        * merges the object-layer, KL and renderer outputs into ``self._tensors``;
        * adds "reconstruction" and the ``*_kl`` entries to ``self.losses``
          (gated by ``self.train_reconstruction`` / ``self.train_kl``);
        * records count-error metrics when "n_annotations" is in ``self._tensors``.
        """
        # --- build graph ---

        self.maybe_build_subnet("backbone")
        assert isinstance(self.backbone, GridConvNet)

        inp = self._tensors["inp"]
        backbone_output = self.backbone(
            inp, self.n_backbone_features, self.is_training)

        # Grid geometry is read from the metadata of the backbone's last layer.
        final_layer_info = self.backbone.layer_info[-1]
        n_grid_cells = final_layer_info['n_grid_cells']
        grid_cell_size = final_layer_info['grid_cell_size']

        self.H, self.W = [int(i) for i in n_grid_cells]
        self.HWB = self.H * self.W * self.B
        self.pixels_per_cell = tuple(int(i) for i in grid_cell_size)

        if self.object_layer is None:
            if self.conv_object_layer:
                self.object_layer = ConvGridObjectLayer(
                    pixels_per_cell=self.pixels_per_cell, scope="objects")
            else:
                self.object_layer = GridObjectLayer(
                    pixels_per_cell=self.pixels_per_cell, scope="objects")

        if self.object_renderer is None:
            self.object_renderer = ObjectRenderer(
                self.anchor_box, self.object_shape, scope="renderer")

        # NOTE(review): this reads ``self.inp`` rather than the local ``inp``;
        # presumably both refer to the same tensor -- confirm in the base class.
        objects = self.object_layer(
            self.inp, backbone_output, self.is_training)
        self._tensors.update(objects)

        kl_tensors = self.object_layer.compute_kl(objects)
        self._tensors.update(kl_tensors)

        if self.obj_kl is None:
            self.obj_kl = self.build_obj_kl()

        # The object-presence KL sees every tensor produced so far.
        self._tensors['obj_kl'] = self.obj_kl(self._tensors)

        render_tensors = self.object_renderer(
            objects, self._tensors["background"], self.is_training)
        self._tensors.update(render_tensors)

        # --- specify values to record ---

        obj = self._tensors["obj"]

        self.record_tensors(
            batch_size=self.batch_size,
            float_is_training=self.float_is_training,
            cell_y=self._tensors["cell_y"],
            cell_x=self._tensors["cell_x"],
            height=self._tensors["height"],
            width=self._tensors["width"],
            z=self._tensors["z"],
            cell_y_std=self._tensors["cell_y_logit_std"],
            cell_x_std=self._tensors["cell_x_logit_std"],
            height_std=self._tensors["height_logit_std"],
            width_std=self._tensors["width_logit_std"],
            z_std=self._tensors["z_logit_std"],
            obj=obj,
            attr=self._tensors["attr"],
            pred_n_objects=self._tensors["pred_n_objects"],
        )

        # --- losses ---

        if self.train_reconstruction:
            output = self._tensors['output']
            inp = self._tensors['inp']
            self._tensors['per_pixel_reconstruction_loss'] = xent_loss(
                pred=output, label=inp)
            self.losses['reconstruction'] = (
                self.reconstruction_weight *
                tf_mean_sum(self._tensors['per_pixel_reconstruction_loss']))

        if self.train_kl:
            # "obj_kl" is used as-is; every other KL term is multiplied by
            # ``obj`` before being reduced.
            kl_losses = {
                "obj_kl": self.kl_weight * tf_mean_sum(self._tensors["obj_kl"]),
            }
            for latent in ("cell_y", "cell_x", "height", "width", "z", "attr"):
                key = latent + "_kl"
                kl_losses[key] = (
                    self.kl_weight * tf_mean_sum(obj * self._tensors[key]))
            self.losses.update(**kl_losses)

        # --- other evaluation metrics ---

        # NOTE(review): the guard tests "n_annotations" but the body reads
        # "n_valid_annotations"; presumably both are provided together.
        if "n_annotations" in self._tensors:
            n_valid_annotations = self._tensors["n_valid_annotations"]
            n_predicted = tf.cast(
                self._tensors["pred_n_objects_hard"], tf.int32)

            # Absolute difference between predicted and annotated counts.
            count_1norm = tf.cast(
                tf.abs(n_predicted - n_valid_annotations), tf.float32)

            # Clamp the divisor so a sample without annotations cannot
            # divide by zero.
            count_1norm_relative = count_1norm / tf.maximum(
                tf.cast(n_valid_annotations, tf.float32), 1e-6)

            self.record_tensors(
                count_1norm_relative=count_1norm_relative,
                count_1norm=count_1norm,
                # True wherever the counts differ (threshold sits between
                # a difference of 0 and 1).
                count_error=count_1norm > 0.5,
            )
Esempio n. 3
0
    def build_representation(self):
        """Build the graph, then register recorded tensors, losses and count metrics.

        Side effects visible in this method:

        * sets ``self.H``, ``self.W``, ``self.HWB`` and ``self.pixels_per_cell``
          from the grid geometry returned by the backbone;
        * creates ``self.object_layer`` and ``self.object_renderer`` if they
          are still ``None``;
        * merges the object-layer, KL and renderer outputs into ``self._tensors``;
        * adds "reconstruction" and the ``*_kl`` entries to ``self.losses``
          (gated by ``self.train_reconstruction`` / ``self.train_kl``);
        * records count-error metrics when "n_annotations" is in ``self._tensors``.
        """
        # --- build graph ---

        self.maybe_build_subnet("backbone")
        assert isinstance(self.backbone, GridConvNet)

        inp = self._tensors["inp"]
        # The backbone is asked for B * n_backbone_features channels and also
        # returns the grid geometry.
        backbone_output, n_grid_cells, grid_cell_size = self.backbone(
            inp, self.B * self.n_backbone_features, self.is_training)

        self.H, self.W = [int(i) for i in n_grid_cells]
        self.HWB = self.H * self.W * self.B
        self.pixels_per_cell = tuple(int(i) for i in grid_cell_size)

        # Split the channel axis into (B, n_backbone_features).
        backbone_output = tf.reshape(
            backbone_output,
            (-1, self.H, self.W, self.B, self.n_backbone_features))

        if self.object_layer is None:
            self.object_layer = GridObjectLayer(
                self.pixels_per_cell, scope="objects")

        if self.object_renderer is None:
            self.object_renderer = ObjectRenderer(scope="renderer")

        # NOTE(review): this reads ``self.inp`` rather than the local ``inp``;
        # presumably both refer to the same tensor -- confirm in the base class.
        objects = self.object_layer(
            self.inp, backbone_output, self.is_training)
        self._tensors.update(objects)

        kl_tensors = self.object_layer.compute_kl(objects)
        self._tensors.update(kl_tensors)

        render_tensors = self.object_renderer(
            objects, self._tensors["background"], self.is_training)
        self._tensors.update(render_tensors)

        # --- specify values to record ---

        obj = self._tensors["obj"]
        pred_n_objects = self._tensors["pred_n_objects"]

        # Clamp the divisor: a sample with zero predicted objects would
        # otherwise record NaN (presumably 0 / 0) for every ``on_*_avg`` metric.
        safe_n_objects = tf.maximum(pred_n_objects, 1e-6)

        def on_average(key):
            # ``obj``-weighted sum of a latent over every non-batch axis,
            # divided by the predicted object count.
            weighted_sum = tf.reduce_sum(
                self._tensors[key] * obj, axis=(1, 2, 3, 4))
            return weighted_sum / safe_n_objects

        self.record_tensors(
            batch_size=self.batch_size,
            float_is_training=self.float_is_training,
            cell_y=self._tensors["cell_y"],
            cell_x=self._tensors["cell_x"],
            height=self._tensors["height"],
            width=self._tensors["width"],
            z=self._tensors["z"],
            cell_y_std=self._tensors["cell_y_logit_dist"].scale,
            cell_x_std=self._tensors["cell_x_logit_dist"].scale,
            height_std=self._tensors["height_logit_dist"].scale,
            width_std=self._tensors["width_logit_dist"].scale,
            z_std=self._tensors["z_logit_dist"].scale,
            n_objects=pred_n_objects,
            obj=obj,
            on_cell_y_avg=on_average("cell_y"),
            on_cell_x_avg=on_average("cell_x"),
            on_height_avg=on_average("height"),
            on_width_avg=on_average("width"),
            on_z_avg=on_average("z"),
            attr=self._tensors["attr"],
        )

        # --- losses ---

        if self.train_reconstruction:
            output = self._tensors['output']
            inp = self._tensors['inp']
            self._tensors['per_pixel_reconstruction_loss'] = xent_loss(
                pred=output, label=inp)
            self.losses['reconstruction'] = (
                self.reconstruction_weight *
                tf_mean_sum(self._tensors['per_pixel_reconstruction_loss']))

        if self.train_kl:
            # NOTE(review): "obj_kl" is not assigned in this method; presumably
            # it arrives via ``compute_kl`` -- confirm.
            # It is used as-is; every other KL term is multiplied by ``obj``
            # before being reduced.
            kl_losses = {
                "obj_kl": self.kl_weight * tf_mean_sum(self._tensors["obj_kl"]),
            }
            for latent in ("cell_y", "cell_x", "height", "width", "z", "attr"):
                key = latent + "_kl"
                kl_losses[key] = (
                    self.kl_weight * tf_mean_sum(obj * self._tensors[key]))
            self.losses.update(**kl_losses)

        # --- other evaluation metrics ---

        # NOTE(review): the guard tests "n_annotations" but the body reads
        # "n_valid_annotations"; presumably both are provided together.
        if "n_annotations" in self._tensors:
            n_predicted = tf.cast(
                self._tensors["pred_n_objects_hard"], tf.int32)

            # Absolute difference between predicted and annotated counts.
            count_1norm = tf.cast(
                tf.abs(n_predicted - self._tensors["n_valid_annotations"]),
                tf.float32)

            self.record_tensors(
                count_1norm=count_1norm,
                # True wherever the counts differ (threshold sits between
                # a difference of 0 and 1).
                count_error=count_1norm > 0.5,
            )