Example 1
    def up(self, input, **_):
        hps = self.hps
        use_iaf = hps.use_iaf
        h_size = hps.h_size
        z_size = hps.z_size
        stride = [2, 2] if self.downsample else [1, 1]

        with arg_scope([conv2d]):
            x = tf.nn.elu(input)
            if use_iaf:
                x = conv2d("up_conv1",
                           x,
                           2 * z_size + 2 * h_size,
                           stride=stride)
                self.qz_mean, self.qz_logsd, self.up_context, h = split(
                    x, 1, [z_size, z_size, h_size, h_size])
            else:
                x = conv2d("up_conv1", x, 2 * z_size + h_size, stride=stride)
                self.qz_mean, self.qz_logsd, h = split(
                    x, 1, [z_size, z_size, h_size])

            h = tf.nn.elu(h)
            h = conv2d("up_conv3", h, h_size)
            if self.downsample:
                input = resize_nearest_neighbor(input, 0.5)
            return input + 0.1 * h
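
The split helper used above is defined elsewhere in the repository; judging from the call sites it slices the convolution output along the channel axis (axis 1, NCHW layout) into consecutive chunks of the requested sizes. A minimal NumPy sketch of that assumed behavior:

import numpy as np

def split(x, axis, sizes):
    # Slice x along `axis` into consecutive chunks of the given sizes.
    assert x.shape[axis] == sum(sizes), "chunk sizes must cover the axis"
    offsets = np.cumsum(sizes)[:-1]
    return np.split(x, offsets, axis=axis)

x = np.zeros((8, 2 * 32 + 2 * 160, 16, 16))           # e.g. z_size=32, h_size=160
qz_mean, qz_logsd, up_context, h = split(x, 1, [32, 32, 160, 160])
print(qz_mean.shape, h.shape)                          # (8, 32, 16, 16) (8, 160, 16, 16)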
Example 2
    def down(self, input):
        hps = self.hps
        h_size = hps.h_size
        z_size = hps.z_size

        with arg_scope([conv2d, ar_multiconv2d]):
            x = tf.nn.elu(input)
            x = conv2d("down_conv1", x, 4 * z_size + h_size * 2)
            pz_mean, pz_logsd, rz_mean, rz_logsd, down_context, h_det = split(
                x, 1, [z_size] * 4 + [h_size] * 2)

            prior = DiagonalGaussian(pz_mean, 2 * pz_logsd)
            posterior = DiagonalGaussian(rz_mean + self.qz_mean,
                                         2 * (rz_logsd + self.qz_logsd))
            context = self.up_context + down_context

            if self.mode in ["init", "sample"]:
                z = prior.sample
            else:
                z = posterior.sample

            if self.mode == "sample":
                kl_cost = kl_obj = tf.zeros([hps.batch_size * hps.k])
            else:
                logqs = posterior.logps(z)
                x = ar_multiconv2d("ar_multiconv2d", z, context,
                                   [h_size, h_size], [z_size, z_size])
                arw_mean, arw_logsd = x[0] * 0.1, x[1] * 0.1
                z = (z - arw_mean) / tf.exp(arw_logsd)
                logqs += arw_logsd
                logps = prior.logps(z)

                kl_cost = logqs - logps

                if hps.kl_min > 0:
                    # kl_cost: [B*k, z, H, W] -> sum over H, W -> mean over batch -> [1, z]
                    kl_ave = tf.reduce_mean(tf.reduce_sum(kl_cost, [2, 3]),
                                            [0],
                                            keep_dims=True)
                    kl_ave = tf.maximum(kl_ave, hps.kl_min)
                    kl_ave = tf.tile(kl_ave, [hps.batch_size * hps.k, 1])
                    kl_obj = tf.reduce_sum(kl_ave, [1])
                else:
                    kl_obj = tf.reduce_sum(kl_cost, [1, 2, 3])
                kl_cost = tf.reduce_sum(kl_cost, [1, 2, 3])

            h = tf.concat(1, [z, h_det])
            h = tf.nn.elu(h)
            if self.downsample:
                input = resize_nearest_neighbor(input, 2)
                h = deconv2d("down_deconv2", h, h_size)
            else:
                h = conv2d("down_conv2", h, h_size)
            output = input + 0.1 * h
            return output, kl_obj, kl_cost
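
DiagonalGaussian is not shown in this snippet; a minimal NumPy sketch consistent with how it is used above (the second constructor argument is a log-variance, i.e. 2 * logsd, .sample draws element-wise, and logps returns the element-wise log-density) might look like this:

import numpy as np

class DiagonalGaussian:
    def __init__(self, mean, logvar):
        self.mean, self.logvar = mean, logvar

    @property
    def sample(self):
        # Reparameterized element-wise sample: mean + std * eps
        eps = np.random.standard_normal(self.mean.shape)
        return self.mean + np.exp(0.5 * self.logvar) * eps

    def logps(self, z):
        # Element-wise log N(z; mean, exp(logvar))
        return -0.5 * (np.log(2 * np.pi) + self.logvar
                       + (z - self.mean) ** 2 / np.exp(self.logvar))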
Example 3
    def _build_graph(self):
        from tf_utils.layers import conv2d, max_pool, rescale_bilinear, avg_pool

        def layer_width(layer: int):  # number of channels (features per pixel)
            return min([4 * 4**(layer + 1), 64])

        input_shape = [None] + list(self.input_shape)
        output_shape = input_shape[:3] + [self.class_count]

        # Input image and labels placeholders
        input = tf.placeholder(tf.float32, shape=input_shape)
        target = tf.placeholder(tf.float32, shape=output_shape)

        # Downsampled input (to improve speed at the cost of accuracy)
        h = rescale_bilinear(input, 0.5)

        # Hidden layers
        h = conv2d(h, 3, layer_width(0))
        h = tf.nn.relu(h)
        for l in range(1, self.conv_layer_count):
            h = max_pool(h, 2)
            h = conv2d(h, 3, layer_width(l))
            h = tf.nn.relu(h)

        # Pixelwise softmax classification and label upscaling
        logits = conv2d(h, 1, self.class_count)
        probs = tf.nn.softmax(logits)
        probs = tf.image.resize_bilinear(probs, output_shape[1:3])

        # Loss
        clipped_probs = tf.clip_by_value(probs, 1e-10, 1.0)
        ts = lambda x: x[:, :, :, 1:] if self.class0_unknown else x
        cost = -tf.reduce_mean(ts(target) * tf.log(ts(clipped_probs)))

        # Optimization
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        training_step = optimizer.minimize(cost)

        # Dense predictions and labels
        preds, dense_labels = tf.argmax(probs, 3), tf.argmax(target, 3)

        # Other evaluation measures
        self._n_accuracy = tf.reduce_mean(
            tf.cast(tf.equal(preds, dense_labels), tf.float32))

        return AbstractModel.EssentialNodes(
            input=input,
            target=target,
            probs=probs,
            loss=cost,
            training_step=training_step)
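
As an aside, the cost above is a plain one-hot cross-entropy averaged over pixels and classes, with channel 0 optionally dropped when class 0 marks unlabelled pixels. A small NumPy recreation of that reduction (pixelwise_xent is a hypothetical name used only for illustration):

import numpy as np

def pixelwise_xent(target, probs, class0_unknown=True):
    # target, probs: [N, H, W, C]; drop the "unknown" channel before averaging.
    probs = np.clip(probs, 1e-10, 1.0)
    if class0_unknown:
        target, probs = target[..., 1:], probs[..., 1:]
    return -np.mean(target * np.log(probs))

target = np.zeros((1, 2, 2, 3)); target[..., 1] = 1.0   # every pixel is class 1
probs = np.full((1, 2, 2, 3), 1 / 3)                    # uniform prediction
print(pixelwise_xent(target, probs))                    # log(3) / 2 ~= 0.549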
Example 4
    def down(self, input):
        hps = self.hps
        h_size = hps.h_size
        z_size = hps.z_size

        with arg_scope([conv2d, ar_multiconv2d]):
            x = tf.nn.elu(input)
            x = conv2d("down_conv1", x, 4 * z_size + h_size * 2)
            pz_mean, pz_logsd, rz_mean, rz_logsd, down_context, h_det = split(x, 1, [z_size] * 4 + [h_size] * 2)

            prior = DiagonalGaussian(pz_mean, 2 * pz_logsd)
            posterior = DiagonalGaussian(rz_mean + self.qz_mean, 2 * (rz_logsd + self.qz_logsd))
            context = self.up_context + down_context

            if self.mode in ["init", "sample"]:
                z = prior.sample
            else:
                z = posterior.sample

            if self.mode == "sample":
                kl_cost = kl_obj = tf.zeros([hps.batch_size * hps.k])
            else:
                logqs = posterior.logps(z)
                x = ar_multiconv2d("ar_multiconv2d", z, context, [h_size, h_size], [z_size, z_size])
                arw_mean, arw_logsd = x[0] * 0.1, x[1] * 0.1
                z = (z - arw_mean) / tf.exp(arw_logsd)
                logqs += arw_logsd
                logps = prior.logps(z)

                kl_cost = logqs - logps

                if hps.kl_min > 0:
                    # kl_cost: [B*k, z, H, W] -> sum over H, W -> mean over batch -> [1, z]
                    kl_ave = tf.reduce_mean(tf.reduce_sum(kl_cost, [2, 3]), [0], keep_dims=True)
                    kl_ave = tf.maximum(kl_ave, hps.kl_min)
                    kl_ave = tf.tile(kl_ave, [hps.batch_size * hps.k, 1])
                    kl_obj = tf.reduce_sum(kl_ave, [1])
                else:
                    kl_obj = tf.reduce_sum(kl_cost, [1, 2, 3])
                kl_cost = tf.reduce_sum(kl_cost, [1, 2, 3])

            h = tf.concat(1, [z, h_det])
            h = tf.nn.elu(h)
            if self.downsample:
                input = resize_nearest_neighbor(input, 2)
                h = deconv2d("down_deconv2", h, h_size)
            else:
                h = conv2d("down_conv2", h, h_size)
            output = input + 0.1 * h
            return output, kl_obj, kl_cost
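
The kl_min branch above implements a free-bits style clamp per latent channel. A NumPy sketch of the same reductions (free_bits_kl is a hypothetical helper name; NCHW layout assumed, matching the TF axes):

import numpy as np

def free_bits_kl(kl_cost, kl_min):                  # kl_cost: [B, z, H, W]
    kl_ave = kl_cost.sum(axis=(2, 3)).mean(axis=0, keepdims=True)   # [1, z]
    kl_ave = np.maximum(kl_ave, kl_min)                             # clamp from below
    kl_obj = np.tile(kl_ave, (kl_cost.shape[0], 1)).sum(axis=1)     # [B]
    return kl_obj

kl = np.full((4, 2, 3, 3), 0.01)                    # 0.09 nats per channel
print(free_bits_kl(kl, kl_min=0.25))                # every entry is 2 * 0.25 = 0.5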
Example 5
    def up(self, input, **_):
        hps = self.hps
        h_size = hps.h_size
        z_size = hps.z_size
        stride = [2, 2] if self.downsample else [1, 1]

        with arg_scope([conv2d]):
            x = tf.nn.elu(input)
            x = conv2d("up_conv1", x, 2 * z_size + 2 * h_size, stride=stride)
            self.qz_mean, self.qz_logsd, self.up_context, h = split(x, 1, [z_size, z_size, h_size, h_size])

            h = tf.nn.elu(h)
            h = conv2d("up_conv3", h, h_size)
            if self.downsample:
                input = resize_nearest_neighbor(input, 0.5)
            return input + 0.1 * h
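
Only the spatial shapes matter for the residual connection at the end; a minimal NumPy stand-in for resize_nearest_neighbor covering just the two scale factors this code uses (an assumption about the real helper, which is defined elsewhere):

import numpy as np

def resize_nearest_neighbor(x, scale):
    # x: [N, C, H, W]; keep every second pixel (0.5) or repeat each pixel (2).
    if scale == 0.5:
        return x[:, :, ::2, ::2]
    if scale == 2:
        return x.repeat(2, axis=2).repeat(2, axis=3)
    raise ValueError("sketch only covers scale in {0.5, 2}")

x = np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4)
print(resize_nearest_neighbor(x, 0.5).shape)   # (1, 1, 2, 2)
print(resize_nearest_neighbor(x, 2).shape)     # (1, 1, 8, 8)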
Example 6
    def _forward(self, x, gpu):
        hps = self.hps

        x = tf.to_float(x)
        x = tf.clip_by_value((x + 0.5) / 256.0, 0.0, 1.0) - 0.5

        # Input images are repeated k times on the input.
        # This is used for Importance Sampling loss (k is number of samples).
        data_size = hps.batch_size * hps.k
        x = repeat(x, hps.k)

        orig_x = x
        h_size = hps.h_size

        with arg_scope([conv2d, deconv2d], init=(self.mode == "init")):
            layers = []
            for i in range(hps.depth):
                layers.append([])
                for j in range(hps.num_blocks):
                    downsample = (i > 0) and (j == 0)
                    layers[-1].append(IAFLayer(hps, self.mode, downsample))

            h = conv2d("x_enc", x, h_size, [5, 5], [2, 2])  # -> [16, 16]
            for i, layer in enumerate(layers):
                for j, sub_layer in enumerate(layer):
                    with tf.variable_scope("IAF_%d_%d" % (i, j)):
                        h = sub_layer.up(h)

            # top->down
            self.h_top = h_top = tf.get_variable("h_top", [h_size], initializer=tf.zeros_initializer)
            h_top = tf.reshape(h_top, [1, -1, 1, 1])
            h = tf.tile(h_top, [data_size, 1, hps.image_size // 2 ** len(layers), hps.image_size // 2 ** len(layers)])
            kl_cost = kl_obj = 0.0

            for i, layer in reversed(list(enumerate(layers))):
                for j, sub_layer in reversed(list(enumerate(layer))):
                    with tf.variable_scope("IAF_%d_%d" % (i, j)):
                        h, cur_obj, cur_cost = sub_layer.down(h)
                        kl_obj += cur_obj
                        kl_cost += cur_cost

                        if self.mode == "train" and gpu == hps.num_gpus - 1:
                            tf.scalar_summary("model/kl_obj_%02d_%02d" % (i, j), tf.reduce_mean(cur_obj))
                            tf.scalar_summary("model/kl_cost_%02d_%02d" % (i, j), tf.reduce_mean(cur_cost))

            x = tf.nn.elu(h)
            x = deconv2d("x_dec", x, 3, [5, 5])
            x = tf.clip_by_value(x, -0.5 + 1 / 512., 0.5 - 1 / 512.)

        log_pxz = discretized_logistic(x, self.dec_log_stdv, sample=orig_x)
        obj = tf.reduce_sum(kl_obj - log_pxz)

        if self.mode == "train" and gpu == hps.num_gpus - 1:
            tf.scalar_summary("model/log_pxz", -tf.reduce_mean(log_pxz))
            tf.scalar_summary("model/kl_obj", tf.reduce_mean(kl_obj))
            tf.scalar_summary("model/kl_cost", tf.reduce_mean(kl_cost))

        loss = tf.reduce_sum(compute_lowerbound(log_pxz, kl_cost, hps.k))
        return x, obj, loss
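
compute_lowerbound is defined elsewhere; based on the comment about repeating each image k times for the importance-sampling loss, it presumably reduces to the negative ELBO for k = 1 and to an importance-weighted bound over the k samples for k > 1. A hedged NumPy sketch of that assumed behavior:

import numpy as np

def compute_lowerbound(log_pxz, kl_cost, k=1):
    if k == 1:
        return kl_cost - log_pxz                      # plain negative ELBO per example
    log_w = (log_pxz - kl_cost).reshape(-1, k)        # [batch, k] log importance weights
    m = log_w.max(axis=1, keepdims=True)
    log_mean_w = m[:, 0] + np.log(np.exp(log_w - m).mean(axis=1))  # stable log-mean-exp
    return -log_mean_w                                # importance-weighted bound

log_pxz = np.array([-90.0, -95.0, -100.0, -110.0])    # two images, k = 2 samples each
kl_cost = np.array([10.0, 12.0, 11.0, 9.0])
print(compute_lowerbound(log_pxz, kl_cost, k=2))      # one value per original image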
Example 7
    def _forward(self, x, gpu):
        hps = self.hps

        x = tf.to_float(x)
        x = tf.clip_by_value((x + 0.5) / 256.0, 0.0, 1.0) - 0.5

        # Input images are repeated k times on the input.
        # This is used for Importance Sampling loss (k is number of samples).
        data_size = hps.batch_size * hps.k
        x = repeat(x, hps.k)

        orig_x = x
        h_size = hps.h_size

        with arg_scope([conv2d, deconv2d], init=(self.mode == "init")):
            layers = []
            for i in range(hps.depth):
                layers.append([])
                for j in range(hps.num_blocks):
                    downsample = (i > 0) and (j == 0)
                    layers[-1].append(IAFLayer(hps, self.mode, downsample))

            h = conv2d("x_enc", x, h_size, [5, 5], [2, 2])  # -> [16, 16]
            for i, layer in enumerate(layers):
                for j, sub_layer in enumerate(layer):
                    with tf.variable_scope("IAF_%d_%d" % (i, j)):
                        h = sub_layer.up(h)

            # top->down
            self.h_top = h_top = tf.get_variable(
                "h_top", [h_size], initializer=tf.zeros_initializer)
            h_top = tf.reshape(h_top, [1, -1, 1, 1])
            h = tf.tile(h_top, [
                data_size, 1, hps.image_size // 2**len(layers),
                hps.image_size // 2**len(layers)
            ])
            kl_cost = kl_obj = 0.0

            for i, layer in reversed(list(enumerate(layers))):
                for j, sub_layer in reversed(list(enumerate(layer))):
                    with tf.variable_scope("IAF_%d_%d" % (i, j)):
                        h, cur_obj, cur_cost = sub_layer.down(h)
                        kl_obj += cur_obj
                        kl_cost += cur_cost

                        if self.mode == "train" and gpu == hps.num_gpus - 1:
                            tf.scalar_summary(
                                "model/kl_obj_%02d_%02d" % (i, j),
                                tf.reduce_mean(cur_obj))
                            tf.scalar_summary(
                                "model/kl_cost_%02d_%02d" % (i, j),
                                tf.reduce_mean(cur_cost))

            x = tf.nn.elu(h)
            x = deconv2d("x_dec", x, 3, [5, 5])
            x = tf.clip_by_value(x, -0.5 + 1 / 512., 0.5 - 1 / 512.)

        log_pxz = discretized_logistic(x, self.dec_log_stdv, sample=orig_x)
        obj = tf.reduce_sum(kl_obj - log_pxz)

        if self.mode == "train" and gpu == hps.num_gpus - 1:
            tf.scalar_summary("model/log_pxz", -tf.reduce_mean(log_pxz))
            tf.scalar_summary("model/kl_obj", tf.reduce_mean(kl_obj))
            tf.scalar_summary("model/kl_cost", tf.reduce_mean(kl_cost))

        loss = tf.reduce_sum(compute_lowerbound(log_pxz, kl_cost, hps.k))
        return x, obj, loss
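
discretized_logistic is also defined outside this snippet; the sketch below assumes the usual formulation, i.e. the log of the probability mass a logistic with the given mean and log-scale assigns to the 1/256-wide bin around each rescaled pixel, summed per example. This is an assumption about the helper, not its actual code:

import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

def discretized_logistic(mean, log_scale, sample, bin_width=1.0 / 256):
    # Probability mass a logistic(mean, scale) puts on the bin around each pixel.
    scale = np.exp(log_scale)
    centered = sample - mean
    log_p = np.log(sigmoid((centered + bin_width / 2) / scale)
                   - sigmoid((centered - bin_width / 2) / scale) + 1e-7)
    return log_p.reshape(log_p.shape[0], -1).sum(axis=1)   # sum over C, H, W per example

mean = np.zeros((2, 3, 8, 8))                      # decoder output, NCHW
sample = np.random.uniform(-0.5, 0.5, mean.shape)  # rescaled pixels in [-0.5, 0.5]
print(discretized_logistic(mean, np.log(0.1), sample).shape)   # (2,)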