def up(self, input, **_):
    # Bottom-up (encoder) pass of one stochastic layer: computes the
    # approximate-posterior statistics q(z|x) (and, when IAF is used, the
    # context features) and returns a residual update of the input.
    hps = self.hps
    use_iaf = hps.use_iaf
    h_size = hps.h_size
    z_size = hps.z_size
    stride = [2, 2] if self.downsample else [1, 1]

    with arg_scope([conv2d]):
        x = tf.nn.elu(input)
        if use_iaf:
            x = conv2d("up_conv1", x, 2 * z_size + 2 * h_size, stride=stride)
            self.qz_mean, self.qz_logsd, self.up_context, h = split(
                x, 1, [z_size, z_size, h_size, h_size])
        else:
            x = conv2d("up_conv1", x, 2 * z_size + h_size, stride=stride)
            self.qz_mean, self.qz_logsd, h = split(
                x, 1, [z_size, z_size, h_size])

        h = tf.nn.elu(h)
        h = conv2d("up_conv3", h, h_size)

        if self.downsample:
            input = resize_nearest_neighbor(input, 0.5)
        return input + 0.1 * h
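The split helper used above is not defined in this listing. It is assumed to behave like a sized split along one axis (here axis 1, the channel axis in a channel-first layout); a minimal sketch under that assumption, not the original implementation:

import tensorflow as tf

def split(x, axis, sizes):
    # Hypothetical helper (assumed, not the original code): slice x along
    # `axis` into consecutive chunks whose extents are given by `sizes`.
    chunks, begin = [], 0
    for size in sizes:
        begins = [0] * len(x.get_shape())
        extents = [-1] * len(x.get_shape())
        begins[axis], extents[axis] = begin, size
        chunks.append(tf.slice(x, begins, extents))
        begin += size
    return chunks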
def down(self, input):
    # Top-down (decoder) pass of one stochastic layer: samples z, applies the
    # inverse autoregressive flow step, and accumulates the KL terms.
    hps = self.hps
    h_size = hps.h_size
    z_size = hps.z_size

    with arg_scope([conv2d, ar_multiconv2d]):
        x = tf.nn.elu(input)
        x = conv2d("down_conv1", x, 4 * z_size + h_size * 2)
        pz_mean, pz_logsd, rz_mean, rz_logsd, down_context, h_det = split(
            x, 1, [z_size] * 4 + [h_size] * 2)

        # Prior p(z) and approximate posterior q(z|x); the posterior combines
        # the top-down (rz_*) and bottom-up (qz_*) statistics.
        prior = DiagonalGaussian(pz_mean, 2 * pz_logsd)
        posterior = DiagonalGaussian(rz_mean + self.qz_mean,
                                     2 * (rz_logsd + self.qz_logsd))
        context = self.up_context + down_context

        if self.mode in ["init", "sample"]:
            z = prior.sample
        else:
            z = posterior.sample

        if self.mode == "sample":
            kl_cost = kl_obj = tf.zeros([hps.batch_size * hps.k])
        else:
            logqs = posterior.logps(z)
            # IAF step: transform z autoregressively and update log q(z|x)
            # with the log-determinant of the transformation.
            x = ar_multiconv2d("ar_multiconv2d", z, context,
                               [h_size, h_size], [z_size, z_size])
            arw_mean, arw_logsd = x[0] * 0.1, x[1] * 0.1
            z = (z - arw_mean) / tf.exp(arw_logsd)
            logqs += arw_logsd
            logps = prior.logps(z)
            kl_cost = logqs - logps

            if hps.kl_min > 0:
                # "Free bits": average the KL over batch and spatial dims,
                # clip it from below, then tile it back over the batch.
                # [0, 1, 2, 3] -> [0, 1] -> [1] / (b * k)
                kl_ave = tf.reduce_mean(tf.reduce_sum(kl_cost, [2, 3]),
                                        [0], keep_dims=True)
                kl_ave = tf.maximum(kl_ave, hps.kl_min)
                kl_ave = tf.tile(kl_ave, [hps.batch_size * hps.k, 1])
                kl_obj = tf.reduce_sum(kl_ave, [1])
            else:
                kl_obj = tf.reduce_sum(kl_cost, [1, 2, 3])
            kl_cost = tf.reduce_sum(kl_cost, [1, 2, 3])

        h = tf.concat(1, [z, h_det])
        h = tf.nn.elu(h)
        if self.downsample:
            input = resize_nearest_neighbor(input, 2)
            h = deconv2d("down_deconv2", h, h_size)
        else:
            h = conv2d("down_conv2", h, h_size)
        output = input + 0.1 * h
        return output, kl_obj, kl_cost
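DiagonalGaussian is also not shown here. The callers above pass a mean and twice a log-standard-deviation, so the second constructor argument is taken to be a log-variance; the class is assumed to expose a reparameterized sample attribute and an elementwise log-density method logps. A minimal sketch under those assumptions, not the original class:

import math
import tensorflow as tf

class DiagonalGaussian(object):
    # Hypothetical sketch of the factorized Gaussian used above.
    def __init__(self, mean, logvar):
        self.mean = mean
        self.logvar = logvar
        # Reparameterized sample: mean + sigma * eps, eps ~ N(0, I).
        self.sample = mean + tf.exp(0.5 * logvar) * tf.random_normal(tf.shape(mean))

    def logps(self, x):
        # Elementwise log-density of x under N(mean, exp(logvar)).
        return -0.5 * (math.log(2.0 * math.pi) + self.logvar
                       + tf.square(x - self.mean) / tf.exp(self.logvar))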
def _build_graph(self):
    from tf_utils.layers import conv2d, max_pool, rescale_bilinear, avg_pool

    def layer_width(layer: int):
        # Number of channels (features per pixel) in the given layer.
        return min([4 * 4**(layer + 1), 64])

    input_shape = [None] + list(self.input_shape)
    output_shape = input_shape[:3] + [self.class_count]

    # Input image and label placeholders
    input = tf.placeholder(tf.float32, shape=input_shape)
    target = tf.placeholder(tf.float32, shape=output_shape)

    # Downsampled input (improves speed at the cost of accuracy)
    h = rescale_bilinear(input, 0.5)

    # Hidden layers
    h = conv2d(h, 3, layer_width(0))
    h = tf.nn.relu(h)
    for l in range(1, self.conv_layer_count):
        h = max_pool(h, 2)
        h = conv2d(h, 3, layer_width(l))
        h = tf.nn.relu(h)

    # Pixelwise softmax classification and label upscaling
    logits = conv2d(h, 1, self.class_count)
    probs = tf.nn.softmax(logits)
    probs = tf.image.resize_bilinear(probs, output_shape[1:3])

    # Cross-entropy loss; class 0 can be treated as "unknown" and excluded
    clipped_probs = tf.clip_by_value(probs, 1e-10, 1.0)
    ts = lambda x: x[:, :, :, 1:] if self.class0_unknown else x
    cost = -tf.reduce_mean(ts(target) * tf.log(ts(clipped_probs)))

    # Optimization
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    training_step = optimizer.minimize(cost)

    # Dense predictions and labels
    preds, dense_labels = tf.argmax(probs, 3), tf.argmax(target, 3)

    # Other evaluation measures
    self._n_accuracy = tf.reduce_mean(
        tf.cast(tf.equal(preds, dense_labels), tf.float32))

    return AbstractModel.EssentialNodes(
        input=input,
        target=target,
        probs=probs,
        loss=cost,
        training_step=training_step)
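A possible way to drive the graph built above. The EssentialNodes fields follow the constructor call in the listing, while the session handling, the input shapes, and the model variable are illustrative assumptions only:

import numpy as np
import tensorflow as tf

# Hypothetical training-step usage; `model` and the shapes are assumptions.
nodes = model._build_graph()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    images = np.zeros([4, 128, 256, 3], np.float32)                  # [batch, H, W, C]
    labels = np.zeros([4, 128, 256, model.class_count], np.float32)  # one-hot per pixel
    _, loss = sess.run([nodes.training_step, nodes.loss],
                       feed_dict={nodes.input: images, nodes.target: labels})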
def up(self, input, **_):
    hps = self.hps
    h_size = hps.h_size
    z_size = hps.z_size
    stride = [2, 2] if self.downsample else [1, 1]

    with arg_scope([conv2d]):
        x = tf.nn.elu(input)
        x = conv2d("up_conv1", x, 2 * z_size + 2 * h_size, stride=stride)
        self.qz_mean, self.qz_logsd, self.up_context, h = split(
            x, 1, [z_size, z_size, h_size, h_size])

        h = tf.nn.elu(h)
        h = conv2d("up_conv3", h, h_size)

        if self.downsample:
            input = resize_nearest_neighbor(input, 0.5)
        return input + 0.1 * h
def _forward(self, x, gpu):
    hps = self.hps

    x = tf.to_float(x)
    x = tf.clip_by_value((x + 0.5) / 256.0, 0.0, 1.0) - 0.5

    # Input images are repeated k times along the batch dimension.
    # This is used for the importance-sampling loss (k is the number of samples).
    data_size = hps.batch_size * hps.k
    x = repeat(x, hps.k)
    orig_x = x
    h_size = hps.h_size

    with arg_scope([conv2d, deconv2d], init=(self.mode == "init")):
        layers = []
        for i in range(hps.depth):
            layers.append([])
            for j in range(hps.num_blocks):
                downsample = (i > 0) and (j == 0)
                layers[-1].append(IAFLayer(hps, self.mode, downsample))

        h = conv2d("x_enc", x, h_size, [5, 5], [2, 2])  # -> [16, 16]

        # Bottom-up (encoder) pass
        for i, layer in enumerate(layers):
            for j, sub_layer in enumerate(layer):
                with tf.variable_scope("IAF_%d_%d" % (i, j)):
                    h = sub_layer.up(h)

        # Top-down (decoder) pass, starting from a learned top-level bias
        self.h_top = h_top = tf.get_variable(
            "h_top", [h_size], initializer=tf.zeros_initializer)
        h_top = tf.reshape(h_top, [1, -1, 1, 1])
        h = tf.tile(h_top, [data_size, 1,
                            hps.image_size // 2**len(layers),
                            hps.image_size // 2**len(layers)])
        kl_cost = kl_obj = 0.0

        for i, layer in reversed(list(enumerate(layers))):
            for j, sub_layer in reversed(list(enumerate(layer))):
                with tf.variable_scope("IAF_%d_%d" % (i, j)):
                    h, cur_obj, cur_cost = sub_layer.down(h)
                    kl_obj += cur_obj
                    kl_cost += cur_cost

                    if self.mode == "train" and gpu == hps.num_gpus - 1:
                        tf.scalar_summary("model/kl_obj_%02d_%02d" % (i, j),
                                          tf.reduce_mean(cur_obj))
                        tf.scalar_summary("model/kl_cost_%02d_%02d" % (i, j),
                                          tf.reduce_mean(cur_cost))

        x = tf.nn.elu(h)
        x = deconv2d("x_dec", x, 3, [5, 5])
        x = tf.clip_by_value(x, -0.5 + 1 / 512., 0.5 - 1 / 512.)

    # Reconstruction likelihood under a discretized logistic observation model
    log_pxz = discretized_logistic(x, self.dec_log_stdv, sample=orig_x)
    obj = tf.reduce_sum(kl_obj - log_pxz)

    if self.mode == "train" and gpu == hps.num_gpus - 1:
        tf.scalar_summary("model/log_pxz", -tf.reduce_mean(log_pxz))
        tf.scalar_summary("model/kl_obj", tf.reduce_mean(kl_obj))
        tf.scalar_summary("model/kl_cost", tf.reduce_mean(kl_cost))

    loss = tf.reduce_sum(compute_lowerbound(log_pxz, kl_cost, hps.k))
    return x, obj, loss
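compute_lowerbound turns the per-sample reconstruction and KL terms into the training loss. Its implementation is not part of this listing; the sketch below assumes the usual behaviour of a negative ELBO for k = 1 and an importance-weighted (IWAE-style) bound over the k repeated samples otherwise:

import numpy as np
import tensorflow as tf

def compute_lowerbound(log_pxz, sum_kl_costs, k=1):
    # Hypothetical sketch, not the original implementation.
    if k == 1:
        # Negative ELBO per example: KL - log p(x|z).
        return sum_kl_costs - log_pxz
    # IWAE-style bound: the k samples of each input are adjacent in the batch
    # (see repeat(x, hps.k) above), so reshape to [batch, k] and average the
    # importance weights in log space.
    log_w = tf.reshape(log_pxz - sum_kl_costs, [-1, k])
    return -(tf.reduce_logsumexp(log_w, [1]) - np.log(float(k)))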