def build_predict(self, Xnew, full_cov=False):
    """
    Compute the predictive mean and variance at the new points Xnew, using
    the inducing inputs self.Z, the training data self.X / self.Y and the
    kernel function self.RBF.

    :param Xnew: new input points at which to predict.
    :param full_cov: if True, return the full covariance between the new
        points; otherwise return only their marginal variances.
    :returns: (mean, var). var is tiled once per column of self.Y.
    """
    err = self.Y
    Kuf = self.RBF(self.Z, self.X)
    # A small multiple of the identity is added before the Cholesky.
    Kuu = self.RBF(self.Z, self.Z) + eye(self.num_inducing) * 1e-6
    Kus = self.RBF(self.Z, Xnew)
    sigma = tf.sqrt(self.likelihood_variance)
    L = tf.cholesky(Kuu)
    A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
    # Use self.num_inducing here too: the bare name `num_inducing` is not
    # defined anywhere in this method.
    B = tf.matmul(A, tf.transpose(A)) + eye(self.num_inducing)
    LB = tf.cholesky(B)
    Aerr = tf.matmul(A, err)
    c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
    tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
    tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
    mean = tf.matmul(tf.transpose(tmp2), c)
    if full_cov:
        var = (self.RBF(Xnew, Xnew)
               + tf.matmul(tf.transpose(tmp2), tmp2)
               - tf.matmul(tf.transpose(tmp1), tmp1))
        shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        # Only marginal variances are wanted, so take the diagonal of the
        # kernel matrix. Adding the full matrix to the reduced vectors would
        # broadcast to a matrix and break the rank-2 tile below.
        var = (tf.diag_part(self.RBF(Xnew, Xnew))
               + tf.reduce_sum(tf.square(tmp2), 0)
               - tf.reduce_sum(tf.square(tmp1), 0))
        shape = tf.pack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean, var
def bond_conv_layer(activated_atoms, bv_params, layer):
    """
    Build one bond-convolution layer.

    For every degree 1..5 the current atom activations are multiplied by the
    flow matrices gathered for that degree, masked by which atoms have that
    degree, offset by the degree's bias and summed over axis 1. The
    per-degree results are accumulated and passed through a ReLU.

    :param activated_atoms: atom activations from the previous layer.
    :param bv_params: dict holding 'A_flow<layer>_<deg>' and
        'b_flow<layer>_<deg>' entries.
    :param layer: index of the layer being built; selects the parameters and
        the output depth from flow_layer_depths.
    :returns: the activations of this layer.
    """
    flow_depth = flow_layer_depths[layer]
    next_activated_atoms = tf.zeros(tf.pack([N_atoms_ph, flow_depth]))

    # None of these depend on the degree, so build them once instead of once
    # per loop iteration.
    atom_multiples = tf.pack([N_atoms_ph, 1, 1])
    activated_atoms_dim = tf.expand_dims(
        tf.tile(tf.expand_dims(activated_atoms, 0), atom_multiples), 2)
    mask_multiples = tf.pack([1, N_atoms_ph, flow_depth])
    bias_multiples = tf.pack([N_atoms_ph, N_atoms_ph, 1])

    for deg in range(1, 6):
        flow_param = bv_params['A_flow'+str(layer)+'_'+str(deg)]
        flow_map = tf.gather(flow_param, type_adj_ph)
        adj_mul = tf.batch_matmul(activated_atoms_dim, flow_map)
        adj_mul = tf.squeeze(adj_mul, [2])

        # 1.0 where the atom has degree `deg`, 0.0 elsewhere.
        deg_mask = tf.to_float(tf.equal(deg_list_ph, deg))
        deg_list_dim = tf.tile(
            tf.expand_dims(tf.expand_dims(deg_mask, 1), 1), mask_multiples)

        biases = tf.tile(
            bv_params['b_flow'+str(layer)+'_'+str(deg)], bias_multiples)
        filtered_atoms = tf.add(tf.mul(adj_mul, deg_list_dim), biases)
        next_activated_atoms = (next_activated_atoms
                                + tf.reduce_sum(filtered_atoms, 1))

    next_activated_atoms = tf.nn.relu(next_activated_atoms)
    return next_activated_atoms
def K(self, X, X2=None):
    """
    Kernel matrix of this kernel.

    With a single input X the result is a diagonal matrix whose entries all
    equal self.variance (one per row of X). When a second input X2 is given
    the result is an all-zero float64 matrix with one row per row of X and
    one column per row of X2.
    """
    if X2 is not None:
        zeros_shape = tf.pack([tf.shape(X)[0], tf.shape(X2)[0]])
        return tf.zeros(zeros_shape, tf.float64)

    diag_length = tf.pack([tf.shape(X)[0]])
    diag_values = tf.fill(diag_length, tf.squeeze(self.variance))
    return tf.diag(diag_values)
def FixedUnPooling(x, shape, unpool_mat=None):
    """
    Unpool the input by taking a kronecker product with a fixed matrix.

    :param x: NHWC tensor
    :param shape: int or [h, w]
    :param unpool_mat: a tf/np matrix with size=shape. If None, a matrix
        with a single 1 at the top-left corner is used.
    :returns: NHWC tensor
    """
    shape = shape2d(shape)
    input_shape = tf.shape(x)

    if unpool_mat is None:
        corner = np.zeros(shape, dtype='float32')
        corner[0][0] = 1
        unpool_mat = tf.Variable(corner, trainable=False, name='unpool_mat')
    elif isinstance(unpool_mat, np.ndarray):
        unpool_mat = tf.Variable(
            unpool_mat, trainable=False, name='unpool_mat')
    assert unpool_mat.get_shape().as_list() == list(shape)

    # The kronecker product is an outer product between the flattened
    # (channels-first) input and the flattened unpooling matrix.
    channels_first = tf.transpose(x, [0, 3, 1, 2])
    column = tf.expand_dims(flatten(channels_first), -1)    # (bchw) x 1
    row = tf.expand_dims(flatten(unpool_mat), 0)            # 1 x (sh x sw)
    outer = tf.matmul(column, row)                          # (bchw) x (sh x sw)

    # Unfold to b, c, h, w, sh, sw ...
    unfolded = tf.reshape(outer, tf.pack([
        -1, input_shape[3], input_shape[1], input_shape[2],
        shape[0], shape[1]]))
    # ... interleave each spatial axis with its unpooling axis ...
    interleaved = tf.transpose(unfolded, [0, 2, 4, 3, 5, 1])
    # ... and merge them to get the enlarged NHWC result.
    return tf.reshape(interleaved, tf.pack([
        -1, input_shape[1] * shape[0], input_shape[2] * shape[1],
        input_shape[3]]))
def _define_distance_to_clusters(self, data): """Defines the Mahalanobis distance to the assigned Gaussian.""" # TODO(xavigonzalvo): reuse (input - mean) * cov^-1 * (input - # mean) from log probability function. self._all_scores = [] for shard in data: all_scores = [] shard = tf.expand_dims(shard, 0) for c in xrange(self._num_classes): if self._covariance_type == FULL_COVARIANCE: cov = self._covs[c, :, :] elif self._covariance_type == DIAG_COVARIANCE: cov = tf.diag(self._covs[c, :]) inverse = tf.matrix_inverse(cov + self._min_var) inv_cov = tf.tile( tf.expand_dims(inverse, 0), tf.pack([self._num_examples, 1, 1])) diff = tf.transpose(shard - self._means[c, :, :], perm=[1, 0, 2]) m_left = tf.batch_matmul(diff, inv_cov) all_scores.append(tf.sqrt(tf.batch_matmul( m_left, tf.transpose(diff, perm=[0, 2, 1]) ))) self._all_scores.append(tf.reshape( tf.concat(1, all_scores), tf.pack([self._num_examples, self._num_classes]))) # Distance to the associated class. self._all_scores = tf.concat(0, self._all_scores) assignments = tf.concat(0, self.assignments()) rows = tf.to_int64(tf.range(0, self._num_examples)) indices = tf.concat(1, [tf.expand_dims(rows, 1), tf.expand_dims(assignments, 1)]) self._scores = tf.gather_nd(self._all_scores, indices)
def inference(self, x):
    """
    Unroll the recurrent attention loop for self.n_steps steps.

    Each step extracts a glimpse at the current location (self.rho), encodes
    it (self.f_g), updates the hidden state (self.f_h), predicts the mean of
    the next location (self.f_l) and samples the next location from a normal
    distribution around that mean with stddev self.sigma.

    Returns the per-step location means, sampled locations and hidden states
    (each packed and transposed with perm [1, 0, 2]), the classifier output
    computed from the last hidden state, its argmax along axis 1, and the
    last sampled location.
    """
    # loc_t ~ gaussian(loc_mean_t, [[sigma^2, 0], [0, sigma^2]]^-1)
    loc_t = self.loc_init
    h_t = self.h_init
    mean_trace, loc_trace, hidden_trace = [], [], []

    for _ in xrange(self.n_steps):
        glimpse = self.rho(loc_t, x)
        encoded = self.f_g(glimpse, loc_t)
        h_t = self.f_h(h_t, encoded)
        loc_mean_t = self.f_l(h_t)
        loc_t = tf.random_normal(loc_mean_t.get_shape(),
                                 mean=loc_mean_t,
                                 stddev=self.sigma)
        mean_trace.append(loc_mean_t)
        loc_trace.append(loc_t)
        hidden_trace.append(h_t)

    prob = tf.nn.bias_add(tf.matmul(h_t, self.w_classifier),
                          self.b_classifier)
    pred = tf.argmax(prob, 1)

    def _swap_leading_axes(steps):
        # Pack the per-step list, then swap the first two axes.
        return tf.transpose(tf.pack(steps), perm=[1, 0, 2])

    loc_mean_ts = _swap_leading_axes(mean_trace)
    loc_ts = _swap_leading_axes(loc_trace)
    h_ts = _swap_leading_axes(hidden_trace)
    return loc_mean_ts, loc_ts, h_ts, prob, pred, loc_t
def iou(self, boxes1, boxes2):
    """Calculate the IoU between a grid of boxes and a single box.

    Args:
      boxes1: 4-D tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL, 4]
        ====> (x_center, y_center, w, h)
      boxes2: 1-D tensor [4] ===> (x_center, y_center, w, h)
    Return:
      iou: 3-D tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    """
    def centre(k):
        # k-th component of the centre/size encoding of boxes1.
        return boxes1[:, :, :, k]

    # Convert both inputs from (centre, size) to (x_min, y_min, x_max, y_max).
    corners1 = tf.pack([centre(0) - centre(2) / 2,
                        centre(1) - centre(3) / 2,
                        centre(0) + centre(2) / 2,
                        centre(1) + centre(3) / 2])
    corners1 = tf.transpose(corners1, [1, 2, 3, 0])
    corners2 = tf.pack([boxes2[0] - boxes2[2] / 2,
                        boxes2[1] - boxes2[3] / 2,
                        boxes2[0] + boxes2[2] / 2,
                        boxes2[1] + boxes2[3] / 2])

    # Corners of the overlapping rectangle.
    upper_left = tf.maximum(corners1[:, :, :, 0:2], corners2[0:2])
    lower_right = tf.minimum(corners1[:, :, :, 2:], corners2[2:])

    overlap = lower_right - upper_left
    inter_square = overlap[:, :, :, 0] * overlap[:, :, :, 1]
    # Boxes overlap only when both extents are positive.
    overlaps = (tf.cast(overlap[:, :, :, 0] > 0, tf.float32)
                * tf.cast(overlap[:, :, :, 1] > 0, tf.float32))
    inter_square = overlaps * inter_square

    # Areas of the two boxes.
    square1 = ((corners1[:, :, :, 2] - corners1[:, :, :, 0])
               * (corners1[:, :, :, 3] - corners1[:, :, :, 1]))
    square2 = (corners2[2] - corners2[0]) * (corners2[3] - corners2[1])

    # The small constant keeps the denominator away from zero.
    return inter_square/(square1 + square2 - inter_square + 1e-6)
def build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new
    points Xnew.

    With full_cov=True the full covariance between the new points is
    returned, otherwise only the marginal variances; either way the result
    is tiled once per column of self.Y. For a derivation of the terms in
    here, see the associated SGPR notebook.
    """
    num_inducing = tf.shape(self.Z)[0]
    residual = self.Y - self.mean_function(self.X)
    Kuf = self.kern.K(self.Z, self.X)
    Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
    Kus = self.kern.K(self.Z, Xnew)
    noise_std = tf.sqrt(self.likelihood.variance)

    chol_uu = tf.cholesky(Kuu)
    A = tf.matrix_triangular_solve(chol_uu, Kuf, lower=True) / noise_std
    B = tf.matmul(A, tf.transpose(A)) + eye(num_inducing)
    chol_B = tf.cholesky(B)
    c = tf.matrix_triangular_solve(
        chol_B, tf.matmul(A, residual), lower=True) / noise_std

    proj_uu = tf.matrix_triangular_solve(chol_uu, Kus, lower=True)
    proj_B = tf.matrix_triangular_solve(chol_B, proj_uu, lower=True)
    mean = tf.matmul(tf.transpose(proj_B), c)

    if not full_cov:
        var = (self.kern.Kdiag(Xnew)
               + tf.reduce_sum(tf.square(proj_B), 0)
               - tf.reduce_sum(tf.square(proj_uu), 0))
        multiples = tf.pack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), multiples)
    else:
        var = (self.kern.K(Xnew)
               + tf.matmul(tf.transpose(proj_B), proj_B)
               - tf.matmul(tf.transpose(proj_uu), proj_uu))
        multiples = tf.pack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), multiples)

    return mean + self.mean_function(Xnew), var
def inputs(path):
    """
    Read a batch from the CSV at `path` and split it into features and
    labels.

    The first FLAGS.max_sentence_len columns are packed and transposed into
    the feature tensor; the column right after them is turned into a
    depth-2 one-hot label.
    """
    columns = read_csv(FLAGS.batch_size, path)
    feature_columns = columns[0:FLAGS.max_sentence_len]
    features = tf.transpose(tf.pack(feature_columns))
    label_column = tf.transpose(tf.pack(columns[FLAGS.max_sentence_len]))
    label = tf.one_hot(label_column, depth=2)
    return features, label
def build_score_loss_kl(self):
    r"""Build the loss with the score function estimator and an analytic KL.

    The value stored in ``self.loss`` is the Monte Carlo estimate

    .. math::

      E_{q(z; \lambda)} [ \log p(x | z) ] - KL(q(z; \lambda) || p(z))

    The returned tensor is the surrogate whose automatic differentiation
    gives the score function gradient (Paisley et al., 2012): the mean of
    ``log q(z)`` weighted by the (gradient-stopped) log-likelihood, minus
    the KL term, negated.

    The KL term is computed by ``kl_multivariate_normal`` from the ``loc``
    and ``scale`` of every variational layer; the original notes say this
    assumes the prior :math:`p(z) = \mathcal{N}(z; 0, 1)`.
    """
    x = self.data
    samples = self.variational.sample(self.n_samples)
    # The samples are treated as constants when evaluating log q.
    log_q = self.variational.log_prob(stop_gradient(samples))
    log_lik = self.model.log_lik(x, samples)

    locs = tf.pack([layer.loc for layer in self.variational.layers])
    scales = tf.pack([layer.scale for layer in self.variational.layers])
    kl = kl_multivariate_normal(locs, scales)

    self.loss = tf.reduce_mean(log_lik) - kl
    surrogate = tf.reduce_mean(log_q * stop_gradient(log_lik)) - kl
    return -surrogate
def build_reparam_loss_kl(self):
    r"""Build the loss with the reparameterization trick and an analytic KL.

    ``self.loss`` is set to the Monte Carlo estimate

    .. math::

      E_{q(z; \lambda)} [ \log p(x | z) ] - KL(q(z; \lambda) || p(z))

    and its negation is returned, so that automatic differentiation through
    the samples gives the reparameterization gradient (Kingma and Welling,
    2014).

    The KL term is computed by ``kl_multivariate_normal`` from the ``loc``
    and ``scale`` of every variational layer; the original notes say this
    assumes the prior :math:`p(z) = \mathcal{N}(z; 0, 1)`.
    """
    x = self.data
    samples = self.variational.sample(self.n_samples)

    locs = tf.pack([layer.loc for layer in self.variational.layers])
    scales = tf.pack([layer.scale for layer in self.variational.layers])

    expected_log_lik = tf.reduce_mean(self.model.log_lik(x, samples))
    self.loss = expected_log_lik - kl_multivariate_normal(locs, scales)
    return -self.loss
def read_record(filename_queue):
    """
    Read one fixed-length record (image bytes followed by mask bytes) from
    the queue.

    The returned object carries the record key, the size fields, `image`
    (the single-channel image repeated three times along the last axis, as
    float32) and `mask` (two float32 channels: the mask and its logical
    negation).
    """
    class FCNRecord(object):
        pass

    result = FCNRecord()
    result.mask_height = int(420/DOWNSAMPLE_FACTOR)
    result.mask_width = int(580/DOWNSAMPLE_FACTOR)
    result.mask_depth = 1
    result.img_depth = 1

    plane = [result.mask_height, result.mask_width]
    img_len = result.mask_height*result.mask_width*result.img_depth
    mask_len = result.mask_height*result.mask_width*result.mask_depth

    reader = tf.FixedLengthRecordReader(record_bytes=img_len + mask_len)
    result.key, raw_value = reader.read(filename_queue)
    record_bytes = tf.decode_raw(raw_value, tf.uint8)

    # Image: the first img_len bytes, replicated into three channels.
    grey = tf.reshape(tf.slice(record_bytes, [0], [img_len]), plane)
    three_channel = tf.transpose(tf.pack([grey, grey, grey]), (1, 2, 0))
    result.image = tf.cast(three_channel, tf.float32)

    # Mask: the remaining mask_len bytes, stacked with its negation.
    mask_bytes = tf.reshape(
        tf.slice(record_bytes, [img_len], [mask_len]), plane)
    is_mask = tf.cast(mask_bytes, tf.bool)
    two_channel = tf.transpose(
        tf.pack([is_mask, tf.logical_not(is_mask)]), (1, 2, 0))
    result.mask = tf.cast(two_channel, tf.float32)
    return result
def log_prob(self, xs, zs):
    """Return a vector [log p(xs, zs[1,:]), ..., log p(xs, zs[S,:])]."""
    x = xs['x']
    pi, mus, sigmas = zs

    log_prior = dirichlet.logpdf(pi, self.alpha)
    log_prior = log_prior + tf.reduce_sum(
        norm.logpdf(mus, 0, np.sqrt(self.c)), 1)
    log_prior = log_prior + tf.reduce_sum(
        invgamma.logpdf(sigmas, self.a, self.b), 1)

    N = get_dims(x)[0]
    n_samples = get_dims(pi)[0]

    def component_row(s, k):
        # Row k of the K x N matrix for sample s:
        # log pi_k + log N(x_n; mu_k, sigma_k) for every data point n.
        lo, hi = k*self.D, (k+1)*self.D
        return (tf.ones(N)*tf.log(pi[s, k])
                + multivariate_normal.logpdf(x, mus[s, lo:hi],
                                             sigmas[s, lo:hi]))

    # The log-likelihood of sample s is
    # sum_{n=1}^N log sum_{k=1}^K exp( log pi_k + log N(x_n; mu_k, sigma_k) )
    log_lik = []
    for s in range(n_samples):
        rows = tf.pack([component_row(s, k) for k in range(self.K)])
        # Reducing over the components leaves one entry per data point.
        per_point = log_sum_exp(rows, 0)
        # Sum over data points to get the full log-likelihood.
        log_lik.append(tf.reduce_sum(per_point))

    return log_prior + tf.pack(log_lik)
def build_generator(self):
    """
    Build the inference graph that scores every step of a video.

    Variables are reused from the current variable scope, so the training
    graph is expected to have been built first.

    Returns the `video` and `video_mask` placeholders, the per-step softmax
    probability of class 1 multiplied by the mask (b x n), and the list of
    variables whose name starts with the "RNN" scope name.
    """
    tf.get_variable_scope().reuse_variables()
    video = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps, self.dim_image])
    video_mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])
    # Embed every frame with a shared linear layer.
    video_flat = tf.reshape(video, [-1, self.dim_image])
    image_emb = tf.nn.xw_plus_b( video_flat, self.encode_image_W, self.encode_image_b)
    image_emb = tf.reshape(image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])
    image_emb = tf.transpose(image_emb, [1,0,2])
    # NOTE(review): this initial value is overwritten by the rnn.rnn call
    # below and never passed to it -- confirm it can be dropped.
    state2 = tf.zeros([self.batch_size, self.lstm2.state_size])
    generated_HL = []
    _X = tf.reshape(image_emb, [-1, self.dim_hidden]) # (n x b) x h
    _X = tf.split(0, self.n_lstm_steps, _X) # n x (b x h)
    [output2, state2] = rnn.rnn(self.lstm_HL_net,_X,dtype=tf.float32) # n x (b x h)
    output2 = tf.transpose(tf.pack(output2), [1,0,2]) # b x n x h
    # Classify every step of every batch element.
    for ii in range(self.batch_size):
        logit_words = tf.nn.xw_plus_b( output2[ii,:,:], self.embed_HL_W, self.embed_HL_b) # n x 2
        logit_words = tf.nn.softmax(logit_words) # n x 2
        generated_HL.append(logit_words[:,1]) # n x 1
    generated_HL = tf.pack(generated_HL) # b x n
    generated_HL = tf.mul(generated_HL,video_mask) # b x n
    # Entering the scope is only used to obtain its full name for filtering.
    with tf.variable_scope("RNN") as vs:
        lstmRNN_variables = [v for v in tf.all_variables() if v.name.startswith(vs.name)]
    return video, video_mask, generated_HL, lstmRNN_variables
def build_model(self):
    """
    Build the training graph.

    Every frame is embedded with a shared linear layer, the embedded
    sequence is run through self.lstm_HL_net, and each step is classified
    into two classes. The loss is the masked softmax cross-entropy summed
    over all steps of all batch elements, divided by the sum of the mask.

    Returns the loss followed by the `video`, `video_mask`, `HLness` and
    `HLness_mask` placeholders.
    """
    video = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps, self.dim_image])
    video_mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])
    HLness = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps])
    HLness_mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])

    video_flat = tf.reshape(video, [-1, self.dim_image])
    image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W, self.encode_image_b)  # (batch_size*n_lstm_steps, dim_hidden)
    image_emb = tf.reshape(image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])
    image_emb = tf.transpose(image_emb, [1, 0, 2])  # n x b x h

    loss_HL = 0.0
    _X = tf.reshape(image_emb, [-1, self.dim_hidden])  # (n x b) x h
    _X = tf.split(0, self.n_lstm_steps, _X)  # n x (b x h)
    [output2, state2] = rnn.rnn(self.lstm_HL_net, _X, dtype=tf.float32)  # n x (b x h)
    output2 = tf.transpose(tf.pack(output2), [1, 0, 2])  # b x n x h

    indices = tf.expand_dims(tf.range(0, self.n_lstm_steps, 1), 1)  # n x 1
    # Iterate over the whole batch. The loss is normalised by the mask of
    # the whole batch, so a fixed count of 10 was only right for
    # batch_size == 10.
    for ii in range(self.batch_size):
        labels = tf.expand_dims(HLness[ii, :], 1)  # n x 1
        concated = tf.concat(1, [indices, labels])  # n x 2
        # One-hot encode the labels of this batch element.
        onehot_labels = tf.sparse_to_dense(
            concated, tf.pack([self.n_lstm_steps, 2]), 1.0, 0.0)  # n x 2
        logit_words = tf.nn.xw_plus_b(
            output2[ii, :, :], self.embed_HL_W, self.embed_HL_b)  # n x 2
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logit_words, onehot_labels)  # n x 1
        # Masked-out steps do not contribute to the loss.
        cross_entropy = tf.mul(cross_entropy, HLness_mask[ii, :])  # n x 1
        loss_HL += tf.reduce_sum(cross_entropy)  # 1

    loss_HL = loss_HL / tf.reduce_sum(HLness_mask)
    loss = loss_HL
    return loss, video, video_mask, HLness, HLness_mask
def __init__(self, parent_qs):
    """
    Pack several distributions with identical output shapes into one.

    The packed shape is (len(parent_qs),) + the shared output shape.
    `sample` is always packed; `variance`, `stddev` and `mean` are packed
    only when every parent provides them.

    :param parent_qs: non-empty sequence of distributions exposing
        `output_shape` and `sample`.
    """
    # TODO: this is redundant with the checks in PackModels
    self.n = len(parent_qs)
    base_shape = parent_qs[0].output_shape
    for pq in parent_qs:
        assert (pq.output_shape == base_shape)

    packed_shape = (self.n,) + base_shape
    self.parent_qs = parent_qs
    super(PackedQDistribution, self).__init__(shape=packed_shape)

    self.sample = tf.pack([pq.sample for pq in parent_qs])

    # HACK: these statistics are optional. Packing stays best-effort, but
    # only ordinary errors are swallowed -- a bare `except:` would also hide
    # KeyboardInterrupt and SystemExit.
    for stat in ("variance", "stddev", "mean"):
        try:
            setattr(self, stat,
                    tf.pack([getattr(pq, stat) for pq in parent_qs]))
        except Exception:
            pass
def total_variation_loss(layer):
    """
    Total variation loss of a 4-D tensor.

    Differences between neighbouring entries along axis 1 and along axis 2
    are each reduced with tf.nn.l2_loss and divided by their number of
    elements; the two terms are added.
    """
    dims = tf.shape(layer)
    height = dims[1]
    width = dims[2]
    origin = [0, 0, 0, 0]
    to_the_end = [-1, -1, -1, -1]

    # Everything but the last row, minus everything but the first row.
    y = (tf.slice(layer, origin, tf.pack([-1, height-1, -1, -1]))
         - tf.slice(layer, [0, 1, 0, 0], to_the_end))
    # Same along the width axis.
    x = (tf.slice(layer, origin, tf.pack([-1, -1, width-1, -1]))
         - tf.slice(layer, [0, 0, 1, 0], to_the_end))

    def size_normalised_l2(diff):
        return tf.nn.l2_loss(diff) / tf.to_float(tf.size(diff))

    return size_normalised_l2(x) + size_normalised_l2(y)
def extract_patch(x, f_y, f_x, nchannels):
    """
    Extract a patch from every channel of x as f_y^T * x_ch * f_x.

    Args:
        x: [B, H, W, D]
        f_y: [B, H, FH]
        f_x: [B, W, FW]
        nchannels: D

    Returns:
        patch: [B, FH, FW, D]
    """
    fsize_h = tf.shape(f_y)[2]
    fsize_w = tf.shape(f_x)[2]
    hh = tf.shape(x)[1]
    ww = tf.shape(x)[2]

    channel_patches = []
    for dd in xrange(nchannels):
        # Channel dd as a [B, H, W] tensor.
        channel = tf.slice(x, [0, 0, 0, dd], [-1, -1, -1, 1])
        x_ch = tf.reshape(channel, tf.pack([-1, hh, ww]))
        # adj_x=True uses f_y transposed.
        filtered = tf.batch_matmul(
            tf.batch_matmul(f_y, x_ch, adj_x=True), f_x)
        channel_patches.append(
            tf.reshape(filtered, tf.pack([-1, fsize_h, fsize_w, 1])))

    # Stack the per-channel patches back along the channel axis.
    return tf.concat(3, channel_patches)
def _transform(theta, input_dim, downsample_factor):
    """
    Apply the per-example 2x3 transforms in `theta` to a sampling grid and
    resample `input_dim` at the transformed coordinates.

    The output height and width are the input height and width floor-divided
    by `downsample_factor`; the batch and channel sizes are taken from the
    input.
    """
    with tf.variable_scope('_transform'):
        num_batch = tf.shape(input_dim)[0]
        height = tf.shape(input_dim)[1]
        width = tf.shape(input_dim)[2]
        num_channels = tf.shape(input_dim)[3]

        theta = tf.cast(tf.reshape(theta, (-1, 2, 3)), 'float32')

        # grid of (x_t, y_t, 1), eq (1) in ref [1]
        height_f = tf.cast(height, 'float32')
        width_f = tf.cast(width, 'float32')
        out_height = tf.cast(height_f // downsample_factor, 'int32')
        out_width = tf.cast(width_f // downsample_factor, 'int32')

        # Flatten the grid, repeat it once per batch element and fold it
        # back into one 3 x (number of points) matrix per example.
        flat_grid = tf.reshape(
            tf.expand_dims(_meshgrid(out_height, out_width), 0), [-1])
        batch_grid = tf.reshape(
            tf.tile(flat_grid, tf.pack([num_batch])),
            tf.pack([num_batch, 3, -1]))

        # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s)
        source_coords = tf.batch_matmul(theta, batch_grid)
        x_s_flat = tf.reshape(
            tf.slice(source_coords, [0, 0, 0], [-1, 1, -1]), [-1])
        y_s_flat = tf.reshape(
            tf.slice(source_coords, [0, 1, 0], [-1, 1, -1]), [-1])

        resampled = _interpolate(
            input_dim, x_s_flat, y_s_flat, downsample_factor)

        return tf.reshape(
            resampled,
            tf.pack([num_batch, out_height, out_width, num_channels]))
def get_inference(images_ph, dropout_keep_prob_ph):
    """
    Build the segmentation network on top of the VGG layers and return its
    logits.

    The average image is subtracted from the input, 21-channel 1x1
    predictions are made from 'pool4' and from 'dropout2', the latter is
    upsampled 2x and added to the former, and the sum is upsampled 16x and
    cropped to the input's height and width.

    :param images_ph: input images (NHWC, judging by the indexing below).
    :param dropout_keep_prob_ph: dropout keep probability passed on to VGG.
    :returns: the 21-channel logits tensor.
    """
    #subtract average image
    with tf.variable_scope('centering') as scope:
        mean = tf.constant(vgg.average_image, dtype=tf.float32, name='avg_image')
        images_ph = tf.sub(images_ph, mean, name='subtract_avg')

    #get layers from vgg19
    vgg_layers = vgg.get_VGG_layers(images_ph, dropout_keep_prob_ph, train_fc_layers=True)

    #################################################
    ### Add more layers for semantic segmentation ###
    #################################################

    # convolution on top of pool4 to 21 channels (to make coarse predictions)
    with tf.variable_scope('conv9') as scope:
        conv9 = conv_layer(vgg_layers['pool4'], 21, 1, 'conv9')

    # convolution on top of conv7 (fc7) to 21 channels (to make coarse predictions)
    with tf.variable_scope('conv8') as scope:
        conv8 = conv_layer(vgg_layers['dropout2'], 21, 1, 'conv8')

    # 2x upsampling from last layer
    with tf.variable_scope('deconv1') as scope:
        shape = tf.shape(conv8)
        out_shape = tf.pack([shape[0], shape[1]*2, shape[2]*2, 21])
        weights = tf.Variable(tf.truncated_normal(mean=MEAN, stddev=0.1, shape=(4, 4, 21, 21)), name='weights')
        deconv1 = tf.nn.conv2d_transpose( value=conv8, filter=weights, output_shape=out_shape, strides=(1, 2, 2, 1), padding='SAME', name='deconv1')

        # slice 2x upsampled tensor in the last layer to fit pool4
        shape = tf.shape(conv9)
        size = tf.pack([-1, shape[1], shape[2], -1])
        deconv1 = tf.slice(deconv1, begin=[0,0,0,0], size=size, name="deconv1_slice")

    # combine predictions from last layer and pool4
    with tf.variable_scope('combined_pred') as scope:
        combined_pred = tf.add(deconv1, conv9, name="combined_pred")

    # 16x upsampling
    with tf.variable_scope('deconv2') as scope:
        shape = tf.shape(combined_pred)
        out_shape = tf.pack([shape[0], shape[1]*16, shape[2]*16, 21])
        weights = tf.Variable(tf.truncated_normal(mean=MEAN, stddev=0.1, shape=(32, 32, 21, 21)), name='weights')
        deconv2 = tf.nn.conv2d_transpose(value=combined_pred, filter=weights, output_shape=out_shape,
                                         strides=(1, 16, 16, 1), padding='SAME', name='deconv2')

        # slice upsampled tensor to original shape
        orig_shape = tf.shape(images_ph)
        size = tf.pack([-1, orig_shape[1], orig_shape[2], -1])
        logits = tf.slice(deconv2, begin=[0,0,0,0], size=size, name='logits')

    return logits
def testConst(self):
    """Packing constant inputs yields a single Const op with the same values.

    Checked for numpy arrays of several shapes, for Python lists in the 1-D
    case, for both tf.pack and tf.stack, and for shapes passed to
    tf.reshape.
    """
    # Fixed seed so the random test data is reproducible.
    np.random.seed(7)
    with self.test_session(use_gpu=True):
        for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
            data = np.random.randn(*shape).astype(np.float32)
            # Pack back into a single tensorflow tensor directly using np array
            c = tf.pack(data)
            # This is implemented via a Const:
            self.assertEqual(c.op.type, "Const")
            self.assertAllEqual(c.eval(), data)

            # Python lists also work for 1-D case:
            if len(shape) == 1:
                data_list = list(data)
                cl = tf.pack(data_list)
                self.assertEqual(cl.op.type, "Const")
                self.assertAllEqual(cl.eval(), data)

                # Same check through the tf.stack name.
                cl = tf.stack(data_list)
                self.assertEqual(cl.op.type, "Const")
                self.assertAllEqual(cl.eval(), data)

        # Verify that shape induction works with shapes produced via const pack
        a = tf.constant([1, 2, 3, 4, 5, 6])
        b = tf.reshape(a, tf.pack([2, 3]))
        self.assertAllEqual(b.get_shape(), [2, 3])

        b = tf.reshape(a, tf.stack([2, 3]))
        self.assertAllEqual(b.get_shape(), [2, 3])
def build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new
    points Xnew.

    With full_cov=True the full covariance between the new points is
    returned, otherwise only the marginal variances; either way the result
    is tiled once per column of self.Y.
    """
    _, _, Luu, L, _, _, gamma = self.build_common_terms()
    Kus = self.kern.K(self.Z, Xnew)  # size M x Xnew
    whitened = tf.matrix_triangular_solve(Luu, Kus, lower=True)  # size M x Xnew

    weights = tf.matrix_triangular_solve(tf.transpose(L), gamma, lower=False)
    mean = tf.matmul(tf.transpose(whitened), weights) + self.mean_function(Xnew)

    correction = tf.matrix_triangular_solve(L, whitened, lower=True)

    if not full_cov:
        marginal = (self.kern.Kdiag(Xnew)
                    - tf.reduce_sum(tf.square(whitened), 0)
                    + tf.reduce_sum(tf.square(correction), 0))  # size Xnew,
        var = tf.tile(tf.expand_dims(marginal, 1),
                      tf.pack([1, tf.shape(self.Y)[1]]))
    else:
        covariance = (self.kern.K(Xnew)
                      - tf.matmul(tf.transpose(whitened), whitened)
                      + tf.matmul(tf.transpose(correction), correction))
        var = tf.tile(tf.expand_dims(covariance, 2),
                      tf.pack([1, 1, tf.shape(self.Y)[1]]))

    return mean, var
def _composition_function(self, inputs, length, init_state=None):
    """
    Run the configured recurrent composition over `inputs`.

    Supported values of self._composition are "GRU", "LSTM" and "BiGRU".

    :param inputs: time-major inputs of the recurrent network.
    :param length: sequence lengths, passed to dynamic_rnn and, for "BiGRU",
        to tf.reverse_sequence.
    :param init_state: optional initial state. For "LSTM" it is concatenated
        behind a zero tensor of the same shape along axis 1; for "BiGRU" it
        is split in two halves along axis 1, one per direction.
    :returns: the outputs of the recurrent network; for "BiGRU" a list of
        per-step concatenations of forward and backward outputs.
    :raises NotImplementedError: for any other composition.
    """
    # The presence of an initial state is tested with `is not None`
    # throughout: the truth value of a tensor must not be used.
    if self._composition == "GRU":
        cell = GRUCell(self._size)
        return dynamic_rnn(cell, inputs, sequence_length=length,
                           time_major=True, initial_state=init_state,
                           dtype=tf.float32)[0]
    elif self._composition == "LSTM":
        cell = BasicLSTMCell(self._size)
        if init_state is not None:
            init_state = tf.concat(
                1, [tf.zeros_like(init_state, tf.float32), init_state])
        outs = dynamic_rnn(cell, inputs, sequence_length=length,
                           time_major=True, initial_state=init_state,
                           dtype=tf.float32)[0]
        return outs
    elif self._composition == "BiGRU":
        cell = GRUCell(self._size // 2, self._size)
        if init_state is not None:
            init_state_fw, init_state_bw = tf.split(1, 2, init_state)
        else:
            init_state_fw, init_state_bw = None, None
        with tf.variable_scope("forward"):
            fw_outs = dynamic_rnn(cell, inputs, sequence_length=length,
                                  time_major=True,
                                  initial_state=init_state_fw,
                                  dtype=tf.float32)[0]
        with tf.variable_scope("backward"):
            # Reverse each sequence up to its length, run the cell, then
            # reverse the outputs back into the original order.
            rev_inputs = tf.reverse_sequence(tf.pack(inputs), length, 0, 1)
            rev_inputs = [tf.reshape(x, [-1, self._size])
                          for x in tf.split(0, len(inputs), rev_inputs)]
            bw_outs = dynamic_rnn(cell, rev_inputs, sequence_length=length,
                                  time_major=True,
                                  initial_state=init_state_bw,
                                  dtype=tf.float32)[0]
            bw_outs = tf.reverse_sequence(tf.pack(bw_outs), length, 0, 1)
            bw_outs = [tf.reshape(x, [-1, self._size])
                       for x in tf.split(0, len(inputs), bw_outs)]
        return [tf.concat(1, [fw_out, bw_out])
                for fw_out, bw_out in zip(fw_outs, bw_outs)]
    else:
        raise NotImplementedError("Other compositions not implemented yet.")
def inference1(data):
    """
    Build a small convolution / transposed-convolution network.

    Two 3x3 convolutions (the first with stride 2) are followed by two
    transposed convolutions whose output shapes are the static shapes of
    `h_conv1` and of `data` respectively, so the result has the same static
    shape as the input.

    :param data: input tensor with a fully defined static shape (it is
        converted to a Python list and packed into an output shape).
    :returns: the output of the last transposed convolution.
    """
    # Static input shape, reused as the output shape of the last layer.
    data_shape_l = data.get_shape().as_list()
    with tf.variable_scope('conv1') as scope:
        weights = _variable_with_weight_decay('weights', shape=[3, 3, 3, 32],wd=0.0)
        biases = _variable_on_cpu('biases', [32], tf.constant_initializer(0.0))
        h_conv1 = _conv2d(data, weights, biases, [1,2,2,1])
    with tf.variable_scope('conv2') as scope:
        weights = _variable_with_weight_decay('weights', shape=[3, 3, 32, 32],wd=0.0)
        biases = _variable_on_cpu('biases', [32], tf.constant_initializer(0.0))
        h_conv2 = _conv2d(h_conv1, weights, biases, [1,1,1,1])
    with tf.variable_scope('deconv1') as scope:
        weights = _variable_with_weight_decay('weights', shape=[3, 3, 32, 32],wd=0.0)
        biases = _variable_on_cpu('biases', [32], tf.constant_initializer(0.0))
        # Mirror conv2: produce a tensor shaped like h_conv1.
        output_shape = tf.pack(h_conv1.get_shape().as_list())
        h_dconv1 = _dconv2d(h_conv2, weights, biases, output_shape, [1,1,1,1])
    with tf.variable_scope('deconv2') as scope:
        weights = _variable_with_weight_decay('weights', shape=[3, 3, 3, 32],wd=0.0)
        biases = _variable_on_cpu('biases', [3], tf.constant_initializer(0.0))
        # Mirror conv1: produce a tensor shaped like the input.
        output_shape = tf.pack(data_shape_l)
        h_dconv2 = _dconv2d(h_dconv1, weights, biases, output_shape, [1,2,2,1])
    # Disabled alternative: a single transposed convolution straight from
    # h_conv1.
    # with tf.variable_scope('deconv1') as scope:
    #     weights = _variable_with_weight_decay('weights', shape=[3, 3, 3, 32],
    #                                           stddev=1e-4, wd=0.0)
    #     biases = _variable_on_cpu('biases', [3], tf.constant_initializer(0.0))
    #     output_shape = tf.pack(data_shape_l)
    #     h_dconv1 = _dconv2d(h_conv1, weights, biases, output_shape, [1,2,2,1])
    return h_dconv2
def lstm_cell(i, o, state):
    """
    Create a LSTM cell. See e.g.: http://arxiv.org/pdf/1402.1128v1.pdf
    Note that in this formulation, we omit the various connections between
    the previous state and the gates.

    The four gate pre-activations are computed together: the input and the
    previous output are each stacked four times and batch-multiplied with
    the stacked weights fico_x and fico_m, then the stacked bias fico_b is
    added. Slices 0..3 of the result are the forget gate, the input gate,
    the candidate update and the output gate.

    Returns the new output and the new cell state.
    """
    stacked_inputs = tf.pack([i, i, i, i])
    stacked_outputs = tf.pack([o, o, o, o])
    from_input = tf.batch_matmul(stacked_inputs, fico_x)
    from_output = tf.batch_matmul(stacked_outputs, fico_m)
    pre_activations = from_input + from_output + fico_b

    forget_gate = tf.sigmoid(pre_activations[0, :, :])
    input_gate = tf.sigmoid(pre_activations[1, :, :])
    update = tf.tanh(pre_activations[2, :, :])
    state = forget_gate*state + input_gate*update

    output_gate = tf.sigmoid(pre_activations[3, :, :])
    return output_gate * tf.tanh(state), state
def build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new
    points Xnew.

    The kernel expectations (psi statistics) of the inputs described by
    self.X_mean and self.X_var take the place of the kernel matrices between
    inducing points and data. Note that this is very similar to the SGPR
    prediction, for which there are notes in the SGPR notebook.

    With full_cov=True the full covariance between the new points is
    returned, otherwise only the marginal variances; either way the result
    is tiled once per column of self.Y.
    """
    num_inducing = tf.shape(self.Z)[0]
    _, psi1, psi2 = ke.build_psi_stats(
        self.Z, self.kern, self.X_mean, self.X_var)
    Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
    Kus = self.kern.K(self.Z, Xnew)
    noise_var = self.likelihood.variance
    noise_std = tf.sqrt(noise_var)

    chol_uu = tf.cholesky(Kuu)
    A = tf.matrix_triangular_solve(
        chol_uu, tf.transpose(psi1), lower=True) / noise_std
    # Two successive triangular solves against psi2.
    half_solved = tf.matrix_triangular_solve(chol_uu, psi2, lower=True)
    AAT = tf.matrix_triangular_solve(
        chol_uu, tf.transpose(half_solved), lower=True) / noise_var
    B = AAT + eye(num_inducing)
    chol_B = tf.cholesky(B)
    c = tf.matrix_triangular_solve(
        chol_B, tf.matmul(A, self.Y), lower=True) / noise_std

    proj_uu = tf.matrix_triangular_solve(chol_uu, Kus, lower=True)
    proj_B = tf.matrix_triangular_solve(chol_B, proj_uu, lower=True)
    mean = tf.matmul(tf.transpose(proj_B), c)

    if not full_cov:
        var = (self.kern.Kdiag(Xnew)
               + tf.reduce_sum(tf.square(proj_B), 0)
               - tf.reduce_sum(tf.square(proj_uu), 0))
        multiples = tf.pack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), multiples)
    else:
        var = (self.kern.K(Xnew)
               + tf.matmul(tf.transpose(proj_B), proj_B)
               - tf.matmul(tf.transpose(proj_uu), proj_uu))
        multiples = tf.pack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), multiples)

    return mean + self.mean_function(Xnew), var
def _build_annealed_losses(self, outputs, labels, anneal_factors):
    """
    Weight the cross-entropy of every step's output by an anneal factor.

    `outputs` is a list with one prediction tensor per step. The same
    `labels` are compared against each of them, and the negative sum of
    labels * log(outputs) over axes 1 and 2 gives one loss per step, which
    is then multiplied by `anneal_factors`.
    """
    num_steps = len(outputs)
    stacked_outputs = tf.pack(outputs)
    # Repeat the labels once per step to line up with the stacked outputs.
    stacked_labels = tf.pack([labels] * num_steps)
    per_step_losses = -tf.reduce_sum(
        stacked_labels * tf.log(stacked_outputs), [1, 2])
    return tf.mul(anneal_factors, tf.concat(0, per_step_losses))
def compute_loss(self, emb_batch, curr_batch_size=None):
    """
    Build the loss of every example in the batch.

    For each of the self.config.batch_size examples the states are
    computed, turned into logits and compared against that example's
    labels; label entries equal to -1 are treated as padding and dropped.
    The softmax prediction at the last valid position of every example is
    stored, packed, in self.pred.

    `curr_batch_size` is accepted but not used.

    Returns the packed per-example losses.
    """
    losses = []
    root_predictions = []
    for idx_batch in range(self.config.batch_size):
        tree_states = self.compute_states(emb_batch, idx_batch)
        logits = self.create_output(tree_states)

        padded_labels = tf.gather(self.labels, idx_batch)
        # Number of entries that are not the -1 padding value.
        num_valid = tf.reduce_sum(
            tf.to_int32(tf.not_equal(padded_labels, -1)))
        labels = tf.gather(padded_labels, tf.range(num_valid))

        loss = self.calc_loss(logits, labels)
        probabilities = tf.nn.softmax(logits)
        root_predictions.append(tf.gather(probabilities, num_valid-1))
        losses.append(loss)

    batch_loss = tf.pack(losses)
    self.pred = tf.pack(root_predictions)
    return batch_loss
def _rnn_template(incoming, cell, dropout=None, return_seq=False,
                  return_state=False, initial_state=None, dynamic=False,
                  scope=None, name="LSTM"):
    """ RNN Layer Template.

    Runs `cell` over `incoming` and registers the cell variables, the last
    activation and the output tensor in the graph collections.

    Arguments:
        incoming: a tensor (at least 3-D) or a per-timestep list of tensors.
        cell: the RNN cell; must expose `W` and `b`.
        dropout: a float, or a tuple/list of two values giving the input and
            output keep probabilities. Falsy values disable dropout.
        return_seq: if True, return the outputs of every step instead of
            only the last one (ignored when `dynamic` is set).
        return_state: if True, return a tuple (output, state).
        initial_state: initial state passed to the RNN.
        dynamic: if True, sequence lengths are retrieved from the input and
            the output is selected with advanced_indexing_op.
        scope: variable scope.
        name: default scope name.

    Raises:
        Exception: if `dropout` is neither a float nor a tuple/list.
    """
    sequence_length = None
    if dynamic:
        sequence_length = retrieve_seq_length_op(
            incoming if isinstance(incoming, tf.Tensor) else tf.pack(incoming))

    input_shape = utils.get_incoming_shape(incoming)

    with tf.variable_op_scope([incoming], scope, name) as scope:
        name = scope.name

        # Keep the unwrapped cell: its W and b are read below, after `cell`
        # may have been replaced by a DropoutWrapper.
        _cell = cell
        # Apply dropout
        if dropout:
            if type(dropout) in [tuple, list]:
                in_keep_prob = dropout[0]
                out_keep_prob = dropout[1]
            elif isinstance(dropout, float):
                in_keep_prob, out_keep_prob = dropout, dropout
            else:
                raise Exception("Invalid dropout type (must be a 2-D tuple of "
                                "float)")
            cell = DropoutWrapper(cell, in_keep_prob, out_keep_prob)

        inference = incoming
        # If a tensor given, convert it to a per timestep list
        # NOTE(review): np.array is a function, not a type, so only `list`
        # ever matches here; tuples and ndarrays take the tensor path.
        if type(inference) not in [list, np.array]:
            ndim = len(input_shape)
            assert ndim >= 3, "Input dim should be at least 3."
            # Swap the first two axes, then split along the new first axis.
            axes = [1, 0] + list(range(2, ndim))
            inference = tf.transpose(inference, (axes))
            inference = tf.unpack(inference)

        outputs, state = _rnn(cell, inference, dtype=tf.float32,
                              initial_state=initial_state, scope=name,
                              sequence_length=sequence_length)

        # Retrieve RNN Variables
        c = tf.GraphKeys.LAYER_VARIABLES + '/' + scope.name
        for v in [_cell.W, _cell.b]:
            # W / b may each be a single variable or a collection of them.
            if hasattr(v, "__len__"):
                for var in v:
                    tf.add_to_collection(c, var)
            else:
                tf.add_to_collection(c, v)
        # Track activations.
        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, outputs[-1])

    if dynamic:
        # Swap the first two axes back before indexing by sequence length.
        outputs = tf.transpose(tf.pack(outputs), [1, 0, 2])
        o = advanced_indexing_op(outputs, sequence_length)
    else:
        o = outputs if return_seq else outputs[-1]

    # Track output tensor.
    tf.add_to_collection(tf.GraphKeys.LAYER_TENSOR + '/' + name, o)

    return (o, state) if return_state else o
def _sample_forward(self, back_filtered, eps):
    """
    Sample a trajectory forward in time from the backward-filtered
    distributions.

    The first state is sampled from back_filtered[0]. Every later state t is
    sampled from back_filtered[t] multiplied by the Gaussian obtained by
    pushing the previous sample through the transition matrix and adding the
    transition noise. `eps` supplies the noise used for each sample.

    The per-step sampling distributions and entropies are stored on
    self.sampling_dists and self.entropies.

    Returns the samples reshaped to self.output_shape, and the sum of the
    entropies.
    """
    epses = tf.unpack(eps)

    current_dist = back_filtered[0]
    z_i = current_dist.sample(epses[0])
    samples = [z_i]
    sampling_dists = [current_dist]
    entropies = [current_dist.entropy()]

    for t in np.arange(1, self.T):
        # Distribution of state t given the previous sample.
        pred_mean = tf.matmul(self._transition_mat(t-1), z_i)
        noise = self._gaussian_noise(t-1)
        incoming = MVGaussianMeanCov(noise.mean() + pred_mean, noise.cov())

        # Combine it with the backward-filtered message for step t.
        current_dist = back_filtered[t].multiply_density(incoming)
        sampling_dists.append(current_dist)
        z_i = current_dist.sample(epses[t])
        entropies.append(current_dist.entropy())
        samples.append(z_i)

    self.sampling_dists = sampling_dists
    self.entropies = entropies

    entropy = tf.reduce_sum(tf.pack(entropies))
    sample = tf.reshape(tf.squeeze(tf.pack(samples)), self.output_shape)
    return sample, entropy
def __init__(self, Nclass, im_w, im_h, lmbda=5e-4):
    """Build the classifier graph with a class-activation-map (CAM) head.

    All ops are created in a fresh `tf.Graph` stored in `self.graph`:
    six convolution layers (ReLU after the first five, 2x2 max pooling
    after the second and fourth), global average pooling, one linear layer
    producing the logits, a sparse softmax cross-entropy loss with an L2
    penalty, an accuracy percentage, and the CAM.

    Args:
        Nclass: Number of output classes.
        im_w: Input image width.
        im_h: Input image height.
        lmbda: Multiplier for the summed L2 loss of every trainable
            variable whose name ends with 'W:0'.
    """
    self.Nclass = Nclass
    self.im_w = im_w
    self.im_h = im_h
    self.graph = tf.Graph()

    def conv_relu(inputs, kernel_shape, layer_name):
        # One convolution layer followed by a ReLU.
        return tf.nn.relu(self._conv_layer(inputs, kernel_shape, layer_name))

    def halve(inputs, op_name):
        # 2x2 max pooling with stride 2.
        return tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME',
                              name=op_name)

    with self.graph.as_default():
        # Input data. `shape=None` leaves the label shape unconstrained.
        self.X = tf.placeholder(tf.float32, shape=(None, im_h, im_w, 1))
        self.y_ = tf.placeholder(tf.float32, shape=None)

        net = conv_relu(self.X, (11, 11, 1, 32), "conv1")
        net = conv_relu(net, (5, 5, 32, 64), "conv2")
        net = halve(net, 'pool1')
        net = conv_relu(net, (3, 3, 64, 128), "conv3")
        net = conv_relu(net, (3, 3, 128, 256), "conv4")
        net = halve(net, 'pool2')
        net = conv_relu(net, (3, 3, 256, 256), "conv5")
        # The last convolution has no ReLU; the CAM is computed from it.
        self.top_conv = self._conv_layer(net, (3, 3, 256, 1024), "conv6")

        # Global average pooling over the two spatial axes.
        pooled = tf.reduce_mean(self.top_conv, [1, 2])

        with tf.variable_scope("GAP"):
            gap_shape = (1024, Nclass)
            gap_init = tf.truncated_normal_initializer(
                mean=0.0, stddev=HeSD(gap_shape))
            gap_w = tf.get_variable("W", shape=gap_shape,
                                    initializer=gap_init)

        self.logits = tf.matmul(pooled, gap_w)

        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            self.logits, tf.to_int64(self.y_), name='xentropy')
        data_loss = tf.reduce_mean(per_example, name='xentropy_mean')

        # L2 penalty over every trainable variable named '...W:0'.
        penalties = [tf.nn.l2_loss(var)
                     for var in tf.trainable_variables()
                     if var.name.endswith('W:0')]
        regularizer = tf.reduce_sum(tf.pack(penalties))
        self.loss = data_loss + (regularizer * lmbda)

        hits = tf.equal(tf.argmax(self.logits, 1),
                        tf.cast(self.y_, tf.int64))
        self.accuracy = tf.reduce_mean(tf.cast(hits, tf.float32)) * 100.

        # CAM: weight the resized top feature maps by the GAP weights of
        # each example's label.
        resized = tf.image.resize_bilinear(self.top_conv,
                                           [self.im_h, self.im_w])
        resized = tf.reshape(resized, [-1, self.im_h * self.im_w, 1024])
        label_w = tf.gather(tf.transpose(gap_w),
                            tf.cast(self.y_, tf.int32))
        label_w = tf.reshape(label_w, [-1, 1024, 1])
        cam = tf.batch_matmul(resized, label_w)
        self.cam = tf.reshape(cam, [-1, self.im_h, self.im_w])

        self.saver = tf.train.Saver()
def build_generator(self, maxlen):
    """Build the batch-size-1 caption generation graph.

    An image embedding is prepended to the embedded article tokens and
    encoded with a bidirectional dynamic RNN. The forward final state and
    the concatenated encoder outputs drive an attention decoder whose loop
    function (from `extract_argmax_and_embed`) is built from the word
    embedding and output projection. For steps `1..n_lstm_steps - 1` the
    decoder output is projected to word logits, the arg-max word is
    collected, and a masked cross-entropy against `captions[:, i]` is
    accumulated.

    Args:
        maxlen: Unused; kept so existing callers keep working.

    Returns:
        Tuple `(image, model_outputs, articles, news_len, loss, captions,
        mask)`: the input placeholders, the list of per-step arg-max word
        tensors, and the summed masked cross-entropy divided by
        `sum(mask[:, 1:])`.
    """
    image = tf.placeholder(tf.float32, [1, self.dim_image], name='image')
    image_emb = tf.matmul(image, self.encode_img_W) + self.encode_img_b

    captions = tf.placeholder(tf.int32, [1, self.n_lstm_steps])
    articles = tf.placeholder(tf.int32, [1, None], name='articles')
    news_len = tf.placeholder(tf.int32, [1])
    # NOTE(review): this placeholder is the caption mask but is *named*
    # 'news_len', while the real `news_len` placeholder is unnamed. The name
    # is left untouched because callers may feed it by name.
    mask = tf.placeholder(tf.float32, [1, self.n_lstm_steps], name='news_len')

    loss = 0.0

    with tf.variable_scope("encoder"):
        article_emb = tf.nn.embedding_lookup(self.Wemb, articles) + self.bemb
        # The image embedding becomes the first element of the sequence.
        encoder_inputs = tf.concat(
            1, [tf.expand_dims(image_emb, 1), article_emb])
        encoder_outputs, encoder_states = tf.nn.bidirectional_dynamic_rnn(
            self.lstm, self.back_lstm, encoder_inputs, news_len,
            dtype=tf.float32)
        # Only the forward final state initialises the decoder.
        state = encoder_states[0]
        encoder_outputs = tf.concat(1, encoder_outputs)

    with tf.variable_scope("decoder"):
        loop_function = extract_argmax_and_embed(
            self.Wemb, self.bemb,
            (self.embed_word_W, self.embed_word_b),
            update_embedding=False)

        caption_emb = tf.nn.embedding_lookup(self.Wemb, captions) + self.bemb
        decoder_inputs = unpack_sequence(caption_emb)

        # NOTE(review): an `LSTMCell(dim_hidden, ...)` used to be built here
        # and immediately overwritten by the wrapper below, so it was never
        # used and has been removed. The decoder wraps `self.lstm`; confirm
        # that is intended.
        cell = rnn_cell.DropoutWrapper(self.lstm, output_keep_prob=1)

        decoder_outputs, _ = tf.nn.seq2seq.attention_decoder(
            decoder_inputs=decoder_inputs,
            initial_state=state,
            attention_states=encoder_outputs,
            cell=cell,
            output_size=None,
            num_heads=1,
            loop_function=loop_function,
            dtype=None,
            scope=None,
            initial_state_attention=True)

    model_outputs = []
    with tf.variable_scope("loss"):
        for i in range(1, self.n_lstm_steps):
            output = decoder_outputs[i]

            # One-hot encode the target word of step i for the single
            # example: sparse index (0, captions[0, i]).
            labels = tf.expand_dims(captions[:, i], 1)
            indices = tf.expand_dims(tf.range(0, 1, 1), 1)
            concated = tf.concat(1, [indices, labels])
            onehot_labels = tf.sparse_to_dense(
                concated, tf.pack([1, self.n_words]), 1.0, 0.0)

            logit_words = (tf.matmul(output, self.embed_word_W) +
                           self.embed_word_b)
            max_prob_word = tf.argmax(logit_words, 1)

            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logit_words, onehot_labels)
            cross_entropy = cross_entropy * mask[:, i]

            loss = loss + tf.reduce_sum(cross_entropy)
            model_outputs.append(max_prob_word)

        # Normalise by the mask total over the steps that were scored.
        loss = loss / tf.reduce_sum(mask[:, 1:])

    return image, model_outputs, articles, news_len, loss, captions, mask
def __init__(self, vocab_size, size, num_layers, batch_size, embedding,
             encoder_max_lens, share_encoder, encoder_args=None):
    """Build one `Encoder` per input sequence and aggregate their outputs.

    Args:
        vocab_size: Forwarded to every `Encoder`.
        size: Number of units of the GRU cell.
        num_layers: If greater than 1, the GRU cell is stacked that many
            times in a `MultiRNNCell`.
        batch_size: Stored on the instance.
        embedding: Forwarded to every `Encoder`.
        encoder_max_lens: Mapping from encoder name to its maximum length.
        share_encoder: If True, all encoders are built in one variable
            scope ("encoder") and every encoder after the first reuses the
            variables.
        encoder_args: Optional mapping from encoder name to a dict of extra
            keyword arguments for that `Encoder`. A 'weight' entry, when
            present, is also used as that encoder's weight instead of a new
            placeholder.

    Attributes set:
        encoders: name -> {'encoder': Encoder, 'weight': tensor}.
        attention_states: per-encoder attention states summed over axis 1
            (keeping the axis) and concatenated along axis 1.
        attention_matrix: per-encoder attention states packed along axis 1.
        state: element-wise sum of all encoder states.
    """
    encoder_args = encoder_args or {}
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    # Aggregation below follows the mapping's own iteration order.
    self.encoder_names = list(encoder_max_lens)

    cell = rnn_cell.GRUCell(size)
    if num_layers > 1:
        cell = rnn_cell.MultiRNNCell([cell] * num_layers)

    with tf.variable_scope("many_seq_to_seq"):
        self.encoders = {}
        # Encoders are built in sorted order so that the first one built
        # (the one that creates shared variables) is deterministic.
        for i, (k, encoder_max_len) in enumerate(
                sorted(encoder_max_lens.items())):
            reuse = share_encoder and i != 0
            vs_key = k.replace(' ', '_')
            extra_args = encoder_args.get(k, {})

            encoder_scope = "encoder" + ('' if share_encoder
                                         else '_' + vs_key)
            with tf.variable_scope(encoder_scope, reuse=reuse):
                print('Building Encoder [' + k + ']...' +
                      (' (shared params)' if reuse else ''))
                encoder = Encoder(embedding, encoder_max_len, cell,
                                  vocab_size, dtype=tf.float32,
                                  **extra_args)

            with tf.variable_scope("encoder_" + vs_key):
                if 'weight' in extra_args:
                    weight = extra_args['weight']
                else:
                    # Only create the placeholder when no weight was given,
                    # and name it with the sanitised key so encoder names
                    # containing spaces do not put a space in the op name.
                    weight = tf.placeholder(
                        tf.float32, shape=[None, 1],
                        name="encoder_{0}_weight".format(vs_key))

            self.encoders[k] = {
                'encoder': encoder,
                'weight': weight,
            }

        # TODO: may need to apply weights explicitly,
        # i.e. (tf.reduce_sum(..) * weight)

        built = [self.encoders[k]['encoder'] for k in self.encoder_names]

        # Sum individual encoder outputs to get aggregate attention.
        summed = [tf.reduce_sum(enc.attention_states, 1, True)
                  for enc in built]
        self.attention_states = array_ops.concat(1, summed)
        self.attention_matrix = tf.pack(
            [enc.attention_states for enc in built], 1)

        # Sum individual encoder states to get aggregate state.
        self.state = tf.reduce_sum(
            tf.pack([enc.state for enc in built]), 0)
def __init__(self, embedding, max_length, initial_state, attention_states,
             cell, num_samples=512, feed_previous=False,
             update_embedding_for_previous=True, dtype=dtypes.float32,
             scope=None, initial_state_attention=False, **kwargs):
    """Build an embedding attention decoder with its loss and decoded output.

    Args:
        embedding: Embedding matrix; its first dimension is taken as the
            number of symbols.
        max_length: Maximum target length; two extra steps are added for
            the _GO and _EOS symbols.
        initial_state: Initial decoder state, forwarded to
            `attention_decoder`.
        attention_states: Forwarded to `attention_decoder`.
        cell: Decoder RNN cell.
        num_samples: Sample count for sampled softmax. Sampled softmax and
            an explicit output projection are used only when
            `0 < num_samples < num_symbols`; otherwise the cell is wrapped
            in an `OutputProjectionWrapper`.
        feed_previous: If True, a loop function built by
            `self._extract_argmax_and_embed` is passed to the decoder.
        update_embedding_for_previous: Forwarded to
            `self._extract_argmax_and_embed`.
        dtype: dtype used when converting the projection to tensors.
        scope: Optional variable scope name.
        initial_state_attention: Forwarded to `attention_decoder`.
        **kwargs: Optional 'lengths', 'inputs' and 'weights' tensors to use
            instead of newly created placeholders.

    Attributes set:
        max_length, lengths, inputs, weights, targets, cell, feed_previous,
        outputs, state, instance_loss, loss, decoded_outputs,
        decoded_lenghts (plus the correctly spelled alias
        decoded_lengths), decoded_batch, and projected_output when an
        output projection is used.
    """
    # Account for _GO and _EOS.
    self.max_length = max_length + 2

    # Placeholders are created only when the caller did not supply the
    # tensor, so no unused placeholders are left in the graph.
    if 'lengths' in kwargs:
        self.lengths = kwargs['lengths']
    else:
        self.lengths = tf.placeholder(tf.int32, shape=[None],
                                      name="decoder_lengths")

    if 'inputs' in kwargs:
        self.inputs = kwargs['inputs']
    else:
        self.inputs = [
            tf.placeholder(tf.int32, shape=[None],
                           name="decoder_input{0}".format(i))
            for i in range(self.max_length)
        ]

    if 'weights' in kwargs:
        self.weights = kwargs['weights']
    else:
        self.weights = [
            tf.placeholder(tf.float32, shape=[None],
                           name="decoder_weight{0}".format(i))
            for i in range(self.max_length)
        ]

    # Targets are the inputs shifted left by one step, padded with zeros.
    self.targets = [self.inputs[i + 1] for i in range(len(self.inputs) - 1)]
    self.targets.append(tf.zeros_like(self.targets[0]))

    num_symbols = embedding.get_shape()[0].value
    output_projection = None
    loss_function = None
    self.cell = cell
    self.feed_previous = feed_previous

    if 0 < num_samples < num_symbols:
        with tf.device('/cpu:0'):
            w = tf.get_variable('proj_w', [cell.output_size, num_symbols])
            w_t = tf.transpose(w)
            b = tf.get_variable('proj_b', [num_symbols])
        output_projection = (w, b)

        def sampled_loss(inputs, labels):
            with tf.device('/cpu:0'):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                  num_samples, num_symbols)

        loss_function = sampled_loss

    if output_projection is None:
        # No sampled softmax: project to the vocabulary inside the cell.
        cell = rnn_cell.OutputProjectionWrapper(cell, num_symbols)
        output_size = num_symbols
    else:
        output_size = cell.output_size
        proj_weights = ops.convert_to_tensor(output_projection[0],
                                             dtype=dtype)
        proj_weights.get_shape().assert_is_compatible_with(
            [cell.output_size, num_symbols])
        proj_biases = ops.convert_to_tensor(output_projection[1],
                                            dtype=dtype)
        proj_biases.get_shape().assert_is_compatible_with([num_symbols])

    with variable_scope.variable_scope(
            scope or "embedding_attention_decoder"):
        if feed_previous:
            loop_function = self._extract_argmax_and_embed(
                embedding, output_projection,
                update_embedding_for_previous)
        else:
            loop_function = None

        emb_inp = [embedding_ops.embedding_lookup(embedding, step_input)
                   for step_input in self.inputs]
        self.outputs, self.state = attention_decoder(
            emb_inp, self.lengths, initial_state, attention_states, cell,
            output_size=output_size,
            loop_function=loop_function,
            initial_state_attention=initial_state_attention)

    # The loss reuses `self.targets`; an identical second target list used
    # to be rebuilt here.
    targets = self.targets

    # Loss for each instance in the batch.
    self.instance_loss = sequence_loss_by_example(
        self.outputs, targets, self.weights,
        softmax_loss_function=loss_function)

    # Aggregated average loss per instance for the batch.
    batch_size = math_ops.cast(array_ops.shape(targets[0])[0],
                               self.instance_loss.dtype)
    self.loss = tf.reduce_sum(self.instance_loss) / batch_size

    if output_projection is not None:
        self.projected_output = [
            tf.matmul(o, output_projection[0]) + output_projection[1]
            for o in self.outputs
        ]
        scores = tf.pack(self.projected_output)
    else:
        scores = tf.pack(self.outputs)
    self.decoded_outputs = tf.unpack(tf.argmax(scores, 2))

    # Count of non-zero decoded symbols per example.
    self.decoded_lenghts = tf.reduce_sum(
        tf.sign(tf.transpose(tf.pack(self.decoded_outputs))), 1)
    # Correctly spelled alias; the misspelled attribute is kept for
    # existing callers.
    self.decoded_lengths = self.decoded_lenghts
    self.decoded_batch = tf.transpose(tf.pack(self.decoded_outputs))
def pack_sequence(sequence):
    """Stack a list of frames into one tensor and swap its first two axes."""
    stacked_frames = tf.pack(sequence)
    return tf.transpose(stacked_frames, perm=[1, 0, 2])
def write_thin_stack(thin_stack, stack_pointers, decoder_position, batch_size,
                     max_num_concepts):
    """Write the current decoder position to the thin stack at the pointers.

    Builds a length-`batch_size` vector filled with `decoder_position` and
    hands it to `write_thin_stack_vals` as the values to write.
    """
    vector_shape = tf.pack([batch_size])
    positions = tf.fill(vector_shape, decoder_position)
    return write_thin_stack_vals(thin_stack, stack_pointers, positions,
                                 batch_size, max_num_concepts)
def __init__(self, sess, data_format, history_length, num_steps, num_layers,
             attention, observation_dims, output_size, trainable=True,
             hidden_activation_fn=tf.nn.relu, output_activation_fn=None,
             weights_initializer=initializers.xavier_initializer(),
             biases_initializer=tf.constant_initializer(0.1),
             value_hidden_sizes=[512], advantage_hidden_sizes=[512],
             network_output_type='dueling', network_header_type='nips',
             name='CNN'):
    """Build a per-step CNN feeding a stacked LSTM, with optional attention.

    The input is divided by 255 and split into `num_steps` slices along the
    history axis. Every slice goes through the same two convolutions and
    one linear layer (variables are reused after the first slice); the
    per-step features are packed and run through a time-major
    `dynamic_rnn`. The tensor selected or attended from the RNN outputs is
    passed to `self.build_output_ops`.

    Args:
        sess: Forwarded to the parent constructor.
        data_format: 'NHWC' or 'NCHW'; fixes the placeholder layout and the
            axis that is split into steps.
        history_length: Size of the history axis of the input.
        num_steps: Number of slices the history axis is split into.
        num_layers: Number of stacked LSTM layers.
        attention: Falsy for no attention, otherwise 'global' or 'linear'
            (case-insensitive).
        observation_dims: List with the observation dimensions.
        output_size: Forwarded to `build_output_ops`.
        trainable: Forwarded to `build_output_ops`.
        hidden_activation_fn: Activation of the hidden layers.
        output_activation_fn: Forwarded to `build_output_ops`.
        weights_initializer: Initializer for layer weights.
        biases_initializer: Initializer for layer biases.
        value_hidden_sizes: Forwarded (as a copy) to `build_output_ops`.
        advantage_hidden_sizes: Forwarded (as a copy) to
            `build_output_ops`.
        network_output_type: Forwarded to `build_output_ops`.
        network_header_type: Unused here.
        name: Variable scope name, also forwarded to the parent constructor.

    Raises:
        ValueError: On an unknown `data_format` or `attention` value.
    """
    super(RNNCNN, self).__init__(sess, name)

    # The defaults are shared list objects; copy them so nothing downstream
    # can mutate the defaults seen by other instances.
    value_hidden_sizes = list(value_hidden_sizes)
    advantage_hidden_sizes = list(advantage_hidden_sizes)

    if data_format == 'NHWC':
        self.inputs = tf.placeholder(
            'float32', [None] + observation_dims + [history_length],
            name='inputs')
        split_axis = 3
    elif data_format == 'NCHW':
        self.inputs = tf.placeholder(
            'float32', [None, history_length] + observation_dims,
            name='inputs')
        split_axis = 1
    else:
        raise ValueError("unknown data_format : %s" % data_format)

    self.var = {}

    self.l0s = tf.div(self.inputs, 255.)
    self.l0s = tf.split(split_axis, num_steps, self.l0s)

    layers = []
    with tf.variable_scope(name):
        for t, l0 in enumerate(self.l0s):
            # TODO: not sure why get_variable is not just reusing variables
            if t > 0:
                tf.get_variable_scope().reuse_variables()

            l1, self.var['l1_w'], self.var['l1_b'] = conv2d(
                l0, 16, [8, 8], [4, 4], weights_initializer,
                biases_initializer, hidden_activation_fn, data_format,
                name='l1_conv')
            l2, self.var['l2_w'], self.var['l2_b'] = conv2d(
                l1, 32, [4, 4], [2, 2], weights_initializer,
                biases_initializer, hidden_activation_fn, data_format,
                name='l2_conv')
            l3, self.var['l3_w'], self.var['l3_b'] = linear(
                l2, 256, weights_initializer, biases_initializer,
                hidden_activation_fn, data_format, name='l3_conv')
            layers.append(l3)

    # NOTE(review): the nesting of the attention branches inside this scope
    # was reconstructed from whitespace-collapsed source; it decides whether
    # 'Wc', 'bc' and 'va' are created under `name/`. Confirm against
    # existing checkpoints.
    with tf.variable_scope(name):
        cell = tf.nn.rnn_cell.LSTMCell(256)
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers)
        outputs, state = tf.nn.dynamic_rnn(
            cell, tf.pack(layers), dtype=tf.float32, time_major=True)

        # Grab the MultiRNNCell variables, keyed by their name relative to
        # this network's scope.
        rnn_prefix = name + '/RNN/MultiRNNCell/'
        for v in tf.all_variables():
            if v.name.startswith(rnn_prefix):
                self.var[v.name.replace(name + '/', '')] = v

        if not attention:
            # NOTE(review): this selects the second-to-last step, whereas
            # the 'global' branch uses outputs[-1] as the final state.
            # Left unchanged; confirm it is intended.
            layer = outputs[-2]
        elif attention.lower() == 'global':
            assert num_steps > 1
            self.var['Wc'] = tf.get_variable('Wc', shape=[2 * 256, 256])
            self.var['bc'] = tf.get_variable('bc', shape=[256])

            # Score every earlier step against the last output, softmax the
            # scores over steps, and mix the weighted context with h_t.
            h_t = outputs[-1]
            encode = tf.pack(outputs[:-1])
            scores = tf.reduce_sum(tf.mul(encode, h_t), 2)
            a_t = tf.nn.softmax(tf.transpose(scores))
            a_t = tf.expand_dims(a_t, 2)
            c_t = tf.batch_matmul(
                tf.transpose(encode, perm=[1, 2, 0]), a_t)
            c_t = tf.squeeze(c_t, [2])
            layer = tf.tanh(
                tf.matmul(tf.concat(1, [h_t, c_t]), self.var['Wc']) +
                self.var['bc'])
        elif attention.lower() == 'linear':
            self.var['va'] = tf.get_variable('va', shape=[256])

            # Score every step with a learned vector and return the
            # softmax-weighted sum of the outputs.
            scores = tf.reduce_sum(tf.mul(outputs, self.var['va']), 2)
            a_t = tf.nn.softmax(tf.transpose(scores))
            a_t = tf.expand_dims(a_t, 2)
            c_t = tf.batch_matmul(
                tf.transpose(outputs, perm=[1, 2, 0]), a_t)
            c_t = tf.squeeze(c_t, [2])
            # TODO: extra nonlinearity?
            layer = c_t
        else:
            raise ValueError('unknown attention: {}'.format(attention))

    self.build_output_ops(layer, network_output_type, value_hidden_sizes,
                          advantage_hidden_sizes, output_size,
                          weights_initializer, biases_initializer,
                          hidden_activation_fn, output_activation_fn,
                          trainable)
def loss_interp(flows, inputs, outputs, epsilon, alpha_c, alpha_s,
                lambda_smooth, flow_scale, FlowDeltaWeights):
    """Reconstruction plus smoothness loss for a predicted optical flow.

    `outputs` is warped with the scaled flow by bilinear interpolation and
    compared with `inputs` using a Charbonnier penalty; the flow gradient,
    obtained by convolving the scaled flow with `FlowDeltaWeights`, is
    penalised the same way.

    Args:
        flows: Flow tensor, reshaped to [batch, height * width, 2]; channel
            0 is added to the column index and channel 1 to the row index.
        inputs: Image tensor with a fully defined static shape
            [batch, height, width, channels].
        outputs: Image tensor that is warped; same number of elements as
            `inputs`.
        epsilon: Charbonnier epsilon.
        alpha_c: Exponent of the reconstruction penalty.
        alpha_s: Exponent of the smoothness penalty.
        lambda_smooth: Weight of the smoothness term in the total loss.
        flow_scale: Multiplier applied to `flows` before warping.
        FlowDeltaWeights: conv2d filter applied to the scaled flow;
            presumably a finite-difference kernel with two output channels
            (confirm against the caller).

    Returns:
        `(lossDict, reconstructs)`: a dict with keys "total",
        "Charbonnier_reconstruct", "U_loss" and "V_loss", and the warped
        image reshaped to [batch, height, width, channels].
    """
    shape = [int(dim) for dim in inputs.get_shape()]
    num_batch, height, width, channels = shape[0], shape[1], shape[2], shape[3]
    need_mask = True

    # Border mask: zero inside a frame of `border_width` pixels.
    # NOTE(review): the border is sized from the height, although the
    # original variable was named `shortestDim`; for images that are taller
    # than wide this is not the shortest side. Left unchanged so loss
    # values stay the same.
    border_ratio = 0.1
    border_width = int(np.ceil(height * border_ratio))
    inner = tf.ones([height - 2 * border_width, width - 2 * border_width])
    border_mask = tf.pad(inner,
                         [[border_width, border_width],
                          [border_width, border_width]], "CONSTANT")
    border_mask = tf.tile(tf.expand_dims(border_mask, 0), [num_batch, 1, 1])
    border_mask_img = tf.tile(tf.expand_dims(border_mask, 3),
                              [1, 1, 1, channels])
    border_mask_flow = tf.tile(tf.expand_dims(border_mask, 3), [1, 1, 1, 2])

    # Smoothness mask: drops the last column for the first flow-delta
    # channel and the last row for the second.
    mask_u = tf.pad(tf.ones([height, width - 1]), [[0, 0], [0, 1]],
                    "CONSTANT")
    mask_v = tf.pad(tf.ones([height - 1, width]), [[0, 1], [0, 0]],
                    "CONSTANT")
    smoothness_mask = tf.pack([mask_u, mask_v], axis=2)
    smoothness_mask = tf.tile(tf.expand_dims(smoothness_mask, 0),
                              [num_batch, 1, 1, 1])

    inputs_flat = tf.reshape(inputs, [num_batch, -1, channels])
    outputs_flat = tf.reshape(outputs, [num_batch, -1, channels])
    border_mask_flat = tf.reshape(border_mask_img, [num_batch, -1, channels])

    flows = tf.mul(flows, flow_scale)
    flows_flat = tf.reshape(flows, [num_batch, -1, 2])
    floor_flows = tf.to_int32(tf.floor(flows_flat))
    weights_flows = flows_flat - tf.floor(flows_flat)

    # Row and column index of every pixel, flattened row-major.
    row_idx = tf.reshape(
        tf.tile(tf.expand_dims(tf.range(height), 1), [1, width]), [-1])
    col_idx = tf.reshape(
        tf.tile(tf.expand_dims(tf.range(width), 0), [height, 1]), [-1])
    zero = tf.zeros([], dtype='int32')

    # Warp `outputs` with the flow by bilinear interpolation.
    batch = []
    for b in range(num_batch):
        # Sampling positions and weights depend only on the flow, so they
        # are built once per example instead of once per channel.
        x0 = col_idx + floor_flows[b, :, 0]
        x1 = x0 + 1
        y0 = row_idx + floor_flows[b, :, 1]
        y1 = y0 + 1

        x0 = tf.clip_by_value(x0, zero, width - 1)
        x1 = tf.clip_by_value(x1, zero, width - 1)
        y0 = tf.clip_by_value(y0, zero, height - 1)
        y1 = tf.clip_by_value(y1, zero, height - 1)

        idx_a = y0 * width + x0
        idx_b = y1 * width + x0
        idx_c = y0 * width + x1
        idx_d = y1 * width + x1

        xw = weights_flows[b, :, 0]
        yw = weights_flows[b, :, 1]
        wa = (1 - xw) * (1 - yw)
        wb = (1 - xw) * yw
        wc = xw * (1 - yw)
        wd = xw * yw

        warped_channels = []
        for c in range(channels):
            plane = outputs_flat[b, :, c]
            warped_channels.append(
                tf.mul(tf.gather(plane, idx_a), wa) +
                tf.mul(tf.gather(plane, idx_b), wb) +
                tf.mul(tf.gather(plane, idx_c), wc) +
                tf.mul(tf.gather(plane, idx_d), wd))
        batch.append(tf.pack(warped_channels, axis=1))
    reconstructs = tf.pack(batch)

    # Reconstruction loss.
    diff_reconstruct = tf.scalar_mul(255.0,
                                     tf.sub(reconstructs, inputs_flat))
    ele_wise_loss = tf.pow(
        tf.square(diff_reconstruct) + tf.square(epsilon), alpha_c)
    if need_mask:
        ele_wise_loss = tf.mul(border_mask_flat, ele_wise_loss)
        valid = tf.equal(border_mask_flat, tf.ones_like(border_mask_flat))
        num_valid = tf.to_float(tf.reduce_sum(tf.to_int32(valid)))
        charbonnier = tf.reduce_sum(ele_wise_loss) / num_valid
    else:
        charbonnier = tf.reduce_mean(ele_wise_loss)

    # Smoothness loss.
    flow_delta = tf.nn.conv2d(flows, FlowDeltaWeights, [1, 1, 1, 1],
                              padding="SAME")
    if need_mask:
        flow_delta_clean = tf.mul(flow_delta, smoothness_mask)
        flow_delta_clean = tf.mul(flow_delta_clean, border_mask_flow)
        u_ele = tf.pow(tf.square(flow_delta_clean[:, :, :, 0]) +
                       tf.square(epsilon), alpha_s)
        v_ele = tf.pow(tf.square(flow_delta_clean[:, :, :, 1]) +
                       tf.square(epsilon), alpha_s)
        # Normalised by the same valid-pixel count as the reconstruction
        # term (which counts every image channel).
        u_loss = tf.reduce_sum(u_ele) / num_valid
        v_loss = tf.reduce_sum(v_ele) / num_valid
    else:
        u_loss = tf.reduce_mean(tf.pow(
            tf.square(flow_delta[:, :, :, 0] * flow_scale) +
            tf.square(epsilon), alpha_s))
        v_loss = tf.reduce_mean(tf.pow(
            tf.square(flow_delta[:, :, :, 1] * flow_scale) +
            tf.square(epsilon), alpha_s))

    loss_smooth = u_loss + v_loss
    total_loss = charbonnier + lambda_smooth * loss_smooth

    lossDict = {
        "total": total_loss,
        "Charbonnier_reconstruct": charbonnier,
        "U_loss": u_loss,
        "V_loss": v_loss,
    }

    # Reshape with the real channel count; this used to be hard-coded to 3
    # and failed for any other number of channels.
    return lossDict, tf.reshape(reconstructs,
                                [num_batch, height, width, channels])
def Dec2(latents, targets):
    """Second-level decoder: map `latents` to 2 * LATENT_DIM_1 output channels.

    When PIXCNN_ONLY is set, a zero tensor of shape
    [batch, 2 * LATENT_DIM_1, LATENTS1_HEIGHT, LATENTS1_WIDTH] is returned.
    Otherwise the clipped latents pass through a linear layer and a stack
    of residual blocks and, when HIGHER_LEVEL_PIXCNN is set, are combined
    with a masked convolution over `targets` and further masked residual
    blocks before the output convolution.

    Args:
        latents: Latent tensor with LATENT_DIM_2 features per example.
        targets: Tensor fed to the masked 'Dec2.Pix1' convolution; read
            only when HIGHER_LEVEL_PIXCNN is set.

    Returns:
        The output tensor of the 'Dec2.Out' convolution (or the zero tensor
        described above).
    """
    if PIXCNN_ONLY:
        n_examples = tf.shape(latents)[0]
        zeros_shape = tf.pack([n_examples, 2 * LATENT_DIM_1,
                               LATENTS1_HEIGHT, LATENTS1_WIDTH])
        return tf.zeros(zeros_shape, tf.float32)

    features = tf.clip_by_value(latents, -50., 50.)
    features = lib.ops.linear.Linear('Dec2.Input', input_dim=LATENT_DIM_2,
                                     output_dim=4 * 4 * DIM_4,
                                     inputs=features)
    features = tf.reshape(features, [-1, DIM_4, 4, 4])

    # (name, input_dim, output_dim, resample). 'Dec2.Res3Post' really is
    # listed twice, exactly as in the original call sequence.
    residual_specs = (
        ('Dec2.Res1', DIM_4, DIM_4, None),
        ('Dec2.Res1Post', DIM_4, DIM_4, None),
        ('Dec2.Res3', DIM_4, DIM_3, 'up'),
        ('Dec2.Res3Post', DIM_3, DIM_3, None),
        ('Dec2.Res3Post', DIM_3, DIM_3, None),
    )
    for block_name, in_dim, out_dim, resample in residual_specs:
        features = ResidualBlock(block_name, input_dim=in_dim,
                                 output_dim=out_dim, filter_size=3,
                                 resample=resample, inputs_stdev=np.sqrt(3),
                                 he_init=True, inputs=features)

    if not HIGHER_LEVEL_PIXCNN:
        return lib.ops.conv2d.Conv2D('Dec2.Out', input_dim=DIM_3,
                                     output_dim=2 * LATENT_DIM_1,
                                     filter_size=1,
                                     mask_type=('b', PIX_2_N_BLOCKS),
                                     he_init=False, inputs=features)

    masked_targets = lib.ops.conv2d.Conv2D('Dec2.Pix1',
                                           input_dim=LATENT_DIM_1,
                                           output_dim=DIM_3, filter_size=5,
                                           mask_type=('a', PIX_2_N_BLOCKS),
                                           he_init=False, inputs=targets)

    # Make the stdev of the features and masked_targets match.
    features = features / np.sqrt(4)
    features = tf.concat(1, [masked_targets, features])

    # (name, input_dim, filter_size, inputs_stdev)
    pixcnn_specs = (
        ('Dec2.Pix2Res', 2 * DIM_3, 3, 1),
        ('Dec2.Pix3Res', DIM_PIX_2, 3, np.sqrt(2)),
        ('Dec2.Pix4Res', DIM_PIX_2, 1, np.sqrt(2)),
    )
    for block_name, in_dim, filter_size, stdev in pixcnn_specs:
        features = ResidualBlock(block_name, input_dim=in_dim,
                                 output_dim=DIM_PIX_2,
                                 filter_size=filter_size,
                                 mask_type=('b', PIX_2_N_BLOCKS),
                                 inputs_stdev=stdev, he_init=True,
                                 inputs=features)

    return lib.ops.conv2d.Conv2D('Dec2.Out', input_dim=DIM_PIX_2,
                                 output_dim=2 * LATENT_DIM_1, filter_size=1,
                                 mask_type=('b', PIX_2_N_BLOCKS),
                                 he_init=False, inputs=features)
def _create_loss_optimizer(self):
    """Build the multi-particle cost and the Adam training op.

    For each of `self.n_particles` particles the recognition network is
    run (threading its RNN state from one particle to the next), a latent
    `z` is drawn with the reparameterisation trick, and three per-example
    terms are collected: `_log_p_z(z)`, `_log_q_z_given_x(z, ...)` and a
    sigmoid cross-entropy reconstruction term computed from the generator's
    pre-sigmoid output. The terms are stacked along axis 1, combined into
    `log_w`, weighted by their own softmax, and averaged over particles and
    batch.

    Side effects:
        Sets `self.cost` and `self.optimizer`.
    """
    weights = self.network_weights
    latent_shape = (self.batch_size, self.network_architecture["n_z"])

    rnn_state = tf.zeros([self.batch_size, self.rnn_state_size],
                         dtype=tf.float32)

    reconstruction_terms = []
    prior_terms = []
    recognition_terms = []

    for _ in range(self.n_particles):
        recog_mean, recog_log_sigma_sq, rnn_state = \
            self._recognition_network(self.x, rnn_state,
                                      weights["weights_recog"],
                                      weights["biases_recog"])

        # z = mean + sigma * eps, with eps ~ N(0, 1).
        eps = tf.random_normal(latent_shape, 0, 1, dtype=tf.float32)
        sigma = tf.sqrt(tf.exp(recog_log_sigma_sq))
        z = tf.add(recog_mean, tf.mul(sigma, eps))

        prior_term = self._log_p_z(z)
        recognition_term = self._log_q_z_given_x(z, recog_mean,
                                                 recog_log_sigma_sq)

        # Generate frame x_t and compute the reconstruction error from the
        # pre-sigmoid generator output.
        logits = self._generator_network_no_sigmoid(
            z, weights["weights_gener"], weights["biases_gener"])

        # Sigmoid cross-entropy on logits, summed over the dimensions.
        per_dim = (tf.maximum(logits, 0) - logits * self.x +
                   tf.log(1 + tf.exp(-abs(logits))))
        reconstruction_term = tf.reduce_sum(per_dim, 1)

        prior_terms.append(prior_term)
        recognition_terms.append(recognition_term)
        reconstruction_terms.append(reconstruction_term)

    # One column per particle.
    prior_tensor = tf.pack(prior_terms, axis=1)
    recognition_tensor = tf.pack(recognition_terms, axis=1)
    reconstruction_tensor = tf.pack(reconstruction_terms, axis=1)

    log_w = tf.sub(tf.add(reconstruction_tensor, recognition_tensor),
                   prior_tensor)
    # Weight every particle's log_w by the softmax of log_w.
    log_w = log_w * tf.nn.softmax(log_w)

    per_example = tf.reduce_mean(log_w, 1)   # average over particles
    self.cost = tf.reduce_mean(per_example)  # average over batch

    # Use ADAM optimizer.
    adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    self.optimizer = adam.minimize(self.cost)
def Dec1(latents, images):
    """First-level decoder: produce 256-way outputs per pixel and channel.

    When PIXCNN_ONLY is set the latent path is replaced by zeros of shape
    [batch, DIM_1, HEIGHT, WIDTH]; otherwise the clipped latents go through
    a 1x1 convolution and a stack of residual blocks (with two extra blocks
    when SETTINGS == '64px'). When PIXEL_LEVEL_PIXCNN is set, a masked
    convolution over `images` is concatenated in front of those features
    and followed by masked residual blocks.

    Args:
        latents: Latent tensor with LATENT_DIM_1 channels.
        images: Tensor fed to the masked 'Dec1.Pix1' convolution; read only
            when PIXEL_LEVEL_PIXCNN is set.

    Returns:
        The 'Dec1.Out' result reshaped to [-1, 256, N_CHANNELS, HEIGHT,
        WIDTH] and transposed to [-1, N_CHANNELS, HEIGHT, WIDTH, 256].
    """
    if PIXCNN_ONLY:
        n_examples = tf.shape(latents)[0]
        features = tf.zeros(tf.pack([n_examples, DIM_1, HEIGHT, WIDTH]),
                            tf.float32)
    else:
        features = tf.clip_by_value(latents, -50., 50.)
        features = lib.ops.conv2d.Conv2D('Dec1.Input',
                                         input_dim=LATENT_DIM_1,
                                         output_dim=DIM_3, filter_size=1,
                                         inputs=features, he_init=False)

        # (name, input_dim, output_dim, resample, inputs_stdev)
        residual_specs = [
            ('Dec1.Res1', DIM_3, DIM_3, None, 1),
            ('Dec1.Res1Post', DIM_3, DIM_3, None, 1),
            ('Dec1.Res2', DIM_3, DIM_2, 'up', np.sqrt(2)),
            ('Dec1.Res2Post', DIM_2, DIM_2, None, np.sqrt(2)),
            ('Dec1.Res3', DIM_2, DIM_1, 'up', np.sqrt(3)),
            ('Dec1.Res3Post', DIM_1, DIM_1, None, np.sqrt(3)),
        ]
        if SETTINGS == '64px':
            residual_specs += [
                ('Dec1.Res4', DIM_1, DIM_0, 'up', np.sqrt(3)),
                ('Dec1.Res4Post', DIM_0, DIM_0, None, np.sqrt(3)),
            ]
        for block_name, in_dim, out_dim, resample, stdev in residual_specs:
            features = ResidualBlock(block_name, input_dim=in_dim,
                                     output_dim=out_dim, filter_size=3,
                                     resample=resample, inputs_stdev=stdev,
                                     inputs=features)

    if PIXEL_LEVEL_PIXCNN:
        if EMBED_INPUTS:
            masked_images = lib.ops.conv2d.Conv2D(
                'Dec1.Pix1', input_dim=N_CHANNELS * DIM_EMBED,
                output_dim=DIM_0, filter_size=5, inputs=images,
                mask_type=('a', N_CHANNELS), he_init=False)
        else:
            masked_images = lib.ops.conv2d.Conv2D(
                'Dec1.Pix1', input_dim=N_CHANNELS, output_dim=DIM_1,
                filter_size=7, inputs=images,
                mask_type=('a', N_CHANNELS), he_init=False)

        # Make the stdev of the features and masked_images match.
        features = features / np.sqrt(4)

        # Warning! Because of the masked convolutions it's very important
        # that masked_images comes first in this concat.
        features = tf.concat(1, [masked_images, features])

        if PIXCNN_ONLY:
            for i in range(9):
                extra_in_dim = 2 * DIM_1 if i == 0 else DIM_PIX_1
                features = ResidualBlock(
                    'Dec1.ExtraPixCNN_' + str(i), input_dim=extra_in_dim,
                    output_dim=DIM_PIX_1, filter_size=5,
                    mask_type=('b', N_CHANNELS), inputs_stdev=1,
                    inputs=features)

        # (name, input_dim, filter_size)
        if SETTINGS == '64px':
            pixcnn_specs = (
                ('Dec1.Pix2Res', 2 * DIM_0, 3),
                ('Dec1.Pix3Res', DIM_PIX_1, 3),
                ('Dec1.Pix4Res', DIM_PIX_1, 3),
            )
        else:
            pixcnn_specs = (
                ('Dec1.Pix2Res', 2 * DIM_1, 3),
                ('Dec1.Pix3Res', DIM_PIX_1, 1),
            )
        for block_name, in_dim, filter_size in pixcnn_specs:
            features = ResidualBlock(block_name, input_dim=in_dim,
                                     output_dim=DIM_PIX_1,
                                     filter_size=filter_size,
                                     mask_type=('b', N_CHANNELS),
                                     inputs_stdev=1, inputs=features)

        logits = lib.ops.conv2d.Conv2D('Dec1.Out', input_dim=DIM_PIX_1,
                                       output_dim=256 * N_CHANNELS,
                                       filter_size=1,
                                       mask_type=('b', N_CHANNELS),
                                       he_init=False, inputs=features)
    else:
        logits = lib.ops.conv2d.Conv2D('Dec1.Out', input_dim=DIM_1,
                                       output_dim=256 * N_CHANNELS,
                                       filter_size=1, he_init=False,
                                       inputs=features)

    per_value = tf.reshape(logits, [-1, 256, N_CHANNELS, HEIGHT, WIDTH])
    return tf.transpose(per_value, [0, 2, 3, 4, 1])
def body1(self, num, object_num, loss, predict, labels, nilboy):
    """Add the loss terms of ground-truth object `num` to the running totals.

    The return tuple mirrors the argument list, so this looks like the body
    of a loop over objects (presumably `tf.while_loop`; confirm at the call
    site).

    Args:
      num: index of the label row to process.
      object_num: passed through unchanged.
      loss: four running totals [class, object, noobject, coord].
      predict: 3-D tensor [cell_size, cell_size, channels]. As sliced here:
        the first `num_classes` channels are class scores, the next
        `boxes_per_cell` are box confidences, and the rest is reshaped to
        [cell_size, cell_size, boxes_per_cell, 4].
      labels: [max_objects, 5] (x_center, y_center, w, h, class).
      nilboy: ignored on input; replaced by the responsibility mask.

    Returns:
      (num + 1, object_num, updated four-element loss list, predict,
      labels, responsibility mask).
    """
    grid = self.cell_size
    boxes = self.boxes_per_cell
    cell_px = self.image_size / self.cell_size

    truth = tf.reshape(labels[num:num + 1, :], [-1])

    # objects: [cell_size, cell_size] mask of cells covered by the box.
    half_w = truth[2] / 2
    half_h = truth[3] / 2
    left = tf.floor((truth[0] - half_w) / cell_px)
    right = tf.ceil((truth[0] + half_w) / cell_px)
    top = tf.floor((truth[1] - half_h) / cell_px)
    bottom = tf.ceil((truth[1] + half_h) / cell_px)

    covered_shape = tf.cast(tf.pack([bottom - top, right - left]),
                            dtype=tf.int32)
    covered_padding = tf.cast(
        tf.pack([top, grid - bottom, left, grid - right]), tf.int32)
    objects = tf.pad(tf.ones(covered_shape, tf.float32),
                     tf.reshape(covered_padding, (2, 2)), "CONSTANT")

    # response: [cell_size, cell_size] mask with a single 1 at the cell
    # that contains the box centre.
    center_col = tf.floor(truth[0] / cell_px)
    center_row = tf.floor(truth[1] / cell_px)
    center_padding = tf.cast(
        tf.pack([center_row, grid - center_row - 1,
                 center_col, grid - center_col - 1]), tf.int32)
    response = tf.pad(tf.ones([1, 1], tf.float32),
                      tf.reshape(center_padding, (2, 2)), "CONSTANT")
    response_3d = tf.reshape(response, [grid, grid, 1])

    # Predicted boxes: scale the raw predictions and add each cell's
    # top-left offset to x and y.
    predict_boxes = tf.reshape(
        predict[:, :, self.num_classes + boxes:], [grid, grid, boxes, 4])
    predict_boxes = predict_boxes * [cell_px, cell_px,
                                     self.image_size, self.image_size]

    cell_origins = cell_px * np.arange(grid)
    base_boxes = np.zeros([grid, grid, 4])
    base_boxes[:, :, 0] = cell_origins[np.newaxis, :]  # x grows with column
    base_boxes[:, :, 1] = cell_origins[:, np.newaxis]  # y grows with row
    base_boxes = np.tile(base_boxes[:, :, np.newaxis, :],
                         [1, 1, boxes, 1])
    predict_boxes = base_boxes + predict_boxes

    # IoU of every predicted box with the truth box
    # [cell_size, cell_size, boxes_per_cell].
    iou_predict_truth = self.iou(predict_boxes, truth[0:4])

    # C: target confidence, the IoU restricted to the responsible cell.
    C = iou_predict_truth * response_3d

    # I: 1 for the highest-IoU box of the responsible cell, 0 elsewhere.
    max_iou = tf.reduce_max(C, 2, keep_dims=True)
    I = tf.cast((C >= max_iou), tf.float32) * response_3d
    no_I = tf.ones_like(I, dtype=tf.float32) - I

    # Predicted confidences.
    p_C = predict[:, :, self.num_classes:self.num_classes + boxes]

    # Truth x, y, sqrt(w), sqrt(h) (scalars).
    x = truth[0]
    y = truth[1]
    sqrt_w = tf.sqrt(tf.abs(truth[2]))
    sqrt_h = tf.sqrt(tf.abs(truth[3]))

    # Predicted x, y, sqrt(w), sqrt(h); width and height are clamped to
    # [0, image_size] before the square root.
    p_x = predict_boxes[:, :, :, 0]
    p_y = predict_boxes[:, :, :, 1]
    p_sqrt_w = tf.sqrt(tf.minimum(
        self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 2])))
    p_sqrt_h = tf.sqrt(tf.minimum(
        self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 3])))

    # Truth class as a one-hot vector, and the predicted class scores.
    P = tf.one_hot(tf.cast(truth[4], tf.int32), self.num_classes,
                   dtype=tf.float32)
    p_P = predict[:, :, 0:self.num_classes]

    class_loss = tf.nn.l2_loss(
        tf.reshape(objects, (grid, grid, 1)) * (p_P - P)) * self.class_scale

    object_loss = tf.nn.l2_loss(I * (p_C - C)) * self.object_scale

    noobject_loss = tf.nn.l2_loss(no_I * (p_C)) * self.noobject_scale

    x_term = tf.nn.l2_loss(I * (p_x - x) / cell_px)
    y_term = tf.nn.l2_loss(I * (p_y - y) / cell_px)
    w_term = tf.nn.l2_loss(I * (p_sqrt_w - sqrt_w)) / self.image_size
    h_term = tf.nn.l2_loss(I * (p_sqrt_h - sqrt_h)) / self.image_size
    coord_loss = (x_term + y_term + w_term + h_term) * self.coord_scale

    nilboy = I

    updated_loss = [
        loss[0] + class_loss,
        loss[1] + object_loss,
        loss[2] + noobject_loss,
        loss[3] + coord_loss,
    ]
    return num + 1, object_num, updated_loss, predict, labels, nilboy
def text_layer(self, x, vsize, esize, fnum, maxlen, pool_window, config):
    """Build a recurrent-convolutional text encoder with attention pooling.

    Every word embedding is joined with a left context (forward LSTM output
    for the preceding words) and a right context (backward LSTM output for
    the following words). The triples are convolved, max-pooled along the
    sentence axis and collapsed to one vector per example by attention.

    Args:
        x: token ids passed to the embedding lookup.
        vsize: number of rows of the embedding matrix.
        esize: embedding width; also the size of both LSTM cells.
        fnum: number of convolution filters.
        maxlen: number of time steps the embedded input is split into.
        pool_window: max-pool window along the sentence axis.
        config: object exposing `batch_size`.

    Returns:
        Tuple `(h, attn)`: the attention-weighted sum of pooled features,
        reshaped to [batch_size, fnum], and the softmax attention weights,
        reshaped to [batch_size, -1].
    """
    batch = config.batch_size

    # Trainable parameters.
    emb_bound = 0.5 / esize
    emb = tf.get_variable(
        "embedding", [vsize, esize],
        initializer=tf.random_uniform_initializer(-emb_bound, emb_bound))
    filter_w = tf.get_variable(
        "filter_w", [3, esize, 1, fnum],
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    filter_b = tf.get_variable(
        "filter_b", [fnum], initializer=tf.constant_initializer())
    attn_w = tf.get_variable("attn_w", [fnum, fnum])
    attn_v = tf.get_variable(
        "attn_v", [fnum], initializer=tf.random_uniform_initializer())

    # Padding context used where a word has no left / right neighbour.
    zero_state = tf.zeros([batch, esize])

    def per_step(tensor):
        # [batch, sent_len, hidden] -> list of sent_len [batch, hidden] tensors.
        return [tf.squeeze(piece, [1])
                for piece in tf.split(1, maxlen, tensor)]

    # Embed the ids, and build a copy reversed along the time axis.
    inputs = tf.nn.embedding_lookup(emb, x)
    inputs_rev = tf.reverse(inputs, [False, True, False])
    steps_fwd = per_step(inputs)
    steps_bwd = per_step(inputs_rev)

    # Left context: forward LSTM outputs shifted right by one step,
    # [h_A, h_B, h_C] -> [0, h_A, h_B].
    with tf.variable_scope("lstm-forward"):
        lstm_fw = tf.nn.rnn_cell.BasicLSTMCell(esize, forget_bias=1.0)
        fw_outputs, _ = tf.nn.rnn(
            lstm_fw, steps_fwd,
            initial_state=lstm_fw.zero_state(batch, tf.float32))
    fw_outputs = [zero_state] + fw_outputs[:-1]

    # Right context: backward LSTM outputs put back in sentence order and
    # shifted left by one step, [j_C, j_B, j_A] -> [j_B, j_C, 0].
    with tf.variable_scope("lstm-backward"):
        lstm_bw = tf.nn.rnn_cell.BasicLSTMCell(esize, forget_bias=1.0)
        bw_outputs, _ = tf.nn.rnn(
            lstm_bw, steps_bwd,
            initial_state=lstm_bw.zero_state(batch, tf.float32))
    bw_in_order = tf.unpack(
        tf.reverse(tf.pack(bw_outputs), [True, False, False]))
    bw_outputs = bw_in_order[1:] + [zero_state]

    # Back to batch-major: [batch_size, sent_len, hidden_size].
    context_shape = [batch, -1, esize]
    fw_outputs = tf.reshape(tf.concat(1, fw_outputs), context_shape)
    bw_outputs = tf.reshape(tf.concat(1, bw_outputs), context_shape)

    # [left, word, right] per position, laid out as
    # [batch_size, sent_len*3, hidden_size, 1] for the convolution.
    lrw_concat = tf.concat(2, [fw_outputs, inputs, bw_outputs])
    lrw_concat = tf.reshape(lrw_concat, [batch, -1, esize, 1])

    # Filter height 3 with stride 3 covers one (left, word, right) triple.
    conv = tf.nn.conv2d(lrw_concat, filter_w,
                        strides=[1, 3, 1, 1], padding="VALID")
    conv_activated = tf.nn.relu(tf.nn.bias_add(conv, filter_b))

    pooled = tf.nn.max_pool(conv_activated,
                            ksize=[1, pool_window, 1, 1],
                            strides=[1, 1, 1, 1],
                            padding="VALID")
    pooled = tf.reshape(pooled, [-1, fnum])

    # Attention scores over the pooled positions of each example.
    mm = tf.nn.tanh(tf.matmul(pooled, attn_w))
    scores = tf.reduce_sum(mm * attn_v, 1)
    attn = tf.nn.softmax(tf.reshape(scores, [batch, -1]))

    # Attention-weighted sum of the pooled features.
    weighted = tf.reshape(attn, [-1, 1]) * pooled
    h = tf.reduce_sum(tf.reshape(weighted, [batch, -1, fnum]), 1)
    return h, attn
def Enc2(latents):
    """Second-level encoder: map first-level latents to 2*LATENT_DIM_2 outputs.

    When PIXCNN_ONLY is set the encoder is bypassed and an all-zero tensor of
    shape [batch, 2 * LATENT_DIM_2] is returned instead.
    """
    if PIXCNN_ONLY:
        n_rows = tf.shape(latents)[0]
        return tf.zeros(tf.pack([n_rows, 2 * LATENT_DIM_2]), tf.float32)

    # Clip the incoming latents to [-50, 50] before projecting them.
    hidden = tf.clip_by_value(latents, -50., 50.)
    hidden = lib.ops.conv2d.Conv2D('Enc2.Input',
                                   input_dim=LATENT_DIM_1,
                                   output_dim=DIM_3,
                                   filter_size=1,
                                   inputs=hidden,
                                   he_init=False)

    # (name, input_dim, output_dim, resample, inputs_stdev), applied in order.
    residual_specs = (
        ('Enc2.Res0', DIM_3, DIM_3, None, 1),
        ('Enc2.Res1Pre', DIM_3, DIM_3, None, 1),
        ('Enc2.Res1', DIM_3, DIM_4, 'down', 1),
        ('Enc2.Res2Pre', DIM_4, DIM_4, None, np.sqrt(2)),
        ('Enc2.Res2', DIM_4, DIM_4, None, np.sqrt(2)),
    )
    for block_name, in_dim, out_dim, resample, stdev in residual_specs:
        hidden = ResidualBlock(block_name,
                               input_dim=in_dim,
                               output_dim=out_dim,
                               filter_size=3,
                               resample=resample,
                               inputs_stdev=stdev,
                               he_init=True,
                               inputs=hidden)

    # Flatten to 4 * 4 * DIM_4 features per example for the linear layer.
    flat_dim = 4 * 4 * DIM_4
    hidden = tf.reshape(hidden, [-1, flat_dim])
    return lib.ops.linear.Linear('Enc2.Output',
                                 input_dim=flat_dim,
                                 output_dim=2 * LATENT_DIM_2,
                                 inputs=hidden)
def Model(_X, _W, _b, _keepprob):
    """Build the convolutional encoder/decoder graph and return its output.

    Args:
        _X: input image batch (comments in the original assume 128x128).
        _W: dict of convolution kernels keyed 'ce1'..'ce4', 'cd1'..'cd4'
            and 'dense_inner_prod'.
        _b: dict of biases keyed 'be1'..'be4' and 'bd1'..'bd4'.
        _keepprob: keep probability passed to every dropout op.

    Returns:
        The tensor produced by the final 'dense_inner_prod' convolution.
    """
    use_bias = 1
    unit_strides = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]
    batch = tf.shape(_X)[0]

    def _bn_relu(tensor):
        # Normalise with the batch statistics of *this* tensor, then ReLU.
        mean, var = tf.nn.moments(tensor, [0, 1, 2])
        tensor = tf.nn.batch_normalization(tensor, mean, var, 0, 1, 0.0001)
        return tf.nn.relu(tensor)

    def _encoder_stage(tensor, w_key, b_key):
        # conv -> (bias) -> batch norm -> relu -> 2x2 max-pool -> dropout.
        tensor = tf.nn.conv2d(tensor, _W[w_key], strides=unit_strides,
                              padding='SAME')
        if use_bias:
            tensor = tf.nn.bias_add(tensor, _b[b_key])
        tensor = _bn_relu(tensor)
        tensor = tf.nn.max_pool(tensor, ksize=pool_window,
                                strides=pool_window, padding='SAME')
        return tf.nn.dropout(tensor, _keepprob)

    def _decoder_stage(tensor, divisor, w_key, b_key):
        # unpool -> conv -> (bias) -> batch norm -> relu -> dropout.
        tensor = Unpooling(tensor,
                           [batch, height / divisor, width / divisor, fsize])
        tensor = tf.nn.conv2d(tensor, _W[w_key], strides=unit_strides,
                              padding='SAME')
        if use_bias:
            tensor = tf.nn.bias_add(tensor, _b[b_key])
        tensor = _bn_relu(tensor)
        return tf.nn.dropout(tensor, _keepprob)

    # Encoder. Fix: stage 2 used to be normalised with the moments of
    # encoder1 instead of its own activations.
    encoder1 = _encoder_stage(_X, 'ce1', 'be1')
    encoder2 = _encoder_stage(encoder1, 'ce2', 'be2')
    encoder3 = _encoder_stage(encoder2, 'ce3', 'be3')
    encoder4 = _encoder_stage(encoder3, 'ce4', 'be4')

    # Decoder stage 4 (transposed convolution).
    # NOTE(review): decoder4 is built but never consumed -- decoder3 below
    # starts from encoder3. Kept as in the original; confirm whether
    # decoder3 was meant to take decoder4 as its input.
    decoder4 = Unpooling(encoder4, [batch, height / 16, width / 16, fsize])
    decoder4 = tf.nn.conv2d_transpose(decoder4, _W['cd4'],
                                      tf.pack([batch, ksize, ksize, fsize]),
                                      strides=unit_strides, padding='SAME')
    if use_bias:
        decoder4 = tf.nn.bias_add(decoder4, _b['bd4'])
    decoder4 = _bn_relu(decoder4)
    decoder4 = tf.nn.dropout(decoder4, _keepprob)

    decoder3 = _decoder_stage(encoder3, 8, 'cd3', 'bd3')
    decoder2 = _decoder_stage(decoder3, 4, 'cd2', 'bd2')
    decoder1 = _decoder_stage(decoder2, 2, 'cd1', 'bd1')

    output = tf.nn.conv2d(decoder1, _W['dense_inner_prod'],
                          strides=unit_strides, padding='SAME')
    return output
def Enc1(images):
    """First-level encoder: map images to 2*LATENT_DIM_1 feature maps.

    When PIXCNN_ONLY is set the encoder is bypassed and an all-zero tensor of
    shape [batch, 2 * LATENT_DIM_1, LATENTS1_WIDTH, LATENTS1_HEIGHT] is
    returned. Otherwise the input stem depends on SETTINGS and EMBED_INPUTS
    and is followed by a fixed stack of residual blocks and a 1x1 output
    convolution.
    """
    if PIXCNN_ONLY:
        n_images = tf.shape(images)[0]
        zeros_shape = tf.pack([
            n_images, 2 * LATENT_DIM_1, LATENTS1_WIDTH, LATENTS1_HEIGHT
        ])
        return tf.zeros(zeros_shape, tf.float32)

    def conv1x1(name, in_dim, out_dim, tensor):
        # 1x1 convolution without He initialisation.
        return lib.ops.conv2d.Conv2D(name,
                                     input_dim=in_dim,
                                     output_dim=out_dim,
                                     filter_size=1,
                                     inputs=tensor,
                                     he_init=False)

    def res3x3(name, in_dim, out_dim, resample, stdev, tensor):
        # 3x3 residual block.
        return ResidualBlock(name,
                             input_dim=in_dim,
                             output_dim=out_dim,
                             filter_size=3,
                             resample=resample,
                             inputs_stdev=stdev,
                             inputs=tensor)

    features = images

    # Input stem.
    if SETTINGS == '64px':
        if EMBED_INPUTS:
            features = conv1x1('Enc1.Input', N_CHANNELS * DIM_EMBED, DIM_0,
                               features)
            features = res3x3('Enc1.InputRes0', DIM_0, DIM_0, None, 1,
                              features)
            features = res3x3('Enc1.InputRes', DIM_0, DIM_1, 'down', 1,
                              features)
        else:
            features = conv1x1('Enc1.Input', N_CHANNELS, DIM_1, features)
            features = res3x3('Enc1.InputRes', DIM_1, DIM_1, 'down', 1,
                              features)
    elif EMBED_INPUTS:
        # NOTE(review): this branch uses N_CHANNELS * DIM_1 where the 64px
        # branch uses N_CHANNELS * DIM_EMBED -- confirm this is intended.
        features = conv1x1('Enc1.Input', N_CHANNELS * DIM_1, DIM_1, features)
    else:
        features = conv1x1('Enc1.Input', N_CHANNELS, DIM_1, features)

    # Shared residual trunk: (name, in_dim, out_dim, resample, inputs_stdev).
    trunk = (
        ('Enc1.Res1Pre', DIM_1, DIM_1, None, 1),
        ('Enc1.Res1', DIM_1, DIM_2, 'down', 1),
        ('Enc1.Res2Pre', DIM_2, DIM_2, None, 1),
        ('Enc1.Res2', DIM_2, DIM_3, 'down', np.sqrt(2)),
        ('Enc1.Res3Pre', DIM_3, DIM_3, None, 1),
        ('Enc1.Res3', DIM_3, DIM_3, None, np.sqrt(3)),
    )
    for block_name, in_dim, out_dim, resample, stdev in trunk:
        features = res3x3(block_name, in_dim, out_dim, resample, stdev,
                          features)

    return conv1x1('Enc1.Out', DIM_3, 2 * LATENT_DIM_1, features)
def __init__(self, nmaps, vec_size, niclass, noclass, dropout, rx_step,
             max_grad_norm, cutoff, nconvs, kw, kh, height, mode,
             learning_rate, iw_batches, pull, pull_incr, min_length,
             act_noise=0.0):
    """Build one model graph per length bin, sharing variables across bins.

    For every bin length the constructor embeds the inputs, runs `length`
    recurrent steps of `nconvs` convolutional GRU layers, computes the
    masked cross-entropy loss and, for trainable bins, stores the clipped
    gradients together with an update op that applies gradients fed through
    placeholders.

    Args:
        nmaps: number of feature maps of the internal state.
        vec_size: embedding width.
        niclass: number of rows of the embedding table.
        noclass: number of output classes (width of the logits).
        dropout: dropout strength; scaled by 8 / length per bin.
        rx_step: number of distinct "RX%d" variable scopes cycled over the
            recurrent steps.
        max_grad_norm: global-norm clipping threshold for the gradients.
        cutoff, kw, kh: forwarded unchanged to `conv_gru`.
        nconvs: number of conv-GRU layers applied per recurrent step.
        height: size of the third axis of the state tensor.
        mode: when 0, backward graphs are built for more than the first bin.
        learning_rate: initial value of the `lr` variable.
        iw_batches: stored on the instance as `self.iw_batches`.
        pull: initial weight of the relaxation term in the total loss.
        pull_incr: factor `pull_incr_op` multiplies `pull` by.
        min_length: initial value of the `cur_length` variable.
        act_noise: scale of the truncated-normal noise added to activations.
    """
    # Non-trainable state variables and the ops that advance them.
    self.global_step = tf.Variable(0, trainable=False)
    self.cur_length = tf.Variable(min_length, trainable=False)
    self.cur_length_incr_op = self.cur_length.assign_add(1)
    self.lr = tf.Variable(float(learning_rate), trainable=False)
    self.lr_decay_op = self.lr.assign(self.lr * 0.98)
    self.pull = tf.Variable(float(pull), trainable=False)
    self.pull_incr_op = self.pull.assign(self.pull * pull_incr)
    self.do_training = tf.placeholder(tf.float32, name="do_training")
    self.noise_param = tf.placeholder(tf.float32, name="noise_param")

    # One input and one target placeholder per position.
    self.input = []
    self.target = []
    for pos in xrange(data_utils.forward_max + 1):
        self.input.append(
            tf.placeholder(tf.int32, name="inp{0}".format(pos)))
        self.target.append(
            tf.placeholder(tf.int32, name="tgt{0}".format(pos)))

    # Per-bin results collected by the loop below.
    self.outputs = []
    self.losses = []
    self.grad_norms = []
    self.updates = []
    self.grads_bin = []
    self.placeholder_gradients = []
    self.iw_batches = iw_batches

    batch_size = tf.shape(self.input[0])[0]

    with tf.device("/cpu:0"):
        emb_weights = tf.get_variable(
            "embedding", [niclass, vec_size],
            initializer=tf.random_uniform_initializer(-1.7, 1.7))
        # Op that overwrites embedding row 0 with zeros.
        e0 = tf.scatter_update(
            emb_weights,
            tf.constant(0, dtype=tf.int32, shape=[1]),
            tf.zeros([1, vec_size]))

    opt = tf.train.GradientDescentOptimizer(self.lr)

    # Main graph creation loop, one iteration per bin length.
    self.steps = []
    bin_lengths = sorted(set(data_utils.bins + [data_utils.forward_max]))
    for bin_idx, length in enumerate(bin_lengths):
        data_utils.print_out("Creating model for bin of length %d." % length)
        start_time = time.time()
        if length > data_utils.bins[0]:
            tf.get_variable_scope().reuse_variables()

        with tf.device("/cpu:0"):
            # Lookups run only after row 0 of the table has been zeroed.
            with tf.control_dependencies([e0]):
                embedded = [
                    tf.nn.embedding_lookup(emb_weights, self.input[pos])
                    for pos in xrange(length)
                ]
            # Mask built from the zero checks on inputs and targets.
            imask = [check_for_zero(self.input[pos])
                     for pos in xrange(length)]
            omask = [check_for_zero(self.target[pos])
                     for pos in xrange(length)]
            mask = [1.0 - (imask[pos] * omask[pos])
                    for pos in xrange(length)]
            mask = [tf.reshape(m, [-1, 1]) for m in mask]
            # scales[i] = mask[i] * (1 - mask[i + 1]), with a zero mask
            # appended after the last position.
            shifted_mask = mask + [tf.zeros_like(mask[0])]
            scales = [
                shifted_mask[pos] * (1.0 - shifted_mask[pos + 1])
                for pos in xrange(length)
            ]
            scales = [tf.reshape(s, [-1, 1, 1, 1]) for s in scales]
            mask = tf.concat(1, mask[0:length])  # batch x length
            weights = mask
            # Expand the mask to [batch, length, height, nmaps].
            mask = tf.reshape(mask, [-1, length, 1, 1])
            mask_rows = tf.concat(2, [mask for _ in xrange(height)])
            mask = mask_rows + tf.zeros(
                tf.pack([batch_size, length, height, nmaps]),
                dtype=tf.float32)

        # tanh of the embeddings, stacked to [batch, length, 1, nmaps].
        start = [tf.tanh(embedded[pos]) for pos in xrange(length)]
        start = [tf.reshape(start[pos], [-1, 1, nmaps])
                 for pos in xrange(length)]
        start = tf.reshape(tf.concat(1, start), [-1, length, 1, nmaps])

        # First state: one convolution of `start`, padded with zero rows
        # up to `height`.
        first = conv_linear(start, 1, 1, vec_size, nmaps, True, 0.0, "input")
        first = [first] + [
            tf.zeros(tf.pack([batch_size, length, 1, nmaps]),
                     dtype=tf.float32)
            for _ in xrange(height - 1)
        ]
        first = tf.concat(2, first)

        # Recurrent computation.
        keep_prob = 1.0 - self.do_training * (dropout * 8.0 / float(length))
        step = [tf.nn.dropout(first, keep_prob) * mask]
        act_noise_scale = act_noise * self.do_training * self.pull
        outputs = []
        for it in xrange(length):
            with tf.variable_scope("RX%d" % (it % rx_step)) as vs:
                if it >= rx_step:
                    vs.reuse_variables()
                cur = step[it]
                # nconvs conv-GRU layers, re-masked after each one.
                for layer in xrange(nconvs):
                    cur = conv_gru([], cur, kw, kh, nmaps, cutoff,
                                   "cgru_%d" % layer)
                    cur *= mask
                # Keep only index 0 along the height axis as the output.
                outputs.append(tf.slice(cur, [0, 0, 0, 0], [-1, -1, 1, -1]))
                cur = tf.nn.dropout(cur, keep_prob)
                if act_noise > 0.00001:
                    cur += tf.truncated_normal(
                        tf.shape(cur)) * act_noise_scale
                step.append(cur * mask)

        self.steps.append(
            [tf.reshape(s, [-1, length, height * nmaps]) for s in step])

        # Combine the per-step outputs, weighted by `scales`.
        output = tf.add_n(
            [outputs[pos] * scales[pos] for pos in xrange(length)])
        # Final convolution to get logits, then per-position softmax.
        output = conv_linear(output, 1, 1, nmaps, noclass, True, 0.0,
                             "output")
        output = tf.reshape(output, [-1, length, noclass])
        external_output = [
            tf.reshape(o, [-1, noclass])
            for o in list(tf.split(1, length, output))
        ]
        external_output = [tf.nn.softmax(o) for o in external_output]
        self.outputs.append(external_output)

        # Masked cross-entropy, normalised by batch size and length.
        targets = tf.concat(
            1,
            [make_dense(self.target[pos], noclass)
             for pos in xrange(length)])
        targets = tf.reshape(targets, [-1, noclass])
        flat_logits = tf.reshape(output, [-1, noclass])
        xent = tf.reshape(
            tf.nn.softmax_cross_entropy_with_logits(flat_logits, targets),
            [-1, length])
        perp_loss = tf.reduce_sum(xent * weights)
        perp_loss /= tf.cast(batch_size, dtype=tf.float32)
        perp_loss /= length

        # Final loss: cross-entropy + shared parameter relaxation part.
        relax_dist, self.avg_op = relaxed_distance(rx_step)
        total_loss = perp_loss + relax_dist * self.pull
        self.losses.append(perp_loss)

        # Backward graph: always for the first bin; in mode 0 for every
        # bin whose length is below bins[-1] + 1.
        is_first_bin = length == data_utils.bins[0]
        in_training_range = mode == 0 and length < data_utils.bins[-1] + 1
        if is_first_bin or in_training_range:
            data_utils.print_out(
                "Creating backward for bin of length %d." % length)
            params = tf.trainable_variables()
            grads = tf.gradients(total_loss, params)
            grads, norm = tf.clip_by_global_norm(grads, max_grad_norm)
            self.grad_norms.append(norm)
            # The clipped gradients are exposed per bin; the update op
            # applies values fed through matching placeholders instead.
            self.grads_size = len(grads)
            self.grads_bin.append(grads)
            self.placeholder_gradients.append([
                tf.placeholder(tf.float32, shape=v.get_shape())
                for v in params
            ])
            update = opt.apply_gradients(
                zip(self.placeholder_gradients[bin_idx], params),
                global_step=self.global_step)
            self.updates.append(update)
        data_utils.print_out("Created model for bin of length %d in"
                             " %.2f s." % (length, time.time() - start_time))
    self.saver = tf.train.Saver(tf.all_variables())
def rnn(step_function, inputs, initial_states, go_backwards=False, mask=None):
    '''Iterates over the time dimension of a tensor.

    Parameters
    ----------
    step_function:
        Parameters:
            input: tensor with shape (samples, ...) (no time dimension),
                representing input for the batch of samples at a certain
                time step.
            states: list of tensors.
        Returns:
            output: tensor with shape (samples, ...) (no time dimension),
            new_states: list of tensors, same length and shapes
                as 'states'.
    inputs: tensor of temporal data of shape (samples, time, ...)
        (at least 3D).
    initial_states: list of tensors with shape (samples, ...) (no time
        dimension), containing the initial values for the states used in
        the step function.
    go_backwards: boolean. If True, do the iteration over
        the time dimension in reverse order.
    mask: binary tensor with shape (samples, time, 1),
        with a zero for every element that is masked.

    Returns
    -------
    A tuple (last_output, outputs, new_states).
        last_output: the latest output of the rnn, of shape (samples, ...)
        outputs: tensor with shape (samples, time, ...) where each
            entry outputs[s, t] is the output of the step function
            at time t for sample s.
        new_states: list of tensors, latest states returned by
            the step function, of shape (samples, ...).
    '''
    ndim = len(inputs.get_shape())
    assert ndim >= 3, "Input should be at least 3D."
    # Swap the batch and time axes so unpacking yields one tensor per step.
    axes = [1, 0] + list(range(2, ndim))
    inputs = tf.transpose(inputs, (axes))
    input_list = tf.unpack(inputs)

    states = initial_states
    successive_states = []
    successive_outputs = []
    if go_backwards:
        input_list.reverse()

    if mask is not None:
        # Transpose not supported by bool tensor types, hence round-trip to uint8.
        mask = tf.cast(mask, tf.uint8)
        if len(mask.get_shape()) == ndim - 1:
            mask = expand_dims(mask)
        mask = tf.cast(tf.transpose(mask, axes), tf.bool)
        mask_list = tf.unpack(mask)
        if go_backwards:
            # Fix: keep the masks aligned with the reversed inputs.
            # Previously only the inputs were reversed, so step t was
            # masked with the mask of step (time - 1 - t).
            mask_list.reverse()

        for step_input, mask_t in zip(input_list, mask_list):
            output, new_states = step_function(step_input, states)

            # tf.select needs its condition tensor to be the same shape as
            # its two result tensors, but the mask is (nsamples, 1) while
            # the candidates are (nsamples, ndimensions). Tiling repeats the
            # mask along its second dimension to match.
            tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(output)[1]]))

            # A masked step repeats the previous output (zeros at the start).
            if len(successive_outputs) == 0:
                prev_output = zeros_like(output)
            else:
                prev_output = successive_outputs[-1]

            output = tf.select(tiled_mask_t, output, prev_output)

            # A masked step likewise carries the previous states forward.
            return_states = []
            for state, new_state in zip(states, new_states):
                tiled_mask_t = tf.tile(
                    mask_t, tf.pack([1, tf.shape(new_state)[1]]))
                return_states.append(
                    tf.select(tiled_mask_t, new_state, state))

            states = return_states
            successive_outputs.append(output)
            successive_states.append(states)
    else:
        for step_input in input_list:
            output, states = step_function(step_input, states)
            successive_outputs.append(output)
            successive_states.append(states)

    last_output = successive_outputs[-1]
    outputs = tf.pack(successive_outputs)
    new_states = successive_states[-1]

    # Back to (samples, time, ...).
    axes = [1, 0] + list(range(2, len(outputs.get_shape())))
    outputs = tf.transpose(outputs, axes)
    return last_output, outputs, new_states
def create_pipeline_v3(load=None):
    """Train a three-hidden-layer sigmoid classifier and wrap it in a Pipeline.

    The network standardises its inputs with the means and standard
    deviations stored in the module-level `dataset`, is trained with Adam on
    cross-entropy plus L2 weight regularisation, and is returned as a
    one-step `Pipeline` around a `TensorFlowWrapper`.

    Args:
        load: accepted for interface compatibility; not used by this
            function.

    Returns:
        Pipeline with a single 'deep_classifier' step.
    """
    nb_features = 29
    nb_hidden1 = 400
    nb_hidden2 = 200
    nb_hidden3 = 100
    batch_size = 64
    nb_iter = 30001
    lamb = 0.0001

    # Fix: the original wrote (1 / 2 * batch_size), which parses as
    # (1 / 2) * batch_size -- 0 under Python 2 integer division and 32
    # under Python 3 -- instead of the intended 1 / (2 * batch_size).
    l2_scale = 1.0 / (2 * batch_size)

    in_pl = tf.placeholder(dtype=tf.float32,
                           shape=(None, nb_features),
                           name='input_placeholder')
    means = tf.constant(dataset['means'],
                        dtype=tf.float32,
                        shape=(1, nb_features),
                        name='features_means')
    stds = tf.constant(dataset['stds'],
                       dtype=tf.float32,
                       shape=(1, nb_features),
                       name='features_stds_placeholder')
    means_tiled = tf.tile(means, [tf.shape(in_pl)[0], 1])
    stds_tiled = tf.tile(stds, [tf.shape(in_pl)[0], 1])

    # Standardised inputs; the epsilon guards against zero deviations.
    inp = (in_pl - means_tiled) / (stds_tiled + 1e-10)

    y_pl = tf.placeholder(dtype=tf.float32,
                          shape=(None, 1),
                          name='target_placeholder')

    def _sigmoid_layer(inputs, n_in, n_out, weights_name, layer_name):
        # Fully connected sigmoid layer; also returns its L2 penalty term.
        weights = tf.Variable(tf.truncated_normal([n_in, n_out]),
                              dtype=tf.float32,
                              name=weights_name)
        l2_term = l2_scale * tf.reduce_sum(tf.square(weights))
        bias = tf.Variable(tf.zeros(shape=[n_out]))
        activation = tf.sigmoid(tf.matmul(inputs, weights) + bias,
                                name=layer_name)
        return activation, l2_term

    h1, W1_L2reg = _sigmoid_layer(inp, nb_features, nb_hidden1,
                                  'first_layer_weights',
                                  'first_hidden_layer')
    h2, W2_L2reg = _sigmoid_layer(h1, nb_hidden1, nb_hidden2,
                                  'second_layer_weights',
                                  'second_hidden_layer')
    h3, W3_L2reg = _sigmoid_layer(h2, nb_hidden2, nb_hidden3,
                                  'third_layer_weights',
                                  'third_hidden_layer')
    out, W4_L2reg = _sigmoid_layer(h3, nb_hidden3, 1,
                                   'last_layer_weights',
                                   'output_layer')

    # Two-column output [1 - out, out].
    proba = tf.squeeze(tf.pack([1 - out, out], 2), squeeze_dims=[1])

    L2reg = lamb * (W1_L2reg + W2_L2reg + W3_L2reg + W4_L2reg)
    cross_entropy = -(1 / float(2)) * tf.reduce_mean(
        y_pl * tf.log(out + 1e-10) + (1 - y_pl) * tf.log(1 - out + 1e-10),
        name='cost_function')
    cost = cross_entropy + L2reg

    train_step = tf.train.AdamOptimizer(1e-4).minimize(cost)
    init = tf.initialize_all_variables()
    sess = tf.Session()

    logger.info('Training model...')
    logger.info('model version %i' % 3)

    sess.run(init)
    for i in range(nb_iter):
        if i % 1000 == 0:
            logger.info('iteration %i of %i' % (i, nb_iter))
        feed_dict_train = fill_feed_dict_train(in_pl,
                                               y_pl,
                                               dataset,
                                               i,
                                               batch_size=batch_size)
        _, cost_value = sess.run([train_step, cost],
                                 feed_dict=feed_dict_train)
        if i % 10000 == 0:
            # Periodic debug dump, evaluated on the current training batch.
            inp_values, proba_values = sess.run([inp, proba],
                                                feed_dict=feed_dict_train)
            logger.debug('scaled inputs')
            logger.debug(inp_values)
            logger.debug('probabilities')
            logger.debug(proba_values)
            logger.debug('proba out shape')
            logger.debug(proba_values.shape)
            logger.debug('cost')
            logger.debug(cost_value)

    tfw = TensorFlowWrapper(sess,
                            tf_input=in_pl,
                            tf_output=proba,
                            target="y",
                            target_readable="class",
                            excluded=['class'])
    return Pipeline([('deep_classifier', tfw)])
# NOTE(review): the graph-building statements below are written at module
# level because their enclosing context is not visible in this chunk;
# confirm whether they belong inside a `with graph.as_default():` block.

# Projection from the hidden representation to per-class scores.
weightsClasses = tf.Variable(
    tf.truncated_normal([nHidden, nClasses], stddev=np.sqrt(2.0 / nHidden)))
biasesClasses = tf.Variable(tf.zeros([nClasses]))

#### Network: one bidirectional LSTM layer
forwardH1 = tf.nn.rnn_cell.LSTMCell(
    nHidden, use_peepholes=True, state_is_tuple=True)
backwardH1 = tf.nn.rnn_cell.LSTMCell(
    nHidden, use_peepholes=True, state_is_tuple=True)
fbH1, _, _ = tf.nn.bidirectional_rnn(
    forwardH1, backwardH1, inputList, dtype=tf.float32, scope='BDLSTM_H1')
# Split each step's output into its two directions, then combine them with
# the weighted sum over that axis.
fbH1rs = [tf.reshape(t, [batchSize, 2, nHidden]) for t in fbH1]
outH1 = [
    tf.reduce_sum(tf.mul(t, weightsOutH1), reduction_indices=1) + biasesOutH1
    for t in fbH1rs
]

logits = [tf.matmul(t, weightsClasses) + biasesClasses for t in outH1]

#### Optimizing: mean CTC loss minimised with momentum
logits3d = tf.pack(logits)
loss = tf.reduce_mean(ctc.ctc_loss(logits3d, targetY, seqLengths))
optimizer = tf.train.MomentumOptimizer(learningRate, momentum).minimize(loss)

#### Evaluating: edit distance of the beam-search decoding, per target label
logitsMaxTest = tf.slice(tf.argmax(logits3d, 2), [0, 0], [seqLengths[0], 1])
predictions = tf.to_int32(
    ctc.ctc_beam_search_decoder(logits3d, seqLengths)[0][0])
errorRate = (
    tf.reduce_sum(tf.edit_distance(predictions, targetY, normalize=False)) /
    tf.to_float(tf.size(targetY.values)))

#### Run session
with tf.Session(graph=graph) as session:
    print('Initializing')
    tf.initialize_all_variables().run()
    for epoch in range(nEpochs):
        print('Epoch', epoch+1, '...')
def init(H, config=None):
    """Load the frozen GoogLeNet graph and create trainable copies of its weights.

    Args:
        H: hyper-parameter dict; reads H['num_classes'] and
            H['dirs']['data_dir'] (the directory containing 'googlenet.pb').
        config: optional tf.ConfigProto for the session that evaluates the
            frozen weights; a default one is created when omitted.

    Returns:
        dict with keys:
            "W", "B": lists of new weight / bias variables, one pair per
                feature layer ('output/confidences', 'output/boxes').
            "weight_tensors": name -> tensor for every copied weight.
            "reuse_ops": ops of the frozen graph that are neither weight
                constants, the input op, nor the op named 'output'.
            "input_op": the frozen graph's 'input' operation.
            "W_norm": sum of the L2 losses of all weight variables.
    """
    if config is None:
        gpu_options = tf.GPUOptions()
        config = tf.ConfigProto(gpu_options=gpu_options)

    k = H['num_classes']
    features_dim = 1024
    input_layer = 'input'
    features_layers = ['output/confidences', 'output/boxes']

    data_dir = H['dirs']['data_dir']
    google_file = 'googlenet.pb'
    graph_def_orig_file = os.path.join(data_dir, google_file)

    dense_layer_num_output = [k, 4]

    googlenet_graph = tf.Graph()
    graph_def = tf.GraphDef()
    tf.set_random_seed(0)
    # Fix: a serialized GraphDef is binary data; text mode can corrupt it
    # on Windows and fails to decode on Python 3.
    with open(graph_def_orig_file, 'rb') as f:
        graph_def.MergeFromString(f.read())

    with googlenet_graph.as_default():
        tf.import_graph_def(graph_def, name='')

    input_op = googlenet_graph.get_operation_by_name(input_layer)

    # Constant ops whose names end in '_w' / '_b' hold the pretrained weights.
    weights_ops = [
        op for op in googlenet_graph.get_operations()
        if op.name.endswith(('_w', '_b')) and op.type == 'Const'
    ]

    # Build the exclusion set once instead of one list per graph op.
    excluded_ops = set(weights_ops)
    excluded_ops.add(input_op)
    reuse_ops = [
        op for op in googlenet_graph.get_operations()
        if op not in excluded_ops and op.name != 'output'
    ]

    # Evaluate the frozen constants to numpy arrays.
    with tf.Session(graph=googlenet_graph, config=config):
        weights_orig = {op.name: op.outputs[0].eval() for op in weights_ops}

    def weight_init(num_output):
        return 0.001 * np.random.randn(features_dim, num_output).astype(
            np.float32)

    def bias_init(num_output):
        return 0.001 * np.random.randn(num_output).astype(np.float32)

    W = [
        tf.Variable(weight_init(dense_layer_num_output[i]),
                    name='softmax/weights_{}'.format(i))
        for i in range(len(features_layers))
    ]

    B = [
        tf.Variable(bias_init(dense_layer_num_output[i]),
                    name='softmax/biases_{}'.format(i))
        for i in range(len(features_layers))
    ]

    # .items() / list(...) work on both Python 2 and Python 3.
    weight_vars = {
        name: tf.Variable(weight, name=name)
        for name, weight in weights_orig.items()
    }

    weight_tensors = {
        name: tf.convert_to_tensor(weight)
        for name, weight in weight_vars.items()
    }

    W_norm = [
        tf.nn.l2_loss(weight) for weight in list(weight_vars.values()) + W
    ]
    W_norm = tf.reduce_sum(tf.pack(W_norm), name='weights_norm')
    tf.scalar_summary(W_norm.op.name, W_norm)

    googlenet = {
        "W": W,
        "B": B,
        "weight_tensors": weight_tensors,
        "reuse_ops": reuse_ops,
        "input_op": input_op,
        "W_norm": W_norm,
    }
    return googlenet
def Deconv2D(
    name,
    input_dim,
    output_dim,
    filter_size,
    inputs,
    he_init=True,
    weightnorm=None,
    biases=True,
    gain=1.,
    mask_type=None,
    ):
    """
    Stride-2 transposed convolution that doubles height and width.

    inputs: tensor of shape (batch size, input_dim, height, width); it is
        transposed to NHWC internally for `tf.nn.conv2d_transpose`.
    he_init: use sqrt(4 / (fan_in + fan_out)) as the filter stdev instead of
        the normalized (Glorot & Bengio) sqrt(2 / (fan_in + fan_out)).
    weightnorm: enable weight normalization; None falls back to the
        module-level `_default_weightnorm`.
    biases: add a per-output-channel bias.
    gain: multiplier applied to the initial filter values.
    mask_type: must be None; any other value raises Exception.

    returns: tensor of shape (batch size, output_dim, 2*height, 2*width)
    """
    with tf.name_scope(name) as scope:

        if mask_type is not None:
            raise Exception('Unsupported configuration')

        def uniform(stdev, size):
            # Fix: the samples were cast to int32, which truncated every
            # value in this narrow range to 0 and made the in-place
            # `filter_values *= gain` fail on the int/float cast.
            return np.random.uniform(
                low=-stdev * np.sqrt(3),
                high=stdev * np.sqrt(3),
                size=size
            ).astype('float32')

        stride = 2
        fan_in = input_dim * filter_size**2 / (stride**2)
        fan_out = output_dim * filter_size**2

        if he_init:
            filters_stdev = np.sqrt(4./(fan_in+fan_out))
        else:  # Normalized init (Glorot & Bengio)
            filters_stdev = np.sqrt(2./(fan_in+fan_out))

        # A module-level override of the stdev takes precedence when set.
        if _weights_stdev is not None:
            filter_values = uniform(
                _weights_stdev,
                (filter_size, filter_size, output_dim, input_dim)
            )
        else:
            filter_values = uniform(
                filters_stdev,
                (filter_size, filter_size, output_dim, input_dim)
            )

        filter_values *= gain

        filters = lib.param(
            name+'.Filters',
            filter_values
        )

        if weightnorm is None:
            weightnorm = _default_weightnorm
        if weightnorm:
            # One norm per output channel (axis 2 of the filter tensor).
            norm_values = np.sqrt(
                np.sum(np.square(filter_values), axis=(0, 1, 3)))
            target_norms = lib.param(
                name + '.g',
                norm_values
            )
            with tf.name_scope('weightnorm') as scope:
                norms = tf.sqrt(tf.reduce_sum(
                    tf.square(filters), reduction_indices=[0, 1, 3]))
                filters = filters * tf.expand_dims(target_norms / norms, 1)

        inputs = tf.transpose(inputs, [0, 2, 3, 1], name='NCHW_to_NHWC')

        input_shape = tf.shape(inputs)
        output_dims = [input_shape[0], 2*input_shape[1], 2*input_shape[2],
                       output_dim]
        try:  # tf pre-1.0 (top) vs 1.0 (bottom)
            output_shape = tf.pack(output_dims)
        except AttributeError:
            output_shape = tf.stack(output_dims)

        result = tf.nn.conv2d_transpose(
            value=inputs,
            filter=filters,
            output_shape=output_shape,
            strides=[1, 2, 2, 1],
            padding='SAME'
        )

        if biases:
            _biases = lib.param(
                name+'.Biases',
                np.zeros(output_dim, dtype='float32')
            )
            result = tf.nn.bias_add(result, _biases)

        result = tf.transpose(result, [0, 3, 1, 2], name='NHWC_to_NCHW')

        return result
def main(_):
    """Run one task of a distributed softmax-regression training job.

    A parameter-server task blocks in `server.join()`. A worker task builds
    a single-layer softmax model fed from a CSV file through an input queue
    and trains it under a `tf.train.Supervisor`.
    """
    # Parse the host:port lists of the parameter servers and the workers.
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")

    # Describe the cluster and start the server for this task.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    if FLAGS.job_name == "ps":
        # Parameter servers only serve variables.
        server.join()
    elif FLAGS.job_name == "worker":
        # Place ops on this worker; the device setter decides where the
        # variables live.
        with tf.device(tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % FLAGS.task_index,
                cluster=cluster)):
            # filenames = ['hdfs://default/user/bdusr01/asy/mergeOneHot.csv']
            filenames = ['/home/bdusr01/asy/DL/DistributeTF/mergeOneHot.csv']
            # Queue of file names, read line by line.
            filename_queue = tf.train.string_input_producer(filenames,
                                                            shuffle=False)
            reader = tf.TextLineReader()
            key, value = reader.read(filename_queue)
            # 794 integer columns per record: 784 features + 10 label columns.
            lines = tf.decode_csv(value,
                                  record_defaults=[[0] for _ in range(794)])
            features = tf.pack([lines[:-10]])
            labels = tf.pack(lines[-10:])

            W = tf.Variable(tf.zeros([784, 10]))
            b = tf.Variable(tf.zeros([10]))

            # One record per step, fed straight from the queue.
            x = tf.reshape(features, [1, IMAGE_PIXELS*IMAGE_PIXELS])
            x = tf.to_float(x)
            y = tf.nn.softmax(tf.matmul(x, W) + b)
            y_ = tf.reshape(labels, [1, 10])
            y_ = tf.to_float(y_)
            cross_entropy = tf.reduce_mean(
                -tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))

            # Fix: global_step was never incremented because it was not
            # passed to minimize(); the Supervisor uses it for checkpoints.
            global_step = tf.Variable(0, trainable=False)
            train_step = tf.train.GradientDescentOptimizer(0.5).minimize(
                cross_entropy, global_step=global_step)

            # Fix: build the accuracy ops before the Supervisor exists. They
            # used to be created inside the training loop, after the
            # Supervisor had finalized the graph.
            correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

            saver = tf.train.Saver()  # periodic checkpoints for recovery
            summary_op = tf.summary.merge_all()
            init_op = tf.global_variables_initializer()

        # The supervisor takes care of session initialization, restoring
        # from a checkpoint, and closing when done or an error occurs.
        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 logdir="/tmp/train_logs",
                                 init_op=init_op,
                                 summary_op=summary_op,
                                 saver=saver,
                                 global_step=global_step,
                                 save_model_secs=600)

        with sv.managed_session(server.target) as sess:
            # Loop until the supervisor asks to stop.
            while not sv.should_stop():
                for i in range(1000):
                    sess.run(train_step)
                    if i % 10 == 0:
                        print(sess.run(accuracy))

        # Stop the supervisor's services.
        sv.stop()
def pack(x):
    """Pack `x` into a single tensor by delegating to `tf.pack`."""
    packed = tf.pack(x)
    return packed
import glob


def read_and_decode_tfrecord(filename, shape):
    """Read (input, ground-truth) pairs from a TFRecord file as batches.

    Args:
        filename: path of the TFRecord file.
        shape: shape both decoded float32 buffers are reshaped to.

    Returns:
        Tuple `(input_image_batch, gt_image_batch)` from `tf.train.batch`.
    """
    filename_queue = tf.train.string_input_producer([filename],
                                                    num_epochs=None)
    reader = tf.TFRecordReader()
    # Fix: the queue variable name was misspelled (`filename_qeueu`).
    _, serialized_example = reader.read(filename_queue)
    # Fix: a comma was missing between the last two feature entries.
    features = tf.parse_single_example(
        serialized_example,
        features={
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'input_data_raw': tf.FixedLenFeature([], dtype=tf.string),
            'gt_data_raw': tf.FixedLenFeature([], dtype=tf.string),
        })
    image = tf.decode_raw(features['input_data_raw'], tf.float32)
    gt = tf.decode_raw(features['gt_data_raw'], tf.float32)
    image_shape = tf.pack(shape)
    # Fix: `tf.reahspe` was a typo for `tf.reshape`.
    input_image = tf.reshape(image, image_shape)
    gt_image = tf.reshape(gt, image_shape)
    # NOTE(review): `batch_size` is not defined in this function; it is
    # expected to exist at module level -- confirm.
    input_image_batch, gt_image_batch = tf.train.batch(
        [input_image, gt_image], batch_size=batch_size)
    return input_image_batch, gt_image_batch


def train():
    """Placeholder; not implemented yet."""
    pass


def initialize_weights(nL):
    """Placeholder; not implemented yet."""
    pass


def relu(x):
    """Apply `tf.nn.relu` to `x`."""
    return tf.nn.relu(x)


def conv3d(x, W, b, stride=1, padding='SAME'):
    """Apply a 3-D convolution of `x` with kernel `W`.

    NOTE(review): as written the result is not returned and `b` is unused;
    the function looks unfinished -- left unchanged pending confirmation.
    """
    x = tf.nn.conv3d(x, W, strides=[1, stride, stride, stride, 1],
                     padding=padding)
def __init__(self, input_size, hidden_layer_size, target_size):
    """Create the LSTM parameters, the input placeholder and the initial state.

    Args:
        input_size: Size of the last axis of the input placeholder.
        hidden_layer_size: Width of the hidden state.
        target_size: Width of the output layer.
    """
    # Keep the given sizes on the instance.
    self.input_size = input_size
    self.hidden_layer_size = hidden_layer_size
    self.target_size = target_size

    n_in = self.input_size
    n_hidden = self.hidden_layer_size

    # Zero-initialised parameters: an input weight W*, a recurrent weight U*
    # and a bias b* for each of the suffixes i, f, og and c, created in that
    # order.
    for suffix in ("i", "f", "og", "c"):
        setattr(self, "W" + suffix,
                tf.Variable(tf.zeros([n_in, n_hidden])))
        setattr(self, "U" + suffix,
                tf.Variable(tf.zeros([n_hidden, n_hidden])))
        setattr(self, "b" + suffix,
                tf.Variable(tf.zeros([n_hidden])))

    # Output-layer weights and bias, drawn from a truncated normal.
    self.Wo = tf.Variable(
        tf.truncated_normal([n_hidden, self.target_size],
                            mean=0, stddev=.01))
    self.bo = tf.Variable(
        tf.truncated_normal([self.target_size], mean=0, stddev=.01))

    # Input placeholder with shape [batch, seq, embeddings].
    self._inputs = tf.placeholder(
        tf.float32,
        shape=[None, max_padding_len, n_in],
        name='inputs')

    # Rearranged inputs, ready to be consumed by the scan function.
    self.processed_input = process_batch_input_for_RNN(self._inputs)

    # A natural initial hidden state would have shape
    # [1, hidden_layer_size] and rely on broadcasting in the first time
    # step's dot product with the weights. That works going forward, but a
    # low-level error occurs during back-propagation. The workaround is an
    # initial state already shaped [batch_size, hidden_layer_size]:
    # multiplying the first time step of the inputs by an all-zero matrix
    # gives zeros of exactly that shape.
    first_step = self._inputs[:, 0, :]
    zero_state = tf.matmul(
        first_step, tf.zeros([input_size, hidden_layer_size]))
    # Two stacked copies of the zero state.
    self.initial_hidden = tf.pack([zero_state, zero_state])
dataset_utils.download_and_uncompress_tarball(url, checkpoints_dir) with tf.Graph().as_default(): # Create model architecture from scipy import misc img = misc.imread('lena_299.png') print(img.shape) inputs = np.ones((1, 299, 299, 3), dtype=np.float32) inputs[0, 0, 0, 0] = -1 #inputs[0] = img print(inputs.mean()) print(inputs.std()) inputs = tf.pack(inputs) # tensorflow normalization # https://github.com/tensorflow/models/blob/master/slim/preprocessing/inception_preprocessing.py#L273 #inputs = tf.sub(inputs, 0.5) #inputs = tf.mul(inputs, 2.0) with slim.arg_scope(inception.inception_resnet_v2_arg_scope()): logits, _ = inception.inception_resnet_v2(inputs, num_classes=1001, is_training=False) with tf.Session() as sess: # Initialize model init_fn = slim.assign_from_checkpoint_fn( os.path.join(checkpoints_dir,
def __init__(self, vocab_size, hidden_size, dropout, num_layers,
             max_gradient_norm, max_seq_length, learning_rate, lr_decay,
             batch_size, forward_only=False):
    """Build the graph of an LSTM sequence classifier with two classes.

    Args:
        vocab_size: Number of rows of the embedding matrix.
        hidden_size: Embedding width and LSTM size.
        dropout: Keep probability used for the embedding, LSTM-input and
            LSTM-output dropout.
        num_layers: Number of stacked LSTM cells.
        max_gradient_norm: Global-norm threshold for gradient clipping.
        max_seq_length: Number of time steps that are unrolled.
        learning_rate: Initial learning rate.
        lr_decay: Factor applied by ``learning_rate_decay_op``.
        batch_size: Batch size used to shape the initial state.
        forward_only: When true, no gradient/update ops are built.
    """
    self.num_classes = 2
    self.vocab_size = vocab_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * lr_decay)
    self.batch_pointer = 0
    self.batch_size = batch_size
    self.projection_dim = hidden_size
    self.dropout = dropout
    self.max_gradient_norm = max_gradient_norm
    self.global_step = tf.Variable(0, trainable=False)
    self.max_seq_length = max_seq_length

    # seq_input:   token ids, one row of max_seq_length entries per example
    # target:      per-class target scores between 0 and 1
    # seq_lengths: the early-stop length of each input sequence
    self.str_summary_type = tf.placeholder(tf.string,
                                           name="str_summary_type")
    self.seq_input = tf.placeholder(tf.int32,
                                    shape=[None, max_seq_length],
                                    name="input")
    self.target = tf.placeholder(tf.float32, name="target",
                                 shape=[None, self.num_classes])
    self.seq_lengths = tf.placeholder(tf.int32, shape=[None],
                                      name="early_stop")

    self.dropout_keep_prob_embedding = tf.constant(self.dropout)
    self.dropout_keep_prob_lstm_input = tf.constant(self.dropout)
    self.dropout_keep_prob_lstm_output = tf.constant(self.dropout)

    with tf.variable_scope("embedding"), tf.device("/cpu:0"):
        W = tf.get_variable(
            "W", [self.vocab_size, hidden_size],
            initializer=tf.random_uniform_initializer(-1.0, 1.0))
        self.embedded_tokens = tf.nn.embedding_lookup(W, self.seq_input)
        self.embedded_tokens_drop = tf.nn.dropout(
            self.embedded_tokens, self.dropout_keep_prob_embedding)

    # Using this process to get all hidden states across time is from:
    # https://github.com/dennybritz/tf-models/blob/master/tfmodels/models/rnn/rnn_classifier.py
    with tf.variable_scope("lstm") as scope:
        # The RNN cell.
        single_cell = rnn_cell.DropoutWrapper(
            rnn_cell.LSTMCell(
                hidden_size, hidden_size,
                initializer=tf.random_uniform_initializer(-1.0, 1.0)),
            input_keep_prob=self.dropout_keep_prob_lstm_input,
            output_keep_prob=self.dropout_keep_prob_lstm_output)
        self.cell = rnn_cell.MultiRNNCell([single_cell] * num_layers)

        # Build the recurrence manually, one time step per iteration, so
        # that every intermediate state is kept.
        self.initial_state = tf.zeros(
            [self.batch_size, self.cell.state_size])
        self.encoder_states = [self.initial_state]
        self.encoder_outputs = []
        for i in range(self.max_seq_length):
            if i > 0:
                # Every step after the first shares the first step's weights.
                scope.reuse_variables()
            new_output, new_state = self.cell(
                self.embedded_tokens_drop[:, i, :],
                self.encoder_states[-1])
            self.encoder_outputs.append(new_output)
            self.encoder_states.append(new_state)

    # Average all collected states (the zero initial state included) over
    # time, then split the concatenated state into cell state and hidden
    # state and keep the second half.
    concat_states = tf.pack(self.encoder_states)
    avg_states = tf.reduce_mean(concat_states, 0)
    _, self.final_state = tf.split(1, 2, avg_states)
    # Keep only the last hidden_size columns of that half.
    self.final_state = tf.slice(
        self.final_state,
        [0, hidden_size * (num_layers - 1)],
        [-1, hidden_size])

    with tf.variable_scope("output_projection"):
        W = tf.get_variable(
            "W", [hidden_size, self.num_classes],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        b = tf.get_variable("b", [self.num_classes],
                            initializer=tf.constant_initializer(0.1))
        self.scores = tf.nn.xw_plus_b(self.final_state, W, b)
        self.y = tf.nn.softmax(self.scores)
        self.predictions = tf.argmax(self.scores, 1)

    with tf.variable_scope("loss"):
        self.losses = tf.nn.softmax_cross_entropy_with_logits(
            self.scores, self.target, name="ce_losses")
        self.total_loss = tf.reduce_sum(self.losses)
        self.mean_loss = tf.reduce_mean(self.losses)

    with tf.variable_scope("accuracy"):
        self.correct_predictions = tf.equal(self.predictions,
                                            tf.argmax(self.target, 1))
        self.accuracy = tf.reduce_mean(
            tf.cast(self.correct_predictions, "float"), name="accuracy")

    params = tf.trainable_variables()
    if not forward_only:
        # Training-only ops: clipped Adam update on the per-example losses.
        with tf.name_scope("train"):
            opt = tf.train.AdamOptimizer(self.learning_rate)
            gradients = tf.gradients(self.losses, params)
            clipped_gradients, norm = tf.clip_by_global_norm(
                gradients, self.max_gradient_norm)
            with tf.name_scope("grad_norms"):
                # Created for its summary side effect; it is not part of
                # self.merged below.
                grad_summ = tf.scalar_summary("grad_norms", norm)
            self.update = opt.apply_gradients(
                zip(clipped_gradients, params),
                global_step=self.global_step)

    # Summaries and saver are built for every model, training or not, so
    # that self.merged and self.saver always exist.
    # NOTE(review): str.format() interpolates the placeholder object's
    # string form here, not a value fed at run time -- confirm the intended
    # summary tags.
    loss_summ = tf.scalar_summary(
        "{0}_loss".format(self.str_summary_type), self.mean_loss)
    acc_summ = tf.scalar_summary(
        "{0}_accuracy".format(self.str_summary_type), self.accuracy)
    self.merged = tf.merge_summary([loss_summ, acc_summ])
    self.saver = tf.train.Saver(tf.all_variables())
def sample(self, max_len=30):
    """Greedily generate word indices from the image features.

    Input:
    - max_len: max length for generating captions. NOTE(review): this
      argument is not consulted; the loop always runs for ``self.T`` steps.
      Confirm which of the two is intended.

    Place Holder:
    - features: input image features of shape (N, D)

    Returns
    - sampled_idxs: generated word idxs of shape (N, T)
    """
    # sizes read from the instance
    T = self.T
    N = self.N
    M = self.M

    # place holder for the image features
    features = self.features

    # word embedding matrix
    W_embed = self.params['W_embed']

    # parameters for (cnn_features)-to-(initial_hidden)
    W_proj = self.params['W_proj']
    b_proj = self.params['b_proj']

    # parameters for input-to-hidden, hidden-to-hidden
    Wx = self.params['Wx']
    Wh = self.params['Wh']
    b = self.params['b']

    # parameters for hidden-to-vocab
    W_vocab = self.params['W_vocab']
    b_vocab = self.params['b_vocab']

    # generate initial hidden state; the cell state starts at the scalar 0
    h = affine_forward(features, W_proj, b_proj)    # (N, H)
    c = 0
    sampled_idxs = []

    for t in range(T):
        # embed the previously generated word (zeros for the first step)
        if t == 0:
            x = tf.zeros([N, M])
        else:
            x = word_embedding_forward(prev_word, W_embed)    # (N, 1, M)
            x = tf.reshape(x, [N, M])

        # RNN or LSTM step
        if self.cell_type == 'rnn':
            h = rnn_step_forward(x, h, Wx, Wh, b)
        else:
            h, c = lstm_step_forward(x, h, c, Wx, Wh, b)

        # hidden-to-vocab
        out = affine_forward(h, W_vocab, b_vocab)

        # select the word with the highest score
        word_idx = tf.argmax(out, 1)    # (N, )
        sampled_idxs.append(word_idx)
        prev_word = tf.reshape(word_idx, [N, 1])

    sampled_idxs = tf.pack(sampled_idxs)    # (T, N)
    sampled_idxs = tf.transpose(sampled_idxs, (1, 0))    # (N, T)
    return sampled_idxs
def prediction(logits):
    """Stack the softmax of ``logits[i]`` for every ``i`` in ``classifier_matrix()``."""
    per_entry = []
    for idx in classifier_matrix():
        per_entry.append(tf.nn.softmax(logits[idx]))
    return tf.pack(per_entry)