def __init__(self, shape, lambda1=0.1, lambda2=0.1, mu=0.1):
    """Initialize the ChanVese segmenter

    Arguments:
    shape (required) -- size of the image to segment
    lambda1 (default: 0.1) -- The cost of labeling pixels type 1 (check the Class
        docstring). This argument (as well as lambda2) can be used if the segmentation
        should be biased in one direction or the other. It's not deterministic what
        bits of the image get labeled with either lambda though -- this (as well as
        lambda2) will likely be a bit of a guess and check parameter.
    lambda2 (default: 0.1) -- The cost of labeling pixels type 2 (check the Class docstring)
    mu (default: 0.1) -- This is the cost of having a boundary. A higher value will mean
        fewer boundaries
    """
    # Build a small 3x3 Gaussian blur kernel on the CPU, then load it as a constant.
    xs = range(3)
    ys = range(3)
    Xs, Ys = numpy.meshgrid(xs, ys)
    Rs = numpy.sqrt((Xs - 1.0)**2 + (Ys - 1.0)**2)
    kernelBlurCpu = numpy.exp(-Rs / (2.0 * 0.75**2)).astype('float32')
    kernelBlurCpu /= numpy.linalg.norm(kernelBlurCpu.flatten())

    self.kernel = tf.constant(kernelBlurCpu.reshape([3, 3, 1, 1]))

    self.I = tf.Variable(tf.truncated_normal(shape=[1, shape[0], shape[1], 1],
                                             mean=0.0, stddev=0.1))
    self.u1 = tf.Variable(1.0)
    self.u2 = tf.Variable(-1.0)

    self.G = tf.placeholder(tf.float32, shape=shape)
    self.Gv = tf.Variable(numpy.zeros([1, shape[0], shape[1], 1]).astype('float32'))

    self.initialize = self.Gv.assign(tf.reshape(self.G, shape=[1, shape[0], shape[1], 1]))
    self.initialize2 = self.I.assign(tf.reshape(self.G, shape=[1, shape[0], shape[1], 1]))

    self.blur = tf.nn.conv2d(self.I, self.kernel, strides=[1, 1, 1, 1], padding='SAME')

    self.u1m = tf.abs(self.blur - self.u1)
    self.u2m = tf.abs(self.blur - self.u2)

    ones = numpy.ones((1, shape[0], shape[1], 1)).astype('float32')
    zeros = numpy.zeros((1, shape[0], shape[1], 1)).astype('float32')

    self.lambda1 = lambda1
    self.lambda2 = lambda2
    self.mu = mu

    eta = 0.1
    self.conv = eta / (numpy.pi * (eta**2 + self.blur**2))

    self.u1t = self.lambda1 * tf.reduce_sum(
        tf.select(self.u2m > self.u1m, (self.Gv - self.u1)**2, zeros))
    self.u2t = self.lambda2 * tf.reduce_sum(
        tf.select(self.u2m <= self.u1m, (self.Gv - self.u2)**2, zeros))
    self.edgeLoss = self.mu * tf.reduce_sum(tf.abs(self.conv))

    self.loss = self.u1t + self.u2t + self.edgeLoss
    self.shape = shape

    self.train_step = tf.train.AdamOptimizer(1.0e-1).minimize(
        self.loss, var_list=[self.I, self.u1, self.u2])
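A minimal usage sketch under stated assumptions: the class name ChanVese is taken from the docstring, `image` is assumed to be a float32 array of the given shape, and the step count is arbitrary.

segmenter = ChanVese(shape=(128, 128))   # hypothetical enclosing class name
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    # Load the image into both the reference variable Gv and the level-set variable I.
    sess.run([segmenter.initialize, segmenter.initialize2], feed_dict={segmenter.G: image})
    for _ in range(200):
        sess.run(segmenter.train_step)
    # Pixels belong to region 1 where the blurred level set is closer to u1 than to u2,
    # matching the select condition used in the loss above.
    u1m, u2m = sess.run([segmenter.u1m, segmenter.u2m])
    labels = u2m > u1m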
def loss_estimate(batch_size, old_state, data, total_data, model_params, base_mean, base_log_var):
    clipped_log_vals, nan_mask, reset_rows = data
    zeros = tf.zeros_like(clipped_log_vals)
    state_init = model_params.init_state(batch_size)

    data_count = tf.reduce_sum(tf.to_float(tf.logical_not(nan_mask)), name='data_count')
    model_input = tf.select(nan_mask, zeros, clipped_log_vals)
    target_outputs = model_input

    sample_params = model_params.sample_vals(batch_size)

    # TODO verify significance of old_state
    filtered_state = tf.select(reset_rows, old_state, state_init)
    new_state, delta_mean = sample_inference(filtered_state, model_input, sample_params)

    variance = tf.exp(base_log_var)
    mean = base_mean + delta_mean * variance

    raw_losses = gaussian_neg_log_likelyhood(target_outputs, mean, variance)
    clean_raw_losses = tf.select(nan_mask, zeros, raw_losses)
    raw_loss = tf.reduce_sum(clean_raw_losses)
    kl_divergence = model_params.get_divergence()

    loss_estimate = raw_loss * (total_data / data_count) + kl_divergence
    return loss_estimate, new_state, kl_divergence
def build_mh_update(self):
    with tf.name_scope("gold_model"):
        self.joint_density_gold = self.joint_density(**self.symbols_gold)

    with tf.name_scope("proposed_model"):
        self.joint_density_proposed = self.joint_density(**self.symbols_proposed)

    with tf.name_scope("mh_updates"):
        self.mh_ratio = self.joint_density_proposed - self.joint_density_gold
        self.uniform = tf.placeholder(dtype=tf.float32, name="u")
        log_uniform = tf.log(self.uniform)
        self.accepted = log_uniform < self.mh_ratio

        update_ops = []
        for name, latent in self.latents.items():
            next_val = tf.select(self.accepted, latent["proposed"], latent["gold"])
            update_ops.append(latent["gold"].assign(next_val))

        self.step_counter = tf.Variable(0)
        self.accept_counter = tf.Variable(0)
        self.accept_rate = tf.to_double(self.accept_counter) / tf.to_double(self.step_counter)
        update_ops.append(self.step_counter.assign_add(1))
        update_ops.append(self.accept_counter.assign_add(tf.select(self.accepted, 1, 0)))

        self.global_update = tf.group(*update_ops)

    return self.global_update
def evaluate_precision_recall( input_layer, labels, threshold=0.5, per_example_weights=None, name=PROVIDED, phase=Phase.train ): """Computes the precision and recall of the prediction vs the labels. Args: input_layer: A Pretty Tensor object. labels: The target labels to learn as a float tensor. threshold: The threshold to use to decide if the prediction is true. per_example_weights: A Tensor with a weight per example. name: An optional name. phase: The phase of this model; non training phases compute a total across all examples. Returns: Precision and Recall. """ _ = name # Eliminate warning, name used for namescoping by PT. selected, sum_retrieved, sum_relevant = _compute_precision_recall( input_layer, labels, threshold, per_example_weights ) if phase != Phase.train: dtype = tf.float32 # Create the variables in all cases so that the load logic is easier. relevant_count = tf.get_variable( "relevant_count", [], dtype, tf.zeros_initializer, collections=[bookkeeper.GraphKeys.TEST_VARIABLES], trainable=False, ) retrieved_count = tf.get_variable( "retrieved_count", [], dtype, tf.zeros_initializer, collections=[bookkeeper.GraphKeys.TEST_VARIABLES], trainable=False, ) selected_count = tf.get_variable( "selected_count", [], dtype, tf.zeros_initializer, collections=[bookkeeper.GraphKeys.TEST_VARIABLES], trainable=False, ) with input_layer.g.device(selected_count.device): selected = tf.assign_add(selected_count, selected) with input_layer.g.device(retrieved_count.device): sum_retrieved = tf.assign_add(retrieved_count, sum_retrieved) with input_layer.g.device(relevant_count.device): sum_relevant = tf.assign_add(relevant_count, sum_relevant) return ( tf.select(tf.equal(sum_retrieved, 0), tf.zeros_like(selected), selected / sum_retrieved), tf.select(tf.equal(sum_relevant, 0), tf.zeros_like(selected), selected / sum_relevant), )
def _loss_x_entropy(self, x, z, noise=None):
    with tf.name_scope("xentropy_loss"):
        z_clipped = tf.clip_by_value(z, FLAGS.zero_bound, FLAGS.one_bound)
        z_minus_1_clipped = tf.clip_by_value((1.0 - z), FLAGS.zero_bound, FLAGS.one_bound)
        x_clipped = tf.clip_by_value(x, FLAGS.zero_bound, FLAGS.one_bound)
        x_minus_1_clipped = tf.clip_by_value((1.0 - x), FLAGS.zero_bound, FLAGS.one_bound)

        # cross_entropy = x * log(z) + (1 - x) * log(1 - z)
        cross_entropy = tf.add(tf.mul(tf.log(z_clipped), x_clipped),
                               tf.mul(tf.log(z_minus_1_clipped), x_minus_1_clipped),
                               name='X-Entr')

        if noise:
            with tf.name_scope("Given_Emphasis"):
                a, b = self._get_emph_params
                corrupted = tf.select(noise, cross_entropy, tf.zeros_like(cross_entropy),
                                      name='Corrupted_Emphasis')
                # OR -- tf.select(tf.logical_not(noisy_points), cross_entropy,
                #                 tf.zeros_like(cross_entropy), name='Uncorrupted_Emphasis')
                uncorrupted = tf.select(noise, tf.zeros_like(cross_entropy), cross_entropy,
                                        name='Uncorrupted_Emphasis')

                loss = a * (-1 * tf.reduce_sum(corrupted, 1)) + \
                       b * (-1 * tf.reduce_sum(uncorrupted, 1))
        else:
            # Sum the cost for each example
            loss = -1 * tf.reduce_sum(cross_entropy, 1)

        # Reduce mean to find the overall cost of the loss
        cross_entropy_mean = tf.reduce_mean(loss, name='xentropy_mean')

    return cross_entropy_mean
def UpdateProbs(self, inp):
    """Update the probability of each particle based on the 2D matrix `inp`,
    which is a 2D perspective projection of the scene."""

    projection, onscreen = self.project()
    filtered_projection = tf.to_int64(tf.select(onscreen, projection, tf.zeros_like(projection)))
    per_state_probabilities = tf.gather_nd(inp, filtered_projection)

    filtered_probabilities = tf.select(onscreen, per_state_probabilities,
                                       tf.zeros_like(per_state_probabilities))

    new_state_indicies = tf.squeeze(tf.multinomial(
        tf.expand_dims(tf.log(filtered_probabilities), 0), self.particles / 10 * 9))

    new_state = tf.gather(self.state, new_state_indicies)

    # Add momentum
    new_state = tf.concat(1, [new_state[:, 0:3] + new_state[:, 3:6], new_state[:, 3:10]])

    # Add in particles for the "just come onscreen" case.
    new_state = tf.concat(0, [new_state,
                              tf.random_normal([self.particles / 10, 10]) * self.initial_std
                              + self.initial_bias])

    new_state = new_state + tf.random_normal([self.particles, 10]) * self.update_std

    # Todo: permute state by adding noise.
    return self.state.assign(new_state)
def updatesome():
    if reverse:
        return tf.select(
            tf.greater_equal(time, max_sequence_length - lengths),
            new_state,
            old_state)
    else:
        return tf.select(tf.less(time, lengths), new_state, old_state)
def testShapeMismatch(self):
    c = np.random.randint(0, 2, 6).astype(np.bool).reshape(1, 3, 2)
    x = np.random.rand(1, 3, 2) * 100
    y = np.random.rand(2, 5, 3) * 100
    for t in [np.float32, np.float64, np.int32, np.int64, np.complex64]:
        xt = x.astype(t)
        yt = y.astype(t)
        with self.assertRaises(ValueError):
            tf.select(c, xt, yt)
def _copy_some_through(new_output, new_alpha, new_attn_ids, new_lmbdas, new_state):
    # Use broadcasting select to determine which values should get
    # the previous state & zero output, and which values should get
    # a calculated state & output.
    # Alpha needs to be (batch, tasks, k)
    copy_cond = (time >= sequence_length)
    return ([tf.select(copy_cond, zero_output, new_output),
             tf.select(copy_cond, zero_alpha, new_alpha),  # (batch, tasks, k)
             tf.select(copy_cond, zero_attn_ids, new_attn_ids),
             tf.select(copy_cond, zero_lmbdas, new_lmbdas)] +
            [tf.select(copy_cond, old_s, new_s)
             for (old_s, new_s) in zip(state, new_state)])
def _lcod(x, w_e, w_s, thresh, T):
    """
    Learned Coordinate Descent (LCoD). LCoD is an approximately sparse encoder. It
    approximates (in an L2 sense) a sparse code of `x` according to dictionary `w_e`.
    Note that during backpropagation, `w_e` isn't strictly a dictionary (i.e.
    dictionary atoms are not strictly normalized).

    LCoD is a differentiable version of greedy coordinate descent.

    Args:
      x: [n, n_f] tensor
      w_e: [n_f, n_c] encoder tensor
      w_s: [n_c, n_f] mutual inhibition tensor
      thresh: soft threshold
      T: number of iterations

    Returns:
      z: LCoD output
    """
    with tf.name_scope('itr_00'):
        b = tf.matmul(x, w_e, name='b')
        z = tf.zeros_like(b, dtype=tf.float32, name='z')

    for t in range(1, T):
        with tf.name_scope('itr_%02d' % t):
            z_bar = _st(b, thresh, name='z_bar')
            with tf.name_scope('greedy_heuristic'):
                # no tf.tile b/c tf.select will broadcast?
                if t > 1:
                    z_diff = tf.sub(z_bar, z, name='z_diff')
                else:
                    z_diff = z_bar
                abs_z_diff = tf.abs(z_diff, name='abs_z_diff')
                tmp = tf.reduce_max(abs_z_diff, 1, True)
                tmp2 = tf.equal(abs_z_diff, tmp)
                e = tf.select(tmp2, z_diff,
                              tf.zeros_like(z_bar, dtype=tf.float32), name='e')
                ks = tf.argmax(abs_z_diff, 1, name='ks')
            with tf.name_scope('update_b'):
                s_slices = tf.gather(w_s, ks, name='s_slices')
                b = tf.add(b, tf.mul(e, s_slices), name='b')
            with tf.name_scope('update_z'):
                z = tf.select(tmp2, z_bar, z, name='z')

    with tf.name_scope('itr_%02d' % T):
        z = _st(b, thresh, name='z')

    return z
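For reference, each pass of the loop above is one LCoD step as the docstring describes: soft-threshold b (via the external helper _st) to get the candidate code z_bar, find the single coordinate where |z_bar - z| changes most, add that one change (scaled by the gathered row of w_s) back into b, and commit only that coordinate of z_bar into z; the final block then thresholds b one last time to produce the output code.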
def _corrupt(self, x, ratio, n_type='MN'):
    with tf.name_scope("Corruption"):
        """ Noise adding (or input corruption)

        This function adds noise to the given data.

        Args:
            x     : The input data for the noise to be applied
            ratio : The percentage of the data affected by the noise addition
            n_type: The type of noise to be applied.
                    Choices: MN (masking noise), SP (salt-and-pepper noise)
        """

        # Safety check. If an unspecified noise type is given, use masking noise instead.
        if n_type != 'MN' and n_type != 'SP' and n_type != 'TFDO':
            n_type = 'MN'
            print("Unknown noise type. Masking noise will be used instead.")

        # If there is no noise to be added there is no need to proceed further.
        if ratio == 0.0:
            return x, None  # nothing was corrupted, so there is no noise mask either

        if n_type == 'TFDO':
            x_tilde = tf.nn.dropout(x, keep_prob=1 - ratio)
            # points_to_alter = x_tilde == 0.
            # print points_to_alter
            # x_tilde = tf.select(points_to_alter,
            #                     tf.add(tf.zeros_like(x_tilde, dtype=tf.float32), FLAGS.zero_bound),
            #                     x_tilde, name='X_tilde')
            # x_tilde[x_tilde == 0.] = tf.constant(FLAGS.zero_bound)
        else:
            # It makes a copy of the data, otherwise 'target_feed' will also be affected.
            x_tilde = tf.identity(x, name='X_tilde')
            shape = tf.Tensor.get_shape(x_tilde)

            # Creating and applying random noise to the data. (Masking noise)
            points_to_alter = tf.random_uniform(shape=shape, dtype=tf.float32) < ratio

            if n_type == 'MN':
                x_tilde = tf.select(points_to_alter,
                                    tf.add(tf.zeros_like(x_tilde, dtype=tf.float32), FLAGS.zero_bound),
                                    x_tilde, name='X_tilde')
            elif n_type == 'SP':
                coin_flip = np.asarray(
                    [np.random.choice([FLAGS.zero_bound, FLAGS.one_bound])
                     for _ in range(shape[0]) for _ in range(shape[1])]).reshape(shape)
                x_tilde = tf.select(points_to_alter, tf.to_float(coin_flip),
                                    x_tilde, name='X_tilde')

        # Also returns the 'points_to_alter' in case of applied Emphasis.
        if not FLAGS.emphasis or n_type == 'TFDO':
            points_to_alter = None

        return x_tilde, points_to_alter
def huber_loss(y_true, y_pred, clip_value):
    # Huber loss, see https://en.wikipedia.org/wiki/Huber_loss and
    # https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b
    # for details.
    assert clip_value > 0.

    x = y_true - y_pred
    if np.isinf(clip_value):
        # Special case for infinity since TensorFlow has problems
        # if we compare `K.abs(x) < np.inf`.
        return .5 * K.square(x)

    condition = K.abs(x) < clip_value
    squared_loss = .5 * K.square(x)
    linear_loss = clip_value * (K.abs(x) - .5 * clip_value)
    if K.backend() == 'tensorflow':
        import tensorflow as tf
        if hasattr(tf, 'select'):
            return tf.select(condition, squared_loss, linear_loss)  # condition, true, false
        else:
            return tf.where(condition, squared_loss, linear_loss)  # condition, true, false
    elif K.backend() == 'theano':
        from theano import tensor as T
        return T.switch(condition, squared_loss, linear_loss)
    else:
        raise RuntimeError('Unknown backend "{}".'.format(K.backend()))
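Because Keras expects losses with a (y_true, y_pred) signature, the extra clip_value argument has to be bound before compiling. A minimal sketch, assuming a hypothetical wrapper name that is not part of the snippet above:

def clipped_huber(clip_value=1.0):
    # Hypothetical helper: binds clip_value so the result matches Keras'
    # expected loss signature loss(y_true, y_pred).
    def _loss(y_true, y_pred):
        return huber_loss(y_true, y_pred, clip_value)
    return _loss

# model.compile(optimizer='adam', loss=clipped_huber(1.0))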
def noisy_activation(x, generic, linearized, training, alpha=1.1, c=0.5):
    """
    Implements the noisy activation with Half-Normal Noise for Hard-Saturation
    functions. See http://arxiv.org/abs/1603.00391, Algorithm 1.

    Args:
        x: Tensor which is an input to the activation function
        generic: The generic formulation of the activation function. (denoted
            as h in the paper)
        linearized: Linearization of the activation based on the first-order
            Taylor expansion around zero. (denoted as u in the paper)
        training: A boolean tensor telling whether we are in the training stage
            (and the noise is sampled) or in runtime when the expectation is
            used instead.
        alpha: Mixing hyper-parameter. The leakage rate from the linearized
            function to the nonlinear one.
        c: Standard deviation of the sampled noise.
    """
    delta = generic(x) - linearized(x)
    d = -tf.sign(x) * tf.sign(1 - alpha)
    p = tf.Variable(1.0)
    scale = c * (tf.sigmoid(p * delta) - 0.5) ** 2
    noise = tf.select(training, tf.abs(tf.random_normal([])), math.sqrt(2 / math.pi))

    activation = alpha * generic(x) + (1 - alpha) * linearized(x) + d * scale * noise

    return activation
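A minimal usage sketch for a hard-sigmoid nonlinearity. The hard_sigmoid/lin_sigmoid pair and the placeholders below are assumptions for illustration, not part of the source; the linearization 0.25 * v + 0.5 is the first-order expansion of the hard sigmoid around zero.

def hard_sigmoid(v):
    # saturating nonlinearity h(v)
    return tf.clip_by_value(0.25 * v + 0.5, 0.0, 1.0)

def lin_sigmoid(v):
    # its linearization u(v) around zero
    return 0.25 * v + 0.5

is_training = tf.placeholder(tf.bool, name='is_training')
x = tf.placeholder(tf.float32, [None, 128])
y = noisy_activation(x, hard_sigmoid, lin_sigmoid, is_training)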
def _compare(self, c, x, y, use_gpu):
    np_ans = np.where(c, x, y)
    with self.test_session(use_gpu=use_gpu):
        out = tf.select(c, x, y)
        tf_ans = out.eval()
    self.assertAllEqual(np_ans, tf_ans)
    self.assertShapeEqual(np_ans, out)
def _build_graph(self, inputs, is_training):
    state, action, reward, next_state, isOver = inputs
    self.predict_value = self._get_DQN_prediction(state, is_training)
    action_onehot = tf.one_hot(action, NUM_ACTIONS)
    pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)  # N,
    max_pred_reward = tf.reduce_mean(tf.reduce_max(self.predict_value, 1),
                                     name='predict_reward')
    add_moving_summary(max_pred_reward)
    self.greedy_choice = tf.argmax(self.predict_value, 1)  # N,

    with tf.variable_scope('target'):
        targetQ_predict_value = self._get_DQN_prediction(next_state, False)  # NxA

    # DQN
    # best_v = tf.reduce_max(targetQ_predict_value, 1)  # N,

    # Double-DQN
    predict_onehot = tf.one_hot(self.greedy_choice, NUM_ACTIONS, 1.0, 0.0)
    best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

    target = reward + (1.0 - tf.cast(isOver, tf.float32)) * GAMMA * tf.stop_gradient(best_v)

    sqrcost = tf.square(target - pred_action_value)
    abscost = tf.abs(target - pred_action_value)  # robust error func
    cost = tf.select(abscost < 1, sqrcost, abscost)
    summary.add_param_summary([('conv.*/W', ['histogram', 'rms']),
                               ('fc.*/W', ['histogram', 'rms'])])  # monitor all W
    self.cost = tf.reduce_mean(cost, name='cost')
def proximal_step(train_op, lr):
    # Apply weight decay for the variables with l2 loss
    # If basenet weights are trained together, do not set a weight decay on the
    # conv layers of the basenet
    l2_op_list = []
    l1_op_list = []
    with tf.control_dependencies([train_op]):
        if L2_LOSS_WEIGHT > 0:
            for var in tf.get_collection(utils.WEIGHT_DECAY_KEY):
                assign_op = var.assign_add(-lr * tf.convert_to_tensor(L2_LOSS_WEIGHT) * var)
                l2_op_list.append(assign_op)
                print('\tL2 loss added: %s(strength: %f)' % (var.name, L2_LOSS_WEIGHT))

        # Apply proximal gradient for the variables with l1 lasso loss
        # Non-negative weights constraint
        if L1_LOSS_WEIGHT > 0:
            for var in tf.get_collection(utils.LASSO_KEY):
                th_t = tf.fill(tf.shape(var), tf.convert_to_tensor(L1_LOSS_WEIGHT) * lr)
                zero_t = tf.zeros(tf.shape(var))
                var_temp = var - th_t * tf.sign(var)
                assign_op = var.assign(tf.select(tf.less(var, th_t), zero_t, var_temp))
                l1_op_list.append(assign_op)
                print('\tL1 loss added: %s(strength: %f)' % (var.name, L1_LOSS_WEIGHT))

    with tf.control_dependencies(l2_op_list + l1_op_list):
        train_op = tf.no_op(name='proximal_step')

    return train_op
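As a quick sanity check of the L1 branch (numbers worked by hand, not from the source): with lr = 0.1 and L1_LOSS_WEIGHT = 0.5 the threshold th_t is 0.05, so a weight of 0.03 is set to zero while a weight of 0.20 shrinks to 0.15. This is the usual soft-thresholding proximal operator, applied one-sidedly here because of the non-negative weights constraint noted in the comment.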
def con1():
    values, top_nodes = top_k_in_2dim_tensor(value_mat, cur_beam_size)
    beam_new = self._concat_top_nodes(beams, top_nodes)
    beam_size_tmp = tf.mul(cur_beam_size, self.output_size)
    beam_size = tf.select(tf.less(beam_size_tmp, self.beam_size),
                          beam_size_tmp, self.beam_size)
    return values, beam_new, beam_size
def get_total_loss(input_sequence, ngram_predictions, outputs, expected_sequence):
    if args.bootstrap_out:
        outputs = tf.add(outputs, tf.log(ngram_predictions))
    # [batch_size, unrolled_iterations]
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(outputs, expected_sequence)
    losses = tf.select(tf.equal(input_sequence, data.EOS), tf.zeros_like(losses), losses)
    return tf.reduce_sum(losses)
def set_logp_to_neg_inf(X, logp, bounds):
    """Set `logp` to negative infinity when `X` is outside the allowed bounds.

    # Arguments
        X: tensorflow.Tensor
            The variable to apply the bounds to
        logp: tensorflow.Tensor
            The log probability corresponding to `X`
        bounds: list of `Region` objects
            The regions corresponding to allowed regions of `X`

    # Returns
        logp: tensorflow.Tensor
            The newly bounded log probability
    """
    conditions = []
    for l, u in bounds:
        lower_is_neg_inf = not isinstance(l, tf.Tensor) and np.isneginf(l)
        upper_is_pos_inf = not isinstance(u, tf.Tensor) and np.isposinf(u)

        if not lower_is_neg_inf and upper_is_pos_inf:
            conditions.append(tf.greater(X, l))
        elif lower_is_neg_inf and not upper_is_pos_inf:
            conditions.append(tf.less(X, u))
        elif not (lower_is_neg_inf or upper_is_pos_inf):
            conditions.append(tf.logical_and(tf.greater(X, l), tf.less(X, u)))

    if len(conditions) > 0:
        is_inside_bounds = conditions[0]
        for condition in conditions[1:]:
            is_inside_bounds = tf.logical_or(is_inside_bounds, condition)

        logp = tf.select(is_inside_bounds, logp,
                         tf.fill(tf.shape(X), config.dtype(-np.inf)))

    return logp
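A minimal usage sketch: the half-normal example below is an assumption for illustration, the bounds are given as (lower, upper) pairs (which is all the function actually unpacks), and config.dtype is taken from the surrounding module.

X = tf.placeholder(tf.float32, [None], name='X')
logp = -0.5 * tf.square(X)  # unnormalized standard-normal log density
# Restrict the support to X > 0; everything else gets log probability -inf.
half_normal_logp = set_logp_to_neg_inf(X, logp, bounds=[(0.0, np.inf)])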
def _create_state(self, batch_size, dtype, cell_state=None):
    cand_symbols = tf.fill([batch_size, self.max_len],
                           tf.constant(self.start_token, dtype=tf.int32))
    cand_logprobs = tf.ones((batch_size,), dtype=tf.float32) * -float('inf')
    cand_symbols.set_shape([batch_size, self.max_len])

    if cell_state is None:
        cell_state = self.cell.zero_state(batch_size * self.beam_size, dtype=dtype)
    else:
        cell_state = BeamDecoder._tile_along_beam(self.beam_size, cell_state)

    full_size = batch_size * self.beam_size

    first_in_beam_mask = tf.equal(tf.range(full_size) % self.beam_size, 0)

    beam_symbols = tf.fill([full_size, self.max_len],
                           tf.constant(self.start_token, dtype=tf.int32))
    beam_logprobs = tf.select(
        first_in_beam_mask,
        tf.fill([full_size], 0.0),
        tf.fill([full_size], -1e18),  # top_k does not play well with -inf
                                      # TODO: dtype-dependent value here
    )

    return (
        cand_symbols,
        cand_logprobs,
        beam_symbols,
        beam_logprobs,
        cell_state
    )
def compute_max_or_min(self, select, maxi=True): #computes the argmax and argmin of a column with probabilistic row selection answer = tf.zeros([ self.batch_size, self.num_cols + self.num_word_cols, self.max_elements ], self.data_type) sum_prob = tf.zeros([self.batch_size, self.num_cols + self.num_word_cols], self.data_type) for j in range(self.max_elements): if (maxi): curr_pos = j else: curr_pos = self.max_elements - 1 - j select_index = tf.slice(self.full_processed_sorted_index_column, [0, 0, curr_pos], [self.batch_size, -1, 1]) select_mask = tf.equal( tf.tile( tf.expand_dims( tf.tile( tf.expand_dims(tf.range(self.max_elements), 0), [self.batch_size, 1]), 1), [1, self.num_cols + self.num_word_cols, 1]), select_index) curr_prob = tf.expand_dims(select, 1) * tf.cast( select_mask, self.data_type) * self.select_bad_number_mask curr_prob = curr_prob * tf.expand_dims((1 - sum_prob), 2) curr_prob = curr_prob * tf.expand_dims( tf.cast((1 - sum_prob) > 0.0, self.data_type), 2) answer = tf.select(select_mask, curr_prob, answer) sum_prob += tf.reduce_sum(curr_prob, 2) return answer
def reduce_mean(seq_batch, allow_empty=False):
    """Compute the mean of each sequence in a SequenceBatch.

    Args:
        seq_batch (SequenceBatch): a SequenceBatch with the following attributes:
            values (Tensor): a Tensor of shape (batch_size, seq_length, :, ..., :)
            mask (Tensor): if the mask values are arbitrary floats (rather than binary),
                the mean will be a weighted average.
        allow_empty (bool): allow computing the average of an empty sequence. In this
            case, we assume 0/0 == 0, rather than NaN. Default is False, causing an
            error to be thrown.

    Returns:
        Tensor: of shape (batch_size, :, ..., :)
    """
    values, mask = seq_batch.values, seq_batch.mask

    # compute weights for the average
    sums = tf.reduce_sum(mask, 1, keep_dims=True)  # (batch_size, 1)

    if allow_empty:
        asserts = []  # no assertion
        sums = tf.select(tf.equal(sums, 0), tf.ones(tf.shape(sums)), sums)  # replace 0's with 1's
    else:
        asserts = [tf.assert_positive(sums)]  # throw error if 0's exist

    with tf.control_dependencies(asserts):
        weights = mask / sums  # (batch_size, seq_length)

    return weighted_sum(seq_batch, weights)
def _compare(self, c, x, y, use_gpu):
    np_ans = np.dstack(
        [x_i if c_i else y_i for c_i, x_i, y_i in zip(c, x, y)]).transpose([2, 0, 1])
    with self.test_session(use_gpu=use_gpu):
        out = tf.select(c, x, y)
        tf_ans = out.eval()
    self.assertAllEqual(np_ans, tf_ans)
    self.assertShapeEqual(np_ans, out)
def step(x):
    """Step function: 1 where x > 0, else 0."""
    if _BACKEND == 'tensorflow':
        import tensorflow as tf
        return tf.select(tf.python.math_ops.greater(x, 0),
                         K.ones_like(x), K.zeros_like(x))
    else:
        return K.switch(x > 0, 1, 0)
def compute_ans(op_embedding, comparison): op_embedding = tf.expand_dims(op_embedding, 0) #dot product of operation embedding with hidden state to the left of the number occurence first = tf.transpose( tf.matmul(op_embedding, tf.transpose( tf.reduce_sum(hidden_vectors * tf.tile( tf.expand_dims( tf.transpose(self.batch_ordinal_question), 2), [1, 1, self.utility.FLAGS.embedding_dims]), 0)))) second = self.batch_question_number_one_mask + tf.transpose( tf.matmul(op_embedding, tf.transpose( tf.reduce_sum(hidden_vectors * tf.tile( tf.expand_dims( tf.transpose(self.batch_ordinal_question_one), 2 ), [1, 1, self.utility.FLAGS.embedding_dims]), 0)))) question_number_softmax = tf.nn.softmax(tf.concat(1, [first, second])) if (self.mode == "test"): cond = tf.equal(question_number_softmax, tf.reshape( tf.reduce_max(question_number_softmax, 1), [self.batch_size, 1])) question_number_softmax = tf.select( cond, tf.fill(tf.shape(question_number_softmax), 1.0), tf.fill(tf.shape(question_number_softmax), 0.0)) question_number_softmax = tf.cast(question_number_softmax, self.data_type) ans = tf.reshape( tf.reduce_sum(question_number_softmax * tf.concat( 1, [self.batch_question_number, self.batch_question_number_one]), 1), [self.batch_size, 1]) return ans
def huber_loss(x, delta=1.0):
    # https://en.wikipedia.org/wiki/Huber_loss
    return tf.select(
        tf.abs(x) < delta,
        tf.square(x) * 0.5,
        delta * (tf.abs(x) - 0.5 * delta)
    )
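A quick hand-worked check of the two branches (the values are illustrative, not from the source): with delta = 1.0, an error of 0.5 falls in the quadratic branch (0.5 * 0.5**2 = 0.125), while an error of 3.0 falls in the linear branch (1.0 * (3.0 - 0.5) = 2.5).

errors = tf.constant([0.5, 3.0])
loss = huber_loss(errors)  # evaluates to approximately [0.125, 2.5]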
def custom_rnn_autodecoder(decoder_inputs, initial_input, initial_state, cell, scope=None):
    # customized rnn_decoder for the task of dealing with end of character
    with tf.variable_scope(scope or "rnn_decoder"):
        states = [initial_state]
        outputs = []
        prev = None

        for i in xrange(len(decoder_inputs)):
            inp = decoder_inputs[i]
            if i > 0:
                tf.get_variable_scope().reuse_variables()
            output, new_state = cell(inp, states[-1])

            num_batches = self.args.batch_size  # new_state.get_shape()[0].value
            num_state = new_state.get_shape()[1].value

            # if the input has an end-of-character signal, have to zero out the state
            # to do: test this code.
            eoc_detection = inp[:, 3]
            eoc_detection = tf.reshape(eoc_detection, [num_batches, 1])

            eoc_detection_state = tfrepeat(eoc_detection, num_state)
            eoc_detection_state = tf.greater(eoc_detection_state,
                                             tf.zeros_like(eoc_detection_state, dtype=tf.float32))

            new_state = tf.select(eoc_detection_state, initial_state, new_state)

            outputs.append(output)
            states.append(new_state)

    return outputs, states
def slice_constant(data, batch_size=32, name='constant_data', global_step=None):
    """Provide a slice based on the global_step.

    This is useful when the entire data array can be stored in memory because it
    allows you to feed the data very efficiently.

    Args:
        data: A numpy array or tensor.
        batch_size: The batch size for the produced data.
        name: An optional name for this data.
        global_step: A global step variable that is used to read the data. If None
            then the default prettytensor global_step is used.
    Returns:
        A tensor that produces the given data.
    """
    with tf.name_scope(name):
        all_data = tf.convert_to_tensor(data)
        global_step = global_step or bookkeeper.global_step()

        count = len(data) / batch_size
        extra = len(data) - count * batch_size

        if extra:
            offset = tf.mod(global_step, count)
            return tf.slice(all_data, offset * batch_size, batch_size)
        else:
            offset = tf.mod(global_step, count + 1)
            return tf.slice(all_data, offset * batch_size,
                            tf.select(tf.equal(offset, count), extra, batch_size))
def __init__(self, action1_bounds, action2_bounds, session):
    self.graph = session.graph
    with self.graph.as_default():
        self.sess = session
        self.action_bounds = [[action1_bounds[1], action2_bounds[1]],
                              [action1_bounds[0], action2_bounds[0]]]

        self.action_size = len(self.action_bounds[0])
        self.action_input = tf.placeholder(tf.float32, [None, self.action_size])

        self.p_max = tf.constant(self.action_bounds[0], dtype=tf.float32)
        self.p_min = tf.constant(self.action_bounds[1], dtype=tf.float32)

        self.p_range = tf.constant([x - y for x, y in zip(self.action_bounds[0],
                                                          self.action_bounds[1])],
                                   dtype=tf.float32)

        self.p_diff_max = tf.div(-self.action_input + self.p_max, self.p_range)
        self.p_diff_min = tf.div(self.action_input - self.p_min, self.p_range)

        self.zeros_act_grad_filter = tf.zeros([self.action_size])
        self.act_grad = tf.placeholder(tf.float32, [None, self.action_size])

        self.grad_inverter = tf.select(tf.greater(self.act_grad, self.zeros_act_grad_filter),
                                       tf.mul(self.act_grad, self.p_diff_max),
                                       tf.mul(self.act_grad, self.p_diff_min))
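In words (read directly off the expressions above): a positive gradient component is scaled by the remaining headroom to the upper bound, (p_max - action) / range, while a non-positive one is scaled by the distance to the lower bound, (action - p_min) / range, so the update is damped toward zero as the action approaches the corresponding bound instead of pushing it further out of range.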
def loop(step_, beams_, beam_value_, golden_value_, golden_inside_, step_valid_, g_id_, golden_record, beam_record): cur_feat_x_ = tf.gather(x, step_) cur_golden_path_ = tf.gather(golden_path, tf.range(step_)) cur_golden_feat_ = self._add_tag_dynamic(cur_feat_x_, cur_golden_path_) # cur_golden_output_ = self._build_cnn(cur_golden_feat_) cur_golden_output_ = build(cur_golden_feat_) cur_golden_node_ = tf.gather(golden_path, tf.reshape(step_, [1])) golden_value_ = tf.add(golden_value_, tf.slice(cur_golden_output_, tf.concat(0, [[0], cur_golden_node_]), [1, 1])) cur_beam_ = tf.unpack(beams_, num=self.beam_size) cur_beam_feat_ = tf.concat(0, [self._add_tag_dynamic(cur_feat_x_, tf.reshape(e, [-1])) for e in cur_beam_]) # cur_beam_output_ = self._build_cnn(cur_beam_feat_) cur_beam_output_ = build(cur_beam_feat_) golden_record = golden_record.write(step_, cur_golden_output_) beam_record = beam_record.write(step_, cur_beam_output_) beam_value_, beams_ = self._top_beams_new(cur_beam_output_, beam_value_, beams_) new_golden_path_ = tf.gather(golden_path, tf.range(step_ + 1)) # golden_beam_id_ = index_of_tensor(new_golden_path_, beams_) g_id_ = index_of_tensor(new_golden_path_, beams_) golden_inside_ = tf.select(tf.less(tf.shape(g_id_)[0], 1), tf.constant(False, tf.bool), tf.constant(True, tf.bool)) step_valid_ = tf.logical_and(tf.less(step_+1, length), tf.less(step_+1, self.max_step_tracked)) return [step_ + 1, beams_, beam_value_, golden_value_, golden_inside_, step_valid_, g_id_, golden_record, beam_record]
def ClippedError(x):
    # Huber loss
    return tf.select(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5)
def random_binomial(shape, p=0.0, dtype=_FLOATX, seed=None):
    if seed is None:
        seed = np.random.randint(10e6)
    return tf.select(tf.random_uniform(shape, dtype=dtype, seed=seed) <= p,
                     tf.ones(shape), tf.zeros(shape))
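A brief usage note (the shape below is an assumption): p is the probability of drawing a 1, so a dropout-style keep mask can be sampled as:

keep_mask = random_binomial((128, 64), p=0.8)  # roughly 80% ones, 20% zeros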
def _relu(self, x, leakiness=0.0):
    """Relu, with optional leaky support."""
    return tf.select(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu')
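These snippets target the pre-1.0 API, where tf.select existed; from TensorFlow 1.0 on the op is tf.where, with the same (condition, true-branch, false-branch) argument order. A standalone port of the helper above might look like this (the function name leaky_relu is an assumption):

def leaky_relu(x, leakiness=0.0):
    """Same computation as _relu above, written against the renamed op."""
    return tf.where(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu')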
def beam_loop(self, time, cell_output, cell_state, loop_state): ( past_cand_symbols, # [batch_size, time-1] past_cand_logprobs, # [batch_size] past_beam_symbols, # [batch_size*beam_size, time-1], right-aligned past_beam_logprobs, # [batch_size*beam_size] ) = loop_state # We don't actually use this, but emit_output is required to match the # cell output size specfication. Otherwise we would leave this as None. emit_output = cell_output # 1. Get scores for all candidate sequences logprobs = self.outputs_to_score_fn(cell_output) try: num_classes = int(logprobs.get_shape()[-1]) except: # Shape inference failed num_classes = tf.shape(logprobs)[-1] logprobs_batched = tf.reshape( logprobs + tf.expand_dims( tf.reshape(past_beam_logprobs, [self.batch_size, self.beam_size]), 2), [self.batch_size, self.beam_size * num_classes]) # 2. Determine which states to pass to next iteration # TODO(nikita): consider using slice+fill+concat instead of adding a mask nondone_mask = tf.reshape( tf.cast(tf.equal(tf.range(num_classes), self.stop_token), tf.float32) * self.INVALID_SCORE, [1, 1, num_classes]) nondone_mask = tf.reshape( tf.tile(nondone_mask, [1, self.beam_size, 1]), [-1, self.beam_size * num_classes]) # disable all done pathes beam_logprobs, indices = tf.nn.top_k(logprobs_batched + nondone_mask, self.beam_size) beam_logprobs = tf.reshape(beam_logprobs, [-1]) # For continuing to the next symbols # TODO (add condition, only those in top K excludes that ends with end token should continue) symbols = indices % num_classes # [batch_size, self.beam_size] parent_refs = indices // num_classes # [batch_size, self.beam_size] symbols_history = flat_batch_gather(past_beam_symbols, parent_refs, batch_size=self.batch_size, options_size=self.beam_size) beam_symbols = concat_op( [symbols_history, tf.reshape(symbols, [-1, 1])], 1) # Handle the output and the cell state shuffling next_cell_state = nest_map( lambda element: batch_gather(element, parent_refs, batch_size=self.batch_size, options_size=self.beam_size), cell_state) next_input = self.tokens_to_inputs_fn( tf.reshape(symbols, [-1, self.beam_size])) # 3. Update the candidate pool to include entries that just ended with a stop token # TODO( They don't care whether the stop token is in top K, it could potentially include partial captions ) logprobs_done = tf.reshape( logprobs_batched, [-1, self.beam_size, num_classes])[:, :, self.stop_token] done_parent_refs = tf.argmax(logprobs_done, 1) done_symbols = flat_batch_gather(past_beam_symbols, done_parent_refs, batch_size=self.batch_size, options_size=self.beam_size) logprobs_done_max = tf.reduce_max(logprobs_done, 1) cand_symbols_unpadded = tf.select( logprobs_done_max > past_cand_logprobs, done_symbols, past_cand_symbols) cand_logprobs = tf.maximum(logprobs_done_max, past_cand_logprobs) cand_symbols = concat_op([ cand_symbols_unpadded, tf.fill([self.batch_size, 1], self.stop_token) ], 1) # 4. 
Check the stopping criteria if self.max_len is not None: elements_finished_clip = (time >= self.max_len) if self.score_upper_bound is not None: elements_finished_bound = tf.reduce_max( tf.reshape(beam_logprobs, [-1, self.beam_size]), 1) < (cand_logprobs - self.score_upper_bound) if self.max_len is not None and self.score_upper_bound is not None: elements_finished = elements_finished_clip | elements_finished_bound elif self.score_upper_bound is not None: elements_finished = elements_finished_bound elif self.max_len is not None: # this broadcasts elements_finished_clip to the correct shape elements_finished = tf.zeros( [self.batch_size], dtype=tf.bool) | elements_finished_clip else: assert False, "Lack of stopping criterion should have been caught in constructor" # 5. Prepare return values # While loops require strict shape invariants, so we manually set shapes # in case the automatic shape inference can't calculate these. Even when # this is redundant is has the benefit of helping catch shape bugs. for tensor in list(nest.flatten(next_input)) + list( nest.flatten(next_cell_state)): tensor.set_shape( tf.TensorShape( (self.inferred_batch_size, self.beam_size)).concatenate(tensor.get_shape()[2:])) for tensor in [cand_symbols, cand_logprobs, elements_finished]: tensor.set_shape( tf.TensorShape((self.inferred_batch_size, )).concatenate( tensor.get_shape()[1:])) for tensor in [beam_symbols, beam_logprobs]: tensor.set_shape( tf.TensorShape( (self.inferred_batch_size_times_beam_size, )).concatenate( tensor.get_shape()[1:])) next_loop_state = ( cand_symbols, cand_logprobs, beam_symbols, beam_logprobs, ) return (elements_finished, next_input, next_cell_state, emit_output, next_loop_state)
def create(model, config): dim_v, dim_i, dim_d, dim_t, dim_b, dim_n, dim_c = config.getint( 'vocabsize'), config.getint('wvecsize'), config.getint( 'depth'), config.getint('steps'), config.getint( 'batch'), config.getint('deepness'), config.getint('classes') lrate_ms, dstep_ms, drate_ms, optim_ms = config.getfloat( 'mslrate'), config.getint('msdstep'), config.getfloat( 'msdrate'), getattr(tf.train, config.get('msoptim')) lrate_ce, dstep_ce, drate_ce, optim_ce = config.getfloat( 'celrate'), config.getint('cedstep'), config.getfloat( 'cedrate'), getattr(tf.train, config.get('ceoptim')) with tf.name_scope('embedding'): model['We'] = tf.Variable(tf.truncated_normal([dim_v, dim_i], stddev=1.0 / dim_i), name='We') model['Be'] = tf.Variable(tf.truncated_normal([1, dim_i], stddev=1.0 / dim_i), name='Be') with tf.name_scope('plstm'): with tf.name_scope('input'): for ii in xrange(dim_t): model['pxi_%i' % ii] = tf.placeholder(tf.int32, [dim_b], name='pxi_%i' % ii) model['px_%i' % ii] = tf.add(tf.nn.embedding_lookup( model['We'], model['pxi_%i' % ii]), model['Be'], name='px_%i' % ii) with tf.name_scope('label'): for ii in xrange(dim_t): model['pyi_%i' % ii] = tf.placeholder(tf.int32, [dim_b], name='pyi_%i' % ii) model['py_%i' % ii] = tf.add(tf.nn.embedding_lookup( model['We'], model['pyi_%i' % ii]), model['Be'], name='py_%i' % ii) for i in xrange(dim_d): with tf.name_scope('input_%i' % i): for ii in xrange(dim_t): model['pFx_%i_%i' % (i, ii)] = model['px_%i' % ii] if i == 0 else model['pFh_%i_%i' % (i - 1, ii)] model['pBx_%i_%i' % (i, ii)] = model['px_%i' % ii] if i == 0 else model['pBh_%i_%i' % (i - 1, ii)] with tf.name_scope('inputgate_%i' % i): model['pFWi_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='pFWi_%i' % i) model['pFBi_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='pFBi_%i' % i) model['pBWi_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='pBWi_%i' % i) model['pBBi_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='pBBi_%i' % i) for ii in xrange(dim_t): model['pFi_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['pFx_%i_%i' % (i, ii)], model['pFWi_%i' % i]), model['pFBi_%i' % i]), name='pFi_%i_%i' % (i, ii)) model['pBi_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['pBx_%i_%i' % (i, ii)], model['pBWi_%i' % i]), model['pBBi_%i' % i]), name='pBi_%i_%i' % (i, ii)) with tf.name_scope('forgetgate_%i' % i): model['pFWf_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='pFWf_%i' % i) model['pFBf_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='pFBf_%i' % i) model['pBWf_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='pBWf_%i' % i) model['pBBf_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='pBBf_%i' % i) for ii in xrange(dim_t): model['pFf_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['pFx_%i_%i' % (i, ii)], model['pFWf_%i' % i]), model['pFBf_%i' % i]), name='pFf_%i_%i' % (i, ii)) model['pBf_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['pBx_%i_%i' % (i, ii)], model['pBWf_%i' % i]), model['pBBf_%i' % i]), name='pBf_%i_%i' % (i, ii)) with tf.name_scope('outputgate_%i' % i): model['pFWo_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='pFWo_%i' % i) model['pFBo_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / 
dim_i), name='pFBo_%i' % i) model['pBWo_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='pBWo_%i' % i) model['pBBo_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='pBBo_%i' % i) for ii in xrange(dim_t): model['pFo_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['pFx_%i_%i' % (i, ii)], model['pFWo_%i' % i]), model['pFBo_%i' % i]), name='pFo_%i_%i' % (i, ii)) model['pBo_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['pBx_%i_%i' % (i, ii)], model['pBWo_%i' % i]), model['pBBo_%i' % i]), name='pBo_%i_%i' % (i, ii)) with tf.name_scope('cellstate_%i' % i): model['pFWc_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='pFWc_' + str(i)) model['pFBc_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='pFBc_' + str(i)) model['pBWc_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='pBWc_' + str(i)) model['pBBc_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='pBBc_' + str(i)) for ii in xrange(dim_t): model['pFcc_%i_%i' % (i, ii)] = tf.Variable( tf.truncated_normal([dim_b, dim_i], stddev=1.0 / dim_i), name='pFcc_%i_%i' % (i, ii)) if ii == 0 else model[ 'pFc_%i_%i' % (i, ii - 1)] # consider starting with all zeros model['pFc_%i_%i' % (i, ii)] = tf.select( tf.equal(model['pxi_%i' % ii], tf.zeros([dim_b], tf.int32)), model['pFcc_%i_%i' % (i, ii)], tf.add( tf.mul(model['pFf_%i_%i' % (i, ii)], model['pFcc_%i_%i' % (i, ii)]), tf.mul( model['pFi_%i_%i' % (i, ii)], tf.nn.tanh( tf.add( tf.matmul(model['pFx_%i_%i' % (i, ii)], model['pFWc_%i' % i]), model['pFBc_%i' % i])))), name='pFc_%i_%i' % (i, ii)) for ii in reversed(xrange(dim_t)): model['pBcc_%i_%i' % (i, ii)] = tf.Variable( tf.truncated_normal([dim_b, dim_i], stddev=1.0 / dim_i), name='pBcc_%i_%i' % (i, ii)) if ii == dim_t - 1 else model[ 'pBc_%i_%i' % (i, ii + 1)] # consider starting with all zeros model['pBc_%i_%i' % (i, ii)] = tf.select( tf.equal(model['pxi_%i' % ii], tf.zeros([dim_b], tf.int32)), model['pBcc_%i_%i' % (i, ii)], tf.add( tf.mul(model['pBf_%i_%i' % (i, ii)], model['pBcc_%i_%i' % (i, ii)]), tf.mul( model['pBi_%i_%i' % (i, ii)], tf.nn.tanh( tf.add( tf.matmul(model['pBx_%i_%i' % (i, ii)], model['pBWc_%i' % i]), model['pBBc_%i' % i])))), name='pBc_%i_%i' % (i, ii)) with tf.name_scope('hidden_%i' % i): model['pFWz_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='pFWz_%i' % i) model['pFBz_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='pFBz_%i' % i) model['pBWz_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='pBWz_%i' % i) model['pBBz_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='pBBz_%i' % i) for ii in xrange(dim_t): model['pFz_%i_%i' % (i, ii)] = tf.add( tf.matmul(model['pFc_%i_%i' % (i, ii)], model['pFWz_%i' % i]), model['pFBz_%i' % i], name='pFz_%i_%i' % (i, ii)) model['pBz_%i_%i' % (i, ii)] = tf.add( tf.matmul(model['pBc_%i_%i' % (i, ii)], model['pBWz_%i' % i]), model['pBBz_%i' % i], name='pBz_%i_%i' % (i, ii)) with tf.name_scope('output_%i' % i): for ii in xrange(dim_t): model['pFh_%i_%i' % (i, ii)] = tf.mul( model['pFo_%i_%i' % (i, ii)], tf.nn.tanh(model['pFz_%i_%i' % (i, ii)]), name='pFh_%i_%i' % (i, ii)) model['pBh_%i_%i' % (i, ii)] = tf.mul( model['pBo_%i_%i' % (i, ii)], tf.nn.tanh(model['pBz_%i_%i' % (i, ii)]), name='pBh_%i_%i' % (i, ii)) model['pFh_%i_%i' % 
(dim_d - 1, -1)] = tf.zeros([dim_b, dim_i], tf.float32) model['pBh_%i_%i' % (dim_d - 1, dim_t)] = tf.zeros( [dim_b, dim_i], tf.float32) with tf.name_scope('output'): for ii in xrange(dim_t): model['pFh_%i' % ii] = tf.select(tf.equal(model['pxi_%i' % ii], tf.zeros([dim_b], tf.int32)), model['pFh_%i_%i' % (dim_d - 1, ii - 1)], model['pFh_%i_%i' % (dim_d - 1, ii)], name='pFh_%i' % ii) model['pBh_%i' % ii] = tf.select(tf.equal(model['pxi_%i' % ii], tf.zeros([dim_b], tf.int32)), model['pBh_%i_%i' % (dim_d - 1, ii + 1)], model['pBh_%i_%i' % (dim_d - 1, ii)], name='pBh_%i' % ii) with tf.name_scope('meansquared'): for ii in xrange(dim_t): model['pFms_%i' % ii] = tf.select(tf.equal(model['pxi_%i' % ii], tf.zeros([dim_b], tf.int32)), tf.zeros([dim_b], tf.float32), tf.reduce_sum( tf.square( tf.sub(model['py_%i' % ii], model['pFh_%i' % ii])), [1]), name='pFms_%i' % ii) model['pFms'] = tf.reduce_sum(tf.add_n( [model['pFms_%i' % ii] for ii in xrange(dim_t)]), name='pFms') model['sp+ms'] = tf.scalar_summary(model['pFms'].name, model['pFms']) for ii in xrange(dim_t): model['pBms_%i' % ii] = tf.select(tf.equal(model['pxi_%i' % ii], tf.zeros([dim_b], tf.int32)), tf.zeros([dim_b], tf.float32), tf.reduce_sum( tf.square( tf.sub(model['py_%i' % ii], model['pBh_%i' % ii])), [1]), name='pBms_%i' % ii) model['pBms'] = tf.reduce_sum(tf.add_n( [model['pBms_%i' % ii] for ii in xrange(dim_t)]), name='pBms') model['sp-ms'] = tf.scalar_summary(model['pBms'].name, model['pBms']) with tf.name_scope('hlstm'): with tf.name_scope('input'): for ii in xrange(dim_t): model['hxi_%i' % ii] = tf.placeholder(tf.int32, [dim_b], name='hxi_%i' % ii) model['hx_%i' % ii] = tf.add(tf.nn.embedding_lookup( model['We'], model['hxi_%i' % ii]), model['Be'], name='hx_%i' % ii) with tf.name_scope('label'): for ii in xrange(dim_t): model['hyi_%i' % ii] = tf.placeholder(tf.int32, [dim_b], name='hyi_%i' % ii) model['hy_%i' % ii] = tf.add(tf.nn.embedding_lookup( model['We'], model['hyi_%i' % ii]), model['Be'], name='hy_%i' % ii) for i in xrange(dim_d): with tf.name_scope('input_%i' % i): for ii in xrange(dim_t): model['hFx_%i_%i' % (i, ii)] = model['hx_%i' % ii] if i == 0 else model['hFh_%i_%i' % (i - 1, ii)] model['hBx_%i_%i' % (i, ii)] = model['hx_%i' % ii] if i == 0 else model['hBh_%i_%i' % (i - 1, ii)] with tf.name_scope('inputgate_%i' % i): model['hFWi_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='hFWi_%i' % i) model['hFBi_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='hFBi_%i' % i) model['hBWi_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='hBWi_%i' % i) model['hBBi_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='hBBi_%i' % i) for ii in xrange(dim_t): model['hFi_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['hFx_%i_%i' % (i, ii)], model['hFWi_%i' % i]), model['hFBi_%i' % i]), name='hFi_%i_%i' % (i, ii)) model['hBi_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['hBx_%i_%i' % (i, ii)], model['hBWi_%i' % i]), model['hBBi_%i' % i]), name='hBi_%i_%i' % (i, ii)) with tf.name_scope('forgetgate_%i' % i): model['hFWf_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='hFWf_%i' % i) model['hFBf_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='hFBf_%i' % i) model['hBWf_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='hBWf_%i' % i) model['hBBf_%i' % i] = 
tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='hBBf_%i' % i) for ii in xrange(dim_t): model['hFf_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['hFx_%i_%i' % (i, ii)], model['hFWf_%i' % i]), model['hFBf_%i' % i]), name='hFf_%i_%i' % (i, ii)) model['hBf_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['hBx_%i_%i' % (i, ii)], model['hBWf_%i' % i]), model['hBBf_%i' % i]), name='hBf_%i_%i' % (i, ii)) with tf.name_scope('outputgate_%i' % i): model['hFWo_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='hFWo_%i' % i) model['hFBo_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='hFBo_%i' % i) model['hBWo_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='hBWo_%i' % i) model['hBBo_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='hBBo_%i' % i) for ii in xrange(dim_t): model['hFo_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['hFx_%i_%i' % (i, ii)], model['hFWo_%i' % i]), model['hFBo_%i' % i]), name='hFo_%i_%i' % (i, ii)) model['hBo_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['hBx_%i_%i' % (i, ii)], model['hBWo_%i' % i]), model['hBBo_%i' % i]), name='hBo_%i_%i' % (i, ii)) with tf.name_scope('cellstate_%i' % i): model['hFWc_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='hFWc_' + str(i)) model['hFBc_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='hFBc_' + str(i)) model['hBWc_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='hBWc_' + str(i)) model['hBBc_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='hBBc_' + str(i)) for ii in xrange(dim_t): model['hFcc_%i_%i' % (i, ii)] = tf.Variable( tf.truncated_normal([dim_b, dim_i], stddev=1.0 / dim_i), name='hFcc_%i_%i' % (i, ii)) if ii == 0 else model[ 'hFc_%i_%i' % (i, ii - 1)] # consider starting with all zeros model['hFc_%i_%i' % (i, ii)] = tf.select( tf.equal(model['hxi_%i' % ii], tf.zeros([dim_b], tf.int32)), model['hFcc_%i_%i' % (i, ii)], tf.add( tf.mul(model['hFf_%i_%i' % (i, ii)], model['hFcc_%i_%i' % (i, ii)]), tf.mul( model['hFi_%i_%i' % (i, ii)], tf.nn.tanh( tf.add( tf.matmul(model['hFx_%i_%i' % (i, ii)], model['hFWc_%i' % i]), model['hFBc_%i' % i])))), name='hFc_%i_%i' % (i, ii)) for ii in reversed(xrange(dim_t)): model['hBcc_%i_%i' % (i, ii)] = tf.Variable( tf.truncated_normal([dim_b, dim_i], stddev=1.0 / dim_i), name='hBcc_%i_%i' % (i, ii)) if ii == dim_t - 1 else model[ 'hBc_%i_%i' % (i, ii + 1)] # consider starting with all zeros model['hBc_%i_%i' % (i, ii)] = tf.select( tf.equal(model['hxi_%i' % ii], tf.zeros([dim_b], tf.int32)), model['hBcc_%i_%i' % (i, ii)], tf.add( tf.mul(model['hBf_%i_%i' % (i, ii)], model['hBcc_%i_%i' % (i, ii)]), tf.mul( model['hBi_%i_%i' % (i, ii)], tf.nn.tanh( tf.add( tf.matmul(model['hBx_%i_%i' % (i, ii)], model['hBWc_%i' % i]), model['hBBc_%i' % i])))), name='hBc_%i_%i' % (i, ii)) with tf.name_scope('hidden_%i' % i): model['hFWz_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='hFWz_%i' % i) model['hFBz_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='hFBz_%i' % i) model['hBWz_%i' % i] = tf.Variable(tf.truncated_normal( [dim_i, dim_i], stddev=1.0 / dim_i), name='hBWz_%i' % i) model['hBBz_%i' % i] = tf.Variable(tf.truncated_normal( [1, dim_i], stddev=1.0 / dim_i), name='hBBz_%i' % i) for ii in 
xrange(dim_t): model['hFz_%i_%i' % (i, ii)] = tf.add( tf.matmul(model['hFc_%i_%i' % (i, ii)], model['hFWz_%i' % i]), model['hFBz_%i' % i], name='hFz_%i_%i' % (i, ii)) model['hBz_%i_%i' % (i, ii)] = tf.add( tf.matmul(model['hBc_%i_%i' % (i, ii)], model['hBWz_%i' % i]), model['hBBz_%i' % i], name='hBz_%i_%i' % (i, ii)) with tf.name_scope('output_%i' % i): for ii in xrange(dim_t): model['hFh_%i_%i' % (i, ii)] = tf.mul( model['hFo_%i_%i' % (i, ii)], tf.nn.tanh(model['hFz_%i_%i' % (i, ii)]), name='hFh_%i_%i' % (i, ii)) model['hBh_%i_%i' % (i, ii)] = tf.mul( model['hBo_%i_%i' % (i, ii)], tf.nn.tanh(model['hBz_%i_%i' % (i, ii)]), name='hBh_%i_%i' % (i, ii)) model['hFh_%i_%i' % (dim_d - 1, -1)] = tf.zeros([dim_b, dim_i], tf.float32) model['hBh_%i_%i' % (dim_d - 1, dim_t)] = tf.zeros( [dim_b, dim_i], tf.float32) with tf.name_scope('output'): for ii in xrange(dim_t): model['hFh_%i' % ii] = tf.select(tf.equal(model['hxi_%i' % ii], tf.zeros([dim_b], tf.int32)), model['hFh_%i_%i' % (dim_d - 1, ii - 1)], model['hFh_%i_%i' % (dim_d - 1, ii)], name='hFh_%i' % ii) model['hBh_%i' % ii] = tf.select(tf.equal(model['hxi_%i' % ii], tf.zeros([dim_b], tf.int32)), model['hBh_%i_%i' % (dim_d - 1, ii + 1)], model['hBh_%i_%i' % (dim_d - 1, ii)], name='hBh_%i' % ii) with tf.name_scope('meansquared'): for ii in xrange(dim_t): model['hFms_%i' % ii] = tf.select(tf.equal(model['hxi_%i' % ii], tf.zeros([dim_b], tf.int32)), tf.zeros([dim_b], tf.float32), tf.reduce_sum( tf.square( tf.sub(model['hy_%i' % ii], model['hFh_%i' % ii])), [1]), name='hFms_%i' % ii) model['hFms'] = tf.reduce_sum(tf.add_n( [model['hFms_%i' % ii] for ii in xrange(dim_t)]), name='hFms') model['sh+ms'] = tf.scalar_summary(model['hFms'].name, model['hFms']) for ii in xrange(dim_t): model['hBms_%i' % ii] = tf.select(tf.equal(model['hxi_%i' % ii], tf.zeros([dim_b], tf.int32)), tf.zeros([dim_b], tf.float32), tf.reduce_sum( tf.square( tf.sub(model['hy_%i' % ii], model['hBh_%i' % ii])), [1]), name='hBms_%i' % ii) model['hBms'] = tf.reduce_sum(tf.add_n( [model['hBms_%i' % ii] for ii in xrange(dim_t)]), name='hBms') model['sh-ms'] = tf.scalar_summary(model['hBms'].name, model['hBms']) with tf.name_scope('classification'): with tf.name_scope('label'): model['clabel'] = tf.placeholder(tf.float32, [dim_b, dim_c], name='clabel') for i in xrange(dim_n): with tf.name_scope('layer_%i' % i): model['cW_%i' % i] = tf.Variable( tf.truncated_normal([4 * dim_i, 4 * dim_i], stddev=0.25 / dim_i), name='cW_%i' % i) if i != dim_n - 1 else tf.Variable(tf.truncated_normal( [4 * dim_i, dim_c], stddev=1.0 / dim_c), name='cW_%i' % i) model['cB_%i' % i] = tf.Variable( tf.truncated_normal([1, 4 * dim_i], stddev=0.25 / dim_i), name='cB_%i' % i) if i != dim_n - 1 else tf.Variable( tf.truncated_normal([1, dim_c], stddev=1.0 / dim_c), name='cB_%i' % i) model['cx_%i' % i] = tf.concat(1, [ model['pFh_%i' % (dim_t - 1)], model['pBh_%i' % (0)], model['hFh_%i' % (dim_t - 1)], model['hBh_%i' % (0)] ], name='cx_%i' % i) if i == 0 else model['cy_%i' % (i - 1)] model['cy_%i' % i] = tf.add(tf.matmul(model['cx_%i' % i], model['cW_%i' % i]), model['cB_%i' % i], name='cy_%i' % i) with tf.name_scope('output'): model['output'] = tf.nn.softmax(model['cy_%i' % (dim_n - 1)], name='output') with tf.name_scope('crossentropy'): model['cce'] = tf.reduce_sum( -tf.mul(model['clabel'], tf.log(model['output'])), name='cce') model['scce'] = tf.scalar_summary(model['cce'].name, model['cce']) model['gsms'] = tf.Variable(0, trainable=False, name='gsms') model['lrms'] = tf.train.exponential_decay(lrate_ms, 
model['gsms'], dstep_ms, drate_ms, staircase=False, name='lrms') model['tms'] = optim_ms(model['lrms']).minimize( model['pFms'] + model['pBms'] + model['hFms'] + model['hBms'], global_step=model['gsms'], name='tms') model['gsce'] = tf.Variable(0, trainable=False, name='gsce') model['lrce'] = tf.train.exponential_decay(lrate_ce, model['gsce'], dstep_ce, drate_ce, staircase=False, name='lrce') model['tce'] = optim_ce(model['lrce']).minimize(model['cce'], global_step=model['gsce'], name='tce') return model
for i in range(max_steps):
    x_step = x[:, i, :]
    xh_join = tf.concat(1, [x_step, h])  # Combine the features and hidden state into one tensor

    ig = tf.sigmoid(tf.matmul(xh_join, W_ig) + b_ig)
    fg = tf.sigmoid(tf.matmul(xh_join, W_fg) + b_fg)
    og = tf.sigmoid(tf.matmul(xh_join, W_og) + b_og)
    c_in = tf.tanh(tf.matmul(xh_join, W_c) + b_c)

    c_out = fg * c + ig * c_in
    h_out = og * tf.tanh(c)

    # Keep the updated state only while step i is still inside the sequence length l;
    # once the length is exceeded, carry the old state forward unchanged.
    c = tf.select(tf.greater(l, i), c_out, c)
    h = tf.select(tf.greater(l, i), h_out, h)

ly = tf.matmul(h, W_o) + b_o
ly_flat = tf.reshape(ly, [batch_size])

##########################################################################################
# Optimizer/Analyzer
##########################################################################################

# Cost function and optimizer
#
cost = tf.reduce_mean(tf.square(ly_flat - y))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
def __init__(self, params, infer=False): self.is_training = tf.placeholder(tf.bool) self.output_keep_prob = tf.placeholder(tf.float32) num_layers = params['nlayer'] rnn_size = params['n_hidden'] grad_clip = 10 cell_lst = [] for i in range(num_layers): cell = tf.nn.rnn_cell.LSTMCell( rnn_size, initializer=tf.contrib.layers.xavier_initializer( uniform=False), forget_bias=1.0) # if i==0: # cell_drop = tf.nn.rnn_cell.DropoutWrapper(cell,input_keep_prob= self.output_keep_prob) # cell=cell_drop cell_drop = tf.nn.rnn_cell.DropoutWrapper( cell, output_keep_prob=self.output_keep_prob) cell = cell_drop cell_lst.append(cell) cell = tf.nn.rnn_cell.MultiRNNCell(cell_lst) # cell_drop = tf.nn.rnn_cell.DropoutWrapper(cell,output_keep_prob= self.output_keep_prob) # cell=cell_drop self.cell = cell NOUT = params['n_output'] # end_of_stroke + prob + 2*(mu + sig) + corr self.input_data = tf.placeholder( dtype=tf.float32, shape=[None, params['seq_length'], params['n_input']]) self.input_zero = tf.placeholder( dtype=tf.float32, shape=[None, params['seq_length'], params['n_input']]) self.repeat_data = tf.placeholder(dtype=tf.int32, shape=[None, params['seq_length']]) self.target_data = tf.placeholder( tf.float32, [None, params["seq_length"], params["n_output"]]) self.initial_state = cell.zero_state(batch_size=params['batch_size'], dtype=tf.float32) #Noise applied only training phase and if only std bigger than 0 if (params["noise_std"] > 0.0): ran_noise = tf.random_normal(shape=[ params["batch_size"], params['seq_length'], params['n_input'] ], mean=0, stddev=params['noise_std']) # ran_noise=tf.mul(ran_noise,self.input_zero) tmp_input = tf.nn.relu(self.input_data + ran_noise) self.input_data = tf.select(self.is_training, tmp_input, self.input_data) outputs = [] state = self.initial_state with tf.variable_scope("rnnlm"): for time_step in range(params['seq_length']): if time_step > 0: tf.get_variable_scope().reuse_variables() (cell_output, state) = cell(self.input_data[:, time_step, :], state) outputs.append(cell_output) rnn_output = tf.reshape(tf.transpose(tf.pack(outputs), [1, 0, 2]), [-1, params['n_hidden']]) with tf.variable_scope('rnnlm'): output_w1 = tf.get_variable( "output_w1", [rnn_size, NOUT], initializer=tf.contrib.layers.xavier_initializer()) output_b1 = tf.get_variable("output_b1", [NOUT]) self.final_output = tf.add(tf.matmul(rnn_output, output_w1), output_b1) flt = tf.squeeze(tf.reshape(self.repeat_data, [-1, 1]), [1]) where_flt = tf.not_equal(flt, 0) indices = tf.where(where_flt) tmp = self.final_output - tf.reshape(self.target_data, [-1, params["n_output"]]) tmp = tf.gather(tmp, tf.squeeze(indices, [1])) loss = tf.nn.l2_loss(tmp) self.cost = tf.reduce_mean(loss) self.final_state = state tf.scalar_summary('losses/total_loss', loss) self.lr = tf.Variable(0.0, trainable=False) tvars = tf.trainable_variables() total_parameters = 0 for variable in tvars: # shape is an array of tf.Dimension shape = variable.get_shape() variable_parametes = 1 for dim in shape: variable_parametes *= dim.value total_parameters += variable_parametes self.total_parameters = total_parameters grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), grad_clip) for grad in grads: # if isinstance(grad, ops.grads): # grad_values = grad.values # else: # grad_values = grad grad_values = grad logging_ops.histogram_summary(grad.op.name + ':gradient', grad_values) logging_ops.histogram_summary(grad.op.name + ':gradient_norm', clip_ops.global_norm([grad_values])) optimizer = tf.train.AdamOptimizer(self.lr) self.train_op = 
optimizer.apply_gradients(zip(grads, tvars))
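# A minimal NumPy sketch of what tf.clip_by_global_norm does in the block above: every
# gradient is rescaled by the same factor clip_norm / max(global_norm, clip_norm), so the
# relative directions are preserved. Names here are illustrative only.
import numpy as np

def clip_by_global_norm(grads, clip_norm):
    global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    scale = clip_norm / max(global_norm, clip_norm)
    return [g * scale for g in grads], global_norm

grads = [np.array([3.0, 4.0]), np.array([12.0])]   # global norm = 13
clipped, norm = clip_by_global_norm(grads, clip_norm=10.0)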
def create(embedder, config, scope='bicoder'): dim_v, dim_i, dim_d, dim_t, dim_b = config.getint('vocab'), config.getint( 'wvec'), config.getint('depth'), config.getint('steps'), config.getint( 'batch') samp, lrate, dstep, drate, optim, rfact, reg = config.getint( 'samples'), config.getfloat('lrate'), config.getint( 'dstep'), config.getfloat('drate'), getattr( tf.train, config.get('optim')), config.getfloat('rfact'), getattr( tf.contrib.layers, config.get('reg')) model = dict() with tf.name_scope(scope): with tf.name_scope('input'): for ii in xrange(dim_t): model['exi_%i' % ii] = tf.placeholder(tf.int32, [dim_b], name='exi_%i' % ii) model['ex_%i' % ii] = tf.add(tf.nn.embedding_lookup( embedder['We'], model['exi_%i' % ii]), embedder['Be'], name='ex_%i' % ii) with tf.name_scope('label'): for ii in xrange(dim_t): model['eyi_%i' % ii] = tf.placeholder(tf.int32, [dim_b], name='eyi_%i' % ii) model['ey_%i' % ii] = tf.add(tf.nn.embedding_lookup( embedder['We'], model['eyi_%i' % ii]), embedder['Be'], name='ey_%i' % ii) for i in xrange(dim_d): with tf.name_scope('input_%i' % i): for ii in xrange(dim_t): model['ex_%i_%i' % (i, ii)] = model['ex_%i' % ii] if i == 0 else model['eh_%i_%i' % (i - 1, ii)] with tf.name_scope('inputgate_%i' % i): model['eFWi_%i' % i] = tf.Variable( tf.random_uniform([dim_i, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), collections=[ tf.GraphKeys.VARIABLES, tf.GraphKeys.REGULARIZATION_LOSSES ], name='eFWi_%i' % i) model['eFBi_%i' % i] = tf.Variable(tf.random_uniform( [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), name='eFBi_%i' % i) model['eBWi_%i' % i] = tf.Variable( tf.random_uniform([dim_i, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), collections=[ tf.GraphKeys.VARIABLES, tf.GraphKeys.REGULARIZATION_LOSSES ], name='eBWi_%i' % i) model['eBBi_%i' % i] = tf.Variable(tf.random_uniform( [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), name='eBBi_%i' % i) for ii in xrange(dim_t): model['eFi_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['ex_%i_%i' % (i, ii)], model['eFWi_%i' % i]), model['eFBi_%i' % i]), name='eFi_%i_%i' % (i, ii)) model['eBi_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['ex_%i_%i' % (i, ii)], model['eBWi_%i' % i]), model['eBBi_%i' % i]), name='eBi_%i_%i' % (i, ii)) with tf.name_scope('forgetgate_%i' % i): model['eFWf_%i' % i] = tf.Variable( tf.random_uniform([dim_i, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), collections=[ tf.GraphKeys.VARIABLES, tf.GraphKeys.REGULARIZATION_LOSSES ], name='eFWf_%i' % i) model['eFBf_%i' % i] = tf.Variable(tf.random_uniform( [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), name='eFBf_%i' % i) model['eBWf_%i' % i] = tf.Variable( tf.random_uniform([dim_i, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), collections=[ tf.GraphKeys.VARIABLES, tf.GraphKeys.REGULARIZATION_LOSSES ], name='eBWf_%i' % i) model['eBBf_%i' % i] = tf.Variable(tf.random_uniform( [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), name='eBBf_%i' % i) for ii in xrange(dim_t): model['eFf_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['ex_%i_%i' % (i, ii)], model['eFWf_%i' % i]), model['eFBf_%i' % i]), name='eFf_%i_%i' % (i, ii)) model['eBf_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['ex_%i_%i' % (i, ii)], model['eBWf_%i' % i]), model['eBBf_%i' % i]), name='eBf_%i_%i' % (i, ii)) with tf.name_scope('outputgate_%i' % i): model['eWo_%i' % i] = tf.Variable( tf.random_uniform([dim_i, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. 
/ dim_i)), collections=[ tf.GraphKeys.VARIABLES, tf.GraphKeys.REGULARIZATION_LOSSES ], name='eWo_%i' % i) model['eBo_%i' % i] = tf.Variable(tf.random_uniform( [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), name='eBo_%i' % i) for ii in xrange(dim_t): model['eo_%i_%i' % (i, ii)] = tf.nn.sigmoid( tf.add( tf.matmul(model['ex_%i_%i' % (i, ii)], model['eWo_%i' % i]), model['eBo_%i' % i]), name='eo_%i_%i' % (i, ii)) with tf.name_scope('cellstate_%i' % i): model['eFWc_%i' % i] = tf.Variable( tf.random_uniform([dim_i, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), collections=[ tf.GraphKeys.VARIABLES, tf.GraphKeys.REGULARIZATION_LOSSES ], name='eFWc_' + str(i)) model['eFBc_%i' % i] = tf.Variable(tf.random_uniform( [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), name='eFBc_' + str(i)) model['eBWc_%i' % i] = tf.Variable( tf.random_uniform([dim_i, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), collections=[ tf.GraphKeys.VARIABLES, tf.GraphKeys.REGULARIZATION_LOSSES ], name='eBWc_' + str(i)) model['eBBc_%i' % i] = tf.Variable(tf.random_uniform( [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), name='eBBc_' + str(i)) for ii in xrange(dim_t): model['eFcc_%i_%i' % (i, ii)] = tf.Variable( tf.random_uniform([dim_b, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), name='eFcc_%i_%i' % (i, ii)) if ii == 0 else model[ 'eFc_%i_%i' % (i, ii - 1)] # consider starting with all zeros model['eFc_%i_%i' % (i, ii)] = tf.select( tf.equal(model['exi_%i' % ii], tf.zeros([dim_b], tf.int32)), model['eFcc_%i_%i' % (i, ii)], tf.add( tf.mul(model['eFf_%i_%i' % (i, ii)], model['eFcc_%i_%i' % (i, ii)]), tf.mul( model['eFi_%i_%i' % (i, ii)], tf.nn.tanh( tf.add( tf.matmul(model['ex_%i_%i' % (i, ii)], model['eFWc_%i' % i]), model['eFBc_%i' % i])))), name='eFc_%i_%i' % (i, ii)) for ii in reversed(xrange(dim_t)): model['eBcc_%i_%i' % (i, ii)] = tf.Variable( tf.random_uniform([dim_b, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), name='eBcc_%i_%i' % (i, ii)) if ii == dim_t - 1 else model[ 'eBc_%i_%i' % (i, ii + 1)] # consider starting with all zeros model['eBc_%i_%i' % (i, ii)] = tf.select( tf.equal(model['exi_%i' % ii], tf.zeros([dim_b], tf.int32)), model['eBcc_%i_%i' % (i, ii)], tf.add( tf.mul(model['eBf_%i_%i' % (i, ii)], model['eBcc_%i_%i' % (i, ii)]), tf.mul( model['eBi_%i_%i' % (i, ii)], tf.nn.tanh( tf.add( tf.matmul(model['ex_%i_%i' % (i, ii)], model['eBWc_%i' % i]), model['eBBc_%i' % i])))), name='eBc_%i_%i' % (i, ii)) for ii in xrange(dim_t): model['ec_%i_%i' % (i, ii)] = tf.concat( 1, [ model['eFc_%i_%i' % (i, ii)], model['eBc_%i_%i' % (i, ii)] ], 'ec_%i_%i' % (i, ii)) with tf.name_scope('hidden_%i' % i): model['eWz_%i' % i] = tf.Variable( tf.random_uniform([2 * dim_i, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. / dim_i)), collections=[ tf.GraphKeys.VARIABLES, tf.GraphKeys.REGULARIZATION_LOSSES ], name='eFWz_%i' % i) model['eBz_%i' % i] = tf.Variable(tf.random_uniform( [1, dim_i], -np.sqrt(6. / dim_i), np.sqrt(6. 
/ dim_i)), name='eFBz_%i' % i) for ii in xrange(dim_t): model['ez_%i_%i' % (i, ii)] = tf.add( tf.matmul(model['ec_%i_%i' % (i, ii)], model['eWz_%i' % i]), model['eBz_%i' % i], name='ez_%i_%i' % (i, ii)) with tf.name_scope('output_%i' % i): for ii in xrange(dim_t): model['eh_%i_%i' % (i, ii)] = tf.mul( model['eo_%i_%i' % (i, ii)], tf.nn.tanh(model['ez_%i_%i' % (i, ii)]), name='eh_%i_%i' % (i, ii)) model['eh_%i_%i' % (dim_d - 1, -1)] = tf.zeros([dim_b, dim_i], tf.float32) with tf.name_scope('output'): for ii in xrange(dim_t): model['eh_%i' % ii] = tf.select(tf.equal(model['exi_%i' % ii], tf.zeros([dim_b], tf.int32)), model['eh_%i_%i' % (dim_d - 1, ii - 1)], model['eh_%i_%i' % (dim_d - 1, ii)], name='eh_%i' % ii) with tf.name_scope('meansquared'): for ii in xrange(dim_t): model['emse_%i' % ii] = tf.select(tf.equal(model['exi_%i' % ii], tf.zeros([dim_b], tf.int32)), tf.zeros([dim_b], tf.float32), tf.reduce_sum( tf.square( tf.sub(model['ey_%i' % ii], model['eh_%i' % ii])), [1]), name='emse_%i' % ii) model['emse'] = tf.reduce_sum(tf.add_n( [model['emse_%i' % ii] for ii in xrange(dim_t)]), name='emse') model['semse'] = tf.scalar_summary(model['emse'].name, model['emse']) with tf.name_scope('negativeloglikelihood'): for ii in xrange(dim_t): model['enll_%i' % ii] = tf.select(tf.equal(model['exi_%i' % ii], tf.zeros([dim_b], tf.int32)), tf.zeros([dim_b], tf.float32), tf.nn.sampled_softmax_loss( embedder['We'], tf.zeros([dim_v], tf.float32), model['eh_%i' % ii], tf.reshape(model['eyi_%i' % ii], [dim_b, 1]), samp, dim_v), name='enll_%i' % ii) model['enll'] = tf.reduce_sum(tf.add_n( [model['enll_%i' % ii] for ii in xrange(dim_t)]), name='enll') model['senll'] = tf.scalar_summary(model['enll'].name, model['enll']) model['gse'] = tf.Variable(0, trainable=False, name='gse') model['lre'] = tf.train.exponential_decay(lrate, model['gse'], dstep, drate, staircase=False, name='lre') model['reg'] = tf.contrib.layers.apply_regularization( reg(rfact), tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) model['temse'] = optim(model['lre']).minimize(model['emse'] + model['reg'], global_step=model['gse'], name='temse') model['tenll'] = optim(model['lre']).minimize(model['enll'] + model['reg'], global_step=model['gse'], name='tenll') return model
def salt_and_pepper(X, rate=0.3): a = binomial_draw(shape=tf.shape(X), p=1 - rate) b = binomial_draw(shape=tf.shape(X), p=0.5) z = tf.zeros(tf.shape(X), dtype='float32') c = tf.select(tf.equal(a, z), b, z) return tf.add(tf.mul(X, a), c)
def binomial_draw(shape=[1], p=0.5, dtype='float32'): return tf.select( tf.less( tf.random_uniform(shape=shape, minval=0, maxval=1, dtype='float32'), tf.fill(shape, p)), tf.ones(shape, dtype=dtype), tf.zeros(shape, dtype=dtype))
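# NumPy sketch of the salt-and-pepper corruption built from the two helpers above: with
# probability `rate` a pixel is replaced, and the replacement is 0 or 1 with equal
# probability; otherwise the pixel is kept. Names are illustrative only.
import numpy as np

def salt_and_pepper_np(X, rate=0.3, rng=np.random):
    keep = rng.uniform(size=X.shape) < (1.0 - rate)   # plays the role of `a` above
    salt = rng.uniform(size=X.shape) < 0.5            # plays the role of `b` above
    return np.where(keep, X, salt.astype(X.dtype))

corrupted = salt_and_pepper_np(np.ones((2, 5), dtype=np.float32), rate=0.3)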
def buildModel(self, inputShape): #Running on GPU with tf.device(self.device): with tf.name_scope("inputOps"): self.imageShape = (self.batchSize, inputShape[0], inputShape[1], inputShape[2]) #Get convolution variables as placeholders self.inputImage = node_variable(self.imageShape, "inputImage") self.V1_W = [] self.normalize_W = [] self.V1_A = [] self.recon = [] self.error = [] self.reconError = [] self.sparseError = [] self.scaledInput = [] self.t_V1_A = [] self.t_recon = [] self.t_error = [] self.t_reconError = [] self.t_sparseError = [] self.underThresh = [] self.errorStd = [] self.l1_mean = [] self.t_errorStd = [] self.t_l1_mean = [] self.log_V1_A = [] self.WShape = [] self.VShape = [] self.inShape = [] for l in range(self.numLayers): if l == 0: numInF = inputShape[2] else: numInF = self.numV[l - 1] V_Y = float(inputShape[0]) V_X = float(inputShape[1]) for i in range(l + 1): V_Y_Prev = V_Y V_X_Prev = V_X assert (int(V_Y) % self.VStrideY[i] == 0) assert (int(V_X) % self.VStrideX[i] == 0) V_Y = V_Y / self.VStrideY[i] V_X = V_X / self.VStrideX[i] V_Y = int(V_Y) V_Y_Prev = int(V_Y_Prev) V_X = int(V_X) V_X_Prev = int(V_X_Prev) self.WShape.append((self.patchSizeY[l], self.patchSizeX[l], numInF, self.numV[l])) self.VShape.append((self.batchSize, V_Y, V_X, self.numV[l])) self.inShape.append( (self.batchSize, V_Y_Prev, V_X_Prev, numInF)) with tf.name_scope("Dictionary"): self.V1_W.append( sparse_weight_variable(self.WShape[l], "V1_W" + str(l))) with tf.name_scope("weightNorm"): self.normVals = tf.sqrt( tf.reduce_sum(tf.square(self.V1_W[l]), reduction_indices=[0, 1, 2], keep_dims=True)) self.normalize_W.append(self.V1_W[l].assign( self.V1_W[l] / (self.normVals + 1e-8))) with tf.name_scope("Encoding"): #Soft threshold self.V1_A.append( weight_variable(self.VShape[l], "V1_A" + str(l), 1e-3)) zeroConst = tf.zeros(self.VShape[l]) boolUnderThresh = tf.greater(self.zeroThresh[l], tf.abs(self.V1_A[l])) self.t_V1_A.append( tf.select(boolUnderThresh, zeroConst, self.V1_A[l])) with tf.name_scope("Recon"): assert (self.VStrideY[l] >= 1) assert (self.VStrideX[l] >= 1) #We build index tensor in numpy to gather self.recon.append( conv2d_oneToMany(self.V1_A[l], self.V1_W[l], self.inShape[l], "recon", self.VStrideY[l], self.VStrideX[l])) self.t_recon.append( conv2d_oneToMany(self.t_V1_A[l], self.V1_W[l], self.inShape[l], "t_recon", self.VStrideY[l], self.VStrideX[l])) with tf.name_scope("Error"): #Scale inputImage if (l == 0): #self.scaledInput.append(self.inputImage/np.sqrt(self.patchSizeX[0]*self.patchSizeY[0]*inputShape[2])) self.scaledInput.append(self.inputImage) else: #self.scaledInput.append(self.V1_A[l-1]/np.sqrt(self.patchSizeX[l]*self.patchSizeY[l]*self.numV[l-1])) self.scaledInput.append(self.V1_A[l - 1]) self.error.append(self.scaledInput[l] - self.recon[l]) self.t_error.append(self.scaledInput[l] - self.t_recon[l]) with tf.name_scope("Loss"): #Sum across axis except for match self.reconError.append( tf.reduce_mean( tf.reduce_sum(tf.square(self.error[l]), reduction_indices=[1, 2, 3]))) self.sparseError.append( tf.reduce_mean( tf.reduce_sum(tf.abs(self.V1_A[l]), reduction_indices=[1, 2, 3]))) self.t_reconError.append( tf.reduce_mean( tf.reduce_sum(tf.square(self.t_error[l]), reduction_indices=[1, 2, 3]))) self.t_sparseError.append( tf.reduce_mean( tf.reduce_sum(tf.abs(self.t_V1_A[l]), reduction_indices=[1, 2, 3]))) with tf.name_scope("stats"): self.underThresh.append( tf.reduce_mean( tf.cast( tf.abs(self.V1_A[l]) > self.zeroThresh[l], tf.float32))) eStd = tf.sqrt( tf.reduce_mean( 
tf.square(self.error[l] - tf.reduce_mean(self.error[l])))) t_eStd = tf.sqrt( tf.reduce_mean( tf.square(self.t_error[l] - tf.reduce_mean(self.t_error[l])))) inStd = tf.sqrt( tf.reduce_mean( tf.square(self.scaledInput[l] - tf.reduce_mean(self.scaledInput[l])))) self.errorStd.append(eStd / inStd) self.t_errorStd.append(t_eStd / inStd) self.l1_mean.append(tf.reduce_mean(tf.abs(self.V1_A[l]))) self.t_l1_mean.append( tf.reduce_mean(tf.abs(self.t_V1_A[l]))) #For log of activities self.log_V1_A.append(tf.log(tf.abs(self.V1_A[l]) + 1e-15)) with tf.name_scope("Loss"): #Define loss self.loss = self.reconError[0] / 2 + self.thresh[ 0] * self.sparseError[0] self.t_loss = self.t_reconError[0] / 2 + self.thresh[ 0] * self.t_sparseError[0] for l in range(1, self.numLayers): self.loss += self.reconError[l] / 2 + self.thresh[ l] * self.sparseError[l] self.t_loss += self.t_reconError[l] / 2 + self.thresh[ l] * self.t_sparseError[l] with tf.name_scope("Opt"): #Define optimizer #self.optimizerA = tf.train.GradientDescentOptimizer(self.learningRateA).minimize(self.loss, self.optimizerA = tf.train.AdamOptimizer( self.learningRateA ).minimize( self.loss, #self.optimizerA = tf.train.AdadeltaOptimizer(self.learningRateA).minimize(self.loss, var_list=self.V1_A) #self.optimizerW = tf.train.AdamOptimizer(self.learningRateW).minimize(self.loss, #Minimizing weights with respect to the cutoff weights #self.optimizerW = tf.train.AdamOptimizer(self.learningRateW).minimize(self.t_loss, self.optimizerW = tf.train.AdadeltaOptimizer( self.learningRateW, epsilon=1e-6).minimize(self.loss, var_list=self.V1_W) with tf.name_scope("ReconVis"): self.visRecon = [] self.t_visRecon = [] for l in range(self.numLayers): outRecon = self.recon[l] t_outRecon = self.t_recon[l] for ll in range(l)[::-1]: #We prob recons down layers outRecon = conv2d_oneToMany( outRecon, self.V1_W[ll], self.inShape[ll], "recon_" + str(l) + "_" + str(ll), self.VStrideY[ll], self.VStrideX[ll]) t_outRecon = conv2d_oneToMany( t_outRecon, self.V1_W[ll], self.inShape[ll], "recon_" + str(l) + "_" + str(ll), self.VStrideY[ll], self.VStrideX[ll]) self.visRecon.append(outRecon) self.t_visRecon.append(t_outRecon) with tf.name_scope("WeightVis"): self.visWeight = [] for l in range(self.numLayers): outWeight = tf.transpose(self.V1_W[l], [3, 0, 1, 2]) numN = self.WShape[l][3] numY = self.WShape[l][0] numX = self.WShape[l][1] numF = self.WShape[l][2] for ll in range(l)[::-1]: numY = self.WShape[ll][0] + (numY - 1) * self.VStrideY[ll] numX = self.WShape[ll][1] + (numX - 1) * self.VStrideX[ll] numF = self.WShape[ll][2] inShape = (numN, numY, numX, numF) outWeight = conv2d_oneToMany(outWeight, self.V1_W[ll], inShape, "weight_" + str(l) + "_" + str(ll), self.VStrideY[ll], self.VStrideX[ll], padding="VALID") self.visWeight.append(outWeight) #Summaries self.s_loss = tf.scalar_summary('loss', self.loss, name="lossSum") self.s_t_loss = tf.scalar_summary('t loss' + str(l), self.t_loss, name="t_lossSum") self.h_input = tf.histogram_summary('inputImage', self.inputImage, name="input") for l in range(self.numLayers): self.s_recon = tf.scalar_summary('recon error' + str(l), self.reconError[l], name="reconError") self.s_errorStd = tf.scalar_summary('errorStd' + str(l), self.errorStd[l], name="errorStd") self.s_l1 = tf.scalar_summary('l1 sparsity' + str(l), self.sparseError[l], name="sparseError") self.s_l1_mean = tf.scalar_summary('l1 mean' + str(l), self.l1_mean[l], name="l1Mean") self.s_s_nnz = tf.scalar_summary('nnz' + str(l), self.underThresh[l], name="nnz") self.s_t_recon = 
tf.scalar_summary('t recon error' + str(l), self.t_reconError[l], name="t_reconError") self.s_t_errorStd = tf.scalar_summary('t errorStd' + str(l), self.t_errorStd[l], name="t_errorStd") self.s_t_l1 = tf.scalar_summary('t l1 sparsity' + str(l), self.t_sparseError[l], name="t_sparseError") self.s_t_l1_mean = tf.scalar_summary('t l1 mean' + str(l), self.t_l1_mean[l], name="t_l1Mean") self.h_input = tf.histogram_summary('scaledInput' + str(l), self.scaledInput[l], name="input") self.h_recon = tf.histogram_summary('recon' + str(l), self.recon[l], name="recon") self.h_v1_w = tf.histogram_summary('V1_W' + str(l), self.V1_W[l], name="V1_W") self.h_v1_a = tf.histogram_summary('V1_A' + str(l), self.V1_A[l], name="V1_A") self.h_log_v1_a = tf.histogram_summary('Log_V1_A' + str(l), self.log_V1_A[l], name="Log_V1_A")
def add_prediction_op(self): # get relevent embedding data x = self.add_embedding() currBatch = tf.shape(x)[0] xDrop = tf.nn.dropout(x, self.dropoutPH) xRev = tf.reverse(xDrop, dims = [False, True, False]) # embeds = tf.concat(concat_dim=1, values = [xDrop, xRev]) # Extract sizes hidden_size = self.config.hidden_size n_class = self.config.n_class batch_size = self.config.batch_size max_sentence = self.config.max_sentence embedding_size = self.config.embedding_size # Define internal RNN Cells genCell1Layer1 = tf.nn.rnn_cell.LSTMCell(num_units = hidden_size, activation = tf.tanh) genCell2Layer1 = tf.nn.rnn_cell.LSTMCell(num_units = hidden_size, activation = tf.tanh) genCell1Layer2 = tf.nn.rnn_cell.LSTMCell(num_units = hidden_size, activation = tf.tanh) genCell2Layer2 = tf.nn.rnn_cell.LSTMCell(num_units = hidden_size, activation = tf.tanh) # Apply dropout to each cell genC1L1Drop = tf.nn.rnn_cell.DropoutWrapper(genCell1Layer1, output_keep_prob=self.dropoutPH) genC2L1Drop = tf.nn.rnn_cell.DropoutWrapper(genCell2Layer1, output_keep_prob=self.dropoutPH) genC1L2Drop = tf.nn.rnn_cell.DropoutWrapper(genCell1Layer2, output_keep_prob=self.dropoutPH) genC2L2Drop = tf.nn.rnn_cell.DropoutWrapper(genCell2Layer2, output_keep_prob=self.dropoutPH) # Stack each for multi Cell multiFwd = tf.nn.rnn_cell.MultiRNNCell([genC1L1Drop, genC1L2Drop]) multiBwd = tf.nn.rnn_cell.MultiRNNCell([genC2L1Drop, genC2L2Drop]) # Set inital states # fwdInitState = genC1L1Drop.zero_state(batch_size = currBatch, # dtype = tf.float32) # bwdInitState = genC2L1Drop.zero_state(batch_size = currBatch, # dtype = tf.float32) fwdInitState = multiFwd.zero_state(batch_size = currBatch, dtype = tf.float32) bwdInitState = multiBwd.zero_state(batch_size = currBatch, dtype = tf.float32) _, states = tf.nn.bidirectional_dynamic_rnn(cell_fw = multiFwd, cell_bw = multiBwd, inputs = x, initial_state_fw = fwdInitState, initial_state_bw = bwdInitState, dtype = tf.float32, sequence_length = self.seqPH ) # Return is 2 x 2 x 2 x batchsize x hiddensize tensor # repretedly unpakc and concat to batchsize x hiddensize * 8 tensor unpackedStates = tf.unpack(states, axis = 0) concatStates = tf.concat(concat_dim=3, values = unpackedStates) unpackedStates = tf.unpack(concatStates, axis = 0) concatStates = tf.concat(concat_dim=2, values=unpackedStates) unpackedStates = tf.unpack(concatStates, axis = 0) finalStates = tf.concat(concat_dim=1, values=unpackedStates) # Define our prediciton layer variables U = tf.get_variable(name='W_gen', shape=((8 * hidden_size), self.config.max_sentence), dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) c = tf.get_variable(name='b_gen', shape=(self.config.max_sentence,), dtype=tf.float32, initializer=tf.constant_initializer(0.0)) # zLayer probabilities - each prob is prob of keeping word in review zProbs = tf.sigmoid(tf.matmul(finalStates, U) + c) zProbs = tf.select(self.maskPH, zProbs, tf.zeros(shape = tf.shape(zProbs), dtype = tf.float32)) # zProbs = tf.stop_gradient(zProbs) # sample zprobs to pick which review words to keep. 
mask unselected words uniform = tf.random_uniform(shape = tf.shape(zProbs), minval=0, maxval=1) < zProbs # uniform = tf.stop_gradient( # tf.random_uniform(shape=tf.shape(zProbs), minval=0, # maxval=1) < zProbs, 'uniform') self.zPreds = tf.select(uniform, tf.ones(shape = tf.shape(uniform), dtype = tf.float32), tf.zeros(shape = tf.shape(uniform), dtype = tf.float32)) masks = tf.zeros(shape = tf.shape(zProbs), dtype = tf.int32) + self.maskId maskedInputs = tf.select(uniform, self.inputPH, masks) # Return masked embeddings to pass to encoder embedding_shape = (-1, self.config.max_sentence, self.config.embedding_size) maskedEmbeddings = tf.nn.embedding_lookup(self.pretrained_embeddings, maskedInputs) maskedEmbeddings = tf.cast(maskedEmbeddings, tf.float32) maskedEmbeddings = tf.reshape(maskedEmbeddings, shape=embedding_shape) # Use encoder to make predictions # encoderPreds = self.encoder.add_prediction_op2(maskedEmbeddings) # Define our prediciton layer variables W = tf.get_variable(name='W', shape=((4 * hidden_size), n_class), dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) b = tf.get_variable(name='b', shape=(n_class,), dtype=tf.float32, initializer=tf.constant_initializer(0.0)) cell1 = tf.nn.rnn_cell.LSTMCell(embedding_size, activation=tf.tanh) cell2 = tf.nn.rnn_cell.LSTMCell(hidden_size, activation=tf.tanh) cell1_drop = tf.nn.rnn_cell.DropoutWrapper(cell1, output_keep_prob=self.dropoutPH) cell2_drop = tf.nn.rnn_cell.DropoutWrapper(cell2, output_keep_prob=self.dropoutPH) cell_multi = tf.nn.rnn_cell.MultiRNNCell([cell1_drop, cell2_drop]) _, result = tf.nn.dynamic_rnn(cell_multi, maskedEmbeddings, dtype=tf.float32, sequence_length=self.seqPH) # Return state is a 2 x 2 x batchsize x hiddensize tensor # repetedly unpack and concat to batchsize x 4 * hiddensize tensor unpackedStates = tf.unpack(result, axis = 0) packedStates = tf.concat(concat_dim=2, values = unpackedStates) unpackedStates = tf.unpack(packedStates, axis=0) finalStates = tf.concat(concat_dim=1, values = unpackedStates) y_t = tf.tanh(tf.matmul(finalStates, W) + b) return y_t
def huber_loss(x, delta=1.0): # https://en.wikipedia.org/wiki/Huber_loss return tf.select( tf.abs(x) < delta, tf.square(x) * 0.5, delta * (tf.abs(x) - 0.5 * delta))
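# The Huber loss above is the piecewise function
#   L(x) = 0.5 * x^2                   for |x| < delta
#   L(x) = delta * (|x| - 0.5 * delta) otherwise,
# quadratic near zero and linear in the tails. A quick NumPy check of the same
# definition (illustrative only):
import numpy as np

def huber_np(x, delta=1.0):
    return np.where(np.abs(x) < delta, 0.5 * x ** 2, delta * (np.abs(x) - 0.5 * delta))

assert np.isclose(huber_np(np.array(0.5)), 0.125)   # quadratic branch
assert np.isclose(huber_np(np.array(3.0)), 2.5)     # linear branch: 1.0 * (3.0 - 0.5)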
def rnn(step_function, inputs, initial_states, go_backwards=False, mask=None): '''Iterates over the time dimension of a tensor. Parameters ---------- inputs: tensor of temporal data of shape (samples, time, ...) (at least 3D). step_function: Parameters: input: tensor with shape (samples, ...) (no time dimension), representing input for the batch of samples at a certain time step. states: list of tensors. Returns: output: tensor with shape (samples, ...) (no time dimension), new_states: list of tensors, same length and shapes as 'states'. initial_states: tensor with shape (samples, ...) (no time dimension), containing the initial values for the states used in the step function. go_backwards: boolean. If True, do the iteration over the time dimension in reverse order. mask: binary tensor with shape (samples, time, 1), with a zero for every element that is masked. Returns ------- A tuple (last_output, outputs, new_states). last_output: the latest output of the rnn, of shape (samples, ...) outputs: tensor with shape (samples, time, ...) where each entry outputs[s, t] is the output of the step function at time t for sample s. new_states: list of tensors, latest states returned by the step function, of shape (samples, ...). ''' ndim = len(inputs.get_shape()) assert ndim >= 3, "Input should be at least 3D." axes = [1, 0] + list(range(2, ndim)) inputs = tf.transpose(inputs, (axes)) input_list = tf.unpack(inputs) states = initial_states successive_states = [] successive_outputs = [] if go_backwards: input_list.reverse() if mask is not None: # Transpose not supported by bool tensor types, hence round-trip to uint8. mask = tf.cast(mask, tf.uint8) if len(mask.get_shape()) == ndim - 1: mask = expand_dims(mask) mask = tf.cast(tf.transpose(mask, axes), tf.bool) mask_list = tf.unpack(mask) for input, mask_t in zip(input_list, mask_list): output, new_states = step_function(input, states) # tf.select needs its condition tensor to be the same shape as its two # result tensors, but in our case the condition (mask) tensor is # (nsamples, 1), and A and B are (nsamples, ndimensions). So we need to # broadcast the mask to match the shape of A and B. That's what the # tile call does, is just repeat the mask along its second dimension # ndimensions times. tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(output)[1]])) if len(successive_outputs) == 0: prev_output = zeros_like(output) else: prev_output = successive_outputs[-1] output = tf.select(tiled_mask_t, output, prev_output) return_states = [] for state, new_state in zip(states, new_states): # (see earlier comment for tile explanation) tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(new_state)[1]])) return_states.append(tf.select(tiled_mask_t, new_state, state)) states = return_states successive_outputs.append(output) successive_states.append(states) else: for input in input_list: output, states = step_function(input, states) successive_outputs.append(output) successive_states.append(states) last_output = successive_outputs[-1] outputs = tf.pack(successive_outputs) new_states = successive_states[-1] axes = [1, 0] + list(range(2, len(outputs.get_shape()))) outputs = tf.transpose(outputs, axes) return last_output, outputs, new_states
def clipped_error(x): # Huber loss try: return tf.select(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5) except: return tf.where(tf.abs(x) < 1.0, 0.5 * tf.square(x), tf.abs(x) - 0.5)
def discretized_mix_logistic_loss(x, l, sum_all=True): """ log-likelihood for mixture of discretized logistics, assumes the data has been rescaled to [-1,1] interval """ xs = int_shape( x) # true image (i.e. labels) to regress to, e.g. (B,32,32,3) ls = int_shape(l) # predicted distribution, e.g. (B,32,32,100) nr_mix = int( ls[-1] / 10) # here and below: unpacking the params of the mixture of logistics logit_probs = l[:, :, :, :nr_mix] l = tf.reshape(l[:, :, :, nr_mix:], xs + [nr_mix * 3]) means = l[:, :, :, :, :nr_mix] log_scales = tf.maximum(l[:, :, :, :, nr_mix:2 * nr_mix], -7.) coeffs = tf.nn.tanh(l[:, :, :, :, 2 * nr_mix:3 * nr_mix]) x = tf.reshape(x, xs + [1]) + tf.zeros( xs + [nr_mix] ) # here and below: getting the means and adjusting them based on preceding sub-pixels m2 = tf.reshape( means[:, :, :, 1, :] + coeffs[:, :, :, 0, :] * x[:, :, :, 0, :], [xs[0], xs[1], xs[2], 1, nr_mix]) m3 = tf.reshape( means[:, :, :, 2, :] + coeffs[:, :, :, 1, :] * x[:, :, :, 0, :] + coeffs[:, :, :, 2, :] * x[:, :, :, 1, :], [xs[0], xs[1], xs[2], 1, nr_mix]) means = tf.concat(3, [ tf.reshape(means[:, :, :, 0, :], [xs[0], xs[1], xs[2], 1, nr_mix]), m2, m3 ]) centered_x = x - means inv_stdv = tf.exp(-log_scales) plus_in = inv_stdv * (centered_x + 1. / 255.) cdf_plus = tf.nn.sigmoid(plus_in) min_in = inv_stdv * (centered_x - 1. / 255.) cdf_min = tf.nn.sigmoid(min_in) log_cdf_plus = plus_in - tf.nn.softplus( plus_in) # log probability for edge case of 0 (before scaling) log_one_minus_cdf_min = -tf.nn.softplus( min_in) # log probability for edge case of 255 (before scaling) cdf_delta = cdf_plus - cdf_min # probability for all other cases mid_in = inv_stdv * centered_x log_pdf_mid = mid_in - log_scales - 2. * tf.nn.softplus( mid_in ) # log probability in the center of the bin, to be used in extreme cases (not actually used in our code) # now select the right output: left edge case, right edge case, normal case, extremely low prob case (doesn't actually happen for us) # this is what we are really doing, but using the robust version below for extreme cases in other applications and to avoid NaN issue with tf.select() # log_probs = tf.select(x < -0.999, log_cdf_plus, tf.select(x > 0.999, log_one_minus_cdf_min, tf.log(cdf_delta))) # robust version, that still works if probabilities are below 1e-5 (which never happens in our code) # tensorflow backpropagates through tf.select() by multiplying with zero instead of selecting: this requires use to use some ugly tricks to avoid potential NaNs # the 1e-12 in tf.maximum(cdf_delta, 1e-12) is never actually used as output, it's purely there to get around the tf.select() gradient issue # if the probability on a sub-pixel is below 1e-5, we use an approximation based on the assumption that the log-density is constant in the bin of the observed sub-pixel value log_probs = tf.select( x < -0.999, log_cdf_plus, tf.select( x > 0.999, log_one_minus_cdf_min, tf.select(cdf_delta > 1e-5, tf.log(tf.maximum(cdf_delta, 1e-12)), log_pdf_mid - np.log(127.5)))) log_probs = tf.reduce_sum(log_probs, 3) + log_prob_from_logits(logit_probs) if sum_all: return -tf.reduce_sum(log_sum_exp(log_probs)) else: return -tf.reduce_sum(log_sum_exp(log_probs), [1, 2])
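# NumPy sketch of the per-value discretized logistic probability that the loss above
# computes in log space (single mixture component, single channel; names illustrative).
# Values are assumed rescaled to [-1, 1], the half-bin width is 1/255, and the edge bins
# absorb the tails exactly as in the tf.select cascade above.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def discretized_logistic_prob(x, mean, log_scale):
    inv_s = np.exp(-log_scale)
    cdf_plus = sigmoid(inv_s * (x - mean + 1.0 / 255.0))
    cdf_min = sigmoid(inv_s * (x - mean - 1.0 / 255.0))
    prob = cdf_plus - cdf_min                          # interior bins
    prob = np.where(x < -0.999, cdf_plus, prob)        # left edge bin
    prob = np.where(x > 0.999, 1.0 - cdf_min, prob)    # right edge bin
    return prob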
batch_size = 8 #None is used to match the batch x = tf.placeholder(tf.float32, shape=(None, 2), name='x-input') #regression problems usually have a single output y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y-input') #define the parameters of the neural network w1 = tf.Variable(tf.random_normal([2, 1], stddev=1, seed=1)) y = tf.matmul(x, w1) #define the cost: under-prediction is penalized more heavily than over-prediction loss_less = 10 loss_more = 1 loss = tf.reduce_sum( tf.select(tf.greater(y, y_), (y - y_) * loss_more, (y_ - y) * loss_less)) #cross_entropy=-tf.reduce_mean(y_*tf.log(tf.clip_by_value(y,1e-10,1.0))) train_step = tf.train.AdamOptimizer(0.001).minimize(loss) #randomly generate a simulated dataset rdm = RandomState(1) dataset_size = 128 X = rdm.rand(dataset_size, 2) Y = [[x1 + x2 + rdm.rand() / 10.0 - 0.05] for (x1, x2) in X] #create a session to run the program with tf.Session() as sess: init_op = tf.initialize_all_variables() sess.run(init_op) print sess.run(w1)
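# NumPy check of the asymmetric cost above: under-prediction is penalized ten times as
# heavily as over-prediction (loss_less = 10 vs loss_more = 1), which biases the fitted
# weights upward relative to a plain squared error. Illustrative values only.
import numpy as np
y_pred = np.array([1.2, 0.8])
y_true = np.array([1.0, 1.0])
loss_more, loss_less = 1, 10
loss = np.sum(np.where(y_pred > y_true,
                       (y_pred - y_true) * loss_more,
                       (y_true - y_pred) * loss_less))
# the 0.2 over-prediction contributes 0.2, the 0.2 under-prediction contributes 2.0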
def gen_whitenoise_samps(shp): return tf.select( tf.random_uniform([shp, 784], dtype=tf.float32) > 0.5, tf.ones([shp, 784]), tf.zeros([shp, 784]))
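# The sampler above draws binary (0/1) white-noise images by thresholding uniform noise
# at 0.5. Equivalent NumPy sketch; 784 (a flattened 28x28 image) is simply carried over
# from the code above.
import numpy as np

def gen_whitenoise_samps_np(n, rng=np.random):
    return (rng.uniform(size=(n, 784)) > 0.5).astype(np.float32)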
def __init__(self, sess, abstraction_scope, visual_scope, num_actions, num_abstract_actions, num_abstract_states, gamma=0.99, learning_rate=0.00025, replay_start_size=5000, epsilon_start=1.0, epsilon_end=0.1, epsilon_steps=1000000, update_freq=4, target_copy_freq=10000, replay_memory_size=1000000, frame_history=1, batch_size=32, error_clip=1, abstraction_function=None, max_episode_steps=-1, base_network_file=None): self.sess = sess self.num_abstract_actions = num_abstract_actions self.num_abstract_states = num_abstract_states self.num_actions = num_actions self.batch_size = batch_size self.gamma = gamma self.frame_history = frame_history self.replay_buffer = ReplayMemory((84, 84), 'uint8', replay_memory_size, frame_history) self.abstraction_scope = abstraction_scope self.abstraction_function = abstraction_function self.inp_frames = tf.placeholder(tf.uint8, [None, 84, 84, self.frame_history]) self.inp_sp_frames = tf.placeholder(tf.uint8, [None, 84, 84, self.frame_history]) self.inp_terminated = tf.placeholder(tf.bool, [None]) self.inp_reward = tf.placeholder(tf.float32, [None]) self.inp_mask = tf.placeholder(tf.uint8, [None, frame_history]) self.inp_sp_mask = tf.placeholder(tf.uint8, [None, frame_history]) self.inp_actions = tf.placeholder(tf.float32, [None, num_actions]) # onehot vector #self.inp_sigma = tf.placeholder(tf.float32, [None, self.num_abstract_states]) self.reward_matrix = -np.ones((num_abstract_states, num_abstract_states, num_abstract_actions), dtype=np.float32) # make self transitions 0 for i in range(num_abstract_states): self.reward_matrix[i, i, :] = 0 # make goal transitions have reward 1 for a in range(num_abstract_actions): i, j = flat_actions_to_state_pairs(a, num_abstract_states) self.reward_matrix[i, j, a] = 1 self.actions_for_sigma = np.zeros((num_abstract_states, num_abstract_actions), dtype=np.float32) for a in range(num_abstract_actions): i, j = flat_actions_to_state_pairs(a, num_abstract_states) self.actions_for_sigma[i, a] = 1 # mask stuff here mask = tf.reshape(self.inp_mask, [-1, 1, 1, 1]) masked_input = self.inp_frames * mask l0_vis_scope = 'l0_vis' with tf.variable_scope(l0_vis_scope): self.visual_output_base = hook_visual(masked_input, self.frame_history) self.visual_output = tf.stop_gradient(self.visual_output_base) with tf.variable_scope('online_base'): self.q_online_base = hook_base(self.visual_output_base, self.num_actions) with tf.variable_scope('online_1'): self.q_online_1 = hook_l0(self.visual_output, 1, self.num_actions) with tf.variable_scope('online_2'): self.q_online_2 = hook_l0(self.visual_output, 1, self.num_actions) self.q_online = tf.concat(1, [self.q_online_1, self.q_online_2]) mask_sp = tf.reshape(self.inp_sp_mask, [-1, 1, 1, 1]) masked_input_sp = self.inp_sp_frames * mask_sp l0_target_vis_scope = 'l0_target_vis' with tf.variable_scope(l0_target_vis_scope): self.visual_output_sp = hook_visual(masked_input_sp, self.frame_history) with tf.variable_scope('target_base'): self.q_target_base = hook_base(self.visual_output_sp, self.num_actions) with tf.variable_scope('target_1'): self.q_target_1 = hook_l0(self.visual_output_sp, 1, self.num_actions) with tf.variable_scope('target_2'): self.q_target_2 = hook_l0(self.visual_output_sp, 1, self.num_actions) self.q_target = tf.concat(1, [self.q_target_1, self.q_target_2]) # with tf.variable_scope(visual_scope, reuse=True): # # mask stuff here # mask = tf.reshape(self.inp_mask, [-1, 1, 1, 1]) # masked_input = self.inp_frames * mask # self.visual_output = hook_visual(masked_input, self.frame_history) # # 
mask_sp = tf.reshape(self.inp_sp_mask, [-1, 1, 1, 1]) # masked_input_sp = self.inp_sp_frames * mask_sp # self.visual_output_sp = hook_visual(masked_input_sp, self.frame_history) # # with tf.variable_scope('online'): # self.q_online = hook_l0(self.visual_output, self.num_abstract_actions, self.num_actions) # with tf.variable_scope('target'): # self.q_target = hook_l0(self.visual_output_sp, self.num_abstract_actions, self.num_actions) # TODO set up double dqn for later experiments. # Q matrix is (num_abstract_actions, num_actions), results in vector with max-q for each abstract action. self.maxQ = tf.reduce_max(self.q_target, reduction_indices=2) with tf.variable_scope(visual_scope, reuse=True): self.l1_visual_output = hook_visual(masked_input, self.frame_history) self.l1_visual_output_sp = hook_visual(masked_input_sp, self.frame_history) with tf.variable_scope(self.abstraction_scope, reuse=True): self.sigma = tf.stop_gradient(hook_abstraction(self.l1_visual_output, num_abstract_states, batch_size)[0]) self.sigma_p = tf.stop_gradient(hook_abstraction(self.l1_visual_output_sp, num_abstract_states, batch_size)[0]) self.sigma_query, self.sigma_query_probs = hook_abstraction(self.l1_visual_output, self.num_abstract_states, 1) self.r = tf.reduce_sum( tf.reshape(self.sigma_p, [-1, 1, num_abstract_states, 1]) * \ tf.reshape(self.sigma, [-1, num_abstract_states, 1, 1]) * \ tf.reshape(self.reward_matrix, [1, num_abstract_states, num_abstract_states, num_abstract_actions]), reduction_indices=[1, 2]) # Give a reward of -1 if reached a terminal state self.r = (self.r * tf.reshape(tf.cast(tf.logical_not(self.inp_terminated), dtype=tf.float32), [-1, 1])) +\ tf.reshape(tf.cast(self.inp_terminated, dtype=tf.float32) * -1, [-1, 1]) self.use_backup = tf.cast(tf.logical_not(self.inp_terminated), dtype=tf.float32) * tf.reduce_sum(self.sigma_p * self.sigma, reduction_indices=1) self.y = tf.stop_gradient(self.r + tf.reshape(self.use_backup, [-1, 1]) * gamma * self.maxQ) self.delta = tf.reduce_sum(tf.reshape(self.inp_actions, [-1, 1, num_actions]) * self.q_online, reduction_indices=2) - self.y valid_actions_mask = valid_actions_for_sigma(self.actions_for_sigma, self.sigma, self.num_abstract_actions) self.masked_delta = self.delta * valid_actions_mask self.error = tf.select(tf.abs(self.masked_delta) < error_clip, 0.5 * tf.square(self.masked_delta), error_clip * tf.abs(self.masked_delta)) # base dqn self.maxQ_base = tf.reduce_max(self.q_target_base, reduction_indices=1) self.r_base = tf.sign(self.inp_reward) use_backup_base = tf.cast(tf.logical_not(self.inp_terminated), dtype=tf.float32) self.y_base = tf.stop_gradient(self.r_base + use_backup_base * gamma * self.maxQ_base) self.delta_base = tf.reduce_sum(self.inp_actions * self.q_online_base, reduction_indices=1) - self.y_base self.error_base = tf.select(tf.abs(self.delta_base) < error_clip, 0.5 * tf.square(self.delta_base), error_clip * tf.abs(self.delta_base)) self.loss = tf.reduce_sum(self.error) + tf.reduce_sum(self.error_base) self.g = tf.gradients(self.loss, self.q_online) optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=0.95, centered=True, epsilon=0.01) self.train_op = optimizer.minimize(self.loss, var_list=th.get_vars('online_1', 'online_2', 'online_base', l0_vis_scope)) self.copy_op = [th.make_copy_op('online_1', 'target_1'), th.make_copy_op('online_2', 'target_2'), th.make_copy_op(l0_vis_scope, l0_target_vis_scope), th.make_copy_op('online_base', 'target_base')] self.replay_buffer = L1ReplayMemory((84, 84), 'uint8', 
replay_memory_size, frame_history) self.frame_history = frame_history self.replay_start_size = replay_start_size self.epsilon = epsilon_start self.epsilon_min = epsilon_end self.epsilon_steps = epsilon_steps self.epsilon_delta = (self.epsilon - self.epsilon_min) / self.epsilon_steps self.update_freq = update_freq self.target_copy_freq = target_copy_freq self.action_ticker = 1 self.max_episode_steps = max_episode_steps self.num_actions = num_actions self.batch_size = batch_size self.base_network_saver = tf.train.Saver(var_list=th.get_vars('online_base', l0_vis_scope))
def bernoulli(p, y): return tf.log(tf.select(tf.equal(y, 1), p, 1 - p))
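# The bernoulli() helper above returns log p where y == 1 and log(1 - p) elsewhere, i.e.
# the element-wise Bernoulli log-likelihood. Quick NumPy check (illustrative only):
import numpy as np
p = np.array([0.9, 0.2])
y = np.array([1, 0])
ll = np.log(np.where(y == 1, p, 1.0 - p))
assert np.allclose(ll, y * np.log(p) + (1 - y) * np.log(1.0 - p))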
sequence_length_lst = [1, 1, 1, 1, 1] sequence_length = tf.constant(sequence_length_lst) done_mask = tf.cast(tf.zeros(batch_size), tf.bool) for time in range(0, 5): print(time) current_date = char_prob[:, time, :] max_vals = tf.argmax(current_date, 1) mask = tf.equal(max_vals, tf.constant(0, tf.int64)) current_mask = tf.logical_and(mask, tf.logical_not(done_mask)) done_mask = tf.logical_or(mask, done_mask) time_vec = tf.ones(batch_size, tf.int32) * (time + 2) sequence_length = tf.select(done_mask, sequence_length, time_vec, name=None) not_done_no = tf.reduce_sum(tf.cast(tf.logical_not(done_mask), tf.int32)) all_eos = tf.equal(not_done_no, tf.constant(0)) stop_loop = tf.logical_or(all_eos, tf.greater(time, max_it)) keep_working = tf.logical_not(stop_loop) sess = tf.Session() with sess.as_default(): tf.initialize_all_variables().run() #print(char_prob.eval()) print(max_vals.eval()) print(mask.eval()) print(done_mask.eval()) print(sequence_length.eval())
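# NumPy sketch of the decoding-length bookkeeping above: a sample is "done" once its
# argmax first emits the EOS symbol (index 0 here), and sequence_length is frozen at its
# previous value from then on (the time + 2 offset is carried over from the code above).
import numpy as np

def update_lengths(done_mask, seq_len, max_ids, time):
    eos = (max_ids == 0)
    done_mask = done_mask | eos
    seq_len = np.where(done_mask, seq_len, time + 2)   # keep the old length once done
    return done_mask, seq_len

done = np.array([False, False])
seq_len = np.array([2, 2])
done, seq_len = update_lengths(done, seq_len, max_ids=np.array([0, 7]), time=1)
# sample 0 emitted EOS: length frozen at 2; sample 1 keeps running: length becomes 3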
z = tf.mul(u, tf.nn.tanh(g)) a_newmax = tf.maximum(a_max, a) exp_diff = tf.exp(a_max - a_newmax) exp_scaled = tf.exp(a - a_newmax) n = tf.mul(n, exp_diff) + tf.mul( z, exp_scaled) # Numerically stable update of numerator d = tf.mul( d, exp_diff) + exp_scaled # Numerically stable update of denominator h_new = activation(tf.div(n, d)) a_max = a_newmax h = tf.select( tf.greater(l, i), h_new, h ) # Use new hidden state only if the sequence length has not been exceeded ly = tf.matmul(h, W_o) + b_o py = tf.nn.softmax(ly) ########################################################################################## # Optimizer/Analyzer ########################################################################################## # Cost function and optimizer # cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits( ly, y)) # Cross-entropy cost function optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
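# NumPy sketch of the running-maximum rescaling used above: the numerator n and
# denominator d of the recurrent weighted average are kept in units of exp(a - a_max),
# so exp() never sees a large positive argument. Names are illustrative only.
import numpy as np

def rwa_update(n, d, a_max, z, a):
    a_newmax = np.maximum(a_max, a)
    exp_diff = np.exp(a_max - a_newmax)      # rescales the old accumulators
    exp_scaled = np.exp(a - a_newmax)        # weight of the new term
    n = n * exp_diff + z * exp_scaled
    d = d * exp_diff + exp_scaled
    return n, d, a_newmax, n / d             # n / d feeds the activation for h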
sess.as_default() # L = .5[r + discount * max a' Q(s', a') - Q(s, a)]^2 # |------target-------| |prediction| # Do a feedforward pass for the current state s to get predicted Q-values for all actions. action_array_1 = network(state_input_1) # Do a feedforward pass for the next state s' and calculate maximum overall network outputs max a' Q(s', a'). # Set Q-value target for action to r + discount * max a' Q(s', a') (use the max calculated in step 2). # For all other actions, set the Q-value target to the same as originally returned from step 1, making the error 0 for those outputs. # tt = rr + discount * max(a') Q(ss',aa') or rr if terminal state tt = reward_input + terminal_input * (GAMMA * max_val_input) tt = tf.reshape(tt,(BATCH,1)) target_prep = tf.tile(tt,[1,4]) target = tf.select(action_input, target_prep, action_array_1) # loss is .5(tt - Q(ss,aa))^2 Qerror = tf.sub(target, action_array_1) loss = .5*tf.reduce_sum(tf.mul(Qerror, Qerror)) # Update the weights using backpropagation. optimizer = tf.train.GradientDescentOptimizer(1e-3).minimize(loss) # saving and loading networks saver = tf.train.Saver() tf.initialize_all_variables().run() checkpoint = tf.train.get_checkpoint_state("saved_networks") if checkpoint and checkpoint.model_checkpoint_path: saver.restore(sess, checkpoint.model_checkpoint_path)
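# NumPy sketch of the Q-target construction above: only the column of the taken action is
# replaced by r + gamma * max_a' Q(s', a'); the other columns keep the network's own
# predictions, so their error (and gradient) is zero. Illustrative values only.
import numpy as np
q_pred = np.array([[1.0, 2.0, 3.0, 4.0]])            # Q(s, a) for 4 actions
action_onehot = np.array([[False, True, False, False]])
tt = np.array([[2.5]])                               # r + gamma * max_a' Q(s', a')
target = np.where(action_onehot, np.tile(tt, (1, 4)), q_pred)
# target == [[1.0, 2.5, 3.0, 4.0]]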
def build_networks(self): self.nA = self.action_space.n self.actor_input = tf.placeholder(tf.float32, name='actor_input') self.actions_taken = tf.placeholder(tf.float32, name='actions_taken') self.critic_feedback = tf.placeholder(tf.float32, name='critic_feedback') self.critic_rewards = tf.placeholder(tf.float32, name='critic_rewards') # Actor network W0 = tf.Variable(tf.random_normal( [self.nO, self.config['actor_n_hidden']]), name='W0') b0 = tf.Variable(tf.zeros([self.config['actor_n_hidden']]), name='b0') L1 = tf.tanh(tf.matmul(self.actor_input, W0) + b0[None, :], name='L1') W1 = tf.Variable(tf.random_normal( [self.config['actor_n_hidden'], self.nA]), name='W1') b1 = tf.Variable(tf.zeros([self.nA]), name='b1') self.prob_na = tf.nn.softmax(tf.matmul(L1, W1) + b1[None, :], name='prob_na') good_probabilities = tf.reduce_sum(tf.mul(self.prob_na, self.actions_taken), reduction_indices=[1]) eligibility = tf.log(tf.select(tf.equal(good_probabilities, tf.fill(tf.shape(good_probabilities), 0.0)), tf.fill(tf.shape(good_probabilities), 1e-30), good_probabilities)) \ * (self.critic_rewards - self.critic_feedback) loss = -tf.reduce_mean(eligibility) loss = tf.Print(loss, [loss], message='Actor loss=') optimizer = tf.train.RMSPropOptimizer( learning_rate=self.config['actor_learning_rate'], decay=0.9, epsilon=1e-9) self.actor_train = optimizer.minimize(loss) self.critic_state_in = tf.placeholder("float", [None, self.nO], name='critic_state_in') self.critic_returns = tf.placeholder("float", name="critic_returns") # Critic network critic_W0 = tf.Variable(tf.random_normal( [self.nO, self.config['critic_n_hidden']]), name='W0') critic_b0 = tf.Variable(tf.zeros([self.config['actor_n_hidden']]), name='b0') critic_L1 = tf.tanh(tf.matmul(self.critic_state_in, critic_W0) + critic_b0[None, :], name='L1') critic_W1 = tf.Variable(tf.random_normal( [self.config['actor_n_hidden'], 1]), name='W1') critic_b1 = tf.Variable(tf.zeros([1]), name='b1') self.critic_value = tf.matmul(critic_L1, critic_W1) + critic_b1[None, :] critic_loss = tf.reduce_mean( tf.square(self.critic_returns - self.critic_value)) critic_loss = tf.Print(critic_loss, [critic_loss], message='Critic loss=') critic_optimizer = tf.train.RMSPropOptimizer( learning_rate=self.config['critic_learning_rate'], decay=0.9, epsilon=1e-9) self.critic_train = critic_optimizer.minimize(critic_loss) init = tf.initialize_all_variables() # Launch the graph. self.sess = tf.Session() self.sess.run(init)
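# NumPy sketch of the log-probability floor used in the actor loss above: probabilities
# of the taken actions that are exactly zero are replaced by 1e-30 before the log, so the
# REINFORCE eligibility term stays finite. Illustrative values only.
import numpy as np
good_probs = np.array([0.5, 0.0, 0.25])
advantage = np.array([1.0, -1.0, 2.0])               # rewards minus critic feedback
eligibility = np.log(np.where(good_probs == 0.0, 1e-30, good_probs)) * advantage
loss = -np.mean(eligibility)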
def __init__(self, num_abstract_states, num_actions, gamma=0.9, learning_rate=0.00025, replay_start_size=32, epsilon_start=1.0, epsilon_end=0.1, epsilon_steps=10000, replay_memory_size=100, frame_history=1, batch_size=32, error_clip=1, abstraction_function=None, base_network_file=None): config = tf.ConfigProto() config.gpu_options.allow_growth = True self.num_abstract_states = num_abstract_states self.num_abstract_actions = num_abstract_states * (num_abstract_states - 1) self.frame_history = frame_history self.abstraction_function = abstraction_function self.sess = tf.Session(config=config) self.inp_actions = tf.placeholder(tf.float32, [None, self.num_abstract_actions]) inp_shape = [None, 84, 84, self.frame_history] inp_dtype = 'uint8' assert type(inp_dtype) is str self.inp_frames = tf.placeholder(inp_dtype, inp_shape) self.inp_sp_frames = tf.placeholder(inp_dtype, inp_shape) self.inp_terminated = tf.placeholder(tf.bool, [None]) self.inp_reward = tf.placeholder(tf.float32, [None]) self.inp_mask = tf.placeholder(inp_dtype, [None, frame_history]) # convert t self.inp_sigma = tf.placeholder(tf.uint8, [None]) self.inp_sigma_onehot = tf.cast(tf.sparse_to_dense(tf.concat(1, [tf.expand_dims(tf.range(0, batch_size), -1), tf.expand_dims(tf.cast(self.inp_sigma, tf.int32), -1)]), [batch_size, self.num_abstract_states], 1), tf.float32) self.inp_sigma_p = tf.placeholder(tf.uint8, [None]) self.inp_sigma_p_onehot = tf.cast(tf.sparse_to_dense(tf.concat(1, [tf.expand_dims(tf.range(0, batch_size), -1), tf.expand_dims(tf.cast(self.inp_sigma_p, tf.int32), -1)]), [batch_size, self.num_abstract_states], 1), tf.float32) self.inp_sp_mask = tf.placeholder(inp_dtype, [None, frame_history]) self.gamma = gamma self.actions_for_sigma = np.zeros((self.num_abstract_states, self.num_abstract_actions), dtype=np.float32) for a in range(self.num_abstract_actions): i, j = flat_actions_to_state_pairs(a, num_abstract_states) self.actions_for_sigma[i, a] = 1 self.visual_scope = 'visual' self.abstraction_scope = 'abstraction' with tf.variable_scope(self.visual_scope): # mask stuff here mask = tf.reshape(self.inp_mask, [-1, 1, 1, 1]) masked_input = self.inp_frames * mask self.visual_output = hook_visual(masked_input, self.frame_history) with tf.variable_scope(self.abstraction_scope): self.sigma, self.sigma_probs = hook_abstraction(self.visual_output, self.num_abstract_states, batch_size, I=self.inp_sigma_onehot) with tf.variable_scope(self.abstraction_scope, reuse=True): # the one that samples self.sigma_query, self.sigma_query_probs = hook_abstraction(self.visual_output, self.num_abstract_states, 1) with tf.variable_scope(self.visual_scope, reuse=True): mask_sp = tf.reshape(self.inp_sp_mask, [-1, 1, 1, 1]) masked_input_sp = self.inp_sp_frames * mask_sp self.visual_output_sp = hook_visual(masked_input_sp, self.frame_history) with tf.variable_scope(self.abstraction_scope, reuse=True): self.sigma_p, self.sigma_p_probs = hook_abstraction(self.visual_output_sp, self.num_abstract_states, batch_size, I=self.inp_sigma_p_onehot) self.possible_action_vector = tf.stop_gradient(valid_actions_for_sigma(self.actions_for_sigma, self.sigma, self.num_abstract_actions)) with tf.variable_scope('l1_online'): self.q_online = hook_l1(self.sigma, self.num_abstract_actions) with tf.variable_scope('l1_online', reuse=True): self.possible_action_vector_query = -np.inf * (1 - valid_actions_for_sigma(self.actions_for_sigma, self.sigma_query, self.num_abstract_actions)) self.possible_action_vector_query = 
tf.select(tf.is_nan(self.possible_action_vector_query), tf.zeros_like(self.possible_action_vector_query), self.possible_action_vector_query) self.q_online_query = self.possible_action_vector_query + hook_l1(self.sigma_query, self.num_abstract_actions) with tf.variable_scope('l1_online', reuse=True): self.possible_action_vector_prime = -np.inf * (1 - valid_actions_for_sigma(self.actions_for_sigma, self.sigma_p, self.num_abstract_actions)) self.possible_action_vector_prime = tf.select(tf.is_nan(self.possible_action_vector_prime), tf.zeros_like(self.possible_action_vector_prime), self.possible_action_vector_prime) self.q_target = self.possible_action_vector_prime + hook_l1(self.sigma_p, self.num_abstract_actions) self.maxQ = tf.reduce_max(self.q_target, reduction_indices=1) self.r = tf.sign(self.inp_reward) use_backup = tf.cast(tf.logical_not(self.inp_terminated), dtype=tf.float32) self.y = tf.stop_gradient(self.r + use_backup * gamma * self.maxQ) self.delta = tf.reduce_sum(self.inp_actions * self.q_online, reduction_indices=1) - self.y self.error = tf.select(tf.abs(self.delta) < error_clip, 0.5 * tf.square(self.delta), error_clip * tf.abs(self.delta)) self.loss = tf.reduce_sum(self.error) optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=0.95, centered=True, epsilon=0.01) # TODO: add th.get_vars(self.visual_scope)+th.get_vars(self.abstraction_scope) if self.abstraction_function is None: self.train_op = optimizer.minimize(self.loss, var_list=th.get_vars('l1_online', self.abstraction_scope, self.visual_scope)) else: self.train_op = optimizer.minimize(self.loss, var_list=th.get_vars('l1_online')) self.saver = tf.train.Saver(var_list=th.get_vars(self.visual_scope)+th.get_vars(self.abstraction_scope)+th.get_vars('l1_online')+th.get_vars('online')) self.replay_buffer = L1ReplayMemory((84, 84), np.uint8, replay_memory_size, 1) self.frame_history = frame_history self.replay_start_size = replay_start_size self.epsilon = epsilon_start self.epsilon_min = epsilon_end self.epsilon_steps = epsilon_steps self.epsilon_delta = (self.epsilon - self.epsilon_min) / self.epsilon_steps self.action_ticker = 1 self.num_actions = num_actions self.batch_size = batch_size self.l0_learner = L0_Learner(self.sess, self.abstraction_scope, self.visual_scope, num_actions, #self.visual_scope, num_actions, self.num_abstract_actions, self.num_abstract_states, abstraction_function=self.abstraction_function, max_episode_steps=20, base_network_file=base_network_file) self.sess.run(tf.initialize_all_variables()) if base_network_file is not None: self.l0_learner.base_network_saver.restore(self.sess, base_network_file) print 'Restored network from file'
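# NumPy sketch of the invalid-action masking used in the constructor above: multiplying
# -inf by a {0, 1} validity vector gives -inf for invalid actions but NaN (from -inf * 0)
# for valid ones, so the NaNs are swapped back to 0 with a select before the mask is
# added to the Q-values. (NumPy emits a RuntimeWarning for -inf * 0 but still returns NaN.)
import numpy as np
valid = np.array([1.0, 0.0, 1.0])            # 1 = abstract action allowed in this state
mask = -np.inf * (1.0 - valid)               # valid entries become -inf * 0 = NaN
mask = np.where(np.isnan(mask), np.zeros_like(mask), mask)
q_masked = mask + np.array([1.0, 5.0, 2.0])  # the invalid action can never be the argmax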
def __init__(self, config, use_lstm=False, num_samples=512, forward=False, scope_name='gen_seq2seq', dtype=tf.float32): self.scope_name = scope_name with tf.variable_scope(self.scope_name): self.source_vocab_size = config.vocab_size self.target_vocab_size = config.vocab_size self.buckets = config.buckets self.learning_rate = tf.Variable(float(config.learning_rate), trainable=False, dtype=dtype) self.learning_rate_decay_op = self.learning_rate.assign( self.learning_rate * config.learning_rate_decay_factor) self.global_step = tf.Variable(0, trainable=False) self.batch_size = config.batch_size self.emb_dim = config.emb_dim self.num_layers = config.num_layers self.max_gradient_norm = config.max_gradient_norm #self.up_reward = tf.placeholder(tf.bool, name="up_reward") self.mc_search = tf.placeholder(tf.bool, name="mc_search") self.forward_only = tf.placeholder(tf.bool, name="forward_only") # If we use sampled softmax, we need an output projection. output_projection = None softmax_loss_function = None # Create the internal multi-layer cell for our RNN. single_cell = tf.nn.rnn_cell.GRUCell(self.emb_dim) if use_lstm: single_cell = tf.nn.rnn_cell.BasicLSTMCell(self.emb_dim) cell = single_cell if self.num_layers > 1: cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * self.num_layers) # The seq2seq function: we use embedding for the input and attention. def seq2seq_f(encoder_inputs, decoder_inputs, do_decode): return rl_seq2seq.embedding_attention_seq2seq( encoder_inputs, decoder_inputs, cell, num_encoder_symbols=self.source_vocab_size, num_decoder_symbols=self.target_vocab_size, embedding_size=self.emb_dim, output_projection=output_projection, feed_previous=do_decode, mc_search=self.mc_search, dtype=dtype) # Feeds for inputs. self.encoder_inputs = [] self.decoder_inputs = [] self.target_weights = [] for i in xrange( self.buckets[-1][0]): # Last bucket is the biggest one. self.encoder_inputs.append( tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i))) for i in xrange(self.buckets[-1][1] + 1): self.decoder_inputs.append( tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i))) self.target_weights.append( tf.placeholder(dtype, shape=[None], name="weight{0}".format(i))) self.reward = [ tf.placeholder(tf.float32, name="reward_%i" % i) for i in range(len(self.buckets)) ] # Our targets are decoder inputs shifted by one. 
targets = [ self.decoder_inputs[i + 1] for i in xrange(len(self.decoder_inputs) - 1) ] self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets( self.encoder_inputs, self.decoder_inputs, targets, self.target_weights, self.buckets, self.emb_dim, self.batch_size, lambda x, y: seq2seq_f( x, y, tf.select(self.forward_only, True, False)), output_projection=output_projection, softmax_loss_function=softmax_loss_function) with tf.name_scope("gradient_descent"): self.gradient_norms = [] self.updates = [] self.gen_params = [ p for p in tf.trainable_variables() if self.scope_name in p.name ] opt = tf.train.GradientDescentOptimizer(self.learning_rate) for b in xrange(len(self.buckets)): adjusted_losses = tf.mul(self.losses[b], self.reward[b]) gradients = tf.gradients(adjusted_losses, self.gen_params) clipped_gradients, norm = tf.clip_by_global_norm( gradients, self.max_gradient_norm) self.gradient_norms.append(norm) self.updates.append( opt.apply_gradients(zip(clipped_gradients, self.gen_params), global_step=self.global_step)) self.gen_variables = [ k for k in tf.global_variables() if self.scope_name in k.name ] self.saver = tf.train.Saver(self.gen_variables)
def ModelHelper(y_pred_conf, y_pred_loc): """ Define loss function, optimizer, predictions, and accuracy metric Loss includes confidence loss and localization loss conf_loss_mask is created at batch generation time, to mask the confidence losses It has 1 at locations w/ positives, and 1 at select negative locations such that negative-to-positive ratio of NEG_POS_RATIO is satisfied Arguments: * y_pred_conf: Class predictions from model, a tensor of shape [batch_size, num_feature_map_cells * num_defaul_boxes * num_classes] * y_pred_loc: Localization predictions from model, a tensor of shape [batch_size, num_feature_map_cells * num_defaul_boxes * 4] Returns relevant tensor references """ num_total_preds = 0 for fm_size in FM_SIZES: num_total_preds += fm_size[0] * fm_size[1] * NUM_DEFAULT_BOXES num_total_preds_conf = num_total_preds * NUM_CLASSES num_total_preds_loc = num_total_preds * 4 # Input tensors y_true_conf = tf.placeholder( tf.int32, [None, num_total_preds], name='y_true_conf') # classification ground-truth labels y_true_loc = tf.placeholder( tf.float32, [None, num_total_preds_loc], name='y_true_loc') # localization ground-truth labels conf_loss_mask = tf.placeholder( tf.float32, [None, num_total_preds], name='conf_loss_mask') # 1 mask "bit" per def. box # Confidence loss logits = tf.reshape(y_pred_conf, [-1, num_total_preds, NUM_CLASSES]) conf_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( logits, y_true_conf) conf_loss = conf_loss_mask * conf_loss # "zero-out" the loss for don't-care negatives conf_loss = tf.reduce_sum(conf_loss) # Localization loss (smooth L1 loss) # loc_loss_mask is analagous to conf_loss_mask, except 4 times the size diff = y_true_loc - y_pred_loc loc_loss_l2 = 0.5 * (diff**2.0) loc_loss_l1 = tf.abs(diff) - 0.5 smooth_l1_condition = tf.less(tf.abs(diff), 1.0) loc_loss = tf.select(smooth_l1_condition, loc_loss_l2, loc_loss_l1) loc_loss_mask = tf.minimum( y_true_conf, 1 ) # have non-zero localization loss only where we have matching ground-truth box loc_loss_mask = tf.to_float(loc_loss_mask) loc_loss_mask = tf.stack( [loc_loss_mask] * 4, axis=2 ) # [0, 1, 1] -> [[[0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]], ...] loc_loss_mask = tf.reshape( loc_loss_mask, [-1, num_total_preds_loc ]) # removing the inner-most dimension of above loc_loss = loc_loss_mask * loc_loss loc_loss = tf.reduce_sum(loc_loss) # Weighted average of confidence loss and localization loss # Also add regularization loss loss = conf_loss + LOC_LOSS_WEIGHT * loc_loss + tf.reduce_sum( slim.losses.get_regularization_losses()) optimizer = OPT.minimize(loss) #reported_loss = loss #tf.reduce_sum(loss, 1) # DEBUG # Class probabilities and predictions probs_all = tf.nn.softmax(logits) probs, preds_conf = tf.nn.top_k( probs_all ) # take top-1 probability, and the index is the predicted class probs = tf.reshape(probs, [-1, num_total_preds]) preds_conf = tf.reshape(preds_conf, [-1, num_total_preds]) # Return a dictionary of {tensor_name: tensor_reference} ret_dict = { 'y_true_conf': y_true_conf, 'y_true_loc': y_true_loc, 'conf_loss_mask': conf_loss_mask, 'optimizer': optimizer, 'conf_loss': conf_loss, 'loc_loss': loc_loss, 'loss': loss, 'probs': probs, 'preds_conf': preds_conf, 'preds_loc': y_pred_loc, } return ret_dict
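# NumPy sketch of the smooth-L1 localization loss selected above: quadratic for
# |diff| < 1 and linear beyond, the same switch that ModelHelper implements with
# tf.select. Illustrative only.
import numpy as np

def smooth_l1(diff):
    return np.where(np.abs(diff) < 1.0, 0.5 * diff ** 2, np.abs(diff) - 0.5)

assert np.isclose(smooth_l1(np.array(0.4)), 0.08)   # quadratic branch
assert np.isclose(smooth_l1(np.array(2.0)), 1.5)    # linear branch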
def attention(self): self.middle = 200 self.max_candidate = 20 self.disamb_in = tf.placeholder(tf.int32, [None, self.max_candidate], name='disamb_in') self.embedding = tf.placeholder(tf.float32, [14951, self.transe_size], name='embedding') left_query_lstm = tf.nn.rnn_cell.LSTMCell(self.lstm_size) right_query_lstm = tf.nn.rnn_cell.LSTMCell(self.lstm_size) with tf.variable_scope(self.scope): with tf.variable_scope('query'): left_query_out, _ = tf.nn.rnn(left_query_lstm, self.left_in_rev, dtype=tf.float32) with tf.variable_scope('query', reuse=True): right_query_out, _ = tf.nn.rnn(right_query_lstm, self.right_in_rev, dtype=tf.float32) query_in = tf.concat( 1, [self.entity_in, left_query_out[-1], right_query_out[-1]]) Wq1 = tf.Variable(tf.random_normal([self.word_size+2*self.lstm_size, self.middle], \ stddev=self.dev)) Wq2 = tf.Variable( tf.random_normal([self.middle, self.transe_size], stddev=self.dev)) self.query = tf.tanh(tf.matmul(tf.tanh(tf.matmul(query_in, Wq1)), Wq2)) self.query_ = tf.placeholder(tf.float32, [None, self.transe_size]) #choose the most likely embedding expand = tf.gather(self.embedding, self.disamb_in) multi = tf.transpose(tf.pack([self.query] * self.max_candidate), perm=[1, 0, 2]) diff = tf.reduce_sum(tf.pow(expand - multi, 2), 2) ladder = tf.expand_dims(tf.to_int64(tf.range(self.batch_size)), 1) DIFF = tf.expand_dims(tf.argmin(diff, 1), 1) choice = tf.gather_nd(self.disamb_in, tf.concat(1, [ladder, DIFF])) self.sh = tf.placeholder(tf.float32) miss = tf.logical_not(tf.logical_or(\ tf.equal(self.disamb_in[:,1], 0), \ tf.less(tf.reduce_min(diff, 1), self.sh))) # should be false for training temp_query = tf.gather(self.embedding, choice) real_query = tf.select(tf.logical_or(miss, \ tf.equal(choice, tf.zeros([self.batch_size], dtype=tf.int32))), self.query, temp_query) self.A = tf.Variable(tf.random_normal([self.lstm_size*2, self.transe_size], \ mean=0, stddev=self.dev)) self.test = tf.placeholder(tf.bool, [None]) Q = tf.select(self.test, real_query, self.query_) left_att = [tf.pow(tf.reduce_sum(tf.matmul(self.left_att_in[i], self.A) * Q, \ [1], keep_dims=True),2)\ for i in range(self.window)] right_att = [tf.pow(tf.reduce_sum(tf.matmul(self.right_att_in[i], self.A) * Q, \ [1], keep_dims=True),2)\ for i in range(self.window)] return (left_att, right_att)
z = tf.mul(u, tf.nn.tanh(g))

a_newmax = tf.maximum(a_max, a)
exp_diff = tf.exp(a_max - a_newmax)
exp_scaled = tf.exp(a - a_newmax)
n = tf.mul(n, exp_diff) + tf.mul(z, exp_scaled)  # Numerically stable update of numerator
d = tf.mul(d, exp_diff) + exp_scaled  # Numerically stable update of denominator
h = activation(tf.div(n, d))
a_max = a_newmax

ly = tf.matmul(h, W_o) + b_o
error_step = tf.nn.softmax_cross_entropy_with_logits(ly, y[:, i, :])  # Cross-entropy cost function
error += tf.select(tf.greater(l, i), error_step, tf.zeros([batch_size]))  # Include cost from this step only if the sequence length has not been exceeded

##########################################################################################
# Optimizer
##########################################################################################

cost = tf.reduce_mean(tf.div(error, l))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

##########################################################################################
# Train
##########################################################################################

# Operation to initialize session
# (the snippet ends here; the line below is an assumed completion, using the
# same TF 0.x initializer API as the rest of these examples)
init = tf.initialize_all_variables()
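# Illustration (not from the original script): a NumPy sketch of the
# numerically stable running weighted average computed above. Rescaling the
# numerator and denominator by exp(a_max - a_newmax) at every step gives the
# same result as the naive softmax-weighted sum, but never exponentiates a
# large positive number. Function name is hypothetical.
import numpy as np

def stable_running_average(z_seq, a_seq):
    n, d, a_max = 0.0, 0.0, -np.inf
    for z, a in zip(z_seq, a_seq):
        a_newmax = max(a_max, a)
        exp_diff = np.exp(a_max - a_newmax)   # rescale previous numerator/denominator
        exp_scaled = np.exp(a - a_newmax)     # weight of the current step
        n = n * exp_diff + z * exp_scaled
        d = d * exp_diff + exp_scaled
        a_max = a_newmax
    return n / d

z_seq = np.array([1.0, 2.0, 3.0])
a_seq = np.array([1000.0, 1001.0, 1002.0])  # naive exp(a) would overflow
naive = np.sum(z_seq * np.exp(a_seq - a_seq.max())) / np.sum(np.exp(a_seq - a_seq.max()))
print(stable_running_average(z_seq, a_seq), naive)  # both are approximately 2.575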
def k_m_tf(defect_tensor, clusters, max_iters, summaries_dir, stage_str, name_str, go_to_max=False):
    length = len(defect_tensor[:, 0])
    num_clus = clusters
    MAX_ITERS = max_iters
    tiles = len(defect_tensor[0, :])
    start = time.time()

    sess = tf.InteractiveSession()

    with tf.name_scope('input'):
        points = tf.Variable(tf.random_uniform([length, tiles]), dtype=tf.float32)
    with tf.name_scope('cluster_assigns'):
        cluster_assignments = tf.Variable(tf.zeros([length], dtype=tf.float32))
    with tf.name_scope('cents'):
        centroids = tf.Variable(tf.random_crop(points.initialized_value(), [num_clus, tiles]), dtype=tf.float32)
        # centroids = tf.Print(centroids,[centroids], summarize = 16, message = 'centroids')

    # Replicate to N copies of each centroid and K copies of each
    # point, then subtract and compute the sum of squared distances.
    with tf.name_scope('Replicate'):
        rep_centroids = tf.reshape(tf.tile(centroids, [length, 1]), [length, num_clus, tiles])
        # rep_centroids = tf.Print(rep_centroids,[tf.shape(rep_centroids)],message='shape_rep_centroids')
        rep_points = tf.reshape(tf.tile(points, [1, num_clus]), [length, num_clus, tiles])

    with tf.name_scope('Sum_squares'):
        squares = tf.square(rep_points - rep_centroids)
        sum_squares = tf.reduce_sum(squares, reduction_indices=2)  # squares is already squared, so no second tf.square
        squares_1d = tf.scalar_summary('sum_squares', tf.reduce_mean(sum_squares))
        # sum_squares = tf.Print(sum_squares,[sum_squares], summarize = 40, message = 'sum_squares')
        # sum_squares = tf.Print(sum_squares,[tf.shape(sum_squares)], summarize = 16, message = 'sum_squares_shape')

    # Use argmin to select the lowest-distance centroid for each point
    with tf.name_scope('argmin'):
        best_centroids = tf.argmin(sum_squares, 1)
        # best_centroids = tf.Print(best_centroids,[best_centroids], summarize = 40, message = ' best_cents')

    did_assignments_change = tf.reduce_any(
        tf.not_equal(tf.cast(best_centroids, tf.float32), cluster_assignments))

    ## This part exists for counting purposes, since I can't simply access the count in the means part
    with tf.name_scope('counting'):
        const_1d = {}
        num_1d = {}
        found_1d = {}
        scalar_1d = {}
        for i in range(0, num_clus):
            const_1d[i] = tf.constant(i, shape=[length, 1], dtype=tf.int64)  # use the actual number of rows rather than a hard-coded 320
            # string_1d[i] = tf.constant(str[i], shape =[320,1], dtype = tf.string)
        for i in range(0, num_clus):
            num_1d[i] = tf.equal(tf.reshape(best_centroids, [length, 1]), const_1d[i])
            found_1d[i] = tf.reduce_sum(tf.cast(num_1d[i], tf.int32))
            found_1d[i] = tf.expand_dims(found_1d[i], -1)
            scalar_1d[i] = tf.scalar_summary(str(i), tf.squeeze(found_1d[i]))
            # found_1d[i] = tf.Print(found_1d[i], [found_1d[i]], summarize=40, message=str(i))
            # found_1d[i] = tf.Print(found_1d[i], [tf.shape(found_1d[i])], summarize=40, message=str(i))
            # found_1d[i] = tf.Print(found_1d[i],[tf.expand_dims(found_1d[i],0)], summarize = 40, message =str(i))
            # found_1d[i] = tf.Print(found_1d[i],[tf.shape(tf.expand_dims(found_1d[i],0))], summarize = 40, message =str(i))
            # found_1d[i] = tf.Print(found_1d[i], [tf.shape(tf.reshape(found_1d[i],[1,1]))], summarize=40, message=str(i))

        found_tensor = tf.concat(0, [found_1d[i] for i in range(0, num_clus)])
        distro = tf.histogram_summary('Distribution', found_tensor)

    ## Calculate the means at the indices of best_centroids.
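# Illustration (not part of the original function): the assignment step above
# in plain NumPy. Broadcasting plays the role of the tf.tile/tf.reshape
# replication, and argmin over the centroid axis mirrors tf.argmin(sum_squares, 1).
# Names are hypothetical.
import numpy as np

def assign_to_centroids(points, centroids):
    # points: [length, tiles]; centroids: [num_clus, tiles]
    diff = points[:, None, :] - centroids[None, :, :]  # [length, num_clus, tiles]
    sum_squares = np.sum(diff**2, axis=2)              # squared distance to every centroid
    return np.argmin(sum_squares, axis=1)              # index of the nearest centroid per point

points = np.array([[0.0, 0.0], [0.1, 0.0], [1.0, 1.0]])
centroids = np.array([[0.0, 0.0], [1.0, 1.0]])
print(assign_to_centroids(points, centroids))  # -> [0 0 1]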
    with tf.name_scope('means'):
        total = tf.unsorted_segment_sum(points, best_centroids, num_clus)
        count = tf.unsorted_segment_sum(tf.ones_like(points), best_centroids, num_clus)
        # count = tf.Print(count, [tf.shape(count)])
        means = total / count
        means = tf.select(tf.is_nan(means), tf.ones_like(means) * 0, means)  # guard against empty clusters (0 / 0 -> NaN)
        means_1d = tf.scalar_summary('means', tf.reduce_mean(means))
        # means = tf.Print(means,[means],summarize = 16, message = 'MEANS')
        # means = tf.Print(means,[tf.shape(means)], message = 'm_shape')

    # Do not write to the assigned clusters variable until after
    # computing whether the assignments have changed - hence the control_dependencies block
    with tf.name_scope('Do_updates'):
        with tf.control_dependencies([did_assignments_change]):
            do_updates = tf.group(
                centroids.assign(means),
                cluster_assignments.assign(tf.cast(best_centroids, tf.float32)))

    changed = True
    iters = 0
    found_numerical = {}
    # found_1d = tf.Print(found_1d,[found_1d])

    # Merge summaries
    scalar_summary = tf.merge_summary([scalar_1d[i] for i in range(0, num_clus)])
    other_summary = tf.merge_summary([means_1d, squares_1d])
    histogram_summary = tf.merge_summary([distro])
    writer = tf.train.SummaryWriter(summaries_dir + '/' + stage_str + '/kmeans/' + name_str, sess.graph)

    init = tf.initialize_all_variables()
    sess.run(init)

    # Main loop:
    # check for assignment changes and assign new clusters based on the new means. If assignments didn't change, stop.
    while changed and iters < MAX_ITERS:
        iters += 1
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        # if iters%10 == 1:
        [changed, _, histogram_sum_run, scalar_sum_run, other_sum_run] = sess.run(
            [did_assignments_change, do_updates, histogram_summary, scalar_summary, other_summary],
            feed_dict={points: defect_tensor},
            options=run_options,
            run_metadata=run_metadata)  # pass options/run_metadata so add_run_metadata below actually records the trace
        writer.add_run_metadata(run_metadata, 'step%03d' % iters)
        writer.add_summary(histogram_sum_run, iters)
        writer.add_summary(scalar_sum_run, iters)
        writer.add_summary(other_sum_run, iters)
        # else:
        #     [changed, _, scalar_sum_run] = sess.run([did_assignments_change, do_updates, scalar_summary], feed_dict={points: defect_tensor})
        #     writer.add_run_metadata(run_metadata, 'step%03d' % iters)
        #     writer.add_summary(scalar_sum_run, iters)

        ## Note: due to the interconnectivity of found_1d, it seems as if you need to run it ALONGSIDE the session a couple of lines before in order to get numerical results.
        ## Can't do that in a separate run. Weirdly enough it works for found_tensor, which is simply a concat of found_1d. I don't know why.
        # found_numerical[0] = sess.run([found_1d[0]], feed_dict={points:defect_tensor})
        found_numerical[1] = sess.run([found_1d[1]], feed_dict={points: defect_tensor})
        found_numerical[3] = sess.run([found_1d[3]], feed_dict={points: defect_tensor})
        found_numerical[4] = sess.run([found_1d[4]], feed_dict={points: defect_tensor})
        if go_to_max:
            changed = True

    writer.close()
    [centers, assignments] = sess.run([centroids, cluster_assignments])
    end = time.time()
    print("Found in %.2f seconds" % (end - start), iters, "iterations")
    print('Distribution:', sess.run(found_tensor, feed_dict={points: defect_tensor}))
    tf.reset_default_graph()
    sess.close()
    return centers, assignments
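# Illustration (not part of the original function): the centroid-update step
# above in NumPy. np.add.at stands in for tf.unsorted_segment_sum, and the
# NaN check mirrors tf.select(tf.is_nan(means), 0, means) for empty clusters.
# Names are hypothetical.
import numpy as np

def update_centroids(points, assignments, num_clus):
    total = np.zeros((num_clus, points.shape[1]))
    count = np.zeros((num_clus, points.shape[1]))
    np.add.at(total, assignments, points)                 # segment sum of the points
    np.add.at(count, assignments, np.ones_like(points))   # segment sum of ones = cluster sizes
    with np.errstate(invalid='ignore'):
        means = total / count                              # empty clusters give 0/0 = NaN
    return np.where(np.isnan(means), 0.0, means)           # replace NaNs with zeros, as above

points = np.array([[0.0, 0.0], [0.2, 0.0], [1.0, 1.0]])
assignments = np.array([0, 0, 1])
print(update_centroids(points, assignments, num_clus=3))  # cluster 2 is empty -> stays at [0, 0]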