def crf_binary_score(tag_indices, sequence_lengths, transition_params):
  """Sums the transition (binary) potentials along each tag sequence.

  Args:
    tag_indices: A [batch_size, max_seq_len] matrix of tag indices.
    sequence_lengths: A [batch_size] vector of true sequence lengths.
    transition_params: A [num_tags, num_tags] matrix of binary potentials.

  Returns:
    binary_scores: A [batch_size] vector of binary scores.
  """
  tag_count = transition_params.get_shape()[0]
  transition_count = array_ops.shape(tag_indices)[1] - 1

  # A transition links position t to position t + 1, so the "from" tags drop
  # the last column and the "to" tags drop the first one.
  from_tags = array_ops.slice(tag_indices, [0, 0], [-1, transition_count])
  to_tags = array_ops.slice(tag_indices, [0, 1], [-1, transition_count])

  # Row-major offset of each (from, to) pair in the flattened matrix.
  flat_indices = from_tags * tag_count + to_tags
  flat_params = array_ops.reshape(transition_params, [-1])

  # Look up one potential per transition.
  per_transition = array_ops.gather(flat_params, flat_indices)

  # Drop the first mask column so the mask lines up with the transitions
  # (presumably the mask is 1 inside the true length and 0 in the padding --
  # confirm against _lengths_to_masks).
  position_masks = _lengths_to_masks(sequence_lengths,
                                     array_ops.shape(tag_indices)[1])
  transition_masks = array_ops.slice(position_masks, [0, 1], [-1, -1])
  return math_ops.reduce_sum(per_transition * transition_masks, 1)
def input_fn():
  """Builds a randomly offset lyrics window, its labels and a random key.

  Returns:
    A `(features, labels)` pair. `features` maps 'lyrics' to a SparseTensor
    with dense shape [sequence_length, 1] and `input_key_column_name` to a
    string key; `labels` holds the vocabulary ids of the window shifted by
    one position.
  """
  start = random_ops.random_uniform(
      (), minval=0, maxval=sequence_length, dtype=dtypes.int32, seed=seed)

  # Doubling the lyrics lets a window that begins at start > 0 wrap around.
  doubled_lyrics = lyrics_list + lyrics_list
  window = array_ops.slice(doubled_lyrics, [start], [sequence_length])

  # One sparse entry per time step, all in column 0.
  sparse_indices = array_ops.constant(
      [[step, 0] for step in range(sequence_length)], dtype=dtypes.int64)
  inputs = sparse_tensor.SparseTensor(
      indices=sparse_indices,
      values=window,
      dense_shape=[sequence_length, 1])

  vocab_table = lookup.string_to_index_table_from_tensor(
      mapping=list(vocab), default_value=-1, name='lookup')
  next_tokens = array_ops.slice(doubled_lyrics, [start + 1],
                                [sequence_length])
  labels = vocab_table.lookup(next_tokens)

  key_number = random_ops.random_uniform(
      (), minval=0, maxval=10000000, dtype=dtypes.int32, seed=seed)
  input_key = string_ops.string_join(
      ['key_', string_ops.as_string(key_number)])

  features = {'lyrics': inputs, input_key_column_name: input_key}
  return features, labels
def crf_log_norm(inputs, sequence_lengths, transition_params):
  """Computes the log-normalizer of a CRF by running a forward recurrence.

  Args:
    inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials
        to use as input to the CRF layer.
    sequence_lengths: A [batch_size] vector of true sequence lengths.
    transition_params: A [num_tags, num_tags] transition matrix.

  Returns:
    log_norm: A [batch_size] vector of normalizers for a CRF.
  """
  # The first time step seeds the recurrence state; the remaining steps are
  # fed through the cell as its inputs.
  initial_alphas = array_ops.squeeze(
      array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1]), [1])
  remaining_steps = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

  # One step is consumed by the initial state, hence sequence_lengths - 1.
  cell = CrfForwardRnnCell(transition_params)
  _, final_alphas = rnn.dynamic_rnn(
      cell=cell,
      inputs=remaining_steps,
      sequence_length=sequence_lengths - 1,
      initial_state=initial_alphas,
      dtype=dtypes.float32)
  return math_ops.reduce_logsumexp(final_alphas, [1])
def _check_shapes_dynamic(self, operator, v, diag):
  """Attaches runtime shape assertions to `v` and, when given, to `diag`.

  Args:
    operator: Object providing `rank()`, `batch_shape()` and
      `vector_space_dimension()`; `v` and `diag` are validated against it.
    v: Tensor whose rank must equal `operator.rank()`.
    diag: Optional tensor whose rank must equal `operator.rank() - 1`. When
      `None`, every `diag` check is skipped.

  Returns:
    `(v, diag)`, where each non-`None` entry carries control dependencies on
    all assertions built here.
  """
  has_diag = diag is not None
  assertions = []
  with ops.op_scope([operator, v, diag], 'check_shapes'):
    v_shape = array_ops.shape(v)
    operator_rank = operator.rank()
    v_rank = array_ops.rank(v)
    if has_diag:
      diag_shape = array_ops.shape(diag)
      diag_rank = array_ops.rank(diag)

    # Rank: v matches the operator, diag has one dimension fewer.
    assertions.append(check_ops.assert_rank(v, operator_rank))
    if has_diag:
      assertions.append(check_ops.assert_rank(diag, operator_rank - 1))

    # Batch shape: all but the last two dims of v, all but the last of diag.
    expected_batch = operator.batch_shape()
    v_batch = array_ops.slice(v_shape, [0], [v_rank - 2])
    assertions.append(check_ops.assert_equal(expected_batch, v_batch))
    if has_diag:
      expected_batch = operator.batch_shape()
      diag_batch = array_ops.slice(diag_shape, [0], [diag_rank - 1])
      assertions.append(check_ops.assert_equal(expected_batch, diag_batch))

    # Event shape: second-to-last dim of v equals the operator's vector space
    # dimension; the last dims of v and diag agree.
    expected_dim = operator.vector_space_dimension()
    v_rows = array_ops.gather(v_shape, v_rank - 2)
    assertions.append(check_ops.assert_equal(expected_dim, v_rows))
    if has_diag:
      v_cols = array_ops.gather(v_shape, v_rank - 1)
      diag_len = array_ops.gather(diag_shape, diag_rank - 1)
      assertions.append(check_ops.assert_equal(v_cols, diag_len))

    v = control_flow_ops.with_dependencies(assertions, v)
    if has_diag:
      diag = control_flow_ops.with_dependencies(assertions, diag)
    return v, diag
def input_fn():
  """Draws a random 0/1 sequence and splits it into inputs and labels.

  Returns:
    `({'inputs': inputs}, labels)`: `labels` is the first `sequence_length`
    elements of the drawn sequence and `inputs` is the float cast of the
    elements starting at index 1.
  """
  bits = random_ops.random_uniform(
      [sequence_length + 1], 0, 2, dtype=dtypes.int32, seed=seed)
  labels = array_ops.slice(bits, [0], [sequence_length])
  shifted_bits = array_ops.slice(bits, [1], [sequence_length])
  features = {'inputs': math_ops.to_float(shifted_bits)}
  return features, labels
def testSlicingWithInt64Index(self):
  """Slicing accepts int64 indices given as tensors or as NumPy values."""
  with self.cached_session(force_gpu=test.is_gpu_available()):
    a = constant_op.constant([0, 1, 2], dtype=dtypes.int32)

    # `a[i]` and `a[i:i+1]` with an int64 Tensor, then an int64 NumPy value.
    int64_indices = (
        constant_op.constant(1, dtype=dtypes.int64),
        np.asarray(1).astype(np.int64),
    )
    for i in int64_indices:
      self.assertAllEqual(1, self.evaluate(a[i]))
      self.assertAllEqual([1], self.evaluate(a[i:i+1]))

    # `array_ops.slice` with int64 NumPy begin/size, for two element dtypes.
    for element_dtype in (dtypes.int32, dtypes.float32):
      source = constant_op.constant([0, 1, 2], dtype=element_dtype)
      sliced = array_ops.slice(source,
                               np.asarray([1]).astype(np.int64),
                               np.asarray([2]).astype(np.int64))
      self.assertAllEqual([1, 2], self.evaluate(sliced))
def testCollapseAdjacentNonPaddedDimensions(self):
  """Padding then slicing recovers the input and exposes zero borders."""
  # pyformat: disable
  all_paddings = [[[0, 0], [0, 0], [0, 0], [0, 1]],
                  [[0, 0], [2, 3], [0, 0], [0, 0]],
                  [[0, 0], [0, 0], [0, 0], [0, 0]]]
  # pyformat: enable
  for padding_rows in all_paddings:
    pad_before = [before for before, _ in padding_rows]
    pad_after = [after for _, after in padding_rows]
    for dtype in [dtypes.float32, dtypes.int32]:
      inp = constant_op.constant(1, shape=[8, 28, 28, 3], dtype=dtype)
      paddings = constant_op.constant(padding_rows, dtype=dtypes.int32)
      padded = array_ops.pad(inp, paddings)
      input_dims = [dim.value for dim in inp.shape.dims]

      # The original tensor sits right after the leading padding.
      middle = array_ops.slice(padded, pad_before, input_dims)
      # Corner block made only of leading padding.
      left = array_ops.slice(padded, [0, 0, 0, 0], pad_before)
      # Corner block made only of trailing padding.
      trailing_begin = [
          before + size for before, size in zip(pad_before, input_dims)
      ]
      right = array_ops.slice(padded, trailing_begin, [-1, -1, -1, -1])

      with self.test_session(use_gpu=True):
        self.assertAllEqual(inp.eval(), middle.eval())
        self.assertAllEqual(np.zeros(pad_before), left.eval())
        self.assertAllEqual(np.zeros(pad_after), right.eval())
def circular_pad(input_, width, kernel_size):
  """Wraps `input_` around axes 1 and 2 for (circular) convolution.

  Args:
    input_: the input tensor
    width: the width of the tensor.
    kernel_size: the kernel size of the filter.

  Returns:
    a tensor whose width is (width + kernel_size - 1).
  """
  lead = kernel_size // 2
  trail = kernel_size - 1 - lead

  # Axis 1: prepend the last `lead` slices and append the first `trail`.
  wrap_before = array_ops.slice(input_, [0, width - lead, 0, 0],
                                [-1, lead, width, -1])
  wrap_after = array_ops.slice(input_, [0, 0, 0, 0], [-1, trail, width, -1])
  padded_axis1 = array_ops.concat([wrap_before, input_, wrap_after], 1)

  # Axis 2: same wrap, applied to the already extended tensor.
  padded_extent = width + kernel_size - 1
  wrap_left = array_ops.slice(padded_axis1, [0, 0, width - lead, 0],
                              [-1, padded_extent, lead, -1])
  wrap_right = array_ops.slice(padded_axis1, [0, 0, 0, 0],
                               [-1, padded_extent, trail, -1])
  return array_ops.concat([wrap_left, padded_axis1, wrap_right], 2)
def __call__(self, inputs, state, scope=None):
  """Runs one step of the wrapped cell with attention (LSTMA).

  Args:
    inputs: Input tensor for this step; its second static dimension is used
      as the input size when `self._input_size` is None.
    state: Either a `(cell_state, attns, attn_states)` tuple (when
      `self._state_is_tuple`) or one tensor holding the three pieces
      concatenated along axis 1.
    scope: Optional variable scope; defaults to the class name.

  Returns:
    `(output, new_state)`, with `new_state` laid out like `state`.
  """
  with vs.variable_scope(scope or type(self).__name__):
    if self._state_is_tuple:
      cell_state, attns, attn_states = state
    else:
      # Unpack the three column blocks of the concatenated state.
      cell_width = self._cell.state_size
      cell_state = array_ops.slice(state, [0, 0], [-1, cell_width])
      attns = array_ops.slice(state, [0, cell_width], [-1, self._attn_size])
      attn_states = array_ops.slice(
          state, [0, cell_width + self._attn_size],
          [-1, self._attn_size * self._attn_length])
    attn_states = array_ops.reshape(
        attn_states, [-1, self._attn_length, self._attn_size])

    input_size = self._input_size
    if input_size is None:
      input_size = inputs.get_shape().as_list()[1]

    # Mix the previous attention vector into the inputs.
    cell_inputs = _linear([inputs, attns], input_size, True)
    lstm_output, new_cell_state = self._cell(cell_inputs, cell_state)

    if self._state_is_tuple:
      flat_cell_state = array_ops.concat(1, _unpacked_state(new_cell_state))
    else:
      flat_cell_state = new_cell_state
    new_attns, shifted_attn_states = self._attention(flat_cell_state,
                                                     attn_states)

    with vs.variable_scope("AttnOutputProjection"):
      output = _linear([lstm_output, new_attns], self._attn_size, True)

    # Append this step's output to the attention window, then flatten it.
    grown_attn_states = array_ops.concat(
        1, [shifted_attn_states, array_ops.expand_dims(output, 1)])
    flat_attn_states = array_ops.reshape(
        grown_attn_states, [-1, self._attn_length * self._attn_size])

    new_state = (new_cell_state, new_attns, flat_attn_states)
    if not self._state_is_tuple:
      new_state = array_ops.concat(1, list(new_state))
    return output, new_state
def input_fn():
  """Samples a sine curve from a random start and splits it by one step.

  Returns:
    `({'inputs': inputs}, labels)`: `inputs` is the first `sequence_length`
    samples of the curve and `labels` is the `sequence_length` samples
    starting at index 1.
  """
  start = random_ops.random_uniform(
      (), minval=0, maxval=(np.pi * 2.0), dtype=dtypes.float32, seed=seed)
  sample_points = math_ops.linspace(
      start, (sequence_length - 1) * increment, sequence_length + 1)
  sin_curves = math_ops.sin(sample_points)
  features = {'inputs': array_ops.slice(sin_curves, [0], [sequence_length])}
  labels = array_ops.slice(sin_curves, [1], [sequence_length])
  return features, labels
def testPartialShapeInference(self):
  """Static shape survives slices whose sizes use -1 or a computed tensor."""
  expected_shape = [1, 2, 3]
  z = array_ops.zeros((1, 2, 3))
  self.assertAllEqual(z.get_shape().as_list(), expected_shape)

  # All sizes given as the -1 wildcard.
  wildcard_slice = array_ops.slice(z, [0, 0, 0], [-1, -1, -1])
  self.assertAllEqual(wildcard_slice.get_shape().as_list(), expected_shape)

  # First size produced by an op rather than a Python constant.
  computed_size = constant_op.constant(1) + 0
  mixed_slice = array_ops.slice(z, [0, 0, 0], [computed_size, 2, -1])
  self.assertAllEqual(mixed_slice.get_shape().as_list(), expected_shape)
def _LSTMBlockCellGrad(op, *grad):
  """Gradient for LSTMBlockCell.

  Args:
    op: The forward `LSTMBlockCell` operation.
    *grad: Gradients with respect to the seven forward outputs. Only the ones
      for `cs` (index 1) and `h` (index 6) are used.

  Returns:
    Gradients with respect to the eight forward inputs, in input order:
    `(x, cs_prev, h_prev, w, wci, wcf, wco, b)`.

  Raises:
    ValueError: If the static input size of `x` or the static cell size of
      `cs_prev` is unknown.
  """
  # The peephole weights must be unpacked in the same (wci, wcf, wco) order
  # in which they are passed to lstm_block_cell_grad and in which their
  # gradients are returned below. Unpacking them as (wci, wco, wcf) hands
  # the forget and output peephole weights to the kernel swapped.
  (x, cs_prev, h_prev, w, wci, wcf, wco, b) = op.inputs
  (i, cs, f, o, ci, co, _) = op.outputs
  (_, cs_grad, _, _, _, _, h_grad) = grad

  # An unknown batch size becomes the -1 wildcard accepted by slice.
  batch_size = x.get_shape().with_rank(2)[0].value
  if batch_size is None:
    batch_size = -1
  input_size = x.get_shape().with_rank(2)[1].value
  if input_size is None:
    raise ValueError("input_size from `x` should not be None.")
  cell_size = cs_prev.get_shape().with_rank(2)[1].value
  if cell_size is None:
    raise ValueError("cell_size from `cs_prev` should not be None.")

  (cs_prev_grad, dicfo, wci_grad, wcf_grad,
   wco_grad) = gen_lstm_ops.lstm_block_cell_grad(
       x,
       cs_prev,
       h_prev,
       w,
       wci,
       wcf,
       wco,
       b,
       i,
       cs,
       f,
       o,
       ci,
       co,
       cs_grad,
       h_grad,
       use_peephole=op.get_attr("use_peephole"))

  # Backprop from dicfo to xh, then split the columns back into x and h_prev.
  xh_grad = math_ops.matmul(dicfo, w, transpose_b=True)

  x_grad = array_ops.slice(xh_grad, (0, 0), (batch_size, input_size))
  x_grad.get_shape().merge_with(x.get_shape())

  h_prev_grad = array_ops.slice(xh_grad, (0, input_size),
                                (batch_size, cell_size))
  h_prev_grad.get_shape().merge_with(h_prev.get_shape())

  # Backprop from dicfo to w.
  xh = array_ops.concat([x, h_prev], 1)
  w_grad = math_ops.matmul(xh, dicfo, transpose_a=True)
  w_grad.get_shape().merge_with(w.get_shape())

  # Backprop from dicfo to b.
  b_grad = nn_ops.bias_add_grad(dicfo)
  b_grad.get_shape().merge_with(b.get_shape())

  return (x_grad, cs_prev_grad, h_prev_grad, w_grad, wci_grad, wcf_grad,
          wco_grad, b_grad)
def crf_decode(potentials, transition_params, sequence_length):
  """Decode the highest scoring sequence of tags in TensorFlow.

  This is a function for tensor.

  Args:
    potentials: A [batch_size, max_seq_len, num_tags] tensor of unary
      potentials.
    transition_params: A [num_tags, num_tags] matrix of binary potentials.
    sequence_length: A [batch_size] vector of true sequence lengths.

  Returns:
    decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`.
      Contains the highest scoring tag indices.
    best_score: A [batch_size] vector, containing the score of `decode_tags`.
  """
  # Shape comments use 'B' for batch_size, 'T' for max_seq_len and 'O' for
  # num_tags (output).
  num_tags = potentials.get_shape()[2].value

  # Forward pass: the first step seeds the state, the remaining steps are
  # the inputs. It yields the backpointers and the final scores.
  forward_cell = CrfDecodeForwardRnnCell(transition_params)
  first_step = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
  forward_state = array_ops.squeeze(first_step, axis=[1])  # [B, O]
  later_steps = array_ops.slice(potentials, [0, 1, 0],
                                [-1, -1, -1])  # [B, T-1, O]
  backpointers, last_score = rnn.dynamic_rnn(
      forward_cell,
      inputs=later_steps,
      sequence_length=sequence_length - 1,
      initial_state=forward_state,
      time_major=False,
      dtype=dtypes.int32)  # [B, T - 1, O], [B, O]
  reversed_backpointers = gen_array_ops.reverse_sequence(
      backpointers, sequence_length - 1, seq_dim=1)  # [B, T-1, O]

  # Backward pass: start from the best final tag and follow the reversed
  # backpointers.
  backward_cell = CrfDecodeBackwardRnnCell(num_tags)
  last_tag = math_ops.cast(
      math_ops.argmax(last_score, axis=1), dtype=dtypes.int32)  # [B]
  last_tag = array_ops.expand_dims(last_tag, axis=-1)  # [B, 1]
  earlier_tags, _ = rnn.dynamic_rnn(
      backward_cell,
      inputs=reversed_backpointers,
      sequence_length=sequence_length - 1,
      initial_state=last_tag,
      time_major=False,
      dtype=dtypes.int32)  # [B, T - 1, 1]
  earlier_tags = array_ops.squeeze(earlier_tags, axis=[2])  # [B, T - 1]

  # Tags were produced last-to-first; put the final tag in front and reverse
  # each sequence over its true length.
  reversed_tags = array_ops.concat([last_tag, earlier_tags],
                                   axis=1)  # [B, T]
  decode_tags = gen_array_ops.reverse_sequence(
      reversed_tags, sequence_length, seq_dim=1)  # [B, T]

  best_score = math_ops.reduce_max(last_score, axis=1)  # [B]
  return decode_tags, best_score
def _untransform_gru_canonical(self, transformed_weights, transformed_biases):
  """The reverse procedure of _fuse_gru_canonical().

  Args:
    transformed_weights: a list of tensors, 3 for each layer. The 1st for
      reset and update gates; the 2nd and 3rd for the new memory gate.
    transformed_biases: 5 tensors each layer. The first for reset_and_update
      gate; the next two in line for candidate gate. The last 2 are original
      tensors for reset_and_update gates, retained since cuDNN biases are not
      restorable from the fused version.

  Returns:
    Two lists of tensors for weights and biases respectively.
    There are 6 tensors per weight and per bias for each layer:
    tensor 0-2 are applied to the input from the previous layer and
    tensor 3-5 to the recurrent input. Tensor 0 and 3 are for the reset gate;
    tensor 1 and 4 the update gate; tensor 2 and 5 the new memory gate.
  """
  weights, biases = [], []
  # Three fused weights must come with five fused biases.
  assert 5 * len(transformed_weights) == len(transformed_biases) * 3
  layer_count = len(transformed_weights) // 3
  for layer in range(layer_count):
    weight_base = 3 * layer
    bias_base = 5 * layer
    num_units = self._cudnn_rnn.num_units
    # Only the first layer sees the external input; later layers consume the
    # previous layer's output.
    input_size = self._cudnn_rnn.input_size if layer == 0 else num_units

    fused_gate_weights = transformed_weights[weight_base]
    # reset and update gate weights applied on layer inputs.
    input_block = array_ops.slice(fused_gate_weights, [0, 0],
                                  [input_size, 2 * num_units])
    # reset and update gate weights applied on recurrent inputs.
    recurrent_block = array_ops.slice(fused_gate_weights, [input_size, 0],
                                      [num_units, 2 * num_units])
    input_halves = array_ops.split(input_block, 2, axis=1)
    recurrent_halves = array_ops.split(recurrent_block, 2, axis=1)
    input_gate_weights = [_flatten_transpose(w) for w in input_halves]
    recurrent_gate_weights = [_flatten_transpose(w) for w in recurrent_halves]

    # candidate gate weights
    candidate_input = _flatten_transpose(transformed_weights[weight_base + 1])
    candidate_recurrent = _flatten_transpose(
        transformed_weights[weight_base + 2])

    weights += input_gate_weights
    weights.append(candidate_input)
    weights += recurrent_gate_weights
    weights.append(candidate_recurrent)

    # Recover biases for reset and update gates.
    input_gate_biases = array_ops.split(
        transformed_biases[bias_base + 3], 2, axis=0)
    recurrent_gate_biases = array_ops.split(
        transformed_biases[bias_base + 4], 2, axis=0)
    biases += input_gate_biases
    biases.append(transformed_biases[bias_base + 1])
    biases += recurrent_gate_biases
    biases.append(transformed_biases[bias_base + 2])
  return weights, biases
def input_fn():
  """Builds a batch of sine curves with random starts, split by one step.

  Returns:
    `({'inputs': inputs}, labels)`: `inputs` is the leading
    [batch_size, sequence_length] window with a trailing axis added and
    `labels` is the same-sized window starting at column 1.
  """
  starts = random_ops.random_uniform(
      [batch_size], maxval=(2 * np.pi), seed=seed)
  sin_curves = functional_ops.map_fn(
      _sin_fn, (starts,), dtype=dtypes.float32)
  window_size = [batch_size, sequence_length]
  leading_window = array_ops.slice(sin_curves, [0, 0], window_size)
  inputs = array_ops.expand_dims(leading_window, 2)
  labels = array_ops.slice(sin_curves, [0, 1], window_size)
  return {'inputs': inputs}, labels
def _broadcast_x_higher_rank_than_sigma():
  """Rotates the leading axes of `x_centered` to the end and merges them.

  Returns:
    `x_centered` transposed so that its first `sigma_rank - 1` axes come
    last, then reshaped so those axes collapse into a single trailing
    dimension.
  """
  leading_dims = array_ops.slice(x_shape, [0], sigma_rank_vec - 1)
  # NOTE(review): the slice size here is x_rank_vec - 1, not the number of
  # remaining dims; kept as-is, confirm against the enclosing function.
  trailing_dims = array_ops.slice(x_shape, sigma_rank_vec - 1,
                                  x_rank_vec - 1)
  trailing_axes = math_ops.range(sigma_rank - 1, x_rank)
  leading_axes = math_ops.range(0, sigma_rank - 1)
  rotation = array_ops.concat(0, (trailing_axes, leading_axes))

  # Convert to [D, E, F, ..., k, B, C]
  rotated = array_ops.transpose(x_centered, perm=rotation)

  # Reshape to [D, E, F, ..., k, B*C]
  merged_leading = array_ops.pack([math_ops.reduce_prod(leading_dims, 0)])
  target_shape = array_ops.concat(0, (trailing_dims, merged_leading))
  return array_ops.reshape(rotated, target_shape)
def _PadGrad(op, grad):
  """Gradient for Pad.

  Pad surrounds the original tensor with extra values, so the gradient is
  the region of `grad` that corresponds to the original tensor.

  Args:
    op: The forward Pad operation; input 0 is the padded tensor and input 1
      the paddings.
    grad: Gradient with respect to the Pad output.

  Returns:
    `(input_grad, None)`; the paddings input receives no gradient.
  """
  original = op.inputs[0]
  paddings = op.inputs[1]  # [Rank(x), 2]

  # First column of the paddings: the amount added before each dimension.
  first_column_size = array_ops.pack([array_ops.rank(original), 1])
  leading_pad = array_ops.slice(paddings, [0, 0], first_column_size)

  # Flatten [Rank(x), 1] into a 1-D begin vector for the slice.
  begin = array_ops.reshape(leading_pad, [-1])
  original_shape = array_ops.shape(original)
  input_grad = array_ops.slice(grad, begin, original_shape)
  return input_grad, None
def __call__(self, inputs, state, scope=None):
  """Runs one step of the field cell.

  The step adds to `h` a weighted sum of `self._fields` evaluated along the
  straight segment from the previous inputs to the current inputs,
  multiplied elementwise by the input displacement and reduced over the
  last axis.

  Args:
    inputs: Input tensor for this step.
    state: Either a `(prev_inputs, h)` tuple (when `self._state_is_tuple`)
      or one tensor holding `prev_inputs` and `h` concatenated along axis 1.
    scope: Optional variable scope; defaults to "basic_field_cell".

  Returns:
    `(new_h, new_state)`, where `new_state` stores the current inputs and
    `new_h` in the same layout as `state`.
  """
  with vs.variable_scope(scope or "basic_field_cell") as scope:
    if self._state_is_tuple:
      prev_inputs, h = state
    else:
      prev_inputs = array_ops.slice(state, [0, 0], [-1, self._input_size])
      h = array_ops.slice(state, [0, self._input_size], [-1, self._num_units])

    # One weight per sample point (both end points plus self._n_inter
    # interior points), normalized to sum to one.
    w = 1.0*0.5*np.cos(
        np.linspace(0, 2*np.pi, num=self._n_inter+2, endpoint=True))
    w = w/np.sum(w)

    trapezoid_fields = w[0]*self._fields(prev_inputs)
    # Every later evaluation must reuse the variables created just above.
    scope.reuse_variables()
    for n in range(1, self._n_inter+2):
      # float() forces true division. Under Python 2 without
      # `from __future__ import division`, n/(self._n_inter+1) truncates to
      # 0 for every interior point, collapsing them all onto prev_inputs.
      alpha = float(n)/(self._n_inter+1)
      inter = (1-alpha)*prev_inputs + alpha*inputs
      trapezoid_fields += w[n]*self._fields(inter)

    d_inputs = (inputs - prev_inputs)
    path_int = tf.reduce_sum(
        tf.mul(trapezoid_fields, d_inputs), reduction_indices=-1)
    path_int = tf.transpose(path_int)
    new_h = path_int + h

    if self._state_is_tuple:
      new_state = FieldStateTuple(inputs, new_h)
    else:
      new_state = array_ops.concat(1, [inputs, new_h])
    return new_h, new_state
def testInt64Slicing(self):
  """Slices a tensor too large for int32 indexing using int64 begins."""
  with self.cached_session(force_gpu=test.is_gpu_available()):
    # 8 * (2**29 + 3) elements, i.e. more than 2**32, alternating
    # False/True.
    a_large = array_ops.tile(
        constant_op.constant(np.array([False, True] * 4)), [2**29 + 3])

    # Small begin offset, passed as an int64 NumPy array.
    slice_t = array_ops.slice(a_large, np.asarray([3]).astype(np.int64), [3])
    slice_val = self.evaluate(slice_t)
    self.assertAllEqual([True, False, True], slice_val)

    # Begin offset beyond the int32 range. A plain int literal is used
    # instead of long(): `long` does not exist on Python 3, and Python 2
    # promotes the int automatically.
    slice_t = array_ops.slice(
        a_large, constant_op.constant([2**32 + 3], dtype=dtypes.int64), [3])
    slice_val = self.evaluate(slice_t)
    self.assertAllEqual([True, False, True], slice_val)
def input_fn():
  """Samples a sine curve from a random start and derives a key from it.

  Returns:
    `(features, labels)`: `features` maps 'inputs' to the first
    `sequence_length` samples and `input_key_column_name` to the string
    'key_' followed by `10000 * start` cast to int32; `labels` is the
    `sequence_length` samples starting at index 1.
  """
  start = random_ops.random_uniform(
      (), minval=0, maxval=(np.pi * 2.0), dtype=dtypes.float32, seed=seed)
  sample_points = math_ops.linspace(
      start, (sequence_length - 1) * increment, sequence_length + 1)
  sin_curves = math_ops.sin(sample_points)
  inputs = array_ops.slice(sin_curves, [0], [sequence_length])
  labels = array_ops.slice(sin_curves, [1], [sequence_length])

  scaled_start = math_ops.cast(10000 * start, dtypes.int32)
  input_key = string_ops.string_join(
      ['key_', string_ops.as_string(scaled_start)])

  features = {'inputs': inputs, input_key_column_name: input_key}
  return features, labels
def _GetRealValue(value):
  """Get the real value.

  If backprop "uses" a value produced by forward inference, an
  accumulator is added in the forward loop to accumulate its values,
  so we use the accumulated value, indexed by the backprop counter.

  The result is cached in the forward context's `history_map` under
  `value.name`, so repeated calls for the same tensor reuse it.

  Args:
    value: A tensor to be captured.

  Returns:
    The same tensor value from the saved history.
  """
  # This initial binding is overwritten by the cache lookup two lines below.
  real_value = value
  forward_ctxt = value.op._get_control_flow_context()
  # None when nothing has been recorded for this tensor yet.
  real_value = forward_ctxt.history_map.get(value.name)
  assert value.op.type != "Variable"
  if real_value is None:
    if value.op.type == "Enter" and value.op.get_attr("is_constant"):
      # Use the input of this Enter node
      real_value = GetRealOp(value.op).inputs[0]
    else:
      # Accumulate the history of this value.
      # NOTE(yuanbyu): Don't accumulate for constants. One approach is
      # to deepcopy the constants for the grad while context.
      history_value = forward_ctxt.AddForwardAccumulateLoop(value)

      # The shapes of the whole history and a single event element.
      # The shape ops are built between Exit() and Enter() of the grad
      # context (presumably so they are created outside it -- TODO confirm).
      forward_ctxt.grad_context.Exit()
      elem_rank = array_ops.rank(history_value) - 1
      elem_rank_vec = array_ops.expand_dims(elem_rank, 0)
      # Shape of the history with its first dimension dropped.
      elem_shape = array_ops.slice(array_ops.shape(history_value), [1],
                                   elem_rank_vec)
      # Size of the slice: one entry along the first dimension.
      slice_shape = array_ops.concat(0, [[1], elem_shape])
      forward_ctxt.grad_context.Enter()

      # The begin position of the slice at slice_index.
      slice_index = forward_ctxt.grad_context.index
      b1 = array_ops.zeros(elem_rank_vec, dtype=types.int32)
      b = array_ops.concat(0, [array_ops.expand_dims(slice_index, 0), b1])

      # The slice at slice_index.
      # TODO(irving): Replace with gather once that's GPU accelerated
      real_value = array_ops.squeeze(
          array_ops.slice(history_value,
                          b,
                          slice_shape,
                          name="real"),
          squeeze_dims=[0])
    # Remember the result so later calls for this tensor hit the cache.
    forward_ctxt.history_map[value.name] = real_value
  return real_value
def _num_present(losses, weights, per_batch=False):
  """Computes the number of elements in the loss function induced by `weights`.

  A given weights tensor induces different numbers of usable elements in the
  `losses` tensor. The `weights` tensor is broadcast across `losses` for all
  possible dimensions. For example, if `losses` is a tensor of dimension
  `[4, 5, 6, 3]` and `weights` is a tensor of shape `[4, 5]`, then `weights`
  is, in effect, tiled to match the shape of `losses`. Following this
  effective tile, the total number of present elements is the number of
  non-zero weights.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: `Tensor` of shape `[]`, `[batch_size]` or
      `[batch_size, d1, ... dK]`, where K < N.
    per_batch: Whether to return the number of elements per batch or as a
      sum total.

  Returns:
    The number of present (non-zero) elements in the losses tensor. If
      `per_batch` is `True`, the value is returned as a tensor of size
      `[batch_size]`. Otherwise, a single scalar tensor is returned.
  """
  weights_rank = weights.get_shape().ndims

  # Scalar weights: either every element counts or none does.
  if weights_rank == 0:
    if losses.get_shape().ndims == 0:
      batch_size = 1
    else:
      leading_dim = array_ops.slice(array_ops.shape(losses), [0], [1])
      batch_size = array_ops.reshape(leading_dim, [])
    elements_per_example = math_ops.div(
        math_ops.to_float(array_ops.size(losses)),
        math_ops.to_float(batch_size))
    elements_per_example = array_ops.where(
        math_ops.equal(weights, 0), 0.0, elements_per_example)
    # Repeat the per-example count once per batch entry.
    num_per_batch = math_ops.multiply(
        array_ops.ones(array_ops.reshape(batch_size, [1])),
        elements_per_example)
    if per_batch:
      return num_per_batch
    return math_ops.reduce_sum(num_per_batch)

  # First, count the nonzero weights of each batch entry.
  if weights_rank >= 1:
    non_batch_axes = list(range(1, weights_rank))
    num_nonzero_per_batch = math_ops.reduce_sum(
        math_ops.to_float(math_ops.not_equal(weights, 0)),
        reduction_indices=non_batch_axes)

  # Next, find how many loss elements each weight is broadcast over: the
  # product of the loss dims beyond the rank of `weights`.
  broadcast_dims = array_ops.slice(
      array_ops.shape(losses), [weights_rank], [-1])
  num_to_broadcast = math_ops.to_float(math_ops.reduce_prod(broadcast_dims))

  num_per_batch = math_ops.multiply(num_nonzero_per_batch, num_to_broadcast)
  if per_batch:
    return num_per_batch
  return math_ops.reduce_sum(num_per_batch)
def adjust_hue(image, delta, name=None):
  """Adjust hue of an RGB image.

  This is a convenience method that converts an RGB image to float
  representation, converts it to HSV, add an offset to the hue channel,
  converts back to RGB and then back to the original data type. If several
  adjustments are chained it is advisable to minimize the number of redundant
  conversions.

  `image` is an RGB image. The image hue is adjusted by converting the
  image to HSV and rotating the hue channel (H) by `delta`. The image is then
  converted back to RGB.

  `delta` must be in the interval `[-1, 1]`.

  Setting the environment variable `TF_ADJUST_HUE_FUSED` to 'true', 't' or
  '1' (case-insensitive) selects the fused kernel instead.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    delta: float. How much to add to the hue channel.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.name_scope(name, 'adjust_hue', [image]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Keep the incoming dtype so the result can be converted back to it.
    orig_dtype = image.dtype
    flt_image = convert_image_dtype(image, dtypes.float32)

    # TODO(zhengxq): we will switch to the fused version after we add a GPU
    # kernel for that.
    fused_flag = os.environ.get('TF_ADJUST_HUE_FUSED', '')
    use_fused = fused_flag.lower() in ('true', 't', '1')

    if use_fused:
      rgb_altered = gen_image_ops.adjust_hue(flt_image, delta)
    else:
      hsv = gen_image_ops.rgb_to_hsv(flt_image)

      # Split the last axis into its three channels.
      hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
      saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
      value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])

      # Shift by (delta + 1) before wrapping into [0, 1). With delta >= -1
      # the extra 1 keeps the operand non-negative (assuming hue itself is
      # non-negative -- confirm against rgb_to_hsv).
      hue = math_ops.mod(hue + (delta + 1.), 1.)

      hsv_altered = array_ops.concat_v2([hue, saturation, value], 2)
      rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)

    return convert_image_dtype(rgb_altered, orig_dtype)
def input_fn():
  """Draws a random 0/1 sequence; in INFER mode adds a string placeholder.

  Returns:
    `(features, labels)`. `features['inputs']` is the float cast of the
    sequence from index 1. In INFER mode `features[input_feature_key]` is a
    string placeholder and `labels` is None; otherwise `labels` is the first
    `sequence_length` elements of the sequence.
  """
  bits = random_ops.random_uniform(
      [sequence_length + 1], 0, 2, dtype=dtypes.int32, seed=seed)
  labels = array_ops.slice(bits, [0], [sequence_length])
  shifted_bits = array_ops.slice(bits, [1], [sequence_length])
  features = {'inputs': math_ops.to_float(shifted_bits)}

  if mode == model_fn_lib.ModeKeys.INFER:
    # Inference has no labels.
    features[input_feature_key] = array_ops.placeholder(dtypes.string)
    labels = None
  return features, labels
def _num_present(losses, weight, per_batch=False):
  """Computes the number of elements in the loss function induced by `weight`.

  A given weight tensor induces different numbers of usable elements in the
  `losses` tensor. The `weight` tensor is broadcast across `losses` for all
  possible dimensions. For example, if `losses` is a tensor of dimension
  [4, 5, 6, 3] and weight is a tensor of size [4, 5], then weight is, in
  effect, tiled to match the size of `losses`. Following this effective tile,
  the total number of present elements is the number of non-zero weights.

  Args:
    losses: A tensor of size [batch_size, d1, ... dN].
    weight: A tensor of size [1] or [batch_size, d1, ... dK] where K < N.
    per_batch: Whether to return the number of elements per batch or as a
      sum total.

  Returns:
    The number of present (non-zero) elements in the losses tensor. If
      `per_batch` is True, the value is returned as a tensor of size
      [batch_size]. Otherwise, a single scalar tensor is returned.
  """
  # Squeeze first so that dims of [2, 1] get mapped to [2,].
  weight = array_ops.squeeze(weight)
  weight_rank = weight.get_shape().ndims

  # Scalar weight: either every element counts or none does.
  if weight_rank == 0:
    leading_dim = array_ops.slice(array_ops.shape(losses), [0], [1])
    batch_size = array_ops.reshape(leading_dim, [])
    elements_per_example = math_ops.div(
        math_ops.to_float(array_ops.size(losses)),
        math_ops.to_float(batch_size))
    elements_per_example = math_ops.select(
        math_ops.equal(weight, 0), 0.0, elements_per_example)
    # Repeat the per-example count once per batch entry.
    num_per_batch = math_ops.mul(
        array_ops.ones(array_ops.reshape(batch_size, [1])),
        elements_per_example)
    if per_batch:
      return num_per_batch
    return math_ops.reduce_sum(num_per_batch)

  # First, count the nonzero weights of each batch entry.
  if weight_rank >= 1:
    non_batch_axes = list(range(1, weight_rank))
    num_nonzero_per_batch = math_ops.reduce_sum(
        math_ops.to_float(math_ops.not_equal(weight, 0)),
        reduction_indices=non_batch_axes)

  # Next, find how many loss elements each weight is broadcast over: the
  # product of the loss dims beyond the rank of `weight`.
  broadcast_dims = array_ops.slice(
      array_ops.shape(losses), [weight_rank], [-1])
  num_to_broadcast = math_ops.to_float(math_ops.reduce_prod(broadcast_dims))

  num_per_batch = math_ops.mul(num_nonzero_per_batch, num_to_broadcast)
  if per_batch:
    return num_per_batch
  return math_ops.reduce_sum(num_per_batch)
def input_fn():
  """Draws a batch of random 0/1 sequences and splits them by one step.

  Returns:
    `({'inputs': inputs}, labels)`: `labels` is the leading
    [batch_size, sequence_length] window and `inputs` is the float cast of
    the window starting at column 1, with a trailing axis added.
  """
  bits = random_ops.random_uniform(
      [batch_size, sequence_length + 1], 0, 2, dtype=dtypes.int32, seed=seed)
  window_size = [batch_size, sequence_length]
  labels = array_ops.slice(bits, [0, 0], window_size)
  shifted_bits = array_ops.slice(bits, [0, 1], window_size)
  inputs = array_ops.expand_dims(math_ops.to_float(shifted_bits), 2)
  return {'inputs': inputs}, labels
def adjust_saturation(image, saturation_factor, name=None):
  """Adjust saturation of an RGB image.

  This is a convenience method that converts an RGB image to float
  representation, converts it to HSV, add an offset to the saturation channel,
  converts back to RGB and then back to the original data type. If several
  adjustments are chained it is advisable to minimize the number of redundant
  conversions.

  `image` is an RGB image. The image saturation is adjusted by converting the
  image to HSV and multiplying the saturation (S) channel by
  `saturation_factor` and clipping. The image is then converted back to RGB.

  Setting the environment variable `TF_ADJUST_SATURATION_FUSED` to 'true',
  't' or '1' (case-insensitive) selects the fused kernel instead.

  Args:
    image: RGB image or images. Size of the last dimension must be 3.
    saturation_factor: float. Factor to multiply the saturation by.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.name_scope(name, 'adjust_saturation', [image]) as name:
    image = ops.convert_to_tensor(image, name='image')
    # Keep the incoming dtype so the result can be converted back to it.
    orig_dtype = image.dtype
    flt_image = convert_image_dtype(image, dtypes.float32)

    # TODO(zhengxq): we will switch to the fused version after we add a GPU
    # kernel for that.
    fused_flag = os.environ.get('TF_ADJUST_SATURATION_FUSED', '')
    use_fused = fused_flag.lower() in ('true', 't', '1')

    if use_fused:
      rgb_altered = gen_image_ops.adjust_saturation(flt_image,
                                                    saturation_factor)
    else:
      hsv = gen_image_ops.rgb_to_hsv(flt_image)

      # Split the last axis into its three channels.
      hue = array_ops.slice(hsv, [0, 0, 0], [-1, -1, 1])
      saturation = array_ops.slice(hsv, [0, 0, 1], [-1, -1, 1])
      value = array_ops.slice(hsv, [0, 0, 2], [-1, -1, 1])

      # Scale the saturation, then clamp it back into [0, 1].
      scaled_saturation = saturation * saturation_factor
      clipped_saturation = clip_ops.clip_by_value(scaled_saturation, 0.0, 1.0)

      hsv_altered = array_ops.concat([hue, clipped_saturation, value], 2)
      rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)

    return convert_image_dtype(rgb_altered, orig_dtype)
def _get_diff_for_monotonic_comparison(x):
  """Gets the difference x[1:] - x[:-1] of the flattened input.

  Args:
    x: Numeric tensor; it is flattened before differencing.

  Returns:
    A 1-D tensor of consecutive differences, or an empty tensor of the same
    dtype when `x` has fewer than two elements.

  Raises:
    TypeError: If `x` is not a numeric tensor.
  """
  x = array_ops.reshape(x, [-1])
  if not is_numeric_tensor(x):
    raise TypeError("Expected x to be numeric, instead found: %s" % x)

  has_fewer_than_two = math_ops.less(array_ops.size(x), 2)
  diff_length = array_ops.shape(x) - 1

  def _empty_result():
    # Fewer than two elements: there is nothing to compare.
    return ops.convert_to_tensor([], dtype=x.dtype)

  def _consecutive_diff():
    # x[1:] - x[:-1]
    return (array_ops.slice(x, [1], diff_length) -
            array_ops.slice(x, [0], diff_length))

  return control_flow_ops.cond(has_fewer_than_two, _empty_result,
                               _consecutive_diff)
def input_fn():
  """Builds a fixed alternating 0/1 batch and adds it to `state_dict`.

  Row `i`, column `j` of the sequence is `(starting_step + i + j) % 2`.

  Returns:
    `(state_dict, labels)`. The closed-over `state_dict` is updated in place
    with an 'inputs' entry (the float cast of the window starting at column
    1, with a trailing axis added); `labels` is the leading
    [batch_size, sequence_length] window.
  """
  rows = [[(starting_step + row + col) % 2
           for col in range(sequence_length + 1)]
          for row in range(batch_size)]
  sequence = constant_op.constant(rows, dtype=dtypes.int32)

  window_size = [batch_size, sequence_length]
  labels = array_ops.slice(sequence, [0, 0], window_size)
  shifted = array_ops.slice(sequence, [0, 1], window_size)

  # The outer dict is deliberately the object that gets modified and
  # returned, not a copy.
  state_dict['inputs'] = array_ops.expand_dims(math_ops.to_float(shifted), 2)
  return state_dict, labels
def testComputedShape(self):
  """Static shape inference with tensor-valued begin and size entries."""
  # NOTE(mrry): We cannot currently handle partially-known values,
  # because `tf.slice()` uses -1 to specify a wildcard size, and
  # this can't be handled using the
  # `tensor_util.constant_value_as_shape()` trick.
  a = constant_op.constant([[1, 2, 3], [4, 5, 6]])

  # Constant tensors for begin and size: the shape is fully inferred.
  const_begin = constant_op.constant(0)
  const_size = constant_op.constant(1)
  fully_known = array_ops.slice(a, [const_begin, 0], [const_size, 2])
  self.assertEqual([1, 2], fully_known.get_shape())

  # Placeholder begin with a -1 size: that dimension stays unknown.
  fed_begin = array_ops.placeholder(dtypes.int32, shape=())
  partly_known = array_ops.slice(a, [fed_begin, 0], [-1, 2])
  self.assertEqual([None, 2], partly_known.get_shape().as_list())
def __call__(self, inputs, state, scope=None):
  """Run one step of the layer-normalized LSTM.

  Args:
    inputs: input Tensor, 2D, batch x num_units.
    state: if `state_is_tuple` is False, this must be a state Tensor,
      `2-D, batch x state_size`. If `state_is_tuple` is True, this must be a
      tuple of state Tensors, both `2-D`, with column sizes `c_state` and
      `m_state`.
    scope: VariableScope for the created subgraph; defaults to the name of
      this object's class.

  Returns:
    A tuple `(m, new_state)`:
    - `m`: a `2-D` Tensor, the output of the cell after reading `inputs` when
      the previous state was `state`.
    - `new_state`: an `LSTMStateTuple(c, m)` if `state_is_tuple` is True,
      otherwise `c` and `m` concatenated along dimension 1.

  Raises:
    ValueError: If input size cannot be inferred from inputs via
      static shape inference.
  """
  if self._num_proj is None:
    num_proj = self._num_units
  else:
    num_proj = self._num_proj

  if self._state_is_tuple:
    c_prev, m_prev = state
  else:
    # Concatenated state: the first num_units columns are c, the rest are m.
    c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
    m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

  input_size = inputs.get_shape().with_rank(2)[1]
  if input_size.value is None:
    raise ValueError(
        "Could not infer input size from inputs.get_shape()[-1]")

  gate_width = 4 * self._num_units

  def _norm_param(param_name, fill_fn, width):
    # Layer-norm gain/shift vector initialised with ones or zeros.
    return vs.get_variable(
        param_name, initializer=fill_fn([width]), dtype=tf.float32)

  with vs.variable_scope(scope or type(self).__name__,
                         initializer=self._initializer):
    input_gain = _norm_param("s1", tf.ones, gate_width)
    state_gain = _norm_param("s2", tf.ones, gate_width)
    # "s3"/"b3" are still created, although normalisation of the cell state
    # is disabled below and they are not otherwise used in this method.
    _norm_param("s3", tf.ones, self._num_units)
    input_shift = _norm_param("b1", tf.zeros, gate_width)
    state_shift = _norm_param("b2", tf.zeros, gate_width)
    _norm_param("b3", tf.zeros, self._num_units)

    from_input = rnn_cell._linear(
        [inputs], gate_width, False, scope="out_1")
    from_input = ln(from_input, input_gain, input_shift)
    from_state = rnn_cell._linear(
        [m_prev], gate_width, False, scope="out_2")
    from_state = ln(from_state, state_gain, state_shift)
    lstm_matrix = tf.add(from_input, from_state)

    in_gate, candidate, forget_gate, out_gate = array_ops.split(
        1, 4, lstm_matrix)

    retained = sigmoid(forget_gate) * c_prev
    written = sigmoid(in_gate) * self._activation(candidate)
    c = retained + written

    # Currently normalizing c causes lot of nan's in the model, so the raw
    # cell state is fed to the output activation.
    m = sigmoid(out_gate) * self._activation(c)

  if self._state_is_tuple:
    new_state = LSTMStateTuple(c, m)
  else:
    new_state = array_ops.concat(1, [c, m])
  return m, new_state
def _ConcatGrad(op, grad):
  """Gradient for concat op.

  Args:
    op: The concat op. `op.inputs[0]` is used as the concat dimension and
      `op.inputs[1:]` as the concatenated tensors.
    grad: Gradient with respect to the op's output; either an `ops.Tensor`
      or an `ops.IndexedSlices`.

  Returns:
    A list whose first entry is `None` (no gradient for the concat
    dimension), followed by one gradient per concatenated input.

  Raises:
    ValueError: if `grad` is an `IndexedSlices` and the concat dimension is
      not statically known.
    TypeError: if `grad` is neither a `Tensor` nor an `IndexedSlices`.
  """

  def _CreateDenseMaskAndBegin(sizes, concat_dim):
    """Create variables for iteratively slicing a dense gradients tensor."""
    # Since shape is 1-D, shape_of_shape = [rank-of-inputs]
    shape_of_shape = array_ops.shape(sizes[0])
    # Make a vector of length equal to the input's dimensions,
    # with 0's everywhere and 1 in the concat dim position.
    # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
    mask = array_ops.concat(0, [
        array_ops.fill(array_ops.expand_dims(concat_dim, 0), 0), [1],
        array_ops.fill(shape_of_shape - concat_dim - 1, 0)
    ])
    # Slicing starts at the origin; the caller advances `begin` per input.
    begin = array_ops.fill(shape_of_shape, 0)
    return mask, begin

  def _ExtractInputShapes(inputs):
    """Extract the shapes of a set of input tensors."""
    sizes = []
    fully_known = True
    for x in inputs:
      input_shape = array_ops.shape(x)
      # Only keep per-input shapes while every one of them is a "Const" op.
      if not isinstance(input_shape,
                        ops.Tensor) or input_shape.op.type != "Const":
        fully_known = False
        break
      else:
        sizes.append(input_shape)

    if fully_known:
      return sizes
    else:
      # Otherwise fall back to a single shape_n call over all inputs.
      return array_ops.shape_n(inputs)

  # Degenerate concatenation, just return grad.
  if len(op.inputs) == 2:
    return [None, grad]

  concat_dim = op.inputs[0]
  out_grads = []
  if isinstance(grad, ops.Tensor):
    # Get the inputs' tensor shapes
    sizes = _ExtractInputShapes(op.inputs[1:])
    # pylint: disable=protected-access
    offset = gen_array_ops._concat_offset(concat_dim, sizes)
    # pylint: enable=protected-access
    # Each input's gradient is the slice of `grad` at that input's offset.
    for (begin, size) in zip(offset, sizes):
      out_grads.append(array_ops.slice(grad, begin, size))
  elif isinstance(grad, ops.IndexedSlices):
    concat_dim_static = tensor_util.constant_value(concat_dim)
    if concat_dim_static is None:
      raise ValueError("Can only compute IndexedSlices gradient with "
                       "statically-known concat_dim")
    # Get the inputs' tensor shapes
    sizes = [array_ops.shape(x) for x in op.inputs[1:]]
    if concat_dim_static > 0:
      # IndexedSlices, concat_dim > 0. Each input gets IndexedSlices gradients
      # with all the indices, but with grad.values sliced accordingly. This
      # is like the Tensor case, except shape(grad.values)[0] is not equal to
      # shape(sizes[i])[0], since only a subset of the dim-0 values are stored.
      mask, begin = _CreateDenseMaskAndBegin(sizes, concat_dim)
      for size in sizes:
        # Size is -1 along dim 0 (take every stored row) and the input's own
        # extent along the remaining dimensions.
        new_values = array_ops.slice(
            grad.values, begin,
            array_ops.concat(
                0, [[-1], array_ops.slice(size, [1], [-1])]))
        out_grads.append(
            ops.IndexedSlices(new_values, grad.indices, size))
        # Lint complains begin = begin + ...
        begin = math_ops.add(begin, size * mask)
    else:
      # IndexedSlices, concat_dim == 0. Each input gets IndexedSlices gradients
      # only for the relevant indices.
      start = constant_op.constant(0, dtype=grad.indices.dtype)
      for size in sizes:
        size_concat_dim = array_ops.gather(size, concat_dim)
        if size_concat_dim.dtype != grad.indices.dtype:
          size_concat_dim = math_ops.cast(size_concat_dim,
                                          dtype=grad.indices.dtype)
        end = start + size_concat_dim
        # Compute the 1-D Tensor of indices relevant for this input.
        indices_to_select = array_ops.squeeze(array_ops.where(
            math_ops.logical_and(grad.indices >= start,
                                 grad.indices < end)),
                                              squeeze_dims=[1])
        # Re-base the selected indices so they are relative to this input.
        new_indices = array_ops.gather(grad.indices,
                                       indices_to_select) - start
        new_values = array_ops.gather(grad.values, indices_to_select)
        out_grads.append(
            ops.IndexedSlices(new_values, new_indices, size))
        start = end
  else:
    raise TypeError("Expected Tensor or IndexedSlices, got %s" % type(grad))

  return [None] + out_grads
def _FFTSizeForGrad(grad, rank):
  """Returns the product of `rank` entries taken from `grad`'s reversed shape.

  The shape vector of `grad` is reversed, its first `rank` entries are sliced
  out, and their product is returned.

  Args:
    grad: Tensor whose shape is inspected.
    rank: Number of entries of the reversed shape to multiply together.

  Returns:
    The result of `reduce_prod` over the selected shape entries.
  """
  reversed_shape = array_ops.reverse(array_ops.shape(grad), (True,))
  selected_dims = array_ops.slice(reversed_shape, (0,), (rank,))
  return math_ops.reduce_prod(selected_dims)
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """Apply gradients to variables.

  This contains most of the synchronization implementation and also wraps the
  apply_gradients() from the real optimizer.

  Besides building ops, this method records state on `self`: it sets
  `_global_step`, `_local_steps`, `_sync_token_queue`, `_clean_up_op`,
  `_chief_queue_runner` and `_gradients_applied`, and appends to
  `_one_element_queue_list`.

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      compute_gradients().
    global_step: Optional Variable to increment by one after the
      variables have been updated.
    name: Optional name for the returned operation.  Default to the
      name passed to the Optimizer constructor.

  Returns:
    train_op: The op to dequeue a token so the replicas can exit this batch
    and start the next one. This is executed by each replica.

  Raises:
    ValueError: If the grads_and_vars is empty.
    ValueError: If global step is not provided, the staleness cannot be
      checked.
    ValueError: If a gradient is neither a `Tensor`, `None`, nor an
      `IndexedSlices`.
  """
  if not grads_and_vars:
    raise ValueError("Must supply at least one variable")

  if global_step is None:
    raise ValueError("Global step is required to check staleness")

  self._global_step = global_step
  train_ops = []
  aggregated_grad = []
  inputs = []
  var_list = []
  # Flatten every gradient and variable into the name scope's value list.
  for x in grads_and_vars:
    inputs.extend(list(x))

  # One local-step slot per replica, placed with the global step.
  with ops.device(global_step.device):
    self._local_steps = variables.Variable(array_ops.zeros(
        [self._total_num_replicas], dtype=global_step.dtype),
                                           trainable=False,
                                           name="local_steps")

  # Check staleness. Note that this has to be ref(), otherwise identity will
  # be accessed and it will be old values.
  local_step = array_ops.slice(self._local_steps.ref(),
                               array_ops.reshape(self._replica_id, (1, )),
                               [1],
                               name="get_local_step")
  local_step = array_ops.reshape(local_step, ())
  is_stale = math_ops.less(local_step, global_step)

  with ops.name_scope(name, self._name, inputs) as name:
    for grad, var in grads_and_vars:
      var_list.append(var)
      with ops.device(var.device):
        if isinstance(grad, ops.Tensor):
          # Dense gradient: one queue per variable, shared by name.
          gradient_queue = (data_flow_ops.FIFOQueue(
              self._tokens_per_step * 2,
              grad.dtype,
              shapes=var.get_shape(),
              shared_name=var.name))
          self._one_element_queue_list.append(
              (gradient_queue, var.device))
          train_ops.append(gradient_queue.enqueue([grad]))

          # Aggregate all gradients
          gradients = gradient_queue.dequeue_many(
              self._replicas_to_aggregate)
          aggregated_grad.append(
              math_ops.reduce_sum(gradients, [0]))
        elif grad is None:
          aggregated_grad.append(None)  # pass-through.
        else:
          if not isinstance(grad, ops.IndexedSlices):
            raise ValueError("Unknown grad type!")
          aggregated_grad.append(
              self._aggregate_sparse_grad(grad, var, train_ops))

    aggregated_grads_and_vars = zip(aggregated_grad, var_list)

    # sync_op will be assigned to the same device as the global step.
    with ops.device(global_step.device), ops.name_scope(""):
      update_op = self._opt.apply_gradients(
          aggregated_grads_and_vars, global_step)

    # Create token queue.
    with ops.device(global_step.device), ops.name_scope(""):
      sync_token_queue = (data_flow_ops.FIFOQueue(
          -1,
          global_step.dtype.base_dtype,
          shapes=(),
          shared_name="sync_token_q"))
      self._sync_token_queue = sync_token_queue

      # dummy_queue is passed to the queue runner. Don't use the real queues
      # because the queue runner doesn't automatically reopen it once it
      # closed queues in PS devices.
      dummy_queue = (data_flow_ops.FIFOQueue(
          1, types_pb2.DT_INT32, shapes=(), shared_name="dummy_queue"))
      # Clear all the gradients queues in case there are stale gradients.
      clear_queue_ops = []
      with ops.control_dependencies([update_op]):
        for queue, dev in self._one_element_queue_list:
          with ops.device(dev):
            stale_grads = queue.dequeue_many(queue.size())
            clear_queue_ops.append(stale_grads)

        for queue, dev in self._sparse_grad_queues_and_devs:
          with ops.device(dev):
            _, stale_indices = queue.dequeue_many(queue.size())
            clear_queue_ops.append(stale_indices)

    with ops.device(global_step.device):
      self._clean_up_op = control_flow_ops.abort(
          error_msg="From sync_replicas")

    # According to the staleness, select between the enqueue op (real_grad)
    # or no-op (no_op_grad). Effectively dropping all the stale gradients.
    no_op_grad = lambda: [
        control_flow_ops.no_op(name="no_grad_enqueue")
    ]
    real_grad = lambda: [control_flow_ops.group(*train_ops)]
    final_train_ops = control_flow_ops.cond(is_stale, no_op_grad,
                                            real_grad)

    with ops.device(global_step.device), ops.name_scope(""):
      # Replicas have to wait until they can get a token from the token queue.
      with ops.control_dependencies([final_train_ops]):
        token = sync_token_queue.dequeue()
        # The dequeued token becomes this replica's recorded local step.
        train_op = state_ops.scatter_update(self._local_steps,
                                            self._replica_id,
                                            token,
                                            name=name)

      with ops.control_dependencies(clear_queue_ops):
        # Sync_op needs to insert tokens to the token queue at the end of the
        # step so the replicas can fetch them to start the next step.
        # Note that ref() is used to avoid reading a stale value of the
        # global step through the identity op.
        tokens = array_ops.fill([self._tokens_per_step],
                                global_step.ref())
        sync_op = sync_token_queue.enqueue_many((tokens, ))

      if self._variable_averages is not None:
        with ops.control_dependencies([sync_op
                                       ]), ops.name_scope(""):
          sync_op = self._variable_averages.apply(
              self._variables_to_average)

      self._chief_queue_runner = queue_runner.QueueRunner(
          dummy_queue, [sync_op])
    self._gradients_applied = True
    return train_op
def _testGradientVariableSize(self):
  """Checks the gradient of a slice that uses a -1 (wildcard) size."""
  with self.cached_session():
    source = constant_op.constant([1.0, 2.0, 3.0], name="in")
    # Drop the first element; -1 takes everything that remains.
    tail = array_ops.slice(source, [1], [-1])
    tail_gradient = gradients_impl.gradients(tail, source)[0]
    grad_actual = self.evaluate(tail_gradient)
  self.assertAllClose([0., 1., 1.], grad_actual)
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner='mean',
                                 default_id=None,
                                 name=None,
                                 partition_strategy='div',
                                 max_norm=None):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`. `embedding_weights`
  may be a `PartitionedVariable` as returned by using `tf.get_variable()` with
  a partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding
  vector for `default_id` is returned, or the 0-vector if `default_id` is not
  supplied.

  The ids and weights may be multi-dimensional. Embeddings are always
  aggregated along the last dimension.

  Args:
    embedding_weights: A list of `P` float `Tensor`s or values representing
      partitioned embedding `Tensor`s. Alternatively, a `PartitionedVariable`
      created by partitioning along dimension 0. The total unpartitioned
      shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the
      vocab size and `e_1, ..., e_m` are the embedding dimensions.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
      ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
      float weights corresponding to `sparse_ids`, or `None` if all weights
      are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
      the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
      Currently `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not `None`, all embeddings are l2-normalized to max_norm
      before combining.

  Returns:
    Dense `Tensor` of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.

  Raises:
    ValueError: if `embedding_weights` is `None` or empty.
  """
  if embedding_weights is None:
    raise ValueError('Missing embedding_weights %s.' % embedding_weights)
  if isinstance(embedding_weights, variables.PartitionedVariable):
    # Iterating a PartitionedVariable yields the underlying Variables.
    embedding_weights = list(embedding_weights)
  if not isinstance(embedding_weights, list):
    embedding_weights = [embedding_weights]
  if not embedding_weights:
    raise ValueError('Missing embedding_weights %s.' % embedding_weights)

  dtype = None if sparse_weights is None else sparse_weights.dtype

  def _keep_or_convert(weight):
    # Resource variables with a compatible dtype are passed through as-is;
    # everything else is converted to a tensor of the weights' dtype.
    is_resource = isinstance(weight, resource_variable_ops.ResourceVariable)
    if is_resource and dtype in (None, weight.dtype):
      return weight
    return ops.convert_to_tensor(weight, dtype=dtype)

  embedding_weights = [_keep_or_convert(w) for w in embedding_weights]

  scope_values = embedding_weights + [sparse_ids, sparse_weights]
  with ops.name_scope(name, 'embedding_lookup', scope_values) as scope:
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.dense_shape
    original_rank_dim = tensor_shape.dimension_value(
        sparse_ids.dense_shape.get_shape()[0])
    if original_rank_dim is None:
      original_rank = array_ops.size(original_shape)
    else:
      original_rank = original_rank_dim

    leading_dims = array_ops.slice(
        original_shape, [0], [original_rank - 1])
    flattened_rows = math_ops.reduce_prod(leading_dims)
    innermost_dim = array_ops.gather(original_shape, original_rank - 1)
    sparse_ids = sparse_ops.sparse_reshape(
        sparse_ids, [flattened_rows, innermost_dim])
    if sparse_weights is not None:
      sparse_weights = sparse_tensor.SparseTensor(
          sparse_ids.indices, sparse_weights.values, sparse_ids.dense_shape)

    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(
        sparse_ids, sparse_weights)
    if combiner != 'sum':
      sparse_ids, sparse_weights = _prune_invalid_weights(
          sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, default_id or 0)
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(
          sparse_weights, 1.0)

    # When no default id is given, the scope name is applied to the final
    # `where` below instead of to the lookup itself.
    lookup_name = None if default_id is None else scope
    result = embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=lookup_name,
        max_norm=max_norm)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      empty_column = array_ops.reshape(is_row_empty, [-1, 1])
      multiples = array_ops.stack([1, array_ops.shape(result)[1]])
      is_row_empty = array_ops.tile(empty_column, multiples)

      result = array_ops.where(
          is_row_empty, array_ops.zeros_like(result), result, name=scope)

    # Reshape back from linear ids back into higher-dimensional dense result.
    int_original_shape = math_ops.cast(original_shape, dtypes.int32)
    outer_shape = array_ops.slice(
        int_original_shape, [0], [original_rank - 1])
    embedding_shape = array_ops.slice(array_ops.shape(result), [1], [-1])
    final_shape = array_ops.concat([outer_shape, embedding_shape], 0)
    final_result = array_ops.reshape(result, final_shape)

    static_outer_rank = (tensor_shape.Dimension(original_rank_dim) - 1).value
    static_outer_shape = tensor_shape.unknown_shape(static_outer_rank)
    final_result.set_shape(
        static_outer_shape.concatenate(result.get_shape()[1:]))
    return final_result
def _parse_single_example_raw(serialized,
                              names=None,
                              sparse_keys=None,
                              sparse_types=None,
                              dense_keys=None,
                              dense_types=None,
                              dense_defaults=None,
                              dense_shapes=None,
                              name=None):
  """Parses a single `Example` proto.

  The scalar inputs are expanded to a batch of one, parsed with
  `_parse_example_raw`, and the added batch dimension is then removed from
  every dense and sparse output.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
      See `_parse_example_raw` documentation for more details.
    names: (Optional) A scalar string Tensor, the associated name.
      See `_parse_example_raw` documentation for more details.
    sparse_keys: See `_parse_example_raw` documentation for more details.
    sparse_types: See `_parse_example_raw` documentation for more details.
    dense_keys: See `_parse_example_raw` documentation for more details.
    dense_types: See `_parse_example_raw` documentation for more details.
    dense_defaults: See `_parse_example_raw` documentation for more details.
    dense_shapes: See `_parse_example_raw` documentation for more details.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  # Characters outside this set are replaced by "_" when building op names.
  invalid_name_chars = "[^A-Za-z0-9_.\\-/]"

  def _scalar_to_batch_of_one(value, error_message, assert_name,
                              dependencies_name):
    # Verify `value` is a scalar (statically when the rank is known, otherwise
    # with a runtime Assert) and add a leading dimension of size 1.
    tensor = ops.convert_to_tensor(value)
    static_rank = tensor.get_shape().ndims
    if static_rank is None:
      rank_is_zero = math_ops.equal(array_ops.rank(tensor), 0)
      scalar_check = control_flow_ops.Assert(
          rank_is_zero, [error_message], name=assert_name)
      tensor = control_flow_ops.with_dependencies(
          [scalar_check], tensor, name=dependencies_name)
    elif static_rank != 0:
      raise ValueError(error_message)
    return array_ops.expand_dims(tensor, 0)

  with ops.name_scope(name, "ParseSingleExample", [serialized, names]):
    serialized = _scalar_to_batch_of_one(
        serialized, "Input serialized must be a scalar",
        "SerializedIsScalar", "SerializedDependencies")
    if names is not None:
      names = _scalar_to_batch_of_one(
          names, "Input names must be a scalar",
          "NamesIsScalar", "NamesDependencies")

    outputs = _parse_example_raw(
        serialized,
        names=names,
        sparse_keys=sparse_keys,
        sparse_types=sparse_types,
        dense_keys=dense_keys,
        dense_types=dense_types,
        dense_defaults=dense_defaults,
        dense_shapes=dense_shapes,
        name=name)

    if dense_keys is not None:
      for dense_key in dense_keys:
        op_suffix = re.sub(invalid_name_chars, "_", dense_key)
        # Remove the batch dimension added above.
        outputs[dense_key] = array_ops.squeeze(
            outputs[dense_key], [0], name="Squeeze_%s" % op_suffix)

    if sparse_keys is not None:
      for sparse_key in sparse_keys:
        op_suffix = re.sub(invalid_name_chars, "_", sparse_key)
        # Drop the batch column from the indices and the batch entry from the
        # dense shape.
        unbatched_indices = array_ops.slice(
            outputs[sparse_key].indices, [0, 1], [-1, -1],
            name="Slice_Indices_%s" % op_suffix)
        values = outputs[sparse_key].values
        unbatched_shape = array_ops.slice(
            outputs[sparse_key].dense_shape, [1], [-1],
            name="Squeeze_Shape_%s" % op_suffix)
        outputs[sparse_key] = sparse_tensor.SparseTensor(
            unbatched_indices, values, unbatched_shape)
    return outputs
def _model_fn(features, labels, mode):
  """Function that returns predictions, training loss, and training op.

  Relies on configuration captured from the enclosing scope (for example
  `params`, `feature_columns`, `output_type`, `model_head`, `weights_name`,
  `keys_name`).

  Args:
    features: A `Tensor`, a `SparseTensor`, or a dict of features. A bare
      tensor is wrapped as `{'features': features}`.
    labels: Labels, or `None` (e.g. when predicting).
    mode: One of the `model_fn_lib.ModeKeys` values.

  Returns:
    The result of `model_head.create_model_fn_ops` when `output_type` is
    `ModelBuilderOutputType.MODEL_FN_OPS`, otherwise the result of
    `model_head.create_estimator_spec`; in both cases augmented with the
    TensorForest hooks and extra predictions.
  """
  if isinstance(features, (ops.Tensor, sparse_tensor.SparseTensor)):
    features = {'features': features}
  if feature_columns:
    # Work on a copy so the caller's feature dict is not modified.
    features = features.copy()

    if output_type == ModelBuilderOutputType.MODEL_FN_OPS:
      features.update(
          layers.transform_features(features, feature_columns))
    else:
      for fc in feature_columns:
        tensor = fc_core._transform_features(features, [fc])[fc]  # pylint: disable=protected-access
        features[fc.name] = tensor

  # Weights and keys travel in the feature dict but are not model inputs.
  weights = None
  if weights_name and weights_name in features:
    weights = features.pop(weights_name)

  keys = None
  if keys_name and keys_name in features:
    keys = features.pop(keys_name)

  # If we're doing eval, optionally ignore device_assigner.
  # Also ignore device assigner if we're exporting (mode == INFER)
  dev_assn = device_assigner
  if (mode == model_fn_lib.ModeKeys.INFER or
      (local_eval and mode == model_fn_lib.ModeKeys.EVAL)):
    dev_assn = None

  graph_builder = graph_builder_class(params, device_assigner=dev_assn)

  logits, tree_paths, regression_variance = graph_builder.inference_graph(
      features)

  summary.scalar('average_tree_size', graph_builder.average_size())
  # For binary classification problems, convert probabilities to logits.
  # Includes hack to get around the fact that a probability might be 0 or 1.
  if not params.regression and params.num_classes == 2:
    class_1_probs = array_ops.slice(logits, [0, 1], [-1, 1])
    logits = math_ops.log(
        math_ops.maximum(
            class_1_probs / math_ops.maximum(1.0 - class_1_probs, EPSILON),
            EPSILON))

  # labels might be None if we're doing prediction (which brings up the
  # question of why we force everything to adhere to a single model_fn).
  training_graph = None
  training_hooks = []
  if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
    with ops.control_dependencies([logits.op]):
      training_graph = control_flow_ops.group(
          graph_builder.training_graph(features,
                                       labels,
                                       input_weights=weights,
                                       num_trainers=num_trainers,
                                       trainer_id=trainer_id),
          state_ops.assign_add(training_util.get_global_step(), 1))

  # Put weights back in
  if weights is not None:
    features[weights_name] = weights

  # TensorForest's training graph isn't calculated directly from the loss
  # like many other models.
  def _train_fn(unused_loss):
    return training_graph

  # Ops are run in lexigraphical order of their keys. Run the resource
  # clean-up op last.
  all_handles = graph_builder.get_all_resource_handles()
  ops_at_end = {
      '9: clean up resources':
          control_flow_ops.group(*[
              resource_variable_ops.destroy_resource_op(handle)
              for handle in all_handles
          ])
  }

  if report_feature_importances:
    ops_at_end['1: feature_importances'] = (
        graph_builder.feature_importances())

  training_hooks = [TensorForestRunOpAtEndHook(ops_at_end)]

  if output_type == ModelBuilderOutputType.MODEL_FN_OPS:
    model_ops = model_head.create_model_fn_ops(features=features,
                                               labels=labels,
                                               mode=mode,
                                               train_op_fn=_train_fn,
                                               logits=logits,
                                               scope=head_scope)

    if early_stopping_rounds:
      training_hooks.append(
          TensorForestLossHook(
              early_stopping_rounds,
              early_stopping_loss_threshold=early_stopping_loss_threshold,
              loss_op=model_ops.loss))

    model_ops.training_hooks.extend(training_hooks)

    if keys is not None:
      model_ops.predictions[keys_name] = keys

    if params.inference_tree_paths:
      model_ops.predictions[TREE_PATHS_PREDICTION_KEY] = tree_paths

    model_ops.predictions[VARIANCE_PREDICTION_KEY] = regression_variance

    if include_all_in_serving:
      # In order to serve the variance we need to add the prediction dict
      # to output_alternatives dict.
      if not model_ops.output_alternatives:
        model_ops.output_alternatives = {}
      model_ops.output_alternatives[ALL_SERVING_KEY] = (
          constants.ProblemType.UNSPECIFIED, model_ops.predictions)

    return model_ops

  else:
    # Estimator spec
    estimator_spec = model_head.create_estimator_spec(features=features,
                                                      mode=mode,
                                                      labels=labels,
                                                      train_op_fn=_train_fn,
                                                      logits=logits)

    if early_stopping_rounds:
      training_hooks.append(
          TensorForestLossHook(
              early_stopping_rounds,
              early_stopping_loss_threshold=early_stopping_loss_threshold,
              loss_op=estimator_spec.loss))

    estimator_spec = estimator_spec._replace(
        training_hooks=training_hooks + list(estimator_spec.training_hooks))

    if keys is not None:
      estimator_spec.predictions[keys_name] = keys

    if params.inference_tree_paths:
      estimator_spec.predictions[TREE_PATHS_PREDICTION_KEY] = tree_paths

    estimator_spec.predictions[VARIANCE_PREDICTION_KEY] = regression_variance

    if include_all_in_serving:
      # In order to serve the variance we need to add the prediction dict
      # to the export outputs. Start from a copy of whatever the head already
      # exports (it may be None or empty) so those entries are kept rather
      # than replaced, mirroring the MODEL_FN_OPS branch above.
      outputs = dict(estimator_spec.export_outputs or {})
      outputs[ALL_SERVING_KEY] = PredictOutput(estimator_spec.predictions)
      estimator_spec = estimator_spec._replace(export_outputs=outputs)

    return estimator_spec
def _log_prob(self, x):
  """Builds the log-probability of `x` under this distribution.

  Args:
    x: Input matrices. Used directly as the square-root factor when
      `self.cholesky_input_output_matrices` is true, otherwise passed through
      `linalg_ops.batch_cholesky` first.

  Returns:
    The log-probability tensor, with static shape hints merged in from `x`
    and from this distribution's batch shape where those are known.
  """
  if self.cholesky_input_output_matrices:
    x_sqrt = x
  else:
    # Complexity: O(nbk^3)
    x_sqrt = linalg_ops.batch_cholesky(x)

  batch_shape = self.batch_shape()
  event_shape = self.event_shape()
  ndims = array_ops.rank(x_sqrt)
  # sample_ndims = ndims - batch_ndims - event_ndims
  sample_ndims = ndims - array_ops.shape(batch_shape)[0] - 2
  sample_shape = array_ops.slice(
      array_ops.shape(x_sqrt), [0], [sample_ndims])

  # We need to be able to pre-multiply each matrix by its corresponding
  # batch scale matrix. Since a Distribution Tensor supports multiple
  # samples per batch, this means we need to reshape the input matrix `x`
  # so that the first b dimensions are batch dimensions and the last two
  # are of shape [dimension, dimensions*number_of_samples]. Doing these
  # gymnastics allows us to do a batch_solve.
  #
  # After we're done with sqrt_solve (the batch operation) we need to undo
  # this reshaping so what we're left with is a Tensor partitionable by
  # sample, batch, event dimensions.

  # Move the sample dimensions to the end.
  # Complexity: O(nbk^2) since transpose must access every element.
  samples_last_perm = array_ops.concat(
      0, (math_ops.range(sample_ndims, ndims),
          math_ops.range(0, sample_ndims)))
  samples_last = array_ops.transpose(x_sqrt, samples_last_perm)
  solve_shape = array_ops.concat(
      0, (batch_shape,
          (math_ops.cast(self.dimension, dtype=dtypes.int32), -1)))
  solve_ready = array_ops.reshape(samples_last, solve_shape)

  # Complexity: O(nbM*k) where M is the complexity of the operator solving
  # a vector system. E.g., for OperatorPDDiag, each solve is O(k), so
  # this complexity is O(nbk^2). For OperatorPDCholesky, each solve is
  # O(k^2) so this step has complexity O(nbk^3).
  solved = self.scale_operator_pd.sqrt_solve(solve_ready)

  # Undo make batch-op ready.
  # Complexity: O(nbk^2)
  unpacked_shape = array_ops.concat(
      0, (batch_shape, event_shape, sample_shape))
  unpacked = array_ops.reshape(solved, unpacked_shape)
  samples_first_perm = array_ops.concat(
      0, (math_ops.range(ndims - sample_ndims, ndims),
          math_ops.range(0, ndims - sample_ndims)))
  scale_sqrt_inv_x_sqrt = array_ops.transpose(unpacked, samples_first_perm)

  # Write V = SS', X = LL'. Then:
  # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
  #              = tr[inv(S) L L' inv(S)']
  #              = tr[(inv(S) L) (inv(S) L)']
  #              = sum_{ik} (inv(S) L)_{ik}^2
  # The second equality follows from the cyclic permutation property.
  # Complexity: O(nbk^2)
  squared_entries = math_ops.square(scale_sqrt_inv_x_sqrt)
  trace_scale_inv_x = math_ops.reduce_sum(
      squared_entries, reduction_indices=[-2, -1])

  # Complexity: O(nbk)
  log_diagonal = math_ops.log(array_ops.batch_matrix_diag_part(x_sqrt))
  half_log_det_x = math_ops.reduce_sum(
      log_diagonal, reduction_indices=[-1])

  # Complexity: O(nbk^2)
  log_det_term = (self.df - self.dimension - 1.) * half_log_det_x
  trace_term = 0.5 * trace_scale_inv_x
  log_prob = log_det_term - trace_term - self.log_normalizing_constant()

  # Set shape hints.
  # Try to merge what we know from the input then what we know from the
  # parameters of this distribution.
  if x.get_shape().ndims is not None:
    log_prob.set_shape(x.get_shape()[:-2])
  if (log_prob.get_shape().ndims is not None and
      self.get_batch_shape().ndims is not None and
      self.get_batch_shape().ndims > 0):
    trailing_dims = log_prob.get_shape()[-self.get_batch_shape().ndims:]
    trailing_dims.merge_with(self.get_batch_shape())

  return log_prob
def _eval_metric_ops(self, labels, probabilities, weights, unreduced_loss,
                     regularization_loss):
  """Returns a dict of metrics for eval_metric_ops.

  The dict always holds the mean loss, AUC and PR-AUC. It additionally holds
  the mean regularization loss when `regularization_loss` is not `None`,
  accuracy/precision/recall for every entry of `self._thresholds`, and
  per-class probability mean, AUC and PR-AUC for every entry of
  `self._classes_for_class_based_metrics`.

  Args:
    labels: Labels passed to the metrics.
    probabilities: Predictions passed to the metrics.
    weights: Weights passed to the metrics.
    unreduced_loss: Values for the mean-loss metric.
    regularization_loss: Values for the regularization-loss metric, or
      `None` to omit it.

  Returns:
    A dict mapping summary keys to metric ops.
  """
  scope_values = [
      labels, probabilities, weights, unreduced_loss, regularization_loss
  ]
  with ops.name_scope(None, 'metrics', scope_values):
    keys = metric_keys.MetricKeys

    def _summary_key(metric_name):
      return head_lib._summary_key(self._name, metric_name)  # pylint:disable=protected-access

    metric_ops = {}
    # Estimator already adds a metric for loss.
    metric_ops[_summary_key(keys.LOSS_MEAN)] = metrics_lib.mean(
        values=unreduced_loss, weights=weights, name=keys.LOSS_MEAN)
    metric_ops[_summary_key(keys.AUC)] = metrics_lib.auc(
        labels=labels,
        predictions=probabilities,
        weights=weights,
        name=keys.AUC)
    metric_ops[_summary_key(keys.AUC_PR)] = metrics_lib.auc(
        labels=labels,
        predictions=probabilities,
        weights=weights,
        curve='PR',
        name=keys.AUC_PR)

    if regularization_loss is not None:
      metric_ops[_summary_key(keys.LOSS_REGULARIZATION)] = metrics_lib.mean(
          values=regularization_loss, name=keys.LOSS_REGULARIZATION)

    for threshold in self._thresholds:
      # Accuracy, then precision and recall for positive examples.
      # pylint:disable=protected-access
      thresholded_metrics = (
          (keys.ACCURACY_AT_THRESHOLD, head_lib._accuracy_at_threshold),
          (keys.PRECISION_AT_THRESHOLD, head_lib._precision_at_threshold),
          (keys.RECALL_AT_THRESHOLD, head_lib._recall_at_threshold),
      )
      # pylint:enable=protected-access
      for key_template, metric_fn in thresholded_metrics:
        metric_name = key_template % threshold
        metric_ops[_summary_key(metric_name)] = metric_fn(
            labels=labels,
            predictions=probabilities,
            weights=weights,
            threshold=threshold,
            name=metric_name)

    for class_id in self._classes_for_class_based_metrics:
      # Select the single column `class_id` along the last axis while taking
      # everything along the leading (batch) axes.
      batch_rank = array_ops.rank(probabilities) - 1
      leading_begin = array_ops.zeros([batch_rank], dtype=dtypes.int32)
      begin = array_ops.concat([leading_begin, [class_id]], axis=0)
      leading_size = -1 * array_ops.ones([batch_rank], dtype=dtypes.int32)
      size = array_ops.concat([leading_size, [1]], axis=0)
      class_probabilities = array_ops.slice(
          probabilities, begin=begin, size=size)
      class_labels = array_ops.slice(labels, begin=begin, size=size)

      prob_key = keys.PROBABILITY_MEAN_AT_CLASS % class_id
      metric_ops[_summary_key(prob_key)] = head_lib._predictions_mean(  # pylint:disable=protected-access
          predictions=class_probabilities,
          weights=weights,
          name=prob_key)

      auc_key = keys.AUC_AT_CLASS % class_id
      metric_ops[_summary_key(auc_key)] = head_lib._auc(  # pylint:disable=protected-access
          labels=class_labels,
          predictions=class_probabilities,
          weights=weights,
          name=auc_key)

      auc_pr_key = keys.AUC_PR_AT_CLASS % class_id
      metric_ops[_summary_key(auc_pr_key)] = head_lib._auc(  # pylint:disable=protected-access
          labels=class_labels,
          predictions=class_probabilities,
          weights=weights,
          curve='PR',
          name=auc_pr_key)
  return metric_ops
def assign_sub(self, delta, use_locking=False, name=None, read_value=True):
  """Subtracts the matching slice of `delta` from each underlying variable.

  For every variable in `self._variables`, the slice of `delta` that starts
  at the corresponding entry of `self._var_offsets` and has the variable's
  static shape is passed to that variable's `assign_sub`.

  Args:
    delta: Value to subtract; sliced per underlying variable.
    use_locking: Accepted for signature compatibility; not used here.
    name: Accepted for signature compatibility; not used here.
    read_value: Accepted for signature compatibility; not used here.

  Returns:
    `self`.
  """
  for index, variable in enumerate(self._variables):
    shard_begin = self._var_offsets[index]
    shard_size = variable.shape.as_list()
    variable.assign_sub(array_ops.slice(delta, shard_begin, shard_size))
  return self
def _auc(probs, targets, weights=None):
  """Streaming AUC computed from the column at index 1 of `probs`.

  Args:
    probs: Two-dimensional predictions; only the column at index 1 is used.
    targets: Labels passed through to `streaming_auc`.
    weights: Optional weights passed through to `streaming_auc`.

  Returns:
    The result of `metric_ops.streaming_auc`.
  """
  # Keep every row, take the single column at index 1.
  column_one = array_ops.slice(probs, [0, 1], [-1, 1])
  return metric_ops.streaming_auc(column_one, targets, weights=weights)
def _embedding_lookup_with_distributed_aggregation(params,
                                                   ids,
                                                   partition_strategy="mod",
                                                   name=None,
                                                   max_norm=None,
                                                   weights=None,
                                                   idx=None,
                                                   segment_ids=None):
  """Lookup helper for embedding_lookup_sparse_with_distributed_aggregation.

  Gathers rows of `params` for `ids`, optionally clips and weights them, and
  reduces them per segment. With a single partition the reduction is one
  `segment_sum` / `sparse_segment_sum`. With several partitions each partition
  is looked up and reduced under `ops.colocate_with(params[p])`, and the
  per-partition partial sums are then combined with `unsorted_segment_sum`.

  Args:
    params: A single parameter, a list of parameters, or a
      `PartitionedVariable` (which is expanded with `list(params)`).
    ids: Ids to look up. In the multi-partition path they are converted to a
      tensor and flattened.
    partition_strategy: `"mod"` or `"div"`. Only consulted when there is more
      than one partition.
    name: Optional name for the name scope and for the final op.
    max_norm: If not `None`, gathered values are passed through
      `clip_ops.clip_by_norm` over axes `1..ndims-1`.
    weights: Optional weights. When given, gathered values are multiplied by
      them (cast to the result dtype, reshaped to broadcast) and reduced with
      `segment_sum`. When `None`, `sparse_segment_sum` with `idx` is used.
    idx: Indices used by the unweighted (`weights is None`) reduction.
    segment_ids: Segment id for each entry being reduced.

  Returns:
    The segment-summed lookup result.

  Raises:
    ValueError: If `params` is `None` or an empty list, or if
      `partition_strategy` is unrecognized (multi-partition path only).
  """
  if params is None or params == []:  # pylint: disable=g-explicit-bool-comparison
    raise ValueError("Need at least one param")
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)  # Iterate to get the underlying Variables.
  if not isinstance(params, list):
    params = [params]

  def maybe_normalize(x):
    # Clips `x` to `max_norm` over every axis but the first; no-op when
    # `max_norm` is None.
    if max_norm is not None:
      if x.get_shape().ndims is not None:
        ndims = x.get_shape().ndims
      else:
        ndims = array_ops.size(array_ops.shape(x))
      return clip_ops.clip_by_norm(x, max_norm, axes=list(range(1, ndims)))
    return x

  with ops.name_scope(name, "embedding_lookup_with_distributed_aggregation",
                      params + [ids]) as name:
    # NOTE: inside this scope `np` is the partition count, not NumPy.
    np = len(params)  # Number of partitions
    # Preserve the resource variable status to avoid accidental dense reads.
    if not any(
        isinstance(p, resource_variable_ops.ResourceVariable) for p in params):
      params = ops.convert_n_to_tensor_or_indexed_slices(params, name="params")
    if np == 1:
      # Single partition: gather, optionally weight, and reduce in one place.
      with ops.colocate_with(params[0]):
        ret = maybe_normalize(_do_gather(params[0], ids))
        ignore_weights = weights is None
        if not ignore_weights:
          if weights.dtype != ret.dtype:
            weights = math_ops.cast(weights, ret.dtype)
          # Reshape to allow broadcast
          ones = array_ops.fill(
              array_ops.expand_dims(array_ops.rank(ret) - 1, 0), 1)
          bcast_weights_shape = array_ops.concat(
              [array_ops.shape(weights), ones], 0)
          orig_weights_shape = weights.get_shape()
          weights = array_ops.reshape(weights, bcast_weights_shape)
          # Set weights shape after reshape
          if ret.get_shape().ndims is not None:
            weights.set_shape(
                orig_weights_shape.concatenate(
                    [1 for _ in range(ret.get_shape().ndims - 1)]))
          ret *= weights
          return math_ops.segment_sum(ret, segment_ids, name=name)
        else:
          return math_ops.sparse_segment_sum(ret, idx, segment_ids, name=name)
    else:
      ids = ops.convert_to_tensor(ids, name="ids")
      flat_ids = array_ops.reshape(ids, [-1])
      # Position of every id in `flat_ids`; partitioned alongside the ids so
      # that weights / segment ids can be matched up per partition later.
      original_indices = math_ops.range(array_ops.size(flat_ids))

      # Create p_assignments and set new_ids depending on the strategy.
      if partition_strategy == "mod":
        p_assignments = flat_ids % np
        new_ids = flat_ids // np
      elif partition_strategy == "div":
        # Compute num_total_ids as the sum of dim-0 of params, then assign to
        # partitions based on a constant number of ids per partition. Optimize
        # if we already know the full shape statically.
        dim_0_size = params[0].get_shape()[0]
        for p in xrange(1, np):
          dim_0_size += params[p].get_shape()[0]
        if dim_0_size.value:
          num_total_ids = constant_op.constant(dim_0_size.value, flat_ids.dtype)
        else:
          # At least one dim-0 size is not known statically: mix static values
          # with dynamic shape reads and sum them at run time.
          dim_0_sizes = []
          for p in xrange(np):
            if params[p].get_shape()[0].value is not None:
              dim_0_sizes.append(params[p].get_shape()[0].value)
            else:
              with ops.colocate_with(params[p]):
                dim_0_sizes.append(array_ops.shape(params[p])[0])
          num_total_ids = math_ops.reduce_sum(
              math_ops.cast(array_ops.stack(dim_0_sizes), flat_ids.dtype))
        ids_per_partition = num_total_ids // np
        extras = num_total_ids % np

        # The first `extras` partitions hold `ids_per_partition + 1` ids each,
        # the remaining ones hold `ids_per_partition`.
        p_assignments = math_ops.maximum(flat_ids // (ids_per_partition + 1), (
            flat_ids - extras) // ids_per_partition)

        # Emulate a conditional using a boolean indicator tensor
        is_in_first_extras_partitions = math_ops.cast(p_assignments < extras,
                                                      flat_ids.dtype)
        new_ids = (is_in_first_extras_partitions *
                   (flat_ids % (ids_per_partition + 1)) +
                   (1 - is_in_first_extras_partitions) * (
                       (flat_ids - extras) % ids_per_partition))
      else:
        raise ValueError("Unrecognized partition strategy: " +
                         partition_strategy)

      # Cast partition assignments to int32 for use in dynamic_partition.
      # There really should not be more than 2^32 partitions.
      p_assignments = math_ops.cast(p_assignments, dtypes.int32)
      # Partition list of ids based on assignments into np separate lists
      gather_ids = data_flow_ops.dynamic_partition(new_ids, p_assignments, np)
      # Similarly, partition the original indices.
      pindices = data_flow_ops.dynamic_partition(original_indices,
                                                 p_assignments, np)
      # Do np separate lookups, finding embeddings for plist[p] in params[p]
      partitioned_result = []
      for p in xrange(np):
        with ops.colocate_with(params[p]):
          partitioned_result.append(_do_gather(params[p], gather_ids[p]))

      ignore_weights = weights is None
      if not ignore_weights:
        # Partition weights according to pindices.
        partitioned_weight = []
        for p in xrange(np):
          partitioned_weight.append(array_ops.gather(weights, pindices[p]))
      # Reshape each partition result.
      element_shape = params[0].get_shape()[1:]
      for p in params[1:]:
        element_shape = element_shape.merge_with(p.get_shape()[1:])
      if element_shape.is_fully_defined():
        for p in xrange(np):
          with ops.colocate_with(params[p]):
            partitioned_result[p] = array_ops.reshape(
                partitioned_result[p],
                array_ops.concat([array_ops.shape(pindices[p]), element_shape],
                                 0))
      else:
        # Element shape is not fully static: read it from params[0] at run
        # time instead.
        with ops.colocate_with(params[0]):
          params_shape = array_ops.shape(params[0])
        for p in xrange(np):
          with ops.colocate_with(params[p]):
            partitioned_result[p] = array_ops.reshape(
                partitioned_result[p],
                array_ops.concat([
                    array_ops.shape(pindices[p]), array_ops.slice(
                        params_shape, [1], [-1])
                ], 0))
      # Normalize each partition result.
      for p in xrange(np):
        with ops.colocate_with(params[p]):
          partitioned_result[p] = maybe_normalize(partitioned_result[p])
      if not ignore_weights:
        # Multiply each partition result with partition weights.
        for p in xrange(np):
          with ops.colocate_with(params[p]):
            if partitioned_weight[p].dtype != partitioned_result[p].dtype:
              partitioned_weight[p] = math_ops.cast(partitioned_weight[p],
                                                    partitioned_result[p].dtype)
            # Reshape partition weights.
            ones = array_ops.fill(
                array_ops.expand_dims(
                    array_ops.rank(partitioned_result[p]) - 1, 0), 1)
            bcast_weights_shape = array_ops.concat(
                [array_ops.shape(partitioned_weight[p]), ones], 0)
            orig_weights_shape = partitioned_weight[p].get_shape()
            partitioned_weight[p] = array_ops.reshape(partitioned_weight[p],
                                                      bcast_weights_shape)
            if partitioned_result[p].get_shape().ndims is not None:
              partitioned_weight[p].set_shape(
                  orig_weights_shape.concatenate([
                      1 for _ in range(partitioned_result[p].get_shape().ndims -
                                       1)
                  ]))
            partitioned_result[p] *= partitioned_weight[p]
      partitioned_segment_ids = []
      for p in xrange(np):
        if not ignore_weights:
          # Partition segment_ids according to pindices.
          p_segment_ids = array_ops.gather(segment_ids, pindices[p])
          # Number the p_segment_ids to meet segment_sum's requirements. Note
          # that unique_p_segment_ids contains unique segment ids of this
          # partition and these ids' order is unchanged.
          unique_p_segment_ids, unique_p_segment_idx = array_ops.unique(
              p_segment_ids)
          partitioned_segment_ids.append(unique_p_segment_ids)
          # segment_sum this partition's result.
          with ops.colocate_with(params[p]):
            partitioned_result[p] = math_ops.segment_sum(
                partitioned_result[p], unique_p_segment_idx)
        else:
          # When ignoring weights, we need the positions (within idx and
          # segment_ids) of the elements that belong to this partition.
          _, exclude_idx = array_ops.setdiff1d(idx, pindices[p])
          all_idx = math_ops.range(array_ops.shape(idx)[0])
          _, include_idx = array_ops.setdiff1d(all_idx, exclude_idx)
          # Gather segment_ids and idx according to those positions.
          p_segment_ids = array_ops.gather(segment_ids, include_idx)
          p_idx = array_ops.gather(idx, include_idx)
          # Number the p_segment_ids, same as in the weighted case above.
          unique_p_segment_ids, unique_p_segment_idx = array_ops.unique(
              p_segment_ids)
          _, unique_p_idx_idx = array_ops.unique(p_idx)
          partitioned_segment_ids.append(unique_p_segment_ids)
          with ops.colocate_with(params[p]):
            partitioned_result[p] = math_ops.sparse_segment_sum(
                partitioned_result[p], unique_p_idx_idx, unique_p_segment_idx)
      # Concat each partition's segment_ids and result for final segment_sum.
      concat_segment_ids = array_ops.concat(partitioned_segment_ids, 0)
      concat_partitioned_result = array_ops.concat(partitioned_result, 0)
      return math_ops.unsorted_segment_sum(
          concat_partitioned_result,
          concat_segment_ids,
          math_ops.reduce_max(concat_segment_ids) + 1,
          name=name)
def _precision_at_thresholds(predictions, targets, weights=None):
  """Streaming precision of column 1 of `predictions` at 100 thresholds.

  Args:
    predictions: Rank-2 value; only `predictions[:, 1:2]` is evaluated.
    targets: Targets forwarded unchanged to the metric.
    weights: Optional weights forwarded unchanged to the metric.

  Returns:
    Whatever `metric_ops.streaming_precision_at_thresholds` returns.
  """
  # begin=[0, 1], size=[-1, 1]: every row, the single column at index 1.
  column_one = array_ops.slice(predictions, [0, 1], [-1, 1])
  # Thresholds 0.00, 0.01, ..., 0.99 (the end point 1 is excluded).
  thresholds = np.arange(0, 1, 0.01, dtype=np.float32)
  return metric_ops.streaming_precision_at_thresholds(
      column_one, targets, thresholds, weights=weights)
def _get_partitioned_variable(
    self, name, partitioner, shape=None, dtype=dtypes.float32,
    initializer=None, regularizer=None, reuse=None, trainable=True,
    collections=None, caching_device=None, validate_shape=True):
  """Gets or creates a partitioned (sharded) variable with these parameters.

  `partitioner` is called as `partitioner(shape=shape, dtype=dtype)` and must
  return a sequence with one integer (>= 1) per axis of `shape`. The entry for
  the sliced axis is the number of shards along that axis; e.g.
  `partitions[1] = 3` means three shards along dimension 1. Sharding along
  only one axis is supported.

  If a partitioned variable is already stored under `name`, it is validated
  against the requested shape, dtype and partitions and then returned.
  Otherwise the shards `<name>/part_<i>` are created through
  `self._get_single_variable`, wrapped in a partitioned variable, stored under
  `name`, and returned.

  `reuse=True` only allows reusing an existing variable, `reuse=False` only
  allows creating a new one, and `reuse=None` (the default) allows both.

  Initializer handling for each shard:
    * `None`: a `uniform_unit_scaling_initializer` built from the full shape.
    * a callable: passed through and invoked per shard.
    * a `Tensor`: the full shape is taken from it, each shard is initialized
      from the matching slice, and the tensor's base dtype replaces `dtype`.
    * anything else: converted to a tensor of `dtype`, then sliced per shard.

  Args:
    name: Name of the new or existing sharded variable.
    partitioner: Callable accepting `shape` and `dtype` keyword arguments and
      returning the list of partitions per axis.
    shape: Shape of the new or existing sharded variable.
    dtype: Type of the new or existing sharded variable.
    initializer: Initializer for the sharded variable (see above).
    regularizer: Forwarded to `self._get_single_variable` for every shard.
    reuse: A Boolean or `None`; controls reuse or creation (see above).
    trainable: Forwarded to `self._get_single_variable` for every shard.
    collections: Forwarded to `self._get_single_variable` for every shard.
    caching_device: Forwarded to `self._get_single_variable` for every shard.
    validate_shape: Forwarded to `self._get_single_variable` for every shard.

  Returns:
    The partitioned variable registered under `name`: either the previously
    stored one or a newly built `variables._PartitionedVariable`.

  Raises:
    ValueError: if an unpartitioned variable of this name exists; if the shape
      is not fully defined or has rank 0; if the partitioner's result is not a
      valid sequence for the shape; if reuse is violated; if an existing
      partitioned variable conflicts in shape, dtype or partitions; or if
      existing shards indicate a different sharding.
  """
  # `None` is never an `ops.Tensor` instance, so a plain isinstance suffices.
  initializing_from_value = isinstance(initializer, ops.Tensor)
  reuse_without_partition = reuse is True and partitioner is None

  if name in self._vars:
    raise ValueError(
        "A partitioner was provided, but an unpartitioned version of the "
        "variable was found: %s. Perhaps a variable of the same name was "
        "already created without partitioning?" % name)

  shape = tensor_shape.as_shape(shape)
  if initializing_from_value:
    shape = initializer.get_shape()

  if not reuse_without_partition:
    if not shape.is_fully_defined():
      raise ValueError("Shape of a new partitioned variable (%s) must be "
                       "fully defined, but instead was %s." % (name, shape))

    if shape.ndims < 1:
      raise ValueError("A partitioned Variable must have rank at least 1, "
                       "shape: %s" % shape)

    partitions = partitioner(shape=shape, dtype=dtype)

    if not isinstance(partitions, collections_lib.Sequence):
      raise ValueError("Partitioner must return a sequence, but saw: %s"
                       % partitions)

    if len(partitions) != shape.ndims:
      raise ValueError(
          "Partitioner returned a partition list that does not match the "
          "Variable's rank: %s vs. %s" % (partitions, shape))

    axis_is_below_one = [p < 1 for p in partitions]
    if any(axis_is_below_one):
      raise ValueError(
          "Partitioner returned zero partitions for some axes: %s" %
          partitions)

  reuse_is_explicit = reuse is not None

  if name in self._partitioned_vars:
    if reuse_is_explicit and not reuse:
      raise ValueError(
          "Partitioned variable with name %s already exists. Did you mean to "
          "set reuse=True in VarScope?"
          % name)

    existing_var = self._partitioned_vars[name]
    existing_shape = existing_var.get_shape()
    if not shape.is_compatible_with(existing_shape):
      raise ValueError(
          "Trying to reuse partitioned variable %s, but specified shape %s "
          "and found shape %s."
          % (name, shape, existing_var.get_shape()))
    if not dtype.is_compatible_with(existing_var.dtype):
      raise ValueError(
          "Trying to reuse partitioned variable %s, but specified dtype %s "
          "and found dtype %s."
          % (name, dtype.name, existing_var.dtype.name))

    # pylint: disable=protected-access
    if (not reuse_without_partition and
        existing_var._get_partitions() != partitions):
      raise ValueError(
          "Trying to reuse partitioned variable %s, but specified partitions "
          "%s and found partitions %s." %
          (name, partitions, existing_var._get_partitions()))
    # pylint: enable=protected-access

    return existing_var

  if reuse_is_explicit and reuse:
    raise ValueError("PartitionedVariable %s does not exist, or was not "
                     "created with tf.get_variable(). Did you mean to set "
                     "reuse=None in VarScope?" % name)

  slice_dim, slice_shape = _compute_slice_dim_and_shape(
      shape.as_list(), partitions)

  num_slices = partitions[slice_dim]
  # The first `num_slices_with_excess` shards are one element longer so that
  # the shards exactly cover the sliced dimension.
  num_slices_with_excess = shape[slice_dim].value % num_slices
  slice_offset = [0] * shape.ndims

  # If shard 0 already exists as a plain variable, the last expected shard
  # must exist too and no shard beyond it may exist.
  if "%s/part_0" % name in self._vars:
    if "%s/part_%d" % (name, num_slices - 1) not in self._vars:
      raise ValueError(
          "Partitioner returned a different partitioning than what was "
          "already found. Partitioner returned %d shards, and shard "
          "%s/part_0 was found, but %s/part_%d was not."
          % (num_slices, name, name, num_slices - 1))
    if "%s/part_%d" % (name, num_slices) in self._vars:
      raise ValueError(
          "Partitioner returned a different partitioning than what was "
          "already found. Partitioner returned %d shards, and shard "
          "%s/part_0 was found, but so was the extra shard %s/part_%d."
          % (num_slices, name, name, num_slices))

  vs = []
  for i in xrange(num_slices):
    var_shape = slice_shape[:]
    var_offset = slice_offset[:]
    if i < num_slices_with_excess:
      var_shape[slice_dim] += 1
    # Advance the running offset for the next shard.
    slice_offset[slice_dim] += var_shape[slice_dim]

    var_full_name = "%s/part_%d" % (name, i)
    with ops.op_scope([], var_full_name + "/PartitionedInitializer"):
      if initializer is None or callable(initializer):
        if initializer is None:
          init = init_ops.uniform_unit_scaling_initializer(
              full_shape=shape.as_list())
        else:
          init = initializer
        # Initializer functions are told the shard's shape explicitly.
        init_shape = var_shape
      else:
        if isinstance(initializer, ops.Tensor):
          init = array_ops.slice(initializer, var_offset, var_shape)
          # Use the dtype of the given tensor.
          dtype = init.dtype.base_dtype
        else:
          init = ops.convert_to_tensor(initializer, dtype=dtype)
          init = array_ops.slice(init, var_offset, var_shape)
        # The sliced value already carries the shard's shape.
        init_shape = None

    with ops.name_scope(None):
      var = self._get_single_variable(
          name=var_full_name,
          shape=init_shape,
          dtype=dtype,
          initializer=init,
          regularizer=regularizer,
          reuse=reuse,
          trainable=trainable,
          collections=collections,
          caching_device=caching_device,
          validate_shape=validate_shape)

    # pylint: disable=protected-access
    slice_info = variables.Variable.SaveSliceInfo(
        name, shape.as_list(), var_offset, var_shape)
    var._set_save_slice_info(slice_info)
    # pylint: enable=protected-access
    vs.append(var)

  # pylint: disable=protected-access
  partitioned_var = variables._PartitionedVariable(name=name,
                                                   shape=shape,
                                                   dtype=dtype,
                                                   variable_list=vs,
                                                   partitions=partitions)
  # pylint: enable=protected-access

  self._partitioned_vars[name] = partitioned_var
  return partitioned_var
def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index):
  """Gradient for concat op.

  Args:
    op: An operation.
    grad: `Tensor` or `IndexedSlices` representing the gradients with respect
      to each output of the op.
    start_value_index: An integer index of the first value in the op.inputs.
    end_value_index: An integer index of the last value in the op.inputs.
    dim_index: An integer index of concat_dim or axis parameter in op.inputs.

  Returns:
    A list with one partial gradient per concatenated input plus a `None` for
    the concat_dim/axis input. The `None` comes last when
    `end_value_index <= dim_index` and first otherwise.

  Raises:
    ValueError: if `grad` is an `IndexedSlices` and concat_dim/axis is not
      statically known, or is negative while the rank of the first value is
      not statically known.
    TypeError: if `grad` is neither a `Tensor` nor an `IndexedSlices`.
  """

  def _CreateDenseMaskAndBegin(sizes, concat_dim):
    """Create variables for iteratively slicing a dense gradients tensor."""
    # Since shape is 1-D, shape_of_shape = [rank-of-inputs]
    shape_of_shape = array_ops.shape(sizes[0])
    # Make a vector of length equal to the input's dimensions,
    # with 0's everywhere and 1 in the concat dim position.
    # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
    mask = array_ops.concat([
        array_ops.fill(array_ops.expand_dims(concat_dim, 0), 0), [1],
        array_ops.fill(shape_of_shape - concat_dim - 1, 0)
    ], 0)
    begin = array_ops.fill(shape_of_shape, 0)
    return mask, begin

  def _ExtractInputShapes(inputs):
    """Extract the shapes of a set of input tensors."""
    sizes = []
    fully_known = True
    for x in inputs:
      input_shape = array_ops.shape(x)
      if not isinstance(input_shape,
                        ops.Tensor) or input_shape.op.type != "Const":
        fully_known = False
        break
      else:
        sizes.append(input_shape)

    if fully_known:
      return sizes
    else:
      # At least one shape is not a constant: fetch all of them in one op.
      return array_ops.shape_n(inputs)

  # Degenerate concatenation, just return grad.
  if len(op.inputs) == 2:
    # `grad` is a single Tensor/IndexedSlices, so it must be wrapped in a list
    # before the `None` for the concat_dim input is attached; `grad + [None]`
    # would be tensor arithmetic, not list concatenation. A list/tuple is
    # still accepted as-is for backward compatibility.
    passthrough = list(grad) if isinstance(grad, (list, tuple)) else [grad]
    return (passthrough + [None]
            if end_value_index <= dim_index else [None] + passthrough)

  concat_dim = op.inputs[dim_index]
  input_values = op.inputs[start_value_index:end_value_index]
  # Using mod here for convenience since concat_dim is already verified
  # in concat implementation to be within the allowed [-rank, rank) range.
  non_neg_concat_dim = concat_dim % array_ops.rank(input_values[0])

  out_grads = []
  if isinstance(grad, ops.Tensor):
    # Get the inputs' tensor shapes
    sizes = _ExtractInputShapes(input_values)
    # The magic number of 16 was found through benchmarking a range of sizes
    # on CPUs and a Maxwell TitanX. A speedup was seen in a large majority of
    # cases when switching implementations at N=16, but it is possible that
    # there will be a small number of performance regressions.
    # pylint: disable=protected-access
    if len(sizes) > 16:
      # extract the size of each input along the concat dimension
      sizes = array_ops.squeeze(
          array_ops.slice(array_ops.stack(sizes, axis=1),
                          [non_neg_concat_dim, 0],
                          [1, -1]))
      out_grads = array_ops.split(grad, sizes, non_neg_concat_dim)
    else:
      offset = gen_array_ops._concat_offset(non_neg_concat_dim, sizes)
      for (begin, size) in zip(offset, sizes):
        out_grads.append(array_ops.slice(grad, begin, size))
    # pylint: enable=protected-access
  elif isinstance(grad, ops.IndexedSlices):
    concat_dim_static = tensor_util.constant_value(concat_dim)
    if concat_dim_static is None:
      raise ValueError("Can only compute IndexedSlices gradient with "
                       "statically-known concat_dim")
    if concat_dim_static < 0:
      rank = tensor_util.constant_value(array_ops.rank(input_values[0]))
      if rank is None:
        raise ValueError(
            "Can only compute IndexedSlices gradient with "
            "negative concat_dim when first value rank is "
            "statically-known.")
      concat_dim_static %= rank
    # Get the inputs' tensor shapes
    sizes = [array_ops.shape(x) for x in input_values]
    if concat_dim_static > 0:
      # IndexedSlices, non_neg_concat_dim > 0. Each input gets IndexedSlices
      # gradients with all the indices, but with grad.values sliced accordingly.
      # This is like the Tensor case, except shape(grad.values)[0] is not equal
      # to shape(sizes[i])[0], since only a subset of the dim-0 values are
      # stored.
      mask, begin = _CreateDenseMaskAndBegin(sizes, non_neg_concat_dim)
      for size in sizes:
        new_values = array_ops.slice(
            grad.values, begin,
            array_ops.concat(
                [[-1], array_ops.slice(size, [1], [-1])], 0))
        out_grads.append(
            ops.IndexedSlices(new_values, grad.indices, size))
        # Lint complains begin = begin + ...
        begin = math_ops.add(begin, size * mask)
    else:
      # IndexedSlices, concat_dim == 0. Each input gets IndexedSlices gradients
      # only for the relevant indices.
      start = constant_op.constant(0, dtype=grad.indices.dtype)
      for size in sizes:
        size_concat_dim = array_ops.gather(size, non_neg_concat_dim)
        if size_concat_dim.dtype != grad.indices.dtype:
          size_concat_dim = math_ops.cast(size_concat_dim,
                                          dtype=grad.indices.dtype)
        end = start + size_concat_dim
        # Compute the 1-D Tensor of indices relevant for this input.
        indices_to_select = array_ops.squeeze(array_ops.where(
            math_ops.logical_and(grad.indices >= start, grad.indices < end)),
                                              squeeze_dims=[1])
        new_indices = array_ops.gather(grad.indices, indices_to_select) - start
        new_values = array_ops.gather(grad.values, indices_to_select)
        out_grads.append(
            ops.IndexedSlices(new_values, new_indices, size))
        start = end
  else:
    raise TypeError("Expected Tensor or IndexedSlices, got %s" % type(grad))

  return (out_grads + [None]
          if end_value_index <= dim_index else [None] + out_grads)
def residual_with_slice_fn(inp, out):
  """Adds `out` to the first three columns of the rank-2 `inp`."""
  slice_begin = [0, 0]
  slice_size = [-1, 3]  # every row, columns 0..2
  return array_ops.slice(inp, slice_begin, slice_size) + out
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner="mean",
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div",
                                 max_norm=None):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding tensors in `embedding_weights` must all have the
  same shape except for the first dimension. The first dimension is allowed to
  vary as the vocabulary size is not necessarily a multiple of `P`.
  `embedding_weights` may be a `PartitionedVariable` as returned by using
  `tf.compat.v1.get_variable()` with a partitioner.

  Invalid IDs (< 0) are pruned from the input IDs and weights. Unless
  `combiner` is `"sum"`, IDs with non-positive weight are pruned as well. For
  an entry with no features, the embedding vector for `default_id` is
  returned, or the 0-vector if `default_id` is not supplied.

  The ids and weights may be multi-dimensional. Embeddings are always
  aggregated along the last dimension.

  Args:
    embedding_weights: A single tensor representing the complete embedding
      tensor, or a list of tensors all of the same shape except for the first
      dimension, representing sharded embedding tensors. Alternatively, a
      `PartitionedVariable`, created by partitioning along dimension 0. Each
      element must be appropriately sized for the given `partition_strategy`.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
      ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of the same shape as `sparse_ids`,
      containing float weights corresponding to `sparse_ids`, or `None` if all
      weights are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
      the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
      Currently `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not `None`, all embeddings are l2-normalized to max_norm
      before combining.

  Returns:
    A dense tensor representing the combined embeddings for the sparse ids.
    For each row in the dense tensor represented by `sparse_ids`, the op looks
    up the embeddings for all ids in that row, multiplies them by the
    corresponding weight, and combines these embeddings as specified.

    In other words, if

      `shape(combined embedding_weights) = [p0, p1, ..., pm]`

    and

      `shape(sparse_ids) = shape(sparse_weights) = [d0, d1, ..., dn]`

    then

      `shape(output) = [d0, d1, ... dn-1, p1, ..., pm]`.

    For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are

      ```python
      [0, 0]: id 1, weight 2.0
      [0, 1]: id 3, weight 0.5
      [1, 0]: id -1, weight 1.0
      [2, 3]: id 1, weight 3.0
      ```

    `default_id` is 0.

    with `combiner`="mean", then the output will be a 3x20 matrix where

      ```python
      output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5)
      output[1, :] = (params[0, :] * 1.0) / 1.0
      output[2, :] = (params[1, :] * 3.0) / 3.0
      ```

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
  if embedding_weights is None:
    raise ValueError(f"Missing embedding_weights {embedding_weights}.")
  if isinstance(embedding_weights, variables.PartitionedVariable):
    # Iterating a PartitionedVariable yields the underlying Variables.
    embedding_weights = list(embedding_weights)
  if not isinstance(embedding_weights, list):
    embedding_weights = [embedding_weights]
  if not embedding_weights:
    raise ValueError(f"Missing embedding_weights {embedding_weights}.")

  dtype = None if sparse_weights is None else sparse_weights.dtype
  converted_weights = []
  for w in embedding_weights:
    # Resource variables whose dtype already fits are kept as variables;
    # everything else is converted to a tensor of the weights' dtype.
    keep_as_variable = (
        isinstance(w, resource_variable_ops.ResourceVariable) and
        dtype in (None, w.dtype))
    if keep_as_variable:
      converted_weights.append(w)
    else:
      converted_weights.append(ops.convert_to_tensor(w, dtype=dtype))
  embedding_weights = converted_weights

  with ops.name_scope(name, "embedding_lookup", embedding_weights +
                      [sparse_ids, sparse_weights]) as scope:
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.dense_shape
    original_rank_dim = tensor_shape.dimension_value(
        sparse_ids.dense_shape.get_shape()[0])
    if original_rank_dim is None:
      # Rank is not known statically; read it at run time.
      original_rank = array_ops.size(original_shape)
    else:
      original_rank = original_rank_dim

    # Collapse all leading dimensions into one, keeping the last as is.
    leading_size = math_ops.reduce_prod(
        array_ops.slice(original_shape, [0], [original_rank - 1]))
    last_size = array_ops.gather(original_shape, original_rank - 1)
    sparse_ids = sparse_ops.sparse_reshape(sparse_ids,
                                           [leading_size, last_size])
    if sparse_weights is not None:
      sparse_weights = sparse_tensor.SparseTensor(sparse_ids.indices,
                                                  sparse_weights.values,
                                                  sparse_ids.dense_shape)

    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights)
    if combiner != "sum":
      sparse_ids, sparse_weights = _prune_invalid_weights(
          sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    fill_id = default_id or 0
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, fill_id)
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0)

    # When no default id is given the scope name is reserved for the masking
    # op below instead of the lookup itself.
    lookup_name = None if default_id is None else scope
    result = embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=lookup_name,
        max_norm=max_norm)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      empty_column = array_ops.reshape(is_row_empty, [-1, 1])
      tile_multiples = array_ops.stack([1, array_ops.shape(result)[1]])
      is_row_empty = array_ops.tile(empty_column, tile_multiples)

      # Rows that had no features become zero vectors.
      result = array_ops.where(
          is_row_empty, array_ops.zeros_like(result), result, name=scope)

    # Reshape back from linear ids back into higher-dimensional dense result.
    leading_dims = array_ops.slice(
        math_ops.cast(original_shape, dtypes.int32), [0],
        [original_rank - 1])
    embedding_dims = array_ops.slice(array_ops.shape(result), [1], [-1])
    final_result = array_ops.reshape(
        result, array_ops.concat([leading_dims, embedding_dims], 0))

    # Restore as much static shape information as is available.
    static_leading_rank = (tensor_shape.Dimension(original_rank_dim) - 1).value
    final_result.set_shape(
        tensor_shape.unknown_shape(static_leading_rank).concatenate(
            result.get_shape()[1:]))
    return final_result
def _event_shape(self):
  """Returns the last two entries of the scale operator's shape vector."""
  operator_shape = self.scale_operator_pd.shape()
  # Start two entries before the end of the shape vector and take two.
  tail_begin = array_ops.shape(operator_shape) - 2
  return array_ops.slice(operator_shape, tail_begin, [2])
def _testSliceMatrixDim0(self, x, begin, size):
  """Checks that slicing rows with `array_ops.slice` matches `x[begin:begin+size]`.

  Args:
    x: Rank-2 array to slice along dimension 0; all of dimension 1 is kept.
    begin: First row of the slice.
    size: Number of rows in the slice.
  """
  slice_begin = [begin, 0]
  slice_size = [size, x.shape[1]]
  actual = self.evaluate(array_ops.slice(x, slice_begin, slice_size))
  expected = x[begin:begin + size, :]
  self.assertAllEqual(actual, expected)
def call(self, inputs, state):
  """Runs one step of the cell.

  The step computes, with `act = self._activation`:

    i_t = sigmoid(x W_xi + m_prev W_hi + c_prev W_ci + b_i)
    c_t = (1 - i_t) * c_prev + i_t * act(x W_xc + m_prev W_hc + b_c)
    o_t = sigmoid(x W_xo + m_prev W_ho + c_t W_co + b_o)
    h_t = o_t * act(c_t)

  i.e. the input gate `i_t` also acts as `1 - forget gate`. All weights and
  biases are fetched with `tf.get_variable` and stored on `self`.

  Args:
    inputs: Rank-2 input whose second dimension must be statically known.
    state: Previous state. A `(c_prev, m_prev)` pair when
      `self._state_is_tuple` is true; otherwise a single rank-2 value whose
      first `self._num_units` columns are `c_prev` and whose next `num_proj`
      columns are `m_prev`.

  Returns:
    A pair `(h_t, new_state)`, where `new_state` is an `LSTMStateTuple(c_t,
    h_t)` when `self._state_is_tuple` is true and `concat([c_t, h_t], 1)`
    otherwise.

  Raises:
    ValueError: if the size of the second dimension of `inputs` is unknown.
  """
  sigmoid = math_ops.sigmoid

  num_proj = self._num_units if self._num_proj is None else self._num_proj

  if self._state_is_tuple:
    c_prev, m_prev = state
  else:
    # Packed state: cell state first, then the previous output.
    c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
    m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

  input_size = inputs.get_shape().with_rank(2)[1]
  if input_size.value is None:
    raise ValueError(
        "Could not infer input size from inputs.get_shape()[-1]")

  # Input gate weights
  self.w_xi = tf.get_variable("_w_xi", [input_size.value, self._num_units])
  self.w_hi = tf.get_variable("_w_hi", [self._num_units, self._num_units])
  self.w_ci = tf.get_variable("_w_ci", [self._num_units, self._num_units])
  # Output gate weights
  self.w_xo = tf.get_variable("_w_xo", [input_size.value, self._num_units])
  self.w_ho = tf.get_variable("_w_ho", [self._num_units, self._num_units])
  self.w_co = tf.get_variable("_w_co", [self._num_units, self._num_units])
  # Cell weights
  self.w_xc = tf.get_variable("_w_xc", [input_size.value, self._num_units])
  self.w_hc = tf.get_variable("_w_hc", [self._num_units, self._num_units])

  # Initialize the bias vectors
  self.b_i = tf.get_variable("_b_i", [self._num_units],
                             initializer=init_ops.zeros_initializer())
  self.b_c = tf.get_variable("_b_c", [self._num_units],
                             initializer=init_ops.zeros_initializer())
  self.b_o = tf.get_variable("_b_o", [self._num_units],
                             initializer=init_ops.zeros_initializer())

  # Input gate.
  i_t = sigmoid(
      math_ops.matmul(inputs, self.w_xi) +
      math_ops.matmul(m_prev, self.w_hi) +
      math_ops.matmul(c_prev, self.w_ci) +
      self.b_i)
  # New cell state: (1 - i_t) weighs the old state, i_t the candidate.
  c_t = ((1 - i_t) * c_prev + i_t * self._activation(
      math_ops.matmul(inputs, self.w_xc) +
      math_ops.matmul(m_prev, self.w_hc) + self.b_c))

  # Output gate; it looks at the *new* cell state c_t.
  o_t = sigmoid(
      math_ops.matmul(inputs, self.w_xo) +
      math_ops.matmul(m_prev, self.w_ho) +
      math_ops.matmul(c_t, self.w_co) +
      self.b_o)

  h_t = o_t * self._activation(c_t)

  new_state = (rnn_cell_impl.LSTMStateTuple(c_t, h_t) if self._state_is_tuple
               else array_ops.concat([c_t, h_t], 1))
  return h_t, new_state
def training_graph(self, input_data, input_labels, random_seed,
                   data_spec, epoch=None, input_weights=None):
    """Constructs a TF graph for training a random tree.

    The graph is assembled in stages: accumulate per-node and per-split
    statistics, sample new candidate splits, find finished accumulators,
    score non-fertile leaves, pick best splits, grow the tree, reassign
    accumulator slots, and finally reset the statistics of the slots that
    were cleared or newly allocated.

    Args:
      input_data: A tensor or SparseTensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      random_seed: The random number generator seed to use for this tree.  0
        means use the current time as the seed.
      data_spec: A list of tf.dtype values specifying the original types of
        each column.
      epoch: A tensor or placeholder for the epoch the training data comes
        from.  Defaults to `[0]` when None.
      input_weights: A float tensor or placeholder holding per-input weights,
        or None if all inputs are to be weighted equally.

    Returns:
      The last op in the random tree training graph.
    """
    epoch = [0] if epoch is None else epoch

    # An empty list stands in for "no weights" when calling the training ops.
    if input_weights is None:
        input_weights = []

    # A SparseTensor input is handed to the ops as its three components; the
    # dense input is then replaced by an empty list.
    sparse_indices = []
    sparse_values = []
    sparse_shape = []
    if isinstance(input_data, ops.SparseTensor):
        sparse_indices = input_data.indices
        sparse_values = input_data.values
        sparse_shape = input_data.shape
        input_data = []

    # Count extremely random stats.
    (node_sums, node_squares, splits_indices, splits_sums,
     splits_squares, totals_indices, totals_sums,
     totals_squares, input_leaves) = (
         self.training_ops.count_extremely_random_stats(
             input_data, sparse_indices, sparse_values, sparse_shape,
             data_spec, input_labels, input_weights, self.variables.tree,
             self.variables.tree_thresholds,
             self.variables.node_to_accumulator_map,
             self.variables.candidate_split_features,
             self.variables.candidate_split_thresholds,
             self.variables.start_epoch, epoch,
             num_classes=self.params.num_output_columns,
             regression=self.params.regression))

    # node_update_ops gate the leaf scoring below; splits_update_ops gate the
    # finished-node and best-split computations.
    node_update_ops = []
    node_update_ops.append(
        state_ops.assign_add(self.variables.node_sums, node_sums))

    splits_update_ops = []
    splits_update_ops.append(
        self.training_ops.scatter_add_ndim(
            self.variables.candidate_split_sums,
            splits_indices, splits_sums))
    splits_update_ops.append(
        self.training_ops.scatter_add_ndim(self.variables.accumulator_sums,
                                           totals_indices,
                                           totals_sums))

    # The squared statistics are only accumulated for regression.
    if self.params.regression:
        node_update_ops.append(
            state_ops.assign_add(self.variables.node_squares, node_squares))
        splits_update_ops.append(
            self.training_ops.scatter_add_ndim(
                self.variables.candidate_split_squares,
                splits_indices, splits_squares))
        splits_update_ops.append(
            self.training_ops.scatter_add_ndim(
                self.variables.accumulator_squares, totals_indices,
                totals_squares))

    # Sample inputs.
    update_indices, feature_updates, threshold_updates = (
        self.training_ops.sample_inputs(
            input_data, sparse_indices, sparse_values, sparse_shape,
            input_weights, self.variables.node_to_accumulator_map,
            input_leaves, self.variables.candidate_split_features,
            self.variables.candidate_split_thresholds,
            split_initializations_per_input=(
                self.params.split_initializations_per_input),
            split_sampling_random_seed=random_seed))

    update_features_op = state_ops.scatter_update(
        self.variables.candidate_split_features, update_indices,
        feature_updates)
    update_thresholds_op = state_ops.scatter_update(
        self.variables.candidate_split_thresholds, update_indices,
        threshold_updates)

    # Calculate finished nodes.
    with ops.control_dependencies(splits_update_ops):
        finished, stale = self.training_ops.finished_nodes(
            self.variables.accumulator_to_node_map,
            self.variables.node_to_accumulator_map,
            self.variables.candidate_split_sums,
            self.variables.candidate_split_squares,
            self.variables.accumulator_sums,
            self.variables.accumulator_squares,
            self.variables.start_epoch, epoch,
            num_split_after_samples=self.params.split_after_samples,
            min_split_samples=self.params.min_split_samples,
            dominate_method=self.params.dominate_method,
            dominate_fraction=self.params.dominate_fraction)

    # Update leaf scores.
    # TODO(thomaswc): Store the leaf scores in a TopN and only update the
    # scores of the leaves that were touched by this batch of input.
    # Column 0 of the tree variable is compared against LEAF_NODE to find
    # the leaves.
    children = array_ops.squeeze(array_ops.slice(self.variables.tree,
                                                 [0, 0], [-1, 1]),
                                 squeeze_dims=[1])
    is_leaf = math_ops.equal(constants.LEAF_NODE, children)
    leaves = math_ops.to_int32(
        array_ops.squeeze(array_ops.where(is_leaf), squeeze_dims=[1]))
    # Non-fertile leaves are those whose accumulator-map entry is negative.
    non_fertile_leaves = array_ops.boolean_mask(
        leaves, math_ops.less(
            array_ops.gather(self.variables.node_to_accumulator_map,
                             leaves), 0))

    # TODO(gilberth): It should be possible to limit the number of non
    # fertile leaves we calculate scores for, especially since we can only
    # take at most array_ops.shape(finished)[0] of them.
    with ops.control_dependencies(node_update_ops):
        sums = array_ops.gather(self.variables.node_sums,
                                non_fertile_leaves)
        if self.params.regression:
            squares = array_ops.gather(self.variables.node_squares,
                                       non_fertile_leaves)
            non_fertile_leaf_scores = self._variance(sums, squares)
        else:
            non_fertile_leaf_scores = self._weighted_gini(sums)

    # Calculate best splits.
    with ops.control_dependencies(splits_update_ops):
        split_indices = self.training_ops.best_splits(
            finished, self.variables.node_to_accumulator_map,
            self.variables.candidate_split_sums,
            self.variables.candidate_split_squares,
            self.variables.accumulator_sums,
            self.variables.accumulator_squares,
            regression=self.params.regression)

    # Grow tree.
    with ops.control_dependencies(
        [update_features_op, update_thresholds_op]):
        (tree_update_indices, tree_children_updates,
         tree_threshold_updates, new_eot) = (self.training_ops.grow_tree(
             self.variables.end_of_tree,
             self.variables.node_to_accumulator_map, finished,
             split_indices, self.variables.candidate_split_features,
             self.variables.candidate_split_thresholds))
        tree_update_op = state_ops.scatter_update(self.variables.tree,
                                                  tree_update_indices,
                                                  tree_children_updates)
        thresholds_update_op = state_ops.scatter_update(
            self.variables.tree_thresholds, tree_update_indices,
            tree_threshold_updates)
        # TODO(thomaswc): Only update the epoch on the new leaves.
        new_epoch_updates = epoch * array_ops.ones_like(
            tree_threshold_updates, dtype=dtypes.int32)
        epoch_update_op = state_ops.scatter_update(
            self.variables.start_epoch, tree_update_indices,
            new_epoch_updates)

    # Update fertile slots.
    with ops.control_dependencies([tree_update_op]):
        (n2a_map_updates, a2n_map_updates, accumulators_cleared,
         accumulators_allocated) = (
             self.training_ops.update_fertile_slots(
                 finished, non_fertile_leaves, non_fertile_leaf_scores,
                 self.variables.end_of_tree,
                 self.variables.accumulator_sums,
                 self.variables.node_to_accumulator_map, stale,
                 regression=self.params.regression))

    # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
    # used it to calculate new leaves.
    gated_new_eot, = control_flow_ops.tuple(
        [new_eot], control_inputs=[n2a_map_updates])
    eot_update_op = state_ops.assign(self.variables.end_of_tree,
                                     gated_new_eot)

    # Everything appended to `updates` is grouped into the returned op.
    updates = []
    updates.append(eot_update_op)
    updates.append(tree_update_op)
    updates.append(thresholds_update_op)
    updates.append(epoch_update_op)

    # Row 0 of each map-update tensor is used as the scatter indices and
    # row 1 as the values.
    updates.append(
        state_ops.scatter_update(self.variables.node_to_accumulator_map,
                                 n2a_map_updates[0], n2a_map_updates[1]))

    updates.append(
        state_ops.scatter_update(self.variables.accumulator_to_node_map,
                                 a2n_map_updates[0], a2n_map_updates[1]))

    cleared_and_allocated_accumulators = array_ops.concat(
        0, [accumulators_cleared, accumulators_allocated])

    # Calculate values to put into scatter update for candidate counts.
    # Candidate split counts are always reset back to 0 for both cleared
    # and allocated accumulators. This means some accumulators might be
    # doubly reset to 0 if they were released and not allocated, then later
    # allocated.
    split_values = array_ops.tile(
        array_ops.expand_dims(
            array_ops.expand_dims(
                array_ops.zeros_like(cleared_and_allocated_accumulators,
                                     dtype=dtypes.float32), 1), 2), [
                                         1,
                                         self.params.num_splits_to_consider,
                                         self.params.num_output_columns
                                     ])
    updates.append(
        state_ops.scatter_update(self.variables.candidate_split_sums,
                                 cleared_and_allocated_accumulators,
                                 split_values))
    if self.params.regression:
        updates.append(
            state_ops.scatter_update(
                self.variables.candidate_split_squares,
                cleared_and_allocated_accumulators, split_values))

    # Calculate values to put into scatter update for total counts.
    # Cleared accumulators get rows of -1; newly allocated ones get rows
    # of 0.
    total_cleared = array_ops.tile(
        array_ops.expand_dims(
            math_ops.neg(
                array_ops.ones_like(accumulators_cleared,
                                    dtype=dtypes.float32)), 1),
        [1, self.params.num_output_columns])
    total_reset = array_ops.tile(
        array_ops.expand_dims(
            array_ops.zeros_like(accumulators_allocated,
                                 dtype=dtypes.float32), 1),
        [1, self.params.num_output_columns])
    accumulator_updates = array_ops.concat(0, [total_cleared, total_reset])
    updates.append(
        state_ops.scatter_update(self.variables.accumulator_sums,
                                 cleared_and_allocated_accumulators,
                                 accumulator_updates))
    if self.params.regression:
        updates.append(
            state_ops.scatter_update(self.variables.accumulator_squares,
                                     cleared_and_allocated_accumulators,
                                     accumulator_updates))

    # Calculate values to put into scatter update for candidate splits.
    # Every candidate feature of a cleared/allocated accumulator is set
    # to -1.
    split_features_updates = array_ops.tile(
        array_ops.expand_dims(
            math_ops.neg(
                array_ops.ones_like(cleared_and_allocated_accumulators)),
            1), [1, self.params.num_splits_to_consider])
    updates.append(
        state_ops.scatter_update(self.variables.candidate_split_features,
                                 cleared_and_allocated_accumulators,
                                 split_features_updates))

    updates += self.finish_iteration()

    return control_flow_ops.group(*updates)
def _maybe_update_block_mask(self, weights, threshold):
    """Performs block-granular masking of the weights.

    Block pruning occurs only if the block dimensions returned by
    `self._get_block_dims` differ from `[1, 1]` and the weight tensor, when
    squeezed, has ndims = 2. Otherwise, elementwise pruning occurs via
    `self._update_mask`.

    A block dimension of -1 is replaced by the corresponding dimension of the
    squeezed weights. The replacement is done on a private copy so the list
    handed out by `self._get_block_dims` is never modified.

    Args:
      weights: The weight tensor that needs to be masked.
      threshold: The current threshold value. The function will compute a new
        threshold and return the exponential moving average using the current
        value of threshold

    Returns:
      new_threshold: The new value of the threshold based on weights, and
        sparsity at the current global_step
      new_mask: A tensor of the same shape as weights containing 0 or 1 to
        indicate which of the values in weights falls below the threshold

    Raises:
      ValueError: if block pooling function is not AVG or MAX
    """
    block_dims = self._get_block_dims(weights.op.name)
    squeezed_weights = array_ops.squeeze(weights)
    if squeezed_weights.get_shape().ndims != 2 or block_dims == [1, 1]:
        return self._update_mask(weights, threshold)

    # Copy before resolving -1 entries: the helper may return a list that is
    # shared with its own state, and writing into it would leak this
    # tensor's dimensions into later lookups.
    block_dims = list(block_dims)
    for i in range(2):
        if block_dims[i] == -1:
            block_dims[i] = squeezed_weights.get_shape()[i]

    if self._block_pooling_function not in ['AVG', 'MAX']:
        raise ValueError(
            'Unknown pooling function for block sparsity: %s' %
            self._block_pooling_function)

    with ops.name_scope(weights.op.name + '_pruning_ops'):
        abs_weights = math_ops.abs(squeezed_weights)

        pool_window = block_dims
        pool_fn = pruning_utils.factorized_pool
        squeeze_axis = None
        if not self._spec.use_tpu:
            # nn_ops.pool is fed a 4-D tensor here, so the 2-D weights get
            # leading and trailing singleton dimensions that are squeezed
            # away again after pooling.
            pool_fn = nn_ops.pool
            abs_weights = array_ops.reshape(abs_weights, [
                1,
                abs_weights.get_shape()[0],
                abs_weights.get_shape()[1], 1
            ])
            squeeze_axis = [0, 3]

        # Non-overlapping windows: the stride equals the window size.
        pooled_weights = pool_fn(abs_weights,
                                 window_shape=pool_window,
                                 pooling_type=self._block_pooling_function,
                                 strides=pool_window,
                                 padding='SAME',
                                 name=weights.op.name + '_pooled')

        if pooled_weights.get_shape().ndims != 2:
            pooled_weights = array_ops.squeeze(pooled_weights,
                                               axis=squeeze_axis)

        smoothed_threshold, new_mask = self._update_mask(
            pooled_weights, threshold)

        # Blow the per-block mask back up, then cut off whatever the 'SAME'
        # padding added beyond the original weight dimensions.
        updated_mask = pruning_utils.expand_tensor(new_mask, block_dims)
        sliced_mask = array_ops.slice(updated_mask, [0, 0], [
            squeezed_weights.get_shape()[0],
            squeezed_weights.get_shape()[1]
        ])

    return smoothed_threshold, array_ops.reshape(sliced_mask,
                                                 array_ops.shape(weights))
def training_graph(self, input_data, input_labels, data_spec=None,
                   epoch=None, **tree_kwargs):
    """Builds the training graph for every tree of the forest.

    Each tree is placed on the device chosen by `self.device_assigner`,
    optionally trained on a random subset of the rows (bagging) and/or a
    bagged subset of the features, and its training op is made to depend on
    that tree's initialization op.

    Args:
      input_data: A tensor or SparseTensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      data_spec: A list of tf.dtype values specifying the original types of
        each column.  Defaults to `[constants.DATA_FLOAT]`.
      epoch: A tensor or placeholder for the epoch the training data comes
        from.  Each tree receives `[0]` when this is None.
      **tree_kwargs: Keyword arguments passed to each tree's training_graph.

    Returns:
      A single op named 'train' grouping the training ops of all trees.
    """
    if data_spec is None:
        data_spec = [constants.DATA_FLOAT]

    per_tree_ops = []
    for tree_index in range(self.params.num_trees):
        with ops.device(self.device_assigner.get_device(tree_index)):
            # A base seed of 0 is passed through untouched; any other base
            # seed is offset by the tree index.
            tree_seed = self.params.base_random_seed
            if tree_seed != 0:
                tree_seed += tree_index

            # If using bagging, randomly select some of the input.
            tree_data, tree_labels = input_data, input_labels
            if self.params.bagging_fraction < 1.0:
                # TODO(thomaswc): This does sampling without replacement.
                # Consider also allowing sampling with replacement as an
                # option.
                num_rows = array_ops.slice(
                    array_ops.shape(input_data), [0], [1])
                draws = random_ops.random_uniform(num_rows, seed=tree_seed)
                keep_row = math_ops.less(
                    draws,
                    array_ops.ones_like(draws) * self.params.bagging_fraction)
                kept_rows = array_ops.squeeze(
                    array_ops.where(keep_row), squeeze_dims=[1])
                # TODO(thomaswc): Calculate out-of-bag data and labels, and
                # store them for use in calculating statistics later.
                tree_data = array_ops.gather(input_data, kept_rows)
                tree_labels = array_ops.gather(input_labels, kept_rows)

            if self.params.bagged_features:
                tree_data = self._bag_features(tree_index, tree_data)

            init_op = self.trees[tree_index].tree_initialization()

            with ops.control_dependencies([init_op]):
                tree_train_op = self.trees[tree_index].training_graph(
                    tree_data, tree_labels, tree_seed,
                    data_spec=data_spec,
                    epoch=([0] if epoch is None else epoch),
                    **tree_kwargs)
                per_tree_ops.append(tree_train_op)

    return control_flow_ops.group(*per_tree_ops, name='train')
def sparse_merge(sp_ids, sp_values, vocab_size, name=None):
    """Combines a batch of feature ids and values into a single `SparseTensor`.

    The most common use case for this function occurs when feature ids and
    their corresponding values are stored in `Example` protos on disk.
    `parse_example` will return a batch of ids and a batch of values, and this
    function joins them into a single logical `SparseTensor` for use in
    functions such as `sparse_tensor_dense_matmul`, `sparse_to_dense`, etc.

    The `SparseTensor` returned by this function has the following properties:

      - `indices` is equivalent to `sp_ids.indices` with the last
        dimension discarded and replaced with `sp_ids.values`.
      - `values` is simply `sp_values.values`.
      - If `sp_ids.shape = [D0, D1, ..., Dn, K]`, then
        `output.shape = [D0, D1, ..., Dn, vocab_size]`.

    For example, consider the following feature vectors:

      vector1 = [-3, 0, 0, 0, 0, 0]
      vector2 = [ 0, 1, 0, 4, 1, 0]
      vector3 = [ 5, 0, 0, 9, 0, 0]

    These might be stored sparsely in the following Example protos by storing
    only the feature ids (column number if the vectors are treated as a
    matrix) of the non-zero elements and the corresponding values:

      examples = [Example(features={
                      "ids": Feature(int64_list=Int64List(value=[0])),
                      "values": Feature(float_list=FloatList(value=[-3]))}),
                  Example(features={
                      "ids": Feature(int64_list=Int64List(value=[1, 4, 3])),
                      "values": Feature(float_list=FloatList(value=[1, 1, 4]))}),
                  Example(features={
                      "ids": Feature(int64_list=Int64List(value=[0, 3])),
                      "values": Feature(float_list=FloatList(value=[5, 9]))})]

    The result of calling parse_example on these examples will produce a
    dictionary with entries for "ids" and "values". Passing those two objects
    to this function (with `vocab_size=6`) will produce a `SparseTensor` that
    sparsely represents all three instances. Namely, the `indices` property
    will contain the coordinates of the non-zero entries in the feature matrix
    (the first dimension is the row number in the matrix, i.e., the index
    within the batch, and the second dimension is the column number, i.e., the
    feature id); `values` will contain the actual values. `shape` will be the
    shape of the original matrix, i.e., (3, 6). For our example above, the
    output will be equal to:

      SparseTensor(indices=[[0, 0], [1, 1], [1, 3], [1, 4], [2, 0], [2, 3]],
                   values=[-3, 1, 4, 1, 5, 9],
                   shape=[3, 6])

    Args:
      sp_ids: A `SparseTensor` with `values` property of type `int32`
        or `int64`.
      sp_values: A `SparseTensor` of any type.
      vocab_size: A scalar `int64` Tensor (or Python int) containing the new
        size of the last dimension, `all(0 <= sp_ids.values < vocab_size)`.
      name: A name prefix for the returned tensors (optional)

    Returns:
      A `SparseTensor` compactly representing a batch of feature ids and
      values, useful for passing to functions that expect such a
      `SparseTensor`.

    Raises:
      TypeError: If `sp_ids` or `sp_values` are not a `SparseTensor`.
    """
    if not isinstance(sp_ids, ops.SparseTensor):
        raise TypeError("sp_ids must be a SparseTensor")
    if not isinstance(sp_values, ops.SparseTensor):
        raise TypeError("sp_values must be a SparseTensor")

    with ops.op_scope([sp_ids, sp_values], name, "SparseMerge"):
        # Number of index columns, taken from the dynamic shape of the
        # indices matrix.
        rank = array_ops.shape(sp_ids.indices)[1]

        feature_ids = sp_ids.values
        if feature_ids.dtype != dtypes.int64:
            feature_ids = math_ops.cast(feature_ids, dtypes.int64)

        # Keep every index column except the last, then append the ids as
        # the new last column.
        leading_columns = array_ops.slice(
            sp_ids.indices, [0, 0], array_ops.pack([-1, rank - 1]))
        id_column = array_ops.reshape(feature_ids, [-1, 1])
        merged_indices = array_ops.concat(1, [leading_columns, id_column])

        merged_values = sp_values.values

        # Same treatment for the shape: keep the leading dims and replace
        # the last one with vocab_size.
        leading_dims = array_ops.slice(
            sp_ids.shape, [0], array_ops.expand_dims(rank - 1, 0))
        last_dim = math_ops.cast(array_ops.pack([vocab_size]), dtypes.int64)
        merged_shape = array_ops.concat(0, [leading_dims, last_dim])

        merged = ops.SparseTensor(merged_indices, merged_values, merged_shape)
        return sparse_reorder(merged)
def _split_logits(self, logits):
    """Splits logits along the last dimension and returns a dict.

    If the input logits is not a dict, splitting is applied based on the
    logits dimension of each head.
    For example:

    ```python
    # head1.logits_dimension = 2
    # head2.logits_dimension = 3
    head1 = tf.estimator.MultiLabelHead(n_classes=2, name='head1_name')
    head2 = tf.estimator.MultiClassHead(n_classes=3, name='head2_name')
    multi_head = tf.estimator.MultiHead([head1, head2])
    # Input logits
    logits = np.array([[-1., 1., 2., -2., 2.], [-1.5, 1., -3., 2., -2.]],
                      dtype=np.float32)
    # As logits is not a dict, _split_logits is applied and returns the
    # logits_dict as
    logits_dict = {'head1_name': [[-1., 1.], [-1.5, 1.]],
                   'head2_name': [[2., -2., 2.], [-3., 2., -2.]]}
    ```

    Args:
      logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`.
        For many applications, the shape is `[batch_size, logits_dimension]`.

    Returns:
      logits_dict: A dict of logits for each head.

    Raises:
      ValueError: If the last dimension of `logits` is statically known and
        does not equal the sum of the heads' logits dimensions.
    """
    logits_dict = {}
    with ops.name_scope('split_logits', values=[logits]):
        logits = ops.convert_to_tensor(logits)
        logits_dimensions = [head.logits_dimension for head in self._heads]
        total_logits_dimension = sum(logits_dimensions)
        logits_tensor_shape = logits.shape.as_list()
        last_dimension_size = logits_tensor_shape[-1]
        # The check is only possible when the last dimension is static.
        if last_dimension_size is not None:
            if last_dimension_size != total_logits_dimension:
                # Expected size first, actual size second, matching the
                # "should equal %d but is %d" wording.
                raise ValueError(
                    'Could not split logits of shape %r among the heads with '
                    'individual logits dimensions: %r. The last dimension of '
                    'the logits tensor should equal %d but is %d.' %
                    (logits_tensor_shape, logits_dimensions,
                     total_logits_dimension, last_dimension_size))

        # TODO(b/119617064): unify eager and graph implementations
        if context.executing_eagerly():
            logits_shape = logits._shape_tuple()  # pylint: disable=protected-access
            batch_shape = logits_shape[:-1]
        else:
            batch_shape = array_ops.shape(logits)[:-1]

        # Every head's slice starts at 0 and takes everything (-1) on the
        # leading dimensions; only the last dimension's offset and size vary.
        zeros_like_batch_shape = array_ops.zeros_like(batch_shape)
        minus_ones_like_batch_shape = -1 * array_ops.ones_like(batch_shape)
        begin_idx = 0
        for head in self._heads:
            begin_tensor = array_ops.concat(
                [zeros_like_batch_shape, [begin_idx]], axis=0)
            size_tensor = array_ops.concat(
                [minus_ones_like_batch_shape, [head.logits_dimension]],
                axis=0)
            logits_dict[head.name] = array_ops.slice(logits,
                                                     begin=begin_tensor,
                                                     size=size_tensor)
            begin_idx += head.logits_dimension
    return logits_dict
def main(margin, batch_size, output_size, learning_rate, whichGPU,
         is_finetuning, pretrained_net):
    """Train a ResNet-v2-50 embedding with a batch triplet-style margin loss.

    Builds an in-graph augmentation pipeline (rotation, random crops,
    hue/saturation/brightness jitter, people masks, gaussian noise), feeds
    the result through `resnet_v2.resnet_v2_50`, restores weights from
    `pretrained_net` and runs the training loop, logging to a text file and
    saving checkpoints periodically and on SIGINT.

    NOTE(review): this is Python 2 code (print statements, integer `/`,
    list-returning `zip`).

    Args:
      margin: Loss margin; converted with `float()`.
      batch_size: Batch size; converted with `int()`; must be a multiple
        of 10.
      output_size: Passed as `num_classes` to the network; converted with
        `int()`.
      learning_rate: Adam learning rate; converted with `float()`.
      whichGPU: Value for `gpu_options.visible_device_list`; converted with
        `str()`.
      is_finetuning: String; when it equals 'true' (case-insensitive) the
        'resnet_v2_50/logits' variables are not restored.
      pretrained_net: Checkpoint path to restore from.
    """
    def handler(signum, frame):
        # SIGINT handler: save a checkpoint, then exit. It reads `saver`,
        # `sess`, `step`, `ckpt_dir` and `param_str` from the enclosing
        # scope, which are only bound later in main().
        # NOTE(review): a SIGINT arriving before those names are bound would
        # presumably raise NameError here -- confirm.
        print 'Saving checkpoint before closing'
        pretrained_net = os.path.join(ckpt_dir, 'checkpoint-' + param_str)
        saver.save(sess, pretrained_net, global_step=step)
        print 'Checkpoint-', pretrained_net + '-' + str(step), ' saved!'
        sys.exit(0)

    signal.signal(signal.SIGINT, handler)

    # Hard-coded paths and schedule.
    ckpt_dir = './output/sameChain/ilsvrc/ckpts'
    log_dir = './output/sameChain/ilsvrc/logs'
    train_filename = './input/train_by_chain.txt'
    mean_file = './input/meanIm.npy'

    img_size = [256, 256]
    crop_size = [224, 224]
    num_iters = 200000
    summary_iters = 100
    save_iters = 5000
    featLayer = 'resnet_v2_50/logits'

    is_training = True

    # Arguments are coerced explicitly (they may arrive as strings).
    margin = float(margin)
    batch_size = int(batch_size)
    output_size = int(output_size)
    learning_rate = float(learning_rate)
    whichGPU = str(whichGPU)

    if batch_size % 10 != 0:
        print 'Batch size must be divisible by 10!'
        sys.exit(0)

    # NOTE(review): relies on Python 2 integer division; the value is later
    # used as a reshape/tile dimension.
    num_pos_examples = batch_size / 10

    # Create data "batcher"
    train_data = SameClassSet(train_filename, mean_file, img_size, crop_size,
                              batch_size, num_pos_examples,
                              isTraining=is_training)

    # Run identifier used in the log file name and checkpoint names.
    datestr = datetime.now().strftime("%Y_%m_%d_%H%M")
    param_str = datestr + '_tcam_with_doctoring_lr' + str(
        learning_rate).replace('.', 'pt') + '_outputSz' + str(
            output_size) + '_margin' + str(margin).replace('.', 'pt')
    logfile_path = os.path.join(log_dir, param_str + '_train.txt')
    # NOTE(review): the log file is only closed on the normal exit path at
    # the bottom of this function.
    train_log_file = open(logfile_path, 'a')
    print '------------'
    print ''
    print 'Going to train with the following parameters:'
    print 'Margin: ', margin
    train_log_file.write('Margin: ' + str(margin) + '\n')
    print 'Output size: ', output_size
    train_log_file.write('Output size: ' + str(output_size) + '\n')
    print 'Learning rate: ', learning_rate
    train_log_file.write('Learning rate: ' + str(learning_rate) + '\n')
    print 'Logging to: ', logfile_path
    train_log_file.write('Param_str: ' + param_str + '\n')
    train_log_file.write('----------------\n')
    print ''
    print '------------'

    # Queuing op loads data into input tensor
    # NOTE(review): both spatial dims use crop_size[0]; equivalent only
    # because the crop is square.
    image_batch = tf.placeholder(
        tf.float32, shape=[batch_size, crop_size[0], crop_size[0], 3])
    people_mask_batch = tf.placeholder(
        tf.float32, shape=[batch_size, crop_size[0], crop_size[0], 1])

    # doctor image params
    percent_crop = .5
    percent_people = .5
    percent_rotate = .2
    percent_filters = .4
    percent_text = .1  # not used anywhere below

    # # richard's argument: since the data is randomly loaded, we don't need
    # to change the indices that we perform operations on every time; i am on
    # board with this, but had already implemented the random crops, so will
    # leave that for now
    # # apply random rotations
    # The rotated indices and angles are drawn with numpy once, at graph
    # construction time, so they stay fixed for every training step.
    num_rotate = int(batch_size * percent_rotate)
    rotate_inds = np.random.choice(np.arange(0, batch_size), num_rotate,
                                   replace=False)
    rotate_vals = np.random.randint(-65, 65,
                                    num_rotate).astype('float32') / float(100)
    rotate_angles = np.zeros((batch_size))
    rotate_angles[rotate_inds] = rotate_vals
    rotated_batch = tf.contrib.image.rotate(image_batch, rotate_angles,
                                            interpolation='BILINEAR')

    # do random crops
    num_to_crop = int(batch_size * percent_crop)
    num_to_not_crop = batch_size - num_to_crop

    # Unlike the rotation indices, the shuffle is a TF op.
    shuffled_inds = tf.random_shuffle(np.arange(0, batch_size, dtype='int32'))
    # shuffled_inds = np.arange(0,batch_size,dtype='int32')
    # np.random.shuffle(shuffled_inds)
    crop_inds = tf.slice(shuffled_inds, [0], [num_to_crop])
    uncropped_inds = tf.slice(shuffled_inds, [num_to_crop], [num_to_not_crop])

    # crop_ratio = float(3)/float(5)
    # crop_yx = tf.random_uniform([num_to_crop,2], 0,1-crop_ratio, dtype=tf.float32, seed=0)
    # crop_sz = tf.add(crop_yx,np.tile([crop_ratio,crop_ratio],[num_to_crop, 1]))
    # crop_boxes = tf.concat([crop_yx,crop_sz],axis=1)

    # randomly select a crop between 3/5 of the image and the entire image
    crop_ratio = tf.random_uniform([num_to_crop, 1], float(3) / float(5), 1,
                                   dtype=tf.float32, seed=0)
    # randomly select a starting location between 0 and the max valid x position
    # NOTE(review): the requested shape is [1, 2] while maxval has shape
    # [num_to_crop, 1]; presumably this relies on broadcasting to yield
    # [num_to_crop, 2] -- confirm.
    crop_yx = tf.random_uniform([1, 2], 0., 1. - crop_ratio, dtype=tf.float32,
                                seed=0)
    crop_sz = tf.add(crop_yx, tf.concat([crop_ratio, crop_ratio], axis=1))
    crop_boxes = tf.concat([crop_yx, crop_sz], axis=1)

    # Images that are not cropped get the full-image box [0, 0, 1, 1].
    uncropped_boxes = np.tile([0, 0, 1, 1], [num_to_not_crop, 1])

    all_inds = tf.concat([crop_inds, uncropped_inds], axis=0)
    all_boxes = tf.concat([crop_boxes, uncropped_boxes], axis=0)

    # top_k on the negated shuffled indices gives the positions that put the
    # cropped images back in ascending index order.
    sorted_inds = tf.nn.top_k(-shuffled_inds, sorted=True,
                              k=batch_size).indices
    cropped_batch = tf.gather(
        tf.image.crop_and_resize(rotated_batch, all_boxes, all_inds,
                                 crop_size), sorted_inds)

    # apply different filters
    flt_image = convert_image_dtype(cropped_batch, dtypes.float32)

    # filter_mask is 1 for the images that receive colour jitter and 0
    # elsewhere; like the rotation indices it is fixed at graph construction.
    num_to_filter = int(batch_size * percent_filters)
    filter_inds = np.random.choice(np.arange(0, batch_size), num_to_filter,
                                   replace=False)
    filter_mask = np.zeros(batch_size)
    filter_mask[filter_inds] = 1
    filter_mask = filter_mask.astype('float32')
    # inv_filter_mask is computed but not used below.
    inv_filter_mask = np.ones(batch_size)
    inv_filter_mask[filter_inds] = 0
    inv_filter_mask = inv_filter_mask.astype('float32')

    #
    hsv = gen_image_ops.rgb_to_hsv(flt_image)
    hue = array_ops.slice(hsv, [0, 0, 0, 0], [batch_size, -1, -1, 1])
    saturation = array_ops.slice(hsv, [0, 0, 0, 1], [batch_size, -1, -1, 1])
    value = array_ops.slice(hsv, [0, 0, 0, 2], [batch_size, -1, -1, 1])

    # hue
    # The per-image delta is tiled to [batch, crop_h, crop_w, 1] so it can be
    # added to the hue channel.
    delta_vals = random_ops.random_uniform([batch_size], -.15, .15)
    hue_deltas = tf.multiply(filter_mask, delta_vals)
    hue_deltas2 = tf.expand_dims(
        tf.transpose(
            tf.tile(tf.reshape(hue_deltas, [1, 1, batch_size]),
                    (crop_size[0], crop_size[1], 1)), (2, 0, 1)), 3)
    # hue = math_ops.mod(hue + (hue_deltas2 + 1.), 1.)
    # Hue is clipped to [0, 1] rather than wrapped (see the commented-out
    # mod above).
    hue_mod = tf.add(hue, hue_deltas2)
    hue = clip_ops.clip_by_value(hue_mod, 0.0, 1.0)

    # saturation
    saturation_factor = random_ops.random_uniform([batch_size], -.05, .05)
    saturation_factor2 = tf.multiply(filter_mask, saturation_factor)
    saturation_factor3 = tf.expand_dims(
        tf.transpose(
            tf.tile(tf.reshape(saturation_factor2, [1, 1, batch_size]),
                    (crop_size[0], crop_size[1], 1)), (2, 0, 1)), 3)
    saturation_mod = tf.add(saturation, saturation_factor3)
    saturation = clip_ops.clip_by_value(saturation_mod, 0.0, 1.0)

    hsv_altered = array_ops.concat([hue, saturation, value], 3)
    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)

    # brightness
    brightness_factor = random_ops.random_uniform([batch_size], -.25, .25)
    brightness_factor2 = tf.multiply(filter_mask, brightness_factor)
    brightness_factor3 = tf.expand_dims(
        tf.transpose(
            tf.tile(tf.reshape(brightness_factor2, [1, 1, batch_size]),
                    (crop_size[0], crop_size[1], 1)), (2, 0, 1)), 3)
    adjusted = math_ops.add(rgb_altered,
                            math_ops.cast(brightness_factor3, dtypes.float32))
    filtered_batch = clip_ops.clip_by_value(adjusted, 0.0, 255.0)

    # insert people masks
    # start_masks selects which images get a people mask; for the others the
    # multiplier is 1 everywhere (inv_start_masks).
    num_people_masks = int(batch_size * percent_people)
    mask_inds = np.random.choice(np.arange(0, batch_size), num_people_masks,
                                 replace=False)

    start_masks = np.zeros([batch_size, crop_size[0], crop_size[0], 1],
                           dtype='float32')
    start_masks[mask_inds, :, :, :] = 1

    inv_start_masks = np.ones([batch_size, crop_size[0], crop_size[0], 1],
                              dtype='float32')
    inv_start_masks[mask_inds, :, :, :] = 0

    masked_masks = tf.add(
        inv_start_masks,
        tf.cast(tf.multiply(people_mask_batch, start_masks),
                dtype=tf.float32))
    # masked_masks2 (the 3-channel tiling) is not used; the multiply below
    # uses the 1-channel masked_masks.
    masked_masks2 = tf.cast(tf.tile(masked_masks, [1, 1, 1, 3]),
                            dtype=tf.float32)
    masked_batch = tf.multiply(masked_masks, filtered_batch)

    noise = tf.random_normal(shape=[batch_size, crop_size[0], crop_size[0], 1],
                             mean=0.0, stddev=0.0025, dtype=tf.float32)
    final_batch = tf.add(masked_batch, noise)

    print("Preparing network...")
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, layers = resnet_v2.resnet_v2_50(final_batch,
                                           num_classes=output_size,
                                           is_training=True)

    # Select the model variables to restore from the checkpoint.
    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        # NOTE(review): `and` binds tighter than `or`, so this reads as
        # (finetuning and logits-variable) or momentum-variable.
        if is_finetuning.lower() == 'true' and var.op.name.startswith(
                'resnet_v2_50/logits') or 'momentum' in var.op.name.lower():
            excluded = True
        if not excluded:
            variables_to_restore.append(var)

    # L2-normalised features; D[a, b] is 1 minus the dot product of the
    # features of images a and b.
    feat = tf.squeeze(tf.nn.l2_normalize(layers[featLayer], 3))

    expanded_a = tf.expand_dims(feat, 1)
    expanded_b = tf.expand_dims(feat, 0)
    #D = tf.reduce_sum(tf.squared_difference(expanded_a, expanded_b), 2)
    D = 1 - tf.reduce_sum(tf.multiply(expanded_a, expanded_b), 2)

    # if not train_data.isOverfitting:
    #     D_max = tf.reduce_max(D)
    #     D_mean, D_var = tf.nn.moments(D, axes=[0,1])
    #     lowest_nonzero_distance = tf.reduce_max(-D)
    #     bottom_thresh = 1.2*lowest_nonzero_distance
    #     top_thresh = (D_max + D_mean)/2.0
    #     bool_mask = tf.logical_and(D>=bottom_thresh,D<=top_thresh)
    #     D = tf.multiply(D,tf.cast(bool_mask,tf.float32))

    # The index arithmetic treats the batch as consecutive groups of
    # num_pos_examples images; posImInds[a] lists the indices of a's group.
    posIdx = np.floor(np.arange(0, batch_size) /
                      num_pos_examples).astype('int')
    posIdx10 = num_pos_examples * posIdx
    posImInds = np.tile(posIdx10, (num_pos_examples, 1)).transpose() + np.tile(
        np.arange(0, num_pos_examples), (batch_size, 1))
    anchorInds = np.tile(np.arange(0, batch_size),
                         (num_pos_examples, 1)).transpose()

    posImInds_flat = posImInds.ravel()
    anchorInds_flat = anchorInds.ravel()

    # NOTE(review): `zip` must return a list here (Python 2).
    posPairInds = zip(posImInds_flat, anchorInds_flat)
    posDists = tf.reshape(tf.gather_nd(D, posPairInds),
                          (batch_size, num_pos_examples))

    # Tile both tensors to [batch, batch, num_pos_examples] so every
    # (image, anchor, positive) combination is compared at once.
    shiftPosDists = tf.reshape(posDists, (1, batch_size, num_pos_examples))
    posDistsRep = tf.tile(shiftPosDists, (batch_size, 1, 1))

    allDists = tf.tile(tf.expand_dims(D, 2), (1, 1, num_pos_examples))

    # Mask out combinations where the two images fall in the same group
    # (bad_negatives) and where the anchor's position within its group equals
    # the positive slot (bad_positives).
    ra, rb, rc = np.meshgrid(np.arange(0, batch_size),
                             np.arange(0, batch_size),
                             np.arange(0, num_pos_examples))

    bad_negatives = np.floor((ra) / num_pos_examples) == np.floor(
        (rb) / num_pos_examples)
    bad_positives = np.mod(rb, num_pos_examples) == np.mod(
        rc, num_pos_examples)

    mask = ((1 - bad_negatives) * (1 - bad_positives)).astype('float32')

    # loss = tf.reduce_sum(tf.maximum(0.,tf.multiply(mask,margin + posDistsRep - allDists)))/batch_size
    # The mean runs over all entries, including the masked-out zeros.
    loss = tf.reduce_mean(
        tf.maximum(0., tf.multiply(mask, margin + posDistsRep - allDists)))

    # slightly counterintuitive to not define "init_op" first, but tf vars
    # aren't known until added to graph
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        #train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train_op = slim.learning.create_train_op(loss, optimizer)

    summary_op = tf.summary.merge_all()
    init_op = tf.global_variables_initializer()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver(max_to_keep=2000)

    # tf will consume any GPU it finds on the system. Following lines
    # restrict it to specific gpus
    c = tf.ConfigProto()
    c.gpu_options.visible_device_list = whichGPU

    print("Starting session...")
    sess = tf.Session(config=c)
    sess.run(init_op)

    writer = tf.summary.FileWriter(log_dir, sess.graph)

    # Restore after init so checkpoint values overwrite the initial ones.
    restore_fn = slim.assign_from_checkpoint_fn(pretrained_net,
                                                variables_to_restore)
    restore_fn(sess)

    print("Start training...")
    ctr = 0  # not used below
    for step in range(num_iters):
        start_time = time.time()
        # `labels` and `ims` are not used below.
        batch, labels, ims = train_data.getBatch()
        people_masks = train_data.getPeopleMasks()
        _, loss_val = sess.run([train_op, loss],
                               feed_dict={
                                   image_batch: batch,
                                   people_mask_batch: people_masks
                               })
        end_time = time.time()
        duration = end_time - start_time
        out_str = 'Step %d: loss = %.6f -- (%.3f sec)' % (step, loss_val,
                                                          duration)
        # print(out_str)
        if step % summary_iters == 0:
            print(out_str)
            train_log_file.write(out_str + '\n')
        # Update the events file.
        # summary_str = sess.run(summary_op)
        # writer.add_summary(summary_str, step)
        # writer.flush()
        #
        # Save a checkpoint
        if (step + 1) % save_iters == 0:
            print('Saving checkpoint at iteration: %d' % (step))
            pretrained_net = os.path.join(ckpt_dir, 'checkpoint-' + param_str)
            saver.save(sess, pretrained_net, global_step=step)
            print 'checkpoint-', pretrained_net + '-' + str(step), ' saved!'
        if (step + 1) == num_iters:
            print('Saving final')
            pretrained_net = os.path.join(ckpt_dir, 'final-' + param_str)
            saver.save(sess, pretrained_net, global_step=step)
            print 'final-', pretrained_net + '-' + str(step), ' saved!'

    sess.close()
    train_log_file.close()
def __call__(self, inputs, state, scope=None):
    """Advance the LSTM by a single time step.

    Args:
      inputs: 2-D input Tensor, batch x num_units.
      state: the previous state. When `state_is_tuple` is False this is a
        single 2-D Tensor, batch x state_size, holding the cell state followed
        by the output state. When `state_is_tuple` is True this is a tuple of
        two 2-D Tensors `(c_state, m_state)`.
      scope: VariableScope for the created subgraph; the class name is used
        when it is not given.

    Returns:
      A pair `(output, new_state)`:
      - `output` is a 2-D Tensor, batch x output_dim, where output_dim is
        num_proj if a projection is configured and num_units otherwise.
      - `new_state` is the updated state, packed the same way as `state`
        (an `LSTMStateTuple` or a single concatenated Tensor).

    Raises:
      ValueError: If the static shape of `inputs` does not reveal the input
        size.
    """
    proj_size = self._num_units if self._num_proj is None else self._num_proj

    # Unpack the previous cell state (c) and output state (m).
    if self._state_is_tuple:
        c_prev, m_prev = state
    else:
        c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
        m_prev = array_ops.slice(state, [0, self._num_units], [-1, proj_size])

    dtype = inputs.dtype
    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
        raise ValueError(
            "Could not infer input size from inputs.get_shape()[-1]")

    with vs.variable_scope(scope or type(self).__name__,
                           initializer=self._initializer):  # "LSTMCell"
        gate_width = 4 * self._num_units
        concat_w = _get_concat_variable(
            "W", [input_size.value + proj_size, gate_width],
            dtype, self._num_unit_shards)
        b = vs.get_variable(
            "B", shape=[gate_width],
            initializer=array_ops.zeros_initializer, dtype=dtype)

        # One matmul yields all four pre-activations:
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        cell_inputs = array_ops.concat(1, [inputs, m_prev])
        pre_activations = nn_ops.bias_add(
            math_ops.matmul(cell_inputs, concat_w), b)
        i, j, f, o = array_ops.split(1, 4, pre_activations)

        # Diagonal (peephole) connections and the new cell state.
        if self._use_peepholes:
            diag_shape = [self._num_units]
            w_f_diag = vs.get_variable(
                "W_F_diag", shape=diag_shape, dtype=dtype)
            w_i_diag = vs.get_variable(
                "W_I_diag", shape=diag_shape, dtype=dtype)
            w_o_diag = vs.get_variable(
                "W_O_diag", shape=diag_shape, dtype=dtype)

            forget_gate = sigmoid(f + self._forget_bias + w_f_diag * c_prev)
            retained = forget_gate * c_prev
            input_gate = sigmoid(i + w_i_diag * c_prev)
        else:
            forget_gate = sigmoid(f + self._forget_bias)
            retained = forget_gate * c_prev
            input_gate = sigmoid(i)
        c = retained + input_gate * self._activation(j)

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type

        # Output gate; with peepholes it also looks at the new cell state.
        output_logits = o + w_o_diag * c if self._use_peepholes else o
        m = sigmoid(output_logits) * self._activation(c)

        # Optional projection of the output.
        if self._num_proj is not None:
            concat_w_proj = _get_concat_variable(
                "W_P", [self._num_units, self._num_proj],
                dtype, self._num_proj_shards)
            m = math_ops.matmul(m, concat_w_proj)

    # Pack the new state the same way the incoming state was packed.
    if self._state_is_tuple:
        new_state = LSTMStateTuple(c, m)
    else:
        new_state = array_ops.concat(1, [c, m])
    return m, new_state
def _process_window(self, features, mode):
    """Run the model over one window of data and assemble its outputs.

    Args:
      features: mapping read at `TrainEvalFeatures.TIMES` and
        `TrainEvalFeatures.VALUES`.
      mode: compared against `estimator_lib.ModeKeys.EVAL` to decide how the
        reported loss is computed.

    Returns:
      A `model.ModelOutputs` holding the loss, the end state (trailing
      `input_window_size` times and scaled values), the scaled-back
      predictions together with the observed values, and the prediction
      times.

    Raises:
      ValueError: If the static shape of the times feature is not compatible
        with `[None, self.window_size]`.
    """
    # TODO(agarwal): Use exogenous features.
    times = math_ops.cast(features[TrainEvalFeatures.TIMES], dtypes.int64)
    raw_values = math_ops.cast(
        features[TrainEvalFeatures.VALUES], dtype=self.dtype)

    # Extra shape checking for the window size, on top of whatever
    # `head.create_estimator_spec` already verifies.
    times_shape = times.get_shape()
    if not times_shape.is_compatible_with([None, self.window_size]):
        message_template = (
            "ARModel with input_window_size={input_window_size} "
            "and output_window_size={output_window_size} expects "
            "feature '{times_feature}' to have shape (batch_size, "
            "{window_size}) (for any batch_size), but got shape {times_shape}. "
            "If you are using RandomWindowInputFn, set "
            "window_size={window_size} or adjust the input_window_size and "
            "output_window_size arguments to ARModel.")
        raise ValueError(message_template.format(
            input_window_size=self.input_window_size,
            output_window_size=self.output_window_size,
            times_feature=TrainEvalFeatures.TIMES,
            window_size=self.window_size,
            times_shape=times_shape))

    scaled_values = self._scale_data(raw_values)
    input_values = (
        scaled_values[:, :self.input_window_size, :]
        if self.input_window_size > 0 else None)

    prediction_ops = self.prediction_ops(times, input_values)
    scaled_mean = prediction_ops["mean"]
    scaled_covariance = prediction_ops["covariance"]

    # Everything after the input window is what the model must predict.
    targets = array_ops.slice(
        scaled_values, [0, self.input_window_size, 0], [-1, -1, -1])
    targets.get_shape().assert_is_compatible_with(scaled_mean.get_shape())

    report_unscaled_loss = (
        mode == estimator_lib.ModeKeys.EVAL
        and self.loss == ARModel.SQUARED_LOSS)
    if report_unscaled_loss:
        # Report an evaluation loss which matches the expected
        # (observed - predicted) ** 2. Only evaluation is affected; the
        # training loss is still computed on scaled data.
        unscaled_targets = self._scale_back_data(targets)
        unscaled_mean = self._scale_back_data(prediction_ops["mean"])
        loss = self.loss_op(unscaled_targets, {"mean": unscaled_mean})
    else:
        loss = self.loss_op(targets, prediction_ops)

    # Scale the predictions back before returning them.
    mean = self._scale_back_data(scaled_mean)
    covariance = self._scale_back_variance(scaled_covariance)

    end_state = (
        times[:, -self.input_window_size:],
        scaled_values[:, -self.input_window_size:, :])
    predictions = {
        "mean": mean,
        "covariance": covariance,
        "observed": raw_values[:, -self.output_window_size:]
    }
    prediction_times = times[:, -self.output_window_size:]
    return model.ModelOutputs(
        loss=loss,
        end_state=end_state,
        predictions=predictions,
        prediction_times=prediction_times)
def loop_fn(i):
    """Gather entry `i` of `x`, then slice it with begin (0, 1), size (2, 1)."""
    return array_ops.slice(array_ops.gather(x, i), begin=(0, 1), size=(2, 1))