def chebyshev5(self, x, L, Fout, K):
    """Filter a graph signal with K Chebyshev polynomial terms of the Laplacian.

    Args:
        x: Tensor whose static shape N x M x Fin is fully defined (every
            dimension is converted with ``int``).
        L: Graph Laplacian, M x M, in any form accepted by
            ``scipy.sparse.csr_matrix``. It is rescaled with ``lmax=2``.
        Fout: Number of output features.
        K: Number of Chebyshev terms T_0 .. T_{K-1} that are computed.

    Returns:
        Tensor of shape N x M x Fout.
    """
    N, M, Fin = x.get_shape()
    N, M, Fin = int(N), int(M), int(Fin)
    # Rescale Laplacian and store as a TF sparse tensor.
    # copy=True is needed to really detach from the caller's matrix:
    # csr_matrix() shares the data buffers of an input that is already CSR,
    # so a rescale that works in place would leak into the shared L.
    L = scipy.sparse.csr_matrix(L, copy=True)
    L = graph.rescale_L(L, lmax=2)
    L = L.tocoo()
    indices = np.column_stack((L.row, L.col))
    L = tf.SparseTensor(indices, L.data, L.shape)
    L = tf.sparse_reorder(L)
    # Transform to Chebyshev basis.
    x0 = tf.transpose(x, perm=[1, 2, 0])  # M x Fin x N
    x0 = tf.reshape(x0, [M, Fin * N])  # M x Fin*N
    terms = [x0]  # T_0(L) x = x
    if K > 1:
        x1 = tf.sparse_tensor_dense_matmul(L, x0)  # T_1(L) x = L x
        terms.append(x1)
    for _ in range(2, K):
        # Recurrence T_k = 2 L T_{k-1} - T_{k-2}.
        x2 = 2 * tf.sparse_tensor_dense_matmul(L, x1) - x0  # M x Fin*N
        terms.append(x2)
        x0, x1 = x1, x2
    # Concatenate once instead of growing the tensor term by term.
    x = tf.concat([tf.expand_dims(term, 0) for term in terms],
                  axis=0)  # K x M x Fin*N
    x = tf.reshape(x, [K, M, Fin, N])  # K x M x Fin x N
    x = tf.transpose(x, perm=[3, 1, 2, 0])  # N x M x Fin x K
    x = tf.reshape(x, [N * M, Fin * K])  # N*M x Fin*K
    # Filter: Fin*Fout filters of order K, i.e. one filterbank per feature pair.
    W = self._weight_variable([Fin * K, Fout], regularization=False)
    x = tf.matmul(x, W)  # N*M x Fout
    return tf.reshape(x, [N, M, Fout])  # N x M x Fout
def chebyshev(self, x, L, Fout, K, normalized=False, algo='LB'):
    """Chebyshev graph filter with a Laplacian rescaled by its own lmax.

    Args:
        x: Tensor whose static shape N x M x Fin is fully defined (every
            dimension is converted with ``int``).
        L: Graph Laplacian, M x M, in any form accepted by
            ``scipy.sparse.csr_matrix``.
        Fout: Number of output features.
        K: Number of Chebyshev terms T_0 .. T_{K-1} that are computed.
        normalized: Forwarded to ``graph.lmax``; whether the Laplacian is
            normalized affects the value of lmax (maximum eigenvalue).
        algo: Forwarded to ``graph.lmax``; 'LB' or 'gL' (graph Laplacian),
            which also affects the value of lmax.

    Returns:
        Tensor of shape N x M x Fout.
    """
    N, M, Fin = x.get_shape()
    N, M, Fin = int(N), int(M), int(Fin)
    # Rescale Laplacian and store as a TF sparse tensor.
    # copy=True is needed to really detach from the caller's matrix:
    # csr_matrix() shares the data buffers of an input that is already CSR,
    # so a rescale that works in place would leak into the shared L.
    L = scipy.sparse.csr_matrix(L, copy=True)
    # lmax is estimated from the Laplacian instead of being fixed at 2.
    lmax = graph.lmax(L, normalized, algo)
    L = graph.rescale_L(L, lmax)
    L = L.tocoo()
    indices = np.column_stack((L.row, L.col))
    L = tf.SparseTensor(indices, L.data, L.shape)
    L = tf.sparse_reorder(L)
    # Transform to Chebyshev basis.
    x0 = tf.transpose(x, perm=[1, 2, 0])  # M x Fin x N
    x0 = tf.reshape(x0, [M, Fin * N])  # M x Fin*N
    terms = [x0]  # T_0(L) x = x
    if K > 1:
        x1 = tf.sparse_tensor_dense_matmul(L, x0)  # T_1(L) x = L x
        terms.append(x1)
    for _ in range(2, K):
        # Recurrence T_k = 2 L T_{k-1} - T_{k-2}.
        x2 = 2 * tf.sparse_tensor_dense_matmul(L, x1) - x0  # M x Fin*N
        terms.append(x2)
        x0, x1 = x1, x2
    # Concatenate once instead of growing the tensor term by term.
    x = tf.concat([tf.expand_dims(term, 0) for term in terms],
                  axis=0)  # K x M x Fin*N
    x = tf.reshape(x, [K, M, Fin, N])  # K x M x Fin x N
    x = tf.transpose(x, perm=[3, 1, 2, 0])  # N x M x Fin x K
    x = tf.reshape(x, [N * M, Fin * K])  # N*M x Fin*K
    # Filter: Fin*Fout filters of order K, i.e. one filterbank per feature pair.
    W = self._weight_variable([Fin * K, Fout], regularization=False)
    x = tf.matmul(x, W)  # N*M x Fout
    return tf.reshape(x, [N, M, Fout])  # N x M x Fout
def _extend_with_dummy(extend_with, to_extend, dummy_value='n/a'):
    """Pad `to_extend` with `dummy_value` wherever only `extend_with` has entries.

    Both inputs are indexed as rank-3 SparseTensors. The result keeps the
    (stringified) values of `to_extend` and gains `dummy_value` at every index
    that `tf.sets.set_difference` reports for `extend_with` but not for
    `to_extend`.

    Args:
        extend_with: SparseTensor supplying the extra index positions.
        to_extend: SparseTensor whose values are kept; non-string values are
            converted with `tf.as_string`.
        dummy_value: String placed at the added positions.

    Returns:
        A reordered string SparseTensor whose dense shape is the first
        dimension of `extend_with` followed by the element-wise maximum of the
        remaining two dimensions of both inputs.
    """
    source_shape = tf.shape(extend_with)
    target_shape = tf.shape(to_extend)
    dense_shape = tf.to_int64(
        tf.concat([[source_shape[0]],
                   [tf.maximum(source_shape[1], target_shape[1])],
                   [tf.maximum(source_shape[2], target_shape[2])]],
                  axis=0))

    # The set operation only cares about positions, so both sides carry
    # all-zero int32 payloads.
    source_positions = tf.SparseTensor(
        indices=extend_with.indices,
        values=tf.zeros_like(extend_with.values, dtype=tf.int32),
        dense_shape=dense_shape)
    target_positions = tf.SparseTensor(
        indices=to_extend.indices,
        values=tf.zeros([tf.shape(to_extend.indices)[0]], dtype=tf.int32),
        dense_shape=dense_shape)
    additional_indices = tf.sets.set_difference(
        source_positions, target_positions).indices

    # One dummy string per added index.
    filler = tf.tile(
        tf.constant([dummy_value]),
        multiples=[tf.shape(additional_indices)[0]])

    if to_extend.values.dtype != tf.string:
        string_value = tf.as_string(to_extend.values)
    else:
        string_value = to_extend.values

    merged = tf.SparseTensor(
        indices=tf.concat([to_extend.indices, additional_indices], axis=0),
        values=tf.concat([string_value, filler], axis=0),
        dense_shape=dense_shape)
    return tf.sparse_reorder(merged)
def get_tensor(self, tensor_dict):
    """Assemble the relation's SparseTensor from its component tensors.

    Works both on a decoded tf.Example (yielding the sparse tensor of a single
    instance) and on the output of `batching_loop_results` (yielding the sparse
    tensor of the whole batch).

    Args:
        tensor_dict: A dictionary mapping key names to tensors. It is read at
            the keys returned by `dense_shape_key`, `source_indices_key`,
            `dest_indices_key` and `values_key`.

    Returns:
        A reordered SparseTensor representation of the shepherd's data.
    """
    rows = tf.expand_dims(tensor_dict[self.source_indices_key()], 1)
    cols = tf.expand_dims(tensor_dict[self.dest_indices_key()], 1)
    # Each row of `indices` is one (row, column) pair of a stored entry, so
    # the result has shape [source_indices.shape[0], 2].
    indices = tf.concat([rows, cols], axis=1)
    sparse = tf.SparseTensor(
        indices=indices,
        values=tensor_dict[self.values_key()],
        dense_shape=tensor_dict[self.dense_shape_key()])
    return tf.sparse_reorder(sparse)
def get_sparse_variable(name, indices, shape, dtype=None, trainable=True,
                        initializer=None, partitioner=None, regularizer=None):
    """Create a SparseTensor whose stored values are one TF variable.

    Args:
        name: Name passed to `tf.get_variable`.
        indices: Sized collection of index tuples; `len(indices)` determines
            how many values the variable holds.
        shape: Dense shape of the resulting SparseTensor.
        dtype: Forwarded to `tf.get_variable`.
        trainable: Forwarded to `tf.get_variable`.
        initializer: Forwarded to `tf.get_variable`.
        partitioner: Forwarded to `tf.get_variable`.
        regularizer: Forwarded to `tf.get_variable`.

    Returns:
        A reordered SparseTensor with the given indices and dense shape, backed
        by the variable.
    """
    # One variable entry per listed index.
    values = tf.get_variable(
        name,
        [len(indices)],
        dtype=dtype,
        initializer=initializer,
        regularizer=regularizer,
        trainable=trainable,
        partitioner=partitioner)
    sparse = tf.SparseTensor(indices=indices, values=values, dense_shape=shape)
    return tf.sparse_reorder(sparse)
def __init__(self, L, F, K):
    """Store the rescaled Laplacian as a TF sparse tensor plus filter sizes.

    Args:
        L: Graph Laplacian, M x M; must be accepted by ``graph.rescale_L`` and
            expose ``tocoo()`` on the result.
        F: Number of filters.
        K: Polynomial order, i.e. filter size (number of hops).
    """
    super().__init__()
    L = graph.rescale_L(L, lmax=2)  # Graph Laplacian, M x M
    L = L.tocoo()
    # tf.SparseTensor takes int64 indices; allocate them as integers instead
    # of relying on an implicit float64 -> int64 conversion.
    indices = np.empty((L.nnz, 2), dtype=np.int64)
    indices[:, 0] = L.row
    indices[:, 1] = L.col
    L = tf.SparseTensor(indices, L.data, L.shape)
    self.L = tf.sparse_reorder(L)
    self.F = F  # Number of filters
    self.K = K  # Polynomial order, i.e. filter size (number of hops)
def unpool_layer2x2_batch(bottom, argmax):
    """Scatter a pooled rank-4 tensor into an output twice as tall and wide.

    Every value of `bottom` is written to the position derived from `argmax`
    inside a tensor whose second and third dimensions are doubled; all other
    positions are zero.

    Args:
        bottom: Rank-4 tensor; presumably [batch, height, width, channels]
            to match the local names used below (TODO confirm with callers).
        argmax: Flat positions handed to `unravel_argmax` together with the
            output shape; presumably the argmax output of a 2x2 max pooling
            (TODO confirm).

    Returns:
        Dense tensor with the shape of `bottom`, except that dimensions 1 and
        2 are doubled.
    """
    bottom_shape = tf.shape(bottom)
    top_shape = [
        bottom_shape[0], bottom_shape[1] * 2, bottom_shape[2] * 2,
        bottom_shape[3]
    ]

    batch_size = top_shape[0]
    height = top_shape[1]
    width = top_shape[2]
    channels = top_shape[3]

    argmax_shape = tf.to_int64([batch_size, height, width, channels])
    argmax = unravel_argmax(argmax, argmax_shape)

    # Channel index for every (batch, channel, row, col) source element.
    t1 = tf.to_int64(tf.range(channels))
    t1 = tf.tile(t1, [batch_size * (width // 2) * (height // 2)])
    t1 = tf.reshape(t1, [-1, channels])
    t1 = tf.transpose(t1, perm=[1, 0])
    t1 = tf.reshape(t1, [channels, batch_size, height // 2, width // 2, 1])
    t1 = tf.transpose(t1, perm=[1, 0, 2, 3, 4])

    # Batch index for every source element.
    t2 = tf.to_int64(tf.range(batch_size))
    t2 = tf.tile(t2, [channels * (width // 2) * (height // 2)])
    t2 = tf.reshape(t2, [-1, batch_size])
    t2 = tf.transpose(t2, perm=[1, 0])
    t2 = tf.reshape(t2, [batch_size, channels, height // 2, width // 2, 1])

    t3 = tf.transpose(argmax, perm=[1, 4, 2, 3, 0])

    # tf.concat takes (values, axis); the pre-1.0 (axis, values) order used
    # previously fails on the API the rest of this code relies on.
    t = tf.concat([t2, t3, t1], axis=4)
    indices = tf.reshape(
        t, [(height // 2) * (width // 2) * channels * batch_size, 4])

    # Flatten the values in the same (batch, channel, row, col) order as the
    # index tensors above.
    x1 = tf.transpose(bottom, perm=[0, 3, 1, 2])
    values = tf.reshape(x1, [-1])

    delta = tf.SparseTensor(indices, values, tf.to_int64(top_shape))
    return tf.sparse_tensor_to_dense(tf.sparse_reorder(delta))
def construct_input(sequence_feature_map, categorical_values,
                    categorical_seq_feature, feature_value, mode, normalize,
                    momentum, min_value, max_value, input_keep_prob):
    """Builds the dense observation inputs and the time-difference feature.

    Args:
        sequence_feature_map: A dictionary of (Sparse)Tensors of dense shape
            [batch_size, max_sequence_length, None] keyed by the feature name.
            Must contain the key 'deltaTime'.
        categorical_values: Potential values of the categorical_seq_feature.
        categorical_seq_feature: Name of feature of observation code.
        feature_value: Name of feature of observation value.
        mode: The execution mode, as defined in tf.estimator.ModeKeys.
        normalize: Whether to normalize each lab test.
        momentum: For the batch normalization mean and variance will be updated
            as momentum*old_value + (1-momentum) * new_value.
        min_value: Observation values smaller than this will be capped to
            min_value.
        max_value: Observation values larger than this will be capped to
            max_value.
        input_keep_prob: Keep probability for input observation values.

    Returns:
        A tuple (diff_delta_time, obs_values, indicator):
        - diff_delta_time: The 'deltaTime' feature of the previous time step
          minus that of the current step (the first step is paired with
          itself), converted to float and divided by 3600.
        - obs_values: A dense representation of the observation_values where
          obs_values[b, t, :] has at most one non-zero value at the position of
          the corresponding lab test from obs_code_ids with the value of the
          lab result. A padded Tensor of shape [batch_size,
          max_sequence_length, vocab_size] of type float32 of possibly
          normalized observation values.
        - indicator: A one-hot encoding of whether a value in obs_values comes
          from observation_values or is just filled in to be 0. A Tensor of
          shape [batch_size, max_sequence_length, vocab_size] and type float32.
    """
    with tf.variable_scope('input'):
        reordered_map = {}
        for key, tensor in sequence_feature_map.items():
            if isinstance(tensor, tf.SparseTensor):
                tensor = tf.sparse_reorder(tensor)
            reordered_map[key] = tensor
        sequence_feature_map = reordered_map

        # Filter out invalid values.
        # Invalid observation values are dropped with a sparse retain so that
        # they are not considered in the normalization.
        raw_values = sequence_feature_map[feature_value]
        raw_codes = sequence_feature_map[categorical_seq_feature]
        # Future work: Create a flag for the missing value indicator.
        keep = tf.abs(raw_values.values - 9999999.0) > TOLERANCE

        # Apply input dropout.
        if input_keep_prob < 1.0:
            noise = input_keep_prob
            noise += tf.random_uniform(tf.shape(raw_values.values))
            # 0. if [input_keep_prob, 1.0) and 1. if [1.0, 1.0 + input_keep_prob)
            dropout_mask = tf.floor(noise)
            if mode == tf.estimator.ModeKeys.TRAIN:
                keep = tf.to_float(keep) * dropout_mask
                keep = keep > 0.5

        sequence_feature_map[feature_value] = tf.sparse_retain(raw_values, keep)
        sequence_feature_map[categorical_seq_feature] = tf.sparse_retain(
            raw_codes, keep)

        # 1. Construct the sequence of observation values to feed into the RNN
        #    and their indicator.
        # Each observation code gets an id from 0 to vocab_size-1. At each
        # timestep the id of the observation code is looked up and the lab
        # value is placed at the id-th position of an otherwise zero vector.
        obs_code = sequence_feature_map[categorical_seq_feature]
        vocab_table = contrib_lookup.index_table_from_tensor(
            tuple(categorical_values), num_oov_buckets=0, name='vocab_lookup')
        code_ids = vocab_table.lookup(obs_code.values)
        obs_code_sparse = tf.sparse_reorder(
            tf.SparseTensor(
                indices=obs_code.indices,
                values=code_ids,
                dense_shape=obs_code.dense_shape))
        observation_values = tf.sparse_reorder(
            sequence_feature_map[feature_value])

        vocab_size = len(categorical_values)
        obs_values, indicator = combine_observation_code_and_values(
            obs_code_sparse, observation_values, vocab_size, mode, normalize,
            momentum, min_value, max_value)

        # 2. We compute the diff_delta_time as additional sequence feature.
        # Note, the LSTM is very sensitive to how you encode time.
        delta_time = sequence_feature_map['deltaTime']
        shifted = tf.concat(
            [delta_time[:, :1, :], delta_time[:, :-1, :]], axis=1)
        diff_delta_time = shifted - delta_time
        diff_delta_time = tf.to_float(diff_delta_time) / (60.0 * 60.0)

        return (diff_delta_time, obs_values, indicator)
def _most_recent_obs_value(obs_values, indicator, delta_time,
                           attribution_max_delta_time):
    """Picks, per lab test, the latest value that is old enough.

    Only time steps whose delta_time is at least attribution_max_delta_time
    are eligible. For each lab test the value at the last eligible time step
    is returned, or zero if there is none.

    Args:
        obs_values: A dense representation of the observation_values at the
            position of their obs_code_ids. A padded Tensor of shape
            [batch_size, max_sequence_length, vocab_size] of type float32 where
            obs_values[b, t, id] = observation_values[b, t, 0] and
            id = observation_code_ids[b, t, 0] and obs_values[b, t, x] = 0 for
            all other x != id. If t is greater than the sequence_length of
            batch entry b then the result is 0 as well.
        indicator: A one-hot encoding of whether a value in obs_values comes
            from observation_values or is just filled in to be 0. A Tensor of
            shape [batch_size, max_sequence_length, vocab_size] and type
            float32.
        delta_time: The time to prediction per time step. It is concatenated
            along axis 1 with a [batch_size, 1, 1] tensor, so it has to be of
            shape [batch_size, max_sequence_length, 1].
        attribution_max_delta_time: Time threshold so that we return the most
            recent lab values among those that are at least
            attribution_max_delta_time seconds old at time of prediction.

    Returns:
        A Tensor of shape [batch_size, 1, vocab_size] of the most recent lab
        results for all lab tests that are at least attribution_max_delta_time
        old at time of prediction.
    """
    batch_size = tf.shape(indicator)[0]
    seq_len = tf.shape(indicator)[1]
    num_obs = indicator.shape[2]

    # Prepend a dummy time step that is always eligible and holds zeros, so
    # lab tests without any eligible value select 0.
    zero_step = tf.zeros([batch_size, 1, num_obs])
    obs_values = tf.concat([zero_step, obs_values], axis=1)
    one_step = tf.ones([batch_size, 1, num_obs])
    indicator = tf.concat([one_step, indicator], axis=1)
    delta_time = tf.to_int32(delta_time)
    dummy_time = (
        tf.zeros([batch_size, 1, 1], dtype=tf.int32) +
        attribution_max_delta_time)
    delta_time = tf.concat([dummy_time, delta_time], axis=1)

    # Keep only the lab values that are at least attribution_max_delta_time
    # old.
    indicator = tf.to_int32(indicator)
    is_old_enough = tf.to_int32(delta_time >= attribution_max_delta_time)
    indicator *= is_old_enough

    # [[[0], [1], ..., [max_sequence_length]],
    #  [[0], [1], ..., [max_sequence_length]],
    #  ...]
    step_ids = tf.expand_dims(tf.range(seq_len + 1), axis=0)
    step_ids = tf.tile(step_ids, multiples=[tf.shape(indicator)[0], 1])
    step_ids = tf.expand_dims(step_ids, axis=2)

    # Every eligible entry carries the index of its own time step.
    seq_indicator = indicator * step_ids
    # Per lab test, the time step of the last eligible value, repeated along
    # the time axis so it can be compared entry by entry.
    last_step = tf.reduce_max(seq_indicator, axis=1, keepdims=True)
    last_step = tf.tile(last_step, multiples=[1, tf.shape(indicator)[1], 1])

    # True exactly at the most recent eligible lab values.
    is_most_recent = tf.logical_and(
        tf.equal(last_step, seq_indicator), indicator > 0)
    selected = tf.where(is_most_recent)

    # Collect the lab values associated with those indices.
    picked = tf.gather_nd(obs_values, selected)

    # Reorder the values by batch and then by lab test.
    full_shape = tf.to_int64(tf.stack([batch_size, seq_len + 1, num_obs]))
    picked_sparse = tf.SparseTensor(
        indices=selected, values=picked, dense_shape=full_shape)
    by_lab_test = tf.sparse_transpose(picked_sparse, perm=[0, 2, 1])
    res_sorted = tf.sparse_reorder(by_lab_test).values

    return tf.reshape(res_sorted, [batch_size, 1, num_obs])
def normalize_each_feature(observation_values, obs_code, vocab_size, mode,
                           momentum):
    """Batch-normalizes the observation values separately for each lab test.

    Args:
        observation_values: A SparseTensor of type float with the observation
            values of dense shape [batch_size, max_sequence_length, 1]. There
            may be no time gaps in between codes.
        obs_code: A Tensor of type int32 with the ids that go along with the
            observation_values. It is compared against each id and the matching
            positions index observation_values.values and
            observation_values.indices, so it presumably holds one id per
            stored value - TODO confirm against the caller.
        vocab_size: The ids 0 .. vocab_size - 1 are normalized, each with its
            own batch normalization layer.
        mode: The execution mode, as defined in tf.estimator.ModeKeys.
        momentum: Mean and variance will be updated as
            momentum*old_value + (1-momentum) * new_value.

    Returns:
        observation_values as in the input only with normalized values.
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    with tf.variable_scope('batch_normalization'):
        index_parts = []
        value_parts = []
        for i in range(vocab_size):
            tag = str(i)
            with tf.variable_scope('bn' + tag):
                positions = tf.where(tf.equal(obs_code, i))
                raw = tf.gather_nd(observation_values.values, positions)
                if is_training:
                    tf.summary.scalar('avg_observation_values/' + tag,
                                      tf.reduce_mean(raw))
                    tf.summary.histogram('observation_values/' + tag, raw)

                batchnorm_layer = tf.layers.BatchNormalization(
                    axis=1, momentum=momentum, epsilon=0.01, trainable=True)
                # The layer normalizes along axis 1, so add a feature axis and
                # strip it again afterwards.
                normalized = tf.squeeze(
                    batchnorm_layer.apply(
                        tf.expand_dims(raw, axis=1), training=is_training),
                    axis=1,
                    name='squeeze_normalized_values')

                if is_training:
                    tf.summary.scalar(
                        'batchnorm_layer/moving_mean/' + tag,
                        tf.squeeze(batchnorm_layer.moving_mean))
                    tf.summary.scalar(
                        'batchnorm_layer/moving_variance/' + tag,
                        tf.squeeze(batchnorm_layer.moving_variance))
                    tf.summary.scalar('avg_normalized_values/' + tag,
                                      tf.reduce_mean(normalized))
                    tf.summary.histogram(
                        'normalized_observation_values/' + tag, normalized)

                index_parts.append(
                    tf.gather_nd(observation_values.indices, positions))
                # Replace NaNs coming out of the normalization by zeros.
                normalized = tf.where(
                    tf.is_nan(normalized), tf.zeros_like(normalized),
                    normalized)
                value_parts.append(normalized)

        # The per-feature pieces are concatenated feature by feature, so the
        # result has to be put back into canonical index order.
        merged = tf.SparseTensor(
            indices=tf.concat(index_parts, axis=0),
            values=tf.concat(value_parts, axis=0),
            dense_shape=observation_values.dense_shape)
        return tf.sparse_reorder(merged)
def knn_affinity(input_x, n_nbrs, scale=None, scale_nbr=None,
                 local_scale=None, verbose=False):
    """Calculates Gaussian affinity matrix.

    Calculates the symmetrized Gaussian affinity matrix with k1 nonzero
    affinities for each point, scaled by
    1) a provided scale,
    2) the median distance of the k2-th neighbor of each point in X, or
    3) a covariance matrix S where S_ii is the distance of the k2-th
       neighbor of each point i, and S_ij = 0 for all i != j.
    Here, k1 = n_nbrs, k2 = scale_nbr.

    Args:
        input_x: input dataset of size n
        n_nbrs: k1; a Python float is truncated to int, a tf.Variable that is
            not int32 is cast to int32
        scale: provided scale
        scale_nbr: k2, used if scale not provided; when None, column index -1
            (the last of the n_nbrs neighbors) is used
        local_scale: if True, then we use the aforementioned option 3), else we
            use option 2)
        verbose: extra printouts

    Returns:
        n x n affinity matrix
    """
    # `np.float` was only an alias of the builtin `float` and has been removed
    # from NumPy, so test against the builtin directly.
    if isinstance(n_nbrs, float):
        n_nbrs = int(n_nbrs)
    elif (isinstance(n_nbrs, tf.Variable)
          and n_nbrs.dtype.as_numpy_dtype != np.int32):
        n_nbrs = tf.cast(n_nbrs, np.int32)

    # get squared distance
    dist_x = squared_distance(input_x)
    # calculate the top k closest neighbors (largest negated distances)
    nn = tf.nn.top_k(-dist_x, n_nbrs, sorted=True)

    # negated squared distances of the neighbors
    vals = nn[0]

    # apply scale
    if scale is None:
        # if scale not provided, use local scale
        if scale_nbr is None:
            scale_nbr = 0
        else:
            assert scale_nbr > 0 and scale_nbr <= n_nbrs
        if local_scale:
            # one scale per point, repeated for each of its neighbors
            scale = -nn[0][:, scale_nbr - 1]
            scale = tf.reshape(scale, [-1, 1])
            scale = tf.tile(scale, [1, n_nbrs])
            scale = tf.reshape(scale, [-1, 1])
            vals = tf.reshape(vals, [-1, 1])
            if verbose:
                vals = tf.Print(vals, [tf.shape(vals), tf.shape(scale)],
                                'vals, scale shape')
            vals = vals / (2 * scale)
            vals = tf.reshape(vals, [-1, n_nbrs])
        else:
            def get_median(scales, m):
                # the m-th largest entry; with m = n // 2 this is the median
                with tf.device('/cpu:0'):
                    scales = tf.nn.top_k(scales, m)[0]
                scale = scales[m - 1]
                return scale, scales

            scales = -vals[:, scale_nbr - 1]
            const = tf.shape(input_x)[0] // 2
            scale, scales = get_median(scales, const)
            vals = vals / (2 * scale)
    else:
        # otherwise, use provided value for global scale
        vals = vals / (2 * scale**2)

    # get the affinity
    aff_vals = tf.exp(vals)
    # flatten this into a single vector of values to shove in a sparse matrix
    aff_vals = tf.reshape(aff_vals, [-1])
    # get the matrix of indices corresponding to each rank
    # with 1 in the first column and k in the kth column
    nn_ind = nn[1]
    # get the j index for the sparse matrix
    j_index = tf.reshape(nn_ind, [-1, 1])
    # the i index is just sequential to the j matrix
    i_index = tf.range(tf.shape(nn_ind)[0])
    i_index = tf.reshape(i_index, [-1, 1])
    i_index = tf.tile(i_index, [1, tf.shape(nn_ind)[1]])
    i_index = tf.reshape(i_index, [-1, 1])
    # concatenate the indices to build the sparse matrix
    indices = tf.concat((i_index, j_index), axis=1)
    # assemble the sparse weight matrix
    weight_mat = tf.SparseTensor(
        indices=tf.cast(indices, dtype='int64'),
        values=aff_vals,
        dense_shape=tf.cast(tf.shape(dist_x), dtype='int64'))
    # fix the ordering of the indices
    weight_mat = tf.sparse_reorder(weight_mat)
    # convert to dense tensor
    weight_mat = tf.sparse_tensor_to_dense(weight_mat)
    # symmetrize
    weight_mat = (weight_mat + tf.transpose(weight_mat)) / 2.0

    return weight_mat
def _process(examples):
    """Supplies input to our model.

    This function supplies input to our model after parsing. It relies on
    names from the enclosing scope (include_age, time_crossed_features,
    sequence_features, time_windows, dedup).

    Args:
        examples: The dictionary from key to (Sparse)Tensors with context and
            sequence features. It is modified in place: consumed keys are
            popped, crossed features are added and cross-only features are
            deleted.

    Returns:
        A dictionary of tensors keyed by feature name: every remaining
        non-context feature bucketed per time window under the key
        '<name>-til-<min_age>', the remaining context features and, if
        include_age is set, the age in years.
    """
    # Combine into a single dictionary.
    feature_map = {}
    # Add age if requested.
    if include_age:
        age_in_seconds = (
            examples[CONTEXT_KEY_PREFIX + 'timestamp'] -
            examples.pop(CONTEXT_KEY_PREFIX + 'Patient.birthDate'))
        age_in_years = tf.to_float(age_in_seconds) / (60 * 60 * 24 * 365.0)
        feature_map[CONTEXT_KEY_PREFIX + AGE_KEY] = age_in_years

    sequence_length = examples.pop(CONTEXT_KEY_PREFIX + 'sequenceLength')
    # Cross the requested features.
    for cross in time_crossed_features:
        # The features may be missing at different rates - we take the union
        # of the indices supplying defaults.
        extended_features = dict()
        dense_shape = tf.concat(
            [[tf.to_int64(tf.shape(sequence_length)[0])],
             [tf.reduce_max(sequence_length)],
             tf.constant([1], dtype=tf.int64)],
            axis=0)
        for i, feature in enumerate(cross):
            sp_tensor = examples[SEQUENCE_KEY_PREFIX + feature]
            additional_indices = []
            covered_indices = sp_tensor.indices
            for j, other_feature in enumerate(cross):
                if i != j:
                    # Indices present for the other feature but not yet
                    # covered by this one.
                    additional_indices.append(
                        tf.sets.set_difference(
                            tf.sparse_reorder(
                                tf.SparseTensor(
                                    indices=examples[
                                        SEQUENCE_KEY_PREFIX +
                                        other_feature].indices,
                                    values=tf.zeros([
                                        tf.shape(examples[
                                            SEQUENCE_KEY_PREFIX +
                                            other_feature].indices)[0]
                                    ], dtype=tf.int32),
                                    dense_shape=dense_shape)),
                            tf.sparse_reorder(
                                tf.SparseTensor(
                                    indices=covered_indices,
                                    values=tf.zeros(
                                        [tf.shape(covered_indices)[0]],
                                        dtype=tf.int32),
                                    dense_shape=dense_shape))).indices)
                    covered_indices = tf.concat(
                        [sp_tensor.indices] + additional_indices, axis=0)

            additional_indices = tf.concat(additional_indices, axis=0)

            # Supply defaults for all other indices.
            default = tf.tile(tf.constant(['n/a']),
                              multiples=[tf.shape(additional_indices)[0]])

            string_value = sp_tensor.values
            if string_value.dtype != tf.string:
                string_value = tf.as_string(string_value)

            extended_features[feature] = tf.sparse_reorder(
                tf.SparseTensor(
                    indices=tf.concat(
                        [sp_tensor.indices, additional_indices], axis=0),
                    values=tf.concat([string_value, default], axis=0),
                    dense_shape=dense_shape))

        new_values = tf.strings.join(
            [extended_features[f].values for f in cross], separator='-')
        crossed_sp_tensor = tf.sparse_reorder(
            tf.SparseTensor(
                indices=extended_features[cross[0]].indices,
                values=new_values,
                dense_shape=extended_features[cross[0]].dense_shape))
        examples[SEQUENCE_KEY_PREFIX + '_'.join(cross)] = crossed_sp_tensor

    # Remove unwanted features that are used in the cross but should not be
    # considered outside the cross.
    for cross in time_crossed_features:
        for feature in cross:
            if (feature not in sequence_features and
                    SEQUENCE_KEY_PREFIX + feature in examples):
                del examples[SEQUENCE_KEY_PREFIX + feature]

    # Flatten sparse tensor to compute event age. This dense tensor also
    # contains padded values. These will not be used when gathering elements
    # from the dense tensor since each sparse feature won't have a value
    # defined for the padding.
    padded_event_age = (
        # Broadcast current time along sequence dimension.
        tf.expand_dims(examples.pop(CONTEXT_KEY_PREFIX + 'timestamp'), 1)
        # Subtract time of events.
        - examples.pop(SEQUENCE_KEY_PREFIX + 'eventId'))

    for i in range(len(time_windows) - 1):
        max_age = time_windows[i]
        min_age = time_windows[i + 1]
        padded_in_time_window = tf.logical_and(padded_event_age <= max_age,
                                               padded_event_age > min_age)

        # dict.items() works on Python 2 and 3; iteritems() is Python 2 only.
        for k, v in examples.items():
            if k.startswith(CONTEXT_KEY_PREFIX):
                continue
            # For each sparse feature entry, look up whether it is in the time
            # window or not.
            in_time_window = tf.gather_nd(padded_in_time_window,
                                          v.indices[:, 0:2])
            v = tf.sparse_retain(v, in_time_window)
            sp_tensor = tf.sparse_reshape(v, [v.dense_shape[0], -1])
            if dedup:
                sp_tensor = _dedup_tensor(sp_tensor)

            feature_map[k + '-til-%d' % min_age] = sp_tensor

    for k, v in examples.items():
        if k.startswith(CONTEXT_KEY_PREFIX):
            feature_map[k] = v
    return feature_map