def sparse_trim(line, max_line_len, max_word_len, trim_from_start=False):
    """Trim a sparse line of text to a maximum number of words and chars.

    Args:
        line: a 2D sparse tensor with shape [num_words, num_chars]
            representing a line of text.
        max_line_len: maximum number of words in the line to trim to.
        max_word_len: maximum number of chars for each word to trim to.
        trim_from_start: whether surplus *words* are dropped from the
            beginning of the line instead of the end. Chars are always
            sliced starting at column 0.

    Returns:
        The result of `tf.sparse_slice` on `line`.
    """
    if not trim_from_start:
        return tf.sparse_slice(line, [0, 0], [max_line_len, max_word_len])

    word_count = line.dense_shape[0]
    zero = tf.constant(0, dtype=word_count.dtype)
    # Index of the first word that is kept; never negative when the line is
    # already shorter than `max_line_len`.
    first_kept = tf.maximum(zero, word_count - max_line_len)
    kept_words = word_count - first_kept
    return tf.sparse_slice(line, [first_kept, 0], [kept_words, max_word_len])
def extractRegion(offset_idx):
    """Return a dense block of `lattice` starting at `offset_idx`.

    A slice of size `self.SIZET` is taken from the enclosing-scope sparse
    tensor `lattice`, densified with 0 as the fill value, and tagged with
    the static shape (self.K, self.K, self.K).
    """
    sparse_region = tf.sparse_slice(lattice, offset_idx, self.SIZET)
    dense_region = tf.sparse_tensor_to_dense(
        sparse_region, default_value=0, validate_indices=False)
    side = self.K
    dense_region.set_shape((side, side, side))
    return dense_region
def _split_string_to_fix_words(line, delimiter, max_words):
    """Split strings on `delimiter` and fix the word dimension to `max_words`.

    Returns a sparse tensor sliced to `[num_rows, max_words]` whose dense
    shape is then reset to exactly that shape.
    """
    tokens = tf.string_split(line, delimiter)
    num_rows = tokens.dense_shape[0]
    target_shape = [num_rows, max_words]
    clipped = tf.sparse_slice(tokens, [0, 0], target_shape)
    return tf.sparse_reset_shape(clipped, target_shape)
def _sparse_or_dense_matmul_onehot(sparse_or_dense_matrix, col_index):
    """Returns a (dense) column of a Tensor or SparseTensor.

    Args:
      sparse_or_dense_matrix: matrix-shaped, `float` `Tensor` or
        `SparseTensor`.
      col_index: scalar, `int` `Tensor` representing the index of the
        desired column.

    Returns:
      column: vector-shaped, `float` `Tensor` with the same dtype as
        `sparse_or_dense_matrix`, representing the `col_index`th column of
        `sparse_or_dense_matrix`.
    """
    sparse_types = (tf.SparseTensor, tf.SparseTensorValue)
    if not isinstance(sparse_or_dense_matrix, sparse_types):
        # Dense input: a plain gather along the last axis is enough.
        return tf.gather(sparse_or_dense_matrix, col_index, axis=-1)

    # TODO(b/111924846): Implement better (ideally in a way that allows us to
    # eliminate the `num_rows` arg, if possible).
    num_rows = _get_shape(sparse_or_dense_matrix)[-2]
    batch_shape = _get_shape(sparse_or_dense_matrix)[:-2]

    # Start at the origin of every batch dimension, row 0, column col_index.
    begin = tf.concat([tf.zeros_like(batch_shape), [0, col_index]], axis=0)
    # Take every batch member, all rows, and exactly one column.
    extent = tf.concat([batch_shape, [num_rows, 1]], axis=0)

    # We momentarily lose static shape information in tf.sparse_slice.
    # However we regain it in the following tf.reshape.
    column = tf.sparse_slice(sparse_or_dense_matrix,
                             tf.cast(begin, tf.int64),
                             tf.cast(extent, tf.int64))
    column_shape = tf.concat([batch_shape, [num_rows]], axis=0)
    dense_column = tf.sparse_tensor_to_dense(column)
    return tf.reshape(dense_column, column_shape)
def _sparse_or_dense_matmul_onehot(sparse_or_dense_matrix, col_index, size):
    """Returns a (dense) column of a Tensor or SparseTensor.

    Args:
      sparse_or_dense_matrix: matrix-shaped, `float` `Tensor` or
        `SparseTensor`.
      col_index: scalar, `int` `Tensor` representing the index of the
        desired column.
      size: scalar, `int` `Tensor` representing the number of rows in
        `sparse_or_dense_matrix`. Used only in the sparse case, so that the
        caller can give side information about the shape of
        `sparse_or_dense_matrix`.

    Returns:
      column: vector-shaped, `float` `Tensor` with the same dtype as
        `sparse_or_dense_matrix`, representing the `col_index`th column of
        `sparse_or_dense_matrix`.
    """
    sparse_types = (tf.SparseTensor, tf.SparseTensorValue)
    if not isinstance(sparse_or_dense_matrix, sparse_types):
        return tf.gather(sparse_or_dense_matrix, col_index, axis=-1)

    # TODO(b/111924846): Implement better (ideally in a way that allows us to
    # eliminate the `size` arg, if possible).
    begin = tf.cast([0, col_index], tf.int64)
    extent = tf.cast([size, 1], tf.int64)
    column = tf.sparse_slice(sparse_or_dense_matrix, begin, extent)
    # Collapse the [size, 1] slice to a vector before densifying.
    flat_column = tf.sparse_reshape(column, [size])
    return tf.sparse_tensor_to_dense(flat_column)
def map_function(x):
    """Multiply one 2D slice of the sparse tensor `sp_a` by a dense matrix.

    Args:
        x: indexable whose element 0 is the index of the slice along the
            first dimension of `sp_a` and whose element 1 is the dense
            right-hand operand.

    Returns:
        The result of `tf.sparse_tensor_dense_matmul` for that slice.
    """
    slice_index = x[0]
    dense_operand = x[1]

    n_rows = sp_a.dense_shape[1]
    n_cols = sp_a.dense_shape[2]

    # Take the single [1, n_rows, n_cols] slab and drop its leading axis.
    slab = tf.sparse_slice(sp_a, [slice_index, 0, 0], [1, n_rows, n_cols])
    sparse_operand = tf.sparse_reshape(slab, [n_rows, n_cols])
    return tf.sparse_tensor_dense_matmul(sparse_operand, dense_operand)
def format_features(features):
    """Map word features to ids and add trimmed dense versions in place.

    For each of the ten text features, the sparse values are passed through
    `char_input_fn.word2ids`, stored back as a sparse tensor under the
    `<name>_unpad` key, then sliced along the last axis to the configured
    query/title width and densified under the original key.

    Args:
        features: dict holding the sparse text features; it is mutated.

    Returns:
        The same `features` dict.
    """
    feature_name_dense = ['q', 'sna', 'qiq1_t', 'qiq1_q', 'qiq2_t', 'qiq2_q',
                          'iqi1_q', 'iqi1_t', 'iqi2_q', 'iqi2_t']
    feature_name_unpad = ['q_unpad', 'sna_unpad', 'qiq1_t_unpad',
                          'qiq1_q_unpad', 'qiq2_t_unpad', 'qiq2_q_unpad',
                          'iqi1_q_unpad', 'iqi1_t_unpad', 'iqi2_q_unpad',
                          'iqi2_t_unpad']
    # Positions that are trimmed to `query_size`; every other position is
    # trimmed to `title_size`.
    query_positions = {0, 3, 5, 6, 8}

    # Read all inputs up front, before any key of `features` is overwritten.
    sparse_inputs = [features[name] for name in feature_name_dense]

    ## word -> id (sparse -> val -> ids -> sparse)
    triples = zip(sparse_inputs, feature_name_unpad, feature_name_dense)
    for position, (sparse_in, unpad_key, dense_key) in enumerate(triples):
        words = tf.reshape(sparse_in.values, [-1])
        ids = tf.reshape(char_input_fn.word2ids(words), [-1])
        text_sparse = tf.SparseTensor(indices=sparse_in.indices,
                                      values=tf.cast(ids, tf.int64),
                                      dense_shape=sparse_in.dense_shape)
        features[unpad_key] = text_sparse

        if position in query_positions:
            width = model_config.query_size
        else:
            width = model_config.title_size

        text_ids_pad = tf.sparse_slice(
            sp_input=text_sparse,
            start=[0, 0, 0],
            size=[text_sparse.dense_shape[0],
                  text_sparse.dense_shape[1],
                  width])
        features[dense_key] = sparse_tensor_to_dense(
            sparse_tensor=text_ids_pad,
            width=width,
            sparse_values=text_ids_pad.values)
    return features
def _split_chars(line):
    """Split every word of a sparse line into characters.

    The cell at [0, 0] is split first; a `tf.while_loop` then walks the
    column index from 1 upwards, splitting each [1, 1] cell of `line` into
    characters and concatenating the result along axis 0.

    Args:
        line: sparse string tensor; cells are read from row 0.

    Returns:
        Sparse tensor accumulated by the loop.
    """
    def _has_more(position, chars):
        # NOTE(review): the loop bound is tf.size(line) -- confirm this is
        # the intended word count for the sparse input.
        return tf.less(position[1], tf.size(line))

    def _append_next(position, chars):
        cell = tf.sparse_slice(line, tf.to_int64(position), [1, 1])
        cell_chars = tf.string_split(cell.values, delimiter='')
        grown = tf.sparse_concat(axis=0,
                                 sp_inputs=[chars, cell_chars],
                                 expand_nonconcat_dim=True)
        return position + [0, 1], grown

    start_position = tf.constant([0, 1])
    first_cell = tf.sparse_slice(line, [0, 0], [1, 1])
    first_chars = tf.string_split(first_cell.values, delimiter='')
    _, all_chars = tf.while_loop(_has_more,
                                 _append_next,
                                 loop_vars=[start_position, first_chars],
                                 back_prop=False)
    return all_chars
def get_train_dataset(path,
                      batch_size,
                      n_batches,
                      original_dim=202498,
                      trim_dim=0,
                      shuffle_buffer=30000,
                      idf_path=None,
                      max_path=None,
                      seed=123456,
                      compression="",
                      repeat=10,
                      trim_head=200):
    '''
    Build the training `tf.data` pipeline from TFRecord files.

    Note: Keep path input and shuffling settings consistent between runs.
    trim_head removes the first *n* words of the vocabulary.

    Args:
        path: glob pattern of the TFRecord files (".gz" is appended when
            `compression` is "GZIP").
        batch_size: number of records per batch.
        n_batches: not used by this function.
        original_dim: vocabulary size handed to `tfrecord_schema`.
        trim_dim: number of vocabulary columns to keep after `trim_head`;
            0 keeps everything up to `original_dim`.
        shuffle_buffer: size of the record shuffle buffer.
        idf_path: optional `.npy` file of per-column factors to multiply by.
        max_path: optional `.npy` file of per-column values to divide by.
        seed: seed for the NumPy shuffle of the file list.
        compression: `compression_type` passed to `TFRecordDataset`.
        repeat: number of times the dataset is repeated.
        trim_head: index of the first vocabulary column that is kept.

    Returns:
        The assembled dataset, densified through `sparse_to_dense`.
    '''
    sparse_features = tfrecord_schema(original_dim)
    if compression == "GZIP":
        path = path + ".gz"

    # glob.glob() returns matches in arbitrary (filesystem-dependent) order,
    # so sort first: otherwise the seeded shuffle below does not yield the
    # same file order from run to run or machine to machine.
    filenames = sorted(glob.glob(path))
    np.random.seed(seed=seed)
    np.random.shuffle(filenames)

    dataset = tf.data.TFRecordDataset(filenames,
                                      compression_type=compression,
                                      num_parallel_reads=2)
    dataset = dataset.shuffle(shuffle_buffer,
                              seed=12345,
                              reshuffle_each_iteration=True)
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(
        lambda x: tf.parse_example(x, features=sparse_features)['sparse'])

    # Column window kept from the vocabulary axis.
    slice_width = trim_dim if trim_dim else original_dim
    trim_end = (trim_dim + trim_head) if trim_dim else original_dim
    dataset = dataset.map(lambda x: tf.sparse_slice(
        x, [0, trim_head], [batch_size, slice_width]))

    if idf_path:
        idf = np.load(idf_path)[trim_head:trim_end]
        dataset = dataset.map(lambda x: tf.cast(x, tf.float32) * idf)
    if max_path:
        maxarr = np.load(max_path)[trim_head:trim_end]
        dataset = dataset.map(lambda x: tf.cast(x, tf.float32) / maxarr)

    dataset = dataset.map(sparse_to_dense)
    dataset = dataset.repeat(repeat)
    return dataset
def sparse_tensor_to_dense(sparse_tensor, width, default=0):
    """Slice a rank-2 or rank-3 sparse tensor to `width` and densify it.

    The rank is read at graph run time from the length of `dense_shape`;
    the last axis is sliced to `width` and the result is converted with
    `tf.sparse_to_dense`, filling missing entries with `default`.

    Args:
        sparse_tensor: sparse tensor of rank 2 or 3.
        width: size of the last axis of the output.
        default: fill value for positions without an entry.

    Returns:
        Dense tensor of shape [d0, width] or [d0, d1, width].
    """
    dense_tensor_shape = sparse_tensor.dense_shape
    rank = tf.shape(dense_tensor_shape)[0]

    # The shape lookups stay inside the branch functions so that index 1 is
    # only read in the rank-3 branch.
    def _slice_rank3():
        return tf.sparse_slice(
            sp_input=sparse_tensor,
            start=[0, 0, 0],
            size=[dense_tensor_shape[0], dense_tensor_shape[1], width])

    def _slice_rank2():
        return tf.sparse_slice(
            sp_input=sparse_tensor,
            start=[0, 0],
            size=[dense_tensor_shape[0], width])

    sparse_tensor_pad = tf.cond(tf.equal(rank, 3), _slice_rank3, _slice_rank2)

    def _dense_rank3():
        return tf.sparse_to_dense(
            sparse_indices=sparse_tensor_pad.indices,
            output_shape=[dense_tensor_shape[0], dense_tensor_shape[1], width],
            sparse_values=sparse_tensor_pad.values,
            default_value=default)

    def _dense_rank2():
        return tf.sparse_to_dense(
            sparse_indices=sparse_tensor_pad.indices,
            output_shape=[dense_tensor_shape[0], width],
            sparse_values=sparse_tensor_pad.values,
            default_value=default)

    return tf.cond(tf.equal(rank, 3), _dense_rank3, _dense_rank2)
def _any_indices_with_type(type_url, full_name):
    """Returns the parent indices that have a type_url of full_name."""
    # Split each URL on "/" and keep only the second piece (column 1).
    url_parts = tf.string_split(type_url.value, delimiter="/")
    num_urls = tf.shape(type_url.value, out_type=tf.int64)[0]
    column_extent = tf.stack(
        [num_urls, tf.constant(1, dtype=tf.int64)], axis=0)
    column_start = tf.constant([0, 1], dtype=tf.int64)
    second_piece = tf.sparse_slice(url_parts, column_start, column_extent)

    # Rows without a second piece become "" and therefore never match.
    type_names = tf.reshape(
        tf.sparse_tensor_to_dense(second_piece, default_value=""), [-1])
    matches = tf.equal(type_names, full_name)
    return tf.boolean_mask(type_url.index, matches)
def _call(self, inputs):
    """Apply dropout, per-feature Lanczos outputs, weights, bias, activation.

    Each of the `self.input_dim` feature columns of the input is sliced out
    and passed to `self.Lanczos` together with `self.support[0]`. The j-th
    outputs of all columns are concatenated along axis 1, multiplied by
    `self.vars['weights_<j>']`, and the products are summed.

    Args:
        inputs: layer input; a sparse tensor when `self.sparse_inputs` is
            set, a dense tensor otherwise.

    Returns:
        `self.act` applied to the summed (and optionally biased) output.
    """
    x = inputs

    # dropout
    keep_prob = 1 - self.dropout
    if self.sparse_inputs:
        x = sparse_dropout(x, keep_prob, self.num_features_nonzero)
    else:
        x = tf.nn.dropout(x, keep_prob)

    # convolve
    operators = self.support[0]
    num_supports = len(self.support)
    # Sparse and dense inputs differ only in the op used to cut a column.
    slice_column = tf.sparse_slice if self.sparse_inputs else tf.slice

    # vm_columns[j] collects the j-th Lanczos output of every feature column.
    vm_columns = [[] for _ in range(num_supports)]
    for i in range(self.input_dim):
        cur_feature = slice_column(x, [0, i], [self.node_num, 1])
        Vm = self.Lanczos(operators, num_supports, cur_feature)
        for j, column_list in enumerate(vm_columns):
            column_list.append(Vm[j])
    print("finish Lanczos")

    supports = []
    for i, columns in enumerate(vm_columns):
        print("concate: ", i)
        pre_matrix = tf.concat(columns, 1)
        support = dot(pre_matrix, self.vars['weights_' + str(i)],
                      sparse=False)
        supports.append(support)
    output = tf.add_n(supports)

    # bias
    if self.bias:
        output += self.vars['bias']

    return self.act(output)
def get_slice(data, i, parts):
    """Return the `i`-th of `parts` contiguous slices along the batch axis.

    Every slice holds `batch_size // parts` samples, except the last one
    (`i == parts - 1`), which also takes the remainder.

    Args:
        data: dense or sparse tensor whose first axis is the batch axis.
        i: zero-based index of the requested slice.
        parts: total number of slices.

    Returns:
        The slice, via `tf.sparse_slice` for sparse input and `K.slice`
        otherwise.
    """
    shape = K.shape(data)
    batch_size = shape[:1]
    input_shape = shape[1:]
    step = batch_size // parts

    is_last_part = (i == parts - 1)
    part_len = batch_size - step * i if is_last_part else step

    size = K.concatenate([part_len, input_shape], axis=0)
    # Only the batch axis advances; all other axes start at 0.
    stride = K.concatenate([step, input_shape * 0], axis=0)
    start = stride * i

    if not K.is_sparse(data):
        return K.slice(data, start, size)

    # tf.sparse_slice is called with int64 start/size.
    return tf.sparse_slice(data,
                           tf.cast(start, dtype=tf.int64),
                           tf.cast(size, dtype=tf.int64))
def get_validation_dataset(path,
                           n_pages,
                           original_dim=202498,
                           trim_dim=0,
                           shuffle_buffer=30000,
                           trim_head=200,
                           idf_path=None,
                           seed=123456,
                           max_path=None,
                           compression=""):
    '''
    Build a single-batch validation `tf.data` pipeline from TFRecord files.

    Note: Keep path input and shuffling settings consistent between runs.

    Args:
        path: glob pattern of the TFRecord files (".gz" is appended when
            `compression` is "GZIP").
        n_pages: number of records in the one batch that is taken.
        original_dim: vocabulary size handed to `tfrecord_schema`.
        trim_dim: number of vocabulary columns to keep after `trim_head`;
            0 keeps everything up to `original_dim`.
        shuffle_buffer: size of the record shuffle buffer.
        trim_head: index of the first vocabulary column that is kept.
        idf_path: optional `.npy` file of per-column factors to multiply by.
        seed: seed for the NumPy shuffle of the file list.
        max_path: optional `.npy` file of per-column values to divide by.
        compression: `compression_type` passed to `TFRecordDataset`.

    Returns:
        Dataset yielding one sparse batch (it is not densified here).
    '''
    sparse_features = tfrecord_schema(original_dim)
    if compression == "GZIP":
        path = path + ".gz"

    # glob.glob() returns matches in arbitrary (filesystem-dependent) order,
    # so sort first: otherwise the seeded shuffle below does not yield the
    # same file order from run to run or machine to machine.
    filenames = sorted(glob.glob(path))
    np.random.seed(seed=seed)
    np.random.shuffle(filenames)

    dataset = tf.data.TFRecordDataset(
        filenames, compression_type=compression).shuffle(shuffle_buffer,
                                                         seed=30303)
    dataset = dataset.batch(n_pages).take(1)
    dataset = dataset.map(
        lambda x: tf.parse_example(x, features=sparse_features)['sparse'])

    # Column window kept from the vocabulary axis.
    slice_width = trim_dim if trim_dim else original_dim
    trim_end = (trim_dim + trim_head) if trim_dim else original_dim
    dataset = dataset.map(lambda x: tf.sparse_slice(
        x, [0, trim_head], [n_pages, slice_width]))

    if idf_path:
        idf = np.load(idf_path)[trim_head:trim_end]
        dataset = dataset.map(lambda x: tf.cast(x, tf.float32) * idf)
    if max_path:
        maxarr = np.load(max_path)[trim_head:trim_end]
        dataset = dataset.map(lambda x: tf.cast(x, tf.float32) / maxarr)
    return dataset
def build_infer_placeholder(src_ph, vocab_table):
    """Turn a string placeholder into padded id sequences for inference.

    The source strings are split into tokens, looked up in `vocab_table`,
    truncated to `MAX_SRC_LEN` tokens per line and densified, using the id
    of `EOS` as the fill value.

    Args:
        src_ph: string tensor of source lines.
        vocab_table: lookup table mapping tokens to ids.

    Returns:
        A `batchedInput` namedtuple with `initializer=None`, `source` (op
        name 'src') and `source_length` (op name 'src_len').
    """
    eos_id = tf.cast(vocab_table.lookup(tf.constant(EOS)), tf.int32)

    tokens = tf.string_split(src_ph)
    token_ids = tf.cast(vocab_table.lookup(tokens), tf.int32)

    # Keep every line but at most MAX_SRC_LEN tokens of each.
    ids_shape = tf.shape(token_ids)
    extent = tf.cast(tf.stack([ids_shape[0], MAX_SRC_LEN]), tf.int64)
    origin = tf.constant([0, 0], dtype=tf.int64)
    clipped = tf.sparse_slice(token_ids, start=origin, size=extent)

    # Length of a line = largest token position present in it, plus one.
    line_number = clipped.indices[:, 0]
    line_position = clipped.indices[:, 1]
    src_len = tf.segment_max(data=line_position, segment_ids=line_number) + 1

    src = tf.sparse_tensor_to_dense(clipped, eos_id)

    batchedInput = namedtuple("batchedInput",
                              ('initializer', 'source', 'source_length'))
    return batchedInput(initializer=None,
                        source=tf.identity(src, 'src'),
                        source_length=tf.identity(src_len, 'src_len'))
def lookup_sparse_tensor_by_index(sparse_tensor, index):
    """Return the values stored in row `index` of a 2D sparse tensor.

    Args:
        sparse_tensor: sparse tensor whose static shape has two dimensions.
        index: row to read.

    Returns:
        The values of that row, cast to int64.
    """
    _, num_cols = sparse_tensor.get_shape().as_list()
    row = tf.sparse_slice(sparse_tensor, [index, 0], [1, num_cols])
    return tf.cast(row.values, tf.int64)
# Listing of TensorFlow API names: variable/constant initializers, a few
# misc helpers (eye, expand_dims, random_shuffle, expm1, as_dtype,
# as_string), the slice family, the gather family, and the mask helpers.
# NOTE(review): the calls are written without arguments, so this reads as
# reference notes rather than runnable code -- confirm before executing.
# NOTE(review): the line breaks of this listing appear to have been lost; as
# a single line, everything after "# slice" is parsed as a comment.
tf.initialize_all_variables tf.local_variables_initializer() tf.global_variables_initializer() tf.constant_initializer tf.variables_initializer() tf.eye() tf.expand_dims() tf.random_shuffle() tf.expm1() tf.as_dtype() tf.as_string() # slice tf.slice() tf.sparse_slice() tf.strided_slice() tf.convert_to_tensor_or_indexed_slices() tf.resource_strided_slice_assign() tf.strided_slice_assign() tf.strided_slice_grad() tf.gather() tf.gather_nd() tf.gather_v2() tf.get_summary_op() tf.gradients() tf.boolean_mask() tf.sparse_mask() tf.sequence_mask()
def build(self,
          input_paths,
          epochs=1,
          mode='train',
          variable_partitions=8,
          config=None):
    """Build the input pipeline and the per-sample parameter-update graph.

    The batch's unique feature ids select the columns of the shared
    `psconstants` variable (rows 0/1/2 are used as w / n / z). For each
    sample of the batch the graph then computes new w, n and z values for
    the sample's features and scatters them into local copies; the
    differences to the pulled values are exposed as `self.w_delta`,
    `self.n_delta` and `self.z_delta`.
    NOTE(review): the w/n/z arithmetic looks like an FTRL-style update --
    confirm against the intended algorithm.

    Args:
        input_paths: passed to `self.get_dataset`.
        epochs: passed to `self.get_dataset`.
        mode: passed to `self.get_dataset`.
        variable_partitions: currently ignored, see the override below.
        config: not used by this method.
    """
    # NOTE(review): the argument is unconditionally overridden here.
    variable_partitions = 1
    logging.info('build model: mode = %s partitions = %s', mode,
                 variable_partitions)
    self.global_step = tf.train.get_or_create_global_step()
    dataset = self.get_dataset(input_paths, mode=mode, epochs=epochs).repeat()
    dataset = dataset.prefetch(1)
    self.next_batch = dataset.make_one_shot_iterator().get_next()
    label, features = self.next_batch

    # `features` is a SparseTensor, so the feature ids are taken from its
    # values (the original notes that tf.split / tf.reduce_sum are not
    # supported on it).
    self.non_zero_i = features.values
    self.idx, _ = tf.unique(self.non_zero_i)
    self.sorted_idx = tf.contrib.framework.sort(self.idx)
    self.shape = self.sorted_idx.shape

    partitioner = tf.min_max_variable_partitioner(
        max_partitions=variable_partitions, min_slice_size=64 << 20)
    with tf.variable_scope('linear', partitioner=partitioner):
        self.ps_parameters = tf.get_variable(
            name="psconstants",
            shape=(3, self.model_size),
            initializer=tf.zeros_initializer())

    # pull partial variables from ps_parameters
    self.local_parameter = tf.gather(self.ps_parameters,
                                     self.sorted_idx,
                                     axis=1)

    # keep updating during training
    w_init = tf.reshape(tf.gather(self.local_parameter, [0]), [-1])
    ni_init = tf.reshape(tf.gather(self.local_parameter, [1]), [-1])
    zi_init = tf.reshape(tf.gather(self.local_parameter, [2]), [-1])
    self.w_init_var = tf.Variable(w_init,
                                  trainable=False,
                                  validate_shape=False)
    self.n_init_var = tf.Variable(ni_init,
                                  trainable=False,
                                  validate_shape=False)
    self.z_init_var = tf.Variable(zi_init,
                                  trainable=False,
                                  validate_shape=False)

    # keep clean to get final deltas
    init_w = tf.gather(self.local_parameter, [0])
    init_n = tf.gather(self.local_parameter, [1])
    init_z = tf.gather(self.local_parameter, [2])

    for i in range(self.batch_size):
        # Exactly one sample (row i). The size argument is an extent, not
        # an end index: the previous `[i, 1, model_size]` produced an empty
        # slice for i == 0 and several rows for i > 1.
        self.line = tf.sparse_slice(features, [i, 0, 0],
                                    [1, 1, self.model_size])
        feas = self.line.values
        lens = tf.shape(feas, out_type=tf.int32)[0]
        self.init_feas_idx = tf.zeros_like(feas)

        t = tf.constant(0)
        initial_outputs = tf.TensorArray(dtype=tf.int64, size=lens)

        def cond(t, *args):
            return t < lens

        def body(t, sorted_idx, outputs_):
            # Position of the t-th feature id inside the sorted unique ids.
            cur_fea = tf.gather(feas, t)
            cur_index = tf.where(tf.equal(sorted_idx, cur_fea))
            outputs_ = outputs_.write(t, cur_index)
            return t + 1, sorted_idx, outputs_

        t, _, outputs = tf.while_loop(
            cond, body, [t, self.sorted_idx, initial_outputs])
        outputs = outputs.stack()
        self.feas_indics = tf.reshape(outputs, [-1])

        self.w_ii = tf.gather(self.w_init_var, self.feas_indics)
        n_ii = tf.gather(self.n_init_var, self.feas_indics)
        z_ii = tf.gather(self.z_init_var, self.feas_indics)

        lower = tf.map_fn(
            lambda x: self.l2_weight +
            (self.l2_shrinkage + tf.sqrt(x)) / self.learning_rate, n_ii)
        # Entries whose |z| does not exceed the L1 weight contribute 0.
        upper = tf.map_fn(
            lambda x: tf.cond(
                tf.abs(x) > self.l1_weight,
                lambda: tf.sign(x) * self.l1_weight - x,
                lambda: 0.0), z_ii)
        w_new = upper / lower

        logit = tf.reduce_sum(w_new)
        p = tf.sigmoid(logit)
        grad = tf.gather(label, [i]) - p
        sigmai = (tf.sqrt(n_ii + tf.square(grad)) -
                  tf.sqrt(n_ii)) * self.learning_rate
        z_new = z_ii + grad - sigmai * w_new
        n_new = n_ii + tf.square(grad)

        self.w_updated_var = tf.scatter_update(self.w_init_var,
                                               self.feas_indics, w_new)
        self.n_updated_var = tf.scatter_update(self.n_init_var,
                                               self.feas_indics, n_new)
        self.z_updated_var = tf.scatter_update(self.z_init_var,
                                               self.feas_indics, z_new)

    # Deltas between the last updated values and the values pulled above.
    self.w_delta = self.w_updated_var - init_w
    self.n_delta = self.n_updated_var - init_n
    self.z_delta = self.z_updated_var - init_z
def body(index, words):
    """Loop body: split the cell of `line` at `index` into characters.

    The [1, 1] cell of the enclosing-scope sparse tensor `line` at `index`
    is split into characters and concatenated onto `words` along axis 0.

    Returns:
        Tuple of (`index` advanced by one column, the grown sparse tensor).
    """
    position = tf.to_int64(index)
    cell = tf.sparse_slice(line, position, [1, 1])
    cell_chars = tf.string_split(cell.values, delimiter='')
    grown = tf.sparse_concat(axis=0,
                             sp_inputs=[words, cell_chars],
                             expand_nonconcat_dim=True)
    next_index = index + [0, 1]
    return next_index, grown
def _parse_function(example_proto):
    """Parse one serialized example of a tfrecords file.

    Both spike rasters are cut to the window that starts at
    `timeTargetOn + delay_after_start` and spans
    `delay_time_min - delay_after_start` steps, densified, transposed to
    [time, features] and concatenated along the feature axis.

    Returns:
        Tuple of (spikeRasters, isSuccessful, targetPos, numTarget, label,
        delayTime).
    """
    raster_size = [n_features_spikeRaster, max_len_sequence]
    # NOTE(review): 'spikeRaster2' is declared with n_features_spikeRaster,
    # while its slice below uses n_features_spikeRaster2 -- confirm this is
    # intended.
    schema = {
        "targetPos": tf.FixedLenFeature([3], tf.float32),
        'numTarget': tf.FixedLenFeature([], tf.int64),
        'spikeRaster': tf.SparseFeature(
            index_key=['spikeRaster_x', 'spikeRaster_y'],
            value_key='spikeRaster_values',
            dtype=tf.float32,
            size=raster_size),
        'spikeRaster2': tf.SparseFeature(
            index_key=['spikeRaster2_x', 'spikeRaster2_y'],
            value_key='spikeRaster2_values',
            dtype=tf.float32,
            size=raster_size),
        "spikeRaster_shape": tf.FixedLenFeature([2], tf.int64),
        "isSuccessful": tf.FixedLenFeature([1], tf.int64),
        "delayTime": tf.FixedLenFeature([1], tf.float32),
        'timeTargetOn': tf.FixedLenFeature([1], tf.float32),
    }
    parsed = tf.parse_single_example(example_proto, schema)

    # Predictive period => from timeTargetOn to delay_time_in
    window_len = delay_time_min - delay_after_start
    begin_time = tf.cast(parsed["timeTargetOn"], tf.int64) + delay_after_start
    # Prepend a 0 so the slice starts at [feature 0, begin_time].
    begin_sparse = tf.pad(begin_time, [[1, 0]], 'CONSTANT')

    def _dense_window(key, n_features):
        # Sparse raster => dense [window_len x n_features] time series.
        window = tf.sparse_slice(parsed[key], begin_sparse,
                                 [n_features, window_len])
        series = tf.transpose(tf.sparse_tensor_to_dense(window))
        series.set_shape((window_len, n_features))
        return series

    spikeRaster = _dense_window("spikeRaster", n_features_spikeRaster)
    spikeRaster2 = _dense_window("spikeRaster2", n_features_spikeRaster2)

    # Combine spikeRaster + spikeRaster2
    spikeRasters = tf.concat([spikeRaster, spikeRaster2], axis=1)

    # isSuccessful into a boolean
    isSuccessful = tf.cast(parsed["isSuccessful"], tf.bool)

    # target Position
    targetPos = parsed['targetPos'][0:2]

    # num_Target
    numTarget = parsed['numTarget'] - 1

    # label in case model with Quadrants
    if params.model_with_quadrants:
        circle_mapping = np.load(os.path.join(params.mapping, 'circle.npy'))
        angle_mapping = np.load(
            os.path.join(params.mapping, 'angle_in_quadrant.npy'))
        quadrant_mapping = np.load(
            os.path.join(params.mapping, 'quadrants.npy'))
        label_mapping = np.concatenate(
            [circle_mapping, quadrant_mapping, angle_mapping], axis=1)
    else:
        label_mapping = np.zeros((48, 11))
    label_mapping_tensor = tf.placeholder_with_default(label_mapping,
                                                       [48, 11])
    label = label_mapping_tensor[numTarget]

    # delayTime
    delayTime = parsed['delayTime']

    return spikeRasters, isSuccessful, targetPos, numTarget, label, delayTime