def read_camera_parameters(path, n_timestamp, parallel_camera_process=10):
    """Load per-record camera pose, intrinsics and resolution from a file.

    The first six lines of the file are skipped as a header. Every remaining
    line is split on single spaces and the fields are reshaped to 15 columns:

        f cx cy dist0 dist1 dist2 | qw qx qy qz | px py pz | width height

    Args:
      path: Path of the camera parameter file (read with `tf.read_file`).
      n_timestamp: Forwarded to `dataset_to_tensors` as `capacity`.
      parallel_camera_process: Forwarded to `dataset_to_tensors` as
        `parallelism`.

    Returns:
      The result of `dataset_to_tensors` applied to the dataset of
      `(pose_matrix, intrinsic_matrix, resolution)` tuples.
    """

    def process_camera_parameters(camera_info, orientation, position,
                                  resolution):
        """Build the [R | t] pose and the intrinsics for one record."""
        rotation = from_quaternion(orientation)
        translation = tf.expand_dims(position, -1)
        # Rotation columns followed by the translation column.
        pose = tf.concat([rotation, translation], -1)
        # Only f, cx and cy are used; the distortion coefficients are ignored.
        intrinsics = build_intrinsic_matrix(
            camera_info[0], camera_info[1], camera_info[2])
        return (pose, intrinsics, resolution)

    file_lines = tf.string_split([tf.read_file(path)], '\n').values
    records = file_lines[6:]  # drop the header
    columns = tf.reshape(tf.string_split(records, ' ').values, [-1, 15])
    numeric = tf.strings.to_number(columns)

    camera_info, orientation, position, resolution = tf.split(
        numeric, [6, 4, 3, 2], -1)
    camera_ds = tf.data.Dataset.from_tensor_slices(
        (camera_info, orientation, position, resolution))

    return dataset_to_tensors(
        camera_ds,
        capacity=n_timestamp,
        map_fn=process_camera_parameters,
        parallelism=parallel_camera_process)
def check_cam_coherence(path):
    """Check that a camera path's render file agrees with its ground truth.

    Two files are read from `path` (which must end with a path separator,
    since the file names are appended directly):

    * `cam0.render`: the first three lines are skipped, then every second
      line is kept. Each kept line has 10 space-separated fields: a timestamp
      followed by eye (3), look-at (3) and up (3) values.
    * `cam0_gt.visim`: the first line is skipped. Each remaining line has 8
      comma-separated fields: a timestamp, position (3) and quaternion (4).

    The timestamps and positions must match exactly, and the rotation built
    from the look-at data must be close to the one built from the quaternion.

    NOTE: the Python `assert` statements evaluate tensors as booleans and the
    result of `tf.assert_near` is not wired into any graph, so these checks
    are only effective under eager execution.

    Args:
      path: Directory prefix containing `cam0_gt.visim` and `cam0.render`.
    """
    cam_gt = path + 'cam0_gt.visim'
    cam_render = path + 'cam0.render'

    # --- Render file -------------------------------------------------------
    lines = tf.string_split([tf.read_file(cam_render)], '\n').values
    lines = lines[3:]
    # Keep every second line. Tensors have no `shape_as_list()` method and the
    # static length of the split result is unknown anyway, so use a strided
    # slice expressed through normal indexing.
    lines = lines[::2]
    fields = tf.reshape(tf.string_split(lines, ' ').values, [-1, 10])
    timestamp_from_render, numbers = tf.split(fields, [1, 9], -1)
    numbers = tf.strings.to_number(numbers)
    eye, lookat, up = tf.split(numbers, [3, 3, 3], -1)
    # Normalise each row independently; without `axis` the norm would be
    # taken over the whole batch of vectors at once.
    up_vector = tf.nn.l2_normalize(up - eye, axis=-1)
    lookat_vector = tf.nn.l2_normalize(lookat - eye, axis=-1)
    rotation_from_lookat = lookat_matrix(up_vector, lookat_vector)

    # --- Ground-truth file -------------------------------------------------
    lines = tf.string_split([tf.read_file(cam_gt)], '\n').values
    lines = lines[1:]
    fields = tf.reshape(tf.string_split(lines, ',').values, [-1, 8])
    timestamp_from_gt, numbers = tf.split(fields, [1, 7], -1)
    numbers = tf.strings.to_number(numbers)
    position, quaternion = tf.split(numbers, [3, 4], -1)
    rotation_from_quaternion = from_quaternion(quaternion)

    # --- Consistency checks ------------------------------------------------
    assert tf.reduce_all(tf.equal(timestamp_from_render, timestamp_from_gt))
    assert tf.reduce_all(tf.equal(eye, position))
    # (trace(Ra^T Rb) - 1) / 2 equals 1 when the two rotations coincide.
    so3_diff = (tf.trace(
        tf.matmul(rotation_from_lookat, rotation_from_quaternion,
                  transpose_a=True)) - 1) / 2
    tf.assert_near(so3_diff, tf.ones_like(so3_diff))
def read_timestamp(path):
    """Read the timestamp and image-name columns of a two-column CSV file.

    The first line of the file is skipped as a header; every other line is
    split on commas and the fields are reshaped to two columns.

    Args:
      path: Path of the timestamp file (read with `tf.read_file`).

    Returns:
      A `(timestamp, img_name)` pair of 1-D string tensors.
    """
    content = tf.read_file(path)
    rows = tf.string_split([content], '\n').values[1:]
    table = tf.reshape(tf.string_split(rows, ',').values, [-1, 2])
    timestamp_col, name_col = tf.split(table, [1, 1], -1)
    return tf.squeeze(timestamp_col, -1), tf.squeeze(name_col, -1)
def build_planner_inputs(question, answer, length, lookup_table):
    """Convert a question/answer pair to TextInputs for the text planner.

    The question is prefixed with "[Q]" and the answer with "[A]". Both are
    looked up in `lookup_table`, concatenated, truncated to `length` and then
    zero-padded up to `length`. Question tokens get segment id 2 and answer
    tokens segment id 1; positions restart at 0 for the answer.

    Args:
      question: <string>, space-separated token string.
      answer: <string>, space-separated token string.
      length: Length to pad or truncate to.
      lookup_table: Instance of contrib.lookup.index_table_from_tensor.

    Returns:
      Instance of TextInputs.
    """

    def _encode(text, marker):
        """Return (ids, count, positions) for `marker` followed by `text`."""
        pieces = tf.string_split([text]).values
        pieces = tf.concat([[marker], pieces], axis=0)
        ids = tf.cast(lookup_table.lookup(pieces), tf.int32)
        count = tensor_utils.shape(ids, 0)
        return ids, count, tf.range(count)

    q_ids, q_count, q_pos = _encode(question, "[Q]")
    a_ids, a_count, a_pos = _encode(answer, "[A]")

    # Concatenate question and answer, then cut everything to `length`.
    token_ids = tf.concat([q_ids, a_ids], axis=0)[:length]
    segment_ids = tf.concat(
        [tf.fill([q_count], 2), tf.fill([a_count], 1)], axis=0)[:length]
    positions = tf.concat([q_pos, a_pos], axis=0)[:length]
    mask = tf.concat(
        [tf.ones_like(q_ids), tf.ones_like(a_ids)], axis=0)[:length]

    # Right-pad with zeros up to `length` and pin the static shape.
    padding = [[0, length - tf.size(token_ids)]]

    def _finalize(tensor):
        return tf.ensure_shape(tf.pad(tensor, padding), [length])

    return TextInputs(
        token_ids=_finalize(token_ids),
        mask=_finalize(mask),
        segment_ids=_finalize(segment_ids),
        positions=_finalize(positions))
def load_sequence(sequence_dir, data_dir, parallelism=10, n_timestamp=1000):
    """Load the file paths and camera parameters of one sequence.

    The last two '/'-separated components of `sequence_dir` are used as the
    scene id and the sequence id. Camera files are looked up under
    `data_dir + 'GroundTruth_HD1-HD6/<scene_id>/velocity_angular<suffix>/'`,
    where `<suffix>` is taken from the end of the sequence id.

    Args:
      sequence_dir: String tensor/path of the sequence directory (without a
        trailing '/').
      data_dir: Dataset root; must end with a path separator because
        sub-paths are appended directly.
      parallelism: Forwarded to `read_camera_parameters` as
        `parallel_camera_process`.
      n_timestamp: Forwarded to `read_camera_parameters`; previously a
        hard-coded constant of 1000, which remains the default.

    Returns:
      A `ViewSequence` holding ids, timestamps, per-image paths (rgb, pano,
      depth, normal), pose matrices, intrinsic matrices and resolutions.
    """
    v = tf.string_split([sequence_dir], '/').values
    scene_id, sequence_id = v[-2], v[-1]

    camera_dir = data_dir + 'GroundTruth_HD1-HD6/' + scene_id + '/'
    # Substring starting four characters from the end of the sequence id.
    # The negative length is kept from the original call; TensorFlow
    # documents a negative `len` as "as many characters as possible".
    trajectory_name = ('velocity_angular' +
                       tf.strings.substr(sequence_id, -4, -4) + '/')
    camera_dir = camera_dir + trajectory_name

    camera_timestamp_path = camera_dir + 'cam0.timestamp'
    timestamp, img_name = read_timestamp(camera_timestamp_path)

    rgb_paths = sequence_dir + '/cam0/data/' + img_name
    pano_paths = sequence_dir + '/cam0_pano/data/' + img_name
    depth_paths = sequence_dir + '/depth0/data/' + img_name
    normal_paths = sequence_dir + '/normal0/data/' + img_name

    camera_parameters_path = camera_dir + 'cam0.ccam'
    pose_matrix, intrinsic_matrix, resolution = read_camera_parameters(
        camera_parameters_path,
        n_timestamp,
        parallel_camera_process=parallelism)

    return ViewSequence(scene_id, sequence_id, timestamp, rgb_paths,
                        pano_paths, depth_paths, normal_paths, pose_matrix,
                        intrinsic_matrix, resolution)
def map_fn_1(src, tgt):
    """Tokenize a source/target pair and compute a keep/drop flag.

    Relies on `filter_oversized_sequences`, `src_max_len` and `tgt_max_len`
    from the enclosing scope.

    Args:
      src: Scalar string, whitespace-separated source tokens.
      tgt: Scalar string, whitespace-separated target tokens.

    Returns:
      `(src_tokens, tgt_tokens, size_ok)`. The token tensors are truncated to
      the respective maximum length when that limit is truthy. `size_ok` is
      True when both sides are non-empty and, if filtering is enabled, both
      untruncated lengths are strictly below their limits.
    """
    src_tokens = tf.string_split([src]).values
    tgt_tokens = tf.string_split([tgt]).values

    # Sizes are measured before truncation.
    n_src = tf.size(src_tokens)
    n_tgt = tf.size(tgt_tokens)

    size_ok = tf.logical_and(n_src > 0, n_tgt > 0)
    if filter_oversized_sequences:
        within_limits = tf.logical_and(n_src < src_max_len,
                                       n_tgt < tgt_max_len)
        size_ok = tf.logical_and(size_ok, within_limits)

    if src_max_len:
        src_tokens = src_tokens[:src_max_len]
    if tgt_max_len:
        tgt_tokens = tgt_tokens[:tgt_max_len]

    return (src_tokens, tgt_tokens, size_ok)
def from_tokens(raw, lookup_):
    """Convert integer token codes to space-split string tokens.

    Each code in `raw` is mapped through `lookup_`, the pieces of every row
    are joined along axis 1, everything from "<EOS>" onwards is removed,
    underscores become spaces, and the result is split on single spaces.

    Args:
      raw: Tensor of token codes; cast to int32 before the gather.
      lookup_: Tensor of strings indexed by token code.

    Returns:
      The sparse result of `tf.string_split` over the cleaned rows.
    """
    pieces = tf.gather(lookup_, tf.cast(raw, tf.int32))
    sentence = tf.reduce_join(pieces, axis=1)
    without_eos = tf.regex_replace(sentence, b"<EOS>.*", b"")
    spaced = tf.regex_replace(without_eos, b"_", b" ")
    return tf.string_split(spaced, " ")
def get_random_span(text, p, max_span_len, max_iter=10):
    """Get random subspan from text token sequence, following heuristics.

    Heuristics:
      1) Should not start or end mid-wordpiece.
      2) Must contain at least one non-stopword token.
      3) Length should be drawn from Geo(p) and be at most max_span_len.

    Spans are drawn by rejection sampling for at most `max_iter` iterations;
    if every draw is rejected, the last drawn span is returned regardless.

    Args:
      text: <string> [], space-separated token string.
      p: <float32> Geometric distribution parameter.
      max_span_len: Maximum span length (the support of the truncated
        geometric distribution is 1..max_span_len).
      max_iter: Maximum rejection sampling iterations.

    Returns:
      span: <string> [], the selected tokens joined by single spaces.
    """
    # Split text into tokens.
    tokens = tf.string_split([text]).values
    seq_len = tf.size(tokens)

    # Loop-invariant values, built once instead of on every call of the
    # while_loop callbacks.
    stopwords = list(nltk_utils.get_stopwords() | set(string.punctuation))
    # Truncated geometric distribution over lengths 1..max_span_len.
    probs = np.array(
        [(1 - p)**(k - 1) * p for k in range(1, max_span_len + 1)])
    probs /= probs.sum()

    def reject(start, end):
        """Return True when the span [start, end) must be resampled."""
        # Look one token past the span so that a span whose following token
        # is a "##" continuation piece is rejected as ending mid-word.
        span = tokens[start:end + 1]
        wordpiece_boundary = tf.logical_or(
            tf.strings.regex_full_match(span[0], r"^##.*"),
            tf.strings.regex_full_match(span[-1], r"^##.*"))
        span = tokens[start:end]
        non_stopword = tf.setdiff1d(span, stopwords)
        all_stopword = tf.equal(tf.size(non_stopword.out), 0)
        length = tf.equal(tf.size(span), 0)
        return tf.reduce_any([wordpiece_boundary, all_stopword, length])

    def sample(start, end):
        """Draw a fresh span; the previous `start`/`end` are ignored."""
        del start, end  # Unused: each draw is independent.
        length = tf.distributions.Categorical(probs=probs).sample() + 1
        # Sample start uniformly over the offsets that keep the span inside
        # the sequence (at least one offset is always allowed).
        max_offset = tf.maximum(1, seq_len - length + 1)
        new_start = tf.random.uniform([], 0, max_offset, dtype=tf.int32)
        new_end = new_start + length
        return [new_start, new_end]

    # Rejection sample. The dummy (0, 0) span is empty, so it is always
    # rejected and at least one real draw happens.
    start = tf.constant(0)
    end = tf.constant(0)
    start, end = tf.while_loop(
        reject, sample, [start, end], maximum_iterations=max_iter)
    span = tf.strings.reduce_join(tokens[start:end], separator=" ")
    return span
def process_boundary(boundaries, input_length, t1_id, t2_id, all_dialogue):
    """Parse the dialogue boundary string and delegate further processing.

    `boundaries` is a whitespace-separated list of integers. The first half
    of the list is treated as start points and the rest as end points.

    Args:
      boundaries: Scalar string of whitespace-separated integers.
      input_length: Forwarded to `do_process_boundary`.
      t1_id: Forwarded to `do_process_boundary`.
      t2_id: Forwarded to `do_process_boundary`.
      all_dialogue: Forwarded to `do_process_boundary`.

    Returns:
      The result of `do_process_boundary`.
    """
    values = tf.string_to_number(
        tf.string_split([boundaries]).values, out_type=tf.int32)
    half = tf.size(values) // 2
    starts = values[0:half]
    ends = values[half:]
    return do_process_boundary(starts, ends, input_length, t1_id, t2_id,
                               all_dialogue)
def get_sub_items_self_play(data, kb):
    """Split one '|'-separated self-play record into its seven fields.

    Empty fields are kept (`skip_empty=False`) so that positions stay stable;
    the action field is empty for self-play inference.

    Args:
      data: Scalar string holding the '|'-separated record.
      kb: Passed through unchanged.

    Returns:
      `(intent, pred_action, truth_action, kb, utterance, boundary,
      reward_diag, reward_action)`.
    """
    parts = tf.string_split([data], sep="|", skip_empty=False).values
    (intent, pred_action, truth_action, utterance, boundary, reward_diag,
     reward_action) = [parts[i] for i in range(7)]
    return (intent, pred_action, truth_action, kb, utterance, boundary,
            reward_diag, reward_action)
def _deserialize_label(im, lab):
    """Parse a space-separated label string into a float vector.

    Relies on `self` from the enclosing scope for the number of regression
    outputs.

    Args:
      im: Passed through unchanged.
      lab: String tensor; a scalar is first reshaped to a one-element vector.

    Returns:
      `(im, label)` where `label` has shape `[self._num_regression_outputs]`.
    """
    is_scalar = tf.equal(tf.rank(lab), 0)
    as_vector = tf.cond(is_scalar,
                        lambda: tf.reshape(lab, [1]),
                        lambda: lab)
    pieces = tf.string_split(as_vector, sep=' ').values
    numbers = tf.strings.to_number(pieces)
    return im, tf.reshape(numbers, [self._num_regression_outputs])
def label_string_to_tensor(x, batch_size, num_outputs=None):
    """Parse a batch of space-separated number strings into a dense tensor.

    Args:
      x: 1-D string tensor, one space-separated label string per example.
      batch_size: Size of the first output dimension.
      num_outputs: Size of the second output dimension; when None it is
        inferred by the reshape.

    Returns:
      Float tensor of shape `[batch_size, num_outputs]`.
    """
    pieces = tf.string_split(x, delimiter=' ').values
    numbers = tf.string_to_number(pieces)
    columns = -1 if num_outputs is None else num_outputs
    return tf.reshape(numbers, [batch_size, columns])
def from_characters(raw, lookup_):
    """Convert ascii+2 encoded codes to string-tokens.

    Args:
      raw: Tensor of character codes offset by 2.
      lookup_: Tensor of strings indexed by character code.

    Returns:
      The sparse result of splitting every decoded row on single spaces.
    """
    # Undo the +2 offset and clamp to the byte range.
    shifted = tf.clip_by_value(tf.subtract(raw, 2), 0, 255)
    # The bitcast appends a trailing axis; element 0 of it is selected after
    # the lookup.
    as_bytes = tf.bitcast(shifted, tf.uint8)
    chars = tf.gather(lookup_, tf.cast(as_bytes, tf.int32))[:, :, 0]
    text = tf.reduce_join(chars, axis=1)
    # Strip NUL characters left over from zero codes.
    text = tf.regex_replace(text, b"\0", b"")
    return tf.string_split(text, " ")
def process_entry_self_play(intent, action, truth_action, kb, utterance,
                            boundary, reward_diag, reward_action,
                            vocab_table):
    """Pre-process procedure for the self-play iterator.

    Runs the common entry processing, converts the truth action to ids and
    parses the two whitespace-separated reward strings into float tensors.
    The last element of the dialogue reward is dropped.

    Args:
      intent: Forwarded to `process_entry_common`.
      action: Forwarded to `process_entry_common`.
      truth_action: String converted to ids with `process_data`.
      kb: Forwarded to `process_entry_common`.
      utterance: Forwarded to `process_entry_common`.
      boundary: Forwarded to `process_entry_common`.
      reward_diag: Scalar string of whitespace-separated dialogue rewards.
      reward_action: Scalar string of whitespace-separated action rewards.
      vocab_table: Lookup table used for "<t1>", "<t2>" and all tokens.

    Returns:
      A 15-tuple: intent and its size, source and target dialogue, dialogue
      size, action and its size, truth action ids, dialogue rewards, action
      rewards, kb, has_reservation, mask1, mask2 and turn_point.
    """
    t1_id = tf.cast(vocab_table.lookup(tf.constant("<t1>")), tf.int32)
    t2_id = tf.cast(vocab_table.lookup(tf.constant("<t2>")), tf.int32)

    (tensor_intent, size_intent, source_diag, target_diag, size_dialogue,
     tensor_action, size_action, tensor_kb, has_reservation, mask1, mask2,
     turn_point) = process_entry_common(intent, action, utterance, boundary,
                                        kb, vocab_table, t1_id, t2_id)

    truth_action, _ = process_data(truth_action, vocab_table)

    diag_pieces = tf.string_split([reward_diag]).values
    action_pieces = tf.string_split([reward_action]).values
    # The final dialogue reward entry is discarded (kept from the original
    # code, which itself questioned this step).
    tensor_reward_diag = tf.string_to_number(
        diag_pieces, out_type=tf.float32, name=None)[:-1]
    tensor_reward_action = tf.string_to_number(
        action_pieces, out_type=tf.float32, name=None)

    return (tensor_intent, size_intent, source_diag, target_diag,
            size_dialogue, tensor_action, size_action, truth_action,
            tensor_reward_diag, tensor_reward_action, tensor_kb,
            has_reservation, mask1, mask2, turn_point)
def _file_to_matrix(pts_path):
    """Read Nx3 point cloud from a .pts file.

    Every line of the file is parsed as three space-separated floats. Only
    the first half of the rows is returned, mirroring the experiment code in
    github.com/papagina/RotationContinuity/.../shapenet/code/train_pointnet.py
    which only used the first half of the points in each file.

    Args:
      pts_path: Path to a .pts file.

    Returns:
      Float tensor holding the first `N // 2` rows of the Nx3 point matrix.
    """
    contents = tf.read_file(pts_path)
    rows = tf.string_split([contents], delimiter='\n').values
    columns = tf.decode_csv(
        rows, record_defaults=[[0.0], [0.0], [0.0]], field_delim=' ')
    points = tf.transpose(tf.stack(columns))  # 3xN --> Nx3.
    half = tf.shape(points)[0] // 2
    return points[:half, :]
def _mapper(dataset):
    """Tokenize string features with tf.string_split and record lengths.

    Relies on `keys_to_map` and `suffix` from the enclosing scope. For every
    key `k`, two entries are added to `dataset` (which is modified in place):
    `k + suffix` with the tokens and `k + suffix + "_len"` with the number of
    non-empty tokens along the last axis.

    Args:
      dataset: Dict of feature name to tensor.

    Returns:
      The same dict with the added token and length entries.
    """
    for key in keys_to_map:
        feature = dataset[key]
        # pylint: disable=g-explicit-length-test
        is_scalar = len(feature.get_shape()) == 0
        # pylint: enable=g-explicit-length-test
        if is_scalar:
            # Used for questions: <string> [num_tokens]
            tokens = tf.string_split([feature]).values
        else:
            # Used for contexts: one row per input string, padded with ""
            # to the longest row, i.e. <string> [num_context, max_num_tokens].
            tokens = tf.sparse_tensor_to_dense(
                tf.string_split(feature), default_value="")

        token_key = key + suffix
        dataset[token_key] = tokens
        # Padding entries are empty strings, so counting non-empty entries
        # gives the exact length.
        dataset[token_key + "_len"] = tf.count_nonzero(
            tokens, axis=-1, dtype=tf.int32)
    return dataset
def parse_text(self, sentence, label=None):
    """Tokenize a sentence (and optional label) and map tokens to ids.

    When `label` is None, the label tokens are the sentence tokens shifted by
    one, i.e. the (already trimmed) sentence without its first token. No SOS
    or EOS tokens are added to the label.

    Both sequences are trimmed with the same guard: a falsy `max_seq_len`
    (None or 0) means "no limit". The label was previously trimmed whenever
    `max_seq_len` was not None, so `max_seq_len=0` left the sentence
    untouched but reduced the label to an empty sequence.

    Args:
      sentence: Scalar string of whitespace-separated words.
      label: Optional scalar string of whitespace-separated label words.

    Returns:
      `(sentence_ids, label_ids, src_seq_len, tgt_seq_len)`.
    """
    max_len = self.max_seq_len

    # Split sentence into words, trim, and convert it into ids.
    sentence_split = tf.string_split([sentence]).values
    if max_len:
        sentence_split = sentence_split[:max_len]
    src_seq_len = tf.size(sentence_split)
    sentence = self.src_vocab.lookup(sentence_split)

    if label is not None:
        label_split = tf.string_split([label]).values
    else:
        label_split = sentence_split[1:]
    if max_len:
        label_split = label_split[:max_len]
    tgt_seq_len = tf.size(label_split)
    label = self.tgt_vocab.lookup(label_split)

    return sentence, label, src_seq_len, tgt_seq_len
def build_text_inputs(
    text,
    length,
    lookup_table,
    segment_id=0,
    start_token=None,
    end_token=None,
):
    """Convert text to TextInputs.

    The text is truncated so that, together with the optional start and end
    tokens, it fits into `length` entries; the result is zero-padded up to
    `length`.

    Args:
      text: <string>, space-separated token string.
      length: Length to pad or truncate to.
      lookup_table: Instance of contrib.lookup.index_table_from_tensor.
      segment_id: Integer denoting segment type.
      start_token: Optional start token.
      end_token: Optional end token.

    Returns:
      Instance of TextInputs.
    """
    # Reserve one slot for each special token that will be added.
    reserved = int(start_token is not None) + int(end_token is not None)
    tokens = tf.string_split([text]).values[:length - reserved]

    if start_token is not None:
        tokens = tf.concat([[start_token], tokens], axis=0)
    if end_token is not None:
        tokens = tf.concat([tokens, [end_token]], axis=0)

    ids = tf.cast(lookup_table.lookup(tokens), tf.int32)
    ones = tf.ones_like(ids)
    segments = tf.fill(tf.shape(ids), segment_id)

    # Right-pad with zeros up to `length`.
    padding = [[0, length - tf.size(ids)]]
    ids = tf.pad(ids, padding)
    ones = tf.pad(ones, padding)
    segments = tf.pad(segments, padding)

    return TextInputs(
        token_ids=tf.ensure_shape(ids, [length]),
        mask=tf.ensure_shape(ones, [length]),
        segment_ids=tf.ensure_shape(segments, [length]),
        positions=tf.ensure_shape(tf.range(length), [length]))
def module_fn_with_preprocessing():
    """Spec function for a full-text embedding module with preprocessing.

    Relies on `vocab_size`, `num_oov_buckets`, `embeddings_dim`,
    `vocabulary_file` and `EMBEDDINGS_VAR_NAME` from the enclosing scope.
    Registers a "default" signature mapping a batch of sentences to one
    combined ("sqrtn") embedding per sentence.
    """
    sentences = tf.placeholder(shape=[None], dtype=tf.string,
                               name="sentences")

    # Minimalistic preprocessing: drop punctuation, then split on spaces.
    without_punctuation = tf.regex_replace(
        input=sentences, pattern=r"\pP", rewrite="")
    tokens = tf.string_split(without_punctuation, " ")

    initial_embeddings = tf.zeros(
        [vocab_size + num_oov_buckets, embeddings_dim])
    embeddings_var = tf.get_variable(
        initializer=initial_embeddings,
        name=EMBEDDINGS_VAR_NAME,
        dtype=tf.float32)

    # Vocabulary: one token per line, id = line number.
    table_initializer = tf.lookup.TextFileInitializer(
        vocabulary_file,
        tf.string, tf.lookup.TextFileIndex.WHOLE_LINE,
        tf.int64, tf.lookup.TextFileIndex.LINE_NUMBER)
    lookup_table = tf.lookup.StaticVocabularyTable(
        table_initializer, num_oov_buckets=num_oov_buckets)

    token_ids = lookup_table.lookup(tokens.values)
    sparse_ids = tf.SparseTensor(
        indices=tokens.indices,
        values=token_ids,
        dense_shape=tokens.dense_shape)

    # Sentences that are empty before or after normalization produce empty
    # rows. An embedding is still wanted for every row, so those rows are
    # filled with the id of the empty string.
    empty_string_id = lookup_table.lookup(tf.constant(""))
    sparse_ids, _ = tf.sparse_fill_empty_rows(sparse_ids, empty_string_id)

    # If all sentences were empty the shape is still [?, 0]; after filling
    # there are no empty rows, so resetting the shape yields
    # [sparse_ids.dense_shape[0], max(1, sparse_ids.dense_shape[1])].
    sparse_ids = tf.sparse_reset_shape(sparse_ids)

    combined_embedding = tf.nn.embedding_lookup_sparse(
        params=embeddings_var,
        sp_ids=sparse_ids,
        sp_weights=None,
        combiner="sqrtn")

    hub.add_signature("default",
                      {"sentences": sentences},
                      {"default": combined_embedding})
def get_infer_iterator(src_dataset,
                       src_vocab_table,
                       batch_size,
                       eos,
                       sos,
                       src_max_len=None):
    """Get dataset for inference.

    Every source line is split on whitespace, converted to int32 ids and
    wrapped in SOS/EOS ids. Lines are then batched with padding; incomplete
    final batches are dropped.

    Args:
      src_dataset: Dataset of scalar source strings.
      src_vocab_table: Lookup table mapping words to ids.
      batch_size: Number of examples per batch.
      eos: End-of-sentence token string; its id is also the padding value.
      sos: Start-of-sentence token string.
      src_max_len: Padded sequence length; None pads to the longest sequence
        in each batch.

    Returns:
      Dataset of dicts with keys "source" and "source_sequence_length".
    """
    src_eos_id = tf.cast(src_vocab_table.lookup(tf.constant(eos)), tf.int32)
    src_sos_id = tf.cast(src_vocab_table.lookup(tf.constant(sos)), tf.int32)

    def _tokenize(line):
        return tf.string_split([line]).values

    def _to_ids(words):
        return tf.cast(src_vocab_table.lookup(words), tf.int32)

    def _wrap_and_count(ids):
        # The length accounts for the added SOS and EOS ids.
        wrapped = tf.concat(([src_sos_id], ids, [src_eos_id]), 0)
        return (wrapped, 2 + tf.size(ids))

    def _as_features(src_ids, src_seq_len):
        return {
            "source": src_ids,
            "source_sequence_length": src_seq_len
        }

    tokenized = src_dataset.map(_tokenize).map(_to_ids).map(_wrap_and_count)

    batched_dataset = tokenized.padded_batch(
        batch_size,
        # First entry: the id vector; second entry: its scalar length.
        padded_shapes=(
            tf.TensorShape([src_max_len]),
            tf.TensorShape([])),
        # Sequences are padded with the EOS id (positions past the true
        # length are expected to be masked later). The length is a scalar
        # and needs no padding, so its padding value is unused.
        padding_values=(
            src_eos_id,
            0),
        drop_remainder=True)

    return batched_dataset.map(_as_features)
def _map_sequence_to_ints(example, amino_acid_table):
    """Replace the amino-acid string in an example with integer indices.

    The sequence string is split into individual characters (empty
    delimiter) and each character is looked up in `amino_acid_table`. The
    example dict is modified in place.

    Args:
      example: dictionary from string to tensor, containing key SEQUENCE_KEY.
      amino_acid_table: tf.contrib.lookup.index_table_from_tensor.

    Returns:
      The same dict, where the value at SEQUENCE_KEY has been replaced by the
      tensor of looked-up indices.
    """
    residues = tf.string_split([example[SEQUENCE_KEY]], delimiter='').values
    example[SEQUENCE_KEY] = amino_acid_table.lookup(residues)
    return example
def parse_single_tfexample(_, serialized_example):
    """Parse one serialized example into int32 features 'x' and 'y'.

    'x' is stored as a string of space-separated integers and is converted
    to an int32 vector; 'y' is stored as int64 and cast to int32.

    Args:
      _: Ignored.
      serialized_example: Scalar string holding the serialized example.

    Returns:
      Dict with keys 'x' and 'y'.
    """
    # A 'z' feature (tf.VarLenFeature(tf.string)) describing the sequence
    # origin, i.e. which genome and position the sequence is from, is
    # intentionally not parsed.
    schema = {
        'x': tf.FixedLenFeature([], tf.string),
        'y': tf.FixedLenFeature([], tf.int64),
    }
    features = tf.parse_single_example(serialized_example, features=schema)

    digits = tf.string_split([features['x']], delimiter=' ').values
    features['x'] = tf.string_to_number(digits, out_type=tf.int32)
    features['y'] = tf.cast(features['y'], dtype=tf.int32)
    return features
def _dedup_tensor(sp_tensor: tf.SparseTensor) -> tf.SparseTensor:
    """Dedup values of a SparseTensor along each row.

    tf.unique only works on 1-D tensors, so every value is first tagged with
    its row index as the string "<row>|<value>". The unique tagged strings
    are then split back into row index and value. Values are assumed not to
    contain '|'.

    Args:
      sp_tensor: A 2D SparseTensor to be deduped.

    Returns:
      A deduped SparseTensor of shape [batch_size, max_len], where max_len is
      the maximum number of unique values for a row in the Tensor.
    """
    value_dtype = sp_tensor.values.dtype
    needs_cast = value_dtype != tf.string

    row_tags = tf.as_string(sp_tensor.indices[:, 0])
    # Non-string values make a round trip through strings.
    value_strings = (
        tf.as_string(sp_tensor.values) if needs_cast else sp_tensor.values)

    tagged = tf.strings.join([row_tags, value_strings], separator='|')
    unique_tagged, _ = tf.unique(tagged)

    # Each unique string splits into exactly two pieces: row and value.
    pairs = tf.reshape(
        tf.string_split(unique_tagged, delimiter='|').values, [-1, 2])
    row_strings = pairs[:, 0]
    values = pairs[:, 1]

    rows = tf.reshape(
        tf.string_to_number(row_strings, out_type=tf.int32), [-1])
    if needs_cast:
        values = tf.string_to_number(values, out_type=value_dtype)
    values = tf.reshape(values, [-1])

    # Convert example indices into SparseTensor indices, e.g.
    # [0, 0, 0, 1, 3, 3] -> [[0,0], [0,1], [0,2], [1,0], [3,0], [3,1]]
    batch_size = tf.to_int32(sp_tensor.dense_shape[0])
    new_indices, max_len = _example_index_to_sparse_index(rows, batch_size)

    return tf.SparseTensor(
        indices=tf.to_int64(new_indices),
        values=values,
        dense_shape=[tf.to_int64(batch_size), max_len])
def _maybe_actually_init(self):
    """Lazily create the example converter graph and session.

    Does nothing when a session already exists. Otherwise loads the
    vocabulary, builds a graph that turns one line of tokenized text into
    padded `RCInputs` (with an all-"[PAD]" question), and starts a session
    with the lookup tables initialized.
    """
    if self._session is not None:
        return

    max_length = self._params["max_length"]
    query_length = self._params["query_length"]

    self._vocab = text_utils.Vocab.load(self._params["vocab_path"])
    self._graph = tf.Graph()
    with self._graph.as_default():
        # Placeholder for input lines of tokenized text.
        self._text = tf.placeholder(tf.string, [])

        # Truncate the text; 3 slots are taken by CLS and the two SEPs.
        text_budget = max_length - query_length - 3
        tokens = tf.string_split([self._text]).values[:text_budget]

        # Full input: CLS, empty question, SEP, text, SEP.
        empty_question = ["[PAD]"] * query_length
        inputs = tf.concat(
            [[self._vocab.CLS], empty_question, [self._vocab.SEP],
             tokens, [self._vocab.SEP]],
            axis=0)

        # Convert to ids.
        lookup_table = self._vocab.get_string_lookup_table()
        input_ids = tf.cast(lookup_table.lookup(inputs), tf.int32)
        input_mask = tf.ones_like(input_ids)
        # Segment 0 covers CLS + question + SEP, segment 1 the text + SEP.
        segment_ids = tf.concat(
            [[0] * (query_length + 2), tf.fill(tf.shape(tokens), 1), [1]],
            axis=0)

        # Pad to final length.
        padding = [[0, max_length - tf.size(input_ids)]]
        self._rc_inputs = RCInputs(
            tf.pad(input_ids, padding),
            tf.pad(input_mask, padding),
            tf.pad(segment_ids, padding))

        # Initialize session.
        self._session = tf.Session()
        self._session.run(tf.initialize_all_tables())
def _file_to_matrix(pts_path):
    """Read Nx3 point cloud and 3x3 rotation matrix from a .pts file.

    The test data is a modified version of the original files. For each .pts
    file we have (1) added a 3x3 rotation matrix for testing, and (2) removed
    the second half of the point cloud since it is not used at all.

    Args:
      pts_path: path to a .pts file.

    Returns:
      A Nx3 point cloud.
      A 3x3 rotation matrix.
    """
    contents = tf.read_file(pts_path)
    rows = tf.string_split([contents], delimiter='\n').values
    columns = tf.decode_csv(
        rows, record_defaults=[[0.0], [0.0], [0.0]], field_delim=' ')
    matrix = tf.transpose(tf.stack(columns))  # 3xN --> Nx3.

    # The first three rows are the rotation matrix.
    rotation = matrix[:3, :]
    # NOTE(review): the point cloud starts at row 4, so row 3 is skipped.
    # This looks like a possible off-by-one (expected `matrix[3:, :]`);
    # confirm against the test-file layout before changing it.
    points = matrix[4:, :]
    return points, rotation
def split_on_whitespace(str_tensor):
    """Split a string tensor on whitespace and return the flat token values.

    A trailing axis is added before splitting, so a scalar input becomes a
    one-element vector as `tf.string_split` requires.
    """
    as_vector = tf.expand_dims(str_tensor, -1)
    sparse_tokens = tf.string_split(as_vector)
    return sparse_tokens.values
def filter_random_lighting(sequence_dir):
    """Return True unless the sequence name starts with 'random'.

    The sequence name is the last '/'-separated component of `sequence_dir`;
    its first six characters are compared with 'random'.
    """
    path_parts = tf.string_split([sequence_dir], '/').values
    name_prefix = tf.substr(path_parts[-1], 0, 6)
    return tf.not_equal(name_prefix, 'random')
def process_data(object_str, vocab_table):
    """Preliminary processing of dialogue data.

    Args:
      object_str: Scalar string of whitespace-separated tokens.
      vocab_table: Lookup table mapping tokens to ids.

    Returns:
      `(ids, count)`: the int32 token ids and their number.
    """
    words = tf.string_split([object_str]).values
    ids = tf.cast(vocab_table.lookup(words), tf.int32)
    return ids, tf.size(ids)
def label_string_to_tensor(x, batch_size, num_outputs=-1):
    """Parse a batch of space-separated number strings into a dense tensor.

    Args:
      x: 1-D string tensor, one space-separated label string per example.
      batch_size: Size of the first output dimension.
      num_outputs: Size of the second output dimension; the default -1 lets
        the reshape infer it.

    Returns:
      Float tensor of shape `[batch_size, num_outputs]`.
    """
    pieces = tf.string_split(x, sep=' ').values
    numbers = tf.string_to_number(pieces)
    return tf.reshape(numbers, [batch_size, num_outputs])
def convert_string_neighbors(string_neighbors):
    """Convert strings of digit characters to a dense boolean tensor.

    Each string is split into single characters (empty delimiter), rows are
    padded with "0" to a common width, every character is parsed as an int32
    and non-zero values become True.

    Args:
      string_neighbors: 1-D string tensor, one digit string per row.

    Returns:
      Boolean tensor with one row per input string.
    """
    chars = tf.string_split(string_neighbors, "")
    padded = tf.sparse_tensor_to_dense(chars, default_value="0")
    flags = tf.string_to_number(padded, out_type=tf.int32)
    return tf.cast(flags, tf.bool)