def read_camera_parameters(path, n_timestamp, parallel_camera_process=10):
    """Read a camera's parameters."""
    # parse the lines
    lines = tf.string_split([tf.read_file(path)], '\n').values
    # ignore the header
    lines = lines[6:]
    # parse the columns
    fields = tf.reshape(tf.string_split(lines, ' ').values, [-1, 15])
    # convert string to float32
    fields = tf.strings.to_number(fields)
    # <camera info: f, cx, cy, dist.coeff[0],dist.coeff[1],dist.coeff[2]>
    # <orientation: w,x,y,z> <position: x,y,z> <image resolution: width, height>
    camera_info, orientation, position, resolution = tf.split(
        fields, [6, 4, 3, 2], -1)
    camera_ds = tf.data.Dataset.from_tensor_slices(
        (camera_info, orientation, position, resolution))

    def process_camera_parameters(camera_info, orientation, position,
                                  resolution):
        # convert quaternion to 3x3 matrix
        rotation_matrix = from_quaternion(orientation)
        # 3x4 pose matrix [R_3x3 |t_3x1]
        pose_matrix = tf.concat(
            [rotation_matrix, tf.expand_dims(position, -1)], -1)
        intrinsic_matrix = build_intrinsic_matrix(camera_info[0],
                                                  camera_info[1],
                                                  camera_info[2])
        return (pose_matrix, intrinsic_matrix, resolution)

    return dataset_to_tensors(camera_ds,
                              capacity=n_timestamp,
                              map_fn=process_camera_parameters,
                              parallelism=parallel_camera_process)
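A minimal usage sketch in TF1 graph mode (the .ccam path and frame count are hypothetical; from_quaternion, build_intrinsic_matrix, and dataset_to_tensors are helpers from the surrounding codebase):

pose, intrinsics, resolution = read_camera_parameters(
    '/data/scene0/cam0.ccam', n_timestamp=1000)
with tf.Session() as sess:
    # Shapes depend on how dataset_to_tensors stacks frames,
    # e.g. pose: [1000, 3, 4], intrinsics: [1000, 3, 3].
    pose_val, k_val = sess.run([pose, intrinsics])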
def check_cam_coherence(path):
    """Check the coherence of a camera path."""
    cam_gt = path + 'cam0_gt.visim'
    cam_render = path + 'cam0.render'
    lines = tf.string_split([tf.read_file(cam_render)], '\n').values
    lines = lines[3:]
    # Keep every other line. The original strided_slice via shape_as_list()
    # breaks when the number of lines is not statically known.
    lines = lines[::2]
    fields = tf.reshape(tf.string_split(lines, ' ').values, [-1, 10])
    timestamp_from_render, numbers = tf.split(fields, [1, 9], -1)
    numbers = tf.strings.to_number(numbers)
    eye, lookat, up = tf.split(numbers, [3, 3, 3], -1)
    # Normalize each row vector (axis=-1); the default would normalize over
    # all elements of the batch at once.
    up_vector = tf.nn.l2_normalize(up - eye, axis=-1)
    lookat_vector = tf.nn.l2_normalize(lookat - eye, axis=-1)
    rotation_from_lookat = lookat_matrix(up_vector, lookat_vector)

    lines = tf.string_split([tf.read_file(cam_gt)], '\n').values
    lines = lines[1:]
    fields = tf.reshape(tf.string_split(lines, ',').values, [-1, 8])
    timestamp_from_gt, numbers = tf.split(fields, [1, 7], -1)
    numbers = tf.strings.to_number(numbers)
    position, quaternion = tf.split(numbers, [3, 4], -1)
    rotation_from_quaternion = from_quaternion(quaternion)

    # A bare Python `assert` on a Tensor is always truthy in graph mode, so
    # build graph-mode assertions instead; they fire when evaluated.
    timestamps_match = tf.assert_equal(timestamp_from_render, timestamp_from_gt)
    positions_match = tf.assert_equal(eye, position)
    # trace(R1^T R2) = 1 + 2*cos(angle), so so3_diff is the cosine of the
    # angle between the two rotations and should be ~1 when they agree.
    so3_diff = (tf.trace(
        tf.matmul(rotation_from_lookat,
                  rotation_from_quaternion,
                  transpose_a=True)) - 1) / 2
    rotations_match = tf.assert_near(so3_diff, tf.ones_like(so3_diff))
    return tf.group(timestamps_match, positions_match, rotations_match)
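Because the checks above are graph ops, they only fire when the returned op is evaluated; a minimal sketch (the scene path is hypothetical):

coherence_op = check_cam_coherence('/data/scene0/')
with tf.Session() as sess:
    sess.run(coherence_op)  # raises InvalidArgumentError if the files disagree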
def read_timestamp(path):
    """Read a path's timestamp."""
    # parse the lines
    lines = tf.string_split([tf.read_file(path)], '\n').values
    # ignore the header
    lines = lines[1:]
    # parse the columns
    fields = tf.reshape(tf.string_split(lines, ',').values, [-1, 2])
    timestamp, img_name = tf.split(fields, [1, 1], -1)
    timestamp = tf.squeeze(timestamp, -1)
    img_name = tf.squeeze(img_name, -1)
    return timestamp, img_name
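For reference, the expected file layout is a one-line header followed by timestamp,img_name rows; a hedged usage sketch (the path is hypothetical):

timestamp, img_name = read_timestamp('/data/scene0/cam0.timestamp')
with tf.Session() as sess:
    ts, names = sess.run([timestamp, img_name])  # two 1-D byte-string arrays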
Example 4
def build_planner_inputs(question, answer, length, lookup_table):
    """Convert text to TextInputs for conditional text planner.

  Args:
    question: <string>, space-separated token string.
    answer: <string>, space-separated token string.
    length: Length to pad or truncate to.
    lookup_table: Instance of contrib.lookup.index_table_from_tensor.

  Returns:
    Instance of TextInputs.
  """
    # Build question.
    q_tokens = tf.string_split([question]).values
    q_tokens = tf.concat([["[Q]"], q_tokens], axis=0)
    q_token_ids = tf.cast(lookup_table.lookup(q_tokens), tf.int32)
    q_len = tensor_utils.shape(q_token_ids, 0)
    q_positions = tf.range(q_len)

    # Build answer.
    a_tokens = tf.string_split([answer]).values
    a_tokens = tf.concat([["[A]"], a_tokens], axis=0)
    a_token_ids = tf.cast(lookup_table.lookup(a_tokens), tf.int32)
    a_len = tensor_utils.shape(a_token_ids, 0)
    a_positions = tf.range(a_len)

    # Combine.
    token_ids = tf.concat([q_token_ids, a_token_ids], axis=0)
    segment_ids = tf.concat([tf.fill([q_len], 2), tf.fill([a_len], 1)], axis=0)
    positions = tf.concat([q_positions, a_positions], axis=0)
    q_mask = tf.ones_like(q_token_ids)
    mask = tf.concat([q_mask, tf.ones_like(a_token_ids)], axis=0)

    # Truncate.
    token_ids = token_ids[:length]
    segment_ids = segment_ids[:length]
    mask = mask[:length]
    positions = positions[:length]

    # Pad.
    pad = [[0, length - tf.size(token_ids)]]
    token_ids = tf.pad(token_ids, pad)
    mask = tf.pad(mask, pad)
    segment_ids = tf.pad(segment_ids, pad)
    positions = tf.pad(positions, pad)

    text_input = TextInputs(token_ids=tf.ensure_shape(token_ids, [length]),
                            mask=tf.ensure_shape(mask, [length]),
                            segment_ids=tf.ensure_shape(segment_ids, [length]),
                            positions=tf.ensure_shape(positions, [length]))

    return text_input
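A hedged usage sketch with a toy vocabulary (TextInputs and tensor_utils come from the surrounding codebase, and contrib lookup tables must be initialized before use):

vocab = tf.constant(["[PAD]", "[Q]", "[A]", "what", "is", "tf", "a", "library"])
lookup_table = tf.contrib.lookup.index_table_from_tensor(vocab,
                                                         num_oov_buckets=1)
inputs = build_planner_inputs("what is tf", "a library", length=16,
                              lookup_table=lookup_table)
with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    print(sess.run(inputs.token_ids))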
def load_sequence(sequence_dir, data_dir, parallelism=10):
    """Load a sequence."""
    n_timestamp = 1000
    v = tf.string_split([sequence_dir], '/').values
    scene_id, sequence_id = v[-2], v[-1]
    camera_dir = data_dir + 'GroundTruth_HD1-HD6/' + scene_id + '/'
    # The trajectory suffix is the last four characters of the sequence name
    # (a negative length is not valid for tf.strings.substr).
    name_len = tf.strings.length(v[-1])
    trajectory_name = ('velocity_angular' +
                       tf.strings.substr(v[-1], name_len - 4, 4) + '/')
    camera_dir = camera_dir + trajectory_name
    camera_timestamp_path = camera_dir + 'cam0.timestamp'
    timestamp, img_name = read_timestamp(camera_timestamp_path)

    rgb_paths = sequence_dir + '/cam0/data/' + img_name
    pano_paths = sequence_dir + '/cam0_pano/data/' + img_name
    depth_paths = sequence_dir + '/depth0/data/' + img_name
    normal_paths = sequence_dir + '/normal0/data/' + img_name

    camera_parameters_path = camera_dir + 'cam0.ccam'
    pose_matrix, intrinsic_matrix, resolution = read_camera_parameters(
        camera_parameters_path,
        n_timestamp,
        parallel_camera_process=parallelism)
    return ViewSequence(scene_id, sequence_id, timestamp, rgb_paths,
                        pano_paths, depth_paths, normal_paths, pose_matrix,
                        intrinsic_matrix, resolution)
        def map_fn_1(src, tgt):
            src = tf.string_split([src]).values
            tgt = tf.string_split([tgt]).values
            src_size = tf.size(src)
            tgt_size = tf.size(tgt)
            size_ok_bool = tf.logical_and(src_size > 0, tgt_size > 0)
            if filter_oversized_sequences:
                oversized = tf.logical_and(src_size < src_max_len,
                                           tgt_size < tgt_max_len)
                size_ok_bool = tf.logical_and(size_ok_bool, oversized)

            if src_max_len:
                src = src[:src_max_len]
            if tgt_max_len:
                tgt = tgt[:tgt_max_len]
            return (src, tgt, size_ok_bool)
Example 7
def from_tokens(raw, lookup_):
    # Map ids back to token strings, trim everything after <EOS>, and
    # restore spaces that were encoded as underscores.
    gathered = tf.gather(lookup_, tf.cast(raw, tf.int32))
    joined = tf.regex_replace(tf.reduce_join(gathered, axis=1), b"<EOS>.*",
                              b"")
    cleaned = tf.regex_replace(joined, b"_", b" ")
    tokens = tf.string_split(cleaned, " ")
    return tokens
Example 8
def get_random_span(text, p, max_span_len, max_iter=10):
    """Get random subspan from text token sequence, following heuristics.

  Heuristics:
    1) Should not start or end mid-wordpiece.
    2) Must contain at least one non-stopword token.
    3) Length should be drawn from Geo(p) and less than max_span_len.

  Args:
    text: <string> [], space-separated token string.
    p: <float32> Geometric distribution parameter.
    max_span_len: Length to pad or truncate to.
    max_iter: Maximum rejection sampling iterations.

  Returns:
    span_wid: <string>
  """
    # Split text into tokens.
    tokens = tf.string_split([text]).values
    seq_len = tf.size(tokens)

    def reject(start, end):
        """Reject span sample."""
        # Inspect the first token and the token just past the span: a "##"
        # wordpiece prefix at either edge means the span would start or end
        # mid-word.
        span = tokens[start:end + 1]
        wordpiece_boundary = tf.logical_or(
            tf.strings.regex_full_match(span[0], r"^##.*"),
            tf.strings.regex_full_match(span[-1], r"^##.*"))
        span = tokens[start:end]
        stopwords = list(nltk_utils.get_stopwords() | set(string.punctuation))
        non_stopword = tf.setdiff1d(span, stopwords)
        all_stopword = tf.equal(tf.size(non_stopword.out), 0)
        empty = tf.equal(tf.size(span), 0)
        return tf.reduce_any([wordpiece_boundary, all_stopword, empty])

    def sample(start, end):
        """Sample length from truncated Geo(p)."""
        # Sample from truncated geometric distribution.
        geometric = lambda k: (1 - p)**(k - 1) * p
        probs = np.array([geometric(k) for k in range(1, max_span_len + 1)])
        probs /= probs.sum()
        length = tf.distributions.Categorical(probs=probs).sample() + 1

        # Sample start uniformly.
        max_offset = tf.maximum(1, seq_len - length + 1)
        start = tf.random.uniform([], 0, max_offset, dtype=tf.int32)
        end = start + length

        # Return span.
        return [start, end]

    # Rejection sample. Start with dummy span variable.
    start = tf.constant(0)
    end = tf.constant(0)
    start, end = tf.while_loop(reject,
                               sample, [start, end],
                               maximum_iterations=max_iter)
    span = tf.strings.reduce_join(tokens[start:end], separator=" ")

    return span
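A hedged usage sketch (the text and parameters are illustrative; nltk_utils stopwords must be importable):

text = tf.constant("the quick brown fox jumps over the lazy dog")
span = get_random_span(text, p=0.3, max_span_len=4)
with tf.Session() as sess:
    print(sess.run(span))  # e.g. b'quick brown'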
def process_boundary(boundaries, input_length, t1_id, t2_id, all_dialogue):
    """Process the boundaries of the dialogue."""
    points = tf.string_split([boundaries]).values
    points_val = tf.string_to_number(points, out_type=tf.int32)
    siz = tf.size(points_val) // 2
    start_points, end_points = points_val[0:siz], points_val[siz:]
    return do_process_boundary(start_points, end_points, input_length,
                               t1_id, t2_id, all_dialogue)
def get_sub_items_self_play(data, kb):
    """process procedure for self play."""
    all_data = tf.string_split([data], sep="|", skip_empty=False).values
    # action is empty for self-play inference
    (intent, pred_action, truth_action, utterance, boundary, reward_diag,
     reward_action) = (all_data[0], all_data[1], all_data[2], all_data[3],
                       all_data[4], all_data[5], all_data[6])
    return (intent, pred_action, truth_action, kb, utterance, boundary,
            reward_diag, reward_action)
def _deserialize_label(im, lab):
    # Note: `self` is captured from the enclosing scope in the original code.
    # Promote scalar labels to rank 1 so string_split receives a vector.
    lab = tf.cond(tf.equal(tf.rank(lab), 0),
                  lambda: tf.reshape(lab, [1]), lambda: lab)
    sparse_lab = tf.string_split(lab, sep=' ')
    lab_values = tf.strings.to_number(sparse_lab.values)
    lab = tf.reshape(lab_values, [self._num_regression_outputs])
    return im, lab
Example 12
def label_string_to_tensor(x, batch_size, num_outputs=None):
    sparse = tf.string_split(x, delimiter=' ')
    values = tf.string_to_number(sparse.values)
    if num_outputs is None:
        dense = tf.reshape(values, [batch_size, -1])
    else:
        dense = tf.reshape(values, (batch_size, num_outputs))

    return dense
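A quick sketch of the reshape behavior (illustrative values):

labels = tf.constant(["0.1 0.2 0.3", "0.4 0.5 0.6"])
dense = label_string_to_tensor(labels, batch_size=2)
with tf.Session() as sess:
    print(sess.run(dense))  # [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]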
Example 13
    def from_characters(raw, lookup_):
        """Convert ascii+2 encoded codes to string-tokens."""
        corrected = tf.bitcast(tf.clip_by_value(tf.subtract(raw, 2), 0, 255),
                               tf.uint8)

        gathered = tf.gather(lookup_, tf.cast(corrected, tf.int32))[:, :, 0]
        joined = tf.reduce_join(gathered, axis=1)
        cleaned = tf.regex_replace(joined, b"\0", b"")
        tokens = tf.string_split(cleaned, " ")
        return tokens
def process_entry_self_play(intent, action, truth_action, kb, utterance,
                            boundary, reward_diag, reward_action, vocab_table):
    """Pro-proess procedure for the self-play iterator."""
    t1_id = tf.cast(vocab_table.lookup(tf.constant("<t1>")), tf.int32)
    t2_id = tf.cast(vocab_table.lookup(tf.constant("<t2>")), tf.int32)
    res = process_entry_common(intent, action, utterance, boundary, kb,
                               vocab_table, t1_id, t2_id)
    (tensor_intent, size_intent, source_diag, target_diag, size_dialogue,
     tensor_action, size_action, tensor_kb, has_reservation, mask1, mask2,
     turn_point) = res
    truth_action, _ = process_data(truth_action, vocab_table)
    splitted_reward_d = tf.string_split([reward_diag]).values
    splitted_reward_a = tf.string_split([reward_action]).values

    tensor_reward_diag = tf.string_to_number(
        splitted_reward_d, out_type=tf.float32,
        name=None)[:-1]  # remove the last dialogue ???
    tensor_reward_action = tf.string_to_number(splitted_reward_a,
                                               out_type=tf.float32,
                                               name=None)
    return (tensor_intent, size_intent, source_diag, target_diag,
            size_dialogue, tensor_action, size_action, truth_action,
            tensor_reward_diag, tensor_reward_action, tensor_kb,
            has_reservation, mask1, mask2, turn_point)
Example 15
 def _file_to_matrix(pts_path):
   """Read Nx3 point cloud from a .pts file."""
   file_buffer = tf.read_file(pts_path)
   lines = tf.string_split([file_buffer], delimiter='\n')
   values = tf.stack(tf.decode_csv(lines.values,
                                   record_defaults=[[0.0], [0.0], [0.0]],
                                   field_delim=' '))
   values = tf.transpose(values)  # 3xN --> Nx3.
   # The experiment code in
   # github.com/papagina/RotationContinuity/.../shapenet/code/train_pointnet.py
   # only used the first half of the points in each file.
   return values[:(tf.shape(values)[0] // 2), :]
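A hedged sketch of wiring such a reader into an input pipeline (the file names are hypothetical, and the reader is assumed to be in scope):

paths = tf.data.Dataset.from_tensor_slices(['a.pts', 'b.pts'])
clouds = paths.map(_file_to_matrix)  # each element: [N/2, 3] float32 points
next_cloud = clouds.make_one_shot_iterator().get_next()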
Example 16
    def _mapper(dataset):
        """Tokenizes strings using tf.string_split and truncates by length."""
        for k in keys_to_map:
            # pylint: disable=g-explicit-length-test
            if len(dataset[k].get_shape()) == 0:  # Used for questions.
                # pylint: enable=g-explicit-length-test
                # <string> [num_tokens]
                tokens = tf.string_split([dataset[k]]).values
            else:  # Used for contexts.
                # <string> [num_context, num_tokens] (sparse)
                sparse_tokens = tf.string_split(dataset[k])

                # <string>[num_tokens, max_num_tokens] (dense)
                tokens = tf.sparse_tensor_to_dense(sparse_tokens,
                                                   default_value="")

            dataset[k + suffix] = tokens
            # Compute exact length of each context.
            dataset[k + suffix + "_len"] = tf.count_nonzero(tokens,
                                                            axis=-1,
                                                            dtype=tf.int32)
        return dataset
Example 17
    def parse_text(self, sentence, label=None):
        # Split sentence into words, and convert it into ids
        sentence_split = tf.string_split([sentence]).values
        if self.max_seq_len:  # Trim the sentence to max_seq_len
            sentence_split = sentence_split[:self.max_seq_len]
        src_seq_len = tf.size(sentence_split)
        sentence = self.src_vocab.lookup(sentence_split)

        if label is not None:
            label_split = tf.string_split([label]).values
        else:
            label_split = sentence_split[1:]
        if self.max_seq_len is not None:
            label_split = label_split[:self.max_seq_len]
        tgt_seq_len = tf.size(label_split)
        label = self.tgt_vocab.lookup(label_split)

        # Prepend and append SOS and EOS tokens to label
        #label = tf.concat([[self.tgt_sos_token], label, [self.tgt_eos_token]],
        #        0)

        return sentence, label, src_seq_len, tgt_seq_len
Example 18
def build_text_inputs(
    text,
    length,
    lookup_table,
    segment_id=0,
    start_token=None,
    end_token=None,
):
    """Convert text to TextInputs.

  Args:
    text: <string>, space-separated token string.
    length: Length to pad or truncate to.
    lookup_table: Instance of contrib.lookup.index_table_from_tensor.
    segment_id: Integer denoting segment type.
    start_token: Optional start token.
    end_token: Optional end token.

  Returns:
    Instance of TextInputs.
  """
    # Tokenize and truncate.
    tokens = tf.string_split([text]).values
    length_offset = sum(
        [0 if i is None else 1 for i in [start_token, end_token]])
    tokens = tokens[:length - length_offset]
    if start_token is not None:
        tokens = tf.concat([[start_token], tokens], axis=0)
    if end_token is not None:
        tokens = tf.concat([tokens, [end_token]], axis=0)

    token_ids = tf.cast(lookup_table.lookup(tokens), tf.int32)
    mask = tf.ones_like(token_ids)
    segment_ids = tf.fill(tf.shape(token_ids), segment_id)

    pad = [[0, length - tf.size(token_ids)]]
    token_ids = tf.pad(token_ids, pad)
    mask = tf.pad(mask, pad)
    segment_ids = tf.pad(segment_ids, pad)
    positions = tf.range(length)
    text_input = TextInputs(token_ids=tf.ensure_shape(token_ids, [length]),
                            mask=tf.ensure_shape(mask, [length]),
                            segment_ids=tf.ensure_shape(segment_ids, [length]),
                            positions=tf.ensure_shape(positions, [length]))

    return text_input
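Usage mirrors build_planner_inputs above; a hedged sketch with illustrative special tokens:

table = tf.contrib.lookup.index_table_from_tensor(
    tf.constant(["[CLS]", "[SEP]", "hello", "world"]), num_oov_buckets=1)
inputs = build_text_inputs("hello world", length=8, lookup_table=table,
                           start_token="[CLS]", end_token="[SEP]")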
Example 19
    def module_fn_with_preprocessing():
        """Spec function for a full-text embedding module with preprocessing."""
        sentences = tf.placeholder(shape=[None],
                                   dtype=tf.string,
                                   name="sentences")
        # Perform a minimalistic text preprocessing by removing punctuation and
        # splitting on spaces.
        normalized_sentences = tf.regex_replace(input=sentences,
                                                pattern=r"\pP",
                                                rewrite="")
        tokens = tf.string_split(normalized_sentences, " ")

        embeddings_var = tf.get_variable(initializer=tf.zeros(
            [vocab_size + num_oov_buckets, embeddings_dim]),
                                         name=EMBEDDINGS_VAR_NAME,
                                         dtype=tf.float32)
        table_initializer = tf.lookup.TextFileInitializer(
            vocabulary_file, tf.string, tf.lookup.TextFileIndex.WHOLE_LINE,
            tf.int64, tf.lookup.TextFileIndex.LINE_NUMBER)
        lookup_table = tf.lookup.StaticVocabularyTable(
            table_initializer, num_oov_buckets=num_oov_buckets)
        sparse_ids = tf.SparseTensor(indices=tokens.indices,
                                     values=lookup_table.lookup(tokens.values),
                                     dense_shape=tokens.dense_shape)

        # In case some of the input sentences are empty before or after
        # normalization, we will end up with empty rows. We do however want to
        # return embedding for every row, so we have to fill in the empty rows with
        # a default.
        sparse_ids, _ = tf.sparse_fill_empty_rows(
            sparse_ids, lookup_table.lookup(tf.constant("")))
        # In case all of the input sentences are empty before or after
        # normalization, we will end up with a SparseTensor with shape [?, 0]. After
        # filling in the empty rows we must ensure the shape is set properly to
        # [?, 1]. At this point, there are no empty rows, so the new shape will be
        # [sparse_ids.dense_shape[0], max(1, sparse_ids.dense_shape[1])].
        sparse_ids = tf.sparse_reset_shape(sparse_ids)

        combined_embedding = tf.nn.embedding_lookup_sparse(
            params=embeddings_var,
            sp_ids=sparse_ids,
            sp_weights=None,
            combiner="sqrtn")

        hub.add_signature("default", {"sentences": sentences},
                          {"default": combined_embedding})
def get_infer_iterator(src_dataset,
                       src_vocab_table,
                       batch_size,
                       eos,
                       sos,
                       src_max_len=None):
    """Get dataset for inference."""
    # Total number of examples in src_dataset
    # (3003 examples + 69 padding examples).
    src_eos_id = tf.cast(src_vocab_table.lookup(tf.constant(eos)), tf.int32)
    src_sos_id = tf.cast(src_vocab_table.lookup(tf.constant(sos)), tf.int32)
    src_dataset = src_dataset.map(lambda src: tf.string_split([src]).values)

    # Convert the word strings to ids
    src_dataset = src_dataset.map(
        lambda src: tf.cast(src_vocab_table.lookup(src), tf.int32))

    # Add in the word counts.
    src_dataset = src_dataset.map(lambda src: (tf.concat(
        ([src_sos_id], src, [src_eos_id]), 0), 2 + tf.size(src)))

    def batching_func(x):
        return x.padded_batch(
            batch_size,
            # The first entry is the source line rows;
            # this has unknown-length vectors.  The last entry is
            # the source row size; this is a scalar.
            padded_shapes=(
                tf.TensorShape([src_max_len]),  # src
                tf.TensorShape([])),  # src_len
            # Pad the source sequences with eos tokens. (Though notice we
            # don't generally need to do this since later on we will be
            # masking out calculations past the true sequence.)
            padding_values=(
                src_eos_id,  # src
                0),  # src_len -- unused
            drop_remainder=True)

    batched_dataset = batching_func(src_dataset)
    batched_dataset = batched_dataset.map(
        lambda src_ids, src_seq_len: ({
            "source": src_ids,
            "source_sequence_length": src_seq_len
        }))
    return batched_dataset
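A hedged sketch of driving the iterator (the source and vocabulary files are hypothetical):

src_dataset = tf.data.TextLineDataset("infer.src")
src_vocab_table = tf.contrib.lookup.index_table_from_file("vocab.txt")
batched = get_infer_iterator(src_dataset, src_vocab_table, batch_size=32,
                             eos="</s>", sos="<s>")
# Run tf.tables_initializer() and the iterator initializer before fetching.
features = batched.make_initializable_iterator().get_next()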
Example 21
def _map_sequence_to_ints(example, amino_acid_table):
    """Take amino acids in features as strings and replaces them with ints.

  Args:
    example: dictionary from string to tensor, containing key
      SEQUENCE_KEY.
    amino_acid_table: tf.contrib.lookup.index_table_from_tensor.

  Returns:
    dict from string to tensor, where the value at SEQUENCE_KEY is
    converted from a string scalar to a tensor of int indices.
  """
    seq = example[SEQUENCE_KEY]
    seq_char_by_char_sparse = tf.string_split([seq], delimiter='')
    seq_char_by_char = seq_char_by_char_sparse.values
    seq_indices = amino_acid_table.lookup(seq_char_by_char)
    example[SEQUENCE_KEY] = seq_indices
    return example
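A hedged sketch with the standard 20-letter amino-acid alphabet (SEQUENCE_KEY is a module-level constant in the original code):

amino_acid_table = tf.contrib.lookup.index_table_from_tensor(
    tf.constant(list("ACDEFGHIKLMNPQRSTVWY")))
example = {SEQUENCE_KEY: tf.constant("ACDA")}
example = _map_sequence_to_ints(example, amino_acid_table)
# example[SEQUENCE_KEY] -> [0, 1, 2, 0] after table initialization.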
Example 22
def parse_single_tfexample(_, serialized_example):
    """Parsing serialized pb2 example."""
    # read data from serialized examples
    features = tf.parse_single_example(
        serialized_example,
        features={
            'x': tf.FixedLenFeature([], tf.string),
            'y': tf.FixedLenFeature([], tf.int64),
            # z is for sequence origins,
            # i.e. which genome and which position the seq is from
            # 'z': tf.VarLenFeature(tf.string)
        })
    seq_str = features['x']

    x_str = tf.string_split([seq_str], delimiter=' ').values
    features['x'] = tf.string_to_number(x_str, out_type=tf.int32)
    features['y'] = tf.cast(features['y'], dtype=tf.int32)

    return features
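A hedged sketch of applying the parser over a TFRecord file (the filename is hypothetical; the unused first argument matches the signature above):

dataset = tf.data.TFRecordDataset("train.tfrecord")
dataset = dataset.map(lambda ex: parse_single_tfexample(None, ex))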
Example 23
def _dedup_tensor(sp_tensor: tf.SparseTensor) -> tf.SparseTensor:
  """Dedup values of a SparseTensor along each row.

  Args:
    sp_tensor: A 2D SparseTensor to be deduped.
  Returns:
    A deduped SparseTensor of shape [batch_size, max_len], where max_len is
    the maximum number of unique values for a row in the Tensor.
  """
  string_batch_index = tf.as_string(sp_tensor.indices[:, 0])

  # tf.unique only works on 1D tensors. To avoid deduping across examples,
  # prepend each feature value with the example index. This requires casting
  # to and from strings for non-string features.
  string_values = sp_tensor.values
  original_dtype = sp_tensor.values.dtype
  if original_dtype != tf.string:
    string_values = tf.as_string(sp_tensor.values)
  index_and_value = tf.strings.join([string_batch_index, string_values],
                                    separator='|')
  unique_index_and_value, _ = tf.unique(index_and_value)

  # split is a shape [tf.size(values), 2] tensor. The first column contains
  # indices and the second column contains the feature value (we assume no
  # feature contains | so we get exactly 2 values from the string split).
  split = tf.string_split(unique_index_and_value, delimiter='|')
  split = tf.reshape(split.values, [-1, 2])
  string_indices = split[:, 0]
  values = split[:, 1]

  indices = tf.reshape(
      tf.string_to_number(string_indices, out_type=tf.int32), [-1])
  if original_dtype != tf.string:
    values = tf.string_to_number(values, out_type=original_dtype)
  values = tf.reshape(values, [-1])
  # Convert example indices into SparseTensor indices, e.g.
  # [0, 0, 0, 1, 3, 3] -> [[0,0], [0,1], [0,2], [1,0], [3,0], [3,1]]
  batch_size = tf.to_int32(sp_tensor.dense_shape[0])
  new_indices, max_len = _example_index_to_sparse_index(indices, batch_size)
  return tf.SparseTensor(
      indices=tf.to_int64(new_indices),
      values=values,
      dense_shape=[tf.to_int64(batch_size), max_len])
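A hedged sketch on a toy input (_example_index_to_sparse_index is a module-local helper in the original code):

sp = tf.SparseTensor(indices=[[0, 0], [0, 1], [0, 2], [1, 0]],
                     values=tf.constant(["a", "b", "a", "c"]),
                     dense_shape=[2, 3])
deduped = _dedup_tensor(sp)  # row 0 -> ["a", "b"], row 1 -> ["c"]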
    def _maybe_actually_init(self):
        """Lazily create example converter."""
        if self._session is None:
            self._vocab = text_utils.Vocab.load(self._params["vocab_path"])
            self._graph = tf.Graph()
            with self._graph.as_default():
                # Placeholder for input lines of tokenized text.
                self._text = tf.placeholder(tf.string, [])

                # Truncate text.
                tokens = tf.string_split([self._text]).values
                length = self._params["max_length"] - self._params[
                    "query_length"] - 3
                tokens = tokens[:length]

                # Create full input together with empty question.
                question = ["[PAD]"] * self._params["query_length"]
                inputs = tf.concat(
                    [[self._vocab.CLS], question, [self._vocab.SEP], tokens,
                     [self._vocab.SEP]],
                    axis=0)

                # Convert to ids.
                lookup_table = self._vocab.get_string_lookup_table()
                input_ids = tf.cast(lookup_table.lookup(inputs), tf.int32)
                input_mask = tf.ones_like(input_ids)
                segment_ids = tf.concat([[0] *
                                         (self._params["query_length"] + 2),
                                         tf.fill(tf.shape(tokens), 1), [1]],
                                        axis=0)

                # Pad to final length.
                pad = [[0, self._params["max_length"] - tf.size(input_ids)]]
                input_ids = tf.pad(input_ids, pad)
                input_mask = tf.pad(input_mask, pad)
                segment_ids = tf.pad(segment_ids, pad)
                self._rc_inputs = RCInputs(input_ids, input_mask, segment_ids)

                # Initialize session.
                self._session = tf.Session()
                self._session.run(tf.tables_initializer())
Example 25
  def _file_to_matrix(pts_path):
    """Read Nx3 point cloud and 3x3 rotation matrix from a .pts file.

    The test data is a modified version of the original files. For each .pts
    file we have (1) added a 3x3 rotation matrix for testing, and (2) removed
    the second half of the point cloud since it is not used at all.

    Args:
      pts_path: path to a .pts file.

    Returns:
      A Nx3 point cloud.
      A 3x3 rotation matrix.
    """
    file_buffer = tf.read_file(pts_path)
    lines = tf.string_split([file_buffer], delimiter='\n')
    values = tf.stack(tf.decode_csv(lines.values,
                                    record_defaults=[[0.0], [0.0], [0.0]],
                                    field_delim=' '))
    values = tf.transpose(values)  # 3xN --> Nx3.
    # First three rows are the rotation matrix, remaining rows the point cloud.
    rot = values[:3, :]
    return values[3:, :], rot
Example 26
def split_on_whitespace(str_tensor):
  return tf.string_split(tf.expand_dims(str_tensor, -1)).values
Example 27
def filter_random_lighting(sequence_dir):
    sequence_name = tf.string_split([sequence_dir], '/').values[-1]
    lighting = tf.substr(sequence_name, 0, 6)
    return tf.not_equal(lighting, 'random')
def process_data(object_str, vocab_table):
    """prelinminary process of dialogue data."""
    separated = tf.string_split([object_str]).values
    indices = tf.cast(vocab_table.lookup(separated), tf.int32)
    return indices, tf.size(indices)
Example 29
def label_string_to_tensor(x, batch_size, num_outputs=-1):
    sparse = tf.string_split(x, sep=' ')
    values = tf.string_to_number(sparse.values)
    dense = tf.reshape(values, [batch_size, num_outputs])
    return dense
Example 30
def convert_string_neighbors(string_neighbors):
    split = tf.string_split(string_neighbors, "")
    string_dense = tf.sparse_tensor_to_dense(split, default_value="0")
    num = tf.string_to_number(string_dense, out_type=tf.int32)
    bool_neigh = tf.cast(num, tf.bool)
    return bool_neigh
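A small sketch of the conversion (illustrative bit-strings; short rows are padded with "0", i.e. False):

neighbors = tf.constant(["0101", "11"])
bool_neigh = convert_string_neighbors(neighbors)
# Dense result: [[False, True, False, True], [True, True, False, False]]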