Python PipeModeDataset.cache 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: sagemaker_tensorflow

클래스/타입: PipeModeDataset

메소드/함수: cache

hotexamples.com에서의 예제들: 3

Python PipeModeDataset.cache - 3개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python sagemaker_tensorflow.PipeModeDataset.cache의 실제 사용 예제 가운데 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

repeat(23)

PipeModeDataset(22)

prefetch(22)

map(20)

batch(18)

make_one_shot_iterator(16)

shuffle(15)

apply(7)

cache(3)

as_numpy_iterator(2)

shard(2)

take(1)

예제 #1

파일 보기

파일: tf_bert_reviews.py 프로젝트: vijay-khanna/data-science-on-aws-workshop

def file_based_input_dataset_builder(channel,
                                     input_filenames,
                                     pipe_mode,
                                     is_training,
                                     drop_remainder,
                                     batch_size,
                                     epochs,
                                     steps_per_epoch,
                                     max_seq_length):
    """Build a batched tf.data pipeline of parsed TFRecord examples.

    Records are read from a SageMaker ``PipeModeDataset`` when ``pipe_mode``
    is truthy, otherwise from ``tf.data.TFRecordDataset(input_filenames)``.
    Each record is parsed into the ``input_ids``, ``input_mask``,
    ``segment_ids`` and ``label_ids`` features and grouped into batches.

    Pipeline order: decode + batch -> cache -> repeat -> shuffle (training
    only) -> prefetch. Caching happens before ``repeat`` so that only one
    pass of decoded batches is stored and later repetitions are served from
    the cache. As a consequence batches never span two passes over the data.

    Args:
        channel: Pipe-mode channel name (used only when ``pipe_mode``).
        input_filenames: TFRecord file names (used only when not
            ``pipe_mode``).
        pipe_mode: Read from the SageMaker pipe instead of files.
        is_training: When truthy, shuffle the batches.
        drop_remainder: Forwarded to the batching step.
        batch_size: Number of records per batch.
        epochs: Combined with ``steps_per_epoch`` into the repeat count.
        steps_per_epoch: Combined with ``epochs`` into the repeat count.
        max_seq_length: Length of the fixed-size sequence features.

    Returns:
        The resulting ``tf.data`` dataset of batched feature dictionaries.
    """
    if pipe_mode:
        print('***** Using pipe_mode with channel {}'.format(channel))
        from sagemaker_tensorflow import PipeModeDataset
        dataset = PipeModeDataset(channel=channel,
                                  record_format='TFRecord')
    else:
        print('***** Using input_filenames {}'.format(input_filenames))
        dataset = tf.data.TFRecordDataset(input_filenames)

    name_to_features = {
        "input_ids": tf.io.FixedLenFeature([max_seq_length], tf.int64),
        "input_mask": tf.io.FixedLenFeature([max_seq_length], tf.int64),
        "segment_ids": tf.io.FixedLenFeature([max_seq_length], tf.int64),
        "label_ids": tf.io.FixedLenFeature([], tf.int64),
    }

    def _decode_record(record, name_to_features):
        """Decodes a record to a TensorFlow example."""
        record = tf.io.parse_single_example(record, name_to_features)
        # TODO:  wip/bert/bert_attention_head_view/train.py
        # Convert input_ids into input_tokens with DistilBert vocabulary
        #  if hook.get_collections()['all'].save_config.should_save_step(modes.EVAL, hook.mode_steps[modes.EVAL]):
        #    hook._write_raw_tensor_simple("input_tokens", input_tokens)
        return record

    dataset = dataset.apply(
        tf.data.experimental.map_and_batch(
            lambda record: _decode_record(record, name_to_features),
            batch_size=batch_size,
            drop_remainder=drop_remainder,
            num_parallel_calls=tf.data.experimental.AUTOTUNE))

    # Dataset transformations return a new dataset; the result must be
    # re-assigned or the cache is never part of the pipeline. Cache before
    # repeat() so a single pass of decoded batches is stored, not every
    # repetition.
    dataset = dataset.cache()
    dataset = dataset.repeat(epochs * steps_per_epoch * 100)

    if is_training:
        # The shuffle operates on whole batches, since batching comes first.
        dataset = dataset.shuffle(seed=42,
                                  buffer_size=100,
                                  reshuffle_each_iteration=True)

    # Prefetch last so the consumer overlaps with the whole pipeline.
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

    return dataset

예제 #2

파일 보기

파일: tf_bert_reviews_TT_PIPEMODE.py 프로젝트: vanessa920/aws-workshop

def file_based_input_dataset_builder(channel, input_filenames, pipe_mode,
                                     is_training, drop_remainder, batch_size,
                                     epochs, steps_per_epoch, max_seq_length):
    """Build a batched tf.data pipeline of parsed TFRecord examples.

    Records are read from a SageMaker ``PipeModeDataset`` when ``pipe_mode``
    is truthy, otherwise from ``tf.data.TFRecordDataset(input_filenames)``.
    Each record is parsed into the ``input_ids``, ``input_mask``,
    ``segment_ids`` and ``label_ids`` features and grouped into batches.

    Pipeline order: decode + batch -> cache -> repeat -> shuffle (training
    only) -> prefetch. Caching happens before ``repeat`` so that only one
    pass of decoded batches is stored and later repetitions are served from
    the cache. As a consequence batches never span two passes over the data.

    Args:
        channel: Pipe-mode channel name (used only when ``pipe_mode``).
        input_filenames: TFRecord file names (used only when not
            ``pipe_mode``).
        pipe_mode: Read from the SageMaker pipe instead of files.
        is_training: When truthy, shuffle the batches.
        drop_remainder: Forwarded to the batching step.
        batch_size: Number of records per batch.
        epochs: Combined with ``steps_per_epoch`` into the repeat count.
        steps_per_epoch: Combined with ``epochs`` into the repeat count.
        max_seq_length: Length of the fixed-size sequence features.

    Returns:
        The resulting ``tf.data`` dataset of batched feature dictionaries.
    """
    if pipe_mode:
        print('***** Using pipe_mode with channel {}'.format(channel))
        from sagemaker_tensorflow import PipeModeDataset
        dataset = PipeModeDataset(channel=channel, record_format='TFRecord')
    else:
        print('***** Using input_filenames {}'.format(input_filenames))
        dataset = tf.data.TFRecordDataset(input_filenames)

    name_to_features = {
        "input_ids": tf.io.FixedLenFeature([max_seq_length], tf.int64),
        "input_mask": tf.io.FixedLenFeature([max_seq_length], tf.int64),
        "segment_ids": tf.io.FixedLenFeature([max_seq_length], tf.int64),
        "label_ids": tf.io.FixedLenFeature([], tf.int64),
        #      "is_real_example": tf.io.FixedLenFeature([], tf.int64),
    }

    def _decode_record(record, name_to_features):
        """Decodes a record to a TensorFlow example."""
        return tf.io.parse_single_example(record, name_to_features)

    dataset = dataset.apply(
        tf.data.experimental.map_and_batch(
            lambda record: _decode_record(record, name_to_features),
            batch_size=batch_size,
            drop_remainder=drop_remainder,
            num_parallel_calls=tf.data.experimental.AUTOTUNE))

    # Dataset transformations return a new dataset; the result must be
    # re-assigned or the cache is never part of the pipeline. Cache before
    # repeat() so a single pass of decoded batches is stored, not every
    # repetition.
    dataset = dataset.cache()
    dataset = dataset.repeat(epochs * steps_per_epoch)

    if is_training:
        # The shuffle operates on whole batches, since batching comes first.
        dataset = dataset.shuffle(seed=42,
                                  buffer_size=1000,
                                  reshuffle_each_iteration=True)

    # Prefetch last so the consumer overlaps with the whole pipeline.
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

    return dataset

예제 #3

파일 보기

파일: image_segmentation_base.py 프로젝트: gonsoomoon-ml/image_segmentation

def _input(args, channel_name):
    """Build the tf.data input pipeline for one channel.

    Args:
        args: Job arguments. Read here: ``data_config`` (optional; looked up
            for the channel's ``TrainingInputMode``), ``BATCH_SIZE`` and, for
            the ``'train'`` channel, ``train_num_examples``. Also passed on
            to ``get_filenames``.
        channel_name: Channel to read. ``'train'`` and ``'test'`` get their
            own pipelines; for any other value the raw records are returned
            with only prefetching applied.

    Returns:
        The prefetched ``tf.data`` dataset for the channel.
    """
    # The data-config entry for the 'train' channel is looked up as
    # 'training'; every other channel uses its own name.
    config_key = channel_name + 'ing' if channel_name == 'train' else channel_name
    try:
        mode = args.data_config[config_key]['TrainingInputMode']
    except (AttributeError, KeyError, TypeError):
        # No usable input-mode entry for this channel: fall back to File mode.
        mode = 'File'

    filenames = get_filenames(args, channel_name)
    logging.info("Running %s in %s mode", channel_name, mode)

    if mode == 'Pipe':
        from sagemaker_tensorflow import PipeModeDataset
        dataset = PipeModeDataset(channel=channel_name,
                                  record_format='TFRecord')
    else:
        dataset = tf.data.TFRecordDataset(filenames)

    if channel_name == 'train':
        dataset = dataset.map(_load_image_train,
                              num_parallel_calls=tf.data.experimental.AUTOTUNE)
        # Ensure that the capacity is sufficiently large to provide good
        # random shuffling.
        buffer_size = int(args.train_num_examples * 0.4) + 3 * args.BATCH_SIZE

        # Cache the decoded examples, then shuffle, batch and repeat
        # infinitely.
        dataset = dataset.cache().shuffle(buffer_size=buffer_size).batch(
            args.BATCH_SIZE).repeat()

    elif channel_name == 'test':
        dataset = dataset.map(_load_image_test)

        # Save the first example for inspection. Saving inside the loop
        # means an empty dataset is skipped instead of failing on unbound
        # sample variables.
        for sample_image, sample_mask in dataset.take(1):
            _img_save('sample_image.jpg', sample_image)
            _img_save('sample_mask.png', sample_mask)

        dataset = dataset.batch(args.BATCH_SIZE)

    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return dataset