def imagenet_adversarial(
    preprocessing_fn: Callable = None,
    dirpath: str = os.path.join(paths.DATASETS, "external", "imagenet_adv"),
) -> (np.ndarray, np.ndarray, np.ndarray):
    """
    ILSVRC12 adversarial image dataset for ResNet50

    ProjectedGradientDescent
        Iterations = 10
        Max perturbation epsilon = 8
        Attack step size = 2
        Targeted = True

    :param preprocessing_fn: Callable function to preprocess inputs
    :param dirpath: Directory where the dataset is stored
    :return: (clean_images, adversarial_images, labels)
    """

    def _parse(serialized_example):
        ds_features = {
            "height": tf.io.FixedLenFeature([], tf.int64),
            "width": tf.io.FixedLenFeature([], tf.int64),
            "label": tf.io.FixedLenFeature([], tf.int64),
            "adv-image": tf.io.FixedLenFeature([], tf.string),
            "clean-image": tf.io.FixedLenFeature([], tf.string),
        }
        example = tf.io.parse_single_example(serialized_example, ds_features)

        clean_img = tf.io.decode_raw(example["clean-image"], tf.float32)
        clean_img = tf.reshape(clean_img, (example["height"], example["width"], -1))

        adv_img = tf.io.decode_raw(example["adv-image"], tf.float32)
        adv_img = tf.reshape(adv_img, (example["height"], example["width"], -1))

        label = tf.cast(example["label"], tf.int32)
        return clean_img, adv_img, label

    num_images = 1000
    filename = "ILSVRC12_ResNet50_PGD_adversarial_dataset_v0.1.tfrecords"
    output_filepath = os.path.join(dirpath, filename)

    os.makedirs(dirpath, exist_ok=True)
    download_file_from_s3(
        bucket_name="armory-public-data",
        key=f"imagenet-adv/{filename}",
        local_path=output_filepath,
    )

    adv_ds = tf.data.TFRecordDataset(filenames=[output_filepath])
    image_label_ds = adv_ds.map(lambda example_proto: _parse(example_proto))

    image_label_ds = image_label_ds.batch(num_images)
    image_label_ds = tf.data.experimental.get_single_element(image_label_ds)
    clean_x, adv_x, labels = tfds.as_numpy(image_label_ds)

    # Temporary flip from BGR to RGB since dataset was saved in BGR.
    clean_x = clean_x[..., ::-1]
    adv_x = adv_x[..., ::-1]

    # Preprocessing should always be done on RGB inputs
    if preprocessing_fn:
        clean_x = preprocessing_fn(clean_x)
        adv_x = preprocessing_fn(adv_x)

    return clean_x, adv_x, labels
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

from multiprocessing import Manager
from joblib import Parallel, delayed
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

tf.enable_v2_behavior()

ds_train, ds_info = tfds.load(
    'howell',
    split=['train'],
    shuffle_files=False,
    with_info=True,
)
# split=['train'] returns a list of datasets, so index into it below.
ds_numpy = tfds.as_numpy(ds_train)

profile_features = []
labels = []
for ex in ds_numpy[0]:
    profile_features.append([ex['age'], ex['height'], ex['male']])
    labels.append(ex['weight'])

print("dataset size:", len(labels))

"""## Limited Data Experiments"""

print("begin experiment")
num_trials = 10
this_train_sizes = np.linspace(1 / len(labels), 1, len(labels))
results = Manager().list([0 for i in range(len(this_train_sizes) * num_trials)])
def cifar10_test(model, num_label=4000): # load data on the cpu with tf.device('/CPU:0'): # Load in training and test data X_train, y_train = tfds.as_numpy( tfds.load('cifar10', split='train', as_supervised=True, batch_size=-1)) #cifar_10.load_cifar_10() X_test, y_test = tfds.as_numpy( tfds.load('cifar10', split='test', as_supervised=True, batch_size=-1)) # one-hot encode the outs y_train = np.eye(10)[y_train.reshape(-1)] # print('y_train sample:', y_train[0:10]) y_test = np.eye(10)[y_test.reshape(-1)] # print('y_test sample:', y_test[0:10]) # cast it all to floats for image augmentation, rescale to [0,1] X_train = X_train.astype('float32') / np.float(255.0) X_test = X_test.astype('float32') / np.float(255.0) # whiten the data or apply zca X_train = whiten_norm(X_train) X_test = whiten_norm(X_test) # X_train = whiten_norm(X_train) # X_test = whiten_norm(X_test) # X_train, y_train, X_test, y_test = cifar_10.load_cifar_10() print('loaded cifar10', X_train.shape, X_test.shape) # Setup test set test = util.Data(X_test, y_test, None) # Split training test into labeled and unlabeled train = util.label_unlabel_split(X_train, y_train, num_label, 10) # Split training data into training and validation (train, valid) = util.train_test_valid_split(train.X, train.y, split=(0.9, 0.1), U=train.U) # One-hot encode cifar_10.y_train and cifar_10.y_test? ## ^^ yes. Done. print('TR:', train.X.shape, train.y.shape, train.U.shape) print('v', valid.X.shape, valid.y.shape) # fit on the gpu with tf.device('/GPU:0'): # Train model using training and validation sets hist = model.fit(train, valid) print('evaluating on (subset) of test set...') with tf.device('/CPU:0'): # Test the model using test set y_pred = model.predict(test.X[0:1000]) # if outputs are one-hot encoded, need to decode for correctness test # wrong = util.percent_wrong(y_pred, test.y) # acc = 1.0 - wrong acc = float( tf.reduce_mean( tf.keras.metrics.categorical_accuracy(test.y[0:1000], y_pred))) print(model.name, ' : acc:', acc) return model, {'hist': hist, 'acc': acc}
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np

# Run in the VS Code terminal to allow virtualenv activation:
# Set-ExecutionPolicy Unrestricted -Scope Process

# print(len(tf.config.list_physical_devices('GPU')))

###### Shows a grid with different examples
# (ds_train, ds_test), ds_info = tfds.load(
#     'mnist',
#     split=['train', 'test'],
#     shuffle_files=True,
#     as_supervised=True,
#     with_info=True,
# )
# print(ds_info)
# fig = tfds.show_examples(ds_train, ds_info)

###### This does the same as DatasetPLT.py with more code
dataset = tfds.load('mnist')
train, test = dataset['train'], dataset['test']
dsnp = np.vstack(tfds.as_numpy(test))
X_test = np.array(tuple(map(lambda x: x[0]['image'], dsnp)))
y_test = np.array(tuple(map(lambda x: x[0]['label'], dsnp)))

# Drop the trailing channel dimension so matplotlib accepts the 28x28 image.
plt.imshow(X_test[1].squeeze(), cmap='gray')
plt.show()
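# A simpler route to the same arrays (a sketch using documented TFDS behaviour,
# not part of the original script): batch_size=-1 loads the full split as
# tensors, and tfds.as_numpy converts them to NumPy in one step.
mnist_test_np = tfds.as_numpy(tfds.load('mnist', split='test', batch_size=-1))
X_test_full, y_test_full = mnist_test_np['image'], mnist_test_np['label']
print(X_test_full.shape, y_test_full.shape)  # (10000, 28, 28, 1) (10000,)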
def load(split, *, preprocess_mode, batch_dims, transpose=False, allow_caching=False): """Loads the given split of the dataset.""" start, end = _shard(split, jax.host_id(), jax.host_count()) total_batch_size = np.prod(batch_dims) tfds_split = tfds.core.ReadInstruction(_to_tfds_split(split), from_=start, to=end, unit='abs') ds = tfds.load('imagenet2012:5.*.*', split=tfds_split, decoders={'image': tfds.decode.SkipDecoding()}) options = ds.options() options.experimental_threading.private_threadpool_size = 48 options.experimental_threading.max_intra_op_parallelism = 1 if preprocess_mode is not PreprocessMode.EVAL: options.experimental_deterministic = False if jax.host_count() > 1 and allow_caching: # Only cache if we are reading a subset of the dataset. ds = ds.cache() ds = ds.repeat() ds = ds.shuffle(buffer_size=10 * total_batch_size, seed=0) else: if split.num_examples % total_batch_size != 0: raise ValueError( f'Test/valid must be divisible by {total_batch_size}') def preprocess_pretrain(example): view1 = _preprocess_image(example['image'], mode=preprocess_mode) view2 = _preprocess_image(example['image'], mode=preprocess_mode) label = tf.cast(example['label'], tf.int32) return {'view1': view1, 'view2': view2, 'labels': label} def preprocess_linear_train(example): image = _preprocess_image(example['image'], mode=preprocess_mode) label = tf.cast(example['label'], tf.int32) return {'images': image, 'labels': label} def preprocess_eval(example): image = _preprocess_image(example['image'], mode=preprocess_mode) label = tf.cast(example['label'], tf.int32) return {'images': image, 'labels': label} if preprocess_mode is PreprocessMode.PRETRAIN: ds = ds.map(preprocess_pretrain, num_parallel_calls=tf.data.experimental.AUTOTUNE) elif preprocess_mode is PreprocessMode.LINEAR_TRAIN: ds = ds.map(preprocess_linear_train, num_parallel_calls=tf.data.experimental.AUTOTUNE) else: ds = ds.map(preprocess_eval, num_parallel_calls=tf.data.experimental.AUTOTUNE) def transpose_fn(batch): # We use the double-transpose-trick to improve performance for TPUs. Note # that this (typically) requires a matching HWCN->NHWC transpose in your # model code. The compiler cannot make this optimization for us since our # data pipeline and model are compiled separately. batch = dict(**batch) if preprocess_mode is PreprocessMode.PRETRAIN: batch['view1'] = tf.transpose(batch['view1'], (1, 2, 3, 0)) batch['view2'] = tf.transpose(batch['view2'], (1, 2, 3, 0)) else: batch['images'] = tf.transpose(batch['images'], (1, 2, 3, 0)) return batch for i, batch_size in enumerate(reversed(batch_dims)): ds = ds.batch(batch_size) if i == 0 and transpose: ds = ds.map(transpose_fn) # NHWC -> HWCN ds = ds.prefetch(tf.data.experimental.AUTOTUNE) yield from tfds.as_numpy(ds)
return net def get_eval_metric_options_fn(gan_model): real_data_logits = tf.reduce_mean(gan_model.discriminator_real_outputs) gen_data_logits = tf.reduce_mean(gan_model.discriminator_gen_outputs) return { 'real_data_logits': tf.metrics.mean(real_data_logits), 'gen_data_logits': tf.metrics.mean(gen_data_logits) } params = {'batch_size': 64, 'noise_dims': 64} with tf.Graph().as_default(): ds = input_fn(tf.estimator.ModeKeys.TRAIN, params) numpy_imgs = tfds.as_numpy(ds).__next__()[1] img_grid = tfgan.eval.python_image_grid(numpy_imgs, grid_shape=(8, 8)) plt.axis('off') plt.imshow(np.squeeze(img_grid)) plt.show() train_batch_size = 64 noise_dimensions = 32 generator_lr = 0.0001 discriminator_lr = 0.00005 def gen_opt(): gstep = tf.train.get_or_create_global_step() base_lr = generator_lr lr = tf.cond(gstep < 1000, lambda: base_lr, lambda: base_lr / 2.0)
from tensorflow.keras.losses import categorical_crossentropy, sparse_categorical_crossentropy from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPool2D, Flatten, Dense, Dropout from sklearn.metrics import classification_report import numpy as np from tensorflow.keras.regularizers import l2 (ds_train, ds_test), ds_info = tfds.load( name='cifar10', split=['train', 'test'], shuffle_files=True, as_supervised=True, with_info=True, ) label_train = [] # [1] for image, label in tfds.as_numpy(ds_train): label_train.append(label) label_test = [] for image, label in tfds.as_numpy(ds_test): label_test.append(label) CLASS_NAMES = [ 'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck' ] # TFDS provide the images as tf.uint8, while the model expect tf.float32, so normalize images def normalize_img(image, label): """Normalizes images: `uint8` -> `float32`."""
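    # Hedged completion of the truncated function above, following the standard
    # TFDS Keras example (assumes tensorflow is imported as tf elsewhere in this
    # script): cast to float32 and rescale to [0, 1].
    return tf.cast(image, tf.float32) / 255.0, label


# Assumed follow-up: apply the normalization before batching the pipelines.
ds_train = ds_train.map(normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
ds_test = ds_test.map(normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)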
# Fetch the dataset directly mnist = tfds.image.MNIST() # or by string name mnist = tfds.builder('mnist') # Download the data, prepare it, and write it to disk mnist.download_and_prepare() # Load data from disk as tf.data.Datasets datasets = mnist.as_dataset() train_dataset, test_dataset = datasets['train'], datasets['test'] # convert the Dataset to NumPy arrays and flatten the data Xtrain, ytrain = [], [] for example in tfds.as_numpy(train_dataset): Xtrain.append(example['image'].flatten()) ytrain.append(example['label']) Xtrain, ytrain = np.asarray(Xtrain), np.asarray(ytrain) Xtrain = Xtrain.astype(float) # set random seed and shuffle the data np.random.seed(1) idx = np.arange(len(ytrain)) np.random.shuffle(idx) Xtrain, ytrain = Xtrain[idx, :], ytrain[idx] Xtrain.shape, ytrain.shape # convert the test set to NumPy arrays and flatten the data
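# Hedged continuation of the comment above, mirroring the training-set loop:
Xtest, ytest = [], []
for example in tfds.as_numpy(test_dataset):
    Xtest.append(example['image'].flatten())
    ytest.append(example['label'])
Xtest, ytest = np.asarray(Xtest), np.asarray(ytest)
Xtest = Xtest.astype(float)
Xtest.shape, ytest.shape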
def train( model: nn.Model, learning_rate: float = None, num_epochs: int = None, seed: int = None, model_dir: Text = None, data_source: Any = None, batch_size: int = None, checkpoints_to_keep: int = None, l2_reg: float = None, ) -> Tuple[Dict[Text, Any], nn.Model]: """Training loop. Args: model: An initialized model to be trained. learning_rate: The learning rate. num_epochs: Train for this many epochs. seed: Seed for shuffling. model_dir: Directory to save best model. data_source: The data source with pre-processed data examples. batch_size: The batch size to use for training and validation data. l2_reg: L2 regularization weight. Returns: A dict with training statistics and the best model. """ rng = jax.random.PRNGKey(seed) optimizer = flax.optim.Adam(learning_rate=learning_rate).create(model) stats = collections.defaultdict(list) best_score = 0. train_batches = input_pipeline.get_shuffled_batches( data_source.train_dataset, batch_size=batch_size, seed=seed) valid_batches = input_pipeline.get_batches(data_source.valid_dataset, batch_size=batch_size) for epoch in range(num_epochs): train_metrics = collections.defaultdict(float) # Train for one epoch. for ex in tfds.as_numpy(train_batches): inputs, lengths, labels = ex['sentence'], ex['length'], ex['label'] optimizer, loss, rng = train_step(optimizer, inputs, lengths, labels, rng, l2_reg) train_metrics['loss'] += loss * inputs.shape[0] train_metrics['total'] += inputs.shape[0] # Evaluate on validation data. optimizer.target is the updated model. valid_metrics = evaluate(optimizer.target, valid_batches) log(stats, epoch, train_metrics, valid_metrics) # Save a checkpoint if this is the best model so far. if valid_metrics['acc'] > best_score: best_score = valid_metrics['acc'] flax.training.checkpoints.save_checkpoint(model_dir, optimizer.target, epoch + 1, keep=checkpoints_to_keep) # Done training. Restore best model. logging.info('Training done! Best validation accuracy: %.2f', best_score) best_model = flax.training.checkpoints.restore_checkpoint(model_dir, model) return stats, best_model
def _generator_from_tfds( dataset_name: str, split_type: str, batch_size: int, epochs: int, dataset_dir: str, preprocessing_fn: Callable, as_supervised: bool = True, supervised_xy_keys=None, download_and_prepare_kwargs=None, variable_length=False, shuffle_files=True, cache_dataset: bool = True, framework: str = "numpy", lambda_map: Callable = None, ) -> Union[ArmoryDataGenerator, tf.data.Dataset]: """ If as_supervised=False, must designate keys as a tuple in supervised_xy_keys: supervised_xy_keys=('video', 'label') # ucf101 dataset if variable_length=True and batch_size > 1: output batches are 1D np.arrays of objects lambda_map - if not None, mapping function to apply to dataset elements """ if not dataset_dir: dataset_dir = paths.runtime_paths().dataset_dir if cache_dataset: _cache_dataset( dataset_dir, dataset_name=dataset_name, ) if framework == "pytorch": logger.warning( "PyTorch Dataset loaders are experimental!! Support for multi-worker loading is still to come." ) if not shuffle_files: raise ValueError( "Armory PyTorch DataLoaders use dareblopy which shuffles reads from TFRecord files by default" ) ds_name, ds_version = dataset_name.split(":") dataset_map = _get_pytorch_dataset_map() if ds_name not in dataset_map.keys(): raise NotImplementedError( f"PyTorch DataLoader for `{ds_name}` not yet available.") ds = dataset_map[ds_name](ds_name, ds_version, split_type, epochs) generator = torch.utils.data.DataLoader(ds, batch_size=batch_size, num_workers=0) else: default_graph = tf.compat.v1.keras.backend.get_session().graph ds, ds_info = tfds.load( dataset_name, split=split_type, as_supervised=as_supervised, data_dir=dataset_dir, with_info=True, download_and_prepare_kwargs=download_and_prepare_kwargs, shuffle_files=shuffle_files, ) if not as_supervised: try: x_key, y_key = supervised_xy_keys except (TypeError, ValueError): raise ValueError( f"When as_supervised=False, supervised_xy_keys must be a (x_key, y_key)" f" tuple, not {supervised_xy_keys}") if not isinstance(x_key, str) or not isinstance(y_key, str): raise ValueError(f"supervised_xy_keys be a tuple of strings," f" not {type(x_key), type(y_key)}") ds = ds.map(lambda x: (x[x_key], x[y_key])) if lambda_map is not None: ds = ds.map(lambda_map) ds = ds.repeat(epochs) if shuffle_files: ds = ds.shuffle(batch_size * 10, reshuffle_each_iteration=True) if variable_length and batch_size > 1: ds = ds.batch(1, drop_remainder=False) else: ds = ds.batch(batch_size, drop_remainder=False) ds = ds.prefetch(tf.data.experimental.AUTOTUNE) if framework == "numpy": ds = tfds.as_numpy(ds, graph=default_graph) generator = ArmoryDataGenerator( ds, size=ds_info.splits[split_type].num_examples, batch_size=batch_size, epochs=epochs, preprocessing_fn=preprocessing_fn, variable_length=bool(variable_length and batch_size > 1), ) elif framework == "tf": generator = ds else: raise ValueError( f"`framework` must be one of ['tf', 'pytorch', 'numpy']. Found {framework}" ) return generator
def tf_dataset(dataset_pars):
    """
    dataset_pars = {"dataset_id": "mnist", "batch_size": 5000, "n_train": 500, "n_test": 500,
                    "out_path": "dataset/vision/mnist2/"}
    tf_dataset(dataset_pars)

    https://www.tensorflow.org/datasets/api_docs/python/tfds

    import tensorflow_datasets as tfds
    import tensorflow as tf

    # Here we assume Eager mode is enabled (TF2), but tfds also works in Graph mode.
    print(tfds.list_builders())

    # Construct a tf.data.Dataset
    ds_train = tfds.load(name="mnist", split="train", shuffle_files=True)

    # Build your input pipeline
    ds_train = ds_train.shuffle(1000).batch(128).prefetch(10)
    for features in ds_train.take(1):
        image, label = features["image"], features["label"]

    NumPy usage with tfds.as_numpy:
    train_ds = tfds.load("mnist", split="train")
    train_ds = train_ds.shuffle(1024).batch(128).repeat(5).prefetch(10)
    for example in tfds.as_numpy(train_ds):
        numpy_images, numpy_labels = example["image"], example["label"]

    You can also use tfds.as_numpy in conjunction with batch_size=-1 to get the full
    dataset in NumPy arrays from the returned tf.Tensor object:
    train_ds = tfds.load("mnist", split=tfds.Split.TRAIN, batch_size=-1)
    numpy_ds = tfds.as_numpy(train_ds)
    numpy_images, numpy_labels = numpy_ds["image"], numpy_ds["label"]

    FeaturesDict({
        'identity_attack': tf.float32,
        'insult': tf.float32,
        'obscene': tf.float32,
        'severe_toxicity': tf.float32,
        'sexual_explicit': tf.float32,
        'text': Text(shape=(), dtype=tf.string),
        'threat': tf.float32,
        'toxicity': tf.float32,
    })
    """
    import tensorflow_datasets as tfds

    d = dataset_pars
    dataset_id = d['dataset_id']
    batch_size = d.get('batch_size', -1)  # -1 means load the full dataset in one batch
    n_train = d.get("n_train", 500)
    n_test = d.get("n_test", 500)
    out_path = path_norm(d['out_path'])
    name = dataset_id.replace(".", "-")
    os.makedirs(out_path, exist_ok=True)

    train_ds = tfds.as_numpy(
        tfds.load(dataset_id, split=f"train[0:{n_train}]", batch_size=batch_size))
    test_ds = tfds.as_numpy(
        tfds.load(dataset_id, split=f"test[0:{n_test}]", batch_size=batch_size))

    # tfds.as_numpy returns an iterable of NumPy batches (or a feature dict when
    # batch_size=-1), neither of which has a .shape attribute, so log the objects.
    print("train", train_ds)
    print("test", test_ds)

    def get_keys(x):
        if "image" in x.keys():
            xkey = "image"
        if "text" in x.keys():
            xkey = "text"
        return xkey

    for x in train_ds:
        # print(x)
        xkey = get_keys(x)
        np.savez_compressed(out_path + f"{name}_train", X=x[xkey], y=x.get('label'))

    for x in test_ds:
        # print(x)
        xkey = get_keys(x)
        np.savez_compressed(out_path + f"{name}_test", X=x[xkey], y=x.get('label'))

    print(out_path, os.listdir(out_path))
def load( split: Split, *, is_training: bool, batch_dims: Sequence[int], dtype: jnp.dtype = jnp.float32, transpose: bool = False, zeros: bool = False, ) -> Generator[Batch, None, None]: """Loads the given split of the dataset.""" if zeros: h, w, c = 224, 224, 3 if transpose: image_dims = (*batch_dims[:-1], h, w, c, batch_dims[0]) else: image_dims = (*batch_dims, h, w, c) batch = { 'images': np.zeros(image_dims, dtype=dtype), 'labels': np.zeros(batch_dims, dtype=np.uint32) } if is_training: yield from it.repeat(batch) else: num_batches = split.num_examples // np.prod(batch_dims) yield from it.repeat(batch, num_batches) if is_training: start, end = _shard(split, jax.host_id(), jax.host_count()) else: start, end = _shard(split, 0, 1) tfds_split = tfds.core.ReadInstruction(_to_tfds_split(split), from_=start, to=end, unit='abs') ds = tfds.load('imagenet2012:5.*.*', split=tfds_split, decoders={'image': tfds.decode.SkipDecoding()}) total_batch_size = np.prod(batch_dims) options = tf.data.Options() options.experimental_threading.private_threadpool_size = 48 options.experimental_threading.max_intra_op_parallelism = 1 options.experimental_optimization.map_parallelization = True if is_training: options.experimental_deterministic = False ds = ds.with_options(options) if is_training: if jax.host_count() > 1: # Only cache if we are reading a subset of the dataset. ds = ds.cache() ds = ds.repeat() ds = ds.shuffle(buffer_size=10 * total_batch_size, seed=0) else: if split.num_examples % total_batch_size != 0: raise ValueError( f'Test/valid must be divisible by {total_batch_size}') def preprocess(example): image = _preprocess_image(example['image'], is_training) label = tf.cast(example['label'], tf.int32) return {'images': image, 'labels': label} ds = ds.map(preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE) def transpose_fn(batch): # We use the "double transpose trick" to improve performance for TPUs. Note # that this (typically) requires a matching HWCN->NHWC transpose in your # model code. The compiler cannot make this optimization for us since our # data pipeline and model are compiled separately. batch = dict(**batch) batch['images'] = tf.transpose(batch['images'], (1, 2, 3, 0)) return batch def cast_fn(batch): batch = dict(**batch) batch['images'] = tf.cast(batch['images'], tf.dtypes.as_dtype(dtype)) return batch for i, batch_size in enumerate(reversed(batch_dims)): ds = ds.batch(batch_size) if i == 0: if transpose: ds = ds.map(transpose_fn) # NHWC -> HWCN # NOTE: You may be tempted to move the casting earlier on in the pipeline, # but for bf16 some operations will end up silently placed on the TPU and # this causes stalls while TF and JAX battle for the accelerator. if dtype != jnp.float32: ds = ds.map(cast_fn) ds = ds.prefetch(tf.data.experimental.AUTOTUNE) yield from tfds.as_numpy(ds)
if torch.cuda.is_available(): device = torch.device("cuda") else: device = torch.device("cpu") # model = t5.models.HfPyTorchModel("t5-base", "/tmp/hft5/", device) model = t5.models.HfPyTorchModel("t5-base", MODEL_DIR, device) ds = tfds.load( "glue/cola", data_dir=DATA_DIR, # Download data locally for preprocessing to avoid using GCS space. download_and_prepare_kwargs={"download_dir": "./downloads"}) print("A few raw validation examples...") for ex in tfds.as_numpy(ds["validation"].take(2)): print(ex) possible_labels = [0, 1] def randomString(): stringLength = random.randint(1, 15) """Generate a random string of random length """ letters = string.ascii_lowercase return ''.join(random.choice(letters) for i in range(stringLength)) label_map = {} label_set = set()
def evaluation(input_sentence, subword_encoder_en, subword_encoder_zh, model):
    encoded_input = subword_encoder_en.encode(input_sentence)
    encoded_input = tf.expand_dims(encoded_input, 0)  # add a batch dimension
    output = model.predict(encoded_input)
    output = tf.squeeze(output, 0)  # drop the batch dimension again
    print(output.shape)
    decoded_output = subword_encoder_zh.decode(
        [prediction for prediction in np.argmax(output, axis=1)])
    print(decoded_output)


if __name__ == '__main__':
    builder = fetch_data(download_dir, builder_name, config)
    dataset = builder.as_dataset()
    train_data = dataset["train"]
    test_data = dataset["test"]
    train_sentences = tfds.as_numpy(train_data)
    test_sentences = tfds.as_numpy(test_data)
    en_sentences = []
    zh_sentences = []

    # for ex in train_sentences:
    #     if index > 3:
    #         break
    #     index += 1
    #     print("en train sentence {}: {}".format(index, str(ex["en"], encoding="utf-8")))
    #     print("zh train sentence {}: {}".format(index, str(ex["zh"], encoding="utf-8")))

    # index = 0
    # for ex in test_sentences:
    #     if index > 3:
    #         break
    #     index += 1
    #     print("en test sentence {}: {}".format(index, str(ex["en"], encoding="utf-8")))
import cv2
import numpy as np
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split

image_size = 0

# ds, info = tfds.load("plant_village", split="train[:80%]", shuffle_files=True,
#                      as_supervised=True, with_info=True)
# ds_test, info = tfds.load("plant_village", split="train[-20%:]", shuffle_files=True,
#                           as_supervised=True, with_info=True)

x_train = []
y_train = []
x_test = []
y_test = []
num_classes = 38

ds = tfds.load("plant_village", split=tfds.Split.TRAIN, batch_size=-1)
ds = tfds.as_numpy(ds)
images, labels = ds["image"], ds["label"]

images_new = []
for i in range(20000):
    img = images[i]
    # TFDS decodes images as RGB, so use the RGB constants when collapsing to
    # grayscale and expanding back to three channels.
    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    images_new.append(img)

# ds = tfds.load("plant_village", split='train', batch_size=-1, as_supervised=True)
# ds = tfds.as_numpy(ds)
# images, labels = ds["image"], ds["label"]

images_new = np.array(images_new)
x_train, x_test, y_train, y_test = train_test_split(
    images_new[:20000], labels[:20000], test_size=0.2, random_state=42
)
print(x_train.shape, x_test.shape)
import argparse

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.applications import resnet50

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('batchsize', type=int)
    parser.add_argument('batchcount', type=int)
    args = parser.parse_args()

    ds_all, info = tfds.load('imagenet_resized/32x32',
                             with_info=True,
                             split="train")
    classes = info.features["label"].num_classes
    shape = info.features['image'].shape
    model = resnet50.ResNet50(weights=None, input_shape=shape, classes=classes)
    model.compile(
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=tf.keras.optimizers.Adam(),
        metrics=['accuracy'])

    for batch in tfds.as_numpy(
            ds_all.take(args.batchsize * args.batchcount).batch(
                args.batchsize)):
        np_image, np_label = batch["image"], batch["label"]
        model.fit(np_image, np_label, epochs=1, verbose=0)
datetime.now().strftime('%d-%m-%Y-%H:%M:%S'))

DATA_SIZE = 100


def scheduler(epoch):
    lr = float(LEARNING_RATE * tf.math.exp(-(epoch - 1) * LEARNING_RATE_DECAY))
    print(lr)
    return lr


print('loading dataset...')

# Load training data
print('loading training data...')
train_ds = tfds.load(name="svhn_cropped", split=tfds.Split.TRAIN)
train_list = list(tfds.as_numpy(train_ds))

# Load test data
print('loading test data...')
test_ds = tfds.load(name="svhn_cropped", split=tfds.Split.TEST)
test_list = list(tfds.as_numpy(test_ds))

# x_train is the data for training the dataset
# y_train is the set of labels to all the data in x_train
x_train = list()
y_train = list()

# x_test is the data for testing the dataset
# y_test is the set of labels to all the data in x_test
x_test = list()
y_test = list()
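# Likely next step (a sketch, assuming numpy is imported as np elsewhere in this
# script): unpack the example dicts produced by tfds.as_numpy into the lists above.
for example in train_list:
    x_train.append(example['image'])
    y_train.append(example['label'])
for example in test_list:
    x_test.append(example['image'])
    y_test.append(example['label'])

x_train, y_train = np.array(x_train), np.array(y_train)
x_test, y_test = np.array(x_test), np.array(y_test)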
def load_celeba_data_classifier(batch_size):
    #lines = [line.rstrip() for line in open('C:/Users/marku/Desktop/list_attr_celeba.txt', 'r')]
    lines = [line.rstrip() for line in open('/user/student.aau.dk/mjuuln15/list_attr_celeba.txt', 'r')]
    all_attr_names = lines[1].split()
    attr2idx = {}
    idx2attr = {}
    mask = []
    dataset = []
    labels = []

    for i, attr_name in enumerate(all_attr_names):
        attr2idx[attr_name] = i
        idx2attr[i] = attr_name

    lines = lines[2:]
    for i, line in enumerate(lines):
        split = line.split()
        values = split[1:]
        temp_label = []
        has_attribute = False
        for attr_name in ['Arched_Eyebrows', 'Attractive', 'Heavy_Makeup', 'High_Cheekbones', 'Male',
                          'Mouth_Slightly_Open', 'No_Beard', 'Oval_Face', 'Pointy_Nose', 'Smiling',
                          'Wavy_Hair', 'Wearing_Lipstick', 'Young']:
            idx = attr2idx[attr_name]
            if not has_attribute:
                has_attribute = (values[idx] == '1')
            temp_label.append(int(values[attr2idx[attr_name]]))
        if has_attribute:
            labels.append(temp_label)
        mask.append(has_attribute)

    images = glob.glob('C:/Users/marku/Desktop/img_align_celeba/*.jpg')
    #images = glob.glob('/user/student.aau.dk/mjuuln15/img_align_celeba/*.jpg')
    for i in images:
        image = plt.imread(i)
        dataset.append(image)

    mask = np.array(mask)
    dataset = np.array(dataset)
    X1 = dataset[mask]

    X1 = X1[:200000]
    labels = labels[:200000]
    # Take the held-out slice before re-slicing X1 so images stay aligned with
    # the label slices below (L2 pairs with the first 8000, L1 with the rest).
    X2 = X1[:8000]
    X1 = X1[8000:]
    L1 = np.asarray(labels[8000:])
    L2 = np.asarray(labels[:8000])

    L1[L1 == -1] = 0
    L2[L2 == -1] = 0

    X1_num_examples = len(X1)
    X2_num_examples = len(X2)

    X1 = tf.data.Dataset.from_tensor_slices(X1)
    X2 = tf.data.Dataset.from_tensor_slices(tf.convert_to_tensor(X2))
    X1 = X1.map(format_example_to128)
    X2 = X2.map(format_example_to128)
    X1 = tfds.as_numpy(X1)
    X2 = tfds.as_numpy(X2)

    test = []
    for i in X1:
        test.extend(i)
    test = np.asarray(test)
    X1 = np.reshape(test, (X1_num_examples, 128, 128, 3))
    test = []
    for i in X2:
        test.extend(i)
    test = np.asarray(test)
    X2 = np.reshape(test, (X2_num_examples, 128, 128, 3))

    X1 = [X1, L1]
    X2 = [X2, L2]

    return X1, X2
def run_eval(mixture_or_task_name: str, predict_or_score_fn: PredictOrScoreFnCallable, checkpoint_steps: Iterable[int], dataset_fn: Optional[Callable[ [t5.data.Task, Mapping[str, int], int, str, Optional[bool]], tf.data.Dataset]] = None, summary_dir: Optional[str] = None, split: Optional[str] = "validation", sequence_length: Optional[Mapping[str, int]] = None, batch_size: Optional[int] = None): """Run evaluation on the given mixture or task. Args: mixture_or_task_name: str, the name of the Mixture or Task to evaluate on. Must be pre-registered in the global `TaskRegistry` or `MixtureRegistry.` predict_or_score_fn: function, This function takes in the sequence length, checkpoint step, tasks to evaluate, an eval_dataset_fn, a dict mapping task names to cached examples, a dict mapping task names to datasets, and returns a list of outputs or a list of scores. checkpoint_steps: an iterator with integers for checkpoint steps to evaluate on. dataset_fn: function, This function takes a task and returns the dataset associated with it. If None, the default mesh_eval_dataset_fn is used. summary_dir: str, path to write TensorBoard events file summaries for eval. If None, use model_dir/eval_{split}. split: str, the mixture/task split to evaluate on. sequence_length: an integer or a dict from feature-key to integer the sequence length to pad or truncate to, e.g. {"inputs": 512, "targets": 128}. If None, sequence length is automatically computed during eval. batch_size: integer, used only to check that expected padding matches the targets. If None, the check is skipped. """ vocabulary = model_utils.get_vocabulary(mixture_or_task_name) tasks = t5.data.get_subtasks( t5.data.get_mixture_or_task(mixture_or_task_name)) tasks = seqio.evaluation.get_valid_eval_tasks(tasks, split) if not tasks: logging.info( "All provided tasks have metric_fns=[] or no matching splits; " "eval is not possible.") return if not dataset_fn: def _get_task_eval_dataset(task, sequence_length, split): # TODO(sharannarang): Replace with more general function. eval_datasets = mesh_transformer.mesh_eval_dataset_fn( sequence_length=sequence_length, dataset_split=split, mixture_or_task_name=task.name, ) return eval_datasets[0].dataset_fn() dataset_fn = _get_task_eval_dataset summary_writer = None cached_targets, cached_datasets, max_sequence_length = \ seqio.evaluation.get_targets_and_examples( tasks=tasks, dataset_fn=functools.partial( dataset_fn, split=split, sequence_length=None)) if summary_dir: model_utils.write_targets_and_examples(summary_dir, cached_targets, cached_datasets) if sequence_length is None: logging.info("Setting sequence lengths to %s", max_sequence_length) sequence_length = max_sequence_length elif (sequence_length["inputs"] < max_sequence_length["inputs"] or sequence_length["targets"] < max_sequence_length["targets"]): logging.warning( "Given sequence lengths are insufficient for some evaluation inputs " "or targets. These sequences will be truncated to fit, likely " "leading to sub-optimal results. Consider passing `None` for " "sequence_length to have them be automatically computed.\n Got: %s, " "\n Max Lengths:%s", sequence_length, max_sequence_length) elif (sequence_length["inputs"] > max_sequence_length["inputs"] or sequence_length["targets"] > max_sequence_length["targets"]): logging.warning( "Given sequence lengths are longer than necessary for some " "evaluation inputs or targets, resulting in wasted computation. 
" "Consider passing `None` for sequence_length to have them be " "automatically computed.\n Got: %s,\n Max Lengths: %s", sequence_length, max_sequence_length) for step in checkpoint_steps: logging.info("Evaluating checkpoint step: %d", step) outputs = predict_or_score_fn(checkpoint_step=step, vocabulary=vocabulary, tasks=tasks, datasets=cached_datasets, sequence_length=sequence_length) for task in tasks: # Extract the portion of decodes corresponding to this dataset dataset = cached_datasets[task.name] dataset_size = len(cached_targets[task.name]) predictions = [ task.postprocess_fn(d, example=ex) for d, ex in zip( outputs[:dataset_size], tfds.as_numpy(dataset)) ] # Remove the used decodes. del outputs[:dataset_size] if summary_dir: predictions_filename = os.path.join( summary_dir, "{}_{}_predictions".format(task.name, step)) model_utils.write_lines_to_file(predictions, predictions_filename) with tf.Graph().as_default(): if summary_dir: summary_writer = summary_writer or tf.summary.FileWriter( summary_dir) for metric_fn in task.metric_fns: if summary_dir: summary = tf.Summary() targets = cached_targets[task.name] metric_result = metric_fn(targets, predictions) for metric_name, metric_value in metric_result.items(): tag = "eval/{}/{}".format(task.name, metric_name) logging.info("%s at step %d: %.3f", tag, step, metric_value) if summary_dir: summary.value.add(tag=tag, simple_value=metric_value) summary_writer.add_summary(summary, step) # pytype: disable=attribute-error if summary_dir: summary_writer.flush() # pytype: disable=attribute-error # Only padding should remain. if batch_size: expected_pad = -sum(len(t) for t in cached_targets.values()) % batch_size if outputs and len(outputs) != expected_pad: raise ValueError("{} padded outputs, {} expected.".format( len(outputs), expected_pad))
#input_image = preprocess_input(input_image) return input_image, input_mask test_dataset = test_ds.map(load_image_test, num_parallel_calls=AUTOTUNE).batch(args.batch_size) ############################################################################### # Load the best model snapshot and evaluate the quality ############################################################################### model = load_model(args.model_path, compile=False) model.compile(optimizer='adam', loss=combined_loss, metrics=['accuracy']) print('Final test set evaluation:') test_loss, test_accuracy = model.evaluate(tfds.as_numpy(test_dataset), verbose=0, steps=2) print('Test loss: {:.4f}. Test Accuracy: {:.4f}'.format( test_loss, test_accuracy)) print('Displaying some example predictions from the test set') def display(display_list): plt.figure(figsize=(15, 15)) title = ['Input Image', 'True Mask', 'Predicted Mask'] for i in range(len(display_list)): plt.subplot(1, len(display_list), i + 1)
def test_overlap(self): self._write_tfrecord('train', 5, 'abcdefghijkl') ds = self.reader.read('mnist', 'train+train[:2]', self.SPLIT_INFOS) read_data = list(tfds.as_numpy(ds)) self.assertEqual(read_data, [six.b(l) for l in 'abcdefghijklab'])
import tensorflow_datasets as tfds # Create google cloud storage to save the tensorflow datasets. STORAGE_BUCKET = 'gs://CLOUD_STORAGE_BUCKET' data_dir = f'{STORAGE_BUCKET}/data' # Make sure that you put ILSVRC2012_img_train.tar and ILSVRC2012_img_val.tar # into the cache_dir. cache_dir = 'IMAGENET_TAR_FILE_DIR/' ds = tfds.load("imagenet2012:5.0.0", split="train", data_dir=data_dir, download_and_prepare_kwargs={ 'download_kwargs': tfds.download.DownloadConfig(manual_dir=cache_dir) }) tfds.as_numpy(ds)
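# tfds.as_numpy(ds) returns an iterable of NumPy example dicts; a minimal sketch
# of consuming a couple of them (keys follow the imagenet2012 feature dict):
for example in tfds.as_numpy(ds.take(2)):
    print(example['image'].shape, example['label'])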
def _materialize(task): list(tfds.as_numpy(TaskRegistry.get_dataset( task, {"inputs": 13, "targets": 13}, "train", use_cached=False)))
import numpy as np import pandas as pd import tensorflow_datasets as tfds if __name__ == '__main__': train_ds = tfds.load('ag_news_subset', split='train', shuffle_files=True) test_ds = tfds.load('ag_news_subset', split='test', shuffle_files=True) texts, labels = [], [] for ds in (train_ds, test_ds): for example in tfds.as_numpy(ds): text, label = example['description'], example['label'] texts.append(text.decode("utf-8")) labels.append(label) labels = np.array(labels) save = pd.DataFrame() save['texts'] = texts save['labels'] = labels save.to_csv('ag_news.csv', index=False)
def _log_mixing_proportions(tasks, datasets, rates, mixed_dataset, sequence_length, compute_stats_empirically): """Log information about the mixing proportions. Called from Mixture.get_dataset. Args: tasks: a list of Task datasets: a list of tf.data.Dataset rates: a list of floats mixed_dataset: a tf.data.Dataset sequence_length: dict from string to int (packed lengths) compute_stats_empirically: a boolean - does not work on TPU """ def _normalize(l): denom = sum(l) return [x / denom for x in l] # compute some stats about the mixture examples_fraction = _normalize(rates) if compute_stats_empirically: stats_examples = 100 mean_inputs_length = [] mean_targets_length = [] for dataset in datasets: inputs_sum = 0 targets_sum = 0 for ex in tfds.as_numpy(dataset.take(stats_examples)): inputs_sum += ex["inputs"].size targets_sum += ex["targets"].size mean_inputs_length.append(inputs_sum / float(stats_examples)) mean_targets_length.append(targets_sum / float(stats_examples)) else: def _estimated_mean_length(task, key): if task.token_preprocessor: return sequence_length[key] else: return min(sequence_length[key], (task.get_cached_stats("train")[key + "_tokens"] / task.get_cached_stats("train")["examples"])) mean_inputs_length = [ _estimated_mean_length(task, "inputs") for task in tasks ] mean_targets_length = [ _estimated_mean_length(task, "targets") for task in tasks ] inputs_fraction = _normalize( [l * r for l, r in zip(mean_inputs_length, rates)]) targets_fraction = _normalize( [l * r for l, r in zip(mean_targets_length, rates)]) logging.info("%12s %12s %12s %12s %12s %12s %s", "rate", "ex.frac.", "inp.frac.", "tgt.frac.", "inp.len.", "tgt.len", "task") for i in range(len(rates)): logging.info("%12g %12g %12g %12g %12g %12g %s", rates[i], examples_fraction[i], inputs_fraction[i], targets_fraction[i], mean_inputs_length[i], mean_targets_length[i], tasks[i].name) if compute_stats_empirically: _log_padding_fractions(mixed_dataset, sequence_length)
plt.savefig(os.path.join(figdir, fname)) import tensorflow as tf from tensorflow import keras import tensorflow_datasets as tfds #tf.enable_eager_execution() # See all registered datasets tfds.list_builders() # Load a given dataset by name, along with the DatasetInfo data, info = tfds.load("mnist", with_info=True) train_data, test_data = data['train'], data['test'] assert isinstance(train_data, tf.data.Dataset) assert info.features['label'].num_classes == 10 assert info.splits['train'].num_examples == 60000 # You can also access a builder directly builder = tfds.builder("mnist") assert builder.info.splits['train'].num_examples == 60000 builder.download_and_prepare() datasets = builder.as_dataset() # If you need NumPy arrays np_datasets = tfds.as_numpy(datasets) #data, info = tfds.load("Imagenet2012", with_info=True) # tfds.image.imagenet.Imagenet2012
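# np_datasets maps split names to iterables of NumPy example dicts; a minimal
# sketch of pulling one example out of the training split:
first_example = next(iter(np_datasets['train']))
print(first_example['image'].shape, first_example['label'])  # (28, 28, 1) and an integer label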
print("args:", args) sc = SparkContext(conf=SparkConf().setAppName("mnist_data_setup")) classpath = os.environ['CLASSPATH'] hadoop_path = os.path.join(os.environ['HADOOP_PREFIX'], 'bin', 'hadoop') hadoop_classpath = subprocess.check_output( [hadoop_path, 'classpath', '--glob']).decode() os.environ['CLASSPATH'] = classpath + os.pathsep + hadoop_classpath mnist, info = tfds.load('mnist', with_info=True, data_dir='hdfs:///hadoop/tfds_datasets') print(info.as_json) # convert to numpy, then RDDs mnist_train = tfds.as_numpy(mnist['train']) mnist_test = tfds.as_numpy(mnist['test']) train_rdd = sc.parallelize(mnist_train, args.num_partitions).cache() test_rdd = sc.parallelize(mnist_test, args.num_partitions).cache() # save as CSV (label,comma-separated-features) def to_csv(example): return str(example['label']) + ',' + ','.join( [str(i) for i in example['image'].reshape(784)]) train_rdd.map(to_csv).saveAsTextFile(args.output + "/csv/train") test_rdd.map(to_csv).saveAsTextFile(args.output + "/csv/test") # save as TFRecords (numpy vs. PNG) # note: the MNIST tensorflow_dataset is already provided as TFRecords but with a PNG bytes_list
def main(_): # Create an environment and grab the spec. raw_environment = bsuite.load_and_record_to_csv( bsuite_id=FLAGS.bsuite_id, results_dir=FLAGS.results_dir, overwrite=FLAGS.overwrite, ) environment = single_precision.SinglePrecisionWrapper(raw_environment) environment_spec = specs.make_environment_spec(environment) # Build demonstration dataset. if hasattr(raw_environment, 'raw_env'): raw_environment = raw_environment.raw_env batch_dataset = bsuite_demonstrations.make_dataset(raw_environment) # Combine with demonstration dataset. transition = functools.partial(_n_step_transition_from_episode, n_step=1, additional_discount=1.) dataset = batch_dataset.map(transition) # Batch and prefetch. dataset = dataset.batch(FLAGS.batch_size, drop_remainder=True) dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) dataset = tfds.as_numpy(dataset) # Create the networks to optimize. policy_network = make_policy_network(environment_spec.actions) policy_network = hk.without_apply_rng(hk.transform(policy_network)) # If the agent is non-autoregressive use epsilon=0 which will be a greedy # policy. def evaluator_network(params: hk.Params, key: jnp.DeviceArray, observation: jnp.DeviceArray) -> jnp.DeviceArray: action_values = policy_network.apply(params, observation) return rlax.epsilon_greedy(FLAGS.epsilon).sample(key, action_values) counter = counting.Counter() learner_counter = counting.Counter(counter, prefix='learner') # The learner updates the parameters (and initializes them). learner = learning.BCLearner(network=policy_network, optimizer=optax.adam(FLAGS.learning_rate), obs_spec=environment.observation_spec(), dataset=dataset, counter=learner_counter, rng=hk.PRNGSequence(FLAGS.seed)) # Create the actor which defines how we take actions. variable_client = variable_utils.VariableClient(learner, '') evaluator = actors.FeedForwardActor(evaluator_network, variable_client=variable_client, rng=hk.PRNGSequence(FLAGS.seed)) eval_loop = acme.EnvironmentLoop(environment=environment, actor=evaluator, counter=counter, logger=loggers.TerminalLogger( 'evaluation', time_delta=1.)) # Run the environment loop. while True: for _ in range(FLAGS.evaluate_every): learner.step() learner_counter.increment(learner_steps=FLAGS.evaluate_every) eval_loop.run(FLAGS.evaluation_episodes)
def dataset_as_text(ds): for ex in tfds.as_numpy(ds): yield {k: _maybe_as_text(v) for k, v in ex.items()}
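# _maybe_as_text is defined elsewhere in the original module; a plausible sketch
# of it (an assumption, not the original implementation) plus a usage example:
def _maybe_as_text(v):
    # Decode byte strings to unicode text, leave numeric features untouched.
    return v.decode("utf-8") if isinstance(v, bytes) else v

# Example usage (hypothetical dataset choice):
# ds = tfds.load("ag_news_subset", split="test")
# for ex in dataset_as_text(ds.take(3)):
#     print(ex["label"], ex["description"][:80])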
def train_on_epoch(self, dataset): for i, batch in enumerate(tfds.as_numpy(dataset)): np_x = batch['image'] self.sess.run(self.train_op, {self.x: np_x})