Example #1
def pretrain(batch_size,
             epochs,
             model_dir=tempfile.mkdtemp(),
             train_steps_per_epoch=None,
             eval_steps_per_epoch=None):
    train_data, eval_data = cifair100.load_data()
    pipeline = fe.Pipeline(
        train_data=train_data,
        eval_data=eval_data,
        batch_size=batch_size,
        ops=[
            Normalize(inputs="x", outputs="x", mean=(0.4914, 0.4822, 0.4465), std=(0.2471, 0.2435, 0.2616)),
            PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x", mode="train"),
            RandomCrop(32, 32, image_in="x", image_out="x", mode="train"),
            Sometimes(HorizontalFlip(image_in="x", image_out="x", mode="train")),
            CoarseDropout(inputs="x", outputs="x", mode="train", max_holes=1)
        ])
    backbone, vit = fe.build(
        model_fn=lambda: vision_transformer(
            num_class=100, image_size=(32, 32, 3), patch_size=4, num_layers=6, em_dim=256, num_heads=8, dff=512),
        optimizer_fn=[None, lambda: tf.optimizers.SGD(0.01, momentum=0.9)])
    network = fe.Network(ops=[
        ModelOp(model=vit, inputs="x", outputs="y_pred"),
        CrossEntropy(inputs=("y_pred", "y"), outputs="ce", from_logits=True),
        UpdateOp(model=vit, loss_name="ce")
    ])
    traces = [
        Accuracy(true_key="y", pred_key="y_pred"),
        BestModelSaver(model=backbone, save_dir=model_dir, metric="accuracy", save_best_mode="max")
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             train_steps_per_epoch=train_steps_per_epoch,
                             eval_steps_per_epoch=eval_steps_per_epoch)
    estimator.fit(warmup=False)
    return traces[1].model_path  # return the weights path
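A minimal driver for the routine above (a sketch assuming the imports and definitions shown; the argument values are arbitrary):

weights_path = pretrain(batch_size=128, epochs=10)
print("best backbone weights saved to:", weights_path)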
Example #2
def get_estimator(epochs=50,
                  batch_size=256,
                  max_train_steps_per_epoch=None,
                  save_dir=tempfile.mkdtemp()):
    train_data, _ = mnist.load_data()
    pipeline = fe.Pipeline(
        train_data=train_data,
        batch_size=batch_size,
        ops=[
            ExpandDims(inputs="x", outputs="x"),
            Normalize(inputs="x",
                      outputs="x",
                      mean=1.0,
                      std=1.0,
                      max_pixel_value=127.5),
            LambdaOp(fn=lambda: np.random.normal(size=[100]).astype('float32'),
                     outputs="z")
        ])
    gen_model = fe.build(model_fn=generator,
                         optimizer_fn=lambda: tf.optimizers.Adam(1e-4))
    disc_model = fe.build(model_fn=discriminator,
                          optimizer_fn=lambda: tf.optimizers.Adam(1e-4))
    network = fe.Network(ops=[
        ModelOp(model=gen_model, inputs="z", outputs="x_fake"),
        ModelOp(model=disc_model, inputs="x_fake", outputs="fake_score"),
        GLoss(inputs="fake_score", outputs="gloss"),
        UpdateOp(model=gen_model, loss_name="gloss"),
        ModelOp(inputs="x", model=disc_model, outputs="true_score"),
        DLoss(inputs=("true_score", "fake_score"), outputs="dloss"),
        UpdateOp(model=disc_model, loss_name="dloss")
    ])
    estimator = fe.Estimator(
        pipeline=pipeline,
        network=network,
        epochs=epochs,
        traces=ModelSaver(model=gen_model, save_dir=save_dir, frequency=5),
        max_train_steps_per_epoch=max_train_steps_per_epoch)
    return estimator
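For reference, GLoss and DLoss in a setup like this are usually thin wrappers around the standard GAN cross-entropy losses. A hedged sketch (not necessarily the classes this example actually imports):

import tensorflow as tf
from fastestimator.op.tensorop import TensorOp

class GLoss(TensorOp):
    # generator loss: push the discriminator's fake score towards "real"
    def forward(self, data, state):
        fake_score = data
        return tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(fake_score),
                                                    logits=fake_score))

class DLoss(TensorOp):
    # discriminator loss: real scores towards 1, fake scores towards 0
    def forward(self, data, state):
        true_score, fake_score = data
        real_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(true_score),
                                                            logits=true_score)
        fake_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(fake_score),
                                                            logits=fake_score)
        return tf.reduce_mean(real_loss + fake_loss)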
Example #3
def instantiate_system():
    system = sample_system_object()
    x_train = np.ones((2, 28, 28, 3))
    y_train = np.ones((2, ))
    data = {
        0: {
            'x': x_train[0],
            'y': y_train[0]
        },
        1: {
            'x': x_train[1],
            'y': y_train[1]
        }
    }
    train_data = RepeatScheduler([
        TestNonTraceableDataset(data=data, var=3),
        TestDataset(data={
            'x': x_train,
            'y': y_train
        }, var=7), None
    ])
    system.pipeline = fe.Pipeline(train_data=train_data, batch_size=1)
    return system
Example #4
def get_estimator(epochs=24, batch_size=512, max_train_steps_per_epoch=None, save_dir=tempfile.mkdtemp()):
    # step 1: prepare dataset
    train_data, test_data = load_data()
    pipeline = fe.Pipeline(
        train_data=train_data,
        test_data=test_data,
        batch_size=batch_size,
        ops=[
            Normalize(inputs="x", outputs="x", mean=(0.4914, 0.4822, 0.4465), std=(0.2471, 0.2435, 0.2616)),
            PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x", mode="train"),
            RandomCrop(32, 32, image_in="x", image_out="x", mode="train"),
            Sometimes(HorizontalFlip(image_in="x", image_out="x", mode="train")),
            CoarseDropout(inputs="x", outputs="x", mode="train", max_holes=1),
            ChannelTranspose(inputs="x", outputs="x"),
            Onehot(inputs="y", outputs="y", mode="train", num_classes=10, label_smoothing=0.2)
        ])

    # step 2: prepare network
    model = fe.build(model_fn=FastCifar, optimizer_fn="adam")
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="x", outputs="y_pred"),
        CrossEntropy(inputs=("y_pred", "y"), outputs="ce"),
        UpdateOp(model=model, loss_name="ce")
    ])

    # step 3: prepare estimator
    traces = [
        Accuracy(true_key="y", pred_key="y_pred"),
        BestModelSaver(model=model, save_dir=save_dir, metric="accuracy", save_best_mode="max"),
        LRScheduler(model=model, lr_fn=lr_schedule)
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             max_train_steps_per_epoch=max_train_steps_per_epoch)
    return estimator
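Because the pipeline above registers test_data, the returned estimator can be trained and then scored on the held-out split (a sketch assuming the definitions above):

est = get_estimator(epochs=24, batch_size=512)
est.fit()
est.test()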
Example #5
def get_estimator(epochs=2,
                  batch_size=32,
                  steps_per_epoch=None,
                  validation_steps=None,
                  model_dir=tempfile.mkdtemp()):
    # step 1. prepare data
    (x_train, y_train), (x_eval, y_eval) = tf.keras.datasets.mnist.load_data()
    train_data = {"x": np.expand_dims(x_train, -1), "y": y_train}
    eval_data = {"x": np.expand_dims(x_eval, -1), "y": y_eval}
    data = {"train": train_data, "eval": eval_data}
    pipeline = fe.Pipeline(batch_size=batch_size,
                           data=data,
                           ops=Minmax(inputs="x", outputs="x"))

    # step 2. prepare model
    model = fe.build(model_def=LeNet,
                     model_name="lenet",
                     optimizer="adam",
                     loss_name="loss")

    network = fe.Network(ops=[
        ModelOp(inputs="x", model=model, outputs="y_pred"),
        SparseCategoricalCrossentropy(inputs=("y", "y_pred"), outputs="loss")
    ])

    # step 3. prepare estimator
    traces = [
        Accuracy(true_key="y", pred_key="y_pred", output_name='acc'),
        ModelSaver(model_name="lenet", save_dir=model_dir, save_best=True)
    ]
    estimator = fe.Estimator(network=network,
                             pipeline=pipeline,
                             epochs=epochs,
                             traces=traces,
                             steps_per_epoch=steps_per_epoch,
                             validation_steps=validation_steps)
    return estimator
Example #6
def get_estimator(epochs=2, batch_size=32, save_dir=tempfile.mkdtemp()):
    # step 1
    train_data, eval_data = mnist.load_data()
    test_data = eval_data.split(0.5)
    pipeline = fe.Pipeline(train_data=train_data,
                           eval_data=eval_data,
                           test_data=test_data,
                           batch_size=batch_size,
                           ops=[
                               ExpandDims(inputs="x", outputs="x"),
                               Minmax(inputs="x", outputs="x")
                           ])

    # step 2
    model = fe.build(model_fn=LeNet, optimizer_fn="adam")
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="x", outputs="y_pred"),
        CrossEntropy(inputs=("y_pred", "y"), outputs="ce"),
        UpdateOp(model=model, loss_name="ce")
    ])
    # step 3
    traces = [
        Accuracy(true_key="y", pred_key="y_pred"),
        BestModelSaver(model=model,
                       save_dir=save_dir,
                       metric="accuracy",
                       save_best_mode="max"),
        LRScheduler(model=model,
                    lr_fn=lambda step: cosine_decay(
                        step, cycle_length=3750, init_lr=1e-3)),
        Traceability(save_path=save_dir)
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces)
    return estimator
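The cosine_decay helper passed to LRScheduler here comes from fastestimator's schedule utilities; conceptually it evaluates to something like the following (a simplified sketch that ignores the library's extra options such as cycle restarts):

import numpy as np

def cosine_decay(step, cycle_length, init_lr, min_lr=0.0):
    progress = ((step - 1) % cycle_length) / cycle_length  # position in the cycle, in [0, 1)
    return min_lr + 0.5 * (init_lr - min_lr) * (1 + np.cos(np.pi * progress))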
Example #7
def get_estimator(max_words=10000,
                  max_len=500,
                  epochs=10,
                  batch_size=64,
                  max_train_steps_per_epoch=None,
                  max_eval_steps_per_epoch=None,
                  save_dir=tempfile.mkdtemp()):

    # step 1. prepare data
    train_data, eval_data = imdb_review.load_data(max_len, max_words)
    pipeline = fe.Pipeline(train_data=train_data,
                           eval_data=eval_data,
                           batch_size=batch_size,
                           ops=Reshape(1, inputs="y", outputs="y"))

    # step 2. prepare model
    model = fe.build(model_fn=lambda: create_lstm(max_len, max_words),
                     optimizer_fn="adam")
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="x", outputs="y_pred"),
        CrossEntropy(inputs=("y_pred", "y"), outputs="loss"),
        UpdateOp(model=model, loss_name="loss")
    ])

    traces = [
        Accuracy(true_key="y", pred_key="y_pred"),
        BestModelSaver(model=model, save_dir=save_dir)
    ]
    # step 3. prepare estimator
    estimator = fe.Estimator(
        network=network,
        pipeline=pipeline,
        epochs=epochs,
        traces=traces,
        max_train_steps_per_epoch=max_train_steps_per_epoch,
        max_eval_steps_per_epoch=max_eval_steps_per_epoch)
    return estimator
Example #8
def get_estimator(batch_size=100, epochs=100, model_dir=tempfile.mkdtemp()):
    # prepare data
    (x_train, _), (x_eval, _) = tf.keras.datasets.mnist.load_data()
    x_train = x_train.reshape(x_train.shape[0], 28, 28, 1).astype('float32')
    x_eval = x_eval.reshape(x_eval.shape[0], 28, 28, 1).astype('float32')
    data = {"train": {"x": x_train}, "eval": {"x": x_eval}}
    pipeline = fe.Pipeline(batch_size=batch_size,
                           data=data,
                           ops=[
                               Myrescale(inputs="x", outputs="x"),
                               Mybinarize(inputs="x", outputs="x")
                           ])
    # prepare model
    infer_model = fe.FEModel(model_def=inference_net,
                             model_name="encoder",
                             loss_name="loss",
                             optimizer=tf.optimizers.Adam(1e-4))
    gen_model = fe.FEModel(model_def=generative_net,
                           model_name="decoder",
                           loss_name="loss",
                           optimizer=tf.optimizers.Adam(1e-4))

    network = fe.Network(ops=[
        ModelOp(inputs="x", model=infer_model, outputs="meanlogvar",
                mode=None),
        SplitOp(inputs="meanlogvar", outputs=("mean", "logvar"), mode=None),
        ReparameterizepOp(inputs=("mean", "logvar"), outputs="z", mode=None),
        ModelOp(inputs="z", model=gen_model, outputs="x_logit"),
        CVAELoss(inputs=("x", "mean", "logvar", "z", "x_logit"), mode=None)
    ])
    estimator = fe.Estimator(network=network,
                             pipeline=pipeline,
                             epochs=epochs,
                             traces=ModelSaver(model_name="decoder",
                                               save_dir=model_dir,
                                               save_best=True))
    return estimator
Example #9
        def run_test(mixed_precision, merge_grad, gradient):
            lr = 0.1
            lr2 = 0.01
            lr3 = 0.001
            pipeline = fe.Pipeline(train_data=self.train_data,
                                   batch_size=4,
                                   ops=[ExpandDims(inputs="x", outputs="x", axis=0), Minmax(inputs="x", outputs="x")])

            optimizer_fn = EpochScheduler({
                1: lambda x: torch.optim.SGD(params=x, lr=lr),
                2: lambda x: torch.optim.SGD(params=x, lr=lr2),
                3: lambda x: torch.optim.SGD(params=x, lr=lr3)
            })

            model = fe.build(model_fn=LeNet_torch, optimizer_fn=optimizer_fn, mixed_precision=mixed_precision)
            network = fe.Network(ops=[
                ModelOp(model=model, inputs="x", outputs="y_pred"),
                CrossEntropy(inputs=("y_pred", "y"), outputs="ce"),
                GradientOp(model=model, finals="ce", outputs="grad"),
                UpdateOp(model=model, loss_name="ce", gradients=gradient, merge_grad=merge_grad),
            ])

            traces = [
                CheckNetworkWeight(model=model,
                                   grad_key="grad",
                                   merge_grad=merge_grad,
                                   test_self=self,
                                   framework="torch",
                                   lrs=[lr, lr2, lr3],
                                   work_intervals=[[1, 2], [2, 3], [3, 4]])
            ]
            estimator = fe.Estimator(pipeline=pipeline,
                                     network=network,
                                     epochs=3,
                                     traces=traces,
                                     train_steps_per_epoch=2)
            estimator.fit(warmup=False)
Example #10
def get_estimator(batch_size=256,
                  epochs=50,
                  steps_per_epoch=None,
                  validation_steps=None,
                  model_dir=tempfile.mkdtemp()):
    # prepare data
    (x_train, _), (_, _) = tf.keras.datasets.mnist.load_data()
    data = {"train": {"x": np.expand_dims(x_train, -1)}}
    pipeline = fe.Pipeline(batch_size=batch_size,
                           data=data,
                           ops=Myrescale(inputs="x", outputs="x"))
    # prepare model
    g_femodel = fe.build(model_def=make_generator_model,
                         model_name="gen",
                         loss_name="gloss",
                         optimizer=tf.optimizers.Adam(1e-4))
    d_femodel = fe.build(model_def=make_discriminator_model,
                         model_name="disc",
                         loss_name="dloss",
                         optimizer=tf.optimizers.Adam(1e-4))
    network = fe.Network(ops=[
        ModelOp(inputs=lambda: tf.random.normal([batch_size, 100]),
                model=g_femodel),
        ModelOp(model=d_femodel, outputs="pred_fake"),
        ModelOp(inputs="x", model=d_femodel, outputs="pred_true"),
        GLoss(inputs=("pred_fake"), outputs="gloss"),
        DLoss(inputs=("pred_true", "pred_fake"), outputs="dloss")
    ])
    # prepare estimator
    traces = [ModelSaver(model_name='gen', save_dir=model_dir, save_freq=5)]
    estimator = fe.Estimator(network=network,
                             pipeline=pipeline,
                             epochs=epochs,
                             traces=traces,
                             steps_per_epoch=steps_per_epoch,
                             validation_steps=validation_steps)
    return estimator
Example #11
def get_estimator(epochs=50,
                  batch_size=32,
                  steps_per_epoch=None,
                  validation_steps=None,
                  model_dir=tempfile.mkdtemp()):
    (X, y) = load_breast_cancer(return_X_y=True)
    x_train, x_eval, y_train, y_eval = train_test_split(X, y, test_size=0.2)

    # step 1. prepare data
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_eval = scaler.transform(x_eval)
    train_data = {"x": x_train, "y": np.expand_dims(y_train, -1)}
    eval_data = {"x": x_eval, "y": np.expand_dims(y_eval, -1)}
    data = {"train": train_data, "eval": eval_data}
    pipeline = fe.Pipeline(batch_size=batch_size, data=data)

    # step 2. prepare model
    model = fe.build(model_def=create_dnn,
                     model_name="dnn",
                     optimizer="adam",
                     loss_name="loss")
    network = fe.Network(ops=[
        ModelOp(inputs="x", model=model, outputs="y_pred"),
        MeanSquaredError(inputs=("y", "y_pred"), outputs="loss")
    ])

    # step 3. prepare estimator
    traces = [ModelSaver(model_name="dnn", save_dir=model_dir, save_best=True)]
    estimator = fe.Estimator(network=network,
                             pipeline=pipeline,
                             epochs=epochs,
                             steps_per_epoch=steps_per_epoch,
                             validation_steps=validation_steps,
                             log_steps=10,
                             traces=traces)
    return estimator
Example #12
    def test_estimator_configure_loader_tf_data_loader_torch_model(self):
        loader = get_sample_tf_dataset()
        pipeline = fe.Pipeline(train_data=loader)
        model = fe.build(model_fn=LeNetTorch, optimizer_fn="adam")

        network = fe.Network(ops=[
            ModelOp(model=model, inputs="x_out", outputs="y_pred"),
            CrossEntropy(inputs=("y_pred", "y"), outputs="ce"),
            UpdateOp(model=model, loss_name="ce")
        ])

        est = fe.Estimator(pipeline=pipeline,
                           network=network,
                           max_train_steps_per_epoch=3,
                           epochs=1)

        est.system.mode = "train"
        new_loader = est._configure_loader(loader)

        with self.subTest(
                "check loader type"):  # the loader type should be unchanged
            strategy = tf.distribute.get_strategy()
            if isinstance(strategy,
                          tf.distribute.MirroredStrategy) and isinstance(
                              network, TFNetwork):
                self.assertIsInstance(new_loader,
                                      tf.distribute.DistributedDataset)
            else:
                self.assertIsInstance(new_loader, tf.data.Dataset)

        with self.subTest("max_train_steps_per_epoch=3"):
            iterator = iter(new_loader)
            for i in range(3):
                batch = next(iterator)

            with self.assertRaises(StopIteration):
                batch = next(iterator)
Example #13
def test_pipeline_get_result_dict_batch_size_train_eval(self):
    pipeline = fe.Pipeline(train_data=self.sample_torch_dataset,
                           eval_data=self.sample_torch_dataset,
                           ops=NumpyOpAdd1(inputs="x", outputs="y"),
                           batch_size={
                               "train": 2,
                               "eval": 1
                           })
    data_train = pipeline.get_results(mode="train", epoch=1)
    data_eval = pipeline.get_results(mode="eval", epoch=1)
    data_train["x"] = data_train["x"].numpy()
    data_train["y"] = data_train["y"].numpy()
    data_eval["x"] = data_eval["x"].numpy()
    data_eval["y"] = data_eval["y"].numpy()
    ans_train = {
        "x": np.array([[0], [1]], dtype=np.float32),
        "y": np.array([[1], [2]], dtype=np.float32)
    }
    ans_eval = {
        "x": np.array([[0]], dtype=np.float32),
        "y": np.array([[1]], dtype=np.float32)
    }
    self.assertTrue(is_equal(data_train, ans_train))
    self.assertTrue(is_equal(data_eval, ans_eval))
Example #14
def _build_estimator(model: Union[tf.keras.Model, torch.nn.Module],
                     trace: Traceability,
                     axis: int = -1):
    train_data, eval_data = mnist.load_data()
    test_data = eval_data.split(0.5)
    batch_size = 32
    pipeline = fe.Pipeline(train_data=train_data,
                           eval_data=eval_data,
                           test_data=test_data,
                           batch_size=batch_size,
                           ops=[
                               ExpandDims(inputs="x", outputs="x", axis=axis),
                               Minmax(inputs="x", outputs="x")
                           ])
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="x", outputs="y_pred"),
        CrossEntropy(inputs=("y_pred", "y"), outputs="ce"),
        UpdateOp(model=model, loss_name="ce")
    ])
    traces = [
        Accuracy(true_key="y", pred_key="y_pred"),
        LRScheduler(model=model,
                    lr_fn=lambda step: cosine_decay(
                        step, cycle_length=3750, init_lr=1e-3)), trace
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=1,
                             traces=traces,
                             max_train_steps_per_epoch=1,
                             max_eval_steps_per_epoch=None)
    fake_data = tf.ones(shape=(batch_size, 28, 28,
                               1)) if axis == -1 else torch.ones(
                                   size=(batch_size, 1, 28, 28))
    model.fe_input_spec = FeInputSpec(fake_data, model)
    return estimator
Example #15
        def run_test(mixed_precision, merge_grad, gradient):
            lr = 0.1
            pipeline = fe.Pipeline(train_data=self.train_data,
                                   batch_size=4,
                                   ops=[
                                       ExpandDims(inputs="x", outputs="x"),
                                       Minmax(inputs="x", outputs="x")
                                   ])

            model = fe.build(model_fn=LeNet_tf,
                             optimizer_fn=lambda: tf.optimizers.SGD(lr),
                             mixed_precision=mixed_precision)
            network = fe.Network(ops=[
                ModelOp(model=model, inputs="x", outputs="y_pred"),
                CrossEntropy(inputs=("y_pred", "y"), outputs="ce"),
                GradientOp(model=model, finals="ce", outputs="grad"),
                UpdateOp(model=model,
                         loss_name="ce",
                         gradients=gradient,
                         merge_grad=merge_grad),
            ])

            traces = [
                CheckNetworkWeight(model=model,
                                   grad_key="grad",
                                   merge_grad=merge_grad,
                                   test_self=self,
                                   lrs=lr,
                                   framework="tf")
            ]
            estimator = fe.Estimator(pipeline=pipeline,
                                     network=network,
                                     epochs=2,
                                     traces=traces,
                                     train_steps_per_epoch=2)
            estimator.fit(warmup=False)
Example #16
def get_estimator(batch_size=100, epochs=20, max_train_steps_per_epoch=None, save_dir=tempfile.mkdtemp()):
    train_data, _ = load_data()
    pipeline = fe.Pipeline(
        train_data=train_data,
        batch_size=batch_size,
        ops=[
            ExpandDims(inputs="x", outputs="x", axis=0),
            Minmax(inputs="x", outputs="x"),
            Binarize(inputs="x", outputs="x", threshold=0.5),
        ])

    encode_model = fe.build(model_fn=EncoderNet, optimizer_fn="adam", model_name="encoder")
    decode_model = fe.build(model_fn=DecoderNet, optimizer_fn="adam", model_name="decoder")

    network = fe.Network(ops=[
        ModelOp(model=encode_model, inputs="x", outputs="meanlogvar"),
        SplitOp(inputs="meanlogvar", outputs=("mean", "logvar")),
        ReparameterizepOp(inputs=("mean", "logvar"), outputs="z"),
        ModelOp(model=decode_model, inputs="z", outputs="x_logit"),
        CrossEntropy(inputs=("x_logit", "x"), outputs="cross_entropy"),
        CVAELoss(inputs=("cross_entropy", "mean", "logvar", "z"), outputs="loss"),
        UpdateOp(model=encode_model, loss_name="loss"),
        UpdateOp(model=decode_model, loss_name="loss"),
    ])

    traces = [
        BestModelSaver(model=encode_model, save_dir=save_dir), BestModelSaver(model=decode_model, save_dir=save_dir)
    ]

    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             max_train_steps_per_epoch=max_train_steps_per_epoch)

    return estimator
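The ReparameterizepOp above applies the VAE reparameterization trick, sampling z = mean + eps * exp(0.5 * logvar) with eps ~ N(0, I). In NumPy terms (a minimal sketch of the computation, not the op itself):

import numpy as np

def reparameterize(mean, logvar):
    eps = np.random.normal(size=mean.shape).astype("float32")
    return mean + eps * np.exp(0.5 * logvar)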
Example #17
def sample_system_object_torch():
    x_train = np.random.rand(3, 28, 28, 3)
    y_train = np.random.randint(10, size=(3, ))
    x_eval = np.random.rand(2, 28, 28, 3)
    y_eval = np.random.randint(10, size=(2, ))

    train_data = NumpyDataset({'x': x_train, 'y': y_train})
    eval_data = NumpyDataset({'x': x_eval, 'y': y_eval})
    test_data = eval_data.split(0.5)
    model = fe.build(model_fn=fe.architecture.pytorch.LeNet,
                     optimizer_fn='adam',
                     model_name='torch')
    pipeline = fe.Pipeline(train_data=train_data,
                           eval_data=eval_data,
                           test_data=test_data,
                           batch_size=1)
    network = fe.Network(
        ops=[ModelOp(model=model, inputs="x_out", outputs="y_pred")])
    system = System(network=network,
                    pipeline=pipeline,
                    traces=[],
                    total_epochs=10,
                    mode='train')
    return system
Example #18
def get_estimator(data_dir=None,
                  model_dir=tempfile.mkdtemp(),
                  epochs=20,
                  em_dim=128,
                  batch_size=32,
                  train_steps_per_epoch=None,
                  eval_steps_per_epoch=None):
    train_ds, eval_ds, test_ds = tednmt.load_data(data_dir,
                                                  translate_option="pt_to_en")
    pt_tokenizer = BertTokenizer.from_pretrained(
        "neuralmind/bert-base-portuguese-cased")
    en_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    pipeline = fe.Pipeline(train_data=train_ds,
                           eval_data=eval_ds,
                           test_data=test_ds,
                           ops=[
                               Encode(inputs="source",
                                      outputs="source",
                                      tokenizer=pt_tokenizer),
                               Encode(inputs="target",
                                      outputs="target",
                                      tokenizer=en_tokenizer),
                               Batch(batch_size=batch_size, pad_value=0)
                           ])
    model = fe.build(
        model_fn=lambda: Transformer(num_layers=4,
                                     em_dim=em_dim,
                                     num_heads=8,
                                     ff_dim=512,
                                     input_vocab=pt_tokenizer.vocab_size,
                                     target_vocab=en_tokenizer.vocab_size,
                                     max_pos_enc=1000,
                                     max_pos_dec=1000),
        optimizer_fn="adam")
    network = fe.Network(ops=[
        ShiftData(inputs="target", outputs=("target_inp", "target_real")),
        CreateMasks(inputs=("source", "target_inp"),
                    outputs=("encode_pad_mask", "decode_pad_mask",
                             "dec_look_ahead_mask")),
        ModelOp(model=model,
                inputs=("source", "target_inp", "encode_pad_mask",
                        "decode_pad_mask", "dec_look_ahead_mask"),
                outputs="pred"),
        MaskedCrossEntropy(inputs=("pred", "target_real"), outputs="ce"),
        UpdateOp(model=model, loss_name="ce")
    ])
    traces = [
        MaskedAccuracy(inputs=("pred", "target_real"),
                       outputs="masked_acc",
                       mode="!train"),
        BestModelSaver(model=model,
                       save_dir=model_dir,
                       metric="masked_acc",
                       save_best_mode="max"),
        LRScheduler(model=model, lr_fn=lambda step: lr_fn(step, em_dim))
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             traces=traces,
                             epochs=epochs,
                             train_steps_per_epoch=train_steps_per_epoch,
                             eval_steps_per_epoch=eval_steps_per_epoch)
    return estimator
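The lr_fn helper referenced above is not shown; Transformer examples of this kind typically use the warmup schedule from "Attention Is All You Need", along these lines (an assumption, the actual helper may differ):

def lr_fn(step, em_dim, warmup=4000):
    # lr = em_dim^-0.5 * min(step^-0.5, step * warmup^-1.5); steps start at 1
    return em_dim ** -0.5 * min(step ** -0.5, step * warmup ** -1.5)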
Example #19
    def test_pipeline_get_loader_torch_dataset_with_batch_size(self):
        with self.subTest(shuffle=False):
            pipeline = fe.Pipeline(train_data=self.sample_torch_dataset,
                                   batch_size=2)
            loader = pipeline.get_loader(mode="train", shuffle=False)

            results = []
            for idx, batch in enumerate(loader, start=1):
                results.append(batch)
                if idx == 2:
                    break
            ans = [{
                "x": torch.tensor([[0], [1]], dtype=torch.float32),
                "y": torch.tensor([[-99], [-98]], dtype=torch.float32)
            }, {
                "x": torch.tensor([[2], [3]], dtype=torch.float32),
                "y": torch.tensor([[-97], [-96]], dtype=torch.float32)
            }]
            self.assertTrue(is_equal(results, ans))

        with self.subTest(shuffle=True):
            pipeline = fe.Pipeline(train_data=self.sample_torch_dataset,
                                   batch_size=2)
            loader = pipeline.get_loader(mode="train", shuffle=True)

            results = []
            for idx, batch in enumerate(loader, start=1):
                results.append(batch)
                if idx == 2:
                    break
            wrong_ans = [{
                "x": torch.tensor([[0], [1]], dtype=torch.float32),
                "y": torch.tensor([[-99], [-98]], dtype=torch.float32)
            }, {
                "x": torch.tensor([[2], [3]], dtype=torch.float32),
                "y": torch.tensor([[-97], [-96]], dtype=torch.float32)
            }]

            self.assertFalse(is_equal(results, wrong_ans))

        with self.subTest(shuffle=None):
            pipeline = fe.Pipeline(train_data=self.sample_torch_dataset,
                                   batch_size=2)
            loader = pipeline.get_loader(mode="train", shuffle=None)

            results = []
            for idx, batch in enumerate(loader, start=1):
                results.append(batch)
                if idx == 2:
                    break
            wrong_ans = [{
                "x": torch.tensor([[0], [1]], dtype=torch.float32),
                "y": torch.tensor([[-99], [-98]], dtype=torch.float32)
            }, {
                "x": torch.tensor([[2], [3]], dtype=torch.float32),
                "y": torch.tensor([[-97], [-96]], dtype=torch.float32)
            }]

            self.assertFalse(
                is_equal(results, wrong_ans)
            )  # if shuffle is None and batch_size is specified, the loader will shuffle
Example #20
    def test_pipeline_get_loader_torch_dataloader(self):
        pipeline = fe.Pipeline(train_data=self.sample_torch_dataloader)
        loader = pipeline.get_loader(mode="train")

        self.assertEqual(loader, self.sample_torch_dataloader)
Example #21
def test_pipeline_transform_no_ops(self):
    pipeline = fe.Pipeline()
    data = pipeline.transform(data=self.sample_data, mode="train")
    ans = {"x": np.array([[1, 2, 3]], dtype=np.float32)}
    self.assertTrue(is_equal(data, ans))
Example #22
def get_estimator(target_size=128,
                  epochs=55,
                  save_dir=tempfile.mkdtemp(),
                  max_train_steps_per_epoch=None,
                  data_dir=None):
    # assert growth parameters
    num_grow = np.log2(target_size) - 2
    assert num_grow >= 1 and num_grow % 1 == 0, "target_size must be a power of 2 and at least 8"
    num_phases = int(2 * num_grow + 1)
    assert epochs % num_phases == 0, "epochs must be a multiple of {} for size {}".format(
        num_phases, target_size)
    num_grow, phase_length = int(num_grow), int(epochs / num_phases)
    event_epoch = [1, 1 + phase_length] + [
        phase_length * (2 * i + 1) + 1 for i in range(1, num_grow)
    ]
    event_size = [4] + [2**(i + 3) for i in range(num_grow)]
    # set up data schedules
    dataset = nih_chestxray.load_data(root_dir=data_dir)
    resize_map = {
        epoch: Resize(image_in="x", image_out="x", height=size, width=size)
        for (epoch, size) in zip(event_epoch, event_size)
    }
    resize_low_res_map1 = {
        epoch: Resize(image_in="x",
                      image_out="x_low_res",
                      height=size // 2,
                      width=size // 2)
        for (epoch, size) in zip(event_epoch, event_size)
    }
    resize_low_res_map2 = {
        epoch: Resize(image_in="x_low_res",
                      image_out="x_low_res",
                      height=size,
                      width=size)
        for (epoch, size) in zip(event_epoch, event_size)
    }
    batch_size_map = {
        epoch: 512 // size * get_num_devices() if size <= 128 else 4 *
        get_num_devices()
        for (epoch, size) in zip(event_epoch, event_size)
    }
    batch_scheduler = EpochScheduler(epoch_dict=batch_size_map)
    pipeline = fe.Pipeline(
        batch_size=batch_scheduler,
        train_data=dataset,
        drop_last=True,
        ops=[
            ReadImage(inputs="x", outputs="x", color_flag='gray'),
            EpochScheduler(epoch_dict=resize_map),
            EpochScheduler(epoch_dict=resize_low_res_map1),
            EpochScheduler(epoch_dict=resize_low_res_map2),
            Normalize(inputs=["x", "x_low_res"],
                      outputs=["x", "x_low_res"],
                      mean=1.0,
                      std=1.0,
                      max_pixel_value=127.5),
            LambdaOp(fn=lambda: np.random.normal(size=[512]).astype('float32'),
                     outputs="z")
        ])
    # now model schedule
    fade_in_alpha = tf.Variable(initial_value=1.0,
                                dtype='float32',
                                trainable=False)
    d_models = fe.build(
        model_fn=lambda: build_D(fade_in_alpha,
                                 target_resolution=int(np.log2(target_size)),
                                 num_channels=1),
        optimizer_fn=[
            lambda: Adam(0.001, beta_1=0.0, beta_2=0.99, epsilon=1e-8)
        ] * len(event_size),
        model_name=["d_{}".format(size) for size in event_size])
    g_models = fe.build(
        model_fn=lambda: build_G(fade_in_alpha,
                                 target_resolution=int(np.log2(target_size)),
                                 num_channels=1),
        optimizer_fn=[
            lambda: Adam(0.001, beta_1=0.0, beta_2=0.99, epsilon=1e-8)
        ] * len(event_size) + [None],
        model_name=["g_{}".format(size) for size in event_size] + ["G"])
    fake_img_map = {
        epoch: ModelOp(inputs="z", outputs="x_fake", model=model)
        for (epoch, model) in zip(event_epoch, g_models[:-1])
    }
    fake_score_map = {
        epoch: ModelOp(inputs="x_fake", outputs="fake_score", model=model)
        for (epoch, model) in zip(event_epoch, d_models)
    }
    real_score_map = {
        epoch: ModelOp(inputs="x_blend", outputs="real_score", model=model)
        for (epoch, model) in zip(event_epoch, d_models)
    }
    interp_score_map = {
        epoch: ModelOp(inputs="x_interp", outputs="interp_score", model=model)
        for (epoch, model) in zip(event_epoch, d_models)
    }
    g_update_map = {
        epoch: UpdateOp(loss_name="gloss", model=model)
        for (epoch, model) in zip(event_epoch, g_models[:-1])
    }
    d_update_map = {
        epoch: UpdateOp(loss_name="dloss", model=model)
        for (epoch, model) in zip(event_epoch, d_models)
    }
    network = fe.Network(ops=[
        EpochScheduler(fake_img_map),
        EpochScheduler(fake_score_map),
        ImageBlender(
            alpha=fade_in_alpha, inputs=("x", "x_low_res"), outputs="x_blend"),
        EpochScheduler(real_score_map),
        Interpolate(inputs=("x_fake", "x"), outputs="x_interp"),
        EpochScheduler(interp_score_map),
        GradientPenalty(inputs=("x_interp", "interp_score"), outputs="gp"),
        GLoss(inputs="fake_score", outputs="gloss"),
        DLoss(inputs=("real_score", "fake_score", "gp"), outputs="dloss"),
        EpochScheduler(g_update_map),
        EpochScheduler(d_update_map)
    ])
    traces = [
        AlphaController(alpha=fade_in_alpha,
                        fade_start_epochs=event_epoch[1:],
                        duration=phase_length,
                        batch_scheduler=batch_scheduler,
                        num_examples=len(dataset)),
        ModelSaver(model=g_models[-1],
                   save_dir=save_dir,
                   frequency=phase_length),
        ImageSaving(epoch_model_map={
            epoch - 1: model
            for (epoch,
                 model) in zip(event_epoch[1:] + [epochs + 1], g_models[:-1])
        },
                    save_dir=save_dir)
    ]
    estimator = fe.Estimator(
        pipeline=pipeline,
        network=network,
        epochs=epochs,
        traces=traces,
        max_train_steps_per_epoch=max_train_steps_per_epoch)
    return estimator
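To make the growth schedule at the top of this example concrete, here is what it evaluates to for the defaults target_size=128 and epochs=55 (a quick arithmetic check, not part of the original):

import numpy as np

target_size, epochs = 128, 55
num_grow = np.log2(target_size) - 2      # 5.0 growth steps
num_phases = int(2 * num_grow + 1)       # 11 phases
phase_length = int(epochs / num_phases)  # 5 epochs per phase
num_grow = int(num_grow)
event_epoch = [1, 1 + phase_length] + [phase_length * (2 * i + 1) + 1 for i in range(1, num_grow)]
# -> [1, 6, 16, 26, 36, 46]
event_size = [4] + [2**(i + 3) for i in range(num_grow)]
# -> [4, 8, 16, 32, 64, 128]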
Example #23
def get_estimator(batch_size=4,
                  epochs=200,
                  steps_per_epoch=1000,
                  validation_steps=None,
                  model_dir=tempfile.mkdtemp(),
                  imagenet_path=None,
                  srresnet_model_path=None):
    """Args:
        imagenet_path: folder path of ImageNet dataset, containing train and val subdirs .
        srresnet_model_path: srresnet model weights, srgan generator gets initialized with the weights.
    """

    assert imagenet_path is not None, 'Pass a valid folder path to the ImageNet dataset'
    assert srresnet_model_path is not None, 'An SRResNet model is needed to initialize the SRGAN generator'
    # Ensure the ImageNet dataset is downloaded; pass the folder containing the train and val
    # subdirectories. This script does not download the ImageNet data itself.
    train_csv, val_csv, path = srgan.load_data(path_imgnet=imagenet_path)

    writer = fe.RecordWriter(save_dir=os.path.join(path, "sr_tfrecords"),
                             train_data=train_csv,
                             validation_data=val_csv,
                             ops=[
                                 ImageReader(inputs="lowres",
                                             outputs="lowres"),
                                 ImageReader(inputs="highres",
                                             outputs="highres")
                             ],
                             compression="GZIP",
                             write_feature=['lowres', 'highres'])

    pipeline = fe.Pipeline(max_shuffle_buffer_mb=3000,
                           batch_size=batch_size,
                           data=writer,
                           ops=[
                               LowresRescale(inputs='lowres',
                                             outputs='lowres'),
                               Rescale(inputs='highres', outputs='highres'),
                           ])

    # prepare model
    model_gen = fe.build(model_def=srresnet_model_path,
                         model_name="srgan_gen",
                         optimizer=tf.optimizers.Adam(learning_rate=0.0001),
                         loss_name="mse_adv_loss",
                         custom_objects={'SubPixelConv2D': SubPixelConv2D})
    model_desc = fe.build(
        model_def=lambda: get_discriminator(input_shape=(96, 96, 3)),
        model_name="srgan_desc",
        optimizer=tf.optimizers.Adam(learning_rate=0.0001),
        loss_name="desc_loss")

    network = fe.Network(ops=[
        ModelOp(inputs='lowres', model=model_gen, outputs='superres'),
        ModelOp(inputs='superres', model=model_desc, outputs='pred_fake'),
        ModelOp(inputs='highres', model=model_desc, outputs='pred_true'),
        DLoss(inputs=("pred_true", "pred_fake"),
              outputs=("desc_loss", "real_loss", "fake_loss")),
        GLoss(inputs=('superres', 'highres', 'pred_fake'),
              outputs=("mse_adv_loss", "mse_loss", "adv_loss"),
              vgg_content=True)
    ])

    model_dir = os.path.join(path)
    estimator = fe.Estimator(
        network=network,
        pipeline=pipeline,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        traces=[
            ModelSaver(model_name="srgan_gen",
                       save_dir=model_dir,
                       save_best=True),
            ModelSaver(model_name="srgan_desc",
                       save_dir=model_dir,
                       save_best=True),
            LRController(model_name="srgan_gen",
                         lr_schedule=MyLRSchedule(schedule_mode='step')),
            LRController(model_name="srgan_desc",
                         lr_schedule=MyLRSchedule(schedule_mode='step'))
        ])

    return estimator
Example #24
def get_estimator(epsilon=0.04,
                  epochs=20,
                  batch_size=32,
                  code_length=16,
                  train_steps_per_epoch=None,
                  eval_steps_per_epoch=None,
                  save_dir=tempfile.mkdtemp()):
    # step 1
    train_data, eval_data = cifair10.load_data()
    test_data = eval_data.split(0.5)
    pipeline = fe.Pipeline(train_data=train_data,
                           eval_data=eval_data,
                           test_data=test_data,
                           batch_size=batch_size,
                           ops=[
                               Normalize(inputs="x",
                                         outputs="x",
                                         mean=(0.4914, 0.4822, 0.4465),
                                         std=(0.2471, 0.2435, 0.2616))
                           ])

    # step 2
    model = fe.build(model_fn=lambda: ecc_lenet(code_length=code_length),
                     optimizer_fn="adam")

    network = fe.Network(ops=[
        Watch(inputs="x", mode=('eval', 'test')),
        ModelOp(model=model, inputs="x", outputs="y_pred"),
        CrossEntropy(inputs=("y_pred", "y"), outputs="base_ce"),
        UpdateOp(model=model, loss_name="base_ce"),
        FGSM(data="x",
             loss="base_ce",
             outputs="x_adverse",
             epsilon=epsilon,
             mode=('eval', 'test')),
        ModelOp(model=model,
                inputs="x_adverse",
                outputs="y_pred_adv",
                mode=('eval', 'test')),
        CrossEntropy(inputs=("y_pred_adv", "y"),
                     outputs="adv_ce",
                     mode=('eval', 'test')),
        Average(inputs=("base_ce", "adv_ce"), outputs="avg_ce", mode='eval')
    ])
    # step 3
    traces = [
        Accuracy(true_key="y", pred_key="y_pred", output_name="base_accuracy"),
        Accuracy(true_key="y",
                 pred_key="y_pred_adv",
                 output_name="adversarial_accuracy"),
        BestModelSaver(model=model,
                       save_dir=save_dir,
                       metric="avg_ce",
                       save_best_mode="min",
                       load_best_final=True)
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             train_steps_per_epoch=train_steps_per_epoch,
                             eval_steps_per_epoch=eval_steps_per_epoch,
                             monitor_names=["adv_ce", "avg_ce"])
    return estimator
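The FGSM op above builds the adversarial batch with the fast gradient sign method, x_adverse = x + epsilon * sign(d(base_ce)/dx), which is why "x" has to be watched first. A sketch of the computation in TensorFlow terms (the library op may additionally clip to the valid input range):

import tensorflow as tf

def fgsm_attack(x, loss, tape, epsilon=0.04):
    # `x` must already be watched by `tape` (cf. the Watch op above)
    grad = tape.gradient(loss, x)
    return x + epsilon * tf.sign(grad)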
Example #25
def get_estimator(max_len=20,
                  epochs=10,
                  batch_size=64,
                  max_train_steps_per_epoch=None,
                  max_eval_steps_per_epoch=None,
                  save_dir=tempfile.mkdtemp(),
                  pretrained_model='bert-base-uncased',
                  data_dir=None):
    # step 1. prepare data
    train_data, eval_data, data_vocab, label_vocab = german_ner.load_data(
        root_dir=data_dir)
    tokenizer = BertTokenizer.from_pretrained(pretrained_model,
                                              do_lower_case=True)
    tag2idx = char2idx(label_vocab)
    pipeline = fe.Pipeline(train_data=train_data,
                           eval_data=eval_data,
                           batch_size=batch_size,
                           ops=[
                               Tokenize(inputs="x",
                                        outputs="x",
                                        tokenize_fn=tokenizer.tokenize),
                               WordtoId(
                                   inputs="x",
                                   outputs="x",
                                   mapping=tokenizer.convert_tokens_to_ids),
                               WordtoId(inputs="y",
                                        outputs="y",
                                        mapping=tag2idx),
                               PadSequence(max_len=max_len,
                                           inputs="x",
                                           outputs="x"),
                               PadSequence(max_len=max_len,
                                           value=len(tag2idx),
                                           inputs="y",
                                           outputs="y"),
                               AttentionMask(inputs="x", outputs="x_masks")
                           ])

    # step 2. prepare model
    model = fe.build(model_fn=lambda: ner_model(max_len, pretrained_model),
                     optimizer_fn=lambda: tf.optimizers.Adam(1e-5))
    network = fe.Network(ops=[
        ModelOp(model=model, inputs=["x", "x_masks"], outputs="y_pred"),
        Reshape(inputs="y", outputs="y", shape=(-1, )),
        Reshape(inputs="y_pred", outputs="y_pred", shape=(-1, 24)),
        CrossEntropy(inputs=("y_pred", "y"), outputs="loss"),
        UpdateOp(model=model, loss_name="loss")
    ])

    traces = [
        Accuracy(true_key="y", pred_key="y_pred"),
        BestModelSaver(model=model, save_dir=save_dir)
    ]

    # step 3. prepare estimator
    estimator = fe.Estimator(
        network=network,
        pipeline=pipeline,
        epochs=epochs,
        traces=traces,
        max_train_steps_per_epoch=max_train_steps_per_epoch,
        max_eval_steps_per_epoch=max_eval_steps_per_epoch)

    return estimator
Example #26
def get_estimator(batch_size=8,
                  epochs=50,
                  train_steps_per_epoch=None,
                  eval_steps_per_epoch=None,
                  save_dir=tempfile.mkdtemp(),
                  data_dir=None):
    # load CUB200 dataset.
    train_data = cub200.load_data(root_dir=data_dir)
    eval_data = train_data.split(0.3)
    test_data = eval_data.split(0.5)

    # step 1, pipeline
    pipeline = fe.Pipeline(batch_size=batch_size,
                           train_data=train_data,
                           eval_data=eval_data,
                           test_data=test_data,
                           ops=[
                               ReadImage(inputs="image",
                                         outputs="image",
                                         parent_path=train_data.parent_path),
                               Normalize(inputs="image",
                                         outputs="image",
                                         mean=1.0,
                                         std=1.0,
                                         max_pixel_value=127.5),
                               ReadMat(file='annotation',
                                       keys="seg",
                                       parent_path=train_data.parent_path),
                               LongestMaxSize(max_size=512,
                                              image_in="image",
                                              image_out="image",
                                              mask_in="seg",
                                              mask_out="seg"),
                               PadIfNeeded(min_height=512,
                                           min_width=512,
                                           image_in="image",
                                           image_out="image",
                                           mask_in="seg",
                                           mask_out="seg",
                                           border_mode=cv2.BORDER_CONSTANT,
                                           value=0,
                                           mask_value=0),
                               ShiftScaleRotate(
                                   image_in="image",
                                   mask_in="seg",
                                   image_out="image",
                                   mask_out="seg",
                                   mode="train",
                                   shift_limit=0.2,
                                   rotate_limit=15.0,
                                   scale_limit=0.2,
                                   border_mode=cv2.BORDER_CONSTANT,
                                   value=0,
                                   mask_value=0),
                               Sometimes(
                                   HorizontalFlip(image_in="image",
                                                  mask_in="seg",
                                                  image_out="image",
                                                  mask_out="seg",
                                                  mode="train")),
                               Reshape(shape=(512, 512, 1),
                                       inputs="seg",
                                       outputs="seg")
                           ])

    # step 2, network
    resunet50 = fe.build(model_fn=ResUnet50,
                         model_name="resunet50",
                         optimizer_fn=lambda: tf.optimizers.Adam(1e-4))
    uncertainty = fe.build(model_fn=UncertaintyLossNet,
                           model_name="uncertainty",
                           optimizer_fn=lambda: tf.optimizers.Adam(2e-5))

    network = fe.Network(ops=[
        ModelOp(inputs='image',
                model=resunet50,
                outputs=["label_pred", "mask_pred"]),
        CrossEntropy(inputs=["label_pred", "label"],
                     outputs="cls_loss",
                     form="sparse",
                     average_loss=False),
        CrossEntropy(inputs=["mask_pred", "seg"],
                     outputs="seg_loss",
                     form="binary",
                     average_loss=False),
        ModelOp(inputs=["cls_loss", "seg_loss"],
                model=uncertainty,
                outputs="total_loss"),
        ReduceLoss(inputs="total_loss", outputs="total_loss"),
        UpdateOp(model=resunet50, loss_name="total_loss"),
        UpdateOp(model=uncertainty, loss_name="total_loss")
    ])

    # step 3, estimator
    traces = [
        Accuracy(true_key="label", pred_key="label_pred"),
        Dice(true_key="seg", pred_key='mask_pred'),
        BestModelSaver(model=resunet50,
                       save_dir=save_dir,
                       metric="total_loss",
                       save_best_mode="min"),
        LRScheduler(model=resunet50,
                    lr_fn=lambda step: cosine_decay(
                        step, cycle_length=26400, init_lr=1e-4))
    ]
    estimator = fe.Estimator(network=network,
                             pipeline=pipeline,
                             traces=traces,
                             epochs=epochs,
                             train_steps_per_epoch=train_steps_per_epoch,
                             eval_steps_per_epoch=eval_steps_per_epoch,
                             log_steps=500)

    return estimator
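UncertaintyLossNet is not shown here; models used this way commonly implement learned task weighting in the style of Kendall et al. (2018), roughly as below (an assumption about its contents, not the example's actual code):

import tensorflow as tf

class UncertaintyLossNet(tf.keras.Model):
    # combine task losses with trainable log-variance weights
    def __init__(self):
        super().__init__()
        self.log_vars = tf.Variable([0.0, 0.0], trainable=True)

    def call(self, losses):
        cls_loss, seg_loss = losses
        return (tf.exp(-self.log_vars[0]) * cls_loss + self.log_vars[0] +
                tf.exp(-self.log_vars[1]) * seg_loss + self.log_vars[1])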
Example #27
def get_estimator(data_dir=None,
                  model_dir=tempfile.mkdtemp(),
                  epochs=200,
                  batch_size_per_gpu=32,
                  train_steps_per_epoch=None,
                  eval_steps_per_epoch=None):
    num_device = get_num_devices()
    train_ds, val_ds = mscoco.load_data(root_dir=data_dir)
    train_ds = PreMosaicDataset(mscoco_ds=train_ds)
    batch_size = num_device * batch_size_per_gpu
    pipeline = fe.Pipeline(
        train_data=train_ds,
        eval_data=val_ds,
        ops=[
            ReadImage(inputs=("image1", "image2", "image3", "image4"),
                      outputs=("image1", "image2", "image3", "image4"),
                      mode="train"),
            ReadImage(inputs="image", outputs="image", mode="eval"),
            LongestMaxSize(max_size=640,
                           image_in="image1",
                           bbox_in="bbox1",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image2",
                           bbox_in="bbox2",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image3",
                           bbox_in="bbox3",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image4",
                           bbox_in="bbox4",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image",
                           bbox_in="bbox",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="eval"),
            PadIfNeeded(min_height=640,
                        min_width=640,
                        image_in="image",
                        bbox_in="bbox",
                        bbox_params=BboxParams("coco", min_area=1.0),
                        mode="eval",
                        border_mode=cv2.BORDER_CONSTANT,
                        value=(114, 114, 114)),
            CombineMosaic(inputs=("image1", "image2", "image3", "image4",
                                  "bbox1", "bbox2", "bbox3", "bbox4"),
                          outputs=("image", "bbox"),
                          mode="train"),
            CenterCrop(height=640,
                       width=640,
                       image_in="image",
                       bbox_in="bbox",
                       bbox_params=BboxParams("coco", min_area=1.0),
                       mode="train"),
            Sometimes(
                HorizontalFlip(image_in="image",
                               bbox_in="bbox",
                               bbox_params=BboxParams("coco", min_area=1.0),
                               mode="train")),
            HSVAugment(inputs="image", outputs="image", mode="train"),
            ToArray(inputs="bbox", outputs="bbox", dtype="float32"),
            CategoryID2ClassID(inputs="bbox", outputs="bbox"),
            GTBox(inputs="bbox",
                  outputs=("gt_sbbox", "gt_mbbox", "gt_lbbox"),
                  image_size=640),
            Delete(keys=("image1", "image2", "image3", "image4", "bbox1",
                         "bbox2", "bbox3", "bbox4", "bbox"),
                   mode="train"),
            Delete(keys="image_id", mode="eval"),
            Batch(batch_size=batch_size, pad_value=0)
        ])
    init_lr = 1e-2 / 64 * batch_size
    model = fe.build(
        lambda: YoloV5(w=640, h=640, c=3),
        optimizer_fn=lambda x: torch.optim.SGD(
            x, lr=init_lr, momentum=0.937, weight_decay=0.0005, nesterov=True),
        mixed_precision=True)
    network = fe.Network(ops=[
        RescaleTranspose(inputs="image", outputs="image"),
        ModelOp(model=model,
                inputs="image",
                outputs=("pred_s", "pred_m", "pred_l")),
        DecodePred(inputs=("pred_s", "pred_m", "pred_l"),
                   outputs=("pred_s", "pred_m", "pred_l")),
        ComputeLoss(inputs=("pred_s", "gt_sbbox"),
                    outputs=("sbbox_loss", "sconf_loss", "scls_loss")),
        ComputeLoss(inputs=("pred_m", "gt_mbbox"),
                    outputs=("mbbox_loss", "mconf_loss", "mcls_loss")),
        ComputeLoss(inputs=("pred_l", "gt_lbbox"),
                    outputs=("lbbox_loss", "lconf_loss", "lcls_loss")),
        Average(inputs=("sbbox_loss", "mbbox_loss", "lbbox_loss"),
                outputs="bbox_loss"),
        Average(inputs=("sconf_loss", "mconf_loss", "lconf_loss"),
                outputs="conf_loss"),
        Average(inputs=("scls_loss", "mcls_loss", "lcls_loss"),
                outputs="cls_loss"),
        Average(inputs=("bbox_loss", "conf_loss", "cls_loss"),
                outputs="total_loss"),
        PredictBox(width=640,
                   height=640,
                   inputs=("pred_s", "pred_m", "pred_l"),
                   outputs="box_pred",
                   mode="eval"),
        UpdateOp(model=model, loss_name="total_loss")
    ])
    traces = [
        MeanAveragePrecision(num_classes=80,
                             true_key='bbox',
                             pred_key='box_pred',
                             mode="eval"),
        BestModelSaver(model=model,
                       save_dir=model_dir,
                       metric='mAP',
                       save_best_mode="max")
    ]
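    # epoch-keyed schedule: step-wise warmup during epochs 1-3, then cosine decay
    # from init_lr down to init_lr / 100 over the remaining epochs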
    lr_schedule = {
        1: LRScheduler(model=model,
                       lr_fn=lambda step: lr_schedule_warmup(
                           step,
                           train_steps_epoch=np.ceil(len(train_ds) / batch_size),
                           init_lr=init_lr)),
        4: LRScheduler(model=model,
                       lr_fn=lambda epoch: cosine_decay(epoch,
                                                        cycle_length=epochs - 3,
                                                        init_lr=init_lr,
                                                        min_lr=init_lr / 100,
                                                        start=4))
    }
    traces.append(EpochScheduler(lr_schedule))
    estimator = fe.Estimator(
        pipeline=pipeline,
        network=network,
        epochs=epochs,
        traces=traces,
        monitor_names=["bbox_loss", "conf_loss", "cls_loss"],
        train_steps_per_epoch=train_steps_per_epoch,
        eval_steps_per_epoch=eval_steps_per_epoch)
    return estimator
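This get_estimator wires mosaic augmentation, per-scale YOLO losses, and a
warmup-plus-cosine learning-rate schedule into a single Estimator. A minimal
driver sketch follows; the argument names are inferred from the body above,
since the full signature appears earlier in the document:

# Sketch: drive the YoloV5 example (argument values here are arbitrary)
est = get_estimator(epochs=40, batch_size=16)
est.fit()  # mosaic-augmented training with warmup followed by cosine LR decay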
Example #28
    def test_estimator_check_network_op_trace_invoke_sequence_tf_backend(self):
        epochs = 1
        batches = 10  # dataset has 100 samples and batch_size is 10
        iostream = StringIO()
        loader = get_sample_tf_dataset()
        pipeline = fe.Pipeline(test_data=loader)
        model = fe.build(model_fn=LeNetTf, optimizer_fn="adam")
        ops = [
            ModelOp(model=model, inputs="x", outputs="y_pred"),
            ShoutNameOp(name="A", iostream=iostream),
            ShoutNameOp(name="B", iostream=iostream)
        ]

        network = fe.Network(ops=ops)

        traces = [
            ShoutNameTrace(name="a", iostream=iostream),
            ShoutNameTrace(name="b", iostream=iostream)
        ]
        est = fe.Estimator(pipeline=pipeline,
                           network=network,
                           epochs=epochs,
                           traces=traces)
        est.test()

        # create the expected calling sequence in another iostream
        iostream2 = StringIO()
        ops2 = [
            ShoutNameOp(name="A", iostream=iostream2),
            ShoutNameOp(name="B", iostream=iostream2)
        ]
        traces2 = [
            ShoutNameTrace(name="a", iostream=iostream2),
            ShoutNameTrace(name="b", iostream=iostream2)
        ]

        # determine if running environment is multi-gpu (only needed in tf backend)
        strategy = tf.distribute.get_strategy()
        if isinstance(strategy, tf.distribute.MirroredStrategy):
            device_count = len(
                tf.config.list_physical_devices(device_type="GPU"))
        else:
            device_count = 1

        for trace in traces2:
            trace.on_begin(None)
        for epoch in range(epochs):
            for trace in traces2:
                trace.on_epoch_begin(None)
            for batch in range(batches):
                for trace in traces2:
                    trace.on_batch_begin(None)
                if batch == 0:
                    # ShoutNameOp's forward() is invoked once per device while tf builds the
                    # static graph on the first batch. e.g. 4 GPUs -> 4 times, CPU -> 1 time
                    for _ in range(device_count):
                        for op in ops2:
                            op.forward(None, None)
                for trace in traces2:
                    trace.on_batch_end(None)
            for trace in traces2:
                trace.on_epoch_end(None)
        for trace in traces2:
            trace.on_end(None)

        self.assertEqual(iostream.getvalue(), iostream2.getvalue())
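ShoutNameOp and ShoutNameTrace are helpers defined elsewhere in the test
suite. Below is a hypothetical reconstruction, inferred only from how they are
used above; the base classes, hook choices, and written strings are assumptions:

# Hypothetical reconstruction; the real helpers live in the test suite.
from fastestimator.op.tensorop import TensorOp
from fastestimator.trace import Trace

class ShoutNameOp(TensorOp):
    def __init__(self, name, iostream):
        super().__init__(inputs=None, outputs=None)
        self.name = name
        self.iostream = iostream

    def forward(self, data, state):
        self.iostream.write("op_" + self.name)  # record every invocation
        return data

class ShoutNameTrace(Trace):
    def __init__(self, name, iostream):
        super().__init__()
        self.name = name
        self.iostream = iostream

    def _shout(self, hook):
        self.iostream.write(hook + "_" + self.name)  # record lifecycle order

    def on_begin(self, data): self._shout("on_begin")
    def on_epoch_begin(self, data): self._shout("on_epoch_begin")
    def on_batch_begin(self, data): self._shout("on_batch_begin")
    def on_batch_end(self, data): self._shout("on_batch_end")
    def on_epoch_end(self, data): self._shout("on_epoch_end")
    def on_end(self, data): self._shout("on_end")

Because the test replays the same hooks into iostream2, the equality check
holds regardless of the exact strings written, as long as both streams use the
same classes.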
Example #29
def get_estimator(epochs=20,
                  batch_size=4,
                  train_steps_per_epoch=None,
                  eval_steps_per_epoch=None,
                  save_dir=tempfile.mkdtemp(),
                  log_steps=20,
                  data_dir=None):
    # step 1
    csv = montgomery.load_data(root_dir=data_dir)
    pipeline = fe.Pipeline(
        train_data=csv,
        eval_data=csv.split(0.2),
        batch_size=batch_size,
        ops=[
            ReadImage(inputs="image",
                      parent_path=csv.parent_path,
                      outputs="image",
                      color_flag='gray'),
            ReadImage(inputs="mask_left",
                      parent_path=csv.parent_path,
                      outputs="mask_left",
                      color_flag='gray',
                      mode='!infer'),
            ReadImage(inputs="mask_right",
                      parent_path=csv.parent_path,
                      outputs="mask_right",
                      color_flag='gray',
                      mode='!infer'),
            CombineLeftRightMask(inputs=("mask_left", "mask_right"),
                                 outputs="mask",
                                 mode='!infer'),
            Resize(image_in="image", width=512, height=512),
            Resize(image_in="mask", width=512, height=512, mode='!infer'),
            Sometimes(numpy_op=HorizontalFlip(
                image_in="image", mask_in="mask", mode='train')),
            Sometimes(numpy_op=Rotate(image_in="image",
                                      mask_in="mask",
                                      limit=(-10, 10),
                                      border_mode=cv2.BORDER_CONSTANT,
                                      mode='train')),
            Minmax(inputs="image", outputs="image"),
            Minmax(inputs="mask", outputs="mask", mode='!infer')
        ])
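    # note: ops marked mode='!infer' run during train/eval but are skipped at
    # inference time, when no ground-truth masks are available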

    # step 2
    model = fe.build(
        model_fn=lambda: UNet(input_size=(512, 512, 1)),
        optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate=0.0001),
        model_name="lung_segmentation")
    network = fe.Network(ops=[
        ModelOp(inputs="image", model=model, outputs="pred_segment"),
        CrossEntropy(
            inputs=("pred_segment", "mask"), outputs="loss", form="binary"),
        UpdateOp(model=model, loss_name="loss")
    ])

    # step 3
    traces = [
        Dice(true_key="mask", pred_key="pred_segment"),
        BestModelSaver(model=model,
                       save_dir=save_dir,
                       metric='Dice',
                       save_best_mode='max')
    ]
    estimator = fe.Estimator(network=network,
                             pipeline=pipeline,
                             epochs=epochs,
                             log_steps=log_steps,
                             traces=traces,
                             train_steps_per_epoch=train_steps_per_epoch,
                             eval_steps_per_epoch=eval_steps_per_epoch)

    return estimator
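Once trained, single-image inference could look like the sketch below; it
assumes FastEstimator's pipeline.transform/network.transform helpers, and the
image path is a placeholder:

# Inference sketch (hypothetical path; '!infer' ops are skipped automatically)
est = get_estimator()
est.fit()
sample = {"image": "/path/to/chest_xray.png"}
sample = est.pipeline.transform(sample, mode="infer")
mask = est.network.transform(sample, mode="infer")["pred_segment"]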
Example #30
def get_estimator(epochs=50,
                  batch_size=128,
                  train_steps_per_epoch=None,
                  eval_steps_per_epoch=None,
                  save_dir=tempfile.mkdtemp()):
    # step 1
    train_data, eval_data = cifair100.load_data()

    # Add label noise to simulate real-world labeling problems
    corrupt_dataset(train_data)

    test_data = eval_data.split(range(len(eval_data) // 2))
    pipeline = fe.Pipeline(train_data=train_data,
                           eval_data=eval_data,
                           test_data=test_data,
                           batch_size=batch_size,
                           ops=[
                               Normalize(inputs="x",
                                         outputs="x",
                                         mean=(0.4914, 0.4822, 0.4465),
                                         std=(0.2471, 0.2435, 0.2616)),
                               PadIfNeeded(min_height=40,
                                           min_width=40,
                                           image_in="x",
                                           image_out="x",
                                           mode="train"),
                               RandomCrop(32,
                                          32,
                                          image_in="x",
                                          image_out="x",
                                          mode="train"),
                               Sometimes(
                                   HorizontalFlip(image_in="x",
                                                  image_out="x",
                                                  mode="train")),
                               CoarseDropout(inputs="x",
                                             outputs="x",
                                             mode="train",
                                             max_holes=1),
                           ])

    # step 2
    model = fe.build(model_fn=big_lenet, optimizer_fn='adam')
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="x", outputs="y_pred"),
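        # SuperLoss wraps the base CrossEntropy, down-weighting hard/noisy samples
        # and emitting a per-sample "confidence" consumed by LabelTracker below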
        SuperLoss(CrossEntropy(inputs=("y_pred", "y"), outputs="ce"),
                  output_confidence="confidence"),
        UpdateOp(model=model, loss_name="ce")
    ])

    # step 3
    traces = [
        MCC(true_key="y", pred_key="y_pred"),
        BestModelSaver(model=model,
                       save_dir=save_dir,
                       metric="mcc",
                       save_best_mode="max",
                       load_best_final=True),
        LabelTracker(metric="confidence",
                     label="data_labels",
                     label_mapping={
                         "Normal": 0,
                         "Corrupted": 1
                     },
                     mode="train",
                     outputs="label_confidence"),
        ImageSaver(inputs="label_confidence", save_dir=save_dir, mode="train"),
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             train_steps_per_epoch=train_steps_per_epoch,
                             eval_steps_per_epoch=eval_steps_per_epoch)
    return estimator
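A short end-to-end sketch of this noisy-label example; the epoch and batch
values here are arbitrary:

# Sketch: train with SuperLoss and evaluate on the held-out test split
est = get_estimator(epochs=2, batch_size=64)
est.fit()   # LabelTracker records per-label "confidence" curves during training
est.test()  # MCC on the half of the ciFAIR-100 eval set split off as test data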