def test_input(self):
    """Run RandomCrop on the single-image fixture and verify its output.

    Two sub-tests are executed: the forward result must be exactly a `list`,
    and its first element must have the shape stored in
    `self.single_output_shape`.
    """
    crop_op = RandomCrop(image_in='x', height=self.height, width=self.width)
    cropped = crop_op.forward(data=self.single_input, state={})

    with self.subTest('Check output type'):
        self.assertEqual(type(cropped), list)

    with self.subTest('Check output image shape'):
        self.assertEqual(cropped[0].shape, self.single_output_shape)
def pretrain_model(epochs, batch_size, max_train_steps_per_epoch, save_dir):
    """Run contrastive pre-training on two augmented views of each image.

    Args:
        epochs: Number of epochs passed to the Estimator.
        batch_size: Pipeline batch size.
        max_train_steps_per_epoch: Forwarded to the Estimator to cap the
            training steps per epoch.
        save_dir: Directory in which ModelSaver stores `model_con`.

    Returns:
        The pair `(model_con, model_finetune)` produced by `fe.build`. Only
        `model_con` is updated by the network built here.
    """
    def _augmentation_ops(view_key):
        # One full augmentation branch reading "x" and writing `view_key`.
        # Each call creates fresh op instances, so the two views stay independent.
        return [
            RandomCrop(32, 32, image_in="x", image_out=view_key),
            Sometimes(HorizontalFlip(image_in=view_key, image_out=view_key), prob=0.5),
            Sometimes(
                ColorJitter(inputs=view_key,
                            outputs=view_key,
                            brightness=0.8,
                            contrast=0.8,
                            saturation=0.8,
                            hue=0.2),
                prob=0.8),
            Sometimes(ToGray(inputs=view_key, outputs=view_key), prob=0.2),
            Sometimes(GaussianBlur(inputs=view_key, outputs=view_key, blur_limit=(3, 3), sigma_limit=(0.1, 2.0)),
                      prob=0.5),
            ToFloat(inputs=view_key, outputs=view_key),
        ]

    # step 1: prepare dataset (the second split returned by load_data is not needed here)
    train_data, _ = load_data()
    pipeline = fe.Pipeline(
        train_data=train_data,
        batch_size=batch_size,
        ops=[
            PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x"),
            # augmentation 1
            *_augmentation_ops("x_aug"),
            # augmentation 2
            *_augmentation_ops("x_aug2"),
        ])

    # step 2: prepare network
    model_con, model_finetune = fe.build(model_fn=ResNet9, optimizer_fn=["adam", "adam"])
    network = fe.Network(ops=[
        # Both views are concatenated on axis 0 so a single forward pass covers them,
        # then the predictions are split back into two halves.
        LambdaOp(lambda x, y: tf.concat([x, y], axis=0), inputs=["x_aug", "x_aug2"], outputs="x_com"),
        ModelOp(model=model_con, inputs="x_com", outputs="y_com"),
        LambdaOp(lambda x: tf.split(x, 2, axis=0), inputs="y_com", outputs=["y_pred", "y_pred2"]),
        NTXentOp(arg1="y_pred", arg2="y_pred2", outputs=["NTXent", "logit", "label"]),
        UpdateOp(model=model_con, loss_name="NTXent")
    ])

    # step 3: prepare estimator
    traces = [
        Accuracy(true_key="label", pred_key="logit", mode="train", output_name="contrastive_accuracy"),
        ModelSaver(model=model_con, save_dir=save_dir),
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             max_train_steps_per_epoch=max_train_steps_per_epoch,
                             monitor_names="contrastive_accuracy")
    estimator.fit()

    return model_con, model_finetune
def get_estimator(epochs=50,
                  batch_size=128,
                  max_train_steps_per_epoch=None,
                  max_eval_steps_per_epoch=None,
                  save_dir=None):
    """Build an Estimator that trains `big_lenet` with a SuperLoss-wrapped cross entropy.

    Args:
        epochs: Number of epochs passed to the Estimator.
        batch_size: Pipeline batch size.
        max_train_steps_per_epoch: Forwarded to the Estimator.
        max_eval_steps_per_epoch: Forwarded to the Estimator.
        save_dir: Directory used by BestModelSaver and ImageSaver. When None
            (the default) a new temporary directory is created on each call,
            rather than once at import time.

    Returns:
        The configured `fe.Estimator` (not yet fitted).
    """
    if save_dir is None:
        save_dir = tempfile.mkdtemp()

    # step 1
    train_data, eval_data = cifair100.load_data()
    # Add label noise to simulate real-world labeling problems
    corrupt_dataset(train_data)
    # Test data is taken from the first half of the eval indices.
    # NOTE(review): presumably split() also removes them from eval_data -- confirm.
    test_data = eval_data.split(range(len(eval_data) // 2))
    pipeline = fe.Pipeline(
        train_data=train_data,
        eval_data=eval_data,
        test_data=test_data,
        batch_size=batch_size,
        ops=[
            Normalize(inputs="x", outputs="x", mean=(0.4914, 0.4822, 0.4465), std=(0.2471, 0.2435, 0.2616)),
            PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x", mode="train"),
            RandomCrop(32, 32, image_in="x", image_out="x", mode="train"),
            Sometimes(HorizontalFlip(image_in="x", image_out="x", mode="train")),
            CoarseDropout(inputs="x", outputs="x", mode="train", max_holes=1),
            ChannelTranspose(inputs="x", outputs="x")
        ])

    # step 2
    model = fe.build(model_fn=big_lenet, optimizer_fn='adam')
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="x", outputs="y_pred"),
        SuperLoss(CrossEntropy(inputs=("y_pred", "y"), outputs="ce"), output_confidence="confidence"),
        UpdateOp(model=model, loss_name="ce")
    ])

    # step 3
    traces = [
        MCC(true_key="y", pred_key="y_pred"),
        BestModelSaver(model=model, save_dir=save_dir, metric="mcc", save_best_mode="max", load_best_final=True),
        LabelTracker(metric="confidence",
                     label="data_labels",
                     label_mapping={
                         "Normal": 0, "Corrupted": 1
                     },
                     mode="train",
                     outputs="label_confidence"),
        ImageSaver(inputs="label_confidence", save_dir=save_dir, mode="train"),
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             max_train_steps_per_epoch=max_train_steps_per_epoch,
                             max_eval_steps_per_epoch=max_eval_steps_per_epoch)
    return estimator
def get_estimator(epochs=24, batch_size=128, lr_epochs=100, max_train_steps_per_epoch=None, save_dir=None):
    """Build an Estimator that trains ResNet9 with a searched max LR and `super_schedule`.

    The maximum learning rate is found with `search_max_lr`, then the model
    and network are rebuilt so training does not reuse the model that was
    used for the search.

    Args:
        epochs: Number of training epochs; also used to derive the schedule's
            mid and end steps.
        batch_size: Pipeline batch size; also used to derive the schedule steps.
        lr_epochs: Epoch count forwarded to `search_max_lr`.
        max_train_steps_per_epoch: Forwarded to the Estimator.
        save_dir: Directory used by BestModelSaver. When None (the default) a
            new temporary directory is created on each call, rather than once
            at import time.

    Returns:
        The configured `fe.Estimator` (not yet fitted).
    """
    if save_dir is None:
        save_dir = tempfile.mkdtemp()

    def _build_model_and_network():
        # Fresh ResNet9 plus the matching forward/loss/update network.
        net_model = fe.build(model_fn=ResNet9, optimizer_fn="sgd")
        net = fe.Network(ops=[
            ModelOp(model=net_model, inputs="x", outputs="y_pred"),
            CrossEntropy(inputs=("y_pred", "y"), outputs="ce"),
            UpdateOp(model=net_model, loss_name="ce")
        ])
        return net_model, net

    # step 1: prepare dataset
    train_data, test_data = load_data()
    pipeline = fe.Pipeline(
        train_data=train_data,
        eval_data=test_data,
        batch_size=batch_size,
        ops=[
            Normalize(inputs="x", outputs="x", mean=(0.4914, 0.4822, 0.4465), std=(0.2471, 0.2435, 0.2616)),
            PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x", mode="train"),
            RandomCrop(32, 32, image_in="x", image_out="x", mode="train"),
            Sometimes(HorizontalFlip(image_in="x", image_out="x", mode="train")),
            CoarseDropout(inputs="x", outputs="x", mode="train", max_holes=1),
            ChannelTranspose(inputs="x", outputs="x"),
            Onehot(inputs="y", outputs="y", mode="train", num_classes=10, label_smoothing=0.2)
        ])

    # step 2: prepare network
    model, network = _build_model_and_network()

    # get the max learning rate
    lr_max = search_max_lr(pipeline=pipeline, model=model, network=network, epochs=lr_epochs)
    lr_min = lr_max / 40
    print(f"The maximum LR: {lr_max}, and minimum LR: {lr_min}")
    mid_step = int(epochs * 0.45 * len(train_data) / batch_size)
    end_step = int(epochs * len(train_data) / batch_size)

    # reinitialize the model
    model, network = _build_model_and_network()

    # step 3: prepare estimator
    traces = [
        Accuracy(true_key="y", pred_key="y_pred"),
        BestModelSaver(model=model, save_dir=save_dir, metric="accuracy", save_best_mode="max"),
        LRScheduler(model=model, lr_fn=lambda step: super_schedule(step, lr_max, lr_min, mid_step, end_step))
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             max_train_steps_per_epoch=max_train_steps_per_epoch)
    return estimator
def finetune(weights_path,
             batch_size,
             epochs,
             model_dir=None,
             train_steps_per_epoch=None,
             eval_steps_per_epoch=None):
    """Fine-tune a vision transformer on the `cifair10` dataset and fit it.

    Args:
        weights_path: Forwarded to `vision_transformer` as `weights_path`.
        batch_size: Pipeline batch size.
        epochs: Number of epochs passed to the Estimator.
        model_dir: Directory used by BestModelSaver. When None (the default) a
            new temporary directory is created on each call, rather than once
            at import time.
        train_steps_per_epoch: Forwarded to the Estimator.
        eval_steps_per_epoch: Forwarded to the Estimator.

    Returns:
        None. The estimator is fitted with `warmup=False` as a side effect.
    """
    if model_dir is None:
        model_dir = tempfile.mkdtemp()

    train_data, eval_data = cifair10.load_data()
    pipeline = fe.Pipeline(
        train_data=train_data,
        eval_data=eval_data,
        batch_size=batch_size,
        ops=[
            Normalize(inputs="x", outputs="x", mean=(0.4914, 0.4822, 0.4465), std=(0.2471, 0.2435, 0.2616)),
            PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x", mode="train"),
            RandomCrop(32, 32, image_in="x", image_out="x", mode="train"),
            Sometimes(HorizontalFlip(image_in="x", image_out="x", mode="train")),
            CoarseDropout(inputs="x", outputs="x", mode="train", max_holes=1)
        ])

    # fe.build yields two models here; only the second one gets an optimizer and is trained.
    _, model = fe.build(
        model_fn=lambda: vision_transformer(num_class=10,
                                            weights_path=weights_path,
                                            image_size=(32, 32, 3),
                                            patch_size=4,
                                            num_layers=6,
                                            em_dim=256,
                                            num_heads=8,
                                            dff=512),
        optimizer_fn=[None, lambda: tf.optimizers.SGD(0.01, momentum=0.9)])
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="x", outputs="y_pred"),
        CrossEntropy(inputs=("y_pred", "y"), outputs="ce", from_logits=True),
        UpdateOp(model=model, loss_name="ce")
    ])
    traces = [
        Accuracy(true_key="y", pred_key="y_pred"),
        BestModelSaver(model=model, save_dir=model_dir, metric="accuracy", save_best_mode="max")
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             train_steps_per_epoch=train_steps_per_epoch,
                             eval_steps_per_epoch=eval_steps_per_epoch)
    estimator.fit(warmup=False)
def get_estimator(epochs=24, batch_size=512, max_train_steps_per_epoch=None, save_dir=None):
    """Build an Estimator that trains `FastCifar` with Adam and `lr_schedule`.

    Args:
        epochs: Number of epochs passed to the Estimator.
        batch_size: Pipeline batch size.
        max_train_steps_per_epoch: Forwarded to the Estimator.
        save_dir: Directory used by BestModelSaver. When None (the default) a
            new temporary directory is created on each call, rather than once
            at import time.

    Returns:
        The configured `fe.Estimator` (not yet fitted).
    """
    if save_dir is None:
        save_dir = tempfile.mkdtemp()

    # step 1: prepare dataset
    train_data, test_data = load_data()
    preprocessing = [
        Normalize(inputs="x", outputs="x", mean=(0.4914, 0.4822, 0.4465), std=(0.2471, 0.2435, 0.2616)),
        PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x", mode="train"),
        RandomCrop(32, 32, image_in="x", image_out="x", mode="train"),
        Sometimes(HorizontalFlip(image_in="x", image_out="x", mode="train")),
        CoarseDropout(inputs="x", outputs="x", mode="train", max_holes=1),
        ChannelTranspose(inputs="x", outputs="x"),
        Onehot(inputs="y", outputs="y", mode="train", num_classes=10, label_smoothing=0.2)
    ]
    pipeline = fe.Pipeline(train_data=train_data, test_data=test_data, batch_size=batch_size, ops=preprocessing)

    # step 2: prepare network
    model = fe.build(model_fn=FastCifar, optimizer_fn="adam")
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="x", outputs="y_pred"),
        CrossEntropy(inputs=("y_pred", "y"), outputs="ce"),
        UpdateOp(model=model, loss_name="ce")
    ])

    # step 3: prepare estimator
    traces = [
        Accuracy(true_key="y", pred_key="y_pred"),
        BestModelSaver(model=model, save_dir=save_dir, metric="accuracy", save_best_mode="max"),
        LRScheduler(model=model, lr_fn=lr_schedule)
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             max_train_steps_per_epoch=max_train_steps_per_epoch)
    return estimator
def get_estimator(epochs=150, batch_size=32, save_dir=None, train_steps_per_epoch=None, eval_steps_per_epoch=None):
    """Build an Estimator that trains `pyramidnet_cifar` with SGDW and `lr_schedule`.

    Args:
        epochs: Number of epochs passed to the Estimator.
        batch_size: Per-device batch size; the Pipeline receives
            `batch_size * get_num_devices()`.
        save_dir: Directory used by BestModelSaver. When None (the default) a
            new temporary directory is created on each call, rather than once
            at import time.
        train_steps_per_epoch: Forwarded to the Estimator.
        eval_steps_per_epoch: Forwarded to the Estimator.

    Returns:
        The configured `fe.Estimator` (not yet fitted).
    """
    if save_dir is None:
        save_dir = tempfile.mkdtemp()

    # step 1: prepare dataset
    train_data, eval_data = load_data()
    pipeline = fe.Pipeline(
        train_data=train_data,
        eval_data=eval_data,
        batch_size=batch_size * get_num_devices(),
        ops=[
            Normalize(inputs="x", outputs="x", mean=(0.4914, 0.4822, 0.4465), std=(0.2471, 0.2435, 0.2616)),
            PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x", mode="train"),
            RandomCrop(32, 32, image_in="x", image_out="x", mode="train"),
            Sometimes(HorizontalFlip(image_in="x", image_out="x", mode="train")),
            CoarseDropout(inputs="x", outputs="x", mode="train", max_holes=1)
        ])

    # step 2: prepare network
    model = fe.build(
        model_fn=lambda: pyramidnet_cifar(inputs_shape=(32, 32, 3),
                                          depth=272,
                                          alpha=200,
                                          num_classes=10,
                                          bottleneck=True),
        optimizer_fn=lambda: tfa.optimizers.SGDW(weight_decay=0.0001, lr=0.1, momentum=0.9))
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="x", outputs="y_pred"),
        CrossEntropy(inputs=("y_pred", "y"), outputs="ce", from_logits=True),
        UpdateOp(model=model, loss_name="ce")
    ])

    # step 3: prepare estimator
    traces = [
        Accuracy(true_key="y", pred_key="y_pred"),
        LRScheduler(model=model, lr_fn=lr_schedule),
        BestModelSaver(model=model, save_dir=save_dir, metric="accuracy", save_best_mode="max")
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             train_steps_per_epoch=train_steps_per_epoch,
                             eval_steps_per_epoch=eval_steps_per_epoch)
    return estimator
def get_estimator(weight=10.0,
                  epochs=200,
                  batch_size=1,
                  max_train_steps_per_epoch=None,
                  save_dir=None,
                  data_dir=None):
    """Build an Estimator for a two-generator / two-discriminator image translation setup (PyTorch).

    Args:
        weight: Forwarded to both GLoss ops as `weight`.
        epochs: Number of epochs passed to the Estimator.
        batch_size: Forwarded to `load_data`; the Pipeline itself is given no
            batch size here.
        max_train_steps_per_epoch: Forwarded to the Estimator.
        save_dir: Directory used by the ModelSaver traces. When None (the
            default) a new temporary directory is created on each call, rather
            than once at import time.
        data_dir: Forwarded to `load_data` as `root_dir`.

    Returns:
        The configured `fe.Estimator` (not yet fitted).
    """
    if save_dir is None:
        save_dir = tempfile.mkdtemp()

    def _adam(params):
        # All four models use the same Adam configuration.
        return torch.optim.Adam(params, lr=2e-4, betas=(0.5, 0.999))

    train_data, _ = load_data(batch_size=batch_size, root_dir=data_dir)
    device = "cuda" if torch.cuda.is_available() else "cpu"

    pipeline = fe.Pipeline(
        train_data=train_data,
        ops=[
            ReadImage(inputs=["A", "B"], outputs=["A", "B"]),
            Normalize(inputs=["A", "B"], outputs=["real_A", "real_B"], mean=1.0, std=1.0, max_pixel_value=127.5),
            Resize(height=286, width=286, image_in="real_A", image_out="real_A", mode="train"),
            RandomCrop(height=256, width=256, image_in="real_A", image_out="real_A", mode="train"),
            Resize(height=286, width=286, image_in="real_B", image_out="real_B", mode="train"),
            RandomCrop(height=256, width=256, image_in="real_B", image_out="real_B", mode="train"),
            Sometimes(HorizontalFlip(image_in="real_A", image_out="real_A", mode="train")),
            Sometimes(HorizontalFlip(image_in="real_B", image_out="real_B", mode="train")),
            ChannelTranspose(inputs=["real_A", "real_B"], outputs=["real_A", "real_B"])
        ])

    g_AtoB = fe.build(model_fn=Generator, model_name="g_AtoB", optimizer_fn=_adam)
    g_BtoA = fe.build(model_fn=Generator, model_name="g_BtoA", optimizer_fn=_adam)
    d_A = fe.build(model_fn=Discriminator, model_name="d_A", optimizer_fn=_adam)
    d_B = fe.build(model_fn=Discriminator, model_name="d_B", optimizer_fn=_adam)

    network = fe.Network(ops=[
        # Translate in both directions, then route the fakes through the buffers.
        ModelOp(inputs="real_A", model=g_AtoB, outputs="fake_B"),
        ModelOp(inputs="real_B", model=g_BtoA, outputs="fake_A"),
        Buffer(image_in="fake_A", image_out="buffer_fake_A"),
        Buffer(image_in="fake_B", image_out="buffer_fake_B"),
        # Discriminator passes over real, fake and buffered-fake images.
        ModelOp(inputs="real_A", model=d_A, outputs="d_real_A"),
        ModelOp(inputs="fake_A", model=d_A, outputs="d_fake_A"),
        ModelOp(inputs="buffer_fake_A", model=d_A, outputs="buffer_d_fake_A"),
        ModelOp(inputs="real_B", model=d_B, outputs="d_real_B"),
        ModelOp(inputs="fake_B", model=d_B, outputs="d_fake_B"),
        ModelOp(inputs="buffer_fake_B", model=d_B, outputs="buffer_d_fake_B"),
        # Identity ("same_*") and cycle ("cycled_*") generator passes.
        ModelOp(inputs="real_A", model=g_BtoA, outputs="same_A"),
        ModelOp(inputs="fake_B", model=g_BtoA, outputs="cycled_A"),
        ModelOp(inputs="real_B", model=g_AtoB, outputs="same_B"),
        ModelOp(inputs="fake_A", model=g_AtoB, outputs="cycled_B"),
        GLoss(inputs=("real_A", "d_fake_B", "cycled_A", "same_A"),
              weight=weight,
              device=device,
              outputs="g_AtoB_loss"),
        GLoss(inputs=("real_B", "d_fake_A", "cycled_B", "same_B"),
              weight=weight,
              device=device,
              outputs="g_BtoA_loss"),
        DLoss(inputs=("d_real_A", "buffer_d_fake_A"), outputs="d_A_loss", device=device),
        DLoss(inputs=("d_real_B", "buffer_d_fake_B"), outputs="d_B_loss", device=device),
        UpdateOp(model=g_AtoB, loss_name="g_AtoB_loss"),
        UpdateOp(model=g_BtoA, loss_name="g_BtoA_loss"),
        UpdateOp(model=d_A, loss_name="d_A_loss"),
        UpdateOp(model=d_B, loss_name="d_B_loss")
    ])

    traces = [
        ModelSaver(model=g_AtoB, save_dir=save_dir, frequency=10),
        ModelSaver(model=g_BtoA, save_dir=save_dir, frequency=10),
        LRScheduler(model=g_AtoB, lr_fn=lr_schedule),
        LRScheduler(model=g_BtoA, lr_fn=lr_schedule),
        LRScheduler(model=d_A, lr_fn=lr_schedule),
        LRScheduler(model=d_B, lr_fn=lr_schedule)
    ]

    estimator = fe.Estimator(network=network,
                             pipeline=pipeline,
                             epochs=epochs,
                             traces=traces,
                             max_train_steps_per_epoch=max_train_steps_per_epoch)
    return estimator
def get_estimator(weight=10.0,
                  epochs=200,
                  batch_size=1,
                  train_steps_per_epoch=None,
                  save_dir=None,
                  data_dir=None):
    """Build an Estimator for a two-generator / two-discriminator image translation setup (TensorFlow).

    Args:
        weight: Forwarded to both GLoss ops as `weight`.
        epochs: Number of epochs passed to the Estimator.
        batch_size: Forwarded to `load_data` and to both BufferUpdate traces.
        train_steps_per_epoch: Forwarded to the Estimator.
        save_dir: Directory used by the ModelSaver traces. When None (the
            default) a new temporary directory is created on each call, rather
            than once at import time.
        data_dir: Forwarded to `load_data` as `root_dir`.

    Returns:
        The configured `fe.Estimator` (not yet fitted).
    """
    if save_dir is None:
        save_dir = tempfile.mkdtemp()

    def _adam():
        # All four models use the same Adam configuration.
        return tf.optimizers.Adam(2e-4, 0.5)

    train_data, _ = load_data(batch_size=batch_size, root_dir=data_dir)

    pipeline = fe.Pipeline(
        train_data=train_data,
        ops=[
            ReadImage(inputs=["A", "B"], outputs=["A", "B"]),
            Normalize(inputs=["A", "B"], outputs=["real_A", "real_B"], mean=1.0, std=1.0, max_pixel_value=127.5),
            Resize(height=286, width=286, image_in="real_A", image_out="real_A", mode="train"),
            RandomCrop(height=256, width=256, image_in="real_A", image_out="real_A", mode="train"),
            Resize(height=286, width=286, image_in="real_B", image_out="real_B", mode="train"),
            RandomCrop(height=256, width=256, image_in="real_B", image_out="real_B", mode="train"),
            Sometimes(HorizontalFlip(image_in="real_A", image_out="real_A", mode="train")),
            Sometimes(HorizontalFlip(image_in="real_B", image_out="real_B", mode="train")),
            # Declares the index/buffer keys that the Buffer ops below read.
            # NOTE(review): presumably filled in by the BufferUpdate traces -- confirm.
            PlaceholderOp(outputs=("index_A", "buffer_A")),
            PlaceholderOp(outputs=("index_B", "buffer_B"))
        ])

    g_AtoB = fe.build(model_fn=build_generator, model_name="g_AtoB", optimizer_fn=_adam)
    g_BtoA = fe.build(model_fn=build_generator, model_name="g_BtoA", optimizer_fn=_adam)
    d_A = fe.build(model_fn=build_discriminator, model_name="d_A", optimizer_fn=_adam)
    d_B = fe.build(model_fn=build_discriminator, model_name="d_B", optimizer_fn=_adam)

    network = fe.Network(ops=[
        # Translate in both directions, then route the fakes through the buffers.
        ModelOp(inputs="real_A", model=g_AtoB, outputs="fake_B"),
        ModelOp(inputs="real_B", model=g_BtoA, outputs="fake_A"),
        Buffer(image_in="fake_A", buffer_in="buffer_A", index_in="index_A", image_out="buffer_fake_A"),
        Buffer(image_in="fake_B", buffer_in="buffer_B", index_in="index_B", image_out="buffer_fake_B"),
        # Discriminator passes over real, fake and buffered-fake images.
        ModelOp(inputs="real_A", model=d_A, outputs="d_real_A"),
        ModelOp(inputs="fake_A", model=d_A, outputs="d_fake_A"),
        ModelOp(inputs="buffer_fake_A", model=d_A, outputs="buffer_d_fake_A"),
        ModelOp(inputs="real_B", model=d_B, outputs="d_real_B"),
        ModelOp(inputs="fake_B", model=d_B, outputs="d_fake_B"),
        ModelOp(inputs="buffer_fake_B", model=d_B, outputs="buffer_d_fake_B"),
        # Identity ("same_*") and cycle ("cycled_*") generator passes.
        ModelOp(inputs="real_A", model=g_BtoA, outputs="same_A"),
        ModelOp(inputs="fake_B", model=g_BtoA, outputs="cycled_A"),
        ModelOp(inputs="real_B", model=g_AtoB, outputs="same_B"),
        ModelOp(inputs="fake_A", model=g_AtoB, outputs="cycled_B"),
        GLoss(inputs=("real_A", "d_fake_B", "cycled_A", "same_A"), weight=weight, outputs="g_AtoB_loss"),
        GLoss(inputs=("real_B", "d_fake_A", "cycled_B", "same_B"), weight=weight, outputs="g_BtoA_loss"),
        DLoss(inputs=("d_real_A", "buffer_d_fake_A"), outputs="d_A_loss"),
        DLoss(inputs=("d_real_B", "buffer_d_fake_B"), outputs="d_B_loss"),
        UpdateOp(model=g_AtoB, loss_name="g_AtoB_loss"),
        UpdateOp(model=g_BtoA, loss_name="g_BtoA_loss"),
        UpdateOp(model=d_A, loss_name="d_A_loss"),
        UpdateOp(model=d_B, loss_name="d_B_loss")
    ])

    traces = [
        BufferUpdate(input_name="fake_A",
                     buffer_size=50,
                     batch_size=batch_size,
                     mode="train",
                     output_name=["buffer_A", "index_A"]),
        BufferUpdate(input_name="fake_B",
                     buffer_size=50,
                     batch_size=batch_size,
                     mode="train",
                     output_name=["buffer_B", "index_B"]),
        ModelSaver(model=g_AtoB, save_dir=save_dir, frequency=5),
        ModelSaver(model=g_BtoA, save_dir=save_dir, frequency=5),
        LRScheduler(model=g_AtoB, lr_fn=lr_schedule),
        LRScheduler(model=g_BtoA, lr_fn=lr_schedule),
        LRScheduler(model=d_A, lr_fn=lr_schedule),
        LRScheduler(model=d_B, lr_fn=lr_schedule)
    ]

    estimator = fe.Estimator(network=network,
                             pipeline=pipeline,
                             epochs=epochs,
                             traces=traces,
                             train_steps_per_epoch=train_steps_per_epoch)
    return estimator
def finetune(pretrained_model,
             batch_size,
             epochs,
             model_dir=None,
             train_steps_per_epoch=None,
             eval_steps_per_epoch=None):
    """Fine-tune a ViT on `cifair10`, starting from a pretrained encoder, and fit it.

    Args:
        pretrained_model: Model whose `vit_encoder` state dict is copied into
            the newly built model.
        batch_size: Pipeline batch size.
        epochs: Number of epochs passed to the Estimator.
        model_dir: Directory used by BestModelSaver. When None (the default) a
            new temporary directory is created on each call, rather than once
            at import time.
        train_steps_per_epoch: Forwarded to the Estimator.
        eval_steps_per_epoch: Forwarded to the Estimator.

    Returns:
        None. The estimator is fitted with `warmup=False` as a side effect.
    """
    if model_dir is None:
        model_dir = tempfile.mkdtemp()

    train_data, eval_data = cifair10.load_data()
    pipeline = fe.Pipeline(
        train_data=train_data,
        eval_data=eval_data,
        batch_size=batch_size,
        ops=[
            Normalize(inputs="x", outputs="x", mean=(0.4914, 0.4822, 0.4465), std=(0.2471, 0.2435, 0.2616)),
            PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x", mode="train"),
            RandomCrop(32, 32, image_in="x", image_out="x", mode="train"),
            Sometimes(HorizontalFlip(image_in="x", image_out="x", mode="train")),
            CoarseDropout(inputs="x", outputs="x", mode="train", max_holes=1),
            ChannelTranspose(inputs="x", outputs="x")
        ])

    # NOTE(review): the head is built with num_classes=100 although the data comes
    # from cifair10 -- confirm whether 10 was intended. Left unchanged here.
    model = fe.build(
        model_fn=lambda: ViTModel(num_classes=100,
                                  image_size=32,
                                  patch_size=4,
                                  num_layers=6,
                                  num_channels=3,
                                  em_dim=256,
                                  num_heads=8,
                                  ff_dim=512),
        optimizer_fn=lambda x: torch.optim.SGD(x, lr=0.01, momentum=0.9, weight_decay=1e-4))

    # load the encoder's weight; when the new model exposes `.module`, both models
    # are accessed through that attribute.
    is_wrapped = hasattr(model, "module")
    target = model.module if is_wrapped else model
    source = pretrained_model.module if is_wrapped else pretrained_model
    target.vit_encoder.load_state_dict(source.vit_encoder.state_dict())

    network = fe.Network(ops=[
        ModelOp(model=model, inputs="x", outputs="y_pred"),
        CrossEntropy(inputs=("y_pred", "y"), outputs="ce", from_logits=True),
        UpdateOp(model=model, loss_name="ce")
    ])
    traces = [
        Accuracy(true_key="y", pred_key="y_pred"),
        BestModelSaver(model=model, save_dir=model_dir, metric="accuracy", save_best_mode="max")
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             train_steps_per_epoch=train_steps_per_epoch,
                             eval_steps_per_epoch=eval_steps_per_epoch)
    estimator.fit(warmup=False)
"train": 128, "test": 32 }, ops=[ Normalize(inputs="x", outputs="x", mean=(0.4914, 0.4822, 0.4465), std=(0.2471, 0.2435, 0.2616)), PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x", mode="train"), RandomCrop(32, 32, image_in="x", image_out="x", mode="train"), Sometimes( HorizontalFlip(image_in="x", image_out="x", mode="train")), CoarseDropout(inputs="x", outputs="x", mode="train", max_holes=1), Onehot(inputs="y", outputs="y", mode="train", num_classes=10, label_smoothing=0.2)
def pretrain_model(epochs, batch_size, train_steps_per_epoch, save_dir):
    """Run contrastive pre-training on two augmented views of each image (PyTorch).

    Args:
        epochs: Number of epochs passed to the Estimator.
        batch_size: Pipeline batch size.
        train_steps_per_epoch: Forwarded to the Estimator.
        save_dir: Directory in which ModelSaver stores the model.

    Returns:
        The trained contrastive model built from `ResNet9OneLayerHead(length=128)`.
    """
    def _augmentation_ops(view_key):
        # One full augmentation branch reading "x" and writing `view_key`.
        # Each call creates fresh op instances, so the two views stay independent.
        return [
            RandomCrop(32, 32, image_in="x", image_out=view_key),
            Sometimes(HorizontalFlip(image_in=view_key, image_out=view_key), prob=0.5),
            Sometimes(
                ColorJitter(inputs=view_key,
                            outputs=view_key,
                            brightness=0.8,
                            contrast=0.8,
                            saturation=0.8,
                            hue=0.2),
                prob=0.8),
            Sometimes(ToGray(inputs=view_key, outputs=view_key), prob=0.2),
            Sometimes(GaussianBlur(inputs=view_key, outputs=view_key, blur_limit=(3, 3), sigma_limit=(0.1, 2.0)),
                      prob=0.5),
            ChannelTranspose(inputs=view_key, outputs=view_key),
            ToFloat(inputs=view_key, outputs=view_key),
        ]

    # The second split returned by load_data is not needed for pre-training.
    train_data, _ = load_data()
    pipeline = fe.Pipeline(
        train_data=train_data,
        batch_size=batch_size,
        ops=[
            PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x", mode="train"),
            # augmentation 1
            *_augmentation_ops("x_aug"),
            # augmentation 2
            *_augmentation_ops("x_aug2"),
        ])

    model_con = fe.build(model_fn=lambda: ResNet9OneLayerHead(length=128), optimizer_fn="adam")
    network = fe.Network(ops=[
        # Both views are concatenated on dim 0 so a single forward pass covers them,
        # then the predictions are chunked back into two halves.
        LambdaOp(lambda x, y: torch.cat([x, y], dim=0), inputs=["x_aug", "x_aug2"], outputs="x_com"),
        ModelOp(model=model_con, inputs="x_com", outputs="y_com"),
        LambdaOp(lambda x: torch.chunk(x, 2, dim=0), inputs="y_com", outputs=["y_pred", "y_pred2"], mode="train"),
        NTXentOp(arg1="y_pred", arg2="y_pred2", outputs=["NTXent", "logit", "label"], mode="train"),
        UpdateOp(model=model_con, loss_name="NTXent")
    ])

    traces = [
        Accuracy(true_key="label", pred_key="logit", mode="train", output_name="contrastive_accuracy"),
        ModelSaver(model=model_con, save_dir=save_dir)
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             train_steps_per_epoch=train_steps_per_epoch)
    estimator.fit()

    return model_con
def get_estimator(epochs=12, batch_size=512, save_dir=None):
    """Build an Estimator that switches dataset and classifier head at epoch 11.

    Epochs 1-10 train the backbone and the 100-class head on cifair100; from
    epoch 11 on, only the 10-class head is updated, on cifair10.

    Args:
        epochs: Number of epochs passed to the Estimator.
        batch_size: Pipeline batch size.
        save_dir: Not used by this function (no saver trace is attached). It is
            kept for signature compatibility; the default is now None so that
            no temporary directory is created at import time.

    Returns:
        The configured `fe.Estimator` (not yet fitted).
    """
    # epoch 1-10: train on cifair100, epoch 11-end: train on cifair10
    cifair10_train, cifair10_test = cifair10.load_data()
    cifair100_train, _ = cifair100.load_data()
    train_ds = EpochScheduler({1: cifair100_train, 11: cifair10_train})
    pipeline = fe.Pipeline(
        train_data=train_ds,
        test_data=cifair10_test,
        batch_size=batch_size,
        ops=[
            Normalize(inputs="x", outputs="x", mean=(0.4914, 0.4822, 0.4465), std=(0.2471, 0.2435, 0.2616)),
            PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x", mode="train"),
            RandomCrop(32, 32, image_in="x", image_out="x", mode="train"),
            Sometimes(HorizontalFlip(image_in="x", image_out="x", mode="train")),
            CoarseDropout(inputs="x", outputs="x", mode="train", max_holes=1),
            ChannelTranspose(inputs="x", outputs="x")
        ])

    # step 2: prepare network
    backbone = fe.build(model_fn=lambda: Backbone(input_size=(3, 32, 32)), optimizer_fn="adam")
    cls_head_cifar100 = fe.build(model_fn=lambda: Classifier(classes=100), optimizer_fn="adam")
    cls_head_cifar10 = fe.build(model_fn=lambda: Classifier(classes=10), optimizer_fn="adam")
    # if you want to save the final cifar10 model, you can build a model then provide it to ModelSaver
    # final_model_cifar10 = fe.build(model_fn=lambda: MyModel(backbone, cls_head_cifar10), optimizer_fn=None)

    # epoch 1-10: train backbone and cls_head_cifar100, epoch 11-end: train cls_head_cifar10 only
    ModelOp_cls_head = EpochScheduler({
        1: ModelOp(model=cls_head_cifar100, inputs="feature", outputs="y_pred"),
        11: ModelOp(model=cls_head_cifar10, inputs="feature", outputs="y_pred"),
    })
    # The backbone update is scheduled to None from epoch 11 on.
    UpdateOp_backbone = EpochScheduler({1: UpdateOp(model=backbone, loss_name="ce"), 11: None})
    UpdateOp_cls_head = EpochScheduler({
        1: UpdateOp(model=cls_head_cifar100, loss_name="ce"),
        11: UpdateOp(model=cls_head_cifar10, loss_name="ce")
    })
    network = fe.Network(ops=[
        ModelOp(model=backbone, inputs="x", outputs="feature"),
        ModelOp_cls_head,
        CrossEntropy(inputs=("y_pred", "y"), outputs="ce", from_logits=True),
        UpdateOp_backbone,
        UpdateOp_cls_head
    ])

    traces = [Accuracy(true_key="y", pred_key="y_pred")]
    estimator = fe.Estimator(pipeline=pipeline, network=network, epochs=epochs, traces=traces)
    return estimator