def test_pipeline_get_mode_epoch_scheduler(self):
    """Pipeline.get_modes should drop a mode once its EpochScheduler maps to None.

    Each mode's dataset is scheduled to expire at a different epoch (train at 2,
    eval at 3, test at 4), so the set of active modes shrinks as epochs advance.
    """
    train_data = EpochScheduler(epoch_dict={1: self.sample_torch_dataset, 2: None})
    eval_data = EpochScheduler(epoch_dict={1: self.sample_torch_dataset, 3: None})
    test_data = EpochScheduler(epoch_dict={1: self.sample_torch_dataset, 4: None})
    pipeline = fe.Pipeline(train_data=train_data, eval_data=eval_data, test_data=test_data)
    # Expected active modes at each epoch (removed leftover pdb debug hook).
    expected_modes = {
        1: {"train", "eval", "test"},
        2: {"eval", "test"},
        3: {"test"},
        4: set(),
    }
    for epoch, modes in expected_modes.items():
        with self.subTest(epoch=epoch):
            self.assertEqual(pipeline.get_modes(epoch=epoch), modes)
def test_pipeline_init_tf_dataset_torch_dataloader_scheduler_have_op_batch_size_num_process(self):
    """Pipeline must reject ops / batch_size / num_process when the scheduled data
    source is a tf dataset or a torch dataloader (already-batched sources)."""
    sources = {"tf_dataset": self.sample_tf_dataset, "dataloader": self.sample_torch_dataloader}
    forbidden = [("numpyop", {"ops": [self.sample_numpy_op]}),
                 ("batch_size not None", {"batch_size": 10}),
                 ("num_process not None", {"num_process": 1})]
    for source_name, source in sources.items():
        scheduled = EpochScheduler(epoch_dict={1: source, 2: None})
        for label, kwargs in forbidden:
            with self.subTest("{} with {}".format(source_name, label)):
                with self.assertRaises(AssertionError):
                    fe.Pipeline(train_data=scheduled,
                                eval_data=scheduled,
                                test_data=scheduled,
                                **kwargs)
def run_test(mixed_precision, merge_grad, gradient):
    # Train a small LeNet for 3 epochs, installing a fresh SGD optimizer with a
    # smaller learning rate at the start of each epoch via EpochScheduler, and
    # let CheckNetworkWeight verify the weight updates against the expected lr.
    lr = 0.1
    lr2 = 0.01
    lr3 = 0.001
    pipeline = fe.Pipeline(train_data=self.train_data,
                           batch_size=4,
                           ops=[ExpandDims(inputs="x", outputs="x"), Minmax(inputs="x", outputs="x")])
    # One optimizer per epoch: epoch 1 -> lr, epoch 2 -> lr2, epoch 3 -> lr3.
    optimizer_fn = EpochScheduler({
        1: lambda: tf.optimizers.SGD(lr),
        2: lambda: tf.optimizers.SGD(lr2),
        3: lambda: tf.optimizers.SGD(lr3)
    })
    model = fe.build(model_fn=LeNet_tf, optimizer_fn=optimizer_fn, mixed_precision=mixed_precision)
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="x", outputs="y_pred"),
        CrossEntropy(inputs=("y_pred", "y"), outputs="ce"),
        # Gradients are computed explicitly so UpdateOp can consume them by key.
        GradientOp(model=model, finals="ce", outputs="grad"),
        UpdateOp(model=model, loss_name="ce", gradients=gradient, merge_grad=merge_grad),
    ])
    traces = [
        # work_intervals pairs each lr with the [start, end) epoch span in which
        # that optimizer is active.
        CheckNetworkWeight(model=model,
                           grad_key="grad",
                           merge_grad=merge_grad,
                           test_self=self,
                           framework="tf",
                           lrs=[lr, lr2, lr3],
                           work_intervals=[[1, 2], [2, 3], [3, 4]])
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=3,
                             traces=traces,
                             train_steps_per_epoch=2)
    # warmup=False: the weight checks assume no extra warmup passes run first.
    estimator.fit(warmup=False)
def test_pipeline_init_torch_dataset_scheduler_have_op_batch_size_num_process(self):
    """A scheduled torch Dataset (unbatched source) must accept ops, batch_size,
    and num_process without raising."""
    data = EpochScheduler(epoch_dict={1: self.sample_torch_dataset, 2: None})
    cases = [("with numpyop", {"ops": [self.sample_numpy_op]}),
             ("with batch_size not None", {"batch_size": 10}),
             ("with num_process not None", {"num_process": 1})]
    for label, kwargs in cases:
        with self.subTest(label):
            try:
                fe.Pipeline(train_data=data, eval_data=data, test_data=data, **kwargs)
            except Exception as err:
                # was a bare 'except:', which would also swallow KeyboardInterrupt;
                # include the error so a failure is actually diagnosable.
                self.fail("exception occur: {!r}".format(err))
def test_pipeline_get_epochs_with_data_with_scheduler(self):
    """get_epochs_with_data reports only the epochs where the scheduler yields data."""
    scheduled_data = EpochScheduler(epoch_dict={1: self.sample_torch_dataset, 3: None})
    pipeline = fe.Pipeline(train_data=scheduled_data)
    # Data exists for epochs 1-2 only; it becomes None from epoch 3 onward.
    result = pipeline.get_epochs_with_data(total_epochs=5, mode="train")
    self.assertEqual(result, {1, 2})
def instantiate_system():
    """Build a torch sample system whose single pipeline op is wrapped in an EpochScheduler."""
    system = sample_system_object_torch()
    scheduled_op = EpochScheduler(epoch_dict={1: TestNumpyOp(inputs="x", outputs="x", mode="train", var=1)})
    system.pipeline.ops = [scheduled_op]
    return system
def test_network_build_check_load_weight_from_path(self):
    """fe.build should forward weights_path to load_model (mocked here).

    Removed a dead local ``optimizer = EpochScheduler(...)`` that was never used —
    the model is built with ``tf.optimizers.Adadelta`` (likely a copy-paste
    remnant from the scheduler variants of this test).
    """
    with unittest.mock.patch("fastestimator.network.load_model") as fake:
        fe.build(model_fn=one_layer_tf_model,
                 optimizer_fn=tf.optimizers.Adadelta,
                 weights_path="example_path")
        # load_model is called as load_model(model, weights_path): the second
        # positional argument must be the path we passed in.
        _, weight = fake.call_args[0]
        self.assertEqual(weight, "example_path")
def test_network_fe_compile_optimizer_epochscheduler_tf_check_load_wight(self):
    """_fe_compile with a scheduled optimizer should still forward the weight path to load_model."""
    scheduled_optimizer = EpochScheduler(epoch_dict={1: "adam", 10: "sgd"})
    with unittest.mock.patch("fastestimator.network.load_model") as fake:
        fe.network._fe_compile(model=self.tf_model,
                               optimizer_fn=scheduled_optimizer,
                               weight="example_path",
                               name="test",
                               mixed_precision=False)
        # Second positional argument of load_model(model, weight) is the path.
        loaded_weight = fake.call_args[0][1]
        self.assertEqual(loaded_weight, "example_path")
def instantiate_system():
    """Create a sample system whose train data is an EpochScheduler-wrapped TestDataset."""
    system = sample_system_object()
    x_train = np.ones((2, 28, 28, 3))
    y_train = np.ones((2, ))
    scheduled_data = EpochScheduler(
        epoch_dict={1: TestDataset(data={'x': x_train, 'y': y_train}, var=1)})
    system.pipeline = fe.Pipeline(train_data=scheduled_data, batch_size=1)
    return system
def test_pipeline_get_result_dict_batch_size_scheduler(self):
    """get_results should honor a dict-style ({mode: size}) batch_size EpochScheduler."""
    scheduled_batch_size = EpochScheduler({1: {"train": 1}})
    pipeline = fe.Pipeline(train_data=self.sample_torch_dataset,
                           ops=NumpyOpAdd1(inputs="x", outputs="y"),
                           batch_size=scheduled_batch_size)
    data = pipeline.get_results(mode="train", epoch=1)
    # Convert tensors to numpy arrays so is_equal can compare by value.
    for key in ("x", "y"):
        data[key] = data[key].numpy()
    expected = {
        "x": np.array([[0]], dtype=np.float32),
        "y": np.array([[1]], dtype=np.float32)
    }
    self.assertTrue(is_equal(data, expected))
def test_network_fe_compile_optimizer_epochscheduler_torch_check_optimizer(self):
    """_fe_compile with a scheduled torch optimizer: every scheduled value must be
    instantiated, and the current optimizer must match epoch 1 ('adam')."""
    scheduled_optimizer = EpochScheduler(epoch_dict={1: "adam", 10: "sgd"})
    model = fe.network._fe_compile(model=self.torch_model,
                                   optimizer_fn=scheduled_optimizer,
                                   weight=None,
                                   name=None,
                                   mixed_precision=False)
    with self.subTest("check optimizer instantiation"):
        # Loop variable renamed so it no longer shadows the scheduler above.
        for instantiated in model.optimizer.get_all_values():
            self.assertIsInstance(instantiated, torch.optim.Optimizer)
    with self.subTest("check current optimizer"):
        self.assertIsInstance(model.current_optimizer, torch.optim.Adam)
def instantiate_system():
    """Build a torch sample system whose network has a scheduled TensorOp followed by a ModelOp."""
    system = sample_system_object_torch()
    model = fe.build(model_fn=fe.architecture.pytorch.LeNet, optimizer_fn='adam', model_name='torch')
    scheduled_op = EpochScheduler(
        epoch_dict={1: TestTensorOp(inputs="x_out", outputs="x_out", mode="train", var=1)})
    system.network = fe.Network(ops=[scheduled_op, ModelOp(model=model, inputs="x_out", outputs="y_pred")])
    return system
def test_network_fe_compile_optimizer_epochscheduler_tf_check_all(self):
    """_fe_compile with a scheduled tf optimizer: all scheduled optimizers are
    instantiated, the current one is Adam (epoch 1), and name/flag are set."""
    scheduled_optimizer = EpochScheduler(epoch_dict={1: "adam", 10: "sgd"})
    model = fe.network._fe_compile(model=self.tf_model,
                                   optimizer_fn=scheduled_optimizer,
                                   weight=None,
                                   name="test",
                                   mixed_precision=False)
    with self.subTest("check optimizer instantiation"):
        # Loop variable renamed so it no longer shadows the scheduler above.
        for instantiated in model.optimizer.get_all_values():
            self.assertIsInstance(instantiated, tf.optimizers.Optimizer)
    with self.subTest("check current_optimizer"):
        self.assertIsInstance(model.current_optimizer, tf.optimizers.Adam)
    with self.subTest("check model_name"):
        self.assertEqual(model.model_name, "test")
    with self.subTest("check fe_compiled"):
        self.assertEqual(model.fe_compiled, True)
def instantiate_system():
    """Create a sample system whose train data swaps non-traceable datasets by epoch."""
    system = sample_system_object()
    x_train = np.ones((2, 28, 28, 3))
    y_train = np.ones((2, ))
    # Two indexed samples sharing the same fixture arrays.
    samples = {idx: {'x': x_train[idx], 'y': y_train[idx]} for idx in range(2)}
    scheduled_data = EpochScheduler(
        epoch_dict={
            1: TestNonTraceableDataset(data=samples, var=3),
            2: TestNonTraceableDataset(data=samples, var=7),
            3: None
        })
    system.pipeline = fe.Pipeline(train_data=scheduled_data, batch_size=1)
    return system
def instantiate_system():
    """Build a torch sample system with a trace that is only active via an EpochScheduler."""
    system = sample_system_object_torch()
    scheduled_trace = EpochScheduler(epoch_dict={1: TestTrace(var1=1)})
    system.traces.append(scheduled_trace)
    return system
def setUpClass(cls):
    """Shared fixtures for the scheduler tests."""
    cls.scheduler = RepeatScheduler(['a', 'b', 'c', 'c'])
    # The epoch scheduler's value disappears (maps to None) from epoch 30 onward.
    cls.epoch_scheduler = EpochScheduler({1: 'a', 2: 'b', 30: None})
    # One full repeat cycle, and the epochs at which any scheduled value changes.
    cls.actual_current_items = list('abcc')
    cls.signature_epochs = [1, 2, 3, 5, 30, 31, 33]
def setUpClass(cls):
    """Fixtures: an EpochScheduler and its expected values in insertion order."""
    cls.input_data = {1: "a", 3: "b", 4: None, 100: "c"}
    # Insertion-ordered values of input_data: ['a', 'b', None, 'c'].
    cls.values = list(cls.input_data.values())
    cls.scheduler = EpochScheduler(cls.input_data)
def get_estimator(target_size=128,
                  epochs=55,
                  save_dir=tempfile.mkdtemp(),
                  max_train_steps_per_epoch=None,
                  data_dir=None):
    """Progressive-growing GAN training on NIH chest x-rays.

    Image resolution doubles at fixed epochs; each resolution has its own
    generator/discriminator pair, resize ops, and batch size, all switched in
    via EpochScheduler.
    """
    # assert growth parameters
    num_grow = np.log2(target_size) - 2
    assert num_grow >= 1 and num_grow % 1 == 0, "need exponential of 2 and greater than 8 as target size"
    # One initial phase plus two phases (fade-in + stabilize) per growth step.
    num_phases = int(2 * num_grow + 1)
    assert epochs % num_phases == 0, "epoch must be multiple of {} for size {}".format(num_phases, target_size)
    num_grow, phase_length = int(num_grow), int(epochs / num_phases)
    # Epochs at which the resolution switches, and the resolution used from each.
    event_epoch = [1, 1 + phase_length] + [phase_length * (2 * i + 1) + 1 for i in range(1, num_grow)]
    event_size = [4] + [2**(i + 3) for i in range(num_grow)]
    # set up data schedules
    dataset = nih_chestxray.load_data(root_dir=data_dir)
    # Per-resolution resizes: full-res "x", plus a half-res copy ("x_low_res")
    # upsampled back to full size — presumably used for fade-in blending below.
    resize_map = {
        epoch: Resize(image_in="x", image_out="x", height=size, width=size)
        for (epoch, size) in zip(event_epoch, event_size)
    }
    resize_low_res_map1 = {
        epoch: Resize(image_in="x", image_out="x_low_res", height=size // 2, width=size // 2)
        for (epoch, size) in zip(event_epoch, event_size)
    }
    resize_low_res_map2 = {
        epoch: Resize(image_in="x_low_res", image_out="x_low_res", height=size, width=size)
        for (epoch, size) in zip(event_epoch, event_size)
    }
    # Bigger images get smaller per-device batches; scaled by device count.
    batch_size_map = {
        epoch: 512 // size * get_num_devices() if size <= 128 else 4 * get_num_devices()
        for (epoch, size) in zip(event_epoch, event_size)
    }
    batch_scheduler = EpochScheduler(epoch_dict=batch_size_map)
    pipeline = fe.Pipeline(
        batch_size=batch_scheduler,
        train_data=dataset,
        drop_last=True,
        ops=[
            ReadImage(inputs="x", outputs="x", color_flag='gray'),
            EpochScheduler(epoch_dict=resize_map),
            EpochScheduler(epoch_dict=resize_low_res_map1),
            EpochScheduler(epoch_dict=resize_low_res_map2),
            # mean=1.0, std=1.0, max_pixel_value=127.5 maps pixels into [-1, 1].
            Normalize(inputs=["x", "x_low_res"], outputs=["x", "x_low_res"], mean=1.0, std=1.0, max_pixel_value=127.5),
            # Fresh 512-dim gaussian latent vector "z" per sample.
            LambdaOp(fn=lambda: np.random.normal(size=[512]).astype('float32'), outputs="z")
        ])
    # now model schedule
    fade_in_alpha = tf.Variable(initial_value=1.0, dtype='float32', trainable=False)
    # One discriminator per resolution, each with its own Adam optimizer.
    d_models = fe.build(
        model_fn=lambda: build_D(fade_in_alpha, target_resolution=int(np.log2(target_size)), num_channels=1),
        optimizer_fn=[lambda: Adam(0.001, beta_1=0.0, beta_2=0.99, epsilon=1e-8)] * len(event_size),
        model_name=["d_{}".format(size) for size in event_size])
    # Generators per resolution plus a final "G" with no optimizer (inference-only).
    g_models = fe.build(
        model_fn=lambda: build_G(fade_in_alpha, target_resolution=int(np.log2(target_size)), num_channels=1),
        optimizer_fn=[lambda: Adam(0.001, beta_1=0.0, beta_2=0.99, epsilon=1e-8)] * len(event_size) + [None],
        model_name=["g_{}".format(size) for size in event_size] + ["G"])
    # Epoch -> op maps so the network swaps the active G/D as resolution grows.
    fake_img_map = {
        epoch: ModelOp(inputs="z", outputs="x_fake", model=model)
        for (epoch, model) in zip(event_epoch, g_models[:-1])
    }
    fake_score_map = {
        epoch: ModelOp(inputs="x_fake", outputs="fake_score", model=model)
        for (epoch, model) in zip(event_epoch, d_models)
    }
    real_score_map = {
        epoch: ModelOp(inputs="x_blend", outputs="real_score", model=model)
        for (epoch, model) in zip(event_epoch, d_models)
    }
    interp_score_map = {
        epoch: ModelOp(inputs="x_interp", outputs="interp_score", model=model)
        for (epoch, model) in zip(event_epoch, d_models)
    }
    g_update_map = {
        epoch: UpdateOp(loss_name="gloss", model=model)
        for (epoch, model) in zip(event_epoch, g_models[:-1])
    }
    d_update_map = {
        epoch: UpdateOp(loss_name="dloss", model=model)
        for (epoch, model) in zip(event_epoch, d_models)
    }
    # Blend real images with their low-res version during fade-in, interpolate
    # real/fake for the gradient penalty, then update G and D separately.
    network = fe.Network(ops=[
        EpochScheduler(fake_img_map),
        EpochScheduler(fake_score_map),
        ImageBlender(alpha=fade_in_alpha, inputs=("x", "x_low_res"), outputs="x_blend"),
        EpochScheduler(real_score_map),
        Interpolate(inputs=("x_fake", "x"), outputs="x_interp"),
        EpochScheduler(interp_score_map),
        GradientPenalty(inputs=("x_interp", "interp_score"), outputs="gp"),
        GLoss(inputs="fake_score", outputs="gloss"),
        DLoss(inputs=("real_score", "fake_score", "gp"), outputs="dloss"),
        EpochScheduler(g_update_map),
        EpochScheduler(d_update_map)
    ])
    traces = [
        # Controls fade_in_alpha around each resolution-switch epoch — see
        # AlphaController for the exact ramp behavior.
        AlphaController(alpha=fade_in_alpha,
                        fade_start_epochs=event_epoch[1:],
                        duration=phase_length,
                        batch_scheduler=batch_scheduler,
                        num_examples=len(dataset)),
        ModelSaver(model=g_models[-1], save_dir=save_dir, frequency=phase_length),
        # Save generator samples at the last epoch before each switch (epoch - 1).
        ImageSaving(epoch_model_map={
            epoch - 1: model
            for (epoch, model) in zip(event_epoch[1:] + [epochs + 1], g_models[:-1])
        },
                    save_dir=save_dir)
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             max_train_steps_per_epoch=max_train_steps_per_epoch)
    return estimator
def get_estimator(data_dir=None,
                  model_dir=tempfile.mkdtemp(),
                  epochs=200,
                  batch_size_per_gpu=32,
                  train_steps_per_epoch=None,
                  eval_steps_per_epoch=None):
    """YOLOv5 training on MS-COCO with mosaic augmentation and warmup + cosine LR."""
    num_device = get_num_devices()
    train_ds, val_ds = mscoco.load_data(root_dir=data_dir)
    # Pre-group images 4-at-a-time so CombineMosaic can stitch them below.
    train_ds = PreMosaicDataset(mscoco_ds=train_ds)
    batch_size = num_device * batch_size_per_gpu
    pipeline = fe.Pipeline(
        train_data=train_ds,
        eval_data=val_ds,
        ops=[
            ReadImage(inputs=("image1", "image2", "image3", "image4"),
                      outputs=("image1", "image2", "image3", "image4"),
                      mode="train"),
            ReadImage(inputs="image", outputs="image", mode="eval"),
            # Resize each mosaic tile (train) / the single image (eval) so its
            # longest side is 640, keeping COCO-format boxes in sync.
            LongestMaxSize(max_size=640,
                           image_in="image1",
                           bbox_in="bbox1",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image2",
                           bbox_in="bbox2",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image3",
                           bbox_in="bbox3",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image4",
                           bbox_in="bbox4",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image",
                           bbox_in="bbox",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="eval"),
            # Pad eval images to exactly 640x640 with gray (114) borders.
            PadIfNeeded(min_height=640,
                        min_width=640,
                        image_in="image",
                        bbox_in="bbox",
                        bbox_params=BboxParams("coco", min_area=1.0),
                        mode="eval",
                        border_mode=cv2.BORDER_CONSTANT,
                        value=(114, 114, 114)),
            # Stitch the 4 tiles (and their boxes) into one mosaic sample.
            CombineMosaic(inputs=("image1", "image2", "image3", "image4", "bbox1", "bbox2", "bbox3", "bbox4"),
                          outputs=("image", "bbox"),
                          mode="train"),
            CenterCrop(height=640,
                       width=640,
                       image_in="image",
                       bbox_in="bbox",
                       bbox_params=BboxParams("coco", min_area=1.0),
                       mode="train"),
            Sometimes(
                HorizontalFlip(image_in="image",
                               bbox_in="bbox",
                               bbox_params=BboxParams("coco", min_area=1.0),
                               mode="train")),
            HSVAugment(inputs="image", outputs="image", mode="train"),
            ToArray(inputs="bbox", outputs="bbox", dtype="float32"),
            CategoryID2ClassID(inputs="bbox", outputs="bbox"),
            # Per-scale ground-truth targets (small / medium / large heads).
            GTBox(inputs="bbox",
                  outputs=("gt_sbbox", "gt_mbbox", "gt_lbbox"),
                  image_size=640),
            # Drop intermediate mosaic keys to keep batches lean.
            Delete(keys=("image1", "image2", "image3", "image4", "bbox1", "bbox2", "bbox3", "bbox4", "bbox"),
                   mode="train"),
            Delete(keys="image_id", mode="eval"),
            Batch(batch_size=batch_size, pad_value=0)
        ])
    # Base lr 1e-2 at batch 64, scaled linearly with the actual batch size.
    init_lr = 1e-2 / 64 * batch_size
    model = fe.build(
        lambda: YoloV5(w=640, h=640, c=3),
        optimizer_fn=lambda x: torch.optim.SGD(
            x, lr=init_lr, momentum=0.937, weight_decay=0.0005, nesterov=True),
        mixed_precision=True)
    network = fe.Network(ops=[
        RescaleTranspose(inputs="image", outputs="image"),
        ModelOp(model=model, inputs="image", outputs=("pred_s", "pred_m", "pred_l")),
        DecodePred(inputs=("pred_s", "pred_m", "pred_l"), outputs=("pred_s", "pred_m", "pred_l")),
        # Per-scale losses, then each component averaged across the 3 scales.
        ComputeLoss(inputs=("pred_s", "gt_sbbox"), outputs=("sbbox_loss", "sconf_loss", "scls_loss")),
        ComputeLoss(inputs=("pred_m", "gt_mbbox"), outputs=("mbbox_loss", "mconf_loss", "mcls_loss")),
        ComputeLoss(inputs=("pred_l", "gt_lbbox"), outputs=("lbbox_loss", "lconf_loss", "lcls_loss")),
        Average(inputs=("sbbox_loss", "mbbox_loss", "lbbox_loss"), outputs="bbox_loss"),
        Average(inputs=("sconf_loss", "mconf_loss", "lconf_loss"), outputs="conf_loss"),
        Average(inputs=("scls_loss", "mcls_loss", "lcls_loss"), outputs="cls_loss"),
        Average(inputs=("bbox_loss", "conf_loss", "cls_loss"), outputs="total_loss"),
        PredictBox(width=640, height=640, inputs=("pred_s", "pred_m", "pred_l"), outputs="box_pred", mode="eval"),
        UpdateOp(model=model, loss_name="total_loss")
    ])
    traces = [
        MeanAveragePrecision(num_classes=80, true_key='bbox', pred_key='box_pred', mode="eval"),
        BestModelSaver(model=model, save_dir=model_dir, metric='mAP', save_best_mode="max")
    ]
    # Step-based warmup for epochs 1-3, then epoch-based cosine decay from epoch 4.
    lr_schedule = {
        1:
        LRScheduler(model=model,
                    lr_fn=lambda step: lr_schedule_warmup(
                        step, train_steps_epoch=np.ceil(len(train_ds) / batch_size), init_lr=init_lr)),
        4:
        LRScheduler(model=model,
                    lr_fn=lambda epoch: cosine_decay(epoch,
                                                     cycle_length=epochs - 3,
                                                     init_lr=init_lr,
                                                     min_lr=init_lr / 100,
                                                     start=4))
    }
    traces.append(EpochScheduler(lr_schedule))
    estimator = fe.Estimator(
        pipeline=pipeline,
        network=network,
        epochs=epochs,
        traces=traces,
        monitor_names=["bbox_loss", "conf_loss", "cls_loss"],
        train_steps_per_epoch=train_steps_per_epoch,
        eval_steps_per_epoch=eval_steps_per_epoch)
    return estimator
def get_estimator(data_dir=None,
                  epochs=12,
                  batch_size_per_gpu=4,
                  im_size=1344,
                  model_dir=tempfile.mkdtemp(),
                  train_steps_per_epoch=None,
                  eval_steps_per_epoch=None):
    """SoloV2 instance segmentation training on MS-COCO (with masks)."""
    assert im_size % 32 == 0, "im_size must be a multiple of 32"
    num_device = get_num_devices()
    train_ds, val_ds = mscoco.load_data(root_dir=data_dir, load_masks=True)
    batch_size = num_device * batch_size_per_gpu
    pipeline = fe.Pipeline(
        train_data=train_ds,
        eval_data=val_ds,
        test_data=val_ds,
        ops=[
            ReadImage(inputs="image", outputs="image"),
            MergeMask(inputs="mask", outputs="mask"),
            # Original image size is only needed to rescale test-time predictions.
            GetImageSize(inputs="image", outputs="imsize", mode="test"),
            LongestMaxSize(max_size=im_size,
                           image_in="image",
                           mask_in="mask",
                           bbox_in="bbox",
                           bbox_params="coco"),
            # Filter out samples left with no boxes (len(bbox) == 0).
            RemoveIf(fn=lambda x: len(x) == 0, inputs="bbox"),
            PadIfNeeded(min_height=im_size,
                        min_width=im_size,
                        image_in="image",
                        mask_in="mask",
                        bbox_in="bbox",
                        bbox_params="coco",
                        border_mode=cv2.BORDER_CONSTANT,
                        value=0),
            Sometimes(
                HorizontalFlip(image_in="image",
                               mask_in="mask",
                               bbox_in="bbox",
                               bbox_params="coco",
                               mode="train")),
            Resize(height=im_size // 4, width=im_size // 4, image_in='mask'),  # downscale mask for memory efficiency
            Gt2Target(inputs=("mask", "bbox"), outputs=("gt_match", "mask", "classes")),
            Delete(keys="bbox"),
            Delete(keys="image_id", mode="!test"),
            Batch(batch_size=batch_size, pad_value=0)
        ],
        num_process=8 * num_device)
    # Base lr 1e-2 at batch 16, scaled linearly with the actual batch size.
    init_lr = 1e-2 / 16 * batch_size
    model = fe.build(
        model_fn=SoloV2,
        optimizer_fn=lambda x: torch.optim.SGD(x, lr=init_lr, momentum=0.9))
    network = fe.Network(ops=[
        # ImageNet normalization stats.
        Normalize(inputs="image", outputs="image", mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        Permute(inputs="image", outputs='image'),
        ModelOp(model=model, inputs="image", outputs=("feat_seg", "feat_cls_list", "feat_kernel_list")),
        # Unpack the 5-element feature lists into individual keys for the
        # per-level losses below.
        LambdaOp(fn=lambda x: x, inputs="feat_cls_list", outputs=("cls1", "cls2", "cls3", "cls4", "cls5")),
        LambdaOp(fn=lambda x: x, inputs="feat_kernel_list", outputs=("k1", "k2", "k3", "k4", "k5")),
        Solov2Loss(0, 40, inputs=("mask", "classes", "gt_match", "feat_seg", "cls1", "k1"), outputs=("l_c1", "l_s1")),
        Solov2Loss(1, 36, inputs=("mask", "classes", "gt_match", "feat_seg", "cls2", "k2"), outputs=("l_c2", "l_s2")),
        Solov2Loss(2, 24, inputs=("mask", "classes", "gt_match", "feat_seg", "cls3", "k3"), outputs=("l_c3", "l_s3")),
        Solov2Loss(3, 16, inputs=("mask", "classes", "gt_match", "feat_seg", "cls4", "k4"), outputs=("l_c4", "l_s4")),
        Solov2Loss(4, 12, inputs=("mask", "classes", "gt_match", "feat_seg", "cls5", "k5"), outputs=("l_c5", "l_s5")),
        CombineLoss(inputs=("l_c1", "l_s1", "l_c2", "l_s2", "l_c3", "l_s3", "l_c4", "l_s4", "l_c5", "l_s5"),
                    outputs=("total_loss", "cls_loss", "seg_loss")),
        L2Regularizaton(inputs="total_loss", outputs="total_loss_l2", model=model, beta=1e-5, mode="train"),
        UpdateOp(model=model, loss_name="total_loss_l2"),
        # Test-time decoding only.
        PointsNMS(inputs="feat_cls_list", outputs="feat_cls_list", mode="test"),
        Predict(inputs=("feat_seg", "feat_cls_list", "feat_kernel_list"),
                outputs=("seg_preds", "cate_scores", "cate_labels"),
                mode="test")
    ])
    train_steps_epoch = int(np.ceil(len(train_ds) / batch_size))
    # Step-based warmup during epoch 1, then cosine decay over the remaining epochs.
    lr_schedule = {
        1:
        LRScheduler(
            model=model,
            lr_fn=lambda step: lr_schedule_warmup(step, init_lr=init_lr)),
        2:
        LRScheduler(
            model=model,
            lr_fn=lambda step: cosine_decay(step,
                                            cycle_length=train_steps_epoch * (epochs - 1),
                                            init_lr=init_lr,
                                            min_lr=init_lr / 100,
                                            start=train_steps_epoch))
    }
    traces = [
        EpochScheduler(lr_schedule),
        COCOMaskmAP(data_dir=val_ds.root_dir,
                    inputs=("seg_preds", "cate_scores", "cate_labels", "image_id", "imsize"),
                    mode="test"),
        BestModelSaver(model=model, save_dir=model_dir, metric="total_loss")
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             monitor_names=("cls_loss", "seg_loss"),
                             train_steps_per_epoch=train_steps_per_epoch,
                             eval_steps_per_epoch=eval_steps_per_epoch)
    return estimator