def get_estimator(batch_size=4,
                  epochs=2,
                  max_train_steps_per_epoch=None,
                  log_steps=100,
                  style_weight=5.0,
                  content_weight=1.0,
                  tv_weight=1e-4,
                  save_dir=None,
                  style_img_path='Vassily_Kandinsky,_1913_-_Composition_7.jpg',
                  data_dir=None):
    """Assemble the estimator that trains `StyleTransferNet` against a single style image.

    The style image is read once with OpenCV, resized to 256x256 and rescaled with
    `(pixel - 127.5) / 127.5`; the same array is then injected into every sample under the
    "style_image" key.

    Args:
        batch_size: Batch size handed to `fe.Pipeline`.
        epochs: Number of epochs handed to `fe.Estimator`.
        max_train_steps_per_epoch: Forwarded unchanged to `fe.Estimator`.
        log_steps: Forwarded unchanged to `fe.Estimator`.
        style_weight: Forwarded to `StyleContentLoss`.
        content_weight: Forwarded to `StyleContentLoss`.
        tv_weight: Forwarded to `StyleContentLoss`.
        save_dir: Directory given to `ModelSaver`. When None, a fresh temporary directory is
            created with `tempfile.mkdtemp()` at call time.
        style_img_path: Path of the style image, read with `cv2.imread`.
        data_dir: Passed as `root_dir` to `mscoco.load_data`.

    Returns:
        The configured `fe.Estimator`.

    Raises:
        AssertionError: If `cv2.imread` returns None for `style_img_path`.
    """
    # Resolve the default here rather than in the signature: a call expression used as a default
    # is evaluated once at definition time, which would create a directory on import and share
    # it between every call.
    if save_dir is None:
        save_dir = tempfile.mkdtemp()

    train_data, _ = mscoco.load_data(root_dir=data_dir, load_bboxes=False, load_masks=False, load_captions=False)
    device = "cuda" if torch.cuda.is_available() else "cpu"

    style_img = cv2.imread(style_img_path)
    assert style_img is not None, "cannot load the style image, please go to the folder with style image"
    style_img = cv2.resize(style_img, (256, 256))
    style_img = (style_img.astype(np.float32) - 127.5) / 127.5

    pipeline = fe.Pipeline(
        train_data=train_data,
        batch_size=batch_size,
        ops=[
            ReadImage(inputs="image", outputs="image"),
            Normalize(inputs="image", outputs="image", mean=1.0, std=1.0, max_pixel_value=127.5),
            Resize(height=256, width=256, image_in="image", image_out="image"),
            # The lambda takes no inputs: it hands back the preprocessed style image as-is.
            LambdaOp(fn=lambda: style_img, outputs="style_image"),
            ChannelTranspose(inputs=["image", "style_image"], outputs=["image", "style_image"])
        ])

    model = fe.build(model_fn=StyleTransferNet,
                     model_name="style_transfer_net",
                     optimizer_fn=lambda x: torch.optim.Adam(x, lr=1e-3))

    network = fe.Network(ops=[
        ModelOp(inputs="image", model=model, outputs="image_out"),
        ExtractVGGFeatures(inputs="style_image", outputs="y_style", device=device),
        ExtractVGGFeatures(inputs="image", outputs="y_content", device=device),
        ExtractVGGFeatures(inputs="image_out", outputs="y_pred", device=device),
        StyleContentLoss(style_weight=style_weight,
                         content_weight=content_weight,
                         tv_weight=tv_weight,
                         inputs=('y_pred', 'y_style', 'y_content', 'image_out'),
                         outputs='loss'),
        UpdateOp(model=model, loss_name="loss")
    ])

    estimator = fe.Estimator(network=network,
                             pipeline=pipeline,
                             traces=ModelSaver(model=model, save_dir=save_dir, frequency=1),
                             epochs=epochs,
                             max_train_steps_per_epoch=max_train_steps_per_epoch,
                             log_steps=log_steps)
    return estimator
def get_estimator(data_dir=None,
                  model_dir=None,
                  epochs=200,
                  batch_size_per_gpu=32,
                  train_steps_per_epoch=None,
                  eval_steps_per_epoch=None):
    """Assemble the estimator that trains `YoloV5` on MSCOCO at a fixed 640x640 input size.

    Training samples come from `PreMosaicDataset` (four images per sample, merged by
    `CombineMosaic`); evaluation samples are single images resized and padded to 640x640.
    The global batch size is `get_num_devices() * batch_size_per_gpu`, and the initial learning
    rate is `1e-2 / 64 * batch_size`.

    Args:
        data_dir: Passed as `root_dir` to `mscoco.load_data`.
        model_dir: Directory given to `BestModelSaver`. When None, a fresh temporary directory is
            created with `tempfile.mkdtemp()` at call time.
        epochs: Number of epochs handed to `fe.Estimator`; also sets the cosine decay cycle
            length (`epochs - 3`).
        batch_size_per_gpu: Per-device batch size, multiplied by the device count.
        train_steps_per_epoch: Forwarded unchanged to `fe.Estimator`.
        eval_steps_per_epoch: Forwarded unchanged to `fe.Estimator`.

    Returns:
        The configured `fe.Estimator`.
    """
    # Resolve the default here rather than in the signature: a call expression used as a default
    # is evaluated once at definition time, which would create a directory on import and share
    # it between every call.
    if model_dir is None:
        model_dir = tempfile.mkdtemp()

    num_device = get_num_devices()
    train_ds, val_ds = mscoco.load_data(root_dir=data_dir)
    train_ds = PreMosaicDataset(mscoco_ds=train_ds)
    batch_size = num_device * batch_size_per_gpu

    pipeline = fe.Pipeline(
        train_data=train_ds,
        eval_data=val_ds,
        ops=[
            ReadImage(inputs=("image1", "image2", "image3", "image4"),
                      outputs=("image1", "image2", "image3", "image4"),
                      mode="train"),
            ReadImage(inputs="image", outputs="image", mode="eval"),
            LongestMaxSize(max_size=640,
                           image_in="image1",
                           bbox_in="bbox1",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image2",
                           bbox_in="bbox2",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image3",
                           bbox_in="bbox3",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image4",
                           bbox_in="bbox4",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="train"),
            LongestMaxSize(max_size=640,
                           image_in="image",
                           bbox_in="bbox",
                           bbox_params=BboxParams("coco", min_area=1.0),
                           mode="eval"),
            PadIfNeeded(min_height=640,
                        min_width=640,
                        image_in="image",
                        bbox_in="bbox",
                        bbox_params=BboxParams("coco", min_area=1.0),
                        mode="eval",
                        border_mode=cv2.BORDER_CONSTANT,
                        value=(114, 114, 114)),
            CombineMosaic(inputs=("image1", "image2", "image3", "image4", "bbox1", "bbox2", "bbox3", "bbox4"),
                          outputs=("image", "bbox"),
                          mode="train"),
            CenterCrop(height=640,
                       width=640,
                       image_in="image",
                       bbox_in="bbox",
                       bbox_params=BboxParams("coco", min_area=1.0),
                       mode="train"),
            Sometimes(
                HorizontalFlip(image_in="image",
                               bbox_in="bbox",
                               bbox_params=BboxParams("coco", min_area=1.0),
                               mode="train")),
            HSVAugment(inputs="image", outputs="image", mode="train"),
            ToArray(inputs="bbox", outputs="bbox", dtype="float32"),
            CategoryID2ClassID(inputs="bbox", outputs="bbox"),
            GTBox(inputs="bbox", outputs=("gt_sbbox", "gt_mbbox", "gt_lbbox"), image_size=640),
            # "bbox" is dropped in train mode only; the eval-mode mAP trace below still reads it.
            Delete(keys=("image1", "image2", "image3", "image4", "bbox1", "bbox2", "bbox3", "bbox4", "bbox"),
                   mode="train"),
            Delete(keys="image_id", mode="eval"),
            Batch(batch_size=batch_size, pad_value=0)
        ])

    init_lr = 1e-2 / 64 * batch_size
    model = fe.build(
        lambda: YoloV5(w=640, h=640, c=3),
        optimizer_fn=lambda x: torch.optim.SGD(x, lr=init_lr, momentum=0.937, weight_decay=0.0005, nesterov=True),
        mixed_precision=True)

    network = fe.Network(ops=[
        RescaleTranspose(inputs="image", outputs="image"),
        ModelOp(model=model, inputs="image", outputs=("pred_s", "pred_m", "pred_l")),
        DecodePred(inputs=("pred_s", "pred_m", "pred_l"), outputs=("pred_s", "pred_m", "pred_l")),
        ComputeLoss(inputs=("pred_s", "gt_sbbox"), outputs=("sbbox_loss", "sconf_loss", "scls_loss")),
        ComputeLoss(inputs=("pred_m", "gt_mbbox"), outputs=("mbbox_loss", "mconf_loss", "mcls_loss")),
        ComputeLoss(inputs=("pred_l", "gt_lbbox"), outputs=("lbbox_loss", "lconf_loss", "lcls_loss")),
        Average(inputs=("sbbox_loss", "mbbox_loss", "lbbox_loss"), outputs="bbox_loss"),
        Average(inputs=("sconf_loss", "mconf_loss", "lconf_loss"), outputs="conf_loss"),
        Average(inputs=("scls_loss", "mcls_loss", "lcls_loss"), outputs="cls_loss"),
        Average(inputs=("bbox_loss", "conf_loss", "cls_loss"), outputs="total_loss"),
        PredictBox(width=640, height=640, inputs=("pred_s", "pred_m", "pred_l"), outputs="box_pred", mode="eval"),
        UpdateOp(model=model, loss_name="total_loss")
    ])

    traces = [
        MeanAveragePrecision(num_classes=80, true_key='bbox', pred_key='box_pred', mode="eval"),
        BestModelSaver(model=model, save_dir=model_dir, metric='mAP', save_best_mode="max")
    ]

    # Computed once here instead of inside the warm-up lambda, where it would be re-evaluated on
    # every scheduler call even though neither operand changes.
    train_steps_epoch = np.ceil(len(train_ds) / batch_size)
    # NOTE(review): the lambda parameter names (`step` / `epoch`) are kept exactly as they were;
    # LRScheduler presumably uses them to choose its update granularity - confirm before renaming.
    lr_schedule = {
        1:
        LRScheduler(model=model,
                    lr_fn=lambda step: lr_schedule_warmup(step, train_steps_epoch=train_steps_epoch, init_lr=init_lr)),
        4:
        LRScheduler(model=model,
                    lr_fn=lambda epoch: cosine_decay(
                        epoch, cycle_length=epochs - 3, init_lr=init_lr, min_lr=init_lr / 100, start=4))
    }
    traces.append(EpochScheduler(lr_schedule))

    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             monitor_names=["bbox_loss", "conf_loss", "cls_loss"],
                             train_steps_per_epoch=train_steps_per_epoch,
                             eval_steps_per_epoch=eval_steps_per_epoch)
    return estimator
def get_estimator(data_dir=None,
                  model_dir=None,
                  batch_size=16,
                  epochs=13,
                  max_train_steps_per_epoch=None,
                  max_eval_steps_per_epoch=None,
                  image_size=512,
                  num_classes=90):
    """Assemble the estimator that trains `RetinaNet` on MSCOCO with square, padded inputs.

    Args:
        data_dir: Passed as `root_dir` to `mscoco.load_data`.
        model_dir: Directory given to `BestModelSaver`. When None, a fresh temporary directory is
            created with `tempfile.mkdtemp()` at call time.
        batch_size: Batch size handed to `fe.Pipeline`.
        epochs: Number of epochs handed to `fe.Estimator`.
        max_train_steps_per_epoch: Forwarded unchanged to `fe.Estimator`.
        max_eval_steps_per_epoch: Forwarded unchanged to `fe.Estimator`.
        image_size: Side length used for `LongestMaxSize`, `PadIfNeeded`, `AnchorBox` and the
            `PredictBox` input shape.
        num_classes: Forwarded to `RetinaNet` and `MeanAveragePrecision`.

    Returns:
        The configured `fe.Estimator`.
    """
    # Resolve the default here rather than in the signature: a call expression used as a default
    # is evaluated once at definition time, which would create a directory on import and share
    # it between every call.
    if model_dir is None:
        model_dir = tempfile.mkdtemp()

    # pipeline
    train_ds, eval_ds = mscoco.load_data(root_dir=data_dir)
    pipeline = fe.Pipeline(
        train_data=train_ds,
        eval_data=eval_ds,
        batch_size=batch_size,
        ops=[
            ReadImage(inputs="image", outputs="image"),
            LongestMaxSize(image_size,
                           image_in="image",
                           image_out="image",
                           bbox_in="bbox",
                           bbox_out="bbox",
                           bbox_params=BboxParams("coco", min_area=1.0)),
            PadIfNeeded(
                image_size,
                image_size,
                border_mode=cv2.BORDER_CONSTANT,
                image_in="image",
                image_out="image",
                bbox_in="bbox",
                bbox_out="bbox",
                bbox_params=BboxParams("coco", min_area=1.0),
            ),
            Sometimes(
                HorizontalFlip(mode="train",
                               image_in="image",
                               image_out="image",
                               bbox_in="bbox",
                               bbox_out="bbox",
                               bbox_params='coco')),
            # normalize from uint8 to [-1, 1]
            Normalize(inputs="image", outputs="image", mean=1.0, std=1.0, max_pixel_value=127.5),
            ShiftLabel(inputs="bbox", outputs="bbox"),
            AnchorBox(inputs="bbox", outputs="anchorbox", width=image_size, height=image_size),
            ChannelTranspose(inputs="image", outputs="image")
        ],
        pad_value=0)

    # network
    model = fe.build(model_fn=lambda: RetinaNet(num_classes=num_classes),
                     optimizer_fn=lambda x: torch.optim.SGD(x, lr=2e-4, momentum=0.9, weight_decay=0.0001))
    network = fe.Network(ops=[
        ModelOp(model=model, inputs="image", outputs=["cls_pred", "loc_pred"]),
        RetinaLoss(inputs=["anchorbox", "cls_pred", "loc_pred"], outputs=["total_loss", "focal_loss", "l1_loss"]),
        UpdateOp(model=model, loss_name="total_loss"),
        PredictBox(input_shape=(image_size, image_size, 3),
                   inputs=["cls_pred", "loc_pred"],
                   outputs="pred",
                   mode="eval")
    ])

    # estimator
    traces = [
        # `lr_fn` is resolved from the enclosing module scope.
        LRScheduler(model=model, lr_fn=lr_fn),
        BestModelSaver(model=model, save_dir=model_dir, metric='mAP', save_best_mode="max"),
        MeanAveragePrecision(num_classes=num_classes, true_key='bbox', pred_key='pred', mode="eval")
    ]
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             max_train_steps_per_epoch=max_train_steps_per_epoch,
                             max_eval_steps_per_epoch=max_eval_steps_per_epoch,
                             monitor_names=["l1_loss", "focal_loss"])
    return estimator
def get_estimator(data_dir=None,
                  epochs=12,
                  batch_size_per_gpu=4,
                  im_size=1344,
                  model_dir=None,
                  train_steps_per_epoch=None,
                  eval_steps_per_epoch=None):
    """Assemble the estimator that trains `SoloV2` on MSCOCO with instance masks.

    The validation split is used both as eval data and as test data. The global batch size is
    `get_num_devices() * batch_size_per_gpu`, and the initial learning rate is
    `1e-2 / 16 * batch_size`.

    Args:
        data_dir: Passed as `root_dir` to `mscoco.load_data`.
        epochs: Number of epochs handed to `fe.Estimator`; also sets the cosine decay cycle
            length (`train_steps_epoch * (epochs - 1)`).
        batch_size_per_gpu: Per-device batch size, multiplied by the device count.
        im_size: Side length for resizing and padding; must be a multiple of 32. Masks are
            resized to `im_size // 4`.
        model_dir: Directory given to `BestModelSaver`. When None, a fresh temporary directory is
            created with `tempfile.mkdtemp()` at call time.
        train_steps_per_epoch: Forwarded unchanged to `fe.Estimator`.
        eval_steps_per_epoch: Forwarded unchanged to `fe.Estimator`.

    Returns:
        The configured `fe.Estimator`.

    Raises:
        AssertionError: If `im_size` is not a multiple of 32.
    """
    assert im_size % 32 == 0, "im_size must be a multiple of 32"
    # Resolve the default here rather than in the signature: a call expression used as a default
    # is evaluated once at definition time, which would create a directory on import and share
    # it between every call.
    if model_dir is None:
        model_dir = tempfile.mkdtemp()

    num_device = get_num_devices()
    train_ds, val_ds = mscoco.load_data(root_dir=data_dir, load_masks=True)
    batch_size = num_device * batch_size_per_gpu

    pipeline = fe.Pipeline(
        train_data=train_ds,
        eval_data=val_ds,
        test_data=val_ds,
        ops=[
            ReadImage(inputs="image", outputs="image"),
            MergeMask(inputs="mask", outputs="mask"),
            GetImageSize(inputs="image", outputs="imsize", mode="test"),
            LongestMaxSize(max_size=im_size, image_in="image", mask_in="mask", bbox_in="bbox", bbox_params="coco"),
            RemoveIf(fn=lambda x: len(x) == 0, inputs="bbox"),
            PadIfNeeded(min_height=im_size,
                        min_width=im_size,
                        image_in="image",
                        mask_in="mask",
                        bbox_in="bbox",
                        bbox_params="coco",
                        border_mode=cv2.BORDER_CONSTANT,
                        value=0),
            Sometimes(
                HorizontalFlip(image_in="image", mask_in="mask", bbox_in="bbox", bbox_params="coco", mode="train")),
            Resize(height=im_size // 4, width=im_size // 4, image_in='mask'),  # downscale mask for memory efficiency
            Gt2Target(inputs=("mask", "bbox"), outputs=("gt_match", "mask", "classes")),
            Delete(keys="bbox"),
            # "image_id" is kept only in test mode, where the COCOMaskmAP trace below reads it.
            Delete(keys="image_id", mode="!test"),
            Batch(batch_size=batch_size, pad_value=0)
        ],
        num_process=8 * num_device)

    init_lr = 1e-2 / 16 * batch_size
    model = fe.build(model_fn=SoloV2, optimizer_fn=lambda x: torch.optim.SGD(x, lr=init_lr, momentum=0.9))

    network = fe.Network(ops=[
        Normalize(inputs="image", outputs="image", mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        Permute(inputs="image", outputs='image'),
        ModelOp(model=model, inputs="image", outputs=("feat_seg", "feat_cls_list", "feat_kernel_list")),
        # Identity lambdas: each list output is spread over five separately named keys.
        LambdaOp(fn=lambda x: x, inputs="feat_cls_list", outputs=("cls1", "cls2", "cls3", "cls4", "cls5")),
        LambdaOp(fn=lambda x: x, inputs="feat_kernel_list", outputs=("k1", "k2", "k3", "k4", "k5")),
        Solov2Loss(0, 40, inputs=("mask", "classes", "gt_match", "feat_seg", "cls1", "k1"), outputs=("l_c1", "l_s1")),
        Solov2Loss(1, 36, inputs=("mask", "classes", "gt_match", "feat_seg", "cls2", "k2"), outputs=("l_c2", "l_s2")),
        Solov2Loss(2, 24, inputs=("mask", "classes", "gt_match", "feat_seg", "cls3", "k3"), outputs=("l_c3", "l_s3")),
        Solov2Loss(3, 16, inputs=("mask", "classes", "gt_match", "feat_seg", "cls4", "k4"), outputs=("l_c4", "l_s4")),
        Solov2Loss(4, 12, inputs=("mask", "classes", "gt_match", "feat_seg", "cls5", "k5"), outputs=("l_c5", "l_s5")),
        CombineLoss(inputs=("l_c1", "l_s1", "l_c2", "l_s2", "l_c3", "l_s3", "l_c4", "l_s4", "l_c5", "l_s5"),
                    outputs=("total_loss", "cls_loss", "seg_loss")),
        L2Regularizaton(inputs="total_loss", outputs="total_loss_l2", model=model, beta=1e-5, mode="train"),
        UpdateOp(model=model, loss_name="total_loss_l2"),
        PointsNMS(inputs="feat_cls_list", outputs="feat_cls_list", mode="test"),
        Predict(inputs=("feat_seg", "feat_cls_list", "feat_kernel_list"),
                outputs=("seg_preds", "cate_scores", "cate_labels"),
                mode="test")
    ])

    train_steps_epoch = int(np.ceil(len(train_ds) / batch_size))
    # NOTE(review): the lambda parameter name (`step`) is kept exactly as it was; LRScheduler
    # presumably uses it to choose its update granularity - confirm before renaming.
    lr_schedule = {
        1:
        LRScheduler(model=model, lr_fn=lambda step: lr_schedule_warmup(step, init_lr=init_lr)),
        2:
        LRScheduler(model=model,
                    lr_fn=lambda step: cosine_decay(step,
                                                    cycle_length=train_steps_epoch * (epochs - 1),
                                                    init_lr=init_lr,
                                                    min_lr=init_lr / 100,
                                                    start=train_steps_epoch))
    }

    traces = [
        EpochScheduler(lr_schedule),
        COCOMaskmAP(data_dir=val_ds.root_dir,
                    inputs=("seg_preds", "cate_scores", "cate_labels", "image_id", "imsize"),
                    mode="test"),
        BestModelSaver(model=model, save_dir=model_dir, metric="total_loss")
    ]

    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=traces,
                             monitor_names=("cls_loss", "seg_loss"),
                             train_steps_per_epoch=train_steps_per_epoch,
                             eval_steps_per_epoch=eval_steps_per_epoch)
    return estimator