    drop_last=DROP_LAST)
NUM_CLASS = len(train_loader.dataset.classes)
print("Number of Training Classes: {}".format(NUM_CLASS))

BACKBONE = ResNet_50(INPUT_SIZE)
print("=" * 60)
print(BACKBONE)
print("{} Backbone Generated".format(BACKBONE_NAME))

MASKNET = MaskNet()  # keep the instance name distinct from the MaskNet class
print("=" * 60)
print(MASKNET)
print("MaskNet Generated")

# Separate the batch-norm parameters from the others; batch-norm parameters are
# excluded from weight decay to improve generalization.
masknet_paras_only_bn, masknet_paras_wo_bn = separate_irse_bn_paras(MASKNET)

OPTIMIZER = optim.SGD([{'params': masknet_paras_wo_bn, 'weight_decay': WEIGHT_DECAY},
                       {'params': masknet_paras_only_bn}],
                      lr=LR, momentum=MOMENTUM)
print("=" * 60)
print(OPTIMIZER)
print("Optimizer Generated")
print("=" * 60)

# optionally resume from a checkpoint
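# A minimal sketch of what a BN/non-BN split like separate_irse_bn_paras can look
# like (illustrative only; the actual imported helper may differ): collect the ids
# of every BatchNorm parameter, then partition model.parameters() on membership.
def split_bn_paras(model):
    bn_para_ids = set()
    for layer in model.modules():
        if isinstance(layer, (nn.BatchNorm1d, nn.BatchNorm2d)):
            bn_para_ids.update(id(p) for p in layer.parameters())
    paras_only_bn = [p for p in model.parameters() if id(p) in bn_para_ids]
    paras_wo_bn = [p for p in model.parameters() if id(p) not in bn_para_ids]
    return paras_only_bn, paras_wo_bn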
HEAD = HEAD_DICT[HEAD_NAME]
logger.info("=" * 60)
logger.info(HEAD)
logger.info("{} Head Generated".format(HEAD_NAME))
logger.info("=" * 60)

LOSS_DICT = {'Focal': FocalLoss(), 'Softmax': nn.CrossEntropyLoss()}
LOSS = LOSS_DICT[LOSS_NAME]
logger.info("=" * 60)
logger.info(LOSS)
logger.info("{} Loss Generated".format(LOSS_NAME))
logger.info("=" * 60)

# Separate the batch-norm parameters from the others; batch-norm parameters are
# excluded from weight decay to improve generalization.
if BACKBONE_NAME.find("IR") >= 0:
    backbone_paras_only_bn, backbone_paras_wo_bn = separate_irse_bn_paras(BACKBONE)
    _, head_paras_wo_bn = separate_irse_bn_paras(HEAD)
else:
    backbone_paras_only_bn, backbone_paras_wo_bn = separate_resnet_bn_paras(BACKBONE)
    _, head_paras_wo_bn = separate_resnet_bn_paras(HEAD)

DISP_FREQ = len(train_loader)  # frequency (in batches) at which to log training loss & accuracy
NUM_EPOCH_WARM_UP = NUM_EPOCH // 25  # use the first 1/25 of the epochs to warm up
NUM_BATCH_WARM_UP = len(train_loader) * NUM_EPOCH_WARM_UP  # total number of warm-up batches

scheduler = paddle.optimizer.lr.LinearWarmup(
    learning_rate=LR,
    warmup_steps=NUM_BATCH_WARM_UP,
    start_lr=0.0,
    end_lr=LR)
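# Note: paddle.optimizer.lr.LinearWarmup belongs to PaddlePaddle and will not
# update the torch optim.SGD built above; the warmup_steps/start_lr/end_lr
# arguments completing the call are assumed from the surrounding variables.
# A minimal torch-side sketch of the same batch-level linear warm-up
# (warm_up_lr is an illustrative helper, not necessarily this repo's utility):
def warm_up_lr(batch, num_batch_warm_up, init_lr, optimizer):
    # Linearly ramp the learning rate toward init_lr; call once per batch
    # during the first num_batch_warm_up batches.
    for group in optimizer.param_groups:
        group['lr'] = batch * init_lr / num_batch_warm_up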