def build_models(self, model_paths):
    """Build one MobileNetV2 per checkpoint path and wrap them in a ModuleList.

    Each network is constructed with ``self.num_classes`` outputs; when
    ``self.pretrained`` is truthy, the state dict stored at the matching
    path is loaded into it before it is collected.
    """
    def _make(checkpoint_path):
        # One fresh network per path; optionally restore its weights.
        net = MobileNetV2(self.num_classes)
        if self.pretrained:
            net.load_state_dict(torch.load(checkpoint_path))
        return net

    return nn.ModuleList([_make(p) for p in model_paths])
def __init__(self, pe=0):
    """Store ``pe``, log it, build the backbone and set optimizer hyperparams.

    Args:
        pe: forwarded straight to the MobileNetV2 constructor (and logged);
            presumably the number of output classes — TODO confirm.
    """
    super().__init__()
    self.pe = pe
    # Record the hyperparameter (looks like a LightningModule-style log — assumption).
    self.log('pe', pe)
    # self.net = ResNet18(pe)
    self.net = MobileNetV2(pe)
    self.lr = 1e-3  # learning rate used by the optimizer configured elsewhere
    self.wd = 5e-4  # weight decay
def get_net(network: str, num_classes) -> torch.nn.Module:
    """Instantiate the CIFAR architecture named by ``network``.

    Returns ``None`` when the name is not recognised — same contract as the
    original chained conditional expression.
    """
    factories = {
        'VGG16': lambda: VGG('VGG16', num_classes=num_classes),
        'ResNet34': lambda: ResNet34(num_classes=num_classes),
        'PreActResNet18': lambda: PreActResNet18(num_classes=num_classes),
        'GoogLeNet': lambda: GoogLeNet(num_classes=num_classes),
        'densenet_cifar': lambda: densenet_cifar(num_classes=num_classes),
        'ResNeXt29_2x64d': lambda: ResNeXt29_2x64d(num_classes=num_classes),
        'MobileNet': lambda: MobileNet(num_classes=num_classes),
        'MobileNetV2': lambda: MobileNetV2(num_classes=num_classes),
        'DPN92': lambda: DPN92(num_classes=num_classes),
        'ShuffleNetG2': lambda: ShuffleNetG2(num_classes=num_classes),
        'SENet18': lambda: SENet18(num_classes=num_classes),
        # ShuffleNetV2 takes a width ratio as its first positional argument.
        'ShuffleNetV2': lambda: ShuffleNetV2(1, num_classes=num_classes),
        'EfficientNetB0': lambda: EfficientNetB0(num_classes=num_classes),
    }
    maker = factories.get(network)
    return maker() if maker is not None else None
def create_landmarks_model(landmarks_model_path,
                           landmarks_network='resnet_50',
                           landmarks_num_classes=196,
                           landmarks_img_size=256):
    """Build the landmarks regression network and load its checkpoint.

    Args:
        landmarks_model_path: path to the checkpoint (.pth state dict) to load.
        landmarks_network: architecture name ('resnet_18'..'resnet_152' or 'mobilenetv2').
        landmarks_num_classes: number of regression outputs.
        landmarks_img_size: input image size fed to the network.

    Returns:
        The model in eval mode, moved to GPU when available.
    """
    use_cuda = torch.cuda.is_available()
    # ---------------------------------------------------------------- build landmarks model
    if landmarks_network == 'resnet_18':
        landmarks_model = resnet18(num_classes=landmarks_num_classes, img_size=landmarks_img_size)
    elif landmarks_network == 'resnet_34':
        landmarks_model = resnet34(num_classes=landmarks_num_classes, img_size=landmarks_img_size)
    elif landmarks_network == 'resnet_50':
        landmarks_model = resnet50(num_classes=landmarks_num_classes, img_size=landmarks_img_size)
    elif landmarks_network == 'resnet_101':
        landmarks_model = resnet101(num_classes=landmarks_num_classes, img_size=landmarks_img_size)
    elif landmarks_network == 'resnet_152':
        landmarks_model = resnet152(num_classes=landmarks_num_classes, img_size=landmarks_img_size)
    elif landmarks_network == 'mobilenetv2':
        # BUG FIX: the original referenced ops.landmarks_num_classes and
        # ops.landmarks_img_size[0], but `ops` is not defined in this function
        # (NameError at runtime). Use the function parameters instead.
        landmarks_model = MobileNetV2(n_class=landmarks_num_classes, input_size=landmarks_img_size)
    else:
        # BUG FIX: same issue — the error message read ops.model; report the
        # actual argument that failed to match.
        print('error no the struct model : {}'.format(landmarks_network))
    device = torch.device("cuda:0" if use_cuda else "cpu")
    # Load the test checkpoint if it exists (silently skipped otherwise —
    # preserved from the original behaviour).
    if os.access(landmarks_model_path, os.F_OK):  # checkpoint
        # chkpt = torch.load(ops.landmarks_model, map_location=device)
        # landmarks_model.load_state_dict(chkpt)
        chkpt = torch.load(landmarks_model_path,
                           map_location=lambda storage, loc: storage)
        landmarks_model.load_state_dict(chkpt)
        landmarks_model.eval()  # inference mode
        print('load landmarks model : {}'.format(landmarks_model_path))
        print('\n/******************* landmarks model acc ******************/')
        acc_model('', landmarks_model)
    landmarks_model = landmarks_model.to(device)
    return landmarks_model
def f():
    """Load pretrained MobileNetV2 weights and split ``model.features`` into stages.

    A new stage begins at every ``nn.Sequential`` module and at every module
    with stride 2; the modules accumulated before each boundary are wrapped in
    an ``nn.Sequential``. (As in the original, a boundary hit on the very
    first module yields an empty leading Sequential.)
    """
    net = MobileNetV2()
    net.load_state_dict(torch.load(path.join('weights', filename)))

    stages, current = [], []
    for module in net.features:
        is_boundary = isinstance(module, nn.Sequential) or module.stride == 2
        if is_boundary:
            # Flush everything accumulated so far, then start a new stage
            # beginning with this module.
            stages.append(nn.Sequential(*current))
            current = []
        current.append(module)
    if current:
        stages.append(nn.Sequential(*current))
    return stages
def fetch_specified_model(model_name, activation):
    """Init and return the specified model.

    Args:
        model_name: one of "basenet", "resnet18", "resnet34", "mobnet2", "sqnet".
        activation: name passed to ``kdm.get_activation_factory``.

    Returns:
        The constructed model (CIFAR100 hard-coding: 3 input channels, 100 classes).

    Raises:
        ValueError: if ``model_name`` is not a supported base model.
    """
    # Specific hard-coding for CIFAR100
    in_ch, num_classes = 3, 100
    act_fact = kdm.get_activation_factory(activation)
    # Dispatch table: every constructor shares the (in_ch, num_classes, act_fact) signature.
    builders = {
        "basenet": BaseNet,
        "resnet18": ResNet18,
        "resnet34": ResNet34,
        "mobnet2": MobileNetV2,
        "sqnet": SqueezeNet,
    }
    try:
        builder = builders[model_name]
    except KeyError:
        # FIX: the original used `assert False, ...` for input validation,
        # which is silently stripped under `python -O`; raise explicitly.
        raise ValueError("Unsupported base model: {}".format(model_name))
    return builder(in_ch, num_classes, act_fact)
# (fragment: tail of a model-selection if/elif chain; the leading `if` and the
#  end of the transform list fall outside this chunk)
elif args.model == 'resnet32':
    net = resnet32(args.cifar).to(args.device)
elif args.model == 'resnet56':
    net = resnet56(args.cifar).to(args.device)
elif args.model == 'resnet110':
    net = resnet110(args.cifar).to(args.device)
elif args.model == 'resnet20_leaky':
    net = resnet20_leaky(args.cifar).to(args.device)
elif args.model == 'resnet32_leaky':
    net = resnet32_leaky(args.cifar).to(args.device)
elif args.model == 'resnet56_leaky':
    net = resnet56_leaky(args.cifar).to(args.device)
elif args.model == 'resnet110_leaky':
    net = resnet110_leaky(args.cifar).to(args.device)
elif args.model == 'mobilenetv2':
    net = MobileNetV2(args.cifar).to(args.device)
start_epoch = 0
best_acc = 0
# Cosine-shaped scaling factor for the initial LR; `loc` goes from ~0 to 1 as
# args.i approaches args.num (presumably a run index over a schedule — TODO confirm).
if args.difflr:
    loc = (1 + np.cos(np.pi * ((args.num - args.i) / args.num))) / 2
else:
    loc = 1
print('The initial learning rate is:', args.lr * loc)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr * loc, momentum=0.9, weight_decay=5e-4)
# Data
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
def training(args): args.use_gpu = torch.cuda.is_available() #os.environ["CUDA_VISIBLE_DEVICES"]=args.cuda_devices torch.manual_seed('1') args.save_dir = osp.join(args.save_dir, args.data, args.model, str(args.custom_conv) + 'tiny') print(args.save_dir) sys.stdout = Logger(osp.join(args.save_dir, 'log_' + '.txt')) print_config(args) if args.use_gpu: print("Currently using GPU: {}".format(args.cuda_devices)) cudnn.benchmark = True torch.cuda.manual_seed_all('0') else: print("Currently using CPU") print("Creating dataset: {}".format(args.data)) if args.data == 'mnist': dataset = MNIST(args.batch_size, args.use_gpu, args.workers) elif args.data == 'cifar10': dataset = CIFAR10(args.batch_size, args.use_gpu, args.workers) trainloader, testloader = dataset.trainloader, dataset.testloader args.num_classes = dataset.num_classes if args.model == 'lenetpp': model = LeNetpp(args.num_classes, custom_conv=args.custom_conv) elif args.model == 'mobilenetv2': model = MobileNetV2(num_classes=args.num_classes, custom=args.custom_conv) print("Creating model: {}".format(args.model)) #model.load_state_dict(torch.load('work_dir/adaptive_3/cifar10/mobilenetv2/True1/weights/LeNet_epoch_100.pth')) if args.use_gpu: model = model.cuda() criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=5e-04, momentum=0.9) if args.stepsize > 0: scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma) acc, err = test(args, model, trainloader, 0) print("Accuracy (%): {}\t Error rate(%): {}".format(acc, err)) start_time = time.time() for epoch in range(args.max_epoch): print("==> Epoch {}/{}".format(epoch + 1, args.max_epoch)) train(args, model, criterion, optimizer, trainloader, epoch) if args.stepsize > 0: scheduler.step() if args.eval_freq > 0 and (epoch + 1) % args.eval_freq == 0 or ( epoch + 1) == args.max_epoch: print("==> Train") acc, err = test(args, model, trainloader, epoch) print("Accuracy (%): {}\t 
Error rate(%): {}".format(acc, err)) print("==> Test") acc, err = test(args, model, testloader, epoch) print("Accuracy (%): {}\t Error rate(%): {}".format(acc, err)) save_model(model, epoch, name='LeNet_', save_dir=args.save_dir) elapsed = round(time.time() - start_time) elapsed = str(datetime.timedelta(seconds=elapsed)) print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
# (fragment: tail of a model-selection if/elif chain; the leading `if` falls
#  outside this chunk)
    model_ = ShuffleNetV2(ratio=1., num_classes=ops.num_classes)
elif ops.model == "shufflenet_v2_x1_5":
    model_ = shufflenet_v2_x1_5(pretrained=False, num_classes=ops.num_classes)
elif ops.model == "shufflenet_v2_x1_0":
    model_ = shufflenet_v2_x1_0(pretrained=False, num_classes=ops.num_classes)
elif ops.model == "shufflenet_v2_x2_0":
    model_ = shufflenet_v2_x2_0(pretrained=False, num_classes=ops.num_classes)
elif ops.model == "shufflenet":
    model_ = ShuffleNet(num_blocks=[2, 4, 2], num_classes=ops.num_classes, groups=3)
elif ops.model == "mobilenetv2":
    model_ = MobileNetV2(num_classes=ops.num_classes)
elif ops.model == "ReXNetV1":
    model_ = ReXNetV1(num_classes=ops.num_classes)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
model_ = model_.to(device)
model_.eval()  # set inference (eval) mode
# print(model_)  # print the model structure
# Load the test checkpoint if it exists.
if os.access(ops.model_path, os.F_OK):  # checkpoint
    chkpt = torch.load(ops.model_path, map_location=device)
    model_.load_state_dict(chkpt)
def trainer(ops,f_log): try: os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS if ops.log_flag: sys.stdout = f_log set_seed(ops.seed) #---------------------------------------------------------------- 构建模型 print('use model : %s'%(ops.model)) if ops.model == 'resnet_18': model_=resnet18(pretrained = ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0],dropout_factor=ops.dropout) elif ops.model == 'resnet_34': model_=resnet34(pretrained = ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0],dropout_factor=ops.dropout) elif ops.model == 'resnet_50': model_=resnet50(pretrained = ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0],dropout_factor=ops.dropout) elif ops.model == 'resnet_101': model_=resnet101(pretrained = ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0],dropout_factor=ops.dropout) elif ops.model == 'resnet_152': model_=resnet152(pretrained = ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0],dropout_factor=ops.dropout) elif ops.model == 'mobilenetv2': model_=MobileNetV2(n_class =ops.num_classes, input_size=ops.img_size[0],dropout_factor=ops.dropout) else: print('error no the struct model : {}'.format(ops.model)) use_cuda = torch.cuda.is_available() device = torch.device("cuda:0" if use_cuda else "cpu") model_ = model_.to(device) # print(model_)# 打印模型结构 # Dataset val_split = [] dataset = LoadImagesAndLabels(ops= ops,img_size=ops.img_size,flag_agu=ops.flag_agu,fix_res = ops.fix_res,val_split = val_split) print('len train datasets : %s'%(dataset.__len__())) # Dataloader dataloader = DataLoader(dataset, batch_size=ops.batch_size, num_workers=ops.num_workers, shuffle=True, pin_memory=False, drop_last = True) # 优化器设计 # optimizer_Adam = torch.optim.Adam(model_.parameters(), lr=init_lr, betas=(0.9, 0.99),weight_decay=1e-6) optimizer_SGD = optim.SGD(model_.parameters(), lr=ops.init_lr, momentum=ops.momentum, weight_decay=ops.weight_decay)# 优化器初始化 optimizer = optimizer_SGD # 加载 
finetune 模型 if os.access(ops.fintune_model,os.F_OK):# checkpoint chkpt = torch.load(ops.fintune_model, map_location=device) model_.load_state_dict(chkpt) print('load fintune model : {}'.format(ops.fintune_model)) print('/**********************************************/') # 损失函数 if ops.loss_define != 'wing_loss': criterion = nn.MSELoss(reduce=True, reduction='mean') step = 0 idx = 0 # 变量初始化 best_loss = np.inf loss_mean = 0. # 损失均值 loss_idx = 0. # 损失计算计数器 flag_change_lr_cnt = 0 # 学习率更新计数器 init_lr = ops.init_lr # 学习率 epochs_loss_dict = {} for epoch in range(0, ops.epochs): if ops.log_flag: sys.stdout = f_log print('\nepoch %d ------>>>'%epoch) model_.train() # 学习率更新策略 if loss_mean!=0.: if best_loss > (loss_mean/loss_idx): flag_change_lr_cnt = 0 best_loss = (loss_mean/loss_idx) else: flag_change_lr_cnt += 1 if flag_change_lr_cnt > 10: init_lr = init_lr*ops.lr_decay set_learning_rate(optimizer, init_lr) flag_change_lr_cnt = 0 loss_mean = 0. # 损失均值 loss_idx = 0. # 损失计算计数器 for i, (imgs_, pts_) in enumerate(dataloader): # print('imgs_, pts_',imgs_.size(), pts_.size()) if use_cuda: imgs_ = imgs_.cuda() # pytorch 的 数据输入格式 : (batch, channel, height, width) pts_ = pts_.cuda() output = model_(imgs_.float()) if ops.loss_define == 'wing_loss': loss = got_total_wing_loss(output, pts_.float()) else: loss = criterion(output, pts_.float()) loss_mean += loss.item() loss_idx += 1. 
if i%10 == 0: loc_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) print(' %s - %s - epoch [%s/%s] (%s/%s):'%(loc_time,ops.model,epoch,ops.epochs,i,int(dataset.__len__()/ops.batch_size)),\ 'loss : %.6f - %.6f'%(loss_mean/loss_idx,loss.item()),\ ' lr : %.5f'%init_lr,' bs :',ops.batch_size,\ ' img_size: %s x %s'%(ops.img_size[0],ops.img_size[1]),' best_loss: %.4f'%best_loss) # time.sleep(7) # writer.add_scalar('data/loss', loss, step) # writer.add_scalars('data/scalar_group', {'acc':acc,'lr':init_lr,'baseline':0.}, step) # 计算梯度 loss.backward() # 优化器对模型参数更新 optimizer.step() # 优化器梯度清零 optimizer.zero_grad() step += 1 # 一个 epoch 保存连词最新的 模型 # if i%(int(dataset.__len__()/ops.batch_size/2-1)) == 0 and i > 0: # torch.save(model_.state_dict(), ops.model_exp + 'latest.pth') # 每一个 epoch 进行模型保存 torch.save(model_.state_dict(), ops.model_exp + 'model_epoch-{}.pth'.format(epoch)) if len(val_split) > 0 and (epoch%ops.test_interval==0): # test model_.eval() loss_train,loss_val = tester(ops,epoch,model_,criterion, train_split,train_split_label,val_split,val_split_label, use_cuda) epochs_loss_dict['epoch_'+str(epoch)] = {} epochs_loss_dict['epoch_'+str(epoch)]['loss_train'] = loss_train epochs_loss_dict['epoch_'+str(epoch)]['loss_val'] = loss_val f_loss = open(ops.model_exp + 'loss_epoch_trainval.json',"w",encoding='utf-8') json.dump(epochs_loss_dict,f_loss,ensure_ascii=False,indent = 1,cls = JSON_Encoder) f_loss.close() except Exception as e: print('Exception : ',e) # 打印异常 print('Exception file : ', e.__traceback__.tb_frame.f_globals['__file__'])# 发生异常所在的文件 print('Exception line : ', e.__traceback__.tb_lineno)# 发生异常所在的行数
# (fragment: tail of a config-list literal; the opening bracket falls outside
#  this chunk. Rows look like MobileNetV3 bneck settings — presumably
#  [kernel, expansion, out_channels, use_SE, activation, stride] — TODO confirm.)
    [3, 72, 24, False, 'RE', 2],
    [3, 88, 24, False, 'RE', 1],
    [5, 96, 40, True, 'HS', 2],
    [5, 240, 40, True, 'HS', 1],
    [5, 240, 40, True, 'HS', 1],
    [5, 120, 48, True, 'HS', 1],
    [5, 144, 48, True, 'HS', 1],
    [5, 288, 96, True, 'HS', 2],
    [5, 576, 96, True, 'HS', 1],
    [5, 576, 96, True, 'HS', 1],
]
# Pick the MobileNet generation from the CLI flag.
if args.version == 'v1':
    model = MobileNetv1().to(device)
elif args.version == 'v2':
    model = MobileNetV2(cfgv2).to(device)
elif args.version == 'v3':
    model = MobileNetV3(cfgv3).to(device)
model.apply(weights_init)
if args.load:
    # load network
    model_path = args.mobilenet
    print('Loading resume network', model_path)
    model.load_state_dict(torch.load(model_path))
criterion = nn.CrossEntropyLoss().to(device)
opt = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
# opt = optim.Adam(model.parameters(), lr=args.lr)
# (fragment: tail of a model-selection if/elif chain feeding a thop
#  flops/params profile; the leading `if` falls outside this chunk)
    model_ = resnet18(num_classes=ops.num_classes, img_size=ops.input_shape[2])
elif ops.network == 'resnet_34':
    model_ = resnet34(num_classes=ops.num_classes, img_size=ops.input_shape[2])
elif ops.network == 'resnet_50':
    model_ = resnet50(num_classes=ops.num_classes, img_size=ops.input_shape[2])
elif ops.network == 'resnet_101':
    model_ = resnet101(num_classes=ops.num_classes, img_size=ops.input_shape[2])
elif ops.network == 'resnet_152':
    model_ = resnet152(num_classes=ops.num_classes, img_size=ops.input_shape[2])
elif ops.network == 'mobilenetv2':
    model_ = MobileNetV2(n_class=ops.num_classes, input_size=ops.input_shape[2])
else:
    print('error no the struct model : {}'.format(ops.network))
# Profile FLOPs/params on a random input of the configured shape.
dummy_input = torch.randn(ops.input_shape)
flops, params = profile(model_, inputs=(dummy_input, ))
print('flops : {} , params : {}'.format(flops, params))
flops, params = clever_format([flops, params], "%.3f")
print('flops : {} , params : {}'.format(flops, params))
# params = list(model_.parameters())
# idx = 0
# for i in params:
#     idx += 1
#     print('{}) : {}'.format(idx,i))
# Micro-benchmark: time MobileNetV2 forward passes on a single CIFAR-sized input.
os.environ["CUDA_VISIBLE_DEVICES"] = '2'
# Multipliers to convert seconds into the unit selected by --scale.
TIME_SCALES = {'s': 1, 'ms': 1000, 'us': 1000000}
parser = argparse.ArgumentParser()
parser.add_argument('-b', '--batch-size', type=int, default=16)
parser.add_argument('-f', '--features', type=int, default=32)
parser.add_argument('-s', '--state-size', type=int, default=128)
parser.add_argument('-r', '--runs', type=int, default=100)
parser.add_argument('--scale', choices=['s', 'ms', 'us'], default='ms')
parser.add_argument('-c', '--cuda', action='store_true')
parser.add_argument('-d', '--double', action='store_true')
options = parser.parse_args()
# NOTE(review): a single 1x3x32x32 input is used regardless of --batch-size,
# and .cuda() is called unconditionally regardless of --cuda — confirm intent.
X = torch.randn(1, 3, 32, 32).cuda()
model = MobileNetV2(num_classes=10, custom=False).cuda()
forward_min = math.inf
forward_time = 0
# Wall-clock timing of each forward pass; track best and total.
for _ in range(options.runs):
    start = time.time()
    y = model(X)
    elapsed = time.time() - start
    forward_min = min(forward_min, elapsed)
    forward_time += elapsed
scale = TIME_SCALES[options.scale]
forward_min *= scale
forward_average = forward_time / options.runs * scale
def SEMobileNetV2(**kwargs):
    """MobileNetV2 variant using the squeeze-and-excitation inverted-residual
    block (``SEInvertedResidual``) in place of the default block.

    All keyword arguments are forwarded to ``MobileNetV2``.
    """
    return MobileNetV2(block=SEInvertedResidual, **kwargs)
def trainer(ops, f_log): try: os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS if ops.log_flag: sys.stdout = f_log set_seed(ops.seed) #---------------------------------------------------------------- 构建模型 if ops.model == 'resnet_50': model_ = resnet50(pretrained=True, num_classes=ops.num_classes, img_size=ops.img_size[0], dropout_factor=ops.dropout) elif ops.model == 'resnet_18': model_ = resnet18(pretrained=True, num_classes=ops.num_classes, img_size=ops.img_size[0], dropout_factor=ops.dropout) elif ops.model == 'resnet_34': model_ = resnet34(pretrained=True, num_classes=ops.num_classes, img_size=ops.img_size[0], dropout_factor=ops.dropout) elif ops.model == 'resnet_101': model_ = resnet101(pretrained=True, num_classes=ops.num_classes, img_size=ops.img_size[0], dropout_factor=ops.dropout) elif ops.model == "squeezenet1_0": model_ = squeezenet1_0(pretrained=True, num_classes=ops.num_classes, dropout_factor=ops.dropout) elif ops.model == "squeezenet1_1": model_ = squeezenet1_1(pretrained=True, num_classes=ops.num_classes, dropout_factor=ops.dropout) elif ops.model == "shufflenetv2": model_ = ShuffleNetV2(ratio=1., num_classes=ops.num_classes, dropout_factor=ops.dropout) elif ops.model == "shufflenet_v2_x1_5": model_ = shufflenet_v2_x1_5(pretrained=False, num_classes=ops.num_classes) elif ops.model == "shufflenet_v2_x1_0": model_ = shufflenet_v2_x1_0(pretrained=False, num_classes=ops.num_classes) elif ops.model == "shufflenet_v2_x2_0": model_ = shufflenet_v2_x2_0(pretrained=False, num_classes=ops.num_classes) elif ops.model == "shufflenet": model_ = ShuffleNet(num_blocks=[2, 4, 2], num_classes=ops.num_classes, groups=3, dropout_factor=ops.dropout) elif ops.model == "mobilenetv2": model_ = MobileNetV2(num_classes=ops.num_classes, dropout_factor=ops.dropout) elif ops.model == "ReXNetV1": model_ = ReXNetV1(num_classes=ops.num_classes, dropout_factor=ops.dropout) else: print(" no support the model") use_cuda = torch.cuda.is_available() device = torch.device("cuda:0" if 
use_cuda else "cpu") model_ = model_.to(device) # print(model_)# 打印模型结构 # Dataset dataset = LoadImagesAndLabels(ops=ops, img_size=ops.img_size, flag_agu=ops.flag_agu, fix_res=ops.fix_res, vis=False) print("handpose done") print('len train datasets : %s' % (dataset.__len__())) # Dataloader dataloader = DataLoader(dataset, batch_size=ops.batch_size, num_workers=ops.num_workers, shuffle=True, pin_memory=False, drop_last=True) # 优化器设计 optimizer_Adam = torch.optim.Adam(model_.parameters(), lr=ops.init_lr, betas=(0.9, 0.99), weight_decay=1e-6) # optimizer_SGD = optim.SGD(model_.parameters(), lr=ops.init_lr, momentum=ops.momentum, weight_decay=ops.weight_decay)# 优化器初始化 optimizer = optimizer_Adam # 加载 finetune 模型 if os.access(ops.fintune_model, os.F_OK): # checkpoint chkpt = torch.load(ops.fintune_model, map_location=device) model_.load_state_dict(chkpt) print('load fintune model : {}'.format(ops.fintune_model)) print('/**********************************************/') # 损失函数 if ops.loss_define != 'wing_loss': criterion = nn.MSELoss(reduce=True, reduction='mean') step = 0 idx = 0 # 变量初始化 best_loss = np.inf loss_mean = 0. # 损失均值 loss_idx = 0. # 损失计算计数器 flag_change_lr_cnt = 0 # 学习率更新计数器 init_lr = ops.init_lr # 学习率 epochs_loss_dict = {} for epoch in range(0, ops.epochs): if ops.log_flag: sys.stdout = f_log print('\nepoch %d ------>>>' % epoch) model_.train() # 学习率更新策略 if loss_mean != 0.: if best_loss > (loss_mean / loss_idx): flag_change_lr_cnt = 0 best_loss = (loss_mean / loss_idx) else: flag_change_lr_cnt += 1 if flag_change_lr_cnt > 50: init_lr = init_lr * ops.lr_decay set_learning_rate(optimizer, init_lr) flag_change_lr_cnt = 0 loss_mean = 0. # 损失均值 loss_idx = 0. 
# 损失计算计数器 for i, (imgs_, pts_) in enumerate(dataloader): # print('imgs_, pts_',imgs_.size(), pts_.size()) if use_cuda: imgs_ = imgs_.cuda( ) # pytorch 的 数据输入格式 : (batch, channel, height, width) pts_ = pts_.cuda() output = model_(imgs_.float()) if ops.loss_define == 'wing_loss': loss = got_total_wing_loss(output, pts_.float()) else: loss = criterion(output, pts_.float()) loss_mean += loss.item() loss_idx += 1. if i % 10 == 0: loc_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) print(' %s - %s - epoch [%s/%s] (%s/%s):'%(loc_time,ops.model,epoch,ops.epochs,i,int(dataset.__len__()/ops.batch_size)),\ 'Mean Loss : %.6f - Loss: %.6f'%(loss_mean/loss_idx,loss.item()),\ ' lr : %.8f'%init_lr,' bs :',ops.batch_size,\ ' img_size: %s x %s'%(ops.img_size[0],ops.img_size[1]),' best_loss: %.6f'%best_loss) # 计算梯度 loss.backward() # 优化器对模型参数更新 optimizer.step() # 优化器梯度清零 optimizer.zero_grad() step += 1 torch.save( model_.state_dict(), ops.model_exp + '{}-size-{}-model_epoch-{}.pth'.format( ops.model, ops.img_size[0], epoch)) except Exception as e: print('Exception : ', e) # 打印异常 print('Exception file : ', e.__traceback__.tb_frame.f_globals['__file__']) # 发生异常所在的文件 print('Exception line : ', e.__traceback__.tb_lineno) # 发生异常所在的行数
# (fragment: tail of a model-selection if/elif chain for evaluation; the
#  leading `if` falls outside this chunk)
    model_ = resnet18(num_classes=ops.num_classes, img_size=ops.img_size[0])
elif ops.model == 'resnet_34':
    model_ = resnet34(num_classes=ops.num_classes, img_size=ops.img_size[0])
elif ops.model == 'resnet_50':
    model_ = resnet50(num_classes=ops.num_classes, img_size=ops.img_size[0])
elif ops.model == 'resnet_101':
    model_ = resnet101(num_classes=ops.num_classes, img_size=ops.img_size[0])
elif ops.model == 'resnet_152':
    model_ = resnet152(num_classes=ops.num_classes, img_size=ops.img_size[0])
elif ops.model == 'mobilenetv2':
    model_ = MobileNetV2(n_class=ops.num_classes, input_size=ops.img_size[0])
else:
    print('error no the struct model : {}'.format(ops.model))
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
model_ = model_.to(device)
model_.eval()  # set inference (eval) mode
# print(model_)  # print the model structure
# Load the test checkpoint if it exists.
if os.access(ops.test_model, os.F_OK):  # checkpoint
    chkpt = torch.load(ops.test_model, map_location=device)
    model_.load_state_dict(chkpt)
def trainer(ops, f_log): if 1: person_list = define_sequence_datasets(ops.train_path) os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS if ops.log_flag: sys.stdout = f_log set_seed(ops.seed) #---------------------------------------------------------------- 构建模型 print('use model : %s' % (ops.model)) if ops.model == 'resnet_18': model_ = resnet18(pretrained=ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0], dropout_factor=ops.dropout) elif ops.model == 'resnet_34': model_ = resnet34(pretrained=ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0], dropout_factor=ops.dropout) elif ops.model == 'resnet_50': model_ = resnet50(pretrained=ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0], dropout_factor=ops.dropout) elif ops.model == 'resnet_101': model_ = resnet101(pretrained=ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0], dropout_factor=ops.dropout) elif ops.model == 'resnet_152': model_ = resnet152(pretrained=ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0], dropout_factor=ops.dropout) elif ops.model == 'mobilenetv2': model_ = MobileNetV2(n_class=ops.num_classes, input_size=ops.img_size[0], dropout_factor=ops.dropout) else: print('error no the struct model : {}'.format(ops.model)) use_cuda = torch.cuda.is_available() device = torch.device("cuda:0" if use_cuda else "cpu") model_ = model_.to(device) # print(model_)# 打印模型结构 # Dataset val_split = [] dataset = LoadImagesAndLabels(ops=ops, img_size=ops.img_size, flag_agu=ops.flag_agu) print('len train datasets : %s' % (dataset.__len__())) # Dataloader dataloader = DataLoader(dataset, batch_size=ops.batch_size, num_workers=ops.num_workers, shuffle=True, pin_memory=False, drop_last=True) # 优化器设计 # optimizer_Adam = torch.optim.Adam(model_.parameters(), lr=ops.init_lr, betas=(0.9, 0.99),weight_decay=1e-6) optimizer_SGD = optim.SGD(model_.parameters(), lr=ops.init_lr, momentum=ops.momentum, weight_decay=ops.weight_decay) # 优化器初始化 
optimizer = optimizer_SGD # 加载 finetune 模型 if os.access(ops.fintune_model, os.F_OK): # checkpoint chkpt = torch.load(ops.fintune_model, map_location=device) model_.load_state_dict(chkpt) print('load fintune model : {}'.format(ops.fintune_model)) print('/**********************************************/') # 损失函数 if ops.loss_define != 'wing_loss': criterion = nn.MSELoss(reduce=True, reduction='mean') step = 0 idx = 0 # 变量初始化 best_loss = np.inf loss_mean = 0. # 损失均值 loss_idx = 0. # 损失计算计数器 flag_change_lr_cnt = 0 # 学习率更新计数器 init_lr = ops.init_lr # 学习率 epochs_loss_dict = {} for epoch in range(0, ops.epochs): if ops.log_flag: sys.stdout = f_log print('\nepoch %d ------>>>' % epoch) model_.train() # 学习率更新策略 if loss_mean != 0.: if best_loss > (loss_mean / loss_idx): flag_change_lr_cnt = 0 best_loss = (loss_mean / loss_idx) else: flag_change_lr_cnt += 1 if flag_change_lr_cnt > 10: init_lr = init_lr * ops.lr_decay set_learning_rate(optimizer, init_lr) flag_change_lr_cnt = 0 loss_mean = 0. # 损失均值 loss_idx = 0. 
# 损失计算计数器 for i, (imgs_, pts_) in enumerate(dataloader): seq_imgs, seq_fe = sample_sequence_datasets(person_list) # print('imgs_, pts_',imgs_.size(), pts_.size()) if use_cuda: imgs_ = imgs_.cuda().float( ) # pytorch 的 数据输入格式 : (batch, channel, height, width) pts_ = pts_.cuda().float() imgs_ = torch.cat([imgs_, seq_imgs], dim=0) pts_ = torch.cat([pts_, seq_fe], dim=0) # print('imgs_ , pts_ size :',imgs_.size(),pts_.size()) output = model_(imgs_.float()) if ops.loss_define == 'wing_loss': loss = got_total_wing_loss(output, pts_) else: loss_all = criterion(output, pts_) loss_none_eye = criterion(output[:, 2:24].float(), pts_[:, 2:24].float()) loss_mouth_corner = criterion(output[:, 9:13].float(), pts_[:, 9:13].float()) loss_nose = criterion(output[:, 21:24].float(), pts_[:, 21:24].float()) loss = loss_all * 0.4 + loss_none_eye * 0.6 + loss_mouth_corner * 0.5 + loss_nose * 0.5 # loss_eyebrow = torch.abs(output[:,17:27]-crop_landmarks[:,17:27].float()) # loss_nose = torch.abs(output[:,27:36]- crop_landmarks[:,27:36].float()) # loss_eye = torch.abs(output[:,36:48]- crop_landmarks[:,36:48].float()) # loss_mouse = torch.abs(output[:,48:66]- crop_landmarks[:,48:66].float()) loss_mean += loss.item() loss_idx += 1. 
if i % 10 == 0: loc_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) print(' %s - %s - epoch [%s/%s] (%s/%s):'%(loc_time,ops.model,epoch,ops.epochs,i,int(dataset.__len__()/ops.batch_size)),\ 'loss : %.6f - %.6f'%(loss_mean/loss_idx,loss.item()),\ ' lr : %.5f'%init_lr,' bs :',ops.batch_size,\ ' img_size: %s x %s'%(ops.img_size[0],ops.img_size[1]),' best_loss: %.6f'%best_loss) # time.sleep(10) if i % 500 == 0: torch.save( model_.state_dict(), ops.model_exp + 'model_epoch-{}.pth'.format(epoch)) # 计算梯度 loss.backward() # 优化器对模型参数更新 optimizer.step() # 优化器梯度清零 optimizer.zero_grad() step += 1 # 一个 epoch 保存连词最新的 模型 # if i%(int(dataset.__len__()/ops.batch_size/2-1)) == 0 and i > 0: # torch.save(model_.state_dict(), ops.model_exp + 'latest.pth') # 每一个 epoch 进行模型保存 torch.save(model_.state_dict(), ops.model_exp + 'model_epoch-{}.pth'.format(epoch))
# (fragment: tail of a landmarks-network selection if/elif chain; the leading
#  `if` falls outside this chunk)
    landmarks_model = resnet18(num_classes=ops.landmarks_num_classes, img_size=ops.landmarks_img_size[0])
elif ops.landmarks_network == 'resnet_34':
    landmarks_model = resnet34(num_classes=ops.landmarks_num_classes, img_size=ops.landmarks_img_size[0])
elif ops.landmarks_network == 'resnet_50':
    landmarks_model = resnet50(num_classes=ops.landmarks_num_classes, img_size=ops.landmarks_img_size[0])
elif ops.landmarks_network == 'resnet_101':
    landmarks_model = resnet101(num_classes=ops.landmarks_num_classes, img_size=ops.landmarks_img_size[0])
elif ops.landmarks_network == 'resnet_152':
    landmarks_model = resnet152(num_classes=ops.landmarks_num_classes, img_size=ops.landmarks_img_size[0])
elif ops.landmarks_network == 'mobilenetv2':
    landmarks_model = MobileNetV2(n_class=ops.landmarks_num_classes, input_size=ops.landmarks_img_size[0])
else:
    # NOTE(review): this prints ops.model, not ops.landmarks_network — looks
    # like a copy/paste slip; left unchanged here.
    print('error no the struct model : {}'.format(ops.model))
device = torch.device("cuda:0" if use_cuda else "cpu")
# Load the test checkpoint if it exists.
if os.access(ops.landmarks_model, os.F_OK):  # checkpoint
    # chkpt = torch.load(ops.landmarks_model, map_location=device)
    # landmarks_model.load_state_dict(chkpt)
    chkpt = torch.load(ops.landmarks_model, map_location=lambda storage, loc: storage)
    landmarks_model.load_state_dict(chkpt)
    landmarks_model.eval()  # set inference (eval) mode
    print('load landmarks model : {}'.format(ops.landmarks_model))
def main():
    """Parse CLI args, build CIFAR-10 loaders, pick a model by number, train/test.

    Model numbers: 1=Net (default/fallback), 2=ResNet18, 3=MobileNet,
    4=MobileNetV2, 5=VGG16.
    """
    parser = argparse.ArgumentParser(
        description="Parameters for Training CIFAR-10")
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    # NOTE(review): --seed is parsed but never used (no torch.manual_seed call).
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--num-workers', type=int, default=1, metavar='N',
                        help='number of workers for cuda')
    # NOTE(review): help text copy-pasted from --num-workers; this actually
    # selects which model to train.
    parser.add_argument('--model-no', type=int, default=1, metavar='N',
                        help='number of workers for cuda')
    parser.add_argument(
        '--log-interval', type=int, default=10, metavar='N',
        help='how many batches to wait before logging training status')
    args = parser.parse_args()
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    cuda_args = {
        'num_workers': args.num_workers,
        'pin_memory': True
    } if use_cuda else {}
    # Standard CIFAR-10 channel statistics for normalization.
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    cifar_training_data = datasets.CIFAR10("../data/CIFAR10", train=True,
                                           transform=data_transform,
                                           download=True)
    cifar_testing_data = datasets.CIFAR10("../data/CIFAR10", train=False,
                                          transform=data_transform)
    train_loader = torch.utils.data.DataLoader(cifar_training_data,
                                               batch_size=args.batch_size,
                                               shuffle=True, **cuda_args)
    test_loader = torch.utils.data.DataLoader(cifar_testing_data,
                                              batch_size=args.test_batch_size,
                                              shuffle=True, **cuda_args)
    # Numeric model selection; anything unrecognised falls back to Net().
    model_no = args.model_no
    if model_no == 1:
        model = Net().to(device)
    elif model_no == 2:
        model = ResNet18().to(device)
    elif model_no == 3:
        model = MobileNet().to(device)
    elif model_no == 4:
        model = MobileNetV2().to(device)
    elif model_no == 5:
        model = VGG('VGG16').to(device)
    else:
        model = Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum)
    # One test pass after every training epoch.
    for epoch in range(1, args.epochs + 1):
        train(epoch, model, train_loader, optimizer, device,
              args.log_interval)
        test(model, test_loader, device)