    return factor * base_lr


def learning_rate_schedule_tr(base_lr, epoch, total_epochs):
    """Piecewise-linear schedule: hold, decay linearly, then hold at 1% of base."""
    alpha = epoch / total_epochs
    if alpha <= 0.3:
        factor = 1.0
    elif alpha <= 0.6:
        # Decay linearly from 1.0 at alpha = 0.3 to 0.01 at alpha = 0.6.
        factor = 1.0 - (alpha - 0.3) / 0.3 * 0.99
    else:
        factor = 0.01
    return factor * base_lr


criterion = F.cross_entropy
regularizer = None if args.curve is None else curves.l2_regularizer(args.wd)

# When training a curve, weight decay is applied through the regularizer,
# so it is disabled in the optimizer to avoid applying it twice.
optimizer = torch.optim.SGD(
    filter(lambda param: param.requires_grad, model.parameters()),
    lr=args.lr,
    momentum=args.momentum,
    weight_decay=args.wd if args.curve is None else 0.0,
)

checkpoint = torch.load(args.ckpt[0])
record_train = []
record_test = []
# for i, ckp in enumerate(args.ckpt):
for i in range(args.num_scale):
    # checkpoint = torch.load(ckp)
    print('next_scale')
    key_weight_name = []
    key_bias_name = []
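
# A minimal usage sketch for the schedule above, assuming the usual
# per-epoch training loop; `train_loader`, `utils.train`, and `args.epochs`
# are assumptions here, not names defined in this script.
def adjust_learning_rate(optimizer, lr):
    # Push the scheduled value into every parameter group.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr

# for epoch in range(args.epochs):
#     lr = learning_rate_schedule_tr(args.lr, epoch, args.epochs)
#     adjust_learning_rate(optimizer, lr)
#     utils.train(train_loader, model, optimizer, criterion, regularizer)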
architecture = getattr(models, args.model)
curve = getattr(curves, args.curve)
model = curves.CurveNet(
    num_classes,
    curve,
    architecture.curve,
    args.num_bends,
    architecture_kwargs=architecture.kwargs,
)
model.cuda()

checkpoint = torch.load(args.ckpt)
model.load_state_dict(checkpoint['model_state'])

criterion = F.cross_entropy
regularizer = curves.l2_regularizer(args.wd)

# Evaluate the curve at T points t spaced uniformly on [0, 1].
T = args.num_points
ts = np.linspace(0.0, 1.0, T)

# Per-point buffers: train/test loss, NLL, accuracy, error rate, and the
# Euclidean distance dl between weights at consecutive points on the curve.
tr_loss = np.zeros(T)
tr_nll = np.zeros(T)
tr_acc = np.zeros(T)
te_loss = np.zeros(T)
te_nll = np.zeros(T)
te_acc = np.zeros(T)
tr_err = np.zeros(T)
te_err = np.zeros(T)
dl = np.zeros(T)

previous_weights = None
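
# A minimal sketch of the evaluation loop that fills the buffers above,
# assuming helpers in the style of utils.update_bn / utils.test, data loaders
# named loaders['train'] / loaders['test'], and a CurveNet.weights(t) method
# returning a flat numpy array; all of these are assumptions, not names
# defined in this script.
t = torch.FloatTensor([0.0]).cuda()
for i, t_value in enumerate(ts):
    t.data.fill_(t_value)

    # Distance travelled in weight space since the previous point on the curve.
    weights = model.weights(t)
    if previous_weights is not None:
        dl[i] = np.sqrt(np.sum(np.square(weights - previous_weights)))
    previous_weights = weights.copy()

    # BatchNorm statistics depend on t, so they are recomputed at each point
    # before measuring train/test performance.
    utils.update_bn(loaders['train'], model, t=t)
    tr_res = utils.test(loaders['train'], model, criterion, regularizer, t=t)
    te_res = utils.test(loaders['test'], model, criterion, regularizer, t=t)

    tr_loss[i], tr_nll[i], tr_acc[i] = tr_res['loss'], tr_res['nll'], tr_res['accuracy']
    te_loss[i], te_nll[i], te_acc[i] = te_res['loss'], te_res['nll'], te_res['accuracy']
    tr_err[i] = 100.0 - tr_acc[i]
    te_err[i] = 100.0 - te_acc[i]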