def __init__(
     self,
     base_lr: float,
     max_lr: float,
     step_size_up: int = 2000,
     step_size_down: Optional[int] = None,
     mode: str = "triangular",
     gamma: float = 1.0,
     scale_fn: Optional[Callable[[float], float]] = None,
     scale_mode: str = "cycle",
     cycle_momentum: bool = True,
     base_momentum: float = 0.8,
     max_momentum: float = 0.9,
     last_epoch: int = -1,
     step_on_batch: bool = True,
 ):
     """Constructor for CyclicLR."""
     super().__init__(
         lambda opt: _schedulers.CyclicLR(
             opt,
             base_lr,
             max_lr,
             step_size_up=step_size_up,
             step_size_down=step_size_down,
             mode=mode,
             gamma=gamma,
             scale_fn=scale_fn,
             scale_mode=scale_mode,
             cycle_momentum=cycle_momentum,
             base_momentum=base_momentum,
             max_momentum=max_momentum,
             last_epoch=last_epoch,
         ),
         step_on_batch=step_on_batch,
     )
Example #2
 def __init__(
     self,
     optimizer: Optimizer,
     base_lr: Union[float, List[float]],
     max_lr: Union[float, List[float]],
     step_size_up: int = 2000,
     step_size_down: Optional[int] = None,
     mode: str = "triangular",
     gamma: float = 1.0,
     scale_fn: Optional[Callable[[int], float]] = None,
     scale_mode: str = "cycle",
     cycle_momentum: bool = True,
     base_momentum: float = 0.8,
     max_momentum: float = 0.9,
     last_epoch: int = -1,
     step_duration: int = 1,
 ):
     scheduler = lr_scheduler.CyclicLR(
         optimizer,
         base_lr,
         max_lr,
         step_size_up,
         step_size_down,
         mode,
         gamma,
         scale_fn,
         scale_mode,
         cycle_momentum,
         base_momentum,
         max_momentum,
         last_epoch,
     )
     super().__init__(scheduler, step_duration)
Example #3
 def __init__(self,
              base_lr,
              max_lr,
              step_size_up=2000,
              step_size_down=None,
              mode='triangular',
              gamma=1.,
              scale_fn=None,
              scale_mode='cycle',
              cycle_momentum=True,
              base_momentum=0.8,
              max_momentum=0.9):
      try:
          # availability check only: CyclicLR was added in torch 1.1.0;
          # the actual scheduler is constructed via _scheduler below
          from torch.optim.lr_scheduler import CyclicLR  # noqa: F401
      except ImportError:
          raise ImportError("Update torch>=1.1.0 to use 'CyclicLR'")
     super().__init__(
         lambda opt: _scheduler.CyclicLR(opt,
                                         base_lr,
                                         max_lr,
                                         step_size_up=step_size_up,
                                         step_size_down=step_size_down,
                                         mode=mode,
                                         gamma=gamma,
                                         scale_fn=scale_fn,
                                         scale_mode=scale_mode,
                                         cycle_momentum=cycle_momentum,
                                         base_momentum=base_momentum,
                                         max_momentum=max_momentum)
     )
Example #4
def cyclic_lr(optimizer,
              last_epoch,
              base_lr=0.001,
              max_lr=0.01,
              epochs_up=1,
              epochs_down=None,
              epoch_size=None,
              mode='triangular',
              gamma=1.0,
              scale_fn=None,
              scale_mode='cycle',
              cycle_momentum=False,
              base_momentum=0.8,
              max_momentum=0.9,
              **_) -> Any:
    def exp_range_scale_fn(x):
        # 'exp_range'-style scaling: the cycle amplitude shrinks as gamma**(x - 1),
        # where x is the cycle or iteration index depending on scale_mode
        return gamma**(x - 1)

    # note: the incoming last_epoch and scale_fn arguments are ignored here;
    # the schedule always restarts and always uses exp_range_scale_fn
    last_epoch = -1
    step_size_up = epochs_up * epoch_size
    step_size_down = step_size_up if epochs_down is None else epochs_down * epoch_size

    return lr_sched.CyclicLR(optimizer,
                             base_lr=base_lr,
                             max_lr=max_lr,
                             step_size_up=step_size_up,
                             step_size_down=step_size_down,
                             mode=mode,
                             scale_fn=exp_range_scale_fn,
                             scale_mode=scale_mode,
                             cycle_momentum=cycle_momentum,
                             base_momentum=base_momentum,
                             max_momentum=max_momentum,
                             last_epoch=last_epoch)
Example #5
def cyclic_lr(optimizer,
              last_epoch,
              base_lr=0.001,
              max_lr=0.01,
              step_size_up=2000,
              step_size_down=None,
              mode='triangular',
              gamma=1.0,
              scale_fn=None,
              scale_mode='cycle',
              cycle_momentum=True,
              base_momentum=0.8,
              max_momentum=0.9,
              **_):
    return lr_scheduler.CyclicLR(optimizer,
                                 base_lr=base_lr,
                                 max_lr=max_lr,
                                 step_size_up=step_size_up,
                                 step_size_down=step_size_down,
                                 mode=mode,
                                 gamma=gamma,
                                 scale_fn=scale_fn,
                                 scale_mode=scale_mode,
                                 cycle_momentum=cycle_momentum,
                                 base_momentum=base_momentum,
                                 max_momentum=max_momentum,
                                 last_epoch=last_epoch)
Example #6
    def __init__(self, optimizer, base_lr, max_lr, step_size_up,
                 step_size_down, cycle_momentum, base_momentum, max_momentum,
                 post_decay):
        # cyclic params
        self.optimizer = optimizer
        self.initial_lr = base_lr
        self.max_lr = max_lr
        self.step_size_up = step_size_up
        self.step_size_down = step_size_down
        self.cycle_momentum = cycle_momentum
        self.base_momentum = base_momentum
        self.max_momentum = max_momentum
        self.post_decay = post_decay

        # cap to one
        if self.step_size_up < 1:
            self.step_size_up = 1
        if self.step_size_down < 1:
            self.step_size_down = 1

        # cyclic lr
        self.initial_scheduler = toptim.CyclicLR(
            self.optimizer,
            base_lr=self.initial_lr,
            max_lr=self.max_lr,
            step_size_up=self.step_size_up,
            step_size_down=self.step_size_down,
            cycle_momentum=self.cycle_momentum,
            base_momentum=self.base_momentum,
            max_momentum=self.max_momentum)

        # our params
        self.oneshot_n = self.step_size_up + self.step_size_down  # steps to warm up for
        self.finished = False  # am i done
        super().__init__(optimizer)
Example #7
def make_scheduler_with_cfg(optimizer, total_num, scheduler_cfg: dict):
    lr_strategy = scheduler_cfg["lr_strategy"]
    chosen_scheduler_cfg = scheduler_cfg[lr_strategy]
    if lr_strategy == "clr":
        # # cycle_id is the index of the current cycle, counted starting from 1
        # # step_size here is the number of iterations in half a cycle
        # cycle_id = np.floor(1 + curr_epoch / (2 * step_size))
        # # x maps the current position within the cycle to an lr scale,
        # # tracing a ^-shaped triangular wave
        # x = 1 - np.abs(curr_epoch / step_size - 2 * cycle_id + 1)
        # lr = base_lr + (max_lr - base_lr) * np.maximum(0, x)
        # (a standalone sketch of this formula appears after this function)
        scheduler = lr_scheduler.CyclicLR(
            optimizer=optimizer,
            base_lr=chosen_scheduler_cfg["min_lr"],
            max_lr=chosen_scheduler_cfg["max_lr"],
            step_size_up=chosen_scheduler_cfg["step_size"],
            scale_mode=chosen_scheduler_cfg["mode"],
        )
    elif lr_strategy == 'step':
        scheduler = lr_scheduler.MultiStepLR(
            optimizer=optimizer,
            milestones=chosen_scheduler_cfg['milestones'],
            gamma=chosen_scheduler_cfg['gamma']
        )
    else:
        lr_func = partial(_get_lr_coefficient,
                          total_num=total_num,
                          lr_strategy=lr_strategy,
                          scheduler_cfg=chosen_scheduler_cfg)
        scheduler = lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lr_func)
    return scheduler
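The commented-out lines in the "clr" branch above are the triangular cyclical learning rate formula from Smith's CLR paper. Below is a minimal standalone NumPy version of that formula; the function name and the default numbers are illustrative assumptions, not part of the snippet above.

import numpy as np

def triangular_lr(curr_iter, base_lr=0.001, max_lr=0.01, step_size=2000):
    # step_size is half a cycle, measured in iterations
    cycle_id = np.floor(1 + curr_iter / (2 * step_size))      # 1-based cycle index
    x = 1 - np.abs(curr_iter / step_size - 2 * cycle_id + 1)  # ^-shaped wave in [0, 1]
    return base_lr + (max_lr - base_lr) * np.maximum(0, x)

# lr climbs from base_lr to max_lr over the first step_size iterations, then falls back
print(triangular_lr(0), triangular_lr(2000), triangular_lr(4000))  # base_lr, max_lr, base_lr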
Example #8
    def __init__(self, lr, weight_decay, class_weight, init_type, gpu_ids,
                 dataset_size, view, alpha, network):
        super(Net, self).__init__()
        self.view = view
        self.gpu_ids = gpu_ids
        self.alpha = alpha
        self.network = network
        self.device = torch.device('cuda:{}'.format(
            self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu')

        self.model = networks.define_X(init_type, gpu_ids, view, network)
        self.criterion = nn.BCEWithLogitsLoss(
            pos_weight=torch.DoubleTensor(class_weight).cuda(gpu_ids[0]))
        self.optimizer = optim.AdamW(self.model.parameters(),
                                     lr=lr,
                                     weight_decay=weight_decay)
        self.scheduler = lr_scheduler.CyclicLR(self.optimizer,
                                               base_lr=lr,
                                               max_lr=10 * lr,
                                               step_size_up=dataset_size // 2,
                                               step_size_down=dataset_size -
                                               dataset_size // 2,
                                               cycle_momentum=False,
                                               mode='triangular2')
        self.softmax = nn.Softmax()
Example #9
    def __init__(self, optimizer, lr, warmup_steps, momentum, decay):
        # cyclic params
        self.optimizer = optimizer
        self.lr = lr
        self.warmup_steps = warmup_steps
        self.momentum = momentum
        self.decay = decay

        # cap to one
        if self.warmup_steps < 1:
            self.warmup_steps = 1

        # cyclic lr
        self.initial_scheduler = toptim.CyclicLR(
            self.optimizer,
            base_lr=0,
            max_lr=self.lr,
            step_size_up=self.warmup_steps,
            step_size_down=self.warmup_steps,
            cycle_momentum=False,
            base_momentum=self.momentum,
            max_momentum=self.momentum)

        # second optimizer
        #     self.final_scheduler = toptim.ReduceLROnPlateau(self.optimizer, factor=0.9, mode='min', patience=self.warmup_steps / 5)
        #     self.final_scheduler = toptim.ExponentialLR(self.optimizer, gamma=0.99997)

        # our params
        #     self.last_epoch = -1  # fix for pytorch 1.1 and below
        self.finished = False  # am i done
        super().__init__(optimizer)
Example #10
    def __init__(self, optimizer, lr, warmup_steps, momentum, decay):
        # cyclic params
        self.optimizer = optimizer
        self.lr = lr
        self.warmup_steps = warmup_steps
        self.momentum = momentum
        self.decay = decay

        # cap to one
        if self.warmup_steps < 1:
            self.warmup_steps = 1

        # cyclic lr
        self.initial_scheduler = toptim.CyclicLR(
            self.optimizer,
            base_lr=0,
            max_lr=self.lr,
            step_size_up=self.warmup_steps,
            step_size_down=self.warmup_steps,
            cycle_momentum=False,
            base_momentum=self.momentum,
            max_momentum=self.momentum)

        # our params
        self.last_epoch = -1  # fix for pytorch 1.1 and below
        self.finished = False  # am i done
        self.is_start = False
        super().__init__(optimizer)
Example #11
 def __init__(self,
              base_lr: float,
              max_lr: float,
              step_size_up: int = 2000,
              step_size_down: Optional[int] = None,
              mode: str = 'triangular',
              gamma: float = 1.,
              scale_fn: Optional[Callable[[float], float]] = None,
              scale_mode: str = 'cycle',
              cycle_momentum: bool = True,
              base_momentum: float = 0.8,
              max_momentum: float = 0.9,
              last_epoch: int = -1,
              step_on_iteration: bool = True):
     super().__init__(
         lambda opt: _scheduler.CyclicLR(opt,
                                         base_lr,
                                         max_lr,
                                         step_size_up=step_size_up,
                                         step_size_down=step_size_down,
                                         mode=mode,
                                         gamma=gamma,
                                         scale_fn=scale_fn,
                                         scale_mode=scale_mode,
                                         cycle_momentum=cycle_momentum,
                                         base_momentum=base_momentum,
                                         max_momentum=max_momentum,
                                         last_epoch=last_epoch),
         step_on_iteration=step_on_iteration)
Example #12
def bigcycle(optimizer, last_epoch, base_lr=1e-4, max_lr=1e-2):
    print(" cycle LR ")
    sss = lr_scheduler.CyclicLR(optimizer,
                                base_lr,
                                max_lr,
                                step_size_up=300,
                                step_size_down=300,
                                cycle_momentum=False)
    return sss
Example #13
def get_warmup_scheduler(config, optimizer, epoch_size) -> Any:
    return lr_sched.CyclicLR(optimizer,
                             base_lr=0,
                             max_lr=config.optimizer.params.lr,
                             step_size_up=config.train.warmup.epochs *
                             epoch_size,
                             step_size_down=0,
                             cycle_momentum=False,
                             mode='triangular')
Example #14
def get_scheduler(optimizer, scheduler_type, **kwargs):
    """ Return learning rate scheduler.
    
    Realize three type of scheduler.
    
    Parameters
    ----------
    optimizer: torch.optim
        optimizer picked for training
    scheduler_type: str
        define scheduler type
        'step' - decrease learning rate in 10 time step by step.
        'cos' - decrease learning rate using a cosine annealing schedule.
        'warmup' - increase learning rate from zero to initial.
    **kwargs : dict,
        learning_rate: float
            Initial learning rate.
        step_len: int
            Quantity of epochs between learning rate decay at 10 times. 
            Use with 'step' scheduler type only.
        cycle_len: int
            Quantity of epochs till the learning rate decay from initial to zero.
            Use with 'step' scheduler type only.
        batch_per_epoch: int
            Quantity batches in datasets.
        warmup_epoch: int
            Quantity epochs to rise learning rate from zero to initial.
        
    Returns
    -------
    scheduler: torch.optim.lr_scheduler
    
    See Also
    --------
    torch.optim.lr_scheduler.StepLR
    torch.optim.lr_scheduler.CosineAnnealingWarmRestarts
    torch.optim.lr_scheduler.CyclicLR
    
    """
    if scheduler_type == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=kwargs['step_size'], gamma=0.1)
    elif scheduler_type == 'cos':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=kwargs['cycle_len'], eta_min=0)
    elif scheduler_type == 'warmup':
        scheduler = lr_scheduler.CyclicLR(
                        optimizer, 
                        base_lr=kwargs['learning_rate'] / (kwargs['batch_per_epoch'] * kwargs['warmup_epoch']), 
                        max_lr=kwargs['learning_rate'],
                        step_size_up=(kwargs['batch_per_epoch'] + 1) * kwargs['warmup_epoch'],
                        step_size_down=0,
                        cycle_momentum=False
                        )
    else:
        raise ValueError('Unknown scheduler_type: {}'.format(scheduler_type))
    return scheduler
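For reference, a minimal call of the helper above for the 'warmup' case might look like the sketch below. The tiny model, the SGD optimizer, and the concrete numbers are illustrative assumptions, not part of the original code.

import torch
from torch import optim

model = torch.nn.Linear(10, 2)
optimizer = optim.SGD(model.parameters(), lr=0.01)

# warm up from near zero to lr=0.01 over 5 epochs of 100 batches each
scheduler = get_scheduler(optimizer, 'warmup',
                          learning_rate=0.01,
                          batch_per_epoch=100,
                          warmup_epoch=5)

for _ in range(5 * 100):
    optimizer.step()
    scheduler.step()  # this CyclicLR-based warmup is stepped once per batch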
Example #15
 def create_lr_scheduler(self,
                         lr_scheduler_type,
                         optimizer,
                         step_size=None,
                         restart_step=None,
                         multi_step=None):
     """创建学习率衰减器
     Args:
         lr_scheduler_type: 衰减器类型
         optimizer: 优化器
         step_size: 使用StepLR时,必须指定该参数
     Return:
         my_lr_scheduler: 学习率衰减器
     """
     print('Creating lr scheduler: %s' % lr_scheduler_type)
     if lr_scheduler_type == 'StepLR':
         if not step_size:
             raise ValueError(
                  'You must specify step_size when using StepLR.')
         my_lr_scheduler = lr_scheduler.StepLR(optimizer,
                                               step_size=step_size,
                                               gamma=0.1)
     elif lr_scheduler_type == 'CosineLR':
         if not restart_step:
             raise ValueError(
                  'You must specify restart_step when using CosineLR.'
             )
         my_lr_scheduler = lr_scheduler.CosineAnnealingLR(
             optimizer, restart_step)
     elif lr_scheduler_type == 'MultiStepLR':
         if not multi_step:
             raise ValueError(
                  'You must specify multi_step when using MultiStepLR.'
             )
         my_lr_scheduler = lr_scheduler.MultiStepLR(optimizer, multi_step)
     elif lr_scheduler_type == 'ReduceLR':
         my_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                          mode='max',
                                                          factor=0.7,
                                                          patience=3,
                                                          verbose=True)
     elif lr_scheduler_type == 'CyclicLR':
          # When using Adam, cycle_momentum must be set to False (it defaults to True); the author suggests step_size_up = (2-8) x (training iterations per epoch) -- see the sketch after this method
         my_lr_scheduler = lr_scheduler.CyclicLR(optimizer,
                                                 base_lr=1e-4,
                                                 max_lr=2.6e-3,
                                                 step_size_up=1805,
                                                 cycle_momentum=False)
     elif lr_scheduler_type == 'Flat_CosAnneal':
         from torchtools.lr_scheduler import DelayerScheduler, DelayedCosineAnnealingLR
         my_lr_scheduler = DelayedCosineAnnealingLR(optimizer, 30, 80)
     return my_lr_scheduler
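As the translated comment in the 'CyclicLR' branch notes, Adam does not expose a 'momentum' hyperparameter, so on older PyTorch releases CyclicLR must be constructed with cycle_momentum=False when it wraps Adam (recent releases can cycle beta1 instead). A minimal sketch with the same hyperparameters as above; the linear model is an illustrative assumption.

import torch.nn as nn
from torch import optim
from torch.optim import lr_scheduler

model = nn.Linear(8, 2)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# with cycle_momentum=True, older torch versions raise because Adam has no 'momentum' default
scheduler = lr_scheduler.CyclicLR(optimizer,
                                  base_lr=1e-4,
                                  max_lr=2.6e-3,
                                  step_size_up=1805,
                                  cycle_momentum=False)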
Example #16
def cyclic_lr(optimizer, last_epoch, base_lr=0.001, max_lr=0.01,
              step_size_up=2000, step_size_down=None, mode='triangular',
              gamma=1.0, scale_fn=None, scale_mode='cycle', cycle_momentum=False,
              base_momentum=0.8, max_momentum=0.9, coeff=1, **_) -> Any:
    def exp_range_scale_fn(x):
        res = gamma ** (x - 1)
        return res

    return lr_sched.CyclicLR(optimizer, base_lr=base_lr*coeff, max_lr=max_lr*coeff,
                             step_size_up=step_size_up, step_size_down=
                             step_size_down, mode=mode, scale_fn=exp_range_scale_fn,
                             scale_mode=scale_mode, cycle_momentum=
                             cycle_momentum, base_momentum=base_momentum,
                             max_momentum=max_momentum, last_epoch=last_epoch)
Example #17
def str2sched(scheduler: Schedulerlike, optimiser: Optimiser,
              dataloader: DataLoader, epochs: Numeric,
              patience: Numeric) -> Scheduler:
    if not isinstance(scheduler, str):
        return scheduler
    elif scheduler == 'reduce_on_plateau':
        if not isinstance(patience, int): patience = 20
        return sched.ReduceLROnPlateau(optimiser, patience=patience // 2)
    elif scheduler == 'cyclic':
        return sched.CyclicLR(optimiser, base_lr=1e-4, max_lr=1.)
    elif scheduler == 'step':
        return sched.StepLR(optimiser, step_size=5)
    elif scheduler == 'exp':
        return sched.ExponentialLR(optimiser, gamma=0.1)
    else:
        raise RuntimeError(f'Scheduler {scheduler} not found.')
Example #18
 def prep_scheduler(self):
     if self.args.scheduler == "step":
         self.scheduler = scheduler.StepLR(self.optimizer, step_size=50)
     elif self.args.scheduler == "exp":
         self.scheduler = scheduler.ExponentialLR(self.optimizer,
                                                  gamma=0.999)
     elif self.args.scheduler == "cyclic":
         self.scheduler = scheduler.CyclicLR(self.optimizer,
                                             step_size_up=5000,
                                             base_lr=0.1 * self.args.lr,
                                             max_lr=self.args.lr)
         # Originally step_size_up: 2000
     elif self.args.scheduler == "plateau":
         self.scheduler = scheduler.ReduceLROnPlateau(self.optimizer)
      else:
          raise ValueError(
              "Scheduler not available: {}".format(self.args.scheduler))
Example #19
def get_scheduler(optimizer, opt):
    """Return a learning rate scheduler

    Parameters:
        optimizer          -- the optimizer of the network
        opt (option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions. 
                              opt.lr_policy is the name of the learning rate policy: linear | step | plateau | cosine | cyclic

    For 'linear', we keep the same learning rate for the first <opt.n_epochs> epochs
    and linearly decay the rate to zero over the next <opt.n_epochs_decay> epochs.
    For other schedulers (step, plateau, and cosine), we use the default PyTorch schedulers.
    See https://pytorch.org/docs/stable/optim.html for more details.
    """
    if opt.lr_policy == 'linear':

        def lambda_rule(epoch):
            lr_l = 1.0 - max(0, epoch + opt.epoch_start -
                             opt.n_epochs) / float(opt.n_epochs_decay + 2)
            return lr_l

        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)

    elif opt.lr_policy == 'step':
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=opt.lr_decay_iters,
                                        gamma=0.1)
    elif opt.lr_policy == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   mode='min',
                                                   factor=0.1,
                                                   patience=10)
    elif opt.lr_policy == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                                   T_max=opt.n_epochs,
                                                   eta_min=0)
    elif opt.lr_policy == 'cyclic':
        scheduler = lr_scheduler.CyclicLR(optimizer,
                                          opt.lr,
                                          max_lr=opt.lr * 1.1,
                                          cycle_momentum=False)

    else:
        raise NotImplementedError(
            'learning rate policy [%s] is not implemented' % opt.lr_policy)
    return scheduler
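A quick way to exercise the 'cyclic' branch of this get_scheduler is with a bare options object; the SimpleNamespace stand-in, the toy model, and the numbers below are illustrative assumptions rather than part of the original project.

from types import SimpleNamespace

import torch
from torch import optim

model = torch.nn.Linear(4, 1)
optimizer = optim.Adam(model.parameters(), lr=2e-4)

# only the fields read by the 'cyclic' branch are provided
opt = SimpleNamespace(lr_policy='cyclic', lr=2e-4)
scheduler = get_scheduler(optimizer, opt)
print(scheduler.get_last_lr())  # starts at base_lr == opt.lr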
Example #20
def get_schedule(opt, optimizer, train_loader_len=None):
    if opt.scheduler == 'multistep':
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=[30, 60, 100, 130],
                                             gamma=0.1)
    elif opt.scheduler == 'cycle':
        step_size = train_loader_len * 4
        print(step_size)
        scheduler = lr_scheduler.CyclicLR(optimizer,
                                          step_size_up=step_size,
                                          base_lr=opt.lr / 100,
                                          max_lr=opt.lr,
                                          cycle_momentum=False)
    elif opt.scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   'min',
                                                   patience=5)
    elif opt.scheduler == 'warmup':
        step = train_loader_len
        scheduler = WarmupMultiStepLR(
            optimizer,
            milestones=[step * 30, step * 60, step * 100, step * 130],
            gamma=0.1)
    elif opt.scheduler == 'cos':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                                   train_loader_len * 5,
                                                   eta_min=1e-8)
    elif opt.scheduler == 'cosw':
        scheduler = WarmupCosineAnnealingLR(optimizer,
                                            train_loader_len * 5,
                                            eta_min=1e-8)
    elif opt.scheduler == 'sgdr':
        scheduler = CosineAnnealingWithRestartsLR(optimizer,
                                                  train_loader_len * 5,
                                                  eta_min=1e-10,
                                                  T_mult=1.1)
    elif opt.scheduler == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
    elif opt.scheduler == 'exponential':
        scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.97)
    else:
        scheduler = None

    return scheduler
Example #21
def get_schedule(opt, optimizer, train_loader_len):
    if opt.scheduler == 'multistep':
        scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[25, 45, 70], gamma=0.1)
    elif opt.scheduler == 'cycle':
        step_size = train_loader_len*6
        print(step_size)
        scheduler = lr_scheduler.CyclicLR(optimizer, step_size_up=step_size, base_lr=opt.lr/100, max_lr=opt.lr)
    elif opt.scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
    elif opt.scheduler == 'warmup':
        step = train_loader_len
        scheduler = WarmupMultiStepLR(optimizer, milestones=[step*25, step*70, step*90], gamma=0.1)
    elif opt.scheduler == 'cos':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, train_loader_len*3, eta_min=opt.lr/1000)
    elif opt.scheduler == 'cosw':
        scheduler = WarmupCosineAnnealingLR(optimizer, train_loader_len*4, eta_min=1e-8)
    else:
        scheduler = None

    return scheduler
Example #22
def get_scheduler(optimizer, opt):
    ''' Rules for how to adjust the learning rate. Lambda: custom method to
    change learning rate. StepLR: learning rate decays by gamma each step size.
    Plateau: reduce once the quantity monitored has stopped decreasing.
    '''
    if opt.lr_policy == 'lambda':

        def lambda_rule(epoch):
            lr_l = 1.0 - \
                max(0, epoch + 1 + opt.epoch_count - opt.niter) / \
                float(opt.niter_decay + 1)
            return lr_l

        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    elif opt.lr_policy == 'step':
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=opt.lr_decay_iters,
                                        gamma=0.1)
    elif opt.lr_policy == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   mode='min',
                                                   factor=0.9,
                                                   threshold=0.01,
                                                   patience=opt.patience)
    elif opt.lr_policy == 'cyclic':
        scheduler = lr_scheduler.CyclicLR(optimizer,
                                          base_lr=opt.lr,
                                          max_lr=opt.lr_max,
                                          step_size_up=opt.lr_step_size,
                                          cycle_momentum=False)
    elif opt.lr_policy == 'none':

        def lambda_rule(epoch):
            return 1.0

        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    else:
        raise NotImplementedError(
            'learning rate policy [{}] is not implemented'.format(
                opt.lr_policy))
    return scheduler
Example #23
def get_scheduler(optimizer, opt):
    if opt.lr_policy == 'lambda':
        def lambda_rule(epoch):
            lr_l = 1.0 - max(0, epoch + 1 + opt.epoch_count - opt.niter) / float(opt.niter_decay + 1)
            return lr_l

        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    elif opt.lr_policy == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.5)
    elif opt.lr_policy == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.8, threshold=0.01, patience=2,
                                                   min_lr=opt.min_lr)
    elif opt.lr_policy == 'cyclic':
        scheduler = lr_scheduler.CyclicLR(optimizer, opt.min_lr, opt.lr, step_size_up=5, step_size_down=None,
                                          gamma=0.99,
                                          mode='exp_range', cycle_momentum=False)
    elif opt.lr_policy == 'cosine_restarts':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, opt.lr_decay_iters, T_mult=1, eta_min=0)
    else:
        raise NotImplementedError('learning rate policy [%s] is not implemented' % opt.lr_policy)
    return scheduler
Example #24
def get_scheduler(args, optimizer, last_epoch, train_loader):
        assert args.scheduler_name in ['multistep', 'linear_warmup', 'onecycle', 'snapshot_ensemble_scheduler','cyclic', None]
        if args.scheduler_name is None:
            return None
        if args.scheduler_name == 'multistep':
            return lr_scheduler.MultiStepLR(optimizer, milestones=[5,10,20,30], gamma=0.1, last_epoch=last_epoch)
        
        if args.scheduler_name == 'linear_warmup':
            # Total number of training steps is [number of batches] x [number of epochs]. 
            total_steps = len(train_loader) * args.num_epochs
            warmup_frac = 0.3
            return get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = int(total_steps * warmup_frac), 
                                            num_training_steps = total_steps)
        if args.scheduler_name == 'onecycle':
            return OneCycleLR(optimizer, n_epochs=args.num_epochs, n_batches=len(train_loader))
        if args.scheduler_name == 'snapshot_ensemble_scheduler':
            nb_cycles = 2
            return snapshot_ensemble_scheduler(optimizer, args.lr, args.num_epochs, nb_cycles, train_loader)
        if args.scheduler_name == 'cyclic':
            return lr_scheduler.CyclicLR(optimizer, 0.01, 0.05)  # base_lr should be smaller than max_lr
Example #25
def get_optimizer(policy, args):
    if args.optimizer == "adam":
        optimizer = optim.Adam(policy.parameters(), lr=args.lr)
    elif args.optimizer == "sgd":
        optimizer = optim.SGD(policy.parameters(), lr=args.lr)
    elif args.optimizer == "rmsprop":
        optimizer = optim.RMSprop(policy.parameters(), lr=args.lr)
    scheduler = args.opt_schedule
    if scheduler == "cyclic":
        scheduler = lr_scheduler.OneCycleLR(
            optimizer=optimizer,
            max_lr=args.div_factor * args.lr,
            total_steps=args.num_episodes_train)
    elif scheduler == "cyclic_multi":
        scheduler = lr_scheduler.CyclicLR(optimizer=optimizer,
                                          base_lr=args.lr,
                                          max_lr=args.div_factor * args.lr)
    elif scheduler == "WR":
        T_0 = max(1, int(args.num_episodes_train / 1000))
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer=optimizer, T_0=T_0)

    return optimizer, scheduler
Example #26
    def test_CyclicLR(self, debug=True):
        """
    Usage:
        python template_lib/modelarts/scripts/copy_tool.py \
          -s s3://bucket-7001/ZhouPeng/pypi/torch1_7_0 -d /cache/pypi -t copytree
        for filename in /cache/pypi/*.whl; do
            pip install $filename
        done
        proj_root=moco-exp
        python template_lib/modelarts/scripts/copy_tool.py \
          -s s3://bucket-7001/ZhouPeng/codes/$proj_root -d /cache/$proj_root -t copytree -b /cache/$proj_root/code.zip
        cd /cache/$proj_root
        pip install -r requirements.txt

        export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
        export TIME_STR=1
        export PYTHONPATH=./exp:./stylegan2-pytorch:./
        python 	-c "from exp.tests.test_styleganv2 import Testing_stylegan2;\
          Testing_stylegan2().test_train_ffhq_128()"

    :return:
    """
        if 'CUDA_VISIBLE_DEVICES' not in os.environ:
            os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        if 'TIME_STR' not in os.environ:
            os.environ['TIME_STR'] = '0' if utils.is_debugging() else '0'
        from template_lib.v2.config_cfgnode.argparser import \
          (get_command_and_outdir, setup_outdir_and_yaml, get_append_cmd_str, start_cmd_run)

        tl_opts = ' '.join(sys.argv[sys.argv.index('--tl_opts') +
                                    1:]) if '--tl_opts' in sys.argv else ''
        print(f'tl_opts:\n {tl_opts}')

        command, outdir = get_command_and_outdir(
            self, func_name=sys._getframe().f_code.co_name, file=__file__)
        argv_str = f"""
                --tl_config_file none
                --tl_command none
                --tl_outdir {outdir}
                """
        args = setup_outdir_and_yaml(argv_str, return_cfg=True)

        import torch.nn as nn
        from torch.optim import lr_scheduler
        from matplotlib import pyplot as plt

        model = nn.Linear(3, 64)

        def create_optimizer():
            return SGD(model.parameters(),
                       lr=0.1,
                       momentum=0.9,
                       weight_decay=1e-4)

        def plot_lr(scheduler, title='', labels=['base'], nrof_epoch=100):
            lr_li = [[] for _ in range(len(labels))]
            epoch_li = list(range(nrof_epoch))
            for epoch in epoch_li:
                scheduler.step()  # step() computes and applies this epoch's lr to the optimizer's param groups
                lr = scheduler.get_last_lr()  # learning rate(s) for the current epoch
                for i in range(len(labels)):
                    lr_li[i].append(lr[i])
            for lr, label in zip(lr_li, labels):
                plt.plot(epoch_li, lr, label=label)
            plt.grid()
            plt.xlabel('epoch')
            plt.ylabel('lr')
            plt.title(title)
            plt.legend()
            plt.show()

        optimizer = create_optimizer()
        scheduler = lr_scheduler.CyclicLR(optimizer,
                                          base_lr=0.01,
                                          max_lr=0.1,
                                          step_size_up=25,
                                          step_size_down=10)
        plot_lr(scheduler, title='CyclicLR')

        pass
Example #27
    def __init__(self, opt, **kwargs):
        super(ClassificationTask, self).__init__(opt, kwargs["comm"],
                                                 kwargs["device"])
        train_opt = opt['train']
        self.ran_cl = opt['rancl']
        self.num_classes = opt['datasets']['train']['num_classes']
        self.kd_transfer = opt['kd_transfer']
        self.att_transfer = opt['att_transfer']
        self.fsp_transfer = opt['fsp_transfer']
        self.w_transfer = opt['w_transfer']
        self.ws_transfer = opt['ws_transfer']
        self.replace_classifier = opt['varyOnData']
        # self.device = kwargs['device']

        self.device = torch.device("cuda:{}".format(kwargs['device']) if torch.
                                   cuda.is_available() else "cpu")
        # self.logger.info(self.device)
        # -----early stopping-------
        self.best_weights = None
        self.best_metric = None
        self.wait = 0
        self.stop_training = False
        self.patience = opt['patience']

        # -----prepare for transfer-------------
        self.most_related_task = -1

        if self.fsp_transfer or self.att_transfer:
            self.activation = OrderedDict()

        # -----define network and load pretrained tasks-----
        data_name, model_name = opt['network'].split('-')
        self.model_name = model_name
        if data_name.lower() == 'mnist':
            self.network = getattr(
                mnist,
                model_name)(num_classes=self.num_classes).to(self.device)
        elif data_name.lower() == 'cifar':
            self.network = getattr(
                cifar,
                model_name)(num_classes=self.num_classes).to(self.device)
            if self.att_transfer and 'resnet' in model_name.lower():
                self.network.layer1[-1].register_forward_hook(
                    self.get_activation('b1_out'))
                self.network.layer2[-1].register_forward_hook(
                    self.get_activation('b2_out'))
                self.network.layer3[-1].register_forward_hook(
                    self.get_activation('b3_out'))
                # self.network.layer4[-1].register_forward_hook(self.get_activation('b4_out'))
        elif data_name.lower() == 'imagenet':
            if opt['imagenet_pretrained']:
                self.network = getattr(imagenet, model_name)(pretrained=True)
                if opt['train_lastlayer']:
                    for param in self.network.parameters():
                        param.requires_grad = False

                if 'resnet' in model_name or 'inception' in model_name:
                    self.network.fc = nn.Linear(self.network.fc.in_features,
                                                self.num_classes)
                elif 'vgg' in model_name or 'alex' in model_name:
                    self.network.classifier[6] = nn.Linear(
                        4096, self.num_classes)
                elif 'squeeze' in self.model_name:
                    self.network.num_classes = self.num_classes
                    self.network.classifier[1] = nn.Conv2d(512,
                                                           self.num_classes,
                                                           kernel_size=1)
                elif 'dense' in self.model_name:
                    num_features = self.network.classifier.in_features
                    self.network.classifier = nn.Linear(
                        num_features, self.num_classes)
                elif 'mobile' in self.model_name:
                    num_features = self.network.classifier[-1].in_features
                    self.network.classifier[-1] = nn.Linear(
                        num_features, self.num_classes)

                self.network = self.network.to(self.device)
            else:
                self.network = getattr(
                    imagenet,
                    model_name)(num_classes=self.num_classes).to(self.device)

            if self.att_transfer:
                if 'resnet' in self.model_name.lower():
                    self.network.layer1[-1].register_forward_hook(
                        self.get_activation('b1_out'))
                    self.network.layer2[-1].register_forward_hook(
                        self.get_activation('b2_out'))
                    self.network.layer3[-1].register_forward_hook(
                        self.get_activation('b3_out'))
                    # self.network.layer4[-1].register_forward_hook(self.get_activation('b4_out'))
                # elif 'dense' in self.model_name.lower():
        else:
            raise NotImplementedError(
                'Network [{:s}, {:s}] is not defined.'.format(
                    data_name, model_name))

        # make starts the same
        # if USE_HVD:
        # 	hvd.broadcast_parameters(self.network.state_dict(), root_rank=0)
        # test if different task has the same initialization under same seed
        # for name, param in self.network.named_parameters():
        # 	print(param[0])

        # load pretrained model if exists
        if self._is_solver():
            # init_weights(self.network)
            self.load()
            # print network
            # self.print_network()

        # -----define loss function------
        self.one_hot = False
        self.prob_est = False
        loss_type = train_opt['loss']
        if loss_type == 'l1':
            self.loss_func = nn.L1Loss().to(self.device)
            self.one_hot = True
        elif loss_type == 'l2':
            self.loss_func = nn.MSELoss().to(self.device)
            self.one_hot = True
        elif loss_type == 'l1_pro':
            self.loss_func = nn.L1Loss().to(self.device)
            self.prob_est = True
            self.one_hot = True
        elif loss_type == 'l2_pro':
            self.loss_func = nn.MSELoss().to(self.device)
            self.prob_est = True
            self.one_hot = True
        elif loss_type == 'cross_entropy':
            self.loss_func = nn.CrossEntropyLoss().to(self.device)
        elif loss_type == 'marginloss':
            self.loss_func = nn.MultiMarginLoss().to(self.device)
        else:
            raise NotImplementedError(
                'Loss type [{:s}] is not recognized. Please specify it from the following options:'
                .format(loss_type))

        if self.is_train:
            self.network.train()

            self.logits_loss = nn.KLDivLoss(reduction='batchmean').to(
                self.device)
            self.norm_loss = nn.MSELoss(reduction='batchmean').to(self.device)
            self.at_weight = train_opt['at_weight']
            self.kd_weight = train_opt['kd_weight']
            self.ws_weight = train_opt['ws_weight']

            # -----define optimizers-----
            optim_type = train_opt['optim']

            self.optimizer = getattr(optim, optim_type)(
                self.network.parameters(), **opt['train']['optimizer_param'])
            self.optimizers.append(self.optimizer)
            # self.lr = opt['train']['optimizer_param']['lr']

            # -----define schedulers-----
            for optimizer in self.optimizers:
                if train_opt['lr_scheme'] == 'MultiStepLR':
                    scheduler = lr_scheduler.MultiStepLR(
                        optimizer, **opt['train']['lr_scheme_param'])
                elif train_opt['lr_scheme'] == 'CycleLR':
                    scheduler = lr_scheduler.CyclicLR(
                        optimizer, **opt['train']['lr_scheme_param'])
                elif train_opt['lr_scheme'] == 'ReduceLROnPlateau':
                    scheduler = lr_scheduler.ReduceLROnPlateau(
                        optimizer, **opt['train']['lr_scheme_param'])
                elif train_opt['lr_scheme'] is None:
                    scheduler = None
                else:
                    raise NotImplementedError('{} is not implemented!'.format(
                        train_opt['lr_scheme']))
                self.schedulers.append(scheduler)

            # # -----register gradient clipping-----
            # for param in self.network.parameters():
            # 	param.register_hook(lambda grad: torch.clamp(grad, -0.2, 0.2))

            # -----define log_dict-----
            self.log_dict = OrderedDict()
            self.transfer_count = 0

            # -----prepare for transfer-----
            if self.kd_transfer or self.att_transfer:
                # set seeds of all tasks the same to ensure the dataloader is in the same order
                torch.manual_seed(0)
Example #28
import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision.models import resnet18
import matplotlib.pyplot as plt

model = resnet18(num_classes=2)
base_lr = 1e-4
max_lr = 0.1
optimizer = optim.SGD(params=model.parameters(), lr=0.1)
scheduler = lr_scheduler.CyclicLR(optimizer,
                                  base_lr=base_lr,
                                  max_lr=max_lr,
                                  step_size_up=20,
                                  step_size_down=20)

# plot the learning rate over 100 iterations
plt.figure()
x = list(range(100))
y = []
for epoch in range(100):
    scheduler.step()
    lr = scheduler.get_last_lr()  # use get_last_lr(); calling get_lr() outside step() warns in torch>=1.4
    y.append(lr[0])
plt.plot(x, y)
plt.savefig('lr_cyclic.png')
Example #29
        os.path.join(test_case_place, 'messages_{}'.format(target)))
    writer = SummaryWriter(
        log_dir=os.path.join(test_case_place, 'eval_{}'.format(target)))
    logger.info(config)
    sources = list(filter(lambda e: e != target, datasets))
    logger.info("Selected sources: {}".format(str(sources)))
    logger.info("Selected target: {}".format(target))
    logger.info("=" * 100)
    ## ==========================
    # Initialize MDAN model
    ## ==========================
    mdan = load_model('mdan', class_number, len(sources), extractor).to(device)
    #optimizer = optim.Adadelta(mdan.parameters(), lr=learning_rate)
    optimizer = optim.SGD(mdan.parameters(), lr=learning_rate, momentum=0.9)
    scheduler = lr_scheduler.CyclicLR(optimizer,
                                      base_lr=learning_rate,
                                      max_lr=0.009)
    # Decay LR by a factor of 0.1 every 7 epochs
    #scheduler = lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.1)
    resume_epoch = 0
    if constant.resume_train is True:
        resume_epoch, model_state_dict, optimizer_state_dict = resume_checkpoint(
            test_case_place, file_name='best_model.pt')
        mdan.load_state_dict(model_state_dict)
        optimizer.load_state_dict(optimizer_state_dict)
        mdan.eval()
        logger.info("Retain training from epoch {}".format(resume_epoch))
    else:
        mdan.train()

    #scheduler_plateau = lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
Example #30
def main_worker(gpu,ngpus_per_node,args):
    global best_acc1
    # args.gpu=gpu
    # if args.gpu is not None:
    #     print("Use GPU: {} for training".format(args.gpu))
    
    # For multiprocessing distributed training, rank needs to be the
    # global rank among all the processes
    ## args.rank=args.rank*ngpus_per_node+gpu
    ## dist.init_process_group(backend=args.dist_backend,init_method="env://",#args.dist_url,
    ## world_size=args.world_size,rank=args.rank)
    
    ##read the matlab matrix as Xvar and ResponseVar
    inputfile=args.inputfile
    f=h5py.File(inputdir+inputfile,'r')
    data=f.get('inputstore')
    Xvar=np.array(data).transpose()
    data=f.get('outputstore')
    ResponseVar=np.array(data).transpose()
    data=f.get('samplevec')
    samplevec=np.array(data)
    samplevec=np.squeeze(samplevec.astype(int)-1)##block index
    data=f.get('parastore')
    parastore=np.array(data)##omega normalizer
    data=f.get('nthetaset')
    nthetaset=int(np.array(data)[0][0])##block number
    data=f.get('ntime')
    ntimetotal=int(np.array(data)[0][0])##time seq including [training part, extrapolation part]
    f.close()
    ntime=args.timetrainlen
    # ResponseVarnorm=(ResponseVar-ResponseVar.mean(axis=0))/ResponseVar.std(axis=0)
    ResponseVarnorm=ResponseVar## the response variable was originally scale by omega {scaling} but not centered. and no more normalization will be done
    ##separation of train and test set
    nsample=(Xvar.shape)[0]
    ntheta=(Xvar.shape)[1]
    nspec=(ResponseVarnorm.shape)[1]
    simusamplevec=np.unique(samplevec)
    separation=['train','validate','test']
    numsamptest_validate=math.floor((simusamplevec.__len__())*args.test_validate_ratio/2)
    sampleind=set(range(0,nsample))
    simusampeind=set(range(0,nthetaset))
    ## a preset whole time range for test, validation (groups)
    simusamplevec_test=random.sample(simusampeind,numsamptest_validate)
    simusamplevec_validate=random.sample(simusampeind.difference(set(simusamplevec_test)),numsamptest_validate)
    ##index of training, testing, and validation
    testind=np.sort(np.where(np.isin(samplevec,simusamplevec_test)))[0]
    validateind=np.sort(np.where(np.isin(samplevec,simusamplevec_validate)))[0]
    testvalidte_ind_union=set(testind)
    testvalidte_ind_union=testvalidte_ind_union.union(set(validateind))
    trainind=np.sort(np.array(list(sampleind.difference(testvalidte_ind_union))))#index for training set
    ntrainset=nthetaset-numsamptest_validate*2
    sizeset={"train": (ntrainset), "validate": (numsamptest_validate), "test": (numsamptest_validate)}
    ind_separa={"train": (trainind), "validate": (validateind), "test": (testind)}
    ##training block index (time range) keep in the training time block
    timeind={x: np.tile(np.concatenate((np.repeat(1,ntime),np.repeat(0,ntimetotal-ntime))),sizeset[x]) for x in separation}
    time_in_ind={}
    time_extr_ind={}
    for x in separation:
        tempind=ind_separa[x]
        time_in_ind[x]=tempind[timeind[x]==1]
        time_extr_ind[x]=tempind[timeind[x]==0]

    ##train validate test "block" ind
    samplevec_separa={x: samplevec[time_in_ind[x]] for x in separation}
    Xvar_separa={x: Xvar[list(ind_separa[x]),:] for x in separation}
    Xvarnorm=np.empty_like(Xvar)
    # Xvar_norm_separa={}
    if args.normalize_flag == 'Y':  # compare strings with ==, not 'is'
        ##the normalization if exist should be after separation of training and testing data to prevent leaking
        ##normalization (X-mean)/sd
        ##normalization include time. Train and test model need to have at least same range or same mean&sd for time
        del(Xvar)
        for x in separation:
            Xvartemp=Xvar_separa[x]
            meanvec=Xvartemp.mean(axis=0)
            stdvec=Xvartemp.std(axis=0)
            for coli in range(0,len(meanvec)):
                Xvartemp[:,coli]=(Xvartemp[:,coli]-meanvec[coli])/stdvec[coli]
            
            # Xvar_norm_separa[x]=copy.deepcopy(temp_norm_mat)
            Xvarnorm[list(ind_separa[x]),:]=copy.deepcopy(Xvartemp)
        
    else:
        # Xvar_norm_separa={x: Xvar_separa[x] for x in separation}
        Xvarnorm=np.copy(Xvar)
        del(Xvar)
    
    #samplevecXX repeat id vector, XXind index vector
    inputwrap={"Xvarnorm": (Xvarnorm),
        "ResponseVar": (ResponseVar),
        "trainind": (trainind),
        "testind": (testind),
        "validateind": (validateind),
        "ind_separa": (ind_separa),
        "time_in_ind": (time_in_ind),
        "time_extr_ind": (time_extr_ind),
        "samplevec": (samplevec),
        # "samplewholeselec": (samplewholeselec),
        "samplevec_separa": (samplevec_separa),
        # "Xvarmean": (Xvarmean),## these two value: Xvarmean, Xvarstd can be used for "new" test data not used in the original normalization
        # "Xvarstd": (Xvarstd),
        "inputfile": (inputfile),
        "ngpus_per_node": (ngpus_per_node),## number of gpus
        "numsamptest_validate": (numsamptest_validate),#number of testing samples
        "timeind": (timeind)
    }
    with open("pickle_inputwrap.dat","wb") as f1:
        pickle.dump(inputwrap,f1,protocol=4)##protocol=4 if there is error: cannot serialize a bytes object larger than 4 GiB
    
    del(inputwrap)
    
    Xtensor={x: torch.Tensor(Xvarnorm[list(time_in_ind[x]),:]) for x in separation}
    Resptensor={x: torch.Tensor(ResponseVar[list(time_in_ind[x]),:]) for x in separation}
    Dataset={x: utils.TensorDataset(Xtensor[x],Resptensor[x]) for x in separation}
    # train_sampler=torch.utils.data.distributed.DistributedSampler(traindataset)
    nblock=int(args.batch_size/ntime)
    # nblocktest=int(args.test_batch_size/ntime)
    # traindataloader=utils.DataLoader(traindataset,batch_size=args.batch_size,
    #     shuffle=(train_sampler is None),num_workers=args.workers,pin_memory=True,sampler=train_sampler)
    #
    # testdataloader=utils.DataLoader(testdataset,batch_size=args.test_batch_size,
    #     shuffle=False,num_workers=args.workers,pin_memory=True,sampler=test_sampler)
    if args.sampler=="block": # block sampler
        sampler={x: batch_sampler_block(Dataset[x],samplevec_separa[x],nblock=nblock) for x in separation}
        dataloader={x: utils.DataLoader(Dataset[x],num_workers=args.workers,pin_memory=True,batch_sampler=sampler[x]) for x in separation}
    elif args.sampler=="individual": #individual random sampler
        dataloader={x: utils.DataLoader(Dataset[x],batch_size=args.batch_size,shuffle=True,num_workers=args.workers,pin_memory=True) for x in separation}

    args.mintime=np.min(Xvarnorm[:,-1])
    ninnersize=int(args.layersize_ratio*ntheta)
    ##store data
    with open("pickle_dataloader.dat","wb") as f1:
        pickle.dump(dataloader,f1,protocol=4)
    
    dimdict={
        "nsample": (nsample,int),
        "ntheta": (ntheta,int),
        "nspec": (nspec,int),
        "ninnersize": (ninnersize,int)
    }
    args.nsample=nsample
    args.ntheta=ntheta
    args.nspec=nspec
    with open("pickle_dimdata.dat","wb") as f3:
        pickle.dump(dimdict,f3,protocol=4)
    
    ##free up some space (not currently set)
    ##create model
    if bool(re.search("[rR]es[Nn]et",args.net_struct)):
        model=models.__dict__[args.net_struct](ninput=ntheta,num_response=nspec,p=args.p,ncellscale=args.layersize_ratio)
    elif args.rnn_struct==1:
        model=models.__dict__[args.net_struct](ntheta=ntheta,nspec=nspec,num_layer=args.num_layer,ncellscale=args.layersize_ratio,p=args.p)
    else:
        model=models.__dict__[args.net_struct](ninput=ntheta,num_response=nspec,nlayer=args.num_layer,p=args.p,ncellscale=args.layersize_ratio,batchnorm_flag=(args.batchnorm_flag == 'Y'))
    
    # model.eval()
    # if args.gpu is not None:
    #     torch.cuda.set_device(args.gpu)
    #     model.cuda(args.gpu)
    #     # When using a single GPU per process and per
    #     # DistributedDataParallel, we need to divide the batch size
    #     # ourselves based on the total number of GPUs we have
    #     args.batch_size=int(args.batch_size/ngpus_per_node)
    #     args.workers=int((args.workers+ngpus_per_node-1)/ngpus_per_node)
    #     model=torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
    # else:
    # DistributedDataParallel will divide and allocate batch_size to all
    # available GPUs if device_ids are not set
    
    # model=torch.nn.DataParallel(model).cuda()
    model=torch.nn.DataParallel(model)
    if args.gpu_use==1:
        device=torch.device("cuda:0")#cpu
    else:
        device=torch.device("cpu")
    
    model.to(device)
    if args.optimizer=="sgd":
        optimizer=optim.SGD(model.parameters(),lr=args.learning_rate,momentum=args.momentum)
    elif args.optimizer=="adam":
        optimizer=optim.Adam(model.parameters(),lr=args.learning_rate)
    elif args.optimizer=="nesterov_momentum":
        optimizer=optim.SGD(model.parameters(),lr=args.learning_rate,momentum=args.momentum,nesterov=True)
    
    if args.scheduler=='step':
        scheduler=lr_scheduler.StepLR(optimizer,step_size=200,gamma=0.5)
    elif args.scheduler=='plateau':
        scheduler=lr_scheduler.ReduceLROnPlateau(optimizer,'min',factor=0.5)
    elif args.scheduler=='cyclelr':
        scheduler=lr_scheduler.CyclicLR(optimizer,args.learning_rate/100,args.learning_rate,step_size_up=1000,cycle_momentum=False,mode="triangular2")
    else:
        scheduler=None
    
    cudnn.benchmark=True
    ##model training
    for epoch in range(1,args.epochs+1):
        msetr=train(args,model,dataloader["train"],optimizer,epoch,device,ntime,scheduler)
        msevalidate=test(args,model,dataloader["validate"],device,ntime)
        if scheduler is not None:
            if args.scheduler=='step':
                scheduler.step()
            elif args.scheduler=='plateau':
                scheduler.step(msevalidate)##based on validation set. This is fine as we use train|validate|test separation
        if epoch==1:
            best_msevalidate=msevalidate
            best_train_mse=msetr
        
        # is_best=acc1>best_acc1
        is_best=msevalidate<best_msevalidate
        is_best_train=msetr<best_train_mse
        best_msevalidate=min(msevalidate,best_msevalidate)
        best_train_mse=min(msetr,best_train_mse)
        save_checkpoint({
            'epoch': epoch,
            'arch': args.net_struct,
            'state_dict': model.state_dict(),
            'best_acc1': best_msevalidate,
            'best_acctr': best_train_mse,
            'optimizer': optimizer.state_dict(),
            'args_input': args,
        },is_best,is_best_train)
    
    print('\nFinal test MSE\n')
    acctest=test(args,model,dataloader["test"],device,ntime)