Example #1
    def __init__(self,
                 *,
                 latent_dim,
                 image_size,
                 optimizer="adam",
                 fmap_max=512,
                 fmap_inverse_coef=12,
                 transparent=False,
                 disc_output_size=5,
                 attn_res_layers=(),  # tuple avoids a mutable default argument
                 sle_spatial=False,
                 ttur_mult=1.,
                 lr=2e-4,
                 rank=0,
                 ddp=False):
        super().__init__()
        self.latent_dim = latent_dim
        self.image_size = image_size

        G_kwargs = dict(image_size=image_size,
                        latent_dim=latent_dim,
                        fmap_max=fmap_max,
                        fmap_inverse_coef=fmap_inverse_coef,
                        transparent=transparent,
                        attn_res_layers=attn_res_layers,
                        sle_spatial=sle_spatial)

        self.G = Generator(**G_kwargs)

        self.D = Discriminator(image_size=image_size,
                               fmap_max=fmap_max,
                               fmap_inverse_coef=fmap_inverse_coef,
                               transparent=transparent,
                               attn_res_layers=attn_res_layers,
                               disc_output_size=disc_output_size)

        self.ema_updater = EMA(0.995)
        self.GE = Generator(**G_kwargs)
        set_requires_grad(self.GE, False)

        if optimizer == "adam":
            self.G_opt = Adam(self.G.parameters(), lr=lr, betas=(0.5, 0.9))
            self.D_opt = Adam(self.D.parameters(),
                              lr=lr * ttur_mult,
                              betas=(0.5, 0.9))
        elif optimizer == "adabelief":
            self.G_opt = AdaBelief(self.G.parameters(),
                                   lr=lr,
                                   betas=(0.5, 0.9))
            self.D_opt = AdaBelief(self.D.parameters(),
                                   lr=lr * ttur_mult,
                                   betas=(0.5, 0.9))
        else:
            raise ValueError(f'"{optimizer}" is not a valid optimizer')

        self.apply(self._init_weights)
        self.reset_parameter_averaging()

        self.cuda(rank)
        self.D_aug = AugWrapper(self.D, image_size)
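The EMA helper and set_requires_grad used above are defined elsewhere in the repository. For reference, a minimal sketch of the EMA class, assuming the update_average(old, new) interface that ema_updater implies (not necessarily the author's exact code):

class EMA:
    """Exponential moving average of parameter values (sketch)."""
    def __init__(self, beta):
        self.beta = beta

    def update_average(self, old, new):
        if old is None:  # no history yet on the first update
            return new
        return old * self.beta + (1 - self.beta) * new

With beta = 0.995, the averaged generator GE trails G with a half-life of roughly 138 updates (ln 0.5 / ln 0.995).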
Example #2
  def __init__(self, args, env):
    self.action_space = env.action_space()
    self.atoms = args.atoms
    self.Vmin = args.V_min
    self.Vmax = args.V_max
    self.support = torch.linspace(args.V_min, args.V_max, self.atoms).to(device=args.device)  # Support (range) of z
    self.delta_z = (args.V_max - args.V_min) / (self.atoms - 1)
    self.batch_size = args.batch_size
    self.n = args.multi_step
    self.discount = args.discount
    self.norm_clip = args.norm_clip

    self.online_net = DQN(args, self.action_space).to(device=args.device)
    if args.model:  # Load pretrained model if provided
      if os.path.isfile(args.model):
        state_dict = torch.load(args.model, map_location='cpu')  # Always load tensors onto CPU by default, will shift to GPU if necessary
        if 'conv1.weight' in state_dict.keys():
          for old_key, new_key in (('conv1.weight', 'convs.0.weight'), ('conv1.bias', 'convs.0.bias'), ('conv2.weight', 'convs.2.weight'), ('conv2.bias', 'convs.2.bias'), ('conv3.weight', 'convs.4.weight'), ('conv3.bias', 'convs.4.bias')):
            state_dict[new_key] = state_dict[old_key]  # Re-map state dict for old pretrained models
            del state_dict[old_key]  # Delete old keys for strict load_state_dict
        self.online_net.load_state_dict(state_dict)
        print("Loading pretrained model: " + args.model)
      else:  # Raise error if incorrect model path provided
        raise FileNotFoundError(args.model)

    self.online_net.train()

    self.target_net = DQN(args, self.action_space).to(device=args.device)
    self.update_target_net()
    self.target_net.train()
    for param in self.target_net.parameters():
      param.requires_grad = False

    self.optimiser = AdaBelief(self.online_net.parameters(), lr=args.learning_rate, eps=args.adam_eps, rectify=True)
    # Previously: optim.Adam(self.online_net.parameters(), lr=args.learning_rate, eps=args.adam_eps)
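update_target_net is not shown in this snippet; a plausible one-line implementation, assuming the usual hard-update scheme for Rainbow-style agents:

  def update_target_net(self):
    # Hard update: copy the online network's weights into the frozen target network
    self.target_net.load_state_dict(self.online_net.state_dict())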
Example #3
 def select_optimizer(self):
     if self.args.optimizer == 'Adam':
         self.optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                            self.mdl.parameters()),
                                     lr=self.args.learning_rate,
                                     weight_decay=self.args.weight_decay)
     elif self.args.optimizer == 'AdaBelief':
         self.optimizer = AdaBelief(self.mdl.parameters(),
                                    weight_decay=self.args.weight_decay)  # lr left at AdaBelief's default
     elif self.args.optimizer == 'RMS':
         self.optimizer = optim.RMSprop(filter(lambda p: p.requires_grad,
                                               self.mdl.parameters()),
                                        lr=self.args.learning_rate)
     elif self.args.optimizer == 'SGD':
         self.optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                           self.mdl.parameters()),
                                    lr=self.args.learning_rate,
                                    momentum=0.9)
     elif self.args.optimizer == 'Adagrad':
         self.optimizer = optim.Adagrad(filter(lambda p: p.requires_grad,
                                               self.mdl.parameters()),
                                        lr=self.args.learning_rate)
     elif self.args.optimizer == 'Adadelta':
         self.optimizer = optim.Adadelta(filter(lambda p: p.requires_grad,
                                                self.mdl.parameters()),
                                         lr=self.args.learning_rate)
     else:
         raise ValueError(f'Unknown optimizer: {self.args.optimizer}')
Example #4
 def reset(self):
     self.model.reset()
     if self.adabelief:
         if self.adabelief_args is not None:
             self.optimizer = AdaBelief(self.model.model.latents.parameters(), lr=self.adabelief_args.lr, betas=(self.adabelief_args.b1, self.adabelief_args.b2), eps=self.adabelief_args.eps,
                                        weight_decay=self.adabelief_args.weight_decay, amsgrad=self.adabelief_args.amsgrad, weight_decouple=self.adabelief_args.weight_decouple, 
                                        fixed_decay=self.adabelief_args.fixed_decay, rectify=self.adabelief_args.rectify)
         else:
             self.optimizer = AdaBelief(self.model.model.latents.parameters(), lr=self.lr, betas=(0.5, 0.999), eps=1e-12,
                                        weight_decay=0, amsgrad=False, weight_decouple=True, fixed_decay=False, rectify=True)
     else:
         self.optimizer = Adam(self.model.model.latents.parameters(), self.lr)
     if self.lr_scheduling:
         #self.lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(self.optimizer, max_lr=self.lr, steps_per_epoch=self.iterations, epochs=self.epochs)
          self.lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=0.96)
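For context, ExponentialLR(gamma=0.96) multiplies every parameter group's learning rate by 0.96 on each scheduler.step(), i.e. roughly a 4% decay per step. A standalone sketch of the resulting trajectory when stepped once per epoch:

lr = 1e-3  # illustrative starting lr
for epoch in range(5):
    print(f"epoch {epoch}: lr={lr:.6f}")
    lr *= 0.96  # what ExponentialLR applies on each step()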
Example #5
def get_optimizer(
    name: str,
    model_params: Iterable,
    lr: float = 1e-3,
    wd: float = 0,
    lookahead: bool = False,
):
    if name == "adam":
        base_optimizer = optim.Adam(model_params, lr=lr, weight_decay=wd)
    elif name == "sgd":
        base_optimizer = optim.SGD(model_params,
                                   lr=lr,
                                   weight_decay=wd,
                                   momentum=0.9,
                                   nesterov=True)
    elif name == "radam":
        base_optimizer = RAdam(model_params, lr=lr, weight_decay=wd)
    elif name == "ralamb":
        base_optimizer = Ralamb(model_params, lr=lr, weight_decay=wd)
    elif name == "adabelief":
        base_optimizer = AdaBelief(model_params, lr=lr, weight_decay=wd)
    else:
        raise ValueError(f"Unknown optimizer: {name}")

    # Use lookahead
    if lookahead:
        optimizer = Lookahead(base_optimizer)
    else:
        optimizer = base_optimizer

    return optimizer
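Typical usage of the factory above; the model here is a stand-in, and AdaBelief/Lookahead are assumed to be importable as in the snippet:

import torch.nn as nn

model = nn.Linear(10, 2)  # placeholder model
optimizer = get_optimizer("adabelief", model.parameters(), lr=1e-3, wd=1e-4, lookahead=True)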
Example #6
    def __init__(self, summ_model, ckpt):
        super(ParallelLoss, self).__init__()
        self.loss_mse = torch.nn.MSELoss(reduction='mean')
        self.loss = EucDistanceLoss(1)
        self.summ = summ_model
        self.summ.eval()

        self.pre_att_model = PreAttModel(layers=2,
                                         d_model=1024,
                                         num_heads=16,
                                         dff=4096,
                                         rate=0.0)
        lr = 2e-5

        try:
            self.pre_att_model.load_state_dict(torch.load(ckpt))
            print('loaded {}'.format(ckpt))
        except (FileNotFoundError, RuntimeError):  # missing file or mismatched weights
            print('no checkpoints now!')

        self.optimizer = AdaBelief(self.pre_att_model.parameters(),
                                   lr=lr,
                                   eps=1e-16,
                                   betas=(0.9, 0.999),
                                   weight_decay=1e-4,
                                   weight_decouple=True,
                                   rectify=True)
Example #7
 def select_optimizer(self, model):
     if self.args.optimizer == 'Adam':
         optimizer = optim.Adam(
             filter(lambda p: p.requires_grad, model.parameters()),
             lr=self.args.learning_rate,
             betas=(0.5, 0.999),
         )
     elif self.args.optimizer == 'AdaBelief':
         optimizer = AdaBelief(model.parameters(),
                               lr=self.args.learning_rate,
                               betas=(0.5, 0.999))
     elif self.args.optimizer == 'RMS':
         optimizer = optim.RMSprop(filter(lambda p: p.requires_grad,
                                          model.parameters()),
                                   lr=self.args.learning_rate)
     elif self.args.optimizer == 'SGD':
         optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=self.args.learning_rate,
                               momentum=0.9)
     elif self.args.optimizer == 'Adagrad':
         optimizer = optim.Adagrad(filter(lambda p: p.requires_grad,
                                          model.parameters()),
                                   lr=self.args.learning_rate)
     elif self.args.optimizer == 'Adadelta':
         optimizer = optim.Adadelta(filter(lambda p: p.requires_grad,
                                           model.parameters()),
                                    lr=self.args.learning_rate)
     else:
         raise ValueError(f'Unknown optimizer: {self.args.optimizer}')
     return optimizer
Example #8
 def configure_optimizers(self):
     # for transformer
     return AdaBelief(
         params=self.model.parameters(),
         lr=self.hparams.optimizer.lr,
         weight_decay=1.2e-6
     )
Example #9
def create_optimizer(args, model_params):
    args.optim = args.optim.lower()
    if args.optim == 'sgd':
        return torch.optim.SGD(model_params,
                               args.lr,
                               momentum=args.momentum,
                               weight_decay=args.weight_decay)
    elif args.optim == 'adam':
        return torch.optim.Adam(model_params,
                                args.lr,
                                betas=(args.beta1, args.beta2),
                                weight_decay=args.weight_decay,
                                eps=args.eps)
    elif args.optim == 'adamw':
        return torch.optim.AdamW(model_params,
                                 args.lr,
                                 betas=(args.beta1, args.beta2),
                                 weight_decay=args.weight_decay,
                                 eps=args.eps)
    elif args.optim == 'adabelief':
        return AdaBelief(model_params,
                         args.lr,
                         betas=(args.beta1, args.beta2),
                         weight_decay=args.weight_decay,
                         eps=args.eps,
                         print_change_log=False)
    else:
        raise ValueError(f'Optimizer not found: {args.optim}')
Example #10
def get_optimizer(parameters, hparams):
    name = hparams.optimizer

    if name == 'sgd':
        print('Using SGD optimizer')
        return optim.SGD(
            parameters,
            lr=hparams.lr,
            momentum=hparams.momentum,
            weight_decay=hparams.weight_decay, 
            nesterov=hparams.nesterov
        )
    elif name == 'adam':
        print('Using Adam optimizer')
        return optim.Adam(
            parameters,
            lr=hparams.lr,
            betas=(hparams.beta1, hparams.beta2),
            weight_decay=hparams.weight_decay 
        )
    elif name == 'ranger':
        print('Using Ranger optimizer')
        return Ranger(
            parameters,
            lr=hparams.lr,
            alpha=hparams.ranger_alpha,
            k=hparams.ranger_k,
            betas=(hparams.beta1, hparams.beta2),
            weight_decay=hparams.weight_decay,
            use_gc=hparams.ranger_gc, 
        )
    elif name == 'adabelief':
        print('Using AdaBelief optimizer')
        return AdaBelief(
            parameters, 
            lr=hparams.lr, 
            weight_decay=hparams.weight_decay,
            eps=hparams.belief_eps, 
            betas=(hparams.beta1, hparams.beta2), 
            weight_decouple=hparams.belief_weight_decouple, 
            rectify=hparams.belief_rectify,
            amsgrad=hparams.belief_amsgrad,
            fixed_decay=hparams.belief_fixed_decay
        )
    elif name == 'ranger_adabelief':
        print('Using RangerAdaBelief optimizer')
        return RangerAdaBelief(
            parameters, 
            lr=hparams.lr,
            alpha=hparams.ranger_alpha,
            k=hparams.ranger_k,
            betas=(hparams.beta1, hparams.beta2),
            weight_decay=hparams.weight_decay,
            use_gc=hparams.ranger_gc, 
            adabelief=True,
        )
    
    else:
        raise NotImplementedError(f'{name} is not an available optimizer')
Example #11
 def configure_optimizers(self):
     return AdaBelief(params=self.model.parameters(),
                      lr=self.hparams.optimizer.lr,
                      eps=1e-12,
                      weight_decay=1.2e-6,
                      weight_decouple=False,
                      rectify=False,
                      fixed_decay=False,
                      amsgrad=False)
Example #12
def get_optimizer(optim_params, model):
    # Optimizer
    if optim_params['optimizer'].lower() == 'adam':
        opt = Adam(model.parameters(),
                   lr=optim_params['step_size'],
                   weight_decay=optim_params['weight_decay'],
                   betas=(optim_params['momentum'], 0.999),
                   eps=1e-08)

    elif optim_params['optimizer'].lower() == 'adabelief':
        opt = AdaBelief(model.parameters(),
                        lr=optim_params['step_size'],
                        eps=1e-16,
                        betas=(0.9, 0.999),
                        weight_decouple=True,
                        rectify=False)

    elif optim_params['optimizer'].lower() == 'sgd':
        opt = SGD(model.parameters(),
                  lr=optim_params['step_size'],
                  weight_decay=optim_params['weight_decay'])

    elif optim_params['optimizer'].lower() == 'alig':
        opt = AliG(model.parameters(), max_lr=optim_params['step_size'])

    elif optim_params['optimizer'].lower() == 'aggmo':
        opt = AggMo(model.parameters(),
                    lr=optim_params['step_size'],
                    momentum=optim_params['betas'])

    elif optim_params['optimizer'].lower() == 'adahessian':
        opt = Adahessian(model.parameters(),
                         lr=optim_params['step_size'],
                         weight_decay=optim_params['weight_decay'],
                         betas=(optim_params['momentum'], 0.999),
                         eps=1e-08)
    else:
        raise ValueError('Unknown optimizer: {}'.format(optim_params['optimizer']))
    # Scheduler
    schedule_params = optim_params['lr_scheduler']
    if optim_params['optimizer'].lower() == 'alig':
        return opt, None
    if schedule_params['name'] == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(
            opt,
            mode='min',
            patience=schedule_params['patience'],
            factor=schedule_params['factor'])
    elif schedule_params['name'] == 'exp':
        scheduler = lr_scheduler.ExponentialLR(opt,
                                               schedule_params['lr_decay'],
                                               schedule_params['last_epoch'])
    elif schedule_params['name'] == 'step':
        scheduler = lr_scheduler.MultiStepLR(opt,
                                             schedule_params['milestones'],
                                             schedule_params['lr_decay'])
    else:
        raise ValueError('Unknown scheduler: {}'.format(schedule_params['name']))

    return opt, scheduler
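The function above expects a nested optim_params dict; a sketch with the key names taken from the accesses in the code (values are illustrative):

optim_params = {
    'optimizer': 'adabelief',
    'step_size': 1e-3,        # used as the lr
    'weight_decay': 1e-4,
    'momentum': 0.9,          # used as beta1 by the adam/adahessian branches
    'betas': [0.0, 0.9],      # only read by the aggmo branch
    'lr_scheduler': {
        'name': 'plateau',
        'patience': 10,
        'factor': 0.5,
        'lr_decay': 0.96,     # only read by 'exp'/'step'
        'last_epoch': -1,     # only read by 'exp'
        'milestones': [30, 60],  # only read by 'step'
    },
}
opt, scheduler = get_optimizer(optim_params, model)  # model is any nn.Module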
Example #13
 def configure_optimizers(self):
     # for transformer
     return AdaBelief(params=self.model.parameters(),
                      lr=self.hparams.optimizer.lr,
                      eps=1e-16,
                      weight_decay=1e-4,
                      weight_decouple=True,
                      rectify=True,
                      fixed_decay=False,
                      amsgrad=False)
Example #14
 def configure_optimizers(self):
     opt = AdaBelief(self.vae.parameters(),
                     lr=self.hparams.learning_rate,
                     weight_decay=self.hparams.weight_decay,
                     print_change_log=False)
     # opt = torch.optim.AdamW(
     #     self.vae.parameters(),
     #     lr=self.hparams.learning_rate,
     #     weight_decay=self.hparams.weight_decay,
     # )
     return opt
Example #15
    def configure_optimizers(self):
        if self.learning_params["optimizer"] == "belief":
            optimizer = AdaBelief(
                self.parameters(),
                lr=self.learning_params["lr"],
                eps=self.learning_params["eplison_belief"],
                weight_decouple=self.learning_params["weight_decouple"],
                weight_decay=self.learning_params["weight_decay"],
                rectify=self.learning_params["rectify"])
        elif self.learning_params["optimizer"] == "ranger_belief":
            optimizer = RangerAdaBelief(
                self.parameters(),
                lr=self.learning_params["lr"],
                eps=self.learning_params["eplison_belief"],
                weight_decouple=self.learning_params["weight_decouple"],
                weight_decay=self.learning_params["weight_decay"],
            )
        elif self.learning_params["optimizer"] == "adam":
            optimizer = torch.optim.Adam(self.parameters(),
                                         lr=self.learning_params["lr"])
        elif self.learning_params["optimizer"] == "adamW":
            optimizer = torch.optim.AdamW(self.parameters(),
                                          lr=self.learning_params["lr"])
        else:
            raise ValueError(
                f'Unknown optimizer: {self.learning_params["optimizer"]}')

        if self.learning_params["add_sch"]:
            lr_scheduler = {
                'scheduler': torch.optim.lr_scheduler.OneCycleLR(
                    optimizer,
                    max_lr=self.learning_params["lr"],
                    steps_per_epoch=self.hparams.steps_per_epoch,  # int(len(train_loader))
                    epochs=self.learning_params["epochs"],
                    anneal_strategy='linear'),
                'name': 'lr_scheduler_lr',
                'interval': 'step',  # or 'epoch'
                'frequency': 1,
            }
            print("scheduler added")
            return [optimizer], [lr_scheduler]

        return optimizer
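The keys read by configure_optimizers above imply a learning_params dict along these lines (values illustrative; the 'eplison_belief' key is spelled exactly as the code reads it):

learning_params = {
    "optimizer": "belief",    # 'belief' | 'ranger_belief' | 'adam' | 'adamW'
    "lr": 1e-3,
    "eplison_belief": 1e-16,  # eps passed to AdaBelief / RangerAdaBelief
    "weight_decouple": True,
    "weight_decay": 1e-4,
    "rectify": True,
    "add_sch": True,          # attach the OneCycleLR scheduler dict
    "epochs": 10,
}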
Example #16
 def optimizer_cls(cls,
                   params=None,
                   lr=0.001,
                   beta1=0.9,
                   beta2=0.999,
                   eps=1e-8,
                   weight_decay=1e-5,
                   amsgrad=False,
                   weight_decouple=False,
                   fixed_decay=False,
                   rectify=False):
     return AdaBelief(params=params,
                      lr=lr,
                      betas=(beta1, beta2),
                      eps=eps,
                      weight_decay=weight_decay,
                      amsgrad=amsgrad,
                      weight_decouple=weight_decouple,
                      fixed_decay=fixed_decay,
                      rectify=rectify)
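optimizer_cls takes cls as its first argument, so it is presumably decorated with @classmethod in the full source. Assuming that, usage would look like (SomeModel and model are placeholders):

opt = SomeModel.optimizer_cls(params=model.parameters(),
                              lr=3e-4,
                              weight_decay=1e-5,
                              rectify=True)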
Example #17
def build_optimizer(parameters, config):
    if config.train.opt.type == "sgd":
        optimizer = torch.optim.SGD(
            parameters,
            lr=config.train.opt.lr,
            momentum=config.train.opt.sgd.momentum,
            weight_decay=config.train.opt.weight_decay,
            nesterov=True,
        )
    elif config.train.opt.type == "adam":
        optimizer = torch.optim.Adam(
            parameters,
            lr=config.train.opt.lr,
            weight_decay=config.train.opt.weight_decay,
        )
    elif config.train.opt.type == "ada_belief":
        optimizer = AdaBelief(
            parameters,
            lr=config.train.opt.lr,
            betas=(0.9, 0.999),
            eps=1e-8,
            weight_decay=config.train.opt.weight_decay,
            weight_decouple=config.train.opt.ada_belief.weight_decouple,
            rectify=False,
            fixed_decay=False,
            amsgrad=False,
        )
    else:
        raise ValueError("invalid optimizer {}".format(config.train.opt.type))

    if config.train.opt.look_ahead is not None:
        optimizer = LookAhead(
            optimizer,
            lr=config.train.opt.look_ahead.lr,
            num_steps=config.train.opt.look_ahead.num_steps,
        )

    return optimizer
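build_optimizer reads an attribute-style nested config; a minimal stand-in built from SimpleNamespace, with field names taken from the accesses above and illustrative values:

from types import SimpleNamespace as NS

config = NS(train=NS(opt=NS(
    type="ada_belief",
    lr=1e-3,
    weight_decay=1e-4,
    sgd=NS(momentum=0.9),                  # only read when type == "sgd"
    ada_belief=NS(weight_decouple=True),   # only read when type == "ada_belief"
    look_ahead=None,                       # or NS(lr=0.5, num_steps=5) to wrap with LookAhead
)))
optimizer = build_optimizer(model.parameters(), config)  # model is any nn.Module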
Example #18
 def configure_optimizers(self):
     # optimizer = optim.SGD(self.model.parameters(), lr=self.hparams.optimizer.lr, momentum=0.9, nesterov=True)
     # scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 10, 2, eta_min=1e-6)
     # optimizer = AdaBelief(
     #     params=self.model.parameters(),
     #     lr=self.hparams.optimizer.lr,
     #     betas=(0.9, 0.999),
     #     eps=1e-16,
     #     weight_decouple=True,
     #     rectify=True,
     #     fixed_decay=False,
     #     amsgrad=False
     # )
     # scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 10, 2, eta_min=1e-6)
     # return [optimizer], [scheduler]
     return AdaBelief(params=self.model.parameters(),
                      lr=self.hparams.optimizer.lr,
                      betas=(0.9, 0.999),
                      eps=1e-16,
                      weight_decouple=True,
                      rectify=True,
                      fixed_decay=False,
                      amsgrad=False)
Example #19
def train(hyp, opt, device, tb_writer=None, wandb=None):
    logger.info(f'Hyperparameters {hyp}')
    log_dir = Path(tb_writer.log_dir) if tb_writer else Path(opt.logdir) / 'evolve'  # logging directory
    wdir = log_dir / 'weights'  # weights directory
    os.makedirs(wdir, exist_ok=True)
    last = wdir / 'last.pt'
    best = wdir / 'best.pt'
    results_file = str(log_dir / 'results.txt')
    epochs, batch_size, total_batch_size, weights, rank = \
        opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank

    # Save run settings
    with open(log_dir / 'hyp.yaml', 'w') as f:
        yaml.dump(hyp, f, sort_keys=False)
    with open(log_dir / 'opt.yaml', 'w') as f:
        yaml.dump(vars(opt), f, sort_keys=False)

    # Configure
    cuda = device.type != 'cpu'
    init_seeds(2 + rank)
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)  # data dict
    with torch_distributed_zero_first(rank):
        check_dataset(data_dict)  # check
    train_path = data_dict['train']
    test_path = data_dict['val']
    nc, names = (1, ['item']) if opt.single_cls else (int(data_dict['nc']), data_dict['names'])  # number of classes, names
    assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data)  # check

    # Model
    pretrained = weights.endswith('.pt')
    if pretrained:
        with torch_distributed_zero_first(rank):
            attempt_download(weights)  # download if not found locally
        ckpt = torch.load(weights, map_location=device)  # load checkpoint
        if hyp.get('anchors'):
            ckpt['model'].yaml['anchors'] = round(hyp['anchors'])  # force autoanchor
        model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device)  # create
        exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else []  # exclude keys
        state_dict = ckpt['model'].float().state_dict()  # to FP32
        state_dict = intersect_dicts(state_dict,
                                     model.state_dict(),
                                     exclude=exclude)  # intersect
        model.load_state_dict(state_dict, strict=False)  # load
        logger.info(
            'Transferred %g/%g items from %s' %
            (len(state_dict), len(model.state_dict()), weights))  # report
    else:
        model = Model(opt.cfg, ch=3, nc=nc).to(device)  # create

    # Freeze
    freeze = []  # parameter names to freeze (full or partial)
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            print('freezing %s' % k)
            v.requires_grad = False

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / total_batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= total_batch_size * accumulate / nbs  # scale weight_decay

    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in model.named_modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
            pg2.append(v.bias)  # biases
        if isinstance(v, nn.BatchNorm2d):
            pg0.append(v.weight)  # no decay
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
            pg1.append(v.weight)  # apply decay

    if opt.adam:  # NOTE: the 'adam' flag actually selects AdaBelief here
        optimizer = AdaBelief(model.parameters(),
                              lr=1e-4,
                              eps=1e-16,
                              betas=(0.9, 0.999),
                              weight_decouple=True,
                              rectify=True)
    else:
        optimizer = optim.SGD(pg0,
                              lr=hyp['lr0'],
                              momentum=hyp['momentum'],
                              nesterov=True)

    # optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
    # optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    # NOTE: while the two lines above are commented out, pg1/pg2 are never added to the SGD optimizer
    logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' %
                (len(pg2), len(pg1), len(pg0)))
    del pg0, pg1, pg2

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
    lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp['lrf']) + hyp['lrf']  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # plot_lr_scheduler(optimizer, scheduler, epochs)

    # Logging
    if wandb and wandb.run is None:
        id = ckpt.get('wandb_id') if 'ckpt' in locals() else None
        wandb_run = wandb.init(config=opt,
                               resume="allow",
                               project="YOLOv5",
                               name=os.path.basename(log_dir),
                               id=id)

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if ckpt['optimizer'] is not None:
            optimizer.load_state_dict(ckpt['optimizer'])
            best_fitness = ckpt['best_fitness']

        # Results
        if ckpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(ckpt['training_results'])  # write results.txt

        # Epochs
        start_epoch = ckpt['epoch'] + 1
        if opt.resume:
            assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (
                weights, epochs)
            shutil.copytree(wdir, wdir.parent /
                            f'weights_backup_epoch{start_epoch - 1}'
                            )  # save previous weights
        if epochs < start_epoch:
            logger.info(
                '%s has been trained for %g epochs. Fine-tuning for %g additional epochs.'
                % (weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs

        del ckpt, state_dict

    # Image sizes
    gs = int(max(model.stride))  # grid size (max stride)
    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify imgsz are gs-multiples

    # DP mode
    if cuda and rank == -1 and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        logger.info('Using SyncBatchNorm()')

    # Exponential moving average
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # DDP mode
    if cuda and rank != -1:
        model = DDP(model,
                    device_ids=[opt.local_rank],
                    output_device=opt.local_rank)

    # Trainloader
    dataloader, dataset = create_dataloader(train_path,
                                            imgsz,
                                            batch_size,
                                            gs,
                                            opt,
                                            hyp=hyp,
                                            augment=True,
                                            cache=opt.cache_images,
                                            rect=opt.rect,
                                            rank=rank,
                                            world_size=opt.world_size,
                                            workers=opt.workers)
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    nb = len(dataloader)  # number of batches
    assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (
        mlc, nc, opt.data, nc - 1)

    # Process 0
    if rank in [-1, 0]:
        ema.updates = start_epoch * nb // accumulate  # set EMA updates
        testloader = create_dataloader(test_path,
                                       imgsz_test,
                                       total_batch_size,
                                       gs,
                                       opt,
                                       hyp=hyp,
                                       augment=False,
                                       cache=opt.cache_images
                                       and not opt.notest,
                                       rect=True,
                                       rank=-1,
                                       world_size=opt.world_size,
                                       workers=opt.workers)[0]  # testloader

        if not opt.resume:
            labels = np.concatenate(dataset.labels, 0)
            c = torch.tensor(labels[:, 0])  # classes
            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
            # model._initialize_biases(cf.to(device))
            plot_labels(labels, save_dir=log_dir)
            if tb_writer:
                # tb_writer.add_hparams(hyp, {})  # causes duplicate https://github.com/ultralytics/yolov5/pull/384
                tb_writer.add_histogram('classes', c, 0)

            # Anchors
            if not opt.noautoanchor:
                check_anchors(dataset,
                              model=model,
                              thr=hyp['anchor_t'],
                              imgsz=imgsz)

    # Model parameters
    hyp['cls'] *= nc / 80.  # scale coco-tuned hyp['cls'] to current dataset
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(
        device)  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb), 1e3)  # number of warmup iterations: max(hyp['warmup_epochs'] epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@0.5, mAP@0.5:0.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    logger.info('Image sizes %g train, %g test\n'
                'Using %g dataloader workers\nLogging results to %s\n'
                'Starting training for %g epochs...' %
                (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs))
    for epoch in range(
            start_epoch, epochs
    ):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if opt.image_weights:
            # Generate indices
            if rank in [-1, 0]:
                cw = model.class_weights.cpu().numpy() * (
                    1 - maps)**2  # class weights
                iw = labels_to_image_weights(dataset.labels,
                                             nc=nc,
                                             class_weights=cw)  # image weights
                dataset.indices = random.choices(
                    range(dataset.n), weights=iw,
                    k=dataset.n)  # rand weighted idx
            # Broadcast if DDP
            if rank != -1:
                indices = (torch.tensor(dataset.indices)
                           if rank == 0 else torch.zeros(dataset.n)).int()
                dist.broadcast(indices, 0)
                if rank != 0:
                    dataset.indices = indices.cpu().numpy()

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(4, device=device)  # mean losses
        if rank != -1:
            dataloader.sampler.set_epoch(epoch)
        pbar = enumerate(dataloader)
        logger.info(
            ('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls',
                                   'total', 'targets', 'img_size'))
        if rank in [-1, 0]:
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()
        for i, (
                imgs, targets, paths, _
        ) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                accumulate = max(
                    1,
                    np.interp(ni, xi, [1, nbs / total_batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [
                        hyp['warmup_bias_lr'] if j == 2 else 0.0,
                        x['initial_lr'] * lf(epoch)
                    ])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(
                            ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                sz = random.randrange(int(imgsz * 0.5),
                                      int(imgsz * 1.5 + gs)) // gs * gs  # size (randrange needs int bounds)
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = F.interpolate(imgs,
                                         size=ns,
                                         mode='bilinear',
                                         align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                loss, loss_items = compute_loss(
                    pred, targets.to(device),
                    model)  # loss scaled by batch_size
                if rank != -1:
                    loss *= opt.world_size  # gradient averaged between devices in DDP mode

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            if ni % accumulate == 0:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)

            # Print
            if rank in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9
                                 if torch.cuda.is_available() else 0)  # (GB)
                s = ('%10s' * 2 +
                     '%10.4g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem,
                                      *mloss, targets.shape[0], imgs.shape[-1])
                pbar.set_description(s)

                # Plot
                if ni < 3:
                    f = str(log_dir / f'train_batch{ni}.jpg')  # filename
                    result = plot_images(images=imgs,
                                         targets=targets,
                                         paths=paths,
                                         fname=f)
                    # if tb_writer and result is not None:
                    # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                    # tb_writer.add_graph(model, imgs)  # add model to tensorboard

            # end batch ------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
        scheduler.step()

        # DDP process 0 or single-GPU
        if rank in [-1, 0]:
            # mAP
            if ema:
                ema.update_attr(
                    model,
                    include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride'])
            final_epoch = epoch + 1 == epochs
            if not opt.notest or final_epoch:  # Calculate mAP
                results, maps, times = test.test(
                    opt.data,
                    batch_size=total_batch_size,
                    imgsz=imgsz_test,
                    model=ema.ema,
                    single_cls=opt.single_cls,
                    dataloader=testloader,
                    save_dir=log_dir,
                    plots=epoch == 0 or final_epoch,  # plot first and last
                    log_imgs=opt.log_imgs)

            # Write
            with open(results_file, 'a') as f:
                f.write(s + '%10.4g' * 7 % results + '\n')  # P, R, mAP@0.5, mAP@0.5:0.95, val_loss(box, obj, cls)
            if len(opt.name) and opt.bucket:
                os.system('gsutil cp %s gs://%s/results/results%s.txt' %
                          (results_file, opt.bucket, opt.name))

            # Log
            tags = [
                'train/giou_loss',
                'train/obj_loss',
                'train/cls_loss',  # train loss
                'metrics/precision',
                'metrics/recall',
                'metrics/mAP_0.5',
                'metrics/mAP_0.5:0.95',
                'val/giou_loss',
                'val/obj_loss',
                'val/cls_loss',  # val loss
                'x/lr0',
                'x/lr1',
                'x/lr2'
            ]  # params
            for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
                if tb_writer:
                    tb_writer.add_scalar(tag, x, epoch)  # tensorboard
                if wandb:
                    wandb.log({tag: x})  # W&B

            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@0.5, mAP@0.5:0.95]
            if fi > best_fitness:
                best_fitness = fi

            # Save model
            save = (not opt.nosave) or (final_epoch and not opt.evolve)
            if save:
                with open(results_file, 'r') as f:  # create checkpoint
                    ckpt = {
                        'epoch': epoch,
                        'best_fitness': best_fitness,
                        'training_results': f.read(),
                        'model': ema.ema,
                        'optimizer': None if final_epoch else optimizer.state_dict(),
                        'wandb_id': wandb_run.id if wandb else None
                    }

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                del ckpt
        # end epoch ----------------------------------------------------------------------------------------------------
    # end training

    if rank in [-1, 0]:
        # Strip optimizers
        n = opt.name if opt.name.isnumeric() else ''
        fresults, flast, fbest = log_dir / f'results{n}.txt', wdir / f'last{n}.pt', wdir / f'best{n}.pt'
        for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file],
                          [flast, fbest, fresults]):
            if os.path.exists(f1):
                os.rename(f1, f2)  # rename
                if str(f2).endswith('.pt'):  # is *.pt
                    strip_optimizer(f2)  # strip optimizer
                    if opt.bucket:
                        os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket))  # upload
        # Finish
        if not opt.evolve:
            plot_results(save_dir=log_dir)  # save as results.png
        logger.info('%g epochs completed in %.3f hours.\n' %
                    (epoch - start_epoch + 1, (time.time() - t0) / 3600))

    if rank not in [-1, 0]:
        dist.destroy_process_group()
    torch.cuda.empty_cache()
    return results
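A quick aside on the cosine schedule used above: lf maps epoch x to a multiplier that decays from 1.0 at epoch 0 to hyp['lrf'] at the final epoch. A standalone sanity check of the endpoints (illustrative values for epochs and hyp['lrf']):

import math

epochs, lrf = 300, 0.2
lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - lrf) + lrf
assert abs(lf(0) - 1.0) < 1e-9       # full lr at the start
assert abs(lf(epochs) - lrf) < 1e-9  # decays to lrf by the end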
Example #20
def memo_valor(env_fn,
               model=MEMO,
               memo_kwargs=dict(),
               annealing_kwargs=dict(),
               seed=0,
               episodes_per_expert=40,
               epochs=50,
               # warmup=10,
               train_iters=5,
               step_size=5,
               memo_lr=1e-3,
               train_batch_size=50,
               eval_batch_size=200,
               max_ep_len=1000,
               logger_kwargs=dict(),
               config_name='standard',
               save_freq=10,
               # replay_buffers=[],
               memories=[]):
    # W&B Logging
    wandb.login()

    composite_name = 'E ' + str(epochs) + ' B ' + str(train_batch_size) + ' ENC ' + \
                     str(memo_kwargs['encoder_hidden']) + ' DEC ' + str(memo_kwargs['decoder_hidden'])

    wandb.init(project="MEMO", group='Epochs: ' + str(epochs),  name=composite_name, config=locals())

    assert memories != [], "No examples found! Replay/memory buffers must be set to proceed."

    # Special function to avoid certain slowdowns from PyTorch + MPI combo.
    setup_pytorch_for_mpi()

    # Set up logger and save configuration
    logger = EpochLogger(**logger_kwargs)
    logger.save_config(locals())

    seed += 10000 * proc_id()
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Instantiate environment
    env = env_fn()
    obs_dim = env.observation_space.shape
    act_dim = env.action_space.shape

    # Model    # Create discriminator and monitor it
    con_dim = len(memories)
    memo = model(obs_dim=obs_dim[0], out_dim=act_dim[0], **memo_kwargs)

    # Set up model saving
    logger.setup_pytorch_saver([memo])

    # Sync params across processes
    sync_params(memo)
    N_expert = episodes_per_expert*max_ep_len
    print("N Expert: ", N_expert)

    # Buffer
    # local_episodes_per_epoch = int(episodes_per_epoch / num_procs())
    local_iter_per_epoch = int(train_iters / num_procs())

    # Count variables
    var_counts = tuple(count_vars(module) for module in [memo])
    logger.log('\nNumber of parameters: \t d: %d\n' % var_counts)

    # Optimizers
    # memo_optimizer = AdaBelief(memo.parameters(), lr=memo_lr, eps=1e-20, rectify=True)
    memo_optimizer = AdaBelief(memo.parameters(), lr=memo_lr, eps=1e-16, rectify=True)
    # memo_optimizer = Adam(memo.parameters(), lr=memo_lr, betas=(0.9, 0.98), eps=1e-9)

    start_time = time.time()

    # Prepare data
    mem = MemoryBatch(memories, step=step_size)

    transition_states, pure_states, transition_actions, expert_ids = mem.collate()
    total_l_old, recon_l_old, context_l_old = 0, 0, 0

    # Main Loop
    kl_beta_schedule = frange_cycle_sigmoid(epochs, **annealing_kwargs)

    for epoch in range(epochs):
        memo.train()

        # Select state transitions and actions at random indexes
        batch_indexes = torch.randint(len(transition_states), (train_batch_size,))

        raw_states_batch, delta_states_batch, actions_batch, sampled_experts = \
           pure_states[batch_indexes], transition_states[batch_indexes], transition_actions[batch_indexes], expert_ids[batch_indexes]


        for i in range(local_iter_per_epoch):
            # kl_beta = kl_beta_schedule[epoch]
            kl_beta = 1
            # only take context labeling into account for first label
            loss, recon_loss, X, latent_labels, vq_loss = memo(
                raw_states_batch, delta_states_batch, actions_batch, kl_beta)
            memo_optimizer.zero_grad()
            loss.mean().backward()
            mpi_avg_grads(memo)
            memo_optimizer.step()

        # scheduler.step(loss.mean().data.item())

        total_l_new, recon_l_new, vq_l_new = loss.mean().data.item(), recon_loss.mean().data.item(), vq_loss.mean().data.item()

        memo_metrics = {'MEMO Loss': total_l_new, 'Recon Loss': recon_l_new, "VQ Labeling Loss": vq_l_new,
                        "KL Beta": kl_beta_schedule[epoch]}
        wandb.log(memo_metrics)

        logger.store(TotalLoss=total_l_new, PolicyLoss=recon_l_new, # ContextLoss=context_l_new,
                     DeltaTotalLoss=total_l_new-total_l_old, DeltaPolicyLoss=recon_l_new-recon_l_old,
                     )

        total_l_old, recon_l_old = total_l_new, recon_l_new  # , context_l_new

        if (epoch % save_freq == 0) or (epoch == epochs - 1):
            logger.save_state({'env': env}, [memo], None)

        # Log
        logger.log_tabular('Epoch', epoch)
        logger.log_tabular('EpochBatchSize', train_batch_size)
        logger.log_tabular('TotalLoss', average_only=True)
        logger.log_tabular('PolicyLoss', average_only=True)
        logger.log_tabular('Time', time.time() - start_time)
        logger.dump_tabular()

    print("Finished training, and detected %d contexts!" % len(memo.found_contexts))
    # wandb.finish()
    print('memo type', memo)
    return memo, mem
def load_framework(
        seed, disable_debugging_API, num_workers, config_path,
        checkpoint_folder, reduce_train_dataset, standing_statistics,
        standing_step, freeze_layers, load_current, eval_type, dataset_name,
        num_classes, img_size, data_path, architecture, conditional_strategy,
        hypersphere_dim, nonlinear_embed, normalize_embed, g_spectral_norm,
        d_spectral_norm, activation_fn, attention,
        attention_after_nth_gen_block, attention_after_nth_dis_block, z_dim,
        shared_dim, g_conv_dim, d_conv_dim, G_depth, D_depth, optimizer,
        batch_size, d_lr, g_lr, momentum, nesterov, alpha, beta1, beta2,
        total_step, adv_loss, cr, g_init, d_init, random_flip_preprocessing,
        prior, truncated_factor, ema, ema_decay, ema_start, synchronized_bn,
        mixed_precision, hdf5_path_train, train_config, model_config, **_):
    if seed == 0:
        cudnn.benchmark = True
        cudnn.deterministic = False
    else:
        fix_all_seed(seed)
        cudnn.benchmark = False
        cudnn.deterministic = True

    if disable_debugging_API:
        torch.autograd.set_detect_anomaly(False)

    n_gpus = torch.cuda.device_count()
    default_device = torch.cuda.current_device()

    check_flag_0(batch_size, n_gpus, standing_statistics, ema, freeze_layers,
                 checkpoint_folder)
    assert batch_size % n_gpus == 0, "batch_size must be divisible by the number of GPUs"

    if n_gpus == 1:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    prev_ada_p, step, best_step, best_fid, best_fid_checkpoint_path = None, 0, 0, None, None
    standing_step = standing_step if standing_statistics is True else batch_size

    run_name = make_run_name(RUN_NAME_FORMAT,
                             framework=config_path.split('/')[-1][:-5],
                             phase='train')

    logger = make_logger(run_name, None)
    writer = SummaryWriter(log_dir=join('./logs', run_name))
    logger.info('Run name : {run_name}'.format(run_name=run_name))
    logger.info(train_config)
    logger.info(model_config)

    logger.info('Loading train datasets...')
    train_dataset = LoadDataset(dataset_name,
                                data_path,
                                train=True,
                                download=True,
                                resize_size=img_size,
                                hdf5_path=hdf5_path_train,
                                random_flip=random_flip_preprocessing)
    if reduce_train_dataset < 1.0:
        num_train = int(reduce_train_dataset * len(train_dataset))
        train_dataset, _ = torch.utils.data.random_split(
            train_dataset,
            [num_train, len(train_dataset) - num_train])
    logger.info('Train dataset size : {dataset_size}'.format(
        dataset_size=len(train_dataset)))

    logger.info('Loading {mode} datasets...'.format(mode=eval_type))
    eval_mode = (eval_type == 'train')
    eval_dataset = LoadDataset(dataset_name,
                               data_path,
                               train=eval_mode,
                               download=True,
                               resize_size=img_size,
                               hdf5_path=None,
                               random_flip=False)
    logger.info('Eval dataset size : {dataset_size}'.format(
        dataset_size=len(eval_dataset)))

    logger.info('Building model...')
    if architecture == "dcgan":
        assert img_size == 32, "StudioGAN does not support dcgan models for generating images larger than 32x32."
    module = __import__(
        'models.{architecture}'.format(architecture=architecture),
        fromlist=['something'])
    logger.info('Modules are located on models.{architecture}'.format(
        architecture=architecture))
    Gen = module.Generator(z_dim, shared_dim, img_size, g_conv_dim,
                           g_spectral_norm, attention,
                           attention_after_nth_gen_block, activation_fn,
                           conditional_strategy, num_classes, g_init, G_depth,
                           mixed_precision).to(default_device)

    Dis = module.Discriminator(img_size, d_conv_dim, d_spectral_norm,
                               attention, attention_after_nth_dis_block,
                               activation_fn, conditional_strategy,
                               hypersphere_dim, num_classes, nonlinear_embed,
                               normalize_embed, d_init, D_depth,
                               mixed_precision).to(default_device)

    if ema:
        print('Preparing EMA for G with decay of {}'.format(ema_decay))
        Gen_copy = module.Generator(
            z_dim,
            shared_dim,
            img_size,
            g_conv_dim,
            g_spectral_norm,
            attention,
            attention_after_nth_gen_block,
            activation_fn,
            conditional_strategy,
            num_classes,
            initialize=False,
            G_depth=G_depth,
            mixed_precision=mixed_precision).to(default_device)
        Gen_ema = ema_(Gen, Gen_copy, ema_decay, ema_start)
    else:
        Gen_copy, Gen_ema = None, None

    logger.info(count_parameters(Gen))
    logger.info(Gen)

    logger.info(count_parameters(Dis))
    logger.info(Dis)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=num_workers,
                                  drop_last=True)
    eval_dataloader = DataLoader(eval_dataset,
                                 batch_size=batch_size,
                                 shuffle=True,
                                 pin_memory=True,
                                 num_workers=num_workers,
                                 drop_last=False)

    G_loss = {
        'vanilla': loss_dcgan_gen,
        'least_square': loss_lsgan_gen,
        'hinge': loss_hinge_gen,
        'wasserstein': loss_wgan_gen
    }
    D_loss = {
        'vanilla': loss_dcgan_dis,
        'least_square': loss_lsgan_dis,
        'hinge': loss_hinge_dis,
        'wasserstein': loss_wgan_dis
    }

    if optimizer == "SGD":
        G_optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                             Gen.parameters()),
                                      g_lr,
                                      momentum=momentum,
                                      nesterov=nesterov)
        D_optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                             Dis.parameters()),
                                      d_lr,
                                      momentum=momentum,
                                      nesterov=nesterov)
    elif optimizer == "RMSprop":
        G_optimizer = torch.optim.RMSprop(filter(lambda p: p.requires_grad,
                                                 Gen.parameters()),
                                          g_lr,
                                          momentum=momentum,
                                          alpha=alpha)
        D_optimizer = torch.optim.RMSprop(filter(lambda p: p.requires_grad,
                                                 Dis.parameters()),
                                          d_lr,
                                          momentum=momentum,
                                          alpha=alpha)
    elif optimizer == "Adam":
        G_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                              Gen.parameters()),
                                       g_lr, [beta1, beta2],
                                       eps=1e-6)
        D_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                              Dis.parameters()),
                                       d_lr, [beta1, beta2],
                                       eps=1e-6)
    elif optimizer == "AdaBelief":
        G_optimizer = AdaBelief(filter(lambda p: p.requires_grad,
                                       Gen.parameters()),
                                g_lr, [beta1, beta2],
                                eps=1e-12,
                                rectify=False)
        D_optimizer = AdaBelief(filter(lambda p: p.requires_grad,
                                       Dis.parameters()),
                                d_lr, [beta1, beta2],
                                eps=1e-12,
                                rectify=False)
    else:
        raise NotImplementedError

    if checkpoint_folder is not None:
        when = "current" if load_current is True else "best"
        if not exists(abspath(checkpoint_folder)):
            raise NotADirectoryError(checkpoint_folder)
        checkpoint_dir = make_checkpoint_dir(checkpoint_folder, run_name)
        g_checkpoint_dir = glob.glob(
            join(checkpoint_dir,
                 "model=G-{when}-weights-step*.pth".format(when=when)))[0]
        d_checkpoint_dir = glob.glob(
            join(checkpoint_dir,
                 "model=D-{when}-weights-step*.pth".format(when=when)))[0]
        Gen, G_optimizer, trained_seed, run_name, step, prev_ada_p = load_checkpoint(
            Gen, G_optimizer, g_checkpoint_dir)
        Dis, D_optimizer, trained_seed, run_name, step, prev_ada_p, best_step, best_fid, best_fid_checkpoint_path =\
            load_checkpoint(Dis, D_optimizer, d_checkpoint_dir, metric=True)
        logger = make_logger(run_name, None)
        if ema:
            g_ema_checkpoint_dir = glob.glob(
                join(checkpoint_dir,
                     "model=G_ema-{when}-weights-step*.pth".format(
                         when=when)))[0]
            Gen_copy = load_checkpoint(Gen_copy,
                                       None,
                                       g_ema_checkpoint_dir,
                                       ema=True)
            Gen_ema.source, Gen_ema.target = Gen, Gen_copy

        writer = SummaryWriter(log_dir=join('./logs', run_name))
        if train_config['train']:
            assert seed == trained_seed, "seed for sampling random numbers should be the same!"
        logger.info('Generator checkpoint is {}'.format(g_checkpoint_dir))
        logger.info('Discriminator checkpoint is {}'.format(d_checkpoint_dir))
        if freeze_layers > -1:
            prev_ada_p, step, best_step, best_fid, best_fid_checkpoint_path = None, 0, 0, None, None
    else:
        checkpoint_dir = make_checkpoint_dir(checkpoint_folder, run_name)

    if n_gpus > 1:
        Gen = DataParallel(Gen, output_device=default_device)
        Dis = DataParallel(Dis, output_device=default_device)
        if ema:
            Gen_copy = DataParallel(Gen_copy, output_device=default_device)

        if synchronized_bn:
            Gen = convert_model(Gen).to(default_device)
            Dis = convert_model(Dis).to(default_device)
            if ema:
                Gen_copy = convert_model(Gen_copy).to(default_device)

    if train_config['eval']:
        inception_model = InceptionV3().to(default_device)
        if n_gpus > 1:
            inception_model = DataParallel(inception_model,
                                           output_device=default_device)
        mu, sigma = prepare_inception_moments(dataloader=eval_dataloader,
                                              generator=Gen,
                                              eval_mode=eval_type,
                                              inception_model=inception_model,
                                              splits=1,
                                              run_name=run_name,
                                              logger=logger,
                                              device=default_device)
    else:
        mu, sigma, inception_model = None, None, None

    train_eval = Train_Eval(
        run_name=run_name,
        best_step=best_step,
        dataset_name=dataset_name,
        eval_type=eval_type,
        logger=logger,
        writer=writer,
        n_gpus=n_gpus,
        gen_model=Gen,
        dis_model=Dis,
        inception_model=inception_model,
        Gen_copy=Gen_copy,
        Gen_ema=Gen_ema,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        train_dataloader=train_dataloader,
        eval_dataloader=eval_dataloader,
        freeze_layers=freeze_layers,
        conditional_strategy=conditional_strategy,
        pos_collected_numerator=model_config['model']
        ['pos_collected_numerator'],
        z_dim=z_dim,
        num_classes=num_classes,
        hypersphere_dim=hypersphere_dim,
        d_spectral_norm=d_spectral_norm,
        g_spectral_norm=g_spectral_norm,
        G_optimizer=G_optimizer,
        D_optimizer=D_optimizer,
        batch_size=batch_size,
        g_steps_per_iter=model_config['optimization']['g_steps_per_iter'],
        d_steps_per_iter=model_config['optimization']['d_steps_per_iter'],
        accumulation_steps=model_config['optimization']['accumulation_steps'],
        total_step=total_step,
        G_loss=G_loss[adv_loss],
        D_loss=D_loss[adv_loss],
        contrastive_lambda=model_config['loss_function']['contrastive_lambda'],
        margin=model_config['loss_function']['margin'],
        tempering_type=model_config['loss_function']['tempering_type'],
        tempering_step=model_config['loss_function']['tempering_step'],
        start_temperature=model_config['loss_function']['start_temperature'],
        end_temperature=model_config['loss_function']['end_temperature'],
        weight_clipping_for_dis=model_config['loss_function']
        ['weight_clipping_for_dis'],
        weight_clipping_bound=model_config['loss_function']
        ['weight_clipping_bound'],
        gradient_penalty_for_dis=model_config['loss_function']
        ['gradient_penalty_for_dis'],
        gradient_penalty_lambda=model_config['loss_function']
        ['gradient_penalty_lambda'],
        deep_regret_analysis_for_dis=model_config['loss_function']
        ['deep_regret_analysis_for_dis'],
        regret_penalty_lambda=model_config['loss_function']
        ['regret_penalty_lambda'],
        cr=cr,
        cr_lambda=model_config['loss_function']['cr_lambda'],
        bcr=model_config['loss_function']['bcr'],
        real_lambda=model_config['loss_function']['real_lambda'],
        fake_lambda=model_config['loss_function']['fake_lambda'],
        zcr=model_config['loss_function']['zcr'],
        gen_lambda=model_config['loss_function']['gen_lambda'],
        dis_lambda=model_config['loss_function']['dis_lambda'],
        sigma_noise=model_config['loss_function']['sigma_noise'],
        diff_aug=model_config['training_and_sampling_setting']['diff_aug'],
        ada=model_config['training_and_sampling_setting']['ada'],
        prev_ada_p=prev_ada_p,
        ada_target=model_config['training_and_sampling_setting']['ada_target'],
        ada_length=model_config['training_and_sampling_setting']['ada_length'],
        prior=prior,
        truncated_factor=truncated_factor,
        ema=ema,
        latent_op=model_config['training_and_sampling_setting']['latent_op'],
        latent_op_rate=model_config['training_and_sampling_setting']
        ['latent_op_rate'],
        latent_op_step=model_config['training_and_sampling_setting']
        ['latent_op_step'],
        latent_op_step4eval=model_config['training_and_sampling_setting']
        ['latent_op_step4eval'],
        latent_op_alpha=model_config['training_and_sampling_setting']
        ['latent_op_alpha'],
        latent_op_beta=model_config['training_and_sampling_setting']
        ['latent_op_beta'],
        latent_norm_reg_weight=model_config['training_and_sampling_setting']
        ['latent_norm_reg_weight'],
        default_device=default_device,
        print_every=train_config['print_every'],
        save_every=train_config['save_every'],
        checkpoint_dir=checkpoint_dir,
        evaluate=train_config['eval'],
        mu=mu,
        sigma=sigma,
        best_fid=best_fid,
        best_fid_checkpoint_path=best_fid_checkpoint_path,
        mixed_precision=mixed_precision,
        train_config=train_config,
        model_config=model_config,
    )

    if train_config['train']:
        step = train_eval.train(current_step=step, total_step=total_step)

    if train_config['eval']:
        is_save = train_eval.evaluation(
            step=step,
            standing_statistics=standing_statistics,
            standing_step=standing_step)

    if train_config['save_images']:
        train_eval.save_images(is_generate=True,
                               png=True,
                               npz=True,
                               standing_statistics=standing_statistics,
                               standing_step=standing_step)

    if train_config['image_visualization']:
        train_eval.run_image_visualization(
            nrow=train_config['nrow'],
            ncol=train_config['ncol'],
            standing_statistics=standing_statistics,
            standing_step=standing_step)

    if train_config['k_nearest_neighbor']:
        train_eval.run_nearest_neighbor(
            nrow=train_config['nrow'],
            ncol=train_config['ncol'],
            standing_statistics=standing_statistics,
            standing_step=standing_step)

    if train_config['interpolation']:
        assert architecture in [
            "big_resnet", "biggan_deep"
        ], "Not supported except for biggan and biggan_deep."
        train_eval.run_linear_interpolation(
            nrow=train_config['nrow'],
            ncol=train_config['ncol'],
            fix_z=True,
            fix_y=False,
            standing_statistics=standing_statistics,
            standing_step=standing_step)
        train_eval.run_linear_interpolation(
            nrow=train_config['nrow'],
            ncol=train_config['ncol'],
            fix_z=False,
            fix_y=True,
            standing_statistics=standing_statistics,
            standing_step=standing_step)

    if train_config['frequency_analysis']:
        train_eval.run_frequency_analysis(
            num_images=len(train_dataset) // num_classes,
            standing_statistics=standing_statistics,
            standing_step=standing_step)
Example #22
0
#
# Create HDGCN network.
#

network = HDGCN(nnodes=max_seq_len,
                nfeat=bert_dim,
                nhid=nhid,
                nclass=nclass,
                max_seq_len=max_seq_len,
                device=device,
                batch_size=batch_size,
                vocab=vocab).to(device)

optimizer = AdaBelief(network.parameters(),
                      lr=learning_rate,
                      eps=1e-16,
                      betas=(0.9, 0.999),
                      weight_decouple=True,
                      rectify=False)


# Converts batches of class indices to classes of one-hot vectors.
def to_one_hot(x, length):
    batch_size = x.size(0)
    x_one_hot = torch.zeros(batch_size, length)
    for i in range(batch_size):
        x_one_hot[i, x[i]] = 1.0
    return x_one_hot
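
# Example: to_one_hot(torch.tensor([2, 0]), 4)
# -> tensor([[0., 0., 1., 0.],
#            [1., 0., 0., 0.]])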


def test():
    network.eval()
Example #23
0
 if args.optimizer == 'sgd':
     optimizer = torch.optim.SGD(params,
                                 lr=args.lr,
                                 weight_decay=args.wdecay)
 if args.optimizer == 'adam':
     optimizer = torch.optim.Adam(params,
                                  lr=args.lr,
                                  weight_decay=args.wdecay)
 if args.optimizer == 'fromage':
     optimizer = Fromage(params, lr=args.lr)
 if args.optimizer == 'adamw':
     optimizer = AdamW(params, lr=args.lr, weight_decay=args.wdecay)
 if args.optimizer == 'radam':
     optimizer = RAdam(params, lr=args.lr, weight_decay=args.wdecay)
 if args.optimizer.lower() == 'adabelief':
     optimizer = AdaBelief(params,
                           lr=args.lr,
                           weight_decay=args.wdecay,
                           eps=args.eps,
                           betas=(args.beta1, args.beta2))
 if args.optimizer == 'adabound':
     optimizer = AdaBound(params,
                          lr=args.lr,
                          weight_decay=args.wdecay,
                          final_lr=30,
                          gamma=1e-3)
 if args.optimizer == 'amsbound':
     optimizer = AdaBound(params,
                          lr=args.lr,
                          weight_decay=args.wdecay,
                          final_lr=30,
                          gamma=1e-3,
                          amsbound=True)
Example #24
0
    def __init__(self,
                 policy,
                 value_fun,
                 cost_fun,
                 simulator,
                 target_kl=1e-2,
                 vf_lr=1e-2,
                 cf_lr=1e-2,
                 cost_lim=0.1,
                 train_v_iters=5,
                 train_c_iters=5,
                 val_l2_reg=1e-3,
                 cost_l2_reg=1e-3,
                 gamma=0.995,
                 cost_gamma=0.995,
                 cg_damping=1e-3,
                 cg_max_iters=10,
                 line_search_coef=0.9,
                 line_search_max_iter=10,
                 line_search_accept_ratio=0.1,
                 optim_mode="adam",
                 optim_max_iter=25,
                 model_name=None,
                 continue_from_file=False,
                 save_every=10,
                 save_dir='trained-models-dir',
                 print_updates=True):

        # Special function to avoid certain slowdowns from PyTorch + MPI combo.
        setup_pytorch_for_mpi()

        self.save_dir = save_dir
        self.mse_loss = MSELoss(reduction='mean')

        # Set policy and functions
        self.policy = policy
        self.value_fun = value_fun
        self.cost_fun = cost_fun
        self.simulator = simulator

        # Different optimizer modes (LBFGS, Adam, and AdaBelief)

        if optim_mode == "adam":
            self.value_fun_optimizer = Adam(self.value_fun.parameters(),
                                            lr=vf_lr)
            self.cost_fun_optimizer = Adam(self.cost_fun.parameters(),
                                           lr=cf_lr)

        elif optim_mode == "adabelief":
            self.value_fun_optimizer = AdaBelief(self.value_fun.parameters(),
                                                 betas=(0.9, 0.999),
                                                 eps=1e-8)
            self.cost_fun_optimizer = AdaBelief(self.cost_fun.parameters(),
                                                betas=(0.9, 0.999),
                                                eps=1e-8)

        else:
            self.value_fun_optimizer = LBFGS(self.value_fun.parameters(),
                                             lr=vf_lr,
                                             max_iter=optim_max_iter)
            self.cost_fun_optimizer = LBFGS(self.cost_fun.parameters(),
                                            lr=cf_lr,
                                            max_iter=optim_max_iter)

        self.epoch_num = 0
        self.elapsed_time = timedelta(0)
        self.device = get_device()
        self.mean_rewards = []
        self.mean_costs = []
        self.session_cum_avg_rewards = 0
        self.session_cum_avg_costs = 0

        if not model_name and continue_from_file:
            raise Exception('Argument continue_from_file to __init__ method of ' \
                            'CPO case was set to True but model_name was not ' \
                            'specified.')

        if not model_name and save_every:
            raise Exception('Argument save_every to __init__ method of CPO ' \
                            'was set to a value greater than 0 but model_name ' \
                            'was not specified.')

        if continue_from_file:
            print("Continuing from saved session")
            self.load_session()
Example #25
0
def vail(env_fn,
        actor_critic=MLPActorCritic,
        discrim = Discriminator,
        agent=PPOAgent(),
        ac_kwargs=dict(),
        seed=0,
        # Experience Collection
        steps_per_epoch=4000,
        epochs=50,
        max_ep_len=1000,
        # Discount factors:
        gamma=0.99,
        lam=0.97,
        cost_gamma=0.99,
        cost_lam=0.97,
        # Policy Learning:
        ent_reg=0.,
        # Cost constraints / penalties:
        cost_lim=25,
        penalty_init=1.,
        penalty_lr=5e-3,
        # KL divergence:
        target_kl=0.01,
        # Value learning:
        vf_lr=1e-3,
        train_v_iters=100,
        # Policy Learning:
        pi_lr=3e-4,
        train_pi_iters=100,
        # Discriminator Learning:
        discrim_lr= 1e-3,
        train_discrim_iters=100,
        # Clipping
        clip_ratio=0.2,
        logger_kwargs=dict(),
        # Experimenting
        config_name = 'standard',
        save_every=10):
    """
        ac_kwargs (dict): Any kwargs appropriate for the ActorCritic object you provided to PPO.
        seed (int): Seed for random number generators.
        steps_per_epoch (int): Number of steps of interaction (state-action pairs)
            for the agent and the environment in each epoch.
        epochs (int): Number of epochs of interaction (equivalent to
            number of policy updates) to perform.
        gamma (float): Discount factor. (Always between 0 and 1.)
        clip_ratio (float): Hyperparameter for clipping in the policy objective.
            Roughly: how far can the new policy go from the old policy while
            still profiting (improving the objective function)? The new policy
            can still go farther than the clip_ratio says, but it doesn't help
            on the objective anymore. (Usually small, 0.1 to 0.3.) Typically
            denoted by :math:`\epsilon`.
        pi_lr (float): Learning rate for policy optimizer.
        vf_lr (float): Learning rate for value function optimizer.
        train_pi_iters (int): Maximum number of gradient descent steps to take
            on policy loss per epoch. (Early stopping may cause optimizer
            to take fewer than this.)
        train_v_iters (int): Number of gradient descent steps to take on
            value function per epoch.
        lam (float): Lambda for GAE-Lambda. (Always between 0 and 1, close to 1.)
        max_ep_len (int): Maximum length of trajectory / episode / rollout.
        target_kl (float): Roughly what KL divergence we think is appropriate
            between new and old policies after an update. This will get used
            for early stopping. (Usually small, 0.01 or 0.05.)
        logger_kwargs (dict): Keyword args for EpochLogger.
        save_every (int): How often (in terms of gap between epochs) to save
            the current policy and value function.
    """

    # Print some params
    print("here are some params")
    print("penalty lr: ", penalty_lr)
    print("cost limit: ", cost_lim)
    print("gamma: ", gamma)
    print("cost gamma", cost_gamma)
    print("seed: ", seed)

    # W&B Logging
    wandb.login()

    # config_name = 'marigold-gail'
    config_name = 'marigold'

    # train_discriminator = True

    composite_name = 'ppo_penalized_' + config_name + '_' + str(int(steps_per_epoch/1000)) + \
                     'Ks_' + str(epochs) + 'e_' + str(ac_kwargs['hidden_sizes'][0]) + 'x' + \
                     str(len(ac_kwargs['hidden_sizes']))

    # 4 million env interactions
    wandb.init(project="vail-experts-1000epochs", group="full_runs", name='vail_'+composite_name)

    # Special function to avoid certain slowdowns from PyTorch + MPI combo.
    setup_pytorch_for_mpi()

    # Set up logger and save configuration
    logger = EpochLogger(**logger_kwargs)
    logger.save_config(locals())

    # Random seed
    seed += 10000 * proc_id()
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Instantiate environment
    env = env_fn()


    # Paths
    _project_dir = '/home/tyna/Documents/openai/research-project/'
    _root_data_path = _project_dir + 'data/'
    _expert_path = _project_dir + 'expert_data/'
    _clone_path = _project_dir + 'clone_data/'
    _demo_dir = os.path.join(_expert_path, config_name + '_episodes/')

    # load demonstrations
    # expert_demo, _ = pickle.load(open('./expert_demo/expert_demo.p', "rb"))
    # demonstrations = np.array(expert_demo)
    # print("demonstrations.shape", demonstrations.shape)

    f = open(_demo_dir + 'sim_data_' + str(1000) + '_buffer.pkl', "rb")
    buffer_file = pickle.load(f)
    f.close()

    expert_demonstrations = samples_from_cpprb(npsamples=buffer_file)

    # Reconstruct the data, then pass it to replay buffer
    np_states, np_rewards, np_actions, np_next_states, np_dones, np_next_dones = samples_to_np(expert_demonstrations)

    print("constraints in the environment")
    print("constrain hazards: ", env.constrain_hazards)
    print("hazards cost: ", env.hazards_cost)





    obs_dim = env.observation_space.shape
    act_dim = env.action_space.shape
    running_state = ZFilter((obs_dim[0],), clip=1)


    # Create actor-critic module and monitor it
    ac = actor_critic(env.observation_space, env.action_space, **ac_kwargs)
    discrim = discrim(env.observation_space, env.action_space, **ac_kwargs)

    # Sync params across processes
    sync_params(ac)
    # Note, also sync for Discriminator
    sync_params(discrim)

    # Count variables
    var_counts = tuple(count_vars(module) for module in [ac.pi, ac.v, discrim])
    logger.log('\nNumber of parameters: \t pi: %d, \t v: %d, \t discrim: %d \n' % var_counts)

    z_filter = False

    # Set up experience buffer
    local_steps_per_epoch = int(steps_per_epoch / num_procs())
    buf = CostPOBuffer(obs_dim, act_dim, local_steps_per_epoch, gamma, lam, cost_gamma, cost_lam)

    # Set up optimizers for policy and value function
    # pi_optimizer = Adam(ac.pi.parameters(), lr=pi_lr)
    # vf_optimizer = Adam(ac.v.parameters(), lr=vf_lr)
    # discrim_optimizer = Adam(discrim.parameters(), lr=discrim_lr)
    pi_optimizer = AdaBelief(ac.pi.parameters(), betas=(0.9, 0.999), eps=1e-8)
    vf_optimizer = AdaBelief(ac.v.parameters(), betas=(0.9, 0.999), eps=1e-8)
    discrim_optimizer = AdaBelief(discrim.parameters(), betas=(0.9, 0.999), eps=1e-8)

    # self.value_fun_optimizer = AdaBelief(self.value_fun.parameters(), betas=(0.9, 0.999), eps=1e-8)
    # self.cost_fun_optimizer = AdaBelief(self.cost_fun.parameters(), betas=(0.9, 0.999), eps=1e-8)


    mov_avg_ret = 0
    mov_avg_cost = 0

    # Discriminator reward
    def get_reward(discrim, state, action):
        state = torch.Tensor(state)
        action = torch.Tensor(action)
        state_action = torch.cat([state, action])
        with torch.no_grad():
            return -math.log(discrim(state_action)[0].item())
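
    # Sign convention: compute_loss_discrim below pushes the discriminator
    # toward 1 on policy samples and 0 on expert demonstrations, so
    # -log(D(s, a)) is near 0 for policy-like pairs (-log 0.9 ~= 0.105) and
    # large for expert-like pairs (-log 0.1 ~= 2.303), rewarding imitation.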

    # Set up function for computing PPO policy loss
    def compute_loss_pi(data):
        obs, act, adv, logp_old = data['obs'], data['act'], data['adv'], data['logp']

        # Policy loss
        pi, logp = ac.pi(obs, act)
        ratio = torch.exp(logp - logp_old)
        clip_adv = torch.clamp(ratio, 1 - clip_ratio, 1 + clip_ratio) * adv
        loss_pi = -(torch.min(ratio * adv, clip_adv)).mean()

        # Useful extra info
        approx_kl = (logp_old - logp).mean().item()
        ent = pi.entropy().mean().item()
        clipped = ratio.gt(1 + clip_ratio) | ratio.lt(1 - clip_ratio)
        clipfrac = torch.as_tensor(clipped, dtype=torch.float32).mean().item()
        pi_info = dict(kl=approx_kl, ent=ent, cf=clipfrac)

        return loss_pi, pi_info

    # Set up functions for computing value loss(es)
    def compute_loss_v(data):
        obs, ret, cret = data['obs'], data['ret'], data['cret']
        v_loss = ((ac.v(obs) - ret) ** 2).mean()
        return v_loss

    def compute_loss_discrim(data, demonstrations, acc=False):
        # memory = np.array(memory)
        # states = np.vstack(memory[:, 0])
        # actions = list(memory[:, 1])
        obs = data['obs']
        act = data['act']

        # states = torch.Tensor(states)
        # actions = torch.Tensor(actions)

        criterion = torch.nn.BCELoss()

        # change demo format
        demonstrations = torch.Tensor(demonstrations)

        # Pass both expert and learner through discriminator
        learner = discrim(torch.cat([obs, act], dim=1))
        expert = discrim(demonstrations)

        learner_acc = (learner  > 0.5).float().mean()
        expert_acc = (expert < 0.5).float().mean()

        discrim_loss = criterion(learner, torch.ones((obs.shape[0], 1))) + \
                       criterion(expert, torch.zeros((demonstrations.shape[0], 1)))

        if acc:
            return discrim_loss, expert_acc, learner_acc
        else:
            return discrim_loss


    # Set up model saving
    logger.setup_pytorch_saver(ac)

    penalty_init_param = np.log(max(np.exp(penalty_init) - 1, 1e-8))

    TRAIN_DISC = True

    def update(cur_penalty, TRAIN_DISC):

        cur_cost = logger.get_stats('EpCost')[0]
        cur_rew = logger.get_stats('EpRet')[0]

        if len(rew_mov_avg_10) >= 10:
            rew_mov_avg_10.pop(0)
            cost_mov_avg_10.pop(0)

        rew_mov_avg_10.append(cur_rew)
        cost_mov_avg_10.append(cur_cost)

        mov_avg_ret  = np.mean(rew_mov_avg_10)
        mov_avg_cost = np.mean(cost_mov_avg_10)

        c = cur_cost - cost_lim

        if c > 0 and agent.cares_about_cost:
            logger.log('Warning! Safety constraint is already violated.', 'red')

        # c is the safety constraint
        print("current cost: ", cur_cost)

        data = buf.get()

        pi_l_old, pi_info_old = compute_loss_pi(data)

        pi_l_old = pi_l_old.item()
        v_l_old = compute_loss_v(data).item()

        # Expert (state, action) pairs concatenated feature-wise to match the
        # discriminator's input layout.
        combined_expert_demos = np.concatenate((np_states, np_actions), axis=1)
        discrim_l_old = compute_loss_discrim(data, combined_expert_demos, acc=False).item()

        # Train policy with multiple steps of gradient descent
        for i in range(train_pi_iters):
            pi_optimizer.zero_grad()
            loss_pi, pi_info = compute_loss_pi(data)
            kl = mpi_avg(pi_info['kl'])
            loss_pi.backward()
            mpi_avg_grads(ac.pi)  # average grads across MPI processes
            pi_optimizer.step()

        logger.store(StopIter=i)

        # Value function learning
        for i in range(train_v_iters):
            vf_optimizer.zero_grad()
            loss_v = compute_loss_v(data)
            loss_v.backward()
            mpi_avg_grads(ac.v)  # average grads across MPI processes
            vf_optimizer.step()

        # Discriminator learning
        if TRAIN_DISC:
            for i in range(train_discrim_iters):
                discrim_optimizer.zero_grad()
                # loss_discrim = compute_loss_discrim(data, expert_demonstrations)
                loss_discrim, expert_acc, learner_acc = compute_loss_discrim(data, combined_expert_demos, acc=True)
                print("discriminator loss: ", loss_discrim)
                loss_discrim.backward()
                mpi_avg_grads(discrim)  # average grads across MPI processes
                discrim_optimizer.step()

            # Stop discriminator updates once it separates expert from
            # learner data almost perfectly.
            if expert_acc.item() > 0.99 and learner_acc.item() > 0.98:
                TRAIN_DISC = False

        # Penalty update
        print("old penalty: ", cur_penalty)
        cur_penalty = max(0, cur_penalty + penalty_lr*(cur_cost - cost_lim))
        print("new penalty: ", cur_penalty)

        # Log changes from update
        kl, ent, cf = pi_info['kl'], pi_info_old['ent'], pi_info['cf']
        logger.store(LossPi=pi_l_old, LossV=v_l_old,
                     LossDiscrim=discrim_l_old,
                     KL=kl, Entropy=ent, ClipFrac=cf,
                     DeltaLossPi=(loss_pi.item() - pi_l_old),
                     DeltaLossV=(loss_v.item() - v_l_old),
                     # DeltaLossDiscrim=(loss_discrim.item() - discrim_l_old)
                     )

        vf_loss_avg = mpi_avg(v_l_old)
        pi_loss_avg = mpi_avg(pi_l_old)

        update_metrics = {'10p mov avg ret': mov_avg_ret,
                          '10p mov avg cost': mov_avg_cost,
                          'value function loss': vf_loss_avg,
                          'policy loss': pi_loss_avg,
                          'current penalty': cur_penalty
                          }

        wandb.log(update_metrics)
        return cur_penalty, TRAIN_DISC

    # Prepare for interaction with environment
    start_time = time.time()
    o, r, d, c, ep_ret, ep_cost, ep_len, cum_cost, cum_reward = env.reset(), 0, False, 0, 0, 0, 0, 0, 0


    rew_mov_avg_10 = []
    cost_mov_avg_10 = []

    cur_penalty = penalty_init_param

    # Main loop: collect experience in env and update/log each epoch
    for epoch in range(epochs):

        for t in range(local_steps_per_epoch):
            state = running_state(o)

            if z_filter:
                a, v, vc, logp = ac.step(torch.as_tensor(state, dtype=torch.float32))
            else:
                a, v, vc, logp = ac.step(torch.as_tensor(o, dtype=torch.float32))


            # env.step => Take action
            next_o, r, d, info = env.step(a)

            if z_filter:
                next_o = running_state(next_o)


            irl_reward = get_reward(discrim, o, a)

            # Include penalty on cost
            c = info.get('cost', 0)

            # Track cumulative cost over training
            cum_reward += r
            cum_cost += c

            ep_ret += r
            ep_cost += c
            ep_len += 1

            r_total = r - cur_penalty * c
            r_total /= (1 + cur_penalty)

            irl_updated = irl_reward - cur_penalty*c
            irl_updated /= (1 + cur_penalty)
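
            # e.g. with cur_penalty = 1: a step with r = 1, c = 1 yields
            # (1 - 1) / 2 = 0, while c = 0 yields 0.5; dividing by
            # (1 + cur_penalty) keeps the shaped reward on a roughly fixed
            # scale as the penalty grows.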


            # buf.store(o, a, r_total, v, 0, 0, logp, info)  # modify
            # buf.store(o, a, irl_reward, v, 0, 0, logp, info)
            buf.store(o, a, irl_updated, v, 0, 0, logp, info)

            # save and log
            logger.store(VVals=v)

            # Update obs (critical!)
            o = next_o

            timeout = ep_len == max_ep_len
            terminal = d or timeout
            epoch_ended = t == local_steps_per_epoch - 1

            if terminal or epoch_ended:
                if epoch_ended and not terminal:
                    print('Warning: trajectory cut off by epoch at %d steps.' % ep_len, flush=True)
                # if trajectory didn't reach terminal state, bootstrap value target
                if timeout or epoch_ended:
                    _, v, _, _ = ac.step(torch.as_tensor(o, dtype=torch.float32))
                    last_v = v
                    last_vc = 0
                else:
                    last_v = 0
                    last_vc = 0
                buf.finish_path(last_v, last_vc)

                if terminal:
                    # only save EpRet / EpLen if trajectory finished
                    print("end of episode return: ", ep_ret)
                    logger.store(EpRet=ep_ret, EpLen=ep_len, EpCost=ep_cost)

                    # log per-episode return and cost
                    episode_metrics = {'average ep ret': ep_ret,
                                       'average ep cost': ep_cost}

                    wandb.log(episode_metrics)

                # Reset environment
                o, r, d, c, ep_ret, ep_len, ep_cost = env.reset(), 0, False, 0, 0, 0, 0

        # Save model and save last trajectory
        if (epoch % save_every == 0) or (epoch == epochs - 1):
            logger.save_state({'env': env}, None)

        # Perform PPO update!
        cur_penalty, TRAIN_DISC = update(cur_penalty, TRAIN_DISC)

        #  Cumulative cost calculations
        cumulative_cost = mpi_sum(cum_cost)
        cumulative_reward = mpi_sum(cum_reward)

        cost_rate = cumulative_cost / ((epoch + 1) * steps_per_epoch)
        reward_rate = cumulative_reward / ((epoch + 1) * steps_per_epoch)

        log_metrics = {'cost rate': cost_rate, 'reward rate': reward_rate}

        wandb.log(log_metrics)

        # Log info about epoch
        logger.log_tabular('Epoch', epoch)
        logger.log_tabular('EpRet', with_min_and_max=True)
        logger.log_tabular('EpLen', average_only=True)
        logger.log_tabular('EpCost', with_min_and_max=True)
        logger.log_tabular('VVals', with_min_and_max=True)
        logger.log_tabular('TotalEnvInteracts', (epoch + 1) * steps_per_epoch)
        logger.log_tabular('LossPi', average_only=True)
        logger.log_tabular('LossV', average_only=True)
        # logger.log_tabular('LossDiscrim', average_only=True)
        logger.log_tabular('DeltaLossPi', average_only=True)
        logger.log_tabular('DeltaLossV', average_only=True)
        # logger.log_tabular('DeltaLossDiscrim', average_only=True)
        logger.log_tabular('Entropy', average_only=True)
        logger.log_tabular('KL', average_only=True)
        logger.log_tabular('ClipFrac', average_only=True)
        logger.log_tabular('StopIter', average_only=True)
        logger.log_tabular('Time', time.time() - start_time)
        logger.dump_tabular()
Example #26
0
def train(hyp,  # path/to/hyp.yaml or hyp dictionary
          opt,
          device,
          callbacks
          ):
    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, = \
        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
        opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze

    # Directories
    w = save_dir / 'weights'  # weights dir
    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
    last, best = w / 'last.pt', w / 'best.pt'

    # Hyperparameters
    if isinstance(hyp, str):
        with open(hyp, errors='ignore') as f:
            hyp = yaml.safe_load(f)  # load hyps dict
    LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))

    # Save run settings
    if not evolve:
        with open(save_dir / 'hyp.yaml', 'w') as f:
            yaml.safe_dump(hyp, f, sort_keys=False)
        with open(save_dir / 'opt.yaml', 'w') as f:
            yaml.safe_dump(vars(opt), f, sort_keys=False)

    # Loggers
    data_dict = None
    if RANK in [-1, 0]:
        loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)  # loggers instance
        if loggers.wandb:
            data_dict = loggers.wandb.data_dict
            if resume:
                weights, epochs, hyp = opt.weights, opt.epochs, opt.hyp

        # Register actions
        for k in methods(loggers):
            callbacks.register_action(k, callback=getattr(loggers, k))

    # Config
    plots = not evolve  # create plots
    cuda = device.type != 'cpu'
    init_seeds(1 + RANK)
    with torch_distributed_zero_first(LOCAL_RANK):
        data_dict = data_dict or check_dataset(data)  # check if None
    train_path, val_path = data_dict['train'], data_dict['val']
    nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
    names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
    assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}'  # check
    is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt')  # COCO dataset

    # Model
    check_suffix(weights, '.pt')  # check weights
    pretrained = weights.endswith('.pt')
    if pretrained:
        with torch_distributed_zero_first(LOCAL_RANK):
            weights = attempt_download(weights)  # download if not found locally
        ckpt = torch.load(weights, map_location=device)  # load checkpoint
        model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
        exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else []  # exclude keys
        csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)  # intersect
        model.load_state_dict(csd, strict=False)  # load
        LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}')  # report
    else:
        model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create

    # Freeze
    freeze = [f'model.{x}.' for x in range(freeze)]  # layers to freeze
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            LOGGER.info(f'freezing {k}')
            v.requires_grad = False

    # Image size
    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple

    # Batch size
    if RANK == -1 and batch_size == -1:  # single-GPU only, estimate best batch size
        batch_size = check_train_batch_size(model, imgsz)

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
    LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    g0, g1, g2 = [], [], []  # optimizer parameter groups
    for v in model.modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):  # bias
            g2.append(v.bias)
        if isinstance(v, nn.BatchNorm2d):  # weight (no decay)
            g0.append(v.weight)
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
            g1.append(v.weight)

    if opt.adam:
        optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
    elif opt.adabelief:
        from adabelief_pytorch import AdaBelief
        optimizer_parameters = {'lr': hyp['lr0'], 'weight_decay': hyp['weight_decay'],
                                'eps': 1e-8, 'betas': (0.9, 0.999),
                                'weight_decouple': True, 'rectify': False,
                                'print_change_log': False}
        optimizer = AdaBelief(g0, **optimizer_parameters)
    else:
        optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

    optimizer.add_param_group({'params': g1, 'weight_decay': hyp['weight_decay']})  # add g1 with weight_decay
    optimizer.add_param_group({'params': g2})  # add g2 (biases)
    LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
                f"{len(g0)} weight, {len(g1)} weight (no decay), {len(g2)} bias")
    del g0, g1, g2

    # Scheduler
    if opt.linear_lr:
        def lf(x): return (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear
    else:
        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)  # plot_lr_scheduler(optimizer, scheduler, epochs)

    # EMA
    ema = ModelEMA(model) if RANK in [-1, 0] else None

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if ckpt['optimizer'] is not None:
            optimizer.load_state_dict(ckpt['optimizer'])
            best_fitness = ckpt['best_fitness']

        # EMA
        if ema and ckpt.get('ema'):
            ema.ema.load_state_dict(ckpt['ema'].float().state_dict())
            ema.updates = ckpt['updates']

        # Epochs
        start_epoch = ckpt['epoch'] + 1
        if resume:
            assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.'
        if epochs < start_epoch:
            LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.")
            epochs += ckpt['epoch']  # finetune additional epochs

        del ckpt, csd

    # DP mode
    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
        LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n'
                       'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and RANK != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        LOGGER.info('Using SyncBatchNorm()')

    # Trainloader
    train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls,
                                              hyp=hyp, augment=True, cache=opt.cache, rect=opt.rect, rank=LOCAL_RANK,
                                              workers=workers, image_weights=opt.image_weights, quad=opt.quad,
                                              prefix=colorstr('train: '), shuffle=True)
    mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max())  # max label class
    nb = len(train_loader)  # number of batches
    assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'

    # Process 0
    if RANK in [-1, 0]:
        val_loader = create_dataloader(val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, single_cls,
                                       hyp=hyp, cache=None if noval else opt.cache, rect=True, rank=-1,
                                       workers=workers, pad=0.5,
                                       prefix=colorstr('val: '))[0]

        if not resume:
            labels = np.concatenate(dataset.labels, 0)
            # c = torch.tensor(labels[:, 0])  # classes
            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
            # model._initialize_biases(cf.to(device))
            if plots:
                plot_labels(labels, names, save_dir)

            # Anchors
            if not opt.noautoanchor:
                check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
            model.half().float()  # pre-reduce anchor precision

        callbacks.run('on_pretrain_routine_end')

    # DDP mode
    if cuda and RANK != -1:
        model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)

    # Model attributes
    nl = de_parallel(model).model[-1].nl  # number of detection layers (to scale hyps)
    hyp['box'] *= 3 / nl  # scale to layers
    hyp['cls'] *= nc / 80 * 3 / nl  # scale to classes and layers
    hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
    hyp['label_smoothing'] = opt.label_smoothing
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    last_opt_step = -1
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@0.5, mAP@0.5:0.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    stopper = EarlyStopping(patience=opt.patience)
    compute_loss = ComputeLoss(model)  # init loss class
    LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
                f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
                f"Logging results to {colorstr('bold', save_dir)}\n"
                f'Starting training for {epochs} epochs...')
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional, single-GPU only)
        if opt.image_weights:
            cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
            iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
            dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx

        # Update mosaic border (optional)
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(4, device=device)  # mean losses # lmello changed
        if RANK != -1:
            train_loader.sampler.set_epoch(epoch)
        pbar = enumerate(train_loader)
        LOGGER.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'closs', 'labels', 'img_size'))
        if RANK in [-1, 0]:
            pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')  # progress bar
        optimizer.zero_grad()
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
                if RANK != -1:
                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
                if opt.quad:
                    loss *= 4.

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            if ni - last_opt_step >= accumulate:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)
                last_opt_step = ni

            # Log
            if RANK in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'  # (GB)
                pbar.set_description(('%10s' * 2 + '%10.4g' * 6) % ( # lmello changed
                    f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
                callbacks.run('on_train_batch_end', ni, model, imgs, targets, paths, plots, opt.sync_bn)
            # end batch ------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for loggers
        scheduler.step()

        if RANK in [-1, 0]:
            # mAP
            callbacks.run('on_train_epoch_end', epoch=epoch)
            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
            final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
            if not noval or final_epoch:  # Calculate mAP
                results, maps, _ = val.run(data_dict,
                                           batch_size=batch_size // WORLD_SIZE * 2,
                                           imgsz=imgsz,
                                           model=ema.ema,
                                           single_cls=single_cls,
                                           dataloader=val_loader,
                                           save_dir=save_dir,
                                           plots=False,
                                           callbacks=callbacks,
                                           compute_loss=compute_loss)

            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@0.5, mAP@0.5:0.95]
            if fi > best_fitness:
                best_fitness = fi
            log_vals = list(mloss) + list(results) + lr
            callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)

            # Save model
            if (not nosave) or (final_epoch and not evolve):  # if save
                ckpt = {'epoch': epoch,
                        'best_fitness': best_fitness,
                        'model': deepcopy(de_parallel(model)).half(),
                        'ema': deepcopy(ema.ema).half(),
                        'updates': ema.updates,
                        'optimizer': optimizer.state_dict(),
                        'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None,
                        'date': datetime.now().isoformat()}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0):
                    torch.save(ckpt, w / f'epoch{epoch}.pt')
                del ckpt
                callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)

            # Stop Single-GPU
            if RANK == -1 and stopper(epoch=epoch, fitness=fi):
                break

            # Stop DDP TODO: known issues, see https://github.com/ultralytics/yolov5/pull/4576
            # stop = stopper(epoch=epoch, fitness=fi)
            # if RANK == 0:
            #    dist.broadcast_object_list([stop], 0)  # broadcast 'stop' to all ranks

        # Stop DDP
        # with torch_distributed_zero_first(RANK):
        # if stop:
        #    break  # must break all DDP ranks

        # end epoch ----------------------------------------------------------------------------------------------------
    # end training -----------------------------------------------------------------------------------------------------
    if RANK in [-1, 0]:
        LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
        for f in last, best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is best:
                    LOGGER.info(f'\nValidating {f}...')
                    results, _, _ = val.run(data_dict,
                                            batch_size=batch_size // WORLD_SIZE * 2,
                                            imgsz=imgsz,
                                            model=attempt_load(f, device).half(),
                                            iou_thres=0.65 if is_coco else 0.60,  # best pycocotools results at 0.65
                                            single_cls=single_cls,
                                            dataloader=val_loader,
                                            save_dir=save_dir,
                                            save_json=is_coco,
                                            verbose=True,
                                            plots=True,
                                            callbacks=callbacks,
                                            compute_loss=compute_loss)  # val best model with plots
                    if is_coco:
                        callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)

        callbacks.run('on_train_end', last, best, plots, epoch, results)
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")

    torch.cuda.empty_cache()
    return results
Example #27
0
    def __init__(self, args, params):
        super().__init__(args)

        self._optimizer = AdaBelief(params, **self.optimizer_config)
Example #28
0
def get_optimizer_and_scheduler(net, dataloader):
    print_fn = print if not config.USE_TPU else xm.master_print
    # m = xm.xrt_world_size() if config.USE_TPU else 1
    m = 1
    print_fn(f"World Size:                  {m}")

    m /= config.WARMUP_FACTOR
    print_fn(f"Learning Rate Multiplier:    {m}")

    print_fn(f"Start Learning Rate:         {config.LEARNING_RATE * m}")

    # Optimizers

    print_fn(f"Optimizer:                   {config.OPTIMIZER}")
    if config.OPTIMIZER == "Adam":
        optimizer = torch.optim.Adam(
            params=net.parameters(),
            lr=config.LEARNING_RATE * m,
            weight_decay=1e-5,
            amsgrad=False
        )
    elif config.OPTIMIZER == "AdamW":
        optimizer = optim.AdamW(
            net.parameters(), lr=config.LEARNING_RATE * m, weight_decay=0.001)
    elif config.OPTIMIZER == "AdaBelief":
        optimizer = AdaBelief(net.parameters(
        ), lr=config.LEARNING_RATE * m, eps=1e-16, betas=(0.9, 0.999), weight_decouple=True, rectify=False, print_change_log=False)
    elif config.OPTIMIZER == "RangerAdaBelief":
        optimizer = RangerAdaBelief(
            net.parameters(), lr=config.LEARNING_RATE * m, eps=1e-12, betas=(0.9, 0.999), print_change_log=False)
    elif config.OPTIMIZER == "RAdam":
        optimizer = RAdam(
            net.parameters(),
            lr=config.LEARNING_RATE * m
        )
    else:
        optimizer = optim.SGD(
            net.parameters(), lr=config.LEARNING_RATE * m)

    # Schedulers

    print_fn(f"Scheduler:                   {config.SCHEDULER}")
    if config.SCHEDULER == "ReduceLROnPlateau":
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            patience=0,
            factor=0.1,
            verbose=config.LEARNING_VERBOSE)
    elif config.SCHEDULER == "CosineAnnealingLR":
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=5, eta_min=0)
    elif config.SCHEDULER == "OneCycleLR":
        steps_per_epoch = len(dataloader)
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer=optimizer,
            max_lr=1e-2,
            epochs=config.MAX_EPOCHS,
            steps_per_epoch=steps_per_epoch,
            pct_start=0.25,)
    elif config.SCHEDULER == "CosineAnnealingWarmRestarts":
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer,
            T_0=config.MAX_EPOCHS - config.WARMUP_EPOCHS,
            T_mult=1,
            eta_min=1e-6,
            last_epoch=-1)
    elif config.SCHEDULER == "StepLR":
        scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer,
            step_size=2,
            gamma=0.1)
    else:
        scheduler = None

    print_fn(f"Gradual Warmup:              {config.SCHEDULER_WARMUP}")
    if config.SCHEDULER_WARMUP:
        scheduler = GradualWarmupSchedulerV2(
            optimizer,
            multiplier=config.WARMUP_FACTOR,
            total_epoch=config.WARMUP_EPOCHS,
            after_scheduler=scheduler)

    return optimizer, scheduler
Example #29
0
    options = {}
    options.update({'method': 'Dopri5'})
    options.update({'h': 0.1})
    options.update({'rtol': 1e-5})
    options.update({'atol': 1e-6})
    options.update({'print_neval': False})
    options.update({'neval_max': 1000000})
    options.update({'safety': None})

    # create multiple-shooting instance
    multi_shoot = MultipleShoot(ode_func=dcmfunc,
                                observation_length=time_length,
                                ODE_options=options,
                                smooth_penalty=smooth_penalty,
                                chunk_length=chunk_length)
    multi_shoot.prepare_intermediate(input_tensor)

    # create optimizer
    optimizer = AdaBelief(filter(lambda p: p.requires_grad, multi_shoot.parameters()), lr = lr, eps=1e-16, rectify=False,
                          betas=(0.5, 0.9))
    #optimizer = Adam(filter(lambda p: p.requires_grad, multi_shoot.parameters()), lr=lr, eps=1e-16,
    #                      betas=(0.5, 0.9))

    best_loss = np.inf
    for _epoch in range(N_epoch):
        # adjust learning rate
        for param_group in optimizer.param_groups:
            param_group['lr'] *= gamma

        optimizer.zero_grad()

        # forward and optimize
        prediction_chunks, data_chunks = multi_shoot.fit_and_grad(input_tensor, time_points)
        loss = multi_shoot.get_loss(prediction_chunks, data_chunks)
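
        # The snippet is truncated here in the source; a plausible (assumed)
        # continuation would backpropagate, step the optimizer, and track
        # the best loss:
        loss.backward()
        optimizer.step()

        if loss.item() < best_loss:
            best_loss = loss.item()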
Example #30
0
        print(f"Not using pretrained {args.arch}")
        model = models.__dict__[args.arch]()

model = model.to(device)
# print(model)
start_epoch = 1
if args.resume:
    loc = "cuda:0"
    checkpoint = torch.load(args.save_path, map_location=loc)
    model.load_state_dict(checkpoint['net'])

    print("Done loading pretrained checkpoint")

# optimizer = optim.AdamW(model.parameters(), lr = args.lr, weight_decay =
# args.weight_decay)
#
optimizer = AdaBelief(model.parameters(),
                      lr=args.lr,
                      weight_decay=args.weight_decay,
                      eps=1e-10,
                      weight_decouple=True,
                      rectify=True)

# scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr =
# args.max_lr,steps_per_epoch =
# len(train_loader), epochs = 10)
for epoch in tqdm(range(start_epoch, args.epochs + 1)):
    train(args, model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    # scheduler.step()