save_checkpoint(epoch, model, optimizer, prefix)
val_acc, val_preci, val_recall, mAP_scores = evaluate(
    epoch, val_loader, model, bce_loss, log_writer)
avg_map = np.mean(mAP_scores)
if avg_map > best_mAP:
    logger.info('mAP improved from {} to {}'.format(best_mAP, avg_map))
    best_mAP = avg_map
    # keep only the best checkpoint on disk
    if last_model is not None:
        os.remove(last_model)
    fn = os.path.join(
        c['model_dir'],
        '{}_epoch{}_mAP{:.3}_preci{:.3}_recall{:.3}.pdparams'.format(
            prefix, epoch, avg_map, val_preci, val_recall))
    paddle.save(model.state_dict(), fn)
    last_model = fn
else:
    logger.info(f'mAP {avg_map} did not improve from {best_mAP}')

# step-wise learning-rate decay
if step % c['lr_dec_per_step'] == 0 and step != 0:
    if optimizer.get_lr() <= 3e-6:
        factor = 0.95
    else:
        factor = 0.1
    optimizer.set_lr(optimizer.get_lr() * factor)
    logger.info('decreased lr to {}'.format(optimizer.get_lr()))
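# NOTE: evaluate() is not shown in this excerpt. The per-class AP scores it returns
# (mAP_scores) for multi-label tagging can be computed roughly as sketched below.
# compute_map is a hypothetical helper name, the sketch assumes scikit-learn is
# available, and it is not the script's actual implementation.
import numpy as np
from sklearn.metrics import average_precision_score

def compute_map(y_true, y_score):
    """y_true: (N, C) multi-hot labels; y_score: (N, C) sigmoid scores."""
    # average=None returns one average-precision value per class
    ap_per_class = average_precision_score(y_true, y_score, average=None)
    return float(np.mean(ap_per_class)), ap_per_class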
else:
    best_mAP = 0.0
step = 0
for epoch in range(start_epoch, epoch_num):
    avg_loss = 0.0
    avg_preci = 0.0
    avg_recall = 0.0
    model.train()
    model.clear_gradients()
    t0 = time.time()
    for batch_id, (x, y) in enumerate(train_loader()):
        if step < warm_steps:
            optimizer.set_lr(lrs[step])
        x.stop_gradient = False
        if c['balanced_sampling']:
            x = x.squeeze()
            y = y.squeeze()
        x = x.unsqueeze(1)
        if c['mixup']:
            mixed_x, mixed_y = mixup_data(x, y, c['mixup_alpha'])
            logits = model(mixed_x)
            loss_val = loss_fn(logits, mixed_y)
            loss_val.backward()
        else:
            logits = model(x)
            loss_val = bce_loss(logits, y)
            loss_val.backward()
        optimizer.step()
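# NOTE: mixup_data() is defined elsewhere in the repo. A minimal sketch following the
# standard mixup recipe is given below for reference; it assumes multi-hot float labels
# so the mixed targets remain valid for BCE, and the actual helper may differ in details.
import numpy as np
import paddle

def mixup_data(x, y, alpha=1.0):
    """Mix the batch with a shuffled copy of itself and soften the targets accordingly."""
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    perm = paddle.randperm(x.shape[0])
    mixed_x = lam * x + (1.0 - lam) * paddle.index_select(x, perm, axis=0)
    mixed_y = lam * y + (1.0 - lam) * paddle.index_select(y, perm, axis=0)
    return mixed_x, mixed_y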
    # freeze pretrained parameters before fine-tuning
    p.stop_gradient = True
    if not isinstance(p, nn.BatchNorm1D):
        p.stop_gradient = True

for epoch in range(start_epoch, epoch_num):
    avg_loss = 0.0
    avg_acc = 0.0
    model.train()
    model.clear_gradients()
    t0 = time.time()
    # set this epoch's learning rate from the get_lr() schedule
    if config['max_lr'] > config['base_lr']:
        lr = get_lr(epoch - start_epoch, config['base_lr'], config['max_lr'],
                    config['half_cycle'], config['reverse_lr'])
        optimizer.set_lr(lr)
        logger.info(f'Setting lr to {lr}')
    for batch_id, (x, y) in enumerate(train_loader()):
        x_mel = transforms(x)  # feature extraction (mel spectrogram)
        logits = model(x_mel)
        loss, pred = loss_fn(logits, y)
        loss.backward()
        optimizer.step()
        model.clear_gradients()
        acc = np.mean(np.argmax(pred.numpy(), axis=1) == y.numpy())
        if batch_id < 100:
            avg_acc = acc
            avg_loss = loss.numpy()[0]
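# NOTE: get_lr() is referenced above but not shown. A plausible implementation,
# inferred only from the call site (base_lr, max_lr, half_cycle, reverse_lr), is a
# triangular cyclic schedule like the sketch below; the original function may differ.
def get_lr(epoch, base_lr, max_lr, half_cycle, reverse=False):
    """Triangular wave between base_lr and max_lr with a period of 2 * half_cycle epochs."""
    phase = (epoch % (2 * half_cycle)) / half_cycle   # in [0, 2)
    frac = phase if phase <= 1.0 else 2.0 - phase     # rises, then falls within a cycle
    if reverse:
        frac = 1.0 - frac
    return base_lr + (max_lr - base_lr) * frac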