def __init__(self, model, criterion, optimizer, lr_scheduler=None,
             metrics=None, test_metrics=None, save_path=".", name="Net"):
    self.model = model
    self.criterion = criterion
    self.optimizer = optimizer
    self.lr_scheduler = lr_scheduler
    self.metrics = metrics or {}
    self.test_metrics = test_metrics
    if test_metrics is None:
        # Default to the training metrics, but report validation loss with
        # the criterion rather than the running training loss.
        self.test_metrics = self.metrics.copy()
        if 'loss' in self.metrics and isinstance(self.metrics['loss'], TrainLoss):
            self.test_metrics['loss'] = Loss(criterion=criterion)
    self.save_path = os.path.join(save_path, 'trainer')
    self.name = name

    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    log_dir = os.path.join(save_path, 'runs', self.name, current_time)
    self.writer = SummaryWriter(log_dir)

    self.metric_history = defaultdict(list)
    self.device = 'cuda' if CUDA else 'cpu'
    self._timer = Timer()
    self._epochs = 0
    self.model.to(self.device)
def fit_fn(ds_train, ds_val, verbose):
    net = LeNet5()
    criterion = nn.CrossEntropyLoss()
    optimizer = SGD(net.parameters(), lr=0.01, momentum=0.9,
                    weight_decay=1e-4, nesterov=True)
    # lr_scheduler = MultiStepLR(optimizer, [10, 20], gamma=0.1)
    lr_scheduler = CosineAnnealingLR(optimizer, T_max=30, eta_min=0.001,
                                     warmup=5, warmup_eta_min=0.01)
    metrics = {
        'loss': TrainLoss(),
        'acc': Accuracy(),
    }
    test_metrics = {
        'loss': Loss(criterion),
        'acc': Accuracy(),
    }
    trainer = Trainer(net, criterion, optimizer, lr_scheduler,
                      metrics=metrics, test_metrics=test_metrics,
                      work_dir="./checkpoints/MNIST-LeNet5")
    trainer._verbose = False
    # summary(net, (1, 32, 32))

    train_loader = DataLoader(ds_train, batch_size=128, shuffle=True,
                              num_workers=2, pin_memory=True)
    val_loader = DataLoader(ds_val, batch_size=128)

    # Train for 5 epochs and return the final and best validation accuracy.
    accs = trainer.fit(train_loader, 5, val_loader=val_loader)['acc']
    return accs[-1], max(accs)
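# Hypothetical caller for fit_fn above (not from the original source). LeNet5
# expects 32x32 inputs (cf. the commented-out summary(net, (1, 32, 32))), so
# MNIST is resized here; the dataset path and the 55000/5000 split are
# illustrative only.
from torch.utils.data import random_split
from torchvision.datasets import MNIST
from torchvision.transforms import Compose, Resize, ToTensor

transform = Compose([Resize(32), ToTensor()])
ds = MNIST("./data", train=True, download=True, transform=transform)
ds_train, ds_val = random_split(ds, [55000, 5000])
last_acc, best_acc = fit_fn(ds_train, ds_val, verbose=False)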
def __init__(self, model, criterion, optimizer, lr_scheduler=None,
             metrics=None, test_metrics=None, save_path=".", name="Net",
             fp16=False, lr_step_on_iter=None):
    self.fp16 = fp16
    self.device = 'cuda' if CUDA else 'cpu'
    model.to(self.device)
    if self.fp16:
        # Mixed precision via NVIDIA apex: the model and optimizer are
        # patched in place before being stored on the trainer.
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level="O1", verbosity=0)
    self.model = model
    self.criterion = criterion
    self.optimizer = optimizer
    self.lr_scheduler = lr_scheduler
    self.metrics = metrics or {}
    self.test_metrics = test_metrics
    if test_metrics is None:
        self.test_metrics = self.metrics.copy()
        if 'loss' in self.metrics and isinstance(self.metrics['loss'], TrainLoss):
            self.test_metrics['loss'] = Loss(criterion=criterion)
    self.save_path = os.path.join(save_path, 'trainer')
    self.name = name
    self.lr_step_on_iter = lr_step_on_iter

    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    log_dir = os.path.join(save_path, 'runs', self.name, current_time)
    self.writer = SummaryWriter(log_dir)

    self.metric_history = defaultdict(list)
    self._timer = Timer()
    self._epochs = 0
    self._verbose = True
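# Sketch only, not from the original source: with fp16 enabled above, the
# backward pass is expected to go through apex's loss scaling. Only
# amp.scale_loss is real apex API; the helper name backward_step is
# hypothetical, and the trainer attributes it touches come from the
# constructor above.
def backward_step(trainer, loss):
    if trainer.fp16:
        from apex import amp
        with amp.scale_loss(loss, trainer.optimizer) as scaled_loss:
            scaled_loss.backward()
    else:
        loss.backward()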
optimizer_arch = Adam(model.arch_parameters(),
                      lr=3e-4, betas=(0.5, 0.999), weight_decay=1e-3)
optimizer_model = SGD(model.parameters(), 0.025, momentum=0.9, weight_decay=3e-4)
lr_scheduler = CosineAnnealingLR(optimizer_model, T_max=50, eta_min=0.001)

metrics = {
    "loss": TrainLoss(),
    "acc": Accuracy(),
}
test_metrics = {
    "loss": Loss(criterion),
    "acc": Accuracy(),
}

trainer = DARTSTrainer(model, criterion, [optimizer_arch, optimizer_model],
                       lr_scheduler, metrics, test_metrics,
                       save_path='checkpoints/DARTS')


def tau_schedule(engine, trainer):
    iteration = engine.state.iteration
    iters_per_epoch = engine.state.epoch_length
    # Fractional number of epochs elapsed so far.
    steps = iteration / iters_per_epoch
cfg.Model.num_classes = num_classes
model = get_model(cfg.Model, horch.models.cifar)
criterion = CrossEntropyLoss(non_sparse=use_mix, label_smoothing=cfg.get("label_smooth"))

epochs = cfg.epochs
optimizer = get_optimizer(cfg.Optimizer, model.parameters())
lr_scheduler = get_lr_scheduler(cfg.LRScheduler, optimizer, epochs)

train_metrics = {'loss': TrainLoss()}
if not use_mix:
    # Mix-style augmentation produces soft targets, so training accuracy is
    # only tracked when mixing is disabled.
    train_metrics['acc'] = Accuracy()
test_metrics = {
    'loss': Loss(CrossEntropyLoss()),
    'acc': Accuracy(),
}

work_dir = fmt_path(cfg.get("work_dir"))
trainer = Trainer(model, criterion, optimizer, lr_scheduler,
                  train_metrics, test_metrics, work_dir=work_dir,
                  fp16=fp16, device=cfg.get("device", 'auto'))

if args.resume:
optimizer = SGD(net.parameters(), lr=0.01, momentum=0.9,
                weight_decay=1e-4, nesterov=True)
# lr_scheduler = MultiStepLR(optimizer, [10, 20], gamma=0.1)
lr_scheduler = CosineAnnealingLR(optimizer, T_max=30, eta_min=0.001,
                                 warmup=5, warmup_eta_min=0.001)
metrics = {
    'loss': TrainLoss(),
    'acc': Accuracy(),
}
test_metrics = {
    'loss': Loss(criterion),
    'acc': Accuracy(),
}
trainer = Trainer(net, criterion, optimizer, lr_scheduler,
                  metrics=metrics, test_metrics=test_metrics,
                  work_dir="./checkpoints/MNIST-LeNet5")
# summary(net, (1, 32, 32))

train_loader = DataLoader(ds_train, batch_size=128, shuffle=True,
ds_test = CIFAR10(data_home, train=False, download=True, transform=test_transform)

batch_size = 128
train_loader = DataLoader(ds_train, batch_size=batch_size, shuffle=True,
                          pin_memory=True, num_workers=2)
test_loader = DataLoader(ds_test, batch_size=batch_size * 8, shuffle=False,
                         pin_memory=True, num_workers=2)

net = ResNet(28, 10)
criterion = CrossEntropyLoss()

epochs = 200
base_lr = 0.1
optimizer = SGD(net.parameters(), lr=base_lr, momentum=0.9, weight_decay=5e-4)
lr_scheduler = CosineAnnealingLR(optimizer, epochs, min_lr=0)

train_metrics = {
    "loss": TrainLoss(),
    "acc": Accuracy(),
}
eval_metrics = {
    "loss": Loss(CrossEntropyLoss()),
    "acc": Accuracy(),
}

learner = CNNLearner(
    net, criterion, optimizer, lr_scheduler,
    train_metrics=train_metrics, eval_metrics=eval_metrics,
    work_dir=gpath('models/WRN'), fp16=True)

learner.fit(train_loader, epochs, test_loader, val_freq=1)
def main():
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = False
    manual_seed(args.seed)

    train_transform = Compose([
        RandomCrop(32, padding=4),
        RandomHorizontalFlip(),
        ToTensor(),
        Normalize([0.491, 0.482, 0.447], [0.247, 0.243, 0.262]),
    ])
    ds = CIFAR10(root=args.data, train=True, download=True)
    ds_train, ds_search = train_test_split(
        ds, test_ratio=0.5, shuffle=True, random_state=args.seed,
        transform=train_transform, test_transform=train_transform)

    train_queue = DataLoader(
        ds_train, batch_size=args.batch_size, pin_memory=True,
        shuffle=True, num_workers=2)
    valid_queue = DataLoader(
        ds_search, batch_size=args.batch_size, pin_memory=True,
        shuffle=True, num_workers=2)

    set_defaults({
        'relu': {
            'inplace': False,
        },
        'bn': {
            'affine': False,
        }
    })
    model = Network(args.init_channels, args.layers, num_classes=CIFAR_CLASSES)
    criterion = nn.CrossEntropyLoss()

    optimizer_arch = Adam(
        model.arch_parameters(), lr=args.arch_learning_rate,
        betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)
    optimizer_model = SGD(
        model.model_parameters(), args.learning_rate,
        momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = CosineLR(
        optimizer_model, float(args.epochs), min_lr=args.learning_rate_min)

    train_metrics = {
        "loss": TrainLoss(),
        "acc": Accuracy(),
    }
    eval_metrics = {
        "loss": Loss(criterion),
        "acc": Accuracy(),
    }

    learner = DARTSLearner(
        model, criterion, optimizer_arch, optimizer_model, scheduler,
        train_metrics=train_metrics, eval_metrics=eval_metrics,
        search_loader=valid_queue, grad_clip_norm=5.0, work_dir='models')

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))
        print(F.softmax(model.betas_normal[2:5], dim=-1))

        # training
        train_acc, train_obj = train(learner, train_queue, epoch)
        logging.info('train_acc %f', train_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
net = eval(cfg.Model)(**cfg.get(cfg.Model))
criterion = CrossEntropyLoss(label_smoothing=cfg.get("label_smooth"))
optimizer = get_optimizer(cfg.Optimizer, net)
lr_scheduler = get_lr_scheduler(cfg.LRScheduler, optimizer)

mix = get_mix(cfg.get("Mix"))

metrics = {
    'loss': TrainLoss(),
    'acc': Accuracy(mix),
}
test_metrics = {
    'loss': Loss(nn.CrossEntropyLoss()),
    'acc': Accuracy(),
}

trainer = Trainer(net, criterion, optimizer, lr_scheduler,
                  metrics, test_metrics, save_path=cfg.save_path, mix=mix)

if args.resume:
    if args.resume == 'default':
        trainer.resume()