from pathlib import Path

import numpy as np
import torch
from torch import nn, optim

# `util`, `plotting`, `Model`, `Trainer`, and the module-level logger `L` are
# project-local and assumed to be defined/imported elsewhere in this module.


def train(hidden_dim_sweep=(5, 10, 25), n_epochs=20, out_dir='out', data_dir='data',
          device=util.device(), Optimizer=optim.Adam, seed=42):
    out_dir, data_dir = map(Path, (out_dir, data_dir))

    # Seed all RNGs for reproducibility.
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    tracess = []
    best_trainer = None
    best_loss = util.INF
    vocab = util.Vocab.load(data_dir / 'vocab.txt')

    # Sweep over hidden dimensions, keeping the trainer with the lowest loss.
    for hidden_dim in hidden_dim_sweep:
        model = Model(hidden_dim=hidden_dim, vocab=vocab, out_dim=2)
        loss_fn = nn.CrossEntropyLoss()
        optimizer = Optimizer(model.parameters(), lr=1e-4)
        trainer = Trainer(model, loss_fn, vocab, device)
        traces, loss_cur = trainer.train_loop(data_dir=data_dir, n_epochs=n_epochs,
                                              optimizer=optimizer, scheduler=None)
        if loss_cur < best_loss:
            best_trainer = trainer
            best_loss = loss_cur
        tracess.append((hidden_dim, traces))

    # Plot and dump the loss traces for every hidden_dim in the sweep.
    out_dir.mkdir(exist_ok=True)
    for h, traces in tracess:
        plotting.plot_traces(traces, out=out_dir / f'traces_{h}.png',
                             title=f'Loss, hidden_dim={h}')
        util.jsondump(traces, out_dir / f'traces.dim_{h}.seed_{seed}.json')

    L.info('Best model loss: %s', best_loss)
    model_file = out_dir / 'model.pt'
    L.info('Saving best model to %s', model_file)
    torch.save(best_trainer.model.state_dict(), model_file)
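For reference, a minimal sketch of how this sweep might be invoked; the argument values simply echo the defaults in the signature above, and the `__main__` guard is an assumption rather than part of the original module:

if __name__ == '__main__':
    # Hypothetical entry point: run the sweep with the default settings shown above.
    train(hidden_dim_sweep=(5, 10, 25), n_epochs=20, out_dir='out', data_dir='data')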
print('Lengths of the training and validation sets are {} and {}'.format(
    len(t_dataset), len(v_dataset)))

trainloader = DataLoader(t_dataset, batch_size=32, shuffle=True, num_workers=8)
validloader = DataLoader(v_dataset, batch_size=32, shuffle=False, num_workers=8)

""" Training """
# Alternative backbones, kept for reference:
# model = ResNetModel()
# model = EfficientModelwithoutMeta()
model = Model(arch='efficientnet-b2')
# model = EfficientModel(n_meta_features=len(meta_features))
model.to(device)
# model = nn.DataParallel(model)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, betas=(0.9, 0.999))
# OneCycleLR is stepped once per batch, so it needs steps_per_epoch and epochs;
# pct_start=1/epochs spends roughly the first epoch warming the learning rate up.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=3e-4, div_factor=10, pct_start=1 / epochs,
    steps_per_epoch=len(trainloader), epochs=epochs)

writer = SummaryWriter(f'../checkpoint/fold_{fold}/efficient_{resolution}')

print(f'Training Started Fold_{fold}')
training_loss = []
validation_loss = []
c_acc = 0.0

for epoch in range(epochs):
    start_time = time.time()
    train_prob = []
    valid_prob = []