# Build the target Q-network (same architecture as `network`: obs+action -> scalar value)
# and freeze it: target parameters are updated by copying, never by the optimizer.
target_network = Mlp(
    input_size=obs_dim + action_dim,
    output_size=1,
    hidden_sizes=[M, M],
).to(device)
for param in target_network.parameters():
    param.requires_grad = False  # target net is never trained directly

# Only the online `network` is optimized.
optimizer = optim.Adam(network.parameters(), lr=args.lr)

# Train loop: checkpoint whenever the epoch's training loss improves on the best seen.
best_loss = np.inf  # fix: np.Inf was removed in NumPy 2.0; np.inf is the canonical spelling
for epoch in range(args.epochs):
    t_loss = train(network, target_network, dataloader, optimizer, epoch,
                   use_cuda)
    if use_tb:
        logger.add_scalar(log_dir + '/train-loss', t_loss, epoch)
    if t_loss < best_loss:
        best_loss = t_loss
        # NOTE(review): assumes the 'models/' directory already exists — torch.save
        # does not create parent directories; confirm it is created earlier in the file.
        file_name = 'models/{}_{}.pt'.format(timestamp, args.env)
        print('Writing model checkpoint, loss:{:.2g}'.format(t_loss))
        print('Writing model checkpoint : {}'.format(file_name))
        # 'epoch' is saved as epoch + 1 so a resume continues at the next epoch.
        torch.save(
            {
                'epoch': epoch + 1,
                'network_state_dict': network.state_dict(),
                'target_state_dict': target_network.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': t_loss
            }, file_name)
checkpoint = torch.load(args.load_model) network.load_state_dict(checkpoint['network_state_dict']) optimizer.load_state_dict(checkpoint['optimizer_state_dict']) t_loss = checkpoint['train_loss'] epch = checkpoint['epoch'] print('Loading model: {}. Resuming from epoch: {}'.format( args.load_model, epch)) else: print('Model: {} not found'.format(args.load_model)) best_loss = np.Inf for epoch in range(epch, args.epochs): t_loss = train(network, dataloader, optimizer, epoch, device) print('=> epoch: {} Average Train loss: {:.4f}'.format(epoch, t_loss)) if use_tb: logger.add_scalar(log_dir + '/train-loss', t_loss, epoch) if t_loss < best_loss: best_loss = t_loss file_name = 'models/bc_{}_{}.pt'.format(timestamp, args.env) print('Writing model checkpoint, loss:{:.2g}'.format(t_loss)) print('Writing model checkpoint : {}'.format(file_name)) torch.save( { 'epoch': epoch + 1, 'network_state_dict': network.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'train_loss': t_loss }, file_name)