import utils import model import torch import torch.nn as nn import time import os import data import math import pickle from loguru import logger args = utils.get_train_parser() torch.manual_seed(args.seed) if torch.cuda.is_available(): if not args.cuda: logger.warning( "You have a CUDA device, so you should probably run with --cuda") device = torch.device('cuda' if args.cuda else 'cpu') def train(_model, criterion, train_data, ntokens, learning_rate, epoch): _model.train() total_loss = .0 start_time = time.time() hidden = _model.init_hidden(args.batch_size) for batch, i in enumerate( range(0, train_data.size(0) - 1, args.sequence_length)): data, targets = utils.get_batch(
from pgdrive import PGDriveEnv from ray import tune from utils import train, get_train_parser if __name__ == '__main__': args = get_train_parser().parse_args() exp_name = "main_ppo" stop = int(10000000) config = dict( env=PGDriveEnv, env_config=dict( environment_num=tune.grid_search([1, 3, 6, 15, 40, 100, 1000]), start_seed=tune.grid_search([5000, 6000, 7000, 8000, 9000]), ), # ===== Evaluation ===== evaluation_interval=5, evaluation_num_episodes=20, evaluation_config=dict( env_config=dict(environment_num=200, start_seed=0)), evaluation_num_workers=2, metrics_smoothing_episodes=20, # ===== Training ===== horizon=1000, num_sgd_iter=20, lr=5e-5, rollout_fragment_length=200,
optimizer.step() epoch_loss = epoch_metrics['loss'].item() log_metrics(epoch_metrics, writer, phase, epoch) # deep copy the model if phase == 'val' and epoch_loss < best_loss: best_loss = epoch_loss now = datetime.datetime.now() torch.save( model.state_dict(), save_dir / f"{now.month}{now.day}{now.hour}{now.minute}_{best_loss}") best_model = copy.deepcopy(model.state_dict()) writer.close() now = datetime.datetime.now() torch.save( model.state_dict(), save_dir / f"end_{now.month}{now.day}{now.hour}{now.minute}_{best_loss}") # load best model weights model.load_state_dict(best_model) now = datetime.datetime.now() torch.save(model.state_dict(), save_dir / "best") if __name__ == '__main__': parser = get_train_parser() args = parser.parse_args() train_model(args)