def __init__(self, config):
    with open(config, 'r') as f:
        config = json.load(f)
    self.episode_length = config['train']['episode_length']
    self.env = gym.make(config['model']['gym'])
    state_size = config['model']['state_size']
    action_size = config['model']['action_size']
    self.policy_net = Policy1D(state_size, action_size)
    self.value_net = Value1D(state_size)
    self.plosses = []
    self.vlosses = []
    self.stand_time = []
    if torch.cuda.is_available():
        self.policy_net.cuda()
        self.value_net.cuda()
        self.device = torch.device("cuda")
        print("Using GPU")
    else:
        self.device = torch.device("cpu")
        print("No GPU detected")
    self.write_interval = config['model']['write_interval']
    self.train_info_path = config['model']['trainer_save_path']
    self.policy_path = config['model']['policy_save_path'].split('.pt')[0]
    self.value_path = config['model']['value_save_path'].split('.pt')[0]
def __init__(self, config):
    with open(config, 'r') as f:
        config = json.load(f)
    self.env = gym.make(config['model']['gym'])
    self.mtcar = (config['model']['gym'] == 'MountainCar-v0')
    state_size = config['model']['state_size']
    action_size = config['model']['action_size']
    hidden_size = config['model']['hidden_size']
    layer_size = config['model']['hidden_layers']
    logheat = config['model']['logheat']
    self.policy_net = Policy1D(state_size, action_size,
                               hidden_size=hidden_size, layers=layer_size,
                               logheat=logheat)
    if torch.cuda.is_available():
        # Only the policy network exists in this variant, so only it is moved to the GPU.
        self.policy_net.cuda()
        self.device = torch.device("cuda")
        print("Using GPU")
    else:
        self.device = torch.device("cpu")
        print("No GPU detected")
    self.visual = True
    self.cut = config['train']['cutearly']
    self.write_interval = config['model']['write_interval']
    self.train_info_path = config['model']['trainer_save_path']
    self.policy_path = config['model']['policy_save_path'].split('.pt')[0]
def __init__(self, config):
    with open(config, 'r') as f:
        config = json.load(f)
    self.env = gym.make(config['model']['gym'])
    state_size = config['model']['state_size']
    action_size = config['model']['action_size']
    hidden_size = config['model']['hidden_size']
    layer_size = config['model']['hidden_layers']
    self.action_size = action_size
    self.policy_net = Policy1D(state_size, action_size,
                               hidden_size=hidden_size, layers=layer_size)
    self.value_net = Value1D(state_size, hidden_size=hidden_size, layers=layer_size)
    if torch.cuda.is_available():
        self.policy_net.cuda()
        self.value_net.cuda()
        self.device = torch.device("cuda")
        print("Using GPU")
    else:
        self.device = torch.device("cpu")
        print("No GPU detected")
    self.write_interval = config['model']['write_interval']
    self.train_info_path = config['model']['trainer_save_path']
    self.policy_path = config['model']['policy_save_path'].split('.pt')[0]
    self.value_path = config['model']['value_save_path'].split('.pt')[0]
    self.gif_path = config['model']['gif_save_path'].split('.gif')[0]
    self.graph_path = config['model']['graph_save_path']
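# Policy1D and Value1D are not defined in this snippet. Below is a minimal sketch
# of the interface the constructors above assume (plain fully connected networks
# with configurable width and depth); the project's real architecture may differ,
# and the default argument values are placeholders.
import torch.nn as nn

def _mlp(in_size, out_size, hidden_size, layers):
    # `layers` hidden layers of width `hidden_size`, ReLU activations, linear output.
    sizes = [in_size] + [hidden_size] * layers
    blocks = []
    for a, b in zip(sizes[:-1], sizes[1:]):
        blocks += [nn.Linear(a, b), nn.ReLU()]
    blocks.append(nn.Linear(sizes[-1], out_size))
    return nn.Sequential(*blocks)

class Policy1D(nn.Module):
    def __init__(self, state_size, action_size, hidden_size=64, layers=2, logheat=1.0):
        super().__init__()
        self.logheat = logheat  # assumed to act as a softmax temperature
        self.body = _mlp(state_size, action_size, hidden_size, layers)

    def forward(self, state):
        # Log-probabilities over the discrete action space.
        return nn.functional.log_softmax(self.body(state) / self.logheat, dim=-1)

class Value1D(nn.Module):
    def __init__(self, state_size, hidden_size=64, layers=2):
        super().__init__()
        self.body = _mlp(state_size, 1, hidden_size, layers)

    def forward(self, state):
        # Scalar state-value estimate.
        return self.body(state)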
def __init__(self, config):
    with open(config, 'r') as f:
        config = json.load(f)
    self.epochs = config['train']['epochs']
    self.env_samples = config['train']['env_samples']
    self.episode_length = config['train']['episode_length']
    self.gamma = config['train']['gamma']
    self.value_epochs = config['train']['value_epochs']
    self.policy_epochs = config['train']['policy_epochs']
    self.batch_size = config['train']['batch_size']
    self.policy_batch_size = config['train']['policy_batch_size']
    epsilon = config['train']['epsilon']
    # self.env = gym.make(config['model']['gym'])
    env_factory = CartPoleEnvironmentFactory()
    self.env_threads = 1
    self.envs = [env_factory.new() for _ in range(self.env_threads)]
    # Spread the requested rollouts as evenly as possible across the environment threads.
    rollouts_per_thread = self.env_samples // self.env_threads
    remainder = self.env_samples % self.env_threads
    self.rollout_nums = ([rollouts_per_thread + 1] * remainder
                         + [rollouts_per_thread] * (self.env_threads - remainder))
    state_size = config['model']['state_size']
    action_size = config['model']['action_size']
    self.action_size = action_size
    self.policy_net = Policy1D(state_size, action_size)
    # self.value_net = Value1D(state_size)
    self.value_loss = nn.MSELoss()
    self.ppoloss = PPOLoss(epsilon)
    self.ppo_low_bnd = 1 - epsilon
    self.ppo_up_bnd = 1 + epsilon
    betas = (config['train']['betas1'], config['train']['betas2'])
    weight_decay = config['train']['weight_decay']
    lr = config['train']['lr']
    # params = chain(self.policy_net.parameters(), self.value_net.parameters())
    self.optim = optim.Adam(self.policy_net.parameters(), lr=lr, betas=betas,
                            weight_decay=weight_decay)
    self.plosses = []
    self.vlosses = []
    self.stand_time = []
    if torch.cuda.is_available():
        self.policy_net.cuda()
        # self.value_net is commented out above, so there is no value network to move.
        self.device = torch.device("cuda")
        print("Using GPU")
    else:
        self.device = torch.device("cpu")
        print("No GPU detected")
    self.write_interval = config['model']['write_interval']
    self.train_info_path = config['model']['trainer_save_path']
    self.policy_path = config['model']['policy_save_path'].split('.pt')[0]
    self.value_path = config['model']['value_save_path'].split('.pt')[0]
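# PPOLoss is constructed from `epsilon` above, but its definition is not part of
# this snippet. A minimal sketch of a clipped-surrogate objective with that
# constructor signature follows; the forward interface is an assumption, not the
# project's actual implementation.
import torch
import torch.nn as nn

class PPOLoss(nn.Module):
    def __init__(self, epsilon):
        super().__init__()
        self.epsilon = epsilon

    def forward(self, new_log_probs, old_log_probs, advantages):
        # Probability ratio between the current policy and the rollout-time policy.
        ratio = torch.exp(new_log_probs - old_log_probs)
        # Clip the ratio to [1 - epsilon, 1 + epsilon], matching ppo_low_bnd / ppo_up_bnd above.
        clipped = torch.clamp(ratio, 1.0 - self.epsilon, 1.0 + self.epsilon)
        # Maximise the clipped surrogate, i.e. minimise its negation.
        return -torch.min(ratio * advantages, clipped * advantages).mean()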
def __init__(self, config):
    with open(config, 'r') as f:
        config = json.load(f)
    self.epochs = config['train']['epochs']
    self.env_samples = config['train']['env_samples']
    self.episode_length = config['train']['episode_length']
    self.gamma = config['train']['gamma']
    self.value_epochs = config['train']['value_epochs']
    self.policy_epochs = config['train']['policy_epochs']
    self.batch_size = config['train']['batch_size']
    self.policy_batch_size = config['train']['policy_batch_size']
    epsilon = config['train']['epsilon']
    self.env = gym.make(config['model']['gym'])
    state_size = config['model']['state_size']
    action_size = config['model']['action_size']
    self.action_size = action_size
    self.policy_net = Policy1D(state_size, action_size)
    self.value_net = Value1D(state_size)
    self.value_loss = nn.MSELoss()
    self.ppoloss = PPOLoss(epsilon)
    policy_lr = config['train']['policy_lr']
    value_lr = config['train']['value_lr']
    policy_decay = config['train']['policy_decay']
    value_decay = config['train']['value_decay']
    self.policy_optim = optim.Adam(self.policy_net.parameters(), lr=policy_lr,
                                   weight_decay=policy_decay)
    self.value_optim = optim.Adam(self.value_net.parameters(), lr=value_lr,
                                  weight_decay=value_decay)
    self.plosses = []
    self.vlosses = []
    self.stand_time = []
    if torch.cuda.is_available():
        self.policy_net.cuda()
        self.value_net.cuda()
        self.device = torch.device("cuda")
        print("Using GPU")
    else:
        self.device = torch.device("cpu")
        print("No GPU detected")
    self.write_interval = config['model']['write_interval']
    self.train_info_path = config['model']['trainer_save_path']
    self.policy_path = config['model']['policy_save_path'].split('.pt')[0]
    self.value_path = config['model']['value_save_path'].split('.pt')[0]
def __init__(self, config):
    with open(config, 'r') as f:
        config = json.load(f)
    self.epochs = config['train']['epochs']
    self.value_epochs = config['train']['value_epochs']
    self.policy_epochs = config['train']['policy_epochs']
    self.policy_batch_size = config['train']['policy_batch_size']
    state_size = config['model']['state_size']
    action_size = config['model']['action_size']
    self.action_size = action_size
    self.policy_net = Policy1D(state_size, action_size)
    self.value_loss = nn.MSELoss()
    epsilon = config['train']['epsilon']
    self.ppoloss = PPOLoss(epsilon)
    self.ppo_low_bnd = 1 - epsilon
    self.ppo_up_bnd = 1 + epsilon
    betas = (config['train']['betas1'], config['train']['betas2'])
    weight_decay = config['train']['weight_decay']
    lr = config['train']['lr']
    # params = chain(self.policy_net.parameters(), self.value_net.parameters())
    self.optim = optim.Adam(self.policy_net.parameters(), lr=lr, betas=betas,
                            weight_decay=weight_decay)
    self.plosses = []
    self.vlosses = []
    self.avg_rewards = []
    self.stand_time = []
    if torch.cuda.is_available():
        # No separate value network is constructed in this variant, so only the
        # policy network is moved to the GPU.
        self.policy_net.cuda()
        self.device = torch.device("cuda")
        print("Using GPU")
    else:
        self.device = torch.device("cpu")
        print("No GPU detected")
    env = gym.make(config['model']['gym'])
    env_samples = config['train']['env_samples']
    episode_length = config['train']['episode_length']
    gamma = config['train']['gamma']
    self.rollFact = RolloutFactory(env, config['model']['gym'], self.policy_net,
                                   env_samples, episode_length, gamma,
                                   cutearly=config['train']['cutearly'])
    self.write_interval = config['model']['write_interval']
    self.train_info_path = config['model']['trainer_save_path']
    self.policy_path = config['model']['policy_save_path'].split('.pt')[0]
    self.value_path = config['model']['value_save_path'].split('.pt')[0]
    self.graph_path = config['model']['graph_save_path'].split('.png')[0]
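# A hypothetical config file matching the keys read by the last constructor above.
# The key names mirror what the code accesses; every value is illustrative only
# and does not come from the original project.
import json

example_config = {
    "model": {
        "gym": "CartPole-v0",
        "state_size": 4,
        "action_size": 2,
        "write_interval": 10,
        "trainer_save_path": "save/trainer.json",
        "policy_save_path": "save/policy.pt",
        "value_save_path": "save/value.pt",
        "graph_save_path": "save/rewards.png",
    },
    "train": {
        "epochs": 100,
        "env_samples": 100,
        "episode_length": 200,
        "gamma": 0.99,
        "value_epochs": 1,
        "policy_epochs": 4,
        "policy_batch_size": 256,
        "epsilon": 0.2,
        "betas1": 0.9,
        "betas2": 0.999,
        "weight_decay": 0.0,
        "lr": 3e-4,
        "cutearly": True,
    },
}

with open("example_config.json", "w") as f:
    json.dump(example_config, f, indent=4)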