Example #1
  def __init__(self, config):
    with open(config, 'r') as f:
      config = json.load(f)

    self.episode_length = config['train']['episode_length']

    self.env = gym.make(config['model']['gym'])

    state_size = config['model']['state_size']
    action_size = config['model']['action_size']
    self.policy_net = Policy1D(state_size, action_size)
    self.value_net = Value1D(state_size)

    self.plosses = []
    self.vlosses = []
    self.stand_time = []

    if torch.cuda.is_available():
      self.policy_net.cuda()
      self.value_net.cuda()
      self.device = torch.device("cuda")
      print("Using GPU")
    else:
      self.device = torch.device("cpu")
      print("No GPU detected")

    self.write_interval = config['model']['write_interval']
    self.train_info_path = config['model']['trainer_save_path']
    self.policy_path = config['model']['policy_save_path'].split('.pt')[0]
    self.value_path = config['model']['value_save_path'].split('.pt')[0]
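These snippets are `__init__` methods excerpted from larger trainer classes. A minimal sketch of the assumed surrounding scaffolding is below; the class name `Trainer` and the `config.json` path are hypothetical, while `Policy1D`, `Value1D`, and `PPOLoss` are the project's own modules and are not defined here.

import json

import gym
import torch
import torch.nn as nn
import torch.optim as optim


class Trainer:  # hypothetical name; each example on this page is its __init__
    def __init__(self, config):
        ...  # body as in Example #1


# Usage: the constructor takes a *path* to a JSON config file,
# which it immediately re-binds to the parsed dict.
# trainer = Trainer('config.json')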
Example #2
    def __init__(self, config):
        with open(config, 'r') as f:
            config = json.load(f)

        self.env = gym.make(config['model']['gym'])
        self.mtcar = (config['model']['gym'] == 'MountainCar-v0')

        state_size = config['model']['state_size']
        action_size = config['model']['action_size']
        hidden_size = config['model']['hidden_size']
        layer_size = config['model']['hidden_layers']
        logheat = config['model']['logheat']
        self.policy_net = Policy1D(state_size,
                                   action_size,
                                   hidden_size=hidden_size,
                                   layers=layer_size,
                                   logheat=logheat)

        if torch.cuda.is_available():
            self.policy_net.cuda()
            # self.value_net.cuda()  # no value_net in this variant
            self.device = torch.device("cuda")
            print("Using GPU")
        else:
            self.device = torch.device("cpu")
            print("No GPU detected")

        self.visual = True
        self.cut = config['train']['cutearly']
        self.write_interval = config['model']['write_interval']
        self.train_info_path = config['model']['trainer_save_path']
        self.policy_path = config['model']['policy_save_path'].split('.pt')[0]
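Example #2 threads a `logheat` value from the config into `Policy1D`. The project's definition isn't shown on this page, but the name suggests a softmax temperature applied to the logits. A minimal sketch under that assumption (the module below is illustrative, not the project's `Policy1D`):

import torch
import torch.nn as nn


class TemperaturedPolicyHead(nn.Module):
    """Illustrative only: divides logits by a temperature before log-softmax."""

    def __init__(self, hidden_size, action_size, logheat=1.0):
        super().__init__()
        self.out = nn.Linear(hidden_size, action_size)
        self.logheat = logheat  # >1 flattens the distribution, <1 sharpens it

    def forward(self, h):
        return torch.log_softmax(self.out(h) / self.logheat, dim=-1)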
Example #3
    def __init__(self, config):
        with open(config, 'r') as f:
            config = json.load(f)

        self.env = gym.make(config['model']['gym'])

        state_size = config['model']['state_size']
        action_size = config['model']['action_size']
        hidden_size = config['model']['hidden_size']
        layer_size = config['model']['hidden_layers']
        self.action_size = action_size
        self.policy_net = Policy1D(state_size,
                                   action_size,
                                   hidden_size=hidden_size,
                                   layers=layer_size)
        self.value_net = Value1D(state_size,
                                 hidden_size=hidden_size,
                                 layers=layer_size)

        if torch.cuda.is_available():
            self.policy_net.cuda()
            self.value_net.cuda()
            self.device = torch.device("cuda")
            print("Using GPU")
        else:
            self.device = torch.device("cpu")
            print("No GPU detected")

        self.write_interval = config['model']['write_interval']
        self.train_info_path = config['model']['trainer_save_path']
        self.policy_path = config['model']['policy_save_path'].split('.pt')[0]
        self.value_path = config['model']['value_save_path'].split('.pt')[0]
        self.gif_path = config['model']['gif_save_path'].split('.gif')[0]
        self.graph_path = config['model']['graph_save_path']
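All six examples strip save-file extensions with `str.split`, presumably so a suffix (e.g. an epoch number) can be appended before re-adding `.pt`. `os.path.splitext` does the same job without hard-coding the extension; a short equivalence check:

import os

path = 'checkpoints/policy.pt'
stem = path.split('.pt')[0]            # pattern used in the examples
stem2, ext = os.path.splitext(path)    # extension-agnostic equivalent
assert stem == stem2                   # both yield 'checkpoints/policy'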
Example #4
  def __init__(self, config):
    with open(config, 'r') as f:
      config = json.load(f)

    self.epochs = config['train']['epochs']
    self.env_samples = config['train']['env_samples']
    self.episode_length = config['train']['episode_length']
    self.gamma = config['train']['gamma']
    self.value_epochs = config['train']['value_epochs']
    self.policy_epochs = config['train']['policy_epochs']
    self.batch_size = config['train']['batch_size']
    self.policy_batch_size = config['train']['policy_batch_size']
    epsilon = config['train']['epsilon']

    # self.env = gym.make(config['model']['gym'])
    env_factory = CartPoleEnvironmentFactory()
    self.env_threads = 1
    self.envs = [env_factory.new() for _ in range(self.env_threads)]
    rollouts_per_thread = self.env_samples // self.env_threads
    remainder = self.env_samples % self.env_threads
    self.rollout_nums = ([rollouts_per_thread + 1] * remainder
                         + [rollouts_per_thread] * (self.env_threads - remainder))

    state_size = config['model']['state_size']
    action_size = config['model']['action_size']
    self.action_size = action_size
    self.policy_net = Policy1D(state_size, action_size)
    # self.value_net = Value1D(state_size)

    self.value_loss = nn.MSELoss()
    self.ppoloss = PPOLoss(epsilon)
    self.ppo_low_bnd = 1 - epsilon
    self.ppo_up_bnd = 1 + epsilon

    betas = (config['train']['betas1'], config['train']['betas2'])
    weight_decay = config['train']['weight_decay']
    lr = config['train']['lr']
    # params = chain(self.policy_net.parameters(), self.value_net.parameters())
    self.optim = optim.Adam(self.policy_net.parameters(), lr=lr, betas=betas, weight_decay=weight_decay)

    self.plosses = []
    self.vlosses = []
    self.stand_time = []

    if torch.cuda.is_available():
      self.policy_net.cuda()
      # self.value_net.cuda()  # value_net is commented out above
      self.device = torch.device("cuda")
      print("Using GPU")
    else:
      self.device = torch.device("cpu")
      print("No GPU detected")

    self.write_interval = config['model']['write_interval']
    self.train_info_path = config['model']['trainer_save_path']
    self.policy_path = config['model']['policy_save_path'].split('.pt')[0]
    self.value_path = config['model']['value_save_path'].split('.pt')[0]
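The rollout bookkeeping in Example #4 distributes `env_samples` rollouts across `env_threads` workers, giving the first `remainder` workers one extra rollout so the counts always sum back to `env_samples`. A worked instance:

env_samples, env_threads = 10, 3
per_thread, remainder = divmod(env_samples, env_threads)   # 3, 1
rollout_nums = ([per_thread + 1] * remainder
                + [per_thread] * (env_threads - remainder))
print(rollout_nums)                  # [4, 3, 3]
assert sum(rollout_nums) == env_samples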
Example #5
  def __init__(self, config):
    with open(config, 'r') as f:
      config = json.load(f)

    self.epochs = config['train']['epochs']
    self.env_samples = config['train']['env_samples']
    self.episode_length = config['train']['episode_length']
    self.gamma = config['train']['gamma']
    self.value_epochs = config['train']['value_epochs']
    self.policy_epochs = config['train']['policy_epochs']
    self.batch_size = config['train']['batch_size']
    self.policy_batch_size = config['train']['policy_batch_size']
    epsilon = config['train']['epsilon']

    self.env = gym.make(config['model']['gym'])

    state_size = config['model']['state_size']
    action_size = config['model']['action_size']
    self.action_size = action_size
    self.policy_net = Policy1D(state_size, action_size)
    self.value_net = Value1D(state_size)

    self.value_loss = nn.MSELoss()
    self.ppoloss = PPOLoss(epsilon)

    policy_lr = config['train']['policy_lr']
    value_lr = config['train']['value_lr']
    policy_decay = config['train']['policy_decay']
    value_decay = config['train']['value_decay']
    self.policy_optim = optim.Adam(self.policy_net.parameters(), lr=policy_lr, weight_decay=policy_decay)
    self.value_optim = optim.Adam(self.value_net.parameters(), lr=value_lr, weight_decay=value_decay)

    self.plosses = []
    self.vlosses = []
    self.stand_time = []

    if torch.cuda.is_available():
      self.policy_net.cuda()
      self.value_net.cuda()
      self.device = torch.device("cuda")
      print("Using GPU")
    else:
      self.device = torch.device("cpu")
      print("No GPU detected")

    self.write_interval = config['model']['write_interval']
    self.train_info_path = config['model']['trainer_save_path']
    self.policy_path = config['model']['policy_save_path'].split('.pt')[0]
    self.value_path = config['model']['value_save_path'].split('.pt')[0]
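Every constructor takes a path to a JSON file with `train` and `model` sections. A minimal config covering exactly the keys Example #5 reads; the values are placeholders, not the project's defaults (state/action sizes match CartPole-v0):

import json

config = {
    "train": {
        "epochs": 100, "env_samples": 100, "episode_length": 200,
        "gamma": 0.99, "value_epochs": 2, "policy_epochs": 5,
        "batch_size": 32, "policy_batch_size": 256, "epsilon": 0.2,
        "policy_lr": 1e-3, "value_lr": 1e-3,
        "policy_decay": 0.0, "value_decay": 0.0,
    },
    "model": {
        "gym": "CartPole-v0", "state_size": 4, "action_size": 2,
        "write_interval": 10,
        "trainer_save_path": "saves/trainer.json",
        "policy_save_path": "saves/policy.pt",
        "value_save_path": "saves/value.pt",
    },
}

with open('config.json', 'w') as f:
    json.dump(config, f, indent=2)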
Example #6
    def __init__(self, config):
        with open(config, 'r') as f:
            config = json.load(f)

        self.epochs = config['train']['epochs']
        self.value_epochs = config['train']['value_epochs']
        self.policy_epochs = config['train']['policy_epochs']
        self.policy_batch_size = config['train']['policy_batch_size']

        state_size = config['model']['state_size']
        action_size = config['model']['action_size']
        self.action_size = action_size
        self.policy_net = Policy1D(state_size, action_size)

        self.value_loss = nn.MSELoss()

        epsilon = config['train']['epsilon']
        self.ppoloss = PPOLoss(epsilon)
        self.ppo_low_bnd = 1 - epsilon
        self.ppo_up_bnd = 1 + epsilon

        betas = (config['train']['betas1'], config['train']['betas2'])
        weight_decay = config['train']['weight_decay']
        lr = config['train']['lr']
        # params = chain(self.policy_net.parameters(), self.value_net.parameters())
        self.optim = optim.Adam(self.policy_net.parameters(),
                                lr=lr,
                                betas=betas,
                                weight_decay=weight_decay)

        self.plosses = []
        self.vlosses = []
        self.avg_rewards = []
        self.stand_time = []

        if torch.cuda.is_available():
            self.policy_net.cuda()
            # self.value_net.cuda()  # no value_net in this variant
            self.device = torch.device("cuda")
            print("Using GPU")
        else:
            self.device = torch.device("cpu")
            print("No GPU detected")

        env = gym.make(config['model']['gym'])
        env_samples = config['train']['env_samples']
        episode_length = config['train']['episode_length']
        gamma = config['train']['gamma']
        self.rollFact = RolloutFactory(env,
                                       config['model']['gym'],
                                       self.policy_net,
                                       env_samples,
                                       episode_length,
                                       gamma,
                                       cutearly=config['train']['cutearly'])

        self.write_interval = config['model']['write_interval']
        self.train_info_path = config['model']['trainer_save_path']
        self.policy_path = config['model']['policy_save_path'].split('.pt')[0]
        self.value_path = config['model']['value_save_path'].split('.pt')[0]
        self.graph_path = config['model']['graph_save_path'].split('.png')[0]
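Every example repeats the same `cuda()`-then-`torch.device` branch. Since PyTorch 0.4 the conventional form picks the device once and moves modules with `.to(device)`; a behavior-equivalent sketch (the `nn.Linear` modules stand in for `Policy1D`/`Value1D`):

import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device}")

# .to(device) is a no-op on CPU, so no if/else is needed; it replaces
# the paired .cuda() calls in the examples above.
policy_net = nn.Linear(4, 2).to(device)
value_net = nn.Linear(4, 1).to(device)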