        self.done = False
        state = self.reset()
        while not self.done:
            action = self.action_space.sample()
            state, reward, self.done, _ = self.step(action)
            print('Reward: {:2.3f}, state: {}, action: {}'.format(
                reward, state, action))
            self.render(True)
        cv2.destroyAllWindows()

    def create_window(self):
        cv2.namedWindow(self.window_name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(self.window_name, 300, 300)


# if __name__ == "__main__":
#     from rl.baselines import get_parameters, Trainer
#     import rl.environments
#
#     env = TestEnv(get_parameters('TestEnv'))
#     model = Trainer('TestEnv', 'models').create_model()
#     model._tensorboard()
#     model.train()
#     print('Training done')
#     input('Run trained model (Enter)')
#     env.create_window()
#     env.run(model)

from rl.baselines import get_parameters
env = TestEnv(get_parameters('TestEnv'))
parser.add_argument('-c', '--config', type=str, default=None,
                    help='Adjusted configuration file located in config/custom folder')
parser.print_help()
args = parser.parse_args()
path = pathlib.Path().absolute()
trainer = Trainer(args.environment, args.subdir)

if args.config is not None:
    try:
        config_path = join(path, 'rl', 'config', 'custom',
                           '{}.yml'.format(args.config))
        with open(config_path) as f:
            config = yaml.safe_load(f)
        print('\nLoaded config file from: {}\n'.format(config_path))
    except FileNotFoundError:
        print('Specified config is not in path, getting original '
              'config: {}.yml...'.format(args.environment))
        # Fall back to the default config for this environment
        config = get_parameters(args.environment)
else:
    config = get_parameters(args.environment)

if args.model is not None:
    config['main']['model'] = args.model

trainer.create_model(name=args.name, config_file=config)
trainer._tensorboard()

t0 = time.time()
trainer.train()
ts = time.time()
print('Running time for training: {} minutes.'.format((ts - t0) / 60))
# trainer.run(1000)
trainer._save()
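# A minimal programmatic equivalent of the CLI flow above -- a sketch that
# assumes only the Trainer/get_parameters API used in this repository;
# 'TestEnv' and 'models' are illustrative arguments, not fixed names:
#
#     from rl.baselines import Trainer, get_parameters
#
#     trainer = Trainer('TestEnv', 'models')
#     trainer.create_model(config_file=get_parameters('TestEnv'))
#     trainer._tensorboard()
#     trainer.train()
#     trainer._save()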
        print('start state:', state)
        while not self.done:
            action = self.action_space.sample()
            state, reward, self.done, _ = self.step(action)
            print('action: {}, Reward: {:2.3f}, new actions: {}, new state: {}'.format(
                action, reward, self.possible_actions, state))


if __name__ == "__main__":
    from rl.baselines import get_parameters, Trainer
    import rl.environments

    env = PathPlanningEnv4(get_parameters('PathPlanningEnv4'))

    # SAMPLE RANDOM ACTIONS
    print('Sampling random actions...')
    env.sample()

    # TRAIN NEW MODEL (DOES NOT SAVE) AND SAMPLE ACTIONS FROM IT
    # model = Trainer('PathPlanningEnv4', 'models').create_model()
    # model._tensorboard()
    # model.train()
    # print('Training done')
    # input('Run trained model (Enter)')
    # env.run(model)

    # LOAD IN TRAINED MODEL FOR SAMPLING ACTIONS
    # model = Trainer('PathPlanningEnv4', 'train5050').load_model(1)
        Run one timestep of the environment's dynamics. When the end of an
        episode is reached, call reset() to reset this environment's state.

        Accepts an action and returns a tuple (observation, reward, done, info).

        Args:
            action (object): an action provided by the agent

        Returns:
            observation (object): agent's observation of the current environment
            reward (float): amount of reward returned after previous action
            done (bool): whether the episode has ended, in which case further
                step() calls will return undefined results
            info (dict): contains auxiliary diagnostic information
                (helpful for debugging, and sometimes learning)
        """
        # return next_state, reward, terminate, info

    def render(self):
        """
        Should render the observation based on the current state.
        (Pure visualization)
        """


if __name__ == "__main__":
    from rl.baselines import get_parameters, Trainer
    import rl.environments

    env = custom_env(get_parameters('custom_env'))
    model = Trainer('custom_env', 'models').create_model()
    model._tensorboard()
    model.train()
    print('Training done')
    input('Run trained model (Enter)')
    env.create_window()
    env.run(model)
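# As a concrete illustration of the step()/reset()/render() contract described
# in the docstrings above, here is a minimal self-contained environment. This
# is only a sketch: it assumes the classic four-tuple Gym step API, and the
# class name MinimalEnv and its toy dynamics are made up for illustration.
import gym
import numpy as np
from gym import spaces


class MinimalEnv(gym.Env):
    """Toy environment: a scalar state moved up or down by a binary action."""

    def __init__(self):
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(
            low=-10.0, high=10.0, shape=(1,), dtype=np.float32)
        self.state = None
        self.steps = 0

    def reset(self):
        self.state = np.zeros(1, dtype=np.float32)
        self.steps = 0
        return self.state

    def step(self, action):
        # Action 1 moves the state up, action 0 moves it down
        self.state = self.state + (1.0 if action == 1 else -1.0)
        self.steps += 1
        reward = float(self.state[0])   # reward grows with the state value
        done = self.steps >= 10         # fixed episode horizon
        info = {}                       # auxiliary diagnostics (empty here)
        return self.state, reward, done, info

    def render(self, mode='human'):
        print('state:', self.state)


# Usage sketch:
#     env = MinimalEnv()
#     state = env.reset()
#     done = False
#     while not done:
#         state, reward, done, _ = env.step(env.action_space.sample())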
    def initializeEnv(self):
        """
        Initializes the actor and critic neural networks and variables
        related to training. Can be called to reinitialize the network
        to its original state.
        """
        # Set random seed
        self.statusBox.setText('Creating environment...')
        s = self.parameters['Learning']['random_seed']
        from random import seed
        if s != 0:
            seed(s)
            tf.random.set_random_seed(s)

        # Create environment: try the Gym registry first, then fall back to a
        # custom environment from this repository
        envName = self.envSelectionDropdown.currentText().strip()
        try:
            self.env = gym.make(envName)
        except Exception:
            import rl
            from rl.baselines import get_parameters
            config = get_parameters(envName)
            self.env = getattr(rl.environments, envName)(config=config)

        # Show screen
        try:
            self.env.render(mode="human")
        except Exception:
            pass

        self.env.reset()
        self.done = False
        self.gamma = self.parameters['Learning']['gamma']
        self.lam = self.parameters['Learning']['lambda']
        self.policy_logvar = self.parameters['Learning']['log_variance']
        self.trajectories = []
        self.obs = self.env.observation_space.shape[0]
        try:
            # Continuous (Box) action space
            self.actions = self.env.action_space.shape[0]
            self.actionWidget.setYRange(self.env.action_space.low[0] - .4,
                                        self.env.action_space.high[0] + .4)
        except Exception:
            # Discrete action space
            self.actions = self.env.action_space.n
            self.discrete = True

        # Create the list of deques that is used for averaging out the outputs
        # of the actor network during training of the network
        self.testAction = [deque(maxlen=5) for _ in range(self.actions)]
        self.valueFunction = NNValueFunction(self.obs, self.actions,
                                             self.parameters['Learning'],
                                             self.parameters['Networks'])
        self.policy = Policy(self.obs, self.actions,
                             self.parameters['Learning'],
                             self.parameters['Networks'],
                             self.policy_logvar)
        self.policyLoss = [0]
        self.episode = 0
        self.mean_reward = []
        self.sums = 0.0
        self.mean_actions = np.zeros(
            [self.parameters['Learning']['batch_size'], 3])
        self.scaler = Scaler(self.env.observation_space.shape[0])
        self.observes, self.rewards, self.unscaled_obs = None, None, None
        self.step = 0
        self.statusBox.setText('Created {} environment.'.format(envName))
        self.buttonStatus('initialized')
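# The try/except above infers the action-space type from the error raised when
# indexing its shape. An equivalent, more explicit check -- a sketch assuming a
# generic Gym env; the variable names are illustrative:
#
#     from gym import spaces
#
#     if isinstance(env.action_space, spaces.Box):   # continuous actions
#         n_actions, discrete = env.action_space.shape[0], False
#     else:                                          # e.g. spaces.Discrete
#         n_actions, discrete = env.action_space.n, True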
            while not self.done:
                action = model.model.predict(state)
                state, reward, self.done, _ = self.step(action[0])
                print('Episode {:2}, Step {:3}, Reward: {:.2f}, State: {}, '
                      'Action: {:2}'.format(episode, step, reward,
                                            state[0], action[0]), end='\r')
                self.render()
                step += 1
        except KeyboardInterrupt:
            pass

    def sample(self):
        """
        Sample random actions and run the environment.
        """
        self.create_window()
        for _ in range(10):
            self.done = False
            state = self.reset()
            while not self.done:
                action = self.action_space.sample()
                state, reward, self.done, _ = self.step(action)
                print('Reward: {:2.3f}, state: {}, action: {}'.format(
                    reward, state, action))
                self.render()
        cv2.destroyAllWindows()


from rl.baselines import get_parameters
env = simple_conveyor_1(get_parameters('simple_conveyor_1'))
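# Example: load a previously trained model and let it drive the environment,
# following the load_model()/run() pattern used elsewhere in this repository.
# The subdirectory name 'models' and model index 1 are placeholders:
#
#     from rl.baselines import Trainer
#
#     model = Trainer('simple_conveyor_1', 'models').load_model(1)
#     env.create_window()
#     env.run(model)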