Example No. 1
            self.done = False
            state = self.reset()
            while not self.done:
                action = self.action_space.sample()
                state, reward, self.done, _ = self.step(action)
                print('Reward: {:2.3f}, state: {}, action: {}'.format(
                    reward, state, action))
                self.render(True)
        cv2.destroyAllWindows()

    def create_window(self):
        cv2.namedWindow(self.window_name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(self.window_name, 300, 300)


# if __name__ == "__main__":
#     from rl.baselines import get_parameters, Trainer
#     import rl.environments
#     env = TestEnv(get_parameters('TestEnv'))
#
#     model = Trainer('TestEnv', 'models').create_model()
#     model._tensorboard()
#     model.train()
#     print('Training done')
#     input('Run trained model (Enter)')
#     env.create_window()
#     env.run(model)
from rl.baselines import get_parameters

env = TestEnv(get_parameters('TestEnv'))
Example No. 2
    parser.add_argument('-c', '--config', type=str, default=None, help='Adjusted configuration file located in config/custom folder')
    parser.print_help()
    args = parser.parse_args()
    path = pathlib.Path().absolute()

    trainer = Trainer(args.environment, args.subdir)

    if args.config is not None:
        try:
            config_path = join(path, 'rl', 'config', 'custom', '{}.yml'.format(args.config))
            with open(config_path) as f:
                config = yaml.safe_load(f)
            print('\nLoaded config file from: {}\n'.format(config_path))

        except (FileNotFoundError, yaml.YAMLError):
            print('Specified config not found; loading the default config: {}.yml...'.format(args.environment))
            # load config and variables needed
            config = get_parameters(args.environment)
    else:
        config = get_parameters(args.environment)

    if args.model is not None:
        config['main']['model'] = args.model
    trainer.create_model(name=args.name, config_file=config)
    trainer._tensorboard()
    t0 = time.time()
    trainer.train()
    ts = time.time()
    print('Running time for training: {} minutes.'.format((ts-t0)/60))
    #trainer.run(1000)
    trainer._save()
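
The script above is driven entirely by command-line arguments; for reference, here is a minimal programmatic sketch of the same workflow, using only the Trainer and get_parameters calls that appear in these examples ('TestEnv', 'models' and the run name 'test_run' are placeholder values):

# Hypothetical programmatic equivalent of the CLI training script above.
# 'TestEnv', 'models' and 'test_run' are placeholder names.
import time
from rl.baselines import get_parameters, Trainer
import rl.environments

config = get_parameters('TestEnv')            # default YAML config for the environment
trainer = Trainer('TestEnv', 'models')        # environment name and output subdirectory
trainer.create_model(name='test_run', config_file=config)
trainer._tensorboard()                        # start TensorBoard logging

t0 = time.time()
trainer.train()
print('Running time for training: {:.2f} minutes.'.format((time.time() - t0) / 60))
trainer._save()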
Example No. 3
            print('start state:', state)
            while not self.done:
                action = self.action_space.sample()
                state, reward, self.done, _ = self.step(action)
                print('action: {}, Reward: {:2.3f}, new actions: {}, new state: {}'.format(action, reward,
                                                                                           self.possible_actions,
                                                                                           state))




if __name__ == "__main__":
    from rl.baselines import get_parameters, Trainer
    import rl.environments

    env = PathPlanningEnv4(get_parameters('PathPlanningEnv4'))

    # #SAMPLE RANDOM ACTIONS
    print('Sampling random actions...')
    env.sample()

    # #TRAIN NEW MODEL (DOES NOT SAVE) AND SAMPLE ACTIONS FROM IT
    # model = Trainer('PathPlanningEnv4', 'models').create_model()
    # model._tensorboard()
    # model.train()
    # print('Training done')
    # input('Run trained model (Enter)')
    # env.run(model)

    # #LOAD IN TRAINED MODEL FOR SAMPLING ACTIONS
    # model = Trainer('PathPlanningEnv4', 'train5050').load_model(1)
Example No. 4
        Run one timestep of the environment's dynamics. When the end of an
        episode is reached, call reset() to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).

        Args:
            action (object): an action provided by the agent

        Returns:
            observation (object): agent's observation of the current environment
            reward (float): amount of reward returned after the previous action
            done (bool): whether the episode has ended, in which case further step() calls will return undefined results
            info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
        """
        # return next_state, reward, terminate, info

    def render(self):
        """
        Should render the observation based on the current state. (Pure visualization)
        """

if __name__ == "__main__":
    from rl.baselines import get_parameters, Trainer
    import rl.environments
    env = custom_env(get_parameters('custom_env'))

    model = Trainer('custom_env', 'models').create_model()
    model._tensorboard()
    model.train()
    print('Training done')
    input('Run trained model (Enter)')
    env.create_window()
    env.run(model)
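
The docstring in this example describes the standard Gym step() contract (observation, reward, done, info). As a point of reference, a minimal, self-contained toy environment honouring that contract (purely hypothetical, not part of the rl package) could be sketched as:

# Toy gym.Env illustrating the (observation, reward, done, info) contract
# documented above. Purely illustrative; not part of rl.environments.
import gym
import numpy as np
from gym import spaces

class CounterEnv(gym.Env):
    def __init__(self, target=10):
        self.action_space = spaces.Discrete(2)      # 0: stay, 1: increment
        self.observation_space = spaces.Box(low=0, high=target, shape=(1,), dtype=np.float32)
        self.target = target
        self.count = 0

    def reset(self):
        self.count = 0
        return np.array([self.count], dtype=np.float32)

    def step(self, action):
        self.count += int(action)
        reward = 1.0 if self.count == self.target else 0.0
        done = self.count >= self.target
        return np.array([self.count], dtype=np.float32), reward, done, {}

    def render(self, mode='human'):
        print('count:', self.count)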
Example No. 5
    def initializeEnv(self):
        """
        Initializes the actor and critic neural networks and variables related to training
        Can be called to reinitialize the network to its original state
        """
        # Set random seed
        self.statusBox.setText('Creating environment...')
        s = self.parameters['Learning']['random_seed']
        from random import seed

        if s != 0:
            seed(s)
            tf.random.set_random_seed(s)

        # Create environment
        envName = self.envSelectionDropdown.currentText().strip()
        try:
            self.env = gym.make(envName)
        except Exception:
            # Not a registered Gym environment; fall back to a custom environment
            # from rl.environments, configured through its YAML parameters.
            import rl
            from rl.baselines import get_parameters
            config = get_parameters(envName)
            self.env = getattr(rl.environments, envName)(config=config)

        # Show screen
        try:
            self.env.render(mode="human")
        except Exception:
            # Rendering is optional here; ignore environments that cannot be rendered yet.
            pass

        self.env.reset()
        self.done = False

        self.gamma = self.parameters['Learning']['gamma']
        self.lam = self.parameters['Learning']['lambda']
        self.policy_logvar = self.parameters['Learning']['log_variance']
        self.trajectories = []

        self.obs = self.env.observation_space.shape[0]
        try:
            # Continuous (Box) action space: one dimension per action component.
            self.actions = self.env.action_space.shape[0]
            self.actionWidget.setYRange(self.env.action_space.low[0] - .4,
                                        self.env.action_space.high[0] + .4)
        except Exception:
            # Discrete action space: the number of actions is given by .n.
            self.actions = self.env.action_space.n
            self.discrete = True

        # Create the list of deques that is used for averaging out the outputs of the actor network
        # during training of the network
        self.testAction = [deque(maxlen=5) for _ in range(self.actions)]

        self.valueFunction = NNValueFunction(self.obs, self.actions,
                                             self.parameters['Learning'],
                                             self.parameters['Networks'])
        self.policy = Policy(self.obs, self.actions,
                             self.parameters['Learning'],
                             self.parameters['Networks'], self.policy_logvar)
        self.policyLoss = [0]
        self.episode = 0
        self.mean_reward = []
        self.sums = 0.0
        self.mean_actions = np.zeros(
            [self.parameters['Learning']['batch_size'], 3])
        self.scaler = Scaler(self.env.observation_space.shape[0])
        self.observes, self.rewards, self.unscaled_obs = None, None, None
        self.step = 0
        self.statusBox.setText('Created {} environment.'.format(envName))
        self.buttonStatus('initialized')
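
The try/except block in initializeEnv above distinguishes continuous (Box) action spaces from discrete ones by probing action_space.shape and falling back to action_space.n. The same idea, sketched standalone against the plain Gym API with an isinstance check instead of try/except ('CartPole-v1' is only an illustrative environment name):

# Standalone sketch of the action-space probing used in initializeEnv above.
# Box spaces carry a shape; Discrete spaces expose the number of actions as .n.
import gym
from gym import spaces

def describe_action_space(env):
    if isinstance(env.action_space, spaces.Box):
        return env.action_space.shape[0], False    # continuous: action dimensions
    return env.action_space.n, True                # discrete: number of actions

n_actions, discrete = describe_action_space(gym.make('CartPole-v1'))
print(n_actions, discrete)    # -> 2 True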
Example No. 6
                while not self.done:
                    action = model.model.predict(state)
                    state, reward, self.done, _ = self.step(action[0])
                    print(
                        '   Episode {:2}, Step {:3}, Reward: {:.2f}, State: {}, Action: {:2}'.format(episode, step, reward,
                                                                                                     state[0], action[0]),
                        end='\r')
                    self.render()
                    step += 1
        except KeyboardInterrupt:
            pass

    def sample(self):
        """
        Sample random actions and run the environment
        """
        self.create_window()
        for _ in range(10):
            self.done = False
            state = self.reset()
            while not self.done:
                action = self.action_space.sample()
                state, reward, self.done, _ = self.step(action)
                print('Reward: {:2.3f}, state: {}, action: {}'.format(reward, state, action))
                self.render()
        cv2.destroyAllWindows()

from rl.baselines import get_parameters

env = simple_conveyor_1(get_parameters('simple_conveyor_1'))
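
With the environment constructed, it can be exercised with random actions just as in Example No. 3; training or loading a model afterwards would follow the commented-out Trainer blocks shown in Examples No. 1 and 3:

# Run the random-action loop defined in sample() above.
env.sample()

# Optionally train and run a model, following the Trainer usage shown earlier:
# model = Trainer('simple_conveyor_1', 'models').create_model()
# model.train()
# env.run(model)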