Example No. 1
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if(args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif(args.type=="DDPG"):
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if(args.type=="DDQN"):
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif(args.type=="A2C"):
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif(args.type=="A3C"):
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif(args.type=="DDPG"):
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)

    # Display agent
    old_state, time = env.reset(), 0
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done: old_state = env.reset()

    env.env.close()
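Since main() falls back to sys.argv[1:] when called with no arguments, it can also be driven programmatically by passing the argument list directly. A minimal sketch of such a call, assuming parse_args() exposes the attributes used above under the usual --flag spellings (the exact flag names are not shown in this snippet and are hypothetical here):

# Hypothetical invocation: render a pretrained DDQN agent on CartPole-v1.
main([
    "--type", "DDQN",
    "--env", "CartPole-v1",
    "--model_path", "DDQN/models/pretrained.h5",
])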
Example No. 2
    def run(self):
        global agent, rl_environment
        if self.agent_name == self.DqnAgentNameString:
            rl_environment = gym.make('CartPole-v0').unwrapped
            agent = DqnAgent(
                rl_environment=rl_environment,
                plot_environment_statistics=self.plot_environment_statistics)
        elif self.agent_name == self.DdpgAgentNameString:
            rl_environment = gym.make('Pendulum-v0').env
            agent = Ddpg(
                rl_environment=rl_environment,
                plot_environment_statistics=self.plot_environment_statistics)
        elif self.agent_name == self.A3CAgentNameString:
            rl_environment = gym.make('Pendulum-v0').unwrapped
            agent = A3CMain(
                rl_environment=rl_environment,
                plot_environment_statistics=self.plot_environment_statistics)
            agent.initialize_workers()
        elif self.agent_name == self.A2CAgentNameString:
            rl_environment = gym.make("CartPole-v0").env
            agent = A2C(
                rl_environment=rl_environment,
                plot_environment_statistics=self.plot_environment_statistics)
        elif self.agent_name == self.A2CMultiAgentNameString:
            rl_environment = gym.make("CartPole-v0")
            agent = A2CMultiAgent(
                rl_environment=rl_environment,
                plot_environment_statistics=self.plot_environment_statistics)
        elif self.agent_name == self.TRPOAgentNameString:
            rl_environment = gym.make("Pendulum-v0").unwrapped
            agent = TRPO(
                rl_environment=rl_environment,
                plot_environment_statistics=self.plot_environment_statistics)

        agent.train(rl_environment)
        agent.test_agent(rl_environment)
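Note the three environment-construction styles used in run(): gym.make(...) returns a wrapped environment (for CartPole-v0 this includes a TimeLimit that truncates episodes at 200 steps), while .env peels off the outermost wrapper and .unwrapped returns the raw environment with no wrappers at all. A minimal sketch of the difference, assuming the classic pre-0.26 Gym API used in these examples:

import gym

wrapped = gym.make("CartPole-v0")        # wrapped: episodes truncated at 200 steps
raw = gym.make("CartPole-v0").unwrapped  # raw CartPoleEnv: no step limit

print(type(wrapped).__name__, "->", type(raw).__name__)
print(wrapped.spec.max_episode_steps)    # 200 for CartPole-v0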
Example No. 3
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if(args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif(args.type=="DDPG"):
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if(args.type=="DDQN"):
        algo = DDQN(action_dim, state_dim, args)
    elif(args.type=="A2C"):
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif(args.type=="A3C"):
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    elif(args.type=="DDPG"):
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if(args.gather_stats):
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir,
        args.type,
        args.env,
        args.nb_episodes,
        args.batch_size)

    algo.save_weights(export_path)
    env.env.close()
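When gather_stats is enabled, algo.train() is expected to return one row per logged episode (episode index, mean reward, reward standard deviation), which the block above writes to <args.type>/logs.csv. A minimal sketch, under that assumption, of reading the log back to inspect training progress:

import pandas as pd

# Path assumes the script was run with args.type == "DDQN".
df = pd.read_csv("DDQN/logs.csv", index_col=0)
print(df.tail())                               # last few logged episodes
print("best mean reward:", df["Mean"].max())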
Example No. 4
#target_fixation_time = 2

# Configuration parameters
save_variables = 0
save_figures = 0

hidden_representations = np.zeros([2,10])
""" RUN """
for learning_rate in learning_rates:

    # define empty states as initial values
    empty_state_memory = np.zeros([num_steps_unrolled, input_size])  # for fixation period
    shortempty_state = np.zeros(10)

    # create instance of Advantage Actor Critic
    algorithm = A2C(a_size, input_size, num_lstm_units, num_steps_unrolled, gamma, learning_rate, print_summary=True)

    # empty variables for storing actions, rewards, states and trial indices
    reward_pertrial_matrix = np.zeros([num_episode_train, num_trial_per_episode])
    rewarded_image = np.zeros([num_episode_train, num_trial_per_episode])
    selected_image = np.zeros([num_episode_train, num_trial_per_episode])
    discounted_rewards = np.zeros([num_episode_train, num_trial_per_episode])
    state_values = []
    action_values = np.zeros([num_episode_train, num_trial_per_episode, a_size])
    run_duration = 0
    model = []

    time_start = time.time()
    state_memory = np.array(empty_state_memory)

    # run episodes
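The snippet above preallocates per-episode, per-trial logging arrays of shape (num_episode_train, num_trial_per_episode) and breaks off before the episode loop that fills them. A minimal sketch of that preallocate-then-fill pattern, with toy sizes and a random value standing in for the unseen task reward:

import numpy as np

num_episode_train, num_trial_per_episode = 5, 8
reward_pertrial_matrix = np.zeros([num_episode_train, num_trial_per_episode])

for episode in range(num_episode_train):
    for trial in range(num_trial_per_episode):
        # Stand-in for the reward produced by the agent/environment interaction.
        reward_pertrial_matrix[episode, trial] = np.random.rand()

print(reward_pertrial_matrix.mean(axis=1))  # mean reward per episode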
Example No. 5
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" +
                                           args.env)

    # Environment Initialization
    if (args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif (args.type == "DDPG"):
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high

    else:
        if args.env == 'cell':
            # Custom optical tweezers environment
            env = Environment(opticalTweezers(), args.consecutive_frames)
            # env = opticalTweezers(consecutive_frames=args.consecutive_frames)
            env.reset()
            state_dim = (6, )
            action_dim = 4  # note: the reshape code must be changed for a 2D agent
        else:
            # Standard Environments
            env = Environment(gym.make(args.env), args.consecutive_frames)
            env.reset()
            state_dim = env.get_state_size()
            print(state_dim)
            action_dim = gym.make(args.env).action_space.n
            print(action_dim)
    # Pick algorithm to train
    if (args.type == "DDQN"):
        algo = DDQN(action_dim, state_dim, args)
    elif (args.type == "A2C"):
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif (args.type == "A3C"):
        algo = A3C(action_dim,
                   state_dim,
                   args.consecutive_frames,
                   is_atari=args.is_atari)
    elif (args.type == "DDPG"):
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if (args.gather_stats):
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Display agent
    old_state, time = env.reset(), 0
    # all_old_states=[old_state for i in range(args.consecutive_frames)]
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done: old_state = env.reset()
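As written, the display loop never exits, so the environment is never closed (Example No. 1 has the same issue, and its env.env.close() call is unreachable). A minimal sketch of a bounded variant, assuming the env and algo objects built above are in scope:

max_steps = 1000
old_state, step = env.reset(), 0
while step < max_steps:
    env.render()
    a = algo.policy_action(old_state)
    old_state, r, done, _ = env.step(a)
    step += 1
    if done:
        old_state = env.reset()
env.env.close()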