Example no. 1
def make_bot(un, pw, expected_opponent, team, challenge, trainer, epsilon=None, 
	model_path=None, target_model_path=None
):
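	# Trainer bots get either a bare evaluation DQN (its weights appear to be loaded
	# later, e.g. on challenge update) or a RandomAgent; non-trainer bots load the
	# given model (and optionally a separate target model) for evaluation play.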
	
	if trainer:
		if model_path:
			agent = DQNAgent(INPUT_SHAPE, training=False)
		else:
			agent = RandomAgent()
	else:
		agent = DQNAgent(
			INPUT_SHAPE, epsilon=epsilon, random_moves=True, training=False, 
			copy_target_model=False
		)
		agent.load_model(model_path)
		if target_model_path is not None:
			agent.target_model = load_model(target_model_path)
		else:
			agent.target_model.set_weights(agent.model.get_weights())

	bot = BotClient(
		name=un, password=pw, expected_opponent=expected_opponent, team=team, 
		challenge=challenge, runType=RunType.Iterations, runTypeData=1, 
		agent=agent, trainer=trainer, save_model=False, 
		should_write_replay=(not trainer)
	)
	bot.start()
Example no. 2
def main():
    # parser = argparse.ArgumentParser(description='Run DQN on Atari SpaceInvaders')
    # parser.add_argument('--env', default='SpaceInvaders-v0', help='Atari env name')
    # parser.add_argument(
    #     '-o', '--output', default='SpaceInvaders-v0', help='Directory to save data to')
    # parser.add_argument('--seed', default=0, type=int, help='Random seed')
    # # parser.add_argument('--input_shape', default=(84, 84, 4), type=tuple, help='Size of each frame')
    #
    # args = parser.parse_args()
    #
    # args.output = get_output_folder(args.output, args.env)

    #vehicle_network
    veh_network = create_lstm_model(nb_time_steps,
                                    nb_input_vector,
                                    num_actions=g1)
    # Attacker network
    # NOTE: `args` is only defined if the argparse block above is re-enabled;
    # as written, gym.make(args.env) raises a NameError here.
    att_network = create_lstm_model(nb_time_steps,
                                    nb_input_vector,
                                    num_actions=gym.make(
                                        args.env).action_space.n)
    veh_agent = DQNAgent(q_network=veh_network,
                         preprocessor=core.Preprocessor(),
                         memory=core.ReplayMemory(),
                         policy=1,
                         gamma=0.1,
                         target_update_freq=100,
                         num_burn_in=100,
                         train_freq=20,
                         batch_size=32)
    att_agent = DQNAgent(q_network=att_network,
                         preprocessor=core.Preprocessor(),
                         memory=core.ReplayMemory(),
                         policy=1,
                         gamma=0.1,
                         target_update_freq=100,
                         num_burn_in=100,
                         train_freq=20,
                         batch_size=32)
    veh_agent.compile('Adam', 'mse')
    att_agent.compile('Adam', 'mse')
    env = VehicleFollowingENV()
    for i_episode in range(20):
        veh_agent.fit(env, 10**6)
    # env.close()
    model_json = veh_network.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)
Example no. 3
def play_it():
    #ENV_NAME = 'CartPole-v0'
    #ENV_NAME = 'MountainCar-v0'
    ENV_NAME = 'Single_virtual-v0'
    # Get the environment and extract the number of actions.
    env = make(ENV_NAME)
    env1 = make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n
    model = build_model(nb_actions,env.observation_space)
    # model = build_model1(nb_actions, env.observation_space)

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy,)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! Visualization is disabled during training because
    # it slows things down quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

    # After training is done, we save the final weights.
    dqn.save_weights(os.path.join('models_weights_logs','dqn_{}_weights.h5f'.format(ENV_NAME+ datetime.now().strftime("%Y%m%d-%H%M%S"))), overwrite=True)
    # dqn.load_weights(os.path.join('models_weights_logs','dqn_{}_weights.h5f'.format(ENV_NAME)))
    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env1, nb_episodes=5, visualize=True)
Example no. 4
    def predict_dqn(self):
        # get size of state and action from environment
        state_size = 4
        action_size = 2

        agent = DQNAgent(state_size, action_size, load_model=True)

        done = False
        score = 0

        self.reset()
        state, _, _, _ = self.step(-1)
        state = np.reshape(state, [1, state_size])

        while not done:
            # get action for the current state and go one step in environment
            action = agent.get_action(state)
            next_state, reward, done, info = self.step(action)
            next_state = np.reshape(next_state, [1, state_size])

            score += reward
            state = next_state

            if done or score >= 500:
                print("score:", score)
                break
Example no. 5
 def __init__(self, host, port):
     self.state_size = 3
     self.action_size = 7
     self.done = False
     self.batch_size = 32
     self.agent = DQNAgent(self.state_size, self.action_size)
     self.state_now = np.reshape([0.10606659, -0.52737298, 0.47917915],
                                 [1, self.state_size])
     self.state_last = np.reshape([0.10606659, -0.52737298, 0.47917915],
                                  [1, self.state_size])
     self.action_for_next = 0
     self.action_for_now = 0
     self.reward = 0
     self.forward = "T394"
     self.left = "S450"
     self.right = "S270"
     self.backward = "T330"
     self.stop = "T370"
     self.middle = "S360"
     # socket server: accept the client connection and wrap it as a binary stream
     self.server_socket = socket.socket()
     self.server_socket.bind((host, port))
     self.server_socket.listen(0)
     self.connection, self.client_address = self.server_socket.accept()
     self.connection = self.connection.makefile("rb")
     self.host_name = socket.gethostname()
     self.host_ip = socket.gethostbyname(self.host_name)
     self.temp_result = None
     self.finnal_result = None
     self.RANGE = 350
     self.WIDTH = 720
     self.time_now = 0
     self.count = 0
     self.streaming()
Example no. 6
    def __init__(self, config):
        # Create session to store trained parameters
        self.session = tf.Session()

        self.action_count = config["action_count"]

        # Create agent for training
        self.agent = DQNAgent(self.action_count)

        # Create memory to store observations
        self.memory = ExperienceMemory(config["replay_memory_size"])

        # Tools for saving and loading networks
        self.saver = tf.train.Saver()

        # Last action that agent performed
        self.last_action_index = None

        # Deque to keep track of average reward and play time
        self.game_history = GameHistory(config["match_memory_size"])

        # Deque to store losses
        self.episode_history = EpisodeHistory(config["replay_memory_size"])

        self.INITIAL_EPSILON = config["initial_epsilon"]
        self.FINAL_EPSILON = config["final_epsilon"]
        self.OBSERVE = config["observe_step_count"]
        self.EXPLORE = config["explore_step_count"]
        self.FRAME_PER_ACTION = config["frame_per_action"]
        self.GAMMA = config["gamma"]
        self.LOG_PERIOD = config["log_period"]
        self.BATCH_SIZE = config["batch_size"]
Example no. 7
def test_dqn():
    args = DQNArgs()
    env = gym.make(args.env_name)
    agent = DQNAgent(env, QNet, SimpleNormalizer, args)
    agent.load(args.save_dir)
    for _ in range(10):
        agent.test_one_episode(True)
Example no. 8
    def run(self):
        ### create TORCS environment
        env = TorcsEnv(vision=False, throttle=True)   

        ### start run according to supplied arguments
        if self.algorithm == "dqn" and self.modus == "train":
            agent = DQNAgent(env, self.track, self.numOfEpisodes)
            agent.trainAgent()
        elif self.algorithm == "dqn" and self.modus == "test":
            agent = DQNAgent(env, self.track, self.numOfEpisodes)
            agent.testAgent()
        elif self.algorithm == "ddpg" and self.modus == "train":
            agent = DDPGAgent(env, self.track, self.numOfEpisodes)
            agent.trainAgent()
        elif self.algorithm == "ddpg" and self.modus == "test":
            agent = DDPGAgent(env, self.track, self.numOfEpisodes)
            agent.testAgent()
Example no. 9
    def get_agent(env, **kwargs):
        replay_capacity = 1e6
        n_episodes = 10e7

        return DQNAgent(env=env or gym.make('CartPole-v0'),
                        n_episodes=n_episodes,
                        replay_capacity=replay_capacity,
                        **kwargs)
Example no. 10
def train_dqn():
    args = DQNArgs()
    env = gym.make(args.env_name)
    agent = DQNAgent(env, QNet, SimpleNormalizer, args)
    pre_best = -1e9
    for ep in range(args.max_ep):
        agent.train_one_episode()
        if ep % args.test_interval == 0:
            r = agent.test_model()
            if r > pre_best:
                pre_best = r
                agent.save(args.save_dir)
Example no. 11
def main():
    train_data, parameter[1]["episode_length"] = data_prepare(parameter)
    parameter[2]['action_size'], parameter[2][
        'state_size'], state, env = create_states(parameter, train_data)
    #create model
    agent = DQNAgent(parameter)
    #train model and save
    train(agent, parameter, state, env)
    caculation(agent, env)

    #test model
    parameter[0]["mode"] = 'test'
    test_data = data_prepare(parameter)[0]
    test_env = create_states(parameter, test_data)[3]
    caculation(agent, test_env)
Example no. 12
 async def on_challenge_update(self, challenge_data):
     incoming = challenge_data.get('challengesFrom', {})
     if self.expected_opponent.lower() in incoming:
         if self.trainer:
             model_paths = [
                 os.path.join(self.logs_dir, content)
                 for content in os.listdir(self.logs_dir) if
                 content.endswith('.model') and content.startswith('Epoch')
             ]
             if len(model_paths) > 0:
                 sorted_model_paths = sorted(
                     model_paths,
                     key=lambda x: int(
                         os.path.basename(x).lstrip('Epoch').rstrip('.model'
                                                                    )))
                 model_to_load = sorted_model_paths[-1]
                 self.log(f'Loading model {model_to_load}')
                 self.agent = DQNAgent(INPUT_SHAPE, training=False)
                 self.agent.load_model(model_to_load)
         await self.accept_challenge(self.expected_opponent, self.team_text)
Example no. 13
    def __init__(self, player_name=None, letter=None):
        if player_name is None:
            self.player_name = common_utils.get_random_name()
        else:
            self.player_name = player_name

        if letter is not None:
            self.letter = letter
        else:
            pass
            # TODO: Handle this

        if letter == 'X':
            self.enemy_letter = 'O'
        else:
            self.enemy_letter = 'X'

        logger.debug("Initializing player {} with letter {} ...".format(
            self.player_name, self.letter))

        self.agent = DQNAgent()
Example no. 14
def load_model(MODEL_TYPE):
    curr_model = None
    if MODEL_TYPE == "SVM":
        print("LOADING SVM...")
        curr_model = load("svm.joblib")
    elif MODEL_TYPE == "LR":
        print("LOADING LR...")
        lr = LogReg(74)  #(env.matches.shape[1])
        lr.load_weights("weights/weights-improvement-100-0.31.hdf5")
        curr_model = lr
    elif MODEL_TYPE == "DT":
        print("LOADING DT...")
        curr_model = load("dt.joblib")
    elif MODEL_TYPE == "GB":
        print("LOADING GB...")
        curr_model = load("gb.joblib")
    elif MODEL_TYPE == "RF":
        print("LOADING RF...")
        curr_model = load("rfc.joblib")
    elif MODEL_TYPE == "NB":
        print("LOADING NB...")
        curr_model = load("nb.joblib")
    elif MODEL_TYPE == "AB":
        print("LOADING AB...")
        curr_model = load("ab.joblib")
    elif MODEL_TYPE == "DQN":
        print("LOADING DQN...")
        BetNet = DQNAgent(75)
        BetNet.load("weights/betnet-weights-dqn.h5")
        curr_model = BetNet
    else:
        print("LOADING NN...")
        BetNet = Network(74)  #(env.matches.shape[1])
        BetNet.load_weights(
            'weights/Adadelta/test9_400_Best/weights-improvement-400-0.48.hdf5'
        )  #PCA("weights/Adadelta/test13_100iter_reglast2/weights-improvement-100-0.52.hdf5")  # Most recent weights
        curr_model = BetNet
    return curr_model
Example no. 15
    def simulateGustsControl(self):
        '''
        Simulate the response of the controller to gusts.

        :return: A plot of the simulation.
        '''
        self.sim_time = 100
        agent = DQNAgent(self.mdp.size, self.action_size)
        agent.load(self.src)
        WH = self.wh.generateWind()
        hdg0 = 0 * TORAD * np.ones(self.wh.samples)

        state = self.mdp.initializeMDP(hdg0, WH)

        i = np.ones(0)
        v = np.ones(0)
        wind_heading = np.ones(0)

        for time in range(self.sim_time):
            WH = self.wh.generateWind()
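            # Inject a wind gust partway through the simulation to test the controller's response.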
            if time == 20:
                WH = self.wh.generateGust(10 * TORAD)
            action = agent.actDeterministically(state)
            next_state, reward = self.mdp.transition(action, WH)
            state = next_state
            i = np.concatenate([i, self.mdp.extractSimulationData()[0, :]])
            v = np.concatenate([v, self.mdp.extractSimulationData()[1, :]])
            wind_heading = np.concatenate([wind_heading, WH[0:10]])

        time_vec = np.linspace(0, self.sim_time, int((self.sim_time) / self.mdp.dt))

        f, axarr = plt.subplots(2, sharex=True)
        axarr[0].plot(time_vec, i / TORAD)
        axarr[1].plot(time_vec, v)
        axarr[0].set_ylabel("angle of attack")
        axarr[1].set_ylabel("v")

        plt.show()
Example no. 16
    def simulateDQNControl(self, hdg0):
        '''
        Plots the control law of the network over a simulation.

        :param hdg0: Initial heading of the boat for the simulation.
        :return: A plot of the angle of attack and velocity during the control.
        '''
        agent = DQNAgent(self.mdp.size, self.action_size)
        agent.load(self.src)
        WH = self.wh.generateWind()
        hdg0 = hdg0 * TORAD * np.ones(self.wh.samples)

        state = self.mdp.initializeMDP(hdg0, WH)

        i = np.ones(0)
        v = np.ones(0)
        wind_heading = np.ones(0)

        for time in range(self.sim_time):
            WH = self.wh.generateWind()
            action = agent.actDeterministically(state)
            next_state, reward = self.mdp.transition(action, WH)
            state = next_state
            i = np.concatenate([i, self.mdp.extractSimulationData()[0, :]])
            v = np.concatenate([v, self.mdp.extractSimulationData()[1, :]])
            wind_heading = np.concatenate([wind_heading, WH[0:10]])

        time_vec = np.linspace(0, self.sim_time, int((self.sim_time) / self.mdp.dt))

        f, axarr = plt.subplots(2, sharex=True)
        axarr[0].plot(time_vec, i / TORAD)
        axarr[1].plot(time_vec, v)
        axarr[0].set_ylabel("i [°]")
        axarr[1].set_ylabel("v [m/s]")
        axarr[0].set_xlabel("t [s]")
        axarr[1].set_xlabel("t [s]")

        plt.show()
Example no. 17
def build(args):
    # Params
    training = is_training(args)
    # Hack for switching number of DQN input features (see help)
    n_feats = {'all': 11, 'distance': 1}
    n_actions = 4  # we are ignoring action 0 (for now)

    # Maximum number of steps per episode
    max_steps = 8 * (args.dims[0] + args.dims[1]) - 1
    # Total feature dimension
    total_feats = n_feats[args.feats] * sum(
        [4**i for i in range(args.n_nodes + 1)])

    # Flatland Environment
    environment = FlatlandEnv(x_dim=args.dims[0],
                              y_dim=args.dims[1],
                              n_cars=args.n_agents,
                              n_acts=n_actions,
                              min_obs=-1.0,
                              max_obs=1.0,
                              n_nodes=args.n_nodes,
                              feats=args.feats)

    # Simple DQN agent
    agent = DQNAgent(alpha=0.0005,
                     gamma=0.99,
                     epsilon=1.0,
                     input_shape=total_feats,
                     sample_size=512,
                     batch_size=32,
                     n_actions=n_actions,
                     training=training)

    if not training:
        agent.load_model()

    return environment, agent, max_steps
Example no. 18
def main():
    # vehicle_network
    veh_network = create_lstm_model(nb_time_steps, nb_input_vector, num_actions=4)
    # Attacker network
    # att_network = create_lstm_model(nb_time_steps, nb_input_vector, num_actions=4)
    veh_agent = DQNAgent(q_network=veh_network,
                         q_network2=veh_network,
                         preprocessor=core.Preprocessor(),
                         RLmemory=core.ReplayMemory(),
                         SLmemory=core.ReplayMemory(),
                         policy=1,
                         gamma=0.1,
                         target_update_freq=100,
                         num_burn_in=100,
                         train_freq=20,
                         batch_size=32)
    # att_agent = DQNAgent(q_network=att_network,
    #                      q_network2=att_network,
    #                      preprocessor=core.Preprocessor(),
    #                      memory=core.ReplayMemory(),
    #                      policy=1,
    #                      gamma=0.1,
    #                      target_update_freq=100,
    #                      num_burn_in=100,
    #                      train_freq=20,
    #                      batch_size=32)
    veh_agent.compile('Adam', 'mse')
    # att_agent.compile('Adam', 'mse')
    env = VehicleFollowingENV()
    for i_episode in range(20):
        veh_agent.fit(env=env, num_iterations=10 ** 6)
        # att_agent.fit(env, 10 ** 6)
    # env.close()
    model_json = veh_network.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)
Example no. 19
def main(argv):
    args = parser.parse_args(argv[1:])

    if args.usage == 'help':
        return parser.print_help()

    if is_environments_gen(args):
        _write_env_file(args)
    elif is_environments_list(args):
        all_registry = registry.all()
        registry_envs_name = [
            trim_env_spec_name(env.__repr__()) for env in all_registry
        ]
        for environment in registry_envs_name:
            print(environment)
    elif is_environments_act(args):
        env = gym.make(args.environment_name)
        if is_action_type('dqn', args):
            if args.pre_defined_state_size == 'nesgym':
                pre_state_size = 172032
            elif args.pre_defined_state_size == 'gym':
                pre_state_size = env.observation_space.shape[0]
            elif args.pre_defined_state_size == 'gym-atari':
                pre_state_size = 100800
            elif args.pre_defined_state_size == 'gym-atari-extend':
                pre_state_size = 120000
            elif args.pre_defined_state_size == 'gym-atari-small':
                pre_state_size = 100800
            elif args.pre_defined_state_size == 'gym-gomoku':
                pre_state_size = 361
            else:
                # Fall back to the flat observation size so pre_state_size is always defined.
                pre_state_size = env.observation_space.shape[0]
            # state_size = (1,) + env.observation_space.shape
            state_size = pre_state_size
            action_size = env.action_space.n
            agent = DQNAgent(state_size, action_size)
            # try:
            #     agent.load('./weights/dqn_{}_{}_{}.h5'.format(args.environment_name.lower(), args.timesteps,
            #                                           args.i_episodes))
            # except Exception:
            #     pass
            done = False
            batch_size = 64
        i_episodes = args.i_episodes
        timesteps = args.timesteps
        factor = args.seed_factor
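        # Roll out each episode for up to `timesteps` steps; when the DQN agent is active,
        # transitions are stored with agent.remember() and replayed after the episode.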
        for i_episode in range(i_episodes):
            state = env.reset()
            if is_action_type('dqn', args):
                state = np.reshape(state, [1, pre_state_size])
            for t in range(timesteps):
                try:
                    if args.render == 'present': env.render()
                    if args.render == 'presented': env.render(args.render)
                    if args.action_type == 'alternate':
                        action_choice = i_episodes * 2
                        action = random_action_space_sample_choice(
                            action_choice, env, factor)
                    elif args.action_type == 'specific':
                        action = env.action_space.sample()
                    elif args.action_type == 'conditional':
                        action_choice = i_episodes
                        action = random_action_space_sample_choice(
                            action_choice, env, factor)
                    elif args.action_type == 'numerical':
                        action = env.action_space.n
                    elif is_action_type('dqn', args) and len(state) == 5:
                        action = agent.act(state)
                    elif is_action_type('dqn', args) and len(state) != 5:
                        action = env.action_space.sample()
                    collect_stat(action, ['input', 'actions'], stats)
                    observation, reward, done, info = env.step(action)
                    if is_action_type('dqn', args):
                        reward = reward if not done else -10
                        observation = np.reshape(observation,
                                                 [1, pre_state_size])
                        agent.remember(state, action, reward, observation,
                                       done)
                        state = observation
                    # collect_stat(observation,['observation'],stats)
                    collect_stat(reward, ['rewards'], stats)
                    # collect_stat(done,['output','done'],stats)
                    # collect_stat(info,['output','info'],stats)
                    if done:
                        max_episodes_range = (i_episodes - 1)
                        episode_timesteps_iteration_limit = max_episodes_range - 1
                        is_latest_episode = is_filled_latest_episode_with_iteration(
                            i_episode, episode_timesteps_iteration_limit)
                        increased_timestep = increase_timestep(t)
                        print('i_episode {}'.format(i_episode))
                        print('Episode finished after {} timesteps'.format(
                            increased_timestep))
                        if is_action_type('dqn', args):
                            print('Episode: {}/{}, score: {}, e: {:.2}'.format(
                                i_episode, i_episodes, t, agent.epsilon))
                        collect_stat(t, ['output', 'timestep', 'iteration'],
                                     stats)
                        collect_stat(increased_timestep,
                                     ['output', 'timestep', 'increased'],
                                     stats)
                        is_latest_episode_to_save_state = lambda args_cached: is_latest_episode and args_cached.output_stats_filename
                        if is_latest_episode_to_save_state(args):
                            filename = args.output_stats_filename
                            pre_df = {
                                # 'observations': stats['observations'],
                                'rewards': stats['rewards'],
                                # 'done-output': stats['output']['done'],
                                # 'info-output': stats['output']['info'],
                                # 'iteration-timestep': stats['output']['timestep']['iteration'],
                                # 'increased-timestep': stats['output']['timestep']['increased'],
                                'actions-input': stats['input']['actions']
                            }
                            df = pd.DataFrame(pre_df)
                            stamp = lambda: '%s' % (int(datetime.now().
                                                        timestamp()))
                            with open(
                                    'data/{}-{}.csv'.format(stamp(), filename),
                                    'w') as f:
                                f.write(df.to_csv())
                                f.close()
                            print('Statistics file saved ({}.csv)!'.format(
                                filename))
                            del df
                            del filename
                        print(check_output_env_label())
                        del is_latest_episode_to_save_state
                        del increased_timestep
                        del is_latest_episode
                        del episode_timesteps_iteration_limit
                        del max_episodes_range
                        break
                except Exception as e:
                    print('Rendering execution ({})'.format(e))
                finally:
                    print('Execution of timestep done')
            if is_action_type('dqn',
                              args) and (len(agent.memory) > batch_size):
                agent.replay(batch_size)
        # agent.save('./weights/dqn_{}_{}_{}.h5'.format(args.environment_name.lower(), args.timesteps,
        #                                       args.i_episodes))
        # env.close()
    else:
        parser.print_help()
Example no. 20
def main():
    print "Creating DQN agent..."
    # env = gym.make("codegen-v0")
    set_debugger_org_frc()

    iters = 6300
    n_goal = 0
    n_goal_all = 0
    time_stamp = 0

    max_steps = 5
    agent = DQNAgent(max_steps)
    agent.dqn.initial_exploration = 6000 * max_steps

    for iter in range(iters):
        print "\n********Iteration # ", iter, "***********\n"
        # 1 iteration
        env = gym.make("codegen-v0")
        num = random.randrange(1, 100)
        print "Goal Number : ", num + 1
        env.my_input = num
        #env.goal = "['" + env.my_input + "']"
        env.goal = str(num + 1)

        code = env._reset()
        step_in_episode = 0
        total_score = 0.0
        reward = 0.0
        mystate = []
        my_state_new = []

        # debug : the sys
        # sss = []
        # for arg in sys.argv[1:]:
        #    sss.append(arg)
        # print "sss = " , sss

        # while True:
        while step_in_episode < max_steps:

            # state = env.code_index_list + [-1]*(max_steps-len(env.code_index_list))
            state = env.code_index_list[:]
            state += np.zeros([
                max_steps - len(env.code_index_list), agent.dqn.code_idx_size
            ],
                              dtype=int).tolist()
            # state = state.tolist()
            # state = 1;
            # print "env = ",env.code_index_list
            # print "state = ",state
            # raw_input()

            if step_in_episode == 0:
                action_idx = agent.start(code, state)
            else:
                action_idx = agent.act(code, state, reward)

            code, reward, terminal, info = env._step(action_idx,
                                                     agent.dqn.actions)
            state_prime = env.code_index_list[:]
            state_prime += np.zeros([
                max_steps - len(env.code_index_list), agent.dqn.code_idx_size
            ],
                                    dtype=int).tolist()

            # debug : the sys
            # sss = []
            # for arg in sys.argv[1:]:
            #    sss.append(arg)
            # print "sss = " , sss

            print "state : "
            print state
            print "state' : "
            print state_prime

            if step_in_episode == max_steps - 1:
                agent.dqn.stock_experience(agent.dqn.time_stamp, state,
                                           action_idx, reward, state_prime, 1)
            else:
                agent.dqn.stock_experience(agent.dqn.time_stamp, state,
                                           action_idx, reward, state_prime, 0)

            agent.dqn.experience_replay(agent.dqn.time_stamp)

            agent.dqn.target_model_update(agent.dqn.time_stamp,
                                          soft_update=False)

            total_score += reward

            if terminal:

                agent.dqn.goal_idx.append(agent.dqn.time_stamp)

                agent.end(reward)
                agent.dqn.stock_experience(agent.dqn.time_stamp, state,
                                           action_idx, reward, state_prime, 1)

                n_goal_all += 1
                step_in_episode += 1
                agent.dqn.time_stamp += 1

                if iters - iter <= 100:
                    n_goal += 1

                break

            step_in_episode += 1
            agent.dqn.time_stamp += 1

        if iter == 1 + (agent.dqn.initial_exploration / max_steps):
            print "n_goal_all = ", n_goal_all
            print agent.dqn.goal_idx
            raw_input()

    print "n_goal : ", n_goal
    print "epsilon : ", agent.epsilon
Example no. 21
    curr_model = load("dt.joblib")
elif MODEL_TYPE == "GB":
    print("LOADING GB...")
    curr_model = load("gb.joblib")
elif MODEL_TYPE == "RF":
    print("LOADING RF...")
    curr_model = load("rfc.joblib")
elif MODEL_TYPE == "NB":
    print("LOADING NB...")
    curr_model = load("nb.joblib")
elif MODEL_TYPE == "AB":
    print("LOADING AB...")
    curr_model = load("ab.joblib")
elif MODEL_TYPE == "DQN":
    print("LOADING DQN...")
    BetNet = DQNAgent(75)
    BetNet.load("weights/betnet-weights-dqn.h5")
    curr_model = BetNet
else:
    print("LOADING NN...")
    BetNet = Network(env.matches.shape[1])
    BetNet.load_weights(
        "weights/Adadelta/test13_100iter_reglast2/weights-improvement-100-0.52.hdf5"
    )  # Most recent weights
    curr_model = BetNet

###############################################################################


#GETS THE PREDICTION VEC GIVEN MODEL
def generatePrediction(mt, curr_model, to_process):
Example no. 22
from mdp import MDP
import random

import numpy as np

# NOTE: DQNAgent, wind, and TORAD are assumed to come from this project's own
# modules; their import lines are not part of this snippet.
'''
MDP Parameters
'''
mdp = MDP(duration_history=3, duration_simulation=1, delta_t=0.1)
'''
Environment Parameters
'''
w = wind(mean=45 * TORAD, std=0 * TORAD, samples=10)
WH = w.generateWind()

hdg0 = 0 * np.ones(10)
mdp.initializeMDP(hdg0, WH)

agent = DQNAgent(mdp.size, action_size=2)
#agent.load("../Networks/lighter_archi")
batch_size = 50

EPISODES = 500
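# Candidate initial headings, in degrees; each sampled value is multiplied by TORAD
# below (presumably a degrees-to-radians conversion factor).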
hdg0_rand_vec = [-3, 0, 3, 6, 9, 12, 15, 18, 21]

loss_of_episode = []
i = []
v = []
r = []
for e in range(EPISODES):
    WH = w.generateWind()
    hdg0_rand = random.choice(hdg0_rand_vec) * TORAD
    hdg0 = hdg0_rand * np.ones(10)
Example no. 23
def optimize_agent(trial, args):
    "Optimize the model."

    model_name = args.study_name + "_" + str(trial.number)
    env_kwargs = dict()
    callback_checkpoint_kwargs = dict()
    save_dir = args.save_dir
    log_interval = args.log_interval
    num_cpus = args.num_cpus
    eval_episodes = args.eval_episodes
    n_steps = args.n_steps
    layer_normalization = args.layer_normalization
    layers = args.layers
    env_kwargs["board_size"] = 4
    env_kwargs["binary"] = not args.no_binary
    env_kwargs["extractor"] = args.extractor
    env_kwargs["seed"] = args.seed
    env_kwargs["penalty"] = args.penalty
    callback_checkpoint_kwargs["save_freq"] = args.save_freq
    callback_checkpoint_kwargs["save_path"] = args.save_dir
    callback_checkpoint_kwargs["name_prefix"] = model_name

    if args.agent == "ppo2":
        model_kwargs = trial_hiperparameter_ppo2(trial)
        model_kwargs["agent"] = "ppo2"
        model_kwargs["tensorboard_log"] = args.tensorboard_log
        model = PPO2Agent(
            model_name,
            save_dir,
            log_interval,
            num_cpus,
            eval_episodes,
            n_steps,
            layer_normalization,
            model_kwargs,
            env_kwargs,
            callback_checkpoint_kwargs,
        )
    elif args.agent == "dqn":
        # model_kwargs = trial_hiperparameter_dqn(trial)
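        # NOTE: the trial-sampled DQN hyperparameters are disabled; the values below are fixed.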
        model_kwargs = {}
        model_kwargs["learning_rate"] = 0.0001
        model_kwargs["batch_size"] = 10000
        model_kwargs["learning_starts"] = 10000
        model_kwargs["target_network_update_freq"] = 1000
        model_kwargs["train_freq"] = 4
        model_kwargs["agent"] = "dqn"
        model_kwargs["tensorboard_log"] = args.tensorboard_log
        model_kwargs["double_q"] = True
        model_kwargs["prioritized_replay"] = True
        model_kwargs["param_noise"] = True
        print(model_kwargs)
        model = DQNAgent(
            model_name,
            save_dir,
            log_interval,
            num_cpus,
            eval_episodes,
            n_steps,
            layer_normalization,
            layers,
            args.load_path,
            args.num_timesteps_log,
            model_kwargs,
            env_kwargs,
            callback_checkpoint_kwargs,
        )
    elif args.agent == "acer":
        model_kwargs = trial_hiperparameter_acer(trial)
        model_kwargs["agent"] = "acer"
        model_kwargs["tensorboard_log"] = args.tensorboard_log
        model_kwargs["replay_start"] = 2000
        model = ACERAgent(
            model_name,
            save_dir,
            log_interval,
            num_cpus,
            eval_episodes,
            n_steps,
            layer_normalization,
            model_kwargs,
            env_kwargs,
            callback_checkpoint_kwargs,
        )
    else:
        raise ValueError("Choose a valid agent model")

    model.train()
    total_score = model.test()

    return total_score
Example no. 24
 def __init__(self, simulator):
     self.agent = DQNAgent(25, 6)
     self.agent.load("./save/car-100-dqn.h5")
     self.simulator = simulator
     self.agent.epsilon = 0
Example no. 25
if __name__ == '__main__':

    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    #device = torch.device('cpu')
    print(device)
    env = make_env(seed)
    state_shape = env.observation_space.shape
    n_actions = env.action_space.n
    state = env.reset()

    agent = DQNAgent(state_shape, n_actions, epsilon=0.9).to(device)
    #agent.load_state_dict(torch.load('dqn.weights'))
    target_network = DQNAgent(state_shape, n_actions).to(device)
    target_network.load_state_dict(agent.state_dict())
    opt = torch.optim.Adam(agent.parameters(), lr=1e-4)
    exp_replay = ReplayBuffer(buffer_size)

    print('test_buffer')
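    # Fill the replay buffer with transitions from the initial (epsilon = 0.9, mostly random)
    # policy before the training loop starts.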
    for i in range(100):
        play_and_record(state, agent, env, exp_replay, n_steps=10**2)
        if len(exp_replay) == buffer_size:
            break
    print(len(exp_replay))

    state = env.reset()
    for step in trange(step, total_steps + 1):
Example no. 26
optimizer = Adam(learning_rate=0.001)

memory = SequentialMemory(limit=20000, window_length=WINDOW_LENGTH)

policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                              attr='eps',
                              value_max=1.0,
                              value_min=0.1,
                              value_test=0.05,
                              nb_steps=1000000)

dqn = DQNAgent(model=model,
               nb_actions=3,
               policy=policy,
               memory=memory,
               nb_steps_warmup=2000,
               gamma=0.95,
               target_model_update=2000,
               train_interval=1,
               delta_clip=1.0)

dqn.compile(optimizer, metrics=['mae'])

env = Tetris()

start = time.time()

weights_filename = 'dqn_{}_tetris_weights.h5'.format(start)
checkpoint_weights_filename = 'dqn_{}_tetris_weights_.h5'.format(start)
log_filename = 'dqn_tetris_log.json'
callbacks = [
Example no. 27
def create_dqn_agent(num_states, num_actions):
    return DQNAgent(num_states, num_actions)
Example no. 28
					debug_log(f'file content with syntax error\n{s}')
					debug_log('')

				for i in range(5):
					try:
						os.remove(file_path)
						break
					except PermissionError:
						debug_log('Permission error when removing the file')
						time.sleep(1)

			#NOTE: train
			#NOTE: create/load DQN and target DQN in main thread
			keras.backend.clear_session()
			agent = DQNAgent(INPUT_SHAPE, training=True, 
				replay_memory=minibatch, copy_target_model=False
			)
			agent.target_model = load_model(target_model_path)
			#NOTE: train newly loaded model on new data
			if len(minibatch) > 0:
				minibatch_history = agent.train_only(len(minibatch), len(minibatch))
				if minibatch_history is None:
					debug_log('ERROR: Unable to train on iteration\'s data')
				replay_memory.extend(minibatch)
			else:
				debug_log('WARNING: Skipping minibatch training since no new data was found')

			#NOTE: train newly loaded model on random selection of old data
			agent.replay_memory = replay_memory
			sum_loss = 0
			if len(replay_memory) > MIN_REPLAY_MEMORY_SIZE: 
Example no. 29
def main():
    print "Creating DQN agent..."

    iters = 10000
    n_goal = 0
    n_goal_all = 0
    time_stamp = 0

    ############################################################
    # print x
    # max_steps = 3
    # actions = ["print", " ", "x"]
    ############################################################

    ############################################################
    # print x+1
    max_steps = 5
    actions = ["print", " ", "x", "+", "1"]
    ############################################################

    agent = DQNAgent(max_steps, actions)
    agent.dqn.initial_exploration = iters * 0.6

    results = []
    policy_frozen = False
    wins_file = "wins.txt"
    with io.FileIO(wins_file, "w") as file:
        file.write("Winning codes:\n")

    for iter in range(iters):
        print "\n\n::{}::".format(iter)

        if iter == 4300:  # 2300:
            policy_frozen = True

        env = gym.make("codegen-v0")
        num = random.randrange(1, 100)
        env.my_input = num

        ############################################################
        # print x
        # env.goal = str(num)
        ############################################################

        ############################################################
        # print x+1
        env.goal = str(num + 1)
        ############################################################

        code = env._reset()
        step_in_episode = 0
        total_score = 0.0
        reward = 0.0
        mystate = []
        my_state_new = []

        while step_in_episode < max_steps:
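            # Pad the code-index list with zero rows so the state always has max_steps entries.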
            state = env.code_index_list[:]
            state += np.zeros([
                max_steps - len(env.code_index_list), agent.dqn.code_idx_size
            ],
                              dtype=int).tolist()

            if step_in_episode == 0:
                action_idx = agent.start(code, state, policy_frozen)
            else:
                action_idx = agent.act(code, state, reward)

            code, reward, terminal, info = env._step(action_idx,
                                                     agent.dqn.actions)
            state_prime = env.code_index_list[:]
            state_prime += np.zeros([
                max_steps - len(env.code_index_list), agent.dqn.code_idx_size
            ],
                                    dtype=int).tolist()

            agent.dqn.experience_replay(agent.dqn.time_stamp)
            if step_in_episode == max_steps - 1 or terminal:
                agent.dqn.stock_experience(agent.dqn.time_stamp, state,
                                           action_idx, reward, state_prime,
                                           True)
                if terminal:
                    agent.dqn.goal_idx.append(agent.dqn.time_stamp)
                agent.dqn.time_stamp += 1
            else:
                agent.dqn.stock_experience(agent.dqn.time_stamp, state,
                                           action_idx, reward, state_prime,
                                           False)

            total_score += reward

            if terminal:
                agent.end(reward)

                n_goal_all += 1
                step_in_episode += 1

                if iters - iter <= 100:
                    n_goal += 1

            step_in_episode += 1

        if iter >= 100:
            results = results[1:]
        if reward >= 1:
            print "WIN"
            results.append(1.0)
            with io.FileIO(wins_file, "a") as f:
                f.write(
                    "\n=====================\n{}\n=====================\n\n".
                    format(code))
                f.flush()
                os.fsync(f)
        else:
            results.append(0.0)
        total_iters = 100 if iter >= 100 else iter + 1
        print "TOTAL {:.2f}% of wins in last {} iters, sum: {}, total good: {}".format(
            100 * sum(results) / total_iters, total_iters, sum(results),
            len(agent.dqn.goal_idx))

        if iter == 1 + agent.dqn.initial_exploration:
            print "n_goal_all = ", n_goal_all
            print agent.dqn.goal_idx
            raw_input()

    print "n_goal : ", n_goal
    print "epsilon : ", agent.epsilon
Example no. 30
kwargs['ACT_EVERY']=int(kwargs['ACT_EVERY'])
kwargs['SEED']=int(kwargs['SEED'])

#get environment
env = UnityEnvironment(file_name="Banana_Windows_x86_64/Banana.exe")

# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# reset the environment
env_info = env.reset(train_mode=True)[brain_name]

# get environment state and action space sizes
state = env_info.vector_observations[0]
state_size = len(state)
action_size = brain.vector_action_space_size

#make agent
agent = DQNAgent(state_size=state_size, action_size=action_size, **kwargs)       

#load trained agent's weights
weights_name = test_name+'-weights.pth'
weights_path = os.path.join(test_results_path, weights_name)

agent.qnetwork_local.load_state_dict(torch.load(weights_path, map_location=lambda storage, loc: storage))

#navigate
navigate(env, agent, brain_name)