Example #1
def run_a3c_thread(baseDir, runNo, args, max_episode, test_only):
    init_nn_library(True, args['gpu'])
    if not test_only:
        stats = run_a3c(**args)
    #with open(baseDir + '/test-' + str(runNo)  + '/final_stats.pkl', 'w') as f:
    #	pickle.dump(stats, f)
    run_test('dqn', args, baseDir, runNo, max_episode)
Example #2
def run_td_realtime(**kargs):
	if kargs['output_dir'] is None and kargs['logdir'] is not None:
		kargs['output_dir'] = kargs['logdir']

	from collections import namedtuple
	args = namedtuple("TDRealtimeParams", kargs.keys())(*kargs.values())

	if 'dont_init_tf' not in kargs or not kargs['dont_init_tf']:
		init_nn_library(True, "1")

	env = get_env(args.game, args.atari, args.env_transforms)

	envOps = EnvOps(env.observation_space.shape, env.action_space.n, args.learning_rate, mode="train")
	print(env.observation_space.low)
	print(env.observation_space.high)

	env_model = globals()[args.env_model](envOps)
	if args.env_weightfile is not None:
		env_model.model.load_weights(args.env_weightfile)

	v_model = globals()[args.vmodel](envOps)

	import numpy as np
	td_model = TDNetwork(env_model.model, v_model, envOps)

	summary_writer = tf.summary.FileWriter(args.logdir, K.get_session().graph) if args.logdir is not None else None

	replay_buffer = ReplayBuffer(args.replay_buffer_size, 1, args.update_frequency, args.replay_start_size, args.batch_size)

	from utils.network_utils import NetworkSaver
	network_saver = NetworkSaver(args.save_freq, args.logdir, v_model.model)

	v_agent = VAgent(env.action_space, env_model, v_model, envOps, summary_writer, True, replay_buffer, args.target_network_update)

	egreedyOps = EGreedyOps()
	if replay_buffer is not None:
		egreedyOps.REPLAY_START_SIZE = replay_buffer.REPLAY_START_SIZE
	egreedyOps.mode = args.mode
	egreedyOps.test_epsilon = args.test_epsilon
	#egreedyOps.FINAL_EXPLORATION_FRAME = 10000
	if args.mode == "train":
		egreedyOps.FINAL_EXPLORATION_FRAME = args.egreedy_final_step

	if args.mode == "train":
		if args.egreedy_decay<1:
			egreedyOps.DECAY = args.egreedy_decay
			egreedyAgent = EGreedyAgentExp(env.action_space, egreedyOps, v_agent)
		else:
			egreedyAgent = MultiEGreedyAgent(env.action_space, egreedyOps, v_agent, args.egreedy_props, args.egreedy_final, final_exp_frame=args.egreedy_final_step)
	else:
		egreedyAgent = EGreedyAgent(env.action_space, egreedyOps, v_agent)


	runner = Runner(env, egreedyAgent, None, 1, max_step=args.max_step, max_episode=args.max_episode)
	runner.listen(replay_buffer, None)
	runner.listen(v_agent, None)
	runner.listen(egreedyAgent, None)
	runner.listen(network_saver, None)
	#runner.run()
	return runner, v_agent
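
# A minimal usage sketch for run_td_realtime. The keyword names below are exactly
# the ones the function reads; the values (including the env_model / vmodel class
# names and the log directory) are illustrative placeholders, not settings from
# the original project, and must match classes and paths that actually exist.
config = dict(
	game='CartPole-v0', atari=False, env_transforms=[],
	learning_rate=1e-3, env_model='CartPoleEnvModel', vmodel='CartPoleVModel',
	env_weightfile=None, logdir='logs/td_realtime', output_dir=None,
	replay_buffer_size=10000, update_frequency=4, replay_start_size=1000,
	batch_size=32, save_freq=10000, target_network_update=1000,
	mode='train', test_epsilon=0.05, egreedy_final_step=10000,
	egreedy_decay=1, egreedy_props=[1.0], egreedy_final=[0.1],
	max_step=100000, max_episode=None, dont_init_tf=False)
runner, v_agent = run_td_realtime(**config)
runner.run()          # as in Example #3
print(v_agent.stats)  # per-episode statistics collected by the agent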
Example #3
def run_td_realtime_thread(baseDir, runNo, thread_id, args):
    init_nn_library(True, '0')
    runner, agent = run_td_realtime(**args)
    runner.run()
    with open(
            baseDir + '/' + str(args['target_network_update']) + '-' +
            str(runNo) + '.pkl', 'wb') as f:
        pickle.dump(agent.stats, f)
def run_dqn_thread(baseDir, runNo, thread_id, args):
    init_nn_library(True, '0')
    runner, agent = run_dqn(**args)
    runner.run()
    with open(
            baseDir + '/' + str(args['learning_rate']) + '-' + str(runNo) +
            '.pkl', 'wb') as f:
        pickle.dump(agent.stats, f)
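
# A hedged launch sketch for the *_thread helpers above. The original caller is
# not part of this example, so the use of threading.Thread, the number of runs,
# and the argument values are assumptions made purely for illustration.
import threading

# run_dqn's kwargs are not shown in these examples; 'learning_rate' is the only
# key run_dqn_thread itself reads, the rest depend on run_dqn's signature.
dqn_args = {'learning_rate': 1e-3}

threads = []
for run_no in range(3):
    t = threading.Thread(target=run_dqn_thread,
                         args=('results', run_no, run_no, dqn_args))
    t.start()
    threads.append(t)
for t in threads:
    t.join()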
Example #5
args = parser.parse_args()

from PIL import Image

from envs.gym_env import gym_env
from envs.env_transform import WarmUp, ActionRepeat, ObservationStack, EnvTransform
from utils.preprocess import *
from utils.network_utils import NetworkSaver
from runner.runner import Runner
from agents.agent import DqnAgent, DqnAgentOps, EGreedyOps, EGreedyAgent, MultiEGreedyAgent, EGreedyAgentExp
from utils.memory import ReplayBuffer, NStepBuffer
from nets.net import QModel, DQNModel, DqnOps, init_nn_library
import tensorflow as tf
import keras.backend as K

init_nn_library(True, "1")

from utils.viewer import EnvViewer


class Penalizer(EnvTransform):
    def __init__(self, env):
        super(Penalizer, self).__init__(env)

    def reset(self):
        ob = self.env.reset()
        self.score = 0
        return ob

    def step(self, action):
        ob, reward, done = self.env.step(action)
Example #6
def run_td_realtime_thread(baseDir, runNo, args, max_episode, test_only):
    init_nn_library(True, args['gpu'])
    if not test_only:
        runner, _ = run_td_realtime(**args)
        runner.run()
    run_test('td_realtime', args, baseDir, runNo, max_episode)
Example #7
def run_td_thread(baseDir, runNo, args, max_episode, test_only):
    print('*************GPU: *************** ' + args['gpu'])
    init_nn_library(True, args['gpu'])
    if not test_only:
        stats = run_td(**args)
    run_test('td', args, baseDir, runNo, max_episode)
Example #8
from runner.runner import Runner, RunnerListener
from agents.agent import DqnAgent, DqnAgentOps, EGreedyOps, EGreedyAgent, MultiEGreedyAgent
from utils.memory import ReplayBuffer, NStepBuffer
from nets.net import A3CModel, QModel, DQNModel, DqnOps, init_nn_library, TabularQModel, CartPoleModel
import tensorflow as tf
import keras.backend as K

import threading
from threading import Lock
from utils.stopable_thread import StoppableThread

if ENABLE_RENDER:
    from gym.envs.classic_control import rendering
    viewer = rendering.SimpleImageViewer()

init_nn_library(False, "1")

if args.game == "Grid":
    env = GridEnv()
else:
    env = gym_env(args.game)

#modelOps = DqnOps(env.action_count)
#modelOps.dueling_network = args.dueling_dqn
#modelOps.INPUT_SIZE = env.observation_space.n
#modelOps.LEARNING_RATE = 0.2
#modelOps.AGENT_HISTORY_LENGTH = 1
#model = TabularQModel(modelOps)

from nets.initializers import dqn_uniform
from keras.layers import Input, Permute, ZeroPadding2D, Conv2D, Flatten, Dense, Add, Subtract, Lambda
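
# Example #8 stops at the layer imports, so the network itself is not shown.
# The sketch below only illustrates how the imported layers are typically wired
# into a dueling Q-network: the filter counts, dense sizes, and input shape are
# assumptions, and the dqn_uniform initializer imported above (presumably used
# as a kernel_initializer) is omitted here.
from keras.models import Model

def build_dueling_q_net(input_shape=(84, 84, 4), action_count=4):
    inp = Input(shape=input_shape)
    x = Conv2D(32, (8, 8), strides=(4, 4), activation='relu')(inp)
    x = Conv2D(64, (4, 4), strides=(2, 2), activation='relu')(x)
    x = Flatten()(x)
    value = Dense(256, activation='relu')(x)
    value = Dense(1)(value)                      # V(s)
    advantage = Dense(256, activation='relu')(x)
    advantage = Dense(action_count)(advantage)   # A(s, a)
    # Q(s, a) = V(s) + A(s, a) - mean_a A(s, a), combined via broadcasting
    q = Lambda(lambda t: t[0] + t[1] - K.mean(t[1], axis=1, keepdims=True))([value, advantage])
    return Model(inputs=inp, outputs=q)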
Example #9
def run_td_test(**kargs):
    if ('output_dir' not in kargs
            or kargs['output_dir'] is None) and kargs['logdir'] is not None:
        kargs['output_dir'] = kargs['logdir']

    from collections import namedtuple
    args = namedtuple("TDTestParams", kargs.keys())(*kargs.values())

    if 'dont_init_tf' in kargs and not kargs['dont_init_tf']:
        init_nn_library(True, "1")

    #env = gym_env(args.game)
    print('Monitor dir',
          kargs['monitor_dir'] if 'monitor_dir' in kargs else None)
    env = get_env(args.game, args.atari, args.env_transforms,
                  kargs['monitor_dir'] if 'monitor_dir' in kargs else None)

    viewer = None
    if args.enable_render:
        viewer = EnvViewer(env, args.render_step, 'human')

    envOps = EnvOps(env.observation_space.shape, env.action_space.n, 0)
    #print(env.observation_space.low)
    #print(env.observation_space.high)

    env_model = globals()[args.env_model](envOps)
    if args.env_weightfile is not None:
        env_model.model.load_weights(args.env_weightfile)

    v_model = globals()[args.vmodel](envOps)

    weight_files = []
    if not isinstance(args.load_weightfile, list):
        weight_files = [(args.load_weightfile, 0)]
    else:
        idxs = range(int(args.load_weightfile[1]),
                     int(args.load_weightfile[3]),
                     int(args.load_weightfile[2]))
        weight_files = [(args.load_weightfile[0] + str(I) + '.h5', I)
                        for I in idxs]

    summary_writer = tf.summary.FileWriter(
        args.logdir,
        K.get_session().graph) if args.logdir is not None else None

    sw = SummaryWriter(summary_writer, ['Average reward', 'Total reward'])
    #sw = SummaryWriter(summary_writer, ['Reward'])

    stats = {'reward': []}
    for I, weight_file_info in enumerate(weight_files):
        weight_file = weight_file_info[0]
        total_step_count = weight_file_info[1]
        v_model.model.load_weights(weight_file)
        v_agent = VAgent(env.action_space, env_model, v_model, envOps, None,
                         False)
        runner = Runner(env,
                        v_agent,
                        None,
                        1,
                        max_step=args.max_step,
                        max_episode=args.max_episode)
        runner.listen(v_agent, None)
        if viewer is not None:
            runner.listen(viewer, None)
        runner.run()
        tmp_stats = np.array(v_agent.stats['reward'])
        total_reward = tmp_stats[:, 1].sum()
        total_reward = total_reward / args.max_episode
        aver_reward = total_reward / tmp_stats[-1, 0]
        sw.add([aver_reward, total_reward], I)
        stats['reward'].append((total_step_count, total_reward))
        print(
            '{0} / {1}: Aver Reward per step = {2}, Aver Reward per episode = {3}'
            .format(I + 1, len(weight_files), aver_reward, total_reward))
    return stats
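
# As the loop above shows, load_weightfile can be either a single checkpoint path
# or a [prefix, start, step, stop] list expanded into numbered .h5 files. A small
# standalone illustration of that expansion (the paths are placeholders):
load_weightfile = ['weights/v_model_', '10000', '10000', '50001']
idxs = range(int(load_weightfile[1]), int(load_weightfile[3]), int(load_weightfile[2]))
weight_files = [(load_weightfile[0] + str(i) + '.h5', i) for i in idxs]
# -> [('weights/v_model_10000.h5', 10000), ..., ('weights/v_model_50000.h5', 50000)]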
Example #10
def run_td(**kargs):
    debug = False
    if kargs['output_dir'] is None and kargs['logdir'] is not None:
        kargs['output_dir'] = kargs['logdir']

    from collections import namedtuple
    args = namedtuple("TDParams", kargs.keys())(*kargs.values())

    target_network_update = ParameterDecay(args.target_network_update)

    if 'dont_init_tf' in kargs and not kargs['dont_init_tf']:
        init_nn_library(True, "1")

    env = get_env(args.game, args.atari, args.env_transforms,
                  kargs['monitor_dir'] if 'monitor_dir' in kargs else None)

    envOps = EnvOps(env.observation_space.shape, env.action_space.n,
                    args.learning_rate)
    #print(env.observation_space.low)
    #print(env.observation_space.high)

    env_model = globals()[args.env_model](envOps)
    if args.env_weightfile is not None:
        env_model.model.load_weights(args.env_weightfile)

    v_model = globals()[args.vmodel](envOps)

    import numpy as np
    td_model = TDNetwork(
        env_model.model, v_model, envOps, False,
        kargs['derivative_coef'] if 'derivative_coef' in kargs else 0)

    summary_writer = tf.summary.FileWriter(
        args.logdir,
        K.get_session().graph) if args.logdir is not None else None
    sw = SummaryWriter(summary_writer, ['Loss'])

    if args.load_trajectory is not None:
        from utils.trajectory_utils import TrajectoryLoader
        traj = TrajectoryLoader(args.load_trajectory)

    from utils.network_utils import NetworkSaver
    network_saver = NetworkSaver(args.save_freq, args.logdir, v_model.model)

    import scipy.stats as stats

    td_exponent = ParameterDecay(kargs['td_exponent'] if 'td_exponent' in kargs
                                 and kargs['td_exponent'] is not None else 2)

    #from tensorflow.keras.utils import plot_model
    #plot_model(td_model.td_model, to_file='td_model.png')
    td_model.td_model.summary()

    print('TDNetwork Layers')
    for layer_idx, layer in enumerate(td_model.td_model.layers):
        print('Layer ', layer_idx, layer.name,
              layer.shape if hasattr(layer, "shape") else layer.input_shape)

    for I in range(args.max_step):
        #batch = np.random.uniform([-4.8, -5, -0.48, -5], [4.8, 5, 0.48, 5], size=(args.batch_size,4))
        #lower, upper = -1, 1
        #mu, sigma = 0.5, 0.4
        #X = stats.truncnorm((lower - mu) / sigma, (upper - mu) / sigma, loc=mu, scale=sigma)
        #samples = np.random.uniform([-1], [1], size=(5000,1))
        if hasattr(env_model, "get_samples"):
            samples = env_model.get_samples(args.sample_count)
        else:
            samples = np.random.uniform(args.smin,
                                        args.smax,
                                        size=(args.sample_count,
                                              len(args.smin)))
        res = td_model.test(samples)
        if isinstance(res, (list, )):
            res = res[0]
        td_errors = res.flatten()
        props = np.abs(td_errors)
        #props = np.multiply(props, props)
        props = np.power(props, td_exponent())
        props = props / props.sum()
        count = 0
        if isinstance(samples, list):
            count = samples[0].shape[0]
        else:
            count = samples.shape[0]
        idxs = np.random.choice(count, args.batch_size, False, props)
        batch = {
            #'current': np.random.uniform([-1], [1], size=(args.batch_size,1))
            #'current': X.rvs((args.batch_size,1))
            'current':
            [a[idxs]
             for a in samples] if isinstance(samples, list) else samples[idxs]
        }
        #batch = traj.sample(args.batch_size)

        if debug:
            # @ersin - test code for Freeway
            old_loss = td_model.test(batch['current'])
            print(I, old_loss.flatten())
            decoder_input = [
                batch['current'][1].astype(np.float32),
                batch['current'][0].astype(np.float32)
            ]
            decoded_output = env_model.model_decoder.predict_on_batch(
                decoder_input)
            save_image(decoded_output, args.batch_size,
                       f'{kargs["output_dir"]}/{I}_sample')

        loss = td_model.train(batch['current'])

        #@ersin - tests
        if debug and False:
            save_image(batch['current'][0], args.batch_size,
                       f'{kargs["output_dir"]}/{I}_current_cars')
            save_image(batch['current'][1], args.batch_size,
                       f'{kargs["output_dir"]}/{I}_current_tavuks')
            save_image(batch['current'][2], args.batch_size,
                       f'{kargs["output_dir"]}/{I}_current_cross')
            save_image(batch['current'][3], args.batch_size,
                       f'{kargs["output_dir"]}/{I}_current_carpisma')

            next_state = env_model.predict_next(batch['current'])

            for J in range(3):
                save_image(next_state[0 + J * 4], args.batch_size,
                           f'{kargs["output_dir"]}/{I}_next_{J}_cars')
                save_image(next_state[1 + J * 4], args.batch_size,
                           f'{kargs["output_dir"]}/{I}_next_{J}_tavuks')
                save_image(next_state[2 + J * 4], args.batch_size,
                           f'{kargs["output_dir"]}/{I}_next_{J}_cross')
                save_image(next_state[3 + J * 4], args.batch_size,
                           f'{kargs["output_dir"]}/{I}_next_{J}_carpisma')

        print(loss)
        if td_model.include_derivative:
            loss = loss[0]
        sw.add([loss], I)
        network_saver.on_step()
        td_exponent.on_step()
        target_network_update.on_step()
        if target_network_update.is_step() == 0:
            td_model.v_model_eval.set_weights(td_model.v_model.get_weights())
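
# The heart of the training loop above is drawing each batch in proportion to
# |TD error| raised to td_exponent. A self-contained illustration of that
# weighting, using synthetic TD errors instead of td_model.test() output:
import numpy as np

td_errors = np.array([0.05, -0.40, 1.20, -0.02, 0.30])
p = 2.0                                    # td_exponent
props = np.power(np.abs(td_errors), p)     # larger errors get larger weight
props = props / props.sum()                # normalize to a probability distribution
batch_idxs = np.random.choice(len(td_errors), size=3, replace=False, p=props)
print(batch_idxs)  # indices of the samples that would form the training batch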
Example #11
args = parser.parse_args()

from PIL import Image

from envs.gym_env import gym_env
from envs.env_transform import WarmUp, ActionRepeat, ObservationStack
from utils.preprocess import *
from runner.runner import Runner
from agents.agent import DqnAgent, DqnAgentOps, EGreedyOps, EGreedyAgent
from utils.memory import ReplayBuffer, NStepBuffer
from nets.net import TabularQModel, DqnOps, init_nn_library
import tensorflow as tf
import keras.backend as K
from envs.env import GridEnv

init_nn_library(True, "0")

if args.game == "Grid":
	env = GridEnv()
else:
	env = gym_env(args.game)


#print(env.observation_space.n)

modelOps = DqnOps(env.action_count)
modelOps.dueling_network = args.dueling_dqn
modelOps.INPUT_SIZE = env.observation_space.n
modelOps.LEARNING_RATE = 0.2

q_model = TabularQModel(modelOps)