Example #1
    def __init__(self, train_mode=0):
        self.train_mode = train_mode
        agent = DQNAgent(mode=self.train_mode)
        user = user_simulator()
        self.manager = dialog_manager(agent,
                                      user,
                                      self.train_mode,
                                      maximum_turn=20)
        self.simulation_epoch_size = 800
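Only the constructor of the training harness is shown above. A possible warm-up driver is sketched below; the class name Pipeline and the initialize_episode / next_turn methods on dialog_manager are assumptions (a TC-Bot-style interface), not taken from the example itself.

# Hypothetical driver for the harness above; `Pipeline` is an assumed name for
# the class whose __init__ is shown, and initialize_episode / next_turn are an
# assumed (TC-Bot-style) dialog_manager interface.
pipeline = Pipeline(train_mode=1)
for _ in range(pipeline.simulation_epoch_size):
    pipeline.manager.initialize_episode()
    episode_over = False
    while not episode_over:
        episode_over, reward = pipeline.manager.next_turn()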
Example #2
                      gamma=gamma,
                      prior_eps=prior_eps,
                      alpha=alpha,
                      beta=beta,
                      v_min=v_min,
                      v_max=v_max,
                      atom_size=atom_size,
                      support=support,
                      batch_size=batch_size)

# train
agent = DQNAgent(algorithm=algorithm,
                 env=env,
                 memory_size=memory_size,
                 batch_size=batch_size,
                 obs_dim=obs_dim,
                 action_dim=action_dim,
                 target_update=target_update,
                 gamma=gamma,
                 alpha=alpha,
                 beta=beta,
                 n_step=n_step,
                 device=device)

#%%
agent.train(num_frames, plot=False)

#%%
# agent.env = gym.wrappers.Monitor(env, "videos", force=True)
agent.test(render=True)
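The constructor calls above assume the environment and the hyperparameters were defined earlier in the script. The sketch below shows one plausible preamble; the concrete values and the algorithm identifier are assumptions, only the variable names come from the example.

# Illustrative preamble for the constructor calls above; the values and the
# algorithm string are assumptions, only the variable names match the example.
import gym
import torch

env = gym.make("CartPole-v1")
obs_dim = env.observation_space.shape[0]
action_dim = env.action_space.n

algorithm = "rainbow"      # assumed; depends on the project's agent variants
memory_size = 10000
batch_size = 32
target_update = 100
gamma = 0.99
alpha = 0.2                # prioritized-replay exponent
beta = 0.6                 # importance-sampling exponent
prior_eps = 1e-6
n_step = 3
num_frames = 20000
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# distributional (categorical DQN) parameters used by the first, truncated call
v_min, v_max, atom_size = 0.0, 200.0, 51
support = torch.linspace(v_min, v_max, atom_size).to(device)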
Example #3
from real_user import real_user
from state_tracker import state_tracker
from DQN_agent import DQNAgent
from natural_language_understanding import NL_understanding as NLU
from natural_language_generator_rule import NL_rule_generator as NLG

nlu = NLU()
nlg = NLG()
agent = DQNAgent(mode=1)
agent.initialize()


def respond(msg, user, state_keeper):
    # user turn
    user.update_sentence(msg)
    user_action, episode_over, dialog_status = user.generate_user_response()
    user_action = nlu.convert_nl_to_state(user_action)
    print('User State: {}\nEpisode_over: {}'.format(user_action, episode_over))
    state_keeper.update(user_action=user_action)
    # agent turn
    agent_state = state_keeper.get_agent_input_vector()
    agent_action, action, episode_over = agent.generate_agent_response(
        agent_state,
        state_keeper.all_slots['user_informed_slots'],
        state_keeper.act)
    sentence = nlg.convert_state_to_nl(agent_action)
    if sentence != '':
        agent_action['sentence'] = sentence
    else:
        agent_action['sentence'] = 'Response not available ...'
    print('Agent State: {}'.format(agent_action))
    state_keeper.update(agent_action=agent_action)
    return agent_action['sentence']
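A simple console loop around respond() is sketched below; the no-argument constructors of real_user and state_tracker are assumptions, not taken from the example.

# Hypothetical console loop around respond(); the no-argument constructors of
# real_user and state_tracker are assumptions.
if __name__ == '__main__':
    user = real_user()
    state_keeper = state_tracker()
    while True:
        msg = input('You: ')
        if msg.strip().lower() in ('quit', 'exit'):
            break
        print('Bot: {}'.format(respond(msg, user, state_keeper)))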
Example #4
def execute_experiment(args):
    #### HARD RULES
    if args['parallel'] != 0:
        args['use_gpu'] = 0
    if args['agent_type'] == 'human':
        args['use_gpu'] = 0
        args['render_delay'] = 0
        args['mode'] = 'play'
        args['display_prob'] = 1
#        args['action_repeat'] = 1

    if args['env_name'] == 'key_mdp-v0':
        args['action_repeat'] = 1
    arch_names = [n for n in args.keys() if 'architecture' in n]
    for arch_name in arch_names:
        if args[arch_name] is not None:
            args[arch_name] = args[arch_name].split('-')

    cnf = configuration.Configuration()

    #Global settings
    gl_st = configuration.GlobalSettings(args)

    cnf.set_global_settings(gl_st)

    #Agent settings
    if args['agent_type'] == 'dqn':
        ag_st = configuration.DQNSettings(args['scale'])
    elif args['agent_type'] == 'hdqn':
        ag_st = configuration.hDQNSettings(args['scale'])
    elif args['agent_type'] == 'human':
        ag_st = configuration.HumanSettings()
    else:
        raise ValueError("Wrong agent %s" % args['agent_type'])

    ag_st.update(args)
    cnf.set_agent_settings(ag_st)

    #Environment settings
    utils.insert_dirs(cnf.gl.env_dirs)
    if args['env_name'] == 'SF-v0':
        #Space Fortress
        env_st = configuration.SpaceFortressSettings(new_attrs=args)
    elif args['env_name'] == 'key_mdp-v0':
        #MDP
        env_st = configuration.Key_MDPSettings(new_attrs=args)
    else:
        raise ValueError("Wrong env_name %s, (env_names: s%)"\
                         .format(args['env_name'], ', '.join(CT.env_names)))

    env_st.set_reward_function()
    cnf.set_environment_settings(env_st)
    environment = Environment(cnf)

    tf.set_random_seed(gl_st.random_seed)
    random.seed(gl_st.random_seed)

    if gl_st.gpu_fraction == '':
        raise ValueError("--gpu_fraction should be defined")

    if not gl_st.use_gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = "-1"

    frac = utils.calc_gpu_fraction(gl_st.gpu_fraction)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=frac)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:

        if ag_st.agent_type == 'dqn':
            agent = DQNAgent(cnf, environment, sess)

        elif ag_st.agent_type == 'hdqn':
            agent = HDQNAgent(cnf, environment, sess)

        elif ag_st.agent_type == 'human':
            agent = HumanAgent(cnf, environment)
        else:
            raise ValueError("Wrong agent %s".format())

        if ag_st.mode == 'train':
            agent.train()
        elif ag_st.mode == 'play':
            agent.play()
        elif ag_st.mode == 'graph':
            pass
        else:
            raise ValueError("Wrong mode " + str(ag_st.mode))

        #agent.show_attrs()
    tf.reset_default_graph()
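execute_experiment expects a flat args dictionary, typically built from argparse. The sketch below lists only the keys the function itself touches; the values are assumptions, and a real run also needs whatever keys the Configuration/Settings classes read.

# Illustrative args for execute_experiment(); the values are assumptions and
# keys consumed only inside the Settings classes are omitted.
args = {
    'parallel': 0,
    'use_gpu': 0,
    'agent_type': 'dqn',        # 'dqn', 'hdqn' or 'human'
    'env_name': 'key_mdp-v0',   # 'SF-v0' or 'key_mdp-v0'
    'mode': 'train',
    'scale': 1,
    'render_delay': 0,
    'display_prob': 0,
    'action_repeat': 1,
    'gpu_fraction': '1/1',      # assumed to reach GlobalSettings.gpu_fraction
    'architecture': '64-64',    # split on '-' by execute_experiment
}
execute_experiment(args)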
Example #5
target_update = 100
seed = 0
env = LunarLander()

# Whether to log to wandb. All wandb calls are commented out so the code can run without it.
log = False
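# For reference, the wandb wiring the `log` flag would guard might look roughly
# like the commented sketch below; wandb.init / wandb.log are standard wandb
# calls, but the project name and metric names here are assumptions.
#
# import wandb
# if log:
#     wandb.init(project='lunarlander-dqn',
#                config={'seed': seed, 'target_update': target_update,
#                        'memory_size': memory_size, 'batch_size': batch_size})
# # and once per episode inside the training loop:
# # if log:
# #     wandb.log({'score': episode_score, 'frame': frame_idx})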


def seed_everything(seed_value):
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False  # keep benchmarking off for reproducible runs


seed_everything(seed)

agent = DQNAgent(env,
                 memory_size,
                 batch_size,
                 target_update,
                 log=log,
                 seed=seed)
agent.train(num_frames)
Example #6
BATCH_SIZE = 32
TAU = 0.001
EPSILON = 0.99
GAMMA = 0.97
LR = 1e-3
MEMORY_SIZE = 10000
f1 = 128

env = gym.make("CartPole-v1")
state_dim = env.observation_space.shape
action_dim = env.action_space.n

agent = DQNAgent(state_dim=state_dim,
                 action_dim=action_dim,
                 tau=TAU,
                 epsilon=EPSILON,
                 mem_size=MEMORY_SIZE,
                 batch_size=BATCH_SIZE,
                 gamma=GAMMA,
                 lr=LR)
# tf.summary.FileWriter('logs/',agent.sess.graph)

# initialize the buffer with some transitions
counter = 0
while counter < 5 * BATCH_SIZE:
    s = env.reset()
    while True:
        a = agent.choose_action(s)
        s_, r, done, _ = env.step(a)
        agent.store(s, a, r, s_, done)
        counter += 1
        if done: