Example #1
    config=config,
    device=device,
    init_exp=config.init_exp,  # initial exploration probability
    final_exp=config.final_exp,  # final exploration probability
    anneal_steps=10000,  # number of steps over which exploration is annealed
    discount_factor=config.discounted_factor,  # discount factor for future rewards
    reg_param=0.01,  # regularization constant
    max_gradient=5,  # max gradient norm for clipping
    summary_every=100,
    batch_size=config.batch_size,
    verbose=True,
    with_bit=config.with_bit,
    replay=config.replay)
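The init_exp, final_exp, and anneal_steps arguments describe an exploration probability that decays from an initial to a final value over a fixed number of steps. A minimal sketch of a linear annealing schedule, assuming that is the scheme the agent implements (the function name and signature below are illustrative, not from the original code):

def anneal_exploration(step, init_exp=0.5, final_exp=0.1, anneal_steps=10000):
    # Linearly interpolate from init_exp down to final_exp; after
    # anneal_steps the probability stays fixed at final_exp.
    frac = min(float(step) / anneal_steps, 1.0)
    return init_exp + frac * (final_exp - init_exp)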

user = Seq_User_Act(nlg_sample=True, nlg_template=False)
system = LooseSystem(config=config)
env = Enviroment(user=user, system=system, verbose=True, config=config)
sys_act = None
status = []

while True:
    print("-" * 20)
    state = env.reset(mode=MODE)  # turker_response
    sys_act = None  # reset the system act at the start of each dialog
    total_rewards = 0
    while True:
        if config.with_bit:
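Example #1 breaks off inside the dialog loop. For context, here is a hedged sketch of how one rollout typically proceeds in REINFORCE-style training; env.step()'s return values and the agent's sample_action()/store_rollout() methods are assumptions, not confirmed from the source:

def rollout_episode(env, agent, mode, max_turns=20):
    # Hedged sketch: run a single dialog, letting the agent pick system
    # acts until the environment signals the dialog is done.
    state = env.reset(mode=mode)
    total_rewards = 0
    for _ in range(max_turns):
        action = agent.sample_action(state)          # assumed agent API
        next_state, reward, done = env.step(action)  # assumed step signature
        agent.store_rollout(state, action, reward)
        total_rewards += reward
        state = next_state
        if done:
            break
    return total_rewards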
Example #2
    with_bit_all = True

config.nlg_sample = bool(args.nlg_sample)  # mirror the CLI flag onto the config

if args.save_dir:
    config.save_dir = args.save_dir

# choose between the "loose" and strict user/system simulators
if config.loose_agents:
    user = LooseUser(nlg_sample=False)
    system = LooseSystem()
else:
    user = User(nlg_sample=False)
    system = System()

env = Enviroment(user=user, system=system, verbose=True)
sys_act = None
status = []

state_dim   = dialog_config.STATE_DIM
num_actions = dialog_config.SYS_ACTION_CARDINALITY
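STATE_DIM and SYS_ACTION_CARDINALITY fix the input and output sizes of the policy network. As a sketch only, a small PyTorch policy with these dimensions (the hidden width and the choice of PyTorch are assumptions; the original project may use a different framework):

import torch.nn as nn

# Illustrative policy network: maps a dialog state vector to a
# distribution over the system's action set.
policy_net = nn.Sequential(
    nn.Linear(state_dim, 64),   # 64 hidden units is an assumption
    nn.ReLU(),
    nn.Linear(64, num_actions),
    nn.Softmax(dim=-1),         # probabilities over SYS_ACTION_CARDINALITY
)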


def run_one_dialog(env, pg_reinforce):
    print("Test Episode "+"-"*20)
    cur_mode = dialog_config.RL_TRAINING
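run_one_dialog is also cut off. A hedged sketch of how such a helper is typically driven during evaluation, averaging reward over several test dialogs (the episode count and the assumption that run_one_dialog returns one dialog's total reward are illustrative):

def evaluate(env, pg_reinforce, num_episodes=50):
    # Assumes run_one_dialog returns the total reward of one test dialog.
    rewards = [run_one_dialog(env, pg_reinforce) for _ in range(num_episodes)]
    return sum(rewards) / float(len(rewards))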