def run_model_with_noise(model, noise_level):
    env = tce.TeaCoffeeEnv()
    # Run every goal sequence once and collect the network's action outputs.
    outputs_actions = []
    goal_list = ["g_1_make_coffee", "g_1_make_tea", "g_2_add_grounds", "g_2_add_cream",
                 "g_2_add_sugar", "g_2_drink", "g_2_dip_teabag"]
    for goal in goal_list:
        targets = tce.target_list[goal]  # Get the target actions and goals
        env.state = state.State(tce.TeaCoffeeData())  # Reinitialize the state
        env.state.current.set_field(goal, 1.)  # Set the goal as active
        model.context = np.random.uniform(0.01, 0.1, (1, model.size_hidden))
        for target in targets:
            # Clamp the network's action and goal units to the target values.
            model.action = utils.str_to_onehot(target[0], tce.TeaCoffeeData.actions_list)
            model.goal1 = utils.str_to_onehot(target[1], tce.TeaCoffeeData.goals1_list)
            model.goal2 = utils.str_to_onehot(target[2], tce.TeaCoffeeData.goals2_list)
            # Perturb the context units with Gaussian noise.
            model.context += noise_level * np.random.normal(size=np.shape(model.context))
            observation = env.observe()
            model.feedforward(observation)
            # Transition the environment according to the *target* action.
            env.do_action(target[0])
            outputs_actions.append(model.action)
    return outputs_actions
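# Usage sketch (an assumption, not part of the original code): evaluate an already
# trained model at several noise levels and collect its action outputs.
# "trained_model" is a placeholder for whatever model instance this project builds;
# it only needs the attributes used above (context, size_hidden, action, goal1,
# goal2, feedforward).
#
#   for noise in (0.0, 0.1, 0.25):
#       actions = run_model_with_noise(trained_model, noise)
#       print(noise, len(actions), "action outputs collected")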
def reinitialize(self, initial_state=None):
    # Reset the environment, optionally to a caller-supplied state.
    if initial_state is None:
        initial_state = state.State(GoalEnvData())
    self.state = initial_state
def __init__(self):
    super().__init__()
    self.state = state.State(GoalEnvData())
def train_supervised_teacoffeeenv(model, environment, num_episodes):
    env = environment
    # Rolling averages used to monitor training progress.
    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.
    rng_avg_goal1 = 0.
    rng_avg_goal2 = 0.
    rng_avg_action1 = 0.
    goal_list = ["g_1_make_coffee", "g_1_make_tea", "g_2_add_grounds", "g_2_add_cream",
                 "g_2_add_sugar", "g_2_drink", "g_2_dip_teabag"]
    for episode in range(num_episodes):
        goal = goal_list[episode % len(goal_list)]  # Cycle through the goals
        targets = tce.target_list[goal]  # Get the target actions and goals
        env.state = state.State(tce.TeaCoffeeData())  # Reinitialize the state
        env.state.current.set_field(goal, 1.)  # Set the goal as active

        # Run the network, recording operations on the tape for backpropagation.
        with tf.GradientTape() as tape:
            # Initialize context with random uniform values.
            #model.context = np.zeros((1, model.size_hidden))
            model.context = np.float32(np.random.uniform(0.01, 0.99, (1, model.size_hidden)))
            targets_onehot = [[], [], []]  # actions, goal1s, goal2s
            for target in targets:
                # Set up the input to be the correct actions and goals
                targets_onehot[0].append(utils.str_to_onehot(target[0], tce.TeaCoffeeData.actions_list))
                targets_onehot[1].append(utils.str_to_onehot(target[1], tce.TeaCoffeeData.goals1_list))
                targets_onehot[2].append(utils.str_to_onehot(target[2], tce.TeaCoffeeData.goals2_list))
                model.action, model.goal1, model.goal2 = [targets_onehot[j][-1] for j in range(3)]
                observation = env.observe()
                model.feedforward(observation)
                # Transition the MDP according to the *target* action, not the chosen action!
                env.do_action(target[0])

            # Get some statistics about what was correct and what wasn't
            ratios = evaluate([model.h_action_wta, model.h_goal1_wta, model.h_goal2_wta],
                              targets_onehot)
            rng_avg_action1 = utils.rolling_avg(
                rng_avg_action1,
                ratio_correct([model.h_action_wta[0]], [targets_onehot[0][0]]),
                2. / (episode + 2) if episode < 1000 else 0.001)

            # Train model, record loss.
            loss = model.train_obsolete(targets_onehot[0], targets_onehot[1],
                                        targets_onehot[2], tape)

        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # faster-moving average so early trials remain informative
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(rng_avg_goals, ratios[0] == 1, speed)  # whole action sequence correct?
        rng_avg_goal1 = utils.rolling_avg(rng_avg_goal1, ratios[1], speed)
        rng_avg_goal2 = utils.rolling_avg(rng_avg_goal2, ratios[2], speed)

        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3**n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print("{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}\tgoal1={4}\tgoal2={5}\tfirst_action={6}"
                  .format(episode, rng_avg_loss, rng_avg_actions, rng_avg_goals,
                          rng_avg_goal1, rng_avg_goal2, rng_avg_action1))
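# Usage sketch (an assumption, not part of the original code): train a model on the
# tea/coffee task and then probe it with context noise. "MyModel" is a hypothetical
# placeholder; substitute the model class this project actually defines.
#
#   env = tce.TeaCoffeeEnv()
#   model = MyModel()                      # hypothetical model class
#   train_supervised_teacoffeeenv(model, env, num_episodes=10000)
#   outputs = run_model_with_noise(model, noise_level=0.1)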
def __init__(self):
    super().__init__()
    self.state = state.State(TeaCoffeeData())