def run_model_with_noise(model, noise_level):
    """Run ``model`` over every goal's target sequence with noisy context.

    For each goal the environment state is rebuilt and the goal field is set
    to 1, then the model context is re-drawn uniformly between 0.01 and 0.1.
    At every step the model's ``action``/``goal1``/``goal2`` attributes are
    overwritten with the one-hot encoded target triple, Gaussian noise scaled
    by ``noise_level`` is added to the context, and the current observation
    is fed forward. The environment is advanced with the *target* action.

    Args:
        model: Object exposing ``size_hidden``, ``context``, ``action``,
            ``goal1``, ``goal2`` and ``feedforward(observation)``.
        noise_level: Multiplier applied to standard-normal noise.

    Returns:
        List of ``model.action`` values, one read after each step, for all
        goals in order (presumably the action produced by ``feedforward``;
        confirm against the model class).
    """
    data_cls = tce.TeaCoffeeData
    goal_names = (
        "g_1_make_coffee",
        "g_1_make_tea",
        "g_2_add_grounds",
        "g_2_add_cream",
        "g_2_add_sugar",
        "g_2_drink",
        "g_2_dip_teabag",
    )
    env = tce.TeaCoffeeEnv()
    collected_actions = []

    for goal_name in goal_names:
        step_targets = tce.target_list[goal_name]
        # Fresh environment state with only this goal switched on.
        env.state = state.State(tce.TeaCoffeeData())
        env.state.current.set_field(goal_name, 1.)
        model.context = np.random.uniform(0.01, 0.1, (1, model.size_hidden))

        for step_target in step_targets:
            # Overwrite the model's previous action/goals with the targets.
            model.action = utils.str_to_onehot(step_target[0],
                                               data_cls.actions_list)
            model.goal1 = utils.str_to_onehot(step_target[1],
                                              data_cls.goals1_list)
            model.goal2 = utils.str_to_onehot(step_target[2],
                                              data_cls.goals2_list)

            # Perturb the context; kept as an augmented assignment so that a
            # numpy context is still updated in place.
            perturbation = noise_level * np.random.normal(
                size=tf.shape(model.context))
            model.context += perturbation

            model.feedforward(env.observe())
            # The environment follows the target action, not the model's.
            env.do_action(step_target[0])
            collected_actions.append(model.action)

    return collected_actions
def run_models_with_noise(models, noise):
    """Count, per target action, how many models produced a wrong action.

    Every model is run through ``run_model_with_noise`` with the given noise
    level. Each returned action is compared element-wise with the one-hot
    encoding of the expected action at the same position; mismatches are
    counted and printed as ``expected actual``.

    Args:
        models: Iterable of models accepted by ``run_model_with_noise``.
        noise: Noise level forwarded to ``run_model_with_noise``.

    Returns:
        Numpy array with one counter per expected action, holding the number
        of models whose output differed at that position.
    """
    actions_list = tce.TeaCoffeeData.actions_list
    goal_names = (
        "g_1_make_coffee",
        "g_1_make_tea",
        "g_2_add_grounds",
        "g_2_add_cream",
        "g_2_add_sugar",
        "g_2_drink",
        "g_2_dip_teabag",
    )

    # Expected actions for all goals, flattened into one sequence.
    expected_names = utils.flatten_onelevel(
        [tce.action_list[goal_name] for goal_name in goal_names])
    expected_onehots = [
        utils.str_to_onehot(name, actions_list) for name in expected_names
    ]

    # Actual outputs, one list of actions per model.
    outputs_per_model = [
        run_model_with_noise(model, noise) for model in models
    ]

    error_counts = np.zeros(len(expected_onehots))
    for model_outputs in outputs_per_model:
        for position, produced in enumerate(model_outputs):
            mismatch = (expected_onehots[position] != produced).any()
            if not mismatch:
                continue
            error_counts[position] += 1
            print(expected_names[position],
                  utils.onehot_to_str(produced, actions_list))
    return error_counts
def make_targets_ari(seq_ari, blanks):
    """Build the list of ``task.Target`` objects for an arithmetic sequence.

    The output targets are the one-hot encodings of ``seq_ari[1]`` through
    ``seq_ari[6]``, paired with ``task.goal_target_ari`` entries 0-4 and,
    for the final step, the last entry (index -1).

    Args:
        seq_ari: Indexable sequence of symbols; positions 1-6 are read.
        blanks: When truthy, a ``task.Target(None, None)`` is inserted
            between consecutive targets (11 targets in total instead of 6).

    Returns:
        List of ``task.Target`` instances.
    """
    # Encode all six output symbols up front.
    encoded_outputs = [
        utils.str_to_onehot(seq_ari[position], task.output_symbols)
        for position in range(1, 7)
    ]
    # The final step is paired with the last goal target.
    goal_indices = (0, 1, 2, 3, 4, -1)

    targets = []
    for step, goal_index in enumerate(goal_indices):
        if blanks and step > 0:
            targets.append(task.Target(None, None))
        targets.append(
            task.Target(encoded_outputs[step],
                        task.goal_target_ari[goal_index]))
    return targets
def get_model_hidden_activations(model):
    """Collect the flattened model context after every step of every goal.

    For each goal in ``tce.goal_list`` the model context is re-drawn
    uniformly between 0.01 and 0.1 and the model's ``action``/``goal1``/
    ``goal2`` are set from the first target triple only. The model is then
    fed one observation per target while the environment is advanced with
    the target action.

    NOTE(review): unlike the noise/training routines, the environment state
    is neither rebuilt nor given an active goal between goals here; confirm
    that this is intended.

    Args:
        model: Object exposing ``size_hidden``, ``context``, ``action``,
            ``goal1``, ``goal2`` and ``feedforward(observation)``; after a
            feedforward, ``context`` must provide ``.numpy()``.

    Returns:
        List of flattened context arrays, one per step.
    """
    data_cls = tce.TeaCoffeeData
    env = tce.TeaCoffeeEnv()
    activations = []

    for goal_name in tce.goal_list:
        step_targets = tce.target_list[goal_name]
        model.context = np.random.uniform(0.01, 0.1, (1, model.size_hidden))

        # Only the first target triple seeds the model's action and goals.
        model.action = utils.str_to_onehot(step_targets[0][0],
                                           data_cls.actions_list)
        model.goal1 = utils.str_to_onehot(step_targets[0][1],
                                          data_cls.goals1_list)
        model.goal2 = utils.str_to_onehot(step_targets[0][2],
                                          data_cls.goals2_list)

        for step_target in step_targets:
            model.feedforward(env.observe())
            activations.append(model.context.numpy().flatten())
            env.do_action(step_target[0])

    return activations
def make_targets_all(seq_bev, seq_ari):
    """Build the 12 targets for an interleaved beverage/arithmetic run.

    Beverage symbols 1-5 each produce a target followed by an empty
    ``task.Target(None)``; the last two targets are the encodings of
    ``seq_bev[6]`` and ``seq_ari[6]``.

    Args:
        seq_bev: Indexable beverage sequence; positions 1-6 are read.
        seq_ari: Indexable arithmetic sequence; only position 6 is read.

    Returns:
        List of 12 ``task.Target`` instances.
    """
    def encoded_target(symbol):
        return task.Target(utils.str_to_onehot(symbol, task.symbols))

    targets = []
    for position in range(1, 6):
        targets.append(encoded_target(seq_bev[position]))
        targets.append(task.Target(None))

    # Final pair: last beverage symbol, then the arithmetic result.
    targets.append(encoded_target(seq_bev[6]))
    targets.append(encoded_target(seq_ari[6]))
    return targets
def make_targets_ari(seq_ari):
    """Build six targets that track the running value of an arithmetic run.

    The first two targets encode ``seq_ari[0]``. The next three encode the
    intermediate result of applying the operator at ``seq_ari[1]`` ('+' adds,
    anything else subtracts) to ``seq_ari[0]`` and ``seq_ari[2]``. The last
    target encodes ``seq_ari[6]``.

    Args:
        seq_ari: Indexable sequence; positions 0, 1, 2 and 6 are read, and
            positions 0 and 2 must be convertible with ``int``.

    Returns:
        List of six ``task.Target`` instances.
    """
    def encoded_target(symbol):
        return task.Target(utils.str_to_onehot(symbol, task.output_symbols))

    # Steps 1-2: the first operand.
    targets = [encoded_target(seq_ari[0]), encoded_target(seq_ari[0])]

    # Steps 3-5: the intermediate result of the first operation.
    sign = 1 if seq_ari[1] == '+' else -1
    interm = int(seq_ari[0]) + sign * int(seq_ari[2])
    for _ in range(3):
        targets.append(encoded_target(str(interm)))

    # Step 6: the final answer (the only non-empty target in the original
    # scheme, per the source comment).
    targets.append(encoded_target(seq_ari[6]))
    return targets
def make_targets_bev(seq_bev):
    """Return one ``task.Target`` per beverage symbol at positions 1-6.

    Args:
        seq_bev: Indexable beverage sequence; positions 1-6 are read.

    Returns:
        List of six ``task.Target`` instances, each wrapping the one-hot
        encoding of the symbol over ``task.symbols``.
    """
    return [
        task.Target(utils.str_to_onehot(seq_bev[position], task.symbols))
        for position in range(1, 7)
    ]
def ff_bev(nnet, seq_bev):
    """Feed the first six beverage symbols to ``nnet``, one per step.

    Args:
        nnet: Object exposing ``feedforward(input)``.
        seq_bev: Indexable beverage sequence; positions 0-5 are read.
    """
    for position in range(6):
        encoded = utils.str_to_onehot(seq_bev[position], task.symbols)
        nnet.feedforward(encoded)
def ff_ari(nnet, seq_ari):
    """Feed the first six arithmetic symbols to ``nnet``, one per step.

    Args:
        nnet: Object exposing ``feedforward(input)``.
        seq_ari: Indexable arithmetic sequence; positions 0-5 are read.
    """
    for position in range(6):
        encoded = utils.str_to_onehot(seq_ari[position], task.symbols)
        nnet.feedforward(encoded)
def ff_all(nnet, seq_bev, seq_ari):
    """Feed both sequences to ``nnet`` interleaved, beverage symbol first.

    For each position 0-5 the beverage symbol is fed, then the arithmetic
    symbol at the same position, giving 12 feedforward calls in total.

    Args:
        nnet: Object exposing ``feedforward(input)``.
        seq_bev: Indexable beverage sequence; positions 0-5 are read.
        seq_ari: Indexable arithmetic sequence; positions 0-5 are read.
    """
    for position in range(6):
        for sequence in (seq_bev, seq_ari):
            nnet.feedforward(
                utils.str_to_onehot(sequence[position], task.symbols))
def __init__(self, action, goal1=None):
    """Store the action target and an optional first-level goal target.

    Args:
        action: Value stored unchanged as ``action_one_hot``.
        goal1: Optional goal target. A ``str`` is converted with
            ``utils.str_to_onehot`` over ``goal_symbols``; any other value
            (including ``None``) is stored as given.
    """
    self.action_one_hot = action
    # Accept either a symbolic goal name or an already-encoded goal.
    self.goal1_one_hot = (utils.str_to_onehot(goal1, goal_symbols)
                          if isinstance(goal1, str) else goal1)
    # No second-level goal is set by this constructor.
    self.goal2_one_hot = None
def train_supervised_teacoffeeenv(model, environment, num_episodes):
    """Train ``model`` on the tea/coffee target sequences with supervision.

    Episodes cycle through a fixed list of goals. In each episode the
    environment state is rebuilt with the goal active, the model context is
    re-drawn uniformly between 0.01 and 0.99 (as float32), and the model is
    fed one observation per target step while its action/goal attributes are
    overwritten with the targets. The environment follows the *target*
    action, not the action chosen by the model. After the sequence,
    ``model.train_obsolete`` is called with the target lists and the gradient
    tape, and rolling averages of the loss and accuracy ratios are updated
    and periodically printed.

    Args:
        model: Object exposing ``size_hidden``, ``context``, ``action``,
            ``goal1``, ``goal2``, ``h_action_wta``, ``h_goal1_wta``,
            ``h_goal2_wta``, ``feedforward`` and ``train_obsolete``.
        environment: Environment exposing ``state``, ``observe()`` and
            ``do_action(action)``.
        num_episodes: Number of training episodes to run.

    Returns:
        None. Progress is reported on stdout.
    """
    data_cls = tce.TeaCoffeeData
    goal_names = (
        "g_1_make_coffee",
        "g_1_make_tea",
        "g_2_add_grounds",
        "g_2_add_cream",
        "g_2_add_sugar",
        "g_2_drink",
        "g_2_dip_teabag",
    )
    # Early episodes are reported at powers of three.
    early_report_episodes = {3**n for n in range(50)}

    avg_loss = 0.
    avg_actions = 0.
    avg_full_sequence = 0.
    avg_goal1 = 0.
    avg_goal2 = 0.
    avg_first_action = 0.

    for episode in range(num_episodes):
        # Faster-moving average at the start gives more useful early
        # evaluations; afterwards the rate is fixed.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001

        goal_name = goal_names[episode % len(goal_names)]  # cycle goals
        step_targets = tce.target_list[goal_name]
        env = environment
        env.state = state.State(tce.TeaCoffeeData())  # reinitialize state
        env.state.current.set_field(goal_name, 1.)  # activate the goal

        # Run the network under the tape so that training can differentiate.
        with tf.GradientTape() as tape:
            # Initialize context with random uniform values.
            model.context = np.float32(
                np.random.uniform(0.01, 0.99, (1, model.size_hidden)))

            action_targets, goal1_targets, goal2_targets = [], [], []
            targets_onehot = [action_targets, goal1_targets, goal2_targets]

            for step_target in step_targets:
                action_targets.append(
                    utils.str_to_onehot(step_target[0],
                                        data_cls.actions_list))
                goal1_targets.append(
                    utils.str_to_onehot(step_target[1],
                                        data_cls.goals1_list))
                goal2_targets.append(
                    utils.str_to_onehot(step_target[2],
                                        data_cls.goals2_list))

                # Set the model's action and goals to the correct targets.
                model.action = action_targets[-1]
                model.goal1 = goal1_targets[-1]
                model.goal2 = goal2_targets[-1]

                model.feedforward(env.observe())
                # Transition the MDP according to the *target* action, not
                # the chosen action.
                env.do_action(step_target[0])

            # Statistics about what was correct and what was not.
            ratios = evaluate(
                [model.h_action_wta, model.h_goal1_wta, model.h_goal2_wta],
                targets_onehot)
            first_action_ratio = ratio_correct([model.h_action_wta[0]],
                                               [action_targets[0]])
            avg_first_action = utils.rolling_avg(avg_first_action,
                                                 first_action_ratio, speed)

            # Train the model and record the loss.
            loss = model.train_obsolete(action_targets, goal1_targets,
                                        goal2_targets, tape)

        # Monitor progress using rolling averages.
        avg_loss = utils.rolling_avg(avg_loss, loss, speed)
        avg_actions = utils.rolling_avg(avg_actions, ratios[0], speed)
        # Was the whole action sequence correct?
        avg_full_sequence = utils.rolling_avg(avg_full_sequence,
                                              ratios[0] == 1, speed)
        avg_goal1 = utils.rolling_avg(avg_goal1, ratios[1], speed)
        avg_goal2 = utils.rolling_avg(avg_goal2, ratios[2], speed)

        # Display on the console at regular intervals.
        is_early_report = episode < 1000 and episode in early_report_episodes
        if (is_early_report or episode % 1000 == 0
                or episode + 1 == num_episodes):
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}\tgoal1={4}\tgoal2={5}\tfirst_action={6}"
                .format(episode, avg_loss, avg_actions, avg_full_sequence,
                        avg_goal1, avg_goal2, avg_first_action))
def make_targets_all(seq_bev, seq_ari, start):
    """Build the 12 interleaved targets for a beverage/arithmetic run.

    For each step ``k`` in 0-5 two targets are produced:

    * a beverage target: the encoding of ``seq_bev[k + 1]`` paired with
      ``task.goal_target_ari[k]``;
    * an arithmetic target: the encoding of ``seq_ari[k + 1]`` paired with
      ``task.goal_target_bev[k]``.

    ``start`` decides which of the two comes first within each step.

    Args:
        seq_bev: Indexable beverage sequence; positions 1-6 are read.
        seq_ari: Indexable arithmetic sequence; positions 1-6 are read.
        start: ``task.START_BEV`` to put the beverage target first, or
            ``task.START_ARI`` to put the arithmetic target first.

    Returns:
        List of 12 ``task.Target`` instances.

    Raises:
        NotImplementedError: If ``start`` is neither ``task.START_BEV`` nor
            ``task.START_ARI``. An unknown start used to yield an empty list
            silently; it now fails with the same error as the
            four-argument ``ff_all``.
    """
    if start == task.START_BEV:
        beverage_first = True
    elif start == task.START_ARI:
        beverage_first = False
    else:
        raise NotImplementedError("only starts are ari and bev")

    targets = []
    for step in range(6):
        # Each output is paired with the goal target of the *other* task,
        # exactly as in the original unrolled code.
        bev_target = task.Target(
            utils.str_to_onehot(seq_bev[step + 1], task.output_symbols),
            task.goal_target_ari[step])
        ari_target = task.Target(
            utils.str_to_onehot(seq_ari[step + 1], task.output_symbols),
            task.goal_target_bev[step])
        if beverage_first:
            targets.extend((bev_target, ari_target))
        else:
            targets.extend((ari_target, bev_target))
    return targets
def ff_all(nnet, seq_bev, seq_ari, start):
    """Feed both sequences to ``nnet`` interleaved, in the order set by start.

    For each position 0-5 one symbol from each sequence is encoded over
    ``task.input_symbols`` and fed forward, giving 12 calls in total.

    Args:
        nnet: Object exposing ``feedforward(input)``.
        seq_bev: Indexable beverage sequence; positions 0-5 are read.
        seq_ari: Indexable arithmetic sequence; positions 0-5 are read.
        start: ``task.START_BEV`` to feed the beverage symbol first at each
            position, ``task.START_ARI`` to feed the arithmetic one first.

    Raises:
        NotImplementedError: If ``start`` is neither of the two constants;
            raised before anything is fed to the network.
    """
    if start == task.START_BEV:
        ordered_sequences = (seq_bev, seq_ari)
    elif start == task.START_ARI:
        ordered_sequences = (seq_ari, seq_bev)
    else:
        raise NotImplementedError("only starts are ari and bev")

    for position in range(6):
        for sequence in ordered_sequences:
            nnet.feedforward(
                utils.str_to_onehot(sequence[position], task.input_symbols))
def make_targets_bev(seq_bev, blanks):
    """Build the list of ``task.Target`` objects for a beverage sequence.

    Without blanks, steps 0-4 pair the encoding of ``seq_bev[k + 1]`` with
    ``task.goal_target_bev[k]``. With blanks, each of those steps is split in
    two: an output-only target followed by a goal-only target. In both cases
    the final target is the encoding of ``seq_bev[6]`` with no goal.

    Args:
        seq_bev: Indexable beverage sequence; positions 1-6 are read.
        blanks: Truthy to split output and goal into separate targets
            (11 targets), falsy to keep them together (6 targets).

    Returns:
        List of ``task.Target`` instances.
    """
    targets = []
    for step in range(5):
        output = utils.str_to_onehot(seq_bev[step + 1], task.output_symbols)
        goal = task.goal_target_bev[step]
        if blanks:
            targets.append(task.Target(output, None))
            targets.append(task.Target(None, goal))
        else:
            targets.append(task.Target(output, goal))

    # The last output never carries a goal target.
    final_output = utils.str_to_onehot(seq_bev[6], task.output_symbols)
    targets.append(task.Target(final_output, None))
    return targets
def ff_bev(nnet, seq_bev, blanks):
    """Feed the first six beverage symbols to ``nnet``, optionally spaced.

    Args:
        nnet: Object exposing ``feedforward(input)``.
        seq_bev: Indexable beverage sequence; positions 0-5 are read.
        blanks: When truthy, an all-zero input shaped like an encoded symbol
            is fed between consecutive symbols (11 calls instead of 6).
    """
    blank_input = None
    if blanks:
        # One zero vector, reused for every blank step.
        blank_input = np.zeros_like(
            utils.str_to_onehot(seq_bev[0], task.input_symbols))

    for position in range(6):
        if blanks and position > 0:
            nnet.feedforward(blank_input)
        nnet.feedforward(
            utils.str_to_onehot(seq_bev[position], task.input_symbols))