Ejemplo n.º 1
0
def run_model_with_noise(model, noise_level):
    """Drive `model` through each goal's target sequence with noisy context.

    For every goal name below, the environment state is rebuilt, the goal
    field is set to 1., and the model context is re-drawn uniformly from
    [0.01, 0.1). At each target step the model's action/goal1/goal2 are
    overwritten with the encoded target values, Gaussian noise scaled by
    `noise_level` is added to the context, the model is fed the current
    observation, and the environment is advanced with the *target* action.

    Args:
        model: object exposing `context`, `size_hidden`, `action`, `goal1`,
            `goal2` and `feedforward(observation)`.
        noise_level: multiplier applied to the standard-normal noise.

    Returns:
        List with the value of `model.action` after each feedforward call,
        concatenated over all goals in order.
    """
    goal_names = [
        "g_1_make_coffee", "g_1_make_tea", "g_2_add_grounds", "g_2_add_cream",
        "g_2_add_sugar", "g_2_drink", "g_2_dip_teabag"
    ]
    environment = tce.TeaCoffeeEnv()
    recorded_actions = []
    for goal_name in goal_names:
        goal_targets = tce.target_list[goal_name]
        # Start every goal from a fresh state with only this goal switched on.
        environment.state = state.State(tce.TeaCoffeeData())
        environment.state.current.set_field(goal_name, 1.)
        model.context = np.random.uniform(0.01, 0.1, (1, model.size_hidden))
        for step_target in goal_targets:
            # Force the model's previous outputs to the target values.
            model.action = utils.str_to_onehot(
                step_target[0], tce.TeaCoffeeData.actions_list)
            model.goal1 = utils.str_to_onehot(
                step_target[1], tce.TeaCoffeeData.goals1_list)
            model.goal2 = utils.str_to_onehot(
                step_target[2], tce.TeaCoffeeData.goals2_list)
            perturbation = noise_level * np.random.normal(
                size=tf.shape(model.context))
            model.context += perturbation
            model.feedforward(environment.observe())
            # The environment follows the target action, not the model's.
            environment.do_action(step_target[0])
            recorded_actions.append(model.action)
    return recorded_actions
Ejemplo n.º 2
0
def run_models_with_noise(models, noise):
    """Count, per target position, how many models produced a wrong action.

    Each model is run through `run_model_with_noise` first; afterwards every
    produced action is compared element-wise with the encoded target action
    at the same position. Every (target, produced) pair is also printed.

    Args:
        models: iterable of models accepted by `run_model_with_noise`.
        noise: noise level forwarded to `run_model_with_noise`.

    Returns:
        NumPy array with one mismatch counter per target action.
    """
    goal_names = [
        "g_1_make_coffee", "g_1_make_tea", "g_2_add_grounds", "g_2_add_cream",
        "g_2_add_sugar", "g_2_drink", "g_2_dip_teabag"
    ]
    expected_names = utils.flatten_onelevel(
        [tce.action_list[goal_name] for goal_name in goal_names])
    expected_onehots = [
        utils.str_to_onehot(name, tce.TeaCoffeeData.actions_list)
        for name in expected_names
    ]

    # Run every model before doing any comparison.
    per_model_outputs = [
        run_model_with_noise(candidate, noise) for candidate in models
    ]

    mismatch_counts = np.zeros(len(expected_onehots))
    for produced_actions in per_model_outputs:
        for position, produced in enumerate(produced_actions):
            differs = (expected_onehots[position] != produced).any()
            if differs:
                mismatch_counts[position] += 1
            produced_name = utils.onehot_to_str(
                produced, tce.TeaCoffeeData.actions_list)
            print(expected_names[position], produced_name)

    return mismatch_counts
Ejemplo n.º 3
0
def make_targets_ari(seq_ari, blanks):
    """Build the `task.Target` list for positions 1..6 of `seq_ari`.

    Each symbol is encoded against `task.output_symbols` and paired with an
    entry of `task.goal_target_ari`: indices 0..4 for the first five targets
    and the last entry (index -1) for the sixth.

    Args:
        seq_ari: indexable sequence of symbols; positions 1 through 6 are read.
        blanks: when truthy, a `task.Target(None, None)` is inserted between
            consecutive targets (11 entries in total instead of 6).

    Returns:
        List of `task.Target` objects.
    """
    encoded = [
        utils.str_to_onehot(seq_ari[position], task.output_symbols)
        for position in range(1, 7)
    ]
    goal_indices = (0, 1, 2, 3, 4, -1)
    with_blanks = bool(blanks)

    targets = []
    for step, (onehot, goal_index) in enumerate(zip(encoded, goal_indices)):
        if with_blanks and step > 0:
            # Blank step separating two real targets.
            targets.append(task.Target(None, None))
        targets.append(task.Target(onehot, task.goal_target_ari[goal_index]))
    return targets
Ejemplo n.º 4
0
def get_model_hidden_activations(model):
    """Collect the flattened model context after every step of every goal.

    For each goal in `tce.goal_list` the context is re-drawn uniformly from
    [0.01, 0.1) and the model's action/goal1/goal2 are set once from the
    first target of that goal. The model is then fed one observation per
    target, and the environment is advanced with the target action.

    Args:
        model: object exposing `context` (with a `.numpy()` method after
            feedforward), `size_hidden`, `action`, `goal1`, `goal2` and
            `feedforward(observation)`.

    Returns:
        List of flattened context arrays, one per target step.
    """
    # NOTE(review): a single environment is shared across all goals and its
    # state is never reinitialized here — confirm this is intended.
    environment = tce.TeaCoffeeEnv()
    activations = []
    for goal_name in tce.goal_list:
        goal_targets = tce.target_list[goal_name]
        first_target = goal_targets[0]
        model.context = np.random.uniform(0.01, 0.1, (1, model.size_hidden))
        model.action = utils.str_to_onehot(first_target[0],
                                           tce.TeaCoffeeData.actions_list)
        model.goal1 = utils.str_to_onehot(first_target[1],
                                          tce.TeaCoffeeData.goals1_list)
        model.goal2 = utils.str_to_onehot(first_target[2],
                                          tce.TeaCoffeeData.goals2_list)
        for step_target in goal_targets:
            model.feedforward(environment.observe())
            snapshot = model.context.numpy().flatten()
            activations.append(snapshot)
            environment.do_action(step_target[0])
    return activations
Ejemplo n.º 5
0
def make_targets_all(seq_bev, seq_ari):
    """Build the 12-entry target list for an interleaved bev/ari run.

    Positions 1..5 of `seq_bev` are each followed by an empty
    `task.Target(None)`; the list ends with position 6 of `seq_bev` and then
    position 6 of `seq_ari`. All symbols are encoded against `task.symbols`.

    Args:
        seq_bev: indexable sequence; positions 1 through 6 are read.
        seq_ari: indexable sequence; only position 6 is read.

    Returns:
        List of `task.Target` objects.
    """
    targets = []
    for position in range(1, 6):
        bev_onehot = utils.str_to_onehot(seq_bev[position], task.symbols)
        targets.append(task.Target(bev_onehot))
        targets.append(task.Target(None))
    # The final pair carries the last symbol of each sequence.
    for final_symbol in (seq_bev[6], seq_ari[6]):
        targets.append(
            task.Target(utils.str_to_onehot(final_symbol, task.symbols)))
    return targets
Ejemplo n.º 6
0
def make_targets_ari(seq_ari):
    """Build six targets tracking the running value of an arithmetic sequence.

    The first two targets encode `seq_ari[0]`; the next three encode the
    intermediate result of applying the operator at `seq_ari[1]` ('+' adds,
    anything else subtracts) to `seq_ari[0]` and `seq_ari[2]`; the last
    target encodes `seq_ari[6]`. Symbols are encoded against
    `task.output_symbols`.

    Args:
        seq_ari: indexable sequence; positions 0, 1, 2 and 6 are read, with
            positions 0 and 2 convertible by `int`.

    Returns:
        List of six `task.Target` objects.
    """

    def as_target(symbol):
        # Encode a fresh one-hot for every target so none are shared.
        return task.Target(utils.str_to_onehot(symbol, task.output_symbols))

    targets = [as_target(seq_ari[0]) for _ in range(2)]

    sign = 1 if seq_ari[1] == '+' else -1
    interm = int(seq_ari[0]) + sign * int(seq_ari[2])
    interm_symbol = str(interm)
    targets.extend(as_target(interm_symbol) for _ in range(3))

    # Original = none none none except this line.
    targets.append(as_target(seq_ari[6]))
    return targets
Ejemplo n.º 7
0
def make_targets_bev(seq_bev):
    """Return one `task.Target` per symbol at positions 1..6 of `seq_bev`.

    Each symbol is encoded against `task.symbols`.
    """
    return [
        task.Target(utils.str_to_onehot(seq_bev[position], task.symbols))
        for position in range(1, 7)
    ]
Ejemplo n.º 8
0
def ff_bev(nnet, seq_bev):
    """Feed positions 0..5 of `seq_bev` to `nnet`, one symbol per call.

    Each symbol is encoded against `task.symbols` before being passed to
    `nnet.feedforward`.
    """
    for position in range(6):
        encoded_input = utils.str_to_onehot(seq_bev[position], task.symbols)
        nnet.feedforward(encoded_input)
Ejemplo n.º 9
0
def ff_ari(nnet, seq_ari):
    """Feed positions 0..5 of `seq_ari` to `nnet`, one symbol per call.

    Each symbol is encoded against `task.symbols` before being passed to
    `nnet.feedforward`.
    """
    for position in range(6):
        encoded_input = utils.str_to_onehot(seq_ari[position], task.symbols)
        nnet.feedforward(encoded_input)
Ejemplo n.º 10
0
def ff_all(nnet, seq_bev, seq_ari):
    """Feed `seq_bev` and `seq_ari` to `nnet` in alternation.

    For each position 0..5 the `seq_bev` symbol is fed first, then the
    `seq_ari` symbol at the same position. Symbols are encoded against
    `task.symbols`.
    """
    for position in range(6):
        for sequence in (seq_bev, seq_ari):
            nnet.feedforward(
                utils.str_to_onehot(sequence[position], task.symbols))
Ejemplo n.º 11
0
 def __init__(self, action, goal1=None):
     """Store the action target and an optional first-level goal target.

     Args:
         action: stored unchanged as `action_one_hot`.
         goal1: stored as `goal1_one_hot`; a `str` is first encoded with
             `utils.str_to_onehot` against `goal_symbols`, any other value
             (including None) is stored as given.
     """
     self.action_one_hot = action
     encoded_goal1 = (utils.str_to_onehot(goal1, goal_symbols)
                      if isinstance(goal1, str) else goal1)
     self.goal1_one_hot = encoded_goal1
     # No second-level goal is ever set by this constructor.
     self.goal2_one_hot = None
Ejemplo n.º 12
0
def train_supervised_teacoffeeenv(model, environment, num_episodes):
    """Train `model` on the tea/coffee target sequences, one goal per episode.

    Each episode picks the next goal from a fixed list (cycling), rebuilds
    the environment state with that goal set to 1., re-draws the model
    context, and steps through the goal's targets. At every step the model's
    action/goal1/goal2 are set to the encoded targets, the model is fed the
    current observation, and the environment is advanced with the target
    action. After the sequence, `model.train_obsolete` is called with the
    encoded targets and the gradient tape, and rolling averages of loss and
    accuracy are updated and periodically printed.

    Args:
        model: object exposing `context`, `size_hidden`, `action`, `goal1`,
            `goal2`, `feedforward`, `h_action_wta`, `h_goal1_wta`,
            `h_goal2_wta` and `train_obsolete`.
        environment: environment exposing `state`, `observe()` and
            `do_action()`; its `state` attribute is overwritten each episode.
        num_episodes: number of training episodes to run.

    Returns:
        None. Progress is reported through `print` only.
    """
    env = environment

    # Rolling statistics, updated once per episode.
    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.
    rng_avg_goal1 = 0.
    rng_avg_goal2 = 0.
    rng_avg_action1 = 0.

    goal_list = [
        "g_1_make_coffee", "g_1_make_tea", "g_2_add_grounds", "g_2_add_cream",
        "g_2_add_sugar", "g_2_drink", "g_2_dip_teabag"
    ]
    for episode in range(num_episodes):
        goal = goal_list[episode % len(goal_list)]  # Cycle through the goals
        targets = tce.target_list[goal]  # Get the target actions and goals
        env.state = state.State(tce.TeaCoffeeData())  # Reinitialize the state
        env.state.current.set_field(goal, 1.)  # Set the goalS as active

        # run the network
        # NOTE(review): everything below, including statistics and printing,
        # executes inside the GradientTape context.
        with tf.GradientTape() as tape:
            # Initialize context with random/uniform values.
            #model.context = np.zeros((1, model.size_hidden))
            model.context = np.float32(
                np.random.uniform(0.01, 0.99, (1, model.size_hidden)))
            targets_onehot = [[], [], []]  # actions, goal1s, goal2s
            for i, target in enumerate(targets):
                # Set up the input to be the correct actions and goals
                targets_onehot[0].append(
                    utils.str_to_onehot(targets[i][0],
                                        tce.TeaCoffeeData.actions_list))
                targets_onehot[1].append(
                    utils.str_to_onehot(targets[i][1],
                                        tce.TeaCoffeeData.goals1_list))
                targets_onehot[2].append(
                    utils.str_to_onehot(targets[i][2],
                                        tce.TeaCoffeeData.goals2_list))
                # The most recently appended encodings become the model's
                # current action and goals.
                model.action, model.goal1, model.goal2 = [
                    targets_onehot[j][-1] for j in range(3)
                ]

                observation = env.observe()
                model.feedforward(observation)
                env.do_action(
                    target[0]
                )  # Transition the MDP according to the *target* action, not the chosen action!

            # Get some statistics about what was correct and what wasn't
            ratios = evaluate(
                [model.h_action_wta, model.h_goal1_wta, model.h_goal2_wta],
                targets_onehot)
            # Separate rolling statistic for the first action of the sequence.
            rng_avg_action1 = utils.rolling_avg(
                rng_avg_action1,
                ratio_correct([model.h_action_wta[0]], [targets_onehot[0][0]]),
                2. / (episode + 2) if episode < 1000 else 0.001)
            # Train model, record loss.
            loss = model.train_obsolete(targets_onehot[0], targets_onehot[1],
                                        targets_onehot[2], tape)

            # Monitor progress using rolling averages.
            speed = 2. / (
                episode + 2
            ) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
            rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
            rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0],
                                                speed)
            rng_avg_goals = utils.rolling_avg(
                rng_avg_goals, ratios[0] == 1,
                speed)  # whole action sequence correct ?
            rng_avg_goal1 = utils.rolling_avg(rng_avg_goal1, ratios[1], speed)
            rng_avg_goal2 = utils.rolling_avg(rng_avg_goal2, ratios[2], speed)
            # Display on the console at regular intervals
            # (powers of 3 below 1000, every 1000th episode, and the last one).
            if (episode < 1000 and episode in [3**n for n in range(50)]) or episode % 1000 == 0 \
                               or episode+1 == num_episodes:
                print(
                    "{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}\tgoal1={4}\tgoal2={5}\tfirst_action={6}"
                    .format(episode, rng_avg_loss, rng_avg_actions,
                            rng_avg_goals, rng_avg_goal1, rng_avg_goal2,
                            rng_avg_action1))
Ejemplo n.º 13
0
def make_targets_all(seq_bev, seq_ari, start):
    """Build the 12-entry target list for an interleaved bev/ari run.

    Positions 1..6 of both sequences are encoded against
    `task.output_symbols`. Each `seq_bev` output is paired with the
    `task.goal_target_ari` entry of the same step and each `seq_ari` output
    with the `task.goal_target_bev` entry of the same step. `start` decides
    which sequence's target comes first within each step.

    Args:
        seq_bev: indexable sequence; positions 1 through 6 are read.
        seq_ari: indexable sequence; positions 1 through 6 are read.
        start: `task.START_BEV` or `task.START_ARI`.

    Returns:
        List of `task.Target` objects; an empty list when `start` matches
        neither constant.
    """
    ari_onehots = [
        utils.str_to_onehot(seq_ari[position], task.output_symbols)
        for position in range(1, 7)
    ]

    def bev_target(step):
        bev_onehot = utils.str_to_onehot(seq_bev[step + 1],
                                         task.output_symbols)
        return task.Target(bev_onehot, task.goal_target_ari[step])

    def ari_target(step):
        return task.Target(ari_onehots[step], task.goal_target_bev[step])

    if start == task.START_BEV:
        builders = (bev_target, ari_target)
    elif start == task.START_ARI:
        builders = (ari_target, bev_target)
    else:
        # Unknown start: no targets are produced.
        builders = ()

    targets = []
    if builders:
        for step in range(6):
            for build in builders:
                targets.append(build(step))
    return targets
Ejemplo n.º 14
0
def ff_all(nnet, seq_bev, seq_ari, start):
    """Feed `seq_bev` and `seq_ari` to `nnet` in alternation.

    For each position 0..5 both sequences contribute one symbol, encoded
    against `task.input_symbols`; `start` selects which sequence goes first
    at every position.

    Args:
        nnet: object exposing `feedforward(input)`.
        seq_bev: indexable sequence; positions 0 through 5 are read.
        seq_ari: indexable sequence; positions 0 through 5 are read.
        start: `task.START_BEV` or `task.START_ARI`.

    Raises:
        NotImplementedError: if `start` matches neither constant (raised
            before anything is fed to the network).
    """
    if start == task.START_BEV:
        feeding_order = (seq_bev, seq_ari)
    elif start == task.START_ARI:
        feeding_order = (seq_ari, seq_bev)
    else:
        raise NotImplementedError("only starts are ari and bev")

    for position in range(6):
        for sequence in feeding_order:
            encoded_input = utils.str_to_onehot(sequence[position],
                                                task.input_symbols)
            nnet.feedforward(encoded_input)
Ejemplo n.º 15
0
def make_targets_bev(seq_bev, blanks):
    """Build the `task.Target` list for positions 1..6 of `seq_bev`.

    Symbols are encoded against `task.output_symbols`.

    Without blanks, the first five targets pair the output with
    `task.goal_target_bev[0..4]`. With blanks, each of those steps is split
    into an output-only target followed by a goal-only target. In both cases
    the last target carries the output at position 6 with no goal.

    Args:
        seq_bev: indexable sequence; positions 1 through 6 are read.
        blanks: truthy to split output and goal into separate targets.

    Returns:
        List of `task.Target` objects (11 with blanks, 6 without).
    """
    split_steps = bool(blanks)
    targets = []
    for step in range(5):
        output_onehot = utils.str_to_onehot(seq_bev[step + 1],
                                            task.output_symbols)
        if split_steps:
            targets.append(task.Target(output_onehot, None))
            targets.append(task.Target(None, task.goal_target_bev[step]))
        else:
            targets.append(
                task.Target(output_onehot, task.goal_target_bev[step]))
    # The closing target never has a goal attached.
    closing_onehot = utils.str_to_onehot(seq_bev[6], task.output_symbols)
    targets.append(task.Target(closing_onehot, None))
    return targets
Ejemplo n.º 16
0
def ff_bev(nnet, seq_bev, blanks):
    """Feed positions 0..5 of `seq_bev` to `nnet`, optionally with blanks.

    Symbols are encoded against `task.input_symbols`. When `blanks` is
    truthy, an all-zero input shaped like the encoding of `seq_bev[0]` is fed
    between consecutive symbols (five blank inputs in total).

    Args:
        nnet: object exposing `feedforward(input)`.
        seq_bev: indexable sequence; positions 0 through 5 are read.
        blanks: truthy to interleave zero inputs between symbols.
    """
    interleave = bool(blanks)
    blank_input = None
    if interleave:
        # One zero array is built up front and reused for every blank step.
        blank_input = np.zeros_like(
            utils.str_to_onehot(seq_bev[0], task.input_symbols))

    for position in range(6):
        if interleave and position > 0:
            nnet.feedforward(blank_input)
        nnet.feedforward(
            utils.str_to_onehot(seq_bev[position], task.input_symbols))