Example #1
0
def train_and_save(num_models, name, num_episodes):
    """Train `num_models` fresh models for `num_episodes` episodes each and
    save every one under the object name `name`."""
    for count in range(1, num_models + 1):
        net = nn.ElmanGoalNet()
        train_supervised_teacoffeeenv(net, num_episodes)
        utils.save_object(name, net)
        # Progress report after each model is persisted.
        print('Trained and saved model #{0} of {1}\n'.format(count, num_models))
Example #2
0
def train_with_goals(noise=0, iterations=10000, learning_rate=0.1):
    """Train an ElmanGoalNet on the PNAS-2018 task with goal supervision.

    noise: std-dev of Gaussian noise added to the hidden context at every
        timestep (0 disables noise).
    iterations: number of training episodes (one sequence per episode).
    learning_rate: learning rate assigned to the model.

    Returns the trained model.
    """
    model = nn.ElmanGoalNet(size_hidden=15,
                            size_observation=9,
                            size_action=8,
                            size_goal1=2,
                            size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = 0.

    # Rolling averages, used purely for console progress reporting.
    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.

    for episode in range(num_episodes):
        # Sample a sequence id according to the task's sequence probabilities.
        seqid = utils.idx_from_probabilities(
            pnas2018task.sequence_probabilities)

        goal = pnas2018task.goals[seqid]
        sequence = pnas2018task.seqs[seqid]
        # Inputs are every step but the last; targets are shifted one step,
        # so the network predicts the next action at each timestep.
        inputs = utils.liststr_to_onehot(sequence[:-1],
                                         pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:],
                                          pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            # Reset the hidden context to zeros at the start of each episode.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.goal1 = goal[0]
            for i in range(len(targets)):
                # Clear the previous-action input at every step.
                model.action = np.zeros((1, model.size_action),
                                        dtype=np.float32)
                # Add noise
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])
            loss, _ = model.train_obsolete(targets, goal, None, tape)
        # Monitor progress using rolling averages.
        speed = 2. / (
            episode + 2
        ) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(
            rng_avg_goals, ratios[0] == 1,
            speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                    episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model
Example #3
0
def train(model=None, noise=0., iterations=5000, l1reg=0.0, l2reg= 0.0, algorithm=nn.SGD,
          size_hidden=15, learning_rate=None, loss_type='cross_entropy',
          initial_context=pnas2018.ZEROS):
    """Train a (possibly new) goal-less ElmanGoalNet on the reward task.

    model: model to train; a fresh goal-less ElmanGoalNet is built when None.
    noise: std-dev of Gaussian noise added to the context at each timestep.
    iterations: number of training episodes.
    l1reg, l2reg: regularization strengths set on the model.
    algorithm: optimizer constant used when constructing a new model.
    size_hidden: hidden-layer size used when constructing a new model.
    learning_rate: if not None, overrides the model's learning rate.
    loss_type: pnas2018.MSE, pnas2018.CROSS_ENTROPY, or any other value to
        fall through to the model's default train() loss.
    initial_context: context-initialization scheme passed to new_episode().

    Returns (model, rolling average of full-sequence accuracy).
    """
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=size_hidden, size_observation=len(rewardtask.all_inputs),
                                size_action=len(rewardtask.all_outputs), size_goal1=0, size_goal2=0,
                                algorithm=algorithm, initialization="normal")
    num_episodes = iterations
    if learning_rate is not None:  # Else keep the model's learning rate
        model.learning_rate = learning_rate
    model.L1_regularization = l1reg
    model.L2_regularization = l2reg

    # Rolling averages, used purely for console progress reporting.
    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_sequence = 0.

    for episode in range(num_episodes):
        model.new_episode(initial_context=initial_context)
        seqid = utils.idx_from_probabilities(rewardtask.sequence_probabilities)

        sequence = rewardtask.seqs[seqid]
        # Inputs are every step but the last; targets are shifted one step.
        inputs = utils.liststr_to_onehot(sequence[:-1], rewardtask.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], rewardtask.all_outputs)
        # run the network
        # persistent=True: the tape is reused by the train_* call below and
        # explicitly deleted afterwards.
        with tf.GradientTape(persistent=True) as tape:
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            #if episode % 2 == 0:
                # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])
            # Train model, record loss.
            if loss_type==pnas2018.MSE:
                loss, _ = model.train_MSE(targets, None, None, tape)
            elif loss_type==pnas2018.CROSS_ENTROPY:
                loss, _ = model.train_obsolete(targets, None, None, tape)
            else:
                loss, _ = model.train(tape, targets)
        del tape

        #if episode % 2 == 0:
            # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_sequence = utils.rolling_avg(rng_avg_sequence, ratios[0] == 1,
                                          speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                    episode, rng_avg_loss, rng_avg_actions, rng_avg_sequence))
    return model, rng_avg_sequence
Example #4
0
def run_model1_ari():
    """Train model 1 on the ARI task, save it, reload it from disk, and
    generate its RDM."""
    # ARI #
    training_steps = 10000
    network = nn.ElmanGoalNet(size_hidden=15,
                              initialization=nn.UNIFORM,
                              size_goal1=0,
                              size_goal2=0,
                              size_observation=len(task.symbols),
                              size_action=len(task.symbols),
                              learning_rate=0.01,
                              algorithm=nn.ADAM)
    network.L2_regularization = 0.00001
    train_ari(network, training_steps)
    # Persist, then reload the saved copy before analysis.
    utils.save_object("cogloadtasknet_ari", network)
    network = utils.load_object("cogloadtasknet_ari")
    generate_rdm_ari(network, name="cogloadtasknet_ari")
Example #5
0
def train_with_goals(model=None,
                     mse=False,
                     learning_rate=0.1,
                     noise=0.,
                     iterations=5000,
                     l2reg=0.0,
                     algorithm=nn.SGD,
                     hidden_units=15,
                     reg_strength=0.,
                     reg_increase="square"):
    """Train an ElmanGoalNet with goal units plus hierarchy-inducing weight
    regularization.

    model: model to train; a fresh ElmanGoalNet is created when None.
    mse: when True the learning rate is forced to 0.5. NOTE(review): the MSE
        training branch itself is disabled — cross-entropy (train_obsolete)
        is always used; this flag only changes the learning rate.
    learning_rate: learning rate used when mse is False.
    noise: std-dev of Gaussian noise added to the context each timestep.
    iterations: number of training episodes.
    l2reg: L2 regularization strength set on the model.
    algorithm: optimizer constant for a newly created model.
    hidden_units: hidden-layer size for a newly created model.
    reg_strength: strength of the structural weight regularization.
    reg_increase: regularization growth profile (e.g. "square", "linear").

    Returns the trained model.
    """
    num_goals = 2
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=hidden_units,
                                algorithm=algorithm,
                                size_observation=len(all_inputs),
                                size_action=len(all_inputs),
                                size_goal1=num_goals,
                                size_goal2=0)
    num_episodes = iterations
    model.learning_rate = 0.5 if mse else learning_rate
    model.L2_regularization = l2reg

    # Rolling averages, used purely for console progress reporting.
    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.

    for episode in range(num_episodes):
        # Sample a sequence: 60% seq 0, 20% seq 1, 20% seq 2.
        decider = np.random.uniform()
        if decider < 0.6:
            seqid = 0
        elif decider < 0.8:
            seqid = 1
        else:
            seqid = 2

        sequence = seqs[seqid]
        goal = goals[seqid]
        # Inputs are every step but the last; targets are shifted one step.
        inputs = utils.liststr_to_onehot(sequence[:-1], all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            # Reset the hidden context to zeros at the start of each episode.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.goal1 = goal[0]
            for i in range(len(targets)):
                # Clear the previous-action input at every step.
                model.action = np.zeros((1, model.size_action),
                                        dtype=np.float32)
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            cols = model.size_hidden
            # Structural regularization of the hidden layer weights.
            # Recurrent hidden to hidden connections
            extra_loss = pnashierarchy.weight_regularization_calculator(
                model.hidden_layer.w, [0, model.size_hidden], [0, cols],
                reg_strength,
                reg_type="recurrent",
                reg_increase=reg_increase)
            # Prev goal to hidden
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.hidden_layer.w, [
                    model.size_hidden + 9 + model.size_action,
                    model.size_hidden + 9 + model.size_action + num_goals
                ], [0, cols],
                reg_strength,
                reg_type="input_left",
                reg_increase=reg_increase)

            # SWITCHED OUTPUT LEFT AND OUTPUT RIGHT.
            # Regularization in the output layers (goals and actions) weights.
            # hidden to next action
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.action_layer.w, [0, model.size_hidden],
                [0, model.size_action],
                reg_strength,
                reg_type="output_right",
                reg_increase=reg_increase)
            # Hidden to next goal
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.goal1_layer.w, [0, model.size_hidden],
                [0, model.size_action],
                reg_strength,
                reg_type="output_left",
                reg_increase=reg_increase)

            # Train with cross-entropy plus the structural penalty.
            loss, _ = model.train_obsolete(targets, goal, None, tape,
                                           extra_loss)
        # Monitor progress using rolling averages.
        speed = 2. / (
            episode + 2
        ) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(
            rng_avg_goals, ratios[0] == 1,
            speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                    episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model
Example #6
0
def train(model=None,
          mse=False,
          noise=0.,
          iterations=5000,
          l2reg=0.0,
          learning_rate=0.1,
          algorithm=nn.SGD,
          hidden_units=15):
    """Train a goal-less ElmanGoalNet on the task's sequences, reporting
    gradient statistics at regular intervals.

    model: model to train; a fresh ElmanGoalNet is created when None.
    mse: train with MSE loss instead of cross-entropy.
    noise: std-dev of Gaussian noise added to the context each timestep.
    iterations: number of training episodes.
    l2reg: L2 regularization strength set on the model.
    learning_rate: learning rate set on the model.
    algorithm: optimizer constant for a newly created model.
    hidden_units: hidden-layer size for a newly created model.

    Returns the trained model.
    """
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=hidden_units,
                                algorithm=algorithm,
                                size_observation=len(all_inputs),
                                size_action=len(all_inputs),
                                size_goal1=0,
                                size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = l2reg

    # Rolling averages, used purely for console progress reporting.
    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_full_seq = 0.

    for episode in range(num_episodes):
        # Sample a sequence according to the task's probabilities.
        seqid = utils.idx_from_probabilities(sequence_probabilities)
        sequence = seqs[seqid]
        # Inputs are every step but the last; targets are shifted one step.
        inputs = utils.liststr_to_onehot(sequence[:-1], all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)

        # run the network
        # Initialize context with random/uniform values.
        with tf.GradientTape() as tape:
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            for i in range(len(targets)):
                # Clear the previous-action input at every step.
                model.action = np.zeros((1, model.size_action),
                                        dtype=np.float32)
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            # Train model, record loss.
            if mse:
                loss, gradients = model.train_MSE(targets, None, None, tape)
            else:
                loss, gradients = model.train_obsolete(targets, None, None,
                                                       tape)

        # Monitor progress using averages
        speed = 2. / (
            episode + 2
        ) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_full_seq = utils.rolling_avg(
            rng_avg_full_seq, ratios[0] == 1,
            speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            # Mean and max of |gradient| across all trainable tensors —
            # useful for spotting vanishing/exploding gradients.
            grad_avg = sum([
                np.sum(tf.reduce_sum(tf.abs(gradient)).numpy())
                for gradient in gradients
            ]) / sum([tf.size(gradient).numpy() for gradient in gradients])
            grad_max = max([
                np.max(tf.reduce_max(tf.abs(gradient)).numpy())
                for gradient in gradients
            ])
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_seq={3}, \tgrad_avg={4}, \tgrad_max={5}"
                .format(episode, rng_avg_loss, rng_avg_actions,
                        rng_avg_full_seq, grad_avg, grad_max))

    return model
Example #7
0
def run_model3_multiple(stopping_params,
                        nnparams,
                        blanks,
                        from_file=None,
                        num_networks=1,
                        name="model3",
                        hrp=None):
    """Train (or load) several model-3 networks, then save averaged RDMs and
    MDS plots across the networks.

    stopping_params: stopping criteria forwarded to train_all.
    nnparams: network parameter object; its goal/action/observation sizes
        are overwritten here before each network is built.
    blanks: forwarded to train_all / generate_rdm_all_gradient.
    from_file: when not None, load `num_networks` trained networks from this
        file instead of training.
    num_networks: number of networks to train or load.
    name: base name used for saved objects, RDMs, and MDS plots.
    hrp: when None, a single combined RDM is produced; otherwise separate
        left (goals) and right (actions) gradient RDMs are produced.
    """
    if from_file is not None:
        networks = utils.load_objects(from_file, num_networks)
    else:
        networks = []
        for i in range(num_networks):
            nnparams.size_goal1 = 2
            nnparams.size_action = len(task.output_symbols)
            nnparams.size_observation = len(task.input_symbols)
            nnet = nn.ElmanGoalNet(params=nnparams)
            train_all(stopping_params, nnet, hrp=hrp, blanks=blanks)
            utils.save_object(name, nnet)
            networks.append(nnet)
            # Report per-network accuracy.
            hidden_activation, accuracy_totals, accuracy_fullseqs = test_network_all(
                nnet)
            # Fixed: the placeholder was previously printed literally because
            # .format(i) was never called.
            print("network {0}: ".format(i))
            print(accuracy_totals)
            print(accuracy_fullseqs)

    # pattern of sequences, for the mds
    pattern = [6] * 4 + [6] * 4 + [12] * 4
    if hrp is None:
        # Average a single RDM over all networks.
        sum_rdm = None
        labels = None
        for net in networks:
            rdm, labels = generate_rdm_all(net, name=name, from_file=False)
            if sum_rdm is None:
                sum_rdm = rdm
            else:
                sum_rdm += rdm
        average_rdm = sum_rdm / num_networks

        # Save it
        utils.save_rdm(average_rdm,
                       name,
                       labels,
                       title="RDM training combined")
        analysis.make_mds(average_rdm,
                          name,
                          labels=labels,
                          title="MDS training combined",
                          pattern=pattern)
    else:
        # Average separate left (goals) and right (actions) RDMs.
        sum_rdm_left = sum_rdm_right = None
        labels = None
        for net in networks:
            rdmleft, rdmright, labels = generate_rdm_all_gradient(
                net,
                name=name,
                blanks=blanks,
                from_file=False,
                delete_blank_states=True)
            if sum_rdm_left is None:
                sum_rdm_left = rdmleft
                sum_rdm_right = rdmright
            else:
                sum_rdm_left += rdmleft
                sum_rdm_right += rdmright
        average_rdm_left = sum_rdm_left / num_networks
        average_rdm_right = sum_rdm_right / num_networks

        utils.save_rdm(average_rdm_left,
                       name + "left",
                       labels,
                       title="RDM training combined: left (goals)",
                       fontsize=1.)
        utils.save_rdm(average_rdm_right,
                       name + "right",
                       labels,
                       title="RDM training combined: right (actions)",
                       fontsize=1.)
        analysis.make_mds(average_rdm_left,
                          name + "left",
                          labels=labels,
                          title="MDS training combined: left (goals)",
                          pattern=pattern)
        analysis.make_mds(average_rdm_right,
                          name + "right",
                          labels=labels,
                          title="MDS training combined: right (actions)",
                          pattern=pattern)
Example #8
0
def train_hierarchical_nogoals(noise=0,
                               iterations=10000,
                               learning_rate=0.1,
                               reg_strength=0.001,
                               reg_increase="linear"):
    """Train a goal-less ElmanGoalNet on the PNAS-2018 task with structural
    weight regularization on the recurrent and action-output weights.

    noise: std-dev of Gaussian noise added to the context each timestep.
    iterations: number of training episodes.
    learning_rate: learning rate assigned to the model.
    reg_strength: strength of the structural weight regularization.
    reg_increase: regularization growth profile (e.g. "linear", "square").

    Returns the trained model.
    """
    model = nn.ElmanGoalNet(size_hidden=15,
                            size_observation=9,
                            size_action=8,
                            size_goal1=0,
                            size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = 0.

    # Rolling averages, used purely for console progress reporting.
    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.

    for episode in range(num_episodes):
        # Reset per-episode network state (context, action, etc.).
        model.new_episode()
        # Sample a sequence id according to the task's probabilities.
        seqid = utils.idx_from_probabilities(
            pnas2018task.sequence_probabilities)

        #goal = pnas2018task.goals[seqid]
        sequence = pnas2018task.seqs[seqid]
        # Inputs are every step but the last; targets are shifted one step.
        inputs = utils.liststr_to_onehot(sequence[:-1],
                                         pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:],
                                          pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            # Initialize context with random/uniform values.
            #model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            #model.goal1 = np.zeros_like(goal[0])
            for i in range(len(targets)):
                #model.action = np.zeros((1, model.size_action), dtype=np.float32)
                # Add noise
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])
            # Train model, record loss.
            cols = model.size_hidden
            # Regularization in the hidden layer weights
            # Recurrent hidden to hidden connections
            extra_loss = utils.weight_regularization_calculator(
                model.hidden_layer.w, [0, model.size_hidden], [0, cols],
                reg_strength,
                reg_type="recurrent",
                reg_increase=reg_increase)
            # Prev action to hidden
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                     [model.size_hidden+9, model.size_hidden+9+model.size_action], [0, cols],
            #                                                     reg_strength, reg_type="input_right", reg_increase=reg_increase)
            # Prev goal to hidden
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                     [model.size_hidden+9+model.size_action, model.size_hidden+9+model.size_action+2], [0, cols],
            #                                                     reg_strength, reg_type="input_left", reg_increase=reg_increase)

            #Regularization in the output layers (goals and actions) weights
            # hidden to next action
            extra_loss += utils.weight_regularization_calculator(
                model.action_layer.w, [0, model.size_hidden],
                [0, model.size_action],
                reg_strength,
                reg_type="output_right",
                reg_increase=reg_increase)

            # Hidden to next goal
            #extra_loss += weight_regularization_calculator(model.goal1_layer.w,
            #                                                    [0, model.size_hidden], [0, model.size_action],
            #                                                     reg_strength, reg_type="output_left", reg_increase=reg_increase)

            # Regularization of the observation (only goes to the action side)
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                     [model.size_hidden, model.size_hidden+model.size_observation],
            #                                                     [0, cols],
            #                                                     reg_strength, reg_type="input_right", reg_increase=reg_increase)

            # Train with cross-entropy plus the structural penalty.
            loss, _ = model.train_obsolete(targets, None, None, tape,
                                           extra_loss)
            #if(episode%100 == 0):
            #    print(loss.numpy()-extra_loss.numpy(), extra_loss.numpy())
        # Monitor progress using rolling averages.
        speed = 2. / (
            episode + 2
        ) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(
            rng_avg_goals, ratios[0] == 1,
            speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                    episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model