def train_predictive_net(model=None, iterations=5000, learning_rate=0.1,
                         algorithm=nn.RMSPROP, hidden_units=15):
    if model is None:
        model = nn.PredictiveNet(size_hidden=hidden_units, algorithm=algorithm,
                                 size_observation=len(pnas2018task.all_inputs),
                                 size_action=len(pnas2018task.all_outputs))
    num_episodes = iterations
    model.learning_rate = learning_rate

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_full_seq = 0.
    rng_avg_preds = 0.

    for episode in range(num_episodes):
        seqid = utils.idx_from_probabilities(pnas2018task.sequence_probabilities)
        sequence = pnas2018task.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        action_targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        prediction_targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_inputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        model.prediction_linear = np.zeros((1, model.size_observation), dtype=np.float32)  # initial prediction = 0

        # Run the network.
        with tf.GradientTape() as tape:
            # Initialize the context with zeros.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            for i in range(len(action_targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                # model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(action_targets[0])))  # reshape to (x, 8)
            ratios = scripts.evaluate([tchoices], [action_targets])
            tpreds = np.array(model.h_prediction_wta).reshape((-1, len(prediction_targets[0])))
            ratios_predictions = scripts.evaluate([tpreds], [prediction_targets])

            # Train model, record loss. NOTE: action targets and prediction targets are identical for this task.
            loss, gradients = model.train(tape, [action_targets, prediction_targets])

        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_preds = utils.rolling_avg(rng_avg_preds, ratios_predictions[0], speed)
        rng_avg_full_seq = utils.rolling_avg(rng_avg_full_seq, ratios[0] == 1, speed)  # whole action sequence correct?

        # Display on the console at regular intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            grad_avg = sum([np.sum(tf.reduce_sum(tf.abs(gradient)).numpy()) for gradient in gradients]) / \
                       sum([tf.size(gradient).numpy() for gradient in gradients])
            grad_max = max([np.max(tf.reduce_max(tf.abs(gradient)).numpy()) for gradient in gradients])
            print("{0}: avg loss={1}, \tactions={2}, \tfull_seq={3}, \tpredictions={4}, "
                  "\tgrad_avg={5}, \tgrad_max={6}".format(
                      episode, rng_avg_loss, rng_avg_actions, rng_avg_full_seq,
                      rng_avg_preds, grad_avg, grad_max))
    return model
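

# Illustrative usage (a minimal sketch, not part of the original code; the helper name
# example_train_predictive_net is hypothetical). It shows how the model= parameter allows
# training to be resumed on the returned network, here with a smaller learning rate.
def example_train_predictive_net():
    model = train_predictive_net(iterations=5000, learning_rate=0.1, algorithm=nn.RMSPROP)
    # Continue training the same network for another 1000 episodes at a lower learning rate.
    model = train_predictive_net(model=model, iterations=1000, learning_rate=0.01)
    return model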


def train_with_goals(noise=0, iterations=10000, learning_rate=0.1):
    model = nn.ElmanGoalNet(size_hidden=15, size_observation=9, size_action=8,
                            size_goal1=2, size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = 0.

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.

    for episode in range(num_episodes):
        seqid = utils.idx_from_probabilities(pnas2018task.sequence_probabilities)
        goal = pnas2018task.goals[seqid]
        sequence = pnas2018task.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)

        # Run the network.
        with tf.GradientTape() as tape:
            # Initialize the context with zeros and set the top-level goal.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.goal1 = goal[0]
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                # Add noise to the context.
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            loss, _ = model.train_obsolete(targets, goal, None, tape)

        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(rng_avg_goals, ratios[0] == 1, speed)  # whole action sequence correct?

        # Display on the console at regular intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print("{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model
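

# Illustrative usage (sketch; example_train_goal_network is a hypothetical name): trains the
# goal-conditioned network defined directly above while injecting Gaussian noise into the
# recurrent context at every step, as the noise parameter allows.
def example_train_goal_network():
    model = train_with_goals(noise=0.05, iterations=10000, learning_rate=0.1)
    return model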


def train(model=None, noise=0., iterations=5000, l1reg=0.0, l2reg=0.0, algorithm=nn.SGD,
          size_hidden=15, learning_rate=None, loss_type='cross_entropy',
          initial_context=pnas2018.ZEROS):
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=size_hidden,
                                size_observation=len(rewardtask.all_inputs),
                                size_action=len(rewardtask.all_outputs),
                                size_goal1=0, size_goal2=0,
                                algorithm=algorithm, initialization="normal")
    num_episodes = iterations
    if learning_rate is not None:  # Else keep the model's learning rate
        model.learning_rate = learning_rate
    model.L1_regularization = l1reg
    model.L2_regularization = l2reg

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_sequence = 0.

    for episode in range(num_episodes):
        model.new_episode(initial_context=initial_context)
        seqid = utils.idx_from_probabilities(rewardtask.sequence_probabilities)
        sequence = rewardtask.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], rewardtask.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], rewardtask.all_outputs)

        # Run the network.
        with tf.GradientTape(persistent=True) as tape:
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # if episode % 2 == 0:
            # Get some statistics about what was correct and what wasn't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            # Train model, record loss.
            if loss_type == pnas2018.MSE:
                loss, _ = model.train_MSE(targets, None, None, tape)
            elif loss_type == pnas2018.CROSS_ENTROPY:
                loss, _ = model.train_obsolete(targets, None, None, tape)
            else:
                loss, _ = model.train(tape, targets)
        del tape

        # if episode % 2 == 0:
        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_sequence = utils.rolling_avg(rng_avg_sequence, ratios[0] == 1, speed)  # whole action sequence correct?

        # Display on the console at regular intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print("{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                episode, rng_avg_loss, rng_avg_actions, rng_avg_sequence))
    return model, rng_avg_sequence
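

# Illustrative usage (sketch; example_train_reward_task is a hypothetical name): the
# loss_type argument selects between the MSE and cross-entropy training paths (any other
# value falls through to model.train), and train() returns both the model and the rolling
# estimate of full-sequence accuracy.
def example_train_reward_task():
    model, full_seq_accuracy = train(iterations=5000, algorithm=nn.SGD,
                                     learning_rate=0.1, loss_type=pnas2018.CROSS_ENTROPY,
                                     initial_context=pnas2018.ZEROS)
    print("final full-sequence accuracy estimate:", full_seq_accuracy)
    return model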


def train_with_goals(model=None, mse=False, learning_rate=0.1, noise=0., iterations=5000,
                     l2reg=0.0, algorithm=nn.SGD, hidden_units=15,
                     reg_strength=0., reg_increase="square"):
    num_goals = 2
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=hidden_units, algorithm=algorithm,
                                size_observation=len(all_inputs), size_action=len(all_inputs),
                                size_goal1=num_goals, size_goal2=0)
    num_episodes = iterations
    model.learning_rate = 0.5 if mse else learning_rate
    model.L2_regularization = l2reg

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.

    for episode in range(num_episodes):
        decider = np.random.uniform()
        if decider < 0.6:
            seqid = 0
        elif decider < 0.8:
            seqid = 1
        else:
            seqid = 2
        sequence = seqs[seqid]
        goal = goals[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], all_outputs)
        targets_goal1 = goal
        model.action = np.zeros((1, model.size_action), dtype=np.float32)

        # Run the network.
        with tf.GradientTape() as tape:
            # Initialize the context with zeros and set the top-level goal.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.goal1 = goal[0]
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            cols = model.size_hidden
            # Regularization in the hidden layer weights
            # Recurrent hidden to hidden connections
            extra_loss = pnashierarchy.weight_regularization_calculator(
                model.hidden_layer.w,
                [0, model.size_hidden], [0, cols],
                reg_strength, reg_type="recurrent", reg_increase=reg_increase)
            # Prev action to hidden
            # extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                [model.size_hidden + 9, model.size_hidden + 9 + model.size_action],
            #                                                [0, cols],
            #                                                reg_strength, reg_type="input_right", reg_increase=reg_increase)
            # Prev goal to hidden
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.hidden_layer.w,
                [model.size_hidden + 9 + model.size_action,
                 model.size_hidden + 9 + model.size_action + num_goals],
                [0, cols],
                reg_strength, reg_type="input_left", reg_increase=reg_increase)

            # NOTE: output left and output right are switched.
            # Regularization in the output layers (goals and actions) weights
            # Hidden to next action
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.action_layer.w,
                [0, model.size_hidden], [0, model.size_action],
                reg_strength, reg_type="output_right", reg_increase=reg_increase)
            # Hidden to next goal
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.goal1_layer.w,
                [0, model.size_hidden], [0, model.size_action],
                reg_strength, reg_type="output_left", reg_increase=reg_increase)

            # Regularization of the observation (only goes to the action side)
            # extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                [model.size_hidden, model.size_hidden + model.size_observation],
            #                                                [0, cols],
            #                                                reg_strength, reg_type="input_right", reg_increase=reg_increase)

            loss, _ = model.train_obsolete(targets, goal, None, tape, extra_loss)
            # Train model, record loss.
            # if mse:
            #     loss = model.train_MSE(targets, None, None, tape)
            # else:
            #     loss, gradients = model.train_obsolete(targets, targets_goal1, None, tape)

        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(rng_avg_goals, ratios[0] == 1, speed)  # whole action sequence correct?

        # Display on the console at regular intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print("{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model
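

# Illustrative usage (sketch; example_train_goals_regularized is a hypothetical name): trains
# the regularized goal network defined directly above. Note that passing mse=True overrides
# learning_rate and sets it to 0.5, as the code above shows.
def example_train_goals_regularized():
    model = train_with_goals(iterations=5000, noise=0.05,
                             reg_strength=0.001, reg_increase="square")
    return model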


def train(model=None, mse=False, noise=0., iterations=5000, l2reg=0.0, learning_rate=0.1,
          algorithm=nn.SGD, hidden_units=15):
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=hidden_units, algorithm=algorithm,
                                size_observation=len(all_inputs), size_action=len(all_inputs),
                                size_goal1=0, size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = l2reg

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_full_seq = 0.

    for episode in range(num_episodes):
        seqid = utils.idx_from_probabilities(sequence_probabilities)
        sequence = seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)

        # Run the network.
        with tf.GradientTape() as tape:
            # Initialize the context with zeros.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            # Train model, record loss.
            if mse:
                loss, gradients = model.train_MSE(targets, None, None, tape)
            else:
                loss, gradients = model.train_obsolete(targets, None, None, tape)

        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_full_seq = utils.rolling_avg(rng_avg_full_seq, ratios[0] == 1, speed)  # whole action sequence correct?

        # Display on the console at regular intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            grad_avg = sum([np.sum(tf.reduce_sum(tf.abs(gradient)).numpy()) for gradient in gradients]) / \
                       sum([tf.size(gradient).numpy() for gradient in gradients])
            grad_max = max([np.max(tf.reduce_max(tf.abs(gradient)).numpy()) for gradient in gradients])
            print("{0}: avg loss={1}, \tactions={2}, \tfull_seq={3}, \tgrad_avg={4}, \tgrad_max={5}".format(
                episode, rng_avg_loss, rng_avg_actions, rng_avg_full_seq, grad_avg, grad_max))
    return model
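

# Illustrative usage (sketch; example_train_mse_vs_crossentropy is a hypothetical name): the
# mse flag switches between the MSE and cross-entropy training paths of the same network;
# gradient statistics (grad_avg, grad_max) are printed at the logging intervals above.
def example_train_mse_vs_crossentropy():
    model_ce = train(mse=False, iterations=5000, learning_rate=0.1)
    model_mse = train(mse=True, iterations=5000, learning_rate=0.1)
    return model_ce, model_mse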


def train_hierarchical_nogoals(noise=0, iterations=10000, learning_rate=0.1,
                               reg_strength=0.001, reg_increase="linear"):
    model = nn.ElmanGoalNet(size_hidden=15, size_observation=9, size_action=8,
                            size_goal1=0, size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = 0.

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.

    for episode in range(num_episodes):
        model.new_episode()
        seqid = utils.idx_from_probabilities(pnas2018task.sequence_probabilities)
        # goal = pnas2018task.goals[seqid]
        sequence = pnas2018task.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)

        # Run the network.
        with tf.GradientTape() as tape:
            # Initialize context with random/uniform values.
            # model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            # model.goal1 = np.zeros_like(goal[0])
            for i in range(len(targets)):
                # model.action = np.zeros((1, model.size_action), dtype=np.float32)
                # Add noise
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            # Train model, record loss.
            cols = model.size_hidden
            # Regularization in the hidden layer weights
            # Recurrent hidden to hidden connections
            extra_loss = utils.weight_regularization_calculator(
                model.hidden_layer.w,
                [0, model.size_hidden], [0, cols],
                reg_strength, reg_type="recurrent", reg_increase=reg_increase)
            # Prev action to hidden
            # extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                [model.size_hidden + 9, model.size_hidden + 9 + model.size_action], [0, cols],
            #                                                reg_strength, reg_type="input_right", reg_increase=reg_increase)
            # Prev goal to hidden
            # extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                [model.size_hidden + 9 + model.size_action, model.size_hidden + 9 + model.size_action + 2], [0, cols],
            #                                                reg_strength, reg_type="input_left", reg_increase=reg_increase)

            # Regularization in the output layers (goals and actions) weights
            # Hidden to next action
            extra_loss += utils.weight_regularization_calculator(
                model.action_layer.w,
                [0, model.size_hidden], [0, model.size_action],
                reg_strength, reg_type="output_right", reg_increase=reg_increase)
            # Hidden to next goal
            # extra_loss += weight_regularization_calculator(model.goal1_layer.w,
            #                                                [0, model.size_hidden], [0, model.size_action],
            #                                                reg_strength, reg_type="output_left", reg_increase=reg_increase)

            # Regularization of the observation (only goes to the action side)
            # extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                [model.size_hidden, model.size_hidden + model.size_observation],
            #                                                [0, cols],
            #                                                reg_strength, reg_type="input_right", reg_increase=reg_increase)

            loss, _ = model.train_obsolete(targets, None, None, tape, extra_loss)
        # if episode % 100 == 0:
        #     print(loss.numpy() - extra_loss.numpy(), extra_loss.numpy())

        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(rng_avg_goals, ratios[0] == 1, speed)  # whole action sequence correct?

        # Display on the console at regular intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print("{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model
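

# Illustrative usage (sketch; example_regularization_sweep is a hypothetical name): trains
# several hierarchically regularized networks with increasing reg_strength so the effect of
# the recurrent and output-layer weight regularization can be compared.
def example_regularization_sweep():
    models = []
    for reg_strength in (0.0001, 0.001, 0.01):
        models.append(train_hierarchical_nogoals(noise=0.05, iterations=10000,
                                                 reg_strength=reg_strength,
                                                 reg_increase="linear"))
    return models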