def td_update_network(model, evals_list, lam=.7):
    """Update the model weights to match the temporal difference loss.

    Parameters:
        model: Keras-style model exposing predict() and train_on_batch().
        evals_list: iterable of (evals, pos_reps) pairs, one per game;
            pos_reps are comma-separated feature strings for the positions
            actually played, evals the corresponding position evaluations.
            NOTE(review): evals may be longer than pos_reps; only the first
            len(pos_reps) entries are used — confirm against caller.
        lam: TD(lambda) decay parameter.
    """
    game_lens = [len(el[1]) for el in evals_list]
    total_positions = sum(game_lens)
    all_errors = np.zeros(total_positions)
    all_reps = np.zeros((total_positions, na.num_features))
    cur_pos = 0
    for evals, pos_reps in evals_list:
        # how many moves were actually played in this game
        num_moves = len(pos_reps)
        # TD(lambda) error signals for this game's positions
        all_errors[cur_pos:cur_pos + num_moves] = td_errors(evals[:num_moves], lam=lam)
        for pos_rep in pos_reps:
            # np.fromstring(text, sep=...) is deprecated; parse explicitly
            all_reps[cur_pos] = np.array(pos_rep.split(","), dtype=float)
            cur_pos += 1
    # The fitting logic was duplicated here verbatim; delegate instead.
    propigate_errors(model, all_reps, all_errors)
def td_update_network(model, evals_list, lam=.7):
    """Update the model weights to match the temporal difference loss.

    Parameters:
        model: Keras-style model exposing predict() and train_on_batch().
        evals_list: iterable of (evals, pos_reps) pairs, one per game;
            pos_reps are comma-separated feature strings for the positions
            actually played, evals the corresponding position evaluations.
        lam: TD(lambda) decay parameter.
    """
    game_lens = [len(el[1]) for el in evals_list]
    total_positions = sum(game_lens)
    all_errors = np.zeros(total_positions)
    all_reps = np.zeros((total_positions, na.num_features))
    cur_pos = 0
    for evals, pos_reps in evals_list:
        # how many moves were actually played in this game
        num_moves = len(pos_reps)
        # TD(lambda) error signals for this game's positions
        all_errors[cur_pos:cur_pos + num_moves] = td_errors(evals[:num_moves], lam=lam)
        for pos_rep in pos_reps:
            # np.fromstring(text, sep=...) is deprecated; parse explicitly
            all_reps[cur_pos] = np.array(pos_rep.split(","), dtype=float)
            cur_pos += 1
    training_dict = na.make_training_dict(all_reps, all_errors)
    current_evals = model.predict(training_dict)["out"]
    n_pos = len(all_errors)
    # Diagnostic average losses (scaling constants 50. and 2500. retained
    # from the original; their meaning is not established here).
    # Parenthesized single-arg print is identical in Python 2 and valid in 3.
    print("l1 loss " + str(np.sum(np.abs(all_errors)) / n_pos / 50.))
    print("l2 loss " + str(np.sum(all_errors**2) / n_pos / 2500.))
    # We cannot directly back-propagate an arbitrary error signal, but we
    # can force the model to fit features to targets: take the model's
    # current output for these inputs, add the desired errors, and train
    # on the shifted targets. Inefficient, but avoids extending Keras.
    training_dict["out"] += np.ravel(current_evals)
    model.train_on_batch(training_dict)
def propigate_errors(model, reps, errors):
    """Train the model so its outputs shift by the given error signals.

    Parameters:
        model: Keras-style model exposing predict() and train_on_batch().
        reps: 2-D array of position feature representations, one row per
            position.
        errors: 1-D array of desired output deltas, one per row of reps.
    """
    training_dict = na.make_training_dict(reps, errors)
    current_evals = model.predict(training_dict)["out"]
    n_pos = len(errors)
    # Diagnostic average losses (scaling constants 50. and 2500. retained
    # from the original; their meaning is not established here).
    # Parenthesized single-arg print is identical in Python 2 and valid in 3.
    print("l1 loss " + str(np.sum(np.abs(errors)) / n_pos / 50.))
    print("l2 loss " + str(np.sum(errors**2) / n_pos / 2500.))
    # We cannot directly back-propagate an arbitrary error signal, but we
    # can force the model to fit features to targets: take the model's
    # current output for these inputs, add the desired errors, and train
    # on the shifted targets. Inefficient, but avoids extending Keras.
    training_dict["out"] += np.ravel(current_evals)
    model.train_on_batch(training_dict)
def propigate_errors(model, reps, errors): training_dict = na.make_training_dict(reps, errors) current_evals = model.predict(training_dict)["out"] n_pos = len(errors) print "l1 loss " + str(np.sum(np.abs(errors)) / n_pos / 50.) print "l2 loss " + str(np.sum(errors**2) / n_pos / 2500.) #I cannot directly back-propigate an arbitrary error signal #I can, however force the model to fit features to outputs #So, I obtain the model's current output for the inputs in consideration #Next, I add the desired errors, and voila, it works! #This is inefficient, but I'm not in the mood to extend Keras #Tee-hee-heee training_dict["out"] += np.ravel(current_evals) #print training_dict["out"].shape, all_errors.shape, all_errors model.train_on_batch(training_dict)