# Example #1
def td_update_network(model, evals_list, lam=.7):
    """
    Updates the model weights to match the temporal difference loss.

    model      -- Keras-style model exposing predict() and train_on_batch()
    evals_list -- list of (evals, pos_reps) pairs, one per game; evals holds
                  the per-move evaluations and pos_reps the comma-separated
                  feature strings of the positions actually played
    lam        -- TD(lambda) decay factor passed through to td_errors()
    """
    game_lens = [len(el[1]) for el in evals_list]
    total_positions = sum(game_lens)
    if total_positions == 0:
        #nothing to train on; also avoids a division by zero below
        return
    all_errors = np.zeros(total_positions)
    all_reps = np.zeros((total_positions, na.num_features))
    cur_pos = 0
    for evals, pos_reps in evals_list:
        #how many moves were actually played this game
        num_moves = len(pos_reps)
        #TD error signal for every played position of this game
        all_errors[cur_pos:cur_pos + num_moves] = td_errors(evals[:num_moves],
                                                            lam=lam)
        #decode the comma-separated feature strings into the feature matrix
        for offset, pos_rep in enumerate(pos_reps):
            all_reps[cur_pos + offset] = np.fromstring(pos_rep, sep=",")
        cur_pos += num_moves

    training_dict = na.make_training_dict(all_reps, all_errors)
    current_evals = model.predict(training_dict)["out"]
    n_pos = len(all_errors)
    #normalising constants 50/2500 presumably scale evals to ~[-50, 50];
    #TODO confirm against the evaluation range used elsewhere
    print("l1 loss " + str(np.sum(np.abs(all_errors)) / n_pos / 50.))
    print("l2 loss " + str(np.sum(all_errors**2) / n_pos / 2500.))
    #An arbitrary error signal cannot be back-propagated directly, but the
    #model can be forced to fit features to outputs: take the model's
    #current prediction and add the desired error to form the regression
    #target.  Inefficient, but avoids extending Keras.
    training_dict["out"] += np.ravel(current_evals)
    model.train_on_batch(training_dict)
# Example #2
def td_update_network(model, evals_list, lam=.7):
    """
    Updates the model weights to match the temporal difference loss
    """
    #total number of played positions across every game
    n_total = sum(len(pair[1]) for pair in evals_list)
    errors = np.zeros(n_total)
    reps = np.zeros((n_total, na.num_features))
    idx = 0
    for game_evals, game_reps in evals_list:
        played = len(game_reps)
        #error signal for each move that was actually played
        errors[idx:idx + played] = td_errors(game_evals[:played], lam=lam)
        #parse each comma-separated feature string into its matrix row
        for rep_string in game_reps:
            reps[idx] = np.fromstring(rep_string, sep=",")
            idx += 1

    training_dict = na.make_training_dict(reps, errors)
    current_evals = model.predict(training_dict)["out"]
    n_pos = len(errors)
    print("l1 loss " + str(np.sum(np.abs(errors)) / n_pos / 50.))
    print("l2 loss " + str(np.sum(errors**2) / n_pos / 2500.))
    #An arbitrary error signal cannot be back-propagated directly, but the
    #model can be forced to fit features to outputs: add the desired error
    #to the model's current prediction to obtain the regression target.
    training_dict["out"] += np.ravel(current_evals)
    model.train_on_batch(training_dict)
# Example #3
def propigate_errors(model, reps, errors):
    """
    Nudges the model's outputs for ``reps`` by the given error signals.

    model  -- Keras-style model exposing predict() and train_on_batch()
    reps   -- feature matrix, one row per position
    errors -- desired change in the model's output for each row of reps
    """
    n_pos = len(errors)
    if n_pos == 0:
        #nothing to fit; also avoids a division by zero in the loss report
        return
    training_dict = na.make_training_dict(reps, errors)
    current_evals = model.predict(training_dict)["out"]
    #normalising constants 50/2500 presumably scale evals to ~[-50, 50];
    #TODO confirm against the evaluation range used elsewhere
    print("l1 loss " + str(np.sum(np.abs(errors)) / n_pos / 50.))
    print("l2 loss " + str(np.sum(errors**2) / n_pos / 2500.))
    #An arbitrary error signal cannot be back-propagated directly, but the
    #model can be forced to fit features to outputs: take the model's
    #current prediction and add the desired error to form the regression
    #target.  Inefficient, but avoids extending Keras.
    training_dict["out"] += np.ravel(current_evals)
    model.train_on_batch(training_dict)
# Example #4
def propigate_errors(model, reps, errors):
    """
    Shifts the model's outputs for the given position reps by the supplied
    error signals.
    """
    training_dict = na.make_training_dict(reps, errors)
    predicted = model.predict(training_dict)["out"]
    count = len(errors)
    #report both normalised loss measures before the update
    l1_loss = np.sum(np.abs(errors)) / count / 50.
    l2_loss = np.sum(errors**2) / count / 2500.
    print("l1 loss " + str(l1_loss))
    print("l2 loss " + str(l2_loss))
    #An arbitrary error signal cannot be back-propagated directly, but the
    #model can be forced to fit features to outputs: add the desired error
    #to the current prediction to obtain the regression target.
    training_dict["out"] += np.ravel(predicted)
    model.train_on_batch(training_dict)