# NOTE(review): this fragment reached review with its line breaks and
# indentation collapsed, and the enclosing def / loop header is not visible.
# The nesting described below is inferred from statement order only --
# confirm against the properly formatted file before relying on it.
#
# Per-step learning update (the step index is `t`):
#   * learning.get_spike_times(nrns_sd, rctrs_sd) yields nrns_st / rctrs_st.
#   * learning.get_eligibility_trace(...) receives those, `simtime`, and the
#     traces stored at index t; of its five return values, rec_nrn_tags and
#     nrn_nrn_tags are appended to simdata['rctr_nrn_trace'] and
#     simdata['nrn_nrn_trace'].
#   * Under `(t+1) % 10 == 0`, a fitness value is computed from
#     simdata['speed_log'][t-10:], the reward is recomputed from the previous
#     reward, that fitness and (x_cur, y_cur), both values are printed, the
#     reward is logged in simdata['reward'], and the weight changes returned
#     by learning.get_weight_changes are applied with learning.update_weights.
#     The weights read back afterwards are appended to `weights`
#     (presumably all of this sits inside the `if` -- TODO confirm).
#   * `if col:` prints "COLLISION" and breaks out of the enclosing loop.
#   * After that, simdata['fitness'] is set from the whole speed_log and
#     simdata is appended to `data`.
#
# NOTE(review): if t counts from 0, the condition first holds at t == 9,
# where t-10 == -1, so the slice [t-10:] selects only the last entry of
# speed_log rather than a ten-step window. Confirm whether [-10:] (or
# t-9) was intended.
# The print statements use Python 2 syntax.
nrns_st, rctrs_st = learning.get_spike_times(nrns_sd, rctrs_sd) rec_nrn_tags, nrn_nrn_tags, rctr_t, nrn_t, pt = learning.get_eligibility_trace( nrns_st, rctrs_st, simtime, simdata['rctr_nrn_trace'][t], simdata['nrn_nrn_trace'][t]) simdata['rctr_nrn_trace'].append(rec_nrn_tags) simdata['nrn_nrn_trace'].append(nrn_nrn_tags) if (t+1) % 10 == 0: fitness = ev.get_fitness_value(simdata['speed_log'][t-10:]) print "Fitness" print fitness print "Reward" reward = learning.get_reward(reward, fitness, x_cur, y_cur) print reward simdata['reward'].append(reward) delta_w_rec, delta_w_nrn = learning.get_weight_changes(reward, rec_nrn_tags, nrn_nrn_tags) learning.update_weights(delta_w_rec, delta_w_nrn, nrns, rctrs) weights.append(learning.get_weights(rctrs, nrns)) # Stop simulation if collision occurs if col: # simdata['speed_log'].extend((0, 0) for k in range(t, 400)) # break print "COLLISION" break simdata['fitness'] = ev.get_fitness_value(simdata['speed_log']) data.append(simdata) # reward = get_reward(reward, simdata['speed_log'], x_cur, y_cur) # simdata['reward'].append(reward)
# NOTE(review): this fragment reached review with its line breaks and
# indentation collapsed, and the `if` that pairs with the `else:` below is
# not visible. The nesting described here is inferred from statement order
# only -- confirm against the properly formatted file.
#
# Evolution branch (tail): takes the top performers of generation_log,
# records the first one in best_of_generation, calls update_elite(), saves
# the generation results for `gen`, evolves the next population from the top
# performers, and saves the average fitness together with element [1] of
# every best_of_generation entry.
# NOTE(review): the index-based comprehension over best_of_generation is
# equivalent to `[best[1] for best in best_of_generation]`.
#
# Learning branch (`else`): sets the results path from data['init'],
# data['model'] and data['arena'], seeds `fitness` and `rewards` with 0 (so
# rewards[-1] exists on the first pass), and builds the learning network
# from data['model']. The `for i in range(50)` loop then appears to, per
# iteration: snapshot the current weights, run simulate_learning, append its
# fitness, compute the reward from the previous reward, the newest fitness
# and the last simdata['traj'] point, apply the weight changes derived from
# the last stored traces, and save the results for iteration i.
# NOTE(review): in the visible text `simtime`, `rt`, `nt`, `pt` and `col`
# are assigned but not read, and `weights` is collected but never saved --
# confirm whether that is intentional.
top_performers = ev.get_top_performers(generation_log) best_of_generation.append(top_performers[0]) update_elite() results.save_generation_results(top_performers[0], average_fitness, average_connectivity, gen) population = ev.evolve_new_generation(top_performers) results.save_fitness(average_fitness, [best_of_generation[i][1] for i in range(len(best_of_generation))]) else: # Learning results.set_results_path(data['init'], data['model'], data['arena'], 0) fitness, rewards, weights = [], [], [] simtime = 0 fitness.append(0) rewards.append(0) nrns, nrns_sd, rctrs, rctrs_sd, pop_spikes = \ network.create_learning_network(data['model']) for i in range(50): weights.append(learning.get_weights(rctrs, nrns)) simdata, rt, nt, pt, col = simulate_learning(nrns, rctrs, nrns_sd, rctrs_sd, arena) fitness.append(simdata['fitness']) reward = learning.get_reward(rewards[-1], fitness[-1], simdata['traj'][-1][0], simdata['traj'][-1][1]) rewards.append(reward) delta_w_rec, delta_w_nrn = learning.get_weight_changes(reward, simdata['rctr_nrn_trace'][-1], simdata['nrn_nrn_trace'][-1]) learning.update_weights(delta_w_rec, delta_w_nrn, nrns, rctrs) results.save_rl_results(simdata, pop_spikes, i)