Example #1
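One step of the inner simulation loop of a learning run: spike times are read from the spike detectors, the eligibility traces of the plastic connections are updated, and every ten steps the recent speed log is scored, converted into a reward, and used to change the weights. The episode ends early on a collision.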
 
        # Read the latest spike times from the spike detectors and update
        # the eligibility traces of the receptor->neuron and
        # neuron->neuron connections.
        nrns_st, rctrs_st = learning.get_spike_times(nrns_sd, rctrs_sd)
        rec_nrn_tags, nrn_nrn_tags, rctr_t, nrn_t, pt = learning.get_eligibility_trace(
            nrns_st, rctrs_st, simtime,
            simdata['rctr_nrn_trace'][t], simdata['nrn_nrn_trace'][t])

        simdata['rctr_nrn_trace'].append(rec_nrn_tags)
        simdata['nrn_nrn_trace'].append(nrn_nrn_tags)

        # Every 10 steps: score the recent speed log, turn the fitness
        # into a reward, and apply a reward-modulated weight update.
        if (t + 1) % 10 == 0:
            fitness = ev.get_fitness_value(simdata['speed_log'][t-10:])
            print("Fitness:", fitness)
            reward = learning.get_reward(reward, fitness, x_cur, y_cur)
            print("Reward:", reward)
            simdata['reward'].append(reward)
            delta_w_rec, delta_w_nrn = learning.get_weight_changes(
                reward, rec_nrn_tags, nrn_nrn_tags)
            learning.update_weights(delta_w_rec, delta_w_nrn, nrns, rctrs)
            weights.append(learning.get_weights(rctrs, nrns))

        # Stop the simulation early if a collision occurs.
        if col:
            # Alternative handling kept from the original:
            # simdata['speed_log'].extend((0, 0) for k in range(t, 400))
            # break
            print("COLLISION")
            break

    # Episode fitness is computed over the full speed log.
    simdata['fitness'] = ev.get_fitness_value(simdata['speed_log'])
    data.append(simdata)
    # reward = get_reward(reward, simdata['speed_log'], x_cur, y_cur)
    # simdata['reward'].append(reward)
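The project-specific `learning` module is not shown on this page. As a rough illustration of the mechanism the loop above relies on, a reward-modulated rule typically decays an eligibility trace over time, adds new pre/post spike coincidences, and gates the accumulated trace into a weight change with the scalar reward. The sketch below is a minimal, hypothetical stand-in, not the module's actual code; the decay constant `tau`, the learning rate `lr`, and both function bodies are assumptions.

import numpy as np

# Hypothetical sketch only -- NOT the real `learning` module.

def decay_eligibility_trace(trace, coincidences, tau=200.0, dt=10.0):
    # Exponentially decay the per-connection trace, then add the
    # pre/post spike coincidences observed during the last step.
    return trace * np.exp(-dt / tau) + coincidences

def get_weight_changes(reward, trace, lr=0.01):
    # Gate the accumulated eligibility into a weight change with
    # the scalar reward.
    return lr * reward * trace

# Usage: traces accumulate correlations; the reward decides whether
# they strengthen or weaken the corresponding connections.
trace = np.zeros(4)
trace = decay_eligibility_trace(trace, np.array([1.0, 0.0, 2.0, 0.0]))
delta_w = get_weight_changes(reward=0.5, trace=trace)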
Example #2
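The top-level driver. The first branch closes one generation of an evolutionary run (ranking, elitism, breeding, bookkeeping); the `else` branch runs a reward-modulated learning experiment instead, building one network and training it over 50 simulated episodes.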
        # Rank the generation, record the best individual, refresh the
        # elite, and breed the next population.
        top_performers = ev.get_top_performers(generation_log)
        best_of_generation.append(top_performers[0])
        update_elite()
        results.save_generation_results(top_performers[0], average_fitness,
                                        average_connectivity, gen)
        population = ev.evolve_new_generation(top_performers)
        results.save_fitness(average_fitness,
                             [best[1] for best in best_of_generation])
    else:
        # Reward-modulated learning on a fixed network (no evolution).
        results.set_results_path(data['init'], data['model'], data['arena'], 0)
        fitness, rewards = [0], [0]    # histories seeded with a neutral value
        weights = []
        simtime = 0
        nrns, nrns_sd, rctrs, rctrs_sd, pop_spikes = \
            network.create_learning_network(data['model'])
        # Run 50 learning episodes; after each one, convert the episode
        # fitness into a reward and update the synaptic weights.
        for i in range(50):
            weights.append(learning.get_weights(rctrs, nrns))
            simdata, rt, nt, pt, col = simulate_learning(nrns, rctrs, nrns_sd,
                                                         rctrs_sd, arena)
            fitness.append(simdata['fitness'])
            reward = learning.get_reward(rewards[-1], fitness[-1],
                                         simdata['traj'][-1][0],
                                         simdata['traj'][-1][1])
            rewards.append(reward)
            delta_w_rec, delta_w_nrn = learning.get_weight_changes(
                reward,
                simdata['rctr_nrn_trace'][-1], simdata['nrn_nrn_trace'][-1])
            learning.update_weights(delta_w_rec, delta_w_nrn, nrns, rctrs)
            results.save_rl_results(simdata, pop_spikes, i)
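In both examples `learning.get_reward` receives the previous reward, the newest fitness value, and the current position, which suggests a reward that tracks the fitness signal over time. A purely illustrative reading, with the smoothing factor `alpha` as an assumption:

def get_reward(prev_reward, fitness, x_cur, y_cur, alpha=0.8):
    # Hypothetical stand-in: exponentially smooth the fitness signal.
    # x_cur / y_cur mirror the real call signature; how the actual
    # module uses the position is not visible in these snippets.
    return alpha * prev_reward + (1.0 - alpha) * fitness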