def evolve_atari_network(game, input_size, num_generations=1000):
    """Evolve networks for ``game`` and persist the best one found.

    For every generation, each index in ``range(ne.populationSize)`` is
    evaluated by playing one ``Simulator`` episode: the network returned by
    ``ne.testNet(i)`` is fed ``sim.objects`` on every step and the argmax of
    its outputs is written back to the simulator.  The episode fitness is the
    sum of ``sim.reward`` over all steps.

    Side effects:
      * ``nets/<game>.net``   -- pickle of the best network seen so far,
        overwritten whenever a strictly better fitness is reached.
      * ``nets/<game>.curve`` -- the best fitness of each generation,
        appended as comma-terminated values.
    The ``nets/`` directory is not created here and must already exist.

    Args:
        game: Game identifier; passed to ``Simulator`` and used in the
            output file names.
        input_size: Passed to ``NE.GSP``; the simulator's object array is
            reshaped to ``(input_size * 80, 1)`` before being fed to the net.
        num_generations: Number of generations to run (non-negative).
            Defaults to 1000, the value previously hard-coded.
    """
    # NOTE(review): 18 presumably is the size of the action set (it matches
    # the ',18' suffix written to the simulator) -- confirm against NE.GSP.
    ne = NE.GSP(input_size, 18, 2, [], True)
    best_fitness = -10000000

    for generation in range(num_generations):
        curr_best_fitness = -10000000

        # Test each subpopulation.
        for i in range(ne.populationSize):
            sim = Simulator(game)
            try:
                currnet = ne.testNet(i)
                fitness = 0
                while sim.running():
                    sim.read()
                    fitness += sim.reward
                    currnet.clearCharges()
                    currnet.setInputs(
                        sim.objects.reshape(input_size * 80, 1))
                    currnet.activate()
                    output = currnet.readOutputs()
                    sim.write('{},18\n'.format(np.argmax(output)))

                ne.evaluate(fitness, i)
                if fitness > curr_best_fitness:
                    curr_best_fitness = fitness
                if fitness > best_fitness:
                    best_fitness = fitness
                    # Close the file deterministically so the pickle is
                    # fully flushed before anyone else reads it.
                    with open('nets/{}.net'.format(game), 'wb') as net_file:
                        pickle.dump(currnet, net_file)

                print("gen: {}\ti: {}\trew: {}\tbest: {}\t end: {}"
                      .format(generation, i, fitness, best_fitness,
                              sim.terminated))
            finally:
                # Always release the simulator, even if the episode raised.
                sim.kill()

        with open('nets/{}.curve'.format(game), 'a') as curve:
            curve.write(str(curr_best_fitness) + ',')
        print("Gen " + str(generation) + ", Best: " + str(curr_best_fitness))
        ne.nextGen()

    print("Generation " + str(num_generations) + ", task complete.")
def _decode_action(output):
    """Translate network outputs into an action index for the simulator.

    With 18 outputs the action is simply the argmax.  Otherwise the compressed
    representation is assumed: ``output[0]`` is "fire" and the remaining
    outputs are directions (the first of which is "no direction").  This
    relies on output functions whose range is centered around 0.5,
    e.g. [0, 1].
    """
    if len(output) == 18:
        return np.argmax(output)

    action = np.argmax(output[1:]) + 1
    if action != 1:
        # Add 8 if (fire) and (dir not noop).
        if output[0] >= 0.5:
            action += 8
    elif output[0] < 0.5:
        # Subtract 1 if (not fire) and (dir noop).
        action -= 1
    return action


def run_game(game, net, result_file, skip_num_frames=0, max_num_frames=50000,
             max_secs_without_reward=50000, drop_rate=0, num_evals=1,
             display_screen=False):
    """Play ``game`` with a pickled network and write its average fitness.

    The network is loaded from ``net`` and run for ``num_evals`` episodes.
    Each episode's fitness is the sum of ``sim.reward`` over its steps; the
    mean over all episodes is written (as text) to ``result_file``.

    Args:
        game: Game identifier passed to ``Simulator``.
        net: Path to the pickled network file.
        result_file: Destination path for the average fitness.
        skip_num_frames, max_num_frames, max_secs_without_reward,
        display_screen: Forwarded positionally to ``Simulator``.
        drop_rate: Per-step probability that the network is *not* activated;
            on such steps the previously chosen action is sent again
            (action 0 if no action has been chosen yet in the episode).
        num_evals: Number of episodes to average over.

    Raises:
        ZeroDivisionError: if ``num_evals`` is 0.
    """
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    # Pickles are binary data, so open in 'rb' and close the handle promptly.
    with open(net, 'rb') as net_file:
        currnet = cPickle.load(net_file)

    total_fitness = 0
    for _ in range(num_evals):
        currnet.clearCharges()
        # NOTE(review): the simulator is never explicitly killed here, unlike
        # in evolve_atari_network -- confirm whether sim.kill() is needed.
        sim = Simulator(game, currnet.numInput, skip_num_frames,
                        max_num_frames, max_secs_without_reward,
                        display_screen)
        fitness = 0
        action = 0
        while sim.running():
            if not sim.read():
                break
            if sim.reward != 0:
                print(sim.reward)
            fitness += sim.reward
            # Activate only if the signal is not dropped; otherwise the
            # previous action is repeated.
            if np.random.random() >= drop_rate:
                currnet.setInputs(sim.objects)
                currnet.activate()
                action = _decode_action(currnet.readOutputs())
            sim.write('{},18\n'.format(action))
        total_fitness += fitness

    print("TOTAL FITNESS: " + str(total_fitness))
    avg_fitness = float(total_fitness) / num_evals

    # Write to a temporary file first and move it into place afterwards,
    # presumably so a reader never observes a partially written result.
    # NOTE(review): tempfile.mktemp() is deprecated and race-prone; it is kept
    # because mkstemp() would change the result file's permissions to 0600.
    tmp = tempfile.mktemp()
    with open(tmp, 'w') as f:
        f.write('{}'.format(avg_fitness))
    shutil.move(tmp, result_file)