Example #1
File: BotMain.py Project: TurnTheTideTM/AI
 def __init__(self):
     # Register this AI with the shared environment's module-level counter.
     self.this_ai_count = planet_environment.ai_count
     self.finished = False
     planet_environment.ai_count += 1
     self.nextMove = (0, 0, 0)
     self.state = None
     # Policy network: INPUT_NEURON_COUNT inputs, 100 hidden units, 3 outputs
     # (pybrain.tools.shortcuts.buildNetwork).
     network = buildNetwork(INPUT_NEURON_COUNT, 100, 3)
     # ENAC is PyBrain's episodic natural actor-critic policy-gradient learner.
     enac_learner = ENAC()
     learning_agent = LearningAgent(network, enac_learner)
     self.experiment = planet_experiment(episodic_planet_task(self),
                                         learning_agent)
     self.experiment.task.clipping = False
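A minimal usage sketch, assuming planet_experiment behaves like PyBrain's EpisodicExperiment and bot is an instance of the class above; the iteration count is arbitrary:

 # Hypothetical training loop; not part of the original project code.
 for _ in range(50):
     bot.experiment.doEpisodes(1)   # roll out one episode
     bot.experiment.agent.learn()   # ENAC policy-gradient update
     bot.experiment.agent.reset()   # clear the stored episode history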
Example #2
File: aiplayer.py Project: SkyWox/hackcu
 def mlDriver(cv, stateTransfer, actionTransfer):
     # Parameter setup.
     # Dimensionality of the state argument (may be smaller than stateTransfer).
     stateDim = 352
     # Number of possible moves.
     numMoves = 361
     env = SettleEnv(cv, stateTransfer, actionTransfer)
     task = SettleTask(env)
     # Project-specific variant of PyBrain's ActionValueNetwork.
     controller = RestrictedActionValueNetwork(stateDim, numMoves, env)
     learner = NFQ()  # neural fitted Q-iteration
     learner.explorer = EpsilonHackedExplorer(env)  # project-specific explorer
     agent = LearningAgent(controller, learner)
     experiment = EpisodicExperiment(task, agent)
     # Alternate between collecting episodes and fitting the Q-network.
     while True:
         experiment.doEpisodes(10)
         print "Done with experiments"
         agent.learn()
         print "Learned"
         agent.reset()  # discard stored episodes before the next batch
         print "Cycled"
Example #3
# Imports reconstructed to make the snippet runnable; module paths assume PyBrain 0.3.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.environments.cartpole import CartPoleEnvironment, CartPoleRenderer, BalanceTask
from pybrain.rl.experiments import EpisodicExperiment
from scipy import mean
import sys

episodes = 1   # assumed value; the original definition was cut off in this snippet
epilen = 200   # assumed episode length, likewise missing from the snippet

if len(sys.argv) < 5:
    sys.exit('please give 4 parameters. run: "python play_catpole.py <p1> <p2> <p3> <p4>"\n')

# create environment
env = CartPoleEnvironment()
env.setRenderer(CartPoleRenderer())
env.getRenderer().start()
env.delay = (episodes == 1)

# create task
task = BalanceTask(env, epilen)

# create controller network
net = buildNetwork(4, 1, bias=False)

# create agent and set parameters from command line
agent = LearningAgent(net, None)
agent.module._setParameters([float(sys.argv[1]), float(sys.argv[2]),
                             float(sys.argv[3]), float(sys.argv[4])])

# create experiment
experiment = EpisodicExperiment(task, agent)
experiment.doEpisodes(episodes)

# collect per-episode returns from the agent's history
ret = []
for n in range(agent.history.getNumSequences()):
    returns = agent.history.getSequence(n)   # (states, actions, rewards)
    reward = returns[2]
    ret.append(sum(reward, 0).item())        # total reward for the episode

# print results
print ret, "mean:", mean(ret)
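For reference, the script takes the four weights of the linear controller on the command line (buildNetwork(4, 1, bias=False) has exactly four parameters); the values below are arbitrary:

 python play_catpole.py 0.5 -1.2 0.8 2.0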
Example #4
 def __init__(self, n_input, actions, alpha=0.5, gamma=0.99, qlambda=0.9,
              explorer=EpsilonGreedyExplorer(epsilon=0.20, decay=1)):
     # Note: alpha, gamma and qlambda are accepted but not passed to NFQ in this
     # snippet, and the default explorer instance is shared across all instances.
     CompleteLearner.__init__(self, actions)
     # Q-network: n_input state dimensions, one output per discrete action.
     controller = ActionValueNetwork(dimState=n_input, numActions=len(actions))
     learner = NFQ()  # neural fitted Q-iteration
     learner.explorer = explorer
     self.learning_agent = LearningAgent(controller, learner)
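The constructor only wires up the agent. A hedged sketch of the per-step cycle such a wrapper typically forwards to the embedded LearningAgent, where obs and reward stand in for values supplied by the surrounding framework:

 self.learning_agent.integrateObservation(obs)   # push the current state
 action = self.learning_agent.getAction()        # epsilon-greedy action choice
 self.learning_agent.giveReward(reward)          # credit the chosen action
 self.learning_agent.learn()                     # NFQ update from stored samples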