def __init__(self):
    """Register this AI with the shared planet environment and build its
    ENAC-based learning experiment."""
    # Claim the next free AI index from the shared module-level counter,
    # then advance the counter for the next AI.
    self.this_ai_count = planet_environment.ai_count
    self.finished = False
    planet_environment.ai_count += 1
    self.nextMove = (0, 0, 0)
    self.state = None
    # Policy network: INPUT_NEURON_COUNT inputs -> 100 hidden -> 3 outputs.
    policy_net = buildNetwork(INPUT_NEURON_COUNT, 100, 3)
    agent = LearningAgent(policy_net, ENAC())
    self.experiment = planet_experiment(episodic_planet_task(self), agent)
    # Disable reward/observation clipping on the task.
    self.experiment.task.clipping = False
def mlDriver(cv, stateTransfer, actionTransfer): #parameter setup #dimensionality of state argument (could be less than stateTransfer) stateDim = 352 #Number of moves possible numMoves = 361 env = SettleEnv(cv, stateTransfer, actionTransfer) task = SettleTask(env) controller = RestrictedActionValueNetwork(stateDim, numMoves, env) learner = NFQ() learner.explorer = EpsilonHackedExplorer(env) agent = LearningAgent(controller, learner) experiment = EpisodicExperiment(task, agent) while True: experiment.doEpisodes(10) print "Done with experiments" agent.learn() print "Learned" agent.reset() print "Cycled"
sys.exit('please give 4 parameters. run: "python play_catpole.py <p1> <p2> <p3> <p4>"\n') # create environment env = CartPoleEnvironment() env.setRenderer(CartPoleRenderer()) env.getRenderer().start() env.delay = (episodes == 1) # create task task = BalanceTask(env, epilen) # create controller network net = buildNetwork(4, 1, bias=False) # create agent and set parameters from command line agent = LearningAgent(net, None) agent.module._setParameters([float(sys.argv[1]), float(sys.argv[2]), float(sys.argv[3]), float(sys.argv[4])]) # create experiment experiment = EpisodicExperiment(task, agent) experiment.doEpisodes(episodes) # run environment ret = [] for n in range(agent.history.getNumSequences()): returns = agent.history.getSequence(n) reward = returns[2] ret.append( sum(reward, 0).item() ) # print results print ret, "mean:",mean(ret)
def __init__(self, n_input, actions, alpha=0.5, gamma=0.99, qlambda=0.9,
             explorer=None):
    """NFQ learner wrapping a PyBrain action-value network.

    Args:
        n_input: dimensionality of the observation/state vector.
        actions: sequence of available actions; len(actions) fixes the
            number of action outputs of the value network.
        alpha, gamma, qlambda: accepted for interface compatibility but
            not used by this constructor -- NOTE(review): presumably the
            subclass/NFQ defaults apply; confirm they should not be
            forwarded to the learner.
        explorer: exploration strategy; when None a fresh
            EpsilonGreedyExplorer(epsilon=0.20, decay=1) is created for
            this instance.
    """
    CompleteLearner.__init__(self, actions)
    # Bug fix: the old signature used a mutable default argument
    # (explorer=EpsilonGreedyExplorer(...)), which Python evaluates once
    # at def-time, so every learner built with the default shared ONE
    # explorer object (and thus its exploration state). Build a fresh
    # one per instance instead.
    if explorer is None:
        explorer = EpsilonGreedyExplorer(epsilon=0.20, decay=1)
    controller = ActionValueNetwork(dimState=n_input, numActions=len(actions))
    learner = NFQ()
    learner.explorer = explorer
    self.learning_agent = LearningAgent(controller, learner)