def __init__(self, refm, disc_rate, sims, depth, horizon,
             epsilon=0.05, threads=1, memory=32):
    Agent.__init__(self, refm, disc_rate)

    # Clamp epsilon to [0, 1].
    if epsilon > 1.0:
        epsilon = 1.0
    if epsilon < 0.0:
        epsilon = 0.0

    self.refm = refm
    self.sims = int(sims)
    self.depth = int(depth)
    self.horizon = int(horizon)
    self.memory = int(memory)
    self.epsilon = epsilon
    self.threads = int(threads)

    self.obs_cells = refm.getNumObsCells()
    self.obs_symbols = refm.getNumObsSyms()
    self.obs_bits = int(ceil(log(refm.getNumObs(), 2.0)))
    self.reward_bits = int(ceil(log(refm.getNumRewards(), 2.0)))
    self.num_actions = refm.getNumActions()

    print "obs_bits = ", self.obs_bits
    print "reward_bits = ", self.reward_bits

    self.agent = None
    self.reset()
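# A minimal instantiation sketch for the constructor above. The stub below is
# hypothetical: it only provides the accessor methods the constructor calls,
# standing in for the real reference machine.
class StubRefMachine(object):
    def getNumObsCells(self):
        return 2
    def getNumObsSyms(self):
        return 4
    def getNumObs(self):
        return 16
    def getNumRewards(self):
        return 8
    def getNumActions(self):
        return 4

# Assuming the enclosing class is named MC_AIXI_Agent (a placeholder name):
# agent = MC_AIXI_Agent(StubRefMachine(), disc_rate=0.95, sims=100, depth=10, horizon=6)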
class World(Widget):
    def __init__(self, settings):
        super().__init__()
        self.settings = settings

        # Generate obstacles
        self.obstacles = []
        for i in range(self.settings.NUM_OBSTACLES):
            self.obstacles.append((randint(0, self.settings.GRID_WIDTH - 1) * self.settings.CELL_SIZE,
                                   randint(0, self.settings.GRID_HEIGHT - 1) * self.settings.CELL_SIZE))

        # Generate the goal, re-rolling until it does not overlap an obstacle
        self.goal = (randint(0, self.settings.GRID_WIDTH - 1) * self.settings.CELL_SIZE,
                     randint(0, self.settings.GRID_HEIGHT - 1) * self.settings.CELL_SIZE)
        while self.goal in self.obstacles:
            self.goal = (randint(0, self.settings.GRID_WIDTH - 1) * self.settings.CELL_SIZE,
                         randint(0, self.settings.GRID_HEIGHT - 1) * self.settings.CELL_SIZE)

        # Create the agent
        self.agent = Agent(self.settings, self.canvas, self.goal,
                           self.obstacles, self.settings.HEURISTIC)

    def draw(self):
        with self.canvas:
            # Draw obstacles
            Color(*self.settings.OBSTACLE_COLOR)
            for ob in self.obstacles:
                Rectangle(pos=ob, size=(self.settings.CELL_SIZE, self.settings.CELL_SIZE))
            # Draw goal
            Color(*self.settings.GOAL_COLOR)
            Rectangle(pos=self.goal, size=(self.settings.CELL_SIZE, self.settings.CELL_SIZE))

    def update(self, dt):
        with self.canvas:
            self.agent.update(dt)
def __init__(self, errGrowth, unnormalizeDirtRate, unnormalizeDirtSize, accuracy, N):
    # Constructor parameters (per the original note): the variance growth
    # parameter, the average dirt fall, a handle to the sensor, and a handle
    # to the array of vacuums.
    Agent.__init__(self, Router.PLANNER)
    self.setNumber(N)
    self.vacuumRange = 3
    self.setAccuracy(accuracy)

    # Initialize the matrices.
    self.worldview = zeros((N, N), dtype=float64)
    self.dirtLevels = []
    self.wetview = zeros((N, N), dtype=float64)
    self.viewPrecision = zeros((N, N), dtype=float64)

    self.unnormalizeDirtRate = unnormalizeDirtRate
    self.unnormalizeDirtSize = unnormalizeDirtSize
    self.errGrowth = errGrowth
    self.normalizeDirtRate()

    self.vacuumlocation = []

    # Create the distance matrix.
    self.defineDistanceArray()
    self.wDist = 0  # default
def __init__(self, refm, disc_rate, init_Q, Lambda, alpha, epsilon, gamma=0):
    Agent.__init__(self, refm, disc_rate)

    self.num_states = refm.getNumObs()  # assuming that states = observations
    self.obs_symbols = refm.getNumObsSyms()
    self.obs_cells = refm.getNumObsCells()

    self.init_Q = init_Q
    self.Lambda = Lambda
    self.epsilon = epsilon
    self.alpha = alpha

    # If the internal discount rate isn't set, use the environment value.
    if gamma == 0:
        self.gamma = disc_rate
    else:
        self.gamma = gamma

    if self.gamma >= 1.0:
        print "Error: Q learning can only handle an internal discount rate ", \
              "that is below 1.0"
        sys.exit()

    self.reset()
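# For orientation only: a generic tabular Q(lambda) backup of the kind the
# constructor parameters above (alpha, gamma, Lambda) suggest. This is a
# sketch of the textbook update, not necessarily this agent's exact code.
def q_lambda_backup(Q, E, s, a, r, s_next, alpha, gamma, Lambda):
    """One eligibility-trace backup; Q and E map state -> action -> float."""
    delta = r + gamma * max(Q[s_next].values()) - Q[s][a]
    E[s][a] += 1.0  # accumulating trace
    for state in Q:
        for action in Q[state]:
            Q[state][action] += alpha * delta * E[state][action]
            E[state][action] *= gamma * Lambda
    # Watkins' variant would additionally reset E after exploratory actions.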
def __init__(self, gamma, filename):
    Agent.__init__(self)
    self._fileName = filename + "fortify.pickle"
    self.load()
    self.gamma = gamma
    self.lastState = None
    self.lastAction = None
    self.lastScore = 0
def __init__(self, name=None):
    if name is None:
        name = "builder"
    Agent.__init__(self, name, "build")
    ProjectInspector.__init__(self)
def __init__(self, accuracy=0.0):
    # Constructor (accuracy of measurement).
    Agent.__init__(self, Router.SENSORARRAY)
    self.accuracy = accuracy - float(int(accuracy))  # force to be within constraints
    self.N = 5
    self.array = zeros((self.N, self.N), dtype=float64)  # array of values for dirt levels
    self.Wet = zeros((self.N, self.N), dtype=float64)    # array of values for wetness levels
def __init__(self, gamma, filename):
    Agent.__init__(self)
    self._fileName = filename + "startingCountry.pickle"
    self.gamma = gamma
    self.load()
    self.lastState = None
    self.lastAction = None
    self.stateActionList = []
class testAgent(unittest.TestCase):
    def setUp(self):
        self.agent = Agent()

    def testPluck(self):
        self.agent.pluck()

    def testGroupSize(self):
        agent = Agent(n=10)
        self.assertEqual(agent.groupSize(), 10)

    def testRandomVertexPair(self):
        (i, j) = self.agent.getRandomVertexPair()
        self.assertGreater(self.agent.groupSize(), i)
        self.assertGreater(self.agent.groupSize(), j)

    def testAddAndRemoveEdge(self):
        agent = Agent(n=10, p=0, topology='ErdosRenyi')
        self.assertFalse(agent.graph.are_connected(0, 1))
        agent.addEdge(0, 1)
        self.assertTrue(agent.graph.are_connected(0, 1))
        agent.removeEdge(0, 1)
        self.assertFalse(agent.graph.are_connected(0, 1))

    def testPluckEdge(self):
        agent = Agent(n=10, p=0)
        agent.pluckEdge(1, 2)

    def testPluckTillConnectedEmpty(self):
        agent = Agent(n=10, topology='Empty')
        agent.pluckTillConnected()
        self.assertTrue(agent.isConnected())

    def testPluckTillConnectedStar(self):
        agent = Agent(n=20, topology='Star')
        agent.pluckTillConnected()
        self.assertTrue(agent.isConnected())

    def testAveragePathLengthFull(self):
        agent = Agent(n=10, topology='Full')
        self.assertEqual(1.0, agent.averagePathLength())

    def testAveragePathLengthEmpty(self):
        agent = Agent(n=10, topology='Empty')
        self.assertEqual(agent.averagePathLength(), Inf)

    def testAveragePathLengthStar(self):
        # In a star on n vertices the mean shortest-path length is 2(n-1)/n.
        for j in xrange(5, 20):
            n = float(j)
            agent = Agent(n=j, topology='Star')
            self.assertEqual(agent.averagePathLength(), (n - 1) * 2.0 / n)

    def testEdgeOccupation(self):
        n = 10
        for m in xrange(0, 10, 1):
            agent = Agent(n=n, m=m, topology='ErdosRenyi')
            p = float(m) / float(n * (n - 1) / 2)
            self.assertEqual(agent.edgeOccupation(), p)
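# A standard unittest entry point, assuming the suite above lives in its own
# module next to the Agent class it exercises.
if __name__ == '__main__':
    unittest.main()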
def __init__(self, refm, disc_rate):
    Agent.__init__(self, refm, disc_rate)

    if self.num_actions > 10:
        print "Error: Manual agent only handles 10 or fewer actions!"
        sys.exit()

    self.mode = MANUAL
    self.last_val = 0
def __init__(self, gamma, filename):
    Agent.__init__(self)
    self._fileName = filename + "placeTroops.pickle"
    self.load()
    self.gamma = gamma
    self.lastState = None
    self.lastAction = None
    self.lastScore = 0
    self.stateActionList = []
def __init__(self, refm, disc_rate, epsilon):
    Agent.__init__(self, refm, disc_rate)
    self.obs_symbols = refm.getNumObsSyms()
    self.obs_cells = refm.getNumObsCells()
    self.epsilon = epsilon
    self.reset()
def __init__(self, r=1.0, s=1.0, v=1.0, cloudsize=1.0):
    Agent.__init__(self, Router.WORLD)
    self.time = 0
    self.N = 5                  # size of grid
    self.expenditure = 0.0      # cumulative funds expended since last reset
    self.numberVacuums = 0      # no vacuums assigned yet
    self.vacuumArray = []       # array of object handles
    self.intializeVariables(r, s, v, cloudsize)
    self.setSensor(None)
    self.setPlanner(None)
def __init__(self, name=None, project=None, mode=None):
    if name is None:
        name = "janitor"
    if mode is None:
        mode = 'clean'
    self.mode = mode
    self.project = project
    Agent.__init__(self, name, "janitor")
    ProjectInspector.__init__(self)
def __init__(self, x, y):
    """
    Allocate a shark agent.

    @param x: x position
    @param y: y position
    (The x and y step sizes are fixed to 0 here.)
    """
    Agent.__init__(self, x, y, 0, 0)
    self.color = 'red'
    self.age = 0
    self.HUNGER_CYCLE = 6
    self.hunger = choice(range(int(self.HUNGER_CYCLE / 2.0)))
    self.PERIOD = 10
def spawn_agent(self):
    lifespan = self.rando.randint(60, 400)  # the agent stops surfing once its lifespan is reached
    pagetime = self.rando.randint(10, 40)   # time spent on a page within a website, e.g. youtube.com/watch...
    hometime = self.rando.randint(60, 180)  # time spent on a website from the list of sites, e.g. youtube.com
    name = str(self.name) + " Agent " + str(len(self.all_agents) + 1)  # name = "Overseer x Agent y"
    new_agent = Agent(self.clock.get_time_passed(), self.sites, pagetime,
                      hometime, name, lifespan, self.clock)
    print("")
    print("Created an agent")
    new_agent.print_params()
    print("")
    new_thread = surfThread(new_agent, Overseer.sim_time, self.mode, self.clock)
    new_thread.start()  # the agent begins surfing the web
    self.threads.append(new_thread)
    self.curr_agents_surfing.append(new_agent)
    self.all_agents.append(new_agent)
def __init__(self, aMessageProcessorFunction, aAgentCount):
    Agent.__init__(self)
    self.agents = []
    self.processedCount = 0
    self.receivedCount = 0
    self.exitOnDone = False
    self.messageDataQueue = []
    self.agentIdentifierQueue = []
    for index in range(0, aAgentCount):
        agent = WorkerBee(aMessageProcessorFunction)
        self.agents.append(agent)
        self.agentIdentifierQueue.append(agent.identifier)
def test_infiniteRadium(self):
    self.automaton.reinit(ROWS, COLUMNS)
    self.automaton.createPopulation(POPULATION, Agent.infiniteRadium())
    self.simulation.start(ITERATIONS)
    self.assertTrue(self.automaton.convergence, "population should converge")
    array = self.automaton.getMatrixOfPopulation()
    # print repr(self.automaton) + " " + repr(array.max())
    self.assertEqual(POPULATION, len(self.automaton.getAgents()), "all agents should survive")
def exp(self):
    self.automaton.reinit(SIZE_X, SIZE_Y)
    self.simulation.enableConvergenceStop()
    self.automaton.disableRandomVisitingOfCells()
    # self.automaton.enableCircularGrid()
    self.automaton.createPopulation(POPULATION, Agent.randomRangeRadiumUnif(RMIN, rmax))
    self.simulation.start(ITERATION)
def addFamilies(self):
    # Create new agents in bundles of families until the population reaches the maximum size.
    while self.popSize < self.popLimit:
        startVert = len(self.agents.vs)
        popLeft = self.popLimit - self.popSize
        newVerts = random.randint(self.minFam, self.maxFam)
        # Ensure that the final family group added does not exceed the maximum population size.
        if newVerts < popLeft:
            addVerts = newVerts
        else:
            addVerts = popLeft

        # Create a new set of vertices that will represent a family in the population.
        self.agents.add_vertices(addVerts)
        endVert = len(self.agents.vs)
        newFam = []
        groupNum = random.randint(1, 2)

        # Initialize an instance of Agent at each vertex and assign the necessary properties.
        for i in range(startVert, endVert):
            newAgent = Agent()
            newAgent.setID(i)                                    # unique ID for each agent
            self.agents.vs(i)["Agent"] = newAgent                # store the agent at the vertex
            self.agents.vs(i)["Index"] = str(i)                  # string form of the ID for logging and reporting
            self.agents.vs(i)["Status"] = "H"                    # current disease status: 'H' (Healthy)
            self.agents.vs(i)["Family"] = str(self.numFamilies)  # family identifier (increments with each new group)
            # Designate the agent as a member of 'Group A' or 'Group B' based on the random draw above.
            if groupNum == 1:
                self.agents.vs(i)["Group"] = "A"
            else:
                self.agents.vs(i)["Group"] = "B"
            newFam.append(i)
            # print "Added agent " + str(i)  # debugging

        # Connect all members of the family and mark those edges as family connections.
        for i in range(startVert, endVert - 1):
            for j in range(i + 1, endVert):
                self.agents.add_edge(i, j)
                self.agents.es(len(self.agents.es) - 1)["Relation"] = "Family"

        self.families.append(newFam)  # store the family in the family list
        self.numFamilies += 1         # update the total number of families in the population
        self.popSize += addVerts      # update the total number of agents in the population
def __init__(self, IDnum, currentTime=0.0, channel=None):
    # Class constructor.
    Agent.__init__(self, Router.VACUUM)
    self.xPos = 0
    self.yPos = 0
    self.setStatus(3)                 # 1 - moving, 2 - cleaning, 3 - waiting, 4 - repairing
    self.initializeTime(currentTime)  # time it will be done with the current operation
    self.setID(IDnum)
    self.range = 3                    # maximum distance that can be travelled
    self.moveQueue = []
    self.setChannel(channel)          # channel to the commander
    self.timeToClean = 8
    self.timeToRepair = 32
    self.odometer = 0                 # tracks distance travelled
    self.missions = 0                 # number of cells that have been cleaned
    self.moveCost = 1                 # cost to move
    self.repairCost = 30              # cost to conduct a repair
    self.repairs = 0                  # number of repairs - running total
    self.time = 0
    self.Moisture = None
def __init__(self, x, y):
    Agent.__init__(self, x, y, 0, 0)
    # Brown color
    self.color = '#582900'
def updateAgentPopulation(self):
    self.agent_pop = [Agent(t) for t in self.team_pop
                      if t.getNumReferencingLearners() == 0]
def extractPerson(main=False, onto=None, functType=None, name=None, time="present"):
    if onto is None:
        raise Exception("Parameters found None")
    if main and functType is None:
        usernametext = "Alright! Tell me about yourself. What is your name?: "
        healthparamsaux = healthparams
    elif functType == "liveswith":
        usernametext = texts[main]["liveswithintroduce"].format(name=name)
        healthparamsaux = healthliveswith
        main = False
    elif functType == "companions":
        usernametext = texts[main]["companionsintroduce"]
        healthparamsaux = healthcompanions
        main = False
    else:
        pass  # debug

    username = input(usernametext)
    person = AGPerson(username, onto)
    if username in ag.globalAgent().people:
        name = renameUser(username, ag.globalAgent().people.keys())
        if name is None:
            return ag.globalAgent().people[name]
        person.name = name
    ag.globalAgent().addPerson(person)

    if main:
        userparamstext = "What else could be of interest about you? This data will help me give you a better analysis."
    else:
        userparamstext = "What else could be of interest about " + username + "? This data will help me give you a better analysis."

    while True:
        clear()
        print(userparamstext)
        uindex = userMenu(list(x[0] for x in healthparamsaux) + ["That's enough data."])
        if uindex == len(healthparamsaux):
            break
        else:
            clear()
            healthparamsaux[uindex][1](main, person)
            if uindex == len(healthparamsaux) - 1:
                healthparamsaux = healthparamsaux[:uindex]
            elif uindex == 0:
                healthparamsaux = healthparamsaux[uindex + 1:]
            else:
                healthparamsaux = healthparamsaux[:uindex] + healthparamsaux[uindex + 1:]

    if functType != "liveswith":
        liveswith = extractLivingwith(main=main, person=person, onto=onto)
        person.toOnto()
        if liveswith is not None:
            person.linkLivesWith(liveswith)

    gears = extractProtectionGear(main=main, onto=onto, personname=person.name, placename=name)
    if gears is not None:
        person.gears = gears
        person.updateGears()

    clear()
    while True:
        resp = input(texts[main]["pastactivityyesno"].format(name=person.name))
        if resp.lower() == "no":
            break
        elif resp.lower() == "yes":
            clear()
            pastActivities = AC.extractActivity(
                main=main,
                entranceText=texts[main]["pastactivitytell"].format(name=person.name),
                onto=onto,
                locations=["Bookshop", "Boutique", "Cafe", "Library",
                           "Restaurant", "Shop", "Stadium"],
                time="past",
                agent=person)
            clear()
        else:
            clear()
            print("Please answer yes or no.")

    person.toOnto()
    return person
class Gridworld(object):
    def __init__(self, walls, treasure, snake_pit, size=18, alfa=0.5, gamma=1):
        self.size = size
        self.walls = walls
        self.treasure = treasure
        self.snake_pit = snake_pit
        self.snake_penalty = -20
        self.treasure_reward = 10
        self.default_reward = -1
        self.alfa = alfa
        self.gamma = gamma

        # Initialize random policies
        self.policies = [[{
            'north': 0.25,
            'south': 0.25,
            'west': 0.25,
            'east': 0.25
        } for _ in range(size)] for _ in range(size)]

        # Initialize Q values to 0
        self.Qmat = [[{
            'north': 0.,
            'south': 0.,
            'east': 0.,
            'west': 0.
        } for _ in range(self.size)] for _ in range(self.size)]

    def init_agent(self):
        '''
        Initializes the agent in a tile that is not a wall.
        '''
        [start_y, start_x] = self.walls[0]
        while [start_y, start_x] in self.walls:
            start_y = random.randint(0, self.size - 1)
            start_x = random.randint(0, self.size - 1)
        self.agent = Agent(start_y, start_x)

    def check_reward(self, current_pos):
        '''
        Checks the reward associated with a position.
        '''
        if current_pos == self.snake_pit:
            return self.snake_penalty
        elif current_pos == self.treasure:
            return self.treasure_reward
        else:
            return self.default_reward

    def generate_episode(self, algorithm='sarsa', e=0.1):
        # Initialize s
        self.init_agent()
        # Choose a from s using the e-greedy policy derived from Q
        dir = self.agent.select_e_greedily(self.Qmat, e=e)
        # Repeat for each step of the episode
        while self.agent.pos != self.snake_pit and self.agent.pos != self.treasure:
            # Save the current state before it is updated
            [current_y, current_x] = self.agent.pos
            # Take action a, observe reward and s'
            self.agent.move(dir, self.walls, self.size)
            reward = self.check_reward(self.agent.pos)
            self.agent.reward += reward
            # Choose a' from s' using the e-greedy policy derived from Q
            new_dir = self.agent.select_e_greedily(self.Qmat, e=e)
            [new_y, new_x] = self.agent.pos
            # SARSA updates toward the action actually taken next;
            # Q-learning updates toward the greedy action.
            if algorithm == 'sarsa':
                update_dir = new_dir
            else:
                update_dir = self.agent.select_e_greedily(self.Qmat, e=0)
            # Update Q
            self.Qmat[current_y][current_x][dir] += self.alfa * \
                (reward + self.gamma * self.Qmat[new_y][new_x][update_dir]
                 - self.Qmat[current_y][current_x][dir])
            # Update a <- a', s <- s'
            dir = new_dir
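# A hypothetical driver for the Gridworld above; the wall, treasure, and
# snake-pit coordinates are placeholders, and the project's Agent class and
# the random module are assumed to be importable.
walls = [[0, 0], [0, 1], [0, 2]]
gw = Gridworld(walls=walls, treasure=[17, 17], snake_pit=[5, 5])
for _ in range(1000):
    gw.generate_episode(algorithm='sarsa', e=0.1)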
# Author: Kishansingh Rajput
# Script: Driver script for RL agent

import gym
import numpy as np
from Agent import Agent
from utils import plot_learning_curve
import env
import tensorflow as tf

if __name__ == '__main__':
    # env = gym.make('Pendulum-v0')
    agent = Agent(input_dims=env.observation_space.shape, env=env,
                  n_actions=env.action_space.shape)
    n_games = 25000
    figure_file = 'pendulum.png'

    best_score = env.reward_range[0]
    score_history = []
    load_checkpoint = False

    if load_checkpoint:
        n_steps = 0
        while n_steps <= agent.batch_size:
            observation = env.reset()
            # action = env.action_space.sample()
            action = []
            observation_, reward, done, info = env.step(action)
            agent.remember(observation, action, reward, observation_, done)
from Agent import Agent
from Issue import Issue
from datetime import date
from utils import *

##################################################################
# Agent(String name, bool isAvailable, date availableSince, list roleList)
# Issue(list roleList)
# agentSelectionMode: allAvailable | random | leastBusy
# allocateAgents(Issue, list[Agent], agentSelectionMode)
####################################################################

# Master agent - knows everything - has been free the least
a0 = Agent('X', True, date.today(),
           ['hindi', 'english', 'spanish', 'french', 'chinese', 'tamil'])

# List of agents
a1 = Agent('A', True, date(2020, 5, 1), ['hindi', 'english', 'spanish'])
a2 = Agent('B', True, date(2020, 5, 1), ['hindi', 'french', 'english'])
a3 = Agent('C', True, date(2020, 6, 3), ['french'])
a4 = Agent('D', True, date.today(), ['chinese', 'tamil', 'spanish'])
a5 = Agent('E', True, date.today(), ['chinese', 'french', 'english'])
a6 = Agent('M', False, "NA", ['spanish', 'french', 'english'])
a7 = Agent('N', False, 'NA', ['tamil', 'french', 'english'])
a8 = Agent('O', False, '', ['chinese', 'french', 'hindi'])

# Master agent - knows everything - has been free the longest
a9 = Agent('Z', True, date(2020, 1, 1),
           ['hindi', 'english', 'spanish', 'french', 'chinese', 'tamil'])

agentList = [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9]

# Getting issue details from the command prompt
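# A usage sketch built from the signatures documented in the header comment
# above; the issue's role list here is a placeholder.
issue = Issue(['hindi', 'english'])
allocateAgents(issue, agentList, 'leastBusy')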
def calculateSwarmDrones(self, newParameters):
    SPFParameter = newParameters[0:4]
    TPFParameter = newParameters[4:6]

    # Setup
    min_allowable_dist = self.targetOutput
    Drones = []
    position_drone1 = [(0, 0, 5)]
    Drone1 = Agent(0, position_drone1[0], 1)
    Drones.append(Drone1)
    position_drone2 = [(10, 0, 5)]
    Drone2 = Agent(1, position_drone2[0], 1)
    Drones.append(Drone2)

    SPF = SwarmPotentialField(min_allowable_dist)
    SPF.setup(SPFParameter)
    Ship = Target([5, 10, 5])
    TPF = TargetPotentialField(TPFParameter[0], TPFParameter[1], 1)
    Ships = [Ship]

    responseValue = []
    for iteration in self.simulationTime:
        Drone1.SwarmPotentialForce = SPF.calculate_total_swarm_field_force(Drone1.index, Drones)
        Drone2.SwarmPotentialForce = SPF.calculate_total_swarm_field_force(Drone2.index, Drones)
        Drone1.TargetPotentialForce = TPF.calculate_target_force(Drone1.index, 0, Drones, Ships)
        Drone2.TargetPotentialForce = TPF.calculate_target_force(Drone2.index, 0, Drones, Ships)

        self.swarmForces.append(calculateLength(Drone1.calculate_total_force()))

        Drone1.calculateVelocity(Drone1.calculate_total_force())
        Drone1.move()
        Drone2.calculateVelocity(Drone2.calculate_total_force())
        Drone2.move()

        [distance_tuple, distance] = SPF.getDistance(0, 1, Drones)
        responseValue.append(distance)

    return responseValue, True
# Examine the state space
state = env_info.vector_observations[0]
print('States look like:', state)
state_size = len(state)
print('States have length:', state_size)

# Replay buffer
memory = ReplayBuffer(BUFFER_SIZE, BATCH_SIZE, random.seed(seed), device)

scores = []                        # list containing scores from each episode
scores_window = deque(maxlen=100)  # last 100 scores
eps = eps_start                    # initialize epsilon

agent = Agent(state_size, action_size, seed=seed, lr=LR, memory=memory,
              update_every=UPDATE_EVERY, batch_size=BATCH_SIZE, gamma=GAMMA,
              TAU=TAU, device=device)

for i_episode in range(1, n_episodes + 1):
    # state = env.reset()
    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations[0]
    score = 0
    for t in range(max_t):
        action = agent.act(state, eps)
        # next_state, reward, done, _ = env.step(action)
        env_info = env.step(action.astype(int))[brain_name]
        next_state = env_info.vector_observations[0]  # get the next state
#     taglist.append(AcousticTag(i))  # better for understanding because pings are aligned in time and all have the same ping interval
    x, y, _ = taglist[i].pos
    tagx[i] = x
    tagy[i] = y
"""
E = Grid(taglist, x_range=x_range, y_range=y_range)
if field == fields[0]:
    taglist = E.loadTagList("testField1_1000")
    # E.setMap(density_map)
    tagData = np.genfromtxt("testField1_1000.csv", delimiter=",")
    # E.saveTagList("tags")

for i in range(numAgents):
    s = AcousticReciever(np.array([0, 0, 0]), sensorRange)
    if method == searchMethods[2]:
        # agentList.append(Agent(np.array([np.random.rand()*x_range, np.random.rand()*y_range, 0, 0]), s, E, dim=2))
        agentList.append(Agent(np.array([start_pos[0], start_pos[1], 0, 0]), s, E, dim=2))
        agentList[i].dynamics = m2_step
        u = [0, 0]
    elif method == searchMethods[4]:
        # agentList.append(Agent(np.array([np.random.rand()*x_range, np.random.rand()*y_range, 0, 0]), s, E, dim=2))
        agentList.append(Agent(np.array([start_pos[0], start_pos[1]]), s, E, dim=2))
        agentList[i].dynamics = m3_step
        u = [0, 0]
    else:
        # agentList.append(Agent(np.array([np.random.rand()*x_range, np.random.rand()*y_range]), s, E, dim=2))
        agentList.append(Agent(np.array([start_pos[0], start_pos[1]]), s, E, dim=2))
        agentList[i].dynamics = m1_step

for i in range(len(taglist)):
    x, y, _ = taglist[i].pos
    tagx[i] = x
def execute(self):
    ##
    ## Initialize agents
    ##
    pDisease = {Constant.BETA: 1 - math.exp(-self.disease[Constant.BETA]),
                Constant.RHO: self.disease[Constant.RHO],
                Constant.GAMMA: 1 - math.exp(-self.disease[Constant.GAMMA])}
    self.decision = 1 - math.exp(-self.decision)

    N = 0
    agents = []
    infected = []
    for state in self.nAgents:
        for x in range(self.nAgents[state]):
            agent = Agent(N, state, pDisease, self.fear, self.timeHorizon, self.payoffs)
            agents.append(agent)
            if (state == State.I):
                infected.append(agent)
            N += 1

    ##
    ## Output variables
    ##
    num = []
    num.append([0, self.nAgents[State.S], self.nAgents[State.P], 0,
                self.nAgents[State.I], 0, 0, self.nAgents[State.R], 0, 0,
                self.nAgents[State.S] * self.payoffs[State.S],
                self.nAgents[State.P] * self.payoffs[State.P],
                self.nAgents[State.I] * self.payoffs[State.I],
                self.nAgents[State.R] * self.payoffs[State.R]])

    ##
    ## Run the simulation
    ##
    t = 1
    i = self.nAgents[State.I] / float(N)
    while ((t < self.timeSteps) and (i > 0)):
        numagents = [0, 0, 0, 0]

        ##
        ## Interaction
        ##
        shuffle(agents)
        n = N
        infected = []
        while (n > 1):
            a1 = agents[n - 1]
            a2 = agents[n - 2]
            a1State = a1.getState()
            a2State = a2.getState()
            a1S = a1State
            a2S = a2State
            if (a1State == State.I):
                infected.append(a1)
                a2S = a2.interact(a1State)
            if (a2State == State.I):
                infected.append(a2)
                a1S = a1.interact(a2State)
            numagents[a1S] += 1
            numagents[a2S] += 1
            n = n - 2

        ##
        ## Decision
        ##
        for agent in agents:
            if (uniform(0.0, 1.0) < self.decision):
                state = agent.getState()
                numagents[state] -= 1
                state = agent.decide(i)
                numagents[state] += 1

        ##
        ## Recover
        ##
        for agent in infected:
            if (agent.recover() == State.R):
                numagents[State.I] -= 1
                numagents[State.R] += 1

        num.append([t, numagents[State.S], numagents[State.P], 0,
                    numagents[State.I], 0, 0, numagents[State.R], 0, 0,
                    numagents[State.S] * self.payoffs[State.S],
                    numagents[State.P] * self.payoffs[State.P],
                    numagents[State.I] * self.payoffs[State.I],
                    numagents[State.R] * self.payoffs[State.R]])

        i = numagents[State.I] / float(N)
        t += 1

    return num
session = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

################################################################################

if __name__ == '__main__':
    Env = Environment(not_render=args.not_render)
    Env.front_camera.set_resolution([INPUT_SIZE_90X, INPUT_SIZE_90X])
    Env.side_camera.set_resolution([INPUT_SIZE_90X, INPUT_SIZE_90X])
    Env.top_camera.set_resolution([INPUT_SIZE_90X, INPUT_SIZE_90X])

    Agent = Agent(model_string='3_input',
                  memory_size=10,
                  batch_size=0,
                  input_dimension=INPUT_SIZE_90X,
                  number_of_actions=NUMBER_OF_ACTIONS,
                  alpha=args.alpha,
                  load_weights=True,
                  file=args.model_file)

    EPSILON = args.epsilon

    for episode in range(args.ep):
        state = Env.reset_scene()
        episode_rw = 0.0
        done = 0
        for step in range(args.steps):
            if (step % 30 == 0):
                Agent.action_counts = [
                    0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0
    A = self.compute_new_A(F, policy, q_state, true_reward)
    # A = np.zeros(4)
    # A[1:3] = a
    return A


if __name__ == "__main__":
    from Agent import Agent
    from Environment import Environment

    np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})

    env = Environment()
    a = Agent(env, food_is_left_prior=.5)
    lt = a.long_term

    ############ SAND BOX ##############
    a.time_step = 0
    q = lt.sample_states()
    o = lt.sample_outcomes(q)
    fe = lt.exp_free_energy_all_policies(q, o)
    print(fe)
    print(lt.bayesian_averaging(fe))
statC.addSessionData("start_expl", START_EPSILON) statC.addSessionData("expl_decay", EPSILON_DECAY) statC.addSessionData("expl_policy", EXPLORATION_POLICY) statC.addSessionData("Model info", f"{LAYERS} x {NODES_IN_LAYER}, min replay:" + f"{MIN_REPLAY_MEMORY_SIZE}, batch size: {MINIBATCH_SIZE}") #The simulations themselves for i in range(0, NUM_REPETITIONS): print(f"{cm.BACKED_C} {i} out of {NUM_REPETITIONS} simulations done.{cm.NORMAL}") statC.startRun() env.createRandomProblem() if (LOGIC_MODULE == "nn"): if (EXPLORATION_POLICY == "epsilon"): explPolicy = EpsilonGreedyPolicy(epsilon = START_EPSILON, decayRate = EPSILON_DECAY, minEpsilon = MIN_EPSILON) elif (EXPLORATION_POLICY == "boltzman"): explPolicy = BoltzmanExplorationPolicy(startingTemperature = START_EPSILON, temperatureDecay = EPSILON_DECAY, minTemperature = MIN_EPSILON) lm = QLearningNeuralModule( explorationPolicy = explPolicy, discountFactor = DISCOUNT_FACTOR, learningRate = LEARNING_RATE, minReplayMemorySize = MIN_REPLAY_MEMORY_SIZE, miniBatchSize = MINIBATCH_SIZE, layers = LAYERS, nodesInLayer = NODES_IN_LAYER) elif (LOGIC_MODULE == "tab"): lm = QLearningTabModule(explorationPolicy = GreedyPolicy(), discountFactor = 0, learningRate = 1) agent = Agent(env, lm) agent.train(NUM_SIMULATIONS)
from ReplayBuffer import ReplayBuffer
from Environment import Environment
import gym
import numpy as np
from Agent import Agent
from stolen_openai_wrappers import wrap_dqn

agent = Agent(2)
_env = wrap_dqn(gym.make("PongDeterministic-v4"))
env = Environment(_env, 0, False, [2, 3], False, -1, 1)
agent.load_weights("./current_model.torch")  # load weights if necessary
agent.train(100, 4, env, 1000000, 400000, 0.1, 10000)
def main():
    """This is the main function called when the program starts.

    It initializes everything it needs, then runs in a loop until exited.
    """
    display = Display()
    background = display.drawBackground()
    display.drawPitch(background)
    display.centreTitleOnBackground(background)

    # Prepare game objects
    # clock = pygame.time.Clock()
    clock = pygw.clock()
    WM = WorldModel()
    ball = Ball()
    blue1 = Agent(BLUE1_START_POS, 1, BLUE_START_ANGLE, WM)
    blue2 = Agent(BLUE2_START_POS, 2, BLUE_START_ANGLE, WM)
    red1 = Agent(RED1_START_POS, 3, RED_START_ANGLE, WM)
    red2 = Agent(RED2_START_POS, 4, RED_START_ANGLE, WM)
    ball.setName("ball")
    blue1.setName("blue1")
    blue2.setName("blue2")
    red1.setName("red1")
    red2.setName("red2")

    # ballSprite = pygame.sprite.RenderPlain(ball)
    ballSprite = pygw.renderplainsprite(ball)
    blue1Sprite = pygw.renderplainsprite(blue1)
    blue2Sprite = pygw.renderplainsprite(blue2)
    red1Sprite = pygw.renderplainsprite(red1)
    red2Sprite = pygw.renderplainsprite(red2)

    frame = 0
    going = True

    # Main game loop
    while going:
        clock.tick(FPS)
        if frame >= 30:
            frame = 0
        else:
            frame += 1

        allData = [ball, blue1, blue2, red1, red2]
        if (frame % WORLD_MODEL_UPDATE) == 0:
            WM.update_info(allData)

        # Update sprites
        ballSprite.update()
        blue1Sprite.update()
        blue2Sprite.update()
        red1Sprite.update()
        red2Sprite.update()

        # Draw everything
        display.drawEverything(background, ballSprite, blue1Sprite,
                               blue2Sprite, red1Sprite, red2Sprite)
        display.updateFeaturesOnScreen(frame, ball, blue1, blue2, red1, red2)

        # Check for kicks
        ball.setPushValue(0)
        if blue1.kicking or blue2.kicking or red1.kicking or red2.kicking:
            ball.setPushValue(1)
            ball.setPushSpeed(5)
            if blue1.kicking:
                ball.setPushOrientation(blue1.angle)
            elif blue2.kicking:
                ball.setPushOrientation(blue2.angle)
            elif red1.kicking:
                ball.setPushOrientation(red1.angle)
            elif red2.kicking:
                ball.setPushOrientation(red2.angle)

        # ball.setPushValue(0)
        # if ball.speed == 0:
        #     ball.setPushValue(1)
        #     ball.setPushOrientation(np.random.randint(0, 360))
        #     ball.setPushSpeed(5)

        # pygame.display.flip()
        pygw.updatefulldisplay()

        # for event in pygame.event.get():
        for event in pygw.getIOevent():
            if event.type == pygw.QUIT or (event.type == pygw.KEYDOWN and event.key == pygw.K_ESCAPE):
                going = False
                print('User quit the game')

    # pygame.quit()
    pygw.quitgame()
    sys.exit()
import threading

import tensorflow as tf

from Agent import Agent
import GUI
from Displayer import DISPLAYER
from Saver import SAVER
import settings

if __name__ == '__main__':
    tf.reset_default_graph()

    with tf.Session() as sess:
        agent = Agent(sess)
        SAVER.set_sess(sess)
        SAVER.load(agent)

        if settings.GUI:
            gui = threading.Thread(target=GUI.main)
            gui.start()

        print("Starting the run")
        try:
            agent.run()
        except KeyboardInterrupt:
            pass
        print("End of the run")

        SAVER.save('last')
def __init__(self):
    self.agent = Agent()
    self.game_steps = 0
import gym
import numpy as np
from Agent import Agent
import matplotlib.pyplot as plt

if __name__ == '__main__':
    env = gym.make('LunarLander-v2')
    n_games = 1000
    load_check = False
    agent = Agent(gamma=0.99, epsilon=1.0, alpha=5e-4, input_dimension=[8],
                  actions=4, memory_size=1000000, batch_size=64,
                  eps_decay=5e-5, replace=100)
    if load_check:
        agent.load_models()

    scores = []
    avg_scores = []
    eps_hist = []

    for i in range(n_games):
        done = False
        score = 0
def main(args):
    if args.load_weights:
        args.exploration_decay_steps = 10

    start = time.time()
    localtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
    print 'Current time is:', localtime
    print 'Starting at main.py...'

    # used for investigating the influence of tag length
    '''
    f = open(args.home_dir + args.result_dir + "_train.txt", 'w')
    f1 = open(args.home_dir + args.result_dir + "_test.txt", 'w')
    f.write(str(args) + '\n')
    f.write('\nCurrent time is: %s' % localtime)
    f.write('\nStarting at main.py...')
    '''

    # Initialize the environment, replay memory, deep Q-network and agent.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_rate)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        net = DeepQLearner(args, sess)
        env = Environment(args)
        temp_size = env.train_steps * args.epochs + env.test_steps
        if temp_size > 100000:
            temp_size = 100000
        args.replay_size = temp_size
        args.train_steps = env.train_steps
        assert args.replay_size > 0
        mem = ReplayMemory(args.replay_size, args)
        agent = Agent(env, mem, net, args)
        print '\n', args, '\n'

        if args.load_weights:
            print 'Loading weights from %s...' % args.load_weights
            net.load_weights(args.home_dir + args.load_weights)  # load last trained weights

        if args.test_one and args.load_weights:
            ws, act_seq, st = agent.test_one(args.text_dir)
            # f0.write('\nText_vec: %s' % str(env.text_vec))
            print '\nStates: %s\n' % str(st)
            print '\nWords: %s\n' % str(ws)
            print '\n\nAction_sequence: %s\n' % str(act_seq)
        else:
            # loop over epochs
            for epoch in xrange(args.start_epoch, args.epochs):
                # print '\n----------epoch: %d----------' % (epoch + 1)
                epoch_start = time.time()
                f = open(args.home_dir + args.result_dir + "_train" + str(epoch) + ".txt", 'w')
                f1 = open(args.home_dir + args.result_dir + "_test" + str(epoch) + ".txt", 'w')
                f.write(str(args) + '\n')
                f.write('\nCurrent time is: %s' % localtime)
                f.write('\nStarting at main.py...')
                # print 'env.train_steps: %d' % env.train_steps
                # print 'env.test_steps: %d' % env.test_steps

                if args.train_steps > 0:
                    if epoch == args.start_epoch:
                        env.train_init()
                    agent.train(args.train_steps, epoch)
                    if args.save_weights_prefix:
                        filename = args.home_dir + args.save_weights_prefix + "_%d.prm" % (epoch + 1)
                        net.save_weights(filename)

                    cnt = 0
                    ras = 0
                    tas = 0
                    tta = 0
                    for i in range(env.size):  # len(env.saved_text_vec)
                        text_vec_tags = env.saved_text_vec[i, :, -1]
                        state_tags = env.saved_states[i, :, -1]
                        sum_tags = sum(text_vec_tags)
                        if not sum_tags:
                            break
                        count = 0
                        right_actions = 0
                        tag_actions = 0
                        total_actions = 0
                        total_words = args.num_actions / 2
                        temp_words = env.saved_text_length[i]
                        if temp_words > total_words:
                            temp_words = total_words
                        # print "text_vec_tags", text_vec_tags
                        # print 'state_tags', state_tags
                        for t in text_vec_tags:
                            if t == args.action_label:
                                total_actions += 1
                        f.write('\n\nText:' + str(i))
                        f.write('\ntotal words: %d\n' % temp_words)
                        print '\ntotal words: %d\n' % temp_words
                        # f.write('\nsaved_text_vec:\n')
                        # f.write(str(env.saved_text_vec[i, :, -1]))
                        # f.write('\nsaved_states:\n')
                        # f.write(str(env.saved_states[i, :, -1]))
                        for s in xrange(temp_words):
                            if state_tags[s] == 0:
                                count += 1
                            elif state_tags[s] == args.action_label:
                                tag_actions += 1
                                if text_vec_tags[s] == state_tags[s]:
                                    right_actions += 1
                        cnt += count
                        ras += right_actions
                        tta += tag_actions
                        tas += total_actions
                        if total_actions > 0:
                            recall = float(right_actions) / total_actions
                        else:
                            recall = 0
                        if tag_actions > 0:
                            precision = float(right_actions) / tag_actions
                        else:
                            precision = 0
                        rp = recall + precision
                        if rp > 0:
                            F_value = (2.0 * recall * precision) / (recall + precision)
                        else:
                            F_value = 0
                        f.write('\nWords left: %d' % count)
                        f.write('\nActions: %d' % total_actions)
                        f.write('\nRight_actions: %d' % right_actions)
                        f.write('\nTag_actions: %d' % tag_actions)
                        f.write('\nActions_recall: %f' % recall)
                        f.write('\nActions_precision: %f' % precision)
                        f.write('\nF_measure: %f' % F_value)
                        print '\nText: %d' % i
                        print '\nWords left: %d' % count
                        print 'Actions: %d' % total_actions
                        print 'Right_actions: %d' % right_actions
                        print 'Tag_actions: %d' % tag_actions
                        print 'Actions_recall: %f' % recall
                        print 'Actions_precision: %f' % precision
                        print 'F_measure: %f' % F_value

                    if tas > 0:
                        average_recall = float(ras) / tas
                    else:
                        average_recall = 0
                    if tta > 0:
                        average_precision = float(ras) / tta
                    else:
                        average_precision = 0
                    arp = average_recall + average_precision
                    if arp > 0:
                        ave_F_value = (2 * average_recall * average_precision) / (average_recall + average_precision)
                    else:
                        ave_F_value = 0
                    f.write('\nTotal words left: %d' % cnt)
                    f.write('\nTotal actions: %d' % tas)
                    f.write('\nTotal right_actions: %d' % ras)
                    f.write('\nTotal tag_actions: %d' % tta)
                    f.write('\nAverage_actions_recall: %f' % average_recall)
                    f.write('\nAverage_actions_precision: %f' % average_precision)
                    f.write('\nAverage_F_measure: %f' % ave_F_value)
                    print '\nTotal words left: %d' % cnt
                    print 'Total actions: %d' % tas
                    print 'Total right_actions: %d' % ras
                    print 'Total tag_actions: %d' % tta
                    print 'Average_actions_recall: %f' % average_recall
                    print 'Average_actions_precision: %f' % average_precision
                    print 'Average_F_measure: %f' % ave_F_value

                if args.test:
                    f1.write('test_texts: %s\ttexts_num: %d\n' % (str(env.test_text_name), args.test_text_num))
                    agent.test(args.words_num, env.test_steps / args.words_num, f1)

                epoch_end = time.time()
                print 'Total time cost of epoch %d is: %ds' % (epoch, epoch_end - epoch_start)
                f.write('\nTotal time cost of epoch %d is: %ds\n' % (epoch, epoch_end - epoch_start))
                f1.write('\nTotal time cost of epoch %d is: %ds\n' % (epoch, epoch_end - epoch_start))
                f.close()
                f1.close()

    end = time.time()
    print 'Total time cost: %ds' % (end - start)
    localtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
    print 'Current time is: %s' % localtime
class MBExperiment:
    def __init__(self, params):
        """Initializes class instance.

        Argument:
            params (DotMap): A DotMap containing the following:
                .sim_cfg:
                    .env (gym.env): Environment for this experiment.
                    .task_hor (int): Task horizon.
                    .test_percentile (float): Risk-aversion percentile used for testing.
                    .record_video (bool): Whether to record training/adaptation iterations.
                .exp_cfg:
                    .ntrain_iters (int): Number of training iterations to be performed.
                    .nrollouts_per_iter (int): (optional) Number of rollouts done between training
                        iterations. Defaults to 1.
                    .ninit_rollouts (int): (optional) Number of initial rollouts. Defaults to 1.
                    .policy (controller): Policy that will be trained.
                    .ntest_rollouts (int): Number of rollouts for measuring test performance.
                    .nadapt_iters (int): (optional) Number of adaptation iters to perform. 10 in paper.
                    .continue_train (bool): Whether to continue training from a load_model_dir.
                    .test_domain (float): Environment domain used for adaptation/testing.
                    .start_epoch (int): Which epoch to start training from, used for continuing to
                        train a trained model.
                    .nexplore_iters (int): Number of unsupervised exploration iterations to be performed.
                .log_cfg:
                    .logdir (str): Directory to log to.
                    .suffix (str): Suffix to add to logdir.
        """
        # Assert that arguments we currently do not support are disabled.
        assert params.sim_cfg.get("stochastic", False) == False

        self.env = get_required_argument(params.sim_cfg, "env", "Must provide environment.")
        self.task_hor = get_required_argument(params.sim_cfg, "task_hor", "Must provide task horizon.")
        self.ntrain_iters = get_required_argument(
            params.exp_cfg, "ntrain_iters", "Must provide number of training iterations.")
        self.test_percentile = params.sim_cfg.test_percentile
        self.nrollouts_per_iter = params.exp_cfg.get("nrollouts_per_iter", 1)
        self.ninit_rollouts = params.exp_cfg.get("ninit_rollouts", 1)
        self.ntest_rollouts = params.exp_cfg.get("ntest_rollouts", 1)
        self.nadapt_iters = params.exp_cfg.get("nadapt_iters", 0)
        self.policy = get_required_argument(params.exp_cfg, "policy", "Must provide a policy.")
        self.continue_train = params.exp_cfg.get("continue_train", False)
        self.test_domain = params.exp_cfg.get("test_domain", None)
        self.start_epoch = params.exp_cfg.get("start_epoch", 0)
        self.nrecord = params.log_cfg.get("nrecord", 0)
        self.neval = params.log_cfg.get("neval", 1)
        self.training_percentile = self.policy.percentile
        self.frac_unsafe_pretraining = params.exp_cfg.get("frac_unsafe_pretraining", 0)

        if self.continue_train:
            self.logdir = params.exp_cfg.load_model_dir
            self.policy.ac_buf = np.load(os.path.join(self.logdir, "ac_buf.npy"))
            self.policy.prev_sol = np.load(os.path.join(self.logdir, "prev_sol.npy"))
            self.policy.init_var = np.load(os.path.join(self.logdir, "init_var.npy"))
            self.policy.train_in = np.load(os.path.join(self.logdir, "train_in.npy"))
            self.policy.train_targs = np.load(os.path.join(self.logdir, "train_targs.npy"))

        self.logdir = os.path.join(
            get_required_argument(params.log_cfg, "logdir", "Must provide log parent directory."),
            f"{params.log_cfg.get('expname') or ''}_{strftime('%Y-%m-%d--%H-%M-%S', localtime())}",
        )
        print("Logging to: ", self.logdir)
        self.suffix = params.log_cfg.get("suffix", None)
        if self.suffix is not None:
            self.logdir = self.logdir + '-' + self.suffix
        self.writer = SummaryWriter(self.logdir + '-tboard')

        # Set logdir for PointmassEnv
        if isinstance(self.env, PointmassEnv):
            self.env.set_logdir(self.logdir)

        self.record_video = params.sim_cfg.get("record_video", False)
        if self.test_domain is not None:
            self.env.test_domain = self.test_domain
            print("Setting test domain to: %0.3f" % self.env.test_domain)

    def run_experiment(self):
        """Perform experiment."""
        os.makedirs(self.logdir, exist_ok=True)

        # Train with random data first
        samples = []
        self.agent = Agent()
        for i in range(self.ninit_rollouts):
            if self.record_video:
                self.record_env = wrappers.Monitor(self.env, "%s/init_iter_%d" % (self.logdir, i), force=True)
            samples.append(
                self.agent.sample(
                    self.task_hor,
                    self.policy,
                    record=False,
                    env=self.env,
                ))

        print("Training with initial rollouts ", self.ninit_rollouts)
        if self.ninit_rollouts > 0:
            self.policy.train(
                [sample["obs"] for sample in samples],
                [sample["ac"] for sample in samples],
                [sample["rewards"] for sample in samples],
            )

        # Learn the dynamics and safety model
        self.run_training_iters(adaptation=False)

        # Save training buffers at the end of training so we can load them for adaptation if required
        old_train_in = self.policy.train_in
        old_train_targs = self.policy.train_targs
        old_ac_buf = self.policy.ac_buf
        old_prev_sol = self.policy.prev_sol
        old_init_var = self.policy.init_var
        torch.save(self.policy.model.state_dict(), os.path.join(self.logdir, 'weights'))
        np.save(os.path.join(self.logdir, "ac_buf.npy"), old_ac_buf)
        np.save(os.path.join(self.logdir, "prev_sol.npy"), old_prev_sol)
        np.save(os.path.join(self.logdir, "init_var.npy"), old_init_var)
        np.save(os.path.join(self.logdir, "train_in.npy"), old_train_in)
        np.save(os.path.join(self.logdir, "train_targs.npy"), old_train_targs)

        self.run_training_iters(adaptation=True)
        self.run_test_evals(self.nadapt_iters)

        # Plot density
        self.env.plot_density_graph()

    def run_training_iters(self, adaptation):
        max_return = -float("inf")
        if adaptation:
            iteration_range = [self.nadapt_iters]
            percentile = self.test_percentile
            self.policy.unsafe_pretraining = False
            print_str = "ADAPT"
        else:
            iteration_range = [self.start_epoch, self.ntrain_iters]
            percentile = self.training_percentile
            self.policy.unsafe_pretraining = True  # start off by default
            print_str = "TRAIN"
        last_tick = perf_counter()
        if isinstance(self.env, PointmassEnv):
            # set logdir for Pointmass
            self.env.set_logdir(f"{self.logdir}/{print_str}/")
        for i in trange(*iteration_range):
            print(f"========= TIME ELAPSED per iter {perf_counter() - last_tick}")
            last_tick = perf_counter()
            if i % 2 == 0 and adaptation:
                self.run_test_evals(i)
            samples = []
            self.policy.clear_stats()
            self.policy.percentile = percentile
            # Unsafe pretraining for the first `frac_unsafe_pretraining` proportion of ntrain_iters
            if not adaptation and i >= self.frac_unsafe_pretraining * self.ntrain_iters:
                self.policy.unsafe_pretraining = False
            print("####################################################################")
            print(f"Starting training on {print_str}, {'UNSAFE' if self.policy.unsafe_pretraining else ''} env iteration {i+1}")
            for j in range(self.nrollouts_per_iter):
                self.policy.percentile = percentile
                if self.record_video:
                    self.env = wrappers.Monitor(
                        self.env,
                        "%s/%s_iter_%d_percentile/percentile_%d_rollout_%d"
                        % (self.logdir, print_str, i, self.policy.percentile, j),
                        force=True)
                self.policy.logdir = "%s/%s_iter_%d" % (self.logdir, print_str, i)
                samples.append(
                    self.agent.sample(self.task_hor,
                                      self.policy,
                                      record=self.record_video and adaptation,
                                      env=self.env,
                                      mode='test' if adaptation else 'train'))
                if self.record_video:
                    self.env = self.env.env
            eval_samples = samples
            self.writer.add_scalar(
                'mean-' + print_str + '-return',
                float(sum([sample["reward_sum"] for sample in eval_samples])) / float(len(eval_samples)),
                i)
            max_return = max(
                float(sum([sample["reward_sum"] for sample in eval_samples])) / float(len(eval_samples)),
                max_return)
            self.writer.add_scalar('max-' + print_str + '-return', max_return, i)
            rewards = [sample["reward_sum"] for sample in eval_samples]
            print("Rewards obtained:", rewards)
            samples = samples[:self.nrollouts_per_iter]

            self.policy.train(
                [sample["obs"] for sample in samples],
                [sample["ac"] for sample in samples],
                [sample["rewards"] for sample in samples],
            )
            if self.policy.mse_loss is not None:
                mean_loss = np.mean(self.policy.mse_loss)
                self.writer.add_scalar('%s-mean-loss' % print_str, mean_loss, i)
            if self.policy.catastrophe_loss is not None:
                self.writer.add_scalar('%s-catastrophe-loss' % print_str, self.policy.catastrophe_loss, i)

    def run_test_evals(self, adaptation_iteration):
        print("Beginning evaluation rollouts.")
        if self.test_percentile is not None:
            self.policy.percentile = self.test_percentile
        samples = []
        for i in range(self.ntest_rollouts):
            if self.record_video:
                self.env = wrappers.Monitor(self.env, "%s/test_eval_%d" % (self.logdir, i), force=True)
            if not hasattr(self, "agent"):
                self.agent = Agent()
            self.policy.clear_stats()
            cur_sample = self.agent.sample(
                self.task_hor,
                self.policy,
                record=self.record_video,
                env=self.env,
                mode='test',
            )
            if self.record_video:
                self.env = self.env.env
            samples.append(cur_sample)
            # Running mean over the rollouts gathered so far.
            mean_test_return = float(
                sum([sample["reward_sum"] for sample in samples])) / float(len(samples))
            print("Evaluation mean-return (rollout number %d out of %d): %f"
                  % (i, self.ntest_rollouts, mean_test_return))
        if self.ntest_rollouts > 0:
            num_catastrophes = sum([1 if sample["catastrophe"] else 0 for sample in samples])
            self.writer.add_scalar('num-catastrophes', num_catastrophes, adaptation_iteration)
            mean_test_return = float(
                sum([sample["reward_sum"] for sample in samples])) / float(len(samples))
            self.writer.add_scalar('mean-test-return:', mean_test_return, adaptation_iteration)
        self.writer.close()
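# A hypothetical configuration sketch based on the docstring above; `my_env`
# and `my_policy` are placeholders for a gym.Env and a trained controller.
from dotmap import DotMap

params = DotMap()
params.sim_cfg.env = my_env
params.sim_cfg.task_hor = 100
params.sim_cfg.test_percentile = 50.0
params.exp_cfg.ntrain_iters = 50
params.exp_cfg.policy = my_policy
params.log_cfg.logdir = "log"

exp = MBExperiment(params)
exp.run_experiment()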
class Environment(object):
    def __init__(self):
        self.nodes = NodeGroup(gridUnit, gridUnit)
        self.nodes.getBoardNodes("Field.txt")

        # initialize agents, each at a random node with its own color
        self.agentRed = Agent(self.nodes.nodeList[randint(0, 2000)])
        self.agentRed.color = (255, 0, 0)
        self.agentWhite = Agent(self.nodes.nodeList[randint(0, 2000)])
        self.agentWhite.color = (255, 255, 255)
        self.agentTeal = Agent(self.nodes.nodeList[randint(0, 2000)])
        self.agentTeal.color = (0, 255, 255)
        self.agentPurple = Agent(self.nodes.nodeList[randint(0, 2000)])
        self.agentPurple.color = (100, 100, 200)
        self.agentGrey = Agent(self.nodes.nodeList[randint(0, 2000)])
        self.agentGrey.color = (150, 150, 150)

        # initialize five targets per agent, keyed to the owning agent's id
        self.targetRed1 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentRed.id)
        self.targetRed1.color = (250, 1, 1)
        self.targetRed2 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentRed.id)
        self.targetRed2.color = (250, 2, 2)
        self.targetRed3 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentRed.id)
        self.targetRed3.color = (250, 3, 3)
        self.targetRed4 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentRed.id)
        self.targetRed4.color = (250, 4, 4)
        self.targetRed5 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentRed.id)
        self.targetRed5.color = (250, 5, 5)
        self.targetWhite1 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentWhite.id)
        self.targetWhite1.color = (255, 255, 255)
        self.targetWhite2 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentWhite.id)
        self.targetWhite2.color = (255, 255, 255)
        self.targetWhite3 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentWhite.id)
        self.targetWhite3.color = (255, 255, 255)
        self.targetWhite4 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentWhite.id)
        self.targetWhite4.color = (255, 255, 255)
        self.targetWhite5 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentWhite.id)
        self.targetWhite5.color = (255, 255, 255)
        self.targetTeal1 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentTeal.id)
        self.targetTeal1.color = (0, 255, 255)
        self.targetTeal2 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentTeal.id)
        self.targetTeal2.color = (0, 255, 255)
        self.targetTeal3 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentTeal.id)
        self.targetTeal3.color = (0, 255, 255)
        self.targetTeal4 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentTeal.id)
        self.targetTeal4.color = (0, 255, 255)
        self.targetTeal5 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentTeal.id)
        self.targetTeal5.color = (0, 255, 255)
        self.targetPurple1 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentPurple.id)
        self.targetPurple1.color = (100, 100, 200)
        self.targetPurple2 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentPurple.id)
        self.targetPurple2.color = (100, 100, 200)
        self.targetPurple3 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentPurple.id)
        self.targetPurple3.color = (100, 100, 200)
        self.targetPurple4 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentPurple.id)
        self.targetPurple4.color = (100, 100, 200)
        self.targetPurple5 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentPurple.id)
        self.targetPurple5.color = (100, 100, 200)
        self.targetGrey1 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentGrey.id)
        self.targetGrey1.color = (150, 150, 150)
        self.targetGrey2 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentGrey.id)
        self.targetGrey2.color = (150, 150, 150)
        self.targetGrey3 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentGrey.id)
        self.targetGrey3.color = (150, 150, 150)
        self.targetGrey4 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentGrey.id)
        self.targetGrey4.color = (150, 150, 150)
        self.targetGrey5 = Targets(self.nodes.nodeList[randint(0, 2000)], self.agentGrey.id)
        self.targetGrey5.color = (150, 150, 150)

        self.Agents = [
            self.agentRed, self.agentWhite, self.agentTeal, self.agentPurple,
            self.agentGrey
        ]
        self.Targets = [
            self.targetRed1, self.targetRed2, self.targetRed3, self.targetRed4,
            self.targetRed5, self.targetWhite1, self.targetWhite2,
            self.targetWhite3, self.targetWhite4, self.targetWhite5,
            self.targetTeal1, self.targetTeal2, self.targetTeal3,
            self.targetTeal4, self.targetTeal5, self.targetPurple1,
            self.targetPurple2, self.targetPurple3, self.targetPurple4,
            self.targetPurple5, self.targetGrey1, self.targetGrey2,
            self.targetGrey3, self.targetGrey4, self.targetGrey5
        ]
        self.checkList = []

    def update(self, time_passed, screen):
        self.checkScenarioGoal()
        # Iterate over a copy, since found targets are removed from the list.
        for target in list(self.Targets):
            if target.isInCheckList:
                if target not in self.checkList:
                    self.checkList.append(target)
            if target.isFound:
                if target in self.checkList:
                    self.checkList.remove(target)
                self.Targets.remove(target)

        self.agentRed.update(time_passed, self.Targets, self.checkList)
        self.agentWhite.update(time_passed, self.Targets, self.checkList)
        self.agentTeal.update(time_passed, self.Targets, self.checkList)
        self.agentPurple.update(time_passed, self.Targets, self.checkList)
        self.agentGrey.update(time_passed, self.Targets, self.checkList)

        for target in self.Targets:
            target.render2(screen)

        # Render agents
        self.agentRed.render(screen)
        self.agentWhite.render(screen)
        self.agentTeal.render(screen)
        self.agentPurple.render(screen)
        self.agentGrey.render(screen)

    def returnNodes(self):
        return self.nodes

    def checkScenarioGoal(self):
        if self.agentRed.targetsFound == 5 \
                or self.agentWhite.targetsFound == 5 \
                or self.agentTeal.targetsFound == 5 \
                or self.agentGrey.targetsFound == 5 \
                or self.agentPurple.targetsFound == 5:
            return True
def Agent_activate():
    Agent.train()
""" This script is used to execute the cellular automaton Chile """ from Automaton import Automaton from Simulation import Simulation from Analyzer import Analyzer from Agent import Agent # TODO: USE A GUI TO CONFIG THESE PARAMETERS COLUMNS = 30 ROWS = 30 POPULATION = 100 ITERATIONS = 20 # executing the main method of the code automaton = Automaton(ROWS, COLUMNS) analyzer = Analyzer(automaton) automaton.createPopulation(POPULATION, Agent.randomRangeRadiumUnif(1, 5)) simulation = Simulation(automaton, True) simulation.start(ITERATIONS) rankings = analyzer.getRankingOfPopulation() print analyzer.getLinearRegressionData(False)
import pickle
import time
import numpy as np
import matplotlib.pyplot as plt
import torch

# Game and Agent are assumed to live in local modules of the same name
from Game import Game
from Agent import Agent

# Initialize environment
env = Game()

# Initialize agent; the hyperparameters are zeroed out because the agent
# only replays a pre-trained policy here
agent = Agent(lr=0, eps=0, gamma=0, max_memory=0, n_steps=0, batch_size=0,
              tau=0, lambda_1=0, lambda_2=0, lambda_3=0, l_margin=0)

# Load the pre-trained policy weights
agent.policy.predictNet.load_state_dict(torch.load("Q_target_demo.pth"))

done = False
accumulate_rewards = 0
state = env.reset()
while not done:
    action = agent.choose_action(state)[0]
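    # The excerpt ends here; a minimal continuation sketch, assuming a
    # gym-style env.step(action) -> (state, reward, done) interface:
    state, reward, done = env.step(action)
    accumulate_rewards += reward
print("Total reward:", accumulate_rewards)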
end = (5, 5)  # goal state
w = World(width, height, cell_size)
#wall_pos = [(0,5),(1,5),(2,5),(3,5),(4,5),(6,7)]
wall_pos = w.random_wall(40)  # percentage of walls in the grid
if end in wall_pos:
    wall_pos.remove(end)

#---------------------------------- Simulation --------------------------------------------
print("Activating the Matrix")

#---------------------------------- Setting Up the World ----------------------------------
agent = Agent(w, 0, 0)  # start state (0, 0)
neighbors = agent.neighbor()
visited = [(0, 0)]
current = (agent.pos_x, agent.pos_y)

#---------------------------------- Drawing Walls -----------------------------------------
for wall in wall_pos:
    w.draw_rec(screen, wall, pygame.Color(0, 0, 0))
w.draw_rec(screen, end, pygame.Color(0, 255, 0))

#---------------------------------- Searching ---------------------------------------------
print(wall_pos)
for n in agent.neighbor():
    if n not in visited and n not in neighbors and n not in wall_pos:
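        # The excerpt ends here; a plausible continuation, assuming newly
        # discovered cells are added to the frontier of the search:
        neighbors.append(n)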
def __init__(self):
    Agent.__init__(self)
def __init__(self, channel=None):
    Agent.__init__(self, Router.COMMANDER)
    self.setChannel(channel)  # handle to planner
"""
Created on Fri Nov 6 00:26:47 2020

@author: Abdelhamid
"""
import gym
from Agent import Agent
from plot import plot_epi_step

if __name__ == '__main__':
    env = gym.make('CartPole-v1')
    agent = Agent(lr=10**-4, n_actions=env.action_space.n,
                  input_dim=env.observation_space.shape, gamma=0.99,
                  epsilon=1.0, eps_dec=1e-5, eps_min=0.01,
                  max_iterations=10000, lamda=0.9)
    n_games = 10000
    scores = []
    steps = []
    n = 5
    for i in range(n_games):
        score, cont = agent.learn(env)
        scores.append(score)
        steps.append(cont)
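
    # Given the import above, the results are presumably plotted afterwards;
    # the exact signature of plot_epi_step is an assumption:
    plot_epi_step(scores, steps, n)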
#====================================================================================
#====================================================================================
# Building settings
lift_num = 4
building_height = 10
max_people_in_floor = 30
add_people_at_step = 25
add_people_prob = 0.8

# Create a building with 4 elevators, 10 floors, and at most 30 people per floor
building = Building(lift_num, building_height, max_people_in_floor)

# The agent controls each elevator; the goal is to bring everyone in the
# building down to the ground floor
agent = Agent(building_height, lift_num, 4)

batch_size = 64
epochs = 50
max_steps = 100
global_step = 0

for epoch in range(epochs):
    # generate people on each floor with 80% probability
    building.empty_building()
    building.generate_people(add_people_prob)
    for step in range(max_steps):
        states = []
        actions = []
        rewards = []
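        # The excerpt ends here; the usual on-policy collection pattern would
        # query the agent each step and record the transition. get_state,
        # get_action, and perform_action are hypothetical names, not from
        # the source:
        state = building.get_state()
        action = agent.get_action(state)
        reward = building.perform_action(action)
        states.append(state)
        actions.append(action)
        rewards.append(reward)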
from CosmicEncounter import Environment
from Agent import Agent

players = 5
#negotiation_map = [[], [], [], [], []]
#negotiation_map = [[1, 2, 3, 4], [0, 2, 3, 4], [0, 1, 3, 4], [0, 1, 2, 4], [0, 1, 2, 3]]
#negotiation_map = [[1], [0], [], [], []]
negotiation_map = [[1, 2], [0, 2], [0, 1], [], []]

env = Environment(nrof_players=players, nrof_planets_per_player=3)
agents = [Agent(agent_id, players, negotiation_map)
          for agent_id in range(players)]

episode_encounters = []
for episode in range(5000):
    print('Episode:', episode)
    obs, terminal, winners, reward = env.reset()
    episode_encounters.append(0)
    while not terminal:
        agent_id = env.whose_turn()[0]
        action_id, negotiation = agents[agent_id](obs, env.action_type(),
                                                  env.available_actions())
        obs, terminal, winners, reward = env.action(action_id, negotiation)
        agents[agent_id].reward(reward)
        if len(env.player_turns) == 1:
            episode_encounters[-1] += 1
        if terminal:
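            # The excerpt ends here; presumably the episode outcome is
            # recorded or reported at this point (assumed):
            print('Winners:', winners)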
    }
})

# parameters for the QNetwork (critic) network
params['arch_params_critic'] = OrderedDict({
    'state_and_action_dims': (params['state_dim'], params['action_dim']),
    'layers': {
        'Linear_1': 512, 'ReLU_1': None,
        'Linear_2': 256, 'ReLU_2': None,
        'Linear_3': 128, 'ReLU_3': None,
        'Linear_4': 64,  'ReLU_4': None,
        'Linear_5': params['action_dim']
    }
})

# AGENT
RL_Agent = Agent(params)

# TRAINING OF AN AGENT
scores = interact_and_train(RL_Agent, env, params)

# SAVING THE RESULTS
pickle.dump(scores, open(params['save_to'] + '.pkl', 'wb+'))

# PLOT THE RESULTS
plotter(scores, threshold=600)
def SendMessage(self, receivingAgents, content):
    Agent.SendMessage(self, receivingAgents, content)
from Agent import Agent
import sys

# parse host/port from the command line; host defaults to the server below
host = '138.221.22.200'
if len(sys.argv) <= 2:
    port = int(sys.argv[1])
else:
    host = sys.argv[1]
    port = int(sys.argv[2])

print "Create agent for module serverdefs"
# Here we create an agent for a module!
agent = Agent('serverdefs', host, port)
print "Ok, agent created and configured"

print "\nThe following methods are available:"
attrs = agent.__dict__
for item in attrs.keys():
    if hasattr(attrs[item], '__class__') and attrs[item].__class__ == Agent.Method:
        print ' ' * 3, item

print "\nCreate an instance of a server object"
server = agent.new_server()

print "\nThe following methods are available:"
attrs = server.__dict__
for item in attrs.keys():
    if hasattr(attrs[item], '__class__') and attrs[item].__class__ == Agent.Method:
        print ' ' * 3, item
def interact_and_train(Agent, Env, params):
    state_low = Env.observation_space.low
    state_high = Env.observation_space.high
    action_low = Env.action_space.low
    action_high = Env.action_space.high

    num_episodes = params['num_episodes']
    max_t = params['max_t']
    save_to = params['save_to']
    threshold = params['threshold']

    scores = []
    scores_window = deque(maxlen=10)
    best_score = -np.inf

    for e in range(num_episodes):
        score = 0
        states = np.array(Env.reset())  # reset the environment
        states = normalize(states, state_high, state_low)
        actions, actions_perturbed = Agent.choose_action(states)
        actions = denormalize(actions.detach().numpy(), action_high, action_low)
        actions_perturbed = denormalize(actions_perturbed.detach().numpy(),
                                        action_high, action_low)
        if len(actions_perturbed.shape) != 1:
            actions_perturbed = actions_perturbed.tolist()
        dones = np.zeros(len(actions_perturbed), dtype=bool)
        t = 0
        while not np.any(dones):
            t += 1
            next_states, rewards, dones, infos = Env.step(actions_perturbed)
            next_states = normalize(next_states, state_high, state_low)
            if states.ndim > 1:  # batched (multi-agent) observations
                for i in range(states.shape[0]):
                    Agent.memorize_experience(states[i], actions[i], rewards[i],
                                              next_states[i], dones[i])
            else:
                Agent.memorize_experience(states, actions, rewards,
                                          next_states, dones)
            Agent.learn_from_past_experiences()
            states = np.array(next_states)
            actions, actions_perturbed = Agent.choose_action(states)
            actions = denormalize(actions.detach().numpy(), action_high, action_low)
            actions_perturbed = denormalize(actions_perturbed.detach().numpy(),
                                            action_high, action_low)
            if len(actions_perturbed.shape) != 1:
                actions_perturbed = actions_perturbed.tolist()
            score += np.mean(rewards)  # accumulate the mean reward
            if np.any(dones) or t == max_t:
                break
        if params['noise_type'] == 'action':
            Agent.update_eps()
        scores.append(score)
        scores_window.append(score)
        print('\rEpisode {}\tAverage Score: {:.2f}\tCurrent Score : {}'.format(
            e + 1, np.mean(scores_window), score), end="")
        if (e + 1) % 10 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                e + 1, np.mean(scores_window)))
        if np.mean(scores_window) >= threshold and np.mean(scores_window) > best_score:
            best_score = np.mean(scores_window)
            print('\nEnvironment achieved average score {:.2f} in {:d} episodes!'
                  .format(np.mean(scores_window), (e + 1)))
            file_name = str(save_to) + '_' + str(np.round(np.mean(scores_window), 0)) + '.prms'
            Agent.save_weights(file_name)
            print("environment saved to ", file_name)
    return scores
import numpy as np
import matplotlib.pyplot as plt

from World import Reward
from Agent import Agent
import time

reward = Reward()

lik = np.log(np.array([0.00001]))
hyp = np.log(np.array([1, 1, 10]))
cov = NormalARD()
gp = GaussianProcess(lik, hyp, cov)
gp2 = GaussianProcess(lik, hyp, cov)

sig = np.ones((3,)) * 0.001
sig2 = np.ones((3,)) * 0.1
start_z = np.array([[0., 0., 0.]])
agent = Agent(gp, reward, sig, start_z)
agent2 = Agent(gp2, reward, sig2, start_z)

fig = plt.figure(figsize=(20, 7), dpi=300)
zlim = (-10, 10, -10, 10)
for i in xrange(0, 1000):
    agent.observe()
    agent.decide()
    agent.act()
    agent2.observe()
    agent2.decide()
    agent2.act()

    t = agent.gp.Z[-1].flatten()[-1]
    a = [0] * 4
    a[0] = agent.gp.Z[-1].flatten()[0]
    a[1] = agent.gp.Z[-1].flatten()[1]
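    # The excerpt ends here; with four slots in `a` and two agents, the
    # remaining entries presumably hold the second agent's latest
    # coordinates (assumed):
    a[2] = agent2.gp.Z[-1].flatten()[0]
    a[3] = agent2.gp.Z[-1].flatten()[1]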
class Game:
    def __init__(self):
        self.agent = Agent()
        self.game_steps = 0

    def start(self):
        self.agent.space()
        self.game_steps = 0
        return self.execute_action('n')

    def reload(self):
        self.game_steps = 0
        self.agent.reload()
        self.agent.space()
        return self.execute_action('n')

    def execute_action(self, action):
        self.agent.space()
        self.game_steps += 1
        #self.agent.unpause()
        for char in action:
            getattr(self.agent, char)()
        # grab the game area and downscale it to a small grayscale frame
        shot = ImageGrab.grab([505, 225, 1195, 1025])  # 850,900 -> 690:800
        img = np.array(shot)[:, :, 0]
        img = cv2.resize(img, (0, 0), fx=0.1, fy=0.1)
        shot = img
        #self.agent.pause()
        done = self.is_done(shot)
        score = 0.0
        if done:
            distance_score = self.get_score()[1]
            # penalize runs that take many steps relative to distance covered
            time_score = -(self.game_steps / (abs(distance_score) + 1e5))
            score = distance_score + time_score
            self.reload()
        return shot.astype(float).ravel(), score, done

    def is_done(self, shot):
        return self.get_score()[0]

    def get_score(self):
        # OCR the score readout and strip the letters of the "metres" label
        raw = pytesseract.image_to_string(ImageGrab.grab([600, 40, 1200, 160]))
        current_score = "".join(c for c in raw if c not in "metres")
        try:
            current_score = float(current_score)
        except ValueError:
            current_score = 1.8  # fallback when OCR fails
        # a uniformly light pixel in this region signals the game-over screen
        tmp = ImageGrab.grab([900, 380, 1000, 400])
        r, g, b = tmp.getpixel((0, 0))
        game_over = (r == 237 and g == 237 and b == 237)
        return [game_over, current_score]
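# A minimal driver sketch for the Game wrapper above (assumed usage, not
# from the source; 'n' stands for whichever key-press method the Agent
# exposes):
if __name__ == '__main__':
    game = Game()
    obs, score, done = game.start()
    while not done:
        obs, score, done = game.execute_action('n')
    print("final score:", score)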