Example #1
    def __init__(self, refm, disc_rate, sims, depth, horizon, epsilon=0.05, threads=1, memory=32):

        Agent.__init__(self, refm, disc_rate)

        if epsilon > 1.0:
            epsilon = 1.0
        if epsilon < 0.0:
            epsilon = 0.0

        self.refm = refm
        self.sims = int(sims)
        self.depth = int(depth)
        self.horizon = int(horizon)
        self.memory = int(memory)
        self.epsilon = epsilon
        self.threads = int(threads)

        self.obs_cells = refm.getNumObsCells()
        self.obs_symbols = refm.getNumObsSyms()

        self.obs_bits = int(ceil(log(refm.getNumObs(), 2.0)))
        self.reward_bits = int(ceil(log(refm.getNumRewards(), 2.0)))
        self.num_actions = refm.getNumActions()

        print "obs_bits = ", self.obs_bits
        print "reward_bits = ", self.reward_bits

        self.agent = None

        self.reset()
Example #2
class World(Widget):

    def __init__(self, settings):
        super().__init__()

        self.settings = settings

        # Generate Obstacles
        self.obstacles = []
        for i in range(self.settings.NUM_OBSTACLES):
            self.obstacles.append((randint(0, self.settings.GRID_WIDTH - 1) * self.settings.CELL_SIZE, randint(0, self.settings.GRID_HEIGHT - 1) * self.settings.CELL_SIZE))

        # Generate goal
        self.goal = (randint(0, self.settings.GRID_WIDTH - 1) * self.settings.CELL_SIZE, randint(0, self.settings.GRID_HEIGHT - 1) * self.settings.CELL_SIZE)
        while self.goal in self.obstacles:
            self.goal = (randint(0, self.settings.GRID_WIDTH - 1) * self.settings.CELL_SIZE, randint(0, self.settings.GRID_HEIGHT - 1) * self.settings.CELL_SIZE)

        # Create the agent
        self.agent = Agent(self.settings, self.canvas, self.goal, self.obstacles, self.settings.HEURISTIC)

    def draw(self):
        
        with self.canvas:
            # Draw obstacles
            Color(*self.settings.OBSTACLE_COLOR)
            for ob in self.obstacles:
                Rectangle(pos=ob, size=(self.settings.CELL_SIZE, self.settings.CELL_SIZE))

            # Draw goal
            Color(*self.settings.GOAL_COLOR)
            Rectangle(pos=self.goal, size=(self.settings.CELL_SIZE, self.settings.CELL_SIZE))

    def update(self, dt):
        with self.canvas:
            self.agent.update(dt)
Example #3
    def __init__(self,errGrowth,unnormalizeDirtRate,unnormalizeDirtSize,accuracy,N) :
        Agent.__init__(self,Router.PLANNER)
	
        # define the
        #     variance growth parameter,
        #     average dirt fall,
        #     handle to sensor,
        #     handle to array of vacuums
        self.setNumber(N)
        self.vacuumRange = 3
        self.setAccuracy(accuracy)

        # Initialize the matrices.
        self.worldview = zeros((N,N),dtype=float64);
        self.dirtLevels = []
        self.wetview = zeros((N,N),dtype=float64);
        self.viewPrecision = zeros((N,N),dtype=float64);

        self.unnormalizeDirtRate = unnormalizeDirtRate
        self.unnormalizeDirtSize = unnormalizeDirtSize
        self.errGrowth = errGrowth
        self.normalizeDirtRate()


        self.vacuumlocation = []
        
        #create distance matrix
        self.defineDistanceArray()
        self.wDist=0;               # default
Example #4
File: Q_l.py Project: benkant/AIQ
    def __init__( self, refm, disc_rate, init_Q, Lambda, alpha, epsilon, gamma=0 ):

        Agent.__init__( self, refm, disc_rate )

        self.num_states  = refm.getNumObs() # assuming that states = observations
        self.obs_symbols = refm.getNumObsSyms()
        self.obs_cells   = refm.getNumObsCells()
        
        self.init_Q  = init_Q
        self.Lambda  = Lambda
        self.epsilon = epsilon
        self.alpha   = alpha

        # if the internal discount rate isn't set, use the environment value
        if gamma == 0:
            self.gamma = disc_rate
        else:
            self.gamma = gamma

        if self.gamma >= 1.0:
            print "Error: Q learning can only handle an internal discount rate ", \
                  "that is below 1.0"
            sys.exit()
            
        self.reset()
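The gamma check above presumably guards against divergence of the value estimates in a continuing environment: with a bounded per-step reward r, the discounted return is a geometric series that stays finite only for a discount rate strictly below one,

\[
\sum_{t=0}^{\infty} \gamma^{t} r \;=\; \frac{r}{1-\gamma}, \qquad 0 \le \gamma < 1 .
\]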
Example #5
 def testAddAndRemoveEdge(self):
     agent = Agent(n=10, p = 0, topology='ErdosRenyi')
     self.assertFalse(agent.graph.are_connected(0,1))
     agent.addEdge(0,1)
     self.assertTrue(agent.graph.are_connected(0,1))
     agent.removeEdge(0,1)
     self.assertFalse(agent.graph.are_connected(0,1))
 def __init__(self, gamma, filename):
     Agent.__init__(self)
     self._fileName = filename + "fortify.pickle"
     self.load()
     self.gamma = gamma
     self.lastState = None
     self.lastAction = None
     self.lastScore = 0
Example #7
    def __init__(self, name=None):
        if name is None:
            name = "builder"
            
        Agent.__init__(self, name, "build")
        ProjectInspector.__init__(self)

        return
Example #8
    def __init__(self,accuracy=0.0) :
        Agent.__init__(self, Router.SENSORARRAY)
        # constructor (accuracy of measurement)
        self.accuracy = accuracy - float(int(accuracy))  # keep only the fractional part (forces non-negative inputs into [0, 1))

        self.N = 5
        self.array = zeros((self.N,self.N),dtype=float64) # array of values for dirt levels
        self.Wet = zeros((self.N,self.N),dtype=float64)   # array of values for wetness levels
 def __init__(self, gamma, filename):
     Agent.__init__(self)
     self._fileName = filename + "startingCountry.pickle"
     self.gamma = gamma
     self.load()
     self.lastState = None
     self.lastAction = None
     self.stateActionList = []
Example #10
class testAgent(unittest.TestCase):
    def setUp(self):
        self.agent = Agent()
    def testPluck(self):
        self.agent.pluck()

    def testGroupSize(self):
        agent = Agent(n=10)
        self.assertEqual(agent.groupSize(), 10)

    def testRandomVertexPair(self):
        (i,j) = self.agent.getRandomVertexPair()
        self.assertGreater(self.agent.groupSize(), i)
        self.assertGreater(self.agent.groupSize(), j)

    def testAddAndRemoveEdge(self):
        agent = Agent(n=10, p = 0, topology='ErdosRenyi')
        self.assertFalse(agent.graph.are_connected(0,1))
        agent.addEdge(0,1)
        self.assertTrue(agent.graph.are_connected(0,1))
        agent.removeEdge(0,1)
        self.assertFalse(agent.graph.are_connected(0,1))

    def testPluckEdge(self):
        agent = Agent(n=10, p =0)
        agent.pluckEdge(1,2)

    def testPluckTillConnectedEmpty(self):
        agent = Agent(n=10, topology='Empty')
        agent.pluckTillConnected()
        self.assertTrue(agent.isConnected())

    def testPluckTillConnectedStar(self):
        agent = Agent(n=20, topology='Star')
        agent.pluckTillConnected()
        self.assertTrue(agent.isConnected())

    def testAveragePathLengthFull(self):
        agent = Agent(n=10, topology='Full')
        self.assertEqual(1.0, agent.averagePathLength())

    def testAveragePathLengthEmpty(self):
        agent = Agent(n=10, topology='Empty')
        self.assertEqual(agent.averagePathLength(), Inf)

    def testAveragePathLengthStar(self):
        for j in xrange(5,20):
            n = float(j)
            agent = Agent(n=j, topology='Star')
            self.assertEqual(agent.averagePathLength(),(n-1)*2.0/n )

    def testEdgeOccupation(self):
        n = 10
        for m in xrange(0, 10, 1):
            agent = Agent(n = n, m = m, topology='ErdosRenyi')
            p = float(m)/float(n * (n-1)/2)
            self.assertEqual(agent.edgeOccupation(), p)
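For reference, testEdgeOccupation above checks the graph density that edgeOccupation is expected to return: the number of edges m divided by the number of possible edges of a simple undirected graph on n vertices,

\[
\text{edgeOccupation} \;=\; \frac{m}{\binom{n}{2}} \;=\; \frac{m}{n(n-1)/2},
\]

which is exactly the p computed in the test body.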
Example #11
    def __init__( self, refm, disc_rate ):
        Agent.__init__( self, refm, disc_rate )

        if self.num_actions > 10:
            print "Error: Manual agent only handles 10 or fewer actions!"
            sys.exit()

        self.mode = MANUAL
        self.last_val = 0
 def __init__(self, gamma, filename):
     Agent.__init__(self)
     self._fileName = filename + "placeTroops.pickle"
     self.load()
     self.gamma = gamma
     self.lastState = None
     self.lastAction = None
     self.lastScore = 0
     self.stateActionList = []
Example #13
File: Freq.py Project: benkant/AIQ
    def __init__( self, refm, disc_rate, epsilon ):

        Agent.__init__( self, refm, disc_rate )
        
        self.obs_symbols = refm.getNumObsSyms()
        self.obs_cells   = refm.getNumObsCells()

        self.epsilon = epsilon
        
        self.reset()
Example #14
    def __init__(self, r=1.0, s=1.0, v=1.0, cloudsize=1.0):
        Agent.__init__(self, Router.WORLD)
        self.time = 0

        self.N = 5                 # size of grid
        self.expenditure = 0.0     # cumulative funds expended since last reset
        self.numberVacuums = 0     # no vacuums assigned yet
        self.vacuumArray = []      # array of object handles
        self.intializeVariables(r, s, v, cloudsize)

        self.setSensor(None)
        self.setPlanner(None)
Example #15
    def __init__(self, name=None, project=None, mode=None):
        if name is None:
            name = "janitor"

        if mode is None:
            mode = 'clean'
        self.mode = mode

        self.project = project
            
        Agent.__init__(self, name, "janitor")
        ProjectInspector.__init__(self)

        return
Example #16
File: Shark.py Project: agoryu/SCI
 def __init__(self, x, y):
     """
     Allocate a shark agent.
     @param x: x position
     @param y: y position
     @param pasX: the step in x
     @param pasY: the step in y
     """
     Agent.__init__(self,x,y,0,0)
     self.color = 'red'
     self.age = 0
     self.HUNGER_CYCLE = 6
     self.hunger = choice(range(int(self.HUNGER_CYCLE/2.0)))
     self.PERIOD = 10
    def spawn_agent(self):

        lifespan = self.rando.randint(60, 400)   # lifespan of the agent; surfing stops once the lifespan is reached
        pagetime = self.rando.randint(10, 40)    # time spent on a page within a website, e.g. youtube.com/watch...
        hometime = self.rando.randint(60, 180)   # time spent on a website from the list of sites, e.g. youtube.com

        name = str(self.name) + " Agent " + str(len(self.all_agents) + 1)  # name = "Overseer x: Agent y"
        new_agent = Agent(self.clock.get_time_passed(), self.sites,(pagetime),(hometime), name, (lifespan),self.clock)
        print("")
        print("Created an agent")
        new_agent.print_params()
        print("")
        new_thread = surfThread(new_agent,Overseer.sim_time, self.mode, self.clock)
        new_thread.start()#Agent begins surfing the web
        self.threads.append(new_thread)
        self.curr_agents_surfing.append(new_agent)
        self.all_agents.append(new_agent)
Example #18
 def __init__(self, aMessageProcessorFunction, aAgentCount):
 
     Agent.__init__(self)
     
     self.agents = []
     
     self.processedCount = 0
     self.receivedCount = 0
     self.exitOnDone = False
     self.messageDataQueue = []
     self.agentIdentifierQueue = []
     
     for index in range(0, aAgentCount):
     
         agent= WorkerBee(aMessageProcessorFunction)
         self.agents = self.agents + [agent]
         self.agentIdentifierQueue += [agent.identifier]
Example #19
    def test_infiniteRadium(self):

        self.automaton.reinit(ROWS, COLUMNS)
        self.automaton.createPopulation(POPULATION, Agent.infiniteRadium())
        self.simulation.start(ITERATIONS)
        self.assertTrue(self.automaton.convergence, "IT IS CONVERGENCE")
        array = self.automaton.getMatrixOfPopulation()
        # print repr(self.automaton) + " " + repr(array.max())
        self.assertEqual(POPULATION, len(self.automaton.getAgents()), "ALL AGENTS")
Example #20
            def exp(self):

                self.automaton.reinit(SIZE_X, SIZE_Y)
                self.simulation.enableConvergenceStop()

                self.automaton.disableRandomVisitingOfCells()
                #self.automaton.enableCircularGrid()

                self.automaton.createPopulation(POPULATION, Agent.randomRangeRadiumUnif(RMIN,rmax))
                self.simulation.start(ITERATION)
Example #21
    def addFamilies(self):    
        while self.popSize < self.popLimit: # create new agents in bundles of families while the population is less than the maximum size
            startVert = len(self.agents.vs)            
            popLeft = self.popLimit - self.popSize
            newVerts = random.randint(self.minFam, self.maxFam)
            
            # ensure that the final family group added does not exceed the maximum population size
            if newVerts < popLeft:
                addVerts = newVerts
            else: addVerts = popLeft
            
            # create a new set of vertices that will represent a family in the population
            self.agents.add_vertices(addVerts)
            endVert = len(self.agents.vs)
            newFam = []
            groupNum = random.randint(1,2)
            
            # initialize instances of Class:Agent to be stored at each vertex, and assign necessary properties to each agent / vertex
            for i in range(startVert, endVert):
                newAgent = Agent()
                newAgent.setID(i) # unique ID for each agent
                self.agents.vs(i)["Agent"] = newAgent # store the agent at the vertex
                self.agents.vs(i)["Index"] = str(i) # create a string representation of the unique ID for logging and reporting
                self.agents.vs(i)["Status"] = "H" # set the current disease status of the agent to 'H' (Healthy)
                self.agents.vs(i)["Family"] = str(self.numFamilies) # set an identifier of the family that the agent belongs to (family number increments with each new group created)
                # designate the agent as a member of 'Group A' or 'Group B' based on the randomly generated number above                
                if groupNum == 1:
                    self.agents.vs(i)["Group"] = "A"
                else: self.agents.vs(i)["Group"] = "B"
                newFam.append(i)
#                print "Added agent " + str(i) #debugging                
            
            # Create connections between all members of the family and designate those as family connections
            for i in range(startVert, endVert-1):
                for j in range(i+1, endVert):
                    self.agents.add_edge(i, j)
                    self.agents.es(len(self.agents.es)-1)["Relation"] = "Family"
                    
            self.families.append(newFam) # store the family in the family list            
            self.numFamilies+=1 # update the total number of families in the population
            self.popSize += addVerts # update the total number of agents in the population
Example #22
    def __init__(self, IDnum, currentTime=0.0, channel=None):  # class constructor
        Agent.__init__(self, Router.VACUUM)

        self.xPos   = 0
        self.yPos   = 0
        self.setStatus(3)                     # 1 - moving, 2-cleaning, 3-waiting, 4-repairing
        self.initializeTime(currentTime)      # time it will be done with current operation
        self.setID(IDnum)
        self.range = 3                        # maximum distance that can be travelled 
        self.moveQueue  = []

        self.setChannel(channel)              #channel to commander
        self.timeToClean=8;
        self.timeToRepair=32;
        self.odometer=0;                      # tracks distance travelled
        self.missions=0;                      # number of cells that have been cleaned
        self.moveCost=1;                      #cost to move
        self.repairCost=30;                   # cost to conduct repair
        self.repairs=0;                       # number of repairs - running total
        
        self.time = 0;

        self.Moisture = None
Example #23
    def __init__(self, settings):
        super().__init__()

        self.settings = settings

        # Generate Obstacles
        self.obstacles = []
        for i in range(self.settings.NUM_OBSTACLES):
            self.obstacles.append((randint(0, self.settings.GRID_WIDTH - 1) * self.settings.CELL_SIZE, randint(0, self.settings.GRID_HEIGHT - 1) * self.settings.CELL_SIZE))

        # Generate goal
        self.goal = (randint(0, self.settings.GRID_WIDTH - 1) * self.settings.CELL_SIZE, randint(0, self.settings.GRID_HEIGHT - 1) * self.settings.CELL_SIZE)
        while self.goal in self.obstacles:
            self.goal = (randint(0, self.settings.GRID_WIDTH - 1) * self.settings.CELL_SIZE, randint(0, self.settings.GRID_HEIGHT - 1) * self.settings.CELL_SIZE)

        # Create the agent
        self.agent = Agent(self.settings, self.canvas, self.goal, self.obstacles, self.settings.HEURISTIC)
Example #24
File: Wall.py Project: agoryu/SCI
 def __init__(self, x, y):
     Agent.__init__(self,x,y,0,0)
     # Brown color
     self.color = '#582900' 
Example #25
 def updateAgentPopulation(self):
     self.agent_pop = [
         Agent(t) for t in self.team_pop
         if t.getNumReferencingLearners() == 0
     ]
Example #26
def extractPerson(main=False,
                  onto=None,
                  functType=None,
                  name=None,
                  time="present"):

    if onto is None:
        raise Exception("Parameters found None")

    if main and functType is None:
        usernametext = "Alright! Tell me about yourself. What is your name?: "
        healthparamsaux = healthparams
    elif functType == "liveswith":
        usernametext = texts[main]["liveswithintroduce"].format(name=name)
        healthparamsaux = healthliveswith
        main = False
    elif functType == "companions":
        usernametext = texts[main]["companionsintroduce"]
        healthparamsaux = healthcompanions
        main = False
    else:
        pass  #debug

    username = input(usernametext)
    person = AGPerson(username, onto)
    if username in ag.globalAgent().people:
        name = renameUser(username, ag.globalAgent().people.keys())
        if name is None:
            # user kept the existing name; return the person already on record
            return ag.globalAgent().people[username]
        person.name = name
    ag.globalAgent().addPerson(person)

    if main:
        userparamstext = "What else could be of interest about you? This data will help me give you a better analysis."
    else:
        userparamstext = "What else could be of interest about " + username + "? This data will help me give you a better analysis."

    while (True):
        clear()
        print(userparamstext)

        uindex = userMenu(
            list(x[0] for x in healthparamsaux) + ["That's enough data."])
        if uindex == len(healthparamsaux):
            break
        else:
            clear()
            healthparamsaux[uindex][1](main, person)
            if uindex == len(healthparamsaux) - 1:
                healthparamsaux = healthparamsaux[:uindex]
            elif uindex == 0:
                healthparamsaux = healthparamsaux[uindex + 1:]
            else:
                healthparamsaux = healthparamsaux[:uindex] + healthparamsaux[
                    uindex + 1:]

    if functType != "liveswith":
        liveswith = extractLivingwith(main=main, person=person, onto=onto)
        person.toOnto()
        if liveswith is not None:
            person.linkLivesWith(liveswith)
        gears = extractProtectionGear(main=main,
                                      onto=onto,
                                      personname=person.name,
                                      placename=name)
        if gears is not None:
            person.gears = gears
        person.updateGears()

    clear()
    while (True):
        resp = input(texts[main]["pastactivityyesno"].format(name=person.name))
        if resp.lower() == "no":
            break
        elif resp.lower() == "yes":
            clear()
            pastActivities = AC.extractActivity(
                main=main,
                entranceText=texts[main]["pastactivitytell"].format(
                    name=person.name),
                onto=onto,
                locations=[
                    "Bookshop", "Boutique", "Cafe", "Library", "Restaurant",
                    "Shop", "Stadium"
                ],
                time="past",
                agent=person)
            clear()
        else:
            clear()
            print("Please enter yes or no.")

    person.toOnto()
    return person
Example #27
class Gridworld(object):
    def __init__(self, walls, treasure, snake_pit, size=18, alfa=0.5, gamma=1):
        self.size = size
        self.walls = walls
        self.treasure = treasure
        self.snake_pit = snake_pit
        self.snake_penalty = -20
        self.treasure_reward = 10
        self.default_reward = -1
        self.alfa = alfa
        self.gamma = gamma

        # Initialize random policies
        self.policies = [[{
            'north': 0.25,
            'south': 0.25,
            'west': 0.25,
            'east': 0.25
        } for _ in range(size)] for _ in range(size)]

        # Initialize Q values to 0
        self.Qmat = [[{
            'north': 0.,
            'south': 0.,
            'east': 0.,
            'west': 0.
        } for _ in range(self.size)] for _ in range(self.size)]

    def init_agent(self):
        '''
        Initializes the agent in a tile that is not a wall.
        '''
        [start_y, start_x] = self.walls[0]
        while [start_y, start_x] in self.walls:
            start_y = random.randint(0, self.size - 1)
            start_x = random.randint(0, self.size - 1)

        self.agent = Agent(start_y, start_x)

    def check_reward(self, current_pos):
        '''
        Checks reward associated at a position
        '''
        if current_pos == self.snake_pit:
            return self.snake_penalty
        elif current_pos == self.treasure:
            return self.treasure_reward
        else:
            return self.default_reward

    def generate_episode(self, algorithm='sarsa', e=0.1):
        # Initialize s
        self.init_agent()

        # Choose a from s using policy derived from Q, e-greedy
        dir = self.agent.select_e_greedily(self.Qmat, e=e)

        # Repeat for each step
        while self.agent.pos != self.snake_pit and self.agent.pos != self.treasure:

            # Save current state to be updated
            [current_y, current_x] = self.agent.pos

            # Take action a, observe reward, s'
            self.agent.move(dir, self.walls, self.size)
            reward = self.check_reward(self.agent.pos)
            self.agent.reward += reward

            # Choose a' from s' using policy derived from Q, e-greedy
            new_dir = self.agent.select_e_greedily(self.Qmat, e=e)
            [new_y, new_x] = self.agent.pos

            # Apply sarsa or q learning
            if algorithm == 'sarsa':
                update_dir = new_dir
            else:
                update_dir = self.agent.select_e_greedily(self.Qmat, e=0)

            # Update Q
            self.Qmat[current_y][current_x][dir] += self.alfa * (
                reward
                + self.gamma * self.Qmat[new_y][new_x][update_dir]
                - self.Qmat[current_y][current_x][dir])

            # Update a <- a', s <- s'
            dir = new_dir
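The algorithm branch in generate_episode is the only difference between the two methods: Sarsa bootstraps from the action actually selected next (epsilon-greedy, on-policy), while Q-learning bootstraps from the greedy action (off-policy). Below is a minimal, self-contained sketch of the two update targets using hypothetical placeholder values, not the Gridworld state above.

# Hypothetical values only; not taken from the Gridworld example above.
alpha, gamma, reward = 0.5, 1.0, -1.0
q_next = {'north': 0.0, 'south': -2.0, 'east': 1.5, 'west': 0.3}  # Q values for the next state s'
q_current = 0.2                                                   # Q(s, a) before the update

chosen_next_action = 'west'                                       # a' picked e-greedily for s'
sarsa_target = reward + gamma * q_next[chosen_next_action]        # on-policy target (Sarsa)
q_learning_target = reward + gamma * max(q_next.values())         # greedy target (Q-learning)

print(q_current + alpha * (sarsa_target - q_current))             # Sarsa update of Q(s, a)
print(q_current + alpha * (q_learning_target - q_current))        # Q-learning update of Q(s, a)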
Example #28
# Author: Kishansingh Rajput
# Script: Driver script for RL agent

import gym
import numpy as np
from Agent import Agent
from utils import plot_learning_curve
import env
import tensorflow as tf

if __name__ == '__main__':
    # env = gym.make('Pendulum-v0')
    agent = Agent(input_dims=env.observation_space.shape,
                  env=env,
                  n_actions=env.action_space.shape)
    n_games = 25000

    figure_file = 'pendulum.png'

    best_score = env.reward_range[0]
    score_history = []
    load_checkpoint = False

    if load_checkpoint:
        n_steps = 0
        while n_steps <= agent.batch_size:
            observation = env.reset()
            # action = env.action_space.sample()
            action = []
            observation_, reward, done, info = env.step(action)
            agent.remember(observation, action, reward, observation_, done)
Example #29
from Agent import Agent
from Issue import Issue
from datetime import date
from utils import *

##################################################################
# Agent(String name, bool isAvailable, date availableSince, list roleList)
# Issue(list roleList)
# agentSelectionMode: allAvailable | random | leastBusy
# allocateAgents(Issue, list[Agent], agentSelectionMode)
####################################################################

# Master agent - Knows everything - has been free the least
a0 = Agent('X', True, date.today(),
           ['hindi', 'english', 'spanish', 'french', 'chinese', 'tamil'])
# List of agents
a1 = Agent('A', True, date(2020, 5, 1), ['hindi', 'english', 'spanish'])
a2 = Agent('B', True, date(2020, 5, 1), ['hindi', 'french', 'english'])
a3 = Agent('C', True, date(2020, 6, 3), 'french')
a4 = Agent('D', True, date.today(), ['chinese', 'tamil', 'spanish'])
a5 = Agent('E', True, date.today(), ['chinese', 'french', 'english'])
a6 = Agent('M', False, "NA", ['spanish', 'french', 'english'])
a7 = Agent('N', False, 'NA', ['tamil', 'french', 'english'])
a8 = Agent('O', False, '', ['chinese', 'french', 'hindi'])
# Master agent - Knows everything - has been free the longest
a9 = Agent('Z', True, date(2020, 1, 1),
           ['hindi', 'english', 'spanish', 'french', 'chinese', 'tamil'])

agentList = [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9]
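The snippet is cut off before the allocation call, so the following is only a hypothetical usage sketch built from the banner comment above (Issue takes a role list, allocateAgents comes from utils, and the mode is one of allAvailable | random | leastBusy):

# Hypothetical sketch based on the banner comment; not part of the original script.
issue = Issue(['hindi', 'french'])                        # an issue that needs these roles covered
assigned = allocateAgents(issue, agentList, 'leastBusy')  # or 'allAvailable' / 'random'
print(assigned)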

# Getting Issue Details from command prompt
    def calculateSwarmDrones(self, newParameters):
        SPFParameter = newParameters[0:4]
        TPFParameter = newParameters[4:6]

        # Setup
        min_allowable_dist = self.targetOutput
        Drones = []
        position_drone1 = [(0, 0, 5)]
        Drone1 = Agent(0, position_drone1[0], 1)
        Drones.append(Drone1)

        position_drone2 = [(10, 0, 5)]
        Drone2 = Agent(1, position_drone2[0], 1)
        Drones.append(Drone2)

        SPF = SwarmPotentialField(min_allowable_dist)
        SPF.setup(SPFParameter)

        Ship = Target([5, 10, 5])
        TPF = TargetPotentialField(TPFParameter[0], TPFParameter[1], 1)
        Ships = [Ship]

        responseValue = []

        for iteration in self.simulationTime:
            Drone1.SwarmPotentialForce = SPF.calculate_total_swarm_field_force(
                Drone1.index, Drones)
            Drone2.SwarmPotentialForce = SPF.calculate_total_swarm_field_force(
                Drone2.index, Drones)

            Drone1.TargetPotentialForce = TPF.calculate_target_force(
                Drone1.index, 0, Drones, Ships)
            Drone2.TargetPotentialForce = TPF.calculate_target_force(
                Drone2.index, 0, Drones, Ships)

            self.swarmForces.append(
                calculateLength(Drone1.calculate_total_force()))

            Drone1.calculateVelocity(Drone1.calculate_total_force())
            Drone1.move()
            Drone2.calculateVelocity(Drone2.calculate_total_force())
            Drone2.move()

            [distance_tuple, distance] = SPF.getDistance(0, 1, Drones)
            responseValue.append(distance)

        return responseValue, True
    # examine the state space
    state = env_info.vector_observations[0]
    print('States look like:', state)
    state_size = len(state)
    print('States have length:', state_size)

    #Replay Buffer
    memory = ReplayBuffer(BUFFER_SIZE, BATCH_SIZE, random.seed(seed), device)
    scores = []  # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start  # initialize epsilon
    agent = Agent(state_size,
                  action_size,
                  seed=seed,
                  lr=LR,
                  memory=memory,
                  update_every=UPDATE_EVERY,
                  batch_size=BATCH_SIZE,
                  gamma=GAMMA,
                  TAU=TAU,
                  device=device)

    for i_episode in range(1, n_episodes + 1):
        #state = env.reset()
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            #next_state, reward, done, _ = env.step(action)
            env_info = env.step(action.astype(int))[brain_name]
            next_state = env_info.vector_observations[0]  # get the next state
Example #32
#	taglist.append(AcousticTag(i)) #better for understanding because pings are aligned in time and  all have same ping interval
	x,y,_ = taglist[i].pos
	tagx[i]=x
	tagy[i]=y
"""

E = Grid(taglist,x_range=x_range, y_range=y_range)
if field == fields[0]:
    taglist= E.loadTagList("testField1_1000") #E.setMap(density_map)
    tagData=np.genfromtxt("testField1_1000.csv",delimiter=",")
    #E.saveTagList("tags")
for i in range(numAgents):
    s= AcousticReciever(np.array([0,0,0]),sensorRange)
    if method == searchMethods[2]:
        #agentList.append(Agent(np.array([np.random.rand()*x_range,np.random.rand()*y_range,0,0]),s,E,dim=2))
        agentList.append(Agent(np.array([start_pos[0],start_pos[1],0,0]),s,E,dim=2))
        agentList[i].dynamics=m2_step
        u=[0,0]
    elif method == searchMethods[4]:
        #agentList.append(Agent(np.array([np.random.rand()*x_range,np.random.rand()*y_range,0,0]),s,E,dim=2))
        agentList.append(Agent(np.array([start_pos[0],start_pos[1]]),s,E,dim=2))
        agentList[i].dynamics=m3_step
        u=[0,0]
    else:
        #agentList.append(Agent(np.array([np.random.rand()*x_range,np.random.rand()*y_range]),s,E,dim=2))
        agentList.append(Agent(np.array([start_pos[0],start_pos[1]]),s,E,dim=2))
        agentList[i].dynamics=m1_step

for i in range(len(taglist)):
    x,y,_ = taglist[i].pos
    tagx[i]=x
Example #33
 def execute(self):        
     ##
     ## Initialize agents
     ##
     pDisease = {Constant.BETA: 1 - math.exp(-self.disease[Constant.BETA]),
                 Constant.RHO: self.disease[Constant.RHO],
                 Constant.GAMMA: 1 - math.exp(-self.disease[Constant.GAMMA])}
     
     self.decision = 1 - math.exp(-self.decision)
             
     N = 0
     agents = []
     infected = []
     for state in self.nAgents:            
         for x in range(self.nAgents[state]):
             agent = Agent(N, state, pDisease, self.fear, self.timeHorizon, self.payoffs)
             agents.append(agent)
             
             if (state == State.I):
                 infected.append(agent)
             
             N += 1
     ##
     ## Output variables
     ##
     num = []
     num.append([0,
                 self.nAgents[State.S],
                 self.nAgents[State.P],
                 0,
                 self.nAgents[State.I],
                 0,
                 0,
                 self.nAgents[State.R],
                 0,
                 0,
                 self.nAgents[State.S] * self.payoffs[State.S],
                 self.nAgents[State.P] * self.payoffs[State.P],
                 self.nAgents[State.I] * self.payoffs[State.I],
                 self.nAgents[State.R] * self.payoffs[State.R]])
     
     ##
     ## Run the simulation
     ##
     t = 1
     i = self.nAgents[State.I] / float(N)
     
     while ((t < self.timeSteps) and (i > 0)):
         numagents = [0, 0, 0, 0]
         
         ##
         ## Interaction
         ##
         shuffle(agents)
         
         n = N
         infected = []
         while(n > 1):
             a1 = agents[n - 1]
             a2 = agents[n - 2]
             
             a1State = a1.getState()
             a2State = a2.getState()
             
             a1S = a1State
             a2S = a2State
             
             if (a1State == State.I):
                 infected.append(a1)
                 a2S = a2.interact(a1State)
                 
             if (a2State == State.I):
                 infected.append(a2)
                 a1S = a1.interact(a2State)
             
             numagents[a1S] += 1
             numagents[a2S] += 1
             
             n = n - 2
         
         ##
         ## Decision
         ##
         for agent in agents:
             if (uniform(0.0, 1.0) < self.decision):
                 
                 state = agent.getState()
                 numagents[state] -= 1
                 
                 state = agent.decide(i)
                 numagents[state] += 1
         
         ##
         ## Recover
         ##
         for agent in infected:
             if (agent.recover() == State.R):
                 numagents[State.I] -= 1
                 numagents[State.R] += 1
         
         num.append([t,
                     numagents[State.S],
                     numagents[State.P],
                     0,
                     numagents[State.I],
                     0,
                     0,
                     numagents[State.R],
                     0,
                     0,
                     numagents[State.S] * self.payoffs[State.S],
                     numagents[State.P] * self.payoffs[State.P],
                     numagents[State.I] * self.payoffs[State.I],
                     numagents[State.R] * self.payoffs[State.R]])
         
         i = numagents[State.I] / float(N)
         t += 1
     
     return num
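The pDisease entries and self.decision above turn continuous rates into per-step probabilities before the simulation loop; this is the standard exponential-waiting-time conversion for a unit time step,

\[
p \;=\; 1 - e^{-\lambda \Delta t}, \qquad \Delta t = 1,
\]

with lambda standing for the BETA, GAMMA, or decision rate.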
Example #34
    session = tf.InteractiveSession(config=tf.ConfigProto(
        gpu_options=gpu_options))

################################################################################

if __name__ == '__main__':

    Env = Environment(not_render=args.not_render)
    Env.front_camera.set_resolution([INPUT_SIZE_90X, INPUT_SIZE_90X])
    Env.side_camera.set_resolution([INPUT_SIZE_90X, INPUT_SIZE_90X])
    Env.top_camera.set_resolution([INPUT_SIZE_90X, INPUT_SIZE_90X])

    Agent = Agent(model_string='3_input',
                  memory_size=10,
                  batch_size=0,
                  input_dimension=INPUT_SIZE_90X,
                  number_of_actions=NUMBER_OF_ACTIONS,
                  alpha=args.alpha,
                  load_weights=True,
                  file=args.model_file)

    EPSILON = args.epsilon

    for episode in range(args.ep):
        state = Env.reset_scene()
        episode_rw = 0.0
        done = 0

        for step in range(args.steps):
            if (step % 30 == 0):
                Agent.action_counts = [
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
Example #35
            A = self.compute_new_A(F, policy, q_state, true_reward)

        # A = np.zeros(4)
        # A[1:3] = a

        return A

if __name__ == "__main__":

    from Agent import Agent
    from Environment import Environment

    np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})

    env = Environment()
    a = Agent(env, food_is_left_prior=.5)
    lt = a.long_term

    ############ SAND BOX ##############

    a.time_step = 0

    q = lt.sample_states()
    o = lt.sample_outcomes(q)
    fe = lt.exp_free_energy_all_policies(q, o)

    print(fe)

    print(lt.bayesian_averaging(fe))
Example #36
    statC.addSessionData("start_expl", START_EPSILON)
    statC.addSessionData("expl_decay", EPSILON_DECAY)
    statC.addSessionData("expl_policy", EXPLORATION_POLICY)
    statC.addSessionData("Model info", f"{LAYERS} x {NODES_IN_LAYER}, min replay:" +
                         f"{MIN_REPLAY_MEMORY_SIZE}, batch size: {MINIBATCH_SIZE}")


#The simulations themselves
for i in range(0, NUM_REPETITIONS):

    print(f"{cm.BACKED_C} {i} out of {NUM_REPETITIONS} simulations done.{cm.NORMAL}")


    statC.startRun()
    env.createRandomProblem()

    if (LOGIC_MODULE == "nn"):
        if (EXPLORATION_POLICY == "epsilon"):
            explPolicy = EpsilonGreedyPolicy(epsilon = START_EPSILON, decayRate = EPSILON_DECAY, minEpsilon = MIN_EPSILON)
        elif (EXPLORATION_POLICY == "boltzman"):
            explPolicy = BoltzmanExplorationPolicy(startingTemperature = START_EPSILON, temperatureDecay = EPSILON_DECAY, minTemperature = MIN_EPSILON)

        lm = QLearningNeuralModule( explorationPolicy = explPolicy, discountFactor = DISCOUNT_FACTOR, learningRate = LEARNING_RATE,
                                    minReplayMemorySize = MIN_REPLAY_MEMORY_SIZE, miniBatchSize = MINIBATCH_SIZE,
                                    layers = LAYERS, nodesInLayer = NODES_IN_LAYER)
    elif (LOGIC_MODULE == "tab"):
        lm = QLearningTabModule(explorationPolicy = GreedyPolicy(), discountFactor = 0, learningRate = 1)

    agent = Agent(env, lm)
    agent.train(NUM_SIMULATIONS)
Example #37
from ReplayBuffer import ReplayBuffer
from Environment import Environment
import gym
import numpy as np
from Agent import Agent
from stolen_openai_wrappers import wrap_dqn

agent = Agent(2)
_env = wrap_dqn(gym.make("PongDeterministic-v4"))
env = Environment(_env, 0, False, [2, 3], False, -1, 1)

agent.load_weights("./current_model.torch")
# load weights if necessary
agent.train(100, 4, env, 1000000, 400000, 0.1, 10000)
Example #38
def main():
	"""This is the main function called when the program starts. It initializes
	everything it needs, then runs in a loop until exited. """

	display = Display()

	background = display.drawBackground()
	display.drawPitch(background)
	display.centreTitleOnBackground(background)

	# Prepare Game Objects
#	clock = pygame.time.Clock()
	clock = pygw.clock()
	WM = WorldModel()
	ball = Ball()
	blue1 = Agent(BLUE1_START_POS, 1, BLUE_START_ANGLE, WM)
	blue2 = Agent(BLUE2_START_POS, 2, BLUE_START_ANGLE, WM)
	red1 = Agent(RED1_START_POS, 3, RED_START_ANGLE, WM)
	red2 = Agent(RED2_START_POS, 4, RED_START_ANGLE, WM)

	ball.setName("ball")
	blue1.setName("blue1")
	blue2.setName("blue2")
	red1.setName("red1")
	red2.setName("red2")

#	ballSprite = pygame.sprite.RenderPlain(ball)
	ballSprite = pygw.renderplainsprite(ball)
	blue1Sprite = pygw.renderplainsprite(blue1)
	blue2Sprite = pygw.renderplainsprite(blue2)
	red1Sprite = pygw.renderplainsprite(red1)
	red2Sprite = pygw.renderplainsprite(red2)

	frame = 0
	going = True

	# Main game loop
	while going:
		clock.tick(FPS)

		if frame >= 30:
			frame = 0
		else:
			frame += 1

		allData = [ball, blue1, blue2, red1, red2]
		if (frame % WORLD_MODEL_UPDATE) == 0:
			WM.update_info(allData)

		#Update Sprites
		ballSprite.update()
		blue1Sprite.update()
		blue2Sprite.update()
		red1Sprite.update()
		red2Sprite.update()

		#Draw Everything
		display.drawEverything(background, ballSprite, blue1Sprite, blue2Sprite, red1Sprite, red2Sprite)
		display.updateFeaturesOnScreen(frame, ball, blue1, blue2, red1, red2)

		#Check for kicks
		ball.setPushValue(0)
		if blue1.kicking or blue2.kicking or red1.kicking or red2.kicking:
			ball.setPushValue(1)
			ball.setPushSpeed(5)
		if blue1.kicking:
			ball.setPushOrientation(blue1.angle)
		elif blue2.kicking:
			ball.setPushOrientation(blue2.angle)
		elif red1.kicking:
			ball.setPushOrientation(red1.angle)
		elif red2.kicking:
			ball.setPushOrientation(red2.angle)
#
#		ball.setPushValue(0)
#
#		if ball.speed == 0:
#			ball.setPushValue(1)
#			ball.setPushOrientation(np.random.randint(0, 360))
#			ball.setPushSpeed(5)

#		pygame.display.flip()
		pygw.updatefulldisplay()
#		for event in pygame.event.get():
		for event in pygw.getIOevent():
			if event.type == pygw.QUIT or event.type == pygw.KEYDOWN and event.key == pygw.K_ESCAPE:
				going = False
				print('User quit the game')

#	pygame.quit()
	pygw.quitgame()
	sys.exit()
Example #39
import threading

import tensorflow as tf

from Agent import Agent

import GUI
from Displayer import DISPLAYER
from Saver import SAVER

import settings

if __name__ == '__main__':

    tf.reset_default_graph()

    with tf.Session() as sess:

        agent = Agent(sess)
        SAVER.set_sess(sess)

        SAVER.load(agent)

        if settings.GUI:
            gui = threading.Thread(target=GUI.main)
            gui.start()

        print("Starting the run")
        try:
            agent.run()
        except KeyboardInterrupt:
            pass
        print("End of the run")
        SAVER.save('last')
Example #40
 def __init__(self):
     self.agent = Agent()
     self.game_steps = 0
import gym
import numpy as np
from Agent import Agent
import matplotlib.pyplot as plt

if __name__ == '__main__':

    env = gym.make('LunarLander-v2')
    n_games = 1000
    load_check = False

    agent = Agent(gamma=0.99,
                  epsilon=1.0,
                  alpha=5e-4,
                  input_dimension=[8],
                  actions=4,
                  memory_size=1000000,
                  batch_size=64,
                  eps_decay=5e-5,
                  replace=100)

    if load_check:
        agent.load_models()

    scores = []
    avg_scores = []
    eps_hist = []

    for i in range(n_games):
        done = False
        score = 0
Example #42
def main(args):
    if args.load_weights:
        args.exploration_decay_steps = 10

    start = time.time()
    localtime = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time()))
    print 'Current time is:',localtime
    print 'Starting at main.py...'

    # use for investigating the influence of tag length
    '''
    f = open(args.home_dir + args.result_dir + "_train.txt",'w')
    f1 = open(args.home_dir + args.result_dir + "_test.txt",'w')
    f.write(str(args)+'\n')
    f.write('\nCurrent time is: %s'%localtime)
    f.write('\nStarting at main.py...')
    '''
    # Initialize environment, replay memory, deep Q-net and agent
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_rate)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        net = DeepQLearner(args, sess)
        env = Environment(args)

        temp_size = env.train_steps * args.epochs + env.test_steps
        if temp_size > 100000:
            temp_size = 100000
        args.replay_size = temp_size
        args.train_steps = env.train_steps
        assert args.replay_size > 0

        mem = ReplayMemory(args.replay_size, args)
        agent = Agent(env, mem, net, args)

        print '\n',args,'\n'
        
        if args.load_weights:
            print 'Loading weights from %s...'%args.load_weights
            net.load_weights(args.home_dir + args.load_weights)  #load last trained weights


        if args.test_one and args.load_weights:
            ws, act_seq, st = agent.test_one(args.text_dir)
            #f0.write('\nText_vec: %s'%str(env.text_vec))
            print '\nStates: %s\n'%str(st)
            print '\nWords: %s\n'%str(ws)
            print '\n\nAction_squence: %s\n'%str(act_seq)

        else:
            # loop over epochs
            for epoch in xrange(args.start_epoch, args.epochs):
                #print '\n----------epoch: %d----------'%(epoch+1)
                epoch_start = time.time()
                f = open(args.home_dir + args.result_dir + "_train"+ str(epoch) + ".txt",'w')
                f1 = open(args.home_dir + args.result_dir + "_test"+ str(epoch) + ".txt",'w')

                f.write(str(args)+'\n')
                f.write('\nCurrent time is: %s'%localtime)
                f.write('\nStarting at main.py...')
                #print 'env.train_steps: %d'%env.train_steps
                #print 'env.test_steps: %d'%env.test_steps
                #assert 1==0
                if args.train_steps > 0:
                    #agent.train(args.train_steps, epoch)
                    if epoch == args.start_epoch:
                        env.train_init()
                    agent.train(args.train_steps, epoch)
                    if args.save_weights_prefix:
                        filename = args.home_dir + args.save_weights_prefix + "_%d.prm" % (epoch + 1)
                        net.save_weights(filename)

                    cnt = 0
                    ras = 0
                    tas = 0
                    tta = 0

                    for i in range(env.size):#len(env.saved_text_vec)):
                        text_vec_tags = env.saved_text_vec[i,:,-1]
                        state_tags = env.saved_states[i,:,-1]
                        sum_tags = sum(text_vec_tags)
                        if not sum_tags:
                            break
                        count = 0
                        right_actions = 0
                        tag_actions = 0
                        total_actions = 0
                        total_words = args.num_actions/2
                        temp_words = env.saved_text_length[i]
                        if temp_words > total_words:
                            temp_words = total_words

                        #print "text_vec_tags",text_vec_tags
                        #print 'state_tags',state_tags
                        for t in text_vec_tags:
                            if t == args.action_label:
                                total_actions += 1

                        f.write('\n\nText:'+str(i))
                        f.write('\ntotal words: %d\n'%temp_words)
                        print '\ntotal words: %d\n'%temp_words
                        #f.write('\nsaved_text_vec:\n')
                        #f.write(str(env.saved_text_vec[i,:,-1]))
                        #f.write('\nsaved_states:\n')
                        #f.write(str(env.saved_states[i,:,-1]))

                        for s in xrange(temp_words):
                            if state_tags[s] == 0:
                                count += 1
                            elif state_tags[s] == args.action_label:
                                tag_actions += 1
                                if text_vec_tags[s] == state_tags[s]:
                                    right_actions += 1

                        cnt += count
                        ras += right_actions
                        tta += tag_actions
                        tas += total_actions
                        if total_actions > 0:
                            recall = float(right_actions)/total_actions
                        else:
                            recall = 0
                        if tag_actions > 0:
                            precision = float(right_actions)/tag_actions
                        else:
                            precision = 0
                        rp = recall + precision
                        if rp > 0:
                            F_value = (2.0*recall*precision)/(recall+precision)
                        else:
                            F_value = 0
                        f.write('\nWords left: %d'%count)
                        f.write('\nActions: %d'%total_actions)
                        f.write('\nRight_actions: %d'%right_actions)
                        f.write('\nTag_actions: %d'%tag_actions)
                        f.write('\nActions_recall: %f'%recall)
                        f.write('\nActions_precision: %f'%precision)
                        f.write('\nF_measure: %f'%F_value)
                        print '\nText: %d'%i
                        print '\nWords left: %d'%count
                        print 'Actions: %d'%total_actions
                        print 'Right_actions: %d'%right_actions
                        print 'Tag_actions: %d'%tag_actions
                        print 'Actions_recall: %f'%recall
                        print 'Actions_precision: %f'%precision
                        print 'F_measure: %f'%F_value

                    if tas > 0:
                        average_recall = float(ras)/tas
                    else:
                        average_recall = 0
                    if tta > 0:
                        average_precision = float(ras)/tta
                    else:
                        average_precision = 0
                    arp = average_recall + average_precision
                    if arp > 0:
                        ave_F_value = (2*average_recall*average_precision)/(average_recall+average_precision)
                    else:
                        ave_F_value = 0
                    f.write('\nTotal words left: %d'%cnt)
                    f.write('\nTotal actions: %d'%tas)
                    f.write('\nTotal right_actions: %d'%ras)
                    f.write('\nTotal tag_actions: %d'%tta)
                    f.write('\nAverage_actions_recall: %f'%average_recall)
                    f.write('\nAverage_actions_precision: %f'%average_precision)
                    f.write('\nAverage_F_measure: %f'%ave_F_value)
                    print '\nTotal words left: %d'%cnt
                    print 'Total actions: %d'%tas
                    print 'Total right_actions: %d'%ras
                    print 'Total tag_actions: %d'%tta
                    print 'Average_actions_recall: %f'%average_recall
                    print 'Average_actions_precision: %f'%average_precision
                    print 'Average_F_measure: %f'%ave_F_value


                if args.test:
                    f1.write('test_texts: %s\ttexts_num: %d\n'%(str(env.test_text_name), args.test_text_num))
                    agent.test(args.words_num, env.test_steps/args.words_num, f1)

                epoch_end = time.time()
                print 'Total time cost of epoch %d is: %ds'%(epoch, epoch_end-epoch_start)
                f.write('\nTotal time cost of epoch %d is: %ds\n'%(epoch, epoch_end-epoch_start))
                f1.write('\nTotal time cost of epoch %d is: %ds\n'%(epoch, epoch_end-epoch_start))

                f.close()
                f1.close()

        end = time.time()
        print 'Total time cost: %ds'%(end-start)
        localtime = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time()))
        print 'Current time is: %s'%localtime
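For reference, the per-text and averaged scores computed in the training loop above are the usual recall, precision, and F-measure over tagging actions:

\[
\text{recall} = \frac{\text{right\_actions}}{\text{total\_actions}}, \qquad
\text{precision} = \frac{\text{right\_actions}}{\text{tag\_actions}}, \qquad
F = \frac{2 \cdot \text{recall} \cdot \text{precision}}{\text{recall} + \text{precision}} .
\]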
Example #43
class MBExperiment:
    def __init__(self, params):
        """Initializes class instance.

        Argument:
            params (DotMap): A DotMap containing the following:
                .sim_cfg:
                    .env (gym.env): Environment for this experiment.
                    .task_hor (int): Task horizon.
                    .test_percentile (float): Risk-aversion percentile used for testing.
                    .record_video (bool): Whether to record training/adaptation iterations.

                .exp_cfg:
                    .ntrain_iters (int): Number of training iterations to be performed.
                    .nrollouts_per_iter (int): (optional) Number of rollouts done between training
                        iterations. Defaults to 1.
                    .ninit_rollouts (int): (optional) Number of initial rollouts. Defaults to 1.
                    .policy (controller): Policy that will be trained.
                    .ntest_rollouts (int): Number of rollouts for measuring test performance.
                    .nadapt_iters (int): (optional) Number of adaptation iters to perform. 10 in paper.
                    .continue_train (bool): Whether to continue training from a load_model_dir.
                    .test_domain (float): Environment domain used for adaptation/testing.
                    .start_epoch (int): Which epoch to start training from, used for continuing to train
                        a trained model.
                    .nexplore_iters (int): Number of unsupervised exploration iterations to be performed.


                .log_cfg:
                    .logdir (str): Directory to log to.
                    .suffix (str): Suffix to add to logdir.


        """

        # Assert that configuration options we do not currently support are disabled
        assert params.sim_cfg.get("stochastic", False) == False

        self.env = get_required_argument(params.sim_cfg, "env",
                                         "Must provide environment.")
        self.task_hor = get_required_argument(params.sim_cfg, "task_hor",
                                              "Must provide task horizon.")
        self.ntrain_iters = get_required_argument(
            params.exp_cfg, "ntrain_iters",
            "Must provide number of training iterations.")
        self.test_percentile = params.sim_cfg.test_percentile
        self.nrollouts_per_iter = params.exp_cfg.get("nrollouts_per_iter", 1)
        self.ninit_rollouts = params.exp_cfg.get("ninit_rollouts", 1)
        self.ntest_rollouts = params.exp_cfg.get("ntest_rollouts", 1)
        self.nadapt_iters = params.exp_cfg.get("nadapt_iters", 0)
        self.policy = get_required_argument(params.exp_cfg, "policy",
                                            "Must provide a policy.")

        self.continue_train = params.exp_cfg.get("continue_train", False)
        self.test_domain = params.exp_cfg.get("test_domain", None)
        self.start_epoch = params.exp_cfg.get("start_epoch", 0)

        self.nrecord = params.log_cfg.get("nrecord", 0)
        self.neval = params.log_cfg.get("neval", 1)

        self.training_percentile = self.policy.percentile

        self.frac_unsafe_pretraining = params.exp_cfg.get(
            "frac_unsafe_pretraining", 0)

        if self.continue_train:
            self.logdir = params.exp_cfg.load_model_dir
            self.policy.ac_buf = np.load(
                os.path.join(self.logdir, "ac_buf.npy"))
            self.policy.prev_sol = np.load(
                os.path.join(self.logdir, "prev_sol.npy"))
            self.policy.init_var = np.load(
                os.path.join(self.logdir, "init_var.npy"))
            self.policy.train_in = np.load(
                os.path.join(self.logdir, "train_in.npy"))
            self.policy.train_targs = np.load(
                os.path.join(self.logdir, "train_targs.npy"))
        self.logdir = os.path.join(
            get_required_argument(params.log_cfg, "logdir",
                                  "Must provide log parent directory."),
            f"{params.log_cfg.get('expname') or ''}_{strftime('%Y-%m-%d--%H-%M-%S', localtime())}",
        )
        print("Logging to: ", self.logdir)
        self.suffix = params.log_cfg.get("suffix", None)
        if self.suffix is not None:
            self.logdir = self.logdir + '-' + self.suffix
        self.writer = SummaryWriter(self.logdir + '-tboard')

        # Set env for PointmassEnv
        if (isinstance(self.env, PointmassEnv)):
            # set logdir for Pointmass
            self.env.set_logdir(self.logdir)

        self.record_video = params.sim_cfg.get("record_video", False)
        if self.test_domain is not None:
            self.env.test_domain = self.test_domain
            print("Setting test domain to: %0.3f" % self.env.test_domain)

    def run_experiment(self):
        """Perform experiment.
        """
        os.makedirs(self.logdir, exist_ok=True)

        # Train with random data first
        samples = []
        self.agent = Agent()
        for i in range(self.ninit_rollouts):
            if self.record_video:
                self.record_env = wrappers.Monitor(self.env,
                                                   "%s/init_iter_%d" %
                                                   (self.logdir, i),
                                                   force=True)
            samples.append(
                self.agent.sample(
                    self.task_hor,
                    self.policy,
                    record=False,
                    env=self.env,
                ))
        print("Training with initial rollouts ", self.ninit_rollouts)

        if self.ninit_rollouts > 0:
            self.policy.train(
                [sample["obs"] for sample in samples],
                [sample["ac"] for sample in samples],
                [sample["rewards"] for sample in samples],
            )

        # Learning the dynamics and safety model
        self.run_training_iters(adaptation=False)

        # Save training buffers at end of training so we can load for adaptation if required
        old_train_in = self.policy.train_in
        old_train_targs = self.policy.train_targs
        old_ac_buf = self.policy.ac_buf
        old_prev_sol = self.policy.prev_sol
        old_init_var = self.policy.init_var
        torch.save(self.policy.model.state_dict(),
                   os.path.join(self.logdir, 'weights'))
        np.save(os.path.join(self.logdir, "ac_buf.npy"), old_ac_buf)
        np.save(os.path.join(self.logdir, "prev_sol.npy"), old_prev_sol)
        np.save(os.path.join(self.logdir, "init_var.npy"), old_init_var)
        np.save(os.path.join(self.logdir, "train_in.npy"), old_train_in)
        np.save(os.path.join(self.logdir, "train_targs.npy"), old_train_targs)

        self.run_training_iters(adaptation=True)
        self.run_test_evals(self.nadapt_iters)

        # Plot density
        self.env.plot_density_graph()

    def run_training_iters(self, adaptation):
        max_return = -float("inf")
        if adaptation:
            iteration_range = [self.nadapt_iters]
            percentile = self.test_percentile
            self.policy.unsafe_pretraining = False
            print_str = "ADAPT"
        else:
            iteration_range = [self.start_epoch, self.ntrain_iters]
            percentile = self.training_percentile
            self.policy.unsafe_pretraining = True  # start off by default
            print_str = "TRAIN"
        last_tick = perf_counter()

        if isinstance(self.env, PointmassEnv):
            # set logdir for Pointmass
            self.env.set_logdir(f"{self.logdir}/{print_str}/")

        for i in trange(*iteration_range):
            print(
                f"========= TIME ELAPSED per iter: {perf_counter() - last_tick}"
            )
            last_tick = perf_counter()

            if i % 2 == 0 and adaptation:
                self.run_test_evals(i)

            samples = []
            self.policy.clear_stats()
            self.policy.percentile = percentile

            # Unsafe pretraining for first `frac_unsafe_pretraining` proportion of ntrain_iters
            if not adaptation and i >= self.frac_unsafe_pretraining * self.ntrain_iters:
                self.policy.unsafe_pretraining = False

            print(
                "####################################################################"
            )
            print(
                f"Starting training on {print_str}, {'UNSAFE' if self.policy.unsafe_pretraining else ''} env iteration {i+1}"
            )

            for j in range(self.nrollouts_per_iter):
                self.policy.percentile = percentile
                if self.record_video:
                    self.env = wrappers.Monitor(
                        self.env,
                        "%s/%s_iter_%d_percentile/percentile_%d_rollout_%d" %
                        (self.logdir, print_str, i, self.policy.percentile, j),
                        force=True)
                self.policy.logdir = "%s/%s_iter_%d" % (self.logdir, print_str,
                                                        i)
                samples.append(
                    self.agent.sample(self.task_hor,
                                      self.policy,
                                      record=self.record_video and adaptation,
                                      env=self.env,
                                      mode='test' if adaptation else 'train'))
            if self.record_video:
                self.env = self.env.env
            eval_samples = samples
            self.writer.add_scalar(
                'mean-' + print_str + '-return',
                float(sum([sample["reward_sum"] for sample in eval_samples])) /
                float(len(eval_samples)), i)
            max_return = max(
                float(sum([sample["reward_sum"] for sample in eval_samples])) /
                float(len(eval_samples)), max_return)
            self.writer.add_scalar('max-' + print_str + '-return', max_return,
                                   i)
            rewards = [sample["reward_sum"] for sample in eval_samples]
            print("Rewards obtained:", rewards)
            samples = samples[:self.nrollouts_per_iter]

            self.policy.train(
                [sample["obs"] for sample in samples],
                [sample["ac"] for sample in samples],
                [sample["rewards"] for sample in samples],
            )

            if self.policy.mse_loss is not None:
                mean_loss = np.mean(self.policy.mse_loss)
                self.writer.add_scalar('%s-mean-loss' % print_str, mean_loss,
                                       i)
            if self.policy.catastrophe_loss is not None:
                self.writer.add_scalar('%s-catastrophe-loss' % print_str,
                                       self.policy.catastrophe_loss, i)

    def run_test_evals(self, adaptation_iteration):
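        """Run ntest_rollouts evaluation rollouts at the test percentile and
        log the running mean return and the number of catastrophes.
        """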
        print("Beginning evaluation rollouts.")
        if self.test_percentile is not None:
            self.policy.percentile = self.test_percentile
        samples = []
        for i in range(self.ntest_rollouts):
            if self.record_video:
                self.env = wrappers.Monitor(self.env,
                                            "%s/test_eval_%d" %
                                            (self.logdir, i),
                                            force=True)
            if not hasattr(self, "agent"):
                self.agent = Agent()
            self.policy.clear_stats()
            cur_sample = self.agent.sample(
                self.task_hor,
                self.policy,
                record=self.record_video,
                env=self.env,
                mode='test',
            )
            if self.record_video:
                self.env = self.env.env
            samples.append(cur_sample)
            mean_test_return = float(
                sum(sample["reward_sum"]
                    for sample in samples)) / float(len(samples))
            print("Evaluation mean-return (rollout number %d out of %d): %f" %
                  (i + 1, self.ntest_rollouts, mean_test_return))
        if self.ntest_rollouts > 0:
            num_catastrophes = sum(
                [1 if sample["catastrophe"] else 0 for sample in samples])
            self.writer.add_scalar('num-catastrophes', num_catastrophes,
                                   adaptation_iteration)
            mean_test_return = float(
                sum([sample["reward_sum"]
                     for sample in samples])) / float(len(samples))
            self.writer.add_scalar('mean-test-return', mean_test_return,
                                   adaptation_iteration)
        self.writer.close()

class Environment(object):
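    """Search scenario: five colored agents move over a node grid loaded from
    Field.txt, each assigned five randomly placed targets; checkScenarioGoal
    reports success once any agent has found all five of its targets.
    """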
    def __init__(self):
        self.nodes = NodeGroup(gridUnit, gridUnit)
        self.nodes.getBoardNodes("Field.txt")

        # initialize agents
        self.agentRed = Agent(self.nodes.nodeList[randint(0, 2000)])
        self.agentRed.color = (255, 0, 0)

        self.agentWhite = Agent(self.nodes.nodeList[randint(0, 2000)])
        self.agentWhite.color = (255, 255, 255)

        self.agentTeal = Agent(self.nodes.nodeList[randint(0, 2000)])
        self.agentTeal.color = (0, 255, 255)

        self.agentPurple = Agent(self.nodes.nodeList[randint(0, 2000)])
        self.agentPurple.color = (100, 100, 200)

        self.agentGrey = Agent(self.nodes.nodeList[randint(0, 2000)])
        self.agentGrey.color = (150, 150, 150)

        # initialize targets
        self.targetRed1 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                  self.agentRed.id)
        self.targetRed1.color = (250, 1, 1)

        self.targetRed2 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                  self.agentRed.id)
        self.targetRed2.color = (250, 2, 2)

        self.targetRed3 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                  self.agentRed.id)
        self.targetRed3.color = (250, 3, 3)

        self.targetRed4 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                  self.agentRed.id)
        self.targetRed4.color = (250, 4, 4)

        self.targetRed5 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                  self.agentRed.id)
        self.targetRed5.color = (250, 5, 5)

        self.targetWhite1 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                    self.agentWhite.id)
        self.targetWhite1.color = (255, 255, 255)

        self.targetWhite2 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                    self.agentWhite.id)
        self.targetWhite2.color = (255, 255, 255)

        self.targetWhite3 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                    self.agentWhite.id)
        self.targetWhite3.color = (255, 255, 255)

        self.targetWhite4 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                    self.agentWhite.id)
        self.targetWhite4.color = (255, 255, 255)

        self.targetWhite5 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                    self.agentWhite.id)
        self.targetWhite5.color = (255, 255, 255)

        self.targetTeal1 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                   self.agentTeal.id)
        self.targetTeal1.color = (0, 255, 255)

        self.targetTeal2 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                   self.agentTeal.id)
        self.targetTeal2.color = (0, 255, 255)

        self.targetTeal3 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                   self.agentTeal.id)
        self.targetTeal3.color = (0, 255, 255)

        self.targetTeal4 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                   self.agentTeal.id)
        self.targetTeal4.color = (0, 255, 255)

        self.targetTeal5 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                   self.agentTeal.id)
        self.targetTeal5.color = (0, 255, 255)

        self.targetPurple1 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                     self.agentPurple.id)
        self.targetPurple1.color = (100, 100, 200)

        self.targetPurple2 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                     self.agentPurple.id)
        self.targetPurple2.color = (100, 100, 200)

        self.targetPurple3 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                     self.agentPurple.id)
        self.targetPurple3.color = (100, 100, 200)

        self.targetPurple4 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                     self.agentPurple.id)
        self.targetPurple4.color = (100, 100, 200)

        self.targetPurple5 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                     self.agentPurple.id)
        self.targetPurple5.color = (100, 100, 200)

        self.targetGrey1 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                   self.agentGrey.id)
        self.targetGrey1.color = (150, 150, 150)

        self.targetGrey2 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                   self.agentGrey.id)
        self.targetGrey2.color = (150, 150, 150)

        self.targetGrey3 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                   self.agentGrey.id)
        self.targetGrey3.color = (150, 150, 150)

        self.targetGrey4 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                   self.agentGrey.id)
        self.targetGrey4.color = (150, 150, 150)

        self.targetGrey5 = Targets(self.nodes.nodeList[randint(0, 2000)],
                                   self.agentGrey.id)
        self.targetGrey5.color = (150, 150, 150)

        self.Agents = [
            self.agentRed, self.agentWhite, self.agentTeal, self.agentPurple,
            self.agentGrey
        ]

        self.Targets = [
            self.targetRed1, self.targetRed2, self.targetRed3, self.targetRed4,
            self.targetRed5, self.targetWhite1, self.targetWhite2,
            self.targetWhite3, self.targetWhite4, self.targetWhite5,
            self.targetTeal1, self.targetTeal2, self.targetTeal3,
            self.targetTeal4, self.targetTeal5, self.targetPurple1,
            self.targetPurple2, self.targetPurple3, self.targetPurple4,
            self.targetPurple5, self.targetGrey1, self.targetGrey2,
            self.targetGrey3, self.targetGrey4, self.targetGrey5
        ]

        self.checkList = []

    def update(self, time_passed, screen):
        self.checkScenarioGoal()

        # Iterate over a copy since found targets are removed from the list
        for target in list(self.Targets):
            if target.isInCheckList and target not in self.checkList:
                self.checkList.append(target)

            if target.isFound:
                if target in self.checkList:
                    self.checkList.remove(target)
                self.Targets.remove(target)

        self.agentRed.update(time_passed, self.Targets, self.checkList)
        self.agentWhite.update(time_passed, self.Targets, self.checkList)
        self.agentTeal.update(time_passed, self.Targets, self.checkList)
        self.agentPurple.update(time_passed, self.Targets, self.checkList)
        self.agentGrey.update(time_passed, self.Targets, self.checkList)

        for target in self.Targets:
            target.render2(screen)

        #Render agents
        self.agentRed.render(screen)
        self.agentWhite.render(screen)
        self.agentTeal.render(screen)
        self.agentPurple.render(screen)
        self.agentGrey.render(screen)

    def returnNodes(self):
        return self.nodes

    def checkScenarioGoal(self):
        if self.agentRed.targetsFound == 5 \
                or self.agentWhite.targetsFound == 5 \
                or self.agentTeal.targetsFound == 5 \
                or self.agentGrey.targetsFound == 5 \
                or self.agentPurple.targetsFound == 5:
            return True
Example #45
0
def Agent_activate():
    Agent.train()
Example #47
0
"""
This script is used to execute the cellular automaton Chile
"""

from Automaton import Automaton
from Simulation import Simulation
from Analyzer import Analyzer
from Agent import Agent

# TODO: USE A GUI TO CONFIG THESE PARAMETERS
COLUMNS = 30
ROWS = 30
POPULATION = 100
ITERATIONS = 20

# executing the main method of the code
automaton = Automaton(ROWS, COLUMNS)
analyzer = Analyzer(automaton)
automaton.createPopulation(POPULATION, Agent.randomRangeRadiumUnif(1, 5))

simulation = Simulation(automaton, True)
simulation.start(ITERATIONS)
rankings = analyzer.getRankingOfPopulation()
print analyzer.getLinearRegressionData(False)
Example #48
0
import pickle
import time
import numpy as np
import matplotlib.pyplot as plt
import torch

# Initialize Environment
env = Game()

# Initialize Agent
agent = Agent(lr=0,
              eps=0,
              gamma=0,
              max_memory=0,
              n_steps=0,
              batch_size=0,
              tau=0,
              lambda_1=0,
              lambda_2=0,
              lambda_3=0,
              l_margin=0)

# Load policy
agent.policy.predictNet.load_state_dict(torch.load("Q_target_demo.pth"))

done = False
accumulate_rewards = 0
state = env.reset()
while not done:
    action = agent.choose_action(state)[0]
Example #49
0
end = (5, 5)  # Goal State

w = World(width, height, cell_size)
#wall_pos = [(0,5),(1,5),(2,5),(3,5),(4,5),(6,7)]
wall_pos = w.random_wall(40)  # Percentage of Walls in Grid

if end in wall_pos:
    wall_pos.remove(end)

#---------------------------------- Simulation --------------------------------------------

print("Activating the Matrix")

#---------------------------------- Setting Up the World ----------------------------------

agent = Agent(w, 0, 0)  # Start state
neighbors = agent.neighbor()
visited = [(0, 0)]
current = (agent.pos_x, agent.pos_y)

#---------------------------------- Drawing Walls------------------------------------------

for wall in wall_pos:
    w.draw_rec(screen, wall, pygame.Color(0, 0, 0))
w.draw_rec(screen, end, pygame.Color(0, 255, 0))

#---------------------------------- Searching ---------------------------------------------

print(wall_pos)
for n in agent.neighbor():
    if n not in visited and n not in neighbors and n not in wall_pos:
Example #50
0
    def __init__(self):
        Agent.__init__(self)
Example #51
0
    def __init__(self, channel=None):
        Agent.__init__(self, Router.COMMANDER)
        self.setChannel(channel)  # handle to planner

"""
Created on Fri Nov  6 00:26:47 2020

@author: Abdelhamid
"""
import gym
from Agent import Agent
from plot import plot_epi_step

if __name__ == '__main__':

    env = gym.make('CartPole-v1')
    agent = Agent(lr=10**-4,
                  n_actions=env.action_space.n,
                  input_dim=env.observation_space.shape,
                  gamma=0.99,
                  epsilon=1.0,
                  eps_dec=1e-5,
                  eps_min=0.01,
                  max_iterations=10000,
                  lamda=0.9)
    n_games = 10000

    scores = []
    steps = []
    n = 5
    for i in range(n_games):

        score, cont = agent.learn(env)

        scores.append(score)
        steps.append(cont)
#====================================================================================

#====================================================================================
#Building Setting
lift_num = 4
building_height = 10
max_people_in_floor = 30

add_people_at_step = 25
add_people_prob = 0.8

#Create building with 4 elevators, height 10, max 30 people on each floor
building = Building(lift_num, building_height, max_people_in_floor)

#Agent controls each elevator
agent = Agent(building_height, lift_num, 4)

#The goal is to bring down all the people in the building to the ground floor
batch_size = 64
epochs = 50
max_steps = 100
global_step = 0

for epoch in range(epochs):
    #generate people with 80% probability on each floor
    building.empty_building()
    building.generate_people(add_people_prob)
    for step in range(max_steps):
        states = []
        actions = []
        rewards = []
Example #54
0
from CosmicEncounter import Environment
from Agent import Agent

players = 5
#negotiation_map = [[], [], [], [], []]
#negotiation_map = [[1, 2, 3, 4], [0, 2, 3, 4], [0, 1, 3, 4], [0, 1, 2, 4], [0, 1, 2, 3]]
#negotiation_map = [[1], [0], [], [], []]
negotiation_map = [[1, 2], [0, 2], [0, 1], [], []]
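# negotiation_map[i] appears to list the players agent i may negotiate with
# (cf. the commented-out variants above)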

env = Environment(nrof_players=players, nrof_planets_per_player=3)
agents = [
    Agent(agent_id, players, negotiation_map) for agent_id in range(players)
]
episode_encounters = []

for episode in range(5000):
    print('Episode:', episode)
    obs, terminal, winners, reward = env.reset()
    episode_encounters.append(0)

    while not terminal:
        agent_id = env.whose_turn()[0]
        action_id, negotiation = agents[agent_id](obs, env.action_type(),
                                                  env.available_actions())
        obs, terminal, winners, reward = env.action(action_id, negotiation)
        agents[agent_id].reward(reward)

        if len(env.player_turns) == 1:
            episode_encounters[-1] += 1

        if terminal:
#parameters for the QNetwork (critic) network
params['arch_params_critic'] = OrderedDict({
    'state_and_action_dims': (params['state_dim'], params['action_dim']),
    'layers': {
        'Linear_1': 512,
        'ReLU_1': None,
        'Linear_2': 256,
        'ReLU_2': None,
        'Linear_3': 128,
        'ReLU_3': None,
        'Linear_4': 64,
        'ReLU_4': None,
        'Linear_5': params['action_dim']
    }
})

# AGENT

RL_Agent = Agent(params)

#TRAINING OF AN AGENT
scores = interact_and_train(RL_Agent, env, params)

#SAVING THE RESULTS
pickle.dump(scores, open(params['save_to'] + '.pkl', 'wb+'))

#PLOT THE RESULTS
plotter(scores, threshold=600)

    def SendMessage(self, receivingAgents, content):
        Agent.SendMessage(self, receivingAgents, content)
Example #57
0
from Agent import Agent
import sys

if len(sys.argv) <= 2:
	port = eval(sys.argv[1])
else:
	host = eval(sys.argv[1])
	port = eval(sys.argv[2])

print "Create agent for module serverdefs"
# Here we create an agent for a module!
agent = Agent('serverdefs', '138.221.22.200', port)
print "Ok, agent created and configured"

print "\nThe following methods are available:"

dict = agent.__dict__
for item in dict.keys():
	if hasattr(dict[item], '__class__') and dict[item].__class__ == Agent.Method:
		print ' '*3, item

print "\nCreate an instance of a server object"
server = agent.new_server()

print "\nThe following methods are available:"
dict = server.__dict__
for item in dict.keys():
	if hasattr(dict[item], '__class__') and dict[item].__class__ == Agent.Method:
		print ' '*3, item

def interact_and_train(Agent, Env, params):
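    """Interaction/training loop: normalize states, act with the agent's
    perturbed actions, store the transitions, learn from replay each step,
    and save the agent's weights whenever the 10-episode average score
    exceeds `threshold` and improves on the best score so far.
    """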
    state_low = Env.observation_space.low
    state_high = Env.observation_space.high
    action_low = Env.action_space.low
    action_high = Env.action_space.high
    num_episodes = params['num_episodes']
    max_t = params['max_t']
    save_to = params['save_to']
    threshold = params['threshold']
    scores = []
    scores_window = deque(maxlen=10)
    best_score = -np.inf
    for e in range(num_episodes):
        score = 0
        states = np.array(Env.reset())  # reset the environment
        states = normalize(states, state_high, state_low)

        actions, actions_perturbed = Agent.choose_action(states)
        actions = denormalize(actions.detach().numpy(), action_high,
                              action_low)
        actions_perturbed = denormalize(actions_perturbed.detach().numpy(),
                                        action_high, action_low)
        if (len(actions_perturbed.shape) != 1):
            actions_perturbed = actions_perturbed.tolist()
        dones = False * np.ones(len(actions_perturbed))
        t = 0
        while not (np.any(dones) == True):
            t += 1
            next_states, rewards, dones, infos = Env.step(actions_perturbed)
            next_states = normalize(next_states, state_high, state_low)
            if type(states) == list:
                for i in range(states.shape[0]):
                    Agent.memorize_experience(states[i], actions[i],
                                              rewards[i], next_states[i],
                                              dones[i])
            else:
                Agent.memorize_experience(states, actions, rewards,
                                          next_states, dones)
            Agent.learn_from_past_experiences()
            states = np.array(next_states)

            actions, actions_perturbed = Agent.choose_action(states)
            actions = denormalize(actions.detach().numpy(), action_high,
                                  action_low)
            actions_perturbed = denormalize(actions_perturbed.detach().numpy(),
                                            action_high, action_low)
            if (len(actions_perturbed.shape) != 1):
                actions_perturbed = actions_perturbed.tolist()
            score += np.mean(rewards)  # get the reward
            if (np.any(dones) == True) or (t == max_t):
                break
        if params['noise_type'] == 'action':
            Agent.update_eps()
        scores.append(score)
        scores_window.append(score)

        print('\rEpisode {}\tAverage Score: {:.2f}\tCurrent Score : {}'.format(
            e + 1, np.mean(scores_window), score),
              end="")
        if (e + 1) % 10 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(
                e + 1, np.mean(scores_window)))
        if (np.mean(scores_window) >= threshold
                and (np.mean(scores_window) > best_score)):
            best_score = np.mean(scores_window)
            print(
                '\nEnvironment achieved average score {:.2f} in {:d} episodes!'
                .format(np.mean(scores_window), (e + 1)))
            file_name = str(save_to) + '_' + str(
                np.round(np.mean(scores_window), 0)) + str('.prms')
            Agent.save_weights(str(file_name))
            print("environment saved to ", file_name)
    return scores
Example #59
0
import time

import numpy as np
import matplotlib.pyplot as plt

from World import Reward
from Agent import Agent
# GaussianProcess and NormalARD come from the project's GP module (import not shown)


reward = Reward()
lik = np.log(np.array([0.00001]))
hyp = np.log(np.array([1, 1, 10]))
cov = NormalARD()
gp = GaussianProcess(lik, hyp, cov)
gp2 = GaussianProcess(lik, hyp, cov)

sig = np.ones((3,)) * 0.001
sig2 = np.ones((3,)) * 0.1
start_z = np.array([[0., 0., 0.]])
agent = Agent(gp, reward, sig, start_z)
agent2 = Agent(gp2, reward, sig2, start_z)
fig = plt.figure(figsize=(20,7), dpi=300)
zlim = (-10, 10, -10, 10)
for i in xrange(0, 1000):
    agent.observe()
    agent.decide()
    agent.act()
    agent2.observe()
    agent2.decide()
    agent2.act()

    t = agent.gp.Z[-1].flatten()[-1]
    a = [0] * 4
    a[0] = agent.gp.Z[-1].flatten()[0]
    a[1] = agent.gp.Z[-1].flatten()[1]
Example #60
0
from PIL import ImageGrab

import cv2
import numpy as np
import pytesseract

from Agent import Agent  # keyboard-driving helper used below (import path assumed)


class Game:
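    """Wrapper that plays an external game through an Agent that sends key
    presses: it grabs screenshots with PIL.ImageGrab, downsamples them with
    OpenCV as observations, reads the distance score via pytesseract OCR, and
    detects game over by probing a fixed screen pixel.
    """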

    def __init__(self):
        self.agent = Agent()
        self.game_steps = 0

    def start(self):
        self.agent.space()
        self.game_steps = 0
        return self.execute_action('n')

    def reload(self):
        self.game_steps = 0
        self.agent.reload()
        self.agent.space()
        return self.execute_action('n')

    def execute_action(self, action):
        self.agent.space()
        self.game_steps += 1
        #self.agent.unpause()
        for char in action:
            getattr(self.agent, char)()
        
        shot = ImageGrab.grab([505, 225, 1195, 1025])#850,900 -> 690:800
        img = np.array(shot)[:,:,0]
        img = cv2.resize(img, (0, 0), fx=0.1, fy=0.1)
        shot = img
        #self.agent.pause()
        done = self.is_done(shot)
        score = 0.0
        if done:
            distance_score = self.get_score()[1]
            time_score = - (self.game_steps/(abs(distance_score)+1e5)) # penalize taking more steps for the same distance
            score = distance_score + time_score
            self.reload()
        return shot.astype(float).ravel(), score, done  # np.float is removed in recent NumPy

    def is_done(self, shot):
        return self.get_score()[0]

    def get_score(self):
        raw = pytesseract.image_to_string(ImageGrab.grab([600, 40, 1200, 160]))
        current_score = ""
        for i in raw:
            if i in "metres":  # strip the "metres" unit characters from the OCR output
                continue
            else:
                current_score = current_score + i
        try:
            float(current_score)
            current_score=float(current_score)
        except:
            current_score=1.8
        #print("score:",current_score)
        tmp = ImageGrab.grab([900,380,1000,400])
        r, g, b = tmp.getpixel((0, 0))
        game_over = False
        if r == 237 and g == 237 and b == 237:
            game_over = True
        return [game_over,current_score]