Code example #1
    def __init__(self, action_space, observation_space, params):
        # Use super init
        Agent.__init__(self, action_space, observation_space, params)

        # Initialize table with all zeros
        self.Q = np.zeros([observation_space.n, action_space.n])

        # Set learning parameters
        self.episode_count = self.params[0]  # Number of episodes
Code example #2
    def __init__(self, action_space, observation_space, params, discreet=False):
        # Use super init
        Agent.__init__(self, action_space, observation_space, params)
        self.discreet = discreet
        if discreet:
            self.inputN = self.observation_space.n
        else:
            self.inputN = self.observation_space.shape[0]
        self.actionN = self.action_space.n
        
        # Set learning parameters
        self.episode_count = self.params[0]  # Number of episodes
        self.learnRate = self.params[1]  # Learning rate
        self.discount = self.params[2]  # Discount factor for future rewards
        self.epsi = self.params[3]  # Epsilon for epsilon-greedy action picking
        self.epsi_decay = self.params[4]
        self.epsi_min = 0.001
        self._timeTot = 200
        #define TF graph
        tf.reset_default_graph()
        #graph1 = tf.Graph()
        #with graph1.as_default():
        #These lines establish the feed-forward part of the network used to choose actions
        
        n_hidden_1  = 64
        n_hidden_2  = 32
        self.inputs1 = tf.placeholder(shape=[1,self.inputN],dtype=tf.float32)
        #W1 = tf.Variable(tf.random_uniform([self.inputN,self.actionN],0,0.01))
        
        W1 = tf.Variable(tf.random_normal([self.inputN,n_hidden_1]))
        W2 = tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2]))
        W3 = tf.Variable(tf.random_normal([n_hidden_2, self.actionN]))
        
        layer_1 = tf.nn.relu(tf.matmul(self.inputs1, W1))
        layer_2 = tf.nn.relu(tf.matmul(layer_1, W2))
        self.Qout = tf.matmul(layer_2, W3)
        
        #self.Qout = tf.matmul(self.inputs1,self.W)
        self.predict = tf.argmax(self.Qout,1)
        
        self.time = 0
        self.currEpisode = 0 # Current training stage episode
        self.currQs = None # Current prediction for the Q values using current observation

        #Below we obtain the loss by taking the sum of squares difference between the target and prediction Q values.
        self.nextQ = tf.placeholder(shape=[1,self.actionN],dtype=tf.float32)
        loss = tf.reduce_sum(tf.square(self.nextQ - self.Qout))
        trainer = tf.train.AdamOptimizer(learning_rate=self.learnRate)
        #trainer = tf.train.GradientDescentOptimizer(learning_rate=self.learnRate)
        self.updateModel = trainer.minimize(loss)
        
        init = tf.global_variables_initializer()
        self.session = tf.Session()
        self.session.run(init)
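
A minimal sketch of how this graph is typically driven at act/learn time; the `obs`, `reward` and `next_obs_Qs` names are assumptions (the corresponding methods are not shown here), and numpy is assumed to be imported as `np`:

        # Assumed usage sketch, not part of the original snippet.
        # Forward pass: greedy action plus the current Q estimates.
        action, self.currQs = self.session.run(
            [self.predict, self.Qout],
            feed_dict={self.inputs1: obs.reshape(1, self.inputN)})
        # Bellman target for the chosen action; next_obs_Qs are the Q estimates
        # obtained by running Qout on the next observation.
        targetQ = self.currQs.copy()
        targetQ[0, action[0]] = reward + self.discount * np.max(next_obs_Qs)
        # One optimization step on the squared TD error defined above.
        self.session.run(self.updateModel,
                         feed_dict={self.inputs1: obs.reshape(1, self.inputN),
                                    self.nextQ: targetQ})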
Code example #3
    def __init__(self, action_space, observation_space, params):
        # Use super init
        Agent.__init__(self, action_space, observation_space, params)

        # Initialize table with all zeros
        self.Q = np.zeros([observation_space.n, action_space.n])

        # Set learning parameters
        self.episode_count = self.params[0]  # Number of episodes
        self.lr = self.params[1]  # Learning rate (e.g. 0.5)
        self.y = self.params[2]  # Discount factor (e.g. 0.8)
        self.currEpisode = 0  # Current training stage episode
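
For context, a minimal sketch of the tabular Q-learning update these fields are set up for; the `s`, `a`, `r`, `s_next` names are assumptions, since the learning method itself is not shown:

        # Move Q[s, a] toward the bootstrapped target r + y * max_a' Q[s', a'],
        # with self.lr as the learning rate and self.y as the discount factor.
        self.Q[s, a] += self.lr * (r + self.y * np.max(self.Q[s_next]) - self.Q[s, a])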
Code example #4
 def __init__(self, obs):
     Agent.__init__(self)
     self.capacity = 5
     self.occupation = 0
     self.type = "Taxi"
     self.body.mass = 1000
     self.stat = 0
     self.clients = []
     self.body.fustrum.radius = 200
     self.body.vitesseMax = 15
     self.observerM = obs
     self.observer = None
     self.policy = TaxisPolicy.NONE 
Code example #5
    def __init__(self, max_sims=50):
        # Optionally takes the maximum number of simulations to run per
        # move. Initializes the simulation counter and the root of the
        # search tree.

        Agent.__init__(self)

        self.total_simulations = 0
        self.root_node = None
        self.if_debug = False
        self.loglevel = 0

        # Parameter controlling how many simulations are run (i.e. how deep the search goes)
        self.max_sims = max_sims
Code example #6
 def __init__(self, f):
     Agent.__init__(self)
     self.body = BoidsBody()
     self.type = "StandardAgent"
     self.famille = f
     self.body.mass = 80
     self.body.fustrum.radius = 100
     self.body.vitesseMax = 150.0
     self.body.vitesseMin = 20.0
     self.velocity = [
         random.uniform(-50.0, 50.0),
         random.uniform(-50.0, 50.0)
     ]
     self.avoidanceFactor = 7.5
     self.obstacleFactor = 500
     self.target = Vector2D(0, 0)
Code example #7
 def __init__(self, memory_length=5):
     """
     Empty constructor
     """
     Agent.__init__(self)
     self.memoryLength = 1
     self.color_memory = [''] * memory_length  # previous color
     # Use independent lists per slot ([[]] * n would alias a single list).
     self.move_memory = [[] for _ in range(memory_length)]  # previous move location [piece, i, j]
     self.piece_memory = [[] for _ in range(memory_length)]  # previously played piece structure
     self._colors: List[str] = ['_', 'P', 'G', 'B', 'Y', 'O',
                                'V']  # Piece colors
     self._to_update = 0
     self._update_limit = memory_length - 1
Code example #8
    def __init__(self,
                 action_space,
                 observation_space,
                 params,
                 discreet=False):
        # Use super init
        Agent.__init__(self, action_space, observation_space, params)
        self.discreet = discreet
        if discreet:
            self.inputN = self.observation_space.n
        else:
            self.inputN = self.observation_space.shape[0]
        self.actionN = self.action_space.n

        # Set learning parameters
        self.episode_count = self.params[0]  # Number of episodes
        self.learnRate = self.params[1]  # Learning rate
        self.discount = self.params[2]  # Discount factor for future rewards
        self.epsi = self.params[3]  # Epsilon for epsilon-greedy action picking
        self.epsi_decay = self.params[4]

        self.pretrainEpi = 250  # Number of steps before first train
        self.batch_size = 200  # Size of the training batch
        self.trainPadding = 5  # A training step occurs every x-th step
        self.tau = 0.01  # Amount by which to update the target network at each step
        self.method = self.selectMethod("e-greedy")

        self.epsi_min = 0.001

        self.currEpisode = 0  # Current training stage episode
        self.time = 0  # Current frame within one episode
        self._timeTot = 200  # Maximal time in one episode
        self.currQs = None  # Current prediction for the Q values using current observation

        tf.reset_default_graph()
        self.qNet = Q_Network([[self.inputN, 128, self.actionN],
                               self.learnRate])
        self.targetQNet = Q_Network([[self.inputN, 128, self.actionN],
                                     self.learnRate])
        self.myBuffer = ExperienceBuffer()

        init = tf.global_variables_initializer()
        trainables = tf.trainable_variables()
        self.targetOps = Q_Network.updateTargetGraph(trainables, self.tau)
        self.session = tf.Session()
        self.session.run(init)
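
A short sketch of how the target-network ops and epsilon are commonly applied during training; this is an assumed usage, since the `Q_Network` internals and the training loop are not shown:

        # Softly copy the main network's weights into the target network
        # by running the update ops built with rate self.tau.
        for op in self.targetOps:
            self.session.run(op)
        # Decay exploration, but never below the floor.
        self.epsi = max(self.epsi_min, self.epsi * self.epsi_decay)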
Code example #9
    def __init__(self):
        """
        Initializes a DQN model with random weights
        """
        Agent.__init__(self)

        # Initialize DQN
        dqn_input_dim = len(SquareStackerGame().get_state_vector())
        dqn_output_dim = len(move_to_vector([0, 0, 0]))
        self._dqn = Sequential([
            Dense(128, input_dim=dqn_input_dim),
            Activation('relu'),
            Dense(128),
            Activation('relu'),
            Dense(dqn_output_dim),
        ])
        self._dqn.compile(optimizer=Adam(), loss='mse', metrics=['accuracy'])
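
A minimal usage sketch for the compiled network; the `game` variable is an assumption, while `predict` is the standard Keras call and numpy is assumed to be imported as `np`:

        # Score every candidate move for the current state and pick the best one.
        state = np.array([game.get_state_vector()])  # shape (1, dqn_input_dim)
        q_values = self._dqn.predict(state)[0]
        best_move_index = int(np.argmax(q_values))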
Code example #10
    def __init__(self,
                 env,
                 tuning_parameters,
                 replicated_device=None,
                 thread_id=0,
                 create_target_network=True):
        Agent.__init__(self, env, tuning_parameters, replicated_device,
                       thread_id)
        self.main_network = NetworkWrapper(tuning_parameters,
                                           create_target_network,
                                           self.has_global, 'main',
                                           self.replicated_device,
                                           self.worker_device)
        self.networks.append(self.main_network)
        self.q_values = Signal("Q")
        self.signals.append(self.q_values)

        self.reset_game(do_not_reset_env=True)
Code example #11
 def __init__(self):
     Agent.__init__(self)
     self.timeout = 600
     self.destination = Destination(0, 0)
     self.onboard = -1
     self.type = "Client"
     self.body.mass = 80
     self.body.vitesseMax = 1
     self.body.fustrum.radius = 100
     self.policy = ClientsPolicy.NONE
     self.observer = ClientObserver(self.id, time.time(),
                                    self.body.location)
     self.cohesionFactor = 0.03
     self.velocity = [
         random.uniform(-50.0, 50.0),
         random.uniform(-50.0, 50.0)
     ]
     self.allignFactor = 0.045
Code example #12
    def __init__(self, action_space, observation_space, params):
        # Use super init
        Agent.__init__(self, action_space, observation_space, params)

        # Set learning parameters
        self.episode_count = params[0]  # Number of episodes
        self.lr = params[1]  # Learning rate (e.g. 0.5)
        self.y = params[2]  # Discount factor (e.g. 0.8)
        self.binsize = params[3]  # Should be odd so that -epsi and +epsi fall in different bins
        self.currEpisode = 0  # Current training stage episode

        # Initialize table with all zeros
        self.Q = np.zeros([
            np.power(self.binsize, observation_space.shape[0]), action_space.n
        ])

        # Determine Bins
        self.low = [-0.5, -2, -0.25, -2]  #self.observation_space.low
        self.high = [0.5, 2, 0.25, 2]  # self.observation_space.high
        self.createBins()
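
A sketch of the discretization this Q-table layout implies; `createBins` is not shown in the source, so the bin construction and the `obs` variable here are assumptions:

        # One set of bin edges per observation dimension, spanning the clipped
        # bounds, giving self.binsize bins per dimension.
        self.bins = [np.linspace(lo, hi, self.binsize - 1)
                     for lo, hi in zip(self.low, self.high)]
        # Map a continuous observation to a single row index of self.Q.
        state = sum(np.digitize(o, edges) * self.binsize ** i
                    for i, (o, edges) in enumerate(zip(obs, self.bins)))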
Code example #13
File: boids.py Project: BDafflon/PAMELA
 def __init__(self):
     Agent.__init__(self)
     self.body = BoidsBody()
     self.collisionDVel = 1
     self.type = "Boid"
     self.famille = 1
     self.body.mass = 80
     self.body.fustrum.radius = 100
     self.body.vitesseMax = 150.0
     self.body.vitesseMin = 20.0
     self.repultion = 150
     self.cohesionFactor = 0.03
     self.collisionDistance = 10
     self.velocity = [
         random.uniform(-50.0, 50.0),
         random.uniform(-50.0, 50.0)
     ]
     self.allignFactor = 0.045
     self.avoidanceFactor = 7.5
     self.attractorFactor = 0.35
     self.obstacleFactor = 500
Code example #14
 def __init__(self, *args, **kwargs):
     Agent.__init__(self, *args, **kwargs)
     if self.knowledge is None:
         self.knowledge = set()
     self.knowledge = self._convert_to_set(self.knowledge)
     assert isinstance(self.knowledge, set)
Code example #15
 def __init__(self, scope, observation_shape, rewards_shape, candidates_shape, memory):
     Agent.__init__(self, scope, observation_shape, rewards_shape, candidates_shape, memory)
Code example #16
 def __init__(self):
     """
     Empty constructor
     """
     Agent.__init__(self)
Code example #17
 def __init__(self, action_space, observation_space, params):
     # Use super init
     Agent.__init__(self, action_space, observation_space, params)
     
     # Set learning parameters
     self.episode_count = self.params[0]  # Number of episodes