def __init__(self, action_space, observation_space, params):
    # Call the parent initializer
    Agent.__init__(self, action_space, observation_space, params)
    # Initialize the Q table with all zeros
    self.Q = np.zeros([observation_space.n, action_space.n])
    # Set learning parameters
    self.episode_count = self.params[0]  # Number of episodes
def __init__(self, action_space, observation_space, params, discreet=False):
    # Call the parent initializer
    Agent.__init__(self, action_space, observation_space, params)
    self.discreet = discreet
    if discreet:
        self.inputN = self.observation_space.n
    else:
        self.inputN = self.observation_space.shape[0]
    self.actionN = self.action_space.n
    # Set learning parameters
    self.episode_count = self.params[0]  # Number of episodes
    self.learnRate = self.params[1]      # Learning rate
    self.discount = self.params[2]       # Discount factor for future rewards
    self.epsi = self.params[3]           # Epsilon for greedy picking
    self.epsi_decay = self.params[4]     # Decay factor for epsilon
    self.epsi_min = 0.001
    self._timeTot = 200

    # Define the TF graph
    tf.reset_default_graph()
    # These lines establish the feed-forward part of the network used to choose actions
    n_hidden_1 = 64
    n_hidden_2 = 32
    self.inputs1 = tf.placeholder(shape=[1, self.inputN], dtype=tf.float32)
    W1 = tf.Variable(tf.random_normal([self.inputN, n_hidden_1]))
    W2 = tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2]))
    W3 = tf.Variable(tf.random_normal([n_hidden_2, self.actionN]))
    layer_1 = tf.nn.relu(tf.matmul(self.inputs1, W1))
    layer_2 = tf.nn.relu(tf.matmul(layer_1, W2))
    self.Qout = tf.matmul(layer_2, W3)
    self.predict = tf.argmax(self.Qout, 1)

    self.time = 0
    self.currEpisode = 0  # Current training-stage episode
    self.currQs = None    # Current Q-value prediction for the current observation

    # Obtain the loss as the sum of squared differences between the target and predicted Q values
    self.nextQ = tf.placeholder(shape=[1, self.actionN], dtype=tf.float32)
    loss = tf.reduce_sum(tf.square(self.nextQ - self.Qout))
    trainer = tf.train.AdamOptimizer(learning_rate=self.learnRate)
    # trainer = tf.train.GradientDescentOptimizer(learning_rate=self.learnRate)
    self.updateModel = trainer.minimize(loss)
    init = tf.global_variables_initializer()
    self.session = tf.Session()
    self.session.run(init)
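A minimal sketch of how this graph might be driven during an episode; the `act` and `learn` method names and the epsilon-greedy wrapper are assumptions (not part of the original class), and `obs` is assumed to arrive as a 1-D numpy array:

def act(self, obs):
    # Hypothetical helper: epsilon-greedy action selection over the graph above
    a, self.currQs = self.session.run(
        [self.predict, self.Qout],
        feed_dict={self.inputs1: obs.reshape(1, self.inputN)})
    if np.random.rand() < self.epsi:
        return self.action_space.sample()  # explore
    return a[0]

def learn(self, obs, action, reward, next_obs):
    # Hypothetical helper: one TD(0) update against the current network
    q_next = self.session.run(
        self.Qout,
        feed_dict={self.inputs1: next_obs.reshape(1, self.inputN)})
    target = self.currQs
    target[0, action] = reward + self.discount * np.max(q_next)
    self.session.run(
        self.updateModel,
        feed_dict={self.inputs1: obs.reshape(1, self.inputN), self.nextQ: target})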
def __init__(self, action_space, observation_space, params):
    # Call the parent initializer
    Agent.__init__(self, action_space, observation_space, params)
    # Initialize the Q table with all zeros
    self.Q = np.zeros([observation_space.n, action_space.n])
    # Set learning parameters
    self.episode_count = self.params[0]  # Number of episodes
    self.lr = self.params[1]             # Learning rate (e.g. 0.5)
    self.y = self.params[2]              # Discount factor (e.g. 0.8)
    self.currEpisode = 0                 # Current training-stage episode
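For context, a minimal sketch of the tabular update these parameters typically drive; the `learn` method name is an assumption, the update rule itself is standard Q-learning:

def learn(self, s, a, r, s_next):
    # Hypothetical helper: Q(s,a) <- Q(s,a) + lr * (r + y * max_a' Q(s',a') - Q(s,a))
    self.Q[s, a] += self.lr * (r + self.y * np.max(self.Q[s_next]) - self.Q[s, a])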
def __init__(self, obs):
    Agent.__init__(self)
    self.capacity = 5       # Maximum number of passengers
    self.occupation = 0
    self.type = "Taxi"
    self.body.mass = 1000
    self.stat = 0
    self.clients = []
    self.body.fustrum.radius = 200  # Perception radius
    self.body.vitesseMax = 15       # Maximum speed
    self.observerM = obs
    self.observer = None
    self.policy = TaxisPolicy.NONE
def __init__(self, max_sims=50):
    # Initializes the simulation counters and search bookkeeping;
    # max_sims caps how many simulations each decision may run.
    Agent.__init__(self)
    self.total_simulations = 0
    self.root_node = None
    self.if_debug = False
    self.loglevel = 0
    # Parameter controlling how deep the search goes
    self.max_sims = max_sims
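A minimal sketch of how `max_sims` might bound the per-move search loop; `choose_move`, `select`, `expand`, `simulate`, and `backpropagate` are hypothetical helper names standing in for the usual MCTS phases and are not part of the original class:

def choose_move(self, root):
    # Hypothetical driver: run up to max_sims MCTS iterations from the root
    for _ in range(self.max_sims):
        leaf = self.select(root)         # descend the tree (e.g. via UCB)
        child = self.expand(leaf)        # add one child node
        result = self.simulate(child)    # playout to a terminal state
        self.backpropagate(child, result)
        self.total_simulations += 1
    # Pick the most-visited child as the move
    return max(root.children, key=lambda c: c.visits)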
def __init__(self, f):
    Agent.__init__(self)
    self.body = BoidsBody()
    self.type = "StandardAgent"
    self.famille = f                # Family/flock id
    self.body.mass = 80
    self.body.fustrum.radius = 100  # Perception radius
    self.body.vitesseMax = 150.0    # Maximum speed
    self.body.vitesseMin = 20.0     # Minimum speed
    self.velocity = [random.uniform(-50.0, 50.0), random.uniform(-50.0, 50.0)]
    self.avoidanceFactor = 7.5
    self.obstacleFactor = 500
    self.target = Vector2D(0, 0)
def __init__(self, memory_length=5):
    """ Constructor: sets up fixed-length memories of recent moves """
    Agent.__init__(self)
    self.memoryLength = memory_length  # Was hard-coded to 1, ignoring the parameter
    self.color_memory = [''] * memory_length  # Previous colors
    # Use comprehensions so each slot is an independent list;
    # [[]] * n would alias the same list object into every slot.
    self.move_memory = [[] for _ in range(memory_length)]   # Previous move locations [piece, i, j]
    self.piece_memory = [[] for _ in range(memory_length)]  # Previously played piece structures
    self._colors: List[str] = ['_', 'P', 'G', 'B', 'Y', 'O', 'V']  # Piece colors
    self._to_update = 0
    self._update_limit = memory_length - 1
def __init__(self, action_space, observation_space, params, discreet=False):
    # Call the parent initializer
    Agent.__init__(self, action_space, observation_space, params)
    self.discreet = discreet
    if discreet:
        self.inputN = self.observation_space.n
    else:
        self.inputN = self.observation_space.shape[0]
    self.actionN = self.action_space.n
    # Set learning parameters
    self.episode_count = self.params[0]  # Number of episodes
    self.learnRate = self.params[1]      # Learning rate
    self.discount = self.params[2]       # Discount factor for future rewards
    self.epsi = self.params[3]           # Epsilon for greedy picking
    self.epsi_decay = self.params[4]     # Decay factor for epsilon
    self.pretrainEpi = 250   # Number of steps before the first training pass
    self.batch_size = 200    # Size of a training batch
    self.trainPadding = 5    # A training pass occurs every x-th step
    self.tau = 0.01          # Amount by which to update the target network at each step
    self.method = self.selectMethod("e-greedy")
    self.epsi_min = 0.001
    self.currEpisode = 0     # Current training-stage episode
    self.time = 0            # Current frame within one episode
    self._timeTot = 200      # Maximal time in one episode
    self.currQs = None       # Current Q-value prediction for the current observation

    tf.reset_default_graph()
    self.qNet = Q_Network([[self.inputN, 128, self.actionN], self.learnRate])
    self.targetQNet = Q_Network([[self.inputN, 128, self.actionN], self.learnRate])
    self.myBuffer = ExperienceBuffer()
    init = tf.global_variables_initializer()
    trainables = tf.trainable_variables()
    self.targetOps = Q_Network.updateTargetGraph(trainables, self.tau)
    self.session = tf.Session()
    self.session.run(init)
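The target network is presumably refreshed by running the soft-update ops built above; a minimal sketch of applying them, assuming `Q_Network.updateTargetGraph` returns a list of assign ops (the `update_target` method name is an assumption):

def update_target(self):
    # Hypothetical helper: apply the tau-weighted soft-update ops,
    # moving the target network's weights toward the main network's
    for op in self.targetOps:
        self.session.run(op)

Running these ops every training step with tau = 0.01 keeps the bootstrap targets moving slowly, which is the usual rationale for maintaining a separate target network.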
def __init__(self):
    """ Initializes a DQN model with random weights """
    Agent.__init__(self)
    # Initialize the DQN
    dqn_input_dim = len(SquareStackerGame().get_state_vector())
    dqn_output_dim = len(move_to_vector([0, 0, 0]))
    self._dqn = Sequential([
        Dense(128, input_dim=dqn_input_dim),
        Activation('relu'),
        Dense(128),
        Activation('relu'),
        Dense(dqn_output_dim),  # Linear output: one Q estimate per move entry
    ])
    # Note: accuracy is not very informative for MSE regression
    self._dqn.compile(optimizer=Adam(), loss='mse', metrics=['accuracy'])
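A minimal usage sketch for scoring moves with this model; `DQNAgent` is a hypothetical name for the class this constructor belongs to, and `get_state_vector()` is assumed to return a flat list of floats:

import numpy as np

game = SquareStackerGame()
agent = DQNAgent()  # hypothetical class name for the constructor above
state = np.array(game.get_state_vector()).reshape(1, -1)
q_values = agent._dqn.predict(state)[0]  # one Q estimate per move-vector entry
best_move_index = int(np.argmax(q_values))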
def __init__(self, env, tuning_parameters, replicated_device=None, thread_id=0,
             create_target_network=True):
    Agent.__init__(self, env, tuning_parameters, replicated_device, thread_id)
    self.main_network = NetworkWrapper(tuning_parameters, create_target_network,
                                       self.has_global, 'main',
                                       self.replicated_device, self.worker_device)
    self.networks.append(self.main_network)
    self.q_values = Signal("Q")
    self.signals.append(self.q_values)
    self.reset_game(do_not_reset_env=True)
def __init__(self):
    Agent.__init__(self)
    self.timeout = 600
    self.destination = Destination(0, 0)
    self.onboard = -1
    self.type = "Client"
    self.body.mass = 80
    self.body.vitesseMax = 1        # Maximum speed
    self.body.fustrum.radius = 100  # Perception radius
    self.policy = ClientsPolicy.NONE
    self.observer = ClientObserver(self.id, time.time(), self.body.location)
    self.cohesionFactor = 0.03
    self.velocity = [random.uniform(-50.0, 50.0), random.uniform(-50.0, 50.0)]
    self.allignFactor = 0.045  # Alignment factor
def __init__(self, action_space, observation_space, params):
    # Call the parent initializer
    Agent.__init__(self, action_space, observation_space, params)
    # Set learning parameters
    self.episode_count = params[0]  # Number of episodes
    self.lr = params[1]             # Learning rate (e.g. 0.5)
    self.y = params[2]              # Discount factor (e.g. 0.8)
    self.binsize = params[3]        # Should be odd so -epsi and +epsi land in different bins
    self.currEpisode = 0            # Current training-stage episode
    # Initialize the Q table with all zeros: one row per discretized state
    self.Q = np.zeros([np.power(self.binsize, observation_space.shape[0]),
                       action_space.n])
    # Determine bins; fixed ranges are used in place of
    # observation_space.low/high, which can be unbounded
    self.low = [-0.5, -2, -0.25, -2]
    self.high = [0.5, 2, 0.25, 2]
    self.createBins()
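`createBins` is referenced but not shown; a plausible sketch of it, paired with a hypothetical `discretize` helper that maps a continuous observation to a Q-table row, under the assumption that each dimension is split into `binsize` equal bins between `low` and `high`:

def createBins(self):
    # Plausible sketch: binsize equally spaced cut points per observation dimension
    self.bins = [np.linspace(lo, hi, self.binsize)
                 for lo, hi in zip(self.low, self.high)]

def discretize(self, obs):
    # Hypothetical helper: treat per-dimension bin indices as digits in
    # base binsize, yielding a unique row index below binsize ** n_dims
    idx = 0
    for x, cuts in zip(obs, self.bins):
        idx = idx * self.binsize + min(np.digitize(x, cuts), self.binsize - 1)
    return idx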
def __init__(self):
    Agent.__init__(self)
    self.body = BoidsBody()
    self.collisionDVel = 1
    self.type = "Boid"
    self.famille = 1                # Family/flock id
    self.body.mass = 80
    self.body.fustrum.radius = 100  # Perception radius
    self.body.vitesseMax = 150.0    # Maximum speed
    self.body.vitesseMin = 20.0     # Minimum speed
    self.repultion = 150            # Repulsion strength
    self.cohesionFactor = 0.03
    self.collisionDistance = 10
    self.velocity = [random.uniform(-50.0, 50.0), random.uniform(-50.0, 50.0)]
    self.allignFactor = 0.045       # Alignment factor
    self.avoidanceFactor = 7.5
    self.attractorFactor = 0.35
    self.obstacleFactor = 500
def __init__(self, *args, **kwargs):
    Agent.__init__(self, *args, **kwargs)
    if self.knowledge is None:
        self.knowledge = set()
    self.knowledge = self._convert_to_set(self.knowledge)
    assert isinstance(self.knowledge, set)
def __init__(self, scope, observation_shape, rewards_shape, candidates_shape, memory):
    Agent.__init__(self, scope, observation_shape, rewards_shape, candidates_shape, memory)
def __init__(self):
    """ Empty constructor """
    Agent.__init__(self)
def __init__(self, action_space, observation_space, params):
    # Call the parent initializer
    Agent.__init__(self, action_space, observation_space, params)
    # Set learning parameters
    self.episode_count = self.params[0]  # Number of episodes