def run():
    """Train an A2C agent on CartPole-v0 using n-step (LOOKAHEAD) rollouts.

    Loops forever, collecting up to LOOKAHEAD environment steps per update,
    and returns once the average reward over the last 20 finished episodes
    exceeds 195 (the classic CartPole "solved" threshold).

    Relies on module-level names: ``gym``, ``tf``, ``deque``, ``A2CAgent``,
    ``Memory`` and ``LOOKAHEAD``.
    """
    env = gym.make('CartPole-v0')
    a2c_agent = A2CAgent(env.action_space.n, env.observation_space.shape[0])
    memory = Memory()
    episode_num = 0        # completed episodes so far
    episode_reward = 0     # reward accumulated in the current episode
    episode_done = True    # True forces an env.reset() on the first iteration
    task_done = deque(maxlen=20)  # rewards of the last 20 finished episodes
    state = None
    next_state = []
    while True:
        if episode_done:
            # BUGFIX: the original initialized episode_reward to 1 and, because
            # episode_done starts True, printed and recorded a bogus
            # "episode 1 ends after 1 time steps" before a single step was
            # taken — which also skewed the solved-check average. Only report
            # after a real episode has finished.
            if episode_num > 0:
                print('episode num {} ends after {} time steps'.format(
                    episode_num, episode_reward))
                task_done.append(episode_reward)
                if sum(task_done) / len(task_done) > 195:
                    # Message cleaned up and "TAKS" typo fixed; return instead
                    # of exit() so callers regain control.
                    print("#### CONGRATULATIONS! TASK IS DONE ####")
                    return
            state = env.reset()
            # Empty next_state means "no bootstrap state" for the accumulated
            # return at the start of a fresh episode.
            next_state = []
            episode_done = False
            episode_num += 1
            episode_reward = 0
        # Collect an n-step rollout before each training update.
        for _ in range(LOOKAHEAD):
            env.render()
            state = tf.convert_to_tensor(state)
            state = tf.expand_dims(state, 0)  # add a batch dimension of 1
            action = a2c_agent.act(state)
            memory.store(s=state, a=action)
            state, reward, episode_done, _ = env.step(action)
            memory.store(r=reward)
            episode_reward += reward
            next_state = state
            if episode_done:
                next_state = []  # terminal state: nothing to bootstrap from
                break
        a2c_agent.prepare_train(memory, next_state)
        memory.clear()
def run(self):
    """Worker thread main loop for A3C-style training.

    Repeatedly plays episodes with the local model, and every
    ``args.update_freq`` steps (or at episode end) computes a loss over the
    buffered transitions, applies the *local* gradients to the *global*
    model through the shared optimizer, then syncs local weights back from
    the global model. Puts ``None`` on ``self.result_queue`` when all
    episodes are done.

    Reads/writes class-level shared state: ``Worker.global_episode``,
    ``Worker.global_moving_average_reward``, ``Worker.best_score``,
    ``Worker.save_lock``.
    """
    total_step = 1
    mem = Memory()
    # Loop for all the episodes (global counter is shared across workers;
    # NOTE(review): the read-then-act on Worker.global_episode is not locked,
    # so workers may slightly overshoot args.max_eps).
    while Worker.global_episode < args.max_eps:
        current_state = self.env.reset()
        # Clip the raw observation into [mn_d, mx_d], then min-max rescale it
        # into [new_mind, new_maxd].
        obs = current_state.clip(self.mn_d, self.mx_d)
        current_state = (((obs - self.mn_d) * (self.new_maxd - self.new_mind)
                          ) / (self.mx_d - self.mn_d)) + self.new_mind
        mem.clear()
        ep_reward = 0.
        ep_steps = 0
        self.ep_loss = 0
        time_count = 1
        # Placeholder loss; overwritten by compute_loss on the first update.
        total_loss = tf.constant(10e5)
        # Loop through one episode, until done or reached maximum steps per episode
        for ep_t in range(args.max_step_per_ep):
            # Take action based on current state. The local model outputs the
            # parameters of a Gaussian policy (mu, sigma); tf.sqrt(sigma)
            # below suggests sigma is a variance — TODO confirm against the
            # model definition.
            mu, sigma, _ = self.local_model(
                tf.convert_to_tensor(current_state[None, :], dtype=tf.float32))
            # NOTE(review): cov_matrix and normal_dist are computed but never
            # used — stochastic sampling is disabled (see commented block) and
            # the action is the deterministic mean.
            cov_matrix = np.diag(sigma[0])
            normal_dist = tfp.distributions.Normal(mu, tf.sqrt(sigma))
            # action = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0),
            #                           clip_value_min=-0.999999,
            #                           clip_value_max=0.999999)
            action = tf.clip_by_value(mu, clip_value_min=self.env.action_space.low,
                                      clip_value_max=self.env.action_space.high)
            # Receive new state and reward
            # print(action.numpy()[0])
            new_state, reward, done_game, _ = self.env.step(action.numpy()[0])
            # Same clip + rescale as above, applied to the successor state.
            obs = new_state.clip(self.mn_d, self.mx_d)
            new_state = (((obs - self.mn_d) * (self.new_maxd - self.new_mind)
                          ) / (self.mx_d - self.mn_d)) + self.new_mind
            # Force episode termination on the last allowed step.
            done = True if ep_t == args.max_step_per_ep - 1 else done_game
            # Asymmetric reward clipping into [-10, 1].
            reward = max(min(float(reward), 1.0), -10.0)
            ep_reward += reward
            mem.store(current_state, action, reward)
            if time_count == args.update_freq or done:
                # Calculate gradient wrt the local model: the loss is computed
                # under a GradientTape so the local trainable weights used by
                # compute_loss are tracked.
                with tf.GradientTape(persistent=True) as tape:
                    # NOTE(review): watching the *previous* total_loss tensor
                    # is a no-op for the loss computed on the next line;
                    # trainable variables are watched automatically.
                    tape.watch(total_loss)
                    total_loss = self.compute_loss(done, new_state, mem, args.gamma)
                self.ep_loss += total_loss
                # Calculate local gradients
                grads = tape.gradient(total_loss, self.local_model.trainable_weights)
                # Push local gradients to global model. NOTE(review): the
                # except just prints and drops the failed update.
                try:
                    self.opt.apply_gradients(zip(grads,
                                                 self.global_model.trainable_weights))
                except ValueError:
                    print("ValueError")
                # Update local model with new weights
                self.local_model.set_weights(self.global_model.get_weights())
                mem.clear()
                time_count = 0
                if done:
                    # Episode finished: record stats and maybe checkpoint.
                    Worker.global_moving_average_reward = \
                        record(Worker.global_episode, ep_reward, self.worker_idx,
                               Worker.global_moving_average_reward, self.result_queue,
                               self.ep_loss, ep_steps)
                    # We must use a lock to save our model and to print to
                    # prevent data races between workers.
                    if ep_reward > Worker.best_score:
                        with Worker.save_lock:
                            print("Saving best model to {}, "
                                  "episode score: {}".format(self.save_dir, ep_reward))
                            self.global_model.save_weights(
                                os.path.join(self.save_dir,
                                             'model_{}.h5'.format(self.game_name))
                            )
                            Worker.best_score = ep_reward
                    Worker.global_episode += 1
                    # Count the terminal step before leaving the episode loop.
                    ep_steps += 1
                    time_count += 1
                    total_step += 1
                    break
            ep_steps += 1
            time_count += 1
            current_state = new_state
            total_step += 1
    # Sentinel: tells the consumer of result_queue that this worker is done.
    self.result_queue.put(None)
""" from Memory import Memory from Parser import Parser if __name__ == '__main__': files = ["julia1.txt"] JuliaCode = "julia1.txt" parser = Parser(JuliaCode) print('Executing...----------------------------------------------------\n') print(format(JuliaCode)) print('\bOutput') intrepret = parser.parse() intrepret.execute() Memory.clear() print('Test Complete.. No error') """ JUlIA Code Samples to Copy and Paste in Julia.txt file Code 1 function a ( ) x = 1 while <= x 3 print ( x ) x = + x 1 end print ( 9999 ) end