def plot_cumulative_reward(cumulativeReward):
    """Plot the cumulative-reward curve and refresh it without blocking.

    Parameters
    ----------
    cumulativeReward : sequence of float
        Cumulative reward values accumulated so far (one entry per step
        or episode — depends on the caller; cannot be told from here).
    """
    line, = plt.plot(cumulativeReward)
    # Interactive mode plus a tiny pause lets the figure redraw while the
    # surrounding training loop keeps running, instead of blocking on show().
    plt.ion()
    plt.show()
    plt.pause(0.0001)


if __name__ == '__main__':
    with tf.Session() as sess:
        # Alternative OpenAI-gym environments kept for quick A/B experiments:
        #env = gym.make('Pendulum-v0')
        #env = gym.make('MountainCarContinuous-v0')
        # presumably a V-REP remote-API endpoint on the local machine
        # (port 19997) — verify against the LASEnv implementation.
        env = LASEnv(IP='127.0.0.1', Port=19997,
                     reward_function_type='red_light_dense')
        # NOTE(review): this constructor call is truncated at the chunk
        # boundary; the remaining keyword arguments continue past this view.
        LASAgent = LASAgent_Actor_Critic(
            sess, env,
            actor_lr=0.0001, actor_tau=0.001,
            critic_lr=0.0001, critic_tau=0.001,
            gamma=0.99,
            minibatch_size=64,
            max_episodes=50000, max_episode_len=1000,
            # Exploration Strategies
            exploration_action_noise_type='ou_0.2',
from Environment.LASEnv import LASEnv
from LASAgent.InternalEnvOfAgent import InternalEnvOfAgent
from LASAgent.InternalEnvOfCommunity import InternalEnvOfCommunity

# Logging: one timestamped log file per experiment run.
logging.basicConfig(filename='../ROM_Experiment_results/ROM_experiment_' +
                    datetime.now().strftime("%Y%m%d-%H%M%S") + '.log',
                    level=logging.INFO,
                    format='%(asctime)s:%(levelname)s: %(message)s')

#######################################################################
#                Instantiate LAS virtual environment                  #
#######################################################################
# Instantiate LAS environment object (local V-REP endpoint, port 19997
# — presumably the simulator's default remote-API port; confirm in LASEnv).
envLAS = LASEnv('127.0.0.1', 19997, reward_function_type='occupancy')
observation = envLAS.reset()

#######################################################################
#                       Instantiate LAS-Agent                         #
#######################################################################
# Note: 1. Set load_pretrained_agent_flag to "True" only when you have
#          and want to load pretrained agent.
#       2. Keep observation unchanged if using pretrained agent.
agent_name = 'LAS_Single_Agent'
observation_space = envLAS.observation_space
action_space = envLAS.action_space
# Bug fix: the original statement ended with a stray comma
# ("observation_space_name = [],"), which bound a one-element tuple
# ([],) instead of the intended empty list.
observation_space_name = []
action_space_name = []
x_order_MDP = 5
x_order_MDP_observation_type = 'concatenate_observation'
"""
This script drives the interaction between an extrinsically motivated
LASAgent and its Environment.

Note
----
Instantiate the Environment first, because the LASAgent needs the
Environment object as a constructor parameter.
"""
sess = tf.Session()
K.set_session(sess)

# Instantiate LASEnv (local simulator endpoint, port 19997).
envLAS = LASEnv('127.0.0.1', 19997)

# Instantiate Extrinsically Motivated LAS-agent; learnFromScratch=True
# presumably skips loading any pretrained model — verify in the class.
Ext_Mot_LASAgent = ExtrinsicallyMotivatedLASAgent(envLAS,
                                                  sess,
                                                  learnFromScratch = True)
# Instantiate Random Action LAS-agent (baseline; created but its use is
# not visible in this chunk).
Random_LASAgent = RandomLASAgent(envLAS)

# Step counter
i = 1
observationForLAS, rewardLAS, done, info = envLAS.reset()
#while not done:
# NOTE(review): the loop body is truncated at the chunk boundary; more
# statements follow past this view.
for temp in range(10000):
    actionLAS = Ext_Mot_LASAgent.perceive_and_act(observationForLAS, rewardLAS, done)
import tensorflow as tf
import numpy as np
import time

from Environment.LASEnv import LASEnv
from LASAgent.RandomLASAgent import RandomLASAgent
from LASAgent.LASAgent_Actor_Critic import LASAgent_Actor_Critic

from Environment.VisitorEnv import VisitorEnv
from VisitorAgent.RedLightExcitedVisitorAgent import RedLightExcitedVisitorAgent

if __name__ == '__main__':
    with tf.Session() as sess:
        # Instantiate LAS environment object (local simulator, port 19997).
        envLAS = LASEnv('127.0.0.1', 19997, reward_function_type='occupancy')
        observation_For_LAS = envLAS.reset()
        # Instantiate LAS-agent; the random baseline is kept commented out
        # for quick switching.
        # LASAgent1 = RandomLASAgent(envLAS)
        # NOTE(review): this constructor call is truncated at the chunk
        # boundary; the remaining keyword arguments continue past this view.
        LASAgent1 = LASAgent_Actor_Critic(
            sess, envLAS,
            actor_lr=0.0001, actor_tau=0.001,
            critic_lr=0.0001, critic_tau=0.001,
            gamma=0.99,
            minibatch_size=64,
            max_episodes=50000, max_episode_len=1000,
            # Exploration Strategies
"""
Created on Tue Aug  7 17:17:08 2018

@author: jack.lingheng.meng

Run a random-action LAS agent together with four red-light-excited
visitor agents against the shared simulator.
"""
from Environment.LASEnv import LASEnv
from LASAgent.RandomLASAgent import RandomLASAgent

from Environment.VisitorEnv import VisitorEnv
from VisitorAgent.RedLightExcitedVisitorAgent import RedLightExcitedVisitorAgent

if __name__ == '__main__':
    # Instantiate LAS environment object (local simulator, port 19997).
    envLAS = LASEnv('127.0.0.1', 19997)
    observation_For_LAS = envLAS.reset()
    # Instantiate LAS-agent
    LASAgent1 = RandomLASAgent(envLAS)

    # Instantiate visitor environment object — same host/port as the LAS
    # environment, so both presumably share one simulator session.
    envVisitor = VisitorEnv('127.0.0.1', 19997)
    # Instantiate a red light excited visitor0
    visitor0 = RedLightExcitedVisitorAgent("Visitor#0")
    # Instantiate a red light excited visitor1
    visitor1 = RedLightExcitedVisitorAgent("Visitor#1")
    # Instantiate a red light excited visitor2
    visitor2 = RedLightExcitedVisitorAgent("Visitor#2")
    # Instantiate a red light excited visitor3
    visitor3 = RedLightExcitedVisitorAgent("Visitor#3")
def plot_cumulative_reward(cumulativeReward):
    """Plot the cumulative-reward curve and refresh it without blocking.

    Parameters
    ----------
    cumulativeReward : sequence of float
        Cumulative reward values accumulated so far.
    """
    line, = plt.plot(cumulativeReward)
    # Interactive mode plus a tiny pause lets the figure redraw while the
    # surrounding training loop keeps running, instead of blocking on show().
    plt.ion()
    plt.show()
    plt.pause(0.0001)


if __name__ == '__main__':
    with tf.Session() as sess:
        # Alternative OpenAI-gym environments kept for quick A/B experiments:
        #env = gym.make('Pendulum-v0')
        #env = gym.make('MountainCarContinuous-v0')
        env = LASEnv('127.0.0.1', 19997)
        LASAgent = LASAgent_Actor_Critic(sess, env)
        #LASAgent.train()

        # Learning records: bounded buffer of per-episode rewards.
        episod_reward_memory = deque(maxlen=10000)

        # Train parameters
        max_episodes = 50000
        max_episode_len = 1000
        render_env = False

        reward = 0
        done = False
        # NOTE(review): the loop body is truncated at the chunk boundary;
        # the training steps continue past this view.
        for i in range(max_episodes):
'Overall_Summary_Single_Agent') summary_dir = os.path.join(save_dir, datetime.now().strftime("%Y%m%d-%H%M%S")) if not os.path.isdir(summary_dir): os.makedirs(summary_dir) tf_writer = tf.summary.FileWriter(summary_dir) # Summarize # of bright light total_bright_light_number_sum_op,\ total_bright_light_number_sum = _init_summarize_total_bright_light_number() ############################################################################### ####################################################################### # Instatiate LAS virtual environment # ####################################################################### # Instantiate LAS environment object envLAS = LASEnv('127.0.0.1', 19997, reward_function_type='occupancy') observation = envLAS.reset() ####################################################################### # Instatiate LAS-Agent # ####################################################################### # Note: 1. Set load_pretrained_agent_flag to "True" only when you have # and want to load pretrained agent. # 2. Keep observation unchanged if using pretrained agent. agent_name = 'LAS_Single_Agent' observation_space = envLAS.observation_space action_space = envLAS.action_space observation_space_name = [], action_space_name = [] x_order_MDP = 5 x_order_MDP_observation_type = 'concatenate_observation'