def main(args):
    with tf.Session() as sess:
        #env = gym.make(args['env'])
        env = Carom(render=False)

        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))

        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high
        # Ensure action bound is symmetric
        #assert (env.action_space.high == -env.action_space.low)
        #saver = tf.train.Saver()

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        args['use_gym_monitor'] = False
        if args['use_gym_monitor']:
            if not args['render_env']:
                env = wrappers.Monitor(env, args['monitor_dir'],
                                       video_callable=False, force=True)
            else:
                env = wrappers.Monitor(env, args['monitor_dir'], force=True)

        load = False
        plot_anim = False
        if load:
            actor.saver.restore(sess, './actor-model-2b')
            critic.saver.restore(sess, './critic-model-2b')

        train(sess, env, args, actor, critic, actor_noise, load, plot_anim)

        actor.saver.save(sess, './actor-model-2b')
        critic.saver.save(sess, './critic-model-2b')

        if args['use_gym_monitor']:
            env.monitor.close()
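# The keys read from args above ('random_seed', 'actor_lr', 'critic_lr',
# 'gamma', 'tau', 'minibatch_size', 'use_gym_monitor', 'render_env',
# 'monitor_dir', 'env') suggest an argparse-style driver. The sketch below is
# an assumption about how main() could be invoked; the flag defaults are
# illustrative only and are not taken from the original script.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='DDPG agent for the Carom environment')
    parser.add_argument('--env', default='Carom-v0')
    parser.add_argument('--random-seed', default=1234)
    parser.add_argument('--actor-lr', default=0.0001)
    parser.add_argument('--critic-lr', default=0.001)
    parser.add_argument('--gamma', default=0.99)
    parser.add_argument('--tau', default=0.001)
    parser.add_argument('--minibatch-size', default=64)
    parser.add_argument('--render-env', action='store_true')
    parser.add_argument('--use-gym-monitor', action='store_true')
    parser.add_argument('--monitor-dir', default='./results/gym_ddpg')
    # argparse converts the dashes to underscores, matching the keys used in main()
    args = vars(parser.parse_args())
    main(args)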
from carom import Carom
from Constants import *
import numpy as np
from vpython import sleep


def choose_branch(Q, state):
    chosen_branch = Q[state].argmax()
    return chosen_branch


tree_actions_index = np.load("tree_actions_index.npy")
Q = np.load("treeQmatrix.npy")

env = Carom(render=True)
env.reset()
actions = env.get_actions()

nb_states = 0
index2 = 0
state = 0
episode_reward = 0
nb_branches = len(tree_actions_index[0, 0])

# total number of nodes in the action tree
for i in range(tree_actions_index.shape[0]):
    nb_states += nb_branches**i

# walk down the tree, greedily picking the best branch at each level
for level in range(tree_actions_index.shape[0]):
    chosen_branch = choose_branch(Q, state)
    action_index = int(tree_actions_index[level, index2][chosen_branch])
    # each action is an (a, b, theta, phi, V) tuple
    reward, coll_r = env.step3(actions[action_index][0], actions[action_index][1],
                               actions[action_index][2], actions[action_index][3],
                               actions[action_index][4])
from carom import Carom
#import random
#from Parameters import RADIUS
#import numpy as np
#from scipy import sparse
from Constants import *
from vpython import *

# pos_white = vector(-SURFACE_WIDTH/3,0,0)
# pos_yellow = vector(-SURFACE_WIDTH/4,- INIT_DIST/2,0)
# pos_red = vector(SURFACE_WIDTH/4 -0.2,-0.5,0)

# pos_white = vector(-SURFACE_WIDTH/2,0,0)
# pos_yellow = vector(-SURFACE_WIDTH/2 + 0.03,SURFACE_WIDTH/2 - 0.07,0)
# pos_red = vector(-SURFACE_WIDTH/2 - 0.4,SURFACE_WIDTH/2 - 0.07,0)

env = Carom(render=False)
#env.reset(pos_white, pos_yellow, pos_red)
sleep(2)

a = -0.2 * RADIUS
b = 0 * RADIUS
theta = 5
phi = 85
V = 5

actions = env.get_fixed_actions()
nb_shots = 5
shots = []

for i in range(nb_shots):
    coll_reward = 0
    while coll_reward == 0:
        #env.reset(pos_white, pos_yellow, pos_red)
        env.reset()
import numpy as np
import gym

from carom import Carom
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
from tensorflow.keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

ENV_NAME = 'Carom-v0'

# Get the environment and extract the number of actions.
env = Carom(render=False)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())
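# The script stops after building the Q-network. A minimal sketch of how the
# imported DQNAgent, BoltzmannQPolicy and SequentialMemory are typically wired
# together in keras-rl follows; the warm-up length, training steps and weight
# filename are illustrative assumptions, not values from the original script.
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
               nb_steps_warmup=100, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Train against the Carom environment, then keep the learned weights.
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Evaluate the greedy policy for a few episodes.
dqn.test(env, nb_episodes=5, visualize=False)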
import gym
import numpy as np

from tensorflow.keras.layers import Activation, Concatenate, Dense, Flatten, Input
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam

from rl.agents import DDPGAgent
from rl.memory import *
from carom import Carom

ENV_NAME = 'Carom-v0'
# gym.undo_logger_setup()

# Get the environment and extract the number of actions.
env = Carom(render=False)
np.random.seed(323)
env.seed(323)

assert len(env.action_space.shape) == 1
nb_actions = env.action_space.shape[0]

# Next, we build a very simple model.
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
actor.add(Dense(16))
actor.add(Activation('relu'))
actor.add(Dense(16))
actor.add(Activation('relu'))
actor.add(Dense(16))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('linear'))
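# The listing breaks off after the actor network. Below is a sketch of the
# usual keras-rl continuation: a critic that takes (action, observation) pairs,
# plus the DDPGAgent itself. The layer sizes, noise parameters, training length
# and the OrnsteinUhlenbeckProcess import are assumptions modelled on the stock
# keras-rl DDPG example, not recovered from the original file.
from rl.random import OrnsteinUhlenbeckProcess

action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
flattened_observation = Flatten()(observation_input)
x = Concatenate()([action_input, flattened_observation])
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)

memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99,
                  target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

agent.fit(env, nb_steps=50000, visualize=False, verbose=1)
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
agent.test(env, nb_episodes=5, visualize=False)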
import numpy as np
from carom import Carom
import gym

# Collect nb_rows successful random transitions (reward == 1) as a
# demonstration table of (state, action, reward, next_state) rows.
nb_rows = 1000
nb_good_steps = 0

env = Carom(render=False)
np.random.seed(321)
env.seed(321)

demo_table = np.zeros((nb_rows, 4), dtype=object)

while nb_good_steps < nb_rows:
    env.reset()
    state = np.array(env.state)
    action = env.action_space.sample()
    next_state, reward, done, info = env.step(action)
    if reward == 1:
        demo_table[nb_good_steps][0] = state
        demo_table[nb_good_steps][1] = action
        demo_table[nb_good_steps][2] = reward
        demo_table[nb_good_steps][3] = next_state
        nb_good_steps += 1
        print("demo: %d/%d" % (nb_good_steps, nb_rows))

np.save("demoTable", demo_table)
total_points = 0
sum_goal_points = 0
pos_white = P0_WHITE
pos_yellow = P0_YELLOW
pos_red = P0_RED

myList = []
letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'x', 'y', 'z']

for i in range(goal_points):
    sum_goal_points += nb_branches**(i + 1)

# structured dtype: one float field per branch of the action tree
for i in range(nb_branches):
    myList.append((letters[i], np.float32))

#num_episodes = 2000
#lr = .8
#y = .95

env = Carom(render=False)

#tree_states_index = np.zeros((goal_points,nb_branches**(goal_points-1)))
tree_actions_index = np.zeros((goal_points, nb_branches**(goal_points - 1)), dtype=myList)
#print(tree_states_index)

actions = env.get_actions()
states_list = [(pos_white.x, pos_white.y, pos_yellow.x, pos_yellow.y,
                pos_red.x, pos_red.y)]
#actions =[(0,0,0,20,4),(0,0,0,130,3),(0,0,0,1,8)]
#Q = np.zeros((1, len(actions)))

env.reset()
#env.step(0,0,0,90,5)  # a, b, theta, phi, Vb

#for i in range(num_episodes):
level = 0
import numpy as np
import gym

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
from carom import Carom

from rl.agents import NAFAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess
from rl.core import Processor


class CaromProcessor(Processor):
    def process_reward(self, reward):
        return reward


ENV_NAME = 'Carom-v0'
# gym.undo_logger_setup()  # removed in newer gym versions

# Get the environment and extract the number of actions.
env = Carom(render=False)
np.random.seed(321)
env.seed(321)

assert len(env.action_space.shape) == 1
nb_actions = env.action_space.shape[0]

# Build all necessary models: V, mu, and L networks.
V_model = Sequential()
V_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
V_model.add(Dense(16))
V_model.add(Activation('relu'))
V_model.add(Dense(16))
V_model.add(Activation('relu'))
V_model.add(Dense(16))
V_model.add(Activation('relu'))
V_model.add(Dense(1))
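# The fragment ends after the V network. The sketch below shows how the other
# two NAF heads (mu and L) and the NAFAgent itself are usually assembled in
# keras-rl; the extra imports, layer sizes and training parameters are
# assumptions modelled on the stock keras-rl NAF example, not the original
# script.
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Concatenate
from tensorflow.keras.optimizers import Adam

# mu network: maps an observation to an action.
mu_model = Sequential()
mu_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
mu_model.add(Dense(16))
mu_model.add(Activation('relu'))
mu_model.add(Dense(16))
mu_model.add(Activation('relu'))
mu_model.add(Dense(nb_actions))
mu_model.add(Activation('linear'))

# L network: takes (action, observation) and outputs the entries of the
# lower-triangular matrix used to build the advantage term.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
x = Concatenate()([action_input, Flatten()(observation_input)])
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
x = Activation('linear')(x)
L_model = Model(inputs=[action_input, observation_input], outputs=x)

processor = CaromProcessor()
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model,
                 mu_model=mu_model, memory=memory, nb_steps_warmup=100,
                 random_process=random_process, gamma=.99,
                 target_model_update=1e-3, processor=processor)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

agent.fit(env, nb_steps=50000, visualize=False, verbose=1)
agent.save_weights('naf_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
agent.test(env, nb_episodes=5, visualize=False)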
import numpy as np
from carom import Carom
import gym
from vpython import *

nb_rows = 1000
nb_good_steps = 0

env = Carom(render=True)
np.random.seed(32)
env.seed(32)

# demoTable.npy stores an object-dtype array, so newer numpy needs allow_pickle.
demo_table = np.load('demoTable.npy', allow_pickle=True)

# Replay ten randomly chosen demonstration shots with rendering enabled.
for i in range(10):
    index = np.random.choice(1000)
    env.state = demo_table[index][0]
    pos_white = vector(env.state[0], env.state[1], 0)
    pos_yellow = vector(env.state[2], env.state[3], 0)
    pos_red = vector(env.state[4], env.state[5], 0)
    env.non_random_reset(pos_white, pos_yellow, pos_red)
    action = demo_table[index][1]
    env.step(action)
import numpy as np
from scipy import sparse
from carom import Carom
from Constants import RADIUS


def choose_action(Q, currentState):
    action_index = Q[currentState].argmax()
    return action_index


#actions =[(0,0,0,20,4),(0,0,0,130,3),(0,0,0,1,8)]
Q = np.load("Qmatrix.npy")
print(sparse.csr_matrix(Q))
print(len(Q))

env = Carom(render=True)
actions = env.get_actions()
state = 0

for i in range(len(Q) - 1):
    action_index = choose_action(Q, state)
    env.step(actions[action_index][0], actions[action_index][1],
             actions[action_index][2], actions[action_index][3],
             actions[action_index][4])
    state = state + 1
env.non_random_reset(pos[0], pos[1], pos[2])
env.render = True
env.step(action, rand=optimal_action, a=a, b=b, theta=theta)

# def B(b):
#     env.render = False
#     state = env.reset()
#     pos = env.arraystate2pos(state)
#     print(pos)
#     optimal_action = np.zeros(2)
#     action, optimal_action, a, b, theta = agent.test(env, nb_episodes=500000, visualize=False,
#                                                      nb_max_episode_steps=200, modif=True, pos=pos)
#     env.non_random_reset(pos[0], pos[1], pos[2])
#     env.render = True
#     env.step(action, rand=optimal_action, a=a, b=b, theta=theta)

env = Carom(render=False)
check = False


def B(b):
    # Button callback: load the trained network on first use, then run it.
    env.render = False
    global check
    global agent
    if not check:
        agent = load_network(env)
        check = True
    run(env, agent)


scene.caption = ''
scene.append_to_caption('\n\n')
button(bind=B, text='Run')
from carom import Carom
import numpy as np

env = Carom(render=True)

#for i in range(13):
#    state, reward, done, info = env.step(90)
#    print(state, reward, done)

print(env.action_space.sample())