def main(args):
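    """Train a DDPG agent on the Carom environment: build the actor/critic
    networks, add Ornstein-Uhlenbeck exploration noise, run training, and save
    the actor/critic checkpoints."""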

    with tf.Session() as sess:
        #env = gym.make(args['env'])
        env = Carom(render=False)
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))

        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high
        # Ensure action bound is symmetric
        #assert (env.action_space.high == -env.action_space.low)
        #saver = tf.train.Saver()
        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))
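        # Ornstein-Uhlenbeck noise: temporally correlated exploration for the continuous actions.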
        args['use_gym_monitor'] = False
        if args['use_gym_monitor']:
            if not args['render_env']:
                env = wrappers.Monitor(env,
                                       args['monitor_dir'],
                                       video_callable=False,
                                       force=True)
            else:
                env = wrappers.Monitor(env, args['monitor_dir'], force=True)
        load = False
        plot_anim = False
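        # Set load=True to restore the saved actor/critic checkpoints before training.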
        if load:
            actor.saver.restore(sess, './actor-model-2b')
            critic.saver.restore(sess, './critic-model-2b')
        train(sess, env, args, actor, critic, actor_noise, load, plot_anim)
        actor.saver.save(sess, './actor-model-2b')
        critic.saver.save(sess, './critic-model-2b')
        if args['use_gym_monitor']:
            env.monitor.close()
Example #2
from carom import Carom
from Constants import *
import numpy as np
from vpython import sleep


def choose_branch(Q, state):
    chosen_branch = Q[state].argmax()
    return chosen_branch


tree_actions_index = np.load("tree_actions_index.npy")
Q = np.load("treeQmatrix.npy")
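# Q[state] holds one value per branch; the greedy branch picks an action index
# out of tree_actions_index for the current tree level.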

env = Carom(render=True)
env.reset()
actions = env.get_actions()
nb_states = 0
index2 = 0
state = 0
episode_reward = 0
nb_branches = len(tree_actions_index[0, 0])

for i in range(tree_actions_index.shape[0]):
    nb_states += nb_branches**i

for level in range(tree_actions_index.shape[0]):
    chosen_branch = choose_branch(Q, state)
    action_index = int(tree_actions_index[level, index2][chosen_branch])
    reward, coll_r = env.step3(actions[action_index][0],
                               actions[action_index][1],
Example #3
from carom import Carom
#import random
#from Parameters import RADIUS
#import numpy as np
#from scipy import sparse
from Constants import *
from vpython import *

# pos_white = vector(-SURFACE_WIDTH/3,0,0)
# pos_yellow = vector(-SURFACE_WIDTH/4,- INIT_DIST/2,0)
# pos_red = vector(SURFACE_WIDTH/4 -0.2,-0.5,0)
# pos_white = vector(-SURFACE_WIDTH/2,0,0)
# pos_yellow = vector(-SURFACE_WIDTH/2 + 0.03,SURFACE_WIDTH/2 - 0.07,0)
# pos_red = vector(-SURFACE_WIDTH/2 - 0.4,SURFACE_WIDTH/2 - 0.07,0)
env = Carom(render=False)
#env.reset(pos_white, pos_yellow, pos_red)
sleep(2)
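# Shot parameters: contact offsets a and b (in units of RADIUS), angles theta and phi, and speed V.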
a = -0.2 * RADIUS
b = 0 * RADIUS
theta = 5
phi = 85
V = 5
actions = env.get_fixed_actions()
nb_shots = 5
shots = []

for i in range(nb_shots):
    coll_reward = 0
    while coll_reward == 0:
        #env.reset(pos_white, pos_yellow, pos_red)
        env.reset()
import numpy as np
import gym
from carom import Carom
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
from tensorflow.keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

ENV_NAME = 'Carom-v0'

# Get the environment and extract the number of actions.
env = Carom(render=False)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
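# Linear output layer: one Q-value per discrete action.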
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())
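# Hedged sketch (not part of the original snippet): the usual keras-rl continuation
# for a model like the one above. Memory size, policy, and training lengths are
# placeholder values, not the author's settings.
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
               nb_steps_warmup=100, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)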
import gym
import numpy as np
from tensorflow.keras.layers import Activation, Concatenate, Dense, Flatten, Input
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from rl.agents import DDPGAgent
from rl.memory import *
from carom import Carom

ENV_NAME = 'Carom-v0'
# gym.undo_logger_setup()

# Get the environment and extract the number of actions.
env = Carom(render=False)
np.random.seed(323)
env.seed(323)
assert len(env.action_space.shape) == 1
nb_actions = env.action_space.shape[0]

# Next, we build a very simple model.
actor = Sequential()
actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
actor.add(Dense(16))
actor.add(Activation('relu'))
actor.add(Dense(16))
actor.add(Activation('relu'))
actor.add(Dense(16))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('linear'))
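# Hedged sketch (not part of the original snippet): a typical keras-rl critic and
# DDPGAgent setup for the actor above. Layer sizes and hyperparameters are
# placeholder values, not the author's settings.
from rl.random import OrnsteinUhlenbeckProcess

action_input = Input(shape=(nb_actions, ), name='action_input')
observation_input = Input(shape=(1, ) + env.observation_space.shape,
                          name='observation_input')
flattened_observation = Flatten()(observation_input)
x = Concatenate()([action_input, flattened_observation])
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)

memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99,
                  target_model_update=1e-3)
agent.compile(Adam(lr=1e-3, clipnorm=1.), metrics=['mae'])
agent.fit(env, nb_steps=50000, visualize=False, verbose=1)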
import numpy as np
from carom import Carom
import gym

nb_rows = 1000
nb_good_steps = 0
env = Carom(render=False)
np.random.seed(321)
env.seed(321)
demo_table = np.zeros((nb_rows, 4), dtype=object)
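# Each row of demo_table stores one rewarded transition: state, action, reward, next_state.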

while nb_good_steps < nb_rows:
    env.reset()
    state = np.array(env.state)
    action = env.action_space.sample()
    next_state, reward, done, info = env.step(action)
    if reward == 1:
        demo_table[nb_good_steps][0] = state
        demo_table[nb_good_steps][1] = action
        demo_table[nb_good_steps][2] = reward
        demo_table[nb_good_steps][3] = next_state
        nb_good_steps += 1
        print("demo: %d/%d" % (nb_good_steps, nb_rows))

np.save("demoTable", demo_table)
total_points = 0
sum_goal_points = 0
pos_white = P0_WHITE
pos_yellow = P0_YELLOW
pos_red = P0_RED
myList = []
letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'x', 'y', 'z']
for i in range(goal_points):
    sum_goal_points += nb_branches**(i + 1)

for i in range(nb_branches):
    myList.append((letters[i], np.float32))
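# myList defines a structured dtype with one named float field per branch.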
#num_episodes = 2000
#lr = .8
#y = .95
env = Carom(render=False)
#tree_states_index = np.zeros((goal_points,nb_branches**(goal_points-1)))
tree_actions_index = np.zeros((goal_points, nb_branches**(goal_points - 1)),
                              dtype=myList)

#print(tree_states_index)
actions = env.get_actions()

states_list = [(pos_white.x, pos_white.y, pos_yellow.x, pos_yellow.y,
                pos_red.x, pos_red.y)]
#actions =[(0,0,0,20,4),(0,0,0,130,3),(0,0,0,1,8)]
#Q = np.zeros((1, len(actions)))
env.reset()
#env.step(0,0,0,90,5) #a, b, thetha, phi, Vb
#for i in range(num_episodes):
level = 0
Example #8
import gym
import numpy as np
from carom import Carom
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
from rl.agents import NAFAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess
from rl.core import Processor

class CaromProcessor(Processor):
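    """Pass-through processor: rewards are forwarded to the agent unchanged."""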
    def process_reward(self, reward):
        return reward


ENV_NAME = 'Carom-v0'
gym.undo_logger_setup()


# Get the environment and extract the number of actions.
env = Carom(render=False)
np.random.seed(321)
env.seed(321)
assert len(env.action_space.shape) == 1
nb_actions = env.action_space.shape[0]

# Build all necessary models: V, mu, and L networks.
V_model = Sequential()
V_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
V_model.add(Dense(16))
V_model.add(Activation('relu'))
V_model.add(Dense(16))
V_model.add(Activation('relu'))
V_model.add(Dense(16))
V_model.add(Activation('relu'))
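# Scalar output: the NAF state-value V(s).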
V_model.add(Dense(1))
Example #9
import numpy as np
from carom import Carom
import gym
from vpython import *

nb_rows = 1000
nb_good_steps = 0
env = Carom(render=True)
np.random.seed(32)
env.seed(32)
demo_table = np.load('demoTable.npy')
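# Replay ten random demonstrations: restore the stored ball positions, then take the stored action.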

for i in range(10):
    index = np.random.choice(1000)
    env.state = demo_table[index][0]
    pos_white = vector(env.state[0], env.state[1], 0)
    pos_yellow = vector(env.state[2], env.state[3], 0)
    pos_red = vector(env.state[4], env.state[5], 0)
    env.non_random_reset(pos_white, pos_yellow, pos_red)
    action = demo_table[index][1]
    env.step(action)
import numpy as np
from scipy import sparse
from carom import Carom
from Constants import RADIUS


def choose_action(Q, currentState):
    action_index = Q[currentState].argmax()
    return action_index


#actions =[(0,0,0,20,4),(0,0,0,130,3),(0,0,0,1,8)]
Q = np.load("Qmatrix.npy")
print(sparse.csr_matrix(Q))
print(len(Q))
env = Carom(render=True)
actions = env.get_actions()

state = 0
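# Greedy playback: at each step take the highest-valued action for the current state index.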
for i in range(len(Q) - 1):
    action_index = choose_action(Q, state)
    env.step(actions[action_index][0], actions[action_index][1],
             actions[action_index][2], actions[action_index][3],
             actions[action_index][4])
    state = state + 1
    env.non_random_reset(pos[0], pos[1], pos[2])
    env.render = True
    env.step(action, rand=optimal_action, a=a, b=b, theta=theta)


# def B(b):
#     env.render = False
#     state = env.reset()
#     pos = env.arraystate2pos(state)
#     print(pos)
#     optimal_action = np.zeros(2)
#     action, optimal_action, a, b, theta = agent.test(env, nb_episodes=500000, visualize=False, nb_max_episode_steps=200, modif = True, pos = pos)
#     env.non_random_reset(pos[0], pos[1], pos[2])
#     env.render = True
#     env.step(action, rand = optimal_action, a = a, b = b, theta = theta)
env = Carom(render=False)
check = False


def B(b):
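    """Button callback: load the trained network once (lazily), then run it in the rendered scene."""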
    env.render = False
    global check
    global agent
    if not check:
        agent = load_network(env)
    check = True
    run(env, agent)
    scene.caption = ''
    scene.append_to_caption('\n\n')
    button(bind=B, text='Run')
from carom import Carom
import numpy as np
env = Carom(render=True)
#for i in range(13):
#    state, reward, done, info = env.step(90)
#    print(state, reward, done)
print(env.action_space.sample())