Example #1
from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv

isDiscrete = False


def main():

  environment = RacecarGymEnv(renders=True, isDiscrete=isDiscrete)
  environment.reset()

  # Debug sliders for driving the car by hand.
  targetVelocitySlider = environment._p.addUserDebugParameter("wheelVelocity", -1, 1, 0)
  steeringSlider = environment._p.addUserDebugParameter("steering", -1, 1, 0)

  while True:
    targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider)
    steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
    if isDiscrete:
      # Pack throttle and steering into one of nine discrete actions:
      # a velocity bin (0, 3 or 6) plus a steering offset (0, 1 or 2).
      if targetVelocity < -0.33:
        discreteAction = 0
      elif targetVelocity > 0.33:
        discreteAction = 6
      else:
        discreteAction = 3
      if steeringAngle > 0.17:
        discreteAction = discreteAction + 2
      elif steeringAngle > -0.17:
        discreteAction = discreteAction + 1
      action = discreteAction
    else:
      action = [targetVelocity, steeringAngle]
    state, reward, done, info = environment.step(action)
    obs = environment.getExtendedObservation()
    print("obs")
    print(obs)
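
# Minimal programmatic variant (illustrative, not from the original listing): drive the
# environment with random actions instead of the debug sliders, using the same gym-style
# reset/step API the examples above rely on.
def random_rollout(steps=100):
  env = RacecarGymEnv(renders=False, isDiscrete=True)
  obs = env.reset()
  for _ in range(steps):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
      obs = env.reset()
  return obs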
from baselines import deepq


def main():

    env = RacecarGymEnv(renders=True, isDiscrete=True)
    # Load a policy previously trained and saved with baselines' deepq
    # (see the training example further down).
    act = deepq.load("racecar_model.pkl")
    print(act)
    while True:
        obs, done = env.reset(), False
        print("===================================")        
        print("obs")
        print(obs)
        episode_rew = 0
        while not done:
            env.render()
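            # obs[None] adds a batch dimension and [0] unpacks the single
            # action returned by the loaded deepq policy.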
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #3
def main():

  # Assumes a module-level isDiscrete flag, as in the first example above.
  environment = RacecarGymEnv(renders=True, isDiscrete=isDiscrete)
  environment.reset()

  targetVelocitySlider = environment._p.addUserDebugParameter("wheelVelocity", -1, 1, 0)
  steeringSlider = environment._p.addUserDebugParameter("steering", -1, 1, 0)

  while (True):
    targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider)
    steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
    if (isDiscrete):
      discreteAction = 0
      if (targetVelocity < -0.33):
        discreteAction = 0
      else:
        if (targetVelocity > 0.33):
          discreteAction = 6
        else:
          discreteAction = 3
      if (steeringAngle > -0.17):
        if (steeringAngle > 0.17):
          discreteAction = discreteAction + 2
        else:
          discreteAction = discreteAction + 1
      action = discreteAction
    else:
      action = [targetVelocity, steeringAngle]
    state, reward, done, info = environment.step(action)
    obs = environment.getExtendedObservation()
    print("obs")
    print(obs)
def main():

    # Assumes a module-level isDiscrete flag, as in the examples above.
    environment = RacecarGymEnv(renders=True, isDiscrete=isDiscrete)
    environment.reset()

    targetVelocitySlider = environment._p.addUserDebugParameter(
        "wheelVelocity", -1, 1, 0)
    steeringSlider = environment._p.addUserDebugParameter("steering", -1, 1, 0)
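    # ~90 degree (1.57 rad) rotation about z, applied to the domino mesh's visual frame.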
    y2z = environment._p.getQuaternionFromEuler([0, 0, 1.57])
    meshScale = [1, 1, 1]
    visualShapeId = environment._p.createVisualShape(
        shapeType=environment._p.GEOM_MESH,
        fileName="domino/domino.obj",
        rgbaColor=[1, 1, 1, 1],
        specularColor=[0.4, .4, 0],
        visualFrameOrientation=y2z,
        meshScale=meshScale)

    boxDimensions = [0.5 * 0.00635, 0.5 * 0.0254, 0.5 * 0.0508]
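    # These half extents are in meters: a domino-sized box, roughly 6.35 x 25.4 x 50.8 mm full size.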
    collisionShapeId = environment._p.createCollisionShape(
        environment._p.GEOM_BOX, halfExtents=boxDimensions)
    while (True):
        targetVelocity = environment._p.readUserDebugParameter(
            targetVelocitySlider)
        steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
        if (isDiscrete):
            discreteAction = 0
            if (targetVelocity < -0.33):
                discreteAction = 0
            else:
                if (targetVelocity > 0.33):
                    discreteAction = 6
                else:
                    discreteAction = 3
            if (steeringAngle > -0.17):
                if (steeringAngle > 0.17):
                    discreteAction = discreteAction + 2
                else:
                    discreteAction = discreteAction + 1
            action = discreteAction
        else:
            action = [targetVelocity, steeringAngle]
        # Despite the name, qKey listens for the 'a' key; pressing it spawns a domino.
        qKey = ord('a')
        keys = environment._p.getKeyboardEvents()
        if qKey in keys and keys[qKey] & environment._p.KEY_WAS_TRIGGERED:
            objid = environment._p.createMultiBody(
                baseMass=0.025,
                baseCollisionShapeIndex=collisionShapeId,
                baseVisualShapeIndex=visualShapeId,
                basePosition=[0.04, 0.05, 0.06],
                useMaximalCoordinates=True)
            environment._p.resetBaseVelocity(objid, linearVelocity=[2, 2, 1])
        state, reward, done, info = environment.step(action)
        obs = environment.getExtendedObservation()
        print("obs")
        print(obs)
def main():

    env = RacecarGymEnv(renders=False, isDiscrete=True)
    model = deepq.models.mlp([64])
    act = deepq.learn(env,
                      q_func=model,
                      lr=1e-3,
                      max_timesteps=10000,
                      buffer_size=50000,
                      exploration_fraction=0.1,
                      exploration_final_eps=0.02,
                      print_freq=10,
                      callback=callback)  # callback is not shown in this listing; see the sketch below
    print("Saving model to racecar_model.pkl")
    act.save("racecar_model.pkl")
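
# Illustrative sketch (not part of the original listing): deepq.learn calls the callback
# with the training loop's (locals, globals) and stops training when it returns True.
# The reward threshold below is only a placeholder, not a value from the original code.
def callback(lcl, _glb):
    episode_rewards = lcl['episode_rewards']
    if len(episode_rewards) < 101:
        return False
    mean_100ep_reward = sum(episode_rewards[-101:-1]) / 100
    return mean_100ep_reward >= -50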
def main():

    env = RacecarGymEnv(renders=True, isDiscrete=True)
    act = deepq.load("racecar_model.pkl")
    print(act)
    while True:
        obs, done = env.reset(), False
        print("===================================")
        print("obs")
        print(obs)
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #7
config['trainBatchSize'] = 32
config['gamma'] = 0.9
config['learningRate'] = 0.001
config['netGradClip'] = 1
config['logFlag'] = True
config['logFileName'] = 'StabilizerOneDLog/traj'
config['logFrequency'] = 100
config['priorityMemoryOption'] = False
config['netUpdateOption'] = 'doubleQ'
config['netUpdateFrequency'] = 1
config['priorityMemory_absErrUpper'] = 5

import gym
from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv

env = RacecarGymEnv(renders=True, isDiscrete=True)
N_S = env.observation_space.shape[0]
N_A = env.action_space.n
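# N_S: size of the observation vector; N_A: number of discrete actions exposed by the env.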

netParameter = dict()
netParameter['n_feature'] = N_S
netParameter['n_hidden'] = [100]
netParameter['n_output'] = N_A

policyNet = MultiLayerNetRegression(netParameter['n_feature'],
                                    netParameter['n_hidden'],
                                    netParameter['n_output'])

targetNet = deepcopy(policyNet)

optimizer = optim.Adam(policyNet.parameters(), lr=config['learningRate'])
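
# Illustrative sketch (not part of the original config): with netUpdateOption = 'doubleQ',
# the bootstrap target for a minibatch is typically formed by letting policyNet pick the
# greedy next action while targetNet evaluates it. Shapes assumed: (batch,) for reward
# and done, (batch, n_feature) for nextState.
import torch

def double_q_target(policyNet, targetNet, reward, nextState, done, gamma=0.9):
    with torch.no_grad():
        nextAction = policyNet(nextState).argmax(dim=1, keepdim=True)
        nextQ = targetNet(nextState).gather(1, nextAction).squeeze(1)
        return reward + gamma * (1.0 - done) * nextQ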
"""


# Add parent dir to find the package. Only needed for a source-code build; a pip install doesn't need it.
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)

from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
isDiscrete = False




environment = RacecarGymEnv(renders=True, isDiscrete=isDiscrete)
environment.reset()

targetVelocitySlider = environment._p.addUserDebugParameter("wheelVelocity", -1, 1, 0)
steeringSlider = environment._p.addUserDebugParameter("steering", -1, 1, 0)

while (True):
  targetVelocity = environment._p.readUserDebugParameter(targetVelocitySlider)
  steeringAngle = environment._p.readUserDebugParameter(steeringSlider)
  if (isDiscrete):
    discreteAction = 0
    if (targetVelocity < -0.33):
      discreteAction = 0
    else:
      if (targetVelocity > 0.33):
        discreteAction = 6
      else:
        discreteAction = 3
    if (steeringAngle > -0.17):
      if (steeringAngle > 0.17):
        discreteAction = discreteAction + 2
      else:
        discreteAction = discreteAction + 1
    action = discreteAction
  else:
    action = [targetVelocity, steeringAngle]
  state, reward, done, info = environment.step(action)
  obs = environment.getExtendedObservation()
  print("obs")
  print(obs)
# Fix the random seeds
np.random.seed(args.seed)
torch.manual_seed(args.seed)

# GPU or CPU
device_name = 'cpu' if args.cuda < 0 else "cuda:{}".format(args.cuda)
device = torch.device(device_name)
set_device(device)

# Set up the CSV file for logging progress
score_file = os.path.join(args.log, 'progress.csv')
logger.add_tabular_output(score_file)

# Create the Gym environment
from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
env = RacecarGymEnv(renders=False, isDiscrete=False)
# Observation and action space dimensions
observation_space = env.observation_space
action_space = env.action_space

# Q-Network
qf_net = QNet(observation_space, action_space, args.h1, args.h2)
qf = DeterministicSAVfunc(
    observation_space, action_space, qf_net,
    data_parallel=args.data_parallel)  # deterministic state-action value function (?); lightly reshapes the q-net output

# target Q network theta1
targ_qf1_net = QNet(observation_space, action_space, args.h1, args.h2)
targ_qf1_net.load_state_dict(qf_net.state_dict())  # load the model weights (copied from qf_net)
targ_qf1 = CEMDeterministicSAVfunc(
    observation_space,