def main(objectClass):
  '''Entrypoint to the program.
  - Input objectClass: Folder in 3D Net database.
  '''

  # PARAMETERS =====================================================================================

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(True)

  # RUN TEST =======================================================================================

  objectNames = threeDNet.GetObjectNames(objectClass)

  for objectIdx, objectName in enumerate(objectNames):
    print("Showing object {}: ".format(objectIdx + 1) + objectName)
    fullObjName, objScale = threeDNet.GetObjectByName(objectClass, objectName)
    objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(fullObjName, objScale)
    raw_input("Press [Enter] to continue...")
    rlEnv.RemoveObject(objHandle)

  print("Finished.")
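# Hedged usage sketch for the viewer script above: the guard and the
# object-class folder name "mug" are assumptions; substitute any folder
# present in your 3DNet database.
if __name__ == "__main__":
  main("mug")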
def __init__(self, params):
  '''Initializes openrave environment, parameters, and first episode.
  - Input params: System parameters data structure.
  '''

  RlEnvironment.__init__(self, params)

  # parameters
  self.nObjects = params["nObjects"]
  self.nSupportObjects = params["nSupportObjects"]
  self.objectFolder = params["objectFolder"]
  self.supportObjectFolder = params["supportObjectFolder"]
  self.graspFrictionCone = params["graspFrictionCone"]
  self.graspMinDistFromBottom = params["graspMinDistFromBottom"]
  self.placeOrientTolerance = params["placeOrientTolerance"]
  self.placePosTolerance = params["placePosTolerance"]
  self.placeHeightTolerance = params["placeHeightTolerance"]

  # initialization
  self.InitializeHandRegions()
  self.objectFileNames = os.listdir(self.objectFolder)
  self.objectFileNames = fnmatch.filter(self.objectFileNames, "*.dae")
  self.supportObjectFileNames = os.listdir(self.supportObjectFolder)
  self.supportObjectFileNames = fnmatch.filter(self.supportObjectFileNames, "*.dae")

  # internal state
  self.objects = []
  self.supportObjects = []
  self.kinBodyCache = {}
  self.ResetEpisode()
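# Sketch of the params structure this constructor consumes. The key names
# come from the assignments above; the values are hypothetical placeholders,
# and the RlEnvironment base class may require additional keys.
exampleParams = {
  "nObjects": 2, "nSupportObjects": 1,
  "objectFolder": "/path/to/objects",          # directory of *.dae meshes
  "supportObjectFolder": "/path/to/supports",  # directory of *.dae meshes
  "graspFrictionCone": 10.0, "graspMinDistFromBottom": 0.02,
  "placeOrientTolerance": 0.1, "placePosTolerance": 0.02,
  "placeHeightTolerance": 0.01}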
def Main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  params = loadmat("parameters.mat", squeeze_me=True)
  randomSeed = params["randomSeed"]
  tMax = params["tMax"]
  nEpisodes = params["nEpisodes"]
  unbiasOnEpisode = params["unbiasOnEpisode"]
  epsilon = params["epsilon"]
  loadQFunction = params["loadQFunction"]
  saveFileName = params["saveFileName"]

  # INITIALIZATION =================================================================================

  # set random seeds
  seed(randomSeed)

  # initialize agent
  agent = RlAgentHsaLimitedSensor(params)
  if loadQFunction: agent.LoadQFunction()

  # RUN TEST =======================================================================================

  episodeReturn = []; episodeTime = []

  for episode in xrange(nEpisodes):

    startTime = time()
    episodeReturn.append(0)
    if episode >= unbiasOnEpisode: epsilon = 0

    env = RlEnvironment(params)
    s = env.GetState()
    a, i, o = agent.GetActionsAndObservations(s, epsilon)

    for t in xrange(tMax):
      r = env.Transition(a)
      ss = env.GetState()
      aa, ii, oo = agent.GetActionsAndObservations(ss, epsilon)
      agent.UpdateQFunction(o, i, r, oo, ii)
      s = ss; a = aa; o = oo; i = ii
      episodeReturn[-1] += r

    print("Episode {} had return {}.".format(episode, episodeReturn[-1]))
    episodeTime.append(time() - startTime)

  print("Agent learned {} values.".format(agent.GetQTableSize()))

  saveData = {"episodeReturn":episodeReturn, "episodeTime":episodeTime,
    "nValuesLearned":agent.GetQTableSize()}
  saveData.update(params)
  savemat(saveFileName, saveData)
  agent.SaveQFunction()
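# Hedged sketch: write a parameters.mat compatible with the loadmat call in
# Main() above. The key names come from that script; the values shown are
# placeholders, not recommended settings.
from scipy.io import savemat

savemat("parameters.mat", {
  "randomSeed": 0, "tMax": 10, "nEpisodes": 1000, "unbiasOnEpisode": 900,
  "epsilon": 1.0, "loadQFunction": 0, "saveFileName": "results.mat"})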
def Main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  params = loadmat("parameters.mat", squeeze_me=True)
  randomSeed = params["randomSeed"]
  tMax = params["tMax"]
  nEpisodes = params["nEpisodes"]
  saveFileName = params["saveFileName"]

  # INITIALIZATION =================================================================================

  # set random seeds
  seed(randomSeed)

  # initialize agent
  agent = RlAgentTd(params)

  # RUN TEST =======================================================================================

  episodeReturn = []; episodeTime = []

  for episode in xrange(nEpisodes):

    startTime = time()
    episodeReturn.append(0)

    env = RlEnvironment(params)
    s = env.GetState()
    a = agent.GetAction(s)

    for t in xrange(tMax):
      r = env.Transition(a)
      ss = env.GetState()
      aa = agent.GetAction(ss)
      agent.UpdateQFunction(s, a, r, ss, aa)
      s = ss; a = aa
      episodeReturn[-1] += r

    print("Episode {} had return {}.".format(episode, episodeReturn[-1]))
    episodeTime.append(time() - startTime)

  print("Agent learned {} values.".format(agent.GetQTableSize()))

  saveData = {"episodeReturn":episodeReturn, "episodeTime":episodeTime,
    "nValuesLearned":agent.GetQTableSize()}
  saveData.update(params)
  savemat(saveFileName, saveData)
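# The UpdateQFunction(s, a, r, ss, aa) call above has the shape of an
# on-policy SARSA update. This is a hedged sketch of a tabular version;
# alpha and gamma are assumed parameters, and the agent's real
# implementation is not shown in this file.
from collections import defaultdict

class TabularSarsa(object):

  def __init__(self, alpha=0.1, gamma=0.98):
    self.alpha = alpha; self.gamma = gamma
    self.Q = defaultdict(float)  # maps (state, action) -> estimated value

  def UpdateQFunction(self, s, a, r, ss, aa):
    # Q(s,a) <- Q(s,a) + alpha * (r + gamma * Q(ss,aa) - Q(s,a))
    target = r + self.gamma * self.Q[(ss, aa)]
    self.Q[(s, a)] += self.alpha * (target - self.Q[(s, a)])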
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # objects
  directory = "/home/mgualti/Data/3DNet/Cat10_ModelDatabase/mug_upright/"

  # INITIALIZATION =================================================================================

  rlEnv = RlEnvironment(showViewer=True, enablePhysics=False, removeTable=True)

  # RUN TEST =======================================================================================

  fileNames = os.listdir(directory)
  fileNames = fnmatch.filter(fileNames, "*.ply")

  for fileName in fileNames:
    threeDNetName = fileName[:-4]
    rlEnv.env.Load(directory + "/" + fileName)
    body = rlEnv.env.GetKinBody(threeDNetName)
    raw_input(threeDNetName)
    rlEnv.env.Remove(body)
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # objects
  meshDirectory = "/home/mgualti/Data/3DNet/Cat10_ModelDatabase/bottle/"
  objectsToSkip = ["8cf1cc180de4ecd0a826d25a0acb0476.ply"] # z-axis not up
  objectsToSkip += ["b0652a09588f293c7e95755f464f6241.ply",
    "91a1f4e7a5eab4eab05e5be75e72ca3c.ply",
    "70e77c09aca88d4cf76fe74de9d44699.ply"] # difficult to grasp
  meshExtension = "ply"
  scale = 0.25

  # environment
  removeTable = True
  showViewer = True

  # INITIALIZATION =================================================================================

  params = locals()
  rlEnv = RlEnvironment(params)

  # RUN TEST =======================================================================================

  fileNames = os.listdir(meshDirectory)
  fileNames = fnmatch.filter(fileNames, "*." + meshExtension)

  for fileName in fileNames:
    if fileName in objectsToSkip: continue
    objectName = fileName[:-4]
    rlEnv.env.Load(meshDirectory + "/" + fileName, {'scalegeometry':str(scale)})
    body = rlEnv.env.GetKinBody(objectName)
    raw_input(objectName)
    rlEnv.env.Remove(body)
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # system
  gpuId = 0

  # objects
  objectScale = [0.09, 0.17]
  nObjects = 1000
  directory = "/home/mgualti/Data/3DNet/Cat200_ModelDatabase/plate/"

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.60
  viewWorkspace = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]

  # visualization/saving
  showViewer = False
  showSteps = False
  plotImages = False

  # INITIALIZATION =================================================================================

  rlEnv = RlEnvironment(showViewer, removeTable=True)
  rlAgent = RlAgent(rlEnv, gpuId)

  # RUN TEST =======================================================================================

  for objIdx in xrange(nObjects):
    obj = rlEnv.Place3DNetObjectAtOrigin(directory, objectScale, "plate-{}".format(objIdx), True)
    cloud, normals = rlAgent.GetFullCloudAndNormals(viewCenter, viewKeepout, viewWorkspace, False)
    point_cloud.SaveMat("plate-{}.mat".format(objIdx), cloud, normals)
    rlAgent.PlotCloud(cloud)
    if plotImages: point_cloud.Plot(cloud, normals, 2)
    if showSteps: raw_input("Placed plate-{}.".format(objIdx))
    rlEnv.RemoveObjectSet([obj])
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # system
  gpuId = 0

  # objects
  objectHeight = [0.007, 0.013]
  objectRadius = [0.030, 0.045]
  nObjects = 1000

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.60
  viewWorkspace = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]

  # visualization/saving
  showViewer = False
  showSteps = False
  plotImages = False

  # INITIALIZATION =================================================================================

  rlEnv = RlEnvironment(showViewer, removeTable=True)
  rlAgent = RlAgent(rlEnv, gpuId)

  # RUN TEST =======================================================================================

  for objIdx in xrange(nObjects):
    obj = rlEnv.PlaceCylinderAtOrigin(objectHeight, objectRadius, "cylinder-{}".format(objIdx), True)
    cloud, normals = rlAgent.GetFullCloudAndNormals(viewCenter, viewKeepout, viewWorkspace)
    point_cloud.SaveMat("cylinder-{}.mat".format(objIdx), cloud, normals)
    rlAgent.PlotCloud(cloud)
    if plotImages: point_cloud.Plot(cloud, normals, 2)
    if showSteps: raw_input("Placed cylinder-{}.".format(objIdx))
    rlEnv.RemoveObjectSet([obj])
def __init__(self, params):
  '''Initializes openrave environment, parameters, and episode.
  - Input params: System parameters data structure.
  '''

  RlEnvironment.__init__(self, params)

  # parameters
  self.colors = array([ \
    (1.0, 0.0, 0.0, 0.5), (0.0, 1.0, 0.0, 0.5), (0.0, 0.0, 1.0, 0.5), (0.0, 1.0, 1.0, 0.5),
    (1.0, 0.0, 1.0, 0.5), (1.0, 1.0, 0.0, 0.5), (0.5, 1.0, 0.0, 0.5), (0.5, 0.0, 1.0, 0.5),
    (0.0, 0.5, 1.0, 0.5), (1.0, 0.5, 0.0, 0.5), (1.0, 0.0, 0.5, 0.5), (0.0, 1.0, 0.5, 0.5)])

  # internal state
  self.objects = []
  self.supportObjects = []
  self.ResetEpisode()
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # system
  randomSeed = 0

  # environment
  removeTable = True

  # objects
  objectHeight = [0.005, 0.020]
  objectRadius = [0.040, 0.060]
  nObjects = 1000

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.60
  viewWorkspace = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]
  voxelSize = 0.002

  # visualization/saving
  showViewer = False
  showSteps = False
  plotImages = False

  # INITIALIZATION =================================================================================

  params = locals()
  seed(randomSeed)
  rlEnv = RlEnvironment(params)

  # RUN TEST =======================================================================================

  for objIdx in xrange(nObjects):

    # Generate mesh, save mesh, and get points.
    objectName = "coaster-{}".format(objIdx)
    obj = rlEnv.GenerateCylinderMesh(objectHeight, objectRadius, objectName)
    cloud = rlEnv.GetFullCloud(viewCenter, viewKeepout, viewWorkspace,
      add45DegViews=False, computeNormals=False, voxelSize=voxelSize)

    # Compute object bounding box.
    workspace = array([[min(cloud[:, 0]), max(cloud[:, 0])],
                       [min(cloud[:, 1]), max(cloud[:, 1])],
                       [min(cloud[:, 2]), max(cloud[:, 2])]])

    # Save metadata.
    data = {"cloud":cloud, "workspace":workspace, "height":obj.height, "radius":obj.radius}
    savemat(objectName + ".mat", data)

    # Optional visualization for debugging.
    rlEnv.PlotCloud(cloud)
    if plotImages: point_cloud.Plot(cloud)
    if showSteps: raw_input("Placed " + objectName + ".")

    # Remove the object before loading the next one.
    rlEnv.RemoveObjectSet([obj])
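# Hedged sketch: read back one of the coaster-*.mat files written above.
# The key names match the data dict in this script; the file name is an
# example.
from scipy.io import loadmat

data = loadmat("coaster-0.mat", squeeze_me=True)
cloud = data["cloud"]          # nPoints x 3 array of surface points
workspace = data["workspace"]  # 3 x 2 axis-aligned bounding box
print("{} points, radius {}".format(cloud.shape[0], data["radius"]))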
def __init__(self, params):
  '''Initializes openrave environment, parameters, and first episode.
  - Input params: System parameters data structure.
  '''

  RlEnvironment.__init__(self, params)

  # parameters
  self.nObjects = params["nObjects"]
  self.nSupportObjects = params["nSupportObjects"]
  self.objectFolder = params["objectFolder"]
  self.supportObjectFolder = params["supportObjectFolder"]
  self.placeOrientTolerance = params["placeOrientTolerance"]
  self.placeHeightTolerance = params["placeHeightTolerance"]
  self.rewardCapGrasps = params["rewardCapGrasps"]

  self.colors = array([ \
    (1.0, 0.0, 0.0, 0.5), (0.0, 1.0, 0.0, 0.5), (0.0, 0.0, 1.0, 0.5), (0.0, 1.0, 1.0, 0.5),
    (1.0, 0.0, 1.0, 0.5), (1.0, 1.0, 0.0, 0.5), (0.5, 1.0, 0.0, 0.5), (0.5, 0.0, 1.0, 0.5),
    (0.0, 0.5, 1.0, 0.5), (1.0, 0.5, 0.0, 0.5), (1.0, 0.0, 0.5, 0.5), (0.0, 1.0, 0.5, 0.5)])

  self.pointToRealRadiusError = 0.0001

  # initialization
  self.InitializeHandRegions()
  self.objectFileNames = os.listdir(self.objectFolder)
  self.objectFileNames = fnmatch.filter(self.objectFileNames, "*.dae")
  self.supportObjectFileNames = os.listdir(self.supportObjectFolder)
  self.supportObjectFileNames = fnmatch.filter(self.supportObjectFileNames, "*.dae")

  # internal state
  self.objects = []
  self.supportObjects = []
  self.ResetEpisode()
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # INITIALIZATION =================================================================================

  rlEnv = RlEnvironment(True)
  rlAgent = RlAgent(rlEnv)

  # RUN TEST =======================================================================================

  for placeIdx, placePose in enumerate(rlAgent.placePoses):
    print("Showing pose {}: ".format(placeIdx))
    print(placePose)
    rlAgent.MoveHandToPose(placePose)
    raw_input("Press [Enter] to continue...")

  print("Finished.")
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # system
  randomSeed = 0

  # environment
  removeTable = True

  # objects
  objectDirectory = "/home/mgualti/Data/3DNet/Cat10_ModelDatabase/bottle"
  objectsToSkip = ["8cf1cc180de4ecd0a826d25a0acb0476.ply"] # z-axis not up
  objectsToSkip += ["b0652a09588f293c7e95755f464f6241.ply",
    "91a1f4e7a5eab4eab05e5be75e72ca3c.ply",
    "70e77c09aca88d4cf76fe74de9d44699.ply"] # difficult to grasp
  objectScale = [0.10, 0.20]
  nObjects = 1000

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.60
  viewWorkspace = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]
  voxelSize = 0.002

  # visualization/saving
  showViewer = False
  showSteps = False
  plotImages = False

  # INITIALIZATION =================================================================================

  params = locals()
  seed(randomSeed)
  rlEnv = RlEnvironment(params)

  # RUN TEST =======================================================================================

  for objIdx in xrange(nObjects):

    # Load mesh, save mesh, and get points and normals.
    objectName = "bottle-{}".format(objIdx)
    obj = rlEnv.GenerateMeshFromMesh(objectDirectory, objectScale, objectsToSkip, objectName)
    cloud, normals = rlEnv.GetFullCloud(viewCenter, viewKeepout, viewWorkspace,
      add45DegViews=False, computeNormals=True, voxelSize=voxelSize)

    # Occasionally, the normals calculation fails. Replace each missing normal
    # with the normal of the nearest point that has one. (Note nearestIdx
    # indexes into the NaN-free subset, not the full array.)
    if isnan(normals).any():
      nanIdx = sum(isnan(normals), axis=1) > 0
      notNanIdx = logical_not(nanIdx)
      nanFreeTree = cKDTree(cloud[notNanIdx, :])
      d, nearestIdx = nanFreeTree.query(cloud[nanIdx, :])
      normals[nanIdx, :] = normals[notNanIdx, :][nearestIdx, :]

    # Compute object bounding box.
    workspace = array([[min(cloud[:, 0]), max(cloud[:, 0])],
                       [min(cloud[:, 1]), max(cloud[:, 1])],
                       [min(cloud[:, 2]), max(cloud[:, 2])]])

    # Save metadata.
    data = {"cloud":cloud, "normals":normals, "workspace":workspace, "scale":obj.scale}
    savemat(objectName + ".mat", data)

    # Optional visualization for debugging.
    rlEnv.PlotCloud(cloud)
    if plotImages: point_cloud.Plot(cloud, normals, 5)
    if showSteps: raw_input("Placed " + objectName + ".")

    # Remove the object before loading the next one.
    rlEnv.RemoveObjectSet([obj])
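# Self-contained demo of the nearest-neighbor NaN repair used above, since
# the indexing is easy to get wrong: nearestIdx refers to the NaN-free
# subset, so replacements must be gathered from normals[notNanIdx, :].
from numpy import array, isnan, logical_not, nan, sum
from scipy.spatial import cKDTree

cloud = array([[0.0, 0, 0], [1.0, 0, 0], [1.1, 0, 0]])
normals = array([[0.0, 0, 1], [0.0, 1, 0], [nan, nan, nan]])

nanIdx = sum(isnan(normals), axis=1) > 0
notNanIdx = logical_not(nanIdx)
nanFreeTree = cKDTree(cloud[notNanIdx, :])
d, nearestIdx = nanFreeTree.query(cloud[nanIdx, :])
normals[nanIdx, :] = normals[notNanIdx, :][nearestIdx, :]
print(normals)  # the last normal becomes [0, 1, 0], copied from its neighbor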
def main(saveFileSuffix):
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # objects
  objectClass = "mug_train"
  randomScale = True
  targetObjectAxis = array([0, 0, 1])
  maxAngleFromObjectAxis = 20 * (pi / 180)
  maxObjectTableGap = 0.03

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.50
  viewWorkspace = [(-1, 1), (-1, 1), (0.002, 1)]

  # grasps
  graspDetectMode = 0 # 0=sample, 1=sample+label
  nGraspSamples = 100
  graspScoreThresh = 350

  # learning
  nValueIterations = 70
  nDataIterations = 50
  nGraspIterations = 20
  pickEpsilon = 1.0
  placeEpsilon = 1.0
  minPickEpsilon = 0.10
  minPlaceEpsilon = 0.10
  pickEpsilonDelta = 0.05
  placeEpsilonDelta = 0.05
  maxExperiences = 25000
  trainingBatchSize = 25000
  unbiasOnIteration = nValueIterations - 5

  # visualization/saving
  saveFileName = "results" + saveFileSuffix + ".mat"
  recordLoss = True
  showViewer = False
  showSteps = False

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(showViewer)
  rlAgent = RlAgent(rlEnv)
  nPlaceOptions = len(rlAgent.placePoses)
  experienceDatabase = []

  # RUN TEST =======================================================================================

  averageReward = []
  placeActionCounts = []
  trainLosses = []
  testLosses = []
  databaseSize = []
  iterationTime = []

  for valueIterationIdx in xrange(nValueIterations):

    print("Iteration {}. Epsilon pick: {}, place: {}".format(
      valueIterationIdx, pickEpsilon, placeEpsilon))

    # 1. Collect data for this training iteration.

    iterationStartTime = time.time()
    R = []
    placeCounts = zeros(nPlaceOptions)

    # check if it's time to unbias data
    if valueIterationIdx >= unbiasOnIteration:
      maxExperiences = trainingBatchSize # selects all recent experiences, unbiased
      pickEpsilon = 0 # estimating value function of actual policy
      placeEpsilon = 0 # estimating value function of actual policy

    for dataIterationIdx in xrange(nDataIterations):

      # place random object in random orientation on table
      fullObjName, objScale = threeDNet.GetRandomObjectFromClass(objectClass, randomScale)
      objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(fullObjName, objScale)

      # move the hand to view position and capture a point cloud
      cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
        viewCenter, viewKeepout, viewWorkspace)
      rlAgent.PlotCloud(cloud)

      # detect grasps in the sensory data
      grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices, nGraspSamples,
        graspScoreThresh, graspDetectMode)
      rlAgent.PlotGrasps(grasps)
      if showSteps: raw_input("Acquired grasps.")

      if len(grasps) == 0:
        print("No grasps found. Skipping iteration.")
        rlEnv.RemoveObject(objHandle)
        rlAgent.UnplotCloud()
        continue

      for graspIterationIdx in xrange(nGraspIterations):

        print("Episode {}.{}.{}.".format(valueIterationIdx, dataIterationIdx, graspIterationIdx))

        # perform pick action
        grasp = rlAgent.GetGrasp(grasps, pickEpsilon)
        s = rlEnv.GetState(rlAgent, grasp, None)
        rlAgent.PlotGrasps([grasp])
        if showSteps: print("Selected grasp.")

        # perform place action
        P = rlAgent.GetPlacePose(grasp, placeEpsilon)
        rlAgent.MoveHandToPose(P)
        ss = rlEnv.GetState(rlAgent, grasp, P)
        rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
        r = rlEnv.RewardBinary(objHandle, targetObjectAxis, maxAngleFromObjectAxis,
          maxObjectTableGap)
        print("The robot receives {} reward.".format(r))
        if showSteps: raw_input("Press [Enter] to continue...")

        # add experience to database
        experienceDatabase.append((s, ss, 0)) # grasp -> placement
        experienceDatabase.append((ss, None, r)) # placement -> end

        # record save data
        R.append(r)
        placeCounts += ss[1][len(s[1]) - nPlaceOptions:]

        # cleanup this grasp iteration
        rlAgent.UnplotGrasps()
        rlEnv.MoveObjectToPose(objHandle, objRandPose)

      # cleanup this data iteration
      rlEnv.RemoveObject(objHandle)
      rlAgent.UnplotCloud()

    # 2. Compute value labels for data.
    experienceDatabase = rlAgent.PruneDatabase(experienceDatabase, maxExperiences)
    Dl = rlAgent.DownsampleAndLabelData(experienceDatabase, trainingBatchSize)
    databaseSize.append(len(experienceDatabase))

    # 3. Train network from replay database.
    trainLoss, testLoss = rlAgent.Train(Dl, recordLoss=recordLoss)
    trainLosses.append(trainLoss)
    testLosses.append(testLoss)
    pickEpsilon = max(minPickEpsilon, pickEpsilon - pickEpsilonDelta)
    placeEpsilon = max(minPlaceEpsilon, placeEpsilon - placeEpsilonDelta)

    # 4. Save results.
    averageReward.append(mean(R))
    placeActionCounts.append(placeCounts)
    iterationTime.append(time.time() - iterationStartTime)
    saveData = {"objectClass":objectClass, "nGraspSamples":nGraspSamples,
      "graspScoreThresh":graspScoreThresh, "nValueIterations":nValueIterations,
      "nDataIterations":nDataIterations, "nGraspIterations":nGraspIterations,
      "pickEpsilon":pickEpsilon, "placeEpsilon":placeEpsilon,
      "minPickEpsilon":minPickEpsilon, "minPlaceEpsilon":minPlaceEpsilon,
      "pickEpsilonDelta":pickEpsilonDelta, "placeEpsilonDelta":placeEpsilonDelta,
      "maxExperiences":maxExperiences, "trainingBatchSize":trainingBatchSize,
      "averageReward":averageReward, "placeActionCounts":placeActionCounts,
      "trainLoss":trainLosses, "testLoss":testLosses, "databaseSize":databaseSize,
      "iterationTime":iterationTime, "placePoses":rlAgent.placePoses}
    savemat(saveFileName, saveData)
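# PruneDatabase is not defined in this file. Judging from the "selects all
# recent experiences" comment above, it plausibly keeps a bounded window of
# the newest experiences; this is a hedged sketch of that behavior only,
# and the real implementation may differ (e.g. random subsampling).
def PruneDatabase(experienceDatabase, maxExperiences):
  if len(experienceDatabase) <= maxExperiences:
    return experienceDatabase
  return experienceDatabase[-maxExperiences:]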
def __init__(self, showViewer=True, removeTable=False):
  '''Initializes openrave environment, etc.'''
  RlEnvironment.__init__(self, showViewer, removeTable)
def Main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  params = loadmat("parameters.mat", squeeze_me=True)
  randomSeed = params["randomSeed"]
  tMax = params["tMax"]
  nEpisodes = params["nEpisodes"]
  unbiasOnEpisode = params["unbiasOnEpisode"]
  epsilon = params["epsilon"]
  loadQFunction = params["loadQFunction"]
  saveQFunction = params["saveQFunction"]
  saveFileName = params["saveFileName"]

  # INITIALIZATION =================================================================================

  # set random seeds
  seed(randomSeed)

  # initialize agent
  agent = RlAgentDeictic(params)
  if loadQFunction: agent.LoadQFunction()

  # RUN TEST =======================================================================================

  nPlacedObjects = []
  episodeTime = []

  for episode in xrange(nEpisodes):

    startTime = time()
    R = 0
    nPlacedObjects.append(0)
    if episode >= unbiasOnEpisode: epsilon = 0

    env = RlEnvironment(params)
    s = env.GetState()
    a, i, o = agent.GetActionAndObservation(s, epsilon)

    for t in xrange(tMax):
      isPlace = env.IsPlace()
      r = env.Transition(a)
      if isPlace: nPlacedObjects[-1] += r > 0
      R += r
      ss = env.GetState()
      aa, ii, oo = agent.GetActionAndObservation(ss, epsilon)
      agent.UpdateQFunction(o, i, r, oo, ii)
      s = ss; a = aa; o = oo; i = ii

    print("Episode {} had return {}.".format(episode, R))
    episodeTime.append(time() - startTime)

  print("Agent learned {} values.".format(agent.GetQTableSize()))

  saveData = {"nPlacedObjects":nPlacedObjects, "episodeTime":episodeTime,
    "nValuesLearned":agent.GetQTableSize()}
  saveData.update(params)
  savemat(saveFileName, saveData)
  if saveQFunction: agent.SaveQFunction()
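# LoadQFunction/SaveQFunction suggest the Q-table is persisted between
# runs. A hedged sketch of one way to do that with pickle; the file name
# and storage format are assumptions, not the agent's actual format.
import pickle

def SaveQFunction(Q, fileName="qFunction.pkl"):
  with open(fileName, "wb") as f:
    pickle.dump(Q, f)

def LoadQFunction(fileName="qFunction.pkl"):
  with open(fileName, "rb") as f:
    return pickle.load(f)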
def main(objectClass, epsilon):
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # objects
  nObjects = 7
  randomObjectScale = True
  targetObjectAxis = array([0, 0, 1])
  maxAngleFromObjectAxis = 20 * (pi / 180)
  maxObjectTableGap = 0.02

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.50
  viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]
  objViewWorkspace = [(-1, 1), (-1, 1), (0.002, 1)]

  # grasps
  graspDetectMode = 1 # 0=sample, 1=sample+label
  nGraspSamples = 500
  graspScoreThresh = 300
  nGraspInliers = 2

  # testing
  weightsFileName = "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/image_iter_5000.caffemodel"
  nDataIterations = 300

  # visualization/saving
  saveFileName = "results-clutter-" + objectClass + "-epsilon" + str(epsilon) + ".mat"
  showViewer = False
  showSteps = False

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(showViewer)
  rlAgent = RlAgent(rlEnv)
  if epsilon < 1.0: rlAgent.LoadNetworkWeights(weightsFileName)
  Return = []

  # RUN TEST =======================================================================================

  for dataIterationIdx in xrange(nDataIterations):

    print("Iteration {}.".format(dataIterationIdx))

    # place clutter on table
    fullObjNames, objScales = threeDNet.GetRandomObjectSet(
      objectClass, nObjects, randomObjectScale)
    objHandles, objPoses = rlEnv.PlaceObjectSet(fullObjNames, objScales)
    objCloud, objCloudIdxs = rlEnv.AssignPointsToObjects(
      rlAgent, objHandles, viewCenter, viewKeepout, objViewWorkspace)
    if showSteps: raw_input("Objects placed.")

    # move the hand to view position and capture a point cloud
    cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
      viewCenter, viewKeepout, viewWorkspace)
    rlAgent.PlotCloud(cloud)
    if showSteps: raw_input("Point cloud.")

    # detect grasps in the sensory data
    graspsDetected = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices,
      nGraspSamples, graspScoreThresh, nGraspInliers, graspDetectMode)
    grasps = rlAgent.FilterGraspsWithNoPoints(graspsDetected, objCloud)
    if len(graspsDetected) > len(grasps):
      print("Filtered {} empty grasps.".format(len(graspsDetected) - len(grasps)))
    rlAgent.PlotGrasps(grasps)
    if showSteps: raw_input("Acquired grasps.")

    if len(grasps) == 0:
      print("No grasps found. Skipping iteration.")
      rlEnv.RemoveObjectSet(objHandles)
      rlAgent.UnplotGrasps()
      rlAgent.UnplotCloud()
      continue

    # perform pick action
    grasp = rlAgent.GetGrasp(grasps, epsilon)
    rlAgent.PlotGrasps([grasp])
    if showSteps: raw_input("Selected grasp.")

    # perform place action
    P = rlAgent.GetPlacePose(grasp, epsilon)
    rlAgent.MoveHandToPose(P)
    objHandle, objPose = rlEnv.GetObjectWithMaxGraspPoints(
      grasp, objHandles, objCloud, objCloudIdxs)
    rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
    r = rlEnv.RewardHeightExponential(objHandle, targetObjectAxis,
      maxAngleFromObjectAxis, maxObjectTableGap)
    print("The robot receives {} reward.".format(r))
    Return.append(r)
    if showSteps: raw_input("Press [Enter] to continue...")

    # cleanup this data iteration
    rlEnv.RemoveObjectSet(objHandles)
    rlAgent.UnplotGrasps()
    rlAgent.UnplotCloud()

  # Save results.
  saveData = {"nObjects":nObjects, "randomObjectScale":randomObjectScale,
    "targetObjectAxis":targetObjectAxis, "maxAngleFromObjectAxis":maxAngleFromObjectAxis,
    "maxObjectTableGap":maxObjectTableGap, "graspDetectMode":graspDetectMode,
    "nGraspSamples":nGraspSamples, "graspScoreThresh":graspScoreThresh,
    "weightsFileName":weightsFileName, "nDataIterations":nDataIterations,
    "epsilon":epsilon, "Return":Return}
  savemat(saveFileName, saveData)
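# GetGrasp(grasps, epsilon) follows the usual epsilon-greedy pattern: with
# probability epsilon take a random grasp, otherwise take the grasp the
# value network scores highest. A hedged sketch under that assumption;
# ScoreGrasp is a hypothetical stand-in for the network forward pass.
from random import choice, random

def GetGrasp(grasps, epsilon, ScoreGrasp):
  if random() < epsilon:
    return choice(grasps)  # explore: uniformly random grasp
  return max(grasps, key=ScoreGrasp)  # exploit: highest-valued grasp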
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # objects
  objectClass = "mug_train"
  randomScale = True
  targetObjectAxis = array([0, 0, 1])
  maxAngleFromObjectAxis = 20 * (pi / 180)
  maxObjectTableGap = 0.03

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.50
  viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]

  # grasps
  graspDetectMode = 1 # 0=sample, 1=sample+label
  nGraspSamples = 200
  graspScoreThresh = 300

  # learning
  nTrainingIterations = 100
  nEpisodes = 100
  nReuses = 10
  maxTimesteps = 10
  gamma = 0.98
  epsilon = 1.0
  epsilonDelta = 0.05
  minEpsilon = 0.05
  maxExperiences = 50000
  trainingBatchSize = 50000
  unbiasOnIteration = nTrainingIterations - 5

  # visualization/saving
  saveFileName = "results.mat"
  recordLoss = True
  showViewer = False
  showSteps = False

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(showViewer)
  rlAgent = RlAgent(rlEnv)
  nPlaceOptions = len(rlAgent.placePoses)
  experienceDatabase = []

  # RUN TEST =======================================================================================

  avgReturn = []
  avgGraspsDetected = []
  avgTopGraspsDetected = []
  placeHistograms = []
  avgGoodTempPlaceCount = []
  avgBadTempPlaceCount = []
  avgGoodFinalPlaceCount = []
  avgBadFinalPlaceCount = []
  trainLosses = []
  testLosses = []
  databaseSize = []
  iterationTime = []

  for trainingIteration in xrange(nTrainingIterations):

    # initialization
    iterationStartTime = time.time()
    print("Iteration: {}, Epsilon: {}".format(trainingIteration, epsilon))
    placeHistogram = zeros(nPlaceOptions)
    Return = []
    graspsDetected = []
    topGraspsDetected = []
    goodTempPlaceCount = []
    badTempPlaceCount = []
    goodFinalPlaceCount = []
    badFinalPlaceCount = []

    # check if it's time to unbias data
    if trainingIteration >= unbiasOnIteration:
      maxExperiences = trainingBatchSize # selects all recent experiences, unbiased
      epsilon = 0 # estimating value function of actual policy

    # for each episode/object placement
    for episode in xrange(nEpisodes):

      # place random object in random orientation on table
      fullObjName, objScale = threeDNet.GetRandomObjectFromClass(objectClass, randomScale)
      objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(fullObjName, objScale)

      # move the hand to view position(s) and capture a point cloud
      cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
        viewCenter, viewKeepout, viewWorkspace)

      # detect grasps in the sensor data
      grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices, nGraspSamples,
        graspScoreThresh, graspDetectMode)
      graspsStart = grasps
      graspsDetected.append(len(grasps))
      topGraspsCount = CountObjectTopGrasps(grasps, objRandPose, maxAngleFromObjectAxis)
      topGraspsDetected.append(topGraspsCount)

      if len(grasps) == 0:
        print("No grasps found. Skipping iteration.")
        rlEnv.RemoveObject(objHandle)
        continue

      rlAgent.PlotCloud(cloud)
      rlAgent.PlotGrasps(grasps)

      for reuse in xrange(nReuses):

        print("Episode {}.{}.{}.".format(trainingIteration, episode, reuse))
        if showSteps: raw_input("Beginning of episode. Press [Enter] to continue...")

        # initialize recording variables
        episodePlaceHistogram = zeros(nPlaceOptions)
        episodeReturn = 0
        episodeGoodTempPlaceCount = 0
        episodeBadTempPlaceCount = 0
        episodeGoodFinalPlaceCount = 0
        episodeBadFinalPlaceCount = 0
        graspDetectionFailure = False
        episodeExperiences = []

        # initial state and first action
        s, selectedGrasp = rlEnv.GetInitialState(rlAgent)
        a, grasp, place = rlAgent.ChooseAction(s, grasps, epsilon)
        rlAgent.PlotGrasps([grasp])

        # for each time step in the episode
        for t in xrange(maxTimesteps):

          ss, selectedGrasp, rr = rlEnv.Transition(rlAgent, objHandle, s, selectedGrasp,
            a, grasp, place, targetObjectAxis, maxAngleFromObjectAxis, maxObjectTableGap)
          ssIsPlacedTempGood = ss[1][1]
          ssIsPlacedTempBad = ss[1][2]
          ssIsPlacedFinalGood = ss[1][3]
          ssIsPlacedFinalBad = ss[1][4]
          if showSteps: raw_input("Transition {}. Press [Enter] to continue...".format(t))

          # re-detect only if a non-terminal placement just happened
          if ssIsPlacedTempGood and place is not None:
            cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
              viewCenter, viewKeepout, viewWorkspace)
            grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices, nGraspSamples,
              graspScoreThresh, graspDetectMode)
            graspsDetected.append(len(grasps))
            topGraspsCount = CountObjectTopGrasps(grasps, rlEnv.GetObjectPose(objHandle),
              maxAngleFromObjectAxis)
            topGraspsDetected.append(topGraspsCount)
            if len(grasps) == 0:
              print("Grasp detection failure.")
              graspDetectionFailure = True
              break
            rlAgent.PlotCloud(cloud)
            rlAgent.PlotGrasps(grasps)

          # get next action
          aa, ggrasp, pplace = rlAgent.ChooseAction(ss, grasps, epsilon)
          if ggrasp is not None: rlAgent.PlotGrasps([ggrasp])
          if showSteps: raw_input("Action {}. Press [Enter] to continue...".format(t))

          # add to database and record data
          episodeExperiences.append((s, a, rr, ss, aa))
          episodeReturn += (gamma**t) * rr
          if place is not None:
            episodeGoodTempPlaceCount += ssIsPlacedTempGood
            episodeBadTempPlaceCount += ssIsPlacedTempBad
            episodeGoodFinalPlaceCount += ssIsPlacedFinalGood
            episodeBadFinalPlaceCount += ssIsPlacedFinalBad
            placeHistogram += a[1]

          # prepare for next time step
          if ssIsPlacedTempBad or ssIsPlacedFinalGood or ssIsPlacedFinalBad: break
          s = ss; a = aa; grasp = ggrasp; place = pplace

        # cleanup this reuse
        if not graspDetectionFailure:
          experienceDatabase += episodeExperiences
          placeHistogram += episodePlaceHistogram
          Return.append(episodeReturn)
          goodTempPlaceCount.append(episodeGoodTempPlaceCount)
          badTempPlaceCount.append(episodeBadTempPlaceCount)
          goodFinalPlaceCount.append(episodeGoodFinalPlaceCount)
          badFinalPlaceCount.append(episodeBadFinalPlaceCount)
        rlEnv.MoveObjectToPose(objHandle, objRandPose)
        grasps = graspsStart

      # cleanup this episode
      rlEnv.RemoveObject(objHandle)
      rlAgent.UnplotGrasps()
      rlAgent.UnplotCloud()

    # 2. Compute value labels for data.
    experienceDatabase = rlAgent.PruneDatabase(experienceDatabase, maxExperiences)
    Dl = rlAgent.DownsampleAndLabelData(experienceDatabase, trainingBatchSize, gamma)
    databaseSize.append(len(experienceDatabase))

    # 3. Train network from replay database.
    trainLoss, testLoss = rlAgent.Train(Dl, recordLoss=recordLoss)
    trainLosses.append(trainLoss)
    testLosses.append(testLoss)
    epsilon = max(minEpsilon, epsilon - epsilonDelta)

    # 4. Save results.
    avgReturn.append(mean(Return))
    avgGraspsDetected.append(mean(graspsDetected))
    avgTopGraspsDetected.append(mean(topGraspsDetected))
    placeHistograms.append(placeHistogram)
    avgGoodTempPlaceCount.append(mean(goodTempPlaceCount))
    avgBadTempPlaceCount.append(mean(badTempPlaceCount))
    avgGoodFinalPlaceCount.append(mean(goodFinalPlaceCount))
    avgBadFinalPlaceCount.append(mean(badFinalPlaceCount))
    iterationTime.append(time.time() - iterationStartTime)
    saveData = {"objectClass":objectClass, "randomScale":randomScale,
      "maxAngleFromObjectAxis":maxAngleFromObjectAxis, "maxObjectTableGap":maxObjectTableGap,
      "nGraspSamples":nGraspSamples, "graspScoreThresh":graspScoreThresh,
      "graspDetectMode":graspDetectMode, "nTrainingIterations":nTrainingIterations,
      "nEpisodes":nEpisodes, "maxTimesteps":maxTimesteps, "gamma":gamma, "epsilon":epsilon,
      "minEpsilon":minEpsilon, "epsilonDelta":epsilonDelta, "maxExperiences":maxExperiences,
      "trainingBatchSize":trainingBatchSize, "avgReturn":avgReturn,
      "avgGraspsDetected":avgGraspsDetected, "avgTopGraspsDetected":avgTopGraspsDetected,
      "placeHistograms":placeHistograms, "avgGoodTempPlaceCount":avgGoodTempPlaceCount,
      "avgBadTempPlaceCount":avgBadTempPlaceCount,
      "avgGoodFinalPlaceCount":avgGoodFinalPlaceCount,
      "avgBadFinalPlaceCount":avgBadFinalPlaceCount, "trainLoss":trainLosses,
      "testLoss":testLosses, "databaseSize":databaseSize, "iterationTime":iterationTime,
      "placePoses":rlAgent.placePoses}
    savemat(saveFileName, saveData)
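# The exploration schedule above decays epsilon linearly by epsilonDelta
# per iteration down to minEpsilon, then forces epsilon to 0 on the final
# unbiased iterations. A quick standalone sketch of the resulting schedule:
nTrainingIterations, unbiasOnIteration = 100, 95
epsilon, epsilonDelta, minEpsilon = 1.0, 0.05, 0.05

schedule = []
for i in range(nTrainingIterations):
  if i >= unbiasOnIteration: epsilon = 0
  schedule.append(epsilon)
  epsilon = max(minEpsilon, epsilon - epsilonDelta)
print(schedule[:3], schedule[-6:])  # ~[1.0, 0.95, 0.9] ... ~[0.05, 0, 0, 0, 0, 0]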
def main(objectClass, epsilon):
  '''Entrypoint to the program.
  - Input objectClass: Folder in 3D Net database.
  '''

  # PARAMETERS =====================================================================================

  # objects
  randomScale = True
  targetObjectAxis = array([0, 0, 1])
  maxAngleFromObjectAxis = 20 * (pi / 180)
  maxObjectTableGap = 0.02

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.50
  viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]

  # grasps
  graspDetectMode = 1 # 0=sample, 1=sample+label
  nGraspSamples = 200
  graspScoreThresh = 300
  nGraspInliers = 3

  # learning
  weightsFileName = "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/image_iter_5000.caffemodel"
  nDataIterations = 300

  # visualization/saving
  showViewer = False
  showEveryStep = False
  saveFileName = "results-single-" + objectClass + "-epsilon" + str(epsilon) + ".mat"

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(showViewer)
  rlAgent = RlAgent(rlEnv)
  if epsilon < 1.0: rlAgent.LoadNetworkWeights(weightsFileName)
  Return = []

  # RUN TEST =======================================================================================

  # Collect data for this training iteration.
  for dataIterationIdx in xrange(nDataIterations):

    print("Iteration {}.".format(dataIterationIdx))

    # place object in random orientation on table
    fullObjName, objScale = threeDNet.GetRandomObjectFromClass(objectClass, randomScale)
    objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(fullObjName, objScale)

    # move the hand to view position and capture a point cloud
    cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
      viewCenter, viewKeepout, viewWorkspace)
    rlAgent.PlotCloud(cloud)

    # detect grasps in the sensory data
    grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices, nGraspSamples,
      graspScoreThresh, nGraspInliers, graspDetectMode)
    rlAgent.PlotGrasps(grasps)

    if len(grasps) == 0:
      print("No grasps found. Skipping iteration.")
      rlEnv.RemoveObject(objHandle)
      rlAgent.UnplotCloud()
      continue

    if showEveryStep: raw_input("Press [Enter] to continue...")

    # perform pick action
    grasp = rlAgent.GetGrasp(grasps, epsilon)
    rlAgent.PlotGrasps([grasp])
    if showEveryStep: raw_input("Press [Enter] to continue...")

    # perform place action
    P = rlAgent.GetPlacePose(grasp, epsilon)
    rlAgent.MoveHandToPose(P)
    rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
    r = rlEnv.RewardHeightExponential(objHandle, targetObjectAxis, maxAngleFromObjectAxis,
      maxObjectTableGap)
    print("The robot receives {} reward.".format(r))
    Return.append(r)
    if showEveryStep: raw_input("Press [Enter] to continue...")

    # cleanup this data iteration
    rlEnv.RemoveObject(objHandle)
    rlAgent.UnplotGrasps()
    rlAgent.UnplotCloud()

  # Save results.
  saveData = {"randomScale":randomScale, "targetObjectAxis":targetObjectAxis,
    "maxAngleFromObjectAxis":maxAngleFromObjectAxis, "maxObjectTableGap":maxObjectTableGap,
    "graspDetectMode":graspDetectMode, "nGraspSamples":nGraspSamples,
    "graspScoreThresh":graspScoreThresh, "weightsFileName":weightsFileName,
    "nDataIterations":nDataIterations, "epsilon":epsilon, "Return":Return}
  savemat(saveFileName, saveData)
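# Both test scripts take (objectClass, epsilon) as arguments. A hedged
# sketch of a command-line entry point for them; the argument handling is
# an assumption, not part of the original scripts.
import sys

if __name__ == "__main__":
  objectClass = sys.argv[1] if len(sys.argv) > 1 else "mug_test"
  epsilon = float(sys.argv[2]) if len(sys.argv) > 2 else 0.0
  main(objectClass, epsilon)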
def main(objectClass):
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # objects
  randomScale = True
  targetObjectAxis = array([0, 0, 1])
  maxAngleFromObjectAxis = 20 * (pi / 180)
  maxObjectTableGap = 0.03

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.50
  viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]

  # grasps
  graspDetectMode = 1 # 0=sample, 1=sample+label
  nGraspSamples = 200
  graspScoreThresh = 300

  # testing
  nEpisodes = 300
  maxTimesteps = 10
  gamma = 0.98
  epsilon = 0.0
  weightsFileName = \
    "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/dualImage_iter_5000.caffemodel"

  # visualization/saving
  saveFileName = "results-" + objectClass + ".mat"
  showViewer = False
  showSteps = False

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(showViewer)
  rlAgent = RlAgent(rlEnv)
  rlAgent.LoadNetworkWeights(weightsFileName)
  nPlaceOptions = len(rlAgent.placePoses)
  placeHistogram = zeros(nPlaceOptions)
  Return = []
  graspsDetected = []
  topGraspsDetected = []
  goodTempPlaceCount = []
  badTempPlaceCount = []
  goodFinalPlaceCount = []
  badFinalPlaceCount = []

  # RUN TEST =======================================================================================

  # for each episode/object placement
  for episode in xrange(nEpisodes):

    # place random object in random orientation on table
    fullObjName, objScale = threeDNet.GetRandomObjectFromClass(objectClass, randomScale)
    objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(fullObjName, objScale)
    rlAgent.MoveSensorToPose(rlAgent.GetStandardViewPose(viewCenter, viewKeepout))
    if showSteps: raw_input("Beginning of episode. Press [Enter] to continue...")

    # move the hand to view position(s) and capture a point cloud
    cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
      viewCenter, viewKeepout, viewWorkspace)
    rlAgent.PlotCloud(cloud)
    if showSteps: raw_input("Acquired point cloud. Press [Enter] to continue...")

    # detect grasps in the sensor data
    grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices, nGraspSamples,
      graspScoreThresh, graspDetectMode)
    graspsDetected.append(len(grasps))
    topGraspsCount = CountObjectTopGrasps(grasps, objRandPose, maxAngleFromObjectAxis)
    topGraspsDetected.append(topGraspsCount)

    if len(grasps) == 0:
      print("No grasps found. Skipping iteration.")
      rlEnv.RemoveObject(objHandle)
      continue

    rlAgent.PlotGrasps(grasps)
    print("Episode {}.".format(episode))
    if showSteps: raw_input("Acquired grasps. Press [Enter] to continue...")

    # initialize recording variables
    episodePlaceHistogram = zeros(nPlaceOptions)
    episodeReturn = 0
    episodeGoodTempPlaceCount = 0
    episodeBadTempPlaceCount = 0
    episodeGoodFinalPlaceCount = 0
    episodeBadFinalPlaceCount = 0
    graspDetectionFailure = False

    # initial state and first action
    s, selectedGrasp = rlEnv.GetInitialState(rlAgent)
    a, grasp, place = rlAgent.ChooseAction(s, grasps, epsilon)
    rlAgent.PlotGrasps([grasp])

    # for each time step in the episode
    for t in xrange(maxTimesteps):

      ss, selectedGrasp, rr = rlEnv.Transition(rlAgent, objHandle, s, selectedGrasp, a,
        grasp, place, targetObjectAxis, maxAngleFromObjectAxis, maxObjectTableGap)
      ssIsPlacedTempGood = ss[1][1]
      ssIsPlacedTempBad = ss[1][2]
      ssIsPlacedFinalGood = ss[1][3]
      ssIsPlacedFinalBad = ss[1][4]
      if showSteps: raw_input("Transition {}. Press [Enter] to continue...".format(t))

      # re-detect only if a non-terminal placement just happened
      if ssIsPlacedTempGood and place is not None:
        cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
          viewCenter, viewKeepout, viewWorkspace)
        rlAgent.UnplotGrasps()
        rlAgent.PlotCloud(cloud)
        if showSteps: raw_input("Acquired cloud. Press [Enter] to continue...")
        grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices, nGraspSamples,
          graspScoreThresh, graspDetectMode)
        graspsDetected.append(len(grasps))
        topGraspsCount = CountObjectTopGrasps(grasps, objRandPose, maxAngleFromObjectAxis)
        topGraspsDetected.append(topGraspsCount)
        if len(grasps) == 0:
          print("Grasp detection failure.")
          graspDetectionFailure = True
          break
        rlAgent.PlotGrasps(grasps)
        if showSteps: raw_input("Acquired grasps. Press [Enter] to continue...")

      # get next action
      aa, ggrasp, pplace = rlAgent.ChooseAction(ss, grasps, epsilon)
      if ggrasp is not None: rlAgent.PlotGrasps([ggrasp])
      if showSteps: raw_input("Action {}. Press [Enter] to continue...".format(t))

      # record data from transition
      episodeReturn += (gamma**t) * rr
      if place is not None:
        episodeGoodTempPlaceCount += ssIsPlacedTempGood
        episodeBadTempPlaceCount += ssIsPlacedTempBad
        episodeGoodFinalPlaceCount += ssIsPlacedFinalGood
        episodeBadFinalPlaceCount += ssIsPlacedFinalBad
        placeHistogram += a[1]

      # prepare for next time step
      if ssIsPlacedTempBad or ssIsPlacedFinalGood or ssIsPlacedFinalBad: break
      s = ss; a = aa; grasp = ggrasp; place = pplace

    # record this episode
    if not graspDetectionFailure:
      placeHistogram += episodePlaceHistogram
      Return.append(episodeReturn)
      goodTempPlaceCount.append(episodeGoodTempPlaceCount)
      badTempPlaceCount.append(episodeBadTempPlaceCount)
      goodFinalPlaceCount.append(episodeGoodFinalPlaceCount)
      badFinalPlaceCount.append(episodeBadFinalPlaceCount)

    # cleanup this episode
    rlEnv.RemoveObject(objHandle)
    rlAgent.UnplotGrasps()
    rlAgent.UnplotCloud()

  # Save results.
  saveData = {"objectClass":objectClass, "randomScale":randomScale,
    "maxAngleFromObjectAxis":maxAngleFromObjectAxis, "maxObjectTableGap":maxObjectTableGap,
    "nGraspSamples":nGraspSamples, "graspScoreThresh":graspScoreThresh,
    "graspDetectMode":graspDetectMode, "nEpisodes":nEpisodes, "maxTimesteps":maxTimesteps,
    "gamma":gamma, "epsilon":epsilon, "Return":Return, "graspsDetected":graspsDetected,
    "topGraspsDetected":topGraspsDetected, "placeHistogram":placeHistogram,
    "goodTempPlaceCount":goodTempPlaceCount, "badTempPlaceCount":badTempPlaceCount,
    "goodFinalPlaceCount":goodFinalPlaceCount, "badFinalPlaceCount":badFinalPlaceCount}
  savemat(saveFileName, saveData)
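# Hedged sketch: summarize a results file written by the test script above.
# The key names come from its saveData dict; the file name is an example.
from numpy import mean
from scipy.io import loadmat

results = loadmat("results-mug_test.mat", squeeze_me=True)
print("Mean discounted return over {} episodes: {:.3f}".format(
  len(results["Return"]), mean(results["Return"])))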