def main(objectClass):
  '''Entrypoint to the program.
  - Input objectClass: Folder in 3D Net database.
  '''

  # PARAMETERS =====================================================================================

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(True)

  # RUN TEST =======================================================================================

  objectNames = threeDNet.GetObjectNames(objectClass)

  for objectIdx, objectName in enumerate(objectNames):
    print("Showing object {}: ".format(objectIdx + 1) + objectName)
    fullObjName, objScale = threeDNet.GetObjectByName(objectClass, objectName)
    objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(fullObjName, objScale)
    raw_input("Press [Enter] to continue...")
    rlEnv.RemoveObject(objHandle)

  print("Finished.")
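# Hedged usage sketch for the viewer script above: the guard and the
# object-class folder name "mug" are assumptions; substitute any folder
# present in your 3DNet database.
if __name__ == "__main__":
  main("mug")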
def __init__(self, params):
  '''Initializes openrave environment, parameters, and first episode.
  - Input params: System parameters data structure.
  '''

  RlEnvironment.__init__(self, params)

  # parameters
  self.nObjects = params["nObjects"]
  self.nSupportObjects = params["nSupportObjects"]
  self.objectFolder = params["objectFolder"]
  self.supportObjectFolder = params["supportObjectFolder"]
  self.graspFrictionCone = params["graspFrictionCone"]
  self.graspMinDistFromBottom = params["graspMinDistFromBottom"]
  self.placeOrientTolerance = params["placeOrientTolerance"]
  self.placePosTolerance = params["placePosTolerance"]
  self.placeHeightTolerance = params["placeHeightTolerance"]

  # initialization
  self.InitializeHandRegions()
  self.objectFileNames = os.listdir(self.objectFolder)
  self.objectFileNames = fnmatch.filter(self.objectFileNames, "*.dae")
  self.supportObjectFileNames = os.listdir(self.supportObjectFolder)
  self.supportObjectFileNames = fnmatch.filter(self.supportObjectFileNames, "*.dae")

  # internal state
  self.objects = []
  self.supportObjects = []
  self.kinBodyCache = {}
  self.ResetEpisode()
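# Sketch of the params structure this constructor consumes. The key names
# come from the assignments above; the values are hypothetical placeholders,
# and the RlEnvironment base class may require additional keys.
exampleParams = {
  "nObjects": 2, "nSupportObjects": 1,
  "objectFolder": "/path/to/objects",          # directory of *.dae meshes
  "supportObjectFolder": "/path/to/supports",  # directory of *.dae meshes
  "graspFrictionCone": 10.0, "graspMinDistFromBottom": 0.02,
  "placeOrientTolerance": 0.1, "placePosTolerance": 0.02,
  "placeHeightTolerance": 0.01}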
def Main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  params = loadmat("parameters.mat", squeeze_me=True)
  randomSeed = params["randomSeed"]
  tMax = params["tMax"]
  nEpisodes = params["nEpisodes"]
  unbiasOnEpisode = params["unbiasOnEpisode"]
  epsilon = params["epsilon"]
  loadQFunction = params["loadQFunction"]
  saveFileName = params["saveFileName"]

  # INITIALIZATION =================================================================================

  # set random seeds
  seed(randomSeed)

  # initialize agent
  agent = RlAgentHsaLimitedSensor(params)
  if loadQFunction: agent.LoadQFunction()

  # RUN TEST =======================================================================================

  episodeReturn = []; episodeTime = []

  for episode in xrange(nEpisodes):

    startTime = time()
    episodeReturn.append(0)
    if episode >= unbiasOnEpisode: epsilon = 0

    env = RlEnvironment(params)
    s = env.GetState()
    a, i, o = agent.GetActionsAndObservations(s, epsilon)

    for t in xrange(tMax):
      r = env.Transition(a)
      ss = env.GetState()
      aa, ii, oo = agent.GetActionsAndObservations(ss, epsilon)
      agent.UpdateQFunction(o, i, r, oo, ii)
      s = ss; a = aa; o = oo; i = ii
      episodeReturn[-1] += r

    print("Episode {} had return {}.".format(episode, episodeReturn[-1]))
    episodeTime.append(time() - startTime)

  print("Agent learned {} values.".format(agent.GetQTableSize()))

  saveData = {"episodeReturn":episodeReturn, "episodeTime":episodeTime,
    "nValuesLearned":agent.GetQTableSize()}
  saveData.update(params)
  savemat(saveFileName, saveData)
  agent.SaveQFunction()
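# Hedged sketch: write a parameters.mat compatible with the loadmat call in
# Main() above. The key names come from that script; the values shown are
# placeholders, not recommended settings.
from scipy.io import savemat

savemat("parameters.mat", {
  "randomSeed": 0, "tMax": 10, "nEpisodes": 1000, "unbiasOnEpisode": 900,
  "epsilon": 1.0, "loadQFunction": 0, "saveFileName": "results.mat"})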
def Main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  params = loadmat("parameters.mat", squeeze_me=True)
  randomSeed = params["randomSeed"]
  tMax = params["tMax"]
  nEpisodes = params["nEpisodes"]
  saveFileName = params["saveFileName"]

  # INITIALIZATION =================================================================================

  # set random seeds
  seed(randomSeed)

  # initialize agent
  agent = RlAgentTd(params)

  # RUN TEST =======================================================================================

  episodeReturn = []; episodeTime = []

  for episode in xrange(nEpisodes):

    startTime = time()
    episodeReturn.append(0)

    env = RlEnvironment(params)
    s = env.GetState()
    a = agent.GetAction(s)

    for t in xrange(tMax):
      r = env.Transition(a)
      ss = env.GetState()
      aa = agent.GetAction(ss)
      agent.UpdateQFunction(s, a, r, ss, aa)
      s = ss; a = aa
      episodeReturn[-1] += r

    print("Episode {} had return {}.".format(episode, episodeReturn[-1]))
    episodeTime.append(time() - startTime)

  print("Agent learned {} values.".format(agent.GetQTableSize()))

  saveData = {"episodeReturn":episodeReturn, "episodeTime":episodeTime,
    "nValuesLearned":agent.GetQTableSize()}
  saveData.update(params)
  savemat(saveFileName, saveData)
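# The UpdateQFunction(s, a, r, ss, aa) call above has the shape of an
# on-policy SARSA update. This is a hedged sketch of a tabular version;
# alpha and gamma are assumed parameters, and the agent's real
# implementation is not shown in this file.
from collections import defaultdict

class TabularSarsa(object):

  def __init__(self, alpha=0.1, gamma=0.98):
    self.alpha = alpha; self.gamma = gamma
    self.Q = defaultdict(float)  # maps (state, action) -> estimated value

  def UpdateQFunction(self, s, a, r, ss, aa):
    # Q(s,a) <- Q(s,a) + alpha * (r + gamma * Q(ss,aa) - Q(s,a))
    target = r + self.gamma * self.Q[(ss, aa)]
    self.Q[(s, a)] += self.alpha * (target - self.Q[(s, a)])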
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # objects
  directory = "/home/mgualti/Data/3DNet/Cat10_ModelDatabase/mug_upright/"

  # INITIALIZATION =================================================================================

  rlEnv = RlEnvironment(showViewer=True, enablePhysics=False, removeTable=True)

  # RUN TEST =======================================================================================

  fileNames = os.listdir(directory)
  fileNames = fnmatch.filter(fileNames, "*.ply")

  for fileName in fileNames:
    threeDNetName = fileName[:-4]
    rlEnv.env.Load(directory + "/" + fileName)
    body = rlEnv.env.GetKinBody(threeDNetName)
    raw_input(threeDNetName)
    rlEnv.env.Remove(body)
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # objects
  meshDirectory = "/home/mgualti/Data/3DNet/Cat10_ModelDatabase/bottle/"
  objectsToSkip = ["8cf1cc180de4ecd0a826d25a0acb0476.ply"] # z-axis not up
  objectsToSkip += ["b0652a09588f293c7e95755f464f6241.ply",
    "91a1f4e7a5eab4eab05e5be75e72ca3c.ply",
    "70e77c09aca88d4cf76fe74de9d44699.ply"] # difficult to grasp
  meshExtension = "ply"
  scale = 0.25

  # environment
  removeTable = True
  showViewer = True

  # INITIALIZATION =================================================================================

  params = locals()
  rlEnv = RlEnvironment(params)

  # RUN TEST =======================================================================================

  fileNames = os.listdir(meshDirectory)
  fileNames = fnmatch.filter(fileNames, "*." + meshExtension)

  for fileName in fileNames:
    if fileName in objectsToSkip: continue
    objectName = fileName[:-4]
    rlEnv.env.Load(meshDirectory + "/" + fileName, {'scalegeometry':str(scale)})
    body = rlEnv.env.GetKinBody(objectName)
    raw_input(objectName)
    rlEnv.env.Remove(body)
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # system
  gpuId = 0

  # objects
  objectScale = [0.09, 0.17]
  nObjects = 1000
  directory = "/home/mgualti/Data/3DNet/Cat200_ModelDatabase/plate/"

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.60
  viewWorkspace = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]

  # visualization/saving
  showViewer = False
  showSteps = False
  plotImages = False

  # INITIALIZATION =================================================================================

  rlEnv = RlEnvironment(showViewer, removeTable=True)
  rlAgent = RlAgent(rlEnv, gpuId)

  # RUN TEST =======================================================================================

  for objIdx in xrange(nObjects):
    obj = rlEnv.Place3DNetObjectAtOrigin(directory, objectScale, "plate-{}".format(objIdx), True)
    cloud, normals = rlAgent.GetFullCloudAndNormals(viewCenter, viewKeepout, viewWorkspace, False)
    point_cloud.SaveMat("plate-{}.mat".format(objIdx), cloud, normals)
    rlAgent.PlotCloud(cloud)
    if plotImages: point_cloud.Plot(cloud, normals, 2)
    if showSteps: raw_input("Placed plate-{}.".format(objIdx))
    rlEnv.RemoveObjectSet([obj])
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # system
  gpuId = 0

  # objects
  objectHeight = [0.007, 0.013]
  objectRadius = [0.030, 0.045]
  nObjects = 1000

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.60
  viewWorkspace = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]

  # visualization/saving
  showViewer = False
  showSteps = False
  plotImages = False

  # INITIALIZATION =================================================================================

  rlEnv = RlEnvironment(showViewer, removeTable=True)
  rlAgent = RlAgent(rlEnv, gpuId)

  # RUN TEST =======================================================================================

  for objIdx in xrange(nObjects):
    obj = rlEnv.PlaceCylinderAtOrigin(objectHeight, objectRadius, "cylinder-{}".format(objIdx), True)
    cloud, normals = rlAgent.GetFullCloudAndNormals(viewCenter, viewKeepout, viewWorkspace)
    point_cloud.SaveMat("cylinder-{}.mat".format(objIdx), cloud, normals)
    rlAgent.PlotCloud(cloud)
    if plotImages: point_cloud.Plot(cloud, normals, 2)
    if showSteps: raw_input("Placed cylinder-{}.".format(objIdx))
    rlEnv.RemoveObjectSet([obj])
def __init__(self, params):
  '''Initializes openrave environment, parameters, and episode.
  - Input params: System parameters data structure.
  '''

  RlEnvironment.__init__(self, params)

  # parameters
  self.colors = array([ \
    (1.0, 0.0, 0.0, 0.5), (0.0, 1.0, 0.0, 0.5), (0.0, 0.0, 1.0, 0.5), (0.0, 1.0, 1.0, 0.5),
    (1.0, 0.0, 1.0, 0.5), (1.0, 1.0, 0.0, 0.5), (0.5, 1.0, 0.0, 0.5), (0.5, 0.0, 1.0, 0.5),
    (0.0, 0.5, 1.0, 0.5), (1.0, 0.5, 0.0, 0.5), (1.0, 0.0, 0.5, 0.5), (0.0, 1.0, 0.5, 0.5)])

  # internal state
  self.objects = []
  self.supportObjects = []
  self.ResetEpisode()
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # system
  randomSeed = 0

  # environment
  removeTable = True

  # objects
  objectHeight = [0.005, 0.020]
  objectRadius = [0.040, 0.060]
  nObjects = 1000

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.60
  viewWorkspace = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]
  voxelSize = 0.002

  # visualization/saving
  showViewer = False
  showSteps = False
  plotImages = False

  # INITIALIZATION =================================================================================

  params = locals()
  seed(randomSeed)
  rlEnv = RlEnvironment(params)

  # RUN TEST =======================================================================================

  for objIdx in xrange(nObjects):

    # Generate mesh, save mesh, and get points.
    objectName = "coaster-{}".format(objIdx)
    obj = rlEnv.GenerateCylinderMesh(objectHeight, objectRadius, objectName)
    cloud = rlEnv.GetFullCloud(viewCenter, viewKeepout, viewWorkspace,
      add45DegViews=False, computeNormals=False, voxelSize=voxelSize)

    # Compute object bounding box.
    workspace = array([[min(cloud[:, 0]), max(cloud[:, 0])],
                       [min(cloud[:, 1]), max(cloud[:, 1])],
                       [min(cloud[:, 2]), max(cloud[:, 2])]])

    # Save metadata.
    data = {"cloud":cloud, "workspace":workspace, "height":obj.height, "radius":obj.radius}
    savemat(objectName + ".mat", data)

    # Optional visualization for debugging.
    rlEnv.PlotCloud(cloud)
    if plotImages: point_cloud.Plot(cloud)
    if showSteps: raw_input("Placed " + objectName + ".")

    # Remove the object before loading the next one.
    rlEnv.RemoveObjectSet([obj])
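# Hedged sketch: read back one of the coaster-*.mat files written above.
# The key names match the data dict in this script; the file name is an
# example.
from scipy.io import loadmat

data = loadmat("coaster-0.mat", squeeze_me=True)
cloud = data["cloud"]          # nPoints x 3 array of surface points
workspace = data["workspace"]  # 3 x 2 axis-aligned bounding box
print("{} points, radius {}".format(cloud.shape[0], data["radius"]))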
def __init__(self, params):
  '''Initializes openrave environment, parameters, and first episode.
  - Input params: System parameters data structure.
  '''

  RlEnvironment.__init__(self, params)

  # parameters
  self.nObjects = params["nObjects"]
  self.nSupportObjects = params["nSupportObjects"]
  self.objectFolder = params["objectFolder"]
  self.supportObjectFolder = params["supportObjectFolder"]
  self.placeOrientTolerance = params["placeOrientTolerance"]
  self.placeHeightTolerance = params["placeHeightTolerance"]
  self.rewardCapGrasps = params["rewardCapGrasps"]

  self.colors = array([ \
    (1.0, 0.0, 0.0, 0.5), (0.0, 1.0, 0.0, 0.5), (0.0, 0.0, 1.0, 0.5), (0.0, 1.0, 1.0, 0.5),
    (1.0, 0.0, 1.0, 0.5), (1.0, 1.0, 0.0, 0.5), (0.5, 1.0, 0.0, 0.5), (0.5, 0.0, 1.0, 0.5),
    (0.0, 0.5, 1.0, 0.5), (1.0, 0.5, 0.0, 0.5), (1.0, 0.0, 0.5, 0.5), (0.0, 1.0, 0.5, 0.5)])

  self.pointToRealRadiusError = 0.0001

  # initialization
  self.InitializeHandRegions()
  self.objectFileNames = os.listdir(self.objectFolder)
  self.objectFileNames = fnmatch.filter(self.objectFileNames, "*.dae")
  self.supportObjectFileNames = os.listdir(self.supportObjectFolder)
  self.supportObjectFileNames = fnmatch.filter(self.supportObjectFileNames, "*.dae")

  # internal state
  self.objects = []
  self.supportObjects = []
  self.ResetEpisode()
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # INITIALIZATION =================================================================================

  rlEnv = RlEnvironment(True)
  rlAgent = RlAgent(rlEnv)

  # RUN TEST =======================================================================================

  for placeIdx, placePose in enumerate(rlAgent.placePoses):
    print("Showing pose {}: ".format(placeIdx))
    print(placePose)
    rlAgent.MoveHandToPose(placePose)
    raw_input("Press [Enter] to continue...")

  print("Finished.")
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # system
  randomSeed = 0

  # environment
  removeTable = True

  # objects
  objectDirectory = "/home/mgualti/Data/3DNet/Cat10_ModelDatabase/bottle"
  objectsToSkip = ["8cf1cc180de4ecd0a826d25a0acb0476.ply"] # z-axis not up
  objectsToSkip += ["b0652a09588f293c7e95755f464f6241.ply",
    "91a1f4e7a5eab4eab05e5be75e72ca3c.ply",
    "70e77c09aca88d4cf76fe74de9d44699.ply"] # difficult to grasp
  objectScale = [0.10, 0.20]
  nObjects = 1000

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.60
  viewWorkspace = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]
  voxelSize = 0.002

  # visualization/saving
  showViewer = False
  showSteps = False
  plotImages = False

  # INITIALIZATION =================================================================================

  params = locals()
  seed(randomSeed)
  rlEnv = RlEnvironment(params)

  # RUN TEST =======================================================================================

  for objIdx in xrange(nObjects):

    # Load mesh, save mesh, and get points and normals.
    objectName = "bottle-{}".format(objIdx)
    obj = rlEnv.GenerateMeshFromMesh(objectDirectory, objectScale, objectsToSkip, objectName)
    cloud, normals = rlEnv.GetFullCloud(viewCenter, viewKeepout, viewWorkspace,
      add45DegViews=False, computeNormals=True, voxelSize=voxelSize)

    # Occasionally, the normals calculation fails. Replace each missing normal
    # with the normal of the nearest point that has one. (Note nearestIdx
    # indexes into the NaN-free subset, not the full array.)
    if isnan(normals).any():
      nanIdx = sum(isnan(normals), axis=1) > 0
      notNanIdx = logical_not(nanIdx)
      nanFreeTree = cKDTree(cloud[notNanIdx, :])
      d, nearestIdx = nanFreeTree.query(cloud[nanIdx, :])
      normals[nanIdx, :] = normals[notNanIdx, :][nearestIdx, :]

    # Compute object bounding box.
    workspace = array([[min(cloud[:, 0]), max(cloud[:, 0])],
                       [min(cloud[:, 1]), max(cloud[:, 1])],
                       [min(cloud[:, 2]), max(cloud[:, 2])]])

    # Save metadata.
    data = {"cloud":cloud, "normals":normals, "workspace":workspace, "scale":obj.scale}
    savemat(objectName + ".mat", data)

    # Optional visualization for debugging.
    rlEnv.PlotCloud(cloud)
    if plotImages: point_cloud.Plot(cloud, normals, 5)
    if showSteps: raw_input("Placed " + objectName + ".")

    # Remove the object before loading the next one.
    rlEnv.RemoveObjectSet([obj])
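# Self-contained demo of the nearest-neighbor NaN repair used above, since
# the indexing is easy to get wrong: nearestIdx refers to the NaN-free
# subset, so replacements must be gathered from normals[notNanIdx, :].
from numpy import array, isnan, logical_not, nan, sum
from scipy.spatial import cKDTree

cloud = array([[0.0, 0, 0], [1.0, 0, 0], [1.1, 0, 0]])
normals = array([[0.0, 0, 1], [0.0, 1, 0], [nan, nan, nan]])

nanIdx = sum(isnan(normals), axis=1) > 0
notNanIdx = logical_not(nanIdx)
nanFreeTree = cKDTree(cloud[notNanIdx, :])
d, nearestIdx = nanFreeTree.query(cloud[nanIdx, :])
normals[nanIdx, :] = normals[notNanIdx, :][nearestIdx, :]
print(normals)  # the last normal becomes [0, 1, 0], copied from its neighbor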
def main(saveFileSuffix):
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # objects
  objectClass = "mug_train"
  randomScale = True
  targetObjectAxis = array([0, 0, 1])
  maxAngleFromObjectAxis = 20 * (pi / 180)
  maxObjectTableGap = 0.03

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.50
  viewWorkspace = [(-1, 1), (-1, 1), (0.002, 1)]

  # grasps
  graspDetectMode = 0 # 0=sample, 1=sample+label
  nGraspSamples = 100
  graspScoreThresh = 350

  # learning
  nValueIterations = 70
  nDataIterations = 50
  nGraspIterations = 20
  pickEpsilon = 1.0
  placeEpsilon = 1.0
  minPickEpsilon = 0.10
  minPlaceEpsilon = 0.10
  pickEpsilonDelta = 0.05
  placeEpsilonDelta = 0.05
  maxExperiences = 25000
  trainingBatchSize = 25000
  unbiasOnIteration = nValueIterations - 5

  # visualization/saving
  saveFileName = "results" + saveFileSuffix + ".mat"
  recordLoss = True
  showViewer = False
  showSteps = False

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(showViewer)
  rlAgent = RlAgent(rlEnv)
  nPlaceOptions = len(rlAgent.placePoses)
  experienceDatabase = []

  # RUN TEST =======================================================================================

  averageReward = []
  placeActionCounts = []
  trainLosses = []
  testLosses = []
  databaseSize = []
  iterationTime = []

  for valueIterationIdx in xrange(nValueIterations):

    print("Iteration {}. Epsilon pick: {}, place: {}".format(
      valueIterationIdx, pickEpsilon, placeEpsilon))

    # 1. Collect data for this training iteration.

    iterationStartTime = time.time()
    R = []
    placeCounts = zeros(nPlaceOptions)

    # check if it's time to unbias data
    if valueIterationIdx >= unbiasOnIteration:
      maxExperiences = trainingBatchSize # selects all recent experiences, unbiased
      pickEpsilon = 0 # estimating value function of actual policy
      placeEpsilon = 0 # estimating value function of actual policy

    for dataIterationIdx in xrange(nDataIterations):

      # place random object in random orientation on table
      fullObjName, objScale = threeDNet.GetRandomObjectFromClass(objectClass, randomScale)
      objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(fullObjName, objScale)

      # move the hand to view position and capture a point cloud
      cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
        viewCenter, viewKeepout, viewWorkspace)
      rlAgent.PlotCloud(cloud)

      # detect grasps in the sensory data
      grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices, nGraspSamples,
        graspScoreThresh, graspDetectMode)
      rlAgent.PlotGrasps(grasps)
      if showSteps: raw_input("Acquired grasps.")

      if len(grasps) == 0:
        print("No grasps found. Skipping iteration.")
        rlEnv.RemoveObject(objHandle)
        rlAgent.UnplotCloud()
        continue

      for graspIterationIdx in xrange(nGraspIterations):

        print("Episode {}.{}.{}.".format(valueIterationIdx, dataIterationIdx, graspIterationIdx))

        # perform pick action
        grasp = rlAgent.GetGrasp(grasps, pickEpsilon)
        s = rlEnv.GetState(rlAgent, grasp, None)
        rlAgent.PlotGrasps([grasp])
        if showSteps: print("Selected grasp.")

        # perform place action
        P = rlAgent.GetPlacePose(grasp, placeEpsilon)
        rlAgent.MoveHandToPose(P)
        ss = rlEnv.GetState(rlAgent, grasp, P)
        rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
        r = rlEnv.RewardBinary(objHandle, targetObjectAxis, maxAngleFromObjectAxis,
          maxObjectTableGap)
        print("The robot receives {} reward.".format(r))
        if showSteps: raw_input("Press [Enter] to continue...")

        # add experience to database
        experienceDatabase.append((s, ss, 0)) # grasp -> placement
        experienceDatabase.append((ss, None, r)) # placement -> end

        # record save data
        R.append(r)
        placeCounts += ss[1][len(s[1]) - nPlaceOptions:]

        # cleanup this grasp iteration
        rlAgent.UnplotGrasps()
        rlEnv.MoveObjectToPose(objHandle, objRandPose)

      # cleanup this data iteration
      rlEnv.RemoveObject(objHandle)
      rlAgent.UnplotCloud()

    # 2. Compute value labels for data.
    experienceDatabase = rlAgent.PruneDatabase(experienceDatabase, maxExperiences)
    Dl = rlAgent.DownsampleAndLabelData(experienceDatabase, trainingBatchSize)
    databaseSize.append(len(experienceDatabase))

    # 3. Train network from replay database.
    trainLoss, testLoss = rlAgent.Train(Dl, recordLoss=recordLoss)
    trainLosses.append(trainLoss)
    testLosses.append(testLoss)
    pickEpsilon = max(minPickEpsilon, pickEpsilon - pickEpsilonDelta)
    placeEpsilon = max(minPlaceEpsilon, placeEpsilon - placeEpsilonDelta)

    # 4. Save results.
    averageReward.append(mean(R))
    placeActionCounts.append(placeCounts)
    iterationTime.append(time.time() - iterationStartTime)
    saveData = {"objectClass":objectClass, "nGraspSamples":nGraspSamples,
      "graspScoreThresh":graspScoreThresh, "nValueIterations":nValueIterations,
      "nDataIterations":nDataIterations, "nGraspIterations":nGraspIterations,
      "pickEpsilon":pickEpsilon, "placeEpsilon":placeEpsilon,
      "minPickEpsilon":minPickEpsilon, "minPlaceEpsilon":minPlaceEpsilon,
      "pickEpsilonDelta":pickEpsilonDelta, "placeEpsilonDelta":placeEpsilonDelta,
      "maxExperiences":maxExperiences, "trainingBatchSize":trainingBatchSize,
      "averageReward":averageReward, "placeActionCounts":placeActionCounts,
      "trainLoss":trainLosses, "testLoss":testLosses, "databaseSize":databaseSize,
      "iterationTime":iterationTime, "placePoses":rlAgent.placePoses}
    savemat(saveFileName, saveData)
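# PruneDatabase is not defined in this file. Judging from the "selects all
# recent experiences" comment above, it plausibly keeps a bounded window of
# the newest experiences; this is a hedged sketch of that behavior only,
# and the real implementation may differ (e.g. random subsampling).
def PruneDatabase(experienceDatabase, maxExperiences):
  if len(experienceDatabase) <= maxExperiences:
    return experienceDatabase
  return experienceDatabase[-maxExperiences:]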
def __init__(self, showViewer=True, removeTable=False):
  '''Initializes openrave environment, etc.'''
  RlEnvironment.__init__(self, showViewer, removeTable)
def Main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  params = loadmat("parameters.mat", squeeze_me=True)
  randomSeed = params["randomSeed"]
  tMax = params["tMax"]
  nEpisodes = params["nEpisodes"]
  unbiasOnEpisode = params["unbiasOnEpisode"]
  epsilon = params["epsilon"]
  loadQFunction = params["loadQFunction"]
  saveQFunction = params["saveQFunction"]
  saveFileName = params["saveFileName"]

  # INITIALIZATION =================================================================================

  # set random seeds
  seed(randomSeed)

  # initialize agent
  agent = RlAgentDeictic(params)
  if loadQFunction: agent.LoadQFunction()

  # RUN TEST =======================================================================================

  nPlacedObjects = []
  episodeTime = []

  for episode in xrange(nEpisodes):

    startTime = time()
    R = 0
    nPlacedObjects.append(0)
    if episode >= unbiasOnEpisode: epsilon = 0

    env = RlEnvironment(params)
    s = env.GetState()
    a, i, o = agent.GetActionAndObservation(s, epsilon)

    for t in xrange(tMax):
      isPlace = env.IsPlace()
      r = env.Transition(a)
      if isPlace: nPlacedObjects[-1] += r > 0
      R += r
      ss = env.GetState()
      aa, ii, oo = agent.GetActionAndObservation(ss, epsilon)
      agent.UpdateQFunction(o, i, r, oo, ii)
      s = ss; a = aa; o = oo; i = ii

    print("Episode {} had return {}.".format(episode, R))
    episodeTime.append(time() - startTime)

  print("Agent learned {} values.".format(agent.GetQTableSize()))

  saveData = {"nPlacedObjects":nPlacedObjects, "episodeTime":episodeTime,
    "nValuesLearned":agent.GetQTableSize()}
  saveData.update(params)
  savemat(saveFileName, saveData)
  if saveQFunction: agent.SaveQFunction()
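# LoadQFunction/SaveQFunction suggest the Q-table is persisted between
# runs. A hedged sketch of one way to do that with pickle; the file name
# and storage format are assumptions, not the agent's actual format.
import pickle

def SaveQFunction(Q, fileName="qFunction.pkl"):
  with open(fileName, "wb") as f:
    pickle.dump(Q, f)

def LoadQFunction(fileName="qFunction.pkl"):
  with open(fileName, "rb") as f:
    return pickle.load(f)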
def main(objectClass, epsilon):
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # objects
  nObjects = 7
  randomObjectScale = True
  targetObjectAxis = array([0, 0, 1])
  maxAngleFromObjectAxis = 20 * (pi / 180)
  maxObjectTableGap = 0.02

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.50
  viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]
  objViewWorkspace = [(-1, 1), (-1, 1), (0.002, 1)]

  # grasps
  graspDetectMode = 1 # 0=sample, 1=sample+label
  nGraspSamples = 500
  graspScoreThresh = 300
  nGraspInliers = 2

  # testing
  weightsFileName = "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/image_iter_5000.caffemodel"
  nDataIterations = 300

  # visualization/saving
  saveFileName = "results-clutter-" + objectClass + "-epsilon" + str(epsilon) + ".mat"
  showViewer = False
  showSteps = False

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(showViewer)
  rlAgent = RlAgent(rlEnv)
  if epsilon < 1.0: rlAgent.LoadNetworkWeights(weightsFileName)
  Return = []

  # RUN TEST =======================================================================================

  for dataIterationIdx in xrange(nDataIterations):

    print("Iteration {}.".format(dataIterationIdx))

    # place clutter on table
    fullObjNames, objScales = threeDNet.GetRandomObjectSet(
      objectClass, nObjects, randomObjectScale)
    objHandles, objPoses = rlEnv.PlaceObjectSet(fullObjNames, objScales)
    objCloud, objCloudIdxs = rlEnv.AssignPointsToObjects(
      rlAgent, objHandles, viewCenter, viewKeepout, objViewWorkspace)
    if showSteps: raw_input("Objects placed.")

    # move the hand to view position and capture a point cloud
    cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
      viewCenter, viewKeepout, viewWorkspace)
    rlAgent.PlotCloud(cloud)
    if showSteps: raw_input("Point cloud.")

    # detect grasps in the sensory data
    graspsDetected = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices,
      nGraspSamples, graspScoreThresh, nGraspInliers, graspDetectMode)
    grasps = rlAgent.FilterGraspsWithNoPoints(graspsDetected, objCloud)
    if len(graspsDetected) > len(grasps):
      print("Filtered {} empty grasps.".format(len(graspsDetected) - len(grasps)))
    rlAgent.PlotGrasps(grasps)
    if showSteps: raw_input("Acquired grasps.")

    if len(grasps) == 0:
      print("No grasps found. Skipping iteration.")
      rlEnv.RemoveObjectSet(objHandles)
      rlAgent.UnplotGrasps()
      rlAgent.UnplotCloud()
      continue

    # perform pick action
    grasp = rlAgent.GetGrasp(grasps, epsilon)
    rlAgent.PlotGrasps([grasp])
    if showSteps: raw_input("Selected grasp.")

    # perform place action
    P = rlAgent.GetPlacePose(grasp, epsilon)
    rlAgent.MoveHandToPose(P)
    objHandle, objPose = rlEnv.GetObjectWithMaxGraspPoints(
      grasp, objHandles, objCloud, objCloudIdxs)
    rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
    r = rlEnv.RewardHeightExponential(objHandle, targetObjectAxis,
      maxAngleFromObjectAxis, maxObjectTableGap)
    print("The robot receives {} reward.".format(r))
    Return.append(r)
    if showSteps: raw_input("Press [Enter] to continue...")

    # cleanup this data iteration
    rlEnv.RemoveObjectSet(objHandles)
    rlAgent.UnplotGrasps()
    rlAgent.UnplotCloud()

  # Save results.
  saveData = {"nObjects":nObjects, "randomObjectScale":randomObjectScale,
    "targetObjectAxis":targetObjectAxis, "maxAngleFromObjectAxis":maxAngleFromObjectAxis,
    "maxObjectTableGap":maxObjectTableGap, "graspDetectMode":graspDetectMode,
    "nGraspSamples":nGraspSamples, "graspScoreThresh":graspScoreThresh,
    "weightsFileName":weightsFileName, "nDataIterations":nDataIterations,
    "epsilon":epsilon, "Return":Return}
  savemat(saveFileName, saveData)
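# GetGrasp(grasps, epsilon) follows the usual epsilon-greedy pattern: with
# probability epsilon take a random grasp, otherwise take the grasp the
# value network scores highest. A hedged sketch under that assumption;
# ScoreGrasp is a hypothetical stand-in for the network forward pass.
from random import choice, random

def GetGrasp(grasps, epsilon, ScoreGrasp):
  if random() < epsilon:
    return choice(grasps)  # explore: uniformly random grasp
  return max(grasps, key=ScoreGrasp)  # exploit: highest-valued grasp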
def main():
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # objects
  objectClass = "mug_train"
  randomScale = True
  targetObjectAxis = array([0, 0, 1])
  maxAngleFromObjectAxis = 20 * (pi / 180)
  maxObjectTableGap = 0.03

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.50
  viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]

  # grasps
  graspDetectMode = 1 # 0=sample, 1=sample+label
  nGraspSamples = 200
  graspScoreThresh = 300

  # learning
  nTrainingIterations = 100
  nEpisodes = 100
  nReuses = 10
  maxTimesteps = 10
  gamma = 0.98
  epsilon = 1.0
  epsilonDelta = 0.05
  minEpsilon = 0.05
  maxExperiences = 50000
  trainingBatchSize = 50000
  unbiasOnIteration = nTrainingIterations - 5

  # visualization/saving
  saveFileName = "results.mat"
  recordLoss = True
  showViewer = False
  showSteps = False

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(showViewer)
  rlAgent = RlAgent(rlEnv)
  nPlaceOptions = len(rlAgent.placePoses)
  experienceDatabase = []

  # RUN TEST =======================================================================================

  avgReturn = []
  avgGraspsDetected = []
  avgTopGraspsDetected = []
  placeHistograms = []
  avgGoodTempPlaceCount = []
  avgBadTempPlaceCount = []
  avgGoodFinalPlaceCount = []
  avgBadFinalPlaceCount = []
  trainLosses = []
  testLosses = []
  databaseSize = []
  iterationTime = []

  for trainingIteration in xrange(nTrainingIterations):

    # initialization
    iterationStartTime = time.time()
    print("Iteration: {}, Epsilon: {}".format(trainingIteration, epsilon))
    placeHistogram = zeros(nPlaceOptions)
    Return = []
    graspsDetected = []
    topGraspsDetected = []
    goodTempPlaceCount = []
    badTempPlaceCount = []
    goodFinalPlaceCount = []
    badFinalPlaceCount = []

    # check if it's time to unbias data
    if trainingIteration >= unbiasOnIteration:
      maxExperiences = trainingBatchSize # selects all recent experiences, unbiased
      epsilon = 0 # estimating value function of actual policy

    # for each episode/object placement
    for episode in xrange(nEpisodes):

      # place random object in random orientation on table
      fullObjName, objScale = threeDNet.GetRandomObjectFromClass(objectClass, randomScale)
      objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(fullObjName, objScale)

      # move the hand to view position(s) and capture a point cloud
      cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
        viewCenter, viewKeepout, viewWorkspace)

      # detect grasps in the sensor data
      grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices, nGraspSamples,
        graspScoreThresh, graspDetectMode)
      graspsStart = grasps
      graspsDetected.append(len(grasps))
      topGraspsCount = CountObjectTopGrasps(grasps, objRandPose, maxAngleFromObjectAxis)
      topGraspsDetected.append(topGraspsCount)

      if len(grasps) == 0:
        print("No grasps found. Skipping iteration.")
        rlEnv.RemoveObject(objHandle)
        continue

      rlAgent.PlotCloud(cloud)
      rlAgent.PlotGrasps(grasps)

      for reuse in xrange(nReuses):

        print("Episode {}.{}.{}.".format(trainingIteration, episode, reuse))
        if showSteps: raw_input("Beginning of episode. Press [Enter] to continue...")

        # initialize recording variables
        episodePlaceHistogram = zeros(nPlaceOptions)
        episodeReturn = 0
        episodeGoodTempPlaceCount = 0
        episodeBadTempPlaceCount = 0
        episodeGoodFinalPlaceCount = 0
        episodeBadFinalPlaceCount = 0
        graspDetectionFailure = False
        episodeExperiences = []

        # initial state and first action
        s, selectedGrasp = rlEnv.GetInitialState(rlAgent)
        a, grasp, place = rlAgent.ChooseAction(s, grasps, epsilon)
        rlAgent.PlotGrasps([grasp])

        # for each time step in the episode
        for t in xrange(maxTimesteps):

          ss, selectedGrasp, rr = rlEnv.Transition(rlAgent, objHandle, s, selectedGrasp,
            a, grasp, place, targetObjectAxis, maxAngleFromObjectAxis, maxObjectTableGap)
          ssIsPlacedTempGood = ss[1][1]
          ssIsPlacedTempBad = ss[1][2]
          ssIsPlacedFinalGood = ss[1][3]
          ssIsPlacedFinalBad = ss[1][4]
          if showSteps: raw_input("Transition {}. Press [Enter] to continue...".format(t))

          # re-detect only if a non-terminal placement just happened
          if ssIsPlacedTempGood and place is not None:
            cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
              viewCenter, viewKeepout, viewWorkspace)
            grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices, nGraspSamples,
              graspScoreThresh, graspDetectMode)
            graspsDetected.append(len(grasps))
            topGraspsCount = CountObjectTopGrasps(grasps, rlEnv.GetObjectPose(objHandle),
              maxAngleFromObjectAxis)
            topGraspsDetected.append(topGraspsCount)
            if len(grasps) == 0:
              print("Grasp detection failure.")
              graspDetectionFailure = True
              break
            rlAgent.PlotCloud(cloud)
            rlAgent.PlotGrasps(grasps)

          # get next action
          aa, ggrasp, pplace = rlAgent.ChooseAction(ss, grasps, epsilon)
          if ggrasp is not None: rlAgent.PlotGrasps([ggrasp])
          if showSteps: raw_input("Action {}. Press [Enter] to continue...".format(t))

          # add to database and record data
          episodeExperiences.append((s, a, rr, ss, aa))
          episodeReturn += (gamma**t) * rr
          if place is not None:
            episodeGoodTempPlaceCount += ssIsPlacedTempGood
            episodeBadTempPlaceCount += ssIsPlacedTempBad
            episodeGoodFinalPlaceCount += ssIsPlacedFinalGood
            episodeBadFinalPlaceCount += ssIsPlacedFinalBad
            placeHistogram += a[1]

          # prepare for next time step
          if ssIsPlacedTempBad or ssIsPlacedFinalGood or ssIsPlacedFinalBad: break
          s = ss; a = aa; grasp = ggrasp; place = pplace

        # cleanup this reuse
        if not graspDetectionFailure:
          experienceDatabase += episodeExperiences
          placeHistogram += episodePlaceHistogram
          Return.append(episodeReturn)
          goodTempPlaceCount.append(episodeGoodTempPlaceCount)
          badTempPlaceCount.append(episodeBadTempPlaceCount)
          goodFinalPlaceCount.append(episodeGoodFinalPlaceCount)
          badFinalPlaceCount.append(episodeBadFinalPlaceCount)
        rlEnv.MoveObjectToPose(objHandle, objRandPose)
        grasps = graspsStart

      # cleanup this episode
      rlEnv.RemoveObject(objHandle)
      rlAgent.UnplotGrasps()
      rlAgent.UnplotCloud()

    # 2. Compute value labels for data.
    experienceDatabase = rlAgent.PruneDatabase(experienceDatabase, maxExperiences)
    Dl = rlAgent.DownsampleAndLabelData(experienceDatabase, trainingBatchSize, gamma)
    databaseSize.append(len(experienceDatabase))

    # 3. Train network from replay database.
    trainLoss, testLoss = rlAgent.Train(Dl, recordLoss=recordLoss)
    trainLosses.append(trainLoss)
    testLosses.append(testLoss)
    epsilon = max(minEpsilon, epsilon - epsilonDelta)

    # 4. Save results.
    avgReturn.append(mean(Return))
    avgGraspsDetected.append(mean(graspsDetected))
    avgTopGraspsDetected.append(mean(topGraspsDetected))
    placeHistograms.append(placeHistogram)
    avgGoodTempPlaceCount.append(mean(goodTempPlaceCount))
    avgBadTempPlaceCount.append(mean(badTempPlaceCount))
    avgGoodFinalPlaceCount.append(mean(goodFinalPlaceCount))
    avgBadFinalPlaceCount.append(mean(badFinalPlaceCount))
    iterationTime.append(time.time() - iterationStartTime)
    saveData = {"objectClass":objectClass, "randomScale":randomScale,
      "maxAngleFromObjectAxis":maxAngleFromObjectAxis, "maxObjectTableGap":maxObjectTableGap,
      "nGraspSamples":nGraspSamples, "graspScoreThresh":graspScoreThresh,
      "graspDetectMode":graspDetectMode, "nTrainingIterations":nTrainingIterations,
      "nEpisodes":nEpisodes, "maxTimesteps":maxTimesteps, "gamma":gamma, "epsilon":epsilon,
      "minEpsilon":minEpsilon, "epsilonDelta":epsilonDelta, "maxExperiences":maxExperiences,
      "trainingBatchSize":trainingBatchSize, "avgReturn":avgReturn,
      "avgGraspsDetected":avgGraspsDetected, "avgTopGraspsDetected":avgTopGraspsDetected,
      "placeHistograms":placeHistograms, "avgGoodTempPlaceCount":avgGoodTempPlaceCount,
      "avgBadTempPlaceCount":avgBadTempPlaceCount,
      "avgGoodFinalPlaceCount":avgGoodFinalPlaceCount,
      "avgBadFinalPlaceCount":avgBadFinalPlaceCount, "trainLoss":trainLosses,
      "testLoss":testLosses, "databaseSize":databaseSize, "iterationTime":iterationTime,
      "placePoses":rlAgent.placePoses}
    savemat(saveFileName, saveData)
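# The exploration schedule above decays epsilon linearly by epsilonDelta
# per iteration down to minEpsilon, then forces epsilon to 0 on the final
# unbiased iterations. A quick standalone sketch of the resulting schedule:
nTrainingIterations, unbiasOnIteration = 100, 95
epsilon, epsilonDelta, minEpsilon = 1.0, 0.05, 0.05

schedule = []
for i in range(nTrainingIterations):
  if i >= unbiasOnIteration: epsilon = 0
  schedule.append(epsilon)
  epsilon = max(minEpsilon, epsilon - epsilonDelta)
print(schedule[:3], schedule[-6:])  # ~[1.0, 0.95, 0.9] ... ~[0.05, 0, 0, 0, 0, 0]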
def main(objectClass, epsilon):
  '''Entrypoint to the program.
  - Input objectClass: Folder in 3D Net database.
  '''

  # PARAMETERS =====================================================================================

  # objects
  randomScale = True
  targetObjectAxis = array([0, 0, 1])
  maxAngleFromObjectAxis = 20 * (pi / 180)
  maxObjectTableGap = 0.02

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.50
  viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]

  # grasps
  graspDetectMode = 1 # 0=sample, 1=sample+label
  nGraspSamples = 200
  graspScoreThresh = 300
  nGraspInliers = 3

  # learning
  weightsFileName = "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/image_iter_5000.caffemodel"
  nDataIterations = 300

  # visualization/saving
  showViewer = False
  showEveryStep = False
  saveFileName = "results-single-" + objectClass + "-epsilon" + str(epsilon) + ".mat"

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(showViewer)
  rlAgent = RlAgent(rlEnv)
  if epsilon < 1.0: rlAgent.LoadNetworkWeights(weightsFileName)
  Return = []

  # RUN TEST =======================================================================================

  # Collect data for this training iteration.
  for dataIterationIdx in xrange(nDataIterations):

    print("Iteration {}.".format(dataIterationIdx))

    # place object in random orientation on table
    fullObjName, objScale = threeDNet.GetRandomObjectFromClass(objectClass, randomScale)
    objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(fullObjName, objScale)

    # move the hand to view position and capture a point cloud
    cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
      viewCenter, viewKeepout, viewWorkspace)
    rlAgent.PlotCloud(cloud)

    # detect grasps in the sensory data
    grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices, nGraspSamples,
      graspScoreThresh, nGraspInliers, graspDetectMode)
    rlAgent.PlotGrasps(grasps)

    if len(grasps) == 0:
      print("No grasps found. Skipping iteration.")
      rlEnv.RemoveObject(objHandle)
      rlAgent.UnplotCloud()
      continue

    if showEveryStep: raw_input("Press [Enter] to continue...")

    # perform pick action
    grasp = rlAgent.GetGrasp(grasps, epsilon)
    rlAgent.PlotGrasps([grasp])
    if showEveryStep: raw_input("Press [Enter] to continue...")

    # perform place action
    P = rlAgent.GetPlacePose(grasp, epsilon)
    rlAgent.MoveHandToPose(P)
    rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
    r = rlEnv.RewardHeightExponential(objHandle, targetObjectAxis, maxAngleFromObjectAxis,
      maxObjectTableGap)
    print("The robot receives {} reward.".format(r))
    Return.append(r)
    if showEveryStep: raw_input("Press [Enter] to continue...")

    # cleanup this data iteration
    rlEnv.RemoveObject(objHandle)
    rlAgent.UnplotGrasps()
    rlAgent.UnplotCloud()

  # Save results.
  saveData = {"randomScale":randomScale, "targetObjectAxis":targetObjectAxis,
    "maxAngleFromObjectAxis":maxAngleFromObjectAxis, "maxObjectTableGap":maxObjectTableGap,
    "graspDetectMode":graspDetectMode, "nGraspSamples":nGraspSamples,
    "graspScoreThresh":graspScoreThresh, "weightsFileName":weightsFileName,
    "nDataIterations":nDataIterations, "epsilon":epsilon, "Return":Return}
  savemat(saveFileName, saveData)
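# Both test scripts take (objectClass, epsilon) as arguments. A hedged
# sketch of a command-line entry point for them; the argument handling is
# an assumption, not part of the original scripts.
import sys

if __name__ == "__main__":
  objectClass = sys.argv[1] if len(sys.argv) > 1 else "mug_test"
  epsilon = float(sys.argv[2]) if len(sys.argv) > 2 else 0.0
  main(objectClass, epsilon)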
def main(objectClass):
  '''Entrypoint to the program.'''

  # PARAMETERS =====================================================================================

  # objects
  randomScale = True
  targetObjectAxis = array([0, 0, 1])
  maxAngleFromObjectAxis = 20 * (pi / 180)
  maxObjectTableGap = 0.03

  # view
  viewCenter = array([0, 0, 0])
  viewKeepout = 0.50
  viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]

  # grasps
  graspDetectMode = 1 # 0=sample, 1=sample+label
  nGraspSamples = 200
  graspScoreThresh = 300

  # testing
  nEpisodes = 300
  maxTimesteps = 10
  gamma = 0.98
  epsilon = 0.0
  weightsFileName = \
    "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/dualImage_iter_5000.caffemodel"

  # visualization/saving
  saveFileName = "results-" + objectClass + ".mat"
  showViewer = False
  showSteps = False

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(showViewer)
  rlAgent = RlAgent(rlEnv)
  rlAgent.LoadNetworkWeights(weightsFileName)
  nPlaceOptions = len(rlAgent.placePoses)
  placeHistogram = zeros(nPlaceOptions)
  Return = []
  graspsDetected = []
  topGraspsDetected = []
  goodTempPlaceCount = []
  badTempPlaceCount = []
  goodFinalPlaceCount = []
  badFinalPlaceCount = []

  # RUN TEST =======================================================================================

  # for each episode/object placement
  for episode in xrange(nEpisodes):

    # place random object in random orientation on table
    fullObjName, objScale = threeDNet.GetRandomObjectFromClass(objectClass, randomScale)
    objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(fullObjName, objScale)
    rlAgent.MoveSensorToPose(rlAgent.GetStandardViewPose(viewCenter, viewKeepout))
    if showSteps: raw_input("Beginning of episode. Press [Enter] to continue...")

    # move the hand to view position(s) and capture a point cloud
    cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
      viewCenter, viewKeepout, viewWorkspace)
    rlAgent.PlotCloud(cloud)
    if showSteps: raw_input("Acquired point cloud. Press [Enter] to continue...")

    # detect grasps in the sensor data
    grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices, nGraspSamples,
      graspScoreThresh, graspDetectMode)
    graspsDetected.append(len(grasps))
    topGraspsCount = CountObjectTopGrasps(grasps, objRandPose, maxAngleFromObjectAxis)
    topGraspsDetected.append(topGraspsCount)

    if len(grasps) == 0:
      print("No grasps found. Skipping iteration.")
      rlEnv.RemoveObject(objHandle)
      continue

    rlAgent.PlotGrasps(grasps)
    print("Episode {}.".format(episode))
    if showSteps: raw_input("Acquired grasps. Press [Enter] to continue...")

    # initialize recording variables
    episodePlaceHistogram = zeros(nPlaceOptions)
    episodeReturn = 0
    episodeGoodTempPlaceCount = 0
    episodeBadTempPlaceCount = 0
    episodeGoodFinalPlaceCount = 0
    episodeBadFinalPlaceCount = 0
    graspDetectionFailure = False

    # initial state and first action
    s, selectedGrasp = rlEnv.GetInitialState(rlAgent)
    a, grasp, place = rlAgent.ChooseAction(s, grasps, epsilon)
    rlAgent.PlotGrasps([grasp])

    # for each time step in the episode
    for t in xrange(maxTimesteps):

      ss, selectedGrasp, rr = rlEnv.Transition(rlAgent, objHandle, s, selectedGrasp, a,
        grasp, place, targetObjectAxis, maxAngleFromObjectAxis, maxObjectTableGap)
      ssIsPlacedTempGood = ss[1][1]
      ssIsPlacedTempBad = ss[1][2]
      ssIsPlacedFinalGood = ss[1][3]
      ssIsPlacedFinalBad = ss[1][4]
      if showSteps: raw_input("Transition {}. Press [Enter] to continue...".format(t))

      # re-detect only if a non-terminal placement just happened
      if ssIsPlacedTempGood and place is not None:
        cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
          viewCenter, viewKeepout, viewWorkspace)
        rlAgent.UnplotGrasps()
        rlAgent.PlotCloud(cloud)
        if showSteps: raw_input("Acquired cloud. Press [Enter] to continue...")
        grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices, nGraspSamples,
          graspScoreThresh, graspDetectMode)
        graspsDetected.append(len(grasps))
        topGraspsCount = CountObjectTopGrasps(grasps, objRandPose, maxAngleFromObjectAxis)
        topGraspsDetected.append(topGraspsCount)
        if len(grasps) == 0:
          print("Grasp detection failure.")
          graspDetectionFailure = True
          break
        rlAgent.PlotGrasps(grasps)
        if showSteps: raw_input("Acquired grasps. Press [Enter] to continue...")

      # get next action
      aa, ggrasp, pplace = rlAgent.ChooseAction(ss, grasps, epsilon)
      if ggrasp is not None: rlAgent.PlotGrasps([ggrasp])
      if showSteps: raw_input("Action {}. Press [Enter] to continue...".format(t))

      # record data from transition
      episodeReturn += (gamma**t) * rr
      if place is not None:
        episodeGoodTempPlaceCount += ssIsPlacedTempGood
        episodeBadTempPlaceCount += ssIsPlacedTempBad
        episodeGoodFinalPlaceCount += ssIsPlacedFinalGood
        episodeBadFinalPlaceCount += ssIsPlacedFinalBad
        placeHistogram += a[1]

      # prepare for next time step
      if ssIsPlacedTempBad or ssIsPlacedFinalGood or ssIsPlacedFinalBad: break
      s = ss; a = aa; grasp = ggrasp; place = pplace

    # record this episode
    if not graspDetectionFailure:
      placeHistogram += episodePlaceHistogram
      Return.append(episodeReturn)
      goodTempPlaceCount.append(episodeGoodTempPlaceCount)
      badTempPlaceCount.append(episodeBadTempPlaceCount)
      goodFinalPlaceCount.append(episodeGoodFinalPlaceCount)
      badFinalPlaceCount.append(episodeBadFinalPlaceCount)

    # cleanup this episode
    rlEnv.RemoveObject(objHandle)
    rlAgent.UnplotGrasps()
    rlAgent.UnplotCloud()

  # Save results.
  saveData = {"objectClass":objectClass, "randomScale":randomScale,
    "maxAngleFromObjectAxis":maxAngleFromObjectAxis, "maxObjectTableGap":maxObjectTableGap,
    "nGraspSamples":nGraspSamples, "graspScoreThresh":graspScoreThresh,
    "graspDetectMode":graspDetectMode, "nEpisodes":nEpisodes, "maxTimesteps":maxTimesteps,
    "gamma":gamma, "epsilon":epsilon, "Return":Return, "graspsDetected":graspsDetected,
    "topGraspsDetected":topGraspsDetected, "placeHistogram":placeHistogram,
    "goodTempPlaceCount":goodTempPlaceCount, "badTempPlaceCount":badTempPlaceCount,
    "goodFinalPlaceCount":goodFinalPlaceCount, "badFinalPlaceCount":badFinalPlaceCount}
  savemat(saveFileName, saveData)
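# Hedged sketch: summarize a results file written by the test script above.
# The key names come from its saveData dict; the file name is an example.
from numpy import mean
from scipy.io import loadmat

results = loadmat("results-mug_test.mat", squeeze_me=True)
print("Mean discounted return over {} episodes: {:.3f}".format(
  len(results["Return"]), mean(results["Return"])))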