Python RlAgent.DetectGrasps Beispiele

Programmiersprache: Python

Namespace / Paketname: rl_agent

Klasse / Typ: RlAgent

Methode / Funktion: DetectGrasps

Beispiele auf hotexamples.com: 5

Python RlAgent.DetectGrasps – 5 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Methode rl_agent.RlAgent.DetectGrasps, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

GetDualCloud(5)

DetectGrasps(5)

MoveHandToPose(4)

GetGrasp(3)

LoadQFunction(3)

LoadNetworkWeights(3)

LoadExperienceDatabase(3)

GetPlacePose(3)

GetNumberOfExperiences(3)

AddExperienceMonteCarlo(2)

FilterGraspsWithNoPoints(2)

DownsampleAndLabelData(2)

ChooseAction(2)

GetFullCloudAndNormals(2)

AddExperienceQLearning(1)

GetDualCloudAndViewPoints(1)

GetGraspCylinder(1)

GetAction(1)

GetPlacePoseCylinder(1)

GetStandardViewPose(1)

AddExperienceSarsa(1)

Beispiel #1

Datei anzeigen

def main(saveFileSuffix):
    '''Entrypoint to the program.

    Repeats nValueIterations rounds of: gathering pick-and-place experiences
    on randomly placed objects, pruning and labeling the experience database,
    training the agent, lowering the exploration parameters, and rewriting the
    results file.

    - Input saveFileSuffix: Text inserted into the results file name
      ("results" + saveFileSuffix + ".mat").
    '''

    # PARAMETERS =====================================================================================

    # objects
    objectClass = "mug_train"
    randomScale = True
    targetObjectAxis = array([0, 0, 1])
    maxAngleFromObjectAxis = 20 * (pi / 180)
    maxObjectTableGap = 0.03

    # view
    viewCenter = array([0, 0, 0])
    viewKeepout = 0.50
    viewWorkspace = [(-1, 1), (-1, 1), (0.002, 1)]

    # grasps
    graspDetectMode = 0  # 0=sample, 1=sample+label
    nGraspSamples = 100
    graspScoreThresh = 350

    # learning
    nValueIterations = 70
    nDataIterations = 50
    nGraspIterations = 20
    pickEpsilon = 1.0
    placeEpsilon = 1.0
    minPickEpsilon = 0.10
    minPlaceEpsilon = 0.10
    pickEpsilonDelta = 0.05
    placeEpsilonDelta = 0.05
    maxExperiences = 25000
    trainingBatchSize = 25000
    unbiasOnIteration = nValueIterations - 5

    # visualization/saving
    saveFileName = "results" + saveFileSuffix + ".mat"
    recordLoss = True
    showViewer = False
    showSteps = False

    # INITIALIZATION =================================================================================

    threeDNet = ThreeDNet()
    rlEnv = RlEnvironment(showViewer)
    rlAgent = RlAgent(rlEnv)
    nPlaceOptions = len(rlAgent.placePoses)
    experienceDatabase = []

    def Pause(message):
        '''Waits for [Enter] with the given prompt, but only in step-by-step mode.'''
        if showSteps:
            raw_input(message)

    # RUN TEST =======================================================================================

    # per-iteration statistics written to the results file
    averageReward = []
    placeActionCounts = []
    trainLosses = []
    testLosses = []
    databaseSize = []
    iterationTime = []

    for valueIterationIdx in xrange(nValueIterations):

        print("Iteration {}. Epsilon pick: {}, place: {}".format(
            valueIterationIdx, pickEpsilon, placeEpsilon))

        # 1. Collect data for this training iteration.

        iterationStartTime = time.time()
        rewards = []
        placeCounts = zeros(nPlaceOptions)

        # From unbiasOnIteration onward: keep only as many experiences as one
        # training batch (all recent, unbiased) and stop exploring, so that the
        # value function of the actual policy is estimated.
        if valueIterationIdx >= unbiasOnIteration:
            maxExperiences = trainingBatchSize
            pickEpsilon = 0
            placeEpsilon = 0

        for dataIterationIdx in xrange(nDataIterations):

            # place random object in random orientation on table
            fullObjName, objScale = threeDNet.GetRandomObjectFromClass(
                objectClass, randomScale)
            objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(
                fullObjName, objScale)

            # move the hand to view position and capture a point cloud
            cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
                viewCenter, viewKeepout, viewWorkspace)
            rlAgent.PlotCloud(cloud)

            # detect grasps in the sensory data
            grasps = rlAgent.DetectGrasps(
                cloud, viewPoints, viewPointIndices,
                nGraspSamples, graspScoreThresh, graspDetectMode)
            rlAgent.PlotGrasps(grasps)
            Pause("Acquired grasps.")

            # nothing to pick: discard this object and move on
            if len(grasps) == 0:
                print("No grasps found. Skipping iteration.")
                rlEnv.RemoveObject(objHandle)
                rlAgent.UnplotCloud()
                continue

            for graspIterationIdx in xrange(nGraspIterations):

                print("Episode {}.{}.{}.".format(
                    valueIterationIdx, dataIterationIdx, graspIterationIdx))

                # perform pick action
                grasp = rlAgent.GetGrasp(grasps, pickEpsilon)
                pickState = rlEnv.GetState(rlAgent, grasp, None)
                rlAgent.PlotGrasps([grasp])

                # note: this step only prints, it does not wait for input
                if showSteps:
                    print("Selected grasp.")

                # perform place action
                placePose = rlAgent.GetPlacePose(grasp, placeEpsilon)
                rlAgent.MoveHandToPose(placePose)
                placeState = rlEnv.GetState(rlAgent, grasp, placePose)
                rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
                reward = rlEnv.RewardBinary(
                    objHandle, targetObjectAxis,
                    maxAngleFromObjectAxis, maxObjectTableGap)
                print("The robot receives {} reward.".format(reward))
                Pause("Press [Enter] to continue...")

                # add experience to database
                experienceDatabase.append((pickState, placeState, 0))  # grasp -> placement
                experienceDatabase.append((placeState, None, reward))  # placement -> end

                # record save data: the last nPlaceOptions entries (offset taken
                # from the pick state's length) are added to the place counts
                rewards.append(reward)
                firstPlaceIdx = len(pickState[1]) - nPlaceOptions
                placeCounts += placeState[1][firstPlaceIdx:]

                # cleanup this grasp iteration
                rlAgent.UnplotGrasps()
                rlEnv.MoveObjectToPose(objHandle, objRandPose)

            # cleanup this data iteration
            rlEnv.RemoveObject(objHandle)
            rlAgent.UnplotCloud()

        # 2. Compute value labels for data.
        experienceDatabase = rlAgent.PruneDatabase(
            experienceDatabase, maxExperiences)
        labeledData = rlAgent.DownsampleAndLabelData(
            experienceDatabase, trainingBatchSize)
        databaseSize.append(len(experienceDatabase))

        # 3. Train network from replay database.
        trainLoss, testLoss = rlAgent.Train(labeledData, recordLoss=recordLoss)
        trainLosses.append(trainLoss)
        testLosses.append(testLoss)

        # decay exploration, never going below the configured minimum
        pickEpsilon = max(minPickEpsilon, pickEpsilon - pickEpsilonDelta)
        placeEpsilon = max(minPlaceEpsilon, placeEpsilon - placeEpsilonDelta)

        # 4. Save results
        averageReward.append(mean(rewards))
        placeActionCounts.append(placeCounts)
        iterationTime.append(time.time() - iterationStartTime)
        saveData = {
            # parameters
            "objectClass": objectClass,
            "nGraspSamples": nGraspSamples,
            "graspScoreThresh": graspScoreThresh,
            "nValueIterations": nValueIterations,
            "nDataIterations": nDataIterations,
            "nGraspIterations": nGraspIterations,
            "pickEpsilon": pickEpsilon,
            "placeEpsilon": placeEpsilon,
            "minPickEpsilon": minPickEpsilon,
            "minPlaceEpsilon": minPlaceEpsilon,
            "pickEpsilonDelta": pickEpsilonDelta,
            "placeEpsilonDelta": placeEpsilonDelta,
            "maxExperiences": maxExperiences,
            "trainingBatchSize": trainingBatchSize,
            # statistics, one entry per completed value iteration
            "averageReward": averageReward,
            "placeActionCounts": placeActionCounts,
            "trainLoss": trainLosses,
            "testLoss": testLosses,
            "databaseSize": databaseSize,
            "iterationTime": iterationTime,
            "placePoses": rlAgent.placePoses
        }
        savemat(saveFileName, saveData)

Beispiel #2

Datei anzeigen

Datei: test_train_regrasp.py Projekt: mgualti/PickAndPlace

def main():
    '''Entrypoint to the program.

    Trains the regrasping agent. Each training iteration runs nEpisodes object
    placements with nReuses episodes apiece, adds the experiences of episodes
    without a grasp detection failure to the database, retrains the network,
    decays epsilon, and rewrites the results file (saveFileName).
    '''

    # PARAMETERS =====================================================================================

    # objects
    objectClass = "mug_train"
    randomScale = True
    targetObjectAxis = array([0, 0, 1])
    maxAngleFromObjectAxis = 20 * (pi / 180)
    maxObjectTableGap = 0.03

    # view
    viewCenter = array([0, 0, 0])
    viewKeepout = 0.50
    viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]

    # grasps
    graspDetectMode = 1  # 0=sample, 1=sample+label
    nGraspSamples = 200
    graspScoreThresh = 300

    # learning
    nTrainingIterations = 100
    nEpisodes = 100
    nReuses = 10
    maxTimesteps = 10
    gamma = 0.98
    epsilon = 1.0
    epsilonDelta = 0.05
    minEpsilon = 0.05
    maxExperiences = 50000
    trainingBatchSize = 50000
    unbiasOnIteration = nTrainingIterations - 5

    # visualization/saving
    saveFileName = "results.mat"
    recordLoss = True
    showViewer = False
    showSteps = False

    # INITIALIZATION =================================================================================

    threeDNet = ThreeDNet()
    rlEnv = RlEnvironment(showViewer)
    rlAgent = RlAgent(rlEnv)
    nPlaceOptions = len(rlAgent.placePoses)
    experienceDatabase = []

    # RUN TEST =======================================================================================

    # per-training-iteration statistics written to the results file
    avgReturn = []
    avgGraspsDetected = []
    avgTopGraspsDetected = []
    placeHistograms = []
    avgGoodTempPlaceCount = []
    avgBadTempPlaceCount = []
    avgGoodFinalPlaceCount = []
    avgBadFinalPlaceCount = []
    trainLosses = []
    testLosses = []
    databaseSize = []
    iterationTime = []

    for trainingIteration in xrange(nTrainingIterations):

        # initialization
        iterationStartTime = time.time()
        print("Iteration: {}, Epsilon: {}".format(trainingIteration, epsilon))

        placeHistogram = zeros(nPlaceOptions)
        Return = []
        graspsDetected = []
        topGraspsDetected = []
        goodTempPlaceCount = []
        badTempPlaceCount = []
        goodFinalPlaceCount = []
        badFinalPlaceCount = []

        # check if it's time to unbias data
        if trainingIteration >= unbiasOnIteration:
            maxExperiences = trainingBatchSize  # selects all recent experiences, unbiased
            epsilon = 0  # estimating value function of actual policy

        # for each episode/object placement
        for episode in xrange(nEpisodes):

            # place random object in random orientation on table
            fullObjName, objScale = threeDNet.GetRandomObjectFromClass(
                objectClass, randomScale)
            objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(
                fullObjName, objScale)

            # move the hand to view position(s) and capture a point cloud
            cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
                viewCenter, viewKeepout, viewWorkspace)

            # detect grasps in the sensor data
            grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices,
                                          nGraspSamples, graspScoreThresh,
                                          graspDetectMode)
            # remembered so every reuse starts from the initial detection
            graspsStart = grasps

            # Record both detection statistics for the initial detection so the
            # two lists describe the same set of detections.
            graspsDetected.append(len(grasps))
            topGraspsCount = CountObjectTopGrasps(grasps, objRandPose,
                                                  maxAngleFromObjectAxis)
            topGraspsDetected.append(topGraspsCount)

            if len(grasps) == 0:
                print("No grasps found. Skipping iteration.")
                rlEnv.RemoveObject(objHandle)
                continue

            rlAgent.PlotCloud(cloud)
            rlAgent.PlotGrasps(grasps)

            for reuse in xrange(nReuses):

                print("Episode {}.{}.{}.".format(trainingIteration, episode,
                                                 reuse))

                if showSteps:
                    raw_input(
                        "Beginning of episode. Press [Enter] to continue...")

                # initialize recording variables; these are only committed to
                # the iteration statistics if grasp detection never fails
                episodePlaceHistogram = zeros(nPlaceOptions)
                episodeReturn = 0
                episodeGoodTempPlaceCount = 0
                episodeBadTempPlaceCount = 0
                episodeGoodFinalPlaceCount = 0
                episodeBadFinalPlaceCount = 0
                graspDetectionFailure = False
                episodeExperiences = []

                # initial state and first action
                s, selectedGrasp = rlEnv.GetInitialState(rlAgent)
                a, grasp, place = rlAgent.ChooseAction(s, grasps, epsilon)
                rlAgent.PlotGrasps([grasp])

                # for each time step in the episode
                for t in xrange(maxTimesteps):

                    ss, selectedGrasp, rr = rlEnv.Transition(
                        rlAgent, objHandle, s, selectedGrasp, a, grasp, place,
                        targetObjectAxis, maxAngleFromObjectAxis,
                        maxObjectTableGap)
                    # placement outcome flags read from the next state
                    ssIsPlacedTempGood = ss[1][1]
                    ssIsPlacedTempBad = ss[1][2]
                    ssIsPlacedFinalGood = ss[1][3]
                    ssIsPlacedFinalBad = ss[1][4]

                    if showSteps:
                        raw_input(
                            "Transition {}. Press [Enter] to continue...".
                            format(t))

                    # re-detect only if a non-terminal placement just happened
                    if ssIsPlacedTempGood and place is not None:
                        cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
                            viewCenter, viewKeepout, viewWorkspace)
                        grasps = rlAgent.DetectGrasps(cloud, viewPoints,
                                                      viewPointIndices,
                                                      nGraspSamples,
                                                      graspScoreThresh,
                                                      graspDetectMode)
                        graspsDetected.append(len(grasps))
                        topGraspsCount = CountObjectTopGrasps(
                            grasps, rlEnv.GetObjectPose(objHandle),
                            maxAngleFromObjectAxis)
                        topGraspsDetected.append(topGraspsCount)
                        if len(grasps) == 0:
                            print("Grasp detection failure.")
                            graspDetectionFailure = True
                            break
                        rlAgent.PlotCloud(cloud)
                        rlAgent.PlotGrasps(grasps)

                    # get next action
                    aa, ggrasp, pplace = rlAgent.ChooseAction(
                        ss, grasps, epsilon)
                    if ggrasp is not None: rlAgent.PlotGrasps([ggrasp])

                    if showSteps:
                        raw_input(
                            "Action {}. Press [Enter] to continue...".format(
                                t))

                    # add to database and record data
                    episodeExperiences.append((s, a, rr, ss, aa))
                    episodeReturn += (gamma**t) * rr
                    if place is not None:
                        episodeGoodTempPlaceCount += ssIsPlacedTempGood
                        episodeBadTempPlaceCount += ssIsPlacedTempBad
                        episodeGoodFinalPlaceCount += ssIsPlacedFinalGood
                        episodeBadFinalPlaceCount += ssIsPlacedFinalBad
                        # accumulate per episode, like the counters above, so
                        # that discarded episodes do not reach placeHistogram
                        episodePlaceHistogram += a[1]

                    # prepare for next time step
                    if ssIsPlacedTempBad or ssIsPlacedFinalGood or ssIsPlacedFinalBad:
                        break
                    s = ss
                    a = aa
                    grasp = ggrasp
                    place = pplace

                # cleanup this reuse: commit the episode unless detection failed
                if not graspDetectionFailure:
                    experienceDatabase += episodeExperiences
                    placeHistogram += episodePlaceHistogram
                    Return.append(episodeReturn)
                    goodTempPlaceCount.append(episodeGoodTempPlaceCount)
                    badTempPlaceCount.append(episodeBadTempPlaceCount)
                    goodFinalPlaceCount.append(episodeGoodFinalPlaceCount)
                    badFinalPlaceCount.append(episodeBadFinalPlaceCount)
                rlEnv.MoveObjectToPose(objHandle, objRandPose)
                grasps = graspsStart

            # cleanup this episode
            rlEnv.RemoveObject(objHandle)
            rlAgent.UnplotGrasps()
            rlAgent.UnplotCloud()

        # 2. Compute value labels for data.
        experienceDatabase = rlAgent.PruneDatabase(experienceDatabase,
                                                   maxExperiences)
        Dl = rlAgent.DownsampleAndLabelData(experienceDatabase,
                                            trainingBatchSize, gamma)
        databaseSize.append(len(experienceDatabase))

        # 3. Train network from replay database.
        trainLoss, testLoss = rlAgent.Train(Dl, recordLoss=recordLoss)
        trainLosses.append(trainLoss)
        testLosses.append(testLoss)

        # decay exploration, never going below minEpsilon
        epsilon -= epsilonDelta
        epsilon = max(minEpsilon, epsilon)

        # 4. Save results
        avgReturn.append(mean(Return))
        avgGraspsDetected.append(mean(graspsDetected))
        avgTopGraspsDetected.append(mean(topGraspsDetected))
        placeHistograms.append(placeHistogram)
        avgGoodTempPlaceCount.append(mean(goodTempPlaceCount))
        avgBadTempPlaceCount.append(mean(badTempPlaceCount))
        avgGoodFinalPlaceCount.append(mean(goodFinalPlaceCount))
        avgBadFinalPlaceCount.append(mean(badFinalPlaceCount))
        iterationTime.append(time.time() - iterationStartTime)
        saveData = {
            "objectClass": objectClass,
            "randomScale": randomScale,
            "maxAngleFromObjectAxis": maxAngleFromObjectAxis,
            "maxObjectTableGap": maxObjectTableGap,
            "nGraspSamples": nGraspSamples,
            "graspScoreThresh": graspScoreThresh,
            "graspDetectMode": graspDetectMode,
            "nTrainingIterations": nTrainingIterations,
            "nEpisodes": nEpisodes,
            "maxTimesteps": maxTimesteps,
            "gamma": gamma,
            "epsilon": epsilon,
            "minEpsilon": minEpsilon,
            "epsilonDelta": epsilonDelta,
            "maxExperiences": maxExperiences,
            "trainingBatchSize": trainingBatchSize,
            "avgReturn": avgReturn,
            "avgGraspsDetected": avgGraspsDetected,
            "avgTopGraspsDetected": avgTopGraspsDetected,
            "placeHistograms": placeHistograms,
            "avgGoodTempPlaceCount": avgGoodTempPlaceCount,
            "avgBadTempPlaceCount": avgBadTempPlaceCount,
            "avgGoodFinalPlaceCount": avgGoodFinalPlaceCount,
            "avgBadFinalPlaceCount": avgBadFinalPlaceCount,
            "trainLoss": trainLosses,
            "testLoss": testLosses,
            "databaseSize": databaseSize,
            "iterationTime": iterationTime,
            "placePoses": rlAgent.placePoses
        }
        savemat(saveFileName, saveData)

Beispiel #3

Datei anzeigen

Datei: test_clutter.py Projekt: mgualti/PickAndPlace

def main(objectClass, epsilon):
    '''Entrypoint to the program.

    Runs nDataIterations pick-and-place trials on cluttered scenes and writes
    the collected rewards to a .mat file after every completed trial.

    - Input objectClass: Object class handed to ThreeDNet.GetRandomObjectSet;
      also part of the results file name.
    - Input epsilon: Handed to the agent's GetGrasp and GetPlacePose; network
      weights are loaded only when it is below 1.0.
    '''

    # PARAMETERS =====================================================================================

    # objects
    nObjects = 7
    randomObjectScale = True
    targetObjectAxis = array([0, 0, 1])
    maxAngleFromObjectAxis = 20 * (pi / 180)
    maxObjectTableGap = 0.02

    # view
    viewCenter = array([0, 0, 0])
    viewKeepout = 0.50
    viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]
    objViewWorkspace = [(-1, 1), (-1, 1), (0.002, 1)]

    # grasps
    graspDetectMode = 1  # 0=sample, 1=sample+label
    nGraspSamples = 500
    graspScoreThresh = 300
    nGraspInliers = 2

    # testing
    weightsFileName = "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/image_iter_5000.caffemodel"
    nDataIterations = 300

    # visualization/saving
    saveFileName = "results-clutter-" + objectClass + "-epsilon" + str(epsilon) + ".mat"
    showViewer = False
    showSteps = False

    # INITIALIZATION =================================================================================

    threeDNet = ThreeDNet()
    rlEnv = RlEnvironment(showViewer)
    rlAgent = RlAgent(rlEnv)
    if epsilon < 1.0:
        rlAgent.LoadNetworkWeights(weightsFileName)
    rewards = []

    def Pause(message):
        '''Waits for [Enter] with the given prompt, but only in step-by-step mode.'''
        if showSteps:
            raw_input(message)

    # Everything saved is fixed except the reward list, which grows in place,
    # so the dictionary can be assembled once and re-saved after each trial.
    saveData = {
        "nObjects": nObjects,
        "randomObjectScale": randomObjectScale,
        "targetObjectAxis": targetObjectAxis,
        "maxAngleFromObjectAxis": maxAngleFromObjectAxis,
        "maxObjectTableGap": maxObjectTableGap,
        "graspDetectMode": graspDetectMode,
        "nGraspSamples": nGraspSamples,
        "graspScoreThresh": graspScoreThresh,
        "weightsFileName": weightsFileName,
        "nDataIterations": nDataIterations,
        "epsilon": epsilon,
        "Return": rewards
    }

    # RUN TEST =======================================================================================

    for dataIterationIdx in xrange(nDataIterations):

        print("Iteration {}.".format(dataIterationIdx))

        # place clutter on table
        fullObjNames, objScales = threeDNet.GetRandomObjectSet(
            objectClass, nObjects, randomObjectScale)
        objHandles, objPoses = rlEnv.PlaceObjectSet(fullObjNames, objScales)

        objCloud, objCloudIdxs = rlEnv.AssignPointsToObjects(
            rlAgent, objHandles, viewCenter, viewKeepout, objViewWorkspace)
        Pause("Objects placed.")

        # move the hand to view position and capture a point cloud
        cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
            viewCenter, viewKeepout, viewWorkspace)
        rlAgent.PlotCloud(cloud)
        Pause("Point cloud.")

        # detect grasps in the sensory data, then filter them against the
        # object cloud
        graspsDetected = rlAgent.DetectGrasps(
            cloud, viewPoints, viewPointIndices, nGraspSamples,
            graspScoreThresh, nGraspInliers, graspDetectMode)
        grasps = rlAgent.FilterGraspsWithNoPoints(graspsDetected, objCloud)
        nFiltered = len(graspsDetected) - len(grasps)
        if nFiltered > 0:
            print("Fitlered {} empty grasps.".format(nFiltered))
        rlAgent.PlotGrasps(grasps)
        Pause("Acquired grasps.")

        # nothing to pick: clear the scene and try again (no save this round)
        if len(grasps) == 0:
            print("No grasps found. Skipping iteration.")
            rlEnv.RemoveObjectSet(objHandles)
            rlAgent.UnplotGrasps()
            rlAgent.UnplotCloud()
            continue

        # perform pick action
        grasp = rlAgent.GetGrasp(grasps, epsilon)
        rlAgent.PlotGrasps([grasp])
        Pause("Selected grasp.")

        # perform place action
        placePose = rlAgent.GetPlacePose(grasp, epsilon)
        rlAgent.MoveHandToPose(placePose)
        objHandle, _ = rlEnv.GetObjectWithMaxGraspPoints(
            grasp, objHandles, objCloud, objCloudIdxs)
        rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
        reward = rlEnv.RewardHeightExponential(
            objHandle, targetObjectAxis, maxAngleFromObjectAxis,
            maxObjectTableGap)
        print("The robot receives {} reward.".format(reward))
        rewards.append(reward)
        Pause("Press [Enter] to continue...")

        # cleanup this data iteration
        rlEnv.RemoveObjectSet(objHandles)
        rlAgent.UnplotGrasps()
        rlAgent.UnplotCloud()

        # Save results
        savemat(saveFileName, saveData)

Beispiel #4

Datei anzeigen

def main(objectClass, epsilon):
  '''Entrypoint to the program.

  Runs nDataIterations single-object pick-and-place trials and writes the
  collected rewards to a .mat file after every completed trial.

    - Input objectClass: Folder in 3D Net database.
    - Input epsilon: Handed to the agent's GetGrasp and GetPlacePose; network
      weights are loaded only when it is below 1.0.
  '''

  # PARAMETERS =====================================================================================

  # objects
  randomScale = True
  targetObjectAxis = array([0,0,1])
  maxAngleFromObjectAxis = 20*(pi/180)
  maxObjectTableGap = 0.02

  # view
  viewCenter = array([0,0,0])
  viewKeepout = 0.50
  viewWorkspace = [(-1,1),(-1,1),(-1,1)]

  # grasps
  graspDetectMode = 1 # 0=sample, 1=sample+label
  nGraspSamples = 200
  graspScoreThresh = 300
  nGraspInliers = 3

  # learning
  weightsFileName = "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/image_iter_5000.caffemodel"
  nDataIterations = 300

  # visualization/saving
  showViewer = False
  showEveryStep = False
  saveFileName = "results-single-" + objectClass + "-epsilon" + str(epsilon) + ".mat"

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(showViewer)
  rlAgent = RlAgent(rlEnv)
  if epsilon < 1.0:
    rlAgent.LoadNetworkWeights(weightsFileName)
  rewards = []

  def Pause():
    '''Waits for [Enter], but only in step-by-step mode.'''
    if showEveryStep:
      raw_input("Press [Enter] to continue...")

  # Everything saved is fixed except the reward list, which grows in place,
  # so the dictionary can be assembled once and re-saved after each trial.
  saveData = {
    "randomScale":randomScale,
    "targetObjectAxis":targetObjectAxis,
    "maxAngleFromObjectAxis":maxAngleFromObjectAxis,
    "maxObjectTableGap":maxObjectTableGap,
    "graspDetectMode":graspDetectMode,
    "nGraspSamples":nGraspSamples,
    "graspScoreThresh":graspScoreThresh,
    "weightsFileName":weightsFileName,
    "nDataIterations":nDataIterations,
    "epsilon":epsilon,
    "Return":rewards}

  # RUN TEST =======================================================================================

  # Collect data for this training iteration.

  for dataIterationIdx in xrange(nDataIterations):

    print("Iteration {}.".format(dataIterationIdx))

    # place object in random orientation on table
    fullObjName, objScale = threeDNet.GetRandomObjectFromClass(objectClass, randomScale)
    objHandle, _ = rlEnv.PlaceObjectRandomOrientation(fullObjName, objScale)

    # move the hand to view position and capture a point cloud
    cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
      viewCenter, viewKeepout, viewWorkspace)
    rlAgent.PlotCloud(cloud)

    # detect grasps in the sensory data
    grasps = rlAgent.DetectGrasps(
      cloud, viewPoints, viewPointIndices,
      nGraspSamples, graspScoreThresh, nGraspInliers, graspDetectMode)
    rlAgent.PlotGrasps(grasps)

    # nothing to pick: discard this object and try again (no save this round)
    if len(grasps) == 0:
      print("No grasps found. Skipping iteration.")
      rlEnv.RemoveObject(objHandle)
      rlAgent.UnplotCloud()
      continue

    Pause()

    # perform pick action
    grasp = rlAgent.GetGrasp(grasps, epsilon)
    rlAgent.PlotGrasps([grasp])

    Pause()

    # perform place action
    placePose = rlAgent.GetPlacePose(grasp, epsilon)
    rlAgent.MoveHandToPose(placePose)
    rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
    reward = rlEnv.RewardHeightExponential(
      objHandle, targetObjectAxis, maxAngleFromObjectAxis, maxObjectTableGap)
    print("The robot receives {} reward.".format(reward))
    rewards.append(reward)

    Pause()

    # cleanup this data iteration
    rlEnv.RemoveObject(objHandle)
    rlAgent.UnplotGrasps()
    rlAgent.UnplotCloud()

    # save results gathered so far
    savemat(saveFileName, saveData)

Beispiel #5

Datei anzeigen

def main(objectClass):
    '''Entrypoint to the program.

    Evaluates the regrasping agent with loaded network weights. Runs nEpisodes
    episodes, each on a newly placed random object, and rewrites the results
    file after every episode that was not skipped for lack of grasps.

    - Input objectClass: Object class handed to
      ThreeDNet.GetRandomObjectFromClass; also part of the results file name.
    '''

    # PARAMETERS =====================================================================================

    # objects
    randomScale = True
    targetObjectAxis = array([0, 0, 1])
    maxAngleFromObjectAxis = 20 * (pi / 180)
    maxObjectTableGap = 0.03

    # view
    viewCenter = array([0, 0, 0])
    viewKeepout = 0.50
    viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]

    # grasps
    graspDetectMode = 1  # 0=sample, 1=sample+label
    nGraspSamples = 200
    graspScoreThresh = 300

    # testing
    nEpisodes = 300
    maxTimesteps = 10
    gamma = 0.98
    epsilon = 0.0
    weightsFileName = \
      "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/dualImage_iter_5000.caffemodel"

    # visualization/saving
    saveFileName = "results-" + objectClass + ".mat"
    showViewer = False
    showSteps = False

    # INITIALIZATION =================================================================================

    threeDNet = ThreeDNet()
    rlEnv = RlEnvironment(showViewer)
    rlAgent = RlAgent(rlEnv)
    rlAgent.LoadNetworkWeights(weightsFileName)
    nPlaceOptions = len(rlAgent.placePoses)

    # statistics accumulated over all episodes and written to the results file
    placeHistogram = zeros(nPlaceOptions)
    Return = []
    graspsDetected = []
    topGraspsDetected = []
    goodTempPlaceCount = []
    badTempPlaceCount = []
    goodFinalPlaceCount = []
    badFinalPlaceCount = []

    # RUN TEST =======================================================================================

    # for each episode/object placement
    for episode in xrange(nEpisodes):

        # place random object in random orientation on table
        fullObjName, objScale = threeDNet.GetRandomObjectFromClass(
            objectClass, randomScale)
        objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(
            fullObjName, objScale)
        rlAgent.MoveSensorToPose(
            rlAgent.GetStandardViewPose(viewCenter, viewKeepout))

        if showSteps:
            raw_input("Beginning of episode. Press [Enter] to continue...")

        # move the hand to view position(s) and capture a point cloud
        cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
            viewCenter, viewKeepout, viewWorkspace)
        rlAgent.PlotCloud(cloud)

        if showSteps:
            raw_input("Acquired point cloud. Press [Enter] to continue...")

        # detect grasps in the sensor data
        grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices,
                                      nGraspSamples, graspScoreThresh,
                                      graspDetectMode)

        # Record both detection statistics for the initial detection so the
        # two lists describe the same set of detections.
        graspsDetected.append(len(grasps))
        topGraspsCount = CountObjectTopGrasps(grasps, objRandPose,
                                              maxAngleFromObjectAxis)
        topGraspsDetected.append(topGraspsCount)

        if len(grasps) == 0:
            print("No grasps found. Skipping iteration.")
            rlEnv.RemoveObject(objHandle)
            # the cloud was plotted above, so clear it before skipping
            rlAgent.UnplotCloud()
            continue

        rlAgent.PlotGrasps(grasps)

        print("Episode {}.".format(episode))

        if showSteps:
            raw_input("Acquired grasps. Press [Enter] to continue...")

        # initialize recording variables; these are only committed to the
        # overall statistics if grasp detection never fails
        episodePlaceHistogram = zeros(nPlaceOptions)
        episodeReturn = 0
        episodeGoodTempPlaceCount = 0
        episodeBadTempPlaceCount = 0
        episodeGoodFinalPlaceCount = 0
        episodeBadFinalPlaceCount = 0
        graspDetectionFailure = False

        # initial state and first action
        s, selectedGrasp = rlEnv.GetInitialState(rlAgent)
        a, grasp, place = rlAgent.ChooseAction(s, grasps, epsilon)
        rlAgent.PlotGrasps([grasp])

        # for each time step in the episode
        for t in xrange(maxTimesteps):

            ss, selectedGrasp, rr = rlEnv.Transition(rlAgent, objHandle, s,
                                                     selectedGrasp, a, grasp,
                                                     place, targetObjectAxis,
                                                     maxAngleFromObjectAxis,
                                                     maxObjectTableGap)
            # placement outcome flags read from the next state
            ssIsPlacedTempGood = ss[1][1]
            ssIsPlacedTempBad = ss[1][2]
            ssIsPlacedFinalGood = ss[1][3]
            ssIsPlacedFinalBad = ss[1][4]

            if showSteps:
                raw_input(
                    "Transition {}. Press [Enter] to continue...".format(t))

            # re-detect only if a non-terminal placement just happened
            if ssIsPlacedTempGood and place is not None:
                cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
                    viewCenter, viewKeepout, viewWorkspace)
                rlAgent.UnplotGrasps()
                rlAgent.PlotCloud(cloud)
                if showSteps:
                    raw_input("Acquired cloud. Press [Enter] to continue...")
                grasps = rlAgent.DetectGrasps(cloud, viewPoints,
                                              viewPointIndices, nGraspSamples,
                                              graspScoreThresh,
                                              graspDetectMode)
                graspsDetected.append(len(grasps))
                # This detection follows a placement, so count top grasps
                # against the object's current pose, not the initial one.
                topGraspsCount = CountObjectTopGrasps(
                    grasps, rlEnv.GetObjectPose(objHandle),
                    maxAngleFromObjectAxis)
                topGraspsDetected.append(topGraspsCount)
                if len(grasps) == 0:
                    print("Grasp detection failure.")
                    graspDetectionFailure = True
                    break
                rlAgent.PlotGrasps(grasps)
                if showSteps:
                    raw_input("Acquired grasps. Press [Enter] to continue...")

            # get next action
            aa, ggrasp, pplace = rlAgent.ChooseAction(ss, grasps, epsilon)
            if ggrasp is not None: rlAgent.PlotGrasps([ggrasp])

            if showSteps:
                raw_input("Action {}. Press [Enter] to continue...".format(t))

            # record data from transition
            episodeReturn += (gamma**t) * rr
            if place is not None:
                episodeGoodTempPlaceCount += ssIsPlacedTempGood
                episodeBadTempPlaceCount += ssIsPlacedTempBad
                episodeGoodFinalPlaceCount += ssIsPlacedFinalGood
                episodeBadFinalPlaceCount += ssIsPlacedFinalBad
                # accumulate per episode, like the counters above, so that
                # discarded episodes do not reach placeHistogram
                episodePlaceHistogram += a[1]

            # prepare for next time step
            if ssIsPlacedTempBad or ssIsPlacedFinalGood or ssIsPlacedFinalBad:
                break
            s = ss
            a = aa
            grasp = ggrasp
            place = pplace

        # commit the episode's statistics unless grasp detection failed
        if not graspDetectionFailure:
            placeHistogram += episodePlaceHistogram
            Return.append(episodeReturn)
            goodTempPlaceCount.append(episodeGoodTempPlaceCount)
            badTempPlaceCount.append(episodeBadTempPlaceCount)
            goodFinalPlaceCount.append(episodeGoodFinalPlaceCount)
            badFinalPlaceCount.append(episodeBadFinalPlaceCount)

        # cleanup this episode
        rlEnv.RemoveObject(objHandle)
        rlAgent.UnplotGrasps()
        rlAgent.UnplotCloud()

        # Save results
        saveData = {
            "objectClass": objectClass,
            "randomScale": randomScale,
            "maxAngleFromObjectAxis": maxAngleFromObjectAxis,
            "maxObjectTableGap": maxObjectTableGap,
            "nGraspSamples": nGraspSamples,
            "graspScoreThresh": graspScoreThresh,
            "graspDetectMode": graspDetectMode,
            "nEpisodes": nEpisodes,
            "maxTimesteps": maxTimesteps,
            "gamma": gamma,
            "epsilon": epsilon,
            "Return": Return,
            "graspsDetected": graspsDetected,
            "topGraspsDetected": topGraspsDetected,
            "placeHistogram": placeHistogram,
            "goodTempPlaceCount": goodTempPlaceCount,
            "badTempPlaceCount": badTempPlaceCount,
            "goodFinalPlaceCount": goodFinalPlaceCount,
            "badFinalPlaceCount": badFinalPlaceCount
        }
        savemat(saveFileName, saveData)