Beispiel #1
0
def main(saveFileSuffix):
    '''Entrypoint to the program.'''

    # PARAMETERS =====================================================================================

    # objects
    objectClass = "mug_train"
    randomScale = True
    targetObjectAxis = array([0, 0, 1])
    maxAngleFromObjectAxis = 20 * (pi / 180)
    maxObjectTableGap = 0.03

    # view
    viewCenter = array([0, 0, 0])
    viewKeepout = 0.50
    viewWorkspace = [(-1, 1), (-1, 1), (0.002, 1)]

    # grasps
    graspDetectMode = 0  # 0=sample, 1=sample+label
    nGraspSamples = 100
    graspScoreThresh = 350

    # learning
    nValueIterations = 70
    nDataIterations = 50
    nGraspIterations = 20
    pickEpsilon = 1.0
    placeEpsilon = 1.0
    minPickEpsilon = 0.10
    minPlaceEpsilon = 0.10
    pickEpsilonDelta = 0.05
    placeEpsilonDelta = 0.05
    maxExperiences = 25000
    trainingBatchSize = 25000
    unbiasOnIteration = nValueIterations - 5

    # visualization/saving
    saveFileName = "results" + saveFileSuffix + ".mat"
    recordLoss = True
    showViewer = False
    showSteps = False

    # INITIALIZATION =================================================================================

    threeDNet = ThreeDNet()
    rlEnv = RlEnvironment(showViewer)
    rlAgent = RlAgent(rlEnv)
    nPlaceOptions = len(rlAgent.placePoses)
    experienceDatabase = []

    # RUN TEST =======================================================================================

    averageReward = []
    placeActionCounts = []
    trainLosses = []
    testLosses = []
    databaseSize = []
    iterationTime = []

    for valueIterationIdx in xrange(nValueIterations):

        print("Iteration {}. Epsilon pick: {}, place: {}".format(\
          valueIterationIdx, pickEpsilon, placeEpsilon))

        # 1. Collect data for this training iteration.

        iterationStartTime = time.time()
        R = []
        placeCounts = zeros(nPlaceOptions)

        # check if it's time to unbias data
        if valueIterationIdx >= unbiasOnIteration:
            maxExperiences = trainingBatchSize  # selects all recent experiences, unbiased
            pickEpsilon = 0  # estimating value function of actual policy
            placeEpsilon = 0  # estimating value function of actual policy

        for dataIterationIdx in xrange(nDataIterations):

            # place random object in random orientation on table
            fullObjName, objScale = threeDNet.GetRandomObjectFromClass(
                objectClass, randomScale)
            objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(
                fullObjName, objScale)

            # move the hand to view position and capture a point cloud
            cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
                viewCenter, viewKeepout, viewWorkspace)
            rlAgent.PlotCloud(cloud)

            # detect grasps in the sensory data
            grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices,
                                          nGraspSamples, graspScoreThresh,
                                          graspDetectMode)
            rlAgent.PlotGrasps(grasps)

            if showSteps:
                raw_input("Acquired grasps.")

            if len(grasps) == 0:
                print("No grasps found. Skipping iteration.")
                rlEnv.RemoveObject(objHandle)
                rlAgent.UnplotCloud()
                continue

            for graspIterationIdx in xrange(nGraspIterations):

                print("Episode {}.{}.{}.".format(valueIterationIdx,
                                                 dataIterationIdx,
                                                 graspIterationIdx))

                # perform pick action
                grasp = rlAgent.GetGrasp(grasps, pickEpsilon)
                s = rlEnv.GetState(rlAgent, grasp, None)
                rlAgent.PlotGrasps([grasp])

                if showSteps:
                    print("Selected grasp.")

                # perform place action
                P = rlAgent.GetPlacePose(grasp, placeEpsilon)
                rlAgent.MoveHandToPose(P)
                ss = rlEnv.GetState(rlAgent, grasp, P)
                rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
                r = rlEnv.RewardBinary(objHandle, targetObjectAxis,
                                       maxAngleFromObjectAxis,
                                       maxObjectTableGap)
                print("The robot receives {} reward.".format(r))

                if showSteps:
                    raw_input("Press [Enter] to continue...")

                # add experience to database
                experienceDatabase.append((s, ss, 0))  # grasp -> placement
                experienceDatabase.append((ss, None, r))  # placement -> end

                # record save data
                R.append(r)
                placeCounts += ss[1][len(s[1]) - nPlaceOptions:]

                # cleanup this grasp iteration
                rlAgent.UnplotGrasps()
                rlEnv.MoveObjectToPose(objHandle, objRandPose)

            # cleanup this data iteration
            rlEnv.RemoveObject(objHandle)
            rlAgent.UnplotCloud()

        # 2. Compute value labels for data.
        experienceDatabase = rlAgent.PruneDatabase(experienceDatabase,
                                                   maxExperiences)
        Dl = rlAgent.DownsampleAndLabelData(\
          experienceDatabase, trainingBatchSize)
        databaseSize.append(len(experienceDatabase))

        # 3. Train network from replay database.
        trainLoss, testLoss = rlAgent.Train(Dl, recordLoss=recordLoss)
        trainLosses.append(trainLoss)
        testLosses.append(testLoss)

        pickEpsilon -= pickEpsilonDelta
        placeEpsilon -= placeEpsilonDelta
        pickEpsilon = max(minPickEpsilon, pickEpsilon)
        placeEpsilon = max(minPlaceEpsilon, placeEpsilon)

        # 4. Save results
        averageReward.append(mean(R))
        placeActionCounts.append(placeCounts)
        iterationTime.append(time.time() - iterationStartTime)
        saveData = {
            "objectClass": objectClass,
            "nGraspSamples": nGraspSamples,
            "graspScoreThresh": graspScoreThresh,
            "nValueIterations": nValueIterations,
            "nDataIterations": nDataIterations,
            "nGraspIterations": nGraspIterations,
            "pickEpsilon": pickEpsilon,
            "placeEpsilon": placeEpsilon,
            "minPickEpsilon": minPickEpsilon,
            "minPlaceEpsilon": minPlaceEpsilon,
            "pickEpsilonDelta": pickEpsilonDelta,
            "placeEpsilonDelta": placeEpsilonDelta,
            "maxExperiences": maxExperiences,
            "trainingBatchSize": trainingBatchSize,
            "averageReward": averageReward,
            "placeActionCounts": placeActionCounts,
            "trainLoss": trainLosses,
            "testLoss": testLosses,
            "databaseSize": databaseSize,
            "iterationTime": iterationTime,
            "placePoses": rlAgent.placePoses
        }
        savemat(saveFileName, saveData)
def main():
    '''Entrypoint to the program.'''

    # PARAMETERS =====================================================================================

    # objects
    objectClass = "mug_train"
    randomScale = True
    targetObjectAxis = array([0, 0, 1])
    maxAngleFromObjectAxis = 20 * (pi / 180)
    maxObjectTableGap = 0.03

    # view
    viewCenter = array([0, 0, 0])
    viewKeepout = 0.50
    viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]

    # grasps
    graspDetectMode = 1  # 0=sample, 1=sample+label
    nGraspSamples = 200
    graspScoreThresh = 300

    # learning
    nTrainingIterations = 100
    nEpisodes = 100
    nReuses = 10
    maxTimesteps = 10
    gamma = 0.98
    epsilon = 1.0
    epsilonDelta = 0.05
    minEpsilon = 0.05
    maxExperiences = 50000
    trainingBatchSize = 50000
    unbiasOnIteration = nTrainingIterations - 5

    # visualization/saving
    saveFileName = "results.mat"
    recordLoss = True
    showViewer = False
    showSteps = False

    # INITIALIZATION =================================================================================

    threeDNet = ThreeDNet()
    rlEnv = RlEnvironment(showViewer)
    rlAgent = RlAgent(rlEnv)
    nPlaceOptions = len(rlAgent.placePoses)
    experienceDatabase = []

    # RUN TEST =======================================================================================

    avgReturn = []
    avgGraspsDetected = []
    avgTopGraspsDetected = []
    placeHistograms = []
    avgGoodTempPlaceCount = []
    avgBadTempPlaceCount = []
    avgGoodFinalPlaceCount = []
    avgBadFinalPlaceCount = []
    trainLosses = []
    testLosses = []
    databaseSize = []
    iterationTime = []

    for trainingIteration in xrange(nTrainingIterations):

        # initialization
        iterationStartTime = time.time()
        print("Iteration: {}, Epsilon: {}".format(trainingIteration, epsilon))

        placeHistogram = zeros(nPlaceOptions)
        Return = []
        graspsDetected = []
        topGraspsDetected = []
        goodTempPlaceCount = []
        badTempPlaceCount = []
        goodFinalPlaceCount = []
        badFinalPlaceCount = []

        # check if it's time to unbias data
        if trainingIteration >= unbiasOnIteration:
            maxExperiences = trainingBatchSize  # selects all recent experiences, unbiased
            epsilon = 0  # estimating value function of actual policy

        # for each episode/object placement
        for episode in xrange(nEpisodes):

            # place random object in random orientation on table
            fullObjName, objScale = threeDNet.GetRandomObjectFromClass(
                objectClass, randomScale)
            objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(
                fullObjName, objScale)

            # move the hand to view position(s) and capture a point cloud
            cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
                viewCenter, viewKeepout, viewWorkspace)

            # detect grasps in the sensor data
            grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices,
                                          nGraspSamples, graspScoreThresh,
                                          graspDetectMode)
            graspsStart = grasps

            graspsDetected.append(len(grasps))
            topGraspsCount = CountObjectTopGrasps(grasps, objRandPose,
                                                  maxAngleFromObjectAxis)

            if len(grasps) == 0:
                print("No grasps found. Skipping iteration.")
                rlEnv.RemoveObject(objHandle)
                continue

            rlAgent.PlotCloud(cloud)
            rlAgent.PlotGrasps(grasps)

            for reuse in xrange(nReuses):

                print("Episode {}.{}.{}.".format(trainingIteration, episode,
                                                 reuse))

                if showSteps:
                    raw_input(
                        "Beginning of episode. Press [Enter] to continue...")

                # initialize recording variables
                episodePlaceHistogram = zeros(nPlaceOptions)
                episodeReturn = 0
                episodeGoodTempPlaceCount = 0
                episodeBadTempPlaceCount = 0
                episodeGoodFinalPlaceCount = 0
                episodeBadFinalPlaceCount = 0
                graspDetectionFailure = False
                episodeExperiences = []

                # initial state and first action
                s, selectedGrasp = rlEnv.GetInitialState(rlAgent)
                a, grasp, place = rlAgent.ChooseAction(s, grasps, epsilon)
                rlAgent.PlotGrasps([grasp])

                # for each time step in the episode
                for t in xrange(maxTimesteps):

                    ss, selectedGrasp, rr = rlEnv.Transition(
                        rlAgent, objHandle, s, selectedGrasp, a, grasp, place,
                        targetObjectAxis, maxAngleFromObjectAxis,
                        maxObjectTableGap)
                    ssIsPlacedTempGood = ss[1][1]
                    ssIsPlacedTempBad = ss[1][2]
                    ssIsPlacedFinalGood = ss[1][3]
                    ssIsPlacedFinalBad = ss[1][4]

                    if showSteps:
                        raw_input(
                            "Transition {}. Press [Enter] to continue...".
                            format(t))

                    # re-detect only if a non-terminal placement just happened
                    if ssIsPlacedTempGood and place is not None:
                        cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
                            viewCenter, viewKeepout, viewWorkspace)
                        grasps = rlAgent.DetectGrasps(cloud, viewPoints,
                                                      viewPointIndices,
                                                      nGraspSamples,
                                                      graspScoreThresh,
                                                      graspDetectMode)
                        graspsDetected.append(len(grasps))
                        topGraspsCount = CountObjectTopGrasps(
                            grasps, rlEnv.GetObjectPose(objHandle),
                            maxAngleFromObjectAxis)
                        topGraspsDetected.append(topGraspsCount)
                        if len(grasps) == 0:
                            print("Grasp detection failure.")
                            graspDetectionFailure = True
                            break
                        rlAgent.PlotCloud(cloud)
                        rlAgent.PlotGrasps(grasps)

                    # get next action
                    aa, ggrasp, pplace = rlAgent.ChooseAction(
                        ss, grasps, epsilon)
                    if ggrasp is not None: rlAgent.PlotGrasps([ggrasp])

                    if showSteps:
                        raw_input(
                            "Action {}. Press [Enter] to continue...".format(
                                t))

                    # add to database and record data
                    episodeExperiences.append((s, a, rr, ss, aa))
                    episodeReturn += (gamma**t) * rr
                    if place is not None:
                        episodeGoodTempPlaceCount += ssIsPlacedTempGood
                        episodeBadTempPlaceCount += ssIsPlacedTempBad
                        episodeGoodFinalPlaceCount += ssIsPlacedFinalGood
                        episodeBadFinalPlaceCount += ssIsPlacedFinalBad
                        placeHistogram += a[1]

                    # prepare for next time step
                    if ssIsPlacedTempBad or ssIsPlacedFinalGood or ssIsPlacedFinalBad:
                        break
                    s = ss
                    a = aa
                    grasp = ggrasp
                    place = pplace

                # cleanup this reuse
                if not graspDetectionFailure:
                    experienceDatabase += episodeExperiences
                    placeHistogram += episodePlaceHistogram
                    Return.append(episodeReturn)
                    goodTempPlaceCount.append(episodeGoodTempPlaceCount)
                    badTempPlaceCount.append(episodeBadTempPlaceCount)
                    goodFinalPlaceCount.append(episodeGoodFinalPlaceCount)
                    badFinalPlaceCount.append(episodeBadFinalPlaceCount)
                rlEnv.MoveObjectToPose(objHandle, objRandPose)
                grasps = graspsStart

            # cleanup this episode
            rlEnv.RemoveObject(objHandle)
            rlAgent.UnplotGrasps()
            rlAgent.UnplotCloud()

        # 2. Compute value labels for data.
        experienceDatabase = rlAgent.PruneDatabase(experienceDatabase,
                                                   maxExperiences)
        Dl = rlAgent.DownsampleAndLabelData(experienceDatabase,
                                            trainingBatchSize, gamma)
        databaseSize.append(len(experienceDatabase))

        # 3. Train network from replay database.
        trainLoss, testLoss = rlAgent.Train(Dl, recordLoss=recordLoss)
        trainLosses.append(trainLoss)
        testLosses.append(testLoss)

        epsilon -= epsilonDelta
        epsilon = max(minEpsilon, epsilon)

        # 4. Save results
        avgReturn.append(mean(Return))
        avgGraspsDetected.append(mean(graspsDetected))
        avgTopGraspsDetected.append(mean(topGraspsDetected))
        placeHistograms.append(placeHistogram)
        avgGoodTempPlaceCount.append(mean(goodTempPlaceCount))
        avgBadTempPlaceCount.append(mean(badTempPlaceCount))
        avgGoodFinalPlaceCount.append(mean(goodFinalPlaceCount))
        avgBadFinalPlaceCount.append(mean(badFinalPlaceCount))
        iterationTime.append(time.time() - iterationStartTime)
        saveData = {
            "objectClass": objectClass,
            "randomScale": randomScale,
            "maxAngleFromObjectAxis": maxAngleFromObjectAxis,
            "maxObjectTableGap": maxObjectTableGap,
            "nGraspSamples": nGraspSamples,
            "graspScoreThresh": graspScoreThresh,
            "graspDetectMode": graspDetectMode,
            "nTrainingIterations": nTrainingIterations,
            "nEpisodes": nEpisodes,
            "maxTimesteps": maxTimesteps,
            "gamma": gamma,
            "epsilon": epsilon,
            "minEpsilon": minEpsilon,
            "epsilonDelta": epsilonDelta,
            "maxExperiences": maxExperiences,
            "trainingBatchSize": trainingBatchSize,
            "avgReturn": avgReturn,
            "avgGraspsDetected": avgGraspsDetected,
            "avgTopGraspsDetected": avgTopGraspsDetected,
            "placeHistograms": placeHistograms,
            "avgGoodTempPlaceCount": avgGoodTempPlaceCount,
            "avgBadTempPlaceCount": avgBadTempPlaceCount,
            "avgGoodFinalPlaceCount": avgGoodFinalPlaceCount,
            "avgBadFinalPlaceCount": avgBadFinalPlaceCount,
            "trainLoss": trainLosses,
            "testLoss": testLosses,
            "databaseSize": databaseSize,
            "iterationTime": iterationTime,
            "placePoses": rlAgent.placePoses
        }
        savemat(saveFileName, saveData)
Beispiel #3
0
def main(objectClass, epsilon):
    '''Entrypoint to the program.'''

    # PARAMETERS =====================================================================================

    # objects
    nObjects = 7
    randomObjectScale = True
    targetObjectAxis = array([0, 0, 1])
    maxAngleFromObjectAxis = 20 * (pi / 180)
    maxObjectTableGap = 0.02

    # view
    viewCenter = array([0, 0, 0])
    viewKeepout = 0.50
    viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]
    objViewWorkspace = [(-1, 1), (-1, 1), (0.002, 1)]

    # grasps
    graspDetectMode = 1  # 0=sample, 1=sample+label
    nGraspSamples = 500
    graspScoreThresh = 300
    nGraspInliers = 2

    # testing
    weightsFileName = "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/image_iter_5000.caffemodel"
    nDataIterations = 300

    # visualization/saving
    saveFileName = "results-clutter-" + objectClass + "-epsilon" + str(
        epsilon) + ".mat"
    showViewer = False
    showSteps = False

    # INITIALIZATION =================================================================================

    threeDNet = ThreeDNet()
    rlEnv = RlEnvironment(showViewer)
    rlAgent = RlAgent(rlEnv)
    if epsilon < 1.0: rlAgent.LoadNetworkWeights(weightsFileName)
    Return = []

    # RUN TEST =======================================================================================

    for dataIterationIdx in xrange(nDataIterations):

        print("Iteration {}.".format(dataIterationIdx))

        # place clutter on table
        fullObjNames, objScales = threeDNet.GetRandomObjectSet(
            objectClass, nObjects, randomObjectScale)
        objHandles, objPoses = rlEnv.PlaceObjectSet(fullObjNames, objScales)

        objCloud, objCloudIdxs = rlEnv.AssignPointsToObjects(
            rlAgent, objHandles, viewCenter, viewKeepout, objViewWorkspace)

        if showSteps:
            raw_input("Objects placed.")

        # move the hand to view position and capture a point cloud
        cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
            viewCenter, viewKeepout, viewWorkspace)
        rlAgent.PlotCloud(cloud)

        if showSteps:
            raw_input("Point cloud.")

        # detect grasps in the sensory data
        graspsDetected = rlAgent.DetectGrasps(cloud, viewPoints,
                                              viewPointIndices, nGraspSamples,
                                              graspScoreThresh, nGraspInliers,
                                              graspDetectMode)
        grasps = rlAgent.FilterGraspsWithNoPoints(graspsDetected, objCloud)
        if len(graspsDetected) > len(grasps):
            print("Fitlered {} empty grasps.".format(
                len(graspsDetected) - len(grasps)))
        rlAgent.PlotGrasps(grasps)

        if showSteps:
            raw_input("Acquired grasps.")

        if len(grasps) == 0:
            print("No grasps found. Skipping iteration.")
            rlEnv.RemoveObjectSet(objHandles)
            rlAgent.UnplotGrasps()
            rlAgent.UnplotCloud()
            continue

        # perform pick action
        grasp = rlAgent.GetGrasp(grasps, epsilon)
        rlAgent.PlotGrasps([grasp])

        if showSteps:
            raw_input("Selected grasp.")

        # perform place action
        P = rlAgent.GetPlacePose(grasp, epsilon)
        rlAgent.MoveHandToPose(P)
        objHandle, objPose = rlEnv.GetObjectWithMaxGraspPoints(
            grasp, objHandles, objCloud, objCloudIdxs)
        rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
        r = rlEnv.RewardHeightExponential(objHandle, targetObjectAxis,
                                          maxAngleFromObjectAxis,
                                          maxObjectTableGap)
        print("The robot receives {} reward.".format(r))
        Return.append(r)

        if showSteps:
            raw_input("Press [Enter] to continue...")

        # cleanup this data iteration
        rlEnv.RemoveObjectSet(objHandles)
        rlAgent.UnplotGrasps()
        rlAgent.UnplotCloud()

        # Save results
        saveData = {
            "nObjects": nObjects,
            "randomObjectScale": randomObjectScale,
            "targetObjectAxis": targetObjectAxis,
            "maxAngleFromObjectAxis": maxAngleFromObjectAxis,
            "maxObjectTableGap": maxObjectTableGap,
            "graspDetectMode": graspDetectMode,
            "nGraspSamples": nGraspSamples,
            "graspScoreThresh": graspScoreThresh,
            "weightsFileName": weightsFileName,
            "nDataIterations": nDataIterations,
            "epsilon": epsilon,
            "Return": Return
        }
        savemat(saveFileName, saveData)
Beispiel #4
0
def main(objectClass, epsilon):
  '''Entrypoint to the program.
    - Input objectClass: Folder in 3D Net database.
  '''

  # PARAMETERS =====================================================================================

  # objects
  randomScale = True
  targetObjectAxis = array([0,0,1])
  maxAngleFromObjectAxis = 20*(pi/180)
  maxObjectTableGap = 0.02

  # view
  viewCenter = array([0,0,0])
  viewKeepout = 0.50
  viewWorkspace = [(-1,1),(-1,1),(-1,1)]

  # grasps
  graspDetectMode = 1 # 0=sample, 1=sample+label
  nGraspSamples = 200
  graspScoreThresh = 300
  nGraspInliers = 3

  # learning
  weightsFileName = "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/image_iter_5000.caffemodel"
  nDataIterations = 300

  # visualization/saving
  showViewer = False
  showEveryStep = False
  saveFileName = "results-single-" + objectClass + "-epsilon" + str(epsilon) + ".mat"

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(showViewer)
  rlAgent = RlAgent(rlEnv)
  if epsilon < 1.0: rlAgent.LoadNetworkWeights(weightsFileName)
  Return = []

  # RUN TEST =======================================================================================

  # Collect data for this training iteration.

  for dataIterationIdx in xrange(nDataIterations):

    print("Iteration {}.".format(dataIterationIdx))

    # place object in random orientation on table
    fullObjName, objScale = threeDNet.GetRandomObjectFromClass(objectClass, randomScale)
    objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(fullObjName, objScale)

    # move the hand to view position and capture a point cloud
    cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
      viewCenter, viewKeepout, viewWorkspace)
    rlAgent.PlotCloud(cloud)

    # detect grasps in the sensory data
    grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices,
      nGraspSamples, graspScoreThresh, nGraspInliers, graspDetectMode)
    rlAgent.PlotGrasps(grasps)

    if len(grasps) == 0:
      print("No grasps found. Skipping iteration.")
      rlEnv.RemoveObject(objHandle)
      rlAgent.UnplotCloud()
      continue

    if showEveryStep:
      raw_input("Press [Enter] to continue...")

    # perform pick action
    grasp = rlAgent.GetGrasp(grasps, epsilon)
    rlAgent.PlotGrasps([grasp])

    if showEveryStep:
      raw_input("Press [Enter] to continue...")

    # perform place action
    P = rlAgent.GetPlacePose(grasp, epsilon)
    rlAgent.MoveHandToPose(P)
    rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
    r = rlEnv.RewardHeightExponential(
      objHandle, targetObjectAxis, maxAngleFromObjectAxis, maxObjectTableGap)
    print("The robot receives {} reward.".format(r))
    Return.append(r)

    if showEveryStep:
      raw_input("Press [Enter] to continue...")

    # cleanup this data iteration
    rlEnv.RemoveObject(objHandle)
    rlAgent.UnplotGrasps()
    rlAgent.UnplotCloud()

    saveData = {"randomScale":randomScale, "targetObjectAxis":targetObjectAxis,
      "maxAngleFromObjectAxis":maxAngleFromObjectAxis, "maxObjectTableGap":maxObjectTableGap,
      "graspDetectMode":graspDetectMode, "nGraspSamples":nGraspSamples,
      "graspScoreThresh":graspScoreThresh, "weightsFileName":weightsFileName,
      "nDataIterations":nDataIterations, "epsilon":epsilon, "Return":Return}
    savemat(saveFileName, saveData)
Beispiel #5
0
def main(objectClass):
    '''Entrypoint to the program.'''

    # PARAMETERS =====================================================================================

    # objects
    randomScale = True
    targetObjectAxis = array([0, 0, 1])
    maxAngleFromObjectAxis = 20 * (pi / 180)
    maxObjectTableGap = 0.03

    # view
    viewCenter = array([0, 0, 0])
    viewKeepout = 0.50
    viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]

    # grasps
    graspDetectMode = 1  # 0=sample, 1=sample+label
    nGraspSamples = 200
    graspScoreThresh = 300

    # testing
    nEpisodes = 300
    maxTimesteps = 10
    gamma = 0.98
    epsilon = 0.0
    weightsFileName = \
      "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/dualImage_iter_5000.caffemodel"

    # visualization/saving
    saveFileName = "results-" + objectClass + ".mat"
    showViewer = False
    showSteps = False

    # INITIALIZATION =================================================================================

    threeDNet = ThreeDNet()
    rlEnv = RlEnvironment(showViewer)
    rlAgent = RlAgent(rlEnv)
    rlAgent.LoadNetworkWeights(weightsFileName)
    nPlaceOptions = len(rlAgent.placePoses)

    placeHistogram = zeros(nPlaceOptions)
    Return = []
    graspsDetected = []
    topGraspsDetected = []
    goodTempPlaceCount = []
    badTempPlaceCount = []
    goodFinalPlaceCount = []
    badFinalPlaceCount = []

    # RUN TEST =======================================================================================

    # for each episode/object placement
    for episode in xrange(nEpisodes):

        # place random object in random orientation on table
        fullObjName, objScale = threeDNet.GetRandomObjectFromClass(
            objectClass, randomScale)
        objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(
            fullObjName, objScale)
        rlAgent.MoveSensorToPose(
            rlAgent.GetStandardViewPose(viewCenter, viewKeepout))

        if showSteps:
            raw_input("Beginning of episode. Press [Enter] to continue...")

        # move the hand to view position(s) and capture a point cloud
        cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
            viewCenter, viewKeepout, viewWorkspace)
        rlAgent.PlotCloud(cloud)

        if showSteps:
            raw_input("Acquired point cloud. Press [Enter] to continue...")

        # detect grasps in the sensor data
        grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices,
                                      nGraspSamples, graspScoreThresh,
                                      graspDetectMode)

        graspsDetected.append(len(grasps))
        topGraspsCount = CountObjectTopGrasps(grasps, objRandPose,
                                              maxAngleFromObjectAxis)

        if len(grasps) == 0:
            print("No grasps found. Skipping iteration.")
            rlEnv.RemoveObject(objHandle)
            continue

        rlAgent.PlotGrasps(grasps)

        print("Episode {}.".format(episode))

        if showSteps:
            raw_input("Acquired grasps. Press [Enter] to continue...")

        # initialize recording variables
        episodePlaceHistogram = zeros(nPlaceOptions)
        episodeReturn = 0
        episodeGoodTempPlaceCount = 0
        episodeBadTempPlaceCount = 0
        episodeGoodFinalPlaceCount = 0
        episodeBadFinalPlaceCount = 0
        graspDetectionFailure = False

        # initial state and first action
        s, selectedGrasp = rlEnv.GetInitialState(rlAgent)
        a, grasp, place = rlAgent.ChooseAction(s, grasps, epsilon)
        rlAgent.PlotGrasps([grasp])

        # for each time step in the episode
        for t in xrange(maxTimesteps):

            ss, selectedGrasp, rr = rlEnv.Transition(rlAgent, objHandle, s,
                                                     selectedGrasp, a, grasp,
                                                     place, targetObjectAxis,
                                                     maxAngleFromObjectAxis,
                                                     maxObjectTableGap)
            ssIsPlacedTempGood = ss[1][1]
            ssIsPlacedTempBad = ss[1][2]
            ssIsPlacedFinalGood = ss[1][3]
            ssIsPlacedFinalBad = ss[1][4]

            if showSteps:
                raw_input(
                    "Transition {}. Press [Enter] to continue...".format(t))

            # re-detect only if a non-terminal placement just happened
            if ssIsPlacedTempGood and place is not None:
                cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
                    viewCenter, viewKeepout, viewWorkspace)
                rlAgent.UnplotGrasps()
                rlAgent.PlotCloud(cloud)
                if showSteps:
                    raw_input("Acquired cloud. Press [Enter] to continue...")
                grasps = rlAgent.DetectGrasps(cloud, viewPoints,
                                              viewPointIndices, nGraspSamples,
                                              graspScoreThresh,
                                              graspDetectMode)
                graspsDetected.append(len(grasps))
                topGraspsCount = CountObjectTopGrasps(grasps, objRandPose,
                                                      maxAngleFromObjectAxis)
                topGraspsDetected.append(topGraspsCount)
                if len(grasps) == 0:
                    print("Grasp detection failure.")
                    graspDetectionFailure = True
                    break
                rlAgent.PlotGrasps(grasps)
                if showSteps:
                    raw_input("Acquired grasps. Press [Enter] to continue...")

            # get next action
            aa, ggrasp, pplace = rlAgent.ChooseAction(ss, grasps, epsilon)
            if ggrasp is not None: rlAgent.PlotGrasps([ggrasp])

            if showSteps:
                raw_input("Action {}. Press [Enter] to continue...".format(t))

            # record data from transition
            episodeReturn += (gamma**t) * rr
            if place is not None:
                episodeGoodTempPlaceCount += ssIsPlacedTempGood
                episodeBadTempPlaceCount += ssIsPlacedTempBad
                episodeGoodFinalPlaceCount += ssIsPlacedFinalGood
                episodeBadFinalPlaceCount += ssIsPlacedFinalBad
                placeHistogram += a[1]

            # prepare for next time step
            if ssIsPlacedTempBad or ssIsPlacedFinalGood or ssIsPlacedFinalBad:
                break
            s = ss
            a = aa
            grasp = ggrasp
            place = pplace

        # cleanup this reuse
        if not graspDetectionFailure:
            placeHistogram += episodePlaceHistogram
            Return.append(episodeReturn)
            goodTempPlaceCount.append(episodeGoodTempPlaceCount)
            badTempPlaceCount.append(episodeBadTempPlaceCount)
            goodFinalPlaceCount.append(episodeGoodFinalPlaceCount)
            badFinalPlaceCount.append(episodeBadFinalPlaceCount)

        # cleanup this episode
        rlEnv.RemoveObject(objHandle)
        rlAgent.UnplotGrasps()
        rlAgent.UnplotCloud()

        # Save results
        saveData = {
            "objectClass": objectClass,
            "randomScale": randomScale,
            "maxAngleFromObjectAxis": maxAngleFromObjectAxis,
            "maxObjectTableGap": maxObjectTableGap,
            "nGraspSamples": nGraspSamples,
            "graspScoreThresh": graspScoreThresh,
            "graspDetectMode": graspDetectMode,
            "nEpisodes": nEpisodes,
            "maxTimesteps": maxTimesteps,
            "gamma": gamma,
            "epsilon": epsilon,
            "Return": Return,
            "graspsDetected": graspsDetected,
            "topGraspsDetected": topGraspsDetected,
            "placeHistogram": placeHistogram,
            "goodTempPlaceCount": goodTempPlaceCount,
            "badTempPlaceCount": badTempPlaceCount,
            "goodFinalPlaceCount": goodFinalPlaceCount,
            "badFinalPlaceCount": badFinalPlaceCount
        }
        savemat(saveFileName, saveData)