Example #1
from time import time
from numpy.random import seed
from scipy.io import loadmat, savemat
import tensorflow

def Main():
    '''Entrypoint to the program.'''

    # PARAMETERS =====================================================================================

    params = loadmat("parameters.mat", squeeze_me=True)
    randomSeed = params["randomSeed"]
    tMax = params["tMax"]
    nEpisodes = params["nEpisodes"]
    trainEvery = params["trainEvery"]
    unbiasOnEpisode = params["unbiasOnEpisode"]
    saveFileName = params["saveFileName"]
    loadNetwork = params["loadNetwork"]
    loadDatabase = params["loadDatabase"]
    showSteps = params["showSteps"]

    # INITIALIZATION =================================================================================

    # set random seeds
    seed(randomSeed)
    tensorflow.random.set_seed(randomSeed)

    # initialize agent and environment
    rlEnv = RlEnvironmentPegsOnDisks(params)
    rlAgent = RlAgent(params)

    # if testing, load previous results
    if loadNetwork:
        rlAgent.LoadQFunction()

    if loadDatabase:
        rlAgent.LoadExperienceDatabase()

    # RUN TEST =======================================================================================

    episodeReturn = []
    nPlacedObjects = []
    episodeTime = []
    timeStepEpsilon = []
    databaseSize = []
    losses = []

    for episode in range(nEpisodes):

        startTime = time()

        # place random objects in random orientations on the table
        rlEnv.MoveHandToHoldingPose()
        rlEnv.PlaceObjects(False)
        rlEnv.PlaceObjects(True)
        if showSteps: input("Placed objects.")

        R = 0
        nPlaced = 0
        holdingDesc = None
        o = None
        a = None
        r = None
        for t in range(tMax):
            # get a point cloud
            cloud = rlEnv.GetArtificialCloud()
            # a step with an empty hand is a grasp attempt; otherwise it is a place
            isGrasp = holdingDesc is None
            # get the next action
            oo, aa, overtDesc, epsilon = rlAgent.SenseAndAct(
                holdingDesc, cloud, t, rlEnv, episode >= unbiasOnEpisode)
            # perform transition
            holdingDesc, rr = rlEnv.Transition(overtDesc, cloud)
            # save experience
            if t > 0: rlAgent.AddExperienceSarsa(o, a, r, oo, aa)
            o = oo
            a = aa
            r = rr
            # record step data: accumulate return and update placed-object count
            R += r
            if isGrasp:
                if r < 0:
                    nPlaced -= 1
            else:
                if r > 0:
                    nPlaced += 1
            timeStepEpsilon.append(epsilon)

        # add final experience
        rlAgent.AddExperienceSarsa(o, a, r, [None] * rlAgent.nLevels,
                                   [None] * rlAgent.nLevels)

        # cleanup episode
        rlEnv.ResetEpisode()
        print("Episode {} had return {}".format(episode, R))

        # training
        if episode % trainEvery == trainEvery - 1:
            losses.append(rlAgent.UpdateQFunctionSarsa())
            rlAgent.SaveQFunction()

        # save results
        episodeReturn.append(R)
        nPlacedObjects.append(nPlaced)
        episodeTime.append(time() - startTime)
        databaseSize.append(rlAgent.GetNumberOfExperiences())

        if episode % trainEvery == trainEvery - 1 or episode == nEpisodes - 1:
            saveData = {
                "episodeReturn": episodeReturn,
                "nPlacedObjects": nPlacedObjects,
                "episodeTime": episodeTime,
                "timeStepEpsilon": timeStepEpsilon,
                "databaseSize": databaseSize,
                "losses": losses
            }
            saveData.update(params)
            savemat(saveFileName, saveData)

        # backup agent data
        if episode == nEpisodes - 1:
            rlAgent.SaveExperienceDatabase()
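
Example #1 stores one-step (o, a, r, o', a') tuples and trains with UpdateQFunctionSarsa, which suggests the standard SARSA(0) target r + gamma * Q(o', a'). The sketch below is only a hypothetical, tabular illustration of that rule: the real RlAgent appears to fit a neural-network Q-function (it seeds TensorFlow), and Q, alpha, and gamma here are stand-ins not taken from the code above.

def SarsaUpdate(Q, experience, alpha=0.1, gamma=0.98):
    '''One tabular SARSA(0) update; in this sketch, None marks a terminal next state.'''
    o, a, r, oNext, aNext = experience
    # terminal next states contribute zero future value
    qNext = 0.0 if oNext is None or aNext is None else Q.get((oNext, aNext), 0.0)
    target = r + gamma * qNext
    Q[(o, a)] = Q.get((o, a), 0.0) + alpha * (target - Q.get((o, a), 0.0))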
Example #2
from time import time
from numpy.random import seed
from scipy.io import loadmat, savemat
import tensorflow

def Main():
    '''Entrypoint to the program.'''

    # PARAMETERS =====================================================================================

    params = loadmat("parameters.mat", squeeze_me=True)
    randomSeed = params["randomSeed"]
    tMax = params["tMax"]
    nEpisodes = params["nEpisodes"]
    trainEvery = params["trainEvery"]
    unbiasOnEpisode = params["unbiasOnEpisode"]
    saveFileName = params["saveFileName"]
    loadNetwork = params["loadNetwork"]
    loadDatabase = params["loadDatabase"]
    showSteps = params["showSteps"]

    # INITIALIZATION =================================================================================

    # set random seeds
    seed(randomSeed)
    tensorflow.random.set_seed(randomSeed)

    # initialize agent and environment
    rlEnv = RlEnvironmentBottlesOnCoasters(params)
    rlAgent = RlAgent(params)

    # if testing, load previous results
    if loadNetwork:
        rlAgent.LoadQFunction()

    if loadDatabase:
        rlAgent.LoadExperienceDatabase()

    # RUN TEST =======================================================================================

    episodeReturn = []
    nPlacedObjects = []
    nGraspedObjects = []
    episodeTime = []
    timeStepEpsilon = []
    databaseSize = []
    losses = []

    for episode in range(nEpisodes):

        startTime = time()

        # place random objects in random orientations on the table
        rlEnv.MoveHandToHoldingPose()
        rlEnv.PlaceObjects(False)
        rlEnv.PlaceObjects(True)
        if showSteps: input("Placed objects.")

        R = 0
        nPlaced = 0
        nGrasped = 0
        holdingDesc = None
        observations = []
        actions = []
        rewards = []
        isGrasp = []

        for t in range(tMax):
            # get a point cloud
            cloud = rlEnv.GetArtificialCloud()
            #rlEnv.PlotCloud(cloud)
            #if showSteps: input("Acquired cloud.")
            # record whether this step is a grasp attempt (hand currently empty)
            isGrasp.append(holdingDesc is None)
            # get the next action
            o, a, overtDesc, epsilon = rlAgent.SenseAndAct(
                holdingDesc, cloud, t, rlEnv, episode >= unbiasOnEpisode)
            # perform transition
            holdingDesc, r = rlEnv.Transition(overtDesc, cloud)
            # save experiences
            timeStepEpsilon.append(epsilon)
            observations.append(o)
            actions.append(a)
            rewards.append(r)
            R += r
            # compute task success -- number of objects grasped and placed
            if isGrasp[-1]:
                if holdingDesc is not None:
                    nGrasped += 1
                if r < 0:
                    nPlaced -= 1
            else:
                if r == 1:
                    nPlaced += 1

        rlAgent.AddExperienceMonteCarlo(observations, actions, rewards,
                                        isGrasp)

        # cleanup episode
        rlEnv.ResetEpisode()
        print("Episode {} had return {}".format(episode, R))

        # training
        if episode % trainEvery == trainEvery - 1:
            losses.append(rlAgent.UpdateQFunctionMonteCarlo())
            rlAgent.SaveQFunction()

        # save results
        episodeReturn.append(R)
        nPlacedObjects.append(nPlaced)
        nGraspedObjects.append(nGrasped)
        episodeTime.append(time() - startTime)
        databaseSize.append(rlAgent.GetNumberOfExperiences())

        if episode % trainEvery == trainEvery - 1 or episode == nEpisodes - 1:
            saveData = {
                "episodeReturn": episodeReturn,
                "nPlacedObjects": nPlacedObjects,
                "nGraspedObjects": nGraspedObjects,
                "episodeTime": episodeTime,
                "timeStepEpsilon": timeStepEpsilon,
                "databaseSize": databaseSize,
                "losses": losses
            }
            saveData.update(params)
            savemat(saveFileName, saveData)

        # backup agent data
        if episode == nEpisodes - 1:
            rlAgent.SaveExperienceDatabase()
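
Example #2 hands the whole trajectory (observations, actions, rewards, isGrasp) to AddExperienceMonteCarlo at the end of the episode, so the training targets are presumably discounted Monte Carlo returns rather than one-step bootstrapped values. A minimal sketch of that backward computation follows; the discount gamma is an assumed parameter not visible in these excerpts, and the real implementation may also use the isGrasp flags (for example, to separate grasp and place experiences).

def MonteCarloReturns(rewards, gamma=0.98):
    '''Discounted return G_t for every time step, computed with one backward pass.'''
    returns = [0.0] * len(rewards)
    G = 0.0
    for t in reversed(range(len(rewards))):
        G = rewards[t] + gamma * G
        returns[t] = G
    return returns

# e.g. MonteCarloReturns([0, 0, 1]) -> [0.9604, 0.98, 1.0] (up to floating-point rounding)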
Example #3
from time import time
from numpy.random import seed
from scipy.io import loadmat, savemat
import tensorflow

def Main():
    '''Entrypoint to the program.'''

    # PARAMETERS =====================================================================================

    params = loadmat("parameters.mat", squeeze_me=True)
    randomSeed = params["randomSeed"]
    tMax = params["tMax"]
    nEpisodes = params["nEpisodes"]
    trainEvery = params["trainEvery"]
    unbiasOnEpisode = params["unbiasOnEpisode"]
    nObjects = params["nObjects"]
    nSurfaceObjects = params["nSurfaceObjects"]
    objHeight = params["objHeight"]
    objRadius = params["objRadius"]
    surfObjHeight = params["surfObjHeight"]
    surfObjRadius = params["surfObjRadius"]
    saveFileName = params["saveFileName"]
    loadNetwork = params["loadNetwork"]
    loadDatabase = params["loadDatabase"]
    showSteps = params["showSteps"]

    # INITIALIZATION =================================================================================

    # set random seeds
    seed(randomSeed)
    tensorflow.random.set_seed(randomSeed)

    # initialize agent and environment
    rlEnv = RlEnvironmentPegsOnDisks(params)
    rlAgent = RlAgent(params)

    # if testing, load previous results
    if loadNetwork:
        rlAgent.LoadQFunction()

    if loadDatabase:
        rlAgent.LoadExperienceDatabase()

    # RUN TEST =======================================================================================

    episodeReturn = []
    episodeTime = []
    timeStepEpsilon = []
    databaseSize = []
    losses = []
    for episode in range(nEpisodes):

        startTime = time()

        # place random cylinders (objects and surface objects) on the table
        rlEnv.MoveHandToHoldingPose()
        rlEnv.PlaceCylinders(nObjects, objHeight, objRadius, False)
        rlEnv.PlaceCylinders(nSurfaceObjects, surfObjHeight, surfObjRadius,
                             True)
        if showSteps: input("Placed objects.")

        R = 0
        holdingDesc = None
        observations = []
        actions = []
        rewards = []
        for t in range(tMax):
            # get the next action
            o, a, overtDesc, epsilon = rlAgent.SenseAndAct(
                holdingDesc, t, rlEnv, episode >= unbiasOnEpisode)
            # perform transition
            holdingDesc, r = rlEnv.Transition(overtDesc)
            # save experiences
            timeStepEpsilon.append(epsilon)
            observations.append(o)
            actions.append(a)
            rewards.append(r)
            R += r

        rlAgent.AddExperienceMonteCarlo(observations, actions, rewards)

        # cleanup episode
        rlEnv.ResetEpisode()
        print("Episode {} had return {}".format(episode, R))

        # training
        if episode % trainEvery == trainEvery - 1:
            losses.append(rlAgent.UpdateQFunctionMonteCarlo())
            rlAgent.SaveQFunction()

        # save results
        episodeReturn.append(R)
        episodeTime.append(time() - startTime)
        databaseSize.append(rlAgent.GetNumberOfExperiences())

        if episode % trainEvery == trainEvery - 1 or episode == nEpisodes - 1:
            saveData = {
                "episodeReturn": episodeReturn,
                "episodeTime": episodeTime,
                "timeStepEpsilon": timeStepEpsilon,
                "databaseSize": databaseSize,
                "losses": losses
            }
            saveData.update(params)
            savemat(saveFileName, saveData)

        # backup agent data
        if episode == nEpisodes - 1:
            rlAgent.SaveExperienceDatabase()
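
All three scripts expect a parameters.mat file to exist before Main() runs. One way to produce a file with the keys referenced above is scipy.io.savemat; every value below is an illustrative placeholder, and the RlAgent and RlEnvironment constructors likely read additional keys that do not appear in these excerpts.

from scipy.io import savemat

# placeholder settings only; real values depend on the task and training budget
params = {
    "randomSeed": 0,
    "tMax": 10,
    "nEpisodes": 1000,
    "trainEvery": 100,
    "unbiasOnEpisode": 500,
    # object geometry (read explicitly only in Example #3)
    "nObjects": 2,
    "nSurfaceObjects": 2,
    "objHeight": 0.10,
    "objRadius": 0.02,
    "surfObjHeight": 0.01,
    "surfObjRadius": 0.05,
    "saveFileName": "results.mat",
    "loadNetwork": 0,
    "loadDatabase": 0,
    "showSteps": 0,
}
savemat("parameters.mat", params)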