def Main():
  '''Entrypoint to the program.'''

  # PARAMETERS ===================================================================================

  params = loadmat("parameters.mat", squeeze_me=True)
  randomSeed = params["randomSeed"]
  tMax = params["tMax"]
  nEpisodes = params["nEpisodes"]
  trainEvery = params["trainEvery"]
  unbiasOnEpisode = params["unbiasOnEpisode"]
  saveFileName = params["saveFileName"]
  loadNetwork = params["loadNetwork"]
  loadDatabase = params["loadDatabase"]
  showSteps = params["showSteps"]

  # INITIALIZATION ===============================================================================

  # set random seeds
  seed(randomSeed)
  tensorflow.random.set_seed(randomSeed)

  # initialize agent and environment
  rlEnv = RlEnvironmentPegsOnDisks(params)
  rlAgent = RlAgent(params)

  # if testing, load previous results
  if loadNetwork: rlAgent.LoadQFunction()
  if loadDatabase: rlAgent.LoadExperienceDatabase()

  # RUN TEST =====================================================================================

  episodeReturn = []
  nPlacedObjects = []
  episodeTime = []
  timeStepEpsilon = []
  databaseSize = []
  losses = []

  for episode in xrange(nEpisodes):

    startTime = time()

    # place random object in random orientation on table
    rlEnv.MoveHandToHoldingPose()
    rlEnv.PlaceObjects(False)
    rlEnv.PlaceObjects(True)
    if showSteps: raw_input("Placed objects.")

    R = 0
    nPlaced = 0
    holdingDesc = None
    o = None
    a = None
    r = None

    for t in xrange(tMax):

      # get a point cloud
      cloud = rlEnv.GetArtificialCloud()
      isGrasp = holdingDesc is None

      # get the next action
      oo, aa, overtDesc, epsilon = rlAgent.SenseAndAct(
        holdingDesc, cloud, t, rlEnv, episode >= unbiasOnEpisode)

      # perform transition
      holdingDesc, rr = rlEnv.Transition(overtDesc, cloud)

      # save experience
      if t > 0:
        rlAgent.AddExperienceSarsa(o, a, r, oo, aa)
      o = oo
      a = aa
      r = rr

      # save recorded data
      R += r
      if isGrasp:
        if r < 0: nPlaced -= 1
      else:
        if r > 0: nPlaced += 1
      timeStepEpsilon.append(epsilon)

    # add final experience
    rlAgent.AddExperienceSarsa(o, a, r, [None] * rlAgent.nLevels, [None] * rlAgent.nLevels)

    # cleanup episode
    rlEnv.ResetEpisode()
    print("Episode {} had return {}".format(episode, R))

    # training
    if episode % trainEvery == trainEvery - 1:
      losses.append(rlAgent.UpdateQFunctionSarsa())
      rlAgent.SaveQFunction()

    # save results
    episodeReturn.append(R)
    nPlacedObjects.append(nPlaced)
    episodeTime.append(time() - startTime)
    databaseSize.append(rlAgent.GetNumberOfExperiences())

    if episode % trainEvery == trainEvery - 1 or episode == nEpisodes - 1:
      saveData = {"episodeReturn": episodeReturn, "nPlacedObjects": nPlacedObjects,
        "episodeTime": episodeTime, "timeStepEpsilon": timeStepEpsilon,
        "databaseSize": databaseSize, "losses": losses}
      saveData.update(params)
      savemat(saveFileName, saveData)

    # backup agent data
    if episode == nEpisodes - 1:
      rlAgent.SaveExperienceDatabase()
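# A minimal sketch of the module-level setup Main() above appears to rely on; it is not taken from
# the project. seed is assumed to be numpy.random.seed, and the RlAgent / RlEnvironmentPegsOnDisks
# imports below use hypothetical module paths -- substitute the project's actual modules. Note that
# the function body mixes Python 2 builtins (xrange, raw_input) with the TensorFlow 2 seeding API
# (tensorflow.random.set_seed); under Python 3 those builtins would be range and input.
from time import time
import tensorflow
from numpy.random import seed          # assumed; could equally be the standard-library random.seed
from scipy.io import loadmat, savemat
#from RlAgent import RlAgent                                      # hypothetical module path
#from RlEnvironmentPegsOnDisks import RlEnvironmentPegsOnDisks    # hypothetical module path

if __name__ == "__main__":
  Main()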
def Main():
  '''Entrypoint to the program.'''

  # PARAMETERS ===================================================================================

  params = loadmat("parameters.mat", squeeze_me=True)
  randomSeed = params["randomSeed"]
  tMax = params["tMax"]
  nEpisodes = params["nEpisodes"]
  trainEvery = params["trainEvery"]
  unbiasOnEpisode = params["unbiasOnEpisode"]
  saveFileName = params["saveFileName"]
  loadNetwork = params["loadNetwork"]
  loadDatabase = params["loadDatabase"]
  showSteps = params["showSteps"]

  # INITIALIZATION ===============================================================================

  # set random seeds
  seed(randomSeed)
  tensorflow.random.set_seed(randomSeed)

  # initialize agent and environment
  rlEnv = RlEnvironmentBottlesOnCoasters(params)
  rlAgent = RlAgent(params)

  # if testing, load previous results
  if loadNetwork: rlAgent.LoadQFunction()
  if loadDatabase: rlAgent.LoadExperienceDatabase()

  # RUN TEST =====================================================================================

  episodeReturn = []
  nPlacedObjects = []
  nGraspedObjects = []
  episodeTime = []
  timeStepEpsilon = []
  databaseSize = []
  losses = []

  for episode in xrange(nEpisodes):

    startTime = time()

    # place random object in random orientation on table
    rlEnv.MoveHandToHoldingPose()
    rlEnv.PlaceObjects(False)
    rlEnv.PlaceObjects(True)
    if showSteps: raw_input("Placed objects.")

    R = 0
    nPlaced = 0
    nGrasped = 0
    holdingDesc = None
    observations = []
    actions = []
    rewards = []
    isGrasp = []

    for t in xrange(tMax):

      # get a point cloud
      cloud = rlEnv.GetArtificialCloud()
      #rlEnv.PlotCloud(cloud)
      #if showSteps: raw_input("Acquired cloud.")
      isGrasp.append(holdingDesc is None)

      # get the next action
      o, a, overtDesc, epsilon = rlAgent.SenseAndAct(
        holdingDesc, cloud, t, rlEnv, episode >= unbiasOnEpisode)

      # perform transition
      holdingDesc, r = rlEnv.Transition(overtDesc, cloud)

      # save experiences
      timeStepEpsilon.append(epsilon)
      observations.append(o)
      actions.append(a)
      rewards.append(r)
      R += r

      # compute task success -- number of objects placed
      if isGrasp[-1]:
        if holdingDesc is not None: nGrasped += 1
        if r < 0: nPlaced -= 1
      else:
        if r == 1: nPlaced += 1

    rlAgent.AddExperienceMonteCarlo(observations, actions, rewards, isGrasp)

    # cleanup episode
    rlEnv.ResetEpisode()
    print("Episode {} had return {}".format(episode, R))

    # training
    if episode % trainEvery == trainEvery - 1:
      losses.append(rlAgent.UpdateQFunctionMonteCarlo())
      rlAgent.SaveQFunction()

    # save results
    episodeReturn.append(R)
    nPlacedObjects.append(nPlaced)
    nGraspedObjects.append(nGrasped)
    episodeTime.append(time() - startTime)
    databaseSize.append(rlAgent.GetNumberOfExperiences())

    if episode % trainEvery == trainEvery - 1 or episode == nEpisodes - 1:
      saveData = {"episodeReturn": episodeReturn, "nPlacedObjects": nPlacedObjects,
        "nGraspedObjects": nGraspedObjects, "episodeTime": episodeTime,
        "timeStepEpsilon": timeStepEpsilon, "databaseSize": databaseSize, "losses": losses}
      saveData.update(params)
      savemat(saveFileName, saveData)

    # backup agent data
    if episode == nEpisodes - 1:
      rlAgent.SaveExperienceDatabase()
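# Illustration only: AddExperienceMonteCarlo above receives the whole episode (observations,
# actions, rewards, isGrasp), which suggests the agent labels each step with a Monte Carlo return
# rather than a one-step bootstrap. A standard way to compute those labels is the backward
# discounted sum sketched below; the function name and the discount factor are assumptions, not
# code taken from this project.
def DiscountedReturns(rewards, gamma=0.98):
  '''Return a list where returns[t] = rewards[t] + gamma * returns[t + 1], computed backward.'''
  returns = [0.0] * len(rewards)
  G = 0.0
  for t in reversed(range(len(rewards))):
    G = rewards[t] + gamma * G
    returns[t] = G
  return returns

# e.g. DiscountedReturns([0, 0, 1], gamma=0.5) == [0.25, 0.5, 1.0]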
def Main():
  '''Entrypoint to the program.'''

  # PARAMETERS ===================================================================================

  params = loadmat("parameters.mat", squeeze_me=True)
  randomSeed = params["randomSeed"]
  tMax = params["tMax"]
  nEpisodes = params["nEpisodes"]
  trainEvery = params["trainEvery"]
  unbiasOnEpisode = params["unbiasOnEpisode"]
  nObjects = params["nObjects"]
  nSurfaceObjects = params["nSurfaceObjects"]
  objHeight = params["objHeight"]
  objRadius = params["objRadius"]
  surfObjHeight = params["surfObjHeight"]
  surfObjRadius = params["surfObjRadius"]
  saveFileName = params["saveFileName"]
  loadNetwork = params["loadNetwork"]
  loadDatabase = params["loadDatabase"]
  showSteps = params["showSteps"]

  # INITIALIZATION ===============================================================================

  # set random seeds
  seed(randomSeed)
  tensorflow.random.set_seed(randomSeed)

  # initialize agent and environment
  rlEnv = RlEnvironmentPegsOnDisks(params)
  rlAgent = RlAgent(params)

  # if testing, load previous results
  if loadNetwork: rlAgent.LoadQFunction()
  if loadDatabase: rlAgent.LoadExperienceDatabase()

  # RUN TEST =====================================================================================

  episodeReturn = []
  episodeTime = []
  timeStepEpsilon = []
  databaseSize = []
  losses = []

  for episode in xrange(nEpisodes):

    startTime = time()

    # place random object in random orientation on table
    rlEnv.MoveHandToHoldingPose()
    rlEnv.PlaceCylinders(nObjects, objHeight, objRadius, False)
    rlEnv.PlaceCylinders(nSurfaceObjects, surfObjHeight, surfObjRadius, True)
    if showSteps: raw_input("Placed objects.")

    R = 0
    holdingDesc = None
    observations = []
    actions = []
    rewards = []

    for t in xrange(tMax):

      # get the next action
      o, a, overtDesc, epsilon = rlAgent.SenseAndAct(
        holdingDesc, t, rlEnv, episode >= unbiasOnEpisode)

      # perform transition
      holdingDesc, r = rlEnv.Transition(overtDesc)

      # save experiences
      timeStepEpsilon.append(epsilon)
      observations.append(o)
      actions.append(a)
      rewards.append(r)
      R += r

    rlAgent.AddExperienceMonteCarlo(observations, actions, rewards)

    # cleanup episode
    rlEnv.ResetEpisode()
    print("Episode {} had return {}".format(episode, R))

    # training
    if episode % trainEvery == trainEvery - 1:
      losses.append(rlAgent.UpdateQFunctionMonteCarlo())
      rlAgent.SaveQFunction()

    # save results
    episodeReturn.append(R)
    episodeTime.append(time() - startTime)
    databaseSize.append(rlAgent.GetNumberOfExperiences())

    if episode % trainEvery == trainEvery - 1 or episode == nEpisodes - 1:
      saveData = {"episodeReturn": episodeReturn, "episodeTime": episodeTime,
        "timeStepEpsilon": timeStepEpsilon, "databaseSize": databaseSize, "losses": losses}
      saveData.update(params)
      savemat(saveFileName, saveData)

    # backup agent data
    if episode == nEpisodes - 1:
      rlAgent.SaveExperienceDatabase()
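# A minimal sketch of generating the parameters.mat file these scripts read. The field names are
# exactly the ones looked up above; every value is a placeholder, and RlAgent and the environment
# classes may read additional fields from params that are not visible in these entry points.
from scipy.io import savemat

parameters = {
  "randomSeed": 0, "tMax": 10, "nEpisodes": 1000, "trainEvery": 100, "unbiasOnEpisode": 500,
  "nObjects": 2, "nSurfaceObjects": 2, "objHeight": 0.10, "objRadius": 0.02,
  "surfObjHeight": 0.01, "surfObjRadius": 0.06, "saveFileName": "results.mat",
  "loadNetwork": False, "loadDatabase": False, "showSteps": False}

savemat("parameters.mat", parameters)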