def runOnListFileDPOMDP_unfinished(baseSavePath, listFilePath='DPOMDPsToEval.txt'):
    """Claim and run unfinished DPOMDP run/env/param sets from a shared list file.

    Repeatedly claims an unfinished "{run}/{env}/{param}" entry from
    *listFilePath*, resumes partial results if found on disk, runs G-DICE on
    the environment, saves final results under ``baseSavePath/{run}``, marks
    the entry complete, and deletes the temporary per-iteration results.
    Stops when no unclaimed entry remains, or on an environment-creation
    failure.

    NOTE(review): for now this cannot go back to entries already marked
    in-progress by another worker.

    :param baseSavePath: root directory for per-run result output
    :param listFilePath: path of the shared list file of sets to evaluate
    """
    pool = Pool()
    pString = claimRunEnvParamSet_unfinished(listFilePath)
    while pString is not None:
        # Entry format: {run}/{env}/{param}
        run, envName, paramName = pString.split('/')
        os.makedirs(os.path.join(baseSavePath, run), exist_ok=True)
        params = GDICEParams().fromName(name=paramName)
        try:
            env = gym.make(envName)
        except MemoryError:
            print(envName + ' too large for memory', file=sys.stderr)
            return
        except Exception as e:
            print(envName + ' encountered error in creation', file=sys.stderr)
            print(e, file=sys.stderr)
            return
        # Resume from partial results when a temp .npz exists
        wasPartiallyRun, npzFilename = checkIfPartial(envName, params.name)
        prevResults = None
        if wasPartiallyRun:
            print(params.name + ' partially finished for ' + envName + ', loading...', file=sys.stderr)
            prevResults, FSCDist = loadResults(npzFilename)[:2]
        else:
            if params.centralized:
                # One joint controller; agent 0's spaces stand in for the joint spaces
                FSCDist = FiniteStateControllerDistribution(params.numNodes,
                                                            env.action_space[0].n,
                                                            env.observation_space[0].n)
            else:
                # One controller distribution per agent
                FSCDist = [FiniteStateControllerDistribution(params.numNodes,
                                                             env.action_space[a].n,
                                                             env.observation_space[a].n)
                           for a in range(env.agents)]
        env.reset()
        try:
            results = runGDICEOnEnvironment(env, FSCDist, params, parallel=pool,
                                            results=prevResults,
                                            baseDir=os.path.join(baseSavePath, run))
        except MemoryError:
            # FIX: this message was previously broken by a literal newline
            # inside the string; fall back to serial/MultiEnv evaluation.
            print(envName + ' too large for parallel processing. Switching to MultiEnv...', file=sys.stderr)
            results = runGDICEOnEnvironment(env, FSCDist, params, parallel=None,
                                            results=prevResults,
                                            baseDir=os.path.join(baseSavePath, run))
        except Exception as e:
            # FIX: typo "runnning" and missing spacing in the message, and the
            # original returned here despite saying "skipping to next param" —
            # now actually claim the next set and continue.
            print(envName + ' encountered error in running ' + params.name + ', skipping to next param', file=sys.stderr)
            print(e, file=sys.stderr)
            pString = claimRunEnvParamSet_unfinished(listFilePath)
            continue
        saveResults(os.path.join(baseSavePath, run, 'EndResults'), envName, params, results)
        # Remove from in progress
        registerRunEnvParamSetCompletion_unfinished(pString, listFilePath)
        # Delete the temp results. Cleanup is best-effort: the original bare
        # "except: return" aborted the whole worker on any cleanup error.
        try:
            for filename in glob.glob(os.path.join(baseSavePath, run, 'GDICEResults', envName, params.name) + '*'):
                os.remove(filename)
        except OSError as e:
            print('Could not delete temp results for ' + pString + ': ' + str(e), file=sys.stderr)
        # Claim next one
        pString = claimRunEnvParamSet_unfinished(listFilePath)
def runGridSearchOnAllEnvDPOMDP(baseSavePath):
    """Run the full G-DICE parameter grid search over every DPOMDP environment.

    For each environment in the grid-search list, and for each parameter set:
    skips permutations already finished, resumes partial runs from temp
    results, runs G-DICE (parallel, with a serial fallback on MemoryError),
    saves final results under ``baseSavePath/EndResults``, and deletes the
    temporary per-iteration results.

    :param baseSavePath: root directory for result output
    """
    pool = Pool()
    envList, GDICEList = getGridSearchGDICEParams()
    for envStr in envList:
        try:
            env = gym.make(envStr)
        except MemoryError:
            print(envStr + ' too large for memory', file=sys.stderr)
            continue
        except Exception as e:
            print(envStr + ' encountered error in creation, skipping', file=sys.stderr)
            print(e, file=sys.stderr)
            continue
        for params in GDICEList:
            # Skip this permutation if we already have final results
            if checkIfFinished(envStr, params.name, baseDir=baseSavePath)[0]:
                print(params.name + ' already finished for ' + envStr + ', skipping...', file=sys.stderr)
                continue
            # Resume from partial results when a temp .npz exists
            wasPartiallyRun, npzFilename = checkIfPartial(envStr, params.name)
            prevResults = None
            if wasPartiallyRun:
                print(params.name + ' partially finished for ' + envStr + ', loading...', file=sys.stderr)
                prevResults, FSCDist = loadResults(npzFilename)[:2]
            else:
                if params.centralized:
                    # One joint controller; agent 0's spaces stand in for the joint spaces
                    FSCDist = FiniteStateControllerDistribution(params.numNodes,
                                                                env.action_space[0].n,
                                                                env.observation_space[0].n)
                else:
                    # One controller distribution per agent
                    FSCDist = [FiniteStateControllerDistribution(params.numNodes,
                                                                 env.action_space[a].n,
                                                                 env.observation_space[a].n)
                               for a in range(env.agents)]
            env.reset()
            try:
                results = runGDICEOnEnvironment(env, FSCDist, params, parallel=pool,
                                                results=prevResults, baseDir=baseSavePath)
            except MemoryError:
                # Fall back to serial/MultiEnv evaluation
                print(envStr + ' too large for parallel processing. Switching to MultiEnv...', file=sys.stderr)
                results = runGDICEOnEnvironment(env, FSCDist, params, parallel=None,
                                                results=prevResults, baseDir=baseSavePath)
            except Exception as e:
                # FIX: typo "runnning" and missing spacing in the message
                print(envStr + ' encountered error in running ' + params.name + ', skipping to next param', file=sys.stderr)
                print(e, file=sys.stderr)
                continue
            saveResults(os.path.join(baseSavePath, 'EndResults'), envStr, params, results)
            # Delete the temp results. Best-effort: narrowed the bare except
            # (which hid every error class) to OSError from os.remove.
            try:
                for filename in glob.glob(os.path.join(baseSavePath, 'GDICEResults', envStr, params.name) + '*'):
                    os.remove(filename)
            except OSError as e:
                print('Could not delete temp results for ' + envStr + ': ' + str(e), file=sys.stderr)
                continue
def runBasicDPOMDP():
    """Demo: run G-DICE on the recycling DPOMDP with one 10-node controller per agent."""
    env = gym.make('DPOMDP-recycling-v0')
    testParams = GDICEParams([10, 10])
    # Build a per-agent controller distribution sized to each agent's spaces
    controllers = [
        FiniteStateControllerDistribution(testParams.numNodes[agentIdx],
                                          env.action_space[agentIdx].n,
                                          env.observation_space[agentIdx].n)
        for agentIdx in range(env.agents)
    ]
    pool = Pool()  # process pool for parallel sample evaluation
    bestValue, bestValueStdDev, bestActionTransitions, bestNodeObservationTransitions, \
        updatedControllerDistribution, estimatedConvergenceIteration, allValues, allStdDev, \
        bestValueAtEachIteration, bestStdDevAtEachIteration = \
        runGDICEOnEnvironment(env, controllers, testParams, parallel=pool)
def runBasic():
    """Demo: run G-DICE on a single-agent POMDP with default parameters.

    Uses serial (MultiEnv) evaluation rather than a process pool, then builds
    a deterministic controller from the best transition tables found.
    """
    env = gym.make('POMDP-4x3-episodic-v0')  # gym environment to solve
    testParams = GDICEParams()  # default G-DICE parameter values
    # Controller distribution with default node count, sized from the
    # environment's action and observation spaces
    controllerDistribution = FiniteStateControllerDistribution(testParams.numNodes,
                                                               env.action_space.n,
                                                               env.observation_space.n)
    # pool = Pool() would enable parallel processing across all cores;
    # None selects vectorized MultiEnv processing (low memory / no core access)
    pool = None
    # Run G-DICE: returns the best average value, its std dev, the best
    # deterministic transition tables, and the updated controller distribution
    bestValue, bestValueStdDev, bestActionTransitions, bestNodeObservationTransitions, \
        updatedControllerDistribution, estimatedConvergenceIteration, allValues, allStdDev, \
        bestValueAtEachIteration, bestStdDevAtEachIteration = \
        runGDICEOnEnvironment(env, controllerDistribution, testParams, parallel=pool)
    # Freeze the best sampled tables into a deterministic controller
    bestDeterministicController = DeterministicFiniteStateController(bestActionTransitions,
                                                                     bestNodeObservationTransitions)
def runDomain(env, testParams):
    """Run G-DICE on *env* with *testParams* and return the best deterministic controller.

    :param env: gym environment exposing ``action_space.n`` and ``observation_space.n``
    :param testParams: G-DICE parameter object (node count, iterations, ...)
    :return: DeterministicFiniteStateController built from the best transitions found
    """
    # Controller distribution sized from the environment's spaces
    dist = FiniteStateControllerDistribution(testParams.numNodes,
                                             env.action_space.n,
                                             env.observation_space.n)
    # pool = Pool() would enable parallel processing across all cores;
    # None selects vectorized MultiEnv processing (low memory / no core access)
    pool = None
    # Run G-DICE: returns the best average value, its std dev, the best
    # deterministic transition tables, and the updated controller distribution
    bestValue, bestValueStdDev, bestActionTransitions, bestNodeObservationTransitions, \
        updatedControllerDistribution, estimatedConvergenceIteration, allValues, allStdDev, \
        bestValueAtEachIteration, bestStdDevAtEachIteration = \
        runGDICEOnEnvironment(env, dist, testParams, parallel=pool, envType=1)
    # Freeze the best sampled tables into a deterministic controller
    return DeterministicFiniteStateController(bestActionTransitions,
                                              bestNodeObservationTransitions)
# -*- coding: utf-8 -*-
"""Script: run decentralized G-DICE on the recycling DPOMDP with a process pool."""
from GDICE_Python.Controllers import FiniteStateControllerDistribution
from GDICE_Python.Parameters import GDICEParams
from GDICE_Python.Algorithms import runGDICEOnEnvironment
from multiprocessing import Pool
import gym

if __name__ == "__main__":
    env = gym.make('DPOMDP-recycling-v0')
    testParams = GDICEParams([10, 10], centralized=False)
    # One controller distribution per agent, sized to that agent's spaces
    controllers = [
        FiniteStateControllerDistribution(testParams.numNodes[agentIdx],
                                          env.action_space[agentIdx].n,
                                          env.observation_space[agentIdx].n,
                                          True)
        for agentIdx in range(env.agents)
    ]
    pool = Pool()  # process pool for parallel sample evaluation
    bestValue, bestValueStdDev, bestActionTransitions, bestNodeObservationTransitions, \
        updatedControllerDistribution, estimatedConvergenceIteration, allValues, allStdDev, \
        bestValueAtEachIteration, bestStdDevAtEachIteration = \
        runGDICEOnEnvironment(env, controllers, testParams, parallel=pool)