Esempio n. 1
0
def runOnListFileDPOMDP_unfinished(baseSavePath, listFilePath='DPOMDPsToEval.txt'):
    """Run G-DICE on every ``{run}/{env}/{param}`` entry claimed from a list file.

    Entries are claimed one at a time with ``claimRunEnvParamSet_unfinished``
    until it returns ``None``. For each entry the environment is created, any
    partial results reported by ``checkIfPartial`` are loaded, G-DICE is run,
    the results are saved under ``<baseSavePath>/<run>/EndResults``, the entry
    is registered as complete and its temporary result files are deleted.

    The function returns early (without claiming further entries) when the
    environment cannot be created or when the run fails with anything other
    than a ``MemoryError``. A ``MemoryError`` during the pooled run triggers
    one retry with ``parallel=None``. Failing to delete a temporary file is
    reported on stderr and does not stop processing, because by then the
    entry has already been saved and registered as complete.

    Args:
        baseSavePath: Root directory; a subdirectory per run is created in it.
        listFilePath: Path of the list file that entries are claimed from.
    """
    # For now, can't go back to inprogress ones
    # The context manager shuts the worker processes down on every exit path,
    # including the early returns below.
    with Pool() as pool:
        pString = claimRunEnvParamSet_unfinished(listFilePath)
        while pString is not None:
            splitPString = pString.split('/')  # {run}/{env}/{param}
            run = splitPString[0]
            runDir = os.path.join(baseSavePath, run)
            os.makedirs(runDir, exist_ok=True)
            envName = splitPString[1]
            params = GDICEParams().fromName(name=splitPString[2])
            try:
                env = gym.make(envName)
            except MemoryError:
                print(envName + ' too large for memory', file=sys.stderr)
                return
            except Exception as e:
                print(envName + ' encountered error in creation', file=sys.stderr)
                print(e, file=sys.stderr)
                return

            wasPartiallyRun, npzFilename = checkIfPartial(envName, params.name)
            prevResults = None
            if wasPartiallyRun:
                print(params.name + ' partially finished for ' + envName + ', loading...', file=sys.stderr)
                prevResults, FSCDist = loadResults(npzFilename)[:2]
            elif params.centralized:
                # Centralized: a single distribution sized from agent 0's spaces
                FSCDist = FiniteStateControllerDistribution(params.numNodes, env.action_space[0].n,
                                                            env.observation_space[0].n)
            else:
                # Decentralized: one distribution per agent
                FSCDist = [FiniteStateControllerDistribution(params.numNodes, env.action_space[a].n,
                                                             env.observation_space[a].n) for a in range(env.agents)]
            env.reset()
            try:
                results = runGDICEOnEnvironment(env, FSCDist, params, parallel=pool, results=prevResults, baseDir=runDir)
            except MemoryError:
                print(envName + ' too large for parallel processing. Switching to MultiEnv...', file=sys.stderr)
                results = runGDICEOnEnvironment(env, FSCDist, params, parallel=None, results=prevResults, baseDir=runDir)
            except Exception as e:
                # The entry stays claimed; processing stops here rather than
                # moving on to another entry.
                print(envName + ' encountered error in running ' + params.name + ', stopping', file=sys.stderr)
                print(e, file=sys.stderr)
                return
            saveResults(os.path.join(runDir, 'EndResults'), envName, params, results)

            # Remove from in progress
            registerRunEnvParamSetCompletion_unfinished(pString, listFilePath)
            # Delete the temp results (best effort: one undeletable file must
            # neither hide the others nor abort the remaining entries)
            tempPattern = os.path.join(runDir, 'GDICEResults', envName, params.name) + '*'
            for filename in glob.glob(tempPattern):
                try:
                    os.remove(filename)
                except OSError as e:
                    print('Could not delete temp result ' + filename, file=sys.stderr)
                    print(e, file=sys.stderr)

            # Claim next one
            pString = claimRunEnvParamSet_unfinished(listFilePath)
Esempio n. 2
0
def runGridSearchOnAllEnvDPOMDP(baseSavePath):
    """Run G-DICE for every environment / parameter set of the grid search.

    The environment names and parameter sets come from
    ``getGridSearchGDICEParams``. Environments that cannot be created are
    skipped. For each parameter set, combinations that ``checkIfFinished``
    reports as done are skipped, partial results reported by
    ``checkIfPartial`` are loaded and continued, and final results are saved
    under ``<baseSavePath>/EndResults`` before the temporary result files are
    deleted. A ``MemoryError`` during the pooled run triggers one retry with
    ``parallel=None``; any other error skips to the next parameter set.

    Args:
        baseSavePath: Directory used both for final and temporary results.
    """
    envList, GDICEList = getGridSearchGDICEParams()
    # The context manager shuts the worker processes down when the search
    # ends, whether normally or through an exception.
    with Pool() as pool:
        for envStr in envList:
            try:
                env = gym.make(envStr)
            except MemoryError:
                print(envStr + ' too large for memory', file=sys.stderr)
                continue
            except Exception as e:
                print(envStr + ' encountered error in creation, skipping', file=sys.stderr)
                print(e, file=sys.stderr)
                continue
            for params in GDICEList:
                # Skip this permutation if we already have final results
                if checkIfFinished(envStr, params.name, baseDir=baseSavePath)[0]:
                    print(params.name +' already finished for ' +envStr+ ', skipping...', file=sys.stderr)
                    continue

                wasPartiallyRun, npzFilename = checkIfPartial(envStr, params.name)
                prevResults = None
                if wasPartiallyRun:
                    print(params.name + ' partially finished for ' + envStr + ', loading...', file=sys.stderr)
                    prevResults, FSCDist = loadResults(npzFilename)[:2]
                elif params.centralized:
                    # Centralized: a single distribution sized from agent 0's spaces
                    FSCDist = FiniteStateControllerDistribution(params.numNodes, env.action_space[0].n,
                                                                env.observation_space[0].n)
                else:
                    # Decentralized: one distribution per agent
                    FSCDist = [FiniteStateControllerDistribution(params.numNodes, env.action_space[a].n,
                                                                 env.observation_space[a].n) for a in range(env.agents)]
                env.reset()
                try:
                    results = runGDICEOnEnvironment(env, FSCDist, params, parallel=pool, results=prevResults, baseDir=baseSavePath)
                except MemoryError:
                    print(envStr + ' too large for parallel processing. Switching to MultiEnv...', file=sys.stderr)
                    results = runGDICEOnEnvironment(env, FSCDist, params, parallel=None, results=prevResults, baseDir=baseSavePath)
                except Exception as e:
                    print(envStr + ' encountered error in running ' + params.name + ', skipping to next param', file=sys.stderr)
                    print(e, file=sys.stderr)
                    continue

                saveResults(os.path.join(baseSavePath, 'EndResults'), envStr, params, results)
                # Delete the temp results (best effort: report a file that
                # cannot be removed and keep going with the rest)
                tempPattern = os.path.join(baseSavePath, 'GDICEResults', envStr, params.name) + '*'
                for filename in glob.glob(tempPattern):
                    try:
                        os.remove(filename)
                    except OSError as e:
                        print('Could not delete temp result ' + filename, file=sys.stderr)
                        print(e, file=sys.stderr)
Esempio n. 3
0
def runBasicDPOMDP():
    """Run G-DICE on the 'DPOMDP-recycling-v0' environment using a process pool.

    One ``FiniteStateControllerDistribution`` is built per agent, sized from
    that agent's entry in ``testParams.numNodes`` and its action and
    observation spaces. The values produced by the run are unpacked into
    locals for illustration only; nothing is returned.
    """
    envName = 'DPOMDP-recycling-v0'
    env = gym.make(envName)
    testParams = GDICEParams([10, 10])
    controllers = [
        FiniteStateControllerDistribution(testParams.numNodes[a], env.action_space[a].n, env.observation_space[a].n)
        for a in range(env.agents)
    ]
    # Use the pool as a context manager so its worker processes are shut down
    # when the run finishes or raises, instead of leaking past this function.
    with Pool() as pool:
        bestValue, bestValueStdDev, bestActionTransitions, bestNodeObservationTransitions, updatedControllerDistribution, \
        estimatedConvergenceIteration, allValues, allStdDev, bestValueAtEachIteration, bestStdDevAtEachIteration = \
            runGDICEOnEnvironment(env, controllers, testParams, parallel=pool)
Esempio n. 4
0
def runBasic():
    """Run G-DICE on 'POMDP-4x3-episodic-v0' without a process pool.

    Builds a controller distribution from default ``GDICEParams`` and the
    environment's action/observation space sizes, runs G-DICE with
    ``parallel=None`` and constructs a deterministic controller from the two
    transition tables in the result. Nothing is returned.
    """
    env = gym.make('POMDP-4x3-episodic-v0')
    testParams = GDICEParams()  # default G-DICE parameters

    # Controller distribution sized by the parameters and the environment
    nodeCount = testParams.numNodes
    actionCount = env.action_space.n
    observationCount = env.observation_space.n
    distribution = FiniteStateControllerDistribution(nodeCount, actionCount, observationCount)

    # parallel=None instead of a Pool(): per the original note, this selects a
    # multiEnv for vectorized processing on machines with low memory or no
    # core access.
    (
        _value,
        _valueStdDev,
        actionTransitions,
        nodeObservationTransitions,
        _updatedDistribution,
        _convergenceIteration,
        _allValues,
        _allStdDev,
        _valuePerIteration,
        _stdDevPerIteration,
    ) = runGDICEOnEnvironment(env, distribution, testParams, parallel=None)

    # Deterministic controller built from the best transition tables
    deterministicController = DeterministicFiniteStateController(actionTransitions, nodeObservationTransitions)
Esempio n. 5
0
def runDomain(env, testParams):
    """Run G-DICE on ``env`` and return the resulting deterministic controller.

    Args:
        env: Environment exposing ``action_space.n`` and ``observation_space.n``.
        testParams: G-DICE parameters; ``numNodes`` sizes the controller.

    Returns:
        A ``DeterministicFiniteStateController`` built from the best action
        and node/observation transition tables of the run.
    """
    # Controller distribution sized by the parameters and the environment
    nodeCount = testParams.numNodes
    actionCount = env.action_space.n
    observationCount = env.observation_space.n
    distribution = FiniteStateControllerDistribution(nodeCount, actionCount, observationCount)

    # parallel=None instead of a Pool(): per the original note, this selects a
    # multiEnv for vectorized processing on machines with low memory or no
    # core access.
    (
        _value,
        _valueStdDev,
        actionTransitions,
        nodeObservationTransitions,
        _updatedDistribution,
        _convergenceIteration,
        _allValues,
        _allStdDev,
        _valuePerIteration,
        _stdDevPerIteration,
    ) = runGDICEOnEnvironment(env, distribution, testParams, parallel=None, envType=1)

    # Only the two transition tables are needed for the deterministic controller
    return DeterministicFiniteStateController(actionTransitions, nodeObservationTransitions)
Esempio n. 6
0
# -*- coding: utf-8 -*-

from GDICE_Python.Controllers import FiniteStateControllerDistribution
from GDICE_Python.Parameters import GDICEParams
from GDICE_Python.Algorithms import runGDICEOnEnvironment
from multiprocessing import Pool
import gym

if __name__ == "__main__":
    # Demo script: run decentralized G-DICE on the 'DPOMDP-recycling-v0'
    # environment with one controller distribution per agent.
    envName = 'DPOMDP-recycling-v0'
    env = gym.make(envName)
    testParams = GDICEParams([10, 10], centralized=False)
    controllers = [
        FiniteStateControllerDistribution(testParams.numNodes[a],
                                          env.action_space[a].n,
                                          env.observation_space[a].n, True)
        for a in range(env.agents)
    ]
    # Manage the pool with a context manager: an unclosed multiprocessing
    # pool can leave worker processes behind and hang interpreter shutdown.
    with Pool() as pool:
        (bestValue, bestValueStdDev, bestActionTransitions,
         bestNodeObservationTransitions, updatedControllerDistribution,
         estimatedConvergenceIteration, allValues, allStdDev,
         bestValueAtEachIteration, bestStdDevAtEachIteration) = runGDICEOnEnvironment(
            env, controllers, testParams, parallel=pool)