def main():
    """Sweep DDPG over replay-buffer size, layer widths and variance discount,
    evaluate each condition, pickle the result frame, and draw one subplot
    per (layerWidth, bufferSize) cell.
    """
    independentVariables = OrderedDict([
        ('bufferSize', [1000, 5000]),
        ('layerWidth', [(32, 32), (32, 32, 32)]),
        ('varianceDiscount', [0.995, 0.9995]),
    ])

    modelSaveDirectory = "../trainedDDPGModels"
    modelSaveExtension = '.ckpt'
    getSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension)
    evaluate = EvaluateNoiseAndMemorySize(getSavePath, saveModel=False)

    # One row per condition in the cartesian product; evaluate per group.
    levelNames = list(independentVariables.keys())
    levelValues = list(independentVariables.values())
    conditionIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=conditionIndex)
    resultDF = toSplitFrame.groupby(levelNames).apply(evaluate)

    resultPath = os.path.join(dirName, '..', '..', 'plots',
                              str(list(independentVariables.keys())) + '.pickle')
    saveToPickle(resultDF, resultPath)

    # Grid: rows indexed by layerWidth, columns by bufferSize.
    nCols = len(independentVariables['bufferSize'])
    nRows = len(independentVariables['layerWidth'])
    subplotAxes = []
    figure = plt.figure(dpi=200)
    figure.set_size_inches(7.5, 6)
    figure.suptitle('buffer: ' + str(independentVariables['bufferSize'])
                    + ', layers: ' + str(independentVariables['layerWidth'])
                    + ', varDiscount: ' + str(independentVariables['varianceDiscount']),
                    fontsize=8)

    plotSlot = 0
    for layerWidth, byLayerDf in resultDF.groupby('layerWidth'):
        for bufferSize, conditionDf in byLayerDf.groupby('bufferSize'):
            plotSlot += 1
            ax = figure.add_subplot(nRows, nCols, plotSlot)
            subplotAxes.append(ax)
            plt.ylim([-800, 400])
            conditionDf.T.plot(ax=ax)
            ax.set_title('layerWidth = ' + str(layerWidth)
                         + ' bufferSize = ' + str(bufferSize), fontsize=5)
            ax.set_ylabel('MeanEpsReward', fontsize=5)
            ax.set_xlabel('Episode', fontsize=5)
            ax.tick_params(axis='both', which='major', labelsize=5)
            ax.tick_params(axis='both', which='minor', labelsize=5)

    plt.legend(loc='best', prop={'size': 5})
    plotPath = os.path.join(dirName, '..', '..', 'plots')
    plt.savefig(os.path.join(plotPath, str(list(independentVariables.keys()))))
    plt.show()
def main():
    """Sweep DQN learning rate x replay-buffer size on CartPole, evaluate each
    condition, and draw one subplot per (buffersize, learningRate) cell.
    """
    statesdim = env.observation_space.shape[0]
    actiondim = env.action_space.n

    # Hyper-parameters held fixed across every swept condition.
    fixedParameters = OrderedDict([
        ('batchsize', 128),
        ('maxEpisode', 400),
        ('gamma', 0.9),
        ('numberlayers', 20),
        ('stateDim', statesdim),
        ('actionDim', actiondim),
        ('replaceiter', 100),
        ('initepsilon', 0.9),
        ('minepsilon', 0.1),
        ('epsilondec', 0.001),
    ])

    learningRate = [0.1, 0.01, 0.001]
    buffersize = [1000, 5000, 20000]

    modelSaveDirectory = "/path/to/logs/trainedDQNModels"
    modelSaveExtension = '.ckpt'
    getSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension)
    evaluateModel = EvaluateLearningrateAndbatchsize(fixedParameters, getSavePath, saveModel=True)

    # One row per swept condition; evaluate per group.
    levelValues = [learningRate, buffersize]
    levelNames = ["learningRate", "buffersize"]
    conditionIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=conditionIndex)
    modelResultDf = toSplitFrame.groupby(levelNames).apply(evaluateModel)

    plotRowNum = len(learningRate)
    plotColNum = len(buffersize)
    axes = []
    figure = plt.figure(figsize=(12, 10))

    slot = 0
    for bufferKey, byBufferDf in modelResultDf.groupby('buffersize'):
        for lrKey, conditionDf in byBufferDf.groupby("learningRate"):
            slot += 1
            ax = figure.add_subplot(plotRowNum, plotColNum, slot)
            axes.append(ax)
            plt.ylim([0, 300])
            conditionDf.T.plot(ax=ax)

    dirName = os.path.dirname(__file__)
    plotPath = os.path.join(dirName, 'plots')
    plt.savefig(os.path.join(plotPath, 'CarpoleEvaluation'))
    plt.show()
def main():
    """Sweep DQN action-space discretization x epsilon growth rate on Pendulum,
    evaluate each condition, and draw one subplot per cell.
    """
    independentVariables = OrderedDict([
        ('actionDim', [3, 7, 11, 15]),
        ('epsilonIncrease', [1e-5, 1e-4, 2e-4, 1e-3]),
    ])

    modelSaveDirectory = "../trainedDQNModels"
    modelSaveExtension = '.ckpt'
    getSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension)
    evaluate = EvaluateNoiseAndMemorySize(getSavePath, saveModel=False)

    # One row per condition in the cartesian product; evaluate per group.
    levelNames = list(independentVariables.keys())
    levelValues = list(independentVariables.values())
    conditionIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=conditionIndex)
    resultDF = toSplitFrame.groupby(levelNames).apply(evaluate)

    # Grid: rows indexed by epsilonIncrease, columns by actionDim.
    nCols = len(independentVariables['actionDim'])
    nRows = len(independentVariables['epsilonIncrease'])
    axes = []
    figure = plt.figure(figsize=(12, 10))

    slot = 0
    for epsKey, byEpsilonDf in resultDF.groupby('epsilonIncrease'):
        for actionKey, conditionDf in byEpsilonDf.groupby('actionDim'):
            slot += 1
            ax = figure.add_subplot(nRows, nCols, slot)
            axes.append(ax)
            plt.ylim([-1600, 50])
            conditionDf.T.plot(ax=ax)

    dirName = os.path.dirname(__file__)
    plotPath = os.path.join(dirName, '..', 'plots')
    plt.savefig(os.path.join(plotPath, 'pendulumEvaluation'))
    plt.show()
def simuliateOneParameter(parameterDict):
    """Run one multi-agent (2 wolves, 1 sheep, 1 block) rollout in MuJoCo under
    the contact-solver parameters in parameterDict, then save and render it.

    parameterDict must contain the solimp keys ('dmin', 'dmax', 'width',
    'midpoint', 'power') and the solref keys ('timeconst', 'dampratio');
    they are patched into the environment XML before the model is built.

    NOTE(review): relies on module-level names (dirName, np, os, mujoco,
    xmltodict, saveToPickle, GetSavePath, the Reward*/Reset*/Transition*/
    Observe/Render classes, changeGeomProperty, transferNumberListToStr)
    defined outside this view.
    """
    # --- scenario constants: entity sizes, colors, and index assignments ---
    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    wolfColor = np.array([0.85, 0.35, 0.35])
    sheepColor = np.array([0.35, 0.85, 0.35])
    blockColor = np.array([0.25, 0.25, 0.25])
    wolvesID = [0, 1]
    sheepsID = [2]
    blocksID = [3]
    numWolves = len(wolvesID)
    numSheeps = len(sheepsID)
    numBlocks = len(blocksID)
    sheepMaxSpeed = 1.3
    wolfMaxSpeed = 1.0
    blockMaxSpeed = None  # blocks do not move
    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps
    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [blockSize] * numBlocks
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    # --- joint reward: wolf rewards first, then sheep rewards ---
    isCollision = IsCollision(getPosFromAgentState)
    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound)
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    makePropertyList = MakePropertyList(transferNumberListToStr)

    # Contact-solver parameters under test (MuJoCo @solimp / @solref attributes),
    # applied to both geoms in geomIds.
    dmin = parameterDict['dmin']
    dmax = parameterDict['dmax']
    width = parameterDict['width']
    midpoint = parameterDict['midpoint']
    power = parameterDict['power']
    timeconst = parameterDict['timeconst']
    dampratio = parameterDict['dampratio']
    geomIds = [1, 2]
    keyNameList = ['@solimp', '@solref']
    valueList = [[[dmin, dmax, width, midpoint, power], [timeconst, dampratio]]] * len(geomIds)
    collisionParameter = makePropertyList(geomIds, keyNameList, valueList)

    # Load the environment XML and patch the geom properties into it.
    # The second assignment deliberately overrides the first (local) path.
    physicsDynamicsPath = os.path.join(dirName, 'multiAgentcollision.xml')
    physicsDynamicsPath = os.path.join(dirName, '..', '..', 'environment', 'mujocoEnv', 'multiAgentcollision.xml')
    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    print(envXmlDict)
    envXmlPropertyDictList = [collisionParameter]
    changeEnvXmlPropertFuntionyList = [changeGeomProperty]
    for propertyDict, changeXmlProperty in zip(envXmlPropertyDictList, changeEnvXmlPropertFuntionyList):
        envXmlDict = changeXmlProperty(envXmlDict, propertyDict)
    envXml = xmltodict.unparse(envXmlDict)
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)

    # --- MDP pieces: deterministic reset, fixed-horizon rollout ---
    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
    qVelInitNoise = 0
    qPosInitNoise = 1
    fixReset = ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents, numBlocks)
    blocksState = [[0, 0, 0, 0]]
    # Fixed start: explicit agent positions/velocities plus the block state.
    reset = lambda: fixReset([-0.5, 0.8, -0.5, 0, 0.5, 0.8], [0, 0, 0, 0, 0, 0], blocksState)
    isTerminal = lambda state: False  # episode never terminates early
    numSimulationFrames = 1
    visualize = False
    dt = 0.1
    damping = 2.5
    transit = TransitionFunctionWithoutXPos(physicsSimulation, numAgents, numSimulationFrames, damping * dt / numSimulationFrames, agentsMaxSpeedList, visualize, isTerminal)
    maxRunningSteps = 100
    sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, rewardFunc, reset)
    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]
    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape for obsID in range(len(initObsForParams))]
    worldDim = 2
    evalNum = 1

    # Trajectory save path is keyed by the condition's parameters.
    trajectorySaveExtension = '.pickle'
    fixedParameters = {'isMujoco': 1, 'isCylinder': 1, 'evalNum': evalNum}
    trajectoriesSaveDirectory = trajSavePath = os.path.join(dirName, '..', 'trajectory')
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectorySaveExtension, fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(parameterDict)
    if not os.path.isfile(trajectorySavePath):  # skip conditions already simulated
        trajList = list()
        for i in range(evalNum):
            np.random.seed(i)  # reproducible rollout per evaluation index
            # Constant action for the single controlled agent.
            policy = lambda state: [[1, 0]]
            traj = sampleTrajectory(policy)
            trajList.append(traj)
        saveTraj = True
        if saveTraj:
            saveToPickle(trajList, trajectorySavePath)
        visualize = True
        if visualize:
            entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [blockColor] * numBlocks
            render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState)
            render(trajList)
def main():
    """Train DDPG for a sheep fleeing a scripted heat-seeking wolf in a bounded
    2D arena, then checkpoint the actor/critic and plot the mean reward curve.

    NOTE(review): depends on module-level hyper-parameters (maxTimeStep,
    maxEpisode, minibatchSize, gamma, learningRateActor, learningRateCritic,
    tau, bufferSize, noiseDecayStartStep, learningStartBufferSize) and project
    classes/functions defined outside this view.
    """
    numAgents = 2
    stateDim = numAgents * 2  # presumably (x, y) per agent -- confirm
    actionLow = -1
    actionHigh = 1
    actionBound = (actionHigh - actionLow) / 2
    actionDim = 2

    # --- actor/critic networks and their training ops ---
    buildActorModel = BuildActorModel(stateDim, actionDim, actionBound)
    actorLayerWidths = [64]
    actorWriter, actorModel = buildActorModel(actorLayerWidths)
    buildCriticModel = BuildCriticModel(stateDim, actionDim)
    criticLayerWidths = [64]
    criticWriter, criticModel = buildCriticModel(criticLayerWidths)
    trainCriticBySASRQ = TrainCriticBySASRQ(learningRateCritic, gamma, criticWriter)
    trainCritic = TrainCritic(actByPolicyTarget, evaluateCriticTarget, trainCriticBySASRQ)
    trainActorFromGradients = TrainActorFromGradients(learningRateActor, actorWriter)
    trainActorOneStep = TrainActorOneStep(actByPolicyTrain, trainActorFromGradients, getActionGradients)
    trainActor = TrainActor(trainActorOneStep)
    paramUpdateInterval = 1
    updateParameters = UpdateParameters(paramUpdateInterval, tau)
    # Sync target parameters to training parameters before learning starts.
    modelList = [actorModel, criticModel]
    actorModel, criticModel = resetTargetParamToTrainParam(modelList)
    trainModels = TrainDDPGModels(updateParameters, trainActor, trainCritic, actorModel, criticModel)

    # --- exploration noise and replay-based learning ---
    noiseInitVariance = 1
    varianceDiscount = .9995
    getNoise = GetExponentialDecayGaussNoise(noiseInitVariance, varianceDiscount, noiseDecayStartStep)
    actOneStepWithNoise = ActDDPGOneStep(actionLow, actionHigh, actByPolicyTrain, actorModel, getNoise)
    sampleFromMemory = SampleFromMemory(minibatchSize)
    learnFromBuffer = LearnFromBuffer(learningStartBufferSize, sampleFromMemory, trainModels)

    # --- environment: scripted wolf chases the learning sheep ---
    sheepId = 0
    wolfId = 1
    getSheepPos = GetAgentPosFromState(sheepId)
    getWolfPos = GetAgentPosFromState(wolfId)
    wolfSpeed = 1
    wolfPolicy = HeatSeekingContinuousDeterministicPolicy(getWolfPos, getSheepPos, wolfSpeed)
    # wolfPolicy = lambda state: (0, 0)  # alternative: stationary wolf

    xBoundary = (0, 20)
    yBoundary = (0, 20)
    stayWithinBoundary = StayWithinBoundary(xBoundary, yBoundary)
    physicalTransition = TransitForNoPhysics(getIntendedNextState, stayWithinBoundary)
    transit = TransitWithSingleWolf(physicalTransition, wolfPolicy)

    # Sheep reward: per-step alive bonus, terminal penalty on capture, plus a
    # heuristic boundary punishment.
    sheepAliveBonus = 1 / maxTimeStep
    sheepTerminalPenalty = 20
    killzoneRadius = 1
    isTerminal = IsTerminal(getWolfPos, getSheepPos, killzoneRadius)
    getBoundaryPunishment = GetBoundaryPunishment(xBoundary, yBoundary, sheepIndex=0, punishmentVal=10)
    rewardSheep = RewardFunctionCompete(sheepAliveBonus, sheepTerminalPenalty, isTerminal)
    getReward = RewardSheepWithBoundaryHeuristics(rewardSheep, getIntendedNextState, getBoundaryPunishment, getSheepPos)
    sampleOneStep = SampleOneStep(transit, getReward)
    runDDPGTimeStep = RunTimeStep(actOneStepWithNoise, sampleOneStep, learnFromBuffer)

    # Fixed start state, presumably [sheepX, sheepY, wolfX, wolfY] -- confirm.
    # Earlier experiments used Reset(xBoundary, yBoundary, numAgents) or other
    # fixed states.
    reset = lambda: np.array([10, 10, 15, 5])
    runEpisode = RunEpisode(reset, runDDPGTimeStep, maxTimeStep, isTerminal)
    ddpg = RunAlgorithm(runEpisode, maxEpisode)
    replayBuffer = deque(maxlen=int(bufferSize))
    meanRewardList, trajectory = ddpg(replayBuffer)
    trainedActorModel, trainedCriticModel = trainModels.getTrainedModels()

    # --- checkpoint actor and critic under parameterized file names ---
    modelIndex = 0
    actorFixedParam = {'actorModel': modelIndex}
    criticFixedParam = {'criticModel': modelIndex}
    parameters = {
        'wolfSpeed': wolfSpeed,
        'dimension': actionDim,
        'maxEpisode': maxEpisode,
        'maxTimeStep': maxTimeStep,
        'minibatchSize': minibatchSize,
        'gamma': gamma,
        'learningRateActor': learningRateActor,
        'learningRateCritic': learningRateCritic
    }
    modelSaveDirectory = "../trainedDDPGModels"
    modelSaveExtension = '.ckpt'
    getActorSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension, actorFixedParam)
    getCriticSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension, criticFixedParam)
    savePathActor = getActorSavePath(parameters)
    savePathCritic = getCriticSavePath(parameters)
    with actorModel.as_default():
        saveVariables(trainedActorModel, savePathActor)
    with criticModel.as_default():
        saveVariables(trainedCriticModel, savePathCritic)

    plotResult = True
    if plotResult:
        plt.plot(list(range(maxEpisode)), meanRewardList)
        plt.show()
def main():
    """Train DDPG on a gym continuous-control environment, plot per-episode
    rewards, checkpoint the models, and pickle the collected trajectory.

    Fixes relative to the original:
    - The original built `getSavePath` twice (actor params, then critic
      params — the second assignment clobbered the first) and saved BOTH
      networks to the same path, so the critic checkpoint overwrote the
      actor checkpoint. Actor and critic now get distinct save paths.
    - Removed a leftover per-timestep debug print of actionHigh/actionLow.

    NOTE(review): depends on module-level configuration (env, Env_name,
    EPISODE, maxTimeStep, buffersize, batchsize, gamma, tau, noiseDecay,
    minVar, noiseDacayStep, initnoisevar, actornumberlayers,
    criticnumberlayers, actorlearningRate, criticlearningRate) and project
    classes defined outside this view.
    """
    statedim = env.observation_space.shape[0]
    actiondim = env.action_space.shape[0]
    actionHigh = env.action_space.high
    actionLow = env.action_space.low
    actionBound = (actionHigh - actionLow) / 2
    replaybuffer = deque(maxlen=buffersize)
    paramUpdateFrequency = 1
    totalrewards = []  # one entry per episode; plotted at the end
    meanreward = []    # running 100-episode means
    totalreward = []   # rewards accumulated since the last 100-episode mark

    # --- actor/critic networks and their training ops ---
    buildActorModel = BuildActorModel(statedim, actiondim, actionBound)
    actorWriter, actorModel = buildActorModel(actornumberlayers)
    buildCriticModel = BuildCriticModel(statedim, actiondim)
    criticWriter, criticModel = buildCriticModel(criticnumberlayers)
    trainCritic = TrainCritic(criticlearningRate, gamma, criticWriter)
    trainActor = TrainActor(actorlearningRate, actorWriter)
    updateParameters = UpdateParameters(tau, paramUpdateFrequency)
    actorModel = ReplaceParameters(actorModel)    # sync target net to train net
    criticModel = ReplaceParameters(criticModel)
    trainddpgModels = TrainDDPGModels(updateParameters, trainActor, trainCritic,
                                      actorModel, criticModel)

    getnoise = GetNoise(noiseDecay, minVar, noiseDacayStep)
    getnoiseaction = GetNoiseAction(actorModel, actionLow, actionHigh)
    learn = Learn(buffersize, batchsize, trainddpgModels, actiondim)

    # --- training loop: fixed-horizon episodes with decaying action noise ---
    runtime = 0
    trajectory = []
    noisevar = initnoisevar
    for episode in range(EPISODE):
        state = env.reset()
        rewards = 0
        for i in range(maxTimeStep):
            env.render()
            noise, noisevar = getnoise(runtime, noisevar)
            noiseaction = getnoiseaction(state, noise)
            nextstate, reward, done, info = env.step(noiseaction)
            learn(replaybuffer, state, noiseaction, nextstate, reward)
            trajectory.append((state, noiseaction, nextstate, reward))
            rewards += reward
            state = nextstate
            runtime += 1
            # Episodes always run the full horizon (no break on `done`), so
            # this fires exactly once per episode.
            if i == maxTimeStep - 1:
                totalrewards.append(rewards)
                totalreward.append(rewards)
                print('episode: ', episode, 'reward:', rewards, 'noisevar', noisevar)
        if episode % 100 == 0:
            meanreward.append(np.mean(totalreward))
            print('episode: ', episode, 'meanreward:', np.mean(totalreward))
            totalreward = []

    plt.plot(range(EPISODE), totalrewards)
    plt.xlabel('episode')
    plt.ylabel('rewards')
    plt.show()

    # --- save models: actor and critic under DISTINCT parameterized paths ---
    modelIndex = 0
    actorFixedParam = {'actorModel': modelIndex}
    criticFixedParam = {'criticModel': modelIndex}
    parameters = {'env': Env_name, 'Eps': EPISODE, 'batchsize': batchsize,
                  'buffersize': buffersize, 'maxTimeStep': maxTimeStep,
                  'gamma': gamma, 'actorlearningRate': actorlearningRate,
                  'criticlearningRate': criticlearningRate, 'tau': tau,
                  'noiseDecay': noiseDecay, 'minVar': minVar,
                  'initnoisevar': initnoisevar}
    modelSaveDirectory = "/path/to/logs/trainedDDPGModels"
    modelSaveExtension = '.ckpt'
    getActorSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension, actorFixedParam)
    getCriticSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension, criticFixedParam)
    savePathActor = getActorSavePath(parameters)
    savePathCritic = getCriticSavePath(parameters)
    with actorModel.as_default():
        saveVariables(actorModel, savePathActor)
    with criticModel.as_default():
        saveVariables(criticModel, savePathCritic)

    dirName = os.path.dirname(__file__)
    trajectoryPath = os.path.join(dirName, 'trajectory', 'HopperTrajectory.pickle')
    saveToPickle(trajectory, trajectoryPath)
def main():
    """Simulate every contact-parameter condition, then for each condition draw
    a 2x2 figure comparing the MuJoCo rollout against the origin-environment
    baseline: wolf0/wolf1 position and velocity (x-component vs. time step).

    NOTE(review): expects trajectories produced by simuliateOneParameter and
    baseline pickles on disk; depends on module-level imports (it, pd, plt,
    os, OrderedDict, loadFromPickle, GetSavePath) defined outside this view.
    """
    manipulatedVariables = OrderedDict()
    # solimp (contact impedance) parameters
    manipulatedVariables['dmin'] = [0.9]
    manipulatedVariables['dmax'] = [0.9999]
    manipulatedVariables['width'] = [0.001]
    manipulatedVariables['midpoint'] = [0.5]  # useless (no observed effect)
    manipulatedVariables['power'] = [1]  # useless
    # solref (contact reference) parameters
    manipulatedVariables['timeconst'] = [0.02, 0.4]  # [0.1,0.2,0.4,0.6]
    manipulatedVariables['dampratio'] = [0.5]
    # Observed reasonable ranges:
    # timeconst: 0.1-0.2; dmin: 0.5-dmax; dmax: 0.9-0.9999; dampratio: 0.3-0.8

    # Cartesian product of all values -> one parameter dict per condition.
    productedValues = it.product(*[[(key, value) for value in values] for key, values in manipulatedVariables.items()])
    parametersAllCondtion = [dict(list(specificValueParameter)) for specificValueParameter in productedValues]
    for parameterOneCondiiton in parametersAllCondtion:
        print(parameterOneCondiiton)
        simuliateOneParameter(parameterOneCondiiton)

    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)  # NOTE(review): built but never used below

    # save evaluation trajectories
    dirName = os.path.dirname(__file__)
    dataFolderName = os.path.join(dirName, '..')
    trajectoryDirectory = os.path.join(dataFolderName, 'trajectory')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    # Baseline rollout from the original (non-MuJoCo) environment.
    originEnvTrajSavePath = os.path.join(dirName, '..', 'trajectory', 'collisionMoveOriginBaseLine.pickle')
    originTraj = loadFromPickle(originEnvTrajSavePath)[0]
    # Split each step into [wolf0 pos, wolf1 pos, wolf0 vel, wolf1 vel];
    # s[0] is the state, [:2] presumably position and [2:] velocity -- confirm.
    originTrajdata = [[], [], [], []]
    originTrajdata[0] = [s[0][0][:2] for s in originTraj]
    originTrajdata[1] = [s[0][1][:2] for s in originTraj]
    originTrajdata[2] = [s[0][0][2:] for s in originTraj]
    originTrajdata[3] = [s[0][1][2:] for s in originTraj]
    originTraj = loadFromPickle(originEnvTrajSavePath)  # NOTE(review): redundant reload

    trajectoryExtension = '.pickle'
    fixedParameters = {'isMujoco': 1, 'isCylinder': 1}
    generateTrajectorySavePath = GetSavePath(trajectoryDirectory, trajectoryExtension, fixedParameters)
    # Path builder with empty dir/extension, reused only to format a condition
    # dict as a readable string for the figure title.
    getparameter = GetSavePath('', '', {})

    def parseTraj(traj):
        # Split a list of 2D points into parallel x and y component lists.
        x = [s[0] for s in traj]
        y = [s[1] for s in traj]
        return x, y

    for i, parameterOneCondiiton in enumerate(parametersAllCondtion):
        mujocoTrajdata = [[], [], [], []]
        trajectorySavePath = generateTrajectorySavePath(parameterOneCondiiton)
        mujocoTraj = loadFromPickle(trajectorySavePath)
        # Same [pos0, pos1, vel0, vel1] layout as the baseline above.
        mujocoTrajdata[0] = [s[0][0][:2] for s in mujocoTraj]
        mujocoTrajdata[1] = [s[0][1][:2] for s in mujocoTraj]
        mujocoTrajdata[2] = [s[0][0][2:] for s in mujocoTraj]
        mujocoTrajdata[3] = [s[0][1][2:] for s in mujocoTraj]

        fig = plt.figure(i)
        numRows = 2
        numColumns = 2
        plotCounterNum = numRows * numColumns
        subName = ['wolf0Pos(vs.Sheep)', 'wolf1Pos(vs.None)', 'wolf0Vel(vs.Sheep)', 'wolf1Vel(vs.None)']
        for plotCounter in range(plotCounterNum):
            axForDraw = fig.add_subplot(numRows, numColumns, plotCounter + 1)
            axForDraw.set_title(subName[plotCounter])
            originX, originY = parseTraj(originTrajdata[plotCounter])
            mujocoX, mujocoY = parseTraj(mujocoTrajdata[plotCounter])
            # Only the x-components are plotted, against the time-step index.
            plt.scatter(range(len(originX)), originX, s=5, c='red', marker='x', label='origin')
            plt.scatter(range(len(mujocoX)), mujocoX, s=5, c='blue', marker='v', label='mujoco')
        numSimulationFrames = 10
        plt.suptitle(getparameter(parameterOneCondiiton) + 'numSimulationFrames={}'.format(numSimulationFrames))
        plt.legend(loc='best')
    plt.show()
def main():
    """Simulate line-move rollouts in MuJoCo across damping/contact conditions
    and, per condition, plot position and velocity against two baselines
    ('origin' and 'fixedOrigin').

    NOTE(review): expects simuliateOneParameter(parameterDict, evalNum,
    randomSeed, dt) plus baseline pickles on disk; depends on module-level
    imports (it, pd, plt, os, OrderedDict, loadFromPickle, GetSavePath)
    defined outside this view.
    """
    manipulatedVariables = OrderedDict()
    # solimp parameters (disabled for this sweep)
    # manipulatedVariables['dmin'] = [0.9]
    # manipulatedVariables['dmax'] = [0.9999]
    # manipulatedVariables['width'] = [0.001]
    # manipulatedVariables['midpoint'] = [0.5]  # useless
    # manipulatedVariables['power'] = [1]  # useless
    # solref parameters plus joint damping
    manipulatedVariables['timeconst'] = [0.5]
    manipulatedVariables['dampratio'] = [0.2]
    manipulatedVariables['damping'] = [0.25, 2.5, 5]

    # Which parameters form the evaluation axes vs. the plotted lines.
    eavluateParmetersList = ['dampratio', 'damping']
    lineParameter = ['timeconst']
    prametersToDrop = list(
        set(manipulatedVariables.keys()) -
        set(eavluateParmetersList + lineParameter))
    print(prametersToDrop, manipulatedVariables)
    # Observed reasonable ranges:
    # timeconst: 0.1-0.2; dmin: 0.5-dmax; dmax: 0.9-0.9999; dampratio: 0.3-0.8

    # Cartesian product of all values -> one parameter dict per condition.
    productedValues = it.product(
        *[[(key, value) for value in values]
          for key, values in manipulatedVariables.items()])
    parametersAllCondtion = [
        dict(list(specificValueParameter))
        for specificValueParameter in productedValues
    ]

    evalNum = 1
    randomSeed = 1542
    dt = 0.02  # 0.05
    dirName = os.path.dirname(__file__)
    dataFolderName = os.path.join(dirName, '..')
    trajectoryDirectory = os.path.join(dataFolderName, 'trajectory',
                                       'Linedt={}'.format(dt))
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)
    for parameterOneCondiiton in parametersAllCondtion:
        simuliateOneParameter(parameterOneCondiiton, evalNum, randomSeed, dt)

    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)  # NOTE(review): built but never used below

    # save evaluation trajectories
    trajectoryExtension = '.pickle'
    trajectoryFixedParameters = {
        'isMujoco': 1,
        'isCylinder': 1,
        'evalNum': evalNum,
        'randomSeed': randomSeed
    }
    generateTrajectorySavePath = GetSavePath(trajectoryDirectory,
                                             trajectoryExtension,
                                             trajectoryFixedParameters)

    # Baseline 1: original environment rollout.
    originEnvTrajSavePath = os.path.join(dirName, '..', 'trajectory',
                                         'LineMoveOriginBaseLine.pickle')
    originTraj = loadFromPickle(originEnvTrajSavePath)[0]
    originTrajdata = [[], [], [], []]
    # s[0] is the state; [:2] presumably position, [2:] velocity -- confirm.
    originTrajdata[0] = [s[0][0][:2] for s in originTraj]
    originTrajdata[1] = [s[0][0][2:] for s in originTraj]
    originTraj = loadFromPickle(originEnvTrajSavePath)  # NOTE(review): redundant reload

    # Baseline 2: fixed original environment rollout.
    fixedOriginEnvTrajSavePath = os.path.join(dirName, '..', 'trajectory',
                                              'LineMoveFixedBaseLine.pickle')
    fixedOriginTraj = loadFromPickle(fixedOriginEnvTrajSavePath)[0]
    fixedOriginTrajdata = [[], [], [], []]
    fixedOriginTrajdata[0] = [s[0][0][:2] for s in fixedOriginTraj]
    fixedOriginTrajdata[1] = [s[0][0][2:] for s in fixedOriginTraj]
    # NOTE(review): redundant reload, and it reloads the ORIGIN path, not the
    # fixed-origin path -- looks like a copy/paste slip; value is unused after.
    fixedOriginTraj = loadFromPickle(originEnvTrajSavePath)

    # Path builder with empty dir/extension, reused only to format a condition
    # dict as a readable string for the figure title.
    getparameter = GetSavePath('', '', {})

    def parseTraj(traj):
        # Split a list of 2D points into parallel x and y component lists.
        x = [s[0] for s in traj]
        y = [s[1] for s in traj]
        return x, y

    for i, parameterOneCondiiton in enumerate(parametersAllCondtion):
        mujocoTrajdata = [[], [], [], []]
        trajectorySavePath = generateTrajectorySavePath(parameterOneCondiiton)
        mujocoTraj = loadFromPickle(trajectorySavePath)
        mujocoTrajdata[0] = [s[0][0][:2] for s in mujocoTraj]
        mujocoTrajdata[1] = [s[0][0][2:] for s in mujocoTraj]

        # One figure per condition: position subplot and velocity subplot.
        fig = plt.figure(i)
        numRows = 1
        numColumns = 2
        plotCounterNum = numRows * numColumns
        subName = ['Pos', 'Vel']
        for plotCounter in range(plotCounterNum):
            axForDraw = fig.add_subplot(numRows, numColumns, plotCounter + 1)
            axForDraw.set_title(subName[plotCounter])
            originX, originY = parseTraj(originTrajdata[plotCounter])
            mujocoX, mujocoY = parseTraj(mujocoTrajdata[plotCounter])
            fixedOriginX, fixedOriginY = parseTraj(
                fixedOriginTrajdata[plotCounter])
            # Only the x-components are plotted, against the time-step index.
            plt.scatter(range(len(originX)), originX, s=5, c='red',
                        marker='x', label='origin')
            plt.scatter(range(len(fixedOriginX)), fixedOriginX, s=5, c='green',
                        marker='x', label='fixedOrigin')
            plt.scatter(range(len(mujocoX)), mujocoX, s=5, c='blue',
                        marker='v', label='mujoco')
        numSimulationFrames = int(0.1 / dt)
        plt.suptitle(
            getparameter(parameterOneCondiiton) +
            'numSimulationFrames={}'.format(numSimulationFrames))
        plt.legend(loc='best')
    plt.show()
def main():
    """Train DDPG on gym MountainCarContinuous, checkpoint the actor/critic,
    pickle the training trajectory, and plot the mean reward curve.

    NOTE(review): depends on module-level hyper-parameters (env, ENV_NAME,
    maxEpisode, maxTimeStep, minibatchSize, gamma, learningRateActor,
    learningRateCritic, tau, bufferSize) and project classes defined outside
    this view.
    """
    stateDim = env.observation_space.shape[0]
    actionDim = env.action_space.shape[0]
    actionHigh = env.action_space.high
    actionLow = env.action_space.low
    actionBound = (actionHigh - actionLow) / 2

    # --- actor/critic networks and their training ops ---
    buildActorModel = BuildActorModel(stateDim, actionDim, actionBound)
    actorLayerWidths = [30]
    actorWriter, actorModel = buildActorModel(actorLayerWidths)
    buildCriticModel = BuildCriticModel(stateDim, actionDim)
    criticLayerWidths = [30]
    criticWriter, criticModel = buildCriticModel(criticLayerWidths)
    trainCriticBySASRQ = TrainCriticBySASRQ(learningRateCritic, gamma, criticWriter)
    trainCritic = TrainCritic(actByPolicyTarget, evaluateCriticTarget, trainCriticBySASRQ)
    trainActorFromGradients = TrainActorFromGradients(learningRateActor, actorWriter)
    trainActorOneStep = TrainActorOneStep(actByPolicyTrain, trainActorFromGradients, getActionGradients)
    trainActor = TrainActor(trainActorOneStep)
    paramUpdateInterval = 1
    updateParameters = UpdateParameters(paramUpdateInterval, tau)
    # Sync target parameters to training parameters before learning starts.
    modelList = [actorModel, criticModel]
    actorModel, criticModel = resetTargetParamToTrainParam(modelList)
    trainModels = TrainDDPGModels(updateParameters, trainActor, trainCritic, actorModel, criticModel)

    # --- exploration noise decaying once the buffer has filled ---
    noiseInitVariance = 1  # control exploration
    varianceDiscount = .99995
    noiseDecayStartStep = bufferSize
    minVar = .1
    getNoise = GetExponentialDecayGaussNoise(noiseInitVariance, varianceDiscount, noiseDecayStartStep, minVar)
    actOneStepWithNoise = ActDDPGOneStep(actionLow, actionHigh, actByPolicyTrain, actorModel, getNoise)
    learningStartBufferSize = minibatchSize
    sampleFromMemory = SampleFromMemory(minibatchSize)
    learnFromBuffer = LearnFromBuffer(learningStartBufferSize, sampleFromMemory, trainModels)

    # --- environment pieces ---
    transit = TransitGymMountCarContinuous()
    isTerminal = IsTerminalMountCarContin()
    getReward = RewardMountCarContin(isTerminal)
    sampleOneStep = SampleOneStep(transit, getReward)
    runDDPGTimeStep = RunTimeStep(actOneStepWithNoise, sampleOneStep, learnFromBuffer)

    # NOTE(review): resetLow/resetHigh are recorded in `parameters` below, but
    # the reset itself ignores them (ResetMountCarContin(seed=None)) -- confirm
    # whether a range-based reset was intended here.
    resetLow = -1
    resetHigh = 0.4
    reset = ResetMountCarContin(seed=None)
    runEpisode = RunEpisode(reset, runDDPGTimeStep, maxTimeStep, isTerminal)
    ddpg = RunAlgorithm(runEpisode, maxEpisode)
    replayBuffer = deque(maxlen=int(bufferSize))
    meanRewardList, trajectory = ddpg(replayBuffer)
    trainedActorModel, trainedCriticModel = trainModels.getTrainedModels()

    # save Model
    modelIndex = 0
    actorFixedParam = {'actorModel': modelIndex}
    criticFixedParam = {'criticModel': modelIndex}
    parameters = {
        'env': ENV_NAME,
        'Eps': maxEpisode,
        'timeStep': maxTimeStep,
        'batch': minibatchSize,
        'gam': gamma,
        'lrActor': learningRateActor,
        'lrCritic': learningRateCritic,
        'noiseVar': noiseInitVariance,
        'varDiscout': varianceDiscount,
        'resetLow': resetLow,
        'High': resetHigh
    }
    modelSaveDirectory = "../trainedDDPGModels"
    modelSaveExtension = '.ckpt'
    getActorSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension, actorFixedParam)
    getCriticSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension, criticFixedParam)
    savePathActor = getActorSavePath(parameters)
    savePathCritic = getCriticSavePath(parameters)
    with actorModel.as_default():
        saveVariables(trainedActorModel, savePathActor)
    with criticModel.as_default():
        saveVariables(trainedCriticModel, savePathCritic)

    dirName = os.path.dirname(__file__)
    trajectoryPath = os.path.join(dirName, '..', 'trajectory', 'mountCarTrajectoryOriginalReset1.pickle')
    saveToPickle(trajectory, trajectoryPath)

    # Optionally replay the collected trajectory visually.
    showDemo = False
    if showDemo:
        visualize = VisualizeMountCarContin()
        visualize(trajectory)

    plotResult = True
    if plotResult:
        plt.plot(list(range(maxEpisode)), meanRewardList)
        plt.show()
def simuliateOneParameter(parameterOneCondiiton, evalNum, randomSeed):
    """Roll out pre-trained MADDPG wolf/sheep policies for one parameter condition.

    Builds the MuJoCo environment described by `parameterOneCondiiton`
    (numWolves/numSheeps/numBlocks/timeconst/dampratio/maxTimeStep/
    sheepSpeedMultiplier/individualRewardWolf/hasWalls/dt), restores the
    matching trained models, samples `evalNum` trajectories, optionally
    saves/visualizes them, and returns the wall-clock sampling time (seconds).

    NOTE(review): `randomSeed` only appears in the pickle file name; per-trajectory
    seeding uses np.random.seed(i) — confirm this is intended.
    """
    saveTraj = True
    visualizeTraj = False
    visualizeMujoco = False

    # --- unpack the evaluated condition ---
    numWolves = parameterOneCondiiton['numWolves']
    numSheeps = parameterOneCondiiton['numSheeps']
    numBlocks = parameterOneCondiiton['numBlocks']
    timeconst = parameterOneCondiiton['timeconst']
    dampratio = parameterOneCondiiton['dampratio']
    maxTimeStep = parameterOneCondiiton['maxTimeStep']
    sheepSpeedMultiplier = parameterOneCondiiton['sheepSpeedMultiplier']
    individualRewardWolf = parameterOneCondiiton['individualRewardWolf']
    hasWalls = parameterOneCondiiton['hasWalls']
    dt = parameterOneCondiiton['dt']
    print(
        "maddpg: {} wolves, {} sheep, {} blocks, saveTraj: {}, visualize: {}".
        format(numWolves, numSheeps, numBlocks, str(saveTraj), str(visualizeTraj)))

    # --- entity bookkeeping: wolves, then sheep, then blocks ---
    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))
    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks
    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    # --- reward: wolves (shared or individual) + sheep (punished out of bounds) ---
    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision, punishForOutOfBound)
    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList, isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    rewardFunc = lambda state, action, nextState: list(
        rewardWolf(state, action, nextState)) + list(
            rewardSheep(state, action, nextState))

    # --- locate and load the MuJoCo XML for this condition ---
    dirName = os.path.dirname(__file__)
    if hasWalls:
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv', 'dt={}'.format(dt),
            'hasWalls={}_numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(hasWalls, numBlocks, numSheeps, numWolves, timeconst, dampratio))
    else:
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(numBlocks, numSheeps, numWolves, timeconst, dampratio))
    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    envXml = xmltodict.unparse(envXmlDict)
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
    # print(physicsSimulation.model.body_pos)

    # --- reset distribution; noise scales with wall size (0 when no walls) ---
    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
    qVelInitNoise = 0 * hasWalls
    qPosInitNoise = 0.8 * hasWalls
    getBlockRandomPos = lambda: np.random.uniform(-0.7 * hasWalls, +0.7 * hasWalls, 2)
    getBlockSpeed = lambda: np.zeros(2)
    numQPos = len(physicsSimulation.data.qpos)
    numQVel = len(physicsSimulation.data.qvel)
    sampleAgentsQPos = lambda: np.asarray(qPosInit) + np.random.uniform(
        low=-qPosInitNoise, high=qPosInitNoise, size=numQPos)
    sampleAgentsQVel = lambda: np.asarray(qVelInit) + np.random.uniform(
        low=-qVelInitNoise, high=qVelInitNoise, size=numQVel)
    minDistance = 0.2 + 2 * blockSize  # minimum block separation at reset
    isOverlap = IsOverlap(minDistance)
    sampleBlockState = SampleBlockState(numBlocks, getBlockRandomPos, getBlockSpeed, isOverlap)
    reset = ResetUniformWithoutXPos(physicsSimulation, numAgents, numBlocks,
                                    sampleAgentsQPos, sampleAgentsQVel, sampleBlockState)

    # --- transition: substep so that numSimulationFrames * dt = 0.1s per env step ---
    damping = 2.5
    numSimulationFrames = int(0.1 / dt)
    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps
    reshapeAction = ReshapeAction()
    isTerminal = lambda state: False  # fixed-length rollouts; no terminal condition
    transit = TransitionFunctionWithoutXPos(physicsSimulation, numAgents,
                                            numSimulationFrames,
                                            damping * dt * numSimulationFrames,
                                            agentsMaxSpeedList, visualizeMujoco,
                                            isTerminal, reshapeAction)
    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit,
                                        isTerminal, rewardFunc, reset)

    # --- per-agent observations -> network input shapes ---
    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]
    initObsForParams = observe(reset())
    obsShape = [
        initObsForParams[obsID].shape[0] for obsID in range(len(initObsForParams))
    ]
    worldDim = 2
    actionDim = worldDim * 2 + 1  # 4 movement directions + no-op, presumably
    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [
        buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)
    ]
    # checkpoint folder/file names encode the training condition
    if hasWalls:
        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPGeavluateWall',
            'dt={}'.format(dt),
            'hasWalls=' + str(hasWalls) + 'numBlocks=' + str(numBlocks) +
            'numSheeps=' + str(numSheeps) + 'numWolves=' + str(numWolves) +
            'timeconst=' + str(timeconst) + 'dampratio=' + str(dampratio) +
            'individualRewardWolf=' + str(individualRewardWolf) +
            'sheepSpeedMultiplier=' + str(sheepSpeedMultiplier))
        individStr = 'individ' if individualRewardWolf else 'shared'
        fileName = "maddpghasWalls={}{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
            hasWalls, numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, individStr)
    else:
        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPG',
            'numBlocks=' + str(numBlocks) + 'numSheeps=' + str(numSheeps) +
            'numWolves=' + str(numWolves) + 'timeconst=' + str(timeconst) +
            'dampratio=' + str(dampratio))
        fileName = "maddpg{}wolves{}sheep{}blocks60000episodes25stepSheepSpeed1.0shared_agent".format(
            numWolves, numSheeps, numBlocks)
    modelPaths = [
        os.path.join(modelFolder, fileName + str(i) + '60000eps')
        for i in range(numAgents)
    ]
    [
        restoreVariables(model, path)
        for model, path in zip(modelsList, modelPaths)
    ]

    # --- joint policy: each agent acts on the full observation list ---
    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [
        actOneStepOneModel(model, observe(allAgentsStates))
        for model in modelsList
    ]

    # --- sample evalNum trajectories, timing the whole batch ---
    startTime = time.time()
    trajList = []
    for i in range(evalNum):
        np.random.seed(i)  # deterministic reset per trajectory index
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))
    endTime = time.time()
    print("Time taken {} seconds to generate {} trajectories".format(
        (endTime - startTime), evalNum))

    # saveTraj
    if saveTraj:
        trajectoryFolder = os.path.join(dirName, '..', 'trajectory', 'evluateWall')
        if not os.path.exists(trajectoryFolder):
            os.makedirs(trajectoryFolder)
        trajectorySaveExtension = '.pickle'
        fixedParameters = {'randomSeed': randomSeed, 'evalNum': evalNum}
        generateTrajectorySavePath = GetSavePath(trajectoryFolder,
                                                 trajectorySaveExtension,
                                                 fixedParameters)
        trajectorySavePath = generateTrajectorySavePath(parameterOneCondiiton)
        saveToPickle(trajList, trajectorySavePath)

    # visualize
    if visualizeTraj:
        entitiesColorList = [wolfColor] * numWolves + [
            sheepColor
        ] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents,
                        getPosFromAgentState)
        print(trajList[0][0], '!!!3', trajList[0][1])
        trajToRender = np.concatenate(trajList)
        render(trajToRender)
    return endTime - startTime
def main():
    """Run the wall-size evaluation sweep and report out-of-bound statistics.

    For each combination in `manipulatedVariables` (only `hasWalls` varies here),
    generates `evalNum` trajectories via `simuliateOneParameter`, then loads the
    saved trajectories and computes the mean/std fraction of states in which any
    agent is outside the wall boundary (hasWalls + 0.05 slack).

    Fixes over the original:
    - the local list collecting per-condition sampling times was named `time`,
      shadowing the stdlib `time` module used elsewhere in this file; renamed.
    - `ComputeOutOfBounds.__call__` now uses the injected `self.CheckStateOfofBound`
      instead of silently closing over the outer class name.
    """
    manipulatedVariables = OrderedDict()
    manipulatedVariables['numWolves'] = [3]
    manipulatedVariables['numSheeps'] = [1]
    manipulatedVariables['numBlocks'] = [2]
    manipulatedVariables['maxTimeStep'] = [25]
    manipulatedVariables['sheepSpeedMultiplier'] = [1.0]
    manipulatedVariables['individualRewardWolf'] = [0]
    manipulatedVariables['timeconst'] = [0.5]
    manipulatedVariables['dampratio'] = [0.2]
    manipulatedVariables['hasWalls'] = [1.0, 1.5, 2.0]
    manipulatedVariables['dt'] = [0.05]
    eavluateParmetersList = ['hasWalls', 'dt']
    lineParameter = ['sheepSpeedMultiplier']
    prametersToDrop = list(
        set(manipulatedVariables.keys()) -
        set(eavluateParmetersList + lineParameter))
    print(prametersToDrop, manipulatedVariables)

    # Cartesian product of all variable values -> one dict per condition
    productedValues = it.product(
        *[[(key, value) for value in values]
          for key, values in manipulatedVariables.items()])
    parametersAllCondtion = [
        dict(list(specificValueParameter))
        for specificValueParameter in productedValues
    ]
    evalNum = 500
    randomSeed = 1542
    dt = 0.05  # 0.05

    dirName = os.path.dirname(__file__)
    trajectoryDirectory = os.path.join(dirName, '..', 'trajectory', 'evluateWall')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    # run each condition, recording its wall-clock sampling time
    # (renamed from `time` — do not shadow the stdlib time module)
    sampleTimes = []
    for parameterOneCondiiton in parametersAllCondtion:
        sampletime = simuliateOneParameter(parameterOneCondiiton, evalNum,
                                           randomSeed)
        sampleTimes.append(sampletime)

    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    # save evaluation trajectories
    trajectoryExtension = '.pickle'
    trajectoryFixedParameters = {'randomSeed': randomSeed, 'evalNum': evalNum}
    getTrajectorySavePath = GetSavePath(trajectoryDirectory,
                                        trajectoryExtension,
                                        trajectoryFixedParameters)
    getTrajectorySavePathFromDf = lambda df: getTrajectorySavePath(
        readParametersFromDf(df))

    # compute statistics on the trajectories
    fuzzySearchParameterNames = []
    loadTrajectories = LoadTrajectories(getTrajectorySavePath, loadFromPickle,
                                        fuzzySearchParameterNames)
    loadTrajectoriesFromDf = lambda df: loadTrajectories(
        readParametersFromDf(df))

    class CheckStateOfofBound(object):
        """True if any agent's |x| or |y| exceeds the boundary distance.

        `state[0][:-2]` — assumes the last two entities are non-agents
        (blocks) and each agent entry starts with (x, y) — TODO confirm.
        """

        def __init__(self, minDistance):
            self.minDistance = minDistance

        def __call__(self, state):
            absloc = np.abs([agent[:2] for agent in state[0][:-2]])
            return np.any(absloc > self.minDistance)

    class ComputeOutOfBounds:
        """Groupby-apply functor: mean/std out-of-bound rate for one condition."""

        def __init__(self, getTrajectories, CheckStateOfofBound):
            self.getTrajectories = getTrajectories
            self.CheckStateOfofBound = CheckStateOfofBound

        def __call__(self, oneConditionDf):
            allTrajectories = self.getTrajectories(oneConditionDf)
            # boundary = wall extent plus a small slack
            minDistance = readParametersFromDf(
                oneConditionDf)['hasWalls'] + 0.05
            # use the injected class (original closed over the outer name)
            checkStateOfofBound = self.CheckStateOfofBound(minDistance)
            allMeasurements = np.array([
                checkStateOfofBound(state) for traj in allTrajectories
                for state in traj
            ])
            measurementMean = np.mean(allMeasurements)
            measurementStd = np.std(allMeasurements)
            return pd.Series({'mean': measurementMean, 'std': measurementStd})

    computeStatistics = ComputeOutOfBounds(loadTrajectoriesFromDf,
                                           CheckStateOfofBound)
    statisticsDf = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf)
    for i, parameterOneCondiiton in enumerate(parametersAllCondtion):
        print(parameterOneCondiiton, sampleTimes[i])
def main():
    """Evaluate rope/leash MADDPG trajectories and plot several statistics.

    Runs `generateSingleCondition` for every (damping, frictionloss, masterForce)
    combination, then loads the saved trajectories and draws four statistic
    figures (mean speed, caught ratio, wolf-sheep distance, wolf-master distance)
    plus a deviation-angle figure. Relies on module-level `dirName`, `maxEpisode`
    etc. — TODO confirm they exist at module scope.
    """
    manipulatedVariables = OrderedDict()
    manipulatedVariables['damping'] = [0.0]
    manipulatedVariables['frictionloss'] = [0.0]
    manipulatedVariables['masterForce'] = [0.0]
    eavluateParmetersList = ['frictionloss', 'damping']
    lineParameter = ['masterForce']
    prametersToDrop = list(
        set(manipulatedVariables.keys()) -
        set(eavluateParmetersList + lineParameter))

    # Cartesian product of all variable values -> one dict per condition
    productedValues = it.product(*[[(key, value) for value in values]
                                   for key, values in manipulatedVariables.items()])
    conditions = [dict(list(specificValueParameter))
                  for specificValueParameter in productedValues]
    evalNum = 500
    randomSeed = 133
    for condition in conditions:
        print(condition)
        generateSingleCondition(condition, evalNum, randomSeed)

    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    # save evaluation trajectories
    trajectoriesSaveDirectory = os.path.join(dirName, '..', 'trajectory', 'evluateRopeFixedEnv2')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)
    trajectoryExtension = '.pickle'
    trajectoryFixedParameters = {'randomSeed': randomSeed, 'evalNum': evalNum}
    getTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectoryExtension, trajectoryFixedParameters)
    getTrajectorySavePathFromDf = lambda df: getTrajectorySavePath(readParametersFromDf(df))

    # compute statistics on the trajectories
    fuzzySearchParameterNames = []
    loadTrajectories = LoadTrajectories(getTrajectorySavePath, loadFromPickle, fuzzySearchParameterNames)
    loadTrajectoriesFromDf = lambda df: loadTrajectories(readParametersFromDf(df))

    def computeV(traj):
        """Mean speed over all agents and all states of one trajectory.

        Assumes each agent state is (x, y, vx, vy, ...) so [2:] is the
        velocity part — TODO confirm against the state layout.
        """
        vel = [np.linalg.norm(agentState[2:]) for state in traj for agentState in state[0]]
        return np.mean(vel)

    measurementFunction = computeV
    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf, measurementFunction)
    statisticsDf = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf)
    darwStatisticsDf = DarwStatisticsDf(manipulatedVariables, eavluateParmetersList, lineParameter)
    subtitle = 'velocity'
    figIndex = 0
    ylimMax = 1.2
    darwStatisticsDf(statisticsDf, subtitle, figIndex, ylimMax)

    # NOTE(review): this class is never used, and its __call__ references the
    # unbound name `splitMeasurement` (missing self.) — dead code; candidate
    # for removal.
    class fliterMeasurement():
        """docstring for fliterMeasurement"""

        def __init__(self, splitLength, splitMeasurement):
            self.splitLength = splitLength
            self.splitMeasurement = splitMeasurement

        def __call__(self, traj):
            [splitMeasurement(traj[i:i + self.splitLength]) for i in range(len(traj) - self.splitLength)]

    # --- caught ratio: fraction of states with wolf within minDistance of sheep ---
    getWolfPos = lambda state: getPosFromAgentState(state[0][0])
    getSheepfPos = lambda state: getPosFromAgentState(state[0][1])
    minDistance = 0.35
    isCaught = IsTerminal(minDistance, getWolfPos, getSheepfPos)
    measurementFunction2 = lambda traj: np.mean([isCaught(state) for state in traj])
    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf, measurementFunction2)
    statisticsDf2 = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf2)
    subtitle = 'caughtRatio(minDistance={})'.format(minDistance)
    figIndex = figIndex + 1
    ylimMax = 0.2
    darwStatisticsDf(statisticsDf2, subtitle, figIndex, ylimMax)

    # --- mean wolf-sheep distance ---
    getWolfPos = lambda state: getPosFromAgentState(state[0][0])
    getSheepfPos = lambda state: getPosFromAgentState(state[0][1])
    calculateWolfSheepDistance = lambda state: np.linalg.norm(getWolfPos(state) - getSheepfPos(state))
    measurementFunction3 = lambda traj: np.mean([calculateWolfSheepDistance(state) for state in traj])
    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf, measurementFunction3)
    statisticsDf3 = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf3)
    subtitle = 'WolfSheepDistance'
    figIndex = figIndex + 1
    ylimMax = 1.6
    darwStatisticsDf(statisticsDf3, subtitle, figIndex, ylimMax)

    # --- mean wolf-master distance (master is agent index 2) ---
    getWolfPos = lambda state: getPosFromAgentState(state[0][0])
    getMasterfPos = lambda state: getPosFromAgentState(state[0][2])
    calculateWolfMasterDistance = lambda state: np.linalg.norm(getWolfPos(state) - getMasterfPos(state))
    measurementFunction3 = lambda traj: np.mean([calculateWolfMasterDistance(state) for state in traj])
    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf, measurementFunction3)
    statisticsDf3 = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf3)
    subtitle = 'WolfMasterDistance'
    figIndex = figIndex + 1
    ylimMax = 0.6
    darwStatisticsDf(statisticsDf3, subtitle, figIndex, ylimMax)

    # --- deviation: angle between wolf->sheep direction and wolf velocity ---
    getWolfPos = lambda state: getPosFromAgentState(state[0][0])
    getSheepfPos = lambda state: getPosFromAgentState(state[0][1])
    getWolfVel = lambda state: getVelFromAgentState(state[0][0])

    def calculateCrossAngle(vel1, vel2):
        """Absolute angle between 2D vectors vel1 and vel2, in degrees [0, 180]."""
        # represent vectors as complex numbers; the ratio's argument is the angle
        vel1complex = complex(vel1[0], vel1[1])
        vel2complex = complex(vel2[0], vel2[1])
        return np.abs(np.angle(vel2complex / vel1complex)) / np.pi * 180

    calculateDevation = lambda state: calculateCrossAngle(getWolfPos(state) - getSheepfPos(state), getWolfVel(state))
    measurementFunction3 = lambda traj: np.mean([calculateDevation(state) for state in traj])
    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf, measurementFunction3)
    statisticsDf = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf)
    subtitle = 'Devation'
    figIndex = figIndex + 1
    darwStatisticsDf(statisticsDf, subtitle, figIndex)
    plt.show()
def generateSingleCondition(condition, evalNum, randomSeed):
    """Sample `evalNum` trajectories of the leashed (rope) wolf/sheep/master task.

    Builds the MuJoCo rope environment with joint damping/frictionloss taken
    from `condition`, restores the matching trained MADDPG models, rolls out
    trajectories seeded with randomSeed+i, and optionally pickles/renders them.

    Fixes over the original: the XML was parsed twice in a row
    (`xmltodict.parse(xml_string.strip())` duplicated); the redundant
    second parse is removed.

    condition -- dict with keys 'damping', 'frictionloss', 'masterForce'
    evalNum -- number of trajectories to sample
    randomSeed -- base numpy seed; trajectory i uses randomSeed + i
    """
    debug = 0
    if debug:
        # hard-coded settings for interactive debugging
        damping = 2.0
        frictionloss = 0.0
        masterForce = 1.0
        numWolves = 1
        numSheeps = 1
        numMasters = 1
        maxTimeStep = 25
        saveTraj = False
        saveImage = True
        visualizeMujoco = False
        visualizeTraj = True
        makeVideo = True
    else:
        damping = float(condition['damping'])
        frictionloss = float(condition['frictionloss'])
        masterForce = float(condition['masterForce'])
        numWolves = 1
        numSheeps = 1
        numMasters = 1
        maxTimeStep = 25
        saveTraj = True
        saveImage = False
        visualizeMujoco = False
        visualizeTraj = False
        makeVideo = False
    print("maddpg: , saveTraj: {}, visualize: {},damping; {},frictionloss: {}".format(
        str(saveTraj), str(visualizeMujoco), damping, frictionloss))

    # --- entity bookkeeping: wolf(0), sheep(1), master(2) ---
    numAgents = numWolves + numSheeps + numMasters
    # NOTE(review): numEntities adds numMasters again on top of numAgents
    # (which already includes the master) — confirm intended.
    numEntities = numAgents + numMasters
    wolvesID = [0]
    sheepsID = [1]
    masterID = [2]
    wolfSize = 0.075
    sheepSize = 0.05
    masterSize = 0.075
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [masterSize] * numMasters
    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    entitiesMovableList = [True] * numAgents + [False] * numMasters
    massList = [1.0] * numEntities

    # --- rewards: master gets the negated wolf reward ---
    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision, punishForOutOfBound)
    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    rewardMaster = lambda state, action, nextState: [
        -reward for reward in rewardWolf(state, action, nextState)
    ]
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + \
        list(rewardSheep(state, action, nextState)) + \
        list(rewardMaster(state, action, nextState))

    dirName = os.path.dirname(__file__)

    # --- build joint-property patches for the environment XML ---
    makePropertyList = MakePropertyList(transferNumberListToStr)
    geomIds = [1, 2, 3]
    keyNameList = [0, 1]
    valueList = [[damping, damping]] * len(geomIds)
    dampngParameter = makePropertyList(geomIds, keyNameList, valueList)
    changeJointDampingProperty = lambda envDict, geomPropertyDict: \
        changeJointProperty(envDict, geomPropertyDict, '@damping')

    geomIds = [1, 2, 3]
    keyNameList = [0, 1]
    valueList = [[frictionloss, frictionloss]] * len(geomIds)
    frictionlossParameter = makePropertyList(geomIds, keyNameList, valueList)
    changeJointFrictionlossProperty = lambda envDict, geomPropertyDict: \
        changeJointProperty(envDict, geomPropertyDict, '@frictionloss')

    # --- load, patch and instantiate the MuJoCo model ---
    physicsDynamicsPath = os.path.join(dirName, '..', '..', 'environment',
                                       'mujocoEnv', 'rope', 'leasedNew.xml')
    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    # parse once (the original parsed the same string twice)
    envXmlDict = xmltodict.parse(xml_string.strip())
    envXmlPropertyDictList = [dampngParameter, frictionlossParameter]
    changeEnvXmlPropertFuntionyList = [changeJointDampingProperty,
                                       changeJointFrictionlossProperty]
    for propertyDict, changeXmlProperty in zip(envXmlPropertyDictList,
                                               changeEnvXmlPropertFuntionyList):
        envXmlDict = changeXmlProperty(envXmlDict, propertyDict)
    envXml = xmltodict.unparse(envXmlDict)
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)

    # --- reset distribution (agents + rope parts -> 24 qpos/qvel entries) ---
    qPosInit = (0, ) * 24
    qVelInit = (0, ) * 24
    qPosInitNoise = 0.4
    qVelInitNoise = 0
    numAgent = 2
    tiedAgentId = [0, 2]  # wolf and master are joined by the rope
    ropePartIndex = list(range(3, 12))
    maxRopePartLength = 0.06
    reset = ResetUniformWithoutXPosForLeashed(physicsSimulation, qPosInit,
                                              qVelInit, numAgent, tiedAgentId,
                                              ropePartIndex, maxRopePartLength,
                                              qPosInitNoise, qVelInitNoise)
    numSimulationFrames = 10
    isTerminal = lambda state: False  # fixed-length rollouts
    # wolf and sheep act at full force (5); master force is the swept variable
    reshapeActionList = [ReshapeAction(5), ReshapeAction(5), ReshapeAction(masterForce)]
    transit = TransitionFunctionWithoutXPos(physicsSimulation,
                                            numSimulationFrames,
                                            visualizeMujoco, isTerminal,
                                            reshapeActionList)

    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit,
                                        isTerminal, rewardFunc, reset)

    # --- observations and model restoration ---
    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              masterID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state)
                             for agentID in range(numAgents)]
    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape[0]
                for obsID in range(len(initObsForParams))]
    worldDim = 2
    actionDim = worldDim * 2 + 1
    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [buildMADDPGModels(layerWidth, agentID)
                  for agentID in range(numAgents)]
    modelFolder = os.path.join(
        dirName, '..', 'trainedModels', 'mujocoMADDPGLeasedFixedEnv2',
        'damping={}_frictionloss={}_masterForce={}'.format(damping, frictionloss, masterForce))
    fileName = "maddpg{}episodes{}step_agent".format(maxEpisode, maxTimeStep)
    modelPaths = [os.path.join(modelFolder, fileName + str(i) + '60000eps')
                  for i in range(numAgents)]
    [restoreVariables(model, path) for model, path in zip(modelsList, modelPaths)]

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [
        actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList
    ]

    # --- sample trajectories, deterministically seeded per index ---
    trajList = []
    for i in range(evalNum):
        np.random.seed(randomSeed + i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))

    # saveTraj
    if saveTraj:
        trajectorySaveExtension = '.pickle'
        fixedParameters = {'randomSeed': randomSeed, 'evalNum': evalNum}
        trajectoriesSaveDirectory = os.path.join(dirName, '..', 'trajectory',
                                                 'evluateRopeFixedEnv2')
        if not os.path.exists(trajectoriesSaveDirectory):
            os.makedirs(trajectoriesSaveDirectory)
        generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                                 trajectorySaveExtension,
                                                 fixedParameters)
        trajectorySavePath = generateTrajectorySavePath(condition)
        saveToPickle(trajList, trajectorySavePath)

    # visualize
    if visualizeTraj:
        demoFolder = os.path.join(
            dirName, '..', 'demos', 'mujocoMADDPGLeased',
            'damping={}_frictionloss={}_masterForce={}'.format(damping, frictionloss, masterForce))
        if not os.path.exists(demoFolder):
            os.makedirs(demoFolder)
        entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [masterColor] * numMasters
        render = Render(entitiesSizeList, entitiesColorList, numAgents,
                        demoFolder, saveImage, getPosFromAgentState)
        trajToRender = np.concatenate(trajList)
        render(trajToRender)
def main():
    """Train a DQN on a discretized Pendulum, save the model and trajectory.

    Relies on module-level globals not visible in this chunk: env, seed,
    observe, angle_normalize, isTerminalGymPendulum, plus the imported DQN
    building blocks — TODO confirm at module scope.
    """
    stateDim = env.observation_space.shape[0]
    actionDim = 7  # pendulum torque discretized into 7 actions

    # --- build Q-network ---
    buildModel = BuildModel(stateDim, actionDim)
    layersWidths = [30]
    writer, model = buildModel(layersWidths)

    # --- training operators ---
    learningRate = 0.001
    gamma = 0.99
    trainModelBySASRQ = TrainModelBySASRQ(learningRate, gamma, writer)
    paramUpdateInterval = 300  # hard target-network update interval (steps)
    updateParameters = UpdateParameters(paramUpdateInterval)
    model = resetTargetParamToTrainParam([model])[0]  # sync target to train params
    trainModels = TrainDQNModel(getTargetQValue, trainModelBySASRQ, updateParameters, model)

    # --- epsilon-greedy exploration, annealed after the buffer fills ---
    epsilonMax = 0.9
    epsilonIncrease = 0.0001
    epsilonMin = 0
    bufferSize = 10000
    decayStartStep = bufferSize
    getEpsilon = GetEpsilon(epsilonMax, epsilonMin, epsilonIncrease, decayStartStep)
    actGreedyByModel = ActGreedyByModel(getTrainQValue, model)
    actRandom = ActRandom(actionDim)
    actByTrainNetEpsilonGreedy = ActByTrainNetEpsilonGreedy(getEpsilon, actGreedyByModel, actRandom)

    # --- replay buffer learning ---
    minibatchSize = 128
    learningStartBufferSize = minibatchSize  # learn once one minibatch is available
    sampleFromMemory = SampleFromMemory(minibatchSize)
    learnFromBuffer = LearnFromBuffer(learningStartBufferSize, sampleFromMemory, trainModels)

    # --- MDP pieces for discretized Pendulum ---
    processAction = ProcessDiscretePendulumAction(actionDim)
    transit = TransitGymPendulum(processAction)
    getReward = RewardGymPendulum(angle_normalize, processAction)
    sampleOneStep = SampleOneStep(transit, getReward)
    runDQNTimeStep = RunTimeStep(actByTrainNetEpsilonGreedy, sampleOneStep, learnFromBuffer, observe)
    reset = ResetGymPendulum(seed)
    maxTimeStep = 200
    runEpisode = RunEpisode(reset, runDQNTimeStep, maxTimeStep, isTerminalGymPendulum)
    maxEpisode = 400
    dqn = RunAlgorithm(runEpisode, maxEpisode)

    # --- run training ---
    replayBuffer = deque(maxlen=int(bufferSize))
    meanRewardList, trajectory = dqn(replayBuffer)
    trainedModel = trainModels.getTrainedModels()

    # save Model: hyperparameters are encoded into the checkpoint file name
    parameters = {'maxEpisode': maxEpisode, 'maxTimeStep': maxTimeStep,
                  'minibatchSize': minibatchSize, 'gamma': gamma,
                  'learningRate': learningRate,
                  'epsilonIncrease': epsilonIncrease, 'epsilonMin': epsilonMin}
    modelSaveDirectory = "../trainedDQNModels"
    modelSaveExtension = '.ckpt'
    getSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension)
    savePath = getSavePath(parameters)
    # presumably trainedModel is a TF graph; save inside its context
    with trainedModel.as_default():
        saveVariables(trainedModel, savePath)

    # --- persist trajectory ---
    dirName = os.path.dirname(__file__)
    trajectoryPath = os.path.join(dirName, '..', 'trajectory', 'pendulumDQNTrajectory.pickle')
    saveToPickle(trajectory, trajectoryPath)

    plotResult = True
    if plotResult:
        plt.plot(list(range(maxEpisode)), meanRewardList)
        plt.show()
    showDemo = False
    if showDemo:
        visualize = VisualizeGymPendulum()
        visualize(trajectory)
def main():
    """Train a DQN on discrete MountainCar, plot mean rewards and save everything.

    Relies on module-level globals not visible in this chunk: buffersize,
    initepsilon, minepsilon, epsilondec, numberlayers, replaceiter,
    learningRate, gamma, batchsize, EPISODE, ENV_NAME, epsilonDec — TODO
    confirm at module scope.
    """
    # --- environment and MDP pieces ---
    env = MtCarDiscreteEnvSetup()
    visualize = visualizeMtCarDiscrete()
    reset = resetMtCarDiscrete(1234)
    transition = MtCarDiscreteTransition()
    rewardMtCar = MtCarDiscreteReward()
    isterminal = MtCarDiscreteIsTerminal()
    statesdim = env.observation_space.shape[0]
    actiondim = env.action_space.n

    # --- bookkeeping ---
    replaybuffer = deque(maxlen=buffersize)
    runepsilon = initepsilon
    totalrewards = []   # every episode's return (never reset)
    meanreward = []     # mean return per 100-episode window
    trajectory = []
    totalreward = []    # window accumulator, cleared every 100 episodes

    # --- model and training operators ---
    buildmodel = BuildModel(statesdim, actiondim)
    Writer, DQNmodel = buildmodel(numberlayers)
    replaceParameters = ReplaceParameters(replaceiter)
    trainModel = TrainModel(learningRate, gamma, Writer)
    trainDQNmodel = TrainDQNmodel(replaceParameters, trainModel, DQNmodel)
    learn = Learn(buffersize, batchsize, trainDQNmodel, actiondim)

    for episode in range(EPISODE):
        state = reset()
        rewards = 0
        runtime = 0
        while True:
            # epsilon-greedy action, environment step, store and learn
            action = learn.Getaction(DQNmodel, runepsilon, state)
            nextstate = transition(state, action)
            done = isterminal(nextstate)
            reward = rewardMtCar(state, action, nextstate, done)
            learn.ReplayMemory(replaybuffer, state, action, reward, nextstate, done)
            trajectory.append((state, action, reward, nextstate))
            rewards += reward
            state = nextstate
            runtime += 1
            # episode ends after 200 steps or on the terminal state
            if runtime == 200:
                totalrewards.append(rewards)
                totalreward.append(rewards)
                runtime = 0
                print('episode: ', episode, 'reward:', rewards, 'epsilon:', runepsilon)
                break
            if done:
                totalrewards.append(rewards)
                totalreward.append(rewards)
                print('episode: ', episode, 'reward:', rewards, 'epsilon:', runepsilon)
                break
        # decay exploration once per episode
        runepsilon = epsilonDec(runepsilon, minepsilon, epsilondec)
        if episode % 100 == 0:
            meanreward.append(np.mean(totalreward))
            print('episode: ', episode, 'meanreward:', np.mean(totalreward))
            totalreward = []

    # --- plot the 100-episode mean rewards (reuses `episode` as the x axis) ---
    episode = 100 * (np.arange(len(meanreward)))
    plt.plot(episode, meanreward)
    plt.xlabel('episode')
    plt.ylabel('rewards')
    plt.ylim([-200, -50])
    plt.show()

    # save Model
    modelIndex = 0
    DQNFixedParam = {'DQNmodel': modelIndex}
    # hyperparameters encoded into the checkpoint file name
    parameters = {
        'env': ENV_NAME, 'Eps': EPISODE, 'batch': batchsize,
        'buffersize': buffersize, 'gam': gamma, 'learningRate': learningRate,
        'replaceiter': replaceiter, 'epsilondec': epsilondec,
        'minepsilon': minepsilon, 'initepsilon': initepsilon
    }
    modelSaveDirectory = "/path/to/logs/trainedDQNModels"
    modelSaveExtension = '.ckpt'
    getSavePath = GetSavePath(modelSaveDirectory, modelSaveExtension, DQNFixedParam)
    savePathDQN = getSavePath(parameters)
    # presumably DQNmodel is a TF graph; save inside its context
    with DQNmodel.as_default():
        saveVariables(DQNmodel, savePathDQN)

    dirName = os.path.dirname(__file__)
    trajectoryPath = os.path.join(dirName, 'trajectory', 'mountCarTrajectory.pickle')
    saveToPickle(trajectory, trajectoryPath)
def main():
    """Sweep MuJoCo contact-solver parameters and plot position/velocity MSE.

    For each solimp/solref combination, samples trajectories via
    `simuliateOneParameter`, then compares them against a baseline trajectory
    set and draws per-step MSE grids for position and speed.
    """
    manipulatedVariables = OrderedDict()
    # solimp
    manipulatedVariables['dmin'] = [0.9]
    manipulatedVariables['dmax'] = [0.9999]
    manipulatedVariables['width'] = [0.001]
    manipulatedVariables['midpoint'] = [0.5]  # useless
    manipulatedVariables['power'] = [1]  # useless
    # solref
    manipulatedVariables['timeconst'] = [
        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0
    ]
    manipulatedVariables['dampratio'] = [0.2, 0.4, 0.6, 0.8, 1.0]
    eavluateParmetersList = ['dampratio', 'dmax']  # grid rows/columns
    lineParameter = ['timeconst']  # one line per timeconst within each subplot
    prametersToDrop = list(
        set(manipulatedVariables.keys()) -
        set(eavluateParmetersList + lineParameter))
    print(prametersToDrop, manipulatedVariables)
    # suggested ranges (from the original author):
    # timeconst: 0.1-0.2
    # dmin: 0.5-dimax
    # dmax: 0.9-0.9999
    # dampratio: 0.3-0.8

    # Cartesian product of all variable values -> one dict per condition
    productedValues = it.product(
        *[[(key, value) for value in values]
          for key, values in manipulatedVariables.items()])
    parametersAllCondtion = [
        dict(list(specificValueParameter))
        for specificValueParameter in productedValues
    ]
    evalNum = 500
    randomSeed = 1542
    dt = 0.05  # 0.05

    dirName = os.path.dirname(__file__)
    dataFolderName = os.path.join(dirName, '..')
    trajectoryDirectory = os.path.join(dataFolderName, 'trajectory',
                                       'variousCollsiondt={}'.format(dt))
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    # generate trajectories for every condition
    for parameterOneCondiiton in parametersAllCondtion:
        simuliateOneParameter(parameterOneCondiiton, evalNum, randomSeed, dt)

    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    # save evaluation trajectories
    trajectoryExtension = '.pickle'
    trajectoryFixedParameters = {
        'isMujoco': 1,
        'isCylinder': 1,
        'evalNum': evalNum,
        'randomSeed': randomSeed
    }
    getTrajectorySavePath = GetSavePath(trajectoryDirectory,
                                        trajectoryExtension,
                                        trajectoryFixedParameters)
    getTrajectorySavePathFromDf = lambda df: getTrajectorySavePath(
        readParametersFromDf(df))

    # compute statistics on the trajectories
    fuzzySearchParameterNames = []
    loadTrajectories = LoadTrajectories(getTrajectorySavePath, loadFromPickle,
                                        fuzzySearchParameterNames)
    loadTrajectoriesFromDf = lambda df: loadTrajectories(
        readParametersFromDf(df))

    def compareXPos(traj, baselineTraj):
        """Per-step squared position error vs the baseline trajectory.

        Assumes each state's first agent entry starts with (x, y) — TODO confirm.
        """
        SE = [
            np.linalg.norm(s1[0][0][:2] - s2[0][0][:2])**2
            for s1, s2 in zip(baselineTraj, traj)
        ]
        return SE

    originEnvTrajSavePath = os.path.join(
        dirName, '..', 'trajectory', 'variousCollsion',
        'collisionMoveOriginBaseLine_evalNum={}_randomSeed={}.pickle'.format(
            evalNum, randomSeed))
    baselineTrajs = loadFromPickle(originEnvTrajSavePath)

    class CompareTrajectories():
        """Groupby-apply functor: per-step mean/std of a trajectory-vs-baseline metric."""

        def __init__(self, baselineTrajs, calculateStatiscs):
            self.baselineTrajs = baselineTrajs
            self.calculateStatiscs = calculateStatiscs

        def __call__(self, trajectorys):
            allMeasurements = np.array([
                self.calculateStatiscs(traj, baselineTraj)
                for traj, baselineTraj in zip(trajectorys, self.baselineTrajs)
            ])
            # mean/std across trajectories, keeping the per-step axis
            measurementMean = allMeasurements.mean(axis=0)
            measurementStd = allMeasurements.std(axis=0)
            return pd.Series({'mean': measurementMean, 'std': measurementStd})

    measurementFunction = CompareTrajectories(baselineTrajs, compareXPos)
    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf,
                                          measurementFunction)
    statisticsDf = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf)

    # --- figure 0: position MSE grid (rows: dampratio, columns: dmax) ---
    fig = plt.figure(0)
    rowName, columnName = eavluateParmetersList
    numRows = len(manipulatedVariables[rowName])
    numColumns = len(manipulatedVariables[columnName])
    plotCounter = 1
    selfId = 0
    for rowVar, grp in statisticsDf.groupby(rowName):
        grp.index = grp.index.droplevel(rowName)
        for columVar, group in grp.groupby(columnName):
            group.index = group.index.droplevel(columnName)
            axForDraw = fig.add_subplot(numRows, numColumns, plotCounter)
            # label only the leftmost column and the top row
            if (plotCounter % numColumns == 1) or numColumns == 1:
                axForDraw.set_ylabel(rowName + ': {}'.format(rowVar))
            if plotCounter <= numColumns:
                axForDraw.set_title(columnName + ': {}'.format(columVar))
            axForDraw.set_ylim(0, 0.02)
            drawPerformanceLine(group, axForDraw, lineParameter, prametersToDrop)
            plotCounter += 1
    plt.suptitle('MSEforPos,dt={}'.format(dt))
    plt.legend(loc='best')
    # plt.show()

    def compareV(traj, baselineTraj):
        """Per-step squared velocity error vs the baseline trajectory.

        Assumes indices [2:] of the first agent entry are velocity — TODO confirm.
        """
        SE = [
            np.linalg.norm(s1[0][0][2:] - s2[0][0][2:])**2
            for s1, s2 in zip(baselineTraj, traj)
        ]
        return SE

    measurementFunction2 = CompareTrajectories(baselineTrajs, compareV)
    computeStatistics = ComputeStatistics(loadTrajectoriesFromDf,
                                          measurementFunction2)
    statisticsDf2 = toSplitFrame.groupby(levelNames).apply(computeStatistics)
    print(statisticsDf2)

    # --- figure 1: speed MSE grid, same layout as figure 0 ---
    fig = plt.figure(1)
    numRows = len(manipulatedVariables[rowName])
    numColumns = len(manipulatedVariables[columnName])
    plotCounter = 1
    for rowVar, grp in statisticsDf2.groupby(rowName):
        grp.index = grp.index.droplevel(rowName)
        for columVar, group in grp.groupby(columnName):
            group.index = group.index.droplevel(columnName)
            axForDraw = fig.add_subplot(numRows, numColumns, plotCounter)
            if (plotCounter % numColumns == 1) or numColumns == 1:
                axForDraw.set_ylabel(rowName + ': {}'.format(rowVar))
            if plotCounter <= numColumns:
                axForDraw.set_title(columnName + ': {}'.format(columVar))
            axForDraw.set_ylim(0, 0.4)
            drawPerformanceLine(group, axForDraw, lineParameter, prametersToDrop)
            plotCounter += 1
    plt.suptitle('MSEforSpeed,dt={}'.format(dt))
    plt.legend(loc='best')
    plt.show()
def simuliateOneParameter(parameterDict, evalNum, randomSeed, dt):
    """Roll out `evalNum` impulse-response episodes in a MuJoCo wolf/sheep/block
    environment whose contact-solver settings come from `parameterDict`, and
    pickle the collected trajectories.

    Args:
        parameterDict: dict with contact-solver keys 'dmin', 'dmax', 'width',
            'midpoint', 'power' (solimp) and 'timeconst', 'dampratio' (solref);
            also used to name the output pickle file.
        evalNum: number of trajectories to generate.
        randomSeed: base RNG seed; episode i is seeded with randomSeed + i.
        dt: physics timestep; selects both the env XML variant and the output
            folder name.

    Side effects: reads the environment XML from disk, prints the substep
    count and timing, and (saveTraj is hard-coded True) writes all
    trajectories to a single pickle file. Relies on module-level `dirName`.
    """
    mujocoVisualize = False  # render inside the physics transition function
    demoVisualize = False    # replay finished trajectories with Render
    # Entity radii and display colours.
    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    wolfColor = np.array([0.85, 0.35, 0.35])
    sheepColor = np.array([0.35, 0.85, 0.35])
    blockColor = np.array([0.25, 0.25, 0.25])
    # One wolf, one sheep, one block (entity order: wolves, sheep, blocks).
    wolvesID = [0]
    sheepsID = [1]
    blocksID = [2]
    numWolves = len(wolvesID)
    numSheeps = len(sheepsID)
    numBlocks = len(blocksID)
    sheepMaxSpeed = 1.3
    wolfMaxSpeed = 1.0
    blockMaxSpeed = None  # blocks are static
    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed
                                                      ] * numSheeps
    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks
    # NOTE(review): entityMaxSpeedList, entitiesMovableList and massList are
    # computed but never used below.
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    # Per-step rewards: wolves rewarded for collisions with sheep; sheep
    # punished when caught and when leaving the arena bounds.
    isCollision = IsCollision(getPosFromAgentState)
    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)
    # Joint reward vector: wolf rewards first, then sheep rewards.
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    makePropertyList = MakePropertyList(transferNumberListToStr)

    # Build XML attribute overrides for the collision-relevant solver
    # parameters (solimp / solref), applied identically to geoms 1-3.
    dmin = parameterDict['dmin']
    dmax = parameterDict['dmax']
    width = parameterDict['width']
    midpoint = parameterDict['midpoint']
    power = parameterDict['power']
    timeconst = parameterDict['timeconst']
    dampratio = parameterDict['dampratio']
    geomIds = [1, 2, 3]
    keyNameList = ['@solimp', '@solref']
    valueList = [[[dmin, dmax, width, midpoint, power], [timeconst, dampratio]]
                 ] * len(geomIds)
    collisionParameter = makePropertyList(geomIds, keyNameList, valueList)

    #changeSize (kept for reference)
    # geomIds=[1,2]
    # keyNameList=['@size']
    # valueList=[[[0.075,0.075]],[[0.1,0.1]]]
    # sizeParameter=makePropertyList(geomIds,keyNameList,valueList)

    # Load the env XML for this dt and patch the geom properties above.
    physicsDynamicsPath = os.path.join(
        dirName, '..', '..', 'environment', 'mujocoEnv',
        'variousCollision_numBlocks=1_numSheeps=1_numWolves=1dt={}.xml'.format(
            dt))
    with open(physicsDynamicsPath) as f:
        xml_string = f.read()

    envXmlDict = xmltodict.parse(xml_string.strip())
    envXmlPropertyDictList = [collisionParameter]
    changeEnvXmlPropertFuntionyList = [changeGeomProperty]
    for propertyDict, changeXmlProperty in zip(
            envXmlPropertyDictList, changeEnvXmlPropertFuntionyList):
        envXmlDict = changeXmlProperty(envXmlDict, propertyDict)
    envXml = xmltodict.unparse(envXmlDict)
    # print(envXmlDict['mujoco']['worldbody']['body'][0])
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)

    # MDP function
    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
    qVelInitNoise = 0  # unused below: the reset used later is fixed, not random
    qPosInitNoise = 1
    # reset=ResetUniformWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents, numBlocks,qPosInitNoise, qVelInitNoise)
    # fixReset=ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents,numBlocks)
    # blocksState=[[0,-0.8,0,0]]
    # reset=lambda :fixReset([-0.5,0.8,-0.5,0,0.5,0.8],[0,0,0,0,0,0],blocksState)
    isTerminal = lambda state: False  # episodes end only via maxRunningSteps

    # Substep count so one MDP step always spans 0.1s of simulated time.
    numSimulationFrames = int(0.1 / dt)
    print(numSimulationFrames)
    # dt=0.01
    damping = 2.5
    reshapeAction = lambda action: action
    # transit = TransitionFunctionWithoutXPos(physicsSimulation,numAgents , numSimulationFrames,damping*dt/numSimulationFrames,agentsMaxSpeedList,mujocoVisualize,reshapeAction)
    maxRunningSteps = 10
    # sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, rewardFunc, reset)
    # observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
    # observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]
    # initObsForParams = observe(reset())
    # obsShape = [initObsForParams[obsID].shape for obsID in range(len(initObsForParams))]

    class ImpulsePolicy(object):
        """Apply `initAction` on the first timestep only, then zero force."""

        def __init__(self, initAction):
            self.initAction = initAction

        def __call__(self, state, timeStep):
            action = [[0, 0], [0, 0]]
            if timeStep == 0:
                action = self.initAction
            return action

    worldDim = 2  # unused below
    trajList = []
    startTime = time.time()
    for i in range(evalNum):
        # Deterministic per-episode seed so runs are reproducible.
        np.random.seed(randomSeed + i)
        # Random initial heading/speed for the wolf and a random one-shot
        # impulse direction/magnitude applied at t=0.
        initSpeedDirection = np.random.uniform(-np.pi / 2, np.pi / 2, 1)[0]
        initSpeed = np.random.uniform(0, 1, 1)[0]
        initActionDirection = np.random.uniform(-np.pi / 2, np.pi / 2, 1)[0]
        initForce = np.random.uniform(0, 5, 1)[0]
        # print(initSpeedDirection,initSpeed,initActionDirection,initForce)
        fixReset = ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit,
                                       numAgents, numBlocks)
        blocksState = [[0, 0, 0, 0]]  #posX posY velX velY
        # Fixed start: presumably wolf at (-0.28, 0) moving with the sampled
        # velocity, sheep parked far away at (8, 8) — TODO confirm layout.
        reset = lambda: fixReset([-0.28, 0, 8, 8], [
            initSpeed * np.cos(initSpeedDirection), initSpeed * np.sin(
                initSpeedDirection), 0, 0
        ], blocksState)
        # NOTE(review): damping term here is damping*dt*numSimulationFrames,
        # but the commented-out variant above used damping*dt/numSimulationFrames
        # — confirm which scaling the transition function expects.
        transit = TransitionFunctionWithoutXPos(
            physicsSimulation, numAgents, numSimulationFrames,
            damping * dt * numSimulationFrames, agentsMaxSpeedList,
            mujocoVisualize, isTerminal, reshapeAction)
        initAction = [[
            initForce * np.cos(initActionDirection),
            initForce * np.sin(initActionDirection)
        ], [0, 0]]
        impulsePolicy = ImpulsePolicy(initAction)
        sampleTrajectory = SampleTrajectory(maxRunningSteps, transit,
                                            isTerminal, rewardFunc, reset)
        traj = sampleTrajectory(impulsePolicy)
        # print('traj',traj[0])
        trajList.append(traj)

    # saveTraj
    # print(trajList)
    saveTraj = True
    if saveTraj:
        # trajSavePath = os.path.join(dirName,'traj', 'evaluateCollision', 'CollisionMoveTransitDamplingCylinder.pickle')
        trajectorySaveExtension = '.pickle'
        fixedParameters = {
            'isMujoco': 1,
            'isCylinder': 1,
            'randomSeed': randomSeed,
            'evalNum': evalNum
        }
        # NOTE(review): 'Collsion' spelling matches the path used by the
        # evaluation code elsewhere in this file — keep it, or saved files
        # will not be found. `trajSavePath` alias is never read afterwards.
        trajectoriesSaveDirectory = trajSavePath = os.path.join(
            dirName, '..', 'trajectory', 'variousCollsiondt={}'.format(dt))
        generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                                 trajectorySaveExtension,
                                                 fixedParameters)
        trajectorySavePath = generateTrajectorySavePath(parameterDict)
        saveToPickle(trajList, trajectorySavePath)

    # visualize
    if demoVisualize:
        entitiesColorList = [wolfColor] * numWolves + [
            sheepColor
        ] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents,
                        getPosFromAgentState)
        trajToRender = np.concatenate(trajList)
        render(trajToRender)

    endTime = time.time()
    print("Time taken {} seconds to generate {} trajectories".format(
        (endTime - startTime), evalNum))
def main():
    """Sweep DDPG over exploration-noise variance and replay-memory size,
    then draw one learning curve per (memorySize, noiseInitVariance) cell.

    Depends on a module-level gym-style `env` and the project helpers
    `GetSavePath` / `EvaluateNoiseAndMemorySize`.
    """
    # Dimensions and action scaling read from the (module-level) environment.
    stateDim = env.observation_space.shape[0]
    actionDim = env.action_space.shape[0]
    actionHigh = env.action_space.high
    actionLow = env.action_space.low
    actionBound = (actionHigh - actionLow) / 2

    # Hyper-parameters held constant across the whole sweep.
    fixedParameters = OrderedDict([
        ('batchSize', 128),
        ('learningRateActor', 0.001),
        ('learningRateCritic', 0.001),
        ('maxEpisode', 200),
        ('maxRunSteps', 200),
        ('tau', 0.01),
        ('gamma', 0.9),
        ('actorLayerWidths', [30]),
        ('criticLayerWidths', [30]),
        ('stateDim', stateDim),
        ('actionDim', actionDim),
        ('actionBound', actionBound),
        ('actionHigh', actionHigh),
        ('actionLow', actionLow),
        ('paramUpdateInterval', 1),
        ('varianceDiscount', .9995),
        ('noiseDecayStartStep', 10000),
    ])
    # Learning begins as soon as one full batch has been collected.
    fixedParameters['learningStartStep'] = fixedParameters['batchSize']

    # The two swept factors; their cross product is the evaluation grid.
    independentVariables = OrderedDict([
        ('noiseInitVariance', [1, 2, 3, 4]),
        ('memorySize', [1000, 5000, 10000, 20000]),
    ])

    getSavePath = GetSavePath("../trainedDDPGModels", '.ckpt')
    evaluate = EvaluateNoiseAndMemorySize(fixedParameters, getSavePath,
                                          saveModel=False)

    # One empty DataFrame row per grid point; apply() evaluates each cell.
    levelNames = list(independentVariables.keys())
    levelValues = list(independentVariables.values())
    levelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=levelIndex)
    resultDF = toSplitFrame.groupby(levelNames).apply(evaluate)

    # Subplot grid: one row per memory size, one column per noise variance.
    nCols = len(independentVariables['noiseInitVariance'])
    nRows = len(independentVariables['memorySize'])
    plotIndex = 1
    collectedAxes = []
    figure = plt.figure(figsize=(12, 10))
    for _memorySize, memoryGroup in resultDF.groupby('memorySize'):
        for _noiseVariance, varianceGroup in memoryGroup.groupby(
                'noiseInitVariance'):
            subplot = figure.add_subplot(nRows, nCols, plotIndex)
            collectedAxes.append(subplot)
            plotIndex += 1
            plt.ylim([-1600, 50])
            varianceGroup.T.plot(ax=subplot)

    dirName = os.path.dirname(__file__)
    plotPath = os.path.join(dirName, '..', 'plots')
    plt.savefig(os.path.join(plotPath, 'pendulumEvaluation'))
    plt.show()