def completeHypotheses(self, rle, allObjects):
    observe(rle, 10)
    spriteTypeHypothesis = sampleFromDistribution(rle._game.spriteDistribution, allObjects)
    gameObject = Game(spriteInductionResult=spriteTypeHypothesis)
    newHypotheses = []
    for hypothesis in self.hypotheses:
        newHypotheses.append(gameObject.addNewObjectsToTheory(hypothesis, spriteTypeHypothesis))
    self.hypotheses = newHypotheses
def initializeHypotheses(self, rle, allObjects, learnSprites=True):
    if learnSprites:
        observe(rle, 10)
        spriteTypeHypothesis = sampleFromDistribution(rle._game.spriteDistribution, allObjects)
        gameObject = Game(spriteInductionResult=spriteTypeHypothesis)
        initialTheory = gameObject.buildGenericTheory(spriteTypeHypothesis)
    else:
        gameObject = Game(self.gameString)
        initialTheory = gameObject.buildGenericTheory(spriteSample=False, vgdlSpriteParse=gameObject.vgdlSpriteParse)
    self.hypotheses = [initialTheory]
    ## Old: used to check for Vrle and initialize accordingly.
    ## New: assumption is that Vrle needs to be initialized only if there are no
    ## hypotheses, so it is all done in one chunk.
    self.symbolDict = generateSymbolDict(rle)
    return gameObject
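## ------------------------------------------------------------------
## Illustrative sketch (not called anywhere): how initializeHypotheses and
## completeHypotheses are meant to be used together. `agent` and
## `rleCreateFunc` are hypothetical stand-ins for whatever object owns these
## methods and whatever constructs the RLE; only the method names, arguments,
## and return values come from this file.
## ------------------------------------------------------------------
def _exampleHypothesisLifecycle(agent, rleCreateFunc):
    rle = rleCreateFunc()  ## real environment the agent acts in
    allObjects = rle._game.getObjects()
    ## At the start: no hypotheses yet, so observe and build a generic theory.
    gameObject = agent.initializeHypotheses(rle, allObjects, learnSprites=True)
    ## Later, when new object types have appeared, extend every existing
    ## hypothesis rather than rebuilding from scratch.
    allObjects = rle._game.getObjects()
    agent.completeHypotheses(rle, allObjects)
    return gameObject, agent.hypotheses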
def getToSubgoal(rle, vrle, subgoal, all_objects, finalEventList, verbose=True,
                 max_actions_per_plan=1, planning_steps=100, defaultPolicyMaxSteps=50, symbolDict=None):
    ## Takes a real world and a theory (instantiated as a virtual world).
    ## Moves the agent through the world, updating the theory as needed.
    ## Ends when the subgoal is reached.
    ## Right now this only works properly with max_actions_per_plan=1, as you want to re-plan when the theory changes;
    ## otherwise it will only replan every max_actions_per_plan steps.
    ## Returns the real world in its new state, as well as the theory in its new state.
    ## TODO: also return a trace of events and of game states for re-creation.
    hypotheses = []
    terminal = rle._isDone()[0]
    goal_achieved = False

    def noise(action):
        prob = 0.
        if random.random() < prob:
            return random.choice(BASEDIRS)
        else:
            return action

    ## TODO: this will be problematic when new objects appear, if you don't update it.
    # all_objects = rle._game.getObjects()
    print ""
    print "object goal is", colorDict[str(subgoal.color)], rle._rect2pos(subgoal.rect)
    # actions_executed = []
    states_encountered = []
    candidate_new_colors = []  ## initialized here so the return value is defined even if no collisions occur

    while not terminal and not goal_achieved:
        mcts = Basic_MCTS(existing_rle=vrle)
        planner = mcts.startTrainingPhase(planning_steps, defaultPolicyMaxSteps, vrle)
        actions = mcts.getBestActionsForPlayout()
        for i in range(len(actions)):
            if not terminal and not goal_achieved:
                spriteInduction(rle._game, step=1)
                spriteInduction(rle._game, step=2)

                ## Take actual step. RLE updates all positions.
                res = rle.step(noise(actions[i]))  ## added noise for testing, but prob(noise)=0 now.
                # actions_executed.append(actions[i])
                states_encountered.append(rle._game.getFullState())
                new_state = res['observation']
                terminal = rle._isDone()[0]
                # vrle_res = vrle.step(noise(actions[i]))
                # vrle_new_state = vrle_res['observation']
                # embed()
                effects = translateEvents(res['effectList'], all_objects)  ## TODO: this gets object colors, not IDs.

                print ACTIONS[actions[i]]
                rle.show()
                # if symbolDict:
                #     print rle.show()
                # else:
                #     print np.reshape(new_state, rle.outdim)

                # Save the event and agent state
                try:
                    agentState = dict(rle._game.getAvatars()[0].resources)
                    rle.agentStatePrev = agentState
                # If agent is killed before we get agentState
                except Exception as e:
                    # TODO: how to process changes in resources that led to termination state?
                    agentState = rle.agentStatePrev

                ## If there were collisions, update history and perform interactionSet induction.
                if effects:
                    state = rle._game.getFullState()
                    event = {'agentState': agentState, 'agentAction': actions[i], 'effectList': effects,
                             'gameState': rle._game.getFullStateColorized()}
                    finalEventList.append(event)

                    for effect in effects:
                        rle._game.collision_objects.add(effect[1])  ## sometimes event is just (predicate, obj1)
                        if len(effect) == 3:  ## usually event is (predicate, obj1, obj2)
                            rle._game.collision_objects.add(effect[2])

                    if colorDict[str(subgoal.color)] in [item for sublist in effects for item in sublist]:
                        print "reached subgoal"
                        goal_achieved = True
                        if subgoal.name in rle._game.unknown_objects:
                            rle._game.unknown_objects.remove(subgoal.name)
                        goalLoc = None
                    else:
                        goalLoc = rle._rect2pos(subgoal.rect)

                    ## Sampling from the spriteDistribution makes sense, as it's
                    ## independent of what we've learned about the interactionSet.
                    ## Every timeStep, we should update our beliefs given what we've seen.
                    # if not sample:
                    sample = sampleFromDistribution(rle._game.spriteDistribution, all_objects)
                    g = Game(spriteInductionResult=sample)
                    terminationCondition = {'ended': False, 'win': False, 'time': rle._game.time}
                    trace = ([TimeStep(e['agentAction'], e['agentState'], e['effectList'], e['gameState'])
                              for e in finalEventList], terminationCondition)
                    hypotheses = list(g.runInduction(sample, trace, 20))
                    # print "in getToSubgoal"
                    # embed()

                    ## Make sure this is only adding things the avatar touched.
                    candidate_new_objs = []
                    for interaction in hypotheses[0].interactionSet:
                        if not interaction.generic:
                            if interaction.slot1 != 'avatar':
                                candidate_new_objs.append(interaction.slot1)
                            if interaction.slot2 != 'avatar':
                                candidate_new_objs.append(interaction.slot2)
                    candidate_new_objs = list(set(candidate_new_objs))
                    candidate_new_colors = []
                    for o in candidate_new_objs:
                        cols = [c.color for c in hypotheses[0].classes[o]]
                        candidate_new_colors.extend(cols)

                    ## Among the many things to fix:
                    for e in finalEventList[-1]['effectList']:
                        if e[1] == 'DARKBLUE':
                            candidate_new_colors.append(e[2])
                        if e[2] == 'DARKBLUE':
                            candidate_new_colors.append(e[1])

                    game, level, symbolDict, immovables = writeTheoryToTxt(rle, hypotheses[0],
                            "./examples/gridphysics/theorytest.py", goalLoc=goalLoc)
                    # all_immovables.extend(immovables)
                    # print all_immovables
                    vrle = createMindEnv(game, level, OBSERVATION_GLOBAL)
                    vrle.immovables = immovables

                    ## TODO: You're re-running all of theory induction for every timestep. Fix this.
                    ## If you fix it, note that you'd be passing a different g each time,
                    ## since you sampled (above).
                    # hypotheses = list(g.runDFSInduction(trace, 20))

                spriteInduction(rle._game, step=3)

    if terminal:
        if rle._isDone()[1]:
            print "game won"
        else:
            print "Agent died."

    return rle, hypotheses, finalEventList, candidate_new_colors, states_encountered
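## ------------------------------------------------------------------
## Sketch of the action-noise idea used in getToSubgoal (and again in
## getToObjectGoal below), pulled out as a reusable helper. Both functions
## currently hard-code prob=0.; this version only makes the exploration
## probability a parameter. BASEDIRS is the module-level action set the
## inline versions already use; the helper name is hypothetical.
## ------------------------------------------------------------------
def makeNoisyPolicy(prob=0.):
    ## Returns a function that, with probability `prob`, replaces the
    ## planned action with a uniformly random direction.
    def noisy(action):
        if random.random() < prob:
            return random.choice(BASEDIRS)
        return action
    return noisy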
def playEpisode(rleCreateFunc, hypotheses=[], unknown_objects=False, goalColor=None, finalEventList=[], playback=False):
    ## Initialize the rle the agent behaves in.
    rle = rleCreateFunc()
    rle._game.unknown_objects = rle._game.sprite_groups.keys()
    rle._game.unknown_objects.remove('avatar')  ## For now we're assuming the agent knows itself.
    rle.agentStatePrev = {}
    all_objects = rle._game.getObjects()
    spriteInduction(rle._game, step=0)  ## Initialize sprite induction
    noHypotheses = len(hypotheses) == 0

    # sample = sampleFromDistribution(rle._game.spriteDistribution, all_objects)
    ## Initialize mental theory
    # g = Game(spriteInductionResult=sample)
    # t = g.buildGenericTheory(sample)
    # hypotheses = [t]

    ## Fix this mess. Store the unknown categories. Select among those for a goal, and then provide that to selectToken.
    if unknown_objects == False:
        print "initializing unknown objects:"
        unknown_objects = [rle._game.sprite_groups[k] for k in rle._game.sprite_groups.keys() if k != 'avatar']  ## Store instances of unknown objects
        print [colorDict[str(o[0].color)] for o in unknown_objects]
    else:
        print "already know some objects. Unknown:"
        print [colorDict[str(o[0].color)] for o in unknown_objects]

    ## Working hypothesis is hypotheses[0] for now.
    # unknown_objects = []
    # print [r.generic for r in hypotheses[0].interactionSet]
    # for rule in hypotheses[0].interactionSet:
    #     ## Right now this only tries to learn about the avatar touching things,
    #     ## not things touching things.
    #     ## Also, sometimes you're going to randomly choose an unreachable object!
    #     if rule.generic:
    #         ## Make sure this is right if you have multiple objects in the class;
    #         ## e.g., review induction assumptions.
    #         ## You're assuming that generic rules always have avatar in slot2.
    #         col = hypotheses[0].classes[rule.slot1][0].color
    #         key = [k for k in rle._game.sprite_groups.keys() if \
    #                colorDict[str(rle._game.sprite_groups[k][0].color)]==col][0]
    #         unknown_objects.append(rle._game.sprite_groups[key])

    ended, won = rle._isDone()
    total_states_encountered = [rle._game.getFullState()]
    Vrle = None

    while not ended:
        ## Select the goal if it's known, otherwise an unknown object.
        if noHypotheses:
            ## Observe a few frames, then initialize sprite hypotheses.
            observe(rle, 5)
            sample = sampleFromDistribution(rle._game.spriteDistribution, all_objects)
            g = Game(spriteInductionResult=sample)
            t = g.buildGenericTheory(sample)
            hypotheses = [t]
            noHypotheses = False

        ## Initialize world in agent's head.
        if not Vrle:
            game, level, symbolDict, immovables = writeTheoryToTxt(rle, hypotheses[0],
                    "./examples/gridphysics/theorytest.py")  # , rle._rect2pos(subgoal.rect))
            Vrle = createMindEnv(game, level, OBSERVATION_GLOBAL)
            Vrle.immovables = immovables

        if goalColor:
            key = [k for k in rle._game.sprite_groups.keys() if \
                   colorDict[str(rle._game.sprite_groups[k][0].color)]==goalColor][0]
            actual_goal = rle._game.sprite_groups[key][0]
            object_goal = actual_goal
            print "goal is known:", goalColor
        else:
            try:
                object_goal = random.choice(unknown_objects)
                embed()
                subgoalLocation = selectSubgoalToken(Vrle, 'wall', unknown_objects)
            except:
                print "no unknown objects and no goal? Embedding so you can debug."
                embed()

        # embed()
        # subgoal = random.choice(rle._game.sprite_groups[object_goal.name])
        game, level, symbolDict, immovables = writeTheoryToTxt(rle, hypotheses[0],
                "./examples/gridphysics/theorytest.py", subgoalLocation)
        Vrle = createMindEnv(game, level, OBSERVATION_GLOBAL)  ## World in agent's head.
        Vrle.immovables = immovables
        embed()

        ## Plan to get to subgoal.
        rle, hypotheses, finalEventList, candidate_new_colors, states_encountered = \
            getToSubgoal(rle, Vrle, subgoal, all_objects, finalEventList, symbolDict=symbolDict)

        if len(unknown_objects) > 0:
            for col in candidate_new_colors:
                obj = [o for o in unknown_objects if colorDict[str(o[0].color)]==col]
                if len(obj) > 0:
                    obj = obj[0]
                    unknown_objects.remove(obj)

        ended, won = rle._isDone()
        # actions_taken.extend(actions_executed)
        total_states_encountered.extend(states_encountered)

        ## Hack to remember the actual winning goal, until terminationSet is fixed.
        if won and not hypotheses[0].goalColor:
            # embed()
            goalColor = finalEventList[-1]['effectList'][0][1]  # fix: don't assume the second obj is the goal.
            hypotheses[0].goalColor = goalColor

    if playback:  ## TODO: Aritro cleans this up.
        print "in playback"
        from vgdl.core import VGDLParser
        from examples.gridphysics.simpleGame4 import level, game
        playbackGame = game
        playbackLevel = level
        embed()
        VGDLParser.playGame(playbackGame, playbackLevel, total_states_encountered)

    return hypotheses, won, unknown_objects, goalColor, finalEventList, total_states_encountered
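## ------------------------------------------------------------------
## Sketch of the unknown-object bookkeeping that both playEpisode variants do
## inline: map every non-avatar sprite group to its color name via colorDict,
## and drop a color once the induced theory has produced non-generic rules
## about it. A refactoring sketch only; the helper names are hypothetical.
## ------------------------------------------------------------------
def _unknownColors(rle):
    ## Colors of every sprite group the agent has no specific rules for yet.
    return [colorDict[str(rle._game.sprite_groups[k][0].color)]
            for k in rle._game.sprite_groups.keys() if k != 'avatar']

def _pruneLearnedColors(unknown_colors, candidate_new_colors):
    ## Remove any color we just learned something about.
    return [c for c in unknown_colors if c not in candidate_new_colors]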
def playEpisode(rleCreateFunc, hypotheses=[], game_object=None, unknown_colors=False, goalColor=None, finalEventList=[], playback=False):
    rle = rleCreateFunc()  ## Initialize the rle the agent behaves in.
    all_objects = rle._game.getObjects()
    # spriteInduction(rle._game, step=0)  ## Initialize sprite induction
    noHypotheses = len(hypotheses) == 0

    print ""
    if unknown_colors == False:
        unknown_objects = [k for k in rle._game.sprite_groups.keys() if k != 'avatar']
        unknown_colors = [colorDict[str(rle._game.sprite_groups[k][0].color)] for k in unknown_objects]
        print "unknown objects:", unknown_colors
    else:
        print "already know some objects. Unknown:"
        print unknown_colors

    ended, won = rle._isDone()
    total_states_encountered = [rle._game.getFullState()]  ## Start storing encountered states.
    Vrle = None
    g = game_object

    while not ended:
        if noHypotheses:
            ## Observe a few frames, then initialize sprite hypotheses.
            observe(rle, 5)
            sample = sampleFromDistribution(rle._game.spriteDistribution, all_objects)
            g = Game(spriteInductionResult=sample)
            t = g.buildGenericTheory(sample)
            hypotheses = [t]
            noHypotheses = False

        if not Vrle:
            ## Initialize world in agent's head.
            symbolDict = generateSymbolDict(rle)
            # for k, v in symbolDict.items():
            #     print k, v
            # print ""
            # print "Initializing mental theory."
            game, level, symbolDict, immovables = writeTheoryToTxt(rle, hypotheses[0], symbolDict,
                    "./examples/gridphysics/theorytest.py")
            Vrle = createMindEnv(game, level, output=False)
            Vrle.immovables = immovables

        ## Select the goal if it's known, otherwise an unknown object.
        if goalColor:
            key = [k for k in rle._game.sprite_groups.keys() if \
                   colorDict[str(rle._game.sprite_groups[k][0].color)]==goalColor][0]
            actual_goal = rle._game.sprite_groups[key][0]
            object_goal = actual_goal
            object_goal_location = Vrle._rect2pos(object_goal.rect)
            object_goal_location = object_goal_location[1], object_goal_location[0]
            print "goal is known:", goalColor
            print ""
        else:
            try:
                object_goal = selectObjectGoal(Vrle, unknown_colors, method="random_then_nearest")
                object_goal_location = Vrle._rect2pos(object_goal.rect)
                object_goal_location = object_goal_location[1], object_goal_location[0]
                print "object goal is", colorDict[str(object_goal.color)], "at location", \
                      (rle._rect2pos(object_goal.rect)[1], rle._rect2pos(object_goal.rect)[0])
                print ""
            except:
                print "no unknown objects and no goal? Embedding so you can debug."
                embed()

        game, level, symbolDict, immovables = writeTheoryToTxt(rle, hypotheses[0], symbolDict,
                "./examples/gridphysics/theorytest.py", object_goal_location)
        print "Initializing mental theory *with* object goal"
        # print "immovables", immovables
        Vrle = createMindEnv(game, level, output=True)  ## World in agent's head, including object goal.
        Vrle.immovables = immovables

        ## Plan to get to the object goal.
        ## plannerType is assumed to be 'mcts' here; getToObjectGoal also accepts 'QLearning'.
        rle, hypotheses, finalEventList, candidate_new_colors, states_encountered, g = \
            getToObjectGoal(rle, Vrle, 'mcts', g, hypotheses[0], game, level, object_goal, all_objects,
                            finalEventList, symbolDict=symbolDict)

        if len(unknown_colors) > 0:
            for col in candidate_new_colors:
                if col in unknown_colors:
                    unknown_colors.remove(col)

        ended, won = rle._isDone()
        # actions_taken.extend(actions_executed)
        total_states_encountered.extend(states_encountered)

        ## Hack to remember the actual winning goal, until terminationSet is fixed.
        if won and not hypotheses[0].goalColor:
            # embed()
            goalColor = finalEventList[-1]['effectList'][0][1]  # fix: don't assume the second obj is the goal.
            hypotheses[0].goalColor = goalColor

    if playback:  ## TODO: Aritro cleans this up.
        print "in playback"
        from vgdl.core import VGDLParser
        from examples.gridphysics.simpleGame4 import level, game
        playbackGame = game
        playbackLevel = level
        VGDLParser.playGame(playbackGame, playbackLevel, total_states_encountered)  # , persist_movie=True)

    return hypotheses, g, won, unknown_colors, goalColor, finalEventList, total_states_encountered
def getToObjectGoal(rle, vrle, plannerType, game_object, hypothesis, game, level, object_goal, all_objects, finalEventList,
                    verbose=True, defaultPolicyMaxSteps=50, symbolDict=None):
    ## Takes a real world and a theory (instantiated as a virtual world).
    ## Moves the agent through the world, updating the theory as needed.
    ## Ends when object_goal is reached.
    ## Returns the real world in its new state, as well as the theory in its new state.
    ## TODO: also return a trace of events and of game states for re-creation.
    hypotheses = []
    terminal = rle._isDone()[0]
    goal_achieved = False
    outdim = rle.outdim

    def noise(action):
        prob = 0.
        if random.random() < prob:
            return random.choice(BASEDIRS)
        else:
            return action

    ## TODO: this will be problematic when new objects appear, if you don't update it.
    # all_objects = rle._game.getObjects()
    states_encountered = [rle._game.getFullState()]
    candidate_new_colors = []
    hypotheses = [hypothesis]

    while not terminal and not goal_achieved:
        theory_change_flag = False
        if not theory_change_flag:
            if plannerType == 'mcts':
                planner = Basic_MCTS(existing_rle=vrle, game=game, level=level, partitionWeights=[5, 3, 3])
                subgoals = planner.getSubgoals(subgoal_path_threshold=3)
            elif plannerType == 'QLearning':
                planner = QLearner(vrle, gameString=game, levelString=level)
                subgoals = planner.getSubgoals(subgoal_path_threshold=10)
            print "subgoals", subgoals

        total_steps = 0
        for subgoal in subgoals:
            if not theory_change_flag and not goal_achieved:
                ## Write subgoal to theory; initialize VRLE.
                game, level, symbolDict, immovables = writeTheoryToTxt(rle, hypotheses[0], symbolDict,
                        "./examples/gridphysics/theorytest.py", subgoal)
                vrle = createMindEnv(game, level, output=False)
                vrle.immovables = immovables

                ## Get actions that take you to the subgoal.
                ignore, actions, steps = getToWaypoint(vrle, subgoal, plannerType, symbolDict,
                        defaultPolicyMaxSteps, partitionWeights=[5, 3, 3], act=False)

                for action in actions:
                    if not theory_change_flag and not goal_achieved:
                        spriteInduction(rle._game, step=1)
                        spriteInduction(rle._game, step=2)
                        res = rle.step(noise(action))
                        states_encountered.append(rle._game.getFullState())
                        terminal = rle._isDone()[0]
                        effects = translateEvents(res['effectList'], all_objects)

                        if symbolDict:
                            print rle.show()
                        else:
                            print np.reshape(res['observation'], rle.outdim)

                        # Save the event and agent state
                        try:
                            agentState = dict(rle._game.getAvatars()[0].resources)
                            rle.agentStatePrev = agentState
                        # If agent is killed before we get agentState
                        except Exception as e:
                            # TODO: how to process changes in resources that led to termination state?
                            agentState = rle.agentStatePrev

                        ## If there were collisions, update history and perform interactionSet induction if the collisions were novel.
                        if effects:
                            state = rle._game.getFullState()
                            event = {'agentState': agentState, 'agentAction': action, 'effectList': effects,
                                     'gameState': rle._game.getFullStateColorized()}

                            ## Check if you reached the object goal.
                            # if colorDict[str(object_goal.color)] in [item for sublist in effects for item in sublist]:
                            #     print "goal achieved?"
                            #     embed()
                            #     print "goal achieved"
                            #     goal_achieved = True
                            for e in effects:
                                if 'DARKBLUE' in e and colorDict[str(object_goal.color)] in e:
                                    print "goal achieved"
                                    # embed()
                                    goal_achieved = True

                            ## Sampling from the spriteDistribution makes sense, as it's
                            ## independent of what we've learned about the interactionSet.
                            ## Every timeStep, we should update our beliefs given what we've seen.
                            ## TODO: This crashed. Get it working again, then incorporate the sprite induction result.
                            if len(rle._game.spriteDistribution) == 0:
                                print "before step3"
                                embed()
                            spriteInduction(rle._game, step=3)
                            if len(rle._game.spriteDistribution) == 0:
                                print "after step3"
                                embed()

                            # if not sample:
                            sample = sampleFromDistribution(rle._game.spriteDistribution, all_objects)
                            game_object = Game(spriteInductionResult=sample)

                            ## Get list of all effects we've seen. Only update theory if we're seeing something new.
                            all_effects = [item for sublist in [e['effectList'] for e in finalEventList] for item in sublist]
                            if not all([e in all_effects for e in effects]):  ## TODO: make sure this works with simultaneous effects.
                                finalEventList.append(event)
                                terminationCondition = {'ended': False, 'win': False, 'time': rle._game.time}
                                trace = ([TimeStep(e['agentAction'], e['agentState'], e['effectList'], e['gameState'])
                                          for e in finalEventList], terminationCondition)
                                theory_change_flag = True
                                hypotheses = list(game_object.runInduction(game_object.spriteInductionResult, trace, 20))
                                ## If you resample or run sprite induction, this should be g.runInduction.

                                ## New colors that we may have learned about.
                                candidate_new_objs = []
                                for interaction in hypotheses[0].interactionSet:
                                    if not interaction.generic:
                                        if interaction.slot1 != 'avatar':
                                            candidate_new_objs.append(interaction.slot1)
                                        if interaction.slot2 != 'avatar':
                                            candidate_new_objs.append(interaction.slot2)
                                candidate_new_objs = list(set(candidate_new_objs))
                                for o in candidate_new_objs:
                                    cols = [c.color for c in hypotheses[0].classes[o]]
                                    candidate_new_colors.extend(cols)

                                ## Among the many things to fix:
                                for e in finalEventList[-1]['effectList']:
                                    if e[1] == 'DARKBLUE':
                                        candidate_new_colors.append(e[2])
                                        print "appending", e[2], "to candidate_new_colors"
                                    if e[2] == 'DARKBLUE':
                                        candidate_new_colors.append(e[1])
                                        print "appending", e[1], "to candidate_new_colors"
                                candidate_new_colors = list(set(candidate_new_colors))
                                # print "candidate new colors", candidate_new_colors

                                ## Update to incorporate what we've learned; keep the same subgoal for now.
                                ## This will update at the top of the next loop.
                                game, level, symbolDict, immovables = writeTheoryToTxt(rle, hypotheses[0], symbolDict,
                                        "./examples/gridphysics/theorytest.py",
                                        goalLoc=(rle._rect2pos(object_goal.rect)[1], rle._rect2pos(object_goal.rect)[0]))
                                print "updating internal theory"
                                vrle = createMindEnv(game, level, output=False)
                                vrle.immovables = immovables
                            else:
                                finalEventList.append(event)
                                terminationCondition = {'ended': False, 'win': False, 'time': rle._game.time}
                                trace = ([TimeStep(e['agentAction'], e['agentState'], e['effectList'], e['gameState'])
                                          for e in finalEventList], terminationCondition)
                                ## You need to figure out how to incorporate the result of sprite induction in cases
                                ## where you don't do interactionSet induction (i.e., here).
                                hypotheses = [hypothesis]

                        if terminal:
                            return rle, hypotheses, finalEventList, candidate_new_colors, states_encountered, game_object

                print "executed all actions."
                total_steps += steps

    return rle, hypotheses, finalEventList, candidate_new_colors, states_encountered, game_object
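## ------------------------------------------------------------------
## Sketch factoring out the trace construction that getToSubgoal and
## getToObjectGoal both repeat: induction consumes a (timeSteps,
## terminationCondition) pair built from the accumulated event list.
## A refactoring sketch only; the helper name is hypothetical, and the tuple
## shape comes from the runInduction calls above.
## ------------------------------------------------------------------
def _buildTrace(finalEventList, game_time, ended=False, win=False):
    terminationCondition = {'ended': ended, 'win': win, 'time': game_time}
    timeSteps = [TimeStep(e['agentAction'], e['agentState'], e['effectList'], e['gameState'])
                 for e in finalEventList]
    return (timeSteps, terminationCondition)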