def __init__(self, teacher_iters, QM, e, epsilon):
    """Record the policy parameters and construct the helper objects.

    Args:
        teacher_iters: Stored as ``self.teacher_iters``.
        QM: Stored as ``self.QM``.
        e: Handed unchanged to ``PlacementEnumerator`` and
            ``LowestCenterOfGravityAgent`` (presumably the environment --
            confirm against those constructors).
        epsilon: Stored as ``self.eps``.
    """
    # Plain values copied straight from the arguments.
    self.teacher_iters = teacher_iters
    self.QM = QM
    self.eps = epsilon

    # Nothing is in progress until a placement is assigned later.
    self.placement = None

    # Helpers built from ``e``; the enumerator is constructed before the
    # teacher, exactly as before.
    self.penum = PlacementEnumerator(e)
    self.teacher = LowestCenterOfGravityAgent(e)
class FullPlacementActor(object):
    """Epsilon-greedy actor that selects one whole placement at a time.

    While ``s.t < teacher_iters`` every action is delegated to a
    ``LowestCenterOfGravityAgent`` teacher.  After that, whenever there is no
    placement in progress (``self.placement`` is ``None`` or reports
    ``is_incomplete()`` as false), the actor enumerates candidate placements,
    picks one, wraps it in ``PartiallyCompletedPlacement`` and from then on
    returns ``self.placement.incr_action()`` on each call.
    """

    def __init__(self, teacher_iters, QM, e, epsilon):
        """Store the policy parameters and build the helper objects.

        Args:
            teacher_iters: Threshold on ``s.t`` below which the teacher acts.
            QM: Object exposing ``q(s, placement)``; used to rank candidates.
            e: Passed unchanged to ``PlacementEnumerator`` and
                ``LowestCenterOfGravityAgent`` (presumably the environment --
                confirm against those constructors).
            epsilon: Probability of choosing a random candidate instead of the
                highest-scoring one.
        """
        self.eps = epsilon
        # Placement currently being executed; None until one is first chosen.
        self.placement = None
        self.penum = PlacementEnumerator(e)
        self.teacher_iters = teacher_iters
        self.teacher = LowestCenterOfGravityAgent(e)
        self.QM = QM

    def _pick_placement(self, s, choices):
        """Return a random candidate with probability ``eps``, else the best by ``QM.q``."""
        if random.random() < self.eps:
            return random.choice(choices)
        return max(choices, key=lambda candidate: self.QM.q(s, candidate))

    def act(self, s, debug_mode=False):
        """Return ``(action, debug_info)`` for state ``s``.

        Args:
            s: Current state.  Only ``s.t`` is read here; the state is also
                passed on to the teacher, the enumerator and ``QM.q``.
            debug_mode: Accepted for interface compatibility; not used.

        Returns:
            A 2-tuple ``(action, debug_info)``.  ``debug_info["pfbms"]`` is
            always a list: the ``pfbm`` attribute of every candidate when a
            new placement was selected on this call, otherwise empty.

        Note:
            An empty candidate collection is not handled: ``random.choice``
            and ``max`` both raise on empty input.
        """
        debug_info = {}

        # Teacher phase: defer entirely to the teacher agent.
        if s.t < self.teacher_iters:
            debug_info["pfbms"] = []
            return (self.teacher.act(s), debug_info)

        if self.placement is None or not self.placement.is_incomplete():
            choices = self.penum.get_placement_actions(s)
            # Build a real list rather than using map(): on Python 3 map() is
            # a lazy one-shot iterator, which would not match the empty-list
            # value stored on the other branches.
            debug_info["pfbms"] = [choice.pfbm for choice in choices]
            self.placement = PartiallyCompletedPlacement(
                self._pick_placement(s, choices)
            )
        else:
            # Still working through the previously selected placement.
            debug_info["pfbms"] = []

        return (self.placement.incr_action(), debug_info)
class FullPlacementActor(object):
    """Epsilon-greedy actor that selects one whole placement at a time.

    While ``s.t < teacher_iters`` every action is delegated to a
    ``LowestCenterOfGravityAgent`` teacher.  After that, whenever there is no
    placement in progress (``self.placement`` is ``None`` or reports
    ``is_incomplete()`` as false), the actor enumerates candidate placements,
    picks one, wraps it in ``PartiallyCompletedPlacement`` and from then on
    returns ``self.placement.incr_action()`` on each call.
    """

    def __init__(self, teacher_iters, QM, e, epsilon):
        """Store the policy parameters and build the helper objects.

        Args:
            teacher_iters: Threshold on ``s.t`` below which the teacher acts.
            QM: Object exposing ``q(s, placement)``; used to rank candidates.
            e: Passed unchanged to ``PlacementEnumerator`` and
                ``LowestCenterOfGravityAgent`` (presumably the environment --
                confirm against those constructors).
            epsilon: Probability of choosing a random candidate instead of the
                highest-scoring one.
        """
        self.eps = epsilon
        # Placement currently being executed; None until one is first chosen.
        self.placement = None
        self.penum = PlacementEnumerator(e)
        self.teacher_iters = teacher_iters
        self.teacher = LowestCenterOfGravityAgent(e)
        self.QM = QM

    def _pick_placement(self, s, choices):
        """Return a random candidate with probability ``eps``, else the best by ``QM.q``."""
        if random.random() < self.eps:
            return random.choice(choices)
        return max(choices, key=lambda candidate: self.QM.q(s, candidate))

    def act(self, s, debug_mode=False):
        """Return ``(action, debug_info)`` for state ``s``.

        Args:
            s: Current state.  Only ``s.t`` is read here; the state is also
                passed on to the teacher, the enumerator and ``QM.q``.
            debug_mode: Accepted for interface compatibility; not used.

        Returns:
            A 2-tuple ``(action, debug_info)``.  ``debug_info["pfbms"]`` is
            always a list: the ``pfbm`` attribute of every candidate when a
            new placement was selected on this call, otherwise empty.

        Note:
            An empty candidate collection is not handled: ``random.choice``
            and ``max`` both raise on empty input.
        """
        debug_info = {}

        # Teacher phase: defer entirely to the teacher agent.
        if s.t < self.teacher_iters:
            debug_info["pfbms"] = []
            return (self.teacher.act(s), debug_info)

        if self.placement is None or not self.placement.is_incomplete():
            choices = self.penum.get_placement_actions(s)
            # Build a real list rather than using map(): on Python 3 map() is
            # a lazy one-shot iterator, which would not match the empty-list
            # value stored on the other branches.
            debug_info["pfbms"] = [choice.pfbm for choice in choices]
            self.placement = PartiallyCompletedPlacement(
                self._pick_placement(s, choices)
            )
        else:
            # Still working through the previously selected placement.
            debug_info["pfbms"] = []

        return (self.placement.incr_action(), debug_info)