def __init__(self, teacher_iters, QM, e, epsilon):
    """Store the actor's configuration and build its collaborators.

    Args:
        teacher_iters: Stored as ``self.teacher_iters``.
        QM: Stored as ``self.QM``.
        e: Passed to both ``PlacementEnumerator`` and
            ``LowestCenterOfGravityAgent``.
        epsilon: Stored as ``self.eps``.
    """
    # Plain configuration values.
    self.eps, self.teacher_iters, self.QM = epsilon, teacher_iters, QM
    # No placement is in progress on a freshly built instance.
    self.placement = None
    # Collaborators that both receive the same ``e`` argument.
    self.penum = PlacementEnumerator(e)
    self.teacher = LowestCenterOfGravityAgent(e)
class FullPlacementActor(object):
    """Epsilon-greedy actor that selects a whole placement and replays it.

    While ``s.t < teacher_iters`` every call is delegated to the teacher
    agent. Afterwards a placement is chosen among the enumerated
    candidates (at random with probability ``eps``, otherwise the one with
    the highest ``QM.q(s, candidate)``) and handed out one action per call
    through ``PartiallyCompletedPlacement.incr_action()``.
    """

    def __init__(self, teacher_iters, QM, e, epsilon):
        """Store the configuration and build the enumerator and teacher.

        Args:
            teacher_iters: Threshold compared against ``s.t`` in ``act``.
            QM: Object providing ``q(s, placement)`` used for greedy choice.
            e: Passed to ``PlacementEnumerator`` and the teacher agent.
            epsilon: Probability of choosing a random placement.
        """
        self.eps = epsilon
        self.placement = None
        self.penum = PlacementEnumerator(e)
        self.teacher_iters = teacher_iters
        self.teacher = LowestCenterOfGravityAgent(e)
        self.QM = QM

    def act(self, s, debug_mode=False):
        """Return ``(action, debug_info)`` for state ``s``.

        Args:
            s: State; must expose ``t`` (compared with ``teacher_iters``).
            debug_mode: Accepted for interface compatibility; not read here.

        Returns:
            A 2-tuple whose second item is a dict with key ``"pfbms"``: the
            list of ``pfbm`` attributes of the candidates when a new
            placement was selected on this call, otherwise an empty list.
        """
        debug_info = {"pfbms": []}

        if s.t < self.teacher_iters:
            # NOTE(review): the teacher's return value is forwarded as-is;
            # if teacher.act itself returns an (action, debug) pair, the
            # first item here is that pair -- confirm callers expect this.
            return (self.teacher.act(s), debug_info)

        if self.placement is None or not self.placement.is_incomplete():
            choices = self.penum.get_placement_actions(s)
            # Materialise as a list: on Python 3 ``map`` is a one-shot lazy
            # iterator, unlike the plain lists stored on the other paths.
            debug_info["pfbms"] = [choice.pfbm for choice in choices]
            if random.random() < self.eps:
                p = random.choice(choices)
            else:
                p = max(choices, key=lambda x: self.QM.q(s, x))
            self.placement = PartiallyCompletedPlacement(p)

        return (self.placement.incr_action(), debug_info)
class LowestCenterOfGravityAgent(object):
    """Heuristic agent that replays the action sequence of one placement.

    When no actions are queued, it enumerates
    ``(action_sequence, final_state, bitmap)`` triples and picks the one
    whose bitmap has the largest mean nonzero row index, then emits that
    sequence one action per ``act`` call.
    """

    # Class-level defaults; ``penum`` and ``queued_actions`` are replaced
    # per instance in ``__init__``; ``e`` is not assigned there.
    penum = None
    queued_actions = None
    e = None

    def __init__(self, e):
        """Build the placement enumerator from ``e`` and start with no queue."""
        self.penum = PlacementEnumerator(e)
        self.queued_actions = []

    def act(self, s, debug_mode=False):
        """Return ``(action, None)`` for state ``s``.

        ``debug_mode`` is accepted but not read.
        """
        if len(self.queued_actions) == 0:
            logging.info("DELTA: 0")
            candidates = self.penum._get_actionseq_finalstate_pairs(s)
            assert len(candidates) > 0

            def negated_mean_row(candidate):
                # NOTE(review): an all-zero bitmap has no nonzero rows, so
                # the mean is NaN and min() becomes order-dependent --
                # confirm whether that case can occur.
                occupied_rows = np.nonzero(candidate[2])[0]
                return -occupied_rows.mean()

            actions, _final_state, _bitmap = min(candidates, key=negated_mean_row)
            # Queue everything after the first action for later calls.
            self.queued_actions = actions[1:]
            return (actions[0], None)

        head = self.queued_actions[0]
        self.queued_actions = self.queued_actions[1:]
        return (head, None)

    def observe_sars_tuple(self, s, a, r, sprime, pfbm=None):
        """Log the reward ``r``; the other arguments are ignored."""
        logging.info("REWARD: {}".format(r))

    def save_model(self, fn):
        """Do nothing; this agent keeps no model to save."""
        pass
class LowestCenterOfGravityAgent(object):
    """Heuristic agent that replays the action sequence of one placement.

    When no actions are queued, it enumerates
    ``(action_sequence, final_state, bitmap)`` triples and picks the one
    whose bitmap has the largest mean nonzero row index, then emits that
    sequence one action per ``act`` call.
    """

    # Class-level defaults; ``penum`` and ``queued_actions`` are replaced
    # per instance in ``__init__``; ``e`` is not assigned there.
    penum = None
    queued_actions = None
    e = None

    def __init__(self, e):
        """Build the placement enumerator from ``e`` and start with no queue."""
        self.penum = PlacementEnumerator(e)
        self.queued_actions = []

    def act(self, s, debug_mode=False):
        """Return ``(action, None)`` for state ``s``.

        ``debug_mode`` is accepted but not read.
        """
        if len(self.queued_actions) == 0:
            logging.info("DELTA: 0")
            candidates = self.penum._get_actionseq_finalstate_pairs(s)
            assert len(candidates) > 0

            def negated_mean_row(candidate):
                # NOTE(review): an all-zero bitmap has no nonzero rows, so
                # the mean is NaN and min() becomes order-dependent --
                # confirm whether that case can occur.
                occupied_rows = np.nonzero(candidate[2])[0]
                return -occupied_rows.mean()

            actions, _final_state, _bitmap = min(candidates, key=negated_mean_row)
            # Queue everything after the first action for later calls.
            self.queued_actions = actions[1:]
            return (actions[0], None)

        head = self.queued_actions[0]
        self.queued_actions = self.queued_actions[1:]
        return (head, None)

    def observe_sars_tuple(self, s, a, r, sprime, pfbm=None):
        """Log the reward ``r``; the other arguments are ignored."""
        logging.info("REWARD: {}".format(r))

    def save_model(self, fn):
        """Do nothing; this agent keeps no model to save."""
        pass
class FullPlacementActor(object):
    """Epsilon-greedy actor that selects a whole placement and replays it.

    While ``s.t < teacher_iters`` every call is delegated to the teacher
    agent. Afterwards a placement is chosen among the enumerated
    candidates (at random with probability ``eps``, otherwise the one with
    the highest ``QM.q(s, candidate)``) and handed out one action per call
    through ``PartiallyCompletedPlacement.incr_action()``.
    """

    def __init__(self, teacher_iters, QM, e, epsilon):
        """Store the configuration and build the enumerator and teacher.

        Args:
            teacher_iters: Threshold compared against ``s.t`` in ``act``.
            QM: Object providing ``q(s, placement)`` used for greedy choice.
            e: Passed to ``PlacementEnumerator`` and the teacher agent.
            epsilon: Probability of choosing a random placement.
        """
        self.eps = epsilon
        self.placement = None
        self.penum = PlacementEnumerator(e)
        self.teacher_iters = teacher_iters
        self.teacher = LowestCenterOfGravityAgent(e)
        self.QM = QM

    def act(self, s, debug_mode=False):
        """Return ``(action, debug_info)`` for state ``s``.

        Args:
            s: State; must expose ``t`` (compared with ``teacher_iters``).
            debug_mode: Accepted for interface compatibility; not read here.

        Returns:
            A 2-tuple whose second item is a dict with key ``"pfbms"``: the
            list of ``pfbm`` attributes of the candidates when a new
            placement was selected on this call, otherwise an empty list.
        """
        debug_info = {"pfbms": []}

        if s.t < self.teacher_iters:
            # NOTE(review): the teacher's return value is forwarded as-is;
            # if teacher.act itself returns an (action, debug) pair, the
            # first item here is that pair -- confirm callers expect this.
            return (self.teacher.act(s), debug_info)

        if self.placement is None or not self.placement.is_incomplete():
            choices = self.penum.get_placement_actions(s)
            # Materialise as a list: on Python 3 ``map`` is a one-shot lazy
            # iterator, unlike the plain lists stored on the other paths.
            debug_info["pfbms"] = [choice.pfbm for choice in choices]
            if random.random() < self.eps:
                p = random.choice(choices)
            else:
                p = max(choices, key=lambda x: self.QM.q(s, x))
            self.placement = PartiallyCompletedPlacement(p)

        return (self.placement.incr_action(), debug_info)
class LowestCenterOfGravityAgent(object):
    """Heuristic agent that replays the action sequence of one successor.

    When no actions are queued, it asks the enumerator for
    ``(state, action_sequence, reward)`` triples and picks the one whose
    ``state.arena.bitmap`` has the largest mean nonzero row index, then
    emits that sequence one action per ``act`` call.
    """

    # Class-level defaults, all replaced per instance in ``__init__``.
    penum = None
    queued_actions = None
    e = None

    def __init__(self, e):
        """Create the enumerator, start with an empty queue and keep ``e``."""
        self.penum = PlacementEnumerator()
        self.queued_actions = []
        self.e = e

    def act(self, s):
        """Return the next action for state ``s``."""
        if len(self.queued_actions) == 0:
            successors = self.penum.get_successor_bitmaps(self.e, s)
            assert len(successors) > 0

            def negated_mean_row(successor):
                # NOTE(review): an all-zero bitmap has no nonzero rows, so
                # the mean is NaN and min() becomes order-dependent --
                # confirm whether that case can occur.
                occupied_rows = np.nonzero(successor[0].arena.bitmap)[0]
                return -occupied_rows.mean()

            _state, actions, _reward = min(successors, key=negated_mean_row)
            # Queue everything after the first action for later calls.
            self.queued_actions = actions[1:]
            return actions[0]

        head = self.queued_actions[0]
        self.queued_actions = self.queued_actions[1:]
        return head
def __init__(self, e):
    """Build the placement enumerator from ``e`` and start with no queue."""
    enumerator = PlacementEnumerator(e)
    self.penum = enumerator
    # Actions still to be handed out; empty on a fresh instance.
    self.queued_actions = list()
def __init__(self, e):
    """Build the placement enumerator from ``e`` and start with no queue."""
    enumerator = PlacementEnumerator(e)
    self.penum = enumerator
    # Actions still to be handed out; empty on a fresh instance.
    self.queued_actions = list()