def __init__(self, teacher_iters, QM, e, epsilon):
    """Store the actor's settings and build its enumerator and teacher from *e*.

    ``epsilon`` is kept as ``self.eps``; ``teacher_iters`` and ``QM`` are
    stored under their own names.
    """
    # Plain values copied straight from the arguments.
    self.eps, self.teacher_iters, self.QM = epsilon, teacher_iters, QM
    # Nothing has been chosen yet.
    self.placement = None
    # Both helpers are constructed from the same ``e``.
    self.penum = PlacementEnumerator(e)
    self.teacher = LowestCenterOfGravityAgent(e)
class FullPlacementActor(object):
    """Epsilon-greedy actor that picks a whole placement and plays it out.

    While ``s.t`` is below ``teacher_iters`` every decision is delegated to a
    ``LowestCenterOfGravityAgent``.  Afterwards the actor chooses among the
    placements offered by ``PlacementEnumerator``: a uniformly random one with
    probability ``eps``, otherwise the one maximising ``QM.q(s, placement)``.
    The chosen placement is wrapped in ``PartiallyCompletedPlacement`` and
    consumed one ``incr_action()`` call at a time.
    """

    def __init__(self, teacher_iters, QM, e, epsilon):
        """Store the settings and build the enumerator and teacher from *e*.

        Args:
            teacher_iters: threshold compared against ``s.t`` in ``act``.
            QM: object exposing ``q(s, placement)``, used to rank placements.
            e: passed to ``PlacementEnumerator`` and the teacher agent.
            epsilon: probability of picking a random placement.
        """
        self.eps = epsilon
        self.placement = None
        self.penum = PlacementEnumerator(e)
        self.teacher_iters = teacher_iters
        self.teacher = LowestCenterOfGravityAgent(e)
        self.QM = QM

    def act(self, s, debug_mode=False):
        """Return ``(action, debug_info)`` for state *s*.

        ``debug_info["pfbms"]`` is a list holding the ``pfbm`` attribute of
        every candidate placement when a new placement is selected on this
        call, and an empty list otherwise.  ``debug_mode`` is accepted but
        not used.
        """
        debug_info = {"pfbms": []}

        if s.t < self.teacher_iters:
            # NOTE(review): the teacher's own return value is forwarded as-is
            # in the first slot; if the teacher returns a tuple, callers get a
            # nested tuple here. Confirm this is what callers expect.
            return (self.teacher.act(s), debug_info)

        if self.placement is None or not self.placement.is_incomplete():
            choices = self.penum.get_placement_actions(s)
            # A real list (not a lazy ``map`` object) so the debug data can be
            # indexed and iterated more than once on Python 3 as well.
            debug_info["pfbms"] = [choice.pfbm for choice in choices]

            if random.random() < self.eps:
                # Explore: any candidate, uniformly at random.
                p = random.choice(choices)
            else:
                # Exploit: the candidate the Q-model scores highest.
                p = max(choices, key=lambda x: self.QM.q(s, x))
            self.placement = PartiallyCompletedPlacement(p)

        return (self.placement.incr_action(), debug_info)
class LowestCenterOfGravityAgent(object):
    """Heuristic agent that plays the action sequence with the best bitmap score.

    Each candidate's bitmap is scored by the negated mean first-axis index of
    its non-zero cells, and the candidate with the smallest score wins.  The
    winning action sequence is then replayed one action per ``act`` call.
    """

    # Class-level defaults. ``__init__`` rebinds ``penum`` and
    # ``queued_actions`` on the instance; ``e`` is not assigned in this class.
    penum = None
    queued_actions = None
    e = None

    def __init__(self, e):
        """Build the placement enumerator from *e*; start with nothing queued."""
        self.penum = PlacementEnumerator(e)
        self.queued_actions = []

    def act(self, s, debug_mode=False):
        """Return ``(action, None)``: the next queued action, or a fresh plan's first.

        ``debug_mode`` is accepted but not used.
        """
        pending = self.queued_actions
        if len(pending) > 0:
            # Still replaying an earlier plan: hand out its next step.
            head, rest = pending[0], pending[1:]
            self.queued_actions = rest
            return (head, None)

        logging.info("DELTA: 0")
        candidates = self.penum._get_actionseq_finalstate_pairs(s)
        assert len(candidates) > 0

        def negated_mean_row(candidate):
            # The third element of each candidate is the bitmap being scored.
            rows = np.nonzero(candidate[2])[0]
            return -rows.mean()

        actions, _final_state, _bitmap = min(candidates, key=negated_mean_row)

        # Queue everything after the first action for the following calls.
        self.queued_actions = actions[1:]
        return (actions[0], None)

    def observe_sars_tuple(self, s, a, r, sprime, pfbm=None):
        """Log the reward *r*; the other arguments are ignored."""
        logging.info("REWARD: {}".format(r))

    def save_model(self, fn):
        """Do nothing; this agent has no model to save."""
        return None
Example #4
0
class LowestCenterOfGravityAgent(object):
    """Agent that commits to the candidate whose bitmap has the smallest score.

    The score of a bitmap is minus the mean first-axis index of its non-zero
    entries.  Once a candidate is picked, its actions are handed out one per
    ``act`` call until the queue is empty, then a new candidate is picked.
    """

    # Defaults on the class itself; instances overwrite ``penum`` and
    # ``queued_actions`` in ``__init__``. ``e`` is never set by this class.
    penum = None
    queued_actions = None
    e = None

    def __init__(self, e):
        """Create a ``PlacementEnumerator`` for *e* and an empty action queue."""
        self.penum = PlacementEnumerator(e)
        self.queued_actions = []

    def act(self, s, debug_mode=False):
        """Return ``(action, None)`` for state *s*.

        ``debug_mode`` is part of the signature but is not read.
        """
        if len(self.queued_actions) > 0:
            # Drain the existing plan before planning again.
            next_action = self.queued_actions[0]
            remaining = self.queued_actions[1:]
            self.queued_actions = remaining
            return (next_action, None)

        logging.info("DELTA: 0")
        options = self.penum._get_actionseq_finalstate_pairs(s)
        assert len(options) > 0

        # Index 2 of each option is the bitmap that gets scored.
        chosen = min(options, key=lambda opt: -np.nonzero(opt[2])[0].mean())
        plan, _state, _bitmap = chosen

        self.queued_actions = plan[1:]
        return (plan[0], None)

    def observe_sars_tuple(self, s, a, r, sprime, pfbm=None):
        """Log the observed reward *r*; nothing else is recorded."""
        logging.info("REWARD: {}".format(r))

    def save_model(self, fn):
        """No-op: there is nothing to persist for this agent."""
        return None
Example #5
0
 def __init__(self, teacher_iters, QM, e, epsilon):
     """Record the settings and create the enumerator and teacher from *e*.

     ``epsilon`` ends up in ``self.eps``; ``teacher_iters`` and ``QM`` keep
     their names.
     """
     # Values taken over unchanged from the arguments.
     self.eps, self.teacher_iters, self.QM = epsilon, teacher_iters, QM
     # No placement has been selected so far.
     self.placement = None
     # The enumerator is built first, then the teacher, both from ``e``.
     self.penum = PlacementEnumerator(e)
     self.teacher = LowestCenterOfGravityAgent(e)
Example #6
0
class FullPlacementActor(object):
    """Actor that selects complete placements epsilon-greedily.

    For states with ``s.t < teacher_iters`` the decision is handed to a
    ``LowestCenterOfGravityAgent``.  Otherwise, whenever no placement is in
    progress, a new one is chosen from ``PlacementEnumerator``'s candidates:
    at random with probability ``eps``, else the one with the highest
    ``QM.q(s, placement)``.  The choice is wrapped in
    ``PartiallyCompletedPlacement`` and advanced via ``incr_action()``.
    """

    def __init__(self, teacher_iters, QM, e, epsilon):
        """Store the settings and build the enumerator and teacher from *e*.

        Args:
            teacher_iters: threshold compared against ``s.t`` in ``act``.
            QM: object exposing ``q(s, placement)``, used to rank placements.
            e: passed to ``PlacementEnumerator`` and the teacher agent.
            epsilon: probability of picking a random placement.
        """
        self.eps = epsilon
        self.placement = None
        self.penum = PlacementEnumerator(e)
        self.teacher_iters = teacher_iters
        self.teacher = LowestCenterOfGravityAgent(e)
        self.QM = QM

    def act(self, s, debug_mode=False):
        """Return ``(action, debug_info)`` for state *s*.

        ``debug_info["pfbms"]`` lists the ``pfbm`` attribute of each candidate
        placement when a new placement is chosen during this call; it is an
        empty list in every other case.  ``debug_mode`` is accepted but not
        used.
        """
        debug_info = {"pfbms": []}

        if s.t < self.teacher_iters:
            # NOTE(review): whatever the teacher returns goes into the first
            # slot unchanged, so a tuple-returning teacher produces a nested
            # tuple. Confirm against the caller before changing.
            return (self.teacher.act(s), debug_info)

        if self.placement is None or not self.placement.is_incomplete():
            choices = self.penum.get_placement_actions(s)
            # Materialise as a list: ``map`` is a one-shot iterator on
            # Python 3, which would make the debug data unusable twice.
            debug_info["pfbms"] = [choice.pfbm for choice in choices]

            if random.random() < self.eps:
                # Exploration step.
                p = random.choice(choices)
            else:
                # Greedy step with respect to the Q-model.
                p = max(choices, key=lambda x: self.QM.q(s, x))
            self.placement = PartiallyCompletedPlacement(p)

        return (self.placement.incr_action(), debug_info)
Example #7
0
class LowestCenterOfGravityAgent(object):
    """Agent that follows the successor whose arena bitmap scores smallest.

    A bitmap's score is minus the mean first-axis index of its non-zero
    entries.  The winning successor's action sequence is replayed one action
    per ``act`` call before a new successor is chosen.
    """

    # Class-level defaults; all three are rebound per instance in ``__init__``.
    penum = None
    queued_actions = None
    e = None

    def __init__(self, e):
        """Keep *e*, create an argument-less enumerator and an empty queue."""
        self.penum = PlacementEnumerator()
        self.queued_actions = []
        self.e = e

    def act(self, s):
        """Return the next action for state *s* (a bare action, not a tuple)."""
        pending = self.queued_actions
        if len(pending) > 0:
            # Keep replaying the plan chosen on an earlier call.
            head, rest = pending[0], pending[1:]
            self.queued_actions = rest
            return head

        successors = self.penum.get_successor_bitmaps(self.e, s)
        assert len(successors) > 0

        def negated_mean_row(entry):
            # Each entry's first element carries the bitmap at ``.arena.bitmap``.
            rows = np.nonzero(entry[0].arena.bitmap)[0]
            return -rows.mean()

        _state, actions, _reward = min(successors, key=negated_mean_row)
        self.queued_actions = actions[1:]
        return actions[0]
Example #8
0
 def __init__(self, e):
     """Start with an empty ``queued_actions`` list and an enumerator for *e*."""
     self.queued_actions = []
     self.penum = PlacementEnumerator(e)
 def __init__(self, e):
     """Create the ``PlacementEnumerator`` for *e*; nothing is queued yet."""
     self.queued_actions = []
     self.penum = PlacementEnumerator(e)