Example #1
0
 def __init__(self, teacher_iters, QM, e, epsilon):
     """Configure the actor: teacher warm-up length, Q-model, env, exploration rate."""
     # Teacher phase: number of initial timesteps driven by the teacher agent.
     self.teacher_iters = teacher_iters
     self.teacher = LowestCenterOfGravityAgent(e)
     # Placement machinery: enumerator of candidate placements for env `e`;
     # `placement` tracks the placement currently being executed (none yet).
     self.penum = PlacementEnumerator(e)
     self.placement = None
     # Learning: Q-value model and epsilon-greedy exploration probability.
     self.QM = QM
     self.eps = epsilon
Example #2
0
File: rl.py — Project: mkayser/tetris_rl
 def __init__(self, teacher_iters, QM, e, epsilon):
     """Initialize an epsilon-greedy placement actor.

     teacher_iters -- timesteps during which a teacher agent picks actions
     QM            -- Q-value model used to score candidate placements
     e             -- environment handle passed to the helper constructors
     epsilon       -- probability of choosing a random placement
     """
     self.eps = epsilon
     self.placement = None  # no placement is in progress at construction
     self.penum = PlacementEnumerator(e)
     self.teacher_iters = teacher_iters
     self.teacher = LowestCenterOfGravityAgent(e)
     self.QM = QM
Example #3
0
class FullPlacementActor(object):
    """Epsilon-greedy actor that chooses whole piece placements.

    For the first ``teacher_iters`` timesteps it defers to a
    lowest-center-of-gravity teacher agent. Afterwards it enumerates the
    candidate placements for the current state and, with probability
    ``epsilon``, explores a random one; otherwise it exploits the
    placement with the highest Q-value under ``QM``. A chosen placement
    is then executed one minor action per ``act`` call.
    """

    def __init__(self, teacher_iters, QM, e, epsilon):
        self.eps = epsilon          # exploration probability
        self.placement = None       # placement currently being executed, if any
        self.penum = PlacementEnumerator(e)
        self.teacher_iters = teacher_iters
        self.teacher = LowestCenterOfGravityAgent(e)
        self.QM = QM                # Q-value model scoring (state, placement)

    def act(self, s, debug_mode=False):
        """Return ``(action, debug_info)`` for state ``s``.

        While ``s.t < teacher_iters`` the teacher chooses the action.
        Otherwise, when no placement is in progress, a new one is picked
        epsilon-greedily; each call then emits the placement's next
        minor action. ``debug_info["pfbms"]`` lists the candidate
        placement bitmaps only on the call that selects a new placement.
        """
        debug_info = {}
        if s.t < self.teacher_iters:
            debug_info["pfbms"] = []
            return (self.teacher.act(s), debug_info)

        if self.placement is None or not self.placement.is_incomplete():
            choices = self.penum.get_placement_actions(s)
            # Use a list comprehension, not map(): on Python 3 map() is a
            # lazy iterator that would be exhausted after one debug read.
            debug_info["pfbms"] = [c.pfbm for c in choices]

            if random.random() < self.eps:
                # Explore: uniformly random candidate placement.
                p = random.choice(choices)
            else:
                # Exploit: highest Q-value placement. (Dead `pass` and a
                # stale commented-out debug block were removed here.)
                p = max(choices, key=lambda x: self.QM.q(s, x))
            self.placement = PartiallyCompletedPlacement(p)
        else:
            debug_info["pfbms"] = []

        return (self.placement.incr_action(), debug_info)
Example #4
0
File: rl.py — Project: mkayser/tetris_rl
class FullPlacementActor(object):
    """Epsilon-greedy actor over full piece placements.

    Defers to a teacher agent for the first ``teacher_iters`` timesteps;
    afterwards selects a placement epsilon-greedily (random with
    probability ``epsilon``, else the argmax of ``QM.q``) and plays it
    back one minor action per ``act`` call.
    """

    def __init__(self, teacher_iters, QM, e, epsilon):
        self.eps = epsilon          # exploration probability
        self.placement = None       # placement currently being executed, if any
        self.penum = PlacementEnumerator(e)
        self.teacher_iters = teacher_iters
        self.teacher = LowestCenterOfGravityAgent(e)
        self.QM = QM                # Q-value model scoring (state, placement)

    def act(self, s, debug_mode=False):
        """Return ``(action, debug_info)`` for state ``s``.

        ``debug_info["pfbms"]`` holds the candidate placement bitmaps on
        the call that commits to a new placement, and is empty otherwise
        (including during the teacher phase).
        """
        debug_info = {}
        if s.t < self.teacher_iters:
            # Teacher phase: delegate action choice entirely.
            debug_info["pfbms"] = []
            return (self.teacher.act(s), debug_info)

        if self.placement is None or not self.placement.is_incomplete():
            choices = self.penum.get_placement_actions(s)
            # List comprehension instead of map(): map() is a one-shot
            # lazy iterator on Python 3, which would break repeated reads.
            debug_info["pfbms"] = [c.pfbm for c in choices]

            if random.random() < self.eps:
                # Explore: uniformly random candidate placement.
                p = random.choice(choices)
            else:
                # Exploit: best placement per the Q-model. (Removed a
                # dead `pass` and a block of commented-out debug code.)
                p = max(choices, key=lambda x: self.QM.q(s, x))
            self.placement = PartiallyCompletedPlacement(p)
        else:
            debug_info["pfbms"] = []

        return (self.placement.incr_action(), debug_info)