def __init__(self, **args):
        """Configure the agent, build its two DQN sub-modules, print a banner.

        All keyword arguments are forwarded unchanged to ``Agent.__init__``.
        The banner is written with single-argument ``print(...)`` calls,
        which produce the same output under the Python 2 print statement
        and the Python 3 print function.
        """
        Agent.__init__(self, **args)

        # Feature and action dimensions.
        self.nb_finishFeatures = 4
        self.nb_collisionFeatures = 4
        self.nb_actions = 9
        self.nb_features = 8

        # Exploration and discount settings.
        self.epsilon = 1
        self.min_epsilon = 0.01
        self.decay = .9999
        self.discount = .9

        # self.layout is not assigned in this method; presumably it is set
        # by Agent.__init__ -- confirm.  Each module gets its own
        # FeatureExtractor instance.
        self.finishAgent = DqnModule(
            nb_features=self.nb_finishFeatures,
            featureExtractor=FeatureExtractor(self.layout).getSimplestFeatures,
            discount=self.discount)
        # NOTE: the collision module uses a literal discount of 0.8 rather
        # than self.discount; the banner below only reports self.discount.
        self.collisionAgent = DqnModule(
            nb_features=self.nb_collisionFeatures,
            featureExtractor=FeatureExtractor(self.layout).getSimplestFeatures,
            discount=0.8)

        # Assigned once (the original repeated this assignment after the
        # banner with the same value).
        self.last_saved_num = -1

        print('----------')
        print('############ GmQAgent ############')
        print('Epsilon Decay = %s, Discount Factor = %.2f' % (
            self.decay, self.discount))
        # NOTE(review): the factor 50 presumably undoes a scaling applied
        # inside finishReward / collisionReward -- confirm.
        reward_summary = (
            50 * self.finishReward(TIME_STEP_PENALTY + FINISH_REWARD, 0),
            50 * self.collisionReward(TIME_STEP_PENALTY + COLLISION_PENALTY, 0),
            50 * self.finishReward(TIME_STEP_PENALTY, 0),
            50 * self.collisionReward(TIME_STEP_PENALTY, 0),
        )
        print('Reward: Finish+Time = %.2f, Collision = %.2f, FinishTimePenalty = %.2f, CollisionTimePenalty = %.2f' % reward_summary)
        print('----------')
    def __init__(self, **args):
        """Build the finish/collision DQN modules and the DDPG arbitrator.

        All keyword arguments are forwarded unchanged to ``Agent.__init__``.
        After construction a configuration banner is printed using
        single-argument ``print(...)`` calls, which behave identically under
        the Python 2 print statement and the Python 3 print function.
        """
        Agent.__init__(self, **args)

        # Action / feature dimensions.
        self.nb_actions = 9
        self.arbitrator_actions = 2
        self.nb_finishFeatures = 4
        self.nb_collisionFeatures = 4
        self.nb_features = 4

        # Exploration settings.
        self.outside_epsilon = 1.
        self.min_epsilon = 0.01
        # NOTE(review): a decay of 0 would collapse outside_epsilon on the
        # first multiplicative update, if that is how it is applied --
        # confirm this is intended.
        self.outside_decay = .0
        self.arbitratorDecay = .9995

        # Per-module discount factors.
        self.finishDiscount = .9
        self.collisionDiscount = .8
        self.arbitratorDiscount = .9

        # self.layout is not assigned in this method; presumably it is set
        # by Agent.__init__ -- confirm.  Each module gets its own
        # FeatureExtractor instance.
        self.finishAgent = DqnModule(
            nb_features=self.nb_finishFeatures,
            featureExtractor=FeatureExtractor(self.layout).getSimplestFeatures,
            nb_actions=self.nb_actions,
            discount=self.finishDiscount)
        self.collisionAgent = DqnModule(
            nb_features=self.nb_collisionFeatures,
            featureExtractor=FeatureExtractor(self.layout).getSimplestFeatures,
            nb_actions=self.nb_actions,
            discount=self.collisionDiscount)
        self.arbitrator = DDPGModule(
            nb_features=self.nb_features,
            featureExtractor=FeatureExtractor(self.layout).getSimplestFeatures,
            nb_actions=self.arbitrator_actions,
            discount=self.arbitratorDiscount,
            decay=self.arbitratorDecay)
        self.last_saved_num = -1

        print('----------')
        print('############ HierarchicalDDPGAgent ############')
        print('Outside Epsilon Decay = %f' % (self.outside_decay))
        print('FinishAgent: Discount Factor = %.2f' % (self.finishDiscount))
        print('CollisionAgent: Discount Factor = %.2f' % (
            self.collisionDiscount))
        # The decay shown is read back from the arbitrator module itself.
        print('Arbitrator: Epsilon Decay = %f, Discount Factor = %.2f' % (
            self.arbitrator.decay, self.arbitratorDiscount))
        # NOTE(review): the factor 50 presumably undoes a scaling applied
        # inside finishReward / collisionReward -- confirm.
        reward_summary = (
            50 * self.finishReward(TIME_STEP_PENALTY + FINISH_REWARD, 0),
            50 * self.collisionReward(TIME_STEP_PENALTY + COLLISION_PENALTY, 0),
            50 * self.finishReward(TIME_STEP_PENALTY, 0),
            50 * self.collisionReward(TIME_STEP_PENALTY, 0),
        )
        print('Reward: Finish+Time = %.2f, Collision = %.2f, FinishTimePenalty = %.2f, CollisionTimePenalty = %.2f' % reward_summary)
        print('----------')
 def __init__(self, **args):
     """Set hyper-parameters, build the single DQN module, print a banner.

     All keyword arguments are forwarded unchanged to ``Agent.__init__``.
     The banner uses single-argument ``print(...)`` calls, which behave
     identically under the Python 2 print statement and the Python 3
     print function.
     """
     Agent.__init__(self, **args)

     # Exploration settings.
     self.epsilon = 1.0
     self.min_epsilon = 0.01
     self.decay = 0.9999

     # Network dimensions and discounting.
     self.nb_features = 8
     self.nb_actions = 9
     self.discount = .95

     # self.layout is not assigned in this method; presumably it is set by
     # Agent.__init__ -- confirm.  nb_actions is not passed to DqnModule
     # here, so the module's own default applies.
     collision_features = FeatureExtractor(self.layout).getCollisionFeatures
     self.Agent = DqnModule(featureExtractor=collision_features,
                            nb_features=self.nb_features,
                            discount=self.discount)

     print('----------')
     print('############ CollisionAgent ############')
     print('Epsilon Decay = %s, Discount Factor = %.2f' % (
         self.decay, self.discount))
     print('----------')
     self.last_saved_num = -1
    def __init__(self, **args):
        """Build a flat DQN module, two DQN sub-modules and a DDPG arbitrator.

        All keyword arguments are forwarded unchanged to ``Agent.__init__``.
        The banner uses single-argument ``print(...)`` calls, which behave
        identically under the Python 2 print statement and the Python 3
        print function.
        """
        Agent.__init__(self, **args)

        # Action / feature dimensions.
        self.nb_actions = 9
        self.arbitrator_actions = 2
        self.nb_finishFeatures = 4
        self.nb_collisionFeatures = 4
        self.nb_features = 4

        # Exploration and discount settings for the arbitrator.
        self.min_epsilon = 0.01
        self.arbitrator_epsilon = 1.
        self.arbitrator_decay = .9995
        self.arbitratorDiscount = .9

        # self.layout is not assigned in this method; presumably it is set
        # by Agent.__init__ -- confirm.  Each module gets its own
        # FeatureExtractor instance.
        self.dqnAgent = DqnModule(
            nb_features=self.nb_features,
            featureExtractor=FeatureExtractor(self.layout).getSimplestFeatures,
            discount=self.arbitratorDiscount,
            nb_actions=self.nb_actions)

        # No discount is passed to the finish / collision modules, so they
        # use whatever default DqnModule defines.
        self.finishAgent = DqnModule(
            nb_features=self.nb_finishFeatures,
            featureExtractor=FeatureExtractor(self.layout).getSimplestFeatures,
            nb_actions=self.nb_actions)
        self.collisionAgent = DqnModule(
            nb_features=self.nb_collisionFeatures,
            featureExtractor=FeatureExtractor(self.layout).getSimplestFeatures,
            nb_actions=self.nb_actions)
        self.arbitrator = DDPGModule(
            nb_features=self.nb_features,
            featureExtractor=FeatureExtractor(self.layout).getSimplestFeatures,
            nb_actions=self.arbitrator_actions,
            discount=self.arbitratorDiscount,
            decay=self.arbitrator_decay)
        self.last_saved_num = -1

        print('----------')
        print('############ SequentialDDPGAgent ############')
        # The decay shown is read back from the arbitrator module itself.
        print('Arbitrator Epsilon Decay = %f, Discount Factor = %.2f' % (
            self.arbitrator.decay, self.arbitratorDiscount))
        print('----------')
    def __init__(self, extractor='IdentityExtractor', **args):
        """Build the DQN arbitrator and load the finish/collision models.

        ``extractor`` is accepted but not used anywhere in this method; the
        remaining keyword arguments are forwarded unchanged to
        ``Agent.__init__``.  The banner uses single-argument ``print(...)``
        calls, which behave identically under the Python 2 print statement
        and the Python 3 print function.
        """
        Agent.__init__(self, **args)

        # Feature / action dimensions.
        self.nb_features = 4
        self.nb_finishFeatures = 4
        self.nb_collisionFeatures = 4
        self.nb_actions = 9
        self.arbitrator_actions = 2

        # Exploration settings.
        self.arbitratorEpsilon = 1
        self.min_epsilon = 0.01
        self.arbitratorDecay = .9995

        # Per-module discount factors.
        self.finishDiscount = .9
        self.collisionDiscount = .8
        self.arbitratorDiscount = .9

        # self.layout is not assigned in this method; presumably it is set
        # by Agent.__init__ -- confirm.  Each module gets its own
        # FeatureExtractor instance.
        self.finishAgent = DqnModule(
            nb_features=self.nb_finishFeatures,
            featureExtractor=FeatureExtractor(self.layout).getSimplestFeatures,
            discount=self.finishDiscount,
            nb_actions=self.nb_actions)
        self.collisionAgent = DqnModule(
            nb_features=self.nb_collisionFeatures,
            featureExtractor=FeatureExtractor(self.layout).getSimplestFeatures,
            discount=self.collisionDiscount,
            nb_actions=self.nb_actions)
        self.arbitrator = DqnModule(
            nb_features=self.nb_features,
            featureExtractor=FeatureExtractor(self.layout).getSimplestFeatures,
            discount=self.arbitratorDiscount,
            nb_actions=self.arbitrator_actions)

        # NOTE(review): subModules captures the two DqnModule objects built
        # above.  The attributes are rebound to loadModel() results just
        # below, so afterwards subModules and self.finishAgent /
        # self.collisionAgent presumably refer to different objects.
        # Confirm that this is intended.
        self.subModules = [self.finishAgent, self.collisionAgent]
        self.last_saved_num = -1
        self.finishAgent = self.loadModel('finishAgent_2_1999')
        self.collisionAgent = self.loadModel('collisionAgent_2_1999')

        print('----------')
        print('############ SequentialArbiQAgent ############')
        print('Arbitrator: Decay = %f, Discount Factor = %.2f' % (
            self.arbitratorDecay, self.arbitratorDiscount))
        print('----------')
    def __init__(self, **args):
        """Initialise the base agent, then load the three stored models.

        All keyword arguments are forwarded unchanged to ``Agent.__init__``.
        The models are loaded in the order actor, finishAgent,
        collisionAgent and bound to attributes of the same names.
        """
        Agent.__init__(self, **args)

        # (attribute name, model name handed to self.loadModel)
        # NOTE(review): the empty model names for finishAgent and
        # collisionAgent look like unfilled placeholders -- confirm.
        stored_models = (
            ('actor', 'actor_3_1000'),
            ('finishAgent', ''),
            ('collisionAgent', ''),
        )
        for attribute, model_name in stored_models:
            setattr(self, attribute, self.loadModel(model_name))
 def __init__(self, **args):
     """Initialise the base agent and load the finish and collision models.

     All keyword arguments are forwarded unchanged to ``Agent.__init__``.
     """
     Agent.__init__(self, **args)

     # Load each stored model by name, then bind it to its attribute.
     finish_model = self.loadModel('finishAgent_2_1999')
     self.finishAgent = finish_model

     collision_model = self.loadModel('collisionAgent_2_1999')
     self.collisionAgent = collision_model