def __init__(self, epsilon, lambda_=0.5):
    """Set up the learner, its policy, the ROS endpoints and the Prolog client.

    The 'SuturoMlHeadNextAction' service is advertised only after every
    attribute has been assigned: rospy can dispatch a service request to
    ``self.nextActionCallback`` from another thread as soon as the service
    exists, so advertising it first would expose a half-built object.

    Args:
        epsilon: Passed through to ``EpsilonGreedyPolicy`` as its third
            argument.
        lambda_: Passed to ``SarsaLambdaLearner`` as its ``l`` keyword
            argument. Defaults to 0.5.

    Note:
        Blocks (without timeout) until the 'json_prolog/simple_query'
        service is available.
    """
    next_action_service = 'SuturoMlHeadNextAction'
    policy_topic = 'SuturoMlPolicy'
    prolog_service = 'json_prolog/simple_query'

    self.policy = []
    self.actions = [
        "GRAB-SIDE blue_handle",
        "GRAB-SIDE red_cube",
        "TURN",
        "OPEN-GRIPPER",
        "GRAB-TOP blue_handle",
        "GRAB-TOP red_cube",
        "PLACE-IN-ZONE",
    ]

    # The policy is created before the learner (the learner takes it as an
    # argument), so it starts without a Q table and receives the learner's
    # table through updateQ() right afterwards.
    # NOTE(review): Haxx0rPolicy(self.actions) and
    # ReverseGreedyPolicy(self.q, self.actions) were previously tried here
    # as alternative policies.
    self.q = None
    self.policyMaker = EpsilonGreedyPolicy(self.q, self.actions, epsilon)
    self.learner = SarsaLambdaLearner(self.policyMaker, l=lambda_)
    self.q = self.learner.get_q()
    self.policyMaker.updateQ(self.q)

    # Attribute name (including its spelling) is kept as-is because other
    # code may refer to it.
    self.policyPringPub = rospy.Publisher(
        policy_topic, String, queue_size=10, latch=True)

    rospy.wait_for_service(prolog_service)
    self.prolog = Prolog()

    # Advertise the service last so the callback never observes a
    # partially initialised instance.
    self.feedSrv = rospy.Service(
        next_action_service, SuturoMlNextAction, self.nextActionCallback)

    print("SuturoMlHeadLearnerPolicyFeeder started.")