def __init__(self, epsilon, lambda_=0.5):
    """Wire up the policy, the learner and the ROS endpoints.

    Args:
        epsilon: Forwarded unchanged to ``EpsilonGreedyPolicy``.
        lambda_: Forwarded to ``SarsaLambdaLearner`` as its ``l`` argument.
    """
    self.policyPringPub = rospy.Publisher(
        'SuturoMlPolicy', String, queue_size=10, latch=True)
    self.policy = []
    self.actions = [
        "GRAB-SIDE blue_handle",
        "GRAB-SIDE red_cube",
        "TURN",
        "OPEN-GRIPPER",
        "GRAB-TOP blue_handle",
        "GRAB-TOP red_cube",
        "PLACE-IN-ZONE",
    ]

    # The policy is built without a Q-table because the learner (which owns
    # the table) needs the policy first; the table is handed over afterwards.
    # Alternatives tried here earlier: Haxx0rPolicy(self.actions) and
    # ReverseGreedyPolicy(self.q, self.actions).
    self.q = None
    self.policyMaker = EpsilonGreedyPolicy(self.q, self.actions, epsilon)
    self.learner = SarsaLambdaLearner(self.policyMaker, l=lambda_)
    self.q = self.learner.get_q()
    self.policyMaker.updateQ(self.q)

    # Advertise the service only once policyMaker is usable: the handler
    # reads self.policyMaker and may be invoked as soon as the service exists.
    self.feedSrv = rospy.Service(
        'SuturoMlHeadNextAction', SuturoMlNextAction, self.nextActionCallback)

    # Blocks until the json_prolog query service is reachable.
    rospy.wait_for_service('json_prolog/simple_query')
    self.prolog = Prolog()
    print("SuturoMlHeadLearnerPolicyFeeder started.")
class SuturoMlHeadLearner(object):
    """ROS node object that trains a ``SarsaLambdaLearner`` and serves its policy.

    It answers ``SuturoMlHeadNextAction`` service requests with the action
    chosen by an ``EpsilonGreedyPolicy`` and, after learning, publishes the
    best-valued actions per state on the latched ``SuturoMlPolicy`` topic.
    """

    def __init__(self, epsilon, lambda_=0.5):
        """Wire up the policy, the learner and the ROS endpoints.

        Args:
            epsilon: Forwarded unchanged to ``EpsilonGreedyPolicy``.
            lambda_: Forwarded to ``SarsaLambdaLearner`` as its ``l`` argument.
        """
        self.policyPringPub = rospy.Publisher(
            'SuturoMlPolicy', String, queue_size=10, latch=True)
        self.policy = []
        self.actions = [
            "GRAB-SIDE blue_handle",
            "GRAB-SIDE red_cube",
            "TURN",
            "OPEN-GRIPPER",
            "GRAB-TOP blue_handle",
            "GRAB-TOP red_cube",
            "PLACE-IN-ZONE",
        ]

        # The policy is built without a Q-table because the learner (which
        # owns the table) needs the policy first; the table is handed over
        # afterwards. Alternatives tried here earlier:
        # Haxx0rPolicy(self.actions), ReverseGreedyPolicy(self.q, self.actions).
        self.q = None
        self.policyMaker = EpsilonGreedyPolicy(self.q, self.actions, epsilon)
        self.learner = SarsaLambdaLearner(self.policyMaker, l=lambda_)
        self.q = self.learner.get_q()
        self.policyMaker.updateQ(self.q)

        # Advertise the service only once policyMaker is usable: the handler
        # reads self.policyMaker and may be invoked as soon as the service
        # exists.
        self.feedSrv = rospy.Service(
            'SuturoMlHeadNextAction', SuturoMlNextAction,
            self.nextActionCallback)

        # Blocks until the json_prolog query service is reachable.
        rospy.wait_for_service('json_prolog/simple_query')
        self.prolog = Prolog()
        print("SuturoMlHeadLearnerPolicyFeeder started.")

    def nextActionCallback(self, nextActionRequest):
        """Service handler: answer with the policy's action for the given state.

        Args:
            nextActionRequest: Request object; only its ``state`` field is read.

        Returns:
            A ``SuturoMlNextActionResponse`` with ``action.action`` filled in.
        """
        response = SuturoMlNextActionResponse()
        response.action.action = self.policyMaker.getNextAction(
            nextActionRequest.state)
        return response

    def doTheShit(self):
        """Learn from every sequence Prolog returns, then publish the result.

        Each ``A`` binding of ``suturo_learning:get_learning_sequence(A)`` is
        passed to the learner and the resulting Q-table is pushed into the
        policy. Afterwards the best-valued actions of every state are printed
        and published as one string on ``policyPringPub``.
        """
        # Local import keeps the block self-contained; cmp_to_key exists since
        # Python 2.7 and replaces the Python-2-only ``sort(cmp=...)``.
        from functools import cmp_to_key

        query = self.prolog.query("suturo_learning:get_learning_sequence(A)")
        print("start learning")
        for solution in query.solutions():
            self.q = self.learner.learn(solution["A"])
            self.policyMaker.updateQ(self.q)
        print("learning done.\n")

        best = self._best_actions_by_state(self.q, self.actions)
        entries = list(best.items())
        entries.sort(key=cmp_to_key(self._compare_policy_entries))

        for entry in entries:
            print(entry)
        msg = "".join(
            str(state) + "\n" + str(choices) + "\n\n"
            for state, choices in entries)
        self.policyPringPub.publish(String(msg))

    def pub_policy(self):
        """Placeholder; currently does nothing."""
        pass

    @staticmethod
    def _best_actions_by_state(q, actions):
        """Collect, for every state in ``q``, the actions with the highest value.

        Args:
            q: Mapping keyed by ``(state, action)``; only ``key[0]`` of each
                key is used to find the states.
            actions: Actions to look up for every state.

        Returns:
            A ``defaultdict`` mapping each state to a tuple of
            ``(action, value)`` pairs that share the maximum value. A state
            whose values never exceed the sentinel keeps the sentinel pair.
        """
        sentinel = (-999999999999, -999999999999)
        best = defaultdict(lambda: (sentinel,))
        # Values are read from a copy, never from q itself. Presumably q
        # creates missing entries on lookup, which would otherwise change it
        # during iteration -- TODO confirm against the learner's table type.
        lookup = deepcopy(q)
        for key in q.keys():
            state = key[0]
            for action in actions:
                value = lookup[(state, action)]
                candidate = (action, value)
                top_value = best[state][0][1]
                # A state is visited once per key that mentions it, so ties
                # must be de-duplicated.
                if top_value == value and candidate not in best[state]:
                    best[state] = best[state] + (candidate,)
                elif top_value < value:
                    best[state] = (candidate,)
        return best

    @staticmethod
    def _compare_policy_entries(x, y):
        """cmp-style ordering of ``(state, best_actions)`` entries.

        States are compared element by element. If one state is a prefix of
        the other (or they are equal), the second ``(action, value)`` pair of
        each entry decides, and an entry without a second pair sorts first.

        Returns:
            ``1``, ``-1`` or ``0``.
        """
        for left, right in zip(x[0], y[0]):
            if left > right:
                return 1
            if left < right:
                return -1
        # Slicing instead of indexing [1]: same result when both entries have
        # a second pair, but no IndexError when a state has one best action.
        left_second, right_second = x[1][1:2], y[1][1:2]
        if left_second > right_second:
            return 1
        if left_second < right_second:
            return -1
        return 0