def __init__(self, state, parent=None, action=None, path_cost=0, improvement = 1000.0, inside_convex= False):
    """Create a search tree Node, derived from a parent by an action.

    All constructor arguments, plus ``depth=0``, are handed to
    ``utils.update`` (presumably it stores them as attributes on ``self`` --
    confirm against ``utils``).  When a truthy ``parent`` is supplied the
    node is linked to it, its depth becomes ``parent.depth + 1`` and
    ``improvement`` is replaced by the 'chebyshev' distance between
    ``state[1]`` of this node and ``state[1]`` of the parent.
    """
    utils.update(
        self,
        state=state,
        parent=parent,
        action=action,
        path_cost=path_cost,
        depth=0,
        improvement=improvement,
        inside_convex=inside_convex,
    )
    self.state = state

    # A node without parent keeps depth 0 and the supplied improvement value.
    if not parent:
        return

    self.parent = parent
    self.depth = parent.depth + 1

    own_vector = np.array(self.state[1])
    parent_vector = np.array(parent.state[1])
    pairwise = linfDistance([own_vector], [parent_vector], 'chebyshev')
    self.improvement = pairwise[0, 0]
def epsilon_close_convex_hull(self, V_d, P_inintial, epsilon):
    """
    Tell whether some row of ``P_inintial`` is strictly closer than
    ``epsilon`` to ``V_d``.

    Closeness is the Chebyshev (L-infinity) distance, i.e. the largest
    absolute coordinate difference.  Despite the method name, only the rows
    of ``P_inintial`` themselves are tested, not the points of their convex
    hull.

    :param V_d: d dimensional vector
    :param P_inintial: array of d dimensional rows (any array-like accepted
        by ``np.asarray``); an empty collection yields False
    :param epsilon: strict upper bound on the accepted distance
    :return: True or False
    """
    points = np.asarray(P_inintial, dtype=float)
    if points.size == 0:
        # Nothing to compare against.
        return False

    target = np.asarray(V_d, dtype=float)

    # max_i |p_i - v_i| < epsilon  <=>  every |p_i - v_i| < epsilon, so the
    # whole test is one broadcasted comparison instead of one distance call
    # per row.
    row_is_close = np.all(np.abs(points - target) < epsilon, axis=-1)
    return bool(np.any(row_is_close))
def value_iteration_with_advantages(self, _epsilon, k, noise, cluster_error, threshold, exact):
    """
    Compute value iteration using clustering on advantages.

    Every iteration computes the advantages for the current per-state value
    matrix, groups them into clusters, turns the clusters into candidate
    policies and keeps the best (policy, value) pair according to
    ``self.get_best_policies``.  One progress line per iteration is written
    to ``observe-search.txt`` (the file is truncated on every call) and
    diagnostics are printed on standard output.

    :param _epsilon: stopping criteria used in classic value iteration
        (not read by this method)
    :param k: maximum number of iterations if the stopping criterion is not met
    :param noise: forwarded unchanged to ``self.get_best_policies``
        (originally documented as the variance of the N(0, noise) noise on
        user answers)
    :param cluster_error: forwarded to ``self.accumulate_advantage_clusters``
        (originally documented as the maximum distance between points of a
        cluster)
    :param threshold: iteration stops as soon as the 'chebyshev' distance
        between two successive best value vectors is below this value
    :param exact: reference value vector; only used to report the distance of
        the current value to it
    :returns: tuple ``(list_v_d, self.Lambda_inequalities, gather_query,
        gather_diff, best_v_d)`` where ``list_v_d`` holds the value vectors
        recorded each time the query counter changed plus the final one,
        ``gather_query`` / ``gather_diff`` hold one entry per iteration
        (query counter, and ``|Lambda.best_v_d - Lambda.exact|``) and
        ``best_v_d`` is the last best value vector.
    """
    gather_query = []
    gather_diff = []

    d = self.mdp.d
    matrix_nd = np.zeros((self.n, d), dtype=ftype)
    v_d = np.zeros(d, dtype=ftype)

    # start with a random policy
    best_p_and_v_d = ({s: [random.randint(0, self.na - 1)] for s in range(self.n)},
                      np.zeros(d, dtype=ftype))
    print("best_p_and_v_d %s" % (best_p_and_v_d,))

    # Defined up front so that k == 0 returns the initial value instead of
    # failing on an unbound local after the loop.
    best_v_d = best_p_and_v_d[1]

    list_v_d = []
    query_count = 0

    # The context manager guarantees the log file is closed on every exit
    # path, including the early return and exceptions.
    with open("observe-search.txt", "w") as obs:
        obs.write("****************************\n")

        for t in range(k):
            print("****** t= %s ***************" % (t,))

            advantages_pair_vector_dic = self.mdp.calculate_advantages_labels(matrix_nd, True)
            print("advantages_pair_vector_dic %s" % (advantages_pair_vector_dic,))
            print("len(advantages_pair_vector_dic) %s" % (len(advantages_pair_vector_dic),))

            cluster_advantages = self.accumulate_advantage_clusters(
                matrix_nd, advantages_pair_vector_dic, cluster_error)
            print("cluster_advantages %s" % (cluster_advantages,))

            policies = self.declare_policies(cluster_advantages, best_p_and_v_d[0], matrix_nd)
            for val in policies.values():
                best_p_and_v_d = self.get_best_policies(best_p_and_v_d, val, noise)

            print("list of policies %s" % (policies,))
            print("best_p_and_v_d %s" % (best_p_and_v_d,))

            matrix_nd = self.mdp.update_matrix(policy_p=best_p_and_v_d[0], _Uvec_nd=matrix_nd)
            best_v_d = best_p_and_v_d[1]

            # Distance to the reference vector, reported on stdout and in the log.
            error_to_exact = linfDistance([np.array(best_v_d)], [np.array(exact)], 'chebyshev')[0, 0]
            print("best_v_d %s" % (best_v_d,))
            print("difference %s" % (error_to_exact,))
            print("*************************")

            delta = linfDistance([np.array(best_v_d)], [np.array(v_d)], 'chebyshev')[0, 0]

            gather_query.append(self.query_counter_with_advantages)
            weights = list(self.get_Lambda())
            weighted_best = sum(w * v for w, v in zip(weights, list(best_v_d)))
            weighted_exact = sum(w * v for w, v in zip(weights, list(exact)))
            gather_diff.append(abs(weighted_best - weighted_exact))

            # Write the complete, newline-terminated line first and flush
            # afterwards, so the line just produced actually reaches the disk.
            log_items = ('delta', delta, " query", self.query_counter_with_advantages,
                         " difference", error_to_exact)
            obs.write(" ".join(str(item) for item in log_items) + "\n")
            obs.flush()

            # Record the previous value whenever queries were issued during
            # this iteration.
            if query_count != self.query_counter_with_advantages:
                list_v_d.append(v_d)
                query_count = self.query_counter_with_advantages

            if delta < threshold:
                list_v_d.append(best_v_d)
                print("best_p_and_v_d %s" % (best_p_and_v_d,))
                return (list_v_d, self.Lambda_inequalities, gather_query, gather_diff, best_v_d)

            v_d = best_v_d

    # Iteration budget exhausted without reaching the threshold.
    list_v_d.append(best_v_d)
    print("best_p_and_v_d %s" % (best_p_and_v_d,))
    return (list_v_d, self.Lambda_inequalities, gather_query, gather_diff, best_v_d)
def value_iteration_weng(self, k, noise, threshold, exact):
    """
    Find the optimal v_bar of dimension d using the interactive value
    iteration method.

    Every iteration rebuilds the per-state value matrix from the previous
    one: for each state the vector Q-values of all actions are compared with
    ``self.get_best`` and the winner is kept.  One progress line per
    iteration is written to ``observe-search.txt`` (the file is truncated on
    every call).

    :param k: max number of iterations
    :param noise: forwarded to ``self.get_best`` as ``_noise`` (originally
        documented as the user noise variance)
    :param threshold: iteration stops as soon as the 'chebyshev' distance
        between two successive value vectors is below this value
    :param exact: reference value vector; only used to report the distance of
        the current value to it
    :return: tuple ``(vector_list_d, self.Lambda_inequalities, gather_query,
        gather_diff, Uvec_final_d)``.  ``vector_list_d`` holds the
        d-dimensional vectors recorded each time the query counter changed,
        followed by the final vector; ``gather_query`` / ``gather_diff`` hold
        one entry per iteration (query counter, and
        ``|Lambda.value - Lambda.exact|``).
    """
    gather_query = []
    gather_diff = []

    n, na, d = self.mdp.nstates, self.mdp.nactions, self.mdp.d
    Uvec_old_nd = np.zeros((n, d), dtype=ftype)

    # Stays None only when the loop body never runs (k == 0).
    Uvec_final_d = None
    vector_list_d = []
    query_count = self.query_counter_

    # The context manager guarantees the log file is closed on every exit
    # path, including the early return and exceptions.
    with open("observe-search.txt", "w") as obs:
        obs.write("***************************\n")

        for t in range(k):
            Uvec_nd = np.zeros((n, d), dtype=ftype)
            for s in range(n):
                _V_best_d = np.zeros(d, dtype=ftype)
                for a in range(na):
                    # compute Q function from the previous iteration's values
                    Q_d = self.mdp.get_vec_Q(s, a, Uvec_old_nd)
                    _V_best_d = self.get_best(_V_best_d, Q_d, _noise=noise)
                Uvec_nd[s] = _V_best_d

            Uvec_final_d = self.get_initial_distribution().dot(Uvec_nd)
            Uvec_old_d = self.get_initial_distribution().dot(Uvec_old_nd)
            delta = linfDistance([np.array(Uvec_final_d)], [np.array(Uvec_old_d)], 'chebyshev')[0, 0]

            gather_query.append(self.query_counter_)
            weights = list(self.get_Lambda())
            weighted_value = sum(w * v for w, v in zip(weights, list(Uvec_final_d)))
            weighted_exact = sum(w * v for w, v in zip(weights, list(exact)))
            gather_diff.append(abs(weighted_value - weighted_exact))

            error_to_exact = linfDistance([np.array(Uvec_final_d)], [np.array(exact)], 'chebyshev')[0, 0]
            log_items = ('delta', delta, " query", self.query_counter_,
                         " difference ", error_to_exact)
            obs.write(" ".join(str(item) for item in log_items) + "\n")
            obs.flush()

            # Record the previous value whenever queries were issued during
            # this iteration.
            if query_count != self.query_counter_:
                vector_list_d.append(Uvec_old_d)
                query_count = self.query_counter_

            if delta < threshold:
                vector_list_d.append(Uvec_final_d)
                return (vector_list_d, self.Lambda_inequalities, gather_query, gather_diff, Uvec_final_d)

            Uvec_old_nd = Uvec_nd

    if Uvec_final_d is None:
        # No iteration ran: report the value of the initial (all-zero) matrix
        # instead of failing on an unbound local.
        Uvec_final_d = self.get_initial_distribution().dot(Uvec_old_nd)

    vector_list_d.append(Uvec_final_d)
    return (vector_list_d, self.Lambda_inequalities, gather_query, gather_diff, Uvec_final_d)
def value_iteration_weng(self, k, noise, threshold, exact, _error_exat_approx=None):
    """
    Find the optimal v_bar of dimension d using the interactive value
    iteration method.

    The per-state value matrix is updated in place, so the Q-values of a
    state already see the values written for earlier states in the same
    sweep.  After each state update the current query counter and
    ``|Lambda.value - Lambda.exact|`` are appended to the gathered lists.
    ``self.query_counter_`` is reset to 0 at the start of the call, and one
    progress line per iteration is written to ``self.wen`` (assumed to be a
    writable file-like object -- confirm against the class constructor).

    :param k: max number of iterations
    :param noise: forwarded to ``self.get_best`` as ``_noise`` (originally
        documented as the user noise variance)
    :param threshold: when ``_error_exat_approx`` is not set, iteration stops
        as soon as the 'chebyshev' distance between two successive value
        vectors is below this value
    :param exact: vector compared with the current value through the Lambda
        weights (originally documented as the weight vector used to simulate
        user answers to queries)
    :param _error_exat_approx: optional alternative stopping criterion; when
        truthy, iteration stops as soon as the last gathered error is below
        it and ``threshold`` is ignored
    :return: tuple ``(Uvec_final_d, gather_query, gather_diff, t)`` where
        ``t`` is the index of the last iteration run (-1 when ``k == 0``).
    """
    gather_query = []
    gather_diff = []
    self.query_counter_ = 0

    n, na, d = self.mdp.nstates, self.mdp.nactions, self.mdp.d
    Uvec_old_nd = np.zeros((n, d), dtype=ftype)
    Uvec_nd = np.zeros((n, d), dtype=ftype)

    # Defaults for the k == 0 case, where the loop body never runs.
    t = -1
    Uvec_final_d = None

    for t in range(k):
        if t % 50 == 0:
            print("")

        for s in range(n):
            _V_best_d = np.zeros(d, dtype=ftype)
            for a in range(na):
                # compute Q function from the matrix being updated
                Q_d = self.mdp.get_vec_Q(s, a, Uvec_nd)
                _V_best_d = self.get_best(_V_best_d, Q_d, _noise=noise)

            Uvec_nd[s] = _V_best_d
            # NOTE(review): the original indentation was lost; these three
            # statements are assumed to run once per state -- confirm.
            Uvec_temp = self.get_initial_distribution().dot(Uvec_nd)
            gather_query.append(self.query_counter_)
            gather_diff.append(abs(np.dot(self.get_Lambda(), Uvec_temp) - np.dot(self.get_Lambda(), exact)))

        Uvec_final_d = self.get_initial_distribution().dot(Uvec_nd)
        Uvec_old_d = self.get_initial_distribution().dot(Uvec_old_nd)
        delta = linfDistance([np.array(Uvec_final_d)], [np.array(Uvec_old_d)], "chebyshev")[0, 0]

        # temporary: just for the approximation project, to harmonize the
        # stopping criterion with the approximate error
        if _error_exat_approx:
            if gather_diff[-1] < _error_exat_approx:
                return Uvec_final_d, gather_query, gather_diff, t
            Uvec_old_nd = Uvec_nd.copy()

        # The trailing marker flags an error that grew since the last record.
        grew = len(gather_diff) > 2 and gather_diff[-2] < gather_diff[-1]
        log_items = ("iteration = ", t, "query =", gather_query[-1],
                     " error= ", gather_diff[-1], " +" if grew else " ")
        self.wen.write(" ".join(str(item) for item in log_items) + "\n")

        if not _error_exat_approx:
            if delta < threshold:
                self.prob.write("show-LdominanceWeng.lp")
                return Uvec_final_d, gather_query, gather_diff, t
            Uvec_old_nd = Uvec_nd.copy()

    if Uvec_final_d is None:
        # No iteration ran: report the value of the initial (all-zero) matrix.
        Uvec_final_d = self.get_initial_distribution().dot(Uvec_nd)

    if gather_diff:
        # Final summary line; skipped when nothing was gathered.
        grew = len(gather_diff) > 2 and gather_diff[-2] < gather_diff[-1]
        log_items = ("iteration = ", t, "query =", gather_query[-1],
                     " error= ", gather_diff[-1], "+ " if grew else " ")
        self.wen.write(" ".join(str(item) for item in log_items) + "\n")

    return Uvec_final_d, gather_query, gather_diff, t
def value_iteration_with_advantages(self, limit, noise, cluster_threshold, min_change, exact):
    """
    Run value iteration where candidate policies are built from clustered
    advantages.

    best_policyvaluepair is a pair made of a dictionary of state:action items
    and a value vector of size d.  A fresh ``advantage.Advantage`` helper is
    stored in ``self.adv`` on every call, and one progress line per iteration
    is written to ``self.wen`` (assumed to be a writable file-like object --
    confirm against the class constructor).

    :param limit: max number of iterations
    :param noise: forwarded to ``self.get_best_policies`` (originally
        documented as a vector of size d, none if no noise)
    :param cluster_threshold: the threshold to build clusters (max distance
        between two of its vectors)
    :param min_change: iteration stops when the value changes less than this
        min ('chebyshev' distance between successive value vectors)
    :param exact: vector compared with the current value through
        ``self.Lambda`` (originally documented as the weights used to
        simulate user answers to queries)
    :return: normally the 7-tuple ``(currenvalue_d, gather_query,
        gather_diff, gather_clusters, hullsuccess, hullexcept, t)`` where
        ``t`` is the index of the last iteration run (-1 when ``limit == 0``).
        When the cleaned advantages dictionary is empty, the 3-tuple
        ``(currenvalue_d, gather_query, gather_diff)`` is returned instead.
    """
    gather_query = []
    gather_diff = []
    gather_clusters = []

    self.adv = advantage.Advantage(self.mdp, cluster_threshold)

    d = self.mdp.d
    currentUvecs_nd = np.zeros((self.nstates, d), dtype=ftype)  # initial value vector per state
    previousvalue_d = np.zeros(d, dtype=ftype)  # a value vector

    # initial policy-value node: one random action per state, zero value
    best_policyvaluepair = [{s: [random.randint(0, self.nactions - 1)] for s in range(self.nstates)},
                            np.zeros(d, dtype=ftype)]
    currenvalue_d = best_policyvaluepair[1]

    # Default for the limit == 0 case, where the loop body never runs.
    t = -1

    for t in range(limit):
        # computes all the advantages in a dictionary {(state, action):vector ...}
        advantages_dic = self.mdp.calculate_advantages_dic(currentUvecs_nd, True)
        # removes advantages equal to vector 0
        advantages_dic = self.adv.clean_Points(advantages_dic)
        if advantages_dic == {}:
            print("dictionaire vide")
            # NOTE(review): this path returns 3 values whereas the other
            # returns yield 7 -- callers must cope with both shapes.
            return currenvalue_d, gather_query, gather_diff

        # feeds into internal class format
        advantages_dic = self.adv.AdvantagesDict(advantages_dic)

        # computes a dictionary of clusters, where each cluster is a pair
        # ([(s,a)...], V): the list of (s,a) in the cluster, and the sum of
        # the (vectorial) advantages and the previous \beta(s) \dot \bar V(s)
        clusters_dic = self.adv.accumulate_advantage_clusters(currentUvecs_nd, advantages_dic, cluster_threshold)

        # only replaces actions in the best policy by actions in the cluster
        # when their state is the same
        policies = self.adv.declare_policies(clusters_dic, best_policyvaluepair[0])

        # Updates the best (policy, value) pair. The value inherited from the
        # previous iteration is first cleaned to protect against keeping the
        # (policy, value) pair from the previous iteration.
        best_policyvaluepair = [best_policyvaluepair[0], np.zeros(d, dtype=ftype)]
        for val in policies.values():
            best_policyvaluepair = self.get_best_policies(best_policyvaluepair, val, noise)

        if t % 25 == 0:
            print("")

        currentUvecs_nd = self.mdp.update_matrix(policy_p=best_policyvaluepair[0], _Uvec_nd=currentUvecs_nd)
        currenvalue_d = best_policyvaluepair[1]
        delta = linfDistance([np.array(currenvalue_d)], [np.array(previousvalue_d)], 'chebyshev')[0, 0]

        gather_query.append(self.query_counter_)
        gather_diff.append(self.Lambda.dot(exact) - self.Lambda.dot(currenvalue_d))
        gather_clusters.append(self.adv.nbclusters)

        # The trailing marker flags an error that grew since the last record.
        grew = len(gather_diff) > 2 and gather_diff[-2] < gather_diff[-1]
        log_items = ("iteration = ", t, "query =", gather_query[-1],
                     "clusters =", self.adv.nbclusters, "error= ", gather_diff[-1],
                     " +" if grew else " ")
        self.wen.write(" ".join(str(item) for item in log_items) + "\n")

        if delta < min_change:
            self.prob.write("show-LdominanceAvi.lp")
            print("\n%s" % (exact,))
            print(currenvalue_d)
            print(self.adv.get_initial_distribution().dot(currentUvecs_nd))
            # NOTE(review): hullsuccess / hullexcept are not assigned in this
            # method; they must exist at module level or this raises NameError.
            return currenvalue_d, gather_query, gather_diff, gather_clusters, hullsuccess, hullexcept, t

        previousvalue_d = currenvalue_d.copy()

    if gather_diff:
        # Final summary line; skipped when no iteration ran (limit == 0).
        grew = len(gather_diff) > 2 and gather_diff[-2] < gather_diff[-1]
        log_items = ("iteration = ", t, "query =", gather_query[-1],
                     "clusters =", self.adv.nbclusters, " error= ", gather_diff[-1],
                     " +" if grew else "")
        self.wen.write(" ".join(str(item) for item in log_items) + "\n")

    # NOTE(review): hullsuccess / hullexcept are not assigned in this method;
    # they must exist at module level or this raises NameError.
    return currenvalue_d, gather_query, gather_diff, gather_clusters, hullsuccess, hullexcept, t