Example 1
    def __init__(self, state, parent=None, action=None, path_cost=0, improvement=1000.0, inside_convex=False):
        """Create a search tree Node, derived from a parent by an action."""
        utils.update(self, state=state, parent=parent, action=action,
                     path_cost=path_cost, depth=0, improvement=improvement, inside_convex=inside_convex)
        if parent:
            self.depth = parent.depth + 1
            # improvement: Chebyshev (l-infinity) distance between this node's value
            # vector and its parent's
            self.improvement = linfDistance([np.array(self.state[1])], [np.array(parent.state[1])], 'chebyshev')[0, 0]
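
The linfDistance helper used throughout these examples is not defined here; given its call shape (two lists of vectors, a 'chebyshev' metric name, and [0, 0] indexing into the result), it plausibly wraps scipy.spatial.distance.cdist. A minimal stand-in under that assumption:

import numpy as np
from scipy.spatial.distance import cdist

def linfDistance(X, Y, metric='chebyshev'):
    # Assumed stand-in: pairwise distance matrix between the row vectors in X
    # and Y; callers index [0, 0] to get the single X[0]-versus-Y[0] distance.
    return cdist(np.asarray(X), np.asarray(Y), metric)

# Chebyshev distance is the largest componentwise gap: max(|1-1|, |2-4|, |3-3|) = 2.0
print linfDistance([np.array([1.0, 2.0, 3.0])], [np.array([1.0, 4.0, 3.0])], 'chebyshev')[0, 0]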
Example 2
    def epsilon_close_convex_hull(self, V_d, P_initial, epsilon):
        """
        Check whether some vector in P_initial is epsilon-close to V_d in Chebyshev distance.
        :param V_d: d-dimensional vector
        :param P_initial: array whose rows are d-dimensional vectors
        :param epsilon: closeness threshold
        :return: True if some row of P_initial is within epsilon of V_d, False otherwise
        """
        for item in xrange(P_initial.shape[0]):
            dist = linfDistance([np.array(P_initial[item, :])], [np.array(V_d)], 'chebyshev')[0, 0]
            if dist < epsilon:
                return True

        return False
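
The row-by-row loop above can be collapsed into a single vectorized test; a sketch of an equivalent check in plain numpy (the name epsilon_close is illustrative):

import numpy as np

def epsilon_close(V_d, P_initial, epsilon):
    # Chebyshev distance from V_d to every row of P_initial at once,
    # then test whether the smallest one beats epsilon.
    dists = np.abs(P_initial - V_d).max(axis=1)
    return bool(dists.min() < epsilon)

P = np.array([[0.0, 0.0], [1.0, 1.0]])
print epsilon_close(np.array([0.9, 1.05]), P, 0.2)   # True: row [1, 1] is 0.1 away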
Example 3
    def value_iteration_with_advantages(self, _epsilon, k, noise, cluster_error, threshold, exact):

        """
        Compute value iteration, using clustering on advantages.

        :param _epsilon: stopping criterion used in classic value iteration
        :param k: maximum number of iterations, in case the stopping criterion is never met
        :param noise: variance of the normal noise N(0, noise) used to simulate an uncertain user who gives
                      unreliable answers to vector comparisons
        :param cluster_error: maximum distance between any two points in a cluster
        :param threshold: stopping criterion on the final value vector computed from advantages
        :param exact: the weight vector used to simulate user answers to queries
        :returns: a tuple (list_v_d, Lambda_inequalities, gather_query, gather_diff, best_v_d), where
                  best_v_d is the best d-dimensional value vector found and list_v_d records the value
                  vector after each query
        """

        obs = open("observe-search" + ".txt", "w")
        print >> obs, '****************************'

        gather_query = []
        gather_diff = []

        d = self.mdp.d
        matrix_nd = np.zeros((self.n, d), dtype=ftype)
        v_d = np.zeros(d, dtype=ftype)

        # start with a random policy
        best_p_and_v_d = ({s: [random.randint(0, self.na-1)] for s in range(self.n)}, np.zeros(d, dtype=ftype))

        print "best_p_and_v_d", best_p_and_v_d

        delta = 0.0

        queries = []
        list_v_d = []
        query_count = 0

        for t in range(k):
            print '****** t=', t, "***************"

            advantages_pair_vector_dic = self.mdp.calculate_advantages_labels(matrix_nd, True)

            print 'advantages_pair_vector_dic', advantages_pair_vector_dic
            print 'len(advantages_pair_vector_dic)', len(advantages_pair_vector_dic)

            cluster_advantages = self.accumulate_advantage_clusters(matrix_nd, advantages_pair_vector_dic, cluster_error)

            print "cluster_advantages", cluster_advantages

            policies = self.declare_policies(cluster_advantages, best_p_and_v_d[0], matrix_nd)

            for val in policies.itervalues():
                best_p_and_v_d = self.get_best_policies(best_p_and_v_d, val, noise)

            print 'list of policies', policies
            print 'best_p_and_v_d', best_p_and_v_d

            matrix_nd = self.mdp.update_matrix(policy_p=best_p_and_v_d[0], _Uvec_nd=matrix_nd)
            best_v_d = best_p_and_v_d[1]

            print 'best_v_d', best_v_d
            print 'difference', linfDistance([np.array(best_v_d)], [np.array(exact)], 'chebyshev')[0,0]

            print "*************************"

            delta = linfDistance([np.array(best_v_d)], [np.array(v_d)], 'chebyshev')[0,0]

            gather_query.append(self.query_counter_with_advantages)
            gather_diff.append(abs(np.dot(self.get_Lambda(), best_v_d) - np.dot(self.get_Lambda(), exact)))

            print >> obs, 'delta', delta, "      query", self.query_counter_with_advantages, \
                "    difference", linfDistance([np.array(best_v_d)], [np.array(exact)], 'chebyshev')[0, 0]
            obs.flush()

            if query_count != self.query_counter_with_advantages:
                queries.append(query_count)
                list_v_d.append(v_d)
                query_count = self.query_counter_with_advantages

            if delta < threshold:
                queries.append(query_count)
                list_v_d.append(best_v_d)
                print "best_p_and_v_d", best_p_and_v_d
                return (list_v_d, self.Lambda_inequalities, gather_query, gather_diff, best_v_d)
            else:
                v_d = best_v_d

        queries.append(query_count)
        list_v_d.append(best_v_d)
        print "best_p_and_v_d", best_p_and_v_d
        return (list_v_d, self.Lambda_inequalities, gather_query, gather_diff, best_v_d)
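
get_best_policies here (and get_best in the next example) pose vector-comparison queries to a simulated user whose answers are perturbed by normal noise N(0, noise), as the docstring describes. A minimal sketch of such a noisy comparison oracle; the function name and signature are illustrative, not the code's actual API:

import numpy as np

def noisy_prefers(lambda_d, u_d, v_d, noise_var):
    # Simulated user: compares the scalarized values lambda . u and lambda . v;
    # with noise_var > 0 each side is perturbed by N(0, noise_var) noise
    # (np.random.normal takes a standard deviation, hence the sqrt).
    eps_u = np.random.normal(0.0, np.sqrt(noise_var)) if noise_var else 0.0
    eps_v = np.random.normal(0.0, np.sqrt(noise_var)) if noise_var else 0.0
    return np.dot(lambda_d, u_d) + eps_u >= np.dot(lambda_d, v_d) + eps_v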
Example 4
    def value_iteration_weng(self, k, noise, threshold, exact):
        """
        Find the optimal v_bar of dimension d using the interactive value iteration method.
        :param k: maximum number of iterations
        :param noise: user noise variance
        :param threshold: the stopping criterion value
        :param exact: the weight vector used to simulate user answers to queries
        :return: a tuple (vector_list_d, Lambda_inequalities, gather_query, gather_diff, Uvec_final_d);
        vector_list_d records the d-dimensional value vector after each query posed to the user, and
        Uvec_final_d is the final value vector found by the algorithm.
        """

        obs = open("observe-search.txt", "w")
        print >> obs, '***************************'

        gather_query = []
        gather_diff = []

        n, na, d = self.mdp.nstates, self.mdp.nactions, self.mdp.d
        Uvec_old_nd = np.zeros((n, d), dtype=ftype)

        delta = 0.0

        vector_list_d = []
        query_count = self.query_counter_
        queries = []

        for t in range(k):
            Uvec_nd = np.zeros((n,d), dtype=ftype)

            for s in range(n):
                _V_best_d = np.zeros(d, dtype=ftype)
                for a in range(na):
                    # compute the vector-valued Q function
                    Q_d = self.mdp.get_vec_Q(s, a, Uvec_old_nd)
                    _V_best_d = self.get_best(_V_best_d, Q_d, _noise=noise)

                Uvec_nd[s] = _V_best_d

            Uvec_final_d = self.get_initial_distribution().dot(Uvec_nd)
            Uvec_old_d = self.get_initial_distribution().dot(Uvec_old_nd)
            delta = linfDistance([np.array(Uvec_final_d)], [np.array(Uvec_old_d)], 'chebyshev')[0,0]

            gather_query.append(self.query_counter_)
            gather_diff.append(abs(np.dot(self.get_Lambda(), Uvec_final_d) - np.dot(self.get_Lambda(), exact)))

            print >> obs, 'delta', delta, "      query", self.query_counter_, \
                "   difference      ", linfDistance([np.array(Uvec_final_d)], [np.array(exact)], 'chebyshev')[0, 0]
            obs.flush()

            if query_count != self.query_counter_:
                queries.append(query_count)
                vector_list_d.append(Uvec_old_d)
                query_count = self.query_counter_

            if delta < threshold:
                queries.append(query_count)
                vector_list_d.append(Uvec_final_d)
                return (vector_list_d, self.Lambda_inequalities, gather_query, gather_diff, Uvec_final_d)
            else:
                Uvec_old_nd = Uvec_nd

        queries.append(query_count)
        vector_list_d.append(Uvec_final_d)
        return (vector_list_d, self.Lambda_inequalities, gather_query, gather_diff, Uvec_final_d)
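
get_vec_Q is not shown in these examples; in vector-valued value iteration the Q function is the usual Bellman backup applied componentwise, Q(s, a) = r(s, a) + gamma * sum_s' P(s'|s, a) U(s'). A sketch under that assumption, with illustrative array names:

import numpy as np

def get_vec_Q(P, R, gamma, s, a, Uvec_nd):
    # P[s, a, :] : transition probabilities from (s, a), shape (n, na, n)
    # R[s, a]    : d-dimensional reward vector, shape (n, na, d)
    # Uvec_nd    : current value vector of each state, shape (n, d)
    # Bellman backup applied componentwise to the d reward dimensions.
    return R[s, a] + gamma * P[s, a].dot(Uvec_nd)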
Example 5
    def value_iteration_weng(self, k, noise, threshold, exact, _error_exact_approx=None):
        """
        Find the optimal v_bar of dimension d using the interactive value iteration method.
        :param k: maximum number of iterations
        :param noise: user noise variance
        :param threshold: the stopping criterion value
        :param exact: the weight vector used to simulate user answers to queries.
        :param _error_exact_approx: optional alternative stopping criterion on the error against exact
        :return: the final d-dimensional value vector, the per-iteration query counts, the errors against
        exact, and the last iteration index t.
        """

        gather_query = []
        gather_diff = []
        self.query_counter_ = 0

        n, na, d = self.mdp.nstates, self.mdp.nactions, self.mdp.d
        Uvec_old_nd = np.zeros((n, d), dtype=ftype)
        Uvec_nd = np.zeros((n, d), dtype=ftype)

        delta = 0.0  # overwritten inside the loop; kept only as a safe initial value

        for t in range(k):
            if t % 50 == 0:
                print ""
            # note: unlike the previous variant, Uvec_nd is deliberately not reset here, so
            # each sweep updates the value matrix in place (Gauss-Seidel-style iteration)

            for s in range(n):
                _V_best_d = np.zeros(d, dtype=ftype)
                for a in range(na):
                    # compute the vector-valued Q function from the in-place value matrix
                    Q_d = self.mdp.get_vec_Q(s, a, Uvec_nd)
                    _V_best_d = self.get_best(_V_best_d, Q_d, _noise=noise)

                Uvec_nd[s] = _V_best_d

                Uvec_temp = self.get_initial_distribution().dot(Uvec_nd)
                gather_query.append(self.query_counter_)
                gather_diff.append(abs(np.dot(self.get_Lambda(), Uvec_temp) - np.dot(self.get_Lambda(), exact)))

            Uvec_final_d = self.get_initial_distribution().dot(Uvec_nd)
            Uvec_old_d = self.get_initial_distribution().dot(Uvec_old_nd)
            delta = linfDistance([np.array(Uvec_final_d)], [np.array(Uvec_old_d)], "chebyshev")[0, 0]

            # temporary: for the approximation project only, providing an alternative stopping
            # criterion based on the approximation error against the exact solution
            if _error_exact_approx:
                new_delta = gather_diff[-1]
                if new_delta < _error_exact_approx:
                    return Uvec_final_d, gather_query, gather_diff, t
                else:
                    Uvec_old_nd = Uvec_nd.copy()

            print >> self.wen, "iteration = ", t, "query =", gather_query[-1], " error= ", gather_diff[-1], " +" if (
                len(gather_diff) > 2 and gather_diff[-2] < gather_diff[-1]
            ) else " "

            if not _error_exact_approx:
                if delta < threshold:
                    self.prob.write("show-LdominanceWeng.lp")
                    return Uvec_final_d, gather_query, gather_diff, t
                else:
                    Uvec_old_nd = Uvec_nd.copy()

        print >> self.wen, "iteration = ", t, "query =", gather_query[-1], " error= ", gather_diff[-1], " +" if (
            len(gather_diff) > 2 and gather_diff[-2] < gather_diff[-1]
        ) else " "

        return Uvec_final_d, gather_query, gather_diff, t

    def value_iteration_with_advantages(self, limit, noise, cluster_threshold, min_change, exact):
        """
        Value iteration using clusters of advantages; best_policyvaluepair is a pair made of a dictionary
        of state:action items and a value vector of size d.
        :param limit: maximum number of iterations
        :param noise: a vector of size d, or None if there is no noise
        :param cluster_threshold: the threshold used to build clusters (max distance between two of its vectors)
        :param min_change: iteration stops when the value vector changes by less than this minimum
        :param exact: the weights (lambda vector) used to simulate the user's answers to queries.
        :return: the final value vector, the per-iteration query counts, errors, and cluster counts, plus
        the hull counters and the last iteration index t.
        """

        gather_query = []
        gather_diff = []
        gather_clusters = []
        # these hull counters are returned below but never updated in this snippet;
        # initialized here to avoid unbound locals (presumably updated elsewhere)
        hullsuccess = hullexcept = 0
        self.adv = advantage.Advantage(self.mdp, cluster_threshold)

        d = self.mdp.d
        currentUvecs_nd = np.zeros((self.nstates, d), dtype=ftype) # initial value vector per state
        previousvalue_d = np.zeros(d, dtype=ftype) # a value vector

        # initial policy-value node:
        best_policyvaluepair = [{s: [random.randint(0, self.nactions - 1)] for s in range(self.nstates)},
                                np.zeros(d, dtype=ftype)]
        currenvalue_d = best_policyvaluepair[1]

        for t in range(limit):
            # computes all the advantages in a dictionary {(state, action):vector ...}
            advantages_dic = self.mdp.calculate_advantages_dic(currentUvecs_nd, True)
            # removes advantages equal to vector 0
            advantages_dic = self.adv.clean_Points(advantages_dic)
            if advantages_dic == {}:
                print "empty advantage dictionary"
                return currenvalue_d, gather_query, gather_diff, gather_clusters, hullsuccess, hullexcept, t
            # feeds into internal class format
            advantages_dic = self.adv.AdvantagesDict(advantages_dic)
            # computes a dictionary of clusters, where each cluster is a pair ([(s,a)...], V) (the list of (s,a) in the
            # cluster, and the sum of the (vectorial) advantages and the previous \beta(s) \dot \bar V(s)
            clusters_dic = self.adv.accumulate_advantage_clusters(currentUvecs_nd, advantages_dic, cluster_threshold)
            # only replaces actions in the best policy by actions in the cluster when their state is the same
            policies = self.adv.declare_policies(clusters_dic, best_policyvaluepair[0])
            
            # Updates the best (policy, value) pair. The value inherited from the previous iteration is
            # first reset, to avoid keeping the (policy, value) pair from the previous iteration
            best_policyvaluepair = [best_policyvaluepair[0], np.zeros(d, dtype=ftype)]
            for val in policies.itervalues():
                best_policyvaluepair = self.get_best_policies(best_policyvaluepair, val, noise)

            #print t, ":", len(best_policyvaluepair[0]),
            if t%25 == 0:
                print
            currentUvecs_nd = self.mdp.update_matrix(policy_p=best_policyvaluepair[0], _Uvec_nd=currentUvecs_nd)
            currenvalue_d = best_policyvaluepair[1]

            delta = linfDistance([np.array(currenvalue_d)], [np.array(previousvalue_d)], 'chebyshev')[0, 0]

            gather_query.append(self.query_counter_)
            gather_diff.append(self.Lambda.dot(exact) - self.Lambda.dot(currenvalue_d))
            gather_clusters.append(self.adv.nbclusters)

            print >> self.wen, "iteration = ", t, "query =", gather_query[-1], \
                "clusters =", self.adv.nbclusters, "error= ", gather_diff[-1], \
                " +" if (len(gather_diff) > 2 and gather_diff[-2] < gather_diff[-1]) else " "

            if delta < min_change:
                self.prob.write("show-LdominanceAvi.lp")
                print "\n", exact
                print currenvalue_d
                print self.adv.get_initial_distribution().dot(currentUvecs_nd)
                return currenvalue_d, gather_query, gather_diff, gather_clusters, hullsuccess, hullexcept, t
            else:
                previousvalue_d = currenvalue_d.copy()

        print >> self.wen, "iteration = ", t, "query =", gather_query[-1], \
            "clusters =", self.adv.nbclusters, " error= ", gather_diff[-1], \
            " +" if (len(gather_diff) > 2 and gather_diff[-2] < gather_diff[-1]) else " "

        # noinspection PyUnboundLocalVariable
        return currenvalue_d, gather_query, gather_diff, gather_clusters, hullsuccess, hullexcept, t
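
calculate_advantages_dic is also not shown; the standard (vectorial) advantage the comments describe is the gap A(s, a) = Q(s, a) - V(s) between an action's Q vector and the state's current value vector. A sketch under that assumption, reusing the hypothetical arrays from the get_vec_Q sketch above:

import numpy as np

def calculate_advantages(P, R, gamma, Uvec_nd):
    # Advantage of (s, a): d-dimensional gap between the Q vector of (s, a) and
    # the state's current value vector; all-zero advantages can then be dropped,
    # as clean_Points does above.
    n, na = R.shape[0], R.shape[1]
    advantages = {}
    for s in range(n):
        for a in range(na):
            q_d = R[s, a] + gamma * P[s, a].dot(Uvec_nd)   # componentwise Bellman backup
            advantages[(s, a)] = q_d - Uvec_nd[s]
    return advantages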