def error(w, l, sample, sampleSize):
    """Regularized hinge-loss cost of a linear SVM on a sample.

    Cost = l * <w, w>  +  sum_i max(0, 1 - y_i * <w, x_i>)

    :param w: sparse weight vector (dict feature-index -> value).
    :param l: regularization ("depreciation") coefficient of the norm term.
    :param sample: list of sparse examples; key -1 of each dict holds the
        label (missing label defaults to 0, which zeroes that example's
        hinge term).
    :param sampleSize: number of examples of `sample` to use.
    :return: the scalar cost.
    """
    # Regularization part: l * ||w||^2 (sparse dot product of w with itself).
    norm = l * std.sparse_dot(w, w)
    # Renamed from `sum` to avoid shadowing the builtin.
    hinge_sum = 0
    for i in range(sampleSize):
        label = sample[i].get(-1, 0)
        example = std.take_out_label(sample[i])
        # Hinge loss: zero when the example is classified with margin >= 1.
        hinge_sum += max(0, 1 - label * std.sparse_dot(w, example))
    return norm + hinge_sum
def __init__(self):
    """Set up the per-run state shared by all GetFeature threads.

    Reads the module-level globals `nbClients`, `w0`, `gW0` and `step`
    set up before the server starts.
    """
    # An iterator that will count the number of clients that contact the
    # server at each epoch.
    self.iterator = 0
    # A barrier condition to be sure that every waited client contacted
    # the server before to start the GetFeature method (kind of join).
    # NOTE: evaluated once here; GetFeature re-assigns it on each call so
    # the waiting lambdas see fresh values.
    self.enter_condition = (self.iterator == nbClients)
    # Another barrier condition, that acts like a join on the threads too.
    self.exit_condition = (self.iterator == 0)
    # A list to store all the vectors sent by each client at each epoch.
    self.vectors = []
    # The current epoch (0 -> send the data to the clients).
    self.epoch = 0
    # The previous vector of parameters: the last that had been sent.
    self.oldParam = w0
    # Norm of the initial gradient gW0, used in the stopping criterion.
    self.normGW0 = math.sqrt(std.sparse_dot(gW0, gW0))
    # The name of one of the threads executing GetFeature: this one, and
    # only this one, will print the state of the computation in the server.
    self.printerThreadName = ''
    # The final vector of parameters we find.
    self.paramVector = {}
    # Error on the testing set, computed at each cycle of the server.
    self.testingErrors = []
    # Step of the descent.
    self.step = step
    # Keep all the merged vectors.
    self.merged = [w0]
    # Error on the training set, computed at each cycle of the server.
    self.trainingErrors = []
    # Number of bytes sent by clients at each epoch (epoch -> byte count).
    self.bytesTab = {}
def der_error(w, l, sample, sampleSize):
    """(Sub)gradient of the regularized hinge-loss cost w.r.t. `w`.

    For each example violating the margin (y_i * <w, x_i> < 1) the hinge
    term contributes -y_i * x_i; the norm term contributes l * w.
    NOTE(review): the exact derivative of l*<w,w> is 2*l*w — the code uses
    l*w, which just rescales the regularization constant; kept as-is.

    :param w: sparse weight vector (dict feature-index -> value).
    :param l: regularization coefficient.
    :param sample: list of sparse examples; key -1 holds the label
        (missing label defaults to 0).
    :param sampleSize: number of examples of `sample` to use.
    :return: sparse gradient vector (dict).
    """
    d = std.sparse_mult(l, w)
    # Renamed from `sum` to avoid shadowing the builtin.
    grad_sum = {}
    for i in range(sampleSize):
        label = sample[i].get(-1, 0)
        example = std.take_out_label(sample[i])
        # Only margin violators contribute a subgradient term.
        if label * std.sparse_dot(w, example) < 1:
            grad_sum = std.sparse_vsum(grad_sum, std.sparse_mult(-label, example))
    return std.sparse_vsum(d, grad_sum)
def guide_get_feature(stub):
    """Client loop: fetch the dataset from the server, then iterate SGD.

    Protocol (all payloads travel in `Vector.poids` strings):
      1. send "pret"            -> "<nbChunks><depre><l><samples><numSamples>"
      2. send "chunk<nb>k"      -> k-th chunk of the training data
      3. send "getw0"           -> the starting parameter vector
      4. repeatedly send "grad<delay>wt" until the server answers 'stop'.

    :param stub: gRPC RouteGuide stub used for all GetFeature calls.
    """
    # A variable to count the number of iterations of the client, which
    # must coincide with the epoch in the server.
    it = 1
    # We make a first call to the server to get the data: after that call,
    # vect describes the data set. Then we store it.
    vect = stub.GetFeature(route_guide_pb2.Vector(poids="pret"))
    dataInfo = vect.poids.split("<depre>")
    nbChunks = int(dataInfo[0])
    computeInfo = dataInfo[1].split("<samples>")
    # The depreciation of the SVM norm cost.
    l = float(computeInfo[0])
    # Number of samples in each subtraining set.
    # NOTE(review): parsed as float although it looks like a count —
    # presumably sgd.descent accepts a float here; confirm.
    numSamples = float(computeInfo[1])
    # Get the dataset from the server, by receiving all the chunks.
    k = 1
    dataSampleSet = []
    while (k <= nbChunks):
        vect = stub.GetFeature(route_guide_pb2.Vector(poids="chunk<nb>" + str(k)))
        dataSampleSet += std.str2datadict(vect.poids)
        k += 1
    # This second call serves to get the departure vector.
    vect = stub.GetFeature(route_guide_pb2.Vector(poids="getw0"))
    while (vect.poids != 'stop'):
        print("iteration : " + str(it))
        # We save the vector on which we base the computations.
        wt = std.str2dict(vect.poids)
        # Gradient descent on the sample.
        nw = sgd.descent(dataSampleSet, std.str2dict(vect.poids), numSamples, l)
        # Normalization of the vector of parameters.
        normnW = math.sqrt(std.sparse_dot(nw, nw))
        nw = std.sparse_mult(1/normnW, nw)
        # The result is sent to the server, together with the base vector
        # wt so the server can apply a delayed (asynchronous) update.
        vect.poids = std.dict2str(nw) + "<delay>" + std.dict2str(wt)
        vect = stub.GetFeature(route_guide_pb2.Vector(poids=vect.poids))
        it += 1
        #time.sleep(1.7)
    print(vect)
def GetFeature(self, request, context):
    """One server round-trip of the synchronous distributed SGD (variant
    that also records bytes sent per epoch).

    Handles three request kinds carried in `request.poids`:
      - "pret"  : client asks for the training data,
      - "getw0" : client asks for the starting vector,
      - otherwise: "<vector><bytes><byteCount>" — a client gradient step.
    All client threads rendezvous twice (enter/exit barriers via
    `waiting.wait`) so the merge sees every contribution.
    """
    ######################################################################
    # Section 1 : wait for all the clients -> get their vectors and
    # appoint one of them as the printer.
    self.iterator += 1
    if (request.poids == "pret" or request.poids == "getw0"):
        self.vectors.append(request.poids)
    else:
        # Payload is "<serialized vector><bytes><byte count>".
        entry = request.poids.split("<bytes>")
        b = int(entry[1])
        # Accumulate the bytes received from clients during this epoch.
        if (self.epoch in self.bytesTab):
            self.bytesTab[self.epoch] += b
        else:
            self.bytesTab[self.epoch] = b
        v = std.str2dict(entry[0])
        self.vectors.append(v)
    # Enter barrier: block until all nbClients threads reached this point.
    # The lambda re-reads the attribute, which other threads update.
    self.enter_condition = (self.iterator == nbClients)
    waiting.wait(lambda: self.enter_condition)
    # Last thread through the barrier becomes the (single) printer.
    self.printerThreadName = threading.current_thread().name
    ######################################################################
    ######################################################################
    # Section 2 : compute the new vector -> send the data, a merge of
    # all the vectors we got from the clients or the message 'stop' the
    # signal to the client that we converged.
    normDiff = 0
    normGradW = 0
    normPrecW = 0
    if (request.poids == 'pret'):
        # Ship the whole training set plus its metadata.
        vector = std.datadict2Sstr(trainingSet) + "<samples>" + str(
            numSamples) + "<#compo>" + str(nbCompo)
    elif (request.poids == 'getw0'):
        vector = std.dict2str(w0) + "<<||>>" + str(self.step)
    else:
        # Modification of the vector of parameters.
        gradParam = std.mergeSGD(self.vectors)
        vector = std.sparse_vsous(self.oldParam, gradParam)
        # Normalization of the vector of parameters.
        normW = math.sqrt(std.sparse_dot(vector, vector))
        vector = std.sparse_mult(1 / normW, vector)
        # Checking of the stopping criterion: small step, epoch budget
        # exhausted, or small gradient relative to the initial one.
        diff = std.sparse_vsous(self.oldParam, vector)
        normDiff = math.sqrt(std.sparse_dot(diff, diff))
        normGradW = math.sqrt(std.sparse_dot(gradParam, gradParam))
        normPrecW = math.sqrt(std.sparse_dot(self.oldParam, self.oldParam))
        if ((normDiff <= c1 * normPrecW) or (self.epoch > nbMaxCall) or
                (normGradW <= c2 * self.normGW0)):
            self.paramVector = vector
            vector = 'stop'
        else:
            vector = std.dict2str(vector) + "<<||>>" + str(self.step)
    ######################################################################
    ######################################################################
    # Section 3 : wait that all the threads pass the computation area, and
    # store the new computed vector.
    realComputation = (request.poids != 'pret') and (
        request.poids != 'getw0') and (vector != 'stop')
    self.iterator -= 1
    # Exit barrier: all threads must leave the computation area together.
    self.exit_condition = (self.iterator == 0)
    waiting.wait(lambda: self.exit_condition)
    if (realComputation):
        # Strip the appended step before storing the new parameters.
        self.oldParam = std.str2dict(vector.split("<<||>>")[0])
    ######################################################################
    ###################### PRINT OF THE CURRENT STATE ######################
    ##################### AND DO CRITICAL MODIFICATIONS ####################
    # Only the appointed printer thread traces progress and advances the
    # shared epoch/step, so these mutations happen exactly once per round.
    if (threading.current_thread().name == self.printerThreadName):
        std.printTraceRecData(self.epoch, vector, self.paramVector,
                              self.testingErrors, self.trainingErrors,
                              normDiff, normGradW, normPrecW, normGW0,
                              realComputation, self.oldParam, trainingSet,
                              testingSet, nbTestingData, nbExamples, c1,
                              c2, l, nbCompo, filePath)
        self.merged.append(self.oldParam)
        self.epoch += 1
        # Geometric step decay.
        self.step *= 0.9  #std.stepSize(nbExamples, self.epoch, nbDesc, nbCompo)
    ############################### END OF PRINT ###########################
    # Debug check: classify one fixed training example with the current w.
    dataTest = trainingSet[9]
    label = dataTest.get(-1, 0)
    example = std.take_out_label(dataTest)
    print("label = " + str(label))
    print("SVM says = " + str(std.sparse_dot(self.oldParam, example)))
    ######################################################################
    # Section 4 : empty the storage list of the vectors, and wait for all
    # the threads.
    self.vectors = []
    waiting.wait(lambda: (self.vectors == []))
    ######################################################################
    #time.sleep(1)
    return route_guide_pb2.Vector(poids=vector)
# The depreciation of the SVM norm cost l = 0.01 # The step of the descent step = 1 # Initial vector to process the stochastic gradient descent : # random generated. w0 = { 1: 0.21, 2: 0.75, hypPlace: 0.011 } # one element, to start the computation gW0 = sgd.der_error(w0, l, trainingSet, nbExamples) normGW0 = math.sqrt(std.sparse_dot(gW0, gW0)) nbParameters = len(trainingSet[0]) - 1 # -1 because we don't count the label # Maximum number of epochs we allow. nbMaxCall = 1000 # Constants to test the convergence c1 = 10**(-8) c2 = 10**(-8) print("Server ready....") class RouteGuideServicer(route_guide_pb2_grpc.RouteGuideServicer): """ We define attributes of the class to perform the computations.""" def __init__(self):
def GetFeature(self, request, context):
    """One server round-trip of the distributed SGD (chunked-data variant
    supporting both "sync" and "async" modes via `way2work`).

    Request kinds in `request.poids`:
      - "pret"       : dataset metadata (chunk count, l, sample count),
      - "chunk<nb>k" : k-th slice of the training set,
      - "getw0"      : starting vector,
      - otherwise    : "<gradient><delay><base vector>" from a client.
    In sync mode all clients rendezvous on enter/exit barriers; in async
    mode each gradient is applied as it arrives.
    """
    ######################################################################
    # Section 1 : wait for all the clients -> get their vectors and
    # appoint one of them as the printer.
    print(self.epoch)
    self.printerThreadName = threading.current_thread().name
    # Setup requests ("pret"/"getw0"/"chunk…") always synchronize, even in
    # async mode, so every client holds the data before training starts.
    if (request.poids == "pret" or request.poids == "getw0" or
            request.poids[:5] == "chunk"):
        self.iterator += 1
        self.vectors.append(request.poids)
        self.enter_condition = (self.iterator == nbClients)
        waiting.wait(lambda: self.enter_condition)
    # Gradient messages synchronize only in sync mode.
    if ((way2work == "sync") and (request.poids != "pret") and
            (request.poids != "getw0") and (request.poids[:5] != "chunk")):
        self.iterator += 1
        self.vectors.append(std.str2dict(
            request.poids.split("<delay>")[0]))
        self.enter_condition = (self.iterator == nbClients)
        waiting.wait(lambda: self.enter_condition)
    if ((threading.current_thread().name == self.printerThreadName) and
            (self.epoch == 1)):
        ############ Starting of the timer to time the run ############
        self.startTime = time.time()
    ######################################################################
    ######################################################################
    # Section 2 : compute the new vector -> send the data, a merge of
    # all the vectors we got from the clients or the message 'stop' the
    # signal to the client that we converged.
    normDiff = 0
    normGradW = 0
    normPrecW = 0
    if (request.poids == 'pret'):
        # Announce how many chunks the client must fetch, plus l and the
        # per-client sample count.
        vector = str(nbChunks) + "<depre>" + str(l) + "<samples>" + str(
            numSamples)
    elif (request.poids[:5] == 'chunk'):
        # Serve the requested slice of the training set (1-based chunks).
        chunk = request.poids.split("<nb>")
        chunk = int(chunk[1])
        vector = std.datadict2Sstr(
            trainingSet[(chunk - 1) * chunkSize:chunk * chunkSize])
    elif (request.poids == 'getw0'):
        vector = std.dict2str(w0)
    else:
        if (way2work == "sync"):
            # Merge every client's gradient, then take one descent step.
            gradParam = std.mergeSGD(self.vectors)
            # At the first real epoch, record the reference gradient norm.
            if (self.epoch == 2):
                self.normGradW0 = math.sqrt(
                    std.sparse_dot(gradParam, gradParam))
            normGradW = math.sqrt(std.sparse_dot(gradParam, gradParam))
            gradParam = std.sparse_mult(self.step, gradParam)
            vector = std.sparse_vsous(self.oldParam, gradParam)
        else:
            # Async mode: apply this single client's (possibly stale)
            # gradient, using wt — the vector it was computed against.
            info = request.poids.split("<delay>")
            grad_vector = std.str2dict(info[0])
            if (self.epoch == 2):
                self.normGradW0 = math.sqrt(
                    std.sparse_dot(grad_vector, grad_vector))
            normGradW = math.sqrt(std.sparse_dot(grad_vector, grad_vector))
            wt = std.str2dict(info[1])
            vector = std.asynchronousUpdate(self.oldParam, grad_vector,
                                            wt, l, self.step)
        ######## NORMALIZATION OF THE VECTOR OF PARAMETERS #########
        normW = math.sqrt(std.sparse_dot(vector, vector))
        vector = std.sparse_mult(1. / normW, vector)
        ############################################################
        diff = std.sparse_vsous(self.oldParam, vector)
        normDiff = math.sqrt(std.sparse_dot(diff, diff))
        normPrecW = math.sqrt(std.sparse_dot(self.oldParam, self.oldParam))
        # Stop on: tiny parameter move, epoch budget, or tiny gradient.
        # NOTE(review): compares against the module-level normGW0 here,
        # while self.normGradW0 is maintained above — confirm intended.
        if ((normDiff <= c1 * normPrecW) or (self.epoch > nbMaxCall) or
                (normGradW <= c2 * normGW0)):
            self.paramVector = vector
            print("1 : " + str((normDiff <= c1 * normPrecW)))
            print("2 : " + str((self.epoch > nbMaxCall)))
            print("3 : " + str((normGradW <= c2 * normGW0)))
            vector = 'stop'
        else:
            vector = std.dict2str(vector)
    ######################################################################
    ######################################################################
    # Section 3 : wait that all the threads pass the computation area, and
    # store the new computed vector.
    realComputation = (request.poids != 'pret') and (
        request.poids != 'getw0') and (vector != 'stop') and (
        request.poids[:5] != 'chunk')
    if (realComputation):
        self.oldParam = std.str2dict(vector)
    ######################################################################
    ###################### PRINT OF THE CURRENT STATE ######################
    ##################### AND DO CRITICAL MODIFICATIONS ####################
    # Sync mode: only the printer thread traces; async mode: every call.
    if (((threading.current_thread().name == self.printerThreadName) and
         (way2work == "sync")) or (way2work == "async")):
        print("oooooooo")
        endTime = time.time()
        duration = endTime - self.startTime
        if (vector == 'stop'):
            print("The server ran during : " + str(duration))
        std.printTraceRecData(self.epoch, vector, self.testingErrors,
                              self.trainingErrors, normDiff, normGradW,
                              normPrecW, normGW0, realComputation,
                              self.oldParam, trainingSet, testingSet,
                              nbTestingData, nbExamples, c1, c2, l,
                              duration, filePath)
        self.merged.append(self.oldParam)
        if (realComputation):
            self.epoch += 1
            # Geometric step decay.
            self.step *= 0.9
    ############################### END OF PRINT ###########################
    ######################################################################
    # Section 4 : empty the storage list of the vectors, and wait for all
    # the threads.
    self.vectors = []
    ######################################################################
    ######################################################################
    # Section 5 : synchronize all clients at the end of a server iteration.
    if (way2work == "sync"):
        self.iterator -= 1
        self.exit_condition = (self.iterator == 0)
        waiting.wait(lambda: self.exit_condition)
    #time.sleep(1)
    return route_guide_pb2.Vector(poids=vector)
def GetFeature(self, request, context):
    """One server round-trip of the distributed SGD (generated-data
    variant, "sync" or "async" depending on `way2work`).

    Request kinds in `request.poids`:
      - "pret"  : whole training set plus l and sample count,
      - "getw0" : starting vector,
      - otherwise: "<gradient><delay><base vector>" from a client.
    """
    ######################################################################
    # Section 1 : wait for all the clients -> get their vectors and
    # appoint one of them as the printer.
    if (way2work == "sync"):
        self.iterator += 1
        if (request.poids == "pret" or request.poids == "getw0"):
            self.vectors.append(request.poids)
        else:
            self.vectors.append(
                std.str2dict(request.poids.split("<delay>")[0]))
        # Enter barrier: block until all nbClients threads are here.
        self.enter_condition = (self.iterator == nbClients)
        waiting.wait(lambda: self.enter_condition)
    # Last thread through becomes the (single) printer.
    self.printerThreadName = threading.current_thread().name
    ######################################################################
    ######################################################################
    # Section 2 : compute the new vector -> send the data, a merge of
    # all the vectors we got from the clients or the message 'stop' the
    # signal to the client that we converged.
    normDiff = 0
    normGradW = 0
    normPrecW = 0
    if (request.poids == 'pret'):
        vector = std.datadict2Sstr(trainingSet) + "<depre>" + str(
            l) + "<samples>" + str(numSamples)
    elif (request.poids == 'getw0'):
        vector = std.dict2str(w0)
    else:
        if (way2work == "sync"):
            # Merge all client gradients, then one descent step.
            gradParam = std.mergeSGD(self.vectors)
            gradParam = std.sparse_mult(self.step, gradParam)
            vector = std.sparse_vsous(self.oldParam, gradParam)
        else:
            # Async: apply this client's gradient against its base wt.
            info = request.poids.split("<delay>")
            grad_vector = std.str2dict(info[0])
            wt = std.str2dict(info[1])
            vector = std.asynchronousUpdate(self.oldParam, grad_vector,
                                            wt, l, self.step)
        ######## NORMALIZATION OF THE VECTOR OF PARAMETERS #########
        normW = math.sqrt(std.sparse_dot(vector, vector))
        vector = std.sparse_mult(1. / normW, vector)
        ############################################################
        diff = std.sparse_vsous(self.oldParam, vector)
        normDiff = math.sqrt(std.sparse_dot(diff, diff))
        # NOTE(review): this takes the norm of the freshly normalized
        # parameter vector (always ~1), not of the gradient — the sibling
        # variants use the gradient here; looks like a bug, confirm.
        normGradW = math.sqrt(std.sparse_dot(vector, vector))
        normPrecW = math.sqrt(std.sparse_dot(self.oldParam, self.oldParam))
        if ((normDiff <= c1 * normPrecW) or (self.epoch > nbMaxCall) or
                (normGradW <= c2 * self.normgW0)):
            self.paramVector = vector
            vector = 'stop'
        else:
            vector = std.dict2str(vector)
    ######################################################################
    ######################################################################
    # Section 3 : wait that all the threads pass the computation area, and
    # store the new computed vector.
    realComputation = (request.poids != 'pret') and (
        request.poids != 'getw0') and (vector != 'stop')
    if (way2work == "sync"):
        # Exit barrier: all threads leave the computation area together.
        self.iterator -= 1
        self.exit_condition = (self.iterator == 0)
        waiting.wait(lambda: self.exit_condition)
    if (realComputation):
        self.oldParam = std.str2dict(vector)
    ######################################################################
    ###################### PRINT OF THE CURRENT STATE ######################
    ##################### AND DO CRITICAL MODIFICATIONS ####################
    # NOTE(review): `&` (bitwise) is used where `and` is meant — works on
    # bools and binds tighter than `or`, so the meaning is (A and B) or C.
    if ((threading.current_thread().name == self.printerThreadName) &
            (way2work == "sync") or (way2work == "async")):
        std.printTraceGenData(self.epoch, vector, self.paramVector,
                              self.testingErrors, self.trainingErrors,
                              trainaA, trainaB, trainoA, trainoB,
                              hypPlace, normDiff, normGradW, normPrecW,
                              self.normgW0, w0, realComputation,
                              self.oldParam, trainingSet, testingSet,
                              nbTestingData, nbExamples, nbMaxCall,
                              self.merged, "", c1, c2, l)
        self.merged.append(self.oldParam)
        self.epoch += 1
        # Geometric step decay.
        self.step *= 0.9
        # Debug check: classify one fixed training example.
        dataTest = trainingSet[9]
        label = dataTest.get(-1, 0)
        example = std.take_out_label(dataTest)
        print("label = " + str(label))
        print("SVM says = " + str(std.sparse_dot(self.oldParam, example)))
    ############################### END OF PRINT ###########################
    ######################################################################
    # Section 4 : empty the storage list of the vectors, and wait for all
    # the threads.
    self.vectors = []
    waiting.wait(lambda: (self.vectors == []))
    ######################################################################
    #time.sleep(1)
    return route_guide_pb2.Vector(poids=vector)
def generateData(nbData):
    """Generate a linearly separable 2-D training set around the line
    b = -a (hyperplane with orthogonal vector u = (1, 1)).

    Points are drawn uniformly in [-10, 10] x [-10, 10] (0.1 steps) and
    kept only if their distance to the hyperplane is at least d/1 (margin
    filter); class A (label 1) lies above the line, class B (label -1)
    below.

    :param nbData: number of examples to generate.
    :return: (trainingSet, absA, ordA, absB, ordB) — the labelled sparse
        examples and the coordinates of each class for plotting.
    """
    # d is the double of the distance of each point in the square to the
    # separator hyperplane. (A duplicate pair of d/u initializations in the
    # original immediately overwrote these values; the dead pair was
    # removed — only d = .1 and this u ever took effect.)
    d = .1
    # u is a hyperplane's orthogonal vector.
    u = {1: 1, 2: 1}
    # A and B denote each a different class, respectively associated
    # to the labels 1 and -1.
    A = []
    B = []
    # Number of examples we kept for our training set.
    nbExamples = 0
    # Number of data we rejected because they are not at the good
    # distance of the hyperplane.
    nbRejeted = 0
    absA, absB = [], []
    ordA, ordB = [], []
    cardA = 0
    cardB = 0
    while (nbExamples < nbData):
        a = random.randint(0, 100) / 10
        b = random.randint(0, 100) / 10
        # Reflect the point into one of the four quadrants uniformly.
        sign = random.random()
        if (sign <= 0.25):
            a = -a
            b = -b
        elif ((0.25 <= sign) and (sign <= 0.5)):
            b = -b
        elif ((0.5 <= sign) and (sign <= 0.75)):
            a = -a
        genvect = {1: a, 2: b}
        # Distance of the point to the hyperplane: |<u, x>| / ||u||.
        dist = abs((std.sparse_dot(u, genvect)) /
                   (math.sqrt(std.sparse_dot(u, u))))
        # Accept when no margin is required (d == 0) or the point is far
        # enough from the hyperplane. (The redundant `d != 0` guard and
        # bitwise `&` of the original were simplified — same truth table.)
        valide = (d == 0) or (dist >= d)
        if (b > -a) and valide:
            # Above the line: class A, label 1 stored under key -1.
            A.append({-1: 1, 1: a, 2: b})
            absA.append(a)
            ordA.append(b)
            nbExamples += 1
            cardA += 1
        elif (b < -a) and valide:
            # Below the line: class B, label -1.
            B.append(({-1: -1, 1: a, 2: b}))
            absB.append(a)
            ordB.append(b)
            nbExamples += 1
            cardB += 1
        else:
            nbRejeted += 1
    #plt.scatter(absA,ordA,s=10,c='r',marker='*')
    #plt.scatter(absB,ordB,s=10,c='b',marker='o')
    #plt.plot([-10,10],[10,-10],'orange')
    #plt.show()
    trainingSet = A + B
    return trainingSet, absA, ordA, absB, ordB
# Initialisation of the data spV1 = {1: 4, 3: 5, 7: 1, 9: 10} spV2 = {1: 4, 2: 3, 7: 6, 9: 2} spV3 = {1: 2, 3: 5, 7: 5, 9: 2} data = [spV1, spV2, spV3] data[0][-1] = 1 data[1][-1] = -1 data[2][-1] = 1 print('') print("############## Test of the sparse scalar product. ##############") print('') spdsp = std.sparse_dot(spV1, spV2) print("spdsp = " + str(spdsp)) print('') print("################ Test of the sparse sum. #################") print('') spdsu = std.sparse_vsum(spV1, spV2) print("spdsum = " + str(spdsu)) empty = std.sparse_vsum({}, spV1) print("empty = " + str(empty)) print('') print("############### Test of the sparse map. ###############") print('')