コード例 #1
0
def stats(data):
    """
    Compute per-variable minimum, maximum, mean and variance in one pass.

    The means and variances are updated incrementally, see:
    http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Incremental_algorithm

    data -- non-empty sequence of queries; every query is a sequence of
            samples and every sample a sequence of numbers. The number of
            variables is taken from the first sample of the first query.

    Returns the list [mins, maxs, means, variances], each holding one entry
    per variable. The variances are normalized by (number of samples - 1).
    When only a single sample is present that divisor would be zero, so the
    un-normalized accumulator is returned instead of raising
    ZeroDivisionError.
    """

    nx = len(data[0][0])  # number of variables

    # extreme start values so that the first sample always replaces them
    mins = [float_info.max] * nx
    maxs = [-float_info.max] * nx
    means = [0] * nx  # running means of variables
    variances = [0] * nx  # running variance accumulators of variables

    # sample counter
    j = 0

    # run through data once
    for q in data:
        for x in q:
            j += 1

            # update mins and maxs
            for k, value in enumerate(x):
                if value > maxs[k]:
                    maxs[k] = value
                if value < mins[k]:
                    mins[k] = value

            # update means and variances
            # NOTE(review): assumes la.vsum / la.sax / la.vmul are the
            # element-wise sum, scalar scaling and element-wise product.
            delta = la.vsum(x, la.sax(-1.0, means))
            means = la.vsum(means, la.sax(1.0 / j, delta))
            variances = la.vsum(
                variances, la.vmul(delta, la.vsum(x, la.sax(-1.0, means))))

    # normalize variance; skipped for a single sample (divisor would be 0)
    if j > 1:
        variances = la.sax(1.0 / (j - 1), variances)

    return [mins, maxs, means, variances]
コード例 #2
0
ファイル: eda.py プロジェクト: guess/y-test-ranknet
def stats(data):
    """
    Compute per-variable minimum, maximum, mean and variance in one pass.

    The means and variances are updated incrementally, see:
    http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Incremental_algorithm

    data -- non-empty sequence of queries; every query is a sequence of
            samples and every sample a sequence of numbers. The number of
            variables is taken from the first sample of the first query.

    Returns the list [mins, maxs, means, variances], each holding one entry
    per variable. The variances are normalized by (number of samples - 1).
    When only a single sample is present that divisor would be zero, so the
    un-normalized accumulator is returned instead of raising
    ZeroDivisionError.
    """

    nx = len(data[0][0])        # number of variables

    # extreme start values so that the first sample always replaces them
    mins = [float_info.max] * nx
    maxs = [-float_info.max] * nx
    means = [0] * nx            # running means of variables
    variances = [0] * nx        # running variance accumulators of variables

    # sample counter
    j = 0

    # run through data once
    for q in data:
        for x in q:
            j += 1

            # update mins and maxs
            for k, value in enumerate(x):
                if value > maxs[k]:
                    maxs[k] = value
                if value < mins[k]:
                    mins[k] = value

            # update means and variances
            # NOTE(review): assumes la.vsum / la.sax / la.vmul are the
            # element-wise sum, scalar scaling and element-wise product.
            delta = la.vsum(x, la.sax(-1.0, means))
            means = la.vsum(means, la.sax(1.0 / j, delta))
            variances = la.vsum(variances, la.vmul(delta, la.vsum(x, la.sax(-1.0, means))))

    # normalize variance; skipped for a single sample (divisor would be 0)
    if j > 1:
        variances = la.sax(1.0 / (j - 1), variances)

    return [mins, maxs, means, variances]
コード例 #3
0
ファイル: ranknet_test.py プロジェクト: guess/y-test-ranknet
 def xtest_training_2(self): 
     
     # trainsg on several queries
     data = []
     d = range(10)
     for j in d:
         data.append( [ [j, random.choice([0, 1])] + [random.random() for _ in xrange(self.ninp)] for _ in xrange(self.nq) ] )
     
     print data
             
     nepoch = 10000    # number of training epochs
     rate = 0.1        # learning rate
     nprint = 1000     # print frequency
     
     # compute current cost and estimations
     for je in xrange(nepoch):
         
         # select training sample at random
         jq = random.choice(d)   
         
         if je % nprint == 0:
             
             # compute cost of a first sample
             C = ranknet.cost(data[0], self.model, self.sigma)
             
             print je, C[0], C[1], C[2]
             print "w:", self.model.getw() 
         
         # compute gradients
         g = ranknet.gradient(data[jq], self.model, self.sigma)
     
         # update weights
         w = la.vsum( self.model.getw(), la.sax(-rate, g) )
         self.model.setw(w)
 
     # final report
     for query in data:
         print "Query: ", query[0][0]
         C = ranknet.cost(query, self.model, self.sigma)
         for j in xrange(len(query)):
             print query[j][1], C[1][j]
コード例 #4
0
ファイル: ranknet_test.py プロジェクト: guess/y-test-ranknet
 def xtest_training_1(self):
     
     # trainsg on a single query
     
     nepoch = 10000    # number of training epochs
     rate = 0.1        # learning rate
     nprint = 1000     # print frequency
             
     for je in xrange(nepoch):
         
         # compute current cost and estimations
         C = ranknet.cost(self.query, self.model, self.sigma)
         if je % nprint == 0:
             print je, C[0], C[1], C[2]
             print "w:", self.model.getw() 
         # compute gradients
         g = ranknet.gradient(self.query, self.model, self.sigma)
     
         # update weights
         w = la.vsum( self.model.getw(), la.sax(-rate, g) )
         self.model.setw(w)
コード例 #5
0
ファイル: trainb.py プロジェクト: guess/y-test-ranknet
def train(data, opts, net, writefcn):
    """
    Batch training of ranknet model using RProp
    """

    # random permutation of data
    perm = range(len(data))
    random.shuffle(perm)

    jvalid = perm[0:opts.nvalid]  # validation data index
    jtrain = perm[opts.nvalid:]  # training data index

    nfail = 0  # current number of validation fails
    mincost = 1.e+100  # current known minimal validation cost
    wbest = net.getw()  # weights for minimal validation error

    # write out options and initial network
    writefcn(str(opts) + "\n")
    writefcn(str(net) + "\n")

    # initialize RProp working memory
    rpropmem = ([1.e-5] * len(net.getw()), [1] * len(net.getw()))

    print "Start batch training, number of queries: ", len(data)
    print str(opts)
    print str(net)

    # training iterations
    for je in xrange(opts.maxepoch):

        # validation cost
        vcost = 0.0
        for j in jvalid:
            c = ranknet.cost(data[j], net, opts.sigma)
            vcost += c[0]

        # update best estimates
        if vcost < mincost:
            mincost = vcost
            wbest = net.getw()
        else:
            nfail += 1

        # check stopping criteria
        if opts.maxfail > 0 and nfail >= opts.maxfail:
            break

        # reset accumulators
        tcost = 0.0  # training cost
        G = [0] * len(net.getw())  # accumulated gradient

        # batch training
        for jt in jtrain:
            # take next training query
            query = data[jt]

            # compute cost
            c = ranknet.cost(query, net, opts.sigma)
            tcost += c[0]

            # compute gradient
            g = ranknet.gradient(query, net, opts.sigma)

            # update batch gradient
            G = la.vsum(G, g)

        # print to screen
        print je, nfail, tcost, vcost, net.getw()

        # write out
        sw = str(net.getw()).replace("[", "").replace("]", "").replace(",", "")
        writefcn("%d %d %e %e %s\n" % (je, nfail, tcost, vcost, sw))

        # RProp update steps
        rpropmem = opt.rprop(G, rpropmem)
        steps = rpropmem[0]
        signs = rpropmem[1]

        # update network weights
        w = la.vsum(net.getw(), la.vmul(steps, la.sax(-1, signs)))
        net.setw(w)

    # training complete
    print "Training stopped"
    print "Final cost: ", mincost
    print "Final weights: ", wbest

    # return updated model
    net.setw(wbest)
    return net
コード例 #6
0
ファイル: trainb.py プロジェクト: guess/y-test-ranknet
def train(data, opts, net, writefcn):
    """
    Batch training of ranknet model using RProp
    """
         
    # random permutation of data
    perm = range(len(data))
    random.shuffle(perm)
        
    jvalid = perm[0:opts.nvalid]    # validation data index                         
    jtrain = perm[opts.nvalid:]     # training data index
       
    nfail = 0               # current number of validation fails
    mincost = 1.e+100       # current known minimal validation cost
    wbest = net.getw()      # weights for minimal validation error
    
    # write out options and initial network
    writefcn(str(opts) + "\n")
    writefcn(str(net) + "\n")    
    
    # initialize RProp working memory
    rpropmem = ( [1.e-5] * len(net.getw()), [ 1 ] * len(net.getw()) )
    
    print "Start batch training, number of queries: ", len(data)
    print str(opts)
    print str(net)    
            
    # training iterations
    for je in xrange(opts.maxepoch):
                
        # validation cost
        vcost = 0.0                     
        for j in jvalid:
            c = ranknet.cost(data[j], net, opts.sigma)
            vcost += c[0]
        
        # update best estimates
        if vcost < mincost: 
            mincost = vcost
            wbest = net.getw()
        else: 
            nfail += 1
        
        # check stopping criteria                
        if opts.maxfail > 0 and nfail >= opts.maxfail:
            break
        
        # reset accumulators
        tcost = 0.0                     # training cost
        G = [0] * len(net.getw())       # accumulated gradient
        
        # batch training 
        for jt in jtrain:
            # take next training query
            query = data[jt]
            
            # compute cost
            c = ranknet.cost(query, net, opts.sigma)
            tcost += c[0]
                        
            # compute gradient 
            g = ranknet.gradient(query, net, opts.sigma)
        
            # update batch gradient 
            G = la.vsum(G, g)
        
        # print to screen
        print je, nfail, tcost, vcost, net.getw()
        
        # write out
        sw = str(net.getw()).replace("[", "").replace("]", "").replace(",", "")
        writefcn("%d %d %e %e %s\n" % (je, nfail, tcost, vcost, sw))
        
        # RProp update steps
        rpropmem = opt.rprop(G, rpropmem)
        steps = rpropmem[0]
        signs = rpropmem[1]
                        
        # update network weights
        w = la.vsum(net.getw(), la.vmul(steps, la.sax(-1, signs)))
        net.setw(w)
    
    # training complete             
    print "Training stopped"
    print "Final cost: ", mincost
    print "Final weights: ", wbest
        
    # return updated model
    net.setw(wbest)
    return net
コード例 #7
0
ファイル: trainsg.py プロジェクト: guess/y-test-ranknet
def train(data, opts, net, writefcn):
    """
    Stochastic gradient training of ranknet model 
    """

    # random permutation of data
    perm = range(len(data))
    random.shuffle(perm)

    jvalid = perm[0:opts.nvalid]  # validation data index
    jtrain = perm[opts.nvalid:]  # training data index

    nfail = 0  # current number of validation fails
    mincost = 1.e+100  # current known minimal validation cost
    wbest = net.getw()  # weights for minimal validation error

    print "Start stochastic gradient descent training, number of queries: ", len(
        data)
    print str(opts)
    print str(net)

    # stochastic gradient training
    for je in xrange(opts.maxepoch):

        # validation
        if je % opts.nepval == 0:

            # compute validation cost
            C = 0.0
            for j in jvalid:
                c = ranknet.cost(data[j], net, opts.sigma)
                C += c[0]

            # update best estimates
            if C < mincost:
                mincost = C
                wbest = net.getw()
            else:
                nfail += 1

            # check stopping criteria
            if opts.maxfail > 0 and nfail >= opts.maxfail:
                break

            # print
            print je, nfail, C, net.getw()

            # write to report file
            sw = str(net.getw()).replace("[", "").replace("]",
                                                          "").replace(",", "")
            writefcn("%d %d %e %s\n" % (je, nfail, C, sw))

        # next training query
        jq = je % len(jtrain)
        query = data[jtrain[jq]]

        # compute gradients
        g = ranknet.gradient(query, net, opts.sigma)

        # update weights
        w = la.vsum(net.getw(), la.sax(-opts.rate, g))
        net.setw(w)

    print "Training stopped"
    print "Final cost: ", mincost
    print "Final weights: ", wbest

    # return updated model
    net.setw(wbest)
    return net