from sys import float_info  # for the float_info.max sentinels below


def stats(data):
    # Incremental (Welford) computation of per-variable statistics.
    # See: http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Incremental_algorithm
    # la is the project's vector-helper module; a sketch of the helpers
    # used here (vsum, sax, vmul) follows this function.
    nx = len(data[0][0])  # number of variables
    mins = [float_info.max] * nx
    maxs = [-float_info.max] * nx
    means = [0] * nx      # means of variables
    variances = [0] * nx  # variances of variables
    # sample counter
    j = 0
    # run through data once
    for q in data:
        for x in q:
            # update counter
            j = j + 1
            # update mins and maxs
            for k in xrange(len(x)):
                if x[k] > maxs[k]:
                    maxs[k] = x[k]
                if x[k] < mins[k]:
                    mins[k] = x[k]
            # update means and variances
            delta = la.vsum(x, la.sax(-1.0, means))
            means = la.vsum(means, la.sax(1.0 / j, delta))
            variances = la.vsum(
                variances, la.vmul(delta, la.vsum(x, la.sax(-1.0, means))))
    # normalize variance (sample variance, hence j - 1)
    variances = la.sax(1.0 / (j - 1), variances)
    return [mins, maxs, means, variances]
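# stats() above leans on three small vector helpers from the project's la
# module. Their implementations are not shown in this file; the following
# is a minimal sketch inferred from how they are called (the real module
# may differ):

def vsum(a, b):
    # elementwise sum of two equal-length vectors
    return [ai + bi for ai, bi in zip(a, b)]

def sax(s, a):
    # scalar times vector (cf. BLAS scal/axpy)
    return [s * ai for ai in a]

def vmul(a, b):
    # elementwise (Hadamard) product of two equal-length vectors
    return [ai * bi for ai, bi in zip(a, b)]

# With these, the update in stats() is Welford's incremental algorithm in
# vector form: delta = x - mean; mean += delta / j; M2 += delta * (x - mean').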
def xtest_training_2(self):
    # training on several queries
    data = []
    d = range(10)
    for j in d:
        data.append(
            [[j, random.choice([0, 1])] + [random.random() for _ in xrange(self.ninp)]
             for _ in xrange(self.nq)])
    print data
    nepoch = 10000  # number of training epochs
    rate = 0.1      # learning rate
    nprint = 1000   # print frequency
    for je in xrange(nepoch):
        # select a training sample at random
        jq = random.choice(d)
        if je % nprint == 0:
            # compute current cost and estimations of the first sample
            C = ranknet.cost(data[0], self.model, self.sigma)
            print je, C[0], C[1], C[2]
            print "w:", self.model.getw()
        # compute gradients
        g = ranknet.gradient(data[jq], self.model, self.sigma)
        # update weights
        w = la.vsum(self.model.getw(), la.sax(-rate, g))
        self.model.setw(w)
    # final report: known label vs. estimated score for every document
    for query in data:
        print "Query: ", query[0][0]
        C = ranknet.cost(query, self.model, self.sigma)
        for j in xrange(len(query)):
            print query[j][1], C[1][j]
def xtest_training_1(self):
    # training on a single query
    nepoch = 10000  # number of training epochs
    rate = 0.1      # learning rate
    nprint = 1000   # print frequency
    for je in xrange(nepoch):
        # compute current cost and estimations
        C = ranknet.cost(self.query, self.model, self.sigma)
        if je % nprint == 0:
            print je, C[0], C[1], C[2]
            print "w:", self.model.getw()
        # compute gradients
        g = ranknet.gradient(self.query, self.model, self.sigma)
        # update weights
        w = la.vsum(self.model.getw(), la.sax(-rate, g))
        self.model.setw(w)
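# Both tests above treat ranknet.cost and ranknet.gradient as black boxes
# (cost returns a tuple whose first element is the total cost and whose
# second holds the per-document score estimates). For reference, the
# standard RankNet pairwise cross-entropy of Burges et al., which a cost
# function like this typically sums over all labelled document pairs, is
# sketched below; the function name and aggregation are illustrative
# assumptions, not the actual ranknet API:

import math

def pair_cost(si, sj, Sij, sigma):
    # RankNet cross-entropy for model scores si, sj of documents i, j and
    # known preference Sij in {-1, 0, +1} (+1 means i should rank above j):
    #   C = (1 - Sij)/2 * sigma*(si - sj) + log(1 + exp(-sigma*(si - sj)))
    d = sigma * (si - sj)
    return 0.5 * (1.0 - Sij) * d + math.log(1.0 + math.exp(-d))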
def train(data, opts, net, writefcn):
    """ Batch training of ranknet model using RProp """
    # random permutation of data
    perm = range(len(data))
    random.shuffle(perm)
    jvalid = perm[0:opts.nvalid]  # validation data index
    jtrain = perm[opts.nvalid:]   # training data index
    nfail = 0           # current number of validation fails
    mincost = 1.e+100   # current known minimal validation cost
    wbest = net.getw()  # weights for minimal validation error
    # write out options and initial network
    writefcn(str(opts) + "\n")
    writefcn(str(net) + "\n")
    # initialize RProp working memory: per-weight steps and previous signs
    rpropmem = ([1.e-5] * len(net.getw()), [1] * len(net.getw()))
    print "Start batch training, number of queries: ", len(data)
    print str(opts)
    print str(net)
    # training iterations
    for je in xrange(opts.maxepoch):
        # validation cost
        vcost = 0.0
        for j in jvalid:
            c = ranknet.cost(data[j], net, opts.sigma)
            vcost += c[0]
        # update best estimates
        if vcost < mincost:
            mincost = vcost
            wbest = net.getw()
        else:
            nfail += 1
        # check stopping criteria
        if opts.maxfail > 0 and nfail >= opts.maxfail:
            break
        # reset accumulators
        tcost = 0.0                # training cost
        G = [0] * len(net.getw())  # accumulated gradient
        # batch training
        for jt in jtrain:
            # take next training query
            query = data[jt]
            # compute cost
            c = ranknet.cost(query, net, opts.sigma)
            tcost += c[0]
            # compute gradient
            g = ranknet.gradient(query, net, opts.sigma)
            # update batch gradient
            G = la.vsum(G, g)
        # print to screen
        print je, nfail, tcost, vcost, net.getw()
        # write out
        sw = str(net.getw()).replace("[", "").replace("]", "").replace(",", "")
        writefcn("%d %d %e %e %s\n" % (je, nfail, tcost, vcost, sw))
        # RProp update steps
        rpropmem = opt.rprop(G, rpropmem)
        steps = rpropmem[0]
        signs = rpropmem[1]
        # update network weights
        w = la.vsum(net.getw(), la.vmul(steps, la.sax(-1, signs)))
        net.setw(w)
    # training complete
    print "Training stopped"
    print "Final cost: ", mincost
    print "Final weights: ", wbest
    # return updated model
    net.setw(wbest)
    return net
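# The batch trainer above delegates step-size adaptation to opt.rprop,
# keeping per-weight steps and the previous gradient signs in rpropmem.
# A minimal sign-based RProp update consistent with how that (steps, signs)
# tuple is consumed is sketched below; the growth/shrink factors and step
# bounds are the usual Riedmiller-Braun defaults, and the actual opt module
# may differ:

def rprop_sketch(g, mem, up=1.2, down=0.5, smin=1.e-8, smax=1.0):
    # mem = (steps, previous signs); returns the new (steps, signs) pair.
    # The caller then moves each weight by -step * sign(gradient).
    steps, prev = mem
    signs = [cmp(gi, 0) for gi in g]  # sign of the current gradient
    newsteps = []
    for s, p, si in zip(steps, prev, signs):
        if p * si > 0:
            s = min(s * up, smax)    # same direction: accelerate
        elif p * si < 0:
            s = max(s * down, smin)  # direction flipped: back off
        newsteps.append(s)
    return (newsteps, signs)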
def train(data, opts, net, writefcn):
    """ Stochastic gradient training of ranknet model """
    # random permutation of data
    perm = range(len(data))
    random.shuffle(perm)
    jvalid = perm[0:opts.nvalid]  # validation data index
    jtrain = perm[opts.nvalid:]   # training data index
    nfail = 0           # current number of validation fails
    mincost = 1.e+100   # current known minimal validation cost
    wbest = net.getw()  # weights for minimal validation error
    print "Start stochastic gradient descent training, number of queries: ", len(data)
    print str(opts)
    print str(net)
    # stochastic gradient training
    for je in xrange(opts.maxepoch):
        # validation
        if je % opts.nepval == 0:
            # compute validation cost
            C = 0.0
            for j in jvalid:
                c = ranknet.cost(data[j], net, opts.sigma)
                C += c[0]
            # update best estimates
            if C < mincost:
                mincost = C
                wbest = net.getw()
            else:
                nfail += 1
            # check stopping criteria
            if opts.maxfail > 0 and nfail >= opts.maxfail:
                break
            # print to screen
            print je, nfail, C, net.getw()
            # write to report file
            sw = str(net.getw()).replace("[", "").replace("]", "").replace(",", "")
            writefcn("%d %d %e %s\n" % (je, nfail, C, sw))
        # next training query
        jq = je % len(jtrain)
        query = data[jtrain[jq]]
        # compute gradients
        g = ranknet.gradient(query, net, opts.sigma)
        # update weights
        w = la.vsum(net.getw(), la.sax(-opts.rate, g))
        net.setw(w)
    print "Training stopped"
    print "Final cost: ", mincost
    print "Final weights: ", wbest
    # return updated model
    net.setw(wbest)
    return net
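# A typical driver for either trainer, assuming data is a list of queries
# (each a list of [qid, label, feature...] rows as in the tests above) and
# that opts and net come from the project's option and network classes;
# the log file name here is illustrative:

def run_training(data, opts, net):
    report = open("train.log", "w")
    try:
        # writefcn is any callable taking a string; a bound file write fits
        return train(data, opts, net, report.write)
    finally:
        report.close()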