# get featuremethod (Ron) from features.user_features import FeatureGetter fg = FeatureGetter(testing) #print fg.get_features(907345, 907345, 1) dimension = fg.get_dimension() # create SGD object, sample different competitorsets, and do learning from gradientdescent import SGDLearning from gradientdescent_personalization import SGDLearningPersonalized import random # it is better to load all competitorsets at once and then do learning fast traindataobject = CompetitorSetCollection(testing=True,validation=False) Ntrain = traindataobject.get_nsamples() competitorsets_train = [traindataobject.get_sample(i) for i in xrange(Ntrain)] testdataobject = CompetitorSetCollection(testing=True, validation=True) Ntest = testdataobject.get_nsamples() competitorsets_test = [testdataobject.get_sample(i) for i in xrange(Ntest)] print "loaded all training and testing examples into memory" def train(sgd, competitorsets_train, niter, alpha, beta, lambda_winner, lambda_reject, verbose): N = len(competitorsets_train) # TRAINING # do a couple update steps t0 = time()
def run(cfg):
    # Distributed (MPI) SGD training driver.
    #
    # Reads all hyper-parameters from `cfg`, loads this node's share of the
    # training competitor sets, trains one SGD model per (lambda_winner,
    # lambda_reject) pair while periodically averaging parameters across all
    # MPI ranks via allreduce, and pickles the final parameters to disk on
    # rank 0. Returns the pickle filename (of the last lambda pair trained).
    #
    # NOTE(review): this chunk was recovered from collapsed (newline-stripped)
    # source; the indentation below is reconstructed and should be checked
    # against the original, especially the nesting of the parameter-storage
    # section relative to the lambda loop.
    #
    # NOTE(review): relies on module-level names not visible in this chunk:
    # comm, comm_rank, comm_size, safebarrier, LOOK_AHEAD_LENGTH,
    # SGDLearningRHOSTHASH, get_sqler, RON_MODE, np, os, pickle, time, sqrt.
    lambdas = cfg.lambdas
    memory_for_personalized_parameters = cfg.memory_for_personalized_parameters
    percentage = cfg.train_percentage
    outer_iterations = cfg.outer_iterations
    nepoches = cfg.nepoches
    alpha = cfg.alpha
    beta = cfg.beta
    verbose = cfg.verbose
    personalization = cfg.personalization
    rhostsize = cfg.rhostsize
    just_winning_sets = cfg.just_winning_sets  # NOTE(review): read but never used below
    testing = cfg.testing
    dirname = cfg.train_dirname
    if comm_rank == 0:
        print "using lambdas:", lambdas
    fg = FeatureGetter()
    if cfg.god_mode:
        # god mode: degenerate single-dimension features
        featuredimension = 1
    else:
        featuredimension = fg.get_dimension()
    get_feature_function = fg.get_features
    sq = get_sqler()
    overallnum_sets = sq.get_num_compsets("train")
    # Each node loads only a fraction of the full training set.
    num_sets = int(overallnum_sets * percentage)
    # Stagger the database load in groups of 3 ranks so the DB is not hit by
    # all nodes at once.
    for i in range(2, comm_size + 2, 3):
        if comm_rank == i or comm_rank == i - 1 or comm_rank == i - 2:
            # NOTE(review): this banner prints comm_rank + 1 while the
            # "Finished" banner below prints comm_rank -- inconsistent 1- vs
            # 0-based rank display.
            print ("Machine %d/%d - Start loading %s competitorsets for TRAIN" % (comm_rank + 1, comm_size, num_sets))
            t0 = time.time()
            cs_train = CompetitorSetCollection(num_sets=num_sets, mode="train")
            t1 = time.time()
            print "Machine %d/%d - Finished loading the competitorsets for TRAIN." % (comm_rank, comm_size)
            print "Loading competitorsets took %s." % (t1 - t0)
    safebarrier(comm)
    # sleeping so that we dont kill database
    sec = comm_rank
    print "machine %d is sleeping for %d sec." % (comm_rank, sec)
    time.sleep(sec)
    # Per-lambda-pair error grids.
    # NOTE(review): these four arrays are allocated but never written in this
    # chunk -- presumably filled by evaluation code elsewhere; verify.
    trainerrors = np.zeros((len(lambdas), len(lambdas)))
    testerrors = np.zeros((len(lambdas), len(lambdas)))
    trainmeannrank = np.zeros((len(lambdas), len(lambdas)))
    testmeannrank = np.zeros((len(lambdas), len(lambdas)))
    for lw in range(len(lambdas)):
        lambda_winner, lambda_reject = lambdas[lw]
        # Create sgd object
        if personalization:
            sgd = SGDLearningPersonalized(featuredimension, get_feature_function, memory_for_personalized_parameters)
        else:
            sgd = SGDLearningRHOSTHASH(featuredimension, get_feature_function, rhostsize=rhostsize)
        N = cs_train.get_nsamples()
        # Inner iterations per outer iteration: roughly `nepoches` passes over
        # this node's data.
        niter = int(N * nepoches)
        for outit in range(outer_iterations):
            # for each outer iteration we draw new samples iid per node
            sampleindices = []
            for _ in range(int(nepoches) + 1):
                sampleindices += range(N)
            random.shuffle(sampleindices)
            # Prefetch features for the first LOOK_AHEAD_LENGTH samples.
            update_lookahead_cnt = 0
            req_ids = cs_train.get_req_ids_for_samples(sampleindices[0:LOOK_AHEAD_LENGTH])
            fg.upt_out_prod_get(req_ids)
            for innerit in range(niter):
                # Global step index across outer iterations; drives the
                # learning-rate decay.
                i = outit * niter + innerit
                eta_t = 1 / sqrt(alpha + i * beta)
                # Progress log ~5 times per outer iteration.
                # NOTE(review): `niter / 5` is integer division in Python 2 and
                # raises ZeroDivisionError when niter < 5 -- confirm niter is
                # always >= 5.
                if not i % (niter / 5):
                    print (
                        "Machine %d/%d - Iterations out: %d/%d - in: %d/%d - eta %f - lambda %f"
                        % (comm_rank, comm_size, outit + 1, outer_iterations, innerit + 1, niter, eta_t, lambda_winner)
                    )
                # Refresh the feature prefetch window every LOOK_AHEAD_LENGTH
                # steps.
                update_lookahead_cnt += 1
                if update_lookahead_cnt == LOOK_AHEAD_LENGTH:
                    req_ids = cs_train.get_req_ids_for_samples(sampleindices[innerit : innerit + LOOK_AHEAD_LENGTH])
                    fg.upt_out_prod_get(req_ids)
                    update_lookahead_cnt = 0
                # draw random sample - UPDATE: now first get a random permutation, then do it
                sampleindex = sampleindices[innerit]
                competitorset = cs_train.get_sample(sampleindex)
                # Sanity check: every surfer in the drawn set must be covered
                # by the current prefetch window.
                for l in competitorset.get_surferlist():
                    assert l[1] in req_ids
                if verbose and not i % (niter / 5) and i > 1:
                    print (
                        "Iterations \n\tout: %d/%d \n\tin: %d/%d - eta %f - lambda %f"
                        % (outit + 1, outer_iterations, innerit + 1, niter, eta_t, lambda_winner)
                    )
                    print "\ttheta", min(sgd.theta), max(sgd.theta)
                    print "\tr", sgd.r
                    print "\tr_hosts", min(sgd.r_hosts), max(sgd.r_hosts)
                    print "\ttrue", competitorset.get_winner()
                    print "\tpredicted", sgd.predict(competitorset)
                    print "\tranking", sgd.rank(competitorset)
                # The actual SGD parameter update for this sample.
                sgd.update(competitorset, eta=eta_t, lambda_winner=lambda_winner, lambda_reject=lambda_reject)
            # Now we aggregate theta(_host), r(_host)
            # All ranks synchronize, then average each parameter across nodes
            # via allreduce (sum / comm_size) and install the means locally.
            print ("outer iteration %d/%d: node %d at safebarrier" % (outit + 1, outer_iterations, comm_rank))
            safebarrier(comm)
            if comm_rank == 0:
                print "all nodes arrived and we start allreduce/broadcasting"
            theta = comm.allreduce(sgd.theta) / float(comm_size)
            if comm_rank == 0:
                print "allreduce done for theta"
            if personalization:
                theta_hosts = comm.allreduce(sgd.theta_hosts) / float(comm_size)
                if comm_rank == 0:
                    print "allreduce done for theta_hosts"
            r = comm.allreduce(sgd.r) / float(comm_size)
            if comm_rank == 0:
                print "allreduce done for r"
            r_hosts = comm.allreduce(sgd.r_hosts) / float(comm_size)
            if comm_rank == 0:
                print "allreduce done for r_hosts"
                print "spreading mean of parameters done!"
            sgd.theta = theta
            if personalization:
                sgd.theta_hosts = theta_hosts
            sgd.r = r
            sgd.r_hosts = r_hosts
        print "done with training"
        # Store the parameters to /tscratch/tmp/csrec
        # Only rank 0 writes; guarded on the scratch mount existing.
        if comm_rank == 0:
            if os.path.exists("/tscratch"):
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
            filename = "parameters_lwin_%f_lrej_%f_testing_%d_personalized_%d_numsets_%d_outerit_%d_nepoches_%d.pkl" % (
                lambda_winner,
                lambda_reject,
                testing,
                personalization,
                num_sets,
                outer_iterations,
                nepoches,
            )
            if not RON_MODE:
                os.system("chmod -R 777 " + dirname)
            # NOTE(review): the file handles passed to pickle.dump are never
            # explicitly closed (no `with`); relies on CPython refcounting.
            if personalization:
                pickle.dump((sgd.theta, sgd.theta_hosts, sgd.r, sgd.r_hosts), open(dirname + filename, "wb"))
            else:
                pickle.dump((sgd.theta, sgd.r, sgd.r_hosts), open(dirname + filename, "wb"))
            print "Stored params at " + dirname + filename
    # Returns the filename of the LAST lambda pair only; on non-zero ranks
    # `filename` is never assigned -- NOTE(review): this raises
    # UnboundLocalError on ranks != 0; confirm run() is only consumed on rank 0.
    return filename