# Split the per-year pools into a held-out testing set and a training set, then
# fit the model weights on the training set only.
# NOTE(review): hold_out, year_range, the training_* lists, model and inst are
# expected to be defined earlier in the script (not visible in this excerpt).
testing_candidates, testing_jobs, testing_job_ranks = [], [], []
for idx, year in enumerate(year_range):
    if year in hold_out:
        testing_candidates.append(candidate_pools[idx])
        testing_jobs.append(job_pools[idx])
        testing_job_ranks.append(job_ranks[idx])
    else:
        training_candidates.append(candidate_pools[idx])
        training_jobs.append(job_pools[idx])
        training_job_ranks.append(job_ranks[idx])

# Find a decent starting place (using the training set): draw args.num_steps
# standard-normal weight vectors and keep the one with the lowest error.
simulator = SimulationEngine(training_candidates, training_jobs, training_job_ranks, inst, model,
                             power=args.power, reg=args.reg, iters=20)
w0 = None
best_error = np.inf
for _ in range(args.num_steps):
    trial_weights = np.random.randn(model.num_weights())
    trial_error = simulator.simulate(weights=trial_weights)
    # A NaN (or infinite) error never compares below best_error, so such draws
    # are silently ignored here.
    if trial_error < best_error:
        w0 = trial_weights.copy()
        best_error = trial_error

# Without this guard a fruitless search would hand x0=None to minimize(), which
# fails with an obscure error far from the real cause.
if w0 is None:
    raise RuntimeError('Random search found no starting weights (num_steps=%r); '
                       'need at least one draw with a finite error.' % (args.num_steps,))

# Optimize from there (for the training set)
simulator = SimulationEngine(training_candidates, training_jobs, training_job_ranks, inst, model,
                             power=args.power, reg=args.reg, iters=args.num_iters)
opt = {'maxiter': args.num_steps}
res = minimize(simulator.simulate, w0, method='Nelder-Mead', options=opt)
final_weights = res.x
# Single-argument print(...) produces the same output on Python 2 and 3.
print('FINAL_WEIGHTS: %s' % (final_weights,))
school_info=inst, ranking='pi_rescaled', year_start=1970, year_stop=2012, year_step=1) if args.validation: # if specified years are to be evaluated hold_out = [int(year) for year in args.validation.split(',')] testing_candidates, testing_jobs, testing_job_ranks = [], [], [] for i, year in enumerate(year_range): if year in hold_out: testing_candidates.append(candidate_pools[i]) testing_jobs.append(job_pools[i]) testing_job_ranks.append(job_ranks[i]) # Overwrite originals candidate_pools, job_pools, job_ranks = testing_candidates, testing_jobs, testing_job_ranks model = SigmoidModel(prob_function=args.prob_function) simulator = SimulationEngine(candidate_pools, job_pools, job_ranks, inst, model, iters=1, reg=0) if model.num_weights() > 0: w = np.array([float(x) for x in args.weights.split(',')]) if len(w) != model.num_weights(): print len(w), model.num_weights() raise ValueError('Invalid number of weights/model parameters!') else: w = None for i in xrange(args.num_iters): simulator.simulate(weights=w, ranking=args.ranking)
for i, year in enumerate(year_range): if year in hold_out: testing_candidates.append(candidate_pools[i]) testing_jobs.append(job_pools[i]) testing_job_ranks.append(job_ranks[i]) else: training_candidates.append(candidate_pools[i]) training_jobs.append(job_pools[i]) training_job_ranks.append(job_ranks[i]) # Find a decent starting place (using the training set) simulator = SimulationEngine(training_candidates, training_jobs, training_job_ranks, inst, model, power=args.power, reg=args.reg, iters=20) w0 = None best_error = np.inf for i in xrange(args.num_steps): wtemp = np.random.randn(model.num_weights()) error = simulator.simulate(weights=wtemp) if error < best_error: w0 = wtemp.copy() best_error = error # Optimize from there (for the training set) simulator = SimulationEngine(training_candidates, training_jobs,
# Keep only the years that are NOT held out, across all five parallel per-year
# lists (candidates, jobs, job ranks, hiring orders, hiring probabilities).
# NOTE(review): the training_* lists and hold_out are expected to be defined
# before this excerpt (not visible here).
for idx, year in enumerate(year_range):
    if year in hold_out:
        continue
    training_candidates.append(candidate_pools[idx])
    training_jobs.append(job_pools[idx])
    training_job_ranks.append(job_ranks[idx])
    training_orders.append(hiring_orders[idx])
    training_probs.append(hiring_probs[idx])

# Overwrite originals: from here on the script only sees the training years.
candidate_pools = training_candidates
job_pools = training_jobs
job_ranks = training_job_ranks
hiring_orders = training_orders
hiring_probs = training_probs

# Which model?
model = SigmoidModel(prob_function=args.prob_function)

# Find a starting place: sample a handful of random weight vectors and keep the
# one with the smallest negative log-likelihood.
simulator = SimulationEngine(candidate_pools, job_pools, job_ranks, inst, model,
                             power=1, reg=args.reg,
                             hiring_orders=hiring_orders, hiring_probs=hiring_probs)
w0 = None
best_neg_likelihood = np.inf
# Fixed at 10 draws; the original carried a commented-out args.num_steps here.
for _ in range(10):
    # Uniform on [-50, 50).
    # NOTE(review): the previous comment claimed ~[-100, 100], which would need
    # a factor of 200 rather than 100 -- confirm which range is intended.
    candidate_weights = 100*(np.random.random(model.num_weights()) - 0.5)
    neg_likelihood = simulator.calculate_neg_log_likelihood(weights=candidate_weights)
    if neg_likelihood < best_neg_likelihood:
        w0 = candidate_weights.copy()
        best_neg_likelihood = neg_likelihood

# Optimize from there: a fresh engine with the same configuration.
simulator = SimulationEngine(candidate_pools, job_pools, job_ranks, inst, model,
                             power=1, reg=args.reg,
                             hiring_orders=hiring_orders, hiring_probs=hiring_probs)
opt = {'maxiter': args.num_steps}
training_candidates, training_jobs, training_job_ranks = [], [], [] for i, year in enumerate(year_range): if year not in hold_out: training_candidates.append(candidate_pools[i]) training_jobs.append(job_pools[i]) training_job_ranks.append(job_ranks[i]) # Overwrite originals candidate_pools, job_pools, job_ranks = training_candidates, training_jobs, training_job_ranks model = SigmoidModel(prob_function=args.prob_function) # Find a decent starting place simulator = SimulationEngine(candidate_pools, job_pools, job_ranks, inst, model, power=1, reg=args.reg, iters=20) w0 = None best_error = np.inf for i in xrange(args.num_steps): wtemp = 200 * (np.random.random(model.num_weights()) - 0.5 ) # ~[-100, 100] error = simulator.simulate(weights=wtemp) if error < best_error: w0 = wtemp.copy() best_error = error # Optimize from there simulator = SimulationEngine(candidate_pools,
for person, rank in pool: actual_hires.append((person, person.first_asst_job_location)) compute_network_stats(actual_hires, inst, output) output.close() exit() # Otherwise, set up the model + simulator if args.prob_function == 'flat': # 'flat' could very easily be a prob_function in the sigmoid model -- this is faster model = ConfigurationModel() else: model = SigmoidModel(prob_function=args.prob_function) simulator = SimulationEngine(candidate_pools, job_pools, job_ranks, inst, model, iters=1, reg=0) if model.num_weights() > 0: w = np.array([float(x) for x in args.weights.split(',')]) if len(w) != model.num_weights(): print len(w), model.num_weights() raise ValueError('Invalid number of weights/model parameters!') else: w = None # Run however many iterations and compute stats for i in xrange(args.num_iters): hires = simulator.generate_network(weights=w, one_list=True) compute_network_stats(hires, inst, output)
# Evaluate fixed weights on the pools, optionally restricted to the years named
# in args.validation (a comma-separated list of years).
if args.validation:
    hold_out = [int(token) for token in args.validation.split(',')]
    # Positions of the years that are to be evaluated.
    kept = [pos for pos, year in enumerate(year_range) if year in hold_out]
    testing_candidates = [candidate_pools[pos] for pos in kept]
    testing_jobs = [job_pools[pos] for pos in kept]
    testing_job_ranks = [job_ranks[pos] for pos in kept]
    # Overwrite originals
    candidate_pools = testing_candidates
    job_pools = testing_jobs
    job_ranks = testing_job_ranks

model = SigmoidModel(prob_function=args.prob_function)
simulator = SimulationEngine(candidate_pools, job_pools, job_ranks, inst, model, iters=1, reg=0)

# Models without weights are simulated with weights=None; otherwise the weights
# come from the comma-separated args.weights string and must match in count.
w = None
if model.num_weights() > 0:
    w = np.array(list(map(float, args.weights.split(','))))
    if len(w) != model.num_weights():
        # Report "given expected" before failing (same output on Python 2 and 3).
        print('%s %s' % (len(w), model.num_weights()))
        raise ValueError('Invalid number of weights/model parameters!')

for _ in range(args.num_iters):
    simulator.simulate(weights=w, ranking=args.ranking)
# With args.actual set, report statistics for the hires recorded on the
# candidates themselves and stop; otherwise simulate networks and report
# statistics for each simulated one.
if args.actual:
    actual_hires = [(person, person.first_asst_job_location)
                    for pool in candidate_pools
                    for person, rank in pool]
    compute_network_stats(actual_hires, inst, output)
    output.close()
    exit()

# Otherwise, set up the model + simulator.
# 'flat' could very easily be a prob_function in the sigmoid model -- the
# dedicated configuration model is faster.
model = (ConfigurationModel() if args.prob_function == 'flat'
         else SigmoidModel(prob_function=args.prob_function))
simulator = SimulationEngine(candidate_pools, job_pools, job_ranks, inst, model, iters=1, reg=0)

# Models without weights are run with weights=None; otherwise the weights come
# from the comma-separated args.weights string and must match in count.
w = None
if model.num_weights() > 0:
    w = np.array(list(map(float, args.weights.split(','))))
    if len(w) != model.num_weights():
        # Report "given expected" before failing (same output on Python 2 and 3).
        print('%s %s' % (len(w), model.num_weights()))
        raise ValueError('Invalid number of weights/model parameters!')

# Run however many iterations and compute stats for each generated network.
for _ in range(args.num_iters):
    hires = simulator.generate_network(weights=w, one_list=True)
    compute_network_stats(hires, inst, output)
output.close()
# Fit the sigmoid model's weights, optionally leaving out the years named in
# args.validation (a comma-separated list of years).
if args.validation:
    hold_out = [int(token) for token in args.validation.split(',')]
    training_candidates, training_jobs, training_job_ranks = [], [], []
    for idx, year in enumerate(year_range):
        if year in hold_out:
            continue
        training_candidates.append(candidate_pools[idx])
        training_jobs.append(job_pools[idx])
        training_job_ranks.append(job_ranks[idx])
    # Overwrite originals
    candidate_pools, job_pools, job_ranks = training_candidates, training_jobs, training_job_ranks

model = SigmoidModel(prob_function=args.prob_function)

# Find a decent starting place: draw args.num_steps weight vectors uniformly
# from [-100, 100) and keep the one with the lowest simulated error.
simulator = SimulationEngine(candidate_pools, job_pools, job_ranks, inst, model,
                             power=1, reg=args.reg, iters=20)
w0 = None
best_error = np.inf
for _ in range(args.num_steps):
    trial_weights = 200*(np.random.random(model.num_weights()) - 0.5)
    trial_error = simulator.simulate(weights=trial_weights)
    # A NaN (or infinite) error never compares below best_error, so such draws
    # are silently ignored here.
    if trial_error < best_error:
        w0 = trial_weights.copy()
        best_error = trial_error

# Without this guard a fruitless search would hand x0=None to minimize(), which
# fails with an obscure error far from the real cause.
if w0 is None:
    raise RuntimeError('Random search found no starting weights (num_steps=%r); '
                       'need at least one draw with a finite error.' % (args.num_steps,))

# Optimize from there
simulator = SimulationEngine(candidate_pools, job_pools, job_ranks, inst, model,
                             power=1, reg=args.reg, iters=args.num_iters)
opt = {'maxiter': args.num_steps}
res = minimize(simulator.simulate, w0, method='Nelder-Mead', options=opt)
# Single-argument print(...) produces the same output on Python 2 and 3.
print(res)