school_info=inst, ranking='pi_rescaled', year_start=1970, year_stop=2012, year_step=1) if args.validation: # if specified years are to be evaluated hold_out = [int(year) for year in args.validation.split(',')] testing_candidates, testing_jobs, testing_job_ranks = [], [], [] for i, year in enumerate(year_range): if year in hold_out: testing_candidates.append(candidate_pools[i]) testing_jobs.append(job_pools[i]) testing_job_ranks.append(job_ranks[i]) # Overwrite originals candidate_pools, job_pools, job_ranks = testing_candidates, testing_jobs, testing_job_ranks model = SigmoidModel(prob_function=args.prob_function) simulator = SimulationEngine(candidate_pools, job_pools, job_ranks, inst, model, iters=1, reg=0) if model.num_weights() > 0: w = np.array([float(x) for x in args.weights.split(',')]) if len(w) != model.num_weights(): print len(w), model.num_weights() raise ValueError('Invalid number of weights/model parameters!') else: w = None for i in xrange(args.num_iters): simulator.simulate(weights=w, ranking=args.ranking)
return args if __name__=="__main__": args = interface() inst = parse_institution_records(open(args.inst_file, 'rU')) candidate_pools, job_pools, job_ranks, year_range = load_assistant_prof_pools(open(args.fac_file), school_info=inst, ranking='pi_rescaled', year_start=1970, year_stop=2012, year_step=1) # Which model to use model = SigmoidModel(prob_function=args.prob_function) # Create a validation set hold_out = [int(year) for year in args.validation.split(',')] training_candidates = [] training_jobs = [] training_job_ranks = [] testing_candidates = [] testing_jobs = [] testing_job_ranks = [] for i, year in enumerate(year_range): if year in hold_out: testing_candidates.append(candidate_pools[i]) testing_jobs.append(job_pools[i]) testing_job_ranks.append(job_ranks[i])
# Build the training set: drop every year listed (comma-separated) in
# args.validation from each of the per-year lists.
hold_out = [int(year) for year in args.validation.split(',')]
training_candidates, training_jobs, training_job_ranks = [], [], []
training_orders, training_probs = [], []
for i, year in enumerate(year_range):
    if year in hold_out:
        continue
    training_candidates.append(candidate_pools[i])
    training_jobs.append(job_pools[i])
    training_job_ranks.append(job_ranks[i])
    training_orders.append(hiring_orders[i])
    training_probs.append(hiring_probs[i])

# Overwrite originals:
candidate_pools = training_candidates
job_pools = training_jobs
job_ranks = training_job_ranks
hiring_orders = training_orders
hiring_probs = training_probs

# Which model?
model = SigmoidModel(prob_function=args.prob_function)

# Find a starting place: evaluate a handful of random weight vectors and keep
# the one with the lowest negative log-likelihood.
simulator = SimulationEngine(
    candidate_pools, job_pools, job_ranks, inst, model,
    power=1, reg=args.reg,
    hiring_orders=hiring_orders, hiring_probs=hiring_probs)

w0 = None
best_neg_likelihood = np.inf
# NOTE: the number of restarts is hard-coded to 10; an args.num_steps-driven
# variant and a Gaussian (100*randn) draw were both disabled in the original.
for i in xrange(10):
    # Uniform draw: random() is in [0, 1), so each weight lies in [-50, 50).
    wtemp = 100 * (np.random.random(model.num_weights()) - 0.5)
    temp = simulator.calculate_neg_log_likelihood(weights=wtemp)
    # Strict '<' is kept as-is so a NaN likelihood never replaces the best.
    if temp < best_neg_likelihood:
        w0 = wtemp.copy()
        best_neg_likelihood = temp
if __name__ == "__main__":
    args = interface()

    # Read both input files inside `with` blocks so the handles are closed
    # deterministically instead of being left for the garbage collector.
    # NOTE(review): assumes both loaders finish reading before they return --
    # confirm neither hands back a lazy reader over the file object.
    with open(args.inst_file, 'rU') as inst_handle:
        inst = parse_institution_records(inst_handle)
    with open(args.fac_file) as fac_handle:
        candidate_pools, job_pools, job_ranks, year_range = load_assistant_prof_pools(
            fac_handle, school_info=inst, ranking='pi_rescaled',
            year_start=1970, year_stop=2012, year_step=1)

    # Which model to use
    model = SigmoidModel(prob_function=args.prob_function)

    # Create a validation set: args.validation is a comma-separated list of
    # years; pools for those years are collected into the testing_* lists.
    hold_out = [int(year) for year in args.validation.split(',')]
    training_candidates = []
    training_jobs = []
    training_job_ranks = []
    testing_candidates = []
    testing_jobs = []
    testing_job_ranks = []
    for i, year in enumerate(year_range):
        if year in hold_out:
            testing_candidates.append(candidate_pools[i])
            testing_jobs.append(job_pools[i])
            testing_job_ranks.append(job_ranks[i])
# Compute actual stats, if requested: pair every candidate with the location
# of their first assistant-professor job, report on that, then stop.
if args.actual:
    import sys

    actual_hires = [(person, person.first_asst_job_location)
                    for pool in candidate_pools
                    for person, _rank in pool]
    compute_network_stats(actual_hires, inst, output)
    output.close()
    # sys.exit() rather than the bare exit(): the latter is an interactive
    # helper installed by the `site` module and is not guaranteed to exist.
    sys.exit()

# Otherwise, set up the model + simulator
if args.prob_function == 'flat':
    # 'flat' could very easily be a prob_function in the sigmoid model -- this is faster
    model = ConfigurationModel()
else:
    model = SigmoidModel(prob_function=args.prob_function)
simulator = SimulationEngine(candidate_pools, job_pools, job_ranks, inst, model,
                             iters=1, reg=0)

# Parse the comma-separated weight vector only when the model has parameters;
# a parameter-free model gets w = None.
if model.num_weights() > 0:
    w = np.array([float(x) for x in args.weights.split(',')])
    if len(w) != model.num_weights():
        # Single-argument print: emits "<given> <expected>" exactly like the
        # old `print a, b` statement, but is also valid on Python 3.
        print('%s %s' % (len(w), model.num_weights()))
        raise ValueError('Invalid number of weights/model parameters!')
else:
    w = None
year_start=1970, year_stop=2012, year_step=1) if args.validation: # if specified years are to be evaluated hold_out = [int(year) for year in args.validation.split(',')] testing_candidates, testing_jobs, testing_job_ranks = [], [], [] for i, year in enumerate(year_range): if year in hold_out: testing_candidates.append(candidate_pools[i]) testing_jobs.append(job_pools[i]) testing_job_ranks.append(job_ranks[i]) # Overwrite originals candidate_pools, job_pools, job_ranks = testing_candidates, testing_jobs, testing_job_ranks model = SigmoidModel(prob_function=args.prob_function) simulator = SimulationEngine(candidate_pools, job_pools, job_ranks, inst, model, iters=1, reg=0) if model.num_weights() > 0: w = np.array([float(x) for x in args.weights.split(',')]) if len(w) != model.num_weights(): print len(w), model.num_weights() raise ValueError('Invalid number of weights/model parameters!') else: w = None