Ejemplo n.º 1
0
    args.add_argument('-p', '--prob-function', help='Candidate probability/matching function', required=True)
    args.add_argument('-n', '--num-iters', help='Number of iterations to est. error', default=100, type=int)
    args.add_argument('-w', '--weights', help='Model parameters (as comma-separated string)', type=allow_negatives)
    args.add_argument('-r', '--ranking', help='Which ranking to use', default='pi_rescaled')
    args.add_argument('-v', '--validation', help='Years to hold out', default='')
    args = args.parse_args()
    return args


if __name__ == "__main__":
    args = interface()

    # Read both input files through context managers so their handles are
    # closed as soon as loading finishes instead of lingering until the
    # interpreter exits.
    # NOTE(review): the 'U' flag in 'rU' is deprecated in Python 3 and was
    # removed in 3.11 (open() raises ValueError there). It is kept unchanged
    # because the interpreter this script targets is not visible from here.
    with open(args.inst_file, 'rU') as inst_handle:
        inst = parse_institution_records(inst_handle)
        # The institutions file is deliberately still open while the faculty
        # file is loaded, in case `inst` reads from it lazily.
        with open(args.fac_file) as fac_handle:
            # NOTE(review): ranking is fixed to 'pi_rescaled' here although a
            # -r/--ranking option appears to be parsed; confirm whether
            # args.ranking should be passed instead.
            candidate_pools, job_pools, job_ranks, year_range = load_assistant_prof_pools(
                fac_handle,
                school_info=inst,
                ranking='pi_rescaled',
                year_start=1970,
                year_stop=2012,
                year_step=1)

    if args.validation:  # if specified years are to be evaluated
        # --validation is a comma-separated list of years, e.g. "2010,2011".
        hold_out = [int(year) for year in args.validation.split(',')]
        testing_candidates, testing_jobs, testing_job_ranks = [], [], []
        # The three pool lists are indexed in parallel with year_range, so the
        # position of a held-out year selects the matching entry in each list.
        for i, year in enumerate(year_range):
            if year in hold_out:
                testing_candidates.append(candidate_pools[i])
                testing_jobs.append(job_pools[i])
                testing_job_ranks.append(job_ranks[i])
        # Overwrite originals so everything below only sees the held-out years.
        # NOTE(review): year_range itself is not filtered, so it no longer
        # lines up index-for-index with the pools after this point.
        candidate_pools, job_pools, job_ranks = testing_candidates, testing_jobs, testing_job_ranks

    model = SigmoidModel(prob_function=args.prob_function)
Ejemplo n.º 2
0
def interface():
    """Parse the command-line options for this script.

    Three options are defined, all mandatory:
    ``-f/--fac-file``, ``-i/--inst-file`` and ``-s/--orders-file``.

    Returns:
        The ``argparse.Namespace`` produced by parsing ``sys.argv``, with
        attributes ``fac_file``, ``inst_file`` and ``orders_file``.
    """
    parser = argparse.ArgumentParser()

    # (short flag, long flag, help text) for every required option.
    required_options = (
        ('-f', '--fac-file', 'Faculty file'),
        ('-i', '--inst-file', 'Institutions file'),
        ('-s', '--orders-file', 'Input (pickle) file'),
    )
    for short_flag, long_flag, description in required_options:
        parser.add_argument(short_flag, long_flag, help=description, required=True)

    return parser.parse_args()


if __name__ == "__main__":
    args = interface()

    # Read both input files through context managers so their handles are
    # closed as soon as loading finishes instead of lingering until the
    # interpreter exits.
    # NOTE(review): the 'U' flag in 'rU' is deprecated in Python 3 and was
    # removed in 3.11 (open() raises ValueError there). It is kept unchanged
    # because the interpreter this script targets is not visible from here.
    with open(args.inst_file, 'rU') as inst_handle:
        inst = parse_institution_records(inst_handle)
        # The institutions file is deliberately still open while the faculty
        # file is loaded, in case `inst` reads from it lazily.
        with open(args.fac_file) as fac_handle:
            candidate_pools, job_pools, job_ranks, year_range = load_assistant_prof_pools(
                fac_handle,
                school_info=inst,
                ranking='pi_rescaled',
                year_start=1970,
                year_stop=2012,
                year_step=1)

    # NOTE(review): the option help describes the orders file as a pickle; if
    # load_hiring_order_set unpickles it, only trusted files should be passed.
    hiring_orders, hiring_probs = load_hiring_order_set(args.orders_file)
    # There must be exactly one set of hiring orders per job pool.
    if len(hiring_orders) != len(job_pools):
        raise ValueError('Incorrect number of pools!')

    # Sanity-check the first ordering stored for each pool against that pool.
    for i, pool in enumerate(job_pools):
        pool_size = len(pool)
        # The ordering needs one entry per job in the pool.
        if pool_size != len(hiring_orders[i][0]):
            raise ValueError('Incorrect pool size')
        # A permutation of 0..pool_size-1 sums to the same total as
        # arange(pool_size). This is a necessary check only: it does not
        # detect duplicated or out-of-range indices that happen to sum right.
        if np.sum(hiring_orders[i][0]) != np.sum(np.arange(pool_size)):
            raise ValueError('Ordering doesnt seem legit')