def make_inter_level_hierarchical_transition_matrixes_tables(person_year_table, profession, out_dir):
    """
    This function writes two .csv's per profession: one contains the transition count matrices for all observed
    years except the left and right censor years (since we judge entry and departure by comparing the years before
    and after focal year X), and the other contains the transition PROBABILITY matrices for the same years.

    :param person_year_table: a table of person years, as a list of lists
    :param profession: string, "judges", "prosecutors", "notaries" or "executori".
    :param out_dir: str, the path to where the transition matrices will live
    :return: None
    """
    global_trans_dict = inter_level_transition_matrices(person_year_table, profession)

    with open(out_dir + 'yearly_count_hierarchical_transition_matrices.csv', 'w') as out_ct, \
            open(out_dir + 'yearly_count_hierarchical_probability_transition_matrices.csv', 'w') as out_pb:

        count_writer, prob_writer = csv.writer(out_ct), csv.writer(out_pb)
        count_writer.writerow([profession])
        count_writer.writerow([])
        prob_writer.writerow([profession])
        prob_writer.writerow([])

        for yr in global_trans_dict:
            count_writer.writerow([yr])
            prob_writer.writerow([yr])

            for lvl in global_trans_dict[yr]:
                count_row = [str(key) + ' : ' + str(value) for key, value in global_trans_dict[yr][lvl].items()]
                count_writer.writerow(count_row)

                level_sum_key = str(lvl) + '-' + "level_sum"
                level_sum = global_trans_dict[yr][lvl][level_sum_key]
                prob_row = [str(key) + ' : ' + str(round(helpers.weird_division(value, level_sum), 4))
                            for key, value in global_trans_dict[yr][lvl].items()]
                prob_writer.writerow(prob_row)
            count_writer.writerow([])
            prob_writer.writerow([])
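

# Note on input shape (hypothetical example): the writer above assumes inter_level_transition_matrices
# returns a nested dict of year -> level -> transition counts, where each level's dict includes a
# "<lvl>-level_sum" key used as the probability denominator; the transition keys shown are illustrative:
#
#   {2005: {1: {"1-1": 80, "1-2": 12, "1-level_sum": 92},
#           2: {"2-1": 3, "2-2": 60, "2-level_sum": 63}}}
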
def freq_mat_to_prob_mat(frequency_matrix, round_to=15):
    """
    Take a matrix of frequencies and turn it into a probability matrix, where each cell is divided by its row sum.
    NB: leaves zero rows as they are

    :param frequency_matrix: list of lists, e.g. [[1,2,], [3,4]]
    :param round_to: int, to how many decimals we want to round; default is fifteen
    :return: list of lists, where rows sum to 1, e.g. [[0.25, 0.75], [0.9, 0.1]]
    """
    probability_matrix = []
    for i in range(len(frequency_matrix)):
        row_sum = sum(frequency_matrix[i])
        prob_row = [round(helpers.weird_division(round(cell, 5), row_sum), round_to) for cell in frequency_matrix[i]]
        probability_matrix.append(prob_row)
    return probability_matrix
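

# A minimal, self-contained sketch of the row normalisation above, with a local stand-in for
# helpers.weird_division (assumed to return 0 when the denominator is 0, so zero rows pass through):
def _demo_freq_mat_to_prob_mat():
    def safe_div(numerator, denominator):
        return numerator / denominator if denominator else 0

    freq = [[1, 2], [3, 4], [0, 0]]
    # each cell is divided by its row sum; the zero row is left as it is
    return [[round(safe_div(cell, sum(row)), 4) for cell in row] for row in freq]
    # -> [[0.3333, 0.6667], [0.4286, 0.5714], [0, 0]]
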
def adjusted_retirement_counts(person_year_table, profession, weights=False):
    """
    The problem with the raw sample count of retirement is that it does not distinguish between people who genuinely
    leave the profession and those who simply leave the sample (i.e. move to another area) but remain in the profession.
    Consequently, raw sample retirement counts are biased upwards because profession-exits and sample-exits are
    implicitly equated.

    The solution is to use the total population to compute the fraction of retirements from the sample area that are
    genuine departures from the profession and then to multiply the raw sample retirement count by that fraction,
    thereby reducing the upward bias. To be exact, the genuine retirement fraction is computed by

      genuine retirement fraction = genuine retirement counts / (genuine retirement counts + sample-leaving counts)

    and the adjusted retirement count will therefore be

      adjusted number of retirements = raw sample retirement count * genuine retirement fraction

    :param person_year_table: list of lists, a list of person-years (each one a list of values)
    :param profession: string, "judges", "prosecutors", "notaries" or "executori"
    :param weights: bool, if True then instead of returning the adjusted counts, return the fractions by which we weigh
                    the observed counts in order to reduce bias
    :return: a nested dict, where 1st layer keys are levels in the judicial hierarchy, 2nd layer keys are years, and
             base values are the adjusted retirement counts
    """
    samp_yrs, samp_as, fracts_yrs = samp_yr_range[profession], samp_areas[profession], pop_yrs_for_fracts[profession]

    yr_col_idx = helpers.get_header(profession, 'preprocess').index('an')
    ca_cod_idx = helpers.get_header(profession, 'preprocess').index('ca cod')
    lvl_col_idx = helpers.get_header(profession, 'preprocess').index('nivel')

    # sort the population table by person and year then sample from it by area
    sorted_person_year_table = helpers.sort_pers_yr_table_by_pers_then_yr(person_year_table, profession)

    # initialise the dicts; NB: four possible levels, even though level 3 (Appeals courts) only began in 1993
    ret_fracts = {lvl: {"gen_rets": 0, "samp_leaves": 0} for lvl in range(1, 5)}

    people = helpers.group_table_by_persons(sorted_person_year_table, profession)
    for person in people:
        for idx, pers_yr in enumerate(person):

            current_yr, current_lvl, current_area = pers_yr[yr_col_idx], int(pers_yr[lvl_col_idx]), pers_yr[ca_cod_idx]
            # if this year is used for the fraction, and within the sampling areas
            if int(current_yr) in fracts_yrs and current_area in samp_as:
                if idx < len(person) - 1:  # since we do look-aheads to see departures-cum-retirements
                    # if next year's area is NOT within the sampling area, increment sample departures
                    if person[idx + 1][ca_cod_idx] not in samp_as:
                        ret_fracts[current_lvl]["samp_leaves"] += 1

                # otherwise this is the person's last observed year, so increment genuine retirements
                else:  # NB: this always assumes we pick sampling years that are less than the right censoring year
                    ret_fracts[current_lvl]["gen_rets"] += 1

    # average over the years then get the final fraction, per level
    for lvl in ret_fracts:
        avg_gen_rets = float(ret_fracts[lvl]["gen_rets"]) / float(len(fracts_yrs))
        avg_samp_leave_rets = float(ret_fracts[lvl]["samp_leaves"]) / float(len(fracts_yrs))
        ret_fracts[lvl] = helpers.weird_division(avg_gen_rets, (avg_gen_rets + avg_samp_leave_rets), mult_const=True)

    # get the raw counts
    cas_sample_table = sample.appellate_area_sample(sorted_person_year_table, profession, samp_as)
    samp_ret_counts = totals_in_out.pop_cohort_counts(cas_sample_table, samp_yrs[0], samp_yrs[1], profession,
                                                      cohorts=True, unit_type="nivel", entry=False)
    samp_ret_counts.pop("grand_total")  # don't need the grand total

    # and weigh them; round result to four decimals
    for lvl in samp_ret_counts:
        for yr in samp_ret_counts[lvl]:
            samp_ret_counts[lvl][yr] = round(samp_ret_counts[lvl][yr]["total_size"] * ret_fracts[int(lvl)], 4)

    if weights:
        return ret_fracts
    else:
        return samp_ret_counts
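

# A hypothetical numeric sketch of the adjustment above: if the fraction years show, at some level,
# 40 genuine retirements and 10 sample-leaves on average, the genuine retirement fraction is
# 40 / (40 + 10) = 0.8, so a raw sample count of 15 retirements is adjusted down to
# round(15 * 0.8, 4) = 12.0.
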
def adjusted_lateral_transfer_counts(person_year_table, profession, weights=False):
    """
    The problem with the raw sample count of lateral transfers is that it is biased downward, for two reasons:

     a) those who transfer laterally to a position outside the sample appear to have retired, thus biasing the
        lateral transfer count downward

     b) those who entered the sample via lateral transfer from outside the sample will appear to be new entrants, thus
        biasing the lateral transfer count downward

    Essentially, the sample only counts lateral transfers that occur within the sample, ignoring those lateral
    transfers that feature sample entry or departure.

    To fix this bias we use the total population to compute the genuine lateral transfer ratio, namely

      genuine lateral transfer ratio = (within-sample lateral transfers +
                                 lateral transfers leaving the sample +
                                 lateral transfers entering the sample)
                                            /
                                   within-sample lateral transfers

    and the adjusted lateral transfer count will therefore be

      adjusted number of lateral transfers = within-sample lateral transfer count * genuine lateral transfer ratio

    :param person_year_table: list of lists, a list of person-years (each one a list of values)
    :param profession: string, "judges", "prosecutors", "notaries" or "executori"
    :param weights: bool, if True then instead of returning the adjusted counts, return the fractions by which we weigh
                    the observed counts in order to reduce bias
    :return: a nested dict, where 1st layer keys are years, 2nd layer keys are levels in the judicial hierarchy, and
             base values are the adjusted lateral transfer counts
    """
    samp_yrs, samp_as, fracts_yrs = samp_yr_range[profession], samp_areas[profession], pop_yrs_for_fracts[profession]

    yr_col_idx = helpers.get_header(profession, 'preprocess').index('an')
    ca_cod_idx = helpers.get_header(profession, 'preprocess').index('ca cod')
    lvl_col_idx = helpers.get_header(profession, 'preprocess').index('nivel')

    # sort the population table by person and year then sample from it by area
    sorted_person_year_table = helpers.sort_pers_yr_table_by_pers_then_yr(person_year_table, profession)

    # initialise the dicts; NB: four possible levels, even though level 3 (Appeals courts) only began in 1993
    trans_fracts = {lvl: {"within_samp_transfs": 0, "samp_leave_transfs": 0, "samp_ent_transfs": 0}
                    for lvl in range(1, 5)}

    people = helpers.group_table_by_persons(sorted_person_year_table, profession)
    for person in people:
        for idx, pers_yr in enumerate(person):

            current_yr, current_lvl, current_area = pers_yr[yr_col_idx], int(pers_yr[lvl_col_idx]), pers_yr[ca_cod_idx]

            # if this year is used for the fraction and this year is within the sample area
            if int(current_yr) in fracts_yrs and current_area in samp_as:

                if idx < len(person) - 1:  # since we do look-aheads to judge mobility within or leaving the sample

                    # if current hierarchical level is equal to NEXT year's AND the exact workplaces differ
                    # (i.e. there's a lateral transfer this year):
                    if current_lvl == int(person[idx + 1][lvl_col_idx]) and \
                            get_workplace_code(pers_yr, profession) != get_workplace_code(person[idx + 1], profession):

                        # if next year's area is outside the sample, increment count of leaving-sample transfers
                        if person[idx + 1][ca_cod_idx] not in samp_as:
                            trans_fracts[current_lvl]["samp_leave_transfs"] += 1

                        # if next year's area is within the sample, increment the count of within-sample transfers
                        else:
                            trans_fracts[current_lvl]["within_samp_transfs"] += 1

                if 1 < idx:  # we do look-behinds to see if someone entered the sample from elsewhere

                    # if LAST year's hierarchical level was the same as this year's AND the exact workplaces differ
                    # (i.e. a lateral transfer occurred last year)
                    if int(person[idx - 1][lvl_col_idx]) == current_lvl and \
                            get_workplace_code(pers_yr, profession) != get_workplace_code(person[idx - 1], profession):

                        # if last year's area was not within the sample, increment the count of extra-sample entries
                        # via lateral transfer
                        if person[idx - 1][ca_cod_idx] not in samp_as:
                            trans_fracts[current_lvl]["samp_ent_transfs"] += 1

    # average over the years then get the final fraction, per level
    for lvl in trans_fracts:
        avg_within_samp_transfs = float(trans_fracts[lvl]["within_samp_transfs"]) / float(len(fracts_yrs))
        avg_samp_leave_transfs = float(trans_fracts[lvl]["samp_leave_transfs"]) / float(len(fracts_yrs))
        avg_samp_ent_transfs = float(trans_fracts[lvl]["samp_ent_transfs"]) / float(len(fracts_yrs))
        trans_fracts[lvl] = helpers.weird_division((avg_within_samp_transfs +
                                                    avg_samp_leave_transfs +
                                                    avg_samp_ent_transfs),
                                                   avg_within_samp_transfs, mult_const=True)

    # get the raw counts
    cas_sample_table = sample.appellate_area_sample(sorted_person_year_table, profession, samp_as)
    samp_transf_counts = hierarchical.hierarchical_mobility(cas_sample_table, profession)

    # and weigh them; round result to four decimals
    for yr in samp_transf_counts:
        for lvl in samp_transf_counts[yr]:
            samp_transf_counts[yr][lvl] = round(samp_transf_counts[yr][lvl]["across"]["total"] * trans_fracts[lvl], 4)

    if weights:
        return trans_fracts
    else:
        return samp_transf_counts
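

# A hypothetical numeric sketch of the adjustment above: with, per fraction year, 20 within-sample
# transfers, 5 sample-leaving transfers, and 5 sample-entering transfers at some level, the ratio is
# (20 + 5 + 5) / 20 = 1.5, so a raw within-sample count of 8 transfers is adjusted upward to
# round(8 * 1.5, 4) = 12.0.
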
def adjusted_entry_counts(person_year_table, profession, weights=False):
    """
    The problem with the raw sample count of entries is that it does not distinguish between people who are genuinely
    new recruits to the profession, and those who were already in the profession but outside the sample. Consequently,
    the raw count is biased upwards because it equates entering the sample from within the profession with entering
    the profession tout-court.

    The solution is to use the total population to compute the fraction of entries into the sample that are genuine
    recruits into the profession and then to multiply the raw sample entry count by that fraction, thereby reducing the
    upward bias. To be exact, the genuine entry fraction is computed by

      genuine entry fraction = genuine entry counts / (genuine entry counts + sample-entering counts)

    and the adjusted entry count will therefore be

      adjusted number of entries = sample entry count * genuine entry fraction

    :param person_year_table: list of lists, a list of person-years (each one a list of values)
    :param profession: string, "judges", "prosecutors", "notaries" or "executori"
    :param weights: bool, if True then instead of returning the adjusted counts, return the fractions by which we weigh
                    the observed counts in order to reduce bias
    :return: a nested dict, where 1st layer keys are levels in the judicial hierarchy, 2nd layer keys are years, and
             base values are the adjusted entry counts
    """
    samp_yrs, samp_as, fracts_yrs = samp_yr_range[profession], samp_areas[profession], pop_yrs_for_fracts[profession]

    yr_col_idx = helpers.get_header(profession, 'preprocess').index('an')
    ca_cod_idx = helpers.get_header(profession, 'preprocess').index('ca cod')
    lvl_col_idx = helpers.get_header(profession, 'preprocess').index('nivel')

    # sort the population table by person and year then sample from it by area
    sorted_person_year_table = helpers.sort_pers_yr_table_by_pers_then_yr(person_year_table, profession)

    # initialise the dicts; NB: four possible levels, even though level 3 (Appeals courts) only began in 1993
    ent_fracts = {lvl: {"gen_ents": 0, "samp_ents": 0} for lvl in range(1, 5)}

    people = helpers.group_table_by_persons(sorted_person_year_table, profession)
    for person in people:
        for idx, pers_yr in enumerate(person):

            current_yr, current_lvl, current_area = pers_yr[yr_col_idx], int(pers_yr[lvl_col_idx]), pers_yr[ca_cod_idx]

            # if this year is used for the fraction and this year is within the sample area
            if int(current_yr) in fracts_yrs and current_area in samp_as:

                # if it's genuinely the first year, increment genuine entries
                #  NB: this always assumes that we skip the left censor year
                if idx == 0:  # the first year of the career;
                    ent_fracts[current_lvl]["gen_ents"] += 1

                if 1 < idx:  # since we do look-behinds to see if someone entered the sample from elsewhere

                    # if LAST year's appellate area is different from this year's appellate area, increment count of
                    # extra-sample entries
                    if current_area != person[idx - 1][ca_cod_idx]:
                        ent_fracts[current_lvl]["samp_ents"] += 1

    # average over the years then get the final fraction, per level
    for lvl in ent_fracts:
        avg_gen_ents = float(ent_fracts[lvl]["gen_ents"]) / float(len(fracts_yrs))
        avg_samp_ents = float(ent_fracts[lvl]["samp_ents"]) / float(len(fracts_yrs))
        ent_fracts[lvl] = helpers.weird_division(avg_gen_ents, (avg_gen_ents + avg_samp_ents), mult_const=True)

    # get the raw counts
    cas_sample_table = sample.appellate_area_sample(sorted_person_year_table, profession, samp_as)
    samp_ent_counts = totals_in_out.pop_cohort_counts(cas_sample_table, samp_yrs[0], samp_yrs[1], profession,
                                                      cohorts=True, unit_type="nivel", entry=True)
    samp_ent_counts.pop("grand_total")  # don't need the grand total

    # and weigh them; round result to four decimals
    for lvl in samp_ent_counts:
        for yr in samp_ent_counts[lvl]:
            samp_ent_counts[lvl][yr] = round(samp_ent_counts[lvl][yr]["total_size"] * ent_fracts[int(lvl)], 4)

    if weights:
        return ent_fracts
    else:
        return samp_ent_counts
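

# A hypothetical numeric sketch, mirroring the retirement adjustment above: 30 genuine entries and
# 10 extra-sample entries per fraction year give a genuine entry fraction of 30 / (30 + 10) = 0.75,
# so a raw sample entry count of 8 becomes round(8 * 0.75, 4) = 6.0.
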
def yearly_weights(person_year_table, profession, appellate_areas_to_sample, weighting_year):
    """
    Get following weights (as ratios), per year, per level:
     - retirements / (retirements + area leaves)
     - internal promotions / total promotions
     - external promotions / total entries

    All counts are based on comparing the sampled appellate areas to the population in the other appellate areas.

    NB: these weights pool across sampled areas

    NB: keys in base-level dicts indicate judicial level: 1 = low court, 2 = tribunal, 3 = appeals, 4 = high court

    NB: by convention I turn undefined weights (where the denominator is zero) to zero

    NB: assumes weighting years feature entire population.

    :param person_year_table: a table of person-years, as a list of lists; comes with NO header
    :param profession: string, "judges", "prosecutors", "notaries" or "executori"
    :param appellate_areas_to_sample: list of appellate area codes indicating which areas we sample, e.g. ["CA1", "CA5"]
    :param weighting_year: year based on which we draw weights. NB: since we measure mobility by comparing this year
                           with adjacent ones (e.g. we know you got promoted because your level in weighting_year is
                           less than your level in weighting_year+1), weighting_year actually signifies an interval.
                           So "2006" refers to mobility in the period 2006-2007. Years are as str, e.g. "2017".
    :return: dict of yearly weights
    """

    yr_col_idx = helpers.get_header(profession, 'preprocess').index('an')
    ca_cod_idx = helpers.get_header(profession, 'preprocess').index('ca cod')
    lvl_col_idx = helpers.get_header(profession, 'preprocess').index('nivel')
    pid_col_idx = helpers.get_header(profession, 'preprocess').index('cod persoană')

    # make the dicts that hold mobility counts per level
    lvls_dict = {"1": 0, "2": 0, "3": 0}
    total_retirements, total_area_leaves = deepcopy(lvls_dict), deepcopy(lvls_dict)
    total_promotions, internal_promotions = deepcopy(lvls_dict), deepcopy(lvls_dict)
    total_entries, external_promotions = deepcopy(lvls_dict), deepcopy(lvls_dict)

    # group table by persons
    person_year_table = sorted(person_year_table, key=operator.itemgetter(pid_col_idx, yr_col_idx))
    people = [person for k, [*person] in groupby(person_year_table, key=operator.itemgetter(pid_col_idx))]

    # iterate through people
    for person in people:

        # iterate through person-years
        for idx, pers_yr in enumerate(person):

            if idx < 1:  # for the first year of the career; NB: this always assumes that we skip the left censor year

                # if the first year is the weighting year and the person-year is in the sampling areas
                if pers_yr[yr_col_idx] == weighting_year and pers_yr[ca_cod_idx] in appellate_areas_to_sample:
                    # increment total entries
                    total_entries[pers_yr[lvl_col_idx]] += 1

            elif 0 < idx < len(person) - 1:  # look up to the second-to-last person-year

                # if this year is the weighting year and this person-year is in the sampling areas
                if pers_yr[yr_col_idx] == weighting_year and pers_yr[ca_cod_idx] in appellate_areas_to_sample:

                    # if current appellate area is different from next year appellate area, increment total area leaves
                    if pers_yr[ca_cod_idx] != person[idx + 1][ca_cod_idx]:
                        total_area_leaves[pers_yr[lvl_col_idx]] += 1

                    # if current appellate area is different from LAST year's appellate area AND
                    # last year's level is lower than this year's level, increment external promotions
                    if pers_yr[ca_cod_idx] != person[idx - 1][ca_cod_idx] \
                            and person[idx - 1][lvl_col_idx] < pers_yr[lvl_col_idx]:
                        external_promotions[pers_yr[lvl_col_idx]] += 1

                    # if this year's level is lower than next year's level, increment total promotions
                    if pers_yr[lvl_col_idx] < person[idx + 1][lvl_col_idx]:
                        total_promotions[pers_yr[lvl_col_idx]] += 1

                        # if this year's level is lower than next year's
                        # AND this year's appellate area is the same as next year's, increment internal promotions
                        if pers_yr[ca_cod_idx] == person[idx + 1][ca_cod_idx]:
                            internal_promotions[pers_yr[lvl_col_idx]] += 1

            else:  # we're in the last year, i.e. the retirement year
                # NB: this always assumes we pick a weighting year that is less than the right censoring year

                # if the last year is the weighting year and in the sampling areas, increment the retirements counter
                if person[-1][yr_col_idx] == weighting_year and person[-1][ca_cod_idx] in appellate_areas_to_sample:
                    total_retirements[person[-1][lvl_col_idx]] += 1

    # make retirement weights
    retirement_weights = {key: helpers.weird_division(total_retirements[key],
                                                      total_area_leaves[key] + total_retirements[key])
                          for key in total_retirements}

    # make internal promotion weights
    internal_promotion_weights = {key: helpers.weird_division(internal_promotions[key], total_promotions[key])
                                  for key in total_promotions}

    # make external promotion weights
    external_promotion_weights = {key: helpers.weird_division(external_promotions[key], total_entries[key])
                                  for key in total_entries}

    return {"ret_leave": retirement_weights,
            "int_prom": internal_promotion_weights,
            "ext_prom": external_promotion_weights}
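

# Illustrative (hypothetical) return shape of yearly_weights -- per-level ratios keyed by judicial
# level as strings, with undefined (zero-denominator) weights set to 0 by convention:
#
#   {"ret_leave": {"1": 0.80, "2": 0.75, "3": 0.90},
#    "int_prom":  {"1": 0.60, "2": 0.50, "3": 0.40},
#    "ext_prom":  {"1": 0.10, "2": 0.20, "3": 0.25}}
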
def retirement_promotion_estimates(person_year_table, profession, sampling_year_range, out_dir):
    """
    Estimate how many people retire and move up the legal hierarchy (i.e. get promoted) every year, both in raw counts
    and relative to the population of people open to such retirement.

    Post-2005 we have the complete population of magistrates (i.e. judges and prosecutors) but pre-2005 we have only
    non-random samples. For judges I sample four appellate areas (Alba, Craiova, Iaşi, and Ploieşti) because I have
    yearly data on all courts in these areas since at least 1980. That said, mobility estimates from these samples
    need to be corrected. In particular, I look at three sorts of mobility: retirement, promotion, and entry.

    Post-2005 we are certain that someone retires when they are in the population in year X, but absent in year X+1.
    For the pre-2005 we can't be certain, because that person may have left the sample but stayed in the population,
    i.e. they have simply changed appellate area. I therefore correct sample estimates as follows:

    - for the intervals 2006-2007, 2007-2008, and 2008-2009, see how many magistrates in the sampled areas (Alba,
      Craiova, Iaşi, and Ploieşti) actually retired, and how many just left their respective area. Compute the ratio
      "retirement counts" / "retirement counts + area leaving counts" for each interval, and take the three-interval
      average. The result is a weight: X% of the people that departed the sampled areas actually retired. There is one
      ratio for each judicial level (i.e. low court, tribunal, and appeals).

    - for pre-2005 I count how many people left the sample, then multiply the per-level count by the appropriate weight.
      Obviously, this assumes that the ratio between retirements and area changes is constant over this period. I cannot
      numerically check that assumption.

    Regarding promotion, post-2005 we can just see if someone's judicial level increased between years. Pre-2005 this
    count will be biased in the sample because a) those who receive a promotion outside the sample show up as
    retirements, and b) those who entered the sample upon promotion look like new entrants. To address this I construct
    two weights: the ratio of within-area promotions to total promotions, and the ratio of entrants-by-promotion to
    total entrants (per year, per level).

    The final count of (weighted) sample promotions is then computed as follows:
    raw promotion count * (1 / within-to-total ratio) + entrant count * promotion-entrants-to-total ratio.

    Finally, to estimate the number of entrants into the profession using the sample, I compute:
    entrant count * (1 - promotion-entrants-to-total ratio).

    Again, the assumption is that the relative balance of inter-area mobility flows is constant throughout the period
    under study, and therefore that ratios derived from 2006-2009 are true of other times as well. I choose the
    2006-2009 period because it's a) the earliest population-level data, and b) this period did not feature major
    judicial reforms.

    Lastly, we also want estimates of the total size of the population and of year-on-year population growth.

    :param person_year_table: a table of person-years, as a list of lists; NB: assumes no header
    :param profession: string, "judges", "prosecutors", "notaries" or "executori"
    :param sampling_year_range: 2-tuple of ints, the range of years for which we estimate mobility, e.g. (1998, 2004)
    :param out_dir: directory where tables of mobility estimates will live
    :return: None
    """

    # get handy column indexes
    yr_col_idx = helpers.get_header(profession, 'preprocess').index('an')
    pid_col_idx = helpers.get_header(profession, 'preprocess').index('cod persoană')

    # sort person-year table by person then year
    person_year_table.sort(key=operator.itemgetter(pid_col_idx, yr_col_idx))

    # sample all courts in these appeals regions: Alba (CA1), Craiova (CA7), Iaşi (CA9), Ploieşti (CA12)
    appellate_areas_to_sample = ["CA1", "CA7", "CA9", "CA12"]
    cas_sample_table = sample.appellate_area_sample(person_year_table, profession, appellate_areas_to_sample)

    # get weights for retirement, promotion, and entry

    # for those appeals areas, for the intervals 2006-07, 2007-08, and 2008-09, per hierarchical level:
    # a) get the ratio of within-area promotions (i.e. people who were already in the area) to total promotions
    # b) get the ratio of retirements to retirements + out-of-area transfers
    # Average the values across the three intervals: these will be the weights for estimates from earlier years
    weights = three_year_average_weights(person_year_table, profession, appellate_areas_to_sample,
                                         ["2006", "2007", "2008"])
    retirement_weights = weights["ret_weight"]
    internal_promotion_weights = weights["int_prom_weight"]
    external_promotion_weights = weights["ext_prom_weight"]

    # get raw counts of entries, retirements and promotions per year, per level, in the desired time-frame
    counts = get_raw_counts(cas_sample_table, profession, sampling_year_range)
    ent_counts, ret_counts, prom_counts = counts["entries"], counts["retirements"], counts["promotions"]

    # now weigh those counts with average ratios from 2006-2008. Recall (counts are from sample):
    # estimated retirements = retirement count * retirement weight
    # estimated promotions = promotion count * (1 / internal promotion weight) + entry count * external promotion weight
    # estimated entries = entry count * (1 - external promotion weight)
    for key in internal_promotion_weights:
        for year in ret_counts.keys():
            # round to the nearest whole person
            ret_counts[year][key] = round(float(ret_counts[year][key]) * retirement_weights[key])
            prom_counts[year][key] = round(float(helpers.weird_division(prom_counts[year][key],
                                                                        internal_promotion_weights[key])
                                                 + float(ent_counts[year][key]) * external_promotion_weights[key]))
            ent_counts[year][key] = round(ent_counts[year][key] * (1 - external_promotion_weights[key]))
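
    # e.g. (hypothetical numbers): with retirement weight 0.8, 15 raw retirements become round(15 * 0.8) = 12;
    # with internal promotion weight 0.5 and external promotion weight 0.2, 6 raw promotions and 10 raw entries
    # give round(6 / 0.5 + 10 * 0.2) = 14 estimated promotions and round(10 * (1 - 0.2)) = 8 estimated entries.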

    # relabel, strictly for clarity (notice it's not a deepcopy)
    weighted_ret_counts = ret_counts
    weighted_prom_counts = prom_counts
    weighted_ent_counts = ent_counts

    # using (weighted-estimated) sample counts, estimate yearly, per-level departure and retirement probabilities,
    # where the denominator is the sample count of person-years in year X; also estimate what proportion of each
    # year's sample are new entrants
    yearly_counts = counts["total counts"]

    retire_probs = {year: {"1": 0, "2": 0, "3": 0} for year in yearly_counts.keys()}
    promotion_probs = {year: {"1": 0, "2": 0, "3": 0} for year in yearly_counts.keys()}
    entry_proportions = {year: {"1": 0, "2": 0, "3": 0} for year in yearly_counts.keys()}

    for year in yearly_counts:
        for lvl in yearly_counts[year]:
            promotion_probs[year][lvl] = helpers.weird_division(weighted_prom_counts[year][lvl],
                                                                yearly_counts[year][lvl])
            retire_probs[year][lvl] = helpers.weird_division(weighted_ret_counts[year][lvl],
                                                             yearly_counts[year][lvl])
            # NB: entry proportions are simple: how many of this year's sample are newcomers?
            entry_proportions[year][lvl] = helpers.weird_division(weighted_ent_counts[year][lvl],
                                                                  yearly_counts[year][lvl])

    # estimate the size of the professional population for years for which we only have samples
    estimated_pop = estimated_population_size(person_year_table, cas_sample_table, profession, sampling_year_range)

    # estimate year-on-year population growth
    estimated_pop_growth = estimated_population_growth(estimated_pop, sampling_year_range)

    # save to disk one table each for retirements, entries, and departures,
    # and one table for estimated population size and growth
    with open(out_dir + "retirements.csv", 'w') as out_ret:
        writer = csv.writer(out_ret)
        writer.writerow([profession.upper()])
        writer.writerow([
            "YEAR", "LEVEL", "PROJECTED COUNT RETIREMENTS",
            "SAMPLE RETIREMENT PROBABILITY"
        ])
        for year in weighted_ret_counts:
            for lvl in weighted_ret_counts[year]:
                writer.writerow([
                    year, lvl, weighted_ret_counts[year][lvl],
                    retire_probs[year][lvl]
                ])

    with open(out_dir + "promotions.csv", 'w') as out_prom:
        writer = csv.writer(out_prom)
        writer.writerow([profession.upper()])
        writer.writerow([
            "YEAR", "LEVEL", "PROJECTED COUNT PROMOTIONS",
            "SAMPLE PROMOTION PROBABILITY"
        ])
        for year in weighted_prom_counts:
            for lvl in weighted_prom_counts[year]:
                if lvl in weighted_prom_counts[
                        year] and lvl in promotion_probs[year]:
                    writer.writerow([
                        year, lvl, weighted_prom_counts[year][lvl],
                        promotion_probs[year][lvl]
                    ])

    with open(out_dir + "entries.csv", 'w') as out_ent:
        writer = csv.writer(out_ent)
        writer.writerow([profession.upper()])
        writer.writerow([
            "YEAR", "LEVEL", "PROJECTED COUNT ENTRIES",
            "SAMPLE ENTRY PROPORTIONS"
        ])
        for year in weighted_ent_counts:
            for lvl in weighted_ent_counts[year]:
                writer.writerow([
                    year, lvl, weighted_ent_counts[year][lvl],
                    entry_proportions[year][lvl]
                ])

    with open(out_dir + "growth.csv", 'w') as out_grow:  # lol
        writer = csv.writer(out_grow)
        writer.writerow([profession.upper()])
        writer.writerow([
            "YEAR", "PROJECTED POPULATION",
            "SAMPLE PERCENT GROWTH SINCE PREVIOUS YEAR"
        ])
        for year in estimated_pop:
            if year == min(sorted(list(estimated_pop.keys()))
                           ):  # only know pop growth after second year
                writer.writerow([year, estimated_pop[year], "NA"])
            else:
                writer.writerow(
                    [year, estimated_pop[year], estimated_pop_growth[year]])