예제 #1
0
def rf_epidemiologists():
    """Plot average AH' against epidemiologist-defined onsets (Russia).

    One plot is requested per case (Moscow, Saint Petersburg, Novosibirsk
    and the three cities together); each is handed a PDF path under
    ``results/russia/epidemiologists/`` through ``save_to_file``.
    """
    # Only one threshold value (0) is used for this onset source.
    THRESHOLDS = [0]

    city_resolver = get_city_resolver()
    ah = get_ah(CITIES)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    onsets = get_onsets_by_epidemiologists(CITIES, AH_FILE_PATTERN, THRESHOLDS)

    # Each entry: (city codes, output file-name suffix, plot title).
    plot_cases = (
        (['msk'], 'Moscow', 'Moscow'),
        (['spb'], 'SaintPetersburg', 'Saint Petersburg'),
        (['nsk'], 'Novosibirsk', 'Novosibirsk'),
        (['spb', 'msk', 'nsk'], 'spb,msk,nsk', 'All cities'),
    )

    for city_codes, name_suffix, title in plot_cases:
        average_ah_dev = get_average_ah_vs_onsets(
            ah_dev, onsets, city_codes, THRESHOLDS, DATE_SHIFT_RANGE,
            city_resolver)

        output_path = 'results/russia/epidemiologists/rf_%s.pdf' % name_suffix
        plot_average_ah_dev(
            average_ah_dev,
            THRESHOLD_COLORS,
            DATE_SHIFT_RANGE,
            title=title,
            save_to_file=output_path,
        )
예제 #2
0
def hypothesis_test_paris():
    """Compare AH' control and epidemic samples for Paris with t-tests.

    For every threshold the control and experimental sample helpers are
    called with a JSON path (``filename``). In a second pass the same
    paths are loaded and compared with ``stats.ttest_ind`` twice: once with
    its default settings and once with ``equal_var=False``. Sample sizes
    and P-values are printed.
    """
    THRESHOLDS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30]

    # Season window: 1 October - 31 March, keeping the years Winter() provides.
    season = Winter()
    season.START = datetime.date(season.START.year, 10, 1)
    season.END = datetime.date(season.END.year, 3, 31)

    city_resolver = get_city_resolver()
    population = get_population(PARIS)

    ah = get_ah(PARIS)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    morbidity = get_daily_morbidity(PARIS)
    morbidity_excess = get_morbidity_excess(morbidity,
                                            get_morbidity_mean(morbidity))
    excess_data = get_relative_weekly_morbidity_excess(morbidity_excess,
                                                       population)

    # NOTE(review): the season window is not passed to the onset helper
    # (no `winter=` argument) - confirm this is intended.
    onsets = get_onsets_by_morbidity(excess_data, THRESHOLDS)
    years = range(1986, 2015)

    # Pass 1: generate both samples for every threshold.
    for threshold in THRESHOLDS:
        control_path = f'results/stats/paris/ah_sample.{threshold}.json'
        epidemic_path = f'results/stats/paris/epidemic_sample.{threshold}.json'
        generate_control_sample(onsets, threshold, ah_dev, season, PARIS,
                                city_resolver, years, filename=control_path)
        generate_experimental_sample(onsets, threshold, ah_dev, season, PARIS,
                                     city_resolver, filename=epidemic_path)

    # Pass 2: read the samples back and report the test results.
    for threshold in THRESHOLDS:
        print(f'threshold {threshold}')
        control_path = f'results/stats/paris/ah_sample.{threshold}.json'
        epidemic_path = f'results/stats/paris/epidemic_sample.{threshold}.json'

        with open(control_path, 'r') as control_file:
            ah_sample = json.load(control_file)
        with open(epidemic_path, 'r') as epidemic_file:
            epidemic_sample = json.load(epidemic_file)

        print(f"AH' sample size = {len(ah_sample)}")
        print(f"Epidemic sample size = {len(epidemic_sample)}")

        _, prob = stats.ttest_ind(ah_sample, epidemic_sample)
        print(f"Equal variance (Student's t-test): P-value = {prob}")

        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
        print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
        print()
예제 #3
0
def main():
    """Plot average AH' against morbidity-based onsets for Russian cities.

    Onsets are computed for a 1 November - 31 March season, then one plot
    is requested per case (each city and all three together). The output
    path under ``results/russia/morbidity/`` encodes the case suffix, the
    season months and the smallest and largest threshold keys of the
    averaged data.
    """
    # Parameters
    THRESHOLDS = [30, 35, 40, 45]  # alternative: [25, 30, 35, 40, 45, 50]

    season = Winter()
    season.START = datetime.date(season.START.year, 11, 1)
    season.END = datetime.date(season.END.year, 3, 31)

    city_resolver = get_city_resolver()
    population = get_population(CITIES)

    ah = get_ah(CITIES)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    morbidity = get_daily_morbidity(CITIES)
    morbidity_excess = get_morbidity_excess(morbidity,
                                            get_morbidity_mean(morbidity))
    excess_data = get_relative_weekly_morbidity_excess(morbidity_excess,
                                                       population)

    onsets = get_onsets_by_morbidity(excess_data, THRESHOLDS, winter=season)

    # Each entry: (city codes, output file-name suffix, plot title).
    plot_cases = (
        (['msk'], 'Moscow', 'Moscow'),
        (['spb'], 'SaintPetersburg', 'Saint Petersburg'),
        (['nsk'], 'Novosibirsk', 'Novosibirsk'),
        (['spb', 'msk', 'nsk'], 'spb,msk,nsk', 'All cities'),
    )

    for city_codes, name_suffix, title in plot_cases:
        average_ah_dev = get_average_ah_vs_onsets(
            ah_dev, onsets, city_codes, THRESHOLDS, DATE_SHIFT_RANGE,
            city_resolver)

        threshold_keys = average_ah_dev.keys()
        filename = ('results/russia/morbidity/'
                    'rf_m_%s_winter%d-%d_threshold%s-%s.pdf') % (
            name_suffix,
            season.START.month,
            season.END.month,
            min(threshold_keys),
            max(threshold_keys),
        )
        plot_average_ah_dev(
            average_ah_dev,
            THRESHOLD_COLORS,
            DATE_SHIFT_RANGE,
            title=title,
            save_to_file=filename,
        )
예제 #4
0
파일: usa.py 프로젝트: neseleznev/YSC
def stats_all_country():
    """Run t-tests of AH' control vs. epidemic samples for the whole USA.

    Uses the module-level ``THRESHOLDS``. For each threshold both sample
    helpers are called with a JSON path under ``results/stats/usa/``; the
    same paths are then loaded and the statistic and P-value of
    ``stats.ttest_ind`` are printed, first with default settings and then
    with ``equal_var=False``.
    """
    # Onsets are searched in a 1 October - 31 March season.
    season = Winter()
    season.START = datetime.date(season.START.year, 10, 1)
    season.END = datetime.date(season.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, season)
    years = range(1972, 2002)

    # Pass 1: generate the samples for the whole country.
    # Note: the sample helpers receive a fresh default Winter(), not the
    # adjusted season used for the onsets above.
    for threshold in THRESHOLDS:
        control_path = f'results/stats/usa/ah_sample.{threshold}.json'
        epidemic_path = f'results/stats/usa/epidemic_sample.{threshold}.json'
        generate_control_sample(onsets, threshold, ah_dev, Winter(),
                                CONTIGUOUS_STATES, state_resolver, years,
                                filename=control_path)
        generate_experimental_sample(onsets, threshold, ah_dev, Winter(),
                                     CONTIGUOUS_STATES, state_resolver,
                                     filename=epidemic_path)

    # Pass 2: load the samples and print (statistic, P-value) pairs.
    for threshold in THRESHOLDS:
        print(f'threshold {threshold}:')
        control_path = f'results/stats/usa/ah_sample.{threshold}.json'
        epidemic_path = f'results/stats/usa/epidemic_sample.{threshold}.json'

        with open(control_path, 'r') as control_file:
            ah_sample = json.load(control_file)
        with open(epidemic_path, 'r') as epidemic_file:
            epidemic_sample = json.load(epidemic_file)

        statistic, prob = stats.ttest_ind(ah_sample, epidemic_sample)
        print(statistic, prob)
        statistic, prob = stats.ttest_ind(ah_sample, epidemic_sample,
                                          equal_var=False)
        print(statistic, prob)
예제 #5
0
def hypothesis_test_epidemiologists():
    """T-test AH' control vs. epidemic samples for epidemiologist onsets.

    A single threshold (0) is used. Both sample helpers are called with a
    JSON path under ``results/stats/russia_epid/``; the same paths are then
    loaded and compared with ``stats.ttest_ind`` (default settings and
    ``equal_var=False``), printing sample sizes and P-values.
    """
    threshold = 0
    THRESHOLDS = [0]

    city_resolver = get_city_resolver()
    ah = get_ah(CITIES)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    onsets = get_onsets_by_epidemiologists(CITIES, AH_FILE_PATTERN, THRESHOLDS)
    years = range(1986, 2015)

    # Imported locally, exactly as the original code did.
    from hypothesis import generate_control_sample, generate_experimental_sample

    control_path = f'results/stats/russia_epid/ah_sample.{threshold}.json'
    epidemic_path = f'results/stats/russia_epid/epidemic_sample.{threshold}.json'

    generate_control_sample(onsets, threshold, ah_dev, Winter(), CITIES,
                            city_resolver, years, filename=control_path)
    generate_experimental_sample(onsets, threshold, ah_dev, Winter(), CITIES,
                                 city_resolver, filename=epidemic_path)

    print(f'threshold {threshold}')
    with open(control_path, 'r') as control_file:
        ah_sample = json.load(control_file)
    with open(epidemic_path, 'r') as epidemic_file:
        epidemic_sample = json.load(epidemic_file)

    print(f"AH' sample size = {len(ah_sample)}")
    print(f"Epidemic sample size = {len(epidemic_sample)}")

    _, prob = stats.ttest_ind(ah_sample, epidemic_sample)
    print(f"Equal variance (Student's t-test): P-value = {prob}")

    _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
    print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
    print()
예제 #6
0
def main_paris():
    """Plot average AH' against morbidity-based onsets for Paris.

    Onsets are computed for a 1 November - 31 March season. The plot title
    names the season's start and end months, and the output path under
    ``results/paris/`` encodes the season months and the largest threshold
    key of the averaged data.
    """
    state_resolver = get_city_resolver()
    population = get_population(PARIS)

    # Parameters. Other threshold sets tried by the original author:
    #   [-1000, 5, 10, 50, 100, 500, 750], [0, 5, 9, 25, 35, 40, 45, 50],
    #   [9, 10, 20, 30, 40, 50], [0, 25, 50, 75, 100],
    #   [10, 20, 30, 40, 50, 60, 70, 80]
    THRESHOLDS = [9, 10, 20, 30]

    season = Winter()
    season.START = datetime.date(season.START.year, 11, 1)
    season.END = datetime.date(season.END.year, 3, 31)

    ah = get_ah(PARIS)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    morbidity = get_daily_morbidity(PARIS)
    morbidity_excess = get_morbidity_excess(morbidity,
                                            get_morbidity_mean(morbidity))
    excess_data = get_relative_weekly_morbidity_excess(morbidity_excess,
                                                       population)

    onsets = get_onsets_by_morbidity(excess_data, THRESHOLDS, winter=season)

    average_ah_dev = get_average_ah_vs_onsets(
        ah_dev, onsets, PARIS, THRESHOLDS, DATE_SHIFT_RANGE, state_resolver)

    title = (f'Île-de-France: outbreaks in '
             f'{season.START.strftime("%B")} — {season.END.strftime("%B")}')
    filename = 'results/paris/paris_winter%d-%d_threshold%s.pdf' % (
        season.START.month,
        season.END.month,
        max(average_ah_dev.keys()),
    )
    plot_average_ah_dev(
        average_ah_dev,
        THRESHOLD_COLORS,
        DATE_SHIFT_RANGE,
        limits=(-11e-4, 15e-4),
        title=title,
        save_to_file=filename,
    )
예제 #7
0
파일: usa.py 프로젝트: neseleznev/YSC
def winter_range_investigation():
    """Plot average AH' vs. onset day for several winter-season windows (USA).

    For every (start month, end month) pair below, a ``Winter`` is built
    that runs from the first day of the start month to the last day of the
    end month, using the years the default ``Winter()`` provides. Onsets
    are recomputed for that window and one plot is requested with a PDF
    path under ``results/winter_range_usa/``.
    """
    # Local import: only this function needs month lengths.
    import calendar

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)

    # (start month, end month) of each investigated season window.
    month_ranges = (
        (12, 2),
        (12, 3),
        (11, 2),
        (11, 3),
        (11, 4),
        (10, 3),
        (10, 4),
        (10, 5),
        (9, 4),
        (9, 5),
    )

    for start_month, end_month in month_ranges:
        winter = Winter()
        winter.START = datetime.date(winter.START.year, start_month, 1)

        # Take the real length of the end month for the season's end year.
        # The previous hand-written table hard-coded February as 29 days,
        # which makes datetime.date raise ValueError in a non-leap year.
        end_year = winter.END.year
        last_day = calendar.monthrange(end_year, end_month)[1]
        winter.END = datetime.date(end_year, end_month, last_day)

        onsets = get_onsets(excess_data, THRESHOLDS, winter)

        average_ah_dev = get_average_ah_vs_onsets(ah_dev, onsets,
                                                  CONTIGUOUS_STATES,
                                                  THRESHOLDS, DATE_SHIFT_RANGE,
                                                  state_resolver)

        rng = f'{winter.START.strftime("%B")} — {winter.END.strftime("%B")}'
        title = 'AH\' v. Onset Day: outbreaks in ' + rng
        filename = 'results/winter_range_usa/usa_winter%d-%d.pdf' % (
            winter.START.month, winter.END.month)
        plot_average_ah_dev(average_ah_dev,
                            THRESHOLD_COLORS,
                            DATE_SHIFT_RANGE,
                            limits=(-3.3e-4, 2.2e-4),
                            title=title,
                            save_to_file=filename)
예제 #8
0
def test_parser():
    """Draw the mean AH curves for the Russian cities.

    Loads the humidity data for ``CITIES``, takes its mean and draws it in
    the manner of Figure 1D from the Shaman (2010) article, with one colour
    per city.
    """
    site_colors = {
        'Saint Petersburg': 'b',
        'Moscow': 'g',
        'Novosibirsk': 'r',
    }

    ah_mean = get_ah_mean(get_ah(CITIES))

    # Graph 1D from Shaman 2010
    draw_ah_mean(
        ah_mean,
        sites=['Saint Petersburg', 'Moscow', 'Novosibirsk'],
        colors=site_colors,
    )
예제 #9
0
파일: usa.py 프로젝트: neseleznev/YSC
def distinct_states():
    """Rank states by the depth of their average AH' dip before onset.

    For each state code the average AH' series is computed per threshold,
    and its minimum over day shifts -28..-1 is taken. States whose minimum
    falls below -0.0003 for at least one threshold are kept with their
    lowest such value. The ranking is printed.

    Returns:
        list: codes of the kept states, ordered from the deepest dip up.
    """
    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS)

    dip_cutoff = -0.0003
    window_days = range(-28, 0, 1)  # earlier choice: [-19, -18, -17, -11, -10, -9]
    deepest_by_state = {}  # state code -> lowest AH' seen in the window

    state_codes = [1] + list(range(3, 12)) + list(range(13, 52))
    for state in state_codes:
        average_ah_dev = get_average_ah_vs_onsets(
            ah_dev, onsets, [state], THRESHOLDS, DATE_SHIFT_RANGE,
            state_resolver)

        for average in average_ah_dev.values():
            # Day shifts become list positions by subtracting the first
            # element of DATE_SHIFT_RANGE.
            lowest = min(average[day - DATE_SHIFT_RANGE[0]]
                         for day in window_days)
            if lowest < dip_cutoff:
                deepest_by_state[state] = min(
                    lowest, deepest_by_state.get(state, lowest))

    ranked = sorted(deepest_by_state.items(), key=lambda item: item[1])

    for state, lowest in ranked:
        print('Deep level %f in %s state (%d)' %
              (lowest, state_resolver[state]['acronym'], state))

    top_dip = [state for state, _ in ranked]
    print(f'Top dip {len(top_dip)}: {top_dip}')
    return top_dip
예제 #10
0
파일: usa.py 프로젝트: neseleznev/YSC
def test_parser():
    """Draw the mean AH curves for five US states.

    Loads the humidity data from ``data/stateAHmsk_oldFL.csv``, takes its
    mean and draws Figure 1D from the Shaman (2010) article, with one
    colour per state.
    """
    site_colors = {
        'Arizona': 'b',
        'Florida': 'g',
        'Illinois': 'r',
        'New York': 'c',
        'Washington': 'm',
    }

    ah_mean = get_ah_mean(get_ah('data/stateAHmsk_oldFL.csv'))

    # Graph 1D from Shaman 2010
    draw_ah_mean(
        ah_mean,
        sites=['Arizona', 'Florida', 'Illinois', 'New York', 'Washington'],
        colors=site_colors,
    )
예제 #11
0
파일: usa.py 프로젝트: neseleznev/YSC
def main():
    """Plot average AH' vs. onset day for each group of US states.

    Onsets are computed for a 1 October - 30 April season. One plot is
    requested per region, with a PDF path of the form
    ``results/usa/usa_winter10-4_<suffix>.pdf``.
    """
    season = Winter()
    season.START = datetime.date(season.START.year, 10, 1)
    season.END = datetime.date(season.END.year, 4, 30)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, season)

    # Each entry: (file-name suffix, title suffix, state codes).
    # The whole-country case ('all', 'Contiguous States', CONTIGUOUS_STATES)
    # is left out here.
    regions = (
        ('sw', 'Southwest States', SW_STATES),
        ('ne', 'Northeast States', NE_STATES),
        ('gulf', 'Gulf States', GULF_STATES),
        ('the_rest', 'The Remained States', REST_STATES),
    )

    for name_suffix, title_suffix, region_states in regions:
        average_ah_dev = get_average_ah_vs_onsets(
            ah_dev, onsets, region_states, THRESHOLDS, DATE_SHIFT_RANGE,
            state_resolver)

        plot_title = 'AH\' v. Onset Day: ' + title_suffix
        output_path = 'results/usa/usa_winter10-4_%s.pdf' % name_suffix
        plot_average_ah_dev(
            average_ah_dev,
            THRESHOLD_COLORS,
            DATE_SHIFT_RANGE,
            limits=(-7e-4, 5e-4),
            title=plot_title,
            save_to_file=output_path,
        )
예제 #12
0
파일: usa.py 프로젝트: neseleznev/YSC
def stats_regions():
    """Welch t-test of AH' control vs. epidemic samples per US region.

    Only the last entry of ``THRESHOLDS`` is used (the original author
    described it as the strongest one). For every region both sample
    helpers are called with a JSON path under
    ``results/stats/usa/regions/``. The same paths are then loaded and
    compared with ``stats.ttest_ind(..., equal_var=False)``; sample sizes
    and the P-value are printed. Regions whose sample files cannot be read
    or parsed are skipped silently.
    """
    # Season window: 1 October - 31 March.
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)
    years = range(1972, 2002)

    # For regions
    threshold = THRESHOLDS[-1]  # last configured threshold
    regions = {
        'sw': SW_STATES,
        'ne': NE_STATES,
        'gulf': GULF_STATES,
        'the_rest': REST_STATES
    }

    # Pass 1: generate both samples for every region.
    for region_name, region in regions.items():
        control_path = \
            f'results/stats/usa/regions/control.{region_name}.{threshold}.json'
        experimental_path = \
            f'results/stats/usa/regions/experimental.{region_name}.{threshold}.json'
        generate_control_sample(
            onsets,
            threshold,
            ah_dev,
            winter,
            region,
            state_resolver,
            years,
            filename=control_path)
        generate_experimental_sample(
            onsets,
            threshold,
            ah_dev,
            winter,
            region,
            state_resolver,
            filename=experimental_path)

    # Pass 2: load the samples back and report the test for each region.
    for region_name, region in regions.items():
        control_path = \
            f'results/stats/usa/regions/control.{region_name}.{threshold}.json'
        experimental_path = \
            f'results/stats/usa/regions/experimental.{region_name}.{threshold}.json'
        try:
            with open(control_path, 'r') as f:
                ah_sample = json.load(f)
            with open(experimental_path, 'r') as f:
                epidemic_sample = json.load(f)
        except (OSError, ValueError):
            # Missing/unreadable file (OSError) or malformed JSON
            # (json.JSONDecodeError is a ValueError): skip this region.
            # A bare `except:` would also hide KeyboardInterrupt and
            # programming errors.
            continue

        print(f'Region {region_name} ({len(region)} states)')
        print(f"AH' sample size = {len(ah_sample)}")
        print(f"Epidemic sample size = {len(epidemic_sample)}")
        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
        print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
        print()
예제 #13
0
파일: usa.py 프로젝트: neseleznev/YSC
def stats_joint():
    """Plot and t-test AH' after excluding the states with the deepest dips.

    Expects ``stats_distinct_states`` to have been run already, so that the
    per-state control files under ``results/stats/usa/distinct/`` exist;
    control files that are missing or unparsable are skipped.

    Steps:
      1. Rank states with ``distinct_states()`` and plot the average AH'
         for all states except the first 24 of that ranking.
      2. For ``i`` in ``0 .. len(top_dip) - 1``: drop the first ``i``
         ranked states, pool the control files of the remaining states,
         generate the experimental sample for them, and print the sample
         sizes and the ``stats.ttest_ind(..., equal_var=False)`` P-value.
    """
    # Season window: 1 October - 31 March.
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)

    top_dip = distinct_states()
    all_states = [1] + list(range(3, 12)) + list(range(13, 52))
    # Exclude the 24 states with the lowest AH' dips.
    not_top_states = list(set(all_states) - set(top_dip[:24]))

    average_ah_dev = get_average_ah_vs_onsets(ah_dev, onsets, not_top_states,
                                              THRESHOLDS, DATE_SHIFT_RANGE,
                                              state_resolver)

    plot_average_ah_dev(average_ah_dev,
                        THRESHOLD_COLORS,
                        DATE_SHIFT_RANGE,
                        limits=(-7e-4, 5e-4),
                        title='AH\' v. Onset Day: The Remained States',
                        save_to_file='results/usa/usa_top.pdf')

    # For joint states test
    threshold = THRESHOLDS[-1]  # last configured threshold

    for excluded_count in range(len(top_dip)):
        # Exclude the `excluded_count` states with the lowest AH' dips.
        remaining_states = list(set(all_states) -
                                set(top_dip[:excluded_count]))

        # Start a fresh control sample for this subset. Previously the list
        # was created once before the loop, so each iteration's control
        # sample also contained every earlier iteration's data.
        ah_sample = []
        for site in remaining_states:
            control_path = \
                f'results/stats/usa/distinct/control.{site}.{threshold}.json'
            try:
                with open(control_path, 'r') as f:
                    ah_sample += json.load(f)
            except (OSError, ValueError):
                # Missing/unreadable file or malformed JSON: skip the state.
                continue

        joint_path = \
            f'results/stats/usa/joint/sites_cnt{len(remaining_states)}.{threshold}.json'
        # Note: a fresh default Winter() is passed here, not the adjusted
        # `winter` used for the onsets above.
        generate_experimental_sample(
            onsets,
            threshold,
            ah_dev,
            Winter(),
            remaining_states,
            state_resolver,
            filename=joint_path)

        with open(joint_path, 'r') as f:
            epidemic_sample = json.load(f)

        print(f'Some {len(remaining_states)} states')
        print(f"AH' sample size = {len(ah_sample)}")
        print(f"Epidemic sample size = {len(epidemic_sample)}")
        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
        print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
        print()
예제 #14
0
파일: usa.py 프로젝트: neseleznev/YSC
def stats_distinct_states():
    """Welch t-test of AH' control vs. epidemic samples for each US state.

    Only the last entry of ``THRESHOLDS`` is used. Both sample helpers are
    called per state with a JSON path under
    ``results/stats/usa/distinct/``. For every state whose two files can
    be loaded, a LaTeX table row is printed (state name, epidemic sample
    size, P-value, bold when below 0.05). A summary follows: how many
    states fall on each side of 0.05 and the min/max P-value per group.
    """
    # Season window: 1 October - 31 March.
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)
    years = range(1972, 2002)

    # For distinct states
    threshold = THRESHOLDS[-1]  # last configured threshold

    # NOTE(review): generation skips the first entry of CONTIGUOUS_STATES,
    # while the reporting loop below covers all entries - confirm whether
    # the [1:] slice is intentional.
    # Note: the sample helpers receive a fresh default Winter(), not the
    # adjusted `winter` used for the onsets above.
    for site in CONTIGUOUS_STATES[1:]:
        generate_control_sample(
            onsets,
            threshold,
            ah_dev,
            Winter(), [site],
            state_resolver,
            years,
            filename=
            f'results/stats/usa/distinct/control.{site}.{threshold}.json')
        generate_experimental_sample(
            onsets,
            threshold,
            ah_dev,
            Winter(), [site],
            state_resolver,
            filename=
            f'results/stats/usa/distinct/experimental.{site}.{threshold}.json')

    different = []  # (state name, P-value) with P < 0.05
    equal = []  # (state name, P-value) otherwise

    for site in CONTIGUOUS_STATES:
        try:
            with open(
                    f'results/stats/usa/distinct/control.{site}.{threshold}.json',
                    'r') as f:
                ah_sample = json.load(f)
            with open(
                    f'results/stats/usa/distinct/experimental.{site}.{threshold}.json',
                    'r') as f:
                epidemic_sample = json.load(f)
        except (OSError, ValueError):
            # Missing/unreadable file (OSError) or malformed JSON
            # (json.JSONDecodeError is a ValueError): skip this state.
            # A bare `except:` would also hide KeyboardInterrupt and
            # programming errors.
            continue

        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)

        # LaTeX table row; the P-value text is cut to 7 characters and set
        # in bold when below 0.05.
        prob_text = str(prob)[:7]
        is_significant = prob < 0.05
        prob_str = "\\textbf{" + prob_text + "}" if is_significant else prob_text
        print(f"{state_resolver[site]['name']} & {len(epidemic_sample)} & " +
              prob_str + " \\\\\n\\hline")

        entry = (state_resolver[site]['name'], prob)
        if is_significant:
            different.append(entry)
        else:
            equal.append(entry)

    print(f"\n\n\nOverall {len(different) + len(equal)} states:")
    print(f'\t{len(different)} different avg: {[x[0] for x in different]}')
    print(f'\t{len(equal)} equal avg: {[x[0] for x in equal]}')
    print()
    diff_prob, eq_prob = [x[1] for x in different], [x[1] for x in equal]
    if diff_prob:
        print(
            f'Different P-value variance: [{min(diff_prob)} ... {max(diff_prob)}]'
        )
    if eq_prob:
        print(f'Equal P-value variance: [{min(eq_prob)} ... {max(eq_prob)}]')