예제 #1
0
def hypothesis_test_paris():
    """Run Student's and Welch's t-tests on Paris AH' samples per threshold.

    For every threshold in ``THRESHOLDS`` a control and an experimental
    sample are requested under ``results/stats/paris/``; the JSON files are
    then read back and the sample sizes and both P-values are printed.
    """
    THRESHOLDS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30]

    def read_json(path):
        # Small reader shared by both sample files of a threshold.
        with open(path, 'r') as f:
            return json.load(f)

    # Season handed to the sample generators: 1 October .. 31 March.
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    city_resolver = get_city_resolver()
    population = get_population(PARIS)

    ah = get_ah(PARIS)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    morbidity = get_daily_morbidity(PARIS)
    morbidity_excess = get_morbidity_excess(morbidity,
                                            get_morbidity_mean(morbidity))
    excess_data = get_relative_weekly_morbidity_excess(morbidity_excess,
                                                       population)

    # NOTE(review): `winter` is not forwarded to the onset detection here
    # (main_paris and onset_distribution_paris pass `winter=winter`), so the
    # callee falls back to its default season -- confirm this is intended.
    onsets = get_onsets_by_morbidity(excess_data, THRESHOLDS)
    years = range(1986, 2015)

    # First pass: request every sample file before any statistics are run.
    for threshold in THRESHOLDS:
        shared_args = (onsets, threshold, ah_dev, winter, PARIS, city_resolver)
        generate_control_sample(
            *shared_args,
            years,
            filename=f'results/stats/paris/ah_sample.{threshold}.json')
        generate_experimental_sample(
            *shared_args,
            filename=f'results/stats/paris/epidemic_sample.{threshold}.json')

    # Second pass: read the samples back and compare their means.
    for threshold in THRESHOLDS:
        print(f'threshold {threshold}')
        ah_sample = read_json(
            f'results/stats/paris/ah_sample.{threshold}.json')
        epidemic_sample = read_json(
            f'results/stats/paris/epidemic_sample.{threshold}.json')

        print(f"AH' sample size = {len(ah_sample)}")
        print(f"Epidemic sample size = {len(epidemic_sample)}")
        _, prob = stats.ttest_ind(ah_sample, epidemic_sample)
        print(f"Equal variance (Student's t-test): P-value = {prob}")
        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
        print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
        print()
예제 #2
0
def onset_distribution():
    """Draw the weekly epidemic-onset distribution for ``CITIES``.

    Onsets are detected from the relative weekly morbidity excess with a
    single threshold inside a 1 October .. 31 March winter; the target PNG
    path under ``results/onsets/`` is handed to the drawing helper.
    """
    # --- Parameters ---
    THRESHOLDS = [10]
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)
    # --- End of parameters ---

    population = get_population(CITIES)

    morbidity = get_daily_morbidity(CITIES)
    morbidity_excess = get_morbidity_excess(morbidity,
                                            get_morbidity_mean(morbidity))
    excess_data = get_relative_weekly_morbidity_excess(morbidity_excess,
                                                       population)

    onsets = get_onsets_by_morbidity(excess_data, THRESHOLDS, winter=winter)

    # The output name encodes the winter months and the threshold used.
    filename = (f'results/onsets/russia'
                f'_winter{winter.START.month}-{winter.END.month}'
                f'_threshold{THRESHOLDS[0]}.png')
    draw_onset_distribution_by_week(
        onsets[THRESHOLDS[0]],
        CITIES,
        winter=winter,
        title='Epidemic number distribution in Russia',
        save_to_file=filename,
    )
예제 #3
0
def onset_distribution_paris():
    """Draw the weekly epidemic-onset distribution for ``PARIS``.

    Onsets are detected from the relative weekly morbidity excess with a
    single threshold inside a 1 October .. 31 March winter; the target PNG
    path under ``results/onsets/`` is handed to the drawing helper.
    """
    # --- Parameters ---
    THRESHOLDS = [5]
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)
    # --- End of parameters ---

    population = get_population(PARIS)

    morbidity = get_daily_morbidity(PARIS)
    morbidity_mean = get_morbidity_mean(morbidity)
    morbidity_excess = get_morbidity_excess(morbidity, morbidity_mean)
    excess_data = get_relative_weekly_morbidity_excess(morbidity_excess,
                                                       population)

    onsets = get_onsets_by_morbidity(excess_data, THRESHOLDS, winter=winter)

    # The output name encodes the winter months and the threshold used.
    filename = (f'results/onsets/paris'
                f'_winter{winter.START.month}-{winter.END.month}'
                f'_threshold{THRESHOLDS[0]}.png')
    draw_onset_distribution_by_week(
        onsets[THRESHOLDS[0]],
        PARIS,
        winter=winter,
        # Title typo fixed: "mordibity" -> "morbidity".
        title='Epidemic number distribution in Paris,\n'
        'determined with morbidity deviation (October — March)',
        save_to_file=filename)
예제 #4
0
def main():
    """Plot average AH' deviation around epidemic onsets for Russian cities.

    Onsets are detected from the relative weekly morbidity excess inside a
    1 November .. 31 March winter. For each city group in ``cases`` the
    averaged AH' deviation is computed and a PDF path under
    ``results/russia/morbidity/`` is handed to the plotting helper.
    """
    # --- Parameters ---
    THRESHOLDS = [30, 35, 40, 45]
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 11, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    city_resolver = get_city_resolver()
    population = get_population(CITIES)

    ah = get_ah(CITIES)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    morbidity = get_daily_morbidity(CITIES)
    morbidity_excess = get_morbidity_excess(morbidity,
                                            get_morbidity_mean(morbidity))
    excess_data = get_relative_weekly_morbidity_excess(morbidity_excess,
                                                       population)

    onsets = get_onsets_by_morbidity(excess_data, THRESHOLDS, winter=winter)

    # (city codes, file-name suffix, plot title)
    cases = [
        (['msk'], 'Moscow', 'Moscow'),
        (['spb'], 'SaintPetersburg', 'Saint Petersburg'),
        (['nsk'], 'Novosibirsk', 'Novosibirsk'),
        (['spb', 'msk', 'nsk'], 'spb,msk,nsk', 'All cities'),
    ]

    for city_group, name_suffix, title in cases:
        average_ah_dev = get_average_ah_vs_onsets(
            ah_dev, onsets, city_group, THRESHOLDS, DATE_SHIFT_RANGE,
            city_resolver)

        # File name carries the winter months and the min/max result keys.
        name_parts = (
            name_suffix,
            winter.START.month,
            winter.END.month,
            min(average_ah_dev.keys()),
            max(average_ah_dev.keys()),
        )
        filename = ('results/russia/morbidity/'
                    'rf_m_%s_winter%d-%d_threshold%s-%s.pdf' % name_parts)
        plot_average_ah_dev(
            average_ah_dev,
            THRESHOLD_COLORS,
            DATE_SHIFT_RANGE,
            title=title,
            save_to_file=filename,
        )
예제 #5
0
def onset_distribution_epidemiologists():
    """Draw the weekly distribution of onsets obtained from epidemiologists.

    Onsets come from ``get_onsets_by_epidemiologists`` (single pseudo
    threshold ``0``) and are drawn for a 1 October .. 31 March winter; the
    PNG path ``results/onsets/russia_epidemiologists.png`` is handed to the
    drawing helper.
    """
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    onsets = get_onsets_by_epidemiologists(CITIES, AH_FILE_PATTERN, [0])

    chart_title = ('Epidemic number distribution in Russia\n'
                   'given by Influenza Institute (October — March)')
    draw_onset_distribution_by_week(
        onsets[0],
        CITIES,
        winter=winter,
        title=chart_title,
        save_to_file='results/onsets/russia_epidemiologists.png',
    )
예제 #6
0
def get_onsets_by_morbidity(excess_data, thresholds, winter=None):
    """Detect epidemic onset dates per threshold and city.

    A date is recorded as an onset when the two chronologically preceding
    records both have an excess of at least the threshold, the date itself
    is in winter (``winter.is_winter``), and no onset was already recorded
    for that city less than ``winter.days_count`` days earlier.

    Args:
        excess_data: mapping ``city -> {"DD.MM.YYYY": excess}``.
        thresholds: iterable of excess thresholds to evaluate.
        winter: season object providing ``START``, ``END``, ``days_count``
            and ``is_winter(date)``. A fresh ``Winter()`` is created when
            omitted.

    Returns:
        ``{threshold: {city: [datetime.date, ...]}}``; every city of
        ``excess_data`` is present, onsets are in chronological order.

    Raises:
        ValueError: if a date key does not match ``DD.MM.YYYY``.
    """
    if winter is None:
        # Built per call rather than once at import time as a shared default.
        winter = Winter()

    # Only dates from winter END of 1986 up to (excluding) winter START of
    # 2015 are considered. The bounds are (year, month, day) tuples rather
    # than date objects so that an END on the 1st of a month (or on 29
    # February) cannot raise while building the bound.
    lower_bound = (1986, winter.END.month, winter.END.day)
    upper_bound = (2015, winter.START.month, winter.START.day)
    min_gap = datetime.timedelta(days=winter.days_count)

    # Parse and order every city's series once. Sorting by the real date is
    # required: the previous arithmetic key (day + 31*month + 366*year)
    # placed early-January dates before late-December ones.
    series_by_city = {}
    for city, records in excess_data.items():
        parsed = [
            (datetime.datetime.strptime(day, "%d.%m.%Y").date(), excess)
            for day, excess in records.items()
        ]
        parsed.sort(key=lambda item: item[0])
        series_by_city[city] = parsed

    onsets = {}
    for threshold in thresholds:
        per_city = onsets[threshold] = {}
        for city, series in series_by_city.items():
            found = per_city[city] = []

            # Sliding window over three consecutive records.
            for (_, prev2), (_, prev1), (current_date, _) in zip(
                    series, series[1:], series[2:]):
                stamp = (current_date.year, current_date.month,
                         current_date.day)
                if not lower_bound <= stamp < upper_bound:
                    continue

                if prev2 >= threshold and prev1 >= threshold \
                        and winter.is_winter(current_date):
                    # Cut off a second epidemic in the same winter.
                    if found and current_date - found[-1] < min_gap:
                        continue
                    found.append(current_date)
    return onsets
예제 #7
0
파일: usa.py 프로젝트: neseleznev/YSC
def get_onsets(excess_data, thresholds, winter=None):
    """Detect epidemic onset dates per threshold for states ``0..51``.

    A record's date is recorded as an onset when the two preceding records
    of the same state both have ``'excess'`` of at least the threshold, the
    date is in winter (``winter.is_winter``), and no onset was already
    recorded for that state less than ``winter.days_count`` days earlier.
    Records are taken in the order given.

    Args:
        excess_data: indexable by state number ``0..51``; each entry is a
            sequence of dicts with ``'date'`` and ``'excess'`` keys.
        thresholds: iterable of excess thresholds to evaluate.
        winter: season object providing ``START``, ``END``, ``days_count``
            and ``is_winter(date)``. A fresh ``Winter()`` is created when
            omitted.

    Returns:
        ``{threshold: {state: [date, ...]}}`` with an entry for each of the
        52 states.
    """
    if winter is None:
        # Built per call rather than once at import time as a shared default.
        winter = Winter()

    # Loop-invariant limits: dates on or before winter END of 1972 and on
    # or after winter START of 2002 are ignored.
    earliest = datetime.date(1972, winter.END.month, winter.END.day)
    latest = datetime.date(2002, winter.START.month, winter.START.day)
    min_gap = datetime.timedelta(days=winter.days_count)

    onsets = {}
    for threshold in thresholds:
        by_state = onsets[threshold] = {state: [] for state in range(52)}

        for state in range(52):
            records = excess_data[state]
            found = by_state[state]

            # The first two records have no two predecessors to inspect.
            for idx in range(2, len(records)):
                prev2, prev1, current = \
                    records[idx - 2], records[idx - 1], records[idx]
                current_date = current['date']

                if current_date <= earliest or current_date >= latest:
                    continue

                if prev2['excess'] >= threshold \
                        and prev1['excess'] >= threshold \
                        and winter.is_winter(current_date):
                    # Cut off a second epidemic in the same winter.
                    if found and current_date - found[-1] < min_gap:
                        continue
                    found.append(current_date)
    return onsets
예제 #8
0
def hypothesis_test_epidemiologists():
    """Run Student's and Welch's t-tests on samples built from expert onsets.

    Onsets come from ``get_onsets_by_epidemiologists`` with the single
    pseudo threshold ``0``. Control and experimental samples are requested
    under ``results/stats/russia_epid/``, read back as JSON, and the sample
    sizes and both P-values are printed.
    """
    THRESHOLDS = [0]
    threshold = 0

    def read_json(path):
        # Small reader shared by both sample files.
        with open(path, 'r') as f:
            return json.load(f)

    city_resolver = get_city_resolver()
    ah = get_ah(CITIES)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    onsets = get_onsets_by_epidemiologists(CITIES, AH_FILE_PATTERN, THRESHOLDS)
    years = range(1986, 2015)

    # Imported here, as in the original, rather than at module level.
    from hypothesis import generate_control_sample, generate_experimental_sample

    # Each generator receives its own default Winter() instance.
    generate_control_sample(
        onsets, threshold, ah_dev, Winter(), CITIES, city_resolver, years,
        filename=f'results/stats/russia_epid/ah_sample.{threshold}.json')
    generate_experimental_sample(
        onsets, threshold, ah_dev, Winter(), CITIES, city_resolver,
        filename=f'results/stats/russia_epid/epidemic_sample.{threshold}.json')

    print(f'threshold {threshold}')
    ah_sample = read_json(
        f'results/stats/russia_epid/ah_sample.{threshold}.json')
    epidemic_sample = read_json(
        f'results/stats/russia_epid/epidemic_sample.{threshold}.json')

    print(f"AH' sample size = {len(ah_sample)}")
    print(f"Epidemic sample size = {len(epidemic_sample)}")
    _, prob = stats.ttest_ind(ah_sample, epidemic_sample)
    print(f"Equal variance (Student's t-test): P-value = {prob}")
    _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
    print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
    print()
예제 #9
0
def main_paris():
    """Plot average AH' deviation around epidemic onsets for ``PARIS``.

    Onsets are detected from the relative weekly morbidity excess inside a
    1 November .. 31 March winter; the averaged AH' deviation is then handed
    to the plotting helper together with a PDF path under
    ``results/paris/``.
    """
    state_resolver = get_city_resolver()
    population = get_population(PARIS)

    # --- Parameters ---
    THRESHOLDS = [9, 10, 20, 30]

    winter = Winter()
    winter.START = datetime.date(winter.START.year, 11, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    ah = get_ah(PARIS)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    morbidity = get_daily_morbidity(PARIS)
    morbidity_excess = get_morbidity_excess(morbidity,
                                            get_morbidity_mean(morbidity))
    excess_data = get_relative_weekly_morbidity_excess(morbidity_excess,
                                                       population)

    onsets = get_onsets_by_morbidity(excess_data, THRESHOLDS, winter=winter)

    average_ah_dev = get_average_ah_vs_onsets(
        ah_dev, onsets, PARIS, THRESHOLDS, DATE_SHIFT_RANGE, state_resolver)

    # Title shows the winter's month names; the file name its month numbers
    # and the largest key of the result.
    title = (f'Île-de-France: outbreaks in '
             f'{winter.START.strftime("%B")} — {winter.END.strftime("%B")}')
    name_parts = (winter.START.month, winter.END.month,
                  max(average_ah_dev.keys()))
    filename = 'results/paris/paris_winter%d-%d_threshold%s.pdf' % name_parts
    plot_average_ah_dev(
        average_ah_dev,
        THRESHOLD_COLORS,
        DATE_SHIFT_RANGE,
        limits=(-11e-4, 15e-4),
        title=title,
        save_to_file=filename,
    )
예제 #10
0
파일: usa.py 프로젝트: neseleznev/YSC
def winter_range_investigation():
    """Plot AH' vs. onset day for a series of alternative winter ranges.

    For each ``(start_month, end_month)`` pair a winter running from the 1st
    of the start month to the last day of the end month is configured,
    onsets are recomputed, and a PDF path under
    ``results/winter_range_usa/`` is handed to the plotting helper.
    """
    # Local import: the rest of the file's import block is not visible here.
    import calendar

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)

    # (winter start month, winter end month)
    month_ranges = (
        (12, 2),
        (12, 3),
        (11, 2),
        (11, 3),
        (11, 4),
        (10, 3),
        (10, 4),
        (10, 5),
        (9, 4),
        (9, 5),
    )

    for start_month, end_month in month_ranges:
        winter = Winter()
        winter.START = datetime.date(winter.START.year, start_month, 1)

        # Real length of the end month in the winter's end year. This
        # replaces a hand-written table that fixed February at 29 days
        # (a ValueError for a non-leap end year).
        end_year = winter.END.year
        last_day = calendar.monthrange(end_year, end_month)[1]
        winter.END = datetime.date(end_year, end_month, last_day)

        onsets = get_onsets(excess_data, THRESHOLDS, winter)

        average_ah_dev = get_average_ah_vs_onsets(ah_dev, onsets,
                                                  CONTIGUOUS_STATES,
                                                  THRESHOLDS, DATE_SHIFT_RANGE,
                                                  state_resolver)

        rng = f'{winter.START.strftime("%B")} — {winter.END.strftime("%B")}'
        title = 'AH\' v. Onset Day: outbreaks in ' + rng
        filename = 'results/winter_range_usa/usa_winter%d-%d.pdf' % (
            winter.START.month, winter.END.month)
        plot_average_ah_dev(average_ah_dev,
                            THRESHOLD_COLORS,
                            DATE_SHIFT_RANGE,
                            limits=(-3.3e-4, 2.2e-4),
                            title=title,
                            save_to_file=filename)
예제 #11
0
파일: usa.py 프로젝트: neseleznev/YSC
def main():
    """Plot AH' vs. onset day for several groups of US states.

    Onsets are computed once for a 1 October .. 30 April winter; for each
    region the averaged AH' deviation is handed to the plotting helper with
    a PDF path under ``results/usa/``.
    """
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 4, 30)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)

    # (file-name suffix, title suffix, state list)
    regions = [
        ('sw', 'Southwest States', SW_STATES),
        ('ne', 'Northeast States', NE_STATES),
        ('gulf', 'Gulf States', GULF_STATES),
        ('the_rest', 'The Remained States', REST_STATES),
    ]

    for name_suffix, title_suffix, sites in regions:
        average_ah_dev = get_average_ah_vs_onsets(
            ah_dev, onsets, sites, THRESHOLDS, DATE_SHIFT_RANGE,
            state_resolver)

        plot_title = 'AH\' v. Onset Day: ' + title_suffix
        target = 'results/usa/usa_winter10-4_%s.pdf' % name_suffix
        plot_average_ah_dev(
            average_ah_dev,
            THRESHOLD_COLORS,
            DATE_SHIFT_RANGE,
            limits=(-7e-4, 5e-4),
            title=plot_title,
            save_to_file=target,
        )
예제 #12
0
파일: usa.py 프로젝트: neseleznev/YSC
def stats_all_country():
    """Run t-tests on AH' samples for all contiguous states per threshold.

    For every threshold in ``THRESHOLDS`` a control and an experimental
    sample are requested under ``results/stats/usa/``; the JSON files are
    then read back and the statistic and P-value of Student's and Welch's
    t-tests are printed.
    """
    def read_json(path):
        # Small reader shared by both sample files of a threshold.
        with open(path, 'r') as f:
            return json.load(f)

    # Season used for onset detection: 1 October .. 31 March.
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)
    years = range(1972, 2002)

    # First pass: request the samples for the whole country.
    # NOTE(review): the generators get a default Winter(), not the
    # customised `winter` used for the onsets -- confirm this is intended.
    for threshold in THRESHOLDS:
        generate_control_sample(
            onsets, threshold, ah_dev, Winter(), CONTIGUOUS_STATES,
            state_resolver, years,
            filename=f'results/stats/usa/ah_sample.{threshold}.json')
        generate_experimental_sample(
            onsets, threshold, ah_dev, Winter(), CONTIGUOUS_STATES,
            state_resolver,
            filename=f'results/stats/usa/epidemic_sample.{threshold}.json')

    # Second pass: read the samples back and compare their means.
    for threshold in THRESHOLDS:
        print(f'threshold {threshold}:')
        ah_sample = read_json(f'results/stats/usa/ah_sample.{threshold}.json')
        epidemic_sample = read_json(
            f'results/stats/usa/epidemic_sample.{threshold}.json')

        # Student's t-test (equal variance), then Welch's (unequal).
        for extra in ({}, {'equal_var': False}):
            t, prob = stats.ttest_ind(ah_sample, epidemic_sample, **extra)
            print(t, prob)
예제 #13
0
파일: usa.py 프로젝트: neseleznev/YSC
def stats_regions():
    """Run Welch's t-test on AH' samples for each US region.

    Uses the last threshold of ``THRESHOLDS``. For every region a control
    and an experimental sample are requested under
    ``results/stats/usa/regions/``; the JSON files are then read back and
    the sample sizes and the Welch P-value are printed. Regions whose
    sample files are missing or not valid JSON are skipped.
    """
    # Season used for onsets and sampling: 1 October .. 31 March.
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)
    years = range(1972, 2002)

    # For regions
    threshold = THRESHOLDS[-1]  # The strongest (0.02 per the original note)
    regions = {
        'sw': SW_STATES,
        'ne': NE_STATES,
        'gulf': GULF_STATES,
        'the_rest': REST_STATES
    }

    for region_name, region in regions.items():
        generate_control_sample(
            onsets,
            threshold,
            ah_dev,
            winter,
            region,
            state_resolver,
            years,
            filename=
            f'results/stats/usa/regions/control.{region_name}.{threshold}.json'
        )
        generate_experimental_sample(
            onsets,
            threshold,
            ah_dev,
            winter,
            region,
            state_resolver,
            filename=
            f'results/stats/usa/regions/experimental.{region_name}.{threshold}.json'
        )

    for region_name, region in regions.items():
        try:
            with open(
                    f'results/stats/usa/regions/control.{region_name}.{threshold}.json',
                    'r') as f:
                ah_sample = json.load(f)
            with open(
                    f'results/stats/usa/regions/experimental.{region_name}.{threshold}.json',
                    'r') as f:
                epidemic_sample = json.load(f)
        except (OSError, ValueError):
            # Best effort: skip a region whose files are missing, unreadable
            # or not valid JSON (json.JSONDecodeError is a ValueError).
            # Anything else, including Ctrl-C, now propagates.
            continue

        print(f'Region {region_name} ({len(region)} states)')
        print(f"AH' sample size = {len(ah_sample)}")
        print(f"Epidemic sample size = {len(epidemic_sample)}")
        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
        print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
        print()
예제 #14
0
파일: usa.py 프로젝트: neseleznev/YSC
def stats_joint():
    """Plot and test AH' samples while excluding the top-ranked states.

    First plots the averaged AH' deviation for the contiguous states minus
    the first 24 entries of ``distinct_states()``. Then, for ``i`` from 0
    to ``len(top_dip) - 1``, excludes the first ``i`` entries, collects the
    per-state control samples from ``results/stats/usa/distinct/``,
    requests a joint experimental sample under ``results/stats/usa/joint/``
    and prints the sample sizes and the Welch P-value.

    Assumes ``stats_distinct_states`` has already been run for every state;
    per-state control files that are missing or invalid are skipped.
    """
    # Season used for onset detection: 1 October .. 31 March.
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)

    top_dip = distinct_states()
    # Local list of state numbers; named differently from the module-level
    # CONTIGUOUS_STATES constant so that it no longer shadows it.
    all_sites = [1] + list(range(3, 12)) + list(range(13, 52))
    NOT_TOP_STATES = list(set(all_sites) -
                          set(top_dip[:24]))  # exclude top 24 AH' lowest

    average_ah_dev = get_average_ah_vs_onsets(ah_dev, onsets, NOT_TOP_STATES,
                                              THRESHOLDS, DATE_SHIFT_RANGE,
                                              state_resolver)

    plot_average_ah_dev(average_ah_dev,
                        THRESHOLD_COLORS,
                        DATE_SHIFT_RANGE,
                        limits=(-7e-4, 5e-4),
                        title='AH\' v. Onset Day: The Remained States',
                        save_to_file='results/usa/usa_top.pdf')

    # For joint states test
    threshold = THRESHOLDS[-1]  # The strongest
    # NOTE(review): this list is never reset inside the loop below, so the
    # control sample accumulates (with duplicates) across iterations. That
    # looks unintended, but is preserved here -- confirm before relying on
    # the printed P-values.
    ah_sample = []

    for i in range(len(top_dip)):
        # Exclude the first i entries of top_dip.
        remaining_sites = list(set(all_sites) - set(top_dip[:i]))

        for site in remaining_sites:
            try:
                with open(
                        f'results/stats/usa/distinct/control.{site}.{threshold}.json',
                        'r') as f:
                    ah_sample += json.load(f)
            except (OSError, ValueError):
                # Best effort: skip a state whose control file is missing,
                # unreadable or not valid JSON. Other errors propagate.
                continue

        joint_file = f'results/stats/usa/joint/sites_cnt{len(remaining_sites)}.{threshold}.json'
        generate_experimental_sample(
            onsets,
            threshold,
            ah_dev,
            Winter(),
            remaining_sites,
            state_resolver,
            filename=joint_file
        )

        with open(joint_file, 'r') as f:
            epidemic_sample = json.load(f)

        print(f'Some {len(remaining_sites)} states')
        print(f"AH' sample size = {len(ah_sample)}")
        print(f"Epidemic sample size = {len(epidemic_sample)}")
        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
        print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
        print()
예제 #15
0
파일: usa.py 프로젝트: neseleznev/YSC
def stats_distinct_states():
    """Run Welch's t-test per state and print a LaTeX-style summary.

    Uses the last threshold of ``THRESHOLDS``. Per-state control and
    experimental samples are requested under
    ``results/stats/usa/distinct/`` and read back. For every state a table
    row (name, epidemic sample size, P-value, bold when below 0.05) is
    printed, followed by an overall summary. States whose sample files are
    missing or not valid JSON are skipped.
    """
    # Season used for onset detection: 1 October .. 31 March.
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)
    years = range(1972, 2002)

    # For distinct states
    threshold = THRESHOLDS[-1]  # The strongest (0.02 per the original note)

    # NOTE(review): generation skips the first entry of CONTIGUOUS_STATES,
    # while the reading loop below covers all of them -- confirm intent.
    for site in CONTIGUOUS_STATES[1:]:
        generate_control_sample(
            onsets,
            threshold,
            ah_dev,
            Winter(), [site],
            state_resolver,
            years,
            filename=
            f'results/stats/usa/distinct/control.{site}.{threshold}.json')
        generate_experimental_sample(
            onsets,
            threshold,
            ah_dev,
            Winter(), [site],
            state_resolver,
            filename=
            f'results/stats/usa/distinct/experimental.{site}.{threshold}.json')

    different = []  # (state name, P-value) with P < 0.05
    equal = []      # (state name, P-value) otherwise

    for site in CONTIGUOUS_STATES:
        try:
            with open(
                    f'results/stats/usa/distinct/control.{site}.{threshold}.json',
                    'r') as f:
                ah_sample = json.load(f)
            with open(
                    f'results/stats/usa/distinct/experimental.{site}.{threshold}.json',
                    'r') as f:
                epidemic_sample = json.load(f)
        except (OSError, ValueError):
            # Best effort: skip a state whose files are missing, unreadable
            # or not valid JSON (json.JSONDecodeError is a ValueError).
            # Anything else, including Ctrl-C, now propagates.
            continue

        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
        significant = prob < 0.05

        # One table row per state; significant P-values are set in bold.
        prob_str = "\\textbf{" + str(prob)[:7] + "}" if significant else str(
            prob)[:7]
        print(f"{state_resolver[site]['name']} & {len(epidemic_sample)} & " +
              prob_str + " \\\\\n\\hline")

        bucket = different if significant else equal
        bucket.append((
            state_resolver[site]['name'],
            prob,
        ))

    print(f"\n\n\nOverall {len(different) + len(equal)} states:")
    print(f'\t{len(different)} different avg: {[x[0] for x in different]}')
    print(f'\t{len(equal)} equal avg: {[x[0] for x in equal]}')
    print()
    diff_prob, eq_prob = [x[1] for x in different], [x[1] for x in equal]
    if diff_prob:
        print(
            f'Different P-value variance: [{min(diff_prob)} ... {max(diff_prob)}]'
        )
    if eq_prob:
        print(f'Equal P-value variance: [{min(eq_prob)} ... {max(eq_prob)}]')