def stats_all_country():
    """Generate and t-test control/experimental samples for the contiguous states.

    Onsets are computed by ``get_onsets`` from the mortality-excess data with
    the winter window set to 1 October - 31 March.  For every value in
    ``THRESHOLDS`` a control and an experimental sample are written under
    ``results/stats/usa/``; each pair is then read back and passed to
    ``stats.ttest_ind`` twice (default arguments, then ``equal_var=False``),
    printing the returned statistic and p-value each time.
    """

    def read_json(path):
        with open(path, 'r') as src:
            return json.load(src)

    season = Winter()
    season.START = datetime.date(season.START.year, 10, 1)
    season.END = datetime.date(season.END.year, 3, 31)

    resolver = get_state_resolver(STATE_CODES_FILE)
    humidity = get_ah(AH_CSV_FILE)
    humidity_dev = get_ah_deviation(humidity, get_ah_mean(humidity))
    mortality_excess = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(mortality_excess, THRESHOLDS, season)
    study_years = range(1972, 2002)

    # First pass: write both sample files for every threshold.
    # NOTE(review): the generators receive a fresh default Winter(), not the
    # October - March window configured above -- confirm this is intended.
    for level in THRESHOLDS:
        generate_control_sample(
            onsets, level, humidity_dev, Winter(), CONTIGUOUS_STATES,
            resolver, study_years,
            filename=f'results/stats/usa/ah_sample.{level}.json')
        generate_experimental_sample(
            onsets, level, humidity_dev, Winter(), CONTIGUOUS_STATES,
            resolver,
            filename=f'results/stats/usa/epidemic_sample.{level}.json')

    # Second pass: read the samples back and report both test variants.
    for level in THRESHOLDS:
        print(f'threshold {level}:')
        control = read_json(f'results/stats/usa/ah_sample.{level}.json')
        experimental = read_json(
            f'results/stats/usa/epidemic_sample.{level}.json')

        statistic, p_value = stats.ttest_ind(control, experimental)
        print(statistic, p_value)
        statistic, p_value = stats.ttest_ind(control, experimental,
                                             equal_var=False)
        print(statistic, p_value)
def hypothesis_test_paris():
    """Generate and t-test control/experimental samples for PARIS.

    For each threshold in a fixed local list, a control and an experimental
    sample are written under ``results/stats/paris/`` using a winter window
    of 1 October - 31 March and the years 1986-2014.  Each pair is then read
    back and its sizes plus the p-values of ``stats.ttest_ind`` (default
    arguments, then ``equal_var=False``) are printed.
    """

    def read_json(path):
        with open(path, 'r') as src:
            return json.load(src)

    levels = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30]

    season = Winter()
    season.START = datetime.date(season.START.year, 10, 1)
    season.END = datetime.date(season.END.year, 3, 31)

    resolver = get_city_resolver()
    population = get_population(PARIS)
    humidity = get_ah(PARIS)
    humidity_dev = get_ah_deviation(humidity, get_ah_mean(humidity))
    daily = get_daily_morbidity(PARIS)
    daily_excess = get_morbidity_excess(daily, get_morbidity_mean(daily))
    weekly_excess = get_relative_weekly_morbidity_excess(daily_excess,
                                                         population)
    # NOTE(review): no ``winter`` argument is passed here, so onset detection
    # uses get_onsets_by_morbidity's default window rather than ``season``,
    # while the sample generators below do receive ``season`` -- confirm.
    onsets = get_onsets_by_morbidity(weekly_excess, levels)
    study_years = range(1986, 2015)

    # First pass: write both sample files for every threshold.
    for level in levels:
        generate_control_sample(
            onsets, level, humidity_dev, season, PARIS, resolver,
            study_years,
            filename=f'results/stats/paris/ah_sample.{level}.json')
        generate_experimental_sample(
            onsets, level, humidity_dev, season, PARIS, resolver,
            filename=f'results/stats/paris/epidemic_sample.{level}.json')

    # Second pass: read them back and report.
    for level in levels:
        print(f'threshold {level}')
        control = read_json(f'results/stats/paris/ah_sample.{level}.json')
        experimental = read_json(
            f'results/stats/paris/epidemic_sample.{level}.json')

        print(f"AH' sample size = {len(control)}")
        print(f"Epidemic sample size = {len(experimental)}")
        _, p_value = stats.ttest_ind(control, experimental)
        print(f"Equal variance (Student's t-test): P-value = {p_value}")
        _, p_value = stats.ttest_ind(control, experimental, equal_var=False)
        print(f"Not equal variance (Welch’s t-test): P-value = {p_value}")
        print()
def onset_distribution_paris():
    """Draw the weekly distribution of morbidity-based onsets for PARIS.

    Onsets are detected with a single threshold of 5 and a winter window of
    1 October - 31 March; the chart is saved under ``results/onsets/`` with
    the window months and the threshold encoded in the file name.
    """
    # Params
    THRESHOLDS = [5]
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)
    # End params

    population = get_population(PARIS)
    morbidity = get_daily_morbidity(PARIS)
    morbidity_mean = get_morbidity_mean(morbidity)
    morbidity_excess = get_morbidity_excess(morbidity, morbidity_mean)
    excess_data = get_relative_weekly_morbidity_excess(morbidity_excess,
                                                       population)
    onsets = get_onsets_by_morbidity(excess_data, THRESHOLDS, winter=winter)

    filename = f'results/onsets/paris' \
               f'_winter{winter.START.month}-{winter.END.month}' \
               f'_threshold{THRESHOLDS[0]}.png'
    draw_onset_distribution_by_week(
        onsets[THRESHOLDS[0]], PARIS, winter=winter,
        # Title previously read "mordibity"; corrected spelling.
        title='Epidemic number distribution in Paris,\n'
              'determined with morbidity deviation (October — March)',
        save_to_file=filename)
def onset_distribution():
    """Draw the weekly distribution of morbidity-based onsets for CITIES.

    Uses a single threshold of 10 and a winter window of 1 October -
    31 March; the chart is titled for Russia and saved under
    ``results/onsets/`` with the window months and threshold in its name.
    """
    level = 10

    season = Winter()
    season.START = datetime.date(season.START.year, 10, 1)
    season.END = datetime.date(season.END.year, 3, 31)

    population = get_population(CITIES)
    daily = get_daily_morbidity(CITIES)
    daily_excess = get_morbidity_excess(daily, get_morbidity_mean(daily))
    weekly_excess = get_relative_weekly_morbidity_excess(daily_excess,
                                                         population)
    onsets = get_onsets_by_morbidity(weekly_excess, [level], winter=season)

    target = (f'results/onsets/russia'
              f'_winter{season.START.month}-{season.END.month}'
              f'_threshold{level}.png')
    draw_onset_distribution_by_week(
        onsets[level], CITIES, winter=season,
        title='Epidemic number distribution in Russia',
        save_to_file=target)
def get_onsets_by_morbidity(excess_data, thresholds, winter=None):
    """Find epidemic onset dates per threshold and city from morbidity excess.

    Entries of each city are ordered by date.  An entry is recorded as an
    onset when the two entries immediately before it both have an excess of
    at least the threshold and ``winter.is_winter`` accepts the entry's own
    date.  Entries dated on or before (``winter.END`` month, day - 1) of 1986
    or on or after (``winter.START`` month, day) of 2015 are ignored, and a
    candidate closer than ``winter.days_count`` days to the city's previously
    recorded onset is dropped (second epidemic in the same winter).

    Args:
        excess_data: mapping ``city -> {"DD.MM.YYYY": excess_value}``.
        thresholds: iterable of threshold values.
        winter: object exposing ``START``, ``END``, ``days_count`` and
            ``is_winter(date)``.  A fresh ``Winter()`` is created per call
            when omitted (previously a single instance was built once at
            definition time).

    Returns:
        ``{threshold: {city: [datetime.date, ...]}}``; every city appears
        under every threshold, possibly with an empty list.

    Raises:
        ValueError: if a date key does not match ``%d.%m.%Y`` or the winter
            bounds do not form valid dates in 1986 / 2015.
    """
    if winter is None:
        winter = Winter()

    # Loop-invariant limits of the studied period (years are fixed here).
    # NOTE(review): the "- 1" presumably keeps a 29 February END valid in
    # non-leap 1986 -- confirm.
    period_start = datetime.date(1986, winter.END.month, winter.END.day - 1)
    period_end = datetime.date(2015, winter.START.month, winter.START.day)
    min_gap = datetime.timedelta(days=winter.days_count)

    # Parse and order every city's series once.  Sorting on the parsed date
    # replaces the former "day + 31 * month + 366 * year" key, which placed
    # 27-31 December after the first days of the following January.
    timelines = {}
    for city, records in excess_data.items():
        dated = [
            (datetime.datetime.strptime(stamp, "%d.%m.%Y").date(), value)
            for stamp, value in records.items()
        ]
        dated.sort(key=lambda entry: entry[0])
        timelines[city] = dated

    onsets = {}
    for threshold in thresholds:
        per_city = onsets[threshold] = {}
        for city, timeline in timelines.items():
            found = per_city[city] = []
            # Slide a window of (two predecessors, current entry).
            window = zip(timeline, timeline[1:], timeline[2:])
            for (_, before2), (_, before1), (current_date, _) in window:
                if current_date <= period_start or current_date >= period_end:
                    continue
                if not (before2 >= threshold and before1 >= threshold
                        and winter.is_winter(current_date)):
                    continue
                # Cut off a second epidemic in the same winter-time.
                if found and current_date - found[-1] < min_gap:
                    continue
                found.append(current_date)
    return onsets
def get_onsets(excess_data, thresholds, winter=None):
    """Find epidemic onset dates per threshold and state from excess records.

    For each of the 52 state indices, a record is taken as an onset when the
    two records immediately before it both have ``'excess'`` of at least the
    threshold and ``winter.is_winter`` accepts the record's own ``'date'``.
    Records dated on or before (``winter.END`` month, day) of 1972 or on or
    after (``winter.START`` month, day) of 2002 are ignored, and a candidate
    closer than ``winter.days_count`` days to the state's previously recorded
    onset is dropped (second epidemic in the same winter).

    Args:
        excess_data: indexable by state number ``0..51``; each item is a
            sequence of mappings with ``'date'`` and ``'excess'`` keys, read
            in the order given.
        thresholds: iterable of threshold values.
        winter: object exposing ``START``, ``END``, ``days_count`` and
            ``is_winter(date)``.  A fresh ``Winter()`` is created per call
            when omitted (previously a single instance was built once at
            definition time).

    Returns:
        ``{threshold: {state: [date, ...]}}`` with all 52 state keys present
        under every threshold.
    """
    if winter is None:
        winter = Winter()

    # Loop-invariant limits of the studied period (years are fixed here).
    period_start = datetime.date(1972, winter.END.month, winter.END.day)
    period_end = datetime.date(2002, winter.START.month, winter.START.day)
    min_gap = datetime.timedelta(days=winter.days_count)

    onsets = {}
    for threshold in thresholds:
        per_state = onsets[threshold] = {state: [] for state in range(52)}
        for state, found in per_state.items():
            series = excess_data[state]
            # The first two records have no pair of predecessors.
            for idx in range(2, len(series)):
                current_date = series[idx]['date']
                if current_date <= period_start or current_date >= period_end:
                    continue
                if not (series[idx - 2]['excess'] >= threshold
                        and series[idx - 1]['excess'] >= threshold
                        and winter.is_winter(current_date)):
                    continue
                # Cut off a second epidemic in the same winter-time.
                if found and current_date - found[-1] < min_gap:
                    continue
                found.append(current_date)
    return onsets
def hypothesis_test_epidemiologists():
    """Generate and t-test samples for CITIES using epidemiologists' onsets.

    Onsets come from ``get_onsets_by_epidemiologists`` with the single
    threshold 0.  A control and an experimental sample are written under
    ``results/stats/russia_epid/`` with a default ``Winter()`` and the years
    1986-2014, then read back; their sizes and the p-values of
    ``stats.ttest_ind`` (default arguments, then ``equal_var=False``) are
    printed.
    """
    level = 0

    resolver = get_city_resolver()
    humidity = get_ah(CITIES)
    humidity_dev = get_ah_deviation(humidity, get_ah_mean(humidity))
    onsets = get_onsets_by_epidemiologists(CITIES, AH_FILE_PATTERN, [level])
    study_years = range(1986, 2015)

    from hypothesis import generate_control_sample, generate_experimental_sample

    control_path = f'results/stats/russia_epid/ah_sample.{level}.json'
    experimental_path = f'results/stats/russia_epid/epidemic_sample.{level}.json'

    generate_control_sample(
        onsets, level, humidity_dev, Winter(), CITIES, resolver, study_years,
        filename=control_path)
    generate_experimental_sample(
        onsets, level, humidity_dev, Winter(), CITIES, resolver,
        filename=experimental_path)

    print(f'threshold {level}')
    with open(control_path, 'r') as src:
        control = json.load(src)
    with open(experimental_path, 'r') as src:
        experimental = json.load(src)

    print(f"AH' sample size = {len(control)}")
    print(f"Epidemic sample size = {len(experimental)}")
    _, p_value = stats.ttest_ind(control, experimental)
    print(f"Equal variance (Student's t-test): P-value = {p_value}")
    _, p_value = stats.ttest_ind(control, experimental, equal_var=False)
    print(f"Not equal variance (Welch’s t-test): P-value = {p_value}")
    print()
def main():
    """Plot average AH deviation around morbidity-based onsets for CITIES.

    Onsets are detected for thresholds 30, 35, 40 and 45 with a winter window
    of 1 November - 31 March.  One PDF is written under
    ``results/russia/morbidity/`` for each of Moscow, Saint Petersburg,
    Novosibirsk and all three cities together.
    """
    levels = [30, 35, 40, 45]

    season = Winter()
    season.START = datetime.date(season.START.year, 11, 1)
    season.END = datetime.date(season.END.year, 3, 31)

    resolver = get_city_resolver()
    population = get_population(CITIES)
    humidity = get_ah(CITIES)
    humidity_dev = get_ah_deviation(humidity, get_ah_mean(humidity))
    daily = get_daily_morbidity(CITIES)
    daily_excess = get_morbidity_excess(daily, get_morbidity_mean(daily))
    weekly_excess = get_relative_weekly_morbidity_excess(daily_excess,
                                                         population)
    onsets = get_onsets_by_morbidity(weekly_excess, levels, winter=season)

    # (city codes, file-name part, plot title)
    plots = [
        (['msk'], 'Moscow', 'Moscow'),
        (['spb'], 'SaintPetersburg', 'Saint Petersburg'),
        (['nsk'], 'Novosibirsk', 'Novosibirsk'),
        (['spb', 'msk', 'nsk'], 'spb,msk,nsk', 'All cities'),
    ]
    for city_codes, file_part, plot_title in plots:
        curves = get_average_ah_vs_onsets(
            humidity_dev, onsets, city_codes, levels, DATE_SHIFT_RANGE,
            resolver)
        lowest, highest = min(curves.keys()), max(curves.keys())
        target = 'results/russia/morbidity/' \
                 'rf_m_%s_winter%d-%d_threshold%s-%s.pdf' % (
                     file_part, season.START.month, season.END.month,
                     lowest, highest)
        plot_average_ah_dev(curves, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
                            title=plot_title, save_to_file=target)
def onset_distribution_epidemiologists():
    """Draw the weekly distribution of epidemiologist-provided onsets.

    Onsets for CITIES are read via ``get_onsets_by_epidemiologists`` with the
    single threshold 0 and drawn for a winter window of 1 October - 31 March
    into ``results/onsets/russia_epidemiologists.png``.
    """
    season = Winter()
    season.START = datetime.date(season.START.year, 10, 1)
    season.END = datetime.date(season.END.year, 3, 31)

    onsets_by_level = get_onsets_by_epidemiologists(
        CITIES, AH_FILE_PATTERN, [0])

    draw_onset_distribution_by_week(
        onsets_by_level[0], CITIES, winter=season,
        title='Epidemic number distribution in Russia\n'
              'given by Influenza Institute (October — March)',
        save_to_file='results/onsets/russia_epidemiologists.png')
def main_paris():
    """Plot average AH deviation around morbidity-based onsets for PARIS.

    Onsets are detected for thresholds 9, 10, 20 and 30 with a winter window
    of 1 November - 31 March; the resulting curves are plotted into a PDF
    under ``results/paris/`` whose name carries the window months and the
    largest threshold present in the result.
    """
    resolver = get_city_resolver()
    population = get_population(PARIS)

    # Parameters
    levels = [9, 10, 20, 30]
    season = Winter()
    season.START = datetime.date(season.START.year, 11, 1)
    season.END = datetime.date(season.END.year, 3, 31)

    humidity = get_ah(PARIS)
    humidity_dev = get_ah_deviation(humidity, get_ah_mean(humidity))
    daily = get_daily_morbidity(PARIS)
    daily_excess = get_morbidity_excess(daily, get_morbidity_mean(daily))
    weekly_excess = get_relative_weekly_morbidity_excess(daily_excess,
                                                         population)
    onsets = get_onsets_by_morbidity(weekly_excess, levels, winter=season)

    curves = get_average_ah_vs_onsets(
        humidity_dev, onsets, PARIS, levels, DATE_SHIFT_RANGE, resolver)

    plot_title = f'Île-de-France: outbreaks in ' \
                 f'{season.START.strftime("%B")} — {season.END.strftime("%B")}'
    target = 'results/paris/paris_winter%d-%d_threshold%s.pdf' % (
        season.START.month, season.END.month, max(curves.keys()))
    plot_average_ah_dev(curves, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
                        limits=(-11e-4, 15e-4), title=plot_title,
                        save_to_file=target)
def winter_range_investigation():
    """Plot AH deviation vs. onset day for several winter month ranges (USA).

    For each (start month, end month) pair the winter window runs from the
    first day of the start month to the last day of the end month.  Onsets
    are recomputed with ``get_onsets`` for that window and the averaged
    curves for CONTIGUOUS_STATES are saved to
    ``results/winter_range_usa/usa_winter<start>-<end>.pdf``.
    """
    # Function-scope import: only this function needs the module.
    import calendar

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)
    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)

    month_ranges = (
        (12, 2), (12, 3),
        (11, 2), (11, 3), (11, 4),
        (10, 3), (10, 4), (10, 5),
        (9, 4), (9, 5),
    )
    for start_month, end_month in month_ranges:
        winter = Winter()
        winter.START = datetime.date(winter.START.year, start_month, 1)

        # Ask the calendar for the month length instead of a hand-written
        # table: the old table answered 29 for any month it did not list
        # and assumed February always falls in a leap year.
        end_year = winter.END.year
        last_day = calendar.monthrange(end_year, end_month)[1]
        winter.END = datetime.date(end_year, end_month, last_day)

        onsets = get_onsets(excess_data, THRESHOLDS, winter)
        average_ah_dev = get_average_ah_vs_onsets(
            ah_dev, onsets, CONTIGUOUS_STATES, THRESHOLDS, DATE_SHIFT_RANGE,
            state_resolver)

        rng = f'{winter.START.strftime("%B")} — {winter.END.strftime("%B")}'
        title = 'AH\' v. Onset Day: outbreaks in ' + rng
        filename = 'results/winter_range_usa/usa_winter%d-%d.pdf' % (
            winter.START.month, winter.END.month)
        plot_average_ah_dev(average_ah_dev, THRESHOLD_COLORS,
                            DATE_SHIFT_RANGE, limits=(-3.3e-4, 2.2e-4),
                            title=title, save_to_file=filename)
def main():
    """Plot average AH deviation around onsets for four groups of US states.

    Onsets are computed from the mortality-excess data with a winter window
    of 1 October - 30 April.  One PDF per group (SW_STATES, NE_STATES,
    GULF_STATES, REST_STATES) is written to
    ``results/usa/usa_winter10-4_<suffix>.pdf``.
    """
    season = Winter()
    season.START = datetime.date(season.START.year, 10, 1)
    season.END = datetime.date(season.END.year, 4, 30)

    resolver = get_state_resolver(STATE_CODES_FILE)
    humidity = get_ah(AH_CSV_FILE)
    humidity_dev = get_ah_deviation(humidity, get_ah_mean(humidity))
    mortality_excess = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(mortality_excess, THRESHOLDS, season)

    # (file-name suffix, title suffix, state list)
    groups = (
        ('sw', 'Southwest States', SW_STATES),
        ('ne', 'Northeast States', NE_STATES),
        ('gulf', 'Gulf States', GULF_STATES),
        ('the_rest', 'The Remained States', REST_STATES),
    )
    for file_suffix, title_suffix, states in groups:
        curves = get_average_ah_vs_onsets(
            humidity_dev, onsets, states, THRESHOLDS, DATE_SHIFT_RANGE,
            resolver)
        plot_title = 'AH\' v. Onset Day: ' + title_suffix
        target = 'results/usa/usa_winter10-4_%s.pdf' % file_suffix
        plot_average_ah_dev(curves, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
                            limits=(-7e-4, 5e-4), title=plot_title,
                            save_to_file=target)
def stats_regions():
    """Generate and t-test control/experimental samples per US region.

    Uses the last value of ``THRESHOLDS`` and a winter window of 1 October -
    31 March.  For each region (SW, NE, Gulf, the rest) both samples are
    written under ``results/stats/usa/regions/``; then every pair that can
    be read back is reported with its sizes and the p-value of
    ``stats.ttest_ind(..., equal_var=False)``.  Regions whose sample files
    are missing or not valid JSON are skipped.
    """
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)
    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)
    years = range(1972, 2002)

    # For regions
    threshold = THRESHOLDS[-1]  # last configured threshold
    regions = {
        'sw': SW_STATES,
        'ne': NE_STATES,
        'gulf': GULF_STATES,
        'the_rest': REST_STATES
    }

    for region_name, region in regions.items():
        generate_control_sample(
            onsets, threshold, ah_dev, winter, region, state_resolver, years,
            filename=
            f'results/stats/usa/regions/control.{region_name}.{threshold}.json'
        )
        generate_experimental_sample(
            onsets, threshold, ah_dev, winter, region, state_resolver,
            filename=
            f'results/stats/usa/regions/experimental.{region_name}.{threshold}.json'
        )

    for region_name, region in regions.items():
        try:
            with open(
                    f'results/stats/usa/regions/control.{region_name}.{threshold}.json',
                    'r') as f:
                ah_sample = json.load(f)
            with open(
                    f'results/stats/usa/regions/experimental.{region_name}.{threshold}.json',
                    'r') as f:
                epidemic_sample = json.load(f)
        except (OSError, ValueError):
            # Missing/unreadable file or invalid JSON (json.JSONDecodeError
            # is a ValueError): skip this region.  A bare ``except`` would
            # also have swallowed KeyboardInterrupt and programming errors.
            continue

        print(f'Region {region_name} ({len(region)} states)')
        print(f"AH' sample size = {len(ah_sample)}")
        print(f"Epidemic sample size = {len(epidemic_sample)}")
        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
        print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
        print()
def stats_joint():
    """Plot and t-test joint groups of states with top-ranked states removed.

    Precondition: ``stats_distinct_states`` has already written the per-state
    control samples
    (``results/stats/usa/distinct/control.<site>.<threshold>.json``); states
    whose file is missing or not valid JSON are skipped.

    First, the averaged AH-deviation curves for all listed states except the
    first 24 returned by ``distinct_states()`` are plotted to
    ``results/usa/usa_top.pdf``.  Then, for ``i = 0 .. len(ranking) - 1``,
    the first ``i`` ranked states are excluded, the remaining states'
    control samples are concatenated, an experimental sample is generated
    under ``results/stats/usa/joint/`` and the two are compared with
    ``stats.ttest_ind(..., equal_var=False)``.
    """
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)
    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)

    # NOTE(review): presumably ordered from the lowest AH' dip upwards, per
    # the original "top 24 AH' lowest" remark -- confirm against
    # distinct_states().
    top_dip = distinct_states()
    all_states = [1] + list(range(3, 12)) + list(range(13, 52))

    def without_top(count):
        """Return all_states minus the first `count` ranked states, in order."""
        excluded = set(top_dip[:count])
        return [state for state in all_states if state not in excluded]

    average_ah_dev = get_average_ah_vs_onsets(
        ah_dev, onsets, without_top(24), THRESHOLDS, DATE_SHIFT_RANGE,
        state_resolver)
    plot_average_ah_dev(average_ah_dev, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
                        limits=(-7e-4, 5e-4),
                        title='AH\' v. Onset Day: The Remained States',
                        save_to_file='results/usa/usa_top.pdf')

    # For joint states test
    threshold = THRESHOLDS[-1]  # last configured threshold
    for excluded_count in range(len(top_dip)):
        sites = without_top(excluded_count)

        # Start every group from an empty control sample.  It used to be
        # initialised once before the loop, so each group's sample also
        # contained the data of all previous groups.
        ah_sample = []
        for site in sites:
            try:
                with open(
                        f'results/stats/usa/distinct/control.{site}.{threshold}.json',
                        'r') as f:
                    ah_sample += json.load(f)
            except (OSError, ValueError):
                # Missing/unreadable file or invalid JSON: skip this state.
                continue

        joint_path = \
            f'results/stats/usa/joint/sites_cnt{len(sites)}.{threshold}.json'
        generate_experimental_sample(
            onsets, threshold, ah_dev, Winter(), sites, state_resolver,
            filename=joint_path)
        with open(joint_path, 'r') as f:
            epidemic_sample = json.load(f)

        print(f'Some {len(sites)} states')
        print(f"AH' sample size = {len(ah_sample)}")
        print(f"Epidemic sample size = {len(epidemic_sample)}")
        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
        print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
        print()
def stats_distinct_states():
    """Generate and t-test control/experimental samples state by state.

    Uses the last value of ``THRESHOLDS``.  Samples are written under
    ``results/stats/usa/distinct/``; every state whose two files can be read
    back is compared with ``stats.ttest_ind(..., equal_var=False)`` and
    printed as a table row (name, experimental sample size, p-value; p-values
    below 0.05 wrapped in ``textbf``).  A summary of how many states fall on
    each side of 0.05, with the min/max p-value of each side, follows.
    States whose files are missing or not valid JSON are skipped.
    """
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)
    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)
    years = range(1972, 2002)

    # For distinct states
    threshold = THRESHOLDS[-1]  # last configured threshold

    # NOTE(review): generation skips the first entry of CONTIGUOUS_STATES
    # while the report loop below reads all of them -- confirm the [1:] is
    # intended and not a leftover from resuming an interrupted run.
    for site in CONTIGUOUS_STATES[1:]:
        generate_control_sample(
            onsets, threshold, ah_dev, Winter(), [site], state_resolver,
            years,
            filename=
            f'results/stats/usa/distinct/control.{site}.{threshold}.json')
        generate_experimental_sample(
            onsets, threshold, ah_dev, Winter(), [site], state_resolver,
            filename=
            f'results/stats/usa/distinct/experimental.{site}.{threshold}.json')

    different = []
    equal = []
    for site in CONTIGUOUS_STATES:
        try:
            with open(
                    f'results/stats/usa/distinct/control.{site}.{threshold}.json',
                    'r') as f:
                ah_sample = json.load(f)
            with open(
                    f'results/stats/usa/distinct/experimental.{site}.{threshold}.json',
                    'r') as f:
                epidemic_sample = json.load(f)
        except (OSError, ValueError):
            # Missing/unreadable file or invalid JSON (json.JSONDecodeError
            # is a ValueError): skip this state.  A bare ``except`` would
            # also have swallowed KeyboardInterrupt and programming errors.
            continue

        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
        state_name = state_resolver[site]['name']
        is_different = prob < 0.05

        prob_str = "\\textbf{" + str(prob)[:7] + "}" if is_different else str(
            prob)[:7]
        print(f"{state_name} & {len(epidemic_sample)} & " + prob_str
              + " \\\\\n\\hline")

        if is_different:
            different.append((state_name, prob))
        else:
            equal.append((state_name, prob))

    print(f"\n\n\nOverall {len(different) + len(equal)} states:")
    print(f'\t{len(different)} different avg: {[x[0] for x in different]}')
    print(f'\t{len(equal)} equal avg: {[x[0] for x in equal]}')
    print()

    diff_prob, eq_prob = [x[1] for x in different], [x[1] for x in equal]
    if diff_prob:
        print(
            f'Different P-value variance: [{min(diff_prob)} ... {max(diff_prob)}]'
        )
    if eq_prob:
        print(f'Equal P-value variance: [{min(eq_prob)} ... {max(eq_prob)}]')