def rf_epidemiologists():
    """Plot average AH deviation around onsets from get_onsets_by_epidemiologists.

    Uses the single threshold ``0`` and writes one PDF per city case
    (three single cities plus their union) to
    ``results/russia/epidemiologists/``.
    """
    thresholds = [0]

    city_resolver = get_city_resolver()
    ah = get_ah(CITIES)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))
    onsets = get_onsets_by_epidemiologists(CITIES, AH_FILE_PATTERN,
                                           thresholds)

    # Each case: (city codes, file-name suffix, plot title).
    cases = (
        (['msk'], 'Moscow', 'Moscow'),
        (['spb'], 'SaintPetersburg', 'Saint Petersburg'),
        (['nsk'], 'Novosibirsk', 'Novosibirsk'),
        (['spb', 'msk', 'nsk'], 'spb,msk,nsk', 'All cities'),
    )
    for cur_cities, name_suffix, title in cases:
        average_ah_dev = get_average_ah_vs_onsets(
            ah_dev, onsets, cur_cities, thresholds, DATE_SHIFT_RANGE,
            city_resolver)
        plot_average_ah_dev(
            average_ah_dev, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
            title=title,
            save_to_file='results/russia/epidemiologists/rf_%s.pdf'
            % name_suffix)
def hypothesis_test_paris():
    """Generate Paris control/epidemic samples and print t-test P-values.

    For every threshold a control sample and an experimental sample are
    written to JSON files under ``results/stats/paris/``.  The files are
    then read back and compared with ``stats.ttest_ind``, once with the
    default settings and once with ``equal_var=False``.
    """
    thresholds = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30]

    # Winter window: 1 October - 31 March, in the years Winter() provides.
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    city_resolver = get_city_resolver()
    population = get_population(PARIS)

    ah = get_ah(PARIS)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    morbidity = get_daily_morbidity(PARIS)
    morbidity_excess = get_morbidity_excess(morbidity,
                                            get_morbidity_mean(morbidity))
    excess_data = get_relative_weekly_morbidity_excess(morbidity_excess,
                                                       population)
    onsets = get_onsets_by_morbidity(excess_data, thresholds)

    years = range(1986, 2015)

    # Pass 1: write both samples for every threshold.
    for threshold in thresholds:
        control_path = f'results/stats/paris/ah_sample.{threshold}.json'
        epidemic_path = f'results/stats/paris/epidemic_sample.{threshold}.json'
        generate_control_sample(onsets, threshold, ah_dev, winter, PARIS,
                                city_resolver, years, filename=control_path)
        generate_experimental_sample(onsets, threshold, ah_dev, winter, PARIS,
                                     city_resolver, filename=epidemic_path)

    # Pass 2: read the samples back and report both t-test variants.
    for threshold in thresholds:
        print(f'threshold {threshold}')

        control_path = f'results/stats/paris/ah_sample.{threshold}.json'
        with open(control_path, 'r') as control_file:
            ah_sample = json.load(control_file)

        epidemic_path = f'results/stats/paris/epidemic_sample.{threshold}.json'
        with open(epidemic_path, 'r') as epidemic_file:
            epidemic_sample = json.load(epidemic_file)

        print(f"AH' sample size = {len(ah_sample)}")
        print(f"Epidemic sample size = {len(epidemic_sample)}")

        _, prob = stats.ttest_ind(ah_sample, epidemic_sample)
        print(f"Equal variance (Student's t-test): P-value = {prob}")

        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
        print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
        print()
def main():
    """Plot average AH deviation around morbidity-based onsets per city case.

    Onsets are computed for the thresholds ``[30, 35, 40, 45]`` within a
    winter window of 1 November - 31 March.  One PDF per city case is
    written to ``results/russia/morbidity/``; the file name encodes the
    window months and the smallest and largest key of the plotted data.
    """
    thresholds = [30, 35, 40, 45]  # wider alternative: [25, 30, 35, 40, 45, 50]

    winter = Winter()
    winter.START = datetime.date(winter.START.year, 11, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    city_resolver = get_city_resolver()
    population = get_population(CITIES)

    ah = get_ah(CITIES)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    morbidity = get_daily_morbidity(CITIES)
    morbidity_excess = get_morbidity_excess(morbidity,
                                            get_morbidity_mean(morbidity))
    excess_data = get_relative_weekly_morbidity_excess(morbidity_excess,
                                                       population)
    onsets = get_onsets_by_morbidity(excess_data, thresholds, winter=winter)

    # Each case: (city codes, file-name suffix, plot title).
    cases = (
        (['msk'], 'Moscow', 'Moscow'),
        (['spb'], 'SaintPetersburg', 'Saint Petersburg'),
        (['nsk'], 'Novosibirsk', 'Novosibirsk'),
        (['spb', 'msk', 'nsk'], 'spb,msk,nsk', 'All cities'),
    )
    for cur_cities, name_suffix, title in cases:
        average_ah_dev = get_average_ah_vs_onsets(
            ah_dev, onsets, cur_cities, thresholds, DATE_SHIFT_RANGE,
            city_resolver)

        filename_parts = (
            name_suffix,
            winter.START.month,
            winter.END.month,
            min(average_ah_dev.keys()),
            max(average_ah_dev.keys()),
        )
        filename = ('results/russia/morbidity/'
                    'rf_m_%s_winter%d-%d_threshold%s-%s.pdf') % filename_parts

        plot_average_ah_dev(average_ah_dev, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
                            title=title, save_to_file=filename)
def stats_all_country():
    """Generate country-wide samples per threshold and print t-test results.

    Onsets are computed inside a 1 October - 31 March winter window.  For
    every entry of the module-level ``THRESHOLDS`` a control and an
    experimental sample over ``CONTIGUOUS_STATES`` are written to
    ``results/stats/usa/`` (sample generation receives a fresh ``Winter()``).
    The samples are then read back and the statistic and P-value of
    ``stats.ttest_ind`` are printed, first with default settings and then
    with ``equal_var=False``.
    """
    # Module-level THRESHOLDS is used; a local override ([0.005]) is disabled.
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)

    ah = get_ah(AH_CSV_FILE)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)

    years = range(1972, 2002)

    # Write the samples for the whole country.
    for threshold in THRESHOLDS:
        control_path = f'results/stats/usa/ah_sample.{threshold}.json'
        epidemic_path = f'results/stats/usa/epidemic_sample.{threshold}.json'
        generate_control_sample(onsets, threshold, ah_dev, Winter(),
                                CONTIGUOUS_STATES, state_resolver, years,
                                filename=control_path)
        generate_experimental_sample(onsets, threshold, ah_dev, Winter(),
                                     CONTIGUOUS_STATES, state_resolver,
                                     filename=epidemic_path)

    # Read them back and print (statistic, P-value) for both test variants.
    for threshold in THRESHOLDS:
        print(f'threshold {threshold}:')

        control_path = f'results/stats/usa/ah_sample.{threshold}.json'
        with open(control_path, 'r') as control_file:
            ah_sample = json.load(control_file)

        epidemic_path = f'results/stats/usa/epidemic_sample.{threshold}.json'
        with open(epidemic_path, 'r') as epidemic_file:
            epidemic_sample = json.load(epidemic_file)

        statistic, prob = stats.ttest_ind(ah_sample, epidemic_sample)
        print(statistic, prob)

        statistic, prob = stats.ttest_ind(ah_sample, epidemic_sample,
                                          equal_var=False)
        print(statistic, prob)
def hypothesis_test_epidemiologists():
    """Run the two-sample t-tests for onsets from get_onsets_by_epidemiologists.

    A control and an experimental sample for threshold ``0`` are written to
    ``results/stats/russia_epid/``, read back, and compared with
    ``stats.ttest_ind`` (default settings and ``equal_var=False``).  Sample
    sizes and P-values are printed.
    """
    thresholds = [0]
    threshold = 0

    city_resolver = get_city_resolver()
    ah = get_ah(CITIES)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))
    onsets = get_onsets_by_epidemiologists(CITIES, AH_FILE_PATTERN,
                                           thresholds)
    years = range(1986, 2015)

    # Imported here, at the point of use, exactly as the original did.
    from hypothesis import generate_control_sample, generate_experimental_sample

    control_path = f'results/stats/russia_epid/ah_sample.{threshold}.json'
    epidemic_path = f'results/stats/russia_epid/epidemic_sample.{threshold}.json'

    generate_control_sample(onsets, threshold, ah_dev, Winter(), CITIES,
                            city_resolver, years, filename=control_path)
    generate_experimental_sample(onsets, threshold, ah_dev, Winter(), CITIES,
                                 city_resolver, filename=epidemic_path)

    print(f'threshold {threshold}')
    with open(control_path, 'r') as control_file:
        ah_sample = json.load(control_file)
    with open(epidemic_path, 'r') as epidemic_file:
        epidemic_sample = json.load(epidemic_file)

    print(f"AH' sample size = {len(ah_sample)}")
    print(f"Epidemic sample size = {len(epidemic_sample)}")

    _, prob = stats.ttest_ind(ah_sample, epidemic_sample)
    print(f"Equal variance (Student's t-test): P-value = {prob}")

    _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
    print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
    print()
def main_paris():
    """Plot average AH deviation around morbidity-based onsets for PARIS.

    Onsets are computed for thresholds ``[9, 10, 20, 30]`` inside a
    1 November - 31 March winter window, and a single PDF is written to
    ``results/paris/``.
    """
    state_resolver = get_city_resolver()
    population = get_population(PARIS)

    # Parameters.  Alternative threshold sets kept for reference:
    #   [-1000, 5, 10, 50, 100, 500, 750]
    #   [0, 5, 9, 25, 35, 40, 45, 50]
    #   [9, 10, 20, 30, 40, 50]
    #   [0, 25, 50, 75, 100]
    #   [10, 20, 30, 40, 50, 60, 70, 80]
    thresholds = [9, 10, 20, 30]

    winter = Winter()
    winter.START = datetime.date(winter.START.year, 11, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    ah = get_ah(PARIS)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    morbidity = get_daily_morbidity(PARIS)
    morbidity_excess = get_morbidity_excess(morbidity,
                                            get_morbidity_mean(morbidity))
    excess_data = get_relative_weekly_morbidity_excess(morbidity_excess,
                                                       population)
    onsets = get_onsets_by_morbidity(excess_data, thresholds, winter=winter)

    average_ah_dev = get_average_ah_vs_onsets(
        ah_dev, onsets, PARIS, thresholds, DATE_SHIFT_RANGE, state_resolver)

    title = f'Île-de-France: outbreaks in ' \
            f'{winter.START.strftime("%B")} — {winter.END.strftime("%B")}'

    filename_parts = (
        winter.START.month,
        winter.END.month,
        max(average_ah_dev.keys()),
    )
    filename = 'results/paris/paris_winter%d-%d_threshold%s.pdf' % filename_parts

    plot_average_ah_dev(average_ah_dev, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
                        limits=(-11e-4, 15e-4),
                        title=title,
                        save_to_file=filename)
def winter_range_investigation():
    """Plot average AH' versus onset day for several winter month ranges.

    For each (start month, end month) pair the winter window is set to run
    from the first day of the start month to the last day of the end month,
    in the years a fresh ``Winter()`` provides.  Onsets are recomputed for
    that window and one PDF per range is written to
    ``results/winter_range_usa/``.

    The last day of the end month comes from ``calendar.monthrange`` rather
    than a hand-written table.  The old table hard-coded February as 29 days,
    which makes ``datetime.date`` raise ``ValueError`` whenever
    ``winter.END.year`` is not a leap year.
    """
    # Local import so this block does not depend on the file's import list.
    import calendar

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)
    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)

    # (start month, end month) pairs to investigate.
    month_ranges = (
        (12, 2),
        (12, 3),
        (11, 2),
        (11, 3),
        (11, 4),
        (10, 3),
        (10, 4),
        (10, 5),
        (9, 4),
        (9, 5),
    )
    for start_month, end_month in month_ranges:
        winter = Winter()
        winter.START = datetime.date(winter.START.year, start_month, 1)

        end_year = winter.END.year
        # monthrange() returns (weekday of day 1, number of days in month).
        last_day = calendar.monthrange(end_year, end_month)[1]
        winter.END = datetime.date(end_year, end_month, last_day)

        onsets = get_onsets(excess_data, THRESHOLDS, winter)
        average_ah_dev = get_average_ah_vs_onsets(ah_dev, onsets,
                                                  CONTIGUOUS_STATES,
                                                  THRESHOLDS, DATE_SHIFT_RANGE,
                                                  state_resolver)

        rng = f'{winter.START.strftime("%B")} — {winter.END.strftime("%B")}'
        title = 'AH\' v. Onset Day: outbreaks in ' + rng
        filename = 'results/winter_range_usa/usa_winter%d-%d.pdf' % (
            winter.START.month, winter.END.month)
        plot_average_ah_dev(average_ah_dev, THRESHOLD_COLORS,
                            DATE_SHIFT_RANGE,
                            limits=(-3.3e-4, 2.2e-4),
                            title=title,
                            save_to_file=filename)
def test_parser():
    """Parse the humidity data for CITIES and draw the mean AH per site.

    Produces a plot in the manner of Figure 1D from the Shaman, 2010
    article, for the Russian data.
    """
    sites = ['Saint Petersburg', 'Moscow', 'Novosibirsk']
    site_colors = dict(zip(sites, ['b', 'g', 'r']))

    ah_mean = get_ah_mean(get_ah(CITIES))

    # Graph 1D from Shaman 2010.
    draw_ah_mean(ah_mean, sites=sites, colors=site_colors)
def distinct_states():
    """Rank states by the lowest average AH' value in the 28 days before day 0.

    For every state code in ``[1] + 3..11 + 13..51`` the average AH'
    deviation is computed for that state alone.  For each threshold, the
    minimum over day offsets ``-28 .. -1`` (indexed relative to
    ``DATE_SHIFT_RANGE[0]``) is taken; a state is recorded when that minimum
    is below ``-0.0003``, keeping its lowest value across thresholds.

    Prints one line per recorded state and a summary line.

    Returns:
        The recorded state codes, ordered from the lowest (deepest) value
        upwards.
    """
    state_resolver = get_state_resolver(STATE_CODES_FILE)

    ah = get_ah(AH_CSV_FILE)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS)

    dip_limit = -0.0003
    # Narrower alternative noted by the author: [-19, -18, -17, -11, -10, -9]
    day_window = range(-28, 0, 1)

    lowest_by_state = {}  # state code -> lowest AH' value found
    for state in [1] + list(range(3, 12)) + list(range(13, 52)):
        average_ah_dev = get_average_ah_vs_onsets(
            ah_dev, onsets, [state], THRESHOLDS, DATE_SHIFT_RANGE,
            state_resolver)

        for average in average_ah_dev.values():
            dip = min(average[day - DATE_SHIFT_RANGE[0]] for day in day_window)
            if dip < dip_limit:
                lowest_by_state[state] = min(dip,
                                             lowest_by_state.get(state, dip))

    ranked = sorted(lowest_by_state.items(), key=lambda item: item[1])

    for state, dip in ranked:
        print('Deep level %f in %s state (%d)' %
              (dip, state_resolver[state]['acronym'], state))

    top_dip = [state for state, _ in ranked]
    print(f'Top dip {len(top_dip)}: {top_dip}')
    return top_dip
def test_parser():
    """Parse the humidity CSV and draw the mean AH for five US states.

    Reproduces Figure 1D from the Shaman, 2010 article.
    """
    sites = ['Arizona', 'Florida', 'Illinois', 'New York', 'Washington']
    site_colors = dict(zip(sites, ['b', 'g', 'r', 'c', 'm']))

    ah_mean = get_ah_mean(get_ah('data/stateAHmsk_oldFL.csv'))

    # Graph 1D from Shaman 2010.
    draw_ah_mean(ah_mean, sites=sites, colors=site_colors)
def main():
    """Plot average AH' versus onset day for four groups of US states.

    Onsets are computed inside a 1 October - 30 April winter window.  One
    PDF per region is written to ``results/usa/``.
    """
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 4, 30)

    state_resolver = get_state_resolver(STATE_CODES_FILE)

    ah = get_ah(AH_CSV_FILE)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))

    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)

    # Each region: (file-name suffix, title suffix, state codes).
    # Disabled entry: ('all', 'Contiguous States', CONTIGUOUS_STATES)
    regions = (
        ('sw', 'Southwest States', SW_STATES),
        ('ne', 'Northeast States', NE_STATES),
        ('gulf', 'Gulf States', GULF_STATES),
        ('the_rest', 'The Remained States', REST_STATES),
    )
    for name_suffix, title_suffix, sites in regions:
        average_ah_dev = get_average_ah_vs_onsets(
            ah_dev, onsets, sites, THRESHOLDS, DATE_SHIFT_RANGE,
            state_resolver)

        title = 'AH\' v. Onset Day: ' + title_suffix
        filename = 'results/usa/usa_winter10-4_%s.pdf' % name_suffix
        plot_average_ah_dev(average_ah_dev, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
                            limits=(-7e-4, 5e-4),
                            title=title,
                            save_to_file=filename)
def stats_regions():
    """Generate per-region samples and print Welch's t-test P-values.

    Onsets are computed inside a 1 October - 31 March winter window.  For
    the last entry of ``THRESHOLDS`` a control and an experimental sample
    are written to ``results/stats/usa/regions/`` for each of the four
    regions, then read back and compared with
    ``stats.ttest_ind(..., equal_var=False)``.

    A region whose sample files cannot be opened or parsed is skipped.
    Only ``OSError`` and ``ValueError`` (which covers JSON decoding errors)
    are treated that way; the previous bare ``except`` also swallowed
    ``KeyboardInterrupt``, ``SystemExit`` and programming errors.
    """
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)
    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)
    years = range(1972, 2002)

    # For regions.
    threshold = THRESHOLDS[-1]  # The strongest (0.02 per the original note)
    regions = {
        'sw': SW_STATES,
        'ne': NE_STATES,
        'gulf': GULF_STATES,
        'the_rest': REST_STATES
    }

    for region_name, region in regions.items():
        generate_control_sample(
            onsets, threshold, ah_dev, winter, region, state_resolver, years,
            filename=
            f'results/stats/usa/regions/control.{region_name}.{threshold}.json'
        )
        generate_experimental_sample(
            onsets, threshold, ah_dev, winter, region, state_resolver,
            filename=
            f'results/stats/usa/regions/experimental.{region_name}.{threshold}.json'
        )

    for region_name, region in regions.items():
        try:
            with open(
                    f'results/stats/usa/regions/control.{region_name}.{threshold}.json',
                    'r') as f:
                ah_sample = json.load(f)
            with open(
                    f'results/stats/usa/regions/experimental.{region_name}.{threshold}.json',
                    'r') as f:
                epidemic_sample = json.load(f)
        except (OSError, ValueError):
            # Best effort: missing or unreadable sample files skip the region.
            continue

        print(f'Region {region_name} ({len(region)} states)')
        print(f"AH' sample size = {len(ah_sample)}")
        print(f"Epidemic sample size = {len(epidemic_sample)}")

        # Only the unequal-variance variant is reported for regions.
        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
        print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
        print()
def stats_joint():
    """Plot the non-top states and run joint t-tests on shrinking state sets.

    Assumes ``stats_distinct_states`` has already been run for every state,
    because the per-state control samples are read from
    ``results/stats/usa/distinct/``.

    Steps:
      1. Compute onsets inside a 1 October - 31 March winter window.
      2. Take the ranking returned by ``distinct_states()`` and plot the
         average AH' for all states except its first 24 entries.
      3. For ``i = 0 .. len(top_dip) - 1``: drop the first ``i`` ranked
         states, build the control sample from the remaining states' files,
         generate the experimental sample for the same states, and print
         the Welch's t-test P-value.

    Fixes relative to the previous version:
      * The control sample is rebuilt for every ``i``.  It used to be
        created once before the loop, so each iteration also contained all
        data gathered in earlier iterations.
      * Only ``OSError``/``ValueError`` (missing or unparsable file) skip a
        state, instead of a bare ``except``.
    """
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)
    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)

    top_dip = distinct_states()

    # Same state-code list the original built locally; given a distinct
    # local name so it does not shadow a module-level constant.
    all_states = [1] + list(range(3, 12)) + list(range(13, 52))

    # Exclude the 24 states ranked first by distinct_states().
    not_top_states = list(set(all_states) - set(top_dip[:24]))

    average_ah_dev = get_average_ah_vs_onsets(ah_dev, onsets, not_top_states,
                                              THRESHOLDS, DATE_SHIFT_RANGE,
                                              state_resolver)
    plot_average_ah_dev(average_ah_dev, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
                        limits=(-7e-4, 5e-4),
                        title='AH\' v. Onset Day: The Remained States',
                        save_to_file='results/usa/usa_top.pdf')

    # For joint states test.
    threshold = THRESHOLDS[-1]  # The strongest

    for i in range(len(top_dip)):
        # Exclude the first i ranked states.
        sites = list(set(all_states) - set(top_dip[:i]))

        # Control sample for exactly this set of states.
        ah_sample = []
        for site in sites:
            try:
                with open(
                        f'results/stats/usa/distinct/control.{site}.{threshold}.json',
                        'r') as f:
                    ah_sample += json.load(f)
            except (OSError, ValueError):
                # Best effort: a state without a usable control file is skipped.
                continue

        joint_path = (
            f'results/stats/usa/joint/sites_cnt{len(sites)}.{threshold}.json')
        generate_experimental_sample(
            onsets, threshold, ah_dev, Winter(), sites, state_resolver,
            filename=joint_path)
        with open(joint_path, 'r') as f:
            epidemic_sample = json.load(f)

        print(f'Some {len(sites)} states')
        print(f"AH' sample size = {len(ah_sample)}")
        print(f"Epidemic sample size = {len(epidemic_sample)}")

        # Only the unequal-variance variant is reported here.
        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)
        print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
        print()
def stats_distinct_states():
    """Generate per-state samples and print a LaTeX table of Welch P-values.

    Onsets are computed inside a 1 October - 31 March winter window.  For
    the last entry of ``THRESHOLDS`` a control and an experimental sample
    are written per state to ``results/stats/usa/distinct/``.  Every state
    in ``CONTIGUOUS_STATES`` whose two files can be read is then tested with
    ``stats.ttest_ind(..., equal_var=False)``; one table row per state is
    printed (P-values below 0.05 in bold), followed by a summary of the
    states with and without a significant difference.

    Fixes relative to the previous version:
      * P-values that ``str()`` renders in scientific notation are printed
        as ``d.dde-XX``.  Slicing the text to 7 characters used to cut the
        exponent off (``1.2345678e-10`` became ``1.23456``).
      * Only ``OSError``/``ValueError`` (missing or unparsable file) skip a
        state, instead of a bare ``except``.
    """
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)
    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)
    years = range(1972, 2002)

    # For distinct states.
    threshold = THRESHOLDS[-1]  # The strongest (0.02 per the original note)

    # NOTE(review): generation skips the first entry of CONTIGUOUS_STATES
    # while the reading loop below covers all of them - confirm whether the
    # [1:] slice is intentional.
    for site in CONTIGUOUS_STATES[1:]:
        generate_control_sample(
            onsets, threshold, ah_dev, Winter(), [site], state_resolver, years,
            filename=
            f'results/stats/usa/distinct/control.{site}.{threshold}.json')
        generate_experimental_sample(
            onsets, threshold, ah_dev, Winter(), [site], state_resolver,
            filename=
            f'results/stats/usa/distinct/experimental.{site}.{threshold}.json')

    different = []  # (state name, P-value) with P < 0.05
    equal = []      # (state name, P-value) otherwise
    for site in CONTIGUOUS_STATES:
        try:
            with open(
                    f'results/stats/usa/distinct/control.{site}.{threshold}.json',
                    'r') as f:
                ah_sample = json.load(f)
            with open(
                    f'results/stats/usa/distinct/experimental.{site}.{threshold}.json',
                    'r') as f:
                epidemic_sample = json.load(f)
        except (OSError, ValueError):
            # Best effort: states without usable sample files are skipped.
            continue

        _, prob = stats.ttest_ind(ah_sample, epidemic_sample, equal_var=False)

        # Keep the 7-character form for ordinary values, but do not slice
        # scientific notation, which would drop the exponent.
        prob_text = str(prob)
        if 'e' in prob_text:
            prob_text = f"{prob:.2e}"
        else:
            prob_text = prob_text[:7]

        state_name = state_resolver[site]['name']
        significant = prob < 0.05
        prob_str = "\\textbf{" + prob_text + "}" if significant else prob_text
        print(f"{state_name} & {len(epidemic_sample)} & " + prob_str
              + " \\\\\n\\hline")

        if significant:
            different.append((state_name, prob))
        else:
            equal.append((state_name, prob))

    print(f"\n\n\nOverall {len(different) + len(equal)} states:")
    print(f'\t{len(different)} different avg: {[x[0] for x in different]}')
    print(f'\t{len(equal)} equal avg: {[x[0] for x in equal]}')
    print()

    diff_prob, eq_prob = [x[1] for x in different], [x[1] for x in equal]
    if diff_prob:
        print(
            f'Different P-value variance: [{min(diff_prob)} ... {max(diff_prob)}]'
        )
    if eq_prob:
        print(f'Equal P-value variance: [{min(eq_prob)} ... {max(eq_prob)}]')