def test_get_daily_count(self):
    """Spot-check get_daily_count() on small fixed inputs."""
    # A single day's cumulative total is its own daily count.
    single = my_utils.get_daily_count(array.array('i', [3]))
    self.assertEqual(single, array.array('i', [3]))
    # Cumulative series rising by one each day -> one new case per day.
    increasing = my_utils.get_daily_count(array.array('i', [0, 1, 2, 3]))
    self.assertTrue(all(increasing == array.array('i', [0, 1, 1, 1])))
    # A flat stretch in the cumulative series yields a zero daily count.
    flat = my_utils.get_daily_count(array.array('i', [0, 2, 2]))
    self.assertTrue(all(flat == array.array('i', [0, 2, 0])))
def test_get_daily_count_randomness(self):
    """Check get_daily_count() against a randomly generated series."""
    # Four random daily increments, drawn in the same order as before.
    draws = [random.randint(0, 100) for _ in range(4)]
    # Build the matching cumulative series with a running total.
    total = 0
    sums = []
    for d in draws:
        total += d
        sums.append(total)
    cumul_in = array.array('i', sums)
    daily_out = array.array('i', draws)
    self.assertTrue(all(my_utils.get_daily_count(cumul_in) == daily_out))
def test_daily_count(self):
    """Validate get_daily_count() on a known file and on random data."""
    # Fixed fixture: the Boulder case column from the test CSV.
    column = my_utils.get_columns('test_counties.csv', 1, 'Boulder', [4])
    daily = my_utils.get_daily_count(column)
    test_daily = [
        1, 6, 0, 1, 0, 3, 13, 6, 7, 2, 10, 2, 15, 10, 8, 6, 10, 7, 7, 18
    ]
    self.assertEqual(daily, test_daily)
    # Randomized: each output entry must equal the difference of
    # consecutive inputs; the first entry passes through unchanged.
    for _ in range(1000):
        data_size = random.randint(100, 1000)
        data = [[random.randint(1, 100)] for _ in range(data_size)]
        daily = my_utils.get_daily_count(data)
        for k, value in enumerate(daily):
            if k == 0:
                self.assertListEqual([value], data[k])
            else:
                self.assertListEqual([value], [data[k][0] - data[k - 1][0]])
def print_daily_cases(county_cases):
    """Compute and print the daily case counts for a county.

    Parameters
    ----------
    county_cases: list
        Cumulative case counts for a single county.

    Prints
    --------
    daily_count: list
        One daily count per line, from mu.get_daily_count().
    """
    for count in mu.get_daily_count(county_cases):
        print(count)
def main():
    """Parse CLI arguments, extract column data for a county, and print
    the results (optionally as daily increments / running averages)."""
    desc = 'Opens a file and extracts data from a specific column.'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('--file', dest='file_name', type=str, required=True,
                        help='Name of the file to be opened by the script.')
    parser.add_argument('--result_column', dest='result_column', default=4,
                        help='Column of file to be returned by the script.\
                        Defaults to 4 and must correspond to an index\
                        found in the file.')
    parser.add_argument('--county_column', dest='county_column', type=int,
                        required=True,
                        help='Column of file to be queried by the script.')
    parser.add_argument('--county', dest='county', type=str, required=True,
                        help='Name of county to retrieve data from.')
    # NOTE(review): argparse type=bool parses ANY non-empty string as True
    # (including "False"); consider action='store_true' instead.
    parser.add_argument('--return_daily_increment',
                        dest='return_daily_increment',
                        type=bool, default=False,
                        help='Decides whether results\
                        are returned as daily increments.')
    parser.add_argument('--return_running_average',
                        dest='return_running_average',
                        type=bool, default=False,
                        help='Decides whether to return\
                        running averages from results.')
    parser.add_argument('--running_avg_window_size',
                        dest='running_avg_window_size',
                        type=int, default=5,
                        help='Determines the window\
                        size for the running average.')
    parser.add_argument('--date_column', dest='date_column', type=int,
                        default=0,
                        help='Determines the date column.')
    args = parser.parse_args()
    print()
    print('Results:')
    results = []
    # Coerce a purely numeric column argument to int; a comma-separated
    # list stays a string and is split below.
    try:
        args.result_column = int(args.result_column)
    except ValueError:
        pass
    # NOTE(review): if the int() conversion above succeeded,
    # `',' in args.result_column` raises TypeError (membership test on an
    # int) — confirm intended control flow against the original file.
    if ',' in args.result_column:
        result_array = []
        for result in args.result_column.split(','):
            result_array.append(str(result))
        args.result_column = result_array
        try:
            results = mu.get_columns(args.file_name, args.county_column,
                                     args.county, args.result_column,
                                     args.date_column)
        except ValueError:
            print('ValueError during get columns')
    else:
        try:
            results = mu.get_column(args.file_name, args.county_column,
                                    args.county, args.result_column,
                                    args.date_column)
        except ValueError:
            print('ValueError during get column')
    if args.return_daily_increment is True:
        try:
            # Re-reads the cases from the file, then diffs them.
            results = mu.get_daily_count(
                get_cases(args.file_name, args.county_column, args.county,
                          args.result_column, args.date_column))
        except ValueError:
            print('Value Error during get daily increment.')
    if args.return_running_average is True:
        try:
            results, _ = mu.running_average(
                results, window_size=args.running_avg_window_size)
        except ValueError:
            print('ValueError during running average')
    # One value per line.
    for result in results:
        print(result)
    print()
    print()
def test_get_daily_count_error_mode(self):
    """A nonexistent input file must exit the program with status 1."""
    # '.csv' is deliberately misspelled as '.cs' to trigger the error path.
    bad_path = 'covid-19-data/us-counties.cs'
    with self.assertRaises(SystemExit) as cm:
        column = mu.get_column(bad_path, 1, 'Boulder', 4, 0)
        mu.get_daily_count(column)
    self.assertEqual(cm.exception.code, 1)
def test_get_daily_count(self):
    """The 20th daily count for Boulder county should equal 11."""
    daily = mu.get_daily_count(
        mu.get_column('covid-19-data/us-counties.csv', 1,
                      'Boulder', 4, 0))
    # assertEqual, not assertIs: identity comparison on ints only passes
    # by accident of CPython's small-integer caching and would break for
    # values outside the cached range.
    self.assertEqual(daily[19], 11)
def main():
    """ calculate the number of covid19 cases per capita\
    for each county in a given State for a given date.
    Cases are per 100,000 people and rounded to 1 decimal

    Required Args:
    ---------------
    state: str
        Name of USA State (No abbreviations)
    query_date: str
        date in ISO format 'YYYY-MM-DD'

    Optional Args (have defaults): see argparser section
    -------------------------------------------
    covid_file_name: str
    census_file_name: str
    daily_new: bool default=True
    running_avg: bool default=False
    window: int
    coviddata_county_column: int *
    cases_column: int *
    date_column: int *
    census_state_column: int *
    census_county_column: int *
    pop_column: int *
    Note: *= only needs to be changed if format of\
    covid19 and census data files are changed

    Returns:
    ---------
    out_lists: list of [str, float]
        [county_name, county_caserate_at_date]
    """
    # parse command line arguments
    parser = argparse.ArgumentParser(description='process args for \
                                     reading covid data CSV file')
    parser.add_argument('--state', type=str, help='Name of the State',
                        required=True)
    parser.add_argument('--query_date', type=str,
                        help='date in ISO format "YY-MM-DD" ',
                        required=True)
    parser.add_argument('--covid_file_name', type=str,
                        help='Name of the input covid cases data file',
                        default='covid-19-data/us-counties.csv')
    parser.add_argument('--census_file_name', type=str,
                        help='Name of the input census data file',
                        default='census-data/co-est2019-alldata.csv')
    parser.add_argument('--coviddata_county_column', type=int,
                        help='column ind for county names in covid CSVfile',
                        default=1)
    parser.add_argument('--cases_column', type=int,
                        help='column ind for number of cases in covid CSVfile',
                        default=4)
    parser.add_argument('--date_column', type=int, default=0,
                        help='column ind for date in covid CSV file')
    parser.add_argument('--census_state_column', type=int,
                        help='column ind for state names in census CSV file',
                        default=5)
    parser.add_argument('--census_county_column', type=int,
                        help='column ind for county names in census CSV file',
                        default=6)
    parser.add_argument('--pop_column', type=int,
                        help='column ind for populaiton in census CSV file',
                        default=7)
    # NOTE(review): argparse type=bool parses any non-empty string as True
    # ("--daily_new False" is still True) — verify intended usage.
    parser.add_argument('--daily_new', type=bool, default=True,
                        help='daily newcases. False gives cumulativ cases')
    parser.add_argument('--running_avg', type=bool, default=False,
                        help='running average of cases.\
                        default is False, window size is required')
    parser.add_argument('--window', type=int, default=5,
                        help='Window size of running average')
    # parse arguments and store them in args
    args = parser.parse_args()

    # assign arguments
    coviddata_file_name = args.covid_file_name
    coviddata_county_column = args.coviddata_county_column
    cases_column = args.cases_column
    date_column = args.date_column
    daily_new = args.daily_new
    running_avg = args.running_avg
    window = args.window
    census_file_name = args.census_file_name
    census_state_column = args.census_state_column
    state = args.state
    census_county_column = args.census_county_column
    pop_column = args.pop_column
    query_date = date.fromisoformat(args.query_date)

    # make CSV file copy of only state covid-19-data
    if coviddata_file_name == 'covid-19-data/us-counties.csv':
        state_coviddata_file_name = 'covid-19-data/'+state+'-counties.csv'
        try:
            # Existence probe: open and immediately close.
            f1 = open(state_coviddata_file_name, 'r')
            f1.close()
        except FileNotFoundError:
            print('creating state_covidfile')
            state_coviddata_file_name = make_statefile(state)
            print(state_coviddata_file_name, 'state_coviddata_file_name')
    else:
        # NOTE(review): Warning(...) only constructs an exception object
        # and discards it — nothing is raised or shown to the user;
        # warnings.warn() was probably intended.
        Warning('This script must be run on data within only \
                one state or else has error if counties of \
                the same name in different states across USA.\
                if not using default args.covid_file_name, please\
                check that county names are not duplicated.\
                NOTE: Proceeding by assigning variable\
                state_coviddata_file_name = args.covid_file_name ;\
                Watch out for errors from this issue.')
        state_coviddata_file_name = args.covid_file_name

    # get state county names and population data from census file
    census_state_data = get_column(census_file_name,
                                   census_state_column,
                                   state,
                                   result_columns=[census_county_column,
                                                   pop_column],
                                   date_column=None)
    county_pop_list = census_state_data[1][1:]
    # census file has names as "countyname + County", so rm " County"
    # (the trailing ' County' suffix is 7 characters).
    county_names_list_withcounty = census_state_data[0][1:]
    county_names_list = []
    for c in range(len(county_names_list_withcounty)):
        county_names_list.append(county_names_list_withcounty[c][:-7])

    # make hashtable of (key-county_name, value= county_pop)
    N = 260  # hashtable size. Max number counties in a State is Texas with 254
    census_hashtable = [[] for i in range(N)]
    for c in range(len(county_names_list)):
        hash_table.put(census_hashtable, N, county_names_list[c],
                       county_pop_list[c], method='rolling')

    # daily cases option and running avg cases option
    if daily_new is True:
        from my_utils import get_daily_count
    if running_avg is True:
        from my_utils import running_average

    # Loop through each county in state
    out_lists = []
    for c in range(len(county_names_list)):
        county_cases_data_cumulative = get_column(state_coviddata_file_name,
                                                  coviddata_county_column,
                                                  county_names_list[c],
                                                  result_columns=[cases_column],
                                                  date_column=date_column,
                                                  return_dates=True)
        # dates are stored in last index of list, in datetime format
        dates = county_cases_data_cumulative[-1]
        # convert cases from type str to int
        county_cases = list(map(int, county_cases_data_cumulative[0]))
        # daily cases option and running avg options
        if daily_new is True:
            county_cases = get_daily_count(county_cases)
        if running_avg is True:
            county_cases = running_average(county_cases, window)
        # binary search for county cases at date
        county_cases_at_date = binary_search(query_date,
                                             [dates, county_cases])
        # case rate per 100,000 people; counties with no data at the
        # query date are silently skipped.
        if county_cases_at_date is not None:
            county_caserate_at_date = county_cases_at_date * 100000 \
                / int(hash_table.get(census_hashtable, N,
                                     county_names_list[c],
                                     method='rolling'))
            out_lists.append([county_names_list[c],
                              round(county_caserate_at_date, 1)])
    print(out_lists)
    return out_lists
# NOTE(review): fragment — the enclosing function's signature and the
# definitions of file_name, county_column, county, cases_column,
# print_daily and print_running_avg lie outside this view.
window = args.window
date_column = args.date_column

# call function to run
cases = get_column(file_name, county_column, county,
                   result_columns=[cases_column],
                   date_column=date_column)
# convert cases from type str to int
cases = list(map(int, cases[0]))

# print daily cases option
if print_daily is True:
    from my_utils import get_daily_count
    day_cases = get_daily_count(cases)

# print runing average cases option
# NOTE(review): if print_running_avg is True while print_daily is False,
# `day_cases` below is undefined -> NameError. Confirm callers always
# enable both together.
if print_running_avg is True:
    from my_utils import running_average
    running_avg_cases = running_average(day_cases, window)

# print outputs. (print one value per line)
print('cumulative cases by each date:')
for c in range(0, len(cases)):
    print(cases[c])
if print_daily is True:
    print('daily cases:')
    for c in range(0, len(day_cases)):
        print(day_cases[c])
def main():
    """Parse CLI arguments and print county case data: raw cases, and
    optionally daily counts, a running average, and a per-capita plot."""
    # Parses through file using command line inputs
    parser = argparse.ArgumentParser(
        description='Returns a column from a file')
    parser.add_argument('--file', dest='file_name', type=str, required=True,
                        help='Enter file name to be parsed.')
    parser.add_argument('--county_column', dest='county_column', type=int,
                        required=True,
                        help='Enter the column containing county strings')
    parser.add_argument('--county', dest='county', type=str, required=True,
                        help='Enter the county string')
    parser.add_argument('--cases_columns', dest='cases_columns', nargs='+',
                        type=int, required=True,
                        help='Enter the cases column to be returned')
    # NOTE(review): argparse type=bool parses any non-empty string as True
    # — '--print_daily_cases 0' is also truthy; action='store_true' would
    # match the "Enter 1 to ..." help text better.
    parser.add_argument('--print_daily_cases', dest='print_daily_cases',
                        type=bool, required=False,
                        help='Enter 1 to print daily cases')
    parser.add_argument('--print_running_avg', dest='print_running_avg',
                        type=bool, required=False,
                        help='Enter 1 to print running avg')
    parser.add_argument('--window_size', dest='window_size', type=int,
                        required=False,
                        help='Enter window size for avg calculation')
    parser.add_argument('--print_percap_plot', dest='print_percap_plot',
                        type=bool, required=False,
                        help='Enter 1 to print per capita plot')
    args = parser.parse_args()

    # Print Cases
    county_cases = print_cases(args.file_name, args.county_column,
                               args.county, args.cases_columns)
    # Print Daily Cases
    if args.print_daily_cases:
        print_daily_cases(county_cases)
    # Print Running Average
    if args.print_running_avg:
        daily_cases = mu.get_daily_count(county_cases)
        print_running_avg(daily_cases, args.window_size)
    # Print Percapita Plot
    if args.print_percap_plot:
        print_percap_plot(args.file_name, args.county)
def main():
    """ get Covid19 case data and census data and convert to per-capita rates
    data are from two different files. Per Capita Rates are per 100,000 people

    Required Args:
    ---------------
    state: str
        Name of USA State (No abbreviations)
    coviddata_countys_list: list of str

    Optional Args (have defaults): see argparser section
    -------------------------------------------
    data_out_file: str
        name of CSV file if want one to be made. or '[]'
    covid_file_name: str
    census_file_name: str
    daily_new: bool default=True
    running_avg: bool default=False
    running_sum: bool default=False
    window: int
    coviddata_county_column: int *
    cases_column: int *
    date_column: int *
    census_state_column: int *
    census_county_column: int *
    pop_column: int *
    Note: *= only needs to be changed if format of covid19
    and census data files are changed

    Returns:
    ---------
    out_data : list of lists of lists:
        [census_countys_list,
         [[dates for c1],[dates for c2],..],
         [per_capita_rates c1],[per_capita_rates c2],...]

    Where:
    ------
    per_capita_rates: list
        list of cases / population (these are per 100,000 people)
    dates: list
        list of dates in format datetime.date(YYYY, MM, D)
    """
    # parse command line arguments
    parser = argparse.ArgumentParser(description='process args for \
                                     reading covid data CSV file')
    parser.add_argument('--state', type=str, help='Name of the State',
                        required=True)
    parser.add_argument('--coviddata_countys_list', type=str, nargs='+',
                        help='list of strings for \
                        Name(s) of the county(s) in covid CSV file \
                        that we want to look at',
                        required=True)
    parser.add_argument('--data_out_file', type=str,
                        help='Name of the CSV file to write this data \
                        out to. If not wanted, is "[]", which\
                        is coded to not return any data_out_file',
                        default='[]')
    parser.add_argument('--covid_file_name', type=str,
                        help='Name of the input covid cases data file',
                        default='covid-19-data/us-counties.csv')
    parser.add_argument('--census_file_name', type=str,
                        help='Name of the input census data file',
                        default='census-data/co-est2019-alldata.csv')
    parser.add_argument('--coviddata_county_column', type=int,
                        help='column ind for county names in covid CSVfile',
                        default=1)
    parser.add_argument('--cases_column', type=int,
                        help='column ind for number of cases in covid CSVfile',
                        default=4)
    parser.add_argument('--date_column', type=int, default=0,
                        help='column ind for date in covid CSV file')
    parser.add_argument('--census_state_column', type=int,
                        help='column ind for state names in census CSV file',
                        default=5)
    parser.add_argument('--census_county_column', type=int,
                        help='column ind for county names in census CSV file',
                        default=6)
    parser.add_argument('--pop_column', type=int,
                        help='column ind for populaiton in census CSV file',
                        default=7)
    # NOTE(review): argparse type=bool parses any non-empty string as True
    # ("--daily_new False" is still True) — verify intended usage.
    parser.add_argument('--daily_new', type=bool, default=True,
                        help='daily newcases. default is cumulativ dailycases')
    parser.add_argument('--running_avg', type=bool, default=False,
                        help='running average of cases.\
                        default is False, window size is required')
    parser.add_argument('--running_sum', type=bool, default=False,
                        help='running sum of cases over a window.\
                        default is False, window size is required.\
                        cannot be switched on at same \
                        time as running_avg')
    parser.add_argument('--window', type=int, default=5,
                        help='Window size of running average or running sum')
    # parse arguments and store them in args
    args = parser.parse_args()

    # assign arguments
    state = args.state
    # County names are passed with '-' in place of spaces on the CLI.
    coviddata_countys_list = [
        i.replace('-', ' ') for i in args.coviddata_countys_list
    ]
    data_out_file = args.data_out_file
    coviddata_file_name = args.covid_file_name
    coviddata_county_column = args.coviddata_county_column
    cases_column = args.cases_column
    date_column = args.date_column
    daily_new = args.daily_new
    running_avg = args.running_avg
    running_summation = args.running_sum
    window = args.window
    census_file_name = args.census_file_name
    census_state_column = args.census_state_column
    census_county_column = args.census_county_column
    pop_column = args.pop_column

    # make CSV file copy of only state covid-19-data
    if coviddata_file_name == 'covid-19-data/us-counties.csv':
        state_coviddata_file_name = 'covid-19-data/' + state + '-counties.csv'
        try:
            # Existence probe: open and immediately close.
            f1 = open(state_coviddata_file_name, 'r')
            f1.close()
        except FileNotFoundError:
            print('creating state_covidfile')
            state_coviddata_file_name = make_statefile(state)
            print(state_coviddata_file_name, 'state_coviddata_file_name')
    elif coviddata_file_name == 'covid-19-data/' + state + '-counties.csv':
        state_coviddata_file_name = coviddata_file_name
    else:
        # NOTE(review): Warning(...) only constructs an exception object
        # and discards it — nothing is raised or shown; warnings.warn()
        # was probably intended.
        Warning('This script must be run on data within only \
                one state or else has error if counties of \
                the same name in different states across USA.\
                if not using default args.covid_file_name, please\
                check that county names are not duplicated.\
                NOTE: Proceeding by assigning variable\
                state_coviddata_file_name = args.covid_file_name ;\
                Watch out for errors from this issue.')
        state_coviddata_file_name = args.covid_file_name

    # get census data for all counties in the state
    census_state_data = get_column(
        census_file_name,
        census_state_column,
        state,
        result_columns=[census_county_column, pop_column],
        date_column=None)
    # sort census_state_data by county name
    # census_state_data is of list [[county_names], [census2010pops])
    sorted_pairs = sorted(zip(census_state_data[0], census_state_data[1]))
    tuples = zip(*sorted_pairs)
    list1, list2 = [list(tuple) for tuple in tuples]
    census_state_data_sorted = [list1, list2]

    # pre-allocate structure of out_data list of lists of lists
    # out_data[0] will be coviddata_countys_list
    # out_data[1] will be list of dates for each county
    # out_data[2] will be list of per_capita_rates for each county
    out_data = [[], [], []]

    # run for each county
    for county_index in range(0, len(coviddata_countys_list)):
        coviddata_county_name = coviddata_countys_list[county_index]
        out_data[0].append(coviddata_county_name)
        # run get_column() on covid data and census data
        cases_data_cumulative = get_column(state_coviddata_file_name,
                                           coviddata_county_column,
                                           coviddata_county_name,
                                           result_columns=[cases_column],
                                           date_column=date_column,
                                           return_dates=True)
        # convert cases from type str to int
        cases_data_cumulative[0] = list(map(int, cases_data_cumulative[0]))
        # dates are stored in last index of list, in datetime format
        dates = cases_data_cumulative[-1]
        # daily cases option
        if daily_new is True:
            from my_utils import get_daily_count
            cases = get_daily_count(cases_data_cumulative[0])
        else:
            cases = cases_data_cumulative[0]
        # print running average OR running sum cases option OR neither
        if running_avg is True:
            from my_utils import running_average
            cases = running_average(cases, window)
        elif running_summation is True:
            from my_utils import running_sum
            cases = running_sum(cases, window)
        # use binary search to get county pop census data out of state data
        census_county_name = coviddata_county_name + ' County'
        county_pop = binary_search(census_county_name,
                                   census_state_data_sorted)
        # raise error if county census not found
        if county_pop is None:
            # NOTE(review): the bare 'ValueError' expression below has no
            # effect; 'raise ValueError(...)' was probably intended. The
            # sys.exit(1) that follows does terminate, though.
            ValueError
            print('county census not found')
            sys.exit(1)
        county_pop = int(county_pop)
        # convert cases to per-capita rates by dividing county case by pop
        if type(cases) == list:
            cases = np.asarray(cases)
        per_capita_rates = np.round(cases / county_pop * 100000, 2)
        # convert per_capita_rates back from nparray to list
        per_capita_rates = per_capita_rates.tolist()
        # append to out_data lists
        out_data[1].append([dates])
        out_data[2].append([per_capita_rates])

    # write out_data to a CSV file in format 'County','date','per_capita_rate'
    if data_out_file != '[]':
        fout = open(data_out_file, 'w')
        fout.write("county,date,per_capita_rate \n")
        for county_index in range(0, len(out_data[0])):
            for date_ind in range(0, len(out_data[1][county_index][0])):
                fout.write(out_data[0][county_index] + ',' +
                           str(out_data[1][county_index][0][date_ind]) + ',' +
                           str(out_data[2][county_index][0][date_ind]) + '\n')
        fout.close()
    return out_data
def main():
    """ get Covid19 case data and census data and convert to per-capita rates
    data are from two different files

    Returns:
    ---------
    per_capita_rates: list
        list of cases / population
    dates: list
        list of dates in format datetime.date(YYYY, MM, D)
    """
    # TODO: add main def docstring
    # parse command line arguments
    parser = argparse.ArgumentParser(description='process args for \
                                     reading covid data CSV file')
    parser.add_argument('--covid_file_name', type=str,
                        help='Name of the input covid cases data file',
                        required=True)
    parser.add_argument('--census_file_name', type=str,
                        help='Name of the input census data file',
                        required=True)
    parser.add_argument('--plot_file_name', type=str,
                        help='output plot file generated',
                        required=True)
    parser.add_argument('--state', type=str, help='Name of the State',
                        required=True)
    parser.add_argument('--coviddata_county', type=str,
                        help='Name of the county in covid CSV file',
                        required=True)
    parser.add_argument('--census_county', type=str,
                        help='Name of the county in census CSV file',
                        required=True)
    parser.add_argument('--coviddata_county_column', type=int,
                        help='column ind for county names in covid CSVfile')
    parser.add_argument('--cases_column', type=int,
                        help='column ind for number of cases in covid CSVfile')
    parser.add_argument('--date_column', type=int, default=0,
                        help='column ind for date in covid CSV file')
    parser.add_argument('--census_state_column', type=int,
                        help='column ind for state names in census CSV file')
    parser.add_argument('--census_county_column', type=int,
                        help='column ind for county names in census CSV file')
    parser.add_argument('--pop_column', type=int,
                        help='column ind for populaiton in census CSV file')
    # NOTE(review): argparse type=bool parses any non-empty string as True.
    parser.add_argument('--daily_new', type=bool, default=False,
                        help='daily newcases. default is cumulativ dailycases')
    parser.add_argument('--running_avg', type=bool, default=False,
                        help='running average of cases.\
                        default is False, window size is required')
    parser.add_argument('--window', type=int, default=5,
                        help='Window size of running average')
    # parse arguments and store them in args
    args = parser.parse_args()
    # assign arguments
    coviddata_file_name = args.covid_file_name
    coviddata_county_column = args.coviddata_county_column
    plot_file_name = args.plot_file_name
    coviddata_county_name = args.coviddata_county
    cases_column = args.cases_column
    date_column = args.date_column
    daily_new = args.daily_new
    running_avg = args.running_avg
    window = args.window
    census_file_name = args.census_file_name
    census_state_column = args.census_state_column
    state = args.state
    census_county_name = args.census_county
    census_county_column = args.census_county_column
    pop_column = args.pop_column
    # run get_column() on covid data and census data
    cases_data_cumulative = get_column(coviddata_file_name,
                                       coviddata_county_column,
                                       coviddata_county_name,
                                       result_columns=[cases_column],
                                       date_column=date_column,
                                       return_dates=True)
    census_state_data = get_column(
        census_file_name,
        census_state_column,
        state,
        result_columns=[census_county_column, pop_column],
        date_column=None)
    # convert cases from type str to int
    cases_data_cumulative[0] = list(map(int, cases_data_cumulative[0]))
    # dates are stored in last index of list, in datetime format
    dates = cases_data_cumulative[-1]
    # daily cases option
    if daily_new is True:
        from my_utils import get_daily_count
        cases = get_daily_count(cases_data_cumulative[0])  # not dates column
    else:
        cases = cases_data_cumulative[0]
    # print runing average cases option
    if running_avg is True:
        from my_utils import running_average
        cases = running_average(cases, window)
    # census_state_data is of list [[county_names], [census2010pops])
    # sort census_state_data by county name
    sorted_pairs = sorted(zip(census_state_data[0], census_state_data[1]))
    tuples = zip(*sorted_pairs)
    list1, list2 = [list(tuple) for tuple in tuples]
    census_state_data_sorted = [list1, list2]
    # use binary search to get county pop census data out of state data
    county_pop = binary_search(census_county_name, census_state_data_sorted)
    # raise error if county census not found
    if county_pop is None:
        # NOTE(review): the bare 'ValueError' expression has no effect;
        # 'raise ValueError(...)' was probably intended. sys.exit(1)
        # still terminates the program.
        ValueError
        print('county census not found')
        sys.exit(1)
    county_pop = int(county_pop)
    # convert cases to per-capita rates by dividing county case by population
    if type(cases) == list:
        cases = np.asarray(cases)
    per_capita_rates = cases / county_pop
    # convert per_capita_rates back from nparray to list
    per_capita_rates = per_capita_rates.tolist()
    # plot using plot_lines
    plot_points = [[]]
    for point in range(0, len(per_capita_rates)):
        plot_points[0].append([dates[point], per_capita_rates[point]])
    plot_labels = ['dates', 'per_capita_rates']
    plot = plot_lines(plot_points, plot_labels, plot_file_name)
    return plot  # NOTE: idk if this line is needed?