def test_get_column_missingdates_returndates(self):
    """Missing dates are forward-filled; dates are returned alongside values."""
    expected_cases = [
        '2289', '2289', '2289', '2324', '2344', '2361', '2399',
        '2399', '2399', '2399', '2399', '2574', '2574', '2671',
    ]
    iso_days = (['2020-08-29', '2020-08-30', '2020-08-31'] +
                ['2020-09-%02d' % day for day in range(1, 12)])
    expected_dates = [date.fromisoformat(day) for day in iso_days]
    observed = my_utils.get_column(
        'covid-19-data-old/us-counties-testfile-Boulder-fakemissingdates.csv',
        1, 'Boulder', result_columns=[4], date_column=0, return_dates=True)
    self.assertEqual(observed, [expected_cases, expected_dates])
def test_get_column_multipleresultcolumns(self):
    """result_columns of len > 1 yields one result list per column."""
    observed = my_utils.get_column(
        'covid-19-data-old/us-counties-testfile-Boulder.csv',
        0, '2020-09-04', result_columns=[4, 5])
    self.assertEqual(observed, [['2399'], ['79']])
def test_get_column_querynotmatched(self):
    """A query value absent from the file yields an empty result list."""
    observed = my_utils.get_column(
        'covid-19-data-old/us-counties-testfile-Boulder.csv',
        1, 'Denver', result_columns=[4])
    self.assertEqual(observed, [[]])
def test_get_column(self):
    """Sanity-check the Boulder test CSV (subset of last 20 days):
    cases column for 2020-09-04 is '2399'."""
    observed = my_utils.get_column(
        'covid-19-data-old/us-counties-testfile-Boulder.csv',
        0, '2020-09-04', result_columns=[4])
    self.assertEqual(observed, [['2399']])
def get_county_names_all(file_name):
    '''Build the master list of all BRAC counties involved in the dataset.

    Parameters
    ----------
    file_name : str
        Name of the BRAC permit dataset CSV file,
        e.g. 'BRAC_races_permits.csv'.

    Returns
    -------
    all_countys : list of str
        Sorted list of the unique county names in the dataset.
    '''
    # hard-coded column indices for the BRAC permit CSV layout
    permit_column = 4
    county_column = 2
    # pull the county column for both permit outcomes ('yes' and 'no')
    BRAC_permits_yes = get_column(file_name, permit_column, 'yes',
                                  result_columns=[county_column],
                                  date_column=None, return_dates=False)
    BRAC_permits_no = get_column(file_name, permit_column, 'no',
                                 result_columns=[county_column],
                                 date_column=None, return_dates=False)
    countys_yes = BRAC_permits_yes[0]
    countys_no = BRAC_permits_no[0]
    # unique, sorted list of every county involved
    # (BUGFIX: the original called remove_list_duplicates twice on the
    # same input; once is enough)
    all_countys = remove_list_duplicates(countys_yes + countys_no)
    all_countys.sort()
    print(all_countys)
    return all_countys
def test_get_column_withdeaths(self):
    """Handles two result columns (cases and deaths) with a date column."""
    expected = [
        ['2289', '2324', '2344', '2361', '2399', '2574', '2671'],
        ['79', '79', '79', '80', '80', '81', '83'],
    ]
    observed = my_utils.get_column(
        'covid-19-data-old/us-counties-testfile-Boulder-shorter.csv',
        1, 'Boulder', result_columns=[4, 5], date_column=0)
    self.assertEqual(observed, expected)
def test_get_column_missingdates(self):
    """Gaps in the date column are filled by repeating the prior value."""
    expected_cases = ['2289', '2289', '2289', '2324', '2344', '2361',
                      '2399', '2399', '2399', '2399', '2399', '2574',
                      '2574', '2671']
    observed = my_utils.get_column(
        'covid-19-data-old/us-counties-testfile-Boulder-fakemissingdates.csv',
        1, 'Boulder', result_columns=[4], date_column=0)
    self.assertEqual(observed, [expected_cases])
def test_get_daily_count(self):
    """Daily increment at index 19 of the Boulder series is 11.

    BUGFIX: the original used assertIs, comparing an int by identity,
    which only passed because CPython caches small ints; assertEqual is
    the correct value comparison.
    """
    self.assertEqual(
        mu.get_daily_count(
            mu.get_column('covid-19-data/us-counties.csv', 1, 'Boulder',
                          4, 0))[19], 11)
def test_get_daily_count_error_mode(self):
    """A bad file path makes get_column exit cleanly with status 1."""
    with self.assertRaises(SystemExit) as ctx:
        mu.get_daily_count(
            mu.get_column('covid-19-data/us-counties.cs', 1, 'Boulder',
                          4, 0))
    self.assertEqual(ctx.exception.code, 1)
def test_out_of_order_date(self):
    """Out-of-order dates in the input CSV cause a clean exit(1)."""
    with self.assertRaises(SystemExit) as ctx:
        mu.get_column('test_date_disorder.csv', 1, 'Boulder', 4, 0)
    self.assertEqual(ctx.exception.code, 1)
def test_date_parsing_error_mode(self):
    """Smoke test: a non-date column passed as date_column must not crash.

    NOTE(review): the original has no assertion either; this only checks
    the call completes without raising.
    """
    mu.get_column('covid-19-data/us-counties.csv', 1, 'Boulder', 4, 3)
def test_date_skipping(self):
    """Smoke test: a file with missing dates must not crash get_column.

    NOTE(review): no assertion in the original either; completion is the
    only check.
    """
    mu.get_column('test_date_missing.csv', 1, 'Boulder', 4, 0)
def test_no_date_column(self):
    """Smoke test: date_column=None disables date handling entirely.

    NOTE(review): no assertion in the original either; completion is the
    only check.
    """
    mu.get_column('test_date_missing.csv', 1, 'Boulder', 4, None)
def BRAC_permit_data_with_caserates(BRAC_county_caserates_file,
                                    BRAC_race_info_file):
    """For each race, look up the county caserate per capita on the race
    date and write a copy of the BRAC permit dataset with the caserate
    appended as a new column.

    Parameters
    ----------
    BRAC_county_caserates_file : str
        Path of a CSV file whose columns are county name, date, and
        caserate per capita (per 100,000 population).
    BRAC_race_info_file : str
        Name of the BRAC permit dataset CSV file,
        e.g. 'BRAC_races_permits.csv'.

    Returns
    -------
    out_dataset_file : str
        Path of the CSV written: 'BRAC_countycases_at_races.csv'.
    """
    # Import BRAC races data (Colorado rows only)
    state_column = 0
    BRAC_races = get_column(BRAC_race_info_file, state_column, 'Colorado',
                            result_columns=[1, 2, 3, 4], date_column=None,
                            return_dates=False)
    # name the BRAC_races columns so the lookups below read clearly
    BRAC_races_racenames = BRAC_races[0]
    BRAC_races_countys = BRAC_races[1]
    BRAC_races_dates = BRAC_races[2]
    BRAC_races_permits = BRAC_races[3]  # kept for documentation of column 4
    # unique BRAC counties involved
    all_countys = remove_list_duplicates(BRAC_races_countys)
    # column indices within the caserates file
    county_column = 0
    date_column = 1
    caserate_column = 2
    # cases_data: [county names, per-county date lists, per-county rate lists]
    cases_data = [[], [], []]
    for county_name in all_countys:
        cases_data_county = get_column(
            BRAC_county_caserates_file, county_column, county_name,
            result_columns=[date_column, caserate_column],
            date_column=None, return_dates=False)
        cases_data[0].append(county_name)
        cases_data[1].append(cases_data_county[0])
        cases_data[2].append(cases_data_county[1])
    cases_data_countys = cases_data[0]
    cases_data_dates_lists = cases_data[1]
    cases_data_rates_lists = cases_data[2]
    # match each race to the caserate of its county on its date
    caserate_races = []
    for race in range(len(BRAC_races_racenames)):
        county_ind = cases_data_countys.index(BRAC_races_countys[race])
        date_ind = cases_data_dates_lists[county_ind].index(
            BRAC_races_dates[race])
        caserate_races.append(cases_data_rates_lists[county_ind][date_ind])
    # BUGFIX: open files with `with` so the handles are closed even if an
    # error occurs mid-read/mid-write (the original used open()/close()).
    with open(BRAC_race_info_file, 'r') as fin:
        next(fin)  # skip the header line
        out_line_list = [line for line in fin]
    # NOTE(review): only the first len(BRAC_races_racenames) data lines
    # are copied out below; this assumes the input file holds exactly the
    # Colorado rows matched above -- confirm for multi-state files.
    out_dataset_file = 'BRAC_countycases_at_races.csv'
    with open(out_dataset_file, 'w') as fout:
        fout.write("state,race name,county,date,permit approved y/n, "
                   "county caserate per capita 100000 ppl \n")
        for line in range(len(BRAC_races_racenames)):
            fout.write(out_line_list[line].strip() + ',' +
                       str(caserate_races[line]) + '\n')
    return out_dataset_file
def main():
    """Convert Covid19 case data plus census data to per-capita rates.

    Rates are per 100,000 people; data come from two input files.

    Required command-line args
    --------------------------
    --state : str
        Name of USA State (no abbreviations).
    --coviddata_countys_list : list of str
        County names ('-' stands in for spaces).

    Optional args (see the argparser section for defaults)
    ------------------------------------------------------
    --data_out_file, --covid_file_name, --census_file_name, --daily_new,
    --running_avg, --running_sum, --window, and the *_column index
    options (only needed if the covid19/census file formats change).

    Returns
    -------
    out_data : list of three lists:
        [county names,
         [[dates for c1], [dates for c2], ...],
         [[per_capita_rates for c1], [per_capita_rates for c2], ...]]
        Dates are datetime.date objects; rates are floats per 100,000
        people, rounded to 2 decimals.
    """

    def _str2bool(value):
        # BUGFIX: argparse `type=bool` treats ANY non-empty string
        # (including "False") as True; parse the usual spellings instead.
        return str(value).strip().lower() in ('true', 't', 'yes', 'y', '1')

    # parse command line arguments
    parser = argparse.ArgumentParser(description='process args for '
                                                 'reading covid data CSV file')
    parser.add_argument('--state', type=str, required=True,
                        help='Name of the State')
    parser.add_argument('--coviddata_countys_list', type=str, nargs='+',
                        required=True,
                        help='list of strings for Name(s) of the county(s) '
                             'in covid CSV file that we want to look at')
    parser.add_argument('--data_out_file', type=str, default='[]',
                        help='Name of the CSV file to write this data out '
                             'to. If not wanted, is "[]", which is coded '
                             'to not return any data_out_file')
    parser.add_argument('--covid_file_name', type=str,
                        default='covid-19-data/us-counties.csv',
                        help='Name of the input covid cases data file')
    parser.add_argument('--census_file_name', type=str,
                        default='census-data/co-est2019-alldata.csv',
                        help='Name of the input census data file')
    parser.add_argument('--coviddata_county_column', type=int, default=1,
                        help='column ind for county names in covid CSVfile')
    parser.add_argument('--cases_column', type=int, default=4,
                        help='column ind for number of cases in covid CSVfile')
    parser.add_argument('--date_column', type=int, default=0,
                        help='column ind for date in covid CSV file')
    parser.add_argument('--census_state_column', type=int, default=5,
                        help='column ind for state names in census CSV file')
    parser.add_argument('--census_county_column', type=int, default=6,
                        help='column ind for county names in census CSV file')
    parser.add_argument('--pop_column', type=int, default=7,
                        help='column ind for populaiton in census CSV file')
    parser.add_argument('--daily_new', type=_str2bool, default=True,
                        help='daily newcases. default is cumulativ dailycases')
    parser.add_argument('--running_avg', type=_str2bool, default=False,
                        help='running average of cases. default is False, '
                             'window size is required')
    parser.add_argument('--running_sum', type=_str2bool, default=False,
                        help='running sum of cases over a window. default '
                             'is False, window size is required. cannot be '
                             'switched on at same time as running_avg')
    parser.add_argument('--window', type=int, default=5,
                        help='Window size of running average or running sum')
    args = parser.parse_args()

    # assign arguments ('-' in county names is translated back to ' ')
    state = args.state
    coviddata_countys_list = [
        name.replace('-', ' ') for name in args.coviddata_countys_list
    ]
    data_out_file = args.data_out_file
    coviddata_file_name = args.covid_file_name
    coviddata_county_column = args.coviddata_county_column
    cases_column = args.cases_column
    date_column = args.date_column
    daily_new = args.daily_new
    running_avg = args.running_avg
    running_summation = args.running_sum
    window = args.window
    census_file_name = args.census_file_name
    census_state_column = args.census_state_column
    census_county_column = args.census_county_column
    pop_column = args.pop_column

    # make a CSV file copy holding only this state's covid-19 data
    if coviddata_file_name == 'covid-19-data/us-counties.csv':
        state_coviddata_file_name = 'covid-19-data/' + state + '-counties.csv'
        try:
            f1 = open(state_coviddata_file_name, 'r')
            f1.close()
        except FileNotFoundError:
            print('creating state_covidfile')
            state_coviddata_file_name = make_statefile(state)
            print(state_coviddata_file_name, 'state_coviddata_file_name')
    elif coviddata_file_name == 'covid-19-data/' + state + '-counties.csv':
        state_coviddata_file_name = coviddata_file_name
    else:
        # BUGFIX: the original built a Warning() instance without raising
        # or printing it, so the message was silently discarded.
        print('WARNING: This script must be run on data within only one '
              'state, or counties with the same name in different states '
              'will collide. Proceeding with state_coviddata_file_name = '
              'args.covid_file_name; watch out for errors from this issue.')
        state_coviddata_file_name = args.covid_file_name

    # get census data for all counties in the state
    census_state_data = get_column(
        census_file_name, census_state_column, state,
        result_columns=[census_county_column, pop_column],
        date_column=None)
    # census_state_data is [[county_names], [census2010pops]]; sort by name
    # (renamed locals: the original shadowed the builtin `tuple`)
    sorted_pairs = sorted(zip(census_state_data[0], census_state_data[1]))
    names_sorted, pops_sorted = (list(col) for col in zip(*sorted_pairs))
    census_state_data_sorted = [names_sorted, pops_sorted]

    # out_data[0]: county names; out_data[1]: per-county date lists;
    # out_data[2]: per-county per-capita rate lists
    out_data = [[], [], []]
    for county_index in range(0, len(coviddata_countys_list)):
        coviddata_county_name = coviddata_countys_list[county_index]
        out_data[0].append(coviddata_county_name)
        # cumulative case counts plus their dates for this county
        cases_data_cumulative = get_column(state_coviddata_file_name,
                                           coviddata_county_column,
                                           coviddata_county_name,
                                           result_columns=[cases_column],
                                           date_column=date_column,
                                           return_dates=True)
        # convert cases from type str to int
        cases_data_cumulative[0] = list(map(int, cases_data_cumulative[0]))
        # dates are stored in the last index of the list, datetime format
        dates = cases_data_cumulative[-1]
        # daily cases option
        if daily_new is True:
            from my_utils import get_daily_count
            cases = get_daily_count(cases_data_cumulative[0])
        else:
            cases = cases_data_cumulative[0]
        # running average OR running sum option (or neither)
        if running_avg is True:
            from my_utils import running_average
            cases = running_average(cases, window)
        elif running_summation is True:
            from my_utils import running_sum
            cases = running_sum(cases, window)
        # binary search for this county's population in the census data
        census_county_name = coviddata_county_name + ' County'
        county_pop = binary_search(census_county_name,
                                   census_state_data_sorted)
        if county_pop is None:
            # BUGFIX: the original evaluated the bare name `ValueError`
            # (a no-op) here; print-and-exit is the intended behavior.
            print('county census not found')
            sys.exit(1)
        county_pop = int(county_pop)
        # per-capita rates per 100,000 people, rounded to 2 decimals
        if isinstance(cases, list):
            cases = np.asarray(cases)
        per_capita_rates = np.round(cases / county_pop * 100000, 2)
        out_data[1].append([dates])
        out_data[2].append([per_capita_rates.tolist()])

    # write out_data to CSV as 'County','date','per_capita_rate' rows
    # (BUGFIX: `with` guarantees the handle is closed; original did not)
    if data_out_file != '[]':
        with open(data_out_file, 'w') as fout:
            fout.write("county,date,per_capita_rate \n")
            for county_index in range(0, len(out_data[0])):
                for date_ind in range(0, len(out_data[1][county_index][0])):
                    fout.write(
                        out_data[0][county_index] + ',' +
                        str(out_data[1][county_index][0][date_ind]) + ',' +
                        str(out_data[2][county_index][0][date_ind]) + '\n')
    return out_data
def test_get_column(self):
    """get_column returns a non-None result for a known county query."""
    observed = mu.get_column('covid-19-data/us-counties.csv', 1,
                             'Boulder', 4, 0)
    self.assertIsNotNone(observed)
args = parser.parse_args() # assign arguments file_name = args.file_name county_column = args.county_column county = args.county cases_column = args.cases_column print_daily = args.daily print_running_avg = args.running_avg window = args.window date_column = args.date_column # call function to run cases = get_column(file_name, county_column, county, result_columns=[cases_column], date_column=date_column) # convert cases from type str to int cases = list(map(int, cases[0])) # print daily cases option if print_daily is True: from my_utils import get_daily_count day_cases = get_daily_count(cases) # print runing average cases option if print_running_avg is True: from my_utils import running_average running_avg_cases = running_average(day_cases, window)
def main():
    """Calculate covid19 cases per capita for each county in a State on
    a given date.

    Cases are per 100,000 people and rounded to 1 decimal.

    Required command-line args
    --------------------------
    --state : str
        Name of USA State (no abbreviations).
    --query_date : str
        Date in ISO format 'YYYY-MM-DD'.

    Optional args (see the argparser section for defaults)
    ------------------------------------------------------
    --covid_file_name, --census_file_name, --daily_new, --running_avg,
    --window, and the *_column index options (only needed if the covid19
    or census file formats change).

    Returns
    -------
    out_lists : list of [str, float]
        [county_name, county_caserate_at_date] pairs; counties whose
        case series has no entry for the query date are omitted.
    """

    def _str2bool(value):
        # BUGFIX: argparse `type=bool` treats ANY non-empty string
        # (including "False") as True; parse the usual spellings instead.
        return str(value).strip().lower() in ('true', 't', 'yes', 'y', '1')

    # parse command line arguments
    parser = argparse.ArgumentParser(description='process args for '
                                                 'reading covid data CSV file')
    parser.add_argument('--state', type=str, required=True,
                        help='Name of the State')
    parser.add_argument('--query_date', type=str, required=True,
                        help='date in ISO format "YY-MM-DD" ')
    parser.add_argument('--covid_file_name', type=str,
                        default='covid-19-data/us-counties.csv',
                        help='Name of the input covid cases data file')
    parser.add_argument('--census_file_name', type=str,
                        default='census-data/co-est2019-alldata.csv',
                        help='Name of the input census data file')
    parser.add_argument('--coviddata_county_column', type=int, default=1,
                        help='column ind for county names in covid CSVfile')
    parser.add_argument('--cases_column', type=int, default=4,
                        help='column ind for number of cases in covid CSVfile')
    parser.add_argument('--date_column', type=int, default=0,
                        help='column ind for date in covid CSV file')
    parser.add_argument('--census_state_column', type=int, default=5,
                        help='column ind for state names in census CSV file')
    parser.add_argument('--census_county_column', type=int, default=6,
                        help='column ind for county names in census CSV file')
    parser.add_argument('--pop_column', type=int, default=7,
                        help='column ind for populaiton in census CSV file')
    parser.add_argument('--daily_new', type=_str2bool, default=True,
                        help='daily newcases. False gives cumulativ cases')
    parser.add_argument('--running_avg', type=_str2bool, default=False,
                        help='running average of cases. default is False, '
                             'window size is required')
    parser.add_argument('--window', type=int, default=5,
                        help='Window size of running average')
    args = parser.parse_args()

    # assign arguments
    coviddata_file_name = args.covid_file_name
    coviddata_county_column = args.coviddata_county_column
    cases_column = args.cases_column
    date_column = args.date_column
    daily_new = args.daily_new
    running_avg = args.running_avg
    window = args.window
    census_file_name = args.census_file_name
    census_state_column = args.census_state_column
    state = args.state
    census_county_column = args.census_county_column
    pop_column = args.pop_column
    query_date = date.fromisoformat(args.query_date)

    # make a CSV file copy holding only this state's covid-19 data
    if coviddata_file_name == 'covid-19-data/us-counties.csv':
        state_coviddata_file_name = 'covid-19-data/' + state + '-counties.csv'
        try:
            f1 = open(state_coviddata_file_name, 'r')
            f1.close()
        except FileNotFoundError:
            print('creating state_covidfile')
            state_coviddata_file_name = make_statefile(state)
            print(state_coviddata_file_name, 'state_coviddata_file_name')
    else:
        # BUGFIX: the original built a Warning() instance without raising
        # or printing it, so the message was silently discarded.
        print('WARNING: This script must be run on data within only one '
              'state, or counties with the same name in different states '
              'will collide. Proceeding with state_coviddata_file_name = '
              'args.covid_file_name; watch out for errors from this issue.')
        state_coviddata_file_name = args.covid_file_name

    # get state county names and population data from census file
    census_state_data = get_column(census_file_name, census_state_column,
                                   state,
                                   result_columns=[census_county_column,
                                                   pop_column],
                                   date_column=None)
    county_pop_list = census_state_data[1][1:]
    # census file names counties "<name> County"; strip the 7-char suffix
    county_names_list_withcounty = census_state_data[0][1:]
    county_names_list = [name[:-7] for name in county_names_list_withcounty]

    # hashtable of county_name -> county_pop
    N = 260  # hashtable size; max counties in a State is Texas with 254
    census_hashtable = [[] for _ in range(N)]
    for c in range(len(county_names_list)):
        hash_table.put(census_hashtable, N, county_names_list[c],
                       county_pop_list[c], method='rolling')

    # bring in the optional transforms once, outside the county loop
    if daily_new is True:
        from my_utils import get_daily_count
    if running_avg is True:
        from my_utils import running_average

    # loop through each county in the state
    out_lists = []
    for c in range(len(county_names_list)):
        county_cases_data_cumulative = get_column(
            state_coviddata_file_name, coviddata_county_column,
            county_names_list[c], result_columns=[cases_column],
            date_column=date_column, return_dates=True)
        # dates are stored in the last index of the list, datetime format
        dates = county_cases_data_cumulative[-1]
        # convert cases from type str to int
        county_cases = list(map(int, county_cases_data_cumulative[0]))
        # daily cases option and running avg option
        if daily_new is True:
            county_cases = get_daily_count(county_cases)
        if running_avg is True:
            county_cases = running_average(county_cases, window)
        # binary search for this county's case count at the query date
        county_cases_at_date = binary_search(query_date,
                                             [dates, county_cases])
        # case rate per 100,000 people
        if county_cases_at_date is not None:
            county_caserate_at_date = county_cases_at_date * 100000 \
                / int(hash_table.get(census_hashtable, N,
                                     county_names_list[c],
                                     method='rolling'))
            out_lists.append([county_names_list[c],
                              round(county_caserate_at_date, 1)])
    print(out_lists)
    return out_lists
def main():
    """Open a file and extract data from one or more columns, printing
    the results to stdout.

    Optionally converts the results to daily increments and/or a running
    average. A comma-separated --result_column selects several columns.
    """

    def _str2bool(value):
        # BUGFIX: argparse `type=bool` treats ANY non-empty string
        # (including "False") as True; parse the usual spellings instead.
        return str(value).strip().lower() in ('true', 't', 'yes', 'y', '1')

    desc = 'Opens a file and extracts data from a specific column.'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('--file', dest='file_name', type=str, required=True,
                        help='Name of the file to be opened by the script.')
    parser.add_argument('--result_column', dest='result_column', default=4,
                        help='Column of file to be returned by the script. '
                             'Defaults to 4 and must correspond to an index '
                             'found in the file.')
    parser.add_argument('--county_column', dest='county_column', type=int,
                        required=True,
                        help='Column of file to be queried by the script.')
    parser.add_argument('--county', dest='county', type=str, required=True,
                        help='Name of county to retrieve data from.')
    parser.add_argument('--return_daily_increment',
                        dest='return_daily_increment', type=_str2bool,
                        default=False,
                        help='Decides whether results are returned as '
                             'daily increments.')
    parser.add_argument('--return_running_average',
                        dest='return_running_average', type=_str2bool,
                        default=False,
                        help='Decides whether to return running averages '
                             'from results.')
    parser.add_argument('--running_avg_window_size',
                        dest='running_avg_window_size', type=int, default=5,
                        help='Determines the window size for the running '
                             'average.')
    parser.add_argument('--date_column', dest='date_column', type=int,
                        default=0, help='Determines the date column.')
    args = parser.parse_args()

    print()
    print('Results:')
    results = []
    # a plain number selects one column; strings fall through to the
    # comma-separated multi-column case below
    try:
        args.result_column = int(args.result_column)
    except ValueError:
        pass
    # BUGFIX: the original ran `',' in args.result_column` even after the
    # value was converted to int (the default of 4 always was), which
    # raises TypeError. Only probe strings for a comma.
    if isinstance(args.result_column, str) and ',' in args.result_column:
        args.result_column = [str(part) for part in
                              args.result_column.split(',')]
        try:
            results = mu.get_columns(args.file_name, args.county_column,
                                     args.county, args.result_column,
                                     args.date_column)
        except ValueError:
            print('ValueError during get columns')
    else:
        try:
            results = mu.get_column(args.file_name, args.county_column,
                                    args.county, args.result_column,
                                    args.date_column)
        except ValueError:
            print('ValueError during get column')
    if args.return_daily_increment is True:
        try:
            # NOTE(review): `get_cases` is called with no module prefix;
            # confirm it is imported/defined elsewhere in this file.
            results = mu.get_daily_count(
                get_cases(args.file_name, args.county_column, args.county,
                          args.result_column, args.date_column))
        except ValueError:
            print('Value Error during get daily increment.')
    if args.return_running_average is True:
        try:
            results, _ = mu.running_average(
                results, window_size=args.running_avg_window_size)
        except ValueError:
            print('ValueError during running average')
    for result in results:
        print(result)
    print()
    print()
def main():
    """Fetch Covid19 case data and census data for one county, convert
    cases to per-capita rates, and plot the rates over time.

    Required command-line args
    --------------------------
    --covid_file_name, --census_file_name, --plot_file_name, --state,
    --coviddata_county, --census_county.

    Optional args (see the argparser section)
    -----------------------------------------
    --daily_new, --running_avg, --window, and the *_column index options.

    Returns
    -------
    plot : whatever plot_lines() returns for the generated plot file.
        Rates here are cases / population (NOT scaled per 100,000).
    """

    def _str2bool(value):
        # BUGFIX: argparse `type=bool` treats ANY non-empty string
        # (including "False") as True; parse the usual spellings instead.
        return str(value).strip().lower() in ('true', 't', 'yes', 'y', '1')

    # parse command line arguments
    parser = argparse.ArgumentParser(description='process args for '
                                                 'reading covid data CSV file')
    parser.add_argument('--covid_file_name', type=str, required=True,
                        help='Name of the input covid cases data file')
    parser.add_argument('--census_file_name', type=str, required=True,
                        help='Name of the input census data file')
    parser.add_argument('--plot_file_name', type=str, required=True,
                        help='output plot file generated')
    parser.add_argument('--state', type=str, required=True,
                        help='Name of the State')
    parser.add_argument('--coviddata_county', type=str, required=True,
                        help='Name of the county in covid CSV file')
    parser.add_argument('--census_county', type=str, required=True,
                        help='Name of the county in census CSV file')
    parser.add_argument('--coviddata_county_column', type=int,
                        help='column ind for county names in covid CSVfile')
    parser.add_argument('--cases_column', type=int,
                        help='column ind for number of cases in covid CSVfile')
    parser.add_argument('--date_column', type=int, default=0,
                        help='column ind for date in covid CSV file')
    parser.add_argument('--census_state_column', type=int,
                        help='column ind for state names in census CSV file')
    parser.add_argument('--census_county_column', type=int,
                        help='column ind for county names in census CSV file')
    parser.add_argument('--pop_column', type=int,
                        help='column ind for populaiton in census CSV file')
    parser.add_argument('--daily_new', type=_str2bool, default=False,
                        help='daily newcases. default is cumulativ dailycases')
    parser.add_argument('--running_avg', type=_str2bool, default=False,
                        help='running average of cases. default is False, '
                             'window size is required')
    parser.add_argument('--window', type=int, default=5,
                        help='Window size of running average')
    args = parser.parse_args()

    # assign arguments
    coviddata_file_name = args.covid_file_name
    coviddata_county_column = args.coviddata_county_column
    plot_file_name = args.plot_file_name
    coviddata_county_name = args.coviddata_county
    cases_column = args.cases_column
    date_column = args.date_column
    daily_new = args.daily_new
    running_avg = args.running_avg
    window = args.window
    census_file_name = args.census_file_name
    census_state_column = args.census_state_column
    state = args.state
    census_county_name = args.census_county
    census_county_column = args.census_county_column
    pop_column = args.pop_column

    # run get_column() on covid data and census data
    cases_data_cumulative = get_column(coviddata_file_name,
                                       coviddata_county_column,
                                       coviddata_county_name,
                                       result_columns=[cases_column],
                                       date_column=date_column,
                                       return_dates=True)
    census_state_data = get_column(
        census_file_name, census_state_column, state,
        result_columns=[census_county_column, pop_column],
        date_column=None)
    # convert cases from type str to int
    cases_data_cumulative[0] = list(map(int, cases_data_cumulative[0]))
    # dates are stored in the last index of the list, datetime format
    dates = cases_data_cumulative[-1]
    # daily cases option
    if daily_new is True:
        from my_utils import get_daily_count
        cases = get_daily_count(cases_data_cumulative[0])
    else:
        cases = cases_data_cumulative[0]
    # running average cases option
    if running_avg is True:
        from my_utils import running_average
        cases = running_average(cases, window)
    # census_state_data is [[county_names], [census2010pops]]; sort by name
    # (renamed locals: the original shadowed the builtin `tuple`)
    sorted_pairs = sorted(zip(census_state_data[0], census_state_data[1]))
    names_sorted, pops_sorted = (list(col) for col in zip(*sorted_pairs))
    census_state_data_sorted = [names_sorted, pops_sorted]
    # use binary search to get county pop census data out of state data
    county_pop = binary_search(census_county_name, census_state_data_sorted)
    if county_pop is None:
        # BUGFIX: the original evaluated the bare name `ValueError` (a
        # no-op) here; print-and-exit is the intended behavior.
        print('county census not found')
        sys.exit(1)
    county_pop = int(county_pop)
    # convert cases to per-capita rates by dividing by county population
    if isinstance(cases, list):
        cases = np.asarray(cases)
    per_capita_rates = (cases / county_pop).tolist()
    # plot using plot_lines: one series of (date, rate) points
    plot_points = [[]]
    for point in range(0, len(per_capita_rates)):
        plot_points[0].append([dates[point], per_capita_rates[point]])
    plot_labels = ['dates', 'per_capita_rates']
    return plot_lines(plot_points, plot_labels, plot_file_name)