def test_get_chain_rolling(self):
    """get() retrieves a value stored with the default rolling hash,
    and a second put() on the same key does not replace the value."""
    # seed a 10-slot chained table with one entry (default method='rolling')
    table = [[] for _ in range(10)]
    hash_table.put(table, 10, 'ABC', '789')
    self.assertEqual(hash_table.get(table, 10, 'ABC'), '789')
    # putting a different value under an existing key must NOT overwrite it
    hash_table.put(table, 10, 'ABC', '101112')
    self.assertNotEqual(hash_table.get(table, 10, 'ABC'), '101112')
def test_get(self):
    """get() returns the stored value for a present key and None for
    an absent key."""
    htable = [[] for _ in range(500)]
    hash_table.put(htable, 500, 'testmystring', 8002334)
    # NOTE(review): argument order here is (key, table, size), while other
    # tests call get(table, size, key) -- confirm the intended signature.
    found = hash_table.get('testmystring', htable, 500)
    missing = hash_table.get('notmystring', htable, 500)
    self.assertEqual(found, 8002334)
    self.assertEqual(missing, None)
def test_put(self):
    """put() stores the (key, value) pair in the bucket selected by
    ascii_hash_function."""
    htable = [[] for _ in range(500)]
    hash_table.put(htable, 500, 'testmystring', 8002334)
    bucket = hash_table.ascii_hash_function('testmystring', 500)
    # the new entry is the first pair chained in that bucket
    self.assertEqual(htable[bucket][0][0], 'testmystring')
    self.assertEqual(htable[bucket][0][1], 8002334)
def test_put_chain_rolling(self):
    """The default rolling hash is order-sensitive: anagram keys land
    in different buckets."""
    table = [[] for _ in range(10)]
    hash_table.put(table, 10, 'ABC', '789')  # method='rolling'
    self.assertEqual(table[6][0][0], 'ABC')
    self.assertEqual(table[6][0][1], '789')
    # same characters, different order -> different bucket
    hash_table.put(table, 10, 'BCA', '101112')
    self.assertEqual(table[2][0][0], 'BCA')
    self.assertEqual(table[2][0][1], '101112')
def test_put_chain_ascii(self):
    """The ascii hash is order-insensitive, so anagram keys collide and
    are chained within the same bucket."""
    table = [[] for _ in range(10)]
    hash_table.put(table, 10, 'ABC', '123', method='ascii')
    self.assertEqual(table[8][0][0], 'ABC')
    self.assertEqual(table[8][0][1], '123')
    # 'BCA' has the same character sum -> chained second in bucket 8
    hash_table.put(table, 10, 'BCA', '456', method='ascii')
    self.assertEqual(table[8][1][0], 'BCA')
    self.assertEqual(table[8][1][1], '456')
def test_get_chain_ascii(self):
    """get() with the ascii hash resolves chained collisions and
    returns None for a key that was never stored."""
    table = [[] for _ in range(10)]
    hash_table.put(table, 10, 'ABC', '123', method='ascii')
    self.assertEqual(hash_table.get(table, 10, 'ABC', method='ascii'), '123')
    # collide a second key into the same bucket, then fetch each key
    hash_table.put(table, 10, 'BCA', '456', method='ascii')
    self.assertEqual(hash_table.get(table, 10, 'BCA', method='ascii'), '456')
    # a key that was never inserted
    self.assertEqual(hash_table.get(table, 10, 'Margot', method='ascii'), None)
def get_daily_rates(state, date, output_filename):
    """Print and save the daily per-capita COVID case rate for each
    county in a state on a given date.

    Fix: the output file was opened with ``open(..., 'w+')`` and only
    closed on the success path (leaked on any exception); it is now
    managed with a ``with`` block.

    Parameters
    ----------
    state : str
        Name of the state, as spelled in the census and case files.
    date : str
        Date of cases (must match the date format in the case file).
    output_filename : str
        Prefix for the output file; rates are written to
        ``output_filename + '_rates.txt'``, one rate per line.

    Returns
    -------
    county_names : list of str
        Name of each county with cases on *date*.
    case_rates : list of float
        Per-capita rate for each county on that day.
    """
    # hash table mapping county name -> population
    table_size = 1000
    hcounty_pops = [[] for _ in range(table_size)]

    # counties and populations for the state, from the census file
    census_name = 'co-est2019-alldata.csv'
    query_column = 5  # state-name column in the census file
    results_columns = [6, 7]  # county name, population
    county_pops = mu.get_columns(census_name, query_column, state,
                                 results_columns)

    # skip the header row; census county names carry a trailing
    # ' County' (7 chars) that must be stripped
    for row in county_pops[1:]:
        curr_county = row[0][:-7]
        ht.put(hcounty_pops, table_size, curr_county, row[1])

    # all (date, county, cases) rows for the state
    case_file = 'covid-19-data/us-counties.csv'
    state_column = 2
    counties_cases = [0, 1, 4]
    date_c_cases = mu.get_columns(case_file, state_column, state,
                                  counties_cases)

    # keep only rows for the requested date
    c_cases = [[c_date, c_county, c_case]
               for c_date, c_county, c_case in date_c_cases
               if c_date == date]

    county_names = []
    case_rates = []
    output_txt = output_filename + '_rates.txt'
    with open(output_txt, 'w') as f:
        for curr_date, county_name, cases in c_cases:
            county_names.append(county_name)
            # NOTE(review): ht.get is called as (key, table, size) here --
            # confirm against the hash-table module's signature.
            c_pop = ht.get(county_name, hcounty_pops, table_size)
            date_case_rate = mu.calc_per_capita([[date, cases]], int(c_pop))
            case_rate = date_case_rate[0][1]
            case_rates.append(case_rate)
            print(county_name, case_rate)
            f.write(str(case_rate) + '\n')
    return county_names, case_rates
def main():
    """Calculate the number of covid19 cases per capita for each county
    in a given State for a given date. Cases are per 100,000 people and
    rounded to 1 decimal.

    Fixes in this revision:
    * ``--daily_new`` / ``--running_avg`` used ``type=bool``, so ANY
      non-empty string (including "False") parsed as True. They now use
      an explicit string-to-bool converter; the no-flag defaults are
      unchanged.
    * The single-state data warning was built with ``Warning(...)`` but
      never emitted (a silent no-op); it is now issued via
      ``warnings.warn``.
    * ``--query_date`` help said "YY-MM-DD", but ``date.fromisoformat``
      requires "YYYY-MM-DD"; the help text is corrected.

    Required args:
        --state       Name of USA State (no abbreviations)
        --query_date  date in ISO format 'YYYY-MM-DD'

    Optional args (defaults in the parser below): file names, column
    indices (only needed if the file formats change), --daily_new
    (default True), --running_avg (default False), --window.

    Returns
    -------
    out_lists : list of [str, float]
        [county_name, county_caserate_at_date] for each county that has
        data at the query date.
    """
    import warnings

    def _str2bool(s):
        # argparse type=bool treats every non-empty string as True;
        # parse the common spellings explicitly instead.
        return str(s).strip().lower() in ('true', '1', 'yes', 'y', 't')

    # parse command line arguments
    parser = argparse.ArgumentParser(description='process args for '
                                     'reading covid data CSV file')
    parser.add_argument('--state', type=str, required=True,
                        help='Name of the State')
    parser.add_argument('--query_date', type=str, required=True,
                        help='date in ISO format "YYYY-MM-DD"')
    parser.add_argument('--covid_file_name', type=str,
                        default='covid-19-data/us-counties.csv',
                        help='Name of the input covid cases data file')
    parser.add_argument('--census_file_name', type=str,
                        default='census-data/co-est2019-alldata.csv',
                        help='Name of the input census data file')
    parser.add_argument('--coviddata_county_column', type=int, default=1,
                        help='column ind for county names in covid CSVfile')
    parser.add_argument('--cases_column', type=int, default=4,
                        help='column ind for number of cases in covid CSVfile')
    parser.add_argument('--date_column', type=int, default=0,
                        help='column ind for date in covid CSV file')
    parser.add_argument('--census_state_column', type=int, default=5,
                        help='column ind for state names in census CSV file')
    parser.add_argument('--census_county_column', type=int, default=6,
                        help='column ind for county names in census CSV file')
    parser.add_argument('--pop_column', type=int, default=7,
                        help='column ind for population in census CSV file')
    parser.add_argument('--daily_new', type=_str2bool, default=True,
                        help='daily newcases. False gives cumulativ cases')
    parser.add_argument('--running_avg', type=_str2bool, default=False,
                        help='running average of cases. default is False, '
                             'window size is required')
    parser.add_argument('--window', type=int, default=5,
                        help='Window size of running average')

    # parse arguments and store them in args
    args = parser.parse_args()

    # assign arguments
    coviddata_file_name = args.covid_file_name
    coviddata_county_column = args.coviddata_county_column
    cases_column = args.cases_column
    date_column = args.date_column
    daily_new = args.daily_new
    running_avg = args.running_avg
    window = args.window
    census_file_name = args.census_file_name
    census_state_column = args.census_state_column
    state = args.state
    census_county_column = args.census_county_column
    pop_column = args.pop_column
    query_date = date.fromisoformat(args.query_date)

    # Work from a single-state copy of the covid data: county names are
    # only unique within one state.
    if coviddata_file_name == 'covid-19-data/us-counties.csv':
        state_coviddata_file_name = 'covid-19-data/' + state + '-counties.csv'
        try:
            f1 = open(state_coviddata_file_name, 'r')
            f1.close()
        except FileNotFoundError:
            print('creating state_covidfile')
            state_coviddata_file_name = make_statefile(state)
            print(state_coviddata_file_name, 'state_coviddata_file_name')
    else:
        # was Warning(...): built an exception object but never emitted
        # it -- actually warn the user now
        warnings.warn('This script must be run on data within only '
                      'one state or else has error if counties of '
                      'the same name in different states across USA. '
                      'if not using default args.covid_file_name, please '
                      'check that county names are not duplicated. '
                      'NOTE: Proceeding by assigning variable '
                      'state_coviddata_file_name = args.covid_file_name ; '
                      'Watch out for errors from this issue.')
        state_coviddata_file_name = args.covid_file_name

    # get state county names and population data from census file
    census_state_data = get_column(census_file_name, census_state_column,
                                   state,
                                   result_columns=[census_county_column,
                                                   pop_column],
                                   date_column=None)
    county_pop_list = census_state_data[1][1:]
    # census file has names as "countyname + County", so rm " County"
    county_names_list_withcounty = census_state_data[0][1:]
    county_names_list = [name[:-7] for name in county_names_list_withcounty]

    # make hashtable of (key=county_name, value=county_pop)
    N = 260  # hashtable size. Max number counties in a State is Texas, 254
    census_hashtable = [[] for _ in range(N)]
    for c in range(len(county_names_list)):
        hash_table.put(census_hashtable, N, county_names_list[c],
                       county_pop_list[c], method='rolling')

    # daily cases option and running avg cases option
    if daily_new is True:
        from my_utils import get_daily_count
    if running_avg is True:
        from my_utils import running_average

    # Loop through each county in state
    out_lists = []
    for c in range(len(county_names_list)):
        county_cases_data_cumulative = get_column(
            state_coviddata_file_name, coviddata_county_column,
            county_names_list[c], result_columns=[cases_column],
            date_column=date_column, return_dates=True)
        # dates are stored in last index of list, in datetime format
        dates = county_cases_data_cumulative[-1]
        # convert cases from type str to int
        county_cases = list(map(int, county_cases_data_cumulative[0]))
        # daily cases option and running avg options
        if daily_new is True:
            county_cases = get_daily_count(county_cases)
        if running_avg is True:
            county_cases = running_average(county_cases, window)
        # binary search for county cases at date
        county_cases_at_date = binary_search(query_date,
                                             [dates, county_cases])
        # case rate per 100,000 people
        if county_cases_at_date is not None:
            county_caserate_at_date = county_cases_at_date * 100000 \
                / int(hash_table.get(census_hashtable, N,
                                     county_names_list[c],
                                     method='rolling'))
            out_lists.append([county_names_list[c],
                              round(county_caserate_at_date, 1)])
    print(out_lists)
    return out_lists