def test_get_column_missingdates_returndates(self):
    """Missing dates are forward-filled; dates are returned alongside values."""
    expected_cases = [
        '2289', '2289', '2289', '2324', '2344', '2361', '2399',
        '2399', '2399', '2399', '2399', '2574', '2574', '2671',
    ]
    iso_days = (['2020-08-29', '2020-08-30', '2020-08-31'] +
                ['2020-09-%02d' % day for day in range(1, 12)])
    expected_dates = [date.fromisoformat(day) for day in iso_days]
    observed = my_utils.get_column(
        'covid-19-data-old/us-counties-testfile-Boulder-fakemissingdates.csv',
        1, 'Boulder', result_columns=[4], date_column=0, return_dates=True)
    self.assertEqual(observed, [expected_cases, expected_dates])
def test_get_column_multipleresultcolumns(self):
    """result_columns of len > 1 yields one result list per column."""
    observed = my_utils.get_column(
        'covid-19-data-old/us-counties-testfile-Boulder.csv',
        0, '2020-09-04', result_columns=[4, 5])
    self.assertEqual(observed, [['2399'], ['79']])
def test_get_column_querynotmatched(self):
    """A query value absent from the file yields an empty result list."""
    observed = my_utils.get_column(
        'covid-19-data-old/us-counties-testfile-Boulder.csv',
        1, 'Denver', result_columns=[4])
    self.assertEqual(observed, [[]])
def test_get_column(self):
    """Sanity-check the Boulder test CSV (subset of last 20 days):
    cases column for 2020-09-04 is '2399'."""
    observed = my_utils.get_column(
        'covid-19-data-old/us-counties-testfile-Boulder.csv',
        0, '2020-09-04', result_columns=[4])
    self.assertEqual(observed, [['2399']])
def get_county_names_all(file_name):
    '''Build the master list of all BRAC counties involved in the dataset.

    Parameters
    ----------
    file_name : str
        Name of the BRAC permit dataset CSV file,
        e.g. 'BRAC_races_permits.csv'.

    Returns
    -------
    all_countys : list of str
        Sorted list of the unique county names in the dataset.
    '''
    # hard-coded column indices for the BRAC permit CSV layout
    permit_column = 4
    county_column = 2
    # pull the county column for both permit outcomes ('yes' and 'no')
    BRAC_permits_yes = get_column(file_name, permit_column, 'yes',
                                  result_columns=[county_column],
                                  date_column=None, return_dates=False)
    BRAC_permits_no = get_column(file_name, permit_column, 'no',
                                 result_columns=[county_column],
                                 date_column=None, return_dates=False)
    countys_yes = BRAC_permits_yes[0]
    countys_no = BRAC_permits_no[0]
    # unique, sorted list of every county involved
    # (BUGFIX: the original called remove_list_duplicates twice on the
    # same input; once is enough)
    all_countys = remove_list_duplicates(countys_yes + countys_no)
    all_countys.sort()
    print(all_countys)
    return all_countys
def test_get_column_withdeaths(self):
    """Handles two result columns (cases and deaths) with a date column."""
    expected = [
        ['2289', '2324', '2344', '2361', '2399', '2574', '2671'],
        ['79', '79', '79', '80', '80', '81', '83'],
    ]
    observed = my_utils.get_column(
        'covid-19-data-old/us-counties-testfile-Boulder-shorter.csv',
        1, 'Boulder', result_columns=[4, 5], date_column=0)
    self.assertEqual(observed, expected)
def test_get_column_missingdates(self):
    """Gaps in the date column are filled by repeating the prior value."""
    expected_cases = ['2289', '2289', '2289', '2324', '2344', '2361',
                      '2399', '2399', '2399', '2399', '2399', '2574',
                      '2574', '2671']
    observed = my_utils.get_column(
        'covid-19-data-old/us-counties-testfile-Boulder-fakemissingdates.csv',
        1, 'Boulder', result_columns=[4], date_column=0)
    self.assertEqual(observed, [expected_cases])
def test_get_daily_count(self):
    """Daily increment at index 19 of the Boulder series is 11.

    BUGFIX: the original used assertIs, comparing an int by identity,
    which only passed because CPython caches small ints; assertEqual is
    the correct value comparison.
    """
    self.assertEqual(
        mu.get_daily_count(
            mu.get_column('covid-19-data/us-counties.csv', 1, 'Boulder',
                          4, 0))[19], 11)
def test_get_daily_count_error_mode(self):
    """A bad file path makes get_column exit cleanly with status 1."""
    with self.assertRaises(SystemExit) as ctx:
        mu.get_daily_count(
            mu.get_column('covid-19-data/us-counties.cs', 1, 'Boulder',
                          4, 0))
    self.assertEqual(ctx.exception.code, 1)
def test_out_of_order_date(self):
    """Out-of-order dates in the input CSV cause a clean exit(1)."""
    with self.assertRaises(SystemExit) as ctx:
        mu.get_column('test_date_disorder.csv', 1, 'Boulder', 4, 0)
    self.assertEqual(ctx.exception.code, 1)
def test_date_parsing_error_mode(self):
    """Smoke test: a non-date column passed as date_column must not crash.

    NOTE(review): the original has no assertion either; this only checks
    the call completes without raising.
    """
    mu.get_column('covid-19-data/us-counties.csv', 1, 'Boulder', 4, 3)
def test_date_skipping(self):
    """Smoke test: a file with missing dates must not crash get_column.

    NOTE(review): no assertion in the original either; completion is the
    only check.
    """
    mu.get_column('test_date_missing.csv', 1, 'Boulder', 4, 0)
def test_no_date_column(self):
    """Smoke test: date_column=None disables date handling entirely.

    NOTE(review): no assertion in the original either; completion is the
    only check.
    """
    mu.get_column('test_date_missing.csv', 1, 'Boulder', 4, None)
def BRAC_permit_data_with_caserates(BRAC_county_caserates_file,
                                    BRAC_race_info_file):
    """For each race, look up the county caserate per capita on the race
    date and write a copy of the BRAC permit dataset with the caserate
    appended as a new column.

    Parameters
    ----------
    BRAC_county_caserates_file : str
        Path of a CSV file whose columns are county name, date, and
        caserate per capita (per 100,000 population).
    BRAC_race_info_file : str
        Name of the BRAC permit dataset CSV file,
        e.g. 'BRAC_races_permits.csv'.

    Returns
    -------
    out_dataset_file : str
        Path of the CSV written: 'BRAC_countycases_at_races.csv'.
    """
    # Import BRAC races data (Colorado rows only)
    state_column = 0
    BRAC_races = get_column(BRAC_race_info_file, state_column, 'Colorado',
                            result_columns=[1, 2, 3, 4], date_column=None,
                            return_dates=False)
    # name the BRAC_races columns so the lookups below read clearly
    BRAC_races_racenames = BRAC_races[0]
    BRAC_races_countys = BRAC_races[1]
    BRAC_races_dates = BRAC_races[2]
    BRAC_races_permits = BRAC_races[3]  # kept for documentation of column 4
    # unique BRAC counties involved
    all_countys = remove_list_duplicates(BRAC_races_countys)
    # column indices within the caserates file
    county_column = 0
    date_column = 1
    caserate_column = 2
    # cases_data: [county names, per-county date lists, per-county rate lists]
    cases_data = [[], [], []]
    for county_name in all_countys:
        cases_data_county = get_column(
            BRAC_county_caserates_file, county_column, county_name,
            result_columns=[date_column, caserate_column],
            date_column=None, return_dates=False)
        cases_data[0].append(county_name)
        cases_data[1].append(cases_data_county[0])
        cases_data[2].append(cases_data_county[1])
    cases_data_countys = cases_data[0]
    cases_data_dates_lists = cases_data[1]
    cases_data_rates_lists = cases_data[2]
    # match each race to the caserate of its county on its date
    caserate_races = []
    for race in range(len(BRAC_races_racenames)):
        county_ind = cases_data_countys.index(BRAC_races_countys[race])
        date_ind = cases_data_dates_lists[county_ind].index(
            BRAC_races_dates[race])
        caserate_races.append(cases_data_rates_lists[county_ind][date_ind])
    # BUGFIX: open files with `with` so the handles are closed even if an
    # error occurs mid-read/mid-write (the original used open()/close()).
    with open(BRAC_race_info_file, 'r') as fin:
        next(fin)  # skip the header line
        out_line_list = [line for line in fin]
    # NOTE(review): only the first len(BRAC_races_racenames) data lines
    # are copied out below; this assumes the input file holds exactly the
    # Colorado rows matched above -- confirm for multi-state files.
    out_dataset_file = 'BRAC_countycases_at_races.csv'
    with open(out_dataset_file, 'w') as fout:
        fout.write("state,race name,county,date,permit approved y/n, "
                   "county caserate per capita 100000 ppl \n")
        for line in range(len(BRAC_races_racenames)):
            fout.write(out_line_list[line].strip() + ',' +
                       str(caserate_races[line]) + '\n')
    return out_dataset_file
def main():
    """Convert Covid19 case data plus census data to per-capita rates.

    Rates are per 100,000 people; data come from two input files.

    Required command-line args
    --------------------------
    --state : str
        Name of USA State (no abbreviations).
    --coviddata_countys_list : list of str
        County names ('-' stands in for spaces).

    Optional args (see the argparser section for defaults)
    ------------------------------------------------------
    --data_out_file, --covid_file_name, --census_file_name, --daily_new,
    --running_avg, --running_sum, --window, and the *_column index
    options (only needed if the covid19/census file formats change).

    Returns
    -------
    out_data : list of three lists:
        [county names,
         [[dates for c1], [dates for c2], ...],
         [[per_capita_rates for c1], [per_capita_rates for c2], ...]]
        Dates are datetime.date objects; rates are floats per 100,000
        people, rounded to 2 decimals.
    """

    def _str2bool(value):
        # BUGFIX: argparse `type=bool` treats ANY non-empty string
        # (including "False") as True; parse the usual spellings instead.
        return str(value).strip().lower() in ('true', 't', 'yes', 'y', '1')

    # parse command line arguments
    parser = argparse.ArgumentParser(description='process args for '
                                                 'reading covid data CSV file')
    parser.add_argument('--state', type=str, required=True,
                        help='Name of the State')
    parser.add_argument('--coviddata_countys_list', type=str, nargs='+',
                        required=True,
                        help='list of strings for Name(s) of the county(s) '
                             'in covid CSV file that we want to look at')
    parser.add_argument('--data_out_file', type=str, default='[]',
                        help='Name of the CSV file to write this data out '
                             'to. If not wanted, is "[]", which is coded '
                             'to not return any data_out_file')
    parser.add_argument('--covid_file_name', type=str,
                        default='covid-19-data/us-counties.csv',
                        help='Name of the input covid cases data file')
    parser.add_argument('--census_file_name', type=str,
                        default='census-data/co-est2019-alldata.csv',
                        help='Name of the input census data file')
    parser.add_argument('--coviddata_county_column', type=int, default=1,
                        help='column ind for county names in covid CSVfile')
    parser.add_argument('--cases_column', type=int, default=4,
                        help='column ind for number of cases in covid CSVfile')
    parser.add_argument('--date_column', type=int, default=0,
                        help='column ind for date in covid CSV file')
    parser.add_argument('--census_state_column', type=int, default=5,
                        help='column ind for state names in census CSV file')
    parser.add_argument('--census_county_column', type=int, default=6,
                        help='column ind for county names in census CSV file')
    parser.add_argument('--pop_column', type=int, default=7,
                        help='column ind for populaiton in census CSV file')
    parser.add_argument('--daily_new', type=_str2bool, default=True,
                        help='daily newcases. default is cumulativ dailycases')
    parser.add_argument('--running_avg', type=_str2bool, default=False,
                        help='running average of cases. default is False, '
                             'window size is required')
    parser.add_argument('--running_sum', type=_str2bool, default=False,
                        help='running sum of cases over a window. default '
                             'is False, window size is required. cannot be '
                             'switched on at same time as running_avg')
    parser.add_argument('--window', type=int, default=5,
                        help='Window size of running average or running sum')
    args = parser.parse_args()

    # assign arguments ('-' in county names is translated back to ' ')
    state = args.state
    coviddata_countys_list = [
        name.replace('-', ' ') for name in args.coviddata_countys_list
    ]
    data_out_file = args.data_out_file
    coviddata_file_name = args.covid_file_name
    coviddata_county_column = args.coviddata_county_column
    cases_column = args.cases_column
    date_column = args.date_column
    daily_new = args.daily_new
    running_avg = args.running_avg
    running_summation = args.running_sum
    window = args.window
    census_file_name = args.census_file_name
    census_state_column = args.census_state_column
    census_county_column = args.census_county_column
    pop_column = args.pop_column

    # make a CSV file copy holding only this state's covid-19 data
    if coviddata_file_name == 'covid-19-data/us-counties.csv':
        state_coviddata_file_name = 'covid-19-data/' + state + '-counties.csv'
        try:
            f1 = open(state_coviddata_file_name, 'r')
            f1.close()
        except FileNotFoundError:
            print('creating state_covidfile')
            state_coviddata_file_name = make_statefile(state)
            print(state_coviddata_file_name, 'state_coviddata_file_name')
    elif coviddata_file_name == 'covid-19-data/' + state + '-counties.csv':
        state_coviddata_file_name = coviddata_file_name
    else:
        # BUGFIX: the original built a Warning() instance without raising
        # or printing it, so the message was silently discarded.
        print('WARNING: This script must be run on data within only one '
              'state, or counties with the same name in different states '
              'will collide. Proceeding with state_coviddata_file_name = '
              'args.covid_file_name; watch out for errors from this issue.')
        state_coviddata_file_name = args.covid_file_name

    # get census data for all counties in the state
    census_state_data = get_column(
        census_file_name, census_state_column, state,
        result_columns=[census_county_column, pop_column],
        date_column=None)
    # census_state_data is [[county_names], [census2010pops]]; sort by name
    # (renamed locals: the original shadowed the builtin `tuple`)
    sorted_pairs = sorted(zip(census_state_data[0], census_state_data[1]))
    names_sorted, pops_sorted = (list(col) for col in zip(*sorted_pairs))
    census_state_data_sorted = [names_sorted, pops_sorted]

    # out_data[0]: county names; out_data[1]: per-county date lists;
    # out_data[2]: per-county per-capita rate lists
    out_data = [[], [], []]
    for county_index in range(0, len(coviddata_countys_list)):
        coviddata_county_name = coviddata_countys_list[county_index]
        out_data[0].append(coviddata_county_name)
        # cumulative case counts plus their dates for this county
        cases_data_cumulative = get_column(state_coviddata_file_name,
                                           coviddata_county_column,
                                           coviddata_county_name,
                                           result_columns=[cases_column],
                                           date_column=date_column,
                                           return_dates=True)
        # convert cases from type str to int
        cases_data_cumulative[0] = list(map(int, cases_data_cumulative[0]))
        # dates are stored in the last index of the list, datetime format
        dates = cases_data_cumulative[-1]
        # daily cases option
        if daily_new is True:
            from my_utils import get_daily_count
            cases = get_daily_count(cases_data_cumulative[0])
        else:
            cases = cases_data_cumulative[0]
        # running average OR running sum option (or neither)
        if running_avg is True:
            from my_utils import running_average
            cases = running_average(cases, window)
        elif running_summation is True:
            from my_utils import running_sum
            cases = running_sum(cases, window)
        # binary search for this county's population in the census data
        census_county_name = coviddata_county_name + ' County'
        county_pop = binary_search(census_county_name,
                                   census_state_data_sorted)
        if county_pop is None:
            # BUGFIX: the original evaluated the bare name `ValueError`
            # (a no-op) here; print-and-exit is the intended behavior.
            print('county census not found')
            sys.exit(1)
        county_pop = int(county_pop)
        # per-capita rates per 100,000 people, rounded to 2 decimals
        if isinstance(cases, list):
            cases = np.asarray(cases)
        per_capita_rates = np.round(cases / county_pop * 100000, 2)
        out_data[1].append([dates])
        out_data[2].append([per_capita_rates.tolist()])

    # write out_data to CSV as 'County','date','per_capita_rate' rows
    # (BUGFIX: `with` guarantees the handle is closed; original did not)
    if data_out_file != '[]':
        with open(data_out_file, 'w') as fout:
            fout.write("county,date,per_capita_rate \n")
            for county_index in range(0, len(out_data[0])):
                for date_ind in range(0, len(out_data[1][county_index][0])):
                    fout.write(
                        out_data[0][county_index] + ',' +
                        str(out_data[1][county_index][0][date_ind]) + ',' +
                        str(out_data[2][county_index][0][date_ind]) + '\n')
    return out_data
def test_get_column(self):
    """get_column returns a non-None result for a known county query."""
    observed = mu.get_column('covid-19-data/us-counties.csv', 1,
                             'Boulder', 4, 0)
    self.assertIsNotNone(observed)
args = parser.parse_args() # assign arguments file_name = args.file_name county_column = args.county_column county = args.county cases_column = args.cases_column print_daily = args.daily print_running_avg = args.running_avg window = args.window date_column = args.date_column # call function to run cases = get_column(file_name, county_column, county, result_columns=[cases_column], date_column=date_column) # convert cases from type str to int cases = list(map(int, cases[0])) # print daily cases option if print_daily is True: from my_utils import get_daily_count day_cases = get_daily_count(cases) # print runing average cases option if print_running_avg is True: from my_utils import running_average running_avg_cases = running_average(day_cases, window)
def main():
    """Calculate covid19 cases per capita for each county in a State on
    a given date.

    Cases are per 100,000 people and rounded to 1 decimal.

    Required command-line args
    --------------------------
    --state : str
        Name of USA State (no abbreviations).
    --query_date : str
        Date in ISO format 'YYYY-MM-DD'.

    Optional args (see the argparser section for defaults)
    ------------------------------------------------------
    --covid_file_name, --census_file_name, --daily_new, --running_avg,
    --window, and the *_column index options (only needed if the covid19
    or census file formats change).

    Returns
    -------
    out_lists : list of [str, float]
        [county_name, county_caserate_at_date] pairs; counties whose
        case series has no entry for the query date are omitted.
    """

    def _str2bool(value):
        # BUGFIX: argparse `type=bool` treats ANY non-empty string
        # (including "False") as True; parse the usual spellings instead.
        return str(value).strip().lower() in ('true', 't', 'yes', 'y', '1')

    # parse command line arguments
    parser = argparse.ArgumentParser(description='process args for '
                                                 'reading covid data CSV file')
    parser.add_argument('--state', type=str, required=True,
                        help='Name of the State')
    parser.add_argument('--query_date', type=str, required=True,
                        help='date in ISO format "YY-MM-DD" ')
    parser.add_argument('--covid_file_name', type=str,
                        default='covid-19-data/us-counties.csv',
                        help='Name of the input covid cases data file')
    parser.add_argument('--census_file_name', type=str,
                        default='census-data/co-est2019-alldata.csv',
                        help='Name of the input census data file')
    parser.add_argument('--coviddata_county_column', type=int, default=1,
                        help='column ind for county names in covid CSVfile')
    parser.add_argument('--cases_column', type=int, default=4,
                        help='column ind for number of cases in covid CSVfile')
    parser.add_argument('--date_column', type=int, default=0,
                        help='column ind for date in covid CSV file')
    parser.add_argument('--census_state_column', type=int, default=5,
                        help='column ind for state names in census CSV file')
    parser.add_argument('--census_county_column', type=int, default=6,
                        help='column ind for county names in census CSV file')
    parser.add_argument('--pop_column', type=int, default=7,
                        help='column ind for populaiton in census CSV file')
    parser.add_argument('--daily_new', type=_str2bool, default=True,
                        help='daily newcases. False gives cumulativ cases')
    parser.add_argument('--running_avg', type=_str2bool, default=False,
                        help='running average of cases. default is False, '
                             'window size is required')
    parser.add_argument('--window', type=int, default=5,
                        help='Window size of running average')
    args = parser.parse_args()

    # assign arguments
    coviddata_file_name = args.covid_file_name
    coviddata_county_column = args.coviddata_county_column
    cases_column = args.cases_column
    date_column = args.date_column
    daily_new = args.daily_new
    running_avg = args.running_avg
    window = args.window
    census_file_name = args.census_file_name
    census_state_column = args.census_state_column
    state = args.state
    census_county_column = args.census_county_column
    pop_column = args.pop_column
    query_date = date.fromisoformat(args.query_date)

    # make a CSV file copy holding only this state's covid-19 data
    if coviddata_file_name == 'covid-19-data/us-counties.csv':
        state_coviddata_file_name = 'covid-19-data/' + state + '-counties.csv'
        try:
            f1 = open(state_coviddata_file_name, 'r')
            f1.close()
        except FileNotFoundError:
            print('creating state_covidfile')
            state_coviddata_file_name = make_statefile(state)
            print(state_coviddata_file_name, 'state_coviddata_file_name')
    else:
        # BUGFIX: the original built a Warning() instance without raising
        # or printing it, so the message was silently discarded.
        print('WARNING: This script must be run on data within only one '
              'state, or counties with the same name in different states '
              'will collide. Proceeding with state_coviddata_file_name = '
              'args.covid_file_name; watch out for errors from this issue.')
        state_coviddata_file_name = args.covid_file_name

    # get state county names and population data from census file
    census_state_data = get_column(census_file_name, census_state_column,
                                   state,
                                   result_columns=[census_county_column,
                                                   pop_column],
                                   date_column=None)
    county_pop_list = census_state_data[1][1:]
    # census file names counties "<name> County"; strip the 7-char suffix
    county_names_list_withcounty = census_state_data[0][1:]
    county_names_list = [name[:-7] for name in county_names_list_withcounty]

    # hashtable of county_name -> county_pop
    N = 260  # hashtable size; max counties in a State is Texas with 254
    census_hashtable = [[] for _ in range(N)]
    for c in range(len(county_names_list)):
        hash_table.put(census_hashtable, N, county_names_list[c],
                       county_pop_list[c], method='rolling')

    # bring in the optional transforms once, outside the county loop
    if daily_new is True:
        from my_utils import get_daily_count
    if running_avg is True:
        from my_utils import running_average

    # loop through each county in the state
    out_lists = []
    for c in range(len(county_names_list)):
        county_cases_data_cumulative = get_column(
            state_coviddata_file_name, coviddata_county_column,
            county_names_list[c], result_columns=[cases_column],
            date_column=date_column, return_dates=True)
        # dates are stored in the last index of the list, datetime format
        dates = county_cases_data_cumulative[-1]
        # convert cases from type str to int
        county_cases = list(map(int, county_cases_data_cumulative[0]))
        # daily cases option and running avg option
        if daily_new is True:
            county_cases = get_daily_count(county_cases)
        if running_avg is True:
            county_cases = running_average(county_cases, window)
        # binary search for this county's case count at the query date
        county_cases_at_date = binary_search(query_date,
                                             [dates, county_cases])
        # case rate per 100,000 people
        if county_cases_at_date is not None:
            county_caserate_at_date = county_cases_at_date * 100000 \
                / int(hash_table.get(census_hashtable, N,
                                     county_names_list[c],
                                     method='rolling'))
            out_lists.append([county_names_list[c],
                              round(county_caserate_at_date, 1)])
    print(out_lists)
    return out_lists
def main():
    """Open a file and extract data from one or more columns, printing
    the results to stdout.

    Optionally converts the results to daily increments and/or a running
    average. A comma-separated --result_column selects several columns.
    """

    def _str2bool(value):
        # BUGFIX: argparse `type=bool` treats ANY non-empty string
        # (including "False") as True; parse the usual spellings instead.
        return str(value).strip().lower() in ('true', 't', 'yes', 'y', '1')

    desc = 'Opens a file and extracts data from a specific column.'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('--file', dest='file_name', type=str, required=True,
                        help='Name of the file to be opened by the script.')
    parser.add_argument('--result_column', dest='result_column', default=4,
                        help='Column of file to be returned by the script. '
                             'Defaults to 4 and must correspond to an index '
                             'found in the file.')
    parser.add_argument('--county_column', dest='county_column', type=int,
                        required=True,
                        help='Column of file to be queried by the script.')
    parser.add_argument('--county', dest='county', type=str, required=True,
                        help='Name of county to retrieve data from.')
    parser.add_argument('--return_daily_increment',
                        dest='return_daily_increment', type=_str2bool,
                        default=False,
                        help='Decides whether results are returned as '
                             'daily increments.')
    parser.add_argument('--return_running_average',
                        dest='return_running_average', type=_str2bool,
                        default=False,
                        help='Decides whether to return running averages '
                             'from results.')
    parser.add_argument('--running_avg_window_size',
                        dest='running_avg_window_size', type=int, default=5,
                        help='Determines the window size for the running '
                             'average.')
    parser.add_argument('--date_column', dest='date_column', type=int,
                        default=0, help='Determines the date column.')
    args = parser.parse_args()

    print()
    print('Results:')
    results = []
    # a plain number selects one column; strings fall through to the
    # comma-separated multi-column case below
    try:
        args.result_column = int(args.result_column)
    except ValueError:
        pass
    # BUGFIX: the original ran `',' in args.result_column` even after the
    # value was converted to int (the default of 4 always was), which
    # raises TypeError. Only probe strings for a comma.
    if isinstance(args.result_column, str) and ',' in args.result_column:
        args.result_column = [str(part) for part in
                              args.result_column.split(',')]
        try:
            results = mu.get_columns(args.file_name, args.county_column,
                                     args.county, args.result_column,
                                     args.date_column)
        except ValueError:
            print('ValueError during get columns')
    else:
        try:
            results = mu.get_column(args.file_name, args.county_column,
                                    args.county, args.result_column,
                                    args.date_column)
        except ValueError:
            print('ValueError during get column')
    if args.return_daily_increment is True:
        try:
            # NOTE(review): `get_cases` is called with no module prefix;
            # confirm it is imported/defined elsewhere in this file.
            results = mu.get_daily_count(
                get_cases(args.file_name, args.county_column, args.county,
                          args.result_column, args.date_column))
        except ValueError:
            print('Value Error during get daily increment.')
    if args.return_running_average is True:
        try:
            results, _ = mu.running_average(
                results, window_size=args.running_avg_window_size)
        except ValueError:
            print('ValueError during running average')
    for result in results:
        print(result)
    print()
    print()
def main():
    """Fetch Covid19 case data and census data for one county, convert
    cases to per-capita rates, and plot the rates over time.

    Required command-line args
    --------------------------
    --covid_file_name, --census_file_name, --plot_file_name, --state,
    --coviddata_county, --census_county.

    Optional args (see the argparser section)
    -----------------------------------------
    --daily_new, --running_avg, --window, and the *_column index options.

    Returns
    -------
    plot : whatever plot_lines() returns for the generated plot file.
        Rates here are cases / population (NOT scaled per 100,000).
    """

    def _str2bool(value):
        # BUGFIX: argparse `type=bool` treats ANY non-empty string
        # (including "False") as True; parse the usual spellings instead.
        return str(value).strip().lower() in ('true', 't', 'yes', 'y', '1')

    # parse command line arguments
    parser = argparse.ArgumentParser(description='process args for '
                                                 'reading covid data CSV file')
    parser.add_argument('--covid_file_name', type=str, required=True,
                        help='Name of the input covid cases data file')
    parser.add_argument('--census_file_name', type=str, required=True,
                        help='Name of the input census data file')
    parser.add_argument('--plot_file_name', type=str, required=True,
                        help='output plot file generated')
    parser.add_argument('--state', type=str, required=True,
                        help='Name of the State')
    parser.add_argument('--coviddata_county', type=str, required=True,
                        help='Name of the county in covid CSV file')
    parser.add_argument('--census_county', type=str, required=True,
                        help='Name of the county in census CSV file')
    parser.add_argument('--coviddata_county_column', type=int,
                        help='column ind for county names in covid CSVfile')
    parser.add_argument('--cases_column', type=int,
                        help='column ind for number of cases in covid CSVfile')
    parser.add_argument('--date_column', type=int, default=0,
                        help='column ind for date in covid CSV file')
    parser.add_argument('--census_state_column', type=int,
                        help='column ind for state names in census CSV file')
    parser.add_argument('--census_county_column', type=int,
                        help='column ind for county names in census CSV file')
    parser.add_argument('--pop_column', type=int,
                        help='column ind for populaiton in census CSV file')
    parser.add_argument('--daily_new', type=_str2bool, default=False,
                        help='daily newcases. default is cumulativ dailycases')
    parser.add_argument('--running_avg', type=_str2bool, default=False,
                        help='running average of cases. default is False, '
                             'window size is required')
    parser.add_argument('--window', type=int, default=5,
                        help='Window size of running average')
    args = parser.parse_args()

    # assign arguments
    coviddata_file_name = args.covid_file_name
    coviddata_county_column = args.coviddata_county_column
    plot_file_name = args.plot_file_name
    coviddata_county_name = args.coviddata_county
    cases_column = args.cases_column
    date_column = args.date_column
    daily_new = args.daily_new
    running_avg = args.running_avg
    window = args.window
    census_file_name = args.census_file_name
    census_state_column = args.census_state_column
    state = args.state
    census_county_name = args.census_county
    census_county_column = args.census_county_column
    pop_column = args.pop_column

    # run get_column() on covid data and census data
    cases_data_cumulative = get_column(coviddata_file_name,
                                       coviddata_county_column,
                                       coviddata_county_name,
                                       result_columns=[cases_column],
                                       date_column=date_column,
                                       return_dates=True)
    census_state_data = get_column(
        census_file_name, census_state_column, state,
        result_columns=[census_county_column, pop_column],
        date_column=None)
    # convert cases from type str to int
    cases_data_cumulative[0] = list(map(int, cases_data_cumulative[0]))
    # dates are stored in the last index of the list, datetime format
    dates = cases_data_cumulative[-1]
    # daily cases option
    if daily_new is True:
        from my_utils import get_daily_count
        cases = get_daily_count(cases_data_cumulative[0])
    else:
        cases = cases_data_cumulative[0]
    # running average cases option
    if running_avg is True:
        from my_utils import running_average
        cases = running_average(cases, window)
    # census_state_data is [[county_names], [census2010pops]]; sort by name
    # (renamed locals: the original shadowed the builtin `tuple`)
    sorted_pairs = sorted(zip(census_state_data[0], census_state_data[1]))
    names_sorted, pops_sorted = (list(col) for col in zip(*sorted_pairs))
    census_state_data_sorted = [names_sorted, pops_sorted]
    # use binary search to get county pop census data out of state data
    county_pop = binary_search(census_county_name, census_state_data_sorted)
    if county_pop is None:
        # BUGFIX: the original evaluated the bare name `ValueError` (a
        # no-op) here; print-and-exit is the intended behavior.
        print('county census not found')
        sys.exit(1)
    county_pop = int(county_pop)
    # convert cases to per-capita rates by dividing by county population
    if isinstance(cases, list):
        cases = np.asarray(cases)
    per_capita_rates = (cases / county_pop).tolist()
    # plot using plot_lines: one series of (date, rate) points
    plot_points = [[]]
    for point in range(0, len(per_capita_rates)):
        plot_points[0].append([dates[point], per_capita_rates[point]])
    plot_labels = ['dates', 'per_capita_rates']
    return plot_lines(plot_points, plot_labels, plot_file_name)