def __init__(self, state: str, county: str, date_string: str):
    super().__init__(state=state, county=county)

    # Initialize relevant variables
    self.cases_raw_bool, self.deaths_raw_bool = False, False

    # Define raw and config files to be loaded
    logging.info("Initialize California raw and config file strings")
    raw_data_dir = os.path.join("states", state, "raw_data")
    raw_data_file = f"{raw_data_dir}/{date_string}/california_all.html"
    configs_dir = os.path.join("states", state, "configs")
    config_file_string = f"{configs_dir}/california_all_html_parse.yaml"

    # Load configs that will be used for html parsing
    logging.info("Load parsing config")
    html_parser_config_file = open(config_file_string)
    html_parser_config = yaml.safe_load(html_parser_config_file)

    # Get all dates for which parsing currently exists
    logging.info("Get and sort html parsing dates")
    html_parser_date_strings = list(html_parser_config["DATES"].keys())
    html_parser_dates = self.get_sorted_dates_from_strings(
        date_string_list=html_parser_date_strings)

    # Get most recent parsing date with respect to the passed in date_string
    logging.info("Obtain valid map of ethnicities to xpath containing cases or deaths")
    self.date_string = date_string
    self.valid_date_string = utils_state_lib.get_valid_date_string(
        date_list=html_parser_dates, date_string=date_string)

    # Get xpath for particular date
    self.ethnicity_xpath_map = html_parser_config['DATES'][self.valid_date_string]

    # Load raw html for cases and/or deaths depending on whether or not it exists
    logging.info("Load raw html data and convert it to lxml")
    try:
        raw_data_file_object = open(raw_data_file, 'r')
        raw_data_file_html = raw_data_file_object.read()
        soup = bs4.BeautifulSoup(raw_data_file_html, 'html5lib')
        raw_data_file_html = soup.prettify()
        self.raw_data_lxml = etree.HTML(raw_data_file_html)
        if len(self.raw_data_lxml.text.strip(' ')) == 1:
            self.raw_data_lxml = soup
        self.cases_raw_bool, self.deaths_raw_bool = True, True
    except BaseException:
        pass

    # Define mapping of YAML keys from the html parser to the names in this class
    logging.info("Define yaml keys to dictionary maps for cases and deaths")
    self.cases_yaml_keys_dict_keys_map = {
        'LATINO_CASES': 'Hispanic',
        'WHITE_CASES': 'White',
        'ASIAN_CASES': 'Asian',
        'BLACK_CASES': 'Black',
        'MULTI_RACE_CASES': 'Multi-Race',
        'AMERICAN_INDIAN_ALASKA_NATIVE_CASES': 'American Indian/Alaska Native',
        'NATIVE_HAWAIIAN_PACIFIC_ISLANDER_CASES': 'Native Hawaiian/Pacific Islander',
        'OTHER_CASES': 'Other',
    }
    self.deaths_yaml_keys_dict_keys_map = {
        'LATINO_DEATHS': 'Hispanic',
        'WHITE_DEATHS': 'White',
        'ASIAN_DEATHS': 'Asian',
        'BLACK_DEATHS': 'Black',
        'MULTI_RACE_DEATHS': 'Multi-Race',
        'AMERICAN_INDIAN_ALASKA_NATIVE_DEATHS': 'American Indian/Alaska Native',
        'NATIVE_HAWAIIAN_PACIFIC_ISLANDER_DEATHS': 'Native Hawaiian/Pacific Islander',
        'OTHER_DEATHS': 'Other',
    }
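# The parsers above and below rely on utils_state_lib.get_valid_date_string to fall back to
# the most recent config date that does not come after the requested date_string. A minimal
# sketch of that assumed behavior follows; the real helper lives in utils_state_lib, and the
# name and signature here are inferred from how it is called, not taken from that module.
from datetime import datetime
from typing import List


def get_valid_date_string_sketch(date_list: List[datetime], date_string: str) -> str:
    """Return the latest date in date_list on or before date_string, as 'YYYY-MM-DD'."""
    target = datetime.strptime(date_string, '%Y-%m-%d')
    # Assumes at least one config date is on or before the requested date.
    valid_dates = [config_date for config_date in date_list if config_date <= target]
    return max(valid_dates).strftime('%Y-%m-%d')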
def __init__(self, state: str, county: str, date_string: str):
    super().__init__(state=state, county=county, date_string=date_string)

    logging.info("Initialize Sonoma raw and config file strings")
    raw_data_dir = os.path.join("states", state, 'counties', county, "raw_data")
    raw_data_file = f"{raw_data_dir}/{date_string}/sonoma_all.html"
    configs_dir = os.path.join("states", state, 'counties', county, "configs")
    config_file_string = f"{configs_dir}/sonoma_all_html_parse.yaml"

    logging.info("Load parsing config")
    html_parser_config = self.load_yaml(config_file_string)

    logging.info("Get and sort html parsing dates")
    html_parser_date_strings = list(html_parser_config["DATES"].keys())
    html_parser_dates = self.get_sorted_dates_from_strings(
        date_string_list=html_parser_date_strings)

    logging.info("Obtain valid map of ethnicities to xpath containing cases or deaths")
    self.date_string = date_string
    self.valid_date_string = utils_state_lib.get_valid_date_string(
        date_list=html_parser_dates, date_string=date_string)
    self.ethnicity_xpath_map = html_parser_config['DATES'][self.valid_date_string]

    logging.info("Load raw html data and convert it to lxml")
    try:
        raw_data_file_object = open(raw_data_file, 'r')
        raw_data_file_html = raw_data_file_object.read()
        soup = bs4.BeautifulSoup(raw_data_file_html, 'html5lib')
        raw_data_file_html = soup.prettify()
        self.raw_data_lxml = etree.HTML(raw_data_file_html)
        if len(self.raw_data_lxml.text.strip(' ')) == 1:
            self.raw_data_lxml = soup
        self.cases_raw_bool = True
    except BaseException:
        pass

    logging.info("Define yaml keys to dictionary maps for cases and deaths")
    self.cases_yaml_keys_dict_keys_map = {
        'HISPANIC_CASES': 'hispanic',
        'WHITE_CASES': 'white',
        'ASIAN_CASES': 'asian',
        'ASIAN_PACIFIC_ISLANDER_CASES': 'asian_pacific_islander',
        'NON_HISPANIC_CASES': 'non_hispanic',
        'BLACK_CASES': 'black',
        'NATIVE_HAWAIIAN_PACIFIC_ISLANDER_CASES': 'Native Hawaiian/Pacific Islander',
        'AMERICAN_INDIAN_ALASKA_NATIVE_CASES': 'American Indian/Alaska Native',
    }
    self.deaths_yaml_keys_dict_keys_map = None
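# self.load_yaml above is assumed to be a small base-class helper mirroring what the
# state-level parser does inline (open the config file and yaml.safe_load it). A minimal
# sketch under that assumption; the helper's actual name and home live elsewhere in the repo.
import yaml


def load_yaml_sketch(config_file_string: str) -> dict:
    """Read a parser config YAML file and return its contents as a dictionary."""
    with open(config_file_string) as config_file:
        return yaml.safe_load(config_file)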
def __init__(self, state: str, county: str, date_string: str):
    self.state, self.county = state, county
    self.cases_raw_bool, self.deaths_raw_bool = False, False

    logging.info("Initialize kern county raw and config file strings")
    raw_data_dir = os.path.join("states", state, 'counties', county, "raw_data")
    raw_data_cases_file = f"{raw_data_dir}/{date_string}/kern_cases"
    configs_dir = os.path.join("states", state, 'counties', county, "configs")
    cases_config_file_string = f"{configs_dir}/kern_cases_json_parser.yaml"

    logging.info("Load cases and deaths parsing config")
    json_parser_cases_config = self.load_yaml(cases_config_file_string)

    logging.info("Get and sort json parsing dates")
    json_parser_cases_dates = self.get_sorted_dates_from_strings(
        date_string_list=list(json_parser_cases_config["DATES"].keys()))

    logging.info("Obtain valid map of ethnicities to json containing cases or deaths")
    self.date_string = date_string
    self.cases_valid_date_string = utils_state_lib.get_valid_date_string(
        date_list=json_parser_cases_dates, date_string=date_string)
    self.cases_ethnicity_json_keys_map = json_parser_cases_config['DATES'][
        self.cases_valid_date_string]
    self.deaths_yaml_keys_dict_keys_map = None
    self.ethnicity_json_keys_map = self.cases_ethnicity_json_keys_map

    logging.info("Load raw json data")
    try:
        cases_file_obj = open(raw_data_cases_file, 'r')
        self.raw_data_cases_json = json.load(cases_file_obj)
        self.cases_raw_bool = True
    except BaseException:
        pass

    logging.info("Define yaml keys to dictionary maps for cases and deaths")
    self.cases_yaml_keys_dict_keys_map = {
        'BLACK_CASES': 'Black',
        'HISPANIC_CASES': 'Hispanic',
        'ASIAN_CASES': 'Asian',
        'WHITE_CASES': 'White',
        'OTHER_CASES': 'Other',
    }
def __init__(self, state: str, county: str, date_string: str):
    super().__init__(state=state, county=county, date_string=date_string)
    self.cases_raw_bool, self.deaths_raw_bool = False, False

    logging.info("Initialize Los Angeles raw and config file strings")
    raw_data_dir = os.path.join("states", state, 'counties', county, "raw_data")
    raw_data_file = f"{raw_data_dir}/{date_string}/losangeles_all.html"
    configs_dir = os.path.join("states", state, 'counties', county, "configs")
    config_file_string = f"{configs_dir}/losangeles_all_html_parse.yaml"

    logging.info("Load parsing config")
    html_parser_config_file = open(config_file_string)
    html_parser_config = yaml.safe_load(html_parser_config_file)

    logging.info("Get and sort html parsing dates")
    html_parser_date_strings = html_parser_config["DATES"].keys()
    html_parser_dates = sorted([datetime.strptime(date_string, '%Y-%m-%d')
                                for date_string in html_parser_date_strings])

    logging.info("Obtain valid map of ethnicities to xpath containing cases or deaths")
    self.date_string = date_string
    self.valid_date_string = utils_state_lib.get_valid_date_string(
        date_list=html_parser_dates, date_string=date_string)
    self.ethnicity_xpath_map = html_parser_config['DATES'][self.valid_date_string]

    logging.info("Load raw html data and convert it to lxml")
    try:
        raw_data_file_object = open(raw_data_file, 'r')
        raw_data_file_html = raw_data_file_object.read()
        soup = bs4.BeautifulSoup(raw_data_file_html, 'html5lib')
        raw_data_file_html = soup.prettify()
        self.raw_data_lxml = etree.HTML(raw_data_file_html)
        if len(self.raw_data_lxml.text.strip(' ')) == 1:
            self.raw_data_lxml = soup
        self.cases_raw_bool, self.deaths_raw_bool = True, True
    except BaseException:
        pass

    logging.info("Define yaml keys to dictionary maps for cases and deaths")
    self.cases_yaml_keys_dict_keys_map = {
        'HISPANIC_CASES': 'Hispanic',
        'WHITE_CASES': 'White',
        'ASIAN_CASES': 'Asian',
        'BLACK_CASES': 'Black',
        'AMERICAN_INDIAN_ALASKA_NATIVE_CASES': 'American Indian/Alaska Native',
        'NATIVE_HAWAIIAN_PACIFIC_ISLANDER_CASES': 'Native Hawaiian/Pacific Islander',
        'OTHER_CASES': 'Other',
    }
    self.deaths_yaml_keys_dict_keys_map = {
        'HISPANIC_DEATHS': 'Hispanic',
        'WHITE_DEATHS': 'White',
        'ASIAN_DEATHS': 'Asian',
        'BLACK_DEATHS': 'Black',
        'AMERICAN_INDIAN_ALASKA_NATIVE_DEATHS': 'American Indian/Alaska Native',
        'NATIVE_HAWAIIAN_PACIFIC_ISLANDER_DEATHS': 'Native Hawaiian/Pacific Islander',
        'OTHER_DEATHS': 'Other',
    }
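# The Los Angeles parser above sorts its config dates inline with datetime.strptime, while
# the other parsers call self.get_sorted_dates_from_strings for the same step. A minimal
# sketch of that helper, assuming it simply wraps the inline logic shown above; the actual
# implementation lives on the base class and may differ.
from datetime import datetime
from typing import List


def get_sorted_dates_from_strings_sketch(date_string_list: List[str]) -> List[datetime]:
    """Parse 'YYYY-MM-DD' strings into datetime objects and return them sorted ascending."""
    return sorted(datetime.strptime(date_str, '%Y-%m-%d') for date_str in date_string_list)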
def __init__(self, state: str, county: str, date_string: str):
    self.state, self.county = state, county
    # Default the raw-data flags to False so they exist even if loading fails below
    # (mirrors the other county parsers).
    self.cases_raw_bool, self.deaths_raw_bool = False, False

    logging.info("Initialize imperial county raw and config file strings")
    raw_data_dir = os.path.join("states", state, 'counties', county, "raw_data")
    raw_data_cases_file = f"{raw_data_dir}/{date_string}/imperial_county_cases"
    raw_data_cases_file_html = f"{raw_data_dir}/{date_string}/imperial_county_cases.html"
    raw_data_deaths_file = f"{raw_data_dir}/{date_string}/imperial_county_deaths"
    raw_data_deaths_file_html = f"{raw_data_dir}/{date_string}/imperial_county_deaths.html"
    configs_dir = os.path.join("states", state, 'counties', county, "configs")
    cases_config_file_string = f"{configs_dir}/imperial_county_cases_json_parser.yaml"
    deaths_config_file_string = f"{configs_dir}/imperial_county_deaths_json_parser.yaml"

    logging.info("Load cases and deaths parsing config")
    json_parser_cases_config = self.load_yaml(cases_config_file_string)
    json_parser_deaths_config = self.load_yaml(deaths_config_file_string)

    logging.info("Get and sort json parsing dates")
    json_parser_cases_dates = self.get_sorted_dates_from_strings(
        date_string_list=list(json_parser_cases_config["DATES"].keys()))
    json_parser_deaths_dates = self.get_sorted_dates_from_strings(
        date_string_list=list(json_parser_deaths_config["DATES"].keys()))

    logging.info("Obtain valid map of ethnicities to json containing cases or deaths")
    self.date_string = date_string
    self.cases_valid_date_string = utils_state_lib.get_valid_date_string(
        date_list=json_parser_cases_dates, date_string=date_string)
    self.deaths_valid_date_string = utils_state_lib.get_valid_date_string(
        date_list=json_parser_deaths_dates, date_string=date_string)
    self.cases_ethnicity_json_keys_map = json_parser_cases_config['DATES'][
        self.cases_valid_date_string]
    self.deaths_ethnicity_json_keys_map = json_parser_deaths_config['DATES'][
        self.deaths_valid_date_string]
    self.ethnicity_json_keys_map = {
        **self.cases_ethnicity_json_keys_map,
        **self.deaths_ethnicity_json_keys_map
    }

    # Load raw json data, falling back to the .html copy of each file if the plain
    # file is missing.
    logging.info("Load raw json data")
    try:
        cases_file_obj = open(raw_data_cases_file, 'r')
        self.cases_raw_bool = True
    except BaseException:
        try:
            cases_file_obj = open(raw_data_cases_file_html, 'r')
            self.cases_raw_bool = True
        except BaseException:
            pass
    try:
        deaths_file_obj = open(raw_data_deaths_file, 'r')
        self.deaths_raw_bool = True
    except BaseException:
        try:
            deaths_file_obj = open(raw_data_deaths_file_html, 'r')
            self.deaths_raw_bool = True
        except BaseException:
            pass
    try:
        self.raw_data_cases_json = json.load(cases_file_obj)
    except BaseException:
        pass
    try:
        self.raw_data_deaths_json = json.load(deaths_file_obj)
    except BaseException:
        pass

    logging.info("Define yaml keys to dictionary maps for cases and deaths")
    self.cases_yaml_keys_dict_keys_map = {
        'HISPANIC_LATINO_CASES': 'Hispanic',
        'NON_HISPANIC_LATINO_CASES': 'Non-Hispanic'
    }
    self.deaths_yaml_keys_dict_keys_map = {
        'HISPANIC_LATINO_DEATHS': 'Hispanic',
        'NON_HISPANIC_LATINO_DEATHS': 'Non-Hispanic'
    }
def __init__(self, state: str, county: str, date_string: str):
    self.state, self.county = state, county
    super().__init__(state=state, county=county, date_string=date_string)
    self.cases_raw_bool, self.deaths_raw_bool = False, False

    logging.info("Initialize San Francisco county raw and config file strings")
    raw_data_dir = os.path.join("states", state, 'counties', county, "raw_data")
    raw_data_cases_file = f"{raw_data_dir}/{date_string}/sanfrancisco_cases"
    raw_data_deaths_file = f"{raw_data_dir}/{date_string}/sanfrancisco_deaths"
    configs_dir = os.path.join("states", state, 'counties', county, "configs")
    cases_config_file_string = f"{configs_dir}/sanfrancisco_cases_json_parser.yaml"
    deaths_config_file_string = f"{configs_dir}/sanfrancisco_deaths_json_parser.yaml"

    logging.info("Load cases and deaths parsing config")
    json_parser_cases_config = self.load_yaml(cases_config_file_string)
    json_parser_deaths_config = self.load_yaml(deaths_config_file_string)

    logging.info("Get and sort json parsing dates")
    json_parser_cases_dates = self.get_sorted_dates_from_strings(
        date_string_list=list(json_parser_cases_config["DATES"].keys()))
    json_parser_deaths_dates = self.get_sorted_dates_from_strings(
        date_string_list=list(json_parser_deaths_config["DATES"].keys()))

    logging.info("Obtain valid map of ethnicities to json containing cases or deaths")
    self.date_string = date_string
    self.cases_valid_date_string = utils_state_lib.get_valid_date_string(
        date_list=json_parser_cases_dates, date_string=date_string)
    self.deaths_valid_date_string = utils_state_lib.get_valid_date_string(
        date_list=json_parser_deaths_dates, date_string=date_string)
    self.cases_ethnicity_json_keys_map = json_parser_cases_config['DATES'][
        self.cases_valid_date_string]
    self.deaths_ethnicity_json_keys_map = json_parser_deaths_config['DATES'][
        self.deaths_valid_date_string]
    self.ethnicity_json_keys_map = {
        **self.cases_ethnicity_json_keys_map,
        **self.deaths_ethnicity_json_keys_map
    }
    self.cases_yaml_keys_dict_keys_map, self.deaths_yaml_keys_dict_keys_map = None, None

    try:
        logging.info("Load raw cases json data")
        cases_file_obj = open(raw_data_cases_file, 'r')
        self.raw_data_cases_json = json.load(cases_file_obj)

        logging.info("Define yaml keys to dictionary maps for cases")
        self.cases_yaml_keys_dict_keys_map = {
            'NATIVE_AMERICAN_CASES': 'Native American',
            'NATIVE_HAWAIIAN_PACIFIC_ISLANDER_CASES': 'Native Hawaiian/Pacific Islander',
            'MULTI_RACE_CASES': 'Multi-Race',
            'BLACK_CASES': 'Black',
            'ASIAN_CASES': 'Asian',
            'WHITE_CASES': 'White',
            'HISPANIC_CASES': 'Hispanic'
        }
        self.cases_raw_bool = True
    except BaseException:
        pass

    try:
        logging.info("Load raw deaths json data")
        deaths_file_obj = open(raw_data_deaths_file, 'r')
        self.raw_data_deaths_json = json.load(deaths_file_obj)

        logging.info("Define yaml keys to dictionary maps for deaths")
        self.deaths_yaml_keys_dict_keys_map = {
            'WHITE_DEATHS': 'White',
            'HISPANIC_DEATHS': 'Hispanic',
            'ASIAN_DEATHS': 'Asian',
            'BLACK_DEATHS': 'Black',
            'MULTI_RACE_DEATHS': 'Multi-Race'
        }
        self.deaths_raw_bool = True
    except BaseException:
        pass
def __init__(self, state: str, county: str, date_string: str):
    # Initialize relevant variables
    self.state, self.county = state, county
    self.total_cases_int, self.total_deaths_int = None, None
    self.cases_raw_bool, self.deaths_raw_bool = False, False

    # Define raw and config files to be loaded
    logging.info("Initialize Santa Clara county raw and config file strings")
    raw_data_dir = os.path.join("states", state, 'counties', county, "raw_data")
    raw_data_cases_file = f"{raw_data_dir}/{date_string}/santaclara_cases"
    raw_data_totalcases_file = f"{raw_data_dir}/{date_string}/santaclara_totalcases"
    raw_data_deaths_file = f"{raw_data_dir}/{date_string}/santaclara_deaths"
    raw_data_totaldeaths_file = f"{raw_data_dir}/{date_string}/santaclara_totaldeaths"
    configs_dir = os.path.join("states", state, 'counties', county, "configs")
    cases_config_file_string = f"{configs_dir}/santaclara_cases_json_parser.yaml"
    deaths_config_file_string = f"{configs_dir}/santaclara_deaths_json_parser.yaml"
    totalcases_config_file_string = f"{configs_dir}/santaclara_totalcases_json_parser.yaml"
    totaldeaths_config_file_string = f"{configs_dir}/santaclara_totaldeaths_json_parser.yaml"

    # Load config files that will be used for parsing
    logging.info("Load cases and deaths parsing config")
    json_parser_cases_config = self.load_yaml(cases_config_file_string)
    json_parser_deaths_config = self.load_yaml(deaths_config_file_string)
    json_parser_totalcases_config = self.load_yaml(totalcases_config_file_string)
    json_parser_totaldeaths_config = self.load_yaml(totaldeaths_config_file_string)

    # Get all dates for which parsing currently exists
    logging.info("Get and sort json parsing dates")
    json_parser_cases_dates = self.get_sorted_dates_from_strings(
        date_string_list=list(json_parser_cases_config["DATES"].keys()))
    json_parser_deaths_dates = self.get_sorted_dates_from_strings(
        date_string_list=list(json_parser_deaths_config["DATES"].keys()))
    json_parser_totalcases_dates = self.get_sorted_dates_from_strings(
        date_string_list=list(json_parser_totalcases_config["DATES"].keys()))
    json_parser_totaldeaths_dates = self.get_sorted_dates_from_strings(
        date_string_list=list(json_parser_totaldeaths_config["DATES"].keys()))

    # Get most recent parsing date with respect to the passed in date_string
    logging.info("Obtain valid map of ethnicities to json containing cases or deaths")
    self.date_string = date_string
    self.cases_valid_date_string = utils_state_lib.get_valid_date_string(
        date_list=json_parser_cases_dates, date_string=date_string)
    self.deaths_valid_date_string = utils_state_lib.get_valid_date_string(
        date_list=json_parser_deaths_dates, date_string=date_string)
    self.totalcases_valid_date_string = utils_state_lib.get_valid_date_string(
        date_list=json_parser_totalcases_dates, date_string=date_string)
    self.totaldeaths_valid_date_string = utils_state_lib.get_valid_date_string(
        date_list=json_parser_totaldeaths_dates, date_string=date_string)

    # Get JSON keys for the chosen date
    self.cases_ethnicity_json_keys_map = json_parser_cases_config['DATES'][
        self.cases_valid_date_string]
    self.deaths_ethnicity_json_keys_map = json_parser_deaths_config['DATES'][
        self.deaths_valid_date_string]
    self.ethnicity_json_keys_map = {
        **self.cases_ethnicity_json_keys_map,
        **self.deaths_ethnicity_json_keys_map
    }
    self.totalcases_ethnicity_json_keys_map = json_parser_totalcases_config['DATES'][
        self.totalcases_valid_date_string]
    self.totaldeaths_ethnicity_json_keys_map = json_parser_totaldeaths_config['DATES'][
        self.totaldeaths_valid_date_string]
    self.totals_json_keys_map = {
        **self.totalcases_ethnicity_json_keys_map,
        **self.totaldeaths_ethnicity_json_keys_map
    }

    # Load raw json files for cases and/or deaths depending on whether or not it exists
    logging.info("Load raw json data")
    try:
        cases_file_obj = open(raw_data_cases_file, 'r')
        totalcases_file_obj = open(raw_data_totalcases_file, 'r')
        self.raw_data_cases_json = json.load(cases_file_obj)
        self.raw_data_totalcases_json = json.load(totalcases_file_obj)
        self.cases_raw_bool = True
    except BaseException:
        pass
    try:
        deaths_file_obj = open(raw_data_deaths_file, 'r')
        totaldeaths_file_obj = open(raw_data_totaldeaths_file, 'r')
        self.raw_data_deaths_json = json.load(deaths_file_obj)
        self.raw_data_totaldeaths_json = json.load(totaldeaths_file_obj)
        self.deaths_raw_bool = True
    except BaseException:
        pass

    # Define mapping of YAML keys from the JSON parser to the names in this class
    logging.info("Define yaml keys to dictionary maps for cases and deaths")
    self.cases_yaml_keys_dict_keys_map = {
        'WHITE_CASES': 'White',
        'HISPANIC_CASES': 'Hispanic',
        'ASIAN_CASES': 'Asian',
        'BLACK_CASES': 'Black',
        'NATIVE_HAWAIIAN_PACIFIC_ISLANDER_CASES': 'Native Hawaiian/Pacific Islander',
        'OTHER_CASES': 'Other'
    }
    self.deaths_yaml_keys_dict_keys_map = {
        'WHITE_DEATHS': 'White',
        'HISPANIC_DEATHS': 'Hispanic',
        'ASIAN_DEATHS': 'Asian',
        'BLACK_DEATHS': 'Black',
        'OTHER_DEATHS': 'Other',
        'NATIVE_HAWAIIAN_PACIFIC_ISLANDER_DEATHS': 'Native Hawaiian/Pacific Islander'
    }
    self.totals_cases_yaml_keys_dict_keys_map = {
        'TOTAL_CASES': 'Total Cases',
    }
    self.totals_deaths_yaml_keys_dict_keys_map = {
        'TOTAL_DEATHS': 'Total Deaths'
    }
def __init__(self, state: str, county: str, date_string: str):
    self.state, self.county = state, county
    self.cases_raw_bool, self.deaths_raw_bool = False, False

    logging.info("Initialize riverside raw and config file strings")
    raw_data_dir = os.path.join("states", state, 'counties', county, "raw_data")
    raw_data_cases_file = f"{raw_data_dir}/{date_string}/riverside_cases"
    raw_data_cases_file_html = f"{raw_data_dir}/{date_string}/riverside_cases.html"
    configs_dir = os.path.join("states", state, 'counties', county, "configs")
    cases_config_file_string = f"{configs_dir}/riverside_cases_json_parser.yaml"

    logging.info("Load cases and deaths parsing config")
    json_parser_cases_config = self.load_yaml(cases_config_file_string)

    logging.info("Get and sort json parsing dates")
    json_parser_cases_dates = self.get_sorted_dates_from_strings(
        date_string_list=list(json_parser_cases_config["DATES"].keys()))

    logging.info("Obtain valid map of ethnicities to json containing cases or deaths")
    self.date_string = date_string
    self.cases_valid_date_string = utils_state_lib.get_valid_date_string(
        date_list=json_parser_cases_dates, date_string=date_string)
    self.cases_ethnicity_json_keys_map = json_parser_cases_config['DATES'][
        self.cases_valid_date_string]
    self.deaths_yaml_keys_dict_keys_map = None
    self.ethnicity_json_keys_map = self.cases_ethnicity_json_keys_map

    logging.info("Load raw json data")
    try:
        cases_file_obj = open(raw_data_cases_file, 'r')
        self.cases_raw_bool = True
    except BaseException:
        try:
            cases_file_obj = open(raw_data_cases_file_html, 'r')
            self.cases_raw_bool = True
        except BaseException:
            pass
    try:
        self.raw_data_cases_json = json.load(cases_file_obj)
    except BaseException:
        pass

    logging.info("Define yaml keys to dictionary maps for cases and deaths")
    self.cases_yaml_keys_dict_keys_map = {
        'HISPANIC_LATINO_CASES': 'Hispanic',
        'MULTI_RACE_CASES': 'Multi-Race',
        'WHITE_CASES': 'White',
        'ASIAN_PACIFIC_ISLANDER_CASES': 'Asian/Pacific Islander',
        'ASIAN_CASES': 'Asian',
        'AMERICAN_INDIAN_ALASKA_NATIVE_CASES': 'American Indian/Alaska Native',
        'BLACK_CASES': 'Black',
        'NATIVE_HAWAIIAN_PACIFIC_ISLANDER_CASES': 'Native Hawaiian/Pacific Islander'
    }
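# Example usage of one of these parsers (hypothetical class name; the projector classes
# wrapping these __init__ methods, and the downstream parse step, are defined elsewhere
# in the repo):
#
#     projector = RiversideEthnicDataProjector(
#         state='california', county='riverside', date_string='2020-06-14')
#     if projector.cases_raw_bool:
#         # parse projector.raw_data_cases_json using projector.ethnicity_json_keys_map
#         ...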