def __init__(self, base_url, access_token, s3_bucket):
    """Set up helpers and empty node buffers for the "open"/"COXRAY" project.

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to both helpers.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "open"
    self.project_code = "COXRAY"

    # Both helpers are configured with the same settings.
    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)
    self.file_helper = FileHelper(**helper_kwargs)

    # One independent, initially empty list per node type.
    node_types = (
        "core_metadata_collection",
        "study",
        "subject",
        "observation",
        "follow_up",
        "demographic",
        "imaging_file",
    )
    self.nodes = {node_type: [] for node_type in node_types}
def __init__(self, base_url, access_token, s3_bucket):
    """Set up helpers and the initial records for the "open"/"ChestX-ray8" project.

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to both helpers.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "open"
    self.project_code = "ChestX-ray8"

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)
    self.file_helper = FileHelper(**helper_kwargs)

    # A single core_metadata_collection record, linked to this project.
    self.cmc_submitter_id = format_submitter_id("cmc_chestxray8", {})
    cmc_record = {
        "submitter_id": self.cmc_submitter_id,
        "projects": [{"code": self.project_code}],
    }
    self.core_metadata_collection = [cmc_record]

    self.imaging_file = []
def __init__(self, base_url, access_token, s3_bucket):
    """Set up the helper, expected headers and buffers for "open"/"Com-Mobility".

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "open"
    self.project_code = "Com-Mobility"

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)

    # Expected headers, in order: location/date columns first, then the
    # "percent change from baseline" columns.
    leading_headers = [
        "country_region_code",
        "country_region",
        "sub_region_1",
        "sub_region_2",
        "metro_area",
        "iso_3166_2_code",
        "census_fips_code",
        "date",
    ]
    baseline_headers = [
        "retail_and_recreation_percent_change_from_baseline",
        "grocery_and_pharmacy_percent_change_from_baseline",
        "parks_percent_change_from_baseline",
        "transit_stations_percent_change_from_baseline",
        "workplaces_percent_change_from_baseline",
        "residential_percent_change_from_baseline",
    ]
    self.expected_file_headers = leading_headers + baseline_headers

    self.summary_locations = []
    self.summary_socio_demographics = []
def __init__(self, base_url, access_token, s3_bucket):
    """Set up state for the "controlled"/"SSR" ETL and read FILE_PATH.

    If the FILE_PATH environment variable is unset or empty, a message is
    printed and no exception is raised.

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "controlled"
    self.project_code = "SSR"
    self.country = "US"
    self.state = "IL"

    self.subjects = []
    self.demographics = []

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)

    # Layout: { <node ID>: { <submitter_id>: { <data> } } }
    self.records = defaultdict(dict)

    # TODO temporary - for now this ETL can only be run manually
    file_path = os.environ.get("FILE_PATH")
    self.file_path = file_path
    if not file_path:
        # Printed rather than raised so that unit tests don't complain.
        print("Need FILE_PATH environment variable (SSR file to parse)")
def __init__(self, base_url, access_token, s3_bucket):
    """Set up helpers, bucket name and per-node entries for "open"/"ncbi-covid-19".

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to both helpers.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "open"
    self.project_code = "ncbi-covid-19"

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    # The file helper is created before the metadata helper.
    self.file_helper = AsyncFileHelper(**helper_kwargs)
    self.metadata_helper = MetadataHelper(**helper_kwargs)

    self.bucket = "sra-pub-sars-cov2-metadata-us-east-1"

    # Node name -> list whose first item is a path. The blastn entry has a
    # second item: a tab-separated header line ending in a newline.
    blastn_header = (
        "acc\tqacc\tstaxid\tsacc\tslen\tlength\tbitscore\tscore\tpident\tsskingdom\tevalue\tssciname\n"
    )
    self.nodes = {
        "virus_sequence_contig": ["contigs/contigs.json"],
        "virus_sequence_peptide": ["peptides/peptides.json"],
        "virus_sequence_contig_taxonomy": ["taxonomy/taxonomy.json"],
        "virus_sequence_blastn": ["blastn/blastn.tsv", blastn_header],
        "virus_sequence_hmm_search": ["hmmsearch_notc/hmmsearch_notc.json"],
        "virus_sequence_run_taxonomy": [
            "sra_taxonomy/coronaviridae_07_31_2020_000000000000.gz"
        ],
    }
def __init__(self, base_url, access_token, s3_bucket):
    """Set the project identity and metadata helper for "open"/"JHU".

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "open"
    self.project_code = "JHU"

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)
def __init__(self, base_url, access_token, s3_bucket):
    """Set up the helper and the clinical-trials buffer for "open"/"VacTracker".

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "open"
    self.project_code = "VacTracker"
    self.clinical_trials = []

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)
def __init__(self, base_url, access_token, s3_bucket):
    """Set up helpers and submission buffers for "open"/"ncbi-covid-19".

    Seeds the core_metadata_collection buffer with one record, then calls
    read_ncbi_manifest with the manifest bucket, the manifest key and the
    (initially empty) accession-number/filename map.

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer, to both helpers
            and to NCBI_FILE.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "open"
    self.project_code = "ncbi-covid-19"
    self.manifest_bucket = "sra-pub-sars-cov2"
    self.sra_src_manifest = "sra-src/Manifest"
    self.accession_number_filename_map = {}

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)
    self.file_helper = AsyncFileHelper(**helper_kwargs)

    # NOTE(review): the project code is passed as s3_bucket here, not the
    # s3_bucket argument -- confirm this is intended.
    self.data_file = NCBI_FILE(
        base_url=self.base_url,
        s3_bucket=self.project_code,
        access_token=access_token,
    )

    # One independent, initially empty list per node type.
    node_types = (
        "sample",
        "virus_sequence",
        "core_metadata_collection",
        "virus_sequence_run_taxonomy",
        "virus_sequence_contig",
        "virus_sequence_blastn",
        "virus_sequence_contig_taxonomy",
        "virus_sequence_peptide",
        "virus_sequence_hmm_search",
    )
    self.submitting_data = {node_type: [] for node_type in node_types}

    cmc_record = {
        "submitter_id": format_submitter_id("cmc_ncbi_covid19", {}),
        "projects": [{"code": self.project_code}],
    }
    self.submitting_data["core_metadata_collection"].append(cmc_record)

    # The map is passed in as an argument; presumably it is filled in place
    # -- verify against read_ncbi_manifest.
    read_ncbi_manifest(
        self.manifest_bucket,
        self.sra_src_manifest,
        self.accession_number_filename_map,
    )
def __init__(self, base_url, access_token, s3_bucket):
    """Set up the metadata helper and a boto3 S3 client for "open"/"IDPH-Vaccine".

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "open"
    self.project_code = "IDPH-Vaccine"

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)

    self.s3_client = boto3.client("s3")
def __init__(self, base_url, access_token, s3_bucket):
    """Set up the helper and empty summary buffers for "open"/"covidstoplight".

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "open"
    self.project_code = "covidstoplight"

    self.summary_clinicals = []
    self.summary_locations = []

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)
def __init__(self, base_url, access_token, s3_bucket):
    """Set up the helper, buffers and CSV field table for "open"/"DSFSI".

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "open"
    self.project_code = "DSFSI"

    self.subjects = []
    self.demographics = []
    self.observations = []

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)

    # Each entry is:
    #   (csv field name, (node type, node field name, type of field))
    # A "None" type of field is used to remove the value.
    unmapped = (None, None, None)
    self.countries_fields = [
        ("case_id", ("subject", "submitter_id", str)),
        ("origin_case_id", unmapped),
        ("date", ("observation", "reporting_date", normalize_date)),
        ("age", ("demographic", "age", normalize_age)),
        ("gender", ("demographic", "gender", normalize_gender)),
        ("city", ("demographic", "city", str)),
        ("province/state", ("demographic", "province_state", str)),
        ("country", ("demographic", "country_region", str)),
        (
            "current_status",
            ("subject", "tmp_current_status", normalize_current_status),
        ),
        ("source", ("observation", "reporting_source_url", str)),
        ("symptoms", ("observation", "symptoms", normalize_symptoms)),
        (
            "date_onset_symptoms",
            ("observation", "date_onset_symptoms", normalize_date),
        ),
        (
            "date_admission_hospital",
            ("observation", "date_admission_hospital", normalize_date),
        ),
        ("date_confirmation", ("subject", "date_confirmation", normalize_date)),
        ("underlying_conditions", unmapped),
        ("travel_history_dates", ("subject", "travel_history_dates", str)),
        ("travel_history_location", ("subject", "travel_history_location", str)),
        ("death_date", ("subject", "deceased_date", normalize_date)),
        ("notes_for_discussion", unmapped),
    ]
def __init__(self, base_url, access_token, s3_bucket):
    """Set up the helper and empty record buffers for "open"/"DS4C".

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "open"
    self.project_code = "DS4C"

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)

    self.subjects = []
    self.demographics = []
    self.observations = []
def __init__(self, base_url, access_token, s3_bucket):
    """Set up the helper and empty node buffers for "open"/"ATLAS".

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "open"
    self.project_code = "ATLAS"

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)

    # One independent, initially empty list per node type.
    node_types = ("summary_location", "summary_socio_demographic")
    self.nodes = {node_type: [] for node_type in node_types}
def __init__(self, base_url, access_token, s3_bucket):
    """Set up the helper, location constants and buffers for "open"/"IDPH-zipcode".

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "open"
    self.project_code = "IDPH-zipcode"

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)

    self.country = "US"
    self.state = "IL"

    self.summary_locations = []
    self.summary_clinicals = []
def main():
    """Re-submit the records fetched for `old_node` as `new_node` records.

    Uses `base_url`, `access_token`, `program`, `project`, `old_node` and
    `new_node`, none of which are defined in this function. Records come from
    get_existing_data; each one is copied into a new record of type
    `new_node`, queued on a MetadataHelper, and then submitted in one batch.
    """
    auth_headers = {"Authorization": f"bearer {access_token}"}
    existing_records = get_existing_data(
        base_url, program, project, old_node, auth_headers
    )

    submitter = MetadataHelper(
        base_url=base_url,
        program_name=program,
        project_code=project,
        access_token=access_token,
    )

    print(f"Submitting {new_node} data")
    for source_record in existing_records:
        converted = {"type": new_node, "project_id": f"{program}-{project}"}
        # Only truthy values are copied, so None, "", 0, False and empty
        # containers are left out of the new record.
        converted.update(
            {field: val for field, val in source_record.items() if val}
        )
        submitter.add_record_to_submit(converted)

    submitter.batch_submit_records()
def __init__(self, base_url, access_token, s3_bucket):
    """Set up the helper, location constants and dict buffers for "open"/"IDPH-Vaccine".

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.program_name = "open"
    self.project_code = "IDPH-Vaccine"

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)

    self.country = "US"
    self.state = "IL"
    self.date = ""

    # These buffers start as empty dicts, not lists.
    self.counties_inventory = {}
    self.summary_locations = {}
    self.summary_clinicals = {}
    self.summary_group_demographic = {}
def __init__(self, base_url, access_token, s3_bucket):
    """Load settings from the YAML file named after this module, then set up state.

    The file is looked up in CURRENT_DIR as "<this module's base name>.yaml".
    Each required key is copied onto an attribute of the same name; a missing
    key raises KeyError.

    Args:
        base_url: Forwarded to the parent initializer and to the helper.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    # Get all input strings from YAML
    module_stem = path.splitext(path.basename(__file__))[0].strip("/")
    config_path = path.join(CURRENT_DIR, module_stem + ".yaml")
    with open(config_path) as config_file:
        # NOTE(review): FullLoader is kept as-is; yaml.safe_load is the
        # stricter choice if this file could ever be untrusted.
        config = yaml.load(config_file, Loader=yaml.FullLoader)

    # Copied in this order; every attribute is named after its config key.
    config_keys = (
        "verbose",
        "program_name",
        "project_code",
        "virus_genome_data_category",
        "virus_genome_data_type",
        "virus_genome_data_format",
        "virus_genome_source",
        "virus_genome_type",
        "virus_sequence_type",
        "virus_sequence_data_type",
        "virus_sequence_data_format",
        "virus_sequence_alignment_type",
        "virus_sequence_alignment_data_type",
        "virus_sequence_alignment_data_format",
        "virus_sequence_alignment_tool",
        "virus_sequence_hmm_type",
        "virus_sequence_hmm_data_type",
        "virus_sequence_hmm_data_format",
    )
    for key in config_keys:
        setattr(self, key, config[key])

    self.virus_genomes = []
    self.virus_sequences = []
    self.virus_sequence_alignments = []
    self.virus_sequence_hmms = []

    # The helper is given the base_url argument, not self.base_url.
    self.metadata_helper = MetadataHelper(
        base_url=base_url,
        program_name=self.program_name,
        project_code=self.project_code,
        access_token=access_token,
    )
def __init__(self, base_url, access_token, s3_bucket):
    """Set up manifest settings and helpers for "open"/"ncbi-covid-19".

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to both
            helpers, and stored as `self.token`.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.manifest_bucket = "sra-pub-sars-cov2"
    self.sra_src_manifest = "sra-src/Manifest"

    self.program_name = "open"
    self.project_code = "ncbi-covid-19"

    self.token = access_token
    self.last_submission_identifier = None

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    # The file helper is created before the metadata helper.
    self.file_helper = AsyncFileHelper(**helper_kwargs)
    self.metadata_helper = MetadataHelper(**helper_kwargs)
def __init__(self, base_url, access_token, s3_bucket):
    """Set up the helper, buffers and CSV layout tables for "open"/"JHU".

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.location_data = {}
    # Two levels of auto-created dicts, with a plain dict at the third level.
    self.time_series_data = defaultdict(lambda: defaultdict(dict))

    self.program_name = "open"
    self.project_code = "JHU"

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)

    # The two US county header lists share these leading columns.
    county_leading_headers = [
        "UID",
        "iso2",
        "iso3",
        "code3",
        "FIPS",
        "Admin2",
        "Province_State",
        "Country_Region",
        "Lat",
        "Long_",
        "Combined_Key",
    ]
    self.expected_csv_headers = {
        "global": ["Province/State", "Country/Region", "Lat", "Long", "1/22/20"],
        "US_counties": {
            "confirmed": county_leading_headers + ["1/22/20"],
            "deaths": county_leading_headers
            + [
                "Population",  # TODO use this
                "1/22/20",
            ],
        },
    }

    # Name -> column index. The two county maps differ only in
    # "dates_start", because "deaths" has the extra "Population" column.
    county_columns = {
        "iso2": 1,
        "iso3": 2,
        "code3": 3,
        "FIPS": 4,
        "county": 5,
        "province": 6,
        "country": 7,
        "latitude": 8,
        "longitude": 9,
    }
    self.header_to_column = {
        "global": {
            "province": 0,
            "country": 1,
            "latitude": 2,
            "longitude": 3,
            "dates_start": 4,
        },
        "US_counties": {
            "confirmed": {**county_columns, "dates_start": 11},
            "deaths": {**county_columns, "dates_start": 12},
        },
    }

    self.existing_summary_locations = []
    self.last_date = ""
def __init__(self, base_url, access_token, s3_bucket):
    """Set up the helper, buffers and expected header sets for "open"/"CTP".

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.summary_locations = []
    self.summary_clinicals = []
    self.header_to_column = {}

    self.program_name = "open"
    self.project_code = "CTP"

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)

    # Both header collections are sets, so order is not significant.
    self.expected_file_headers = {
        "date",
        "state",
        "positive",
        "negative",
        "pending",
        "totalTestResults",
        "hospitalizedCurrently",
        "hospitalizedCumulative",
        "inIcuCurrently",
        "inIcuCumulative",
        "onVentilatorCurrently",
        "onVentilatorCumulative",
        "recovered",
        "dataQualityGrade",
        "lastUpdateEt",
        "dateModified",
        "checkTimeEt",
        "death",
        "hospitalized",
        "dateChecked",
        "totalTestsViral",
        "positiveTestsViral",
        "negativeTestsViral",
        "positiveCasesViral",
        "deathConfirmed",
        "deathProbable",
        "totalTestEncountersViral",
        "totalTestsPeopleViral",
        "totalTestsAntibody",
        "positiveTestsAntibody",
        "negativeTestsAntibody",
        "totalTestsPeopleAntibody",
        "positiveTestsPeopleAntibody",
        "negativeTestsPeopleAntibody",
        "totalTestsPeopleAntigen",
        "positiveTestsPeopleAntigen",
        "totalTestsAntigen",
        "positiveTestsAntigen",
        "fips",
        "positiveIncrease",
        "negativeIncrease",
        "total",
        "totalTestResultsSource",
        "totalTestResultsIncrease",
        "posNeg",
        "deathIncrease",
        "hospitalizedIncrease",
        "hash",
        "commercialScore",
        "negativeRegularScore",
        "negativeScore",
        "positiveScore",
        "score",
        "grade",
    }

    self.expected_race_headers = {
        "Date",
        "State",
        "Cases_Total",
        "Cases_White",
        "Cases_Black",
        "Cases_Latinx",
        "Cases_Asian",
        "Cases_AIAN",
        "Cases_NHPI",
        "Cases_Multiracial",
        "Cases_Other",
        "Cases_Unknown",
        "Cases_Ethnicity_Hispanic",
        "Cases_Ethnicity_NonHispanic",
        "Cases_Ethnicity_Unknown",
        "Deaths_Total",
        "Deaths_White",
        "Deaths_Black",
        "Deaths_Latinx",
        "Deaths_Asian",
        "Deaths_AIAN",
        "Deaths_NHPI",
        "Deaths_Multiracial",
        "Deaths_Other",
        "Deaths_Unknown",
        "Deaths_Ethnicity_Hispanic",
        "Deaths_Ethnicity_NonHispanic",
        "Deaths_Ethnicity_Unknown",
    }
def __init__(self, base_url, access_token, s3_bucket):
    """Set up the helper, buffers and testing-CSV field mapping for "open"/"OWID".

    `self.headers_mapping` maps each CSV field name to
    `(position in the field table, (node type, node field name, type of field))`.

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.summary_locations = []
    self.summary_clinicals = []

    self.program_name = "open"
    self.project_code = "OWID"

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)

    # Each entry is:
    #   (csv field name, (node type, node field name, type of field))
    unmapped = (None, None, None)
    testing_fields = [
        ("ISO code", ("summary_location", "iso3", str)),
        ("Entity", (None, None, split_entity)),
        ("Date", ("summary_clinical", "date", str)),
        ("Source URL", ("summary_clinical", "source_url", str)),
        ("Source label", ("summary_clinical", "source_label", str)),
        ("Notes", ("summary_clinical", "notes", str)),
        ("Number of observations", ("summary_clinical", "num_observations", int)),
        ("Cumulative total", ("summary_clinical", "testing", int)),
        (
            "Cumulative total per thousand",
            ("summary_clinical", "cumulative_total_per_thousand", int),
        ),
        (
            "Daily change in cumulative total",
            ("summary_clinical", "daily_change_in_cumulative_total", int),
        ),
        (
            "Daily change in cumulative total per thousand",
            (
                "summary_clinical",
                "daily_change_in_cumulative_total_per_thousand",
                int,
            ),
        ),
        (
            "7-day smoothed daily change",
            ("summary_clinical", "seven_day_smoothed_daily_change", int),
        ),
        (
            "7-day smoothed daily change per thousand",
            (
                "summary_clinical",
                "seven_day_smoothed_daily_change_per_thousand",
                float,
            ),
        ),
        ("Short-term positive rate", unmapped),
        ("Short-term tests per case", unmapped),
        ("General source label", ("summary_clinical", "general_source_label", str)),
        ("General source URL", ("summary_clinical", "general_source_url", str)),
        ("Short description", ("summary_clinical", "short_description", str)),
        ("Detailed description", ("summary_clinical", "detailed_description", str)),
    ]

    # Record each field's position in the table alongside its mapping.
    headers_mapping = {}
    for position, (csv_field, target) in enumerate(testing_fields):
        headers_mapping[csv_field] = (position, target)
    self.headers_mapping = headers_mapping
def __init__(self, base_url, access_token, s3_bucket):
    """Set up the helper, buffers and county/state field mappings for "open"/"CCMap".

    `self.headers_mapping` has the keys "county" and "state". Each maps a CSV
    field name to `(node type, node field name, type of field)`.

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.summary_locations = []
    self.summary_clinicals = []
    self.summary_socio_demographics = []

    self.program_name = "open"
    self.project_code = "CCMap"

    # self.base_url is not assigned here; presumably the parent sets it.
    helper_kwargs = {
        "base_url": self.base_url,
        "program_name": self.program_name,
        "project_code": self.project_code,
        "access_token": access_token,
    }
    self.metadata_helper = MetadataHelper(**helper_kwargs)

    # Each entry is:
    #   (csv field name, (node type, node field name, type of field))

    # Rows that are identical in the county and state tables.
    capacity_fields = [
        ("Staffed All Beds", ("summary_clinical", "staffed_all_beds", int)),
        ("Staffed ICU Beds", ("summary_clinical", "staffed_icu_beds", int)),
        ("Licensed All Beds", ("summary_clinical", "licensed_all_beds", int)),
        (
            "All Bed Occupancy Rate",
            ("summary_clinical", "all_bed_occupancy_rate", float),
        ),
        (
            "ICU Bed Occupancy Rate",
            ("summary_clinical", "icu_bed_occupancy_rate", float),
        ),
        ("Population", ("summary_clinical", "population", int)),
    ]
    per_1000_fields = [
        (
            "Staffed All Beds [Per 1000 People]",
            ("summary_clinical", "staffed_all_beds_per_1000", float),
        ),
        (
            "Staffed All Beds [Per 1000 Adults (20+)]",
            ("summary_clinical", "staffed_all_beds_per_1000_gtr_20", float),
        ),
        (
            "Staffed All Beds [Per 1000 Elderly (65+)]",
            ("summary_clinical", "staffed_all_beds_per_1000_gtr_65", float),
        ),
        (
            "Staffed ICU Beds [Per 1000 People]",
            ("summary_clinical", "staffed_icu_beds_per_1000", float),
        ),
        (
            "Staffed ICU Beds [Per 1000 Adults (20+)]",
            ("summary_clinical", "staffed_icu_beds_per_1000_gtr_20", float),
        ),
        (
            "Staffed ICU Beds [Per 1000 Elderly (65+)]",
            ("summary_clinical", "staffed_icu_beds_per_1000_gtr_65", float),
        ),
        (
            "Licensed All Beds [Per 1000 People]",
            ("summary_clinical", "licensed_all_beds_per_1000", float),
        ),
        (
            "Licensed All Beds [Per 1000 Adults (20+)]",
            ("summary_clinical", "licensed_all_beds_per_1000_gtr_20", float),
        ),
        (
            "Licensed All Beds [Per 1000 Elderly (65+)]",
            ("summary_clinical", "licensed_all_beds_per_1000_gtr_65", float),
        ),
    ]

    # NOTE(review): the 20+/65+ population columns go to summary_clinical
    # for counties but to summary_socio_demographic for states -- confirm.
    county_fields = (
        [
            ("fips_code", ("summary_location", "FIPS", int)),
            ("State", ("summary_location", "province_state", str)),
            ("County Name", ("summary_location", "county", str)),
        ]
        + capacity_fields
        + [
            ("Population (20+)", ("summary_clinical", "population_gtr_20", int)),
            ("Population (65+)", ("summary_clinical", "population_gtr_65", int)),
        ]
        + per_1000_fields
    )

    state_fields = (
        [
            ("State", ("summary_location", None, int)),
            ("State Name", ("summary_location", "province_state", str)),
        ]
        + capacity_fields
        + [
            (
                "Population (20+)",
                ("summary_socio_demographic", "population_gtr_20", int),
            ),
            (
                "Population (65+)",
                ("summary_socio_demographic", "population_gtr_65", int),
            ),
        ]
        + per_1000_fields
        + [
            (
                "Estimated No. Full-Featured Mechanical Ventilators (2010 study estimate)",
                ("summary_clinical", "estimated_full_mech_ventilators", int),
            ),
            (
                "Estimated No. Full-Featured Mechanical Ventilators per 100,000 Population (2010 study estimate)",
                (
                    "summary_clinical",
                    "estimated_full_mech_ventilators_per_100000",
                    float,
                ),
            ),
            (
                "Estimated No. Pediatrics-Capable Full-Feature Mechanical Ventilators (2010 study estimate)",
                ("summary_clinical", "estimated_full_mech_pediatric_ventilators", int),
            ),
            (
                "Estimated No. Full-Feature Mechanical Ventilators, Pediatrics Capable per 100,000 Population <14 y (2010 study estimate)",
                (
                    "summary_clinical",
                    "estimated_full_mech_pediatric_ventilators_per_100000",
                    float,
                ),
            ),
        ]
    )

    self.headers_mapping = {
        "county": dict(county_fields),
        "state": dict(state_fields),
    }
def __init__(self, base_url, access_token, s3_bucket):
    """Set up the helper, buffers and expected CSV layout for "open"/"OWID".

    `self.header_to_column` maps each expected header to its position in
    `self.expected_csv_headers`.

    Args:
        base_url: Forwarded to the parent initializer.
        access_token: Forwarded to the parent initializer and to the helper.
        s3_bucket: Forwarded to the parent initializer.
    """
    super().__init__(base_url, access_token, s3_bucket)

    self.summary_locations = []
    self.summary_clinicals = []
    self.summary_socio_demographics = []

    self.program_name = "open"
    self.project_code = "OWID"

    # self.base_url is not assigned here; presumably the parent sets it.
    self.metadata_helper = MetadataHelper(
        base_url=self.base_url,
        program_name=self.program_name,
        project_code=self.project_code,
        access_token=access_token,
    )

    self.expected_csv_headers = [
        "iso_code",
        "continent",
        "location",
        "date",
        "total_cases",
        "new_cases",
        "new_cases_smoothed",
        "new_deaths",
        "new_deaths_smoothed",
        "total_cases_per_million",
        "new_cases_per_million",
        "new_cases_smoothed_per_million",
        "total_deaths_per_million",
        "new_deaths_per_million",
        "new_deaths_smoothed_per_million",
        "new_tests",
        "total_tests",
        "total_tests_per_thousand",
        "new_tests_per_thousand",
        "new_tests_smoothed",
        "new_tests_smoothed_per_thousand",
        "tests_per_case",
        "positive_rate",
        "tests_units",
        "stringency_index",
        "population",
        "population_density",
        "median_age",
        "aged_65_older",
        "aged_70_older",
        "gdp_per_capita",
        "extreme_poverty",
        "cardiovasc_death_rate",
        "diabetes_prevalence",
        "female_smokers",
        "male_smokers",
        "handwashing_facilities",
        "hospital_beds_per_thousand",
        "life_expectancy",
    ]

    # One pass with enumerate replaces a list.index() call per header, which
    # rescanned the list each time. The headers are unique, so the mapping is
    # unchanged.
    self.header_to_column = {
        header: column
        for column, header in enumerate(self.expected_csv_headers)
    }