Ejemplo n.º 1
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "COXRAY" project.

        All three arguments are forwarded to the parent initializer;
        ``access_token`` is additionally handed to the metadata and file
        helpers created here.
        """
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name = "open"
        self.project_code = "COXRAY"

        # Both helpers receive the same keyword arguments.
        # NOTE(review): self.base_url is presumably set by the parent
        # initializer - confirm.
        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)
        self.file_helper = FileHelper(**helper_kwargs)

        # One independent, initially empty list per node name.
        node_names = (
            "core_metadata_collection",
            "study",
            "subject",
            "observation",
            "follow_up",
            "demographic",
            "imaging_file",
        )
        self.nodes = {node_name: [] for node_name in node_names}
Ejemplo n.º 2
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "ChestX-ray8" project.

        Forwards all arguments to the parent initializer, creates the
        metadata and file helpers, and prepares the single
        core_metadata_collection record plus an empty imaging_file list.
        """
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name = "open"
        self.project_code = "ChestX-ray8"

        # The two helpers are configured identically.
        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)
        self.file_helper = FileHelper(**helper_kwargs)

        # The core_metadata_collection record is keyed by this submitter id
        # and linked to the project by its code.
        self.cmc_submitter_id = format_submitter_id("cmc_chestxray8", {})
        cmc_record = {
            "submitter_id": self.cmc_submitter_id,
            "projects": [{"code": self.project_code}],
        }
        self.core_metadata_collection = [cmc_record]
        self.imaging_file = []
Ejemplo n.º 3
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "Com-Mobility" project.

        Forwards all arguments to the parent initializer, creates the
        metadata helper, records the expected file headers and starts with
        empty result lists.
        """
        super().__init__(base_url, access_token, s3_bucket)

        # Result containers, empty until data is processed.
        self.summary_locations = []
        self.summary_socio_demographics = []

        self.program_name, self.project_code = "open", "Com-Mobility"

        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)

        # Expected column names, in order: location/date columns first,
        # then the percent-change columns.
        location_and_date_headers = [
            "country_region_code",
            "country_region",
            "sub_region_1",
            "sub_region_2",
            "metro_area",
            "iso_3166_2_code",
            "census_fips_code",
            "date",
        ]
        percent_change_headers = [
            "retail_and_recreation_percent_change_from_baseline",
            "grocery_and_pharmacy_percent_change_from_baseline",
            "parks_percent_change_from_baseline",
            "transit_stations_percent_change_from_baseline",
            "workplaces_percent_change_from_baseline",
            "residential_percent_change_from_baseline",
        ]
        self.expected_file_headers = (
            location_and_date_headers + percent_change_headers
        )
Ejemplo n.º 4
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "controlled" program / "SSR" project.

        Forwards all arguments to the parent initializer, creates the
        metadata helper and reads the input file location from the
        ``FILE_PATH`` environment variable.
        """
        super().__init__(base_url, access_token, s3_bucket)

        self.subjects, self.demographics = [], []

        self.program_name, self.project_code = "controlled", "SSR"
        self.country, self.state = "US", "IL"

        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)

        # Layout: self.records = { <node ID>: { <submitter_id>: { <data> } } }
        self.records = defaultdict(dict)

        # TODO temporary - for now this ETL can only be run manually
        file_path = os.environ.get("FILE_PATH")
        self.file_path = file_path
        if not file_path:
            # Printed rather than raised so that unit tests don't complain.
            print("Need FILE_PATH environment variable (SSR file to parse)")
Ejemplo n.º 5
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "ncbi-covid-19" project.

        Forwards all arguments to the parent initializer, creates the async
        file helper and the metadata helper, and records the bucket name and
        the per-node list of strings used later.
        """
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name = "open"
        self.project_code = "ncbi-covid-19"

        # Same configuration for both helpers; the file helper is created
        # first, as in the original ordering.
        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.file_helper = AsyncFileHelper(**helper_kwargs)
        self.metadata_helper = MetadataHelper(**helper_kwargs)

        self.bucket = "sra-pub-sars-cov2-metadata-us-east-1"

        # The blastn entry carries a second element: a tab-separated header
        # line.
        # NOTE(review): first elements look like object keys inside
        # self.bucket - confirm against the code that consumes self.nodes.
        blastn_header = "acc\tqacc\tstaxid\tsacc\tslen\tlength\tbitscore\tscore\tpident\tsskingdom\tevalue\tssciname\n"
        self.nodes = {
            "virus_sequence_contig": ["contigs/contigs.json"],
            "virus_sequence_peptide": ["peptides/peptides.json"],
            "virus_sequence_contig_taxonomy": ["taxonomy/taxonomy.json"],
            "virus_sequence_blastn": ["blastn/blastn.tsv", blastn_header],
            "virus_sequence_hmm_search": ["hmmsearch_notc/hmmsearch_notc.json"],
            "virus_sequence_run_taxonomy": [
                "sra_taxonomy/coronaviridae_07_31_2020_000000000000.gz"
            ],
        }
Ejemplo n.º 6
0
 def __init__(self, base_url, access_token, s3_bucket):
     """Initialize the instance for the "open" program / "JHU" project.

     Forwards all arguments to the parent initializer and creates the
     metadata helper for this program/project pair.
     """
     super().__init__(base_url, access_token, s3_bucket)
     self.program_name, self.project_code = "open", "JHU"
     helper_kwargs = {
         "base_url": self.base_url,
         "program_name": self.program_name,
         "project_code": self.project_code,
         "access_token": access_token,
     }
     self.metadata_helper = MetadataHelper(**helper_kwargs)
Ejemplo n.º 7
0
 def __init__(self, base_url, access_token, s3_bucket):
     """Initialize the instance for the "open" program / "VacTracker" project.

     Forwards all arguments to the parent initializer, starts with an empty
     clinical_trials list and creates the metadata helper.
     """
     super().__init__(base_url, access_token, s3_bucket)
     self.clinical_trials = []
     self.program_name, self.project_code = "open", "VacTracker"
     helper_kwargs = {
         "base_url": self.base_url,
         "program_name": self.program_name,
         "project_code": self.project_code,
         "access_token": access_token,
     }
     self.metadata_helper = MetadataHelper(**helper_kwargs)
Ejemplo n.º 8
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "ncbi-covid-19" project.

        Forwards all arguments to the parent initializer, creates the
        metadata helper, async file helper and NCBI_FILE object, prepares
        the per-node submission lists (seeded with one
        core_metadata_collection record) and finally calls
        ``read_ncbi_manifest``.
        """
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name = "open"
        self.project_code = "ncbi-covid-19"
        self.manifest_bucket = "sra-pub-sars-cov2"
        self.sra_src_manifest = "sra-src/Manifest"
        self.accession_number_filename_map = {}

        # Metadata and file helpers share their configuration.
        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)
        self.file_helper = AsyncFileHelper(**helper_kwargs)

        # NOTE(review): the project code (not the s3_bucket argument) is
        # passed as s3_bucket here - confirm this is intended.
        self.data_file = NCBI_FILE(
            base_url=self.base_url,
            s3_bucket=self.project_code,
            access_token=access_token,
        )

        # One independent, initially empty list per node name.
        node_names = (
            "sample",
            "virus_sequence",
            "core_metadata_collection",
            "virus_sequence_run_taxonomy",
            "virus_sequence_contig",
            "virus_sequence_blastn",
            "virus_sequence_contig_taxonomy",
            "virus_sequence_peptide",
            "virus_sequence_hmm_search",
        )
        self.submitting_data = {node_name: [] for node_name in node_names}

        # Seed the core_metadata_collection list with its single record.
        cmc_record = {
            "submitter_id": format_submitter_id("cmc_ncbi_covid19", {}),
            "projects": [{"code": self.project_code}],
        }
        self.submitting_data["core_metadata_collection"].append(cmc_record)

        # The return value is not used here.
        # NOTE(review): presumably fills self.accession_number_filename_map
        # in place from the manifest - confirm.
        read_ncbi_manifest(
            self.manifest_bucket,
            self.sra_src_manifest,
            self.accession_number_filename_map,
        )
Ejemplo n.º 9
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "IDPH-Vaccine" project.

        Forwards all arguments to the parent initializer, creates the
        metadata helper and a boto3 S3 client.
        """
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name, self.project_code = "open", "IDPH-Vaccine"

        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)

        self.s3_client = boto3.client("s3")
Ejemplo n.º 10
0
 def __init__(self, base_url, access_token, s3_bucket):
     """Initialize the instance for the "open" program / "covidstoplight" project.

     Forwards all arguments to the parent initializer, starts with empty
     summary lists and creates the metadata helper.
     """
     super().__init__(base_url, access_token, s3_bucket)
     self.summary_clinicals, self.summary_locations = [], []
     self.program_name, self.project_code = "open", "covidstoplight"
     helper_kwargs = {
         "base_url": self.base_url,
         "program_name": self.program_name,
         "project_code": self.project_code,
         "access_token": access_token,
     }
     self.metadata_helper = MetadataHelper(**helper_kwargs)
Ejemplo n.º 11
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "DSFSI" project.

        Forwards all arguments to the parent initializer, starts with empty
        record lists, creates the metadata helper and defines the mapping
        from CSV columns to node fields.
        """
        super().__init__(base_url, access_token, s3_bucket)
        self.subjects, self.demographics, self.observations = [], [], []

        self.program_name, self.project_code = "open", "DSFSI"
        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)

        # Each entry is
        #   (csv field name, (node type, node field name, type of field)).
        # A type of field of None is used to remove the value.
        dropped = (None, None, None)
        self.countries_fields = [
            ("case_id", ("subject", "submitter_id", str)),
            ("origin_case_id", dropped),
            ("date", ("observation", "reporting_date", normalize_date)),
            ("age", ("demographic", "age", normalize_age)),
            ("gender", ("demographic", "gender", normalize_gender)),
            ("city", ("demographic", "city", str)),
            ("province/state", ("demographic", "province_state", str)),
            ("country", ("demographic", "country_region", str)),
            ("current_status", ("subject", "tmp_current_status", normalize_current_status)),
            ("source", ("observation", "reporting_source_url", str)),
            ("symptoms", ("observation", "symptoms", normalize_symptoms)),
            ("date_onset_symptoms", ("observation", "date_onset_symptoms", normalize_date)),
            ("date_admission_hospital", ("observation", "date_admission_hospital", normalize_date)),
            ("date_confirmation", ("subject", "date_confirmation", normalize_date)),
            ("underlying_conditions", dropped),
            ("travel_history_dates", ("subject", "travel_history_dates", str)),
            ("travel_history_location", ("subject", "travel_history_location", str)),
            ("death_date", ("subject", "deceased_date", normalize_date)),
            ("notes_for_discussion", dropped),
        ]
Ejemplo n.º 12
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "DS4C" project.

        Forwards all arguments to the parent initializer, creates the
        metadata helper and starts with empty record lists.
        """
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name, self.project_code = "open", "DS4C"
        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)

        # Record containers, empty until data is processed.
        self.subjects, self.demographics, self.observations = [], [], []
Ejemplo n.º 13
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "ATLAS" project.

        Forwards all arguments to the parent initializer, creates the
        metadata helper and prepares one empty list per node name.
        """
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name, self.project_code = "open", "ATLAS"
        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)

        # A comprehension (not dict.fromkeys) so each node gets its own list.
        node_names = ("summary_location", "summary_socio_demographic")
        self.nodes = {node_name: [] for node_name in node_names}
Ejemplo n.º 14
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "IDPH-zipcode" project.

        Forwards all arguments to the parent initializer, creates the
        metadata helper, fixes the country/state to US/IL and starts with
        empty summary lists.
        """
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name, self.project_code = "open", "IDPH-zipcode"
        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)

        self.country, self.state = "US", "IL"

        # Result containers, empty until data is processed.
        self.summary_locations, self.summary_clinicals = [], []
Ejemplo n.º 15
0
def main():
    """Resubmit the existing ``old_node`` records as ``new_node`` records.

    Reads ``access_token``, ``base_url``, ``program``, ``project``,
    ``old_node`` and ``new_node`` from the enclosing (module) scope.
    Each record returned by ``get_existing_data`` becomes a new record of
    type ``new_node``; only truthy field values are carried over, so
    ``None``, empty strings/containers, ``0`` and ``False`` are dropped.
    """
    auth_headers = {"Authorization": f"bearer {access_token}"}
    existing_records = get_existing_data(
        base_url, program, project, old_node, auth_headers
    )

    metadata_helper = MetadataHelper(
        base_url=base_url,
        program_name=program,
        project_code=project,
        access_token=access_token,
    )
    print(f"Submitting {new_node} data")

    # The project id is the same for every record.
    project_id = f"{program}-{project}"
    for existing in existing_records:
        record = {"type": new_node, "project_id": project_id}
        # Truthy values from the old record are copied over and may
        # overwrite the two keys set above.
        record.update({key: value for key, value in existing.items() if value})
        metadata_helper.add_record_to_submit(record)

    metadata_helper.batch_submit_records()
Ejemplo n.º 16
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "IDPH-Vaccine" project.

        Forwards all arguments to the parent initializer, creates the
        metadata helper, fixes the country/state to US/IL and starts with an
        empty date string and empty dictionaries.
        """
        super().__init__(base_url, access_token, s3_bucket)

        self.program_name, self.project_code = "open", "IDPH-Vaccine"
        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)

        self.country, self.state = "US", "IL"
        self.date = ""
        self.counties_inventory = {}

        # Unlike sibling classes that use lists, these containers are dicts.
        self.summary_locations = {}
        self.summary_clinicals = {}
        self.summary_group_demographic = {}
Ejemplo n.º 17
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance from the YAML file next to this script.

        Forwards all arguments to the parent initializer, loads
        ``<script name>.yaml`` from ``CURRENT_DIR``, copies the required
        settings onto the instance (a missing key raises ``KeyError``),
        starts with empty result lists and creates the metadata helper.
        """
        super().__init__(base_url, access_token, s3_bucket)

        # Get all input strings from YAML: the config file shares this
        # script's base name, with a ".yaml" extension.
        script_stem = path.splitext(path.basename(__file__))[0].strip("/")
        config_path = path.join(CURRENT_DIR, script_stem + ".yaml")
        with open(config_path) as f:
            # NOTE(review): FullLoader is not the restricted loader;
            # consider yaml.safe_load if this file could ever be untrusted.
            config = yaml.load(f, Loader=yaml.FullLoader)

        # Every setting is stored under an attribute named after its
        # config key; the order matches the original assignments.
        config_keys = (
            "verbose",
            "program_name",
            "project_code",
            "virus_genome_data_category",
            "virus_genome_data_type",
            "virus_genome_data_format",
            "virus_genome_source",
            "virus_genome_type",
            "virus_sequence_type",
            "virus_sequence_data_type",
            "virus_sequence_data_format",
            "virus_sequence_alignment_type",
            "virus_sequence_alignment_data_type",
            "virus_sequence_alignment_data_format",
            "virus_sequence_alignment_tool",
            "virus_sequence_hmm_type",
            "virus_sequence_hmm_data_type",
            "virus_sequence_hmm_data_format",
        )
        for key in config_keys:
            setattr(self, key, config[key])

        # Result containers, empty until data is processed.
        self.virus_genomes = []
        self.virus_sequences = []
        self.virus_sequence_alignments = []
        self.virus_sequence_hmms = []

        # Uses the base_url argument directly rather than self.base_url.
        helper_kwargs = {
            "base_url": base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)
Ejemplo n.º 18
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "ncbi-covid-19" project.

        Forwards all arguments to the parent initializer, records the
        manifest location and the access token, and creates the async file
        helper followed by the metadata helper.
        """
        super().__init__(base_url, access_token, s3_bucket)

        self.manifest_bucket = "sra-pub-sars-cov2"
        self.sra_src_manifest = "sra-src/Manifest"
        self.program_name, self.project_code = "open", "ncbi-covid-19"
        self.token = access_token
        self.last_submission_identifier = None

        # Same configuration for both helpers; the file helper is created
        # first, as in the original ordering.
        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.file_helper = AsyncFileHelper(**helper_kwargs)
        self.metadata_helper = MetadataHelper(**helper_kwargs)
Ejemplo n.º 19
0
 def __init__(self, base_url, access_token, s3_bucket):
     """Initialize the instance for the "open" program / "JHU" project.

     Forwards all arguments to the parent initializer, creates the metadata
     helper and defines, for the "global" and "US_counties" files, the
     expected leading CSV headers and the column index of each field.
     """
     super().__init__(base_url, access_token, s3_bucket)
     self.location_data = {}
     # Two levels of auto-created dicts, with a plain dict at the third.
     self.time_series_data = defaultdict(lambda: defaultdict(dict))
     self.program_name, self.project_code = "open", "JHU"
     helper_kwargs = {
         "base_url": self.base_url,
         "program_name": self.program_name,
         "project_code": self.project_code,
         "access_token": access_token,
     }
     self.metadata_helper = MetadataHelper(**helper_kwargs)

     # Headers shared by both US county files, before the first date.
     us_leading_headers = [
         "UID",
         "iso2",
         "iso3",
         "code3",
         "FIPS",
         "Admin2",
         "Province_State",
         "Country_Region",
         "Lat",
         "Long_",
         "Combined_Key",
     ]
     self.expected_csv_headers = {
         "global": ["Province/State", "Country/Region", "Lat", "Long", "1/22/20"],
         "US_counties": {
             "confirmed": [*us_leading_headers, "1/22/20"],
             # TODO use the "Population" column
             "deaths": [*us_leading_headers, "Population", "1/22/20"],
         },
     }

     # Column indices shared by both US county files.
     us_columns = {
         "iso2": 1,
         "iso3": 2,
         "code3": 3,
         "FIPS": 4,
         "county": 5,
         "province": 6,
         "country": 7,
         "latitude": 8,
         "longitude": 9,
     }
     self.header_to_column = {
         "global": {
             "province": 0,
             "country": 1,
             "latitude": 2,
             "longitude": 3,
             "dates_start": 4,
         },
         "US_counties": {
             "confirmed": {**us_columns, "dates_start": 11},
             # The extra "Population" header shifts the first date by one.
             "deaths": {**us_columns, "dates_start": 12},
         },
     }
     self.existing_summary_locations = []
     self.last_date = ""
Ejemplo n.º 20
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "CTP" project.

        Forwards all arguments to the parent initializer, starts with empty
        containers, creates the metadata helper and defines the sets of
        expected headers for the main file and the race file.
        """
        super().__init__(base_url, access_token, s3_bucket)
        self.summary_locations, self.summary_clinicals = [], []
        self.header_to_column = {}

        self.program_name, self.project_code = "open", "CTP"
        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)

        # Expected headers are sets, so column order is not recorded here.
        self.expected_file_headers = {
            "date",
            "state",
            "positive",
            "negative",
            "pending",
            "totalTestResults",
            "hospitalizedCurrently",
            "hospitalizedCumulative",
            "inIcuCurrently",
            "inIcuCumulative",
            "onVentilatorCurrently",
            "onVentilatorCumulative",
            "recovered",
            "dataQualityGrade",
            "lastUpdateEt",
            "dateModified",
            "checkTimeEt",
            "death",
            "hospitalized",
            "dateChecked",
            "totalTestsViral",
            "positiveTestsViral",
            "negativeTestsViral",
            "positiveCasesViral",
            "deathConfirmed",
            "deathProbable",
            "totalTestEncountersViral",
            "totalTestsPeopleViral",
            "totalTestsAntibody",
            "positiveTestsAntibody",
            "negativeTestsAntibody",
            "totalTestsPeopleAntibody",
            "positiveTestsPeopleAntibody",
            "negativeTestsPeopleAntibody",
            "totalTestsPeopleAntigen",
            "positiveTestsPeopleAntigen",
            "totalTestsAntigen",
            "positiveTestsAntigen",
            "fips",
            "positiveIncrease",
            "negativeIncrease",
            "total",
            "totalTestResultsSource",
            "totalTestResultsIncrease",
            "posNeg",
            "deathIncrease",
            "hospitalizedIncrease",
            "hash",
            "commercialScore",
            "negativeRegularScore",
            "negativeScore",
            "positiveScore",
            "score",
            "grade",
        }

        self.expected_race_headers = {
            "Date",
            "State",
            "Cases_Total",
            "Cases_White",
            "Cases_Black",
            "Cases_Latinx",
            "Cases_Asian",
            "Cases_AIAN",
            "Cases_NHPI",
            "Cases_Multiracial",
            "Cases_Other",
            "Cases_Unknown",
            "Cases_Ethnicity_Hispanic",
            "Cases_Ethnicity_NonHispanic",
            "Cases_Ethnicity_Unknown",
            "Deaths_Total",
            "Deaths_White",
            "Deaths_Black",
            "Deaths_Latinx",
            "Deaths_Asian",
            "Deaths_AIAN",
            "Deaths_NHPI",
            "Deaths_Multiracial",
            "Deaths_Other",
            "Deaths_Unknown",
            "Deaths_Ethnicity_Hispanic",
            "Deaths_Ethnicity_NonHispanic",
            "Deaths_Ethnicity_Unknown",
        }
Ejemplo n.º 21
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "OWID" project.

        Forwards all arguments to the parent initializer, starts with empty
        summary lists, creates the metadata helper and builds
        ``headers_mapping``: CSV field name -> (position in the field list,
        (node type, node field name, type of field)).
        """
        super().__init__(base_url, access_token, s3_bucket)
        self.summary_locations, self.summary_clinicals = [], []

        self.program_name, self.project_code = "open", "OWID"
        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)

        # Each entry is
        #   (csv field name, (node type, node field name, type of field)).
        clinical = "summary_clinical"
        testing_fields = [
            ("ISO code", ("summary_location", "iso3", str)),
            ("Entity", (None, None, split_entity)),
            ("Date", (clinical, "date", str)),
            ("Source URL", (clinical, "source_url", str)),
            ("Source label", (clinical, "source_label", str)),
            ("Notes", (clinical, "notes", str)),
            ("Number of observations", (clinical, "num_observations", int)),
            ("Cumulative total", (clinical, "testing", int)),
            ("Cumulative total per thousand", (clinical, "cumulative_total_per_thousand", int)),
            ("Daily change in cumulative total", (clinical, "daily_change_in_cumulative_total", int)),
            ("Daily change in cumulative total per thousand", (clinical, "daily_change_in_cumulative_total_per_thousand", int)),
            ("7-day smoothed daily change", (clinical, "seven_day_smoothed_daily_change", int)),
            ("7-day smoothed daily change per thousand", (clinical, "seven_day_smoothed_daily_change_per_thousand", float)),
            ("Short-term positive rate", (None, None, None)),
            ("Short-term tests per case", (None, None, None)),
            ("General source label", (clinical, "general_source_label", str)),
            ("General source URL", (clinical, "general_source_url", str)),
            ("Short description", (clinical, "short_description", str)),
            ("Detailed description", (clinical, "detailed_description", str)),
        ]

        # Remember each field's position in the list alongside its target.
        self.headers_mapping = {
            csv_name: (position, target)
            for position, (csv_name, target) in enumerate(testing_fields)
        }
Ejemplo n.º 22
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "CCMap" project.

        Forwards all arguments to the parent initializer, starts with empty
        summary lists, creates the metadata helper and builds
        ``headers_mapping`` with one CSV-field -> node-field mapping for
        "county" files and one for "state" files.
        """
        super().__init__(base_url, access_token, s3_bucket)
        self.summary_locations = []
        self.summary_clinicals = []
        self.summary_socio_demographics = []

        self.program_name, self.project_code = "open", "CCMap"
        helper_kwargs = {
            "base_url": self.base_url,
            "program_name": self.program_name,
            "project_code": self.project_code,
            "access_token": access_token,
        }
        self.metadata_helper = MetadataHelper(**helper_kwargs)

        # Each entry is
        #   (csv field name, (node type, node field name, type of field)).
        location = "summary_location"
        clinical = "summary_clinical"
        socio = "summary_socio_demographic"

        # Entries that are identical for county and state files.
        capacity_fields = [
            ("Staffed All Beds", (clinical, "staffed_all_beds", int)),
            ("Staffed ICU Beds", (clinical, "staffed_icu_beds", int)),
            ("Licensed All Beds", (clinical, "licensed_all_beds", int)),
            ("All Bed Occupancy Rate", (clinical, "all_bed_occupancy_rate", float)),
            ("ICU Bed Occupancy Rate", (clinical, "icu_bed_occupancy_rate", float)),
            ("Population", (clinical, "population", int)),
        ]
        per_1000_fields = [
            ("Staffed All Beds [Per 1000 People]", (clinical, "staffed_all_beds_per_1000", float)),
            ("Staffed All Beds [Per 1000 Adults (20+)]", (clinical, "staffed_all_beds_per_1000_gtr_20", float)),
            ("Staffed All Beds [Per 1000 Elderly (65+)]", (clinical, "staffed_all_beds_per_1000_gtr_65", float)),
            ("Staffed ICU Beds [Per 1000 People]", (clinical, "staffed_icu_beds_per_1000", float)),
            ("Staffed ICU Beds [Per 1000 Adults (20+)]", (clinical, "staffed_icu_beds_per_1000_gtr_20", float)),
            ("Staffed ICU Beds [Per 1000 Elderly (65+)]", (clinical, "staffed_icu_beds_per_1000_gtr_65", float)),
            ("Licensed All Beds [Per 1000 People]", (clinical, "licensed_all_beds_per_1000", float)),
            ("Licensed All Beds [Per 1000 Adults (20+)]", (clinical, "licensed_all_beds_per_1000_gtr_20", float)),
            ("Licensed All Beds [Per 1000 Elderly (65+)]", (clinical, "licensed_all_beds_per_1000_gtr_65", float)),
        ]

        # NOTE(review): the Population (20+)/(65+) columns go to
        # summary_clinical for counties but to summary_socio_demographic for
        # states - confirm this difference is intended.
        county_fields = [
            ("fips_code", (location, "FIPS", int)),
            ("State", (location, "province_state", str)),
            ("County Name", (location, "county", str)),
            *capacity_fields,
            ("Population (20+)", (clinical, "population_gtr_20", int)),
            ("Population (65+)", (clinical, "population_gtr_65", int)),
            *per_1000_fields,
        ]

        state_fields = [
            ("State", (location, None, int)),
            ("State Name", (location, "province_state", str)),
            *capacity_fields,
            ("Population (20+)", (socio, "population_gtr_20", int)),
            ("Population (65+)", (socio, "population_gtr_65", int)),
            *per_1000_fields,
            (
                "Estimated No. Full-Featured Mechanical Ventilators (2010 study estimate)",
                (clinical, "estimated_full_mech_ventilators", int),
            ),
            (
                "Estimated No. Full-Featured Mechanical Ventilators per 100,000 Population (2010 study estimate)",
                (clinical, "estimated_full_mech_ventilators_per_100000", float),
            ),
            (
                "Estimated No. Pediatrics-Capable Full-Feature Mechanical Ventilators (2010 study estimate)",
                (clinical, "estimated_full_mech_pediatric_ventilators", int),
            ),
            (
                "Estimated No. Full-Feature Mechanical Ventilators, Pediatrics Capable per 100,000 Population <14 y (2010 study estimate)",
                (clinical, "estimated_full_mech_pediatric_ventilators_per_100000", float),
            ),
        ]

        # The field lists are (key, value) pairs, so dict() builds the
        # lookup tables directly.
        self.headers_mapping = {
            "county": dict(county_fields),
            "state": dict(state_fields),
        }
Ejemplo n.º 23
0
    def __init__(self, base_url, access_token, s3_bucket):
        """Initialize the instance for the "open" program / "OWID" project.

        Forwards all arguments to the parent initializer, starts with empty
        summary lists, creates the metadata helper, records the expected CSV
        headers (in order) and builds ``header_to_column``, which maps each
        header name to its position in that list.
        """
        super().__init__(base_url, access_token, s3_bucket)
        self.summary_locations = []
        self.summary_clinicals = []
        self.summary_socio_demographics = []

        self.program_name = "open"
        self.project_code = "OWID"
        self.metadata_helper = MetadataHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )

        self.expected_csv_headers = [
            "iso_code",
            "continent",
            "location",
            "date",
            "total_cases",
            "new_cases",
            "new_cases_smoothed",
            "new_deaths",
            "new_deaths_smoothed",
            "total_cases_per_million",
            "new_cases_per_million",
            "new_cases_smoothed_per_million",
            "total_deaths_per_million",
            "new_deaths_per_million",
            "new_deaths_smoothed_per_million",
            "new_tests",
            "total_tests",
            "total_tests_per_thousand",
            "new_tests_per_thousand",
            "new_tests_smoothed",
            "new_tests_smoothed_per_thousand",
            "tests_per_case",
            "positive_rate",
            "tests_units",
            "stringency_index",
            "population",
            "population_density",
            "median_age",
            "aged_65_older",
            "aged_70_older",
            "gdp_per_capita",
            "extreme_poverty",
            "cardiovasc_death_rate",
            "diabetes_prevalence",
            "female_smokers",
            "male_smokers",
            "handwashing_facilities",
            "hospital_beds_per_thousand",
            "life_expectancy",
        ]

        # Single pass with enumerate instead of calling list.index() for
        # every header (which rescans the list each time). The header names
        # above are unique, so the resulting mapping is the same.
        self.header_to_column = {
            header: column
            for column, header in enumerate(self.expected_csv_headers)
        }