Example #1
def main(fetch: bool):
    common_init.configure_logging()
    log = structlog.get_logger()

    ccd_dataset = ccd_helpers.CovidCountyDataset.load(fetch=fetch)
    all_df = transform(ccd_dataset)

    common_df.write_csv(all_df, OUTPUT_PATH, log)
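All of these entry points share one shape: configure logging, optionally refresh the local mirror of the upstream source, transform into the common schema, and write a CSV. A minimal sketch of that shape, where MyUpdater, MY_CSV_PATH, and the method names are hypothetical stand-ins rather than names from the repository:

def main(fetch: bool):
    common_init.configure_logging()
    log = structlog.get_logger()

    updater = MyUpdater.make_with_data_root(DATA_ROOT)  # hypothetical updater class
    if fetch:
        updater.update_source_data()  # refresh the local copy of the upstream data

    df = updater.transform()  # normalize to the common schema
    common_df.write_csv(df, MY_CSV_PATH, log)  # hypothetical output path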
Example #2
def main(replace_local_mirror: bool, generate_common_csv: bool):
    common_init.configure_logging()

    if replace_local_mirror:
        update_datasets()

    if generate_common_csv:
        common_df.write_csv(transform_cms_datasets(), TIMESERIES_CSV_PATH,
                            _logger)
Example #3
def main(fetch: bool):
    common_init.configure_logging()
    log = structlog.get_logger(updater="CovidDataScraperTransformer")
    local_path = DATA_ROOT / "cases-cds" / "timeseries-common.csv"

    transformer = CovidDataScraperTransformer.make_with_data_root(DATA_ROOT, log)
    if fetch:
        transformer.fetch()
    common_df.write_csv(transformer.transform(), local_path, log)
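Unlike Example #1, the logger here is created with structlog.get_logger(updater="CovidDataScraperTransformer"): structlog binds those initial key/value pairs into every event the logger emits, so each log line is stamped with the job that produced it.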
Example #4
def main(fetch: bool):
    common_init.configure_logging()
    log = structlog.get_logger()
    updater = CovidCareMapUpdater()
    if fetch:
        updater.update()

    df = updater.transform()
    common_df.write_csv(df, STATIC_CSV_PATH, log, [CommonFields.FIPS])
Example #5
def main(fetch: bool):
    common_init.configure_logging()
    log = structlog.get_logger()

    TestAndTraceSyncer(
        source_url=SOURCE_URL,
        census_state_path=DATA_ROOT / "misc" / "state.txt",
        gsheets_copy_directory=DATA_ROOT / "test-and-trace" / "gsheet-copy",
        state_timeseries_path=DATA_ROOT / "test-and-trace" / "state_data.csv",
        date_today=date.today(),
    ).update(fetch=fetch, log=log)
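Passing date_today=date.today() in from the entry point, rather than calling date.today() inside the syncer, presumably keeps TestAndTraceSyncer deterministic under test: a fixed date can be injected in place of the real clock.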
Example #6
def main(fetch: bool):
    common_init.configure_logging()
    connection = zoltpy.util.authenticate()
    transformer = ForecastHubUpdater.make_with_data_root(
        ForecastModel.ENSEMBLE, connection, DATA_ROOT
    )
    if fetch:
        _logger.info("Fetching new data.")
        transformer.update_source_data()

    data = transformer.load_source_data()
    data = transformer.transform(data)
    common_df.write_csv(data, transformer.timeseries_output_path, _logger)
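zoltpy.util.authenticate() opens a session with the Zoltar forecast archive before anything is fetched; at the time of writing, zoltpy reads the credentials from the Z_USERNAME and Z_PASSWORD environment variables rather than taking them as arguments.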
Example #7
def main(replace_local_mirror: bool, generate_common_csv: bool):
    logging.basicConfig(level=logging.INFO)
    common_init.configure_logging()

    if replace_local_mirror:
        update_local_json()

    CovidTrackingDataUpdater().update()

    if generate_common_csv:
        common_df.write_csv(
            transform(load_local_json()),
            TIMESERIES_CSV_PATH,
            structlog.get_logger(),
        )
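Calling logging.basicConfig(level=logging.INFO) immediately before common_init.configure_logging() likely duplicates handler setup; one of the two calls is probably sufficient.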
Example #8
def main(replace_local_mirror: bool):
    common_init.configure_logging()

    copier = AwsDataLakeCopier.make_with_data_root(DATA_ROOT)
    if replace_local_mirror:
        copier.replace_local_mirror()

    transformer = AwsDataLakeTransformer.make_with_data_root(DATA_ROOT)
    for source_name, source_files in copier.get_sources():
        log = structlog.get_logger(source_name=source_name)
        write_df_as_csv(
            transformer.transform(source_files, log),
            DATA_ROOT / "aws-lake" / f"timeseries-{source_name}.csv",
            log,
        )
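Because the logger is created inside the loop with source_name=source_name bound, every message emitted while transforming a given source is stamped with that source's name, which keeps the per-source CSV runs distinguishable in one combined log.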
Example #9
def main(check_for_new_data: bool, fetch: bool):
    common_init.configure_logging()
    transformer = NYTimesUpdater.make_with_data_root(DATA_ROOT)

    if check_for_new_data:
        if not transformer.is_new_data_available():
            raise Exception("No new data available")
        _logger.info("New data available")
        return

    if fetch:
        _logger.info("Fetching new data.")
        transformer.update_source_data()

    data = transformer.load_state_and_county_data()
    data = transformer.transform(data)
    common_df.write_csv(data, transformer.timeseries_output_path, _logger)
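The check_for_new_data branch raises when nothing new is upstream and returns before any fetch, so a scheduler can run this mode first and use the exit code to decide whether to trigger the full fetch-and-transform pass.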
Example #10
def main(replace_local_mirror: bool, generate_common_csv: bool):
    common_init.configure_logging()

    if replace_local_mirror:
        update_dataset_csv()

    if generate_common_csv:
        dataset = pd.read_csv(
            DATASET_CSV_PATH,
            parse_dates=[Fields.DATE],
            dtype={Fields.STATE_FIPS: str},
            low_memory=False,
        )

        common_df.write_csv(
            transform(dataset),
            TIMESERIES_CSV_PATH,
            _logger,
        )
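The dtype={Fields.STATE_FIPS: str} argument is what preserves leading zeros: FIPS codes are fixed-width strings, and pandas would otherwise parse a code like 06 as the integer 6. A one-line illustration (assuming import io and import pandas as pd):

>>> pd.read_csv(io.StringIO("fips\n06037"), dtype={"fips": str})["fips"][0]
'06037'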
Example #11
def entry_point(ctx):  # pylint: disable=no-value-for-parameter
    """Entry point for covid-data-model CLI."""
    common_init.configure_logging(command=ctx.invoked_subcommand)

    dataset_cache.set_pickle_cache_dir()
    pandarallel.initialize(progress_bar=False)
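Both calls run once, before any subcommand: dataset_cache.set_pickle_cache_dir() presumably points the project's pickle cache at a writable directory, and pandarallel.initialize(progress_bar=False) spawns the worker pool that makes pandarallel's parallel_apply available on DataFrames.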
Example #12
            # Each line ends with "Area <letter> - <comma-separated county names>;",
            # so capture the TSA letter and the county list.
            area, county_names = re.match(r".+Area ([A-Z]) - (.*)[;.]", line).groups()
            counties = county_names.split(", ")
            for county in counties:
                # TODO(chris): Find a better way to match counties to FIPS codes; some
                # Python packages do a lot of this heavy lifting.
                if county == "Raines":
                    county = "Rains"
                if county == "Dewitt":
                    county = "DeWitt"

                county = county + " County"
                county_data = census_data.get_county_data(state, county)
                if not county_data:
                    raise CountyNotFoundInCensusData()

                data.append({"fips": county_data["fips"], "state": state, "tsa_region": area})

        return pd.DataFrame(data)


if __name__ == "__main__":
    common_init.configure_logging()
    log = structlog.get_logger()

    transformer = TexasTraumaServiceAreaFipsTransformer.make_with_data_root(DATA_ROOT)
    output_csv = DATA_ROOT / "states" / "tx" / "tx_tsa_region_fips_map.csv"
    output = transformer.transform()
    output.to_csv(output_csv, index=False)

    log.info(f"Successfully wrote TSA -> FIPS map", output_file=str(output_csv))
Example #13
def entry_point():
    """Basic entrypoint for cortex subcommands"""
    common_init.configure_logging()
Example #14
def entry_point():
    """Basic entrypoint for cortex subcommands"""
    dataset_cache.set_pickle_cache_dir()
    common_init.configure_logging()
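This is the same stub as Example #13 except that the pickle cache directory is pinned before logging is configured, matching the fuller entry point in Example #11.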
Example #15
    def update_cds_data(self):
        pd.read_csv(self._CDS_TIMESERIES).to_csv(
            os.path.join(self._CDS_DATA_DIR, "timeseries.csv"), index=False
        )
        # Record the date and time of this update in version.txt.
        with open(os.path.join(self._CDS_DATA_DIR, "version.txt"), "w") as log:
            log.write("Updated on {}".format(self._stamp()))

    def update_all_data_files(self):
        self.update_cds_data()
        self.update_jhu_data()


if __name__ == "__main__":
    configure_logging()
    # Flag parsing is not shown in this snippet; the definitions below are
    # inferred from the args.cds / args.jhu accesses that follow.
    parser = argparse.ArgumentParser()
    parser.add_argument("--cds", action="store_true")
    parser.add_argument("--jhu", action="store_true")
    args = parser.parse_args()

    update = CovidDatasetAutoUpdater()
    something_specified = False

    if args.cds:
        logger.info("Updating data from the Corona Data Scraper")
        update.update_cds_data()
        something_specified = True

    if args.jhu:
        logger.info("Updating data from Johns Hopkins University")
        update.update_jhu_data()
        something_specified = True

    if not something_specified:
        # If nothing was specified, assume the user wants every dataset updated.
        update.update_all_data_files()
Example #16
def main():
    common_init.configure_logging()
    log = structlog.get_logger()
    df = load_dataset()
    common_df.write_csv(df, CSV_PATH, log, index_names=[CommonFields.FIPS])
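As in Example #4, an explicit index is passed to common_df.write_csv; the index_names=[CommonFields.FIPS] keyword suggests the helper sets the frame's index to the given fields before writing, so every dataset's CSV is keyed the same way.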