Ejemplo n.º 1
0
def main(csv_path: str = SENSOR_CSV_PATH) -> None:
    """
    Load the sensor CSV files found under a directory into the sensor database table.

    Every (filepath, attributes) pair yielded by CsvImporter.find_csv_files(csv_path) is handled
    in turn:

    - When attributes is None (per the importer, the file did not match the naming scheme), the
      file is moved to the archive/failed location and processing continues with the next file.
    - Otherwise the file is loaded with load_and_prepare_file() and appended to TABLE_NAME. On
      success the file is moved to the archive/successful location. If loading or inserting
      raises, the file is moved to the archive/failed location and the exception is re-raised,
      which stops the run.

    Archive locations are derived by replacing "receiving" in the file path.

    Parameters
    ----------
    csv_path
        Path to folder containing files to load.

    Returns
    -------
        None.
    """

    def _archive(path, archive_dir):
        # Destination mirrors the source path, with "receiving" swapped for the archive folder.
        move(path, path.replace("receiving", archive_dir))

    username, password = secrets.db.epi
    db_url = f"mysql+pymysql://{username}:{password}@{secrets.db.host}/{DB_NAME}"
    engine = sqlalchemy.create_engine(db_url)

    for filepath, attributes in CsvImporter.find_csv_files(csv_path):
        # Guard: the importer could not derive attributes from this file, so set it aside.
        if attributes is None:
            _archive(filepath, "archive/failed")
            continue

        try:
            frame = load_and_prepare_file(filepath, attributes)
            frame.to_sql(TABLE_NAME, engine, if_exists="append", index=False)
        except Exception:
            # Set the offending file aside before propagating the error to the caller.
            _archive(filepath, "archive/failed")
            raise
        else:
            _archive(filepath, "archive/successful")
Ejemplo n.º 2
0
    def test_find_csv_files(self):
        """Recursively explore and find CSV files.

        A mocked `glob` module hands a fixed list of paths to
        `CsvImporter.find_csv_files`. The test checks that:

        - paths with a valid name yield `(path, details)`, where `details` is a
          7-tuple (presumably source, signal, time type, geo type, time value,
          issue and lag -- confirm against `CsvImporter`);
        - CSV paths with an invalid name yield `(path, None)`;
        - the non-CSV path (`README.md`) is not yielded at all.
        """

        path_prefix = 'prefix/to/the/data/'
        glob_paths = [
            # valid weekly
            path_prefix + 'fb_survey/weekly_202015_county_cli.csv',
            # valid daily
            path_prefix + 'ght/20200408_state_rawsearch.csv',
            # valid national
            path_prefix + 'valid/20200408_nation_sig.csv',
            # valid hhs
            path_prefix + 'valid/20200408_hhs_sig.csv',
            # invalid
            path_prefix + 'invalid/hello_world.csv',
            # invalid day
            path_prefix + 'invalid/22222222_b_c.csv',
            # invalid week
            path_prefix + 'invalid/weekly_222222_b_c.csv',
            # invalid geography
            path_prefix + 'invalid/20200418_province_c.csv',
            # ignored
            path_prefix + 'ignored/README.md',
        ]
        mock_glob = MagicMock()
        mock_glob.glob.return_value = glob_paths

        found = set(CsvImporter.find_csv_files(path_prefix, glob=mock_glob))

        # Take a single snapshot of "today" so that every expected value below
        # is derived from the same date, even if the test runs across midnight.
        today = date.today()
        expected_issue_day = int(today.strftime("%Y%m%d"))
        expected_issue_week = int(str(epi.Week.fromdate(today)))

        # All daily fixtures share one time value, so their lag is computed once.
        time_value_day = 20200408
        time_value_date = date(year=time_value_day // 10000,
                               month=(time_value_day // 100) % 100,
                               day=time_value_day % 100)
        expected_lag_day = (today - time_value_date).days

        expected = {
            (glob_paths[0],
             ('fb_survey', 'cli', 'week', 'county', 202015,
              expected_issue_week,
              delta_epiweeks(202015, expected_issue_week))),
            (glob_paths[1],
             ('ght', 'rawsearch', 'day', 'state', time_value_day,
              expected_issue_day, expected_lag_day)),
            (glob_paths[2],
             ('valid', 'sig', 'day', 'nation', time_value_day,
              expected_issue_day, expected_lag_day)),
            (glob_paths[3],
             ('valid', 'sig', 'day', 'hhs', time_value_day,
              expected_issue_day, expected_lag_day)),
            (glob_paths[4], None),
            (glob_paths[5], None),
            (glob_paths[6], None),
            (glob_paths[7], None),
            # glob_paths[8] (README.md) is intentionally absent: it is ignored.
        }
        self.assertEqual(found, expected)