def main():
    status = {}

    # Open the new output SQLite database
    sqlite_path = RevisionIDs.get_path_from_id(
        TIME_FORMAT, LATEST_REVISION_ID, 'sqlite'
    )
    dpdb = DataPointsDB(sqlite_path)

    run_crawlers(status, dpdb)
    dpdb.create_indexes()
    copy_failed_from_previous_revision(status, dpdb)

    # Derive "new cases" from "total cases" when
    # they aren't explicitly specified, etc.
    DerivedData(dpdb).add_derived()

    # Commit and close the DB
    print("Derived data outputted OK: committing and closing")
    dpdb.commit()
    dpdb.close()

    # Output basic status info to a .json file.
    # This also signifies to the web interface
    # that the import went OK.
    print("Writing status JSON file")
    status_json_path = RevisionIDs.get_path_from_id(
        TIME_FORMAT, LATEST_REVISION_ID, 'json'
    )
    with open(status_json_path, 'w', encoding='utf-8') as f:
        f.write(json.dumps({'status': status}, indent=4))

    # Output datapoints to zip
    print("Outputting datapoints to zip...")
    with open(
        get_output_dir() / 'output' / f'{TIME_FORMAT}-{LATEST_REVISION_ID}.zip',
        'wb'
    ) as f:
        output_revision_datapoints_to_zip(f, TIME_FORMAT, LATEST_REVISION_ID)

    # Upload the zip to the remote AWS instance
    print("Uploading zip file to remote server...")
    system('/usr/bin/env bash /home/david/upload_to_remote.sh %s'
           % f'{TIME_FORMAT}-{LATEST_REVISION_ID}')

    # Clean up old DBs to save on space
    print("Deleting older DBs to save space...")
    delete_old_dbs()

    # Update the CSV output
    print("Outputting CSV files:")
    output_csv_data(TIME_FORMAT, LATEST_REVISION_ID)
    print('CSV write done')

    # Output information about the sources to a markdown table/csv file
    print("Outputting source info...")
    output_source_info(SOURCE_INFO)

    # Output GeoJSON
    print("Outputting geojson...")
    output_geojson()

    # Commit to GitHub
    print("Pushing to GitHub...")
    push_to_github()
    print("Push to GitHub done!")

    print("[end of script]")
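# Assumed entry point (not shown in this excerpt): when the script is run
# directly it presumably calls run_infrequent_jobs() first when the
# --run-infrequent-jobs flag is set, then main(). A minimal sketch:
#
# if __name__ == '__main__':
#     if RUN_INFREQUENT_JOBS:
#         run_infrequent_jobs()
#     main()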
def get_path_from_id(time_format, revision_id, ext='sqlite'):
    return (
        get_output_dir() / 'output' /
        f'{time_format}-{revision_id}.{ext}'
    )
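# Usage example with hypothetical values: callers elsewhere invoke this as
# RevisionIDs.get_path_from_id(TIME_FORMAT, LATEST_REVISION_ID, ext), e.g.
#
#     get_path_from_id('2021_06_01', 3, 'json')
#     # -> <output dir>/output/2021_06_01-3.json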
import sys
import json
import datetime
from os import system

from _utility.get_package_dir import get_output_dir
from covid_db.DerivedData import DerivedData
from covid_db.DataPointsDB import DataPointsDB
from covid_db.delete_old_dbs import delete_old_dbs
from covid_db.SQLiteDataRevisions import SQLiteDataRevisions
from covid_db.output_compressor.output_revision_datapoints_to_zip import output_revision_datapoints_to_zip
from data_export.push_to_github import push_to_github
from data_export.output_geojson import output_geojson
from data_export.output_source_info import output_source_info
from data_export.output_csv_data import output_csv_data
# (Logger, RevisionIDs, InfrequentStateDataJobs, run_crawlers and
# copy_failed_from_previous_revision come from project modules not shown
# in this excerpt.)

# Output stdout/stderr to log files
stdout_logger = sys.stdout = Logger(sys.stdout, ext='stdout')
stderr_logger = sys.stderr = Logger(sys.stderr, ext='stderr')

OUTPUT_DIR = get_output_dir() / 'output'
TIME_FORMAT = datetime.datetime.now().strftime('%Y_%m_%d')
LATEST_REVISION_ID = RevisionIDs.get_latest_revision_id(TIME_FORMAT)
RUN_INFREQUENT_JOBS = '--run-infrequent-jobs' in [i.strip() for i in sys.argv]

SOURCE_INFO = []


def run_infrequent_jobs():
    """
    Run infrequent tasks which require more resources.

    Comment out any of these if they break!
    """
    isdj = InfrequentStateDataJobs()
    isdj.update_wa_regions()
    isdj.update_vic_tableau()
    isdj.update_sa_regions()
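# The Logger class assigned to sys.stdout/sys.stderr above is a project-local
# helper whose source isn't shown here. For illustration only, a minimal
# hypothetical sketch of the tee pattern it appears to implement: wrap the
# original stream and mirror every write into a per-stream log file.
#
# class Logger:
#     def __init__(self, stream, ext='stdout'):
#         self.stream = stream
#         # NOTE: the real log path/naming scheme is an assumption
#         self.log_file = open(f'run.{ext}.log', 'a', encoding='utf-8')
#
#     def write(self, data):
#         self.stream.write(data)
#         self.log_file.write(data)
#         self.log_file.flush()
#
#     def flush(self):
#         self.stream.flush()
#         self.log_file.flush()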
import datetime

from _utility.get_package_dir import get_output_dir

DB_DIR = get_output_dir() / 'output'
ACTUALLY_DELETE = True
DELETE_BEFORE = datetime.datetime.now() - datetime.timedelta(days=1)


def delete_old_dbs():
    for path in DB_DIR.iterdir():
        date = datetime.datetime.strptime(path.name.split('-')[0], '%Y_%m_%d')
        if date <= DELETE_BEFORE:
            print("DELETING:", path)
            if ACTUALLY_DELETE:
                path.unlink()


if __name__ == '__main__':
    delete_old_dbs()
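# Dry-run note: setting ACTUALLY_DELETE = False keeps the "DELETING: <path>"
# print for every file older than DELETE_BEFORE but skips path.unlink(), so
# you can preview which revision files would be removed before deleting them.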