Example #1
def main():
    status = {}

    # Open the new output SQLite database
    sqlite_path = RevisionIDs.get_path_from_id(TIME_FORMAT, LATEST_REVISION_ID,
                                               'sqlite')
    dpdb = DataPointsDB(sqlite_path)
    run_crawlers(status, dpdb)
    dpdb.create_indexes()
    copy_failed_from_previous_revision(status, dpdb)

    # Derive "new cases" from "total cases" when
    # they aren't explicitly specified, etc
    DerivedData(dpdb).add_derived()

    # Commit and close the DB
    print("Derived data outputted OK: committing and closing")
    dpdb.commit()
    dpdb.close()

    # Output basic status info to a .json file.
    # This also signals to the web
    # interface that the import went OK
    print("Writing status JSON file")
    status_json_path = RevisionIDs.get_path_from_id(TIME_FORMAT,
                                                    LATEST_REVISION_ID, 'json')
    with open(status_json_path, 'w', encoding='utf-8') as f:
        f.write(json.dumps({'status': status}, indent=4))

    # Output datapoints to zip
    print("Outputting datapoints to zip...")
    with open(
            get_output_dir() / 'output' /
            f'{TIME_FORMAT}-{LATEST_REVISION_ID}.zip', 'wb') as f:
        output_revision_datapoints_to_zip(f, TIME_FORMAT, LATEST_REVISION_ID)

    # Upload them to remote AWS instance
    print("Uploading zip file to remote server...")
    system('/usr/bin/env bash /home/david/upload_to_remote.sh '
           f'{TIME_FORMAT}-{LATEST_REVISION_ID}')

    # Clean up old DBs to save on space
    print("Deleting older DBs to save space..")
    delete_old_dbs()

    # Update the csv output
    print("Outputting CSV files:")
    output_csv_data(TIME_FORMAT, LATEST_REVISION_ID)
    print('CSV write done')

    # Output information about the sources to a markdown table/csv file
    print("Outputting source info...")
    output_source_info(SOURCE_INFO)

    # Output GeoJSON
    print("Outputting geojson...")
    output_geojson()

    # Commit to GitHub
    print("Pushing to GitHub...")
    push_to_github()
    print("Push to GitHub done!")

    print("[end of script]")
class RevisionIDs:
    @staticmethod
    def get_path_from_id(time_format, revision_id, ext='sqlite'):
        # e.g. .../output/2020_05_17-3.sqlite
        return (get_output_dir() / 'output' /
                f'{time_format}-{revision_id}.{ext}')
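
# main() above also uses RevisionIDs.get_latest_revision_id(), which isn't shown
# in this excerpt. A minimal sketch of one plausible implementation, assuming a
# revision ID is just the next unused number for the current date, inferred from
# the files already written (an illustrative guess, not the project's actual code):
def get_latest_revision_id(time_format):
    existing = []
    for path in (get_output_dir() / 'output').iterdir():
        stem = path.name.split('.')[0]                # e.g. '2020_05_17-3'
        date_part, _, rev_part = stem.partition('-')  # '2020_05_17', '3'
        if date_part == time_format and rev_part.isdigit():
            existing.append(int(rev_part))
    # Next revision number for today; 0 when nothing has been written yet
    return max(existing) + 1 if existing else 0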
Example #3
import sys
import json
import datetime
from os import system

from _utility.get_package_dir import get_output_dir

from covid_db.DerivedData import DerivedData
from covid_db.DataPointsDB import DataPointsDB
from covid_db.delete_old_dbs import delete_old_dbs
from covid_db.SQLiteDataRevisions import SQLiteDataRevisions
from covid_db.output_compressor.output_revision_datapoints_to_zip import output_revision_datapoints_to_zip

from data_export.push_to_github import push_to_github
from data_export.output_geojson import output_geojson
from data_export.output_source_info import output_source_info
from data_export.output_csv_data import output_csv_data

# Output stdout/stderr to log files
stdout_logger = sys.stdout = Logger(sys.stdout, ext='stdout')
stderr_logger = sys.stderr = Logger(sys.stderr, ext='stderr')

OUTPUT_DIR = get_output_dir() / 'output'
TIME_FORMAT = datetime.datetime.now().strftime('%Y_%m_%d')
LATEST_REVISION_ID = RevisionIDs.get_latest_revision_id(TIME_FORMAT)
RUN_INFREQUENT_JOBS = '--run-infrequent-jobs' in [i.strip() for i in sys.argv]
SOURCE_INFO = []
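
# Logger above is a project-internal tee that mirrors stdout/stderr into log
# files while still writing to the console. A minimal sketch of that pattern,
# assuming the logs are plain '<date>.<ext>.log' files in the output directory
# (the file naming and location here are assumptions, not the real class):
class LoggerTeeSketch:
    def __init__(self, stream, ext):
        self.stream = stream
        log_name = datetime.datetime.now().strftime('%Y_%m_%d') + f'.{ext}.log'
        self.log_file = open(OUTPUT_DIR / log_name, 'a', encoding='utf-8')

    def write(self, data):
        # Write to both the original stream and the log file
        self.stream.write(data)
        self.log_file.write(data)
        self.log_file.flush()

    def flush(self):
        self.stream.flush()
        self.log_file.flush()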


def run_infrequent_jobs():
    """
    Run infrequent tasks which require more resources
    Comment out any of these if they break!
    """
    isdj = InfrequentStateDataJobs()
    isdj.update_wa_regions()
    isdj.update_vic_tableau()
    isdj.update_sa_regions()
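
# RUN_INFREQUENT_JOBS (defined above) is read from sys.argv; this excerpt does
# not show where the flag is consumed, but presumably the entry point gates the
# heavier jobs on it, roughly (illustrative guess only):
#
#     if RUN_INFREQUENT_JOBS:
#         run_infrequent_jobs()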


# covid_db/delete_old_dbs.py (the module imported near the top of the script)
import datetime
from _utility.get_package_dir import get_output_dir

DB_DIR = get_output_dir() / 'output'

ACTUALLY_DELETE = True
DELETE_BEFORE = datetime.datetime.now() - datetime.timedelta(days=1)


def delete_old_dbs():
    for path in DB_DIR.iterdir():
        date = datetime.datetime.strptime(path.name.split('-')[0], '%Y_%m_%d')

        if date <= DELETE_BEFORE:
            print("DELETING:", path)

            if ACTUALLY_DELETE:
                path.unlink()


if __name__ == '__main__':
    delete_old_dbs()
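
# The strptime() call above assumes every entry in DB_DIR is named
# '{YYYY_MM_DD}-{revision}.{ext}', the same shape RevisionIDs.get_path_from_id()
# produces; a file that doesn't match that shape would raise ValueError here.
# For illustration, with a hypothetical filename:
#
#     >>> datetime.datetime.strptime('2020_05_17-3.sqlite'.split('-')[0], '%Y_%m_%d')
#     datetime.datetime(2020, 5, 17, 0, 0)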