def insert_regobs_data_to_database(
        regobs_data: pd.DataFrame,
        db_manager: DbManager,
        if_exists: str) -> None:
    """Store the RegObs dataframe in the ``regobs_data`` table.

    An info message is logged first, then the frame is handed to
    ``db_manager.insert_dataframe`` together with ``if_exists``.

    Args:
        regobs_data: Rows to write.
        db_manager: Object whose ``insert_dataframe`` performs the write.
        if_exists: Passed through unchanged to ``insert_dataframe``; the
            callers in this file use ``'append'`` and ``'replace'``.
    """
    target_table = 'regobs_data'
    logging.info('Inserting RegObs data into database table..')
    db_manager.insert_dataframe(target_table, regobs_data, if_exists)
# Example #2
# 0
def _load_regobs_data(fetch_regobs):
    """Return the RegObs data, from the API or from the CSV file.

    When ``fetch_regobs`` is truthy the data is fetched with
    ``Regobs().get_data()`` and passed to ``to_csv`` with the path
    ``csv_files/regobs.csv``; otherwise that same path is loaded with
    ``read_csv``. A fetch failure is logged and re-raised.
    """
    logging.info('Fetching RegObs data..')
    if fetch_regobs:
        # Fetch regobs data from api
        try:
            api_data = Regobs().get_data()
        except Exception:
            logging.exception('Cannot fetch RegObs data')
            raise

        # Same path that is read back below when fetch_regobs is false
        to_csv(api_data, 'csv_files/regobs.csv')
        return api_data

    # Load regobs data from csv file (can be useful for debugging or testing incremental update)
    return read_csv('csv_files/regobs.csv')


def _apply_incremental_update(api_data: pd.DataFrame,
                              db_manager: DbManager,
                              api_delete_list):
    """Apply only the differences between the database and ``api_data``.

    Rows are compared on ``reg_id`` and ``dt_change_time``. Rows found only
    in the database are deleted (by ``reg_id``) for every class in
    ``api_delete_list``; rows found only in ``api_data`` are appended to
    ``regobs_data``.

    Returns:
        The result of ``AvalancheIncident.from_dataframe`` for the new rows,
        or an empty list when there are no new rows.

    Raises:
        Any exception from the underlying calls, after it has been logged.
    """
    # Query current data in database
    logging.info('Querying regobs table from database..')
    try:
        db_data = db_manager.query_all_data_from_table(
            'regobs_data', 'reg_id')
    except exc.NoSuchTableError:
        logging.exception(
            'The table regobs_data does not exist in the database. Run the application with --force-update command line parameter to initialize all tables and fetch all data.')
        raise
    except Exception:
        logging.exception('Cannot query RegObs data from database')
        raise

    # Compare current database data with new api data
    logging.info(
        'Comparing dataframes to determine which rows are added or removed..')
    # Rows to delete from all tables
    deleted_rows = dataframe_difference(
        db_data, api_data, ['reg_id', 'dt_change_time'])

    # Rows to add to all tables
    new_rows = dataframe_difference(
        api_data, db_data, ['reg_id', 'dt_change_time'])

    # Convert to plain Python ints before logging and deleting
    deleted_reg_ids = [int(reg_id) for reg_id in deleted_rows['reg_id']]
    logging.info('Records with the following reg_ids will be deleted from the database: {}'.format(
        deleted_reg_ids))

    if deleted_reg_ids:
        # Delete removed rows from api's
        try:
            for data_class in api_delete_list:
                logging.info(
                    'Deleting removed records for: {}'.format(data_class.__name__))
                db_manager.delete_rows_with_reg_id(
                    deleted_reg_ids, data_class)
        except Exception:
            logging.exception(
                'Cannot delete removed records from database table')
            raise
    else:
        logging.info(
            'There are no deleted records to remove from the database')

    if new_rows.empty:
        return []

    logging.info(
        'Number of new records to add: {}'.format(len(new_rows)))

    try:
        avalanche_incident_list = AvalancheIncident.from_dataframe(
            new_rows
        )
    except Exception:
        logging.exception(
            'Cannot create avalanche_incident_list from regobs data')
        raise

    # Append new rows to regobs table
    try:
        insert_regobs_data_to_database(new_rows, db_manager, 'append')
    except Exception:
        logging.exception(
            'Cannot append RegObs data to database table')
        raise

    return avalanche_incident_list


def _apply_full_update(api_data: pd.DataFrame,
                       db_manager: DbManager,
                       engine,
                       api_initialize_list):
    """Initialize the tables and replace ``regobs_data`` with ``api_data``.

    Returns:
        The result of ``AvalancheIncident.from_dataframe(api_data)``.

    Raises:
        Any exception from the underlying calls, after it has been logged.
    """
    try:
        avalanche_incident_list = AvalancheIncident.from_dataframe(
            api_data
        )
    except Exception:
        logging.exception(
            'Cannot create avalanche_incident_list from regobs data')
        raise

    logging.info('Initializing database tables..')
    try:
        initialize_tables(api_initialize_list, engine)
    except Exception:
        logging.exception(
            'Cannot initialize tables in database')
        raise

    try:
        insert_regobs_data_to_database(api_data, db_manager, 'replace')
    except Exception:
        logging.exception(
            'Cannot add RegObs data to database table')
        raise

    return avalanche_incident_list


def main():
    """Run the RegObs import from configuration to database insert.

    Steps, in order:

    1. Load the configuration and the ``force_update`` command line flag.
    2. Connect to the database and, if configured, replace ``excel_data``.
    3. Obtain the RegObs data (API or CSV file).
    4. With ``force_update``: initialize the tables and replace
       ``regobs_data``. Without it: apply only added/removed rows.
    5. If there is anything new, build the table dict for the APIs in
       ``api_fetch_list`` and insert it, using ``'replace'`` for a forced
       update and ``'append'`` otherwise.

    Raises:
        Any exception from the steps above; each one is logged with
        ``logging.exception`` before it propagates.
    """
    # See configuration.ini for details
    fetch_regobs, load_excel_data, api_fetch_list, api_delete_list, api_initialize_list = load_configuration()

    # Handle command line arguments
    force_update = parse_command_line_arguments()

    logging.info(
        'Application started with force_update={}'.format(force_update))

    # Create engine and db_inserter
    try:
        engine = create_db_connection()
    except Exception:
        logging.exception('Cannot connect to the database')
        raise

    db_manager = DbManager(engine)

    if load_excel_data:
        logging.info('Adding excel data to database table excel_data..')
        excel_data = process_excel_data()
        ExcelData.metadata.create_all(engine)
        db_manager.insert_dataframe('excel_data', excel_data, if_exists='replace')

    api_data = _load_regobs_data(fetch_regobs)

    if force_update:
        # Initialize database and load all data; the API tables below are
        # replaced as well.
        if_table_exists_in_database = 'replace'
        avalanche_incident_list = _apply_full_update(
            api_data, db_manager, engine, api_initialize_list)
    else:
        # Incremental update. Only update added, updated or deleted records
        # in database tables; the API tables below are appended to.
        if_table_exists_in_database = 'append'
        avalanche_incident_list = _apply_incremental_update(
            api_data, db_manager, api_delete_list)

    if not avalanche_incident_list:
        logging.info('There is no new records to add to the database')
        logging.info('The application terminated successfully')
        return

    try:
        api_table_dict = get_table_dict_for_apis_in_list(
            api_fetch_list, avalanche_incident_list)
    except Exception:
        logging.exception(
            'Error fetching API data')
        raise

    # Set new database connection; log a failure here like every other step
    try:
        db_manager.engine = create_db_connection()
    except Exception:
        logging.exception('Cannot connect to the database')
        raise

    try:
        insert_data_for_table_dict(
            api_table_dict, db_manager, if_table_exists_in_database)
    except Exception:
        logging.exception(
            'Cannot add API data to database table')
        raise

    logging.info('The application terminated successfully')
def insert_data_for_table_dict(
        table_dict: Dict[str, pd.DataFrame],
        db_manager: DbManager,
        if_exists: str) -> None:
    """Insert every dataframe in ``table_dict`` into its table.

    For each entry, in the dict's iteration order, an info message naming
    the table is logged and the frame is handed to
    ``db_manager.insert_dataframe``.

    Args:
        table_dict: Mapping of table name to the rows for that table.
        db_manager: Object whose ``insert_dataframe`` performs the write.
        if_exists: Passed through unchanged to every ``insert_dataframe``
            call.
    """
    for target_table in table_dict:
        frame = table_dict[target_table]
        message = 'Inserting data into {}...'.format(target_table)
        logging.info(message)
        db_manager.insert_dataframe(target_table, frame, if_exists)