def _sync_without_cleanup(
    resource_df: DataFrame, sync_db: sqlalchemy.engine.base.Engine
) -> DataFrame:
    """
    Sync fetched Sections API data with the database, leaving temporary tables behind.

    Delegates to ``sync_to_db_without_cleanup`` with ``id`` as the only
    identity column and ``SECTIONS_RESOURCE_NAME`` as the resource name.
    Tables are created when necessary; temporary tables that already exist
    are tolerated, and none are deleted when the sync finishes.

    Parameters
    ----------
    resource_df: DataFrame
        a Sections API DataFrame holding the currently fetched data; it is
        mutated by the sync, gaining Hash and CreateDate/LastModifiedDate
    sync_db: sqlalchemy.engine.base.Engine
        an Engine instance used to create database connections

    Returns
    -------
    DataFrame
        the currently fetched data with reconciled CreateDate/LastModifiedDate
    """
    synced_sections_df: DataFrame = sync_to_db_without_cleanup(
        sync_db=sync_db,
        resource_name=SECTIONS_RESOURCE_NAME,
        identity_columns=["id"],
        resource_df=resource_df,
    )
    return synced_sections_df
def test_db_after_sync(test_db_fixture):
    """
    Seed the Courses table, run a sync that includes a duplicated row, and
    return the database handle so the resulting state can be inspected.

    Steps:
    1. Build three initial course rows (one to be changed, one unchanged, one
       omitted from the sync), add hash/JSON and source-id columns, and stamp
       them with a fixed CreateDate/LastModifiedDate.
    2. Drop and recreate the ``Courses`` table, then load the initial rows.
    3. Build the sync DataFrame from ``SYNC_DATA`` and append a copy of its
       first row.
    4. Run ``sync_to_db_without_cleanup`` against the ``Courses`` resource.

    Parameters
    ----------
    test_db_fixture
        database handle exposing ``connect()`` and accepted by
        ``DataFrame.to_sql`` (presumably a SQLAlchemy Engine -- confirm
        against the fixture definition)

    Returns
    -------
    the same ``test_db_fixture`` that was passed in, after the sync has run
    """
    # Imported locally so this block does not depend on how the file imports pandas.
    from pandas import concat

    # arrange
    initial_course_data = [
        CHANGED_COURSE_BEFORE,
        UNCHANGED_COURSE,
        OMITTED_FROM_SYNC_COURSE,
    ]

    courses_initial_df = DataFrame(initial_course_data, columns=COLUMNS)
    courses_initial_df = add_hash_and_json_to(courses_initial_df)
    add_sourceid_to(courses_initial_df, IDENTITY_COLUMNS)

    # Fixed timestamp so CreateDate/LastModifiedDate of seeded rows are predictable.
    seed_date = datetime(2020, 9, 14, 12, 0, 0)
    courses_initial_df["SyncNeeded"] = 0
    courses_initial_df["CreateDate"] = seed_date
    courses_initial_df["LastModifiedDate"] = seed_date
    courses_initial_df = courses_initial_df[SYNC_COLUMNS]

    courses_sync_df = DataFrame(SYNC_DATA, columns=COLUMNS)

    # NOTE(review): passing raw SQL strings to execute() relies on SQLAlchemy 1.x
    # behavior -- confirm the pinned version before upgrading.
    with test_db_fixture.connect() as con:
        con.execute("DROP TABLE IF EXISTS Courses")
        con.execute(
            f"""
            CREATE TABLE IF NOT EXISTS Courses (
                {SYNC_COLUMNS_SQL}
            )
            """
        )

    courses_initial_df.to_sql(
        "Courses", test_db_fixture, if_exists="append", index=False, chunksize=1000
    )

    # Duplicating a course to verify that duplicates will not be inserted.
    # DataFrame.append was removed in pandas 2.0; concat with a one-row slice
    # keeps the original column dtypes and the duplicated index label.
    courses_sync_df = concat([courses_sync_df, courses_sync_df.iloc[[0]]])

    # act
    sync_to_db_without_cleanup(
        courses_sync_df, IDENTITY_COLUMNS, "Courses", test_db_fixture
    )

    return test_db_fixture
def sync_resource(
    resource_name: str,
    db_engine: sqlalchemy.engine.base.Engine,
    data: List[Dict[str, Any]],
    id_column: str = "id",
) -> DataFrame:
    """
    Sync fetched records for one resource with the database, then clean up.

    Parameters
    ----------
    resource_name: str
        name of the resource, passed to both the sync and the cleanup step
    db_engine: sqlalchemy.engine.base.Engine
        an Engine instance passed to both the sync and the cleanup step
    data: List[Dict[str, Any]]
        the fetched records, one dict per record
    id_column: str
        name of the single identity column used for the sync

    Returns
    -------
    DataFrame
        the DataFrame returned by ``sync_to_db_without_cleanup``, or an empty
        DataFrame when ``data`` contains no records
    """
    # Nothing fetched: return early without running the sync or the cleanup.
    record_count = len(data)
    if record_count == 0:
        return DataFrame()

    fetched_df: DataFrame = DataFrame(data)
    identity_columns = [id_column]

    result_df = sync_to_db_without_cleanup(
        resource_df=fetched_df,
        identity_columns=identity_columns,
        resource_name=resource_name,
        sync_db=db_engine,
    )

    cleanup_after_sync(resource_name, db_engine)
    return result_df