def test_bad_fetch_param_raises(self): task = PostgresFetch(db_name="test", user="******", host="test") with pytest.raises( ValueError, match=r"The 'fetch' parameter must be one of the following - \('one', 'many', 'all'\)", ): task.run(query="SELECT * FROM some_table", fetch="not a valid parameter")
def test_query_string_must_be_provided(self): task = PostgresFetch(db_name="test", user="******", password="******", host="test") with pytest.raises(ValueError, match="A query string must be provided"): task.run()
def test_bad_fetch_param_raises(self): task = PostgresFetch(db_name="test", user="******", password="******", host="test") with pytest.raises(ValueError) as exc: task.run(query="SELECT * FROM some_table", fetch="not a valid parameter") assert ( "The 'fetch' parameter must be one of the following - ('one', 'many', 'all')" == str(exc.value) )
def list_db_years(waiting_for: str) -> list: #list of sets db_years = PostgresFetch( db_name=local_config.DB_NAME, user=local_config.DB_USER, host=local_config.DB_HOST, port=local_config.DB_PORT, fetch="all", query=""" select distinct year, date_update from climate.csv_checker order by date_update """ ).run(password=PrefectSecret('NOAA_LOCAL_DB').run()) db_years.insert(0, db_years.pop()) # Move last item in the list to the first # - We want to check the most recent year first, since csvs in that dir # may not be complete (we are not doing the full number of csvs for some dirs # with each run) # - Then we move to the oldest checked folder in the list to move forward return db_years
def select_session_csvs(local_csvs: list, job_size: int) -> list: return_list = [] # LOCAL SET csv_set = set() for csv in local_csvs: csv_list = csv.split('/') if '/' in csv else csv.split('\\') csv_str = f'{csv_list[-2]}-{csv_list[-1]}' csv_set.add(csv_str) print(f'csvs from folder: {len(csv_set)}') year_db_csvs = PostgresFetch( db_name=local_config.DB_NAME, user=local_config.DB_USER, host=local_config.DB_HOST, port=local_config.DB_PORT, fetch="all", query=f""" select year, station from climate.csv_checker order by date_update """ ).run(password=PrefectSecret('NOAA_LOCAL_DB').run()) # DB SET year_db_set = set() for year_db in year_db_csvs: year_db_str = f'{year_db[0]}-{year_db[1]}' year_db_set.add(year_db_str) print(f'csv_checker set: {len(year_db_set)}') # SET DIFF, SORT new_set = csv_set.difference(year_db_set) new_set = sorted(new_set) print(f'new_set: {len(new_set)}') # CONVERT TO LIST, SELECT SHORT SUBSET new_list = [] set_empty = False while len(new_list) < job_size and not set_empty: if len(new_set)>0: new_list.append(new_set.pop()) else: set_empty = True new_list = [x.split('-') for x in new_list] new_list = new_list[:job_size] # REBUILD LIST OF FILE PATH LOCATIONS data_dir = Path(config.NOAA_TEMP_CSV_DIR) return_list = [f'{data_dir}/{x[0]}/{x[1]}' for x in new_list] print(f'retun_list: {len(return_list)}') return return_list
from prefect.tasks.postgres import PostgresExecute, PostgresFetch from settings import ( POSTGRES_DBNAME, POSTGRES_USERNAME, POSTGRES_PASSWORD, POSTGRES_HOST, POSTGRES_PORT, ) existing_tables = PostgresFetch( name="query-existing-tables", db_name=POSTGRES_DBNAME, user=POSTGRES_USERNAME, password=POSTGRES_PASSWORD, host=POSTGRES_HOST, port=POSTGRES_PORT, fetch="all", query=""" SELECT * FROM information_schema.tables; """, ) existing_schemas = PostgresFetch( name="query-existing-schemas", db_name=POSTGRES_DBNAME, user=POSTGRES_USERNAME, password=POSTGRES_PASSWORD, host=POSTGRES_HOST, port=POSTGRES_PORT, fetch="all", query="""
def test_construction(self): task = PostgresFetch(db_name="test", user="******", host="test") assert task.fetch == "one"
def test_query_string_must_be_provided(self): task = PostgresFetch(db_name="test", user="******", password="******", host="test") with pytest.raises(ValueError) as exc: task.run() assert "A query string must be provided" == str(exc.value)