Exemple #1
0
 def test_bad_fetch_param_raises(self):
     task = PostgresFetch(db_name="test", user="******", host="test")
     with pytest.raises(
         ValueError,
         match=r"The 'fetch' parameter must be one of the following - \('one', 'many', 'all'\)",
     ):
         task.run(query="SELECT * FROM some_table", fetch="not a valid parameter")
 def test_query_string_must_be_provided(self):
     task = PostgresFetch(db_name="test",
                          user="******",
                          password="******",
                          host="test")
     with pytest.raises(ValueError,
                        match="A query string must be provided"):
         task.run()
Exemple #3
0
 def test_bad_fetch_param_raises(self):
     task = PostgresFetch(db_name="test", user="******", password="******", host="test")
     with pytest.raises(ValueError) as exc:
         task.run(query="SELECT * FROM some_table", fetch="not a valid parameter")
     assert (
         "The 'fetch' parameter must be one of the following - ('one', 'many', 'all')"
         == str(exc.value)
     )
Exemple #4
0
def list_db_years(waiting_for: str) -> list: #list of sets
    db_years = PostgresFetch(
        db_name=local_config.DB_NAME,
        user=local_config.DB_USER,
        host=local_config.DB_HOST,
        port=local_config.DB_PORT,
        fetch="all",
        query="""
        select distinct year, date_update from climate.csv_checker
        order by date_update
        """
    ).run(password=PrefectSecret('NOAA_LOCAL_DB').run())
    db_years.insert(0, db_years.pop())   # Move last item in the list to the first
                                         # - We want to check the most recent year first, since csvs in that dir
                                         #   may not be complete (we are not doing the full number of csvs for some dirs
                                         #   with each run)
                                         # - Then we move to the oldest checked folder in the list to move forward
    return db_years
Exemple #5
0
def select_session_csvs(local_csvs: list, job_size: int) -> list:
    return_list = []

    # LOCAL SET
    csv_set = set()
    for csv in local_csvs:
        csv_list = csv.split('/') if '/' in csv else csv.split('\\')
        csv_str = f'{csv_list[-2]}-{csv_list[-1]}'
        csv_set.add(csv_str)
    print(f'csvs from folder: {len(csv_set)}')

    year_db_csvs = PostgresFetch(
        db_name=local_config.DB_NAME,
        user=local_config.DB_USER,
        host=local_config.DB_HOST,
        port=local_config.DB_PORT,
        fetch="all",
        query=f"""
        select year, station from climate.csv_checker
        order by date_update
        """
    ).run(password=PrefectSecret('NOAA_LOCAL_DB').run())

    # DB SET
    year_db_set = set()
    for year_db in year_db_csvs:
        year_db_str = f'{year_db[0]}-{year_db[1]}'
        year_db_set.add(year_db_str)
    print(f'csv_checker set: {len(year_db_set)}')

    # SET DIFF, SORT
    new_set = csv_set.difference(year_db_set)
    new_set = sorted(new_set)
    print(f'new_set: {len(new_set)}')

    # CONVERT TO LIST, SELECT SHORT SUBSET
    new_list = []
    set_empty = False
    while len(new_list) < job_size and not set_empty:
        if len(new_set)>0:
            new_list.append(new_set.pop())
        else:
            set_empty = True
    new_list = [x.split('-') for x in new_list]
    new_list = new_list[:job_size]

    # REBUILD LIST OF FILE PATH LOCATIONS
    data_dir = Path(config.NOAA_TEMP_CSV_DIR)
    return_list = [f'{data_dir}/{x[0]}/{x[1]}' for x in new_list]
    print(f'retun_list: {len(return_list)}')
    return return_list
Exemple #6
0
from prefect.tasks.postgres import PostgresExecute, PostgresFetch

from settings import (
    POSTGRES_DBNAME,
    POSTGRES_USERNAME,
    POSTGRES_PASSWORD,
    POSTGRES_HOST,
    POSTGRES_PORT,
)

existing_tables = PostgresFetch(
    name="query-existing-tables",
    db_name=POSTGRES_DBNAME,
    user=POSTGRES_USERNAME,
    password=POSTGRES_PASSWORD,
    host=POSTGRES_HOST,
    port=POSTGRES_PORT,
    fetch="all",
    query="""
    SELECT * FROM information_schema.tables;
    """,
)

existing_schemas = PostgresFetch(
    name="query-existing-schemas",
    db_name=POSTGRES_DBNAME,
    user=POSTGRES_USERNAME,
    password=POSTGRES_PASSWORD,
    host=POSTGRES_HOST,
    port=POSTGRES_PORT,
    fetch="all",
    query="""
Exemple #7
0
 def test_construction(self):
     task = PostgresFetch(db_name="test", user="******", host="test")
     assert task.fetch == "one"
Exemple #8
0
 def test_query_string_must_be_provided(self):
     task = PostgresFetch(db_name="test", user="******", password="******", host="test")
     with pytest.raises(ValueError) as exc:
         task.run()
     assert "A query string must be provided" == str(exc.value)