def _get_vault_secret(self, name: str):
    """
    Connect to the vault service and retrieve a secret value.

    Arguments:
        - name (str): name of the secret to be retrieved using "<mount_point>/<path>"

    Returns:
        - secret (dict): a dict of the secret key/value items retrieved

    Raises:
        - ValueError: the vault address env var is missing, the credentials secret
            offers no supported auth method (token or appRole), or the secret name
            cannot be parsed into a "<mount_point>/<path>" pattern
        - RuntimeError: the client is not authenticated with the supplied
            credentials, or vault forbids access to the secret
        - KeyError: unable to lookup secret in vault using the secret path
    """
    self.logger.debug(f"Looking up vault path: {name}")
    client = hvac.Client()

    # get vault address url
    vault_url = os.getenv("VAULT_ADDR") or os.getenv("vault_addr")
    if not vault_url:
        raise ValueError(
            "VAULT_ADDR url var not found. "
            'Either "VAULT_ADDR" or "vault_addr" env var required.')
    client.url = vault_url
    self.logger.debug(f"Vault addr set to: {client.url}")

    # get vault auth credentials from the PrefectSecret
    vault_creds = PrefectSecret(self.vault_credentials_secret).run()
    if "VAULT_TOKEN" in vault_creds:
        client.token = vault_creds["VAULT_TOKEN"]
    elif "VAULT_ROLE_ID" in vault_creds and "VAULT_SECRET_ID" in vault_creds:
        # NOTE(review): newer hvac releases expose appRole login under
        # `client.auth.approle` -- confirm against the pinned hvac version.
        client.auth_approle(vault_creds["VAULT_ROLE_ID"],
                            vault_creds["VAULT_SECRET_ID"])
    else:
        raise ValueError("Unable to authenticate with vault service. "
                         "Supported methods: token, appRole")
    if not client.is_authenticated():
        raise RuntimeError(
            "Unable to authenticate with vault using supplied credentials")
    self.logger.debug("Passed vault authentication check")

    # regex to parse path into 2 named parts: <mount_point>/<path>
    secret_path_re = r"^(?P<mount_point>[^/]+)/(?P<path>.+)$"
    m = re.fullmatch(secret_path_re, name)
    if m is None:
        raise ValueError(
            f'Invalid secret path: {name}. Expected: "<mount_point>/<path>"')
    vault_path = m.groupdict()

    try:
        vault_secret = client.secrets.kv.v2.read_secret_version(
            path=vault_path["path"], mount_point=vault_path["mount_point"])
        # the KV v2 response nests the secret payload under data -> data
        return vault_secret["data"]["data"]
    except hvac.exceptions.InvalidPath as exc:
        raise KeyError(f"Secret not found: {name}") from exc
    except hvac.exceptions.Forbidden as exc:
        raise RuntimeError(f"Access forbidden: {name}") from exc
def test_secret_value_pulled_from_context(self):
    """The secret resolves from context while present and raises once it is gone."""
    task = PrefectSecret(name="test")
    local_secrets_on = {"cloud.use_local_secrets": True}
    with set_temporary_config(local_secrets_on):
        with prefect.context(secrets={"test": 42}):
            resolved = task.run()
            assert resolved == 42
        # the secrets context has been exited, so the lookup must now fail
        with pytest.raises(ValueError):
            task.run()
def _export_key_vault_credentials(credentials: dict, vault_name: str) -> None:
    """Copy the service principal settings for `vault_name` into the process environment."""
    key_vault_credentials = credentials["KEY_VAULT"][vault_name]
    for env_var in ("AZURE_TENANT_ID", "AZURE_CLIENT_ID", "AZURE_CLIENT_SECRET"):
        os.environ[env_var] = key_vault_credentials[env_var]


def get_key_vault(credentials: dict,
                  secret_client_kwargs: dict,
                  vault_name: str = None) -> SecretClient:
    """
    Build a `SecretClient` for an Azure Key Vault.

    Credentials are resolved in three steps:
        1. the `credentials` argument, when truthy;
        2. otherwise the "AZURE_CREDENTIALS" Prefect secret;
        3. otherwise whatever is already present in the environment.
    For steps 1 and 2 the values are written to the AZURE_TENANT_ID,
    AZURE_CLIENT_ID and AZURE_CLIENT_SECRET environment variables so that
    `EnvironmentCredential` can discover them.

    Args:
        credentials (dict): mapping with a "KEY_VAULT" entry keyed by vault name;
            may be falsy to trigger the fallbacks above.
        secret_client_kwargs (dict): extra keyword arguments forwarded to
            `SecretClient`; `None` is treated as no extra arguments.
        vault_name (str, optional): name of the vault. When falsy it is read from
            the "AZURE_DEFAULT_KEYVAULT" Prefect secret.

    Returns:
        SecretClient: client pointed at "https://<vault_name>.vault.azure.net".

    Raises:
        KeyError: the credentials mapping lacks the vault or one of the keys.
    """
    if not vault_name:
        vault_name = PrefectSecret("AZURE_DEFAULT_KEYVAULT").run()

    if credentials:
        # set credentials as env variables so that they're discoverable by EnvironmentCredential
        _export_key_vault_credentials(credentials, vault_name)
    else:
        try:
            # we can read the credentials automatically if user uses the default name
            credentials = PrefectSecret("AZURE_CREDENTIALS").run()
            _export_key_vault_credentials(credentials, vault_name)
        except ValueError:
            # go to step 3 (attempt to read from env)
            pass

    env_credential = EnvironmentCredential()
    vault_url = f"https://{vault_name}.vault.azure.net"
    return SecretClient(vault_url=vault_url,
                        credential=env_credential,
                        **(secret_client_kwargs or {}))
def test_local_secrets_auto_load_json_strings(self):
    """A JSON string stored as a local secret comes back as the decoded object."""
    task = PrefectSecret(name="test")
    with set_temporary_config({"cloud.use_local_secrets": True}):
        json_secret = {"test": '{"x": 42}'}
        with prefect.context(secrets=json_secret):
            assert task.run() == {"x": 42}
        # without the secrets context the lookup fails
        with pytest.raises(ValueError):
            task.run()
def test_cloud_secrets_remain_plain_dictionaries(self, monkeypatch):
    """Secrets fetched through the (mocked) API stay plain dict/list objects, never Box types."""
    expected = {"a": "1234", "b": [1, 2, {"c": 3}]}
    response = {"data": {"secret_value": {"a": "1234", "b": [1, 2, {"c": 3}]}}}

    # stub out requests.Session so that session.post(...).json() yields `response`
    fake_reply = MagicMock(json=MagicMock(return_value=response))
    session = MagicMock()
    session.return_value.post = MagicMock(return_value=fake_reply)
    monkeypatch.setattr("requests.Session", session)

    cloud_config = {
        "cloud.auth_token": "secret_token",
        "cloud.use_local_secrets": False,
    }
    with set_temporary_config(cloud_config):
        val = PrefectSecret(name="the-key").run()

        assert val == expected
        assert isinstance(val, dict)
        assert not isinstance(val, box.Box)

        nested_list = val["b"]
        assert isinstance(nested_list, list)
        assert not isinstance(nested_list, box.BoxList)

        nested_dict = val["b"][2]
        assert isinstance(nested_dict, dict)
        assert not isinstance(nested_dict, box.Box)
def test_local_secrets_remain_plain_dictionaries(self):
    """A dict stored as a local secret is returned as a plain dict, not a Box."""
    task = PrefectSecret(name="test")
    with set_temporary_config({"cloud.use_local_secrets": True}):
        with prefect.context(secrets={"test": {"x": 42}}):
            stored = prefect.context.secrets["test"]
            assert isinstance(stored, dict)

            val = task.run()
            assert val == {"x": 42}
            assert isinstance(val, dict)
            assert not isinstance(val, box.Box)
def test_reads_by_rerunning_task(self):
    """Reading a SecretResult re-runs the wrapped secret task to obtain the value."""
    task = PrefectSecret("foo")
    # replace run() so the test does not depend on any secret backend
    task.run = lambda *args, **kwargs: 42
    result = SecretResult(task)
    # the location comparisons were previously bare expressions and checked nothing
    assert result.location == "foo"

    new_result = result.read("foo")
    assert new_result.value == 42
    assert new_result.location == "foo"
def test_secrets_use_client(self, monkeypatch):
    """With local secrets disabled, the value comes from the (mocked) API response."""
    response = {"data": {"secret_value": '"1234"'}}

    # stub out requests.Session so that session.post(...).json() yields `response`
    fake_reply = MagicMock(json=MagicMock(return_value=response))
    session = MagicMock()
    session.return_value.post = MagicMock(return_value=fake_reply)
    monkeypatch.setattr("requests.Session", session)

    cloud_config = {
        "cloud.auth_token": "secret_token",
        "cloud.use_local_secrets": False,
    }
    with set_temporary_config(cloud_config):
        val = PrefectSecret(name="the-key").run()
    assert val == "1234"
def test_cloud_secrets_auto_load_json_strings(self, monkeypatch):
    """A JSON string returned by the (mocked) API is decoded into a dict."""
    response = {"data": {"secret_value": '{"x": 42}'}}

    # stub out requests.Session so that session.post(...).json() yields `response`
    fake_reply = MagicMock(json=MagicMock(return_value=response))
    session = MagicMock()
    session.return_value.post = MagicMock(return_value=fake_reply)
    monkeypatch.setattr("requests.Session", session)

    cloud_config = {
        "cloud.auth_token": "secret_token",
        "cloud.use_local_secrets": False,
    }
    with set_temporary_config(cloud_config):
        val = PrefectSecret(name="the-key").run()
    assert isinstance(val, dict)
def test_secret_value_depends_on_use_local_secrets(self, monkeypatch):
    """With local secrets disabled, an error payload from the (mocked) API raises ClientError."""
    response = {"errors": "Malformed Authorization header"}

    # stub out requests.Session so that session.post(...).json() yields `response`
    fake_reply = MagicMock(json=MagicMock(return_value=response))
    session = MagicMock()
    session.return_value.post = MagicMock(return_value=fake_reply)
    monkeypatch.setattr("requests.Session", session)

    task = PrefectSecret(name="test")
    cloud_config = {"cloud.use_local_secrets": False, "cloud.auth_token": None}
    with set_temporary_config(cloud_config):
        with prefect.context(secrets={}):
            with pytest.raises(ClientError):
                task.run()
def test_secret_is_pickleable(self):
    """A PrefectSecret survives a cloudpickle round trip with its settings intact."""
    original = PrefectSecret(name="long name")
    payload = cloudpickle.dumps(original)
    restored = cloudpickle.loads(payload)

    assert restored.name == "long name"
    assert restored.max_retries == 2
    assert restored.retry_delay.total_seconds() == 1.0
    assert isinstance(restored.result_handler, SecretResultHandler)
def run(
    self,
    from_path: str = None,
    to_path: str = None,
    recursive: bool = None,
    overwrite: bool = None,
    gen: int = None,
    sp_credentials_secret: str = None,
    vault_name: str = None,
    max_retries: int = None,
    retry_delay: timedelta = None,
) -> None:
    """Upload file(s) to the Azure Data Lake.

    Credentials come from the Key Vault secret named by `sp_credentials_secret`
    (or, when that is not given, by the "AZURE_DEFAULT_ADLS_SERVICE_PRINCIPAL_SECRET"
    Prefect secret). If no secret name can be resolved, they are read from the
    AZURE_ACCOUNT_NAME, AZURE_TENANT_ID, AZURE_CLIENT_ID and AZURE_CLIENT_SECRET
    environment variables.

    Args:
        from_path (str): The path from which to upload the file(s).
        to_path (str): The destination path.
        recursive (bool): Set to true if uploading entire directories.
        overwrite (bool): Whether to overwrite the file(s) if they exist.
        gen (int): The generation of the Azure Data Lake.
        sp_credentials_secret (str, optional): The name of the Azure Key Vault secret containing
            a dictionary with ACCOUNT_NAME and Service Principal credentials
            (TENANT_ID, CLIENT_ID, CLIENT_SECRET). Defaults to None.
        vault_name (str, optional): The name of the vault from which to obtain the secret.
            Defaults to None.
        max_retries (int, optional): Not referenced in this method body. Defaults to None.
        retry_delay (timedelta, optional): Not referenced in this method body. Defaults to None.
    """
    secret_name = sp_credentials_secret
    if not secret_name:
        # attempt to read a default for the service principal secret name
        try:
            secret_name = PrefectSecret(
                "AZURE_DEFAULT_ADLS_SERVICE_PRINCIPAL_SECRET").run()
        except ValueError:
            pass

    if secret_name:
        raw_credentials = AzureKeyVaultSecret().run(secret=secret_name,
                                                    vault_name=vault_name)
        credentials = json.loads(raw_credentials)
    else:
        # no secret available: fall back to the process environment
        credentials = {"ACCOUNT_NAME": os.environ["AZURE_ACCOUNT_NAME"]}
        for env_var in ("AZURE_TENANT_ID", "AZURE_CLIENT_ID",
                        "AZURE_CLIENT_SECRET"):
            credentials[env_var] = os.environ[env_var]

    lake = AzureDataLake(gen=gen, credentials=credentials)

    # only used for log messages; the upload itself receives `to_path`
    full_to_path = os.path.join(credentials["ACCOUNT_NAME"], to_path)
    self.logger.info(
        f"Uploading data from {from_path} to {full_to_path}...")
    upload_kwargs = dict(
        from_path=from_path,
        to_path=to_path,
        recursive=recursive,
        overwrite=overwrite,
    )
    lake.upload(**upload_kwargs)
    self.logger.info(f"Successfully uploaded data to {full_to_path}.")
def _execute_flow_run():
    """
    Load the flow belonging to the current flow run and execute its environment.

    The flow run id is read from `prefect.context`; the flow's name, storage and
    environment are queried through the GraphQL client. Secrets listed by the
    storage are resolved with `PrefectSecret` and placed in context before the
    flow is loaded.

    Raises:
        - Exception: no "flow_run_id" is present in `prefect.context`
        - ValueError: the query returned no flow run for the id
        - any exception raised while loading or executing the flow; the flow run
            is set to `Failed` before the exception is re-raised
    """
    flow_run_id = prefect.context.get("flow_run_id")
    if not flow_run_id:
        click.echo("Not currently executing a flow within a Cloud context.")
        raise Exception(
            "Not currently executing a flow within a Cloud context.")

    # fetch the flow metadata for the flow run with this id
    query = {
        "query": {
            with_args("flow_run", {"where": {
                "id": {
                    "_eq": flow_run_id
                }
            }}): {
                "flow": {
                    "name": True,
                    "storage": True,
                    "environment": True
                },
                "version": True,
            }
        }
    }

    client = Client()
    result = client.graphql(query)
    flow_run = result.data.flow_run

    if not flow_run:
        click.echo("Flow run {} not found".format(flow_run_id))
        raise ValueError("Flow run {} not found".format(flow_run_id))

    try:
        flow_data = flow_run[0].flow
        storage_schema = prefect.serialization.storage.StorageSchema()
        storage = storage_schema.load(flow_data.storage)

        # populate global secrets
        secrets = prefect.context.get("secrets", {})
        for secret in storage.secrets:
            secrets[secret] = PrefectSecret(name=secret).run()

        with prefect.context(secrets=secrets, loading_flow=True):
            flow = storage.get_flow(storage.flows[flow_data.name])
            environment = flow.environment

            environment.setup(flow)
            environment.execute(flow)
    except Exception as exc:
        # record the failure on the flow run before propagating the error
        msg = "Failed to load and execute Flow's environment: {}".format(
            repr(exc))
        state = prefect.engine.state.Failed(message=msg)
        client.set_flow_run_state(flow_run_id=flow_run_id, state=state)
        click.echo(str(exc))
        raise exc
def run(
    self,
    path: str = None,
    gen: int = None,
    sp_credentials_secret: str = None,
    vault_name: str = None,
    max_retries: int = None,
    retry_delay: timedelta = None,
) -> List[str]:
    """List the contents of a directory in the Azure Data Lake.

    Credentials come from the Key Vault secret named by `sp_credentials_secret`
    (or, when that is not given, by the "AZURE_DEFAULT_ADLS_SERVICE_PRINCIPAL_SECRET"
    Prefect secret). If no secret name can be resolved, they are read from the
    AZURE_ACCOUNT_NAME, AZURE_TENANT_ID, AZURE_CLIENT_ID and AZURE_CLIENT_SECRET
    environment variables.

    Args:
        path (str): The path to the directory which contents you want to list. Defaults to None.
        gen (int): The generation of the Azure Data Lake. Defaults to None.
        sp_credentials_secret (str, optional): The name of the Azure Key Vault secret containing
            a dictionary with ACCOUNT_NAME and Service Principal credentials
            (TENANT_ID, CLIENT_ID, CLIENT_SECRET). Defaults to None.
        vault_name (str, optional): The name of the vault from which to obtain the secret.
            Defaults to None.
        max_retries (int, optional): Not referenced in this method body. Defaults to None.
        retry_delay (timedelta, optional): Not referenced in this method body. Defaults to None.

    Returns:
        List[str]: The list of paths to the contents of `path`. These paths do not include
        the container, eg. the path to the file located at
        "https://my_storage_acc.blob.core.windows.net/raw/supermetrics/test_file.txt"
        will be shown as "raw/supermetrics/test_file.txt".
    """
    secret_name = sp_credentials_secret
    if not secret_name:
        # attempt to read a default for the service principal secret name
        try:
            secret_name = PrefectSecret(
                "AZURE_DEFAULT_ADLS_SERVICE_PRINCIPAL_SECRET").run()
        except ValueError:
            pass

    if secret_name:
        raw_credentials = AzureKeyVaultSecret().run(secret=secret_name,
                                                    vault_name=vault_name)
        credentials = json.loads(raw_credentials)
    else:
        # no secret available: fall back to the process environment
        credentials = {"ACCOUNT_NAME": os.environ["AZURE_ACCOUNT_NAME"]}
        for env_var in ("AZURE_TENANT_ID", "AZURE_CLIENT_ID",
                        "AZURE_CLIENT_SECRET"):
            credentials[env_var] = os.environ[env_var]

    lake = AzureDataLake(gen=gen, credentials=credentials)

    # only used for log messages; the listing itself receives `path`
    full_dl_path = os.path.join(credentials["ACCOUNT_NAME"], path)
    self.logger.info(f"Listing files in {full_dl_path}...")
    listing = lake.ls(path)
    self.logger.info(f"Successfully listed files in {full_dl_path}.")
    return listing
def test_reads_with_new_name(self):
    """read() looks up whichever secret name it is given, not only the task's own name."""
    result = SecretResult(PrefectSecret("foo"))
    with prefect.context(secrets={"x": 99, "foo": "bar"}):
        from_x = result.read("x")
        from_foo = result.read("foo")

    assert (from_x.value, from_x.location) == (99, "x")
    assert (from_foo.value, from_foo.location) == ("bar", "foo")
def select_session_csvs(local_csvs: list, job_size: int) -> list:
    """
    Pick up to `job_size` local csv files that are not yet recorded in the database.

    Each local path is reduced to a "<parent folder>-<file name>" key and compared
    with the "<year>-<station>" keys built from `climate.csv_checker`. Keys missing
    from the database are sorted in descending order and the first `job_size` are kept.

    Args:
        local_csvs (list): paths of csv files, separated by '/' or '\\'.
        job_size (int): maximum number of paths to return; values <= 0 yield an
            empty list.

    Returns:
        list: "<NOAA_TEMP_CSV_DIR>/<folder>/<file name>" strings for the selection.
    """
    # LOCAL SET
    csv_set = set()
    for csv_path in local_csvs:
        parts = csv_path.split('/') if '/' in csv_path else csv_path.split('\\')
        csv_set.add(f'{parts[-2]}-{parts[-1]}')
    print(f'csvs from folder: {len(csv_set)}')

    year_db_csvs = PostgresFetch(
        db_name=local_config.DB_NAME,
        user=local_config.DB_USER,
        host=local_config.DB_HOST,
        port=local_config.DB_PORT,
        fetch="all",
        query="""
        select year, station from climate.csv_checker
        order by date_update
        """
    ).run(password=PrefectSecret('NOAA_LOCAL_DB').run())

    # DB SET
    year_db_set = {f'{row[0]}-{row[1]}' for row in year_db_csvs}
    print(f'csv_checker set: {len(year_db_set)}')

    # SET DIFF, SORT (descending, matching the previous pop-from-the-end order)
    pending = sorted(csv_set - year_db_set, reverse=True)
    print(f'new_set: {len(pending)}')

    # SELECT SHORT SUBSET; max() keeps a negative job_size from slicing from the end
    selected = pending[:max(job_size, 0)]

    # REBUILD LIST OF FILE PATH LOCATIONS
    data_dir = Path(config.NOAA_TEMP_CSV_DIR)
    return_list = []
    for key in selected:
        # split only on the first '-' so that file names containing '-' survive
        folder, file_name = key.split('-', 1)
        return_list.append(f'{data_dir}/{folder}/{file_name}')
    print(f'retun_list: {len(return_list)}')
    return return_list
def test_secrets_are_rerun_on_restart():
    """A secret task handed in as already Success is still resolved from context."""

    @prefect.task
    def identity(x):
        return x

    with Flow("test") as flow:
        secret = PrefectSecret("key")
        val = identity(secret)

    preset_states = {secret: Success()}
    with prefect.context(secrets={"key": "val"}):
        runner = FlowRunner(flow=flow)
        state = runner.run(task_states=preset_states, return_tasks=[val])

    assert state.is_successful()
    assert state.result[val].result == "val"
def test_secrets_dynamically_pull_from_context():
    """A secret that is retrying succeeds once its value appears in context."""
    secret_task = PrefectSecret("foo",
                                max_retries=1,
                                retry_delay=datetime.timedelta(0))
    flow = Flow(name="test")
    flow.add_task(secret_task)

    # first run: no secrets in context, so the task ends up retrying
    first_state = FlowRunner(flow=flow).run(return_tasks=[secret_task])
    assert first_state.is_running()
    assert first_state.result[secret_task].is_retrying()

    # second run: resume from the previous task states with the secret available
    with prefect.context(secrets={"foo": 42}):
        time.sleep(1)
        second_state = FlowRunner(flow=flow).run(
            task_states=first_state.result)
    assert second_state.is_successful()
def get_credentials(credentials_secret: str, vault_name: str = None):
    """
    Fetch and decode the credentials stored in an Azure Key Vault secret.

    Args:
        credentials_secret (str): name of the Key Vault secret holding the
            JSON-encoded credentials. When falsy, the name is read from the
            "AZURE_DEFAULT_SQLDB_SERVICE_PRINCIPAL_SECRET" Prefect secret.
        vault_name (str, optional): name of the vault from which to fetch the
            secret. Defaults to None.

    Returns:
        The object decoded from the secret's JSON payload.

    Raises:
        ValueError: no secret name was given and no default could be read
            (previously this surfaced as an UnboundLocalError).
    """
    if not credentials_secret:
        # attempt to read a default for the service principal secret name
        try:
            credentials_secret = PrefectSecret(
                "AZURE_DEFAULT_SQLDB_SERVICE_PRINCIPAL_SECRET").run()
        except ValueError:
            pass

    if not credentials_secret:
        raise ValueError(
            "No credentials secret name was provided and the "
            '"AZURE_DEFAULT_SQLDB_SERVICE_PRINCIPAL_SECRET" default could not be read.')

    azure_secret_task = AzureKeyVaultSecret()
    credentials_str = azure_secret_task.run(secret=credentials_secret,
                                            vault_name=vault_name)
    return json.loads(credentials_str)
def run(
    self,
    path: str = None,
    schema: str = None,
    table: str = None,
    credentials_secret: str = None,
    vault_name: str = None,
    max_retries: int = None,
    retry_delay: timedelta = None,
    **kwargs,
) -> str:
    """
    Task run method: bulk-insert a local CSV file into a table with the bcp CLI.

    Args:
        - path (str, optional): the path to the local CSV file to be inserted
        - schema (str, optional): the destination schema
        - table (str, optional): the destination table
        - credentials_secret (str, optional): the name of the Key Vault secret containing
            database credentials (server, db_name, user, password). When falsy, the name
            is read from the "AZURE_DEFAULT_SQLDB_SERVICE_PRINCIPAL_SECRET" Prefect secret
        - vault_name (str): the name of the vault from which to fetch the secret
        - max_retries (int, optional): not referenced in this method body
        - retry_delay (timedelta, optional): not referenced in this method body
        - **kwargs: forwarded to the parent class' `run`

    Returns:
        str: the output of the bcp CLI command

    Raises:
        - ValueError: no secret name was given and no default could be read
            (previously this surfaced as an UnboundLocalError)
    """
    import shlex

    if not credentials_secret:
        # attempt to read a default for the service principal secret name
        try:
            credentials_secret = PrefectSecret(
                "AZURE_DEFAULT_SQLDB_SERVICE_PRINCIPAL_SECRET").run()
        except ValueError:
            pass

    if not credentials_secret:
        raise ValueError(
            "No credentials secret name was provided and the "
            '"AZURE_DEFAULT_SQLDB_SERVICE_PRINCIPAL_SECRET" default could not be read.')

    credentials_str = AzureKeyVaultSecret(credentials_secret,
                                          vault_name=vault_name).run()
    credentials = json.loads(credentials_str)

    fqn = f"{schema}.{table}" if schema else table

    # Quote every interpolated value so that spaces, quotes or shell
    # metacharacters (notably in the password) cannot break or alter the command.
    def quoted(value) -> str:
        return shlex.quote(str(value))

    command = " ".join([
        "/opt/mssql-tools/bin/bcp",
        quoted(fqn),
        "in",
        quoted(path),
        "-S", quoted(credentials["server"]),
        "-d", quoted(credentials["db_name"]),
        "-U", quoted(credentials["user"]),
        "-P", quoted(credentials["password"]),
        "-c -F 2 -b 5000 -h 'TABLOCK'",
    ])
    return super().run(command=command, **kwargs)
def list_db_years(waiting_for: str) -> list:
    """
    Return the distinct (year, date_update) rows of `climate.csv_checker`, with the
    last row of the `date_update` ordering moved to the front.

    Args:
        waiting_for (str): not referenced in the body.
            NOTE(review): presumably only used to order this step after another
            one -- confirm with the caller.

    Returns:
        list: the fetched rows, re-ordered as described; returned unchanged when
        the query yields no rows.
    """
    db_years = PostgresFetch(
        db_name=local_config.DB_NAME,
        user=local_config.DB_USER,
        host=local_config.DB_HOST,
        port=local_config.DB_PORT,
        fetch="all",
        query="""
        select distinct year, date_update from climate.csv_checker
        order by date_update
        """
    ).run(password=PrefectSecret('NOAA_LOCAL_DB').run())

    # Move last item in the list to the first
    # - We want to check the most recent year first, since csvs in that dir
    #   may not be complete (we are not doing the full number of csvs for some dirs
    #   with each run)
    # - Then we move to the oldest checked folder in the list to move forward
    # Guarded: pop() on an empty result would raise IndexError.
    if db_years:
        db_years.insert(0, db_years.pop())
    return db_years
def get_report_flow(username: str = None, flow_name: str = None) -> Flow:
    """
    Build the flow that generates and sends a progress report for one user.

    Args:
        - username (str): MyFitnessPaw username to be used for flow generation and dispatch
        - flow_name (str, optional): An optional name to be applied to the flow

    Returns:
        - prefect.Flow: The created Prefect flow ready to be run

    Raises:
        - ValueError: if the `username` keyword argument is not provided
    """
    if not username:
        raise ValueError("An user must be provided for the flow")

    user_tag = username.upper()
    if not flow_name:
        flow_name = f"MyFitnessPaw Progress Report <{user_tag}>"

    with Flow(name=flow_name) as progress_report_flow:
        usermail = PrefectSecret(f"MYFITNESSPAL_USERNAME_{user_tag}")

        # flow parameters; the default start date is the day the flow is built
        today = datetime.datetime.now().strftime("%Y-%m-%d")
        starting_date = Parameter(name="starting_date", default=today)
        end_goal = Parameter(name="end_goal", default=150000)
        num_rows_report_tbl = Parameter(name="num_rows_report_tbl", default=7)
        report_style = Parameter(name="report_style", default="default")

        user = tasks.get_user(username, usermail)
        report_data = tasks.mfp_select_progress_report_data(
            usermail, starting_date, end_goal, num_rows_report_tbl)
        report = tasks.make_report(user, report_data, report_style)
        report_html = tasks.render_html_email_report(report)

        # terminal tasks: their results are not consumed inside the flow
        tasks.save_email_report_locally(report_html)
        tasks.send_email_report(report, report_html)

    return progress_report_flow
def insert_stations(list_of_tuples: list):
    """
    Insert station rows into `climate.noaa_global_temp_sites` and print a summary.

    Args:
        list_of_tuples (list): rows whose columns 0, 2, 3, 4 and 5 hold station,
            latitude, longitude, elevation and name. Empty-string coordinates are
            stored as NULL.
    """

    def blank_to_none(value):
        # the database receives NULL instead of an empty string
        return None if value == '' else value

    inserted = 0
    duplicates = 0

    # NOTE(review): the [1:2] slice handles only the row at index 1 -- this looks
    # like a debugging leftover ([1:] would skip just the first row); confirm.
    for row in list_of_tuples[1:2]:
        print(row)
        values = (
            row[0],
            blank_to_none(row[2]),
            blank_to_none(row[3]),
            blank_to_none(row[4]),
            row[5],
        )
        try:
            PostgresExecute(
                db_name=local_config.DB_NAME,
                user=local_config.DB_USER,
                host=local_config.DB_HOST,
                port=local_config.DB_PORT,
                query="""
                insert into climate.noaa_global_temp_sites
                    (station, latitude, longitude, elevation, name)
                values (%s, %s, %s, %s, %s)
                """,
                data=values,
                commit=True,
            ).run(password=PrefectSecret('NOAA_LOCAL_DB').run())
        except UniqueViolation:
            duplicates += 1
        except InvalidTextRepresentation as e:
            print(e)
        else:
            inserted += 1

    print(
        f'STATION INSERT RESULT: inserted {inserted} records | {duplicates} duplicates'
    )
def get_backup_flow(flow_name: str = None) -> Flow:
    """
    Build the flow that backs up the MyFitnessPaw database to a dropbox location
    and then applies the backup rotation scheme.

    Args:
        - flow_name (str, optional): An optional name to be applied to the flow

    Returns:
        - prefect.Flow: The created Prefect flow ready to be run
    """
    if not flow_name:
        flow_name = "MyFitnessPaw DB Backup"

    with Flow(flow_name) as backup_flow:
        dbx_mfp_dir = prefect.config.myfitnesspaw.backup.dbx_backup_dir
        dbx_token = PrefectSecret("MYFITNESSPAW_DROPBOX_ACCESS_TOKEN")

        # the backup task's result is not consumed by any later task
        tasks.make_dropbox_backup(dbx_token, dbx_mfp_dir)
        avail_backups = tasks.dbx_list_available_backups(
            dbx_token, dbx_mfp_dir)
        tasks.apply_backup_rotation_scheme(
            dbx_token, dbx_mfp_dir, avail_backups)

    return backup_flow
def flow_run():
    """
    Execute a flow run in the context of a backend API.

    The flow run id is read from `prefect.context`; the flow's name, storage and
    run_config are queried through the GraphQL client. Secrets listed by the
    storage are resolved with `PrefectSecret` and placed in context. The flow is
    then run with the default flow runner class when a run_config is present,
    otherwise through its environment.

    Raises:
        - Exception: no "flow_run_id" is present in `prefect.context`
        - ValueError: the query returned no flow run for the id
        - any exception raised while loading or executing the flow; the flow run
            is set to `Failed` and an ERROR run log is written before the
            exception is re-raised
    """
    flow_run_id = prefect.context.get("flow_run_id")
    if not flow_run_id:
        click.echo("Not currently executing a flow within a Cloud context.")
        raise Exception(
            "Not currently executing a flow within a Cloud context.")

    # fetch the flow metadata for the flow run with this id
    query = {
        "query": {
            with_args("flow_run", {"where": {
                "id": {
                    "_eq": flow_run_id
                }
            }}): {
                "flow": {
                    "name": True,
                    "storage": True,
                    "run_config": True
                },
                "version": True,
            }
        }
    }

    client = Client()
    result = client.graphql(query)
    flow_run = result.data.flow_run

    if not flow_run:
        click.echo("Flow run {} not found".format(flow_run_id))
        raise ValueError("Flow run {} not found".format(flow_run_id))

    # Set the `running_with_backend` context variable to enable logging
    with prefect.context(running_with_backend=True):
        try:
            flow_data = flow_run[0].flow
            storage_schema = prefect.serialization.storage.StorageSchema()
            storage = storage_schema.load(flow_data.storage)

            # populate global secrets
            secrets = prefect.context.get("secrets", {})
            for secret in storage.secrets:
                secrets[secret] = PrefectSecret(name=secret).run()

            # `loading_flow` is set only while the flow is fetched from storage
            with prefect.context(secrets=secrets, loading_flow=True):
                flow = storage.get_flow(flow_data.name)

            with prefect.context(secrets=secrets):
                if flow_data.run_config is not None:
                    runner_cls = get_default_flow_runner_class()
                    runner_cls(flow=flow).run()
                else:
                    environment = flow.environment
                    environment.setup(flow)
                    environment.execute(flow)

        except Exception as exc:
            # record the failure on the flow run and in its logs before propagating
            msg = "Failed to load and execute Flow's environment: {}".format(
                repr(exc))
            state = prefect.engine.state.Failed(message=msg)
            client.set_flow_run_state(flow_run_id=flow_run_id, state=state)
            client.write_run_logs(
                dict(
                    flow_run_id=flow_run_id,  # type: ignore
                    name="execute flow-run",
                    message=msg,
                    level="ERROR",
                ))
            click.echo(str(exc))
            raise exc
def test_secret_name_set_at_runtime(self):
    """A secret created without a name can receive the name as a run() argument."""
    unnamed = PrefectSecret()
    with set_temporary_config({"cloud.use_local_secrets": True}):
        with prefect.context(secrets={"foo": "bar"}):
            resolved = unnamed.run(name="foo")
            assert resolved == "bar"
def test_secret_raises_if_no_name_provided(self):
    """Running a secret with no name at init or at runtime raises ValueError."""
    unnamed = PrefectSecret()
    local_secrets_on = {"cloud.use_local_secrets": True}
    with set_temporary_config(local_secrets_on):
        with pytest.raises(ValueError, match="secret name must be provided"):
            unnamed.run()
def insert_records(filename):
    """
    Load a daily-temperature csv file into `climate.noaa_global_daily_temps` and
    record the file in `climate.csv_checker`.

    The first csv row is skipped as a header. Every remaining row must have at
    least 28 columns; they are re-ordered to match the insert statement. After
    the bulk insert, a `csv_checker` row is written using the station (column 0)
    and the first four characters of the date (column 1) of the last data row.

    Args:
        filename: path of the csv file to load.

    Returns:
        None. Returns silently for an empty file; for a file with a header but no
        data rows it prints a zero-count summary and touches no table
        (previously this case crashed on unbound `station`/`date`).

    Raises:
        IndexError: a data row has fewer than 28 columns.
    """
    # newline='' is what the csv module expects for files handed to csv readers
    with open(filename, newline='') as read_obj:
        # Get all rows of csv from csv_reader object as list of tuples
        list_of_tuples = list(map(tuple, reader(read_obj)))
    if not list_of_tuples:
        return

    data_rows = list_of_tuples[1:]
    if not data_rows:
        print('RECORD INSERT RESULT: inserted 0 records | 0 duplicates')
        return

    # csv column index for each insert column, in insert order:
    # date, station, latitude, longitude, elevation, columns 6..27, then name (5)
    column_order = (1, 0, 2, 3, 4, *range(6, 28), 5)
    new_list = [tuple(row[i] for i in column_order) for row in data_rows]

    insert = 0
    unique_key_violation = 0
    try:
        PostgresExecuteMany(
            db_name=local_config.DB_NAME,
            user=local_config.DB_USER,
            host=local_config.DB_HOST,
            port=local_config.DB_PORT,
            query="""
            insert into climate.noaa_global_daily_temps
                (date, station, latitude, longitude, elevation, temp, temp_attributes, dewp,
                dewp_attributes, slp, slp_attributes, stp, stp_attributes, visib, visib_attributes,
                wdsp, wdsp_attributes, mxspd, gust, max, max_attributes, min, min_attributes, prcp,
                prcp_attributes, sndp, frshtt, name)
            values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
                %s, %s, %s, %s, %s, %s, %s, %s)
            """,
            data=new_list,
            commit=True,
        ).run(password=PrefectSecret('NOAA_LOCAL_DB').run())
        insert = len(new_list)
    except UniqueViolation:
        unique_key_violation += 1

    # the checker row is keyed on the last data row of the file
    last_row = data_rows[-1]
    station, date = last_row[0], last_row[1]
    try:
        csv_filename = station + '.csv'
        PostgresExecute(
            db_name=local_config.DB_NAME,
            user=local_config.DB_USER,
            host=local_config.DB_HOST,
            port=local_config.DB_PORT,
            query="""
            insert into climate.csv_checker
                (station, date_create, date_update, year)
            values (%s, CURRENT_DATE, CURRENT_DATE, %s)
            """,
            data=(csv_filename, date[0:4]),
            commit=True,
        ).run(password=PrefectSecret('NOAA_LOCAL_DB').run())
    except UniqueViolation:
        pass
    print(f'RECORD INSERT RESULT: inserted {insert} records | {unique_key_violation} duplicates')
# Sub-directories of `data_dir`
dtypes_dir = path.join(data_dir, "dtypes")
external_dir = path.join(data_dir, "external")
interim_dir = path.join(data_dir, "interim")
processed_dir = path.join(data_dir, "processed")

# Base file names (no extension) for the datasets
ber_publicsearch_filename = "BERPublicsearch"
cso_gas_filename = "cso_gas_2019"
dublin_postcode_geometries_filename = "dublin_postcodes"
small_area_statistics_filename = "small_area_statistics_2016"
small_area_glossary_filename = "small_area_glossary_2016"
small_area_geometries_filename = "small_area_geometries_2016"

# Get Prefect secrets
# -------------------
email_address = PrefectSecret("email_address")

# Setup Download Tasks
# --------------------
# Both downloads are written to `external_dir`.
# NOTE(review): the statistics URL ends in .csv while the target filename uses
# .zip -- confirm which extension is intended.
download_sa_statistics = Download(
    name="Download Small Area Statistics",
    url="https://www.cso.ie/en/media/csoie/census/census2016/census2016boundaryfiles/SAPS2016_SA2017.csv",
    dirpath=external_dir,
    filename=f"{small_area_statistics_filename}.zip",
)
download_sa_glossary = Download(
    name="Download Small Area Glossary",
    url="https://www.cso.ie/en/media/csoie/census/census2016/census2016boundaryfiles/SAPS_2016_Glossary.xlsx",
    dirpath=external_dir,
    filename=f"{small_area_glossary_filename}.xlsx",
)
def test_secret_raises_if_doesnt_exist(self):
    """Looking up a local secret that is absent raises a "not found" ValueError."""
    missing = PrefectSecret(name="test")
    local_secrets_on = {"cloud.use_local_secrets": True}
    with set_temporary_config(local_secrets_on):
        with pytest.raises(ValueError, match="not found"):
            missing.run()