def test_local_handler_writes_and_writes_to_dir(self, tmp_dir, res):
    """Writing a result returns a str path to a `prefect`-prefixed file."""
    written_path = LocalResultHandler(dir=tmp_dir).write(res)

    assert isinstance(written_path, str)
    filename = os.path.basename(written_path)
    assert filename.startswith("prefect")

    # Read the file back in binary mode to confirm it is readable.
    with open(written_path, "rb") as stream:
        payload = stream.read()
    assert isinstance(payload, bytes)
def test_serialize_local_result_handler_with_dir(self):
    """Dumping a handler built with an explicit dir keeps its type and dir."""
    filesystem_root = os.path.abspath(os.sep)
    schema = ResultHandlerSchema()
    payload = schema.dump(LocalResultHandler(dir=filesystem_root))

    assert isinstance(payload, dict)
    assert payload["type"] == "LocalResultHandler"
    assert payload["dir"] == filesystem_root
def test_deserialize_local_result_handler(self, dir):
    """A dump/load round trip rebuilds a handler with logger and dir intact."""
    schema = ResultHandlerSchema()
    dumped = schema.dump(LocalResultHandler(dir=dir))
    restored = schema.load(dumped)

    assert isinstance(restored, LocalResultHandler)
    assert hasattr(restored, "logger")
    assert restored.logger.name == "prefect.LocalResultHandler"
    assert restored.dir == dir
def test_task_runner_validates_cached_state_inputs_if_task_has_caching(client):
    """
    `check_task_is_cached` is given two candidate cached states by the
    (mocked) client and is expected to return a successful, cached state
    whose result is 99.
    """

    @prefect.task(
        cache_for=datetime.timedelta(minutes=1),
        cache_validator=all_inputs,
        result_handler=JSONResultHandler(),
    )
    def cached_task(x):
        return 42

    # Candidate without any cached inputs.
    first_expiry = datetime.datetime.utcnow() + datetime.timedelta(minutes=2)
    dull_state = Cached(
        cached_result_expiration=first_expiry,
        result=Result(-1, JSONResultHandler()),
    )

    # Candidate that carries a cached input for "x".
    second_expiry = datetime.datetime.utcnow() + datetime.timedelta(minutes=2)
    cached_result = Result(99, JSONResultHandler())
    cached_inputs = {"x": SafeResult("2", result_handler=JSONResultHandler())}
    state = Cached(
        cached_result_expiration=second_expiry,
        result=cached_result,
        cached_inputs=cached_inputs,
    )

    client.get_latest_cached_states = MagicMock(return_value=[dull_state, state])

    runner = CloudTaskRunner(task=cached_task)
    runtime_inputs = {"x": Result(2, result_handler=LocalResultHandler())}
    outcome = runner.check_task_is_cached(Pending(), inputs=runtime_inputs)

    assert client.get_latest_cached_states.called
    assert outcome.is_successful()
    assert outcome.is_cached()
    assert outcome.result == 99
def __init__(self, directory: str = None, validate: bool = True) -> None:
    """
    Record the storage directory and attach a `LocalResultHandler` for it.

    Args:
        - directory (str, optional): directory to use; when falsy, falls back
            to the `flows` folder under `prefect.config.home_dir`
        - validate (bool, optional): when `True`, the directory is expanded
            (`~`), made absolute and created if it does not exist; when
            `False`, it is stored exactly as given. The flag is also
            forwarded to the `LocalResultHandler`
    """
    directory = directory or os.path.join(prefect.config.home_dir, "flows")
    self.flows = {}  # type: Dict[str, str]

    if validate:
        abs_directory = os.path.abspath(os.path.expanduser(directory))
        # `exist_ok=True` closes the window between the existence check and
        # the creation: another process creating the directory in between no
        # longer raises FileExistsError. The check itself is kept so that an
        # already-existing path is still accepted without touching it.
        if not os.path.exists(abs_directory):
            os.makedirs(abs_directory, exist_ok=True)
    else:
        abs_directory = directory

    self.directory = abs_directory
    result_handler = LocalResultHandler(self.directory, validate=validate)
    super().__init__(result_handler=result_handler)
def test_local_handler_is_pickleable(self):
    """A handler survives a cloudpickle dump/load round trip."""
    pickled = cloudpickle.dumps(LocalResultHandler(dir="root"))
    restored = cloudpickle.loads(pickled)
    assert isinstance(restored, LocalResultHandler)
# NOTE(review): the first two tests take `self`; presumably they belong to a
# test class whose header is outside this view -- confirm before moving them.
def test_to_result_returns_self_for_no_results(self):
    """`NoResult.to_result()` hands back the `NoResult` object itself."""
    converted = NoResult.to_result()
    assert converted is NoResult


def test_to_result_returns_hydrated_result_for_safe(self):
    """A `SafeResult` converts to a `Result` that keeps a reference to it."""
    safe = SafeResult("3", result_handler=JSONResultHandler())
    hydrated = safe.to_result()

    assert isinstance(hydrated, Result)
    assert hydrated.value == 3
    assert hydrated.safe_value is safe
    assert hydrated.result_handler is safe.result_handler


@pytest.mark.parametrize(
    "obj",
    [
        Result(3),
        Result(object, result_handler=LocalResultHandler()),
        NoResult,
        SafeResult("3", result_handler=JSONResultHandler()),
    ],
)
def test_everything_is_pickleable_after_init(obj):
    """Each freshly built result object equals its cloudpickle round trip."""
    round_tripped = cloudpickle.loads(cloudpickle.dumps(obj))
    assert round_tripped == obj


def test_results_are_pickleable_with_their_safe_values():
    """A result with a stored safe value equals its cloudpickle round trip."""
    result = Result(3, result_handler=JSONResultHandler())
    result.store_safe_value()

    round_tripped = cloudpickle.loads(cloudpickle.dumps(result))
    assert round_tripped == result
from prefect import Flow, task
from prefect.engine.result_handlers import LocalResultHandler


@task
def result_here():
    """Return the constant string that is passed downstream."""
    return "result"


@task
def get_it(x):
    """Print whatever value is received."""
    print(x)


# Wire the two tasks together; the flow is given a LocalResultHandler.
with Flow("test-checkpoint", result_handler=LocalResultHandler()) as f:
    r = result_here()
    get_it(r)

f.run()
# print(f.result_handler)
def test_local_handler_writes_and_reads(self, tmp_dir, res):
    """Reading back what was just written yields an equal value."""
    handler = LocalResultHandler(dir=tmp_dir)
    location = handler.write(res)
    round_tripped = handler.read(location)
    assert round_tripped == res
def test_local_handler_initializes_with_no_args(self):
    """Constructing the handler without arguments must not raise."""
    LocalResultHandler()
def test_local_handler_initializes_with_dir(self):
    """A handler built with an explicit dir exposes it as `handler.dir`."""
    import os  # local import: the file's import header is not visible here

    # Use the platform's filesystem root instead of a literal "/.prefect":
    # the literal is POSIX-only and names a directory under the root that
    # normally does not exist. The root exists on every platform and is
    # already an absolute path, so the assertion holds whether or not the
    # handler normalises or creates its directory (presumably it may --
    # confirm against the handler implementation).
    root_dir = os.path.abspath(os.sep)
    handler = LocalResultHandler(dir=root_dir)
    assert handler.dir == root_dir
from prefect import task, Flow
from prefect.engine.result_handlers import LocalResultHandler


@task(checkpoint=True, result_handler=LocalResultHandler(dir="~/.prefect"))
def print_df():
    """Return a constant greeting; the task is declared with checkpointing."""
    return 'hello'


# Single-task flow: calling the task inside the context registers it.
with Flow('test checkpoint') as flow:
    f = print_df()

flow.run()
def test_basic_conversion_local_result(tmpdir):
    """Converting a `LocalResultHandler` gives a `LocalResult` with its dir."""
    target_dir = str(tmpdir)
    handler = LocalResultHandler(dir=target_dir)
    converted = ResultHandlerResult.from_result_handler(handler)

    assert isinstance(converted, LocalResult)
    assert converted.dir == target_dir
def test_safe_results_with_different_handlers_are_not_same(self):
    """Equal values with different handler types compare as unequal."""
    json_backed = SafeResult("3", result_handler=JSONResultHandler())
    local_backed = SafeResult("3", result_handler=LocalResultHandler())
    assert json_backed != local_backed
# VBO data VBO_FILE = CONFIG.path.root / CONFIG.path.bag / (f"9999VBO{BAG_VERSION}" + ".zip") @task def create_xml_list(zip_file): """ Creates list of xml files from nested_zipfile which is in main BAG zipfile. """ with zipfile.ZipFile(zip_file) as z: return [f for f in z.namelist() if f.endswith(".xml")] ## TO DO: use results handler @task(checkpoint=True, result_handler=LocalResultHandler(dir=NUM_TMP_DIR.as_posix())) def parse_num(xml_file, tmp_dir=NUM_TMP_DIR): """Parse xml file in BAG NUM zip archive. Args: - xml_file: str of XML file to be processed in NUM zip archive Returns: - Path-object to ndjson file """ def remove_ns_keys(dict_, root): """Removes keys containing namespaces.""" keys = list(dict_[root].keys()) for key in keys:
import copy
from datetime import timedelta
from typing import Union, Dict

import prefect
from prefect.engine import cache_validators
from prefect import task, Flow, Parameter
from prefect.engine.result_handlers import LocalResultHandler


@task(
    result_handler=LocalResultHandler(),
    cache_for=timedelta(seconds=60),
    log_stdout=True,
)
def load_data() -> Dict:
    """Build, print and return a small two-column sample dataset."""
    sample = {"col_1": [3, 2, 1, 0], "col_2": ["a", "b", "c", "d"]}
    print(sample)
    return sample


@task(
    result_handler=LocalResultHandler(),
    cache_for=timedelta(seconds=60),
    log_stdout=True,
    # cache_key="testme"
)
def xform_data(data: Dict) -> Dict:
    """Return a deep copy of `data` with every `col_1` value doubled."""
    # Work on a deep copy so the caller's dict is left untouched.
    doubled = copy.deepcopy(data)
    doubled["col_1"] = [value * 2 for value in doubled["col_1"]]
    print(doubled)
    return doubled
'n_tags': n_tags, 'primary_category': primary_cat, 'categories': categories, 'author': author, 'authors': authors, 'n_authors': n_authors, 'url_pdf': url_pdf, 'url_href': url_href, 'date': post['published'] }) # Checkpointing # @task @task(checkpoint=False, result_handler=LocalResultHandler(dir="~/.prefect/ds-arxiv"), state_handlers=[slack_handler]) def df_get_arxiv( arx_list, arx_dict, ): """Loop all the arx_list categories and combine into one""" df = pd.DataFrame() for cat in arx_list: posts = feedparser.parse(arxiv_query(cat))['items'] for post in posts: df = df.append(parse_arxiv_post(post, arx_dict)) # Pares date df['date'] = pd.to_datetime(df['date']).dt.date.astype('str') return df
from prefect.engine import signals # there is a result handler for Azure, S3 # the following will just write to local file from prefect.engine.result_handlers import LocalResultHandler # signature has to be like so def alert_failed(obj, old_state, new_state): if new_state.is_failed(): print("failed") ## extract, with cache so it will not hit consumer finance.gov for one day ## stores in memory # @task(cache_for=datetime.timedelta(days=1), state_handlers=[alert_failed], result_handler=LocalResultHandler()) @task(state_handlers=[alert_failed], result_handler=LocalResultHandler()) def get_complaint_data(): r = requests.get( "https://www.consumerfinance.gov/data-research/consumer-complaints/search/api/v1/", params={'size': 10}) response_json = json.loads(r.text) print("I actually requested this time ") return response_json['hits']['hits'] ## transform @task def parse_complaint_data(raw, state_handlers=[alert_failed]): # raise complaints = []
from prefect.engine import signals from prefect.engine.result_handlers import local_result_handler, LocalResultHandler import prefect def alert_failed(obj, old_state, new_sate): if new_sate.is_failed(): print("New State or Flow is Failed!!") ##setup create_table = SQLiteScript( db='cfpbcomplaints.db', script='CREATE TABLE IF NOT EXISTS complaint (timestamp TEXT, state TEXT, product TEXT, company TEXT, complaint_what_happened TEXT)' ) ## extract @task(cache_for=datetime.timedelta(days=1), state_handlers=[alert_failed], result_handler=LocalResultHandler()) def get_complaint_data(): r = requests.get("https://www.consumerfinance.gov/data-research/consumer-complaints/search/api/v1/", params={'size':10}) response_json = json.loads(r.text) logger = prefect.context.get('logger') logger.info("Actually I requested this time") return response_json['hits']['hits'] ## transform @task(state_handlers=[alert_failed]) def parse_complaint_data(raw): # uncomment below line to see functionality of state handler # raise Exception # uncomment below line to see functionality of signals # raise signals.SUCCESS complaints = []
parameter_defaults=dict( url='http://www.insidethex.co.uk/')), ]), storage=Docker( # TODO: change to your docker registry: # https://docs.prefect.io/cloud/recipes/configuring_storage.html registry_url='szelenka', # TODO: 'pin' the exact versions you used on your development machine python_dependencies=[ 'requests==2.23.0', 'beautifulsoup4==4.8.2', 'sqlalchemy==1.3.15' ], ), # TODO: specify how you want to handle results # https://docs.prefect.io/core/concepts/results.html#results-and-result-handlers result_handler=LocalResultHandler()) as flow: _url = Parameter("url", default='http://www.insidethex.co.uk/') _bypass = Parameter("bypass", default=False, required=False) _db_file = Parameter("db_file", default='xfiles_db.sqlite', required=False) # scrape the website _home_page = retrieve_url(_url) _episodes = create_episode_list(base_url=_url, main_html=_home_page, bypass=_bypass) _episode = retrieve_url.map(_episodes) _dialogue = scrape_dialogue.map(_episode) # insert into SQLite table _db = create_db(filename=_db_file) _final = insert_episode.map(episode=_dialogue, tbl=unmapped(_db))
def test_serialize_local_result_handler_with_dir(self):
    """Dumping a handler built with an explicit dir keeps its type and dir."""
    import os  # local import: the file's import header is not visible here

    # Use the platform's filesystem root instead of a literal
    # "/root/prefect": the literal is POSIX-only and points into a location
    # that is normally not writable. The root exists everywhere and is
    # already absolute, so the serialized value can be compared verbatim.
    root_dir = os.path.abspath(os.sep)
    serialized = ResultHandlerSchema().dump(LocalResultHandler(dir=root_dir))

    assert isinstance(serialized, dict)
    assert serialized["type"] == "LocalResultHandler"
    assert serialized["dir"] == root_dir
def test_local_handler_initializes_with_no_args(self):
    """With no arguments, `dir` is the `results` folder under the home dir."""
    handler = LocalResultHandler()
    default_dir = os.path.join(prefect.config.home_dir, "results")
    assert handler.dir == default_dir
def test_serialize_local_result_handler_with_no_dir(self):
    """Dumping a handler built without a dir serializes `dir` as None."""
    schema = ResultHandlerSchema()
    payload = schema.dump(LocalResultHandler())

    assert isinstance(payload, dict)
    assert payload["type"] == "LocalResultHandler"
    assert payload["dir"] is None
def test_local_handler_initializes_with_dir(self):
    """A handler built on the filesystem root reports that root as `dir`."""
    filesystem_root = os.path.abspath(os.sep)
    assert LocalResultHandler(dir=filesystem_root).dir == filesystem_root
def test_create_flow_with_result_handler(self):
    """A flow keeps the handler it was built with, under both its types."""
    flow = Flow(name="test", result_handler=LocalResultHandler())
    for expected_type in (ResultHandler, LocalResultHandler):
        assert isinstance(flow.result_handler, expected_type)
def test_local_handler_cleverly_redirects_prefect_defaults(self):
    """Passing the prefect home dir as `dir` ends up in its `results` folder."""
    home = prefect.config.home_dir
    handler = LocalResultHandler(dir=home)
    assert handler.dir == os.path.join(home, "results")