Ejemplo n.º 1
0
    def test_local_handler_writes_and_writes_to_dir(self, tmp_dir, res):
        """Write ``res`` with a handler rooted at ``tmp_dir`` and inspect the file.

        Checks that ``write`` returns a string path whose basename starts with
        ``"prefect"`` and that the file at that path reads back as bytes.
        """
        written_path = LocalResultHandler(dir=tmp_dir).write(res)
        assert isinstance(written_path, str)

        file_name = os.path.basename(written_path)
        assert file_name.startswith("prefect")

        with open(written_path, "rb") as stream:
            payload = stream.read()
        assert isinstance(payload, bytes)
Ejemplo n.º 2
0
 def test_serialize_local_result_handler_with_dir(self):
     """Dump a handler pointed at the filesystem root and check the payload."""
     filesystem_root = os.path.abspath(os.sep)
     schema = ResultHandlerSchema()
     payload = schema.dump(LocalResultHandler(dir=filesystem_root))

     assert isinstance(payload, dict)
     assert payload["type"] == "LocalResultHandler"
     assert payload["dir"] == filesystem_root
Ejemplo n.º 3
0
 def test_deserialize_local_result_handler(self, dir):
     """Round-trip a handler through the schema and check the restored object.

     The restored object must be a ``LocalResultHandler`` with a logger named
     ``"prefect.LocalResultHandler"`` and the same ``dir`` it was built with.
     """
     schema = ResultHandlerSchema()
     serialized = schema.dump(LocalResultHandler(dir=dir))
     restored = schema.load(serialized)

     assert isinstance(restored, LocalResultHandler)
     assert hasattr(restored, "logger")
     assert restored.logger.name == "prefect.LocalResultHandler"
     assert restored.dir == dir
Ejemplo n.º 4
0
def test_task_runner_validates_cached_state_inputs_if_task_has_caching(client):
    """Check that the cached state carrying cached inputs is the one returned.

    ``client.get_latest_cached_states`` is mocked to return two ``Cached``
    states: one without cached inputs (result ``-1``) and one with a cached
    input ``x`` (result ``99``). The runner is asked to check the cache for
    ``x=2`` and must come back successful, cached, and with result ``99``.
    """

    @prefect.task(
        cache_for=datetime.timedelta(minutes=1),
        cache_validator=all_inputs,
        result_handler=JSONResultHandler(),
    )
    def cached_task(x):
        return 42

    two_minutes = datetime.timedelta(minutes=2)

    state_without_inputs = Cached(
        cached_result_expiration=datetime.datetime.utcnow() + two_minutes,
        result=Result(-1, JSONResultHandler()),
    )
    state_with_inputs = Cached(
        cached_result_expiration=datetime.datetime.utcnow() + two_minutes,
        result=Result(99, JSONResultHandler()),
        cached_inputs={"x": SafeResult("2", result_handler=JSONResultHandler())},
    )
    client.get_latest_cached_states = MagicMock(
        return_value=[state_without_inputs, state_with_inputs]
    )

    runner = CloudTaskRunner(task=cached_task)
    checked = runner.check_task_is_cached(
        Pending(),
        inputs={"x": Result(2, result_handler=LocalResultHandler())},
    )

    assert client.get_latest_cached_states.called
    assert checked.is_successful()
    assert checked.is_cached()
    assert checked.result == 99
Ejemplo n.º 5
0
    def __init__(self, directory: str = None, validate: bool = True) -> None:
        """Initialize the object with a base directory and a local result handler.

        Args:
            - directory (str, optional): base directory to use; when falsy,
                ``os.path.join(prefect.config.home_dir, "flows")`` is used
            - validate (bool, optional): when ``True`` (the default) the
                directory is user-expanded, made absolute and created if it
                does not exist; when ``False`` it is stored as given. The flag
                is also forwarded to ``LocalResultHandler``.
        """
        directory = directory or os.path.join(prefect.config.home_dir, "flows")
        self.flows = dict()  # type: Dict[str, str]

        if validate:
            abs_directory = os.path.abspath(os.path.expanduser(directory))
            if not os.path.exists(abs_directory):
                # exist_ok=True: another process may create the directory
                # between the existence check above and this call; that must
                # not surface as a FileExistsError.
                os.makedirs(abs_directory, exist_ok=True)
        else:
            abs_directory = directory

        self.directory = abs_directory
        result_handler = LocalResultHandler(self.directory, validate=validate)
        super().__init__(result_handler=result_handler)
Ejemplo n.º 6
0
 def test_local_handler_is_pickleable(self):
     """A handler survives a cloudpickle dumps/loads round trip."""
     pickled = cloudpickle.dumps(LocalResultHandler(dir="root"))
     restored = cloudpickle.loads(pickled)
     assert isinstance(restored, LocalResultHandler)
Ejemplo n.º 7
0
    def test_to_result_returns_self_for_no_results(self):
        """``NoResult.to_result()`` hands back ``NoResult`` itself."""
        converted = NoResult.to_result()
        assert converted is NoResult

    def test_to_result_returns_hydrated_result_for_safe(self):
        """``SafeResult.to_result()`` yields a ``Result`` tied back to its source.

        The hydrated value is ``3``, its ``safe_value`` is the original safe
        result, and both share the same result handler object.
        """
        safe = SafeResult("3", result_handler=JSONResultHandler())
        hydrated = safe.to_result()

        assert isinstance(hydrated, Result)
        assert hydrated.value == 3
        assert hydrated.safe_value is safe
        assert hydrated.result_handler is safe.result_handler


@pytest.mark.parametrize(
    "obj",
    [
        Result(3),
        Result(object, result_handler=LocalResultHandler()),
        NoResult,
        SafeResult("3", result_handler=JSONResultHandler()),
    ],
)
def test_everything_is_pickleable_after_init(obj):
    """Each parametrized object equals itself after a cloudpickle round trip."""
    serialized = cloudpickle.dumps(obj)
    round_tripped = cloudpickle.loads(serialized)
    assert round_tripped == obj


def test_results_are_pickleable_with_their_safe_values():
    """A result still equals itself after pickling once its safe value is stored."""
    original = Result(3, result_handler=JSONResultHandler())
    original.store_safe_value()

    restored = cloudpickle.loads(cloudpickle.dumps(original))
    assert restored == original
Ejemplo n.º 8
0
from prefect import Flow, task
from prefect.engine.result_handlers import LocalResultHandler

@task
def result_here():
    """Return the constant string ``"result"``."""
    return "result"

@task
def get_it(x):
    """Print ``x`` to standard output."""
    print(x)

# Build a flow, configured with a LocalResultHandler, in which the output of
# result_here is passed to get_it.
with Flow("test-checkpoint", result_handler=LocalResultHandler()) as f:
    r = result_here()
    get_it(r)

# Run the flow.
f.run()
# print(f.result_handler)
Ejemplo n.º 9
0
 def test_local_handler_writes_and_reads(self, tmp_dir, res):
     """Reading back what the handler wrote returns a value equal to ``res``."""
     handler = LocalResultHandler(dir=tmp_dir)
     location = handler.write(res)
     round_tripped = handler.read(location)
     assert round_tripped == res
Ejemplo n.º 10
0
 def test_local_handler_initializes_with_no_args(self):
     """Constructing a handler without arguments must not raise."""
     LocalResultHandler()
Ejemplo n.º 11
0
 def test_local_handler_initializes_with_dir(self):
     """The ``dir`` passed to the constructor is exposed unchanged."""
     target = "/.prefect"
     handler = LocalResultHandler(dir=target)
     assert handler.dir == target
Ejemplo n.º 12
0
from prefect import task, Flow
from prefect.engine.result_handlers import LocalResultHandler


@task(
    checkpoint=True,
    result_handler=LocalResultHandler(dir="~/.prefect"),
)
def print_df():
    """Return the constant string ``'hello'``."""
    return "hello"


# Call print_df inside the flow context so that it becomes part of the flow.
with Flow('test checkpoint') as flow:
    f = print_df()

# Run the flow.
flow.run()
def test_basic_conversion_local_result(tmpdir):
    """Converting a ``LocalResultHandler`` yields a ``LocalResult`` with the same dir."""
    directory = str(tmpdir)
    converted = ResultHandlerResult.from_result_handler(
        LocalResultHandler(dir=directory)
    )
    assert isinstance(converted, LocalResult)
    assert converted.dir == directory
Ejemplo n.º 14
0
 def test_safe_results_with_different_handlers_are_not_same(self):
     """Equal values with different handler types do not compare equal."""
     json_backed = SafeResult("3", result_handler=JSONResultHandler())
     local_backed = SafeResult("3", result_handler=LocalResultHandler())
     assert json_backed != local_backed
Ejemplo n.º 15
0
# VBO data: zip file named after BAG_VERSION, under the configured root/bag path
VBO_FILE = CONFIG.path.root / CONFIG.path.bag / f"9999VBO{BAG_VERSION}.zip"


@task
def create_xml_list(zip_file):
    """Return the names of all members of ``zip_file`` that end in ``.xml``.

    Args:
        - zip_file: anything ``zipfile.ZipFile`` accepts as its first argument

    Returns:
        - list of member names ending in ``.xml``, in ``namelist()`` order
    """
    with zipfile.ZipFile(zip_file) as archive:
        member_names = archive.namelist()
    return [name for name in member_names if name.endswith(".xml")]


# TODO: use a result handler
@task(checkpoint=True, result_handler=LocalResultHandler(dir=NUM_TMP_DIR.as_posix()))
def parse_num(xml_file, tmp_dir=NUM_TMP_DIR):
    """Parse xml file in BAG NUM zip archive.

    Args:
        - xml_file: str of XML file to be processed in NUM zip archive

    Returns:
        - Path-object to ndjson file

    """

    def remove_ns_keys(dict_, root):
        """Removes keys containing namespaces."""
        keys = list(dict_[root].keys())
        for key in keys:
Ejemplo n.º 16
0
import copy
from datetime import timedelta
from typing import Union, Dict
import prefect
from prefect.engine import cache_validators
from prefect import task, Flow, Parameter
from prefect.engine.result_handlers import LocalResultHandler


@task(
    result_handler=LocalResultHandler(),
    cache_for=timedelta(seconds=60),
    log_stdout=True,
)
def load_data() -> Dict:
    """Build, print and return a small two-column sample dataset."""
    sample = {
        "col_1": [3, 2, 1, 0],
        "col_2": ["a", "b", "c", "d"],
    }
    print(sample)
    return sample


@task(
    result_handler=LocalResultHandler(),
    cache_for=timedelta(seconds=60),
    log_stdout=True,
    # cache_key="testme"
)
def xform_data(data: Dict) -> Dict:
    """Return a deep copy of ``data`` with every ``col_1`` value doubled.

    The input mapping is not modified; the transformed copy is printed before
    it is returned.
    """
    result = copy.deepcopy(data)
    doubled = [value * 2 for value in result["col_1"]]
    result["col_1"] = doubled
    print(result)
    return result
Ejemplo n.º 17
0
        'n_tags': n_tags,
        'primary_category': primary_cat,
        'categories': categories,
        'author': author,
        'authors': authors,
        'n_authors': n_authors,
        'url_pdf': url_pdf,
        'url_href': url_href,
        'date': post['published']
    })


# Checkpointing
# @task
@task(checkpoint=False,
      result_handler=LocalResultHandler(dir="~/.prefect/ds-arxiv"),
      state_handlers=[slack_handler])
def df_get_arxiv(
    arx_list,
    arx_dict,
):
    """Query every category in ``arx_list`` and combine the posts into one frame.

    For each category the feed returned by ``arxiv_query(cat)`` is parsed with
    ``feedparser`` and every entry under ``'items'`` is converted with
    ``parse_arxiv_post(post, arx_dict)`` and appended to a single DataFrame.

    Args:
        - arx_list: iterable of categories, each passed to ``arxiv_query``
        - arx_dict: passed through unchanged to ``parse_arxiv_post``

    Returns:
        - the combined DataFrame, with its ``'date'`` column converted to
          string-typed calendar dates
    """
    df = pd.DataFrame()
    for cat in arx_list:
        posts = feedparser.parse(arxiv_query(cat))['items']
        for post in posts:
            # NOTE(review): DataFrame.append copies the frame on every call and
            # is, as far as I know, no longer available in pandas >= 2.0;
            # consider collecting the pieces and calling pd.concat once.
            # Confirm what parse_arxiv_post returns before changing this.
            df = df.append(parse_arxiv_post(post, arx_dict))

    # Parse date: keep only the calendar date and store it as a string
    df['date'] = pd.to_datetime(df['date']).dt.date.astype('str')
    return df
Ejemplo n.º 18
0
from prefect.engine import signals
# there is a result handler for Azure, S3
# the following will just write to local file
from prefect.engine.result_handlers import LocalResultHandler


# signature has to be like so
def alert_failed(obj, old_state, new_state):
    """Print ``"failed"`` when ``new_state`` reports a failure.

    Args:
        - obj: unused
        - old_state: unused
        - new_state: object exposing ``is_failed()``

    Returns:
        - None
    """
    if not new_state.is_failed():
        return
    print("failed")


## extract; the commented-out decorator below caches the result for one day so that
## consumerfinance.gov is not hit on every run (the active decorator does not cache)
# @task(cache_for=datetime.timedelta(days=1), state_handlers=[alert_failed], result_handler=LocalResultHandler())
@task(state_handlers=[alert_failed], result_handler=LocalResultHandler())
def get_complaint_data():
    """Query the consumerfinance.gov complaint search API with ``size=10``.

    Returns:
        - the value stored under ``['hits']['hits']`` in the JSON response
    """
    response = requests.get(
        "https://www.consumerfinance.gov/data-research/consumer-complaints/search/api/v1/",
        params={"size": 10},
    )
    payload = json.loads(response.text)
    print("I actually requested this time ")
    return payload["hits"]["hits"]


## transform
@task
def parse_complaint_data(raw, state_handlers=[alert_failed]):
    # raise

    complaints = []
Ejemplo n.º 19
0
from prefect.engine import signals
from prefect.engine.result_handlers import local_result_handler, LocalResultHandler
import prefect

def alert_failed(obj, old_state, new_sate):
    """Print a failure notice when the incoming state reports a failure.

    Args:
        - obj: unused
        - old_state: unused
        - new_sate: object exposing ``is_failed()`` (parameter name kept as
            declared so existing callers are unaffected)

    Returns:
        - None
    """
    if not new_sate.is_failed():
        return
    print("New State or Flow is Failed!!")

## setup: SQLiteScript against cfpbcomplaints.db that creates the `complaint`
## table when it does not exist yet
create_table = SQLiteScript(
    db='cfpbcomplaints.db',
    script='CREATE TABLE IF NOT EXISTS complaint (timestamp TEXT, state TEXT, product TEXT, company TEXT, complaint_what_happened TEXT)'
)

## extract
@task(
    cache_for=datetime.timedelta(days=1),
    state_handlers=[alert_failed],
    result_handler=LocalResultHandler(),
)
def get_complaint_data():
    """Query the consumerfinance.gov complaint search API with ``size=10``.

    An info message is logged through the logger taken from
    ``prefect.context`` each time the request is actually made.

    Returns:
        - the value stored under ``['hits']['hits']`` in the JSON response
    """
    response = requests.get(
        "https://www.consumerfinance.gov/data-research/consumer-complaints/search/api/v1/",
        params={"size": 10},
    )
    payload = json.loads(response.text)
    run_logger = prefect.context.get("logger")
    run_logger.info("Actually I requested this time")
    return payload["hits"]["hits"]

## transform
@task(state_handlers=[alert_failed])
def parse_complaint_data(raw):
    # uncomment below line to see functionality of state handler
    # raise Exception
    # uncomment below line to see functionality of signals
    # raise signals.SUCCESS
    complaints = []
Ejemplo n.º 20
0
                      parameter_defaults=dict(
                          url='http://www.insidethex.co.uk/')),
        ]),
        storage=Docker(
            # TODO: change to your docker registry:
            #  https://docs.prefect.io/cloud/recipes/configuring_storage.html
            registry_url='szelenka',
            # TODO: 'pin' the exact versions you used on your development machine
            python_dependencies=[
                'requests==2.23.0', 'beautifulsoup4==4.8.2',
                'sqlalchemy==1.3.15'
            ],
        ),
        # TODO: specify how you want to handle results
        #  https://docs.prefect.io/core/concepts/results.html#results-and-result-handlers
        result_handler=LocalResultHandler()) as flow:
    _url = Parameter("url", default='http://www.insidethex.co.uk/')
    _bypass = Parameter("bypass", default=False, required=False)
    _db_file = Parameter("db_file", default='xfiles_db.sqlite', required=False)

    # scrape the website
    _home_page = retrieve_url(_url)
    _episodes = create_episode_list(base_url=_url,
                                    main_html=_home_page,
                                    bypass=_bypass)
    _episode = retrieve_url.map(_episodes)
    _dialogue = scrape_dialogue.map(_episode)

    # insert into SQLite table
    _db = create_db(filename=_db_file)
    _final = insert_episode.map(episode=_dialogue, tbl=unmapped(_db))
Ejemplo n.º 21
0
 def test_serialize_local_result_handler_with_dir(self):
     """Dump a handler with an explicit ``dir`` and check the payload."""
     target_dir = "/root/prefect"
     schema = ResultHandlerSchema()
     payload = schema.dump(LocalResultHandler(dir=target_dir))

     assert isinstance(payload, dict)
     assert payload["type"] == "LocalResultHandler"
     assert payload["dir"] == target_dir
Ejemplo n.º 22
0
 def test_local_handler_initializes_with_no_args(self):
     """Without arguments the handler's ``dir`` is ``<home_dir>/results``."""
     handler = LocalResultHandler()
     expected_dir = os.path.join(prefect.config.home_dir, "results")
     assert handler.dir == expected_dir
Ejemplo n.º 23
0
 def test_serialize_local_result_handler_with_no_dir(self):
     """A handler built without ``dir`` serializes with ``dir`` set to ``None``."""
     schema = ResultHandlerSchema()
     payload = schema.dump(LocalResultHandler())

     assert isinstance(payload, dict)
     assert payload["type"] == "LocalResultHandler"
     assert payload["dir"] is None
Ejemplo n.º 24
0
 def test_local_handler_initializes_with_dir(self):
     """A handler given the filesystem root exposes that same path as ``dir``."""
     filesystem_root = os.path.abspath(os.sep)
     assert LocalResultHandler(dir=filesystem_root).dir == filesystem_root
Ejemplo n.º 25
0
 def test_create_flow_with_result_handler(self):
     """A flow keeps the result handler it was constructed with."""
     flow = Flow(name="test", result_handler=LocalResultHandler())
     for expected_type in (ResultHandler, LocalResultHandler):
         assert isinstance(flow.result_handler, expected_type)
Ejemplo n.º 26
0
 def test_local_handler_cleverly_redirects_prefect_defaults(self):
     """Passing the Prefect home dir results in ``<home_dir>/results`` as ``dir``."""
     home = prefect.config.home_dir
     handler = LocalResultHandler(dir=home)
     assert handler.dir == os.path.join(home, "results")