Example #1
def on_modified(self, event, dry_run=False, remove_uploaded=True):
    """Called when a file (or directory) is modified."""
    super(ArchiveEventHandler, self).on_modified(event)
    src_path = event.src_path
    if event.is_directory:
        if not platform.is_darwin():
            log.info("event is a directory, safe to ignore")
            return
        # OSX is behaving erratically and we need to paper over it:
        # the OS only reports every other file event, but always fires off
        # a directory event when a file has changed (OSX 10.9 tested),
        # so we need to find the actual file changed and go on from there.
        files = [event.src_path + "/" + f for f in os.listdir(event.src_path)]
        try:
            src_path = max(files, key=os.path.getmtime)
        except (OSError, ValueError):  # broken symlink or empty directory
            return
    log.info('Modified file detected: %s', src_path)
    #
    # We're receiving events at least two times: on file open and on file
    # close, and OSes might report even more. We're only interested in files
    # that are closed (finished), so we try to open the file: if it is
    # locked, we can infer that someone is still writing to it.
    # This works on Windows; support on other platforms is uncertain.
    # TODO: investigate http://stackoverflow.com/a/3876461 for POSIX support
    try:
        open(src_path).close()                    # Windows: fails while a writer holds the file
        os.close(os.open(src_path, os.O_EXLOCK))  # macOS/BSD: exclusive lock
    except IOError:  # file is not finished
        log.info('File is not finished')
        return
    except AttributeError:  # no support for os.O_EXLOCK (BSD/macOS only)
        pass
    return self._new(src_path, dry_run, remove_uploaded)
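
The TODO above sketches POSIX support. A minimal, hedged sketch of that idea using fcntl.flock (a hypothetical helper, not part of the original handler; advisory locks only detect writers that also call flock(), so a plain open() by another process will not show up):

import fcntl  # POSIX only
import os

def _is_finished_posix(path):
    """Hypothetical POSIX variant of the lock probe above: try to take a
    non-blocking exclusive advisory lock on the file."""
    fd = os.open(path, os.O_RDONLY)
    try:
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        fcntl.flock(fd, fcntl.LOCK_UN)
        return True   # no advisory lock held -> writer is likely finished
    except OSError:   # lock held elsewhere -> still being written
        return False
    finally:
        os.close(fd)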
Example #2
def test_recursive_off():
    mkdir(p('dir1'))
    start_watching(recursive=False)
    touch(p('dir1', 'a'))

    with pytest.raises(Empty):
        event_queue.get(timeout=5)

    mkfile(p('b'))
    expect_event(FileCreatedEvent(p('b')))
    if not platform.is_windows():
        expect_event(DirModifiedEvent(p()))

        if platform.is_linux():
            expect_event(FileClosedEvent(p('b')))

    # currently limiting these additional events to macOS only, see https://github.com/gorakhargosh/watchdog/pull/779
    if platform.is_darwin():
        mkdir(p('dir1', 'dir2'))
        with pytest.raises(Empty):
            event_queue.get(timeout=5)
        mkfile(p('dir1', 'dir2', 'somefile'))
        with pytest.raises(Empty):
            event_queue.get(timeout=5)

        mkdir(p('dir3'))
        expect_event(DirModifiedEvent(p()))  # the contents of the parent directory changed

        mv(p('dir1', 'dir2', 'somefile'), p('somefile'))
        expect_event(FileMovedEvent(p('dir1', 'dir2', 'somefile'), p('somefile')))
        expect_event(DirModifiedEvent(p()))

        mv(p('dir1', 'dir2'), p('dir2'))
        expect_event(DirMovedEvent(p('dir1', 'dir2'), p('dir2')))
        expect_event(DirModifiedEvent(p()))
def start_watching(path=None):
    path = p('') if path is None else path
    global emitter
    emitter = Emitter(event_queue, ObservedWatch(path, recursive=True))
    if platform.is_darwin():
        # FSEvents will report old events (like the create for mkdtemp in the
        # test setup). Waiting for a considerable time seems to 'flush' the events.
        time.sleep(10)
    emitter.start()
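
The tests above lean on helpers defined elsewhere in the suite: p (a functools.partial over os.path.join, shown in the setup_function excerpts below) and expect_event. A minimal sketch of the latter, assuming the shared event_queue global (the timeout default is an assumption):

def expect_event(expected_event, timeout=2):
    """Pop the next (event, watch) tuple off the shared queue and assert
    that the event matches the expectation."""
    assert event_queue.get(timeout=timeout)[0] == expected_event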
Example #4
def test_delete_self():
    mkdir(p('dir1'))
    start_watching(p('dir1'))
    rm(p('dir1'), True)

    if platform.is_darwin():
        event = event_queue.get(timeout=5)[0]
        assert event.src_path == p('dir1')
        assert isinstance(event, FileDeletedEvent)
Example #5
def start_watching(path=None):
    path = p('') if path is None else path
    global emitter
    emitter = Emitter(event_queue, ObservedWatch(path, recursive=True))
    if platform.is_darwin():
        # FSEvents will report old events (like the create for mkdtemp in the
        # test setup). Waiting for a considerable time seems to 'flush' the events.
        time.sleep(10)
    emitter.start()
def wait():
    """
    Wait long enough for file/folder mtime to change. This is needed
    to be able to detect modifications.
    """
    if platform.is_darwin() or platform.is_windows():
        # on macOS the resolution of stat.mtime is only 1 second
        time.sleep(1.5)
    else:
        time.sleep(0.5)
Example #7
def wait():
    """
    Wait long enough for file/folder mtime to change. This is needed
    to be able to detect modifications.
    """
    if platform.is_darwin() or platform.is_windows():
        # on macOS resolution of stat.mtime is only 1 second
        time.sleep(1.5)
    else:
        time.sleep(0.5)
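
A small illustration of why wait() exists (a sketch, assuming the touch and p helpers from this test suite): two writes that land within one mtime tick are indistinguishable to a snapshot-based differ, so the tests sleep past the tick before modifying a file again.

def demo_mtime_resolution():
    touch(p('f'))
    first = os.path.getmtime(p('f'))
    wait()  # sleep past the platform's mtime resolution
    touch(p('f'))
    assert os.path.getmtime(p('f')) > first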
Example #8
def _setup_emitter(path):
    event_queue = Queue()

    if platform.is_darwin():
        # FSEvents will report old events (like the create for mkdtemp in the
        # test setup). Waiting for a considerable time seems to 'flush' the events.
        time.sleep(10)

    emitter = Emitter(event_queue, ObservedWatch(path, recursive=True))
    emitter.start()
    return event_queue, emitter
Example #9
def _setup_emitter(path):
    event_queue = Queue()

    if platform.is_darwin():
        # FSEvents will report old events (like the create for mkdtemp in the
        # test setup). Waiting for a considerable time seems to 'flush' the events.
        time.sleep(10)

    emitter = Emitter(event_queue, ObservedWatch(path, recursive=True))
    emitter.start()
    return event_queue, emitter
Example #10
def start_watching(path=None, use_full_emitter=False, recursive=True):
    # todo: check if other platforms expect the trailing slash (e.g. `p('')`)
    path = p() if path is None else path
    global emitter
    if platform.is_linux() and use_full_emitter:
        emitter = InotifyFullEmitter(event_queue, ObservedWatch(path, recursive=recursive))
    else:
        emitter = Emitter(event_queue, ObservedWatch(path, recursive=recursive))

    if platform.is_darwin():
        emitter.suppress_history = True

    emitter.start()
Example #11
def start_watching(path=None, use_full_emitter=False):
    path = p("") if path is None else path
    global emitter
    if platform.is_linux() and use_full_emitter:
        emitter = InotifyFullEmitter(event_queue,
                                     ObservedWatch(path, recursive=True))
    else:
        emitter = Emitter(event_queue, ObservedWatch(path, recursive=True))

    if platform.is_darwin():
        # FSEvents will report old events (like the create for mkdtemp in the
        # test setup). Waiting for a considerable time seems to 'flush' the events.
        time.sleep(10)
    emitter.start()
Example #12
def start_watching(path=None, use_full_emitter=False):
    path = p('') if path is None else path
    global emitter
    if platform.is_linux() and use_full_emitter:
        emitter = InotifyFullEmitter(event_queue, ObservedWatch(path, recursive=True))
    else:
        emitter = Emitter(event_queue, ObservedWatch(path, recursive=True))

    if platform.is_darwin():
        # FSEvents will report old events (like the create for mkdtemp in the
        # test setup). Waiting for a considerable time seems to 'flush' the events.
        time.sleep(10)
    logger.debug('starting emitter')
    emitter.start()
Example #13
    def _create_observer(self):
        # The default macOS FSEventsObserver sometimes fails to pick up changes, so prefer
        # the KqueueObserver instead.
        # See: https://github.com/gorakhargosh/watchdog/blob/27588153a7aee849fbe36a608ee378b09a16e476/src/watchdog/observers/__init__.py
        if platform.is_darwin():
            try:
                from watchdog.observers.kqueue import KqueueObserver

                return KqueueObserver()
            except Exception:  # pylint: disable=broad-except
                warnings.warn(
                    "Failed to import KqueueObserver, falling back to default watchdog observer"
                )

        return Observer()
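
For context, this is how an observer returned by _create_observer() is typically wired up with watchdog's public API (a sketch; the handler class and watch path are illustrative):

import time

from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

class LogHandler(FileSystemEventHandler):
    def on_modified(self, event):
        print("modified:", event.src_path)

observer = Observer()  # or the KqueueObserver fallback from above
observer.schedule(LogHandler(), "/tmp/watched", recursive=True)
observer.start()
try:
    time.sleep(5)  # let events flow
finally:
    observer.stop()
    observer.join()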
Example #14
def on_modified(self, event):
    if is_darwin():
        # the on_modified event is fired for both the modified directory and the actual file
        if event.is_directory:
            modified_files = [os.path.join(event.src_path, f) for f in next(os.walk(event.src_path))[2]]
            mod_file = max(modified_files, key=os.path.getmtime)
            log.msg('Modified %s' % mod_file)
            self.msg_clients(mod_file)
        else:
            log.msg('Modified %s' % event.src_path)
            self.msg_clients(event.src_path)
    else:
        if os.path.exists(event.src_path):
            log.msg('Modified %s' % event.src_path)
            self.msg_clients(event.src_path)
Example #15
def start_watching(path=None, use_full_emitter=False, recursive=True):
    # todo: check if other platforms expect the trailing slash (e.g. `p('')`)
    path = p() if path is None else path
    global emitter
    if platform.is_linux() and use_full_emitter:
        emitter = InotifyFullEmitter(event_queue,
                                     ObservedWatch(path, recursive=recursive))
    else:
        emitter = Emitter(event_queue, ObservedWatch(path,
                                                     recursive=recursive))

    emitter.start()

    if platform.is_darwin():
        # FSEvents _may_ report the event for the creation of `tmpdir`,
        # however, we're racing with fseventd there - if other filesystem
        # events happened _after_ `tmpdir` was created, but _before_ we
        # created the emitter then we won't get this event.
        # As such, let's create a sentinel event that tells us that we are
        # good to go.
        sentinel_file = os.path.join(
            path,
            '.sentinel' if isinstance(path, str) else '.sentinel'.encode())
        touch(sentinel_file)
        sentinel_events = [
            FileCreatedEvent(sentinel_file),
            DirModifiedEvent(path),
            FileModifiedEvent(sentinel_file)
        ]
        next_sentinel_event = sentinel_events.pop(0)
        now = time.monotonic()
        while time.monotonic() <= now + 30.0:
            try:
                event = event_queue.get(timeout=0.5)[0]
                if event == next_sentinel_event:
                    if not sentinel_events:
                        break
                    next_sentinel_event = sentinel_events.pop(0)
            except Empty:
                pass
            time.sleep(0.1)
        else:
            assert False, "Sentinel event never arrived!"
Example #16
def test_modify():
    touch(p('a'))
    start_watching()

    touch(p('a'))

    # Because the tests run so fast, on macOS it is almost certain that we
    # receive a coalesced event from fseventsd here, which triggers an
    # additional file created event and dir modified event.
    if platform.is_darwin():
        expect_event(FileCreatedEvent(p('a')))
        expect_event(DirModifiedEvent(p()))

    expect_event(FileModifiedEvent(p('a')))

    if platform.is_linux():
        event = event_queue.get(timeout=5)[0]
        assert event.src_path == p('a')
        assert isinstance(event, FileClosedEvent)
Example #17
    FileCreatedEvent,
    FileMovedEvent,
    DirDeletedEvent,
    DirModifiedEvent,
    DirCreatedEvent,
    DirMovedEvent,
    FileClosedEvent,
)
from watchdog.observers.api import ObservedWatch

if platform.is_linux():
    from watchdog.observers.inotify import (
        InotifyEmitter as Emitter,
        InotifyFullEmitter,
    )
elif platform.is_darwin():
    from watchdog.observers.fsevents import FSEventsEmitter as Emitter
elif platform.is_windows():
    from watchdog.observers.read_directory_changes import (
        WindowsApiEmitter as Emitter
    )
elif platform.is_bsd():
    from watchdog.observers.kqueue import (
        KqueueEmitter as Emitter
    )

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

if platform.is_darwin():
    # enable more verbose logs
    fsevents_logger = logging.getLogger("fsevents")
    fsevents_logger.setLevel(logging.DEBUG)
Example #18
from watchdog.utils.dirsnapshot import DirectorySnapshot

from watchdog.events import (DirMovedEvent, DirDeletedEvent, DirCreatedEvent,
                             DirModifiedEvent, FileMovedEvent,
                             FileDeletedEvent, FileCreatedEvent,
                             FileModifiedEvent, EVENT_TYPE_MOVED,
                             EVENT_TYPE_DELETED, EVENT_TYPE_CREATED)

# Maximum number of events to process.
MAX_EVENTS = 4096

# O_EVTONLY value from the header files for OS X only.
O_EVTONLY = 0x8000

# Pre-calculated values for the kevent filter, flags, and fflags attributes.
if platform.is_darwin():
    WATCHDOG_OS_OPEN_FLAGS = O_EVTONLY
else:
    WATCHDOG_OS_OPEN_FLAGS = os.O_RDONLY | os.O_NONBLOCK
WATCHDOG_KQ_FILTER = select.KQ_FILTER_VNODE
WATCHDOG_KQ_EV_FLAGS = select.KQ_EV_ADD | select.KQ_EV_ENABLE | select.KQ_EV_CLEAR
WATCHDOG_KQ_FFLAGS = (select.KQ_NOTE_DELETE | select.KQ_NOTE_WRITE
                      | select.KQ_NOTE_EXTEND | select.KQ_NOTE_ATTRIB
                      | select.KQ_NOTE_LINK | select.KQ_NOTE_RENAME
                      | select.KQ_NOTE_REVOKE)

# Flag tests.


def is_deleted(kev):
    """Determines whether the given kevent represents deletion."""
    return kev.fflags & select.KQ_NOTE_DELETE
Example #19
class TestEventLogStorage:
    """
    You can extend this class to easily run this set of tests on any event log storage. When extending,
    you simply need to override the `event_log_storage` fixture and return your implementation of
    `EventLogStorage`.

    For example:

    ```
    class TestMyStorageImplementation(TestEventLogStorage):
        __test__ = True

        @pytest.fixture(scope='function', name='storage')
        def event_log_storage(self):  # pylint: disable=arguments-differ
            return MyStorageImplementation()
    ```
    """

    __test__ = False

    @pytest.fixture(name="storage", params=[])
    def event_log_storage(self, request):
        with request.param() as s:
            yield s

    def test_init_log_storage(self, storage):
        if isinstance(storage, InMemoryEventLogStorage):
            assert not storage.is_persistent
        else:
            assert storage.is_persistent

    def test_log_storage_run_not_found(self, storage):
        assert storage.get_logs_for_run("bar") == []

    def can_wipe(self):
        # Whether the storage is allowed to wipe the event log
        return True

    def can_watch(self):
        # whether the storage is allowed to subscribe to runs
        # for event log updates
        return True

    def test_event_log_storage_store_events_and_wipe(self, storage):
        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 0
        storage.store_event(
            EventRecord(
                None,
                "Message2",
                "debug",
                "",
                DEFAULT_RUN_ID,
                time.time(),
                dagster_event=DagsterEvent(
                    DagsterEventType.ENGINE_EVENT.value,
                    "nonce",
                    event_specific_data=EngineEventData.in_process(999),
                ),
            ))
        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 1
        assert storage.get_stats_for_run(DEFAULT_RUN_ID)

        if self.can_wipe():
            storage.wipe()
            assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 0

    def test_event_log_storage_store_with_multiple_runs(self, storage):
        runs = ["foo", "bar", "baz"]
        for run_id in runs:
            assert len(storage.get_logs_for_run(run_id)) == 0
            storage.store_event(
                EventRecord(
                    None,
                    "Message2",
                    "debug",
                    "",
                    run_id,
                    time.time(),
                    dagster_event=DagsterEvent(
                        DagsterEventType.STEP_SUCCESS.value,
                        "nonce",
                        event_specific_data=StepSuccessData(duration_ms=100.0),
                    ),
                ))

        for run_id in runs:
            assert len(storage.get_logs_for_run(run_id)) == 1
            assert storage.get_stats_for_run(run_id).steps_succeeded == 1

        if self.can_wipe():
            storage.wipe()
            for run_id in runs:
                assert len(storage.get_logs_for_run(run_id)) == 0

    @pytest.mark.skipif(
        platform.is_darwin(),
        reason="watchdog's default macOS FSEventsObserver sometimes fails to pick up changes",
    )
    def test_event_log_storage_watch(self, storage):
        if not self.can_watch():
            pytest.skip("storage cannot watch runs")

        watched = []
        watcher = lambda x: watched.append(x)  # pylint: disable=unnecessary-lambda

        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 0

        storage.store_event(create_test_event_log_record(str(1)))
        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 1
        assert len(watched) == 0

        storage.watch(DEFAULT_RUN_ID, 0, watcher)

        storage.store_event(create_test_event_log_record(str(2)))
        storage.store_event(create_test_event_log_record(str(3)))
        storage.store_event(create_test_event_log_record(str(4)))

        attempts = 10
        while len(watched) < 3 and attempts > 0:
            time.sleep(0.5)
            attempts -= 1
        assert len(watched) == 3

        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 4

        storage.end_watch(DEFAULT_RUN_ID, watcher)
        time.sleep(
            0.3
        )  # this value scientifically selected from a range of attractive values
        storage.store_event(create_test_event_log_record(str(5)))

        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 5
        assert len(watched) == 3

        storage.delete_events(DEFAULT_RUN_ID)

        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 0
        assert len(watched) == 3

        assert [int(evt.message) for evt in watched] == [2, 3, 4]

    def test_event_log_storage_pagination(self, storage):
        storage.store_event(create_test_event_log_record(str(0)))
        storage.store_event(create_test_event_log_record(str(1)))
        storage.store_event(create_test_event_log_record(str(2)))

        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 3
        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID, -1)) == 3
        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID, 0)) == 2
        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID, 1)) == 1
        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID, 2)) == 0

    def test_event_log_delete(self, storage):
        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 0
        storage.store_event(create_test_event_log_record(str(0)))
        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 1
        assert storage.get_stats_for_run(DEFAULT_RUN_ID)
        storage.delete_events(DEFAULT_RUN_ID)
        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 0

    def test_event_log_get_stats_without_start_and_success(self, storage):
        # When an event log doesn't have a PIPELINE_START or PIPELINE_SUCCESS | PIPELINE_FAILURE event,
        # we want to ensure storage.get_stats_for_run(...) doesn't throw an error.
        assert len(storage.get_logs_for_run(DEFAULT_RUN_ID)) == 0
        assert storage.get_stats_for_run(DEFAULT_RUN_ID)

    def test_event_log_get_stats_for_run(self, storage):
        import math

        enqueued_time = time.time()
        launched_time = enqueued_time + 20
        start_time = launched_time + 50
        storage.store_event(
            EventRecord(
                None,
                "message",
                "debug",
                "",
                DEFAULT_RUN_ID,
                enqueued_time,
                dagster_event=DagsterEvent(
                    DagsterEventType.PIPELINE_ENQUEUED.value,
                    "nonce",
                ),
            ))
        storage.store_event(
            EventRecord(
                None,
                "message",
                "debug",
                "",
                DEFAULT_RUN_ID,
                launched_time,
                dagster_event=DagsterEvent(
                    DagsterEventType.PIPELINE_STARTING.value,
                    "nonce",
                ),
            ))
        storage.store_event(
            EventRecord(
                None,
                "message",
                "debug",
                "",
                DEFAULT_RUN_ID,
                start_time,
                dagster_event=DagsterEvent(
                    DagsterEventType.PIPELINE_START.value,
                    "nonce",
                ),
            ))
        assert math.isclose(
            storage.get_stats_for_run(DEFAULT_RUN_ID).enqueued_time,
            enqueued_time)
        assert math.isclose(
            storage.get_stats_for_run(DEFAULT_RUN_ID).launch_time,
            launched_time)
        assert math.isclose(
            storage.get_stats_for_run(DEFAULT_RUN_ID).start_time, start_time)

    def test_event_log_step_stats(self, storage):
        # When an event log doesn't have a PIPELINE_START or PIPELINE_SUCCESS | PIPELINE_FAILURE event,
        # we want to ensure storage.get_stats_for_run(...) doesn't throw an error.

        for record in _stats_records(run_id=DEFAULT_RUN_ID):
            storage.store_event(record)

        step_stats = storage.get_step_stats_for_run(DEFAULT_RUN_ID)
        assert len(step_stats) == 4

        a_stats = [stats for stats in step_stats if stats.step_key == "A"][0]
        assert a_stats.step_key == "A"
        assert a_stats.status.value == "SUCCESS"
        assert a_stats.end_time - a_stats.start_time == 100

        b_stats = [stats for stats in step_stats if stats.step_key == "B"][0]
        assert b_stats.step_key == "B"
        assert b_stats.status.value == "FAILURE"
        assert b_stats.end_time - b_stats.start_time == 50

        c_stats = [stats for stats in step_stats if stats.step_key == "C"][0]
        assert c_stats.step_key == "C"
        assert c_stats.status.value == "SKIPPED"
        assert c_stats.end_time - c_stats.start_time == 25

        d_stats = [stats for stats in step_stats if stats.step_key == "D"][0]
        assert d_stats.step_key == "D"
        assert d_stats.status.value == "SUCCESS"
        assert d_stats.end_time - d_stats.start_time == 150
        assert len(d_stats.materializations) == 3
        assert len(d_stats.expectation_results) == 2

    def test_secondary_index(self, storage):
        if not isinstance(storage, SqlEventLogStorage):
            pytest.skip("This test is for SQL-backed Event Log behavior")

        # test that newly initialized DBs will have the secondary indexes built
        for name in REINDEX_DATA_MIGRATIONS.keys():
            assert storage.has_secondary_index(name)

        # test the generic API with garbage migration names
        assert not storage.has_secondary_index("_A")
        assert not storage.has_secondary_index("_B")
        storage.enable_secondary_index("_A")
        assert storage.has_secondary_index("_A")
        assert not storage.has_secondary_index("_B")
        storage.enable_secondary_index("_B")
        assert storage.has_secondary_index("_A")
        assert storage.has_secondary_index("_B")

    def test_basic_event_store(self, storage):
        if not isinstance(storage, SqlEventLogStorage):
            pytest.skip("This test is for SQL-backed Event Log behavior")

        @solid
        def return_one(_):
            return 1

        def _solids():
            return_one()

        events, _result = _synthesize_events(_solids, run_id=DEFAULT_RUN_ID)

        for event in events:
            storage.store_event(event)

        rows = _fetch_all_events(storage, run_id=DEFAULT_RUN_ID)

        out_events = list(
            map(lambda r: deserialize_json_to_dagster_namedtuple(r[0]), rows))

        # messages can come out of order
        event_type_counts = Counter(_event_types(out_events))
        assert event_type_counts
        assert Counter(_event_types(out_events)) == Counter(
            _event_types(events))

    def test_basic_get_logs_for_run(self, storage):
        @solid
        def return_one(_):
            return 1

        def _solids():
            return_one()

        events, result = _synthesize_events(_solids)

        for event in events:
            storage.store_event(event)

        out_events = storage.get_logs_for_run(result.run_id)

        assert _event_types(out_events) == _event_types(events)

    def test_wipe_sql_backed_event_log(self, storage):
        @solid
        def return_one(_):
            return 1

        def _solids():
            return_one()

        events, result = _synthesize_events(_solids)

        for event in events:
            storage.store_event(event)

        out_events = storage.get_logs_for_run(result.run_id)

        assert _event_types(out_events) == _event_types(events)

        if self.can_wipe():
            storage.wipe()

            assert storage.get_logs_for_run(result.run_id) == []

    def test_delete_sql_backed_event_log(self, storage):
        @solid
        def return_one(_):
            return 1

        def _solids():
            return_one()

        events, result = _synthesize_events(_solids)

        for event in events:
            storage.store_event(event)

        out_events = storage.get_logs_for_run(result.run_id)

        assert _event_types(out_events) == _event_types(events)

        storage.delete_events(result.run_id)

        assert storage.get_logs_for_run(result.run_id) == []

    @pytest.mark.skip("https://github.com/dagster-io/dagster/issues/3621")
    def test_basic_get_logs_for_run_cursor(self, storage):
        @solid
        def return_one(_):
            return 1

        def _solids():
            return_one()

        events, result = _synthesize_events(_solids)

        for event in events:
            storage.store_event(event)

        assert _event_types(storage.get_logs_for_run(
            result.run_id, cursor=0)) == _event_types(events)

        assert _event_types(storage.get_logs_for_run(
            result.run_id, cursor=1)) == _event_types(events)

    def test_basic_get_logs_for_run_multiple_runs(self, storage):
        @solid
        def return_one(_):
            return 1

        def _solids():
            return_one()

        events_one, result_one = _synthesize_events(_solids)
        for event in events_one:
            storage.store_event(event)

        events_two, result_two = _synthesize_events(_solids)
        for event in events_two:
            storage.store_event(event)

        out_events_one = storage.get_logs_for_run(result_one.run_id)
        assert len(out_events_one) == len(events_one)

        assert set(_event_types(out_events_one)) == set(
            _event_types(events_one))

        assert set(map(lambda e: e.run_id,
                       out_events_one)) == {result_one.run_id}

        stats_one = storage.get_stats_for_run(result_one.run_id)
        assert stats_one.steps_succeeded == 1

        out_events_two = storage.get_logs_for_run(result_two.run_id)
        assert len(out_events_two) == len(events_two)

        assert set(_event_types(out_events_two)) == set(
            _event_types(events_two))

        assert set(map(lambda e: e.run_id,
                       out_events_two)) == {result_two.run_id}

        stats_two = storage.get_stats_for_run(result_two.run_id)
        assert stats_two.steps_succeeded == 1

    @pytest.mark.skip("https://github.com/dagster-io/dagster/issues/3621")
    def test_basic_get_logs_for_run_multiple_runs_cursors(self, storage):
        @solid
        def return_one(_):
            return 1

        def _solids():
            return_one()

        events_one, result_one = _synthesize_events(_solids)
        for event in events_one:
            storage.store_event(event)

        events_two, result_two = _synthesize_events(_solids)
        for event in events_two:
            storage.store_event(event)

        out_events_one = storage.get_logs_for_run(result_one.run_id, cursor=1)
        assert len(out_events_one) == len(events_one)

        assert set(_event_types(out_events_one)) == set(
            _event_types(events_one))

        assert set(map(lambda e: e.run_id,
                       out_events_one)) == {result_one.run_id}

        out_events_two = storage.get_logs_for_run(result_two.run_id, cursor=2)
        assert len(out_events_two) == len(events_two)
        assert set(_event_types(out_events_two)) == set(
            _event_types(events_one))

        assert set(map(lambda e: e.run_id,
                       out_events_two)) == {result_two.run_id}

    def test_event_watcher_single_run_event(self, storage):
        if not hasattr(storage, "event_watcher"):
            pytest.skip("This test requires an event_watcher attribute")

        @solid
        def return_one(_):
            return 1

        def _solids():
            return_one()

        event_list = []

        run_id = make_new_run_id()

        storage.event_watcher.watch_run(run_id, -1, event_list.append)

        events, _ = _synthesize_events(_solids, run_id=run_id)
        for event in events:
            storage.store_event(event)

        start = time.time()
        while len(event_list) < len(
                events) and time.time() - start < TEST_TIMEOUT:
            pass

        assert len(event_list) == len(events)
        assert all([isinstance(event, EventRecord) for event in event_list])

    def test_event_watcher_filter_run_event(self, storage):
        if not hasattr(storage, "event_watcher"):
            pytest.skip("This test requires an event_watcher attribute")

        @solid
        def return_one(_):
            return 1

        def _solids():
            return_one()

        run_id_one = make_new_run_id()
        run_id_two = make_new_run_id()

        # only watch one of the runs
        event_list = []
        storage.event_watcher.watch_run(run_id_two, 0, event_list.append)

        events_one, _result_one = _synthesize_events(_solids,
                                                     run_id=run_id_one)
        for event in events_one:
            storage.store_event(event)

        events_two, _result_two = _synthesize_events(_solids,
                                                     run_id=run_id_two)
        for event in events_two:
            storage.store_event(event)

        start = time.time()
        while len(event_list) < len(
                events_two) and time.time() - start < TEST_TIMEOUT:
            pass

        assert len(event_list) == len(events_two)
        assert all([isinstance(event, EventRecord) for event in event_list])

    def test_event_watcher_filter_two_runs_event(self, storage):
        if not hasattr(storage, "event_watcher"):
            pytest.skip("This test requires an event_watcher attribute")

        @solid
        def return_one(_):
            return 1

        def _solids():
            return_one()

        event_list_one = []
        event_list_two = []

        run_id_one = make_new_run_id()
        run_id_two = make_new_run_id()

        storage.event_watcher.watch_run(run_id_one, -1, event_list_one.append)
        storage.event_watcher.watch_run(run_id_two, -1, event_list_two.append)

        events_one, _result_one = _synthesize_events(_solids,
                                                     run_id=run_id_one)
        for event in events_one:
            storage.store_event(event)

        events_two, _result_two = _synthesize_events(_solids,
                                                     run_id=run_id_two)
        for event in events_two:
            storage.store_event(event)

        start = time.time()
        while (len(event_list_one) < len(events_one) or len(event_list_two) <
               len(events_two)) and time.time() - start < TEST_TIMEOUT:
            pass

        assert len(event_list_one) == len(events_one)
        assert len(event_list_two) == len(events_two)
        assert all(
            [isinstance(event, EventRecord) for event in event_list_one])
        assert all(
            [isinstance(event, EventRecord) for event in event_list_two])

    def test_correct_timezone(self, storage):
        curr_time = time.time()

        event = EventRecord(
            None,
            "Message2",
            "debug",
            "",
            "foo",
            curr_time,
            dagster_event=DagsterEvent(
                DagsterEventType.PIPELINE_START.value,
                "nonce",
                event_specific_data=EngineEventData.in_process(999),
            ),
        )

        storage.store_event(event)

        logs = storage.get_logs_for_run("foo")

        assert len(logs) == 1

        log = logs[0]

        stats = storage.get_stats_for_run("foo")

        assert int(log.timestamp) == int(stats.start_time)
        assert int(log.timestamp) == int(curr_time)

    def test_asset_materialization(self, storage):
        asset_key = AssetKey(["path", "to", "asset_one"])

        @solid
        def materialize_one(_):
            yield AssetMaterialization(
                asset_key=asset_key,
                metadata={
                    "text": "hello",
                    "json": {
                        "hello": "world"
                    },
                    "one_float": 1.0,
                    "one_int": 1,
                },
            )
            yield Output(1)

        def _solids():
            materialize_one()

        events_one, _ = _synthesize_events(_solids)
        for event in events_one:
            storage.store_event(event)

        assert asset_key in set(storage.all_asset_keys())
        events = storage.get_asset_events(asset_key)
        assert len(events) == 1
        event = events[0]
        assert isinstance(event, EventRecord)
        assert event.dagster_event.event_type_value == DagsterEventType.ASSET_MATERIALIZATION.value

    def test_asset_events_error_parsing(self, storage):
        if not isinstance(storage, SqlEventLogStorage):
            pytest.skip("This test is for SQL-backed Event Log behavior")
        _logs = []

        def mock_log(msg):
            _logs.append(msg)

        asset_key = AssetKey("asset_one")

        @solid
        def materialize_one(_):
            yield AssetMaterialization(asset_key=asset_key)
            yield Output(1)

        def _solids():
            materialize_one()

        events_one, _ = _synthesize_events(_solids)
        for event in events_one:
            storage.store_event(event)

        with mock.patch(
                "dagster.core.storage.event_log.sql_event_log.logging.warning",
                side_effect=mock_log,
        ):
            with mock.patch(
                    "dagster.core.storage.event_log.sql_event_log.deserialize_json_to_dagster_namedtuple",
                    return_value="not_an_event_record",
            ):

                assert asset_key in set(storage.all_asset_keys())
                events = storage.get_asset_events(asset_key)
                assert len(events) == 0
                assert len(_logs) == 1
                assert re.match(
                    "Could not resolve asset event record as EventRecord",
                    _logs[0])

            _logs = []  # reset logs

            with mock.patch(
                    "dagster.core.storage.event_log.sql_event_log.deserialize_json_to_dagster_namedtuple",
                    side_effect=seven.JSONDecodeError("error", "", 0),
            ):
                assert asset_key in set(storage.all_asset_keys())
                events = storage.get_asset_events(asset_key)
                assert len(events) == 0
                assert len(_logs) == 1
                assert re.match("Could not parse asset event record id",
                                _logs[0])

    def test_secondary_index_asset_keys(self, storage):
        asset_key_one = AssetKey(["one"])
        asset_key_two = AssetKey(["two"])

        @solid
        def materialize_one(_):
            yield AssetMaterialization(asset_key=asset_key_one)
            yield Output(1)

        @solid
        def materialize_two(_):
            yield AssetMaterialization(asset_key=asset_key_two)
            yield Output(1)

        def _one():
            materialize_one()

        def _two():
            materialize_two()

        events_one, _ = _synthesize_events(_one)
        for event in events_one:
            storage.store_event(event)

        asset_keys = storage.all_asset_keys()
        assert len(asset_keys) == 1
        assert asset_key_one in set(asset_keys)
        migrate_asset_key_data(storage)
        asset_keys = storage.all_asset_keys()
        assert len(asset_keys) == 1
        assert asset_key_one in set(asset_keys)
        events_two, _ = _synthesize_events(_two)
        for event in events_two:
            storage.store_event(event)
        asset_keys = storage.all_asset_keys()
        assert len(asset_keys) == 2
        assert asset_key_one in set(asset_keys)
        assert asset_key_two in set(asset_keys)

    def test_run_step_stats(self, storage):
        @solid(input_defs=[InputDefinition("_input", str)],
               output_defs=[OutputDefinition(str)])
        def should_fail(context, _input):
            context.log.info("fail")
            raise Exception("booo")

        def _one():
            should_fail(should_succeed())

        events, result = _synthesize_events(_one, check_success=False)
        for event in events:
            storage.store_event(event)

        step_stats = sorted(storage.get_step_stats_for_run(result.run_id),
                            key=lambda x: x.end_time)
        assert len(step_stats) == 2
        assert step_stats[0].step_key == "should_succeed"
        assert step_stats[0].status == StepEventStatus.SUCCESS
        assert step_stats[0].end_time > step_stats[0].start_time
        assert step_stats[0].attempts == 1
        assert step_stats[1].step_key == "should_fail"
        assert step_stats[1].status == StepEventStatus.FAILURE
        assert step_stats[1].end_time > step_stats[0].start_time
        assert step_stats[1].attempts == 1

    def test_run_step_stats_with_retries(self, storage):
        @solid(input_defs=[InputDefinition("_input", str)],
               output_defs=[OutputDefinition(str)])
        def should_retry(context, _input):
            raise RetryRequested(max_retries=3)

        def _one():
            should_retry(should_succeed())

        events, result = _synthesize_events(_one, check_success=False)
        for event in events:
            storage.store_event(event)

        step_stats = storage.get_step_stats_for_run(result.run_id,
                                                    step_keys=["should_retry"])
        assert len(step_stats) == 1
        assert step_stats[0].step_key == "should_retry"
        assert step_stats[0].status == StepEventStatus.FAILURE
        assert step_stats[0].end_time > step_stats[0].start_time
        assert step_stats[0].attempts == 4
Example #20
.. autoclass:: KeventDescriptor
   :members:
   :show-inheritance:

.. autoclass:: KeventDescriptorSet
   :members:
   :show-inheritance:

.. _Mac OS X File System Performance Guidelines: http://developer.apple.com/library/ios/#documentation/Performance/Conceptual/FileSystem/Articles/TrackingChanges.html#//apple_ref/doc/uid/20001993-CJBJFIDD

"""

from __future__ import with_statement
from watchdog.utils import platform

if not platform.is_bsd() and not platform.is_darwin():
    raise ImportError

import threading
import errno
import sys
import stat
import os

# See the notes for this module in the documentation above ^.
#import select
# if not has_attribute(select, 'kqueue') or sys.version_info < (2, 7, 0):
if sys.version_info < (2, 7, 0):
    import select_backport as select
else:
    import select
# limitations under the License.

from __future__ import unicode_literals
import os
import time
import pytest
import logging
from tests import Queue
from functools import partial
from .shell import mkdir, touch, mv, rm, mkdtemp
from watchdog.utils import platform
from watchdog.utils.unicode_paths import str_cls
from watchdog.events import *
from watchdog.observers.api import ObservedWatch

pytestmark = pytest.mark.skipif(not platform.is_linux() and not platform.is_darwin(), reason="")
if platform.is_linux():
    from watchdog.observers.inotify import InotifyEmitter as Emitter
    from watchdog.observers.inotify import InotifyFullEmitter
elif platform.is_darwin():
    from watchdog.observers.fsevents2 import FSEventsEmitter as Emitter

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def setup_function(function):
    global p, event_queue
    tmpdir = os.path.realpath(mkdtemp())
    p = partial(os.path.join, tmpdir)
    event_queue = Queue()
Example #22
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

"""
:module: watchdog.observers.fsevents
:synopsis: FSEvents based emitter implementation.
:author: Gora Khargosh <*****@*****.**>
:platforms: Mac OS X
"""

from __future__ import with_statement
from watchdog.utils import platform

if platform.is_darwin():
    import threading
    import os.path
    import _watchdog_fsevents as _fsevents

    from watchdog.events import\
        FileDeletedEvent,\
        FileModifiedEvent,\
        FileCreatedEvent,\
        FileMovedEvent,\
        DirDeletedEvent,\
        DirModifiedEvent,\
        DirCreatedEvent,\
        DirMovedEvent
    from watchdog.utils import absolute_path
# limitations under the License.

from __future__ import unicode_literals
import os
import time
import pytest
import logging
from tests import Queue
from functools import partial
from .shell import mkdir, touch, mv, rm, mkdtemp
from watchdog.utils import platform
from watchdog.utils.unicode_paths import str_cls
from watchdog.events import *
from watchdog.observers.api import ObservedWatch

pytestmark = pytest.mark.skipif(not platform.is_linux() and not platform.is_darwin(), reason="")
if platform.is_linux():
    from watchdog.observers.inotify import InotifyEmitter as Emitter
elif platform.is_darwin():
    from watchdog.observers.fsevents2 import FSEventsEmitter as Emitter

logging.basicConfig(level=logging.DEBUG)


def setup_function(function):
    global p, event_queue
    tmpdir = os.path.realpath(mkdtemp())
    p = partial(os.path.join, tmpdir)
    event_queue = Queue()

Example #24
.. autoclass:: KeventDescriptor
   :members:
   :show-inheritance:

.. autoclass:: KeventDescriptorSet
   :members:
   :show-inheritance:

.. _Mac OS X File System Performance Guidelines: http://developer.apple.com/library/ios/#documentation/Performance/Conceptual/FileSystem/Articles/TrackingChanges.html#//apple_ref/doc/uid/20001993-CJBJFIDD

"""

from __future__ import with_statement
from watchdog.utils import platform

if not platform.is_bsd() and not platform.is_darwin():
    raise ImportError

import threading
import errno
import sys
import stat
import os

# See the notes for this module in the documentation above ^.
#import select
# if not has_attribute(select, 'kqueue') or sys.version_info < (2, 7, 0):
if sys.version_info < (2, 7, 0):
    import select_backport as select
else:
    import select
Example #25
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
:module: watchdog.observers.fsevents
:synopsis: FSEvents based emitter implementation.
:author: [email protected] (Yesudeep Mangalapilly)
:platforms: Mac OS X
"""

from __future__ import with_statement
from watchdog.utils import platform

if not platform.is_darwin():
    raise ImportError

import threading
import unicodedata
import time
import _watchdog_fsevents as _fsevents

from watchdog.events import (
    FileDeletedEvent,
    FileModifiedEvent,
    FileCreatedEvent,
    FileMovedEvent,
    DirDeletedEvent,
    DirModifiedEvent,
    DirCreatedEvent,
Example #26
import logging
import os
import time
from functools import partial

import pytest
from watchdog.events import *
from watchdog.observers.api import ObservedWatch
from watchdog.utils import platform
from watchdog.utils.compat import Queue
from watchdog.utils.unicode_paths import str_cls

from .shell import mkdir, mkdtemp, mv, rm, touch

pytestmark = pytest.mark.skipif(not platform.is_linux()
                                and not platform.is_darwin(),
                                reason="")
if platform.is_linux():
    from watchdog.observers.inotify import InotifyEmitter as Emitter
    from watchdog.observers.inotify import InotifyFullEmitter
elif platform.is_darwin():
    pytestmark = pytest.mark.skip('WATCHDOG-8')
    from watchdog.observers.fsevents2 import FSEventsEmitter as Emitter

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def setup_function(function):
    global p, event_queue
    tmpdir = os.path.realpath(mkdtemp())
    p = partial(os.path.join, tmpdir)
    event_queue = Queue()
Example #27
# coding: utf-8

import pytest
from watchdog.utils import platform

if not platform.is_darwin():
    pytest.skip("macOS only.", allow_module_level=True)

import logging
import os
import time
from functools import partial
from os import mkdir, rmdir

from watchdog.observers import Observer
from watchdog.observers.api import ObservedWatch
from watchdog.observers.fsevents import FSEventsEmitter

from . import Queue
from .shell import mkdtemp, rm

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def setup_function(function):
    global p, event_queue
    tmpdir = os.path.realpath(mkdtemp())
    p = partial(os.path.join, tmpdir)
    event_queue = Queue()
Example #28
.. autoclass:: KeventDescriptor
   :members:
   :show-inheritance:

.. autoclass:: KeventDescriptorSet
   :members:
   :show-inheritance:

.. _Mac OS X File System Performance Guidelines: http://developer.apple.com/library/ios/#documentation/Performance/Conceptual/FileSystem/Articles/TrackingChanges.html#//apple_ref/doc/uid/20001993-CJBJFIDD

"""

from __future__ import with_statement
from watchdog.utils import platform  # , has_attribute

if platform.is_bsd() or platform.is_darwin():
    import threading
    import errno
    import sys
    import stat
    import os

    # See the notes for this module in the documentation above ^.
    # import select
    # if not has_attribute(select, 'kqueue') or sys.version_info < (2, 7, 0):
    if sys.version_info < (2, 7, 0):
        import select_backport as select
    else:
        import select

    from pathtools.path import absolute_path
Example #29
   :members:
   :show-inheritance:

.. autoclass:: KeventDescriptorSet
   :members:
   :show-inheritance:

.. _Mac OS X File System Performance Guidelines: http://developer.apple.com/library/ios/#documentation/Performance/Conceptual/FileSystem/Articles/TrackingChanges.html#//apple_ref/doc/uid/20001993-CJBJFIDD

"""

from __future__ import with_statement
from watchdog.utils import platform  # , has_attribute

if platform.is_bsd() or platform.is_darwin():
    import threading
    import errno
    import sys
    import stat
    import os

    # See the notes for this module in the documentation above ^.
    #import select
    #if not has_attribute(select, 'kqueue') or sys.version_info < (2, 7, 0):
    if sys.version_info < (2, 7, 0):
        import select_backport as select
    else:
        import select

    from pathtools.path import absolute_path
Example #30
    FileCreatedEvent,
    FileMovedEvent,
    DirDeletedEvent,
    DirModifiedEvent,
    DirCreatedEvent,
    DirMovedEvent,
    FileClosedEvent,
)
from watchdog.observers.api import ObservedWatch

if platform.is_linux():
    from watchdog.observers.inotify import (
        InotifyEmitter as Emitter,
        InotifyFullEmitter,
    )
elif platform.is_darwin():
    from watchdog.observers.fsevents import FSEventsEmitter as Emitter
elif platform.is_windows():
    from watchdog.observers.read_directory_changes import (
        WindowsApiEmitter as Emitter
    )
elif platform.is_bsd():
    from watchdog.observers.kqueue import (
        KqueueEmitter as Emitter
    )

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


if platform.is_darwin():
Example #31
    FileCreatedEvent,
    FileMovedEvent,
    DirDeletedEvent,
    DirModifiedEvent,
    DirCreatedEvent,
    DirMovedEvent,
    FileClosedEvent,
)
from watchdog.observers.api import ObservedWatch

if platform.is_linux():
    from watchdog.observers.inotify import (
        InotifyEmitter as Emitter,
        InotifyFullEmitter,
    )
elif platform.is_darwin():
    from watchdog.observers.fsevents import FSEventsEmitter as Emitter
elif platform.is_windows():
    from watchdog.observers.read_directory_changes import (
        WindowsApiEmitter as Emitter
    )
elif platform.is_bsd():
    from watchdog.observers.kqueue import (
        KqueueEmitter as Emitter
    )

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


if platform.is_darwin():