def test_sweep_scheduled_sweep():
    """A sweep driven by the `schedule` library archives the expired file while
    the newer file (still inside its ttl) survives, and the db follows suit."""
    ct.place_config_file("", {"pattern": "*", "ttl": 2})
    conf = config.create_config_map(ct.ROOT_DIR)
    schedule.every(1).seconds.do(lambda: sweeper.sweep(conf))

    events = []
    w = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: events.append(ev))
    w.start()

    file_path_old = ct.write_csv("", "test_old.csv", [("hello", "world"), (1, 2), (3, 4)])
    schedule.run_pending()
    w.wait(2)
    file_path_new = ct.write_csv("", "test_new.csv", [("hello", "world"), (1, 2), (3, 4)])
    schedule.run_pending()
    w.wait(1)
    schedule.run_pending()
    w.stop()

    dir_conf = conf[ct.ROOT_DIR]

    # old file exceeded ttl=2s: moved into the archive directory
    assert not Path(file_path_old).exists()
    assert (Path(ct.ROOT_DIR) / dir_conf["archive"]["name"] / Path(file_path_old).name).exists()
    # new file is still within its ttl
    assert Path(file_path_new).exists()

    # db read AFTER the sweep (the original also read it earlier, but that
    # first result was immediately overwritten — a dead store, removed here)
    dir_db = ct.read_db_file(ct.ROOT_DIR)
    assert Path(file_path_new).name in dir_db["watching"].keys()
    assert Path(file_path_old).name not in dir_db["watching"].keys()
def test_sweep_directory_files_into_archive_zip():
    """With a zip-type archive config, swept files end up inside the zip and
    untargeted files are left untouched on disk."""
    ct.place_config_file("", {"pattern": "*", "archive": {"type": "zip", "name": "_archive.zip"}})
    conf = config.create_config_map(ct.ROOT_DIR)
    events = []
    w = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: events.append(ev))
    w.start()
    file_path_old = ct.write_csv("", "test_old.csv", [("hello", "world"), (1, 2), (3, 4)])
    file_path_new = ct.write_csv("", "test_new.csv", [("hello", "world"), (1, 2), (3, 4)])
    w.wait(1)
    w.stop()
    # only the old file is handed to the sweeper
    archive_target_files = [Path(file_path_old)]
    dir_db = ct.read_db_file(ct.ROOT_DIR)
    dir_conf = conf[ct.ROOT_DIR]
    sweeper._sweep_directory_files(Path(ct.ROOT_DIR), archive_target_files, dir_db, dir_conf)
    assert Path(file_path_new).exists()
    assert not Path(file_path_old).exists()
    # the swept file must now live inside the configured zip archive
    zip_file = Path(ct.ROOT_DIR) / dir_conf["archive"]["name"]
    assert zip_file.exists()
    with zipfile.ZipFile(str(zip_file)) as existing_zip:
        assert Path(file_path_old).name in existing_zip.namelist()
def test_sweep_directory_list_return_expired_file_list():
    """_sweep_directory_list_target lists only files whose ttl elapsed at current_time."""
    ct.place_config_file("", {"pattern": "*", "ttl": 10})
    conf = config.create_config_map(ct.ROOT_DIR)
    events = []
    w = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: events.append(ev))
    w.start()
    file_path_old = ct.write_csv("", "test_old.csv", [("hello", "world"), (1, 2), (3, 4)])
    file_path_new = ct.write_csv("", "test_new.csv", [("hello", "world"), (1, 2), (3, 4)])
    w.wait(1)
    w.stop()

    # modify last_detected for debugging: pin deterministic timestamps so the
    # ttl comparison does not depend on wall-clock timing
    last_detected = datetime(2020, 7, 19, 0, 0, 0, 0, timezone(timedelta(hours=0)))
    ct.mod_last_detected(file_path_old, last_detected)
    ct.mod_last_detected(file_path_new, last_detected + timedelta(seconds=10))

    db = ct.read_db_file(ct.ROOT_DIR)
    current_time = last_detected + timedelta(seconds=10)
    conf_dir = conf[ct.ROOT_DIR]
    # old file: full ttl (10s) elapsed -> listed; new file: age 0 -> kept
    actual_sweep_targets = sweeper._sweep_directory_list_target(Path(ct.ROOT_DIR), db, conf_dir, current_time)
    expected_sweep_targets = [file_path_old]
    assert expected_sweep_targets == actual_sweep_targets
def test_csv_tail_return_appended_data():
    """Reading again from the saved position yields only the appended rows,
    prefixed with the csv header row."""
    ct.place_config_file("", {"pattern": "*.csv"})
    dir_conf = config.create_config_map(ct.ROOT_DIR)[ct.ROOT_DIR]
    csv_path = ct.write_csv("", "test.csv", [("hello", "world"), (1, 2), (3, 4)])
    want_head = tw.dedent("""
        hello,world
        1,2
        3,4
    """).lstrip()
    (got_head, head_pos) = ch._read_tail(Path(csv_path), 0, dir_conf)
    assert want_head == got_head

    # Append new lines
    csv_path = ct.write_csv("", "test.csv", [(5, 6), (7, 8), (9, 10)])
    want_tail = tw.dedent("""
        hello,world
        5,6
        7,8
        9,10
    """).lstrip()
    (got_tail, _tail_pos) = ch._read_tail(Path(csv_path), head_pos, dir_conf)
    assert want_tail == got_tail
def test_text_tail_return_appended_data():
    """th.read returns the text written since the previous read of the file."""
    ct.place_config_file("", {"pattern": "*.jsonl"})
    dir_conf = config.create_config_map(ct.ROOT_DIR)[ct.ROOT_DIR]

    jsonl_path = ct.place_file(
        "", "test.jsonl",
        tw.dedent("""
            {"hello": "world"}
            {"hoge": "fuga"}
        """).lstrip())
    want = tw.dedent("""
        {"hello": "world"}
        {"hoge": "fuga"}
    """).lstrip()
    assert want == th.read(Path(jsonl_path), dir_conf)

    # second write: only the newly placed lines come back
    jsonl_path = ct.place_file(
        "", "test.jsonl",
        tw.dedent("""
            {"hello": "piyo"}
            {"hoge": "pero"}
        """).lstrip())
    want = tw.dedent("""
        {"hello": "piyo"}
        {"hoge": "pero"}
    """).lstrip()
    assert want == th.read(Path(jsonl_path), dir_conf)
def test_return_dict_overwrite_by_child_dir_configuration():
    """Child directory configs override the parent's values key by key.

    ROOT_DIR {"a": "parent_a", "b": "parent_b"}
      |-child1 {"a": "child1_a"}
      |-empty/child2 {"b": "child2_b"}
    """
    ct.place_config_file("", {"a": "parent_a", "b": "parent_b"})  # root configuration
    ct.place_config_file("child1", {"a": "child1_a"})
    ct.place_config_file("empty/child2", {"b": "child2_b"})

    want = {
        f'{ct.ROOT_DIR}': {"a": "parent_a", "b": "parent_b"},
        f'{ct.ROOT_DIR}/child1': {"a": "child1_a", "b": "parent_b"},
        f'{ct.ROOT_DIR}/empty/child2': {"a": "parent_a", "b": "child2_b"},
    }
    got = config.create_config_map(ct.ROOT_DIR, default_conf={})
    assert want == got
def test_sweep_directory():
    """sweeper.sweep archives files whose ttl elapsed and updates the db so
    only the surviving file remains watched."""
    ct.place_config_file("", {"pattern": "*", "ttl": 10})
    conf = config.create_config_map(ct.ROOT_DIR)
    events = []
    w = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: events.append(ev))
    w.start()
    file_path_old = ct.write_csv("", "test_old.csv", [("hello", "world"), (1, 2), (3, 4)])
    file_path_new = ct.write_csv("", "test_new.csv", [("hello", "world"), (1, 2), (3, 4)])
    w.wait(1)
    w.stop()

    # modify last_detected for debugging: pin deterministic timestamps so the
    # ttl comparison does not depend on wall-clock timing
    last_detected = datetime(2020, 7, 19, 0, 0, 0, 0, timezone(timedelta(hours=0)))
    ct.mod_last_detected(file_path_old, last_detected)
    ct.mod_last_detected(file_path_new, last_detected + timedelta(seconds=10))

    current_time = last_detected + timedelta(seconds=10)
    sweeper.sweep(conf, current_time)

    dir_conf = conf[ct.ROOT_DIR]
    # expired file moved into the archive directory; fresh file untouched
    assert not Path(file_path_old).exists()
    assert (Path(ct.ROOT_DIR) / dir_conf["archive"]["name"] / Path(file_path_old).name).exists()
    assert Path(file_path_new).exists()

    # db read AFTER the sweep (the original read it twice; the first,
    # pre-assert read was a dead store — removed)
    dir_db = ct.read_db_file(ct.ROOT_DIR)
    assert Path(file_path_new).name in dir_db["watching"].keys()
    assert Path(file_path_old).name not in dir_db["watching"].keys()
def test_create_db_file_when_receive_file_event():
    """Receiving a file event must register the file name in the db file."""
    ct.place_config_file("", {"pattern": "*"})
    conf = config.create_config_map(ct.ROOT_DIR)
    received = []
    nw = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: received.append(ev))
    nw.start()
    ct.place_file("", "sample-file.csv", "hoge")
    nw.wait(1)
    nw.stop()

    db = ct.read_db_file(ct.ROOT_DIR)
    assert "sample-file.csv" in db["watching"].keys()
def test_csv_read_all_return_all_data():
    """Reading a fresh csv from position 0 yields the whole file content."""
    ct.place_config_file("", {"pattern": "*.csv"})
    dir_conf = config.create_config_map(ct.ROOT_DIR)[ct.ROOT_DIR]
    csv_path = ct.write_csv("", "test.csv", [("hello", "world"), (1, 2), (3, 4)])
    want = tw.dedent("""
        hello,world
        1,2
        3,4
    """).lstrip()
    (got, _pos) = ch._read_tail(Path(csv_path), 0, dir_conf)
    assert want == got
def test_return_default_configs():
    """With an empty config file, the map for the root dir is filled entirely
    from the built-in defaults."""
    ct.place_config_file("", {})  # root configuration

    default_entry = {
        "min_mod_interval": 1,
        "callback_delay": 0,
        "db_file": "onamazu.db",
        "ttl": -1,
        "archive": {"type": "directory", "name": "_archive"},
    }
    want = {f'{ct.ROOT_DIR}': default_entry}
    got = config.create_config_map(ct.ROOT_DIR)
    assert want == got
def test_return_create_notify():
    """Creating a matching file in a watched sub directory emits exactly one event."""
    ct.place_config_file("sub", {"pattern": "*.csv"})
    conf = config.create_config_map(ct.ROOT_DIR)
    received = []
    nw = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: received.append(ev))
    nw.start()
    ct.place_file("sub", f"sample.csv", "hello,world1")
    nw.wait(1)
    nw.stop()
    assert 1 == len(received)
def test_inject_on_start(self):
    """A file that already exists when the watcher starts is injected as an event."""
    # NOTE(review): the `self` parameter suggests this was written as a
    # test-class method; collected as a plain module-level function, pytest
    # would demand a `self` fixture — confirm the enclosing context.
    ct.place_config_file("", {"pattern": "*.csv"})
    conf = config.create_config_map(ct.ROOT_DIR)
    ct.place_file("", "sample.csv", "hello,world")
    events = []
    w = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: events.append(ev))
    w.start()
    w.wait(1)
    w.stop()
    assert 1 == len(events)
    ev = events[0]
    # the injected event carries the pre-existing file's path
    assert ev.src_path == "/".join([ct.ROOT_DIR, "sample.csv"])
def test_ignore_db_file():
    """Only the placed csv files produce events — db files (including a
    renamed db_file) do not appear in the event stream."""
    ct.place_config_file("c", {"pattern": "*"})
    ct.place_config_file("c/j", {"pattern": "*", "db_file": "cache-file.yml"})
    conf = config.create_config_map(ct.ROOT_DIR)
    received = []
    nw = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: received.append(ev))
    nw.start()
    ct.place_file("c", "hoge.csv", "hoge")
    ct.place_file("c/j", "hoge.csv", "hoge")
    nw.wait(1)
    nw.stop()

    got_paths = [e.src_path for e in received]
    assert [f"{ct.ROOT_DIR}/c/hoge.csv", f"{ct.ROOT_DIR}/c/j/hoge.csv"] == got_paths
def test_return_delayed_events():
    """callback_delay postpones event delivery by the configured seconds."""
    ct.place_config_file("", {"pattern": "*.csv", "callback_delay": 2})  # root configuration
    conf = config.create_config_map(ct.ROOT_DIR)
    received = []
    nw = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: received.append(ev))
    nw.start()
    ct.place_file("", f"hoge.csv", "hello,csv")
    nw.wait(1)
    assert 0 == len(received)  # still inside the delay window
    nw.wait(2)
    assert 1 == len(received)  # delivered once the delay elapsed
    nw.stop()
def test_csv_read_return_appended_data_each_file():
    """ch.read tracks a read position per file: each file's second read
    returns only its own appended rows, prefixed with that file's header row."""
    ct.place_config_file("", {"pattern": "*.csv"})
    conf = config.create_config_map(ct.ROOT_DIR)[ct.ROOT_DIR]
    file_path_a = ct.write_csv("", "test_a.csv", [("hello", "world"), (1, 2), (3, 4)])
    expected_head_a = tw.dedent("""
        hello,world
        1,2
        3,4
    """).lstrip()
    actual_head_a = ch.read(Path(file_path_a), conf)
    assert expected_head_a == actual_head_a

    file_path_b = ct.write_csv("", "test_b.csv", [("hoge", "fuga"), (1, 2), (3, 4), (5, 6)])
    expected_head_b = tw.dedent("""
        hoge,fuga
        1,2
        3,4
        5,6
    """).lstrip()
    actual_head_b = ch.read(Path(file_path_b), conf)
    assert expected_head_b == actual_head_b

    # Append new lines — each file's tail comes back with its OWN header
    file_path_a = ct.write_csv("", "test_a.csv", [(5, 6), (7, 8)])
    expected_appended_a = tw.dedent("""
        hello,world
        5,6
        7,8
    """).lstrip()
    actual_appended_a = ch.read(Path(file_path_a), conf)
    assert expected_appended_a == actual_appended_a

    file_path_b = ct.write_csv("", "test_b.csv", [(7, 8), ("hoge", "fuga")])
    expected_appended_b = tw.dedent("""
        hoge,fuga
        7,8
        hoge,fuga
    """).lstrip()
    actual_appended_b = ch.read(Path(file_path_b), conf)
    assert expected_appended_b == actual_appended_b
def test_ignore_duplicated_events_in_callback_delay_multi_dir():
    """Per-directory callback_delay values order delivery independently."""
    ct.place_config_file("mario", {"pattern": "*.csv", "callback_delay": 2})
    ct.place_config_file("luigi", {"pattern": "*.txt", "callback_delay": 1})
    conf = config.create_config_map(ct.ROOT_DIR)
    received = []
    nw = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: received.append(ev))
    nw.start()
    ct.place_file("mario", f"hoge.csv", "hello,world")
    ct.place_file("luigi", f"hoge.txt", "hello,world")
    nw.wait(3)  # 1event
    nw.stop()

    # shorter delay (luigi, 1s) fires before the longer one (mario, 2s)
    got_paths = [e.src_path for e in received]
    assert [
        f"{ct.ROOT_DIR}/luigi/hoge.txt",
        f"{ct.ROOT_DIR}/mario/hoge.csv",
    ] == got_paths
def test_sweep_directory_files_into_archive_dir():
    """_sweep_directory_files moves targeted files into the archive directory
    and leaves untargeted files where they are."""
    ct.place_config_file("", {"pattern": "*"})
    conf = config.create_config_map(ct.ROOT_DIR)
    received = []
    nw = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: received.append(ev))
    nw.start()
    old_csv = ct.write_csv("", "test_old.csv", [("hello", "world"), (1, 2), (3, 4)])
    new_csv = ct.write_csv("", "test_new.csv", [("hello", "world"), (1, 2), (3, 4)])
    nw.wait(1)
    nw.stop()

    dir_db = ct.read_db_file(ct.ROOT_DIR)
    dir_conf = conf[ct.ROOT_DIR]
    # only the old file is targeted for archiving
    sweeper._sweep_directory_files(Path(ct.ROOT_DIR), [Path(old_csv)], dir_db, dir_conf)

    assert not Path(old_csv).exists()
    assert (Path(ct.ROOT_DIR) / dir_conf["archive"]["name"] / Path(old_csv).name).exists()
    assert Path(new_csv).exists()
def test_sweep_directory_files_into_archive_already_exists_save_with_datetime_postfix():
    """Sweeping a file whose name already exists in the archive directory keeps
    both copies: the second one is saved under a `_YYYYmmddHHMMSS` postfix."""
    ct.place_config_file("", {"pattern": "*"})
    conf = config.create_config_map(ct.ROOT_DIR)
    dir_conf = conf[ct.ROOT_DIR]
    events = []
    w = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: events.append(ev))
    w.start()
    file_path_old = ct.write_csv("", "test.csv", [("hello", "world"), (1, 2), (3, 4)])
    w.wait(1)
    w.stop()
    dir_db = ct.read_db_file(ct.ROOT_DIR)
    expected_content_old = file_path_old.read_text()
    # first sweep: archive is empty, the file keeps its original name
    sweeper._sweep_directory_files(Path(ct.ROOT_DIR), [Path(file_path_old)], dir_db, dir_conf)

    # re-create a file with the SAME name and sweep again
    w = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: events.append(ev))
    w.start()
    file_path_new = ct.write_csv("", "test.csv", [("hello", "world"), (5, 6), (7, 8)])
    w.wait(1)
    w.stop()
    dir_db = ct.read_db_file(ct.ROOT_DIR)
    expected_content_new = file_path_new.read_text()
    # fixed timestamp -> deterministic "_20190815013900" postfix below
    now = datetime(2019, 8, 15, 1, 39, 0, 0 * 1000, timezone(timedelta(hours=-6)))
    sweeper._sweep_directory_files(Path(ct.ROOT_DIR), [Path(file_path_new)], dir_db, dir_conf, now)

    # first copy archived under the original name, content intact
    assert not Path(file_path_old).exists()
    archived_file_old = Path(ct.ROOT_DIR) / dir_conf["archive"]["name"] / Path(file_path_old).name
    assert archived_file_old.exists()
    actual_content_old = archived_file_old.read_text()
    assert expected_content_old == actual_content_old

    # second copy archived under the datetime-postfixed name, content intact
    assert not Path(file_path_new).exists()
    archived_file_new = Path(ct.ROOT_DIR) / dir_conf["archive"]["name"] / "test_20190815013900.csv"
    assert archived_file_new.exists()
    actual_content_new = archived_file_new.read_text()
    assert expected_content_new == actual_content_new
def test_return_specified_pattern():
    """Each directory's `pattern` filters its own events independently."""
    ct.place_config_file("c", {"pattern": "*.csv"})
    ct.place_config_file("c/j", {"pattern": "*.json"})
    conf = config.create_config_map(ct.ROOT_DIR)
    received = []
    nw = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: received.append(ev))
    nw.start()
    ct.place_file("c", f"sample.csv", "hello,csv")
    ct.place_file("c", f"sample.json", '{"hello":"json"}')
    ct.place_file("c/j", f"sample.csv", "hello,csv")
    ct.place_file("c/j", f"sample.json", '{"hello":"json"}')
    nw.wait(1)
    nw.stop()

    # c only matches *.csv; c/j only matches *.json
    got_paths = [e.src_path for e in received]
    assert [
        f"{ct.ROOT_DIR}/c/sample.csv",
        f"{ct.ROOT_DIR}/c/j/sample.json"
    ] == got_paths
def test_return_ignored_duplicated_events():
    """Repeated modifications of a file within min_mod_interval are dropped."""
    ct.place_config_file("", {"min_mod_interval": 10, "pattern": "*.csv"})  # root configuration
    conf = config.create_config_map(ct.ROOT_DIR)
    received = []
    nw = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: received.append(ev))
    nw.start()
    ct.place_file("", f"hoge.csv", "hello,csv")
    ct.place_file("", f"fuga.csv", "hello,csv")
    ct.place_file("", f"hoge.csv", "hello,csv")  # duplicate within interval -> ignored
    ct.place_file("", f"fuga.csv", "hello,csv")  # duplicate within interval -> ignored
    ct.place_file("", f"hoge.csv", "hello,csv")  # duplicate within interval -> ignored
    nw.wait(1)
    nw.stop()

    got_paths = [e.src_path for e in received]
    assert [f"{ct.ROOT_DIR}/hoge.csv", f"{ct.ROOT_DIR}/fuga.csv"] == got_paths
def test_sweep_directory_files_into_archive_zip_already_exists_save_with_datetime_postfix():
    """Sweeping a same-named file into a zip archive stores the second copy
    under a `_YYYYmmddHHMMSS` postfixed name inside the zip."""
    ct.place_config_file("", {"pattern": "*", "archive": {"type": "zip", "name": "_archive.zip"}})
    conf = config.create_config_map(ct.ROOT_DIR)
    dir_conf = conf[ct.ROOT_DIR]
    events = []
    w = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: events.append(ev))
    w.start()
    file_path_old = ct.write_csv("", "test.csv", [("hello", "world"), (1, 2), (3, 4)])
    w.wait(1)
    w.stop()
    dir_db = ct.read_db_file(ct.ROOT_DIR)
    # first sweep: the zip does not yet contain "test.csv"
    sweeper._sweep_directory_files(Path(ct.ROOT_DIR), [Path(file_path_old)], dir_db, dir_conf)

    # re-create a file with the SAME name and sweep again
    w = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: events.append(ev))
    w.start()
    file_path_new = ct.write_csv("", "test.csv", [("hello", "world"), (5, 6), (7, 8)])
    w.wait(1)
    w.stop()
    dir_db = ct.read_db_file(ct.ROOT_DIR)
    # fixed timestamp -> deterministic "_20190815013900" postfix below
    now = datetime(2019, 8, 15, 1, 39, 0, 0 * 1000, timezone(timedelta(hours=-6)))
    sweeper._sweep_directory_files(Path(ct.ROOT_DIR), [Path(file_path_new)], dir_db, dir_conf, now)

    zip_file_path = Path(ct.ROOT_DIR) / dir_conf["archive"]["name"]
    with zipfile.ZipFile(zip_file_path) as zip_file:
        files_in_zip = zip_file.namelist()

    # first copy kept under its original name, second under the postfixed name
    assert not Path(file_path_old).exists()
    archived_file_old = Path(ct.ROOT_DIR) / dir_conf["archive"]["name"] / Path(file_path_old).name
    assert archived_file_old.name in files_in_zip
    assert not Path(file_path_new).exists()
    archived_file_new = Path(ct.ROOT_DIR) / dir_conf["archive"]["name"] / "test_20190815013900.csv"
    assert archived_file_new.name in files_in_zip
def test_ignore_duplicated_events_in_callback_delay():
    """Modifications that arrive while an event is being delayed collapse
    into a single callback."""
    ct.place_config_file("", {"pattern": "*.csv", "min_mod_interval": 0, "callback_delay": 2})  # root configuration
    conf = config.create_config_map(ct.ROOT_DIR)
    received = []
    nw = watcher.NamazuWatcher(ct.ROOT_DIR, conf, lambda ev: received.append(ev))
    nw.start()
    ct.place_file("", "hoge.csv", "hello,csv")
    nw.wait(1)  # will be ignored
    ct.place_file("", "hoge.csv", "hello,csv")
    nw.wait(1)  # will be ignored
    ct.place_file("", "hoge.csv", "hello,csv")
    nw.wait(1)  # will be ignored
    ct.place_file("", "hoge.csv", "hello,csv")
    nw.wait(1)
    nw.wait(3)  # 1event
    nw.stop()
    assert 1 == len(received)
import time

from onamazu import config
from onamazu import watcher

# Manual smoke-run script: watch the `sample/` directory, collect events,
# and print liveness once per second until the watcher dies or Ctrl-C.
events = []

ROOT_DIR = 'sample'
conf = config.create_config_map(ROOT_DIR)
w = watcher.NamazuWatcher(ROOT_DIR, conf, lambda ev: events.append(ev))

try:
    w.start()
    # NOTE(review): isAlive() is a NamazuWatcher API; if it merely forwards to
    # threading.Thread.isAlive, that alias was removed in Python 3.9
    # (use is_alive) — confirm against the watcher implementation.
    while w.isAlive():
        w.wait(1)
        print(w.isAlive())
except KeyboardInterrupt:
    # graceful shutdown on Ctrl-C
    w.stop()
def test_return_empty_dict():
    """A directory that does not exist yields an empty config map."""
    assert {} == config.create_config_map("not_found_dir")
def test_return_simple_dict():
    """A single nested config file maps its directory path to its content."""
    ct.place_config_file("piyo/01", {"hello": "onamazu"})
    want = {f'{ct.ROOT_DIR}/piyo/01': {"hello": "onamazu"}}
    got = config.create_config_map(ct.ROOT_DIR, default_conf={})
    assert want == got