def test_semaphore():
    """
    Verify that a semaphore limits how many tasks access a resource at once.

    Even with a pool of 10 workers for 10 tasks, the semaphore throttling
    means the whole graph is expected to take 7 seconds.
    """
    started_at = time.time()
    run_tasks([DumpTask(i) for i in range(10)], pool_size=10)
    elapsed = time.time() - started_at
    assert almost_equal(elapsed, 7)
def test_smoke():
    """Smoke test: run a tiny dependency chain and check the shared array."""
    setter = SetTask({"index": 0, "value": 41})
    incrementer = IncTask({"index": 0}, [setter])
    # Single worker: SetTask runs first (dependency), then IncTask.
    assert run_tasks([incrementer], pool_size=1) is True
    assert array[0] == 42

    # A second, independent increment with a larger pool.
    second_incrementer = IncTask({"index": 0})
    assert run_tasks([second_incrementer], pool_size=10) is True
    assert array[0] == 43
def test_failure():
    """
    Verify that a failing task with no retries aborts the run.

    The doomed crawl ends up in ``failed_tasks``, its dependent extract task
    stays pending, and the independent successful branch still completes.
    """
    blessed_crawl = CrawlTask(failures=0)
    doomed_crawl = CrawlTask(failures=1)
    extract_1 = ExtractTask({}, [blessed_crawl])
    extract_2 = ExtractTask({}, [doomed_crawl])

    assert doomed_crawl.retries_on_failure == 0
    with pytest.raises(DaggerException) as exc_info:
        run_tasks([extract_1, extract_2])
    # No retries configured, so the counter is unchanged after the failure.
    assert doomed_crawl.retries_on_failure == 0

    ex = exc_info.value
    # Set literals instead of set([...]) — same values, idiomatic form.
    assert ex.failed_tasks == {doomed_crawl}
    assert ex.done_tasks == {blessed_crawl, extract_1}
    assert ex.pending_tasks == {extract_2}
def test_retry_on_failure_with_abort():
    """
    Verify that a task failing more times than its retry budget aborts the run.

    The retried crawl exhausts all of its default retries (3 failures against
    a budget of DEFAULT_RETRIES), so the run still raises DaggerException.
    """
    blessed_crawl = CrawlTask(failures=0)
    retried_crawl = UnsafeRetriedCrawlTask(failures=3)
    extract_1 = ExtractTask({}, [blessed_crawl])
    extract_2 = ExtractTask({}, [retried_crawl])

    assert retried_crawl.retries_on_failure == UnsafeRetriedCrawlTask.DEFAULT_RETRIES > 0
    with pytest.raises(DaggerException) as exc_info:
        run_tasks([extract_1, extract_2])
    # The retry budget must be fully consumed by the repeated failures.
    assert retried_crawl.retries_on_failure == 0

    ex = exc_info.value
    # Set literals instead of set([...]) — same values, idiomatic form.
    assert ex.failed_tasks == {retried_crawl}
    assert ex.done_tasks == {blessed_crawl, extract_1}
    assert ex.pending_tasks == {extract_2}
def test_retry_on_one_failure_with_success():
    """
    Verify that retrying leads to success when a retry eventually succeeds.
    """
    stable_crawl = CrawlTask(failures=0)
    flaky_crawl = CrawlTask(failures=1, retries=3)
    extract_from_stable = ExtractTask({}, [stable_crawl])
    extract_from_flaky = ExtractTask({}, [flaky_crawl])

    assert stable_crawl.retries_on_failure == 0
    assert flaky_crawl.retries_on_failure == 3

    outcome = run_tasks([extract_from_stable, extract_from_flaky])

    # One failure consumed exactly one retry; the stable task used none.
    assert stable_crawl.retries_on_failure == 0
    assert flaky_crawl.retries_on_failure == 2
    assert outcome
def run(self):
    """
    Sum the "size" field from every dependency's downloaded stats file
    and print the total.

    Each dependency is expected to be a download task whose config holds
    "owner" and "repo", and which wrote a JSON stats file named
    "<owner>_<repo>" — TODO confirm against DownloadGitHub.
    """
    total_size = 0
    for download_task in self.dependencies:
        owner = download_task.config["owner"]
        repo = download_task.config["repo"]
        outfile_name = "{}_{}".format(owner, repo)
        # JSON is UTF-8 by spec; don't rely on the platform default encoding.
        with open(outfile_name, encoding="utf-8") as outfile:
            stats = json.load(outfile)
            total_size += stats["size"]
    print("Total size of all repos:", total_size)


if __name__ == "__main__":
    repos = [("trustyou", "retwist")]
    download_tasks = [
        DownloadGitHub({"owner": owner, "repo": repo})
        for (owner, repo) in repos
    ]
    # ComputeStats aggregates the sizes once all downloads are done.
    stats_task = ComputeStats(None, download_tasks)
    run_tasks([stats_task])