def test_E_MergeConsumeNew(self):
    with phlsys_fs.chtmpdir_context():
        requester = _MockRequesterObject()
        url = 'http://a.test'
        cache_path = 'phlurl_watcher_cache.json'

        # initialise without existing cache
        watcher_cache_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
            cache_path, requester)
        watcher = watcher_cache_wrapper.watcher

        # set state 'a is new'
        self.assertTrue(watcher.peek_has_url_recently_changed(url))

        watcher2_cache_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
            cache_path, requester)
        watcher2 = watcher2_cache_wrapper.watcher

        data_before_merge = watcher2.get_data_for_merging()
        watcher2.merge_data_consume_only(watcher.get_data_for_merging())
        data_after_merge = watcher2.get_data_for_merging()

        self.assertEqual(data_before_merge, {})

        # [ E] b.merge_data_consume_only(a.get_data_for_merging()) copies
        #      elements which are present in a but not in b.
        self.assertEqual(data_after_merge, watcher.get_data_for_merging())
def test_A_Breathing(self):
    with phlsys_fs.chtmpdir_context():
        requester = _MockRequesterObject()
        url = 'http://host.test'
        cache_path = 'phlurl_watcher_cache.json'

        # initialise without existing cache
        watcher_cache_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
            cache_path, requester)
        watcher = watcher_cache_wrapper.watcher

        # check that we can test and consume the content change
        self.assertTrue(watcher.peek_has_url_recently_changed(url))
        self.assertTrue(watcher.peek_has_url_recently_changed(url))
        self.assertTrue(watcher.has_url_recently_changed(url))
        self.assertFalse(watcher.has_url_recently_changed(url))
        self.assertFalse(watcher.peek_has_url_recently_changed(url))

        # save and reload from the cache
        watcher_cache_wrapper.save()
        watcher_cache_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
            cache_path, requester)
        watcher = watcher_cache_wrapper.watcher

        # check that the content is still considered unchanged
        self.assertFalse(watcher.has_url_recently_changed(url))
        self.assertFalse(watcher.peek_has_url_recently_changed(url))

        # check that refreshing resets the changed flags
        watcher.refresh()

        # check that we can test and consume the content change
        self.assertTrue(watcher.peek_has_url_recently_changed(url))
        self.assertTrue(watcher.peek_has_url_recently_changed(url))
        self.assertTrue(watcher.has_url_recently_changed(url))
        self.assertFalse(watcher.has_url_recently_changed(url))
        self.assertFalse(watcher.peek_has_url_recently_changed(url))

        # update the content
        watcher.refresh()

        # save and reload from the cache
        watcher_cache_wrapper.save()
        watcher_cache_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
            cache_path, requester)
        watcher = watcher_cache_wrapper.watcher

        # check that we can consume the change
        self.assertTrue(watcher.peek_has_url_recently_changed(url))
        self.assertTrue(watcher.peek_has_url_recently_changed(url))
        self.assertTrue(watcher.has_url_recently_changed(url))
        self.assertFalse(watcher.has_url_recently_changed(url))
        self.assertFalse(watcher.peek_has_url_recently_changed(url))
def test_D_MergeNotConsumeUnmatching(self):
    with phlsys_fs.chtmpdir_context():
        requester = _MockRequesterObject()
        url = 'http://host.test'
        cache_path = 'phlurl_watcher_cache.json'

        # initialise without existing cache
        watcher_cache_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
            cache_path, requester)
        watcher = watcher_cache_wrapper.watcher

        # set state 'a is new'
        self.assertTrue(watcher.peek_has_url_recently_changed(url))

        # clone the watcher by saving and reloading from the cache
        watcher_cache_wrapper.save()
        watcher2_cache_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
            cache_path, requester)
        watcher2 = watcher2_cache_wrapper.watcher

        # [ D] can't consume newness in merge_data_consume_only() with
        #      unmatched hashes
        watcher.has_url_recently_changed(url)
        data_after_consume = watcher.get_data_for_merging()
        watcher2.refresh()
        watcher2.merge_data_consume_only(data_after_consume)
        self.assertTrue(watcher2.peek_has_url_recently_changed(url))
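
# The tests above construct a _MockRequesterObject whose definition is not
# part of this excerpt.  The class below is a hypothetical stand-in, not the
# real helper: it assumes the watcher fetches by calling get(url) and hashes
# the returned text, so returning fresh content on every call makes each
# refresh() register a change.  The method name and return value are
# assumptions.
class _MockRequesterObject(object):

    def __init__(self):
        self._request_count = 0

    def get(self, url):
        # return different content every time so the watched hash changes
        self._request_count += 1
        return '{} {}'.format(url, self._request_count)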
def process(args):
    _ = args  # NOQA
    fs = abdt_fs.make_default_accessor()

    with fs.lockfile_context():
        pid = fs.get_pid_or_none()
        if pid is not None and phlsys_pid.is_running(pid):
            raise Exception("cannot fetch whilst arcyd is running.")

        repo_config_path_list = fs.repo_config_path_list()
        repo_name_config_list = abdi_repoargs.parse_config_file_list(
            repo_config_path_list)

        url_watcher_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
            fs.layout.urlwatcher_cache_path)

        # Let the user know what's happening before potentially blocking for a
        # while.
        print('Refreshing repository snoop status ..', end=' ')

        # Make sure that the output is actually visible by flushing stdout
        # XXX: Will use 'flush' parameter to 'print()' in Python 3.3
        sys.stdout.flush()

        url_watcher_wrapper.watcher.refresh()

        print("done")

        for repo_name, repo_config in repo_name_config_list:
            print(repo_name + ' ..', end=' ')

            # Make sure that the output is actually visible by flushing
            # stdout
            # XXX: Will use 'flush' parameter to 'print()' in Python 3.3
            sys.stdout.flush()

            snoop_url = abdi_repoargs.get_repo_snoop_url(repo_config)

            sys_repo = phlsys_git.Repo(repo_config.repo_path)
            refcache_repo = phlgitx_refcache.Repo(sys_repo)
            differ_cache = abdt_differresultcache.Cache(refcache_repo)
            abd_repo = abdt_git.Repo(
                refcache_repo,
                differ_cache,
                "origin",
                repo_config.repo_desc)

            did_fetch = abdi_processrepoarglist.fetch_if_needed(
                url_watcher_wrapper.watcher,
                snoop_url,
                abd_repo,
                repo_config.repo_desc)

            if did_fetch:
                print('fetched')
            else:
                print('skipped')

            url_watcher_wrapper.save()
def do(
        repo_configs,
        sys_admin_emails,
        sleep_secs,
        is_no_loop,
        external_report_command,
        mail_sender,
        max_workers,
        overrun_secs):

    conduit_manager = _ConduitManager()

    fs_accessor = abdt_fs.make_default_accessor()
    url_watcher_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
        fs_accessor.layout.urlwatcher_cache_path)

    # decide max workers based on number of CPUs if no value is specified
    if max_workers == 0:
        max_workers = determine_max_workers_default()

    repo_list = []
    for name, config in repo_configs:
        repo_list.append(
            _ArcydManagedRepository(
                name,
                config,
                conduit_manager,
                url_watcher_wrapper,
                sys_admin_emails,
                mail_sender))

    # if we always overrun half our workers then the loop is sustainable; if
    # we overrun more than that then we'll be lagging too far behind. In the
    # event that we only have one worker then we can't overrun any.
    max_overrun_workers = max_workers // 2

    pool = phlmp_cyclingpool.CyclingPool(
        repo_list, max_workers, max_overrun_workers)

    cycle_timer = phlsys_timer.Timer()
    cycle_timer.start()
    exit_code = None
    while exit_code is None:

        # This timer needs to be separate from the cycle timer. The cycle
        # timer must be reset every time it is reported. The sleep timer makes
        # sure that each run of the loop takes a minimum amount of time.
        sleep_timer = phlsys_timer.Timer()
        sleep_timer.start()

        # refresh git snoops
        with abdt_logging.remote_io_read_event_context(
                'refresh-git-snoop', ''):
            abdt_tryloop.critical_tryloop(
                url_watcher_wrapper.watcher.refresh,
                abdt_errident.GIT_SNOOP,
                '')

        with abdt_logging.remote_io_read_event_context('refresh-conduit', ''):
            conduit_manager.refresh_conduits()

        with abdt_logging.misc_operation_event_context(
                'process-repos',
                '{} workers, {} repos'.format(max_workers, len(repo_list))):
            if max_workers > 1:
                for i, res in pool.cycle_results(overrun_secs=overrun_secs):
                    repo = repo_list[i]
                    repo.merge_from_worker(res)
            else:
                for r in repo_list:
                    r()

        # important to do this before stopping arcyd and as soon as possible
        # after doing fetches
        url_watcher_wrapper.save()

        # report cycle stats
        report = {
            "cycle_time_secs": cycle_timer.restart(),
            "overrun_jobs": pool.num_active_jobs,
        }
        _LOGGER.debug("cycle-stats: {}".format(report))
        if external_report_command:
            report_json = json.dumps(report)
            full_path = os.path.abspath(external_report_command)
            with abdt_logging.misc_operation_event_context(
                    'external-report-command', external_report_command):
                try:
                    phlsys_subprocess.run(full_path, stdin=report_json)
                except phlsys_subprocess.CalledProcessError as e:
                    _LOGGER.error(
                        "External command: {} failed with exception: "
                        "{}.".format(
                            external_report_command, type(e).__name__))
                    _LOGGER.error(
                        "VERBOSE MESSAGE: CycleReportJson:{}".format(e))

        if is_no_loop:
            exit_code = abdi_processexitcodes.ExitCodes.ec_exit
        elif os.path.isfile(fs_accessor.layout.killfile):
            exit_code = abdi_processexitcodes.ExitCodes.ec_exit

            if phlsys_fs.read_text_file(fs_accessor.layout.killfile):
                _LOGGER.info("Killfile observed, reason given: {}".format(
                    phlsys_fs.read_text_file(fs_accessor.layout.killfile)))
            else:
                _LOGGER.info("Killfile observed, arcyd will stop")

            os.remove(fs_accessor.layout.killfile)
        elif os.path.isfile(fs_accessor.layout.reloadfile):
            _LOGGER.info("Reloadfile observed, arcyd will reload")
            exit_code = abdi_processexitcodes.ExitCodes.ec_reload
            os.remove(fs_accessor.layout.reloadfile)

        # sleep to pad out the cycle
        secs_to_sleep = float(sleep_secs) - float(sleep_timer.duration)
        if secs_to_sleep > 0 and exit_code is None:
            with abdt_logging.misc_operation_event_context(
                    'sleep', secs_to_sleep):
                time.sleep(secs_to_sleep)
    # finish any jobs that overran
    for i, res in pool.finish_results():
        repo = repo_list[i]
        repo.merge_from_worker(res)

    # important to do this before stopping arcyd and as soon as
    # possible after doing fetches
    url_watcher_wrapper.save()

    return exit_code
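
# A minimal sketch of a script that could be supplied as
# 'external_report_command'.  Each cycle, do() above runs the command and
# writes the report to its stdin as JSON with the keys "cycle_time_secs" and
# "overrun_jobs".  This consumer is hypothetical and not part of arcyd; the
# output filename is an assumption.
def example_external_report_main():
    import json
    import sys

    report = json.load(sys.stdin)

    # append one tab-separated row per cycle for later inspection
    with open('arcyd_cycle_stats.tsv', 'a') as f:
        f.write('{}\t{}\n'.format(
            report['cycle_time_secs'], report['overrun_jobs']))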