    def test_E_MergeConsumeNew(self):

        with phlsys_fs.chtmpdir_context():

            requester = _MockRequesterObject()
            url = 'http://a.test'
            cache_path = 'phlurl_watcher_cache.json'

            # initialise without existing cache
            watcher_cache_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
                cache_path, requester)
            watcher = watcher_cache_wrapper.watcher

            # set state 'a is new'
            self.assertTrue(watcher.peek_has_url_recently_changed(url))

            watcher2_cache_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
                cache_path, requester)
            watcher2 = watcher2_cache_wrapper.watcher

            data_before_merge = watcher2.get_data_for_merging()
            watcher2.merge_data_consume_only(watcher.get_data_for_merging())
            data_after_merge = watcher2.get_data_for_merging()
            self.assertEqual(data_before_merge, {})
            # [ E] b.merge_data_consume_only(a.get_data_for_merging()) copies
            #      elements which are present in a but not in b.
            self.assertEqual(data_after_merge, watcher.get_data_for_merging())
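
The merge test above suggests how two watcher instances can exchange state, for example when a worker consumes 'newness' that the parent process should also see. A minimal sketch, assuming only the Watcher methods exercised in these tests; absorb_worker_state and the parent/worker names are hypothetical:

def absorb_worker_state(parent_watcher, worker_watcher):
    # Copy entries the parent has never seen and consume 'newness' for
    # entries whose content hashes match; unmatched hashes are left
    # alone (see tests D and E).
    parent_watcher.merge_data_consume_only(
        worker_watcher.get_data_for_merging())
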
    def test_A_Breathing(self):

        with phlsys_fs.chtmpdir_context():

            requester = _MockRequesterObject()
            url = 'http://host.test'
            cache_path = 'phlurl_watcher_cache.json'

            # initialise without existing cache
            watcher_cache_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
                cache_path, requester)
            watcher = watcher_cache_wrapper.watcher

            # check that we can test and consume the content change
            self.assertTrue(watcher.peek_has_url_recently_changed(url))
            self.assertTrue(watcher.peek_has_url_recently_changed(url))
            self.assertTrue(watcher.has_url_recently_changed(url))
            self.assertFalse(watcher.has_url_recently_changed(url))
            self.assertFalse(watcher.peek_has_url_recently_changed(url))

            # save and reload from the cache
            watcher_cache_wrapper.save()
            watcher_cache_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
                cache_path, requester)
            watcher = watcher_cache_wrapper.watcher

            # check that the content is still considered unchanged
            self.assertFalse(watcher.has_url_recently_changed(url))
            self.assertFalse(watcher.peek_has_url_recently_changed(url))

            # check that refreshing resets the changed flags
            watcher.refresh()

            # check that we can test and consume the content change
            self.assertTrue(watcher.peek_has_url_recently_changed(url))
            self.assertTrue(watcher.peek_has_url_recently_changed(url))
            self.assertTrue(watcher.has_url_recently_changed(url))
            self.assertFalse(watcher.has_url_recently_changed(url))
            self.assertFalse(watcher.peek_has_url_recently_changed(url))

            # update the content
            watcher.refresh()

            # save and reload from the cache
            watcher_cache_wrapper.save()
            watcher_cache_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
                cache_path, requester)
            watcher = watcher_cache_wrapper.watcher

            # check that we can consume the change
            self.assertTrue(watcher.peek_has_url_recently_changed(url))
            self.assertTrue(watcher.peek_has_url_recently_changed(url))
            self.assertTrue(watcher.has_url_recently_changed(url))
            self.assertFalse(watcher.has_url_recently_changed(url))
            self.assertFalse(watcher.peek_has_url_recently_changed(url))
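
The breathing test pins down the peek/consume split: peek_has_url_recently_changed() reports the flag without clearing it, has_url_recently_changed() clears it, and refresh() re-arms it when the fetched content changes. A hedged sketch of how a caller might build on that, in the spirit of the fetch helper used further down; fetch_when_snoop_changed and repo.fetch() are hypothetical:

def fetch_when_snoop_changed(watcher, snoop_url, repo):
    # Peek first so a failed fetch does not lose the 'changed' flag.
    if watcher.peek_has_url_recently_changed(snoop_url):
        repo.fetch()  # hypothetical fetch call; substitute the real one
        # Consume the flag only once the fetch has succeeded.
        watcher.has_url_recently_changed(snoop_url)
        return True
    return False
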
    def test_D_MergeNotConsumeUnmatching(self):

        with phlsys_fs.chtmpdir_context():

            requester = _MockRequesterObject()
            url = 'http://host.test'
            cache_path = 'phlurl_watcher_cache.json'

            # initialise without existing cache
            watcher_cache_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
                cache_path, requester)
            watcher = watcher_cache_wrapper.watcher

            # set state 'a is new'
            self.assertTrue(watcher.peek_has_url_recently_changed(url))

            # clone the watcher by saving and reloading from the cache
            watcher_cache_wrapper.save()
            watcher2_cache_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
                cache_path, requester)
            watcher2 = watcher2_cache_wrapper.watcher

            # [ D] can't consume newness in merge_data_consume_only() with
            #      unmatched hashes
            watcher.has_url_recently_changed(url)
            data_after_consume = watcher.get_data_for_merging()
            watcher2.refresh()
            watcher2.merge_data_consume_only(data_after_consume)
            self.assertTrue(watcher2.peek_has_url_recently_changed(url))
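
These tests rely on a _MockRequesterObject whose definition is not shown here. A minimal sketch of the kind of requester they assume, guessing that the watcher fetches via a get(url) call; the real mock may expose a different interface (for example a batched get_many):

class _SketchRequester(object):

    def __init__(self):
        self._counter = 0

    def get(self, url):
        # Return different content on every call so that each refresh()
        # sees a changed hash and re-arms the 'recently changed' flag.
        self._counter += 1
        return '{} {}'.format(url, self._counter)
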
def process(args):

    _ = args  # NOQA
    fs = abdt_fs.make_default_accessor()

    with fs.lockfile_context():
        pid = fs.get_pid_or_none()
        if pid is not None and phlsys_pid.is_running(pid):
            raise Exception("cannot fetch whilst arcyd is running.")

        repo_config_path_list = fs.repo_config_path_list()
        repo_name_config_list = abdi_repoargs.parse_config_file_list(
            repo_config_path_list)

        url_watcher_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
            fs.layout.urlwatcher_cache_path)

        # Let the user know what's happening before potentially blocking for a
        # while.
        print('Refreshing repository snoop status ..', end=' ')
        # Make sure that the output is actually visible by flushing stdout
        # XXX: Will use 'flush' parameter to 'print()' in Python 3.3
        sys.stdout.flush()
        print("done")

        url_watcher_wrapper.watcher.refresh()

        for repo_name, repo_config in repo_name_config_list:
            print(repo_name + ' ..', end=' ')

            # Make sure that the output is actually visible by flushing stdout
            # XXX: Will use 'flush' parameter to 'print()' in Python 3.3
            sys.stdout.flush()

            snoop_url = abdi_repoargs.get_repo_snoop_url(repo_config)

            sys_repo = phlsys_git.Repo(repo_config.repo_path)
            refcache_repo = phlgitx_refcache.Repo(sys_repo)
            differ_cache = abdt_differresultcache.Cache(refcache_repo)
            abd_repo = abdt_git.Repo(refcache_repo, differ_cache, "origin",
                                     repo_config.repo_desc)

            did_fetch = abdi_processrepoarglist.fetch_if_needed(
                url_watcher_wrapper.watcher, snoop_url, abd_repo,
                repo_config.repo_desc)

            if did_fetch:
                print('fetched')
            else:
                print('skipped')

            url_watcher_wrapper.save()
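
process() ignores its args, so wiring it up as a command-line entry point is mostly boilerplate. A hedged sketch using only the standard library; main() and the description text are illustrative, not the project's real entry point:

import argparse


def main():
    parser = argparse.ArgumentParser(
        description='fetch managed repositories if their snoop URLs changed')
    process(parser.parse_args())
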
def do(repo_configs, sys_admin_emails, sleep_secs, is_no_loop,
       external_report_command, mail_sender, max_workers, overrun_secs):

    conduit_manager = _ConduitManager()

    fs_accessor = abdt_fs.make_default_accessor()
    url_watcher_wrapper = phlurl_watcher.FileCacheWatcherWrapper(
        fs_accessor.layout.urlwatcher_cache_path)

    # decide max workers based on number of CPUs if no value is specified
    if max_workers == 0:
        max_workers = determine_max_workers_default()

    repo_list = []
    for name, config in repo_configs:
        repo_list.append(
            _ArcydManagedRepository(name, config, conduit_manager,
                                    url_watcher_wrapper, sys_admin_emails,
                                    mail_sender))

    # If we always overrun half our workers then the loop is sustainable; if
    # we overrun more than that then we'll be lagging too far behind. In the
    # event that we only have one worker, we can't overrun any.
    max_overrun_workers = max_workers // 2

    pool = phlmp_cyclingpool.CyclingPool(repo_list, max_workers,
                                         max_overrun_workers)

    cycle_timer = phlsys_timer.Timer()
    cycle_timer.start()
    exit_code = None
    while exit_code is None:

        # This timer needs to be separate from the cycle timer. The cycle timer
        # must be reset every time it is reported. The sleep timer makes sure
        # that each run of the loop takes a minimum amount of time.
        sleep_timer = phlsys_timer.Timer()
        sleep_timer.start()

        # refresh git snoops
        with abdt_logging.remote_io_read_event_context('refresh-git-snoop',
                                                       ''):
            abdt_tryloop.critical_tryloop(url_watcher_wrapper.watcher.refresh,
                                          abdt_errident.GIT_SNOOP, '')

        with abdt_logging.remote_io_read_event_context('refresh-conduit', ''):
            conduit_manager.refresh_conduits()

        with abdt_logging.misc_operation_event_context(
                'process-repos',
                '{} workers, {} repos'.format(max_workers, len(repo_list))):
            if max_workers > 1:
                for i, res in pool.cycle_results(overrun_secs=overrun_secs):
                    repo = repo_list[i]
                    repo.merge_from_worker(res)
            else:
                for r in repo_list:
                    r()

        # important to do this before stopping arcyd and as soon as possible
        # after doing fetches
        url_watcher_wrapper.save()

        # report cycle stats
        report = {
            "cycle_time_secs": cycle_timer.restart(),
            "overrun_jobs": pool.num_active_jobs,
        }
        _LOGGER.debug("cycle-stats: {}".format(report))
        if external_report_command:
            report_json = json.dumps(report)
            full_path = os.path.abspath(external_report_command)
            with abdt_logging.misc_operation_event_context(
                    'external-report-command', external_report_command):
                try:
                    phlsys_subprocess.run(full_path, stdin=report_json)
                except phlsys_subprocess.CalledProcessError as e:
                    _LOGGER.error(
                        "External command: {} failed with exception: "
                        "{}.".format(external_report_command,
                                     type(e).__name__))
                    _LOGGER.error(
                        "VERBOSE MESSAGE: CycleReportJson:{}".format(e))

        if is_no_loop:
            exit_code = abdi_processexitcodes.ExitCodes.ec_exit
        elif os.path.isfile(fs_accessor.layout.killfile):
            exit_code = abdi_processexitcodes.ExitCodes.ec_exit
            if phlsys_fs.read_text_file(fs_accessor.layout.killfile):
                _LOGGER.info("Killfile observed, reason given: {}".format(
                    phlsys_fs.read_text_file(fs_accessor.layout.killfile)))
            else:
                _LOGGER.info("Killfile observed, arcyd will stop")
            os.remove(fs_accessor.layout.killfile)
        elif os.path.isfile(fs_accessor.layout.reloadfile):
            _LOGGER.info("Reloadfile observed, arcyd will reload")
            exit_code = abdi_processexitcodes.ExitCodes.ec_reload
            os.remove(fs_accessor.layout.reloadfile)

        # sleep to pad out the cycle
        secs_to_sleep = float(sleep_secs) - float(sleep_timer.duration)
        if secs_to_sleep > 0 and exit_code is None:
            with abdt_logging.misc_operation_event_context(
                    'sleep', secs_to_sleep):
                time.sleep(secs_to_sleep)

    # finish any jobs that overran
    for i, res in pool.finish_results():
        repo = repo_list[i]
        repo.merge_from_worker(res)

    # important to do this before stopping arcyd and as soon as
    # possible after doing fetches
    url_watcher_wrapper.save()

    return exit_code
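
The loop above exits when it observes the killfile and reloads when it observes the reloadfile. A hedged sketch of asking a running arcyd to stop, assuming the same abdt_fs layout and that phlsys_fs offers a write_text_file counterpart to the read_text_file used above:

def request_stop(reason=''):
    # Writing the killfile makes the main loop exit at the end of the
    # current cycle; any text written is logged as the reason.
    fs = abdt_fs.make_default_accessor()
    phlsys_fs.write_text_file(fs.layout.killfile, reason)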