def _ping(tgt, tgt_type, timeout, gather_job_timeout):
    """
    Publish ``test.ping`` to the targeted minions and sort them by response.

    :param tgt: target expression forwarded to ``run_job``
    :param tgt_type: target type forwarded to ``run_job``
    :param timeout: job timeout; also converted through the client's
        ``_get_timeout`` for the event-return wait
    :param gather_job_timeout: forwarded to ``get_cli_event_returns``
    :return: the (falsy) ``run_job`` result unchanged when nothing was
        published, otherwise a ``(returned, not_returned)`` tuple of sorted
        minion-id lists
    """
    local = salt.client.get_local_client(__opts__["conf_file"])
    job = local.run_job(
        tgt, "test.ping", (), tgt_type, "", timeout, "", listen=True
    )
    if not job:
        # Hand the falsy publish result back as-is; callers must check for it.
        return job

    expected = job["minions"]
    log.debug(
        "manage runner will ping the following minion(s): %s",
        ", ".join(sorted(expected)),
    )

    replies = local.get_cli_event_returns(
        job["jid"],
        expected,
        local._get_timeout(timeout),
        tgt,
        tgt_type,
        gather_job_timeout=gather_job_timeout,
    )

    responded = set()
    for reply in replies:
        if not reply:
            continue
        # Each reply is keyed by minion id; only the ids matter here.
        for minion_id in reply:
            log.debug("minion '%s' returned from ping", minion_id)
            responded.add(minion_id)

    silent = set(expected).difference(responded)
    return sorted(responded), sorted(silent)
def _ping(tgt, tgt_type, timeout, gather_job_timeout):
    """
    Ping the targeted minions with ``test.ping`` and report who answered.

    The local client is used as a context manager for the whole operation.

    :param tgt: target expression forwarded to ``run_job``
    :param tgt_type: target type forwarded to ``run_job``
    :param timeout: job timeout; also converted through the client's
        ``_get_timeout`` for the event-return wait
    :param gather_job_timeout: forwarded to ``get_cli_event_returns``
    :return: the (falsy) ``run_job`` result unchanged when nothing was
        published, otherwise a ``(returned, not_returned)`` tuple of sorted
        minion-id lists
    """
    with salt.client.get_local_client(__opts__["conf_file"]) as local:
        job = local.run_job(
            tgt, "test.ping", (), tgt_type, "", timeout, "", listen=True
        )
        if not job:
            # Hand the falsy publish result back as-is.
            return job

        targeted = job["minions"]
        log.debug(
            "manage runner will ping the following minion(s): %s",
            ", ".join(sorted(targeted)),
        )

        event_returns = local.get_cli_event_returns(
            job["jid"],
            targeted,
            local._get_timeout(timeout),
            tgt,
            tgt_type,
            gather_job_timeout=gather_job_timeout,
        )

        answered = set()
        for event_ret in event_returns:
            if not event_ret:
                continue
            # Replies are keyed by minion id; the payload is not needed.
            for minion_id in event_ret:
                log.debug("minion '%s' returned from ping", minion_id)
                answered.add(minion_id)

        unanswered = set(targeted).difference(answered)
        return sorted(answered), sorted(unanswered)
def local_async(self, *args, **kwargs):
    """
    Run :ref:`execution modules <all-salt.modules>` asynchronously

    Opens a local client built from ``self.opts`` and forwards every
    positional and keyword argument to
    :py:meth:`salt.client.LocalClient.run_job`.

    :return: whatever ``run_job`` returns for the published job
    """
    with salt.client.get_local_client(mopts=self.opts) as local:
        published = local.run_job(*args, **kwargs)
        return published
def fetch(self, minion_id, sync_type):
    """
    Start a salt job asking ``minion_id`` for one cluster object.

    Records the start time in ``self._fetching_at[sync_type]`` and publishes
    ``ceph.get_cluster_object`` to the minion. The job result is not
    collected here; only the outcome of publishing is logged.

    :param minion_id: minion to query; ``None`` makes this a logged no-op
    :param sync_type: object type to fetch; its ``.str`` attribute is sent
        to the minion as ``sync_type``
    :return: None
    """
    log.debug("SyncObjects.fetch: %s/%s", minion_id, sync_type)
    if minion_id is None:
        # We're probably being replayed to from the database
        # (Logger.warn is a deprecated alias; use Logger.warning.)
        log.warning("SyncObjects.fetch called with minion_id=None")
        return

    # Note: the timestamp is recorded before publishing, so it is set even
    # when the publish below fails.
    self._fetching_at[sync_type] = now()
    client = salt.client.LocalClient(config.get('cthulhu', 'salt_config_path'))

    # TODO clean up unused 'since' argument
    pub_data = client.run_job(
        minion_id,
        'ceph.get_cluster_object',
        condition_kwarg(
            [],
            {
                'cluster_name': self._cluster_name,
                'sync_type': sync_type.str,
                'since': None,
            },
        ),
    )
    if not pub_data:
        # Deliberately do not raise: a failed publish is only logged and
        # the caller carries on.
        log.error("Failed to start fetch job %s/%s", minion_id, sync_type)
    else:
        log.debug(
            "SyncObjects.fetch: jid=%s minions=%s",
            pub_data['jid'],
            pub_data['minions'],
        )
def _ping(tgt, expr_form, timeout):
    """
    Publish ``test.ping`` to the targeted minions and split them by response.

    :param tgt: target expression forwarded to ``run_job``
    :param expr_form: targeting form, forwarded to ``run_job`` and to
        ``get_cli_event_returns``
    :param timeout: job timeout; also converted through the client's
        ``_get_timeout`` for the event-return wait
    :return: the (falsy) ``run_job`` result unchanged when nothing was
        published, otherwise ``(returned, not_returned)`` as unsorted lists
    """
    local = salt.client.get_local_client(__opts__['conf_file'])
    job = local.run_job(tgt, 'test.ping', (), expr_form, '', timeout, '')
    if not job:
        return job

    replies = local.get_cli_event_returns(
        job['jid'],
        job['minions'],
        local._get_timeout(timeout),
        tgt,
        expr_form,
    )

    answered = set()
    for reply in replies:
        if not reply:
            continue
        # Replies are keyed by minion id; add ids one by one.
        for minion_id in reply:
            answered.add(minion_id)

    silent = set(job['minions']) - answered
    return list(answered), list(silent)
def _ping(tgt, tgt_type, timeout):
    """
    Send ``test.ping`` to the targeted minions and split them by response.

    :param tgt: target expression forwarded to ``run_job``
    :param tgt_type: target type, forwarded to ``run_job`` and to
        ``get_cli_event_returns``
    :param timeout: job timeout; also converted through the client's
        ``_get_timeout`` for the event-return wait
    :return: the (falsy) ``run_job`` result unchanged when nothing was
        published, otherwise ``(returned, not_returned)`` as unsorted lists
    """
    local = salt.client.get_local_client(__opts__['conf_file'])
    published = local.run_job(tgt, 'test.ping', (), tgt_type, '', timeout, '')
    if not published:
        return published

    event_returns = local.get_cli_event_returns(
        published['jid'],
        published['minions'],
        local._get_timeout(timeout),
        tgt,
        tgt_type,
    )

    up = set()
    for event_ret in event_returns:
        if not event_ret:
            continue
        # Only the minion ids (the keys) are of interest.
        for minion_id in event_ret:
            up.add(minion_id)

    down = set(published['minions']) - up
    return list(up), list(down)
def _run_job(
    tgt,
    fun,
    arg,
    kwarg,
    tgt_type,
    timeout,
    job_retry=0,
    progress="log",
    show_progress=False,
    raise_no_tgt_match=True,
):
    """
    Helper function to send execution module command using ``client.run_job``
    method and collect results using ``client.get_cli_returns``.

    Implements basic retry mechanism.

    Results are polled with ``client.get_cli_returns`` (1 second timeout per
    poll) until ``timeout`` seconds have passed since the job was published.
    For minions that returned no results by then, ``_run_job`` publishes the
    job again, targeting only those minions, until all of them return results
    or the ``job_retry`` threshold is reached; in the latter case an ``error``
    message is logged with job details.

    :param tgt: (str) target to use with ``client.run_job`` function
    :param tgt_type: (str) target type to use with ``client.run_job`` function
    :param fun: (str) function name to use with ``client.run_job`` function
    :param arg: (list) arguments list to use with ``client.run_job`` function
    :param kwarg: (dict) keyword arguments dictionary to use with ``client.run_job`` function
    :param timeout: (int) timeout to use with ``client.run_job`` function
    :param job_retry: (int) times to retry the job
    :param progress: (str) progress display style, default is "log"
    :param show_progress: (bool) if True, prints execution progress
    :param raise_no_tgt_match: (bool) if True (default) raises error if no hosts matched
    :return: (dict) results keyed by minion ID, accumulated over all attempts;
        empty dictionary if salt is not available or if no minions matched and
        ``raise_no_tgt_match`` is False
    :raises CommandExecutionError: if no minions matched and
        ``raise_no_tgt_match`` is True
    """
    if not HAS_SALT:
        return {}

    # initiate local client to run execution module commands 'salt "*" ...'
    client = salt.client.LocalClient()
    ret = {}
    attempt = 0
    minions_no_return = None
    events_thread = None
    stop_signal = Event()

    try:
        while attempt <= job_retry:
            stop_signal.clear()
            start_time = time.time()
            # publish job command
            pub_data = client.run_job(
                tgt=tgt,
                fun=fun,
                arg=arg,
                kwarg=kwarg,
                tgt_type=tgt_type,
                timeout=timeout,
            )
            # check if no minions matched by target
            if "jid" not in pub_data:
                if raise_no_tgt_match:
                    raise CommandExecutionError(
                        "No minions matched by tgt '{}', tgt_type '{}'".format(
                            tgt, tgt_type
                        )
                    )
                return {}
            if show_progress:
                events_thread = Thread(
                    target=event,
                    kwargs={
                        "jid": pub_data["jid"],
                        "stop_signal": stop_signal,
                        "progress": progress,
                    },
                    daemon=True,  # to not block once main process finishes
                )
                events_thread.start()
            # Seed the pending set before polling so the timeout branch always
            # has a real set to report, even if the polling loop never runs
            # (e.g. timeout <= 0).
            minions_no_return = set(pub_data["minions"]) - set(ret)
            # collect job results until timeout
            while (time.time() - start_time) < timeout:
                job_results = client.get_cli_returns(
                    jid=pub_data["jid"],
                    minions=pub_data["minions"],
                    timeout=1,
                    tgt=tgt,
                    tgt_type=tgt_type,
                )
                # form results
                for item in job_results:
                    ret.update(item)
                # ``ret`` accumulates results across attempts, so on a retry
                # it holds more minions than this attempt targeted; comparing
                # for equality would never succeed. Check only that every
                # minion of this attempt has reported.
                minions_no_return = set(pub_data["minions"]) - set(ret)
                if not minions_no_return:
                    break
            else:
                log.warning(
                    "Nornir-runner:_run_job - {}s timeout; no results from {}; returned {}; jid {}; attempt: {}".format(
                        timeout,
                        list(minions_no_return),
                        list(ret.keys()),
                        pub_data["jid"],
                        attempt,
                    )
                )
                # retry job but only for minions that did not return results
                attempt += 1
                tgt = list(minions_no_return)
                tgt_type = "list"
                # stop progress thread and wait for 5 seconds
                stop_signal.set()
                time.sleep(5)
                # inform user about retry
                log.info(
                    "Retrying '{fun}' for '{tgt}', attempt {attempt}\n".format(
                        fun=fun, tgt=tgt, attempt=attempt
                    )
                )
                continue
            # if we get to this point - job did not timeout and we received
            # results from all minions
            break
        else:
            # retries exhausted without hearing from every minion
            log.error(
                "Nornir-runner:_run_job - no results from minions '{}'; tgt: {}; fun: {}; tgt_type: {}; timeout: {}; job_retry: {}; kwarg: {}".format(
                    minions_no_return, tgt, fun, tgt_type, timeout, job_retry, kwarg
                )
            )
    finally:
        # Cleanup runs on every exit path, including exceptions raised while
        # publishing or collecting results.
        # stop eventloop thread
        if events_thread is not None:
            stop_signal.set()
            events_thread.join(timeout=10)
        # kill local client instance
        if hasattr(client, "destroy"):
            client.destroy()

    return ret
def run_for_tasks_count():
    """
    Run 10000 randomly chosen tasks against minion ``nrp1`` and write a report.

    Tasks are picked at random from the module-level ``tasks_list`` and
    published with ``client.run_job`` in batches; each batch is as large as
    the number of entries in the ``nr.nornir worker stats`` result for
    ``nrp1``. After each batch the job results are collected with
    ``client.get_cli_returns`` (60 second timeout) and appended to the
    report. ``nr.nornir stats`` and worker stats are recorded before and
    after the run.

    The report is written to ``run_for_tasks_count_report.txt`` in the
    current working directory.
    """
    tasks_count = 10000

    # ``with`` guarantees the report is flushed and closed even when one of
    # the salt calls below raises.
    with open(
        "run_for_tasks_count_report.txt", "w", encoding="utf-8"
    ) as job_results_report:
        worker_stats_before = client.cmd(
            tgt="nrp1",
            fun="nr.nornir",
            arg=["worker", "stats"],
        )
        stats_before = client.cmd(
            tgt="nrp1",
            fun="nr.nornir",
            arg=["stats"],
        )
        # Always submit at least one job per batch; with an empty worker
        # stats result the loop below would otherwise never make progress.
        workers_count = max(len(worker_stats_before["nrp1"]), 1)

        job_results_report.write(
            "Stats before:\n{}\n".format(pprint.pformat(stats_before))
        )
        job_results_report.write(
            "Workers Stats before:\n{}\n".format(
                pprint.pformat(worker_stats_before)
            )
        )

        progress = Progress(
            "[progress.description]{task.description}",
            BarColumn(),
            "[progress.percentage]{task.percentage:>3.0f}%",
            "{task.completed}/{task.total}",
            TimeElapsedColumn(),
            refresh_per_second=5,
        )

        # run tasks
        with progress:
            tasks_progress = progress.add_task(
                "Running {} tasks".format(tasks_count), total=tasks_count
            )
            while tasks_count > 0:
                job_results_report.write(
                    "Tasks count: {}\n".format(tasks_count)
                )
                jobs = []
                for _ in range(workers_count):
                    if tasks_count <= 0:
                        # nothing left to submit in this batch
                        break
                    task = random.choice(tasks_list)
                    job = client.run_job(**task)
                    jobs.append(job)
                    job_results_report.write(
                        "\nTask {}, started job:\n{}\n{}\n".format(
                            tasks_count, job, task
                        )
                    )
                    tasks_count -= 1
                    progress.update(tasks_progress, advance=1)
                # get jobs
                for j in jobs:
                    results_iterator = client.get_cli_returns(timeout=60, **j)
                    for i in results_iterator:
                        job_results_report.write(
                            "\nFinished job: {}\n".format(j)
                        )
                        job_results_report.write(pprint.pformat(i) + "\n")

        # collect after stats
        worker_stats_after = client.cmd(
            tgt="nrp1",
            fun="nr.nornir",
            arg=["worker", "stats"],
        )
        stats_after = client.cmd(
            tgt="nrp1",
            fun="nr.nornir",
            arg=["stats"],
        )
        job_results_report.write(
            "\nStats after:\n{}\n".format(pprint.pformat(stats_after))
        )
        job_results_report.write(
            "\nWorkers Stats after:\n{}\n".format(
                pprint.pformat(worker_stats_after)
            )
        )


# run_for_tasks_count()