def main(args, now=None):
    """Report on EMR jobs that have been running for a long time.

    :param args: command-line arguments, excluding the program name
    :param now: the current UTC time, as a :py:class:`datetime.datetime`;
                defaults to the actual current time
    """
    if now is None:
        now = datetime.utcnow()

    parser = _make_option_parser()
    opts, leftover = parser.parse_args(args)

    # this tool is driven entirely by options, not positional args
    if leftover:
        parser.error('takes no arguments')

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    log.info('getting information about running jobs')

    emr_conn = EMRJobRunner(**_runner_kwargs(opts)).make_emr_conn()

    # only active clusters can host a long-running job
    active_clusters = _yield_all_clusters(
        emr_conn, cluster_states=['STARTING', 'BOOTSTRAPPING', 'RUNNING'])

    threshold = timedelta(hours=opts.min_hours)

    _print_report(
        _find_long_running_jobs(emr_conn, active_clusters, threshold, now=now))
def _yield_clusters(max_days_ago=None, now=None, **runner_kwargs):
    """Generate full cluster descriptions from EMR, throttling API calls.

    :param float max_days_ago: if set, skip clusters created more than
                               this many days ago
    :param now: the current UTC time, as a :py:class:`datetime.datetime`;
                defaults to the actual current time
    :param runner_kwargs: keyword args to pass through to
                          :py:class:`~mrjob.emr.EMRJobRunner`
    """
    if now is None:
        now = datetime.utcnow()

    emr_conn = EMRJobRunner(**runner_kwargs).make_emr_conn()

    # honor --max-days-ago by asking the API for recent clusters only
    created_after = (
        now - timedelta(days=max_days_ago)
        if max_days_ago is not None else None)

    # use _DELAY to sleep 1 second before each API call (see #1091). Could
    # implement some sort of connection wrapper for this if it becomes more
    # generally useful.
    summaries = _yield_all_clusters(
        emr_conn, created_after=created_after, _delay=_DELAY)

    for summary in summaries:
        cid = summary.id

        # throttle the extra describe call too
        sleep(_DELAY)
        cluster = _patched_describe_cluster(emr_conn, cid)

        # flesh the description out with steps and bootstrap actions
        cluster.steps = _list_all_steps(emr_conn, cid, _delay=_DELAY)
        cluster.bootstrapactions = list(
            _yield_all_bootstrap_actions(emr_conn, cid, _delay=_DELAY))

        yield cluster
def yield_clusters(max_days_ago=None, now=None, **runner_kwargs):
    """Get relevant cluster information from EMR.

    :param float max_days_ago: If set, don't fetch clusters created longer
                               than this many days ago.
    :param now: the current UTC time, as a :py:class:`datetime.datetime`.
                Defaults to the current time.
    :param runner_kwargs: keyword args to pass through to
                          :py:class:`~mrjob.emr.EMRJobRunner`
    """
    if now is None:
        now = datetime.utcnow()

    emr_conn = EMRJobRunner(**runner_kwargs).make_emr_conn()

    # if --max-days-ago is set, only look at recent jobs
    created_after = None
    if max_days_ago is not None:
        created_after = now - timedelta(days=max_days_ago)

    for cluster_summary in _yield_all_clusters(
            emr_conn, created_after=created_after):
        cluster_id = cluster_summary.id

        # flesh the summary out into a full cluster description, with
        # steps and bootstrap actions attached
        cluster = patched_describe_cluster(emr_conn, cluster_id)
        cluster.steps = list(_yield_all_steps(emr_conn, cluster_id))
        cluster.bootstrapactions = list(
            _yield_all_bootstrap_actions(emr_conn, cluster_id))

        yield cluster
def yield_clusters(max_days_ago=None, now=None, **runner_kwargs):
    """Yield full cluster descriptions from EMR.

    :param float max_days_ago: if set, skip clusters created more than
                               this many days ago
    :param now: the current UTC time, as a :py:class:`datetime.datetime`;
                defaults to the actual current time
    :param runner_kwargs: keyword args to pass through to
                          :py:class:`~mrjob.emr.EMRJobRunner`
    """
    if now is None:
        now = datetime.utcnow()

    emr_conn = EMRJobRunner(**runner_kwargs).make_emr_conn()

    # translate --max-days-ago into a creation-time cutoff for the API
    if max_days_ago is None:
        created_after = None
    else:
        created_after = now - timedelta(days=max_days_ago)

    for summary in _yield_all_clusters(emr_conn, created_after=created_after):
        cid = summary.id

        # expand the summary into a full description, then attach steps
        # and bootstrap actions so callers get everything in one object
        cluster = patched_describe_cluster(emr_conn, cid)
        cluster.steps = list(_yield_all_steps(emr_conn, cid))
        cluster.bootstrapactions = list(
            _yield_all_bootstrap_actions(emr_conn, cid))

        yield cluster
def _yield_clusters(max_days_ago=None, now=None, **runner_kwargs):
    """Get relevant cluster information from EMR.

    :param float max_days_ago: If set, don't fetch clusters created longer
                               than this many days ago.
    :param now: the current UTC time, as a :py:class:`datetime.datetime`.
                Defaults to the current time.
    :param runner_kwargs: keyword args to pass through to
                          :py:class:`~mrjob.emr.EMRJobRunner`
    """
    if now is None:
        now = datetime.utcnow()

    emr_conn = EMRJobRunner(**runner_kwargs).make_emr_conn()

    # if --max-days-ago is set, only look at recent jobs
    created_after = None
    if max_days_ago is not None:
        created_after = now - timedelta(days=max_days_ago)

    # use _DELAY to sleep 1 second before each API call (see #1091). Could
    # implement some sort of connection wrapper for this if it becomes more
    # generally useful.
    for cluster_summary in _yield_all_clusters(
            emr_conn, created_after=created_after, _delay=_DELAY):
        cluster_id = cluster_summary.id

        # throttle before the extra describe call as well
        sleep(_DELAY)
        cluster = _patched_describe_cluster(emr_conn, cluster_id)

        # attach steps and bootstrap actions so callers get one object
        cluster.steps = _list_all_steps(emr_conn, cluster_id, _delay=_DELAY)
        cluster.bootstrapactions = list(
            _yield_all_bootstrap_actions(emr_conn, cluster_id, _delay=_DELAY))

        yield cluster
def main(args, now=None):
    """Print a report on long-running EMR jobs.

    :param args: command-line arguments, excluding the program name
    :param now: the current UTC time, as a :py:class:`datetime.datetime`.
                Defaults to the current time.
    """
    if now is None:
        now = datetime.utcnow()

    option_parser = _make_option_parser()
    options, args = option_parser.parse_args(args)

    # this tool is driven entirely by options, not positional args
    if args:
        option_parser.error("takes no arguments")

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    log.info("getting information about running jobs")

    emr_conn = EMRJobRunner(**_runner_kwargs(options)).make_emr_conn()

    # only active clusters can be running a long job
    cluster_summaries = _yield_all_clusters(
        emr_conn, cluster_states=["STARTING", "BOOTSTRAPPING", "RUNNING"])

    min_time = timedelta(hours=options.min_hours)

    job_info = _find_long_running_jobs(
        emr_conn, cluster_summaries, min_time, now=now)

    _print_report(job_info)
def _maybe_terminate_clusters(dry_run=False, max_hours_idle=None,
                              mins_to_end_of_hour=None, now=None,
                              pool_name=None, pooled_only=False,
                              unpooled_only=False, max_mins_locked=None,
                              quiet=False, **kwargs):
    """Scan all EMR clusters and terminate the ones that look idle.

    :param bool dry_run: if true, report what would be terminated without
                         actually terminating anything
    :param max_hours_idle: terminate clusters idle at least this many hours
    :param mins_to_end_of_hour: terminate idle clusters within this many
                                minutes of the end of a billed hour
    :param now: the current UTC time, as a :py:class:`datetime.datetime`.
                Defaults to the current time.
    :param pool_name: only consider clusters in this pool
    :param bool pooled_only: only consider pooled clusters
    :param bool unpooled_only: only consider unpooled clusters
    :param max_mins_locked: passed through to ``_terminate_and_notify()``
    :param bool quiet: passed through to ``_terminate_and_notify()``
    :param kwargs: keyword args to pass through to
                   :py:class:`~mrjob.emr.EMRJobRunner`
    """
    if now is None:
        now = datetime.utcnow()

    # old default behavior: if neither threshold is given, fall back to
    # the default idle-hours cutoff
    if max_hours_idle is None and mins_to_end_of_hour is None:
        max_hours_idle = _DEFAULT_MAX_HOURS_IDLE

    runner = EMRJobRunner(**kwargs)
    emr_conn = runner.make_emr_conn()

    # tallies for the summary line logged at the end
    num_starting = 0
    num_bootstrapping = 0
    num_done = 0
    num_idle = 0
    num_non_streaming = 0
    num_pending = 0
    num_running = 0

    # We don't filter by cluster state because we want this to work even
    # if Amazon adds another kind of idle state.
    for cluster_summary in _yield_all_clusters(emr_conn):
        cluster_id = cluster_summary.id

        # check if cluster is done
        if _is_cluster_done(cluster_summary):
            num_done += 1
            continue

        # check if cluster is starting
        if _is_cluster_starting(cluster_summary):
            num_starting += 1
            continue

        # check if cluster is bootstrapping
        if _is_cluster_bootstrapping(cluster_summary):
            num_bootstrapping += 1
            continue

        # need steps to learn more about cluster
        steps = _list_all_steps(emr_conn, cluster_id)

        # we can't really tell if non-streaming jobs are idle or not, so
        # let them be (see Issue #60)
        if _is_cluster_non_streaming(steps):
            num_non_streaming += 1
            continue

        if any(_is_step_running(step) for step in steps):
            num_running += 1
            continue

        # cluster is idle; work out for how long, and how it is pooled
        time_idle = now - _time_last_active(cluster_summary, steps)
        time_to_end_of_hour = _est_time_to_hour(cluster_summary, now=now)
        is_pending = _cluster_has_pending_steps(steps)

        bootstrap_actions = list(
            _yield_all_bootstrap_actions(emr_conn, cluster_id))
        _, pool = _pool_hash_and_name(bootstrap_actions)

        if is_pending:
            num_pending += 1
        else:
            num_idle += 1

        log.debug('cluster %s %s for %s, %s to end of hour, %s (%s)' %
                  (cluster_id,
                   'pending' if is_pending else 'idle',
                   strip_microseconds(time_idle),
                   strip_microseconds(time_to_end_of_hour),
                   ('unpooled' if pool is None else 'in %s pool' % pool),
                   cluster_summary.name))

        # filter out clusters that don't meet our criteria
        if (max_hours_idle is not None and
                time_idle <= timedelta(hours=max_hours_idle)):
            continue

        # mins_to_end_of_hour doesn't apply to jobs with pending steps
        if (mins_to_end_of_hour is not None and
                (is_pending or
                 time_to_end_of_hour >= timedelta(
                     minutes=mins_to_end_of_hour))):
            continue

        if (pooled_only and pool is None):
            continue

        if (unpooled_only and pool is not None):
            continue

        if (pool_name is not None and pool != pool_name):
            continue

        # terminate idle cluster
        _terminate_and_notify(runner=runner,
                              cluster_id=cluster_id,
                              cluster_name=cluster_summary.name,
                              num_steps=len(steps),
                              is_pending=is_pending,
                              time_idle=time_idle,
                              time_to_end_of_hour=time_to_end_of_hour,
                              dry_run=dry_run,
                              max_mins_locked=max_mins_locked,
                              quiet=quiet)

    log.info('Cluster statuses: %d starting, %d bootstrapping, %d running,'
             ' %d pending, %d idle, %d active non-streaming, %d done' % (
                 num_starting, num_bootstrapping, num_running,
                 num_pending, num_idle, num_non_streaming, num_done))
def _maybe_terminate_clusters(dry_run=False, max_hours_idle=None,
                              mins_to_end_of_hour=None, now=None,
                              pool_name=None, pooled_only=False,
                              unpooled_only=False, max_mins_locked=None,
                              quiet=False, **kwargs):
    """Scan all EMR clusters and terminate the ones that look idle.

    :param bool dry_run: if true, report what would be terminated without
                         actually terminating anything
    :param max_hours_idle: terminate clusters idle at least this many hours
    :param mins_to_end_of_hour: terminate idle clusters within this many
                                minutes of the end of a billed hour
    :param now: the current UTC time, as a :py:class:`datetime.datetime`.
                Defaults to the current time.
    :param pool_name: only consider clusters in this pool
    :param bool pooled_only: only consider pooled clusters
    :param bool unpooled_only: only consider unpooled clusters
    :param max_mins_locked: passed through to ``_terminate_and_notify()``
    :param bool quiet: passed through to ``_terminate_and_notify()``
    :param kwargs: keyword args to pass through to
                   :py:class:`~mrjob.emr.EMRJobRunner`
    """
    if now is None:
        now = datetime.utcnow()

    # old default behavior: if neither threshold is given, fall back to
    # the default idle-hours cutoff
    if max_hours_idle is None and mins_to_end_of_hour is None:
        max_hours_idle = _DEFAULT_MAX_HOURS_IDLE

    runner = EMRJobRunner(**kwargs)
    emr_conn = runner.make_emr_conn()

    # tallies for the summary line logged at the end
    num_starting = 0
    num_bootstrapping = 0
    num_done = 0
    num_idle = 0
    num_pending = 0
    num_running = 0

    # We don't filter by cluster state because we want this to work even
    # if Amazon adds another kind of idle state.
    for cluster_summary in _yield_all_clusters(emr_conn):
        cluster_id = cluster_summary.id

        # check if cluster is done
        if _is_cluster_done(cluster_summary):
            num_done += 1
            continue

        # check if cluster is starting
        if _is_cluster_starting(cluster_summary):
            num_starting += 1
            continue

        # check if cluster is bootstrapping
        if _is_cluster_bootstrapping(cluster_summary):
            num_bootstrapping += 1
            continue

        # need steps to learn more about cluster
        steps = _list_all_steps(emr_conn, cluster_id)

        if any(_is_step_running(step) for step in steps):
            num_running += 1
            continue

        # cluster is idle; work out for how long, and how it is pooled
        time_idle = now - _time_last_active(cluster_summary, steps)
        time_to_end_of_hour = _est_time_to_hour(cluster_summary, now=now)
        is_pending = _cluster_has_pending_steps(steps)

        bootstrap_actions = list(_yield_all_bootstrap_actions(
            emr_conn, cluster_id))
        _, pool = _pool_hash_and_name(bootstrap_actions)

        if is_pending:
            num_pending += 1
        else:
            num_idle += 1

        log.debug(
            'cluster %s %s for %s, %s to end of hour, %s (%s)' %
            (cluster_id,
             'pending' if is_pending else 'idle',
             strip_microseconds(time_idle),
             strip_microseconds(time_to_end_of_hour),
             ('unpooled' if pool is None else 'in %s pool' % pool),
             cluster_summary.name))

        # filter out clusters that don't meet our criteria
        if (max_hours_idle is not None and
                time_idle <= timedelta(hours=max_hours_idle)):
            continue

        # mins_to_end_of_hour doesn't apply to jobs with pending steps
        if (mins_to_end_of_hour is not None and
                (is_pending or
                 time_to_end_of_hour >= timedelta(
                     minutes=mins_to_end_of_hour))):
            continue

        if (pooled_only and pool is None):
            continue

        if (unpooled_only and pool is not None):
            continue

        if (pool_name is not None and pool != pool_name):
            continue

        # terminate idle cluster
        _terminate_and_notify(
            runner=runner,
            cluster_id=cluster_id,
            cluster_name=cluster_summary.name,
            num_steps=len(steps),
            is_pending=is_pending,
            time_idle=time_idle,
            time_to_end_of_hour=time_to_end_of_hour,
            dry_run=dry_run,
            max_mins_locked=max_mins_locked,
            quiet=quiet)

    log.info(
        'Cluster statuses: %d starting, %d bootstrapping, %d running,'
        ' %d pending, %d idle, %d done' % (
            num_starting, num_bootstrapping, num_running,
            num_pending, num_idle, num_done))