Exemple #1
0
    def test_owner_and_label_switches(self):
        runner_opts = ['--no-conf', '--owner=ads', '--label=ads_chain']
        runner = MRTwoStepJob(runner_opts).make_runner()
        match = JOB_KEY_RE.match(runner.get_job_key())

        self.assertEqual(match.group(1), 'ads_chain')
        self.assertEqual(match.group(2), 'ads')
Exemple #2
0
    def test_owner_and_label_kwargs(self):
        runner = InlineMRJobRunner(conf_paths=[],
                                   owner='ads', label='ads_chain')
        match = JOB_KEY_RE.match(runner.get_job_key())

        self.assertEqual(match.group(1), 'ads_chain')
        self.assertEqual(match.group(2), 'ads')
Exemple #3
0
    def test_auto_owner(self):
        os.environ['USER'] = '******'
        runner = InlineMRJobRunner(conf_paths=[])
        match = JOB_KEY_RE.match(runner.get_job_key())

        self.assertEqual(match.group(1), 'no_script')
        self.assertEqual(match.group(2), 'mcp')
Exemple #4
0
    def test_empty_no_user(self):
        self.getuser_should_fail = True
        runner = InlineMRJobRunner(conf_paths=[])
        match = JOB_KEY_RE.match(runner.get_job_key())

        self.assertEqual(match.group(1), 'no_script')
        self.assertEqual(match.group(2), 'no_user')
Exemple #5
0
    def test_auto_everything(self):
        test_start = datetime.datetime.utcnow()

        os.environ['USER'] = '******'
        runner = MRTwoStepJob(['--no-conf']).make_runner()
        match = JOB_KEY_RE.match(runner.get_job_key())

        self.assertEqual(match.group(1), 'mr_two_step_job')
        self.assertEqual(match.group(2), 'mcp')

        job_start = datetime.datetime.strptime(
            match.group(3) + match.group(4), '%Y%m%d%H%M%S')
        job_start = job_start.replace(microsecond=int(match.group(5)))
        self.assertGreaterEqual(job_start, test_start)
        self.assertLessEqual(job_start - test_start,
                             datetime.timedelta(seconds=5))
Exemple #6
0
def cluster_to_basic_summary(cluster, now=None):
    """Extract fields such as creation time, owner, etc. from the job flow,
    so we can safely reference them without using :py:func:`getattr`.

    :param cluster: a :py:class:`boto.emr.EmrObject`
    :param now: the current UTC time, as a :py:class:`datetime.datetime`.
                Defaults to the current time.

    Returns a dictionary with the following keys. These will be ``None`` if the
    corresponding field in the job flow is unavailable.

    * *created*: UTC `datetime.datetime` that the job flow was created,
      or ``None``
    * *end*: UTC `datetime.datetime` that the job flow finished, or ``None``
    * *id*: job flow ID, or ``None`` (this should never happen)
    * *label*: The label for the job flow (usually the module name of the
      :py:class:`~mrjob.job.MRJob` script that started it), or
      ``None`` for non-:py:mod:`mrjob` job flows.
    * *name*: job flow name, or ``None`` (this should never happen)
    * *nih*: number of normalized instance hours used by the job flow.
    * *num_steps*: Number of steps in the job flow.
    * *owner*: The owner for the job flow (usually the user that started it),
      or ``None`` for non-:py:mod:`mrjob` job flows.
    * *pool*: pool name (e.g. ``'default'``) if the job flow is pooled,
      otherwise ``None``.
    * *ran*: How long the job flow ran, or has been running, as a
      :py:class:`datetime.timedelta`. This will be ``timedelta(0)`` if
      the job flow hasn't started.
    * *ready*: UTC `datetime.datetime` that the job flow finished
      bootstrapping, or ``None``
    * *state*: The job flow's state as a string (e.g. ``'RUNNING'``)
    """
    if now is None:
        now = datetime.utcnow()

    bcs = {}  # basic cluster summary to fill in

    bcs['id'] = getattr(cluster, 'id', None)
    bcs['name'] = getattr(cluster, 'name', None)

    status = getattr(cluster, 'status', None)
    timeline = getattr(status, 'timeline', None)

    bcs['created'] = to_datetime(getattr(
        timeline, 'creationdatetime', None))
    bcs['ready'] = to_datetime(getattr(timeline, 'readydatetime', None))
    bcs['end'] = to_datetime(getattr(timeline, 'enddatetime', None))

    if bcs['created']:
        bcs['ran'] = (bcs['end'] or now) - bcs['created']
    else:
        bcs['ran'] = timedelta(0)

    bcs['state'] = getattr(status, 'state', None)

    bcs['num_steps'] = len(getattr(cluster, 'steps', ()))

    bcs['pool'] = None
    bootstrap_actions = getattr(cluster, 'bootstrapactions', None)
    if bootstrap_actions:
        args = [arg.value for arg in bootstrap_actions[-1].args]
        if len(args) == 2 and args[0].startswith('pool-'):
            bcs['pool'] = args[1]

    m = JOB_KEY_RE.match(bcs['name'] or '')
    if m:
        bcs['label'], bcs['owner'] = m.group(1), m.group(2)
    else:
        bcs['label'], bcs['owner'] = None, None

    bcs['nih'] = float(getattr(cluster, 'normalizedinstancehours', '0'))

    return bcs
Exemple #7
0
def cluster_to_basic_summary(cluster, now=None):
    """Extract fields such as creation time, owner, etc. from the cluster,
    so we can safely reference them without using :py:func:`getattr`.

    :param cluster: a :py:class:`boto.emr.EmrObject`
    :param now: the current UTC time, as a :py:class:`datetime.datetime`.
                Defaults to the current time.

    Returns a dictionary with the following keys. These will be ``None`` if the
    corresponding field in the cluster is unavailable.

    * *created*: UTC `datetime.datetime` that the cluster was created,
      or ``None``
    * *end*: UTC `datetime.datetime` that the cluster finished, or ``None``
    * *id*: cluster ID, or ``None`` (this should never happen)
    * *label*: The label for the cluster (usually the module name of the
      :py:class:`~mrjob.job.MRJob` script that started it), or
      ``None`` for non-:py:mod:`mrjob` clusters.
    * *name*: cluster name, or ``None`` (this should never happen)
    * *nih*: number of normalized instance hours used by the cluster.
    * *num_steps*: Number of steps in the cluster.
    * *owner*: The owner for the cluster (usually the user that started it),
      or ``None`` for non-:py:mod:`mrjob` clusters.
    * *pool*: pool name (e.g. ``'default'``) if the cluster is pooled,
      otherwise ``None``.
    * *ran*: How long the cluster ran, or has been running, as a
      :py:class:`datetime.timedelta`. This will be ``timedelta(0)`` if
      the cluster hasn't started.
    * *ready*: UTC `datetime.datetime` that the cluster finished
      bootstrapping, or ``None``
    * *state*: The cluster's state as a string (e.g. ``'RUNNING'``)
    """
    if now is None:
        now = datetime.utcnow()

    bcs = {}  # basic cluster summary to fill in

    bcs['id'] = getattr(cluster, 'id', None)
    bcs['name'] = getattr(cluster, 'name', None)

    status = getattr(cluster, 'status', None)
    timeline = getattr(status, 'timeline', None)

    bcs['created'] = to_datetime(getattr(
        timeline, 'creationdatetime', None))
    bcs['ready'] = to_datetime(getattr(timeline, 'readydatetime', None))
    bcs['end'] = to_datetime(getattr(timeline, 'enddatetime', None))

    if bcs['created']:
        bcs['ran'] = (bcs['end'] or now) - bcs['created']
    else:
        bcs['ran'] = timedelta(0)

    bcs['state'] = getattr(status, 'state', None)

    bcs['num_steps'] = len(getattr(cluster, 'steps', ()))

    bcs['pool'] = None
    bootstrap_actions = getattr(cluster, 'bootstrapactions', None)
    if bootstrap_actions:
        args = [arg.value for arg in bootstrap_actions[-1].args]
        if len(args) == 2 and args[0].startswith('pool-'):
            bcs['pool'] = args[1]

    m = JOB_KEY_RE.match(bcs['name'] or '')
    if m:
        bcs['label'], bcs['owner'] = m.group(1), m.group(2)
    else:
        bcs['label'], bcs['owner'] = None, None

    bcs['nih'] = float(getattr(cluster, 'normalizedinstancehours', '0'))

    return bcs
Exemple #8
0
    def test_auto_label(self):
        runner = MRTwoStepJob(['--no-conf']).make_runner()
        match = JOB_KEY_RE.match(runner.get_job_key())

        self.assertEqual(match.group(1), 'mr_two_step_job')
        self.assertEqual(match.group(2), getpass.getuser())
Exemple #9
0
    def test_empty(self):
        runner = InlineMRJobRunner(conf_paths=[])
        match = JOB_KEY_RE.match(runner.get_job_key())

        self.assertEqual(match.group(1), 'no_script')
        self.assertEqual(match.group(2), getpass.getuser())