Code Example #1
def refine(line, prefix=None, suffix=None):
    line = to_bytes(line)
    prefix = to_bytes(prefix) if prefix else b""
    suffix = to_bytes(suffix) if suffix else b""
    parts = line.split(b"]", 1)
    if len(parts) == 2:
        header, body = parts
        return b"".join((header, b"]", prefix, body, suffix))
    else:
        return line
Code Example #2
def decorate(source, line, version=VERSION, now=None, lineid=None):
    if now is None:
        now = datetime.utcnow()
    tstamp = to_bytes(now.strftime(ISOFORMAT))
    if not lineid:
        lineid = to_bytes(str(uuid.uuid4()))
    line = to_bytes(line)
    source = to_bytes(source)
    return b"".join((b"[MFLOG|", version, b"|", tstamp, b"Z|", source, b"|",
                     lineid, b"]", line))
Code Example #3
def parse(line):
    line = to_bytes(line)
    m = LINE_PARSER.match(to_bytes(line))
    if m:
        try:
            fields = list(m.groups())
            fields.append(datetime.strptime(to_unicode(fields[2]), ISOFORMAT))
            return MFLogline(*fields)
        except:
            pass
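The three helpers above compose: decorate wraps a raw line in an MFLOG header, refine splices extra bytes in immediately after that header, and parse recovers the fields. Below is a minimal round-trip sketch, assuming the functions above and the module-level names they reference (VERSION, ISOFORMAT, LINE_PARSER, MFLogline, to_bytes, to_unicode) are in scope; the source name, the prefix, and the .msg field access are illustrative assumptions.

raw = decorate(b"task", b"hello world\n")
# raw is shaped like: b"[MFLOG|<version>|<utc-timestamp>Z|task|<uuid>]hello world\n"
tagged = refine(raw, prefix=b"[job-123] ")
# the prefix lands right after the first b"]", i.e. just before the message body
fields = parse(raw)
if fields is not None:
    print(fields.msg)  # field name assumed from the MFLogline namedtuple used by parse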
Code Example #4
File: mflog.py Project: tobias-gp/metaflow
def refine(line, prefix=None, suffix=None):
    line = to_bytes(line)
    prefix = to_bytes(prefix) if prefix else b''
    suffix = to_bytes(suffix) if suffix else b''
    parts = line.split(b']', 1)
    if len(parts) == 2:
        header, body = parts
        return b''.join((header, b']', prefix, body, suffix))
    else:
        return line
Code Example #5
def resolve_workflow_name(obj, name):
    project = current.get("project_name")
    obj._is_workflow_name_modified = False
    if project:
        if name:
            raise MetaflowException(
                "--name is not supported for @projects. Use --branch instead.")
        workflow_name = current.project_flow_name
        project_branch = to_bytes(".".join((project, current.branch_name)))
        token_prefix = (
            "mfprj-%s" %
            to_unicode(base64.b32encode(sha1(project_branch).digest()))[:16])
        is_project = True
        # Argo Workflow names can't be longer than 253 characters, so we truncate
        # by default. Also, while project and branch allow for underscores, Argo
        # Workflows doesn't (DNS Subdomain names as defined in RFC 1123) - so we will
        # remove any underscores as well as convert the name to lower case.
        if len(workflow_name) > 253:
            name_hash = to_unicode(
                base64.b32encode(sha1(
                    to_bytes(workflow_name)).digest()))[:8].lower()
            workflow_name = "%s-%s" % (workflow_name[:242], name_hash)
            obj._is_workflow_name_modified = True
        if not VALID_NAME.search(workflow_name):
            workflow_name = (re.compile(r"^[^A-Za-z0-9]+").sub(
                "", workflow_name).replace("_", "").lower())
            obj._is_workflow_name_modified = True
    else:
        if name and not VALID_NAME.search(name):
            raise MetaflowException(
                "Name '%s' contains invalid characters. The "
                "name must consist of lower case alphanumeric characters, '-' or '.'"
                ", and must start and end with an alphanumeric character." %
                name)

        workflow_name = name if name else current.flow_name
        token_prefix = workflow_name
        is_project = False

        if len(workflow_name) > 253:
            msg = ("The full name of the workflow:\n*%s*\nis longer than 253 "
                   "characters.\n\n"
                   "To deploy this workflow to Argo Workflows, please "
                   "assign a shorter name\nusing the option\n"
                   "*argo-workflows --name <name> create*." % workflow_name)
            raise ArgoWorkflowsNameTooLong(msg)

        if not VALID_NAME.search(workflow_name):
            workflow_name = (re.compile(r"^[^A-Za-z0-9]+").sub(
                "", workflow_name).replace("_", "").lower())
            obj._is_workflow_name_modified = True

    return workflow_name, token_prefix.lower(), is_project
Code Example #6
def resolve_state_machine_name(obj, name):
    def attach_prefix(name):
        if SFN_STATE_MACHINE_PREFIX is not None:
            return SFN_STATE_MACHINE_PREFIX + "_" + name
        return name

    project = current.get("project_name")
    obj._is_state_machine_name_hashed = False
    if project:
        if name:
            raise MetaflowException(
                "--name is not supported for @projects. " "Use --branch instead."
            )
        state_machine_name = attach_prefix(current.project_flow_name)
        project_branch = to_bytes(".".join((project, current.branch_name)))
        token_prefix = (
            "mfprj-%s"
            % to_unicode(base64.b32encode(sha1(project_branch).digest()))[:16]
        )
        is_project = True
        # AWS Step Functions has a limit of 80 chars for state machine names.
        # We truncate the state machine name if the computed name is greater
        # than 60 chars and append a hashed suffix to ensure uniqueness.
        if len(state_machine_name) > 60:
            name_hash = to_unicode(
                base64.b32encode(sha1(to_bytes(state_machine_name)).digest())
            )[:16].lower()
            state_machine_name = "%s-%s" % (state_machine_name[:60], name_hash)
            obj._is_state_machine_name_hashed = True
    else:
        if name and VALID_NAME.search(name):
            raise MetaflowException("Name '%s' contains invalid characters." % name)

        state_machine_name = attach_prefix(name if name else current.flow_name)
        token_prefix = state_machine_name
        is_project = False

        if len(state_machine_name) > 80:
            msg = (
                "The full name of the workflow:\n*%s*\nis longer than 80 "
                "characters.\n\n"
                "To deploy this workflow to AWS Step Functions, please "
                "assign a shorter name\nusing the option\n"
                "*step-functions --name <name> create*." % state_machine_name
            )
            raise StepFunctionsStateMachineNameTooLong(msg)

    return state_machine_name, token_prefix.lower(), is_project
Code Example #7
File: batch.py Project: sappier/metaflow
    def wait(self, stdout_location, stderr_location, echo=None):
        def wait_for_launch(job):
            status = job.status
            echo(
                "Task is starting (status %s)..." % status,
                "stderr",
                batch_id=job.id,
            )
            t = time.time()
            while True:
                if status != job.status or (time.time() - t) > 30:
                    status = job.status
                    echo(
                        "Task is starting (status %s)..." % status,
                        "stderr",
                        batch_id=job.id,
                    )
                    t = time.time()
                if job.is_running or job.is_done or job.is_crashed:
                    break
                select.poll().poll(200)

        prefix = b"[%s] " % util.to_bytes(self.job.id)
        stdout_tail = S3Tail(stdout_location)
        stderr_tail = S3Tail(stderr_location)

        # 1) Loop until the job has started
        wait_for_launch(self.job)

        # 2) Tail logs until the job has finished
        tail_logs(
            prefix=prefix,
            stdout_tail=stdout_tail,
            stderr_tail=stderr_tail,
            echo=echo,
            has_log_updates=lambda: self.job.is_running,
        )

        # In case of hard crashes (OOM), the final save_logs won't happen.
        # We can fetch the remaining logs from AWS CloudWatch and persist them
        # to Amazon S3.

        if self.job.is_crashed:
            msg = next(msg for msg in [
                self.job.reason,
                self.job.status_reason,
                "Task crashed.",
            ] if msg is not None)
            raise BatchException("%s "
                                 "This could be a transient error. "
                                 "Use @retry to retry." % msg)
        else:
            if self.job.is_running:
                # Kill the job if it is still running by throwing an exception.
                raise BatchException("Task failed!")
            echo(
                "Task finished with exit code %s." % self.job.status_code,
                "stderr",
                batch_id=self.job.id,
            )
Code Example #8
File: test_s3.py Project: anniyanvr/metaflow
def test_put_one(s3root, objs, expected):
    with S3(s3root=s3root) as s3:
        for key, obj in objs:
            s3url = s3.put(key, obj)
            assert s3url in expected
            s3obj = s3.get(key)
            assert s3obj.key == key
            assert_results([s3obj], {s3url: expected[s3url]})
            assert s3obj.blob == to_bytes(obj)
            # put with overwrite disabled
            s3url = s3.put(key, "random_value", overwrite=False)
            assert s3url in expected
            s3obj = s3.get(key)
            assert s3obj.key == key
            assert_results([s3obj], {s3url: expected[s3url]})
            assert s3obj.blob == to_bytes(obj)
Code Example #9
def _token_generator(token_prefix):
    for i in range(10000):
        prefix = "%s-%d-" % (token_prefix, i)
        # we need to use a consistent hash here, which is why
        # random.seed(prefix) or random.seed(hash(prefix)) won't work
        random.seed(zlib.adler32(to_bytes(prefix)))
        yield prefix + "".join(random.sample(string.ascii_lowercase, 4))
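Seeding the RNG with zlib.adler32 of the prefix makes the 4-letter suffix a deterministic function of the prefix, so the same prefix yields the same token on every run (hash()-based seeding would vary between processes due to hash randomization). A small sketch, assuming _token_generator and to_bytes are in scope as above; the token prefix is made up.

gen = _token_generator("myflow-prod")
first = next(gen)                               # e.g. "myflow-prod-0-" plus a 4-letter suffix
again = next(_token_generator("myflow-prod"))   # a fresh generator starts from i=0 again
assert first == again                           # deterministic: seeded from adler32 of the prefix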
Code Example #10
def set_should_persist(line):
    # this marker indicates that the logline should be persisted by
    # the receiver
    line = to_bytes(line)
    if is_structured(line) and not line.startswith(b"[!["):
        return b"[!" + line
    else:
        return line
Code Example #11
def unset_should_persist(line):
    # prior to persisting, the should_persist marker should be removed
    # from the logline using this function
    line = to_bytes(line)
    if is_structured(line) and line.startswith(b"[!["):
        return line[2:]
    else:
        return line
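set_should_persist and unset_should_persist are inverses on structured loglines: the b"[!" marker is prepended at most once and stripped again before the line is persisted, while unstructured lines pass through untouched. A small sketch, assuming these two helpers plus decorate and is_structured from the other examples are in scope.

line = decorate(b"task", b"keep me\n")               # a structured MFLOG line
marked = set_should_persist(line)                    # now starts with b"[![MFLOG|"
assert set_should_persist(marked) == marked          # idempotent: the marker is not added twice
assert unset_should_persist(marked) == line          # marker stripped prior to persisting
assert set_should_persist(b"plain\n") == b"plain\n"  # unstructured lines are left alone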
Code Example #12
def format(name):
    # AWS Event Bridge has a limit of 64 chars for rule names.
    # We truncate the rule name if the computed name is greater
    # than 64 chars and append a hashed suffix to ensure uniqueness.
    if len(name) > 64:
        name_hash = to_unicode(base64.b32encode(sha1(
            to_bytes(name)).digest()))[:16].lower()
        # construct a 64-character rule name
        return '%s-%s' % (name[:47], name_hash)
    else:
        return name
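For names over the 64-character limit, format keeps the first 47 characters of the original and appends a dash plus a 16-character base32 hash of the full name, so the result is exactly 64 characters and remains stable per input. A quick sketch, assuming format, to_bytes, and to_unicode are in scope; the rule names are made up.

assert format("my-rule") == "my-rule"         # short names pass through unchanged

long_name = "event-rule-" + "x" * 70
truncated = format(long_name)
assert len(truncated) == 64                   # 47 chars + "-" + 16-char hash suffix
assert truncated.startswith(long_name[:47] + "-")
assert format(long_name) == truncated         # same input always yields the same suffix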
Code Example #13
    def _print_available(tail, stream, should_persist=False):
        # print the latest batch of lines from S3Tail
        prefix = b"[%s] " % util.to_bytes(self._job.id)
        try:
            for line in tail:
                if should_persist:
                    line = set_should_persist(line)
                else:
                    line = refine(line, prefix=prefix)
                echo(line.strip().decode("utf-8", errors="replace"), stream)
        except Exception as ex:
            echo(
                "[ temporary error in fetching logs: %s ]" % ex,
                "stderr",
                job_id=self._job.id,
            )
Code Example #14
def cmd(cmdline, input, output):
    for path, data in input.items():
        with open(path, 'wb') as f:
            f.write(to_bytes(data))

    if subprocess.call(cmdline, shell=True):
        raise ExternalCommandFailed("Command '%s' returned a non-zero "
                                    "exit code." % cmdline)

    out = []
    for path in output:
        with open(path, 'rb') as f:
            out.append(f.read())

    if len(out) == 1:
        return out[0]
    else:
        return out
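cmd is a small shell bridge: it materializes each input mapping as a file on disk, runs the command line through the shell, and returns the contents of the requested output files (a single bytes value when exactly one output is asked for, a list otherwise). A usage sketch, assuming cmd, to_bytes, and ExternalCommandFailed are in scope; the file names and the POSIX tr command are illustrative.

result = cmd(
    "tr 'a-z' 'A-Z' < input.txt > output.txt",  # executed with shell=True
    input={"input.txt": "hello"},               # written to disk before the call
    output=["output.txt"],                      # read back after the command succeeds
)
assert result == b"HELLO"                       # one output file, so bytes rather than a list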
Code Example #15
def pytest_put_strings_case(meta=None):
    put_prefix = os.path.join(S3ROOT, PUT_PREFIX)
    data = [
        u"unicode: \u523a\u8eab means sashimi",
        b"bytes: \x00\x01\x02",
        "just a string",
    ]
    expected = {}
    objs = []
    for text in data:
        blob = to_bytes(text)
        checksum = sha1(blob).hexdigest()
        key = str(uuid4())
        expected[os.path.join(put_prefix, key)] = {
            None: ExpectedResult(
                size=len(blob),
                checksum=checksum,
                content_type=None,
                metadata=None,
                range=None,
            )
        }
        objs.append((key, text))
        if meta is not None:
            for content_type, usermeta in meta.values():
                key = str(uuid4())
                expected[os.path.join(put_prefix, key)] = {
                    None: ExpectedResult(
                        size=len(blob),
                        checksum=checksum,
                        content_type=content_type,
                        metadata=usermeta,
                        range=None,
                    )
                }
                objs.append(
                    S3PutObject(
                        key=key,
                        value=text,
                        content_type=content_type,
                        metadata=usermeta,
                    )
                )
    return {"argvalues": [(put_prefix, objs, expected)], "ids": ["put_strings"]}
Code Example #16
def sandbox(profile):
    overwrite_config(profile)
    # Prompt for user input.
    encoded_str = click.prompt('Following instructions from '
                               'https://metaflow.org/sandbox, '
                               'please paste the encoded magic string',
                               type=str)
    # Decode the bytes to env_dict.
    try:
        import base64, zlib
        from metaflow.util import to_bytes
        env_dict =\
            json.loads(to_unicode(zlib.decompress(base64.b64decode(to_bytes(encoded_str)))))
    except:
        # TODO: Add the URL for contact us page in the error?
        raise click.BadArgumentUsage('Could not decode the sandbox '\
                                     'configuration. Please contact us.')
    # Persist to a file.
    persist_env(env_dict, profile)
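The decode path above implies the encoding: the sandbox "magic string" is a JSON document that has been zlib-compressed and then base64-encoded. Below is a self-contained round-trip sketch using only the standard library; the environment keys are made up, and the exact scheme used by the hosted sandbox service is an assumption inferred from the decoding code above.

import base64, json, zlib

env = {"METAFLOW_DATASTORE_SYSROOT_S3": "s3://example-bucket/metaflow"}  # made-up values
encoded_str = base64.b64encode(zlib.compress(json.dumps(env).encode("utf-8"))).decode("ascii")

# mirrors the decode in sandbox() above
decoded = json.loads(zlib.decompress(base64.b64decode(encoded_str)).decode("utf-8"))
assert decoded == env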
Code Example #17
    def wait(self, stdout_location, stderr_location, echo=None):
        def wait_for_launch(job):
            status = job.status
            echo('Task is starting (status %s)...' % status,
                 'stderr',
                 batch_id=job.id)
            t = time.time()
            while True:
                if status != job.status or (time.time() - t) > 30:
                    status = job.status
                    echo('Task is starting (status %s)...' % status,
                         'stderr',
                         batch_id=job.id)
                    t = time.time()
                if job.is_running or job.is_done or job.is_crashed:
                    break
                select.poll().poll(200)

        prefix = b'[%s] ' % util.to_bytes(self.job.id)

        def _print_available(tail, stream, should_persist=False):
            # print the latest batch of lines from S3Tail
            try:
                for line in tail:
                    if should_persist:
                        line = set_should_persist(line)
                    else:
                        line = refine(line, prefix=prefix)
                    echo(line.strip().decode('utf-8', errors='replace'),
                         stream)
            except Exception as ex:
                echo('[ temporary error in fetching logs: %s ]' % ex,
                     'stderr',
                     batch_id=self.job.id)

        stdout_tail = S3Tail(stdout_location)
        stderr_tail = S3Tail(stderr_location)

        # 1) Loop until the job has started
        wait_for_launch(self.job)

        # 2) Loop until the job has finished
        start_time = time.time()
        is_running = True
        next_log_update = start_time
        log_update_delay = 1

        while is_running:
            if time.time() > next_log_update:
                _print_available(stdout_tail, 'stdout')
                _print_available(stderr_tail, 'stderr')
                now = time.time()
                log_update_delay = update_delay(now - start_time)
                next_log_update = now + log_update_delay
                is_running = self.job.is_running

            # This sleep should never delay log updates. On the other hand,
            # we should exit this loop when the task has finished without
            # a long delay, regardless of the log tailing schedule
            d = min(log_update_delay, 5.0)
            select.poll().poll(d * 1000)

        # 3) Fetch remaining logs
        #
        # It is possible that we exit the loop above before all logs have been
        # shown.
        #
        # TODO if we notice AWS Batch failing to upload logs to S3, we can add a
        # HEAD request here to ensure that the file exists prior to calling
        # S3Tail and notify the user about truncated logs if it doesn't
        _print_available(stdout_tail, 'stdout')
        _print_available(stderr_tail, 'stderr')
        # In case of hard crashes (OOM), the final save_logs won't happen.
        # We fetch the remaining logs from AWS CloudWatch and persist them to
        # Amazon S3.
        #
        # TODO: AWS CloudWatch fetch logs

        if self.job.is_crashed:
            if self.job.status_code == 1:
                msg = next(msg for msg in [
                    'Task failed with a general error. This may be a miscellaneous error, '
                    'such as "divide by zero" or another impermissible operation.'
                ] if msg is not None)
            if self.job.status_code == 2:
                msg = next(msg for msg in [
                    'Misuse of shell builtins. You may have a missing keyword or command, '
                    'or permission problem (and diff return code on a failed binary file '
                    'comparison).'
                ] if msg is not None)
            if self.job.status_code == 126:
                msg = next(msg for msg in [
                    'Command invoked cannot execute. May be a permission problem or command is '
                    'not an executable.'
                ] if msg is not None)
            if self.job.status_code == 127:
                msg = next(msg for msg in [
                    'Command not found. Possibly a problem with $PATH or a typo.'
                ] if msg is not None)
            if self.job.status_code == 128:
                msg = next(msg for msg in ['Invalid argument to exit.']
                           if msg is not None)
            if self.job.status_code > 129:
                msg = next(msg for msg in ['Signal related error']
                           if msg is not None)
            raise BatchException('%s '
                                 'This could be a transient error. '
                                 'Use @retry to retry.' % msg)
        else:
            if self.job.is_running:
                # Kill the job if it is still running by throwing an exception.
                raise BatchException("Task failed!")
            echo('Task finished with exit code %s.' % self.job.status_code,
                 'stderr',
                 batch_id=self.job.id)
Code Example #18
def is_structured(line):
    line = to_bytes(line)
    return line.startswith(b"[MFLOG|") or line.startswith(b"[![MFLOG|")
Code Example #19
File: kubernetes.py Project: sappier/metaflow
    def wait(self, stdout_location, stderr_location, echo=None):
        def wait_for_launch(job):
            status = job.status
            echo(
                "Task is starting (Status %s)..." % status,
                "stderr",
                job_id=job.id,
            )
            t = time.time()
            while True:
                new_status = job.status
                if status != new_status or (time.time() - t) > 30:
                    status = new_status
                    echo(
                        "Task is starting (Status %s)..." % status,
                        "stderr",
                        job_id=job.id,
                    )
                    t = time.time()
                if job.is_running or job.is_done:
                    break
                time.sleep(1)

        prefix = b"[%s] " % util.to_bytes(self._job.id)
        stdout_tail = S3Tail(stdout_location)
        stderr_tail = S3Tail(stderr_location)

        # 1) Loop until the job has started
        wait_for_launch(self._job)

        # 2) Tail logs until the job has finished
        tail_logs(
            prefix=prefix,
            stdout_tail=stdout_tail,
            stderr_tail=stderr_tail,
            echo=echo,
            has_log_updates=lambda: self._job.is_running,
        )

        # 3) Fetch remaining logs
        #
        # It is possible that we exit the loop above before all logs have been
        # shown.
        #
        # TODO (savin): If we notice Kubernetes failing to upload logs to S3,
        #               we can add a HEAD request here to ensure that the file
        #               exists prior to calling S3Tail and notify the user about
        #               truncated logs if it doesn't.
        # TODO (savin): For hard crashes, we can fetch logs from the pod.

        if self._job.has_failed:
            exit_code, reason = self._job.reason
            msg = next(msg for msg in [
                reason,
                "Task crashed",
            ] if msg is not None)
            if exit_code:
                if int(exit_code) == 139:
                    raise KubernetesException(
                        "Task failed with a segmentation fault.")
                else:
                    msg = "%s (exit code %s)" % (msg, exit_code)
            raise KubernetesException("%s. This could be a transient error. "
                                      "Use @retry to retry." % msg)

        exit_code, _ = self._job.reason
        echo(
            "Task finished with exit code %s." % exit_code,
            "stderr",
            job_id=self._job.id,
        )
Code Example #20
File: batch.py Project: parampavar/metaflow
    def wait(self, stdout_location, stderr_location, echo=None):
        def wait_for_launch(job, child_jobs):
            status = job.status
            echo(
                "Task is starting (status %s)..." % status,
                "stderr",
                batch_id=job.id,
            )
            t = time.time()
            while True:
                if status != job.status or (time.time() - t) > 30:
                    if not child_jobs:
                        child_statuses = ""
                    else:
                        status_keys = set(
                            [child_job.status for child_job in child_jobs])
                        status_counts = [(
                            status,
                            len([
                                child_job
                                for child_job in child_jobs
                                if child_job.status == status
                            ]),
                        ) for status in status_keys]
                        child_statuses = " (parallel node status: [{}])".format(
                            ", ".join([
                                "{}:{}".format(status, num)
                                for (status, num) in sorted(status_counts)
                            ]))
                    status = job.status
                    echo(
                        "Task is starting (status %s)... %s" %
                        (status, child_statuses),
                        "stderr",
                        batch_id=job.id,
                    )
                    t = time.time()
                if job.is_running or job.is_done or job.is_crashed:
                    break
                select.poll().poll(200)

        prefix = b"[%s] " % util.to_bytes(self.job.id)
        stdout_tail = S3Tail(stdout_location)
        stderr_tail = S3Tail(stderr_location)

        child_jobs = []
        if self.num_parallel > 1:
            for node in range(1, self.num_parallel):
                child_job = copy.copy(self.job)
                child_job._id = child_job._id + "#{}".format(node)
                child_jobs.append(child_job)

        # 1) Loop until the job has started
        wait_for_launch(self.job, child_jobs)

        # 2) Tail logs until the job has finished
        tail_logs(
            prefix=prefix,
            stdout_tail=stdout_tail,
            stderr_tail=stderr_tail,
            echo=echo,
            has_log_updates=lambda: self.job.is_running,
        )

        # In case of hard crashes (OOM), the final save_logs won't happen.
        # We can fetch the remaining logs from AWS CloudWatch and persist them
        # to Amazon S3.

        if self.job.is_crashed:
            msg = next(msg for msg in [
                self.job.reason,
                self.job.status_reason,
                "Task crashed.",
            ] if msg is not None)
            raise BatchException("%s "
                                 "This could be a transient error. "
                                 "Use @retry to retry." % msg)
        else:
            if self.job.is_running:
                # Kill the job if it is still running by throwing an exception.
                raise BatchException("Task failed!")
            echo(
                "Task finished with exit code %s." % self.job.status_code,
                "stderr",
                batch_id=self.job.id,
            )
Code Example #21
    def wait(self, stdout_location, stderr_location, echo=None):
        def update_delay(secs_since_start):
            # this sigmoid function reaches
            # - 0.1 after 11 minutes
            # - 0.5 after 15 minutes
            # - 1.0 after 23 minutes
            # in other words, the user will see very frequent updates
            # during the first 10 minutes
            sigmoid = 1.0 / (1.0 + math.exp(-0.01 * secs_since_start + 9.0))
            return 0.5 + sigmoid * 30.0

        def wait_for_launch(job):
            status = job.status
            echo(
                "Task is starting (%s)..." % status,
                "stderr",
                job_id=job.id,
            )
            t = time.time()
            start_time = time.time()
            while job.is_waiting:
                new_status = job.status
                if status != new_status or (time.time() - t) > 30:
                    status = new_status
                    echo(
                        "Task is starting (%s)..." % status,
                        "stderr",
                        job_id=job.id,
                    )
                    t = time.time()
                time.sleep(update_delay(time.time() - start_time))

        prefix = b"[%s] " % util.to_bytes(self._job.id)
        stdout_tail = S3Tail(stdout_location)
        stderr_tail = S3Tail(stderr_location)

        # 1) Loop until the job has started
        wait_for_launch(self._job)

        # 2) Tail logs until the job has finished
        tail_logs(
            prefix=prefix,
            stdout_tail=stdout_tail,
            stderr_tail=stderr_tail,
            echo=echo,
            has_log_updates=lambda: self._job.is_running,
        )

        # 3) Fetch remaining logs
        #
        # It is possible that we exit the loop above before all logs have been
        # shown.
        #
        # TODO : If we notice Kubernetes failing to upload logs to S3,
        #        we can add a HEAD request here to ensure that the file
        #        exists prior to calling S3Tail and notify the user about
        #        truncated logs if it doesn't.
        # TODO : For hard crashes, we can fetch logs from the pod.

        if self._job.has_failed:
            exit_code, reason = self._job.reason
            msg = next(msg for msg in [
                reason,
                "Task crashed",
            ] if msg is not None)
            if exit_code:
                if int(exit_code) == 139:
                    raise KubernetesException(
                        "Task failed with a segmentation fault.")
                if int(exit_code) == 137:
                    raise KubernetesException(
                        "Task ran out of memory. "
                        "Increase the available memory by specifying "
                        "@resource(memory=...) for the step. ")
                else:
                    msg = "%s (exit code %s)" % (msg, exit_code)
            raise KubernetesException(
                "%s. This could be a transient error. Use @retry to retry." %
                msg)

        exit_code, _ = self._job.reason
        echo(
            "Task finished with exit code %s." % exit_code,
            "stderr",
            job_id=self._job.id,
        )
Code Example #22
File: batch.py Project: cclauss/metaflow
    def wait(self, stdout_location, stderr_location, echo=None):
        def wait_for_launch(job):
            status = job.status
            echo(
                "Task is starting (status %s)..." % status,
                "stderr",
                batch_id=job.id,
            )
            t = time.time()
            while True:
                if status != job.status or (time.time() - t) > 30:
                    status = job.status
                    echo(
                        "Task is starting (status %s)..." % status,
                        "stderr",
                        batch_id=job.id,
                    )
                    t = time.time()
                if job.is_running or job.is_done or job.is_crashed:
                    break
                select.poll().poll(200)

        prefix = b"[%s] " % util.to_bytes(self.job.id)

        def _print_available(tail, stream, should_persist=False):
            # print the latest batch of lines from S3Tail
            try:
                for line in tail:
                    if should_persist:
                        line = set_should_persist(line)
                    else:
                        line = refine(line, prefix=prefix)
                    echo(line.strip().decode("utf-8", errors="replace"),
                         stream)
            except Exception as ex:
                echo(
                    "[ temporary error in fetching logs: %s ]" % ex,
                    "stderr",
                    batch_id=self.job.id,
                )

        stdout_tail = S3Tail(stdout_location)
        stderr_tail = S3Tail(stderr_location)

        # 1) Loop until the job has started
        wait_for_launch(self.job)

        # 2) Loop until the job has finished
        start_time = time.time()
        is_running = True
        next_log_update = start_time
        log_update_delay = 1

        while is_running:
            if time.time() > next_log_update:
                _print_available(stdout_tail, "stdout")
                _print_available(stderr_tail, "stderr")
                now = time.time()
                log_update_delay = update_delay(now - start_time)
                next_log_update = now + log_update_delay
                is_running = self.job.is_running

            # This sleep should never delay log updates. On the other hand,
            # we should exit this loop when the task has finished without
            # a long delay, regardless of the log tailing schedule
            d = min(log_update_delay, 5.0)
            select.poll().poll(d * 1000)

        # 3) Fetch remaining logs
        #
        # It is possible that we exit the loop above before all logs have been
        # shown.
        #
        # TODO if we notice AWS Batch failing to upload logs to S3, we can add a
        # HEAD request here to ensure that the file exists prior to calling
        # S3Tail and notify the user about truncated logs if it doesn't
        _print_available(stdout_tail, "stdout")
        _print_available(stderr_tail, "stderr")
        # In case of hard crashes (OOM), the final save_logs won't happen.
        # We fetch the remaining logs from AWS CloudWatch and persist them to
        # Amazon S3.

        if self.job.is_crashed:
            msg = next(msg for msg in [
                self.job.reason,
                self.job.status_reason,
                "Task crashed.",
            ] if msg is not None)
            raise BatchException("%s "
                                 "This could be a transient error. "
                                 "Use @retry to retry." % msg)
        else:
            if self.job.is_running:
                # Kill the job if it is still running by throwing an exception.
                raise BatchException("Task failed!")
            echo(
                "Task finished with exit code %s." % self.job.status_code,
                "stderr",
                batch_id=self.job.id,
            )