Example #1
    def loglines(self, stream, as_unicode=True):
        """
        Return an iterator over (utc_timestamp, logline) tuples.

        If as_unicode=False, logline is returned as a bytes object. Otherwise,
        it is returned as a (unicode) string.
        """
        from metaflow.mflog.mflog import merge_logs

        global filecache

        ds_type = self.metadata_dict.get("ds-type")
        ds_root = self.metadata_dict.get("ds-root")
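        # Without recorded datastore metadata there is no way to locate the
        # logs (e.g. the task may have failed before writing any metadata),
        # so emit a single empty line and stop.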
        if ds_type is None or ds_root is None:
            yield None, ""
            return
        if filecache is None:
            filecache = FileCache()

        attempt = self.current_attempt
        logs = filecache.get_logs_stream(ds_type, ds_root, stream, attempt,
                                         *self.path_components)
        for line in merge_logs([blob for _, blob in logs]):
            msg = to_unicode(line.msg) if as_unicode else line.msg
            yield line.utc_tstamp, msg
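
A minimal usage sketch of this API, assuming a Task object from the Metaflow client; the pathspec below is hypothetical:

    from metaflow import Task

    task = Task('HelloFlow/12/start/345678')  # hypothetical pathspec
    for tstamp, line in task.loglines('stdout'):
        # tstamp is a UTC timestamp (None if no logs were recorded) and
        # line is a unicode string, since as_unicode defaults to True.
        print(tstamp, line)
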
Example #2
    def loglines(self, stream, as_unicode=True):
        """
        Return an iterator over (utc_timestamp, logline) tuples.

        If as_unicode=False, logline is returned as a bytes object. Otherwise,
        it is returned as a (unicode) string.
        """
        from metaflow.mflog.mflog import merge_logs
        from metaflow.mflog import LOG_SOURCES
        from metaflow.datastore import DATASTORES

        ds_type = self.metadata_dict.get('ds-type')
        ds_root = self.metadata_dict.get('ds-root')

        ds_cls = DATASTORES.get(ds_type, None)
        if ds_cls is None:
            raise MetaflowInternalError('Datastore %s was not found' % ds_type)
        ds_cls.datastore_root = ds_root

        # It is possible that a task fails before any metadata has been
        # recorded. In this case, we assume that we are executing the
        # first attempt.
        #
        # FIXME: Technically we are looking at the latest *recorded* attempt
        # here. It is possible that logs exists for a newer attempt that
        # just failed to record metadata. We could make this logic more robust
        # and guarantee that we always return the latest available log.

        ds = ds_cls(self._object['flow_id'],
                    run_id=str(self._object['run_number']),
                    step_name=self._object['step_name'],
                    task_id=str(self._object['task_id']),
                    mode='r',
                    attempt=int(self.metadata_dict.get('attempt', 0)),
                    allow_unsuccessful=True)
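        # Fetch the raw log blobs for this stream from every mflog source;
        # merge_logs below combines the parsed lines in timestamp order.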
        logs = ds.load_logs(LOG_SOURCES, stream)
        for line in merge_logs([blob for _, blob in logs]):
            msg = to_unicode(line.msg) if as_unicode else line.msg
            yield line.utc_tstamp, msg
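
As a complementary sketch, passing as_unicode=False yields each logline as bytes per the docstring above, which makes it easy to dump a stream to a binary file unchanged; the pathspec is again hypothetical:

    from metaflow import Task

    task = Task('HelloFlow/12/start/345678')  # hypothetical pathspec
    with open('stderr.log', 'wb') as f:
        for _tstamp, raw in task.loglines('stderr', as_unicode=False):
            # Each raw line is a bytes object, so it can be written
            # without re-encoding.
            f.write(raw + b'\n')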