Example 1
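These excerpts appear to come from Metaflow's CLI module; names such as echo, echo_always, click, CommandException, mflog, LOG_SOURCES, LOGGER_TIMESTAMP and pickle are assumed to be imported or defined at module level and are not repeated here.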
def logs(obj, input_path, stdout=None, stderr=None, both=None):
    types = set()
    if stdout:
        types.add('stdout')
        both = False
    if stderr:
        types.add('stderr')
        both = False
    if both:
        types.update(('stdout', 'stderr'))

    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
    parts = input_path.split('/')
    if len(parts) == 2:
        run_id, step_name = parts
        task_id = None
    elif len(parts) == 3:
        run_id, step_name, task_id = parts
    else:
        raise CommandException("input_path should either be run_id/step_name"
                               "or run_id/step_name/task_id")

    if obj.datastore.datastore_root is None:
        obj.datastore.datastore_root = obj.datastore.get_datastore_root_from_config(
            obj.echo, create_on_absent=False)
    if obj.datastore.datastore_root is None:
        raise CommandException(
            "Could not find the location of the datastore -- did you correctly set the "
            "METAFLOW_DATASTORE_SYSROOT_%s environment variable" %
            (obj.datastore.TYPE).upper())

    from metaflow.datastore.datastore_set import MetaflowDatastoreSet
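    # MetaflowDatastoreSet gathers a datastore handle for every successful task
    # of the given step in this run, so logs can be dumped task by task below.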
    datastore_set = MetaflowDatastoreSet(obj.datastore,
                                         obj.flow.name,
                                         run_id,
                                         steps=[step_name],
                                         metadata=obj.metadata,
                                         monitor=obj.monitor,
                                         event_logger=obj.event_logger)
    if task_id:
        ds_list = [datastore_set.get_with_pathspec(input_path)]
    else:
        ds_list = list(datastore_set)  # get all tasks
    for ds in ds_list:
        echo('Dumping logs of run_id=*{run_id}* '
             'step=*{step}* task_id=*{task_id}*'.format(run_id=ds.run_id,
                                                        step=ds.step_name,
                                                        task_id=ds.task_id),
             fg='magenta')

        for typ in ('stdout', 'stderr'):
            if typ in types:
                echo(typ, bold=True)
                click.secho(ds.load_log(typ).decode('UTF-8', errors='replace'),
                            nl=False)
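All three examples parse the pathspec with the same inline snippet; below is a minimal sketch of how that duplicated logic could live in a shared helper. The name parse_pathspec is hypothetical (it is not part of Metaflow), and ValueError stands in for CommandException so the sketch stays self-contained.

def parse_pathspec(input_path):
    # Accepts "run_id/step_name" or "run_id/step_name/task_id" and returns
    # (run_id, step_name, task_id), with task_id set to None when absent.
    parts = input_path.split('/')
    if len(parts) == 2:
        return parts[0], parts[1], None
    if len(parts) == 3:
        return parts[0], parts[1], parts[2]
    raise ValueError("input_path should either be run_id/step_name "
                     "or run_id/step_name/task_id")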
Example 2
def dump(obj,
         input_path,
         private=None,
         max_value_size=None,
         include=None,
         file=None):

    output = {}
    kwargs = {
        'show_private': private,
        'max_value_size': max_value_size,
        # 'include' is a comma-separated list of artifact names; guard against
        # it being None so the split below cannot fail
        'include': {t
                    for t in (include or '').split(',') if t}
    }

    if obj.datastore.datastore_root is None:
        obj.datastore.datastore_root = obj.datastore.get_datastore_root_from_config(
            obj.echo, create_on_absent=False)
    if obj.datastore.datastore_root is None:
        raise CommandException(
            "Could not find the location of the datastore -- did you correctly set the "
            "METAFLOW_DATASTORE_SYSROOT_%s environment variable" %
            (obj.datastore.TYPE).upper())

    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
    parts = input_path.split('/')
    if len(parts) == 2:
        run_id, step_name = parts
        task_id = None
    elif len(parts) == 3:
        run_id, step_name, task_id = parts
    else:
        raise CommandException("input_path should either be run_id/step_name"
                               "or run_id/step_name/task_id")

    from metaflow.datastore.datastore_set import MetaflowDatastoreSet

    datastore_set = MetaflowDatastoreSet(
        obj.datastore,
        obj.flow.name,
        run_id,
        steps=[step_name],
        metadata=obj.metadata,
        monitor=obj.monitor,
        event_logger=obj.event_logger,
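        # prefetch the artifacts named in 'include' up front for each task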
        prefetch_data_artifacts=kwargs.get('include'))
    if task_id:
        ds_list = [datastore_set.get_with_pathspec(input_path)]
    else:
        ds_list = list(datastore_set)  # get all tasks

    for ds in ds_list:
        echo('Dumping output of run_id=*{run_id}* '
             'step=*{step}* task_id=*{task_id}*'.format(run_id=ds.run_id,
                                                        step=ds.step_name,
                                                        task_id=ds.task_id),
             fg='magenta')

        if file is None:
            echo_always(ds.format(**kwargs),
                        highlight='green',
                        highlight_bold=False,
                        err=False)
        else:
            output[ds.pathspec] = ds.to_dict(**kwargs)

    if file is not None:
        with open(file, 'wb') as f:
            pickle.dump(output, f, protocol=pickle.HIGHEST_PROTOCOL)
        echo('Artifacts written to *%s*' % file)
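When a file path is given, the artifacts are pickled as a dict keyed by task pathspec (see output[ds.pathspec] above). A minimal sketch of reading such a dump back, with artifacts.pkl as a stand-in filename:

import pickle

with open('artifacts.pkl', 'rb') as f:
    artifacts = pickle.load(f)

# each entry maps a task pathspec to the dict produced by ds.to_dict(...) above
for pathspec, values in artifacts.items():
    print(pathspec, sorted(values))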
Example 3
def logs(obj,
         input_path,
         stdout=None,
         stderr=None,
         both=None,
         timestamps=False):
    types = set()
    if stdout:
        types.add('stdout')
        both = False
    if stderr:
        types.add('stderr')
        both = False
    if both:
        types.update(('stdout', 'stderr'))

    # reverse-sorting puts 'stdout' ahead of 'stderr', so stdout is printed first
    streams = list(sorted(types, reverse=True))

    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
    parts = input_path.split('/')
    if len(parts) == 2:
        run_id, step_name = parts
        task_id = None
    elif len(parts) == 3:
        run_id, step_name, task_id = parts
    else:
        raise CommandException("input_path should either be run_id/step_name "
                               "or run_id/step_name/task_id")

    if obj.datastore.datastore_root is None:
        obj.datastore.datastore_root = obj.datastore.get_datastore_root_from_config(
            obj.echo, create_on_absent=False)
    if obj.datastore.datastore_root is None:
        raise CommandException(
            "Could not find the location of the datastore -- did you correctly set the "
            "METAFLOW_DATASTORE_SYSROOT_%s environment variable" %
            (obj.datastore.TYPE).upper())

    if task_id:
        ds_list = [
            obj.datastore(obj.flow.name,
                          run_id=run_id,
                          step_name=step_name,
                          task_id=task_id,
                          mode='r',
                          allow_unsuccessful=True)
        ]
    else:
        from metaflow.datastore.datastore_set import MetaflowDatastoreSet
        datastore_set = MetaflowDatastoreSet(obj.datastore,
                                             obj.flow.name,
                                             run_id,
                                             steps=[step_name],
                                             metadata=obj.metadata,
                                             monitor=obj.monitor,
                                             event_logger=obj.event_logger)
        # get all successful tasks
        ds_list = list(datastore_set)

    if ds_list:

        def echo_unicode(line, **kwargs):
            click.secho(line.decode('UTF-8', errors='replace'), **kwargs)

        # old-style logs are legacy logs that predate the mflog format
        maybe_old_style = True
        for ds in ds_list:
            echo('Dumping logs of run_id=*{run_id}* '
                 'step=*{step}* task_id=*{task_id}*'.format(
                     run_id=ds.run_id, step=ds.step_name, task_id=ds.task_id),
                 fg='magenta')

            for stream in streams:
                echo(stream, bold=True)
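                # load the raw blobs for this stream from each known log source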
                logs = ds.load_logs(LOG_SOURCES, stream)
                if any(data for _, data in logs):
                    # attempt to read new, mflog-style logs
                    for line in mflog.merge_logs([blob for _, blob in logs]):
                        if timestamps:
                            ts = mflog.utc_to_local(line.utc_tstamp)
                            tstamp = ts.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
                            click.secho(tstamp + ' ',
                                        fg=LOGGER_TIMESTAMP,
                                        nl=False)
                        echo_unicode(line.msg)
                    maybe_old_style = False
                elif maybe_old_style:
                    # if they are not available, we may be looking at
                    # a legacy run (unless we have seen new-style data already
                    # for another stream). This returns an empty string if
                    # nothing is found
                    log = ds.load_log_legacy(stream)
                    if log and timestamps:
                        raise CommandException(
                            "We can't show --timestamps for "
                            "old runs. Sorry!")
                    echo_unicode(log, nl=False)

    elif len(parts) == 2:
        # TODO if the datastore provided a way to find unsuccessful task IDs,
        # we could handle this case automatically
        raise CommandException("Successful tasks were not found at the given "
                               "path. You can see logs for unsuccessful tasks "
                               "by giving an exact task ID using the "
                               "run_id/step_name/task_id format.")
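Outside the CLI, the same task output can also be read through Metaflow's client API. A minimal sketch, assuming a flow, run and task with the hypothetical IDs below exist:

from metaflow import Task

# "MyFlow/1234/start/5678" is a hypothetical flow/run_id/step_name/task_id pathspec
task = Task('MyFlow/1234/start/5678')
print(task.stdout)   # captured standard output of the task
print(task.stderr)   # captured standard error of the task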