Example 1
def convert_item_to_db(item):
    (bundle_info, subworksheet_info, value_obj, item_type) = item
    return (
        bundle_info['uuid'] if bundle_info else None,
        subworksheet_info['uuid'] if subworksheet_info else None,
        # TODO: change tables.py so that None's are allowed
        (formatting.tokens_to_string(value_obj) if item_type == TYPE_DIRECTIVE else value_obj) or '',
        item_type,
    )
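For reference, a minimal, self-contained sketch of what this conversion produces. TYPE_MARKUP, TYPE_DIRECTIVE, and the tokens_to_string stub below are hypothetical stand-ins for the real module-level constants and formatting helpers used above:

# Hypothetical stand-ins for the real constants and formatting.tokens_to_string.
TYPE_MARKUP = 'markup'
TYPE_DIRECTIVE = 'directive'

def tokens_to_string(tokens):
    # Simplified stub; the real formatting helper may handle quoting differently.
    return ' '.join(tokens)

def convert_item_to_db(item):
    (bundle_info, subworksheet_info, value_obj, item_type) = item
    return (
        bundle_info['uuid'] if bundle_info else None,
        subworksheet_info['uuid'] if subworksheet_info else None,
        (tokens_to_string(value_obj) if item_type == TYPE_DIRECTIVE else value_obj) or '',
        item_type,
    )

# A markup item stores its text directly; a directive's token list is flattened into
# a single string; falsy values are stored as '' because tables.py disallows None.
print(convert_item_to_db((None, None, 'hello world', TYPE_MARKUP)))
# (None, None, 'hello world', 'markup')
print(convert_item_to_db((None, None, ['display', 'table', 'default'], TYPE_DIRECTIVE)))
# (None, None, 'display table default', 'directive')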
Example 2
def convert_item_to_db(item):
    (bundle_info, subworksheet_info, value_obj, item_type) = item
    return (
        bundle_info['uuid'] if bundle_info else None,
        subworksheet_info['uuid'] if subworksheet_info else None,
        # TODO: change tables.py so that None's are allowed
        (formatting.tokens_to_string(value_obj) if item_type == TYPE_DIRECTIVE else value_obj) or '',
        item_type,
    )
def get_worksheet_lines(worksheet_info):
    """
    Return a list of pretty-printed lines of text for the given worksheet.
    """
    lines = []
    for item in worksheet_info['items']:
        (bundle_info, subworksheet_info, value_obj, item_type) = item[:4]

        if item_type == TYPE_MARKUP:
            lines.append(value_obj)
        elif item_type == TYPE_DIRECTIVE:
            if len(value_obj) > 0 and value_obj[0] == DIRECTIVE_CHAR:
                # A comment
                # TODO: figure out why this form is considered a comment...
                lines.append('//' + ' '.join(value_obj[1:]))
            else:
                # A normal directive
                value = formatting.tokens_to_string(value_obj)
                value = (
                    DIRECTIVE_CHAR
                    + ('' if len(value) == 0 or value.startswith(DIRECTIVE_CHAR) else ' ')
                    + value
                )
                lines.append(value)
        elif item_type == TYPE_BUNDLE:
            if 'metadata' not in bundle_info:
                # This happens when we add bundles by uuid and don't actually make sure they exist
                # lines.append('ERROR: non-existent bundle %s' % bundle_info['uuid'])
                description = formatting.contents_str(None)
            else:
                metadata = bundle_info['metadata']
                # raise Exception(metadata)
                description = bundle_info['bundle_type']
                description += ' ' + metadata['name']
                deps = interpret_genpath(bundle_info, 'dependencies')
                if deps:
                    description += ' -- ' + deps
                command = bundle_info.get('command')
                if command:
                    command = command.replace('\n', ' ')
                    description += ' : ' + command
            lines.append(bundle_line(description, bundle_info['uuid']))
        elif item_type == TYPE_WORKSHEET:
            lines.append(
                worksheet_line(
                    'worksheet ' + formatting.contents_str(subworksheet_info.get('name')),
                    subworksheet_info['uuid'],
                )
            )
        else:
            raise RuntimeError('Invalid worksheet item type: %s' % item_type)
    return lines
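A small, self-contained sketch of the "normal directive" rendering rule above; DIRECTIVE_CHAR is assumed to be '%', consistent with the '% display ...' directive syntax used in the later examples:

DIRECTIVE_CHAR = '%'  # assumed value

def render_directive(tokens):
    # Mirrors the "normal directive" branch of get_worksheet_lines: join the tokens
    # and prefix with DIRECTIVE_CHAR, inserting a space unless the rendered string
    # is empty or already starts with DIRECTIVE_CHAR.
    value = ' '.join(tokens)  # simplified stand-in for formatting.tokens_to_string
    return (
        DIRECTIVE_CHAR
        + ('' if len(value) == 0 or value.startswith(DIRECTIVE_CHAR) else ' ')
        + value
    )

assert render_directive(['display', 'table', 'default']) == '% display table default'
assert render_directive([]) == '%'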
Example 4
def get_worksheet_lines(worksheet_info):
    """
    Return a list of pretty-printed lines of text for the given worksheet.
    """
    lines = []
    for item in worksheet_info['items']:
        (bundle_info, subworksheet_info, value_obj, item_type) = item

        if item_type == TYPE_MARKUP:
            lines.append(value_obj)
        elif item_type == TYPE_DIRECTIVE:
            if len(value_obj) > 0 and value_obj[0] == DIRECTIVE_CHAR:
                # A comment directive
                lines.append('//' + ' '.join(value_obj[1:]))
            else:
                # A normal directive
                value = formatting.tokens_to_string(value_obj)
                value = (
                    DIRECTIVE_CHAR
                    + ('' if len(value) == 0 or value.startswith(DIRECTIVE_CHAR) else ' ')
                    + value
                )
                lines.append(value)
        elif item_type == TYPE_BUNDLE:
            if 'metadata' not in bundle_info:
                # This happens when we add bundles by uuid and don't actually make sure they exist
                # lines.append('ERROR: non-existent bundle %s' % bundle_info['uuid'])
                description = formatting.contents_str(None)
            else:
                metadata = bundle_info['metadata']
                # raise Exception(metadata)
                description = bundle_info['bundle_type']
                description += ' ' + metadata['name']
                deps = interpret_genpath(bundle_info, 'dependencies')
                if deps:
                    description += ' -- ' + deps
                command = bundle_info.get('command')
                if command:
                    description += ' : ' + command
            lines.append(bundle_line(description, bundle_info['uuid']))
        elif item_type == TYPE_WORKSHEET:
            lines.append(
                worksheet_line(
                    'worksheet ' + formatting.contents_str(subworksheet_info.get('name')),
                    subworksheet_info['uuid'],
                )
            )
        else:
            raise RuntimeError('Invalid worksheet item type: %s' % item_type)
    return lines
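To make the bundle branch concrete, here is a hedged sketch of how the description line is assembled for a run bundle; interpret_genpath and bundle_line are replaced by hypothetical stubs, so the exact output format is illustrative only:

def interpret_genpath(bundle_info, genpath):
    # Stub: the real helper resolves generalized paths against bundle metadata.
    return bundle_info.get(genpath, '')

def bundle_line(description, uuid):
    # Stub: the real helper produces the worksheet line for a bundle reference.
    return '[%s]{%s}' % (description, uuid)

bundle_info = {
    'uuid': '0x1234',
    'bundle_type': 'run',
    'metadata': {'name': 'train-model'},
    'dependencies': 'data:0x5678',
    'command': 'python train.py',
}
description = bundle_info['bundle_type'] + ' ' + bundle_info['metadata']['name']
deps = interpret_genpath(bundle_info, 'dependencies')
if deps:
    description += ' -- ' + deps
command = bundle_info.get('command')
if command:
    description += ' : ' + command
print(bundle_line(description, bundle_info['uuid']))
# [run train-model -- data:0x5678 : python train.py]{0x1234}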
Example 5
def fetch_interpreted_worksheet(uuid):
    """
    Return information about a worksheet. Calls
    - get_worksheet_info: get basic info
    - resolve_interpreted_items: get more information about a worksheet.
    In the future, for large worksheets, might want to break this up so
    that we can render something basic.
    Return:
        worksheet_info dict{}:
            key:[value_type] <description>
            blocks:[list]
                    Resolved worksheet blocks from raw_items.
                        Bundles will be grouped into table block items,
                        text items might be grouped into one markdown block etc.
            source:[list] source lines
            raw_to_block:[list]
                            Maps each raw item to its block index pair.
                                For example, assume the first resolved block is a bundle table with 2 rows;
                                then the 2nd element in the list would be [0, 1]:
                                0 means the item belongs to the first block,
                                1 means the item is the second item of that block (the 2nd bundle in our example).
                                NOTE: Used for setting focus on the frontend.
            block_to_raw:[dict]
                            Maps the blocks (table, markdown, records) to their corresponding source line indices.
                            It is mostly a reverse mapping of raw_to_block ("mostly" because raw_to_block has
                            some bugs; see the flush_bundles function in worksheet_utils).
                            This can be used to index the source on the frontend.
                            Example:
                            [0, 0]: 0
                            [0, 1]: 1
                            [1, 0]: 9
                            This means the first block's first item corresponds to the first line in source,
                            and its second item corresponds to the second line in source.
                            The second block corresponds to the 10th line in source.
                            Lines 2-8 can be skipped for multiple reasons: blank lines, comments, schema lines, etc.
                                NOTE: Used for setting focus on the frontend.
    This endpoint can be called with &brief=1 in order to give an abbreviated version,
    which does not resolve searches or wsearches.
    To return an interpreted worksheet that only resolves a particular search/wsearch,
    pass in the search query to the "directive" argument. The value for this argument
    must be a search/wsearch query -- for example, &directive=search 0x .limit=100
    """
    bundle_uuids = request.query.getall('bundle_uuid')
    brief = request.query.get("brief", "0") == "1"

    directive = request.query.get("directive", None)
    search_results = []

    worksheet_info = get_worksheet_info(uuid,
                                        fetch_items=True,
                                        fetch_permissions=True)

    # Shim in additional data for the frontend
    worksheet_info['items'] = resolve_items_into_infos(worksheet_info['items'])

    if worksheet_info['owner_id'] is None:
        worksheet_info['owner_name'] = None
    else:
        owner = local.model.get_user(user_id=worksheet_info['owner_id'])
        worksheet_info['owner_name'] = owner.user_name

    # Fetch items.
    worksheet_info['source'] = get_worksheet_lines(worksheet_info)

    if not directive and not brief:
        expanded_items = []
        for index, raw_item in enumerate(worksheet_info['items']):
            expanded = expand_search_item(raw_item)
            expanded_items.append(expanded)
            # Multiple items can correspond to the same source line (e.g., search directives)
            # raw_items_to_source_index.extend([index] * len(expanded))
        worksheet_info['items'] = list(chain.from_iterable(expanded_items))
    elif directive:
        # Only expand the search item corresponding to the given directive.
        # Used in async loading to only load a single table.
        items_to_show = []
        for i, item in enumerate(worksheet_info['items']):
            (bundle_info, subworksheet_info, value_obj, item_type, item_id,
             sort_key) = item
            if directive == formatting.tokens_to_string(value_obj):
                search_results = perform_search_query(value_obj)
                items_to_show.append(item)
                break
            elif item_type == TYPE_DIRECTIVE:
                # We need to include previous directives
                # so that the final search result can be properly
                # rendered (it may depend on a schema defined earlier
                # in the worksheet).
                items_to_show.append(item)
        # Make sure the search item is at the end of worksheet_info['items'],
        # so we can isolate it later after interpret_items is called.
        worksheet_info['items'] = items_to_show
        worksheet_info['items'].extend(search_results)

    # Set permissions
    worksheet_info['edit_permission'] = worksheet_info[
        'permission'] == GROUP_OBJECT_PERMISSION_ALL
    # Check enable chat box
    worksheet_info['enable_chat'] = local.config.get('enable_chat', False)
    # Format permissions into strings
    worksheet_info['permission_spec'] = permission_str(
        worksheet_info['permission'])
    for group_permission in worksheet_info['group_permissions']:
        group_permission['permission_spec'] = permission_str(
            group_permission['permission'])

    # Go and fetch more information about the worksheet contents by
    # resolving the interpreted items.
    try:
        interpreted_blocks = interpret_items(get_default_schemas(),
                                             worksheet_info['items'],
                                             db_model=local.model)
    except UsageError as e:
        interpreted_blocks = {'blocks': []}
        worksheet_info['error'] = str(e)

    # bundle_uuids is an optional argument that, if present, contains the uuids of all the unfinished run bundles that need updating.
    # In this case, full_worksheet will return a list of items parallel to ws.info.items that contains only the items that need updating.
    # More specifically, all blocks that don't contain run bundles that need updating are None.
    # Also, a non-None block could contain a list of bundle_infos, which represents a list of bundles; usually not all of them need updating.
    # The bundle_infos for bundles that don't need updating are also None.
    if bundle_uuids:
        for i, block in enumerate(interpreted_blocks['blocks']):
            if 'bundle_info' not in block:
                interpreted_blocks['blocks'][i] = None
            else:
                if isinstance(block['bundle_info'], dict):
                    block['bundle_info'] = [block['bundle_info']]
                is_relevant_block = False
                for j, bundle in enumerate(block['bundle_info']):
                    if bundle['uuid'] in bundle_uuids:
                        is_relevant_block = True
                    else:
                        block['bundle_info'][j] = None
                if not is_relevant_block:
                    interpreted_blocks['blocks'][i] = None
    # Group individual items into blocks
    worksheet_info['blocks'] = resolve_interpreted_blocks(
        interpreted_blocks['blocks'], brief=brief)
    worksheet_info['raw_to_block'] = interpreted_blocks['raw_to_block']
    worksheet_info['block_to_raw'] = interpreted_blocks['block_to_raw']

    if directive:
        # If we're only async loading a single table_block / subworksheets_block,
        # return only that block (which is at the end of worksheet_info['items'])
        worksheet_info['blocks'] = (
            [worksheet_info['blocks'][-1]] if len(search_results) else []
        )

    for block in worksheet_info['blocks']:
        if block is None:
            continue
        if block['mode'] == 'table':
            for row_map in block['rows']:
                for k, v in row_map.items():
                    if v is None:
                        row_map[k] = formatting.contents_str(v)
        if 'bundle_info' in block:
            infos = []
            if isinstance(block['bundle_info'], list):
                infos = block['bundle_info']
            elif isinstance(block['bundle_info'], dict):
                infos = [block['bundle_info']]
            for bundle_info in infos:
                if bundle_info is None:
                    continue
                if 'bundle_type' not in bundle_info:
                    continue  # empty info: invalid bundle reference
                if isinstance(bundle_info, dict):
                    format_metadata(bundle_info.get('metadata'))
    # Frontend doesn't use individual 'items' for now
    del worksheet_info['items']
    if bundle_uuids:
        return {'blocks': worksheet_info['blocks']}
    return worksheet_info
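To make the raw_to_block / block_to_raw shapes from the docstring concrete, here is a hypothetical five-line worksheet (a '% display table' directive, two bundles, a blank line, and one markup line). The frontend uses block_to_raw to map a focused (block, row) pair back to a source line:

# source index 0: % display table   (directives map to None and are then
# source index 1: [bundle A]          reassigned to the next interpreted block)
# source index 2: [bundle B]
# source index 3: (blank line)
# source index 4: hello world
raw_to_block = [(0, 0), (0, 0), (0, 1), (1, 0), (1, 0)]
block_to_raw = {'0,0': 1, '0,1': 2, '1,0': 4}

# Given the frontend focus (first block, second row, i.e. bundle B), recover the
# source line to highlight:
focus_index, sub_focus_index = 0, 1
assert block_to_raw['%d,%d' % (focus_index, sub_focus_index)] == 2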
def interpret_items(schemas, raw_items, db_model=None):
    """
    Interpret different items based on their types.
    :param schemas: initial mapping from name to list of schema items (columns of a table)
    :param raw_items: list of (raw) worksheet items to interpret
    :param db_model: database model which is used to query the database
    :return: {'blocks': blocks, ...}, where blocks is a list of:
    {
        'mode': display mode ('markup' | 'contents' | 'image' | 'html', etc.)
        'interpreted': one of
            - rendered string
            - target = (bundle_uuid, genpath)
            - (header = (col1, ..., coln), rows = [{col1:value1, ..., coln:valuen}, ...]) [for tables]
            - {keywords: [...]} for mode = 'search' or 'wsearch'
        'properties': dict of properties (e.g., width, maxlines, etc.),
        'bundle_info': bundle_info or list of bundle_infos,
        'subworksheet_info': subworksheet,
    }
    In addition, return an alignment between the raw items and the interpreted items.
    Each interpreted item has a focusIndex, and possibly consists of a list of
    table rows (indexed by subFocusIndex).  Here is an example:
      --- Raw ---                   --- Interpreted ---
      rawIndex                                         (focusIndex, subFocusIndex)
      0        % display table
      1        [bundle]             [table - row 0     (0, 0)
      2        [bundle]                    - row 1]    (0, 1)
      3
      4        hello                [markup            (1, 0)
      5        world                       ]
      6        [worksheet]          [worksheet]        (2, 0)
      7
    The mapping should be computed as follows:
    - Some raw items contribute directly to a particular interpreted item.
    - Others (blank lines, directives, schema definitions) don't.
    - Those that don't should get mapped to the next interpreted item.
    """
    raw_to_block = []  # rawIndex => (focusIndex, subFocusIndex)

    # Set default schema
    current_schema = None
    default_display = ('table', 'default')
    current_display = default_display
    blocks = []
    bundle_infos = []
    worksheet_infos = []

    def get_schema(args):  # args is a list of schema names
        args = args if len(args) > 0 else ['default']
        schema = []
        for arg in args:
            # If schema doesn't exist, then treat as item (e.g., uuid).
            schema += schemas.get(arg, canonicalize_schema_items([arg.split(':', 2)]))
        return schema

    def is_missing(info):
        return 'metadata' not in info

    def parse_properties(args):
        properties = {}
        for item in args:
            if '=' not in item:
                raise UsageError('Expected <key>=<value>, but got %s' % item)
            key, value = item.split('=', 1)
            properties[key] = value
        return properties

    def genpath_to_target(bundle_info, genpath):
        # bundle_info, '/stdout' => target = (uuid, 'stdout')
        if not is_file_genpath(genpath):
            raise UsageError('Not file genpath: %s' % genpath)
        # strip off the leading / from genpath to create a subpath in the target.
        return (bundle_info['uuid'], genpath[1:])

    def flush_bundles(bundle_block_start_index):
        """
        Having collected bundles in |bundle_infos|, flush them into |blocks|,
        potentially as a single table depending on the mode.
        bundle_block_start_index: the raw index at which this block of bundles starts (stored as first_bundle_source_index)
        """
        if len(bundle_infos) == 0:
            return

        def raise_genpath_usage_error():
            raise UsageError(
                'Expected \'% display '
                + mode
                + ' (genpath)\', but got \'% display '
                + ' '.join([mode] + args)
                + '\''
            )

        # Print out the current bundles somehow
        mode = current_display[0]
        args = current_display[1:]
        if mode == 'hidden':
            pass
        elif mode == 'contents' or mode == 'image':
            for item_index, bundle_info in bundle_infos:
                if is_missing(bundle_info):
                    blocks.append(
                        MarkupBlockSchema()
                        .load({'text': 'ERROR: cannot access bundle', 'error': True})
                        .data
                    )
                    continue

                # Parse arguments
                if len(args) == 0:
                    raise_genpath_usage_error()
                # these two are required for the target
                (bundle_uuid, target_genpath) = genpath_to_target(bundle_info, args[0])
                properties = parse_properties(args[1:])

                block_object = {
                    'target_genpath': target_genpath,
                    'bundles_spec': BundleUUIDSpecSchema()
                    .load(BundleUUIDSpecSchema.create_json([bundle_info]))
                    .data,
                    'status': FetchStatusSchema.get_unknown_status(),
                }

                if mode == 'contents':
                    try:
                        block_object['max_lines'] = int(
                            properties.get('maxlines', DEFAULT_CONTENTS_MAX_LINES)
                        )
                    except ValueError:
                        raise UsageError("maxlines must be integer")
                    blocks.append(BundleContentsBlockSchema().load(block_object).data)
                elif mode == 'image':
                    block_object['width'] = properties.get('width', None)
                    block_object['height'] = properties.get('height', None)
                    blocks.append(BundleImageBlockSchema().load(block_object).data)
        elif mode == 'record':
            # display record schema =>
            # key1: value1
            # key2: value2
            # ...
            schema = get_schema(args)
            for item_index, bundle_info in bundle_infos:
                header = ('key', 'value')
                rows = []
                for (name, genpath, post, _) in schema:
                    rows.append(
                        RecordsRowSchema()
                        .load(
                            {
                                'key': name + ':',
                                'value': apply_func(post, interpret_genpath(bundle_info, genpath)),
                            }
                        )
                        .data
                    )
                blocks.append(
                    RecordsBlockSchema()
                    .load(
                        {
                            'bundles_spec': BundleUUIDSpecSchema()
                            .load(BundleUUIDSpecSchema.create_json([bundle_info]))
                            .data,
                            'status': FetchStatusSchema.get_unknown_status(),
                            'header': header,
                            'rows': rows,
                            'sort_keys': [bundle_info["sort_key"]],
                            'first_bundle_source_index': bundle_block_start_index,
                            'using_schemas': args if len(args) > 0 else ['default'],
                        }
                    )
                    .data
                )
        elif mode == 'table':
            # display table schema =>
            # key1       key2
            # b1_value1  b1_value2
            # b2_value1  b2_value2
            schema = get_schema(args)
            header = tuple(name for (name, genpath, post, _) in schema)
            rows = []
            processed_bundle_infos = []
            # Cache the mapping between owner_id to owner on current worksheet
            owner_cache = {}
            for item_index, bundle_info in bundle_infos:
                if 'metadata' in bundle_info:
                    rows.append(
                        {
                            name: apply_func(
                                post,
                                interpret_genpath(
                                    bundle_info, genpath, db_model=db_model, owner_cache=owner_cache
                                ),
                            )
                            for (name, genpath, post, _) in schema
                        }
                    )
                    processed_bundle_infos.append(copy.deepcopy(bundle_info))
                else:
                    # The front-end relies on the name metadata field existing
                    processed_bundle_info = copy.deepcopy(bundle_info)
                    processed_bundle_info['metadata'] = {'name': '<invalid>'}
                    rows.append(
                        {
                            name: apply_func(
                                post, interpret_genpath(processed_bundle_info, genpath)
                            )
                            for (name, genpath, post, _) in schema
                        }
                    )
                    processed_bundle_infos.append(processed_bundle_info)

            blocks.append(
                TableBlockSchema()
                .load(
                    {
                        'bundles_spec': BundleUUIDSpecSchema()
                        .load(BundleUUIDSpecSchema.create_json(processed_bundle_infos))
                        .data,
                        'status': FetchStatusSchema.get_unknown_status(),
                        'header': header,
                        'rows': rows,
                        'sort_keys': [
                            processed_bundle_info["sort_key"]
                            for processed_bundle_info in processed_bundle_infos
                        ],
                        'first_bundle_source_index': bundle_block_start_index,
                        'using_schemas': args if len(args) > 0 else ['default'],
                    }
                )
                .data
            )

        elif mode == 'graph':
            # display graph <genpath> <properties>
            if len(args) == 0:
                raise_genpath_usage_error()
            # trajectories is list of {
            #   'uuid': ...,
            #   'display_name': ..., # What to show as the description of a bundle
            #   'target': (bundle_uuid, subpath)
            # }
            properties = parse_properties(args[1:])

            trajectories = [
                {
                    'bundle_uuid': bundle_info['uuid'],
                    'display_name': interpret_genpath(
                        bundle_info, properties.get('display_name', 'name')
                    ),
                    'target_genpath': genpath_to_target(bundle_info, args[0])[1],
                }
                for item_index, bundle_info in bundle_infos
            ]

            try:
                max_lines = int(properties.get('maxlines', DEFAULT_CONTENTS_MAX_LINES))
            except ValueError:
                raise UsageError("maxlines must be integer")

            blocks.append(
                GraphBlockSchema()
                .load(
                    {
                        'trajectories': trajectories,
                        'bundles_spec': BundleUUIDSpecSchema()
                        .load(BundleUUIDSpecSchema.create_json([bundle_infos[0][1]]))
                        .data,  # Only show the first one for now
                        # 'bundles_spec': BundleUUIDSpecSchema().load(BundleUUIDSpecSchema.create_json(
                        #     [copy.deepcopy(bundle_info) for item_index, bundle_info in bundle_infos]).data,
                        'max_lines': max_lines,
                        'xlabel': properties.get('xlabel', None),
                        'ylabel': properties.get('ylabel', None),
                    }
                )
                .data
            )
        else:
            raise UsageError('Unknown display mode: %s' % mode)
        bundle_infos[:] = []  # Clear

    def flush_worksheets():
        if len(worksheet_infos) == 0:
            return

        blocks.append(
            SubworksheetsBlock()
            .load(
                {
                    'subworksheet_infos': copy.deepcopy(worksheet_infos),
                    'sort_keys': [worksheet_info["sort_key"] for worksheet_info in worksheet_infos],
                }
            )
            .data
        )

        worksheet_infos[:] = []

    # Go through all the raw items...
    last_was_empty_line = False
    bundle_block_start_index = -1  # raw index of the first bundle in the current block (-1 if no block is open)
    current_schema_name = None
    current_schema_ids = []
    for raw_index, item in enumerate(raw_items):
        new_last_was_empty_line = True
        try:
            (bundle_info, subworksheet_info, value_obj, item_type, item_id, sort_key) = item

            is_bundle = item_type == TYPE_BUNDLE
            is_search = item_type == TYPE_DIRECTIVE and get_command(value_obj) == 'search'
            is_directive = item_type == TYPE_DIRECTIVE
            is_worksheet = item_type == TYPE_WORKSHEET
            if not is_bundle:
                flush_bundles(bundle_block_start_index)
                bundle_block_start_index = -1

            if not is_worksheet:
                flush_worksheets()

            # Reset display to minimize long distance dependencies of directives
            if not (is_bundle or is_search):
                current_display = default_display

            # Reset schema to minimize long distance dependencies of directives
            command = get_command(value_obj)
            if not is_directive or (command != "add" and command != "addschema"):
                if current_schema is not None:
                    blocks.append(
                        SchemaBlockSchema()
                        .load(
                            {
                                'status': FetchStatusSchema.get_unknown_status(),
                                'header': ["field", "generalized-path", "post-processor"],
                                'schema_name': current_schema_name,
                                'field_rows': [
                                    {
                                        "field": name,
                                        "generalized-path": path,
                                        "post-processor": post,
                                        "from_schema_name": from_schema_name,
                                    }
                                    for name, path, post, from_schema_name in current_schema
                                ],
                                'sort_keys': [sort_key],
                                'ids': current_schema_ids,
                            }
                        )
                        .data
                    )
                current_schema = None
                current_schema_ids = []

            if item_type == TYPE_BUNDLE:
                if bundle_block_start_index == -1:
                    bundle_block_start_index = raw_index
                bundle_info = dict(bundle_info, sort_key=sort_key)
                raw_to_block.append((len(blocks), len(bundle_infos)))
                bundle_infos.append((raw_index, bundle_info))
            elif item_type == TYPE_WORKSHEET:
                subworksheet_info = dict(subworksheet_info, sort_key=sort_key)
                raw_to_block.append((len(blocks), len(worksheet_infos)))
                worksheet_infos.append(subworksheet_info)
            elif item_type == TYPE_MARKUP:
                new_last_was_empty_line = value_obj == ''
                if (
                    len(blocks) > 0
                    and blocks[-1]['mode'] == BlockModes.markup_block
                    and not last_was_empty_line
                    and not new_last_was_empty_line
                ):
                    # Join with previous markup item
                    blocks[-1]['text'] += '\n' + value_obj
                    # Ids
                    blocks[-1]['ids'] = blocks[-1].get('ids', [])
                    blocks[-1]['ids'].append(item_id)
                    blocks[-1]['sort_keys'] = blocks[-1].get('sort_keys', [])
                    blocks[-1]['sort_keys'].append(sort_key)
                elif not new_last_was_empty_line:
                    block = (
                        MarkupBlockSchema()
                        .load(
                            {
                                'id': len(blocks),
                                'text': value_obj,
                                'ids': [item_id],
                                'sort_keys': [sort_key],
                            }
                        )
                        .data
                    )
                    blocks.append(block)
                # Important: set raw_to_block after so we can focus on current item.
                if new_last_was_empty_line:
                    raw_to_block.append(None)
                else:
                    raw_to_block.append((len(blocks) - 1, 0))
            elif item_type == TYPE_DIRECTIVE:
                command = get_command(value_obj)
                appended_directive_blocks_index = False
                if command == '%' or command == '' or command is None:
                    # Comment
                    pass
                elif command == 'schema':
                    # Start defining new schema
                    if len(value_obj) < 2:
                        raise UsageError("`schema` missing name")
                    name = value_obj[1]
                    current_schema_ids.append(item_id)
                    current_schema_name = name
                    schemas[name] = current_schema = []
                    # Schema block should also be considered when calculating the focus index
                    raw_to_block.append((len(blocks) - 1 + len(current_schema_ids), 0))
                    appended_directive_blocks_index = True
                elif command == 'addschema':
                    # Add to schema
                    if current_schema is None:
                        raise UsageError("`addschema` must be preceded by `schema` directive")
                    if len(value_obj) < 2:
                        raise UsageError("`addschema` missing name")
                    name = value_obj[1]
                    current_schema_ids.append(item_id)
                    current_schema += schemas[name]
                elif command == 'add':
                    # Add to schema
                    if current_schema is None:
                        raise UsageError("`add` must be preceded by `schema` directive")
                    current_schema_ids.append(item_id)
                    schema_item = canonicalize_schema_item(value_obj[1:], current_schema_name)
                    current_schema.append(schema_item)
                elif command == 'display':
                    # Set display
                    current_display = value_obj[1:]
                elif command in ('search', 'wsearch'):
                    # Show item placeholders in brief mode
                    blocks.append(
                        PlaceholderBlockSchema()
                        .load(
                            {
                                'directive': formatting.tokens_to_string(value_obj),
                                'sort_keys': [sort_key],
                            }
                        )
                        .data
                    )
                    appended_directive_blocks_index = True
                    raw_to_block.append((len(blocks) - 1, 0))
                else:
                    raise UsageError("unknown directive `%s`" % command)
                # Add an empty item placeholder for other directives, since they do not represent substantial items
                if not appended_directive_blocks_index:
                    raw_to_block.append(None)
            else:
                raise RuntimeError('Unknown worksheet item type: %s' % item_type)

            # Flush bundles, subworksheets and schema items once more at the end
            if raw_index == len(raw_items) - 1:
                flush_bundles(bundle_block_start_index)
                bundle_block_start_index = -1
                flush_worksheets()
                if current_schema is not None:
                    blocks.append(
                        SchemaBlockSchema()
                        .load(
                            {
                                'status': FetchStatusSchema.get_unknown_status(),
                                'header': ["field", "generalized-path", "post-processor"],
                                'schema_name': current_schema_name,
                                'field_rows': [
                                    {
                                        "field": name,
                                        "generalized-path": path,
                                        "post-processor": post,
                                        "from_schema_name": from_schema_name,
                                    }
                                    for name, path, post, from_schema_name in current_schema
                                ],
                                'sort_keys': [sort_key],
                                'ids': current_schema_ids,
                            }
                        )
                        .data
                    )

        except UsageError as e:
            current_schema = None
            bundle_infos[:] = []
            worksheet_infos[:] = []
            blocks.append(
                MarkupBlockSchema()
                .load(
                    {'text': 'Error in source line %d: %s' % (raw_index + 1, str(e)), 'error': True}
                )
                .data
            )

            raw_to_block.append((len(blocks) - 1, 0))

        except Exception:
            current_schema = None
            bundle_infos[:] = []
            worksheet_infos[:] = []
            import traceback

            traceback.print_exc()
            blocks.append(
                MarkupBlockSchema()
                .load(
                    {
                        'text': 'Unexpected error while parsing line %d' % (raw_index + 1),
                        'error': True,
                    }
                )
                .data
            )

            raw_to_block.append((len(blocks) - 1, 0))

        finally:
            last_was_empty_line = new_last_was_empty_line

    # TODO: fix inconsistencies resulting from UsageErrors thrown in flush_bundles()
    if len(raw_to_block) != len(raw_items):
        print("WARNING: Length of raw_to_block does not match length of raw_items", file=sys.stderr)

    # Package the result
    block_to_raw = {}
    next_interpreted_index = None
    # Go in reverse order so we can assign raw items that map to None to the next interpreted item
    for raw_index, interpreted_index in reversed(list(enumerate(raw_to_block))):
        if interpreted_index is None:  # e.g., blank line, directive
            interpreted_index = next_interpreted_index
            raw_to_block[raw_index] = interpreted_index
        else:
            interpreted_index_str = str(interpreted_index[0]) + ',' + str(interpreted_index[1])
            if interpreted_index_str not in block_to_raw:  # Bias towards the last item
                block_to_raw[interpreted_index_str] = raw_index
        next_interpreted_index = interpreted_index

    # Return the result
    result = {}
    result['blocks'] = blocks
    result['raw_to_block'] = raw_to_block
    result['block_to_raw'] = block_to_raw
    return result
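A minimal, self-contained sketch of the packaging step above: raw items that map to None (blank lines, directives, schema lines) inherit the index of the next interpreted block, while block_to_raw keeps one representative raw index per (block, row) pair, biased towards the last raw line. The worksheet layout below is hypothetical and mirrors the alignment example in the docstring:

# rawIndex 0: % display table, 1-2: bundles (table rows), 3: blank line,
# 4-5: markup lines joined into one block, 6: subworksheet, 7: trailing blank line
raw_to_block = [None, (0, 0), (0, 1), None, (1, 0), (1, 0), (2, 0), None]

block_to_raw = {}
next_interpreted_index = None
# Go in reverse order so raw items that map to None pick up the next block's index.
for raw_index, interpreted_index in reversed(list(enumerate(raw_to_block))):
    if interpreted_index is None:
        interpreted_index = next_interpreted_index
        raw_to_block[raw_index] = interpreted_index
    else:
        key = '%d,%d' % interpreted_index
        if key not in block_to_raw:  # bias towards the last raw line mapped to this pair
            block_to_raw[key] = raw_index
    next_interpreted_index = interpreted_index

assert raw_to_block == [(0, 0), (0, 0), (0, 1), (1, 0), (1, 0), (1, 0), (2, 0), None]
assert block_to_raw == {'2,0': 6, '1,0': 5, '0,1': 2, '0,0': 1}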