def set_error_data(block_index, message):
    """Replace the block at |block_index| with a markup block flagging |message| as an error."""
    payload = {'id': block_index, 'text': 'ERROR: ' + message}
    interpreted_blocks[block_index] = MarkupBlockSchema().load(payload).data
def flush_bundles():
    """
    Having collected bundles in |bundle_infos|, flush them into |blocks|,
    potentially as a single table depending on the mode.

    Reads the enclosing scope's |current_display| (a ('mode', arg1, arg2, ...)
    tuple), |bundle_infos| (list of (item_index, bundle_info) pairs), |blocks|
    (output list, appended to in place) and |db_model|.  Clears |bundle_infos|
    when done.  Raises UsageError on malformed display directives.
    """
    if len(bundle_infos) == 0:
        return

    def raise_genpath_usage_error():
        # Report the expected '% display <mode> (genpath)' form alongside what was actually given.
        raise UsageError('Expected \'% display ' + mode + ' (genpath)\', but got \'% display ' + ' '.join([mode] + args) + '\'')

    # Print out the current bundles somehow
    mode = current_display[0]
    args = current_display[1:]
    if mode == 'hidden':
        pass
    elif mode == 'contents' or mode == 'image':
        # One block per bundle, each pointing at a file target inside that bundle.
        for item_index, bundle_info in bundle_infos:
            if is_missing(bundle_info):
                # Bundle metadata could not be fetched; surface an error block instead.
                blocks.append(MarkupBlockSchema().load({
                    'text': 'ERROR: cannot access bundle',
                    'error': True
                }).data)
                continue

            # Parse arguments
            if len(args) == 0:
                raise_genpath_usage_error()
            # these two are required for the target
            (bundle_uuid, target_genpath) = genpath_to_target(bundle_info, args[0])
            properties = parse_properties(args[1:])

            block_object = {
                'target_genpath': target_genpath,
                'bundles_spec': BundleUUIDSpecSchema().load(
                    BundleUUIDSpecSchema.create_json([bundle_info])).data,
                'status': FetchStatusSchema.get_unknown_status(),
            }

            if mode == 'contents':
                try:
                    block_object['max_lines'] = int(
                        properties.get('maxlines', DEFAULT_CONTENTS_MAX_LINES))
                except ValueError:
                    raise UsageError("maxlines must be integer")
                blocks.append(
                    BundleContentsBlockSchema().load(block_object).data)
            elif mode == 'image':
                # Optional width/height properties pass through unvalidated.
                block_object['width'] = properties.get('width', None)
                block_object['height'] = properties.get('height', None)
                blocks.append(
                    BundleImageBlockSchema().load(block_object).data)
    elif mode == 'record':
        # display record schema =>
        # key1: value1
        # key2: value2
        # ...
        # One two-column (key, value) records block per bundle.
        schema = get_schema(args)
        for item_index, bundle_info in bundle_infos:
            header = ('key', 'value')
            rows = []
            for (name, genpath, post) in schema:
                rows.append(RecordsRowSchema().load({
                    'key': name + ':',
                    'value': apply_func(post, interpret_genpath(bundle_info, genpath)),
                }).data)
            blocks.append(RecordsBlockSchema().load({
                'bundles_spec': BundleUUIDSpecSchema().load(
                    BundleUUIDSpecSchema.create_json([bundle_info])).data,
                'status': FetchStatusSchema.get_unknown_status(),
                'header': header,
                'rows': rows,
            }).data)
    elif mode == 'table':
        # display table schema =>
        # key1       key2
        # b1_value1  b1_value2
        # b2_value1  b2_value2
        # All collected bundles become rows of a single table block.
        schema = get_schema(args)
        header = tuple(name for (name, genpath, post) in schema)
        rows = []
        processed_bundle_infos = []
        # Cache the mapping between owner_id to owner on current worksheet
        owner_cache = {}
        for item_index, bundle_info in bundle_infos:
            if 'metadata' in bundle_info:
                rows.append({
                    name: apply_func(
                        post,
                        interpret_genpath(bundle_info, genpath,
                                          db_model=db_model,
                                          owner_cache=owner_cache),
                    )
                    for (name, genpath, post) in schema
                })
                processed_bundle_infos.append(copy.deepcopy(bundle_info))
            else:
                # The front-end relies on the name metadata field existing
                processed_bundle_info = copy.deepcopy(bundle_info)
                processed_bundle_info['metadata'] = {'name': '<invalid>'}
                rows.append({
                    name: apply_func(
                        post,
                        interpret_genpath(processed_bundle_info, genpath))
                    for (name, genpath, post) in schema
                })
                processed_bundle_infos.append(processed_bundle_info)
        blocks.append(TableBlockSchema().load({
            'bundles_spec': BundleUUIDSpecSchema().load(
                BundleUUIDSpecSchema.create_json(
                    processed_bundle_infos)).data,
            'status': FetchStatusSchema.get_unknown_status(),
            'header': header,
            'rows': rows,
        }).data)
    elif mode == 'graph':
        # display graph <genpath> <properties>
        if len(args) == 0:
            raise_genpath_usage_error()
        # trajectories is list of {
        #   'uuid': ...,
        #   'display_name': ...,   # What to show as the description of a bundle
        #   'target': (bundle_uuid, subpath)
        # }
        properties = parse_properties(args[1:])
        trajectories = [{
            'bundle_uuid': bundle_info['uuid'],
            'display_name': interpret_genpath(bundle_info, properties.get('display_name', 'name')),
            'target_genpath': genpath_to_target(bundle_info, args[0])[1],
        } for item_index, bundle_info in bundle_infos]
        try:
            max_lines = int(
                properties.get('maxlines', DEFAULT_CONTENTS_MAX_LINES))
        except ValueError:
            raise UsageError("maxlines must be integer")
        blocks.append(GraphBlockSchema().load({
            'trajectories': trajectories,
            'bundles_spec': BundleUUIDSpecSchema().load(
                BundleUUIDSpecSchema.create_json([
                    bundle_infos[0][1]
                ])).data,  # Only show the first one for now
            # 'bundles_spec': BundleUUIDSpecSchema().load(BundleUUIDSpecSchema.create_json(
            #     [copy.deepcopy(bundle_info) for item_index, bundle_info in bundle_infos]).data,
            'max_lines': max_lines,
            'xlabel': properties.get('xlabel', None),
            'ylabel': properties.get('ylabel', None),
        }).data)
    else:
        raise UsageError('Unknown display mode: %s' % mode)
    bundle_infos[:] = []  # Clear
def interpret_items(schemas, raw_items, db_model=None):
    """
    Interpret different items based on their types.

    :param schemas: initial mapping from name to list of schema items (columns of a table);
        mutated in place when a `% schema` directive defines a new schema
    :param raw_items: list of (raw) worksheet items (triples) to interpret
    :param db_model: database model which is used to query database
    :return: {'items': interpreted_items, ...}, where interpreted_items is a list of:
    {
        'mode': display mode ('markup' | 'contents' | 'image' | 'html', etc.)
        'interpreted': one of
            - rendered string
            - target = (bundle_uuid, genpath)
            - (header = (col1, ..., coln), rows = [{col1:value1, ..., coln:valuen}, ...]) [for tables]
            - {keywords: [...]} for mode = 'search' or 'wsearch'
        'properties': dict of properties (e.g., width, maxlines, etc.),
        'bundle_info': bundle_info or list of bundle_infos,
        'subworksheet_info': subworksheet,
    }
    In addition, return an alignment between the raw items and the interpreted items.
    Each interpreted item has a focusIndex, and possibly consists of a list of
    table rows (indexed by subFocusIndex).  Here is an example:
      --- Raw ---                --- Interpreted ---
      rawIndex                   (focusIndex, subFocusIndex)
      0        % display table
      1        [bundle]          [table - row 0         (0, 0)
      2        [bundle]                  - row 1]       (0, 1)
      3
      4        hello             [markup                (1, 0)
      5        world              ]
      6        [worksheet]       [worksheet]            (2, 0)
      7
    The mapping should be computed as follows:
    - Some raw items contribute directly to a particular interpreted item.
    - Others (blank lines, directives, schema definitions) don't.
    - Those that don't should get mapped to the next interpreted item.
    """
    raw_to_block = []  # rawIndex => (focusIndex, subFocusIndex)

    # Set default schema
    current_schema = None
    default_display = ('table', 'default')
    current_display = default_display
    blocks = []           # interpreted blocks, in order
    bundle_infos = []     # pending (raw_index, bundle_info) pairs, flushed by flush_bundles()
    worksheet_infos = []  # pending subworksheet infos, flushed by flush_worksheets()

    def get_schema(args):
        # args is a list of schema names
        args = args if len(args) > 0 else ['default']
        schema = []
        for arg in args:
            # If schema doesn't exist, then treat as item (e.g., uuid).
            schema += schemas.get(
                arg, canonicalize_schema_items([arg.split(':', 2)]))
        return schema

    def is_missing(info):
        # Bundles we couldn't access have no 'metadata' field.
        return 'metadata' not in info

    def parse_properties(args):
        # Parse 'key=value' tokens into a dict; raise UsageError on malformed tokens.
        properties = {}
        for item in args:
            if '=' not in item:
                raise UsageError('Expected <key>=<value>, but got %s' % item)
            key, value = item.split('=', 1)
            properties[key] = value
        return properties

    def genpath_to_target(bundle_info, genpath):
        # bundle_info, '/stdout' => target = (uuid, 'stdout')
        if not is_file_genpath(genpath):
            raise UsageError('Not file genpath: %s' % genpath)
        # strip off the leading / from genpath to create a subpath in the target.
        return (bundle_info['uuid'], genpath[1:])

    def flush_bundles():
        """
        Having collected bundles in |bundle_infos|, flush them into |blocks|,
        potentially as a single table depending on the mode.
        """
        if len(bundle_infos) == 0:
            return

        def raise_genpath_usage_error():
            raise UsageError('Expected \'% display ' + mode + ' (genpath)\', but got \'% display ' + ' '.join([mode] + args) + '\'')

        # Print out the current bundles somehow
        mode = current_display[0]
        args = current_display[1:]
        if mode == 'hidden':
            pass
        elif mode == 'contents' or mode == 'image':
            # One block per bundle, each pointing at a file target inside that bundle.
            for item_index, bundle_info in bundle_infos:
                if is_missing(bundle_info):
                    blocks.append(MarkupBlockSchema().load({
                        'text': 'ERROR: cannot access bundle',
                        'error': True
                    }).data)
                    continue

                # Parse arguments
                if len(args) == 0:
                    raise_genpath_usage_error()
                # these two are required for the target
                (bundle_uuid, target_genpath) = genpath_to_target(bundle_info, args[0])
                properties = parse_properties(args[1:])

                block_object = {
                    'target_genpath': target_genpath,
                    'bundles_spec': BundleUUIDSpecSchema().load(
                        BundleUUIDSpecSchema.create_json([bundle_info])).data,
                    'status': FetchStatusSchema.get_unknown_status(),
                }

                if mode == 'contents':
                    try:
                        block_object['max_lines'] = int(
                            properties.get('maxlines', DEFAULT_CONTENTS_MAX_LINES))
                    except ValueError:
                        raise UsageError("maxlines must be integer")
                    blocks.append(
                        BundleContentsBlockSchema().load(block_object).data)
                elif mode == 'image':
                    block_object['width'] = properties.get('width', None)
                    block_object['height'] = properties.get('height', None)
                    blocks.append(
                        BundleImageBlockSchema().load(block_object).data)
        elif mode == 'record':
            # display record schema =>
            # key1: value1
            # key2: value2
            # ...
            # One two-column (key, value) records block per bundle.
            schema = get_schema(args)
            for item_index, bundle_info in bundle_infos:
                header = ('key', 'value')
                rows = []
                for (name, genpath, post) in schema:
                    rows.append(RecordsRowSchema().load({
                        'key': name + ':',
                        'value': apply_func(post, interpret_genpath(bundle_info, genpath)),
                    }).data)
                blocks.append(RecordsBlockSchema().load({
                    'bundles_spec': BundleUUIDSpecSchema().load(
                        BundleUUIDSpecSchema.create_json([bundle_info])).data,
                    'status': FetchStatusSchema.get_unknown_status(),
                    'header': header,
                    'rows': rows,
                }).data)
        elif mode == 'table':
            # display table schema =>
            # key1       key2
            # b1_value1  b1_value2
            # b2_value1  b2_value2
            # All collected bundles become rows of a single table block.
            schema = get_schema(args)
            header = tuple(name for (name, genpath, post) in schema)
            rows = []
            processed_bundle_infos = []
            # Cache the mapping between owner_id to owner on current worksheet
            owner_cache = {}
            for item_index, bundle_info in bundle_infos:
                if 'metadata' in bundle_info:
                    rows.append({
                        name: apply_func(
                            post,
                            interpret_genpath(bundle_info, genpath,
                                              db_model=db_model,
                                              owner_cache=owner_cache),
                        )
                        for (name, genpath, post) in schema
                    })
                    processed_bundle_infos.append(copy.deepcopy(bundle_info))
                else:
                    # The front-end relies on the name metadata field existing
                    processed_bundle_info = copy.deepcopy(bundle_info)
                    processed_bundle_info['metadata'] = {'name': '<invalid>'}
                    rows.append({
                        name: apply_func(
                            post,
                            interpret_genpath(processed_bundle_info, genpath))
                        for (name, genpath, post) in schema
                    })
                    processed_bundle_infos.append(processed_bundle_info)
            blocks.append(TableBlockSchema().load({
                'bundles_spec': BundleUUIDSpecSchema().load(
                    BundleUUIDSpecSchema.create_json(
                        processed_bundle_infos)).data,
                'status': FetchStatusSchema.get_unknown_status(),
                'header': header,
                'rows': rows,
            }).data)
        elif mode == 'graph':
            # display graph <genpath> <properties>
            if len(args) == 0:
                raise_genpath_usage_error()
            # trajectories is list of {
            #   'uuid': ...,
            #   'display_name': ...,   # What to show as the description of a bundle
            #   'target': (bundle_uuid, subpath)
            # }
            properties = parse_properties(args[1:])
            trajectories = [{
                'bundle_uuid': bundle_info['uuid'],
                'display_name': interpret_genpath(bundle_info, properties.get('display_name', 'name')),
                'target_genpath': genpath_to_target(bundle_info, args[0])[1],
            } for item_index, bundle_info in bundle_infos]
            try:
                max_lines = int(
                    properties.get('maxlines', DEFAULT_CONTENTS_MAX_LINES))
            except ValueError:
                raise UsageError("maxlines must be integer")
            blocks.append(GraphBlockSchema().load({
                'trajectories': trajectories,
                'bundles_spec': BundleUUIDSpecSchema().load(
                    BundleUUIDSpecSchema.create_json([
                        bundle_infos[0][1]
                    ])).data,  # Only show the first one for now
                # 'bundles_spec': BundleUUIDSpecSchema().load(BundleUUIDSpecSchema.create_json(
                #     [copy.deepcopy(bundle_info) for item_index, bundle_info in bundle_infos]).data,
                'max_lines': max_lines,
                'xlabel': properties.get('xlabel', None),
                'ylabel': properties.get('ylabel', None),
            }).data)
        else:
            raise UsageError('Unknown display mode: %s' % mode)
        bundle_infos[:] = []  # Clear

    def flush_worksheets():
        # Flush pending subworksheets into a single subworksheets block.
        if len(worksheet_infos) == 0:
            return
        blocks.append(SubworksheetsBlock().load({
            'subworksheet_infos': copy.deepcopy(worksheet_infos)
        }).data)
        worksheet_infos[:] = []

    # Go through all the raw items...
    last_was_empty_line = False
    for raw_index, item in enumerate(raw_items):
        new_last_was_empty_line = True
        try:
            (bundle_info, subworksheet_info, value_obj, item_type, id, sort_key) = item
            is_bundle = item_type == TYPE_BUNDLE
            is_search = item_type == TYPE_DIRECTIVE and get_command(
                value_obj) == 'search'
            is_directive = item_type == TYPE_DIRECTIVE
            is_worksheet = item_type == TYPE_WORKSHEET
            # A non-bundle item terminates any run of bundles (and likewise for worksheets).
            if not is_bundle:
                flush_bundles()
            if not is_worksheet:
                flush_worksheets()

            # Reset display to minimize long distance dependencies of directives
            if not (is_bundle or is_search):
                current_display = default_display
            # Reset schema to minimize long distance dependencies of directives
            if not is_directive:
                current_schema = None

            if item_type == TYPE_BUNDLE:
                raw_to_block.append((len(blocks), len(bundle_infos)))
                bundle_infos.append((raw_index, bundle_info))
            elif item_type == TYPE_WORKSHEET:
                raw_to_block.append((len(blocks), len(worksheet_infos)))
                worksheet_infos.append(subworksheet_info)
            elif item_type == TYPE_MARKUP:
                new_last_was_empty_line = value_obj == ''
                if (len(blocks) > 0
                        and blocks[-1]['mode'] == BlockModes.markup_block
                        and not last_was_empty_line
                        and not new_last_was_empty_line):
                    # Join with previous markup item
                    blocks[-1]['text'] += '\n' + value_obj
                    # Ids
                    blocks[-1]['ids'] = blocks[-1].get('ids', [])
                    blocks[-1]['ids'].append(id)
                    blocks[-1]['sort_keys'] = blocks[-1].get('sort_keys', [])
                    blocks[-1]['sort_keys'].append(sort_key)
                elif not new_last_was_empty_line:
                    block = (MarkupBlockSchema().load({
                        'id': len(blocks),
                        'text': value_obj,
                        'ids': [id],
                        'sort_keys': [sort_key],
                    }).data)
                    blocks.append(block)
                # Important: set raw_to_block after so we can focus on current item.
                if new_last_was_empty_line:
                    raw_to_block.append(None)
                else:
                    raw_to_block.append((len(blocks) - 1, 0))
            elif item_type == TYPE_DIRECTIVE:
                command = get_command(value_obj)
                if command == '%' or command == '' or command is None:
                    # Comment
                    pass
                elif command == 'schema':
                    # Start defining new schema
                    if len(value_obj) < 2:
                        raise UsageError("`schema` missing name")
                    name = value_obj[1]
                    schemas[name] = current_schema = []
                elif command == 'addschema':
                    # Add to schema
                    if current_schema is None:
                        raise UsageError(
                            "`addschema` must be preceded by `schema` directive"
                        )
                    if len(value_obj) < 2:
                        raise UsageError("`addschema` missing name")
                    name = value_obj[1]
                    current_schema += schemas[name]
                elif command == 'add':
                    # Add to schema
                    if current_schema is None:
                        raise UsageError(
                            "`add` must be preceded by `schema` directive")
                    schema_item = canonicalize_schema_item(value_obj[1:])
                    current_schema.append(schema_item)
                elif command == 'display':
                    # Set display
                    current_display = value_obj[1:]
                else:
                    raise UsageError("unknown directive `%s`" % command)
                raw_to_block.append(None)
            else:
                raise RuntimeError('Unknown worksheet item type: %s' % item_type)

            # Flush bundles once more at the end
            if raw_index == len(raw_items) - 1:
                flush_bundles()
                flush_worksheets()
        except UsageError as e:
            # Malformed directive/genpath: drop pending state and emit an error block.
            current_schema = None
            bundle_infos[:] = []
            worksheet_infos[:] = []
            blocks.append(MarkupBlockSchema().load({
                'text': 'Error in source line %d: %s' % (raw_index + 1, str(e)),
                'error': True
            }).data)
            raw_to_block.append((len(blocks) - 1, 0))
        except Exception:
            # Unexpected failure: log the traceback and keep interpreting remaining items.
            current_schema = None
            bundle_infos[:] = []
            worksheet_infos[:] = []
            import traceback
            traceback.print_exc()
            blocks.append(MarkupBlockSchema().load({
                'text': 'Unexpected error while parsing line %d' % (raw_index + 1),
                'error': True,
            }).data)
            raw_to_block.append((len(blocks) - 1, 0))
        finally:
            last_was_empty_line = new_last_was_empty_line

    # TODO: fix inconsistencies resulting from UsageErrors thrown in flush_bundles()
    if len(raw_to_block) != len(raw_items):
        print(
            "WARNING: Length of raw_to_block does not match length of raw_items",
            file=sys.stderr)

    # Package the result
    block_to_raw = {}
    next_interpreted_index = None
    # Go in reverse order so we can assign raw items that map to None to the next interpreted item
    for raw_index, interpreted_index in reversed(list(
            enumerate(raw_to_block))):
        if interpreted_index is None:  # e.g., blank line, directive
            interpreted_index = next_interpreted_index
            raw_to_block[raw_index] = interpreted_index
        else:
            interpreted_index_str = str(interpreted_index[0]) + ',' + str(
                interpreted_index[1])
            if interpreted_index_str not in block_to_raw:  # Bias towards the last item
                block_to_raw[interpreted_index_str] = raw_index
            next_interpreted_index = interpreted_index

    # Return the result
    result = {}
    result['blocks'] = blocks
    result['raw_to_block'] = raw_to_block
    result['block_to_raw'] = block_to_raw
    return result