def convert_item_to_db(item):
    """
    Convert a worksheet item 4-tuple into the tuple of column values stored
    in the database: (bundle_uuid, subworksheet_uuid, value, type).
    """
    bundle_info, subworksheet_info, value_obj, item_type = item
    # Directives are token lists; serialize them back into a single string.
    # Everything else is stored as-is.
    if item_type == TYPE_DIRECTIVE:
        value = formatting.tokens_to_string(value_obj)
    else:
        value = value_obj
    # The value column does not accept NULL, so fall back to ''.
    # TODO: change tables.py so that None's are allowed
    return (
        bundle_info['uuid'] if bundle_info else None,
        subworksheet_info['uuid'] if subworksheet_info else None,
        value or '',
        item_type,
    )
def get_worksheet_lines(worksheet_info):
    """
    Return pretty-printed lines of text for the given worksheet.

    :param worksheet_info: dict with an 'items' list of worksheet item tuples
        (bundle_info, subworksheet_info, value_obj, item_type, ...).
    :return: list of source lines (str).
    :raises RuntimeError: if an item has an unrecognized type.

    NOTE(review): this file contains a second definition of
    get_worksheet_lines further down that shadows this one at import time;
    the two differ in how they unpack items and in command newline handling.
    """
    lines = []
    for item in worksheet_info['items']:
        # Only the first four fields are needed; items may carry extra fields
        # (e.g., id, sort_key) beyond these.
        (bundle_info, subworksheet_info, value_obj, item_type) = item[:4]
        if item_type == TYPE_MARKUP:
            lines.append(value_obj)
        elif item_type == TYPE_DIRECTIVE:
            if len(value_obj) > 0 and value_obj[0] == DIRECTIVE_CHAR:
                # A comment
                # TODO: figure out why this form is considered a comment...
                lines.append('//' + ' '.join(value_obj[1:]))
            else:
                # A normal directive: re-serialize the tokens behind the
                # directive character, adding a space unless the value is
                # empty or already starts with the directive character.
                value = formatting.tokens_to_string(value_obj)
                value = (
                    DIRECTIVE_CHAR
                    + ('' if len(value) == 0 or value.startswith(DIRECTIVE_CHAR) else ' ')
                    + value
                )
                lines.append(value)
        elif item_type == TYPE_BUNDLE:
            if 'metadata' not in bundle_info:
                # This happens when we add bundles by uuid and don't actually make sure they exist
                # lines.append('ERROR: non-existent bundle %s' % bundle_info['uuid'])
                description = formatting.contents_str(None)
            else:
                metadata = bundle_info['metadata']
                # raise Exception(metadata)
                description = bundle_info['bundle_type']
                description += ' ' + metadata['name']
                deps = interpret_genpath(bundle_info, 'dependencies')
                if deps:
                    description += ' -- ' + deps
                command = bundle_info.get('command')
                if command:
                    # Keep the bundle on a single source line.
                    command = command.replace('\n', ' ')
                    description += ' : ' + command
            lines.append(bundle_line(description, bundle_info['uuid']))
        elif item_type == TYPE_WORKSHEET:
            lines.append(
                worksheet_line(
                    'worksheet ' + formatting.contents_str(subworksheet_info.get('name')),
                    subworksheet_info['uuid'],
                )
            )
        else:
            # BUG FIX: previously interpolated the builtin `type`, which
            # rendered as "<class 'type'>" instead of the offending item type.
            raise RuntimeError('Invalid worksheet item type: %s' % item_type)
    return lines
def get_worksheet_lines(worksheet_info):
    """
    Return pretty-printed lines of text for the given worksheet.

    :param worksheet_info: dict with an 'items' list of worksheet item
        4-tuples (bundle_info, subworksheet_info, value_obj, item_type).
    :return: list of source lines (str).
    :raises RuntimeError: if an item has an unrecognized type.
    """
    lines = []
    for item in worksheet_info['items']:
        (bundle_info, subworksheet_info, value_obj, item_type) = item
        if item_type == TYPE_MARKUP:
            lines.append(value_obj)
        elif item_type == TYPE_DIRECTIVE:
            if len(value_obj) > 0 and value_obj[0] == DIRECTIVE_CHAR:
                # A comment directive
                lines.append('//' + ' '.join(value_obj[1:]))
            else:
                # A normal directive: re-serialize the tokens behind the
                # directive character, adding a space unless the value is
                # empty or already starts with the directive character.
                value = formatting.tokens_to_string(value_obj)
                value = (
                    DIRECTIVE_CHAR
                    + ('' if len(value) == 0 or value.startswith(DIRECTIVE_CHAR) else ' ')
                    + value
                )
                lines.append(value)
        elif item_type == TYPE_BUNDLE:
            if 'metadata' not in bundle_info:
                # This happens when we add bundles by uuid and don't actually make sure they exist
                # lines.append('ERROR: non-existent bundle %s' % bundle_info['uuid'])
                description = formatting.contents_str(None)
            else:
                metadata = bundle_info['metadata']
                # raise Exception(metadata)
                description = bundle_info['bundle_type']
                description += ' ' + metadata['name']
                deps = interpret_genpath(bundle_info, 'dependencies')
                if deps:
                    description += ' -- ' + deps
                command = bundle_info.get('command')
                if command:
                    description += ' : ' + command
            lines.append(bundle_line(description, bundle_info['uuid']))
        elif item_type == TYPE_WORKSHEET:
            lines.append(
                worksheet_line(
                    'worksheet ' + formatting.contents_str(subworksheet_info.get('name')),
                    subworksheet_info['uuid'],
                )
            )
        else:
            # BUG FIX: previously interpolated the builtin `type`, which
            # rendered as "<class 'type'>" instead of the offending item type.
            raise RuntimeError('Invalid worksheet item type: %s' % item_type)
    return lines
def fetch_interpreted_worksheet(uuid):
    """
    Return information about a worksheet. Calls
    - get_worksheet_info: get basic info
    - resolve_interpreted_items: get more information about a worksheet.
    In the future, for large worksheets, might want to break this up so
    that we can render something basic.
    Return: worksheet_info dict{}:
        key:[value_type] <description>
        blocks:[list]
                Resolved worksheet blocks from raw_items.
                Bundles will be grouped into table block items,
                text items might be grouped into one markdown block etc.
        source:[list] source lines
        raw_to_block:[list]
                Raw_items to its block index pair.
                For example, assume the first resolved block item is a bundle table that has 2 rows,
                then the 2nd element in the list would be [0, 1]
                [0, 1]: 0 means the item belongs to the first block,
                        1 means the item is the second item of the block (2nd bundle in our example)
                NOTE: Used for setting focus on frontend
        block_to_raw:[dict]
                Maps the blocks (table, markdown, records) to their corresponding source line indices,
                it's mostly a reverse mapping of raw_to_block, by mostly:
                raw_to_block has some bug, please refer to worksheet_utils flush_bundles function.
                This can be used to index the source on the frontend
                Example:
                [0, 0]: 0
                [0, 1]: 1
                [1, 0]: 9
                This means the first blocks' first item corresponds to the first line in source,
                the second item corresponds to the second line in source
                The second block corresponds the 10th line in source.
                2-8 can be skipped for multiple reasons: blank lines, comments, schema lines etc.
                NOTE: Used for setting focus on frontend

    This endpoint can be called with &brief=1 in order to give an abbreviated version,
    which does not resolve searches or wsearches.
    To return an interpreted worksheet that only resolves a particular search/wsearch,
    pass in the search query to the "directive" argument.
    The value for this argument must be a search/wsearch query -- for example,
    &directive=search 0x .limit=100
    """
    # Request parameters: bundle_uuids limits the response to blocks containing
    # those (unfinished run) bundles; brief/directive are described above.
    bundle_uuids = request.query.getall('bundle_uuid')
    brief = request.query.get("brief", "0") == "1"
    directive = request.query.get("directive", None)
    search_results = []
    worksheet_info = get_worksheet_info(uuid, fetch_items=True, fetch_permissions=True)
    # Shim in additional data for the frontend
    worksheet_info['items'] = resolve_items_into_infos(worksheet_info['items'])
    if worksheet_info['owner_id'] is None:
        worksheet_info['owner_name'] = None
    else:
        owner = local.model.get_user(user_id=worksheet_info['owner_id'])
        worksheet_info['owner_name'] = owner.user_name

    # Fetch items: pretty-printed source lines for the whole worksheet.
    worksheet_info['source'] = get_worksheet_lines(worksheet_info)

    if not directive and not brief:
        # Full interpretation: expand every search/wsearch item in place.
        expanded_items = []
        for index, raw_item in enumerate(worksheet_info['items']):
            expanded = expand_search_item(raw_item)
            expanded_items.append(expanded)
            # Multiple items can correspond to the same source line (i.e: search directives)
            # raw_items_to_source_index.extend([index] * len(expanded))
        worksheet_info['items'] = list(chain.from_iterable(expanded_items))
    elif directive:
        # Only expand the search item corresponding to the given directive.
        # Used in async loading to only load a single table.
        items_to_show = []
        for i, item in enumerate(worksheet_info['items']):
            # NOTE(review): `id` here shadows the builtin; it is unused in
            # this loop but should eventually be renamed (e.g., item_id).
            (bundle_info, subworksheet_info, value_obj, item_type, id, sort_key) = item
            if directive == formatting.tokens_to_string(value_obj):
                search_results = perform_search_query(value_obj)
                items_to_show.append(item)
                break
            elif item_type == TYPE_DIRECTIVE:
                # We need to include previous directives
                # so that the final search result can be properly
                # rendered (it may depend on a schema defined earlier
                # in the worksheet).
                items_to_show.append(item)
        # Make sure the search item is at the end of worksheet_info['items'],
        # so we can isolate it later after interpret_items is called.
        worksheet_info['items'] = items_to_show
        worksheet_info['items'].extend(search_results)

    # Set permissions
    worksheet_info['edit_permission'] = (
        worksheet_info['permission'] == GROUP_OBJECT_PERMISSION_ALL
    )
    # Check enable chat box
    worksheet_info['enable_chat'] = local.config.get('enable_chat', False)
    # Format permissions into strings
    worksheet_info['permission_spec'] = permission_str(worksheet_info['permission'])
    for group_permission in worksheet_info['group_permissions']:
        group_permission['permission_spec'] = permission_str(group_permission['permission'])

    # Go and fetch more information about the worksheet contents by
    # resolving the interpreted items.
    try:
        interpreted_blocks = interpret_items(
            get_default_schemas(), worksheet_info['items'], db_model=local.model
        )
    except UsageError as e:
        # A bad worksheet still renders: surface the error, show no blocks.
        interpreted_blocks = {'blocks': []}
        worksheet_info['error'] = str(e)

    # bundle_uuids is an optional argument that, if exists, contain the uuids of
    # all the unfinished run bundles that need updating.
    # In this case, full_worksheet will return a list of item parallel to
    # ws.info.items that contain only items that need updating.
    # More specifically, all blocks that don't contain run bundles that need
    # updating are None.
    # Also, a non-None block could contain a list of bundle_infos, which
    # represent a list of bundles. Usually not all of them need updating.
    # The bundle_infos for bundles that don't need updating are also None.
    if bundle_uuids:
        for i, block in enumerate(interpreted_blocks['blocks']):
            if 'bundle_info' not in block:
                interpreted_blocks['blocks'][i] = None
            else:
                # Normalize a single bundle_info dict into a one-element list.
                if isinstance(block['bundle_info'], dict):
                    block['bundle_info'] = [block['bundle_info']]
                is_relevant_block = False
                for j, bundle in enumerate(block['bundle_info']):
                    if bundle['uuid'] in bundle_uuids:
                        is_relevant_block = True
                    else:
                        block['bundle_info'][j] = None
                if not is_relevant_block:
                    interpreted_blocks['blocks'][i] = None
    # Grouped individual items into blocks
    worksheet_info['blocks'] = resolve_interpreted_blocks(
        interpreted_blocks['blocks'], brief=brief
    )
    worksheet_info['raw_to_block'] = interpreted_blocks['raw_to_block']
    worksheet_info['block_to_raw'] = interpreted_blocks['block_to_raw']

    if directive:
        # If we're only async loading a single table_block / subworksheets_block,
        # return only that block (which is at the end of worksheet_info['items'])
        worksheet_info['blocks'] = (
            [worksheet_info['blocks'][-1]] if len(search_results) else []
        )

    for block in worksheet_info['blocks']:
        if block is None:
            continue
        if block['mode'] == 'table':
            # Replace None cell values with the frontend's placeholder string.
            for row_map in block['rows']:
                for k, v in row_map.items():
                    if v is None:
                        row_map[k] = formatting.contents_str(v)
        if 'bundle_info' in block:
            infos = []
            if isinstance(block['bundle_info'], list):
                infos = block['bundle_info']
            elif isinstance(block['bundle_info'], dict):
                infos = [block['bundle_info']]
            for bundle_info in infos:
                if bundle_info is None:
                    continue
                if 'bundle_type' not in bundle_info:
                    continue  # empty info: invalid bundle reference
                if isinstance(bundle_info, dict):
                    format_metadata(bundle_info.get('metadata'))
    # Frontend doesn't use individual 'items' for now
    del worksheet_info['items']
    if bundle_uuids:
        # Partial-update response: only the (filtered) blocks are needed.
        return {'blocks': worksheet_info['blocks']}
    return worksheet_info
def interpret_items(schemas, raw_items, db_model=None):
    """
    Interpret different items based on their types.
    :param schemas: initial mapping from name to list of schema items (columns of a table)
    :param raw_items: list of (raw) worksheet items (triples) to interpret
    :param db_model: database model which is used to query database
    :return: {'items': interpreted_items, ...}, where interpreted_items is a list of:
    {
        'mode': display mode ('markup' | 'contents' | 'image' | 'html', etc.)
        'interpreted': one of
            - rendered string
            - target = (bundle_uuid, genpath)
            - (header = (col1, ..., coln), rows = [{col1:value1, ..., coln:valuen}, ...]) [for tables]
            - {keywords: [...]} for mode = 'search' or 'wsearch'
        'properties': dict of properties (e.g., width, maxlines, etc.),
        'bundle_info': bundle_info or list of bundle_infos,
        'subworksheet_info': subworksheet,
    }
    In addition, return an alignment between the raw items and the interpreted items.
    Each interpreted item has a focusIndex, and possibly consists of a list of
    table rows (indexed by subFocusIndex).  Here is an example:
      --- Raw ---                     --- Interpreted ---
      rawIndex                                         (focusIndex, subFocusIndex)
      0        % display table
      1        [bundle]               [table - row 0   (0, 0)
      2        [bundle]                       - row 1] (0, 1)
      3
      4        hello                  [markup          (1, 0)
      5        world                          ]
      6        [worksheet]            [worksheet]      (2, 0)
      7
    The mapping should be computed as follows:
    - Some raw items contribute directly to a particular interpreted item.
    - Others (blank lines, directives, schema definitions) don't.
    - Those that don't should get mapped to the next interpreted item.
    """
    raw_to_block = []  # rawIndex => (focusIndex, subFocusIndex)

    # Set default schema
    current_schema = None
    default_display = ('table', 'default')
    current_display = default_display
    blocks = []
    # Pending items collected until the next flush: bundles are stored as
    # (raw_index, bundle_info) pairs, worksheets as plain info dicts.
    bundle_infos = []
    worksheet_infos = []

    def get_schema(args):  # args is a list of schema names
        """Resolve schema names into a combined list of schema items."""
        args = args if len(args) > 0 else ['default']
        schema = []
        for arg in args:
            # If schema doesn't exist, then treat as item (e.g., uuid).
            schema += schemas.get(arg, canonicalize_schema_items([arg.split(':', 2)]))
        return schema

    def is_missing(info):
        # A bundle we could not fetch has no 'metadata' key.
        return 'metadata' not in info

    def parse_properties(args):
        """Parse 'key=value' tokens into a dict; raise UsageError on bad tokens."""
        properties = {}
        for item in args:
            if '=' not in item:
                raise UsageError('Expected <key>=<value>, but got %s' % item)
            key, value = item.split('=', 1)
            properties[key] = value
        return properties

    def genpath_to_target(bundle_info, genpath):
        # bundle_info, '/stdout' => target = (uuid, 'stdout')
        if not is_file_genpath(genpath):
            raise UsageError('Not file genpath: %s' % genpath)
        # strip off the leading / from genpath to create a subpath in the target.
        return (bundle_info['uuid'], genpath[1:])

    def flush_bundles(bundle_block_start_index):
        """
        Having collected bundles in |bundle_infos|, flush them into |blocks|,
        potentially as a single table depending on the mode.
        bundle_block_start_index: The raw index for % display <mode> schema
        """
        if len(bundle_infos) == 0:
            return

        def raise_genpath_usage_error():
            raise UsageError(
                'Expected \'% display ' + mode + ' (genpath)\', but got \'% display '
                + ' '.join([mode] + args)
                + '\''
            )

        # Print out the current bundles somehow
        mode = current_display[0]
        args = current_display[1:]
        if mode == 'hidden':
            pass
        elif mode == 'contents' or mode == 'image':
            # One block per bundle, showing a single file target.
            for item_index, bundle_info in bundle_infos:
                if is_missing(bundle_info):
                    blocks.append(
                        MarkupBlockSchema()
                        .load({'text': 'ERROR: cannot access bundle', 'error': True})
                        .data
                    )
                    continue
                # Parse arguments
                if len(args) == 0:
                    raise_genpath_usage_error()
                # these two are required for the target
                (bundle_uuid, target_genpath) = genpath_to_target(bundle_info, args[0])
                properties = parse_properties(args[1:])
                block_object = {
                    'target_genpath': target_genpath,
                    'bundles_spec': BundleUUIDSpecSchema()
                    .load(BundleUUIDSpecSchema.create_json([bundle_info]))
                    .data,
                    'status': FetchStatusSchema.get_unknown_status(),
                }
                if mode == 'contents':
                    try:
                        block_object['max_lines'] = int(
                            properties.get('maxlines', DEFAULT_CONTENTS_MAX_LINES)
                        )
                    except ValueError:
                        raise UsageError("maxlines must be integer")
                    blocks.append(BundleContentsBlockSchema().load(block_object).data)
                elif mode == 'image':
                    block_object['width'] = properties.get('width', None)
                    block_object['height'] = properties.get('height', None)
                    blocks.append(BundleImageBlockSchema().load(block_object).data)
        elif mode == 'record':
            # display record schema =>
            # key1: value1
            # key2: value2
            # ...
            schema = get_schema(args)
            for item_index, bundle_info in bundle_infos:
                header = ('key', 'value')
                rows = []
                for (name, genpath, post, _) in schema:
                    rows.append(
                        RecordsRowSchema()
                        .load(
                            {
                                'key': name + ':',
                                'value': apply_func(
                                    post, interpret_genpath(bundle_info, genpath)
                                ),
                            }
                        )
                        .data
                    )
                blocks.append(
                    RecordsBlockSchema()
                    .load(
                        {
                            'bundles_spec': BundleUUIDSpecSchema()
                            .load(BundleUUIDSpecSchema.create_json([bundle_info]))
                            .data,
                            'status': FetchStatusSchema.get_unknown_status(),
                            'header': header,
                            'rows': rows,
                            'sort_keys': [bundle_info["sort_key"]],
                            'first_bundle_source_index': bundle_block_start_index,
                            'using_schemas': args if len(args) > 0 else ['default'],
                        }
                    )
                    .data
                )
        elif mode == 'table':
            # display table schema =>
            # key1       key2
            # b1_value1  b1_value2
            # b2_value1  b2_value2
            schema = get_schema(args)
            header = tuple(name for (name, genpath, post, _) in schema)
            rows = []
            processed_bundle_infos = []
            # Cache the mapping between owner_id to owner on current worksheet
            owner_cache = {}
            for item_index, bundle_info in bundle_infos:
                if 'metadata' in bundle_info:
                    rows.append(
                        {
                            name: apply_func(
                                post,
                                interpret_genpath(
                                    bundle_info,
                                    genpath,
                                    db_model=db_model,
                                    owner_cache=owner_cache,
                                ),
                            )
                            for (name, genpath, post, _) in schema
                        }
                    )
                    processed_bundle_infos.append(copy.deepcopy(bundle_info))
                else:
                    # The front-end relies on the name metadata field existing
                    processed_bundle_info = copy.deepcopy(bundle_info)
                    processed_bundle_info['metadata'] = {'name': '<invalid>'}
                    rows.append(
                        {
                            name: apply_func(
                                post, interpret_genpath(processed_bundle_info, genpath)
                            )
                            for (name, genpath, post, _) in schema
                        }
                    )
                    processed_bundle_infos.append(processed_bundle_info)
            blocks.append(
                TableBlockSchema()
                .load(
                    {
                        'bundles_spec': BundleUUIDSpecSchema()
                        .load(BundleUUIDSpecSchema.create_json(processed_bundle_infos))
                        .data,
                        'status': FetchStatusSchema.get_unknown_status(),
                        'header': header,
                        'rows': rows,
                        'sort_keys': [
                            processed_bundle_info["sort_key"]
                            for processed_bundle_info in processed_bundle_infos
                        ],
                        'first_bundle_source_index': bundle_block_start_index,
                        'using_schemas': args if len(args) > 0 else ['default'],
                    }
                )
                .data
            )
        elif mode == 'graph':
            # display graph <genpath> <properties>
            if len(args) == 0:
                raise_genpath_usage_error()
            # trajectories is list of {
            #   'uuid': ...,
            #   'display_name': ..., # What to show as the description of a bundle
            #   'target': (bundle_uuid, subpath)
            # }
            properties = parse_properties(args[1:])
            trajectories = [
                {
                    'bundle_uuid': bundle_info['uuid'],
                    'display_name': interpret_genpath(
                        bundle_info, properties.get('display_name', 'name')
                    ),
                    'target_genpath': genpath_to_target(bundle_info, args[0])[1],
                }
                for item_index, bundle_info in bundle_infos
            ]
            try:
                max_lines = int(properties.get('maxlines', DEFAULT_CONTENTS_MAX_LINES))
            except ValueError:
                raise UsageError("maxlines must be integer")
            blocks.append(
                GraphBlockSchema()
                .load(
                    {
                        'trajectories': trajectories,
                        'bundles_spec': BundleUUIDSpecSchema()
                        .load(BundleUUIDSpecSchema.create_json([bundle_infos[0][1]]))
                        .data,  # Only show the first one for now
                        # 'bundles_spec': BundleUUIDSpecSchema().load(BundleUUIDSpecSchema.create_json(
                        #     [copy.deepcopy(bundle_info) for item_index, bundle_info in bundle_infos]).data,
                        'max_lines': max_lines,
                        'xlabel': properties.get('xlabel', None),
                        'ylabel': properties.get('ylabel', None),
                    }
                )
                .data
            )
        else:
            raise UsageError('Unknown display mode: %s' % mode)
        bundle_infos[:] = []  # Clear

    def flush_worksheets():
        """Flush collected subworksheets into a single SubworksheetsBlock."""
        if len(worksheet_infos) == 0:
            return
        blocks.append(
            SubworksheetsBlock()
            .load(
                {
                    'subworksheet_infos': copy.deepcopy(worksheet_infos),
                    'sort_keys': [
                        worksheet_info["sort_key"] for worksheet_info in worksheet_infos
                    ],
                }
            )
            .data
        )
        worksheet_infos[:] = []

    # Go through all the raw items...
    last_was_empty_line = False
    bundle_block_start_index = -1  # records line for the current run of bundles
    current_schema_name = None
    current_schema_ids = []
    for raw_index, item in enumerate(raw_items):
        new_last_was_empty_line = True
        try:
            (bundle_info, subworksheet_info, value_obj, item_type, item_id, sort_key) = item
            is_bundle = item_type == TYPE_BUNDLE
            is_search = item_type == TYPE_DIRECTIVE and get_command(value_obj) == 'search'
            is_directive = item_type == TYPE_DIRECTIVE
            is_worksheet = item_type == TYPE_WORKSHEET
            # A non-bundle item terminates the current bundle run; flush it.
            if not is_bundle:
                flush_bundles(bundle_block_start_index)
                bundle_block_start_index = -1
            if not is_worksheet:
                flush_worksheets()
            # Reset display to minimize long distance dependencies of directives
            if not (is_bundle or is_search):
                current_display = default_display
            # Reset schema to minimize long distance dependencies of directives
            command = get_command(value_obj)
            if not is_directive or (command != "add" and command != "addschema"):
                # Any item other than add/addschema ends the open schema
                # definition; emit it as a SchemaBlock.
                if current_schema is not None:
                    blocks.append(
                        SchemaBlockSchema()
                        .load(
                            {
                                'status': FetchStatusSchema.get_unknown_status(),
                                'header': ["field", "generalized-path", "post-processor"],
                                'schema_name': current_schema_name,
                                'field_rows': [
                                    {
                                        "field": name,
                                        "generalized-path": path,
                                        "post-processor": post,
                                        "from_schema_name": from_schema_name,
                                    }
                                    for name, path, post, from_schema_name in current_schema
                                ],
                                'sort_keys': [sort_key],
                                'ids': current_schema_ids,
                            }
                        )
                        .data
                    )
                    current_schema = None
                    current_schema_ids = []

            if item_type == TYPE_BUNDLE:
                if bundle_block_start_index == -1:
                    bundle_block_start_index = raw_index
                bundle_info = dict(bundle_info, sort_key=sort_key)
                # Map this raw item to the block it will land in once flushed.
                raw_to_block.append((len(blocks), len(bundle_infos)))
                bundle_infos.append((raw_index, bundle_info))
            elif item_type == TYPE_WORKSHEET:
                subworksheet_info = dict(subworksheet_info, sort_key=sort_key)
                raw_to_block.append((len(blocks), len(worksheet_infos)))
                worksheet_infos.append(subworksheet_info)
            elif item_type == TYPE_MARKUP:
                new_last_was_empty_line = value_obj == ''
                if (
                    len(blocks) > 0
                    and blocks[-1]['mode'] == BlockModes.markup_block
                    and not last_was_empty_line
                    and not new_last_was_empty_line
                ):
                    # Join with previous markup item
                    blocks[-1]['text'] += '\n' + value_obj
                    # Ids
                    blocks[-1]['ids'] = blocks[-1].get('ids', [])
                    blocks[-1]['ids'].append(item_id)
                    blocks[-1]['sort_keys'] = blocks[-1].get('sort_keys', [])
                    blocks[-1]['sort_keys'].append(sort_key)
                elif not new_last_was_empty_line:
                    block = (
                        MarkupBlockSchema()
                        .load(
                            {
                                'id': len(blocks),
                                'text': value_obj,
                                'ids': [item_id],
                                'sort_keys': [sort_key],
                            }
                        )
                        .data
                    )
                    blocks.append(block)
                # Important: set raw_to_block after so we can focus on current item.
                if new_last_was_empty_line:
                    raw_to_block.append(None)
                else:
                    raw_to_block.append((len(blocks) - 1, 0))
            elif item_type == TYPE_DIRECTIVE:
                command = get_command(value_obj)
                appended_directive_blocks_index = False
                if command == '%' or command == '' or command is None:
                    # Comment
                    pass
                elif command == 'schema':
                    # Start defining new schema
                    if len(value_obj) < 2:
                        raise UsageError("`schema` missing name")
                    name = value_obj[1]
                    current_schema_ids.append(item_id)
                    current_schema_name = name
                    schemas[name] = current_schema = []
                    # Schema block should also be considered when calculating the focus index
                    raw_to_block.append((len(blocks) - 1 + len(current_schema_ids), 0))
                    appended_directive_blocks_index = True
                elif command == 'addschema':
                    # Add to schema
                    if current_schema is None:
                        raise UsageError("`addschema` must be preceded by `schema` directive")
                    if len(value_obj) < 2:
                        raise UsageError("`addschema` missing name")
                    name = value_obj[1]
                    current_schema_ids.append(item_id)
                    current_schema += schemas[name]
                elif command == 'add':
                    # Add to schema
                    if current_schema is None:
                        raise UsageError("`add` must be preceded by `schema` directive")
                    current_schema_ids.append(item_id)
                    schema_item = canonicalize_schema_item(value_obj[1:], current_schema_name)
                    current_schema.append(schema_item)
                elif command == 'display':
                    # Set display
                    current_display = value_obj[1:]
                elif command in ('search', 'wsearch'):
                    # Show item placeholders in brief mode
                    blocks.append(
                        PlaceholderBlockSchema()
                        .load(
                            {
                                'directive': formatting.tokens_to_string(value_obj),
                                'sort_keys': [sort_key],
                            }
                        )
                        .data
                    )
                    appended_directive_blocks_index = True
                    raw_to_block.append((len(blocks) - 1, 0))
                else:
                    raise UsageError("unknown directive `%s`" % command)
                # Add an empty item placeholder for other directives, since they do not represent substantial items
                if not appended_directive_blocks_index:
                    raw_to_block.append(None)
            else:
                raise RuntimeError('Unknown worksheet item type: %s' % item_type)

            # Flush bundles, subworksheets and schema items once more at the end
            if raw_index == len(raw_items) - 1:
                flush_bundles(bundle_block_start_index)
                bundle_block_start_index = -1
                flush_worksheets()
                if current_schema is not None:
                    blocks.append(
                        SchemaBlockSchema()
                        .load(
                            {
                                'status': FetchStatusSchema.get_unknown_status(),
                                'header': ["field", "generalized-path", "post-processor"],
                                'schema_name': current_schema_name,
                                'field_rows': [
                                    {
                                        "field": name,
                                        "generalized-path": path,
                                        "post-processor": post,
                                        "from_schema_name": from_schema_name,
                                    }
                                    for name, path, post, from_schema_name in current_schema
                                ],
                                'sort_keys': [sort_key],
                                'ids': current_schema_ids,
                            }
                        )
                        .data
                    )
        except UsageError as e:
            # User error in the worksheet source: render the error inline and
            # drop any partially-collected state.
            current_schema = None
            bundle_infos[:] = []
            worksheet_infos[:] = []
            blocks.append(
                MarkupBlockSchema()
                .load(
                    {
                        'text': 'Error in source line %d: %s' % (raw_index + 1, str(e)),
                        'error': True,
                    }
                )
                .data
            )
            raw_to_block.append((len(blocks) - 1, 0))
        except Exception:
            # Unexpected error: log the traceback server-side and render a
            # generic error block rather than failing the whole worksheet.
            current_schema = None
            bundle_infos[:] = []
            worksheet_infos[:] = []
            import traceback

            traceback.print_exc()
            blocks.append(
                MarkupBlockSchema()
                .load(
                    {
                        'text': 'Unexpected error while parsing line %d' % (raw_index + 1),
                        'error': True,
                    }
                )
                .data
            )
            raw_to_block.append((len(blocks) - 1, 0))
        finally:
            last_was_empty_line = new_last_was_empty_line

    # TODO: fix inconsistencies resulting from UsageErrors thrown in flush_bundles()
    if len(raw_to_block) != len(raw_items):
        print(
            "WARNING: Length of raw_to_block does not match length of raw_items",
            file=sys.stderr,
        )

    # Package the result
    block_to_raw = {}
    next_interpreted_index = None
    # Go in reverse order so we can assign raw items that map to None to the next interpreted item
    for raw_index, interpreted_index in reversed(list(enumerate(raw_to_block))):
        if interpreted_index is None:  # e.g., blank line, directive
            interpreted_index = next_interpreted_index
            raw_to_block[raw_index] = interpreted_index
        else:
            interpreted_index_str = str(interpreted_index[0]) + ',' + str(interpreted_index[1])
            if interpreted_index_str not in block_to_raw:  # Bias towards the last item
                block_to_raw[interpreted_index_str] = raw_index
            next_interpreted_index = interpreted_index

    # Return the result
    result = {}
    result['blocks'] = blocks
    result['raw_to_block'] = raw_to_block
    result['block_to_raw'] = block_to_raw
    return result